Commit e0dead58 by Lenovo

Xinwen updated the figures and the data in them.

parent 59c0881b
......@@ -178,15 +178,15 @@ I_{\text{absorbing}}\dot{=}\begin{array}{c|c}
\end{array}
\]
Then,\highlight{
Then,{
\[
N_{\text{absorbing}}\dot{=}\begin{array}{c|ccccc}
& \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{A} & 0 & 0 & 0 & 0 & 0 \\
\text{B} & 0 & 0 & 0 & 0 & 0 \\
\text{C} & 0 & 0 & 0 & 0 & 0 \\
\text{D} & 0 & 0 & 0 & 0 & 0 \\
\text{E} & 0 & 0 & 0 & 0 & 0
\text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\
\text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\
\text{C} & 1 & 2 & 3 & 2 & 1 \\
\text{D} & \frac{2}{3} & \frac{4}{3} & 2 & \frac{8}{3} & \frac{4}{3} \\
\text{E} & \frac{1}{3} & \frac{2}{3} & 1 & \frac{4}{3} & \frac{5}{3} \\
\end{array}
\],
}
......
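The new entries above match the fundamental matrix $N=(I-Q)^{-1}$ of a symmetric random walk on transient states A–E with an absorbing barrier one step beyond each end; that walk structure is an assumption inferred from the entries, not stated in the diff. A minimal sketch verifying the displayed matrix with exact rational arithmetic:

```python
from fractions import Fraction as F

# Transient states A..E (indices 0..4) of a symmetric random walk;
# absorbing barriers sit one step beyond A and beyond E (an assumption
# inferred from the matrix entries in the commit).
n = 5
Q = [[F(0)] * n for _ in range(n)]
for i in range(n):
    if i > 0:
        Q[i][i - 1] = F(1, 2)
    if i < n - 1:
        Q[i][i + 1] = F(1, 2)

# Candidate fundamental matrix from the commit: N[i][j] is the expected
# number of visits to transient state j starting from state i.
N = [[F(2 * min(i + 1, j + 1) * (6 - max(i + 1, j + 1)), 6)
      for j in range(n)] for i in range(n)]

# Check N = (I - Q)^{-1} by verifying (I - Q) N = I exactly.
I = [[F(int(i == j)) for j in range(n)] for i in range(n)]
prod = [[sum((I[i][k] - Q[i][k]) * N[k][j] for k in range(n))
         for j in range(n)] for i in range(n)]
assert prod == I
print(N[0])  # first row: 5/3, 4/3, 1, 2/3, 1/3, as in the diff
```

The check confirms the non-zero matrix added by the commit (replacing the all-zero placeholder) is consistent with $N_{\text{absorbing}}=(I-Q)^{-1}$.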
......@@ -63,10 +63,10 @@ softmax or $\epsilon$-greedy strategies.
\begin{figure*}[!t]
\centering
\subfloat[2048 Game]{\includegraphics[width=3in]{pic/2048epsilon-greedy}%
\subfloat[2048 Game]{\includegraphics[width=3in]{pic/2048epsilon-greedy2}%
\label{fig_second_case}}
\hfil
\subfloat[Maze]{\includegraphics[width=3in]{pic/maze-eps-greedy}%
\subfloat[Maze]{\includegraphics[width=3in]{pic/maze-eps-greedy2}%
\label{fig_first_case}}
\caption{Comparison of returns of $\epsilon$-greedy strategies.}
\label{fig_sim}
......@@ -78,12 +78,12 @@ To validate the above point, we designed two sets of experiments,
combined with an $\epsilon$-greedy exploration strategy,
testing the average score and standard deviation obtained
for different values $\epsilon\in$\{0, 0.001, 0.002, 0.004,
0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512\}.
0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 0.6, 0.7, 0.8, 0.9, 1.0\}.
In the 2048 game, the value function is based on N-tuple network
trained with optimistic initialization \cite{guei2021optimistic},
achieving an average score of \highlight{300,000}.
achieving an average score of {350,000}.
In the maze game, the optimal value function is used,
with the optimal policy achieving a score of \highlight{-58} points.
with the optimal policy achieving a score of {-54} points.
As shown in Figure \ref{fig_sim},
the x-axis represents $\epsilon$,
the y-axis represents the average score per game,
......
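The exploration rule swept in the experiment above can be sketched as follows; `q_values` is a hypothetical list of action values, and the function is a generic $\epsilon$-greedy selector, not the paper's exact implementation:

```python
import random

def epsilon_greedy(q_values, epsilon, rng=random):
    """With probability epsilon pick a uniformly random action,
    otherwise pick the greedy (argmax) action."""
    if rng.random() < epsilon:
        return rng.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])

# With epsilon = 0 the choice is always greedy (action 1 here);
# the experiment sweeps epsilon from 0 up to 1.0.
rng = random.Random(0)
picks = [epsilon_greedy([0.1, 0.9, 0.4], 0.0, rng) for _ in range(5)]
print(picks)  # -> [1, 1, 1, 1, 1]
```

At $\epsilon=1.0$ (the new upper end of the sweep) every action is uniform random, so the curve in Figure \ref{fig_sim} spans fully greedy to fully random behavior.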
File added
File added