From e0dead58f6821f43009c25d775e5b0598d0023d5 Mon Sep 17 00:00:00 2001 From: Lenovo Date: Sun, 26 May 2024 05:45:53 +0800 Subject: [PATCH] 昕闻更新了图,以及图里面的数据 --- main/background.tex | 12 ++++++------ main/introduction.tex | 10 +++++----- pic/2048.xlsx | Bin 0 -> 10781 bytes pic/2048epsilon-greedy2.pdf | Bin 0 -> 15224 bytes pic/maze-eps-greedy2.pdf | Bin 0 -> 16980 bytes pic/迷宫.xlsx | Bin 0 -> 10470 bytes 6 files changed, 11 insertions(+), 11 deletions(-) create mode 100644 pic/2048.xlsx create mode 100644 pic/2048epsilon-greedy2.pdf create mode 100644 pic/maze-eps-greedy2.pdf create mode 100644 pic/迷宫.xlsx diff --git a/main/background.tex b/main/background.tex index 87525e4..e5295c4 100644 --- a/main/background.tex +++ b/main/background.tex @@ -178,15 +178,15 @@ I_{\text{absorbing}}\dot{=}\begin{array}{c|c} \end{array} \] -Then,\highlight{ +Then,{ \[ N_{\text{absorbing}}\dot{=}\begin{array}{c|ccccc} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline -\text{A} & 0 & 0 & 0 & 0 & 0 \\ -\text{B} & 0 & 0 & 0 & 0 & 0 \\ -\text{C} & 0 & 0 & 0 & 0 & 0 \\ -\text{D} & 0 & 0 & 0 & 0 & 0 \\ -\text{E} & 0 & 0 & 0 & 0 & 0 +\text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\ +\text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\ +\text{C} & 1 & 2 & 3 & 2 & 1 \\ +\text{D} & \frac{2}{3} & \frac{4}{3} & 2 & \frac{8}{3} & \frac{4}{3} \\ +\text{E} & \frac{1}{3} & \frac{2}{3} & 1 & \frac{4}{3} & \frac{5}{3} \\ \end{array} \], } diff --git a/main/introduction.tex b/main/introduction.tex index a7a1374..3fc13e7 100644 --- a/main/introduction.tex +++ b/main/introduction.tex @@ -63,10 +63,10 @@ softmax or $\epsilon$-greedy strategies. 
\begin{figure*}[!t] \centering -\subfloat[2048 Game]{\includegraphics[width=3in]{pic/2048epsilon-greedy}% +\subfloat[2048 Game]{\includegraphics[width=3in]{pic/2048epsilon-greedy2}% \label{fig_second_case}} \hfil -\subfloat[Maze]{\includegraphics[width=3in]{pic/maze-eps-greedy}% +\subfloat[Maze]{\includegraphics[width=3in]{pic/maze-eps-greedy2}% \label{fig_first_case}} \caption{Comparison of returns of $\epsilon$-greedy strageties.} \label{fig_sim} @@ -78,12 +78,12 @@ To validate the above point, we designed two sets of experiments, combined with an $\epsilon$-greedy exploration strategy, testing the average score and standard deviation obtained for different values $\epsilon\in$\{0, 0.001, 0.002, 0.004, - 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512\}. + 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 0.6, 0.7, 0.8, 0.9, 1.0\}. In the 2048 game, the value function is based on N-tuple network trained with optimistic initialization \cite{guei2021optimistic}, - achieving an average score of \highlight{300,000}. + achieving an average score of {350,000}. In the maze game, the optimal value function is used, - with the optimal policy achieving a score of \highlight{-58} points. + with the optimal policy achieving a score of {$-54$} points. 
As shown in Figure \ref{fig_sim}, the x-axis represents $\epsilon$, the y-axis represents the average score per game, diff --git a/pic/2048.xlsx b/pic/2048.xlsx new file mode 100644 index 0000000..a75fe39 Binary files /dev/null and b/pic/2048.xlsx differ diff --git a/pic/2048epsilon-greedy2.pdf b/pic/2048epsilon-greedy2.pdf new file mode 100644 index 0000000..146c58e Binary files /dev/null and b/pic/2048epsilon-greedy2.pdf differ diff --git a/pic/maze-eps-greedy2.pdf b/pic/maze-eps-greedy2.pdf new file mode 100644 index 0000000..06959c8 Binary files /dev/null and b/pic/maze-eps-greedy2.pdf differ diff --git "a/pic/\350\277\267\345\256\253.xlsx" "b/pic/\350\277\267\345\256\253.xlsx" new file mode 100644 index 0000000..9cbf2da Binary files /dev/null and "b/pic/\350\277\267\345\256\253.xlsx" differ -- libgit2 0.26.0