From 38145d790332247e469e9b136a5d9732a562b2a2 Mon Sep 17 00:00:00 2001 From: Lenovo Date: Fri, 24 May 2024 23:20:40 +0800 Subject: [PATCH] 先写到这里 --- documen.synctex.gz | Bin 2366 -> 0 bytes document.tex | 18 +++++++++++------- main/2048prove.tex | 4 ++-- main/background.tex | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- main/introduction.tex | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------- pic/randomWalk.tex | 39 ++++++++++++++++++++++++--------------- pic/randomWalkRestart.tex | 43 +++++++++++++++++++++++++++++-------------- references.bib | 39 +++++++++++++++++++++++++++++++++++++-- 8 files changed, 283 insertions(+), 80 deletions(-) delete mode 100644 documen.synctex.gz diff --git a/documen.synctex.gz b/documen.synctex.gz deleted file mode 100644 index 468abbf..0000000 Binary files a/documen.synctex.gz and /dev/null differ diff --git a/document.tex b/document.tex index 1731c48..ac8548c 100644 --- a/document.tex +++ b/document.tex @@ -1,5 +1,6 @@ \documentclass[lettersize,journal]{IEEEtran} \usepackage{amsmath,amsfonts} +\usepackage{nicematrix} \usepackage{algorithmic} \usepackage{algorithm} \usepackage{array} @@ -9,6 +10,7 @@ \usepackage{url} \usepackage{verbatim} \usepackage{graphicx} +%\usepackage{natbib} \newtheorem{theorem}{Theorem} \newtheorem{proposition}[theorem]{Proposition} @@ -26,7 +28,7 @@ \usetikzlibrary{decorations.markings} \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore} % updated with editorial comments 8/9/2021 - +\newcommand{\highlight}[1]{\textcolor{red}{#1}} \begin{document} \title{Non-ergodicity of Game 2048} @@ -55,7 +57,7 @@ wangwenhao11@nudt.edu.cn). 
\markboth{IEEE Transaction on Games,~Vol.~14, No.~8, August~202X}% {Shell \MakeLowercase{\textit{et al.}}: A Sample Article Using IEEEtran.cls for IEEE Journals} -\IEEEpubid{0000--0000/00\$00.00~\copyright~2024 IEEE} +%\IEEEpubid{0000--0000/00\$00.00~\copyright~2024 IEEE} % Remember, if you use this you must call \IEEEpubidadjcol in the second % column for its text to clear the IEEEpubid mark. @@ -69,12 +71,14 @@ wangwenhao11@nudt.edu.cn). \end{IEEEkeywords} -\input{main/background} + \input{main/introduction} -\input{main/nonergodicity} -\input{main/paradox} -\input{main/theorem} -\input{main/2048prove} +\input{main/background} + +%\input{main/nonergodicity} +%\input{main/paradox} +%\input{main/theorem} +%\input{main/2048prove} diff --git a/main/2048prove.tex b/main/2048prove.tex index 92976b8..9039eb3 100644 --- a/main/2048prove.tex +++ b/main/2048prove.tex @@ -21,7 +21,7 @@ p=2^{64} \cdot \sum_{m=0}^{15} I(B_m \neq 0) \cdot 2^{B_m} + \sum_{m=0}^{15} (1 本文将这个结果放在比64bit更高的位置上,也就是 64-84bit的位置。这个编码的主要含义是,将局面所有数字之和放在高bit位置上,排序时局面之和大的排在后面, 状态转移时就是从小的下标转移到大的下标上。另外后面64bit就是局面的编码,来保证这个值的唯一性,一个局面会对应一个唯一的值。 -\input{../pic/2048encode} +\input{pic/2048encode} 上面的图中的这个局面的编码$p=(1≪64)∙30784+0x FEDC 5432 0000 0020$。 本文是按照从下往上,从右往左的顺序给格子进行排列,右下角的格子是最低位,左上角的格子是最高位。 @@ -39,7 +39,7 @@ p=2^{64} \cdot \sum_{m=0}^{15} I(B_m \neq 0) \cdot 2^{B_m} + \sum_{m=0}^{15} (1 根据游戏规则,两个相同幂次的方块碰撞会合并成为一个幂次加一的方块, 然后会在一个空格位置随机生成一个2或者4的方块,这一过程本文记为$S_i\to S_(i^')\to S_j$。 -\input{../pic/2048example-p} +\input{pic/2048example-p} 如图3.5所示根据我们的规则可以保证,状态在后的排序也靠后。也就是说在$S_i\to S_j$的过程中,能够保证$p_i] (A) -- (DEAD); - \draw[->] (B) -- (A); - \draw[->] (B) to [bend left=30] (C); - \draw[->] (C) to [bend left=30] (B); - \draw[->] (C) to [bend left=30] (D); - \draw[->] (D) to [bend left=30] (C); - \draw[->] (D) -- (E); - \draw[->] (E) -- (DEAD2); + \draw[->] (A) -- node {0.5} (DEAD); + \draw[->] (A) to [bend left=30] node {0.5} (B); + \draw[->] (B) to [bend left=30] node {0.5} (A); + \draw[->] (B) to [bend left=30] 
node {0.5} (C); + \draw[->] (C) to [bend left=30] node {0.5} (B); + \draw[->] (C) to [bend left=30] node {0.5} (D); + \draw[->] (D) to [bend left=30] node {0.5} (C); + \draw[->] (D) to [bend left=30] node {0.5} (E); + \draw[->] (E) to [bend left=30] node {0.5} (D); + \draw[->] (E) -- node {0.5} (DEAD2); \draw[->] ([yshift=4ex]C.north) -- ([yshift=4.5ex]C.south); -\end{tikzpicture} \ No newline at end of file +\end{tikzpicture} +} +\caption{Random walk with absorbing states.} +\label{randomwalk} +\end{figure} diff --git a/pic/randomWalkRestart.tex b/pic/randomWalkRestart.tex index e6d4f67..243e318 100644 --- a/pic/randomWalkRestart.tex +++ b/pic/randomWalkRestart.tex @@ -1,18 +1,33 @@ +\begin{figure}[!t] +\centering +\scalebox{0.9}{ \begin{tikzpicture} - - \node[draw, circle] (A) at (0,0) {A}; - \node[draw, circle] (B) at (2,0) {B}; - \node[draw, circle] (C) at (4,0) {C}; + \node[draw, rectangle, fill=gray!50] (DEAD) at (0,0) {T$_1$}; + \node[draw, rectangle, fill=gray!50] (DEAD2) at (9,0) {T$_2$}; + \node[draw, circle] (A) at (1.5,0) {A}; + \node[draw, circle] (B) at (3,0) {B}; + \node[draw, circle] (C) at (4.5,0) {C}; \node[draw, circle] (D) at (6,0) {D}; - \node[draw, circle] (E) at (8,0) {E}; + \node[draw, circle] (E) at (7.5,0) {E}; + + \draw[->] (DEAD.south) to [bend right=30] node {0.5} (C.south); + \draw[->] (A) -- node {0.5} (DEAD); + \draw[->] (A) to [bend left=30] node {0.5} (B); + \draw[->] (B) to [bend left=30] node {0.5} (A); + \draw[->] (B) to [bend left=30] node {0.5} (C); + \draw[->] (C) to [bend left=30] node {0.5} (B); + \draw[->] (C) to [bend left=30] node {0.5} (D); + \draw[->] (D) to [bend left=30] node {0.5} (C); + \draw[->] (D) to [bend left=30] node {0.5} (E); + \draw[->] (E) to [bend left=30] node {0.5} (D); + \draw[->] (E) -- node {0.5} (DEAD2); + \draw[->] (DEAD2.south) to [bend left=30] node {0.5} (C.south); + + \draw[->] ([yshift=4ex]C.north) -- ([yshift=4.5ex]C.south); +\end{tikzpicture} +} +\caption{Random walk with restarts.} 
+\label{randomwalkRestart} +\end{figure} - \draw[->] (A.north) to [bend left=30] (C.north) - \draw[->] (B) -- (A); - \draw[->] (B) to [bend left=30] (C); - \draw[->] (C) to [bend left=30] (B); - \draw[->] (C) to [bend left=30] (D); - \draw[->] (D) to [bend left=30] (C); - \draw[->] (D) -- (E); - \draw[->] (E.south) to [bend left=30] (C.south) -\end{tikzpicture} \ No newline at end of file diff --git a/references.bib b/references.bib index 4cddacb..57e410f 100644 --- a/references.bib +++ b/references.bib @@ -19,6 +19,27 @@ year={1979}, publisher={IEEE} } +@article{wiewiora2003potential, + title={Potential-based shaping and Q-value initialization are equivalent}, + author={Wiewiora, Eric}, + journal={Journal of Artificial Intelligence Research}, + volume={19}, + pages={205--208}, + year={2003} +} +@inproceedings{devlin2012dynamic, + title={Dynamic potential-based reward shaping}, + author={Devlin, Sam Michael and Kudenko, Daniel}, + booktitle={11th International Conference on Autonomous Agents and Multiagent Systems (AAMAS 2012)}, + pages={433--440}, + year={2012}, + organization={IFAAMAS} +} +@misc{abdelkader20152048, + title={2048 is NP-Complete}, + author={Abdelkader, Ahmed and Acharya, Aditya and Dasler, Philip}, + year={2015}, +} @incollection{bangole2023game, title={Game Playing (2048) Using Deep Neural Networks}, author={Bangole, Narendra Kumar Rao and Moulya, RB and Pranthi, R and Reddy, Sreelekha and Namratha, R}, @@ -50,9 +71,17 @@ volume={14}, number={3}, pages={478--487}, - year={2021}, + year={2022}, publisher={IEEE} } +@inproceedings{rodgers2014an, + title={An investigation into 2048 AI strategies}, + author={Rodgers, Philip and Levine, John}, + booktitle={2014 IEEE Conference on Computational Intelligence and Games}, + pages={1--2}, + year={2014}, + organization={IEEE} +} @inproceedings{szubert2014temporal, title={Temporal difference learning of n-tuple networks for the game 2048}, author={Szubert, Marcin and Ja{\'s}kowski, Wojciech}, @@ -131,7 +160,13 
@@ year={2021}, publisher={Information Processing Society of Japan} } - +@book{Sutton2018book, + author = {Sutton, Richard S. and Barto, Andrew G.}, + edition = {Second}, + publisher = {The MIT Press}, + title = {Reinforcement Learning: An Introduction}, + year = {2018} +} -- libgit2 0.26.0