写一个清晰版本，给李昕闻梳理

7f6fb8c1 · Lenovo · 20baaa14 · 7f6fb8c1 · 7f6fb8c1 · 7f6fb8c1
Commit 7f6fb8c1 authored May 02, 2024 by Lenovo
12 changed files
--- a/.project
+++ b/.project
@@ -14,4 +14,11 @@
 	<natures>
 		<nature>org.eclipse.texlipse.builder.TexlipseNature</nature>
 	</natures>
+	<linkedResources>
+		<link>
+			<name>.tex</name>
+			<type>1</type>
+			<location>c:/texlive/2023/texmf-dist/tex/latex/tools/.tex</location>
+		</link>
+	</linkedResources>
 </projectDescription>
--- a/.texlipse
+++ b/.texlipse
 #TeXlipse project settings
-#Sun Apr 14 11:03:21 CST 2024
+#Thu Apr 18 11:29:35 CST 2024
 outputDir=
 markDer=true
 mainTexFile=document.tex

--- a/documen.synctex.gz
+++ b/documen.synctex.gz
--- a/document.tex
+++ b/document.tex
@@ -9,7 +9,21 @@
 \usepackage{url}
 \usepackage{verbatim}
 \usepackage{graphicx}
+\newtheorem{theorem}{Theorem}
+\newtheorem{proposition}[theorem]{Proposition}
+\newtheorem{lemma}[theorem]{Lemma}
+\newtheorem{corollary}[theorem]{Corollary}
+\newtheorem{definition}[theorem]{Definition}
+\newtheorem{assumption}[theorem]{Assumption}
+\newtheorem{condition}[theorem]{Condition}
+\newtheorem{remark}[theorem]{Remark}
 \usepackage{cite}
+\usepackage{xeCJK}
+\usepackage{tikz}
+\usetikzlibrary{automata, positioning}
+\usetikzlibrary{positioning}
+\usetikzlibrary{decorations.markings}
 \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
 % updated with editorial comments 8/9/2021
@@ -55,14 +69,15 @@ wangwenhao11@nudt.edu.cn).
 \end{IEEEkeywords}
-\input{main/introduction}
+%\input{main/introduction}
-\input{main/nonergodicity}
+%\input{main/nonergodicity}
+%\input{main/paradox}
+\input{main/theorem}
-\bibliographystyle{IEEEtran.bst}
-%\bibliography{bibliography/IEEEabrv,bibliography/IEEEexample}
-\bibliography{references.bib} 
+\bibliographystyle{IEEEtran}
+\bibliography{template/IEEEabrv,references}
 \end{document}

--- a/main/introduction.tex
+++ b/main/introduction.tex
 \section{Introduction}
-\IEEEPARstart{T}{his} 
+\IEEEPARstart{G}{ame} 2048 is a popular single-player sliding block puzzle game
+played on a 4$\times$4 grid. The player
+can move the tiles in four directions---up, down, left, and right---and
+the objective is to reach the 2048 tile or a higher tile.
+While the game is simple to understand, it requires strategic
+ thinking and planning to reach the 2048 tile.
+ 2048 has gained widespread popularity due to its addictive
+  gameplay and simple mechanics, making it a favorite 
+  among puzzle game enthusiasts.
+\cite{szubert2014temporal}
+\cite{wu2014multi}
+\cite{oka2016systematic}
+\cite{matsuzaki2016systematic}
+\cite{yeh2016multistage}
+\cite{jaskowski2017mastering}
+\cite{matsuzaki2017developing}
+\cite{kondo2019playing}
+\cite{matsuzaki2020further}
+\cite{matsuzaki2021developing}
+\cite{guei2021optimistic}
+\cite{bangole2023game}
-\cite{kaplan1979sufficient}
--- a/main/nonergodicity.tex
+++ b/main/nonergodicity.tex
 \section{Non-ergodicity}
+\cite{kaplan1979sufficient}
+We assume that the state-process is ergodic---i.e., all states
+are reachable under any policy from the current state after
+sufficiently many steps~\cite{majeed2018q}.
--- a/main/paradox.tex
+++ b/main/paradox.tex
+\subsection{St. Petersburg paradox}
+The St.~Petersburg paradox is a paradox associated 
+with gambling and decision theory. It is named after the city 
+of St.~Petersburg in Russia, where the mathematician Daniel Bernoulli
+published his analysis of the problem in 1738; the problem itself
+was first posed by Nicolaus Bernoulli in 1713.
+The paradox involves a gambling game with the following rules:
+\begin{itemize}
+  \item Participants must pay a fixed entry fee to join the game.
+  \item The game continues until a coin lands heads up. 
+Each toss determines the prize, with the first heads
+ appearing on the $t$-th toss resulting in a prize of $2^t$.
+\end{itemize}
+%\input{pic/FigureParadox}
+The expected return over all outcomes, where the first heads appears on
+toss $t$ with probability $p(t)=2^{-t}$ and pays $V(t)=2^t$, is
+\begin{equation}
+\begin{split}
+\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(t)\times V(t)\\
+&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\
+&=\infty .
+\end{split}
+\end{equation}
+Although the probability that the first heads appears 
+on the $t$-th toss halves with every additional toss, 
+the prize doubles, so each term of the sum contributes 
+$1$ and the expected prize grows without bound. 
+Expected-value reasoning therefore suggests that a 
+participant should be willing to pay any finite entry 
+fee to join the game.
+This paradox challenges individuals' intuitions and 
+decision-making regarding gambling. Despite the allure 
+of a potentially substantial prize and the infinite expected
+ value of the game, large prizes occur only with very small
+  probability, so in practice few people would pay more than
+   a small fee. Consequently, a participant who pays a large
+   entry fee is very likely to suffer a monetary loss.
--- a/main/theorem.tex
+++ b/main/theorem.tex
+\section{Ergodicity and nonergodicity of a Markov chain}
+\begin{assumption}
+\label{assumption1}
+In the sequel, $\{X_n\}$ is a Markov chain with state space
+$S=\{0,1,2,\ldots\}$ that
+is aperiodic and irreducible
+and has stationary transition probabilities
+$P_{ij}\geq 0$ for all $i,j\in S$.
+\end{assumption}
+\begin{theorem}(A sufficient condition for ergodicity~\cite{pakes1969some,kaplan1979sufficient})
+Suppose Assumption~\ref{assumption1} holds.
+If there exist constants
+$N> 0$ and $B> 0$ such that
+\begin{equation}
+\forall i\geq 0,\quad \sum_{j\in S}(j-i)P_{ij}<\infty,
+\end{equation}
+and
+\begin{equation}
+\forall i\geq N,\quad \sum_{j\in S}(j-i)P_{ij}<-B,
+\end{equation}
+then $\{X_n\}$ is ergodic.
+\end{theorem}
+请昕闻基于第一个定理完成 sutton 1998年书上 random walk 例子（书中图6.5）的遍历性证明。
+\begin{theorem}(A sufficient condition for nonergodicity~\cite{kaplan1979sufficient})
+Suppose Assumption~\ref{assumption1} holds. If, for some integer $N\geq 0$ and constants $B\geq 0$ and
+$c\in[0,1)$, the following two conditions hold, then
+$\{X_n\}$ is not ergodic:
+\begin{equation}
+ \forall i\geq N,\quad \sum_{j\in S} (j-i)P_{ij}>0, 
+\end{equation}
+\begin{equation}
+ \forall i\geq N,\ \forall z\in[c,1],\quad z^i-\sum_{j\in S}P_{ij}z^j\geq -B(1-z).
+  \end{equation}
+\end{theorem}
+请昕闻基于第二个定理完成 sutton 1998年书上 cliff-walking task 例子（书中图6.13）的非遍历性证明。
+以及圣彼得堡悖论的非遍历性证明。
+\textcolor{red}{注意：证明过程应该是把Markov Chain写成N个状态（状态到底是第几个也需要明确定义），状态之间的转移概率是
+一个矩阵，需要把矩阵元素明确定义出来，然后基于两个定理，明确推导出两个公式是否满足}
--- a/pic/FigureParadox.tex
+++ b/pic/FigureParadox.tex
+\begin{tikzpicture}
+    \node[state] (1) at (0,0) {1};
+    \node[state] (2) at (1.5,0) {2};
+    \node[state] (3) at (3,0) {3};
+    \node (4) at (4.5,0) {$\cdots$};
+    \node[state] (n) at (6,0) {$n$};
+    \node[state] (T) at (6,2) {T};
+    \node (5) at (7.5,0) {$\cdots$};
+   \path[->]
+    (1) edge node [below] {$\frac{1}{2}$} (2)
+    (1) edge node [] {$2$} (T);
+   \path[->] 
+    (2) edge node [below] {$\frac{1}{2}$} (3)
+    (2) edge node [] {${2^2}$} (T);
+   \path[->]  
+    (3) edge node [below] {$\frac{1}{2}$} (4)
+    (3) edge node [] {${2^3}$} (T);
+   \path[->] 
+    (4) edge node [below] {$\frac{1}{2}$} (n);
+   \path[->] 
+    (n) edge node [below] {$\frac{1}{2}$} (5)
+    (n) edge node [] {${2^n}$} (T);
+\end{tikzpicture}
--- a/reference.bib
+++ b/reference.bib
-@article{kaplan1979sufficient,
-  title={A sufficient condition of nonergodicity of a Markov chain (Corresp.)},
-  author={Kaplan, Michael},
-  journal={IEEE Transactions on Information Theory},
-  volume={25},
-  number={4},
-  pages={470--471},
-  year={1979},
-  publisher={IEEE}
-}
--- a/references.bib
+++ b/references.bib
+# encoding:utf-8
+@article{pakes1969some,
+  title={Some conditions for ergodicity and recurrence of Markov chains},
+  author={Pakes, Anthony G},
+  journal={Operations Research},
+  volume={17},
+  number={6},
+  pages={1058--1061},
+  year={1969},
+  publisher={INFORMS}
+}
+@article{kaplan1979sufficient,
+  title={A sufficient condition of nonergodicity of a {Markov} chain (Corresp.)},
+  author={Kaplan, Michael},
+  journal={IEEE Transactions on Information Theory},
+  volume={25},
+  number={4},
+  pages={470--471},
+  year={1979},
+  publisher={IEEE}
+}
+@incollection{bangole2023game,
+  title={Game Playing (2048) Using Deep Neural Networks},
+  author={Bangole, Narendra Kumar Rao and Moulya, RB and Pranthi, R and Reddy, Sreelekha and Namratha, R},
+  booktitle={The Software Principles of Design for Data Modeling},
+  pages={133--144},
+  year={2023},
+  publisher={IGI Global}
+}
+@inproceedings{matsuzaki2020further,
+  title={A further investigation of neural network players for game 2048},
+  author={Matsuzaki, Kiminori},
+  booktitle={Advances in Computer Games: 16th International Conference, ACG 2019, Macao, China, August 11--13, 2019, Revised Selected Papers 16},
+  pages={53--65},
+  year={2020},
+  organization={Springer}
+}
+@inproceedings{majeed2018q,
+  title={On {Q}-learning Convergence for Non-{Markov} Decision Processes},
+  author={Majeed, Sultan Javed and Hutter, Marcus and others},
+  booktitle={IJCAI},
+  volume={18},
+  pages={2546--2552},
+  year={2018}
+}
+@article{guei2021optimistic,
+  title={Optimistic temporal difference learning for 2048},
+  author={Guei, Hung and Chen, Lung-Pin and Wu, I-Chen},
+  journal={IEEE Transactions on Games},
+  volume={14},
+  number={3},
+  pages={478--487},
+  year={2021},
+  publisher={IEEE}
+}
+@inproceedings{szubert2014temporal,
+  title={Temporal difference learning of n-tuple networks for the game 2048},
+  author={Szubert, Marcin and Ja{\'s}kowski, Wojciech},
+  booktitle={2014 IEEE Conference on Computational Intelligence and Games},
+  pages={1--8},
+  year={2014},
+  organization={IEEE}
+}
+@article{jaskowski2017mastering,
+  title={Mastering 2048 with delayed temporal coherence learning, multistage weight promotion, redundant encoding, and carousel shaping},
+  author={Ja{\'s}kowski, Wojciech},
+  journal={IEEE Transactions on Games},
+  volume={10},
+  number={1},
+  pages={3--14},
+  year={2017},
+  publisher={IEEE}
+}
+@article{yeh2016multistage,
+  title={Multistage temporal difference learning for 2048-like games},
+  author={Yeh, Kun-Hao and Wu, I-Chen and Hsueh, Chu-Hsuan and Chang, Chia-Chuan and Liang, Chao-Chin and Chiang, Han},
+  journal={IEEE Transactions on Computational Intelligence and AI in Games},
+  volume={9},
+  number={4},
+  pages={369--380},
+  year={2016},
+  publisher={IEEE}
+}
+@inproceedings{wu2014multi,
+  title={Multi-stage temporal difference learning for 2048},
+  author={Wu, I-Chen and Yeh, Kun-Hao and Liang, Chao-Chin and Chang, Chia-Chuan and Chiang, Han},
+  booktitle={Technologies and Applications of Artificial Intelligence: 19th International Conference, TAAI 2014, Taipei, Taiwan, November 21-23, 2014. Proceedings},
+  pages={366--378},
+  year={2014},
+  organization={Springer}
+}
+@article{kondo2019playing,
+  title={Playing game 2048 with deep convolutional neural networks trained by supervised learning},
+  author={Kondo, Naoki and Matsuzaki, Kiminori},
+  journal={Journal of Information Processing},
+  volume={27},
+  pages={340--347},
+  year={2019},
+  publisher={Information Processing Society of Japan}
+}
+@inproceedings{matsuzaki2017developing,
+  title={Developing a 2048 player with backward temporal coherence learning and restart},
+  author={Matsuzaki, Kiminori},
+  booktitle={Advances in Computer Games: 15th International Conferences, ACG 2017, Leiden, The Netherlands, July 3--5, 2017, Revised Selected Papers 15},
+  pages={176--187},
+  year={2017},
+  organization={Springer}
+}
+@inproceedings{matsuzaki2016systematic,
+  title={Systematic selection of N-tuple networks with consideration of interinfluence for game 2048},
+  author={Matsuzaki, Kiminori},
+  booktitle={2016 Conference on Technologies and Applications of Artificial Intelligence (TAAI)},
+  pages={186--193},
+  year={2016},
+  organization={IEEE}
+}
+@inproceedings{oka2016systematic,
+  title={Systematic selection of N-tuple networks for 2048},
+  author={Oka, Kazuto and Matsuzaki, Kiminori},
+  booktitle={International Conference on Computers and Games},
+  pages={81--92},
+  year={2016},
+  organization={Springer}
+}
+@article{matsuzaki2021developing,
+  title={Developing value networks for game 2048 with reinforcement learning},
+  author={Matsuzaki, Kiminori},
+  journal={Journal of Information Processing},
+  volume={29},
+  pages={336--346},
+  year={2021},
+  publisher={Information Processing Society of Japan}
+}
--- a/IEEEtran.bst
+++ b/IEEEtran.bst