diff --git a/.project b/.project index 2e93f41..a9b3ad8 100644 --- a/.project +++ b/.project @@ -14,4 +14,11 @@ org.eclipse.texlipse.builder.TexlipseNature + + + .tex + 1 + c:/texlive/2023/texmf-dist/tex/latex/tools/.tex + + diff --git a/.texlipse b/.texlipse index e96e760..f66dd13 100644 --- a/.texlipse +++ b/.texlipse @@ -1,5 +1,5 @@ #TeXlipse project settings -#Sun Apr 14 11:03:21 CST 2024 +#Thu Apr 18 11:29:35 CST 2024 outputDir= markDer=true mainTexFile=document.tex diff --git a/documen.synctex.gz b/documen.synctex.gz new file mode 100644 index 0000000..468abbf Binary files /dev/null and b/documen.synctex.gz differ diff --git a/document.tex b/document.tex index c07f6f2..870774a 100644 --- a/document.tex +++ b/document.tex @@ -9,7 +9,21 @@ \usepackage{url} \usepackage{verbatim} \usepackage{graphicx} + +\newtheorem{theorem}{Theorem} +\newtheorem{proposition}[theorem]{Proposition} +\newtheorem{lemma}[theorem]{Lemma} +\newtheorem{corollary}[theorem]{Corollary} +\newtheorem{definition}[theorem]{Definition} +\newtheorem{assumption}[theorem]{Assumption} +\newtheorem{condition}[theorem]{Condition} +\newtheorem{remark}[theorem]{Remark} \usepackage{cite} +\usepackage{xeCJK} +\usepackage{tikz} +\usetikzlibrary{automata, positioning} +\usetikzlibrary{positioning} +\usetikzlibrary{decorations.markings} \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore} % updated with editorial comments 8/9/2021 @@ -55,14 +69,15 @@ wangwenhao11@nudt.edu.cn). 
\end{IEEEkeywords} -\input{main/introduction} -\input{main/nonergodicity} +%\input{main/introduction} +%\input{main/nonergodicity} +%\input{main/paradox} +\input{main/theorem} -\bibliographystyle{IEEEtran.bst} -%\bibliography{bibliography/IEEEabrv,bibliography/IEEEexample} -\bibliography{references.bib} +\bibliographystyle{IEEEtran} +\bibliography{template/IEEEabrv,references} \end{document} diff --git a/main/introduction.tex b/main/introduction.tex index 7e07d80..0d41064 100644 --- a/main/introduction.tex +++ b/main/introduction.tex @@ -1,8 +1,42 @@ \section{Introduction} -\IEEEPARstart{T}{his} +\IEEEPARstart{G}{ame} 2048 is a popular single-player sliding block puzzle game, +where the game is played on a 4$\times$4 grid, the player +can move the tiles in four directions---up, down, left, and right, +and the objective is to reach the 2048 tile or a higher tile. +While the game is simple to understand, it requires strategic + thinking and planning to reach the 2048 tile. + 2048 has gained widespread popularity due to its addictive + gameplay and simple mechanics, making it a favorite + among puzzle game enthusiasts. + + +\cite{szubert2014temporal} + +\cite{wu2014multi} + +\cite{oka2016systematic} + +\cite{matsuzaki2016systematic} + +\cite{yeh2016multistage} + +\cite{jaskowski2017mastering} + +\cite{matsuzaki2017developing} + +\cite{kondo2019playing} + +\cite{matsuzaki2020further} + +\cite{matsuzaki2021developing} + +\cite{guei2021optimistic} + +\cite{bangole2023game} + + -\cite{kaplan1979sufficient} diff --git a/main/nonergodicity.tex b/main/nonergodicity.tex index 6e0c64e..a9abafd 100644 --- a/main/nonergodicity.tex +++ b/main/nonergodicity.tex @@ -1 +1,38 @@ -\section{Non-ergodicity} \ No newline at end of file +\section{Non-ergodicity} + +\cite{kaplan1979sufficient} + + +We assume that the state-process is ergodic — i.e., all states +are reachable under any policy from the current state after +sufficiently many steps. 
\cite{majeed2018q} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/paradox.tex b/main/paradox.tex new file mode 100644 index 0000000..05e507d --- /dev/null +++ b/main/paradox.tex @@ -0,0 +1,65 @@ +\subsection{St. Petersburg paradox} +The St. Petersburg paradox is a paradox associated +with gambling and decision theory. It is named after the city +of St. Petersburg in Russia and was initially introduced + by the mathematician Daniel Bernoulli in 1738. + +The paradox involves a gambling game with the following rules: +\begin{itemize} + \item Participants must pay a fixed entry fee to join the game. + \item The game continues until a coin lands heads up. +Each toss determines the prize, with the first heads + appearing on the $t$-th toss resulting in a prize of $2^t$. +\end{itemize} + + +%\input{pic/FigureParadox} + +The expected return of all possibilities is +\begin{equation} +\begin{split} +\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(t)\times V(t)\\ +&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\ +&=\infty +\end{split} +\end{equation} + + +Despite the potential for the prize to escalate +significantly, the expected value calculation +in probability theory reveals that the average +participant in this gambling game would end up paying + an infinite fee. This is due to the prize's expected + value being infinite. Even though the probability of + winning is small with each toss, when multiplied, + it leads to an infinitely increasing expected value. + +This paradox challenges individuals' intuitions and +decision-making regarding gambling. Despite the allure +of a potentially substantial prize, the actual expected + value of participating in this gambling game is infinite. + Consequently, in the long run, participants could face + an infinite monetary loss. 
+ + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/theorem.tex b/main/theorem.tex new file mode 100644 index 0000000..5eeb7af --- /dev/null +++ b/main/theorem.tex @@ -0,0 +1,51 @@ +\section{Ergodicity and nonergodicity of a Markov chain} + +\begin{assumption} +\label{assumption1} +In the sequel $\{X_n\}$ is a Markov chain with state space +$S=\{0,1,2,\ldots\}$, +$\{X_n\}$ +is aperiodic and irreducible, + and has stationary transition probabilities +$\forall i,j\in S$, $P_{ij}\geq 0$. +\end{assumption} + + + + +\begin{theorem}(A sufficient condition for ergodicity \cite{pakes1969some,kaplan1979sufficient}) +Under Assumption~\ref{assumption1}, + if there exist constants +$N> 0$, $B> 0$, such that +\begin{equation} +\forall i\geq 0, \sum_{j\in S}(j-i)P_{ij}<\infty, +\end{equation} +\begin{equation} +\forall i\geq N, \sum_{j\in S}(j-i)P_{ij}<-B, +\end{equation} +then $\{X_n\}$ is ergodic. +\end{theorem} + +请昕闻基于第一个定理完成 sutton 1998年书上 random walk 例子(书中图6.5)的遍历性证明。 + +\begin{theorem}(A sufficient condition for nonergodicity \cite{kaplan1979sufficient}) +Under Assumption~\ref{assumption1}, if for some integer $N\geq 0$ and constants $B\geq 0$, +$c\in[0,1]$ the following two conditions hold, then +$\{X_n\}$ is not ergodic: +\begin{equation} + \forall i\geq N, \sum_{j\in S} (j-i)P_{ij}>0, +\end{equation} +\begin{equation} + \forall i\geq N, \forall z\in[c,1], z^i-\sum_{j\in S}P_{ij}z^j\geq -B(1-z). 
+ \end{equation} +\end{theorem} + +请昕闻基于第二个定理完成 sutton 1998年书上 cliff-walking task 例子(书中图6.13)的非遍历性证明。 +以及圣彼得堡悖论的非遍历性证明。 + + +\textcolor{red}{注意:证明过程应该是把Markov Chain写成N个状态(状态到底是第几个也需要明确定义),状态之间的转移概率是 +一个矩阵,需要把矩阵元素明确定义出来,然后基于两个定理,明确推导出两个公式是否满足} + + + + diff --git a/pic/FigureParadox.tex b/pic/FigureParadox.tex new file mode 100644 index 0000000..fad35f1 --- /dev/null +++ b/pic/FigureParadox.tex @@ -0,0 +1,29 @@ + + +\begin{tikzpicture} + \node[state] (1) at (0,0) {1}; + \node[state] (2) at (1.5,0) {2}; + \node[state] (3) at (3,0) {3}; + \node (4) at (4.5,0) {$\cdots$}; + \node[state] (n) at (6,0) {$n$}; + \node[state] (T) at (6,2) {T}; + \node (5) at (7.5,0) {$\cdots$}; + + \path[->] + (1) edge node [below] {$\frac{1}{2}$} (2) + (1) edge node [] {$2$} (T); + \path[->] + (2) edge node [below] {$\frac{1}{2}$} (3) + (2) edge node [] {${2^2}$} (T); + \path[->] + (3) edge node [below] {$\frac{1}{2}$} (4) + (3) edge node [] {${2^3}$} (T); + \path[->] + (4) edge node [below] {$\frac{1}{2}$} (n); + \path[->] + (n) edge node [below] {$\frac{1}{2}$} (5) + (n) edge node [] {${2^n}$} (T); +\end{tikzpicture} + + + diff --git a/reference.bib b/reference.bib deleted file mode 100644 index 712eba6..0000000 --- a/reference.bib +++ /dev/null @@ -1,10 +0,0 @@ -@article{kaplan1979sufficient, - title={A sufficient condition of nonergodicity of a Markov chain (Corresp.)}, - author={Kaplan, Michael}, - journal={IEEE Transactions on Information Theory}, - volume={25}, - number={4}, - pages={470--471}, - year={1979}, - publisher={IEEE} -} diff --git a/references.bib b/references.bib new file mode 100644 index 0000000..4cddacb --- /dev/null +++ b/references.bib @@ -0,0 +1,147 @@ +# encoding:utf-8 +@article{pakes1969some, + title={Some conditions for ergodicity and recurrence of Markov chains}, + author={Pakes, Anthony G}, + journal={Operations Research}, + volume={17}, + number={6}, + pages={1058--1061}, + year={1969}, + publisher={INFORMS} +} +@article{kaplan1979sufficient, + title={A 
sufficient condition of nonergodicity of a {Markov} chain (Corresp.)}, + author={Kaplan, Michael}, + journal={IEEE Transactions on Information Theory}, + volume={25}, + number={4}, + pages={470--471}, + year={1979}, + publisher={IEEE} +} +@incollection{bangole2023game, + title={Game Playing (2048) Using Deep Neural Networks}, + author={Bangole, Narendra Kumar Rao and Moulya, RB and Pranthi, R and Reddy, Sreelekha and Namratha, R}, + booktitle={The Software Principles of Design for Data Modeling}, + pages={133--144}, + year={2023}, + publisher={IGI Global} +} +@inproceedings{matsuzaki2020further, + title={A further investigation of neural network players for game 2048}, + author={Matsuzaki, Kiminori}, + booktitle={Advances in Computer Games: 16th International Conference, ACG 2019, Macao, China, August 11--13, 2019, Revised Selected Papers 16}, + pages={53--65}, + year={2020}, + organization={Springer} +} +@inproceedings{majeed2018q, + title={On Q-learning Convergence for Non-Markov Decision Processes.}, + author={Majeed, Sultan Javed and Hutter, Marcus and others}, + booktitle={IJCAI}, + volume={18}, + pages={2546--2552}, + year={2018} +} +@article{guei2021optimistic, + title={Optimistic temporal difference learning for 2048}, + author={Guei, Hung and Chen, Lung-Pin and Wu, I-Chen}, + journal={IEEE Transactions on Games}, + volume={14}, + number={3}, + pages={478--487}, + year={2021}, + publisher={IEEE} +} +@inproceedings{szubert2014temporal, + title={Temporal difference learning of n-tuple networks for the game 2048}, + author={Szubert, Marcin and Ja{\'s}kowski, Wojciech}, + booktitle={2014 IEEE Conference on Computational Intelligence and Games}, + pages={1--8}, + year={2014}, + organization={IEEE} +} +@article{jaskowski2017mastering, + title={Mastering 2048 with delayed temporal coherence learning, multistage weight promotion, redundant encoding, and carousel shaping}, + author={Ja{\'s}kowski, Wojciech}, + journal={IEEE Transactions on Games}, + volume={10}, + 
number={1}, + pages={3--14}, + year={2017}, + publisher={IEEE} +} +@article{yeh2016multistage, + title={Multistage temporal difference learning for 2048-like games}, + author={Yeh, Kun-Hao and Wu, I-Chen and Hsueh, Chu-Hsuan and Chang, Chia-Chuan and Liang, Chao-Chin and Chiang, Han}, + journal={IEEE Transactions on Computational Intelligence and AI in Games}, + volume={9}, + number={4}, + pages={369--380}, + year={2016}, + publisher={IEEE} +} +@inproceedings{wu2014multi, + title={Multi-stage temporal difference learning for 2048}, + author={Wu, I-Chen and Yeh, Kun-Hao and Liang, Chao-Chin and Chang, Chia-Chuan and Chiang, Han}, + booktitle={Technologies and Applications of Artificial Intelligence: 19th International Conference, TAAI 2014, Taipei, Taiwan, November 21-23, 2014. Proceedings}, + pages={366--378}, + year={2014}, + organization={Springer} +} +@article{kondo2019playing, + title={Playing game 2048 with deep convolutional neural networks trained by supervised learning}, + author={Kondo, Naoki and Matsuzaki, Kiminori}, + journal={Journal of Information Processing}, + volume={27}, + pages={340--347}, + year={2019}, + publisher={Information Processing Society of Japan} +} +@inproceedings{matsuzaki2017developing, + title={Developing a 2048 player with backward temporal coherence learning and restart}, + author={Matsuzaki, Kiminori}, + booktitle={Advances in Computer Games: 15th International Conferences, ACG 2017, Leiden, The Netherlands, July 3--5, 2017, Revised Selected Papers 15}, + pages={176--187}, + year={2017}, + organization={Springer} +} +@inproceedings{matsuzaki2016systematic, + title={Systematic selection of N-tuple networks with consideration of interinfluence for game 2048}, + author={Matsuzaki, Kiminori}, + booktitle={2016 Conference on Technologies and Applications of Artificial Intelligence (TAAI)}, + pages={186--193}, + year={2016}, + organization={IEEE} +} +@inproceedings{oka2016systematic, + title={Systematic selection of N-tuple networks 
for 2048}, + author={Oka, Kazuto and Matsuzaki, Kiminori}, + booktitle={International Conference on Computers and Games}, + pages={81--92}, + year={2016}, + organization={Springer} +} +@article{matsuzaki2021developing, + title={Developing value networks for game 2048 with reinforcement learning}, + author={Matsuzaki, Kiminori}, + journal={Journal of Information Processing}, + volume={29}, + pages={336--346}, + year={2021}, + publisher={Information Processing Society of Japan} +} + + + + + + + + + + + + + + diff --git a/IEEEtran.bst b/template/IEEEtran.bst similarity index 100% rename from IEEEtran.bst rename to template/IEEEtran.bst