Commit 7f6fb8c1 by Lenovo

写一个清晰版本,给李昕闻梳理

parent 20baaa14
...@@ -14,4 +14,11 @@ ...@@ -14,4 +14,11 @@
<natures> <natures>
<nature>org.eclipse.texlipse.builder.TexlipseNature</nature> <nature>org.eclipse.texlipse.builder.TexlipseNature</nature>
</natures> </natures>
<linkedResources>
<link>
<name>.tex</name>
<type>1</type>
<location>c:/texlive/2023/texmf-dist/tex/latex/tools/.tex</location>
</link>
</linkedResources>
</projectDescription> </projectDescription>
#TeXlipse project settings #TeXlipse project settings
#Sun Apr 14 11:03:21 CST 2024 #Thu Apr 18 11:29:35 CST 2024
outputDir= outputDir=
markDer=true markDer=true
mainTexFile=document.tex mainTexFile=document.tex
......
...@@ -9,7 +9,21 @@ ...@@ -9,7 +9,21 @@
\usepackage{url} \usepackage{url}
\usepackage{verbatim} \usepackage{verbatim}
\usepackage{graphicx} \usepackage{graphicx}
\newtheorem{theorem}{Theorem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{remark}[theorem]{Remark}
\usepackage{cite} \usepackage{cite}
\usepackage{xeCJK}
\usepackage{tikz}
\usetikzlibrary{automata, positioning}
\usetikzlibrary{positioning}
\usetikzlibrary{decorations.markings}
\hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore} \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
% updated with editorial comments 8/9/2021 % updated with editorial comments 8/9/2021
...@@ -55,14 +69,15 @@ wangwenhao11@nudt.edu.cn). ...@@ -55,14 +69,15 @@ wangwenhao11@nudt.edu.cn).
\end{IEEEkeywords} \end{IEEEkeywords}
\input{main/introduction} %\input{main/introduction}
\input{main/nonergodicity} %\input{main/nonergodicity}
%\input{main/paradox}
\input{main/theorem}
\bibliographystyle{IEEEtran.bst}
%\bibliography{bibliography/IEEEabrv,bibliography/IEEEexample}
\bibliography{references.bib}
\bibliographystyle{IEEEtran}
\bibliography{template/IEEEabrv,references}
\end{document} \end{document}
......
\section{Introduction} \section{Introduction}
\IEEEPARstart{T}{his} \IEEEPARstart{G}{ame} 2048 is a popular single-player sliding block puzzle game,
played on a 4$\times$4 grid.
The player can move the tiles in four directions---up, down, left, and right---and
the objective is to reach the 2048 tile or a higher tile.
While the game is simple to understand, it requires strategic
thinking and planning to reach the 2048 tile.
2048 has gained widespread popularity due to its addictive
gameplay and simple mechanics, making it a favorite
among puzzle game enthusiasts.
\cite{szubert2014temporal}
\cite{wu2014multi}
\cite{oka2016systematic}
\cite{matsuzaki2016systematic}
\cite{yeh2016multistage}
\cite{jaskowski2017mastering}
\cite{matsuzaki2017developing}
\cite{kondo2019playing}
\cite{matsuzaki2020further}
\cite{matsuzaki2021developing}
\cite{guei2021optimistic}
\cite{bangole2023game}
\cite{kaplan1979sufficient}
\section{Non-ergodicity} \section{Non-ergodicity}
\cite{kaplan1979sufficient}
We assume that the state-process is ergodic, i.e., all states
are reachable under any policy from the current state after
sufficiently many steps~\cite{majeed2018q}.
\subsection{St. Petersburg paradox}
The St.~Petersburg paradox is a paradox associated
with gambling and decision theory. It is named after the city
of St.~Petersburg in Russia, where the mathematician Daniel Bernoulli
published his analysis of it in 1738; the problem itself was first
posed by his cousin Nicolas Bernoulli in 1713.
The paradox involves a gambling game with the following rules:
\begin{itemize}
\item Participants must pay a fixed entry fee to join the game.
\item A fair coin is tossed repeatedly, and the game continues until the coin lands heads up.
The number of tosses determines the prize: if the first heads
appears on the $t$-th toss, the prize is $2^t$.
\end{itemize}
%\input{pic/FigureParadox}
The expected return of all possibilities is
\begin{equation}
\begin{split}
\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(t)\times V(t)\\
&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t}\times 2^t\\
&=\lim_{n\rightarrow \infty} n=\infty
\end{split}
\end{equation}
Although the prize can escalate significantly,
each individual large prize is very unlikely:
a prize of $2^t$ is won with probability only $2^{-t}$.
Nevertheless, every possible outcome contributes exactly $1$
to the expectation, so the expected value of the prize is infinite,
and a participant who maximizes the expected value should be
willing to pay any finite entry fee to join the game.
This paradox challenges individuals' intuitions and
decision-making regarding gambling. Despite the allure
of a potentially substantial prize and the infinite expected
value, most people would pay only a small fee,
because the typical prize is small.
Consequently, a participant who pays a large entry fee will,
with high probability, face a monetary loss.
\section{Ergodicity and nonergodicity of a Markov chain}
\begin{assumption}
\label{assumption1}
In the sequel, $\{X_n\}$ is a Markov chain with state space
$S=\{0,1,2,\ldots\}$.
The chain $\{X_n\}$
is aperiodic and irreducible,
and has stationary transition probabilities
$P_{ij}\geq 0$ for all $i,j\in S$.
\end{assumption}
\begin{theorem}(A sufficient condition for ergodicity \cite{pakes1969some,kaplan1979sufficient})
Suppose Assumption~\ref{assumption1} holds
and there exist constants
$N> 0$ and $B> 0$ such that
\begin{equation}
\forall i\geq 0, \quad \sum_{j\in S}(j-i)P_{ij}<\infty,
\end{equation}
\begin{equation}
\forall i\geq N, \quad \sum_{j\in S}(j-i)P_{ij}<-B.
\end{equation}
Then $\{X_n\}$ is ergodic.
\end{theorem}
请昕闻基于第一个定理完成 sutton 1998年书上 random walk 例子(书中图6.5)的遍历性证明。
\begin{theorem}(A sufficient condition for nonergodicity \cite{kaplan1979sufficient})
Suppose Assumption~\ref{assumption1} holds. If, for some integer $N\geq 0$ and constants $B\geq 0$ and
$c\in[0,1]$, the following two conditions hold, then
$\{X_n\}$ is not ergodic:
\begin{equation}
\forall i\geq N, \sum_{j\in S} (j-i)P_{ij}>0,
\end{equation}
\begin{equation}
\forall i\geq N, \forall z\in[c,1], z^i-\sum_{j\in S}P_{ij}z^j\geq -B(1-z).
\end{equation}
\end{theorem}
请昕闻基于第二个定理完成 sutton 1998年书上 cliff-walking task 例子(书中图6.13)的非遍历性证明。
以及圣彼得堡悖论的非遍历性证明。
\textcolor{red}{注意:证明过程应该是把Markov Chain写成N个状态(状态到底是第几个也需要明确定义),状态之间的转移概率是
一个矩阵,需要把矩阵元素明确定义出来,然后基于两个定理,明确推导出两个公式是否满足}
\begin{tikzpicture}
\node[state] (1) at (0,0) {1};
\node[state] (2) at (1.5,0) {2};
\node[state] (3) at (3,0) {3};
\node (4) at (4.5,0) {$\cdots$};
\node[state] (n) at (6,0) {$n$};
\node[state] (T) at (6,2) {T};
\node (5) at (7.5,0) {$\cdots$};
\path[->]
(1) edge node [below] {$\frac{1}{2}$} (2)
(1) edge node [] {$2$} (T);
\path[->]
(2) edge node [below] {$\frac{1}{2}$} (3)
(2) edge node [] {${2^2}$} (T);
\path[->]
(3) edge node [below] {$\frac{1}{2}$} (4)
(3) edge node [] {${2^3}$} (T);
\path[->]
(4) edge node [below] {$\frac{1}{2}$} (n);
\path[->]
(n) edge node [below] {$\frac{1}{2}$} (5)
(n) edge node [] {${2^n}$} (T);
\end{tikzpicture}
@article{kaplan1979sufficient,
title={A sufficient condition of nonergodicity of a Markov chain (Corresp.)},
author={Kaplan, Michael},
journal={IEEE Transactions on Information Theory},
volume={25},
number={4},
pages={470--471},
year={1979},
publisher={IEEE}
}
# encoding:utf-8
@article{pakes1969some,
title={Some conditions for ergodicity and recurrence of Markov chains},
author={Pakes, Anthony G},
journal={Operations Research},
volume={17},
number={6},
pages={1058--1061},
year={1969},
publisher={INFORMS}
}
@article{kaplan1979sufficient,
title={A sufficient condition of nonergodicity of a {Markov} chain (Corresp.)},
author={Kaplan, Michael},
journal={IEEE Transactions on Information Theory},
volume={25},
number={4},
pages={470--471},
year={1979},
publisher={IEEE}
}
@incollection{bangole2023game,
title={Game Playing (2048) Using Deep Neural Networks},
author={Bangole, Narendra Kumar Rao and Moulya, RB and Pranthi, R and Reddy, Sreelekha and Namratha, R},
booktitle={The Software Principles of Design for Data Modeling},
pages={133--144},
year={2023},
publisher={IGI Global}
}
@inproceedings{matsuzaki2020further,
title={A further investigation of neural network players for game 2048},
author={Matsuzaki, Kiminori},
booktitle={Advances in Computer Games: 16th International Conference, ACG 2019, Macao, China, August 11--13, 2019, Revised Selected Papers 16},
pages={53--65},
year={2020},
organization={Springer}
}
@inproceedings{majeed2018q,
title={On {Q}-learning Convergence for Non-{Markov} Decision Processes},
author={Majeed, Sultan Javed and Hutter, Marcus and others},
booktitle={IJCAI},
volume={18},
pages={2546--2552},
year={2018}
}
@article{guei2021optimistic,
title={Optimistic temporal difference learning for 2048},
author={Guei, Hung and Chen, Lung-Pin and Wu, I-Chen},
journal={IEEE Transactions on Games},
volume={14},
number={3},
pages={478--487},
year={2021},
publisher={IEEE}
}
@inproceedings{szubert2014temporal,
title={Temporal difference learning of n-tuple networks for the game 2048},
author={Szubert, Marcin and Ja{\'s}kowski, Wojciech},
booktitle={2014 IEEE Conference on Computational Intelligence and Games},
pages={1--8},
year={2014},
organization={IEEE}
}
@article{jaskowski2017mastering,
title={Mastering 2048 with delayed temporal coherence learning, multistage weight promotion, redundant encoding, and carousel shaping},
author={Ja{\'s}kowski, Wojciech},
journal={IEEE Transactions on Games},
volume={10},
number={1},
pages={3--14},
year={2017},
publisher={IEEE}
}
@article{yeh2016multistage,
title={Multistage temporal difference learning for 2048-like games},
author={Yeh, Kun-Hao and Wu, I-Chen and Hsueh, Chu-Hsuan and Chang, Chia-Chuan and Liang, Chao-Chin and Chiang, Han},
journal={IEEE Transactions on Computational Intelligence and AI in Games},
volume={9},
number={4},
pages={369--380},
year={2016},
publisher={IEEE}
}
@inproceedings{wu2014multi,
title={Multi-stage temporal difference learning for 2048},
author={Wu, I-Chen and Yeh, Kun-Hao and Liang, Chao-Chin and Chang, Chia-Chuan and Chiang, Han},
booktitle={Technologies and Applications of Artificial Intelligence: 19th International Conference, TAAI 2014, Taipei, Taiwan, November 21-23, 2014. Proceedings},
pages={366--378},
year={2014},
organization={Springer}
}
@article{kondo2019playing,
title={Playing game 2048 with deep convolutional neural networks trained by supervised learning},
author={Kondo, Naoki and Matsuzaki, Kiminori},
journal={Journal of Information Processing},
volume={27},
pages={340--347},
year={2019},
publisher={Information Processing Society of Japan}
}
@inproceedings{matsuzaki2017developing,
title={Developing a 2048 player with backward temporal coherence learning and restart},
author={Matsuzaki, Kiminori},
booktitle={Advances in Computer Games: 15th International Conferences, ACG 2017, Leiden, The Netherlands, July 3--5, 2017, Revised Selected Papers 15},
pages={176--187},
year={2017},
organization={Springer}
}
@inproceedings{matsuzaki2016systematic,
title={Systematic selection of N-tuple networks with consideration of interinfluence for game 2048},
author={Matsuzaki, Kiminori},
booktitle={2016 Conference on Technologies and Applications of Artificial Intelligence (TAAI)},
pages={186--193},
year={2016},
organization={IEEE}
}
@inproceedings{oka2016systematic,
title={Systematic selection of N-tuple networks for 2048},
author={Oka, Kazuto and Matsuzaki, Kiminori},
booktitle={International Conference on Computers and Games},
pages={81--92},
year={2016},
organization={Springer}
}
@article{matsuzaki2021developing,
title={Developing value networks for game 2048 with reinforcement learning},
author={Matsuzaki, Kiminori},
journal={Journal of Information Processing},
volume={29},
pages={336--346},
year={2021},
publisher={Information Processing Society of Japan}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment