diff --git a/document.tex b/document.tex index ac8548c..739e89b 100644 --- a/document.tex +++ b/document.tex @@ -74,6 +74,7 @@ wangwenhao11@nudt.edu.cn). \input{main/introduction} \input{main/background} +\input{main/nonergodic} %\input{main/nonergodicity} %\input{main/paradox} diff --git a/main/background.tex b/main/background.tex index e5295c4..68dc1c5 100644 --- a/main/background.tex +++ b/main/background.tex @@ -31,11 +31,10 @@ That is $\forall s\in \mathcal{S}$, we have \sum_{s'\in \mathcal{S}}P_{\pi}(s',s)d_{\pi}(s')=d_{\pi}(s). \end{equation} -\begin{definition}[Ergodicity] Ergodicity assumption about the MDP assume that $d_{\pi}(s)$ exist for any policy $\pi$ and are independent of initial states \cite{Sutton2018book}. -\end{definition} + This mean all states are reachable under any policy from the current state after sufficiently many steps \cite{majeed2018q}. @@ -67,6 +66,7 @@ P_{\text{absorbing}}\dot{=}\begin{array}{c|ccccccc} \text{E} & \frac{1}{2} & 0 & 0 & 0 & \frac{1}{2} & 0 \end{array} \] +Note that absorbing states can be combined into one. According to (\ref{invariance}), the distribution $d_{\text{absorbing}}=\{1$, $0$, $0$, $0$, $0$, $0$\}. @@ -99,7 +99,7 @@ the distribution $d_{\text{restart}}=\{0.1$, Since the probability of T, A, B, C, D, E are non-zeros, random walk with restarts is ergodic. -\subsection{Ergodicity and Non-ergodicity between non-absorbing states} +\subsection{Ergodicity between non-absorbing states} For Markov chains with absorbing states, we usually decompose the transition matrix $P$ into the following form: \[ @@ -124,23 +124,18 @@ where $Q$ is the matrix of transition probabilities between N\dot{=} \sum_{i=0}^{\infty}Q^i=(I_{n-1}-Q)^{-1}, \end{equation} where $I_{n-1}$ is the $(n-1)\times(n-1)$ identity matrix. -Note that absorbing states can be combined into one. It is now easy to define whether the non-absorbing states are ergodic. 
\begin{definition}[Ergodicity between non-absorbing states] -Assume that $N$ exist for any policy $\pi$ - and are independent of initial states. +Assume that $N$ exists for any policy $\pi$ + and is independent of initial states. $\forall i,j \in S\setminus\{\text{T}\}$, $N_{ij}>0$, MDP is ergodic between non-absorbing states. + \label{definition2} \end{definition} -\begin{definition}[Non-ergodicity between non-absorbing states] -Assume that $N$ exist for any policy $\pi$ - and are independent of initial states. - $\exists i,j \in S\setminus\{\text{T}\}$, - $N_{ij}=0$, MDP is non-ergodic between non-absorbing states. -\end{definition} + For random walk with absorbing states, \[ @@ -161,26 +156,26 @@ Q_{\text{absorbing}}\dot{=}\begin{array}{c|ccccc} \text{E} & 0 & 0 & 0 & \frac{1}{2} & 0 \end{array} \] +%\[ +% R_{\text{absorbing}}\dot{=}\begin{array}{c|c} +% &\text{T} \\\hline +% \text{A} & \frac{1}{2} \\ +% \text{B} & 0 \\ +% \text{C} & 0 \\ +% \text{D} & 0 \\ +% \text{E} & \frac{1}{2} +% \end{array} +% \] +% \[ +% I_{\text{absorbing}}\dot{=}\begin{array}{c|c} +% &\text{T} \\\hline +% \text{T} & 1 +% \end{array} +% \] + +Then, \[ -R_{\text{absorbing}}\dot{=}\begin{array}{c|c} -&\text{T} \\\hline -\text{A} & \frac{1}{2} \\ -\text{B} & 0 \\ -\text{C} & 0 \\ -\text{D} & 0 \\ -\text{E} & \frac{1}{2} -\end{array} -\] -\[ -I_{\text{absorbing}}\dot{=}\begin{array}{c|c} -&\text{T} \\\hline -\text{T} & 1 -\end{array} -\] - -Then,{ -\[ -N_{\text{absorbing}}\dot{=}\begin{array}{c|ccccc} +N_{\text{absorbing}}=(I_5-Q_{\text{absorbing}})^{-1}=\begin{array}{c|ccccc} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline \text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\ \text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\ @@ -188,15 +183,9 @@ N_{\text{absorbing}}\dot{=}\begin{array}{c|ccccc} \text{D} & \frac{2}{3} & \frac{4}{3} & 2 & \frac{8}{3} & \frac{4}{3} \\ \text{E} & \frac{1}{3} & \frac{2}{3} & 1 & \frac{4}{3} & \frac{5}{3} 
\\ \end{array} -\], -} -\highlight{昕闻帮我算这个矩阵} - -通过圣彼得堡例子说明,圣彼得堡不满足非吸收态之间的遍历性。 -给出定理,同样证明2048游戏不满足非吸收态之间的遍历性。 - - - - +\] +Based on Definition \ref{definition2}, +random walk with absorbing states +is ergodic between non-absorbing states. diff --git a/main/introduction.tex b/main/introduction.tex index 3fc13e7..c7329a0 100644 --- a/main/introduction.tex +++ b/main/introduction.tex @@ -108,7 +108,7 @@ The comparison in this set of experiments indicates that while in the 2048 game, when the agent deviates from the optimal state, it may never have the chance to return to the previous state. - This relates to the game's property of traversability. + This relates to the game's property of ergodicity. In this paper, we proved that the game 2048 is non-ergodic. diff --git a/main/nonergodic.tex b/main/nonergodic.tex new file mode 100644 index 0000000..2a04d68 --- /dev/null +++ b/main/nonergodic.tex @@ -0,0 +1,96 @@ +\section{Non-ergodicity between non-absorbing states} +\begin{definition}[Non-ergodicity between non-absorbing states] +Assume that $N$ exists for any policy $\pi$ + and is independent of initial states. + If $\exists i,j \in S\setminus\{\text{T}\}$, + $N_{ij}=0$, the MDP is non-ergodic between non-absorbing states. + \label{definition3} +\end{definition} + + + + +\subsection{St. Petersburg paradox} + + + +The St. Petersburg paradox is a paradox associated +with gambling and decision theory. It is named after the city +of St. Petersburg in Russia and was famously analyzed + by the mathematician Daniel Bernoulli in 1738. + +The paradox involves a gambling game with the following rules: +\begin{itemize} + \item Participants must pay a fixed entry fee to join the game. + \item The game continues until a coin lands heads up. +Each toss determines the prize, with the first heads + appearing on the $t$-th toss resulting in a prize of $2^t$. 
+\end{itemize} + + +%\input{pic/FigureParadox} + +The expected return of all possibilities is +\begin{equation} +\begin{split} +\mathbb{E}(V)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(t)\times V(t)\\ +&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\ +&=\infty. +\end{split} +\end{equation} + + +Despite the potential for the prize to escalate +significantly, the expected value calculation +in probability theory reveals that a rational +participant in this gambling game should be willing + to pay any finite entry fee, since the prize's expected + value is infinite. Even though the probability of + reaching a later toss halves each time, the prize doubles, + so the sum of expected payoffs grows without bound. + +This paradox challenges individuals' intuitions and +decision-making regarding gambling. Despite the allure +of a potentially substantial prize, the actual expected + value of participating in this gambling game is infinite, + yet few people would judge a large entry fee + to be a price worth paying. + +\input{pic/paradox} + +Figure~\ref{TruncatedPetersburg} is a truncated version +of the St. Petersburg paradox. 
The transition probabilities between +non-absorbing states are as follows: +\[ +Q_{\text{truncated}}\dot{=}\begin{array}{c|ccccc} + & \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5 \\\hline +\text{S}_1 & 0 & \frac{1}{2} & 0 & 0 & 0 \\ +\text{S}_2 & 0 & 0 & \frac{1}{2} & 0 & 0 \\ +\text{S}_3 & 0 & 0 & 0 & \frac{1}{2} & 0 \\ +\text{S}_4 & 0 & 0 & 0 & 0 & \frac{1}{2} \\ +\text{S}_5 & 0 & 0 & 0 & 0 & 0 +\end{array} +\] +Then, +\[ +N_{\text{truncated}}=(I_5-Q_{\text{truncated}})^{-1}=\begin{array}{c|ccccc} +& \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5 \\\hline +\text{S}_1 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} & \frac{1}{16} \\ +\text{S}_2 & 0 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} \\ +\text{S}_3 & 0 & 0 & 1 & \frac{1}{2} & \frac{1}{4} \\ +\text{S}_4 & 0 & 0 & 0 & 1 & \frac{1}{2} \\ +\text{S}_5 & 0 & 0 & 0 & 0 & 1 \\ +\end{array} +\] +Based on Definition \ref{definition3}, +the truncated St. Petersburg paradox +is non-ergodic between non-absorbing states. 
+ + + + + + + + + diff --git a/main/2048prove.tex b/material/2048prove.tex similarity index 100% rename from main/2048prove.tex rename to material/2048prove.tex diff --git a/main/nonergodicity.tex b/material/nonergodicity.tex similarity index 100% rename from main/nonergodicity.tex rename to material/nonergodicity.tex diff --git a/main/paradox.tex b/material/paradox.tex similarity index 100% rename from main/paradox.tex rename to material/paradox.tex diff --git a/main/theorem.tex b/material/theorem.tex similarity index 100% rename from main/theorem.tex rename to material/theorem.tex diff --git a/pic/paradox.tex b/pic/paradox.tex new file mode 100644 index 0000000..4afb944 --- /dev/null +++ b/pic/paradox.tex @@ -0,0 +1,31 @@ +\begin{figure}[!t] +\centering +\scalebox{0.9}{ +\begin{tikzpicture} + \node[draw, rectangle, fill=gray!50] (DEAD1) at (0,1.5) {T}; + \node[draw, rectangle, fill=gray!50] (DEAD2) at (1.5,1.5) {T}; + \node[draw, rectangle, fill=gray!50] (DEAD3) at (3,1.5) {T}; + \node[draw, rectangle, fill=gray!50] (DEAD4) at (4.5,1.5) {T}; + \node[draw, rectangle, fill=gray!50] (DEAD5) at (6,1.5) {T}; + \node[draw, circle] (A) at (0,0) {S$_1$}; + \node[draw, circle] (B) at (1.5,0) {S$_2$}; + \node[draw, circle] (C) at (3,0) {S$_3$}; + \node[draw, circle] (D) at (4.5,0) {S$_4$}; + \node[draw, circle] (E) at (6,0) {S$_5$}; + + \draw[->] (A) -- node {0.5} (DEAD1); + \draw[->] (A) -- node {0.5} (B); + \draw[->] (B) -- node {0.5} (DEAD2); + \draw[->] (B) -- node {0.5} (C); + \draw[->] (C) -- node {0.5} (DEAD3); + \draw[->] (C) -- node {0.5} (D); + \draw[->] (D) -- node {0.5} (DEAD4); + \draw[->] (D) -- node {0.5} (E); + \draw[->] (E) -- node {1.0} (DEAD5); + + \draw[->] ([xshift=-4ex]A.west) -- ([xshift=-5.2ex]A.east); +\end{tikzpicture} +} +\caption{Truncated St. Petersburg.} +\label{TruncatedPetersburg} +\end{figure}