From cded433643dca4f78cd30b0655a8468d0bb98ecc Mon Sep 17 00:00:00 2001 From: Lenovo Date: Fri, 31 May 2024 15:38:26 +0800 Subject: [PATCH] 无环才是本质 --- document.tex | 7 ++++--- main/2048isAcyclic.tex | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main/2048isNonergodic.tex | 95 ----------------------------------------------------------------------------------------------- main/acyclic.tex | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main/background.tex | 47 +++++++++++++++++------------------------------ main/introduction.tex | 7 ++++--- main/nonergodic.tex | 161 ----------------------------------------------------------------------------------------------------------------------------------------------------------------- material/nonergodic.tex | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ pic/boyanchain.tex | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ references.bib | 10 ++++++++++ 10 files changed, 526 insertions(+), 292 deletions(-) create mode 100644 main/2048isAcyclic.tex delete mode 100644 main/2048isNonergodic.tex create mode 100644 main/acyclic.tex delete mode 100644 main/nonergodic.tex create mode 100644 material/nonergodic.tex create mode 100644 pic/boyanchain.tex diff --git a/document.tex b/document.tex index ebf07ad..71d3fcb 100644 --- a/document.tex +++ b/document.tex @@ -26,12 +26,13 @@ \usetikzlibrary{automata, positioning} \usetikzlibrary{positioning} \usetikzlibrary{decorations.markings} +\usepackage{cuted} \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore} % updated with editorial comments 8/9/2021 \newcommand{\highlight}[1]{\textcolor{red}{#1}} \begin{document} -\title{Non-ergodicity of Game 2048} 
+\title{Acyclicity of Game 2048} \author{Xingguo Chen, Xinwen Li, Shangdong Yang, and Wenhao Wang \thanks{Manuscript received XXXX; revised XXXX; accepted XXXX. @@ -74,8 +75,8 @@ wangwenhao11@nudt.edu.cn). \input{main/introduction} \input{main/background} -\input{main/nonergodic} -\input{main/2048isNonergodic} +\input{main/acyclic} +\input{main/2048isAcyclic} %\input{main/nonergodicity} %\input{main/paradox} diff --git a/main/2048isAcyclic.tex b/main/2048isAcyclic.tex new file mode 100644 index 0000000..6bc127f --- /dev/null +++ b/main/2048isAcyclic.tex @@ -0,0 +1,138 @@ +\section{Acyclicity of the 2048 game} + + +The purpose of this section is to prove the acyclicity of the 2048 game +and give some discussions. + +\subsection{Acyclicity of the 2048 game} + +The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares. + At the beginning of the game, two squares are randomly filled + with tiles of either 2 or 4. + Players can make moves in four directions: \textit{up}, \textit{down}, + \textit{left}, and \textit{right}. + When a player chooses a direction, + all tiles will move in that direction until + they hit the edge or another tile. + If two tiles with the same number are adjacent + in the moving direction, they will merge into + a tile with the sum of the original numbers. + Each tile can only participate in one merge operation per move. + After each move, a new tile appears on a random empty square. + The new tile is 2 with probability 0.9, and 4 with probability 0.1. +The game ends when all squares are filled, and no valid merge operations can be made. + + +\begin{theorem} +The 2048 game is acyclic between non-absorbing states. +\end{theorem} +\begin{IEEEproof} + To apply Theorem \ref{judgmentTheorem}, what we need + to do is to assign a countable value to the 2048 game board + and demonstrate the properties of the + state transition probabilities in the 2048 game. 
+ + +In the 2048 game, each tile has 16 potential values, + including empty and $2^k$, $k\in\{1,2,3,\ldots,15\}$. +Using 4 bits to represent a tile, the game board is a 4$\times$4 matrix +$B$. The corresponding tile is then computed as follows: +\begin{equation} +1\leq m\text{, }n \leq 4\text{, }tile_{mn} = +\begin{cases} +0, & \text{if } B_{mn}=0; \\ + 2^{B_{mn}}, & \text{otherwise.} +\end{cases} +\label{equationTile} +\end{equation} +The sum of all tiles in the game board is +\begin{equation} +sum(B) = \sum_{m=1}^4\sum_{n=1}^4 tile_{mn}. +\end{equation} +A 64-bit long integer can uniquely represent any game board state. +\begin{equation} +long(B)= \sum_{m=1}^4\sum_{n=1}^4 16^{4(m-1)+(n-1)}\cdot B_{mn}. +\end{equation} +We have +\begin{equation} +long(B)<2^{64}. +\label{size} +\end{equation} +The size of the board space $\mathcal{B}$ is +$|\mathcal{B}|=2^{64}$. +Define a utility function on board, +\begin{equation} +u(B) = 2^{64}\cdot sum(B)+long(B). +\label{utility} +\end{equation} +It is easy to verify that +$\forall B_1, B_2\in \mathcal{B}$, +if $B_1\neq B_2$, then $u(B_1)\neq u(B_2)$. +For all possible boards, + $\forall B\in \mathcal{B}$, calculate the utility value + $u(B) $, and sort $B$ by $u(B) $ in ascending order. + Let $I(B)$ be the index of the board $B$ after sorting, + we have + \begin{equation} + \forall B_1, B_2\in \mathcal{B}, u(B_1)<u(B_2) \Rightarrow I(B_1)<I(B_2). + \end{equation} + +Consider a transition from a non-absorbing board $B_1$ to the next board $B_2$. +A valid move first turns $B_1$ into an intermediate board $B_1'$; +moving and merging tiles does not change the sum of all tiles, so $sum(B_1')=sum(B_1)$. +A new tile of 2 or 4 is then placed on an empty square of $B_1'$, which gives $B_2$ with +$sum(B_2)>sum(B_1')$, that is $sum(B_2)>sum(B_1)$. + +Based on (\ref{size}) and (\ref{utility}), +we have $u(B_2)>u(B_1)$. +That means $I(B_2)>I(B_1)$. +The transition probability between non-absorbing states satisfies (\ref{condition}), +so the claim follows by applying Theorem \ref{judgmentTheorem}. 
+\end{IEEEproof} + +%\input{material/2048prove} + +\subsection{Discussions} + +Behavior policy sampling: +$\langle s_t,a_t,r_{t+1},a_{t+1},s_{t+1} \rangle$, with corresponding features +$\langle \phi_t,r_{t+1},\phi_{t+1} \rangle$. + +Target policy sampling: +$\langle s_t,a_t,r_{t+1},a',s_{t+1} \rangle$, with corresponding features +$\langle \phi_t,r_{t+1},\phi' \rangle$. + +\begin{equation} +\theta_{t+1}=\theta_t+\alpha F_t (\rho_tR_t+\gamma \theta_t^{\top}\phi_t'-\theta_t^{\top}\phi_t-\mathbb{E}_{\pi}[\delta])\phi_t +\end{equation} +Written more simply, this becomes + +\begin{equation} +\theta_{t+1}=\theta_t+\alpha F_t \rho_t(\delta_t-\mathbb{E}_{\mu}[\rho_t\delta_t])\phi_t, +\end{equation} +where +$\delta_t=R_t+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$. + + + + + + + + + + + + + + + + + + + diff --git a/main/2048isNonergodic.tex b/main/2048isNonergodic.tex deleted file mode 100644 index a555247..0000000 --- a/main/2048isNonergodic.tex +++ /dev/null @@ -1,95 +0,0 @@ -\section{Non-ergodicity of 2048} - - -The purpose of this section is to prove the non-ergodicity of the 2048 game. - -\begin{theorem} -2048 game is non-ergodic between non-absorbing states. -\end{theorem} -\begin{IEEEproof} - To apply Theorem \ref{judgmentTheorem}, what we need - to do is to assign a countable value to the 2048 game board - and demonstrate the properties of the - state transition probabilities in the 2048 game. - - -In the 2048 game, each tile has 16 potential values, - including empty and $2^k$, $k\in\{1,2,3,\ldots,15\}$. -Using 4 bits to represent a tile, the game board is a 4$\times$4 matrix -$B$. The corresponding tile is then computed as follows: -\begin{equation} -1\leq m\text{, }n \leq 4\text{, }tile_{m,n} = -\begin{cases} -0, & \text{if } B_{mn}=0; \\ - 2^{B_{mn}}, & \text{otherwise.} -\end{cases} -\label{equationTile} -\end{equation} -The sum of all tiles in the game board is -\begin{equation} -sum(B) = \sum_{m=1}^4\sum_{n=1}^4 tile_{mn}. -\end{equation} -A 64-bit long integer can uniquely represent any game board state. 
-\begin{equation} -long(B)= \sum_{m=1}^4\sum_{n=1}^416^{(m-1)*4+(n-1)}\cdot B_{mn}. -\end{equation} -We have -\begin{equation} -long(B)<2^{64}. -\label{size} -\end{equation} -The size of the board space $\mathcal{B}$ is -$|\mathcal{B}|=2^{64}$. -Define a utility function on board, -\begin{equation} -u(B) = 2^{64}\cdot sum(B)+long(B). -\label{utility} -\end{equation} -It is easy to verify that -$\forall B_1, B_2\in \mathcal{B}$, -if $B_1\neq B_2$, then $u(B_1)\neq u(B_2)$. -For all possible board, - $\forall B\in \mathcal{B}$, calculate the utility value - $u(B) $, and sort $B$ by $u(B) $ in ascending order. - Let $I(B)$ be the index of the board $B$ after sorting, - we have - \begin{equation} - \forall B_1, B_2\in \mathcal{B}, u(B_1)sum(B_1')$, that is $sum(B_2)>sum(B_1)$. - -Based on (\ref{size}) and (\ref{utility}), -we have $u(B_2)>u(B_1)$. -That means $I(B_2)>I(B_1)$. -The transition probability between non-absorbing state satisifies (\ref{condition}), -the claim follows by applying Theorem \ref{judgmentTheorem}. -\end{IEEEproof} - -%\input{material/2048prove} - - - - - - - - - - - - - - - - - - - - diff --git a/main/acyclic.tex b/main/acyclic.tex new file mode 100644 index 0000000..e4a2d5c --- /dev/null +++ b/main/acyclic.tex @@ -0,0 +1,140 @@ +\section{Acyclicity between non-absorbing states} +\begin{definition}[Acyclicity between non-absorbing states] +Assume that $N$ exists for any policy $\pi$ + and is independent of initial states. + If, $\forall i,j \in S\setminus\{\text{T}\}$ with $i\neq j$, + $N_{ij}>0$ implies $N_{ji}=0$, + then the MDP is acyclic between non-absorbing states. + \label{definition3} +\end{definition} + + + + +\subsection{Boyan chain} + + + + + +\input{pic/boyanchain} + +Figure \ref{boyanchain} shows the Boyan chain. 
+ The transition probabilities between +non-absorbing states are as follows: +\[ +Q_{\text{bo}}\dot{=}\begin{tiny}\left[ \begin{array}{cccccccccccc} + 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 +\end{array}\right] \end{tiny} +\] +Then, +\begin{strip} +\begin{equation} +\begin{split} +N_{\text{bo}}=&(I_{12}-Q_{\text{bo}})^{-1}\\ +=&\begin{tiny}\left[\begin{array}{cccccccccccc} +1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 & 0.666015625 & 0.6669921875 & 0.66650390625 \\ + 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 & 0.666015625 & 0.6669921875 \\ + 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 & 0.666015625 \\ + 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 \\ + 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 \\ + 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 +\end{array}\right] +\end{tiny} +\end{split} 
+\end{equation} +\end{strip} +Based on Definition \ref{definition3}, +the Boyan chain +is acyclic between non-absorbing states. + +\subsection{A sufficient condition for acyclicity between non-absorbing states} +By observing the Boyan chain, +it is easy to provide a sufficient condition for acyclicity between non-absorbing states. +\begin{theorem}[A sufficient condition for acyclicity between non-absorbing states] +\label{judgmentTheorem} +Given a Markov chain with absorbing states, +suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$. +If the transition matrix $Q$ between non-absorbing states satisfies +\begin{equation} +\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}\begin{cases} +\geq 0, & \text{if } i\leq j; \\ + =0, & \text{otherwise,} +\end{cases} +\label{condition} +\end{equation} +then the Markov chain is acyclic between non-absorbing states. +\end{theorem} +\begin{IEEEproof} +The $Q$ matrix (\ref{condition}) is an upper triangular matrix. +The product of two upper triangular matrices is still an upper triangular matrix. +Furthermore, the sum of two upper triangular matrices +is still an upper triangular matrix. +Based on (\ref{definitionN}), +the $N$ matrix $(I-Q)^{-1}=\sum_{k=0}^{\infty}Q^k$ is a sum of products of upper triangular matrices. +Then, the $N$ matrix is an upper triangular matrix. +The claim now follows based on Definition \ref{definition3}. 
+\end{IEEEproof} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/main/background.tex b/main/background.tex index 37527c4..793bf00 100644 --- a/main/background.tex +++ b/main/background.tex @@ -1,6 +1,6 @@ \section{Background} -\subsection{MDP and 2048 game} +\subsection{Ergodicity and Non-ergodicity of Markov Chains} Consider Markov decision process (MDP) $\langle \mathcal{S}$, $\mathcal{A}$, $\mathcal{R}$, $\mathcal{T}$$\rangle$, where $\mathcal{S}=\{1,2,3,\ldots\}$ is a finite state space, $|\mathcal{S}|=n$, $\mathcal{A}$ is an action space, @@ -16,23 +16,6 @@ the MDP under policy $\pi$: $V^{\pi}(s)=\mathbb{E}_{\pi}\left[\sum_{t=0}^{\infty}r_t|s_0=s\right]$. -The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares. - At the beginning of the game, two squares are randomly filled - with tiles of either 2 or 4. - Players can make moves in four directions: \textit{up}, \textit{down}, - \textit{left}, and \textit{right}. - When a player chooses a direction, - all tiles will move in that direction until - they hit the edge or another tile. - If two tiles with the same number are adjacent - in the moving direction, they will merge into - a tile with the sum of the original numbers. - Each tile can only participate in one merge operation per move. - After each move, a new tile appears on a random empty square. - The new tile is 2 with probability 0.1, and 4 with probability 0.9. -The game ends when all squares are filled, and no valid merge operations can be made. - -\subsection{Ergodicity and Non-ergodicity of Markov Chains} Given a steady policy $\pi$, MDP becomes a Markov chain on state space $\mathcal{S}$ with a matrix @@ -79,9 +62,9 @@ reaching the leftmost or rightmost node where it terminates. The terminal states are usually called absorbing states. 
The transition probobility matrix of random walk with absorbing states -$P_{\text{ab}}$ is defined as follows: +$P^{\text{ab}}$ is defined as follows: \[ -P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc} +P^{\text{ab}}\dot{=}\begin{array}{c|ccccccc} &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline \text{T} & 1 & 0 & 0 & 0 & 0 & 0 \\ \text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\ @@ -93,7 +76,7 @@ P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc} \] Note that absorbing states can be combined into one. According to (\ref{invariance}), -the distribution $d_{\text{absorbing}}=\{1$, +the distribution $d^{\text{ab}}=\{1$, $0$, $0$, $0$, $0$, $0$\}. Since the probabilities of A, B, C, D, E are all zeros, random walk with absorbing states is non-ergodic. @@ -105,9 +88,9 @@ the distribution $d_{\text{absorbing}}=\{1$, is random walk with restarts. The transition probobility matrix of random walk with restarts -$P_{\text{restart}}$ is defined as follows: +$P^{\text{restart}}$ is defined as follows: \[ -P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc} +P^{\text{restart}}\dot{=}\begin{array}{c|ccccccc} &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline \text{T} & 0 & 0 & 0 & 1 & 0 & 0 \\ \text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\ @@ -119,9 +102,9 @@ P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc} \] According to (\ref{invariance}), -the distribution $d_{\text{restart}}=\{0.1$, +the distribution $d^{\text{restart}}=\{0.1$, $0.1$, $0.2$, $0.3$, $0.2$, $0.1\}$. - Since the probability of T, A, B, C, D, E are non-zeros, + Since the probabilities of T, A, B, C, D, E are non-zeros, random walk with restarts is ergodic. \subsection{Ergodicity between non-absorbing states} @@ -150,6 +133,10 @@ where $Q$ is the matrix of transition probabilities between \label{definitionN} \end{equation} where $I_{n-1}$ is the $(n-1)\times(n-1)$ identity matrix. + $N$ is a reachability matrix. 
+ Starting from state $i$, the chain visits state $j$ an + expected number of times $N_{ij}$ before absorption. + $N_{ij}=0$ means that state $j$ is not reachable from state $i$. It is now easy to define whether the non-absorbing states are ergodic. @@ -165,15 +152,15 @@ Assume that $N$ exists for any policy $\pi$ For random walk with absorbing states, \[ -P_{\text{ab}} = +P^{\text{ab}} = \begin{bmatrix} -Q_{\text{ab}} & R_{\text{ab}} \\ -0 & I_{\text{ab}} +Q^{\text{ab}} & R^{\text{ab}} \\ +0 & I^{\text{ab}} \end{bmatrix}, \] where \[ -Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc} +Q^{\text{ab}}\dot{=}\begin{array}{c|ccccc} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline \text{A} & 0 & \frac{1}{2} & 0 & 0 & 0 \\ \text{B} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\ @@ -201,7 +188,7 @@ Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc} Then, \[ -N_{\text{ab}}=(I_5-Q_{\text{ab}})^{-1}=\begin{array}{c|ccccc} +N^{\text{ab}}=(I_5-Q^{\text{ab}})^{-1}=\begin{array}{c|ccccc} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline \text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\ \text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\ diff --git a/main/introduction.tex b/main/introduction.tex index c7329a0..c1e2012 100644 --- a/main/introduction.tex +++ b/main/introduction.tex @@ -85,8 +85,8 @@ To validate the above point, we designed two sets of experiments, In the maze game, the optimal value function is used, with the optimal policy achieving a score of {-54} points. As shown in Figure \ref{fig_sim}, - the x-axis represents $\epsilon$, - the y-axis represents the average score per game, + the x-axis represents the exploration parameter $\epsilon$, + the y-axis represents the average score per episode, and the shaded area represents the standard deviation. 
We can find that in the 2048 game, the total score sharply decreases as $\epsilon$ increases, @@ -110,6 +110,7 @@ The comparison in this set of experiments indicates that chance to return to the previous state. This relates to the game's property of ergodicity. -In this paper, we proved that the game 2048 is non-ergodic. +In this paper, we proved that the game 2048 is acyclic +between non-absorbing states. diff --git a/main/nonergodic.tex b/main/nonergodic.tex deleted file mode 100644 index f54779f..0000000 --- a/main/nonergodic.tex +++ /dev/null @@ -1,161 +0,0 @@ -\section{Non-ergodicity between non-absorbing states} -\begin{definition}[Non-ergodicity between non-absorbing states] -Assume that $N$ exists for any policy $\pi$ - and is independent of initial states. - $\exists i,j \in S\setminus\{\text{T}\}$, - $N_{ij}=0$, MDP is non-ergodic between non-absorbing states. - \label{definition3} -\end{definition} - - - - -\subsection{St. Petersburg paradox} - - - -The St. Petersburg paradox is a paradox associated -with gambling and decision theory. It is named after the city -of St. Petersburg in Russia and was initially introduced - by the mathematician Daniel Bernoulli in 1738. - -The paradox involves a gambling game with the following rules: -\begin{itemize} - \item Participants must pay a fixed entry fee to join the game. - \item The game continues until a coin lands heads up. -Each toss determines the prize, with the first heads - appearing on the $t$-th toss resulting in a prize of $2^t$. 
-\end{itemize} - - -%\input{pic/FigureParadox} - -The expected return of all possibilities is -\begin{equation} -\begin{split} -\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(x)\times V(x)\\ -&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\ -&=\infty -\end{split} -\end{equation} - - -Despite the potential for the prize to escalate -significantly, the expected value calculation -in probability theory reveals that the average -participant in this gambling game would end up paying - an infinite fee. This is due to the prize's expected - value being infinite. Even though the probability of - winning is small with each toss, when multiplied, - it leads to an infinitely increasing expected value. - -This paradox challenges individuals' intuitions and -decision-making regarding gambling. Despite the allure -of a potentially substantial prize, the actual expected - value of participating in this gambling game is infinite. - Consequently, in the long run, participants could face - an infinite monetary loss. - -\input{pic/paradox} - -Figure \ref{TruncatedPetersburg} is a truncated version -of the St. Petersburg paradox. 
The transition probabilities between -non-absorbing states are as follows: -\[ -Q_{\text{st}}\dot{=}\begin{array}{c|ccccc} - & \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5 \\\hline -\text{S}_1 & 0 & \frac{1}{2} & 0 & 0 & 0 \\ -\text{S}_2 & 0 & 0 & \frac{1}{2} & 0 & 0 \\ -\text{S}_3 & 0 & 0 & 0 & \frac{1}{2} & 0 \\ -\text{S}_4 & 0 & 0 & 0 & 0 & \frac{1}{2} \\ -\text{S}_5 & 0 & 0 & 0 & 0 & 0 -\end{array} -\] -Then, -\[ -N_{\text{st}}=(I_5-Q_{\text{st}})^{-1}=\begin{array}{c|ccccc} -& \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5 \\\hline -\text{S}_1 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} & \frac{1}{16} \\ -\text{S}_2 & 0 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} \\ -\text{S}_3 & 0 & 0 & 1 & \frac{1}{2} & \frac{1}{4} \\ -\text{S}_4 & 0 & 0 & 0 & 1 & \frac{1}{2} \\ -\text{S}_5 & 0 & 0 & 0 & 0 & 1 \\ -\end{array} -\] -Bases on Definition \ref{definition3}, -the truncated St. Petersburg paradox -is non-ergodic between non-absorbing states. - -\subsection{A sufficient condition for non-ergodicity between non-absorbing states} -By observing the truncated St. Petersburg paradox, -it is easy to provide a sufficient condition for non-ergodicity between non-absorbing states. -\begin{theorem}[A sufficient condition for non-ergodicity between non-absorbing states] -\label{judgmentTheorem} -Given a Markov chain with absorbing states, -suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$. -If the transition matrix $Q$ between non-absorbing states satifies, -\begin{equation} -\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}=\begin{cases} -\geq 0, & \text{if } i\leq j; \\ - 0, & \text{otherwise.} -\end{cases} -\label{condition} -\end{equation} -Then, the Markov chain is non-ergodic between non-absorbing states. -\end{theorem} -\begin{IEEEproof} -The $Q$ matrix (\ref{condition}) is an upper triangular matrix. -The product of two upper triangular matrices is still an upper triangular matrix. 
-Furthermore, the sum of two upper triangular matrices -is still an upper triangular matrix. -Based on Definition \ref{definitionN}, -the $N$ matrix is product and sum of upper triangular matrices. -Then, the $N$ matrix is an upper triangular matrix. -The claim now follows based on Definition \ref{definition3}. -\end{IEEEproof} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/material/nonergodic.tex b/material/nonergodic.tex new file mode 100644 index 0000000..249a4cc --- /dev/null +++ b/material/nonergodic.tex @@ -0,0 +1,162 @@ +\section{Non-ergodicity between non-absorbing states} +\begin{definition}[Non-ergodicity between non-absorbing states] +Assume that $N$ exists for any policy $\pi$ + and is independent of initial states. + $\exists i,j \in S\setminus\{\text{T}\}$, + $N_{ij}=0$, MDP is non-ergodic between non-absorbing states. + \label{definition3} +\end{definition} + + + + +\subsection{St. Petersburg paradox} + + + +The St. Petersburg paradox is a paradox associated +with gambling and decision theory \cite{peters2019ergodicity}. + It is named after the city +of St. Petersburg in Russia and was initially introduced + by the mathematician Daniel Bernoulli in 1738. + +The paradox involves a gambling game with the following rules: +\begin{itemize} + \item Participants must pay a fixed entry fee to join the game. + \item The game continues until a coin lands heads up. +Each toss determines the prize, with the first heads + appearing on the $t$-th toss resulting in a prize of $2^t$. 
+\end{itemize} + + +%\input{pic/FigureParadox} + +The expected return of all possibilities is +\begin{equation} +\begin{split} +\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(x)\times V(x)\\ +&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\ +&=\infty +\end{split} +\end{equation} + + +Despite the potential for the prize to escalate +significantly, the expected value calculation +in probability theory reveals that the average +participant in this gambling game would end up paying + an infinite fee. This is due to the prize's expected + value being infinite. Even though the probability of + winning is small with each toss, when multiplied, + it leads to an infinitely increasing expected value. + +This paradox challenges individuals' intuitions and +decision-making regarding gambling. Despite the allure +of a potentially substantial prize, the actual expected + value of participating in this gambling game is infinite. + Consequently, in the long run, participants could face + an infinite monetary loss. + +\input{pic/paradox} + +Figure \ref{TruncatedPetersburg} is a truncated version +of the St. Petersburg paradox. 
The transition probabilities between +non-absorbing states are as follows: +\[ +Q_{\text{st}}\dot{=}\begin{array}{c|ccccc} + & \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5 \\\hline +\text{S}_1 & 0 & \frac{1}{2} & 0 & 0 & 0 \\ +\text{S}_2 & 0 & 0 & \frac{1}{2} & 0 & 0 \\ +\text{S}_3 & 0 & 0 & 0 & \frac{1}{2} & 0 \\ +\text{S}_4 & 0 & 0 & 0 & 0 & \frac{1}{2} \\ +\text{S}_5 & 0 & 0 & 0 & 0 & 0 +\end{array} +\] +Then, +\[ +N_{\text{st}}=(I_5-Q_{\text{st}})^{-1}=\begin{array}{c|ccccc} +& \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5 \\\hline +\text{S}_1 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} & \frac{1}{16} \\ +\text{S}_2 & 0 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} \\ +\text{S}_3 & 0 & 0 & 1 & \frac{1}{2} & \frac{1}{4} \\ +\text{S}_4 & 0 & 0 & 0 & 1 & \frac{1}{2} \\ +\text{S}_5 & 0 & 0 & 0 & 0 & 1 \\ +\end{array} +\] +Based on Definition \ref{definition3}, +the truncated St. Petersburg paradox +is non-ergodic between non-absorbing states. + +\subsection{A sufficient condition for non-ergodicity between non-absorbing states} +By observing the truncated St. Petersburg paradox, +it is easy to provide a sufficient condition for non-ergodicity between non-absorbing states. +\begin{theorem}[A sufficient condition for non-ergodicity between non-absorbing states] +\label{judgmentTheorem} +Given a Markov chain with absorbing states, +suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$. +If the transition matrix $Q$ between non-absorbing states satisfies +\begin{equation} +\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}\begin{cases} +\geq 0, & \text{if } i\leq j; \\ + =0, & \text{otherwise,} +\end{cases} +\label{condition} +\end{equation} +then the Markov chain is non-ergodic between non-absorbing states. +\end{theorem} +\begin{IEEEproof} +The $Q$ matrix (\ref{condition}) is an upper triangular matrix. +The product of two upper triangular matrices is still an upper triangular matrix. 
+Furthermore, the sum of two upper triangular matrices +is still an upper triangular matrix. +Based on Definition \ref{definitionN}, +the $N$ matrix is product and sum of upper triangular matrices. +Then, the $N$ matrix is an upper triangular matrix. +The claim now follows based on Definition \ref{definition3}. +\end{IEEEproof} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pic/boyanchain.tex b/pic/boyanchain.tex new file mode 100644 index 0000000..e1aa076 --- /dev/null +++ b/pic/boyanchain.tex @@ -0,0 +1,51 @@ + +%\usetikzlibrary{positioning, shapes, calc} +\begin{figure}[ht] +% + \begin{center} + \scalebox{0.7} + { + \begin{tikzpicture}[node distance=4mm] + % 定义节点样式 + \tikzset{ + stateWithBorder/.style={draw, circle, minimum width=8mm}, + stateWithoutBorder/.style={ draw=none, minimum width=8mm} + } + + % Nodes + \node[stateWithBorder] (0) {1}; + \node[stateWithBorder, right=of 0] (1) {2 }; + \node[stateWithBorder, right=of 1] (2) {3 }; + \node[stateWithoutBorder, right=of 2] (3) {...}; + \node[stateWithoutBorder, right=of 3,node distance=1mm] (4) {...}; + %\node[stateWithBorder, right=of 4] (5) {9}; + \node[stateWithBorder, right=of 4] (6) {10}; + \node[stateWithBorder, right=of 6] (7) {11}; + \node[stateWithBorder, right=of 7] (8) {12}; + \node[draw, rectangle, fill=gray!50, right=of 8] (DEAD) {T}; + + \node[above=of 0.west, node distance=12mm] {start}; + + \draw[solid,->] (0) -- (1); + \draw[solid,->] (1) -- (2); + \draw[solid,->] (2) -- (3); + \draw[solid,->] (4) -- (6); + %\draw[solid,->] (5) -- (6); + \draw[solid,->] (6) -- (7); + \draw[solid,->] (7) -- (8); + \draw[solid,->] (8) -- (DEAD); + \draw[solid,->] (0.north) to[out=60,in=120] (2.north west); + \draw[solid,->] (1.north) to[out=60,in=120] ([yshift=2mm]3.north); + \draw[solid,->] (2.north) to[out=60,in=120] ([yshift=2mm]4.north); + \draw[solid,->] ([yshift=3mm]4.north west) to[out=60,in=120] (6.north west); + \draw[solid,->] (4.north) 
to[out=60,in=120] (7.north west); + \draw[solid,->] (6.north) to[out=60,in=120] (8.north west); + \draw[solid,->] (7.north) to[out=60,in=120] (DEAD.north west); + %\draw[solid,->] (8.south west) to[out=240,in=300] (0.south east); + + \end{tikzpicture} + } + \caption{Boyan Chain.} + \label{boyanchain} + \end{center} +\end{figure} \ No newline at end of file diff --git a/references.bib b/references.bib index 57e410f..5ae999a 100644 --- a/references.bib +++ b/references.bib @@ -1,4 +1,14 @@ # encoding:utf-8 +@article{peters2019ergodicity, + title={The ergodicity problem in economics}, + author={Peters, Ole}, + journal={Nature Physics}, + volume={15}, + number={12}, + pages={1216--1221}, + year={2019}, + publisher={Nature Publishing Group} +} @article{pakes1969some, title={Some conditions for ergodicity and recurrence of Markov chains}, author={Pakes, Anthony G}, -- libgit2 0.26.0