From cded433643dca4f78cd30b0655a8468d0bb98ecc Mon Sep 17 00:00:00 2001
From: Lenovo <Lenovo@windows10.microdone.cn>
Date: Fri, 31 May 2024 15:38:26 +0800
Subject: [PATCH] 无环才是本质

---
 document.tex              |   7 ++++---
 main/2048isAcyclic.tex    | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main/2048isNonergodic.tex |  95 -----------------------------------------------------------------------------------------------
 main/acyclic.tex          | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main/background.tex       |  47 +++++++++++++++++------------------------------
 main/introduction.tex     |   7 ++++---
 main/nonergodic.tex       | 161 -----------------------------------------------------------------------------------------------------------------------------------------------------------------
 material/nonergodic.tex   | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 pic/boyanchain.tex        |  51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 references.bib            |  10 ++++++++++
 10 files changed, 526 insertions(+), 292 deletions(-)
 create mode 100644 main/2048isAcyclic.tex
 delete mode 100644 main/2048isNonergodic.tex
 create mode 100644 main/acyclic.tex
 delete mode 100644 main/nonergodic.tex
 create mode 100644 material/nonergodic.tex
 create mode 100644 pic/boyanchain.tex

diff --git a/document.tex b/document.tex
index ebf07ad..71d3fcb 100644
--- a/document.tex
+++ b/document.tex
@@ -26,12 +26,13 @@
 \usetikzlibrary{automata, positioning}
 \usetikzlibrary{positioning}
 \usetikzlibrary{decorations.markings}
+\usepackage{cuted}
 \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
 % updated with editorial comments 8/9/2021
 \newcommand{\highlight}[1]{\textcolor{red}{#1}}
 \begin{document}
 
-\title{Non-ergodicity of Game 2048}
+\title{Acyclicity of Game 2048}
 
 \author{Xingguo Chen,  Xinwen Li, Shangdong Yang, and Wenhao Wang
 \thanks{Manuscript received XXXX; revised XXXX; accepted XXXX.
@@ -74,8 +75,8 @@ wangwenhao11@nudt.edu.cn).
 
 \input{main/introduction}
 \input{main/background}
-\input{main/nonergodic}
-\input{main/2048isNonergodic}
+\input{main/acyclic}
+\input{main/2048isAcyclic}
 
 %\input{main/nonergodicity}
 %\input{main/paradox}
diff --git a/main/2048isAcyclic.tex b/main/2048isAcyclic.tex
new file mode 100644
index 0000000..6bc127f
--- /dev/null
+++ b/main/2048isAcyclic.tex
@@ -0,0 +1,138 @@
+\section{Acyclicity of the 2048 game}
+
+
+The purpose of this section is to prove the acyclicity of the 2048 game
+and give some discussions.
+
+\subsection{Acyclicity of the 2048 game}
+
+The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares.
+ At the beginning of the game,  two squares are randomly filled
+  with tiles of either 2 or 4.
+  Players can make moves in four directions: \textit{up}, \textit{down},
+   \textit{left}, and \textit{right}. 
+   When a player chooses a direction, 
+   all tiles will move in that direction until 
+   they hit the edge or another tile. 
+   If two tiles with the same number are adjacent 
+   in the moving direction, they will merge into 
+   a tile with the sum of the original numbers.
+   Each tile can only participate in one merge operation per move.
+   After each move, a new tile appears on a random empty square.
+   The new tile is 2 with  probability 0.9, and 4 with probability 0.1.
+The game ends when all squares are filled, and no valid merge operations can be made. 
+
+
+\begin{theorem}
+The 2048 game is acyclic between non-absorbing states.
+\end{theorem}
+\begin{IEEEproof}
+ To apply Theorem \ref{judgmentTheorem}, what we need 
+ to do is to assign a countable value to the 2048 game board 
+ and demonstrate the properties of the 
+ state transition probabilities in the 2048 game.
+
+
+In the 2048 game, each tile has 16 potential values,
+ including empty and $2^k$, $k\in\{1,2,3,\ldots,15\}$.
+Using 4 bits to represent a tile, the game board is a 4$\times$4 matrix 
+$B$. The corresponding tile is then computed as follows:
+\begin{equation}
+1\leq m\text{, }n \leq 4\text{, }tile_{m,n} =
+\begin{cases}
+0, & \text{if } B_{mn}=0; \\
+ 2^{B_{mn}}, & \text{otherwise.}  
+\end{cases}
+\label{equationTile}
+\end{equation}
+The sum of all tiles in the game board is
+\begin{equation}
+sum(B) = \sum_{m=1}^4\sum_{n=1}^4 tile_{mn}.
+\end{equation}
+A 64-bit long integer can uniquely represent any game board state.
+\begin{equation}
+long(B)= \sum_{m=1}^{4}\sum_{n=1}^{4} 16^{4(m-1)+(n-1)}\cdot B_{mn}.
+\end{equation}
+We have 
+\begin{equation}
+long(B)<2^{64}.
+\label{size}
+\end{equation}
+The size of the board space $\mathcal{B}$ is
+$|\mathcal{B}|=2^{64}$.
+Define a utility function on board,
+\begin{equation}
+u(B) = 2^{64}\cdot sum(B)+long(B).
+\label{utility}
+\end{equation}
+It is easy to verify that 
+$\forall B_1, B_2\in \mathcal{B}$,
+if $B_1\neq B_2$, then $u(B_1)\neq u(B_2)$.
+For all possible board,
+ $\forall B\in \mathcal{B}$, calculate the utility value
+ $u(B) $, and sort $B$ by $u(B) $ in ascending order.
+ Let $I(B)$ be the index of the board $B$ after sorting,
+ we have
+ \begin{equation}
+ \forall B_1, B_2\in \mathcal{B}, u(B_1)<u(B_2) \iff
+ I(B_1)<I(B_2).
+ \label{basis}
+ \end{equation}
+For any transition $\langle B_1, a, B_1', B_2\rangle$ in the 2048 game,
+we have 
+$sum(B_1)=sum(B_1')$  regardless of whether at least two tiles merge.
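+\highlight{TODO (Xinwen): draw the state transition diagram of 2048; each state
+has two kinds of successors, one where a merge occurs and one where no merge occurs.}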
+
+
+Due to a new generated 2-tile or 4-tile in board $B_2$,
+$sum(B_2)>sum(B_1')$, that is $sum(B_2)>sum(B_1)$.
+
+Based on (\ref{size}) and (\ref{utility}),
+we have  $u(B_2)>u(B_1)$.
+By (\ref{basis}), this means $I(B_2)>I(B_1)$.
+Hence the transition probabilities between non-absorbing states satisfy (\ref{condition}),
+and the claim follows by applying Theorem \ref{judgmentTheorem}.
+\end{IEEEproof}
+
+%\input{material/2048prove}
+
+\subsection{Discussions}
+
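+A sample from the behavior policy is
+$\langle s_t,a_t,r_{t+1},a_{t+1},s_{t+1} \rangle$, with corresponding features
+$\langle \phi_t,r_{t+1},\phi_{t+1} \rangle$.
+
+A sample from the target policy is
+$\langle s_t,a_t,r_{t+1},a',s_{t+1} \rangle$, with corresponding features
+$\langle \phi_t,r_{t+1},\phi' \rangle$.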
+
+\begin{equation}
+\theta_{t+1}=\theta_t+\alpha F_t (\rho_tR_t+\gamma \theta_t^{\top}\phi_t'-\theta_t^{\top}\phi_t-\mathbb{E}_{\pi}[\delta])\phi_t
+\end{equation}
+Written more simply, this becomes
+
+\begin{equation}
+\theta_{t+1}=\theta_t+\alpha F_t \rho_t(\delta_t-\mathbb{E}_{\mu}[\rho_t\delta_t])\phi_t,
+\end{equation}
+where
+$\delta_t=R_t+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/2048isNonergodic.tex b/main/2048isNonergodic.tex
deleted file mode 100644
index a555247..0000000
--- a/main/2048isNonergodic.tex
+++ /dev/null
@@ -1,95 +0,0 @@
-\section{Non-ergodicity of 2048}
-
-
-The purpose of this section is to prove the non-ergodicity of the 2048 game.
-
-\begin{theorem}
-2048 game is non-ergodic between non-absorbing states.
-\end{theorem}
-\begin{IEEEproof}
- To apply Theorem \ref{judgmentTheorem}, what we need 
- to do is to assign a countable value to the 2048 game board 
- and demonstrate the properties of the 
- state transition probabilities in the 2048 game.
-
-
-In the 2048 game, each tile has 16 potential values,
- including empty and $2^k$, $k\in\{1,2,3,\ldots,15\}$.
-Using 4 bits to represent a tile, the game board is a 4$\times$4 matrix 
-$B$. The corresponding tile is then computed as follows:
-\begin{equation}
-1\leq m\text{, }n \leq 4\text{, }tile_{m,n} =
-\begin{cases}
-0, & \text{if } B_{mn}=0; \\
- 2^{B_{mn}}, & \text{otherwise.}  
-\end{cases}
-\label{equationTile}
-\end{equation}
-The sum of all tiles in the game board is
-\begin{equation}
-sum(B) = \sum_{m=1}^4\sum_{n=1}^4 tile_{mn}.
-\end{equation}
-A 64-bit long integer can uniquely represent any game board state.
-\begin{equation}
-long(B)= \sum_{m=1}^4\sum_{n=1}^416^{(m-1)*4+(n-1)}\cdot B_{mn}.
-\end{equation}
-We have 
-\begin{equation}
-long(B)<2^{64}.
-\label{size}
-\end{equation}
-The size of the board space $\mathcal{B}$ is
-$|\mathcal{B}|=2^{64}$.
-Define a utility function on board,
-\begin{equation}
-u(B) = 2^{64}\cdot sum(B)+long(B).
-\label{utility}
-\end{equation}
-It is easy to verify that 
-$\forall B_1, B_2\in \mathcal{B}$,
-if $B_1\neq B_2$, then $u(B_1)\neq u(B_2)$.
-For all possible board,
- $\forall B\in \mathcal{B}$, calculate the utility value
- $u(B) $, and sort $B$ by $u(B) $ in ascending order.
- Let $I(B)$ be the index of the board $B$ after sorting,
- we have
- \begin{equation}
- \forall B_1, B_2\in \mathcal{B}, u(B_1)<u(B_2) \iff
- I(B_1)<I(B_2).
- \label{basis}
- \end{equation}
-For any transition $\langle B_1, a, B_1', B_2\rangle$ in the 2048 game,
-we have 
-$sum(B_1)=sum(B_1')$  regardless of whether at least two tiles merge.
-
-Due to a new generated 2-tile or 4-tile in board $B_2$,
-$sum(B_2)>sum(B_1')$, that is $sum(B_2)>sum(B_1)$.
-
-Based on (\ref{size}) and (\ref{utility}),
-we have  $u(B_2)>u(B_1)$.
-That means $I(B_2)>I(B_1)$.
-The transition probability between non-absorbing state satisifies (\ref{condition}),
-the claim follows by applying Theorem \ref{judgmentTheorem}.
-\end{IEEEproof}
-
-%\input{material/2048prove}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/main/acyclic.tex b/main/acyclic.tex
new file mode 100644
index 0000000..e4a2d5c
--- /dev/null
+++ b/main/acyclic.tex
@@ -0,0 +1,140 @@
+\section{Acyclicity between non-absorbing states}
+\begin{definition}[Acyclicity between non-absorbing states]
+Assume that $N$  exists for any policy $\pi$
+ and is independent of initial states.
+ If, $\forall i,j \in S\setminus\{\text{T}\}$ with $i\neq j$,
+ $N_{ij}>0$ implies $N_{ji}=0$,
+  then the MDP is acyclic between non-absorbing states.
+  \label{definition3}
+\end{definition}
+
+
+
+
+\subsection{Boyan chain}
+
+
+
+
+
+\input{pic/boyanchain}
+
+Figure~\ref{boyanchain} shows the Boyan chain.
+ The transition probabilities between
+non-absorbing states are as follows:
+\[
+Q^{\text{bo}}\dot{=}\begin{tiny}\left[ \begin{array}{cccccccccccc}
+ 0 & 0.5 &  0.5 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0.5 &  0.5 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0.5 &  0.5 & 0 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0.5 &  0.5 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0.5 &  0.5 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0.5 &  0.5 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0.5 &  0.5 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0.5 &  0.5 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0.5 &  0.5 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0.5 &  0.5 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0.5 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 
+\end{array}\right] \end{tiny}
+\]
+Then,
+\begin{strip}
+\begin{equation}
+\begin{split}
+N^{\text{bo}}=&(I_{12}-Q^{\text{bo}})^{-1}\\
+=&\begin{tiny}\left[\begin{array}{cccccccccccc}
+1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 &  0.671875 & 0.6640625 &  0.66796875 & 0.666015625 &  0.6669921875 & 0.66650390625 \\
+ 0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 &  0.65625 & 0.671875 &  0.6640625 & 0.66796875 &  0.666015625 & 0.6669921875 \\
+ 0 & 0 &  1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 &  0.671875 & 0.6640625 &  0.66796875 & 0.666015625 \\
+ 0 & 0 &  0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 &  0.65625 & 0.671875 &  0.6640625 & 0.66796875 \\
+ 0 & 0 &  0 & 0 &  1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 &  0.671875 & 0.6640625 \\
+ 0 & 0 &  0 & 0 &  0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 &  0.65625 & 0.671875 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  1 & 0.5 &  0.75 & 0.625 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 1 &  0.5 &  0.75 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 & 1 & 0.5 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 1  
+\end{array}\right]
+\end{tiny}
+\end{split}
+\end{equation}
+\end{strip}
+Based on Definition \ref{definition3}, 
+the Boyan chain 
+is acyclic between non-absorbing states.
+
+\subsection{A sufficient condition for acyclicity between non-absorbing states}
+By observing Boyan chain, 
+it is easy to provide a sufficient condition for acyclicity between non-absorbing states.
+\begin{theorem}[A sufficient condition for acyclicity between non-absorbing states]
+\label{judgmentTheorem}
+Given a Markov chain with absorbing states, 
+suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$.
+If the transition matrix $Q$ between non-absorbing states satisfies
+\begin{equation}
+\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}=\begin{cases}
+\geq 0, & \text{if } i\leq j; \\
+ 0, & \text{otherwise.}  
+\end{cases}
+\label{condition}
+\end{equation}
+Then, the Markov chain is acyclic between non-absorbing states.
+\end{theorem}
+\begin{IEEEproof}
+The $Q$ matrix (\ref{condition}) is an upper triangular matrix.
+The product of two upper triangular matrices is still an upper triangular matrix.
+Furthermore, the sum of two upper triangular matrices 
+is still an upper triangular matrix.
+By (\ref{definitionN}), $N=(I-Q)^{-1}=\sum_{k=0}^{\infty}Q^{k}$, so
+the $N$ matrix is a sum of products of upper triangular matrices.
+Then, the $N$ matrix is an upper triangular matrix.
+The claim now follows based on Definition \ref{definition3}.
+\end{IEEEproof}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main/background.tex b/main/background.tex
index 37527c4..793bf00 100644
--- a/main/background.tex
+++ b/main/background.tex
@@ -1,6 +1,6 @@
 \section{Background}
   
-\subsection{MDP and 2048 game}
+\subsection{Ergodicity and Non-ergodicity of Markov Chains}
 Consider Markov decision process (MDP)
 $\langle \mathcal{S}$, $\mathcal{A}$, $\mathcal{R}$, $\mathcal{T}$$\rangle$, where 
 $\mathcal{S}=\{1,2,3,\ldots\}$ is a finite state space, $|\mathcal{S}|=n$, $\mathcal{A}$ is an action space,
@@ -16,23 +16,6 @@ the MDP under policy $\pi$:
 $V^{\pi}(s)=\mathbb{E}_{\pi}\left[\sum_{t=0}^{\infty}r_t|s_0=s\right]$.
 
 
-The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares.
- At the beginning of the game,  two squares are randomly filled
-  with tiles of either 2 or 4.
-  Players can make moves in four directions: \textit{up}, \textit{down},
-   \textit{left}, and \textit{right}. 
-   When a player chooses a direction, 
-   all tiles will move in that direction until 
-   they hit the edge or another tile. 
-   If two tiles with the same number are adjacent 
-   in the moving direction, they will merge into 
-   a tile with the sum of the original numbers.
-   Each tile can only participate in one merge operation per move.
-   After each move, a new tile appears on a random empty square.
-   The new tile is 2 with  probability 0.1, and 4 with probability 0.9.
-The game ends when all squares are filled, and no valid merge operations can be made. 
-
-\subsection{Ergodicity and Non-ergodicity of Markov Chains}
 
 Given a steady policy $\pi$, MDP becomes a Markov chain on state space
 $\mathcal{S}$ with a   matrix
@@ -79,9 +62,9 @@ reaching the leftmost or rightmost node where it terminates.
 The terminal states are usually called absorbing states.
 The transition probobility matrix
 of random walk with absorbing states 
-$P_{\text{ab}}$ is defined as follows:
+$P^{\text{ab}}$ is defined as follows:
 \[
-P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
+P^{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
 &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E}  \\\hline
 \text{T} & 1 & 0 & 0 & 0 & 0 & 0 \\
 \text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
@@ -93,7 +76,7 @@ P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
 \]
 Note that absorbing states can be combined into one.
 According to (\ref{invariance}),
-the distribution $d_{\text{absorbing}}=\{1$,
+the distribution $d^{\text{ab}}=\{1$,
  $0$, $0$, $0$, $0$, $0$\}.
  Since the probabilities of A, B, C, D, E are all zeros,
  random walk with absorbing states is non-ergodic.
@@ -105,9 +88,9 @@ the distribution $d_{\text{absorbing}}=\{1$,
  is random walk with restarts.
  The transition probobility matrix
 of random walk with restarts 
-$P_{\text{restart}}$ is defined as follows:
+$P^{\text{restart}}$ is defined as follows:
 \[
-P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
+P^{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
 &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
 \text{T} & 0 & 0 & 0 & 1 & 0 & 0 \\
 \text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
@@ -119,9 +102,9 @@ P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
 \]
  
 According to (\ref{invariance}),
-the distribution $d_{\text{restart}}=\{0.1$,
+the distribution $d^{\text{restart}}=\{0.1$,
  $0.1$, $0.2$, $0.3$, $0.2$, $0.1\}$.
- Since the probability of T, A, B, C, D, E are non-zeros,
+ Since the probabilities of T, A, B, C, D, E are non-zeros,
  random walk with restarts is ergodic.
 
 \subsection{Ergodicity  between non-absorbing states}
@@ -150,6 +133,10 @@ where $Q$ is the matrix of transition probabilities between
   \label{definitionN}
   \end{equation}
   where $I_{n-1}$ is the $(n-1)\times(n-1)$ identity matrix.
+  $N$ is the fundamental matrix of the absorbing chain.
+  Starting from state $i$, $N_{ij}$ is the expected number of
+  visits to state $j$ before absorption.
+ $N_{ij}=0$ means that state $j$ is not reachable from state $i$.
 It is now easy to define whether the non-absorbing states 
 are ergodic.
  
@@ -165,15 +152,15 @@ Assume that $N$  exists for any policy $\pi$
 
 For random walk with absorbing states,
 \[
-P_{\text{ab}} =
+P^{\text{ab}} =
 \begin{bmatrix}
-Q_{\text{ab}} & R_{\text{ab}} \\
-0 & I_{\text{ab}}
+Q^{\text{ab}} & R^{\text{ab}} \\
+0 & I^{\text{ab}}
 \end{bmatrix},
 \]
 where
 \[
-Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc}
+Q^{\text{ab}}\dot{=}\begin{array}{c|ccccc}
  & \text{A} & \text{B} & \text{C} & \text{D} & \text{E}  \\\hline
 \text{A}  & 0 & \frac{1}{2} & 0 & 0 & 0 \\
 \text{B} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
@@ -201,7 +188,7 @@ Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc}
 
 Then,
 \[
-N_{\text{ab}}=(I_5-Q_{\text{ab}})^{-1}=\begin{array}{c|ccccc}
+N^{\text{ab}}=(I_5-Q^{\text{ab}})^{-1}=\begin{array}{c|ccccc}
  & \text{A} & \text{B} & \text{C} & \text{D} & \text{E}  \\\hline
 \text{A}  & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\
 \text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\
diff --git a/main/introduction.tex b/main/introduction.tex
index c7329a0..c1e2012 100644
--- a/main/introduction.tex
+++ b/main/introduction.tex
@@ -85,8 +85,8 @@ To validate the above point, we designed two sets of experiments,
   In the maze game, the optimal value function is used, 
   with the optimal policy achieving a score of {-54} points.
   As shown in Figure \ref{fig_sim}, 
-  the x-axis represents $\epsilon$, 
-  the y-axis represents the average score per game,
+  the x-axis represents the exploration parameter $\epsilon$, 
+  the y-axis represents the average score per episode,
    and the shaded area represents the standard deviation.
   We can find that in the 2048 game, the total score
    sharply decreases as $\epsilon$ increases,
@@ -110,6 +110,7 @@ The comparison in this set of experiments indicates that
     chance to return to the previous state. 
     This relates to the game's property of ergodicity.
 
-In this paper, we proved that the game 2048 is non-ergodic.
+In this paper, we proved that the game 2048 is acyclic
+between non-absorbing states.
 
 
diff --git a/main/nonergodic.tex b/main/nonergodic.tex
deleted file mode 100644
index f54779f..0000000
--- a/main/nonergodic.tex
+++ /dev/null
@@ -1,161 +0,0 @@
-\section{Non-ergodicity between non-absorbing states}
-\begin{definition}[Non-ergodicity between non-absorbing states]
-Assume that $N$  exists for any policy $\pi$
- and is independent of initial states.
- $\exists i,j \in S\setminus\{\text{T}\}$, 
- $N_{ij}=0$, MDP is non-ergodic between non-absorbing states.
-  \label{definition3}
-\end{definition}
-
-
-
-
-\subsection{St. Petersburg paradox}
-
-
-
-The St. Petersburg paradox is a paradox associated 
-with gambling and decision theory. It is named after the city 
-of St. Petersburg in Russia and was initially introduced
- by the mathematician Daniel Bernoulli in 1738.
-
-The paradox involves a gambling game with the following rules:
-\begin{itemize}
-  \item Participants must pay a fixed entry fee to join the game.
-  \item The game continues until a coin lands heads up. 
-Each toss determines the prize, with the first heads
- appearing on the $t$-th toss resulting in a prize of $2^t$.
-\end{itemize}
-
-
-%\input{pic/FigureParadox}
-
-The expected return of all possibilities is
-\begin{equation}
-\begin{split}
-\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(x)\times V(x)\\
-&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\
-&=\infty 
-\end{split}
-\end{equation}
-
-
-Despite the potential for the prize to escalate 
-significantly, the expected value calculation 
-in probability theory reveals that the average 
-participant in this gambling game would end up paying
- an infinite fee. This is due to the prize's expected 
- value being infinite. Even though the probability of
-  winning is small with each toss, when multiplied,
-   it leads to an infinitely increasing expected value.
-
-This paradox challenges individuals' intuitions and 
-decision-making regarding gambling. Despite the allure 
-of a potentially substantial prize, the actual expected
- value of participating in this gambling game is infinite.
-  Consequently, in the long run, participants could face
-   an infinite monetary loss.
-
-\input{pic/paradox}
-
-Figure \ref{TruncatedPetersburg} is a truncated version
-of the St. Petersburg paradox. The transition probabilities between
-non-absorbing states are as follows:
-\[
-Q_{\text{st}}\dot{=}\begin{array}{c|ccccc}
- & \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5  \\\hline
-\text{S}_1  & 0 & \frac{1}{2} & 0 & 0 & 0 \\
-\text{S}_2 & 0 & 0 & \frac{1}{2} & 0 & 0 \\
-\text{S}_3 & 0 & 0 & 0 & \frac{1}{2} & 0 \\
-\text{S}_4  & 0 & 0 & 0 & 0 & \frac{1}{2}  \\
-\text{S}_5 & 0 & 0 & 0 & 0 & 0   
-\end{array}
-\]
-Then,
-\[
-N_{\text{st}}=(I_5-Q_{\text{st}})^{-1}=\begin{array}{c|ccccc}
-& \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5  \\\hline
-\text{S}_1 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} & \frac{1}{16} \\
-\text{S}_2 & 0 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} \\
-\text{S}_3 & 0 & 0 & 1 & \frac{1}{2} & \frac{1}{4} \\
-\text{S}_4 & 0 & 0 & 0 & 1 & \frac{1}{2} \\
-\text{S}_5  & 0 & 0 & 0 & 0 & 1 \\  
-\end{array}
-\]
-Bases on Definition \ref{definition3}, 
-the truncated  St. Petersburg paradox
-is non-ergodic between non-absorbing states.
-
-\subsection{A sufficient condition for non-ergodicity between non-absorbing states}
-By observing the truncated  St. Petersburg paradox, 
-it is easy to provide a sufficient condition for non-ergodicity between non-absorbing states.
-\begin{theorem}[A sufficient condition for non-ergodicity between non-absorbing states]
-\label{judgmentTheorem}
-Given a Markov chain with absorbing states, 
-suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$.
-If the transition matrix $Q$ between non-absorbing states satifies,
-\begin{equation}
-\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}=\begin{cases}
-\geq 0, & \text{if } i\leq j; \\
- 0, & \text{otherwise.}  
-\end{cases}
-\label{condition}
-\end{equation}
-Then, the Markov chain is non-ergodic between non-absorbing states.
-\end{theorem}
-\begin{IEEEproof}
-The $Q$ matrix (\ref{condition}) is an upper triangular matrix.
-The product of two upper triangular matrices is still an upper triangular matrix.
-Furthermore, the sum of two upper triangular matrices 
-is still an upper triangular matrix.
-Based on Definition \ref{definitionN}, 
-the $N$ matrix is product and sum of upper triangular matrices.
-Then, the $N$ matrix is an upper triangular matrix.
-The claim now follows based on Definition \ref{definition3}.
-\end{IEEEproof}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/material/nonergodic.tex b/material/nonergodic.tex
new file mode 100644
index 0000000..249a4cc
--- /dev/null
+++ b/material/nonergodic.tex
@@ -0,0 +1,162 @@
+\section{Non-ergodicity between non-absorbing states}
+\begin{definition}[Non-ergodicity between non-absorbing states]
+Assume that $N$  exists for any policy $\pi$
+ and is independent of initial states.
+ $\exists i,j \in S\setminus\{\text{T}\}$, 
+ $N_{ij}=0$, MDP is non-ergodic between non-absorbing states.
+  \label{definition3}
+\end{definition}
+
+
+
+
+\subsection{St. Petersburg paradox}
+
+
+
+The St. Petersburg paradox is a paradox associated 
+with gambling and decision theory \cite{peters2019ergodicity}.
+ It is named after the city 
+of St. Petersburg in Russia and was initially introduced
+ by the mathematician Daniel Bernoulli in 1738.
+
+The paradox involves a gambling game with the following rules:
+\begin{itemize}
+  \item Participants must pay a fixed entry fee to join the game.
+  \item The game continues until a coin lands heads up. 
+Each toss determines the prize, with the first heads
+ appearing on the $t$-th toss resulting in a prize of $2^t$.
+\end{itemize}
+
+
+%\input{pic/FigureParadox}
+
+The expected return of all possibilities is
+\begin{equation}
+\begin{split}
+\mathbb{E}(x)&=\lim_{n\rightarrow \infty}\sum_{t=1}^n p(x_t)\times V(x_t)\\
+&=\lim_{n\rightarrow \infty}\sum_{t=1}^n\frac{1}{2^t} 2^t\\
+&=\infty 
+\end{split}
+\end{equation}
+
+
+Despite the potential for the prize to escalate 
+significantly, the expected value calculation 
+in probability theory reveals that the average 
+participant in this gambling game would end up paying
+ an infinite fee. This is due to the prize's expected 
+ value being infinite. Even though the probability of
+  winning is small with each toss, when multiplied,
+   it leads to an infinitely increasing expected value.
+
+This paradox challenges individuals' intuitions and 
+decision-making regarding gambling. Despite the allure 
+of a potentially substantial prize, the actual expected
+ value of participating in this gambling game is infinite.
+  Consequently, in the long run, participants could face
+   an infinite monetary loss.
+
+\input{pic/paradox}
+
+Figure \ref{TruncatedPetersburg} is a truncated version
+of the St. Petersburg paradox. The transition probabilities between
+non-absorbing states are as follows:
+\[
+Q_{\text{st}}\dot{=}\begin{array}{c|ccccc}
+ & \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5  \\\hline
+\text{S}_1  & 0 & \frac{1}{2} & 0 & 0 & 0 \\
+\text{S}_2 & 0 & 0 & \frac{1}{2} & 0 & 0 \\
+\text{S}_3 & 0 & 0 & 0 & \frac{1}{2} & 0 \\
+\text{S}_4  & 0 & 0 & 0 & 0 & \frac{1}{2}  \\
+\text{S}_5 & 0 & 0 & 0 & 0 & 0   
+\end{array}
+\]
+Then,
+\[
+N_{\text{st}}=(I_5-Q_{\text{st}})^{-1}=\begin{array}{c|ccccc}
+& \text{S}_1 & \text{S}_2 & \text{S}_3 & \text{S}_4 & \text{S}_5  \\\hline
+\text{S}_1 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} & \frac{1}{16} \\
+\text{S}_2 & 0 & 1 & \frac{1}{2} & \frac{1}{4} & \frac{1}{8} \\
+\text{S}_3 & 0 & 0 & 1 & \frac{1}{2} & \frac{1}{4} \\
+\text{S}_4 & 0 & 0 & 0 & 1 & \frac{1}{2} \\
+\text{S}_5  & 0 & 0 & 0 & 0 & 1 \\  
+\end{array}
+\]
+Based on Definition \ref{definition3}, 
+the truncated  St. Petersburg paradox
+is non-ergodic between non-absorbing states.
+
+\subsection{A sufficient condition for non-ergodicity between non-absorbing states}
+By observing the truncated  St. Petersburg paradox, 
+it is easy to provide a sufficient condition for non-ergodicity between non-absorbing states.
+\begin{theorem}[A sufficient condition for non-ergodicity between non-absorbing states]
+\label{judgmentTheorem}
+Given a Markov chain with absorbing states, 
+suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$.
+If the transition matrix $Q$ between non-absorbing states satisfies
+\begin{equation}
+\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}=\begin{cases}
+\geq 0, & \text{if } i\leq j; \\
+ 0, & \text{otherwise.}  
+\end{cases}
+\label{condition}
+\end{equation}
+Then, the Markov chain is non-ergodic between non-absorbing states.
+\end{theorem}
+\begin{IEEEproof}
+The $Q$ matrix (\ref{condition}) is an upper triangular matrix.
+The product of two upper triangular matrices is still an upper triangular matrix.
+Furthermore, the sum of two upper triangular matrices 
+is still an upper triangular matrix.
+Based on Definition \ref{definitionN}, 
+the $N$ matrix is product and sum of upper triangular matrices.
+Then, the $N$ matrix is an upper triangular matrix.
+The claim now follows based on Definition \ref{definition3}.
+\end{IEEEproof}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/pic/boyanchain.tex b/pic/boyanchain.tex
new file mode 100644
index 0000000..e1aa076
--- /dev/null
+++ b/pic/boyanchain.tex
@@ -0,0 +1,51 @@
+
+%\usetikzlibrary{positioning, shapes, calc}
+\begin{figure}[ht]
+%
+    \begin{center}
+    \scalebox{0.7}
+	{
+        \begin{tikzpicture}[node distance=4mm]
+            % 定义节点样式
+            \tikzset{
+                stateWithBorder/.style={draw, circle, minimum width=8mm},
+                stateWithoutBorder/.style={ draw=none, minimum width=8mm}
+            }
+        
+            % Nodes
+            \node[stateWithBorder] (0) {1};
+            \node[stateWithBorder, right=of 0] (1) {2 };
+            \node[stateWithBorder, right=of 1] (2) {3 };
+            \node[stateWithoutBorder, right=of 2] (3) {...};
+            \node[stateWithoutBorder, right=of 3,node distance=1mm] (4) {...};
+            %\node[stateWithBorder, right=of 4] (5) {9};
+            \node[stateWithBorder, right=of 4] (6) {10};
+            \node[stateWithBorder, right=of 6] (7) {11};
+            \node[stateWithBorder, right=of 7] (8) {12};
+            \node[draw, rectangle, fill=gray!50, right=of 8] (DEAD) {T};
+
+            \node[above=of 0.west, node distance=12mm] {start};
+
+            \draw[solid,->] (0) -- (1);
+            \draw[solid,->] (1) -- (2);
+            \draw[solid,->] (2) -- (3);
+            \draw[solid,->] (4) -- (6);
+            %\draw[solid,->] (5) -- (6);
+            \draw[solid,->] (6) -- (7);
+            \draw[solid,->] (7) -- (8);
+            \draw[solid,->] (8) -- (DEAD);
+            \draw[solid,->] (0.north) to[out=60,in=120] (2.north west);
+            \draw[solid,->] (1.north) to[out=60,in=120] ([yshift=2mm]3.north);
+            \draw[solid,->] (2.north) to[out=60,in=120] ([yshift=2mm]4.north);
+            \draw[solid,->] ([yshift=3mm]4.north west) to[out=60,in=120] (6.north west);
+            \draw[solid,->] (4.north) to[out=60,in=120] (7.north west);
+            \draw[solid,->] (6.north) to[out=60,in=120] (8.north west);
+            \draw[solid,->] (7.north) to[out=60,in=120] (DEAD.north west);
+            %\draw[solid,->] (8.south west) to[out=240,in=300] (0.south east);
+            
+        \end{tikzpicture}
+        }
+    \caption{Boyan Chain.}
+        \label{boyanchain}
+    \end{center}
+\end{figure}
\ No newline at end of file
diff --git a/references.bib b/references.bib
index 57e410f..5ae999a 100644
--- a/references.bib
+++ b/references.bib
@@ -1,4 +1,14 @@
 # encoding:utf-8
+@article{peters2019ergodicity,
+  title={The ergodicity problem in economics},
+  author={Peters, Ole},
+  journal={Nature Physics},
+  volume={15},
+  number={12},
+  pages={1216--1221},
+  year={2019},
+  publisher={Nature Publishing Group}
+}
 @article{pakes1969some,
   title={Some conditions for ergodicity and recurrence of Markov chains},
   author={Pakes, Anthony G},
--
libgit2 0.26.0