无环才是本质

cded4336 · Lenovo · ff9efea9 · cded4336 · cded4336 · cded4336
Commit cded4336 authored May 31, 2024 by Lenovo
Showing with 274 additions and 39 deletions

document.tex
+4 -3

main/2048isAcyclic.tex
+45 -2

main/acyclic.tex
+140 -0

main/background.tex
+17 -30

main/introduction.tex
+4 -3

material/nonergodic.tex
+2 -1

pic/boyanchain.tex
+52 -0

references.bib
+10 -0

No files found.
--- a/document.tex
+++ b/document.tex
@@ -26,12 +26,13 @@
 \usetikzlibrary{automata, positioning}
 \usetikzlibrary{positioning}
 \usetikzlibrary{decorations.markings}
+\usepackage{cuted}
 \hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
 % updated with editorial comments 8/9/2021
 \newcommand{\highlight}[1]{\textcolor{red}{#1}}
 \begin{document}

-\title{Non-ergodicity of Game 2048}
+\title{Acyclicity of Game 2048}

 \author{Xingguo Chen,  Xinwen Li, Shangdong Yang, and Wenhao Wang
 \thanks{Manuscript received XXXX; revised XXXX; accepted XXXX.
@@ -74,8 +75,8 @@ wangwenhao11@nudt.edu.cn).

 \input{main/introduction}
 \input{main/background}
-\input{main/nonergodic}
-\input{main/2048isNonergodic}
+\input{main/acyclic}
+\input{main/2048isAcyclic}

 %\input{main/nonergodicity}
 %\input{main/paradox}

--- a/main/2048isNonergodic.tex
+++ b/main/2048isNonergodic.tex
-\section{Non-ergodicity of 2048}
+\section{Non-ergodicity of the 2048 game}


-The purpose of this section is to prove the non-ergodicity of the 2048 game.
+The purpose of this section is to prove the non-ergodicity of the 2048 game
+and give some discussions.
+
+\subsection{Non-ergodicity of the 2048 game}
+
+The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares.
+ At the beginning of the game,  two squares are randomly filled
+  with tiles of either 2 or 4.
+  Players can make moves in four directions: \textit{up}, \textit{down},
+   \textit{left}, and \textit{right}. 
+   When a player chooses a direction, 
+   all tiles will move in that direction until 
+   they hit the edge or another tile. 
+   If two tiles with the same number are adjacent 
+   in the moving direction, they will merge into 
+   a tile with the sum of the original numbers.
+   Each tile can only participate in one merge operation per move.
+   After each move, a new tile appears on a random empty square.
+   The new tile is 2 with probability 0.9, and 4 with probability 0.1.
+The game ends when all squares are filled, and no valid merge operations can be made. 
+

 \begin{theorem}
 The 2048 game is non-ergodic between non-absorbing states.
@@ -61,6 +81,9 @@ For all possible board,
 For any transition $\langle B_1, a, B_1', B_2\rangle$ in the 2048 game,
 we have 
 $sum(B_1)=sum(B_1')$  regardless of whether at least two tiles merge.
+\highlight{需要昕闻画2048的状态迁移图，同一个状态包括两种后续情况，
+一种是发生合并，一种是没发生合并的}
+

 Due to a new generated 2-tile or 4-tile in board $B_2$,
 $sum(B_2)>sum(B_1')$, that is $sum(B_2)>sum(B_1)$.
@@ -74,6 +97,26 @@ the claim follows by applying Theorem \ref{judgmentTheorem}.

 %\input{material/2048prove}

+\subsection{Discussions}
+
+行为策略采样
+$\langle s_t,a_t,r_{t+1},a_{t+1},s_{t+1} \rangle$，对应的特征
+$\langle \phi_t,r_{t+1},\phi_{t+1} \rangle$ 
+
+目标策略采样
+$\langle s_t,a_t,r_{t+1},a',s_{t+1} \rangle$，对应的特征
+$\langle \phi_t,r_{t+1},\phi' \rangle$ 
+
+\begin{equation}
+\theta_{t+1}=\theta_t+\alpha F_t (\rho_tR_t+\gamma \theta_t^{\top}\phi_t'-\theta_t^{\top}\phi_t-\mathbb{E}_{\pi}[\delta])\phi_t
+\end{equation}
+写的简单点是这样
+
+\begin{equation}
+\theta_{t+1}=\theta_t+\alpha F_t \rho_t(\delta_t-\mathbb{E}_{\mu}[\rho_t\delta_t])\phi_t,
+\end{equation}
+where
+$\delta_t=R_t+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$.




--- a/main/acyclic.tex
+++ b/main/acyclic.tex
+\section{Acyclicity between non-absorbing states}
+\begin{definition}[Acyclicity between non-absorbing states]
+Assume that $N$  exists for any policy $\pi$
+ and is independent of initial states.
+ If, $\forall i,j \in S\setminus\{\text{T}\}$ with $i\neq j$,
+ $N_{ij}>0$ implies $N_{ji}=0$,
+  then the MDP is acyclic between non-absorbing states.
+  \label{definition3}
+\end{definition}
+
+
+
+
+\subsection{Boyan chain}
+
+
+
+
+
+\input{pic/boyanchain}
+
+Figure~\ref{boyanchain} shows the Boyan chain.
+ The transition probabilities between
+non-absorbing states are as follows:
+\[
+Q_{\text{bo}}\dot{=}\begin{tiny}\left[ \begin{array}{cccccccccccc}
+ 0 & 0.5 &  0.5 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0.5 &  0.5 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0.5 &  0.5 & 0 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0.5 &  0.5 &  0 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0.5 &  0.5 & 0 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0.5 &  0.5 &  0 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0.5 &  0.5 & 0 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0.5 &  0.5 &  0 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0.5 &  0.5 & 0 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0.5 &  0.5 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0.5 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 
+\end{array}\right] \end{tiny}
+\]
+Then,
+\begin{strip}
+\begin{equation}
+\begin{split}
+N_{\text{bo}}=&(I_{12}-Q_{\text{bo}})^{-1}\\
+=&\begin{tiny}\left[\begin{array}{cccccccccccc}
+1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 &  0.671875 & 0.6640625 &  0.66796875 & 0.666015625 &  0.6669921875 & 0.66650390625 \\
+ 0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 &  0.65625 & 0.671875 &  0.6640625 & 0.66796875 &  0.666015625 & 0.6669921875 \\
+ 0 & 0 &  1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 &  0.671875 & 0.6640625 &  0.66796875 & 0.666015625 \\
+ 0 & 0 &  0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 &  0.65625 & 0.671875 &  0.6640625 & 0.66796875 \\
+ 0 & 0 &  0 & 0 &  1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 &  0.671875 & 0.6640625 \\
+ 0 & 0 &  0 & 0 &  0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 &  0.65625 & 0.671875 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  1 & 0.5 &  0.75 & 0.625 &  0.6875 & 0.65625 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 1 &  0.5 &  0.75 &  0.625 & 0.6875 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  1 & 0.5 &  0.75 & 0.625 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 1 &  0.5 &  0.75 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 & 1 & 0.5 \\
+ 0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 0 &  0 & 1  
+\end{array}\right]
+\end{tiny}
+\end{split}
+\end{equation}
+\end{strip}
+Since $N_{\text{bo}}$ is upper triangular,
+based on Definition \ref{definition3}, the Boyan chain 
+is acyclic between non-absorbing states.
+
+\subsection{A sufficient condition for acyclicity between non-absorbing states}
+By observing Boyan chain, 
+it is easy to provide a sufficient condition for acyclicity between non-absorbing states.
+\begin{theorem}[A sufficient condition for acyclicity between non-absorbing states]
+\label{judgmentTheorem}
+Given a Markov chain with absorbing states, 
+suppose the number of non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$.
+If the transition matrix $Q$ between non-absorbing states satisfies
+\begin{equation}
+\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}=\begin{cases}
+\geq 0, & \text{if } i\leq j; \\
+ 0, & \text{otherwise.}  
+\end{cases}
+\label{condition}
+\end{equation}
+Then, the Markov chain is acyclic between non-absorbing states.
+\end{theorem}
+\begin{IEEEproof}
+The $Q$ matrix (\ref{condition}) is an upper triangular matrix.
+The product of two upper triangular matrices is still an upper triangular matrix.
+Furthermore, the sum of two upper triangular matrices 
+is still an upper triangular matrix.
+Based on Definition \ref{definitionN}, 
+the $N$ matrix is built from products and sums of upper triangular matrices.
+Then, the $N$ matrix is an upper triangular matrix.
+The claim now follows based on Definition \ref{definition3}.
+\end{IEEEproof}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/main/background.tex
+++ b/main/background.tex
 \section{Background}
  
-\subsection{MDP and 2048 game}
+\subsection{Ergodicity and Non-ergodicity of Markov Chains}
 Consider Markov decision process (MDP)
 $\langle \mathcal{S}$, $\mathcal{A}$, $\mathcal{R}$, $\mathcal{T}$$\rangle$, where 
 $\mathcal{S}=\{1,2,3,\ldots\}$ is a finite state space, $|\mathcal{S}|=n$, $\mathcal{A}$ is an action space,
@@ -16,23 +16,6 @@ the MDP under policy $\pi$:
 $V^{\pi}(s)=\mathbb{E}_{\pi}\left[\sum_{t=0}^{\infty}r_t|s_0=s\right]$.


-The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares.
- At the beginning of the game,  two squares are randomly filled
-  with tiles of either 2 or 4.
-  Players can make moves in four directions: \textit{up}, \textit{down},
-   \textit{left}, and \textit{right}. 
-   When a player chooses a direction, 
-   all tiles will move in that direction until 
-   they hit the edge or another tile. 
-   If two tiles with the same number are adjacent 
-   in the moving direction, they will merge into 
-   a tile with the sum of the original numbers.
-   Each tile can only participate in one merge operation per move.
-   After each move, a new tile appears on a random empty square.
-   The new tile is 2 with  probability 0.1, and 4 with probability 0.9.
-The game ends when all squares are filled, and no valid merge operations can be made. 
-
-\subsection{Ergodicity and Non-ergodicity of Markov Chains}

 Given a stationary policy $\pi$, the MDP becomes a Markov chain on state space
 $\mathcal{S}$ with a   matrix
@@ -79,9 +62,9 @@ reaching the leftmost or rightmost node where it terminates.
 The terminal states are usually called absorbing states.
 The transition probability matrix
 of random walk with absorbing states 
-$P_{\text{ab}}$ is defined as follows:
+$P^{\text{ab}}$ is defined as follows:
 \[
-P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
+P^{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
 &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E}  \\\hline
 \text{T} & 1 & 0 & 0 & 0 & 0 & 0 \\
 \text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
@@ -93,7 +76,7 @@ P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
 \]
 Note that absorbing states can be combined into one.
 According to (\ref{invariance}),
-the distribution $d_{\text{absorbing}}=\{1$,
+the distribution $d^{\text{ab}}=\{1$,
 $0$, $0$, $0$, $0$, $0$\}.
 Since the probabilities of A, B, C, D, E are all zero,
 random walk with absorbing states is non-ergodic.
@@ -105,9 +88,9 @@ the distribution $d_{\text{absorbing}}=\{1$,
 is random walk with restarts.
 The transition probability matrix
 of random walk with restarts 
-$P_{\text{restart}}$ is defined as follows:
+$P^{\text{restart}}$ is defined as follows:
 \[
-P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
+P^{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
 &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
 \text{T} & 0 & 0 & 0 & 1 & 0 & 0 \\
 \text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
@@ -119,9 +102,9 @@ P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
 \]
 
 According to (\ref{invariance}),
-the distribution $d_{\text{restart}}=\{0.1$,
+the distribution $d^{\text{restart}}=\{0.1$,
 $0.1$, $0.2$, $0.3$, $0.2$, $0.1\}$.
- Since the probability of T, A, B, C, D, E are non-zeros,
+ Since the probabilities of T, A, B, C, D, E are all non-zero,
 random walk with restarts is ergodic.

 \subsection{Ergodicity  between non-absorbing states}
@@ -150,6 +133,10 @@ where $Q$ is the matrix of transition probabilities between
  \label{definitionN}
  \end{equation}
  where $I_{n-1}$ is the $(n-1)\times(n-1)$ identity matrix.
+  The entry $N_{ij}$ is the expected number of visits to state $j$
+  before absorption when the chain starts from state $i$,
+  so $N$ can be viewed as a reachability matrix.
+ $N_{ij}=0$ means that state $j$ is not reachable from state $i$.
 It is now easy to define whether the non-absorbing states 
 are ergodic.
 
@@ -165,15 +152,15 @@ Assume that $N$  exists for any policy $\pi$

 For random walk with absorbing states,
 \[
-P_{\text{ab}} =
+P^{\text{ab}} =
 \begin{bmatrix}
-Q_{\text{ab}} & R_{\text{ab}} \\
-0 & I_{\text{ab}}
+Q^{\text{ab}} & R^{\text{ab}} \\
+0 & I^{\text{ab}}
 \end{bmatrix},
 \]
 where
 \[
-Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc}
+Q^{\text{ab}}\dot{=}\begin{array}{c|ccccc}
 & \text{A} & \text{B} & \text{C} & \text{D} & \text{E}  \\\hline
 \text{A}  & 0 & \frac{1}{2} & 0 & 0 & 0 \\
 \text{B} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
@@ -201,7 +188,7 @@ Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc}

 Then,
 \[
-N_{\text{ab}}=(I_5-Q_{\text{ab}})^{-1}=\begin{array}{c|ccccc}
+N^{\text{ab}}=(I_5-Q^{\text{ab}})^{-1}=\begin{array}{c|ccccc}
 & \text{A} & \text{B} & \text{C} & \text{D} & \text{E}  \\\hline
 \text{A}  & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\
 \text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\

--- a/main/introduction.tex
+++ b/main/introduction.tex
@@ -85,8 +85,8 @@ To validate the above point, we designed two sets of experiments,
  In the maze game, the optimal value function is used, 
  with the optimal policy achieving a score of {-54} points.
  As shown in Figure \ref{fig_sim}, 
-  the x-axis represents $\epsilon$, 
-  the y-axis represents the average score per game,
+  the x-axis represents the exploration parameter $\epsilon$,
+  the y-axis represents the average score per episode,
   and the shaded area represents the standard deviation.
  We can find that in the 2048 game, the total score
   sharply decreases as $\epsilon$ increases,
@@ -110,6 +110,7 @@ The comparison in this set of experiments indicates that
    chance to return to the previous state. 
    This relates to the game's property of ergodicity.

-In this paper, we proved that the game 2048 is non-ergodic.
+In this paper, we proved that the game 2048 is acyclic
+between non-absorbing states.


--- a/main/nonergodic.tex
+++ b/main/nonergodic.tex
@@ -15,7 +15,8 @@ Assume that $N$  exists for any policy $\pi$


 The St. Petersburg paradox is a paradox associated 
-with gambling and decision theory. It is named after the city 
+with gambling and decision theory \cite{peters2019ergodicity}.
+ It is named after the city 
 of St. Petersburg in Russia and was initially introduced
 by the mathematician Daniel Bernoulli in 1738.


--- a/pic/boyanchain.tex
+++ b/pic/boyanchain.tex
+
+%\usetikzlibrary{positioning, shapes, calc}
+\begin{figure}[ht]
+%
+    \begin{center}
+    \scalebox{0.7}
+	{
+        \begin{tikzpicture}[node distance=4mm]
+            % 定义节点样式
+            \tikzset{
+                stateWithBorder/.style={draw, circle, minimum width=8mm},
+                stateWithoutBorder/.style={ draw=none, minimum width=8mm}
+            }
+        
+            % Nodes
+            \node[stateWithBorder] (0) {1};
+            \node[stateWithBorder, right=of 0] (1) {2 };
+            \node[stateWithBorder, right=of 1] (2) {3 };
+            \node[stateWithoutBorder, right=of 2] (3) {...};
+            \node[stateWithoutBorder, right=of 3,node distance=1mm] (4) {...};
+            %\node[stateWithBorder, right=of 4] (5) {9};
+            \node[stateWithBorder, right=of 4] (6) {10};
+            \node[stateWithBorder, right=of 6] (7) {11};
+            \node[stateWithBorder, right=of 7] (8) {12};
+            \node[draw, rectangle, fill=gray!50, right=of 8] (DEAD) {T};
+
+            \node[above=of 0.west, node distance=12mm] {start};
+
+            \draw[solid,->] (0) -- (1);
+            \draw[solid,->] (1) -- (2);
+            \draw[solid,->] (2) -- (3);
+            \draw[solid,->] (4) -- (6);
+            %\draw[solid,->] (5) -- (6);
+            \draw[solid,->] (6) -- (7);
+            \draw[solid,->] (7) -- (8);
+            \draw[solid,->] (8) -- (DEAD);
+            \draw[solid,->] (0.north) to[out=60,in=120] (2.north west);
+            \draw[solid,->] (1.north) to[out=60,in=120] ([yshift=2mm]3.north);
+            \draw[solid,->] (2.north) to[out=60,in=120] ([yshift=2mm]4.north);
+            \draw[solid,->] ([yshift=3mm]4.north west) to[out=60,in=120] (6.north west);
+            \draw[solid,->] (4.north) to[out=60,in=120] (7.north west);
+            \draw[solid,->] (6.north) to[out=60,in=120] (8.north west);
+            \draw[solid,->] (7.north) to[out=60,in=120] (DEAD.north west);
+            %\draw[solid,->] (8.south west) to[out=240,in=300] (0.south east);
+            
+        \end{tikzpicture}
+        }
+    \caption{Boyan Chain.}
+        \label{boyanchain}
+    \end{center}
+\end{figure}
\ No newline at end of file
--- a/references.bib
+++ b/references.bib
 # encoding:utf-8
+@article{peters2019ergodicity,
+  title={The ergodicity problem in economics},
+  author={Peters, Ole},
+  journal={Nature Physics},
+  volume={15},
+  number={12},
+  pages={1216--1221},
+  year={2019},
+  publisher={Nature Publishing Group}
+}
 @article{pakes1969some,
  title={Some conditions for ergodicity and recurrence of Markov chains},
  author={Pakes, Anthony G},