Commit cded4336 by Lenovo

无环才是本质

parent ff9efea9
......@@ -26,12 +26,13 @@
\usetikzlibrary{automata, positioning}
\usetikzlibrary{positioning}
\usetikzlibrary{decorations.markings}
\usepackage{cuted}
\hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
% updated with editorial comments 8/9/2021
\newcommand{\highlight}[1]{\textcolor{red}{#1}}
\begin{document}
\title{Non-ergodicity of Game 2048}
\title{Acyclicity of Game 2048}
\author{Xingguo Chen, Xinwen Li, Shangdong Yang, and Wenhao Wang
\thanks{Manuscript received XXXX; revised XXXX; accepted XXXX.
......@@ -74,8 +75,8 @@ wangwenhao11@nudt.edu.cn).
\input{main/introduction}
\input{main/background}
\input{main/nonergodic}
\input{main/2048isNonergodic}
\input{main/acyclic}
\input{main/2048isAcyclic}
%\input{main/nonergodicity}
%\input{main/paradox}
......
\section{Non-ergodicity of 2048}
\section{Non-ergodicity of the 2048 game}
The purpose of this section is to prove the non-ergodicity of the 2048 game.
The purpose of this section is to prove the non-ergodicity of the 2048 game
and give some discussions.
\subsection{Non-ergodicity of the 2048 game}
The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares.
At the beginning of the game, two squares are randomly filled
with tiles of either 2 or 4.
Players can make moves in four directions: \textit{up}, \textit{down},
\textit{left}, and \textit{right}.
When a player chooses a direction,
all tiles will move in that direction until
they hit the edge or another tile.
If two tiles with the same number are adjacent
in the moving direction, they will merge into
a tile with the sum of the original numbers.
Each tile can only participate in one merge operation per move.
After each move, a new tile appears on a random empty square.
The new tile is 2 with probability 0.9, and 4 with probability 0.1.
The game ends when all squares are filled, and no valid merge operations can be made.
\begin{theorem}
The 2048 game is non-ergodic between non-absorbing states.
......@@ -61,6 +81,9 @@ For all possible board,
For any transition $\langle B_1, a, B_1', B_2\rangle$ in the 2048 game,
we have
$sum(B_1)=sum(B_1')$ regardless of whether at least two tiles merge.
\highlight{需要昕闻画2048的状态迁移图,同一个状态包括两种后续情况,
一种是发生合并,一种是没发生合并的}
Due to a newly generated 2-tile or 4-tile in board $B_2$,
$sum(B_2)>sum(B_1')$, that is, $sum(B_2)>sum(B_1)$.
......@@ -74,6 +97,26 @@ the claim follows by applying Theorem \ref{judgmentTheorem}.
%\input{material/2048prove}
\subsection{Discussions}
行为策略采样
$\langle s_t,a_t,r_{t+1},a_{t+1},s_{t+1} \rangle$,对应的特征
$\langle \phi_t,r_{t+1},\phi_{t+1} \rangle$
目标策略采样
$\langle s_t,a_t,r_{t+1},a',s_{t+1} \rangle$,对应的特征
$\langle \phi_t,r_{t+1},\phi' \rangle$
\begin{equation}
\theta_{t+1}=\theta_t+\alpha F_t (\rho_tR_t+\gamma \theta_t^{\top}\phi_t'-\theta_t^{\top}\phi_t-\mathbb{E}_{\pi}[\delta])\phi_t
\end{equation}
写的简单点是这样
\begin{equation}
\theta_{t+1}=\theta_t+\alpha F_t \rho_t(\delta_t-\mathbb{E}_{\mu}[\rho_t\delta_t])\phi_t,
\end{equation}
where
$\delta_t=R_t+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
......
\section{Acyclicity between non-absorbing states}
\begin{definition}[Acyclicity between non-absorbing states]
Assume that $N$ exists for any policy $\pi$
and is independent of initial states.
If, for all $i,j \in S\setminus\{\text{T}\}$ with $i\neq j$,
$N_{ij}>0$ implies $N_{ji}=0$,
then the MDP is acyclic between non-absorbing states.
\label{definition3}
\end{definition}
\subsection{Boyan chain}
\input{pic/boyanchain}
Figure~\ref{boyanchain} shows the Boyan chain.
The transition probabilities between
non-absorbing states are as follows:
\[
Q_{\text{bo}}\dot{=}\begin{tiny}\left[ \begin{array}{cccccccccccc}
0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 & 0.5 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0.5 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0
\end{array}\right] \end{tiny}
\]
Then,
\begin{strip}
\begin{equation}
\begin{split}
N_{\text{bo}}=&(I_{12}-Q_{\text{bo}})^{-1}\\
=&\begin{tiny}\left[\begin{array}{cccccccccccc}
1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 & 0.666015625 & 0.6669921875 & 0.66650390625 \\
0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 & 0.666015625 & 0.6669921875 \\
0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 & 0.666015625 \\
0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 & 0.66796875 \\
0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 & 0.6640625 \\
0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 & 0.671875 \\
0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 & 0.65625 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 & 0.6875 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 & 0.625 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 & 0.75 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 & 0.5 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1
\end{array}\right]
\end{tiny}
\end{split}
\end{equation}
\end{strip}
Based on Definition~\ref{definition3},
the Boyan chain
is acyclic between non-absorbing states.
\subsection{A sufficient condition for acyclicity between non-absorbing states}
By observing the Boyan chain,
it is easy to provide a sufficient condition for acyclicity between non-absorbing states.
\begin{theorem}[A sufficient condition for acyclicity between non-absorbing states]
\label{judgmentTheorem}
Given a Markov chain with absorbing states,
suppose the size of the non-absorbing states $|S\setminus\{\text{T}\}|\geq 2$.
If the transition matrix $Q$ between non-absorbing states satisfies
\begin{equation}
\forall i,j \in S\setminus\{\text{T}\}, Q_{i,j}=\begin{cases}
\geq 0, & \text{if } i\leq j; \\
0, & \text{otherwise.}
\end{cases}
\label{condition}
\end{equation}
Then, the Markov chain is acyclic between non-absorbing states.
\end{theorem}
\begin{IEEEproof}
The $Q$ matrix (\ref{condition}) is an upper triangular matrix.
The product of two upper triangular matrices is still an upper triangular matrix.
Furthermore, the sum of two upper triangular matrices
is still an upper triangular matrix.
Based on (\ref{definitionN}), $N=(I-Q)^{-1}=\sum_{k=0}^{\infty}Q^{k}$,
so the $N$ matrix is a sum of products of upper triangular matrices.
Then, the $N$ matrix is an upper triangular matrix.
The claim now follows based on Definition \ref{definition3}.
\end{IEEEproof}
\section{Background}
\subsection{MDP and 2048 game}
\subsection{Ergodicity and Non-ergodicity of Markov Chains}
Consider a Markov decision process (MDP)
$\langle \mathcal{S}$, $\mathcal{A}$, $\mathcal{R}$, $\mathcal{T}$$\rangle$, where
$\mathcal{S}=\{1,2,\ldots,n\}$ is a finite state space, $|\mathcal{S}|=n$, $\mathcal{A}$ is an action space,
......@@ -16,23 +16,6 @@ the MDP under policy $\pi$:
$V^{\pi}(s)=\mathbb{E}_{\pi}\left[\sum_{t=0}^{\infty}r_t|s_0=s\right]$.
The 2048 game consists of a 4$\times$4 grid board, totaling 16 squares.
At the beginning of the game, two squares are randomly filled
with tiles of either 2 or 4.
Players can make moves in four directions: \textit{up}, \textit{down},
\textit{left}, and \textit{right}.
When a player chooses a direction,
all tiles will move in that direction until
they hit the edge or another tile.
If two tiles with the same number are adjacent
in the moving direction, they will merge into
a tile with the sum of the original numbers.
Each tile can only participate in one merge operation per move.
After each move, a new tile appears on a random empty square.
The new tile is 2 with probability 0.9, and 4 with probability 0.1.
The game ends when all squares are filled, and no valid merge operations can be made.
\subsection{Ergodicity and Non-ergodicity of Markov Chains}
Given a steady policy $\pi$, MDP becomes a Markov chain on state space
$\mathcal{S}$ with a matrix
......@@ -79,9 +62,9 @@ reaching the leftmost or rightmost node where it terminates.
The terminal states are usually called absorbing states.
The transition probability matrix
of random walk with absorbing states
$P_{\text{ab}}$ is defined as follows:
$P^{\text{ab}}$ is defined as follows:
\[
P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
P^{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
&\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{T} & 1 & 0 & 0 & 0 & 0 & 0 \\
\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
......@@ -93,7 +76,7 @@ P_{\text{ab}}\dot{=}\begin{array}{c|ccccccc}
\]
Note that absorbing states can be combined into one.
According to (\ref{invariance}),
the distribution $d_{\text{absorbing}}=\{1$,
the distribution $d^{\text{ab}}=\{1$,
$0$, $0$, $0$, $0$, $0$\}.
Since the probabilities of A, B, C, D, E are all zero,
random walk with absorbing states is non-ergodic.
......@@ -105,9 +88,9 @@ the distribution $d_{\text{absorbing}}=\{1$,
is random walk with restarts.
The transition probability matrix
of random walk with restarts
$P_{\text{restart}}$ is defined as follows:
$P^{\text{restart}}$ is defined as follows:
\[
P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
P^{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
&\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{T} & 0 & 0 & 0 & 1 & 0 & 0 \\
\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
......@@ -119,9 +102,9 @@ P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
\]
According to (\ref{invariance}),
the distribution $d_{\text{restart}}=\{0.1$,
the distribution $d^{\text{restart}}=\{0.1$,
$0.1$, $0.2$, $0.3$, $0.2$, $0.1\}$.
Since the probability of T, A, B, C, D, E are non-zeros,
Since the probabilities of T, A, B, C, D, E are all non-zero,
random walk with restarts is ergodic.
\subsection{Ergodicity between non-absorbing states}
......@@ -150,6 +133,10 @@ where $Q$ is the matrix of transition probabilities between
\label{definitionN}
\end{equation}
where $I_{n-1}$ is the $(n-1)\times(n-1)$ identity matrix.
$N$ is a reachability matrix.
Its entry $N_{ij}$ is the expected number of visits to state $j$
before absorption when the chain starts from state $i$.
$N_{ij}=0$ means that state $j$ is not reachable from state $i$.
It is now easy to define whether the non-absorbing states
are ergodic.
......@@ -165,15 +152,15 @@ Assume that $N$ exists for any policy $\pi$
For random walk with absorbing states,
\[
P_{\text{ab}} =
P^{\text{ab}} =
\begin{bmatrix}
Q_{\text{ab}} & R_{\text{ab}} \\
0 & I_{\text{ab}}
Q^{\text{ab}} & R^{\text{ab}} \\
0 & I^{\text{ab}}
\end{bmatrix},
\]
where
\[
Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc}
Q^{\text{ab}}\dot{=}\begin{array}{c|ccccc}
& \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{A} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
\text{B} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
......@@ -201,7 +188,7 @@ Q_{\text{ab}}\dot{=}\begin{array}{c|ccccc}
Then,
\[
N_{\text{ab}}=(I_5-Q_{\text{ab}})^{-1}=\begin{array}{c|ccccc}
N^{\text{ab}}=(I_5-Q^{\text{ab}})^{-1}=\begin{array}{c|ccccc}
& \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\
\text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\
......
......@@ -85,8 +85,8 @@ To validate the above point, we designed two sets of experiments,
In the maze game, the optimal value function is used,
with the optimal policy achieving a score of {-54} points.
As shown in Figure \ref{fig_sim},
the x-axis represents $\epsilon$,
the y-axis represents the average score per game,
the x-axis represents the exploration parameter $\epsilon$,
the y-axis represents the average score per episode,
and the shaded area represents the standard deviation.
We can find that in the 2048 game, the total score
sharply decreases as $\epsilon$ increases,
......@@ -110,6 +110,7 @@ The comparison in this set of experiments indicates that
chance to return to the previous state.
This relates to the game's property of ergodicity.
In this paper, we proved that the game 2048 is non-ergodic.
In this paper, we proved that the game 2048 is acyclic
between non-absorbing states.
......@@ -15,7 +15,8 @@ Assume that $N$ exists for any policy $\pi$
The St. Petersburg paradox is a paradox associated
with gambling and decision theory. It is named after the city
with gambling and decision theory \cite{peters2019ergodicity}.
It is named after the city
of St. Petersburg in Russia and was initially introduced
by the mathematician Daniel Bernoulli in 1738.
......
%\usetikzlibrary{positioning, shapes, calc}
\begin{figure}[ht]
%
\begin{center}
\scalebox{0.7}
{
\begin{tikzpicture}[node distance=4mm]
% 定义节点样式
\tikzset{
stateWithBorder/.style={draw, circle, minimum width=8mm},
stateWithoutBorder/.style={ draw=none, minimum width=8mm}
}
% Nodes
\node[stateWithBorder] (0) {1};
\node[stateWithBorder, right=of 0] (1) {2 };
\node[stateWithBorder, right=of 1] (2) {3 };
\node[stateWithoutBorder, right=of 2] (3) {...};
\node[stateWithoutBorder, right=of 3,node distance=1mm] (4) {...};
%\node[stateWithBorder, right=of 4] (5) {9};
\node[stateWithBorder, right=of 4] (6) {10};
\node[stateWithBorder, right=of 6] (7) {11};
\node[stateWithBorder, right=of 7] (8) {12};
\node[draw, rectangle, fill=gray!50, right=of 8] (DEAD) {T};
\node[above=of 0.west, node distance=12mm] {start};
\draw[solid,->] (0) -- (1);
\draw[solid,->] (1) -- (2);
\draw[solid,->] (2) -- (3);
\draw[solid,->] (4) -- (6);
%\draw[solid,->] (5) -- (6);
\draw[solid,->] (6) -- (7);
\draw[solid,->] (7) -- (8);
\draw[solid,->] (8) -- (DEAD);
\draw[solid,->] (0.north) to[out=60,in=120] (2.north west);
\draw[solid,->] (1.north) to[out=60,in=120] ([yshift=2mm]3.north);
\draw[solid,->] (2.north) to[out=60,in=120] ([yshift=2mm]4.north);
\draw[solid,->] ([yshift=3mm]4.north west) to[out=60,in=120] (6.north west);
\draw[solid,->] (4.north) to[out=60,in=120] (7.north west);
\draw[solid,->] (6.north) to[out=60,in=120] (8.north west);
\draw[solid,->] (7.north) to[out=60,in=120] (DEAD.north west);
%\draw[solid,->] (8.south west) to[out=240,in=300] (0.south east);
\end{tikzpicture}
}
\caption{Boyan Chain.}
\label{boyanchain}
\end{center}
\end{figure}
\ No newline at end of file
# encoding:utf-8
@article{peters2019ergodicity,
title={The ergodicity problem in economics},
author={Peters, Ole},
journal={Nature Physics},
volume={15},
number={12},
pages={1216--1221},
year={2019},
publisher={Nature Publishing Group}
}
@article{pakes1969some,
title={Some conditions for ergodicity and recurrence of Markov chains},
author={Pakes, Anthony G},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment