\section{Background}

\subsection{Ergodicity and Non-ergodicity of Markov Chains}

Consider a Markov decision process (MDP) $\langle \mathcal{S}, \mathcal{A}, \mathcal{R}, \mathcal{T}\rangle$, where $\mathcal{S}=\{1,2,\ldots,n\}$ is a finite state space with $|\mathcal{S}|=n$, $\mathcal{A}$ is an action space, $\mathcal{T}:\mathcal{S}\times \mathcal{A}\times \mathcal{S}\rightarrow [0,1]$ is a transition function, and $\mathcal{R}:\mathcal{S}\times \mathcal{A}\times \mathcal{S}\rightarrow \mathbb{R}$ is a reward function. A policy $\pi:\mathcal{S}\times \mathcal{A}\rightarrow [0,1]$ selects action $a$ in state $s$ with probability $\pi(a|s)$. The state value function under policy $\pi$, denoted $V^{\pi}:\mathcal{S}\rightarrow \mathbb{R}$, is the expected sum of rewards obtained in the MDP under policy $\pi$: $V^{\pi}(s)=\mathbb{E}_{\pi}\left[\sum_{t=0}^{\infty}r_t \mid s_0=s\right]$.

Given a fixed policy $\pi$, the MDP becomes a Markov chain on the state space $\mathcal{S}$ with transition matrix $P_{\pi}\in[0,1]^{n\times n}$, where $P_{\pi}(s_1,s_2)=\sum_{a\in \mathcal{A}}\pi(a|s_1)\mathcal{T}(s_1,a,s_2)$ is the transition probability from $s_1$ to $s_2$, and $\sum_{s'\in \mathcal{S}}P_{\pi}(s,s')=1$ for all $s\in \mathcal{S}$. A stationary distribution for $P_{\pi}$ is a probability distribution $d_{\pi}$ on $\mathcal{S}$ such that
\begin{equation}
d_{\pi}=P_{\pi}^{\top}d_{\pi}.
\label{invariance}
\end{equation}
That is, for all $s\in \mathcal{S}$,
\begin{equation}
\sum_{s'\in \mathcal{S}}P_{\pi}(s',s)d_{\pi}(s')=d_{\pi}(s).
\end{equation}
\begin{definition}[Ergodicity]
Assume that $d_{\pi}$ exists for every policy $\pi$ and is independent of the initial state. The MDP is ergodic if $d_{\pi}(s)>0$ for all $s\in \mathcal{S}$.
\end{definition}
This means that, under any policy, every state remains reachable from the current state after sufficiently many steps \cite{majeed2018q}. A sufficient condition for the above assumption is that $1$ is a simple eigenvalue of $P_{\pi}$ and all other eigenvalues of $P_{\pi}$ have modulus strictly less than $1$.

\input{pic/randomWalk}

The random walk (Figure~\ref{randomwalk}) is a Markov chain in which the agent starts from node C and moves left or right with probability $0.5$ each, until it reaches the leftmost or rightmost node, where the episode terminates. Such terminal states are usually called absorbing states. The transition probability matrix of the random walk with absorbing states, $P^{\text{ab}}$, is defined as follows:
\[
P^{\text{ab}}\dot{=}\begin{array}{c|cccccc}
 &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{T} & 1 & 0 & 0 & 0 & 0 & 0 \\
\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
\text{B} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
\text{C} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
\text{D} & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
\text{E} & \frac{1}{2} & 0 & 0 & 0 & \frac{1}{2} & 0
\end{array}
\]
Note that multiple absorbing states can be combined into one. According to (\ref{invariance}), the stationary distribution is $d^{\text{ab}}=(1, 0, 0, 0, 0, 0)$. Since the probabilities of A, B, C, D, and E are all zero, the random walk with absorbing states is non-ergodic.
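The stationary distribution in (\ref{invariance}) can be checked numerically as the eigenvector of $(P^{\text{ab}})^{\top}$ associated with eigenvalue $1$. The following is a minimal NumPy sketch of this check (the variable names are illustrative, not taken from any existing codebase):
\begin{verbatim}
import numpy as np

# Transition matrix of the random walk with absorbing states,
# rows/columns ordered as T, A, B, C, D, E.
P_ab = np.array([
    [1.0, 0.0, 0.0, 0.0, 0.0, 0.0],  # T (absorbing)
    [0.5, 0.0, 0.5, 0.0, 0.0, 0.0],  # A
    [0.0, 0.5, 0.0, 0.5, 0.0, 0.0],  # B
    [0.0, 0.0, 0.5, 0.0, 0.5, 0.0],  # C
    [0.0, 0.0, 0.0, 0.5, 0.0, 0.5],  # D
    [0.5, 0.0, 0.0, 0.0, 0.5, 0.0],  # E
])

# A stationary distribution satisfies d = P^T d, i.e. d is an
# eigenvector of P^T for eigenvalue 1.
eigvals, eigvecs = np.linalg.eig(P_ab.T)
d = np.real(eigvecs[:, np.argmin(np.abs(eigvals - 1.0))])
d = d / d.sum()  # normalise to a probability distribution

print(np.round(d, 6))  # approximately [1, 0, 0, 0, 0, 0]: no mass on A-E
\end{verbatim}
Applying the same computation to the restart matrix introduced below reproduces the strictly positive stationary distribution reported there.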
\input{pic/randomWalkRestart}

However, in reinforcement learning the ergodicity assumption is commonly made: upon reaching an absorbing state, the agent is immediately reset to the initial state. Figure~\ref{randomwalkRestart} shows the random walk with restarts. The transition probability matrix of the random walk with restarts, $P^{\text{restart}}$, is defined as follows:
\[
P^{\text{restart}}\dot{=}\begin{array}{c|cccccc}
 &\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{T} & 0 & 0 & 0 & 1 & 0 & 0 \\
\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
\text{B} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
\text{C} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
\text{D} & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
\text{E} & \frac{1}{2} & 0 & 0 & 0 & \frac{1}{2} & 0 \\
\end{array}
\]
According to (\ref{invariance}), the stationary distribution is $d^{\text{restart}}=(0.1, 0.1, 0.2, 0.3, 0.2, 0.1)$. Since the probabilities of T, A, B, C, D, and E are all nonzero, the random walk with restarts is ergodic.

\subsection{Ergodicity between Non-absorbing States}

For a Markov chain with absorbing states, the transition matrix $P$ is usually decomposed into the form
\[
P = \begin{bmatrix}
Q & R \\
0 & I
\end{bmatrix},
\]
where $Q$ contains the transition probabilities between non-absorbing states, $R$ contains the transition probabilities from non-absorbing states to absorbing states, $I$ is the matrix of transition probabilities between absorbing states, and $0$ is a zero matrix. The matrix of expected numbers of visits to non-absorbing states before absorption is
\begin{equation}
N\dot{=} \sum_{k=0}^{\infty}Q^k=(I_{n-1}-Q)^{-1},
\label{definitionN}
\end{equation}
where $I_{n-1}$ is the $(n-1)\times(n-1)$ identity matrix. $N$ encodes a reachability relation, i.e.\ a transitive closure of the one-step transitions: starting from state $i$, state $j$ is visited $N_{ij}$ times in expectation before absorption, so $N_{ij}=0$ means that state $j$ is not reachable from state $i$. It is now easy to define whether the non-absorbing states are ergodic.
\begin{definition}[Ergodicity between non-absorbing states]
Assume that $N$ exists for every policy $\pi$ and is independent of the initial state. The MDP is ergodic between non-absorbing states if $N_{ij}>0$ for all $i,j \in \mathcal{S}\setminus\{\text{T}\}$.
\label{definition2}
\end{definition}
For the random walk with absorbing states,
\[
P^{\text{ab}} = \begin{bmatrix}
Q^{\text{ab}} & R^{\text{ab}} \\
0 & I^{\text{ab}}
\end{bmatrix},
\]
where
\[
Q^{\text{ab}}\dot{=}\begin{array}{c|ccccc}
 & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{A} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
\text{B} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
\text{C} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
\text{D} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
\text{E} & 0 & 0 & 0 & \frac{1}{2} & 0
\end{array}
\]
%\[
% R_{\text{absorbing}}\dot{=}\begin{array}{c|c}
% &\text{T} \\\hline
% \text{A} & \frac{1}{2} \\
% \text{B} & 0 \\
% \text{C} & 0 \\
% \text{D} & 0 \\
% \text{E} & \frac{1}{2}
% \end{array}
%\]
%\[
% I_{\text{absorbing}}\dot{=}\begin{array}{c|c}
% &\text{T} \\\hline
% \text{T} & 1
% \end{array}
%\]
Then,
\[
N^{\text{ab}}=(I_5-Q^{\text{ab}})^{-1}=\begin{array}{c|ccccc}
 & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
\text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\
\text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\
\text{C} & 1 & 2 & 3 & 2 & 1 \\
\text{D} & \frac{2}{3} & \frac{4}{3} & 2 & \frac{8}{3} & \frac{4}{3} \\
\text{E} & \frac{1}{3} & \frac{2}{3} & 1 & \frac{4}{3} & \frac{5}{3} \\
\end{array}
\]
Based on Definition~\ref{definition2}, the random walk with absorbing states is ergodic between non-absorbing states.
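As a sanity check on Definition~\ref{definition2}, the fundamental matrix $N^{\text{ab}}$ above can be computed directly from $Q^{\text{ab}}$ via (\ref{definitionN}). The following minimal NumPy sketch (again with illustrative variable names) reproduces the entries and the positivity condition:
\begin{verbatim}
import numpy as np

# Transitions among the non-absorbing states A-E of the random
# walk with absorbing states: 1/2 on both off-diagonals.
Q_ab = 0.5 * (np.eye(5, k=1) + np.eye(5, k=-1))

# Fundamental matrix N = (I - Q)^{-1}: N[i, j] is the expected
# number of visits to state j, starting from state i, before
# absorption.
N_ab = np.linalg.inv(np.eye(5) - Q_ab)

print(np.round(N_ab, 3))
# [[1.667 1.333 1.    0.667 0.333]
#  [1.333 2.667 2.    1.333 0.667]
#  [1.    2.    3.    2.    1.   ]
#  [0.667 1.333 2.    2.667 1.333]
#  [0.333 0.667 1.    1.333 1.667]]

# Every entry is strictly positive, which is exactly the condition
# for ergodicity between non-absorbing states.
assert np.all(N_ab > 0)
\end{verbatim}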