From 59c0881bfaf02dd475af6e0a54fdd88a2ca230aa Mon Sep 17 00:00:00 2001
From: Lenovo
Date: Sat, 25 May 2024 08:32:31 +0800
Subject: [PATCH] Ask Xinwen to help compute the inverse of the matrix

---
 main/background.tex       | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
 pic/randomWalk.tex        |   4 ++--
 pic/randomWalkRestart.tex |   4 ++--
 3 files changed, 118 insertions(+), 32 deletions(-)

diff --git a/main/background.tex b/main/background.tex
index 8cfe27d..87525e4 100644
--- a/main/background.tex
+++ b/main/background.tex
@@ -58,54 +58,140 @@
 of random walk with absorbing states
 $P_{\text{absorbing}}$ is defined as follows:
 \[
 P_{\text{absorbing}}\dot{=}\begin{array}{c|ccccccc}
-&\text{T}_1 & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} & \text{T}_2 \\\hline
-\text{T}_1 & 1 & 0 & 0 & 0 & 0 & 0 & 0 \\
-\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 & 0\\
-\text{B} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0\\
-\text{C} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0\\
-\text{D} & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
-\text{E} & 0 & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
-\text{T}_2 & 0 & 0 & 0 & 0 & 0 & 0 & 1
+&\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
+\text{T} & 1 & 0 & 0 & 0 & 0 & 0 \\
+\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
+\text{B} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
+\text{C} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
+\text{D} & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
+\text{E} & \frac{1}{2} & 0 & 0 & 0 & \frac{1}{2} & 0
 \end{array}
 \]
 According to (\ref{invariance}),
-the distribution $d_{\text{absorbing}}=\{\frac{1}{2}$,
- $0$, $0$, $0$, $0$, $0$, $\frac{1}{2}\}$.
- Since the probability of A, B, C, D, E are all zeros,
- random walk with absorbing states are non-ergodic.
+the distribution $d_{\text{absorbing}}=\{1$,
+ $0$, $0$, $0$, $0$, $0\}$.
+ Since the probabilities of A, B, C, D, E are all zero,
+ random walk with absorbing states is non-ergodic.
 \input{pic/randomWalkRestart}
 However, in reinforcement learning, we always
 make the ergodicity assumption.
 When encountering an absorbing state, we immediately reset and
- transit to the initial states. Figure \ref{randomwalkRestart}
+ transition to the initial states. Figure \ref{randomwalkRestart}
 shows the random walk with restarts.
 The transition probability matrix
 of random walk with restarts
 $P_{\text{restart}}$ is defined as follows:
 \[
 P_{\text{restart}}\dot{=}\begin{array}{c|ccccccc}
-&\text{T}_1 & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} & \text{T}_2 \\\hline
-\text{T}_1 & 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
-\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 & 0\\
-\text{B} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0\\
-\text{C} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0\\
-\text{D} & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
-\text{E} & 0 & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
-\text{T}_2 & 0 & 0 & 0 & 1 & 0 & 0 & 0
+&\text{T} & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
+\text{T} & 0 & 0 & 0 & 1 & 0 & 0 \\
+\text{A} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
+\text{B} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
+\text{C} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
+\text{D} & 0 & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
+\text{E} & \frac{1}{2} & 0 & 0 & 0 & \frac{1}{2} & 0 \\
 \end{array}
 \]
 According to (\ref{invariance}),
-the distribution $d_{\text{restart}}=\{0.05$,
- $0.1$, $0.2$, $0.3$, $0.2$, $0.1$, $0.05\}$.
+the distribution $d_{\text{restart}}=\{0.1$,
+ $0.1$, $0.2$, $0.3$, $0.2$, $0.1\}$.
 Since the probabilities of T, A, B, C, D, E are all non-zero,
- random walk with restarts are ergodic.
+ random walk with restarts is ergodic.
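As a quick numerical sanity check (not part of the patch itself), the invariance condition $dP=d$ for both distributions above can be verified with NumPy; state ordering T, A, B, C, D, E is assumed:

```python
import numpy as np

# Transition matrix of the random walk with absorbing state T
# (states ordered T, A, B, C, D, E as in the tables above).
P_absorbing = np.array([
    [1,   0,   0,   0,   0,   0  ],   # T absorbs
    [0.5, 0,   0.5, 0,   0,   0  ],   # A
    [0,   0.5, 0,   0.5, 0,   0  ],   # B
    [0,   0,   0.5, 0,   0.5, 0  ],   # C
    [0,   0,   0,   0.5, 0,   0.5],   # D
    [0.5, 0,   0,   0,   0.5, 0  ],   # E
])

# Random walk with restarts: identical, except T resets to C.
P_restart = P_absorbing.copy()
P_restart[0] = [0, 0, 0, 1, 0, 0]

d_absorbing = np.array([1, 0, 0, 0, 0, 0])
d_restart   = np.array([0.1, 0.1, 0.2, 0.3, 0.2, 0.1])

# Invariance condition d P = d.
assert np.allclose(d_absorbing @ P_absorbing, d_absorbing)
assert np.allclose(d_restart @ P_restart, d_restart)

# Only the restart chain puts positive mass on every state.
print((d_absorbing > 0).all(), (d_restart > 0).all())  # False True
```

Both stationarity checks pass, and only `d_restart` is strictly positive, matching the ergodic/non-ergodic distinction drawn in the text.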
-Give the definition of ergodicity for a Markov chain,
-together with sufficient conditions.
-Use the random walk example to show that the setting with
-absorbing states does not satisfy ergodicity,
-while the reinforcement learning training setting with restarts does.
+\subsection{Ergodicity and Non-ergodicity between non-absorbing states}
+For Markov chains with absorbing states, we usually decompose
+the transition matrix $P$ into the following form:
+\[
+P =
+\begin{bmatrix}
+Q & R \\
+0 & I
+\end{bmatrix},
+\]
+where $Q$ is the matrix of transition probabilities between
+non-absorbing states, $R$ represents the transition probabilities
+from non-absorbing states to absorbing states,
+$I$ is the matrix of transition probabilities between absorbing states,
+and $0$ is a zero matrix.
+
+The expected number of visits to the non-absorbing states
+before being absorbed is
+\begin{equation}
+N\dot{=} \sum_{i=0}^{\infty}Q^i=(I_{n-1}-Q)^{-1},
+\end{equation}
+where $n$ is the number of states, $I_{n-1}$ is the
+$(n-1)\times(n-1)$ identity matrix, and $N_{ij}$ is the expected
+number of visits to state $j$ before absorption when starting
+from state $i$.
+Note that all absorbing states can be combined into one.
+It is now easy to define whether the non-absorbing states
+are ergodic.
+
+\begin{definition}[Ergodicity between non-absorbing states]
+Assume that $N$ exists for any policy $\pi$
+ and is independent of the initial states.
+ If $N_{ij}>0$ for all $i,j \in S\setminus\{\text{T}\}$,
+ the MDP is ergodic between non-absorbing states.
+\end{definition}
+
+\begin{definition}[Non-ergodicity between non-absorbing states]
+Assume that $N$ exists for any policy $\pi$
+ and is independent of the initial states.
+ If $N_{ij}=0$ for some $i,j \in S\setminus\{\text{T}\}$,
+ the MDP is non-ergodic between non-absorbing states.
+\end{definition}
+
+For random walk with absorbing states,
+\[
+P_{\text{absorbing}} =
+\begin{bmatrix}
+Q_{\text{absorbing}} & R_{\text{absorbing}} \\
+0 & I_{\text{absorbing}}
+\end{bmatrix},
+\]
+where
+\[
+Q_{\text{absorbing}}\dot{=}\begin{array}{c|ccccc}
+ & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
+\text{A} & 0 & \frac{1}{2} & 0 & 0 & 0 \\
+\text{B} & \frac{1}{2} & 0 & \frac{1}{2} & 0 & 0 \\
+\text{C} & 0 & \frac{1}{2} & 0 & \frac{1}{2} & 0 \\
+\text{D} & 0 & 0 & \frac{1}{2} & 0 & \frac{1}{2} \\
+\text{E} & 0 & 0 & 0 & \frac{1}{2} & 0
+\end{array}
+\]
+\[
+R_{\text{absorbing}}\dot{=}\begin{array}{c|c}
+&\text{T} \\\hline
+\text{A} & \frac{1}{2} \\
+\text{B} & 0 \\
+\text{C} & 0 \\
+\text{D} & 0 \\
+\text{E} & \frac{1}{2}
+\end{array}
+\]
+\[
+I_{\text{absorbing}}\dot{=}\begin{array}{c|c}
+&\text{T} \\\hline
+\text{T} & 1
+\end{array}
+\]
+
+Then,
+\[
+N_{\text{absorbing}}=(I_5-Q_{\text{absorbing}})^{-1}
+=\begin{array}{c|ccccc}
+ & \text{A} & \text{B} & \text{C} & \text{D} & \text{E} \\\hline
+\text{A} & \frac{5}{3} & \frac{4}{3} & 1 & \frac{2}{3} & \frac{1}{3} \\
+\text{B} & \frac{4}{3} & \frac{8}{3} & 2 & \frac{4}{3} & \frac{2}{3} \\
+\text{C} & 1 & 2 & 3 & 2 & 1 \\
+\text{D} & \frac{2}{3} & \frac{4}{3} & 2 & \frac{8}{3} & \frac{4}{3} \\
+\text{E} & \frac{1}{3} & \frac{2}{3} & 1 & \frac{4}{3} & \frac{5}{3}
+\end{array}
+\]
+Since every entry of $N_{\text{absorbing}}$ is positive,
+the random walk is ergodic between the non-absorbing states
+A, B, C, D, E.
-This paper focuses on ergodicity between non-absorbing states
-when absorbing states are removed.
 Using the St. Petersburg example, show that St. Petersburg
 does not satisfy ergodicity between non-absorbing states.
 Give a theorem, and likewise prove that the game 2048
 does not satisfy ergodicity between non-absorbing states.

diff --git a/pic/randomWalk.tex b/pic/randomWalk.tex
index 3e84d6f..9ce0547 100644
--- a/pic/randomWalk.tex
+++ b/pic/randomWalk.tex
@@ -2,8 +2,8 @@
 \centering
 \scalebox{0.9}{
 \begin{tikzpicture}
-  \node[draw, rectangle, fill=gray!50] (DEAD) at (0,0) {T$_1$};
-  \node[draw, rectangle, fill=gray!50] (DEAD2) at (9,0) {T$_2$};
+  \node[draw, rectangle, fill=gray!50] (DEAD) at (0,0) {T};
+  \node[draw, rectangle, fill=gray!50] (DEAD2) at (9,0) {T};
   \node[draw, circle] (A) at (1.5,0) {A};
   \node[draw, circle] (B) at (3,0) {B};
   \node[draw, circle] (C) at (4.5,0) {C};
diff --git a/pic/randomWalkRestart.tex b/pic/randomWalkRestart.tex
index 243e318..bf18353 100644
---
a/pic/randomWalkRestart.tex
+++ b/pic/randomWalkRestart.tex
@@ -2,8 +2,8 @@
 \centering
 \scalebox{0.9}{
 \begin{tikzpicture}
-  \node[draw, rectangle, fill=gray!50] (DEAD) at (0,0) {T$_1$};
-  \node[draw, rectangle, fill=gray!50] (DEAD2) at (9,0) {T$_2$};
+  \node[draw, rectangle, fill=gray!50] (DEAD) at (0,0) {T};
+  \node[draw, rectangle, fill=gray!50] (DEAD2) at (9,0) {T};
   \node[draw, circle] (A) at (1.5,0) {A};
   \node[draw, circle] (B) at (3,0) {B};
   \node[draw, circle] (C) at (4.5,0) {C};
--
libgit2 0.26.0
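As an aside (not part of the patch), the fundamental matrix $N=(I_{n-1}-Q)^{-1}$ the commit asks for can be obtained numerically from $Q_{\text{absorbing}}$; a minimal sketch with NumPy, assuming state ordering A..E:

```python
import numpy as np

# Q_absorbing: transitions among the non-absorbing states A..E.
# The missing mass of 1/2 in rows A and E flows into the absorbing state T.
Q = np.array([
    [0,   0.5, 0,   0,   0  ],   # A
    [0.5, 0,   0.5, 0,   0  ],   # B
    [0,   0.5, 0,   0.5, 0  ],   # C
    [0,   0,   0.5, 0,   0.5],   # D
    [0,   0,   0,   0.5, 0  ],   # E
])

# Fundamental matrix: N[i, j] is the expected number of visits to j
# before absorption when starting from i.
N = np.linalg.inv(np.eye(5) - Q)
print(np.round(N, 4))
```

Every entry of `N` is positive (e.g. `N[0, 0]` is 5/3 and `N[2, 2]` is 3), and the row sum for C is 9, matching the classical expected absorption time $i(n-i)=3\cdot 3$ for a symmetric gambler's-ruin walk started in the middle.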