$, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
\REPEAT
\STATE For each episode, initialize $\theta_{0}$ arbitrarily, set $u_{0}$ and $\omega_{0}$ to $0$, choose $\gamma\in(0,1]$, and keep $\alpha_t$, $\zeta_t$ and $\beta_t$ constant.\\
\textbf{Output}: $\theta^*$.\\
\FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take action $A_t$ in state $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe the sample $(S_t, R_{t+1}, S_{t+1})$ at time step $t$ (with the corresponding state feature vectors)\\
% where $\textbf{E}_\mu \in \mathbb{R}^{N \times d}$ and every row of $\textbf{E}_\mu$ equals $\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}$.
% The key matrix is $\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) - \textbf{f} \textbf{d}_{\mu}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu})$, and the vector of its column sums is
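The sampling loop in the algorithm fragment above can be made concrete with a short executable sketch. The listing below is only an illustration, not the paper's method: it assumes a small randomly generated MDP, a fixed linear feature map, a uniform behavior policy $\mu$, an arbitrary target policy $\pi$, and constant step sizes, and because the VMTDC-specific update terms are not part of this excerpt it uses the standard off-policy TDC correction purely as a placeholder for the per-step update.
\begin{verbatim}
import numpy as np

# Minimal sketch of the off-policy sampling loop above.  The MDP, the
# features, the policies and the step sizes are illustrative assumptions;
# the update is the standard TDC correction, used only as a placeholder
# for the paper's VMTDC update.
rng = np.random.default_rng(0)

n_states, n_actions, d = 7, 2, 4
P = rng.dirichlet(np.ones(n_states), size=(n_states, n_actions))  # P[s, a]: next-state dist.
R = rng.normal(size=(n_states, n_actions))                        # expected rewards
Phi = rng.normal(size=(n_states, d))                               # fixed state features

mu = np.full((n_states, n_actions), 1.0 / n_actions)  # behavior policy (uniform)
pi = np.tile([0.9, 0.1], (n_states, 1))                # target policy (assumed)

gamma, alpha, beta = 0.99, 0.01, 0.05  # discount and constant step sizes
theta = np.zeros(d)                    # primary weights
omega = np.zeros(d)                    # secondary (correction) weights

s = 0
for t in range(10000):
    a = rng.choice(n_actions, p=mu[s])        # take A_t from S_t according to mu
    s_next = rng.choice(n_states, p=P[s, a])  # arrive at S_{t+1}
    r = R[s, a]
    rho = pi[s, a] / mu[s, a]                 # importance-sampling ratio

    phi, phi_next = Phi[s], Phi[s_next]
    delta = r + gamma * phi_next @ theta - phi @ theta  # TD error

    # Two-timescale placeholder update (TDC), not the paper's VMTDC rule.
    theta += alpha * rho * (delta * phi - gamma * (phi @ omega) * phi_next)
    omega += beta * rho * (delta - phi @ omega) * phi

    s = s_next
\end{verbatim}
The only intent of the sketch is the control flow: act under $\mu$, form the importance ratio against $\pi$, and run a two-timescale update in which the secondary weights use a faster step size; the extra step sizes $\zeta_t$ and $\beta_t$ in the pseudocode presumably play this two-timescale role, but the mapping here is illustrative only.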