From 828184a414d6d42b577e6932f68ca2ae35ca3bd9 Mon Sep 17 00:00:00 2001
From: GongYu <1356681720@qq.com>
Date: Thu, 20 Jun 2024 01:24:38 +0800
Subject: [PATCH] Added the VMETD update formula and convergence proof

---
 main/appendix.tex       | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------
 main/motivation.tex     |  41 +++++++++++++++++++++++++++++++++++++++--
 main/theory.tex         |  29 ++++++++++++++++++++++++++++-
 neurips_2024.aux        | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------
 neurips_2024.log        | 252 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------------------------------------------------------------------------------------
 neurips_2024.out        |  30 ++++++++++++++++--------------
 neurips_2024.pdf        | Bin 2290177 -> 0 bytes
 neurips_2024.synctex.gz | Bin 227717 -> 0 bytes
 neurips_2024.tex        |   3 +++
 9 files changed, 469 insertions(+), 250 deletions(-)

diff --git a/main/appendix.tex b/main/appendix.tex
index 62ebedd..5ad28a2 100644
--- a/main/appendix.tex
+++ b/main/appendix.tex
@@ -577,74 +577,179 @@ the parameter $\theta$ converges to $A^{-1}b$.
 \end{algorithmic}
 \end{algorithm}
-\begin{algorithm}[t]
-    \caption{VMGTD algorithm with linear function approximation in the off-policy setting}
-    \label{alg:algorithm 3}
-\begin{algorithmic}
-    \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
-    $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
-    \REPEAT
-    \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
-    \textbf{Output}: $\theta^*$.\\
-    \FOR{$t=0$ {\bfseries to} $T-1$}
-    \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
-    \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
-    \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
-    \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
-    \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t \rho_t[\phi_t - \gamma \phi_{t+1}]\phi^{\top}_{t} u_t$
-    \STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) \phi_t - u_t]$
-    \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
-    \STATE $S_t=S_{t+1}$
-    \ENDFOR
-    \UNTIL{terminal episode}
-\end{algorithmic}
-\end{algorithm}
 \begin{algorithm}[t]
-    \caption{VMGTD2 algorithm with linear function approximation in the off-policy setting}
-    \label{alg:algorithm 4}
+    \caption{VMETD algorithm with linear function approximation in the off-policy setting}
+    \label{alg:algorithm 5}
 \begin{algorithmic}
     \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
     $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
     \REPEAT
-    \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
+    \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $F_{-1}$ to $0$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$ and $\beta_t$ are constant.\\
     \textbf{Output}: 
$\theta^*$.\\
     \FOR{$t=0$ {\bfseries to} $T-1$}
     \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
     \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
     \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
     \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
-    \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t \rho_t[\phi_t - \gamma \phi_{t+1}]\phi^{\top}_{t} u_t$
-    \STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \phi^{\top}_{t} u_t] \phi_t$
-    \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
+    \STATE $F_{t}\leftarrow \gamma \rho_{t-1} F_{t-1} +1$
+    \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t (F_t \rho_t\delta_t-\omega_t)\phi_t$
+    \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t (F_t \rho_t\delta_t-\omega_t)$
     \STATE $S_t=S_{t+1}$
     \ENDFOR
     \UNTIL{terminal episode}
 \end{algorithmic}
 \end{algorithm}
-% \begin{algorithm}[t]
-%     \caption{VMETD algorithm with linear function approximation in the off-policy setting}
-%     \label{alg:algorithm 5}
-% \begin{algorithmic}
-%     \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
-%     $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
-%     \REPEAT
-%     \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ to $1$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
-%     \textbf{Output}: $\theta^*$.\\
-%     \FOR{$t=0$ {\bfseries to} $T-1$}
-%     \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
-%     \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
-%     \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
-%     \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
-%     \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t u_t \rho_t(\delta_t-\omega_t)\phi_t$
-%     \STATE $u_{t+1}\leftarrow \gamma \rho_t u_t +1$
-%     \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
-%     \STATE $S_t=S_{t+1}$
-%     \ENDFOR
-%     \UNTIL{terminal episode}
-% \end{algorithmic}
-% \end{algorithm}
+\subsection{Proof of VMETD convergence}
+\label{proofVMETD}
+VMETD updates $\theta$ according to (\ref{thetavmetd}). Since $\omega$ is adjusted on the faster time scale, for the analysis of the slower iterate $\theta$ we may replace $\omega_{k+1}$ by its limiting value $\mathbb{E}_{\mu}[F_k \rho_k \delta_k]$, which gives
+\begin{equation}
+  \begin{split}
+    \theta_{k+1}&\leftarrow \theta_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \theta_k^{\top}\phi_{k+1}-\theta_k^{\top}\phi_k)\phi_k -\alpha_k \omega_{k+1}\phi_k\\
+    &=\theta_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \theta_k^{\top}\phi_{k+1}-\theta_k^{\top}\phi_k)\phi_k -\alpha_k \mathbb{E}_{\mu}[F_k \rho_k \delta_k]\phi_k\\
+    &= \theta_k+\alpha_k \{\underbrace{(F_k\rho_kR_{k+1}-\mathbb{E}_{\mu}[F_k\rho_k R_{k+1}])\phi_k}_{\textbf{b}_{\text{VMETD},k}}
+    -\underbrace{(F_k\rho_k\phi_k(\phi_k-\gamma\phi_{k+1})^{\top}-\phi_k\mathbb{E}_{\mu}[F_k\rho_k (\phi_k-\gamma\phi_{k+1})]^{\top})}_{\textbf{A}_{\text{VMETD},k}}\theta_k\}
+\end{split}
+\end{equation}
+\begin{equation}
+  \begin{split}
+    \textbf{A}_{\text{VMETD}}&=\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\
+    &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi_{k+1})^{\top}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \phi_k]\mathbb{E}_{\mu}[F_k \rho_k (\phi_k - \gamma \phi_{k+1})]^{\top}\\
+    &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\underbrace{\phi_k}_{X}\underbrace{F_k \rho_k (\phi_k - \gamma \phi_{k+1})^{\top}}_{Y}]-
\lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \phi_k]\mathbb{E}_{\mu}[F_k \rho_k (\phi_k - \gamma \phi_{k+1})]^{\top}\\
+    &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi_{k+1})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[ \phi_k]\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k (\phi_k - \gamma \phi_{k+1})]^{\top}\\
+    &=\sum_{s} f(s) \phi(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} - \sum_{s} d_{\mu}(s) \phi(s) \sum_{s} f(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} \\
+    &={\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{d}_{\mu} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} \\
+    &={\bm{\Phi}}^{\top} (\textbf{F} - \textbf{d}_{\mu} \textbf{f}^{\top}) (\textbf{I} - \gamma \textbf{P}_{\pi}){\bm{\Phi}} \\
+    &={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\pi})){\bm{\Phi}} \\
+    &={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} ){\bm{\Phi}} \\
+  \end{split}
+\end{equation}
+\begin{proof}
+  Any real square matrix $\bm{\text{M}}$ is positive definite if and only if
+the symmetric matrix $\bm{\text{S}}=\bm{\text{M}}+\bm{\text{M}}^{\top}$ is positive definite.
+Any symmetric real matrix $\bm{\text{S}}$ is positive definite if its diagonal entries are positive and
+greater than the sum of the absolute values of the corresponding
+off-diagonal entries.
+
+\begin{equation}
+  \label{rowsum}
+  \begin{split}
+    (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} )\textbf{1}
+    &=\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})\textbf{1}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
+    &=\textbf{F}(\textbf{1}-\gamma \textbf{P}_{\pi} \textbf{1})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
+    &=(1-\gamma)\textbf{F}\textbf{1}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
+    &=(1-\gamma)\textbf{f}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
+    &=(1-\gamma)\textbf{f}-\textbf{d}_{\mu} \\
+    &=(1-\gamma)(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}\textbf{d}_{\mu}-\textbf{d}_{\mu} \\
+    &=(1-\gamma)[(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}-\textbf{I}]\textbf{d}_{\mu} \\
+    &=(1-\gamma)[\sum_{t=0}^{\infty}(\gamma\textbf{P}_{\pi}^{\top})^{t}-\textbf{I}]\textbf{d}_{\mu} \\
+    &=(1-\gamma)[\sum_{t=1}^{\infty}(\gamma\textbf{P}_{\pi}^{\top})^{t}]\textbf{d}_{\mu} > 0 \\
+  \end{split}
+\end{equation}
+\begin{equation}
+  \label{columnsum}
+  \begin{split}
+    \textbf{1}^{\top}(\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} )
+    &=\textbf{1}^{\top}\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{1}^{\top}\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \\
+    &=\textbf{d}_{\mu}^{\top}-\textbf{1}^{\top}\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \\
+    &=\textbf{d}_{\mu}^{\top}- \textbf{d}_{\mu}^{\top} \\
+    &=0
+  \end{split}
+\end{equation}
+The matrix $\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top}$ has positive diagonal entries and negative off-diagonal entries, and (\ref{rowsum}) and (\ref{columnsum}) show that its row sums are positive and its column sums are zero. Hence each row sum plus the corresponding column sum is positive.
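+To make the last inference explicit (this only combines the sign and sum properties above with the standard diagonal-dominance criterion; the one additional assumption is that the feature matrix ${\bm{\Phi}}$ has linearly independent columns), write $\bm{\text{M}}=\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top}$ and $\bm{\text{S}}=\bm{\text{M}}+\bm{\text{M}}^{\top}$. For every index $i$,
+\begin{equation}
+  [\bm{\text{S}}\textbf{1}]_i=[\bm{\text{M}}\textbf{1}]_i+[\bm{\text{M}}^{\top}\textbf{1}]_i
+  =\underbrace{[\bm{\text{M}}\textbf{1}]_i}_{\text{row sum}}+\underbrace{[\textbf{1}^{\top}\bm{\text{M}}]_i}_{\text{column sum}}>0.
+\end{equation}
+Since the diagonal entries of $\bm{\text{M}}$ are positive and its off-diagonal entries are negative, $\bm{\text{S}}$ is symmetric with positive diagonal entries and negative off-diagonal entries, and the display above gives $\bm{\text{S}}_{ii}>\sum_{j\neq i}|\bm{\text{S}}_{ij}|$ for every $i$. Hence $\bm{\text{S}}$ is positive definite, so $\bm{\text{M}}$ is positive definite, and therefore $\textbf{A}_{\text{VMETD}}={\bm{\Phi}}^{\top}\bm{\text{M}}{\bm{\Phi}}$ is positive definite.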
+
+This completes the proof.
+\end{proof}
+
+% \begin{equation}
+%     F_k = \gamma \rho_{k-1} F_{k-1} + 1,
+% \end{equation}
+% \begin{equation}
+%     \rho_{k} \leftarrow \frac{\pi(A_k | S_k)}{\mu(A_k | S_k)},
+% \end{equation}
+% \begin{equation}
+%     \theta_{k+1}= \alpha_k F_k \rho_k (r_{k+1}+\gamma \theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k)\phi_k.
+% \end{equation}
+
+% ETD(0)' \textbf{A} matrix is:
+% \begin{equation}
+%     \textbf{A}_{\text{ETD}} = {\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi},
+% \end{equation}
+% where \textbf{F} is a diagonal matrix with diagonal elements $f(s) = d_{\mu}(s) \lim_{k\rightarrow \infty }\mathbb{E}_{\mu}[F_k|S_k=s]$,
+% which we assume exists. As we show later, the vector $\textbf{f} \in \mathbb{R}^N$ with components
+% $[\textbf{f}]_s = f(s)$ can be written as
+% \begin{equation}
+%     \begin{array}{ccl}
+%         \textbf{f}&=& \textbf{d}_\mu + \gamma \textbf{P}^{\top}_{\pi} \textbf{d}_\mu + (\gamma \textbf{P}^{\top}_{\pi} \textbf{d}_\mu)^{2} + \cdots \\
+%         &=&(\textbf{I} - \gamma \textbf{P}^{\top}_{\pi})^{-1} \textbf{d}_\mu.
+%     \end{array}
+% \end{equation}
+
+% The key matrix is $\textbf{F}(\textbf{I} - \gamma \textbf{P}_{\pi})$, and the vector of its column sums is
+% \begin{equation}
+%     \begin{array}{ccl}
+%         \textbf{1}^{\top} \textbf{F}(\textbf{I} - \gamma \textbf{P}_{\pi})&=& \textbf{f}^{\top}(\textbf{I} - \gamma \textbf{P}_{\pi}) \\
+%         &=&\textbf{d}^{\top}_{\mu}(\textbf{I} - \gamma \textbf{P}_{\pi})^{-1} (\textbf{I} - \gamma \textbf{P}_{\pi}) \\
+%         &=&\textbf{d}^{\top}_{\mu},
+%     \end{array}
+% \end{equation}
+% all components of which are positive. Thus, the key matrix and the $\textbf{A}_{\text{ETD}}$ matrix are positive
+% definite and the algorithm is stable.
+
+% VMETD by the following update:
+% \begin{equation}
+%     \theta_{k+1}= \alpha_k F_k \rho_k (r_{k+1}+\gamma \theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k - \mathbb{E}_{\mu}[F_k \rho_k \delta_k])\phi_k.
+% \end{equation} + +% % VMETD' \textbf{A} matrix is: +% % \begin{equation} +% % \begin{array}{ccl} +% % \textbf{A}_{\text{VMETD}}&=&\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\ +% % &=& \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k} - \mathbb{E}_{\mu}[\phi_k - \gamma \phi'_{k}])^{\top}]\\ +% % &=&\sum_{s} d_{\mu}(s)\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k} - \mathbb{E}_{\mu}[\phi_k - \gamma \phi'_{k}])^{\top}|S_k = s] \\ +% % &=&\sum_{s} d_{\mu}(s)\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k|S_k = s]\mathbb{E}_{\mu}[\rho_k \phi_k (\phi_k - \gamma \phi'_{k} - \mathbb{E}_{\mu}[\phi_k - \gamma \phi'_{k}])^{\top}|S_k = s] \\ +% % &=&\sum_{s} f(s)\mathbb{E}_{\mu}[\rho_t \phi_t (\phi_t - \gamma \phi'_{t} - \mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}])^{\top}|S_t = s] \\ +% % &=&\sum_{s} f(s)\mathbb{E}_{\mu}[\rho_t \phi_t (\phi_t - \gamma \phi'_{t})^{\top}|S_t = s] - \sum_{s} f(s)\mathbb{E}_{\mu}[\rho_t \phi_t \mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}|S_t = s] \\ +% % &=&\sum_{s} f(s)\mathbb{E}_{\pi}[\phi_t (\phi_t - \gamma \phi'_{t})^{\top}|S_t = s] - \sum_{s} f(s)\mathbb{E}_{\pi}[\phi_t |S_t = s]\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top} \\ +% % &=&\sum_{s} f(s)(\mathbb{E}_{\pi}[\phi_t (\phi_t - \gamma \phi'_{t})^{\top}|S_t = s] - \mathbb{E}_{\pi}[\phi_t |S_t = s]\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}) \\ +% % &=&\sum_{s} f(s) \phi(s) (\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s') - \sum_{s} d_{\mu}(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\mu}]_{ss'}\phi(s')))^{\top}\\ +% % &=&{\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{F}\textbf{E}_\mu +% % \end{array} +% % \end{equation} +% % where $\textbf{E}_\mu \in \mathbb{R}^{N \times d }$ and $\textbf{E}_\mu$'s every row has elements equal to $\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}$. 
+
+% \begin{equation}
+%     \begin{array}{ccl}
+%         \textbf{A}_{\text{VMETD}}&=&\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\
+%         &=& \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k]\mathbb{E}_{\mu}[F_k \rho_k \phi_k - \gamma \phi'_{k}]^{\top}\\
+%         &=& \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k]\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k \phi_k - \gamma \phi'_{k}]^{\top}\\
+%         &=&\sum_{s} f(s) \phi(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} - \sum_{s} f(s) \phi(s) * \sum_{s} f(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} \\
+%         &=&{\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{f} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu}) \bm{\Phi} \\
+%         &=&{\bm{\Phi}}^{\top} (\textbf{F} - \textbf{f} \textbf{f}^{\top}) (\textbf{I} - \gamma \textbf{P}_{\pi}){\bm{\Phi}} \\
+%     \end{array}
+% \end{equation}
+
+
+% The key matrix is $\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) - \textbf{f} \textbf{d}_{\mu}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu})$, and the vector of its column sums is
+% \begin{equation}
+%     \begin{array}{ccl}
+%         \textbf{1}^{\top}(\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) - \textbf{f} \textbf{d}_{\mu}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu}))&=& \textbf{d}^{\top}_{\mu} - \textbf{d}^{\top}_{\mu}(\textbf{I} - \gamma \textbf{P}_{\mu}) \textbf{1}^{\top}\textbf{f}\\
+%         &=&\textbf{d}^{\top}_{\mu} - \textbf{d}^{\top}_{\mu}(1 - \gamma)\textbf{1}^{\top}\textbf{f} \\
+%         &=&\textbf{d}^{\top}_{\mu} - \textbf{d}^{\top}_{\mu} \textbf{1}^{\top} (\textbf{f} - \gamma \textbf{f}),
+%     \end{array}
+% \end{equation}
+
 \section{Experimental details}
 \label{experimentaldetails}
diff --git a/main/motivation.tex b/main/motivation.tex
index 8dbe330..b2c304e 100644
--- a/main/motivation.tex
+++ b/main/motivation.tex
@@ -97,7 +97,7 @@ stochastic gradient descent:
 \end{equation}
 where $\delta_k$ is the TD error as follows:
 \begin{equation}
-\delta_k = r+\gamma
+\delta_k = r_{k+1}+\gamma
 \theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k.
 \label{delta}
 \end{equation}
@@ -204,4 +204,41 @@ and
 \end{equation}
 where $\delta_{k}$ is (\ref{deltaQ}) and $A^{*}_{k+1}={\arg \max}_{a}(\theta_{k}^{\top}\phi(s_{k+1},a))$.
-This paper also introduces an additional parameter $\omega$ into the GTD and GTD2 algorithms. For details, please refer to the appendix.
\ No newline at end of file
+\subsection{Variance Minimization ETD Learning: VMETD}
+VMETD performs the following updates:
+% \begin{equation}
+% \delta_{t}= R_{t+1}+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t.
+% \end{equation}
+\begin{equation}
+\rho_{k} \leftarrow \frac{\pi(A_k | S_k)}{\mu(A_k | S_k)},
+\end{equation}
+\begin{equation}
+  \label{fvmetd}
+  F_k \leftarrow \gamma \rho_{k-1}F_{k-1}+1,
+\end{equation}
+\begin{equation}
+  \label{omegavmetd}
+  \omega_{k+1} \leftarrow \omega_k+\beta_k(F_k \rho_k \delta_k - \omega_k),
+\end{equation}
+\begin{equation}
+  \label{thetavmetd}
+  \theta_{k+1}\leftarrow \theta_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \theta_k^{\top}\phi_{k+1}-\theta_k^{\top}\phi_k)\phi_k -\alpha_k \omega_{k+1}\phi_k,
+\end{equation}
+where $\delta_k$ is the TD error (\ref{delta}), $\mu$ is the behavior policy and $\pi$ is the target policy,
+$F_k$ is a scalar variable with $F_0=1$,
+$\omega$ is used to estimate $\mathbb{E}_{\mu}[F_k \rho_k \delta_k]$, i.e., $\omega \doteq \mathbb{E}_{\mu}[F_k \rho_k \delta_k]$, and
+$\textbf{F}$ is a diagonal matrix with diagonal elements
+$f(s)\doteq d_{\mu}(s)\lim_{k\rightarrow \infty}\mathbb{E}_{\mu}[F_k|S_k=s]$,
+which we assume exists.
+The vector $\textbf{f}\in \mathbb{R}^N$ with components
+$[\textbf{f}]_s\doteq f(s)$ can be written as
+\begin{equation}
+\begin{split}
+\textbf{f}&=\textbf{d}_{\mu}+\gamma \textbf{P}_{\pi}^{\top}\textbf{d}_{\mu}+(\gamma \textbf{P}_{\pi}^{\top})^2\textbf{d}_{\mu}+\ldots\\
+&=(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}\textbf{d}_{\mu}.
+\end{split}
+\end{equation}
+
+
diff --git a/main/theory.tex b/main/theory.tex
index 0454189..ec1b933 100644
--- a/main/theory.tex
+++ b/main/theory.tex
@@ -82,4 +82,31 @@ Please refer to the appendix \ref{proofcorollary4_2} for detailed proof process.
 Then the parameter vector $\theta_k$ converges with probability one
 to $A^{-1}b$.
 \end{theorem}
-Please refer to the appendix \ref{proofth2} for detailed proof process.
\ No newline at end of file
+Please refer to the appendix \ref{proofth2} for detailed proof process.
+
+\begin{theorem}
+  \label{theorem3}(Convergence of VMETD).
+  In the case of off-policy learning, consider the iterations (\ref{fvmetd}), (\ref{omegavmetd}) and (\ref{thetavmetd}) with (\ref{delta}) of VMETD.
+  Let the step-size sequences $\alpha_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\beta_k>0$, for all $k$,
+  $
+  \sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\infty,
+  $
+  $
+  \sum_{k=0}^{\infty}\alpha_k^2<\infty,
+  $
+  $
+  \sum_{k=0}^{\infty}\beta_k^2<\infty,
+  $
+  and
+  $
+  \alpha_k = o(\beta_k).
+  $
+  Assume that $(\phi_k,r_k,\phi_k')$ is an i.i.d. sequence with
+  uniformly bounded second moments, where $\phi_k$ and $\phi'_{k}$ are sampled from the same Markov chain.
+  Let $A = \mathrm{Cov}(\phi,F\rho(\phi-\gamma\phi'))$,
+  $b=\mathrm{Cov}(F\rho r,\phi)$.
+  Assume that matrix $A$ is non-singular.
+  Then the parameter vector $\theta_k$ converges with probability one
+  to $A^{-1}b$.
+\end{theorem}
+Please refer to the appendix \ref{proofVMETD} for detailed proof process.
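+In matrix form, using the notation introduced above and in Appendix \ref{proofVMETD} (this is only a restatement of the limits computed there, with no additional assumptions), the quantities in Theorem \ref{theorem3} are
+\begin{equation}
+  A={\bm{\Phi}}^{\top}\left(\textbf{F}(\textbf{I}-\gamma \textbf{P}_{\pi})-\textbf{d}_{\mu}\textbf{d}_{\mu}^{\top}\right){\bm{\Phi}},
+  \qquad
+  b=\lim_{k\rightarrow\infty}\mathbb{E}_{\mu}\left[(F_k\rho_k R_{k+1}-\mathbb{E}_{\mu}[F_k\rho_k R_{k+1}])\phi_k\right],
+\end{equation}
+and the key step of the proof is to show that this $A$ is positive definite.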
\ No newline at end of file diff --git a/neurips_2024.aux b/neurips_2024.aux index 14357af..f8a38be 100644 --- a/neurips_2024.aux +++ b/neurips_2024.aux @@ -61,55 +61,46 @@ \newlabel{deltaSarsa}{{8}{4}{Variance Minimization TD Learning: VMTD}{equation.3.8}{}} \newlabel{deltaQ}{{9}{4}{Variance Minimization TD Learning: VMTD}{equation.3.9}{}} \citation{dalal2020tale} -\citation{dalal2020tale} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Variance Minimization TDC Learning: VMTDC}{5}{subsection.3.3}\protected@file@percent } \newlabel{thetavmtdc}{{11}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.11}{}} \newlabel{uvmtdc}{{12}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.12}{}} \newlabel{omegavmtdc}{{13}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.13}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Variance Minimization ETD Learning: VMETD}{5}{subsection.3.4}\protected@file@percent } +\newlabel{fvmetd}{{18}{5}{Variance Minimization ETD Learning: VMETD}{equation.3.18}{}} +\newlabel{omegavmetd}{{19}{5}{Variance Minimization ETD Learning: VMETD}{equation.3.19}{}} +\newlabel{thetavmetd}{{20}{5}{Variance Minimization ETD Learning: VMETD}{equation.3.20}{}} \@writefile{toc}{\contentsline {section}{\numberline {4}Theoretical Analysis}{5}{section.4}\protected@file@percent } \newlabel{theorem1}{{4.1}{5}{}{theorem.4.1}{}} -\newlabel{corollary4_2}{{4.2}{5}{}{theorem.4.2}{}} +\citation{dalal2020tale} \citation{Sutton2018book} \citation{sutton2009fast} \citation{baird1995residual,sutton2009fast} \citation{baird1995residual,sutton2009fast,maei2011gradient} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Random walk.}}{6}{figure.1}\protected@file@percent } -\newlabel{randomwalk}{{1}{6}{Random walk}{figure.1}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces 7-state version of Baird's off-policy counterexample.}}{6}{figure.2}\protected@file@percent } -\newlabel{bairdexample}{{2}{6}{7-state version of Baird's off-policy counterexample}{figure.2}{}} +\newlabel{corollary4_2}{{4.2}{6}{}{theorem.4.2}{}} \newlabel{theorem2}{{4.3}{6}{}{theorem.4.3}{}} +\newlabel{theorem3}{{4.4}{6}{}{theorem.4.4}{}} \@writefile{toc}{\contentsline {section}{\numberline {5}Experimental Studies}{6}{section.5}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Testing Tasks}{6}{subsection.5.1}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Random walk.}}{7}{figure.1}\protected@file@percent } +\newlabel{randomwalk}{{1}{7}{Random walk}{figure.1}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces 7-state version of Baird's off-policy counterexample.}}{7}{figure.2}\protected@file@percent } +\newlabel{bairdexample}{{2}{7}{7-state version of Baird's off-policy counterexample}{figure.2}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Experimental Results and Analysis}{7}{subsection.5.2}\protected@file@percent } -\newlabel{DependentFull}{{3(a)}{7}{Subfigure 3(a)}{subfigure.3.1}{}} -\newlabel{sub@DependentFull}{{(a)}{7}{Subfigure 3(a)\relax }{subfigure.3.1}{}} -\newlabel{TabularFull}{{3(b)}{7}{Subfigure 3(b)}{subfigure.3.2}{}} -\newlabel{sub@TabularFull}{{(b)}{7}{Subfigure 3(b)\relax }{subfigure.3.2}{}} -\newlabel{InvertedFull}{{3(c)}{7}{Subfigure 3(c)}{subfigure.3.3}{}} -\newlabel{sub@InvertedFull}{{(c)}{7}{Subfigure 3(c)\relax }{subfigure.3.3}{}} -\newlabel{CounterExampleFull}{{3(d)}{7}{Subfigure 
3(d)}{subfigure.3.4}{}} -\newlabel{sub@CounterExampleFull}{{(d)}{7}{Subfigure 3(d)\relax }{subfigure.3.4}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Learning curses of four evaluation environments.}}{7}{figure.3}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Dependent}}}{7}{figure.3}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Tabular}}}{7}{figure.3}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Inverted}}}{7}{figure.3}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {counterexample}}}{7}{figure.3}\protected@file@percent } -\newlabel{Evaluation_full}{{3}{7}{Learning curses of four evaluation environments}{figure.3}{}} \citation{schwartz1993reinforcement} -\newlabel{MazeFull}{{4(a)}{8}{Subfigure 4(a)}{subfigure.4.1}{}} -\newlabel{sub@MazeFull}{{(a)}{8}{Subfigure 4(a)\relax }{subfigure.4.1}{}} -\newlabel{CliffWalkingFull}{{4(b)}{8}{Subfigure 4(b)}{subfigure.4.2}{}} -\newlabel{sub@CliffWalkingFull}{{(b)}{8}{Subfigure 4(b)\relax }{subfigure.4.2}{}} -\newlabel{MountainCarFull}{{4(c)}{8}{Subfigure 4(c)}{subfigure.4.3}{}} -\newlabel{sub@MountainCarFull}{{(c)}{8}{Subfigure 4(c)\relax }{subfigure.4.3}{}} -\newlabel{AcrobotFull}{{4(d)}{8}{Subfigure 4(d)}{subfigure.4.4}{}} -\newlabel{sub@AcrobotFull}{{(d)}{8}{Subfigure 4(d)\relax }{subfigure.4.4}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Learning curses of four contral environments.}}{8}{figure.4}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Maze}}}{8}{figure.4}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Cliff Walking}}}{8}{figure.4}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Mountain Car}}}{8}{figure.4}\protected@file@percent } -\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Acrobot}}}{8}{figure.4}\protected@file@percent } -\newlabel{Complete_full}{{4}{8}{Learning curses of four contral environments}{figure.4}{}} +\newlabel{DependentFull}{{3(a)}{8}{Subfigure 3(a)}{subfigure.3.1}{}} +\newlabel{sub@DependentFull}{{(a)}{8}{Subfigure 3(a)\relax }{subfigure.3.1}{}} +\newlabel{TabularFull}{{3(b)}{8}{Subfigure 3(b)}{subfigure.3.2}{}} +\newlabel{sub@TabularFull}{{(b)}{8}{Subfigure 3(b)\relax }{subfigure.3.2}{}} +\newlabel{InvertedFull}{{3(c)}{8}{Subfigure 3(c)}{subfigure.3.3}{}} +\newlabel{sub@InvertedFull}{{(c)}{8}{Subfigure 3(c)\relax }{subfigure.3.3}{}} +\newlabel{CounterExampleFull}{{3(d)}{8}{Subfigure 3(d)}{subfigure.3.4}{}} +\newlabel{sub@CounterExampleFull}{{(d)}{8}{Subfigure 3(d)\relax }{subfigure.3.4}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Learning curses of four evaluation environments.}}{8}{figure.3}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Dependent}}}{8}{figure.3}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Tabular}}}{8}{figure.3}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Inverted}}}{8}{figure.3}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {counterexample}}}{8}{figure.3}\protected@file@percent } 
+\newlabel{Evaluation_full}{{3}{8}{Learning curses of four evaluation environments}{figure.3}{}} \@writefile{toc}{\contentsline {section}{\numberline {6}Related Work}{8}{section.6}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Difference between VMQ and R-learning}{8}{subsection.6.1}\protected@file@percent } \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Difference between R-learning and tabular VMQ.}}{8}{table.2}\protected@file@percent } @@ -120,72 +111,86 @@ \citation{Sutton2018book} \citation{schulman2015trust} \citation{schulman2017proximal} -\citation{borkar1997stochastic} +\newlabel{MazeFull}{{4(a)}{9}{Subfigure 4(a)}{subfigure.4.1}{}} +\newlabel{sub@MazeFull}{{(a)}{9}{Subfigure 4(a)\relax }{subfigure.4.1}{}} +\newlabel{CliffWalkingFull}{{4(b)}{9}{Subfigure 4(b)}{subfigure.4.2}{}} +\newlabel{sub@CliffWalkingFull}{{(b)}{9}{Subfigure 4(b)\relax }{subfigure.4.2}{}} +\newlabel{MountainCarFull}{{4(c)}{9}{Subfigure 4(c)}{subfigure.4.3}{}} +\newlabel{sub@MountainCarFull}{{(c)}{9}{Subfigure 4(c)\relax }{subfigure.4.3}{}} +\newlabel{AcrobotFull}{{4(d)}{9}{Subfigure 4(d)}{subfigure.4.4}{}} +\newlabel{sub@AcrobotFull}{{(d)}{9}{Subfigure 4(d)\relax }{subfigure.4.4}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Learning curses of four contral environments.}}{9}{figure.4}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Maze}}}{9}{figure.4}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Cliff Walking}}}{9}{figure.4}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Mountain Car}}}{9}{figure.4}\protected@file@percent } +\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Acrobot}}}{9}{figure.4}\protected@file@percent } +\newlabel{Complete_full}{{4}{9}{Learning curses of four contral environments}{figure.4}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Variance Reduction for TD Learning}{9}{subsection.6.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Variance Reduction for Policy Gradient Algorithms}{9}{subsection.6.3}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion and Future Work}{9}{section.7}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {A}Relevant proofs}{9}{appendix.A}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {A.1}Proof of Theorem \ref {theorem1}}{9}{subsection.A.1}\protected@file@percent } -\newlabel{proofth1}{{A.1}{9}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}} -\newlabel{th1proof}{{A.1}{9}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}} +\citation{borkar1997stochastic} \citation{hirsch1989convergent} \citation{borkar2000ode} \citation{borkar2000ode} \citation{borkar2000ode} -\newlabel{thetaFast}{{19}{10}{Proof of Theorem \ref {theorem1}}{equation.A.19}{}} -\newlabel{omegaFast}{{20}{10}{Proof of Theorem \ref {theorem1}}{equation.A.20}{}} -\newlabel{omegaFastFinal}{{21}{10}{Proof of Theorem \ref {theorem1}}{equation.A.21}{}} -\newlabel{omegaInfty}{{22}{10}{Proof of Theorem \ref {theorem1}}{equation.A.22}{}} -\newlabel{odetheta}{{23}{10}{Proof of Theorem \ref {theorem1}}{equation.A.23}{}} +\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion and Future Work}{10}{section.7}\protected@file@percent } 
+\@writefile{toc}{\contentsline {section}{\numberline {A}Relevant proofs}{10}{appendix.A}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {A.1}Proof of Theorem \ref {theorem1}}{10}{subsection.A.1}\protected@file@percent } +\newlabel{proofth1}{{A.1}{10}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}} +\newlabel{th1proof}{{A.1}{10}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}} +\newlabel{thetaFast}{{24}{10}{Proof of Theorem \ref {theorem1}}{equation.A.24}{}} +\newlabel{omegaFast}{{25}{10}{Proof of Theorem \ref {theorem1}}{equation.A.25}{}} +\newlabel{omegaFastFinal}{{26}{10}{Proof of Theorem \ref {theorem1}}{equation.A.26}{}} +\newlabel{omegaInfty}{{27}{10}{Proof of Theorem \ref {theorem1}}{equation.A.27}{}} +\newlabel{odetheta}{{28}{11}{Proof of Theorem \ref {theorem1}}{equation.A.28}{}} +\newlabel{covariance}{{29}{11}{Proof of Theorem \ref {theorem1}}{equation.A.29}{}} +\newlabel{odethetafinal}{{30}{11}{Proof of Theorem \ref {theorem1}}{equation.A.30}{}} \citation{dalal2020tale} \citation{dalal2020tale} -\newlabel{covariance}{{24}{11}{Proof of Theorem \ref {theorem1}}{equation.A.24}{}} -\newlabel{odethetafinal}{{25}{11}{Proof of Theorem \ref {theorem1}}{equation.A.25}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {A.2}Proof of Corollary \ref {corollary4_2}}{11}{subsection.A.2}\protected@file@percent } -\newlabel{proofcorollary4_2}{{A.2}{11}{Proof of Corollary \ref {corollary4_2}}{subsection.A.2}{}} -\newlabel{matrixassumption}{{A.1}{11}{}{theorem.A.1}{}} -\newlabel{stepsizeassumption}{{A.2}{11}{}{theorem.A.2}{}} -\newlabel{sparseprojection}{{A.3}{11}{}{theorem.A.3}{}} \citation{dalal2020tale} \citation{dalal2020tale} \citation{sutton2009fast} +\@writefile{toc}{\contentsline {subsection}{\numberline {A.2}Proof of Corollary \ref {corollary4_2}}{12}{subsection.A.2}\protected@file@percent } +\newlabel{proofcorollary4_2}{{A.2}{12}{Proof of Corollary \ref {corollary4_2}}{subsection.A.2}{}} +\newlabel{matrixassumption}{{A.1}{12}{}{theorem.A.1}{}} +\newlabel{stepsizeassumption}{{A.2}{12}{}{theorem.A.2}{}} +\newlabel{sparseprojection}{{A.3}{12}{}{theorem.A.3}{}} +\newlabel{sparseprojectiontheta}{{35}{12}{}{equation.A.35}{}} +\newlabel{sparseprojectionomega}{{36}{12}{}{equation.A.36}{}} \citation{hirsch1989convergent} -\newlabel{sparseprojectiontheta}{{30}{12}{}{equation.A.30}{}} -\newlabel{sparseprojectionomega}{{31}{12}{}{equation.A.31}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {A.3}Proof of Theorem \ref {theorem2}}{12}{subsection.A.3}\protected@file@percent } -\newlabel{proofth2}{{A.3}{12}{Proof of Theorem \ref {theorem2}}{subsection.A.3}{}} -\newlabel{thetavmtdcFastest}{{32}{12}{Proof of Theorem \ref {theorem2}}{equation.A.32}{}} -\newlabel{uvmtdcFastest}{{33}{12}{Proof of Theorem \ref {theorem2}}{equation.A.33}{}} -\newlabel{omegavmtdcFastest}{{34}{12}{Proof of Theorem \ref {theorem2}}{equation.A.34}{}} \citation{borkar2000ode} \citation{borkar2000ode} \citation{borkar2000ode} +\@writefile{toc}{\contentsline {subsection}{\numberline {A.3}Proof of Theorem \ref {theorem2}}{13}{subsection.A.3}\protected@file@percent } +\newlabel{proofth2}{{A.3}{13}{Proof of Theorem \ref {theorem2}}{subsection.A.3}{}} +\newlabel{thetavmtdcFastest}{{37}{13}{Proof of Theorem \ref {theorem2}}{equation.A.37}{}} +\newlabel{uvmtdcFastest}{{38}{13}{Proof of Theorem \ref {theorem2}}{equation.A.38}{}} +\newlabel{omegavmtdcFastest}{{39}{13}{Proof of Theorem \ref {theorem2}}{equation.A.39}{}} +\newlabel{omegavmtdcFastestFinal}{{40}{13}{Proof of 
Theorem \ref {theorem2}}{equation.A.40}{}} +\newlabel{omegavmtdcInfty}{{41}{13}{Proof of Theorem \ref {theorem2}}{equation.A.41}{}} \citation{hirsch1989convergent} \citation{borkar2000ode} \citation{borkar2000ode} \citation{borkar2000ode} -\newlabel{omegavmtdcFastestFinal}{{35}{13}{Proof of Theorem \ref {theorem2}}{equation.A.35}{}} -\newlabel{omegavmtdcInfty}{{36}{13}{Proof of Theorem \ref {theorem2}}{equation.A.36}{}} -\newlabel{thetavmtdcFaster}{{37}{13}{Proof of Theorem \ref {theorem2}}{equation.A.37}{}} -\newlabel{uvmtdcFaster}{{38}{13}{Proof of Theorem \ref {theorem2}}{equation.A.38}{}} -\newlabel{uvmtdcFasterFinal}{{39}{13}{Proof of Theorem \ref {theorem2}}{equation.A.39}{}} -\newlabel{uvmtdcInfty}{{40}{13}{Proof of Theorem \ref {theorem2}}{equation.A.40}{}} -\newlabel{thetavmtdcSlowerFinal}{{42}{14}{Proof of Theorem \ref {theorem2}}{equation.A.42}{}} -\newlabel{odethetavmtdcfinal}{{43}{14}{Proof of Theorem \ref {theorem2}}{equation.A.43}{}} -\@writefile{toc}{\contentsline {section}{\numberline {B}Experimental details}{14}{appendix.B}\protected@file@percent } -\newlabel{experimentaldetails}{{B}{14}{Experimental details}{appendix.B}{}} +\newlabel{thetavmtdcFaster}{{42}{14}{Proof of Theorem \ref {theorem2}}{equation.A.42}{}} +\newlabel{uvmtdcFaster}{{43}{14}{Proof of Theorem \ref {theorem2}}{equation.A.43}{}} +\newlabel{uvmtdcFasterFinal}{{44}{14}{Proof of Theorem \ref {theorem2}}{equation.A.44}{}} +\newlabel{uvmtdcInfty}{{45}{14}{Proof of Theorem \ref {theorem2}}{equation.A.45}{}} +\newlabel{thetavmtdcSlowerFinal}{{47}{14}{Proof of Theorem \ref {theorem2}}{equation.A.47}{}} \@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces VMTDC algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.2}\protected@file@percent } \newlabel{alg:algorithm 2}{{2}{15}{Proof of Theorem \ref {theorem2}}{algorithm.2}{}} -\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces VMGTD algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.3}\protected@file@percent } -\newlabel{alg:algorithm 3}{{3}{15}{Proof of Theorem \ref {theorem2}}{algorithm.3}{}} +\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces VMETD algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.3}\protected@file@percent } +\newlabel{alg:algorithm 5}{{3}{15}{Proof of Theorem \ref {theorem2}}{algorithm.3}{}} +\newlabel{odethetavmtdcfinal}{{48}{15}{Proof of Theorem \ref {theorem2}}{equation.A.48}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {A.4}Proof of VMETD convergence}{16}{subsection.A.4}\protected@file@percent } +\newlabel{proofVMETD}{{A.4}{16}{Proof of VMETD convergence}{subsection.A.4}{}} +\newlabel{rowsum}{{51}{16}{Proof of VMETD convergence}{equation.A.51}{}} +\newlabel{columnsum}{{52}{16}{Proof of VMETD convergence}{equation.A.52}{}} \bibstyle{named} \bibdata{neurips_2024} \bibcite{baird1995residual}{{1}{1995}{{Baird and others}}{{}}} \bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano \bgroup \em et al.\egroup }}{{}}} -\@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces VMGTD2 algorithm with linear function approximation in the off-policy setting}}{16}{algorithm.4}\protected@file@percent } -\newlabel{alg:algorithm 4}{{4}{16}{Proof of Theorem \ref {theorem2}}{algorithm.4}{}} -\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Learning rates ($lr$) of four control experiments.}}{16}{table.3}\protected@file@percent } 
-\newlabel{lrofways}{{3}{16}{Learning rates ($lr$) of four control experiments}{table.3}{}} +\@writefile{toc}{\contentsline {section}{\numberline {B}Experimental details}{17}{appendix.B}\protected@file@percent } +\newlabel{experimentaldetails}{{B}{17}{Experimental details}{appendix.B}{}} \bibcite{borkar2000ode}{{3}{2000}{{Borkar and Meyn}}{{}}} \bibcite{borkar1997stochastic}{{4}{1997}{{Borkar}}{{}}} \bibcite{chen2023modified}{{5}{2023}{{Chen \bgroup \em et al.\egroup }}{{}}} @@ -202,6 +207,8 @@ \bibcite{liu2016proximal}{{16}{2016}{{Liu \bgroup \em et al.\egroup }}{{}}} \bibcite{liu2018proximal}{{17}{2018}{{Liu \bgroup \em et al.\egroup }}{{}}} \bibcite{maei2011gradient}{{18}{2011}{{Maei}}{{}}} +\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Learning rates ($lr$) of four control experiments.}}{18}{table.3}\protected@file@percent } +\newlabel{lrofways}{{3}{18}{Learning rates ($lr$) of four control experiments}{table.3}{}} \bibcite{ng1999policy}{{19}{1999}{{Ng \bgroup \em et al.\egroup }}{{}}} \bibcite{pan2017accelerated}{{20}{2017}{{Pan \bgroup \em et al.\egroup }}{{}}} \bibcite{schulman2015trust}{{21}{2015}{{Schulman \bgroup \em et al.\egroup }}{{}}} @@ -216,4 +223,4 @@ \bibcite{xu2019reanalysis}{{30}{2019}{{Xu \bgroup \em et al.\egroup }}{{}}} \bibcite{xu2020reanalysis}{{31}{2020}{{Xu \bgroup \em et al.\egroup }}{{}}} \bibcite{zhang2022truncated}{{32}{2022}{{Zhang and Whiteson}}{{}}} -\gdef \@abspage@last{18} +\gdef \@abspage@last{19} diff --git a/neurips_2024.log b/neurips_2024.log index ea7ebef..5b9f57b 100644 --- a/neurips_2024.log +++ b/neurips_2024.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 19 MAY 2024 17:37 +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 20 JUN 2024 01:22 entering extended mode restricted \write18 enabled. file:line:error style messages enabled. @@ -633,53 +633,71 @@ File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10) \tikz@expandcount=\count328 (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10) -))) -\c@theorem=\count329 +))) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/bm.sty +Package: bm 2022/01/05 v1.2f Bold Symbol Support (DPC/FMi) +\symboldoperators=\mathgroup6 +\symboldletters=\mathgroup7 +\symboldsymbols=\mathgroup8 +Package bm Info: No bold for \OMX/cmex/m/n, using \pmb. +Package bm Info: No bold for \U/msa/m/n, using \pmb. +Package bm Info: No bold for \U/msb/m/n, using \pmb. +LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 149. 
+) (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/esvect.sty +Package: esvect +\symesvector=\mathgroup9 +) (d:/software/texlive/2023/texmf-dist/tex/latex/multirow/multirow.sty +Package: multirow 2021/03/15 v2.8 Span multiple rows of a table +\multirow@colwidth=\skip77 +\multirow@cntb=\count329 +\multirow@dima=\skip78 +\bigstrutjot=\dimen319 +) +\c@theorem=\count330 (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithm.sty Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment (d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty Package: float 2001/11/08 v1.3d Float enhancements (AL) -\c@float@type=\count330 +\c@float@type=\count331 \float@exts=\toks51 \float@box=\box73 \@float@everytoks=\toks52 \@floatcapt=\box74 ) \@float@every@algorithm=\toks53 -\c@algorithm=\count331 +\c@algorithm=\count332 ) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithmic.sty Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic' -\c@ALC@unique=\count332 -\c@ALC@line=\count333 -\c@ALC@rem=\count334 -\c@ALC@depth=\count335 -\ALC@tlm=\skip77 -\algorithmicindent=\skip78 +\c@ALC@unique=\count333 +\c@ALC@line=\count334 +\c@ALC@rem=\count335 +\c@ALC@depth=\count336 +\ALC@tlm=\skip79 +\algorithmicindent=\skip80 ) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX) -\l__color_backend_stack_int=\count336 +\l__color_backend_stack_int=\count337 \l__pdf_internal_box=\box75 ) (./neurips_2024.aux) \openout1 = `neurips_2024.aux'. -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. -LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 106. -LaTeX Font Info: ... okay on input line 106. +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 109. 
+LaTeX Font Info: ... okay on input line 109. +LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 109. +LaTeX Font Info: ... okay on input line 109. *geometry* driver: auto-detecting *geometry* detected driver: pdftex @@ -747,24 +765,24 @@ LaTeX Font Info: ... okay on input line 106. * \@reversemarginfalse * (1in=72.27pt=25.4mm, 1cm=28.453pt) -Package hyperref Info: Link coloring OFF on input line 106. +Package hyperref Info: Link coloring OFF on input line 109. (./neurips_2024.out) (./neurips_2024.out) \@outlinefile=\write4 \openout4 = `neurips_2024.out'. -LaTeX Info: Redefining \microtypecontext on input line 106. -Package microtype Info: Applying patch `item' on input line 106. -Package microtype Info: Applying patch `toc' on input line 106. -Package microtype Info: Applying patch `eqnum' on input line 106. -Package microtype Info: Applying patch `footnote' on input line 106. -Package microtype Info: Applying patch `verbatim' on input line 106. +LaTeX Info: Redefining \microtypecontext on input line 109. +Package microtype Info: Applying patch `item' on input line 109. +Package microtype Info: Applying patch `toc' on input line 109. +Package microtype Info: Applying patch `eqnum' on input line 109. +Package microtype Info: Applying patch `footnote' on input line 109. +Package microtype Info: Applying patch `verbatim' on input line 109. Package microtype Info: Generating PDF output. Package microtype Info: Character protrusion enabled (level 2). Package microtype Info: Using default protrusion set `alltext'. Package microtype Info: Automatic font expansion enabled (level 2), (microtype) stretch: 20, shrink: 20, step: 1, non-selected. Package microtype Info: Using default expansion set `alltext-nott'. -LaTeX Info: Redefining \showhyphens on input line 106. +LaTeX Info: Redefining \showhyphens on input line 109. Package microtype Info: No adjustment of tracking. Package microtype Info: No adjustment of interword spacing. Package microtype Info: No adjustment of character kerning. @@ -772,16 +790,16 @@ Package microtype Info: No adjustment of character kerning. File: mt-ptm.cfg 2006/04/20 v1.7 microtype config. file: Times (RS) ) (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] -\scratchcounter=\count337 -\scratchdimen=\dimen319 +\scratchcounter=\count338 +\scratchdimen=\dimen320 \scratchbox=\box76 -\nofMPsegments=\count338 -\nofMParguments=\count339 +\nofMPsegments=\count339 +\nofMParguments=\count340 \everyMPshowfont=\toks54 -\MPscratchCnt=\count340 -\MPscratchDim=\dimen320 -\MPnumerator=\count341 -\makeMPintoPDFobject=\count342 +\MPscratchCnt=\count341 +\MPscratchDim=\dimen321 +\MPnumerator=\count342 +\makeMPintoPDFobject=\count343 \everyMPtoPDFconversion=\toks55 ) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf @@ -791,19 +809,23 @@ File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live )) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-cmr.cfg File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman (RS) ) -LaTeX Font Info: Trying to load font information for U+msa on input line 110. +LaTeX Font Info: Trying to load font information for U+msa on input line 113. 
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd File: umsa.fd 2013/01/14 v3.01 AMS symbols A ) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msa.cfg File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS) ) -LaTeX Font Info: Trying to load font information for U+msb on input line 110. +LaTeX Font Info: Trying to load font information for U+msb on input line 113. (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd File: umsb.fd 2013/01/14 v3.01 AMS symbols B ) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msb.cfg File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) ) -LaTeX Font Info: Trying to load font information for T1+cmtt on input line 110. +LaTeX Font Info: Trying to load font information for U+esvect on input line 113. + (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/uesvect.fd +File: uesvect.fd +) +LaTeX Font Info: Trying to load font information for T1+cmtt on input line 113. (d:/software/texlive/2023/texmf-dist/tex/latex/base/t1cmtt.fd File: t1cmtt.fd 2022/07/10 v2.5l Standard LaTeX font definitions ) @@ -811,7 +833,7 @@ Package microtype Info: Loading generic protrusion settings for font family (microtype) `cmtt' (encoding: T1). (microtype) For optimal results, create family-specific settings. (microtype) See the microtype manual for details. -LaTeX Font Info: Trying to load font information for T1+phv on input line 126. +LaTeX Font Info: Trying to load font information for T1+phv on input line 129. (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/t1phv.fd File: t1phv.fd 2020/03/25 scalable font definitions for T1/phv. ) @@ -829,82 +851,68 @@ pdfTeX warning (ext4): destination with the same identifier (name{table.1}) has l.77 \end{equation*} ] Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 138. - [4]) (./main/theory.tex [5]) (./main/experiment.tex (./main/pic/randomwalk.tex) (./main/pic/BairdExample.tex) [6 -pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) has been already used, duplicate ignored - ...shipout:D \box_use:N \l_shipout_box - \__shipout_drop_firstpage_... -l.46 - -pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) has been already used, duplicate ignored - ...shipout:D \box_use:N \l_shipout_box - \__shipout_drop_firstpage_... -l.46 - ] -
+ [4]) (./main/theory.tex [5]) (./main/experiment.tex (./main/pic/randomwalk.tex) (./main/pic/BairdExample.tex) [6] +
File: main/pic/maze_13_13.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/maze_13_13.pdf used on input line 53. (pdftex.def) Requested size: 73.9715pt x 58.14139pt. -
+
File: main/pic/dependent_new.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/dependent_new.pdf used on input line 78. (pdftex.def) Requested size: 119.24675pt x 79.49658pt. -
+
File: main/pic/tabular_new.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/tabular_new.pdf used on input line 82. (pdftex.def) Requested size: 119.23904pt x 79.49194pt. -
+
File: main/pic/inverted_new.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/inverted_new.pdf used on input line 87. (pdftex.def) Requested size: 119.24063pt x 79.49458pt. -
+
File: main/pic/counterexample_quanju_new.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/counterexample_quanju_new.pdf used on input line 91. (pdftex.def) Requested size: 119.24184pt x 79.49428pt. + +Underfull \vbox (badness 3907) has occurred while \output is active [] + [7 -pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) has been already used, duplicate ignored +pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) has been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... l.131 - <./main/pic/maze_13_13.pdf> <./main/pic/dependent_new.pdf - -pdfTeX warning: pdflatex.exe (file ./main/pic/dependent_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page -> <./main/pic/tabular_new.pdf - -pdfTeX warning: pdflatex.exe (file ./main/pic/tabular_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page -> <./main/pic/inverted_new.pdf - -pdfTeX warning: pdflatex.exe (file ./main/pic/inverted_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page -> <./main/pic/counterexample_quanju_new.pdf - -pdfTeX warning: pdflatex.exe (file ./main/pic/counterexample_quanju_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page ->]) (./main/relatedwork.tex -
+ +pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) has been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.131 + <./main/pic/maze_13_13.pdf>]) (./main/relatedwork.tex +
File: main/pic/maze_complete.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/maze_complete.pdf used on input line 7. (pdftex.def) Requested size: 119.24721pt x 79.4901pt. -
+
File: main/pic/cw_complete.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/cw_complete.pdf used on input line 11. (pdftex.def) Requested size: 119.24373pt x 79.49335pt. -
+
File: main/pic/mt_complete.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/mt_complete.pdf used on input line 16. (pdftex.def) Requested size: 119.24463pt x 79.49413pt. -
+
File: main/pic/Acrobot_complete.pdf Graphic file (type pdf) Package pdftex.def Info: main/pic/Acrobot_complete.pdf used on input line 20. (pdftex.def) Requested size: 119.23886pt x 79.49504pt. [8 -pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) has been already used, duplicate ignored +pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) has been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... l.57 @@ -913,7 +921,24 @@ pdfTeX warning (ext4): destination with the same identifier (name{table.2}) has ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... l.57 - <./main/pic/maze_complete.pdf> <./main/pic/cw_complete.pdf + <./main/pic/dependent_new.pdf> <./main/pic/tabular_new.pdf + +pdfTeX warning: pdflatex.exe (file ./main/pic/tabular_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page +> <./main/pic/inverted_new.pdf + +pdfTeX warning: pdflatex.exe (file ./main/pic/inverted_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page +> <./main/pic/counterexample_quanju_new.pdf + +pdfTeX warning: pdflatex.exe (file ./main/pic/counterexample_quanju_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page +>]) (./main/conclusion.tex) +Underfull \vbox (badness 10000) has occurred while \output is active [] + + [9 +pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) has been already used, duplicate ignored + ...shipout:D \box_use:N \l_shipout_box + \__shipout_drop_firstpage_... +l.138 + <./main/pic/maze_complete.pdf> <./main/pic/cw_complete.pdf pdfTeX warning: pdflatex.exe (file ./main/pic/cw_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page > <./main/pic/mt_complete.pdf @@ -922,45 +947,58 @@ pdfTeX warning: pdflatex.exe (file ./main/pic/mt_complete.pdf): PDF inclusion: m > <./main/pic/Acrobot_complete.pdf pdfTeX warning: pdflatex.exe (file ./main/pic/Acrobot_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page ->]) (./main/conclusion.tex) (./main/appendix.tex [9] [10] +>] (./main/appendix.tex [10] [11] LaTeX Warning: Command \textemdash invalid in math mode on input line 229. LaTeX Warning: Command \textemdash invalid in math mode on input line 229. -[11] [12] [13] -Underfull \hbox (badness 1946) in paragraph at lines 683--696 +[12] [13] [14] +Overfull \hbox (68.70882pt too wide) detected at line 614 +[] + [] + + +Underfull \vbox (badness 3343) has occurred while \output is active [] + + [15] +Overfull \hbox (47.39436pt too wide) detected at line 627 +[] + [] + +[16] +Underfull \hbox (badness 1946) in paragraph at lines 788--801 []\T1/ptm/m/n/10 (+20) Three ran-dom walk ex-per-i-ments: the $\OML/cmm/m/it/10 $ \T1/ptm/m/n/10 (+20) val-ues for all al-go-rithms are in the range of [] -[14] [15] -Overfull \hbox (33.58313pt too wide) in paragraph at lines 738--752 + +Overfull \hbox (33.58313pt too wide) in paragraph at lines 843--857 [][] [] -) (./neurips_2024.bbl [16 +) (./neurips_2024.bbl [17] [18 pdfTeX warning (ext4): destination with the same identifier (name{table.3}) has been already used, duplicate ignored ...shipout:D \box_use:N \l_shipout_box \__shipout_drop_firstpage_... -l.12 - ] [17]) [18] (./neurips_2024.aux) +l.92 + ]) [19] (./neurips_2024.aux) Package rerunfilecheck Info: File `neurips_2024.out' has not changed. -(rerunfilecheck) Checksum: E5788AEC1D4F936207967A17A6B3E0A1;3587. 
+(rerunfilecheck) Checksum: FAC1A00F891A3C2C3EDDFFA999CD212C;4060.
 )
 Here is how much of TeX's memory you used:
- 26626 strings out of 476025
- 484842 string characters out of 5789524
- 1897382 words of memory out of 5000000
- 46086 multiletter control sequences out of 15000+600000
- 567455 words of font info for 255 fonts, out of 8000000 for 9000
+ 26929 strings out of 476025
+ 489644 string characters out of 5789524
+ 1891382 words of memory out of 5000000
+ 46337 multiletter control sequences out of 15000+600000
+ 577716 words of font info for 288 fonts, out of 8000000 for 9000
 1141 hyphenation exceptions out of 8191
- 84i,16n,80p,1005b,1065s stack positions out of 10000i,1000n,20000p,200000b,200000s
-
-Output written on neurips_2024.pdf (18 pages, 2290177 bytes).
+ 84i,22n,80p,1005b,1065s stack positions out of 10000i,1000n,20000p,200000b,200000s
+
+Output written on neurips_2024.pdf (19 pages, 2305737 bytes).
 PDF statistics:
- 1011 PDF objects out of 1200 (max. 8388607)
- 839 compressed objects within 9 object streams
- 195 named destinations out of 1000 (max. 500000)
- 52442 words of extra memory for PDF output out of 61914 (max. 10000000)
+ 1031 PDF objects out of 1200 (max. 8388607)
+ 856 compressed objects within 9 object streams
+ 193 named destinations out of 1000 (max. 500000)
+ 61162 words of extra memory for PDF output out of 61914 (max. 10000000)
diff --git a/neurips_2024.out b/neurips_2024.out
index fe9cf21..cbd2d9f 100644
--- a/neurips_2024.out
+++ b/neurips_2024.out
@@ -4,17 +4,19 @@
 \BOOKMARK [2][-]{subsection.3.1}{\376\377\000M\000o\000t\000i\000v\000a\000t\000i\000o\000n}{section.3}% 4
 \BOOKMARK [2][-]{subsection.3.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000T\000D}{section.3}% 5
 \BOOKMARK [2][-]{subsection.3.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000T\000D\000C\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000T\000D\000C}{section.3}% 6
-\BOOKMARK [1][-]{section.4}{\376\377\000T\000h\000e\000o\000r\000e\000t\000i\000c\000a\000l\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{}% 7
-\BOOKMARK [1][-]{section.5}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000S\000t\000u\000d\000i\000e\000s}{}% 8
-\BOOKMARK [2][-]{subsection.5.1}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000T\000a\000s\000k\000s}{section.5}% 9
-\BOOKMARK [2][-]{subsection.5.2}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000R\000e\000s\000u\000l\000t\000s\000\040\000a\000n\000d\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section.5}% 10
-\BOOKMARK [1][-]{section.6}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000W\000o\000r\000k}{}% 11
-\BOOKMARK [2][-]{subsection.6.1}{\376\377\000D\000i\000f\000f\000e\000r\000e\000n\000c\000e\000\040\000b\000e\000t\000w\000e\000e\000n\000\040\000V\000M\000Q\000\040\000a\000n\000d\000\040\000R\000-\000l\000e\000a\000r\000n\000i\000n\000g}{section.6}% 12
-\BOOKMARK [2][-]{subsection.6.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g}{section.6}% 13
-\BOOKMARK [2][-]{subsection.6.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\000\040\000A\000l\000g\000o\000r\000i\000t\000h\000m\000s}{section.6}% 14
-\BOOKMARK [1][-]{section.7}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 15
-\BOOKMARK [1][-]{appendix.A}{\376\377\000R\000e\000l\000e\000v\000a\000n\000t\000\040\000p\000r\000o\000o\000f\000s}{}% 16
-\BOOKMARK [2][-]{subsection.A.1}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0001}{appendix.A}% 17
-\BOOKMARK [2][-]{subsection.A.2}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000C\000o\000r\000o\000l\000l\000a\000r\000y\000\040\0004\000.\0002}{appendix.A}% 18
-\BOOKMARK [2][-]{subsection.A.3}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0003}{appendix.A}% 19
-\BOOKMARK [1][-]{appendix.B}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000d\000e\000t\000a\000i\000l\000s}{}% 20
+\BOOKMARK [2][-]{subsection.3.4}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000E\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000E\000T\000D}{section.3}% 7
+\BOOKMARK [1][-]{section.4}{\376\377\000T\000h\000e\000o\000r\000e\000t\000i\000c\000a\000l\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{}% 8
+\BOOKMARK [1][-]{section.5}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000S\000t\000u\000d\000i\000e\000s}{}% 9
+\BOOKMARK [2][-]{subsection.5.1}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000T\000a\000s\000k\000s}{section.5}% 10
+\BOOKMARK [2][-]{subsection.5.2}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000R\000e\000s\000u\000l\000t\000s\000\040\000a\000n\000d\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section.5}% 11
+\BOOKMARK [1][-]{section.6}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000W\000o\000r\000k}{}% 12
+\BOOKMARK [2][-]{subsection.6.1}{\376\377\000D\000i\000f\000f\000e\000r\000e\000n\000c\000e\000\040\000b\000e\000t\000w\000e\000e\000n\000\040\000V\000M\000Q\000\040\000a\000n\000d\000\040\000R\000-\000l\000e\000a\000r\000n\000i\000n\000g}{section.6}% 13
+\BOOKMARK [2][-]{subsection.6.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g}{section.6}% 14
+\BOOKMARK [2][-]{subsection.6.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\000\040\000A\000l\000g\000o\000r\000i\000t\000h\000m\000s}{section.6}% 15
+\BOOKMARK [1][-]{section.7}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 16
+\BOOKMARK [1][-]{appendix.A}{\376\377\000R\000e\000l\000e\000v\000a\000n\000t\000\040\000p\000r\000o\000o\000f\000s}{}% 17
+\BOOKMARK [2][-]{subsection.A.1}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0001}{appendix.A}% 18
+\BOOKMARK [2][-]{subsection.A.2}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000C\000o\000r\000o\000l\000l\000a\000r\000y\000\040\0004\000.\0002}{appendix.A}% 19
+\BOOKMARK [2][-]{subsection.A.3}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0003}{appendix.A}% 20
+\BOOKMARK [2][-]{subsection.A.4}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000V\000M\000E\000T\000D\000\040\000c\000o\000n\000v\000e\000r\000g\000e\000n\000c\000e}{appendix.A}% 21
+\BOOKMARK [1][-]{appendix.B}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000d\000e\000t\000a\000i\000l\000s}{}% 22
diff --git a/neurips_2024.pdf b/neurips_2024.pdf
index 4c26ff0..75d9f05 100644
Binary files a/neurips_2024.pdf and b/neurips_2024.pdf differ
diff --git a/neurips_2024.synctex.gz b/neurips_2024.synctex.gz
index 8998458..35367b4 100644
Binary files a/neurips_2024.synctex.gz and b/neurips_2024.synctex.gz differ
diff --git a/neurips_2024.tex b/neurips_2024.tex
index b73affa..41f576f 100644
--- a/neurips_2024.tex
+++ b/neurips_2024.tex
@@ -42,6 +42,9 @@
 \usepackage{mathtools}
 \usepackage{amsthm}
 \usepackage{tikz}
+\usepackage{bm}
+\usepackage{esvect}
+\usepackage{multirow}
 \theoremstyle{plain}
 \newtheorem{theorem}{Theorem}[section]
--
libgit2 0.26.0
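
For context on the three \usepackage lines added to neurips_2024.tex above: bm provides bold math symbols, esvect provides arrow-accented vectors, and multirow lets a table cell span several rows, which plausibly supports the new VMETD update formulas and result tables. The following is a minimal, self-contained sketch of typical usage of these packages; the specific symbols and the two-row table are illustrative assumptions, not content taken from the paper.

  \documentclass{article}
  \usepackage{bm}       % bold math, e.g. a bold parameter vector \bm{\theta}
  \usepackage{esvect}   % arrow accents for vectors via \vv{...}
  \usepackage{multirow} % \multirow{rows}{width}{text} for cells spanning rows
  \begin{document}
  A bold parameter vector $\bm{\theta}_t$ and an arrow-accented vector $\vv{F}_t$.
  \begin{tabular}{|c|c|c|}
    \hline
    \multirow{2}{*}{Algorithm} & \multicolumn{2}{c|}{Setting} \\
    \cline{2-3}
                               & on-policy & off-policy \\
    \hline
  \end{tabular}
  \end{document}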