Commit 828184a4 by GongYu

VMETD的更新公式和收敛性证明添加进去了

parent afbe69ae
...@@ -577,74 +577,179 @@ the parameter $\theta$ converges to $A^{-1}b$. ...@@ -577,74 +577,179 @@ the parameter $\theta$ converges to $A^{-1}b$.
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\begin{algorithm}[t]
\caption{VMGTD algorithm with linear function approximation in the off-policy setting}
\label{alg:algorithm 3}
\begin{algorithmic}
\STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
$, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
\REPEAT
\STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_0$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
\textbf{Output}: $\theta^*$.\\
\FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
\STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
\STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
\STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t \rho_t[\phi_t - \gamma \phi_{t+1}]\phi^{\top}_{t} u_t$
\STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) \phi_t - u_t]$
\STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
\STATE $S_t=S_{t+1}$
\ENDFOR
\UNTIL{terminal episode}
\end{algorithmic}
\end{algorithm}
\begin{algorithm}[t] \begin{algorithm}[t]
\caption{VMGTD2 algorithm with linear function approximation in the off-policy setting} \caption{VMETD algorithm with linear function approximation in the off-policy setting}
\label{alg:algorithm 4} \label{alg:algorithm 5}
\begin{algorithmic} \begin{algorithmic}
\STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
$, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$ $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
\REPEAT \REPEAT
\STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\ \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ to $1$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
\textbf{Output}: $\theta^*$.\\ \textbf{Output}: $\theta^*$.\\
\FOR{$t=0$ {\bfseries to} $T-1$} \FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\ \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\ \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
\STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$ \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
\STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$ \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
\STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t \rho_t[\phi_t - \gamma \phi_{t+1}]\phi^{\top}_{t} u_t$ \STATE $F_{t}\leftarrow \gamma \rho_{t-1} F_{t-1} +1$
\STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \phi^{\top}_{t} u_t] \phi_t$ \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t (F_t \rho_t\delta_t-\omega_t)\phi_t$
\STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$ \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t (F_t \rho_t\delta_t-\omega_t)$
\STATE $S_t=S_{t+1}$ \STATE $S_t=S_{t+1}$
\ENDFOR \ENDFOR
\UNTIL{terminal episode} \UNTIL{terminal episode}
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
% \begin{algorithm}[t] \subsection{Proof of VMETD convergence}
% \caption{VMETD algorithm with linear function approximation in the off-policy setting} \label{proofVMETD}
% \label{alg:algorithm 5} The parameter $\theta$ of VMETD is updated as follows:
% \begin{algorithmic} \begin{equation}
% \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma \begin{split}
% $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$ \theta_{k+1}&\leftarrow \theta_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \theta_k^{\top}\phi_{k+1}-\theta_k^{\top}\phi_k)\phi_k -\alpha_k \omega_{k+1}\phi_k\\
% \REPEAT &=\theta_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \theta_k^{\top}\phi_{k+1}-\theta_k^{\top}\phi_k)\phi_k -\alpha_k \mathbb{E}_{\mu}[F_k \rho_k \delta_k]\phi_k\\
% \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ to $1$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\ &= \theta_k+\alpha_k \{\underbrace{(F_k\rho_kR_{k+1}-\mathbb{E}_{\mu}[F_k\rho_k R_{k+1}])\phi_k}_{\textbf{b}_{\text{VMETD},k}}
% \textbf{Output}: $\theta^*$.\\ -\underbrace{(F_k\rho_k\phi_k(\phi_k-\gamma\phi_{k+1})^{\top}-\phi_k\mathbb{E}_{\mu}[F_k\rho_k (\phi_k-\gamma\phi_{k+1})]^{\top})}_{\textbf{A}_{\text{VMETD},k}}\theta_k\}
% \FOR{$t=0$ {\bfseries to} $T-1$} \end{split}
% \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\ \end{equation}
% \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\ \begin{equation}
% \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$ \begin{split}
% \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$ \textbf{A}_{\text{VMETD}}&=\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\
% \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t u_t \rho_t(\delta_t-\omega_t)\phi_t$ &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi_{k+1})^{\top}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \phi_k]\mathbb{E}_{\mu}[F_k \rho_k (\phi_k - \gamma \phi_{k+1})]^{\top}\\
% \STATE $u_{t+1}\leftarrow \gamma \rho_t u_t +1$ &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\underbrace{\phi_k}_{X}\underbrace{F_k \rho_k (\phi_k - \gamma \phi_{k+1})^{\top}}_{Y}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \phi_k]\mathbb{E}_{\mu}[F_k \rho_k (\phi_k - \gamma \phi_{k+1})]^{\top}\\
% \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$ &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi_{k+1})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[ \phi_k]\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k (\phi_k - \gamma \phi_{k+1})]^{\top}\\
% \STATE $S_t=S_{t+1}$ &=\sum_{s} f(s) \phi(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} - \sum_{s} d_{\mu}(s) \phi(s) * \sum_{s} f(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} \\
% \ENDFOR &={\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{d}_{\mu} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} \\
% \UNTIL{terminal episode} &={\bm{\Phi}}^{\top} (\textbf{F} - \textbf{d}_{\mu} \textbf{f}^{\top}) (\textbf{I} - \gamma \textbf{P}_{\pi}){\bm{\Phi}} \\
% \end{algorithmic} &={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\pi})){\bm{\Phi}} \\
% \end{algorithm} &={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} ){\bm{\Phi}} \\
\end{split}
\end{equation}
\begin{proof}
Any matrix $\bm{\text{M}}$ is positive definite if and only if
the symmetric matrix $\bm{\text{S}}=\bm{\text{M}}+\bm{\text{M}}^{\top}$ is positive definite.
Any symmetric real matrix $\bm{\text{S}}$ is positive definite if all of its diagonal entries
are positive and each is greater than the sum of the absolute values of the corresponding
off-diagonal entries.
\begin{equation}
\label{rowsum}
\begin{split}
(\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} )\textbf{1}
&=\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})\textbf{1}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
&=\textbf{F}(\textbf{1}-\gamma \textbf{P}_{\pi} \textbf{1})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
&=(1-\gamma)\textbf{F}\textbf{1}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
&=(1-\gamma)\textbf{f}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\
&=(1-\gamma)\textbf{f}-\textbf{d}_{\mu} \\
&=(1-\gamma)(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}\textbf{d}_{\mu}-\textbf{d}_{\mu} \\
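&=[(1-\gamma)(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}-\textbf{I}]\textbf{d}_{\mu} \\
&=[(1-\gamma)\sum_{t=0}^{\infty}(\gamma\textbf{P}_{\pi}^{\top})^{t}-\textbf{I}]\textbf{d}_{\mu} \\
% NOTE(review): the factor (1-\gamma) multiplies only the first term, so -\textbf{d}_{\mu} cannot be
% absorbed into the bracket as -(1-\gamma)\textbf{I}. The corrected expression below has no fixed sign:
% its entries sum to zero (using \textbf{1}^{\top}\textbf{P}_{\pi}^{\top}=\textbf{1}^{\top} and \textbf{1}^{\top}\textbf{d}_{\mu}=1),
% so the former "> 0" conclusion does not follow from this derivation and must be re-examined.
&=(1-\gamma)\sum_{t=1}^{\infty}(\gamma\textbf{P}_{\pi}^{\top})^{t}\textbf{d}_{\mu}-\gamma\textbf{d}_{\mu} \\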
\end{split}
\end{equation}
\begin{equation}
\label{columnsum}
\begin{split}
\textbf{1}^{\top}(\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} )
&=\textbf{1}^{\top}\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{1}^{\top}\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \\
&=\textbf{d}_{\mu}^{\top}-\textbf{1}^{\top}\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \\
&=\textbf{d}_{\mu}^{\top}- \textbf{d}_{\mu}^{\top} \\
&=0
\end{split}
\end{equation}
Equations (\ref{rowsum}) and (\ref{columnsum}) show that the diagonal entries of the matrix $\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top}$
are positive and its off-diagonal entries are negative, so each of its row sums plus the corresponding column sum is positive.
This completes the proof.
\end{proof}
% \begin{equation}
% F_k = \gamma \rho_{k-1} F_{k-1} + 1,
% \end{equation}
% \begin{equation}
% \rho_{k} \leftarrow \frac{\pi(A_k | S_k)}{\mu(A_k | S_k)},
% \end{equation}
% \begin{equation}
% \theta_{k+1}= \alpha_k F_k \rho_k (r_{k+1}+\gamma \theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k)\phi_k.
% \end{equation}
% ETD(0)' \textbf{A} matrix is:
% \begin{equation}
% \textbf{A}_{\text{ETD}} = {\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi},
% \end{equation}
% where \textbf{F} is a diagonal matrix with diagonal elements $f(s) = d_{\mu}(s) \lim_{k\rightarrow \infty }\mathbb{E}_{\mu}[F_k|S_k=s]$,
% which we assume exists. As we show later, the vector $\textbf{f} \in \mathbb{R}^N$ with components
% $[\textbf{f}]_s = f(s)$ can be written as
% \begin{equation}
% \begin{array}{ccl}
% \textbf{f}&=& \textbf{d}_\mu + \gamma \textbf{P}^{\top}_{\pi} \textbf{d}_\mu + (\gamma \textbf{P}^{\top}_{\pi} \textbf{d}_\mu)^{2} + \cdots \\
% &=&(\textbf{I} - \gamma \textbf{P}^{\top}_{\pi})^{-1} \textbf{d}_\mu.
% \end{array}
% \end{equation}
% The key matrix is $\textbf{F}(\textbf{I} - \gamma \textbf{P}_{\pi})$, and the vector of its column sums is
% \begin{equation}
% \begin{array}{ccl}
% \textbf{1}^{\top} \textbf{F}(\textbf{I} - \gamma \textbf{P}_{\pi})&=& \textbf{f}^{\top}(\textbf{I} - \gamma \textbf{P}_{\pi}) \\
% &=&\textbf{d}^{\top}_{\mu}(\textbf{I} - \gamma \textbf{P}_{\pi})^{-1} (\textbf{I} - \gamma \textbf{P}_{\pi}) \\
% &=&\textbf{d}^{\top}_{\mu},
% \end{array}
% \end{equation}
% all components of which are positive. Thus, the key matrix and the $\textbf{A}_{\text{ETD}}$ matrix are positive
% definite and the algorithm is stable.
% VMETD by the following update:
% \begin{equation}
% \theta_{k+1}= \alpha_k F_k \rho_k (r_{k+1}+\gamma \theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k - \mathbb{E}_{\mu}[F_k \rho_k \delta_k])\phi_k.
% \end{equation}
% % VMETD' \textbf{A} matrix is:
% % \begin{equation}
% % \begin{array}{ccl}
% % \textbf{A}_{\text{VMETD}}&=&\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\
% % &=& \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k} - \mathbb{E}_{\mu}[\phi_k - \gamma \phi'_{k}])^{\top}]\\
% % &=&\sum_{s} d_{\mu}(s)\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k} - \mathbb{E}_{\mu}[\phi_k - \gamma \phi'_{k}])^{\top}|S_k = s] \\
% % &=&\sum_{s} d_{\mu}(s)\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k|S_k = s]\mathbb{E}_{\mu}[\rho_k \phi_k (\phi_k - \gamma \phi'_{k} - \mathbb{E}_{\mu}[\phi_k - \gamma \phi'_{k}])^{\top}|S_k = s] \\
% % &=&\sum_{s} f(s)\mathbb{E}_{\mu}[\rho_t \phi_t (\phi_t - \gamma \phi'_{t} - \mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}])^{\top}|S_t = s] \\
% % &=&\sum_{s} f(s)\mathbb{E}_{\mu}[\rho_t \phi_t (\phi_t - \gamma \phi'_{t})^{\top}|S_t = s] - \sum_{s} f(s)\mathbb{E}_{\mu}[\rho_t \phi_t \mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}|S_t = s] \\
% % &=&\sum_{s} f(s)\mathbb{E}_{\pi}[\phi_t (\phi_t - \gamma \phi'_{t})^{\top}|S_t = s] - \sum_{s} f(s)\mathbb{E}_{\pi}[\phi_t |S_t = s]\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top} \\
% % &=&\sum_{s} f(s)(\mathbb{E}_{\pi}[\phi_t (\phi_t - \gamma \phi'_{t})^{\top}|S_t = s] - \mathbb{E}_{\pi}[\phi_t |S_t = s]\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}) \\
% % &=&\sum_{s} f(s) \phi(s) (\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s') - \sum_{s} d_{\mu}(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\mu}]_{ss'}\phi(s')))^{\top}\\
% % &=&{\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{F}\textbf{E}_\mu
% % \end{array}
% % \end{equation}
% % where $\textbf{E}_\mu \in \mathbb{R}^{N \times d }$ and $\textbf{E}_\mu$'s every row has elements equal to $\mathbb{E}_{\mu}[\phi_t - \gamma \phi'_{t}]^{\top}$.
% \begin{equation}
% \begin{array}{ccl}
% \textbf{A}_{\text{VMETD}}&=&\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\
% &=& \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k]\mathbb{E}_{\mu}[F_k \rho_k \phi_k - \gamma \phi'_{k}]^{\top}\\
% &=& \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k (\phi_k - \gamma \phi'_{k})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \phi_k]\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k \phi_k - \gamma \phi'_{k}]^{\top}\\
% &=&\sum_{s} f(s) \phi(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} - \sum_{s} f(s) \phi(s) * \sum_{s} f(s)(\phi(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\phi(s'))^{\top} \\
% &=&{\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{f} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu}) \bm{\Phi} \\
% &=&{\bm{\Phi}}^{\top} (\textbf{F} - \textbf{f} \textbf{f}^{\top}) (\textbf{I} - \gamma \textbf{P}_{\pi}){\bm{\Phi}} \\
% \end{array}
% \end{equation}
% The key matrix is $\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) - \textbf{f} \textbf{d}_{\mu}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu})$, and the vector of its column sums is
% \begin{equation}
% \begin{array}{ccl}
% \textbf{1}^{\top}(\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) - \textbf{f} \textbf{d}_{\mu}^{\top} (\textbf{I} - \gamma \textbf{P}_{\mu}))&=& \textbf{d}^{\top}_{\mu} - \textbf{d}^{\top}_{\mu}(\textbf{I} - \gamma \textbf{P}_{\mu}) \textbf{1}^{\top}\textbf{f}\\
% &=&\textbf{d}^{\top}_{\mu} - \textbf{d}^{\top}_{\mu}(1 - \gamma)\textbf{1}^{\top}\textbf{f} \\
% &=&\textbf{d}^{\top}_{\mu} - \textbf{d}^{\top}_{\mu} \textbf{1}^{\top} (\textbf{f} - \gamma \textbf{f}),
% \end{array}
% \end{equation}
\section{Experimental details} \section{Experimental details}
\label{experimentaldetails} \label{experimentaldetails}
......
...@@ -97,7 +97,7 @@ stochastic gradient descent: ...@@ -97,7 +97,7 @@ stochastic gradient descent:
\end{equation} \end{equation}
where $\delta_k$ is the TD error as follows: where $\delta_k$ is the TD error as follows:
\begin{equation} \begin{equation}
\delta_k = r+\gamma \delta_k = r_{k+1}+\gamma
\theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k. \theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k.
\label{delta} \label{delta}
\end{equation} \end{equation}
...@@ -204,4 +204,41 @@ and ...@@ -204,4 +204,41 @@ and
\end{equation} \end{equation}
where $\delta_{k}$ is (\ref{deltaQ}) and $A^{*}_{k+1}={\arg \max}_{a}(\theta_{k}^{\top}\phi(s_{k+1},a))$. where $\delta_{k}$ is (\ref{deltaQ}) and $A^{*}_{k+1}={\arg \max}_{a}(\theta_{k}^{\top}\phi(s_{k+1},a))$.
This paper also introduces an additional parameter $\omega$ into the GTD and GTD2 algorithms. For details, please refer to the appendix. \subsection{Variance Minimization ETD Learning: VMETD}
\ No newline at end of file VMETD is defined by the following updates:
% \begin{equation}
% \delta_{t}= R_{t+1}+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t.
% \end{equation}
\begin{equation}
\rho_{k} \leftarrow \frac{\pi(A_k | S_k)}{\mu(A_k | S_k)}
\end{equation}
\begin{equation}
\label{fvmetd}
F_k \leftarrow \gamma \rho_{k-1}F_{k-1}+1,
\end{equation}
\begin{equation}
\label{omegavmetd}
\omega_{k+1} \leftarrow \omega_k+\beta_k(F_k \rho_k \delta_k - \omega_k),
\end{equation}
\begin{equation}
\label{thetavmetd}
\theta_{k+1}\leftarrow \theta_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \theta_k^{\top}\phi_{k+1}-\theta_k^{\top}\phi_k)\phi_k -\alpha_k \omega_{k+1}\phi_k,
\end{equation}
where $\mu$ is the behavior policy and $\pi$ is the target policy,
$F_k$ is a scalar variable with
$F_0=1$, $\omega$ is used to estimate $\mathbb{E}_{\mu}[F_k \rho_k \delta_k]$, i.e., $\omega \doteq \mathbb{E}_{\mu}[F_k \rho_k \delta_k]$, and
$\textbf{F}$ is a diagonal matrix with diagonal elements
$f(s)\doteq d_{\mu}(s)\lim_{k\rightarrow \infty}\mathbb{E}_{\mu}[F_k|S_k=s]$,
which we assume exists.
The vector $\textbf{f}\in \mathbb{R}^N$ with components
$[\textbf{f}]_s\doteq f(s)$ can be written as
\begin{equation}
\begin{split}
\textbf{f}&=\textbf{d}_{\mu}+\gamma \textbf{P}_{\pi}^{\top}\textbf{d}_{\mu}+(\gamma \textbf{P}_{\pi}^{\top})^2\textbf{d}_{\mu}+\ldots\\
&=(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}\textbf{d}_{\mu}.
\end{split}
\end{equation}
...@@ -82,4 +82,31 @@ Please refer to the appendix \ref{proofcorollary4_2} for detailed proof process. ...@@ -82,4 +82,31 @@ Please refer to the appendix \ref{proofcorollary4_2} for detailed proof process.
Then the parameter vector $\theta_k$ converges with probability one Then the parameter vector $\theta_k$ converges with probability one
to $A^{-1}b$. to $A^{-1}b$.
\end{theorem} \end{theorem}
Please refer to the appendix \ref{proofth2} for detailed proof process. Please refer to the appendix \ref{proofth2} for detailed proof process.
\ No newline at end of file
\begin{theorem}
\label{theorem3}(Convergence of VMETD).
In the case of off-policy learning, consider the iterations (\ref{omegavmetd}) and (\ref{thetavmetd}) with (\ref{delta}) of VMETD.
Let the step-size sequences $\alpha_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\beta_k>0$, for all $k$,
$
\sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\infty,
$
$
\sum_{k=0}^{\infty}\alpha_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\beta_k^2<\infty,
$
and
$
\alpha_k = o(\beta_k).
$
Assume that $(\phi_k,r_k,\phi_k')$ is an i.i.d. sequence with
uniformly bounded second moments, where $\phi_k$ and $\phi'_{k}$ are sampled from the same Markov chain.
Let $A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$,
$b=\mathrm{Cov}(r,\phi)$.
Assume that matrix $A$ is non-singular.
Then the parameter vector $\theta_k$ converges with probability one
to $A^{-1}b$.
\end{theorem}
Please refer to Appendix~\ref{proofVMETD} for the detailed proof.
\ No newline at end of file
...@@ -61,55 +61,46 @@ ...@@ -61,55 +61,46 @@
\newlabel{deltaSarsa}{{8}{4}{Variance Minimization TD Learning: VMTD}{equation.3.8}{}} \newlabel{deltaSarsa}{{8}{4}{Variance Minimization TD Learning: VMTD}{equation.3.8}{}}
\newlabel{deltaQ}{{9}{4}{Variance Minimization TD Learning: VMTD}{equation.3.9}{}} \newlabel{deltaQ}{{9}{4}{Variance Minimization TD Learning: VMTD}{equation.3.9}{}}
\citation{dalal2020tale} \citation{dalal2020tale}
\citation{dalal2020tale}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Variance Minimization TDC Learning: VMTDC}{5}{subsection.3.3}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Variance Minimization TDC Learning: VMTDC}{5}{subsection.3.3}\protected@file@percent }
\newlabel{thetavmtdc}{{11}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.11}{}} \newlabel{thetavmtdc}{{11}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.11}{}}
\newlabel{uvmtdc}{{12}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.12}{}} \newlabel{uvmtdc}{{12}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.12}{}}
\newlabel{omegavmtdc}{{13}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.13}{}} \newlabel{omegavmtdc}{{13}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.13}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Variance Minimization ETD Learning: VMETD}{5}{subsection.3.4}\protected@file@percent }
\newlabel{fvmetd}{{18}{5}{Variance Minimization ETD Learning: VMETD}{equation.3.18}{}}
\newlabel{omegavmetd}{{19}{5}{Variance Minimization ETD Learning: VMETD}{equation.3.19}{}}
\newlabel{thetavmetd}{{20}{5}{Variance Minimization ETD Learning: VMETD}{equation.3.20}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Theoretical Analysis}{5}{section.4}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {4}Theoretical Analysis}{5}{section.4}\protected@file@percent }
\newlabel{theorem1}{{4.1}{5}{}{theorem.4.1}{}} \newlabel{theorem1}{{4.1}{5}{}{theorem.4.1}{}}
\newlabel{corollary4_2}{{4.2}{5}{}{theorem.4.2}{}} \citation{dalal2020tale}
\citation{Sutton2018book} \citation{Sutton2018book}
\citation{sutton2009fast} \citation{sutton2009fast}
\citation{baird1995residual,sutton2009fast} \citation{baird1995residual,sutton2009fast}
\citation{baird1995residual,sutton2009fast,maei2011gradient} \citation{baird1995residual,sutton2009fast,maei2011gradient}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Random walk.}}{6}{figure.1}\protected@file@percent } \newlabel{corollary4_2}{{4.2}{6}{}{theorem.4.2}{}}
\newlabel{randomwalk}{{1}{6}{Random walk}{figure.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces 7-state version of Baird's off-policy counterexample.}}{6}{figure.2}\protected@file@percent }
\newlabel{bairdexample}{{2}{6}{7-state version of Baird's off-policy counterexample}{figure.2}{}}
\newlabel{theorem2}{{4.3}{6}{}{theorem.4.3}{}} \newlabel{theorem2}{{4.3}{6}{}{theorem.4.3}{}}
\newlabel{theorem3}{{4.4}{6}{}{theorem.4.4}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Experimental Studies}{6}{section.5}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {5}Experimental Studies}{6}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Testing Tasks}{6}{subsection.5.1}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Testing Tasks}{6}{subsection.5.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Random walk.}}{7}{figure.1}\protected@file@percent }
\newlabel{randomwalk}{{1}{7}{Random walk}{figure.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces 7-state version of Baird's off-policy counterexample.}}{7}{figure.2}\protected@file@percent }
\newlabel{bairdexample}{{2}{7}{7-state version of Baird's off-policy counterexample}{figure.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Experimental Results and Analysis}{7}{subsection.5.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Experimental Results and Analysis}{7}{subsection.5.2}\protected@file@percent }
\newlabel{DependentFull}{{3(a)}{7}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{sub@DependentFull}{{(a)}{7}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\newlabel{TabularFull}{{3(b)}{7}{Subfigure 3(b)}{subfigure.3.2}{}}
\newlabel{sub@TabularFull}{{(b)}{7}{Subfigure 3(b)\relax }{subfigure.3.2}{}}
\newlabel{InvertedFull}{{3(c)}{7}{Subfigure 3(c)}{subfigure.3.3}{}}
\newlabel{sub@InvertedFull}{{(c)}{7}{Subfigure 3(c)\relax }{subfigure.3.3}{}}
\newlabel{CounterExampleFull}{{3(d)}{7}{Subfigure 3(d)}{subfigure.3.4}{}}
\newlabel{sub@CounterExampleFull}{{(d)}{7}{Subfigure 3(d)\relax }{subfigure.3.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Learning curses of four evaluation environments.}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Dependent}}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Tabular}}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Inverted}}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {counterexample}}}{7}{figure.3}\protected@file@percent }
\newlabel{Evaluation_full}{{3}{7}{Learning curses of four evaluation environments}{figure.3}{}}
\citation{schwartz1993reinforcement} \citation{schwartz1993reinforcement}
\newlabel{MazeFull}{{4(a)}{8}{Subfigure 4(a)}{subfigure.4.1}{}} \newlabel{DependentFull}{{3(a)}{8}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{sub@MazeFull}{{(a)}{8}{Subfigure 4(a)\relax }{subfigure.4.1}{}} \newlabel{sub@DependentFull}{{(a)}{8}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\newlabel{CliffWalkingFull}{{4(b)}{8}{Subfigure 4(b)}{subfigure.4.2}{}} \newlabel{TabularFull}{{3(b)}{8}{Subfigure 3(b)}{subfigure.3.2}{}}
\newlabel{sub@CliffWalkingFull}{{(b)}{8}{Subfigure 4(b)\relax }{subfigure.4.2}{}} \newlabel{sub@TabularFull}{{(b)}{8}{Subfigure 3(b)\relax }{subfigure.3.2}{}}
\newlabel{MountainCarFull}{{4(c)}{8}{Subfigure 4(c)}{subfigure.4.3}{}} \newlabel{InvertedFull}{{3(c)}{8}{Subfigure 3(c)}{subfigure.3.3}{}}
\newlabel{sub@MountainCarFull}{{(c)}{8}{Subfigure 4(c)\relax }{subfigure.4.3}{}} \newlabel{sub@InvertedFull}{{(c)}{8}{Subfigure 3(c)\relax }{subfigure.3.3}{}}
\newlabel{AcrobotFull}{{4(d)}{8}{Subfigure 4(d)}{subfigure.4.4}{}} \newlabel{CounterExampleFull}{{3(d)}{8}{Subfigure 3(d)}{subfigure.3.4}{}}
\newlabel{sub@AcrobotFull}{{(d)}{8}{Subfigure 4(d)\relax }{subfigure.4.4}{}} \newlabel{sub@CounterExampleFull}{{(d)}{8}{Subfigure 3(d)\relax }{subfigure.3.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Learning curses of four contral environments.}}{8}{figure.4}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Learning curses of four evaluation environments.}}{8}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Maze}}}{8}{figure.4}\protected@file@percent } \@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Dependent}}}{8}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Cliff Walking}}}{8}{figure.4}\protected@file@percent } \@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Tabular}}}{8}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Mountain Car}}}{8}{figure.4}\protected@file@percent } \@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Inverted}}}{8}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Acrobot}}}{8}{figure.4}\protected@file@percent } \@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {counterexample}}}{8}{figure.3}\protected@file@percent }
\newlabel{Complete_full}{{4}{8}{Learning curses of four contral environments}{figure.4}{}} \newlabel{Evaluation_full}{{3}{8}{Learning curses of four evaluation environments}{figure.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Related Work}{8}{section.6}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {6}Related Work}{8}{section.6}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Difference between VMQ and R-learning}{8}{subsection.6.1}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Difference between VMQ and R-learning}{8}{subsection.6.1}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Difference between R-learning and tabular VMQ.}}{8}{table.2}\protected@file@percent } \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Difference between R-learning and tabular VMQ.}}{8}{table.2}\protected@file@percent }
...@@ -120,72 +111,86 @@ ...@@ -120,72 +111,86 @@
\citation{Sutton2018book} \citation{Sutton2018book}
\citation{schulman2015trust} \citation{schulman2015trust}
\citation{schulman2017proximal} \citation{schulman2017proximal}
\citation{borkar1997stochastic} \newlabel{MazeFull}{{4(a)}{9}{Subfigure 4(a)}{subfigure.4.1}{}}
\newlabel{sub@MazeFull}{{(a)}{9}{Subfigure 4(a)\relax }{subfigure.4.1}{}}
\newlabel{CliffWalkingFull}{{4(b)}{9}{Subfigure 4(b)}{subfigure.4.2}{}}
\newlabel{sub@CliffWalkingFull}{{(b)}{9}{Subfigure 4(b)\relax }{subfigure.4.2}{}}
\newlabel{MountainCarFull}{{4(c)}{9}{Subfigure 4(c)}{subfigure.4.3}{}}
\newlabel{sub@MountainCarFull}{{(c)}{9}{Subfigure 4(c)\relax }{subfigure.4.3}{}}
\newlabel{AcrobotFull}{{4(d)}{9}{Subfigure 4(d)}{subfigure.4.4}{}}
\newlabel{sub@AcrobotFull}{{(d)}{9}{Subfigure 4(d)\relax }{subfigure.4.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Learning curses of four contral environments.}}{9}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Maze}}}{9}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Cliff Walking}}}{9}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Mountain Car}}}{9}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Acrobot}}}{9}{figure.4}\protected@file@percent }
\newlabel{Complete_full}{{4}{9}{Learning curses of four contral environments}{figure.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Variance Reduction for TD Learning}{9}{subsection.6.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Variance Reduction for TD Learning}{9}{subsection.6.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Variance Reduction for Policy Gradient Algorithms}{9}{subsection.6.3}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Variance Reduction for Policy Gradient Algorithms}{9}{subsection.6.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion and Future Work}{9}{section.7}\protected@file@percent } \citation{borkar1997stochastic}
\@writefile{toc}{\contentsline {section}{\numberline {A}Relevant proofs}{9}{appendix.A}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {A.1}Proof of Theorem \ref {theorem1}}{9}{subsection.A.1}\protected@file@percent }
\newlabel{proofth1}{{A.1}{9}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}}
\newlabel{th1proof}{{A.1}{9}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}}
\citation{hirsch1989convergent} \citation{hirsch1989convergent}
\citation{borkar2000ode} \citation{borkar2000ode}
\citation{borkar2000ode} \citation{borkar2000ode}
\citation{borkar2000ode} \citation{borkar2000ode}
\newlabel{thetaFast}{{19}{10}{Proof of Theorem \ref {theorem1}}{equation.A.19}{}} \@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion and Future Work}{10}{section.7}\protected@file@percent }
\newlabel{omegaFast}{{20}{10}{Proof of Theorem \ref {theorem1}}{equation.A.20}{}} \@writefile{toc}{\contentsline {section}{\numberline {A}Relevant proofs}{10}{appendix.A}\protected@file@percent }
\newlabel{omegaFastFinal}{{21}{10}{Proof of Theorem \ref {theorem1}}{equation.A.21}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {A.1}Proof of Theorem \ref {theorem1}}{10}{subsection.A.1}\protected@file@percent }
\newlabel{omegaInfty}{{22}{10}{Proof of Theorem \ref {theorem1}}{equation.A.22}{}} \newlabel{proofth1}{{A.1}{10}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}}
\newlabel{odetheta}{{23}{10}{Proof of Theorem \ref {theorem1}}{equation.A.23}{}} \newlabel{th1proof}{{A.1}{10}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}}
\newlabel{thetaFast}{{24}{10}{Proof of Theorem \ref {theorem1}}{equation.A.24}{}}
\newlabel{omegaFast}{{25}{10}{Proof of Theorem \ref {theorem1}}{equation.A.25}{}}
\newlabel{omegaFastFinal}{{26}{10}{Proof of Theorem \ref {theorem1}}{equation.A.26}{}}
\newlabel{omegaInfty}{{27}{10}{Proof of Theorem \ref {theorem1}}{equation.A.27}{}}
\newlabel{odetheta}{{28}{11}{Proof of Theorem \ref {theorem1}}{equation.A.28}{}}
\newlabel{covariance}{{29}{11}{Proof of Theorem \ref {theorem1}}{equation.A.29}{}}
\newlabel{odethetafinal}{{30}{11}{Proof of Theorem \ref {theorem1}}{equation.A.30}{}}
\citation{dalal2020tale} \citation{dalal2020tale}
\citation{dalal2020tale} \citation{dalal2020tale}
\newlabel{covariance}{{24}{11}{Proof of Theorem \ref {theorem1}}{equation.A.24}{}}
\newlabel{odethetafinal}{{25}{11}{Proof of Theorem \ref {theorem1}}{equation.A.25}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.2}Proof of Corollary \ref {corollary4_2}}{11}{subsection.A.2}\protected@file@percent }
\newlabel{proofcorollary4_2}{{A.2}{11}{Proof of Corollary \ref {corollary4_2}}{subsection.A.2}{}}
\newlabel{matrixassumption}{{A.1}{11}{}{theorem.A.1}{}}
\newlabel{stepsizeassumption}{{A.2}{11}{}{theorem.A.2}{}}
\newlabel{sparseprojection}{{A.3}{11}{}{theorem.A.3}{}}
\citation{dalal2020tale} \citation{dalal2020tale}
\citation{dalal2020tale} \citation{dalal2020tale}
\citation{sutton2009fast} \citation{sutton2009fast}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.2}Proof of Corollary \ref {corollary4_2}}{12}{subsection.A.2}\protected@file@percent }
\newlabel{proofcorollary4_2}{{A.2}{12}{Proof of Corollary \ref {corollary4_2}}{subsection.A.2}{}}
\newlabel{matrixassumption}{{A.1}{12}{}{theorem.A.1}{}}
\newlabel{stepsizeassumption}{{A.2}{12}{}{theorem.A.2}{}}
\newlabel{sparseprojection}{{A.3}{12}{}{theorem.A.3}{}}
\newlabel{sparseprojectiontheta}{{35}{12}{}{equation.A.35}{}}
\newlabel{sparseprojectionomega}{{36}{12}{}{equation.A.36}{}}
\citation{hirsch1989convergent} \citation{hirsch1989convergent}
\newlabel{sparseprojectiontheta}{{30}{12}{}{equation.A.30}{}}
\newlabel{sparseprojectionomega}{{31}{12}{}{equation.A.31}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.3}Proof of Theorem \ref {theorem2}}{12}{subsection.A.3}\protected@file@percent }
\newlabel{proofth2}{{A.3}{12}{Proof of Theorem \ref {theorem2}}{subsection.A.3}{}}
\newlabel{thetavmtdcFastest}{{32}{12}{Proof of Theorem \ref {theorem2}}{equation.A.32}{}}
\newlabel{uvmtdcFastest}{{33}{12}{Proof of Theorem \ref {theorem2}}{equation.A.33}{}}
\newlabel{omegavmtdcFastest}{{34}{12}{Proof of Theorem \ref {theorem2}}{equation.A.34}{}}
\citation{borkar2000ode} \citation{borkar2000ode}
\citation{borkar2000ode} \citation{borkar2000ode}
\citation{borkar2000ode} \citation{borkar2000ode}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.3}Proof of Theorem \ref {theorem2}}{13}{subsection.A.3}\protected@file@percent }
\newlabel{proofth2}{{A.3}{13}{Proof of Theorem \ref {theorem2}}{subsection.A.3}{}}
\newlabel{thetavmtdcFastest}{{37}{13}{Proof of Theorem \ref {theorem2}}{equation.A.37}{}}
\newlabel{uvmtdcFastest}{{38}{13}{Proof of Theorem \ref {theorem2}}{equation.A.38}{}}
\newlabel{omegavmtdcFastest}{{39}{13}{Proof of Theorem \ref {theorem2}}{equation.A.39}{}}
\newlabel{omegavmtdcFastestFinal}{{40}{13}{Proof of Theorem \ref {theorem2}}{equation.A.40}{}}
\newlabel{omegavmtdcInfty}{{41}{13}{Proof of Theorem \ref {theorem2}}{equation.A.41}{}}
\citation{hirsch1989convergent} \citation{hirsch1989convergent}
\citation{borkar2000ode} \citation{borkar2000ode}
\citation{borkar2000ode} \citation{borkar2000ode}
\citation{borkar2000ode} \citation{borkar2000ode}
\newlabel{omegavmtdcFastestFinal}{{35}{13}{Proof of Theorem \ref {theorem2}}{equation.A.35}{}} \newlabel{thetavmtdcFaster}{{42}{14}{Proof of Theorem \ref {theorem2}}{equation.A.42}{}}
\newlabel{omegavmtdcInfty}{{36}{13}{Proof of Theorem \ref {theorem2}}{equation.A.36}{}} \newlabel{uvmtdcFaster}{{43}{14}{Proof of Theorem \ref {theorem2}}{equation.A.43}{}}
\newlabel{thetavmtdcFaster}{{37}{13}{Proof of Theorem \ref {theorem2}}{equation.A.37}{}} \newlabel{uvmtdcFasterFinal}{{44}{14}{Proof of Theorem \ref {theorem2}}{equation.A.44}{}}
\newlabel{uvmtdcFaster}{{38}{13}{Proof of Theorem \ref {theorem2}}{equation.A.38}{}} \newlabel{uvmtdcInfty}{{45}{14}{Proof of Theorem \ref {theorem2}}{equation.A.45}{}}
\newlabel{uvmtdcFasterFinal}{{39}{13}{Proof of Theorem \ref {theorem2}}{equation.A.39}{}} \newlabel{thetavmtdcSlowerFinal}{{47}{14}{Proof of Theorem \ref {theorem2}}{equation.A.47}{}}
\newlabel{uvmtdcInfty}{{40}{13}{Proof of Theorem \ref {theorem2}}{equation.A.40}{}}
\newlabel{thetavmtdcSlowerFinal}{{42}{14}{Proof of Theorem \ref {theorem2}}{equation.A.42}{}}
\newlabel{odethetavmtdcfinal}{{43}{14}{Proof of Theorem \ref {theorem2}}{equation.A.43}{}}
\@writefile{toc}{\contentsline {section}{\numberline {B}Experimental details}{14}{appendix.B}\protected@file@percent }
\newlabel{experimentaldetails}{{B}{14}{Experimental details}{appendix.B}{}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces VMTDC algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.2}\protected@file@percent } \@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces VMTDC algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.2}\protected@file@percent }
\newlabel{alg:algorithm 2}{{2}{15}{Proof of Theorem \ref {theorem2}}{algorithm.2}{}} \newlabel{alg:algorithm 2}{{2}{15}{Proof of Theorem \ref {theorem2}}{algorithm.2}{}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces VMGTD algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.3}\protected@file@percent } \@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces VMETD algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.3}\protected@file@percent }
\newlabel{alg:algorithm 3}{{3}{15}{Proof of Theorem \ref {theorem2}}{algorithm.3}{}} \newlabel{alg:algorithm 5}{{3}{15}{Proof of Theorem \ref {theorem2}}{algorithm.3}{}}
\newlabel{odethetavmtdcfinal}{{48}{15}{Proof of Theorem \ref {theorem2}}{equation.A.48}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.4}Proof of VMETD convergence}{16}{subsection.A.4}\protected@file@percent }
\newlabel{proofVMETD}{{A.4}{16}{Proof of VMETD convergence}{subsection.A.4}{}}
\newlabel{rowsum}{{51}{16}{Proof of VMETD convergence}{equation.A.51}{}}
\newlabel{columnsum}{{52}{16}{Proof of VMETD convergence}{equation.A.52}{}}
\bibstyle{named} \bibstyle{named}
\bibdata{neurips_2024} \bibdata{neurips_2024}
\bibcite{baird1995residual}{{1}{1995}{{Baird and others}}{{}}} \bibcite{baird1995residual}{{1}{1995}{{Baird and others}}{{}}}
\bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano \bgroup \em et al.\egroup }}{{}}} \bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano \bgroup \em et al.\egroup }}{{}}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces VMGTD2 algorithm with linear function approximation in the off-policy setting}}{16}{algorithm.4}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {B}Experimental details}{17}{appendix.B}\protected@file@percent }
\newlabel{alg:algorithm 4}{{4}{16}{Proof of Theorem \ref {theorem2}}{algorithm.4}{}} \newlabel{experimentaldetails}{{B}{17}{Experimental details}{appendix.B}{}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Learning rates ($lr$) of four control experiments.}}{16}{table.3}\protected@file@percent }
\newlabel{lrofways}{{3}{16}{Learning rates ($lr$) of four control experiments}{table.3}{}}
\bibcite{borkar2000ode}{{3}{2000}{{Borkar and Meyn}}{{}}} \bibcite{borkar2000ode}{{3}{2000}{{Borkar and Meyn}}{{}}}
\bibcite{borkar1997stochastic}{{4}{1997}{{Borkar}}{{}}} \bibcite{borkar1997stochastic}{{4}{1997}{{Borkar}}{{}}}
\bibcite{chen2023modified}{{5}{2023}{{Chen \bgroup \em et al.\egroup }}{{}}} \bibcite{chen2023modified}{{5}{2023}{{Chen \bgroup \em et al.\egroup }}{{}}}
...@@ -202,6 +207,8 @@ ...@@ -202,6 +207,8 @@
\bibcite{liu2016proximal}{{16}{2016}{{Liu \bgroup \em et al.\egroup }}{{}}} \bibcite{liu2016proximal}{{16}{2016}{{Liu \bgroup \em et al.\egroup }}{{}}}
\bibcite{liu2018proximal}{{17}{2018}{{Liu \bgroup \em et al.\egroup }}{{}}} \bibcite{liu2018proximal}{{17}{2018}{{Liu \bgroup \em et al.\egroup }}{{}}}
\bibcite{maei2011gradient}{{18}{2011}{{Maei}}{{}}} \bibcite{maei2011gradient}{{18}{2011}{{Maei}}{{}}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Learning rates ($lr$) of four control experiments.}}{18}{table.3}\protected@file@percent }
\newlabel{lrofways}{{3}{18}{Learning rates ($lr$) of four control experiments}{table.3}{}}
\bibcite{ng1999policy}{{19}{1999}{{Ng \bgroup \em et al.\egroup }}{{}}} \bibcite{ng1999policy}{{19}{1999}{{Ng \bgroup \em et al.\egroup }}{{}}}
\bibcite{pan2017accelerated}{{20}{2017}{{Pan \bgroup \em et al.\egroup }}{{}}} \bibcite{pan2017accelerated}{{20}{2017}{{Pan \bgroup \em et al.\egroup }}{{}}}
\bibcite{schulman2015trust}{{21}{2015}{{Schulman \bgroup \em et al.\egroup }}{{}}} \bibcite{schulman2015trust}{{21}{2015}{{Schulman \bgroup \em et al.\egroup }}{{}}}
...@@ -216,4 +223,4 @@ ...@@ -216,4 +223,4 @@
\bibcite{xu2019reanalysis}{{30}{2019}{{Xu \bgroup \em et al.\egroup }}{{}}} \bibcite{xu2019reanalysis}{{30}{2019}{{Xu \bgroup \em et al.\egroup }}{{}}}
\bibcite{xu2020reanalysis}{{31}{2020}{{Xu \bgroup \em et al.\egroup }}{{}}} \bibcite{xu2020reanalysis}{{31}{2020}{{Xu \bgroup \em et al.\egroup }}{{}}}
\bibcite{zhang2022truncated}{{32}{2022}{{Zhang and Whiteson}}{{}}} \bibcite{zhang2022truncated}{{32}{2022}{{Zhang and Whiteson}}{{}}}
\gdef \@abspage@last{18} \gdef \@abspage@last{19}
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 19 MAY 2024 17:37 This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 20 JUN 2024 01:22
entering extended mode entering extended mode
restricted \write18 enabled. restricted \write18 enabled.
file:line:error style messages enabled. file:line:error style messages enabled.
...@@ -633,53 +633,71 @@ File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10) ...@@ -633,53 +633,71 @@ File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10)
\tikz@expandcount=\count328 \tikz@expandcount=\count328
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex
File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10) File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10)
))) ))) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/bm.sty
\c@theorem=\count329 Package: bm 2022/01/05 v1.2f Bold Symbol Support (DPC/FMi)
\symboldoperators=\mathgroup6
\symboldletters=\mathgroup7
\symboldsymbols=\mathgroup8
Package bm Info: No bold for \OMX/cmex/m/n, using \pmb.
Package bm Info: No bold for \U/msa/m/n, using \pmb.
Package bm Info: No bold for \U/msb/m/n, using \pmb.
LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 149.
) (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/esvect.sty
Package: esvect
\symesvector=\mathgroup9
) (d:/software/texlive/2023/texmf-dist/tex/latex/multirow/multirow.sty
Package: multirow 2021/03/15 v2.8 Span multiple rows of a table
\multirow@colwidth=\skip77
\multirow@cntb=\count329
\multirow@dima=\skip78
\bigstrutjot=\dimen319
)
\c@theorem=\count330
(d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithm.sty (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithm.sty
Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment
(d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty (d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty
Package: float 2001/11/08 v1.3d Float enhancements (AL) Package: float 2001/11/08 v1.3d Float enhancements (AL)
\c@float@type=\count330 \c@float@type=\count331
\float@exts=\toks51 \float@exts=\toks51
\float@box=\box73 \float@box=\box73
\@float@everytoks=\toks52 \@float@everytoks=\toks52
\@floatcapt=\box74 \@floatcapt=\box74
) )
\@float@every@algorithm=\toks53 \@float@every@algorithm=\toks53
\c@algorithm=\count331 \c@algorithm=\count332
) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithmic.sty ) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithmic.sty
Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic' Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic'
\c@ALC@unique=\count332 \c@ALC@unique=\count333
\c@ALC@line=\count333 \c@ALC@line=\count334
\c@ALC@rem=\count334 \c@ALC@rem=\count335
\c@ALC@depth=\count335 \c@ALC@depth=\count336
\ALC@tlm=\skip77 \ALC@tlm=\skip79
\algorithmicindent=\skip78 \algorithmicindent=\skip80
) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def ) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX) File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count336 \l__color_backend_stack_int=\count337
\l__pdf_internal_box=\box75 \l__pdf_internal_box=\box75
) (./neurips_2024.aux) ) (./neurips_2024.aux)
\openout1 = `neurips_2024.aux'. \openout1 = `neurips_2024.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 106. LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 106. LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 106. LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 106. LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 106. LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 106. LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 106. LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 106. LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 106. LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 109.
LaTeX Font Info: ... okay on input line 106. LaTeX Font Info: ... okay on input line 109.
*geometry* driver: auto-detecting *geometry* driver: auto-detecting
*geometry* detected driver: pdftex *geometry* detected driver: pdftex
...@@ -747,24 +765,24 @@ LaTeX Font Info: ... okay on input line 106. ...@@ -747,24 +765,24 @@ LaTeX Font Info: ... okay on input line 106.
* \@reversemarginfalse * \@reversemarginfalse
* (1in=72.27pt=25.4mm, 1cm=28.453pt) * (1in=72.27pt=25.4mm, 1cm=28.453pt)
Package hyperref Info: Link coloring OFF on input line 106. Package hyperref Info: Link coloring OFF on input line 109.
(./neurips_2024.out) (./neurips_2024.out) (./neurips_2024.out) (./neurips_2024.out)
\@outlinefile=\write4 \@outlinefile=\write4
\openout4 = `neurips_2024.out'. \openout4 = `neurips_2024.out'.
LaTeX Info: Redefining \microtypecontext on input line 106. LaTeX Info: Redefining \microtypecontext on input line 109.
Package microtype Info: Applying patch `item' on input line 106. Package microtype Info: Applying patch `item' on input line 109.
Package microtype Info: Applying patch `toc' on input line 106. Package microtype Info: Applying patch `toc' on input line 109.
Package microtype Info: Applying patch `eqnum' on input line 106. Package microtype Info: Applying patch `eqnum' on input line 109.
Package microtype Info: Applying patch `footnote' on input line 106. Package microtype Info: Applying patch `footnote' on input line 109.
Package microtype Info: Applying patch `verbatim' on input line 106. Package microtype Info: Applying patch `verbatim' on input line 109.
Package microtype Info: Generating PDF output. Package microtype Info: Generating PDF output.
Package microtype Info: Character protrusion enabled (level 2). Package microtype Info: Character protrusion enabled (level 2).
Package microtype Info: Using default protrusion set `alltext'. Package microtype Info: Using default protrusion set `alltext'.
Package microtype Info: Automatic font expansion enabled (level 2), Package microtype Info: Automatic font expansion enabled (level 2),
(microtype) stretch: 20, shrink: 20, step: 1, non-selected. (microtype) stretch: 20, shrink: 20, step: 1, non-selected.
Package microtype Info: Using default expansion set `alltext-nott'. Package microtype Info: Using default expansion set `alltext-nott'.
LaTeX Info: Redefining \showhyphens on input line 106. LaTeX Info: Redefining \showhyphens on input line 109.
Package microtype Info: No adjustment of tracking. Package microtype Info: No adjustment of tracking.
Package microtype Info: No adjustment of interword spacing. Package microtype Info: No adjustment of interword spacing.
Package microtype Info: No adjustment of character kerning. Package microtype Info: No adjustment of character kerning.
...@@ -772,16 +790,16 @@ Package microtype Info: No adjustment of character kerning. ...@@ -772,16 +790,16 @@ Package microtype Info: No adjustment of character kerning.
File: mt-ptm.cfg 2006/04/20 v1.7 microtype config. file: Times (RS) File: mt-ptm.cfg 2006/04/20 v1.7 microtype config. file: Times (RS)
) (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii ) (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).] [Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count337 \scratchcounter=\count338
\scratchdimen=\dimen319 \scratchdimen=\dimen320
\scratchbox=\box76 \scratchbox=\box76
\nofMPsegments=\count338 \nofMPsegments=\count339
\nofMParguments=\count339 \nofMParguments=\count340
\everyMPshowfont=\toks54 \everyMPshowfont=\toks54
\MPscratchCnt=\count340 \MPscratchCnt=\count341
\MPscratchDim=\dimen320 \MPscratchDim=\dimen321
\MPnumerator=\count341 \MPnumerator=\count342
\makeMPintoPDFobject=\count342 \makeMPintoPDFobject=\count343
\everyMPtoPDFconversion=\toks55 \everyMPtoPDFconversion=\toks55
) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty ) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
...@@ -791,19 +809,23 @@ File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live ...@@ -791,19 +809,23 @@ File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live
)) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-cmr.cfg )) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-cmr.cfg
File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman (RS) File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman (RS)
) )
LaTeX Font Info: Trying to load font information for U+msa on input line 110. LaTeX Font Info: Trying to load font information for U+msa on input line 113.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A File: umsa.fd 2013/01/14 v3.01 AMS symbols A
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msa.cfg ) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msa.cfg
File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS) File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
) )
LaTeX Font Info: Trying to load font information for U+msb on input line 110. LaTeX Font Info: Trying to load font information for U+msb on input line 113.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B File: umsb.fd 2013/01/14 v3.01 AMS symbols B
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msb.cfg ) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msb.cfg
File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
) )
LaTeX Font Info: Trying to load font information for T1+cmtt on input line 110. LaTeX Font Info: Trying to load font information for U+esvect on input line 113.
(d:/software/texlive/2023/texmf-dist/tex/latex/esvect/uesvect.fd
File: uesvect.fd
)
LaTeX Font Info: Trying to load font information for T1+cmtt on input line 113.
(d:/software/texlive/2023/texmf-dist/tex/latex/base/t1cmtt.fd (d:/software/texlive/2023/texmf-dist/tex/latex/base/t1cmtt.fd
File: t1cmtt.fd 2022/07/10 v2.5l Standard LaTeX font definitions File: t1cmtt.fd 2022/07/10 v2.5l Standard LaTeX font definitions
) )
...@@ -811,7 +833,7 @@ Package microtype Info: Loading generic protrusion settings for font family ...@@ -811,7 +833,7 @@ Package microtype Info: Loading generic protrusion settings for font family
(microtype) `cmtt' (encoding: T1). (microtype) `cmtt' (encoding: T1).
(microtype) For optimal results, create family-specific settings. (microtype) For optimal results, create family-specific settings.
(microtype) See the microtype manual for details. (microtype) See the microtype manual for details.
LaTeX Font Info: Trying to load font information for T1+phv on input line 126. LaTeX Font Info: Trying to load font information for T1+phv on input line 129.
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/t1phv.fd (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/t1phv.fd
File: t1phv.fd 2020/03/25 scalable font definitions for T1/phv. File: t1phv.fd 2020/03/25 scalable font definitions for T1/phv.
) )
...@@ -829,82 +851,68 @@ pdfTeX warning (ext4): destination with the same identifier (name{table.1}) has ...@@ -829,82 +851,68 @@ pdfTeX warning (ext4): destination with the same identifier (name{table.1}) has
l.77 \end{equation*} l.77 \end{equation*}
] ]
Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 138. Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 138.
[4]) (./main/theory.tex [5]) (./main/experiment.tex (./main/pic/randomwalk.tex) (./main/pic/BairdExample.tex) [6 [4]) (./main/theory.tex [5]) (./main/experiment.tex (./main/pic/randomwalk.tex) (./main/pic/BairdExample.tex) [6]
pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) has been already used, duplicate ignored <main/pic/maze_13_13.pdf, id=318, 493.1646pt x 387.62602pt>
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.46
pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.46
]
<main/pic/maze_13_13.pdf, id=300, 493.1646pt x 387.62602pt>
File: main/pic/maze_13_13.pdf Graphic file (type pdf) File: main/pic/maze_13_13.pdf Graphic file (type pdf)
<use main/pic/maze_13_13.pdf> <use main/pic/maze_13_13.pdf>
Package pdftex.def Info: main/pic/maze_13_13.pdf used on input line 53. Package pdftex.def Info: main/pic/maze_13_13.pdf used on input line 53.
(pdftex.def) Requested size: 73.9715pt x 58.14139pt. (pdftex.def) Requested size: 73.9715pt x 58.14139pt.
<main/pic/dependent_new.pdf, id=302, 557.01889pt x 394.59978pt> <main/pic/dependent_new.pdf, id=320, 557.01889pt x 394.59978pt>
File: main/pic/dependent_new.pdf Graphic file (type pdf) File: main/pic/dependent_new.pdf Graphic file (type pdf)
<use main/pic/dependent_new.pdf> <use main/pic/dependent_new.pdf>
Package pdftex.def Info: main/pic/dependent_new.pdf used on input line 78. Package pdftex.def Info: main/pic/dependent_new.pdf used on input line 78.
(pdftex.def) Requested size: 119.24675pt x 79.49658pt. (pdftex.def) Requested size: 119.24675pt x 79.49658pt.
<main/pic/tabular_new.pdf, id=303, 566.51224pt x 401.1703pt> <main/pic/tabular_new.pdf, id=321, 566.51224pt x 401.1703pt>
File: main/pic/tabular_new.pdf Graphic file (type pdf) File: main/pic/tabular_new.pdf Graphic file (type pdf)
<use main/pic/tabular_new.pdf> <use main/pic/tabular_new.pdf>
Package pdftex.def Info: main/pic/tabular_new.pdf used on input line 82. Package pdftex.def Info: main/pic/tabular_new.pdf used on input line 82.
(pdftex.def) Requested size: 119.23904pt x 79.49194pt. (pdftex.def) Requested size: 119.23904pt x 79.49194pt.
<main/pic/inverted_new.pdf, id=304, 565.61766pt x 402.45422pt> <main/pic/inverted_new.pdf, id=322, 565.61766pt x 402.45422pt>
File: main/pic/inverted_new.pdf Graphic file (type pdf) File: main/pic/inverted_new.pdf Graphic file (type pdf)
<use main/pic/inverted_new.pdf> <use main/pic/inverted_new.pdf>
Package pdftex.def Info: main/pic/inverted_new.pdf used on input line 87. Package pdftex.def Info: main/pic/inverted_new.pdf used on input line 87.
(pdftex.def) Requested size: 119.24063pt x 79.49458pt. (pdftex.def) Requested size: 119.24063pt x 79.49458pt.
<main/pic/counterexample_quanju_new.pdf, id=305, 471.30164pt x 401.08943pt> <main/pic/counterexample_quanju_new.pdf, id=323, 471.30164pt x 401.08943pt>
File: main/pic/counterexample_quanju_new.pdf Graphic file (type pdf) File: main/pic/counterexample_quanju_new.pdf Graphic file (type pdf)
<use main/pic/counterexample_quanju_new.pdf> <use main/pic/counterexample_quanju_new.pdf>
Package pdftex.def Info: main/pic/counterexample_quanju_new.pdf used on input line 91. Package pdftex.def Info: main/pic/counterexample_quanju_new.pdf used on input line 91.
(pdftex.def) Requested size: 119.24184pt x 79.49428pt. (pdftex.def) Requested size: 119.24184pt x 79.49428pt.
Underfull \vbox (badness 3907) has occurred while \output is active []
[7 [7
pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) has been already used, duplicate ignored pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box <argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_... \__shipout_drop_firstpage_...
l.131 l.131
<./main/pic/maze_13_13.pdf> <./main/pic/dependent_new.pdf
pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) has been already used, duplicate ignored
pdfTeX warning: pdflatex.exe (file ./main/pic/dependent_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page <argument> ...shipout:D \box_use:N \l_shipout_box
> <./main/pic/tabular_new.pdf \__shipout_drop_firstpage_...
l.131
pdfTeX warning: pdflatex.exe (file ./main/pic/tabular_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page <./main/pic/maze_13_13.pdf>]) (./main/relatedwork.tex
> <./main/pic/inverted_new.pdf <main/pic/maze_complete.pdf, id=340, 595.42892pt x 465.38112pt>
pdfTeX warning: pdflatex.exe (file ./main/pic/inverted_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./main/pic/counterexample_quanju_new.pdf
pdfTeX warning: pdflatex.exe (file ./main/pic/counterexample_quanju_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>]) (./main/relatedwork.tex
<main/pic/maze_complete.pdf, id=426, 595.42892pt x 465.38112pt>
File: main/pic/maze_complete.pdf Graphic file (type pdf) File: main/pic/maze_complete.pdf Graphic file (type pdf)
<use main/pic/maze_complete.pdf> <use main/pic/maze_complete.pdf>
Package pdftex.def Info: main/pic/maze_complete.pdf used on input line 7. Package pdftex.def Info: main/pic/maze_complete.pdf used on input line 7.
(pdftex.def) Requested size: 119.24721pt x 79.4901pt. (pdftex.def) Requested size: 119.24721pt x 79.4901pt.
<main/pic/cw_complete.pdf, id=427, 570.46333pt x 465.10928pt> <main/pic/cw_complete.pdf, id=341, 570.46333pt x 465.10928pt>
File: main/pic/cw_complete.pdf Graphic file (type pdf) File: main/pic/cw_complete.pdf Graphic file (type pdf)
<use main/pic/cw_complete.pdf> <use main/pic/cw_complete.pdf>
Package pdftex.def Info: main/pic/cw_complete.pdf used on input line 11. Package pdftex.def Info: main/pic/cw_complete.pdf used on input line 11.
(pdftex.def) Requested size: 119.24373pt x 79.49335pt. (pdftex.def) Requested size: 119.24373pt x 79.49335pt.
<main/pic/mt_complete.pdf, id=428, 569.92673pt x 468.75475pt> <main/pic/mt_complete.pdf, id=342, 569.92673pt x 468.75475pt>
File: main/pic/mt_complete.pdf Graphic file (type pdf) File: main/pic/mt_complete.pdf Graphic file (type pdf)
<use main/pic/mt_complete.pdf> <use main/pic/mt_complete.pdf>
Package pdftex.def Info: main/pic/mt_complete.pdf used on input line 16. Package pdftex.def Info: main/pic/mt_complete.pdf used on input line 16.
(pdftex.def) Requested size: 119.24463pt x 79.49413pt. (pdftex.def) Requested size: 119.24463pt x 79.49413pt.
<main/pic/Acrobot_complete.pdf, id=429, 564.99583pt x 478.09494pt> <main/pic/Acrobot_complete.pdf, id=343, 564.99583pt x 478.09494pt>
File: main/pic/Acrobot_complete.pdf Graphic file (type pdf) File: main/pic/Acrobot_complete.pdf Graphic file (type pdf)
<use main/pic/Acrobot_complete.pdf> <use main/pic/Acrobot_complete.pdf>
Package pdftex.def Info: main/pic/Acrobot_complete.pdf used on input line 20. Package pdftex.def Info: main/pic/Acrobot_complete.pdf used on input line 20.
(pdftex.def) Requested size: 119.23886pt x 79.49504pt. (pdftex.def) Requested size: 119.23886pt x 79.49504pt.
[8 [8
pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) has been already used, duplicate ignored pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box <argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_... \__shipout_drop_firstpage_...
l.57 l.57
...@@ -913,7 +921,24 @@ pdfTeX warning (ext4): destination with the same identifier (name{table.2}) has ...@@ -913,7 +921,24 @@ pdfTeX warning (ext4): destination with the same identifier (name{table.2}) has
<argument> ...shipout:D \box_use:N \l_shipout_box <argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_... \__shipout_drop_firstpage_...
l.57 l.57
<./main/pic/maze_complete.pdf> <./main/pic/cw_complete.pdf <./main/pic/dependent_new.pdf> <./main/pic/tabular_new.pdf
pdfTeX warning: pdflatex.exe (file ./main/pic/tabular_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./main/pic/inverted_new.pdf
pdfTeX warning: pdflatex.exe (file ./main/pic/inverted_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./main/pic/counterexample_quanju_new.pdf
pdfTeX warning: pdflatex.exe (file ./main/pic/counterexample_quanju_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>]) (./main/conclusion.tex)
Underfull \vbox (badness 10000) has occurred while \output is active []
[9
pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.138
<./main/pic/maze_complete.pdf> <./main/pic/cw_complete.pdf
pdfTeX warning: pdflatex.exe (file ./main/pic/cw_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page pdfTeX warning: pdflatex.exe (file ./main/pic/cw_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./main/pic/mt_complete.pdf > <./main/pic/mt_complete.pdf
...@@ -922,45 +947,58 @@ pdfTeX warning: pdflatex.exe (file ./main/pic/mt_complete.pdf): PDF inclusion: m ...@@ -922,45 +947,58 @@ pdfTeX warning: pdflatex.exe (file ./main/pic/mt_complete.pdf): PDF inclusion: m
> <./main/pic/Acrobot_complete.pdf > <./main/pic/Acrobot_complete.pdf
pdfTeX warning: pdflatex.exe (file ./main/pic/Acrobot_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page pdfTeX warning: pdflatex.exe (file ./main/pic/Acrobot_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>]) (./main/conclusion.tex) (./main/appendix.tex [9] [10] >] (./main/appendix.tex [10] [11]
LaTeX Warning: Command \textemdash invalid in math mode on input line 229. LaTeX Warning: Command \textemdash invalid in math mode on input line 229.
LaTeX Warning: Command \textemdash invalid in math mode on input line 229. LaTeX Warning: Command \textemdash invalid in math mode on input line 229.
[11] [12] [13] [12] [13] [14]
Underfull \hbox (badness 1946) in paragraph at lines 683--696 Overfull \hbox (68.70882pt too wide) detected at line 614
[]
[]
Underfull \vbox (badness 3343) has occurred while \output is active []
[15]
Overfull \hbox (47.39436pt too wide) detected at line 627
[]
[]
[16]
Underfull \hbox (badness 1946) in paragraph at lines 788--801
[]\T1/ptm/m/n/10 (+20) Three ran-dom walk ex-per-i-ments: the $\OML/cmm/m/it/10 $ \T1/ptm/m/n/10 (+20) val-ues for all al-go-rithms are in the range of []\T1/ptm/m/n/10 (+20) Three ran-dom walk ex-per-i-ments: the $\OML/cmm/m/it/10 $ \T1/ptm/m/n/10 (+20) val-ues for all al-go-rithms are in the range of
[] []
[14] [15]
Overfull \hbox (33.58313pt too wide) in paragraph at lines 738--752 Overfull \hbox (33.58313pt too wide) in paragraph at lines 843--857
[][] [][]
[] []
) (./neurips_2024.bbl [16 ) (./neurips_2024.bbl [17] [18
pdfTeX warning (ext4): destination with the same identifier (name{table.3}) has been already used, duplicate ignored pdfTeX warning (ext4): destination with the same identifier (name{table.3}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box <argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_... \__shipout_drop_firstpage_...
l.12 l.92
] [17]) [18] (./neurips_2024.aux) ]) [19] (./neurips_2024.aux)
Package rerunfilecheck Info: File `neurips_2024.out' has not changed. Package rerunfilecheck Info: File `neurips_2024.out' has not changed.
(rerunfilecheck) Checksum: E5788AEC1D4F936207967A17A6B3E0A1;3587. (rerunfilecheck) Checksum: FAC1A00F891A3C2C3EDDFFA999CD212C;4060.
) )
Here is how much of TeX's memory you used: Here is how much of TeX's memory you used:
26626 strings out of 476025 26929 strings out of 476025
484842 string characters out of 5789524 489644 string characters out of 5789524
1897382 words of memory out of 5000000 1891382 words of memory out of 5000000
46086 multiletter control sequences out of 15000+600000 46337 multiletter control sequences out of 15000+600000
567455 words of font info for 255 fonts, out of 8000000 for 9000 577716 words of font info for 288 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191 1141 hyphenation exceptions out of 8191
84i,16n,80p,1005b,1065s stack positions out of 10000i,1000n,20000p,200000b,200000s 84i,22n,80p,1005b,1065s stack positions out of 10000i,1000n,20000p,200000b,200000s
<d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/cm-super/sftt1000.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmb8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmri8a.pfb> 
<d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/cm-super/sftt1000.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/helvetic/uhvr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmb8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
Output written on neurips_2024.pdf (18 pages, 2290177 bytes). Output written on neurips_2024.pdf (19 pages, 2305737 bytes).
PDF statistics: PDF statistics:
1011 PDF objects out of 1200 (max. 8388607) 1031 PDF objects out of 1200 (max. 8388607)
839 compressed objects within 9 object streams 856 compressed objects within 9 object streams
195 named destinations out of 1000 (max. 500000) 193 named destinations out of 1000 (max. 500000)
52442 words of extra memory for PDF output out of 61914 (max. 10000000) 61162 words of extra memory for PDF output out of 61914 (max. 10000000)
...@@ -4,17 +4,19 @@ ...@@ -4,17 +4,19 @@
\BOOKMARK [2][-]{subsection.3.1}{\376\377\000M\000o\000t\000i\000v\000a\000t\000i\000o\000n}{section.3}% 4 \BOOKMARK [2][-]{subsection.3.1}{\376\377\000M\000o\000t\000i\000v\000a\000t\000i\000o\000n}{section.3}% 4
\BOOKMARK [2][-]{subsection.3.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000T\000D}{section.3}% 5 \BOOKMARK [2][-]{subsection.3.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000T\000D}{section.3}% 5
\BOOKMARK [2][-]{subsection.3.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000T\000D\000C\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000T\000D\000C}{section.3}% 6 \BOOKMARK [2][-]{subsection.3.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000T\000D\000C\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000T\000D\000C}{section.3}% 6
\BOOKMARK [1][-]{section.4}{\376\377\000T\000h\000e\000o\000r\000e\000t\000i\000c\000a\000l\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{}% 7 \BOOKMARK [2][-]{subsection.3.4}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000M\000i\000n\000i\000m\000i\000z\000a\000t\000i\000o\000n\000\040\000E\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g\000:\000\040\000V\000M\000E\000T\000D}{section.3}% 7
\BOOKMARK [1][-]{section.5}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000S\000t\000u\000d\000i\000e\000s}{}% 8 \BOOKMARK [1][-]{section.4}{\376\377\000T\000h\000e\000o\000r\000e\000t\000i\000c\000a\000l\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{}% 8
\BOOKMARK [2][-]{subsection.5.1}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000T\000a\000s\000k\000s}{section.5}% 9 \BOOKMARK [1][-]{section.5}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000S\000t\000u\000d\000i\000e\000s}{}% 9
\BOOKMARK [2][-]{subsection.5.2}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000R\000e\000s\000u\000l\000t\000s\000\040\000a\000n\000d\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section.5}% 10 \BOOKMARK [2][-]{subsection.5.1}{\376\377\000T\000e\000s\000t\000i\000n\000g\000\040\000T\000a\000s\000k\000s}{section.5}% 10
\BOOKMARK [1][-]{section.6}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000W\000o\000r\000k}{}% 11 \BOOKMARK [2][-]{subsection.5.2}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000R\000e\000s\000u\000l\000t\000s\000\040\000a\000n\000d\000\040\000A\000n\000a\000l\000y\000s\000i\000s}{section.5}% 11
\BOOKMARK [2][-]{subsection.6.1}{\376\377\000D\000i\000f\000f\000e\000r\000e\000n\000c\000e\000\040\000b\000e\000t\000w\000e\000e\000n\000\040\000V\000M\000Q\000\040\000a\000n\000d\000\040\000R\000-\000l\000e\000a\000r\000n\000i\000n\000g}{section.6}% 12 \BOOKMARK [1][-]{section.6}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000W\000o\000r\000k}{}% 12
\BOOKMARK [2][-]{subsection.6.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g}{section.6}% 13 \BOOKMARK [2][-]{subsection.6.1}{\376\377\000D\000i\000f\000f\000e\000r\000e\000n\000c\000e\000\040\000b\000e\000t\000w\000e\000e\000n\000\040\000V\000M\000Q\000\040\000a\000n\000d\000\040\000R\000-\000l\000e\000a\000r\000n\000i\000n\000g}{section.6}% 13
\BOOKMARK [2][-]{subsection.6.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\000\040\000A\000l\000g\000o\000r\000i\000t\000h\000m\000s}{section.6}% 14 \BOOKMARK [2][-]{subsection.6.2}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000T\000D\000\040\000L\000e\000a\000r\000n\000i\000n\000g}{section.6}% 14
\BOOKMARK [1][-]{section.7}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 15 \BOOKMARK [2][-]{subsection.6.3}{\376\377\000V\000a\000r\000i\000a\000n\000c\000e\000\040\000R\000e\000d\000u\000c\000t\000i\000o\000n\000\040\000f\000o\000r\000\040\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\000\040\000A\000l\000g\000o\000r\000i\000t\000h\000m\000s}{section.6}% 15
\BOOKMARK [1][-]{appendix.A}{\376\377\000R\000e\000l\000e\000v\000a\000n\000t\000\040\000p\000r\000o\000o\000f\000s}{}% 16 \BOOKMARK [1][-]{section.7}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 16
\BOOKMARK [2][-]{subsection.A.1}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0001}{appendix.A}% 17 \BOOKMARK [1][-]{appendix.A}{\376\377\000R\000e\000l\000e\000v\000a\000n\000t\000\040\000p\000r\000o\000o\000f\000s}{}% 17
\BOOKMARK [2][-]{subsection.A.2}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000C\000o\000r\000o\000l\000l\000a\000r\000y\000\040\0004\000.\0002}{appendix.A}% 18 \BOOKMARK [2][-]{subsection.A.1}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0001}{appendix.A}% 18
\BOOKMARK [2][-]{subsection.A.3}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0003}{appendix.A}% 19 \BOOKMARK [2][-]{subsection.A.2}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000C\000o\000r\000o\000l\000l\000a\000r\000y\000\040\0004\000.\0002}{appendix.A}% 19
\BOOKMARK [1][-]{appendix.B}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000d\000e\000t\000a\000i\000l\000s}{}% 20 \BOOKMARK [2][-]{subsection.A.3}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000T\000h\000e\000o\000r\000e\000m\000\040\0004\000.\0003}{appendix.A}% 20
\BOOKMARK [2][-]{subsection.A.4}{\376\377\000P\000r\000o\000o\000f\000\040\000o\000f\000\040\000V\000M\000E\000T\000D\000\040\000c\000o\000n\000v\000e\000r\000g\000e\000n\000c\000e}{appendix.A}% 21
\BOOKMARK [1][-]{appendix.B}{\376\377\000E\000x\000p\000e\000r\000i\000m\000e\000n\000t\000a\000l\000\040\000d\000e\000t\000a\000i\000l\000s}{}% 22
No preview for this file type
No preview for this file type
...@@ -42,6 +42,9 @@ ...@@ -42,6 +42,9 @@
\usepackage{mathtools} \usepackage{mathtools}
\usepackage{amsthm} \usepackage{amsthm}
\usepackage{tikz} \usepackage{tikz}
\usepackage{bm}
\usepackage{esvect}
\usepackage{multirow}
\theoremstyle{plain} \theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section] \newtheorem{theorem}{Theorem}[section]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment