Commit e9f7dd7f by GongYu

最新版VM论文版本—ICML

parents
% ALGORITHM STYLE -- Released 8 April 1996
% for LaTeX-2e
% Copyright -- 1994 Peter Williams
% E-mail Peter.Williams@dsto.defence.gov.au
%
% Defines the floating `algorithm' environment on top of the float
% package.  Package options choose the float style (plain/ruled/boxed)
% and the sectional counter within which algorithms are numbered; any
% other option is taken as the caption name of the float.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithm}
\typeout{Document Style `algorithm' - floating environment}
\RequirePackage{float}
\RequirePackage{ifthen}
% \ALG@within holds the name of the counter that algorithm numbering
% resets within ("nothing" = absolute numbering); the boolean
% ALG@within records whether any numbering option was given at all.
\newcommand{\ALG@within}{nothing}
\newboolean{ALG@within}
\setboolean{ALG@within}{false}
% Default float style and caption name.
\newcommand{\ALG@floatstyle}{ruled}
\newcommand{\ALG@name}{Algorithm}
\newcommand{\listalgorithmname}{List of \ALG@name s}
% Declare Options
% first appearance
\DeclareOption{plain}{
\renewcommand{\ALG@floatstyle}{plain}
}
\DeclareOption{ruled}{
\renewcommand{\ALG@floatstyle}{ruled}
}
\DeclareOption{boxed}{
\renewcommand{\ALG@floatstyle}{boxed}
}
% then numbering convention
\DeclareOption{part}{
\renewcommand{\ALG@within}{part}
\setboolean{ALG@within}{true}
}
\DeclareOption{chapter}{
\renewcommand{\ALG@within}{chapter}
\setboolean{ALG@within}{true}
}
\DeclareOption{section}{
\renewcommand{\ALG@within}{section}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsection}{
\renewcommand{\ALG@within}{subsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsubsection}{
\renewcommand{\ALG@within}{subsubsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{nothing}{
\renewcommand{\ALG@within}{nothing}
\setboolean{ALG@within}{true}
}
% Any unrecognized option becomes the float's printed caption name.
\DeclareOption*{\edef\ALG@name{\CurrentOption}}
% ALGORITHM
%
\ProcessOptions
\floatstyle{\ALG@floatstyle}
% Create the float via the float package, numbered within the selected
% counter if one was chosen.  The auxiliary-file extension is `loa'
% (list of algorithms).
\ifthenelse{\boolean{ALG@within}}{
\ifthenelse{\equal{\ALG@within}{part}}
{\newfloat{algorithm}{htbp}{loa}[part]}{}
\ifthenelse{\equal{\ALG@within}{chapter}}
{\newfloat{algorithm}{htbp}{loa}[chapter]}{}
\ifthenelse{\equal{\ALG@within}{section}}
{\newfloat{algorithm}{htbp}{loa}[section]}{}
\ifthenelse{\equal{\ALG@within}{subsection}}
{\newfloat{algorithm}{htbp}{loa}[subsection]}{}
\ifthenelse{\equal{\ALG@within}{subsubsection}}
{\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
\ifthenelse{\equal{\ALG@within}{nothing}}
{\newfloat{algorithm}{htbp}{loa}}{}
}{
\newfloat{algorithm}{htbp}{loa}
}
\floatname{algorithm}{\ALG@name}
% \listofalgorithms works like \listoffigures.
\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}
% ALGORITHMIC STYLE -- Released 8 APRIL 1996
% for LaTeX version 2e
% Copyright -- 1994 Peter Williams
% E-mail PeterWilliams@dsto.defence.gov.au
%
% Modified by Alex Smola (08/2000)
% E-mail Alex.Smola@anu.edu.au
%
% Provides the `algorithmic' environment for typesetting pseudo-code.
% The single package option `noend' suppresses the printing of the
% closing "end if"/"end for"/... lines.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithmic}
\typeout{Document Style `algorithmic' - environment}
%
\RequirePackage{ifthen}
\RequirePackage{calc}
% ALC@noend: true => omit the "end ..." keyword lines.
\newboolean{ALC@noend}
\setboolean{ALC@noend}{false}
% ALC@line: running statement-line counter.
\newcounter{ALC@line}
% ALC@rem: statements since the last printed line number.
\newcounter{ALC@rem}
% \ALC@tlm: accumulated left margin of the current block nesting.
\newlength{\ALC@tlm}
%
\DeclareOption{noend}{\setboolean{ALC@noend}{true}}
%
\ProcessOptions
%
% ALGORITHMIC
% Printed keyword commands.  Documents may \renewcommand any of these
% to change wording or formatting (e.g. for another language) without
% touching the environment machinery below.
\newcommand{\algorithmicrequire}{\textbf{Require:}}
\newcommand{\algorithmicensure}{\textbf{Ensure:}}
\newcommand{\algorithmiccomment}[1]{\{#1\}}
\newcommand{\algorithmicend}{\textbf{end}}
\newcommand{\algorithmicif}{\textbf{if}}
\newcommand{\algorithmicthen}{\textbf{then}}
\newcommand{\algorithmicelse}{\textbf{else}}
\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif}
\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif}
\newcommand{\algorithmicfor}{\textbf{for}}
\newcommand{\algorithmicforall}{\textbf{for all}}
\newcommand{\algorithmicdo}{\textbf{do}}
\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor}
\newcommand{\algorithmicwhile}{\textbf{while}}
\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile}
\newcommand{\algorithmicloop}{\textbf{loop}}
\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop}
\newcommand{\algorithmicrepeat}{\textbf{repeat}}
\newcommand{\algorithmicuntil}{\textbf{until}}
%changed by alex smola
% Additional keywords introduced by the Smola modification.
\newcommand{\algorithmicinput}{\textbf{input}}
\newcommand{\algorithmicoutput}{\textbf{output}}
\newcommand{\algorithmicset}{\textbf{set}}
\newcommand{\algorithmictrue}{\textbf{true}}
\newcommand{\algorithmicfalse}{\textbf{false}}
\newcommand{\algorithmicand}{\textbf{and\ }}
\newcommand{\algorithmicor}{\textbf{or\ }}
\newcommand{\algorithmicfunction}{\textbf{function}}
\newcommand{\algorithmicendfunction}{\algorithmicend\ \algorithmicfunction}
\newcommand{\algorithmicmain}{\textbf{main}}
\newcommand{\algorithmicendmain}{\algorithmicend\ \algorithmicmain}
%end changed by alex smola
% \ALC@item is a modified copy of the LaTeX kernel's \@item.  The key
% change is the label box handling at the end: the label is shifted
% left by \ALC@tlm so that line numbers stay aligned at the outer
% margin regardless of the current block nesting depth.
% NOTE(review): this closely tracks kernel internals (\@inlabel,
% \@labels, penalties); do not reformat -- spacing and token order are
% significant here.
\def\ALC@item[#1]{%
\if@noparitem \@donoparitem
\else \if@inlabel \indent \par \fi
\ifhmode \unskip\unskip \par \fi
\if@newlist \if@nobreak \@nbitem \else
\addpenalty\@beginparpenalty
\addvspace\@topsep \addvspace{-\parskip}\fi
\else \addpenalty\@itempenalty \addvspace\itemsep
\fi
\global\@inlabeltrue
\fi
\everypar{\global\@minipagefalse\global\@newlistfalse
\if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels
\penalty\z@ \fi
\everypar{}}\global\@nobreakfalse
\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi
\sbox\@tempboxa{\makelabel{#1}}%
\global\setbox\@labels
\hbox{\unhbox\@labels \hskip \itemindent
\hskip -\labelwidth \hskip -\ALC@tlm
\ifdim \wd\@tempboxa >\labelwidth
\box\@tempboxa
\else \hbox to\labelwidth {\unhbox\@tempboxa}\fi
\hskip \ALC@tlm}\ignorespaces}
%
% The algorithmic environment.  The optional argument n asks for every
% n-th statement line to be numbered; the default, 0, prints no line
% numbers at all (ALC@rem is then never reset to zero, so \ALC@lno
% never fires).
\newenvironment{algorithmic}[1][0]{
\let\@item\ALC@item
% \ALC@lno prints the current line number exactly when \ALC@it has
% just reset ALC@rem to zero.
\newcommand{\ALC@lno}{%
\ifthenelse{\equal{\arabic{ALC@rem}}{0}}
{{\footnotesize \arabic{ALC@line}:}}{}%
}
% Use the top-level list parameters at every nesting depth.
\let\@listii\@listi
\let\@listiii\@listi
\let\@listiv\@listi
\let\@listv\@listi
\let\@listvi\@listi
\let\@listvii\@listi
% ALC@g: generic indented block.  Each nesting level indents by 1em
% and records that indent in \ALC@tlm so labels stay flush left.
\newenvironment{ALC@g}{
\begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@
\listparindent\z@ \rightmargin\z@
\topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@
\leftmargin 1em
\addtolength{\ALC@tlm}{\leftmargin}
}
}
{\end{list}}
% \ALC@it starts a statement line: bump the counters, reset ALC@rem
% when the numbering interval #1 is reached, then emit \item.
\newcommand{\ALC@it}{\addtocounter{ALC@line}{1}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item}
% \ALC@com typesets the optional trailing comment of a construct; the
% sentinel value "default" means no comment was given.
\newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}%
{}{\ \algorithmiccomment{##1}}}
\newcommand{\REQUIRE}{\item[\algorithmicrequire]}
\newcommand{\ENSURE}{\item[\algorithmicensure]}
\newcommand{\STATE}{\ALC@it}
\newcommand{\COMMENT}[1]{\algorithmiccomment{##1}}
%changes by alex smola
\newcommand{\INPUT}{\item[\algorithmicinput]}
\newcommand{\OUTPUT}{\item[\algorithmicoutput]}
\newcommand{\SET}{\item[\algorithmicset]}
% \newcommand{\TRUE}{\algorithmictrue}
% \newcommand{\FALSE}{\algorithmicfalse}
\newcommand{\AND}{\algorithmicand}
\newcommand{\OR}{\algorithmicor}
\newenvironment{ALC@func}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@main}{\begin{ALC@g}}{\end{ALC@g}}
%end changes by alex smola
% One indented-block environment per construct, all aliases of ALC@g.
\newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}}
\renewcommand{\\}{\@centercr}
% User-level constructs.  Each prints its keyword line, an optional
% comment, and opens the matching indented block.
\newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\SHORTIF}[2]{\ALC@it\algorithmicif\ ##1\
\algorithmicthen\ {##2}}
\newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\ELSIF}[2][default]%
{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\SHORTFORALL}[2]{\ALC@it\algorithmicforall\ ##1\ %
\algorithmicdo\ {##2}}
\newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@whl}}
\newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop%
\ALC@com{##1}\begin{ALC@loop}}
%changed by alex smola
\newcommand{\FUNCTION}[2][default]{\ALC@it\algorithmicfunction\ ##2\ %
\ALC@com{##1}\begin{ALC@func}}
\newcommand{\MAIN}[2][default]{\ALC@it\algorithmicmain\ ##2\ %
\ALC@com{##1}\begin{ALC@main}}
%end changed by alex smola
\newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat%
\ALC@com{##1}\begin{ALC@rpt}}
\newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1}
% With `noend' the END... commands merely close the block; otherwise
% they also print the matching "end ..." keyword line.
\ifthenelse{\boolean{ALC@noend}}{
\newcommand{\ENDIF}{\end{ALC@if}}
\newcommand{\ENDFOR}{\end{ALC@for}}
\newcommand{\ENDWHILE}{\end{ALC@whl}}
\newcommand{\ENDLOOP}{\end{ALC@loop}}
\newcommand{\ENDFUNCTION}{\end{ALC@func}}
\newcommand{\ENDMAIN}{\end{ALC@main}}
}{
\newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif}
\newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor}
\newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile}
\newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop}
\newcommand{\ENDFUNCTION}{\end{ALC@func}\ALC@it\algorithmicendfunction}
\newcommand{\ENDMAIN}{\end{ALC@main}\ALC@it\algorithmicendmain}
}
% Allow arbitrarily deep nesting (disable LaTeX's "too deeply nested"
% error).
\renewcommand{\@toodeep}{}
% The outermost list of the environment; the label width is narrower
% when no line numbers are requested (#1 = 0).
\begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}%
\itemsep\z@ \itemindent\z@ \listparindent\z@%
\partopsep\z@ \parskip\z@ \parsep\z@%
\labelsep 0.5em \topsep 0.2em%
\ifthenelse{\equal{#1}{0}}
{\labelwidth 0.5em }
{\labelwidth 1.2em }
\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}
\ALC@tlm\labelsep
}
}
{\end{list}}
\begin{thebibliography}{34}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
\providecommand{\doi}[1]{doi: #1}\else
\providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi
\bibitem[Baird et~al.(1995)]{baird1995residual}
Baird, L. et~al.
\newblock Residual algorithms: Reinforcement learning with function approximation.
\newblock In \emph{Proc. 12th Int. Conf. Mach. Learn.}, pp.\ 30--37, 1995.
\bibitem[Bas-Serrano et~al.(2021)Bas-Serrano, Curi, Krause, and Neu]{basserrano2021logistic}
Bas-Serrano, J., Curi, S., Krause, A., and Neu, G.
\newblock Logistic Q-learning.
\newblock In \emph{International Conference on Artificial Intelligence and Statistics}, pp.\ 3610--3618, 2021.
\bibitem[Borkar(1997)]{borkar1997stochastic}
Borkar, V.~S.
\newblock Stochastic approximation with two time scales.
\newblock \emph{Syst. \& Control Letters}, 29\penalty0 (5):\penalty0 291--294, 1997.
\bibitem[Borkar \& Meyn(2000)Borkar and Meyn]{borkar2000ode}
Borkar, V.~S. and Meyn, S.~P.
\newblock The ODE method for convergence of stochastic approximation and reinforcement learning.
\newblock \emph{SIAM J. Control Optim.}, 38\penalty0 (2):\penalty0 447--469, 2000.
\bibitem[Chen et~al.(2023)Chen, Ma, Li, Yang, Yang, and Gao]{chen2023modified}
Chen, X., Ma, X., Li, Y., Yang, G., Yang, S., and Gao, Y.
\newblock Modified retrace for off-policy temporal difference learning.
\newblock In \emph{Uncertainty in Artificial Intelligence}, pp.\ 303--312. PMLR, 2023.
\bibitem[Dalal et~al.(2020)Dalal, Szorenyi, and Thoppe]{dalal2020tale}
Dalal, G., Szorenyi, B., and Thoppe, G.
\newblock A tale of two-timescale reinforcement learning with the tightest finite-time bound.
\newblock In \emph{Proceedings of the AAAI Conference on Artificial Intelligence}, volume~34, pp.\ 3701--3708, 2020.
\bibitem[Devlin \& Kudenko(2012)Devlin and Kudenko]{devlin2012dynamic}
Devlin, S. and Kudenko, D.
\newblock Dynamic potential-based reward shaping.
\newblock In \emph{Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems}, pp.\ 433--440, 2012.
\bibitem[Feng et~al.(2019)Feng, Li, and Liu]{feng2019kernel}
Feng, Y., Li, L., and Liu, Q.
\newblock A kernel loss for solving the Bellman equation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 15430--15441, 2019.
\bibitem[Givchi \& Palhang(2015)Givchi and Palhang]{givchi2015quasi}
Givchi, A. and Palhang, M.
\newblock Quasi Newton temporal difference learning.
\newblock In \emph{Asian Conference on Machine Learning}, pp.\ 159--172, 2015.
\bibitem[Hackman(2012)]{hackman2012faster}
Hackman, L.
\newblock \emph{Faster Gradient-TD Algorithms}.
\newblock PhD thesis, University of Alberta, 2012.
\bibitem[Hallak et~al.(2016)Hallak, Tamar, Munos, and Mannor]{hallak2016generalized}
Hallak, A., Tamar, A., Munos, R., and Mannor, S.
\newblock Generalized emphatic temporal difference learning: bias-variance analysis.
\newblock In \emph{Proceedings of the 30th AAAI Conference on Artificial Intelligence}, pp.\ 1631--1637, 2016.
\bibitem[Hirsch(1989)]{hirsch1989convergent}
Hirsch, M.~W.
\newblock Convergent activation dynamics in continuous time networks.
\newblock \emph{Neural Netw.}, 2\penalty0 (5):\penalty0 331--349, 1989.
\bibitem[Johnson \& Zhang(2013)Johnson and Zhang]{johnson2013accelerating}
Johnson, R. and Zhang, T.
\newblock Accelerating stochastic gradient descent using predictive variance reduction.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 315--323, 2013.
\bibitem[Korda \& La(2015)Korda and La]{korda2015td}
Korda, N. and La, P.
\newblock On TD(0) with function approximation: Concentration bounds and a centered variant with exponential convergence.
\newblock In \emph{International conference on machine learning}, pp.\ 626--634. PMLR, 2015.
\bibitem[Langley(2000)]{langley00}
Langley, P.
\newblock Crafting papers on machine learning.
\newblock In Langley, P. (ed.), \emph{Proceedings of the 17th International Conference on Machine Learning (ICML 2000)}, pp.\ 1207--1216, Stanford, CA, 2000. Morgan Kaufmann.
\bibitem[Liu et~al.(2015)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik]{liu2015finite}
Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., and Petrik, M.
\newblock Finite-sample analysis of proximal gradient TD algorithms.
\newblock In \emph{Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence}, pp.\ 504--513, 2015.
\bibitem[Liu et~al.(2016)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik]{liu2016proximal}
Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., and Petrik, M.
\newblock Proximal gradient temporal difference learning algorithms.
\newblock In \emph{Proceedings of the International Joint Conference on Artificial Intelligence}, pp.\ 4195--4199, 2016.
\bibitem[Liu et~al.(2018)Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik]{liu2018proximal}
Liu, B., Gemp, I., Ghavamzadeh, M., Liu, J., Mahadevan, S., and Petrik, M.
\newblock Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity.
\newblock \emph{Journal of Artificial Intelligence Research}, 63:\penalty0 461--494, 2018.
\bibitem[Maei(2011)]{maei2011gradient}
Maei, H.~R.
\newblock \emph{Gradient temporal-difference learning algorithms}.
\newblock PhD thesis, University of Alberta, 2011.
\bibitem[Ng et~al.(1999)Ng, Harada, and Russell]{ng1999policy}
Ng, A.~Y., Harada, D., and Russell, S.
\newblock Policy invariance under reward transformations: Theory and application to reward shaping.
\newblock In \emph{Proc. 16th Int. Conf. Mach. Learn.}, pp.\ 278--287, 1999.
\bibitem[Pan et~al.(2017)Pan, White, and White]{pan2017accelerated}
Pan, Y., White, A., and White, M.
\newblock Accelerated gradient temporal difference learning.
\newblock In \emph{Proceedings of the 21st AAAI Conference on Artificial Intelligence}, pp.\ 2464--2470, 2017.
\bibitem[Schulman et~al.(2015)Schulman, Levine, Abbeel, Jordan, and Moritz]{schulman2015trust}
Schulman, J., Levine, S., Abbeel, P., Jordan, M., and Moritz, P.
\newblock Trust region policy optimization.
\newblock In \emph{International Conference on Machine Learning}, pp.\ 1889--1897, 2015.
\bibitem[Schulman et~al.(2017)Schulman, Wolski, Dhariwal, Radford, and Klimov]{schulman2017proximal}
Schulman, J., Wolski, F., Dhariwal, P., Radford, A., and Klimov, O.
\newblock Proximal policy optimization algorithms.
\newblock \emph{arXiv preprint arXiv:1707.06347}, 2017.
\bibitem[Schwartz(1993)]{schwartz1993reinforcement}
Schwartz, A.
\newblock A reinforcement learning method for maximizing undiscounted rewards.
\newblock In \emph{Proc. 10th Int. Conf. Mach. Learn.}, volume 298, pp.\ 298--305, 1993.
\bibitem[Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora]{sutton2009fast}
Sutton, R., Maei, H., Precup, D., Bhatnagar, S., Silver, D., Szepesv{\'a}ri, C., and Wiewiora, E.
\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation.
\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, pp.\ 993--1000, 2009.
\bibitem[Sutton(1988)]{sutton1988learning}
Sutton, R.~S.
\newblock Learning to predict by the methods of temporal differences.
\newblock \emph{Machine learning}, 3\penalty0 (1):\penalty0 9--44, 1988.
\bibitem[Sutton \& Barto(2018)Sutton and Barto]{Sutton2018book}
Sutton, R.~S. and Barto, A.~G.
\newblock \emph{Reinforcement Learning: An Introduction}.
\newblock The MIT Press, second edition, 2018.
\bibitem[Sutton et~al.(2008)Sutton, Maei, and Szepesv{\'a}ri]{sutton2008convergent}
Sutton, R.~S., Maei, H.~R., and Szepesv{\'a}ri, C.
\newblock A convergent $ o (n) $ temporal-difference algorithm for off-policy learning with linear function approximation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 1609--1616. Cambridge, MA: MIT Press, 2008.
\bibitem[Sutton et~al.(2016)Sutton, Mahmood, and White]{sutton2016emphatic}
Sutton, R.~S., Mahmood, A.~R., and White, M.
\newblock An emphatic approach to the problem of off-policy temporal-difference learning.
\newblock \emph{The Journal of Machine Learning Research}, 17\penalty0 (1):\penalty0 2603--2631, 2016.
\bibitem[Tsitsiklis \& Van~Roy(1997)Tsitsiklis and Van~Roy]{tsitsiklis1997analysis}
Tsitsiklis, J.~N. and Van~Roy, B.
\newblock Analysis of temporal-difference learning with function approximation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 1075--1081, 1997.
\bibitem[Xu et~al.(2019)Xu, Wang, Zhou, and Liang]{xu2019reanalysis}
Xu, T., Wang, Z., Zhou, Y., and Liang, Y.
\newblock Reanalysis of variance reduced temporal difference learning.
\newblock In \emph{International Conference on Learning Representations}, 2019.
\bibitem[Xu et~al.(2020)Xu, Wang, Zhou, and Liang]{xu2020reanalysis}
Xu, T., Wang, Z., Zhou, Y., and Liang, Y.
\newblock Reanalysis of variance reduced temporal difference learning.
\newblock \emph{arXiv preprint arXiv:2001.01898}, 2020.
\bibitem[Zhang \& Whiteson(2022)Zhang and Whiteson]{zhang2022truncated}
Zhang, S. and Whiteson, S.
\newblock Truncated emphatic temporal difference methods for prediction and control.
\newblock \emph{The Journal of Machine Learning Research}, 23\penalty0 (1):\penalty0 6859--6917, 2022.
\bibitem[Zhou(2021)]{zhou2021machine}
Zhou, Z.-H.
\newblock \emph{Machine learning}.
\newblock Springer Nature, 2021.
\end{thebibliography}
This is BibTeX, Version 0.99d (TeX Live 2023)
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: example_paper.aux
The style file: icml2024.bst
Database file #1: example_paper.bib
Warning--can't use both volume and number fields in dalal2020tale
You've used 34 entries,
2773 wiz_defined-function locations,
790 strings with 10645 characters,
and the built_in function-call counts, 19097 in all, are:
= -- 1771
> -- 980
< -- 39
+ -- 332
- -- 298
* -- 1422
:= -- 2705
add.period$ -- 106
call.type$ -- 34
change.case$ -- 197
chr.to.int$ -- 34
cite$ -- 69
duplicate$ -- 950
empty$ -- 1596
format.name$ -- 349
if$ -- 4208
int.to.chr$ -- 1
int.to.str$ -- 1
missing$ -- 34
newline$ -- 178
num.names$ -- 138
pop$ -- 445
preamble$ -- 1
purify$ -- 166
quote$ -- 0
skip$ -- 806
stack$ -- 0
substring$ -- 965
swap$ -- 257
text.length$ -- 22
text.prefix$ -- 0
top$ -- 0
type$ -- 364
warning$ -- 1
while$ -- 176
width$ -- 0
write$ -- 452
(There was 1 warning)
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
% Figure: six circled nodes (1--6) in a horizontal row, each with a
% dashed double-headed arrow to a point above it and a solid arrow to
% the circled node 7 at the lower left; a dashed polyline outlines the
% region and a curved arrow loops out of node 7.
% NOTE(review): geometry inferred from the coordinates below -- verify
% against the rendered figure.
\resizebox{7cm}{4.4cm}{
\begin{tikzpicture}[smooth]
% Anchor coordinates for node 7, node 1, and the dashed boundary.
\node[coordinate] (origin) at (0.3,0) {};
\node[coordinate] (num7) at (3,0) {};
\node[coordinate] (num1) at (1,2.5) {};
% Waypoints for the curved arrow leaving node 7.
\path (num7) ++ (-10:0.5cm) node (num7_bright1) [coordinate] {};
\path (num7) ++ (-30:0.7cm) node (num7_bright2) [coordinate] {};
\path (num7) ++ (-60:0.35cm) node (num7_bright3) [coordinate] {};
\path (num7) ++ (-60:0.6cm) node (num7_bright4) [coordinate] {};
% Corners of the dashed boundary polyline.
\path (origin) ++ (90:3cm) node (origin_above) [coordinate] {};
\path (origin_above) ++ (0:5.7cm) node (origin_aright) [coordinate] {};
% For each node k: numK is the circle centre reference, numK_a the
% point 0.5cm above (dashed-arrow tip), numK_b the circle position
% 0.3cm below; each subsequent node is 1cm to the right.
\path (num1) ++ (90:0.5cm) node (num1_a) [coordinate] {};
\path (num1) ++ (-90:0.3cm) node (num1_b) [coordinate] {};
\path (num1) ++ (0:1cm) node (num2) [coordinate] {};
\path (num1_a) ++ (0:1cm) node (num2_a) [coordinate] {};
\path (num1_b) ++ (0:1cm) node (num2_b) [coordinate] {};
\path (num2) ++ (0:1cm) node (num3) [coordinate] {};
\path (num2_a) ++ (0:1cm) node (num3_a) [coordinate] {};
\path (num2_b) ++ (0:1cm) node (num3_b) [coordinate] {};
\path (num3) ++ (0:1cm) node (num4) [coordinate] {};
\path (num3_a) ++ (0:1cm) node (num4_a) [coordinate] {};
\path (num3_b) ++ (0:1cm) node (num4_b) [coordinate] {};
\path (num4) ++ (0:1cm) node (num5) [coordinate] {};
\path (num4_a) ++ (0:1cm) node (num5_a) [coordinate] {};
\path (num4_b) ++ (0:1cm) node (num5_b) [coordinate] {};
\path (num5) ++ (0:1cm) node (num6) [coordinate] {};
\path (num5_a) ++ (0:1cm) node (num6_a) [coordinate] {};
\path (num5_b) ++ (0:1cm) node (num6_b) [coordinate] {};
%\draw[->](0,0) -- (1,1);
%\draw[dashed,line width = 0.03cm] (0,0) -- (1,1);
%\fill (0.5,0.5) circle (0.5);
%\draw[shape=circle,fill=white,draw=black] (a) at (num7) {7};
% Dashed boundary through node 7, the origin, and the upper edge.
\draw[dashed,line width = 0.03cm,xshift=3cm] plot[tension=0.06]
coordinates{(num7) (origin) (origin_above) (origin_aright)};
% Curved arrow leaving node 7.
\draw[->,>=stealth,line width = 0.02cm,xshift=3cm] plot[tension=0.5]
coordinates{(num7) (num7_bright1) (num7_bright2)(num7_bright4) (num7_bright3)};
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (g) at (num7) {7};
% Each numbered circle with its dashed vertical double arrow above.
\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num1) -- (num1_a) ;
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (a) at (num1_b) {1};
\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num2) -- (num2_a) ;
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (b) at (num2_b) {2};
\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num3) -- (num3_a) ;
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (c) at (num3_b) {3};
\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num4) -- (num4_a) ;
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (d) at (num4_b) {4};
\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num5) -- (num5_a) ;
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (e) at (num5_b) {5};
\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num6) -- (num6_a) ;
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (f) at (num6_b) {6};
% Solid arrows from every numbered node into node 7.
\draw[->,>=stealth,line width = 0.02cm] (a)--(g);
\draw[->,>=stealth,line width = 0.02cm] (b)--(g);
\draw[->,>=stealth,line width = 0.02cm] (c)--(g);
\draw[->,>=stealth,line width = 0.02cm] (d)--(g);
\draw[->,>=stealth,line width = 0.02cm] (e)--(g);
\draw[->,>=stealth,line width = 0.02cm] (f)--(g);
\end{tikzpicture}
}
% \tikzstyle{int}=[draw, fill=blue!20, minimum size=2em]
% \tikzstyle{block}=[draw, fill=gray, minimum size=1.5em]
% \tikzstyle{init} = [pin edge={to-,thin,black}]
% \resizebox{8cm}{1.2cm}{
% \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
% \node [block] (o) {};
% \node (p) [left of=o,node distance=0.5cm, coordinate] {o};
% \node [shape=circle,int] (a) [right of=o]{$A$};
% \node (b) [left of=a,node distance=1.5cm, coordinate] {a};
% \node [shape=circle,int] (c) [right of=a] {$B$};
% \node (d) [left of=c,node distance=1.5cm, coordinate] {c};
% \node [shape=circle,int, pin={[init]above:$$}] (e) [right of=c]{$C$};
% \node (f) [left of=e,node distance=1.5cm, coordinate] {e};
% \node [shape=circle,int] (g) [right of=e] {$D$};
% \node (h) [left of=g,node distance=1.5cm, coordinate] {g};
% \node [shape=circle,int] (i) [right of=g] {$E$};
% \node (j) [left of=i,node distance=1.5cm, coordinate] {i};
% \node [block] (k) [right of=i] {};
% \node (l) [left of=k,node distance=0.5cm, coordinate] {k};
% \path[<-] (o) edge node {$0$} (a);
% \path[<->] (a) edge node {$0$} (c);
% \path[<->] (c) edge node {$0$} (e);
% \path[<->] (e) edge node {$0$} (g);
% \path[<->] (g) edge node {$0$} (i);
% \draw[->] (i) edge node {$1$} (k);
% \end{tikzpicture}
% }
% Figure: a linear chain of circled states $A$--$E$ between two gray
% terminal blocks, with edge labels 0/1 (presumably transition
% rewards; looks like the classic random-walk example -- verify).
% \tikzstyle is deprecated; the equivalent modern declarations use
% \tikzset with the /.style handler (see the PGF/TikZ manual).
\tikzset{int/.style={draw, fill=blue!20, minimum size=2em}}
\tikzset{block/.style={draw, fill=gray, minimum size=1.5em}}
\tikzset{init/.style={pin edge={to-,thin,black}}}
\resizebox{8cm}{1.5cm}{
\begin{tikzpicture}[node distance=1.5cm, auto, >=latex]
% Terminal block on the left, then states A..E, then the right block.
% The unlabeled coordinate nodes (p,b,d,f,h,j,l) sit to the left of
% each main node and are not drawn.
\node [block] (o) {};
\node (p) [left of=o, node distance=0.5cm, coordinate] {o};
\node [shape=circle, int] (a) [right of=o] {$A$};
\node (b) [left of=a, node distance=1.5cm, coordinate] {a};
\node [shape=circle, int] (c) [right of=a] {$B$};
\node (d) [left of=c, node distance=1.5cm, coordinate] {c};
\node [shape=circle, int, pin={[init]above:$ $}] (e) [right of=c] {$C$};
\node (f) [left of=e, node distance=1.5cm, coordinate] {e};
\node [shape=circle, int] (g) [right of=e] {$D$};
\node (h) [left of=g, node distance=1.5cm, coordinate] {g};
\node [shape=circle, int] (i) [right of=g] {$E$};
\node (j) [left of=i, node distance=1.5cm, coordinate] {i};
\node [block] (k) [right of=i] {};
\node (l) [left of=k, node distance=0.5cm, coordinate] {k};
% Edges: single-headed into the terminals, double-headed between
% interior states; labels are the edge values.
\path[->] (o) edge node {$0$} (a);
\path[<->] (a) edge node {$0$} (c);
\path[<->] (c) edge node {$0$} (e);
\path[<->] (e) edge node {$0$} (g);
\path[<->] (g) edge node {$0$} (i);
\draw[->] (i) edge node {$1$} (k);
\end{tikzpicture}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment