% Commit e9f7dd7f by GongYu
%
% 最新版VM论文版本—ICML (latest VM paper version -- ICML)
%
% parents
% ALGORITHM STYLE -- Released 8 April 1996
% for LaTeX-2e
% Copyright -- 1994 Peter Williams
% E-mail Peter.Williams@dsto.defence.gov.au
%
% Defines the floating `algorithm' environment on top of the float
% package.  Package options select the float style (plain/ruled/boxed)
% and the sectioning counter the algorithm number is reset within; any
% other option is used as a replacement caption name for the float.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithm}
\typeout{Document Style `algorithm' - floating environment}
\RequirePackage{float}
\RequirePackage{ifthen}
% \ALG@within holds the name of the parent counter (`nothing' = none);
% the boolean ALG@within records whether any numbering option was given.
\newcommand{\ALG@within}{nothing}
\newboolean{ALG@within}
\setboolean{ALG@within}{false}
% Defaults: ruled style, caption label "Algorithm".
\newcommand{\ALG@floatstyle}{ruled}
\newcommand{\ALG@name}{Algorithm}
\newcommand{\listalgorithmname}{List of \ALG@name s}
% Declare Options
% first appearance
% Float-style options; if several are given, the last one wins.
\DeclareOption{plain}{
\renewcommand{\ALG@floatstyle}{plain}
}
\DeclareOption{ruled}{
\renewcommand{\ALG@floatstyle}{ruled}
}
\DeclareOption{boxed}{
\renewcommand{\ALG@floatstyle}{boxed}
}
% then numbering convention
% Numbering options: algorithms are numbered within the chosen unit.
\DeclareOption{part}{
\renewcommand{\ALG@within}{part}
\setboolean{ALG@within}{true}
}
\DeclareOption{chapter}{
\renewcommand{\ALG@within}{chapter}
\setboolean{ALG@within}{true}
}
\DeclareOption{section}{
\renewcommand{\ALG@within}{section}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsection}{
\renewcommand{\ALG@within}{subsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsubsection}{
\renewcommand{\ALG@within}{subsubsection}
\setboolean{ALG@within}{true}
}
% `nothing' also sets the boolean; it is handled below by the branch
% that creates the float with no parent counter.
\DeclareOption{nothing}{
\renewcommand{\ALG@within}{nothing}
\setboolean{ALG@within}{true}
}
% Any unknown option becomes the caption name (\edef fully expands
% \CurrentOption), e.g. \usepackage[Procedure]{algorithm}.
\DeclareOption*{\edef\ALG@name{\CurrentOption}}
% ALGORITHM
%
\ProcessOptions
\floatstyle{\ALG@floatstyle}
% Create the `algorithm' float (placement htbp, list file extension
% .loa), numbered within the requested counter if one was selected.
\ifthenelse{\boolean{ALG@within}}{
\ifthenelse{\equal{\ALG@within}{part}}
{\newfloat{algorithm}{htbp}{loa}[part]}{}
\ifthenelse{\equal{\ALG@within}{chapter}}
{\newfloat{algorithm}{htbp}{loa}[chapter]}{}
\ifthenelse{\equal{\ALG@within}{section}}
{\newfloat{algorithm}{htbp}{loa}[section]}{}
\ifthenelse{\equal{\ALG@within}{subsection}}
{\newfloat{algorithm}{htbp}{loa}[subsection]}{}
\ifthenelse{\equal{\ALG@within}{subsubsection}}
{\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
\ifthenelse{\equal{\ALG@within}{nothing}}
{\newfloat{algorithm}{htbp}{loa}}{}
}{
\newfloat{algorithm}{htbp}{loa}
}
\floatname{algorithm}{\ALG@name}
% \listofalgorithms typesets the list of algorithms (from the .loa file).
\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}
% ALGORITHMIC STYLE -- Released 8 APRIL 1996
% for LaTeX version 2e
% Copyright -- 1994 Peter Williams
% E-mail PeterWilliams@dsto.defence.gov.au
%
% Modified by Alex Smola (08/2000)
% E-mail Alex.Smola@anu.edu.au
%
% Pseudocode environment `algorithmic'.  The single package option
% `noend' suppresses the closing "end if"/"end for"/... lines.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithmic}
\typeout{Document Style `algorithmic' - environment}
%
\RequirePackage{ifthen}
\RequirePackage{calc}
% ALC@noend : true when the noend package option is given.
\newboolean{ALC@noend}
\setboolean{ALC@noend}{false}
% ALC@line : current statement number.
% ALC@rem  : lines elapsed since the last printed number (drives the
%            "number every n-th line" behaviour of the environment).
\newcounter{ALC@line}
\newcounter{ALC@rem}
% \ALC@tlm : accumulated left margin of the current nesting level.
\newlength{\ALC@tlm}
%
\DeclareOption{noend}{\setboolean{ALC@noend}{true}}
%
\ProcessOptions
%
% ALGORITHMIC
% Keyword macros -- redefine any of these to change the typeset keyword.
\newcommand{\algorithmicrequire}{\textbf{Require:}}
\newcommand{\algorithmicensure}{\textbf{Ensure:}}
\newcommand{\algorithmiccomment}[1]{\{#1\}}
\newcommand{\algorithmicend}{\textbf{end}}
\newcommand{\algorithmicif}{\textbf{if}}
\newcommand{\algorithmicthen}{\textbf{then}}
\newcommand{\algorithmicelse}{\textbf{else}}
\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif}
\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif}
\newcommand{\algorithmicfor}{\textbf{for}}
\newcommand{\algorithmicforall}{\textbf{for all}}
\newcommand{\algorithmicdo}{\textbf{do}}
\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor}
\newcommand{\algorithmicwhile}{\textbf{while}}
\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile}
\newcommand{\algorithmicloop}{\textbf{loop}}
\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop}
\newcommand{\algorithmicrepeat}{\textbf{repeat}}
\newcommand{\algorithmicuntil}{\textbf{until}}
%changed by alex smola
\newcommand{\algorithmicinput}{\textbf{input}}
\newcommand{\algorithmicoutput}{\textbf{output}}
\newcommand{\algorithmicset}{\textbf{set}}
\newcommand{\algorithmictrue}{\textbf{true}}
\newcommand{\algorithmicfalse}{\textbf{false}}
\newcommand{\algorithmicand}{\textbf{and\ }}
\newcommand{\algorithmicor}{\textbf{or\ }}
\newcommand{\algorithmicfunction}{\textbf{function}}
\newcommand{\algorithmicendfunction}{\algorithmicend\ \algorithmicfunction}
\newcommand{\algorithmicmain}{\textbf{main}}
\newcommand{\algorithmicendmain}{\algorithmicend\ \algorithmicmain}
%end changed by alex smola
% \ALC@item : replacement for the list-environment \@item that keeps
% line-number labels right-aligned in a box of width \labelwidth while
% compensating for the accumulated nesting indent \ALC@tlm.
% NOTE(review): apart from the \ALC@tlm \hskip adjustments this appears
% to mirror the LaTeX kernel's \@item -- confirm against latex.ltx
% before modifying any of the penalty/glue logic below.
\def\ALC@item[#1]{%
\if@noparitem \@donoparitem
\else \if@inlabel \indent \par \fi
\ifhmode \unskip\unskip \par \fi
\if@newlist \if@nobreak \@nbitem \else
\addpenalty\@beginparpenalty
\addvspace\@topsep \addvspace{-\parskip}\fi
\else \addpenalty\@itempenalty \addvspace\itemsep
\fi
\global\@inlabeltrue
\fi
\everypar{\global\@minipagefalse\global\@newlistfalse
\if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels
\penalty\z@ \fi
\everypar{}}\global\@nobreakfalse
\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi
% Typeset the label, then append it to \@labels shifted left by
% \labelwidth plus \ALC@tlm so nested blocks stay aligned.
\sbox\@tempboxa{\makelabel{#1}}%
\global\setbox\@labels
\hbox{\unhbox\@labels \hskip \itemindent
\hskip -\labelwidth \hskip -\ALC@tlm
\ifdim \wd\@tempboxa >\labelwidth
\box\@tempboxa
\else \hbox to\labelwidth {\unhbox\@tempboxa}\fi
\hskip \ALC@tlm}\ignorespaces}
%
% The algorithmic environment.  The optional argument is the numbering
% interval n: a line number is printed every n lines; 0 (the default)
% suppresses line numbers entirely.
\newenvironment{algorithmic}[1][0]{
\let\@item\ALC@item
% \ALC@lno prints the current line number exactly when ALC@rem has just
% been reset to zero by \ALC@it (i.e. every n-th line).
\newcommand{\ALC@lno}{%
\ifthenelse{\equal{\arabic{ALC@rem}}{0}}
{{\footnotesize \arabic{ALC@line}:}}{}%
}
% Use the top-level list parameters at every nesting depth.
\let\@listii\@listi
\let\@listiii\@listi
\let\@listiv\@listi
\let\@listv\@listi
\let\@listvi\@listi
\let\@listvii\@listi
% ALC@g : generic indented sub-block; each nesting level adds 1em to
% the left margin and records it in \ALC@tlm for label alignment.
\newenvironment{ALC@g}{
\begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@
\listparindent\z@ \rightmargin\z@
\topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@
\leftmargin 1em
\addtolength{\ALC@tlm}{\leftmargin}
}
}
{\end{list}}
% \ALC@it : advance both counters, reset ALC@rem when it reaches the
% numbering interval, then start the item (whose label is \ALC@lno).
\newcommand{\ALC@it}{\addtocounter{ALC@line}{1}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item}
% \ALC@com : typeset a trailing comment; the sentinel string `default'
% (the optional-argument default below) means "no comment given".
\newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}%
{}{\ \algorithmiccomment{##1}}}
\newcommand{\REQUIRE}{\item[\algorithmicrequire]}
\newcommand{\ENSURE}{\item[\algorithmicensure]}
\newcommand{\STATE}{\ALC@it}
\newcommand{\COMMENT}[1]{\algorithmiccomment{##1}}
%changes by alex smola
\newcommand{\INPUT}{\item[\algorithmicinput]}
\newcommand{\OUTPUT}{\item[\algorithmicoutput]}
\newcommand{\SET}{\item[\algorithmicset]}
% \newcommand{\TRUE}{\algorithmictrue}
% \newcommand{\FALSE}{\algorithmicfalse}
\newcommand{\AND}{\algorithmicand}
\newcommand{\OR}{\algorithmicor}
\newenvironment{ALC@func}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@main}{\begin{ALC@g}}{\end{ALC@g}}
%end changes by alex smola
% One ALC@g alias per construct so \end... can check proper nesting.
\newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}}
\renewcommand{\\}{\@centercr}
% Statement macros.  The optional argument of each is a line comment.
\newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\SHORTIF}[2]{\ALC@it\algorithmicif\ ##1\
\algorithmicthen\ {##2}}
\newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\ELSIF}[2][default]%
{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\SHORTFORALL}[2]{\ALC@it\algorithmicforall\ ##1\ %
\algorithmicdo\ {##2}}
\newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@whl}}
\newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop%
\ALC@com{##1}\begin{ALC@loop}}
%changed by alex smola
\newcommand{\FUNCTION}[2][default]{\ALC@it\algorithmicfunction\ ##2\ %
\ALC@com{##1}\begin{ALC@func}}
\newcommand{\MAIN}[2][default]{\ALC@it\algorithmicmain\ ##2\ %
\ALC@com{##1}\begin{ALC@main}}
%end changed by alex smola
\newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat%
\ALC@com{##1}\begin{ALC@rpt}}
\newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1}
% With the noend option the END macros merely close the block;
% otherwise they also print the matching "end ..." keyword line.
\ifthenelse{\boolean{ALC@noend}}{
\newcommand{\ENDIF}{\end{ALC@if}}
\newcommand{\ENDFOR}{\end{ALC@for}}
\newcommand{\ENDWHILE}{\end{ALC@whl}}
\newcommand{\ENDLOOP}{\end{ALC@loop}}
\newcommand{\ENDFUNCTION}{\end{ALC@func}}
\newcommand{\ENDMAIN}{\end{ALC@main}}
}{
\newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif}
\newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor}
\newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile}
\newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop}
\newcommand{\ENDFUNCTION}{\end{ALC@func}\ALC@it\algorithmicendfunction}
\newcommand{\ENDMAIN}{\end{ALC@main}\ALC@it\algorithmicendmain}
}
% Disable LaTeX's "too deeply nested" error for deeply nested blocks.
\renewcommand{\@toodeep}{}
% Outermost list: labels are the line numbers; label width depends on
% whether numbering is enabled (interval nonzero).
\begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}%
\itemsep\z@ \itemindent\z@ \listparindent\z@%
\partopsep\z@ \parskip\z@ \parsep\z@%
\labelsep 0.5em \topsep 0.2em%
\ifthenelse{\equal{#1}{0}}
{\labelwidth 0.5em }
{\labelwidth 1.2em }
\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}
\ALC@tlm\labelsep
}
}
{\end{list}}
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{sutton1988learning}
\citation{tsitsiklis1997analysis}
\citation{Sutton2018book}
\citation{baird1995residual}
\citation{sutton2008convergent}
\citation{sutton2009fast}
\citation{sutton2016emphatic}
\citation{chen2023modified}
\citation{hackman2012faster}
\citation{liu2015finite,liu2016proximal,liu2018proximal}
\citation{givchi2015quasi}
\citation{pan2017accelerated}
\citation{hallak2016generalized}
\citation{zhang2022truncated}
\citation{johnson2013accelerating}
\citation{korda2015td}
\citation{xu2019reanalysis}
\citation{Sutton2018book}
\citation{baird1995residual}
\citation{sutton2009fast}
\citation{sutton2009fast}
\citation{feng2019kernel}
\citation{basserrano2021logistic}
\newlabel{introduction}{{1}{1}{}{section.1}{}}
\newlabel{introduction@cref}{{[section][1][]1}{[1][1][]1}}
\citation{zhou2021machine}
\citation{Sutton2018book}
\citation{Sutton2018book}
\citation{sutton2009fast}
\citation{sutton2009fast}
\citation{ng1999policy}
\newlabel{preliminaries}{{2}{2}{}{section.2}{}}
\newlabel{preliminaries@cref}{{[section][2][]2}{[1][2][]2}}
\newlabel{valuefunction}{{2}{2}{}{section.2}{}}
\newlabel{valuefunction@cref}{{[section][2][]2}{[1][2][]2}}
\newlabel{linearvaluefunction}{{1}{2}{}{equation.2.1}{}}
\newlabel{linearvaluefunction@cref}{{[equation][1][]1}{[1][2][]2}}
\citation{devlin2012dynamic}
\newlabel{example_bias}{{1}{3}{Classification accuracies for naive Bayes and flexible Bayes on various data sets}{table.1}{}}
\newlabel{example_bias@cref}{{[table][1][]1}{[1][2][]3}}
\newlabel{omega}{{3}{3}{}{equation.3.3}{}}
\newlabel{omega@cref}{{[equation][3][]3}{[1][3][]3}}
\newlabel{delta}{{4}{3}{}{equation.3.4}{}}
\newlabel{delta@cref}{{[equation][4][]4}{[1][3][]3}}
\newlabel{theta}{{5}{3}{}{equation.3.5}{}}
\newlabel{theta@cref}{{[equation][5][]5}{[1][3][]3}}
\newlabel{deltaSarsa}{{8}{3}{}{equation.3.8}{}}
\newlabel{deltaSarsa@cref}{{[equation][8][]8}{[1][3][]3}}
\newlabel{deltaQ}{{9}{3}{}{equation.3.9}{}}
\newlabel{deltaQ@cref}{{[equation][9][]9}{[1][3][]3}}
\citation{borkar1997stochastic}
\citation{hirsch1989convergent}
\newlabel{alg:algorithm 1}{{1}{4}{}{algorithm.1}{}}
\newlabel{alg:algorithm 1@cref}{{[algorithm][1][]1}{[1][3][]4}}
\newlabel{thetavmtdc}{{11}{4}{}{equation.3.11}{}}
\newlabel{thetavmtdc@cref}{{[equation][11][]11}{[1][3][]4}}
\newlabel{uvmtdc}{{12}{4}{}{equation.3.12}{}}
\newlabel{uvmtdc@cref}{{[equation][12][]12}{[1][3][]4}}
\newlabel{omegavmtdc}{{13}{4}{}{equation.3.13}{}}
\newlabel{omegavmtdc@cref}{{[equation][13][]13}{[1][3][]4}}
\newlabel{theorem1}{{4.1}{4}{}{theorem.4.1}{}}
\newlabel{theorem1@cref}{{[theorem][1][4]4.1}{[1][4][]4}}
\newlabel{th1proof}{{4}{4}{}{theorem.4.1}{}}
\newlabel{th1proof@cref}{{[section][4][]4}{[1][4][]4}}
\newlabel{thetaFast}{{17}{4}{}{equation.4.17}{}}
\newlabel{thetaFast@cref}{{[equation][17][]17}{[1][4][]4}}
\newlabel{omegaFast}{{18}{4}{}{equation.4.18}{}}
\newlabel{omegaFast@cref}{{[equation][18][]18}{[1][4][]4}}
\newlabel{omegaFastFinal}{{19}{4}{}{equation.4.19}{}}
\newlabel{omegaFastFinal@cref}{{[equation][19][]19}{[1][4][]4}}
\newlabel{omegaInfty}{{20}{4}{}{equation.4.20}{}}
\newlabel{omegaInfty@cref}{{[equation][20][]20}{[1][4][]4}}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{dalal2020tale}
\citation{dalal2020tale}
\newlabel{odetheta}{{21}{5}{}{equation.4.21}{}}
\newlabel{odetheta@cref}{{[equation][21][]21}{[1][5][]5}}
\newlabel{covariance}{{22}{5}{}{equation.4.22}{}}
\newlabel{covariance@cref}{{[equation][22][]22}{[1][5][]5}}
\newlabel{odethetafinal}{{23}{5}{}{equation.4.23}{}}
\newlabel{odethetafinal@cref}{{[equation][23][]23}{[1][5][]5}}
\newlabel{corollary4_2}{{4.2}{5}{}{theorem.4.2}{}}
\newlabel{corollary4_2@cref}{{[corollary][2][4]4.2}{[1][5][]5}}
\newlabel{theorem2}{{4.3}{5}{}{theorem.4.3}{}}
\newlabel{theorem2@cref}{{[theorem][3][4]4.3}{[1][5][]5}}
\citation{Sutton2018book}
\citation{sutton2009fast}
\citation{baird1995residual,sutton2009fast}
\citation{baird1995residual,sutton2009fast,maei2011gradient}
\newlabel{randomwalk}{{1}{6}{Random walk}{figure.1}{}}
\newlabel{randomwalk@cref}{{[figure][1][]1}{[1][6][]6}}
\newlabel{bairdexample}{{2}{6}{7-state version of Baird's off-policy counterexample}{figure.2}{}}
\newlabel{bairdexample@cref}{{[figure][2][]2}{[1][6][]6}}
\citation{schwartz1993reinforcement}
\citation{korda2015td}
\citation{xu2020reanalysis}
\newlabel{differenceRandVMQ}{{2}{7}{Difference between R-learning and tabular VMQ}{table.2}{}}
\newlabel{differenceRandVMQ@cref}{{[table][2][]2}{[1][6][]7}}
\newlabel{DependentFull}{{3(a)}{7}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{DependentFull@cref}{{[subfigure][1][3]3(a)}{[1][6][]7}}
\newlabel{sub@DependentFull}{{(a)}{7}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\newlabel{TabularFull}{{3(b)}{7}{Subfigure 3(b)}{subfigure.3.2}{}}
\newlabel{TabularFull@cref}{{[subfigure][2][3]3(b)}{[1][6][]7}}
\newlabel{sub@TabularFull}{{(b)}{7}{Subfigure 3(b)\relax }{subfigure.3.2}{}}
\newlabel{InvertedFull}{{3(c)}{7}{Subfigure 3(c)}{subfigure.3.3}{}}
\newlabel{InvertedFull@cref}{{[subfigure][3][3]3(c)}{[1][6][]7}}
\newlabel{sub@InvertedFull}{{(c)}{7}{Subfigure 3(c)\relax }{subfigure.3.3}{}}
\newlabel{CounterExampleFull}{{3(d)}{7}{Subfigure 3(d)}{subfigure.3.4}{}}
\newlabel{CounterExampleFull@cref}{{[subfigure][4][3]3(d)}{[1][6][]7}}
\newlabel{sub@CounterExampleFull}{{(d)}{7}{Subfigure 3(d)\relax }{subfigure.3.4}{}}
\newlabel{Evaluation_full}{{3}{7}{Learning curves of four evaluation environments}{figure.3}{}}
\newlabel{Evaluation_full@cref}{{[figure][3][]3}{[1][6][]7}}
\citation{Sutton2018book}
\citation{Sutton2018book}
\citation{schulman2015trust}
\citation{schulman2017proximal}
\citation{langley00}
\bibdata{example_paper}
\bibcite{baird1995residual}{{1}{1995}{{Baird et~al.}}{{}}}
\newlabel{MazeFull}{{4(a)}{8}{Subfigure 4(a)}{subfigure.4.1}{}}
\newlabel{MazeFull@cref}{{[subfigure][1][4]4(a)}{[1][6][]8}}
\newlabel{sub@MazeFull}{{(a)}{8}{Subfigure 4(a)\relax }{subfigure.4.1}{}}
\newlabel{CliffWalkingFull}{{4(b)}{8}{Subfigure 4(b)}{subfigure.4.2}{}}
\newlabel{CliffWalkingFull@cref}{{[subfigure][2][4]4(b)}{[1][6][]8}}
\newlabel{sub@CliffWalkingFull}{{(b)}{8}{Subfigure 4(b)\relax }{subfigure.4.2}{}}
\newlabel{MountainCarFull}{{4(c)}{8}{Subfigure 4(c)}{subfigure.4.3}{}}
\newlabel{MountainCarFull@cref}{{[subfigure][3][4]4(c)}{[1][6][]8}}
\newlabel{sub@MountainCarFull}{{(c)}{8}{Subfigure 4(c)\relax }{subfigure.4.3}{}}
\newlabel{AcrobotFull}{{4(d)}{8}{Subfigure 4(d)}{subfigure.4.4}{}}
\newlabel{AcrobotFull@cref}{{[subfigure][4][4]4(d)}{[1][6][]8}}
\newlabel{sub@AcrobotFull}{{(d)}{8}{Subfigure 4(d)\relax }{subfigure.4.4}{}}
\newlabel{Complete_full}{{4}{8}{Learning curves of four control environments}{figure.4}{}}
\newlabel{Complete_full@cref}{{[figure][4][]4}{[1][6][]8}}
\bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano et~al.}}{{Bas-Serrano, Curi, Krause, and Neu}}}
\bibcite{borkar1997stochastic}{{3}{1997}{{Borkar}}{{}}}
\bibcite{borkar2000ode}{{4}{2000}{{Borkar \& Meyn}}{{Borkar and Meyn}}}
\bibcite{chen2023modified}{{5}{2023}{{Chen et~al.}}{{Chen, Ma, Li, Yang, Yang, and Gao}}}
\bibcite{dalal2020tale}{{6}{2020}{{Dalal et~al.}}{{Dalal, Szorenyi, and Thoppe}}}
\bibcite{devlin2012dynamic}{{7}{2012}{{Devlin \& Kudenko}}{{Devlin and Kudenko}}}
\bibcite{feng2019kernel}{{8}{2019}{{Feng et~al.}}{{Feng, Li, and Liu}}}
\bibcite{givchi2015quasi}{{9}{2015}{{Givchi \& Palhang}}{{Givchi and Palhang}}}
\bibcite{hackman2012faster}{{10}{2012}{{Hackman}}{{}}}
\bibcite{hallak2016generalized}{{11}{2016}{{Hallak et~al.}}{{Hallak, Tamar, Munos, and Mannor}}}
\bibcite{hirsch1989convergent}{{12}{1989}{{Hirsch}}{{}}}
\bibcite{johnson2013accelerating}{{13}{2013}{{Johnson \& Zhang}}{{Johnson and Zhang}}}
\bibcite{korda2015td}{{14}{2015}{{Korda \& La}}{{Korda and La}}}
\bibcite{langley00}{{15}{2000}{{Langley}}{{}}}
\bibcite{liu2015finite}{{16}{2015}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}}
\bibcite{liu2016proximal}{{17}{2016}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}}
\bibcite{liu2018proximal}{{18}{2018}{{Liu et~al.}}{{Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}}}
\bibcite{maei2011gradient}{{19}{2011}{{Maei}}{{}}}
\bibcite{ng1999policy}{{20}{1999}{{Ng et~al.}}{{Ng, Harada, and Russell}}}
\bibcite{pan2017accelerated}{{21}{2017}{{Pan et~al.}}{{Pan, White, and White}}}
\bibcite{schulman2015trust}{{22}{2015}{{Schulman et~al.}}{{Schulman, Levine, Abbeel, Jordan, and Moritz}}}
\bibcite{schulman2017proximal}{{23}{2017}{{Schulman et~al.}}{{Schulman, Wolski, Dhariwal, Radford, and Klimov}}}
\bibcite{schwartz1993reinforcement}{{24}{1993}{{Schwartz}}{{}}}
\bibcite{sutton2009fast}{{25}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}}
\bibcite{sutton1988learning}{{26}{1988}{{Sutton}}{{}}}
\bibcite{Sutton2018book}{{27}{2018}{{Sutton \& Barto}}{{Sutton and Barto}}}
\bibcite{sutton2008convergent}{{28}{2008}{{Sutton et~al.}}{{Sutton, Maei, and Szepesv{\'a}ri}}}
\bibcite{sutton2016emphatic}{{29}{2016}{{Sutton et~al.}}{{Sutton, Mahmood, and White}}}
\bibcite{tsitsiklis1997analysis}{{30}{1997}{{Tsitsiklis \& Van~Roy}}{{Tsitsiklis and Van~Roy}}}
\bibcite{xu2019reanalysis}{{31}{2019}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}}
\bibcite{xu2020reanalysis}{{32}{2020}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}}
\bibcite{zhang2022truncated}{{33}{2022}{{Zhang \& Whiteson}}{{Zhang and Whiteson}}}
\bibcite{zhou2021machine}{{34}{2021}{{Zhou}}{{}}}
\bibstyle{icml2024}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{sutton2009fast}
\newlabel{proofcorollary4_2}{{A.1}{11}{}{subsection.A.1}{}}
\newlabel{proofcorollary4_2@cref}{{[subappendix][1][2147483647,1]A.1}{[1][11][]11}}
\newlabel{matrixassumption}{{A.1}{11}{}{theorem.A.1}{}}
\newlabel{matrixassumption@cref}{{[assumption][1][2147483647,1]A.1}{[1][11][]11}}
\newlabel{stepsizeassumption}{{A.2}{11}{}{theorem.A.2}{}}
\newlabel{stepsizeassumption@cref}{{[assumption][2][2147483647,1]A.2}{[1][11][]11}}
\newlabel{sparseprojection}{{A.3}{11}{}{theorem.A.3}{}}
\newlabel{sparseprojection@cref}{{[definition][3][2147483647,1]A.3}{[1][11][]11}}
\newlabel{sparseprojectiontheta}{{30}{11}{}{equation.A.30}{}}
\newlabel{sparseprojectiontheta@cref}{{[equation][30][2147483647]30}{[1][11][]11}}
\newlabel{sparseprojectionomega}{{31}{11}{}{equation.A.31}{}}
\newlabel{sparseprojectionomega@cref}{{[equation][31][2147483647]31}{[1][11][]11}}
\citation{hirsch1989convergent}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\newlabel{proofth2}{{A.2}{12}{}{subsection.A.2}{}}
\newlabel{proofth2@cref}{{[subappendix][2][2147483647,1]A.2}{[1][11][]12}}
\newlabel{thetavmtdcFastest}{{32}{12}{}{equation.A.32}{}}
\newlabel{thetavmtdcFastest@cref}{{[equation][32][2147483647]32}{[1][12][]12}}
\newlabel{uvmtdcFastest}{{33}{12}{}{equation.A.33}{}}
\newlabel{uvmtdcFastest@cref}{{[equation][33][2147483647]33}{[1][12][]12}}
\newlabel{omegavmtdcFastest}{{34}{12}{}{equation.A.34}{}}
\newlabel{omegavmtdcFastest@cref}{{[equation][34][2147483647]34}{[1][12][]12}}
\newlabel{omegavmtdcFastestFinal}{{35}{12}{}{equation.A.35}{}}
\newlabel{omegavmtdcFastestFinal@cref}{{[equation][35][2147483647]35}{[1][12][]12}}
\newlabel{omegavmtdcInfty}{{36}{12}{}{equation.A.36}{}}
\newlabel{omegavmtdcInfty@cref}{{[equation][36][2147483647]36}{[1][12][]12}}
\citation{hirsch1989convergent}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\newlabel{thetavmtdcFaster}{{37}{13}{}{equation.A.37}{}}
\newlabel{thetavmtdcFaster@cref}{{[equation][37][2147483647]37}{[1][13][]13}}
\newlabel{uvmtdcFaster}{{38}{13}{}{equation.A.38}{}}
\newlabel{uvmtdcFaster@cref}{{[equation][38][2147483647]38}{[1][13][]13}}
\newlabel{uvmtdcFasterFinal}{{39}{13}{}{equation.A.39}{}}
\newlabel{uvmtdcFasterFinal@cref}{{[equation][39][2147483647]39}{[1][13][]13}}
\newlabel{uvmtdcInfty}{{40}{13}{}{equation.A.40}{}}
\newlabel{uvmtdcInfty@cref}{{[equation][40][2147483647]40}{[1][13][]13}}
\newlabel{thetavmtdcSlowerFinal}{{42}{13}{}{equation.A.42}{}}
\newlabel{thetavmtdcSlowerFinal@cref}{{[equation][42][2147483647]42}{[1][13][]13}}
\newlabel{alg:algorithm 2}{{2}{14}{}{algorithm.2}{}}
\newlabel{alg:algorithm 2@cref}{{[algorithm][2][2147483647]2}{[1][14][]14}}
\newlabel{odethetavmtdcfinal}{{43}{14}{}{equation.A.43}{}}
\newlabel{odethetavmtdcfinal@cref}{{[equation][43][2147483647]43}{[1][14][]14}}
\newlabel{experimentaldetails}{{B}{14}{}{appendix.B}{}}
\newlabel{experimentaldetails@cref}{{[appendix][2][2147483647]B}{[1][14][]14}}
\newlabel{lrofways}{{3}{15}{Learning rates ($lr$) of four control experiments}{table.3}{}}
\newlabel{lrofways@cref}{{[table][3][2147483647]3}{[1][15][]15}}
\gdef \@abspage@last{15}
\begin{thebibliography}{34}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
\providecommand{\doi}[1]{doi: #1}\else
\providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi
\bibitem[Baird et~al.(1995)]{baird1995residual}
Baird, L. et~al.
\newblock Residual algorithms: Reinforcement learning with function approximation.
\newblock In \emph{Proc. 12th Int. Conf. Mach. Learn.}, pp.\ 30--37, 1995.
\bibitem[Bas-Serrano et~al.(2021)Bas-Serrano, Curi, Krause, and Neu]{basserrano2021logistic}
Bas-Serrano, J., Curi, S., Krause, A., and Neu, G.
\newblock Logistic q-learning.
\newblock In \emph{International Conference on Artificial Intelligence and Statistics}, pp.\ 3610--3618, 2021.
\bibitem[Borkar(1997)]{borkar1997stochastic}
Borkar, V.~S.
\newblock Stochastic approximation with two time scales.
\newblock \emph{Syst. \& Control Letters}, 29\penalty0 (5):\penalty0 291--294, 1997.
\bibitem[Borkar \& Meyn(2000)Borkar and Meyn]{borkar2000ode}
Borkar, V.~S. and Meyn, S.~P.
\newblock The ode method for convergence of stochastic approximation and reinforcement learning.
\newblock \emph{SIAM J. Control Optim.}, 38\penalty0 (2):\penalty0 447--469, 2000.
\bibitem[Chen et~al.(2023)Chen, Ma, Li, Yang, Yang, and Gao]{chen2023modified}
Chen, X., Ma, X., Li, Y., Yang, G., Yang, S., and Gao, Y.
\newblock Modified retrace for off-policy temporal difference learning.
\newblock In \emph{Uncertainty in Artificial Intelligence}, pp.\ 303--312. PMLR, 2023.
\bibitem[Dalal et~al.(2020)Dalal, Szorenyi, and Thoppe]{dalal2020tale}
Dalal, G., Szorenyi, B., and Thoppe, G.
\newblock A tale of two-timescale reinforcement learning with the tightest finite-time bound.
\newblock In \emph{Proceedings of the AAAI Conference on Artificial Intelligence}, volume~34, pp.\ 3701--3708, 2020.
\bibitem[Devlin \& Kudenko(2012)Devlin and Kudenko]{devlin2012dynamic}
Devlin, S. and Kudenko, D.
\newblock Dynamic potential-based reward shaping.
\newblock In \emph{Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems}, pp.\ 433--440, 2012.
\bibitem[Feng et~al.(2019)Feng, Li, and Liu]{feng2019kernel}
Feng, Y., Li, L., and Liu, Q.
\newblock A kernel loss for solving the bellman equation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 15430--15441, 2019.
\bibitem[Givchi \& Palhang(2015)Givchi and Palhang]{givchi2015quasi}
Givchi, A. and Palhang, M.
\newblock Quasi newton temporal difference learning.
\newblock In \emph{Asian Conference on Machine Learning}, pp.\ 159--172, 2015.
\bibitem[Hackman(2012)]{hackman2012faster}
Hackman, L.
\newblock \emph{Faster Gradient-TD Algorithms}.
\newblock PhD thesis, University of Alberta, 2012.
\bibitem[Hallak et~al.(2016)Hallak, Tamar, Munos, and Mannor]{hallak2016generalized}
Hallak, A., Tamar, A., Munos, R., and Mannor, S.
\newblock Generalized emphatic temporal difference learning: bias-variance analysis.
\newblock In \emph{Proceedings of the 30th AAAI Conference on Artificial Intelligence}, pp.\ 1631--1637, 2016.
\bibitem[Hirsch(1989)]{hirsch1989convergent}
Hirsch, M.~W.
\newblock Convergent activation dynamics in continuous time networks.
\newblock \emph{Neural Netw.}, 2\penalty0 (5):\penalty0 331--349, 1989.
\bibitem[Johnson \& Zhang(2013)Johnson and Zhang]{johnson2013accelerating}
Johnson, R. and Zhang, T.
\newblock Accelerating stochastic gradient descent using predictive variance reduction.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 315--323, 2013.
\bibitem[Korda \& La(2015)Korda and La]{korda2015td}
Korda, N. and La, P.
\newblock On td (0) with function approximation: Concentration bounds and a centered variant with exponential convergence.
\newblock In \emph{International conference on machine learning}, pp.\ 626--634. PMLR, 2015.
\bibitem[Langley(2000)]{langley00}
Langley, P.
\newblock Crafting papers on machine learning.
\newblock In Langley, P. (ed.), \emph{Proceedings of the 17th International Conference on Machine Learning (ICML 2000)}, pp.\ 1207--1216, Stanford, CA, 2000. Morgan Kaufmann.
\bibitem[Liu et~al.(2015)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik]{liu2015finite}
Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., and Petrik, M.
\newblock Finite-sample analysis of proximal gradient td algorithms.
\newblock In \emph{Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence}, pp.\ 504--513, 2015.
\bibitem[Liu et~al.(2016)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik]{liu2016proximal}
Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., and Petrik, M.
\newblock Proximal gradient temporal difference learning algorithms.
\newblock In \emph{Proceedings of the International Joint Conference on Artificial Intelligence}, pp.\ 4195--4199, 2016.
\bibitem[Liu et~al.(2018)Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik]{liu2018proximal}
Liu, B., Gemp, I., Ghavamzadeh, M., Liu, J., Mahadevan, S., and Petrik, M.
\newblock Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity.
\newblock \emph{Journal of Artificial Intelligence Research}, 63:\penalty0 461--494, 2018.
\bibitem[Maei(2011)]{maei2011gradient}
Maei, H.~R.
\newblock \emph{Gradient temporal-difference learning algorithms}.
\newblock PhD thesis, University of Alberta, 2011.
\bibitem[Ng et~al.(1999)Ng, Harada, and Russell]{ng1999policy}
Ng, A.~Y., Harada, D., and Russell, S.
\newblock Policy invariance under reward transformations: Theory and application to reward shaping.
\newblock In \emph{Proc. 16th Int. Conf. Mach. Learn.}, pp.\ 278--287, 1999.
\bibitem[Pan et~al.(2017)Pan, White, and White]{pan2017accelerated}
Pan, Y., White, A., and White, M.
\newblock Accelerated gradient temporal difference learning.
\newblock In \emph{Proceedings of the 21st AAAI Conference on Artificial Intelligence}, pp.\ 2464--2470, 2017.
\bibitem[Schulman et~al.(2015)Schulman, Levine, Abbeel, Jordan, and Moritz]{schulman2015trust}
Schulman, J., Levine, S., Abbeel, P., Jordan, M., and Moritz, P.
\newblock Trust region policy optimization.
\newblock In \emph{International Conference on Machine Learning}, pp.\ 1889--1897, 2015.
\bibitem[Schulman et~al.(2017)Schulman, Wolski, Dhariwal, Radford, and Klimov]{schulman2017proximal}
Schulman, J., Wolski, F., Dhariwal, P., Radford, A., and Klimov, O.
\newblock Proximal policy optimization algorithms.
\newblock \emph{arXiv preprint arXiv:1707.06347}, 2017.
\bibitem[Schwartz(1993)]{schwartz1993reinforcement}
Schwartz, A.
\newblock A reinforcement learning method for maximizing undiscounted rewards.
\newblock In \emph{Proc. 10th Int. Conf. Mach. Learn.}, volume 298, pp.\ 298--305, 1993.
\bibitem[Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora]{sutton2009fast}
Sutton, R., Maei, H., Precup, D., Bhatnagar, S., Silver, D., Szepesv{\'a}ri, C., and Wiewiora, E.
\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation.
\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, pp.\ 993--1000, 2009.
\bibitem[Sutton(1988)]{sutton1988learning}
Sutton, R.~S.
\newblock Learning to predict by the methods of temporal differences.
\newblock \emph{Machine learning}, 3\penalty0 (1):\penalty0 9--44, 1988.
\bibitem[Sutton \& Barto(2018)Sutton and Barto]{Sutton2018book}
Sutton, R.~S. and Barto, A.~G.
\newblock \emph{Reinforcement Learning: An Introduction}.
\newblock The MIT Press, second edition, 2018.
\bibitem[Sutton et~al.(2008)Sutton, Maei, and Szepesv{\'a}ri]{sutton2008convergent}
Sutton, R.~S., Maei, H.~R., and Szepesv{\'a}ri, C.
\newblock A convergent $ o (n) $ temporal-difference algorithm for off-policy learning with linear function approximation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 1609--1616. Cambridge, MA: MIT Press, 2008.
\bibitem[Sutton et~al.(2016)Sutton, Mahmood, and White]{sutton2016emphatic}
Sutton, R.~S., Mahmood, A.~R., and White, M.
\newblock An emphatic approach to the problem of off-policy temporal-difference learning.
\newblock \emph{The Journal of Machine Learning Research}, 17\penalty0 (1):\penalty0 2603--2631, 2016.
\bibitem[Tsitsiklis \& Van~Roy(1997)Tsitsiklis and Van~Roy]{tsitsiklis1997analysis}
Tsitsiklis, J.~N. and Van~Roy, B.
\newblock Analysis of temporal-difference learning with function approximation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 1075--1081, 1997.
\bibitem[Xu et~al.(2019)Xu, Wang, Zhou, and Liang]{xu2019reanalysis}
Xu, T., Wang, Z., Zhou, Y., and Liang, Y.
\newblock Reanalysis of variance reduced temporal difference learning.
\newblock In \emph{International Conference on Learning Representations}, 2019.
\bibitem[Xu et~al.(2020)Xu, Wang, Zhou, and Liang]{xu2020reanalysis}
Xu, T., Wang, Z., Zhou, Y., and Liang, Y.
\newblock Reanalysis of variance reduced temporal difference learning.
\newblock \emph{arXiv preprint arXiv:2001.01898}, 2020.
\bibitem[Zhang \& Whiteson(2022)Zhang and Whiteson]{zhang2022truncated}
Zhang, S. and Whiteson, S.
\newblock Truncated emphatic temporal difference methods for prediction and control.
\newblock \emph{The Journal of Machine Learning Research}, 23\penalty0 (1):\penalty0 6859--6917, 2022.
\bibitem[Zhou(2021)]{zhou2021machine}
Zhou, Z.-H.
\newblock \emph{Machine learning}.
\newblock Springer Nature, 2021.
\end{thebibliography}
@inproceedings{langley00,
author = {P. Langley},
title = {Crafting Papers on Machine Learning},
year = {2000},
pages = {1207--1216},
editor = {Pat Langley},
booktitle = {Proceedings of the 17th International Conference
on Machine Learning (ICML 2000)},
address = {Stanford, CA},
publisher = {Morgan Kaufmann}
}
@TechReport{mitchell80,
author = "T. M. Mitchell",
title = "The Need for Biases in Learning Generalizations",
institution = "Computer Science Department, Rutgers University",
year = "1980",
  address =      "New Brunswick, NJ",
}
@phdthesis{kearns89,
author = {M. J. Kearns},
title = {Computational Complexity of Machine Learning},
school = {Department of Computer Science, Harvard University},
year = {1989}
}
@Book{MachineLearningI,
editor = "R. S. Michalski and J. G. Carbonell and T.
M. Mitchell",
title = "Machine Learning: An Artificial Intelligence
Approach, Vol. I",
publisher = "Tioga",
year = "1983",
address = "Palo Alto, CA"
}
@Book{DudaHart2nd,
author = "R. O. Duda and P. E. Hart and D. G. Stork",
title = "Pattern Classification",
publisher = "John Wiley and Sons",
edition = "2nd",
year = "2000"
}
@misc{anonymous,
title= {Suppressed for Anonymity},
author= {Author, N. N.},
year= {2021}
}
@InCollection{Newell81,
author = "A. Newell and P. S. Rosenbloom",
title = "Mechanisms of Skill Acquisition and the Law of
Practice",
booktitle = "Cognitive Skills and Their Acquisition",
pages = "1--51",
publisher = "Lawrence Erlbaum Associates, Inc.",
year = "1981",
editor = "J. R. Anderson",
chapter = "1",
address = "Hillsdale, NJ"
}
@Article{Samuel59,
author = "A. L. Samuel",
title = "Some Studies in Machine Learning Using the Game of
Checkers",
journal = "IBM Journal of Research and Development",
year = "1959",
volume = "3",
number = "3",
pages = "211--229"
}
@book{em:86,
editor = "Engelmore, Robert and Morgan, Anthony",
title = "Blackboard Systems",
year = 1986,
address = "Reading, Mass.",
publisher = "Addison-Wesley",
}
@inproceedings{dalal2018finite,
title={Finite sample analyses for TD (0) with function approximation},
author={Dalal, Gal and Szorenyi, Balazs and Thoppe, Gugan and Mannor, Shie},
booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence},
pages={6144--6160},
year={2018}
}
@inproceedings{xu2019reanalysis,
title={Reanalysis of Variance Reduced Temporal Difference Learning},
author={Xu, Tengyu and Wang, Zhe and Zhou, Yi and Liang, Yingbin},
booktitle={International Conference on Learning Representations},
year={2019}
}
@inproceedings{c:83,
author = "Clancey, William J.",
year = 1983,
title = "{Communication, Simulation, and Intelligent
Agents: Implications of Personal Intelligent Machines
for Medical Education}",
booktitle="Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}",
pages = "556-560",
address = "Menlo Park, Calif",
publisher = "{IJCAI Organization}",
}
@inproceedings{c:84,
author = "Clancey, William J.",
year = 1984,
title = "{Classification Problem Solving}",
booktitle = "Proceedings of the Fourth National
Conference on Artificial Intelligence",
pages = "45-54",
address = "Menlo Park, Calif.",
publisher="AAAI Press",
}
@article{r:80,
author = {Robinson, Arthur L.},
title = {New Ways to Make Microcircuits Smaller},
volume = {208},
number = {4447},
pages = {1019--1022},
year = {1980},
doi = {10.1126/science.208.4447.1019},
publisher = {American Association for the Advancement of Science},
issn = {0036-8075},
URL = {https://science.sciencemag.org/content/208/4447/1019},
eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf},
journal = {Science},
}
@article{r:80x,
author = "Robinson, Arthur L.",
year = 1980,
title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}",
journal = "Science",
volume = 208,
pages = "1019-1026",
}
@article{hcr:83,
title = {Strategic explanations for a diagnostic consultation system},
journal = {International Journal of Man-Machine Studies},
volume = {20},
number = {1},
pages = {3-19},
year = {1984},
issn = {0020-7373},
doi = {https://doi.org/10.1016/S0020-7373(84)80003-6},
url = {https://www.sciencedirect.com/science/article/pii/S0020737384800036},
author = {Diane Warner Hasling and William J. Clancey and Glenn Rennels},
abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy. We describe the aspects of NEOMYCIN that make abstract strategic explanations possible---the representation of strategic knowledge explicitly and separately from domain knowledge---and demonstrate how this representation can be used to generate explanations.}
}
@article{hcrt:83,
author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas",
year = 1983,
title = "{Strategic Explanations in Consultation---Duplicate}",
journal = "The International Journal of Man-Machine Studies",
volume = 20,
number = 1,
pages = "3-19",
}
@techreport{r:86,
author = "Rice, James",
year = 1986,
title = "{Poligon: A System for Parallel Problem Solving}",
type = "Technical Report",
number = "KSL-86-19",
institution = "Dept.\ of Computer Science, Stanford Univ.",
}
@phdthesis{c:79,
author = "Clancey, William J.",
year = 1979,
title = "{Transfer of Rule-Based Expertise
through a Tutorial Dialogue}",
type = "{Ph.D.} diss.",
school = "Dept.\ of Computer Science, Stanford Univ.",
address = "Stanford, Calif.",
}
@unpublished{c:21,
author = "Clancey, William J.",
title = "{The Engineering of Qualitative Models}",
year = 2021,
note = "Forthcoming",
}
@misc{c:22,
title={Attention Is All You Need},
author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
year={2017},
eprint={1706.03762},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{c:23,
title = "Pluto: The 'Other' Red Planet",
author = "{NASA}",
howpublished = "\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}",
year = 2015,
note = "Accessed: 2018-12-06"
}
@article{r:80x,
author = "Robinson, Arthur L.",
year = 1980,
title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}",
journal = "Science",
volume = 208,
pages = "1019-1026",
}
@article{hcrt:83,
author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas",
year = 1983,
title = "{Strategic Explanations in Consultation---Duplicate}",
journal = "The International Journal of Man-Machine Studies",
volume = 20,
number = 1,
pages = "3-19",
}
@article{xu2013online,
title={Online learning control using adaptive critic designs with sparse kernel machines},
author={Xu, Xin and Hou, Zhongsheng and Lian, Chuanqiang and He, Haibo},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
volume={24},
number={5},
pages={762--775},
year={2013},
publisher={IEEE}
}
@article{bertsekas2017value,
title={Value and policy iterations in optimal control and adaptive dynamic programming},
author={Bertsekas, Dimitri P},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
year={2017},
volume={28},
number={3},
pages={500 - 509},
publisher={IEEE}
}
@phdthesis{hackman2012faster,
title={Faster Gradient-TD Algorithms},
author={Hackman, Leah},
year={2012},
school={University of Alberta}
}
@inproceedings{harutyunyan2015multi,
title={Multi-scale reward shaping via an off-policy ensemble},
author={Harutyunyan, Anna and Brys, Tim and Vrancx, Peter and Now{\'e}, Ann},
booktitle={Proc. 2015 Int. Conf. Autonomous Agents and Multiagent Systems},
pages={1641--1642},
year={2015},
organization={International Foundation for Autonomous Agents and Multiagent Systems}
}
@inproceedings{harutyunyan2015expressing,
title={Expressing Arbitrary Reward Functions as Potential-Based Advice.},
author={Harutyunyan, Anna and Devlin, Sam and Vrancx, Peter and Now{\'e}, Ann},
booktitle={AAAI},
pages={2652--2658},
year={2015}
}
@article{wiewiora2003potential,
title={Potential-based shaping and Q-value initialization are equivalent},
author={Wiewiora, Eric},
journal={J. Artif. Intell. Res.},
volume={19},
pages={205--208},
year={2003}
}
@article{grzes2010online,
title={Online learning of shaping rewards in reinforcement learning},
author={Grze{\'s}, Marek and Kudenko, Daniel},
journal={Neural Netw.},
volume={23},
number={4},
pages={541--550},
year={2010},
publisher={Elsevier}
}
@inproceedings{marthi2007automatic,
title={Automatic shaping and decomposition of reward functions},
author={Marthi, Bhaskara},
booktitle={Proc. 24th Int. Conf. Mach. Learn.},
pages={601--608},
year={2007}
}
@inproceedings{laud2003influence,
title={The Influence of Reward on the Speed of Reinforcement Learning: An Analysis of Shaping},
author={Laud, Adam and Dejong, Gerald},
booktitle={Proc. 20th Int. Conf. Mach. Learn.},
pages={440--447},
year={2003}
}
@phdthesis{laud2004theory,
title={Theory and application of reward shaping in reinforcement learning},
author={Laud, Adam Daniel},
year={2004},
school={University of Illinois at Urbana-Champaign}
}
@article{geist2013algorithmic,
title={Algorithmic survey of parametric value function approximation},
author={Geist, Matthieu and Pietquin, Olivier},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
volume={24},
number={6},
pages={845--867},
year={2013},
publisher={IEEE}
}
@article{furmston2016approximate,
title={Approximate Newton Methods for Policy Search in Markov Decision Processes},
author={Furmston, Thomas and Lever, Guy and Barber, David},
journal={J. Mach. Learn. Res.},
volume={17},
number={227},
pages={1--51},
year={2016}
}
@article{silver2016mastering,
title={Mastering the game of Go with deep neural networks and tree search},
author={Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
journal={Nature},
volume={529},
number={7587},
pages={484--489},
year={2016},
publisher={Nature Publishing Group}
}
@article{mnih2015human,
title={Human-level control through deep reinforcement learning},
author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
journal={Nature},
volume={518},
number={7540},
pages={529--533},
year={2015},
publisher={Nature Publishing Group}
}
@inproceedings{guo2014deep,
title={Deep learning for real-time Atari game play using offline Monte-Carlo tree search planning},
author={Guo, Xiaoxiao and Singh, Satinder and Lee, Honglak and Lewis, Richard L and Wang, Xiaoshi},
booktitle={Advances in Neural Information Processing Systems},
pages={3338--3346},
publisher={Cambridge, MA: MIT Press},
year={2014}
}
@inproceedings{scherrer2010should,
title={Should one compute the Temporal Difference fix point or minimize the Bellman Residual? The unified oblique projection view},
author={Scherrer, Bruno},
booktitle={Proc. 27th Int. Conf. Mach. Learn.},
pages={959--966},
year={2010}
}
@article{hirsch1989convergent,
title={Convergent activation dynamics in continuous time networks},
author={Hirsch, Morris W},
journal={Neural Netw.},
volume={2},
number={5},
pages={331--349},
year={1989},
publisher={Elsevier}
}
@article{borkar1997stochastic,
title={Stochastic approximation with two time scales},
author={Borkar, Vivek S},
journal={Syst. \& Control Letters},
volume={29},
number={5},
pages={291--294},
year={1997},
publisher={Elsevier}
}
@article{ortner2013adaptive,
title={Adaptive aggregation for reinforcement learning in average reward Markov decision processes},
author={Ortner, Ronald},
journal={Annals Oper. Res.},
volume={208},
number={1},
pages={321--336},
year={2013},
publisher={Springer}
}
@article{jaksch2010near,
title={Near-optimal regret bounds for reinforcement learning},
author={Jaksch, Thomas and Ortner, Ronald and Auer, Peter},
journal={Journal of Machine Learning Research},
number={Apr},
volume={11},
pages={1563--1600},
year={2010}
}
@article{ortner2007logarithmic,
title={Logarithmic online regret bounds for undiscounted reinforcement learning},
author={Ortner, P and Auer, R},
journal={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
volume={19},
pages={49},
year={2007}
}
@article{das1999solving,
title={Solving semi-Markov decision problems using average reward reinforcement learning},
author={Das, Tapas K and Gosavi, Abhijit and Mahadevan, Sridhar and Marchalleck, Nicholas},
journal={Management Science},
volume={45},
number={4},
pages={560--574},
year={1999},
publisher={INFORMS}
}
@article{abounadi2001learning,
title={Learning algorithms for Markov decision processes with average cost},
author={Abounadi, Jinane and Bertsekas, D and Borkar, Vivek S},
journal={SIAM J. Control Optim.},
volume={40},
number={3},
pages={681--698},
year={2001},
publisher={SIAM}
}
@inproceedings{singh1994reinforcement,
title={Reinforcement learning algorithms for average-payoff Markovian decision processes},
author={Singh, Satinder P},
booktitle={AAAI},
volume={94},
pages={700--705},
year={1994}
}
@inproceedings{schwartz1993reinforcement,
title={A reinforcement learning method for maximizing undiscounted rewards},
author={Schwartz, Anton},
booktitle={Proc. 10th Int. Conf. Mach. Learn.},
volume={298},
pages={298--305},
year={1993}
}
@inproceedings{yang2016efficient,
title={Efficient Average Reward Reinforcement Learning Using Constant Shifting Values},
author={Yang, Shangdong and Gao, Yang and An, Bo and Wang, Hao and Chen, Xingguo},
booktitle={Thirtieth AAAI Conference on Artificial Intelligence},
pages={2258-2264},
year={2016}
}
@inproceedings{devlin2012dynamic,
title={Dynamic potential-based reward shaping},
author={Devlin, Sam and Kudenko, Daniel},
booktitle={Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems},
pages={433--440},
year={2012}
}
@inproceedings{ng1999policy,
title={Policy invariance under reward transformations: Theory and application to reward shaping},
author={Ng, Andrew Y and Harada, Daishi and Russell, Stuart},
booktitle={Proc. 16th Int. Conf. Mach. Learn.},
pages={278--287},
year={1999}
}
@article{borkar2000ode,
title={The ODE method for convergence of stochastic approximation and reinforcement learning},
author={Borkar, Vivek S and Meyn, Sean P},
journal={SIAM J. Control Optim.},
volume={38},
number={2},
pages={447--469},
year={2000},
publisher={SIAM}
}
@phdthesis{maei2011gradient,
title={Gradient temporal-difference learning algorithms},
author={Maei, Hamid Reza},
year={2011},
school={University of Alberta}
}
@phdthesis{baird1999reinforcement,
title={Reinforcement learning through gradient descent},
author={Baird III, Leemon C},
year={1999},
school={US Air Force Academy, US}
}
@PHDTHESIS{Driessens2004,
AUTHOR ="Kurt Driessens",
TITLE ="Relational Reinforcement Learning",
SCHOOL ="Catholic University of Leuven",
YEAR ="2004",
}
@article{tsitsiklis1996feature,
title={Feature-based methods for large scale dynamic programming},
author={Tsitsiklis, John N and Van Roy, Benjamin},
journal={Mach. Learn.},
volume={22},
number={1-3},
pages={59--94},
year={1996},
publisher={Springer}
}
@inproceedings{chen2009apply,
title={Apply ant colony optimization to Tetris},
author={Chen, X. and Wang, H. and Wang, W. and Shi, Y. and Gao, Y.},
booktitle={Proceedings of the 11th Annual Conference on Genetic and Evolutionary Computation (GECCO)},
pages={1741--1742},
year={2009},
organization={ACM}
}
@incollection{farias2006tetris,
title={Tetris: A study of randomized constraint sampling},
author={Farias, Vivek F and Van Roy, Benjamin},
booktitle={Probabilistic and Randomized Methods for Design Under Uncertainty},
pages={189--201},
year={2006},
publisher={Springer}
}
@article{bertsekas1996temporal,
title={Temporal differences-based policy iteration and applications in neuro-dynamic programming},
author={Bertsekas, Dimitri P and Ioffe, Sergey},
journal={Lab. for Info. and Decision Systems Report LIDS-P-2349, MIT, Cambridge, MA},
year={1996},
publisher={Citeseer}
}
@inproceedings{kakade2001natural,
title={A Natural Policy Gradient.},
author={Kakade, Sham},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
volume={14},
pages={1531--1538},
year={2001}
}
@article{peters2008natural,
title={Natural actor-critic},
author={Peters, Jan and Schaal, Stefan},
journal={Neurocomputing},
volume={71},
number={7},
pages={1180--1190},
year={2008},
publisher={Elsevier}
}
@article{baxter2001infinite,
title={Infinite-horizon policy-gradient estimation},
author={Baxter, Jonathan and Bartlett, Peter L.},
journal={J. Artif. Intell. Res.},
pages={319--350},
year={2001}
}
@inproceedings{sutton1999policy,
title={Policy Gradient Methods for Reinforcement Learning with Function Approximation.},
author={Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay and others},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={1057--1063},
year={1999}
}
@inproceedings{bohm2005evolutionary,
title={An evolutionary approach to tetris},
author={B{\"o}hm, Niko and K{\'o}kai, Gabriella and Mandl, Stefan},
booktitle={Proc. 6th Metaheuristics Int. Conf.},
pages={137-148},
year={2005}
}
@article{szita2006learning,
title={Learning Tetris using the noisy cross-entropy method},
author={Szita, Istv{\'a}n and L{\"o}rincz, Andr{\'a}s},
journal={Neural Comput.},
volume={18},
number={12},
pages={2936--2941},
year={2006},
publisher={MIT Press}
}
@inproceedings{thiery2010least,
title={Least-Squares $\lambda$ Policy Iteration: Bias-Variance Trade-off in Control Problems},
author={Thiery, Christophe and Scherrer, Bruno},
booktitle={Proc. 27th Int. Conf. Mach. Learn.},
pages={1071--1078},
year={2010}
}
@inproceedings{gabillon2013approximate,
title={Approximate dynamic programming finally performs well in the game of Tetris},
author={Gabillon, Victor and Ghavamzadeh, Mohammad and Scherrer, Bruno},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={1754--1762},
year={2013}
}
@article{scherrer2013performance,
title={Performance bounds for $\lambda$ policy iteration and application to the game of Tetris},
author={Scherrer, Bruno},
journal={J. Mach. Learn. Res.},
volume={14},
number={1},
pages={1181--1227},
year={2013},
publisher={JMLR. org}
}
@article{thiery2009improvements,
title={Improvements on Learning Tetris with Cross Entropy},
author={Thiery, Christophe and Scherrer, Bruno},
journal={Int. Computer Games Assoc. J.},
volume={32},
number={1},
pages={23--33},
year={2009}
}
@article{scherrer2015approximate,
title={Approximate Modified Policy Iteration and its Application to the Game of Tetris},
author={Scherrer, Bruno and Ghavamzadeh, Mohammad and Gabillon, Victor and Lesner, Boris and Geist, Matthieu},
journal={J. Mach. Learn. Res.},
volume={16},
pages={1629--1676},
year={2015}
}
@article{efron2004least,
title={Least angle regression},
author={Efron, Bradley and Hastie, Trevor and Johnstone, Iain and Tibshirani, Robert and others},
journal={The Annals of statistics},
volume={32},
number={2},
pages={407--499},
year={2004},
publisher={Institute of Mathematical Statistics}
}
@MASTERSTHESIS{Brzustowski1992,
author ={John Brzustowski},
title ={Can you win at tetris?},
school = {University of British Columbia},
year ={1992}
}
@Article{Breukelaar04,
author = {Ron Breukelaar and Erik D. Demaine and Susan
Hohenberger and Hendrik Jan Hoogeboom and Walter
A. Kosters and David Liben-Nowell},
title = {Tetris is Hard, Even to Approximate},
journal = {International Journal of Computational Geometry and
Applications},
year = {2004},
volume = {14},
number = {1--2},
pages = {41--68},
month = {April},
}
@book{Bertsekas1996,
author = {Bertsekas, D. and Tsitsiklis, J. N.},
title = {Neuro-Dynamic Programming},
year = {1996},
publisher = {Athena Scientific},
}
@inproceedings{maei2010gq,
title={GQ ($\lambda$): A general gradient algorithm for temporal-difference prediction learning with eligibility traces},
author={Maei, Hamid Reza and Sutton, Richard S},
booktitle={Proceedings of the Third Conference on Artificial General Intelligence},
volume={1},
pages={91--96},
year={2010}
}
@inproceedings{maei2010toward,
title={Toward off-policy learning control with function approximation},
author={Maei, Hamid R and Szepesv{\'a}ri, Csaba and Bhatnagar, Shalabh and Sutton, Richard S},
booktitle={Proc. 27th Int. Conf. Mach. Learn.},
pages={719--726},
year={2010}
}
@inproceedings{phua2007tracking,
title={Tracking value function dynamics to improve reinforcement learning with piecewise linear function approximation},
author={Phua, Chee Wee and Fitch, Robert},
booktitle={Proc. 24th Int. Conf. Mach. Learn.},
pages={751--758},
year={2007},
organization={ACM}
}
@inproceedings{szubert2014temporal,
title={Temporal difference learning of N-tuple networks for the game 2048},
author={Szubert, Marcin and Jaskowski, Wojciech},
booktitle={2014 IEEE Conference on Computational Intelligence and Games (CIG)},
pages={1--8},
year={2014},
organization={IEEE}
}
@article{chen2013online,
  title={Online Selective Kernel-based Temporal Difference Learning},
author={Chen, Xingguo and Gao, Yang and Wang, Ruili},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
year={2013},
volume={24},
number={12},
pages={1944--1956},
publisher={IEEE}
}
@article{xu2007kernel,
title={Kernel-based least squares policy iteration for reinforcement learning},
author={Xu, Xin and Hu, Dewen and Lu, Xicheng},
journal={IEEE Trans. Neural Netw.},
volume={18},
number={4},
pages={973--992},
year={2007},
publisher={IEEE}
}
@INPROCEEDINGS{Engel03bayesmeets,
author = {Yaakov Engel and Shie Mannor and Ron Meir},
title = {Bayes meets {B}ellman: the {G}aussian process approach to temporal difference learning},
booktitle = {Proc. 20th Int. Conf. Mach. Learn.},
year = {2003},
pages = {154--161},
address={Washington, DC},
month={Aug.},
}
@inproceedings{robards2011sparse,
title={Sparse Kernel-SARSA ($\lambda$) with an eligibility trace},
author={Robards, M. and Sunehag, P. and Sanner, S. and Marthi, B.},
booktitle = {Proc. 22nd Eur. Conf. Mach. Learn.},
pages={1--17},
year={2011},
month={Sept.},
address = {Athens, Greece},
}
@conference{reisinger2008online,
title={{Online kernel selection for {B}ayesian reinforcement learning}},
author={Reisinger, J. and Stone, P. and Miikkulainen, R.},
booktitle={Proc. 25th Int. Conf. Mach. Learn.},
pages={816--823},
year={2008},
month={July},
address={ Helsinki, Finland},
}
@book{Sutton1998,
title={{Reinforcement learning: an introduction}},
author={Sutton, R.S. and Barto, A.G.},
year={1998},
publisher={MIT Press},
address={Cambridge, MA}
}
@book{Sutton2018book,
author = {Sutton, Richard S. and Barto, Andrew G.},
edition = {Second},
publisher = {The MIT Press},
title = {Reinforcement Learning: An Introduction},
  year      = {2018}
}
@phdthesis{Bradtke1994phd,
title={Incremental Dynamic Programming for On-line Adaptive Optimal Control},
author={Bradtke, Steven J},
year={1994},
school={University of Massachusetts},
month={Sept.},
address={Amherst},
}
@inproceedings{baird1995residual,
title={Residual algorithms: Reinforcement learning with function approximation},
author={Baird, Leemon and others},
booktitle={Proc. 12th Int. Conf. Mach. Learn.},
pages={30--37},
year={1995}
}
@article{bradtke1996linear,
title={Linear least-squares algorithms for temporal difference learning},
author={Bradtke, S.J. and Barto, A.G.},
journal={Mach. Learn.},
volume={22},
number={1},
pages={33--57},
year={1996},
publisher={Springer}
}
@article{lagoudakis2003least,
title={Least-squares policy iteration},
author={Lagoudakis, M.G. and Parr, R.},
journal={J. Mach. Learn. Res.},
volume={4},
pages={1107--1149},
year={2003},
publisher={JMLR. org}
}
@article{boyan2002technical,
title={Technical update: Least-squares temporal difference learning},
author={Boyan, J.A.},
journal={Mach. Learn.},
volume={49},
number={2},
pages={233--246},
year={2002},
publisher={Springer}
}
@inproceedings{geramifard2006incremental,
title={Incremental least-squares temporal difference learning},
author={Geramifard, A. and Bowling, M. and Sutton, R.S.},
booktitle={Proc. 21st AAAI Conf. Artif. Intell.},
pages={356--361},
year={2006},
month={July},
address={Boston, Massachusetts},
}
@inproceedings{sutton2009fast,
title={Fast gradient-descent methods for temporal-difference learning with linear function approximation},
author={Sutton, R.S. and Maei, H.R. and Precup, D. and Bhatnagar, S. and Silver, D. and Szepesv{\'a}ri, C. and Wiewiora, E.},
booktitle={Proc. 26th Int. Conf. Mach. Learn.},
pages={993--1000},
year={2009}
}
@inproceedings{sutton2008convergent,
title={A Convergent $ O (n) $ Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation},
author={Sutton, Richard S and Maei, Hamid R and Szepesv{\'a}ri, Csaba},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={1609--1616},
year={2008}
}
@inproceedings{dabney2014natural,
title={Natural Temporal Difference Learning},
author={Dabney, William and Thomas, Philip},
booktitle={Twenty-Eighth AAAI Conference on Artificial Intelligence},
year={2014}
}
@inproceedings{mahmood2014weighted,
title={Weighted importance sampling for off-policy learning with linear function approximation},
author={Mahmood, A Rupam and van Hasselt, Hado P and Sutton, Richard S},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={3014--3022},
year={2014}
}
@inproceedings{seijen2014true,
title={True Online TD ($\lambda$)},
author={Seijen, Harm V and Sutton, Rich},
booktitle={Proc. 31st Int. Conf. Mach. Learn.},
pages={692--700},
year={2014}
}
@article{ormoneit2002kernel,
title={{Kernel-based reinforcement learning}},
author={Ormoneit, D. and Sen, {\'S}.},
journal={Mach. Learn.},
volume={49},
number={2-3},
pages={161--178},
issn={0885-6125},
year={2002},
publisher={Springer-Verlag },
address = {Hingham, MA, USA},
}
@inproceedings{Ghavamzadeh2010lstd,
author = {M. Ghavamzadeh and A. Lazaric and O. A. Maillard and R. Munos},
title = {{LSTD} with Random Projections},
BOOKTITLE={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
volume = {23},
pages = {721--729},
Address = {Lake Tahoe, Nevada, USA},
year = {2010}
}
@inproceedings{loth2007sparse,
title={Sparse temporal difference learning using LASSO},
author={Loth, M. and Davy, M. and Preux, P.},
booktitle={Proc. IEEE Symp. Approx. Dynamic Program. Reinforce. Learn.},
pages={352--359},
year={2007},
organization={IEEE}
}
@inproceedings{kolter2009regularization,
title={Regularization and feature selection in least-squares temporal difference learning},
author={Kolter, J.Z. and Ng, A.Y.},
booktitle={Proc. 26th Int. Conf. Mach. Learn.},
pages={521--528},
year={2009},
organization={ACM}
}
@inproceedings{hoffman2011regularized,
title={Regularized least squares temporal difference learning with nested l2 and l1 penalization},
author={Hoffman, M.W. and Lazaric, A. and Ghavamzadeh, M. and Munos, R.},
booktitle={Proc. Eur. Workshop Reinforce. Learn.},
year={2011}
}
@inproceedings{Ghavamzadeh2011finite,
author = {M. Ghavamzadeh and A. Lazaric and R. Munos and M. Hoffman},
title = {Finite-Sample Analysis of {Lasso-TD}},
booktitle = {Proc. 28th Int. Conf. Mach. Learn.},
year = {2011},
month= {June},
address={Bellevue, Washington, USA},
pages={1177--1184},
}
@inproceedings{johnson2013accelerating,
title={Accelerating stochastic gradient descent using predictive variance reduction},
author={Johnson, R. and Zhang, T.},
booktitle={Advances in Neural Information Processing Systems},
pages={315--323},
year={2013}
}
@article{xu2020reanalysis,
title={Reanalysis of variance reduced temporal difference learning},
author={Xu, T. and Wang, Z. and Zhou, Y. and Liang, Y.},
journal={arXiv preprint arXiv:2001.01898},
year={2020}
}
@inproceedings{schulman2015trust,
title={Trust region policy optimization},
author={Schulman, J. and Levine, S. and Abbeel, P. and Jordan, M. and Moritz, P.},
booktitle={International Conference on Machine Learning},
pages={1889--1897},
year={2015}
}
@article{schulman2017proximal,
title={Proximal policy optimization algorithms},
author={Schulman, J. and Wolski, F. and Dhariwal, P. and Radford, A. and Klimov, O.},
journal={arXiv preprint arXiv:1707.06347},
year={2017}
}
@inproceedings{defazio2014saga,
title={SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives},
author={Defazio, A. and Bach, F. and Lacoste-Julien, S.},
booktitle={Advances in Neural Information Processing Systems},
pages={1646--1654},
year={2014}
}
@inproceedings{du2017stochastic,
title={Stochastic variance reduction methods for policy evaluation},
author={Du, S. S. and Chen, J. and Li, L. and Xiao, L. and Zhou, D.},
booktitle={Proceedings of the 34th International Conference on Machine Learning},
pages={1049--1058},
year={2017}
}
@inproceedings{chen2023modified,
title={Modified Retrace for Off-Policy Temporal Difference Learning},
author={Chen, Xingguo and Ma, Xingzhou and Li, Yang and Yang, Guang and Yang, Shangdong and Gao, Yang},
booktitle={Uncertainty in Artificial Intelligence},
pages={303--312},
year={2023},
organization={PMLR}
}
@article{dalal2017finite,
title={Finite Sample Analyses for TD(0) with Function Approximation},
author={Dalal, Gal and Szörényi, Balázs and Thoppe, Gugan and Mannor, Shie},
journal={arXiv preprint arXiv:1704.01161},
year={2017}
}
@article{sutton1988learning,
title={Learning to predict by the methods of temporal differences},
author={Sutton, Richard S},
journal={Machine learning},
volume={3},
number={1},
pages={9--44},
year={1988},
publisher={Springer}
}
@inproceedings{tsitsiklis1997analysis,
  title={Analysis of temporal-difference learning with function approximation},
author={Tsitsiklis, John N and Van Roy, Benjamin},
booktitle={Advances in Neural Information Processing Systems},
pages={1075--1081},
year={1997}
}
@article{sutton2016emphatic,
title={An emphatic approach to the problem of off-policy temporal-difference learning},
author={Sutton, Richard S and Mahmood, A Rupam and White, Martha},
journal={The Journal of Machine Learning Research},
volume={17},
number={1},
pages={2603--2631},
year={2016},
publisher={JMLR. org}
}
@inproceedings{liu2015finite,
title={Finite-sample analysis of proximal gradient TD algorithms},
author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek},
booktitle={Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence},
pages={504--513},
year={2015}
}
@inproceedings{liu2016proximal,
title={Proximal Gradient Temporal Difference Learning Algorithms.},
author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek},
booktitle={Proceedings of the International Joint Conference on Artificial Intelligence},
pages={4195--4199},
year={2016}
}
@article{liu2018proximal,
title={Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity},
author={Liu, Bo and Gemp, Ian and Ghavamzadeh, Mohammad and Liu, Ji and Mahadevan, Sridhar and Petrik, Marek},
journal={Journal of Artificial Intelligence Research},
volume={63},
pages={461--494},
year={2018}
}
@inproceedings{givchi2015quasi,
title={Quasi newton temporal difference learning},
author={Givchi, Arash and Palhang, Maziar},
booktitle={Asian Conference on Machine Learning},
pages={159--172},
year={2015}
}
@inproceedings{pan2017accelerated,
title={Accelerated gradient temporal difference learning},
author={Pan, Yangchen and White, Adam and White, Martha},
booktitle={Proceedings of the 31st AAAI Conference on Artificial Intelligence},
pages={2464--2470},
year={2017}
}
@inproceedings{hallak2016generalized,
title={Generalized emphatic temporal difference learning: bias-variance analysis},
author={Hallak, Assaf and Tamar, Aviv and Munos, Remi and Mannor, Shie},
booktitle={Proceedings of the 30th AAAI Conference on Artificial Intelligence},
pages={1631--1637},
year={2016}
}
@article{zhang2022truncated,
title={Truncated emphatic temporal difference methods for prediction and control},
author={Zhang, Shangtong and Whiteson, Shimon},
journal={The Journal of Machine Learning Research},
volume={23},
number={1},
pages={6859--6917},
year={2022},
publisher={JMLR.org}
}
@inproceedings{korda2015td,
title={On TD (0) with function approximation: Concentration bounds and a centered variant with exponential convergence},
author={Korda, Nathaniel and La, Prashanth},
booktitle={International conference on machine learning},
pages={626--634},
year={2015},
organization={PMLR}
}
@book{zhou2021machine,
title={Machine learning},
author={Zhou, Zhi-Hua},
year={2021},
publisher={Springer Nature}
}
@inproceedings{dalal2020tale,
title={A tale of two-timescale reinforcement learning with the tightest finite-time bound},
author={Dalal, Gal and Szorenyi, Balazs and Thoppe, Gugan},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={34},
number={04},
pages={3701--3708},
year={2020}
}
@inproceedings{feng2019kernel,
title={A kernel loss for solving the Bellman equation},
author={Feng, Yihao and Li, Lihong and Liu, Qiang},
booktitle={Advances in Neural Information Processing Systems},
pages={15430--15441},
year={2019}
}
@inproceedings{basserrano2021logistic,
title={Logistic Q-Learning},
author={Bas-Serrano, Joan and Curi, Sebastian and Krause, Andreas and Neu, Gergely},
booktitle={International Conference on Artificial Intelligence and Statistics},
pages={3610--3618},
year={2021}
}
This is BibTeX, Version 0.99d (TeX Live 2023)
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: example_paper.aux
The style file: icml2024.bst
Database file #1: example_paper.bib
Warning--can't use both volume and number fields in dalal2020tale
You've used 34 entries,
2773 wiz_defined-function locations,
790 strings with 10645 characters,
and the built_in function-call counts, 19097 in all, are:
= -- 1771
> -- 980
< -- 39
+ -- 332
- -- 298
* -- 1422
:= -- 2705
add.period$ -- 106
call.type$ -- 34
change.case$ -- 197
chr.to.int$ -- 34
cite$ -- 69
duplicate$ -- 950
empty$ -- 1596
format.name$ -- 349
if$ -- 4208
int.to.chr$ -- 1
int.to.str$ -- 1
missing$ -- 34
newline$ -- 178
num.names$ -- 138
pop$ -- 445
preamble$ -- 1
purify$ -- 166
quote$ -- 0
skip$ -- 806
stack$ -- 0
substring$ -- 965
swap$ -- 257
text.length$ -- 22
text.prefix$ -- 0
top$ -- 0
type$ -- 364
warning$ -- 1
while$ -- 176
width$ -- 0
write$ -- 452
(There was 1 warning)
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 29 JAN 2024 15:09
entering extended mode
restricted \write18 enabled.
file:line:error style messages enabled.
%&-line parsing enabled.
**example_paper
(./example_paper.tex
LaTeX2e <2022-11-01> patch level 1
L3 programming layer <2023-02-22> (d:/software/texlive/2023/texmf-dist/tex/latex/base/article.cls
Document Class: article 2022/07/02 v1.4n Standard LaTeX document class
(d:/software/texlive/2023/texmf-dist/tex/latex/base/size10.clo
File: size10.clo 2022/07/02 v1.4n Standard LaTeX file (size option)
)
\c@part=\count185
\c@section=\count186
\c@subsection=\count187
\c@subsubsection=\count188
\c@paragraph=\count189
\c@subparagraph=\count190
\c@figure=\count191
\c@table=\count192
\abovecaptionskip=\skip48
\belowcaptionskip=\skip49
\bibindent=\dimen140
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/microtype.sty
Package: microtype 2023/03/13 v3.1a Micro-typographical refinements (RS)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
\KV@toks@=\toks16
) (d:/software/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty
Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW)
\etb@tempcnta=\count193
)
\MT@toks=\toks17
\MT@tempbox=\box51
\MT@count=\count194
LaTeX Info: Redefining \noprotrusionifhmode on input line 1059.
LaTeX Info: Redefining \leftprotrusion on input line 1060.
\MT@prot@toks=\toks18
LaTeX Info: Redefining \rightprotrusion on input line 1078.
LaTeX Info: Redefining \textls on input line 1368.
\MT@outer@kern=\dimen141
LaTeX Info: Redefining \textmicrotypecontext on input line 1988.
\MT@listname@count=\count195
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/microtype-pdftex.def
File: microtype-pdftex.def 2023/03/13 v3.1a Definitions specific to pdftex (RS)
LaTeX Info: Redefining \lsstyle on input line 902.
LaTeX Info: Redefining \lslig on input line 902.
\MT@outer@space=\skip50
)
Package microtype Info: Loading configuration file microtype.cfg.
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/microtype.cfg
File: microtype.cfg 2023/03/13 v3.1a microtype main configuration file (RS)
)) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty
Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty
Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty
Package: trig 2021/08/11 v1.11 sin cos tan (DPC)
) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: pdftex.def on input line 107.
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def
File: pdftex.def 2022/09/22 v1.2b Graphics/color driver for pdftex
))
\Gin@req@height=\dimen142
\Gin@req@width=\dimen143
) (d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.sty
Package: subfigure 2002/03/15 v2.1.5 subfigure package
\subfigtopskip=\skip51
\subfigcapskip=\skip52
\subfigcaptopadj=\dimen144
\subfigbottomskip=\skip53
\subfigcapmargin=\dimen145
\subfiglabelskip=\skip54
\c@subfigure=\count196
\c@subtable=\count197
****************************************
* Local config file subfigure.cfg used *
****************************************
(d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.cfg)
\subfig@top=\skip55
\subfig@bottom=\skip56
) (d:/software/texlive/2023/texmf-dist/tex/latex/diagbox/diagbox.sty
Package: diagbox 2020/02/09 v2.3 Making table heads with diagonal lines
(d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.sty
Package: pict2e 2020/09/30 v0.4b Improved picture commands (HjG,RN,JT)
(d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.cfg
File: pict2e.cfg 2016/02/05 v0.1u pict2e configuration for teTeX/TeXLive
)
Package pict2e Info: Driver file: pdftex.def on input line 112.
Package pict2e Info: Driver file for pict2e: p2e-pdftex.def on input line 114.
(d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/p2e-pdftex.def
File: p2e-pdftex.def 2016/02/05 v0.1u Driver-dependant file (RN,HjG,JT)
)
\pIIe@GRAPH=\toks19
\@arclen=\dimen146
\@arcrad=\dimen147
\pIIe@tempdima=\dimen148
\pIIe@tempdimb=\dimen149
\pIIe@tempdimc=\dimen150
\pIIe@tempdimd=\dimen151
\pIIe@tempdime=\dimen152
\pIIe@tempdimf=\dimen153
) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/calc.sty
Package: calc 2017/05/25 v4.3 Infix arithmetic (KKT,FJ)
\calc@Acount=\count198
\calc@Bcount=\count199
\calc@Adimen=\dimen154
\calc@Bdimen=\dimen155
\calc@Askip=\skip57
\calc@Bskip=\skip58
LaTeX Info: Redefining \setlength on input line 80.
LaTeX Info: Redefining \addtolength on input line 81.
\calc@Ccount=\count266
\calc@Cskip=\skip59
) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/array.sty
Package: array 2022/09/04 v2.5g Tabular extension package (FMi)
\col@sep=\dimen156
\ar@mcellbox=\box52
\extrarowheight=\dimen157
\NC@list=\toks20
\extratabsurround=\skip60
\backup@length=\skip61
\ar@cellbox=\box53
)
\diagbox@boxa=\box54
\diagbox@boxb=\box55
\diagbox@boxm=\box56
\diagbox@wd=\dimen158
\diagbox@ht=\dimen159
\diagbox@insepl=\dimen160
\diagbox@insepr=\dimen161
\diagbox@outsepl=\dimen162
\diagbox@outsepr=\dimen163
) (d:/software/texlive/2023/texmf-dist/tex/latex/wrapfig/wrapfig.sty
\wrapoverhang=\dimen164
\WF@size=\dimen165
\c@WF@wrappedlines=\count267
\WF@box=\box57
\WF@everypar=\toks21
Package: wrapfig 2003/01/31 v 3.6
) (d:/software/texlive/2023/texmf-dist/tex/latex/booktabs/booktabs.sty
Package: booktabs 2020/01/12 v1.61803398 Publication quality tables
\heavyrulewidth=\dimen166
\lightrulewidth=\dimen167
\cmidrulewidth=\dimen168
\belowrulesep=\dimen169
\belowbottomsep=\dimen170
\aboverulesep=\dimen171
\abovetopsep=\dimen172
\cmidrulesep=\dimen173
\cmidrulekern=\dimen174
\defaultaddspace=\dimen175
\@cmidla=\count268
\@cmidlb=\count269
\@aboverulesep=\dimen176
\@belowrulesep=\dimen177
\@thisruleclass=\count270
\@lastruleclass=\count271
\@thisrulewidth=\dimen178
) (d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/hyperref.sty
Package: hyperref 2023-02-07 v7.00v Hypertext links for LaTeX
(d:/software/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/iftex/iftex.sty
Package: iftex 2022/02/03 v1.0f TeX engine tests
) (d:/software/texlive/2023/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty
Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO)
(d:/software/texlive/2023/texmf-dist/tex/generic/infwarerr/infwarerr.sty
Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
)
Package pdftexcmds Info: \pdf@primitive is available.
Package pdftexcmds Info: \pdf@ifprimitive is available.
Package pdftexcmds Info: \pdfdraftmode found.
) (d:/software/texlive/2023/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty
Package: kvsetkeys 2022-10-05 v1.19 Key value parser (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pdfescape/pdfescape.sty
Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/hycolor/hycolor.sty
Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/auxhook/auxhook.sty
Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/nameref.sty
Package: nameref 2022-05-17 v2.50 Cross-referencing by name of section
(d:/software/texlive/2023/texmf-dist/tex/latex/refcount/refcount.sty
Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
(d:/software/texlive/2023/texmf-dist/tex/latex/kvoptions/kvoptions.sty
Package: kvoptions 2022-06-15 v3.15 Key value format for package options (HO)
))
\c@section@level=\count272
)
\@linkdim=\dimen179
\Hy@linkcounter=\count273
\Hy@pagecounter=\count274
(d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/pd1enc.def
File: pd1enc.def 2023-02-07 v7.00v Hyperref: PDFDocEncoding definition (HO)
Now handling font encoding PD1 ...
... no UTF-8 mapping file for font encoding PD1
) (d:/software/texlive/2023/texmf-dist/tex/generic/intcalc/intcalc.sty
Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/etexcmds/etexcmds.sty
Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
)
\Hy@SavedSpaceFactor=\count275
(d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/puenc.def
File: puenc.def 2023-02-07 v7.00v Hyperref: PDF Unicode definition (HO)
Now handling font encoding PU ...
... no UTF-8 mapping file for font encoding PU
)
Package hyperref Info: Hyper figures OFF on input line 4177.
Package hyperref Info: Link nesting OFF on input line 4182.
Package hyperref Info: Hyper index ON on input line 4185.
Package hyperref Info: Plain pages OFF on input line 4192.
Package hyperref Info: Backreferencing OFF on input line 4197.
Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
Package hyperref Info: Bookmarks ON on input line 4425.
\c@Hy@tempcnt=\count276
(d:/software/texlive/2023/texmf-dist/tex/latex/url/url.sty
\Urlmuskip=\muskip16
Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
)
LaTeX Info: Redefining \url on input line 4763.
\XeTeXLinkMargin=\dimen180
(d:/software/texlive/2023/texmf-dist/tex/generic/bitset/bitset.sty
Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
(d:/software/texlive/2023/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO)
))
\Fld@menulength=\count277
\Field@Width=\dimen181
\Fld@charsize=\dimen182
Package hyperref Info: Hyper figures OFF on input line 6042.
Package hyperref Info: Link nesting OFF on input line 6047.
Package hyperref Info: Hyper index ON on input line 6050.
Package hyperref Info: backreferencing OFF on input line 6057.
Package hyperref Info: Link coloring OFF on input line 6062.
Package hyperref Info: Link coloring with OCG OFF on input line 6067.
Package hyperref Info: PDF/A mode OFF on input line 6072.
(d:/software/texlive/2023/texmf-dist/tex/latex/base/atbegshi-ltx.sty
Package: atbegshi-ltx 2021/01/10 v1.0c Emulation of the original atbegshi
package with kernel methods
)
\Hy@abspage=\count278
\c@Item=\count279
\c@Hfootnote=\count280
)
Package hyperref Info: Driver (autodetected): hpdftex.
(d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/hpdftex.def
File: hpdftex.def 2023-02-07 v7.00v Hyperref driver for pdfTeX
(d:/software/texlive/2023/texmf-dist/tex/latex/base/atveryend-ltx.sty
Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atveryend package
with kernel methods
)
\Fld@listcount=\count281
\c@bookmark@seq@number=\count282
(d:/software/texlive/2023/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
Package: rerunfilecheck 2022-07-10 v1.10 Rerun checks for auxiliary files (HO)
(d:/software/texlive/2023/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
)
Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 285.
)
\Hy@SectionHShift=\skip62
) (./icml2024.sty
Package: icml2024 2023/11/23 v2.0 ICML Conference Style File
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/times.sty
Package: times 2020/03/25 PSNFSS-v9.3 (SPQR)
) (./fancyhdr.sty
\fancy@headwidth=\skip63
\f@ncyO@elh=\skip64
\f@ncyO@erh=\skip65
\f@ncyO@olh=\skip66
\f@ncyO@orh=\skip67
\f@ncyO@elf=\skip68
\f@ncyO@erf=\skip69
\f@ncyO@olf=\skip70
\f@ncyO@orf=\skip71
) (d:/software/texlive/2023/texmf-dist/tex/latex/xcolor/xcolor.sty
Package: xcolor 2022/06/12 v2.14 LaTeX color extensions (UK)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package xcolor Info: Driver file: pdftex.def on input line 227.
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/mathcolor.ltx)
Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1353.
Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1357.
Package xcolor Info: Model `RGB' extended on input line 1369.
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1371.
Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1372.
Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1373.
Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1374.
Package xcolor Info: Model `Gray' substituted by `gray' on input line 1375.
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1376.
) (./algorithm.sty
Package: algorithm
Document Style `algorithm' - floating environment
(d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty
Package: float 2001/11/08 v1.3d Float enhancements (AL)
\c@float@type=\count283
\float@exts=\toks22
\float@box=\box58
\@float@everytoks=\toks23
\@floatcapt=\box59
) (d:/software/texlive/2023/texmf-dist/tex/latex/base/ifthen.sty
Package: ifthen 2022/04/13 v1.1d Standard LaTeX ifthen package (DPC)
)
\@float@every@algorithm=\toks24
\c@algorithm=\count284
) (./algorithmic.sty
Package: algorithmic
Document Style `algorithmic' - environment
\c@ALC@line=\count285
\c@ALC@rem=\count286
\ALC@tlm=\skip72
) (d:/software/texlive/2023/texmf-dist/tex/latex/natbib/natbib.sty
Package: natbib 2010/09/13 8.31b (PWD, AO)
\bibhang=\skip73
\bibsep=\skip74
LaTeX Info: Redefining \cite on input line 694.
\c@NAT@ctr=\count287
) (d:/software/texlive/2023/texmf-dist/tex/latex/eso-pic/eso-pic.sty
Package: eso-pic 2020/10/14 v3.0a eso-pic (RN)
\ESO@tempdima=\dimen183
\ESO@tempdimb=\dimen184
) (d:/software/texlive/2023/texmf-dist/tex/latex/forloop/forloop.sty
Package: forloop 2006/09/18 v3.0 For Loops for LaTeX
)
Package hyperref Info: Option `colorlinks' set `true' on input line 151.
\titrun=\box60
\c@@affiliationcounter=\count288
\c@@affilnum=\count289
\newcaptionbox=\box61
\newcaptionboxwid=\dimen185
\icmlrulerbox=\box62
\icmlrulercount=\count290
\icmlruleroffset=\dimen186
\cv@lineheight=\dimen187
\cv@boxheight=\dimen188
\cv@tmpbox=\box63
\cv@refno=\count291
\cv@tot=\count292
\cv@tmpc@=\count293
\cv@tmpc=\count294
) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty
Package: amsmath 2022/04/08 v2.17n AMS math features
\@mathmargin=\skip75
For additional information on amsmath, use the `?' option.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty
Package: amstext 2021/08/26 v2.01 AMS text
(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty
File: amsgen.sty 1999/11/30 v2.0 generic functions
\@emptytoks=\toks25
\ex@=\dimen189
)) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
\pmbraise@=\dimen190
) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty
Package: amsopn 2022/04/08 v2.04 operator names
)
\inf@bad=\count295
LaTeX Info: Redefining \frac on input line 234.
\uproot@=\count296
\leftroot@=\count297
LaTeX Info: Redefining \overline on input line 399.
LaTeX Info: Redefining \colon on input line 410.
\classnum@=\count298
\DOTSCASE@=\count299
LaTeX Info: Redefining \ldots on input line 496.
LaTeX Info: Redefining \dots on input line 499.
LaTeX Info: Redefining \cdots on input line 620.
\Mathstrutbox@=\box64
\strutbox@=\box65
LaTeX Info: Redefining \big on input line 722.
LaTeX Info: Redefining \Big on input line 723.
LaTeX Info: Redefining \bigg on input line 724.
LaTeX Info: Redefining \Bigg on input line 725.
\big@size=\dimen191
LaTeX Font Info: Redeclaring font encoding OML on input line 743.
LaTeX Font Info: Redeclaring font encoding OMS on input line 744.
\macc@depth=\count300
LaTeX Info: Redefining \bmod on input line 905.
LaTeX Info: Redefining \pmod on input line 910.
LaTeX Info: Redefining \smash on input line 940.
LaTeX Info: Redefining \relbar on input line 970.
LaTeX Info: Redefining \Relbar on input line 971.
\c@MaxMatrixCols=\count301
\dotsspace@=\muskip17
\c@parentequation=\count302
\dspbrk@lvl=\count303
\tag@help=\toks26
\row@=\count304
\column@=\count305
\maxfields@=\count306
\andhelp@=\toks27
\eqnshift@=\dimen192
\alignsep@=\dimen193
\tagshift@=\dimen194
\tagwidth@=\dimen195
\totwidth@=\dimen196
\lineht@=\dimen197
\@envbody=\toks28
\multlinegap=\skip76
\multlinetaggap=\skip77
\mathdisplay@stack=\toks29
LaTeX Info: Redefining \[ on input line 2953.
LaTeX Info: Redefining \] on input line 2954.
) (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty
Package: amssymb 2013/01/14 v3.01 AMS font symbols
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
)) (d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty
Package: mathtools 2022/06/29 v1.29 mathematical typesetting tools
(d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty
Package: mhsetup 2021/03/18 v1.4 programming setup (MH)
)
\g_MT_multlinerow_int=\count307
\l_MT_multwidth_dim=\dimen198
\origjot=\skip78
\l_MT_shortvdotswithinadjustabove_dim=\dimen199
\l_MT_shortvdotswithinadjustbelow_dim=\dimen256
\l_MT_above_intertext_sep=\dimen257
\l_MT_below_intertext_sep=\dimen258
\l_MT_above_shortintertext_sep=\dimen259
\l_MT_below_shortintertext_sep=\dimen260
\xmathstrut@box=\box66
\xmathstrut@dim=\dimen261
) (d:/software/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty
Package: amsthm 2020/05/29 v2.20.6
\thm@style=\toks30
\thm@bodyfont=\toks31
\thm@headfont=\toks32
\thm@notefont=\toks33
\thm@headpunct=\toks34
\thm@preskip=\skip79
\thm@postskip=\skip80
\thm@headsep=\skip81
\dth@everypar=\toks35
) (d:/software/texlive/2023/texmf-dist/tex/latex/cleveref/cleveref.sty
Package: cleveref 2018/03/27 v0.21.4 Intelligent cross-referencing
Package cleveref Info: `hyperref' support loaded on input line 2370.
LaTeX Info: Redefining \cref on input line 2370.
LaTeX Info: Redefining \Cref on input line 2370.
LaTeX Info: Redefining \crefrange on input line 2370.
LaTeX Info: Redefining \Crefrange on input line 2370.
LaTeX Info: Redefining \cpageref on input line 2370.
LaTeX Info: Redefining \Cpageref on input line 2370.
LaTeX Info: Redefining \cpagerefrange on input line 2370.
LaTeX Info: Redefining \Cpagerefrange on input line 2370.
LaTeX Info: Redefining \labelcref on input line 2370.
LaTeX Info: Redefining \labelcpageref on input line 2370.
Package cleveref Info: `amsthm' support loaded on input line 3026.
Package cleveref Info: always capitalise cross-reference names on input line 7830.
Package cleveref Info: always capitalise cross-reference names on input line 7852.
Package cleveref Info: no abbreviation of names on input line 7852.
)
\c@theorem=\count308
(d:/software/texlive/2023/texmf-dist/tex/latex/todonotes/todonotes.sty
Package: todonotes 2023/01/31 v1.1.6 Todonotes source and documentation.
Package: todonotes 2023/01/31
(d:/software/texlive/2023/texmf-dist/tex/latex/xkeyval/xkeyval.sty
Package: xkeyval 2022/06/16 v2.9 package option processing (HA)
(d:/software/texlive/2023/texmf-dist/tex/generic/xkeyval/xkeyval.tex (d:/software/texlive/2023/texmf-dist/tex/generic/xkeyval/xkvutils.tex
\XKV@toks=\toks36
\XKV@tempa@toks=\toks37
)
\XKV@depth=\count309
File: xkeyval.tex 2014/12/03 v2.7a key=value parser (HA)
)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex
\pgfutil@everybye=\toks38
\pgfutil@tempdima=\dimen262
\pgfutil@tempdimb=\dimen263
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
\pgfutil@abb=\box67
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/pgf.revision.tex)
Package: pgfrcs 2023-01-15 v3.1.10 (3.1.10)
))
Package: pgf 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex
Package: pgfsys 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex
\pgfkeys@pathtoks=\toks39
\pgfkeys@temptoks=\toks40
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeyslibraryfiltered.code.tex
\pgfkeys@tmptoks=\toks41
))
\pgf@x=\dimen264
\pgf@y=\dimen265
\pgf@xa=\dimen266
\pgf@ya=\dimen267
\pgf@xb=\dimen268
\pgf@yb=\dimen269
\pgf@xc=\dimen270
\pgf@yc=\dimen271
\pgf@xd=\dimen272
\pgf@yd=\dimen273
\w@pgf@writea=\write3
\r@pgf@reada=\read2
\c@pgf@counta=\count310
\c@pgf@countb=\count311
\c@pgf@countc=\count312
\c@pgf@countd=\count313
\t@pgf@toka=\toks42
\t@pgf@tokb=\toks43
\t@pgf@tokc=\toks44
\pgf@sys@id@count=\count314
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg
File: pgf.cfg 2023-01-15 v3.1.10 (3.1.10)
)
Driver file for pgf: pgfsys-pdftex.def
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def
File: pgfsys-pdftex.def 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def
File: pgfsys-common-pdf.def 2023-01-15 v3.1.10 (3.1.10)
))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex
File: pgfsyssoftpath.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfsyssoftpath@smallbuffer@items=\count315
\pgfsyssoftpath@bigbuffer@items=\count316
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex
File: pgfsysprotocol.code.tex 2023-01-15 v3.1.10 (3.1.10)
)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex
Package: pgfcore 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex
\pgfmath@dimen=\dimen274
\pgfmath@count=\count317
\pgfmath@box=\box68
\pgfmath@toks=\toks45
\pgfmath@stack@operand=\toks46
\pgfmath@stack@operation=\toks47
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex
\c@pgfmathroundto@lastzeros=\count318
)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex
File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@picminx=\dimen275
\pgf@picmaxx=\dimen276
\pgf@picminy=\dimen277
\pgf@picmaxy=\dimen278
\pgf@pathminx=\dimen279
\pgf@pathmaxx=\dimen280
\pgf@pathminy=\dimen281
\pgf@pathmaxy=\dimen282
\pgf@xx=\dimen283
\pgf@xy=\dimen284
\pgf@yx=\dimen285
\pgf@yy=\dimen286
\pgf@zx=\dimen287
\pgf@zy=\dimen288
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex
File: pgfcorepathconstruct.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@path@lastx=\dimen289
\pgf@path@lasty=\dimen290
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex
File: pgfcorepathusage.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@shorten@end@additional=\dimen291
\pgf@shorten@start@additional=\dimen292
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex
File: pgfcorescopes.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfpic=\box69
\pgf@hbox=\box70
\pgf@layerbox@main=\box71
\pgf@picture@serial@count=\count319
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex
File: pgfcoregraphicstate.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgflinewidth=\dimen293
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex
File: pgfcoretransformations.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@pt@x=\dimen294
\pgf@pt@y=\dimen295
\pgf@pt@temp=\dimen296
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex
File: pgfcorequick.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex
File: pgfcoreobjects.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex
File: pgfcorepathprocessing.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex
File: pgfcorearrows.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfarrowsep=\dimen297
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex
File: pgfcoreshade.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@max=\dimen298
\pgf@sys@shading@range@num=\count320
\pgf@shadingcount=\count321
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex
File: pgfcoreimage.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex
File: pgfcoreexternal.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfexternal@startupbox=\box72
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex
File: pgfcorelayers.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex
File: pgfcoretransparency.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex
File: pgfcorepatterns.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex
File: pgfcorerdf.code.tex 2023-01-15 v3.1.10 (3.1.10)
))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex
File: pgfmoduleshapes.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfnodeparttextbox=\box73
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex
File: pgfmoduleplot.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty
Package: pgfcomp-version-0-65 2023-01-15 v3.1.10 (3.1.10)
\pgf@nodesepstart=\dimen299
\pgf@nodesepend=\dimen300
) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty
Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10)
)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/math/pgfmath.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex
Package: pgffor 2023-01-15 v3.1.10 (3.1.10)
\pgffor@iter=\dimen301
\pgffor@skip=\dimen302
\pgffor@stack=\toks48
\pgffor@toks=\toks49
)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex
Package: tikz 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex
File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@plot@mark@count=\count322
\pgfplotmarksize=\dimen303
)
\tikz@lastx=\dimen304
\tikz@lasty=\dimen305
\tikz@lastxsaved=\dimen306
\tikz@lastysaved=\dimen307
\tikz@lastmovetox=\dimen308
\tikz@lastmovetoy=\dimen309
\tikzleveldistance=\dimen310
\tikzsiblingdistance=\dimen311
\tikz@figbox=\box74
\tikz@figbox@bg=\box75
\tikz@tempbox=\box76
\tikz@tempbox@bg=\box77
\tikztreelevel=\count323
\tikznumberofchildren=\count324
\tikznumberofcurrentchild=\count325
\tikz@fig@count=\count326
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex
File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfmatrixcurrentrow=\count327
\pgfmatrixcurrentcolumn=\count328
\pgf@matrix@numberofcolumns=\count329
)
\tikz@expandcount=\count330
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex
File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10)
))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarypositioning.code.tex
File: tikzlibrarypositioning.code.tex 2023-01-15 v3.1.10 (3.1.10)
)
\c@@todonotes@numberoftodonotes=\count331
)
LaTeX Font Info: Trying to load font information for OT1+ptm on input line 62.
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/ot1ptm.fd
File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count332
\l__pdf_internal_box=\box78
) (./example_paper.aux)
\openout1 = `example_paper.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Info: Redefining \microtypecontext on input line 62.
Package microtype Info: Applying patch `item' on input line 62.
Package microtype Info: Applying patch `toc' on input line 62.
Package microtype Info: Applying patch `eqnum' on input line 62.
Package microtype Info: Applying patch `footnote' on input line 62.
Package microtype Info: Applying patch `verbatim' on input line 62.
Package microtype Info: Generating PDF output.
Package microtype Info: Character protrusion enabled (level 2).
Package microtype Info: Using default protrusion set `alltext'.
Package microtype Info: Automatic font expansion enabled (level 2),
(microtype) stretch: 20, shrink: 20, step: 1, non-selected.
Package microtype Info: Using default expansion set `alltext-nott'.
LaTeX Info: Redefining \showhyphens on input line 62.
Package microtype Info: No adjustment of tracking.
Package microtype Info: No adjustment of interword spacing.
Package microtype Info: No adjustment of character kerning.
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-ptm.cfg
File: mt-ptm.cfg 2006/04/20 v1.7 microtype config. file: Times (RS)
) (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count333
\scratchdimen=\dimen312
\scratchbox=\box79
\nofMPsegments=\count334
\nofMParguments=\count335
\everyMPshowfont=\toks50
\MPscratchCnt=\count336
\MPscratchDim=\dimen313
\MPnumerator=\count337
\makeMPintoPDFobject=\count338
\everyMPtoPDFconversion=\toks51
) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 485.
(d:/software/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live
))
Package hyperref Info: Link coloring ON on input line 62.
(./example_paper.out) (./example_paper.out)
\@outlinefile=\write4
\openout4 = `example_paper.out'.
\c@@affil@anon=\count339
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-cmr.cfg
File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman (RS)
)
LaTeX Font Info: Trying to load font information for U+msa on input line 110.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msa.cfg
File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
)
LaTeX Font Info: Trying to load font information for U+msb on input line 110.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msb.cfg
File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
)
Package hyperref Warning: Ignoring empty anchor on input line 121.
LaTeX Font Info: Trying to load font information for OML+ptm on input line 121.
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/omlptm.fd
File: omlptm.fd
)
LaTeX Font Info: Font shape `OML/ptm/m/n' in size <9> not available
(Font) Font shape `OML/cmm/m/it' tried instead on input line 121.
Underfull \vbox (badness 10000) has occurred while \output is active []
[1{d:/software/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map}{d:/software/texlive/2023/texmf-dist/fonts/enc/dvips/base/8r.enc}
]
Underfull \vbox (badness 10000) has occurred while \output is active []
[2]
Overfull \hbox (10.76138pt too wide) detected at line 394
[][][][]
[]
Overfull \hbox (9.68254pt too wide) detected at line 494
[][][][]
[]
[3
pdfTeX warning (ext4): destination with the same identifier (name{table.1}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.516 \end{equation}
] [4] [5] (./pic/randomwalk.tex) (./pic/BairdExample.tex)
<pic/maze_13_13.pdf, id=252, 493.1646pt x 387.62602pt>
File: pic/maze_13_13.pdf Graphic file (type pdf)
<use pic/maze_13_13.pdf>
Package pdftex.def Info: pic/maze_13_13.pdf used on input line 869.
(pdftex.def) Requested size: 98.63116pt x 77.52382pt.
Underfull \hbox (badness 1902) in paragraph at lines 871--880
\OT1/ptm/m/n/10 (+20) four al-ter-na-tive ac-tions:
[]
Underfull \hbox (badness 5548) in paragraph at lines 871--880
\OML/cmm/m/it/10 up$\OT1/ptm/m/n/10 (+20) , $\OML/cmm/m/it/10 down$\OT1/ptm/m/n/10 (+20) , $\OML/cmm/m/it/10 left$\OT1/ptm/m/n/10 (+20) , and
[]
Underfull \hbox (badness 1472) in paragraph at lines 871--880
\OML/cmm/m/it/10 right$\OT1/ptm/m/n/10 (+20) , which takes the
[]
<pic/dependent_new.pdf, id=254, 557.01889pt x 394.59978pt>
File: pic/dependent_new.pdf Graphic file (type pdf)
<use pic/dependent_new.pdf>
Package pdftex.def Info: pic/dependent_new.pdf used on input line 897.
(pdftex.def) Requested size: 108.04453pt x 108.04262pt.
<pic/tabular_new.pdf, id=255, 566.51224pt x 401.1703pt>
File: pic/tabular_new.pdf Graphic file (type pdf)
<use pic/tabular_new.pdf>
Package pdftex.def Info: pic/tabular_new.pdf used on input line 901.
(pdftex.def) Requested size: 108.04472pt x 108.04196pt.
<pic/inverted_new.pdf, id=256, 565.61766pt x 402.45422pt>
File: pic/inverted_new.pdf Graphic file (type pdf)
<use pic/inverted_new.pdf>
Package pdftex.def Info: pic/inverted_new.pdf used on input line 906.
(pdftex.def) Requested size: 108.03809pt x 108.04385pt.
<pic/counterexample_quanju_new.pdf, id=257, 471.30164pt x 401.08943pt>
File: pic/counterexample_quanju_new.pdf Graphic file (type pdf)
<use pic/counterexample_quanju_new.pdf>
Package pdftex.def Info: pic/counterexample_quanju_new.pdf used on input line 910.
(pdftex.def) Requested size: 108.04471pt x 108.04466pt.
<pic/maze_complete.pdf, id=258, 595.42892pt x 465.38112pt>
File: pic/maze_complete.pdf Graphic file (type pdf)
<use pic/maze_complete.pdf>
Package pdftex.def Info: pic/maze_complete.pdf used on input line 922.
(pdftex.def) Requested size: 211.38329pt x 150.31682pt.
<pic/cw_complete.pdf, id=259, 570.46333pt x 465.10928pt>
File: pic/cw_complete.pdf Graphic file (type pdf)
<use pic/cw_complete.pdf>
Package pdftex.def Info: pic/cw_complete.pdf used on input line 926.
(pdftex.def) Requested size: 211.39018pt x 150.32127pt.
<pic/mt_complete.pdf, id=260, 569.92673pt x 468.75475pt>
File: pic/mt_complete.pdf Graphic file (type pdf)
<use pic/mt_complete.pdf>
Package pdftex.def Info: pic/mt_complete.pdf used on input line 931.
(pdftex.def) Requested size: 211.38266pt x 150.31929pt.
<pic/Acrobot_complete.pdf, id=261, 564.99583pt x 478.09494pt>
File: pic/Acrobot_complete.pdf Graphic file (type pdf)
<use pic/Acrobot_complete.pdf>
Package pdftex.def Info: pic/Acrobot_complete.pdf used on input line 935.
(pdftex.def) Requested size: 211.39014pt x 150.3162pt.
Underfull \vbox (badness 10000) has occurred while \output is active []
[6
pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.980
pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.980
<./pic/maze_13_13.pdf>]
Underfull \vbox (badness 10000) has occurred while \output is active []
[7
pdfTeX warning (ext4): destination with the same identifier (name{table.2}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.1039
pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.1039
<./pic/dependent_new.pdf> <./pic/tabular_new.pdf
pdfTeX warning: pdflatex.exe (file ./pic/tabular_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/inverted_new.pdf
pdfTeX warning: pdflatex.exe (file ./pic/inverted_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/counterexample_quanju_new.pdf
pdfTeX warning: pdflatex.exe (file ./pic/counterexample_quanju_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>]
Underfull \vbox (badness 10000) has occurred while \output is active []
(./example_paper.bbl
Underfull \vbox (badness 10000) has occurred while \output is active []
[8
pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.12
<./pic/maze_complete.pdf> <./pic/cw_complete.pdf
pdfTeX warning: pdflatex.exe (file ./pic/cw_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/mt_complete.pdf
pdfTeX warning: pdflatex.exe (file ./pic/mt_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/Acrobot_complete.pdf
pdfTeX warning: pdflatex.exe (file ./pic/Acrobot_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>] [9]) [10
]
LaTeX Warning: Command \textemdash invalid in math mode on input line 1212.
LaTeX Warning: Command \textemdash invalid in math mode on input line 1212.
[11
] [12] [13] [14]
Underfull \hbox (badness 5490) in paragraph at lines 1626--1643
[]\OT1/ptm/m/n/10 (+20) 7-state ver-sion of Baird's off-policy coun-terex-am-ple: for TD al-go-rithm, $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 (+20) is set to 0.1. For the
[]
Underfull \hbox (badness 10000) in paragraph at lines 1626--1643
\OT1/ptm/m/n/10 (+20) TDC al-go-rithm, the range of $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 (+20) is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) , and the range
[]
Underfull \hbox (badness 10000) in paragraph at lines 1626--1643
\OT1/ptm/m/n/10 (+20) of $\OML/cmm/m/it/10 ^^P$ \OT1/ptm/m/n/10 (+20) is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) . For the VMTD al-go-
[]
Underfull \hbox (badness 10000) in paragraph at lines 1626--1643
\OT1/ptm/m/n/10 (+20) rithm, the range of $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 (+20) is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) , and the range of $\OML/cmm/m/it/10 ^^L$ \OT1/ptm/m/n/10 (+20) is
[]
Underfull \hbox (badness 2384) in paragraph at lines 1626--1643
\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) . Through ex-per-i-ments, it was found that
[]
[15
pdfTeX warning (ext4): destination with the same identifier (name{table.3}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.1672 \end{document}
] (./example_paper.aux)
Package rerunfilecheck Info: File `example_paper.out' has not changed.
(rerunfilecheck) Checksum: D41D8CD98F00B204E9800998ECF8427E;0.
)
Here is how much of TeX's memory you used:
27893 strings out of 476025
518891 string characters out of 5789524
1889382 words of memory out of 5000000
47429 multiletter control sequences out of 15000+600000
550322 words of font info for 260 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
99i,16n,94p,1006b,1054s stack positions out of 10000i,1000n,20000p,200000b,200000s
<d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmb8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
Output written on example_paper.pdf (15 pages, 2276876 bytes).
PDF statistics:
858 PDF objects out of 1000 (max. 8388607)
693 compressed objects within 7 object streams
137 named destinations out of 1000 (max. 500000)
55866 words of extra memory for PDF output out of 61914 (max. 10000000)
%%%%%%%% ICML 2024 EXAMPLE LATEX SUBMISSION FILE %%%%%%%%%%%%%%%%%
\documentclass{article}
% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{graphicx}
\usepackage{subfigure}
\usepackage{diagbox}
\usepackage{wrapfig}
\usepackage{booktabs} % for professional tables
% hyperref makes hyperlinks in the resulting PDF.
% If your build breaks (sometimes temporarily if a hyperlink spans a page)
% please comment out the following usepackage line and replace
% \usepackage{icml2024} with \usepackage[nohyperref]{icml2024} above.
\usepackage{hyperref}
% Attempt to make hyperref and algorithmic work together better:
\newcommand{\theHalgorithm}{\arabic{algorithm}}
% Use the following line for the initial blind version submitted for review:
\usepackage{icml2024}
% If accepted, instead use the following line for the camera-ready submission:
% \usepackage[accepted]{icml2024}
% For theorems and such
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
% if you use cleveref..
\usepackage[capitalize,noabbrev]{cleveref}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
% Todonotes is useful during development; simply uncomment the next line
% and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}
\usepackage[textsize=tiny]{todonotes}
% The \icmltitle you define below is probably too long as a header.
% Therefore, a short form for the running title is supplied here:
\icmltitlerunning{Is Minimizing Errors the Only Option for Value-based RL?}
\begin{document}
\twocolumn[
\icmltitle{Is Minimizing Errors the Only Option for Value-based Reinforcement Learning?}
% It is OKAY to include author information, even for blind
% submissions: the style file will automatically remove it for you
% unless you've provided the [accepted] option to the icml2024
% package.
% List of affiliations: The first argument should be a (short)
% identifier you will use later to specify author affiliations
% Academic affiliations should list Department, University, City, Region, Country
% Industry affiliations should list Company, City, Region, Country
% You can specify symbols, otherwise they are numbered in order.
% Ideally, you should not use this facility. Affiliations will be numbered
% in order of appearance and this is the preferred way.
\icmlsetsymbol{equal}{*}
\begin{icmlauthorlist}
\icmlauthor{Firstname1 Lastname1}{equal,yyy}
\icmlauthor{Firstname2 Lastname2}{equal,yyy,comp}
\icmlauthor{Firstname3 Lastname3}{comp}
\icmlauthor{Firstname4 Lastname4}{sch}
\icmlauthor{Firstname5 Lastname5}{yyy}
\icmlauthor{Firstname6 Lastname6}{sch,yyy,comp}
\icmlauthor{Firstname7 Lastname7}{comp}
%\icmlauthor{}{sch}
\icmlauthor{Firstname8 Lastname8}{sch}
\icmlauthor{Firstname8 Lastname8}{yyy,comp}
%\icmlauthor{}{sch}
%\icmlauthor{}{sch}
\end{icmlauthorlist}
\icmlaffiliation{yyy}{Department of XXX, University of YYY, Location, Country}
\icmlaffiliation{comp}{Company Name, Location, Country}
\icmlaffiliation{sch}{School of ZZZ, Institute of WWW, Location, Country}
\icmlcorrespondingauthor{Firstname1 Lastname1}{first1.last1@xxx.edu}
\icmlcorrespondingauthor{Firstname2 Lastname2}{first2.last2@www.uk}
% You may provide any keywords that you
% find helpful for describing your paper; these are used to populate
% the "keywords" metadata in the PDF but will not be shown in the document
\icmlkeywords{Machine Learning, ICML}
\vskip 0.3in
]
% this must go after the closing bracket ] following \twocolumn[ ...
% This command actually creates the footnote in the first column
% listing the affiliations and the copyright notice.
% The command takes one argument, which is text to display at the start of the footnote.
% The \icmlEqualContribution command is standard text for equal contribution.
% Remove it (just {}) if you do not need this facility.
%\printAffiliationsAndNotice{} % leave blank if no need to mention equal contribution
\printAffiliationsAndNotice{\icmlEqualContribution} % otherwise use the standard text.
\begin{abstract}
In the regression task of supervised learning,
we need to minimize the error while balancing the bias--variance trade-off.
Drawing on this idea, the existing research on
value-based reinforcement learning also minimizes the error.
However, is error minimization really the only option
for value-based reinforcement learning?
We can easily observe that the policy on action
choosing probabilities is often related to the relative values,
and has nothing to do with their absolute values.
Based on this observation, we propose the objective
of variance minimization instead of error minimization,
derive corresponding on-policy and off-policy algorithms,
and conduct an analysis of the convergence rate along with experiments.
The experimental results show that our proposed variance minimization algorithms
converge much faster.
\end{abstract}
\section{Introduction}
\label{introduction}
Reinforcement learning can be mainly divided into two
categories: value-based reinforcement learning
and policy gradient-based reinforcement learning. This
paper focuses on temporal difference learning based on
linear approximated valued functions. Its research is
usually divided into two steps: the first step is to establish the convergence of the algorithm, and the second
step is to accelerate the algorithm.
In terms of stability, \citet{sutton1988learning} established the
convergence of on-policy TD(0), and \citet{tsitsiklis1997analysis}
established the convergence of on-policy TD($\lambda$).
However, ``The deadly triad'' consisting of off-policy learning,
bootstrapping, and function approximation makes
the stability a difficult problem \citep{Sutton2018book}.
To solve this problem, convergent off-policy temporal difference
learning algorithms are proposed, e.g., BR \cite{baird1995residual},
GTD \cite{sutton2008convergent}, GTD2 and TDC \cite{sutton2009fast},
ETD \cite{sutton2016emphatic}, and MRetrace \cite{chen2023modified}.
In terms of acceleration, \citet{hackman2012faster}
proposed Hybrid TD algorithm with on-policy matrix.
\citet{liu2015finite,liu2016proximal,liu2018proximal} proposed
true stochastic algorithms, i.e., GTD-MP and GTD2-MP, from
a convex-concave saddle-point formulation.
Second-order methods are used to accelerate TD learning,
e.g., Quasi Newton TD \cite{givchi2015quasi} and
accelerated TD (ATD) \citep{pan2017accelerated}.
\citet{hallak2016generalized} introduced a new parameter
to reduce the variance of ETD.
\citet{zhang2022truncated} proposed truncated ETD with a lower variance.
Variance Reduced TD with a direct variance reduction technique \citep{johnson2013accelerating} was proposed by \citet{korda2015td}
and analyzed by \citet{xu2019reanalysis}.
How to further improve the convergence rates of reinforcement learning
algorithms is currently still an open problem.
Algorithm stability is prominently reflected in the changes
to the objective function, transitioning from mean squared
errors (MSE) \citep{Sutton2018book} to mean squared Bellman errors (MSBE) \cite{baird1995residual}, then to
norm of the expected TD update \cite{sutton2009fast}, and further to
mean squared projected Bellman errors (MSPBE) \cite{sutton2009fast}. On the other hand, algorithm
acceleration is more centered around optimizing the iterative
update formula of the algorithm itself without altering the
objective function, thereby speeding up the convergence rate
of the algorithm. The emergence of new optimization objective
functions often leads to the development of novel algorithms.
The introduction of new algorithms, in turn, tends to inspire
researchers to explore methods for accelerating algorithms,
leading to the iterative creation of increasingly superior algorithms.
The kernel loss function can be optimized using standard
gradient-based methods, addressing the issue of double
sampling in residual gradient algorithm \cite{feng2019kernel}. It ensures convergence
in both on-policy and off-policy scenarios. The logistic Bellman
error is convex and smooth in the action-value function parameters,
with bounded gradients \cite{basserrano2021logistic}. In contrast, the squared Bellman error is
not convex in the action-value function parameters, and RL algorithms
based on recursive optimization using it are known to be unstable.
% The value-based algorithms mentioned above aim to
% minimize some errors, e.g., mean squared errors \citep{Sutton2018book},
% mean squared Bellman errors \cite{baird1995residual}, norm
% of the expected TD update \cite{sutton2009fast},
% mean squared projected Bellman errors (MSPBE) \cite{sutton2009fast}, etc.
It is necessary to propose a new objective function; however, the objective functions mentioned above are all some form of error.
Is minimizing error the only option for value-based reinforcement learning?
Error can be decomposed into bias, variance and unavoidable noise.
Among them, bias measures the difference between the predicted
values of the model and the true values, reflecting the
model's fitting ability. Variance, on the other hand,
quantifies the model's sensitivity to different training data,
indicating its stability and generalization ability.
Balancing bias and variance is important, as they represent trade-offs
\cite{zhou2021machine}.
In the context of this paper, where only a linear model is
considered and the model complexity is not adjusted,
it is difficult to improve the bias. High bias indicates that the model poorly
fits the training data, resulting in underfitting.
In supervised learning, high bias is generally
considered unacceptable.
However, in reinforcement learning, high bias may
be acceptable in certain cases. This is due to the
observation that policies based on value functions, such
as greedy, $\epsilon$-greedy, and softmax policies, often rely on
the relative values of action values rather than their
absolute values when selecting different actions.
Based on this observation, we propose alternate objective functions
instead of minimizing errors. We minimize Variance of Bellman Error (VBE) and
Variance of Projected Bellman Error (VPBE),
and derive Variance Minimization (VM) algorithms.
These algorithms preserve the invariance of the optimal policy
while significantly reducing the variance of gradient estimation,
thus hastening convergence.
The contributions of this paper are as follows:
(1) Introduction of novel objective functions based on
the invariance of the optimal policy.
(2) Derivation of two algorithms, one on-policy and one off-policy.
(3) Proof of their convergence.
(4) Analysis of the convergence rate of on-policy algorithm.
(5) Experiments demonstrating the faster convergence speed of the proposed algorithms.
\section{Preliminaries}
\label{preliminaries}
A reinforcement learning agent interacts with the environment, observes states,
makes sequential decisions to influence the environment, and obtains
rewards.
Consider an infinite-horizon discounted
Markov Decision Process (MDP), defined by a tuple $\langle S,A,R,P,\gamma
\rangle$, where $S=\{1,2,\ldots,N\}$ is a finite set of states of the environment; $A$
is a finite set of actions of the agent;
$R:S\times A \times S \rightarrow \mathbb{R}$ is a bounded deterministic reward
function; $P:S\times A\times S \rightarrow [0,1]$ is the transition
probability distribution; and $\gamma\in (0,1)$
is the discount factor \cite{Sutton2018book}.
Due to the requirements of online learning, value iteration based on sampling
is considered in this paper.
In each sampling, an experience (or transition) $\langle s, a, s', r\rangle$ is
obtained.
A policy is a mapping $\pi:S\times A \rightarrow [0,1]$. The goal of the
agent is to find an optimal policy $\pi^*$ that maximizes the expectation of
the discounted cumulative reward over the long run.
State value function $V^{\pi}(s)$ for a stationary policy $\pi$ is
defined as:
\begin{equation*}
V^{\pi}(s)=\mathbb{E}_{\pi}[\sum_{k=0}^{\infty} \gamma^k R_{k}|s_0=s].
\label{valuefunction}
\end{equation*}
Linear value function for state $s\in S$ is defined as:
\begin{equation}
V_{{\theta}}(s):= {\theta}^{\top}{\phi}(s) = \sum_{i=1}^{m}
\theta_i \phi_i(s),
\label{linearvaluefunction}
\end{equation}
where ${\theta}:=(\theta_1,\theta_2,\ldots,\theta_m)^{\top}\in
\mathbb{R}^m$ is a parameter vector,
${\phi}:=(\phi_1,\phi_2,\ldots,\phi_m)^{\top}\in \mathbb{R}^m$ is a feature
function defined on state space $S$, and $m$ is the feature size.
Tabular temporal difference (TD) learning \cite{Sutton2018book} has been successfully applied to small-scale problems.
To deal with the well-known curse of dimensionality of large scale MDPs, value
function is usually approximated by a linear model, kernel methods, decision
trees, or neural networks, etc. This paper focuses on the linear model, where
features are usually hand coded by domain experts.
TD learning can also be used to find optimal strategies. The problem of finding an optimal policy is
often called the control problem. Two popular TD methods are Sarsa and Q-learning. The former is an on-policy
TD control, while the latter is an off-policy control.
It is well known that TDC algorithm \cite{sutton2009fast} guarantees
convergence under off-policy conditions while the off-policy TD algorithm may diverge. The
objective function of TDC is MSPBE.
TDC is essentially an adjustment or correction of the TD update so that it
follows the gradient of the MSPBE objective function. In the context of the TDC algorithm, the control algorithm
is known as Greedy-GQ($\lambda$) \cite{sutton2009fast}. When $\lambda$ is set to 0, it is denoted
as GQ(0).
\section{Variance Minimization Algorithms}
\subsection{Motivation}
In reinforcement learning, bias is acceptable,
while in supervised learning it is not. As shown
in Table \ref{example_bias}, although there is a bias between the
true value and the predicted value, action $a_3$ is
still chosen under the greedy-policy.
On the contrary, supervised learning is usually used to predict temperature, humidity, morbidity, etc. If the bias is too large, the consequences could be serious.
\begin{table}[t]
\caption{An illustrative example: adding a constant bias to all
$Q$ values does not change the greedy action.}
\label{example_bias}
\vskip 0.15in
\begin{center}
\begin{small}
\begin{sc}
\begin{tabular}{lcccr}
\toprule
action & $Q$ value & $Q$ value with bias \\
\midrule
$Q(s, a_0)$ & 1& 5 \\
$Q(s, a_1)$ & 2& 6 \\
$Q(s, a_2)$ & 3& 7 \\
$Q(s, a_3)$ & 4& 8 \\
$\arg \max_{a}Q(s,a)$ & $a_3$& $a_3$\\
\bottomrule
\end{tabular}
\end{sc}
\end{small}
\end{center}
\vskip -0.1in
\end{table}
In addition, reward shaping can significantly speed up the learning by adding a shaping
reward $F(s,s')$ to the original reward $r$,
where $F(s,s')$ is the general form of any state-based shaping reward.
Static potential-based reward shaping (Static PBRS) maintains the policy invariance if the
shaping reward follows from $F(s,s')=\gamma
f(s')-f(s)$ \cite{ng1999policy}.
This means that we can make changes to the TD error $\delta = r+\gamma \theta^{\top}\phi'-\theta^{\top}\phi $ while still ensuring the invariance of the optimal policy,
\begin{equation*}
\delta - \omega= r+\gamma \theta^{\top}\phi'-\theta^{\top}\phi - \omega,
\end{equation*}
where $\omega$ is a constant, acting as a static PBRS.
This also means that algorithms with the optimization goal
of minimizing errors, after introducing reward shaping,
may result in larger or smaller bias. Fortunately,
as discussed above, bias is acceptable in reinforcement
learning.
However, the problem is that selecting an appropriate
$\omega$ requires expert knowledge. This forces us to learn
$\omega$ dynamically, i.e., $\omega=\omega_t $ and dynamic PBRS can also maintain the policy
invariance if the shaping reward is $F(s,t,s',t')=\gamma f(s',t')-f(s,t)$,
where $t$ is the time-step the agent reaches in state $s$
\cite{devlin2012dynamic}.
However, this result requires the convergence guarantee of the dynamic potential
function $f(s,t)$. If $f(s,t)$ does not converge as the time-step
$t\rightarrow\infty$, the Q-values of dynamic PBRS are not
guaranteed to converge.
Let $f_{\omega_t}(s)=\frac{\omega_t}{\gamma-1}$.
Thus, $F_{\omega_t}(s,s')=\gamma f_{\omega_t}(s')-f_{\omega_t}(s)= \omega_t$
is a dynamic PBRS. If $\omega$ eventually converges, the dynamic potential
function $f(s,t)$ will also converge.
Bias is the expected difference between the predicted value
and the true value. Therefore, under the premise of bootstrapping, we first think of
letting $\omega \doteq \mathbb{E}[\mathbb{E}[\delta|s]]=\mathbb{E}[\delta]$.
As we all know, the optimization process of linear TD(0) (semi-gradient) and linear TDC are as follows, respectively:
\begin{equation*}
\theta^{*}= \arg \min_{\theta} \mathbb{E}[(\mathbb{E}[\delta |s])^2],
\end{equation*}
and
\begin{equation*}
\theta^{*}=\arg \min_{\theta} \mathbb{E}[\delta \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1} \mathbb{E}[\delta\phi].
\end{equation*}
As a result, two novel objective functions and their corresponding algorithms are proposed,
where $\omega$ is subsequently proven to converge, meaning that these two algorithms can maintain the invariance of the optimal strategy.
\subsection{Variance Minimization TD Learning: VMTD}
For on-policy learning,
a novel objective function, Variance of Bellman Error (VBE), is proposed as follows:
\begin{equation}
\begin{array}{ccl}
\arg \min_{\theta}\text{VBE}(\theta)&=&\arg \min_{\theta}\mathbb{E}[(\mathbb{E}[\delta|s]-\mathbb{E}[\mathbb{E}[\delta|s]])^2]\\
&=&\arg \min_{\theta,\omega} \mathbb{E}[(\mathbb{E}[\delta|s]-\omega)^2].
\end{array}
\end{equation}
Clearly, the goal is no longer to minimize the Bellman error.
First, the parameter $\omega$ is derived directly based on
stochastic gradient descent:
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k-\omega_k),
\label{omega}
\end{equation}
where $\delta_k$ is the TD error as follows:
\begin{equation}
\delta_k = r+\gamma
\theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k.
\label{delta}
\end{equation}
Then, based on stochastic semi-gradient descent, the update of
the parameter $\theta$ is as follows:
\begin{equation}
\theta_{k+1}\leftarrow
\theta_{k}+\alpha_k(\delta_k-\omega_k)\phi_k.
\label{theta}
\end{equation}
The pseudocode of the VMTD algorithm is shown in Algorithm \ref{alg:algorithm 1}.
For control tasks, two extensions of VMTD are named VMSarsa and VMQ respectively,
and the update formulas are shown below:
\begin{equation}
\theta_{k+1}\leftarrow
\theta_{k}+\alpha_k(\delta_k-\omega_k)\phi(s_k,a_k).
\end{equation}
and
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k-\omega_k),
\end{equation}
where the TD error $\delta_k$ in VMSarsa is:
\begin{equation}
\delta_{k}=r_{k+1}+\gamma \theta_{k}^{\top}\phi(s_{k+1},a_{k+1}) - \theta_{k}^{\top}\phi(s_{k},a_{k}),
\label{deltaSarsa}
\end{equation}
and the TD error $\delta_k$ in VMQ is:
\begin{equation}
\delta_{k}=r_{k+1}+\gamma \max_{a\in A}\theta_{k}^{\top}\phi(s_{k+1},a) - \theta_{k}^{\top}\phi(s_{k},a_{k}).
\label{deltaQ}
\end{equation}
\begin{algorithm}[t]
\caption{VMTD algorithm with linear function approximation in the on-policy setting}
\label{alg:algorithm 1}
\begin{algorithmic}
\STATE {\bfseries Input:} $\theta_{0}$, $\omega_{0}$, $\gamma
$, learning rate $\alpha_t$ and $\beta_t$
\REPEAT
\STATE For any episode, initialize $\theta_{0}$ arbitrarily, $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$ and $\beta_t$ are constant.\\
\FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take $A_t$ from $S_t$ according to policy $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
\STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t}'-\theta_t^{\top}\phi_t$
\STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t(\delta_t-\omega_t)\phi_t$
\STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t(\delta_t-\omega_t)$
\STATE $S_t=S_{t+1}$
\ENDFOR
\UNTIL{terminal episode}
\end{algorithmic}
\end{algorithm}
% \begin{algorithm}[t]
% \caption{VMTDC algorithm with linear function approximation in the off-policy setting}
% \label{alg:algorithm 2}
% \begin{algorithmic}
% \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
% $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
% \REPEAT
% \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
% \textbf{Output}: $\theta^*$.\\
% \FOR{$t=0$ {\bfseries to} $T-1$}
% \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
% \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
% \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
% \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
% \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t[\rho_t (\delta_t-\omega_t)\phi_t - \gamma \phi_{t+1}(\phi^{\top}_{t} u_t)]$
% \STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \phi^{\top}_{t} u_t] \phi_t$
% \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
% \STATE $S_t=S_{t+1}$
% \ENDFOR
% \UNTIL{terminal episode}
% \end{algorithmic}
% \end{algorithm}
\subsection{Variance Minimization TDC Learning: VMTDC}
For off-policy learning, we employ a projection operator.
The objective function is called Variance of Projected Bellman error (VPBE),
and the corresponding algorithm is called VMTDC.
\begin{equation}
\begin{array}{ccl}
\text{VPBE}(\theta)&=&\mathbb{E}[(\delta-\mathbb{E}[\delta]) \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta])\phi]\\
&=&\mathbb{E}[(\delta-\omega) \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1}\mathbb{E}[(\delta-\omega)\phi],
\end{array}
\end{equation}
where $\omega$ is used to estimate $\mathbb{E}[\delta]$, i.e., $\omega \doteq \mathbb{E}[\delta]$.
The derivation process of the VMTDC algorithm is the same
as that of the TDC algorithm, the only difference is that the original $\delta$ is replaced by $\delta-\omega$.
Therefore, we can easily get the updated formula of VMTDC, as follows:
% \begin{equation*}
% \rho_{k} \leftarrow \frac{\pi(a_k | s_k)}{\mu(a_k | s_k)},
% \end{equation*}
\begin{equation}
\begin{array}{ccl}
\theta_{k+1}&\leftarrow&\theta_{k}+\alpha_{k}[(\delta_{k}- \omega_k) \phi(s_k)\\
&&- \gamma\phi(s_{k+1})(\phi^{\top} (s_k) u_k)],
\end{array}
\label{thetavmtdc}
\end{equation}
\begin{equation}
u_{k+1}\leftarrow u_{k}+\zeta_{k}[\delta_{k}-\omega_k - \phi^{\top} (s_k) u_k]\phi(s_k),
\label{uvmtdc}
\end{equation}
and
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k (\delta_k- \omega_k).
\label{omegavmtdc}
\end{equation}
The pseudocode of the VMTDC algorithm for importance-sampling scenario is shown in Algorithm \ref{alg:algorithm 2} of Appendix \ref{proofth2}.
Now, we will introduce the improved version of the GQ(0) algorithm, named VMGQ(0):
\begin{equation}
\begin{array}{ccl}
\theta_{k+1}\leftarrow\theta_{k}&+&\alpha_{k}[(\delta_{k}- \omega_k) \phi(s_k,a_k)\\
&-& \gamma\phi(s_{k+1},A^{*}_{k+1})(\phi^{\top} (s_k,a_k) u_k)],
\end{array}
\end{equation}
\begin{equation}
u_{k+1}\leftarrow u_{k}+\zeta_{k}[(\delta_{k}-\omega_k) - \phi^{\top} (s_k,a_k) u_k]\phi(s_k,a_k),
\end{equation}
and
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k- \omega_k),
\end{equation}
where $\delta_{k}$ is (\ref{deltaQ}) and $A^{*}_{k+1}={\arg \max}_{a}(\theta_{k}^{\top}\phi(s_{k+1},a))$.
\section{Theoretical Analysis}
The purpose of this section is to establish the stabilities of the VMTD algorithm
and the VMTDC algorithm, and also presents a corollary on the convergence rate of VMTD.
\begin{theorem}
\label{theorem1}(Convergence of VMTD).
In the case of on-policy learning, consider the iterations (\ref{omega}) and (\ref{theta}) with (\ref{delta}) of VMTD.
Let the step-size sequences $\alpha_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\beta_k>0$, for all $k$,
$
\sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\infty,
$
$
\sum_{k=0}^{\infty}\alpha_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\beta_k^2<\infty,
$
and
$
\alpha_k = o(\beta_k).
$
Assume that $(\phi_k,r_k,\phi_k')$ is an i.i.d. sequence with
uniformly bounded second moments, where $\phi_k$ and $\phi'_{k}$ are sampled from the same Markov chain.
Let $A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$,
$b=\mathrm{Cov}(r,\phi)$.
Assume that matrix $A$ is non-singular.
Then the parameter vector $\theta_k$ converges with probability one
to $A^{-1}b$.
\end{theorem}
\begin{proof}
\label{th1proof}
The proof is based on Borkar's Theorem for
general stochastic approximation recursions with two time scales
\cite{borkar1997stochastic}.
% The new TD error for the linear setting is
% \begin{equation*}
% \delta_{\text{new}}=r+\gamma
% \theta^{\top}\phi'-\theta^{\top}\phi-\mathbb{E}[\delta].
% \end{equation*}
A new one-step
linear TD solution is defined
as:
\begin{equation*}
0=\mathbb{E}[(\delta-\mathbb{E}[\delta]) \phi]=-A\theta+b.
\end{equation*}
Thus, the VMTD's solution is
$\theta_{\text{VMTD}}=A^{-1}b$.
First, note that recursion (\ref{theta}) can be rewritten as
\begin{equation*}
\theta_{k+1}\leftarrow \theta_k+\beta_k\xi(k),
\end{equation*}
where
\begin{equation*}
\xi(k)=\frac{\alpha_k}{\beta_k}(\delta_k-\omega_k)\phi_k
\end{equation*}
Due to the settings of step-size schedule $\alpha_k = o(\beta_k)$,
$\xi(k)\rightarrow 0$ almost surely as $k\rightarrow\infty$.
That is the increments in iteration (\ref{omega}) are uniformly larger than
those in (\ref{theta}), thus (\ref{omega}) is the faster recursion.
Along the faster time scale, iterations of (\ref{omega}) and (\ref{theta})
are associated to ODEs system as follows:
\begin{equation}
\dot{\theta}(t) = 0,
\label{thetaFast}
\end{equation}
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|\theta(t)]-\omega(t).
\label{omegaFast}
\end{equation}
Based on the ODE (\ref{thetaFast}), $\theta(t)\equiv \theta$ when
viewed from the faster timescale.
By the Hirsch lemma \cite{hirsch1989convergent}, it follows that
$||\theta_k-\theta||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$\theta$ that depends on the initial condition $\theta_0$ of recursion
(\ref{theta}).
Thus, the ODE pair (\ref{thetaFast})-(\ref{omegaFast}) can be written as
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|\theta]-\omega(t).
\label{omegaFastFinal}
\end{equation}
Consider the function $h(\omega)=\mathbb{E}[\delta|\theta]-\omega$,
i.e., the driving vector field of the ODE (\ref{omegaFastFinal}).
It is easy to see that the function $h$ is Lipschitz with coefficient
$1$.
Let $h_{\infty}(\cdot)$ be the function defined by
$h_{\infty}(\omega)=\lim_{x\rightarrow \infty}\frac{h(x\omega)}{x}$.
Then $h_{\infty}(\omega)= -\omega$ is well-defined.
For (\ref{omegaFastFinal}), $\omega^*=\mathbb{E}[\delta|\theta]$
is the unique globally asymptotically stable equilibrium.
For the ODE
\begin{equation}
\dot{\omega}(t) = h_{\infty}(\omega(t))= -\omega(t),
\label{omegaInfty}
\end{equation}
apply $\vec{V}(\omega)=(-\omega)^{\top}(-\omega)/2$ as its
associated strict Lyapunov function. Then,
the origin of (\ref{omegaInfty}) is a globally asymptotically stable
equilibrium.
Consider now the recursion (\ref{omega}).
Let
$M_{k+1}=(\delta_k-\omega_k)
-\mathbb{E}[(\delta_k-\omega_k)|\mathcal{F}(k)]$,
where $\mathcal{F}(k)=\sigma(\omega_l,\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ are the sigma fields
generated by $\omega_0,\theta_0,\omega_{l+1},\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
It is easy to verify that $M_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[M_{k+1}|\mathcal{F}(k)]=0$, $\forall k\geq0$.
Because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_1>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq
c_1(1+||\omega_k||^2+||\theta_k||^2).
\end{equation*}
Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified.
Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our
conditions on the step-size sequences $\alpha_k$, $\beta_k$. Thus,
by Theorem 2.2 of \cite{borkar2000ode} we obtain that
$||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$.
Consider now the slower time scale recursion (\ref{theta}).
Based on the above analysis, (\ref{theta}) can be rewritten as
\begin{equation*}
\theta_{k+1}\leftarrow
\theta_{k}+\alpha_k(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k.
\end{equation*}
Let $\mathcal{G}(k)=\sigma(\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ be the sigma fields
generated by $\theta_0,\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
Let
$
Z_{k+1} = Y_{k}-\mathbb{E}[Y_{k}|\mathcal{G}(k)],
$
where
\begin{equation*}
Y_{k}=(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k.
\end{equation*}
Consequently,
\begin{equation*}
\begin{array}{ccl}
\mathbb{E}[Y_k|\mathcal{G}(k)]&=&\mathbb{E}[(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k|\mathcal{G}(k)]\\
&=&\mathbb{E}[\delta_k\phi_k|\theta_k]
-\mathbb{E}[\mathbb{E}[\delta_k|\theta_k]\phi_k]\\
&=&\mathbb{E}[\delta_k\phi_k|\theta_k]
-\mathbb{E}[\delta_k|\theta_k]\mathbb{E}[\phi_k]\\
&=&\mathrm{Cov}(\delta_k|\theta_k,\phi_k),
\end{array}
\end{equation*}
where $\mathrm{Cov}(\cdot,\cdot)$ is a covariance operator.
Thus,
\begin{equation*}
\begin{array}{ccl}
Z_{k+1}&=&(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k-\mathrm{Cov}(\delta_k|\theta_k,\phi_k).
\end{array}
\end{equation*}
It is easy to verify that $Z_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[Z_{k+1}|\mathcal{G}(k)]=0$, $\forall k\geq0$.
Also, because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_2>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||Z_{k+1}||^2|\mathcal{G}(k)]\leq
c_2(1+||\theta_k||^2).
\end{equation*}
Consider now the following ODE associated with (\ref{theta}):
\begin{equation}
\begin{array}{ccl}
\dot{\theta}(t)&=&\mathrm{Cov}(\delta|\theta(t),\phi)\\
&=&\mathrm{Cov}(r+(\gamma\phi'-\phi)^{\top}\theta(t),\phi)\\
&=&\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\theta(t)^{\top}(\phi-\gamma\phi'),\phi)\\
&=&\mathrm{Cov}(r,\phi)-\theta(t)^{\top}\mathrm{Cov}(\phi-\gamma\phi',\phi)\\
&=&\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\phi-\gamma\phi',\phi)^{\top}\theta(t)\\
&=&\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\phi,\phi-\gamma\phi')\theta(t)\\
&=&-A\theta(t)+b.
\end{array}
\label{odetheta}
\end{equation}
Let $\vec{h}(\theta(t))$ be the driving vector field of the ODE
(\ref{odetheta}).
\begin{equation*}
\vec{h}(\theta(t))=-A\theta(t)+b.
\end{equation*}
Consider the cross-covariance matrix,
\begin{equation}
\begin{array}{ccl}
A &=& \mathrm{Cov}(\phi,\phi-\gamma\phi')\\
&=&\frac{\mathrm{Cov}(\phi,\phi)+\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')-\mathrm{Cov}(\gamma\phi',\gamma\phi')}{2}\\
&=&\frac{\mathrm{Cov}(\phi,\phi)+\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')-\gamma^2\mathrm{Cov}(\phi',\phi')}{2}\\
&=&\frac{(1-\gamma^2)\mathrm{Cov}(\phi,\phi)+\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')}{2},\\
\end{array}
\label{covariance}
\end{equation}
where we eventually used $\mathrm{Cov}(\phi',\phi')=\mathrm{Cov}(\phi,\phi)$
\footnote{The covariance matrix $\mathrm{Cov}(\phi',\phi')$ is equal to
the covariance matrix $\mathrm{Cov}(\phi,\phi)$ if the initial state is re-reachable or
initialized randomly in a Markov chain for on-policy update.}.
Note that the covariance matrix $\mathrm{Cov}(\phi,\phi)$ and
$\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')$ are semi-positive
definite. Then, the matrix $A$ is semi-positive definite because it is
a positively weighted linear combination of two semi-positive definite matrices
(\ref{covariance}).
Furthermore, $A$ is nonsingular due to the assumption.
Hence, the cross-covariance matrix $A$ is positive definite.
Therefore,
$\theta^*=A^{-1}b$ can be seen to be the unique globally asymptotically
stable equilibrium for ODE (\ref{odetheta}).
Let $\vec{h}_{\infty}(\theta)=\lim_{r\rightarrow
\infty}\frac{\vec{h}(r\theta)}{r}$. Then
$\vec{h}_{\infty}(\theta)=-A\theta$ is well-defined.
Consider now the ODE
\begin{equation}
\dot{\theta}(t)=-A\theta(t).
\label{odethetafinal}
\end{equation}
The ODE (\ref{odethetafinal}) has the origin as its unique globally asymptotically stable equilibrium.
Thus, the assumption (A1) and (A2) are verified.
\end{proof}
Theorem 3 in \cite{dalal2020tale} provides a general conclusion on the convergence speed of all linear two-timescale
algorithms. VMTD satisfies the assumptions of this theorem, leading
to the following corollary.
\begin{corollary}
\label{corollary4_2}
Consider the Sparsely Projected variant of VMTD. Then, for $\alpha_k = 1/(k+1)^{\alpha}$, $\beta_k = 1/(k+1)^{\beta}$,
$0<\beta<\alpha<1$, $p>1$, with probability larger than $1- \tau$, for all $k\geq N_3$, we have
\begin{equation}
||\theta'_{k} - \theta^{*}|| \le C_{3,\theta} \frac{\sqrt{\ln (4d_{1}^{2}(k+1)^{p}/\tau)} }{(k+1)^{\alpha / 2}}
\end{equation}
\begin{equation}
||\omega'_{n} - \omega^{*}|| \le C_{3,\omega} \frac{\sqrt{\ln (4d_{2}^{2}(k+1)^{p}/\tau)} }{(k+1)^{\beta / 2}},
\end{equation}
\end{corollary}
where $d_1$ and $d_2$ represent the dimensions of $\theta$ and $\omega$, respectively. For VMTD, $d_2 =1$.
The meanings of $N_3$,$C_{3,\theta}$ and $C_{3,\omega}$ are explained in \cite{dalal2020tale}.
The formulas for $\theta'_{k}$ and $\omega'_{n}$ can be found in (\ref{sparseprojectiontheta}) and (\ref{sparseprojectionomega}).
\begin{theorem}
\label{theorem2}(Convergence of VMTDC).
In the case of off-policy learning, consider the iterations (\ref{omegavmtdc}), (\ref{uvmtdc}) and (\ref{thetavmtdc}) of VMTDC.
Let the step-size sequences $\alpha_k$, $\zeta_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\zeta_k,\beta_k>0$, for all $k$,
$
\sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\sum_{k=0}^{\infty}\zeta_k=\infty,
$
$
\sum_{k=0}^{\infty}\alpha_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\zeta_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\beta_k^2<\infty,
$
and
$
\alpha_k = o(\zeta_k),
$
$
\zeta_k = o(\beta_k).
$
Assume that $(\phi_k,r_k,\phi_k')$ is an i.i.d. sequence with
uniformly bounded second moments.
Let $A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$,
$b=\mathrm{Cov}(r,\phi)$, and $C=\mathbb{E}[\phi\phi^{\top}]$.
Assume that $A$ and $C$ are non-singular matrices.
Then the parameter vector $\theta_k$ converges with probability one
to $A^{-1}b$.
\end{theorem}
Please refer to the appendix \ref{proofth2} for detailed proof process.
\section{Experimental Studies}
This section assesses algorithm performance through experiments,
which are divided into policy evaluation experiments and control experiments.
\subsection{Testing Tasks}
\textbf{Random-walk:} as shown in Figure \ref{randomwalk}, all episodes
start in the center state, $C$, and proceed to left or right by one state on each
step, equiprobably. Episodes terminate either on the extreme left or
the extreme right, with a reward of $+1$ for terminating on the right and
$0$ otherwise. In this task, the true value for each state is the
probability of starting from that state and terminating on the right
\cite{Sutton2018book}.
Thus, the true values of states from $A$ to $E$ are
$\frac{1}{6},\frac{2}{6},\frac{3}{6},\frac{4}{6},\frac{5}{6}$, respectively.
The discount factor $\gamma=1.0$.
There are three standard kinds of features for random-walk problems: tabular
feature, inverted feature and dependent feature \cite{sutton2009fast}.
The feature matrices corresponding to three random walks are shown in Appendix \ref{experimentaldetails}.
Conduct experiments using
an on-policy approach in the Random-walk environment.
\begin{figure}
\begin{center}
\input{pic/randomwalk.tex}
% \captionsetup{width=0.5\textwidth}
\caption{Random walk.}
\label{randomwalk}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\input{pic/BairdExample.tex}
\caption{7-state version of Baird's off-policy counterexample.}
\label{bairdexample}
\end{center}
\end{figure}
\textbf{Baird's off-policy counterexample:} This task is well known as a
counterexample, in which TD diverges \cite{baird1995residual,sutton2009fast}. As
shown in Figure \ref{bairdexample}, reward for each transition is zero. Thus the true values are zeros for all states and for any given policy. The behaviour policy
chooses actions represented by solid lines with a probability of $\frac{1}{7}$
and actions represented by dotted lines with a probability of $\frac{6}{7}$. The
target policy is expected to choose the solid line with more probability than $\frac{1}{7}$,
and it chooses the solid line with probability of $1$ in this paper.
The discount factor $\gamma =0.99$, and the feature matrix is
defined in Appendix \ref{experimentaldetails} \cite{baird1995residual,sutton2009fast,maei2011gradient}.
\textbf{Maze}: The learning agent should find a shortest path from the upper
left corner to the lower right corner.
\begin{wrapfigure}{r}{4cm}
\centering
\includegraphics[scale=0.2]{pic/maze_13_13.pdf}
% \caption{The 2-state counterexample.}
\end{wrapfigure}
In each state,
there are four alternative actions: $up$, $down$, $left$, and $right$, which
takes the agent deterministically to the corresponding neighbour state,
except when a movement is blocked by an obstacle or the edge
of the maze. Rewards are $-1$ in all transitions until the
agent reaches the goal state.
The discount factor $\gamma=0.99$, and states $s$ are represented by tabular
features. The maximum number of moves in the game is set to 1000.
\textbf{The other three control environments}: Cliff Walking, Mountain Car, and Acrobot are
selected from the gym official website and correspond to the following
versions: ``CliffWalking-v0'', ``MountainCar-v0'' and ``Acrobot-v1''.
For specific details, please refer to the gym official website.
The maximum number of steps for the Mountain Car environment is set to 1000,
while the default settings are used for the other two environments. In Mountain car and Acrobot, features are generated by tile coding.
Please, refer to the Appendix \ref{experimentaldetails} for the selection of learning rates for all experiments.
\subsection{Experimental Results and Analysis}
\begin{figure}[htb]
\vskip 0.2in
\begin{center}
\subfigure[Dependent]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/dependent_new.pdf}
\label{DependentFull}
}
\subfigure[Tabular]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/tabular_new.pdf}
\label{TabularFull}
}
\\
\subfigure[Inverted]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/inverted_new.pdf}
\label{InvertedFull}
}
\subfigure[counterexample]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/counterexample_quanju_new.pdf}
\label{CounterExampleFull}
}
\caption{Learning curves of four evaluation environments.}
\label{Evaluation_full}
\end{center}
\vskip -0.2in
\end{figure}
\begin{figure*}[htb]
\vskip 0.2in
\begin{center}
\subfigure[Maze]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/maze_complete.pdf}
\label{MazeFull}
}
\subfigure[Cliff Walking]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/cw_complete.pdf}
\label{CliffWalkingFull}
}
\\
\subfigure[Mountain Car]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/mt_complete.pdf}
\label{MountainCarFull}
}
\subfigure[Acrobot]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/Acrobot_complete.pdf}
\label{AcrobotFull}
}
\caption{Learning curves of four control environments.}
\label{Complete_full}
\end{center}
\vskip -0.2in
\end{figure*}
\begin{table*}
\centering
\caption{Difference between R-learning and tabular VMQ.}
\vskip 0.15in
\begin{tabular}{c|cc}
\hline
algorithms&update formula \\
\hline
R-learning&$Q_{k+1}(s,a)\leftarrow Q_{k}(s,a)+\alpha_k(r_{k+1}-m_{k}+ \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a))$\\
&$m_{k+1}\leftarrow m_{k}+\beta_k(r_{k+1}+\max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-m_{k})$\\
tabular VMQ&$Q_{k+1}(s,a)\leftarrow Q_{k}(s,a)+\alpha_k(r_{k+1}+\gamma \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-\omega_k)$\\
&$\omega_{k+1}\leftarrow \omega_{k}+\beta_k(r_{k+1}+\gamma \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-\omega_{k})$\\
\hline
\end{tabular}
\label{differenceRandVMQ}
\vskip -0.1in
\end{table*}
% The learning rates of all algorithms in different environments are shown in Table \ref{lrofways}.
% Figure \ref{Complete_full} shows the experimental curves of different algorithms in four environments.
For policy evaluation experiments, compare the performance of the VMTD,
VMTDC, TD, and TDC algorithms.
The vertical axis is unified as RVBE.
For policy evaluation experiments, the criteria for evaluating
algorithms vary: the objective function minimized by our proposed
algorithms differs from that of the other algorithms. Therefore, to
ensure fairness in comparisons, this study only contrasts algorithm
performance in control settings.
This study will compare the performance of Sarsa, Q-learning, GQ(0),
AC, VMSarsa, VMQ, and VMGQ(0) in four control environments.
% All experiments involved in this paper were run independently for 100 times.
The learning curves of the algorithms corresponding to
policy evaluation experiments and control experiments are
shown in Figures \ref{Evaluation_full} and \ref{Complete_full}, respectively.
The shaded areas in Figures \ref{Evaluation_full} and \ref{Complete_full} represent the standard deviation (std).
In the random-walk tasks, VMTD and VMTDC exhibit excellent performance,
outperforming TD and TDC in the case of dependent random-walk.
In the 7-state counterexample task, TD diverges,
while VMTDC converges and performs better than TDC.
From the update formula, it can be observed that the VMTD algorithm, like TDC,
is also an adjustment or correction of the TD update.
What is more surprising is that VMTD also maintains
convergence and demonstrates the best performance.
In Maze, Mountain Car, and Acrobot,
the convergence speed of VMSarsa, VMQ, and VMGQ(0) has
been significantly improved compared to Sarsa, Q-learning,
and GQ(0), respectively. The performance of the AC algorithm
is at an intermediate level. The performances of VMSarsa,
VMQ, and VMGQ(0) in these three experimental environments
have no significant differences.
In Cliff Walking, Sarsa and
VMSarsa converge to slightly worse solutions compared to
other algorithms. The convergence speed of VMSarsa is significantly
better than that of Sarsa. The convergence speed of VMGQ(0) and VMQ
is better than other algorithms, and the performance of VMGQ(0) is
slightly better than that of VMQ.
In summary, the performance of VMSarsa,
VMQ, and VMGQ(0) is better than that of other algorithms.
In the Cliff Walking environment,
the performance of VMGQ(0) is slightly better than that of
VMSarsa and VMQ. In the other three experimental environments,
the performances of VMSarsa, VMQ, and VMGQ(0) are close.
\section{Related Work}
\subsection{Difference between VMQ and R-learning}
Tabular VMQ's update formula bears some resemblance
to R-learning's update formula. As shown in Table \ref{differenceRandVMQ}, the update formulas of the two algorithms have the following differences:
\\(1) The goal of the R-learning algorithm \cite{schwartz1993reinforcement} is to maximize the average
reward, rather than the cumulative reward, by learning an estimate
of the average reward. This estimate $m$ is then used to update the Q-values.
On the contrary, the $\omega$ in the tabular VMQ update formula eventually converges to $\mathbb{E}[\delta]$.
\\(2) When $\gamma=1$ in the tabular VMQ update formula, the
R-learning update formula is formally
the same as the tabular VMQ update formula.
Therefore, the R-learning algorithm can formally be
considered a special case of the VMQ algorithm.
\subsection{Variance Reduction for TD Learning}
The TD with centering algorithm (CTD) \cite{korda2015td}
was proposed, which directly applies variance reduction techniques to
the TD algorithm. The CTD algorithm updates its parameters using the
average gradient of a batch of Markovian samples and a projection operator.
Unfortunately, the authors’ analysis of the CTD algorithm contains technical
errors. The VRTD algorithm \cite{xu2020reanalysis} is also a variance-reduced algorithm that updates
its parameters using the average gradient of a batch of i.i.d. samples. The
authors of VRTD provide a technically sound analysis to demonstrate the
advantages of variance reduction.
\subsection{Variance Reduction for Policy Gradient Algorithms}
Policy gradient algorithms are a class of reinforcement
learning algorithms that directly optimize cumulative rewards.
REINFORCE is a Monte Carlo algorithm that estimates
gradients through sampling, but may have a high variance.
Baselines are introduced to reduce variance and to
accelerate learning \cite{Sutton2018book}. In Actor-Critic,
value function as a baseline and bootstrapping
are used to reduce variance, also accelerating convergence \cite{Sutton2018book}.
TRPO \cite{schulman2015trust} and PPO \cite{schulman2017proximal}
use generalized advantage
estimation, which combines multi-step bootstrapping and Monte Carlo
estimation to reduce variance, making gradient estimation more stable and
accelerating convergence.
In Variance Minimization,
the incorporation of $\omega \doteq \mathbb{E}[\delta]$
bears a striking resemblance to the use of a baseline
in policy gradient methods. The introduction of a baseline
in policy gradient techniques does not alter
the expected value of the update;
rather, it significantly impacts the variance of gradient estimation.
The addition of $\omega \doteq \mathbb{E}[\delta]$ in Variance Minimization
preserves the invariance of the optimal
policy while stabilizing gradient estimation,
reducing the variance of gradient estimation,
and hastening convergence.
\section{Conclusion and Future Work}
Value-based reinforcement learning typically aims
to minimize error as an optimization objective.
As an alternative, this study proposes two new objective
functions: VBE and VPBE, and derives an on-policy algorithm:
VMTD and an off-policy algorithm: VMTDC.
% The VMTD algorithm
% is essentially an adjustment or correction to the traditional
% TD update.
% Both
% algorithms are capable of stabilizing gradient estimation, reducing
% the variance of gradient estimation and accelerating convergence.
Both algorithms demonstrated superior performance in policy
evaluation and control experiments.
Future work may include, but is not limited
to, (1) analysis of the convergence rate of VMTDC;
(2) extensions of VBE and VPBE to multi-step returns;
(3) extensions to nonlinear approximations, such as neural networks.
% \section{Format of the Paper}
% All submissions must follow the specified format.
% \begin{figure}[ht]
% \vskip 0.2in
% \begin{center}
% \centerline{\includegraphics[width=\columnwidth]{icml_numpapers}}
% \caption{Historical locations and number of accepted papers for International
% Machine Learning Conferences (ICML 1993 -- ICML 2008) and International
% Workshops on Machine Learning (ML 1988 -- ML 1992). At the time this figure was
% produced, the number of accepted papers for ICML 2008 was unknown and instead
% estimated.}
% \label{icml-historical}
% \end{center}
% \vskip -0.2in
% \end{figure}
% \subsection{Figures}
% You may want to include figures in the paper to illustrate
% your approach and results. Such artwork should be centered,
% legible, and separated from the text. Lines should be dark and at
% least 0.5~points thick for purposes of reproduction, and text should
% not appear on a gray background.
% Label all distinct components of each figure. If the figure takes the
% form of a graph, then give a name for each axis and include a legend
% that briefly describes each curve. Do not include a title inside the
% figure; instead, the caption should serve this function.
% Number figures sequentially, placing the figure number and caption
% \emph{after} the graphics, with at least 0.1~inches of space before
% the caption and 0.1~inches after it, as in
% \cref{icml-historical}. The figure caption should be set in
% 9~point type and centered unless it runs two or more lines, in which
% case it should be flush left. You may float figures to the top or
% bottom of a column, and you may set wide figures across both columns
% (use the environment \texttt{figure*} in \LaTeX). Always place
% two-column figures at the top or bottom of the page.
% \subsection{Theorems and such}
% The preferred way is to number definitions, propositions, lemmas, etc. consecutively, within sections, as shown below.
% \begin{definition}
% \label{def:inj}
% A function $f:X \to Y$ is injective if for any $x,y\in X$ different, $f(x)\ne f(y)$.
% \end{definition}
% Using \cref{def:inj} we immediate get the following result:
% \begin{proposition}
% If $f$ is injective mapping a set $X$ to another set $Y$,
% the cardinality of $Y$ is at least as large as that of $X$
% \end{proposition}
% \begin{proof}
% Left as an exercise to the reader.
% \end{proof}
% \cref{lem:usefullemma} stated next will prove to be useful.
% \begin{lemma}
% \label{lem:usefullemma}
% For any $f:X \to Y$ and $g:Y\to Z$ injective functions, $f \circ g$ is injective.
% \end{lemma}
% \begin{theorem}
% \label{thm:bigtheorem}
% If $f:X\to Y$ is bijective, the cardinality of $X$ and $Y$ are the same.
% \end{theorem}
% An easy corollary of \cref{thm:bigtheorem} is the following:
% \begin{corollary}
% If $f:X\to Y$ is bijective,
% the cardinality of $X$ is at least as large as that of $Y$.
% \end{corollary}
% \begin{assumption}
% The set $X$ is finite.
% \label{ass:xfinite}
% \end{assumption}
% \begin{remark}
% According to some, it is only the finite case (cf. \cref{ass:xfinite}) that is interesting.
% \end{remark}
%restatable
% In the unusual situation where you want a paper to appear in the
% references without citing it in the main text, use \nocite
\nocite{langley00}
\bibliography{example_paper}
\bibliographystyle{icml2024}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% APPENDIX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newpage
\appendix
\onecolumn
\section{Relevant proofs}
\subsection{Proof of Corollary \ref{corollary4_2}}
\label{proofcorollary4_2}
The update formulas in linear two-timescale algorithms are as follows:
\begin{equation}
\theta_{k+1}=\theta_{k} + \alpha_{k}[h_1(\theta_{k},\omega_{k})+M^{(1)}_{k+1}],
\end{equation}
\begin{equation}
\omega_{k+1}=\omega_{k} + \beta_{k}[h_2(\theta_{k},\omega_{k})+M^{(2)}_{k+1}].
\end{equation}
where $\alpha_k, \beta_k \in \mathbb{R} $ are stepsizes and $M^{(1)} \in \mathbb{R}^{d_1}, M^{(2)} \in \mathbb{R}^{d_2}$
denote noise.
$h_1 : \mathbb{R}^{d_{1}}\times \mathbb{R}^{d_{2}}\rightarrow \mathbb{R}^{d_{1}}$ and
$h_2 : \mathbb{R}^{d_{1}}\times \mathbb{R}^{d_{2}}\rightarrow \mathbb{R}^{d_{2}}$ have the
form, respectively,
\begin{equation}
h_{1}(\theta,\omega)=v_1 - \Gamma_1 \theta - W_1\omega,
\end{equation}
\begin{equation}
h_{2}(\theta,\omega)=v_2 - \Gamma_2 \theta - W_2\omega,
\end{equation}
where $v_1 \in \mathbb{R}^{d_1}$, $v_2 \in \mathbb{R}^{d_2}$, $\Gamma_1 \in \mathbb{R}^{d_1 \times d_1}$
, $\Gamma_2 \in \mathbb{R}^{d_2 \times d_1}$, $W_1 \in \mathbb{R}^{d_1 \times d_2}$ and
$W_2 \in \mathbb{R}^{d_2 \times d_2}$. $d_1$ and $d_2$ are the dimensions of vectors $\theta$ and $\omega$, respectively.
For Theorem 3 in \cite{dalal2020tale}, the theorem still holds even when $d_1$ is not equal to $d_2$. For the VMTD algorithm, $d_2$ is equal to 1.
% Before proving the Corollary \ref{corollary4_2},
\cite{dalal2020tale} presents
the matrix assumption, step size assumption, and
defines sparse projection.
\begin{assumption}
\label{matrixassumption}
(Matrix Assumption).
$W_2$ and $X_1 = \Gamma_1 - W_1 W_{2}^{-1}\Gamma_2$ are positive definite(not necessarily symmetric).
\end{assumption}
\begin{assumption}
\label{stepsizeassumption}
(Step Size Assumption).
$\alpha_k = (k+1)^{-\alpha}$ and $\beta_k = (k+1)^{-\beta}$, where $1>\alpha > \beta > 0$.
\end{assumption}
\begin{definition}
\label{sparseprojection}
(Sparse Projection).
For $R>0$, let $\Pi_{R}(x)=\min \{1, R/||x||\}x$ be the projection into the ball with radius
$R$ around the origin. The sparse projection operator is
\begin{equation*}
\Pi_{n, R} = \begin{cases}
\Pi_{R}, & \text{if } k = n^{n} - 1 \text{ for some } n \in \mathbb{Z}_{>0}, \\
I, & \text{otherwise}.
\end{cases}
\end{equation*}
We call it sparse as it projects only on specific indices that are exponentially far apart.
Pick an arbitrary $p>1$. Fix some constant $R^{\theta}_{\text{proj}}>0$ and $R^{\omega}_{\text{proj}}>0$
for the radius of the projection ball. Further, let
\begin{equation*}
\theta^{*}=X^{-1}_{1}b_{1}, \omega^{*}=W^{-1}_{2}(v_2 - \Gamma_2 \theta^{*})
\end{equation*}
with $b_1=v_1 - W_1 W_2^{-1}v_2$.
The formula for the sparse projection update in linear two-timescale algorithms is as follows:
\begin{equation}
\label{sparseprojectiontheta}
\theta'_{k+1}=\Pi_{k+1,R^{\theta}_{\text{proj}}}(\theta'_{k} + \alpha_{k}[h_1(\theta'_{k},\omega'_{k})+M^{(1')}_{k+1}]),
\end{equation}
\begin{equation}
\label{sparseprojectionomega}
\omega'_{k+1}=\Pi_{k+1,R^{\omega}_{\text{proj}}}(\omega'_{k} + \beta_{k}[h_2(\theta'_{k},\omega'_{k})+M^{(2')}_{k+1}]).
\end{equation}
\end{definition}
\begin{proof}
As long as the VMTD algorithm satisfies Assumption \ref{matrixassumption},
the convergence speed of the VMTD algorithm can be
obtained.
VMTD's update rule is
\begin{equation*}
\theta_{k+1}=\theta_{k}+\alpha_k(\delta_k-\omega_k)\phi_k.
\end{equation*}
\begin{equation*}
\omega_{k+1}=\omega_{k}+\beta_k(\delta_k-\omega_k).
\end{equation*}
Thus, $h_1(\theta, \omega)=\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\phi,\phi - \gamma\phi')\theta$,
$h_2(\theta, \omega)=\mathbb{E}[r]+\mathbb{E}[\gamma \phi'^{\top}-\phi^{\top}]\theta -\omega$,
$\Gamma_1 =\mathrm{Cov}(\phi,\phi - \gamma\phi')$,
$W_1 = 0$ and
$\Gamma_2 = -\mathbb{E}[\gamma \phi'^{\top}-\phi^{\top}]$,
$W_2 = 1$,
$v_2 = \mathbb{E}[r]$. Additionally,
$X_1=\Gamma_1 - W_1 W^{-1}_2 \Gamma_2 = \mathrm{Cov}(\phi,\phi - \gamma\phi')$.
% By the Assumption \ref{matrixassumption},
It can be deduced from the proof \ref{th1proof} that $X_1$ is a positive definite matrix.
The VMTD algorithm satisfies the Assumption \ref{matrixassumption}.
By the proof \ref{th1proof}, Definition 1 in \cite{dalal2020tale} is satisfied.
We can apply the Theorem 3 in \cite{dalal2020tale} to get the Corollary \ref{corollary4_2}.
\end{proof}
\subsection{Proof of Theorem \ref{theorem2}}
\label{proofth2}
\begin{proof}
The proof is similar to that given by \cite{sutton2009fast} for TDC, but it is based on multi-time-scale stochastic approximation.
For the VMTDC algorithm, a new one-step linear TD solution is defined as:
\begin{equation*}
0=\mathbb{E}[(\phi - \gamma \phi' - \mathbb{E}[\phi - \gamma \phi'])\phi^\top]\mathbb{E}[\phi \phi^{\top}]^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta])\phi]=A^{\top}C^{-1}(-A\theta+b).
\end{equation*}
The matrix $A^{\top}C^{-1}A$ is positive definite. Thus, the VMTDC solution coincides with the VMTD solution:
$\theta_{\text{VMTDC}}=\theta_{\text{VMTD}}=A^{-1}b$.
First, note that recursions (\ref{thetavmtdc}) and (\ref{uvmtdc}) can be rewritten as, respectively,
\begin{equation*}
\theta_{k+1}\leftarrow \theta_k+\zeta_k x(k),
\end{equation*}
\begin{equation*}
u_{k+1}\leftarrow u_k+\beta_k y(k),
\end{equation*}
where
\begin{equation*}
x(k)=\frac{\alpha_k}{\zeta_k}[(\delta_{k}- \omega_k) \phi_k - \gamma\phi'_{k}(\phi^{\top}_k u_k)],
\end{equation*}
\begin{equation*}
y(k)=\frac{\zeta_k}{\beta_k}[\delta_{k}-\omega_k - \phi^{\top}_k u_k]\phi_k.
\end{equation*}
Recursion (\ref{thetavmtdc}) can also be rewritten as
\begin{equation*}
\theta_{k+1}\leftarrow \theta_k+\beta_k z(k),
\end{equation*}
where
\begin{equation*}
z(k)=\frac{\alpha_k}{\beta_k}[(\delta_{k}- \omega_k) \phi_k - \gamma\phi'_{k}(\phi^{\top}_k u_k)],
\end{equation*}
Due to the settings of step-size schedule
$\alpha_k = o(\zeta_k)$, $\zeta_k = o(\beta_k)$, we have $x(k)\rightarrow 0$, $y(k)\rightarrow 0$, $z(k)\rightarrow 0$ almost surely as $k\rightarrow \infty$.
That is, the increments in iteration (\ref{omegavmtdc}) are uniformly larger than
those in (\ref{uvmtdc}) and the increments in iteration (\ref{uvmtdc}) are uniformly larger than
those in (\ref{thetavmtdc}); thus (\ref{omegavmtdc}) is the fastest recursion, (\ref{uvmtdc}) is the second fastest recursion and (\ref{thetavmtdc}) is the slowest recursion.
Along the fastest time scale, iterations of (\ref{thetavmtdc}), (\ref{uvmtdc}) and (\ref{omegavmtdc})
are associated to ODEs system as follows:
\begin{equation}
\dot{\theta}(t) = 0,
\label{thetavmtdcFastest}
\end{equation}
\begin{equation}
\dot{u}(t) = 0,
\label{uvmtdcFastest}
\end{equation}
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|u(t),\theta(t)]-\omega(t).
\label{omegavmtdcFastest}
\end{equation}
Based on the ODE (\ref{thetavmtdcFastest}) and (\ref{uvmtdcFastest}), both $\theta(t)\equiv \theta$
and $u(t)\equiv u$ when viewed from the fastest timescale.
By the Hirsch lemma \cite{hirsch1989convergent}, it follows that
$||\theta_k-\theta||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$\theta$ that depends on the initial condition $\theta_0$ of recursion
(\ref{thetavmtdc}) and $||u_k-u||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$u$ that depends on the initial condition $u_0$ of recursion
(\ref{uvmtdc}). Thus, the ODE system (\ref{thetavmtdcFastest})--(\ref{omegavmtdcFastest})
can be written as
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|u,\theta]-\omega(t).
\label{omegavmtdcFastestFinal}
\end{equation}
Consider the function $h(\omega)=\mathbb{E}[\delta|\theta,u]-\omega$,
i.e., the driving vector field of the ODE (\ref{omegavmtdcFastestFinal}).
It is easy to find that the function $h$ is Lipschitz with coefficient
$1$.
Let $h_{\infty}(\cdot)$ be the function defined by
$h_{\infty}(\omega)=\lim_{r\rightarrow \infty}\frac{h(r\omega)}{r}$.
Then $h_{\infty}(\omega)= -\omega$, is well-defined.
For (\ref{omegavmtdcFastestFinal}), $\omega^*=\mathbb{E}[\delta|\theta,u]$
is the unique globally asymptotically stable equilibrium.
For the ODE
\begin{equation}
\dot{\omega}(t) = h_{\infty}(\omega(t))= -\omega(t),
\label{omegavmtdcInfty}
\end{equation}
apply $\vec{V}(\omega)=(-\omega)^{\top}(-\omega)/2$ as its
associated strict Lyapunov function. Then,
the origin of (\ref{omegavmtdcInfty}) is a globally asymptotically stable
equilibrium.
Consider now the recursion (\ref{omegavmtdc}).
Let
$M_{k+1}=(\delta_k-\omega_k)
-\mathbb{E}[(\delta_k-\omega_k)|\mathcal{F}(k)]$,
where $\mathcal{F}(k)=\sigma(\omega_l,u_l,\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ are the sigma fields
generated by $\omega_0,u_0,\theta_0,\omega_{l+1},u_{l+1},\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
It is easy to verify that $M_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[M_{k+1}|\mathcal{F}(k)]=0$, $\forall k\geq0$.
Because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_1>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq
c_1(1+||\omega_k||^2+||u_k||^2+||\theta_k||^2).
\end{equation*}
Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified.
Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our
conditions on the step-size sequences $\alpha_k$,$\zeta_k$, $\beta_k$. Thus,
by Theorem 2.2 of \cite{borkar2000ode} we obtain that
$||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$.
Consider now the second time scale recursion (\ref{uvmtdc}).
Based on the above analysis, (\ref{uvmtdc}) can be rewritten as
% \begin{equation*}
% u_{k+1}\leftarrow u_{k}+\zeta_{k}[\delta_{k}-\mathbb{E}[\delta_k|u_k,\theta_k] - \phi^{\top} (s_k) u_k]\phi(s_k).
% \end{equation*}
\begin{equation}
\dot{\theta}(t) = 0,
\label{thetavmtdcFaster}
\end{equation}
\begin{equation}
\dot{u}(t) = \mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|u(t),\theta(t)])\phi_t|\theta(t)] - Cu(t).
\label{uvmtdcFaster}
\end{equation}
The ODE (\ref{thetavmtdcFaster}) suggests that $\theta(t)\equiv \theta$ (i.e., a time invariant parameter)
when viewed from the second fast timescale.
By the Hirsch lemma \cite{hirsch1989convergent}, it follows that
$||\theta_k-\theta||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$\theta$ that depends on the initial condition $\theta_0$ of recursion
(\ref{thetavmtdc}).
Consider now the recursion (\ref{uvmtdc}).
Let
$N_{k+1}=((\delta_k-\mathbb{E}[\delta_k]) - \phi_k \phi^{\top}_k u_k) -\mathbb{E}[((\delta_k-\mathbb{E}[\delta_k]) - \phi_k \phi^{\top}_k u_k)|\mathcal{I} (k)]$,
where $\mathcal{I}(k)=\sigma(u_l,\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ are the sigma fields
generated by $u_0,\theta_0,u_{l+1},\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
It is easy to verify that $N_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[N_{k+1}|\mathcal{I}(k)]=0$, $\forall k\geq0$.
Because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_2>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||N_{k+1}||^2|\mathcal{I}(k)]\leq
c_2(1+||u_k||^2+||\theta_k||^2).
\end{equation*}
Because $\theta(t)\equiv \theta$ from (\ref{thetavmtdcFaster}), the ODE pair (\ref{thetavmtdcFaster})-(\ref{uvmtdcFaster})
can be written as
\begin{equation}
\dot{u}(t) = \mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\theta])\phi_t|\theta] - Cu(t).
\label{uvmtdcFasterFinal}
\end{equation}
Now consider the function $h(u)=\mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\theta])\phi_t|\theta] -Cu$, i.e., the
driving vector field of the ODE (\ref{uvmtdcFasterFinal}). For (\ref{uvmtdcFasterFinal}),
$u^* = C^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta|\theta])\phi|\theta]$ is the unique globally asymptotically
stable equilibrium. Let $h_{\infty}(u)=-Cu$.
For the ODE
\begin{equation}
\dot{u}(t) = h_{\infty}(u(t))= -Cu(t),
\label{uvmtdcInfty}
\end{equation}
the origin of (\ref{uvmtdcInfty}) is a globally asymptotically stable
equilibrium because $C$ is a positive definite matrix (because it is nonnegative definite and nonsingular).
Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified.
Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our
conditions on the step-size sequences $\alpha_k$,$\zeta_k$, $\beta_k$. Thus,
by Theorem 2.2 of \cite{borkar2000ode} we obtain that
$||u_k-u^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$.
Consider now the slower timescale recursion (\ref{thetavmtdc}). In the light of the above,
(\ref{thetavmtdc}) can be rewritten as
\begin{equation}
\theta_{k+1} \leftarrow \theta_{k} + \alpha_k (\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k\\
- \alpha_k \gamma\phi'_{k}(\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k]).
\end{equation}
Let $\mathcal{G}(k)=\sigma(\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ be the sigma fields
generated by $\theta_0,\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$. Let
\begin{equation*}
\begin{array}{ccl}
Z_{k+1}&=&((\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k - \gamma \phi'_{k}\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k])\\
& &-\mathbb{E}[((\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k - \gamma \phi'_{k}\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k])|\mathcal{G}(k)]\\
&=&((\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k - \gamma \phi'_{k}\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k])\\
& &-\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k|\theta_k] - \gamma\mathbb{E}[\phi' \phi^{\top}]C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k|\theta_k].
\end{array}
\end{equation*}
It is easy to see that $Z_k$, $k\geq 0$ are integrable random variables and $\mathbb{E}[Z_{k+1}|\mathcal{G}(k)]=0$, $\forall k\geq0$. Further,
\begin{equation*}
\mathbb{E}[||Z_{k+1}||^2|\mathcal{G}(k)]\leq
c_3(1+||\theta_k||^2), k\geq 0
\end{equation*}
for some constant $c_3 \geq 0$, again because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments.
Consider now the following ODE associated with (\ref{thetavmtdc}):
\begin{equation}
\dot{\theta}(t) = (I - \mathbb{E}[\gamma \phi' \phi^{\top}]C^{-1})\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)].
\label{thetavmtdcSlowerFinal}
\end{equation}
Let
\begin{equation*}
\begin{array}{ccl}
\vec{h}(\theta(t))&=&(I - \mathbb{E}[\gamma \phi' \phi^{\top}]C^{-1})\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]\\
&=&(C - \mathbb{E}[\gamma \phi' \phi^{\top}])C^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]\\
&=& (\mathbb{E}[\phi \phi^{\top}] - \mathbb{E}[\gamma \phi' \phi^{\top}])C^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]\\
&=& A^{\top}C^{-1}(-A\theta(t)+b),
\end{array}
\end{equation*}
because $\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]=-A\theta(t)+b$, where
$A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$, $b=\mathrm{Cov}(r,\phi)$, and $C=\mathbb{E}[\phi\phi^{\top}]$.
Therefore,
$\theta^*=A^{-1}b$ can be seen to be the unique globally asymptotically
stable equilibrium for ODE (\ref{thetavmtdcSlowerFinal}).
Let $\vec{h}_{\infty}(\theta)=\lim_{r\rightarrow
\infty}\frac{\vec{h}(r\theta)}{r}$. Then
$\vec{h}_{\infty}(\theta)=-A^{\top}C^{-1}A\theta$ is well-defined.
Consider now the ODE
\begin{equation}
\dot{\theta}(t)=-A^{\top}C^{-1}A\theta(t).
\label{odethetavmtdcfinal}
\end{equation}
Because $C^{-1}$ is positive definite and $A$ has full rank (as it
is nonsingular by assumption), the matrix $A^{\top} C^{-1}A$ is also
positive definite.
The ODE (\ref{odethetavmtdcfinal}) has the origin as its unique globally asymptotically stable equilibrium.
Thus, the assumption (A1) and (A2) are verified.
The proof is given above.
In the fastest time scale, the parameter $\omega$ converges to
$\mathbb{E}[\delta|u_k,\theta_k]$.
In the second fast time scale,
the parameter $u$ converges to $C^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta|\theta_k])\phi|\theta_k]$.
In the slower time scale,
the parameter $\theta$ converges to $A^{-1}b$.
\end{proof}
\begin{algorithm}[t]
\caption{VMTDC algorithm with linear function approximation in the off-policy setting}
\label{alg:algorithm 2}
\begin{algorithmic}
\STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
$, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
\REPEAT
\STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_0$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
\textbf{Output}: $\theta^*$.\\
\FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
\STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
\STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
\STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t[\rho_t (\delta_t-\omega_t)\phi_t - \gamma \phi_{t+1}(\phi^{\top}_{t} u_t)]$
\STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \phi^{\top}_{t} u_t] \phi_t$
\STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
\STATE $S_t=S_{t+1}$
\ENDFOR
\UNTIL{terminal episode}
\end{algorithmic}
\end{algorithm}
\section{Experimental details}
\label{experimentaldetails}
The feature matrices corresponding to three random walks are shown below respectively:
\begin{equation*}
\Phi_{tabular}=\left[
\begin{array}{ccccc}
1 & 0& 0& 0& 0\\
0 & 1& 0& 0& 0\\
0 & 0& 1& 0& 0\\
0 & 0& 0& 1& 0\\
0 & 0& 0& 0& 1
\end{array}\right]
\end{equation*}
\begin{equation*}
\Phi_{inverted}=\left[
\begin{array}{ccccc}
0 & \frac{1}{2}& \frac{1}{2}& \frac{1}{2}& \frac{1}{2}\\
\frac{1}{2} & 0& \frac{1}{2}& \frac{1}{2}& \frac{1}{2}\\
\frac{1}{2} & \frac{1}{2}& 0& \frac{1}{2}& \frac{1}{2}\\
\frac{1}{2} & \frac{1}{2}& \frac{1}{2}& 0& \frac{1}{2}\\
\frac{1}{2} & \frac{1}{2}& \frac{1}{2}& \frac{1}{2}& 0
\end{array}\right]
\end{equation*}
\begin{equation*}
\Phi_{dependent}=\left[
\begin{array}{ccc}
1 & 0& 0\\
\frac{1}{\sqrt{2}} & \frac{1}{\sqrt{2}}& 0\\
\frac{1}{\sqrt{3}} & \frac{1}{\sqrt{3}}& \frac{1}{\sqrt{3}}\\
0 & \frac{1}{\sqrt{2}}& \frac{1}{\sqrt{2}}\\
0 & 0& 1
\end{array}\right]
\end{equation*}
Three random walk experiments: the $\alpha$ values for
all algorithms are in the range of $\{0.008, 0.015, 0.03, 0.06, 0.12, 0.25, 0.5\}$. For the TDC algorithm,
the range of the ratio $\frac{\zeta}{\alpha}$ is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$. For the VMTD algorithm,
the range of the ratio $\frac{\beta}{\alpha}$ is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$. It can be observed from
the update formula of VMTDC that when $\zeta$ takes a very small value,
the VMTDC update tends to be similar to VMTD update. Similarly,
when $\beta$ takes a very small value, the VMTDC update tends to be
similar to TDC update. Through experiments, it was found that
setting $\zeta$ to a small value makes VMTDC updates approach VMTD
updates, resulting in better performance. Therefore, for the VMTDC
algorithm, the range of $\frac{\beta}{\alpha}$ ratio is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$, and the range of
$\zeta$ is $\{0.1, 0.01, 0.001, 0.0001, 0.00001\}$. The learning curves in Figure \ref{Evaluation_full} correspond to the optimal
parameters.
The feature matrix of 7-state version of Baird's off-policy counterexample is
defined as follow:
\begin{equation*}
\Phi_{Counter}=\left[
\begin{array}{cccccccc}
1 & 2& 0& 0& 0& 0& 0& 0\\
1 & 0& 2& 0& 0& 0& 0& 0\\
1 & 0& 0& 2& 0& 0& 0& 0\\
1 & 0& 0& 0& 2& 0& 0& 0\\
1 & 0& 0& 0& 0& 2& 0& 0\\
1 & 0& 0& 0& 0& 0& 2& 0\\
2 & 0& 0& 0& 0& 0& 0& 1
\end{array}\right]
\end{equation*}
7-state version of Baird's off-policy counterexample:
for TD algorithm, $\alpha$ is set to 0.1. For the TDC algorithm, the range of
$\alpha$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0\}$,
and the range of
$\zeta$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5\}$.
For the VMTD algorithm, the range of
$\alpha$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0\}$,
and the range of
$\beta$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5\}$. Through experiments, it was found
that setting $\zeta$ to a small value makes VMTDC updates approach VMTD
updates, resulting in better performance. Therefore, for the VMTDC
algorithm, The range of values for $\alpha$ and $\beta$ is the same as that of VMTD
and the range of $\zeta$
is $\{0.1, 0.01, 0.001, 0.0001, 0.00001\}$.
The learning curves in Figure \ref{Complete_full} correspond to the optimal parameters.
For all policy evaluation experiments, each experiment
is independently run 100 times.
For the four control experiments: The learning rates for each
algorithm in all experiments are shown in Table \ref{lrofways}.
For all control experiments, each experiment is independently run 50 times.
\begin{table*}[htb]
\centering
\caption{Learning rates ($lr$) of four control experiments.}
\vskip 0.15in
\begin{tabular}{c|ccccc}
\hline
\multicolumn{1}{c|}{\diagbox{algorithms($lr$)}{envs}} &Maze &Cliff walking &Mountain Car &Acrobot \\
\hline
Sarsa($\alpha$)&$0.1$ &$0.1$ &$0.1$ &$0.1$ \\
GQ(0)($\alpha,\zeta$)&$0.1,0.003$ &$0.1,0.004$ &$0.1,0.01$ &$0.1,0.01$ \\
VMSarsa($\alpha,\beta$)&$0.1,0.001$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ \\
VMGQ(0)($\alpha,\zeta,\beta$)&$0.1,0.001,0.001$ &$0.1,0.005,\text{1e-4}$ &$0.1,\text{5e-4},\text{1e-4}$ &$0.1,\text{5e-4},\text{1e-4}$ \\
AC($lr_{\text{actor}},lr_{\text{critic}}$)&$0.01,0.1$ &$0.01,0.01$ &$0.01,0.05$ &$0.01,0.05$ \\
Q-learning($\alpha$)&$0.1$ &$0.1$ &$0.1$ &$0.1$ \\
VMQ($\alpha,\beta$)&$0.1,0.001$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ \\
\hline
\end{tabular}
\label{lrofways}
\vskip -0.1in
\end{table*}
\end{document}
% fancyhdr.sty version 3.2
% Fancy headers and footers for LaTeX.
% Piet van Oostrum,
% Dept of Computer and Information Sciences, University of Utrecht,
% Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
% Telephone: +31 30 2532180. Email: piet@cs.uu.nl
% ========================================================================
% LICENCE:
% This file may be distributed under the terms of the LaTeX Project Public
% License, as described in lppl.txt in the base LaTeX distribution.
% Either version 1 or, at your option, any later version.
% ========================================================================
% MODIFICATION HISTORY:
% Sep 16, 1994
% version 1.4: Correction for use with \reversemargin
% Sep 29, 1994:
% version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
% Oct 4, 1994:
% version 1.6: Reset single spacing in headers/footers for use with
% setspace.sty or doublespace.sty
% Oct 4, 1994:
% version 1.7: changed \let\@mkboth\markboth to
% \def\@mkboth{\protect\markboth} to make it more robust
% Dec 5, 1994:
% version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
% importantly) use the \chapter/sectionmark definitions from ps@headings if
% they exist (which should be true for all standard classes).
% May 31, 1995:
% version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
% construction in the doc did not work properly with the fancyplain style.
% June 1, 1995:
% version 1.91: The definition of \@mkboth wasn't restored on subsequent
% \pagestyle{fancy}'s.
% June 1, 1995:
% version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
% \pagestyle{fancy} would erroneously select the plain version.
% June 1, 1995:
% version 1.93: \fancypagestyle command added.
% Dec 11, 1995:
% version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
% CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
% position (old hardcoded value of .3\normalbaselineskip is far too high
% when used with very small footer fonts).
% Jan 31, 1996:
% version 1.95: call \@normalsize in the reset code if that is defined,
% otherwise \normalsize.
% this is to solve a problem with ucthesis.cls, as this doesn't
% define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
% work as this is optimized to do very little, so there \@normalsize should
% be called. Hopefully this code works for all versions of LaTeX known to
% mankind.
% April 25, 1996:
% version 1.96: initialize \headwidth to a magic (negative) value to catch
% most common cases that people change it before calling \pagestyle{fancy}.
% Note it can't be initialized when reading in this file, because
% \textwidth could be changed afterwards. This is quite probable.
% We also switch to \MakeUppercase rather than \uppercase and introduce a
% \nouppercase command for use in headers. and footers.
% May 3, 1996:
% version 1.97: Two changes:
% 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
% for the chapter and section marks. The current version of amsbook and
% amsart classes don't seem to need them anymore. Moreover the standard
% latex classes don't use \markboth if twoside isn't selected, and this is
% confusing as \leftmark doesn't work as expected.
% 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
% in the amsbook and amsart classes, that make global changes to \topskip,
% which are reset in \ps@empty. Hopefully this doesn't break other things.
% May 7, 1996:
% version 1.98:
% Added % after the line \def\nouppercase
% May 7, 1996:
% version 1.99: This is the alpha version of fancyhdr 2.0
% Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
% Changed \headrulewidth, \footrulewidth, \footruleskip to
% macros rather than length parameters, In this way they can be
% conditionalized and they don't consume length registers. There is no need
% to have them as length registers unless you want to do calculations with
% them, which is unlikely. Note that this may make some uses of them
% incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
% May 10, 1996:
% version 1.99a:
% Added a few more % signs
% May 10, 1996:
% version 1.99b:
% Changed the syntax of \f@nfor to be resistent to catcode changes of :=
% Removed the [1] from the defs of \lhead etc. because the parameter is
% consumed by the \@[xy]lhead etc. macros.
% June 24, 1997:
% version 1.99c:
% corrected \nouppercase to also include the protected form of \MakeUppercase
% \global added to manipulation of \headwidth.
% \iffootnote command added.
% Some comments added about \@fancyhead and \@fancyfoot.
% Aug 24, 1998
% version 1.99d
% Changed the default \ps@empty to \ps@@empty in order to allow
% \fancypagestyle{empty} redefinition.
% Oct 11, 2000
% version 2.0
% Added LPPL license clause.
%
% A check for \headheight is added. An errormessage is given (once) if the
% header is too large. Empty headers don't generate the error even if
% \headheight is very small or even 0pt.
% Warning added for the use of 'E' option when twoside option is not used.
% In this case the 'E' fields will never be used.
%
% Mar 10, 2002
% version 2.1beta
% New command: \fancyhfoffset[place]{length}
% defines offsets to be applied to the header/footer to let it stick into
% the margins (if length > 0).
% place is like in fancyhead, except that only E,O,L,R can be used.
% This replaces the old calculation based on \headwidth and the marginpar
% area.
% \headwidth will be dynamically calculated in the headers/footers when
% this is used.
%
% Mar 26, 2002
% version 2.1beta2
% \fancyhfoffset now also takes h,f as possible letters in the argument to
% allow the header and footer widths to be different.
% New commands \fancyheadoffset and \fancyfootoffset added comparable to
% \fancyhead and \fancyfoot.
% Errormessages and warnings have been made more informative.
%
% Dec 9, 2002
% version 2.1
% The defaults for \footrulewidth, \plainheadrulewidth and
% \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
% someone inadvertently uses \setlength to change any of these, the value
% of \z@skip will not be changed, rather an errormessage will be given.
% March 3, 2004
% Release of version 3.0
% Oct 7, 2004
% version 3.1
% Added '\endlinechar=13' to \fancy@reset to prevent problems with
% includegraphics in header when verbatiminput is active.
% March 22, 2005
% version 3.2
% reset \everypar (the real one) in \fancy@reset because spanish.ldf does
% strange things with \everypar between << and >>.
% \ifancy@mpty{text}: expands to a true \ifx branch when `text' is empty.
% Used to decide whether a header/footer field was cleared.
\def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
% \fancy@def\cs{text}: define \cs as the field text (with a \strut so the
% baseline is kept), or as \leavevmode when the text is empty.
% \fancy@gbl controls whether the definition is \global (see below).
\def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
\fancy@gbl\def#1{#2\strut}\fi}
% By default field definitions are global; \fancypagestyle locally
% \let s \fancy@gbl to \relax so its settings stay local.
\let\fancy@gbl\global
% Error/warning wrappers: fall back to plain \errmessage when the LaTeX2e
% \PackageError/\PackageWarning commands are unavailable (LaTeX 2.09).
\def\@fancyerrmsg#1{%
\ifx\PackageError\undefined
\errmessage{#1}\else
\PackageError{Fancyhdr}{#1}{}\fi}
\def\@fancywarning#1{%
\ifx\PackageWarning\undefined
\errmessage{#1}\else
\PackageWarning{Fancyhdr}{#1}{}\fi}
% Usage: \@forc \var{charstring}{command to be executed for each char}
% This is similar to LaTeX's \@tfor, but expands the charstring.
\def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
% \f@rc / \f@@rc: the recursive worker pair behind \@forc.  \f@@rc peels
% one character off the string, runs the body, then recurses via \f@rc.
\def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
\f@@rc#1#2\f@@rc{#3}\fi}
\def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
% Usage: \f@nfor\name:=list\do{body}
% Like LaTeX's \@for but an empty list is treated as a list with an empty
% element
\newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
\expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
% Usage: \def@ult \cs{defaults}{argument}
% sets \cs to the characters from defaults appearing in argument
% or defaults if it would be empty. All characters are lowercased.
\newcommand\def@ult[3]{%
\edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
\def#1{}%
\@forc\tmpf@ra{#2}%
{\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
\ifx\@empty#1\def#1{#2}\fi}
%
% \if@in <char><set><truecase><falsecase>
% Tests whether <char> occurs in <set> by using it as a delimiter in a
% temporary macro definition.
%
\newcommand{\if@in}[4]{%
\edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
\expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
% User-level field commands.  Each forwards to \f@ncyhf with an h/f tag
% (header, footer, or both) and supplies an empty [selector] when the
% optional argument is omitted.
\newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
{\f@ncyhf\fancyhead h[]}}
\newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
{\f@ncyhf\fancyfoot f[]}}
\newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
{\f@ncyhf\fancyhf{}[]}}
% New commands for offsets added
% Same pattern, but these route to \f@ncyhfoffs, which stores lengths
% instead of field texts.
\newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
{\f@ncyhfoffs\fancyheadoffset h[]}}
\newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
{\f@ncyhfoffs\fancyfootoffset f[]}}
\newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
{\f@ncyhfoffs\fancyhfoffset{}[]}}
% The header and footer fields are stored in command sequences with
% names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
% and <z> from [hf].
% \f@ncyhf\caller{h|f|}[selector]{text}:
%   validate the selector letters, then for every comma-separated group
%   expand the e/o, l/c/r and h/f defaults and define each selected
%   \f@ncy<eo><lcr><hf> field to `text' via \fancy@def.
\def\f@ncyhf#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lcr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\fancy@def\csname
f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}}
% \f@ncyhfoffs: same selector machinery as \f@ncyhf, but c is not allowed
% (only l/r can stick out) and the value is stored as a length in
% \f@ncyO@<eo><lr><hf>; finally switch the output routine to the
% offset-aware header builders via \fancy@setoffs.
\def\f@ncyhfoffs#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\setlength\csname
f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}%
\fancy@setoffs}
% Fancyheadings version 1 commands. These are more or less deprecated,
% but they continue to work.
% Each \Xhead/\Xfoot takes an optional [even-page] argument; without it
% the same text is used for both even and odd pages.
\newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
\def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
\def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
\newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
\def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
\def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
\newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
\def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
\def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
\newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
\def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
\def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
\newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
\def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
\def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
\newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
\def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
\def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
% \headwidth is an alias for our own register so that a user's
% \let\headwidth\textwidth can be undone later (see \fancy@setoffs).
\newlength{\fancy@headwidth}
\let\headwidth\fancy@headwidth
% One length register per offset position: <eo><lr><hf>.
\newlength{\f@ncyO@elh}
\newlength{\f@ncyO@erh}
\newlength{\f@ncyO@olh}
\newlength{\f@ncyO@orh}
\newlength{\f@ncyO@elf}
\newlength{\f@ncyO@erf}
\newlength{\f@ncyO@olf}
\newlength{\f@ncyO@orf}
% Rule dimensions are macros, not length registers (see changelog v1.99),
% so they can be made conditional on \if@fancyplain.
\newcommand{\headrulewidth}{0.4pt}
\newcommand{\footrulewidth}{0pt}
\newcommand{\footruleskip}{.3\normalbaselineskip}
% Fancyplain stuff shouldn't be used anymore (rather
% \fancypagestyle{plain} should be used), but it must be present for
% compatibility reasons.
\newcommand{\plainheadrulewidth}{0pt}
\newcommand{\plainfootrulewidth}{0pt}
\newif\if@fancyplain \@fancyplainfalse
\def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
% Sentinel: \ps@fancy later detects this negative value and replaces it
% with (user delta +) \textwidth.  See changelog for v1.96.
\headwidth=-123456789sp %magic constant
% Command to reset various things in the headers:
% a.o. single spacing (taken from setspace.sty)
% and the catcode of ^^M (so that epsf files in the header work if a
% verbatim crosses a page boundary)
% It also defines a \nouppercase command that disables \uppercase and
% \MakeUppercase. It can only be used in the headers and footers.
\let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
\def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
\def\baselinestretch{1}%
\def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
\expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
\ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
\ifx\@normalsize\undefined \normalsize % for ucthesis.cls
\else \@normalsize \fi
\else% NFSS (2.09) present
\@newbaseline%
\fi}
% Initialization of the head and foot text.
% The default values still contain \fancyplain for compatibility.
\fancyhf{} % clear all
% lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
% evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
\if@twoside
\fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
\fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
\else
\fancyhead[l]{\fancyplain{}{\sl\rightmark}}
\fancyhead[r]{\fancyplain{}{\sl\leftmark}}
\fi
\fancyfoot[c]{\rm\thepage} % page number
% Use box 0 as a temp box and dimen 0 as temp dimen.
% This can be done, because this code will always
% be used inside another box, and therefore the changes are local.
% \@fancyvbox\length{content}: box the content and, if it is taller than
% \length (e.g. \headheight), warn once and globally enlarge \length.
\def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
{\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
We now make it that large for the rest of the document.^^J
This may cause the page layout to be inconsistent, however\@gobble}%
\dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
\box0}
% Put together a header or footer given the left, center and
% right text, fillers at left and right and a rule.
% The \lap commands put the text into an hbox of zero size,
% so overlapping text does not generate an errormessage.
% These macros have 5 parameters:
% 1. LEFTSIDE BEARING % This determines at which side the header will stick
% out. When \fancyhfoffset is used this calculates \headwidth, otherwise
% it is \hss or \relax (after expansion).
% 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
% 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
% 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
% 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
\def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\headheight{\hbox
{\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
\parbox[b]{\headwidth}{\centering#3}\hfill
\llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
\def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\footskip{\footrule
\hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
\parbox[t]{\headwidth}{\centering#3}\hfill
\llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
% Rules: on \fancyplain pages the plain widths are substituted locally.
\def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
\hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
\def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
\vskip-\footruleskip\vskip-\footrulewidth
\hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
% \ps@fancy: the `fancy' page style.  Sets up \chaptermark/\sectionmark
% defaults, installs the output-routine hooks via \ps@@fancy, and on its
% first invocation initializes \headwidth from the magic sentinel.
\def\ps@fancy{%
\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
%
% Define \MakeUppercase for old LaTeXen.
% Note: we used \def rather than \let, so that \let\uppercase\relax (from
% the version 1 documentation) will still work.
%
\@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
\@ifundefined{chapter}{\def\sectionmark##1{\markboth
{\MakeUppercase{\ifnum \c@secnumdepth>\z@
\thesection\hskip 1em\relax \fi ##1}}{}}%
\def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
\thesubsection\hskip 1em\relax \fi ##1}}}%
{\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
\@chapapp\ \thechapter. \ \fi ##1}}{}}%
\def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
\thesection. \ \fi ##1}}}}%
%\csname ps@headings\endcsname % use \ps@headings defaults if they exist
\ps@@fancy
% Redefine ourselves so the marks above are only set up once.
\gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
% Initialize \headwidth if the user didn't
%
\ifdim\headwidth<0sp
%
% This catches the case that \headwidth hasn't been initialized and the
% case that the user added something to \headwidth in the expectation that
% it was initialized to \textwidth. We compensate this now. This loses if
% the user intended to multiply it by a factor. But that case is more
% likely done by saying something like \headwidth=1.2\textwidth.
% The doc says you have to change \headwidth after the first call to
% \pagestyle{fancy}. This code is just to catch the most common cases where
% that requirement is violated.
%
\global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
\fi}
% `fancyplain' style: like fancy, but plain pages keep the \fancyplain
% alternatives active.
\def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
\def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
% Save \ps@empty so \fancypagestyle{empty} can redefine the original.
\let\ps@@empty\ps@empty
\def\ps@@fancy{%
\ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
\def\@mkboth{\protect\markboth}%
\def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
\def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
\def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
\def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
}
% Default definitions for compatibility mode:
% These cause the header/footer to take the defined \headwidth as width
% And to shift in the direction of the marginpar area
\def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
\def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
\let\fancy@Oelh\fancy@Oorh
\let\fancy@Oerh\fancy@Oolh
\let\fancy@Oolf\fancy@Oolh
\let\fancy@Oorf\fancy@Oorh
\let\fancy@Oelf\fancy@Oelh
\let\fancy@Oerf\fancy@Oerh
% New definitions for the use of \fancyhfoffset
% These calculate the \headwidth from \textwidth and the specified offsets.
\def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
\advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
\def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
\advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
\def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
\advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
\def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
\advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
% Switch the bearing macros over to the offset-based calculation.
\def\fancy@setoffs{%
% Just in case \let\headwidth\textwidth was used
\fancy@gbl\let\headwidth\fancy@headwidth
\fancy@gbl\let\fancy@Oolh\fancy@offsolh
\fancy@gbl\let\fancy@Oelh\fancy@offselh
\fancy@gbl\let\fancy@Oorh\hss
\fancy@gbl\let\fancy@Oerh\hss
\fancy@gbl\let\fancy@Oolf\fancy@offsolf
\fancy@gbl\let\fancy@Oelf\fancy@offself
\fancy@gbl\let\fancy@Oorf\hss
\fancy@gbl\let\fancy@Oerf\hss}
% Conditionals usable inside header/footer fields: \iffootnote is set per
% column in the patched \@makecol; \iftopfloat/\ifbotfloat/\iffloatpage
% test the float lists saved there.
\newif\iffootnote
\let\latex@makecol\@makecol
\def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi
\let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
\def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
\def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
\def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
% \fancypagestyle{name}{settings}: define page style `name' as fancy plus
% the given settings; \fancy@gbl is disabled so the settings stay local.
\newcommand{\fancypagestyle}[2]{%
\@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
%% File: `icml2024.bst'
%% A modification of `plainnl.bst' for use with natbib package
%%
%% Copyright 2010 Hal Daum\'e III
%% Modified by J. Fürnkranz
%% - Changed labels from (X and Y, 2000) to (X & Y, 2000)
%% - Changed References to last name first and abbreviated first names.
%% Modified by Iain Murray 2018 (who suggests adopting a standard .bst in future...)
%% - Made it actually use abbreviated first names
%%
%% Copyright 1993-2007 Patrick W Daly
%% Max-Planck-Institut f\"ur Sonnensystemforschung
%% Max-Planck-Str. 2
%% D-37191 Katlenburg-Lindau
%% Germany
%% E-mail: daly@mps.mpg.de
%%
%% This program can be redistributed and/or modified under the terms
%% of the LaTeX Project Public License Distributed from CTAN
%% archives in directory macros/latex/base/lppl.txt; either
%% version 1 of the License, or any later version.
%%
% Version and source file information:
% \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)]
%
% BibTeX `plainnat' family
% version 0.99b for BibTeX versions 0.99a or later,
% for LaTeX versions 2.09 and 2e.
%
% For use with the `natbib.sty' package; emulates the corresponding
% member of the `plain' family, but with author-year citations.
%
% With version 6.0 of `natbib.sty', it may also be used for numerical
% citations, while retaining the commands \citeauthor, \citefullauthor,
% and \citeyear to print the corresponding information.
%
% For version 7.0 of `natbib.sty', the KEY field replaces missing
% authors/editors, and the date is left blank in \bibitem.
%
% Includes field EID for the sequence/citation number of electronic journals
% which is used instead of page numbers.
%
% Includes fields ISBN and ISSN.
%
% Includes field URL for Internet addresses.
%
% Includes field DOI for Digital Object Identifiers.
%
% Works best with the url.sty package of Donald Arseneau.
%
% Entries with identical authors and year are further sorted by
% citation key, to preserve any natural sequence.
%
% Declare the fields read from the .bib database, no entry integers,
% and per-entry strings: `label' (the author-year citation label),
% `extra.label' (the disambiguating a/b/c suffix), `sort.label' (the
% sort key) and `short.list' (short author list for \bibitem).
ENTRY
{ address
author
booktitle
chapter
doi
eid
edition
editor
howpublished
institution
isbn
issn
journal
key
month
note
number
organization
pages
publisher
school
series
title
type
url
volume
year
}
{}
{ label extra.label sort.label short.list }
% Punctuation state machine used by output.nonnull: tracks whether the
% next item starts the entry, continues a sentence, or follows a block.
INTEGERS { output.state before.all mid.sentence after.sentence after.block }
FUNCTION {init.state.consts}
{ #0 'before.all :=
#1 'mid.sentence :=
#2 'after.sentence :=
#3 'after.block :=
}
% Scratch string variables used throughout.
STRINGS { s t }
% output.nonnull: write the previous item with punctuation chosen by
% output.state (", " / ". \newblock" / nothing), leave the new item on
% the stack, and move the state to mid.sentence.
FUNCTION {output.nonnull}
{ 's :=
output.state mid.sentence =
{ ", " * write$ }
{ output.state after.block =
{ add.period$ write$
newline$
"\newblock " write$
}
{ output.state before.all =
'write$
{ add.period$ " " * write$ }
if$
}
if$
mid.sentence 'output.state :=
}
if$
s
}
% output: like output.nonnull but silently drops an empty item.
FUNCTION {output}
{ duplicate$ empty$
'pop$
'output.nonnull
if$
}
% output.check: like output, but warn (using the field name in t) when
% the item is empty.
FUNCTION {output.check}
{ 't :=
duplicate$ empty$
{ pop$ "empty " t * " in " * cite$ * warning$ }
'output.nonnull
if$
}
% fin.entry: flush the last pending item with a final period.
FUNCTION {fin.entry}
{ add.period$
write$
newline$
}
% new.block / new.sentence: request stronger punctuation before the next
% item, unless nothing has been written yet.
FUNCTION {new.block}
{ output.state before.all =
'skip$
{ after.block 'output.state := }
if$
}
FUNCTION {new.sentence}
{ output.state after.block =
'skip$
{ output.state before.all =
'skip$
{ after.sentence 'output.state := }
if$
}
if$
}
% Logical operators on BibTeX's 0/1 integers.
FUNCTION {not}
{ { #0 }
{ #1 }
if$
}
FUNCTION {and}
{ 'skip$
{ pop$ #0 }
if$
}
FUNCTION {or}
{ { pop$ #1 }
'skip$
if$
}
% Conditional block/sentence breaks: break only when the tested field(s)
% are non-empty.
FUNCTION {new.block.checka}
{ empty$
'skip$
'new.block
if$
}
FUNCTION {new.block.checkb}
{ empty$
swap$ empty$
and
'skip$
'new.block
if$
}
FUNCTION {new.sentence.checka}
{ empty$
'skip$
'new.sentence
if$
}
FUNCTION {new.sentence.checkb}
{ empty$
swap$ empty$
and
'skip$
'new.sentence
if$
}
% field.or.null: replace a missing field by the empty string.
FUNCTION {field.or.null}
{ duplicate$ empty$
{ pop$ "" }
'skip$
if$
}
% emphasize: wrap a non-empty string in \emph{...}.
FUNCTION {emphasize}
{ duplicate$ empty$
{ pop$ "" }
{ "\emph{" swap$ * "}" * }
if$
}
% Loop counters for the name-formatting functions.
INTEGERS { nameptr namesleft numnames }
% format.names: format a name list as "von Last, Jr., F." joined with
% commas and " and " before the final name; "others" becomes "et al.".
FUNCTION {format.names}
{ 's :=
#1 'nameptr :=
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{ s nameptr "{vv~}{ll}{, jj}{, f.}" format.name$ 't :=
nameptr #1 >
{ namesleft #1 >
{ ", " * t * }
{ numnames #2 >
{ "," * }
'skip$
if$
t "others" =
{ " et~al." * }
{ " and " * t * }
if$
}
if$
}
't
if$
nameptr #1 + 'nameptr :=
namesleft #1 - 'namesleft :=
}
while$
}
% format.key: fall back to the KEY field when the tested field is empty.
FUNCTION {format.key}
{ empty$
{ key field.or.null }
{ "" }
if$
}
FUNCTION {format.authors}
{ author empty$
{ "" }
{ author format.names }
if$
}
% format.editors: editor names plus "(ed.)"/"(eds.)".
FUNCTION {format.editors}
{ editor empty$
{ "" }
{ editor format.names
editor num.names$ #1 >
{ " (eds.)" * }
{ " (ed.)" * }
if$
}
if$
}
% Optional trailing fields; each starts a new block when present.
FUNCTION {format.isbn}
{ isbn empty$
{ "" }
{ new.block "ISBN " isbn * }
if$
}
FUNCTION {format.issn}
{ issn empty$
{ "" }
{ new.block "ISSN " issn * }
if$
}
FUNCTION {format.url}
{ url empty$
{ "" }
{ new.block "URL \url{" url * "}" * }
if$
}
FUNCTION {format.doi}
{ doi empty$
{ "" }
{ new.block "\doi{" doi * "}" * }
if$
}
% format.title: titles of non-book entries are downcased to sentence case.
FUNCTION {format.title}
{ title empty$
{ "" }
{ title "t" change.case$ }
if$
}
% format.full.names: like format.names but last names only
% ("von Last"); used for the textual part of natbib citation labels.
FUNCTION {format.full.names}
{'s :=
#1 'nameptr :=
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{ s nameptr
"{vv~}{ll}" format.name$ 't :=
nameptr #1 >
{
namesleft #1 >
{ ", " * t * }
{
numnames #2 >
{ "," * }
'skip$
if$
t "others" =
{ " et~al." * }
{ " and " * t * }
if$
}
if$
}
't
if$
nameptr #1 + 'nameptr :=
namesleft #1 - 'namesleft :=
}
while$
}
% Pick the name list used for the full citation text, per entry type.
FUNCTION {author.editor.full}
{ author empty$
{ editor empty$
{ "" }
{ editor format.full.names }
if$
}
{ author format.full.names }
if$
}
FUNCTION {author.full}
{ author empty$
{ "" }
{ author format.full.names }
if$
}
FUNCTION {editor.full}
{ editor empty$
{ "" }
{ editor format.full.names }
if$
}
% make.full.names: dispatch on the entry type (books may cite editors,
% proceedings cite editors, everything else cites authors).
FUNCTION {make.full.names}
{ type$ "book" =
type$ "inbook" =
or
'author.editor.full
{ type$ "proceedings" =
'editor.full
'author.full
if$
}
if$
}
% output.bibitem: write "\bibitem[label)full-names]{citekey}" and reset
% the punctuation state; the full-names part is omitted when it equals
% short.list.
FUNCTION {output.bibitem}
{ newline$
"\bibitem[" write$
label write$
")" make.full.names duplicate$ short.list =
{ pop$ }
{ * }
if$
"]{" * write$
cite$ write$
"}" write$
newline$
""
before.all 'output.state :=
}
% n.dashify: convert single hyphens in page ranges to "--", leaving
% existing "--" runs alone.
FUNCTION {n.dashify}
{ 't :=
""
{ t empty$ not }
{ t #1 #1 substring$ "-" =
{ t #1 #2 substring$ "--" = not
{ "--" *
t #2 global.max$ substring$ 't :=
}
{ { t #1 #1 substring$ "-" = }
{ "-" *
t #2 global.max$ substring$ 't :=
}
while$
}
if$
}
{ t #1 #1 substring$ *
t #2 global.max$ substring$ 't :=
}
if$
}
while$
}
% format.date: "month year" plus the disambiguating extra.label (a/b/...).
FUNCTION {format.date}
{ year duplicate$ empty$
{ "empty year in " cite$ * warning$
pop$ "" }
'skip$
if$
month empty$
'skip$
{ month
" " * swap$ *
}
if$
extra.label *
}
% format.btitle: book titles are emphasized, case preserved.
FUNCTION {format.btitle}
{ title emphasize
}
% tie.or.space.connect: join with "~" when the second part is short
% (< 3 chars), otherwise with a space.
FUNCTION {tie.or.space.connect}
{ duplicate$ text.length$ #3 <
{ "~" }
{ " " }
if$
swap$ * *
}
% either.or.check: warn when two mutually exclusive fields are both set.
FUNCTION {either.or.check}
{ empty$
'pop$
{ "can't use both " swap$ * " fields in " * cite$ * warning$ }
if$
}
% format.bvolume: "volume N of Series"; volume and number are exclusive.
FUNCTION {format.bvolume}
{ volume empty$
{ "" }
{ "volume" volume tie.or.space.connect
series empty$
'skip$
{ " of " * series emphasize * }
if$
"volume and number" number either.or.check
}
if$
}
% format.number.series: "Number N in Series" (only when no volume);
% capitalization depends on sentence position.
FUNCTION {format.number.series}
{ volume empty$
{ number empty$
{ series field.or.null }
{ output.state mid.sentence =
{ "number" }
{ "Number" }
if$
number tie.or.space.connect
series empty$
{ "there's a number but no series in " cite$ * warning$ }
{ " in " * series * }
if$
}
if$
}
{ "" }
if$
}
% format.edition: "Nth edition", lowercased mid-sentence.
FUNCTION {format.edition}
{ edition empty$
{ "" }
{ output.state mid.sentence =
{ edition "l" change.case$ " edition" * }
{ edition "t" change.case$ " edition" * }
if$
}
if$
}
% multi.page.check: true when pages contains '-', ',' or '+',
% i.e. looks like a range rather than a single page.
INTEGERS { multiresult }
FUNCTION {multi.page.check}
{ 't :=
#0 'multiresult :=
{ multiresult not
t empty$ not
and
}
{ t #1 #1 substring$
duplicate$ "-" =
swap$ duplicate$ "," =
swap$ "+" =
or or
{ #1 'multiresult := }
{ t #2 global.max$ substring$ 't := }
if$
}
while$
multiresult
}
% format.pages: "pp.~m--n" for ranges, "pp.~n" for a single page.
FUNCTION {format.pages}
{ pages empty$
{ "" }
{ pages multi.page.check
{ "pp.\ " pages n.dashify tie.or.space.connect }
{ "pp.\ " pages tie.or.space.connect }
if$
}
if$
}
% format.eid: electronic-journal article id, used instead of pages.
FUNCTION {format.eid}
{ eid empty$
{ "" }
{ "art." eid tie.or.space.connect }
if$
}
% format.vol.num.pages: "vol\penalty0 (num):\penalty0 pages".
FUNCTION {format.vol.num.pages}
{ volume field.or.null
number empty$
'skip$
{ "\penalty0 (" number * ")" * *
volume empty$
{ "there's a number but no volume in " cite$ * warning$ }
'skip$
if$
}
if$
pages empty$
'skip$
{ duplicate$ empty$
{ pop$ format.pages }
{ ":\penalty0 " * pages n.dashify * }
if$
}
if$
}
% format.vol.num.eid: same shape but with the EID instead of pages.
FUNCTION {format.vol.num.eid}
{ volume field.or.null
number empty$
'skip$
{ "\penalty0 (" number * ")" * *
volume empty$
{ "there's a number but no volume in " cite$ * warning$ }
'skip$
if$
}
if$
eid empty$
'skip$
{ duplicate$ empty$
{ pop$ format.eid }
{ ":\penalty0 " * eid * }
if$
}
if$
}
% format.chapter.pages: "chapter N, pp. m--n"; TYPE overrides "chapter".
FUNCTION {format.chapter.pages}
{ chapter empty$
'format.pages
{ type empty$
{ "chapter" }
{ type "l" change.case$ }
if$
chapter tie.or.space.connect
pages empty$
'skip$
{ ", " * format.pages * }
if$
}
if$
}
% format.in.ed.booktitle: "In Editors (eds.), Booktitle".
FUNCTION {format.in.ed.booktitle}
{ booktitle empty$
{ "" }
{ editor empty$
{ "In " booktitle emphasize * }
{ "In " format.editors * ", " * booktitle emphasize * }
if$
}
if$
}
% empty.misc.check: warn when a misc entry has no usable field at all.
FUNCTION {empty.misc.check}
{ author empty$ title empty$ howpublished empty$
month empty$ year empty$ note empty$
and and and and and
key empty$ not and
{ "all relevant fields are empty in " cite$ * warning$ }
'skip$
if$
}
% format.thesis.type: TYPE (title-cased) replaces the default thesis label.
FUNCTION {format.thesis.type}
{ type empty$
'skip$
{ pop$
type "t" change.case$
}
if$
}
% format.tr.number: "Technical Report N" (or TYPE N).
FUNCTION {format.tr.number}
{ type empty$
{ "Technical Report" }
'type
if$
number empty$
{ "t" change.case$ }
{ number tie.or.space.connect }
if$
}
% Crossref formatters: produce "In <container> \citet{crossref}" text for
% entries whose container is given by a CROSSREF field.
FUNCTION {format.article.crossref}
{ key empty$
{ journal empty$
{ "need key or journal for " cite$ * " to crossref " * crossref *
warning$
""
}
{ "In \emph{" journal * "}" * }
if$
}
{ "In " }
if$
" \citet{" * crossref * "}" *
}
FUNCTION {format.book.crossref}
{ volume empty$
{ "empty volume in " cite$ * "'s crossref of " * crossref * warning$
"In "
}
{ "Volume" volume tie.or.space.connect
" of " *
}
if$
editor empty$
editor field.or.null author field.or.null =
or
{ key empty$
{ series empty$
{ "need editor, key, or series for " cite$ * " to crossref " *
crossref * warning$
"" *
}
{ "\emph{" * series * "}" * }
if$
}
'skip$
if$
}
'skip$
if$
" \citet{" * crossref * "}" *
}
FUNCTION {format.incoll.inproc.crossref}
{ editor empty$
editor field.or.null author field.or.null =
or
{ key empty$
{ booktitle empty$
{ "need editor, key, or booktitle for " cite$ * " to crossref " *
crossref * warning$
""
}
{ "In \emph{" booktitle * "}" * }
if$
}
{ "In " }
if$
}
{ "In " }
if$
" \citet{" * crossref * "}" *
}
% Entry-type drivers.  Each emits the \bibitem, then the fields in the
% order required by the ICML reference style, then closes the entry.
% article: Author. Title. Journal, vol(num):pages, year.
FUNCTION {article}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
crossref missing$
{ journal emphasize "journal" output.check
eid empty$
{ format.vol.num.pages output }
{ format.vol.num.eid output }
if$
format.date "year" output.check
}
{ format.article.crossref output.nonnull
eid empty$
{ format.pages output }
{ format.eid output }
if$
}
if$
format.issn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% book: Author (or Editors). Title. Volume/series. Publisher, year.
FUNCTION {book}
{ output.bibitem
author empty$
{ format.editors "author and editor" output.check
editor format.key output
}
{ format.authors output.nonnull
crossref missing$
{ "author and editor" editor either.or.check }
'skip$
if$
}
if$
new.block
format.btitle "title" output.check
crossref missing$
{ format.bvolume output
new.block
format.number.series output
new.sentence
publisher "publisher" output.check
address output
}
{ new.block
format.book.crossref output.nonnull
}
if$
format.edition output
format.date "year" output.check
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% booklet: all fields optional except title.
FUNCTION {booklet}
{ output.bibitem
format.authors output
author format.key output
new.block
format.title "title" output.check
howpublished address new.block.checkb
howpublished output
address output
format.date output
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% inbook: like book, plus chapter/pages.
FUNCTION {inbook}
{ output.bibitem
author empty$
{ format.editors "author and editor" output.check
editor format.key output
}
{ format.authors output.nonnull
crossref missing$
{ "author and editor" editor either.or.check }
'skip$
if$
}
if$
new.block
format.btitle "title" output.check
crossref missing$
{ format.bvolume output
format.chapter.pages "chapter and pages" output.check
new.block
format.number.series output
new.sentence
publisher "publisher" output.check
address output
}
{ format.chapter.pages "chapter and pages" output.check
new.block
format.book.crossref output.nonnull
}
if$
format.edition output
format.date "year" output.check
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% incollection: Author. Title. In Editors (eds.), Booktitle, chapter/pages.
% Publisher, year.
FUNCTION {incollection}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
crossref missing$
{ format.in.ed.booktitle "booktitle" output.check
format.bvolume output
format.number.series output
format.chapter.pages output
new.sentence
publisher "publisher" output.check
address output
format.edition output
format.date "year" output.check
}
{ format.incoll.inproc.crossref output.nonnull
format.chapter.pages output
}
if$
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% inproceedings: Author. Title. In Proceedings, pp. m--n, year.
FUNCTION {inproceedings}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
crossref missing$
{ format.in.ed.booktitle "booktitle" output.check
format.bvolume output
format.number.series output
format.pages output
address empty$
{ organization publisher new.sentence.checkb
organization output
publisher output
format.date "year" output.check
}
{ address output.nonnull
format.date "year" output.check
new.sentence
organization output
publisher output
}
if$
}
{ format.incoll.inproc.crossref output.nonnull
format.pages output
}
if$
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% conference is an alias of inproceedings.
FUNCTION {conference} { inproceedings }
% manual: technical documentation.
FUNCTION {manual}
{ output.bibitem
format.authors output
author format.key output
new.block
format.btitle "title" output.check
organization address new.block.checkb
organization output
address output
format.edition output
format.date output
format.url output
new.block
note output
fin.entry
}
% mastersthesis: Author. Title. Master's thesis, School, year.
FUNCTION {mastersthesis}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
"Master's thesis" format.thesis.type output.nonnull
school "school" output.check
address output
format.date "year" output.check
format.url output
new.block
note output
fin.entry
}
% misc: everything optional; warn if no relevant field is set.
FUNCTION {misc}
{ output.bibitem
format.authors output
author format.key output
title howpublished new.block.checkb
format.title output
howpublished new.block.checka
howpublished output
format.date output
format.issn output
format.url output
new.block
note output
fin.entry
empty.misc.check
}
% phdthesis: Author. Title. PhD thesis, School, year.
FUNCTION {phdthesis}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.btitle "title" output.check
new.block
"PhD thesis" format.thesis.type output.nonnull
school "school" output.check
address output
format.date "year" output.check
format.url output
new.block
note output
fin.entry
}
% proceedings: whole conference volume, cited by its editors.
FUNCTION {proceedings}
{ output.bibitem
format.editors output
editor format.key output
new.block
format.btitle "title" output.check
format.bvolume output
format.number.series output
address output
format.date "year" output.check
new.sentence
organization output
publisher output
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% techreport: Author. Title. Technical Report N, Institution, year.
FUNCTION {techreport}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
format.tr.number output.nonnull
institution "institution" output.check
address output
format.date "year" output.check
format.url output
new.block
note output
fin.entry
}
% unpublished: the NOTE field is mandatory.
FUNCTION {unpublished}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
note "note" output.check
format.date output
format.url output
fin.entry
}
% Unknown entry types are formatted as misc.
FUNCTION {default.type} { misc }
% Standard month and journal-name abbreviations usable in .bib files.
MACRO {jan} {"January"}
MACRO {feb} {"February"}
MACRO {mar} {"March"}
MACRO {apr} {"April"}
MACRO {may} {"May"}
MACRO {jun} {"June"}
MACRO {jul} {"July"}
MACRO {aug} {"August"}
MACRO {sep} {"September"}
MACRO {oct} {"October"}
MACRO {nov} {"November"}
MACRO {dec} {"December"}
MACRO {acmcs} {"ACM Computing Surveys"}
MACRO {acta} {"Acta Informatica"}
MACRO {cacm} {"Communications of the ACM"}
MACRO {ibmjrd} {"IBM Journal of Research and Development"}
MACRO {ibmsj} {"IBM Systems Journal"}
MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
MACRO {ieeetc} {"IEEE Transactions on Computers"}
MACRO {ieeetcad}
{"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
MACRO {ipl} {"Information Processing Letters"}
MACRO {jacm} {"Journal of the ACM"}
MACRO {jcss} {"Journal of Computer and System Sciences"}
MACRO {scp} {"Science of Computer Programming"}
MACRO {sicomp} {"SIAM Journal on Computing"}
MACRO {tocs} {"ACM Transactions on Computer Systems"}
MACRO {tods} {"ACM Transactions on Database Systems"}
MACRO {tog} {"ACM Transactions on Graphics"}
MACRO {toms} {"ACM Transactions on Mathematical Software"}
MACRO {toois} {"ACM Transactions on Office Information Systems"}
MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
MACRO {tcs} {"Theoretical Computer Science"}
% Read the database entries, then define the sorting/label helpers.
READ
% sortify: strip TeX commands and lowercase, for sort keys.
FUNCTION {sortify}
{ purify$
"l" change.case$
}
INTEGERS { len }
% chop.word: remove a leading word of `len' chars if it matches.
FUNCTION {chop.word}
{ 's :=
'len :=
s #1 len substring$ =
{ s len #1 + global.max$ substring$ }
's
if$
}
% format.lab.names: label author list -- one name; "A \& B" for two;
% "A et al." for three or more (ICML label style uses \&, not "and").
FUNCTION {format.lab.names}
{ 's :=
s #1 "{vv~}{ll}" format.name$
s num.names$ duplicate$
#2 >
{ pop$ " et~al." * }
{ #2 <
'skip$
{ s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
{ " et~al." * }
{ " \& " * s #2 "{vv~}{ll}" format.name$ * }
if$
}
if$
}
if$
}
% Citation label from author names; falls back to the key field,
% then to the first three characters of the cite key.
FUNCTION {author.key.label}
{ author empty$
{ key empty$
{ cite$ #1 #3 substring$ }
'key
if$
}
{ author format.lab.names }
if$
}
% Citation label preferring author, then editor, then key, then cite key.
% Used for book-like entries where an editor may stand in for the author.
FUNCTION {author.editor.key.label}
{ author empty$
{ editor empty$
{ key empty$
{ cite$ #1 #3 substring$ }
'key
if$
}
{ editor format.lab.names }
if$
}
{ author format.lab.names }
if$
}
% Citation label preferring author, then key, then the first three
% letters of the organization (with any leading "The " chopped off).
FUNCTION {author.key.organization.label}
{ author empty$
{ key empty$
{ organization empty$
{ cite$ #1 #3 substring$ }
{ "The " #4 organization chop.word #3 text.prefix$ }
if$
}
'key
if$
}
{ author format.lab.names }
if$
}
% As author.key.organization.label, but starting from the editor field
% (used for @proceedings entries, which have no author).
FUNCTION {editor.key.organization.label}
{ editor empty$
{ key empty$
{ organization empty$
{ cite$ #1 #3 substring$ }
{ "The " #4 organization chop.word #3 text.prefix$ }
if$
}
'key
if$
}
{ editor format.lab.names }
if$
}
% Dispatch on the entry type to pick the right label function, and store
% the resulting short author string in short.list.
FUNCTION {calc.short.authors}
{ type$ "book" =
type$ "inbook" =
or
'author.editor.key.label
{ type$ "proceedings" =
'editor.key.organization.label
{ type$ "manual" =
'author.key.organization.label
'author.key.label
if$
}
if$
}
if$
'short.list :=
}
% Build the full author-year label "Authors(Year" (the closing paren and
% any disambiguating letter are appended later by reverse.pass).
% If year is empty, or the label came from the key field, the year part
% is dropped.
FUNCTION {calc.label}
{ calc.short.authors
short.list
"("
*
year duplicate$ empty$
short.list key field.or.null = or
{ pop$ "" }
'skip$
if$
*
'label :=
}
% Convert a name list into a sort string: "vonlast first jr" per name,
% space separated and sortified. Special markers keep natbib-style
% ordering consistent: a trailing "others" becomes "zzzzz" (sorts last),
% and for 3+ author entries the year is injected after the first name so
% "et al." labels group and order by year.
FUNCTION {sort.format.names}
{ 's :=
#1 'nameptr :=
""
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{
s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
nameptr #1 >
{
" " *
namesleft #1 = t "others" = and
{ "zzzzz" * }
{ numnames #2 > nameptr #2 = and
{ "zz" * year field.or.null * " " * }
'skip$
if$
t sortify *
}
if$
}
{ t sortify * }
if$
nameptr #1 + 'nameptr :=
namesleft #1 - 'namesleft :=
}
while$
}
% Sort key for a title: strip a leading "A ", "An ", or "The ",
% sortify, and truncate to the global string limit.
FUNCTION {sort.format.title}
{ 't :=
"A " #2
"An " #3
"The " #4 t chop.word
chop.word
chop.word
sortify
#1 global.max$ substring$
}
% Sort key from the author list; falls back to key, warning (and using
% the empty string) when neither is present.
FUNCTION {author.sort}
{ author empty$
{ key empty$
{ "to sort, need author or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ author sort.format.names }
if$
}
% Sort key preferring author, then editor, then key (with a warning and
% empty fallback). Used for book-like entry types.
FUNCTION {author.editor.sort}
{ author empty$
{ editor empty$
{ key empty$
{ "to sort, need author, editor, or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ editor sort.format.names }
if$
}
{ author sort.format.names }
if$
}
% Sort key preferring author, then organization (leading "The " chopped),
% then key. Used for @manual entries.
FUNCTION {author.organization.sort}
{ author empty$
{ organization empty$
{ key empty$
{ "to sort, need author, organization, or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ "The " #4 organization chop.word sortify }
if$
}
{ author sort.format.names }
if$
}
% Sort key preferring editor, then organization (leading "The " chopped),
% then key. Used for @proceedings entries.
FUNCTION {editor.organization.sort}
{ editor empty$
{ organization empty$
{ key empty$
{ "to sort, need editor, organization, or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ "The " #4 organization chop.word sortify }
if$
}
{ editor sort.format.names }
if$
}
% Compute the primary sort key for each entry:
%   sortified label + full name sort string + year + cite key,
% truncated to entry.max$. sort.label keeps the truncated prefix so the
% later label-disambiguation passes see the same ordering.
FUNCTION {presort}
{ calc.label
label sortify
" "
*
type$ "book" =
type$ "inbook" =
or
'author.editor.sort
{ type$ "proceedings" =
'editor.organization.sort
{ type$ "manual" =
'author.organization.sort
'author.sort
if$
}
if$
}
if$
" "
*
year field.or.null sortify
*
" "
*
cite$
*
#1 entry.max$ substring$
'sort.label :=
sort.label *
#1 entry.max$ substring$
'sort.key$ :=
}
% First sort: order entries by the presort key so that identical
% author-year labels become adjacent for the disambiguation passes.
ITERATE {presort}
SORT
% State shared by the label-disambiguation passes below.
STRINGS { longest.label last.label next.extra }
INTEGERS { longest.label.width last.extra.num number.label }
% Reset the disambiguation state before the forward pass.
FUNCTION {initialize.longest.label}
{ "" 'longest.label :=
#0 int.to.chr$ 'last.label :=
"" 'next.extra :=
#0 'longest.label.width :=
#0 'last.extra.num :=
#0 'number.label :=
}
% Forward pass over the sorted entries: when consecutive entries share the
% same author-year label, assign incrementing suffix letters ("a", "b", ...)
% via extra.label. Also counts entries into number.label (used as the
% widest label argument of thebibliography).
FUNCTION {forward.pass}
{ last.label label =
{ last.extra.num #1 + 'last.extra.num :=
last.extra.num int.to.chr$ 'extra.label :=
}
{ "a" chr.to.int$ 'last.extra.num :=
"" 'extra.label :=
label 'last.label :=
}
if$
number.label #1 + 'number.label :=
}
% Reverse pass: the first entry of a duplicated-label group gets "a"
% (the forward pass only labelled the second onward, starting at "b").
% Non-empty suffixes are wrapped in \natexlab so natbib can strip them,
% and the suffix is appended to the entry's label.
FUNCTION {reverse.pass}
{ next.extra "b" =
{ "a" 'extra.label := }
'skip$
if$
extra.label 'next.extra :=
extra.label
duplicate$ empty$
'skip$
{ "{\natexlab{" swap$ * "}}" * }
if$
'extra.label :=
label extra.label * 'label :=
}
% Run the disambiguation passes, then restore the presort ordering
% (sort.label) as the final bibliography order and sort once more.
EXECUTE {initialize.longest.label}
ITERATE {forward.pass}
REVERSE {reverse.pass}
FUNCTION {bib.sort.order}
{ sort.label 'sort.key$ :=
}
ITERATE {bib.sort.order}
SORT
% Emit the .bbl preamble: any @preamble text, \begin{thebibliography}
% with the entry count as the widest-label argument, and \providecommand
% fallbacks for \natexlab, \url and \doi (so the .bbl compiles even
% without natbib/url loaded).
FUNCTION {begin.bib}
{ preamble$ empty$
'skip$
{ preamble$ write$ newline$ }
if$
"\begin{thebibliography}{" number.label int.to.str$ * "}" *
write$ newline$
"\providecommand{\natexlab}[1]{#1}"
write$ newline$
"\providecommand{\url}[1]{\texttt{#1}}"
write$ newline$
"\expandafter\ifx\csname urlstyle\endcsname\relax"
write$ newline$
" \providecommand{\doi}[1]{doi: #1}\else"
write$ newline$
" \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi"
write$ newline$
}
% Main driver: write the preamble, then format every entry via its
% type handler, then close the environment.
EXECUTE {begin.bib}
EXECUTE {init.state.consts}
ITERATE {call.type$}
FUNCTION {end.bib}
{ newline$
"\end{thebibliography}" write$ newline$
}
EXECUTE {end.bib}
% File: icml2024.sty (LaTeX style file for ICML-2024, version of 2023-11-23)
% This file contains the LaTeX formatting parameters for a two-column
% conference proceedings that is 8.5 inches wide by 11 inches high.
%
% Modified by Jonathan Scarlett 2024: changed years, volume, location
%
% Modified by Sivan Sabato 2023: changed years and volume number.
% Modified by Jonathan Scarlett 2023: added page numbers to every page
%
% Modified by Csaba Szepesvari 2022: changed years, PMLR ref. Turned off checking marginparwidth
% as marginparwidth only controls the space available for margin notes and margin notes
% will NEVER be used anyways in submitted versions, so there is no reason one should
% check whether marginparwidth has been tampered with.
% Also removed pdfview=FitH from hypersetup as it did not do its job; the default choice is a bit better
% but of course the double-column format is not supported by this hyperlink preview functionality
% in a completely satisfactory fashion.
% Modified by Gang Niu 2022: Changed color to xcolor
%
% Modified by Iain Murray 2018: changed years, location. Remove affiliation notes when anonymous.
% Move times dependency from .tex to .sty so fewer people delete it.
%
% Modified by Daniel Roy 2017: changed byline to use footnotes for affiliations, and removed emails
%
% Modified by Percy Liang 12/2/2013: changed the year, location from the previous template for ICML 2014
% Modified by Fei Sha 9/2/2013: changed the year, location from the previous template for ICML 2013
%
% Modified by Fei Sha 4/24/2013: (1) remove the extra whitespace after the first author's email address (in %the camera-ready version) (2) change the Proceeding ... of ICML 2010 to 2014 so PDF's metadata will show up % correctly
%
% Modified by Sanjoy Dasgupta, 2013: changed years, location
%
% Modified by Francesco Figari, 2012: changed years, location
%
% Modified by Christoph Sawade and Tobias Scheffer, 2011: added line
% numbers, changed years
%
% Modified by Hal Daume III, 2010: changed years, added hyperlinks
%
% Modified by Kiri Wagstaff, 2009: changed years
%
% Modified by Sam Roweis, 2008: changed years
%
% Modified by Ricardo Silva, 2007: update of the ifpdf verification
%
% Modified by Prasad Tadepalli and Andrew Moore, merely changing years.
%
% Modified by Kristian Kersting, 2005, based on Jennifer Dy's 2004 version
% - running title. If the original title is too long or breaks a line,
% use \icmltitlerunning{...} in the preamble to supply a shorter form.
% Added fancyhdr package to get a running head.
% - Updated to store the page size because pdflatex does compile the
% page size into the pdf.
%
% Hacked by Terran Lane, 2003:
% - Updated to use LaTeX2e style file conventions (ProvidesPackage,
% etc.)
% - Added an ``appearing in'' block at the base of the first column
% (thus keeping the ``appearing in'' note out of the bottom margin
% where the printer should strip in the page numbers).
% - Added a package option [accepted] that selects between the ``Under
% review'' notice (default, when no option is specified) and the
% ``Appearing in'' notice (for use when the paper has been accepted
% and will appear).
%
% Originally created as: ml2k.sty (LaTeX style file for ICML-2000)
% by P. Langley (12/23/99)
%%%%%%%%%%%%%%%%%%%%
%% This version of the style file supports both a ``review'' version
%% and a ``final/accepted'' version. The difference is only in the
%% text that appears in the note at the bottom of the first column of
%% the first page. The default behavior is to print a note to the
%% effect that the paper is under review and should not be distributed. The
%% final/accepted version prints an ``Appearing in'' note. To get the
%% latter behavior, in the calling file change the ``usepackage'' line
%% from:
%% \usepackage{icml2024}
%% to
%% \usepackage[accepted]{icml2024}
%%%%%%%%%%%%%%%%%%%%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{icml2024}[2023/11/23 v2.0 ICML Conference Style File]
% Before 2018, \usepackage{times} was in the example TeX, but inevitably
% not everybody did it.
\RequirePackage{times}
% Use fancyhdr package
\RequirePackage{fancyhdr}
\RequirePackage{xcolor} % changed from color to xcolor (2021/11/24)
\RequirePackage{algorithm}
\RequirePackage{algorithmic}
\RequirePackage{natbib}
\RequirePackage{eso-pic} % used by \AddToShipoutPicture
\RequirePackage{forloop}
\RequirePackage{url}
%%%%%%%% Options
% [accepted]: switch the first-page notice from the "under review"
% text to the "appearing in" text, and de-anonymize author handling
% (via the \isaccepted flag tested throughout this file).
\DeclareOption{accepted}{%
\renewcommand{\Notice@String}{\ICML@appearing}
\gdef\isaccepted{1}
}
% [nohyperref]: skip all \hypersetup configuration below.
\DeclareOption{nohyperref}{%
\gdef\nohyperref{1}
}
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the
% final/accepted version of the ``appearing in'' note. Modify it to
% change that text.
%%%%%%%%%%%%%%%%%%%%
% Text of the first-page notice used when the [accepted] option is given.
\newcommand{\ICML@appearing}{\textit{Proceedings of the
$\mathit{41}^{st}$ International Conference on Machine Learning},
Vienna, Austria. PMLR 235, 2024.
Copyright 2024 by the author(s).}
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the draft/under
% review version of the ``appearing in'' note. Modify it to change
% that text.
%%%%%%%%%%%%%%%%%%%%
\newcommand{\Notice@String}{Preliminary work. Under review by the
International Conference on Machine Learning (ICML)\@. Do not distribute.}
% Cause the declared options to actually be parsed and activated
\ProcessOptions\relax
% Submission (non-accepted) PDFs get an anonymous author in the metadata.
\ifdefined\isaccepted\else\ifdefined\hypersetup
\hypersetup{pdfauthor={Anonymous Authors}}
\fi
\fi
% Default hyperref appearance: dark-blue colored links, no borders,
% and PDF metadata naming the proceedings. Skipped under [nohyperref].
\ifdefined\nohyperref\else\ifdefined\hypersetup
\definecolor{mydarkblue}{rgb}{0,0.08,0.45}
\hypersetup{ %
pdftitle={},
pdfsubject={Proceedings of the International Conference on Machine Learning 2024},
pdfkeywords={},
pdfborder=0 0 0,
pdfpagemode=UseNone,
colorlinks=true,
linkcolor=mydarkblue,
citecolor=mydarkblue,
filecolor=mydarkblue,
urlcolor=mydarkblue,
}
\fi
\fi
% Uncomment the following for debugging. It will cause LaTeX to dump
% the version of the ``appearing in'' string that will actually appear
% in the document.
%\typeout{>> Notice string='\Notice@String'}
% Change citation commands to be more like old ICML styles
\newcommand{\yrcite}[1]{\citeyearpar{#1}}
\renewcommand{\cite}[1]{\citep{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% to ensure the letter format is used. pdflatex does compile the
% page size into the pdf. This is done using \pdfpagewidth and
% \pdfpageheight. As LaTeX does not know these directives, we first
% check whether pdflatex or latex is used.
%
% Kristian Kersting 2005
%
% in order to account for the more recent use of pdfetex as the default
% compiler, I have changed the pdf verification.
%
% Ricardo Silva 2007
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Force US-letter paper and, when running under pdfTeX in PDF mode,
% also stamp the page size into the PDF itself.
\paperwidth=8.5in
\paperheight=11in
% old PDFLaTex verification, circa 2005
%
%\newif\ifpdf\ifx\pdfoutput\undefined
% \pdffalse % we are not running PDFLaTeX
%\else
% \pdfoutput=1 % we are running PDFLaTeX
% \pdftrue
%\fi
\newif\ifpdf %adapted from ifpdf.sty
% \pdftrue only when \pdfoutput exists, is not \relax, and is nonzero.
\ifx\pdfoutput\undefined
\else
\ifx\pdfoutput\relax
\else
\ifcase\pdfoutput
\else
\pdftrue
\fi
\fi
\fi
\ifpdf
% \pdfpagewidth=\paperwidth
% \pdfpageheight=\paperheight
\setlength{\pdfpagewidth}{8.5in}
\setlength{\pdfpageheight}{11in}
\fi
% Physical page layout: two 3.25in columns inside a 6.75in x 9in text
% block, with a small head for the running title.
\evensidemargin -0.23in
\oddsidemargin -0.23in
\setlength\textheight{9.0in}
\setlength\textwidth{6.75in}
\setlength\columnsep{0.25in}
\setlength\headheight{10pt}
\setlength\headsep{10pt}
\addtolength{\topmargin}{-20pt}
\addtolength{\topmargin}{-0.29in}
% Historically many authors tried to include packages like geometry or fullpage,
% which change the page layout. It either makes the proceedings inconsistent, or
% wastes organizers' time chasing authors. So let's nip these problems in the
% bud here. -- Iain Murray 2018.
%\RequirePackage{printlen}
% At \begin{document}, verify that no package or user command has changed
% the page-layout lengths set above; if any differ from the expected
% values, typeset a prominent in-document warning. The expected values
% were captured with the printlen package (see comments below).
\AtBeginDocument{%
% To get the numbers below, include printlen package above and see lengths like this:
%\printlength\oddsidemargin\\
%\printlength\headheight\\
%\printlength\textheight\\
%\printlength\marginparsep\\
%\printlength\footskip\\
%\printlength\hoffset\\
%\printlength\paperwidth\\
%\printlength\topmargin\\
%\printlength\headsep\\
%\printlength\textwidth\\
%\printlength\marginparwidth\\
%\printlength\marginparpush\\
%\printlength\voffset\\
%\printlength\paperheight\\
%
\newif\ifmarginsmessedwith
\marginsmessedwithfalse
\ifdim\oddsidemargin=-16.62178pt \else oddsidemargin has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\headheight=10.0pt \else headheight has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\textheight=650.43pt \else textheight has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\marginparsep=11.0pt \else marginparsep has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\footskip=25.0pt \else footskip has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\hoffset=0.0pt \else hoffset has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\paperwidth=614.295pt \else paperwidth has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\topmargin=-24.95781pt \else topmargin has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\headsep=10.0pt \else headsep has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\textwidth=487.8225pt \else textwidth has been altered.\\ \marginsmessedwithtrue\fi
%\ifdim\marginparwidth=65.0pt \else marginparwidth has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\marginparpush=5.0pt \else marginparpush has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\voffset=0.0pt \else voffset has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\paperheight=794.96999pt \else paperheight has been altered.\\ \marginsmessedwithtrue\fi
\ifmarginsmessedwith
\textbf{\large \em The page layout violates the ICML style.}
Please do not change the page layout, or include packages like geometry,
savetrees, or fullpage, which change it for you.
We're not able to reliably undo arbitrary changes to the style. Please remove
the offending package(s), or layout-changing commands and try again.
\fi}
%% The following is adapted from code in the acmconf.sty conference
%% style file. The constants in it are somewhat magical, and appear
%% to work well with the two-column format on US letter paper that
%% ICML uses, but will break if you change that layout, or if you use
%% a longer block of text for the copyright notice string. Fiddle with
%% them if necessary to get the block to fit/look right.
%%
%% -- Terran Lane, 2003
%%
%% The following comments are included verbatim from acmconf.sty:
%%
%%% This section (written by KBT) handles the 1" box in the lower left
%%% corner of the left column of the first page by creating a picture,
%%% and inserting the predefined string at the bottom (with a negative
%%% displacement to offset the space allocated for a non-existent
%%% caption).
%%%
% Bottom-of-first-column notice box, adapted from acmconf.sty; reserves
% space for \Notice@String via a [b]-placed float.
\def\ftype@copyrightbox{8}
\def\@copyrightspace{
% Create a float object positioned at the bottom of the column. Note
% that because of the mystical nature of floats, this has to be called
% before the first column is populated with text (e.g., from the title
% or abstract blocks). Otherwise, the text will force the float to
% the next column. -- TDRL.
\@float{copyrightbox}[b]
\begin{center}
\setlength{\unitlength}{1pc}
\begin{picture}(20,1.5)
% Create a line separating the main text from the note block.
% 4.818pc==0.8in.
\put(0,2.5){\line(1,0){4.818}}
% Insert the text string itself. Note that the string has to be
% enclosed in a parbox -- the \put call needs a box object to
% position. Without the parbox, the text gets splattered across the
% bottom of the page semi-randomly. The 19.75pc distance seems to be
% the width of the column, though I can't find an appropriate distance
% variable to substitute here. -- TDRL.
\put(0,0){\parbox[b]{19.75pc}{\small \Notice@String}}
\end{picture}
\end{center}
\end@float}
% Note: A few Latex versions need the next line instead of the former.
% \addtolength{\topmargin}{0.3in}
% \setlength\footheight{0pt}
\setlength\footskip{25.0pt}
%\pagestyle{empty}
% Two-column, flush-bottom body; \sloppy relaxes line breaking for the
% narrow columns.
\flushbottom \twocolumn
\sloppy
% Clear out the addcontentsline command
\def\addcontentsline#1#2#3{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% commands for formatting paper title, author names, and addresses.
%%start%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% title as running head -- Kristian Kersting 2005 %%%%%%%%%%%%%
%\makeatletter
%\newtoks\mytoksa
%\newtoks\mytoksb
%\newcommand\addtomylist[2]{%
% \mytoksa\expandafter{#1}%
% \mytoksb{#2}%
% \edef#1{\the\mytoksa\the\mytoksb}%
%}
%\makeatother
% box to check the size of the running head
% Scratch box used by \icmltitle to measure the running head.
\newbox\titrun
% general page style
% fancyhdr setup: empty head/foot fields except a centered page number.
\pagestyle{fancy}
\fancyhf{}
\fancyhead{}
\fancyfoot{}
\cfoot{\thepage}
% set the width of the head rule to 1 point
\renewcommand{\headrulewidth}{1pt}
% definition to set the head as running head in the preamble
\def\icmltitlerunning#1{\gdef\@icmltitlerunning{#1}}
% main definition adapting \icmltitle from 2004
% \icmltitle{<title>}: typesets the paper title between rules and installs
% the running head (either the title itself or the \icmltitlerunning text).
% If the running head is too wide or wraps, a placeholder head is used and
% a warning is written to the log.
\long\def\icmltitle#1{%
%check whether @icmltitlerunning exists
% if not \icmltitle is used as running head
\ifx\undefined\@icmltitlerunning%
\gdef\@icmltitlerunning{#1}
\fi
%add it to pdf information
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdftitle={#1}}
\fi\fi
%get the dimension of the running title
\global\setbox\titrun=\vbox{\small\bf\@icmltitlerunning}
% error flag
\gdef\@runningtitleerror{0}
% running title too long
\ifdim\wd\titrun>\textwidth%
{\gdef\@runningtitleerror{1}}%
% running title breaks a line
\else\ifdim\ht\titrun>6.25pt
{\gdef\@runningtitleerror{2}}%
\fi
\fi
% if there is something wrong with the running title
\ifnum\@runningtitleerror>0
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Title exceeds size limitations for running head.}%
\typeout{Please supply a shorter form for the running head}
\typeout{with \string\icmltitlerunning{...}\space prior to \string\begin{document}}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
% set default running title
\chead{\small\bf Title Suppressed Due to Excessive Size}%
\else
% 'everything' fine, set provided running title
\chead{\small\bf\@icmltitlerunning}%
\fi
% no running title on the first page of the paper
\thispagestyle{plain}
%%%%%%%%%%%%%%%%%%%% Kristian Kersting %%%%%%%%%%%%%%%%%%%%%%%%%
%end%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
{\center\baselineskip 18pt
\toptitlebar{\Large\bf #1}\bottomtitlebar}
}
% Accumulate a comma-separated list of all author names; it is pushed
% into the PDF's author metadata each time a name is added.
\gdef\icmlfullauthorlist{}
\newcommand\addstringtofullauthorlist{\g@addto@macro\icmlfullauthorlist}
\newcommand\addtofullauthorlist[1]{%
\ifdefined\icmlanyauthors%
\addstringtofullauthorlist{, #1}%
\else%
\addstringtofullauthorlist{#1}%
\gdef\icmlanyauthors{1}%
\fi%
% \ifdefined\nohyperref\else
\ifdefined\hypersetup%
\hypersetup{pdfauthor=\icmlfullauthorlist}%
\fi%\fi
}
% Horizontal rules above and below the title block.
\def\toptitlebar{\hrule height1pt \vskip .25in}
\def\bottomtitlebar{\vskip .22in \hrule height1pt \vskip .3in}
% Environment wrapping the centered author-name block.
\newenvironment{icmlauthorlist}{%
\setlength\topsep{0pt}
\setlength\parskip{0pt}
\begin{center}
}{%
\end{center}
}
\newcounter{@affiliationcounter}
% \@pa{<label>}: print the superscript marker for affiliation <label>.
% First use of a numeric label allocates the next affiliation number
% (stored in counter @affil<label>); labels registered via
% \icmlsetsymbol print their symbol instead of a number.
\newcommand{\@pa}[1]{%
% ``#1''
\ifcsname the@affil#1\endcsname
% do nothing
\else
\ifcsname @icmlsymbol#1\endcsname
% nothing
\else
\stepcounter{@affiliationcounter}%
\newcounter{@affil#1}%
\setcounter{@affil#1}{\value{@affiliationcounter}}%
\fi
\fi%
\ifcsname @icmlsymbol#1\endcsname
\textsuperscript{\csname @icmlsymbol#1\endcsname\,}%
\else
%\expandafter\footnotemark[\arabic{@affil#1}\,]%
\textsuperscript{\arabic{@affil#1}\,}%
\fi
}
%\newcommand{\icmlauthor}[2]{%
%\addtofullauthorlist{#1}%
%#1\@for\theaffil:=#2\do{\pa{\theaffil}}%
%}
% \icmlauthor{<name>}{<affil labels>}: in the accepted version, prints
% the bold name with its affiliation superscripts; in the anonymous
% (review) version, prints "Anonymous Authors" once and suppresses all
% subsequent \icmlauthor calls.
\newcommand{\icmlauthor}[2]{%
\ifdefined\isaccepted
\mbox{\bf #1}\,\@for\theaffil:=#2\do{\@pa{\theaffil}} \addtofullauthorlist{#1}%
\else
\ifdefined\@icmlfirsttime
\else
\gdef\@icmlfirsttime{1}
\mbox{\bf Anonymous Authors}\@pa{@anon} \addtofullauthorlist{Anonymous Authors}
\fi
\fi
}
% \icmlsetsymbol{<label>}{<symbol>}: make affiliation <label> print the
% given symbol (e.g. *) instead of a number.
\newcommand{\icmlsetsymbol}[2]{%
\expandafter\gdef\csname @icmlsymbol#1\endcsname{#2}
}
% \icmlaffiliation{<label>}{<text>}: record the affiliation text for a
% label previously used in some \icmlauthor; errors (in print and in the
% log) if the label was never mentioned. Anonymous version stores a
% placeholder institution instead.
\newcommand{\icmlaffiliation}[2]{%
\ifdefined\isaccepted
\ifcsname the@affil#1\endcsname
\expandafter\gdef\csname @affilname\csname the@affil#1\endcsname\endcsname{#2}%
\else
{\bf AUTHORERR: Error in use of \textbackslash{}icmlaffiliation command. Label ``#1'' not mentioned in some \textbackslash{}icmlauthor\{author name\}\{labels here\} command beforehand. }
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Affiliation label undefined. }%
\typeout{Make sure \string\icmlaffiliation\space follows }
\typeout{all of \string\icmlauthor\space commands}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
\fi
\else % \isaccepted
% can be called multiple times... it's idempotent
\expandafter\gdef\csname @affilname1\endcsname{Anonymous Institution, Anonymous City, Anonymous Region, Anonymous Country}
\fi
}
% \icmlcorrespondingauthor{<name>}{<email>}: accumulate "Name <email>"
% entries for the correspondence footnote; anonymized under review.
\newcommand{\icmlcorrespondingauthor}[2]{
\ifdefined\isaccepted
\ifdefined\icmlcorrespondingauthor@text
\g@addto@macro\icmlcorrespondingauthor@text{, #1 \textless{}#2\textgreater{}}
\else
\gdef\icmlcorrespondingauthor@text{#1 \textless{}#2\textgreater{}}
\fi
\else
\gdef\icmlcorrespondingauthor@text{Anonymous Author \textless{}anon.email@domain.com\textgreater{}}
\fi
}
% Marker text for equal first-author contribution.
\newcommand{\icmlEqualContribution}{\textsuperscript{*}Equal contribution }
% Loop counter used by \printAffiliationsAndNotice.
\newcounter{@affilnum}
% \printAffiliationsAndNotice{<extra>}: emit the unmarked footnote on the
% first page: (accepted only) the <extra> text, e.g. \icmlEqualContribution,
% then the numbered affiliation list, the correspondence line, and
% \Notice@String. Prints AUTHORERR text when affiliations or the
% corresponding author were never declared.
\newcommand{\printAffiliationsAndNotice}[1]{%
\stepcounter{@affiliationcounter}%
{\let\thefootnote\relax\footnotetext{\hspace*{-\footnotesep}\ifdefined\isaccepted #1\fi%
\forloop{@affilnum}{1}{\value{@affilnum} < \value{@affiliationcounter}}{
\textsuperscript{\arabic{@affilnum}}\ifcsname @affilname\the@affilnum\endcsname%
\csname @affilname\the@affilnum\endcsname%
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlaffiliation.}
\fi
}.
\ifdefined\icmlcorrespondingauthor@text
Correspondence to: \icmlcorrespondingauthor@text.
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlcorrespondingauthor.}
\fi
\ \\
\Notice@String
}
}
}
%\makeatother
% Deprecated \icmladdress: prints an in-document error directing users
% to \icmlauthor/\icmlaffiliation instead.
\long\def\icmladdress#1{%
{\bf The \textbackslash{}icmladdress command is no longer used. See the example\_paper PDF .tex for usage of \textbackslash{}icmlauther and \textbackslash{}icmlaffiliation.}
}
%% keywords as first class citizens
% \icmlkeywords{<kw list>}: currently only records the keywords in the
% PDF metadata (the in-text printing variants are commented out).
\def\icmlkeywords#1{%
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
% \ifdefined\nohyperref\else\ifdefined\hypersetup
% \hypersetup{pdfkeywords={#1}}
% \fi\fi
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdfkeywords={#1}}
\fi\fi
}
% modification to natbib citations
% Author-year citations in round parens, "; " between citations.
\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
% Redefinition of the abstract environment.
% Centered bold "Abstract" heading above quoted (indented) body text.
\renewenvironment{abstract}
{%
% Insert the ``appearing in'' copyright notice.
%\@copyrightspace
\centerline{\large\bf Abstract}
\vspace{-0.12in}\begin{quote}}
{\par\end{quote}\vskip 0.12in}
% numbered section headings with different treatment of numbers
% Patched copy of the LaTeX kernel's \@startsection that keeps the
% first paragraph after a heading indented, and dispatches numbered
% headings to \@sict below.
\def\@startsection#1#2#3#4#5#6{\if@noskipsec \leavevmode \fi
\par \@tempskipa #4\relax
\@afterindenttrue
% Altered the following line to indent a section's first paragraph.
% \ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \@afterindentfalse\fi
\ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \fi
\if@nobreak \everypar{}\else
\addpenalty{\@secpenalty}\addvspace{\@tempskipa}\fi \@ifstar
{\@ssect{#3}{#4}{#5}{#6}}{\@dblarg{\@sict{#1}{#2}{#3}{#4}{#5}{#6}}}}
% Variant of the kernel's \@sect that follows the section number with a
% period and a tie ("1.~Title") instead of a horizontal skip.
\def\@sict#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname}\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec.~}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
% Kernel-style \@sect kept for run-in headings; separates number and
% title with a fixed 0.4em skip.
\def\@sect#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname\hskip 0.4em }\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
% section headings with less space above and below them
\def\thesection {\arabic{section}}
\def\thesubsection {\thesection.\arabic{subsection}}
\def\section{\@startsection{section}{1}{\z@}{-0.12in}{0.02in}
{\large\bf\raggedright}}
\def\subsection{\@startsection{subsection}{2}{\z@}{-0.10in}{0.01in}
{\normalsize\bf\raggedright}}
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-0.08in}{0.01in}
{\normalsize\sc\raggedright}}
\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
% Footnotes
\footnotesep 6.65pt %
\skip\footins 9pt
\def\footnoterule{\kern-3pt \hrule width 0.8in \kern 2.6pt }
\setcounter{footnote}{0}
% Lists and paragraphs
% Compact spacing throughout; no paragraph indent, 6pt between paragraphs.
\parindent 0pt
\topsep 4pt plus 1pt minus 2pt
\partopsep 1pt plus 0.5pt minus 0.5pt
\itemsep 2pt plus 1pt minus 0.5pt
\parsep 2pt plus 1pt minus 0.5pt
\parskip 6pt
\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
\leftmarginvi .5em
\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
% Per-depth list geometry (kernel hooks \@listi..\@listvi).
\def\@listi{\leftmargin\leftmargini}
\def\@listii{\leftmargin\leftmarginii
\labelwidth\leftmarginii\advance\labelwidth-\labelsep
\topsep 2pt plus 1pt minus 0.5pt
\parsep 1pt plus 0.5pt minus 0.5pt
\itemsep \parsep}
\def\@listiii{\leftmargin\leftmarginiii
\labelwidth\leftmarginiii\advance\labelwidth-\labelsep
\topsep 1pt plus 0.5pt minus 0.5pt
\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
\itemsep \topsep}
\def\@listiv{\leftmargin\leftmarginiv
\labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
\def\@listv{\leftmargin\leftmarginv
\labelwidth\leftmarginv\advance\labelwidth-\labelsep}
\def\@listvi{\leftmargin\leftmarginvi
\labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
\abovedisplayskip 7pt plus2pt minus5pt%
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip 0pt plus3pt%
\belowdisplayshortskip 4pt plus3pt minus3pt%
% Less leading in most fonts (due to the narrow columns)
% The choices were between 1-pt and 1.5-pt leading
\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
% Revised formatting for figure captions and table titles.
% Single-line captions are centered; multi-line captions get a hanging
% label ("Figure N.") with the body set in \footnotesize.
\newsavebox\newcaptionbox\newdimen\newcaptionboxwid
\long\def\@makecaption#1#2{
\vskip 10pt
\baselineskip 11pt
\setbox\@tempboxa\hbox{#1. #2}
\ifdim \wd\@tempboxa >\hsize
\sbox{\newcaptionbox}{\small\sl #1.~}
\newcaptionboxwid=\wd\newcaptionbox
\usebox\newcaptionbox {\footnotesize #2}
% \usebox\newcaptionbox {\small #2}
\else
\centerline{{\small\sl #1.} {\small #2}}
\fi}
% Caption label text ("Figure N" / "Table N", period added above).
\def\fnum@figure{Figure \thefigure}
\def\fnum@table{Table \thetable}
% Strut macros for skipping spaces above and below text in tables.
\def\abovestrut#1{\rule[0in]{0in}{#1}\ignorespaces}
\def\belowstrut#1{\rule[-#1]{0in}{#1}\ignorespaces}
\def\abovespace{\abovestrut{0.20in}}
\def\aroundspace{\abovestrut{0.20in}\belowstrut{0.10in}}
\def\belowspace{\belowstrut{0.10in}}
% Various personal itemization commands.
\def\texitem#1{\par\noindent\hangindent 12pt
\hbox to 12pt {\hss #1 ~}\ignorespaces}
\def\icmlitem{\texitem{$\bullet$}}
% To comment out multiple lines of text.
\long\def\comment#1{}
%% Line counter (not in final version). Adapted from NIPS style file by Christoph Sawade
% Vertical Ruler
% This code is, largely, from the CVPR 2010 conference style file
% ----- define vruler
% Vertical line-number ruler machinery (adapted from the CVPR 2010 style;
% see the comment block above). Builds a box of evenly spaced numbers
% that is later overlaid on each page of review-mode submissions.
\makeatletter
\newbox\icmlrulerbox
\newcount\icmlrulercount
\newdimen\icmlruleroffset
\newdimen\cv@lineheight
\newdimen\cv@boxheight
\newbox\cv@tmpbox
\newcount\cv@refno
\newcount\cv@tot
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
% Fills \icmlrulerbox with line numbers spaced <SCALE> apart, counting
% from <INITIAL_COUNT> in steps of <STEP>, zero-padded to <DIGITS>,
% filling a column of height <HEIGHT>.
\def\makevruler[#1][#2][#3][#4][#5]{
\begingroup\offinterlineskip
\textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
\global\setbox\icmlrulerbox=\vbox to \textheight{%
{
\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
\cv@lineheight=#1\global\icmlrulercount=#2%
\cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
\cv@refno1\vskip-\cv@lineheight\vskip1ex%
\loop\setbox\cv@tmpbox=\hbox to0cm{ % side margin
\hfil {\hfil\fillzeros[#4]\icmlrulercount}
}%
\ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
\advance\cv@refno1\global\advance\icmlrulercount#3\relax
\ifnum\cv@refno<\cv@tot\repeat
}
}
\endgroup
}%
\makeatother
% ----- end of vruler
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
% \icmlruler{<start>}: build and place the ruler box at 12pt line pitch,
% 3-digit numbers, full text height.
\def\icmlruler#1{\makevruler[12pt][#1][1][3][\textheight]\usebox{\icmlrulerbox}}
% On every shipped-out page of a non-accepted (review) build, overlay a
% gray line-number ruler in the left margin; accepted builds get nothing.
\AddToShipoutPicture{%
\icmlruleroffset=\textheight
\advance\icmlruleroffset by 5.2pt % top margin
\color[rgb]{.7,.7,.7}
\ifdefined\isaccepted \else
\AtTextUpperLeft{%
\put(\LenToUnit{-35pt},\LenToUnit{-\icmlruleroffset}){%left ruler
\icmlruler{\icmlrulercount}}
% \put(\LenToUnit{1.04\textwidth},\LenToUnit{-\icmlruleroffset}){%right ruler
% \icmlruler{\icmlrulercount}}
}
\fi
}
\endinput
% Figure: six circular states (1..6) each pointing at state 7, with a
% dashed boundary around the region and a curved arrow leaving state 7.
% Scaled to a fixed 7cm x 4.4cm footprint.
\resizebox{7cm}{4.4cm}{
\begin{tikzpicture}[smooth]
% Anchor points of the dashed boundary.
\node[coordinate] (origin) at (0.3,0) {};
\node[coordinate] (num7) at (3,0) {};
\path (origin) ++ (90:3cm) node (origin_above) [coordinate] {};
\path (origin_above) ++ (0:5.7cm) node (origin_aright) [coordinate] {};
% Small polar offsets tracing the curved arrow that exits state 7.
\path (num7) ++ (-10:0.5cm) node (num7_bright1) [coordinate] {};
\path (num7) ++ (-30:0.7cm) node (num7_bright2) [coordinate] {};
\path (num7) ++ (-60:0.35cm) node (num7_bright3) [coordinate] {};
\path (num7) ++ (-60:0.6cm) node (num7_bright4) [coordinate] {};
% Dashed boundary through state 7 and around the upper-left corner.
\draw[dashed,line width = 0.03cm,xshift=3cm] plot[tension=0.06]
coordinates{(num7) (origin) (origin_above) (origin_aright)};
% Curved arrow leaving state 7.
\draw[->,>=stealth,line width = 0.02cm,xshift=3cm] plot[tension=0.5]
coordinates{(num7) (num7_bright1) (num7_bright2)(num7_bright4) (num7_bright3)};
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (g) at (num7) {7};
% States 1..6 sit on a horizontal line at y=2.5, 1cm apart, starting at
% x=1.  Each state i gets a short dashed double arrow above it (y=2.5 up
% to y=3.0) and its numbered circle slightly lower at y=2.2.  These
% explicit coordinates reproduce the original chain of
% ++(0:1cm) / ++(90:0.5cm) / ++(-90:0.3cm) offsets exactly.
\foreach \i/\lbl in {1/a,2/b,3/c,4/d,5/e,6/f}{
\draw[<->,>=stealth,dashed,line width = 0.03cm] (\i,2.5) -- (\i,3.0);
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (\lbl) at (\i,2.2) {\i};
}
% Every numbered state points at state 7.
\foreach \lbl in {a,b,c,d,e,f}{
\draw[->,>=stealth,line width = 0.02cm] (\lbl)--(g);
}
\end{tikzpicture}
}
% \tikzstyle{int}=[draw, fill=blue!20, minimum size=2em]
% \tikzstyle{block}=[draw, fill=gray, minimum size=1.5em]
% \tikzstyle{init} = [pin edge={to-,thin,black}]
% \resizebox{8cm}{1.2cm}{
% \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
% \node [block] (o) {};
% \node (p) [left of=o,node distance=0.5cm, coordinate] {o};
% \node [shape=circle,int] (a) [right of=o]{$A$};
% \node (b) [left of=a,node distance=1.5cm, coordinate] {a};
% \node [shape=circle,int] (c) [right of=a] {$B$};
% \node (d) [left of=c,node distance=1.5cm, coordinate] {c};
% \node [shape=circle,int, pin={[init]above:$$}] (e) [right of=c]{$C$};
% \node (f) [left of=e,node distance=1.5cm, coordinate] {e};
% \node [shape=circle,int] (g) [right of=e] {$D$};
% \node (h) [left of=g,node distance=1.5cm, coordinate] {g};
% \node [shape=circle,int] (i) [right of=g] {$E$};
% \node (j) [left of=i,node distance=1.5cm, coordinate] {i};
% \node [block] (k) [right of=i] {};
% \node (l) [left of=k,node distance=0.5cm, coordinate] {k};
% \path[<-] (o) edge node {$0$} (a);
% \path[<->] (a) edge node {$0$} (c);
% \path[<->] (c) edge node {$0$} (e);
% \path[<->] (e) edge node {$0$} (g);
% \path[<->] (g) edge node {$0$} (i);
% \draw[->] (i) edge node {$1$} (k);
% \end{tikzpicture}
% }
% Figure: five circular states A..E in a row, flanked by two gray square
% terminal blocks, with transition labels on the edges.
% `\tikzstyle` is deprecated; `\tikzset` with `.style` is the documented
% replacement (PGF/TikZ manual) and defines the same global styles.
\tikzset{
int/.style={draw, fill=blue!20, minimum size=2em},
block/.style={draw, fill=gray, minimum size=1.5em},
init/.style={pin edge={to-,thin,black}}
}
\resizebox{8cm}{1.5cm}{
\begin{tikzpicture}[node distance=1.5cm, auto, >=latex]
% Left terminal block, then states A..E, then right terminal block.
% The bare coordinate nodes (p,b,d,f,h,j,l) are kept for compatibility
% even though nothing visible references them here.
\node [block] (o) {};
\node (p) [left of=o, node distance=0.5cm, coordinate] {o};
\node [shape=circle, int] (a) [right of=o] {$A$};
\node (b) [left of=a, node distance=1.5cm, coordinate] {a};
\node [shape=circle, int] (c) [right of=a] {$B$};
\node (d) [left of=c, node distance=1.5cm, coordinate] {c};
\node [shape=circle, int, pin={[init]above:$ $}] (e) [right of=c] {$C$};
\node (f) [left of=e, node distance=1.5cm, coordinate] {e};
\node [shape=circle, int] (g) [right of=e] {$D$};
\node (h) [left of=g, node distance=1.5cm, coordinate] {g};
\node [shape=circle, int] (i) [right of=g] {$E$};
\node (j) [left of=i, node distance=1.5cm, coordinate] {i};
\node [block] (k) [right of=i] {};
\node (l) [left of=k, node distance=0.5cm, coordinate] {k};
% Transitions: one-way in from the left block, bidirectional between
% states (label $0$), one-way out to the right block (label $1$).
\path[->] (o) edge node {$0$} (a);
\path[<->] (a) edge node {$0$} (c);
\path[<->] (c) edge node {$0$} (e);
\path[<->] (e) edge node {$0$} (g);
\path[<->] (g) edge node {$0$} (i);
\draw[->] (i) edge node {$1$} (k);
\end{tikzpicture}
}
% NOTE(review): the lines below are residue from the web view this file
% was copied from (a git "no newline" diff marker and GitLab UI text),
% not LaTeX source. Commented out so they cannot be mistaken for code.
% \ No newline at end of file
% Markdown is supported
% 0% or
% You are about to add 0 people to the discussion. Proceed with caution.
% Finish editing this message first!
% Please register or to comment