% Commit e9f7dd7f by GongYu
%
% 最新版VM论文版本—ICML (latest VM paper version -- ICML)
%
% parents
% ALGORITHM STYLE -- Released 8 April 1996
% for LaTeX-2e
% Copyright -- 1994 Peter Williams
% E-mail Peter.Williams@dsto.defence.gov.au
%
% Defines the floating `algorithm' environment on top of the float
% package.  Package options select the float style (plain/ruled/boxed)
% and the sectioning counter the algorithm number is reset within; any
% other option is used as a replacement caption name for the float.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithm}
\typeout{Document Style `algorithm' - floating environment}
\RequirePackage{float}
\RequirePackage{ifthen}
% \ALG@within holds the name of the parent counter (`nothing' = none);
% the boolean ALG@within records whether any numbering option was given.
\newcommand{\ALG@within}{nothing}
\newboolean{ALG@within}
\setboolean{ALG@within}{false}
% Defaults: ruled style, caption label "Algorithm".
\newcommand{\ALG@floatstyle}{ruled}
\newcommand{\ALG@name}{Algorithm}
\newcommand{\listalgorithmname}{List of \ALG@name s}
% Declare Options
% first appearance
% Float-style options; if several are given, the last one wins.
\DeclareOption{plain}{
\renewcommand{\ALG@floatstyle}{plain}
}
\DeclareOption{ruled}{
\renewcommand{\ALG@floatstyle}{ruled}
}
\DeclareOption{boxed}{
\renewcommand{\ALG@floatstyle}{boxed}
}
% then numbering convention
% Numbering options: algorithms are numbered within the chosen unit.
\DeclareOption{part}{
\renewcommand{\ALG@within}{part}
\setboolean{ALG@within}{true}
}
\DeclareOption{chapter}{
\renewcommand{\ALG@within}{chapter}
\setboolean{ALG@within}{true}
}
\DeclareOption{section}{
\renewcommand{\ALG@within}{section}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsection}{
\renewcommand{\ALG@within}{subsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsubsection}{
\renewcommand{\ALG@within}{subsubsection}
\setboolean{ALG@within}{true}
}
% `nothing' also sets the boolean; it is handled below by the branch
% that creates the float with no parent counter.
\DeclareOption{nothing}{
\renewcommand{\ALG@within}{nothing}
\setboolean{ALG@within}{true}
}
% Any unknown option becomes the caption name (\edef fully expands
% \CurrentOption), e.g. \usepackage[Procedure]{algorithm}.
\DeclareOption*{\edef\ALG@name{\CurrentOption}}
% ALGORITHM
%
\ProcessOptions
\floatstyle{\ALG@floatstyle}
% Create the `algorithm' float (placement htbp, list file extension
% .loa), numbered within the requested counter if one was selected.
\ifthenelse{\boolean{ALG@within}}{
\ifthenelse{\equal{\ALG@within}{part}}
{\newfloat{algorithm}{htbp}{loa}[part]}{}
\ifthenelse{\equal{\ALG@within}{chapter}}
{\newfloat{algorithm}{htbp}{loa}[chapter]}{}
\ifthenelse{\equal{\ALG@within}{section}}
{\newfloat{algorithm}{htbp}{loa}[section]}{}
\ifthenelse{\equal{\ALG@within}{subsection}}
{\newfloat{algorithm}{htbp}{loa}[subsection]}{}
\ifthenelse{\equal{\ALG@within}{subsubsection}}
{\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
\ifthenelse{\equal{\ALG@within}{nothing}}
{\newfloat{algorithm}{htbp}{loa}}{}
}{
\newfloat{algorithm}{htbp}{loa}
}
\floatname{algorithm}{\ALG@name}
% \listofalgorithms typesets the list of algorithms (from the .loa file).
\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}
% ALGORITHMIC STYLE -- Released 8 APRIL 1996
% for LaTeX version 2e
% Copyright -- 1994 Peter Williams
% E-mail PeterWilliams@dsto.defence.gov.au
%
% Modified by Alex Smola (08/2000)
% E-mail Alex.Smola@anu.edu.au
%
% Pseudocode environment `algorithmic'.  The single package option
% `noend' suppresses the closing "end if"/"end for"/... lines.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithmic}
\typeout{Document Style `algorithmic' - environment}
%
\RequirePackage{ifthen}
\RequirePackage{calc}
% ALC@noend : true when the noend package option is given.
\newboolean{ALC@noend}
\setboolean{ALC@noend}{false}
% ALC@line : current statement number.
% ALC@rem  : lines elapsed since the last printed number (drives the
%            "number every n-th line" behaviour of the environment).
\newcounter{ALC@line}
\newcounter{ALC@rem}
% \ALC@tlm : accumulated left margin of the current nesting level.
\newlength{\ALC@tlm}
%
\DeclareOption{noend}{\setboolean{ALC@noend}{true}}
%
\ProcessOptions
%
% ALGORITHMIC
% Keyword macros -- redefine any of these to change the typeset keyword.
\newcommand{\algorithmicrequire}{\textbf{Require:}}
\newcommand{\algorithmicensure}{\textbf{Ensure:}}
\newcommand{\algorithmiccomment}[1]{\{#1\}}
\newcommand{\algorithmicend}{\textbf{end}}
\newcommand{\algorithmicif}{\textbf{if}}
\newcommand{\algorithmicthen}{\textbf{then}}
\newcommand{\algorithmicelse}{\textbf{else}}
\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif}
\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif}
\newcommand{\algorithmicfor}{\textbf{for}}
\newcommand{\algorithmicforall}{\textbf{for all}}
\newcommand{\algorithmicdo}{\textbf{do}}
\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor}
\newcommand{\algorithmicwhile}{\textbf{while}}
\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile}
\newcommand{\algorithmicloop}{\textbf{loop}}
\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop}
\newcommand{\algorithmicrepeat}{\textbf{repeat}}
\newcommand{\algorithmicuntil}{\textbf{until}}
%changed by alex smola
\newcommand{\algorithmicinput}{\textbf{input}}
\newcommand{\algorithmicoutput}{\textbf{output}}
\newcommand{\algorithmicset}{\textbf{set}}
\newcommand{\algorithmictrue}{\textbf{true}}
\newcommand{\algorithmicfalse}{\textbf{false}}
\newcommand{\algorithmicand}{\textbf{and\ }}
\newcommand{\algorithmicor}{\textbf{or\ }}
\newcommand{\algorithmicfunction}{\textbf{function}}
\newcommand{\algorithmicendfunction}{\algorithmicend\ \algorithmicfunction}
\newcommand{\algorithmicmain}{\textbf{main}}
\newcommand{\algorithmicendmain}{\algorithmicend\ \algorithmicmain}
%end changed by alex smola
% \ALC@item : replacement for the list-environment \@item that keeps
% line-number labels right-aligned in a box of width \labelwidth while
% compensating for the accumulated nesting indent \ALC@tlm.
% NOTE(review): apart from the \ALC@tlm \hskip adjustments this appears
% to mirror the LaTeX kernel's \@item -- confirm against latex.ltx
% before modifying any of the penalty/glue logic below.
\def\ALC@item[#1]{%
\if@noparitem \@donoparitem
\else \if@inlabel \indent \par \fi
\ifhmode \unskip\unskip \par \fi
\if@newlist \if@nobreak \@nbitem \else
\addpenalty\@beginparpenalty
\addvspace\@topsep \addvspace{-\parskip}\fi
\else \addpenalty\@itempenalty \addvspace\itemsep
\fi
\global\@inlabeltrue
\fi
\everypar{\global\@minipagefalse\global\@newlistfalse
\if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels
\penalty\z@ \fi
\everypar{}}\global\@nobreakfalse
\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi
% Typeset the label, then append it to \@labels shifted left by
% \labelwidth plus \ALC@tlm so nested blocks stay aligned.
\sbox\@tempboxa{\makelabel{#1}}%
\global\setbox\@labels
\hbox{\unhbox\@labels \hskip \itemindent
\hskip -\labelwidth \hskip -\ALC@tlm
\ifdim \wd\@tempboxa >\labelwidth
\box\@tempboxa
\else \hbox to\labelwidth {\unhbox\@tempboxa}\fi
\hskip \ALC@tlm}\ignorespaces}
%
% The algorithmic environment.  The optional argument is the numbering
% interval n: a line number is printed every n lines; 0 (the default)
% suppresses line numbers entirely.
\newenvironment{algorithmic}[1][0]{
\let\@item\ALC@item
% \ALC@lno prints the current line number exactly when ALC@rem has just
% been reset to zero by \ALC@it (i.e. every n-th line).
\newcommand{\ALC@lno}{%
\ifthenelse{\equal{\arabic{ALC@rem}}{0}}
{{\footnotesize \arabic{ALC@line}:}}{}%
}
% Use the top-level list parameters at every nesting depth.
\let\@listii\@listi
\let\@listiii\@listi
\let\@listiv\@listi
\let\@listv\@listi
\let\@listvi\@listi
\let\@listvii\@listi
% ALC@g : generic indented sub-block; each nesting level adds 1em to
% the left margin and records it in \ALC@tlm for label alignment.
\newenvironment{ALC@g}{
\begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@
\listparindent\z@ \rightmargin\z@
\topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@
\leftmargin 1em
\addtolength{\ALC@tlm}{\leftmargin}
}
}
{\end{list}}
% \ALC@it : advance both counters, reset ALC@rem when it reaches the
% numbering interval, then start the item (whose label is \ALC@lno).
\newcommand{\ALC@it}{\addtocounter{ALC@line}{1}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item}
% \ALC@com : typeset a trailing comment; the sentinel string `default'
% (the optional-argument default below) means "no comment given".
\newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}%
{}{\ \algorithmiccomment{##1}}}
\newcommand{\REQUIRE}{\item[\algorithmicrequire]}
\newcommand{\ENSURE}{\item[\algorithmicensure]}
\newcommand{\STATE}{\ALC@it}
\newcommand{\COMMENT}[1]{\algorithmiccomment{##1}}
%changes by alex smola
\newcommand{\INPUT}{\item[\algorithmicinput]}
\newcommand{\OUTPUT}{\item[\algorithmicoutput]}
\newcommand{\SET}{\item[\algorithmicset]}
% \newcommand{\TRUE}{\algorithmictrue}
% \newcommand{\FALSE}{\algorithmicfalse}
\newcommand{\AND}{\algorithmicand}
\newcommand{\OR}{\algorithmicor}
\newenvironment{ALC@func}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@main}{\begin{ALC@g}}{\end{ALC@g}}
%end changes by alex smola
% One ALC@g alias per construct so \end... can check proper nesting.
\newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}}
\renewcommand{\\}{\@centercr}
% Statement macros.  The optional argument of each is a line comment.
\newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\SHORTIF}[2]{\ALC@it\algorithmicif\ ##1\
\algorithmicthen\ {##2}}
\newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\ELSIF}[2][default]%
{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\SHORTFORALL}[2]{\ALC@it\algorithmicforall\ ##1\ %
\algorithmicdo\ {##2}}
\newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@whl}}
\newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop%
\ALC@com{##1}\begin{ALC@loop}}
%changed by alex smola
\newcommand{\FUNCTION}[2][default]{\ALC@it\algorithmicfunction\ ##2\ %
\ALC@com{##1}\begin{ALC@func}}
\newcommand{\MAIN}[2][default]{\ALC@it\algorithmicmain\ ##2\ %
\ALC@com{##1}\begin{ALC@main}}
%end changed by alex smola
\newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat%
\ALC@com{##1}\begin{ALC@rpt}}
\newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1}
% With the noend option the END macros merely close the block;
% otherwise they also print the matching "end ..." keyword line.
\ifthenelse{\boolean{ALC@noend}}{
\newcommand{\ENDIF}{\end{ALC@if}}
\newcommand{\ENDFOR}{\end{ALC@for}}
\newcommand{\ENDWHILE}{\end{ALC@whl}}
\newcommand{\ENDLOOP}{\end{ALC@loop}}
\newcommand{\ENDFUNCTION}{\end{ALC@func}}
\newcommand{\ENDMAIN}{\end{ALC@main}}
}{
\newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif}
\newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor}
\newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile}
\newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop}
\newcommand{\ENDFUNCTION}{\end{ALC@func}\ALC@it\algorithmicendfunction}
\newcommand{\ENDMAIN}{\end{ALC@main}\ALC@it\algorithmicendmain}
}
% Disable LaTeX's "too deeply nested" error for deeply nested blocks.
\renewcommand{\@toodeep}{}
% Outermost list: labels are the line numbers; label width depends on
% whether numbering is enabled (interval nonzero).
\begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}%
\itemsep\z@ \itemindent\z@ \listparindent\z@%
\partopsep\z@ \parskip\z@ \parsep\z@%
\labelsep 0.5em \topsep 0.2em%
\ifthenelse{\equal{#1}{0}}
{\labelwidth 0.5em }
{\labelwidth 1.2em }
\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}
\ALC@tlm\labelsep
}
}
{\end{list}}
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{sutton1988learning}
\citation{tsitsiklis1997analysis}
\citation{Sutton2018book}
\citation{baird1995residual}
\citation{sutton2008convergent}
\citation{sutton2009fast}
\citation{sutton2016emphatic}
\citation{chen2023modified}
\citation{hackman2012faster}
\citation{liu2015finite,liu2016proximal,liu2018proximal}
\citation{givchi2015quasi}
\citation{pan2017accelerated}
\citation{hallak2016generalized}
\citation{zhang2022truncated}
\citation{johnson2013accelerating}
\citation{korda2015td}
\citation{xu2019reanalysis}
\citation{Sutton2018book}
\citation{baird1995residual}
\citation{sutton2009fast}
\citation{sutton2009fast}
\citation{feng2019kernel}
\citation{basserrano2021logistic}
\newlabel{introduction}{{1}{1}{}{section.1}{}}
\newlabel{introduction@cref}{{[section][1][]1}{[1][1][]1}}
\citation{zhou2021machine}
\citation{Sutton2018book}
\citation{Sutton2018book}
\citation{sutton2009fast}
\citation{sutton2009fast}
\citation{ng1999policy}
\newlabel{preliminaries}{{2}{2}{}{section.2}{}}
\newlabel{preliminaries@cref}{{[section][2][]2}{[1][2][]2}}
\newlabel{valuefunction}{{2}{2}{}{section.2}{}}
\newlabel{valuefunction@cref}{{[section][2][]2}{[1][2][]2}}
\newlabel{linearvaluefunction}{{1}{2}{}{equation.2.1}{}}
\newlabel{linearvaluefunction@cref}{{[equation][1][]1}{[1][2][]2}}
\citation{devlin2012dynamic}
\newlabel{example_bias}{{1}{3}{Classification accuracies for naive Bayes and flexible Bayes on various data sets}{table.1}{}}
\newlabel{example_bias@cref}{{[table][1][]1}{[1][2][]3}}
\newlabel{omega}{{3}{3}{}{equation.3.3}{}}
\newlabel{omega@cref}{{[equation][3][]3}{[1][3][]3}}
\newlabel{delta}{{4}{3}{}{equation.3.4}{}}
\newlabel{delta@cref}{{[equation][4][]4}{[1][3][]3}}
\newlabel{theta}{{5}{3}{}{equation.3.5}{}}
\newlabel{theta@cref}{{[equation][5][]5}{[1][3][]3}}
\newlabel{deltaSarsa}{{8}{3}{}{equation.3.8}{}}
\newlabel{deltaSarsa@cref}{{[equation][8][]8}{[1][3][]3}}
\newlabel{deltaQ}{{9}{3}{}{equation.3.9}{}}
\newlabel{deltaQ@cref}{{[equation][9][]9}{[1][3][]3}}
\citation{borkar1997stochastic}
\citation{hirsch1989convergent}
\newlabel{alg:algorithm 1}{{1}{4}{}{algorithm.1}{}}
\newlabel{alg:algorithm 1@cref}{{[algorithm][1][]1}{[1][3][]4}}
\newlabel{thetavmtdc}{{11}{4}{}{equation.3.11}{}}
\newlabel{thetavmtdc@cref}{{[equation][11][]11}{[1][3][]4}}
\newlabel{uvmtdc}{{12}{4}{}{equation.3.12}{}}
\newlabel{uvmtdc@cref}{{[equation][12][]12}{[1][3][]4}}
\newlabel{omegavmtdc}{{13}{4}{}{equation.3.13}{}}
\newlabel{omegavmtdc@cref}{{[equation][13][]13}{[1][3][]4}}
\newlabel{theorem1}{{4.1}{4}{}{theorem.4.1}{}}
\newlabel{theorem1@cref}{{[theorem][1][4]4.1}{[1][4][]4}}
\newlabel{th1proof}{{4}{4}{}{theorem.4.1}{}}
\newlabel{th1proof@cref}{{[section][4][]4}{[1][4][]4}}
\newlabel{thetaFast}{{17}{4}{}{equation.4.17}{}}
\newlabel{thetaFast@cref}{{[equation][17][]17}{[1][4][]4}}
\newlabel{omegaFast}{{18}{4}{}{equation.4.18}{}}
\newlabel{omegaFast@cref}{{[equation][18][]18}{[1][4][]4}}
\newlabel{omegaFastFinal}{{19}{4}{}{equation.4.19}{}}
\newlabel{omegaFastFinal@cref}{{[equation][19][]19}{[1][4][]4}}
\newlabel{omegaInfty}{{20}{4}{}{equation.4.20}{}}
\newlabel{omegaInfty@cref}{{[equation][20][]20}{[1][4][]4}}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{dalal2020tale}
\citation{dalal2020tale}
\newlabel{odetheta}{{21}{5}{}{equation.4.21}{}}
\newlabel{odetheta@cref}{{[equation][21][]21}{[1][5][]5}}
\newlabel{covariance}{{22}{5}{}{equation.4.22}{}}
\newlabel{covariance@cref}{{[equation][22][]22}{[1][5][]5}}
\newlabel{odethetafinal}{{23}{5}{}{equation.4.23}{}}
\newlabel{odethetafinal@cref}{{[equation][23][]23}{[1][5][]5}}
\newlabel{corollary4_2}{{4.2}{5}{}{theorem.4.2}{}}
\newlabel{corollary4_2@cref}{{[corollary][2][4]4.2}{[1][5][]5}}
\newlabel{theorem2}{{4.3}{5}{}{theorem.4.3}{}}
\newlabel{theorem2@cref}{{[theorem][3][4]4.3}{[1][5][]5}}
\citation{Sutton2018book}
\citation{sutton2009fast}
\citation{baird1995residual,sutton2009fast}
\citation{baird1995residual,sutton2009fast,maei2011gradient}
\newlabel{randomwalk}{{1}{6}{Random walk}{figure.1}{}}
\newlabel{randomwalk@cref}{{[figure][1][]1}{[1][6][]6}}
\newlabel{bairdexample}{{2}{6}{7-state version of Baird's off-policy counterexample}{figure.2}{}}
\newlabel{bairdexample@cref}{{[figure][2][]2}{[1][6][]6}}
\citation{schwartz1993reinforcement}
\citation{korda2015td}
\citation{xu2020reanalysis}
\newlabel{differenceRandVMQ}{{2}{7}{Difference between R-learning and tabular VMQ}{table.2}{}}
\newlabel{differenceRandVMQ@cref}{{[table][2][]2}{[1][6][]7}}
\newlabel{DependentFull}{{3(a)}{7}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{DependentFull@cref}{{[subfigure][1][3]3(a)}{[1][6][]7}}
\newlabel{sub@DependentFull}{{(a)}{7}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\newlabel{TabularFull}{{3(b)}{7}{Subfigure 3(b)}{subfigure.3.2}{}}
\newlabel{TabularFull@cref}{{[subfigure][2][3]3(b)}{[1][6][]7}}
\newlabel{sub@TabularFull}{{(b)}{7}{Subfigure 3(b)\relax }{subfigure.3.2}{}}
\newlabel{InvertedFull}{{3(c)}{7}{Subfigure 3(c)}{subfigure.3.3}{}}
\newlabel{InvertedFull@cref}{{[subfigure][3][3]3(c)}{[1][6][]7}}
\newlabel{sub@InvertedFull}{{(c)}{7}{Subfigure 3(c)\relax }{subfigure.3.3}{}}
\newlabel{CounterExampleFull}{{3(d)}{7}{Subfigure 3(d)}{subfigure.3.4}{}}
\newlabel{CounterExampleFull@cref}{{[subfigure][4][3]3(d)}{[1][6][]7}}
\newlabel{sub@CounterExampleFull}{{(d)}{7}{Subfigure 3(d)\relax }{subfigure.3.4}{}}
\newlabel{Evaluation_full}{{3}{7}{Learning curves of four evaluation environments}{figure.3}{}}
\newlabel{Evaluation_full@cref}{{[figure][3][]3}{[1][6][]7}}
\citation{Sutton2018book}
\citation{Sutton2018book}
\citation{schulman2015trust}
\citation{schulman2017proximal}
\citation{langley00}
\bibdata{example_paper}
\bibcite{baird1995residual}{{1}{1995}{{Baird et~al.}}{{}}}
\newlabel{MazeFull}{{4(a)}{8}{Subfigure 4(a)}{subfigure.4.1}{}}
\newlabel{MazeFull@cref}{{[subfigure][1][4]4(a)}{[1][6][]8}}
\newlabel{sub@MazeFull}{{(a)}{8}{Subfigure 4(a)\relax }{subfigure.4.1}{}}
\newlabel{CliffWalkingFull}{{4(b)}{8}{Subfigure 4(b)}{subfigure.4.2}{}}
\newlabel{CliffWalkingFull@cref}{{[subfigure][2][4]4(b)}{[1][6][]8}}
\newlabel{sub@CliffWalkingFull}{{(b)}{8}{Subfigure 4(b)\relax }{subfigure.4.2}{}}
\newlabel{MountainCarFull}{{4(c)}{8}{Subfigure 4(c)}{subfigure.4.3}{}}
\newlabel{MountainCarFull@cref}{{[subfigure][3][4]4(c)}{[1][6][]8}}
\newlabel{sub@MountainCarFull}{{(c)}{8}{Subfigure 4(c)\relax }{subfigure.4.3}{}}
\newlabel{AcrobotFull}{{4(d)}{8}{Subfigure 4(d)}{subfigure.4.4}{}}
\newlabel{AcrobotFull@cref}{{[subfigure][4][4]4(d)}{[1][6][]8}}
\newlabel{sub@AcrobotFull}{{(d)}{8}{Subfigure 4(d)\relax }{subfigure.4.4}{}}
\newlabel{Complete_full}{{4}{8}{Learning curves of four control environments}{figure.4}{}}
\newlabel{Complete_full@cref}{{[figure][4][]4}{[1][6][]8}}
\bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano et~al.}}{{Bas-Serrano, Curi, Krause, and Neu}}}
\bibcite{borkar1997stochastic}{{3}{1997}{{Borkar}}{{}}}
\bibcite{borkar2000ode}{{4}{2000}{{Borkar \& Meyn}}{{Borkar and Meyn}}}
\bibcite{chen2023modified}{{5}{2023}{{Chen et~al.}}{{Chen, Ma, Li, Yang, Yang, and Gao}}}
\bibcite{dalal2020tale}{{6}{2020}{{Dalal et~al.}}{{Dalal, Szorenyi, and Thoppe}}}
\bibcite{devlin2012dynamic}{{7}{2012}{{Devlin \& Kudenko}}{{Devlin and Kudenko}}}
\bibcite{feng2019kernel}{{8}{2019}{{Feng et~al.}}{{Feng, Li, and Liu}}}
\bibcite{givchi2015quasi}{{9}{2015}{{Givchi \& Palhang}}{{Givchi and Palhang}}}
\bibcite{hackman2012faster}{{10}{2012}{{Hackman}}{{}}}
\bibcite{hallak2016generalized}{{11}{2016}{{Hallak et~al.}}{{Hallak, Tamar, Munos, and Mannor}}}
\bibcite{hirsch1989convergent}{{12}{1989}{{Hirsch}}{{}}}
\bibcite{johnson2013accelerating}{{13}{2013}{{Johnson \& Zhang}}{{Johnson and Zhang}}}
\bibcite{korda2015td}{{14}{2015}{{Korda \& La}}{{Korda and La}}}
\bibcite{langley00}{{15}{2000}{{Langley}}{{}}}
\bibcite{liu2015finite}{{16}{2015}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}}
\bibcite{liu2016proximal}{{17}{2016}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}}
\bibcite{liu2018proximal}{{18}{2018}{{Liu et~al.}}{{Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}}}
\bibcite{maei2011gradient}{{19}{2011}{{Maei}}{{}}}
\bibcite{ng1999policy}{{20}{1999}{{Ng et~al.}}{{Ng, Harada, and Russell}}}
\bibcite{pan2017accelerated}{{21}{2017}{{Pan et~al.}}{{Pan, White, and White}}}
\bibcite{schulman2015trust}{{22}{2015}{{Schulman et~al.}}{{Schulman, Levine, Abbeel, Jordan, and Moritz}}}
\bibcite{schulman2017proximal}{{23}{2017}{{Schulman et~al.}}{{Schulman, Wolski, Dhariwal, Radford, and Klimov}}}
\bibcite{schwartz1993reinforcement}{{24}{1993}{{Schwartz}}{{}}}
\bibcite{sutton2009fast}{{25}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}}
\bibcite{sutton1988learning}{{26}{1988}{{Sutton}}{{}}}
\bibcite{Sutton2018book}{{27}{2018}{{Sutton \& Barto}}{{Sutton and Barto}}}
\bibcite{sutton2008convergent}{{28}{2008}{{Sutton et~al.}}{{Sutton, Maei, and Szepesv{\'a}ri}}}
\bibcite{sutton2016emphatic}{{29}{2016}{{Sutton et~al.}}{{Sutton, Mahmood, and White}}}
\bibcite{tsitsiklis1997analysis}{{30}{1997}{{Tsitsiklis \& Van~Roy}}{{Tsitsiklis and Van~Roy}}}
\bibcite{xu2019reanalysis}{{31}{2019}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}}
\bibcite{xu2020reanalysis}{{32}{2020}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}}
\bibcite{zhang2022truncated}{{33}{2022}{{Zhang \& Whiteson}}{{Zhang and Whiteson}}}
\bibcite{zhou2021machine}{{34}{2021}{{Zhou}}{{}}}
\bibstyle{icml2024}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{sutton2009fast}
\newlabel{proofcorollary4_2}{{A.1}{11}{}{subsection.A.1}{}}
\newlabel{proofcorollary4_2@cref}{{[subappendix][1][2147483647,1]A.1}{[1][11][]11}}
\newlabel{matrixassumption}{{A.1}{11}{}{theorem.A.1}{}}
\newlabel{matrixassumption@cref}{{[assumption][1][2147483647,1]A.1}{[1][11][]11}}
\newlabel{stepsizeassumption}{{A.2}{11}{}{theorem.A.2}{}}
\newlabel{stepsizeassumption@cref}{{[assumption][2][2147483647,1]A.2}{[1][11][]11}}
\newlabel{sparseprojection}{{A.3}{11}{}{theorem.A.3}{}}
\newlabel{sparseprojection@cref}{{[definition][3][2147483647,1]A.3}{[1][11][]11}}
\newlabel{sparseprojectiontheta}{{30}{11}{}{equation.A.30}{}}
\newlabel{sparseprojectiontheta@cref}{{[equation][30][2147483647]30}{[1][11][]11}}
\newlabel{sparseprojectionomega}{{31}{11}{}{equation.A.31}{}}
\newlabel{sparseprojectionomega@cref}{{[equation][31][2147483647]31}{[1][11][]11}}
\citation{hirsch1989convergent}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\newlabel{proofth2}{{A.2}{12}{}{subsection.A.2}{}}
\newlabel{proofth2@cref}{{[subappendix][2][2147483647,1]A.2}{[1][11][]12}}
\newlabel{thetavmtdcFastest}{{32}{12}{}{equation.A.32}{}}
\newlabel{thetavmtdcFastest@cref}{{[equation][32][2147483647]32}{[1][12][]12}}
\newlabel{uvmtdcFastest}{{33}{12}{}{equation.A.33}{}}
\newlabel{uvmtdcFastest@cref}{{[equation][33][2147483647]33}{[1][12][]12}}
\newlabel{omegavmtdcFastest}{{34}{12}{}{equation.A.34}{}}
\newlabel{omegavmtdcFastest@cref}{{[equation][34][2147483647]34}{[1][12][]12}}
\newlabel{omegavmtdcFastestFinal}{{35}{12}{}{equation.A.35}{}}
\newlabel{omegavmtdcFastestFinal@cref}{{[equation][35][2147483647]35}{[1][12][]12}}
\newlabel{omegavmtdcInfty}{{36}{12}{}{equation.A.36}{}}
\newlabel{omegavmtdcInfty@cref}{{[equation][36][2147483647]36}{[1][12][]12}}
\citation{hirsch1989convergent}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\newlabel{thetavmtdcFaster}{{37}{13}{}{equation.A.37}{}}
\newlabel{thetavmtdcFaster@cref}{{[equation][37][2147483647]37}{[1][13][]13}}
\newlabel{uvmtdcFaster}{{38}{13}{}{equation.A.38}{}}
\newlabel{uvmtdcFaster@cref}{{[equation][38][2147483647]38}{[1][13][]13}}
\newlabel{uvmtdcFasterFinal}{{39}{13}{}{equation.A.39}{}}
\newlabel{uvmtdcFasterFinal@cref}{{[equation][39][2147483647]39}{[1][13][]13}}
\newlabel{uvmtdcInfty}{{40}{13}{}{equation.A.40}{}}
\newlabel{uvmtdcInfty@cref}{{[equation][40][2147483647]40}{[1][13][]13}}
\newlabel{thetavmtdcSlowerFinal}{{42}{13}{}{equation.A.42}{}}
\newlabel{thetavmtdcSlowerFinal@cref}{{[equation][42][2147483647]42}{[1][13][]13}}
\newlabel{alg:algorithm 2}{{2}{14}{}{algorithm.2}{}}
\newlabel{alg:algorithm 2@cref}{{[algorithm][2][2147483647]2}{[1][14][]14}}
\newlabel{odethetavmtdcfinal}{{43}{14}{}{equation.A.43}{}}
\newlabel{odethetavmtdcfinal@cref}{{[equation][43][2147483647]43}{[1][14][]14}}
\newlabel{experimentaldetails}{{B}{14}{}{appendix.B}{}}
\newlabel{experimentaldetails@cref}{{[appendix][2][2147483647]B}{[1][14][]14}}
\newlabel{lrofways}{{3}{15}{Learning rates ($lr$) of four control experiments}{table.3}{}}
\newlabel{lrofways@cref}{{[table][3][2147483647]3}{[1][15][]15}}
\gdef \@abspage@last{15}
\begin{thebibliography}{34}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
\providecommand{\doi}[1]{doi: #1}\else
\providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi
\bibitem[Baird et~al.(1995)]{baird1995residual}
Baird, L. et~al.
\newblock Residual algorithms: Reinforcement learning with function approximation.
\newblock In \emph{Proc. 12th Int. Conf. Mach. Learn.}, pp.\ 30--37, 1995.
\bibitem[Bas-Serrano et~al.(2021)Bas-Serrano, Curi, Krause, and Neu]{basserrano2021logistic}
Bas-Serrano, J., Curi, S., Krause, A., and Neu, G.
\newblock Logistic q-learning.
\newblock In \emph{International Conference on Artificial Intelligence and Statistics}, pp.\ 3610--3618, 2021.
\bibitem[Borkar(1997)]{borkar1997stochastic}
Borkar, V.~S.
\newblock Stochastic approximation with two time scales.
\newblock \emph{Syst. \& Control Letters}, 29\penalty0 (5):\penalty0 291--294, 1997.
\bibitem[Borkar \& Meyn(2000)Borkar and Meyn]{borkar2000ode}
Borkar, V.~S. and Meyn, S.~P.
\newblock The ode method for convergence of stochastic approximation and reinforcement learning.
\newblock \emph{SIAM J. Control Optim.}, 38\penalty0 (2):\penalty0 447--469, 2000.
\bibitem[Chen et~al.(2023)Chen, Ma, Li, Yang, Yang, and Gao]{chen2023modified}
Chen, X., Ma, X., Li, Y., Yang, G., Yang, S., and Gao, Y.
\newblock Modified retrace for off-policy temporal difference learning.
\newblock In \emph{Uncertainty in Artificial Intelligence}, pp.\ 303--312. PMLR, 2023.
\bibitem[Dalal et~al.(2020)Dalal, Szorenyi, and Thoppe]{dalal2020tale}
Dalal, G., Szorenyi, B., and Thoppe, G.
\newblock A tale of two-timescale reinforcement learning with the tightest finite-time bound.
\newblock In \emph{Proceedings of the AAAI Conference on Artificial Intelligence}, volume~34, pp.\ 3701--3708, 2020.
\bibitem[Devlin \& Kudenko(2012)Devlin and Kudenko]{devlin2012dynamic}
Devlin, S. and Kudenko, D.
\newblock Dynamic potential-based reward shaping.
\newblock In \emph{Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems}, pp.\ 433--440, 2012.
\bibitem[Feng et~al.(2019)Feng, Li, and Liu]{feng2019kernel}
Feng, Y., Li, L., and Liu, Q.
\newblock A kernel loss for solving the bellman equation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 15430--15441, 2019.
\bibitem[Givchi \& Palhang(2015)Givchi and Palhang]{givchi2015quasi}
Givchi, A. and Palhang, M.
\newblock Quasi newton temporal difference learning.
\newblock In \emph{Asian Conference on Machine Learning}, pp.\ 159--172, 2015.
\bibitem[Hackman(2012)]{hackman2012faster}
Hackman, L.
\newblock \emph{Faster Gradient-TD Algorithms}.
\newblock PhD thesis, University of Alberta, 2012.
\bibitem[Hallak et~al.(2016)Hallak, Tamar, Munos, and Mannor]{hallak2016generalized}
Hallak, A., Tamar, A., Munos, R., and Mannor, S.
\newblock Generalized emphatic temporal difference learning: bias-variance analysis.
\newblock In \emph{Proceedings of the 30th AAAI Conference on Artificial Intelligence}, pp.\ 1631--1637, 2016.
\bibitem[Hirsch(1989)]{hirsch1989convergent}
Hirsch, M.~W.
\newblock Convergent activation dynamics in continuous time networks.
\newblock \emph{Neural Netw.}, 2\penalty0 (5):\penalty0 331--349, 1989.
\bibitem[Johnson \& Zhang(2013)Johnson and Zhang]{johnson2013accelerating}
Johnson, R. and Zhang, T.
\newblock Accelerating stochastic gradient descent using predictive variance reduction.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 315--323, 2013.
\bibitem[Korda \& La(2015)Korda and La]{korda2015td}
Korda, N. and La, P.
\newblock On td (0) with function approximation: Concentration bounds and a centered variant with exponential convergence.
\newblock In \emph{International conference on machine learning}, pp.\ 626--634. PMLR, 2015.
\bibitem[Langley(2000)]{langley00}
Langley, P.
\newblock Crafting papers on machine learning.
\newblock In Langley, P. (ed.), \emph{Proceedings of the 17th International Conference on Machine Learning (ICML 2000)}, pp.\ 1207--1216, Stanford, CA, 2000. Morgan Kaufmann.
\bibitem[Liu et~al.(2015)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik]{liu2015finite}
Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., and Petrik, M.
\newblock Finite-sample analysis of proximal gradient td algorithms.
\newblock In \emph{Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence}, pp.\ 504--513, 2015.
\bibitem[Liu et~al.(2016)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik]{liu2016proximal}
Liu, B., Liu, J., Ghavamzadeh, M., Mahadevan, S., and Petrik, M.
\newblock Proximal gradient temporal difference learning algorithms.
\newblock In \emph{Proceedings of the International Joint Conference on Artificial Intelligence}, pp.\ 4195--4199, 2016.
\bibitem[Liu et~al.(2018)Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik]{liu2018proximal}
Liu, B., Gemp, I., Ghavamzadeh, M., Liu, J., Mahadevan, S., and Petrik, M.
\newblock Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity.
\newblock \emph{Journal of Artificial Intelligence Research}, 63:\penalty0 461--494, 2018.
\bibitem[Maei(2011)]{maei2011gradient}
Maei, H.~R.
\newblock \emph{Gradient temporal-difference learning algorithms}.
\newblock PhD thesis, University of Alberta, 2011.
\bibitem[Ng et~al.(1999)Ng, Harada, and Russell]{ng1999policy}
Ng, A.~Y., Harada, D., and Russell, S.
\newblock Policy invariance under reward transformations: Theory and application to reward shaping.
\newblock In \emph{Proc. 16th Int. Conf. Mach. Learn.}, pp.\ 278--287, 1999.
\bibitem[Pan et~al.(2017)Pan, White, and White]{pan2017accelerated}
Pan, Y., White, A., and White, M.
\newblock Accelerated gradient temporal difference learning.
\newblock In \emph{Proceedings of the 21st AAAI Conference on Artificial Intelligence}, pp.\ 2464--2470, 2017.
\bibitem[Schulman et~al.(2015)Schulman, Levine, Abbeel, Jordan, and Moritz]{schulman2015trust}
Schulman, J., Levine, S., Abbeel, P., Jordan, M., and Moritz, P.
\newblock Trust region policy optimization.
\newblock In \emph{International Conference on Machine Learning}, pp.\ 1889--1897, 2015.
\bibitem[Schulman et~al.(2017)Schulman, Wolski, Dhariwal, Radford, and Klimov]{schulman2017proximal}
Schulman, J., Wolski, F., Dhariwal, P., Radford, A., and Klimov, O.
\newblock Proximal policy optimization algorithms.
\newblock \emph{arXiv preprint arXiv:1707.06347}, 2017.
\bibitem[Schwartz(1993)]{schwartz1993reinforcement}
Schwartz, A.
\newblock A reinforcement learning method for maximizing undiscounted rewards.
\newblock In \emph{Proc. 10th Int. Conf. Mach. Learn.}, volume 298, pp.\ 298--305, 1993.
\bibitem[Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora]{sutton2009fast}
Sutton, R., Maei, H., Precup, D., Bhatnagar, S., Silver, D., Szepesv{\'a}ri, C., and Wiewiora, E.
\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation.
\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, pp.\ 993--1000, 2009.
\bibitem[Sutton(1988)]{sutton1988learning}
Sutton, R.~S.
\newblock Learning to predict by the methods of temporal differences.
\newblock \emph{Machine learning}, 3\penalty0 (1):\penalty0 9--44, 1988.
\bibitem[Sutton \& Barto(2018)Sutton and Barto]{Sutton2018book}
Sutton, R.~S. and Barto, A.~G.
\newblock \emph{Reinforcement Learning: An Introduction}.
\newblock The MIT Press, second edition, 2018.
\bibitem[Sutton et~al.(2008)Sutton, Maei, and Szepesv{\'a}ri]{sutton2008convergent}
Sutton, R.~S., Maei, H.~R., and Szepesv{\'a}ri, C.
\newblock A convergent $ o (n) $ temporal-difference algorithm for off-policy learning with linear function approximation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 1609--1616. Cambridge, MA: MIT Press, 2008.
\bibitem[Sutton et~al.(2016)Sutton, Mahmood, and White]{sutton2016emphatic}
Sutton, R.~S., Mahmood, A.~R., and White, M.
\newblock An emphatic approach to the problem of off-policy temporal-difference learning.
\newblock \emph{The Journal of Machine Learning Research}, 17\penalty0 (1):\penalty0 2603--2631, 2016.
\bibitem[Tsitsiklis \& Van~Roy(1997)Tsitsiklis and Van~Roy]{tsitsiklis1997analysis}
Tsitsiklis, J.~N. and Van~Roy, B.
\newblock Analysis of temporal-difference learning with function approximation.
\newblock In \emph{Advances in Neural Information Processing Systems}, pp.\ 1075--1081, 1997.
\bibitem[Xu et~al.(2019)Xu, Wang, Zhou, and Liang]{xu2019reanalysis}
Xu, T., Wang, Z., Zhou, Y., and Liang, Y.
\newblock Reanalysis of variance reduced temporal difference learning.
\newblock In \emph{International Conference on Learning Representations}, 2019.
\bibitem[Xu et~al.(2020)Xu, Wang, Zhou, and Liang]{xu2020reanalysis}
Xu, T., Wang, Z., Zhou, Y., and Liang, Y.
\newblock Reanalysis of variance reduced temporal difference learning.
\newblock \emph{arXiv preprint arXiv:2001.01898}, 2020.
\bibitem[Zhang \& Whiteson(2022)Zhang and Whiteson]{zhang2022truncated}
Zhang, S. and Whiteson, S.
\newblock Truncated emphatic temporal difference methods for prediction and control.
\newblock \emph{The Journal of Machine Learning Research}, 23\penalty0 (1):\penalty0 6859--6917, 2022.
\bibitem[Zhou(2021)]{zhou2021machine}
Zhou, Z.-H.
\newblock \emph{Machine learning}.
\newblock Springer Nature, 2021.
\end{thebibliography}
@inproceedings{langley00,
author = {P. Langley},
title = {Crafting Papers on Machine Learning},
year = {2000},
pages = {1207--1216},
editor = {Pat Langley},
booktitle = {Proceedings of the 17th International Conference
on Machine Learning (ICML 2000)},
address = {Stanford, CA},
publisher = {Morgan Kaufmann}
}
@TechReport{mitchell80,
author = "T. M. Mitchell",
title = "The Need for Biases in Learning Generalizations",
institution = "Computer Science Department, Rutgers University",
year = "1980",
  address =      "New Brunswick, NJ",
}
@phdthesis{kearns89,
author = {M. J. Kearns},
title = {Computational Complexity of Machine Learning},
school = {Department of Computer Science, Harvard University},
year = {1989}
}
@Book{MachineLearningI,
editor = "R. S. Michalski and J. G. Carbonell and T.
M. Mitchell",
title = "Machine Learning: An Artificial Intelligence
Approach, Vol. I",
publisher = "Tioga",
year = "1983",
address = "Palo Alto, CA"
}
@Book{DudaHart2nd,
author = "R. O. Duda and P. E. Hart and D. G. Stork",
title = "Pattern Classification",
publisher = "John Wiley and Sons",
edition = "2nd",
year = "2000"
}
@misc{anonymous,
title= {Suppressed for Anonymity},
author= {Author, N. N.},
year= {2021}
}
@InCollection{Newell81,
author = "A. Newell and P. S. Rosenbloom",
title = "Mechanisms of Skill Acquisition and the Law of
Practice",
booktitle = "Cognitive Skills and Their Acquisition",
pages = "1--51",
publisher = "Lawrence Erlbaum Associates, Inc.",
year = "1981",
editor = "J. R. Anderson",
chapter = "1",
address = "Hillsdale, NJ"
}
@Article{Samuel59,
author = "A. L. Samuel",
title = "Some Studies in Machine Learning Using the Game of
Checkers",
journal = "IBM Journal of Research and Development",
year = "1959",
volume = "3",
number = "3",
pages = "211--229"
}
@book{em:86,
editor = "Engelmore, Robert and Morgan, Anthony",
title = "Blackboard Systems",
year = 1986,
address = "Reading, Mass.",
publisher = "Addison-Wesley",
}
@inproceedings{dalal2018finite,
title={Finite sample analyses for TD (0) with function approximation},
author={Dalal, Gal and Szorenyi, Balazs and Thoppe, Gugan and Mannor, Shie},
booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence},
pages={6144--6160},
year={2018}
}
@inproceedings{xu2019reanalysis,
title={Reanalysis of Variance Reduced Temporal Difference Learning},
author={Xu, Tengyu and Wang, Zhe and Zhou, Yi and Liang, Yingbin},
booktitle={International Conference on Learning Representations},
year={2019}
}
@inproceedings{c:83,
author = "Clancey, William J.",
year = 1983,
title = "{Communication, Simulation, and Intelligent
Agents: Implications of Personal Intelligent Machines
for Medical Education}",
booktitle="Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}",
pages = "556-560",
address = "Menlo Park, Calif",
publisher = "{IJCAI Organization}",
}
@inproceedings{c:84,
author = "Clancey, William J.",
year = 1984,
title = "{Classification Problem Solving}",
booktitle = "Proceedings of the Fourth National
Conference on Artificial Intelligence",
pages = "45-54",
address = "Menlo Park, Calif.",
publisher="AAAI Press",
}
@article{r:80,
author = {Robinson, Arthur L.},
title = {New Ways to Make Microcircuits Smaller},
volume = {208},
number = {4447},
pages = {1019--1022},
year = {1980},
doi = {10.1126/science.208.4447.1019},
publisher = {American Association for the Advancement of Science},
issn = {0036-8075},
URL = {https://science.sciencemag.org/content/208/4447/1019},
eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf},
journal = {Science},
}
@article{r:80x,
author = "Robinson, Arthur L.",
year = 1980,
title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}",
journal = "Science",
volume = 208,
pages = "1019-1026",
}
@article{hcr:83,
title = {Strategic explanations for a diagnostic consultation system},
journal = {International Journal of Man-Machine Studies},
volume = {20},
number = {1},
pages = {3-19},
year = {1984},
issn = {0020-7373},
doi = {https://doi.org/10.1016/S0020-7373(84)80003-6},
url = {https://www.sciencedirect.com/science/article/pii/S0020737384800036},
author = {Diane Warner Hasling and William J. Clancey and Glenn Rennels},
abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy. We describe the aspects of NEOMYCIN that make abstract strategic explanations possible---the representation of strategic knowledge explicitly and separately from domain knowledge---and demonstrate how this representation can be used to generate explanations.}
}
@article{hcrt:83,
author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas",
year = 1983,
title = "{Strategic Explanations in Consultation---Duplicate}",
journal = "The International Journal of Man-Machine Studies",
volume = 20,
number = 1,
pages = "3-19",
}
@techreport{r:86,
author = "Rice, James",
year = 1986,
title = "{Poligon: A System for Parallel Problem Solving}",
type = "Technical Report",
number = "KSL-86-19",
institution = "Dept.\ of Computer Science, Stanford Univ.",
}
@phdthesis{c:79,
author = "Clancey, William J.",
year = 1979,
title = "{Transfer of Rule-Based Expertise
through a Tutorial Dialogue}",
type = "{Ph.D.} diss.",
school = "Dept.\ of Computer Science, Stanford Univ.",
address = "Stanford, Calif.",
}
@unpublished{c:21,
author = "Clancey, William J.",
title = "{The Engineering of Qualitative Models}",
year = 2021,
note = "Forthcoming",
}
@misc{c:22,
title={Attention Is All You Need},
author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
year={2017},
eprint={1706.03762},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{c:23,
title = "Pluto: The 'Other' Red Planet",
author = "{NASA}",
howpublished = "\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}",
year = 2015,
note = "Accessed: 2018-12-06"
}
@article{r:80x,
author = "Robinson, Arthur L.",
year = 1980,
title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}",
journal = "Science",
volume = 208,
pages = "1019-1026",
}
@article{hcrt:83,
author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas",
year = 1983,
title = "{Strategic Explanations in Consultation---Duplicate}",
journal = "The International Journal of Man-Machine Studies",
volume = 20,
number = 1,
pages = "3-19",
}
@article{xu2013online,
title={Online learning control using adaptive critic designs with sparse kernel machines},
author={Xu, Xin and Hou, Zhongsheng and Lian, Chuanqiang and He, Haibo},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
volume={24},
number={5},
pages={762--775},
year={2013},
publisher={IEEE}
}
@article{bertsekas2017value,
title={Value and policy iterations in optimal control and adaptive dynamic programming},
author={Bertsekas, Dimitri P},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
year={2017},
volume={28},
number={3},
pages={500 - 509},
publisher={IEEE}
}
@phdthesis{hackman2012faster,
title={Faster Gradient-TD Algorithms},
author={Hackman, Leah},
year={2012},
school={University of Alberta}
}
@inproceedings{harutyunyan2015multi,
title={Multi-scale reward shaping via an off-policy ensemble},
author={Harutyunyan, Anna and Brys, Tim and Vrancx, Peter and Now{\'e}, Ann},
booktitle={Proc. 2015 Int. Conf. Autonomous Agents and Multiagent Systems},
pages={1641--1642},
year={2015},
organization={International Foundation for Autonomous Agents and Multiagent Systems}
}
@inproceedings{harutyunyan2015expressing,
title={Expressing Arbitrary Reward Functions as Potential-Based Advice.},
author={Harutyunyan, Anna and Devlin, Sam and Vrancx, Peter and Now{\'e}, Ann},
booktitle={AAAI},
pages={2652--2658},
year={2015}
}
@article{wiewiora2003potential,
title={Potential-based shaping and Q-value initialization are equivalent},
author={Wiewiora, Eric},
journal={J. Artif. Intell. Res.},
volume={19},
pages={205--208},
year={2003}
}
@article{grzes2010online,
title={Online learning of shaping rewards in reinforcement learning},
author={Grze{\'s}, Marek and Kudenko, Daniel},
journal={Neural Netw.},
volume={23},
number={4},
pages={541--550},
year={2010},
publisher={Elsevier}
}
@inproceedings{marthi2007automatic,
title={Automatic shaping and decomposition of reward functions},
author={Marthi, Bhaskara},
booktitle={Proc. 24th Int. Conf. Mach. Learn.},
pages={601--608},
year={2007}
}
@inproceedings{laud2003influence,
title={The Influence of Reward on the Speed of Reinforcement Learning: An Analysis of Shaping},
author={Laud, Adam and Dejong, Gerald},
booktitle={Proc. 20th Int. Conf. Mach. Learn.},
pages={440--447},
year={2003}
}
@phdthesis{laud2004theory,
title={Theory and application of reward shaping in reinforcement learning},
author={Laud, Adam Daniel},
year={2004},
school={University of Illinois at Urbana-Champaign}
}
@article{geist2013algorithmic,
title={Algorithmic survey of parametric value function approximation},
author={Geist, Matthieu and Pietquin, Olivier},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
volume={24},
number={6},
pages={845--867},
year={2013},
publisher={IEEE}
}
@article{furmston2016approximate,
title={Approximate Newton Methods for Policy Search in Markov Decision Processes},
author={Furmston, Thomas and Lever, Guy and Barber, David},
journal={J. Mach. Learn. Res.},
volume={17},
number={227},
pages={1--51},
year={2016}
}
@article{silver2016mastering,
title={Mastering the game of Go with deep neural networks and tree search},
author={Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
journal={Nature},
volume={529},
number={7587},
pages={484--489},
year={2016},
publisher={Nature Publishing Group}
}
@article{mnih2015human,
title={Human-level control through deep reinforcement learning},
author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
journal={Nature},
volume={518},
number={7540},
pages={529--533},
year={2015},
publisher={Nature Publishing Group}
}
@inproceedings{guo2014deep,
title={Deep learning for real-time Atari game play using offline Monte-Carlo tree search planning},
author={Guo, Xiaoxiao and Singh, Satinder and Lee, Honglak and Lewis, Richard L and Wang, Xiaoshi},
booktitle={Advances in Neural Information Processing Systems},
pages={3338--3346},
publisher={Cambridge, MA: MIT Press},
year={2014}
}
@inproceedings{scherrer2010should,
title={Should one compute the Temporal Difference fix point or minimize the Bellman Residual? The unified oblique projection view},
author={Scherrer, Bruno},
booktitle={Proc. 27th Int. Conf. Mach. Learn.},
pages={959--966},
year={2010}
}
@article{hirsch1989convergent,
title={Convergent activation dynamics in continuous time networks},
author={Hirsch, Morris W},
journal={Neural Netw.},
volume={2},
number={5},
pages={331--349},
year={1989},
publisher={Elsevier}
}
@article{borkar1997stochastic,
title={Stochastic approximation with two time scales},
author={Borkar, Vivek S},
journal={Syst. \& Control Letters},
volume={29},
number={5},
pages={291--294},
year={1997},
publisher={Elsevier}
}
@article{ortner2013adaptive,
title={Adaptive aggregation for reinforcement learning in average reward Markov decision processes},
author={Ortner, Ronald},
journal={Annals Oper. Res.},
volume={208},
number={1},
pages={321--336},
year={2013},
publisher={Springer}
}
@article{jaksch2010near,
title={Near-optimal regret bounds for reinforcement learning},
author={Jaksch, Thomas and Ortner, Ronald and Auer, Peter},
journal={Journal of Machine Learning Research},
number={Apr},
volume={11},
pages={1563--1600},
year={2010}
}
@article{ortner2007logarithmic,
title={Logarithmic online regret bounds for undiscounted reinforcement learning},
author={Ortner, P and Auer, R},
journal={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
volume={19},
pages={49},
year={2007}
}
@article{das1999solving,
title={Solving semi-Markov decision problems using average reward reinforcement learning},
author={Das, Tapas K and Gosavi, Abhijit and Mahadevan, Sridhar and Marchalleck, Nicholas},
journal={Management Science},
volume={45},
number={4},
pages={560--574},
year={1999},
publisher={INFORMS}
}
@article{abounadi2001learning,
title={Learning algorithms for Markov decision processes with average cost},
author={Abounadi, Jinane and Bertsekas, D and Borkar, Vivek S},
journal={SIAM J. Control Optim.},
volume={40},
number={3},
pages={681--698},
year={2001},
publisher={SIAM}
}
@inproceedings{singh1994reinforcement,
title={Reinforcement learning algorithms for average-payoff Markovian decision processes},
author={Singh, Satinder P},
booktitle={AAAI},
volume={94},
pages={700--705},
year={1994}
}
@inproceedings{schwartz1993reinforcement,
title={A reinforcement learning method for maximizing undiscounted rewards},
author={Schwartz, Anton},
booktitle={Proc. 10th Int. Conf. Mach. Learn.},
volume={298},
pages={298--305},
year={1993}
}
@inproceedings{yang2016efficient,
title={Efficient Average Reward Reinforcement Learning Using Constant Shifting Values},
author={Yang, Shangdong and Gao, Yang and An, Bo and Wang, Hao and Chen, Xingguo},
booktitle={Thirtieth AAAI Conference on Artificial Intelligence},
pages={2258-2264},
year={2016}
}
@inproceedings{devlin2012dynamic,
title={Dynamic potential-based reward shaping},
author={Devlin, Sam and Kudenko, Daniel},
booktitle={Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems},
pages={433--440},
year={2012}
}
@inproceedings{ng1999policy,
title={Policy invariance under reward transformations: Theory and application to reward shaping},
author={Ng, Andrew Y and Harada, Daishi and Russell, Stuart},
booktitle={Proc. 16th Int. Conf. Mach. Learn.},
pages={278--287},
year={1999}
}
@article{borkar2000ode,
title={The ODE method for convergence of stochastic approximation and reinforcement learning},
author={Borkar, Vivek S and Meyn, Sean P},
journal={SIAM J. Control Optim.},
volume={38},
number={2},
pages={447--469},
year={2000},
publisher={SIAM}
}
@phdthesis{maei2011gradient,
title={Gradient temporal-difference learning algorithms},
author={Maei, Hamid Reza},
year={2011},
school={University of Alberta}
}
@phdthesis{baird1999reinforcement,
title={Reinforcement learning through gradient descent},
author={Baird III, Leemon C},
year={1999},
school={US Air Force Academy, US}
}
@PHDTHESIS{Driessens2004,
AUTHOR ="Kurt Driessens",
TITLE ="Relational Reinforcement Learning",
SCHOOL ="Catholic University of Leuven",
YEAR ="2004",
}
@article{tsitsiklis1996feature,
title={Feature-based methods for large scale dynamic programming},
author={Tsitsiklis, John N and Van Roy, Benjamin},
journal={Mach. Learn.},
volume={22},
number={1-3},
pages={59--94},
year={1996},
publisher={Springer}
}
@inproceedings{chen2009apply,
title={Apply ant colony optimization to Tetris},
author={Chen, X. and Wang, H. and Wang, W. and Shi, Y. and Gao, Y.},
booktitle={Proceedings of the 11th Annual Conference on Genetic and Evolutionary Computation (GECCO)},
pages={1741--1742},
year={2009},
organization={ACM}
}
@incollection{farias2006tetris,
title={Tetris: A study of randomized constraint sampling},
author={Farias, Vivek F and Van Roy, Benjamin},
booktitle={Probabilistic and Randomized Methods for Design Under Uncertainty},
pages={189--201},
year={2006},
publisher={Springer}
}
@article{bertsekas1996temporal,
title={Temporal differences-based policy iteration and applications in neuro-dynamic programming},
author={Bertsekas, Dimitri P and Ioffe, Sergey},
journal={Lab. for Info. and Decision Systems Report LIDS-P-2349, MIT, Cambridge, MA},
year={1996},
publisher={Citeseer}
}
@inproceedings{kakade2001natural,
title={A Natural Policy Gradient.},
author={Kakade, Sham},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
volume={14},
pages={1531--1538},
year={2001}
}
@article{peters2008natural,
title={Natural actor-critic},
author={Peters, Jan and Schaal, Stefan},
journal={Neurocomputing},
volume={71},
number={7},
pages={1180--1190},
year={2008},
publisher={Elsevier}
}
@article{baxter2001infinite,
title={Infinite-horizon policy-gradient estimation},
author={Baxter, Jonathan and Bartlett, Peter L.},
journal={J. Artif. Intell. Res.},
pages={319--350},
year={2001}
}
@inproceedings{sutton1999policy,
title={Policy Gradient Methods for Reinforcement Learning with Function Approximation.},
author={Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay and others},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={1057--1063},
year={1999}
}
@inproceedings{bohm2005evolutionary,
title={An evolutionary approach to tetris},
author={B{\"o}hm, Niko and K{\'o}kai, Gabriella and Mandl, Stefan},
booktitle={Proc. 6th Metaheuristics Int. Conf.},
pages={137-148},
year={2005}
}
@article{szita2006learning,
title={Learning Tetris using the noisy cross-entropy method},
author={Szita, Istv{\'a}n and L{\"o}rincz, Andr{\'a}s},
journal={Neural Comput.},
volume={18},
number={12},
pages={2936--2941},
year={2006},
publisher={MIT Press}
}
@inproceedings{thiery2010least,
title={Least-Squares $\lambda$ Policy Iteration: Bias-Variance Trade-off in Control Problems},
author={Thiery, Christophe and Scherrer, Bruno},
booktitle={Proc. 27th Int. Conf. Mach. Learn.},
pages={1071--1078},
year={2010}
}
@inproceedings{gabillon2013approximate,
title={Approximate dynamic programming finally performs well in the game of Tetris},
author={Gabillon, Victor and Ghavamzadeh, Mohammad and Scherrer, Bruno},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={1754--1762},
year={2013}
}
@article{scherrer2013performance,
title={Performance bounds for $\lambda$ policy iteration and application to the game of Tetris},
author={Scherrer, Bruno},
journal={J. Mach. Learn. Res.},
volume={14},
number={1},
pages={1181--1227},
year={2013},
publisher={JMLR. org}
}
@article{thiery2009improvements,
title={Improvements on Learning Tetris with Cross Entropy},
author={Thiery, Christophe and Scherrer, Bruno},
journal={Int. Computer Games Assoc. J.},
volume={32},
number={1},
pages={23--33},
year={2009}
}
@article{scherrer2015approximate,
title={Approximate Modified Policy Iteration and its Application to the Game of Tetris},
author={Scherrer, Bruno and Ghavamzadeh, Mohammad and Gabillon, Victor and Lesner, Boris and Geist, Matthieu},
journal={J. Mach. Learn. Res.},
volume={16},
pages={1629--1676},
year={2015}
}
@article{efron2004least,
title={Least angle regression},
author={Efron, Bradley and Hastie, Trevor and Johnstone, Iain and Tibshirani, Robert and others},
journal={The Annals of statistics},
volume={32},
number={2},
pages={407--499},
year={2004},
publisher={Institute of Mathematical Statistics}
}
@MASTERSTHESIS{Brzustowski1992,
author ={John Brzustowski},
title ={Can you win at tetris?},
school = {University of British Columbia},
year ={1992}
}
@Article{Breukelaar04,
author = {Ron Breukelaar and Erik D. Demaine and Susan
Hohenberger and Hendrik Jan Hoogeboom and Walter
A. Kosters and David Liben-Nowell},
title = {Tetris is Hard, Even to Approximate},
journal = {International Journal of Computational Geometry and
Applications},
year = {2004},
volume = {14},
number = {1--2},
pages = {41--68},
month = {April},
}
@book{Bertsekas1996,
author = {Bertsekas, D. and Tsitsiklis, J. N.},
title = {Neuro-Dynamic Programming},
year = {1996},
publisher = {Athena Scientific},
}
@inproceedings{maei2010gq,
title={GQ ($\lambda$): A general gradient algorithm for temporal-difference prediction learning with eligibility traces},
author={Maei, Hamid Reza and Sutton, Richard S},
booktitle={Proceedings of the Third Conference on Artificial General Intelligence},
volume={1},
pages={91--96},
year={2010}
}
@inproceedings{maei2010toward,
title={Toward off-policy learning control with function approximation},
author={Maei, Hamid R and Szepesv{\'a}ri, Csaba and Bhatnagar, Shalabh and Sutton, Richard S},
booktitle={Proc. 27th Int. Conf. Mach. Learn.},
pages={719--726},
year={2010}
}
@inproceedings{phua2007tracking,
title={Tracking value function dynamics to improve reinforcement learning with piecewise linear function approximation},
author={Phua, Chee Wee and Fitch, Robert},
booktitle={Proc. 24th Int. Conf. Mach. Learn.},
pages={751--758},
year={2007},
organization={ACM}
}
@inproceedings{szubert2014temporal,
title={Temporal difference learning of N-tuple networks for the game 2048},
author={Szubert, Marcin and Jaskowski, Wojciech},
booktitle={2014 IEEE Conference on Computational Intelligence and Games (CIG)},
pages={1--8},
year={2014},
organization={IEEE}
}
@article{chen2013online,
  title={Online Selective Kernel-based Temporal Difference Learning},
author={Chen, Xingguo and Gao, Yang and Wang, Ruili},
journal={IEEE Trans. Neural Netw. Learn. Syst.},
year={2013},
volume={24},
number={12},
pages={1944--1956},
publisher={IEEE}
}
@article{xu2007kernel,
title={Kernel-based least squares policy iteration for reinforcement learning},
author={Xu, Xin and Hu, Dewen and Lu, Xicheng},
journal={IEEE Trans. Neural Netw.},
volume={18},
number={4},
pages={973--992},
year={2007},
publisher={IEEE}
}
@INPROCEEDINGS{Engel03bayesmeets,
author = {Yaakov Engel and Shie Mannor and Ron Meir},
title = {Bayes meets {B}ellman: the {G}aussian process approach to temporal difference learning},
booktitle = {Proc. 20th Int. Conf. Mach. Learn.},
year = {2003},
pages = {154--161},
address={Washington, DC},
month={Aug.},
}
@inproceedings{robards2011sparse,
title={Sparse Kernel-SARSA ($\lambda$) with an eligibility trace},
author={Robards, M. and Sunehag, P. and Sanner, S. and Marthi, B.},
booktitle = {Proc. 22nd Eur. Conf. Mach. Learn.},
pages={1--17},
year={2011},
month={Sept.},
address = {Athens, Greece},
}
@conference{reisinger2008online,
title={{Online kernel selection for {B}ayesian reinforcement learning}},
author={Reisinger, J. and Stone, P. and Miikkulainen, R.},
booktitle={Proc. 25th Int. Conf. Mach. Learn.},
pages={816--823},
year={2008},
month={July},
address={ Helsinki, Finland},
}
@book{Sutton1998,
title={{Reinforcement learning: an introduction}},
author={Sutton, R.S. and Barto, A.G.},
year={1998},
publisher={MIT Press},
address={Cambridge, MA}
}
@book{Sutton2018book,
author = {Sutton, Richard S. and Barto, Andrew G.},
edition = {Second},
publisher = {The MIT Press},
title = {Reinforcement Learning: An Introduction},
  year      = {2018}
}
@phdthesis{Bradtke1994phd,
title={Incremental Dynamic Programming for On-line Adaptive Optimal Control},
author={Bradtke, Steven J},
year={1994},
school={University of Massachusetts},
month={Sept.},
address={Amherst},
}
@inproceedings{baird1995residual,
title={Residual algorithms: Reinforcement learning with function approximation},
author={Baird, Leemon and others},
booktitle={Proc. 12th Int. Conf. Mach. Learn.},
pages={30--37},
year={1995}
}
@article{bradtke1996linear,
title={Linear least-squares algorithms for temporal difference learning},
author={Bradtke, S.J. and Barto, A.G.},
journal={Mach. Learn.},
volume={22},
number={1},
pages={33--57},
year={1996},
publisher={Springer}
}
@article{lagoudakis2003least,
title={Least-squares policy iteration},
author={Lagoudakis, M.G. and Parr, R.},
journal={J. Mach. Learn. Res.},
volume={4},
pages={1107--1149},
year={2003},
publisher={JMLR. org}
}
@article{boyan2002technical,
title={Technical update: Least-squares temporal difference learning},
author={Boyan, J.A.},
journal={Mach. Learn.},
volume={49},
number={2},
pages={233--246},
year={2002},
publisher={Springer}
}
@inproceedings{geramifard2006incremental,
title={Incremental least-squares temporal difference learning},
author={Geramifard, A. and Bowling, M. and Sutton, R.S.},
booktitle={Proc. 21st AAAI Conf. Artif. Intell.},
pages={356--361},
year={2006},
month={July},
address={Boston, Massachusetts},
}
@inproceedings{sutton2009fast,
title={Fast gradient-descent methods for temporal-difference learning with linear function approximation},
author={Sutton, R.S. and Maei, H.R. and Precup, D. and Bhatnagar, S. and Silver, D. and Szepesv{\'a}ri, C. and Wiewiora, E.},
booktitle={Proc. 26th Int. Conf. Mach. Learn.},
pages={993--1000},
year={2009}
}
@inproceedings{sutton2008convergent,
title={A Convergent $ O (n) $ Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation},
author={Sutton, Richard S and Maei, Hamid R and Szepesv{\'a}ri, Csaba},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={1609--1616},
year={2008}
}
@inproceedings{dabney2014natural,
title={Natural Temporal Difference Learning},
author={Dabney, William and Thomas, Philip},
booktitle={Twenty-Eighth AAAI Conference on Artificial Intelligence},
year={2014}
}
@inproceedings{mahmood2014weighted,
title={Weighted importance sampling for off-policy learning with linear function approximation},
author={Mahmood, A Rupam and van Hasselt, Hado P and Sutton, Richard S},
booktitle={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
pages={3014--3022},
year={2014}
}
@inproceedings{seijen2014true,
title={True Online TD ($\lambda$)},
author={Seijen, Harm V and Sutton, Rich},
booktitle={Proc. 31st Int. Conf. Mach. Learn.},
pages={692--700},
year={2014}
}
@article{ormoneit2002kernel,
title={{Kernel-based reinforcement learning}},
author={Ormoneit, D. and Sen, {\'S}.},
journal={Mach. Learn.},
volume={49},
number={2-3},
pages={161--178},
issn={0885-6125},
year={2002},
publisher={Springer-Verlag },
address = {Hingham, MA, USA},
}
@inproceedings{Ghavamzadeh2010lstd,
author = {M. Ghavamzadeh and A. Lazaric and O. A. Maillard and R. Munos},
title = {{LSTD} with Random Projections},
BOOKTITLE={Advances in Neural Information Processing Systems},
publisher={Cambridge, MA: MIT Press},
volume = {23},
pages = {721--729},
Address = {Lake Tahoe, Nevada, USA},
year = {2010}
}
@inproceedings{loth2007sparse,
title={Sparse temporal difference learning using LASSO},
author={Loth, M. and Davy, M. and Preux, P.},
booktitle={Proc. IEEE Symp. Approx. Dynamic Program. Reinforce. Learn.},
pages={352--359},
year={2007},
organization={IEEE}
}
@inproceedings{kolter2009regularization,
title={Regularization and feature selection in least-squares temporal difference learning},
author={Kolter, J.Z. and Ng, A.Y.},
booktitle={Proc. 26th Int. Conf. Mach. Learn.},
pages={521--528},
year={2009},
organization={ACM}
}
@inproceedings{hoffman2011regularized,
title={Regularized least squares temporal difference learning with nested l2 and l1 penalization},
author={Hoffman, M.W. and Lazaric, A. and Ghavamzadeh, M. and Munos, R.},
booktitle={Proc. Eur. Workshop Reinforce. Learn.},
year={2011}
}
@inproceedings{Ghavamzadeh2011finite,
author = {M. Ghavamzadeh and A. Lazaric and R. Munos and M. Hoffman},
title = {Finite-Sample Analysis of {Lasso-TD}},
booktitle = {Proc. 28th Int. Conf. Mach. Learn.},
year = {2011},
month= {June},
address={Bellevue, Washington, USA},
pages={1177--1184},
}
@inproceedings{johnson2013accelerating,
title={Accelerating stochastic gradient descent using predictive variance reduction},
author={Johnson, R. and Zhang, T.},
booktitle={Advances in Neural Information Processing Systems},
pages={315--323},
year={2013}
}
@article{xu2020reanalysis,
title={Reanalysis of variance reduced temporal difference learning},
author={Xu, T. and Wang, Z. and Zhou, Y. and Liang, Y.},
journal={arXiv preprint arXiv:2001.01898},
year={2020}
}
@inproceedings{schulman2015trust,
title={Trust region policy optimization},
author={Schulman, J. and Levine, S. and Abbeel, P. and Jordan, M. and Moritz, P.},
booktitle={International Conference on Machine Learning},
pages={1889--1897},
year={2015}
}
@article{schulman2017proximal,
title={Proximal policy optimization algorithms},
author={Schulman, J. and Wolski, F. and Dhariwal, P. and Radford, A. and Klimov, O.},
journal={arXiv preprint arXiv:1707.06347},
year={2017}
}
@inproceedings{defazio2014saga,
title={SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives},
author={Defazio, A. and Bach, F. and Lacoste-Julien, S.},
booktitle={Advances in Neural Information Processing Systems},
pages={1646--1654},
year={2014}
}
@inproceedings{du2017stochastic,
title={Stochastic variance reduction methods for policy evaluation},
author={Du, S. S. and Chen, J. and Li, L. and Xiao, L. and Zhou, D.},
booktitle={Proceedings of the 34th International Conference on Machine Learning},
pages={1049--1058},
year={2017}
}
@inproceedings{chen2023modified,
title={Modified Retrace for Off-Policy Temporal Difference Learning},
author={Chen, Xingguo and Ma, Xingzhou and Li, Yang and Yang, Guang and Yang, Shangdong and Gao, Yang},
booktitle={Uncertainty in Artificial Intelligence},
pages={303--312},
year={2023},
organization={PMLR}
}
@article{dalal2017finite,
title={Finite Sample Analyses for TD(0) with Function Approximation},
author={Dalal, Gal and Szörényi, Balázs and Thoppe, Gugan and Mannor, Shie},
journal={arXiv preprint arXiv:1704.01161},
year={2017}
}
@article{sutton1988learning,
title={Learning to predict by the methods of temporal differences},
author={Sutton, Richard S},
journal={Machine learning},
volume={3},
number={1},
pages={9--44},
year={1988},
publisher={Springer}
}
@inproceedings{tsitsiklis1997analysis,
  title={Analysis of temporal-difference learning with function approximation},
author={Tsitsiklis, John N and Van Roy, Benjamin},
booktitle={Advances in Neural Information Processing Systems},
pages={1075--1081},
year={1997}
}
@article{sutton2016emphatic,
title={An emphatic approach to the problem of off-policy temporal-difference learning},
author={Sutton, Richard S and Mahmood, A Rupam and White, Martha},
journal={The Journal of Machine Learning Research},
volume={17},
number={1},
pages={2603--2631},
year={2016},
publisher={JMLR. org}
}
@inproceedings{liu2015finite,
title={Finite-sample analysis of proximal gradient TD algorithms},
author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek},
booktitle={Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence},
pages={504--513},
year={2015}
}
@inproceedings{liu2016proximal,
title={Proximal Gradient Temporal Difference Learning Algorithms.},
author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek},
booktitle={Proceedings of the International Joint Conference on Artificial Intelligence},
pages={4195--4199},
year={2016}
}
@article{liu2018proximal,
title={Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity},
author={Liu, Bo and Gemp, Ian and Ghavamzadeh, Mohammad and Liu, Ji and Mahadevan, Sridhar and Petrik, Marek},
journal={Journal of Artificial Intelligence Research},
volume={63},
pages={461--494},
year={2018}
}
@inproceedings{givchi2015quasi,
title={Quasi newton temporal difference learning},
author={Givchi, Arash and Palhang, Maziar},
booktitle={Asian Conference on Machine Learning},
pages={159--172},
year={2015}
}
@inproceedings{pan2017accelerated,
title={Accelerated gradient temporal difference learning},
author={Pan, Yangchen and White, Adam and White, Martha},
booktitle={Proceedings of the 31st AAAI Conference on Artificial Intelligence},
pages={2464--2470},
year={2017}
}
@inproceedings{hallak2016generalized,
title={Generalized emphatic temporal difference learning: bias-variance analysis},
author={Hallak, Assaf and Tamar, Aviv and Munos, Remi and Mannor, Shie},
booktitle={Proceedings of the 30th AAAI Conference on Artificial Intelligence},
pages={1631--1637},
year={2016}
}
@article{zhang2022truncated,
title={Truncated emphatic temporal difference methods for prediction and control},
author={Zhang, Shangtong and Whiteson, Shimon},
journal={The Journal of Machine Learning Research},
volume={23},
number={1},
pages={6859--6917},
year={2022},
publisher={JMLR.org}
}
@inproceedings{korda2015td,
title={On TD (0) with function approximation: Concentration bounds and a centered variant with exponential convergence},
author={Korda, Nathaniel and La, Prashanth},
booktitle={International conference on machine learning},
pages={626--634},
year={2015},
organization={PMLR}
}
@book{zhou2021machine,
title={Machine learning},
author={Zhou, Zhi-Hua},
year={2021},
publisher={Springer Nature}
}
@inproceedings{dalal2020tale,
title={A tale of two-timescale reinforcement learning with the tightest finite-time bound},
author={Dalal, Gal and Szorenyi, Balazs and Thoppe, Gugan},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={34},
number={04},
pages={3701--3708},
year={2020}
}
@inproceedings{feng2019kernel,
title={A kernel loss for solving the Bellman equation},
author={Feng, Yihao and Li, Lihong and Liu, Qiang},
booktitle={Advances in Neural Information Processing Systems},
pages={15430--15441},
year={2019}
}
@inproceedings{basserrano2021logistic,
title={Logistic Q-Learning},
author={Bas-Serrano, Joan and Curi, Sebastian and Krause, Andreas and Neu, Gergely},
booktitle={International Conference on Artificial Intelligence and Statistics},
pages={3610--3618},
year={2021}
}
This is BibTeX, Version 0.99d (TeX Live 2023)
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: example_paper.aux
The style file: icml2024.bst
Database file #1: example_paper.bib
Warning--can't use both volume and number fields in dalal2020tale
You've used 34 entries,
2773 wiz_defined-function locations,
790 strings with 10645 characters,
and the built_in function-call counts, 19097 in all, are:
= -- 1771
> -- 980
< -- 39
+ -- 332
- -- 298
* -- 1422
:= -- 2705
add.period$ -- 106
call.type$ -- 34
change.case$ -- 197
chr.to.int$ -- 34
cite$ -- 69
duplicate$ -- 950
empty$ -- 1596
format.name$ -- 349
if$ -- 4208
int.to.chr$ -- 1
int.to.str$ -- 1
missing$ -- 34
newline$ -- 178
num.names$ -- 138
pop$ -- 445
preamble$ -- 1
purify$ -- 166
quote$ -- 0
skip$ -- 806
stack$ -- 0
substring$ -- 965
swap$ -- 257
text.length$ -- 22
text.prefix$ -- 0
top$ -- 0
type$ -- 364
warning$ -- 1
while$ -- 176
width$ -- 0
write$ -- 452
(There was 1 warning)
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 29 JAN 2024 15:09
entering extended mode
restricted \write18 enabled.
file:line:error style messages enabled.
%&-line parsing enabled.
**example_paper
(./example_paper.tex
LaTeX2e <2022-11-01> patch level 1
L3 programming layer <2023-02-22> (d:/software/texlive/2023/texmf-dist/tex/latex/base/article.cls
Document Class: article 2022/07/02 v1.4n Standard LaTeX document class
(d:/software/texlive/2023/texmf-dist/tex/latex/base/size10.clo
File: size10.clo 2022/07/02 v1.4n Standard LaTeX file (size option)
)
\c@part=\count185
\c@section=\count186
\c@subsection=\count187
\c@subsubsection=\count188
\c@paragraph=\count189
\c@subparagraph=\count190
\c@figure=\count191
\c@table=\count192
\abovecaptionskip=\skip48
\belowcaptionskip=\skip49
\bibindent=\dimen140
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/microtype.sty
Package: microtype 2023/03/13 v3.1a Micro-typographical refinements (RS)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
\KV@toks@=\toks16
) (d:/software/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty
Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW)
\etb@tempcnta=\count193
)
\MT@toks=\toks17
\MT@tempbox=\box51
\MT@count=\count194
LaTeX Info: Redefining \noprotrusionifhmode on input line 1059.
LaTeX Info: Redefining \leftprotrusion on input line 1060.
\MT@prot@toks=\toks18
LaTeX Info: Redefining \rightprotrusion on input line 1078.
LaTeX Info: Redefining \textls on input line 1368.
\MT@outer@kern=\dimen141
LaTeX Info: Redefining \textmicrotypecontext on input line 1988.
\MT@listname@count=\count195
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/microtype-pdftex.def
File: microtype-pdftex.def 2023/03/13 v3.1a Definitions specific to pdftex (RS)
LaTeX Info: Redefining \lsstyle on input line 902.
LaTeX Info: Redefining \lslig on input line 902.
\MT@outer@space=\skip50
)
Package microtype Info: Loading configuration file microtype.cfg.
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/microtype.cfg
File: microtype.cfg 2023/03/13 v3.1a microtype main configuration file (RS)
)) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty
Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty
Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty
Package: trig 2021/08/11 v1.11 sin cos tan (DPC)
) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: pdftex.def on input line 107.
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def
File: pdftex.def 2022/09/22 v1.2b Graphics/color driver for pdftex
))
\Gin@req@height=\dimen142
\Gin@req@width=\dimen143
) (d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.sty
Package: subfigure 2002/03/15 v2.1.5 subfigure package
\subfigtopskip=\skip51
\subfigcapskip=\skip52
\subfigcaptopadj=\dimen144
\subfigbottomskip=\skip53
\subfigcapmargin=\dimen145
\subfiglabelskip=\skip54
\c@subfigure=\count196
\c@subtable=\count197
****************************************
* Local config file subfigure.cfg used *
****************************************
(d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.cfg)
\subfig@top=\skip55
\subfig@bottom=\skip56
) (d:/software/texlive/2023/texmf-dist/tex/latex/diagbox/diagbox.sty
Package: diagbox 2020/02/09 v2.3 Making table heads with diagonal lines
(d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.sty
Package: pict2e 2020/09/30 v0.4b Improved picture commands (HjG,RN,JT)
(d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.cfg
File: pict2e.cfg 2016/02/05 v0.1u pict2e configuration for teTeX/TeXLive
)
Package pict2e Info: Driver file: pdftex.def on input line 112.
Package pict2e Info: Driver file for pict2e: p2e-pdftex.def on input line 114.
(d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/p2e-pdftex.def
File: p2e-pdftex.def 2016/02/05 v0.1u Driver-dependant file (RN,HjG,JT)
)
\pIIe@GRAPH=\toks19
\@arclen=\dimen146
\@arcrad=\dimen147
\pIIe@tempdima=\dimen148
\pIIe@tempdimb=\dimen149
\pIIe@tempdimc=\dimen150
\pIIe@tempdimd=\dimen151
\pIIe@tempdime=\dimen152
\pIIe@tempdimf=\dimen153
) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/calc.sty
Package: calc 2017/05/25 v4.3 Infix arithmetic (KKT,FJ)
\calc@Acount=\count198
\calc@Bcount=\count199
\calc@Adimen=\dimen154
\calc@Bdimen=\dimen155
\calc@Askip=\skip57
\calc@Bskip=\skip58
LaTeX Info: Redefining \setlength on input line 80.
LaTeX Info: Redefining \addtolength on input line 81.
\calc@Ccount=\count266
\calc@Cskip=\skip59
) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/array.sty
Package: array 2022/09/04 v2.5g Tabular extension package (FMi)
\col@sep=\dimen156
\ar@mcellbox=\box52
\extrarowheight=\dimen157
\NC@list=\toks20
\extratabsurround=\skip60
\backup@length=\skip61
\ar@cellbox=\box53
)
\diagbox@boxa=\box54
\diagbox@boxb=\box55
\diagbox@boxm=\box56
\diagbox@wd=\dimen158
\diagbox@ht=\dimen159
\diagbox@insepl=\dimen160
\diagbox@insepr=\dimen161
\diagbox@outsepl=\dimen162
\diagbox@outsepr=\dimen163
) (d:/software/texlive/2023/texmf-dist/tex/latex/wrapfig/wrapfig.sty
\wrapoverhang=\dimen164
\WF@size=\dimen165
\c@WF@wrappedlines=\count267
\WF@box=\box57
\WF@everypar=\toks21
Package: wrapfig 2003/01/31 v 3.6
) (d:/software/texlive/2023/texmf-dist/tex/latex/booktabs/booktabs.sty
Package: booktabs 2020/01/12 v1.61803398 Publication quality tables
\heavyrulewidth=\dimen166
\lightrulewidth=\dimen167
\cmidrulewidth=\dimen168
\belowrulesep=\dimen169
\belowbottomsep=\dimen170
\aboverulesep=\dimen171
\abovetopsep=\dimen172
\cmidrulesep=\dimen173
\cmidrulekern=\dimen174
\defaultaddspace=\dimen175
\@cmidla=\count268
\@cmidlb=\count269
\@aboverulesep=\dimen176
\@belowrulesep=\dimen177
\@thisruleclass=\count270
\@lastruleclass=\count271
\@thisrulewidth=\dimen178
) (d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/hyperref.sty
Package: hyperref 2023-02-07 v7.00v Hypertext links for LaTeX
(d:/software/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/iftex/iftex.sty
Package: iftex 2022/02/03 v1.0f TeX engine tests
) (d:/software/texlive/2023/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty
Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO)
(d:/software/texlive/2023/texmf-dist/tex/generic/infwarerr/infwarerr.sty
Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
)
Package pdftexcmds Info: \pdf@primitive is available.
Package pdftexcmds Info: \pdf@ifprimitive is available.
Package pdftexcmds Info: \pdfdraftmode found.
) (d:/software/texlive/2023/texmf-dist/tex/latex/kvsetkeys/kvsetkeys.sty
Package: kvsetkeys 2022-10-05 v1.19 Key value parser (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pdfescape/pdfescape.sty
Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/hycolor/hycolor.sty
Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/auxhook/auxhook.sty
Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
) (d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/nameref.sty
Package: nameref 2022-05-17 v2.50 Cross-referencing by name of section
(d:/software/texlive/2023/texmf-dist/tex/latex/refcount/refcount.sty
Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
(d:/software/texlive/2023/texmf-dist/tex/latex/kvoptions/kvoptions.sty
Package: kvoptions 2022-06-15 v3.15 Key value format for package options (HO)
))
\c@section@level=\count272
)
\@linkdim=\dimen179
\Hy@linkcounter=\count273
\Hy@pagecounter=\count274
(d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/pd1enc.def
File: pd1enc.def 2023-02-07 v7.00v Hyperref: PDFDocEncoding definition (HO)
Now handling font encoding PD1 ...
... no UTF-8 mapping file for font encoding PD1
) (d:/software/texlive/2023/texmf-dist/tex/generic/intcalc/intcalc.sty
Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
) (d:/software/texlive/2023/texmf-dist/tex/generic/etexcmds/etexcmds.sty
Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
)
\Hy@SavedSpaceFactor=\count275
(d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/puenc.def
File: puenc.def 2023-02-07 v7.00v Hyperref: PDF Unicode definition (HO)
Now handling font encoding PU ...
... no UTF-8 mapping file for font encoding PU
)
Package hyperref Info: Hyper figures OFF on input line 4177.
Package hyperref Info: Link nesting OFF on input line 4182.
Package hyperref Info: Hyper index ON on input line 4185.
Package hyperref Info: Plain pages OFF on input line 4192.
Package hyperref Info: Backreferencing OFF on input line 4197.
Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
Package hyperref Info: Bookmarks ON on input line 4425.
\c@Hy@tempcnt=\count276
(d:/software/texlive/2023/texmf-dist/tex/latex/url/url.sty
\Urlmuskip=\muskip16
Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
)
LaTeX Info: Redefining \url on input line 4763.
\XeTeXLinkMargin=\dimen180
(d:/software/texlive/2023/texmf-dist/tex/generic/bitset/bitset.sty
Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
(d:/software/texlive/2023/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO)
))
\Fld@menulength=\count277
\Field@Width=\dimen181
\Fld@charsize=\dimen182
Package hyperref Info: Hyper figures OFF on input line 6042.
Package hyperref Info: Link nesting OFF on input line 6047.
Package hyperref Info: Hyper index ON on input line 6050.
Package hyperref Info: backreferencing OFF on input line 6057.
Package hyperref Info: Link coloring OFF on input line 6062.
Package hyperref Info: Link coloring with OCG OFF on input line 6067.
Package hyperref Info: PDF/A mode OFF on input line 6072.
(d:/software/texlive/2023/texmf-dist/tex/latex/base/atbegshi-ltx.sty
Package: atbegshi-ltx 2021/01/10 v1.0c Emulation of the original atbegshi
package with kernel methods
)
\Hy@abspage=\count278
\c@Item=\count279
\c@Hfootnote=\count280
)
Package hyperref Info: Driver (autodetected): hpdftex.
(d:/software/texlive/2023/texmf-dist/tex/latex/hyperref/hpdftex.def
File: hpdftex.def 2023-02-07 v7.00v Hyperref driver for pdfTeX
(d:/software/texlive/2023/texmf-dist/tex/latex/base/atveryend-ltx.sty
Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atveryend package
with kernel methods
)
\Fld@listcount=\count281
\c@bookmark@seq@number=\count282
(d:/software/texlive/2023/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
Package: rerunfilecheck 2022-07-10 v1.10 Rerun checks for auxiliary files (HO)
(d:/software/texlive/2023/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
)
Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 285.
)
\Hy@SectionHShift=\skip62
) (./icml2024.sty
Package: icml2024 2023/11/23 v2.0 ICML Conference Style File
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/times.sty
Package: times 2020/03/25 PSNFSS-v9.3 (SPQR)
) (./fancyhdr.sty
\fancy@headwidth=\skip63
\f@ncyO@elh=\skip64
\f@ncyO@erh=\skip65
\f@ncyO@olh=\skip66
\f@ncyO@orh=\skip67
\f@ncyO@elf=\skip68
\f@ncyO@erf=\skip69
\f@ncyO@olf=\skip70
\f@ncyO@orf=\skip71
) (d:/software/texlive/2023/texmf-dist/tex/latex/xcolor/xcolor.sty
Package: xcolor 2022/06/12 v2.14 LaTeX color extensions (UK)
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package xcolor Info: Driver file: pdftex.def on input line 227.
(d:/software/texlive/2023/texmf-dist/tex/latex/graphics/mathcolor.ltx)
Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1353.
Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1357.
Package xcolor Info: Model `RGB' extended on input line 1369.
Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1371.
Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1372.
Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1373.
Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1374.
Package xcolor Info: Model `Gray' substituted by `gray' on input line 1375.
Package xcolor Info: Model `wave' substituted by `hsb' on input line 1376.
) (./algorithm.sty
Package: algorithm
Document Style `algorithm' - floating environment
(d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty
Package: float 2001/11/08 v1.3d Float enhancements (AL)
\c@float@type=\count283
\float@exts=\toks22
\float@box=\box58
\@float@everytoks=\toks23
\@floatcapt=\box59
) (d:/software/texlive/2023/texmf-dist/tex/latex/base/ifthen.sty
Package: ifthen 2022/04/13 v1.1d Standard LaTeX ifthen package (DPC)
)
\@float@every@algorithm=\toks24
\c@algorithm=\count284
) (./algorithmic.sty
Package: algorithmic
Document Style `algorithmic' - environment
\c@ALC@line=\count285
\c@ALC@rem=\count286
\ALC@tlm=\skip72
) (d:/software/texlive/2023/texmf-dist/tex/latex/natbib/natbib.sty
Package: natbib 2010/09/13 8.31b (PWD, AO)
\bibhang=\skip73
\bibsep=\skip74
LaTeX Info: Redefining \cite on input line 694.
\c@NAT@ctr=\count287
) (d:/software/texlive/2023/texmf-dist/tex/latex/eso-pic/eso-pic.sty
Package: eso-pic 2020/10/14 v3.0a eso-pic (RN)
\ESO@tempdima=\dimen183
\ESO@tempdimb=\dimen184
) (d:/software/texlive/2023/texmf-dist/tex/latex/forloop/forloop.sty
Package: forloop 2006/09/18 v3.0 For Loops for LaTeX
)
Package hyperref Info: Option `colorlinks' set `true' on input line 151.
\titrun=\box60
\c@@affiliationcounter=\count288
\c@@affilnum=\count289
\newcaptionbox=\box61
\newcaptionboxwid=\dimen185
\icmlrulerbox=\box62
\icmlrulercount=\count290
\icmlruleroffset=\dimen186
\cv@lineheight=\dimen187
\cv@boxheight=\dimen188
\cv@tmpbox=\box63
\cv@refno=\count291
\cv@tot=\count292
\cv@tmpc@=\count293
\cv@tmpc=\count294
) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty
Package: amsmath 2022/04/08 v2.17n AMS math features
\@mathmargin=\skip75
For additional information on amsmath, use the `?' option.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty
Package: amstext 2021/08/26 v2.01 AMS text
(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty
File: amsgen.sty 1999/11/30 v2.0 generic functions
\@emptytoks=\toks25
\ex@=\dimen189
)) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
\pmbraise@=\dimen190
) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty
Package: amsopn 2022/04/08 v2.04 operator names
)
\inf@bad=\count295
LaTeX Info: Redefining \frac on input line 234.
\uproot@=\count296
\leftroot@=\count297
LaTeX Info: Redefining \overline on input line 399.
LaTeX Info: Redefining \colon on input line 410.
\classnum@=\count298
\DOTSCASE@=\count299
LaTeX Info: Redefining \ldots on input line 496.
LaTeX Info: Redefining \dots on input line 499.
LaTeX Info: Redefining \cdots on input line 620.
\Mathstrutbox@=\box64
\strutbox@=\box65
LaTeX Info: Redefining \big on input line 722.
LaTeX Info: Redefining \Big on input line 723.
LaTeX Info: Redefining \bigg on input line 724.
LaTeX Info: Redefining \Bigg on input line 725.
\big@size=\dimen191
LaTeX Font Info: Redeclaring font encoding OML on input line 743.
LaTeX Font Info: Redeclaring font encoding OMS on input line 744.
\macc@depth=\count300
LaTeX Info: Redefining \bmod on input line 905.
LaTeX Info: Redefining \pmod on input line 910.
LaTeX Info: Redefining \smash on input line 940.
LaTeX Info: Redefining \relbar on input line 970.
LaTeX Info: Redefining \Relbar on input line 971.
\c@MaxMatrixCols=\count301
\dotsspace@=\muskip17
\c@parentequation=\count302
\dspbrk@lvl=\count303
\tag@help=\toks26
\row@=\count304
\column@=\count305
\maxfields@=\count306
\andhelp@=\toks27
\eqnshift@=\dimen192
\alignsep@=\dimen193
\tagshift@=\dimen194
\tagwidth@=\dimen195
\totwidth@=\dimen196
\lineht@=\dimen197
\@envbody=\toks28
\multlinegap=\skip76
\multlinetaggap=\skip77
\mathdisplay@stack=\toks29
LaTeX Info: Redefining \[ on input line 2953.
LaTeX Info: Redefining \] on input line 2954.
) (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty
Package: amssymb 2013/01/14 v3.01 AMS font symbols
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
)) (d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty
Package: mathtools 2022/06/29 v1.29 mathematical typesetting tools
(d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty
Package: mhsetup 2021/03/18 v1.4 programming setup (MH)
)
\g_MT_multlinerow_int=\count307
\l_MT_multwidth_dim=\dimen198
\origjot=\skip78
\l_MT_shortvdotswithinadjustabove_dim=\dimen199
\l_MT_shortvdotswithinadjustbelow_dim=\dimen256
\l_MT_above_intertext_sep=\dimen257
\l_MT_below_intertext_sep=\dimen258
\l_MT_above_shortintertext_sep=\dimen259
\l_MT_below_shortintertext_sep=\dimen260
\xmathstrut@box=\box66
\xmathstrut@dim=\dimen261
) (d:/software/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty
Package: amsthm 2020/05/29 v2.20.6
\thm@style=\toks30
\thm@bodyfont=\toks31
\thm@headfont=\toks32
\thm@notefont=\toks33
\thm@headpunct=\toks34
\thm@preskip=\skip79
\thm@postskip=\skip80
\thm@headsep=\skip81
\dth@everypar=\toks35
) (d:/software/texlive/2023/texmf-dist/tex/latex/cleveref/cleveref.sty
Package: cleveref 2018/03/27 v0.21.4 Intelligent cross-referencing
Package cleveref Info: `hyperref' support loaded on input line 2370.
LaTeX Info: Redefining \cref on input line 2370.
LaTeX Info: Redefining \Cref on input line 2370.
LaTeX Info: Redefining \crefrange on input line 2370.
LaTeX Info: Redefining \Crefrange on input line 2370.
LaTeX Info: Redefining \cpageref on input line 2370.
LaTeX Info: Redefining \Cpageref on input line 2370.
LaTeX Info: Redefining \cpagerefrange on input line 2370.
LaTeX Info: Redefining \Cpagerefrange on input line 2370.
LaTeX Info: Redefining \labelcref on input line 2370.
LaTeX Info: Redefining \labelcpageref on input line 2370.
Package cleveref Info: `amsthm' support loaded on input line 3026.
Package cleveref Info: always capitalise cross-reference names on input line 7830.
Package cleveref Info: always capitalise cross-reference names on input line 7852.
Package cleveref Info: no abbreviation of names on input line 7852.
)
\c@theorem=\count308
(d:/software/texlive/2023/texmf-dist/tex/latex/todonotes/todonotes.sty
Package: todonotes 2023/01/31 v1.1.6 Todonotes source and documentation.
Package: todonotes 2023/01/31
(d:/software/texlive/2023/texmf-dist/tex/latex/xkeyval/xkeyval.sty
Package: xkeyval 2022/06/16 v2.9 package option processing (HA)
(d:/software/texlive/2023/texmf-dist/tex/generic/xkeyval/xkeyval.tex (d:/software/texlive/2023/texmf-dist/tex/generic/xkeyval/xkvutils.tex
\XKV@toks=\toks36
\XKV@tempa@toks=\toks37
)
\XKV@depth=\count309
File: xkeyval.tex 2014/12/03 v2.7a key=value parser (HA)
)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex
\pgfutil@everybye=\toks38
\pgfutil@tempdima=\dimen262
\pgfutil@tempdimb=\dimen263
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def
\pgfutil@abb=\box67
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/pgf.revision.tex)
Package: pgfrcs 2023-01-15 v3.1.10 (3.1.10)
))
Package: pgf 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex
Package: pgfsys 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex
\pgfkeys@pathtoks=\toks39
\pgfkeys@temptoks=\toks40
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeyslibraryfiltered.code.tex
\pgfkeys@tmptoks=\toks41
))
\pgf@x=\dimen264
\pgf@y=\dimen265
\pgf@xa=\dimen266
\pgf@ya=\dimen267
\pgf@xb=\dimen268
\pgf@yb=\dimen269
\pgf@xc=\dimen270
\pgf@yc=\dimen271
\pgf@xd=\dimen272
\pgf@yd=\dimen273
\w@pgf@writea=\write3
\r@pgf@reada=\read2
\c@pgf@counta=\count310
\c@pgf@countb=\count311
\c@pgf@countc=\count312
\c@pgf@countd=\count313
\t@pgf@toka=\toks42
\t@pgf@tokb=\toks43
\t@pgf@tokc=\toks44
\pgf@sys@id@count=\count314
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg
File: pgf.cfg 2023-01-15 v3.1.10 (3.1.10)
)
Driver file for pgf: pgfsys-pdftex.def
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def
File: pgfsys-pdftex.def 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def
File: pgfsys-common-pdf.def 2023-01-15 v3.1.10 (3.1.10)
))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex
File: pgfsyssoftpath.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfsyssoftpath@smallbuffer@items=\count315
\pgfsyssoftpath@bigbuffer@items=\count316
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex
File: pgfsysprotocol.code.tex 2023-01-15 v3.1.10 (3.1.10)
)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex
Package: pgfcore 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex
\pgfmath@dimen=\dimen274
\pgfmath@count=\count317
\pgfmath@box=\box68
\pgfmath@toks=\toks45
\pgfmath@stack@operand=\toks46
\pgfmath@stack@operation=\toks47
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex
\c@pgfmathroundto@lastzeros=\count318
)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex
File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@picminx=\dimen275
\pgf@picmaxx=\dimen276
\pgf@picminy=\dimen277
\pgf@picmaxy=\dimen278
\pgf@pathminx=\dimen279
\pgf@pathmaxx=\dimen280
\pgf@pathminy=\dimen281
\pgf@pathmaxy=\dimen282
\pgf@xx=\dimen283
\pgf@xy=\dimen284
\pgf@yx=\dimen285
\pgf@yy=\dimen286
\pgf@zx=\dimen287
\pgf@zy=\dimen288
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex
File: pgfcorepathconstruct.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@path@lastx=\dimen289
\pgf@path@lasty=\dimen290
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex
File: pgfcorepathusage.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@shorten@end@additional=\dimen291
\pgf@shorten@start@additional=\dimen292
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex
File: pgfcorescopes.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfpic=\box69
\pgf@hbox=\box70
\pgf@layerbox@main=\box71
\pgf@picture@serial@count=\count319
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex
File: pgfcoregraphicstate.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgflinewidth=\dimen293
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex
File: pgfcoretransformations.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@pt@x=\dimen294
\pgf@pt@y=\dimen295
\pgf@pt@temp=\dimen296
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex
File: pgfcorequick.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex
File: pgfcoreobjects.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex
File: pgfcorepathprocessing.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex
File: pgfcorearrows.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfarrowsep=\dimen297
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex
File: pgfcoreshade.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@max=\dimen298
\pgf@sys@shading@range@num=\count320
\pgf@shadingcount=\count321
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex
File: pgfcoreimage.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex
File: pgfcoreexternal.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfexternal@startupbox=\box72
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex
File: pgfcorelayers.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex
File: pgfcoretransparency.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex
File: pgfcorepatterns.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex
File: pgfcorerdf.code.tex 2023-01-15 v3.1.10 (3.1.10)
))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex
File: pgfmoduleshapes.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfnodeparttextbox=\box73
) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex
File: pgfmoduleplot.code.tex 2023-01-15 v3.1.10 (3.1.10)
) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty
Package: pgfcomp-version-0-65 2023-01-15 v3.1.10 (3.1.10)
\pgf@nodesepstart=\dimen299
\pgf@nodesepend=\dimen300
) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty
Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10)
)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/math/pgfmath.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex
Package: pgffor 2023-01-15 v3.1.10 (3.1.10)
\pgffor@iter=\dimen301
\pgffor@skip=\dimen302
\pgffor@stack=\toks48
\pgffor@toks=\toks49
)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex
Package: tikz 2023-01-15 v3.1.10 (3.1.10)
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex
File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgf@plot@mark@count=\count322
\pgfplotmarksize=\dimen303
)
\tikz@lastx=\dimen304
\tikz@lasty=\dimen305
\tikz@lastxsaved=\dimen306
\tikz@lastysaved=\dimen307
\tikz@lastmovetox=\dimen308
\tikz@lastmovetoy=\dimen309
\tikzleveldistance=\dimen310
\tikzsiblingdistance=\dimen311
\tikz@figbox=\box74
\tikz@figbox@bg=\box75
\tikz@tempbox=\box76
\tikz@tempbox@bg=\box77
\tikztreelevel=\count323
\tikznumberofchildren=\count324
\tikznumberofcurrentchild=\count325
\tikz@fig@count=\count326
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex
File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10)
\pgfmatrixcurrentrow=\count327
\pgfmatrixcurrentcolumn=\count328
\pgf@matrix@numberofcolumns=\count329
)
\tikz@expandcount=\count330
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex
File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10)
))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarypositioning.code.tex
File: tikzlibrarypositioning.code.tex 2023-01-15 v3.1.10 (3.1.10)
)
\c@@todonotes@numberoftodonotes=\count331
)
LaTeX Font Info: Trying to load font information for OT1+ptm on input line 62.
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/ot1ptm.fd
File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count332
\l__pdf_internal_box=\box78
) (./example_paper.aux)
\openout1 = `example_paper.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 62.
LaTeX Font Info: ... okay on input line 62.
LaTeX Info: Redefining \microtypecontext on input line 62.
Package microtype Info: Applying patch `item' on input line 62.
Package microtype Info: Applying patch `toc' on input line 62.
Package microtype Info: Applying patch `eqnum' on input line 62.
Package microtype Info: Applying patch `footnote' on input line 62.
Package microtype Info: Applying patch `verbatim' on input line 62.
Package microtype Info: Generating PDF output.
Package microtype Info: Character protrusion enabled (level 2).
Package microtype Info: Using default protrusion set `alltext'.
Package microtype Info: Automatic font expansion enabled (level 2),
(microtype) stretch: 20, shrink: 20, step: 1, non-selected.
Package microtype Info: Using default expansion set `alltext-nott'.
LaTeX Info: Redefining \showhyphens on input line 62.
Package microtype Info: No adjustment of tracking.
Package microtype Info: No adjustment of interword spacing.
Package microtype Info: No adjustment of character kerning.
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-ptm.cfg
File: mt-ptm.cfg 2006/04/20 v1.7 microtype config. file: Times (RS)
) (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count333
\scratchdimen=\dimen312
\scratchbox=\box79
\nofMPsegments=\count334
\nofMParguments=\count335
\everyMPshowfont=\toks50
\MPscratchCnt=\count336
\MPscratchDim=\dimen313
\MPnumerator=\count337
\makeMPintoPDFobject=\count338
\everyMPtoPDFconversion=\toks51
) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 485.
(d:/software/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live
))
Package hyperref Info: Link coloring ON on input line 62.
(./example_paper.out) (./example_paper.out)
\@outlinefile=\write4
\openout4 = `example_paper.out'.
\c@@affil@anon=\count339
(d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-cmr.cfg
File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman (RS)
)
LaTeX Font Info: Trying to load font information for U+msa on input line 110.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msa.cfg
File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS)
)
LaTeX Font Info: Trying to load font information for U+msb on input line 110.
(d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
) (d:/software/texlive/2023/texmf-dist/tex/latex/microtype/mt-msb.cfg
File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS)
)
Package hyperref Warning: Ignoring empty anchor on input line 121.
LaTeX Font Info: Trying to load font information for OML+ptm on input line 121.
(d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/omlptm.fd
File: omlptm.fd
)
LaTeX Font Info: Font shape `OML/ptm/m/n' in size <9> not available
(Font) Font shape `OML/cmm/m/it' tried instead on input line 121.
Underfull \vbox (badness 10000) has occurred while \output is active []
[1{d:/software/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map}{d:/software/texlive/2023/texmf-dist/fonts/enc/dvips/base/8r.enc}
]
Underfull \vbox (badness 10000) has occurred while \output is active []
[2]
Overfull \hbox (10.76138pt too wide) detected at line 394
[][][][]
[]
Overfull \hbox (9.68254pt too wide) detected at line 494
[][][][]
[]
[3
pdfTeX warning (ext4): destination with the same identifier (name{table.1}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.516 \end{equation}
] [4] [5] (./pic/randomwalk.tex) (./pic/BairdExample.tex)
<pic/maze_13_13.pdf, id=252, 493.1646pt x 387.62602pt>
File: pic/maze_13_13.pdf Graphic file (type pdf)
<use pic/maze_13_13.pdf>
Package pdftex.def Info: pic/maze_13_13.pdf used on input line 869.
(pdftex.def) Requested size: 98.63116pt x 77.52382pt.
Underfull \hbox (badness 1902) in paragraph at lines 871--880
\OT1/ptm/m/n/10 (+20) four al-ter-na-tive ac-tions:
[]
Underfull \hbox (badness 5548) in paragraph at lines 871--880
\OML/cmm/m/it/10 up$\OT1/ptm/m/n/10 (+20) , $\OML/cmm/m/it/10 down$\OT1/ptm/m/n/10 (+20) , $\OML/cmm/m/it/10 left$\OT1/ptm/m/n/10 (+20) , and
[]
Underfull \hbox (badness 1472) in paragraph at lines 871--880
\OML/cmm/m/it/10 right$\OT1/ptm/m/n/10 (+20) , which takes the
[]
<pic/dependent_new.pdf, id=254, 557.01889pt x 394.59978pt>
File: pic/dependent_new.pdf Graphic file (type pdf)
<use pic/dependent_new.pdf>
Package pdftex.def Info: pic/dependent_new.pdf used on input line 897.
(pdftex.def) Requested size: 108.04453pt x 108.04262pt.
<pic/tabular_new.pdf, id=255, 566.51224pt x 401.1703pt>
File: pic/tabular_new.pdf Graphic file (type pdf)
<use pic/tabular_new.pdf>
Package pdftex.def Info: pic/tabular_new.pdf used on input line 901.
(pdftex.def) Requested size: 108.04472pt x 108.04196pt.
<pic/inverted_new.pdf, id=256, 565.61766pt x 402.45422pt>
File: pic/inverted_new.pdf Graphic file (type pdf)
<use pic/inverted_new.pdf>
Package pdftex.def Info: pic/inverted_new.pdf used on input line 906.
(pdftex.def) Requested size: 108.03809pt x 108.04385pt.
<pic/counterexample_quanju_new.pdf, id=257, 471.30164pt x 401.08943pt>
File: pic/counterexample_quanju_new.pdf Graphic file (type pdf)
<use pic/counterexample_quanju_new.pdf>
Package pdftex.def Info: pic/counterexample_quanju_new.pdf used on input line 910.
(pdftex.def) Requested size: 108.04471pt x 108.04466pt.
<pic/maze_complete.pdf, id=258, 595.42892pt x 465.38112pt>
File: pic/maze_complete.pdf Graphic file (type pdf)
<use pic/maze_complete.pdf>
Package pdftex.def Info: pic/maze_complete.pdf used on input line 922.
(pdftex.def) Requested size: 211.38329pt x 150.31682pt.
<pic/cw_complete.pdf, id=259, 570.46333pt x 465.10928pt>
File: pic/cw_complete.pdf Graphic file (type pdf)
<use pic/cw_complete.pdf>
Package pdftex.def Info: pic/cw_complete.pdf used on input line 926.
(pdftex.def) Requested size: 211.39018pt x 150.32127pt.
<pic/mt_complete.pdf, id=260, 569.92673pt x 468.75475pt>
File: pic/mt_complete.pdf Graphic file (type pdf)
<use pic/mt_complete.pdf>
Package pdftex.def Info: pic/mt_complete.pdf used on input line 931.
(pdftex.def) Requested size: 211.38266pt x 150.31929pt.
<pic/Acrobot_complete.pdf, id=261, 564.99583pt x 478.09494pt>
File: pic/Acrobot_complete.pdf Graphic file (type pdf)
<use pic/Acrobot_complete.pdf>
Package pdftex.def Info: pic/Acrobot_complete.pdf used on input line 935.
(pdftex.def) Requested size: 211.39014pt x 150.3162pt.
Underfull \vbox (badness 10000) has occurred while \output is active []
[6
pdfTeX warning (ext4): destination with the same identifier (name{figure.1}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.980
pdfTeX warning (ext4): destination with the same identifier (name{figure.2}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.980
<./pic/maze_13_13.pdf>]
Underfull \vbox (badness 10000) has occurred while \output is active []
[7
pdfTeX warning (ext4): destination with the same identifier (name{table.2}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.1039
pdfTeX warning (ext4): destination with the same identifier (name{figure.3}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.1039
<./pic/dependent_new.pdf> <./pic/tabular_new.pdf
pdfTeX warning: pdflatex.exe (file ./pic/tabular_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/inverted_new.pdf
pdfTeX warning: pdflatex.exe (file ./pic/inverted_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/counterexample_quanju_new.pdf
pdfTeX warning: pdflatex.exe (file ./pic/counterexample_quanju_new.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>]
Underfull \vbox (badness 10000) has occurred while \output is active []
(./example_paper.bbl
Underfull \vbox (badness 10000) has occurred while \output is active []
[8
pdfTeX warning (ext4): destination with the same identifier (name{figure.4}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.12
<./pic/maze_complete.pdf> <./pic/cw_complete.pdf
pdfTeX warning: pdflatex.exe (file ./pic/cw_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/mt_complete.pdf
pdfTeX warning: pdflatex.exe (file ./pic/mt_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
> <./pic/Acrobot_complete.pdf
pdfTeX warning: pdflatex.exe (file ./pic/Acrobot_complete.pdf): PDF inclusion: multiple pdfs with page group included in a single page
>] [9]) [10
]
LaTeX Warning: Command \textemdash invalid in math mode on input line 1212.
LaTeX Warning: Command \textemdash invalid in math mode on input line 1212.
[11
] [12] [13] [14]
Underfull \hbox (badness 5490) in paragraph at lines 1626--1643
[]\OT1/ptm/m/n/10 (+20) 7-state ver-sion of Baird's off-policy coun-terex-am-ple: for TD al-go-rithm, $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 (+20) is set to 0.1. For the
[]
Underfull \hbox (badness 10000) in paragraph at lines 1626--1643
\OT1/ptm/m/n/10 (+20) TDC al-go-rithm, the range of $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 (+20) is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) , and the range
[]
Underfull \hbox (badness 10000) in paragraph at lines 1626--1643
\OT1/ptm/m/n/10 (+20) of $\OML/cmm/m/it/10 ^^P$ \OT1/ptm/m/n/10 (+20) is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) . For the VMTD al-go-
[]
Underfull \hbox (badness 10000) in paragraph at lines 1626--1643
\OT1/ptm/m/n/10 (+20) rithm, the range of $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 (+20) is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) , and the range of $\OML/cmm/m/it/10 ^^L$ \OT1/ptm/m/n/10 (+20) is
[]
Underfull \hbox (badness 2384) in paragraph at lines 1626--1643
\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 (+20) . Through ex-per-i-ments, it was found that
[]
[15
pdfTeX warning (ext4): destination with the same identifier (name{table.3}) has been already used, duplicate ignored
<argument> ...shipout:D \box_use:N \l_shipout_box
\__shipout_drop_firstpage_...
l.1672 \end{document}
] (./example_paper.aux)
Package rerunfilecheck Info: File `example_paper.out' has not changed.
(rerunfilecheck) Checksum: D41D8CD98F00B204E9800998ECF8427E;0.
)
Here is how much of TeX's memory you used:
27893 strings out of 476025
518891 string characters out of 5789524
1889382 words of memory out of 5000000
47429 multiletter control sequences out of 15000+600000
550322 words of font info for 260 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
99i,16n,94p,1006b,1054s stack positions out of 10000i,1000n,20000p,200000b,200000s
<d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmr9.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy5.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy6.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/public/amsfonts/symbols/msbm10.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmb8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmr8a.pfb><d:/software/texlive/2023/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
Output written on example_paper.pdf (15 pages, 2276876 bytes).
PDF statistics:
858 PDF objects out of 1000 (max. 8388607)
693 compressed objects within 7 object streams
137 named destinations out of 1000 (max. 500000)
55866 words of extra memory for PDF output out of 61914 (max. 10000000)
%%%%%%%% ICML 2024 EXAMPLE LATEX SUBMISSION FILE %%%%%%%%%%%%%%%%%
\documentclass{article}
% Recommended, but optional, packages for figures and better typesetting:
\usepackage{microtype}
\usepackage{graphicx}
\usepackage{subfigure}
\usepackage{diagbox}
\usepackage{wrapfig}
\usepackage{booktabs} % for professional tables
% hyperref makes hyperlinks in the resulting PDF.
% If your build breaks (sometimes temporarily if a hyperlink spans a page)
% please comment out the following usepackage line and replace
% \usepackage{icml2024} with \usepackage[nohyperref]{icml2024} above.
\usepackage{hyperref}
% Attempt to make hyperref and algorithmic work together better:
\newcommand{\theHalgorithm}{\arabic{algorithm}}
% Use the following line for the initial blind version submitted for review:
\usepackage{icml2024}
% If accepted, instead use the following line for the camera-ready submission:
% \usepackage[accepted]{icml2024}
% For theorems and such
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
% if you use cleveref..
\usepackage[capitalize,noabbrev]{cleveref}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
% Todonotes is useful during development; simply uncomment the next line
% and comment out the line below the next line to turn off comments
%\usepackage[disable,textsize=tiny]{todonotes}
\usepackage[textsize=tiny]{todonotes}
% The \icmltitle you define below is probably too long as a header.
% Therefore, a short form for the running title is supplied here:
\icmltitlerunning{Is Minimizing Errors the Only Option for Value-based RL?}
\begin{document}
\twocolumn[
\icmltitle{Is Minimizing Errors the Only Option for Value-based Reinforcement Learning?}
% It is OKAY to include author information, even for blind
% submissions: the style file will automatically remove it for you
% unless you've provided the [accepted] option to the icml2024
% package.
% List of affiliations: The first argument should be a (short)
% identifier you will use later to specify author affiliations
% Academic affiliations should list Department, University, City, Region, Country
% Industry affiliations should list Company, City, Region, Country
% You can specify symbols, otherwise they are numbered in order.
% Ideally, you should not use this facility. Affiliations will be numbered
% in order of appearance and this is the preferred way.
\icmlsetsymbol{equal}{*}
\begin{icmlauthorlist}
\icmlauthor{Firstname1 Lastname1}{equal,yyy}
\icmlauthor{Firstname2 Lastname2}{equal,yyy,comp}
\icmlauthor{Firstname3 Lastname3}{comp}
\icmlauthor{Firstname4 Lastname4}{sch}
\icmlauthor{Firstname5 Lastname5}{yyy}
\icmlauthor{Firstname6 Lastname6}{sch,yyy,comp}
\icmlauthor{Firstname7 Lastname7}{comp}
%\icmlauthor{}{sch}
\icmlauthor{Firstname8 Lastname8}{sch}
\icmlauthor{Firstname8 Lastname8}{yyy,comp}
%\icmlauthor{}{sch}
%\icmlauthor{}{sch}
\end{icmlauthorlist}
\icmlaffiliation{yyy}{Department of XXX, University of YYY, Location, Country}
\icmlaffiliation{comp}{Company Name, Location, Country}
\icmlaffiliation{sch}{School of ZZZ, Institute of WWW, Location, Country}
\icmlcorrespondingauthor{Firstname1 Lastname1}{first1.last1@xxx.edu}
\icmlcorrespondingauthor{Firstname2 Lastname2}{first2.last2@www.uk}
% You may provide any keywords that you
% find helpful for describing your paper; these are used to populate
% the "keywords" metadata in the PDF but will not be shown in the document
\icmlkeywords{Machine Learning, ICML}
\vskip 0.3in
]
% this must go after the closing bracket ] following \twocolumn[ ...
% This command actually creates the footnote in the first column
% listing the affiliations and the copyright notice.
% The command takes one argument, which is text to display at the start of the footnote.
% The \icmlEqualContribution command is standard text for equal contribution.
% Remove it (just {}) if you do not need this facility.
%\printAffiliationsAndNotice{} % leave blank if no need to mention equal contribution
\printAffiliationsAndNotice{\icmlEqualContribution} % otherwise use the standard text.
\begin{abstract}
In the regression task of supervised learning,
we need to minimize the error while balancing the bias--variance trade-off.
Drawing on this idea, the existing research on
value-based reinforcement learning also minimizes the error.
However, is error minimization really the only option
for value-based reinforcement learning?
We can easily observe that the policy on action
choosing probabilities is often related to the relative values,
and has nothing to do with their absolute values.
Based on this observation, we propose the objective
of variance minimization instead of error minimization,
derive corresponding on-policy and off-policy algorithms,
and conduct an analysis of the convergence rate along with experiments.
The experimental results show that our proposed variance minimization algorithms
converge much faster.
\end{abstract}
\section{Introduction}
\label{introduction}
Reinforcement learning can be mainly divided into two
categories: value-based reinforcement learning
and policy gradient-based reinforcement learning. This
paper focuses on temporal difference learning based on
linear approximated valued functions. Its research is
usually divided into two steps: the first step is to establish the convergence of the algorithm, and the second
step is to accelerate the algorithm.
In terms of stability, \citet{sutton1988learning} established the
convergence of on-policy TD(0), and \citet{tsitsiklis1997analysis}
established the convergence of on-policy TD($\lambda$).
However, ``The deadly triad'' consisting of off-policy learning,
bootstrapping, and function approximation makes
the stability a difficult problem \citep{Sutton2018book}.
To solve this problem, convergent off-policy temporal difference
learning algorithms are proposed, e.g., BR \cite{baird1995residual},
GTD \cite{sutton2008convergent}, GTD2 and TDC \cite{sutton2009fast},
ETD \cite{sutton2016emphatic}, and MRetrace \cite{chen2023modified}.
In terms of acceleration, \citet{hackman2012faster}
proposed Hybrid TD algorithm with on-policy matrix.
\citet{liu2015finite,liu2016proximal,liu2018proximal} proposed
true stochastic algorithms, i.e., GTD-MP and GTD2-MP, from
a convex-concave saddle-point formulation.
Second-order methods are used to accelerate TD learning,
e.g., Quasi Newton TD \cite{givchi2015quasi} and
accelerated TD (ATD) \citep{pan2017accelerated}.
\citet{hallak2016generalized} introduced a new parameter
to reduce the variance of ETD.
\citet{zhang2022truncated} proposed truncated ETD with a lower variance.
Variance Reduced TD with a direct variance reduction technique \citep{johnson2013accelerating} was proposed by \citet{korda2015td}
and analyzed by \citet{xu2019reanalysis}.
How to further improve the convergence rates of reinforcement learning
algorithms is currently still an open problem.
Algorithm stability is prominently reflected in the changes
to the objective function, transitioning from mean squared
errors (MSE) \citep{Sutton2018book} to mean squared Bellman errors (MSBE) \cite{baird1995residual}, then to
norm of the expected TD update \cite{sutton2009fast}, and further to
mean squared projected Bellman errors (MSPBE) \cite{sutton2009fast}. On the other hand, algorithm
acceleration is more centered around optimizing the iterative
update formula of the algorithm itself without altering the
objective function, thereby speeding up the convergence rate
of the algorithm. The emergence of new optimization objective
functions often leads to the development of novel algorithms.
The introduction of new algorithms, in turn, tends to inspire
researchers to explore methods for accelerating algorithms,
leading to the iterative creation of increasingly superior algorithms.
The kernel loss function can be optimized using standard
gradient-based methods, addressing the issue of double
sampling in residual gradient algorithm \cite{feng2019kernel}. It ensures convergence
in both on-policy and off-policy scenarios. The logistic Bellman
error is convex and smooth in the action-value function parameters,
with bounded gradients \cite{basserrano2021logistic}. In contrast, the squared Bellman error is
not convex in the action-value function parameters, and RL algorithms
based on recursive optimization using it are known to be unstable.
% The value-based algorithms mentioned above aim to
% minimize some errors, e.g., mean squared errors \citep{Sutton2018book},
% mean squared Bellman errors \cite{baird1995residual}, norm
% of the expected TD update \cite{sutton2009fast},
% mean squared projected Bellman errors (MSPBE) \cite{sutton2009fast}, etc.
It is necessary to propose a new objective function; however, the objective functions mentioned above are all some form of error.
Is minimizing error the only option for value-based reinforcement learning?
Error can be decomposed into bias, variance and unavoidable noise.
Among them, bias measures the difference between the predicted
values of the model and the true values, reflecting the
model's fitting ability. Variance, on the other hand,
quantifies the model's sensitivity to different training data,
indicating its stability and generalization ability.
Balancing bias and variance is important, as they represent trade-offs
\cite{zhou2021machine}.
In the context of this paper, where only a linear model is
considered and the model complexity is not adjusted,
it is difficult to improve the bias. High bias indicates that the model poorly
fits the training data, resulting in underfitting.
In supervised learning, high bias is generally
considered unacceptable.
However, in reinforcement learning, high bias may
be acceptable in certain cases. This is due to the
observation that policies based on value functions, such
as greedy, $\epsilon$-greedy, and softmax policies, often rely on
the relative values of action values rather than their
absolute values when selecting different actions.
Based on this observation, we propose alternate objective functions
instead of minimizing errors. We minimize Variance of Bellman Error (VBE) and
Variance of Projected Bellman Error (VPBE),
and derive Variance Minimization (VM) algorithms.
These algorithms preserve the invariance of the optimal policy
while significantly reducing the variance of gradient estimation,
thus hastening convergence.
The contributions of this paper are as follows:
(1) Introduction of novel objective functions based on
the invariance of the optimal policy.
(2) Derivation of two algorithms, one on-policy and one off-policy.
(3) Proof of their convergence.
(4) Analysis of the convergence rate of on-policy algorithm.
(5) Experiments demonstrating the faster convergence speed of the proposed algorithms.
\section{Preliminaries}
\label{preliminaries}
A reinforcement learning agent interacts with the environment, observes states,
makes sequential decisions to influence the environment, and obtains
rewards.
Consider an infinite-horizon discounted
Markov Decision Process (MDP), defined by a tuple $\langle S,A,R,P,\gamma
\rangle$, where $S=\{1,2,\ldots,N\}$ is a finite set of states of the environment; $A$
is a finite set of actions of the agent;
$R:S\times A \times S \rightarrow \mathbb{R}$ is a bounded deterministic reward
function; $P:S\times A\times S \rightarrow [0,1]$ is the transition
probability distribution; and $\gamma\in (0,1)$
is the discount factor \cite{Sutton2018book}.
Due to the requirements of online learning, value iteration based on sampling
is considered in this paper.
In each sampling, an experience (or transition) $\langle s, a, s', r\rangle$ is
obtained.
A policy is a mapping $\pi:S\times A \rightarrow [0,1]$. The goal of the
agent is to find an optimal policy $\pi^*$ that maximizes the expectation of
the discounted cumulative reward over the long run.
State value function $V^{\pi}(s)$ for a stationary policy $\pi$ is
defined as:
\begin{equation*}
V^{\pi}(s)=\mathbb{E}_{\pi}[\sum_{k=0}^{\infty} \gamma^k R_{k}|s_0=s].
\label{valuefunction}
\end{equation*}
Linear value function for state $s\in S$ is defined as:
\begin{equation}
V_{{\theta}}(s):= {\theta}^{\top}{\phi}(s) = \sum_{i=1}^{m}
\theta_i \phi_i(s),
\label{linearvaluefunction}
\end{equation}
where ${\theta}:=(\theta_1,\theta_2,\ldots,\theta_m)^{\top}\in
\mathbb{R}^m$ is a parameter vector,
${\phi}:=(\phi_1,\phi_2,\ldots,\phi_m)^{\top}\in \mathbb{R}^m$ is a feature
function defined on state space $S$, and $m$ is the feature size.
Tabular temporal difference (TD) learning \cite{Sutton2018book} has been successfully applied to small-scale problems.
To deal with the well-known curse of dimensionality of large scale MDPs, value
function is usually approximated by a linear model, kernel methods, decision
trees, or neural networks, etc. This paper focuses on the linear model, where
features are usually hand coded by domain experts.
TD learning can also be used to find optimal strategies. The problem of finding an optimal policy is
often called the control problem. Two popular TD methods are Sarsa and Q-learning. The former is an on-policy
TD control, while the latter is an off-policy control.
It is well known that TDC algorithm \cite{sutton2009fast} guarantees
convergence under off-policy conditions while the off-policy TD algorithm may diverge. The
objective function of TDC is MSPBE.
TDC is essentially an adjustment or correction of the TD update so that it
follows the gradient of the MSPBE objective function. In the context of the TDC algorithm, the control algorithm
is known as Greedy-GQ($\lambda$) \cite{sutton2009fast}. When $\lambda$ is set to 0, it is denoted
as GQ(0).
\section{Variance Minimization Algorithms}
\subsection{Motivation}
In reinforcement learning, bias is acceptable,
while in supervised learning it is not. As shown
in Table \ref{example_bias}, although there is a bias between the
true value and the predicted value, action $a_3$ is
still chosen under the greedy-policy.
On the contrary, supervised learning is usually used to predict temperature, humidity, morbidity, etc. If the bias is too large, the consequences could be serious.
\begin{table}[t]
\caption{An illustrative example: adding a constant bias to all
$Q$ values does not change the greedy action.}
\label{example_bias}
\vskip 0.15in
\begin{center}
\begin{small}
\begin{sc}
\begin{tabular}{lcccr}
\toprule
action & $Q$ value & $Q$ value with bias \\
\midrule
$Q(s, a_0)$ & 1& 5 \\
$Q(s, a_1)$ & 2& 6 \\
$Q(s, a_2)$ & 3& 7 \\
$Q(s, a_3)$ & 4& 8 \\
$\arg \max_{a}Q(s,a)$ & $a_3$& $a_3$\\
\bottomrule
\end{tabular}
\end{sc}
\end{small}
\end{center}
\vskip -0.1in
\end{table}
In addition, reward shaping can significantly speed up the learning by adding a shaping
reward $F(s,s')$ to the original reward $r$,
where $F(s,s')$ is the general form of any state-based shaping reward.
Static potential-based reward shaping (Static PBRS) maintains the policy invariance if the
shaping reward follows from $F(s,s')=\gamma
f(s')-f(s)$ \cite{ng1999policy}.
This means that we can make changes to the TD error $\delta = r+\gamma \theta^{\top}\phi'-\theta^{\top}\phi $ while still ensuring the invariance of the optimal policy,
\begin{equation*}
\delta - \omega= r+\gamma \theta^{\top}\phi'-\theta^{\top}\phi - \omega,
\end{equation*}
where $\omega$ is a constant, acting as a static PBRS.
This also means that algorithms with the optimization goal
of minimizing errors, after introducing reward shaping,
may result in larger or smaller bias. Fortunately,
as discussed above, bias is acceptable in reinforcement
learning.
However, the problem is that selecting an appropriate
$\omega$ requires expert knowledge. This forces us to learn
$\omega$ dynamically, i.e., $\omega=\omega_t $ and dynamic PBRS can also maintain the policy
invariance if the shaping reward is $F(s,t,s',t')=\gamma f(s',t')-f(s,t)$,
where $t$ is the time-step the agent reaches in state $s$
\cite{devlin2012dynamic}.
However, this result requires the convergence guarantee of the dynamic potential
function $f(s,t)$. If $f(s,t)$ does not converge as the time-step
$t\rightarrow\infty$, the Q-values of dynamic PBRS are not
guaranteed to converge.
Let $f_{\omega_t}(s)=\frac{\omega_t}{\gamma-1}$.
Thus, $F_{\omega_t}(s,s')=\gamma f_{\omega_t}(s')-f_{\omega_t}(s)= \omega_t$
is a dynamic PBRS. If $\omega$ eventually converges, the dynamic potential
function $f(s,t)$ will also converge.
Bias is the expected difference between the predicted value
and the true value. Therefore, under the premise of bootstrapping, we first think of
letting $\omega \doteq \mathbb{E}[\mathbb{E}[\delta|s]]=\mathbb{E}[\delta]$.
As we all know, the optimization process of linear TD(0) (semi-gradient) and linear TDC are as follows, respectively:
\begin{equation*}
\theta^{*}= \arg \min_{\theta} \mathbb{E}[(\mathbb{E}[\delta |s])^2],
\end{equation*}
and
\begin{equation*}
\theta^{*}=\arg \min_{\theta} \mathbb{E}[\delta \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1} \mathbb{E}[\delta\phi].
\end{equation*}
As a result, two novel objective functions and their corresponding algorithms are proposed,
where $\omega$ is subsequently proven to converge, meaning that these two algorithms can maintain the invariance of the optimal strategy.
\subsection{Variance Minimization TD Learning: VMTD}
For on-policy learning,
a novel objective function, Variance of Bellman Error (VBE), is proposed as follows:
\begin{equation}
\begin{array}{ccl}
\arg \min_{\theta}\text{VBE}(\theta)&=&\arg \min_{\theta}\mathbb{E}[(\mathbb{E}[\delta|s]-\mathbb{E}[\mathbb{E}[\delta|s]])^2]\\
&=&\arg \min_{\theta,\omega} \mathbb{E}[(\mathbb{E}[\delta|s]-\omega)^2].
\end{array}
\end{equation}
Clearly, the goal is no longer to minimize the Bellman error.
First, the parameter $\omega$ is derived directly based on
stochastic gradient descent:
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k-\omega_k),
\label{omega}
\end{equation}
where $\delta_k$ is the TD error as follows:
\begin{equation}
\delta_k = r+\gamma
\theta_k^{\top}\phi_{k}'-\theta_k^{\top}\phi_k.
\label{delta}
\end{equation}
Then, based on stochastic semi-gradient descent, the update of
the parameter $\theta$ is as follows:
\begin{equation}
\theta_{k+1}\leftarrow
\theta_{k}+\alpha_k(\delta_k-\omega_k)\phi_k.
\label{theta}
\end{equation}
The pseudocode of the VMTD algorithm is shown in Algorithm \ref{alg:algorithm 1}.
For control tasks, two extensions of VMTD are named VMSarsa and VMQ respectively,
and the update formulas are shown below:
\begin{equation}
\theta_{k+1}\leftarrow
\theta_{k}+\alpha_k(\delta_k-\omega_k)\phi(s_k,a_k).
\end{equation}
and
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k-\omega_k),
\end{equation}
where the TD error $\delta_k$ in VMSarsa is:
\begin{equation}
\delta_{k}=r_{k+1}+\gamma \theta_{k}^{\top}\phi(s_{k+1},a_{k+1}) - \theta_{k}^{\top}\phi(s_{k},a_{k}),
\label{deltaSarsa}
\end{equation}
and the TD error $\delta_k$ in VMQ is:
\begin{equation}
\delta_{k}=r_{k+1}+\gamma \max_{a\in A}\theta_{k}^{\top}\phi(s_{k+1},a) - \theta_{k}^{\top}\phi(s_{k},a_{k}).
\label{deltaQ}
\end{equation}
\begin{algorithm}[t]
\caption{VMTD algorithm with linear function approximation in the on-policy setting}
\label{alg:algorithm 1}
\begin{algorithmic}
\STATE {\bfseries Input:} $\theta_{0}$, $\omega_{0}$, $\gamma
$, learning rate $\alpha_t$ and $\beta_t$
\REPEAT
\STATE For any episode, initialize $\theta_{0}$ arbitrarily, $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$ and $\beta_t$ are constant.\\
\FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take $A_t$ from $S_t$ according to policy $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
\STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t}'-\theta_t^{\top}\phi_t$
\STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t(\delta_t-\omega_t)\phi_t$
\STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t(\delta_t-\omega_t)$
\STATE $S_t=S_{t+1}$
\ENDFOR
\UNTIL{terminal episode}
\end{algorithmic}
\end{algorithm}
% \begin{algorithm}[t]
% \caption{VMTDC algorithm with linear function approximation in the off-policy setting}
% \label{alg:algorithm 2}
% \begin{algorithmic}
% \STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
% $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
% \REPEAT
% \STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_t$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
% \textbf{Output}: $\theta^*$.\\
% \FOR{$t=0$ {\bfseries to} $T-1$}
% \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
% \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
% \STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
% \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
% \STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t[\rho_t (\delta_t-\omega_t)\phi_t - \gamma \phi_{t+1}(\phi^{\top}_{t} u_t)]$
% \STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \phi^{\top}_{t} u_t] \phi_t$
% \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
% \STATE $S_t=S_{t+1}$
% \ENDFOR
% \UNTIL{terminal episode}
% \end{algorithmic}
% \end{algorithm}
\subsection{Variance Minimization TDC Learning: VMTDC}
For off-policy learning, we employ a projection operator.
The objective function is called Variance of Projected Bellman error (VPBE),
and the corresponding algorithm is called VMTDC.
\begin{equation}
\begin{array}{ccl}
\text{VPBE}(\theta)&=&\mathbb{E}[(\delta-\mathbb{E}[\delta]) \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta])\phi]\\
&=&\mathbb{E}[(\delta-\omega) \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1}\mathbb{E}[(\delta-\omega)\phi],
\end{array}
\end{equation}
where $\omega$ is used to estimate $\mathbb{E}[\delta]$, i.e., $\omega \doteq \mathbb{E}[\delta]$.
The derivation process of the VMTDC algorithm is the same
as that of the TDC algorithm, the only difference is that the original $\delta$ is replaced by $\delta-\omega$.
Therefore, we can easily get the updated formula of VMTDC, as follows:
% \begin{equation*}
% \rho_{k} \leftarrow \frac{\pi(a_k | s_k)}{\mu(a_k | s_k)},
% \end{equation*}
\begin{equation}
\begin{array}{ccl}
\theta_{k+1}&\leftarrow&\theta_{k}+\alpha_{k}[(\delta_{k}- \omega_k) \phi(s_k)\\
&&- \gamma\phi(s_{k+1})(\phi^{\top} (s_k) u_k)],
\end{array}
\label{thetavmtdc}
\end{equation}
\begin{equation}
u_{k+1}\leftarrow u_{k}+\zeta_{k}[\delta_{k}-\omega_k - \phi^{\top} (s_k) u_k]\phi(s_k),
\label{uvmtdc}
\end{equation}
and
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k (\delta_k- \omega_k).
\label{omegavmtdc}
\end{equation}
The pseudocode of the VMTDC algorithm for importance-sampling scenario is shown in Algorithm \ref{alg:algorithm 2} of Appendix \ref{proofth2}.
Now, we will introduce the improved version of the GQ(0) algorithm, named VMGQ(0):
\begin{equation}
\begin{array}{ccl}
\theta_{k+1}\leftarrow\theta_{k}&+&\alpha_{k}[(\delta_{k}- \omega_k) \phi(s_k,a_k)\\
&-& \gamma\phi(s_{k+1},A^{*}_{k+1})(\phi^{\top} (s_k,a_k) u_k)],
\end{array}
\end{equation}
\begin{equation}
u_{k+1}\leftarrow u_{k}+\zeta_{k}[(\delta_{k}-\omega_k) - \phi^{\top} (s_k,a_k) u_k]\phi(s_k,a_k),
\end{equation}
and
\begin{equation}
\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k- \omega_k),
\end{equation}
where $\delta_{k}$ is (\ref{deltaQ}) and $A^{*}_{k+1}={\arg \max}_{a}(\theta_{k}^{\top}\phi(s_{k+1},a))$.
\section{Theoretical Analysis}
The purpose of this section is to establish the stabilities of the VMTD algorithm
and the VMTDC algorithm, and also presents a corollary on the convergence rate of VMTD.
\begin{theorem}
\label{theorem1}(Convergence of VMTD).
In the case of on-policy learning, consider the iterations (\ref{omega}) and (\ref{theta}) with (\ref{delta}) of VMTD.
Let the step-size sequences $\alpha_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\beta_k>0$, for all $k$,
$
\sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\infty,
$
$
\sum_{k=0}^{\infty}\alpha_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\beta_k^2<\infty,
$
and
$
\alpha_k = o(\beta_k).
$
Assume that $(\phi_k,r_k,\phi_k')$ is an i.i.d. sequence with
uniformly bounded second moments, where $\phi_k$ and $\phi'_{k}$ are sampled from the same Markov chain.
Let $A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$,
$b=\mathrm{Cov}(r,\phi)$.
Assume that matrix $A$ is non-singular.
Then the parameter vector $\theta_k$ converges with probability one
to $A^{-1}b$.
\end{theorem}
\begin{proof}
\label{th1proof}
The proof is based on Borkar's Theorem for
general stochastic approximation recursions with two time scales
\cite{borkar1997stochastic}.
% The new TD error for the linear setting is
% \begin{equation*}
% \delta_{\text{new}}=r+\gamma
% \theta^{\top}\phi'-\theta^{\top}\phi-\mathbb{E}[\delta].
% \end{equation*}
A new one-step
linear TD solution is defined
as:
\begin{equation*}
0=\mathbb{E}[(\delta-\mathbb{E}[\delta]) \phi]=-A\theta+b.
\end{equation*}
Thus, the VMTD's solution is
$\theta_{\text{VMTD}}=A^{-1}b$.
First, note that recursion (\ref{theta}) can be rewritten as
\begin{equation*}
\theta_{k+1}\leftarrow \theta_k+\beta_k\xi(k),
\end{equation*}
where
\begin{equation*}
\xi(k)=\frac{\alpha_k}{\beta_k}(\delta_k-\omega_k)\phi_k
\end{equation*}
Due to the settings of step-size schedule $\alpha_k = o(\beta_k)$,
$\xi(k)\rightarrow 0$ almost surely as $k\rightarrow\infty$.
That is the increments in iteration (\ref{omega}) are uniformly larger than
those in (\ref{theta}), thus (\ref{omega}) is the faster recursion.
Along the faster time scale, iterations of (\ref{omega}) and (\ref{theta})
are associated to ODEs system as follows:
\begin{equation}
\dot{\theta}(t) = 0,
\label{thetaFast}
\end{equation}
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|\theta(t)]-\omega(t).
\label{omegaFast}
\end{equation}
Based on the ODE (\ref{thetaFast}), $\theta(t)\equiv \theta$ when
viewed from the faster timescale.
By the Hirsch lemma \cite{hirsch1989convergent}, it follows that
$||\theta_k-\theta||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$\theta$ that depends on the initial condition $\theta_0$ of recursion
(\ref{theta}).
Thus, the ODE pair (\ref{thetaFast})-(\ref{omegaFast}) can be written as
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|\theta]-\omega(t).
\label{omegaFastFinal}
\end{equation}
Consider the function $h(\omega)=\mathbb{E}[\delta|\theta]-\omega$,
i.e., the driving vector field of the ODE (\ref{omegaFastFinal}).
It is easy to see that the function $h$ is Lipschitz with coefficient
$1$.
Let $h_{\infty}(\cdot)$ be the function defined by
$h_{\infty}(\omega)=\lim_{x\rightarrow \infty}\frac{h(x\omega)}{x}$.
Then $h_{\infty}(\omega)= -\omega$ is well-defined.
For (\ref{omegaFastFinal}), $\omega^*=\mathbb{E}[\delta|\theta]$
is the unique globally asymptotically stable equilibrium.
For the ODE
\begin{equation}
\dot{\omega}(t) = h_{\infty}(\omega(t))= -\omega(t),
\label{omegaInfty}
\end{equation}
apply $\vec{V}(\omega)=(-\omega)^{\top}(-\omega)/2$ as its
associated strict Lyapunov function. Then,
the origin of (\ref{omegaInfty}) is a globally asymptotically stable
equilibrium.
Consider now the recursion (\ref{omega}).
Let
$M_{k+1}=(\delta_k-\omega_k)
-\mathbb{E}[(\delta_k-\omega_k)|\mathcal{F}(k)]$,
where $\mathcal{F}(k)=\sigma(\omega_l,\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ are the sigma fields
generated by $\omega_0,\theta_0,\omega_{l+1},\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
It is easy to verify that $M_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[M_{k+1}|\mathcal{F}(k)]=0$, $\forall k\geq0$.
Because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_1>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq
c_1(1+||\omega_k||^2+||\theta_k||^2).
\end{equation*}
Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified.
Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our
conditions on the step-size sequences $\alpha_k$, $\beta_k$. Thus,
by Theorem 2.2 of \cite{borkar2000ode} we obtain that
$||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$.
Consider now the slower time scale recursion (\ref{theta}).
Based on the above analysis, (\ref{theta}) can be rewritten as
\begin{equation*}
\theta_{k+1}\leftarrow
\theta_{k}+\alpha_k(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k.
\end{equation*}
Let $\mathcal{G}(k)=\sigma(\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ be the sigma fields
generated by $\theta_0,\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
Let
$
Z_{k+1} = Y_{k}-\mathbb{E}[Y_{k}|\mathcal{G}(k)],
$
where
\begin{equation*}
Y_{k}=(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k.
\end{equation*}
Consequently,
\begin{equation*}
\begin{array}{ccl}
\mathbb{E}[Y_k|\mathcal{G}(k)]&=&\mathbb{E}[(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k|\mathcal{G}(k)]\\
&=&\mathbb{E}[\delta_k\phi_k|\theta_k]
-\mathbb{E}[\mathbb{E}[\delta_k|\theta_k]\phi_k]\\
&=&\mathbb{E}[\delta_k\phi_k|\theta_k]
-\mathbb{E}[\delta_k|\theta_k]\mathbb{E}[\phi_k]\\
&=&\mathrm{Cov}(\delta_k|\theta_k,\phi_k),
\end{array}
\end{equation*}
where $\mathrm{Cov}(\cdot,\cdot)$ is a covariance operator.
Thus,
\begin{equation*}
\begin{array}{ccl}
Z_{k+1}&=&(\delta_k-\mathbb{E}[\delta_k|\theta_k])\phi_k-\mathrm{Cov}(\delta_k|\theta_k,\phi_k).
\end{array}
\end{equation*}
It is easy to verify that $Z_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[Z_{k+1}|\mathcal{G}(k)]=0$, $\forall k\geq0$.
Also, because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_2>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||Z_{k+1}||^2|\mathcal{G}(k)]\leq
c_2(1+||\theta_k||^2).
\end{equation*}
Consider now the following ODE associated with (\ref{theta}):
\begin{equation}
\begin{array}{ccl}
\dot{\theta}(t)&=&\mathrm{Cov}(\delta|\theta(t),\phi)\\
&=&\mathrm{Cov}(r+(\gamma\phi'-\phi)^{\top}\theta(t),\phi)\\
&=&\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\theta(t)^{\top}(\phi-\gamma\phi'),\phi)\\
&=&\mathrm{Cov}(r,\phi)-\theta(t)^{\top}\mathrm{Cov}(\phi-\gamma\phi',\phi)\\
&=&\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\phi-\gamma\phi',\phi)^{\top}\theta(t)\\
&=&\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\phi,\phi-\gamma\phi')\theta(t)\\
&=&-A\theta(t)+b.
\end{array}
\label{odetheta}
\end{equation}
Let $\vec{h}(\theta(t))$ be the driving vector field of the ODE
(\ref{odetheta}).
\begin{equation*}
\vec{h}(\theta(t))=-A\theta(t)+b.
\end{equation*}
Consider the cross-covariance matrix,
\begin{equation}
\begin{array}{ccl}
A &=& \mathrm{Cov}(\phi,\phi-\gamma\phi')\\
&=&\frac{\mathrm{Cov}(\phi,\phi)+\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')-\mathrm{Cov}(\gamma\phi',\gamma\phi')}{2}\\
&=&\frac{\mathrm{Cov}(\phi,\phi)+\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')-\gamma^2\mathrm{Cov}(\phi',\phi')}{2}\\
&=&\frac{(1-\gamma^2)\mathrm{Cov}(\phi,\phi)+\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')}{2},\\
\end{array}
\label{covariance}
\end{equation}
where we eventually used $\mathrm{Cov}(\phi',\phi')=\mathrm{Cov}(\phi,\phi)$
\footnote{The covariance matrix $\mathrm{Cov}(\phi',\phi')$ is equal to
the covariance matrix $\mathrm{Cov}(\phi,\phi)$ if the initial state is re-reachable or
initialized randomly in a Markov chain for on-policy update.}.
Note that the covariance matrix $\mathrm{Cov}(\phi,\phi)$ and
$\mathrm{Cov}(\phi-\gamma\phi',\phi-\gamma\phi')$ are semi-positive
definite. Then, the matrix $A$ is semi-positive definite because it is
a positively weighted linear combination of two semi-positive definite matrices
(\ref{covariance}).
Furthermore, $A$ is nonsingular due to the assumption.
Hence, the cross-covariance matrix $A$ is positive definite.
Therefore,
$\theta^*=A^{-1}b$ can be seen to be the unique globally asymptotically
stable equilibrium for ODE (\ref{odetheta}).
Let $\vec{h}_{\infty}(\theta)=\lim_{r\rightarrow
\infty}\frac{\vec{h}(r\theta)}{r}$. Then
$\vec{h}_{\infty}(\theta)=-A\theta$ is well-defined.
Consider now the ODE
\begin{equation}
\dot{\theta}(t)=-A\theta(t).
\label{odethetafinal}
\end{equation}
The ODE (\ref{odethetafinal}) has the origin as its unique globally asymptotically stable equilibrium.
Thus, the assumption (A1) and (A2) are verified.
\end{proof}
Theorem 3 in \cite{dalal2020tale} provides a general conclusion on the convergence speed of all linear two-timescale
algorithms. VMTD satisfies the assumptions of this theorem, leading
to the following corollary.
\begin{corollary}
\label{corollary4_2}
Consider the Sparsely Projected variant of VMTD. Then, for $\alpha_k = 1/(k+1)^{\alpha}$, $\beta_k = 1/(k+1)^{\beta}$,
$0<\beta<\alpha<1$, $p>1$, with probability larger than $1- \tau$, for all $k\geq N_3$, we have
\begin{equation}
||\theta'_{k} - \theta^{*}|| \le C_{3,\theta} \frac{\sqrt{\ln (4d_{1}^{2}(k+1)^{p}/\tau)} }{(k+1)^{\alpha / 2}}
\end{equation}
\begin{equation}
||\omega'_{n} - \omega^{*}|| \le C_{3,\omega} \frac{\sqrt{\ln (4d_{2}^{2}(k+1)^{p}/\tau)} }{(k+1)^{\beta / 2}},
\end{equation}
\end{corollary}
where $d_1$ and $d_2$ represent the dimensions of $\theta$ and $\omega$, respectively. For VMTD, $d_2 =1$.
The meanings of $N_3$,$C_{3,\theta}$ and $C_{3,\omega}$ are explained in \cite{dalal2020tale}.
The formulas for $\theta'_{k}$ and $\omega'_{n}$ can be found in (\ref{sparseprojectiontheta}) and (\ref{sparseprojectionomega}).
\begin{theorem}
\label{theorem2}(Convergence of VMTDC).
In the case of off-policy learning, consider the iterations (\ref{omegavmtdc}), (\ref{uvmtdc}) and (\ref{thetavmtdc}) of VMTDC.
Let the step-size sequences $\alpha_k$, $\zeta_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\zeta_k,\beta_k>0$, for all $k$,
$
\sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\sum_{k=0}^{\infty}\zeta_k=\infty,
$
$
\sum_{k=0}^{\infty}\alpha_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\zeta_k^2<\infty,
$
$
\sum_{k=0}^{\infty}\beta_k^2<\infty,
$
and
$
\alpha_k = o(\zeta_k),
$
$
\zeta_k = o(\beta_k).
$
Assume that $(\phi_k,r_k,\phi_k')$ is an i.i.d. sequence with
uniformly bounded second moments.
Let $A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$,
$b=\mathrm{Cov}(r,\phi)$, and $C=\mathbb{E}[\phi\phi^{\top}]$.
Assume that $A$ and $C$ are non-singular matrices.
Then the parameter vector $\theta_k$ converges with probability one
to $A^{-1}b$.
\end{theorem}
Please refer to the appendix \ref{proofth2} for detailed proof process.
\section{Experimental Studies}
This section assesses algorithm performance through experiments,
which are divided into policy evaluation experiments and control experiments.
\subsection{Testing Tasks}
\textbf{Random-walk:} as shown in Figure \ref{randomwalk}, all episodes
start in the center state, $C$, and proceed to left or right by one state on each
step, equiprobably. Episodes terminate either on the extreme left or
the extreme right, with a reward of $+1$ for terminating on the right and
$0$ otherwise. In this task, the true value for each state is the
probability of starting from that state and terminating on the right
\cite{Sutton2018book}.
Thus, the true values of states from $A$ to $E$ are
$\frac{1}{6},\frac{2}{6},\frac{3}{6},\frac{4}{6},\frac{5}{6}$, respectively.
The discount factor $\gamma=1.0$.
There are three standard kinds of features for random-walk problems: tabular
feature, inverted feature and dependent feature \cite{sutton2009fast}.
The feature matrices corresponding to three random walks are shown in Appendix \ref{experimentaldetails}.
Conduct experiments using
an on-policy approach in the Random-walk environment.
\begin{figure}
\begin{center}
\input{pic/randomwalk.tex}
% \captionsetup{width=0.5\textwidth}
\caption{Random walk.}
\label{randomwalk}
\end{center}
\end{figure}
\begin{figure}
\begin{center}
\input{pic/BairdExample.tex}
\caption{7-state version of Baird's off-policy counterexample.}
\label{bairdexample}
\end{center}
\end{figure}
\textbf{Baird's off-policy counterexample:} This task is well known as a
counterexample, in which TD diverges \cite{baird1995residual,sutton2009fast}. As
shown in Figure \ref{bairdexample}, reward for each transition is zero. Thus the true values are zeros for all states and for any given policy. The behaviour policy
chooses actions represented by solid lines with a probability of $\frac{1}{7}$
and actions represented by dotted lines with a probability of $\frac{6}{7}$. The
target policy is expected to choose the solid line with more probability than $\frac{1}{7}$,
and it chooses the solid line with probability of $1$ in this paper.
The discount factor $\gamma =0.99$, and the feature matrix is
defined in Appendix \ref{experimentaldetails} \cite{baird1995residual,sutton2009fast,maei2011gradient}.
\textbf{Maze}: The learning agent should find a shortest path from the upper
left corner to the lower right corner.
\begin{wrapfigure}{r}{4cm}
\centering
\includegraphics[scale=0.2]{pic/maze_13_13.pdf}
% \caption{The 2-state counterexample.}
\end{wrapfigure}
In each state,
there are four alternative actions: $up$, $down$, $left$, and $right$, which
takes the agent deterministically to the corresponding neighbour state,
except when a movement is blocked by an obstacle or the edge
of the maze. Rewards are $-1$ in all transitions until the
agent reaches the goal state.
The discount factor $\gamma=0.99$, and states $s$ are represented by tabular
features. The maximum number of moves in the game is set to 1000.
\textbf{The other three control environments}: Cliff Walking, Mountain Car, and Acrobot are
selected from the gym official website and correspond to the following
versions: ``CliffWalking-v0'', ``MountainCar-v0'' and ``Acrobot-v1''.
For specific details, please refer to the gym official website.
The maximum number of steps for the Mountain Car environment is set to 1000,
while the default settings are used for the other two environments. In Mountain car and Acrobot, features are generated by tile coding.
Please, refer to the Appendix \ref{experimentaldetails} for the selection of learning rates for all experiments.
\subsection{Experimental Results and Analysis}
\begin{figure}[htb]
\vskip 0.2in
\begin{center}
\subfigure[Dependent]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/dependent_new.pdf}
\label{DependentFull}
}
\subfigure[Tabular]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/tabular_new.pdf}
\label{TabularFull}
}
\\
\subfigure[Inverted]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/inverted_new.pdf}
\label{InvertedFull}
}
\subfigure[counterexample]{
\includegraphics[width=0.46\columnwidth, height=0.46\columnwidth]{pic/counterexample_quanju_new.pdf}
\label{CounterExampleFull}
}
\caption{Learning curves of four evaluation environments.}
\label{Evaluation_full}
\end{center}
\vskip -0.2in
\end{figure}
\begin{figure*}[htb]
\vskip 0.2in
\begin{center}
\subfigure[Maze]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/maze_complete.pdf}
\label{MazeFull}
}
\subfigure[Cliff Walking]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/cw_complete.pdf}
\label{CliffWalkingFull}
}
\\
\subfigure[Mountain Car]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/mt_complete.pdf}
\label{MountainCarFull}
}
\subfigure[Acrobot]{
\includegraphics[width=0.9\columnwidth, height=0.64\columnwidth]{pic/Acrobot_complete.pdf}
\label{AcrobotFull}
}
\caption{Learning curves of four control environments.}
\label{Complete_full}
\end{center}
\vskip -0.2in
\end{figure*}
\begin{table*}
\centering
\caption{Difference between R-learning and tabular VMQ.}
\vskip 0.15in
\begin{tabular}{c|cc}
\hline
algorithms&update formula \\
\hline
R-learning&$Q_{k+1}(s,a)\leftarrow Q_{k}(s,a)+\alpha_k(r_{k+1}-m_{k}+ \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a))$\\
&$m_{k+1}\leftarrow m_{k}+\beta_k(r_{k+1}+\max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-m_{k})$\\
tabular VMQ&$Q_{k+1}(s,a)\leftarrow Q_{k}(s,a)+\alpha_k(r_{k+1}+\gamma \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-\omega_k)$\\
&$\omega_{k+1}\leftarrow \omega_{k}+\beta_k(r_{k+1}+\gamma \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-\omega_{k})$\\
\hline
\end{tabular}
\label{differenceRandVMQ}
\vskip -0.1in
\end{table*}
% The learning rates of all algorithms in different environments are shown in Table \ref{lrofways}.
% Figure \ref{Complete_full} shows the experimental curves of different algorithms in four environments.
For policy evaluation experiments, compare the performance of the VMTD,
VMTDC, TD, and TDC algorithms.
The vertical axis is unified as RVBE.
For policy evaluation experiments, the criteria for evaluating
algorithms vary: the objective function minimized by our proposed
algorithms differs from that of the other algorithms. Therefore, to
ensure fairness in comparisons, this study only contrasts algorithm
performance in control settings.
This study will compare the performance of Sarsa, Q-learning, GQ(0),
AC, VMSarsa, VMQ, and VMGQ(0) in four control environments.
% All experiments involved in this paper were run independently for 100 times.
The learning curves of the algorithms corresponding to
policy evaluation experiments and control experiments are
shown in Figures \ref{Evaluation_full} and \ref{Complete_full}, respectively.
The shaded areas in Figures \ref{Evaluation_full} and \ref{Complete_full} represent the standard deviation (std).
In the random-walk tasks, VMTD and VMTDC exhibit excellent performance,
outperforming TD and TDC in the case of dependent random-walk.
In the 7-state counterexample task, TD diverges,
while VMTDC converges and performs better than TDC.
From the update formula, it can be observed that the VMTD algorithm, like TDC,
is also an adjustment or correction of the TD update.
What is more surprising is that VMTD also maintains
convergence and demonstrates the best performance.
In Maze, Mountain Car, and Acrobot,
the convergence speed of VMSarsa, VMQ, and VMGQ(0) has
been significantly improved compared to Sarsa, Q-learning,
and GQ(0), respectively. The performance of the AC algorithm
is at an intermediate level. The performances of VMSarsa,
VMQ, and VMGQ(0) in these three experimental environments
have no significant differences.
In Cliff Walking, Sarsa and
VMSarsa converge to slightly worse solutions compared to
other algorithms. The convergence speed of VMSarsa is significantly
better than that of Sarsa. The convergence speed of VMGQ(0) and VMQ
is better than other algorithms, and the performance of VMGQ(0) is
slightly better than that of VMQ.
In summary, the performance of VMSarsa,
VMQ, and VMGQ(0) is better than that of other algorithms.
In the Cliff Walking environment,
the performance of VMGQ(0) is slightly better than that of
VMSarsa and VMQ. In the other three experimental environments,
the performances of VMSarsa, VMQ, and VMGQ(0) are close.
\section{Related Work}
\subsection{Difference between VMQ and R-learning}
Tabular VMQ's update formula bears some resemblance
to R-learning's update formula. As shown in Table \ref{differenceRandVMQ}, the update formulas of the two algorithms have the following differences:
\\(1) The goal of the R-learning algorithm \cite{schwartz1993reinforcement} is to maximize the average
reward, rather than the cumulative reward, by learning an estimate
of the average reward. This estimate $m$ is then used to update the Q-values.
On the contrary, the $\omega$ in the tabular VMQ update formula eventually converges to $\mathbb{E}[\delta]$.
\\(2) When $\gamma=1$ in the tabular VMQ update formula, the
R-learning update formula is formally
the same as the tabular VMQ update formula.
Therefore, the R-learning algorithm can formally be
considered a special case of the VMQ algorithm.
\subsection{Variance Reduction for TD Learning}
The TD with centering algorithm (CTD) \cite{korda2015td}
was proposed, which directly applies variance reduction techniques to
the TD algorithm. The CTD algorithm updates its parameters using the
average gradient of a batch of Markovian samples and a projection operator.
Unfortunately, the authors’ analysis of the CTD algorithm contains technical
errors. The VRTD algorithm \cite{xu2020reanalysis} is also a variance-reduced algorithm that updates
its parameters using the average gradient of a batch of i.i.d. samples. The
authors of VRTD provide a technically sound analysis to demonstrate the
advantages of variance reduction.
\subsection{Variance Reduction for Policy Gradient Algorithms}
Policy gradient algorithms are a class of reinforcement
learning algorithms that directly optimize cumulative rewards.
REINFORCE is a Monte Carlo algorithm that estimates
gradients through sampling, but may have a high variance.
Baselines are introduced to reduce variance and to
accelerate learning \cite{Sutton2018book}. In Actor-Critic,
value function as a baseline and bootstrapping
are used to reduce variance, also accelerating convergence \cite{Sutton2018book}.
TRPO \cite{schulman2015trust} and PPO \cite{schulman2017proximal}
use generalized advantage
estimation, which combines multi-step bootstrapping and Monte Carlo
estimation to reduce variance, making gradient estimation more stable and
accelerating convergence.
In Variance Minimization,
the incorporation of $\omega \doteq \mathbb{E}[\delta]$
bears a striking resemblance to the use of a baseline
in policy gradient methods. The introduction of a baseline
in policy gradient techniques does not alter
the expected value of the update;
rather, it significantly impacts the variance of gradient estimation.
The addition of $\omega \doteq \mathbb{E}[\delta]$ in Variance Minimization
preserves the invariance of the optimal
policy while stabilizing gradient estimation,
reducing the variance of gradient estimation,
and hastening convergence.
\section{Conclusion and Future Work}
Value-based reinforcement learning typically aims
to minimize error as an optimization objective.
As an alternative, this study proposes two new objective
functions: VBE and VPBE, and derives an on-policy algorithm:
VMTD and an off-policy algorithm: VMTDC.
% The VMTD algorithm
% is essentially an adjustment or correction to the traditional
% TD update.
% Both
% algorithms are capable of stabilizing gradient estimation, reducing
% the variance of gradient estimation and accelerating convergence.
Both algorithms demonstrated superior performance in policy
evaluation and control experiments.
Future work may include, but is not limited
to, (1) analysis of the convergence rate of VMTDC;
(2) extensions of VBE and VPBE to multi-step returns;
(3) extensions to nonlinear approximations, such as neural networks.
% \section{Format of the Paper}
% All submissions must follow the specified format.
% \begin{figure}[ht]
% \vskip 0.2in
% \begin{center}
% \centerline{\includegraphics[width=\columnwidth]{icml_numpapers}}
% \caption{Historical locations and number of accepted papers for International
% Machine Learning Conferences (ICML 1993 -- ICML 2008) and International
% Workshops on Machine Learning (ML 1988 -- ML 1992). At the time this figure was
% produced, the number of accepted papers for ICML 2008 was unknown and instead
% estimated.}
% \label{icml-historical}
% \end{center}
% \vskip -0.2in
% \end{figure}
% \subsection{Figures}
% You may want to include figures in the paper to illustrate
% your approach and results. Such artwork should be centered,
% legible, and separated from the text. Lines should be dark and at
% least 0.5~points thick for purposes of reproduction, and text should
% not appear on a gray background.
% Label all distinct components of each figure. If the figure takes the
% form of a graph, then give a name for each axis and include a legend
% that briefly describes each curve. Do not include a title inside the
% figure; instead, the caption should serve this function.
% Number figures sequentially, placing the figure number and caption
% \emph{after} the graphics, with at least 0.1~inches of space before
% the caption and 0.1~inches after it, as in
% \cref{icml-historical}. The figure caption should be set in
% 9~point type and centered unless it runs two or more lines, in which
% case it should be flush left. You may float figures to the top or
% bottom of a column, and you may set wide figures across both columns
% (use the environment \texttt{figure*} in \LaTeX). Always place
% two-column figures at the top or bottom of the page.
% \subsection{Theorems and such}
% The preferred way is to number definitions, propositions, lemmas, etc. consecutively, within sections, as shown below.
% \begin{definition}
% \label{def:inj}
% A function $f:X \to Y$ is injective if for any $x,y\in X$ different, $f(x)\ne f(y)$.
% \end{definition}
% Using \cref{def:inj} we immediate get the following result:
% \begin{proposition}
% If $f$ is injective mapping a set $X$ to another set $Y$,
% the cardinality of $Y$ is at least as large as that of $X$
% \end{proposition}
% \begin{proof}
% Left as an exercise to the reader.
% \end{proof}
% \cref{lem:usefullemma} stated next will prove to be useful.
% \begin{lemma}
% \label{lem:usefullemma}
% For any $f:X \to Y$ and $g:Y\to Z$ injective functions, $f \circ g$ is injective.
% \end{lemma}
% \begin{theorem}
% \label{thm:bigtheorem}
% If $f:X\to Y$ is bijective, the cardinality of $X$ and $Y$ are the same.
% \end{theorem}
% An easy corollary of \cref{thm:bigtheorem} is the following:
% \begin{corollary}
% If $f:X\to Y$ is bijective,
% the cardinality of $X$ is at least as large as that of $Y$.
% \end{corollary}
% \begin{assumption}
% The set $X$ is finite.
% \label{ass:xfinite}
% \end{assumption}
% \begin{remark}
% According to some, it is only the finite case (cf. \cref{ass:xfinite}) that is interesting.
% \end{remark}
%restatable
% In the unusual situation where you want a paper to appear in the
% references without citing it in the main text, use \nocite
\nocite{langley00}
\bibliography{example_paper}
\bibliographystyle{icml2024}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% APPENDIX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newpage
\appendix
\onecolumn
\section{Relevant proofs}
\subsection{Proof of Corollary \ref{corollary4_2}}
\label{proofcorollary4_2}
The update formulas in linear two-timescale algorithms are as follows:
\begin{equation}
\theta_{k+1}=\theta_{k} + \alpha_{k}[h_1(\theta_{k},\omega_{k})+M^{(1)}_{k+1}],
\end{equation}
\begin{equation}
\omega_{k+1}=\omega_{k} + \beta_{k}[h_2(\theta_{k},\omega_{k})+M^{(2)}_{k+1}].
\end{equation}
where $\alpha_k, \beta_k \in \mathbb{R} $ are stepsizes and $M^{(1)} \in \mathbb{R}^{d_1}, M^{(2)} \in \mathbb{R}^{d_2}$
denote noise.
$h_1 : \mathbb{R}^{d_{1}}\times \mathbb{R}^{d_{2}}\rightarrow \mathbb{R}^{d_{1}}$ and
$h_2 : \mathbb{R}^{d_{1}}\times \mathbb{R}^{d_{2}}\rightarrow \mathbb{R}^{d_{2}}$ have the
form, respectively,
\begin{equation}
h_{1}(\theta,\omega)=v_1 - \Gamma_1 \theta - W_1\omega,
\end{equation}
\begin{equation}
h_{2}(\theta,\omega)=v_2 - \Gamma_2 \theta - W_2\omega,
\end{equation}
where $v_1 \in \mathbb{R}^{d_1}$, $v_2 \in \mathbb{R}^{d_2}$, $\Gamma_1 \in \mathbb{R}^{d_1 \times d_1}$
, $\Gamma_2 \in \mathbb{R}^{d_2 \times d_1}$, $W_1 \in \mathbb{R}^{d_1 \times d_2}$ and
$W_2 \in \mathbb{R}^{d_2 \times d_2}$. $d_1$ and $d_2$ are the dimensions of vectors $\theta$ and $\omega$, respectively.
For Theorem 3 in \cite{dalal2020tale}, the theorem still holds even when $d_1$ is not equal to $d_2$. For the VMTD algorithm, $d_2$ is equal to 1.
% Before proving the Corollary \ref{corollary4_2},
\cite{dalal2020tale} presents
the matrix assumption, step size assumption, and
defines sparse projection.
\begin{assumption}
\label{matrixassumption}
(Matrix Assumption).
$W_2$ and $X_1 = \Gamma_1 - W_1 W_{2}^{-1}\Gamma_2$ are positive definite(not necessarily symmetric).
\end{assumption}
\begin{assumption}
\label{stepsizeassumption}
(Step Size Assumption).
$\alpha_k = (k+1)^{-\alpha}$ and $\beta_k = (k+1)^{-\beta}$, where $1>\alpha > \beta > 0$.
\end{assumption}
\begin{definition}
\label{sparseprojection}
(Sparse Projection).
For $R>0$, let $\Pi_{R}(x)=\min \{1, R/||x||\}x$ be the projection into the ball with radius
$R$ around the origin. The sparse projection operator is
\begin{equation*}
\Pi_{n, R} = \begin{cases}
\Pi_{R}, & \text{if } k = n^{n} - 1 \text{ for some } n \in \mathbb{Z}_{>0}, \\
I, & \text{otherwise}.
\end{cases}
\end{equation*}
We call it sparse as it projects only on specific indices that are exponentially far apart.
Pick an arbitrary $p>1$. Fix some constant $R^{\theta}_{\text{proj}}>0$ and $R^{\omega}_{\text{proj}}>0$
for the radius of the projection ball. Further, let
\begin{equation*}
\theta^{*}=X^{-1}_{1}b_{1}, \omega^{*}=W^{-1}_{2}(v_2 - \Gamma_2 \theta^{*})
\end{equation*}
with $b_1=v_1 - W_1 W_2^{-1}v_2$.
The formula for the sparse projection update in linear two-timescale algorithms is as follows:
\begin{equation}
\label{sparseprojectiontheta}
\theta'_{k+1}=\Pi_{k+1,R^{\theta}_{\text{proj}}}(\theta'_{k} + \alpha_{k}[h_1(\theta'_{k},\omega'_{k})+M^{(1')}_{k+1}]),
\end{equation}
\begin{equation}
\label{sparseprojectionomega}
\omega'_{k+1}=\Pi_{k+1,R^{\omega}_{\text{proj}}}(\omega'_{k} + \beta_{k}[h_2(\theta'_{k},\omega'_{k})+M^{(2')}_{k+1}]).
\end{equation}
\end{definition}
\begin{proof}
As long as the VMTD algorithm satisfies Assumption \ref{matrixassumption},
the convergence speed of the VMTD algorithm can be
obtained.
VMTD's update rule is
\begin{equation*}
\theta_{k+1}=\theta_{k}+\alpha_k(\delta_k-\omega_k)\phi_k.
\end{equation*}
\begin{equation*}
\omega_{k+1}=\omega_{k}+\beta_k(\delta_k-\omega_k).
\end{equation*}
Thus, $h_1(\theta, \omega)=\mathrm{Cov}(r,\phi)-\mathrm{Cov}(\phi,\phi - \gamma\phi')\theta$,
$h_2(\theta, \omega)=\mathbb{E}[r]+\mathbb{E}[\gamma \phi'^{\top}-\phi^{\top}]\theta -\omega$,
$\Gamma_1 =\mathrm{Cov}(\phi,\phi - \gamma\phi')$,
$W_1 = 0$ and
$\Gamma_2 = -\mathbb{E}[\gamma \phi'^{\top}-\phi^{\top}]$,
$W_2 = 1$,
$v_2 = \mathbb{E}[r]$. Additionally,
$X_1=\Gamma_1 - W_1 W^{-1}_2 \Gamma_2 = \mathrm{Cov}(\phi,\phi - \gamma\phi')$.
% By the Assumption \ref{matrixassumption},
It can be deduced from the proof \ref{th1proof} that $X_1$ is a positive definite matrix.
The VMTD algorithm satisfies the Assumption \ref{matrixassumption}.
By the proof \ref{th1proof}, Definition 1 in \cite{dalal2020tale} is satisfied.
We can apply the Theorem 3 in \cite{dalal2020tale} to get the Corollary \ref{corollary4_2}.
\end{proof}
\subsection{Proof of Theorem \ref{theorem2}}
\label{proofth2}
\begin{proof}
The proof is similar to that given by \cite{sutton2009fast} for TDC, but it is based on multi-time-scale stochastic approximation.
For the VMTDC algorithm, a new one-step linear TD solution is defined as:
\begin{equation*}
0=\mathbb{E}[(\phi - \gamma \phi' - \mathbb{E}[\phi - \gamma \phi'])\phi^\top]\mathbb{E}[\phi \phi^{\top}]^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta])\phi]=A^{\top}C^{-1}(-A\theta+b).
\end{equation*}
The matrix $A^{\top}C^{-1}A$ is positive definite. Thus, the VMTDC solution coincides with the VMTD solution:
$\theta_{\text{VMTDC}}=\theta_{\text{VMTD}}=A^{-1}b$.
First, note that recursions (\ref{thetavmtdc}) and (\ref{uvmtdc}) can be rewritten as, respectively,
\begin{equation*}
\theta_{k+1}\leftarrow \theta_k+\zeta_k x(k),
\end{equation*}
\begin{equation*}
u_{k+1}\leftarrow u_k+\beta_k y(k),
\end{equation*}
where
\begin{equation*}
x(k)=\frac{\alpha_k}{\zeta_k}[(\delta_{k}- \omega_k) \phi_k - \gamma\phi'_{k}(\phi^{\top}_k u_k)],
\end{equation*}
\begin{equation*}
y(k)=\frac{\zeta_k}{\beta_k}[\delta_{k}-\omega_k - \phi^{\top}_k u_k]\phi_k.
\end{equation*}
Recursion (\ref{thetavmtdc}) can also be rewritten as
\begin{equation*}
\theta_{k+1}\leftarrow \theta_k+\beta_k z(k),
\end{equation*}
where
\begin{equation*}
z(k)=\frac{\alpha_k}{\beta_k}[(\delta_{k}- \omega_k) \phi_k - \gamma\phi'_{k}(\phi^{\top}_k u_k)],
\end{equation*}
Due to the settings of step-size schedule
$\alpha_k = o(\zeta_k)$, $\zeta_k = o(\beta_k)$, we have $x(k)\rightarrow 0$, $y(k)\rightarrow 0$, $z(k)\rightarrow 0$ almost surely as $k\rightarrow \infty$.
That is, the increments in iteration (\ref{omegavmtdc}) are uniformly larger than
those in (\ref{uvmtdc}) and the increments in iteration (\ref{uvmtdc}) are uniformly larger than
those in (\ref{thetavmtdc}); thus (\ref{omegavmtdc}) is the fastest recursion, (\ref{uvmtdc}) is the second fastest recursion and (\ref{thetavmtdc}) is the slowest recursion.
Along the fastest time scale, iterations of (\ref{thetavmtdc}), (\ref{uvmtdc}) and (\ref{omegavmtdc})
are associated to ODEs system as follows:
\begin{equation}
\dot{\theta}(t) = 0,
\label{thetavmtdcFastest}
\end{equation}
\begin{equation}
\dot{u}(t) = 0,
\label{uvmtdcFastest}
\end{equation}
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|u(t),\theta(t)]-\omega(t).
\label{omegavmtdcFastest}
\end{equation}
Based on the ODE (\ref{thetavmtdcFastest}) and (\ref{uvmtdcFastest}), both $\theta(t)\equiv \theta$
and $u(t)\equiv u$ when viewed from the fastest timescale.
By the Hirsch lemma \cite{hirsch1989convergent}, it follows that
$||\theta_k-\theta||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$\theta$ that depends on the initial condition $\theta_0$ of recursion
(\ref{thetavmtdc}) and $||u_k-u||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$u$ that depends on the initial condition $u_0$ of recursion
(\ref{uvmtdc}). Thus, the ODE system (\ref{thetavmtdcFastest})--(\ref{omegavmtdcFastest})
can be written as
\begin{equation}
\dot{\omega}(t)=\mathbb{E}[\delta_t|u,\theta]-\omega(t).
\label{omegavmtdcFastestFinal}
\end{equation}
Consider the function $h(\omega)=\mathbb{E}[\delta|\theta,u]-\omega$,
i.e., the driving vector field of the ODE (\ref{omegavmtdcFastestFinal}).
It is easy to find that the function $h$ is Lipschitz with coefficient
$1$.
Let $h_{\infty}(\cdot)$ be the function defined by
$h_{\infty}(\omega)=\lim_{r\rightarrow \infty}\frac{h(r\omega)}{r}$.
Then $h_{\infty}(\omega)= -\omega$, is well-defined.
For (\ref{omegavmtdcFastestFinal}), $\omega^*=\mathbb{E}[\delta|\theta,u]$
is the unique globally asymptotically stable equilibrium.
For the ODE
\begin{equation}
\dot{\omega}(t) = h_{\infty}(\omega(t))= -\omega(t),
\label{omegavmtdcInfty}
\end{equation}
apply $\vec{V}(\omega)=(-\omega)^{\top}(-\omega)/2$ as its
associated strict Lyapunov function. Then,
the origin of (\ref{omegavmtdcInfty}) is a globally asymptotically stable
equilibrium.
Consider now the recursion (\ref{omegavmtdc}).
Let
$M_{k+1}=(\delta_k-\omega_k)
-\mathbb{E}[(\delta_k-\omega_k)|\mathcal{F}(k)]$,
where $\mathcal{F}(k)=\sigma(\omega_l,u_l,\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ are the sigma fields
generated by $\omega_0,u_0,\theta_0,\omega_{l+1},u_{l+1},\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
It is easy to verify that $M_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[M_{k+1}|\mathcal{F}(k)]=0$, $\forall k\geq0$.
Because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_1>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq
c_1(1+||\omega_k||^2+||u_k||^2+||\theta_k||^2).
\end{equation*}
Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified.
Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our
conditions on the step-size sequences $\alpha_k$,$\zeta_k$, $\beta_k$. Thus,
by Theorem 2.2 of \cite{borkar2000ode} we obtain that
$||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$.
Consider now the second time scale recursion (\ref{uvmtdc}).
Based on the above analysis, (\ref{uvmtdc}) can be rewritten as
% \begin{equation*}
% u_{k+1}\leftarrow u_{k}+\zeta_{k}[\delta_{k}-\mathbb{E}[\delta_k|u_k,\theta_k] - \phi^{\top} (s_k) u_k]\phi(s_k).
% \end{equation*}
\begin{equation}
\dot{\theta}(t) = 0,
\label{thetavmtdcFaster}
\end{equation}
\begin{equation}
\dot{u}(t) = \mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|u(t),\theta(t)])\phi_t|\theta(t)] - Cu(t).
\label{uvmtdcFaster}
\end{equation}
The ODE (\ref{thetavmtdcFaster}) suggests that $\theta(t)\equiv \theta$ (i.e., a time invariant parameter)
when viewed from the second fast timescale.
By the Hirsch lemma \cite{hirsch1989convergent}, it follows that
$||\theta_k-\theta||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some
$\theta$ that depends on the initial condition $\theta_0$ of recursion
(\ref{thetavmtdc}).
Consider now the recursion (\ref{uvmtdc}).
Let
$N_{k+1}=((\delta_k-\mathbb{E}[\delta_k]) - \phi_k \phi^{\top}_k u_k) -\mathbb{E}[((\delta_k-\mathbb{E}[\delta_k]) - \phi_k \phi^{\top}_k u_k)|\mathcal{I} (k)]$,
where $\mathcal{I}(k)=\sigma(u_l,\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ are the sigma fields
generated by $u_0,\theta_0,u_{l+1},\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$.
It is easy to verify that $N_{k+1},k\geq0$ are integrable random variables that
satisfy $\mathbb{E}[N_{k+1}|\mathcal{I}(k)]=0$, $\forall k\geq0$.
Because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments, it can be seen that for some constant
$c_2>0$, $\forall k\geq0$,
\begin{equation*}
\mathbb{E}[||N_{k+1}||^2|\mathcal{I}(k)]\leq
c_2(1+||u_k||^2+||\theta_k||^2).
\end{equation*}
Because $\theta(t)\equiv \theta$ from (\ref{thetavmtdcFaster}), the ODE pair (\ref{thetavmtdcFaster})-(\ref{uvmtdcFaster})
can be written as
\begin{equation}
\dot{u}(t) = \mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\theta])\phi_t|\theta] - Cu(t).
\label{uvmtdcFasterFinal}
\end{equation}
Now consider the function $h(u)=\mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\theta])\phi_t|\theta] -Cu$, i.e., the
driving vector field of the ODE (\ref{uvmtdcFasterFinal}). For (\ref{uvmtdcFasterFinal}),
$u^* = C^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta|\theta])\phi|\theta]$ is the unique globally asymptotically
stable equilibrium. Let $h_{\infty}(u)=-Cu$.
For the ODE
\begin{equation}
\dot{u}(t) = h_{\infty}(u(t))= -Cu(t),
\label{uvmtdcInfty}
\end{equation}
the origin of (\ref{uvmtdcInfty}) is a globally asymptotically stable
equilibrium because $C$ is a positive definite matrix (because it is nonnegative definite and nonsingular).
Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified.
Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our
conditions on the step-size sequences $\alpha_k$,$\zeta_k$, $\beta_k$. Thus,
by Theorem 2.2 of \cite{borkar2000ode} we obtain that
$||u_k-u^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$.
Consider now the slower timescale recursion (\ref{thetavmtdc}). In the light of the above,
(\ref{thetavmtdc}) can be rewritten as
\begin{equation}
\theta_{k+1} \leftarrow \theta_{k} + \alpha_k (\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k\\
- \alpha_k \gamma\phi'_{k}(\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k]).
\end{equation}
Let $\mathcal{G}(k)=\sigma(\theta_l,l\leq k;\phi_s,\phi_s',r_s,s<k)$,
$k\geq 1$ be the sigma fields
generated by $\theta_0,\theta_{l+1},\phi_l,\phi_l'$,
$0\leq l<k$. Let
\begin{equation*}
\begin{array}{ccl}
Z_{k+1}&=&((\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k - \gamma \phi'_{k}\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k])\\
& &-\mathbb{E}[((\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k - \gamma \phi'_{k}\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k])|\mathcal{G}(k)]\\
&=&((\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k - \gamma \phi'_{k}\phi^{\top}_k C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k])\phi|\theta_k])\\
& &-\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k|\theta_k] - \gamma\mathbb{E}[\phi' \phi^{\top}]C^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\theta_k]) \phi_k|\theta_k].
\end{array}
\end{equation*}
It is easy to see that $Z_k$, $k\geq 0$ are integrable random variables and $\mathbb{E}[Z_{k+1}|\mathcal{G}(k)]=0$, $\forall k\geq0$. Further,
\begin{equation*}
\mathbb{E}[||Z_{k+1}||^2|\mathcal{G}(k)]\leq
c_3(1+||\theta_k||^2), k\geq 0
\end{equation*}
for some constant $c_3 \geq 0$, again because $\phi_k$, $r_k$, and $\phi_k'$ have
uniformly bounded second moments.
Consider now the following ODE associated with (\ref{thetavmtdc}):
\begin{equation}
\dot{\theta}(t) = (I - \mathbb{E}[\gamma \phi' \phi^{\top}]C^{-1})\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)].
\label{thetavmtdcSlowerFinal}
\end{equation}
Let
\begin{equation*}
\begin{array}{ccl}
\vec{h}(\theta(t))&=&(I - \mathbb{E}[\gamma \phi' \phi^{\top}]C^{-1})\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]\\
&=&(C - \mathbb{E}[\gamma \phi' \phi^{\top}])C^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]\\
&=& (\mathbb{E}[\phi \phi^{\top}] - \mathbb{E}[\gamma \phi' \phi^{\top}])C^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]\\
&=& A^{\top}C^{-1}(-A\theta(t)+b),
\end{array}
\end{equation*}
because $\mathbb{E}[(\delta -\mathbb{E}[\delta|\theta(t)]) \phi|\theta(t)]=-A\theta(t)+b$, where
$A = \mathrm{Cov}(\phi,\phi-\gamma\phi')$, $b=\mathrm{Cov}(r,\phi)$, and $C=\mathbb{E}[\phi\phi^{\top}]$.
Therefore,
$\theta^*=A^{-1}b$ can be seen to be the unique globally asymptotically
stable equilibrium for ODE (\ref{thetavmtdcSlowerFinal}).
Let $\vec{h}_{\infty}(\theta)=\lim_{r\rightarrow
\infty}\frac{\vec{h}(r\theta)}{r}$. Then
$\vec{h}_{\infty}(\theta)=-A^{\top}C^{-1}A\theta$ is well-defined.
Consider now the ODE
\begin{equation}
\dot{\theta}(t)=-A^{\top}C^{-1}A\theta(t).
\label{odethetavmtdcfinal}
\end{equation}
Because $C^{-1}$ is positive definite and $A$ has full rank (as it
is nonsingular by assumption), the matrix $A^{\top} C^{-1}A$ is also
positive definite.
The ODE (\ref{odethetavmtdcfinal}) has the origin as its unique globally asymptotically stable equilibrium.
Thus, the assumption (A1) and (A2) are verified.
The proof is given above.
In the fastest time scale, the parameter $\omega$ converges to
$\mathbb{E}[\delta|u_k,\theta_k]$.
In the second fast time scale,
the parameter $u$ converges to $C^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta|\theta_k])\phi|\theta_k]$.
In the slower time scale,
the parameter $\theta$ converges to $A^{-1}b$.
\end{proof}
\begin{algorithm}[t]
\caption{VMTDC algorithm with linear function approximation in the off-policy setting}
\label{alg:algorithm 2}
\begin{algorithmic}
\STATE {\bfseries Input:} $\theta_{0}$, $u_0$, $\omega_{0}$, $\gamma
$, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$
\REPEAT
\STATE For any episode, initialize $\theta_{0}$ arbitrarily, $u_0$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\
\textbf{Output}: $\theta^*$.\\
\FOR{$t=0$ {\bfseries to} $T-1$}
\STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\
\STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\
\STATE $\delta_t = R_{t+1}+\gamma\theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t$
\STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$
\STATE $\theta_{t+1}\leftarrow \theta_{t}+\alpha_t[\rho_t (\delta_t-\omega_t)\phi_t - \gamma \phi_{t+1}(\phi^{\top}_{t} u_t)]$
\STATE $u_{t+1}\leftarrow u_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \phi^{\top}_{t} u_t] \phi_t$
\STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$
\STATE $S_t=S_{t+1}$
\ENDFOR
\UNTIL{terminal episode}
\end{algorithmic}
\end{algorithm}
\section{Experimental details}
\label{experimentaldetails}
The feature matrices corresponding to three random walks are shown below respectively:
\begin{equation*}
\Phi_{tabular}=\left[
\begin{array}{ccccc}
1 & 0& 0& 0& 0\\
0 & 1& 0& 0& 0\\
0 & 0& 1& 0& 0\\
0 & 0& 0& 1& 0\\
0 & 0& 0& 0& 1
\end{array}\right]
\end{equation*}
\begin{equation*}
\Phi_{inverted}=\left[
\begin{array}{ccccc}
0 & \frac{1}{2}& \frac{1}{2}& \frac{1}{2}& \frac{1}{2}\\
\frac{1}{2} & 0& \frac{1}{2}& \frac{1}{2}& \frac{1}{2}\\
\frac{1}{2} & \frac{1}{2}& 0& \frac{1}{2}& \frac{1}{2}\\
\frac{1}{2} & \frac{1}{2}& \frac{1}{2}& 0& \frac{1}{2}\\
\frac{1}{2} & \frac{1}{2}& \frac{1}{2}& \frac{1}{2}& 0
\end{array}\right]
\end{equation*}
\begin{equation*}
\Phi_{dependent}=\left[
\begin{array}{ccc}
1 & 0& 0\\
\frac{1}{\sqrt{2}} & \frac{1}{\sqrt{2}}& 0\\
\frac{1}{\sqrt{3}} & \frac{1}{\sqrt{3}}& \frac{1}{\sqrt{3}}\\
0 & \frac{1}{\sqrt{2}}& \frac{1}{\sqrt{2}}\\
0 & 0& 1
\end{array}\right]
\end{equation*}
Three random walk experiments: the $\alpha$ values for
all algorithms are in the range of $\{0.008, 0.015, 0.03, 0.06, 0.12, 0.25, 0.5\}$. For the TDC algorithm,
the range of the ratio $\frac{\zeta}{\alpha}$ is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$. For the VMTD algorithm,
the range of the ratio $\frac{\beta}{\alpha}$ is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$. It can be observed from
the update formula of VMTDC that when $\zeta$ takes a very small value,
the VMTDC update tends to be similar to VMTD update. Similarly,
when $\beta$ takes a very small value, the VMTDC update tends to be
similar to TDC update. Through experiments, it was found that
setting $\zeta$ to a small value makes VMTDC updates approach VMTD
updates, resulting in better performance. Therefore, for the VMTDC
algorithm, the range of $\frac{\beta}{\alpha}$ ratio is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$, and the range of
$\zeta$ is $\{0.1, 0.01, 0.001, 0.0001, 0.00001\}$. The learning curves in Figure \ref{Evaluation_full} correspond to the optimal
parameters.
The feature matrix of 7-state version of Baird's off-policy counterexample is
defined as follow:
\begin{equation*}
\Phi_{Counter}=\left[
\begin{array}{cccccccc}
1 & 2& 0& 0& 0& 0& 0& 0\\
1 & 0& 2& 0& 0& 0& 0& 0\\
1 & 0& 0& 2& 0& 0& 0& 0\\
1 & 0& 0& 0& 2& 0& 0& 0\\
1 & 0& 0& 0& 0& 2& 0& 0\\
1 & 0& 0& 0& 0& 0& 2& 0\\
2 & 0& 0& 0& 0& 0& 0& 1
\end{array}\right]
\end{equation*}
7-state version of Baird's off-policy counterexample:
for TD algorithm, $\alpha$ is set to 0.1. For the TDC algorithm, the range of
$\alpha$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0\}$,
and the range of
$\zeta$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5\}$.
For the VMTD algorithm, the range of
$\alpha$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0\}$,
and the range of
$\beta$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5\}$. Through experiments, it was found
that setting $\zeta$ to a small value makes VMTDC updates approach VMTD
updates, resulting in better performance. Therefore, for the VMTDC
algorithm, The range of values for $\alpha$ and $\beta$ is the same as that of VMTD
and the range of $\zeta$
is $\{0.1, 0.01, 0.001, 0.0001, 0.00001\}$.
The learning curves in Figure \ref{Complete_full} correspond to the optimal parameters.
For all policy evaluation experiments, each experiment
is independently run 100 times.
For the four control experiments: The learning rates for each
algorithm in all experiments are shown in Table \ref{lrofways}.
For all control experiments, each experiment is independently run 50 times.
\begin{table*}[htb]
\centering
\caption{Learning rates ($lr$) of four control experiments.}
\vskip 0.15in
\begin{tabular}{c|ccccc}
\hline
\multicolumn{1}{c|}{\diagbox{algorithms($lr$)}{envs}} &Maze &Cliff walking &Mountain Car &Acrobot \\
\hline
Sarsa($\alpha$)&$0.1$ &$0.1$ &$0.1$ &$0.1$ \\
GQ(0)($\alpha,\zeta$)&$0.1,0.003$ &$0.1,0.004$ &$0.1,0.01$ &$0.1,0.01$ \\
VMSarsa($\alpha,\beta$)&$0.1,0.001$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ \\
VMGQ(0)($\alpha,\zeta,\beta$)&$0.1,0.001,0.001$ &$0.1,0.005,\text{1e-4}$ &$0.1,\text{5e-4},\text{1e-4}$ &$0.1,\text{5e-4},\text{1e-4}$ \\
AC($lr_{\text{actor}},lr_{\text{critic}}$)&$0.01,0.1$ &$0.01,0.01$ &$0.01,0.05$ &$0.01,0.05$ \\
Q-learning($\alpha$)&$0.1$ &$0.1$ &$0.1$ &$0.1$ \\
VMQ($\alpha,\beta$)&$0.1,0.001$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ \\
\hline
\end{tabular}
\label{lrofways}
\vskip -0.1in
\end{table*}
\end{document}
% fancyhdr.sty version 3.2
% Fancy headers and footers for LaTeX.
% Piet van Oostrum,
% Dept of Computer and Information Sciences, University of Utrecht,
% Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
% Telephone: +31 30 2532180. Email: piet@cs.uu.nl
% ========================================================================
% LICENCE:
% This file may be distributed under the terms of the LaTeX Project Public
% License, as described in lppl.txt in the base LaTeX distribution.
% Either version 1 or, at your option, any later version.
% ========================================================================
% MODIFICATION HISTORY:
% Sep 16, 1994
% version 1.4: Correction for use with \reversemargin
% Sep 29, 1994:
% version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
% Oct 4, 1994:
% version 1.6: Reset single spacing in headers/footers for use with
% setspace.sty or doublespace.sty
% Oct 4, 1994:
% version 1.7: changed \let\@mkboth\markboth to
% \def\@mkboth{\protect\markboth} to make it more robust
% Dec 5, 1994:
% version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
% importantly) use the \chapter/sectionmark definitions from ps@headings if
% they exist (which should be true for all standard classes).
% May 31, 1995:
% version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
% construction in the doc did not work properly with the fancyplain style.
% June 1, 1995:
% version 1.91: The definition of \@mkboth wasn't restored on subsequent
% \pagestyle{fancy}'s.
% June 1, 1995:
% version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
% \pagestyle{fancy} would erroneously select the plain version.
% June 1, 1995:
% version 1.93: \fancypagestyle command added.
% Dec 11, 1995:
% version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
% CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
% position (old hardcoded value of .3\normalbaselineskip is far too high
% when used with very small footer fonts).
% Jan 31, 1996:
% version 1.95: call \@normalsize in the reset code if that is defined,
% otherwise \normalsize.
% this is to solve a problem with ucthesis.cls, as this doesn't
% define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
% work as this is optimized to do very little, so there \@normalsize should
% be called. Hopefully this code works for all versions of LaTeX known to
% mankind.
% April 25, 1996:
% version 1.96: initialize \headwidth to a magic (negative) value to catch
% most common cases that people change it before calling \pagestyle{fancy}.
% Note it can't be initialized when reading in this file, because
% \textwidth could be changed afterwards. This is quite probable.
% We also switch to \MakeUppercase rather than \uppercase and introduce a
% \nouppercase command for use in headers. and footers.
% May 3, 1996:
% version 1.97: Two changes:
% 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
% for the chapter and section marks. The current version of amsbook and
% amsart classes don't seem to need them anymore. Moreover the standard
% latex classes don't use \markboth if twoside isn't selected, and this is
% confusing as \leftmark doesn't work as expected.
% 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
% in the amsbook and amsart classes, that make global changes to \topskip,
% which are reset in \ps@empty. Hopefully this doesn't break other things.
% May 7, 1996:
% version 1.98:
% Added % after the line \def\nouppercase
% May 7, 1996:
% version 1.99: This is the alpha version of fancyhdr 2.0
% Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
% Changed \headrulewidth, \footrulewidth, \footruleskip to
% macros rather than length parameters, In this way they can be
% conditionalized and they don't consume length registers. There is no need
% to have them as length registers unless you want to do calculations with
% them, which is unlikely. Note that this may make some uses of them
% incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
% May 10, 1996:
% version 1.99a:
% Added a few more % signs
% May 10, 1996:
% version 1.99b:
% Changed the syntax of \f@nfor to be resistent to catcode changes of :=
% Removed the [1] from the defs of \lhead etc. because the parameter is
% consumed by the \@[xy]lhead etc. macros.
% June 24, 1997:
% version 1.99c:
% corrected \nouppercase to also include the protected form of \MakeUppercase
% \global added to manipulation of \headwidth.
% \iffootnote command added.
% Some comments added about \@fancyhead and \@fancyfoot.
% Aug 24, 1998
% version 1.99d
% Changed the default \ps@empty to \ps@@empty in order to allow
% \fancypagestyle{empty} redefinition.
% Oct 11, 2000
% version 2.0
% Added LPPL license clause.
%
% A check for \headheight is added. An errormessage is given (once) if the
% header is too large. Empty headers don't generate the error even if
% \headheight is very small or even 0pt.
% Warning added for the use of 'E' option when twoside option is not used.
% In this case the 'E' fields will never be used.
%
% Mar 10, 2002
% version 2.1beta
% New command: \fancyhfoffset[place]{length}
% defines offsets to be applied to the header/footer to let it stick into
% the margins (if length > 0).
% place is like in fancyhead, except that only E,O,L,R can be used.
% This replaces the old calculation based on \headwidth and the marginpar
% area.
% \headwidth will be dynamically calculated in the headers/footers when
% this is used.
%
% Mar 26, 2002
% version 2.1beta2
% \fancyhfoffset now also takes h,f as possible letters in the argument to
% allow the header and footer widths to be different.
% New commands \fancyheadoffset and \fancyfootoffset added comparable to
% \fancyhead and \fancyfoot.
% Errormessages and warnings have been made more informative.
%
% Dec 9, 2002
% version 2.1
% The defaults for \footrulewidth, \plainheadrulewidth and
% \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
% someone inadvertently uses \setlength to change any of these, the value
% of \z@skip will not be changed, rather an errormessage will be given.
% March 3, 2004
% Release of version 3.0
% Oct 7, 2004
% version 3.1
% Added '\endlinechar=13' to \fancy@reset to prevent problems with
% includegraphics in header when verbatiminput is active.
% March 22, 2005
% version 3.2
% reset \everypar (the real one) in \fancy@reset because spanish.ldf does
% strange things with \everypar between << and >>.
% \ifancy@mpty{text}: expands to a true \ifx branch when `text' is empty.
% Used to decide whether a header/footer field was cleared.
\def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
% \fancy@def\cs{text}: define \cs as the field text (with a \strut so the
% baseline is kept), or as \leavevmode when the text is empty.
% \fancy@gbl controls whether the definition is \global (see below).
\def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
\fancy@gbl\def#1{#2\strut}\fi}
% By default field definitions are global; \fancypagestyle locally
% \let s \fancy@gbl to \relax so its settings stay local.
\let\fancy@gbl\global
% Error/warning wrappers: fall back to plain \errmessage when the LaTeX2e
% \PackageError/\PackageWarning commands are unavailable (LaTeX 2.09).
\def\@fancyerrmsg#1{%
\ifx\PackageError\undefined
\errmessage{#1}\else
\PackageError{Fancyhdr}{#1}{}\fi}
\def\@fancywarning#1{%
\ifx\PackageWarning\undefined
\errmessage{#1}\else
\PackageWarning{Fancyhdr}{#1}{}\fi}
% Usage: \@forc \var{charstring}{command to be executed for each char}
% This is similar to LaTeX's \@tfor, but expands the charstring.
\def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
% \f@rc / \f@@rc: the recursive worker pair behind \@forc.  \f@@rc peels
% one character off the string, runs the body, then recurses via \f@rc.
\def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
\f@@rc#1#2\f@@rc{#3}\fi}
\def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
% Usage: \f@nfor\name:=list\do{body}
% Like LaTeX's \@for but an empty list is treated as a list with an empty
% element
\newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
\expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
% Usage: \def@ult \cs{defaults}{argument}
% sets \cs to the characters from defaults appearing in argument
% or defaults if it would be empty. All characters are lowercased.
\newcommand\def@ult[3]{%
\edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
\def#1{}%
\@forc\tmpf@ra{#2}%
{\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
\ifx\@empty#1\def#1{#2}\fi}
%
% \if@in <char><set><truecase><falsecase>
% Tests whether <char> occurs in <set> by using it as a delimiter in a
% temporary macro definition.
%
\newcommand{\if@in}[4]{%
\edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
\expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
% User-level field commands.  Each forwards to \f@ncyhf with an h/f tag
% (header, footer, or both) and supplies an empty [selector] when the
% optional argument is omitted.
\newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
{\f@ncyhf\fancyhead h[]}}
\newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
{\f@ncyhf\fancyfoot f[]}}
\newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
{\f@ncyhf\fancyhf{}[]}}
% New commands for offsets added
% Same pattern, but these route to \f@ncyhfoffs, which stores lengths
% instead of field texts.
\newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
{\f@ncyhfoffs\fancyheadoffset h[]}}
\newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
{\f@ncyhfoffs\fancyfootoffset f[]}}
\newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
{\f@ncyhfoffs\fancyhfoffset{}[]}}
% The header and footer fields are stored in command sequences with
% names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
% and <z> from [hf].
% \f@ncyhf\caller{h|f|}[selector]{text}:
%   validate the selector letters, then for every comma-separated group
%   expand the e/o, l/c/r and h/f defaults and define each selected
%   \f@ncy<eo><lcr><hf> field to `text' via \fancy@def.
\def\f@ncyhf#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lcr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\fancy@def\csname
f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}}
% \f@ncyhfoffs: same selector machinery as \f@ncyhf, but c is not allowed
% (only l/r can stick out) and the value is stored as a length in
% \f@ncyO@<eo><lr><hf>; finally switch the output routine to the
% offset-aware header builders via \fancy@setoffs.
\def\f@ncyhfoffs#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\setlength\csname
f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}%
\fancy@setoffs}
% Fancyheadings version 1 commands. These are more or less deprecated,
% but they continue to work.
% Each \Xhead/\Xfoot takes an optional [even-page] argument; without it
% the same text is used for both even and odd pages.
\newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
\def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
\def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
\newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
\def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
\def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
\newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
\def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
\def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
\newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
\def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
\def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
\newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
\def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
\def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
\newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
\def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
\def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
% \headwidth is an alias for our own register so that a user's
% \let\headwidth\textwidth can be undone later (see \fancy@setoffs).
\newlength{\fancy@headwidth}
\let\headwidth\fancy@headwidth
% One length register per offset position: <eo><lr><hf>.
\newlength{\f@ncyO@elh}
\newlength{\f@ncyO@erh}
\newlength{\f@ncyO@olh}
\newlength{\f@ncyO@orh}
\newlength{\f@ncyO@elf}
\newlength{\f@ncyO@erf}
\newlength{\f@ncyO@olf}
\newlength{\f@ncyO@orf}
% Rule dimensions are macros, not length registers (see changelog v1.99),
% so they can be made conditional on \if@fancyplain.
\newcommand{\headrulewidth}{0.4pt}
\newcommand{\footrulewidth}{0pt}
\newcommand{\footruleskip}{.3\normalbaselineskip}
% Fancyplain stuff shouldn't be used anymore (rather
% \fancypagestyle{plain} should be used), but it must be present for
% compatibility reasons.
\newcommand{\plainheadrulewidth}{0pt}
\newcommand{\plainfootrulewidth}{0pt}
\newif\if@fancyplain \@fancyplainfalse
\def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
% Sentinel: \ps@fancy later detects this negative value and replaces it
% with (user delta +) \textwidth.  See changelog for v1.96.
\headwidth=-123456789sp %magic constant
% Command to reset various things in the headers:
% a.o. single spacing (taken from setspace.sty)
% and the catcode of ^^M (so that epsf files in the header work if a
% verbatim crosses a page boundary)
% It also defines a \nouppercase command that disables \uppercase and
% \MakeUppercase. It can only be used in the headers and footers.
\let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
\def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
\def\baselinestretch{1}%
\def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
\expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
\ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
\ifx\@normalsize\undefined \normalsize % for ucthesis.cls
\else \@normalsize \fi
\else% NFSS (2.09) present
\@newbaseline%
\fi}
% Initialization of the head and foot text.
% The default values still contain \fancyplain for compatibility.
\fancyhf{} % clear all
% lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
% evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
\if@twoside
\fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
\fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
\else
\fancyhead[l]{\fancyplain{}{\sl\rightmark}}
\fancyhead[r]{\fancyplain{}{\sl\leftmark}}
\fi
\fancyfoot[c]{\rm\thepage} % page number
% Use box 0 as a temp box and dimen 0 as temp dimen.
% This can be done, because this code will always
% be used inside another box, and therefore the changes are local.
% \@fancyvbox\length{content}: box the content and, if it is taller than
% \length (e.g. \headheight), warn once and globally enlarge \length.
\def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
{\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
We now make it that large for the rest of the document.^^J
This may cause the page layout to be inconsistent, however\@gobble}%
\dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
\box0}
% Put together a header or footer given the left, center and
% right text, fillers at left and right and a rule.
% The \lap commands put the text into an hbox of zero size,
% so overlapping text does not generate an errormessage.
% These macros have 5 parameters:
% 1. LEFTSIDE BEARING % This determines at which side the header will stick
% out. When \fancyhfoffset is used this calculates \headwidth, otherwise
% it is \hss or \relax (after expansion).
% 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
% 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
% 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
% 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
\def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\headheight{\hbox
{\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
\parbox[b]{\headwidth}{\centering#3}\hfill
\llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
\def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\footskip{\footrule
\hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
\parbox[t]{\headwidth}{\centering#3}\hfill
\llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
% Rules: on \fancyplain pages the plain widths are substituted locally.
\def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
\hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
\def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
\vskip-\footruleskip\vskip-\footrulewidth
\hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
% \ps@fancy: the `fancy' page style.  Sets up \chaptermark/\sectionmark
% defaults, installs the output-routine hooks via \ps@@fancy, and on its
% first invocation initializes \headwidth from the magic sentinel.
\def\ps@fancy{%
\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
%
% Define \MakeUppercase for old LaTeXen.
% Note: we used \def rather than \let, so that \let\uppercase\relax (from
% the version 1 documentation) will still work.
%
\@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
\@ifundefined{chapter}{\def\sectionmark##1{\markboth
{\MakeUppercase{\ifnum \c@secnumdepth>\z@
\thesection\hskip 1em\relax \fi ##1}}{}}%
\def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
\thesubsection\hskip 1em\relax \fi ##1}}}%
{\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
\@chapapp\ \thechapter. \ \fi ##1}}{}}%
\def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
\thesection. \ \fi ##1}}}}%
%\csname ps@headings\endcsname % use \ps@headings defaults if they exist
\ps@@fancy
% Redefine ourselves so the marks above are only set up once.
\gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
% Initialize \headwidth if the user didn't
%
\ifdim\headwidth<0sp
%
% This catches the case that \headwidth hasn't been initialized and the
% case that the user added something to \headwidth in the expectation that
% it was initialized to \textwidth. We compensate this now. This loses if
% the user intended to multiply it by a factor. But that case is more
% likely done by saying something like \headwidth=1.2\textwidth.
% The doc says you have to change \headwidth after the first call to
% \pagestyle{fancy}. This code is just to catch the most common cases where
% that requirement is violated.
%
\global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
\fi}
% `fancyplain' style: like fancy, but plain pages keep the \fancyplain
% alternatives active.
\def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
\def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
% Save \ps@empty so \fancypagestyle{empty} can redefine the original.
\let\ps@@empty\ps@empty
\def\ps@@fancy{%
\ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
\def\@mkboth{\protect\markboth}%
\def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
\def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
\def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
\def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
}
% Default definitions for compatibility mode:
% These cause the header/footer to take the defined \headwidth as width
% And to shift in the direction of the marginpar area
\def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
\def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
\let\fancy@Oelh\fancy@Oorh
\let\fancy@Oerh\fancy@Oolh
\let\fancy@Oolf\fancy@Oolh
\let\fancy@Oorf\fancy@Oorh
\let\fancy@Oelf\fancy@Oelh
\let\fancy@Oerf\fancy@Oerh
% New definitions for the use of \fancyhfoffset
% These calculate the \headwidth from \textwidth and the specified offsets.
\def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
\advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
\def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
\advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
\def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
\advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
\def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
\advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
% Switch the bearing macros over to the offset-based calculation.
\def\fancy@setoffs{%
% Just in case \let\headwidth\textwidth was used
\fancy@gbl\let\headwidth\fancy@headwidth
\fancy@gbl\let\fancy@Oolh\fancy@offsolh
\fancy@gbl\let\fancy@Oelh\fancy@offselh
\fancy@gbl\let\fancy@Oorh\hss
\fancy@gbl\let\fancy@Oerh\hss
\fancy@gbl\let\fancy@Oolf\fancy@offsolf
\fancy@gbl\let\fancy@Oelf\fancy@offself
\fancy@gbl\let\fancy@Oorf\hss
\fancy@gbl\let\fancy@Oerf\hss}
% Conditionals usable inside header/footer fields: \iffootnote is set per
% column in the patched \@makecol; \iftopfloat/\ifbotfloat/\iffloatpage
% test the float lists saved there.
\newif\iffootnote
\let\latex@makecol\@makecol
\def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi
\let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
\def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
\def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
\def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
% \fancypagestyle{name}{settings}: define page style `name' as fancy plus
% the given settings; \fancy@gbl is disabled so the settings stay local.
\newcommand{\fancypagestyle}[2]{%
\@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
%% File: `icml2024.bst'
%% A modification of `plainnl.bst' for use with natbib package
%%
%% Copyright 2010 Hal Daum\'e III
%% Modified by J. Fürnkranz
%% - Changed labels from (X and Y, 2000) to (X & Y, 2000)
%% - Changed References to last name first and abbreviated first names.
%% Modified by Iain Murray 2018 (who suggests adopting a standard .bst in future...)
%% - Made it actually use abbreviated first names
%%
%% Copyright 1993-2007 Patrick W Daly
%% Max-Planck-Institut f\"ur Sonnensystemforschung
%% Max-Planck-Str. 2
%% D-37191 Katlenburg-Lindau
%% Germany
%% E-mail: daly@mps.mpg.de
%%
%% This program can be redistributed and/or modified under the terms
%% of the LaTeX Project Public License Distributed from CTAN
%% archives in directory macros/latex/base/lppl.txt; either
%% version 1 of the License, or any later version.
%%
% Version and source file information:
% \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)]
%
% BibTeX `plainnat' family
% version 0.99b for BibTeX versions 0.99a or later,
% for LaTeX versions 2.09 and 2e.
%
% For use with the `natbib.sty' package; emulates the corresponding
% member of the `plain' family, but with author-year citations.
%
% With version 6.0 of `natbib.sty', it may also be used for numerical
% citations, while retaining the commands \citeauthor, \citefullauthor,
% and \citeyear to print the corresponding information.
%
% For version 7.0 of `natbib.sty', the KEY field replaces missing
% authors/editors, and the date is left blank in \bibitem.
%
% Includes field EID for the sequence/citation number of electronic journals
% which is used instead of page numbers.
%
% Includes fields ISBN and ISSN.
%
% Includes field URL for Internet addresses.
%
% Includes field DOI for Digital Object Identifiers.
%
% Works best with the url.sty package of Donald Arseneau.
%
% Entries with identical authors and year are further sorted by
% citation key, to preserve any natural sequence.
%
% Declare the fields read from the .bib database, no entry integers,
% and per-entry strings: `label' (the author-year citation label),
% `extra.label' (the disambiguating a/b/c suffix), `sort.label' (the
% sort key) and `short.list' (short author list for \bibitem).
ENTRY
{ address
author
booktitle
chapter
doi
eid
edition
editor
howpublished
institution
isbn
issn
journal
key
month
note
number
organization
pages
publisher
school
series
title
type
url
volume
year
}
{}
{ label extra.label sort.label short.list }
% Punctuation state machine used by output.nonnull: tracks whether the
% next item starts the entry, continues a sentence, or follows a block.
INTEGERS { output.state before.all mid.sentence after.sentence after.block }
FUNCTION {init.state.consts}
{ #0 'before.all :=
#1 'mid.sentence :=
#2 'after.sentence :=
#3 'after.block :=
}
% Scratch string variables used throughout.
STRINGS { s t }
% output.nonnull: write the previous item with punctuation chosen by
% output.state (", " / ". \newblock" / nothing), leave the new item on
% the stack, and move the state to mid.sentence.
FUNCTION {output.nonnull}
{ 's :=
output.state mid.sentence =
{ ", " * write$ }
{ output.state after.block =
{ add.period$ write$
newline$
"\newblock " write$
}
{ output.state before.all =
'write$
{ add.period$ " " * write$ }
if$
}
if$
mid.sentence 'output.state :=
}
if$
s
}
% output: like output.nonnull but silently drops an empty item.
FUNCTION {output}
{ duplicate$ empty$
'pop$
'output.nonnull
if$
}
% output.check: like output, but warn (using the field name in t) when
% the item is empty.
FUNCTION {output.check}
{ 't :=
duplicate$ empty$
{ pop$ "empty " t * " in " * cite$ * warning$ }
'output.nonnull
if$
}
% fin.entry: flush the last pending item with a final period.
FUNCTION {fin.entry}
{ add.period$
write$
newline$
}
% new.block / new.sentence: request stronger punctuation before the next
% item, unless nothing has been written yet.
FUNCTION {new.block}
{ output.state before.all =
'skip$
{ after.block 'output.state := }
if$
}
FUNCTION {new.sentence}
{ output.state after.block =
'skip$
{ output.state before.all =
'skip$
{ after.sentence 'output.state := }
if$
}
if$
}
% Logical operators on BibTeX's 0/1 integers.
FUNCTION {not}
{ { #0 }
{ #1 }
if$
}
FUNCTION {and}
{ 'skip$
{ pop$ #0 }
if$
}
FUNCTION {or}
{ { pop$ #1 }
'skip$
if$
}
% Conditional block/sentence breaks: break only when the tested field(s)
% are non-empty.
FUNCTION {new.block.checka}
{ empty$
'skip$
'new.block
if$
}
FUNCTION {new.block.checkb}
{ empty$
swap$ empty$
and
'skip$
'new.block
if$
}
FUNCTION {new.sentence.checka}
{ empty$
'skip$
'new.sentence
if$
}
FUNCTION {new.sentence.checkb}
{ empty$
swap$ empty$
and
'skip$
'new.sentence
if$
}
% field.or.null: replace a missing field by the empty string.
FUNCTION {field.or.null}
{ duplicate$ empty$
{ pop$ "" }
'skip$
if$
}
% emphasize: wrap a non-empty string in \emph{...}.
FUNCTION {emphasize}
{ duplicate$ empty$
{ pop$ "" }
{ "\emph{" swap$ * "}" * }
if$
}
% Loop counters for the name-formatting functions.
INTEGERS { nameptr namesleft numnames }
% format.names: format a name list as "von Last, Jr., F." joined with
% commas and " and " before the final name; "others" becomes "et al.".
FUNCTION {format.names}
{ 's :=
#1 'nameptr :=
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{ s nameptr "{vv~}{ll}{, jj}{, f.}" format.name$ 't :=
nameptr #1 >
{ namesleft #1 >
{ ", " * t * }
{ numnames #2 >
{ "," * }
'skip$
if$
t "others" =
{ " et~al." * }
{ " and " * t * }
if$
}
if$
}
't
if$
nameptr #1 + 'nameptr :=
namesleft #1 - 'namesleft :=
}
while$
}
% format.key: fall back to the KEY field when the tested field is empty.
FUNCTION {format.key}
{ empty$
{ key field.or.null }
{ "" }
if$
}
FUNCTION {format.authors}
{ author empty$
{ "" }
{ author format.names }
if$
}
% format.editors: editor names plus "(ed.)"/"(eds.)".
FUNCTION {format.editors}
{ editor empty$
{ "" }
{ editor format.names
editor num.names$ #1 >
{ " (eds.)" * }
{ " (ed.)" * }
if$
}
if$
}
% Optional trailing fields; each starts a new block when present.
FUNCTION {format.isbn}
{ isbn empty$
{ "" }
{ new.block "ISBN " isbn * }
if$
}
FUNCTION {format.issn}
{ issn empty$
{ "" }
{ new.block "ISSN " issn * }
if$
}
FUNCTION {format.url}
{ url empty$
{ "" }
{ new.block "URL \url{" url * "}" * }
if$
}
FUNCTION {format.doi}
{ doi empty$
{ "" }
{ new.block "\doi{" doi * "}" * }
if$
}
% format.title: titles of non-book entries are downcased to sentence case.
FUNCTION {format.title}
{ title empty$
{ "" }
{ title "t" change.case$ }
if$
}
% format.full.names: like format.names but last names only
% ("von Last"); used for the textual part of natbib citation labels.
FUNCTION {format.full.names}
{'s :=
#1 'nameptr :=
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{ s nameptr
"{vv~}{ll}" format.name$ 't :=
nameptr #1 >
{
namesleft #1 >
{ ", " * t * }
{
numnames #2 >
{ "," * }
'skip$
if$
t "others" =
{ " et~al." * }
{ " and " * t * }
if$
}
if$
}
't
if$
nameptr #1 + 'nameptr :=
namesleft #1 - 'namesleft :=
}
while$
}
% Pick the name list used for the full citation text, per entry type.
FUNCTION {author.editor.full}
{ author empty$
{ editor empty$
{ "" }
{ editor format.full.names }
if$
}
{ author format.full.names }
if$
}
FUNCTION {author.full}
{ author empty$
{ "" }
{ author format.full.names }
if$
}
FUNCTION {editor.full}
{ editor empty$
{ "" }
{ editor format.full.names }
if$
}
% make.full.names: dispatch on the entry type (books may cite editors,
% proceedings cite editors, everything else cites authors).
FUNCTION {make.full.names}
{ type$ "book" =
type$ "inbook" =
or
'author.editor.full
{ type$ "proceedings" =
'editor.full
'author.full
if$
}
if$
}
% output.bibitem: write "\bibitem[label)full-names]{citekey}" and reset
% the punctuation state; the full-names part is omitted when it equals
% short.list.
FUNCTION {output.bibitem}
{ newline$
"\bibitem[" write$
label write$
")" make.full.names duplicate$ short.list =
{ pop$ }
{ * }
if$
"]{" * write$
cite$ write$
"}" write$
newline$
""
before.all 'output.state :=
}
% n.dashify: convert single hyphens in page ranges to "--", leaving
% existing "--" runs alone.
FUNCTION {n.dashify}
{ 't :=
""
{ t empty$ not }
{ t #1 #1 substring$ "-" =
{ t #1 #2 substring$ "--" = not
{ "--" *
t #2 global.max$ substring$ 't :=
}
{ { t #1 #1 substring$ "-" = }
{ "-" *
t #2 global.max$ substring$ 't :=
}
while$
}
if$
}
{ t #1 #1 substring$ *
t #2 global.max$ substring$ 't :=
}
if$
}
while$
}
% format.date: "month year" plus the disambiguating extra.label (a/b/...).
FUNCTION {format.date}
{ year duplicate$ empty$
{ "empty year in " cite$ * warning$
pop$ "" }
'skip$
if$
month empty$
'skip$
{ month
" " * swap$ *
}
if$
extra.label *
}
% format.btitle: book titles are emphasized, case preserved.
FUNCTION {format.btitle}
{ title emphasize
}
% tie.or.space.connect: join with "~" when the second part is short
% (< 3 chars), otherwise with a space.
FUNCTION {tie.or.space.connect}
{ duplicate$ text.length$ #3 <
{ "~" }
{ " " }
if$
swap$ * *
}
% either.or.check: warn when two mutually exclusive fields are both set.
FUNCTION {either.or.check}
{ empty$
'pop$
{ "can't use both " swap$ * " fields in " * cite$ * warning$ }
if$
}
% format.bvolume: "volume N of Series"; volume and number are exclusive.
FUNCTION {format.bvolume}
{ volume empty$
{ "" }
{ "volume" volume tie.or.space.connect
series empty$
'skip$
{ " of " * series emphasize * }
if$
"volume and number" number either.or.check
}
if$
}
% format.number.series: "Number N in Series" (only when no volume);
% capitalization depends on sentence position.
FUNCTION {format.number.series}
{ volume empty$
{ number empty$
{ series field.or.null }
{ output.state mid.sentence =
{ "number" }
{ "Number" }
if$
number tie.or.space.connect
series empty$
{ "there's a number but no series in " cite$ * warning$ }
{ " in " * series * }
if$
}
if$
}
{ "" }
if$
}
% format.edition: "Nth edition", lowercased mid-sentence.
FUNCTION {format.edition}
{ edition empty$
{ "" }
{ output.state mid.sentence =
{ edition "l" change.case$ " edition" * }
{ edition "t" change.case$ " edition" * }
if$
}
if$
}
% multi.page.check: true when pages contains '-', ',' or '+',
% i.e. looks like a range rather than a single page.
INTEGERS { multiresult }
FUNCTION {multi.page.check}
{ 't :=
#0 'multiresult :=
{ multiresult not
t empty$ not
and
}
{ t #1 #1 substring$
duplicate$ "-" =
swap$ duplicate$ "," =
swap$ "+" =
or or
{ #1 'multiresult := }
{ t #2 global.max$ substring$ 't := }
if$
}
while$
multiresult
}
% format.pages: "pp.~m--n" for ranges, "pp.~n" for a single page.
FUNCTION {format.pages}
{ pages empty$
{ "" }
{ pages multi.page.check
{ "pp.\ " pages n.dashify tie.or.space.connect }
{ "pp.\ " pages tie.or.space.connect }
if$
}
if$
}
% format.eid: electronic-journal article id, used instead of pages.
FUNCTION {format.eid}
{ eid empty$
{ "" }
{ "art." eid tie.or.space.connect }
if$
}
% format.vol.num.pages: "vol\penalty0 (num):\penalty0 pages".
FUNCTION {format.vol.num.pages}
{ volume field.or.null
number empty$
'skip$
{ "\penalty0 (" number * ")" * *
volume empty$
{ "there's a number but no volume in " cite$ * warning$ }
'skip$
if$
}
if$
pages empty$
'skip$
{ duplicate$ empty$
{ pop$ format.pages }
{ ":\penalty0 " * pages n.dashify * }
if$
}
if$
}
% format.vol.num.eid: same shape but with the EID instead of pages.
FUNCTION {format.vol.num.eid}
{ volume field.or.null
number empty$
'skip$
{ "\penalty0 (" number * ")" * *
volume empty$
{ "there's a number but no volume in " cite$ * warning$ }
'skip$
if$
}
if$
eid empty$
'skip$
{ duplicate$ empty$
{ pop$ format.eid }
{ ":\penalty0 " * eid * }
if$
}
if$
}
% format.chapter.pages: "chapter N, pp. m--n"; TYPE overrides "chapter".
FUNCTION {format.chapter.pages}
{ chapter empty$
'format.pages
{ type empty$
{ "chapter" }
{ type "l" change.case$ }
if$
chapter tie.or.space.connect
pages empty$
'skip$
{ ", " * format.pages * }
if$
}
if$
}
% format.in.ed.booktitle: "In Editors (eds.), Booktitle".
FUNCTION {format.in.ed.booktitle}
{ booktitle empty$
{ "" }
{ editor empty$
{ "In " booktitle emphasize * }
{ "In " format.editors * ", " * booktitle emphasize * }
if$
}
if$
}
% empty.misc.check: warn when a misc entry has no usable field at all.
FUNCTION {empty.misc.check}
{ author empty$ title empty$ howpublished empty$
month empty$ year empty$ note empty$
and and and and and
key empty$ not and
{ "all relevant fields are empty in " cite$ * warning$ }
'skip$
if$
}
% format.thesis.type: TYPE (title-cased) replaces the default thesis label.
FUNCTION {format.thesis.type}
{ type empty$
'skip$
{ pop$
type "t" change.case$
}
if$
}
% format.tr.number: "Technical Report N" (or TYPE N).
FUNCTION {format.tr.number}
{ type empty$
{ "Technical Report" }
'type
if$
number empty$
{ "t" change.case$ }
{ number tie.or.space.connect }
if$
}
% Crossref formatters: produce "In <container> \citet{crossref}" text for
% entries whose container is given by a CROSSREF field.
FUNCTION {format.article.crossref}
{ key empty$
{ journal empty$
{ "need key or journal for " cite$ * " to crossref " * crossref *
warning$
""
}
{ "In \emph{" journal * "}" * }
if$
}
{ "In " }
if$
" \citet{" * crossref * "}" *
}
FUNCTION {format.book.crossref}
{ volume empty$
{ "empty volume in " cite$ * "'s crossref of " * crossref * warning$
"In "
}
{ "Volume" volume tie.or.space.connect
" of " *
}
if$
editor empty$
editor field.or.null author field.or.null =
or
{ key empty$
{ series empty$
{ "need editor, key, or series for " cite$ * " to crossref " *
crossref * warning$
"" *
}
{ "\emph{" * series * "}" * }
if$
}
'skip$
if$
}
'skip$
if$
" \citet{" * crossref * "}" *
}
FUNCTION {format.incoll.inproc.crossref}
{ editor empty$
editor field.or.null author field.or.null =
or
{ key empty$
{ booktitle empty$
{ "need editor, key, or booktitle for " cite$ * " to crossref " *
crossref * warning$
""
}
{ "In \emph{" booktitle * "}" * }
if$
}
{ "In " }
if$
}
{ "In " }
if$
" \citet{" * crossref * "}" *
}
% Entry-type drivers.  Each emits the \bibitem, then the fields in the
% order required by the ICML reference style, then closes the entry.
% article: Author. Title. Journal, vol(num):pages, year.
FUNCTION {article}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
crossref missing$
{ journal emphasize "journal" output.check
eid empty$
{ format.vol.num.pages output }
{ format.vol.num.eid output }
if$
format.date "year" output.check
}
{ format.article.crossref output.nonnull
eid empty$
{ format.pages output }
{ format.eid output }
if$
}
if$
format.issn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% book: Author (or Editors). Title. Volume/series. Publisher, year.
FUNCTION {book}
{ output.bibitem
author empty$
{ format.editors "author and editor" output.check
editor format.key output
}
{ format.authors output.nonnull
crossref missing$
{ "author and editor" editor either.or.check }
'skip$
if$
}
if$
new.block
format.btitle "title" output.check
crossref missing$
{ format.bvolume output
new.block
format.number.series output
new.sentence
publisher "publisher" output.check
address output
}
{ new.block
format.book.crossref output.nonnull
}
if$
format.edition output
format.date "year" output.check
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% booklet: all fields optional except title.
FUNCTION {booklet}
{ output.bibitem
format.authors output
author format.key output
new.block
format.title "title" output.check
howpublished address new.block.checkb
howpublished output
address output
format.date output
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% inbook: like book, plus chapter/pages.
FUNCTION {inbook}
{ output.bibitem
author empty$
{ format.editors "author and editor" output.check
editor format.key output
}
{ format.authors output.nonnull
crossref missing$
{ "author and editor" editor either.or.check }
'skip$
if$
}
if$
new.block
format.btitle "title" output.check
crossref missing$
{ format.bvolume output
format.chapter.pages "chapter and pages" output.check
new.block
format.number.series output
new.sentence
publisher "publisher" output.check
address output
}
{ format.chapter.pages "chapter and pages" output.check
new.block
format.book.crossref output.nonnull
}
if$
format.edition output
format.date "year" output.check
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% incollection: Author. Title. In Editors (eds.), Booktitle, chapter/pages.
% Publisher, year.
FUNCTION {incollection}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
crossref missing$
{ format.in.ed.booktitle "booktitle" output.check
format.bvolume output
format.number.series output
format.chapter.pages output
new.sentence
publisher "publisher" output.check
address output
format.edition output
format.date "year" output.check
}
{ format.incoll.inproc.crossref output.nonnull
format.chapter.pages output
}
if$
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% inproceedings: Author. Title. In Proceedings, pp. m--n, year.
FUNCTION {inproceedings}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
crossref missing$
{ format.in.ed.booktitle "booktitle" output.check
format.bvolume output
format.number.series output
format.pages output
address empty$
{ organization publisher new.sentence.checkb
organization output
publisher output
format.date "year" output.check
}
{ address output.nonnull
format.date "year" output.check
new.sentence
organization output
publisher output
}
if$
}
{ format.incoll.inproc.crossref output.nonnull
format.pages output
}
if$
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% conference is an alias of inproceedings.
FUNCTION {conference} { inproceedings }
% manual: technical documentation.
FUNCTION {manual}
{ output.bibitem
format.authors output
author format.key output
new.block
format.btitle "title" output.check
organization address new.block.checkb
organization output
address output
format.edition output
format.date output
format.url output
new.block
note output
fin.entry
}
% mastersthesis: Author. Title. Master's thesis, School, year.
FUNCTION {mastersthesis}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
"Master's thesis" format.thesis.type output.nonnull
school "school" output.check
address output
format.date "year" output.check
format.url output
new.block
note output
fin.entry
}
% misc: everything optional; warn if no relevant field is set.
FUNCTION {misc}
{ output.bibitem
format.authors output
author format.key output
title howpublished new.block.checkb
format.title output
howpublished new.block.checka
howpublished output
format.date output
format.issn output
format.url output
new.block
note output
fin.entry
empty.misc.check
}
% phdthesis: Author. Title. PhD thesis, School, year.
FUNCTION {phdthesis}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.btitle "title" output.check
new.block
"PhD thesis" format.thesis.type output.nonnull
school "school" output.check
address output
format.date "year" output.check
format.url output
new.block
note output
fin.entry
}
% proceedings: whole conference volume, cited by its editors.
FUNCTION {proceedings}
{ output.bibitem
format.editors output
editor format.key output
new.block
format.btitle "title" output.check
format.bvolume output
format.number.series output
address output
format.date "year" output.check
new.sentence
organization output
publisher output
format.isbn output
format.doi output
format.url output
new.block
note output
fin.entry
}
% techreport: Author. Title. Technical Report N, Institution, year.
FUNCTION {techreport}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
format.tr.number output.nonnull
institution "institution" output.check
address output
format.date "year" output.check
format.url output
new.block
note output
fin.entry
}
% unpublished: the NOTE field is mandatory.
FUNCTION {unpublished}
{ output.bibitem
format.authors "author" output.check
author format.key output
new.block
format.title "title" output.check
new.block
note "note" output.check
format.date output
format.url output
fin.entry
}
% Unknown entry types are formatted as misc.
FUNCTION {default.type} { misc }
% Standard month and journal-name abbreviations usable in .bib files.
MACRO {jan} {"January"}
MACRO {feb} {"February"}
MACRO {mar} {"March"}
MACRO {apr} {"April"}
MACRO {may} {"May"}
MACRO {jun} {"June"}
MACRO {jul} {"July"}
MACRO {aug} {"August"}
MACRO {sep} {"September"}
MACRO {oct} {"October"}
MACRO {nov} {"November"}
MACRO {dec} {"December"}
MACRO {acmcs} {"ACM Computing Surveys"}
MACRO {acta} {"Acta Informatica"}
MACRO {cacm} {"Communications of the ACM"}
MACRO {ibmjrd} {"IBM Journal of Research and Development"}
MACRO {ibmsj} {"IBM Systems Journal"}
MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
MACRO {ieeetc} {"IEEE Transactions on Computers"}
MACRO {ieeetcad}
{"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
MACRO {ipl} {"Information Processing Letters"}
MACRO {jacm} {"Journal of the ACM"}
MACRO {jcss} {"Journal of Computer and System Sciences"}
MACRO {scp} {"Science of Computer Programming"}
MACRO {sicomp} {"SIAM Journal on Computing"}
MACRO {tocs} {"ACM Transactions on Computer Systems"}
MACRO {tods} {"ACM Transactions on Database Systems"}
MACRO {tog} {"ACM Transactions on Graphics"}
MACRO {toms} {"ACM Transactions on Mathematical Software"}
MACRO {toois} {"ACM Transactions on Office Information Systems"}
MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
MACRO {tcs} {"Theoretical Computer Science"}
% Read the database entries, then define the sorting/label helpers.
READ
% sortify: strip TeX commands and lowercase, for sort keys.
FUNCTION {sortify}
{ purify$
"l" change.case$
}
INTEGERS { len }
% chop.word: remove a leading word of `len' chars if it matches.
FUNCTION {chop.word}
{ 's :=
'len :=
s #1 len substring$ =
{ s len #1 + global.max$ substring$ }
's
if$
}
% format.lab.names: label author list -- one name; "A \& B" for two;
% "A et al." for three or more (ICML label style uses \&, not "and").
FUNCTION {format.lab.names}
{ 's :=
s #1 "{vv~}{ll}" format.name$
s num.names$ duplicate$
#2 >
{ pop$ " et~al." * }
{ #2 <
'skip$
{ s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
{ " et~al." * }
{ " \& " * s #2 "{vv~}{ll}" format.name$ * }
if$
}
if$
}
if$
}
% Citation label from author names; falls back to the key field,
% then to the first three characters of the cite key.
FUNCTION {author.key.label}
{ author empty$
{ key empty$
{ cite$ #1 #3 substring$ }
'key
if$
}
{ author format.lab.names }
if$
}
% Citation label preferring author, then editor, then key, then cite key.
% Used for book-like entries where an editor may stand in for the author.
FUNCTION {author.editor.key.label}
{ author empty$
{ editor empty$
{ key empty$
{ cite$ #1 #3 substring$ }
'key
if$
}
{ editor format.lab.names }
if$
}
{ author format.lab.names }
if$
}
% Citation label preferring author, then key, then the first three
% letters of the organization (with any leading "The " chopped off).
FUNCTION {author.key.organization.label}
{ author empty$
{ key empty$
{ organization empty$
{ cite$ #1 #3 substring$ }
{ "The " #4 organization chop.word #3 text.prefix$ }
if$
}
'key
if$
}
{ author format.lab.names }
if$
}
% As author.key.organization.label, but starting from the editor field
% (used for @proceedings entries, which have no author).
FUNCTION {editor.key.organization.label}
{ editor empty$
{ key empty$
{ organization empty$
{ cite$ #1 #3 substring$ }
{ "The " #4 organization chop.word #3 text.prefix$ }
if$
}
'key
if$
}
{ editor format.lab.names }
if$
}
% Dispatch on the entry type to pick the right label function, and store
% the resulting short author string in short.list.
FUNCTION {calc.short.authors}
{ type$ "book" =
type$ "inbook" =
or
'author.editor.key.label
{ type$ "proceedings" =
'editor.key.organization.label
{ type$ "manual" =
'author.key.organization.label
'author.key.label
if$
}
if$
}
if$
'short.list :=
}
% Build the full author-year label "Authors(Year" (the closing paren and
% any disambiguating letter are appended later by reverse.pass).
% If year is empty, or the label came from the key field, the year part
% is dropped.
FUNCTION {calc.label}
{ calc.short.authors
short.list
"("
*
year duplicate$ empty$
short.list key field.or.null = or
{ pop$ "" }
'skip$
if$
*
'label :=
}
% Convert a name list into a sort string: "vonlast first jr" per name,
% space separated and sortified. Special markers keep natbib-style
% ordering consistent: a trailing "others" becomes "zzzzz" (sorts last),
% and for 3+ author entries the year is injected after the first name so
% "et al." labels group and order by year.
FUNCTION {sort.format.names}
{ 's :=
#1 'nameptr :=
""
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{
s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
nameptr #1 >
{
" " *
namesleft #1 = t "others" = and
{ "zzzzz" * }
{ numnames #2 > nameptr #2 = and
{ "zz" * year field.or.null * " " * }
'skip$
if$
t sortify *
}
if$
}
{ t sortify * }
if$
nameptr #1 + 'nameptr :=
namesleft #1 - 'namesleft :=
}
while$
}
% Sort key for a title: strip a leading "A ", "An ", or "The ",
% sortify, and truncate to the global string limit.
FUNCTION {sort.format.title}
{ 't :=
"A " #2
"An " #3
"The " #4 t chop.word
chop.word
chop.word
sortify
#1 global.max$ substring$
}
% Sort key from the author list; falls back to key, warning (and using
% the empty string) when neither is present.
FUNCTION {author.sort}
{ author empty$
{ key empty$
{ "to sort, need author or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ author sort.format.names }
if$
}
% Sort key preferring author, then editor, then key (with a warning and
% empty fallback). Used for book-like entry types.
FUNCTION {author.editor.sort}
{ author empty$
{ editor empty$
{ key empty$
{ "to sort, need author, editor, or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ editor sort.format.names }
if$
}
{ author sort.format.names }
if$
}
% Sort key preferring author, then organization (leading "The " chopped),
% then key. Used for @manual entries.
FUNCTION {author.organization.sort}
{ author empty$
{ organization empty$
{ key empty$
{ "to sort, need author, organization, or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ "The " #4 organization chop.word sortify }
if$
}
{ author sort.format.names }
if$
}
% Sort key preferring editor, then organization (leading "The " chopped),
% then key. Used for @proceedings entries.
FUNCTION {editor.organization.sort}
{ editor empty$
{ organization empty$
{ key empty$
{ "to sort, need editor, organization, or key in " cite$ * warning$
""
}
{ key sortify }
if$
}
{ "The " #4 organization chop.word sortify }
if$
}
{ editor sort.format.names }
if$
}
% Compute the primary sort key for each entry:
%   sortified label + full name sort string + year + cite key,
% truncated to entry.max$. sort.label keeps the truncated prefix so the
% later label-disambiguation passes see the same ordering.
FUNCTION {presort}
{ calc.label
label sortify
" "
*
type$ "book" =
type$ "inbook" =
or
'author.editor.sort
{ type$ "proceedings" =
'editor.organization.sort
{ type$ "manual" =
'author.organization.sort
'author.sort
if$
}
if$
}
if$
" "
*
year field.or.null sortify
*
" "
*
cite$
*
#1 entry.max$ substring$
'sort.label :=
sort.label *
#1 entry.max$ substring$
'sort.key$ :=
}
% First sort: order entries by the presort key so that identical
% author-year labels become adjacent for the disambiguation passes.
ITERATE {presort}
SORT
% State shared by the label-disambiguation passes below.
STRINGS { longest.label last.label next.extra }
INTEGERS { longest.label.width last.extra.num number.label }
% Reset the disambiguation state before the forward pass.
FUNCTION {initialize.longest.label}
{ "" 'longest.label :=
#0 int.to.chr$ 'last.label :=
"" 'next.extra :=
#0 'longest.label.width :=
#0 'last.extra.num :=
#0 'number.label :=
}
% Forward pass over the sorted entries: when consecutive entries share the
% same author-year label, assign incrementing suffix letters ("a", "b", ...)
% via extra.label. Also counts entries into number.label (used as the
% widest label argument of thebibliography).
FUNCTION {forward.pass}
{ last.label label =
{ last.extra.num #1 + 'last.extra.num :=
last.extra.num int.to.chr$ 'extra.label :=
}
{ "a" chr.to.int$ 'last.extra.num :=
"" 'extra.label :=
label 'last.label :=
}
if$
number.label #1 + 'number.label :=
}
% Reverse pass: the first entry of a duplicated-label group gets "a"
% (the forward pass only labelled the second onward, starting at "b").
% Non-empty suffixes are wrapped in \natexlab so natbib can strip them,
% and the suffix is appended to the entry's label.
FUNCTION {reverse.pass}
{ next.extra "b" =
{ "a" 'extra.label := }
'skip$
if$
extra.label 'next.extra :=
extra.label
duplicate$ empty$
'skip$
{ "{\natexlab{" swap$ * "}}" * }
if$
'extra.label :=
label extra.label * 'label :=
}
% Run the disambiguation passes, then restore the presort ordering
% (sort.label) as the final bibliography order and sort once more.
EXECUTE {initialize.longest.label}
ITERATE {forward.pass}
REVERSE {reverse.pass}
FUNCTION {bib.sort.order}
{ sort.label 'sort.key$ :=
}
ITERATE {bib.sort.order}
SORT
% Emit the .bbl preamble: any @preamble text, \begin{thebibliography}
% with the entry count as the widest-label argument, and \providecommand
% fallbacks for \natexlab, \url and \doi (so the .bbl compiles even
% without natbib/url loaded).
FUNCTION {begin.bib}
{ preamble$ empty$
'skip$
{ preamble$ write$ newline$ }
if$
"\begin{thebibliography}{" number.label int.to.str$ * "}" *
write$ newline$
"\providecommand{\natexlab}[1]{#1}"
write$ newline$
"\providecommand{\url}[1]{\texttt{#1}}"
write$ newline$
"\expandafter\ifx\csname urlstyle\endcsname\relax"
write$ newline$
" \providecommand{\doi}[1]{doi: #1}\else"
write$ newline$
" \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi"
write$ newline$
}
% Main driver: write the preamble, then format every entry via its
% type handler, then close the environment.
EXECUTE {begin.bib}
EXECUTE {init.state.consts}
ITERATE {call.type$}
FUNCTION {end.bib}
{ newline$
"\end{thebibliography}" write$ newline$
}
EXECUTE {end.bib}
% File: icml2024.sty (LaTeX style file for ICML-2024, version of 2023-11-23)
% This file contains the LaTeX formatting parameters for a two-column
% conference proceedings that is 8.5 inches wide by 11 inches high.
%
% Modified by Jonathan Scarlett 2024: changed years, volume, location
%
% Modified by Sivan Sabato 2023: changed years and volume number.
% Modified by Jonathan Scarlett 2023: added page numbers to every page
%
% Modified by Csaba Szepesvari 2022: changed years, PMLR ref. Turned off checking marginparwidth
% as marginparwidth only controls the space available for margin notes and margin notes
% will NEVER be used anyways in submitted versions, so there is no reason one should
% check whether marginparwidth has been tampered with.
% Also removed pdfview=FitH from hypersetup as it did not do its job; the default choice is a bit better
% but of course the double-column format is not supported by this hyperlink preview functionality
% in a completely satisfactory fashion.
% Modified by Gang Niu 2022: Changed color to xcolor
%
% Modified by Iain Murray 2018: changed years, location. Remove affiliation notes when anonymous.
% Move times dependency from .tex to .sty so fewer people delete it.
%
% Modified by Daniel Roy 2017: changed byline to use footnotes for affiliations, and removed emails
%
% Modified by Percy Liang 12/2/2013: changed the year, location from the previous template for ICML 2014
% Modified by Fei Sha 9/2/2013: changed the year, location from the previous template for ICML 2013
%
% Modified by Fei Sha 4/24/2013: (1) remove the extra whitespace after the first author's email address (in %the camera-ready version) (2) change the Proceeding ... of ICML 2010 to 2014 so PDF's metadata will show up % correctly
%
% Modified by Sanjoy Dasgupta, 2013: changed years, location
%
% Modified by Francesco Figari, 2012: changed years, location
%
% Modified by Christoph Sawade and Tobias Scheffer, 2011: added line
% numbers, changed years
%
% Modified by Hal Daume III, 2010: changed years, added hyperlinks
%
% Modified by Kiri Wagstaff, 2009: changed years
%
% Modified by Sam Roweis, 2008: changed years
%
% Modified by Ricardo Silva, 2007: update of the ifpdf verification
%
% Modified by Prasad Tadepalli and Andrew Moore, merely changing years.
%
% Modified by Kristian Kersting, 2005, based on Jennifer Dy's 2004 version
% - running title. If the original title is too long or breaks a line,
% use \icmltitlerunning{...} in the preamble to supply a shorter form.
% Added fancyhdr package to get a running head.
% - Updated to store the page size because pdflatex does compile the
% page size into the pdf.
%
% Hacked by Terran Lane, 2003:
% - Updated to use LaTeX2e style file conventions (ProvidesPackage,
% etc.)
% - Added an ``appearing in'' block at the base of the first column
% (thus keeping the ``appearing in'' note out of the bottom margin
% where the printer should strip in the page numbers).
% - Added a package option [accepted] that selects between the ``Under
% review'' notice (default, when no option is specified) and the
% ``Appearing in'' notice (for use when the paper has been accepted
% and will appear).
%
% Originally created as: ml2k.sty (LaTeX style file for ICML-2000)
% by P. Langley (12/23/99)
%%%%%%%%%%%%%%%%%%%%
%% This version of the style file supports both a ``review'' version
%% and a ``final/accepted'' version. The difference is only in the
%% text that appears in the note at the bottom of the first column of
%% the first page. The default behavior is to print a note to the
%% effect that the paper is under review and should not be distributed. The
%% final/accepted version prints an ``Appearing in'' note. To get the
%% latter behavior, in the calling file change the ``usepackage'' line
%% from:
%% \usepackage{icml2024}
%% to
%% \usepackage[accepted]{icml2024}
%%%%%%%%%%%%%%%%%%%%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{icml2024}[2023/11/23 v2.0 ICML Conference Style File]
% Before 2018, \usepackage{times} was in the example TeX, but inevitably
% not everybody did it.
\RequirePackage{times}
% Use fancyhdr package
\RequirePackage{fancyhdr}
\RequirePackage{xcolor} % changed from color to xcolor (2021/11/24)
\RequirePackage{algorithm}
\RequirePackage{algorithmic}
\RequirePackage{natbib}
\RequirePackage{eso-pic} % used by \AddToShipoutPicture
\RequirePackage{forloop}
\RequirePackage{url}
%%%%%%%% Options
% [accepted]: switch the first-page notice from the "under review"
% text to the "appearing in" text, and de-anonymize author handling
% (via the \isaccepted flag tested throughout this file).
\DeclareOption{accepted}{%
\renewcommand{\Notice@String}{\ICML@appearing}
\gdef\isaccepted{1}
}
% [nohyperref]: skip all \hypersetup configuration below.
\DeclareOption{nohyperref}{%
\gdef\nohyperref{1}
}
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the
% final/accepted version of the ``appearing in'' note. Modify it to
% change that text.
%%%%%%%%%%%%%%%%%%%%
% Text of the first-page notice used when the [accepted] option is given.
\newcommand{\ICML@appearing}{\textit{Proceedings of the
$\mathit{41}^{st}$ International Conference on Machine Learning},
Vienna, Austria. PMLR 235, 2024.
Copyright 2024 by the author(s).}
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the draft/under
% review version of the ``appearing in'' note. Modify it to change
% that text.
%%%%%%%%%%%%%%%%%%%%
\newcommand{\Notice@String}{Preliminary work. Under review by the
International Conference on Machine Learning (ICML)\@. Do not distribute.}
% Cause the declared options to actually be parsed and activated
\ProcessOptions\relax
% Submission (non-accepted) PDFs get an anonymous author in the metadata.
\ifdefined\isaccepted\else\ifdefined\hypersetup
\hypersetup{pdfauthor={Anonymous Authors}}
\fi
\fi
% Default hyperref appearance: dark-blue colored links, no borders,
% and PDF metadata naming the proceedings. Skipped under [nohyperref].
\ifdefined\nohyperref\else\ifdefined\hypersetup
\definecolor{mydarkblue}{rgb}{0,0.08,0.45}
\hypersetup{ %
pdftitle={},
pdfsubject={Proceedings of the International Conference on Machine Learning 2024},
pdfkeywords={},
pdfborder=0 0 0,
pdfpagemode=UseNone,
colorlinks=true,
linkcolor=mydarkblue,
citecolor=mydarkblue,
filecolor=mydarkblue,
urlcolor=mydarkblue,
}
\fi
\fi
% Uncomment the following for debugging. It will cause LaTeX to dump
% the version of the ``appearing in'' string that will actually appear
% in the document.
%\typeout{>> Notice string='\Notice@String'}
% Change citation commands to be more like old ICML styles
\newcommand{\yrcite}[1]{\citeyearpar{#1}}
\renewcommand{\cite}[1]{\citep{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% to ensure the letter format is used. pdflatex does compile the
% page size into the pdf. This is done using \pdfpagewidth and
% \pdfpageheight. As LaTeX does not know these directives, we first
% check whether pdflatex or latex is used.
%
% Kristian Kersting 2005
%
% in order to account for the more recent use of pdfetex as the default
% compiler, I have changed the pdf verification.
%
% Ricardo Silva 2007
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Force US-letter paper and, when running under pdfTeX in PDF mode,
% also stamp the page size into the PDF itself.
\paperwidth=8.5in
\paperheight=11in
% old PDFLaTex verification, circa 2005
%
%\newif\ifpdf\ifx\pdfoutput\undefined
% \pdffalse % we are not running PDFLaTeX
%\else
% \pdfoutput=1 % we are running PDFLaTeX
% \pdftrue
%\fi
\newif\ifpdf %adapted from ifpdf.sty
% \pdftrue only when \pdfoutput exists, is not \relax, and is nonzero.
\ifx\pdfoutput\undefined
\else
\ifx\pdfoutput\relax
\else
\ifcase\pdfoutput
\else
\pdftrue
\fi
\fi
\fi
\ifpdf
% \pdfpagewidth=\paperwidth
% \pdfpageheight=\paperheight
\setlength{\pdfpagewidth}{8.5in}
\setlength{\pdfpageheight}{11in}
\fi
% Physical page layout: two 3.25in columns inside a 6.75in x 9in text
% block, with a small head for the running title.
\evensidemargin -0.23in
\oddsidemargin -0.23in
\setlength\textheight{9.0in}
\setlength\textwidth{6.75in}
\setlength\columnsep{0.25in}
\setlength\headheight{10pt}
\setlength\headsep{10pt}
\addtolength{\topmargin}{-20pt}
\addtolength{\topmargin}{-0.29in}
% Historically many authors tried to include packages like geometry or fullpage,
% which change the page layout. It either makes the proceedings inconsistent, or
% wastes organizers' time chasing authors. So let's nip these problems in the
% bud here. -- Iain Murray 2018.
%\RequirePackage{printlen}
% At \begin{document}, verify that no package or user command has changed
% the page-layout lengths set above; if any differ from the expected
% values, typeset a prominent in-document warning. The expected values
% were captured with the printlen package (see comments below).
\AtBeginDocument{%
% To get the numbers below, include printlen package above and see lengths like this:
%\printlength\oddsidemargin\\
%\printlength\headheight\\
%\printlength\textheight\\
%\printlength\marginparsep\\
%\printlength\footskip\\
%\printlength\hoffset\\
%\printlength\paperwidth\\
%\printlength\topmargin\\
%\printlength\headsep\\
%\printlength\textwidth\\
%\printlength\marginparwidth\\
%\printlength\marginparpush\\
%\printlength\voffset\\
%\printlength\paperheight\\
%
\newif\ifmarginsmessedwith
\marginsmessedwithfalse
\ifdim\oddsidemargin=-16.62178pt \else oddsidemargin has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\headheight=10.0pt \else headheight has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\textheight=650.43pt \else textheight has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\marginparsep=11.0pt \else marginparsep has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\footskip=25.0pt \else footskip has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\hoffset=0.0pt \else hoffset has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\paperwidth=614.295pt \else paperwidth has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\topmargin=-24.95781pt \else topmargin has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\headsep=10.0pt \else headsep has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\textwidth=487.8225pt \else textwidth has been altered.\\ \marginsmessedwithtrue\fi
%\ifdim\marginparwidth=65.0pt \else marginparwidth has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\marginparpush=5.0pt \else marginparpush has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\voffset=0.0pt \else voffset has been altered.\\ \marginsmessedwithtrue\fi
\ifdim\paperheight=794.96999pt \else paperheight has been altered.\\ \marginsmessedwithtrue\fi
\ifmarginsmessedwith
\textbf{\large \em The page layout violates the ICML style.}
Please do not change the page layout, or include packages like geometry,
savetrees, or fullpage, which change it for you.
We're not able to reliably undo arbitrary changes to the style. Please remove
the offending package(s), or layout-changing commands and try again.
\fi}
%% The following is adapted from code in the acmconf.sty conference
%% style file. The constants in it are somewhat magical, and appear
%% to work well with the two-column format on US letter paper that
%% ICML uses, but will break if you change that layout, or if you use
%% a longer block of text for the copyright notice string. Fiddle with
%% them if necessary to get the block to fit/look right.
%%
%% -- Terran Lane, 2003
%%
%% The following comments are included verbatim from acmconf.sty:
%%
%%% This section (written by KBT) handles the 1" box in the lower left
%%% corner of the left column of the first page by creating a picture,
%%% and inserting the predefined string at the bottom (with a negative
%%% displacement to offset the space allocated for a non-existent
%%% caption).
%%%
% Bottom-of-first-column notice box, adapted from acmconf.sty; reserves
% space for \Notice@String via a [b]-placed float.
\def\ftype@copyrightbox{8}
\def\@copyrightspace{
% Create a float object positioned at the bottom of the column. Note
% that because of the mystical nature of floats, this has to be called
% before the first column is populated with text (e.g., from the title
% or abstract blocks). Otherwise, the text will force the float to
% the next column. -- TDRL.
\@float{copyrightbox}[b]
\begin{center}
\setlength{\unitlength}{1pc}
\begin{picture}(20,1.5)
% Create a line separating the main text from the note block.
% 4.818pc==0.8in.
\put(0,2.5){\line(1,0){4.818}}
% Insert the text string itself. Note that the string has to be
% enclosed in a parbox -- the \put call needs a box object to
% position. Without the parbox, the text gets splattered across the
% bottom of the page semi-randomly. The 19.75pc distance seems to be
% the width of the column, though I can't find an appropriate distance
% variable to substitute here. -- TDRL.
\put(0,0){\parbox[b]{19.75pc}{\small \Notice@String}}
\end{picture}
\end{center}
\end@float}
% Note: A few Latex versions need the next line instead of the former.
% \addtolength{\topmargin}{0.3in}
% \setlength\footheight{0pt}
\setlength\footskip{25.0pt}
%\pagestyle{empty}
% Two-column, flush-bottom body; \sloppy relaxes line breaking for the
% narrow columns.
\flushbottom \twocolumn
\sloppy
% Clear out the addcontentsline command
\def\addcontentsline#1#2#3{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% commands for formatting paper title, author names, and addresses.
%%start%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% title as running head -- Kristian Kersting 2005 %%%%%%%%%%%%%
%\makeatletter
%\newtoks\mytoksa
%\newtoks\mytoksb
%\newcommand\addtomylist[2]{%
% \mytoksa\expandafter{#1}%
% \mytoksb{#2}%
% \edef#1{\the\mytoksa\the\mytoksb}%
%}
%\makeatother
% box to check the size of the running head
% Scratch box used by \icmltitle to measure the running head.
\newbox\titrun
% general page style
% fancyhdr setup: empty head/foot fields except a centered page number.
\pagestyle{fancy}
\fancyhf{}
\fancyhead{}
\fancyfoot{}
\cfoot{\thepage}
% set the width of the head rule to 1 point
\renewcommand{\headrulewidth}{1pt}
% definition to set the head as running head in the preamble
\def\icmltitlerunning#1{\gdef\@icmltitlerunning{#1}}
% main definition adapting \icmltitle from 2004
% \icmltitle{<title>}: typesets the paper title between rules and installs
% the running head (either the title itself or the \icmltitlerunning text).
% If the running head is too wide or wraps, a placeholder head is used and
% a warning is written to the log.
\long\def\icmltitle#1{%
%check whether @icmltitlerunning exists
% if not \icmltitle is used as running head
\ifx\undefined\@icmltitlerunning%
\gdef\@icmltitlerunning{#1}
\fi
%add it to pdf information
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdftitle={#1}}
\fi\fi
%get the dimension of the running title
\global\setbox\titrun=\vbox{\small\bf\@icmltitlerunning}
% error flag
\gdef\@runningtitleerror{0}
% running title too long
\ifdim\wd\titrun>\textwidth%
{\gdef\@runningtitleerror{1}}%
% running title breaks a line
\else\ifdim\ht\titrun>6.25pt
{\gdef\@runningtitleerror{2}}%
\fi
\fi
% if there is something wrong with the running title
\ifnum\@runningtitleerror>0
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Title exceeds size limitations for running head.}%
\typeout{Please supply a shorter form for the running head}
\typeout{with \string\icmltitlerunning{...}\space prior to \string\begin{document}}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
% set default running title
\chead{\small\bf Title Suppressed Due to Excessive Size}%
\else
% 'everything' fine, set provided running title
\chead{\small\bf\@icmltitlerunning}%
\fi
% no running title on the first page of the paper
\thispagestyle{plain}
%%%%%%%%%%%%%%%%%%%% Kristian Kersting %%%%%%%%%%%%%%%%%%%%%%%%%
%end%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
{\center\baselineskip 18pt
\toptitlebar{\Large\bf #1}\bottomtitlebar}
}
% Accumulate a comma-separated list of all author names; it is pushed
% into the PDF's author metadata each time a name is added.
\gdef\icmlfullauthorlist{}
\newcommand\addstringtofullauthorlist{\g@addto@macro\icmlfullauthorlist}
\newcommand\addtofullauthorlist[1]{%
\ifdefined\icmlanyauthors%
\addstringtofullauthorlist{, #1}%
\else%
\addstringtofullauthorlist{#1}%
\gdef\icmlanyauthors{1}%
\fi%
% \ifdefined\nohyperref\else
\ifdefined\hypersetup%
\hypersetup{pdfauthor=\icmlfullauthorlist}%
\fi%\fi
}
% Horizontal rules above and below the title block.
\def\toptitlebar{\hrule height1pt \vskip .25in}
\def\bottomtitlebar{\vskip .22in \hrule height1pt \vskip .3in}
% Environment wrapping the centered author-name block.
\newenvironment{icmlauthorlist}{%
\setlength\topsep{0pt}
\setlength\parskip{0pt}
\begin{center}
}{%
\end{center}
}
\newcounter{@affiliationcounter}
% \@pa{<label>}: print the superscript marker for affiliation <label>.
% First use of a numeric label allocates the next affiliation number
% (stored in counter @affil<label>); labels registered via
% \icmlsetsymbol print their symbol instead of a number.
\newcommand{\@pa}[1]{%
% ``#1''
\ifcsname the@affil#1\endcsname
% do nothing
\else
\ifcsname @icmlsymbol#1\endcsname
% nothing
\else
\stepcounter{@affiliationcounter}%
\newcounter{@affil#1}%
\setcounter{@affil#1}{\value{@affiliationcounter}}%
\fi
\fi%
\ifcsname @icmlsymbol#1\endcsname
\textsuperscript{\csname @icmlsymbol#1\endcsname\,}%
\else
%\expandafter\footnotemark[\arabic{@affil#1}\,]%
\textsuperscript{\arabic{@affil#1}\,}%
\fi
}
%\newcommand{\icmlauthor}[2]{%
%\addtofullauthorlist{#1}%
%#1\@for\theaffil:=#2\do{\pa{\theaffil}}%
%}
% \icmlauthor{<name>}{<affil labels>}: in the accepted version, prints
% the bold name with its affiliation superscripts; in the anonymous
% (review) version, prints "Anonymous Authors" once and suppresses all
% subsequent \icmlauthor calls.
\newcommand{\icmlauthor}[2]{%
\ifdefined\isaccepted
\mbox{\bf #1}\,\@for\theaffil:=#2\do{\@pa{\theaffil}} \addtofullauthorlist{#1}%
\else
\ifdefined\@icmlfirsttime
\else
\gdef\@icmlfirsttime{1}
\mbox{\bf Anonymous Authors}\@pa{@anon} \addtofullauthorlist{Anonymous Authors}
\fi
\fi
}
% \icmlsetsymbol{<label>}{<symbol>}: make affiliation <label> print the
% given symbol (e.g. *) instead of a number.
\newcommand{\icmlsetsymbol}[2]{%
\expandafter\gdef\csname @icmlsymbol#1\endcsname{#2}
}
% \icmlaffiliation{<label>}{<text>}: record the affiliation text for a
% label previously used in some \icmlauthor; errors (in print and in the
% log) if the label was never mentioned. Anonymous version stores a
% placeholder institution instead.
\newcommand{\icmlaffiliation}[2]{%
\ifdefined\isaccepted
\ifcsname the@affil#1\endcsname
\expandafter\gdef\csname @affilname\csname the@affil#1\endcsname\endcsname{#2}%
\else
{\bf AUTHORERR: Error in use of \textbackslash{}icmlaffiliation command. Label ``#1'' not mentioned in some \textbackslash{}icmlauthor\{author name\}\{labels here\} command beforehand. }
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Affiliation label undefined. }%
\typeout{Make sure \string\icmlaffiliation\space follows }
\typeout{all of \string\icmlauthor\space commands}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
\fi
\else % \isaccepted
% can be called multiple times... it's idempotent
\expandafter\gdef\csname @affilname1\endcsname{Anonymous Institution, Anonymous City, Anonymous Region, Anonymous Country}
\fi
}
% \icmlcorrespondingauthor{<name>}{<email>}: accumulate "Name <email>"
% entries for the correspondence footnote; anonymized under review.
\newcommand{\icmlcorrespondingauthor}[2]{
\ifdefined\isaccepted
\ifdefined\icmlcorrespondingauthor@text
\g@addto@macro\icmlcorrespondingauthor@text{, #1 \textless{}#2\textgreater{}}
\else
\gdef\icmlcorrespondingauthor@text{#1 \textless{}#2\textgreater{}}
\fi
\else
\gdef\icmlcorrespondingauthor@text{Anonymous Author \textless{}anon.email@domain.com\textgreater{}}
\fi
}
% Marker text for equal first-author contribution.
\newcommand{\icmlEqualContribution}{\textsuperscript{*}Equal contribution }
% Loop counter used by \printAffiliationsAndNotice.
\newcounter{@affilnum}
% \printAffiliationsAndNotice{<extra>}: emit the unmarked footnote on the
% first page: (accepted only) the <extra> text, e.g. \icmlEqualContribution,
% then the numbered affiliation list, the correspondence line, and
% \Notice@String. Prints AUTHORERR text when affiliations or the
% corresponding author were never declared.
\newcommand{\printAffiliationsAndNotice}[1]{%
\stepcounter{@affiliationcounter}%
{\let\thefootnote\relax\footnotetext{\hspace*{-\footnotesep}\ifdefined\isaccepted #1\fi%
\forloop{@affilnum}{1}{\value{@affilnum} < \value{@affiliationcounter}}{
\textsuperscript{\arabic{@affilnum}}\ifcsname @affilname\the@affilnum\endcsname%
\csname @affilname\the@affilnum\endcsname%
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlaffiliation.}
\fi
}.
\ifdefined\icmlcorrespondingauthor@text
Correspondence to: \icmlcorrespondingauthor@text.
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlcorrespondingauthor.}
\fi
\ \\
\Notice@String
}
}
}
%\makeatother
% Deprecated \icmladdress: prints an in-document error directing users
% to \icmlauthor/\icmlaffiliation instead.
\long\def\icmladdress#1{%
{\bf The \textbackslash{}icmladdress command is no longer used. See the example\_paper PDF .tex for usage of \textbackslash{}icmlauther and \textbackslash{}icmlaffiliation.}
}
%% keywords as first class citizens
% \icmlkeywords{<kw list>}: currently only records the keywords in the
% PDF metadata (the in-text printing variants are commented out).
\def\icmlkeywords#1{%
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
% \ifdefined\nohyperref\else\ifdefined\hypersetup
% \hypersetup{pdfkeywords={#1}}
% \fi\fi
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdfkeywords={#1}}
\fi\fi
}
% modification to natbib citations
% Author-year citations in round parens, "; " between citations.
\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
% Redefinition of the abstract environment.
% Centered bold "Abstract" heading above quoted (indented) body text.
\renewenvironment{abstract}
{%
% Insert the ``appearing in'' copyright notice.
%\@copyrightspace
\centerline{\large\bf Abstract}
\vspace{-0.12in}\begin{quote}}
{\par\end{quote}\vskip 0.12in}
% numbered section headings with different treatment of numbers
% Patched copy of the LaTeX kernel's \@startsection that keeps the
% first paragraph after a heading indented, and dispatches numbered
% headings to \@sict below.
\def\@startsection#1#2#3#4#5#6{\if@noskipsec \leavevmode \fi
\par \@tempskipa #4\relax
\@afterindenttrue
% Altered the following line to indent a section's first paragraph.
% \ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \@afterindentfalse\fi
\ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \fi
\if@nobreak \everypar{}\else
\addpenalty{\@secpenalty}\addvspace{\@tempskipa}\fi \@ifstar
{\@ssect{#3}{#4}{#5}{#6}}{\@dblarg{\@sict{#1}{#2}{#3}{#4}{#5}{#6}}}}
% Variant of the kernel's \@sect that follows the section number with a
% period and a tie ("1.~Title") instead of a horizontal skip.
\def\@sict#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname}\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec.~}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
% Kernel-style \@sect kept for run-in headings; separates number and
% title with a fixed 0.4em skip.
\def\@sect#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname\hskip 0.4em }\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
% section headings with less space above and below them
\def\thesection {\arabic{section}}
\def\thesubsection {\thesection.\arabic{subsection}}
\def\section{\@startsection{section}{1}{\z@}{-0.12in}{0.02in}
{\large\bf\raggedright}}
\def\subsection{\@startsection{subsection}{2}{\z@}{-0.10in}{0.01in}
{\normalsize\bf\raggedright}}
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-0.08in}{0.01in}
{\normalsize\sc\raggedright}}
\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
% Footnotes
\footnotesep 6.65pt %
\skip\footins 9pt
\def\footnoterule{\kern-3pt \hrule width 0.8in \kern 2.6pt }
\setcounter{footnote}{0}
% Lists and paragraphs
% Compact spacing throughout; no paragraph indent, 6pt between paragraphs.
\parindent 0pt
\topsep 4pt plus 1pt minus 2pt
\partopsep 1pt plus 0.5pt minus 0.5pt
\itemsep 2pt plus 1pt minus 0.5pt
\parsep 2pt plus 1pt minus 0.5pt
\parskip 6pt
\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
\leftmarginvi .5em
\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
% Per-depth list geometry (kernel hooks \@listi..\@listvi).
\def\@listi{\leftmargin\leftmargini}
\def\@listii{\leftmargin\leftmarginii
\labelwidth\leftmarginii\advance\labelwidth-\labelsep
\topsep 2pt plus 1pt minus 0.5pt
\parsep 1pt plus 0.5pt minus 0.5pt
\itemsep \parsep}
\def\@listiii{\leftmargin\leftmarginiii
\labelwidth\leftmarginiii\advance\labelwidth-\labelsep
\topsep 1pt plus 0.5pt minus 0.5pt
\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
\itemsep \topsep}
\def\@listiv{\leftmargin\leftmarginiv
\labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
\def\@listv{\leftmargin\leftmarginv
\labelwidth\leftmarginv\advance\labelwidth-\labelsep}
\def\@listvi{\leftmargin\leftmarginvi
\labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
\abovedisplayskip 7pt plus2pt minus5pt%
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip 0pt plus3pt%
\belowdisplayshortskip 4pt plus3pt minus3pt%
% Less leading in most fonts (due to the narrow columns)
% The choices were between 1-pt and 1.5-pt leading
\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
% Revised formatting for figure captions and table titles.
% Single-line captions are centered; multi-line captions get a hanging
% label ("Figure N.") with the body set in \footnotesize.
\newsavebox\newcaptionbox\newdimen\newcaptionboxwid
\long\def\@makecaption#1#2{
\vskip 10pt
\baselineskip 11pt
\setbox\@tempboxa\hbox{#1. #2}
\ifdim \wd\@tempboxa >\hsize
\sbox{\newcaptionbox}{\small\sl #1.~}
\newcaptionboxwid=\wd\newcaptionbox
\usebox\newcaptionbox {\footnotesize #2}
% \usebox\newcaptionbox {\small #2}
\else
\centerline{{\small\sl #1.} {\small #2}}
\fi}
% Caption label text ("Figure N" / "Table N", period added above).
\def\fnum@figure{Figure \thefigure}
\def\fnum@table{Table \thetable}
% Strut macros for skipping spaces above and below text in tables.
\def\abovestrut#1{\rule[0in]{0in}{#1}\ignorespaces}
\def\belowstrut#1{\rule[-#1]{0in}{#1}\ignorespaces}
\def\abovespace{\abovestrut{0.20in}}
\def\aroundspace{\abovestrut{0.20in}\belowstrut{0.10in}}
\def\belowspace{\belowstrut{0.10in}}
% Various personal itemization commands.
\def\texitem#1{\par\noindent\hangindent 12pt
\hbox to 12pt {\hss #1 ~}\ignorespaces}
\def\icmlitem{\texitem{$\bullet$}}
% To comment out multiple lines of text.
\long\def\comment#1{}
%% Line counter (not in final version). Adapted from NIPS style file by Christoph Sawade
% Vertical Ruler
% This code is, largely, from the CVPR 2010 conference style file
% ----- define vruler
% Vertical line-number ruler machinery (adapted from the CVPR 2010 style;
% see the comment block above). Builds a box of evenly spaced numbers
% that is later overlaid on each page of review-mode submissions.
\makeatletter
\newbox\icmlrulerbox
\newcount\icmlrulercount
\newdimen\icmlruleroffset
\newdimen\cv@lineheight
\newdimen\cv@boxheight
\newbox\cv@tmpbox
\newcount\cv@refno
\newcount\cv@tot
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
% Fills \icmlrulerbox with line numbers spaced <SCALE> apart, counting
% from <INITIAL_COUNT> in steps of <STEP>, zero-padded to <DIGITS>,
% filling a column of height <HEIGHT>.
\def\makevruler[#1][#2][#3][#4][#5]{
\begingroup\offinterlineskip
\textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
\global\setbox\icmlrulerbox=\vbox to \textheight{%
{
\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
\cv@lineheight=#1\global\icmlrulercount=#2%
\cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
\cv@refno1\vskip-\cv@lineheight\vskip1ex%
\loop\setbox\cv@tmpbox=\hbox to0cm{ % side margin
\hfil {\hfil\fillzeros[#4]\icmlrulercount}
}%
\ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
\advance\cv@refno1\global\advance\icmlrulercount#3\relax
\ifnum\cv@refno<\cv@tot\repeat
}
}
\endgroup
}%
\makeatother
% ----- end of vruler
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
% \icmlruler{<start>}: build and place the ruler box at 12pt line pitch,
% 3-digit numbers, full text height.
\def\icmlruler#1{\makevruler[12pt][#1][1][3][\textheight]\usebox{\icmlrulerbox}}
% On every shipped-out page of a non-accepted (review) build, overlay a
% gray line-number ruler in the left margin; accepted builds get nothing.
\AddToShipoutPicture{%
\icmlruleroffset=\textheight
\advance\icmlruleroffset by 5.2pt % top margin
\color[rgb]{.7,.7,.7}
\ifdefined\isaccepted \else
\AtTextUpperLeft{%
\put(\LenToUnit{-35pt},\LenToUnit{-\icmlruleroffset}){%left ruler
\icmlruler{\icmlrulercount}}
% \put(\LenToUnit{1.04\textwidth},\LenToUnit{-\icmlruleroffset}){%right ruler
% \icmlruler{\icmlrulercount}}
}
\fi
}
\endinput
% Figure: six circular states (1..6) each pointing at state 7, with a
% dashed boundary around the region and a curved arrow leaving state 7.
% Scaled to a fixed 7cm x 4.4cm footprint.
\resizebox{7cm}{4.4cm}{
\begin{tikzpicture}[smooth]
% Anchor points of the dashed boundary.
\node[coordinate] (origin) at (0.3,0) {};
\node[coordinate] (num7) at (3,0) {};
\path (origin) ++ (90:3cm) node (origin_above) [coordinate] {};
\path (origin_above) ++ (0:5.7cm) node (origin_aright) [coordinate] {};
% Small polar offsets tracing the curved arrow that exits state 7.
\path (num7) ++ (-10:0.5cm) node (num7_bright1) [coordinate] {};
\path (num7) ++ (-30:0.7cm) node (num7_bright2) [coordinate] {};
\path (num7) ++ (-60:0.35cm) node (num7_bright3) [coordinate] {};
\path (num7) ++ (-60:0.6cm) node (num7_bright4) [coordinate] {};
% Dashed boundary through state 7 and around the upper-left corner.
\draw[dashed,line width = 0.03cm,xshift=3cm] plot[tension=0.06]
coordinates{(num7) (origin) (origin_above) (origin_aright)};
% Curved arrow leaving state 7.
\draw[->,>=stealth,line width = 0.02cm,xshift=3cm] plot[tension=0.5]
coordinates{(num7) (num7_bright1) (num7_bright2)(num7_bright4) (num7_bright3)};
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (g) at (num7) {7};
% States 1..6 sit on a horizontal line at y=2.5, 1cm apart, starting at
% x=1.  Each state i gets a short dashed double arrow above it (y=2.5 up
% to y=3.0) and its numbered circle slightly lower at y=2.2.  These
% explicit coordinates reproduce the original chain of
% ++(0:1cm) / ++(90:0.5cm) / ++(-90:0.3cm) offsets exactly.
\foreach \i/\lbl in {1/a,2/b,3/c,4/d,5/e,6/f}{
\draw[<->,>=stealth,dashed,line width = 0.03cm] (\i,2.5) -- (\i,3.0);
\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (\lbl) at (\i,2.2) {\i};
}
% Every numbered state points at state 7.
\foreach \lbl in {a,b,c,d,e,f}{
\draw[->,>=stealth,line width = 0.02cm] (\lbl)--(g);
}
\end{tikzpicture}
}
% \tikzstyle{int}=[draw, fill=blue!20, minimum size=2em]
% \tikzstyle{block}=[draw, fill=gray, minimum size=1.5em]
% \tikzstyle{init} = [pin edge={to-,thin,black}]
% \resizebox{8cm}{1.2cm}{
% \begin{tikzpicture}[node distance=1.5cm,auto,>=latex']
% \node [block] (o) {};
% \node (p) [left of=o,node distance=0.5cm, coordinate] {o};
% \node [shape=circle,int] (a) [right of=o]{$A$};
% \node (b) [left of=a,node distance=1.5cm, coordinate] {a};
% \node [shape=circle,int] (c) [right of=a] {$B$};
% \node (d) [left of=c,node distance=1.5cm, coordinate] {c};
% \node [shape=circle,int, pin={[init]above:$$}] (e) [right of=c]{$C$};
% \node (f) [left of=e,node distance=1.5cm, coordinate] {e};
% \node [shape=circle,int] (g) [right of=e] {$D$};
% \node (h) [left of=g,node distance=1.5cm, coordinate] {g};
% \node [shape=circle,int] (i) [right of=g] {$E$};
% \node (j) [left of=i,node distance=1.5cm, coordinate] {i};
% \node [block] (k) [right of=i] {};
% \node (l) [left of=k,node distance=0.5cm, coordinate] {k};
% \path[<-] (o) edge node {$0$} (a);
% \path[<->] (a) edge node {$0$} (c);
% \path[<->] (c) edge node {$0$} (e);
% \path[<->] (e) edge node {$0$} (g);
% \path[<->] (g) edge node {$0$} (i);
% \draw[->] (i) edge node {$1$} (k);
% \end{tikzpicture}
% }
% Figure: five circular states A..E in a row, flanked by two gray square
% terminal blocks, with transition labels on the edges.
% `\tikzstyle` is deprecated; `\tikzset` with `.style` is the documented
% replacement (PGF/TikZ manual) and defines the same global styles.
\tikzset{
int/.style={draw, fill=blue!20, minimum size=2em},
block/.style={draw, fill=gray, minimum size=1.5em},
init/.style={pin edge={to-,thin,black}}
}
\resizebox{8cm}{1.5cm}{
\begin{tikzpicture}[node distance=1.5cm, auto, >=latex]
% Left terminal block, then states A..E, then right terminal block.
% The bare coordinate nodes (p,b,d,f,h,j,l) are kept for compatibility
% even though nothing visible references them here.
\node [block] (o) {};
\node (p) [left of=o, node distance=0.5cm, coordinate] {o};
\node [shape=circle, int] (a) [right of=o] {$A$};
\node (b) [left of=a, node distance=1.5cm, coordinate] {a};
\node [shape=circle, int] (c) [right of=a] {$B$};
\node (d) [left of=c, node distance=1.5cm, coordinate] {c};
\node [shape=circle, int, pin={[init]above:$ $}] (e) [right of=c] {$C$};
\node (f) [left of=e, node distance=1.5cm, coordinate] {e};
\node [shape=circle, int] (g) [right of=e] {$D$};
\node (h) [left of=g, node distance=1.5cm, coordinate] {g};
\node [shape=circle, int] (i) [right of=g] {$E$};
\node (j) [left of=i, node distance=1.5cm, coordinate] {i};
\node [block] (k) [right of=i] {};
\node (l) [left of=k, node distance=0.5cm, coordinate] {k};
% Transitions: one-way in from the left block, bidirectional between
% states (label $0$), one-way out to the right block (label $1$).
\path[->] (o) edge node {$0$} (a);
\path[<->] (a) edge node {$0$} (c);
\path[<->] (c) edge node {$0$} (e);
\path[<->] (e) edge node {$0$} (g);
\path[<->] (g) edge node {$0$} (i);
\draw[->] (i) edge node {$1$} (k);
\end{tikzpicture}
}
% NOTE(review): the lines below are residue from the web view this file
% was copied from (a git "no newline" diff marker and GitLab UI text),
% not LaTeX source. Commented out so they cannot be mistaken for code.
% \ No newline at end of file
% Markdown is supported
% 0% or
% You are about to add 0 people to the discussion. Proceed with caution.
% Finish editing this message first!
% Please register or to comment