% Auxiliary-file preamble. The \hyper@... / \Hy... names suggest it was written
% by hyperref (NOTE(review): confirm against the generating document).
% It installs fallbacks so the label records below can still be read when the
% macros they rely on are not defined.
\relax 
% Fallback: a two-argument no-op, defined only if \hyper@newdestlabel does not exist yet.
\providecommand\hyper@newdestlabel[2]{}
% Fallback: unless already defined, \HyperFirstAtBeginDocument simply forwards to \AtBeginDocument.
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
% Deferred shim, active only while \hyper@anchor is undefined
% (presumably meaning hyperref is not loaded -- confirm).
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
% Keep the current \newlabel reachable as \oldnewlabel.
\global\let\oldnewlabel\newlabel
% #2 is passed on without braces, so its inner groups become separate arguments of \newlabelxx.
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
% Forward only the label name and the first two fields; arguments #4-#6 are discarded.
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
% At end of document, put the saved \newlabel back if \hyper@anchor is still undefined.
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\newlabel\oldnewlabel
\fi}
\fi}
% Globally set \hyper@last to \relax.
\global\let\hyper@last\relax 
% From this point on, \HyperFirstAtBeginDocument runs its argument immediately instead of deferring it.
\gdef\HyperFirstAtBeginDocument#1{#1}
% Fallbacks: no-op versions of the form-field aux hooks (one and two arguments), defined only if missing.
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{sutton1988learning}
\citation{tsitsiklis1997analysis}
\citation{Sutton2018book}
\citation{baird1995residual}
\citation{sutton2008convergent}
\citation{sutton2009fast}
\citation{sutton2016emphatic}
\citation{chen2023modified}
\citation{hackman2012faster}
\citation{liu2015finite,liu2016proximal,liu2018proximal}
\citation{givchi2015quasi}
\citation{pan2017accelerated}
\citation{hallak2016generalized}
\citation{zhang2022truncated}
\citation{johnson2013accelerating}
\citation{korda2015td}
\citation{xu2019reanalysis}
\citation{Sutton2018book}
\citation{baird1995residual}
\citation{sutton2009fast}
\citation{sutton2009fast}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\newlabel{introduction}{{1}{1}{Introduction}{section.1}{}}
\citation{feng2019kernel}
\citation{basserrano2021logistic}
\citation{Sutton2018book}
\citation{Sutton2018book}
\@writefile{toc}{\contentsline {section}{\numberline {2}Background}{2}{section.2}\protected@file@percent }
\newlabel{preliminaries}{{2}{2}{Background}{section.2}{}}
\newlabel{valuefunction}{{2}{2}{Background}{section.2}{}}
\newlabel{linearvaluefunction}{{1}{2}{Background}{equation.2.1}{}}
\citation{sutton2009fast}
\citation{sutton2009fast}
\citation{ng1999policy}
\citation{devlin2012dynamic}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Classification accuracies for naive Bayes and flexible Bayes on various data sets.}}{3}{table.1}\protected@file@percent }
\newlabel{example_bias}{{1}{3}{Classification accuracies for naive Bayes and flexible Bayes on various data sets}{table.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Variance Minimization Algorithms}{3}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Motivation}{3}{subsection.3.1}\protected@file@percent }
\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces VMTD algorithm with linear function approximation in the on-policy setting}}{4}{algorithm.1}\protected@file@percent }
\newlabel{alg:algorithm 1}{{1}{4}{Variance Minimization TD Learning: VMTD}{algorithm.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Variance Minimization TD Learning: VMTD}{4}{subsection.3.2}\protected@file@percent }
\newlabel{omega}{{3}{4}{Variance Minimization TD Learning: VMTD}{equation.3.3}{}}
\newlabel{delta}{{4}{4}{Variance Minimization TD Learning: VMTD}{equation.3.4}{}}
\newlabel{theta}{{5}{4}{Variance Minimization TD Learning: VMTD}{equation.3.5}{}}
\newlabel{deltaSarsa}{{8}{4}{Variance Minimization TD Learning: VMTD}{equation.3.8}{}}
\newlabel{deltaQ}{{9}{4}{Variance Minimization TD Learning: VMTD}{equation.3.9}{}}
\citation{dalal2020tale}
\citation{dalal2020tale}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Variance Minimization TDC Learning: VMTDC}{5}{subsection.3.3}\protected@file@percent }
\newlabel{thetavmtdc}{{11}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.11}{}}
\newlabel{uvmtdc}{{12}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.12}{}}
\newlabel{omegavmtdc}{{13}{5}{Variance Minimization TDC Learning: VMTDC}{equation.3.13}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Theoretical Analysis}{5}{section.4}\protected@file@percent }
\newlabel{theorem1}{{4.1}{5}{}{theorem.4.1}{}}
\newlabel{corollary4_2}{{4.2}{5}{}{theorem.4.2}{}}
\citation{Sutton2018book}
\citation{sutton2009fast}
\citation{baird1995residual,sutton2009fast}
\citation{baird1995residual,sutton2009fast,maei2011gradient}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Random walk.}}{6}{figure.1}\protected@file@percent }
\newlabel{randomwalk}{{1}{6}{Random walk}{figure.1}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces 7-state version of Baird's off-policy counterexample.}}{6}{figure.2}\protected@file@percent }
\newlabel{bairdexample}{{2}{6}{7-state version of Baird's off-policy counterexample}{figure.2}{}}
\newlabel{theorem2}{{4.3}{6}{}{theorem.4.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Experimental Studies}{6}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Testing Tasks}{6}{subsection.5.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Experimental Results and Analysis}{7}{subsection.5.2}\protected@file@percent }
\newlabel{DependentFull}{{3(a)}{7}{Subfigure 3(a)}{subfigure.3.1}{}}
\newlabel{sub@DependentFull}{{(a)}{7}{Subfigure 3(a)\relax }{subfigure.3.1}{}}
\newlabel{TabularFull}{{3(b)}{7}{Subfigure 3(b)}{subfigure.3.2}{}}
\newlabel{sub@TabularFull}{{(b)}{7}{Subfigure 3(b)\relax }{subfigure.3.2}{}}
\newlabel{InvertedFull}{{3(c)}{7}{Subfigure 3(c)}{subfigure.3.3}{}}
\newlabel{sub@InvertedFull}{{(c)}{7}{Subfigure 3(c)\relax }{subfigure.3.3}{}}
\newlabel{CounterExampleFull}{{3(d)}{7}{Subfigure 3(d)}{subfigure.3.4}{}}
\newlabel{sub@CounterExampleFull}{{(d)}{7}{Subfigure 3(d)\relax }{subfigure.3.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Learning curves of four evaluation environments.}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Dependent}}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Tabular}}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Inverted}}}{7}{figure.3}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {counterexample}}}{7}{figure.3}\protected@file@percent }
\newlabel{Evaluation_full}{{3}{7}{Learning curves of four evaluation environments}{figure.3}{}}
\citation{schwartz1993reinforcement}
\newlabel{MazeFull}{{4(a)}{8}{Subfigure 4(a)}{subfigure.4.1}{}}
\newlabel{sub@MazeFull}{{(a)}{8}{Subfigure 4(a)\relax }{subfigure.4.1}{}}
\newlabel{CliffWalkingFull}{{4(b)}{8}{Subfigure 4(b)}{subfigure.4.2}{}}
\newlabel{sub@CliffWalkingFull}{{(b)}{8}{Subfigure 4(b)\relax }{subfigure.4.2}{}}
\newlabel{MountainCarFull}{{4(c)}{8}{Subfigure 4(c)}{subfigure.4.3}{}}
\newlabel{sub@MountainCarFull}{{(c)}{8}{Subfigure 4(c)\relax }{subfigure.4.3}{}}
\newlabel{AcrobotFull}{{4(d)}{8}{Subfigure 4(d)}{subfigure.4.4}{}}
\newlabel{sub@AcrobotFull}{{(d)}{8}{Subfigure 4(d)\relax }{subfigure.4.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Learning curves of four control environments.}}{8}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Maze}}}{8}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Cliff Walking}}}{8}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Mountain Car}}}{8}{figure.4}\protected@file@percent }
\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Acrobot}}}{8}{figure.4}\protected@file@percent }
\newlabel{Complete_full}{{4}{8}{Learning curves of four control environments}{figure.4}{}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Related Work}{8}{section.6}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Difference between VMQ and R-learning}{8}{subsection.6.1}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Difference between R-learning and tabular VMQ.}}{8}{table.2}\protected@file@percent }
\newlabel{differenceRandVMQ}{{2}{8}{Difference between R-learning and tabular VMQ}{table.2}{}}
\citation{korda2015td}
\citation{xu2020reanalysis}
\citation{Sutton2018book}
\citation{Sutton2018book}
\citation{schulman2015trust}
\citation{schulman2017proximal}
\citation{borkar1997stochastic}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Variance Reduction for TD Learning}{9}{subsection.6.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Variance Reduction for Policy Gradient Algorithms}{9}{subsection.6.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion and Future Work}{9}{section.7}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {A}Relevant proofs}{9}{appendix.A}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {A.1}Proof of Theorem \ref {theorem1}}{9}{subsection.A.1}\protected@file@percent }
\newlabel{proofth1}{{A.1}{9}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}}
\newlabel{th1proof}{{A.1}{9}{Proof of Theorem \ref {theorem1}}{subsection.A.1}{}}
\citation{hirsch1989convergent}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\newlabel{thetaFast}{{19}{10}{Proof of Theorem \ref {theorem1}}{equation.A.19}{}}
\newlabel{omegaFast}{{20}{10}{Proof of Theorem \ref {theorem1}}{equation.A.20}{}}
\newlabel{omegaFastFinal}{{21}{10}{Proof of Theorem \ref {theorem1}}{equation.A.21}{}}
\newlabel{omegaInfty}{{22}{10}{Proof of Theorem \ref {theorem1}}{equation.A.22}{}}
\newlabel{odetheta}{{23}{10}{Proof of Theorem \ref {theorem1}}{equation.A.23}{}}
\citation{dalal2020tale}
\citation{dalal2020tale}
\newlabel{covariance}{{24}{11}{Proof of Theorem \ref {theorem1}}{equation.A.24}{}}
\newlabel{odethetafinal}{{25}{11}{Proof of Theorem \ref {theorem1}}{equation.A.25}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.2}Proof of Corollary \ref {corollary4_2}}{11}{subsection.A.2}\protected@file@percent }
\newlabel{proofcorollary4_2}{{A.2}{11}{Proof of Corollary \ref {corollary4_2}}{subsection.A.2}{}}
\newlabel{matrixassumption}{{A.1}{11}{}{theorem.A.1}{}}
\newlabel{stepsizeassumption}{{A.2}{11}{}{theorem.A.2}{}}
\newlabel{sparseprojection}{{A.3}{11}{}{theorem.A.3}{}}
\citation{dalal2020tale}
\citation{dalal2020tale}
\citation{sutton2009fast}
\citation{hirsch1989convergent}
\newlabel{sparseprojectiontheta}{{30}{12}{}{equation.A.30}{}}
\newlabel{sparseprojectionomega}{{31}{12}{}{equation.A.31}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A.3}Proof of Theorem \ref {theorem2}}{12}{subsection.A.3}\protected@file@percent }
\newlabel{proofth2}{{A.3}{12}{Proof of Theorem \ref {theorem2}}{subsection.A.3}{}}
\newlabel{thetavmtdcFastest}{{32}{12}{Proof of Theorem \ref {theorem2}}{equation.A.32}{}}
\newlabel{uvmtdcFastest}{{33}{12}{Proof of Theorem \ref {theorem2}}{equation.A.33}{}}
\newlabel{omegavmtdcFastest}{{34}{12}{Proof of Theorem \ref {theorem2}}{equation.A.34}{}}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{hirsch1989convergent}
\citation{borkar2000ode}
\citation{borkar2000ode}
\citation{borkar2000ode}
\newlabel{omegavmtdcFastestFinal}{{35}{13}{Proof of Theorem \ref {theorem2}}{equation.A.35}{}}
\newlabel{omegavmtdcInfty}{{36}{13}{Proof of Theorem \ref {theorem2}}{equation.A.36}{}}
\newlabel{thetavmtdcFaster}{{37}{13}{Proof of Theorem \ref {theorem2}}{equation.A.37}{}}
\newlabel{uvmtdcFaster}{{38}{13}{Proof of Theorem \ref {theorem2}}{equation.A.38}{}}
\newlabel{uvmtdcFasterFinal}{{39}{13}{Proof of Theorem \ref {theorem2}}{equation.A.39}{}}
\newlabel{uvmtdcInfty}{{40}{13}{Proof of Theorem \ref {theorem2}}{equation.A.40}{}}
\newlabel{thetavmtdcSlowerFinal}{{42}{14}{Proof of Theorem \ref {theorem2}}{equation.A.42}{}}
\newlabel{odethetavmtdcfinal}{{43}{14}{Proof of Theorem \ref {theorem2}}{equation.A.43}{}}
\@writefile{toc}{\contentsline {section}{\numberline {B}Experimental details}{14}{appendix.B}\protected@file@percent }
\newlabel{experimentaldetails}{{B}{14}{Experimental details}{appendix.B}{}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces VMTDC algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.2}\protected@file@percent }
\newlabel{alg:algorithm 2}{{2}{15}{Proof of Theorem \ref {theorem2}}{algorithm.2}{}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces VMGTD algorithm with linear function approximation in the off-policy setting}}{15}{algorithm.3}\protected@file@percent }
\newlabel{alg:algorithm 3}{{3}{15}{Proof of Theorem \ref {theorem2}}{algorithm.3}{}}
\bibstyle{named}
\bibdata{neurips_2024}
\bibcite{baird1995residual}{{1}{1995}{{Baird and others}}{{}}}
\bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano \bgroup \em  et al.\egroup }}{{}}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces VMGTD2 algorithm with linear function approximation in the off-policy setting}}{16}{algorithm.4}\protected@file@percent }
\newlabel{alg:algorithm 4}{{4}{16}{Proof of Theorem \ref {theorem2}}{algorithm.4}{}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Learning rates ($lr$) of four control experiments.}}{16}{table.3}\protected@file@percent }
\newlabel{lrofways}{{3}{16}{Learning rates ($lr$) of four control experiments}{table.3}{}}
\bibcite{borkar2000ode}{{3}{2000}{{Borkar and Meyn}}{{}}}
\bibcite{borkar1997stochastic}{{4}{1997}{{Borkar}}{{}}}
\bibcite{chen2023modified}{{5}{2023}{{Chen \bgroup \em  et al.\egroup }}{{}}}
\bibcite{dalal2020tale}{{6}{2020}{{Dalal \bgroup \em  et al.\egroup }}{{}}}
\bibcite{devlin2012dynamic}{{7}{2012}{{Devlin and Kudenko}}{{}}}
\bibcite{feng2019kernel}{{8}{2019}{{Feng \bgroup \em  et al.\egroup }}{{}}}
\bibcite{givchi2015quasi}{{9}{2015}{{Givchi and Palhang}}{{}}}
\bibcite{hackman2012faster}{{10}{2012}{{Hackman}}{{}}}
\bibcite{hallak2016generalized}{{11}{2016}{{Hallak \bgroup \em  et al.\egroup }}{{}}}
\bibcite{hirsch1989convergent}{{12}{1989}{{Hirsch}}{{}}}
\bibcite{johnson2013accelerating}{{13}{2013}{{Johnson and Zhang}}{{}}}
\bibcite{korda2015td}{{14}{2015}{{Korda and La}}{{}}}
\bibcite{liu2015finite}{{15}{2015}{{Liu \bgroup \em  et al.\egroup }}{{}}}
\bibcite{liu2016proximal}{{16}{2016}{{Liu \bgroup \em  et al.\egroup }}{{}}}
\bibcite{liu2018proximal}{{17}{2018}{{Liu \bgroup \em  et al.\egroup }}{{}}}
\bibcite{maei2011gradient}{{18}{2011}{{Maei}}{{}}}
\bibcite{ng1999policy}{{19}{1999}{{Ng \bgroup \em  et al.\egroup }}{{}}}
\bibcite{pan2017accelerated}{{20}{2017}{{Pan \bgroup \em  et al.\egroup }}{{}}}
\bibcite{schulman2015trust}{{21}{2015}{{Schulman \bgroup \em  et al.\egroup }}{{}}}
\bibcite{schulman2017proximal}{{22}{2017}{{Schulman \bgroup \em  et al.\egroup }}{{}}}
\bibcite{schwartz1993reinforcement}{{23}{1993}{{Schwartz}}{{}}}
\bibcite{Sutton2018book}{{24}{2018}{{Sutton and Barto}}{{}}}
\bibcite{sutton2008convergent}{{25}{2008}{{Sutton \bgroup \em  et al.\egroup }}{{}}}
\bibcite{sutton2009fast}{{26}{2009}{{Sutton \bgroup \em  et al.\egroup }}{{}}}
\bibcite{sutton2016emphatic}{{27}{2016}{{Sutton \bgroup \em  et al.\egroup }}{{}}}
\bibcite{sutton1988learning}{{28}{1988}{{Sutton}}{{}}}
\bibcite{tsitsiklis1997analysis}{{29}{1997}{{Tsitsiklis and Van~Roy}}{{}}}
\bibcite{xu2019reanalysis}{{30}{2019}{{Xu \bgroup \em  et al.\egroup }}{{}}}
\bibcite{xu2020reanalysis}{{31}{2020}{{Xu \bgroup \em  et al.\egroup }}{{}}}
\bibcite{zhang2022truncated}{{32}{2022}{{Zhang and Whiteson}}{{}}}
\gdef \@abspage@last{18}