\relax \bibstyle{aaai25} \citation{sutton1988learning} \citation{tsitsiklis1997analysis} \citation{Sutton2018book} \citation{baird1995residual} \citation{sutton2008convergent} \citation{sutton2009fast} \citation{sutton2016emphatic} \citation{chen2023modified} \citation{hackman2012faster} \citation{liu2015finite,liu2016proximal,liu2018proximal} \citation{givchi2015quasi} \citation{pan2017accelerated} \citation{hallak2016generalized} \citation{zhang2022truncated} \citation{johnson2013accelerating} \citation{korda2015td} \citation{xu2019reanalysis} \citation{Sutton2018book} \citation{baird1995residual} \citation{sutton2009fast} \citation{sutton2009fast} \citation{feng2019kernel} \citation{basserrano2021logistic} \newlabel{introduction}{{}{1}} \citation{Sutton2018book} \citation{Sutton2018book} \citation{sutton2016emphatic} \newlabel{preliminaries}{{}{2}} \newlabel{valuefunction}{{}{2}} \newlabel{linearvaluefunction}{{1}{2}} \newlabel{thetatd_onpolicy}{{}{2}} \newlabel{thetatd_offpolicy}{{}{2}} \newlabel{thetatdc}{{}{3}} \newlabel{utdc}{{}{3}} \newlabel{fvmetd}{{2}{3}} \newlabel{thetaetd}{{}{3}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} \newlabel{alg:algorithm 2}{{1}{3}} \newlabel{alg:algorithm 5}{{2}{4}} \newlabel{thetavmtdc}{{5}{4}} \newlabel{uvmtdc}{{6}{4}} \newlabel{omegavmtdc}{{7}{4}} \newlabel{rho_VPBE}{{8}{4}} \newlabel{thetavmetd}{{12}{4}} \newlabel{omegavmetd}{{13}{4}} \citation{sutton2009fast} \citation{hirsch1989convergent} \newlabel{theorem2}{{1}{5}} \newlabel{thetavmtdcFastest}{{14}{5}} \newlabel{uvmtdcFastest}{{15}{5}} \newlabel{omegavmtdcFastest}{{16}{5}} \newlabel{omegavmtdcFastestFinal}{{17}{5}} \newlabel{omegavmtdcInfty}{{18}{5}} \citation{borkar2000ode} \citation{borkar2000ode} \citation{borkar2000ode} \citation{borkar1997stochastic} \citation{ng1999policy} \citation{devlin2012dynamic} \newlabel{theorem3}{{2}{6}} \newlabel{rowsum}{{19}{6}} \newlabel{example_bias}{{2}{6}} \newlabel{columnsum}{{20}{6}} \bibdata{aaai25} \bibcite{baird1995residual}{{1}{1995}{{Baird et~al.}}{{}}} \newlabel{2-state}{{1(a)}{7}} \newlabel{sub@2-state}{{(a)}{7}} \newlabel{7-state}{{1(b)}{7}} \newlabel{sub@7-state}{{(b)}{7}} \newlabel{MazeFull}{{1(c)}{7}} \newlabel{sub@MazeFull}{{(c)}{7}} \newlabel{CliffWalkingFull}{{1(d)}{7}} \newlabel{sub@CliffWalkingFull}{{(d)}{7}} \newlabel{MountainCarFull}{{1(e)}{7}} \newlabel{sub@MountainCarFull}{{(e)}{7}} \newlabel{AcrobotFull}{{1(f)}{7}} \newlabel{sub@AcrobotFull}{{(f)}{7}} \newlabel{Complete_full}{{1}{7}} \bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano et~al.}}{{Bas-Serrano, Curi, Krause, and Neu}}} \bibcite{borkar1997stochastic}{{3}{1997}{{Borkar}}{{}}} \bibcite{borkar2000ode}{{4}{2000}{{Borkar and Meyn}}{{}}} \bibcite{chen2023modified}{{5}{2023}{{Chen et~al.}}{{Chen, Ma, Li, Yang, Yang, and Gao}}} \bibcite{devlin2012dynamic}{{6}{2012}{{Devlin and Kudenko}}{{}}} \bibcite{feng2019kernel}{{7}{2019}{{Feng, Li, and Liu}}{{}}} \bibcite{givchi2015quasi}{{8}{2015}{{Givchi and Palhang}}{{}}} \bibcite{hackman2012faster}{{9}{2012}{{Hackman}}{{}}} \bibcite{hallak2016generalized}{{10}{2016}{{Hallak et~al.}}{{Hallak, Tamar, Munos, and Mannor}}} \bibcite{hirsch1989convergent}{{11}{1989}{{Hirsch}}{{}}} \bibcite{johnson2013accelerating}{{12}{2013}{{Johnson and Zhang}}{{}}} \bibcite{korda2015td}{{13}{2015}{{Korda and La}}{{}}} \bibcite{liu2018proximal}{{14}{2018}{{Liu et~al.}}{{Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}}} \bibcite{liu2015finite}{{15}{2015}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} \bibcite{liu2016proximal}{{16}{2016}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} \bibcite{ng1999policy}{{17}{1999}{{Ng, Harada, and Russell}}{{}}} \bibcite{pan2017accelerated}{{18}{2017}{{Pan, White, and White}}{{}}} \bibcite{sutton2009fast}{{19}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}} \bibcite{sutton1988learning}{{20}{1988}{{Sutton}}{{}}} \bibcite{Sutton2018book}{{21}{2018}{{Sutton and Barto}}{{}}} \bibcite{sutton2008convergent}{{22}{2008}{{Sutton, Maei, and Szepesv{\'a}ri}}{{}}} \bibcite{sutton2016emphatic}{{23}{2016}{{Sutton, Mahmood, and White}}{{}}} \bibcite{tsitsiklis1997analysis}{{24}{1997}{{Tsitsiklis and Van~Roy}}{{}}} \bibcite{xu2019reanalysis}{{25}{2019}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}} \bibcite{zhang2022truncated}{{26}{2022}{{Zhang and Whiteson}}{{}}} \gdef \@abspage@last{8}