\relax \bibstyle{aaai25} \citation{sutton1988learning} \citation{tsitsiklis1997analysis} \citation{Sutton2018book} \citation{baird1995residual} \citation{sutton2008convergent} \citation{sutton2009fast} \citation{sutton2016emphatic} \citation{chen2023modified} \citation{hackman2012faster} \citation{liu2015finite,liu2016proximal,liu2018proximal} \citation{givchi2015quasi} \citation{pan2017accelerated} \citation{hallak2016generalized} \citation{zhang2022truncated} \citation{johnson2013accelerating} \citation{korda2015td} \citation{xu2019reanalysis} \citation{Sutton2018book} \citation{baird1995residual} \citation{sutton2009fast} \citation{sutton2009fast} \citation{feng2019kernel} \citation{basserrano2021logistic} \newlabel{introduction}{{}{1}} \citation{Sutton2018book} \citation{Sutton2018book} \citation{ng1999policy} \citation{devlin2012dynamic} \newlabel{preliminaries}{{}{2}} \newlabel{valuefunction}{{}{2}} \newlabel{linearvaluefunction}{{1}{2}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} \newlabel{example_bias}{{1}{2}} \newlabel{alg:algorithm 1}{{1}{3}} \newlabel{omega}{{3}{3}} \newlabel{delta}{{4}{3}} \newlabel{theta}{{5}{3}} \newlabel{deltaSarsa}{{8}{3}} \newlabel{deltaQ}{{9}{3}} \newlabel{alg:algorithm 2}{{2}{3}} \newlabel{thetavmtdc}{{11}{3}} \newlabel{uvmtdc}{{12}{3}} \newlabel{omegavmtdc}{{13}{3}} \newlabel{fvmetd}{{18}{3}} \newlabel{thetavmetd}{{19}{3}} \newlabel{omegavmetd}{{20}{3}} \citation{borkar1997stochastic} \citation{hirsch1989convergent} \citation{borkar2000ode} \citation{borkar2000ode} \citation{borkar2000ode} \newlabel{alg:algorithm 5}{{3}{4}} \newlabel{theorem1}{{1}{4}} \newlabel{th1proof}{{}{4}} \newlabel{thetaFast}{{22}{4}} \newlabel{omegaFast}{{23}{4}} \newlabel{omegaFastFinal}{{24}{4}} \newlabel{omegaInfty}{{25}{4}} \citation{Sutton2018book} \citation{sutton2009fast} \citation{baird1995residual,sutton2009fast} \newlabel{odetheta}{{26}{5}} \newlabel{covariance}{{27}{5}} \newlabel{odethetafinal}{{28}{5}} \newlabel{theorem2}{{2}{5}} \newlabel{randomwalk}{{1}{5}} \newlabel{bairdexample}{{2}{5}} \newlabel{theorem3}{{3}{5}} \citation{schwartz1993reinforcement} \citation{korda2015td} \citation{xu2020reanalysis} \citation{Sutton2018book} \citation{Sutton2018book} \citation{schulman2015trust} \citation{schulman2017proximal} \bibdata{aaai25} \bibcite{baird1995residual}{{1}{1995}{{Baird et~al.}}{{}}} \bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano et~al.}}{{Bas-Serrano, Curi, Krause, and Neu}}} \bibcite{borkar1997stochastic}{{3}{1997}{{Borkar}}{{}}} \bibcite{borkar2000ode}{{4}{2000}{{Borkar and Meyn}}{{}}} \bibcite{chen2023modified}{{5}{2023}{{Chen et~al.}}{{Chen, Ma, Li, Yang, Yang, and Gao}}} \bibcite{devlin2012dynamic}{{6}{2012}{{Devlin and Kudenko}}{{}}} \bibcite{feng2019kernel}{{7}{2019}{{Feng, Li, and Liu}}{{}}} \bibcite{givchi2015quasi}{{8}{2015}{{Givchi and Palhang}}{{}}} \bibcite{hackman2012faster}{{9}{2012}{{Hackman}}{{}}} \bibcite{hallak2016generalized}{{10}{2016}{{Hallak et~al.}}{{Hallak, Tamar, Munos, and Mannor}}} \bibcite{hirsch1989convergent}{{11}{1989}{{Hirsch}}{{}}} \bibcite{johnson2013accelerating}{{12}{2013}{{Johnson and Zhang}}{{}}} \bibcite{korda2015td}{{13}{2015}{{Korda and La}}{{}}} \bibcite{liu2018proximal}{{14}{2018}{{Liu et~al.}}{{Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}}} \bibcite{liu2015finite}{{15}{2015}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} \bibcite{liu2016proximal}{{16}{2016}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} \bibcite{ng1999policy}{{17}{1999}{{Ng, Harada, and Russell}}{{}}} \bibcite{pan2017accelerated}{{18}{2017}{{Pan, White, and White}}{{}}} \bibcite{schulman2015trust}{{19}{2015}{{Schulman et~al.}}{{Schulman, Levine, Abbeel, Jordan, and Moritz}}} \bibcite{schulman2017proximal}{{20}{2017}{{Schulman et~al.}}{{Schulman, Wolski, Dhariwal, Radford, and Klimov}}} \bibcite{schwartz1993reinforcement}{{21}{1993}{{Schwartz}}{{}}} \bibcite{sutton2009fast}{{22}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}} \bibcite{sutton1988learning}{{23}{1988}{{Sutton}}{{}}} \bibcite{Sutton2018book}{{24}{2018}{{Sutton and Barto}}{{}}} \bibcite{sutton2008convergent}{{25}{2008}{{Sutton, Maei, and Szepesv{\'a}ri}}{{}}} \bibcite{sutton2016emphatic}{{26}{2016}{{Sutton, Mahmood, and White}}{{}}} \bibcite{tsitsiklis1997analysis}{{27}{1997}{{Tsitsiklis and Van~Roy}}{{}}} \bibcite{xu2019reanalysis}{{28}{2019}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}} \bibcite{xu2020reanalysis}{{29}{2020}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}} \bibcite{zhang2022truncated}{{30}{2022}{{Zhang and Whiteson}}{{}}} \gdef \@abspage@last{7}