\relax \bibstyle{aaai25} \citation{sutton1988learning} \citation{tsitsiklis1997analysis} \citation{Sutton2018book} \citation{baird1995residual} \citation{sutton2008convergent} \citation{sutton2009fast} \citation{sutton2016emphatic} \citation{chen2023modified} \citation{hackman2012faster} \citation{liu2015finite,liu2016proximal,liu2018proximal} \citation{givchi2015quasi} \citation{pan2017accelerated} \citation{hallak2016generalized} \citation{zhang2022truncated} \citation{johnson2013accelerating} \citation{korda2015td} \citation{xu2019reanalysis} \citation{Sutton2018book} \citation{baird1995residual} \citation{sutton2009fast} \citation{sutton2009fast} \citation{feng2019kernel} \citation{basserrano2021logistic} \newlabel{introduction}{{}{1}} \citation{Sutton2018book} \citation{Sutton2018book} \newlabel{preliminaries}{{}{2}} \newlabel{valuefunction}{{}{2}} \newlabel{linearvaluefunction}{{1}{2}} \providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} \newlabel{tab:min_eigenvalues}{{1}{3}} \newlabel{delta}{{3}{3}} \newlabel{omega}{{4}{3}} \newlabel{theta}{{5}{3}} \newlabel{thetavmtdc}{{8}{4}} \newlabel{uvmtdc}{{9}{4}} \newlabel{omegavmtdc}{{10}{4}} \newlabel{fvmetd}{{11}{4}} \newlabel{thetavmetd}{{12}{4}} \newlabel{omegavmetd}{{13}{4}} \citation{borkar1997stochastic} \citation{sutton2009fast} \citation{borkar1997stochastic} \newlabel{theorem1}{{1}{5}} \newlabel{th1proof}{{}{5}} \newlabel{covariance}{{14}{5}} \newlabel{theorem2}{{2}{5}} \newlabel{theorem3}{{3}{5}} \newlabel{rowsum}{{15}{5}} \newlabel{columnsum}{{16}{5}} \citation{ng1999policy} \citation{devlin2012dynamic} \newlabel{example_bias}{{2}{6}} \bibdata{aaai25} \bibcite{baird1995residual}{{1}{1995}{{Baird et~al.}}{{}}} \newlabel{2-state}{{3(a)}{7}} \newlabel{sub@2-state}{{(a)}{7}} \newlabel{7-state}{{3(b)}{7}} \newlabel{sub@7-state}{{(b)}{7}} \newlabel{MazeFull}{{3(c)}{7}} \newlabel{sub@MazeFull}{{(c)}{7}} \newlabel{CliffWalkingFull}{{3(d)}{7}} \newlabel{sub@CliffWalkingFull}{{(d)}{7}} \newlabel{MountainCarFull}{{3(e)}{7}} \newlabel{sub@MountainCarFull}{{(e)}{7}} \newlabel{AcrobotFull}{{3(f)}{7}} \newlabel{sub@AcrobotFull}{{(f)}{7}} \newlabel{Complete_full}{{3}{7}} \bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano et~al.}}{{Bas-Serrano, Curi, Krause, and Neu}}} \bibcite{borkar1997stochastic}{{3}{1997}{{Borkar}}{{}}} \bibcite{chen2023modified}{{4}{2023}{{Chen et~al.}}{{Chen, Ma, Li, Yang, Yang, and Gao}}} \bibcite{devlin2012dynamic}{{5}{2012}{{Devlin and Kudenko}}{{}}} \bibcite{feng2019kernel}{{6}{2019}{{Feng, Li, and Liu}}{{}}} \bibcite{givchi2015quasi}{{7}{2015}{{Givchi and Palhang}}{{}}} \bibcite{hackman2012faster}{{8}{2012}{{Hackman}}{{}}} \bibcite{hallak2016generalized}{{9}{2016}{{Hallak et~al.}}{{Hallak, Tamar, Munos, and Mannor}}} \bibcite{johnson2013accelerating}{{10}{2013}{{Johnson and Zhang}}{{}}} \bibcite{korda2015td}{{11}{2015}{{Korda and La}}{{}}} \bibcite{liu2018proximal}{{12}{2018}{{Liu et~al.}}{{Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}}} \bibcite{liu2015finite}{{13}{2015}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} \bibcite{liu2016proximal}{{14}{2016}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} \bibcite{ng1999policy}{{15}{1999}{{Ng, Harada, and Russell}}{{}}} \bibcite{pan2017accelerated}{{16}{2017}{{Pan, White, and White}}{{}}} \bibcite{sutton2009fast}{{17}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}} \bibcite{sutton1988learning}{{18}{1988}{{Sutton}}{{}}} \bibcite{Sutton2018book}{{19}{2018}{{Sutton and Barto}}{{}}} \bibcite{sutton2008convergent}{{20}{2008}{{Sutton, Maei, and Szepesv{\'a}ri}}{{}}} \bibcite{sutton2016emphatic}{{21}{2016}{{Sutton, Mahmood, and White}}{{}}} \bibcite{tsitsiklis1997analysis}{{22}{1997}{{Tsitsiklis and Van~Roy}}{{}}} \bibcite{xu2019reanalysis}{{23}{2019}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}} \bibcite{zhang2022truncated}{{24}{2022}{{Zhang and Whiteson}}{{}}} \gdef \@abspage@last{8}