\begin{thebibliography}{5}
\providecommand{\natexlab}[1]{#1}

\bibitem[{Borkar(1997)}]{borkar1997stochastic}
Borkar, V.~S. 1997.
\newblock Stochastic approximation with two time scales.
\newblock \emph{Syst. \& Control Lett.}, 29(5): 291--294.

\bibitem[{Borkar and Meyn(2000)}]{borkar2000ode}
Borkar, V.~S.; and Meyn, S.~P. 2000.
\newblock The ODE method for convergence of stochastic approximation and reinforcement learning.
\newblock \emph{SIAM J. Control Optim.}, 38(2): 447--469.

\bibitem[{Hirsch(1989)}]{hirsch1989convergent}
Hirsch, M.~W. 1989.
\newblock Convergent activation dynamics in continuous time networks.
\newblock \emph{Neural Netw.}, 2(5): 331--349.

\bibitem[{Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}]{sutton2009fast}
Sutton, R.~S.; Maei, H.; Precup, D.; Bhatnagar, S.; Silver, D.; Szepesv{\'a}ri, C.; and Wiewiora, E. 2009.
\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation.
\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, 993--1000.

\bibitem[{Sutton, Mahmood, and White(2016)}]{sutton2016emphatic}
Sutton, R.~S.; Mahmood, A.~R.; and White, M. 2016.
\newblock An emphatic approach to the problem of off-policy temporal-difference learning.
\newblock \emph{J. Mach. Learn. Res.}, 17(1): 2603--2631.

\end{thebibliography}