\begin{thebibliography}{5}
\providecommand{\natexlab}[1]{#1}

\bibitem[{Borkar(1997)}]{borkar1997stochastic}
Borkar, V.~S. 1997.
\newblock Stochastic approximation with two time scales.
\newblock \emph{Syst. Control Lett.}, 29(5): 291--294.

\bibitem[{Borkar and Meyn(2000)}]{borkar2000ode}
Borkar, V.~S.; and Meyn, S.~P. 2000.
\newblock The ODE method for convergence of stochastic approximation and reinforcement learning.
\newblock \emph{SIAM J. Control Optim.}, 38(2): 447--469.

\bibitem[{Hirsch(1989)}]{hirsch1989convergent}
Hirsch, M.~W. 1989.
\newblock Convergent activation dynamics in continuous time networks.
\newblock \emph{Neural Netw.}, 2(5): 331--349.

\bibitem[{Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}]{sutton2009fast}
Sutton, R.~S.; Maei, H.~R.; Precup, D.; Bhatnagar, S.; Silver, D.; Szepesv{\'a}ri, C.; and Wiewiora, E. 2009.
\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation.
\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, 993--1000.

\bibitem[{Sutton, Mahmood, and White(2016)}]{sutton2016emphatic}
Sutton, R.~S.; Mahmood, A.~R.; and White, M. 2016.
\newblock An emphatic approach to the problem of off-policy temporal-difference learning.
\newblock \emph{J. Mach. Learn. Res.}, 17(1): 2603--2631.

\end{thebibliography}