diff --git b/AnonymousSubmission/aaai25.bib a/AnonymousSubmission/aaai25.bib new file mode 100644 index 0000000..08e4dae --- /dev/null +++ a/AnonymousSubmission/aaai25.bib @@ -0,0 +1,1138 @@ +@inproceedings{langley00, + author = {P. Langley}, + title = {Crafting Papers on Machine Learning}, + year = {2000}, + pages = {1207--1216}, + editor = {Pat Langley}, + booktitle = {Proceedings of the 17th International Conference + on Machine Learning (ICML 2000)}, + address = {Stanford, CA}, + publisher = {Morgan Kaufmann} +} + +@TechReport{mitchell80, + author = "T. M. Mitchell", + title = "The Need for Biases in Learning Generalizations", + institution = "Computer Science Department, Rutgers University", + year = "1980", + address = "New Brunswick, NJ", +} + +@phdthesis{kearns89, + author = {M. J. Kearns}, + title = {Computational Complexity of Machine Learning}, + school = {Department of Computer Science, Harvard University}, + year = {1989} +} + +@Book{MachineLearningI, + editor = "R. S. Michalski and J. G. Carbonell and T. + M. Mitchell", + title = "Machine Learning: An Artificial Intelligence + Approach, Vol. I", + publisher = "Tioga", + year = "1983", + address = "Palo Alto, CA" +} + +@Book{DudaHart2nd, + author = "R. O. Duda and P. E. Hart and D. G. Stork", + title = "Pattern Classification", + publisher = "John Wiley and Sons", + edition = "2nd", + year = "2000" +} + +@misc{anonymous, + title= {Suppressed for Anonymity}, + author= {Author, N. N.}, + year= {2021} +} + +@InCollection{Newell81, + author = "A. Newell and P. S. Rosenbloom", + title = "Mechanisms of Skill Acquisition and the Law of + Practice", + booktitle = "Cognitive Skills and Their Acquisition", + pages = "1--51", + publisher = "Lawrence Erlbaum Associates, Inc.", + year = "1981", + editor = "J. R. Anderson", + chapter = "1", + address = "Hillsdale, NJ" +} + + +@Article{Samuel59, + author = "A. L. 
Samuel", + title = "Some Studies in Machine Learning Using the Game of + Checkers", + journal = "IBM Journal of Research and Development", + year = "1959", + volume = "3", + number = "3", + pages = "211--229" +} + +@book{em:86, + editor = "Engelmore, Robert and Morgan, Anthony", + title = "Blackboard Systems", + year = 1986, + address = "Reading, Mass.", + publisher = "Addison-Wesley", +} +@inproceedings{dalal2018finite, + title={Finite sample analyses for TD (0) with function approximation}, + author={Dalal, Gal and Szorenyi, Balazs and Thoppe, Gugan and Mannor, Shie}, + booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence}, + pages={6144--6160}, + year={2018} +} +@inproceedings{xu2019reanalysis, + title={Reanalysis of Variance Reduced Temporal Difference Learning}, + author={Xu, Tengyu and Wang, Zhe and Zhou, Yi and Liang, Yingbin}, + booktitle={International Conference on Learning Representations}, + year={2019} +} +@inproceedings{c:83, + author = "Clancey, William J.", + year = 1983, + title = "{Communication, Simulation, and Intelligent +Agents: Implications of Personal Intelligent Machines +for Medical Education}", + booktitle="Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}", + pages = "556--560", + address = "Menlo Park, Calif", + publisher = "{IJCAI Organization}", +} +@inproceedings{c:84, + author = "Clancey, William J.", + year = 1984, + title = "{Classification Problem Solving}", + booktitle = "Proceedings of the Fourth National + Conference on Artificial Intelligence", + pages = "45--54", + address = "Menlo Park, Calif.", + publisher="AAAI Press", +} +@article{r:80, + author = {Robinson, Arthur L.}, + title = {New Ways to Make Microcircuits Smaller}, + volume = {208}, + number = {4447}, + pages = {1019--1022}, + year = 
{1980}, + doi = {10.1126/science.208.4447.1019}, + publisher = {American Association for the Advancement of Science}, + issn = {0036-8075}, + URL = {https://science.sciencemag.org/content/208/4447/1019}, + eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf}, + journal = {Science}, +} +@article{r:80x, + author = "Robinson, Arthur L.", + year = 1980, + title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}", + journal = "Science", + volume = 208, + pages = "1019--1026", +} +@article{hcr:83, +title = {Strategic explanations for a diagnostic consultation system}, +journal = {International Journal of Man-Machine Studies}, +volume = {20}, +number = {1}, +pages = {3--19}, +year = {1984}, +issn = {0020-7373}, +doi = {10.1016/S0020-7373(84)80003-6}, +url = {https://www.sciencedirect.com/science/article/pii/S0020737384800036}, +author = {Diane Warner Hasling and William J. Clancey and Glenn Rennels}, +abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy. 
We describe the aspects of NEOMYCIN that make abstract strategic explanations possible---the representation of strategic knowledge explicitly and separately from domain knowledge---and demonstrate how this representation can be used to generate explanations.} +} +@article{hcrt:83, + author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas", + year = 1983, + title = "{Strategic Explanations in Consultation---Duplicate}", + journal = "The International Journal of Man-Machine Studies", + volume = 20, + number = 1, + pages = "3--19", +} +@techreport{r:86, + author = "Rice, James", + year = 1986, + title = "{Poligon: A System for Parallel Problem Solving}", + type = "Technical Report", + number = "KSL-86-19", + institution = "Dept.\ of Computer Science, Stanford Univ.", +} +@phdthesis{c:79, + author = "Clancey, William J.", + year = 1979, + title = "{Transfer of Rule-Based Expertise +through a Tutorial Dialogue}", + type = "{Ph.D.} diss.", + school = "Dept.\ of Computer Science, Stanford Univ.", + address = "Stanford, Calif.", +} +@unpublished{c:21, + author = "Clancey, William J.", + title = "{The Engineering of Qualitative Models}", + year = 2021, + note = "Forthcoming", +} +@misc{c:22, + title={Attention Is All You Need}, + author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. 
Gomez and Lukasz Kaiser and Illia Polosukhin}, + year={2017}, + eprint={1706.03762}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +@misc{c:23, + title = "Pluto: The 'Other' Red Planet", + author = "{NASA}", + howpublished = "\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}", + year = 2015, + note = "Accessed: 2018-12-06" +} +@article{xu2013online, + title={Online learning control using adaptive critic designs with sparse kernel machines}, + author={Xu, Xin and Hou, Zhongsheng and Lian, Chuanqiang and He, Haibo}, + journal={IEEE Trans. Neural Netw. Learn. Syst.}, + volume={24}, + number={5}, + pages={762--775}, + year={2013}, + publisher={IEEE} +} +@article{bertsekas2017value, + title={Value and policy iterations in optimal control and adaptive dynamic programming}, + author={Bertsekas, Dimitri P}, + journal={IEEE Trans. Neural Netw. Learn. Syst.}, + year={2017}, + volume={28}, + number={3}, + pages={500--509}, + publisher={IEEE} +} +@phdthesis{hackman2012faster, + title={Faster Gradient-TD Algorithms}, + author={Hackman, Leah}, + year={2012}, + school={University of Alberta} +} +@inproceedings{harutyunyan2015multi, + title={Multi-scale reward shaping via an off-policy ensemble}, + author={Harutyunyan, Anna and Brys, Tim and Vrancx, Peter and Now{\'e}, Ann}, + booktitle={Proc. 2015 Int. Conf. 
Autonomous Agents and Multiagent Systems}, + pages={1641--1642}, + year={2015}, + organization={International Foundation for Autonomous Agents and Multiagent Systems} +} +@inproceedings{harutyunyan2015expressing, + title={Expressing Arbitrary Reward Functions as Potential-Based Advice}, + author={Harutyunyan, Anna and Devlin, Sam and Vrancx, Peter and Now{\'e}, Ann}, + booktitle={AAAI}, + pages={2652--2658}, + year={2015} +} +@article{wiewiora2003potential, + title={Potential-based shaping and {Q-value} initialization are equivalent}, + author={Wiewiora, Eric}, + journal={J. Artif. Intell. Res.}, + volume={19}, + pages={205--208}, + year={2003} +} +@article{grzes2010online, + title={Online learning of shaping rewards in reinforcement learning}, + author={Grze{\'s}, Marek and Kudenko, Daniel}, + journal={Neural Netw.}, + volume={23}, + number={4}, + pages={541--550}, + year={2010}, + publisher={Elsevier} +} +@inproceedings{marthi2007automatic, + title={Automatic shaping and decomposition of reward functions}, + author={Marthi, Bhaskara}, + booktitle={Proc. 24th Int. Conf. Mach. Learn.}, + pages={601--608}, + year={2007} +} +@inproceedings{laud2003influence, + title={The Influence of Reward on the Speed of Reinforcement Learning: An Analysis of Shaping}, + author={Laud, Adam and DeJong, Gerald}, + booktitle={Proc. 20th Int. Conf. Mach. Learn.}, + pages={440--447}, + year={2003} +} +@phdthesis{laud2004theory, + title={Theory and application of reward shaping in reinforcement learning}, + author={Laud, Adam Daniel}, + year={2004}, + school={University of Illinois at Urbana-Champaign} +} +@article{geist2013algorithmic, + title={Algorithmic survey of parametric value function approximation}, + author={Geist, Matthieu and Pietquin, Olivier}, + journal={IEEE Trans. Neural Netw. Learn. 
Syst.}, + volume={24}, + number={6}, + pages={845--867}, + year={2013}, + publisher={IEEE} +} +@article{furmston2016approximate, + title={Approximate {Newton} Methods for Policy Search in {Markov} Decision Processes}, + author={Furmston, Thomas and Lever, Guy and Barber, David}, + journal={J. Mach. Learn. Res.}, + volume={17}, + number={227}, + pages={1--51}, + year={2016} +} +@article{silver2016mastering, + title={Mastering the game of {Go} with deep neural networks and tree search}, + author={Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others}, + journal={Nature}, + volume={529}, + number={7587}, + pages={484--489}, + year={2016}, + publisher={Nature Publishing Group} +} + +@article{mnih2015human, + title={Human-level control through deep reinforcement learning}, + author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others}, + journal={Nature}, + volume={518}, + number={7540}, + pages={529--533}, + year={2015}, + publisher={Nature Publishing Group} +} +@inproceedings{guo2014deep, + title={Deep learning for real-time {Atari} game play using offline {Monte-Carlo} tree search planning}, + author={Guo, Xiaoxiao and Singh, Satinder and Lee, Honglak and Lewis, Richard L and Wang, Xiaoshi}, + booktitle={Advances in Neural Information Processing Systems}, + pages={3338--3346}, + publisher={Cambridge, MA: MIT Press}, + year={2014} +} +@inproceedings{scherrer2010should, + title={Should one compute the Temporal Difference fix point or minimize the {Bellman} Residual? The unified oblique projection view}, + author={Scherrer, Bruno}, + booktitle={Proc. 27th Int. Conf. Mach. 
Learn.}, + pages={959--966}, + year={2010} +} +@article{hirsch1989convergent, + title={Convergent activation dynamics in continuous time networks}, + author={Hirsch, Morris W}, + journal={Neural Netw.}, + volume={2}, + number={5}, + pages={331--349}, + year={1989}, + publisher={Elsevier} +} +@article{borkar1997stochastic, + title={Stochastic approximation with two time scales}, + author={Borkar, Vivek S}, + journal={Syst. \& Control Letters}, + volume={29}, + number={5}, + pages={291--294}, + year={1997}, + publisher={Elsevier} +} +@article{ortner2013adaptive, + title={Adaptive aggregation for reinforcement learning in average reward {Markov} decision processes}, + author={Ortner, Ronald}, + journal={Annals Oper. Res.}, + volume={208}, + number={1}, + pages={321--336}, + year={2013}, + publisher={Springer} +} +@article{jaksch2010near, + title={Near-optimal regret bounds for reinforcement learning}, + author={Jaksch, Thomas and Ortner, Ronald and Auer, Peter}, + journal={Journal of Machine Learning Research}, + number={Apr}, + volume={11}, + pages={1563--1600}, + year={2010} +} +@article{ortner2007logarithmic, + title={Logarithmic online regret bounds for undiscounted reinforcement learning}, + author={Auer, Peter and Ortner, Ronald}, + journal={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + volume={19}, + pages={49--56}, + year={2007} +} +@article{das1999solving, + title={Solving semi-{Markov} decision problems using average reward reinforcement learning}, + author={Das, Tapas K and Gosavi, Abhijit and Mahadevan, Sridhar and Marchalleck, Nicholas}, + journal={Management Science}, + volume={45}, + number={4}, + pages={560--574}, + year={1999}, + publisher={INFORMS} +} +@article{abounadi2001learning, + title={Learning algorithms for Markov decision processes with average cost}, + author={Abounadi, Jinane and Bertsekas, D and Borkar, Vivek S}, + journal={SIAM J. 
Control Optim.}, + volume={40}, + number={3}, + pages={681--698}, + year={2001}, + publisher={SIAM} +} +@inproceedings{singh1994reinforcement, + title={Reinforcement learning algorithms for average-payoff {Markovian} decision processes}, + author={Singh, Satinder P}, + booktitle={AAAI}, + volume={94}, + pages={700--705}, + year={1994} +} +@inproceedings{schwartz1993reinforcement, + title={A reinforcement learning method for maximizing undiscounted rewards}, + author={Schwartz, Anton}, + booktitle={Proc. 10th Int. Conf. Mach. Learn.}, + pages={298--305}, + year={1993} +} + +@inproceedings{yang2016efficient, + title={Efficient Average Reward Reinforcement Learning Using Constant Shifting Values}, + author={Yang, Shangdong and Gao, Yang and An, Bo and Wang, Hao and Chen, Xingguo}, + booktitle={Thirtieth AAAI Conference on Artificial Intelligence}, + pages={2258--2264}, + year={2016} +} +@inproceedings{devlin2012dynamic, + title={Dynamic potential-based reward shaping}, + author={Devlin, Sam and Kudenko, Daniel}, + booktitle={Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems}, + pages={433--440}, + year={2012} +} + +@inproceedings{ng1999policy, + title={Policy invariance under reward transformations: Theory and application to reward shaping}, + author={Ng, Andrew Y and Harada, Daishi and Russell, Stuart}, + booktitle={Proc. 16th Int. Conf. Mach. Learn.}, + pages={278--287}, + year={1999} +} +@article{borkar2000ode, + title={The {ODE} method for convergence of stochastic approximation and reinforcement learning}, + author={Borkar, Vivek S and Meyn, Sean P}, + journal={SIAM J. 
Control Optim.}, + volume={38}, + number={2}, + pages={447--469}, + year={2000}, + publisher={SIAM} +} +@phdthesis{maei2011gradient, + title={Gradient temporal-difference learning algorithms}, + author={Maei, Hamid Reza}, + year={2011}, + school={University of Alberta} +} +@phdthesis{baird1999reinforcement, + title={Reinforcement learning through gradient descent}, + author={Baird, III, Leemon C}, + year={1999}, + school={US Air Force Academy, US} +} +@PHDTHESIS{Driessens2004, + AUTHOR ="Kurt Driessens", + TITLE ="Relational Reinforcement Learning", + SCHOOL ="Catholic University of Leuven", + YEAR ="2004", +} +@article{tsitsiklis1996feature, + title={Feature-based methods for large scale dynamic programming}, + author={Tsitsiklis, John N and Van Roy, Benjamin}, + journal={Mach. Learn.}, + volume={22}, + number={1-3}, + pages={59--94}, + year={1996}, + publisher={Springer} +} +@inproceedings{chen2009apply, + title={Apply ant colony optimization to {Tetris}}, + author={Chen, X. and Wang, H. and Wang, W. and Shi, Y. and Gao, Y.}, + booktitle={Proceedings of the 11th Annual Conference on Genetic and Evolutionary Computation (GECCO)}, + pages={1741--1742}, + year={2009}, + organization={ACM} +} +@incollection{farias2006tetris, + title={{Tetris}: A study of randomized constraint sampling}, + author={Farias, Vivek F and Van Roy, Benjamin}, + booktitle={Probabilistic and Randomized Methods for Design Under Uncertainty}, + pages={189--201}, + year={2006}, + publisher={Springer} +} +@article{bertsekas1996temporal, + title={Temporal differences-based policy iteration and applications in neuro-dynamic programming}, + author={Bertsekas, Dimitri P and Ioffe, Sergey}, + journal={Lab. for Info. 
and Decision Systems Report LIDS-P-2349, MIT, Cambridge, MA}, + year={1996}, + publisher={Citeseer} +} +@inproceedings{kakade2001natural, + title={A Natural Policy Gradient}, + author={Kakade, Sham}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + volume={14}, + pages={1531--1538}, + year={2001} +} +@article{peters2008natural, + title={Natural actor-critic}, + author={Peters, Jan and Schaal, Stefan}, + journal={Neurocomputing}, + volume={71}, + number={7}, + pages={1180--1190}, + year={2008}, + publisher={Elsevier} +} +@article{baxter2001infinite, + title={Infinite-horizon policy-gradient estimation}, + author={Baxter, Jonathan and Bartlett, Peter L.}, + journal={J. Artif. Intell. Res.}, + volume={15}, + pages={319--350}, + year={2001} +} +@inproceedings{sutton1999policy, + title={Policy Gradient Methods for Reinforcement Learning with Function Approximation}, + author={Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay and others}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={1057--1063}, + year={1999} +} +@inproceedings{bohm2005evolutionary, + title={An evolutionary approach to {Tetris}}, + author={B{\"o}hm, Niko and K{\'o}kai, Gabriella and Mandl, Stefan}, + booktitle={Proc. 6th Metaheuristics Int. Conf.}, + pages={137--148}, + year={2005} +} +@article{szita2006learning, + title={Learning {Tetris} using the noisy cross-entropy method}, + author={Szita, Istv{\'a}n and L{\"o}rincz, Andr{\'a}s}, + journal={Neural Comput.}, + volume={18}, + number={12}, + pages={2936--2941}, + year={2006}, + publisher={MIT Press} +} +@inproceedings{thiery2010least, + title={Least-Squares $\lambda$ Policy Iteration: Bias-Variance Trade-off in Control Problems}, + author={Thiery, Christophe and Scherrer, Bruno}, + booktitle={Proc. 27th Int. Conf. Mach. 
Learn.}, + pages={1071--1078}, + year={2010} +} + +@inproceedings{gabillon2013approximate, + title={Approximate dynamic programming finally performs well in the game of {Tetris}}, + author={Gabillon, Victor and Ghavamzadeh, Mohammad and Scherrer, Bruno}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={1754--1762}, + year={2013} +} +@article{scherrer2013performance, + title={Performance bounds for $\lambda$ policy iteration and application to the game of {Tetris}}, + author={Scherrer, Bruno}, + journal={J. Mach. Learn. Res.}, + volume={14}, + number={1}, + pages={1181--1227}, + year={2013}, + publisher={JMLR.org} +} +@article{thiery2009improvements, + title={Improvements on Learning {Tetris} with Cross Entropy}, + author={Thiery, Christophe and Scherrer, Bruno}, + journal={Int. Computer Games Assoc. J.}, + volume={32}, + number={1}, + pages={23--33}, + year={2009} +} +@article{scherrer2015approximate, + title={Approximate Modified Policy Iteration and its Application to the Game of {Tetris}}, + author={Scherrer, Bruno and Ghavamzadeh, Mohammad and Gabillon, Victor and Lesner, Boris and Geist, Matthieu}, + journal={J. Mach. Learn. Res.}, + volume={16}, + pages={1629--1676}, + year={2015} +} + +@article{efron2004least, + title={Least angle regression}, + author={Efron, Bradley and Hastie, Trevor and Johnstone, Iain and Tibshirani, Robert}, + journal={The Annals of Statistics}, + volume={32}, + number={2}, + pages={407--499}, + year={2004}, + publisher={Institute of Mathematical Statistics} +} +@MASTERSTHESIS{Brzustowski1992, + author ={John Brzustowski}, + title ={Can you win at {Tetris}?}, + school = {University of British Columbia}, + year ={1992} +} +@Article{Breukelaar04, + author = {Ron Breukelaar and Erik D. Demaine and Susan + Hohenberger and Hendrik Jan Hoogeboom and Walter + A. 
Kosters and David Liben-Nowell}, + title = {Tetris is Hard, Even to Approximate}, + journal = {International Journal of Computational Geometry and + Applications}, + year = {2004}, + volume = {14}, + number = {1--2}, + pages = {41--68}, + month = apr, +} +@book{Bertsekas1996, + author = {Bertsekas, D. and Tsitsiklis, J. N.}, + title = {Neuro-Dynamic Programming}, + year = {1996}, + publisher = {Athena Scientific}, +} +@inproceedings{maei2010gq, + title={GQ ($\lambda$): A general gradient algorithm for temporal-difference prediction learning with eligibility traces}, + author={Maei, Hamid Reza and Sutton, Richard S}, + booktitle={Proceedings of the Third Conference on Artificial General Intelligence}, + volume={1}, + pages={91--96}, + year={2010} +} +@inproceedings{maei2010toward, + title={Toward off-policy learning control with function approximation}, + author={Maei, Hamid R and Szepesv{\'a}ri, Csaba and Bhatnagar, Shalabh and Sutton, Richard S}, + booktitle={Proc. 27th Int. Conf. Mach. Learn.}, + pages={719--726}, + year={2010} +} +@inproceedings{phua2007tracking, + title={Tracking value function dynamics to improve reinforcement learning with piecewise linear function approximation}, + author={Phua, Chee Wee and Fitch, Robert}, + booktitle={Proc. 24th Int. Conf. Mach. Learn.}, + pages={751--758}, + year={2007}, + organization={ACM} +} +@inproceedings{szubert2014temporal, + title={Temporal difference learning of N-tuple networks for the game 2048}, + author={Szubert, Marcin and Jaskowski, Wojciech}, + booktitle={2014 IEEE Conference on Computational Intelligence and Games (CIG)}, + pages={1--8}, + year={2014}, + organization={IEEE} +} +@article{chen2013online, + title={Online Selective Kernel-based Temporal Difference Learning}, + author={Chen, Xingguo and Gao, Yang and Wang, Ruili}, + journal={IEEE Trans. Neural Netw. Learn. 
Syst.}, + year={2013}, + volume={24}, + number={12}, + pages={1944--1956}, + publisher={IEEE} +} + +@article{xu2007kernel, + title={Kernel-based least squares policy iteration for reinforcement learning}, + author={Xu, Xin and Hu, Dewen and Lu, Xicheng}, + journal={IEEE Trans. Neural Netw.}, + volume={18}, + number={4}, + pages={973--992}, + year={2007}, + publisher={IEEE} +} +@INPROCEEDINGS{Engel03bayesmeets, + author = {Yaakov Engel and Shie Mannor and Ron Meir}, + title = {Bayes meets {Bellman}: the {Gaussian} process approach to temporal difference learning}, + booktitle = {Proc. 20th Int. Conf. Mach. Learn.}, + year = {2003}, + pages = {154--161}, + address={Washington, DC}, + month=aug, +} +@inproceedings{robards2011sparse, + title={Sparse Kernel-SARSA ($\lambda$) with an eligibility trace}, + author={Robards, M. and Sunehag, P. and Sanner, S. and Marthi, B.}, + booktitle = {Proc. 22nd Eur. Conf. Mach. Learn.}, + pages={1--17}, + year={2011}, + month=sep, + address = {Athens, Greece}, +} +@inproceedings{reisinger2008online, + title={Online kernel selection for {Bayesian} reinforcement learning}, + author={Reisinger, J. and Stone, P. and Miikkulainen, R.}, + booktitle={Proc. 25th Int. Conf. Mach. Learn.}, + pages={816--823}, + year={2008}, + month=jul, + address={Helsinki, Finland}, +} +@book{Sutton1998, + title={Reinforcement Learning: An Introduction}, + author={Sutton, R. S. and Barto, A. G.}, + year={1998}, + publisher={MIT Press}, + address={Cambridge, MA} +} +@book{Sutton2018book, + author = {Sutton, Richard S. 
and Barto, Andrew G.}, + edition = {Second}, + publisher = {The MIT Press}, + title = {Reinforcement Learning: An Introduction}, + year = {2018} +} +@phdthesis{Bradtke1994phd, + title={Incremental Dynamic Programming for On-line Adaptive Optimal Control}, + author={Bradtke, Steven J}, + year={1994}, + school={University of Massachusetts}, + month=sep, + address={Amherst}, +} +@inproceedings{baird1995residual, + title={Residual algorithms: Reinforcement learning with function approximation}, + author={Baird, Leemon}, + booktitle={Proc. 12th Int. Conf. Mach. Learn.}, + pages={30--37}, + year={1995} +} +@article{bradtke1996linear, + title={Linear least-squares algorithms for temporal difference learning}, + author={Bradtke, S. J. and Barto, A. G.}, + journal={Mach. Learn.}, + volume={22}, + number={1}, + pages={33--57}, + year={1996}, + publisher={Springer} +} +@article{lagoudakis2003least, + title={Least-squares policy iteration}, + author={Lagoudakis, M. G. and Parr, R.}, + journal={J. Mach. Learn. Res.}, + volume={4}, + pages={1107--1149}, + year={2003}, + publisher={JMLR.org} +} +@article{boyan2002technical, + title={Technical update: Least-squares temporal difference learning}, + author={Boyan, J. A.}, + journal={Mach. Learn.}, + volume={49}, + number={2}, + pages={233--246}, + year={2002}, + publisher={Springer} +} +@inproceedings{geramifard2006incremental, + title={Incremental least-squares temporal difference learning}, + author={Geramifard, A. and Bowling, M. and Sutton, R. S.}, + booktitle={Proc. 21st AAAI Conf. Artif. Intell.}, + pages={356--361}, + year={2006}, + month=jul, + address={Boston, Massachusetts}, +} +@inproceedings{sutton2009fast, + title={Fast gradient-descent methods for temporal-difference learning with linear function approximation}, + author={Sutton, R. S. and Maei, H. R. and Precup, D. and Bhatnagar, S. and Silver, D. and Szepesv{\'a}ri, C. and Wiewiora, E.}, + booktitle={Proc. 26th Int. Conf. Mach. 
Learn.}, + pages={993--1000}, + year={2009} +} +@inproceedings{sutton2008convergent, + title={A Convergent {$O(n)$} Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation}, + author={Sutton, Richard S and Maei, Hamid R and Szepesv{\'a}ri, Csaba}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={1609--1616}, + year={2008} +} +@inproceedings{dabney2014natural, + title={Natural Temporal Difference Learning}, + author={Dabney, William and Thomas, Philip}, + booktitle={Twenty-Eighth AAAI Conference on Artificial Intelligence}, + year={2014} +} +@inproceedings{mahmood2014weighted, + title={Weighted importance sampling for off-policy learning with linear function approximation}, + author={Mahmood, A Rupam and van Hasselt, Hado P and Sutton, Richard S}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={3014--3022}, + year={2014} +} +@inproceedings{seijen2014true, + title={True Online {TD} ($\lambda$)}, + author={van Seijen, Harm and Sutton, Richard S}, + booktitle={Proc. 31st Int. Conf. Mach. Learn.}, + pages={692--700}, + year={2014} +} +@article{ormoneit2002kernel, + title={Kernel-based reinforcement learning}, + author={Ormoneit, D. and Sen, {\'S}.}, + journal={Mach. Learn.}, + volume={49}, + number={2-3}, + pages={161--178}, + issn={0885-6125}, + year={2002}, + publisher={Springer-Verlag}, + address = {Hingham, MA, USA}, +} +@inproceedings{Ghavamzadeh2010lstd, + author = {M. Ghavamzadeh and A. Lazaric and O. A. Maillard and R. Munos}, + title = {{LSTD} with Random Projections}, + BOOKTITLE={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + volume = {23}, + pages = {721--729}, + Address = {Lake Tahoe, Nevada, USA}, + year = {2010} +} +@inproceedings{loth2007sparse, + title={Sparse temporal difference learning using LASSO}, + author={Loth, M. and Davy, M. 
and Preux, P.}, + booktitle={Proc. IEEE Symp. Approx. Dynamic Program. Reinforce. Learn.}, + pages={352--359}, + year={2007}, + organization={IEEE} +} +@inproceedings{kolter2009regularization, + title={Regularization and feature selection in least-squares temporal difference learning}, + author={Kolter, J. Z. and Ng, A. Y.}, + booktitle={Proc. 26th Int. Conf. Mach. Learn.}, + pages={521--528}, + year={2009}, + organization={ACM} +} +@inproceedings{hoffman2011regularized, + title={Regularized least squares temporal difference learning with nested l2 and l1 penalization}, + author={Hoffman, M. W. and Lazaric, A. and Ghavamzadeh, M. and Munos, R.}, + booktitle={Proc. Eur. Workshop Reinforce. Learn.}, + year={2011} +} +@inproceedings{Ghavamzadeh2011finite, + author = {M. Ghavamzadeh and A. Lazaric and R. Munos and M. Hoffman}, + title = {Finite-Sample Analysis of {Lasso-TD}}, + booktitle = {Proc. 28th Int. Conf. Mach. Learn.}, + year = {2011}, + month= jun, + address={Bellevue, Washington, USA}, + pages={1177--1184}, +} +@inproceedings{johnson2013accelerating, + title={Accelerating stochastic gradient descent using predictive variance reduction}, + author={Johnson, R. and Zhang, T.}, + booktitle={Advances in Neural Information Processing Systems}, + pages={315--323}, + year={2013} +} +@article{xu2020reanalysis, + title={Reanalysis of variance reduced temporal difference learning}, + author={Xu, T. and Wang, Z. and Zhou, Y. and Liang, Y.}, + journal={arXiv preprint arXiv:2001.01898}, + year={2020} +} +@inproceedings{schulman2015trust, + title={Trust region policy optimization}, + author={Schulman, J. and Levine, S. and Abbeel, P. and Jordan, M. and Moritz, P.}, + booktitle={International Conference on Machine Learning}, + pages={1889--1897}, + year={2015} +} +@article{schulman2017proximal, + title={Proximal policy optimization algorithms}, + author={Schulman, J. and Wolski, F. and Dhariwal, P. and Radford, A. 
and Klimov, O.}, + journal={arXiv preprint arXiv:1707.06347}, + year={2017} +} +@inproceedings{defazio2014saga, + title={{SAGA}: A fast incremental gradient method with support for non-strongly convex composite objectives}, + author={Defazio, A. and Bach, F. and Lacoste-Julien, S.}, + booktitle={Advances in Neural Information Processing Systems}, + pages={1646--1654}, + year={2014} +} +@inproceedings{du2017stochastic, + title={Stochastic variance reduction methods for policy evaluation}, + author={Du, S. S. and Chen, J. and Li, L. and Xiao, L. and Zhou, D.}, + booktitle={Proceedings of the 34th International Conference on Machine Learning}, + pages={1049--1058}, + year={2017} +} +@inproceedings{chen2023modified, + title={Modified Retrace for Off-Policy Temporal Difference Learning}, + author={Chen, Xingguo and Ma, Xingzhou and Li, Yang and Yang, Guang and Yang, Shangdong and Gao, Yang}, + booktitle={Uncertainty in Artificial Intelligence}, + pages={303--312}, + year={2023}, + organization={PMLR} +} +@article{dalal2017finite, + title={Finite Sample Analyses for {TD(0)} with Function Approximation}, + author={Dalal, Gal and Sz{\"o}r{\'e}nyi, Bal{\'a}zs and Thoppe, Gugan and Mannor, Shie}, + journal={arXiv preprint arXiv:1704.01161}, + year={2017} +} +@article{sutton1988learning, + title={Learning to predict by the methods of temporal differences}, + author={Sutton, Richard S}, + journal={Machine Learning}, + volume={3}, + number={1}, + pages={9--44}, + year={1988}, + publisher={Springer} +} +@inproceedings{tsitsiklis1997analysis, + title={Analysis of temporal-difference learning with function approximation}, + author={Tsitsiklis, John N and Van Roy, Benjamin}, + booktitle={Advances in Neural Information Processing Systems}, + pages={1075--1081}, + year={1997} +} +@article{sutton2016emphatic, + title={An emphatic approach to the problem of off-policy temporal-difference learning}, + author={Sutton, Richard S and Mahmood, A Rupam and White, Martha}, + journal={The Journal of Machine
Learning Research}, + volume={17}, + number={1}, + pages={2603--2631}, + year={2016}, + publisher={JMLR.org} +} +@inproceedings{liu2015finite, + title={Finite-sample analysis of proximal gradient {TD} algorithms}, + author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek}, + booktitle={Proceedings of the 31st Conference on Uncertainty in Artificial Intelligence}, + pages={504--513}, + year={2015} +} +@inproceedings{liu2016proximal, + title={Proximal Gradient Temporal Difference Learning Algorithms}, + author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek}, + booktitle={Proceedings of the International Joint Conference on Artificial Intelligence}, + pages={4195--4199}, + year={2016} +} +@article{liu2018proximal, + title={Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity}, + author={Liu, Bo and Gemp, Ian and Ghavamzadeh, Mohammad and Liu, Ji and Mahadevan, Sridhar and Petrik, Marek}, + journal={Journal of Artificial Intelligence Research}, + volume={63}, + pages={461--494}, + year={2018} +} +@inproceedings{givchi2015quasi, + title={Quasi {Newton} temporal difference learning}, + author={Givchi, Arash and Palhang, Maziar}, + booktitle={Asian Conference on Machine Learning}, + pages={159--172}, + year={2015} +} +@inproceedings{pan2017accelerated, + title={Accelerated gradient temporal difference learning}, + author={Pan, Yangchen and White, Adam and White, Martha}, + booktitle={Proceedings of the 31st AAAI Conference on Artificial Intelligence}, + pages={2464--2470}, + year={2017} +} +@inproceedings{hallak2016generalized, + title={Generalized emphatic temporal difference learning: bias-variance analysis}, + author={Hallak, Assaf and Tamar, Aviv and Munos, Remi and Mannor, Shie}, + booktitle={Proceedings of the 30th AAAI Conference on Artificial Intelligence}, + pages={1631--1637}, + year={2016} +} +@article{zhang2022truncated, +
title={Truncated emphatic temporal difference methods for prediction and control}, + author={Zhang, Shangtong and Whiteson, Shimon}, + journal={The Journal of Machine Learning Research}, + volume={23}, + number={1}, + pages={6859--6917}, + year={2022}, + publisher={JMLR.org} +} +@inproceedings{korda2015td, + title={On {TD(0)} with function approximation: Concentration bounds and a centered variant with exponential convergence}, + author={Korda, Nathaniel and Prashanth, L. A.}, + booktitle={International Conference on Machine Learning}, + pages={626--634}, + year={2015}, + organization={PMLR} +} +@book{zhou2021machine, + title={Machine Learning}, + author={Zhou, Zhi-Hua}, + year={2021}, + publisher={Springer Nature} +} +@inproceedings{dalal2020tale, + title={A tale of two-timescale reinforcement learning with the tightest finite-time bound}, + author={Dalal, Gal and Sz{\"o}r{\'e}nyi, Bal{\'a}zs and Thoppe, Gugan}, + booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, + volume={34}, + number={04}, + pages={3701--3708}, + year={2020} +} +@inproceedings{feng2019kernel, + title={A kernel loss for solving the {Bellman} equation}, + author={Feng, Yihao and Li, Lihong and Liu, Qiang}, + booktitle={Advances in Neural Information Processing Systems}, + pages={15430--15441}, + year={2019} +} +@inproceedings{basserrano2021logistic, + title={Logistic {Q}-Learning}, + author={Bas-Serrano, Joan and Curi, Sebastian and Krause, Andreas and Neu, Gergely}, + booktitle={International Conference on Artificial Intelligence and Statistics}, + pages={3610--3618}, + year={2021} +} + + + + + + + + + diff --git b/AnonymousSubmission/aaai25.bst a/AnonymousSubmission/aaai25.bst new file mode 100644 index 0000000..05b1d4e --- /dev/null +++ a/AnonymousSubmission/aaai25.bst @@ -0,0 +1,1493 @@ +%% +%% This is file `aaai22.bst', +%% generated with the docstrip utility.
+%% +%% The original source files were: +%% +%% merlin.mbs (with options: `head,ay,nat,ed-au,nm-rev,ed-rev,jnrlst,aunm-semi,mcite,mct-1,mct-x3,keyxyr,dt-beg,yr-per,yrp-per,note-yr,atit-u,volp-sp,num-xser,bkpg-x,add-pub,isbn,ppx,ed,xedn,and-com,and-com-ed,etal-xc,nfss,,{}') +%% merlin.mbs (with options: `tail,ay,nat,ed-au,nm-rev,ed-rev,jnrlst,aunm-semi,mcite,mct-1,mct-x3,keyxyr,dt-beg,yr-per,yrp-per,note-yr,atit-u,volp-sp,num-xser,bkpg-x,add-pub,isbn,ppx,ed,xedn,and-com,and-com-ed,etal-xc,nfss,,{}') +%% ---------------------------------------- +%% *** Natbib-compatible implementation of 'aaai' bib style *** +%% + % =============================================================== + % IMPORTANT NOTICE: + % This bibliographic style (bst) file has been generated from one or + % more master bibliographic style (mbs) files, listed above. + % + % This generated file can be redistributed and/or modified under the terms + % of the LaTeX Project Public License Distributed from CTAN + % archives in directory macros/latex/base/lppl.txt; either + % version 1 of the License, or any later version. + % =============================================================== + % Name and version information of the main mbs file: + % \ProvidesFile{merlin.mbs}[2011/11/18 4.33 (PWD, AO, DPC)] + % For use with BibTeX version 0.99a or later + %------------------------------------------------------------------- + % This bibliography style file is intended for texts in ENGLISH + % This is an author-year citation style bibliography. As such, it is + % non-standard LaTeX, and requires a special package file to function properly. + % Such a package is natbib.sty by Patrick W. Daly + % The form of the \bibitem entries is + % \bibitem[Jones et al.(1990)]{key}... + % \bibitem[Jones et al.(1990)Jones, Baker, and Smith]{key}... 
+ % The essential feature is that the label (the part in brackets) consists + % of the author names, as they should appear in the citation, with the year + % in parentheses following. There must be no space before the opening + % parenthesis! + % With natbib v5.3, a full list of authors may also follow the year. + % In natbib.sty, it is possible to define the type of enclosures that is + % really wanted (brackets or parentheses), but in either case, there must + % be parentheses in the label. + % The \cite command functions as follows: + % \citet{key} ==>> Jones et al. (1990) + % \citet*{key} ==>> Jones, Baker, and Smith (1990) + % \citep{key} ==>> (Jones et al., 1990) + % \citep*{key} ==>> (Jones, Baker, and Smith, 1990) + % \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2) + % \citep[e.g.][]{key} ==>> (e.g. Jones et al., 1990) + % \citep[e.g.][p. 32]{key} ==>> (e.g. Jones et al., 1990, p. 32) + % \citeauthor{key} ==>> Jones et al. + % \citeauthor*{key} ==>> Jones, Baker, and Smith + % \citeyear{key} ==>> 1990 + %--------------------------------------------------------------------- + +ENTRY + { address + archivePrefix + author + booktitle + chapter + edition + editor + eid + eprint + howpublished + institution + isbn + journal + key + month + note + number + organization + pages + publisher + school + series + title + type + volume + year + } + {} + { label extra.label sort.label short.list } +INTEGERS { output.state before.all mid.sentence after.sentence after.block } +FUNCTION {init.state.consts} +{ #0 'before.all := + #1 'mid.sentence := + #2 'after.sentence := + #3 'after.block := +} +STRINGS { s t} +FUNCTION {output.nonnull} +{ 's := + output.state mid.sentence = + { ", " * write$ } + { output.state after.block = + { add.period$ write$ + newline$ + "\newblock " write$ + } + { output.state before.all = + 'write$ + { add.period$ " " * write$ } + if$ + } + if$ + mid.sentence 'output.state := + } + if$ + s +} +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ 
+ 'output.nonnull + if$ +} +FUNCTION {output.check} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} +FUNCTION {fin.entry} +{ add.period$ + write$ + newline$ +} + +FUNCTION {new.block} +{ output.state before.all = + 'skip$ + { after.block 'output.state := } + if$ +} +FUNCTION {new.sentence} +{ output.state after.block = + 'skip$ + { output.state before.all = + 'skip$ + { after.sentence 'output.state := } + if$ + } + if$ +} +FUNCTION {add.blank} +{ " " * before.all 'output.state := +} + +FUNCTION {date.block} +{ + new.block +} + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} +FUNCTION {and} +{ 'skip$ + { pop$ #0 } + if$ +} +FUNCTION {or} +{ { pop$ #1 } + 'skip$ + if$ +} +FUNCTION {new.block.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.block + if$ +} +FUNCTION {field.or.null} +{ duplicate$ empty$ + { pop$ "" } + 'skip$ + if$ +} +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "\emph{" swap$ * "}" * } + if$ +} +FUNCTION {tie.or.space.prefix} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ +} + +FUNCTION {capitalize} +{ "u" change.case$ "t" change.case$ } + +FUNCTION {space.word} +{ " " swap$ * " " * } + % Here are the language-specific definitions for explicit words. + % Each function has a name bbl.xxx where xxx is the English word. + % The language selected here is ENGLISH +FUNCTION {bbl.and} +{ "and"} + +FUNCTION {bbl.etal} +{ "et~al." } + +FUNCTION {bbl.editors} +{ "eds." } + +FUNCTION {bbl.editor} +{ "ed." } + +FUNCTION {bbl.edby} +{ "edited by" } + +FUNCTION {bbl.edition} +{ "edition" } + +FUNCTION {bbl.volume} +{ "volume" } + +FUNCTION {bbl.of} +{ "of" } + +FUNCTION {bbl.number} +{ "number" } + +FUNCTION {bbl.nr} +{ "no." 
} + +FUNCTION {bbl.in} +{ "in" } + +FUNCTION {bbl.pages} +{ "" } + +FUNCTION {bbl.page} +{ "" } + +FUNCTION {bbl.chapter} +{ "chapter" } + +FUNCTION {bbl.techrep} +{ "Technical Report" } + +FUNCTION {bbl.mthesis} +{ "Master's thesis" } + +FUNCTION {bbl.phdthesis} +{ "Ph.D. thesis" } + +MACRO {jan} {"January"} + +MACRO {feb} {"February"} + +MACRO {mar} {"March"} + +MACRO {apr} {"April"} + +MACRO {may} {"May"} + +MACRO {jun} {"June"} + +MACRO {jul} {"July"} + +MACRO {aug} {"August"} + +MACRO {sep} {"September"} + +MACRO {oct} {"October"} + +MACRO {nov} {"November"} + +MACRO {dec} {"December"} + +MACRO {acmcs} {"ACM Computing Surveys"} + +MACRO {acta} {"Acta Informatica"} + +MACRO {cacm} {"Communications of the ACM"} + +MACRO {ibmjrd} {"IBM Journal of Research and Development"} + +MACRO {ibmsj} {"IBM Systems Journal"} + +MACRO {ieeese} {"IEEE Transactions on Software Engineering"} + +MACRO {ieeetc} {"IEEE Transactions on Computers"} + +MACRO {ieeetcad} + {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} + +MACRO {ipl} {"Information Processing Letters"} + +MACRO {jacm} {"Journal of the ACM"} + +MACRO {jcss} {"Journal of Computer and System Sciences"} + +MACRO {scp} {"Science of Computer Programming"} + +MACRO {sicomp} {"SIAM Journal on Computing"} + +MACRO {tocs} {"ACM Transactions on Computer Systems"} + +MACRO {tods} {"ACM Transactions on Database Systems"} + +MACRO {tog} {"ACM Transactions on Graphics"} + +MACRO {toms} {"ACM Transactions on Mathematical Software"} + +MACRO {toois} {"ACM Transactions on Office Information Systems"} + +MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} + +MACRO {tcs} {"Theoretical Computer Science"} +FUNCTION {bibinfo.check} +{ swap$ + duplicate$ missing$ + { + pop$ pop$ + "" + } + { duplicate$ empty$ + { + swap$ pop$ + } + { swap$ + pop$ + } + if$ + } + if$ +} +FUNCTION {bibinfo.warn} +{ swap$ + duplicate$ missing$ + { + swap$ "missing " swap$ * " in " * cite$ * warning$ pop$ + "" + } + { 
duplicate$ empty$ + { + swap$ "empty " swap$ * " in " * cite$ * warning$ + } + { swap$ + pop$ + } + if$ + } + if$ +} +FUNCTION {format.eprint} +{ eprint duplicate$ empty$ + 'skip$ + { archivePrefix duplicate$ empty$ + 'skip$ + { ":" * swap$ } + if$ + * "." * + } + if$ +} +INTEGERS { nameptr namesleft numnames } + + +STRINGS { bibinfo} + +FUNCTION {format.names} +{ 'bibinfo := + duplicate$ empty$ 'skip$ { + 's := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}{, f.}{, jj}" + format.name$ + bibinfo bibinfo.check + 't := + nameptr #1 > + { + namesleft #1 > + { "; " * t * } + { + s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + ";" * + t "others" = + { + " " * bbl.etal * + } + { + bbl.and + space.word * t * + } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ + } if$ +} +FUNCTION {format.names.ed} +{ + format.names +} +FUNCTION {format.key} +{ empty$ + { key field.or.null } + { "" } + if$ +} + +FUNCTION {format.authors} +{ author "author" format.names +} +FUNCTION {get.bbl.editor} +{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ } + +FUNCTION {format.editors} +{ editor "editor" format.names duplicate$ empty$ 'skip$ + { + "," * + " " * + get.bbl.editor + * + } + if$ +} +FUNCTION {format.isbn} +{ isbn "isbn" bibinfo.check + duplicate$ empty$ 'skip$ + { + new.block + "ISBN " swap$ * + } + if$ +} + +FUNCTION {format.note} +{ + note empty$ + { "" } + { note #1 #1 substring$ + duplicate$ "{" = + 'skip$ + { output.state mid.sentence = + { "l" } + { "u" } + if$ + change.case$ + } + if$ + note #2 global.max$ substring$ * "note" bibinfo.check + } + if$ +} + +FUNCTION {format.title} +{ title + "title" bibinfo.check +} +FUNCTION {format.full.names} +{'s := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}" format.name$ + 't := + 
nameptr #1 > + { + namesleft #1 > + { ", " * t * } + { + s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + t "others" = + { + " " * bbl.etal * + } + { + numnames #2 > + { "," * } + 'skip$ + if$ + bbl.and + space.word * t * + } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {author.editor.key.full} +{ author empty$ + { editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.full.names } + if$ + } + { author format.full.names } + if$ +} + +FUNCTION {author.key.full} +{ author empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { author format.full.names } + if$ +} + +FUNCTION {editor.key.full} +{ editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.full.names } + if$ +} + +FUNCTION {make.full.names} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.key.full + { type$ "proceedings" = + 'editor.key.full + 'author.key.full + if$ + } + if$ +} + +FUNCTION {output.bibitem} +{ newline$ + "\bibitem[{" write$ + label write$ + ")" make.full.names duplicate$ short.list = + { pop$ } + { * } + if$ + "}]{" * write$ + cite$ write$ + "}" write$ + newline$ + "" + before.all 'output.state := +} + +FUNCTION {n.dashify} +{ + 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + +FUNCTION {word.in} +{ bbl.in capitalize + " " * } + +FUNCTION {format.date} +{ year "year" bibinfo.check duplicate$ empty$ + { + "empty year in " cite$ * "; set to ????" * warning$ + pop$ "????" 
+ } + 'skip$ + if$ + extra.label * + before.all 'output.state := + after.sentence 'output.state := +} +FUNCTION {format.btitle} +{ title "title" bibinfo.check + duplicate$ empty$ 'skip$ + { + emphasize + } + if$ +} +FUNCTION {either.or.check} +{ empty$ + 'pop$ + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} +FUNCTION {format.bvolume} +{ volume empty$ + { "" } + { bbl.volume volume tie.or.space.prefix + "volume" bibinfo.check * * + series "series" bibinfo.check + duplicate$ empty$ 'pop$ + { swap$ bbl.of space.word * swap$ + emphasize * } + if$ + "volume and number" number either.or.check + } + if$ +} +FUNCTION {format.number.series} +{ volume empty$ + { number empty$ + { series field.or.null } + { series empty$ + { number "number" bibinfo.check } + { output.state mid.sentence = + { bbl.number } + { bbl.number capitalize } + if$ + number tie.or.space.prefix "number" bibinfo.check * * + bbl.in space.word * + series "series" bibinfo.check * + } + if$ + } + if$ + } + { "" } + if$ +} + +FUNCTION {format.edition} +{ edition duplicate$ empty$ 'skip$ + { + output.state mid.sentence = + { "l" } + { "t" } + if$ change.case$ + "edition" bibinfo.check + " " * bbl.edition * + } + if$ +} +INTEGERS { multiresult } +FUNCTION {multi.page.check} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} +FUNCTION {format.pages} +{ pages duplicate$ empty$ 'skip$ + { duplicate$ multi.page.check + { + n.dashify + } + { + } + if$ + "pages" bibinfo.check + } + if$ +} +FUNCTION {format.journal.pages} +{ pages duplicate$ empty$ 'pop$ + { swap$ duplicate$ empty$ + { pop$ pop$ format.pages } + { + ": " * + swap$ + n.dashify + "pages" bibinfo.check + * + } + if$ + } + if$ +} +FUNCTION {format.journal.eid} +{ eid "eid" bibinfo.check + duplicate$ empty$ 'pop$ + { 
swap$ duplicate$ empty$ 'skip$ + { + ": " * + } + if$ + swap$ * + } + if$ +} +FUNCTION {format.vol.num.pages} +{ volume field.or.null + duplicate$ empty$ 'skip$ + { + "volume" bibinfo.check + } + if$ + number "number" bibinfo.check duplicate$ empty$ 'skip$ + { + swap$ duplicate$ empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + swap$ + "(" swap$ * ")" * + } + if$ * + eid empty$ + { format.journal.pages } + { format.journal.eid } + if$ +} + +FUNCTION {format.chapter.pages} +{ chapter empty$ + 'format.pages + { type empty$ + { bbl.chapter } + { type "l" change.case$ + "type" bibinfo.check + } + if$ + chapter tie.or.space.prefix + "chapter" bibinfo.check + * * + pages empty$ + 'skip$ + { ", " * format.pages * } + if$ + } + if$ +} + +FUNCTION {format.booktitle} +{ + booktitle "booktitle" bibinfo.check + emphasize +} +FUNCTION {format.in.ed.booktitle} +{ format.booktitle duplicate$ empty$ 'skip$ + { + editor "editor" format.names.ed duplicate$ empty$ 'pop$ + { + "," * + " " * + get.bbl.editor + ", " * + * swap$ + * } + if$ + word.in swap$ * + } + if$ +} +FUNCTION {format.thesis.type} +{ type duplicate$ empty$ + 'pop$ + { swap$ pop$ + "t" change.case$ "type" bibinfo.check + } + if$ +} +FUNCTION {format.tr.number} +{ number "number" bibinfo.check + type duplicate$ empty$ + { pop$ bbl.techrep } + 'skip$ + if$ + "type" bibinfo.check + swap$ duplicate$ empty$ + { pop$ "t" change.case$ } + { tie.or.space.prefix * * } + if$ +} +FUNCTION {format.article.crossref} +{ + word.in + " \cite{" * crossref * "}" * +} +FUNCTION {format.book.crossref} +{ volume duplicate$ empty$ + { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ + pop$ word.in + } + { bbl.volume + capitalize + swap$ tie.or.space.prefix "volume" bibinfo.check * * bbl.of space.word * + } + if$ + " \cite{" * crossref * "}" * +} +FUNCTION {format.incoll.inproc.crossref} +{ + word.in + " \cite{" * crossref * "}" * +} +FUNCTION {format.org.or.pub} +{ 't := + "" + address 
empty$ t empty$ and + 'skip$ + { + address "address" bibinfo.check * + t empty$ + 'skip$ + { address empty$ + 'skip$ + { ": " * } + if$ + t * + } + if$ + } + if$ +} +FUNCTION {format.publisher.address} +{ publisher "publisher" bibinfo.warn format.org.or.pub +} + +FUNCTION {format.organization.address} +{ organization "organization" bibinfo.check format.org.or.pub +} + +FUNCTION {article} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + crossref missing$ + { + journal + "journal" bibinfo.check + emphasize + "journal" output.check + format.vol.num.pages output + } + { format.article.crossref output.nonnull + format.pages output + } + if$ + new.block + format.note output + fin.entry +} +FUNCTION {book} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + format.date "year" output.check + date.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + new.block + format.number.series output + new.sentence + format.publisher.address output + } + { + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.isbn output + new.block + format.note output + fin.entry +} +FUNCTION {booklet} +{ output.bibitem + format.authors output + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + howpublished "howpublished" bibinfo.check output + address "address" bibinfo.check output + format.isbn output + new.block + format.note output + fin.entry +} + +FUNCTION {inbook} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref 
missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + format.date "year" output.check + date.block + format.btitle "title" output.check + crossref missing$ + { + format.bvolume output + format.chapter.pages "chapter and pages" output.check + new.block + format.number.series output + new.sentence + format.publisher.address output + } + { + format.chapter.pages "chapter and pages" output.check + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + crossref missing$ + { format.isbn output } + 'skip$ + if$ + new.block + format.note output + fin.entry +} + +FUNCTION {incollection} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.chapter.pages output + new.sentence + format.publisher.address output + format.edition output + format.isbn output + } + { format.incoll.inproc.crossref output.nonnull + format.chapter.pages output + } + if$ + new.block + format.note output + fin.entry +} +FUNCTION {inproceedings} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.pages output + new.sentence + publisher empty$ + { format.organization.address output } + { organization "organization" bibinfo.check output + format.publisher.address output + } + if$ + format.isbn output + } + { format.incoll.inproc.crossref output.nonnull + format.pages output + } + if$ + new.block + format.note output + fin.entry +} +FUNCTION {conference} { inproceedings } +FUNCTION {manual} +{ output.bibitem + format.authors output + 
author format.key output + format.date "year" output.check + date.block + format.btitle "title" output.check + organization address new.block.checkb + organization "organization" bibinfo.check output + address "address" bibinfo.check output + format.edition output + new.block + format.note output + fin.entry +} + +FUNCTION {mastersthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.btitle + "title" output.check + new.block + bbl.mthesis format.thesis.type output.nonnull + school "school" bibinfo.warn output + address "address" bibinfo.check output + new.block + format.note output + fin.entry +} + +FUNCTION {misc} +{ output.bibitem + format.authors output + author format.key output + format.date "year" output.check + date.block + format.title output + new.block + howpublished "howpublished" bibinfo.check output + new.block + format.note output + format.eprint output + fin.entry +} +FUNCTION {phdthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.btitle + "title" output.check + new.block + bbl.phdthesis format.thesis.type output.nonnull + school "school" bibinfo.warn output + address "address" bibinfo.check output + new.block + format.note output + fin.entry +} + +FUNCTION {proceedings} +{ output.bibitem + format.editors output + editor format.key output + format.date "year" output.check + date.block + format.btitle "title" output.check + format.bvolume output + format.number.series output + new.sentence + publisher empty$ + { format.organization.address output } + { organization "organization" bibinfo.check output + format.publisher.address output + } + if$ + format.isbn output + new.block + format.note output + fin.entry +} + +FUNCTION {techreport} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block 
+ format.title + "title" output.check + new.block + format.tr.number output.nonnull + institution "institution" bibinfo.warn output + address "address" bibinfo.check output + new.block + format.note output + fin.entry +} + +FUNCTION {unpublished} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + format.note "note" output.check + fin.entry +} + +FUNCTION {default.type} { misc } +READ +FUNCTION {sortify} +{ purify$ + "l" change.case$ +} +INTEGERS { len } +FUNCTION {chop.word} +{ 's := + 'len := + s #1 len substring$ = + { s len #1 + global.max$ substring$ } + 's + if$ +} +FUNCTION {format.lab.names} +{'s := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}" format.name$ + 't := + nameptr #1 > + { + nameptr #2 = + numnames #3 > and + { "others" 't := + #1 'namesleft := } + 'skip$ + if$ + namesleft #1 > + { ", " * t * } + { + s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + t "others" = + { + " " * bbl.etal * + } + { + numnames #2 > + { "," * } + 'skip$ + if$ + bbl.and + space.word * t * + } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {author.key.label} +{ author empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {author.editor.key.label} +{ author empty$ + { editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.lab.names } + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {editor.key.label} +{ editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.lab.names } + if$ +} + +FUNCTION {calc.short.authors} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.key.label + { type$ "proceedings" = + 
'editor.key.label + 'author.key.label + if$ + } + if$ + 'short.list := +} + +FUNCTION {calc.label} +{ calc.short.authors + short.list + "(" + * + year duplicate$ empty$ + short.list key field.or.null = or + { pop$ "" } + 'skip$ + if$ + * + 'label := +} + +FUNCTION {sort.format.names} +{ 's := + #1 'nameptr := + "" + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" + format.name$ 't := + nameptr #1 > + { + " " * + namesleft #1 = t "others" = and + { "zzzzz" 't := } + 'skip$ + if$ + t sortify * + } + { t sortify * } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {sort.format.title} +{ 't := + "A " #2 + "An " #3 + "The " #4 t chop.word + chop.word + chop.word + sortify + #1 global.max$ substring$ +} +FUNCTION {author.sort} +{ author empty$ + { key empty$ + { "to sort, need author or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { author sort.format.names } + if$ +} +FUNCTION {author.editor.sort} +{ author empty$ + { editor empty$ + { key empty$ + { "to sort, need author, editor, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ + } + { author sort.format.names } + if$ +} +FUNCTION {editor.sort} +{ editor empty$ + { key empty$ + { "to sort, need editor or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ +} +FUNCTION {presort} +{ calc.label + label sortify + " " + * + type$ "book" = + type$ "inbook" = + or + 'author.editor.sort + { type$ "proceedings" = + 'editor.sort + 'author.sort + if$ + } + if$ + #1 entry.max$ substring$ + 'sort.label := + sort.label + * + " " + * + title field.or.null + sort.format.title + * + #1 entry.max$ substring$ + 'sort.key$ := +} + +ITERATE {presort} +SORT +STRINGS { last.label next.extra } +INTEGERS { last.extra.num last.extra.num.extended last.extra.num.blank number.label } +FUNCTION 
{initialize.extra.label.stuff} +{ #0 int.to.chr$ 'last.label := + "" 'next.extra := + #0 'last.extra.num := + "a" chr.to.int$ #1 - 'last.extra.num.blank := + last.extra.num.blank 'last.extra.num.extended := + #0 'number.label := +} +FUNCTION {forward.pass} +{ last.label label = + { last.extra.num #1 + 'last.extra.num := + last.extra.num "z" chr.to.int$ > + { "a" chr.to.int$ 'last.extra.num := + last.extra.num.extended #1 + 'last.extra.num.extended := + } + 'skip$ + if$ + last.extra.num.extended last.extra.num.blank > + { last.extra.num.extended int.to.chr$ + last.extra.num int.to.chr$ + * 'extra.label := } + { last.extra.num int.to.chr$ 'extra.label := } + if$ + } + { "a" chr.to.int$ 'last.extra.num := + "" 'extra.label := + label 'last.label := + } + if$ + number.label #1 + 'number.label := +} +FUNCTION {reverse.pass} +{ next.extra "b" = + { "a" 'extra.label := } + 'skip$ + if$ + extra.label 'next.extra := + extra.label + duplicate$ empty$ + 'skip$ + { "{\natexlab{" swap$ * "}}" * } + if$ + 'extra.label := + label extra.label * 'label := +} +EXECUTE {initialize.extra.label.stuff} +ITERATE {forward.pass} +REVERSE {reverse.pass} +FUNCTION {bib.sort.order} +{ sort.label + " " + * + year field.or.null sortify + * + " " + * + title field.or.null + sort.format.title + * + #1 entry.max$ substring$ + 'sort.key$ := +} +ITERATE {bib.sort.order} +SORT +FUNCTION {begin.bib} +{ preamble$ empty$ + 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" number.label int.to.str$ * "}" * + write$ newline$ + "\providecommand{\natexlab}[1]{#1}" + write$ newline$ +} +EXECUTE {begin.bib} +EXECUTE {init.state.consts} +ITERATE {call.type$} +FUNCTION {end.bib} +{ newline$ + "\end{thebibliography}" write$ newline$ +} +EXECUTE {end.bib} +%% End of customized bst file +%% +%% End of file `aaai22.bst'. 
diff --git b/AnonymousSubmission/aaai25.sty a/AnonymousSubmission/aaai25.sty new file mode 100644 index 0000000..4a20e41 --- /dev/null +++ a/AnonymousSubmission/aaai25.sty @@ -0,0 +1,315 @@ +\NeedsTeXFormat{LaTeX2e}% +\ProvidesPackage{aaai25}[2025/05/08 AAAI 2025 Submission format]% +\def\year{2025}% +\typeout{Conference Style for AAAI for LaTeX 2e -- version for submission}% +% +\def\copyright@on{T} +\def\showauthors@on{T} +\def\nocopyright{\gdef\copyright@on{}} % Copyright notice is required for camera-ready only. +\DeclareOption{submission}{% + \gdef\copyright@on{}% + \gdef\showauthors@on{}% + \long\gdef\pdfinfo #1{\relax}% +}% +\DeclareOption{draft}{% + \gdef\copyright@on{}% +}% +\ProcessOptions\relax% +% WARNING: IF YOU ARE USING THIS STYLE SHEET FOR AN AAAI PUBLICATION, YOU +% MAY NOT MODIFY IT FOR ANY REASON. MODIFICATIONS (IN YOUR SOURCE +% OR IN THIS STYLE SHEET WILL RESULT IN REJECTION OF YOUR PAPER). +% +% WARNING: This style is NOT guaranteed to work. It is provided in the +% hope that it might make the preparation of papers easier, but this style +% file is provided "as is" without warranty of any kind, either express or +% implied, including but not limited to the implied warranties of +% merchantability, fitness for a particular purpose, or noninfringement. +% You use this style file at your own risk. Standard disclaimers apply. +% There are undoubtably bugs in this style. If you would like to submit +% bug fixes, improvements, etc. please let us know. Please use the contact form +% at www.aaai.org. +% +% Do not use this file unless you are an experienced LaTeX user. 
+% +% PHYSICAL PAGE LAYOUT +\setlength\topmargin{-0.25in} \setlength\oddsidemargin{-0.25in} +\setlength\textheight{9.0in} \setlength\textwidth{7.0in} +\setlength\columnsep{0.375in} \newlength\titlebox \setlength\titlebox{2.25in} +\setlength\headheight{0pt} \setlength\headsep{0pt} +%\setlength\footheight{0pt} \setlength\footskip{0pt} +\thispagestyle{empty} \pagestyle{empty} +\flushbottom \twocolumn \sloppy +% We're never going to need a table of contents, so just flush it to +% save space --- suggested by drstrip@sandia-2 +\def\addcontentsline#1#2#3{} +% gf: PRINT COPYRIGHT NOTICE +\def\copyright@year{\number\year} +\def\copyright@text{Copyright \copyright\space \copyright@year, +Association for the Advancement of Artificial Intelligence (www.aaai.org). +All rights reserved.} +\def\copyrighttext#1{\gdef\copyright@on{T}\gdef\copyright@text{#1}} +\def\copyrightyear#1{\gdef\copyright@on{T}\gdef\copyright@year{#1}} +% gf: End changes for copyright notice (used in \maketitle, below) +% Title stuff, taken from deproc. 
+% +\def\maketitle{% + \par% + \begingroup % to make the footnote style local to the title + \def\thefootnote{\fnsymbol{footnote}} + \twocolumn[\@maketitle] \@thanks% + \endgroup% + % Insert copyright slug unless turned off + \if T\copyright@on\insert\footins{\noindent\footnotesize\copyright@text}\fi% + % + \setcounter{footnote}{0}% + \let\maketitle\relax% + \let\@maketitle\relax% + \gdef\@thanks{}% + \gdef\@author{}% + \gdef\@title{}% + \let\thanks\relax% +}% +\long\gdef\affiliations #1{ \def \affiliations_{\if T\showauthors@on#1\fi}}% +% +\def\@maketitle{% + \def\theauthors{\if T\showauthors@on\@author\else Anonymous submission\fi} + \newcounter{eqfn}\setcounter{eqfn}{0}% + \newsavebox{\titlearea} + \sbox{\titlearea}{ + \let\footnote\relax\let\thanks\relax% + \setcounter{footnote}{0}% + \def\equalcontrib{% + \ifnum\value{eqfn}=0% + \footnote{These authors contributed equally.}% + \setcounter{eqfn}{\value{footnote}}% + \else% + \footnotemark[\value{eqfn}]% + \fi% + }% + \vbox{% + \hsize\textwidth% + \linewidth\hsize% + \vskip 0.625in minus 0.125in% + \centering% + {\LARGE\bf \@title \par}% + \vskip 0.1in plus 0.5fil minus 0.05in% + {\Large{\textbf{\theauthors\ifhmode\\\fi}}}% + \vskip .2em plus 0.25fil% + {\normalsize \affiliations_\ifhmode\\\fi}% + \vskip .5em plus 2fil% + }% + }% +% + \newlength\actualheight% + \settoheight{\actualheight}{\usebox{\titlearea}}% + \ifdim\actualheight>\titlebox% + \setlength{\titlebox}{\actualheight}% + \fi% +% + \vbox to \titlebox {% + \let\footnote\thanks\relax% + \setcounter{footnote}{0}% + \def\equalcontrib{% + \ifnum\value{eqfn}=0% + \footnote{These authors contributed equally.}% + \setcounter{eqfn}{\value{footnote}}% + \else% + \footnotemark[\value{eqfn}]% + \fi% + }% + \hsize\textwidth% + \linewidth\hsize% + \vskip 0.625in minus 0.125in% + \centering% + {\LARGE\bf \@title \par}% + \vskip 0.1in plus 0.5fil minus 0.05in% + {\Large{\textbf{\theauthors\ifhmode\\\fi}}}% + \vskip .2em plus 0.25fil% + {\normalsize 
\affiliations_\ifhmode\\\fi}% + \vskip .5em plus 2fil% + }% +}% +% +\renewenvironment{abstract}{% + \centerline{\bf Abstract}% + \vspace{0.5ex}% + \setlength{\leftmargini}{10pt}% + \begin{quote}% + \small% +}{% + \par% + \end{quote}% + \vskip 1ex% +}% +\newenvironment{links}{% + \newcommand{\link}[2]{\par\textbf{##1} --- \url{##2}}% + \setlength{\hangindent}{10pt}% + \setlength{\parskip}{2pt}% + \begin{flushleft}% +}{% + \end{flushleft}% + \vskip 1ex% +}% +% jsp added: +\def\pubnote#1{ + \thispagestyle{myheadings}% + \pagestyle{myheadings}% + \markboth{#1}{#1}% + \setlength\headheight{10pt}% + \setlength\headsep{10pt}% +}% +% +% SECTIONS with less space +\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus +-0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\Large\bf\centering}} +\def\subsection{\@startsection{subsection}{2}{\z@}{-2.0ex plus +-0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\large\bf\raggedright}} +\def\subsubsection{\@startsection{subparagraph}{3}{\z@}{-6pt plus +%%% DIEGO changed: 29/11/2009 +%% 2pt minus 1pt}{-1em}{\normalsize\bf}} +-2pt minus -1pt}{-1em}{\normalsize\bf}} +%%% END changed +\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-6pt plus -2pt minus -1pt}{-1em}{\normalsize\bf}}% +\setcounter{secnumdepth}{0} +% add period to section (but not subsection) numbers, reduce space after +%\renewcommand{\thesection} +% {\arabic{section}.\hskip-0.6em} +%\renewcommand{\thesubsection} +% {\arabic{section}.\arabic{subsection}\hskip-0.6em} +% FOOTNOTES +\footnotesep 6.65pt % +\skip\footins 9pt plus 4pt minus 2pt +\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt } +\setcounter{footnote}{0} +% LISTS AND PARAGRAPHS +\parindent 10pt +\topsep 4pt plus 1pt minus 2pt +\partopsep 1pt plus 0.5pt minus 0.5pt +\itemsep 0.5pt plus 1pt minus 0.5pt +\parsep 2pt plus 1pt minus 0.5pt +\leftmargin 10pt \leftmargini 13pt \leftmarginii 10pt \leftmarginiii 5pt \leftmarginiv 5pt \leftmarginv 5pt \leftmarginvi 5pt 
+\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt +\def\@listi{\leftmargin\leftmargini} +\def\@listii{\leftmargin\leftmarginii +\labelwidth\leftmarginii\advance\labelwidth-\labelsep +\topsep 2pt plus 1pt minus 0.5pt +\parsep 1pt plus 0.5pt minus 0.5pt +\itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii +\labelwidth\leftmarginiii\advance\labelwidth-\labelsep +\topsep 1pt plus 0.5pt minus 0.5pt +\parsep \z@ +\partopsep 0.5pt plus 0pt minus 0.5pt +\itemsep \topsep} +\def\@listiv{\leftmargin\leftmarginiv +\labelwidth\leftmarginiv\advance\labelwidth-\labelsep} +\def\@listv{\leftmargin\leftmarginv +\labelwidth\leftmarginv\advance\labelwidth-\labelsep} +\def\@listvi{\leftmargin\leftmarginvi +\labelwidth\leftmarginvi\advance\labelwidth-\labelsep} +\abovedisplayskip 7pt plus2pt minus5pt% +\belowdisplayskip \abovedisplayskip +\abovedisplayshortskip 0pt plus3pt% +\belowdisplayshortskip 4pt plus3pt minus3pt% +% Less leading in most fonts (due to the narrow columns) +% The choices were between 1-pt and 1.5-pt leading +\def\normalsize{\@setfontsize\normalsize\@xpt{11}} % 10 point on 11 +\def\small{\@setfontsize\small\@ixpt{10}} % 9 point on 10 +\def\footnotesize{\@setfontsize\footnotesize\@ixpt{10}} % 9 point on 10 +\def\scriptsize{\@setfontsize\scriptsize\@viipt{10}} % 7 point on 8 +\def\tiny{\@setfontsize\tiny\@vipt{7}} % 6 point on 7 +\def\large{\@setfontsize\large\@xipt{12}} % 11 point on 12 +\def\Large{\@setfontsize\Large\@xiipt{14}} % 12 point on 14 +\def\LARGE{\@setfontsize\LARGE\@xivpt{16}} % 14 point on 16 +\def\huge{\@setfontsize\huge\@xviipt{20}} % 17 point on 20 +\def\Huge{\@setfontsize\Huge\@xxpt{23}} % 20 point on 23 + +\AtBeginDocument{% + \@ifpackageloaded{natbib}% + {% + % When natbib is in use, set the proper style and fix a few things + \let\cite\citep + \let\shortcite\citeyearpar + \setcitestyle{aysep={}} + \setlength\bibhang{0pt} + \bibliographystyle{aaai25} + }{}% + \@ifpackageloaded{hyperref}% + {% + \PackageError{aaai}{You must 
not use hyperref in AAAI papers.}{You (or one of the packages you imported) are importing the hyperref package, which is forbidden in AAAI papers. You must remove it from the paper to proceed.} + }{}% + \@ifpackageloaded{bbm}% + {% + \PackageError{aaai}{You must not use bbm package in AAAI papers because it introduces Type 3 fonts which are forbidden.}{See https://tex.stackexchange.com/questions/479160/a-replacement-to-mathbbm1-with-type-1-fonts for possible alternatives.} + }{}% + \@ifpackageloaded{authblk}% + {% + \PackageError{aaai}{Package authblk is forbbidden.}{Package authblk is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{balance}% + {% + \PackageError{aaai}{Package balance is forbbidden.}{Package balance is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{CJK}% + {% + \PackageError{aaai}{Package CJK is forbbidden.}{Package CJK is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{flushend}% + {% + \PackageError{aaai}{Package flushend is forbbidden.}{Package flushend is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{fontenc}% + {% + \PackageError{aaai}{Package fontenc is forbbidden.}{Package fontenc is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{fullpage}% + {% + \PackageError{aaai}{Package fullpage is forbbidden.}{Package fullpage is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{geometry}% + {% + \PackageError{aaai}{Package geometry is forbbidden.}{Package geometry is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{grffile}% + {% + \PackageError{aaai}{Package grffile is forbbidden.}{Package grffile is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{navigator}% + {% + \PackageError{aaai}{Package navigator is forbbidden.}{Package navigator is forbbiden. 
You must find an alternative.} + }{}% + \@ifpackageloaded{savetrees}% + {% + \PackageError{aaai}{Package savetrees is forbbidden.}{Package savetrees is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{setspace}% + {% + \PackageError{aaai}{Package setspace is forbbidden.}{Package setspace is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{stfloats}% + {% + \PackageError{aaai}{Package stfloats is forbbidden.}{Package stfloats is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{tabu}% + {% + \PackageError{aaai}{Package tabu is forbbidden.}{Package tabu is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{titlesec}% + {% + \PackageError{aaai}{Package titlesec is forbbidden.}{Package titlesec is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{tocbibind}% + {% + \PackageError{aaai}{Package tocbibind is forbbidden.}{Package tocbibind is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{ulem}% + {% + \PackageError{aaai}{Package ulem is forbbidden.}{Package ulem is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{wrapfig}% + {% + \PackageError{aaai}{Package wrapfig is forbbidden.}{Package wrapfig is forbbiden. 
You must find an alternative.} + }{}% +} + +\let\endthebibliography=\endlist diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.aux a/AnonymousSubmission/anonymous-submission-latex-2025.aux new file mode 100644 index 0000000..1c0866f --- /dev/null +++ a/AnonymousSubmission/anonymous-submission-latex-2025.aux @@ -0,0 +1,109 @@ +\relax +\bibstyle{aaai25} +\citation{sutton1988learning} +\citation{tsitsiklis1997analysis} +\citation{Sutton2018book} +\citation{baird1995residual} +\citation{sutton2008convergent} +\citation{sutton2009fast} +\citation{sutton2016emphatic} +\citation{chen2023modified} +\citation{hackman2012faster} +\citation{liu2015finite,liu2016proximal,liu2018proximal} +\citation{givchi2015quasi} +\citation{pan2017accelerated} +\citation{hallak2016generalized} +\citation{zhang2022truncated} +\citation{johnson2013accelerating} +\citation{korda2015td} +\citation{xu2019reanalysis} +\citation{Sutton2018book} +\citation{baird1995residual} +\citation{sutton2009fast} +\citation{sutton2009fast} +\citation{feng2019kernel} +\citation{basserrano2021logistic} +\newlabel{introduction}{{}{1}} +\citation{Sutton2018book} +\citation{Sutton2018book} +\citation{ng1999policy} +\citation{devlin2012dynamic} +\newlabel{preliminaries}{{}{2}} +\newlabel{valuefunction}{{}{2}} +\newlabel{linearvaluefunction}{{1}{2}} +\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} +\newlabel{example_bias}{{1}{2}} +\newlabel{alg:algorithm 1}{{1}{3}} +\newlabel{omega}{{3}{3}} +\newlabel{delta}{{4}{3}} +\newlabel{theta}{{5}{3}} +\newlabel{deltaSarsa}{{8}{3}} +\newlabel{deltaQ}{{9}{3}} +\newlabel{alg:algorithm 2}{{2}{3}} +\newlabel{thetavmtdc}{{11}{3}} +\newlabel{uvmtdc}{{12}{3}} +\newlabel{omegavmtdc}{{13}{3}} +\newlabel{fvmetd}{{18}{3}} +\newlabel{thetavmetd}{{19}{3}} +\newlabel{omegavmetd}{{20}{3}} +\citation{borkar1997stochastic} +\citation{hirsch1989convergent} +\citation{borkar2000ode} +\citation{borkar2000ode} +\citation{borkar2000ode} +\newlabel{alg:algorithm 
5}{{3}{4}} +\newlabel{theorem1}{{1}{4}} +\newlabel{th1proof}{{}{4}} +\newlabel{thetaFast}{{22}{4}} +\newlabel{omegaFast}{{23}{4}} +\newlabel{omegaFastFinal}{{24}{4}} +\newlabel{omegaInfty}{{25}{4}} +\citation{Sutton2018book} +\citation{sutton2009fast} +\citation{baird1995residual,sutton2009fast} +\newlabel{odetheta}{{26}{5}} +\newlabel{covariance}{{27}{5}} +\newlabel{odethetafinal}{{28}{5}} +\newlabel{theorem2}{{2}{5}} +\newlabel{randomwalk}{{1}{5}} +\newlabel{bairdexample}{{2}{5}} +\newlabel{theorem3}{{3}{5}} +\citation{schwartz1993reinforcement} +\citation{korda2015td} +\citation{xu2020reanalysis} +\citation{Sutton2018book} +\citation{Sutton2018book} +\citation{schulman2015trust} +\citation{schulman2017proximal} +\bibdata{aaai25} +\bibcite{baird1995residual}{{1}{1995}{{Baird et~al.}}{{}}} +\bibcite{basserrano2021logistic}{{2}{2021}{{Bas-Serrano et~al.}}{{Bas-Serrano, Curi, Krause, and Neu}}} +\bibcite{borkar1997stochastic}{{3}{1997}{{Borkar}}{{}}} +\bibcite{borkar2000ode}{{4}{2000}{{Borkar and Meyn}}{{}}} +\bibcite{chen2023modified}{{5}{2023}{{Chen et~al.}}{{Chen, Ma, Li, Yang, Yang, and Gao}}} +\bibcite{devlin2012dynamic}{{6}{2012}{{Devlin and Kudenko}}{{}}} +\bibcite{feng2019kernel}{{7}{2019}{{Feng, Li, and Liu}}{{}}} +\bibcite{givchi2015quasi}{{8}{2015}{{Givchi and Palhang}}{{}}} +\bibcite{hackman2012faster}{{9}{2012}{{Hackman}}{{}}} +\bibcite{hallak2016generalized}{{10}{2016}{{Hallak et~al.}}{{Hallak, Tamar, Munos, and Mannor}}} +\bibcite{hirsch1989convergent}{{11}{1989}{{Hirsch}}{{}}} +\bibcite{johnson2013accelerating}{{12}{2013}{{Johnson and Zhang}}{{}}} +\bibcite{korda2015td}{{13}{2015}{{Korda and La}}{{}}} +\bibcite{liu2018proximal}{{14}{2018}{{Liu et~al.}}{{Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}}} +\bibcite{liu2015finite}{{15}{2015}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} +\bibcite{liu2016proximal}{{16}{2016}{{Liu et~al.}}{{Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}}} +\bibcite{ng1999policy}{{17}{1999}{{Ng, Harada, 
and Russell}}{{}}} +\bibcite{pan2017accelerated}{{18}{2017}{{Pan, White, and White}}{{}}} +\bibcite{schulman2015trust}{{19}{2015}{{Schulman et~al.}}{{Schulman, Levine, Abbeel, Jordan, and Moritz}}} +\bibcite{schulman2017proximal}{{20}{2017}{{Schulman et~al.}}{{Schulman, Wolski, Dhariwal, Radford, and Klimov}}} +\bibcite{schwartz1993reinforcement}{{21}{1993}{{Schwartz}}{{}}} +\bibcite{sutton2009fast}{{22}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}} +\bibcite{sutton1988learning}{{23}{1988}{{Sutton}}{{}}} +\bibcite{Sutton2018book}{{24}{2018}{{Sutton and Barto}}{{}}} +\bibcite{sutton2008convergent}{{25}{2008}{{Sutton, Maei, and Szepesv{\'a}ri}}{{}}} +\bibcite{sutton2016emphatic}{{26}{2016}{{Sutton, Mahmood, and White}}{{}}} +\bibcite{tsitsiklis1997analysis}{{27}{1997}{{Tsitsiklis and Van~Roy}}{{}}} +\bibcite{xu2019reanalysis}{{28}{2019}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}} +\bibcite{xu2020reanalysis}{{29}{2020}{{Xu et~al.}}{{Xu, Wang, Zhou, and Liang}}} +\bibcite{zhang2022truncated}{{30}{2022}{{Zhang and Whiteson}}{{}}} +\gdef \@abspage@last{7} diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.bbl a/AnonymousSubmission/anonymous-submission-latex-2025.bbl new file mode 100644 index 0000000..950971c --- /dev/null +++ a/AnonymousSubmission/anonymous-submission-latex-2025.bbl @@ -0,0 +1,154 @@ +\begin{thebibliography}{30} +\providecommand{\natexlab}[1]{#1} + +\bibitem[{Baird et~al.(1995)}]{baird1995residual} +Baird, L.; et~al. 1995. +\newblock Residual algorithms: Reinforcement learning with function approximation. +\newblock In \emph{Proc. 12th Int. Conf. Mach. Learn.}, 30--37. + +\bibitem[{Bas-Serrano et~al.(2021)Bas-Serrano, Curi, Krause, and Neu}]{basserrano2021logistic} +Bas-Serrano, J.; Curi, S.; Krause, A.; and Neu, G. 2021. +\newblock Logistic Q-Learning. +\newblock In \emph{International Conference on Artificial Intelligence and Statistics}, 3610--3618. 
+ +\bibitem[{Borkar(1997)}]{borkar1997stochastic} +Borkar, V.~S. 1997. +\newblock Stochastic approximation with two time scales. +\newblock \emph{Syst. \& Control Letters}, 29(5): 291--294. + +\bibitem[{Borkar and Meyn(2000)}]{borkar2000ode} +Borkar, V.~S.; and Meyn, S.~P. 2000. +\newblock The ODE method for convergence of stochastic approximation and reinforcement learning. +\newblock \emph{SIAM J. Control Optim.}, 38(2): 447--469. + +\bibitem[{Chen et~al.(2023)Chen, Ma, Li, Yang, Yang, and Gao}]{chen2023modified} +Chen, X.; Ma, X.; Li, Y.; Yang, G.; Yang, S.; and Gao, Y. 2023. +\newblock Modified Retrace for Off-Policy Temporal Difference Learning. +\newblock In \emph{Uncertainty in Artificial Intelligence}, 303--312. PMLR. + +\bibitem[{Devlin and Kudenko(2012)}]{devlin2012dynamic} +Devlin, S.; and Kudenko, D. 2012. +\newblock Dynamic potential-based reward shaping. +\newblock In \emph{Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems}, 433--440. + +\bibitem[{Feng, Li, and Liu(2019)}]{feng2019kernel} +Feng, Y.; Li, L.; and Liu, Q. 2019. +\newblock A kernel loss for solving the Bellman equation. +\newblock In \emph{Advances in Neural Information Processing Systems}, 15430--15441. + +\bibitem[{Givchi and Palhang(2015)}]{givchi2015quasi} +Givchi, A.; and Palhang, M. 2015. +\newblock Quasi newton temporal difference learning. +\newblock In \emph{Asian Conference on Machine Learning}, 159--172. + +\bibitem[{Hackman(2012)}]{hackman2012faster} +Hackman, L. 2012. +\newblock \emph{Faster Gradient-TD Algorithms}. +\newblock Ph.D. thesis, University of Alberta. + +\bibitem[{Hallak et~al.(2016)Hallak, Tamar, Munos, and Mannor}]{hallak2016generalized} +Hallak, A.; Tamar, A.; Munos, R.; and Mannor, S. 2016. +\newblock Generalized emphatic temporal difference learning: bias-variance analysis. +\newblock In \emph{Proceedings of the 30th AAAI Conference on Artificial Intelligence}, 1631--1637. + +\bibitem[{Hirsch(1989)}]{hirsch1989convergent} +Hirsch, M.~W. 1989. 
+\newblock Convergent activation dynamics in continuous time networks. +\newblock \emph{Neural Netw.}, 2(5): 331--349. + +\bibitem[{Johnson and Zhang(2013)}]{johnson2013accelerating} +Johnson, R.; and Zhang, T. 2013. +\newblock Accelerating stochastic gradient descent using predictive variance reduction. +\newblock In \emph{Advances in Neural Information Processing Systems}, 315--323. + +\bibitem[{Korda and La(2015)}]{korda2015td} +Korda, N.; and La, P. 2015. +\newblock On TD (0) with function approximation: Concentration bounds and a centered variant with exponential convergence. +\newblock In \emph{International conference on machine learning}, 626--634. PMLR. + +\bibitem[{Liu et~al.(2018)Liu, Gemp, Ghavamzadeh, Liu, Mahadevan, and Petrik}]{liu2018proximal} +Liu, B.; Gemp, I.; Ghavamzadeh, M.; Liu, J.; Mahadevan, S.; and Petrik, M. 2018. +\newblock Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity. +\newblock \emph{Journal of Artificial Intelligence Research}, 63: 461--494. + +\bibitem[{Liu et~al.(2015)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}]{liu2015finite} +Liu, B.; Liu, J.; Ghavamzadeh, M.; Mahadevan, S.; and Petrik, M. 2015. +\newblock Finite-sample analysis of proximal gradient TD algorithms. +\newblock In \emph{Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence}, 504--513. + +\bibitem[{Liu et~al.(2016)Liu, Liu, Ghavamzadeh, Mahadevan, and Petrik}]{liu2016proximal} +Liu, B.; Liu, J.; Ghavamzadeh, M.; Mahadevan, S.; and Petrik, M. 2016. +\newblock Proximal Gradient Temporal Difference Learning Algorithms. +\newblock In \emph{Proceedings of the International Joint Conference on Artificial Intelligence}, 4195--4199. + +\bibitem[{Ng, Harada, and Russell(1999)}]{ng1999policy} +Ng, A.~Y.; Harada, D.; and Russell, S. 1999. +\newblock Policy invariance under reward transformations: Theory and application to reward shaping. +\newblock In \emph{Proc. 16th Int. Conf. Mach. 
Learn.}, 278--287. + +\bibitem[{Pan, White, and White(2017)}]{pan2017accelerated} +Pan, Y.; White, A.; and White, M. 2017. +\newblock Accelerated gradient temporal difference learning. +\newblock In \emph{Proceedings of the 21st AAAI Conference on Artificial Intelligence}, 2464--2470. + +\bibitem[{Schulman et~al.(2015)Schulman, Levine, Abbeel, Jordan, and Moritz}]{schulman2015trust} +Schulman, J.; Levine, S.; Abbeel, P.; Jordan, M.; and Moritz, P. 2015. +\newblock Trust region policy optimization. +\newblock In \emph{International Conference on Machine Learning}, 1889--1897. + +\bibitem[{Schulman et~al.(2017)Schulman, Wolski, Dhariwal, Radford, and Klimov}]{schulman2017proximal} +Schulman, J.; Wolski, F.; Dhariwal, P.; Radford, A.; and Klimov, O. 2017. +\newblock Proximal policy optimization algorithms. +\newblock \emph{arXiv preprint arXiv:1707.06347}. + +\bibitem[{Schwartz(1993)}]{schwartz1993reinforcement} +Schwartz, A. 1993. +\newblock A reinforcement learning method for maximizing undiscounted rewards. +\newblock In \emph{Proc. 10th Int. Conf. Mach. Learn.}, volume 298, 298--305. + +\bibitem[{Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}]{sutton2009fast} +Sutton, R.; Maei, H.; Precup, D.; Bhatnagar, S.; Silver, D.; Szepesv{\'a}ri, C.; and Wiewiora, E. 2009. +\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation. +\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, 993--1000. + +\bibitem[{Sutton(1988)}]{sutton1988learning} +Sutton, R.~S. 1988. +\newblock Learning to predict by the methods of temporal differences. +\newblock \emph{Machine learning}, 3(1): 9--44. + +\bibitem[{Sutton and Barto(2018)}]{Sutton2018book} +Sutton, R.~S.; and Barto, A.~G. 2018. +\newblock \emph{Reinforcement Learning: An Introduction}. +\newblock The MIT Press, second edition. 
+ +\bibitem[{Sutton, Maei, and Szepesv{\'a}ri(2008)}]{sutton2008convergent} +Sutton, R.~S.; Maei, H.~R.; and Szepesv{\'a}ri, C. 2008. +\newblock A Convergent $ O (n) $ Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation. +\newblock In \emph{Advances in Neural Information Processing Systems}, 1609--1616. Cambridge, MA: MIT Press. + +\bibitem[{Sutton, Mahmood, and White(2016)}]{sutton2016emphatic} +Sutton, R.~S.; Mahmood, A.~R.; and White, M. 2016. +\newblock An emphatic approach to the problem of off-policy temporal-difference learning. +\newblock \emph{The Journal of Machine Learning Research}, 17(1): 2603--2631. + +\bibitem[{Tsitsiklis and Van~Roy(1997)}]{tsitsiklis1997analysis} +Tsitsiklis, J.~N.; and Van~Roy, B. 1997. +\newblock Analysis of temporal-diffference learning with function approximation. +\newblock In \emph{Advances in Neural Information Processing Systems}, 1075--1081. + +\bibitem[{Xu et~al.(2019)Xu, Wang, Zhou, and Liang}]{xu2019reanalysis} +Xu, T.; Wang, Z.; Zhou, Y.; and Liang, Y. 2019. +\newblock Reanalysis of Variance Reduced Temporal Difference Learning. +\newblock In \emph{International Conference on Learning Representations}. + +\bibitem[{Xu et~al.(2020)Xu, Wang, Zhou, and Liang}]{xu2020reanalysis} +Xu, T.; Wang, Z.; Zhou, Y.; and Liang, Y. 2020. +\newblock Reanalysis of variance reduced temporal difference learning. +\newblock \emph{arXiv preprint arXiv:2001.01898}. + +\bibitem[{Zhang and Whiteson(2022)}]{zhang2022truncated} +Zhang, S.; and Whiteson, S. 2022. +\newblock Truncated emphatic temporal difference methods for prediction and control. +\newblock \emph{The Journal of Machine Learning Research}, 23(1): 6859--6917. 
+ +\end{thebibliography} diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.blg a/AnonymousSubmission/anonymous-submission-latex-2025.blg new file mode 100644 index 0000000..adb07a1 --- /dev/null +++ a/AnonymousSubmission/anonymous-submission-latex-2025.blg @@ -0,0 +1,46 @@ +This is BibTeX, Version 0.99d (TeX Live 2023) +Capacity: max_strings=200000, hash_size=200000, hash_prime=170003 +The top-level auxiliary file: anonymous-submission-latex-2025.aux +The style file: aaai25.bst +Database file #1: aaai25.bib +You've used 30 entries, + 2840 wiz_defined-function locations, + 758 strings with 9820 characters, +and the built_in function-call counts, 22009 in all, are: += -- 1873 +> -- 1021 +< -- 1 ++ -- 379 +- -- 340 +* -- 1463 +:= -- 3421 +add.period$ -- 123 +call.type$ -- 30 +change.case$ -- 252 +chr.to.int$ -- 31 +cite$ -- 30 +duplicate$ -- 1509 +empty$ -- 1557 +format.name$ -- 414 +if$ -- 4459 +int.to.chr$ -- 1 +int.to.str$ -- 1 +missing$ -- 302 +newline$ -- 154 +num.names$ -- 120 +pop$ -- 709 +preamble$ -- 1 +purify$ -- 213 +quote$ -- 0 +skip$ -- 793 +stack$ -- 0 +substring$ -- 1161 +swap$ -- 801 +text.length$ -- 1 +text.prefix$ -- 0 +top$ -- 0 +type$ -- 267 +warning$ -- 0 +while$ -- 188 +width$ -- 0 +write$ -- 394 diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.log a/AnonymousSubmission/anonymous-submission-latex-2025.log new file mode 100644 index 0000000..3f0e3ba --- /dev/null +++ a/AnonymousSubmission/anonymous-submission-latex-2025.log @@ -0,0 +1,650 @@ +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 30 JUN 2024 03:27 +entering extended mode + restricted \write18 enabled. + file:line:error style messages enabled. + %&-line parsing enabled. 
+**anonymous-submission-latex-2025 +(./anonymous-submission-latex-2025.tex +LaTeX2e <2022-11-01> patch level 1 +L3 programming layer <2023-02-22> (d:/software/texlive/2023/texmf-dist/tex/latex/base/article.cls +Document Class: article 2022/07/02 v1.4n Standard LaTeX document class +(d:/software/texlive/2023/texmf-dist/tex/latex/base/size10.clo +File: size10.clo 2022/07/02 v1.4n Standard LaTeX file (size option) +) +\c@part=\count185 +\c@section=\count186 +\c@subsection=\count187 +\c@subsubsection=\count188 +\c@paragraph=\count189 +\c@subparagraph=\count190 +\c@figure=\count191 +\c@table=\count192 +\abovecaptionskip=\skip48 +\belowcaptionskip=\skip49 +\bibindent=\dimen140 +) (./aaai25.sty +Package: aaai25 2025/05/08 AAAI 2025 Submission format + +Conference Style for AAAI for LaTeX 2e -- version for submission +\titlebox=\skip50 +) (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/times.sty +Package: times 2020/03/25 PSNFSS-v9.3 (SPQR) +) (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/helvet.sty +Package: helvet 2020/03/25 PSNFSS-v9.3 (WaS) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty +Package: keyval 2022/05/29 v1.15 key=value parser (DPC) +\KV@toks@=\toks16 +)) (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/courier.sty +Package: courier 2020/03/25 PSNFSS-v9.3 (WaS) +) (d:/software/texlive/2023/texmf-dist/tex/latex/url/url.sty +\Urlmuskip=\muskip16 +Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. 
+) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty +Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty +Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty +Package: trig 2021/08/11 v1.11 sin cos tan (DPC) +) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg +File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration +) +Package graphics Info: Driver file: pdftex.def on input line 107. + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def +File: pdftex.def 2022/09/22 v1.2b Graphics/color driver for pdftex +)) +\Gin@req@height=\dimen141 +\Gin@req@width=\dimen142 +) (d:/software/texlive/2023/texmf-dist/tex/latex/natbib/natbib.sty +Package: natbib 2010/09/13 8.31b (PWD, AO) +\bibhang=\skip51 +\bibsep=\skip52 +LaTeX Info: Redefining \cite on input line 694. +\c@NAT@ctr=\count193 +) (d:/software/texlive/2023/texmf-dist/tex/latex/caption/caption.sty +Package: caption 2023/03/12 v3.6j Customizing captions (AR) + (d:/software/texlive/2023/texmf-dist/tex/latex/caption/caption3.sty +Package: caption3 2023/03/12 v2.4 caption3 kernel (AR) +\caption@tempdima=\dimen143 +\captionmargin=\dimen144 +\caption@leftmargin=\dimen145 +\caption@rightmargin=\dimen146 +\caption@width=\dimen147 +\caption@indent=\dimen148 +\caption@parindent=\dimen149 +\caption@hangindent=\dimen150 +Package caption Info: Standard document class detected. 
+) +\c@caption@flags=\count194 +\c@continuedfloat=\count195 +) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithm.sty +Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment + (d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty +Package: float 2001/11/08 v1.3d Float enhancements (AL) +\c@float@type=\count196 +\float@exts=\toks17 +\float@box=\box51 +\@float@everytoks=\toks18 +\@floatcapt=\box52 +) (d:/software/texlive/2023/texmf-dist/tex/latex/base/ifthen.sty +Package: ifthen 2022/04/13 v1.1d Standard LaTeX ifthen package (DPC) +) +\@float@every@algorithm=\toks19 +\c@algorithm=\count197 +) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithmic.sty +Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic' +\c@ALC@unique=\count198 +\c@ALC@line=\count199 +\c@ALC@rem=\count266 +\c@ALC@depth=\count267 +\ALC@tlm=\skip53 +\algorithmicindent=\skip54 +) (d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.sty +Package: subfigure 2002/03/15 v2.1.5 subfigure package +\subfigtopskip=\skip55 +\subfigcapskip=\skip56 +\subfigcaptopadj=\dimen151 +\subfigbottomskip=\skip57 +\subfigcapmargin=\dimen152 +\subfiglabelskip=\skip58 +\c@subfigure=\count268 +\c@subtable=\count269 + +**************************************** +* Local config file subfigure.cfg used * +**************************************** +(d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.cfg) +\subfig@top=\skip59 +\subfig@bottom=\skip60 +) (d:/software/texlive/2023/texmf-dist/tex/latex/diagbox/diagbox.sty +Package: diagbox 2020/02/09 v2.3 Making table heads with diagonal lines + (d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.sty +Package: pict2e 2020/09/30 v0.4b Improved picture commands (HjG,RN,JT) + (d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.cfg +File: pict2e.cfg 2016/02/05 v0.1u pict2e configuration for teTeX/TeXLive +) +Package pict2e Info: Driver file: pdftex.def on input 
line 112. +Package pict2e Info: Driver file for pict2e: p2e-pdftex.def on input line 114. + (d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/p2e-pdftex.def +File: p2e-pdftex.def 2016/02/05 v0.1u Driver-dependant file (RN,HjG,JT) +) +\pIIe@GRAPH=\toks20 +\@arclen=\dimen153 +\@arcrad=\dimen154 +\pIIe@tempdima=\dimen155 +\pIIe@tempdimb=\dimen156 +\pIIe@tempdimc=\dimen157 +\pIIe@tempdimd=\dimen158 +\pIIe@tempdime=\dimen159 +\pIIe@tempdimf=\dimen160 +) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/calc.sty +Package: calc 2017/05/25 v4.3 Infix arithmetic (KKT,FJ) +\calc@Acount=\count270 +\calc@Bcount=\count271 +\calc@Adimen=\dimen161 +\calc@Bdimen=\dimen162 +\calc@Askip=\skip61 +\calc@Bskip=\skip62 +LaTeX Info: Redefining \setlength on input line 80. +LaTeX Info: Redefining \addtolength on input line 81. +\calc@Ccount=\count272 +\calc@Cskip=\skip63 +) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/array.sty +Package: array 2022/09/04 v2.5g Tabular extension package (FMi) +\col@sep=\dimen163 +\ar@mcellbox=\box53 +\extrarowheight=\dimen164 +\NC@list=\toks21 +\extratabsurround=\skip64 +\backup@length=\skip65 +\ar@cellbox=\box54 +) +\diagbox@boxa=\box55 +\diagbox@boxb=\box56 +\diagbox@boxm=\box57 +\diagbox@wd=\dimen165 +\diagbox@ht=\dimen166 +\diagbox@insepl=\dimen167 +\diagbox@insepr=\dimen168 +\diagbox@outsepl=\dimen169 +\diagbox@outsepr=\dimen170 +) (d:/software/texlive/2023/texmf-dist/tex/latex/booktabs/booktabs.sty +Package: booktabs 2020/01/12 v1.61803398 Publication quality tables +\heavyrulewidth=\dimen171 +\lightrulewidth=\dimen172 +\cmidrulewidth=\dimen173 +\belowrulesep=\dimen174 +\belowbottomsep=\dimen175 +\aboverulesep=\dimen176 +\abovetopsep=\dimen177 +\cmidrulesep=\dimen178 +\cmidrulekern=\dimen179 +\defaultaddspace=\dimen180 +\@cmidla=\count273 +\@cmidlb=\count274 +\@aboverulesep=\dimen181 +\@belowrulesep=\dimen182 +\@thisruleclass=\count275 +\@lastruleclass=\count276 +\@thisrulewidth=\dimen183 +) 
(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty +Package: amsmath 2022/04/08 v2.17n AMS math features +\@mathmargin=\skip66 + +For additional information on amsmath, use the `?' option. +(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty +Package: amstext 2021/08/26 v2.01 AMS text + (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty +File: amsgen.sty 1999/11/30 v2.0 generic functions +\@emptytoks=\toks22 +\ex@=\dimen184 +)) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty +Package: amsbsy 1999/11/29 v1.2d Bold Symbols +\pmbraise@=\dimen185 +) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty +Package: amsopn 2022/04/08 v2.04 operator names +) +\inf@bad=\count277 +LaTeX Info: Redefining \frac on input line 234. +\uproot@=\count278 +\leftroot@=\count279 +LaTeX Info: Redefining \overline on input line 399. +LaTeX Info: Redefining \colon on input line 410. +\classnum@=\count280 +\DOTSCASE@=\count281 +LaTeX Info: Redefining \ldots on input line 496. +LaTeX Info: Redefining \dots on input line 499. +LaTeX Info: Redefining \cdots on input line 620. +\Mathstrutbox@=\box58 +\strutbox@=\box59 +LaTeX Info: Redefining \big on input line 722. +LaTeX Info: Redefining \Big on input line 723. +LaTeX Info: Redefining \bigg on input line 724. +LaTeX Info: Redefining \Bigg on input line 725. +\big@size=\dimen186 +LaTeX Font Info: Redeclaring font encoding OML on input line 743. +LaTeX Font Info: Redeclaring font encoding OMS on input line 744. +\macc@depth=\count282 +LaTeX Info: Redefining \bmod on input line 905. +LaTeX Info: Redefining \pmod on input line 910. +LaTeX Info: Redefining \smash on input line 940. +LaTeX Info: Redefining \relbar on input line 970. +LaTeX Info: Redefining \Relbar on input line 971. 
+\c@MaxMatrixCols=\count283 +\dotsspace@=\muskip17 +\c@parentequation=\count284 +\dspbrk@lvl=\count285 +\tag@help=\toks23 +\row@=\count286 +\column@=\count287 +\maxfields@=\count288 +\andhelp@=\toks24 +\eqnshift@=\dimen187 +\alignsep@=\dimen188 +\tagshift@=\dimen189 +\tagwidth@=\dimen190 +\totwidth@=\dimen191 +\lineht@=\dimen192 +\@envbody=\toks25 +\multlinegap=\skip67 +\multlinetaggap=\skip68 +\mathdisplay@stack=\toks26 +LaTeX Info: Redefining \[ on input line 2953. +LaTeX Info: Redefining \] on input line 2954. +) (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty +Package: amssymb 2013/01/14 v3.01 AMS font symbols + (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty +Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support +\symAMSa=\mathgroup4 +\symAMSb=\mathgroup5 +LaTeX Font Info: Redeclaring math symbol \hbar on input line 98. +LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' +(Font) U/euf/m/n --> U/euf/b/n on input line 106. 
+)) (d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty +Package: mathtools 2022/06/29 v1.29 mathematical typesetting tools + (d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty +Package: mhsetup 2021/03/18 v1.4 programming setup (MH) +) +\g_MT_multlinerow_int=\count289 +\l_MT_multwidth_dim=\dimen193 +\origjot=\skip69 +\l_MT_shortvdotswithinadjustabove_dim=\dimen194 +\l_MT_shortvdotswithinadjustbelow_dim=\dimen195 +\l_MT_above_intertext_sep=\dimen196 +\l_MT_below_intertext_sep=\dimen197 +\l_MT_above_shortintertext_sep=\dimen198 +\l_MT_below_shortintertext_sep=\dimen199 +\xmathstrut@box=\box60 +\xmathstrut@dim=\dimen256 +) (d:/software/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty +Package: amsthm 2020/05/29 v2.20.6 +\thm@style=\toks27 +\thm@bodyfont=\toks28 +\thm@headfont=\toks29 +\thm@notefont=\toks30 +\thm@headpunct=\toks31 +\thm@preskip=\skip70 +\thm@postskip=\skip71 +\thm@headsep=\skip72 +\dth@everypar=\toks32 +) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex +\pgfutil@everybye=\toks33 +\pgfutil@tempdima=\dimen257 +\pgfutil@tempdimb=\dimen258 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def +\pgfutil@abb=\box61 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/pgf.revision.tex) +Package: pgfrcs 2023-01-15 v3.1.10 (3.1.10) +)) +Package: pgf 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex +Package: pgfsys 2023-01-15 v3.1.10 (3.1.10) + 
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex +\pgfkeys@pathtoks=\toks34 +\pgfkeys@temptoks=\toks35 + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeyslibraryfiltered.code.tex +\pgfkeys@tmptoks=\toks36 +)) +\pgf@x=\dimen259 +\pgf@y=\dimen260 +\pgf@xa=\dimen261 +\pgf@ya=\dimen262 +\pgf@xb=\dimen263 +\pgf@yb=\dimen264 +\pgf@xc=\dimen265 +\pgf@yc=\dimen266 +\pgf@xd=\dimen267 +\pgf@yd=\dimen268 +\w@pgf@writea=\write3 +\r@pgf@reada=\read2 +\c@pgf@counta=\count290 +\c@pgf@countb=\count291 +\c@pgf@countc=\count292 +\c@pgf@countd=\count293 +\t@pgf@toka=\toks37 +\t@pgf@tokb=\toks38 +\t@pgf@tokc=\toks39 +\pgf@sys@id@count=\count294 + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg +File: pgf.cfg 2023-01-15 v3.1.10 (3.1.10) +) +Driver file for pgf: pgfsys-pdftex.def + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def +File: pgfsys-pdftex.def 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def +File: pgfsys-common-pdf.def 2023-01-15 v3.1.10 (3.1.10) +))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex +File: pgfsyssoftpath.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfsyssoftpath@smallbuffer@items=\count295 +\pgfsyssoftpath@bigbuffer@items=\count296 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex +File: pgfsysprotocol.code.tex 2023-01-15 v3.1.10 (3.1.10) +)) (d:/software/texlive/2023/texmf-dist/tex/latex/xcolor/xcolor.sty +Package: xcolor 2022/06/12 v2.14 LaTeX color extensions (UK) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/color.cfg +File: color.cfg 2016/01/02 v1.6 sample color configuration +) +Package xcolor Info: Driver file: pdftex.def on input line 227. 
+ (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/mathcolor.ltx) +Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1353. +Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1357. +Package xcolor Info: Model `RGB' extended on input line 1369. +Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1371. +Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1372. +Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1373. +Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1374. +Package xcolor Info: Model `Gray' substituted by `gray' on input line 1375. +Package xcolor Info: Model `wave' substituted by `hsb' on input line 1376. +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex +Package: pgfcore 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex +\pgfmath@dimen=\dimen269 +\pgfmath@count=\count297 +\pgfmath@box=\box62 +\pgfmath@toks=\toks40 +\pgfmath@stack@operand=\toks41 +\pgfmath@stack@operation=\toks42 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) 
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex +\c@pgfmathroundto@lastzeros=\count298 +)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex +File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@picminx=\dimen270 +\pgf@picmaxx=\dimen271 +\pgf@picminy=\dimen272 +\pgf@picmaxy=\dimen273 +\pgf@pathminx=\dimen274 +\pgf@pathmaxx=\dimen275 +\pgf@pathminy=\dimen276 +\pgf@pathmaxy=\dimen277 +\pgf@xx=\dimen278 +\pgf@xy=\dimen279 +\pgf@yx=\dimen280 +\pgf@yy=\dimen281 +\pgf@zx=\dimen282 +\pgf@zy=\dimen283 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex +File: pgfcorepathconstruct.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@path@lastx=\dimen284 +\pgf@path@lasty=\dimen285 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex +File: pgfcorepathusage.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@shorten@end@additional=\dimen286 +\pgf@shorten@start@additional=\dimen287 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex +File: pgfcorescopes.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfpic=\box63 +\pgf@hbox=\box64 +\pgf@layerbox@main=\box65 +\pgf@picture@serial@count=\count299 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex +File: pgfcoregraphicstate.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgflinewidth=\dimen288 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex +File: pgfcoretransformations.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@pt@x=\dimen289 
+\pgf@pt@y=\dimen290 +\pgf@pt@temp=\dimen291 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex +File: pgfcorequick.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex +File: pgfcoreobjects.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex +File: pgfcorepathprocessing.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex +File: pgfcorearrows.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfarrowsep=\dimen292 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex +File: pgfcoreshade.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@max=\dimen293 +\pgf@sys@shading@range@num=\count300 +\pgf@shadingcount=\count301 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex +File: pgfcoreimage.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex +File: pgfcoreexternal.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfexternal@startupbox=\box66 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex +File: pgfcorelayers.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex +File: pgfcoretransparency.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex +File: pgfcorepatterns.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex +File: pgfcorerdf.code.tex 2023-01-15 v3.1.10 (3.1.10) +))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex +File: pgfmoduleshapes.code.tex 2023-01-15 v3.1.10 (3.1.10) 
+\pgfnodeparttextbox=\box67 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex +File: pgfmoduleplot.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty +Package: pgfcomp-version-0-65 2023-01-15 v3.1.10 (3.1.10) +\pgf@nodesepstart=\dimen294 +\pgf@nodesepend=\dimen295 +) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty +Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10) +)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/math/pgfmath.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex +Package: pgffor 2023-01-15 v3.1.10 (3.1.10) +\pgffor@iter=\dimen296 +\pgffor@skip=\dimen297 +\pgffor@stack=\toks43 +\pgffor@toks=\toks44 +)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex +Package: tikz 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex +File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@plot@mark@count=\count302 +\pgfplotmarksize=\dimen298 +) +\tikz@lastx=\dimen299 +\tikz@lasty=\dimen300 +\tikz@lastxsaved=\dimen301 +\tikz@lastysaved=\dimen302 +\tikz@lastmovetox=\dimen303 +\tikz@lastmovetoy=\dimen304 +\tikzleveldistance=\dimen305 +\tikzsiblingdistance=\dimen306 +\tikz@figbox=\box68 +\tikz@figbox@bg=\box69 +\tikz@tempbox=\box70 +\tikz@tempbox@bg=\box71 +\tikztreelevel=\count303 +\tikznumberofchildren=\count304 +\tikznumberofcurrentchild=\count305 +\tikz@fig@count=\count306 + 
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex +File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfmatrixcurrentrow=\count307 +\pgfmatrixcurrentcolumn=\count308 +\pgf@matrix@numberofcolumns=\count309 +) +\tikz@expandcount=\count310 + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex +File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10) +))) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/bm.sty +Package: bm 2022/01/05 v1.2f Bold Symbol Support (DPC/FMi) +\symboldoperators=\mathgroup6 +\symboldletters=\mathgroup7 +\symboldsymbols=\mathgroup8 +Package bm Info: No bold for \OMX/cmex/m/n, using \pmb. +Package bm Info: No bold for \U/msa/m/n, using \pmb. +Package bm Info: No bold for \U/msb/m/n, using \pmb. +LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 149. +) (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/esvect.sty +Package: esvect +\symesvector=\mathgroup9 +) (d:/software/texlive/2023/texmf-dist/tex/latex/multirow/multirow.sty +Package: multirow 2021/03/15 v2.8 Span multiple rows of a table +\multirow@colwidth=\skip73 +\multirow@cntb=\count311 +\multirow@dima=\skip74 +\bigstrutjot=\dimen307 +) +\c@theorem=\count312 + (d:/software/texlive/2023/texmf-dist/tex/latex/newfloat/newfloat.sty +Package: newfloat 2019/09/02 v1.1l Defining new floating environments (AR) +) (d:/software/texlive/2023/texmf-dist/tex/latex/listings/listings.sty +\lst@mode=\count313 +\lst@gtempboxa=\box72 +\lst@token=\toks45 +\lst@length=\count314 +\lst@currlwidth=\dimen308 +\lst@column=\count315 +\lst@pos=\count316 +\lst@lostspace=\dimen309 +\lst@width=\dimen310 +\lst@newlines=\count317 +\lst@lineno=\count318 +\lst@maxwidth=\dimen311 + (d:/software/texlive/2023/texmf-dist/tex/latex/listings/lstmisc.sty +File: lstmisc.sty 2023/02/27 1.9 (Carsten Heinz) +\c@lstnumber=\count319 +\lst@skipnumbers=\count320 +\lst@framebox=\box73 +) 
(d:/software/texlive/2023/texmf-dist/tex/latex/listings/listings.cfg +File: listings.cfg 2023/02/27 1.9 listings configuration +)) +Package: listings 2023/02/27 1.9 (Carsten Heinz) +\@float@every@listing=\toks46 +\c@listing=\count321 + (d:/software/texlive/2023/texmf-dist/tex/latex/natbib/bibentry.sty +Package: bibentry 2007/10/30 1.5 (PWD) +) +LaTeX Font Info: Trying to load font information for OT1+ptm on input line 192. + (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/ot1ptm.fd +File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. +) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def +File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX) +\l__color_backend_stack_int=\count322 +\l__pdf_internal_box=\box74 +) (./anonymous-submission-latex-2025.aux) +\openout1 = `anonymous-submission-latex-2025.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 192. +LaTeX Font Info: ... okay on input line 192. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 192. +LaTeX Font Info: ... okay on input line 192. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 192. +LaTeX Font Info: ... okay on input line 192. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 192. +LaTeX Font Info: ... okay on input line 192. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 192. +LaTeX Font Info: ... okay on input line 192. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 192. +LaTeX Font Info: ... okay on input line 192. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 192. +LaTeX Font Info: ... okay on input line 192. + (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii +[Loading MPS to PDF converter (version 2006.09.02).] 
+\scratchcounter=\count323 +\scratchdimen=\dimen312 +\scratchbox=\box75 +\nofMPsegments=\count324 +\nofMParguments=\count325 +\everyMPshowfont=\toks47 +\MPscratchCnt=\count326 +\MPscratchDim=\dimen313 +\MPnumerator=\count327 +\makeMPintoPDFobject=\count328 +\everyMPtoPDFconversion=\toks48 +) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty +Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf +Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 485. + (d:/software/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg +File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live +)) +Package caption Info: Begin \AtBeginDocument code. +Package caption Info: float package is loaded. +Package caption Info: listings package is loaded. +Package caption Info: subfigure package is loaded. +Package caption Info: End \AtBeginDocument code. +Package newfloat Info: `float' package detected. +\c@lstlisting=\count329 +\c@eqfn=\count330 +\titlearea=\box76 +LaTeX Font Info: Trying to load font information for U+msa on input line 194. + (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd +File: umsa.fd 2013/01/14 v3.01 AMS symbols A +) +LaTeX Font Info: Trying to load font information for U+msb on input line 194. + (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd +File: umsb.fd 2013/01/14 v3.01 AMS symbols B +) +LaTeX Font Info: Trying to load font information for U+esvect on input line 194. 
+ (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/uesvect.fd +File: uesvect.fd +) +\actualheight=\skip75 + (./main/introduction.tex +Underfull \hbox (badness 3884) in paragraph at lines 39--52 +[]\OT1/ptm/m/n/10 Algorithm sta-bil-ity is promi-nently re-flected in the + [] + +[1{d:/software/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map}{d:/software/texlive/2023/texmf-dist/fonts/enc/dvips/base/8r.enc} + + + +]) (./main/preliminaries.tex) (./main/motivation.tex +Overfull \hbox (8.45978pt too wide) detected at line 89 +[][] + [] + +[2] +Overfull \hbox (10.79735pt too wide) detected at line 209 +[][] + [] + +[3]) (./main/theory.tex +Underfull \hbox (badness 2035) in paragraph at lines 122--127 +\OT1/ptm/m/n/10 2000) are ver-i-fied. Fur-ther-more, As-sump-tions (TS) of + [] + +[4]) (./main/experiment.tex (./main/pic/randomwalk.tex) (./main/pic/BairdExample.tex) [5] +
+File: main/pic/maze_13_13.pdf Graphic file (type pdf) + +Package pdftex.def Info: main/pic/maze_13_13.pdf used on input line 63. +(pdftex.def) Requested size: 98.63116pt x 77.52382pt. +) (./main/relatedwork.tex + +LaTeX Warning: Reference `differenceRandVMQ' on page 6 undefined on input line 4. + +) (./main/conclusion.tex) (./anonymous-submission-latex-2025.bbl [6 <./main/pic/maze_13_13.pdf>]) [7] (./anonymous-submission-latex-2025.aux) + +LaTeX Warning: There were undefined references. + + ) +Here is how much of TeX's memory you used: + 19106 strings out of 476025 + 367928 string characters out of 5789524 + 1890382 words of memory out of 5000000 + 39164 multiletter control sequences out of 15000+600000 + 548871 words of font info for 115 fonts, out of 8000000 for 9000 + 1141 hyphenation exceptions out of 8191 + 84i,17n,89p,423b,1058s stack positions out of 10000i,1000n,20000p,200000b,200000s + +Output written on anonymous-submission-latex-2025.pdf (7 pages, 295290 bytes). +PDF statistics: + 155 PDF objects out of 1000 (max. 8388607) + 96 compressed objects within 1 object stream + 0 named destinations out of 1000 (max. 500000) + 18 words of extra memory for PDF output out of 10000 (max. 
10000000) + diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.pdf a/AnonymousSubmission/anonymous-submission-latex-2025.pdf new file mode 100644 index 0000000..f8e0690 Binary files /dev/null and a/AnonymousSubmission/anonymous-submission-latex-2025.pdf differ diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.synctex.gz a/AnonymousSubmission/anonymous-submission-latex-2025.synctex.gz new file mode 100644 index 0000000..7afce30 Binary files /dev/null and a/AnonymousSubmission/anonymous-submission-latex-2025.synctex.gz differ diff --git b/AnonymousSubmission/anonymous-submission-latex-2025.tex a/AnonymousSubmission/anonymous-submission-latex-2025.tex new file mode 100644 index 0000000..015428c --- /dev/null +++ a/AnonymousSubmission/anonymous-submission-latex-2025.tex @@ -0,0 +1,230 @@ +%File: anonymous-submission-latex-2025.tex +\documentclass[letterpaper]{article} % DO NOT CHANGE THIS +\usepackage[submission]{aaai25} % DO NOT CHANGE THIS +\usepackage{times} % DO NOT CHANGE THIS +\usepackage{helvet} % DO NOT CHANGE THIS +\usepackage{courier} % DO NOT CHANGE THIS +\usepackage[hyphens]{url} % DO NOT CHANGE THIS +\usepackage{graphicx} % DO NOT CHANGE THIS +\urlstyle{rm} % DO NOT CHANGE THIS +\def\UrlFont{\rm} % DO NOT CHANGE THIS +\usepackage{natbib} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT +\usepackage{caption} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT +\frenchspacing % DO NOT CHANGE THIS +\setlength{\pdfpagewidth}{8.5in} % DO NOT CHANGE THIS +\setlength{\pdfpageheight}{11in} % DO NOT CHANGE THIS +% +% These are recommended to typeset algorithms but not required. See the subsubsection on algorithms. Remove them if you don't have algorithms in your paper. 
+\usepackage{algorithm} +\usepackage{algorithmic} +\usepackage{subfigure} +\usepackage{diagbox} +\usepackage{booktabs} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{mathtools} +\usepackage{amsthm} +\usepackage{tikz} +\usepackage{bm} +\usepackage{esvect} +\usepackage{multirow} + +\theoremstyle{plain} +% \newtheorem{theorem}{Theorem}[section] +\newtheorem{theorem}{Theorem} +\newtheorem{proposition}[theorem]{Proposition} +\newtheorem{lemma}[theorem]{Lemma} +\newtheorem{corollary}[theorem]{Corollary} +\theoremstyle{definition} +\newtheorem{definition}[theorem]{Definition} +\newtheorem{assumption}[theorem]{Assumption} +\theoremstyle{remark} +\newtheorem{remark}[theorem]{Remark} + + + + + + +% +% These are are recommended to typeset listings but not required. See the subsubsection on listing. Remove this block if you don't have listings in your paper. +\usepackage{newfloat} +\usepackage{listings} +\DeclareCaptionStyle{ruled}{labelfont=normalfont,labelsep=colon,strut=off} % DO NOT CHANGE THIS +\lstset{% + basicstyle={\footnotesize\ttfamily},% footnotesize acceptable for monospace + numbers=left,numberstyle=\footnotesize,xleftmargin=2em,% show line numbers, remove this entire line if you don't want the numbers. + aboveskip=0pt,belowskip=0pt,% + showstringspaces=false,tabsize=2,breaklines=true} +\floatstyle{ruled} +\newfloat{listing}{tb}{lst}{} +\floatname{listing}{Listing} +% +% Keep the \pdfinfo as shown here. There's no need +% for you to add the /Title and /Author tags. 
+\pdfinfo{ +/TemplateVersion (2025.1) +} + +% DISALLOWED PACKAGES +% \usepackage{authblk} -- This package is specifically forbidden +% \usepackage{balance} -- This package is specifically forbidden +% \usepackage{color (if used in text) +% \usepackage{CJK} -- This package is specifically forbidden +% \usepackage{float} -- This package is specifically forbidden +% \usepackage{flushend} -- This package is specifically forbidden +% \usepackage{fontenc} -- This package is specifically forbidden +% \usepackage{fullpage} -- This package is specifically forbidden +% \usepackage{geometry} -- This package is specifically forbidden +% \usepackage{grffile} -- This package is specifically forbidden +% \usepackage{hyperref} -- This package is specifically forbidden +% \usepackage{navigator} -- This package is specifically forbidden +% (or any other package that embeds links such as navigator or hyperref) +% \indentfirst} -- This package is specifically forbidden +% \layout} -- This package is specifically forbidden +% \multicol} -- This package is specifically forbidden +% \nameref} -- This package is specifically forbidden +% \usepackage{savetrees} -- This package is specifically forbidden +% \usepackage{setspace} -- This package is specifically forbidden +% \usepackage{stfloats} -- This package is specifically forbidden +% \usepackage{tabu} -- This package is specifically forbidden +% \usepackage{titlesec} -- This package is specifically forbidden +% \usepackage{tocbibind} -- This package is specifically forbidden +% \usepackage{ulem} -- This package is specifically forbidden +% \usepackage{wrapfig} -- This package is specifically forbidden +% DISALLOWED COMMANDS +% \nocopyright -- Your paper will not be published if you use this command +% \addtolength -- This command may not be used +% \balance -- This command may not be used +% \baselinestretch -- Your paper will not be published if you use this command +% \clearpage -- No page breaks of any kind may be used for the final 
version of your paper +% \columnsep -- This command may not be used +% \newpage -- No page breaks of any kind may be used for the final version of your paper +% \pagebreak -- No page breaks of any kind may be used for the final version of your paper +% \pagestyle -- This command may not be used +% \tiny -- This is not an acceptable font size. +% \vspace{- -- No negative value may be used in proximity of a caption, figure, table, section, subsection, subsubsection, or reference +% \vskip{- -- No negative value may be used to alter spacing above or below a caption, figure, table, section, subsection, subsubsection, or reference + +\setcounter{secnumdepth}{0} %May be changed to 1 or 2 if section numbers are desired. + +% The file aaai25.sty is the style file for AAAI Press +% proceedings, working notes, and technical reports. +% + +% Title + +% Your title must be in mixed case, not sentence case. +% That means all verbs (including short verbs like be, is, using, and go), +% nouns, adverbs, adjectives should be capitalized, including both words in hyphenated terms, while +% articles, conjunctions, and prepositions are lower case unless they +% directly follow a colon or long dash +\title{AAAI Press Anonymous Submission\\Instructions for Authors Using \LaTeX{}} +\author{ + %Authors + % All authors must be in the same font size and format. + Written by AAAI Press Staff\textsuperscript{\rm 1}\thanks{With help from the AAAI Publications Committee.}\\ + AAAI Style Contributions by Pater Patel Schneider, + Sunil Issar,\\ + J. Scott Penberthy, + George Ferguson, + Hans Guesgen, + Francisco Cruz\equalcontrib, + Marc Pujol-Gonzalez\equalcontrib +} +\affiliations{ + %Affiliations + \textsuperscript{\rm 1}Association for the Advancement of Artificial Intelligence\\ + % If you have multiple authors and multiple affiliations + % use superscripts in text and roman font to identify them. + % For example, + + % Sunil Issar\textsuperscript{\rm 2}, + % J. 
Scott Penberthy\textsuperscript{\rm 3}, + % George Ferguson\textsuperscript{\rm 4}, + % Hans Guesgen\textsuperscript{\rm 5} + % Note that the comma should be placed after the superscript + + 1101 Pennsylvania Ave, NW Suite 300\\ + Washington, DC 20004 USA\\ + % email address must be in roman text type, not monospace or sans serif + proceedings-questions@aaai.org +% +% See more examples next +} + +%Example, Single Author, ->> remove \iffalse,\fi and place them surrounding AAAI title to use it +\iffalse +\title{My Publication Title --- Single Author} +\author { + Author Name +} +\affiliations{ + Affiliation\\ + Affiliation Line 2\\ + name@example.com +} +\fi + +\iffalse +%Example, Multiple Authors, ->> remove \iffalse,\fi and place them surrounding AAAI title to use it +\title{My Publication Title --- Multiple Authors} +\author { + % Authors + First Author Name\textsuperscript{\rm 1}, + Second Author Name\textsuperscript{\rm 2}, + Third Author Name\textsuperscript{\rm 1} +} +\affiliations { + % Affiliations + \textsuperscript{\rm 1}Affiliation 1\\ + \textsuperscript{\rm 2}Affiliation 2\\ + firstAuthor@affiliation1.com, secondAuthor@affilation2.com, thirdAuthor@affiliation1.com +} +\fi + + +% REMOVE THIS: bibentry +% This is only needed to show inline citations in the guidelines document. You should not need it and can safely delete it. +\usepackage{bibentry} +% END REMOVE bibentry + +\begin{document} +\setcounter{theorem}{0} +\maketitle +% \setcounter{theorem}{0} +\begin{abstract} + Existing research on + value-based reinforcement learning typically minimizes the error. + However, is error minimization really the only option + for value-based reinforcement learning? + We can easily observe that a policy's action-selection + probabilities often depend only on the relative values of actions, + and have nothing to do with their absolute values. 
+ Based on this observation, we propose the objective + of variance minimization instead of error minimization, + derive several new variance minimization algorithms, all of which include an additional parameter $\omega$, + and conduct an analysis of the convergence rate and experiments. + The experimental results show that our proposed variance minimization algorithms + converge much faster. +\end{abstract} + +% Uncomment the following to link to your code, datasets, an extended version or similar. +% +% \begin{links} +% \link{Code}{https://aaai.org/example/code} +% \link{Datasets}{https://aaai.org/example/datasets} +% \link{Extended version}{https://aaai.org/example/extended-version} +% \end{links} + +\input{main/introduction.tex} +\input{main/preliminaries.tex} +\input{main/motivation.tex} +\input{main/theory.tex} +\input{main/experiment.tex} +\input{main/relatedwork.tex} +\input{main/conclusion.tex} + +\bibliography{aaai25} + +\end{document} diff --git b/AnonymousSubmission/figure1.pdf a/AnonymousSubmission/figure1.pdf new file mode 100644 index 0000000..76b3c1f Binary files /dev/null and a/AnonymousSubmission/figure1.pdf differ diff --git b/AnonymousSubmission/figure2.pdf a/AnonymousSubmission/figure2.pdf new file mode 100644 index 0000000..8353d7c Binary files /dev/null and a/AnonymousSubmission/figure2.pdf differ diff --git b/AnonymousSubmission/main/conclusion.tex a/AnonymousSubmission/main/conclusion.tex new file mode 100644 index 0000000..daa3e1f --- /dev/null +++ a/AnonymousSubmission/main/conclusion.tex @@ -0,0 +1,18 @@ +\section{Conclusion and Future Work} +Value-based reinforcement learning typically aims +to minimize error as an optimization objective. +As an alternative, this study proposes new objective +functions, VBE and VPBE, and derives several variance minimization algorithms, including VMTD, +VMTDC and VMETD. +% The VMTD algorithm +% is essentially an adjustment or correction to the traditional +% TD update. 
+% Both +% algorithms are capable of stabilizing gradient estimation, reducing +% the variance of gradient estimation and accelerating convergence. +All algorithms demonstrated superior performance in policy +evaluation and control experiments. +Future work may include, but are not limited +to, (1) analysis of the convergence rate of VMTDC and VMETD. +(2) extensions of VBE and VPBE to multi-step returns. +(3) extensions to nonlinear approximations, such as neural networks. \ No newline at end of file diff --git b/AnonymousSubmission/main/experiment.tex a/AnonymousSubmission/main/experiment.tex new file mode 100644 index 0000000..955abdd --- /dev/null +++ a/AnonymousSubmission/main/experiment.tex @@ -0,0 +1,196 @@ +\section{Experimental Studies} +This section assesses algorithm performance through experiments, +which are divided into policy evaluation experiments and control experiments. + +\subsection{Testing Tasks} +\textbf{Random-walk:} as shown in Figure \ref{randomwalk}, all episodes +start in the center state, $C$, and proceed to left or right by one state on each +step, equiprobably. Episodes terminate either on the extreme left or +the extreme right, and get a reward of $+1$ if terminate on the right, or +$0$ in the other case. In this task, the true value for each state is the +probability of starting from that state and terminating on the right +\cite{Sutton2018book}. +Thus, the true values of states from $A$ to $E$ are +$\frac{1}{6},\frac{2}{6},\frac{3}{6},\frac{4}{6},\frac{5}{6}$, respectively. +The discount factor $\gamma=1.0$. +There are three standard kinds of features for random-walk problems: tabular +feature, inverted feature and dependent feature \cite{sutton2009fast}. +The feature matrices corresponding to three random walks are shown in Appendix. +Conduct experiments using +an on-policy approach in the Random-walk environment. 
+\begin{figure} + \begin{center} + \input{main/pic/randomwalk.tex} + \caption{Random walk.} + \label{randomwalk} + \end{center} +\end{figure} +\begin{figure} + \begin{center} + \input{main/pic/BairdExample.tex} + \caption{7-state version of Baird's off-policy counterexample.} + \label{bairdexample} + \end{center} +\end{figure} + +\textbf{Baird's off-policy counterexample:} This task is well known as a +counterexample, in which TD diverges \cite{baird1995residual,sutton2009fast}. As +shown in Figure \ref{bairdexample}, the reward for each transition is zero. Thus the true values are zero for all states and for any given policy. The behaviour policy +chooses actions represented by solid lines with a probability of $\frac{1}{7}$ +and actions represented by dotted lines with a probability of $\frac{6}{7}$. The +target policy is expected to choose the solid line with probability greater than $\frac{1}{7}$, +and it chooses the solid line with a probability of $1$ in this paper. + The discount factor is $\gamma =0.99$, and the feature matrix is +% defined in Appendix \ref{experimentaldetails} \cite{baird1995residual,sutton2009fast,maei2011gradient}. +defined in the Appendix. + +\textbf{Maze}: The learning agent should find a shortest path from the upper +left corner to the lower right corner. In each state, +there are four alternative actions: $up$, $down$, $left$, and $right$, which +take the agent deterministically to the corresponding neighbour state, except when +% \begin{wrapfigure}{r}{3cm} +% \centering +% \includegraphics[scale=0.15]{main/pic/maze_13_13.pdf} +% % \caption{The 2-state counterexample.} +% \end{wrapfigure} + a movement is blocked by an obstacle or the edge +of the maze. Rewards are $-1$ in all transitions until the +agent reaches the goal state. +The discount factor is $\gamma=0.99$, and states $s$ are represented by tabular +features. The maximum number of moves in the game is set to 1000. 
+ \begin{figure} +\centering +\includegraphics[scale=0.20]{main/pic/maze_13_13.pdf} +\caption{Maze.} + \end{figure} + +\textbf{The other three control environments}: Cliff Walking, Mountain Car, and Acrobot are +selected from the gym official website and correspond to the following +versions: ``CliffWalking-v0'', ``MountainCar-v0'' and ``Acrobot-v1''. +For specific details, please refer to the gym official website. +The maximum number of steps for the Mountain Car environment is set to 1000, +while the default settings are used for the other two environments. In Mountain car and Acrobot, features are generated by tile coding. + +Please, refer to the Appendix for the selection of learning rates for all experiments. + +\subsection{Experimental Results and Analysis} +% \begin{figure}[htb] +% \vskip 0.2in +% \begin{center} +% \subfigure[Dependent]{ +% \includegraphics[width=0.4\columnwidth, height=0.3\columnwidth]{main/pic/dependent_new.pdf} +% \label{DependentFull} +% } +% \subfigure[Tabular]{ +% \includegraphics[width=0.4\columnwidth, height=0.3\columnwidth]{main/pic/tabular_new.pdf} +% \label{TabularFull} +% } +% \\ +% \subfigure[Inverted]{ +% \includegraphics[width=0.4\columnwidth, height=0.3\columnwidth]{main/pic/inverted_new.pdf} +% \label{InvertedFull} +% } +% \subfigure[counterexample]{ +% \includegraphics[width=0.4\columnwidth, height=0.3\columnwidth]{main/pic/counterexample_quanju_new.pdf} +% \label{CounterExampleFull} +% } +% \caption{Learning curses of four evaluation environments.} +% \label{Evaluation_full} +% \end{center} +% \vskip -0.2in +% \end{figure} +% \begin{figure*}[htb] +% \vskip 0.2in +% \begin{center} +% \subfigure[Maze]{ +% \includegraphics[width=0.55\columnwidth, height=0.4\columnwidth]{main/pic/maze_complete.pdf} +% \label{MazeFull} +% } +% \subfigure[Cliff Walking]{ +% \includegraphics[width=0.55\columnwidth, height=0.4\columnwidth]{main/pic/cw_complete.pdf} +% \label{CliffWalkingFull} +% } +% \\ +% \subfigure[Mountain Car]{ +% 
\includegraphics[width=0.55\columnwidth, height=0.4\columnwidth]{main/pic/mt_complete.pdf} +% \label{MountainCarFull} +% } +% \subfigure[Acrobot]{ +% \includegraphics[width=0.55\columnwidth, height=0.4\columnwidth]{main/pic/Acrobot_complete.pdf} +% \label{AcrobotFull} +% } +% \caption{Learning curses of four contral environments.} +% \label{Complete_full} +% \end{center} +% \vskip -0.2in +% \end{figure*} +% \begin{table*}[htb] +% \centering +% \caption{Difference between R-learning and tabular VMQ.} +% \vskip 0.15in +% \begin{tabular}{c|cc} +% \hline +% algorithms&update formula \\ +% \hline +% R-learning&$Q_{k+1}(s,a)\leftarrow Q_{k}(s,a)+\alpha_k(r_{k+1}-m_{k}+ \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a))$\\ +% &$m_{k+1}\leftarrow m_{k}+\beta_k(r_{k+1}+\max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-m_{k})$\\ +% tabular VMQ&$Q_{k+1}(s,a)\leftarrow Q_{k}(s,a)+\alpha_k(r_{k+1}+\gamma \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-\omega_k)$\\ +% &$\omega_{k+1}\leftarrow \omega_{k}+\beta_k(r_{k+1}+\gamma \max_{b\in A}Q_{k}(s,b) - Q_{k}(s,a)-\omega_{k})$\\ +% \hline +% \end{tabular} +% \label{differenceRandVMQ} +% \vskip -0.1in +% \end{table*} + +The experiment needs further elaboration. +% For policy evaluation experiments, compare the performance of the VMTD, +% VMTDC, TD, and TDC algorithms. +% The vertical axis is unified as RVBE. + +% For policy evaluation experiments, the criteria for evaluating +% algorithms vary. The objective function minimized by our proposed +% new algorithm differs from that of other algorithms. Therefore, to +% ensure fairness in comparisons, this study only contrasts algorithm +% experiments in controlled settings. + +% This study will compare the performance of Sarsa, Q-learning, GQ(0), +% AC, VMSarsa, VMQ, and VMGQ(0) in four control environments. +% % All experiments involved in this paper were run independently for 100 times. 
+ +% The learning curses of the algorithms corresponding to +% policy evaluation experiments and control experiments are +% shown in Figures \ref{Evaluation_full} and \ref{Complete_full}, respectively. +% The shaded area in Figure \ref{Evaluation_full}, \ref{Complete_full} represents the standard deviation (std). + +% In the random-walk tasks, VMTD and VMTDC exhibit excellent performance, +% outperforming TD and TDC in the case of dependent random-walk. + +% In the 7-state example counter task, TD diverges, +% while VMTDC converges and performs better than TDC. +% From the update formula, it can be observed that the VMTD algorithm, like TDC, +% is also an adjustment or correction of the TD update. +% What is more surprising is that VMTD also maintains +% convergence and demonstrates the best performance. + +% In Maze, Mountain Car, and Acrobot, +% the convergence speed of VMSarsa, VMQ, and VMGQ(0) has +% been significantly improved compared to Sarsa, Q-learning, +% and GQ(0), respectively. The performance of the AC algorithm +% is at an intermediate level. The performances of VMSarsa, +% VMQ, and VMGQ(0) in these three experimental environments +% have no significant differences. + + +% In Cliff Walking, Sarsa and +% VMSarsa converge to slightly worse solutions compared to +% other algorithms. The convergence speed of VMSarsa is significantly +% better than that of Sarsa. The convergence speed of VMGQ(0) and VMQ +% is better than other algorithms, and the performance of VMGQ(0) is +% slightly better than that of VMQ. + +% In summary, the performance of VMSarsa, +% VMQ, and VMGQ(0) is better than that of other algorithms. +% In the Cliff Walking environment, +% the performance of VMGQ(0) is slightly better than that of +% VMSarsa and VMQ. In the other three experimental environments, +% the performances of VMSarsa, VMQ, and VMGQ(0) are close. 
\ No newline at end of file diff --git b/AnonymousSubmission/main/introduction.tex a/AnonymousSubmission/main/introduction.tex new file mode 100644 index 0000000..eec0038 --- /dev/null +++ a/AnonymousSubmission/main/introduction.tex @@ -0,0 +1,94 @@ +\section{Introduction} +\label{introduction} +Reinforcement learning can be mainly divided into two +categories: value-based reinforcement learning +and policy gradient-based reinforcement learning. This +paper focuses on temporal difference learning based on +linearly approximated value functions. Research in this area is +usually divided into two steps: the first step is to establish the convergence of the algorithm, and the second +step is to accelerate the algorithm. + + +In terms of stability, \citet{sutton1988learning} established the + convergence of on-policy TD(0), and \citet{tsitsiklis1997analysis} + established the convergence of on-policy TD($\lambda$). + However, the ``deadly triad'' consisting of off-policy learning, + bootstrapping, and function approximation makes + stability a difficult problem \citep{Sutton2018book}. + To solve this problem, convergent off-policy temporal difference + learning algorithms have been proposed, e.g., BR \cite{baird1995residual}, + GTD \cite{sutton2008convergent}, GTD2 and TDC \cite{sutton2009fast}, + ETD \cite{sutton2016emphatic}, and MRetrace \cite{chen2023modified}. + +In terms of acceleration, \citet{hackman2012faster} +proposed the Hybrid TD algorithm with an on-policy matrix. +\citet{liu2015finite,liu2016proximal,liu2018proximal} proposed +true stochastic algorithms, i.e., GTD-MP and GTD2-MP, from +a convex-concave saddle-point formulation. +Second-order methods are used to accelerate TD learning, +e.g., Quasi Newton TD \cite{givchi2015quasi} and +accelerated TD (ATD) \citep{pan2017accelerated}. +\citet{hallak2016generalized} introduced a new parameter +to reduce variance for ETD. +\citet{zhang2022truncated} proposed truncated ETD with a lower variance. 
+Variance Reduced TD with direct variance reduction technique \citep{johnson2013accelerating} is proposed by \cite{korda2015td} +and analysed by \cite{xu2019reanalysis}. +How to further improve the convergence rates of reinforcement learning +algorithms is currently still an open problem. + +Algorithm stability is prominently reflected in the changes +to the objective function, transitioning from mean squared +errors (MSE) \citep{Sutton2018book} to mean squared bellman errors (MSBE) \cite{baird1995residual}, then to +norm of the expected TD update \cite{sutton2009fast}, and further to +mean squared projected Bellman errors (MSPBE) \cite{sutton2009fast}. On the other hand, algorithm +acceleration is more centered around optimizing the iterative +update formula of the algorithm itself without altering the +objective function, thereby speeding up the convergence rate +of the algorithm. The emergence of new optimization objective +functions often leads to the development of novel algorithms. +The introduction of new algorithms, in turn, tends to inspire +researchers to explore methods for accelerating algorithms, +leading to the iterative creation of increasingly superior algorithms. + +The kernel loss function can be optimized using standard +gradient-based methods, addressing the issue of double +sampling in residual gradient algorithm \cite{feng2019kernel}. It ensures convergence +in both on-policy and off-policy scenarios. The logistic bellman +error is convex and smooth in the action-value function parameters, +with bounded gradients \cite{basserrano2021logistic}. In contrast, the squared Bellman error is +not convex in the action-value function parameters, and RL algorithms +based on recursive optimization using it are known to be unstable. 
+ + +% The value-based algorithms mentioned above aim to +% minimize some errors, e.g., mean squared errors \citep{Sutton2018book}, +% mean squared Bellman errors \cite{baird1995residual}, norm +% of the expected TD update \cite{sutton2009fast}, +% mean squared projected Bellman errors (MSPBE) \cite{sutton2009fast}, etc. +It is necessary to propose a new objective function, but the mentioned objective functions above are all some form of error. +Is minimizing error the only option for value-based reinforcement learning? + +For policy evaluation experiments, +differences in objective functions may result +in inconsistent fixed points. This inconsistency +makes it difficult to uniformly compare the superiority +of algorithms derived from different objective functions. +However, for control experiments, since the choice of actions +depends on the relative values of the Q values rather than their + absolute values, the presence of solution bias is acceptable. + +Based on this observation, we propose alternate objective functions +instead of minimizing errors. We minimize Variance of Bellman Error (VBE) and +Variance of Projected Bellman Error (VPBE) +and derive Variance Minimization (VM) algorithms. +These algorithms preserve the invariance of the optimal policy in the control environments, +but significantly reduce the variance of gradient estimation, +and thus hastening convergence. + +The contributions of this paper are as follows: +(1) Introduction of novel objective functions based on +the invariance of the optimal policy. +(2) Derived three variance minimization algorithms, including on-policy and off-policy. +(3) Proof of their convergence. +(4) Analysis of the convergence rate of on-policy algorithm. +(5) Experiments demonstrating the faster convergence speed of the proposed algorithms. 
diff --git b/AnonymousSubmission/main/motivation.tex a/AnonymousSubmission/main/motivation.tex new file mode 100644 index 0000000..a5bf5bb --- /dev/null +++ a/AnonymousSubmission/main/motivation.tex @@ -0,0 +1,288 @@ +\section{Variance Minimization Algorithms} +\subsection{Motivation} + As shown +in Table \ref{example_bias}, although there is a bias between the +true value and the predicted value, action $a_3$ is +still chosen under the greedy policy. +In contrast, supervised learning is usually used to predict temperature, humidity, morbidity, etc. If the bias is too large, the consequences could be serious. + +\begin{table}[t] + \caption{Greedy action selection is unaffected when a constant + bias is added to every $Q$ value.} + \label{example_bias} + \vskip 0.15in + \begin{center} + \begin{small} + \begin{sc} + \begin{tabular}{lcc} + \toprule + action & $Q$ value & $Q$ value with bias \\ + \midrule + $Q(s, a_0)$ & 1& 5 \\ + $Q(s, a_1)$ & 2& 6 \\ + $Q(s, a_2)$ & 3& 7 \\ + $Q(s, a_3)$ & 4& 8 \\ + $\arg \max_{a}Q(s,a)$ & $a_3$& $a_3$\\ + \bottomrule + \end{tabular} + \end{sc} + \end{small} + \end{center} + \vskip -0.1in +\end{table} + +In addition, reward shaping can significantly speed up the learning by adding a shaping +reward $F(s,s')$ to the original reward $r$, +where $F(s,s')$ is the general form of any state-based shaping reward. +Static potential-based reward shaping (Static PBRS) maintains the policy invariance if the +shaping reward follows from $F(s,s')=\gamma +f(s')-f(s)$ \cite{ng1999policy}. + +This means that we can make changes to the TD error $\delta = r+\gamma \bm{\theta}^{\top}\bm{\phi}'-\bm{\theta}^{\top}\bm{\phi} $ while still ensuring the invariance of the optimal policy, +\begin{equation*} + \delta - \omega= r+\gamma \bm{\theta}^{\top}\bm{\phi}'-\bm{\theta}^{\top}\bm{\phi} - \omega, +\end{equation*} +where $\omega$ is a constant, acting as a static PBRS. 
+This also means that algorithms with the optimization goal +of minimizing errors, after introducing reward shaping, +may result in larger or smaller bias. Fortunately, +as discussed above, bias is acceptable in reinforcement +learning. +However, the problem is that selecting an appropriate +$\omega$ requires expert knowledge. This forces us to learn +$\omega$ dynamically, i.e., $\omega=\omega_t $ and dynamic PBRS can also maintain the policy +invariance if the shaping reward is $F(s,t,s',t')=\gamma f(s',t')-f(s,t)$, +where $t$ is the time-step the agent reaches in state $s$ +\cite{devlin2012dynamic}. +However, this result requires the convergence guarantee of the dynamic potential +function $f(s,t)$. If $f(s,t)$ does not converge as the time-step +$t\rightarrow\infty$, the Q-values of dynamic PBRS are not +guaranteed to converge. + +Let $f_{\omega_t}(s)=\frac{\omega_t}{\gamma-1}$. +Thus, $F_{\omega_t}(s,s')=\gamma f_{\omega_t}(s')-f_{\omega_t}(s)= \omega_t$ +is a dynamic PBRS. And if $\omega$ converges finally, the dynamic potential +function $f(s,t)$ will converge. +Bias is the expected difference between the predicted value +and the true value. Therefore, under the premise of bootstrapping, we first think of +letting $\omega \doteq \mathbb{E}[\mathbb{E}[\delta|s]]=\mathbb{E}[\delta]$. + +% As we all know, the optimization process of linear TD(0) (semi-gradient) and linear TDC are as follows, respectively: +% \begin{equation*} +% \theta^{*}= \arg \min_{\theta} \mathbb{E}[(\mathbb{E}[\delta |s])^2], +% \end{equation*} +% and +% \begin{equation*} +% \theta^{*}=\arg \min_{\theta} \mathbb{E}[\delta \phi]^{\top} \mathbb{E}[\phi \phi^{\top}]^{-1} \mathbb{E}[\delta\phi]. +% \end{equation*} +% As a result, two novel objective functions and their corresponding algorithms are proposed, +% where $\omega$ is subsequently proven to converge, meaning that these two algorithms can maintain the invariance of the optimal strategy. 
+\subsection{Variance Minimization TD Learning: VMTD} +For on-policy learning, +a novel objective function, Variance of Bellman Error (VBE), is proposed as follows: +\begin{equation} + \begin{array}{ccl} + \arg \min_{\bm{\theta}}\text{VBE}(\bm{\theta})&=&\arg \min_{\bm{\theta}}\mathbb{E}[(\mathbb{E}[\delta|s]-\mathbb{E}[\mathbb{E}[\delta|s]])^2]\\ + &=&\arg \min_{\bm{\theta},\omega} \mathbb{E}[(\mathbb{E}[\delta|s]-\omega)^2]. + \end{array} +\end{equation} +Clearly, the objective is no longer to minimize the Bellman error itself. + +First, the parameter $\omega$ is derived directly based on +stochastic gradient descent: +\begin{equation} +\omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k-\omega_k), +\label{omega} +\end{equation} +where $\delta_k$ is the TD error as follows: +\begin{equation} +\delta_k = r_{k+1}+\gamma +\bm{\theta}_k^{\top}\bm{\phi}_{k+1}-\bm{\theta}_k^{\top}\bm{\phi}_k. +\label{delta} +\end{equation} + +Then, based on stochastic semi-gradient descent, the update of +the parameter $\bm{\theta}$ is as follows: +\begin{equation} +\bm{\theta}_{k+1}\leftarrow +\bm{\theta}_{k}+\alpha_k(\delta_k-\omega_k)\bm{\phi}_k. +\label{theta} +\end{equation} + +% The pseudocode of the VMTD algorithm is shown in Algorithm \ref{alg:algorithm 1}. + +For control tasks, two extensions of VMTD are named VMSarsa and VMQ respectively, +and the update formulas are shown below: +\begin{equation} + \bm{\theta}_{k+1}\leftarrow + \bm{\theta}_{k}+\alpha_k(\delta_k-\omega_k)\bm{\phi}(s_k,a_k), 
+\end{equation} +and +\begin{equation} + \omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k-\omega_k), +\end{equation} +where $\delta_k$ in VMSarsa is: +\begin{equation} + \delta_{k}=r_{k+1}+\gamma \bm{\theta}_{k}^{\top}\bm{\phi}(s_{k+1},a_{k+1}) - \bm{\theta}_{k}^{\top}\bm{\phi}(s_{k},a_{k}), + \label{deltaSarsa} +\end{equation} +and $\delta_k$ in VMQ is: +\begin{equation} + \delta_{k}=r_{k+1}+\gamma \max_{a\in A}\bm{\theta}_{k}^{\top}\bm{\phi}(s_{k+1},a) - \bm{\theta}_{k}^{\top}\bm{\phi}(s_{k},a_{k}). + \label{deltaQ} +\end{equation} + +\begin{algorithm}[t] + \caption{VMTD algorithm with linear function approximation in the on-policy setting} + \label{alg:algorithm 1} +\begin{algorithmic} + \STATE {\bfseries Input:} $\bm{\theta}_{0}$, $\omega_{0}$, $\gamma + $, learning rate $\alpha_t$ and $\beta_t$ + \REPEAT + \STATE For any episode, initialize $\bm{\theta}_{0}$ arbitrarily, $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$ and $\beta_t$ are constant.\\ + \FOR{$t=0$ {\bfseries to} $T-1$} + \STATE Take $A_t$ from $S_t$ according to policy $\pi$, and arrive at $S_{t+1}$\\ + \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\ + \STATE $\delta_t = R_{t+1}+\gamma\bm{\theta}_t^{\top}\bm{\phi}_{t+1}-\bm{\theta}_t^{\top}\bm{\phi}_t$ + \STATE $\bm{\theta}_{t+1}\leftarrow \bm{\theta}_{t}+\alpha_t(\delta_t-\omega_t)\bm{\phi}_t$ + \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t(\delta_t-\omega_t)$ + \STATE $S_t=S_{t+1}$ + \ENDFOR + \UNTIL{terminal episode} +\end{algorithmic} +\end{algorithm} +\begin{algorithm}[t] + \caption{VMTDC algorithm with linear function approximation in the off-policy setting} + \label{alg:algorithm 2} +\begin{algorithmic} + \STATE {\bfseries Input:} $\bm{\theta}_{0}$, $\bm{u}_0$, $\omega_{0}$, $\gamma + $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$ + \REPEAT + \STATE For any episode, initialize $\bm{\theta}_{0}$ 
arbitrarily, $\bm{u}_{0}$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\ + % \textbf{Output}: $\bm{\theta}^*$.\\ + \FOR{$t=0$ {\bfseries to} $T-1$} + \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\ + \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\ + \STATE $\delta_t = R_{t+1}+\gamma\bm{\theta}_t^{\top}\bm{\phi}_{t+1}-\bm{\theta}_t^{\top}\bm{\phi}_t$ + \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | S_t)}$ + \STATE $\bm{\theta}_{t+1}\leftarrow \bm{\theta}_{t}+\alpha_t \rho_t[ (\delta_t-\omega_t)\bm{\phi}_t - \gamma \bm{\phi}_{t+1}(\bm{\phi}^{\top}_{t} \bm{u}_{t})]$ + \STATE $\bm{u}_{t+1}\leftarrow \bm{u}_{t}+\zeta_t[\rho_t(\delta_t-\omega_t) - \bm{\phi}^{\top}_{t} \bm{u}_{t}] \bm{\phi}_t$ + \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t \rho_t(\delta_t-\omega_t)$ + \STATE $S_t=S_{t+1}$ + \ENDFOR + \UNTIL{terminal episode} +\end{algorithmic} +\end{algorithm} +\begin{algorithm}[t] + \caption{VMETD algorithm with linear function approximation in the off-policy setting} + \label{alg:algorithm 5} +\begin{algorithmic} + \STATE {\bfseries Input:} $\bm{\theta}_{0}$, $F_0$, $\omega_{0}$, $\gamma + $, learning rate $\alpha_t$, $\zeta_t$ and $\beta_t$, behavior policy $\mu$ and target policy $\pi$ + \REPEAT + \STATE For any episode, initialize $\bm{\theta}_{0}$ arbitrarily, $F_{0}$ to $1$ and $\omega_{0}$ to $0$, $\gamma \in (0,1]$, and $\alpha_t$, $\zeta_t$ and $\beta_t$ are constant.\\ + % \textbf{Output}: $\theta^*$.\\ + \FOR{$t=0$ {\bfseries to} $T-1$} + \STATE Take $A_t$ from $S_t$ according to $\mu$, and arrive at $S_{t+1}$\\ + \STATE Observe sample ($S_t$,$R_{t+1}$,$S_{t+1}$) at time step $t$ (with their corresponding state feature vectors)\\ + \STATE $\delta_t = R_{t+1}+\gamma\bm{\theta}_t^{\top}\bm{\phi}_{t+1}-\bm{\theta}_t^{\top}\bm{\phi}_t$ + \STATE $\rho_{t} \leftarrow \frac{\pi(A_t | S_t)}{\mu(A_t | 
S_t)}$ + \STATE $F_{t}\leftarrow \gamma \rho_{t-1} F_{t-1} +1$ + \STATE $\bm{\theta}_{t+1}\leftarrow \bm{\theta}_{t}+\alpha_t (F_t \rho_t\delta_t-\omega_t)\bm{\phi}_t$ + \STATE $\omega_{t+1}\leftarrow \omega_{t}+\beta_t (F_t \rho_t\delta_t-\omega_t)$ + \STATE $S_t=S_{t+1}$ + \ENDFOR + \UNTIL{terminal episode} +\end{algorithmic} +\end{algorithm} + +\subsection{Variance Minimization TDC Learning: VMTDC} +For off-policy learning, we employ a projection operator. +The objective function is called Variance of Projected Bellman Error (VPBE), +and the corresponding algorithm is called VMTDC. +\begin{equation} + \begin{array}{ccl} + \text{VPBE}(\bm{\theta})&=&\mathbb{E}[(\delta-\mathbb{E}[\delta]) \bm{\phi}]^{\top} \mathbb{E}[\bm{\phi} \bm{\phi}^{\top}]^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta])\bm{\phi}]\\ + &=&\mathbb{E}[(\delta-\omega) \bm{\phi}]^{\top} \mathbb{E}[\bm{\phi} \bm{\phi}^{\top}]^{-1}\mathbb{E}[(\delta-\omega)\bm{\phi}], + \end{array} +\end{equation} +where $\omega$ is used to estimate $\mathbb{E}[\delta]$, i.e., $\omega \doteq \mathbb{E}[\delta]$. + +The derivation process of the VMTDC algorithm is the same +as that of the TDC algorithm; the only difference is that the original $\delta$ is replaced by $\delta-\omega$. +Therefore, we can easily obtain the update formulas of VMTDC as follows: + +\begin{equation} + \bm{\theta}_{k+1}\leftarrow\bm{\theta}_{k}+\alpha_{k}[(\delta_{k}- \omega_k) \bm{\phi}(s_k)\\ + - \gamma\bm{\phi}(s_{k+1})(\bm{\phi}^{\top} (s_k) \bm{u}_{k})], +\label{thetavmtdc} +\end{equation} +\begin{equation} + \bm{u}_{k+1}\leftarrow \bm{u}_{k}+\zeta_{k}[\delta_{k}-\omega_k - \bm{\phi}^{\top} (s_k) \bm{u}_{k}]\bm{\phi}(s_k), +\label{uvmtdc} +\end{equation} +and +\begin{equation} + \omega_{k+1}\leftarrow \omega_{k}+\beta_k (\delta_k- \omega_k). + \label{omegavmtdc} +\end{equation} +% The pseudocode of the VMTDC algorithm for importance-sampling scenario is shown in Algorithm \ref{alg:algorithm 2} of Appendix \ref{proofth2}. 
+ +Now, we will introduce the improved version of the GQ(0) algorithm, named VMGQ(0): +\begin{equation} + \begin{array}{ccl} + \bm{\theta}_{k+1}\leftarrow\bm{\theta}_{k}&+&\alpha_{k}[(\delta_{k}- \omega_k) \bm{\phi}(s_k,a_k)\\ + &-& \gamma\bm{\phi}(s_{k+1},A^{*}_{k+1})(\bm{\phi}^{\top} (s_k,a_k) \bm{u}_{k})], + \end{array} +\end{equation} +\begin{equation} + \bm{u}_{k+1}\leftarrow \bm{u}_{k}+\zeta_{k}[(\delta_{k}-\omega_{k}) - \bm{\phi}^{\top} (s_k,a_k) \bm{u}_{k}]\bm{\phi}(s_k,a_k), +\end{equation} +and +\begin{equation} + \omega_{k+1}\leftarrow \omega_{k}+\beta_k(\delta_k- \omega_k), +\end{equation} +where $\delta_{k}$ is given by (\ref{deltaQ}) and $A^{*}_{k+1}={\arg \max}_{a}(\bm{\theta}_{k}^{\top}\bm{\phi}(s_{k+1},a))$. + +\subsection{Variance Minimization ETD Learning: VMETD} +Based on the off-policy TD algorithm, a scalar, $F$, +is introduced to obtain the ETD algorithm, +which ensures convergence under off-policy +conditions. This paper further introduces a scalar, +$\omega$, based on the ETD algorithm to obtain VMETD. +VMETD is defined by the following updates: +% \begin{equation} +% \delta_{t}= R_{t+1}+\gamma \theta_t^{\top}\phi_{t+1}-\theta_t^{\top}\phi_t. 
+% \end{equation} +\begin{equation} +\rho_{k} \leftarrow \frac{\pi(A_k | S_k)}{\mu(A_k | S_k)}, +\end{equation} +\begin{equation} + \label{fvmetd} + F_k \leftarrow \gamma \rho_{k-1}F_{k-1}+1, +\end{equation} +\begin{equation} + \label{thetavmetd} + \bm{\theta}_{k+1}\leftarrow \bm{\theta}_k+\alpha_k (F_k \rho_k\delta_k - \omega_{k})\bm{\phi}_k, +\end{equation} +\begin{equation} + \label{omegavmetd} + \omega_{k+1} \leftarrow \omega_k+\beta_k(F_k \rho_k \delta_k - \omega_k), +\end{equation} +where $\mu$ is the behavior policy and $\pi$ is the target policy, +$F_k$ is a scalar variable, +$F_0=1$, $\omega$ is used to estimate $\mathbb{E}[\delta]$, i.e., $\omega \doteq \mathbb{E}[\delta]$, and +$\textbf{F}$ is a diagonal matrix with diagonal elements +$f(s)\doteq d_{\mu}(s)\lim_{k\rightarrow \infty}\mathbb{E}_{\mu}[F_k|S_k=s]$, +which we assume exists. +The vector $\textbf{f}\in \mathbb{R}^N$ with components +$[\textbf{f}]_s\doteq f(s)$ can be written as +\begin{equation} +\begin{split} +\textbf{f}&=\textbf{d}_{\mu}+\gamma \textbf{P}_{\pi}^{\top}\textbf{d}_{\mu}+(\gamma \textbf{P}_{\pi}^{\top})^2\textbf{d}_{\mu}+\ldots\\ +&=(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}\textbf{d}_{\mu}. 
+\end{split} +\end{equation} + + diff --git b/AnonymousSubmission/main/pic/Acrobot_complete.pdf a/AnonymousSubmission/main/pic/Acrobot_complete.pdf new file mode 100644 index 0000000..8d11e4d Binary files /dev/null and a/AnonymousSubmission/main/pic/Acrobot_complete.pdf differ diff --git b/AnonymousSubmission/main/pic/BairdExample.tex a/AnonymousSubmission/main/pic/BairdExample.tex new file mode 100644 index 0000000..fbfa214 --- /dev/null +++ a/AnonymousSubmission/main/pic/BairdExample.tex @@ -0,0 +1,69 @@ +\resizebox{5cm}{3cm}{ +\begin{tikzpicture}[smooth] +\node[coordinate] (origin) at (0.3,0) {}; +\node[coordinate] (num7) at (3,0) {}; +\node[coordinate] (num1) at (1,2.5) {}; +\path (num7) ++ (-10:0.5cm) node (num7_bright1) [coordinate] {}; +\path (num7) ++ (-30:0.7cm) node (num7_bright2) [coordinate] {}; +\path (num7) ++ (-60:0.35cm) node (num7_bright3) [coordinate] {}; +\path (num7) ++ (-60:0.6cm) node (num7_bright4) [coordinate] {}; +\path (origin) ++ (90:3cm) node (origin_above) [coordinate] {}; +\path (origin_above) ++ (0:5.7cm) node (origin_aright) [coordinate] {}; +\path (num1) ++ (90:0.5cm) node (num1_a) [coordinate] {}; +\path (num1) ++ (-90:0.3cm) node (num1_b) [coordinate] {}; + +\path (num1) ++ (0:1cm) node (num2) [coordinate] {}; +\path (num1_a) ++ (0:1cm) node (num2_a) [coordinate] {}; +\path (num1_b) ++ (0:1cm) node (num2_b) [coordinate] {}; +\path (num2) ++ (0:1cm) node (num3) [coordinate] {}; +\path (num2_a) ++ (0:1cm) node (num3_a) [coordinate] {}; +\path (num2_b) ++ (0:1cm) node (num3_b) [coordinate] {}; +\path (num3) ++ (0:1cm) node (num4) [coordinate] {}; +\path (num3_a) ++ (0:1cm) node (num4_a) [coordinate] {}; +\path (num3_b) ++ (0:1cm) node (num4_b) [coordinate] {}; +\path (num4) ++ (0:1cm) node (num5) [coordinate] {}; +\path (num4_a) ++ (0:1cm) node (num5_a) [coordinate] {}; +\path (num4_b) ++ (0:1cm) node (num5_b) [coordinate] {}; +\path (num5) ++ (0:1cm) node (num6) [coordinate] {}; +\path (num5_a) ++ (0:1cm) node (num6_a) [coordinate] 
{}; +\path (num5_b) ++ (0:1cm) node (num6_b) [coordinate] {}; + + +%\draw[->](0,0) -- (1,1); +%\draw[dashed,line width = 0.03cm] (0,0) -- (1,1); + %\fill (0.5,0.5) circle (0.5); + %\draw[shape=circle,fill=white,draw=black] (a) at (num7) {7}; + + +\draw[dashed,line width = 0.03cm,xshift=3cm] plot[tension=0.06] +coordinates{(num7) (origin) (origin_above) (origin_aright)}; + +\draw[->,>=stealth,line width = 0.02cm,xshift=3cm] plot[tension=0.5] +coordinates{(num7) (num7_bright1) (num7_bright2)(num7_bright4) (num7_bright3)}; + +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (g) at (num7) {7}; + + + +\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num1) -- (num1_a) ; +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (a) at (num1_b) {1}; +\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num2) -- (num2_a) ; +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (b) at (num2_b) {2}; +\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num3) -- (num3_a) ; +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (c) at (num3_b) {3}; +\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num4) -- (num4_a) ; +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (d) at (num4_b) {4}; +\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num5) -- (num5_a) ; +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (e) at (num5_b) {5}; +\draw[<->,>=stealth,dashed,line width = 0.03cm,] (num6) -- (num6_a) ; +\node[line width = 0.02cm,shape=circle,fill=white,draw=black] (f) at (num6_b) {6}; + +\draw[->,>=stealth,line width = 0.02cm] (a)--(g); +\draw[->,>=stealth,line width = 0.02cm] (b)--(g); +\draw[->,>=stealth,line width = 0.02cm] (c)--(g); +\draw[->,>=stealth,line width = 0.02cm] (d)--(g); +\draw[->,>=stealth,line width = 0.02cm] (e)--(g); +\draw[->,>=stealth,line width = 0.02cm] (f)--(g); +\end{tikzpicture} +} + diff --git b/AnonymousSubmission/main/pic/counterexample_quanju_new.pdf 
a/AnonymousSubmission/main/pic/counterexample_quanju_new.pdf new file mode 100644 index 0000000..7b39ec5 Binary files /dev/null and a/AnonymousSubmission/main/pic/counterexample_quanju_new.pdf differ diff --git b/AnonymousSubmission/main/pic/cw_complete.pdf a/AnonymousSubmission/main/pic/cw_complete.pdf new file mode 100644 index 0000000..b80dd74 Binary files /dev/null and a/AnonymousSubmission/main/pic/cw_complete.pdf differ diff --git b/AnonymousSubmission/main/pic/dependent_new.pdf a/AnonymousSubmission/main/pic/dependent_new.pdf new file mode 100644 index 0000000..f7f34ce Binary files /dev/null and a/AnonymousSubmission/main/pic/dependent_new.pdf differ diff --git b/AnonymousSubmission/main/pic/inverted_new.pdf a/AnonymousSubmission/main/pic/inverted_new.pdf new file mode 100644 index 0000000..b8e4bc2 Binary files /dev/null and a/AnonymousSubmission/main/pic/inverted_new.pdf differ diff --git b/AnonymousSubmission/main/pic/maze_13_13.pdf a/AnonymousSubmission/main/pic/maze_13_13.pdf new file mode 100644 index 0000000..cda62be Binary files /dev/null and a/AnonymousSubmission/main/pic/maze_13_13.pdf differ diff --git b/AnonymousSubmission/main/pic/maze_complete.pdf a/AnonymousSubmission/main/pic/maze_complete.pdf new file mode 100644 index 0000000..6757bdb Binary files /dev/null and a/AnonymousSubmission/main/pic/maze_complete.pdf differ diff --git b/AnonymousSubmission/main/pic/mt_complete.pdf a/AnonymousSubmission/main/pic/mt_complete.pdf new file mode 100644 index 0000000..aa554fb Binary files /dev/null and a/AnonymousSubmission/main/pic/mt_complete.pdf differ diff --git b/AnonymousSubmission/main/pic/randomwalk.tex a/AnonymousSubmission/main/pic/randomwalk.tex new file mode 100644 index 0000000..38a2f59 --- /dev/null +++ a/AnonymousSubmission/main/pic/randomwalk.tex @@ -0,0 +1,62 @@ + +% \tikzstyle{int}=[draw, fill=blue!20, minimum size=2em] +% \tikzstyle{block}=[draw, fill=gray, minimum size=1.5em] +% \tikzstyle{init} = [pin edge={to-,thin,black}] +% 
\resizebox{8cm}{1.2cm}{ +% \begin{tikzpicture}[node distance=1.5cm,auto,>=latex'] +% \node [block] (o) {}; +% \node (p) [left of=o,node distance=0.5cm, coordinate] {o}; +% \node [shape=circle,int] (a) [right of=o]{$A$}; +% \node (b) [left of=a,node distance=1.5cm, coordinate] {a}; +% \node [shape=circle,int] (c) [right of=a] {$B$}; +% \node (d) [left of=c,node distance=1.5cm, coordinate] {c}; +% \node [shape=circle,int, pin={[init]above:$$}] (e) [right of=c]{$C$}; +% \node (f) [left of=e,node distance=1.5cm, coordinate] {e}; +% \node [shape=circle,int] (g) [right of=e] {$D$}; +% \node (h) [left of=g,node distance=1.5cm, coordinate] {g}; +% \node [shape=circle,int] (i) [right of=g] {$E$}; +% \node (j) [left of=i,node distance=1.5cm, coordinate] {i}; +% \node [block] (k) [right of=i] {}; +% \node (l) [left of=k,node distance=0.5cm, coordinate] {k}; + +% \path[<-] (o) edge node {$0$} (a); +% \path[<->] (a) edge node {$0$} (c); +% \path[<->] (c) edge node {$0$} (e); +% \path[<->] (e) edge node {$0$} (g); +% \path[<->] (g) edge node {$0$} (i); +% \draw[->] (i) edge node {$1$} (k); +% \end{tikzpicture} +% } +\tikzstyle{int}=[draw, fill=blue!20, minimum size=2em] +\tikzstyle{block}=[draw, fill=gray, minimum size=1.5em] +\tikzstyle{init} = [pin edge={to-,thin,black}] + +\resizebox{5cm}{1cm}{ + \begin{tikzpicture}[node distance=1.5cm, auto, >=latex] + \node [block] (o) {}; + \node (p) [left of=o, node distance=0.5cm, coordinate] {o}; + \node [shape=circle, int] (a) [right of=o] {$A$}; + \node (b) [left of=a, node distance=1.5cm, coordinate] {a}; + \node [shape=circle, int] (c) [right of=a] {$B$}; + \node (d) [left of=c, node distance=1.5cm, coordinate] {c}; + \node [shape=circle, int, pin={[init]above:$ $}] (e) [right of=c] {$C$}; + \node (f) [left of=e, node distance=1.5cm, coordinate] {e}; + \node [shape=circle, int] (g) [right of=e] {$D$}; + \node (h) [left of=g, node distance=1.5cm, coordinate] {g}; + \node [shape=circle, int] (i) [right of=g] {$E$}; + \node (j) [left 
of=i, node distance=1.5cm, coordinate] {i}; + \node [block] (k) [right of=i] {}; + \node (l) [left of=k, node distance=0.5cm, coordinate] {k}; + + \path[->] (o) edge node {$0$} (a); + \path[<->] (a) edge node {$0$} (c); + \path[<->] (c) edge node {$0$} (e); + \path[<->] (e) edge node {$0$} (g); + \path[<->] (g) edge node {$0$} (i); + \draw[->] (i) edge node {$1$} (k); + \end{tikzpicture} +} + + + + \ No newline at end of file diff --git b/AnonymousSubmission/main/pic/tabular_new.pdf a/AnonymousSubmission/main/pic/tabular_new.pdf new file mode 100644 index 0000000..32bc90b Binary files /dev/null and a/AnonymousSubmission/main/pic/tabular_new.pdf differ diff --git b/AnonymousSubmission/main/preliminaries.tex a/AnonymousSubmission/main/preliminaries.tex new file mode 100644 index 0000000..c69b57d --- /dev/null +++ a/AnonymousSubmission/main/preliminaries.tex @@ -0,0 +1,55 @@ +\section{Background} +\label{preliminaries} +A reinforcement learning agent interacts with an environment, observes its state, + makes sequential decisions to influence the environment, and obtains + rewards. + Consider an infinite-horizon discounted + Markov Decision Process (MDP), defined by a tuple $\langle S,A,R,P,\gamma + \rangle$, where $S=\{1,2,\ldots,N\}$ is a finite set of states of the environment; $A$ + is a finite set of actions of the agent; + $R:S\times A \times S \rightarrow \mathbb{R}$ is a bounded deterministic reward + function; $P:S\times A\times S \rightarrow [0,1]$ is the transition + probability distribution; and $\gamma\in (0,1)$ + is the discount factor \cite{Sutton2018book}. + Due to the requirements of online learning, value iteration based on sampling + is considered in this paper. + In each sampling, an experience (or transition) $\langle s, a, s', r\rangle$ is + obtained. + + A policy is a mapping $\pi:S\times A \rightarrow [0,1]$. 
The goal of the + agent is to find an optimal policy $\pi^*$ to maximize the expectation of the + discounted cumulative reward over a long period. 
+ State value function $V^{\pi}(s)$ for a stationary policy $\pi$ is + defined as: + \begin{equation*} + V^{\pi}(s)=\mathbb{E}_{\pi}[\sum_{k=0}^{\infty} \gamma^k R_{k}|s_0=s]. + \label{valuefunction} + \end{equation*} + Linear value function for state $s\in S$ is defined as: + \begin{equation} + V_{{\theta}}(s):= {\bm{\theta}}^{\top}{\bm{\phi}}(s) = \sum_{i=1}^{m} + \theta_i \phi_i(s), + \label{linearvaluefunction} + \end{equation} + where ${\bm{\theta}}:=(\theta_1,\theta_2,\ldots,\theta_m)^{\top}\in + \mathbb{R}^m$ is a parameter vector, + ${\bm{\phi}}:=(\phi_1,\phi_2,\ldots,\phi_m)^{\top}\in \mathbb{R}^m$ is a feature + function defined on state space $S$, and $m$ is the feature size. + + Tabular temporal difference (TD) learning \cite{Sutton2018book} has been successfully applied to small-scale problems. + To deal with the well-known curse of dimensionality of large scale MDPs, value + function is usually approximated by a linear model, kernel methods, decision + trees, or neural networks, etc. This paper focuses on the linear model, where + features are usually hand coded by domain experts. + +% TD learning can also be used to find optimal strategies. The problem of finding an optimal policy is +% often called the control problem. Two popular TD methods are Sarsa and Q-leaning. The former is an on-policy +% TD control, while the latter is an off-policy control. + +% It is well known that TDC algorithm \cite{sutton2009fast} guarantees +% convergence under off-policy conditions while the off-policy TD algorithm may diverge. The +% objective function of TDC is MSPBE. +% TDC is essentially an adjustment or correction of the TD update so that it +% follows the gradient of the MSPBE objective function. In the context of the TDC algorithm, the control algorithm +% is known as Greedy-GQ($\lambda$) \cite{sutton2009fast}. When $\lambda$ is set to 0, it is denoted +% as GQ(0). 
\ No newline at end of file diff --git b/AnonymousSubmission/main/relatedwork.tex a/AnonymousSubmission/main/relatedwork.tex new file mode 100644 index 0000000..99bfcff --- /dev/null +++ a/AnonymousSubmission/main/relatedwork.tex @@ -0,0 +1,52 @@ +\section{Related Work} +\subsection{Difference between VMQ and R-learning} +Tabular VMQ's update formula bears some resemblance +to R-learning's update formula. As shown in Table \ref{differenceRandVMQ}, the update formulas of the two algorithms have the following differences: +\\(1) The goal of the R-learning algorithm \cite{schwartz1993reinforcement} is to maximize the average +reward, rather than the cumulative reward, by learning an estimate +of the average reward. This estimate $m$ is then used to update the Q-values. +On the contrary, the $\omega$ in the tabular VMQ update formula eventually converges to $\mathbb{E}[\delta]$. +\\(2) When $\gamma=1$ in the tabular VMQ update formula, the +R-learning update formula is formally +the same as the tabular VMQ update formula. +Therefore, R-learning algorithm can be +considered as a special case of VMQ algorithm in form. + +\subsection{Variance Reduction for TD Learning} + The TD with centering algorithm (CTD) \cite{korda2015td} +was proposed, which directly applies variance reduction techniques to +the TD algorithm. The CTD algorithm updates its parameters using the +average gradient of a batch of Markovian samples and a projection operator. +Unfortunately, the authors’ analysis of the CTD algorithm contains technical +errors. The VRTD algorithm \cite{xu2020reanalysis} is also a variance-reduced algorithm that updates +its parameters using the average gradient of a batch of i.i.d. samples. The +authors of VRTD provide a technically sound analysis to demonstrate the +advantages of variance reduction. 
+ +\subsection{Variance Reduction for Policy Gradient Algorithms} +Policy gradient algorithms are a class of reinforcement +learning algorithms that directly optimize cumulative rewards. +REINFORCE is a Monte Carlo algorithm that estimates +gradients through sampling, but may have a high variance. +Baselines are introduced to reduce variance and to +accelerate learning \cite{Sutton2018book}. In Actor-Critic, +value function as a baseline and bootstrapping + are used to reduce variance, also accelerating convergence \cite{Sutton2018book}. + TRPO \cite{schulman2015trust} and PPO \cite{schulman2017proximal} + use generalized advantage +estimation, which combines multi-step bootstrapping and Monte Carlo +estimation to reduce variance, making gradient estimation more stable and +accelerating convergence. + +In Variance Minimization, +the incorporation of $\omega \doteq \mathbb{E}[\delta]$ +bears a striking resemblance to the use of a baseline +in policy gradient methods. The introduction of a baseline +in policy gradient techniques does not alter +the expected value of the update; +rather, it significantly impacts the variance of gradient estimation. +The addition of $\omega \doteq \mathbb{E}[\delta]$ in Variance Minimization + preserves the invariance of the optimal +policy while stabilizing gradient estimation, +reducing the variance of gradient estimation, +and hastening convergence. \ No newline at end of file diff --git b/AnonymousSubmission/main/theory.tex a/AnonymousSubmission/main/theory.tex new file mode 100644 index 0000000..cbaec98 --- /dev/null +++ a/AnonymousSubmission/main/theory.tex @@ -0,0 +1,310 @@ +\section{Theoretical Analysis} +The purpose of this section is to establish the stabilities of the VMTD algorithm +and the VMTDC algorithm, and also presents a corollary on the convergence rate of VMTD. + +\begin{theorem} + \label{theorem1}(Convergence of VMTD). 
+ In the case of on-policy learning, consider the iterations (\ref{omega}) and (\ref{theta}) with (\ref{delta}) of VMTD. + Let the step-size sequences $\alpha_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\beta_k>0$, for all $k$, + $ + \sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\infty, + $ + $ + \sum_{k=0}^{\infty}\alpha_k^2<\infty, + $ + $ + \sum_{k=0}^{\infty}\beta_k^2<\infty, + $ + and + $ + \alpha_k = o(\beta_k). + $ + Assume that $(\bm{\phi}_k,r_k,\bm{\phi}_k')$ is an i.i.d. sequence with + uniformly bounded second moments, where $\bm{\bm{\phi}}_k$ and $\bm{\bm{\phi}}'_{k}$ are sampled from the same Markov chain. + Let $\textbf{A} = \mathrm{Cov}(\bm{\bm{\phi}},\bm{\bm{\phi}}-\gamma\bm{\bm{\phi}}')$, + $\bm{b}=\mathrm{Cov}(r,\bm{\phi})$. + Assume that matrix $\textbf{A}$ is non-singular. + Then the parameter vector $\bm{\theta}_k$ converges with probability one + to $\textbf{A}^{-1}\bm{b}$. +\end{theorem} +\begin{proof} + \label{th1proof} + The proof is based on Borkar's Theorem for + general stochastic approximation recursions with two time scales + \cite{borkar1997stochastic}. + + % The new TD error for the linear setting is + % \begin{equation*} + % \delta_{\text{new}}=r+\gamma + % \bm{\theta}^{\top}\bm{\phi}'-\bm{\theta}^{\top}\bm{\phi}-\mathbb{E}[\delta]. + % \end{equation*} + A new one-step + linear TD solution is defined + as: + \begin{equation*} + 0=\mathbb{E}[(\delta-\mathbb{E}[\delta]) \bm{\phi}]=-\textbf{A}\bm{\theta}+\bm{b}. + \end{equation*} + Thus, the VMTD's solution is + $\bm{\theta}_{\text{VMTD}}=\textbf{A}^{-1}\bm{b}$. 
+ + First, note that recursion (\ref{theta}) can be rewritten as + \begin{equation*} + \bm{\theta}_{k+1}\leftarrow \bm{\theta}_k+\beta_k\bm{\xi}(k), + \end{equation*} + where + \begin{equation*} + \bm{\xi}(k)=\frac{\alpha_k}{\beta_k}(\delta_k-\omega_k)\bm{\phi}_k. + \end{equation*} + Due to the settings of step-size schedule $\alpha_k = o(\beta_k)$, + $\bm{\xi}(k)\rightarrow 0$ almost surely as $k\rightarrow\infty$. + That is, the increments in iteration (\ref{omega}) are uniformly larger than + those in (\ref{theta}), thus (\ref{omega}) is the faster recursion. + Along the faster time scale, iterations of (\ref{omega}) and (\ref{theta}) + are associated with the following system of ODEs: + \begin{equation} + \dot{\bm{\theta}}(t) = 0, + \label{thetaFast} + \end{equation} + \begin{equation} + \dot{\omega}(t)=\mathbb{E}[\delta_t|\bm{\theta}(t)]-\omega(t). + \label{omegaFast} + \end{equation} + Based on the ODE (\ref{thetaFast}), $\bm{\theta}(t)\equiv \bm{\theta}$ when + viewed from the faster timescale. + By the Hirsch lemma \cite{hirsch1989convergent}, it follows that + $||\bm{\theta}_k-\bm{\theta}||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some + $\bm{\theta}$ that depends on the initial condition $\bm{\theta}_0$ of recursion + (\ref{theta}). + Thus, the ODE pair (\ref{thetaFast})-(\ref{omegaFast}) can be written as + \begin{equation} + \dot{\omega}(t)=\mathbb{E}[\delta_t|\bm{\theta}]-\omega(t). + \label{omegaFastFinal} + \end{equation} + Consider the function $h(\omega)=\mathbb{E}[\delta|\bm{\theta}]-\omega$, + i.e., the driving vector field of the ODE (\ref{omegaFastFinal}). + It is easy to find that the function $h$ is Lipschitz with coefficient + $1$. + Let $h_{\infty}(\cdot)$ be the function defined by + $h_{\infty}(\omega)=\lim_{x\rightarrow \infty}\frac{h(x\omega)}{x}$. + Then $h_{\infty}(\omega)= -\omega$, is well-defined. 
+ For (\ref{omegaFastFinal}), $\omega^*=\mathbb{E}[\delta|\bm{\theta}]$ + is the unique globally asymptotically stable equilibrium. 
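+ For the ODE + \begin{equation} + \dot{\omega}(t) = h_{\infty}(\omega(t))= -\omega(t), + \label{omegaInfty} + \end{equation} + apply $\vec{V}(\omega)=(-\omega)^{\top}(-\omega)/2$ as its + associated strict Liapunov function. Then, + the origin of (\ref{omegaInfty}) is a globally asymptotically stable + equilibrium. + + + Consider now the recursion (\ref{omega}). + Let + $M_{k+1}=(\delta_k-\omega_k) + -\mathbb{E}[(\delta_k-\omega_k)|\mathcal{F}(k)]$, + where $\mathcal{F}(k)=\sigma(\omega_l,\bm{\theta}_l,l\leq k;\bm{\phi}_s,\bm{\phi}_s',r_s,s<k)$, $k\geq 1$, are the $\sigma$-fields generated by the iterates and samples observed up to step $k$. Then $M_{k+1}$, $k\geq 0$, are integrable random variables that satisfy $\mathbb{E}[M_{k+1}|\mathcal{F}(k)]=0$, $\forall k\geq 0$. Because $\bm{\phi}_k$, $r_k$, and $\bm{\phi}_k'$ have uniformly bounded second moments, it can be seen that for some constant $c_1>0$, $\forall k\geq0$, + \begin{equation*} + \mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq + c_1(1+||\omega_k||^2+||\bm{\theta}_k||^2). + \end{equation*} + + + Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified. + Furthermore, Assumption (TS) of \cite{borkar2000ode} is satisfied by our + conditions on the step-size sequences $\alpha_k$, $\beta_k$. Thus, + by Theorem 2.2 of \cite{borkar2000ode} we obtain that + $||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$. + + Consider now the slower time scale recursion (\ref{theta}). + Based on the above analysis, (\ref{theta}) can be rewritten as + \begin{equation*} + \bm{\theta}_{k+1}\leftarrow + \bm{\theta}_{k}+\alpha_k(\delta_k-\mathbb{E}[\delta_k|\bm{\theta}_k])\bm{\phi}_k. 
+ \end{equation*} + + Let $\mathcal{G}(k)=\sigma(\bm{\theta}_l,l\leq k;\bm{\phi}_s,\bm{\phi}_s',r_s,s<k)$, $k\geq 1$, be the $\sigma$-fields generated by the iterates and samples observed up to step $k$, and let $Z_{k+1}=(\delta_k-\mathbb{E}[\delta_k|\bm{\theta}_k])\bm{\phi}_k-\mathbb{E}[(\delta_k-\mathbb{E}[\delta_k|\bm{\theta}_k])\bm{\phi}_k|\mathcal{G}(k)]$. Then $Z_{k+1}$, $k\geq 0$, are integrable random variables that satisfy $\mathbb{E}[Z_{k+1}|\mathcal{G}(k)]=0$, $\forall k\geq 0$. Because $\bm{\phi}_k$, $r_k$, and $\bm{\phi}_k'$ have uniformly bounded second moments, it can be seen that for some constant $c_2>0$, $\forall k\geq0$, + \begin{equation*} + \mathbb{E}[||Z_{k+1}||^2|\mathcal{G}(k)]\leq + c_2(1+||\bm{\theta}_k||^2). 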
+ \end{equation*} + + Consider now the following ODE associated with (\ref{theta}): + \begin{equation} + \begin{array}{ccl} + \dot{\bm{\theta}}(t)&=&\mathrm{Cov}(\delta|\bm{\theta}(t),\bm{\phi})\\ + &=&\mathrm{Cov}(r+(\gamma\bm{\phi}'-\bm{\phi})^{\top}\bm{\theta}(t),\bm{\phi})\\ + &=&\mathrm{Cov}(r,\bm{\phi})-\mathrm{Cov}(\bm{\theta}(t)^{\top}(\bm{\phi}-\gamma\bm{\phi}'),\bm{\phi})\\ + &=&\mathrm{Cov}(r,\bm{\phi})-\bm{\theta}(t)^{\top}\mathrm{Cov}(\bm{\phi}-\gamma\bm{\phi}',\bm{\phi})\\ + &=&\mathrm{Cov}(r,\bm{\phi})-\mathrm{Cov}(\bm{\phi}-\gamma\bm{\phi}',\bm{\phi})^{\top}\bm{\theta}(t)\\ + &=&\mathrm{Cov}(r,\bm{\phi})-\mathrm{Cov}(\bm{\phi},\bm{\phi}-\gamma\bm{\phi}')\bm{\theta}(t)\\ + &=&-\textbf{A}\bm{\theta}(t)+\bm{b}. + \end{array} + \label{odetheta} + \end{equation} + Let $\vec{h}(\bm{\theta}(t))$ be the driving vector field of the ODE + (\ref{odetheta}). + \begin{equation*} + \vec{h}(\bm{\theta}(t))=-\textbf{A}\bm{\theta}(t)+\bm{b}. + \end{equation*} + Consider the cross-covariance matrix, + \begin{equation} + \begin{array}{ccl} + \textbf{A} &=& \mathrm{Cov}(\bm{\phi},\bm{\phi}-\gamma\bm{\phi}')\\ + &=&\frac{\mathrm{Cov}(\bm{\phi},\bm{\phi})+\mathrm{Cov}(\bm{\phi}-\gamma\bm{\phi}',\bm{\phi}-\gamma\bm{\phi}')-\mathrm{Cov}(\gamma\bm{\phi}',\gamma\bm{\phi}')}{2}\\ + &=&\frac{\mathrm{Cov}(\bm{\phi},\bm{\phi})+\mathrm{Cov}(\bm{\phi}-\gamma\bm{\phi}',\bm{\phi}-\gamma\bm{\phi}')-\gamma^2\mathrm{Cov}(\bm{\phi}',\bm{\phi}')}{2}\\ + &=&\frac{(1-\gamma^2)\mathrm{Cov}(\bm{\phi},\bm{\phi})+\mathrm{Cov}(\bm{\phi}-\gamma\bm{\phi}',\bm{\phi}-\gamma\bm{\phi}')}{2},\\ + \end{array} + \label{covariance} + \end{equation} + where we eventually used $\mathrm{Cov}(\bm{\phi}',\bm{\phi}')=\mathrm{Cov}(\bm{\phi},\bm{\phi})$ + \footnote{The covariance matrix $\mathrm{Cov}(\bm{\phi}',\bm{\phi}')$ is equal to + the covariance matrix $\mathrm{Cov}(\bm{\phi},\bm{\phi})$ if the initial state is re-reachable or + initialized randomly in a Markov chain for on-policy update.}. 
+ Note that the covariance matrices $\mathrm{Cov}(\bm{\phi},\bm{\phi})$ and + $\mathrm{Cov}(\bm{\phi}-\gamma\bm{\phi}',\bm{\phi}-\gamma\bm{\phi}')$ are positive + semi-definite. Then, the matrix $\textbf{A}$ is positive semi-definite because $\textbf{A}$ is + a positively weighted linear combination of two positive semi-definite matrices + (\ref{covariance}). + Furthermore, $\textbf{A}$ is nonsingular due to the assumption. + Hence, the cross-covariance matrix $\textbf{A}$ is positive definite. + + Therefore, + $\bm{\theta}^*=\textbf{A}^{-1}\bm{b}$ can be seen to be the unique globally asymptotically + stable equilibrium for ODE (\ref{odetheta}). + Let $\vec{h}_{\infty}(\bm{\theta})=\lim_{r\rightarrow + \infty}\frac{\vec{h}(r\bm{\theta})}{r}$. Then + $\vec{h}_{\infty}(\bm{\theta})=-\textbf{A}\bm{\theta}$ is well-defined. + Consider now the ODE + \begin{equation} + \dot{\bm{\theta}}(t)=-\textbf{A}\bm{\theta}(t). + \label{odethetafinal} + \end{equation} + The ODE (\ref{odethetafinal}) has the origin as its unique globally asymptotically stable equilibrium. + Thus, Assumptions (A1) and (A2) are verified. + \end{proof} + +% Please refer to the appendix \ref{proofth1} for detailed proof process. +% Theorem 3 in \cite{dalal2020tale} provides a general conclusion on the convergence speed of all linear two-timescale +% algorithms. VMTD satisfies the assumptions of this theorem, leading +% to the following corollary. +% \begin{corollary} +% \label{corollary4_2} +% Consider the Sparsely Projected variant of VMTD. 
Then, for $\alpha_k = 1/(k+1)^{\alpha}$, $\beta_k = 1/(k+1)^{\beta}$, +% $0<\beta<\alpha<1$, $p>1$, with probability larger than $1- \tau$, for all $k\geq N_3$, we have +% \begin{equation} +% ||\bm{\theta}'_{k} - \bm{\theta}^{*}|| \le C_{3,\bm{\theta}} \frac{\sqrt{\ln (4d_{1}^{2}(k+1)^{p}/\tau)} }{(k+1)^{\alpha / 2}} +% \end{equation} +% \begin{equation} +% ||\omega'_{n} - \omega^{*}|| \le C_{3,\omega} \frac{\sqrt{\ln (4d_{2}^{2}(k+1)^{p}/\tau)} }{(k+1)^{\omega / 2}}, +% \end{equation} +% \end{corollary} + +% where $d_1$ and $d_2$ represent the dimensions of $\bm{\theta}$ and $\omega$, respectively. For VMTD, $d_2 =1$. +% The meanings of $N_3$,$C_{3,\bm{\theta}}$ and $C_{3,\omega}$ are explained in \cite{dalal2020tale}. +% The formulas for $\bm{\theta}'_{k}$ and $\omega'_{n}$ can be found in (\ref{sparseprojectiontheta}) and (\ref{sparseprojectionomega}). + +% Please refer to the appendix \ref{proofcorollary4_2} for detailed proof process. +\begin{theorem} + \label{theorem2}(Convergence of VMTDC). + In the case of off-policy learning, consider the iterations (\ref{omegavmtdc}), (\ref{uvmtdc}) and (\ref{thetavmtdc}) of VMTDC. + Let the step-size sequences $\alpha_k$, $\zeta_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\zeta_k,\beta_k>0$, for all $k$, + $ + \sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\sum_{k=0}^{\infty}\zeta_k=\infty, + $ + $ + \sum_{k=0}^{\infty}\alpha_k^2<\infty, + $ + $ + \sum_{k=0}^{\infty}\zeta_k^2<\infty, + $ + $ + \sum_{k=0}^{\infty}\beta_k^2<\infty, + $ + and + $ + \alpha_k = o(\zeta_k), + $ + $ + \zeta_k = o(\beta_k). + $ + Assume that $(\bm{\bm{\phi}}_k,r_k,\bm{\bm{\phi}}_k')$ is an i.i.d. sequence with + uniformly bounded second moments. + Let $\textbf{A} = \mathrm{Cov}(\bm{\bm{\phi}},\bm{\bm{\phi}}-\gamma\bm{\bm{\phi}}')$, + $\bm{b}=\mathrm{Cov}(r,\bm{\bm{\phi}})$, and $\textbf{C}=\mathbb{E}[\bm{\bm{\phi}}\bm{\bm{\phi}}^{\top}]$. 
+ Assume that $\textbf{A}$ and $\textbf{C}$ are non-singular matrices. 
+ Then the parameter vector $\bm{\theta}_k$ converges with probability one + to $\textbf{A}^{-1}\bm{b}$. +\end{theorem} +% Please refer to the appendix \ref{proofth2} for detailed proof process. + +\begin{theorem} + \label{theorem3}(Convergence of VMETD). + In the case of off-policy learning, consider the iterations (\ref{fvmetd}), (\ref{omegavmetd}) and (\ref{thetavmetd}) with (\ref{delta}) of VMETD. + Let the step-size sequences $\alpha_k$ and $\beta_k$, $k\geq 0$ satisfy in this case $\alpha_k,\beta_k>0$, for all $k$, + $ + \sum_{k=0}^{\infty}\alpha_k=\sum_{k=0}^{\infty}\beta_k=\infty, + $ + $ + \sum_{k=0}^{\infty}\alpha_k^2<\infty, + $ + $ + \sum_{k=0}^{\infty}\beta_k^2<\infty, + $ + and + $ + \alpha_k = o(\beta_k). + $ + Assume that $(\bm{\bm{\phi}}_k,r_k,\bm{\bm{\phi}}_k')$ is an i.i.d. sequence with + uniformly bounded second moments, where $\bm{\bm{\phi}}_k$ and $\bm{\bm{\phi}}'_{k}$ are sampled from the same Markov chain. + Let $\textbf{A}_{\textbf{VMETD}} ={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} ){\bm{\Phi}}$, + $\bm{b}_{\textbf{VMETD}}=\bm{\Phi}^{\top}(\textbf{F}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top})\textbf{r}_{\pi}$. + Assume that matrix $\textbf{A}_{\textbf{VMETD}}$ is non-singular. + Then the parameter vector $\bm{\theta}_k$ converges with probability one + to $\textbf{A}_{\textbf{VMETD}}^{-1}\bm{b}_{\textbf{VMETD}}$. +\end{theorem} +% Please refer to the appendix \ref{proofVMETD} for detailed proof process. \ No newline at end of file diff --git b/Apendix/aaai24.bib a/Apendix/aaai24.bib new file mode 100644 index 0000000..08e4dae --- /dev/null +++ a/Apendix/aaai24.bib @@ -0,0 +1,1138 @@ +@inproceedings{langley00, + author = {P. 
Langley}, + title = {Crafting Papers on Machine Learning}, + year = {2000}, + pages = {1207--1216}, + editor = {Pat Langley}, + booktitle = {Proceedings of the 17th International Conference + on Machine Learning (ICML 2000)}, + address = {Stanford, CA}, + publisher = {Morgan Kaufmann} +} + +@TechReport{mitchell80, + author = "T. M. Mitchell", + title = "The Need for Biases in Learning Generalizations", + institution = "Computer Science Department, Rutgers University", + year = "1980", + address = "New Brunswick, NJ", +} + +@phdthesis{kearns89, + author = {M. J. Kearns}, + title = {Computational Complexity of Machine Learning}, + school = {Department of Computer Science, Harvard University}, + year = {1989} +} + +@Book{MachineLearningI, + editor = "R. S. Michalski and J. G. Carbonell and T. + M. Mitchell", + title = "Machine Learning: An Artificial Intelligence + Approach, Vol. I", + publisher = "Tioga", + year = "1983", + address = "Palo Alto, CA" +} + +@Book{DudaHart2nd, + author = "R. O. Duda and P. E. Hart and D. G. Stork", + title = "Pattern Classification", + publisher = "John Wiley and Sons", + edition = "2nd", + year = "2000" +} + +@misc{anonymous, + title= {Suppressed for Anonymity}, + author= {Author, N. N.}, + year= {2021} +} + +@InCollection{Newell81, + author = "A. Newell and P. S. Rosenbloom", + title = "Mechanisms of Skill Acquisition and the Law of + Practice", + booktitle = "Cognitive Skills and Their Acquisition", + pages = "1--51", + publisher = "Lawrence Erlbaum Associates, Inc.", + year = "1981", + editor = "J. R. Anderson", + chapter = "1", + address = "Hillsdale, NJ" +} + + +@Article{Samuel59, + author = "A. L. Samuel", + title = "Some Studies in Machine Learning Using the Game of + Checkers", + journal = "IBM Journal of Research and Development", + year = "1959", + volume = "3", + number = "3", + pages = "211--229" +} +@inproceedings{langley00, + author = {P. 
Langley}, + title = {Crafting Papers on Machine Learning}, + year = {2000}, + pages = {1207--1216}, + editor = {Pat Langley}, + booktitle = {Proceedings of the 17th International Conference + on Machine Learning (ICML 2000)}, + address = {Stanford, CA}, + publisher = {Morgan Kaufmann} +} + +@TechReport{mitchell80, + author = "T. M. Mitchell", + title = "The Need for Biases in Learning Generalizations", + institution = "Computer Science Department, Rutgers University", + year = "1980", + address = "New Brunswick, NJ", +} + +@phdthesis{kearns89, + author = {M. J. Kearns}, + title = {Computational Complexity of Machine Learning}, + school = {Department of Computer Science, Harvard University}, + year = {1989} +} + +@Book{MachineLearningI, + editor = "R. S. Michalski and J. G. Carbonell and T. + M. Mitchell", + title = "Machine Learning: An Artificial Intelligence + Approach, Vol. I", + publisher = "Tioga", + year = "1983", + address = "Palo Alto, CA" +} + +@Book{DudaHart2nd, + author = "R. O. Duda and P. E. Hart and D. G. Stork", + title = "Pattern Classification", + publisher = "John Wiley and Sons", + edition = "2nd", + year = "2000" +} + +@misc{anonymous, + title= {Suppressed for Anonymity}, + author= {Author, N. N.}, + year= {2021} +} + +@InCollection{Newell81, + author = "A. Newell and P. S. Rosenbloom", + title = "Mechanisms of Skill Acquisition and the Law of + Practice", + booktitle = "Cognitive Skills and Their Acquisition", + pages = "1--51", + publisher = "Lawrence Erlbaum Associates, Inc.", + year = "1981", + editor = "J. R. Anderson", + chapter = "1", + address = "Hillsdale, NJ" +} + + +@Article{Samuel59, + author = "A. L. 
Samuel", + title = "Some Studies in Machine Learning Using the Game of + Checkers", + journal = "IBM Journal of Research and Development", + year = "1959", + volume = "3", + number = "3", + pages = "211--229" +} + +@book{em:86, + editor = "Engelmore, Robert and Morgan, Anthony", + title = "Blackboard Systems", + year = 1986, + address = "Reading, Mass.", + publisher = "Addison-Wesley", +} +@inproceedings{dalal2018finite, + title={Finite sample analyses for TD (0) with function approximation}, + author={Dalal, Gal and Szorenyi, Balazs and Thoppe, Gugan and Mannor, Shie}, + booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence}, + pages={6144--6160}, + year={2018} +} +@inproceedings{xu2019reanalysis, + title={Reanalysis of Variance Reduced Temporal Difference Learning}, + author={Xu, Tengyu and Wang, Zhe and Zhou, Yi and Liang, Yingbin}, + booktitle={International Conference on Learning Representations}, + year={2019} +} +@inproceedings{c:83, + author = "Clancey, William J.", + year = 1983, + title = "{Communication, Simulation, and Intelligent +Agents: Implications of Personal Intelligent Machines +for Medical Education}", + booktitle="Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}", + pages = "556--560", + address = "Menlo Park, Calif", + publisher = "{IJCAI Organization}", +} +@inproceedings{c:84, + author = "Clancey, William J.", + year = 1984, + title = "{Classification Problem Solving}", + booktitle = "Proceedings of the Fourth National + Conference on Artificial Intelligence", + pages = "45--54", + address = "Menlo Park, Calif.", + publisher="AAAI Press", +} +@article{r:80, + author = {Robinson, Arthur L.}, + title = {New Ways to Make Microcircuits Smaller}, + volume = {208}, + number = {4447}, + pages = {1019--1022}, + year = 
{1980}, + doi = {10.1126/science.208.4447.1019}, + publisher = {American Association for the Advancement of Science}, + issn = {0036-8075}, + URL = {https://science.sciencemag.org/content/208/4447/1019}, + eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf}, + journal = {Science}, +} +@article{r:80x, + author = "Robinson, Arthur L.", + year = 1980, + title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}", + journal = "Science", + volume = 208, + pages = "1019--1026", +} +@article{hcr:83, +title = {Strategic explanations for a diagnostic consultation system}, +journal = {International Journal of Man-Machine Studies}, +volume = {20}, +number = {1}, +pages = {3--19}, +year = {1984}, +issn = {0020-7373}, +doi = {10.1016/S0020-7373(84)80003-6}, +url = {https://www.sciencedirect.com/science/article/pii/S0020737384800036}, +author = {Diane Warner Hasling and William J. Clancey and Glenn Rennels}, +abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy. 
We describe the aspects of NEOMYCIN that make abstract strategic explanations possible---the representation of strategic knowledge explicitly and separately from domain knowledge---and demonstrate how this representation can be used to generate explanations.} +} +@article{hcrt:83, + author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas", + year = 1983, + title = "{Strategic Explanations in Consultation---Duplicate}", + journal = "The International Journal of Man-Machine Studies", + volume = 20, + number = 1, + pages = "3--19", +} +@techreport{r:86, + author = "Rice, James", + year = 1986, + title = "{Poligon: A System for Parallel Problem Solving}", + type = "Technical Report", + number = "KSL-86-19", + institution = "Dept.\ of Computer Science, Stanford Univ.", +} +@phdthesis{c:79, + author = "Clancey, William J.", + year = 1979, + title = "{Transfer of Rule-Based Expertise +through a Tutorial Dialogue}", + type = "{Ph.D.} diss.", + school = "Dept.\ of Computer Science, Stanford Univ.", + address = "Stanford, Calif.", +} +@unpublished{c:21, + author = "Clancey, William J.", + title = "{The Engineering of Qualitative Models}", + year = 2021, + note = "Forthcoming", +} +@misc{c:22, + title={Attention Is All You Need}, + author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. 
Gomez and Lukasz Kaiser and Illia Polosukhin}, + year={2017}, + eprint={1706.03762}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +@misc{c:23, + title = "Pluto: The 'Other' Red Planet", + author = "{NASA}", + howpublished = "\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}", + year = 2015, + note = "Accessed: 2018-12-06" +} +% key r:80x is already defined earlier in this file; leading at-sign removed to disable this repeated entry: article{r:80x, + author = "Robinson, Arthur L.", + year = 1980, + title = "{New Ways to Make Microcircuits Smaller---Duplicate Entry}", + journal = "Science", + volume = 208, + pages = "1019--1026", +} +% key hcrt:83 is already defined earlier in this file; leading at-sign removed to disable this repeated entry: article{hcrt:83, + author = "Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas", + year = 1983, + title = "{Strategic Explanations in Consultation---Duplicate}", + journal = "The International Journal of Man-Machine Studies", + volume = 20, + number = 1, + pages = "3--19", +} +@article{xu2013online, + title={Online learning control using adaptive critic designs with sparse kernel machines}, + author={Xu, Xin and Hou, Zhongsheng and Lian, Chuanqiang and He, Haibo}, + journal={IEEE Trans. Neural Netw. Learn. Syst.}, + volume={24}, + number={5}, + pages={762--775}, + year={2013}, + publisher={IEEE} +} +@article{bertsekas2017value, + title={Value and policy iterations in optimal control and adaptive dynamic programming}, + author={Bertsekas, Dimitri P}, + journal={IEEE Trans. Neural Netw. Learn. Syst.}, + year={2017}, + volume={28}, + number={3}, + pages={500--509}, + publisher={IEEE} +} +@phdthesis{hackman2012faster, + title={Faster Gradient-TD Algorithms}, + author={Hackman, Leah}, + year={2012}, + school={University of Alberta} +} +@inproceedings{harutyunyan2015multi, + title={Multi-scale reward shaping via an off-policy ensemble}, + author={Harutyunyan, Anna and Brys, Tim and Vrancx, Peter and Now{\'e}, Ann}, + booktitle={Proc. 2015 Int. Conf. 
Autonomous Agents and Multiagent Systems}, + pages={1641--1642}, + year={2015}, + organization={International Foundation for Autonomous Agents and Multiagent Systems} +} +@inproceedings{harutyunyan2015expressing, + title={Expressing Arbitrary Reward Functions as Potential-Based Advice}, + author={Harutyunyan, Anna and Devlin, Sam and Vrancx, Peter and Now{\'e}, Ann}, + booktitle={AAAI}, + pages={2652--2658}, + year={2015} +} +@article{wiewiora2003potential, + title={Potential-based shaping and Q-value initialization are equivalent}, + author={Wiewiora, Eric}, + journal={J. Artif. Intell. Res.}, + volume={19}, + pages={205--208}, + year={2003} +} +@article{grzes2010online, + title={Online learning of shaping rewards in reinforcement learning}, + author={Grze{\'s}, Marek and Kudenko, Daniel}, + journal={Neural Netw.}, + volume={23}, + number={4}, + pages={541--550}, + year={2010}, + publisher={Elsevier} +} +@inproceedings{marthi2007automatic, + title={Automatic shaping and decomposition of reward functions}, + author={Marthi, Bhaskara}, + booktitle={Proc. 24th Int. Conf. Mach. Learn.}, + pages={601--608}, + year={2007} +} +@inproceedings{laud2003influence, + title={The Influence of Reward on the Speed of Reinforcement Learning: An Analysis of Shaping}, + author={Laud, Adam and DeJong, Gerald}, + booktitle={Proc. 20th Int. Conf. Mach. Learn.}, + pages={440--447}, + year={2003} +} +@phdthesis{laud2004theory, + title={Theory and application of reward shaping in reinforcement learning}, + author={Laud, Adam Daniel}, + year={2004}, + school={University of Illinois at Urbana-Champaign} +} +@article{geist2013algorithmic, + title={Algorithmic survey of parametric value function approximation}, + author={Geist, Matthieu and Pietquin, Olivier}, + journal={IEEE Trans. Neural Netw. Learn. 
Syst.}, + volume={24}, + number={6}, + pages={845--867}, + year={2013}, + publisher={IEEE} +} +@article{furmston2016approximate, + title={Approximate Newton Methods for Policy Search in Markov Decision Processes}, + author={Furmston, Thomas and Lever, Guy and Barber, David}, + journal={J. Mach. Learn. Res.}, + volume={17}, + number={227}, + pages={1--51}, + year={2016} +} +@article{silver2016mastering, + title={Mastering the game of Go with deep neural networks and tree search}, + author={Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others}, + journal={Nature}, + volume={529}, + number={7587}, + pages={484--489}, + year={2016}, + publisher={Nature Publishing Group} +} + +@article{mnih2015human, + title={Human-level control through deep reinforcement learning}, + author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others}, + journal={Nature}, + volume={518}, + number={7540}, + pages={529--533}, + year={2015}, + publisher={Nature Publishing Group} +} +@inproceedings{guo2014deep, + title={Deep learning for real-time Atari game play using offline Monte-Carlo tree search planning}, + author={Guo, Xiaoxiao and Singh, Satinder and Lee, Honglak and Lewis, Richard L and Wang, Xiaoshi}, + booktitle={Advances in Neural Information Processing Systems}, + pages={3338--3346}, + publisher={Cambridge, MA: MIT Press}, + year={2014} +} +@inproceedings{scherrer2010should, + title={Should one compute the Temporal Difference fix point or minimize the Bellman Residual? The unified oblique projection view}, + author={Scherrer, Bruno}, + booktitle={Proc. 27th Int. Conf. Mach. 
Learn.}, + pages={959--966}, + year={2010} +} +@article{hirsch1989convergent, + title={Convergent activation dynamics in continuous time networks}, + author={Hirsch, Morris W}, + journal={Neural Netw.}, + volume={2}, + number={5}, + pages={331--349}, + year={1989}, + publisher={Elsevier} +} +@article{borkar1997stochastic, + title={Stochastic approximation with two time scales}, + author={Borkar, Vivek S}, + journal={Syst. \& Control Letters}, + volume={29}, + number={5}, + pages={291--294}, + year={1997}, + publisher={Elsevier} +} +@article{ortner2013adaptive, + title={Adaptive aggregation for reinforcement learning in average reward Markov decision processes}, + author={Ortner, Ronald}, + journal={Annals Oper. Res.}, + volume={208}, + number={1}, + pages={321--336}, + year={2013}, + publisher={Springer} +} +@article{jaksch2010near, + title={Near-optimal regret bounds for reinforcement learning}, + author={Jaksch, Thomas and Ortner, Ronald and Auer, Peter}, + journal={Journal of Machine Learning Research}, + month=apr, + volume={11}, + pages={1563--1600}, + year={2010} +} +@inproceedings{ortner2007logarithmic, + title={Logarithmic online regret bounds for undiscounted reinforcement learning}, + author={Auer, Peter and Ortner, Ronald}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + volume={19}, + pages={49--56}, + year={2007} +} +@article{das1999solving, + title={Solving semi-Markov decision problems using average reward reinforcement learning}, + author={Das, Tapas K and Gosavi, Abhijit and Mahadevan, Sridhar and Marchalleck, Nicholas}, + journal={Management Science}, + volume={45}, + number={4}, + pages={560--574}, + year={1999}, + publisher={INFORMS} +} +@article{abounadi2001learning, + title={Learning algorithms for Markov decision processes with average cost}, + author={Abounadi, Jinane and Bertsekas, D and Borkar, Vivek S}, + journal={SIAM J. 
Control Optim.}, + volume={40}, + number={3}, + pages={681--698}, + year={2001}, + publisher={SIAM} +} +@inproceedings{singh1994reinforcement, + title={Reinforcement learning algorithms for average-payoff Markovian decision processes}, + author={Singh, Satinder P}, + booktitle={AAAI}, + volume={94}, + pages={700--705}, + year={1994} +} +@inproceedings{schwartz1993reinforcement, + title={A reinforcement learning method for maximizing undiscounted rewards}, + author={Schwartz, Anton}, + booktitle={Proc. 10th Int. Conf. Mach. Learn.}, + volume={298}, + pages={298--305}, + year={1993} +} + +@inproceedings{yang2016efficient, + title={Efficient Average Reward Reinforcement Learning Using Constant Shifting Values}, + author={Yang, Shangdong and Gao, Yang and An, Bo and Wang, Hao and Chen, Xingguo}, + booktitle={Thirtieth AAAI Conference on Artificial Intelligence}, + pages={2258--2264}, + year={2016} +} +@inproceedings{devlin2012dynamic, + title={Dynamic potential-based reward shaping}, + author={Devlin, Sam and Kudenko, Daniel}, + booktitle={Proc. 11th Int. Conf. Autonomous Agents and Multiagent Systems}, + pages={433--440}, + year={2012} +} + +@inproceedings{ng1999policy, + title={Policy invariance under reward transformations: Theory and application to reward shaping}, + author={Ng, Andrew Y and Harada, Daishi and Russell, Stuart}, + booktitle={Proc. 16th Int. Conf. Mach. Learn.}, + pages={278--287}, + year={1999} +} +@article{borkar2000ode, + title={The ODE method for convergence of stochastic approximation and reinforcement learning}, + author={Borkar, Vivek S and Meyn, Sean P}, + journal={SIAM J. 
Control Optim.}, + volume={38}, + number={2}, + pages={447--469}, + year={2000}, + publisher={SIAM} +} +@phdthesis{maei2011gradient, + title={Gradient temporal-difference learning algorithms}, + author={Maei, Hamid Reza}, + year={2011}, + school={University of Alberta} +} +@phdthesis{baird1999reinforcement, + title={Reinforcement learning through gradient descent}, + author={Baird, III, Leemon C}, + year={1999}, + school={US Air Force Academy, US} +} +@PHDTHESIS{Driessens2004, + AUTHOR ="Kurt Driessens", + TITLE ="Relational Reinforcement Learning", + SCHOOL ="Catholic University of Leuven", + YEAR ="2004", +} +@article{tsitsiklis1996feature, + title={Feature-based methods for large scale dynamic programming}, + author={Tsitsiklis, John N and Van Roy, Benjamin}, + journal={Mach. Learn.}, + volume={22}, + number={1--3}, + pages={59--94}, + year={1996}, + publisher={Springer} +} +@inproceedings{chen2009apply, + title={Apply ant colony optimization to Tetris}, + author={Chen, X. and Wang, H. and Wang, W. and Shi, Y. and Gao, Y.}, + booktitle={Proceedings of the 11th Annual Conference on Genetic and Evolutionary Computation (GECCO)}, + pages={1741--1742}, + year={2009}, + organization={ACM} +} +@incollection{farias2006tetris, + title={Tetris: A study of randomized constraint sampling}, + author={Farias, Vivek F and Van Roy, Benjamin}, + booktitle={Probabilistic and Randomized Methods for Design Under Uncertainty}, + pages={189--201}, + year={2006}, + publisher={Springer} +} +@article{bertsekas1996temporal, + title={Temporal differences-based policy iteration and applications in neuro-dynamic programming}, + author={Bertsekas, Dimitri P and Ioffe, Sergey}, + journal={Lab. for Info. 
and Decision Systems Report LIDS-P-2349, MIT, Cambridge, MA}, + year={1996}, + publisher={Citeseer} +} +@inproceedings{kakade2001natural, + title={A Natural Policy Gradient}, + author={Kakade, Sham}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + volume={14}, + pages={1531--1538}, + year={2001} +} +@article{peters2008natural, + title={Natural actor-critic}, + author={Peters, Jan and Schaal, Stefan}, + journal={Neurocomputing}, + volume={71}, + number={7}, + pages={1180--1190}, + year={2008}, + publisher={Elsevier} +} +@article{baxter2001infinite, + title={Infinite-horizon policy-gradient estimation}, + author={Baxter, Jonathan and Bartlett, Peter L.}, + journal={J. Artif. Intell. Res.}, + pages={319--350}, + year={2001} +} +@inproceedings{sutton1999policy, + title={Policy Gradient Methods for Reinforcement Learning with Function Approximation}, + author={Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={1057--1063}, + year={1999} +} +@inproceedings{bohm2005evolutionary, + title={An evolutionary approach to Tetris}, + author={B{\"o}hm, Niko and K{\'o}kai, Gabriella and Mandl, Stefan}, + booktitle={Proc. 6th Metaheuristics Int. Conf.}, + pages={137--148}, + year={2005} +} +@article{szita2006learning, + title={Learning Tetris using the noisy cross-entropy method}, + author={Szita, Istv{\'a}n and L{\"o}rincz, Andr{\'a}s}, + journal={Neural Comput.}, + volume={18}, + number={12}, + pages={2936--2941}, + year={2006}, + publisher={MIT Press} +} +@inproceedings{thiery2010least, + title={Least-Squares $\lambda$ Policy Iteration: Bias-Variance Trade-off in Control Problems}, + author={Thiery, Christophe and Scherrer, Bruno}, + booktitle={Proc. 27th Int. Conf. Mach. 
Learn.}, + pages={1071--1078}, + year={2010} +} + +@inproceedings{gabillon2013approximate, + title={Approximate dynamic programming finally performs well in the game of Tetris}, + author={Gabillon, Victor and Ghavamzadeh, Mohammad and Scherrer, Bruno}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={1754--1762}, + year={2013} +} +@article{scherrer2013performance, + title={Performance bounds for $\lambda$ policy iteration and application to the game of Tetris}, + author={Scherrer, Bruno}, + journal={J. Mach. Learn. Res.}, + volume={14}, + number={1}, + pages={1181--1227}, + year={2013}, + publisher={JMLR.org} +} +@article{thiery2009improvements, + title={Improvements on Learning Tetris with Cross Entropy}, + author={Thiery, Christophe and Scherrer, Bruno}, + journal={Int. Computer Games Assoc. J.}, + volume={32}, + number={1}, + pages={23--33}, + year={2009} +} +@article{scherrer2015approximate, + title={Approximate Modified Policy Iteration and its Application to the Game of Tetris}, + author={Scherrer, Bruno and Ghavamzadeh, Mohammad and Gabillon, Victor and Lesner, Boris and Geist, Matthieu}, + journal={J. Mach. Learn. Res.}, + volume={16}, + pages={1629--1676}, + year={2015} +} + +@article{efron2004least, + title={Least angle regression}, + author={Efron, Bradley and Hastie, Trevor and Johnstone, Iain and Tibshirani, Robert}, + journal={The Annals of Statistics}, + volume={32}, + number={2}, + pages={407--499}, + year={2004}, + publisher={Institute of Mathematical Statistics} +} +@MASTERSTHESIS{Brzustowski1992, + author ={John Brzustowski}, + title ={Can you win at Tetris?}, + school = {University of British Columbia}, + year ={1992} +} +@Article{Breukelaar04, + author = {Ron Breukelaar and Erik D. Demaine and Susan + Hohenberger and Hendrik Jan Hoogeboom and Walter + A. 
Kosters and David Liben-Nowell}, + title = {Tetris is Hard, Even to Approximate}, + journal = {International Journal of Computational Geometry and + Applications}, + year = {2004}, + volume = {14}, + number = {1--2}, + pages = {41--68}, + month = apr, +} +@book{Bertsekas1996, + author = {Bertsekas, D. and Tsitsiklis, J. N.}, + title = {Neuro-Dynamic Programming}, + year = {1996}, + publisher = {Athena Scientific}, +} +@inproceedings{maei2010gq, + title={GQ ($\lambda$): A general gradient algorithm for temporal-difference prediction learning with eligibility traces}, + author={Maei, Hamid Reza and Sutton, Richard S}, + booktitle={Proceedings of the Third Conference on Artificial General Intelligence}, + volume={1}, + pages={91--96}, + year={2010} +} +@inproceedings{maei2010toward, + title={Toward off-policy learning control with function approximation}, + author={Maei, Hamid R and Szepesv{\'a}ri, Csaba and Bhatnagar, Shalabh and Sutton, Richard S}, + booktitle={Proc. 27th Int. Conf. Mach. Learn.}, + pages={719--726}, + year={2010} +} +@inproceedings{phua2007tracking, + title={Tracking value function dynamics to improve reinforcement learning with piecewise linear function approximation}, + author={Phua, Chee Wee and Fitch, Robert}, + booktitle={Proc. 24th Int. Conf. Mach. Learn.}, + pages={751--758}, + year={2007}, + organization={ACM} +} +@inproceedings{szubert2014temporal, + title={Temporal difference learning of N-tuple networks for the game 2048}, + author={Szubert, Marcin and Jaskowski, Wojciech}, + booktitle={2014 IEEE Conference on Computational Intelligence and Games (CIG)}, + pages={1--8}, + year={2014}, + organization={IEEE} +} +@article{chen2013online, + title={Online Selective Kernel-based Temporal Difference Learning}, + author={Chen, Xingguo and Gao, Yang and Wang, Ruili}, + journal={IEEE Trans. Neural Netw. Learn. 
Syst.}, + year={2013}, + volume={24}, + number={12}, + pages={1944--1956}, + publisher={IEEE} +} + +@article{xu2007kernel, + title={Kernel-based least squares policy iteration for reinforcement learning}, + author={Xu, Xin and Hu, Dewen and Lu, Xicheng}, + journal={IEEE Trans. Neural Netw.}, + volume={18}, + number={4}, + pages={973--992}, + year={2007}, + publisher={IEEE} +} +@INPROCEEDINGS{Engel03bayesmeets, + author = {Yaakov Engel and Shie Mannor and Ron Meir}, + title = {Bayes meets {Bellman}: the {Gaussian} process approach to temporal difference learning}, + booktitle = {Proc. 20th Int. Conf. Mach. Learn.}, + year = {2003}, + pages = {154--161}, + address={Washington, DC}, + month=aug, +} +@inproceedings{robards2011sparse, + title={Sparse Kernel-SARSA ($\lambda$) with an eligibility trace}, + author={Robards, M. and Sunehag, P. and Sanner, S. and Marthi, B.}, + booktitle = {Proc. 22nd Eur. Conf. Mach. Learn.}, + pages={1--17}, + year={2011}, + month=sep, + address = {Athens, Greece}, +} +@inproceedings{reisinger2008online, + title={Online kernel selection for {Bayesian} reinforcement learning}, + author={Reisinger, J. and Stone, P. and Miikkulainen, R.}, + booktitle={Proc. 25th Int. Conf. Mach. Learn.}, + pages={816--823}, + year={2008}, + month=jul, + address={Helsinki, Finland}, +} +@book{Sutton1998, + title={Reinforcement Learning: An Introduction}, + author={Sutton, R. S. and Barto, A. G.}, + year={1998}, + publisher={MIT Press}, + address={Cambridge, MA} +} +@book{Sutton2018book, + author = {Sutton, Richard S. 
and Barto, Andrew G.}, + edition = {Second}, + publisher = {The MIT Press}, + title = {Reinforcement Learning: An Introduction}, + year = {2018} +} +@phdthesis{Bradtke1994phd, + title={Incremental Dynamic Programming for On-line Adaptive Optimal Control}, + author={Bradtke, Steven J}, + year={1994}, + school={University of Massachusetts}, + month=sep, + address={Amherst}, +} +@inproceedings{baird1995residual, + title={Residual algorithms: Reinforcement learning with function approximation}, + author={Baird, Leemon}, + booktitle={Proc. 12th Int. Conf. Mach. Learn.}, + pages={30--37}, + year={1995} +} +@article{bradtke1996linear, + title={Linear least-squares algorithms for temporal difference learning}, + author={Bradtke, S. J. and Barto, A. G.}, + journal={Mach. Learn.}, + volume={22}, + number={1}, + pages={33--57}, + year={1996}, + publisher={Springer} +} +@article{lagoudakis2003least, + title={Least-squares policy iteration}, + author={Lagoudakis, M. G. and Parr, R.}, + journal={J. Mach. Learn. Res.}, + volume={4}, + pages={1107--1149}, + year={2003}, + publisher={JMLR.org} +} +@article{boyan2002technical, + title={Technical update: Least-squares temporal difference learning}, + author={Boyan, J. A.}, + journal={Mach. Learn.}, + volume={49}, + number={2}, + pages={233--246}, + year={2002}, + publisher={Springer} +} +@inproceedings{geramifard2006incremental, + title={Incremental least-squares temporal difference learning}, + author={Geramifard, A. and Bowling, M. and Sutton, R. S.}, + booktitle={Proc. 21st AAAI Conf. Artif. Intell.}, + pages={356--361}, + year={2006}, + month=jul, + address={Boston, Massachusetts}, +} +@inproceedings{sutton2009fast, + title={Fast gradient-descent methods for temporal-difference learning with linear function approximation}, + author={Sutton, R. S. and Maei, H. R. and Precup, D. and Bhatnagar, S. and Silver, D. and Szepesv{\'a}ri, C. and Wiewiora, E.}, + booktitle={Proc. 26th Int. Conf. Mach. 
Learn.}, + pages={993--1000}, + year={2009} +} +@inproceedings{sutton2008convergent, + title={A Convergent $ O (n) $ Temporal-difference Algorithm for Off-policy Learning with Linear Function Approximation}, + author={Sutton, Richard S and Maei, Hamid R and Szepesv{\'a}ri, Csaba}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={1609--1616}, + year={2008} +} +@inproceedings{dabney2014natural, + title={Natural Temporal Difference Learning}, + author={Dabney, William and Thomas, Philip}, + booktitle={Twenty-Eighth AAAI Conference on Artificial Intelligence}, + year={2014} +} +@inproceedings{mahmood2014weighted, + title={Weighted importance sampling for off-policy learning with linear function approximation}, + author={Mahmood, A Rupam and van Hasselt, Hado P and Sutton, Richard S}, + booktitle={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + pages={3014--3022}, + year={2014} +} +@inproceedings{seijen2014true, + title={True Online TD ($\lambda$)}, + author={van Seijen, Harm and Sutton, Richard S}, + booktitle={Proc. 31st Int. Conf. Mach. Learn.}, + pages={692--700}, + year={2014} +} +@article{ormoneit2002kernel, + title={Kernel-based reinforcement learning}, + author={Ormoneit, D. and Sen, {\'S}.}, + journal={Mach. Learn.}, + volume={49}, + number={2--3}, + pages={161--178}, + issn={0885-6125}, + year={2002}, + publisher={Springer-Verlag}, + address = {Hingham, MA, USA}, +} +@inproceedings{Ghavamzadeh2010lstd, + author = {M. Ghavamzadeh and A. Lazaric and O. A. Maillard and R. Munos}, + title = {{LSTD} with Random Projections}, + BOOKTITLE={Advances in Neural Information Processing Systems}, + publisher={Cambridge, MA: MIT Press}, + volume = {23}, + pages = {721--729}, + Address = {Lake Tahoe, Nevada, USA}, + year = {2010} +} +@inproceedings{loth2007sparse, + title={Sparse temporal difference learning using LASSO}, + author={Loth, M. and Davy, M. 
and Preux, P.}, + booktitle={Proc. IEEE Symp. Approx. Dynamic Program. Reinforce. Learn.}, + pages={352--359}, + year={2007}, + organization={IEEE} +} +@inproceedings{kolter2009regularization, + title={Regularization and feature selection in least-squares temporal difference learning}, + author={Kolter, J. Z. and Ng, A. Y.}, + booktitle={Proc. 26th Int. Conf. Mach. Learn.}, + pages={521--528}, + year={2009}, + organization={ACM} +} +@inproceedings{hoffman2011regularized, + title={Regularized least squares temporal difference learning with nested l2 and l1 penalization}, + author={Hoffman, M. W. and Lazaric, A. and Ghavamzadeh, M. and Munos, R.}, + booktitle={Proc. Eur. Workshop Reinforce. Learn.}, + year={2011} +} +@inproceedings{Ghavamzadeh2011finite, + author = {M. Ghavamzadeh and A. Lazaric and R. Munos and M. Hoffman}, + title = {Finite-Sample Analysis of {Lasso-TD}}, + booktitle = {Proc. 28th Int. Conf. Mach. Learn.}, + year = {2011}, + month= jun, + address={Bellevue, Washington, USA}, + pages={1177--1184}, +} +@inproceedings{johnson2013accelerating, + title={Accelerating stochastic gradient descent using predictive variance reduction}, + author={Johnson, R. and Zhang, T.}, + booktitle={Advances in Neural Information Processing Systems}, + pages={315--323}, + year={2013} +} +@article{xu2020reanalysis, + title={Reanalysis of variance reduced temporal difference learning}, + author={Xu, T. and Wang, Z. and Zhou, Y. and Liang, Y.}, + journal={arXiv preprint arXiv:2001.01898}, + year={2020} +} +@inproceedings{schulman2015trust, + title={Trust region policy optimization}, + author={Schulman, J. and Levine, S. and Abbeel, P. and Jordan, M. and Moritz, P.}, + booktitle={International Conference on Machine Learning}, + pages={1889--1897}, + year={2015} +} +@article{schulman2017proximal, + title={Proximal policy optimization algorithms}, + author={Schulman, J. and Wolski, F. and Dhariwal, P. and Radford, A. 
and Klimov, O.}, + journal={arXiv preprint arXiv:1707.06347}, + year={2017} +} +@inproceedings{defazio2014saga, + title={SAGA: A fast incremental gradient method with support for non-strongly convex composite objectives}, + author={Defazio, A. and Bach, F. and Lacoste-Julien, S.}, + booktitle={Advances in Neural Information Processing Systems}, + pages={1646--1654}, + year={2014} +} +@inproceedings{du2017stochastic, + title={Stochastic variance reduction methods for policy evaluation}, + author={Du, S. S. and Chen, J. and Li, L. and Xiao, L. and Zhou, D.}, + booktitle={Proceedings of the 34th International Conference on Machine Learning}, + pages={1049--1058}, + year={2017} +} +@inproceedings{chen2023modified, + title={Modified Retrace for Off-Policy Temporal Difference Learning}, + author={Chen, Xingguo and Ma, Xingzhou and Li, Yang and Yang, Guang and Yang, Shangdong and Gao, Yang}, + booktitle={Uncertainty in Artificial Intelligence}, + pages={303--312}, + year={2023}, + organization={PMLR} +} +@article{dalal2017finite, + title={Finite Sample Analyses for TD(0) with Function Approximation}, + author={Dalal, Gal and Sz{\"o}r{\'e}nyi, Bal{\'a}zs and Thoppe, Gugan and Mannor, Shie}, + journal={arXiv preprint arXiv:1704.01161}, + year={2017} +} +@article{sutton1988learning, + title={Learning to predict by the methods of temporal differences}, + author={Sutton, Richard S}, + journal={Machine Learning}, + volume={3}, + number={1}, + pages={9--44}, + year={1988}, + publisher={Springer} +} +@inproceedings{tsitsiklis1997analysis, + title={Analysis of temporal-difference learning with function approximation}, + author={Tsitsiklis, John N and Van Roy, Benjamin}, + booktitle={Advances in Neural Information Processing Systems}, + pages={1075--1081}, + year={1997} +} +@article{sutton2016emphatic, + title={An emphatic approach to the problem of off-policy temporal-difference learning}, + author={Sutton, Richard S and Mahmood, A Rupam and White, Martha}, + journal={The Journal of Machine 
Learning Research}, + volume={17}, + number={1}, + pages={2603--2631}, + year={2016}, + publisher={JMLR.org} +} +@inproceedings{liu2015finite, + title={Finite-sample analysis of proximal gradient TD algorithms}, + author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek}, + booktitle={Proceedings of the 31st Conference on Uncertainty in Artificial Intelligence}, + pages={504--513}, + year={2015} +} +@inproceedings{liu2016proximal, + title={Proximal Gradient Temporal Difference Learning Algorithms}, + author={Liu, Bo and Liu, Ji and Ghavamzadeh, Mohammad and Mahadevan, Sridhar and Petrik, Marek}, + booktitle={Proceedings of the International Joint Conference on Artificial Intelligence}, + pages={4195--4199}, + year={2016} +} +@article{liu2018proximal, + title={Proximal gradient temporal difference learning: Stable reinforcement learning with polynomial sample complexity}, + author={Liu, Bo and Gemp, Ian and Ghavamzadeh, Mohammad and Liu, Ji and Mahadevan, Sridhar and Petrik, Marek}, + journal={Journal of Artificial Intelligence Research}, + volume={63}, + pages={461--494}, + year={2018} +} +@inproceedings{givchi2015quasi, + title={Quasi {Newton} temporal difference learning}, + author={Givchi, Arash and Palhang, Maziar}, + booktitle={Asian Conference on Machine Learning}, + pages={159--172}, + year={2015} +} +@inproceedings{pan2017accelerated, + title={Accelerated gradient temporal difference learning}, + author={Pan, Yangchen and White, Adam and White, Martha}, + booktitle={Proceedings of the 31st AAAI Conference on Artificial Intelligence}, + pages={2464--2470}, + year={2017} +} +@inproceedings{hallak2016generalized, + title={Generalized emphatic temporal difference learning: bias-variance analysis}, + author={Hallak, Assaf and Tamar, Aviv and Munos, Remi and Mannor, Shie}, + booktitle={Proceedings of the 30th AAAI Conference on Artificial Intelligence}, + pages={1631--1637}, + year={2016} +} +@article{zhang2022truncated, + 
title={Truncated emphatic temporal difference methods for prediction and control}, + author={Zhang, Shangtong and Whiteson, Shimon}, + journal={The Journal of Machine Learning Research}, + volume={23}, + number={1}, + pages={6859--6917}, + year={2022}, + publisher={JMLR.org} +} +@inproceedings{korda2015td, + title={On TD (0) with function approximation: Concentration bounds and a centered variant with exponential convergence}, + author={Korda, Nathaniel and La, Prashanth}, + booktitle={International Conference on Machine Learning}, + pages={626--634}, + year={2015}, + organization={PMLR} +} +@book{zhou2021machine, + title={Machine learning}, + author={Zhou, Zhi-Hua}, + year={2021}, + publisher={Springer Nature} +} +@inproceedings{dalal2020tale, + title={A tale of two-timescale reinforcement learning with the tightest finite-time bound}, + author={Dalal, Gal and Sz{\"o}r{\'e}nyi, Bal{\'a}zs and Thoppe, Gugan}, + booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, + volume={34}, + number={04}, + pages={3701--3708}, + year={2020} +} +@inproceedings{feng2019kernel, + title={A kernel loss for solving the Bellman equation}, + author={Feng, Yihao and Li, Lihong and Liu, Qiang}, + booktitle={Advances in Neural Information Processing Systems}, + pages={15430--15441}, + year={2019} +} +@inproceedings{basserrano2021logistic, + title={Logistic Q-Learning}, + author={Bas-Serrano, Joan and Curi, Sebastian and Krause, Andreas and Neu, Gergely}, + booktitle={International Conference on Artificial Intelligence and Statistics}, + pages={3610--3618}, + year={2021} +} + + + + + + + + + diff --git b/Apendix/aaai24.bst a/Apendix/aaai24.bst new file mode 100644 index 0000000..05b1d4e --- /dev/null +++ a/Apendix/aaai24.bst @@ -0,0 +1,1493 @@ +%% +%% This is file `aaai22.bst', +%% generated with the docstrip utility. 
+%% +%% The original source files were: +%% +%% merlin.mbs (with options: `head,ay,nat,ed-au,nm-rev,ed-rev,jnrlst,aunm-semi,mcite,mct-1,mct-x3,keyxyr,dt-beg,yr-per,yrp-per,note-yr,atit-u,volp-sp,num-xser,bkpg-x,add-pub,isbn,ppx,ed,xedn,and-com,and-com-ed,etal-xc,nfss,,{}') +%% merlin.mbs (with options: `tail,ay,nat,ed-au,nm-rev,ed-rev,jnrlst,aunm-semi,mcite,mct-1,mct-x3,keyxyr,dt-beg,yr-per,yrp-per,note-yr,atit-u,volp-sp,num-xser,bkpg-x,add-pub,isbn,ppx,ed,xedn,and-com,and-com-ed,etal-xc,nfss,,{}') +%% ---------------------------------------- +%% *** Natbib-compatible implementation of 'aaai' bib style *** +%% + % =============================================================== + % IMPORTANT NOTICE: + % This bibliographic style (bst) file has been generated from one or + % more master bibliographic style (mbs) files, listed above. + % + % This generated file can be redistributed and/or modified under the terms + % of the LaTeX Project Public License Distributed from CTAN + % archives in directory macros/latex/base/lppl.txt; either + % version 1 of the License, or any later version. + % =============================================================== + % Name and version information of the main mbs file: + % \ProvidesFile{merlin.mbs}[2011/11/18 4.33 (PWD, AO, DPC)] + % For use with BibTeX version 0.99a or later + %------------------------------------------------------------------- + % This bibliography style file is intended for texts in ENGLISH + % This is an author-year citation style bibliography. As such, it is + % non-standard LaTeX, and requires a special package file to function properly. + % Such a package is natbib.sty by Patrick W. Daly + % The form of the \bibitem entries is + % \bibitem[Jones et al.(1990)]{key}... + % \bibitem[Jones et al.(1990)Jones, Baker, and Smith]{key}... 
+ % The essential feature is that the label (the part in brackets) consists + % of the author names, as they should appear in the citation, with the year + % in parentheses following. There must be no space before the opening + % parenthesis! + % With natbib v5.3, a full list of authors may also follow the year. + % In natbib.sty, it is possible to define the type of enclosures that is + % really wanted (brackets or parentheses), but in either case, there must + % be parentheses in the label. + % The \cite command functions as follows: + % \citet{key} ==>> Jones et al. (1990) + % \citet*{key} ==>> Jones, Baker, and Smith (1990) + % \citep{key} ==>> (Jones et al., 1990) + % \citep*{key} ==>> (Jones, Baker, and Smith, 1990) + % \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2) + % \citep[e.g.][]{key} ==>> (e.g. Jones et al., 1990) + % \citep[e.g.][p. 32]{key} ==>> (e.g. Jones et al., 1990, p. 32) + % \citeauthor{key} ==>> Jones et al. + % \citeauthor*{key} ==>> Jones, Baker, and Smith + % \citeyear{key} ==>> 1990 + %--------------------------------------------------------------------- + +ENTRY + { address + archivePrefix + author + booktitle + chapter + edition + editor + eid + eprint + howpublished + institution + isbn + journal + key + month + note + number + organization + pages + publisher + school + series + title + type + volume + year + } + {} + { label extra.label sort.label short.list } +INTEGERS { output.state before.all mid.sentence after.sentence after.block } +FUNCTION {init.state.consts} +{ #0 'before.all := + #1 'mid.sentence := + #2 'after.sentence := + #3 'after.block := +} +STRINGS { s t} +FUNCTION {output.nonnull} +{ 's := + output.state mid.sentence = + { ", " * write$ } + { output.state after.block = + { add.period$ write$ + newline$ + "\newblock " write$ + } + { output.state before.all = + 'write$ + { add.period$ " " * write$ } + if$ + } + if$ + mid.sentence 'output.state := + } + if$ + s +} +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ 
+ 'output.nonnull + if$ +} +FUNCTION {output.check} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} +FUNCTION {fin.entry} +{ add.period$ + write$ + newline$ +} + +FUNCTION {new.block} +{ output.state before.all = + 'skip$ + { after.block 'output.state := } + if$ +} +FUNCTION {new.sentence} +{ output.state after.block = + 'skip$ + { output.state before.all = + 'skip$ + { after.sentence 'output.state := } + if$ + } + if$ +} +FUNCTION {add.blank} +{ " " * before.all 'output.state := +} + +FUNCTION {date.block} +{ + new.block +} + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} +FUNCTION {and} +{ 'skip$ + { pop$ #0 } + if$ +} +FUNCTION {or} +{ { pop$ #1 } + 'skip$ + if$ +} +FUNCTION {new.block.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.block + if$ +} +FUNCTION {field.or.null} +{ duplicate$ empty$ + { pop$ "" } + 'skip$ + if$ +} +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "\emph{" swap$ * "}" * } + if$ +} +FUNCTION {tie.or.space.prefix} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ +} + +FUNCTION {capitalize} +{ "u" change.case$ "t" change.case$ } + +FUNCTION {space.word} +{ " " swap$ * " " * } + % Here are the language-specific definitions for explicit words. + % Each function has a name bbl.xxx where xxx is the English word. + % The language selected here is ENGLISH +FUNCTION {bbl.and} +{ "and"} + +FUNCTION {bbl.etal} +{ "et~al." } + +FUNCTION {bbl.editors} +{ "eds." } + +FUNCTION {bbl.editor} +{ "ed." } + +FUNCTION {bbl.edby} +{ "edited by" } + +FUNCTION {bbl.edition} +{ "edition" } + +FUNCTION {bbl.volume} +{ "volume" } + +FUNCTION {bbl.of} +{ "of" } + +FUNCTION {bbl.number} +{ "number" } + +FUNCTION {bbl.nr} +{ "no." 
} + +FUNCTION {bbl.in} +{ "in" } + +FUNCTION {bbl.pages} +{ "" } + +FUNCTION {bbl.page} +{ "" } + +FUNCTION {bbl.chapter} +{ "chapter" } + +FUNCTION {bbl.techrep} +{ "Technical Report" } + +FUNCTION {bbl.mthesis} +{ "Master's thesis" } + +FUNCTION {bbl.phdthesis} +{ "Ph.D. thesis" } + +MACRO {jan} {"January"} + +MACRO {feb} {"February"} + +MACRO {mar} {"March"} + +MACRO {apr} {"April"} + +MACRO {may} {"May"} + +MACRO {jun} {"June"} + +MACRO {jul} {"July"} + +MACRO {aug} {"August"} + +MACRO {sep} {"September"} + +MACRO {oct} {"October"} + +MACRO {nov} {"November"} + +MACRO {dec} {"December"} + +MACRO {acmcs} {"ACM Computing Surveys"} + +MACRO {acta} {"Acta Informatica"} + +MACRO {cacm} {"Communications of the ACM"} + +MACRO {ibmjrd} {"IBM Journal of Research and Development"} + +MACRO {ibmsj} {"IBM Systems Journal"} + +MACRO {ieeese} {"IEEE Transactions on Software Engineering"} + +MACRO {ieeetc} {"IEEE Transactions on Computers"} + +MACRO {ieeetcad} + {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} + +MACRO {ipl} {"Information Processing Letters"} + +MACRO {jacm} {"Journal of the ACM"} + +MACRO {jcss} {"Journal of Computer and System Sciences"} + +MACRO {scp} {"Science of Computer Programming"} + +MACRO {sicomp} {"SIAM Journal on Computing"} + +MACRO {tocs} {"ACM Transactions on Computer Systems"} + +MACRO {tods} {"ACM Transactions on Database Systems"} + +MACRO {tog} {"ACM Transactions on Graphics"} + +MACRO {toms} {"ACM Transactions on Mathematical Software"} + +MACRO {toois} {"ACM Transactions on Office Information Systems"} + +MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} + +MACRO {tcs} {"Theoretical Computer Science"} +FUNCTION {bibinfo.check} +{ swap$ + duplicate$ missing$ + { + pop$ pop$ + "" + } + { duplicate$ empty$ + { + swap$ pop$ + } + { swap$ + pop$ + } + if$ + } + if$ +} +FUNCTION {bibinfo.warn} +{ swap$ + duplicate$ missing$ + { + swap$ "missing " swap$ * " in " * cite$ * warning$ pop$ + "" + } + { 
duplicate$ empty$ + { + swap$ "empty " swap$ * " in " * cite$ * warning$ + } + { swap$ + pop$ + } + if$ + } + if$ +} +FUNCTION {format.eprint} +{ eprint duplicate$ empty$ + 'skip$ + { archivePrefix duplicate$ empty$ + 'skip$ + { ":" * swap$ } + if$ + * "." * + } + if$ +} +INTEGERS { nameptr namesleft numnames } + + +STRINGS { bibinfo} + +FUNCTION {format.names} +{ 'bibinfo := + duplicate$ empty$ 'skip$ { + 's := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}{, f.}{, jj}" + format.name$ + bibinfo bibinfo.check + 't := + nameptr #1 > + { + namesleft #1 > + { "; " * t * } + { + s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + ";" * + t "others" = + { + " " * bbl.etal * + } + { + bbl.and + space.word * t * + } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ + } if$ +} +FUNCTION {format.names.ed} +{ + format.names +} +FUNCTION {format.key} +{ empty$ + { key field.or.null } + { "" } + if$ +} + +FUNCTION {format.authors} +{ author "author" format.names +} +FUNCTION {get.bbl.editor} +{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ } + +FUNCTION {format.editors} +{ editor "editor" format.names duplicate$ empty$ 'skip$ + { + "," * + " " * + get.bbl.editor + * + } + if$ +} +FUNCTION {format.isbn} +{ isbn "isbn" bibinfo.check + duplicate$ empty$ 'skip$ + { + new.block + "ISBN " swap$ * + } + if$ +} + +FUNCTION {format.note} +{ + note empty$ + { "" } + { note #1 #1 substring$ + duplicate$ "{" = + 'skip$ + { output.state mid.sentence = + { "l" } + { "u" } + if$ + change.case$ + } + if$ + note #2 global.max$ substring$ * "note" bibinfo.check + } + if$ +} + +FUNCTION {format.title} +{ title + "title" bibinfo.check +} +FUNCTION {format.full.names} +{'s := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}" format.name$ + 't := + 
nameptr #1 > + { + namesleft #1 > + { ", " * t * } + { + s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + t "others" = + { + " " * bbl.etal * + } + { + numnames #2 > + { "," * } + 'skip$ + if$ + bbl.and + space.word * t * + } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {author.editor.key.full} +{ author empty$ + { editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.full.names } + if$ + } + { author format.full.names } + if$ +} + +FUNCTION {author.key.full} +{ author empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { author format.full.names } + if$ +} + +FUNCTION {editor.key.full} +{ editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.full.names } + if$ +} + +FUNCTION {make.full.names} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.key.full + { type$ "proceedings" = + 'editor.key.full + 'author.key.full + if$ + } + if$ +} + +FUNCTION {output.bibitem} +{ newline$ + "\bibitem[{" write$ + label write$ + ")" make.full.names duplicate$ short.list = + { pop$ } + { * } + if$ + "}]{" * write$ + cite$ write$ + "}" write$ + newline$ + "" + before.all 'output.state := +} + +FUNCTION {n.dashify} +{ + 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + +FUNCTION {word.in} +{ bbl.in capitalize + " " * } + +FUNCTION {format.date} +{ year "year" bibinfo.check duplicate$ empty$ + { + "empty year in " cite$ * "; set to ????" * warning$ + pop$ "????" 
+ } + 'skip$ + if$ + extra.label * + before.all 'output.state := + after.sentence 'output.state := +} +FUNCTION {format.btitle} +{ title "title" bibinfo.check + duplicate$ empty$ 'skip$ + { + emphasize + } + if$ +} +FUNCTION {either.or.check} +{ empty$ + 'pop$ + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} +FUNCTION {format.bvolume} +{ volume empty$ + { "" } + { bbl.volume volume tie.or.space.prefix + "volume" bibinfo.check * * + series "series" bibinfo.check + duplicate$ empty$ 'pop$ + { swap$ bbl.of space.word * swap$ + emphasize * } + if$ + "volume and number" number either.or.check + } + if$ +} +FUNCTION {format.number.series} +{ volume empty$ + { number empty$ + { series field.or.null } + { series empty$ + { number "number" bibinfo.check } + { output.state mid.sentence = + { bbl.number } + { bbl.number capitalize } + if$ + number tie.or.space.prefix "number" bibinfo.check * * + bbl.in space.word * + series "series" bibinfo.check * + } + if$ + } + if$ + } + { "" } + if$ +} + +FUNCTION {format.edition} +{ edition duplicate$ empty$ 'skip$ + { + output.state mid.sentence = + { "l" } + { "t" } + if$ change.case$ + "edition" bibinfo.check + " " * bbl.edition * + } + if$ +} +INTEGERS { multiresult } +FUNCTION {multi.page.check} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} +FUNCTION {format.pages} +{ pages duplicate$ empty$ 'skip$ + { duplicate$ multi.page.check + { + n.dashify + } + { + } + if$ + "pages" bibinfo.check + } + if$ +} +FUNCTION {format.journal.pages} +{ pages duplicate$ empty$ 'pop$ + { swap$ duplicate$ empty$ + { pop$ pop$ format.pages } + { + ": " * + swap$ + n.dashify + "pages" bibinfo.check + * + } + if$ + } + if$ +} +FUNCTION {format.journal.eid} +{ eid "eid" bibinfo.check + duplicate$ empty$ 'pop$ + { 
swap$ duplicate$ empty$ 'skip$ + { + ": " * + } + if$ + swap$ * + } + if$ +} +FUNCTION {format.vol.num.pages} +{ volume field.or.null + duplicate$ empty$ 'skip$ + { + "volume" bibinfo.check + } + if$ + number "number" bibinfo.check duplicate$ empty$ 'skip$ + { + swap$ duplicate$ empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + swap$ + "(" swap$ * ")" * + } + if$ * + eid empty$ + { format.journal.pages } + { format.journal.eid } + if$ +} + +FUNCTION {format.chapter.pages} +{ chapter empty$ + 'format.pages + { type empty$ + { bbl.chapter } + { type "l" change.case$ + "type" bibinfo.check + } + if$ + chapter tie.or.space.prefix + "chapter" bibinfo.check + * * + pages empty$ + 'skip$ + { ", " * format.pages * } + if$ + } + if$ +} + +FUNCTION {format.booktitle} +{ + booktitle "booktitle" bibinfo.check + emphasize +} +FUNCTION {format.in.ed.booktitle} +{ format.booktitle duplicate$ empty$ 'skip$ + { + editor "editor" format.names.ed duplicate$ empty$ 'pop$ + { + "," * + " " * + get.bbl.editor + ", " * + * swap$ + * } + if$ + word.in swap$ * + } + if$ +} +FUNCTION {format.thesis.type} +{ type duplicate$ empty$ + 'pop$ + { swap$ pop$ + "t" change.case$ "type" bibinfo.check + } + if$ +} +FUNCTION {format.tr.number} +{ number "number" bibinfo.check + type duplicate$ empty$ + { pop$ bbl.techrep } + 'skip$ + if$ + "type" bibinfo.check + swap$ duplicate$ empty$ + { pop$ "t" change.case$ } + { tie.or.space.prefix * * } + if$ +} +FUNCTION {format.article.crossref} +{ + word.in + " \cite{" * crossref * "}" * +} +FUNCTION {format.book.crossref} +{ volume duplicate$ empty$ + { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ + pop$ word.in + } + { bbl.volume + capitalize + swap$ tie.or.space.prefix "volume" bibinfo.check * * bbl.of space.word * + } + if$ + " \cite{" * crossref * "}" * +} +FUNCTION {format.incoll.inproc.crossref} +{ + word.in + " \cite{" * crossref * "}" * +} +FUNCTION {format.org.or.pub} +{ 't := + "" + address 
empty$ t empty$ and + 'skip$ + { + address "address" bibinfo.check * + t empty$ + 'skip$ + { address empty$ + 'skip$ + { ": " * } + if$ + t * + } + if$ + } + if$ +} +FUNCTION {format.publisher.address} +{ publisher "publisher" bibinfo.warn format.org.or.pub +} + +FUNCTION {format.organization.address} +{ organization "organization" bibinfo.check format.org.or.pub +} + +FUNCTION {article} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + crossref missing$ + { + journal + "journal" bibinfo.check + emphasize + "journal" output.check + format.vol.num.pages output + } + { format.article.crossref output.nonnull + format.pages output + } + if$ + new.block + format.note output + fin.entry +} +FUNCTION {book} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + format.date "year" output.check + date.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + new.block + format.number.series output + new.sentence + format.publisher.address output + } + { + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.isbn output + new.block + format.note output + fin.entry +} +FUNCTION {booklet} +{ output.bibitem + format.authors output + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + howpublished "howpublished" bibinfo.check output + address "address" bibinfo.check output + format.isbn output + new.block + format.note output + fin.entry +} + +FUNCTION {inbook} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref 
missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + format.date "year" output.check + date.block + format.btitle "title" output.check + crossref missing$ + { + format.bvolume output + format.chapter.pages "chapter and pages" output.check + new.block + format.number.series output + new.sentence + format.publisher.address output + } + { + format.chapter.pages "chapter and pages" output.check + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + crossref missing$ + { format.isbn output } + 'skip$ + if$ + new.block + format.note output + fin.entry +} + +FUNCTION {incollection} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.chapter.pages output + new.sentence + format.publisher.address output + format.edition output + format.isbn output + } + { format.incoll.inproc.crossref output.nonnull + format.chapter.pages output + } + if$ + new.block + format.note output + fin.entry +} +FUNCTION {inproceedings} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.pages output + new.sentence + publisher empty$ + { format.organization.address output } + { organization "organization" bibinfo.check output + format.publisher.address output + } + if$ + format.isbn output + } + { format.incoll.inproc.crossref output.nonnull + format.pages output + } + if$ + new.block + format.note output + fin.entry +} +FUNCTION {conference} { inproceedings } +FUNCTION {manual} +{ output.bibitem + format.authors output + 
author format.key output + format.date "year" output.check + date.block + format.btitle "title" output.check + organization address new.block.checkb + organization "organization" bibinfo.check output + address "address" bibinfo.check output + format.edition output + new.block + format.note output + fin.entry +} + +FUNCTION {mastersthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.btitle + "title" output.check + new.block + bbl.mthesis format.thesis.type output.nonnull + school "school" bibinfo.warn output + address "address" bibinfo.check output + new.block + format.note output + fin.entry +} + +FUNCTION {misc} +{ output.bibitem + format.authors output + author format.key output + format.date "year" output.check + date.block + format.title output + new.block + howpublished "howpublished" bibinfo.check output + new.block + format.note output + format.eprint output + fin.entry +} +FUNCTION {phdthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.btitle + "title" output.check + new.block + bbl.phdthesis format.thesis.type output.nonnull + school "school" bibinfo.warn output + address "address" bibinfo.check output + new.block + format.note output + fin.entry +} + +FUNCTION {proceedings} +{ output.bibitem + format.editors output + editor format.key output + format.date "year" output.check + date.block + format.btitle "title" output.check + format.bvolume output + format.number.series output + new.sentence + publisher empty$ + { format.organization.address output } + { organization "organization" bibinfo.check output + format.publisher.address output + } + if$ + format.isbn output + new.block + format.note output + fin.entry +} + +FUNCTION {techreport} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block 
+ format.title + "title" output.check + new.block + format.tr.number output.nonnull + institution "institution" bibinfo.warn output + address "address" bibinfo.check output + new.block + format.note output + fin.entry +} + +FUNCTION {unpublished} +{ output.bibitem + format.authors "author" output.check + author format.key output + format.date "year" output.check + date.block + format.title "title" output.check + new.block + format.note "note" output.check + fin.entry +} + +FUNCTION {default.type} { misc } +READ +FUNCTION {sortify} +{ purify$ + "l" change.case$ +} +INTEGERS { len } +FUNCTION {chop.word} +{ 's := + 'len := + s #1 len substring$ = + { s len #1 + global.max$ substring$ } + 's + if$ +} +FUNCTION {format.lab.names} +{'s := + "" 't := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}" format.name$ + 't := + nameptr #1 > + { + nameptr #2 = + numnames #3 > and + { "others" 't := + #1 'namesleft := } + 'skip$ + if$ + namesleft #1 > + { ", " * t * } + { + s nameptr "{ll}" format.name$ duplicate$ "others" = + { 't := } + { pop$ } + if$ + t "others" = + { + " " * bbl.etal * + } + { + numnames #2 > + { "," * } + 'skip$ + if$ + bbl.and + space.word * t * + } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {author.key.label} +{ author empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {author.editor.key.label} +{ author empty$ + { editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.lab.names } + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {editor.key.label} +{ editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.lab.names } + if$ +} + +FUNCTION {calc.short.authors} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.key.label + { type$ "proceedings" = + 
'editor.key.label + 'author.key.label + if$ + } + if$ + 'short.list := +} + +FUNCTION {calc.label} +{ calc.short.authors + short.list + "(" + * + year duplicate$ empty$ + short.list key field.or.null = or + { pop$ "" } + 'skip$ + if$ + * + 'label := +} + +FUNCTION {sort.format.names} +{ 's := + #1 'nameptr := + "" + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr + "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" + format.name$ 't := + nameptr #1 > + { + " " * + namesleft #1 = t "others" = and + { "zzzzz" 't := } + 'skip$ + if$ + t sortify * + } + { t sortify * } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {sort.format.title} +{ 't := + "A " #2 + "An " #3 + "The " #4 t chop.word + chop.word + chop.word + sortify + #1 global.max$ substring$ +} +FUNCTION {author.sort} +{ author empty$ + { key empty$ + { "to sort, need author or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { author sort.format.names } + if$ +} +FUNCTION {author.editor.sort} +{ author empty$ + { editor empty$ + { key empty$ + { "to sort, need author, editor, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ + } + { author sort.format.names } + if$ +} +FUNCTION {editor.sort} +{ editor empty$ + { key empty$ + { "to sort, need editor or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ +} +FUNCTION {presort} +{ calc.label + label sortify + " " + * + type$ "book" = + type$ "inbook" = + or + 'author.editor.sort + { type$ "proceedings" = + 'editor.sort + 'author.sort + if$ + } + if$ + #1 entry.max$ substring$ + 'sort.label := + sort.label + * + " " + * + title field.or.null + sort.format.title + * + #1 entry.max$ substring$ + 'sort.key$ := +} + +ITERATE {presort} +SORT +STRINGS { last.label next.extra } +INTEGERS { last.extra.num last.extra.num.extended last.extra.num.blank number.label } +FUNCTION 
{initialize.extra.label.stuff} +{ #0 int.to.chr$ 'last.label := + "" 'next.extra := + #0 'last.extra.num := + "a" chr.to.int$ #1 - 'last.extra.num.blank := + last.extra.num.blank 'last.extra.num.extended := + #0 'number.label := +} +FUNCTION {forward.pass} +{ last.label label = + { last.extra.num #1 + 'last.extra.num := + last.extra.num "z" chr.to.int$ > + { "a" chr.to.int$ 'last.extra.num := + last.extra.num.extended #1 + 'last.extra.num.extended := + } + 'skip$ + if$ + last.extra.num.extended last.extra.num.blank > + { last.extra.num.extended int.to.chr$ + last.extra.num int.to.chr$ + * 'extra.label := } + { last.extra.num int.to.chr$ 'extra.label := } + if$ + } + { "a" chr.to.int$ 'last.extra.num := + "" 'extra.label := + label 'last.label := + } + if$ + number.label #1 + 'number.label := +} +FUNCTION {reverse.pass} +{ next.extra "b" = + { "a" 'extra.label := } + 'skip$ + if$ + extra.label 'next.extra := + extra.label + duplicate$ empty$ + 'skip$ + { "{\natexlab{" swap$ * "}}" * } + if$ + 'extra.label := + label extra.label * 'label := +} +EXECUTE {initialize.extra.label.stuff} +ITERATE {forward.pass} +REVERSE {reverse.pass} +FUNCTION {bib.sort.order} +{ sort.label + " " + * + year field.or.null sortify + * + " " + * + title field.or.null + sort.format.title + * + #1 entry.max$ substring$ + 'sort.key$ := +} +ITERATE {bib.sort.order} +SORT +FUNCTION {begin.bib} +{ preamble$ empty$ + 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" number.label int.to.str$ * "}" * + write$ newline$ + "\providecommand{\natexlab}[1]{#1}" + write$ newline$ +} +EXECUTE {begin.bib} +EXECUTE {init.state.consts} +ITERATE {call.type$} +FUNCTION {end.bib} +{ newline$ + "\end{thebibliography}" write$ newline$ +} +EXECUTE {end.bib} +%% End of customized bst file +%% +%% End of file `aaai22.bst'. 
diff --git b/Apendix/aaai24.sty a/Apendix/aaai24.sty new file mode 100644 index 0000000..a68f603 --- /dev/null +++ a/Apendix/aaai24.sty @@ -0,0 +1,303 @@ +\NeedsTeXFormat{LaTeX2e}% +\ProvidesPackage{aaai24}[2023/06/26 AAAI 2024 Submission format]% +\def\year{2024}% +\typeout{Conference Style for AAAI for LaTeX 2e -- version for submission}% +% +\def\copyright@on{T} +\def\showauthors@on{T} +\def\nocopyright{\gdef\copyright@on{}} % Copyright notice is required for camera-ready only. +\DeclareOption{submission}{% + \gdef\copyright@on{}% + \gdef\showauthors@on{}% + \long\gdef\pdfinfo #1{\relax}% +}% +\ProcessOptions\relax% +% WARNING: IF YOU ARE USING THIS STYLE SHEET FOR AN AAAI PUBLICATION, YOU +% MAY NOT MODIFY IT FOR ANY REASON. MODIFICATIONS (IN YOUR SOURCE +% OR IN THIS STYLE SHEET WILL RESULT IN REJECTION OF YOUR PAPER). +% +% WARNING: This style is NOT guaranteed to work. It is provided in the +% hope that it might make the preparation of papers easier, but this style +% file is provided "as is" without warranty of any kind, either express or +% implied, including but not limited to the implied warranties of +% merchantability, fitness for a particular purpose, or noninfringement. +% You use this style file at your own risk. Standard disclaimers apply. +% There are undoubtably bugs in this style. If you would like to submit +% bug fixes, improvements, etc. please let us know. Please use the contact form +% at www.aaai.org. +% +% Do not use this file unless you are an experienced LaTeX user. 
+% +% PHYSICAL PAGE LAYOUT +\setlength\topmargin{-0.25in} \setlength\oddsidemargin{-0.25in} +\setlength\textheight{9.0in} \setlength\textwidth{7.0in} +\setlength\columnsep{0.375in} \newlength\titlebox \setlength\titlebox{2.25in} +\setlength\headheight{0pt} \setlength\headsep{0pt} +%\setlength\footheight{0pt} \setlength\footskip{0pt} +\thispagestyle{empty} \pagestyle{empty} +\flushbottom \twocolumn \sloppy +% We're never going to need a table of contents, so just flush it to +% save space --- suggested by drstrip@sandia-2 +\def\addcontentsline#1#2#3{} +% gf: PRINT COPYRIGHT NOTICE +\def\copyright@year{\number\year} +\def\copyright@text{Copyright \copyright\space \copyright@year, +Association for the Advancement of Artificial Intelligence (www.aaai.org). +All rights reserved.} +\def\copyrighttext#1{\gdef\copyright@on{T}\gdef\copyright@text{#1}} +\def\copyrightyear#1{\gdef\copyright@on{T}\gdef\copyright@year{#1}} +% gf: End changes for copyright notice (used in \maketitle, below) +% Title stuff, taken from deproc. 
+% +\def\maketitle{% + \par% + \begingroup % to make the footnote style local to the title + \def\thefootnote{\fnsymbol{footnote}} + \twocolumn[\@maketitle] \@thanks% + \endgroup% + % Insert copyright slug unless turned off + \if T\copyright@on\insert\footins{\noindent\footnotesize\copyright@text}\fi% + % + \setcounter{footnote}{0}% + \let\maketitle\relax% + \let\@maketitle\relax% + \gdef\@thanks{}% + \gdef\@author{}% + \gdef\@title{}% + \let\thanks\relax% +}% +\long\gdef\affiliations #1{ \def \affiliations_{\if T\showauthors@on#1\fi}}% +% +\def\@maketitle{% + \def\theauthors{\if T\showauthors@on\@author\else Anonymous submission\fi} + \newcounter{eqfn}\setcounter{eqfn}{0}% + \newsavebox{\titlearea} + \sbox{\titlearea}{ + \let\footnote\relax\let\thanks\relax% + \setcounter{footnote}{0}% + \def\equalcontrib{% + \ifnum\value{eqfn}=0% + \footnote{These authors contributed equally.}% + \setcounter{eqfn}{\value{footnote}}% + \else% + \footnotemark[\value{eqfn}]% + \fi% + }% + \vbox{% + \hsize\textwidth% + \linewidth\hsize% + \vskip 0.625in minus 0.125in% + \centering% + {\LARGE\bf \@title \par}% + \vskip 0.1in plus 0.5fil minus 0.05in% + {\Large{\textbf{\theauthors\ifhmode\\\fi}}}% + \vskip .2em plus 0.25fil% + {\normalsize \affiliations_\ifhmode\\\fi}% + \vskip .5em plus 2fil% + }% + }% +% + \newlength\actualheight% + \settoheight{\actualheight}{\usebox{\titlearea}}% + \ifdim\actualheight>\titlebox% + \setlength{\titlebox}{\actualheight}% + \fi% +% + \vbox to \titlebox {% + \let\footnote\thanks\relax% + \setcounter{footnote}{0}% + \def\equalcontrib{% + \ifnum\value{eqfn}=0% + \footnote{These authors contributed equally.}% + \setcounter{eqfn}{\value{footnote}}% + \else% + \footnotemark[\value{eqfn}]% + \fi% + }% + \hsize\textwidth% + \linewidth\hsize% + \vskip 0.625in minus 0.125in% + \centering% + {\LARGE\bf \@title \par}% + \vskip 0.1in plus 0.5fil minus 0.05in% + {\Large{\textbf{\theauthors\ifhmode\\\fi}}}% + \vskip .2em plus 0.25fil% + {\normalsize 
\affiliations_\ifhmode\\\fi}% + \vskip .5em plus 2fil% + }% +}% +% +\renewenvironment{abstract}{% + \centerline{\bf Abstract}% + \vspace{0.5ex}% + \setlength{\leftmargini}{10pt}% + \begin{quote}% + \small% +}{% + \par% + \end{quote}% + \vskip 1ex% +}% +% jsp added: +\def\pubnote#1{ + \thispagestyle{myheadings}% + \pagestyle{myheadings}% + \markboth{#1}{#1}% + \setlength\headheight{10pt}% + \setlength\headsep{10pt}% +}% +% +% SECTIONS with less space +\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus +-0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\Large\bf\centering}} +\def\subsection{\@startsection{subsection}{2}{\z@}{-2.0ex plus +-0.5ex minus -.2ex}{3pt plus 2pt minus 1pt}{\large\bf\raggedright}} +\def\subsubsection{\@startsection{subparagraph}{3}{\z@}{-6pt plus +%%% DIEGO changed: 29/11/2009 +%% 2pt minus 1pt}{-1em}{\normalsize\bf}} +-2pt minus -1pt}{-1em}{\normalsize\bf}} +%%% END changed +\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}{-6pt plus -2pt minus -1pt}{-1em}{\normalsize\bf}}% +\setcounter{secnumdepth}{0} +% add period to section (but not subsection) numbers, reduce space after +%\renewcommand{\thesection} +% {\arabic{section}.\hskip-0.6em} +%\renewcommand{\thesubsection} +% {\arabic{section}.\arabic{subsection}\hskip-0.6em} +% FOOTNOTES +\footnotesep 6.65pt % +\skip\footins 9pt plus 4pt minus 2pt +\def\footnoterule{\kern-3pt \hrule width 5pc \kern 2.6pt } +\setcounter{footnote}{0} +% LISTS AND PARAGRAPHS +\parindent 10pt +\topsep 4pt plus 1pt minus 2pt +\partopsep 1pt plus 0.5pt minus 0.5pt +\itemsep 0.5pt plus 1pt minus 0.5pt +\parsep 2pt plus 1pt minus 0.5pt +\leftmargin 10pt \leftmargini 13pt \leftmarginii 10pt \leftmarginiii 5pt \leftmarginiv 5pt \leftmarginv 5pt \leftmarginvi 5pt +\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt +\def\@listi{\leftmargin\leftmargini} +\def\@listii{\leftmargin\leftmarginii +\labelwidth\leftmarginii\advance\labelwidth-\labelsep +\topsep 2pt plus 1pt minus 0.5pt +\parsep 1pt plus 
0.5pt minus 0.5pt +\itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii +\labelwidth\leftmarginiii\advance\labelwidth-\labelsep +\topsep 1pt plus 0.5pt minus 0.5pt +\parsep \z@ +\partopsep 0.5pt plus 0pt minus 0.5pt +\itemsep \topsep} +\def\@listiv{\leftmargin\leftmarginiv +\labelwidth\leftmarginiv\advance\labelwidth-\labelsep} +\def\@listv{\leftmargin\leftmarginv +\labelwidth\leftmarginv\advance\labelwidth-\labelsep} +\def\@listvi{\leftmargin\leftmarginvi +\labelwidth\leftmarginvi\advance\labelwidth-\labelsep} +\abovedisplayskip 7pt plus2pt minus5pt% +\belowdisplayskip \abovedisplayskip +\abovedisplayshortskip 0pt plus3pt% +\belowdisplayshortskip 4pt plus3pt minus3pt% +% Less leading in most fonts (due to the narrow columns) +% The choices were between 1-pt and 1.5-pt leading +\def\normalsize{\@setfontsize\normalsize\@xpt{11}} % 10 point on 11 +\def\small{\@setfontsize\small\@ixpt{10}} % 9 point on 10 +\def\footnotesize{\@setfontsize\footnotesize\@ixpt{10}} % 9 point on 10 +\def\scriptsize{\@setfontsize\scriptsize\@viipt{10}} % 7 point on 8 +\def\tiny{\@setfontsize\tiny\@vipt{7}} % 6 point on 7 +\def\large{\@setfontsize\large\@xipt{12}} % 11 point on 12 +\def\Large{\@setfontsize\Large\@xiipt{14}} % 12 point on 14 +\def\LARGE{\@setfontsize\LARGE\@xivpt{16}} % 14 point on 16 +\def\huge{\@setfontsize\huge\@xviipt{20}} % 17 point on 20 +\def\Huge{\@setfontsize\Huge\@xxpt{23}} % 20 point on 23 + +\AtBeginDocument{% + \@ifpackageloaded{natbib}% + {% + % When natbib is in use, set the proper style and fix a few things + \let\cite\citep + \let\shortcite\citeyearpar + \setcitestyle{aysep={}} + \setlength\bibhang{0pt} + \bibliographystyle{aaai24} + }{}% + \@ifpackageloaded{hyperref}% + {% + \PackageError{aaai}{You must not use hyperref in AAAI papers.}{You (or one of the packages you imported) are importing the hyperref package, which is forbidden in AAAI papers. 
You must remove it from the paper to proceed.} + }{}% + \@ifpackageloaded{bbm}% + {% + \PackageError{aaai}{You must not use bbm package in AAAI papers because it introduces Type 3 fonts which are forbidden.}{See https://tex.stackexchange.com/questions/479160/a-replacement-to-mathbbm1-with-type-1-fonts for possible alternatives.} + }{}% + \@ifpackageloaded{authblk}% + {% + \PackageError{aaai}{Package authblk is forbbidden.}{Package authblk is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{balance}% + {% + \PackageError{aaai}{Package balance is forbbidden.}{Package balance is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{CJK}% + {% + \PackageError{aaai}{Package CJK is forbbidden.}{Package CJK is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{flushend}% + {% + \PackageError{aaai}{Package flushend is forbbidden.}{Package flushend is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{fontenc}% + {% + \PackageError{aaai}{Package fontenc is forbbidden.}{Package fontenc is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{fullpage}% + {% + \PackageError{aaai}{Package fullpage is forbbidden.}{Package fullpage is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{geometry}% + {% + \PackageError{aaai}{Package geometry is forbbidden.}{Package geometry is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{grffile}% + {% + \PackageError{aaai}{Package grffile is forbbidden.}{Package grffile is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{navigator}% + {% + \PackageError{aaai}{Package navigator is forbbidden.}{Package navigator is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{savetrees}% + {% + \PackageError{aaai}{Package savetrees is forbbidden.}{Package savetrees is forbbiden. 
You must find an alternative.} + }{}% + \@ifpackageloaded{setspace}% + {% + \PackageError{aaai}{Package setspace is forbbidden.}{Package setspace is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{stfloats}% + {% + \PackageError{aaai}{Package stfloats is forbbidden.}{Package stfloats is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{tabu}% + {% + \PackageError{aaai}{Package tabu is forbbidden.}{Package tabu is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{titlesec}% + {% + \PackageError{aaai}{Package titlesec is forbbidden.}{Package titlesec is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{tocbibind}% + {% + \PackageError{aaai}{Package tocbibind is forbbidden.}{Package tocbibind is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{ulem}% + {% + \PackageError{aaai}{Package ulem is forbbidden.}{Package ulem is forbbiden. You must find an alternative.} + }{}% + \@ifpackageloaded{wrapfig}% + {% + \PackageError{aaai}{Package wrapfig is forbbidden.}{Package wrapfig is forbbiden. 
You must find an alternative.} + }{}% +} + +\let\endthebibliography=\endlist diff --git b/Apendix/anonymous-submission-latex-2024.aux a/Apendix/anonymous-submission-latex-2024.aux new file mode 100644 index 0000000..ef4b422 --- /dev/null +++ a/Apendix/anonymous-submission-latex-2024.aux @@ -0,0 +1,55 @@ +\relax +\bibstyle{aaai24} +\citation{sutton2009fast} +\citation{hirsch1989convergent} +\citation{borkar2000ode} +\citation{borkar2000ode} +\citation{borkar2000ode} +\citation{hirsch1989convergent} +\newlabel{proofth2}{{A.1}{1}} +\newlabel{thetavmtdcFastest}{{A-1}{1}} +\newlabel{uvmtdcFastest}{{A-2}{1}} +\newlabel{omegavmtdcFastest}{{A-3}{1}} +\newlabel{omegavmtdcFastestFinal}{{A-4}{1}} +\newlabel{omegavmtdcInfty}{{A-5}{1}} +\newlabel{thetavmtdcFaster}{{A-6}{1}} +\citation{borkar2000ode} +\citation{borkar2000ode} +\citation{borkar2000ode} +\citation{borkar1997stochastic} +\newlabel{uvmtdcFaster}{{A-7}{2}} +\newlabel{uvmtdcFasterFinal}{{A-8}{2}} +\newlabel{uvmtdcInfty}{{A-9}{2}} +\newlabel{thetavmtdcSlowerFinal}{{A-11}{2}} +\newlabel{odethetavmtdcfinal}{{A-12}{2}} +\citation{hirsch1989convergent} +\citation{borkar2000ode} +\citation{borkar2000ode} +\citation{borkar2000ode} +\newlabel{proofVMETD}{{A.2}{3}} +\newlabel{th1proof}{{A.2}{3}} +\newlabel{thetaFast}{{A-13}{3}} +\newlabel{omegaFast}{{A-14}{3}} +\newlabel{omegaFastFinal}{{A-15}{3}} +\newlabel{omegaInfty}{{A-16}{3}} +\citation{sutton2016emphatic} +\newlabel{odetheta}{{A-17}{4}} +\newlabel{rowsum}{{A-20}{4}} +\newlabel{columnsum}{{A-21}{5}} +\newlabel{odethetafinal}{{A-22}{5}} +\newlabel{mathematicalanalysis}{{B}{5}} +\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} +\newlabel{keymatrices}{{1}{5}} +\newlabel{minimumeigenvalues}{{2}{5}} +\newlabel{experimentaldetails}{{C}{5}} +\newlabel{bairdcounterexample}{{\caption@xref {bairdcounterexample}{ on input line 731}}{6}} +\newlabel{randomwalk}{{\caption@xref {randomwalk}{ on input line 754}}{6}} +\newlabel{boyanchain}{{\caption@xref {boyanchain}{ on 
input line 777}}{6}} +\bibdata{aaai24} +\bibcite{borkar1997stochastic}{{1}{1997}{{Borkar}}{{}}} +\bibcite{borkar2000ode}{{2}{2000}{{Borkar and Meyn}}{{}}} +\bibcite{hirsch1989convergent}{{3}{1989}{{Hirsch}}{{}}} +\bibcite{sutton2009fast}{{4}{2009}{{Sutton et~al.}}{{Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}}} +\bibcite{sutton2016emphatic}{{5}{2016}{{Sutton, Mahmood, and White}}{{}}} +\newlabel{lrofways}{{6}{7}} +\gdef \@abspage@last{7} diff --git b/Apendix/anonymous-submission-latex-2024.bbl a/Apendix/anonymous-submission-latex-2024.bbl new file mode 100644 index 0000000..8bd13dd --- /dev/null +++ a/Apendix/anonymous-submission-latex-2024.bbl @@ -0,0 +1,29 @@ +\begin{thebibliography}{5} +\providecommand{\natexlab}[1]{#1} + +\bibitem[{Borkar(1997)}]{borkar1997stochastic} +Borkar, V.~S. 1997. +\newblock Stochastic approximation with two time scales. +\newblock \emph{Syst. \& Control Letters}, 29(5): 291--294. + +\bibitem[{Borkar and Meyn(2000)}]{borkar2000ode} +Borkar, V.~S.; and Meyn, S.~P. 2000. +\newblock The ODE method for convergence of stochastic approximation and reinforcement learning. +\newblock \emph{SIAM J. Control Optim.}, 38(2): 447--469. + +\bibitem[{Hirsch(1989)}]{hirsch1989convergent} +Hirsch, M.~W. 1989. +\newblock Convergent activation dynamics in continuous time networks. +\newblock \emph{Neural Netw.}, 2(5): 331--349. + +\bibitem[{Sutton et~al.(2009)Sutton, Maei, Precup, Bhatnagar, Silver, Szepesv{\'a}ri, and Wiewiora}]{sutton2009fast} +Sutton, R.; Maei, H.; Precup, D.; Bhatnagar, S.; Silver, D.; Szepesv{\'a}ri, C.; and Wiewiora, E. 2009. +\newblock Fast gradient-descent methods for temporal-difference learning with linear function approximation. +\newblock In \emph{Proc. 26th Int. Conf. Mach. Learn.}, 993--1000. + +\bibitem[{Sutton, Mahmood, and White(2016)}]{sutton2016emphatic} +Sutton, R.~S.; Mahmood, A.~R.; and White, M. 2016. +\newblock An emphatic approach to the problem of off-policy temporal-difference learning. 
+\newblock \emph{The Journal of Machine Learning Research}, 17(1): 2603--2631. + +\end{thebibliography} diff --git b/Apendix/anonymous-submission-latex-2024.blg a/Apendix/anonymous-submission-latex-2024.blg new file mode 100644 index 0000000..117d52a --- /dev/null +++ a/Apendix/anonymous-submission-latex-2024.blg @@ -0,0 +1,46 @@ +This is BibTeX, Version 0.99d (TeX Live 2023) +Capacity: max_strings=200000, hash_size=200000, hash_prime=170003 +The top-level auxiliary file: anonymous-submission-latex-2024.aux +The style file: aaai24.bst +Database file #1: aaai24.bib +You've used 5 entries, + 2840 wiz_defined-function locations, + 619 strings with 5446 characters, +and the built_in function-call counts, 3370 in all, are: += -- 277 +> -- 153 +< -- 0 ++ -- 60 +- -- 52 +* -- 242 +:= -- 547 +add.period$ -- 20 +call.type$ -- 5 +change.case$ -- 36 +chr.to.int$ -- 6 +cite$ -- 5 +duplicate$ -- 223 +empty$ -- 240 +format.name$ -- 60 +if$ -- 649 +int.to.chr$ -- 1 +int.to.str$ -- 1 +missing$ -- 49 +newline$ -- 29 +num.names$ -- 20 +pop$ -- 92 +preamble$ -- 1 +purify$ -- 34 +quote$ -- 0 +skip$ -- 96 +stack$ -- 0 +substring$ -- 200 +swap$ -- 128 +text.length$ -- 0 +text.prefix$ -- 0 +top$ -- 0 +type$ -- 45 +warning$ -- 0 +while$ -- 31 +width$ -- 0 +write$ -- 68 diff --git b/Apendix/anonymous-submission-latex-2024.log a/Apendix/anonymous-submission-latex-2024.log new file mode 100644 index 0000000..2304b27 --- /dev/null +++ a/Apendix/anonymous-submission-latex-2024.log @@ -0,0 +1,693 @@ +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.3.31) 30 JUN 2024 03:07 +entering extended mode + restricted \write18 enabled. + file:line:error style messages enabled. + %&-line parsing enabled. 
+**anonymous-submission-latex-2024 +(./anonymous-submission-latex-2024.tex +LaTeX2e <2022-11-01> patch level 1 +L3 programming layer <2023-02-22> (d:/software/texlive/2023/texmf-dist/tex/latex/base/article.cls +Document Class: article 2022/07/02 v1.4n Standard LaTeX document class +(d:/software/texlive/2023/texmf-dist/tex/latex/base/size10.clo +File: size10.clo 2022/07/02 v1.4n Standard LaTeX file (size option) +) +\c@part=\count185 +\c@section=\count186 +\c@subsection=\count187 +\c@subsubsection=\count188 +\c@paragraph=\count189 +\c@subparagraph=\count190 +\c@figure=\count191 +\c@table=\count192 +\abovecaptionskip=\skip48 +\belowcaptionskip=\skip49 +\bibindent=\dimen140 +) (./aaai24.sty +Package: aaai24 2023/06/26 AAAI 2024 Submission format + +Conference Style for AAAI for LaTeX 2e -- version for submission +\titlebox=\skip50 +) (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/times.sty +Package: times 2020/03/25 PSNFSS-v9.3 (SPQR) +) (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/helvet.sty +Package: helvet 2020/03/25 PSNFSS-v9.3 (WaS) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/keyval.sty +Package: keyval 2022/05/29 v1.15 key=value parser (DPC) +\KV@toks@=\toks16 +)) (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/courier.sty +Package: courier 2020/03/25 PSNFSS-v9.3 (WaS) +) (d:/software/texlive/2023/texmf-dist/tex/latex/url/url.sty +\Urlmuskip=\muskip16 +Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. 
+) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphicx.sty +Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/graphics.sty +Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/trig.sty +Package: trig 2021/08/11 v1.11 sin cos tan (DPC) +) (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/graphics.cfg +File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration +) +Package graphics Info: Driver file: pdftex.def on input line 107. + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-def/pdftex.def +File: pdftex.def 2022/09/22 v1.2b Graphics/color driver for pdftex +)) +\Gin@req@height=\dimen141 +\Gin@req@width=\dimen142 +) (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amssymb.sty +Package: amssymb 2013/01/14 v3.01 AMS font symbols + (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/amsfonts.sty +Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support +\@emptytoks=\toks17 +\symAMSa=\mathgroup4 +\symAMSb=\mathgroup5 +LaTeX Font Info: Redeclaring math symbol \hbar on input line 98. +LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' +(Font) U/euf/m/n --> U/euf/b/n on input line 106. +)) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsmath.sty +Package: amsmath 2022/04/08 v2.17n AMS math features +\@mathmargin=\skip51 + +For additional information on amsmath, use the `?' option. 
+(d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amstext.sty +Package: amstext 2021/08/26 v2.01 AMS text + (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsgen.sty +File: amsgen.sty 1999/11/30 v2.0 generic functions +\@emptytoks=\toks18 +\ex@=\dimen143 +)) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsbsy.sty +Package: amsbsy 1999/11/29 v1.2d Bold Symbols +\pmbraise@=\dimen144 +) (d:/software/texlive/2023/texmf-dist/tex/latex/amsmath/amsopn.sty +Package: amsopn 2022/04/08 v2.04 operator names +) +\inf@bad=\count193 +LaTeX Info: Redefining \frac on input line 234. +\uproot@=\count194 +\leftroot@=\count195 +LaTeX Info: Redefining \overline on input line 399. +LaTeX Info: Redefining \colon on input line 410. +\classnum@=\count196 +\DOTSCASE@=\count197 +LaTeX Info: Redefining \ldots on input line 496. +LaTeX Info: Redefining \dots on input line 499. +LaTeX Info: Redefining \cdots on input line 620. +\Mathstrutbox@=\box51 +\strutbox@=\box52 +LaTeX Info: Redefining \big on input line 722. +LaTeX Info: Redefining \Big on input line 723. +LaTeX Info: Redefining \bigg on input line 724. +LaTeX Info: Redefining \Bigg on input line 725. +\big@size=\dimen145 +LaTeX Font Info: Redeclaring font encoding OML on input line 743. +LaTeX Font Info: Redeclaring font encoding OMS on input line 744. +\macc@depth=\count198 +LaTeX Info: Redefining \bmod on input line 905. +LaTeX Info: Redefining \pmod on input line 910. +LaTeX Info: Redefining \smash on input line 940. +LaTeX Info: Redefining \relbar on input line 970. +LaTeX Info: Redefining \Relbar on input line 971. 
+\c@MaxMatrixCols=\count199 +\dotsspace@=\muskip17 +\c@parentequation=\count266 +\dspbrk@lvl=\count267 +\tag@help=\toks19 +\row@=\count268 +\column@=\count269 +\maxfields@=\count270 +\andhelp@=\toks20 +\eqnshift@=\dimen146 +\alignsep@=\dimen147 +\tagshift@=\dimen148 +\tagwidth@=\dimen149 +\totwidth@=\dimen150 +\lineht@=\dimen151 +\@envbody=\toks21 +\multlinegap=\skip52 +\multlinetaggap=\skip53 +\mathdisplay@stack=\toks22 +LaTeX Info: Redefining \[ on input line 2953. +LaTeX Info: Redefining \] on input line 2954. +) (d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.sty +Package: subfigure 2002/03/15 v2.1.5 subfigure package +\subfigtopskip=\skip54 +\subfigcapskip=\skip55 +\subfigcaptopadj=\dimen152 +\subfigbottomskip=\skip56 +\subfigcapmargin=\dimen153 +\subfiglabelskip=\skip57 +\c@subfigure=\count271 +\c@subtable=\count272 + +**************************************** +* Local config file subfigure.cfg used * +**************************************** +(d:/software/texlive/2023/texmf-dist/tex/latex/subfigure/subfigure.cfg) +\subfig@top=\skip58 +\subfig@bottom=\skip59 +) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/array.sty +Package: array 2022/09/04 v2.5g Tabular extension package (FMi) +\col@sep=\dimen154 +\ar@mcellbox=\box53 +\extrarowheight=\dimen155 +\NC@list=\toks23 +\extratabsurround=\skip60 +\backup@length=\skip61 +\ar@cellbox=\box54 +) (d:/software/texlive/2023/texmf-dist/tex/latex/diagbox/diagbox.sty +Package: diagbox 2020/02/09 v2.3 Making table heads with diagonal lines + (d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.sty +Package: pict2e 2020/09/30 v0.4b Improved picture commands (HjG,RN,JT) + (d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/pict2e.cfg +File: pict2e.cfg 2016/02/05 v0.1u pict2e configuration for teTeX/TeXLive +) +Package pict2e Info: Driver file: pdftex.def on input line 112. +Package pict2e Info: Driver file for pict2e: p2e-pdftex.def on input line 114. 
+ (d:/software/texlive/2023/texmf-dist/tex/latex/pict2e/p2e-pdftex.def +File: p2e-pdftex.def 2016/02/05 v0.1u Driver-dependant file (RN,HjG,JT) +) +\pIIe@GRAPH=\toks24 +\@arclen=\dimen156 +\@arcrad=\dimen157 +\pIIe@tempdima=\dimen158 +\pIIe@tempdimb=\dimen159 +\pIIe@tempdimc=\dimen160 +\pIIe@tempdimd=\dimen161 +\pIIe@tempdime=\dimen162 +\pIIe@tempdimf=\dimen163 +) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/calc.sty +Package: calc 2017/05/25 v4.3 Infix arithmetic (KKT,FJ) +\calc@Acount=\count273 +\calc@Bcount=\count274 +\calc@Adimen=\dimen164 +\calc@Bdimen=\dimen165 +\calc@Askip=\skip62 +\calc@Bskip=\skip63 +LaTeX Info: Redefining \setlength on input line 80. +LaTeX Info: Redefining \addtolength on input line 81. +\calc@Ccount=\count275 +\calc@Cskip=\skip64 +) +\diagbox@boxa=\box55 +\diagbox@boxb=\box56 +\diagbox@boxm=\box57 +\diagbox@wd=\dimen166 +\diagbox@ht=\dimen167 +\diagbox@insepl=\dimen168 +\diagbox@insepr=\dimen169 +\diagbox@outsepl=\dimen170 +\diagbox@outsepr=\dimen171 +) (d:/software/texlive/2023/texmf-dist/tex/latex/siunitx/siunitx.sty +Package: siunitx 2023-03-04 v3.2.2 A comprehensive (SI) units package +\l__siunitx_angle_tmp_dim=\dimen172 +\l__siunitx_angle_marker_box=\box58 +\l__siunitx_angle_unit_box=\box59 +\l__siunitx_compound_count_int=\count276 + (d:/software/texlive/2023/texmf-dist/tex/latex/translations/translations.sty +Package: translations 2022/02/05 v1.12 internationalization of LaTeX2e packages (CN) + (d:/software/texlive/2023/texmf-dist/tex/latex/etoolbox/etoolbox.sty +Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW) +\etb@tempcnta=\count277 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty +Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO) + (d:/software/texlive/2023/texmf-dist/tex/generic/infwarerr/infwarerr.sty +Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO) +) 
(d:/software/texlive/2023/texmf-dist/tex/generic/iftex/iftex.sty +Package: iftex 2022/02/03 v1.0f TeX engine tests +) (d:/software/texlive/2023/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty +Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO) +) +Package pdftexcmds Info: \pdf@primitive is available. +Package pdftexcmds Info: \pdf@ifprimitive is available. +Package pdftexcmds Info: \pdfdraftmode found. +)) +\l__siunitx_number_exponent_fixed_int=\count278 +\l__siunitx_number_min_decimal_int=\count279 +\l__siunitx_number_min_integer_int=\count280 +\l__siunitx_number_round_precision_int=\count281 +\l__siunitx_number_lower_threshold_int=\count282 +\l__siunitx_number_upper_threshold_int=\count283 +\l__siunitx_number_group_first_int=\count284 +\l__siunitx_number_group_size_int=\count285 +\l__siunitx_number_group_minimum_int=\count286 +\l__siunitx_table_tmp_box=\box60 +\l__siunitx_table_tmp_dim=\dimen173 +\l__siunitx_table_column_width_dim=\dimen174 +\l__siunitx_table_integer_box=\box61 +\l__siunitx_table_decimal_box=\box62 +\l__siunitx_table_uncert_box=\box63 +\l__siunitx_table_before_box=\box64 +\l__siunitx_table_after_box=\box65 +\l__siunitx_table_before_dim=\dimen175 +\l__siunitx_table_carry_dim=\dimen176 +\l__siunitx_unit_tmp_int=\count287 +\l__siunitx_unit_position_int=\count288 +\l__siunitx_unit_total_int=\count289 +) (d:/software/texlive/2023/texmf-dist/tex/latex/natbib/natbib.sty +Package: natbib 2010/09/13 8.31b (PWD, AO) +\bibhang=\skip65 +\bibsep=\skip66 +LaTeX Info: Redefining \cite on input line 694. 
+\c@NAT@ctr=\count290 +) (d:/software/texlive/2023/texmf-dist/tex/latex/caption/caption.sty +Package: caption 2023/03/12 v3.6j Customizing captions (AR) + (d:/software/texlive/2023/texmf-dist/tex/latex/caption/caption3.sty +Package: caption3 2023/03/12 v2.4 caption3 kernel (AR) +\caption@tempdima=\dimen177 +\captionmargin=\dimen178 +\caption@leftmargin=\dimen179 +\caption@rightmargin=\dimen180 +\caption@width=\dimen181 +\caption@indent=\dimen182 +\caption@parindent=\dimen183 +\caption@hangindent=\dimen184 +Package caption Info: Standard document class detected. +) +\c@caption@flags=\count291 +\c@continuedfloat=\count292 +Package caption Info: subfigure package is loaded. +) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithm.sty +Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment + (d:/software/texlive/2023/texmf-dist/tex/latex/float/float.sty +Package: float 2001/11/08 v1.3d Float enhancements (AL) +\c@float@type=\count293 +\float@exts=\toks25 +\float@box=\box66 +\@float@everytoks=\toks26 +\@floatcapt=\box67 +) (d:/software/texlive/2023/texmf-dist/tex/latex/base/ifthen.sty +Package: ifthen 2022/04/13 v1.1d Standard LaTeX ifthen package (DPC) +) +\@float@every@algorithm=\toks27 +\c@algorithm=\count294 +) (d:/software/texlive/2023/texmf-dist/tex/latex/algorithms/algorithmic.sty +Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic' +\c@ALC@unique=\count295 +\c@ALC@line=\count296 +\c@ALC@rem=\count297 +\c@ALC@depth=\count298 +\ALC@tlm=\skip67 +\algorithmicindent=\skip68 +) (d:/software/texlive/2023/texmf-dist/tex/latex/booktabs/booktabs.sty +Package: booktabs 2020/01/12 v1.61803398 Publication quality tables +\heavyrulewidth=\dimen185 +\lightrulewidth=\dimen186 +\cmidrulewidth=\dimen187 +\belowrulesep=\dimen188 +\belowbottomsep=\dimen189 +\aboverulesep=\dimen190 +\abovetopsep=\dimen191 +\cmidrulesep=\dimen192 +\cmidrulekern=\dimen193 +\defaultaddspace=\dimen194 +\@cmidla=\count299 +\@cmidlb=\count300 
+\@aboverulesep=\dimen195 +\@belowrulesep=\dimen196 +\@thisruleclass=\count301 +\@lastruleclass=\count302 +\@thisrulewidth=\dimen197 +) (d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mathtools.sty +Package: mathtools 2022/06/29 v1.29 mathematical typesetting tools + (d:/software/texlive/2023/texmf-dist/tex/latex/mathtools/mhsetup.sty +Package: mhsetup 2021/03/18 v1.4 programming setup (MH) +) +\g_MT_multlinerow_int=\count303 +\l_MT_multwidth_dim=\dimen198 +\origjot=\skip69 +\l_MT_shortvdotswithinadjustabove_dim=\dimen199 +\l_MT_shortvdotswithinadjustbelow_dim=\dimen256 +\l_MT_above_intertext_sep=\dimen257 +\l_MT_below_intertext_sep=\dimen258 +\l_MT_above_shortintertext_sep=\dimen259 +\l_MT_below_shortintertext_sep=\dimen260 +\xmathstrut@box=\box68 +\xmathstrut@dim=\dimen261 +) (d:/software/texlive/2023/texmf-dist/tex/latex/amscls/amsthm.sty +Package: amsthm 2020/05/29 v2.20.6 +\thm@style=\toks28 +\thm@bodyfont=\toks29 +\thm@headfont=\toks30 +\thm@notefont=\toks31 +\thm@headpunct=\toks32 +\thm@preskip=\skip70 +\thm@postskip=\skip71 +\thm@headsep=\skip72 +\dth@everypar=\toks33 +) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex +\pgfutil@everybye=\toks34 +\pgfutil@tempdima=\dimen262 +\pgfutil@tempdimb=\dimen263 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def +\pgfutil@abb=\box69 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/pgf.revision.tex) +Package: pgfrcs 2023-01-15 v3.1.10 (3.1.10) +)) +Package: pgf 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty 
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex +Package: pgfsys 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex +\pgfkeys@pathtoks=\toks35 +\pgfkeys@temptoks=\toks36 + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeyslibraryfiltered.code.tex +\pgfkeys@tmptoks=\toks37 +)) +\pgf@x=\dimen264 +\pgf@y=\dimen265 +\pgf@xa=\dimen266 +\pgf@ya=\dimen267 +\pgf@xb=\dimen268 +\pgf@yb=\dimen269 +\pgf@xc=\dimen270 +\pgf@yc=\dimen271 +\pgf@xd=\dimen272 +\pgf@yd=\dimen273 +\w@pgf@writea=\write3 +\r@pgf@reada=\read2 +\c@pgf@counta=\count304 +\c@pgf@countb=\count305 +\c@pgf@countc=\count306 +\c@pgf@countd=\count307 +\t@pgf@toka=\toks38 +\t@pgf@tokb=\toks39 +\t@pgf@tokc=\toks40 +\pgf@sys@id@count=\count308 + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg +File: pgf.cfg 2023-01-15 v3.1.10 (3.1.10) +) +Driver file for pgf: pgfsys-pdftex.def + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-pdftex.def +File: pgfsys-pdftex.def 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def +File: pgfsys-common-pdf.def 2023-01-15 v3.1.10 (3.1.10) +))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex +File: pgfsyssoftpath.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfsyssoftpath@smallbuffer@items=\count309 +\pgfsyssoftpath@bigbuffer@items=\count310 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex +File: pgfsysprotocol.code.tex 2023-01-15 v3.1.10 (3.1.10) +)) (d:/software/texlive/2023/texmf-dist/tex/latex/xcolor/xcolor.sty +Package: xcolor 2022/06/12 v2.14 LaTeX color extensions (UK) + (d:/software/texlive/2023/texmf-dist/tex/latex/graphics-cfg/color.cfg +File: color.cfg 2016/01/02 v1.6 sample color configuration +) +Package xcolor Info: Driver file: pdftex.def on input line 227. 
+ (d:/software/texlive/2023/texmf-dist/tex/latex/graphics/mathcolor.ltx) +Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1353. +Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1357. +Package xcolor Info: Model `RGB' extended on input line 1369. +Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1371. +Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1372. +Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1373. +Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1374. +Package xcolor Info: Model `Gray' substituted by `gray' on input line 1375. +Package xcolor Info: Model `wave' substituted by `hsb' on input line 1376. +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex +Package: pgfcore 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex +\pgfmath@dimen=\dimen274 +\pgfmath@count=\count311 +\pgfmath@box=\box70 +\pgfmath@toks=\toks41 +\pgfmath@stack@operand=\toks42 +\pgfmath@stack@operation=\toks43 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) 
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex +\c@pgfmathroundto@lastzeros=\count312 +)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex +File: pgfcorepoints.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@picminx=\dimen275 +\pgf@picmaxx=\dimen276 +\pgf@picminy=\dimen277 +\pgf@picmaxy=\dimen278 +\pgf@pathminx=\dimen279 +\pgf@pathmaxx=\dimen280 +\pgf@pathminy=\dimen281 +\pgf@pathmaxy=\dimen282 +\pgf@xx=\dimen283 +\pgf@xy=\dimen284 +\pgf@yx=\dimen285 +\pgf@yy=\dimen286 +\pgf@zx=\dimen287 +\pgf@zy=\dimen288 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex +File: pgfcorepathconstruct.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@path@lastx=\dimen289 +\pgf@path@lasty=\dimen290 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex +File: pgfcorepathusage.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@shorten@end@additional=\dimen291 +\pgf@shorten@start@additional=\dimen292 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex +File: pgfcorescopes.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfpic=\box71 +\pgf@hbox=\box72 +\pgf@layerbox@main=\box73 +\pgf@picture@serial@count=\count313 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex +File: pgfcoregraphicstate.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgflinewidth=\dimen293 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex +File: pgfcoretransformations.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@pt@x=\dimen294 
+\pgf@pt@y=\dimen295 +\pgf@pt@temp=\dimen296 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex +File: pgfcorequick.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex +File: pgfcoreobjects.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex +File: pgfcorepathprocessing.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex +File: pgfcorearrows.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfarrowsep=\dimen297 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex +File: pgfcoreshade.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@max=\dimen298 +\pgf@sys@shading@range@num=\count314 +\pgf@shadingcount=\count315 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex +File: pgfcoreimage.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex +File: pgfcoreexternal.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfexternal@startupbox=\box74 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex +File: pgfcorelayers.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex +File: pgfcoretransparency.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex +File: pgfcorepatterns.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex +File: pgfcorerdf.code.tex 2023-01-15 v3.1.10 (3.1.10) +))) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex +File: pgfmoduleshapes.code.tex 2023-01-15 v3.1.10 (3.1.10) 
+\pgfnodeparttextbox=\box75 +) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex +File: pgfmoduleplot.code.tex 2023-01-15 v3.1.10 (3.1.10) +) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty +Package: pgfcomp-version-0-65 2023-01-15 v3.1.10 (3.1.10) +\pgf@nodesepstart=\dimen299 +\pgf@nodesepend=\dimen300 +) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty +Package: pgfcomp-version-1-18 2023-01-15 v3.1.10 (3.1.10) +)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/latex/pgf/math/pgfmath.sty (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex +Package: pgffor 2023-01-15 v3.1.10 (3.1.10) +\pgffor@iter=\dimen301 +\pgffor@skip=\dimen302 +\pgffor@stack=\toks44 +\pgffor@toks=\toks45 +)) (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex +Package: tikz 2023-01-15 v3.1.10 (3.1.10) + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex +File: pgflibraryplothandlers.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgf@plot@mark@count=\count316 +\pgfplotmarksize=\dimen303 +) +\tikz@lastx=\dimen304 +\tikz@lasty=\dimen305 +\tikz@lastxsaved=\dimen306 +\tikz@lastysaved=\dimen307 +\tikz@lastmovetox=\dimen308 +\tikz@lastmovetoy=\dimen309 +\tikzleveldistance=\dimen310 +\tikzsiblingdistance=\dimen311 +\tikz@figbox=\box76 +\tikz@figbox@bg=\box77 +\tikz@tempbox=\box78 +\tikz@tempbox@bg=\box79 +\tikztreelevel=\count317 +\tikznumberofchildren=\count318 +\tikznumberofcurrentchild=\count319 +\tikz@fig@count=\count320 + 
(d:/software/texlive/2023/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex +File: pgfmodulematrix.code.tex 2023-01-15 v3.1.10 (3.1.10) +\pgfmatrixcurrentrow=\count321 +\pgfmatrixcurrentcolumn=\count322 +\pgf@matrix@numberofcolumns=\count323 +) +\tikz@expandcount=\count324 + (d:/software/texlive/2023/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex +File: tikzlibrarytopaths.code.tex 2023-01-15 v3.1.10 (3.1.10) +))) (d:/software/texlive/2023/texmf-dist/tex/latex/tools/bm.sty +Package: bm 2022/01/05 v1.2f Bold Symbol Support (DPC/FMi) +\symboldoperators=\mathgroup6 +\symboldletters=\mathgroup7 +\symboldsymbols=\mathgroup8 +Package bm Info: No bold for \OMX/cmex/m/n, using \pmb. +Package bm Info: No bold for \U/msa/m/n, using \pmb. +Package bm Info: No bold for \U/msb/m/n, using \pmb. +LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 149. +) (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/esvect.sty +Package: esvect +\symesvector=\mathgroup9 +) (d:/software/texlive/2023/texmf-dist/tex/latex/multirow/multirow.sty +Package: multirow 2021/03/15 v2.8 Span multiple rows of a table +\multirow@colwidth=\skip73 +\multirow@cntb=\count325 +\multirow@dima=\skip74 +\bigstrutjot=\dimen312 +) (d:/software/texlive/2023/texmf-dist/tex/latex/newfloat/newfloat.sty +Package: newfloat 2019/09/02 v1.1l Defining new floating environments (AR) +) (d:/software/texlive/2023/texmf-dist/tex/latex/listings/listings.sty +\lst@mode=\count326 +\lst@gtempboxa=\box80 +\lst@token=\toks46 +\lst@length=\count327 +\lst@currlwidth=\dimen313 +\lst@column=\count328 +\lst@pos=\count329 +\lst@lostspace=\dimen314 +\lst@width=\dimen315 +\lst@newlines=\count330 +\lst@lineno=\count331 +\lst@maxwidth=\dimen316 + (d:/software/texlive/2023/texmf-dist/tex/latex/listings/lstmisc.sty +File: lstmisc.sty 2023/02/27 1.9 (Carsten Heinz) +\c@lstnumber=\count332 +\lst@skipnumbers=\count333 +\lst@framebox=\box81 +) 
(d:/software/texlive/2023/texmf-dist/tex/latex/listings/listings.cfg +File: listings.cfg 2023/02/27 1.9 listings configuration +)) +Package: listings 2023/02/27 1.9 (Carsten Heinz) +\@float@every@listing=\toks47 +\c@listing=\count334 + (d:/software/texlive/2023/texmf-dist/tex/latex/natbib/bibentry.sty +Package: bibentry 2007/10/30 1.5 (PWD) +) +Package translations Info: No language package found. I am going to use `english' as default language. on input line 183. +LaTeX Font Info: Trying to load font information for OT1+ptm on input line 183. + (d:/software/texlive/2023/texmf-dist/tex/latex/psnfss/ot1ptm.fd +File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. +) (d:/software/texlive/2023/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def +File: l3backend-pdftex.def 2023-01-16 L3 backend support: PDF output (pdfTeX) +\l__color_backend_stack_int=\count335 +\l__pdf_internal_box=\box82 +) (./anonymous-submission-latex-2024.aux) +\openout1 = `anonymous-submission-latex-2024.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 183. +LaTeX Font Info: ... okay on input line 183. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 183. +LaTeX Font Info: ... okay on input line 183. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 183. +LaTeX Font Info: ... okay on input line 183. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 183. +LaTeX Font Info: ... okay on input line 183. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 183. +LaTeX Font Info: ... okay on input line 183. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 183. +LaTeX Font Info: ... okay on input line 183. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 183. +LaTeX Font Info: ... okay on input line 183. + (d:/software/texlive/2023/texmf-dist/tex/context/base/mkii/supp-pdf.mkii +[Loading MPS to PDF converter (version 2006.09.02).] 
+\scratchcounter=\count336 +\scratchdimen=\dimen317 +\scratchbox=\box83 +\nofMPsegments=\count337 +\nofMParguments=\count338 +\everyMPshowfont=\toks48 +\MPscratchCnt=\count339 +\MPscratchDim=\dimen318 +\MPnumerator=\count340 +\makeMPintoPDFobject=\count341 +\everyMPtoPDFconversion=\toks49 +) (d:/software/texlive/2023/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty +Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf +Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 485. + (d:/software/texlive/2023/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg +File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Live +)) (d:/software/texlive/2023/texmf-dist/tex/latex/translations/translations-basic-dictionary-english.trsl +File: translations-basic-dictionary-english.trsl (english translation file `translations-basic-dictionary') +) +Package translations Info: loading dictionary `translations-basic-dictionary' for `english'. on input line 183. +Package caption Info: Begin \AtBeginDocument code. +Package caption Info: float package is loaded. +Package caption Info: listings package is loaded. +Package caption Info: End \AtBeginDocument code. +Package newfloat Info: `float' package detected. +\c@lstlisting=\count342 +LaTeX Font Info: Trying to load font information for U+msa on input line 196. + (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsa.fd +File: umsa.fd 2013/01/14 v3.01 AMS symbols A +) +LaTeX Font Info: Trying to load font information for U+msb on input line 196. + (d:/software/texlive/2023/texmf-dist/tex/latex/amsfonts/umsb.fd +File: umsb.fd 2013/01/14 v3.01 AMS symbols B +) +LaTeX Font Info: Trying to load font information for U+esvect on input line 196. 
+ (d:/software/texlive/2023/texmf-dist/tex/latex/esvect/uesvect.fd +File: uesvect.fd +) [1 + + + +{d:/software/texlive/2023/texmf-var/fonts/map/pdftex/updmap/pdftex.map}{d:/software/texlive/2023/texmf-dist/fonts/enc/dvips/base/8r.enc}] [2] [3] [4] [5] + +LaTeX Warning: Reference `Evaluation_full' on page 6 undefined on input line 843. + +[6] + +LaTeX Warning: Reference `Complete_full' on page 7 undefined on input line 875. + + +Underfull \hbox (badness 10000) in paragraph at lines 861--878 +[]\OT1/ptm/m/n/10 7-state ver-sion of Baird's off-policy coun-terex-am-ple: for TD al-go-rithm, $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 is set to 0.1. For the + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 861--878 +\OT1/ptm/m/n/10 TDC al-go-rithm, the range of $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 , and the range + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 861--878 +\OT1/ptm/m/n/10 of $\OML/cmm/m/it/10 ^^P$ \OT1/ptm/m/n/10 is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 
2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 . For the VMTD al-go- + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 861--878 +\OT1/ptm/m/n/10 rithm, the range of $\OML/cmm/m/it/10 $ \OT1/ptm/m/n/10 is $\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 05\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 1\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 2\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 3\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 4\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 5\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 6\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 7\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 8\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 0\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 9\OML/cmm/m/it/10 ; \OT1/cmr/m/n/10 1\OML/cmm/m/it/10 :\OT1/cmr/m/n/10 0\OMS/cmsy/m/n/10 g$\OT1/ptm/m/n/10 , and the range of $\OML/cmm/m/it/10 ^^L$ 
\OT1/ptm/m/n/10 is + [] + +(./anonymous-submission-latex-2024.bbl) [7] (./anonymous-submission-latex-2024.aux) + +LaTeX Warning: There were undefined references. + + ) +Here is how much of TeX's memory you used: + 22572 strings out of 476025 + 476134 string characters out of 5789524 + 1887382 words of memory out of 5000000 + 42651 multiletter control sequences out of 15000+600000 + 531474 words of font info for 71 fonts, out of 8000000 for 9000 + 1141 hyphenation exceptions out of 8191 + 84i,22n,89p,423b,526s stack positions out of 10000i,1000n,20000p,200000b,200000s + +Output written on anonymous-submission-latex-2024.pdf (7 pages, 213463 bytes). +PDF statistics: + 117 PDF objects out of 1000 (max. 8388607) + 73 compressed objects within 1 object stream + 0 named destinations out of 1000 (max. 500000) + 13 words of extra memory for PDF output out of 10000 (max. 10000000) + diff --git b/Apendix/anonymous-submission-latex-2024.pdf a/Apendix/anonymous-submission-latex-2024.pdf new file mode 100644 index 0000000..abfc15c Binary files /dev/null and a/Apendix/anonymous-submission-latex-2024.pdf differ diff --git b/Apendix/anonymous-submission-latex-2024.synctex.gz a/Apendix/anonymous-submission-latex-2024.synctex.gz new file mode 100644 index 0000000..7107a49 Binary files /dev/null and a/Apendix/anonymous-submission-latex-2024.synctex.gz differ diff --git b/Apendix/anonymous-submission-latex-2024.tex a/Apendix/anonymous-submission-latex-2024.tex new file mode 100644 index 0000000..b2a2c0e --- /dev/null +++ a/Apendix/anonymous-submission-latex-2024.tex @@ -0,0 +1,906 @@ +%File: anonymous-submission-latex-2024.tex +\documentclass[letterpaper]{article} % DO NOT CHANGE THIS +\usepackage[submission]{aaai24} % DO NOT CHANGE THIS +\usepackage{times} % DO NOT CHANGE THIS +\usepackage{helvet} % DO NOT CHANGE THIS +\usepackage{courier} % DO NOT CHANGE THIS +\usepackage[hyphens]{url} % DO NOT CHANGE THIS +\usepackage{graphicx} % DO NOT CHANGE THIS +\usepackage{amssymb} 
+\usepackage{amsmath} +\usepackage{subfigure} +\usepackage{array} +\usepackage{diagbox} +\usepackage{siunitx} +\urlstyle{rm} % DO NOT CHANGE THIS +\def\UrlFont{\rm} % DO NOT CHANGE THIS +\usepackage{natbib} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT +\usepackage{caption} % DO NOT CHANGE THIS AND DO NOT ADD ANY OPTIONS TO IT +\frenchspacing % DO NOT CHANGE THIS +\setlength{\pdfpagewidth}{8.5in} % DO NOT CHANGE THIS +\setlength{\pdfpageheight}{11in} % DO NOT CHANGE THIS +% +% These are recommended to typeset algorithms but not required. See the subsubsection on algorithms. Remove them if you don't have algorithms in your paper. +\usepackage{algorithm} +\usepackage{algorithmic} +\usepackage{subfigure} +\usepackage{diagbox} +\usepackage{booktabs} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{mathtools} +\usepackage{amsthm} +\usepackage{tikz} +\usepackage{bm} +\usepackage{esvect} +\usepackage{multirow} + +% +% These are are recommended to typeset listings but not required. See the subsubsection on listing. Remove this block if you don't have listings in your paper. +\usepackage{newfloat} +\usepackage{listings} +\numberwithin{equation}{section} +\renewcommand{\theequation}{\thesection-\arabic{equation}} % 自定义公式编号格式 +\DeclareCaptionStyle{ruled}{labelfont=normalfont,labelsep=colon,strut=off} % DO NOT CHANGE THIS +\lstset{% + basicstyle={\footnotesize\ttfamily},% footnotesize acceptable for monospace + numbers=left,numberstyle=\footnotesize,xleftmargin=2em,% show line numbers, remove this entire line if you don't want the numbers. + aboveskip=0pt,belowskip=0pt,% + showstringspaces=false,tabsize=2,breaklines=true} +\floatstyle{ruled} +\newfloat{listing}{tb}{lst}{} +\floatname{listing}{Listing} +% +% Keep the \pdfinfo as shown here. There's no need +% for you to add the /Title and /Author tags. 
+\pdfinfo{ +/TemplateVersion (2024.1) +} + +% DISALLOWED PACKAGES +% \usepackage{authblk} -- This package is specifically forbidden +% \usepackage{balance} -- This package is specifically forbidden +% \usepackage{color (if used in text) +% \usepackage{CJK} -- This package is specifically forbidden +% \usepackage{float} -- This package is specifically forbidden +% \usepackage{flushend} -- This package is specifically forbidden +% \usepackage{fontenc} -- This package is specifically forbidden +% \usepackage{fullpage} -- This package is specifically forbidden +% \usepackage{geometry} -- This package is specifically forbidden +% \usepackage{grffile} -- This package is specifically forbidden +% \usepackage{hyperref} -- This package is specifically forbidden +% \usepackage{navigator} -- This package is specifically forbidden +% (or any other package that embeds links such as navigator or hyperref) +% \indentfirst} -- This package is specifically forbidden +% \layout} -- This package is specifically forbidden +% \multicol} -- This package is specifically forbidden +% \nameref} -- This package is specifically forbidden +% \usepackage{savetrees} -- This package is specifically forbidden +% \usepackage{setspace} -- This package is specifically forbidden +% \usepackage{stfloats} -- This package is specifically forbidden +% \usepackage{tabu} -- This package is specifically forbidden +% \usepackage{titlesec} -- This package is specifically forbidden +% \usepackage{tocbibind} -- This package is specifically forbidden +% \usepackage{ulem} -- This package is specifically forbidden +% \usepackage{wrapfig} -- This package is specifically forbidden +% DISALLOWED COMMANDS +% \nocopyright -- Your paper will not be published if you use this command +% \addtolength -- This command may not be used +% \balance -- This command may not be used +% \baselinestretch -- Your paper will not be published if you use this command +% \clearpage -- No page breaks of any kind may be used for the final 
version of your paper +% \columnsep -- This command may not be used +% \newpage -- No page breaks of any kind may be used for the final version of your paper +% \pagebreak -- No page breaks of any kind may be used for the final version of your paperr +% \pagestyle -- This command may not be used +% \tiny -- This is not an acceptable font size. +% \vspace{- -- No negative value may be used in proximity of a caption, figure, table, section, subsection, subsubsection, or reference +% \vskip{- -- No negative value may be used to alter spacing above or below a caption, figure, table, section, subsection, subsubsection, or reference + +\setcounter{secnumdepth}{2} %May be changed to 1 or 2 if section numbers are desired. + +% The file aaai24.sty is the style file for AAAI Press +% proceedings, working notes, and technical reports. +% + +% Title + +% Your title must be in mixed case, not sentence case. +% That means all verbs (including short verbs like be, is, using,and go), +% nouns, adverbs, adjectives should be capitalized, including both words in hyphenated terms, while +% articles, conjunctions, and prepositions are lower case unless they +% directly follow a colon or long dash +\title{AAAI Press Anonymous Submission\\Instructions for Authors Using \LaTeX{}} +\author{ + %Authors + % All authors must be in the same font size and format. + Written by AAAI Press Staff\textsuperscript{\rm 1}\thanks{With help from the AAAI Publications Committee.}\\ + AAAI Style Contributions by Pater Patel Schneider, + Sunil Issar,\\ + J. Scott Penberthy, + George Ferguson, + Hans Guesgen, + Francisco Cruz\equalcontrib, + Marc Pujol-Gonzalez\equalcontrib +} +\affiliations{ + %Afiliations + \textsuperscript{\rm 1}Association for the Advancement of Artificial Intelligence\\ + % If you have multiple authors and multiple affiliations + % use superscripts in text and roman font to identify them. + % For example, + + % Sunil Issar\textsuperscript{\rm 2}, + % J. 
Scott Penberthy\textsuperscript{\rm 3}, + % George Ferguson\textsuperscript{\rm 4}, + % Hans Guesgen\textsuperscript{\rm 5} + % Note that the comma should be placed after the superscript + + 1900 Embarcadero Road, Suite 101\\ + Palo Alto, California 94303-3310 USA\\ + % email address must be in roman text type, not monospace or sans serif + proceedings-questions@aaai.org +% +% See more examples next +} + +%Example, Single Author, ->> remove \iffalse,\fi and place them surrounding AAAI title to use it +\iffalse +\title{My Publication Title --- Single Author} +\author { + Author Name +} +\affiliations{ + Affiliation\\ + Affiliation Line 2\\ + name@example.com +} +\fi + +\iffalse +%Example, Multiple Authors, ->> remove \iffalse,\fi and place them surrounding AAAI title to use it +\title{My Publication Title --- Multiple Authors} +\author { + % Authors + First Author Name\textsuperscript{\rm 1}, + Second Author Name\textsuperscript{\rm 2}, + Third Author Name\textsuperscript{\rm 1} +} +\affiliations { + % Affiliations + \textsuperscript{\rm 1}Affiliation 1\\ + \textsuperscript{\rm 2}Affiliation 2\\ + firstAuthor@affiliation1.com, secondAuthor@affilation2.com, thirdAuthor@affiliation1.com +} +\fi + + +% REMOVE THIS: bibentry +% This is only needed to show inline citations in the guidelines document. You should not need it and can safely delete it. +\usepackage{bibentry} +% END REMOVE bibentry + +\begin{document} + +% \maketitle + +\onecolumn +\appendix +\section{Relevant proofs} +\subsection{Proof of Theorem 2} +\label{proofth2} +\begin{proof} +The proof is similar to that given by \cite{sutton2009fast} for TDC, but it is based on multi-time-scale stochastic approximation. 
+ +For the VMTDC algorithm, a new one-step linear TD solution is defined as: +\begin{equation*} + 0=\mathbb{E}[(\bm{\phi} - \gamma \bm{\phi}' - \mathbb{E}[\bm{\phi} - \gamma \bm{\phi}'])\bm{\phi}^\top]\mathbb{E}[\bm{\phi} \bm{\phi}^{\top}]^{-1}\mathbb{E}[(\delta -\mathbb{E}[\delta])\bm{\phi}]=\textbf{A}^{\top}\textbf{C}^{-1}(-\textbf{A}\bm{\theta}+\bm{b}). +\end{equation*} +The matrix $\textbf{A}^{\top}\textbf{C}^{-1}\textbf{A}$ is positive definite. Thus, the VMTDC solution is +$\bm{\theta}_{\text{VMTDC}}=\bm{\theta}_{\text{VMTD}}=\textbf{A}^{-1}\bm{b}$. + +First, note that recursions (11) and (12) can be rewritten as, respectively, +\begin{equation*} + \bm{\theta}_{k+1}\leftarrow \bm{\theta}_k+\zeta_k \bm{x}(k), +\end{equation*} +\begin{equation*} + \bm{u}_{k+1}\leftarrow \bm{u}_k+\beta_k \bm{y}(k), +\end{equation*} +where +\begin{equation*} + \bm{x}(k)=\frac{\alpha_k}{\zeta_k}[(\delta_{k}- \omega_k) \bm{\phi}_k - \gamma\bm{\phi}'_{k}(\bm{\phi}^{\top}_k \bm{u}_k)], +\end{equation*} +\begin{equation*} + \bm{y}(k)=\frac{\zeta_k}{\beta_k}[\delta_{k}-\omega_k - \bm{\phi}^{\top}_k \bm{u}_k]\bm{\phi}_k. +\end{equation*} + +Recursion (11) can also be rewritten as +\begin{equation*} + \bm{\theta}_{k+1}\leftarrow \bm{\theta}_k+\beta_k \bm{z}(k), +\end{equation*} +where +\begin{equation*} + \bm{z}(k)=\frac{\alpha_k}{\beta_k}[(\delta_{k}- \omega_k) \bm{\phi}_k - \gamma\bm{\phi}'_{k}(\bm{\phi}^{\top}_k \bm{u}_k)]. +\end{equation*} + +Due to the step-size schedule +$\alpha_k = o(\zeta_k)$, $\zeta_k = o(\beta_k)$, we have $\bm{x}(k)\rightarrow 0$, $\bm{y}(k)\rightarrow 0$, $\bm{z}(k)\rightarrow 0$ almost surely as $k\rightarrow \infty$. +That is, the increments in iteration (13) are uniformly larger than +those in (12), and the increments in iteration (12) are uniformly larger than +those in (11); thus (13) is the fastest recursion, (12) is the second-fastest recursion, and (11) is the slowest recursion. 
+Along the fastest time scale, the iterations (11), (12) and (13) +are associated with the following system of ODEs: +\begin{equation} + \dot{\bm{\theta}}(t) = 0, + \label{thetavmtdcFastest} +\end{equation} +\begin{equation} + \dot{\bm{u}}(t) = 0, + \label{uvmtdcFastest} +\end{equation} +\begin{equation} + \dot{\omega}(t)=\mathbb{E}[\delta_t|\bm{u}(t),\bm{\theta}(t)]-\omega(t). + \label{omegavmtdcFastest} +\end{equation} + +Based on the ODEs (\ref{thetavmtdcFastest}) and (\ref{uvmtdcFastest}), both $\bm{\theta}(t)\equiv \bm{\theta}$ +and $\bm{u}(t)\equiv \bm{u}$ when viewed from the fastest timescale. +By the Hirsch lemma \cite{hirsch1989convergent}, it follows that +$||\bm{\theta}_k-\bm{\theta}||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some +$\bm{\theta}$ that depends on the initial condition $\bm{\theta}_0$ of recursion +(11) and $||\bm{u}_k-\bm{u}||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some +$\bm{u}$ that depends on the initial condition $\bm{u}_0$ of recursion +(12). Thus, the ODE system (\ref{thetavmtdcFastest})-(\ref{omegavmtdcFastest}) +can be written as +\begin{equation} + \dot{\omega}(t)=\mathbb{E}[\delta_t|\bm{u},\bm{\theta}]-\omega(t). + \label{omegavmtdcFastestFinal} +\end{equation} + +Consider the function $h(\omega)=\mathbb{E}[\delta|\bm{\theta},\bm{u}]-\omega$, +i.e., the driving vector field of the ODE (\ref{omegavmtdcFastestFinal}). +It is easy to see that the function $h$ is Lipschitz with coefficient +$1$. +Let $h_{\infty}(\cdot)$ be the function defined by + $h_{\infty}(\omega)=\lim_{r\rightarrow \infty}\frac{h(r\omega)}{r}$. + Then $h_{\infty}(\omega)= -\omega$ is well-defined. + For (\ref{omegavmtdcFastestFinal}), $\omega^*=\mathbb{E}[\delta|\bm{\theta},\bm{u}]$ +is the unique globally asymptotically stable equilibrium. +For the ODE +\begin{equation} + \dot{\omega}(t) = h_{\infty}(\omega(t))= -\omega(t), + \label{omegavmtdcInfty} +\end{equation} +apply $\vec{V}(\omega)=(-\omega)^{\top}(-\omega)/2$ as its +associated strict Lyapunov function. 
Then, +the origin of (\ref{omegavmtdcInfty}) is a globally asymptotically stable +equilibrium. + +Consider now the recursion (13). +Let +$M_{k+1}=(\delta_k-\omega_k) +-\mathbb{E}[(\delta_k-\omega_k)|\mathcal{F}(k)]$, +where $\mathcal{F}(k)=\sigma(\omega_l,\bm{u}_l,\bm{\theta}_l,l\leq k;\bm{\phi}_s,\bm{\phi}_s',r_s,s<k)$, +$k\geq 1$, are the $\sigma$-fields generated by the history of the algorithm. +It is easy to see that $M_{k+1}$, $k\geq 0$, are integrable random variables that +satisfy $\mathbb{E}[M_{k+1}|\mathcal{F}(k)]=0$, $\forall k\geq 0$. +Because $\bm{\phi}_k$, $r_k$, and $\bm{\phi}_k'$ have +uniformly bounded second moments, it can be seen that for some constant +$c_1>0$, $\forall k\geq0$, +\begin{equation*} +\mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq +c_1(1+||\omega_k||^2+||\bm{u}_k||^2+||\bm{\theta}_k||^2). +\end{equation*} + + +Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified. +Furthermore, Assumption (TS) of \cite{borkar2000ode} is satisfied by our +conditions on the step-size sequences $\alpha_k$, $\zeta_k$, $\beta_k$. Thus, +by Theorem 2.2 of \cite{borkar2000ode} we obtain that +$||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$. + +Consider now the second time scale recursion (12). +Based on the above analysis, the ODE system associated with (11) and (12) on this timescale is +% \begin{equation*} +% \bm{u}_{k+1}\leftarrow u_{k}+\zeta_{k}[\delta_{k}-\mathbb{E}[\delta_k|\bm{u}_k,\bm{\theta}_k] - \bm{\phi}^{\top} (s_k) \bm{u}_k]\bm{\phi}(s_k). +% \end{equation*} +\begin{equation} + \dot{\bm{\theta}}(t) = 0, + \label{thetavmtdcFaster} +\end{equation} +\begin{equation} + \dot{\bm{u}}(t) = \mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\bm{u}(t),\bm{\theta}(t)])\bm{\phi}_t|\bm{\theta}(t)] - \textbf{C}\bm{u}(t). + \label{uvmtdcFaster} +\end{equation} +The ODE (\ref{thetavmtdcFaster}) suggests that $\bm{\theta}(t)\equiv \bm{\theta}$ (i.e., a time-invariant parameter) +when viewed from the second-fastest timescale. +By the Hirsch lemma \cite{hirsch1989convergent}, it follows that +$||\bm{\theta}_k-\bm{\theta}||\rightarrow 0$ a.s. as $k\rightarrow \infty$ for some +$\bm{\theta}$ that depends on the initial condition $\bm{\theta}_0$ of recursion +(11). + +Consider now the recursion (12). 
+Let +$N_{k+1}=((\delta_k-\mathbb{E}[\delta_k])\bm{\phi}_k - \bm{\phi}_k \bm{\phi}^{\top}_k \bm{u}_k) -\mathbb{E}[((\delta_k-\mathbb{E}[\delta_k])\bm{\phi}_k - \bm{\phi}_k \bm{\phi}^{\top}_k \bm{u}_k)|\mathcal{I} (k)]$, +where $\mathcal{I}(k)=\sigma(\bm{u}_l,\bm{\theta}_l,l\leq k;\bm{\phi}_s,\bm{\phi}_s',r_s,s<k)$, +$k\geq 1$, are the $\sigma$-fields generated by the history of the algorithm. +It is easy to see that $N_{k+1}$, $k\geq 0$, are integrable random variables that +satisfy $\mathbb{E}[N_{k+1}|\mathcal{I}(k)]=0$, $\forall k\geq 0$. +Because $\bm{\phi}_k$, $r_k$, and $\bm{\phi}_k'$ have +uniformly bounded second moments, it can be seen that for some constant +$c_2>0$, $\forall k\geq0$, +\begin{equation*} +\mathbb{E}[||N_{k+1}||^2|\mathcal{I}(k)]\leq +c_2(1+||\bm{u}_k||^2+||\bm{\theta}_k||^2). +\end{equation*} + +Because $\bm{\theta}(t)\equiv \bm{\theta}$ from (\ref{thetavmtdcFaster}), the ODE pair (\ref{thetavmtdcFaster})-(\ref{uvmtdcFaster}) +can be written as +\begin{equation} + \dot{\bm{u}}(t) = \mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\bm{\theta}])\bm{\phi}_t|\bm{\theta}] - \textbf{C}\bm{u}(t). + \label{uvmtdcFasterFinal} +\end{equation} +Now consider the function $h(\bm{u})=\mathbb{E}[(\delta_t-\mathbb{E}[\delta_t|\bm{\theta}])\bm{\phi}_t|\bm{\theta}] -\textbf{C}\bm{u}$, i.e., the +driving vector field of the ODE (\ref{uvmtdcFasterFinal}). For (\ref{uvmtdcFasterFinal}), +$\bm{u}^* = \textbf{C}^{-1}\mathbb{E}[(\delta-\mathbb{E}[\delta|\bm{\theta}])\bm{\phi}|\bm{\theta}]$ is the unique globally asymptotically +stable equilibrium. Let $h_{\infty}(\bm{u})=-\textbf{C}\bm{u}$. +For the ODE +\begin{equation} + \dot{\bm{u}}(t) = h_{\infty}(\bm{u}(t))= -\textbf{C}\bm{u}(t), + \label{uvmtdcInfty} +\end{equation} +the origin of (\ref{uvmtdcInfty}) is a globally asymptotically stable +equilibrium because $\textbf{C}$ is a positive definite matrix (it is nonnegative definite and nonsingular). +Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified. +Furthermore, Assumption (TS) of \cite{borkar2000ode} is satisfied by our +conditions on the step-size sequences $\alpha_k$, $\zeta_k$, $\beta_k$. Thus, +by Theorem 2.2 of \cite{borkar2000ode} we obtain that +$||\bm{u}_k-\bm{u}^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$. + +Consider now the slowest timescale recursion (11). 
In the light of the above, +(11) can be rewritten as +\begin{equation} + \bm{\theta}_{k+1} \leftarrow \bm{\theta}_{k} + \alpha_k (\delta_k -\mathbb{E}[\delta_k|\bm{\theta}_k]) \bm{\phi}_k\\ + - \alpha_k \gamma\bm{\phi}'_{k}(\bm{\phi}^{\top}_k \textbf{C}^{-1}\mathbb{E}[(\delta_k -\mathbb{E}[\delta_k|\bm{\theta}_k])\bm{\phi}|\bm{\theta}_k]). +\end{equation} +Let $\mathcal{G}(k)=\sigma(\bm{\theta}_l,l\leq k;\bm{\phi}_s,\bm{\phi}_s',r_s,s0$, $\forall k\geq0$, + \begin{equation*} + \mathbb{E}[||M_{k+1}||^2|\mathcal{F}(k)]\leq + c_1(1+||\omega_k||^2+||\bm{\theta}_k||^2). + \end{equation*} + + + Now Assumptions (A1) and (A2) of \cite{borkar2000ode} are verified. + Furthermore, Assumptions (TS) of \cite{borkar2000ode} is satisfied by our + conditions on the step-size sequences $\alpha_k$, $\beta_k$. Thus, + by Theorem 2.2 of \cite{borkar2000ode} we obtain that + $||\omega_k-\omega^*||\rightarrow 0$ almost surely as $k\rightarrow \infty$. + + Consider now the slower time scale recursion (19). + Based on the above analysis, (19) can be rewritten as + % \begin{equation*} + % \bm{\theta}_{k+1}\leftarrow + % \bm{\theta}_{k}+\alpha_k(F_k\rho_k\delta_k-\mathbb{E}_{\mu}[F_k\rho_k\delta_k|\bm{\theta}_k])\bm{\phi}_k. 
+ % \end{equation*} + + \begin{equation*} + \begin{split} + \bm{\theta}_{k+1}&\leftarrow \bm{\theta}_k+\alpha_k (F_k \rho_k\delta_k - \omega_k)\bm{\phi}_k -\alpha_k \omega_{k+1}\bm{\phi}_k\\ + &=\bm{\theta}_{k}+\alpha_k(F_k\rho_k\delta_k-\mathbb{E}_{\mu}[F_k\rho_k\delta_k|\bm{\theta}_k])\bm{\phi}_k\\ + &=\bm{\theta}_k+\alpha_k F_k \rho_k (R_{k+1}+\gamma \bm{\theta}_k^{\top}\bm{\phi}_{k+1}-\bm{\theta}_k^{\top}\bm{\phi}_k)\bm{\phi}_k -\alpha_k \mathbb{E}_{\mu}[F_k \rho_k \delta_k]\bm{\phi}_k\\ + &= \bm{\theta}_k+\alpha_k \{\underbrace{(F_k\rho_kR_{k+1}-\mathbb{E}_{\mu}[F_k\rho_k R_{k+1}])\bm{\phi}_k}_{\bm{b}_{\text{VMETD},k}} + -\underbrace{(F_k\rho_k\bm{\phi}_k(\bm{\phi}_k-\gamma\bm{\phi}_{k+1})^{\top}-\bm{\phi}_k\mathbb{E}_{\mu}[F_k\rho_k (\bm{\phi}_k-\gamma\bm{\phi}_{k+1})]^{\top})}_{\textbf{A}_{\text{VMETD},k}}\bm{\theta}_k\} + \end{split} + \end{equation*} + + Let $\mathcal{G}(k)=\sigma(\bm{\theta}_l,l\leq k;\bm{\phi}_s,\bm{\phi}_s',r_s,s0$, $\forall k\geq0$, + \begin{equation*} + \mathbb{E}[||Z_{k+1}||^2|\mathcal{G}(k)]\leq + c_2(1+||\bm{\theta}_k||^2). + \end{equation*} + + Consider now the following ODE associated with (19): + \begin{equation} + \begin{array}{ccl} + \dot{\bm{\theta}}(t)&=&-\textbf{A}_{\text{VMETD}}\bm{\theta}(t)+\bm{b}_{\text{VMETD}}. 
+ \end{array} + \label{odetheta} + \end{equation} + \begin{equation} + \begin{split} + \textbf{A}_{\text{VMETD}}&=\lim_{k \rightarrow \infty} \mathbb{E}[\textbf{A}_{\text{VMETD},k}]\\ + &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k \rho_k \bm{\phi}_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})^{\top}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \bm{\phi}_k]\mathbb{E}_{\mu}[F_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})]^{\top}\\ + % &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\underbrace{\bm{\phi}_k}_{X}\underbrace{F_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})^{\top}}_{Y}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \bm{\phi}_k]\mathbb{E}_{\mu}[F_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})]^{\top}\\ + &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\bm{\phi}_kF_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})^{\top}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \bm{\phi}_k]\mathbb{E}_{\mu}[F_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})]^{\top}\\ + &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\bm{\phi}_kF_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})^{\top}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[ \bm{\phi}_k]\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k \rho_k (\bm{\phi}_k - \gamma \bm{\phi}_{k+1})]^{\top}\\ + &=\sum_{s} f(s) \bm{\phi}(s)(\bm{\phi}(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\bm{\phi}(s'))^{\top} - \sum_{s} d_{\mu}(s) \bm{\phi}(s) * \sum_{s} f(s)(\bm{\phi}(s) - \gamma \sum_{s'}[\textbf{P}_{\pi}]_{ss'}\bm{\phi}(s'))^{\top} \\ + &={\bm{\Phi}}^{\top} \textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} - {\bm{\Phi}}^{\top} \textbf{d}_{\mu} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\pi}) \bm{\Phi} \\ + &={\bm{\Phi}}^{\top} (\textbf{F} - \textbf{d}_{\mu} \textbf{f}^{\top}) (\textbf{I} - \gamma \textbf{P}_{\pi}){\bm{\Phi}} \\ + &={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{f}^{\top} (\textbf{I} - \gamma \textbf{P}_{\pi})){\bm{\Phi}} \\ +
&={\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} ){\bm{\Phi}} \\ + \end{split} + \end{equation} + \begin{equation} + \begin{split} + \bm{b}_{\text{VMETD}}&=\lim_{k \rightarrow \infty} \mathbb{E}[\bm{b}_{\text{VMETD},k}]\\ + &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[F_k\rho_kR_{k+1}\bm{\phi}_k]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[\bm{\phi}_k]\mathbb{E}_{\mu}[F_k\rho_kR_{k+1}]\\ + &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\bm{\phi}_kF_k\rho_kR_{k+1}]- \lim_{k\rightarrow \infty} \mathbb{E}_{\mu}[ \bm{\phi}_k]\mathbb{E}_{\mu}[F_k\rho_kR_{k+1}]\\ + &= \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[\bm{\phi}_kF_k\rho_kR_{k+1}]- \lim_{k \rightarrow \infty} \mathbb{E}_{\mu}[ \bm{\phi}_k]\lim_{k \rightarrow \infty}\mathbb{E}_{\mu}[F_k\rho_kR_{k+1}]\\ + &=\sum_{s} f(s) \bm{\phi}(s)r_{\pi}(s) - \sum_{s} d_{\mu}(s) \bm{\phi}(s) * \sum_{s} f(s)r_{\pi}(s) \\ + &=\bm{\Phi}^{\top}(\textbf{F}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top})\textbf{r}_{\pi} \\ + \end{split} + \end{equation} + Let $\vec{h}(\bm{\theta}(t))$ be the driving vector field of the ODE + (\ref{odetheta}). + \begin{equation*} + \vec{h}(\bm{\theta}(t))=-\textbf{A}_{\text{VMETD}}\bm{\theta}(t)+\bm{b}_{\text{VMETD}}. + \end{equation*} + + A matrix of the form $\bm{\Phi}^{\top}\bm{\text{X}}\bm{\Phi}$ + will be positive definite whenever the matrix $\bm{\text{X}}$ is positive definite. + Any matrix $\bm{\text{X}}$ is positive definite if and only if + the symmetric matrix $\bm{\text{S}}=\bm{\text{X}}+\bm{\text{X}}^{\top}$ is positive definite. + Any symmetric real matrix $\bm{\text{S}}$ is positive definite if the absolute values of + its diagonal entries are greater than the sum of the absolute values of the corresponding + off-diagonal entries~\cite{sutton2016emphatic}. 
+ + \begin{equation} + \label{rowsum} + \begin{split} + (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} )\textbf{1} + &=\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})\textbf{1}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\ + &=\textbf{F}(\textbf{1}-\gamma \textbf{P}_{\pi} \textbf{1})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\ + &=(1-\gamma)\textbf{F}\textbf{1}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\ + &=(1-\gamma)\textbf{f}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \textbf{1}\\ + &=(1-\gamma)\textbf{f}-\textbf{d}_{\mu} \\ + &=(1-\gamma)(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}\textbf{d}_{\mu}-\textbf{d}_{\mu} \\ + &=(1-\gamma)[(\textbf{I}-\gamma\textbf{P}_{\pi}^{\top})^{-1}-\textbf{I}]\textbf{d}_{\mu} \\ + &=(1-\gamma)[\sum_{t=0}^{\infty}(\gamma\textbf{P}_{\pi}^{\top})^{t}-\textbf{I}]\textbf{d}_{\mu} \\ + &=(1-\gamma)[\sum_{t=1}^{\infty}(\gamma\textbf{P}_{\pi}^{\top})^{t}]\textbf{d}_{\mu} > 0 \\ + \end{split} + \end{equation} + \begin{equation} + \label{columnsum} + \begin{split} + \textbf{1}^{\top}(\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} ) + &=\textbf{1}^{\top}\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{1}^{\top}\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \\ + &=\textbf{d}_{\mu}^{\top}-\textbf{1}^{\top}\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} \\ + &=\textbf{d}_{\mu}^{\top}- \textbf{d}_{\mu}^{\top} \\ + &=0 + \end{split} + \end{equation} + (\ref{rowsum}) and (\ref{columnsum}) show that the diagonal entries of the matrix $\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top}$ + are positive and its off-diagonal entries are negative. So each of its row sums plus the corresponding column sum is positive. + Thus, $\textbf{A}_{\text{VMETD}}$ is positive definite.
+ + + + Therefore, + $\bm{\theta}^*=\textbf{A}_{\text{VMETD}}^{-1}\bm{b}_{\text{VMETD}}$ can be seen to be the unique globally asymptotically + stable equilibrium for ODE (\ref{odetheta}). + Let $\vec{h}_{\infty}(\bm{\theta})=\lim_{r\rightarrow + \infty}\frac{\vec{h}(r\bm{\theta})}{r}$. Then + $\vec{h}_{\infty}(\bm{\theta})=-\textbf{A}_{\text{VMETD}}\bm{\theta}$ is well-defined. + Consider now the ODE + \begin{equation} + \dot{\bm{\theta}}(t)=-\textbf{A}_{\text{VMETD}}\bm{\theta}(t). + \label{odethetafinal} + \end{equation} + The ODE (\ref{odethetafinal}) has the origin as its unique globally asymptotically stable equilibrium. + Thus, the assumptions (A1) and (A2) are verified. + \end{proof} + + +\section{Mathematical Analysis} +\label{mathematicalanalysis} +Table \ref{keymatrices} shows the key matrices of various algorithms. +Table \ref{minimumeigenvalues} shows minimum eigenvalues $\frac{1}{2}\lambda_{\min}(\textbf{A} + \textbf{A}^\top)$ of various algorithms on several examples. +\begin{table}[htb] + \centering + \caption{Key matrices of various algorithms.} + \label{keymatrices} + { + \begin{tabular}{cccc} + \toprule + Algorithm&Key matrix $\textbf{A}$&{Positive definite}&{$\bm{b}$}\\\midrule + On-policy TD&$\bm{\Phi}^{\top}\textbf{D}_{\pi}(\textbf{I}-\gamma + \textbf{P}_{\pi})\bm{\Phi}$&$\checkmark$&$\bm{b}_{\text{on}}=\bm{\Phi}^{\top}\textbf{D}_{\pi}\textbf{r}_{\pi}$\\ + On-policy VMTD&${\bm{\Phi}}^{\top}(\textbf{D}_{\pi}-\textbf{d}_{\pi} \textbf{d}_{\pi}^{\top} )(\textbf{I} - \gamma\textbf{P}_{\pi}){\bm{\Phi}}$ + &$\checkmark$&$\bm{\Phi}^{\top}(\textbf{D}_{\pi}-\textbf{d}_{\pi} \textbf{d}_{\pi}^{\top})\textbf{r}_{\pi}$\\ + \midrule + Off-policy TD&$\textbf{A}_{\text{off}}={\bm{\Phi}}^{\top}\textbf{D}_{\mu}(\textbf{I}-\gamma + \textbf{P}_{\pi}){\bm{\Phi}}$&$\times$&$\bm{b}_{\text{off}}=\bm{\Phi}^{\top}\textbf{D}_{\mu}\textbf{r}_{\pi}$\\ + TDC&
$\textbf{A}_{\text{off}}^{\top}\textbf{C}^{-1}\textbf{A}_{\text{off}}$&$\checkmark$&$\textbf{A}_{\text{off}}^{\top}\textbf{C}^{-1}\bm{b}_{\text{off}}$ + \\ + ETD& ${\bm{\Phi}}^{\top}\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi}){\bm{\Phi}}$ + &$\checkmark$&$\bm{\Phi}^{\top}\textbf{F}\textbf{r}_{\pi}$\\ + \midrule + Off-policy VMTD&$\textbf{A}_{\text{VM}}={\bm{\Phi}}^{\top} (\textbf{D}_{\mu}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} )(\textbf{I} - \gamma\textbf{P}_{\pi}){\bm{\Phi}}$ + &$\times$&$\bm{b}_{\text{VM}}=\bm{\Phi}^{\top}(\textbf{D}_{\mu}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top})\textbf{r}_{\pi}$\\ + VMTDC& $\textbf{A}_{\text{VM}}^{\top}\textbf{C}^{-1}\textbf{A}_{\text{VM}}$&$\checkmark$&$\textbf{A}_{\text{VM}}^{\top}\textbf{C}^{-1}\bm{b}_{\text{VM}}$ + \\ + VMETD& ${\bm{\Phi}}^{\top} (\textbf{F} (\textbf{I} - \gamma \textbf{P}_{\pi})-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top} ){\bm{\Phi}}$ + &$\checkmark$&$\bm{\Phi}^{\top}(\textbf{F}-\textbf{d}_{\mu} \textbf{d}_{\mu}^{\top})\textbf{r}_{\pi}$\\ + \bottomrule + \end{tabular} + } +\end{table} +\begin{table}[htb] + \centering + \caption{Minimum eigenvalues $\frac{1}{2}\lambda_{\min}(\textbf{A} + \textbf{A}^\top)$ of various algorithms on several examples.} + \label{minimumeigenvalues} + { + \begin{tabular}{ccccccc} + \toprule + \multirow{2}{*}{Algorithm}&\multirow{2}{*}{2-state} & \multirow{2}{*}{Baird's}&\multicolumn{3}{c}{Random Walk}&\multirow{2}{*}{Boyan Chain} + \\\cmidrule{4-6} %\cline{3-4} + &&&Tabular & Inverted & Dependent& \\ + \midrule + TD& $-0.2$ & $-0.791$&$0.018$&$0.017$&$0.07$&$0.024$\\ + VMTD & & & & & & \\ + \midrule + TDC&$0.016$ & $-0.002$&$0.002$&$0.007$&$0.011$&$0.002$\\ + VMTDC & & & & & & \\ + \midrule + ETD&$\textbf{3.4}$& $-2.82\text{e-16}$&$\textbf{0.195}$&$\textbf{0.165}$&$\textbf{0.76}$&$\textbf{0.245}$\\ + VMETD & & & & & & \\ + \bottomrule + \end{tabular} + } +\end{table} +\begin{table}[htb] + \centering + \caption{Baird's Counterexample.} + \label{bairdcounterexample} + { +
\begin{tabular}{ccc} + \toprule + \multirow{2}{*}{Algorithm}&\multicolumn{2}{c}{Baird's Counterexample} + \\\cmidrule{2-3} + &Two State & Seven State \\ + \midrule + TD& [-0.2]& [-7.91e-01, 1.07e-16, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 7.48e-01]\\ + VMTD & [0.25]& [-2.25e-01, -3.45e-17, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 2.87e+00] \\ + \midrule + TDC&[0.016]&[-0.0024, 0.0078, 0.5714, 0.5714, 0.5714, 0.5714, 0.5729, 1.7682]\\ + VMTDC &[0.025] &[-1.55e-03, -3.15e-04, 3.56e-01, 5.47e-01, 5.70e-01, 5.72e-01, 5.83e-01, 5.98e-01] \\ + \midrule + ETD& [3.4]& [-2.82e-16, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 7.40e-01, 3.72e+00]\\ + VMETD &[1.15] &[-2.25e-01, -3.45e-17, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 5.71e-01, 2.86e+00] \\ + \bottomrule + \end{tabular} + } +\end{table} +\begin{table}[htb] + \centering + \caption{Random Walk.} + \label{randomwalk} + { + \begin{tabular}{cccc} + \toprule + \multirow{2}{*}{Algorithm}&\multicolumn{3}{c}{Random Walk} + \\\cmidrule{2-4} + &Tabular & Inverted & Dependent \\ + \midrule + TD&[0.018,0.074,0.183,0.260,0.464] &[0.017,0.044,0.065,0.083,0.116] &[0.070,0.113,0.138]\\ + VMTD & [1.62e-17,0.073,0.180,0.260,0.463]& [-2.07e-17,0.018,0.045,0.065,0.115] &[0.022,0.115,0.116]\\ + \midrule + TDC&[0.002,0.046,0.240,0.364,0.769]&[0.007,0.012,0.060,0.091,0.192]&[0.011,0.065,0.182]\\ + VMTDC & [6.74e-17,0.044,0.240,0.364,0.769]& [-7.248e-18,0.011,0.060,0.091,0.192] & [0.0008,0.062,0.182]\\ + \midrule + ETD&[0.195,0.669,1.712,2.765,4.660] & [0.165,0.420,0.678,0.820,1.168]&[0.760,1.084,1.394]\\ + VMETD &[-3.40e-04,0.664,1.69,2.76,4.65] & [-0.001,0.167,0.423,0.689,1.163]&[0.221,1.043,1.293] \\ + \bottomrule + \end{tabular} + } +\end{table} +\begin{table}[htb] + \centering + \caption{Boyan Chain.} + \label{boyanchain} + { + \begin{tabular}{cc} + \toprule + Algorithm&eigenvalues\\ + \midrule + TD&[0.024495,0.054,0.065,0.135]\\ + VMTD&[2.70e-18,0.053,0.065,0.135]\\ + \midrule + TDC&[0.002,0.058,0.067,0.153]
\\ + VMTDC& [-1.40e-18,0.057,0.067,0.153]\\ + \midrule + ETD& [0.245,0.540,0.647,1.352]\\ + VMETD&[1.57e-17,0.529,0.647,1.352] \\ + \bottomrule + \end{tabular} + } +\end{table} + +\section{Experimental details} +\label{experimentaldetails} +The feature matrices corresponding to three random walks are shown below respectively: +\begin{equation*} + \Phi_{tabular}=\left[ + \begin{array}{ccccc} + 1 & 0& 0& 0& 0\\ + 0 & 1& 0& 0& 0\\ + 0 & 0& 1& 0& 0\\ + 0 & 0& 0& 1& 0\\ + 0 & 0& 0& 0& 1 + \end{array}\right] + \end{equation*} + \begin{equation*} + \Phi_{inverted}=\left[ + \begin{array}{ccccc} + 0 & \frac{1}{2}& \frac{1}{2}& \frac{1}{2}& \frac{1}{2}\\ + \frac{1}{2} & 0& \frac{1}{2}& \frac{1}{2}& \frac{1}{2}\\ + \frac{1}{2} & \frac{1}{2}& 0& \frac{1}{2}& \frac{1}{2}\\ + \frac{1}{2} & \frac{1}{2}& \frac{1}{2}& 0& \frac{1}{2}\\ + \frac{1}{2} & \frac{1}{2}& \frac{1}{2}& \frac{1}{2}& 0 + \end{array}\right] + \end{equation*} + \begin{equation*} + \Phi_{dependent}=\left[ + \begin{array}{ccccc} + 1 & 0& 0\\ + \frac{1}{\sqrt{2}} & \frac{1}{\sqrt{2}}& 0\\ + \frac{1}{\sqrt{3}} & \frac{1}{\sqrt{3}}& \frac{1}{\sqrt{3}}\\ + 0 & \frac{1}{\sqrt{2}}& \frac{1}{\sqrt{2}}\\ + 0 & 0& 1 + \end{array}\right] + \end{equation*} + +Three random walk experiments: the $\alpha$ values for +all algorithms are in the range of $\{0.008, 0.015, 0.03, 0.06, 0.12, 0.25, 0.5\}$. For the TDC algorithm, +the range of the ratio $\frac{\zeta}{\alpha}$ is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$. For the VMTD algorithm, +the range of the ratio $\frac{\beta}{\alpha}$ is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$. It can be observed from +the update formula of VMTDC that when $\zeta$ takes a very small value, +the VMTDC update tends to be similar to VMTD update. 
Similarly, +when $\beta$ takes a very small value, the VMTDC update tends to be +similar to the TDC update. Through experiments, it was found that +setting $\zeta$ to a small value makes VMTDC updates approach VMTD +updates, resulting in better performance. Therefore, for the VMTDC +algorithm, the range of the $\frac{\beta}{\alpha}$ ratio is $\{\frac{1}{512}, \frac{1}{256}, \frac{1}{128}, \frac{1}{64}, \frac{1}{32}, \frac{1}{16}, \frac{1}{8}, \frac{1}{4}, \frac{1}{2}, 1, 2\}$, and the range of +$\zeta$ is $\{0.1, 0.01, 0.001, 0.0001, 0.00001\}$. The learning curves in Figure \ref{Evaluation_full} correspond to the optimal +parameters. + +The feature matrix of the 7-state version of Baird's off-policy counterexample is +defined as follows: +\begin{equation*} +\Phi_{Counter}=\left[ +\begin{array}{cccccccc} +1 & 2& 0& 0& 0& 0& 0& 0\\ +1 & 0& 2& 0& 0& 0& 0& 0\\ +1 & 0& 0& 2& 0& 0& 0& 0\\ +1 & 0& 0& 0& 2& 0& 0& 0\\ +1 & 0& 0& 0& 0& 2& 0& 0\\ +1 & 0& 0& 0& 0& 0& 2& 0\\ +2 & 0& 0& 0& 0& 0& 0& 1 +\end{array}\right] +\end{equation*} + +7-state version of Baird's off-policy counterexample: +for the TD algorithm, $\alpha$ is set to 0.1. For the TDC algorithm, the range of +$\alpha$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0\}$, +and the range of +$\zeta$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5\}$. +For the VMTD algorithm, the range of +$\alpha$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0\}$, +and the range of +$\beta$ is $\{0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5\}$. Through experiments, it was found +that setting $\zeta$ to a small value makes VMTDC updates approach VMTD +updates, resulting in better performance. Therefore, for the VMTDC +algorithm, the range of values for $\alpha$ and $\beta$ is the same as that of VMTD +and the range of $\zeta$ +is $\{0.1, 0.01, 0.001, 0.0001, 0.00001\}$.
+The learning curves in Figure \ref{Complete_full} correspond to the optimal parameters. +For all policy evaluation experiments, each experiment +is independently run 100 times. + +For the four control experiments: The learning rates for each +algorithm in all experiments are shown in Table \ref{lrofways}. +For all control experiments, each experiment is independently run 50 times. + +\begin{table*}[htb] + \centering + \caption{Learning rates ($lr$) of four control experiments.} + \vskip 0.15in + \begin{tabular}{c|ccccc} + \hline + \multicolumn{1}{c|}{\diagbox{algorithms($lr$)}{envs}} &Maze &Cliff walking &Mountain Car &Acrobot \\ + \hline + Sarsa($\alpha$)&$0.1$ &$0.1$ &$0.1$ &$0.1$ \\ + GQ(0)($\alpha,\zeta$)&$0.1,0.003$ &$0.1,0.004$ &$0.1,0.01$ &$0.1,0.01$ \\ + VMSarsa($\alpha,\beta$)&$0.1,0.001$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ \\ + VMGQ(0)($\alpha,\zeta,\beta$)&$0.1,0.001,0.001$ &$0.1,0.005,\text{1e-4}$ &$0.1,\text{5e-4},\text{1e-4}$ &$0.1,\text{5e-4},\text{1e-4}$ \\ + AC($lr_{\text{actor}},lr_{\text{critic}}$)&$0.01,0.1$ &$0.01,0.01$ &$0.01,0.05$ &$0.01,0.05$ \\ + Q-learning($\alpha$)&$0.1$ &$0.1$ &$0.1$ &$0.1$ \\ + VMQ($\alpha,\beta$)&$0.1,0.001$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ &$0.1,\text{1e-4}$ \\ + \hline + \end{tabular} + \label{lrofways} + \vskip -0.1in +\end{table*} + +\bibliography{aaai24} + +\end{document} diff --git b/Apendix/figure1.pdf a/Apendix/figure1.pdf new file mode 100644 index 0000000..76b3c1f Binary files /dev/null and a/Apendix/figure1.pdf differ diff --git b/Apendix/figure2.pdf a/Apendix/figure2.pdf new file mode 100644 index 0000000..8353d7c Binary files /dev/null and a/Apendix/figure2.pdf differ diff --git b/Apendix/pic/2StateExample.pdf a/Apendix/pic/2StateExample.pdf new file mode 100644 index 0000000..a2520f9 Binary files /dev/null and a/Apendix/pic/2StateExample.pdf differ diff --git b/Apendix/pic/Acrobot_complete.pdf a/Apendix/pic/Acrobot_complete.pdf new file mode 100644 index 0000000..7b8cdd3 Binary 
files /dev/null and a/Apendix/pic/Acrobot_complete.pdf differ diff --git b/Apendix/pic/maze_complete.pdf a/Apendix/pic/maze_complete.pdf new file mode 100644 index 0000000..17cfb21 Binary files /dev/null and a/Apendix/pic/maze_complete.pdf differ diff --git b/Apendix/pic/maze_key_complete.pdf a/Apendix/pic/maze_key_complete.pdf new file mode 100644 index 0000000..5e93041 Binary files /dev/null and a/Apendix/pic/maze_key_complete.pdf differ diff --git b/Apendix/pic/mt_complete.pdf a/Apendix/pic/mt_complete.pdf new file mode 100644 index 0000000..778dfb0 Binary files /dev/null and a/Apendix/pic/mt_complete.pdf differ