{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T11:43:58Z","timestamp":1753357438386,"version":"3.37.3"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100005416","name":"Norges Forskningsr\u00e5d","doi-asserted-by":"publisher","award":["270053"],"award-info":[{"award-number":["270053"]}],"id":[{"id":"10.13039\/501100005416","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Experimental Infrastructure for Exploration of Exascale Computing"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tg.2022.3185330","type":"journal-article","created":{"date-parts":[[2022,6,22]],"date-time":"2022-06-22T19:48:57Z","timestamp":1655927337000},"page":"580-589","source":"Crossref","is-referenced-by-count":3,"title":["Improving the Diversity of Bootstrapped DQN by Replacing Priors With Noise"],"prefix":"10.1109","volume":"15","author":[{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-8867-9104","authenticated-orcid":false,"given":"Li","family":"Meng","sequence":"first","affiliation":[{"name":"University of Oslo, Oslo, Norway"}]},{"given":"Morten","family":"Goodwin","sequence":"additional","affiliation":[{"name":"Centre for Artificial Intelligence Research, University of Agder, Kristiansand, Norway"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0001-7591-1659","authenticated-orcid":false,"given":"Anis","family":"Yazidi","sequence":"additional","affiliation":[{"name":"Oslo Metropolitan University, Oslo, Norway"}]},{"given":"Paal E.","family":"Engelstad","sequence":"additional","affiliation":[{"name":"University of Oslo, Oslo, Norway"}]}],"member":"263","reference":[{"key":"ref1","first-page":"29304","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Agarwal","year":"2021"},{"key":"ref2","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen","year":"2021"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.5220\/0010227301070118"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1613\/jair.639"},{"article-title":"Go-explore: A new approach for hard-exploration problems","year":"2019","author":"Ecoffet","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03157-9"},{"key":"ref7","article-title":"Diversity is all you need: Learning skills without a reward function","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Eysenbach","year":"2019"},{"key":"ref8","article-title":"Noisy networks for exploration","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fortunato","year":"2018"},{"key":"ref9","article-title":"Efficient Bayes-adaptive reinforcement learning using sample-based search","volume-title":"Adv. Neural Inf. Process. Syst.","volume":"25","author":"Guez","year":"2012"},{"key":"ref10","first-page":"2613","article-title":"Double Q-learning","volume-title":"Advances in Neural Information Processing Systems","volume":"23","author":"Hasselt","year":"2010"},{"key":"ref11","first-page":"15663","article-title":"Using self-supervised learning can improve model robustness and uncertainty","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Hendrycks","year":"2019"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref13","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Advances in Neural Information Processing Systems","volume":"29","author":"Ho","year":"2016"},{"issue":"4","key":"ref14","first-page":"1563","article-title":"Near-optimal regret bounds for reinforcement learning","volume":"11","author":"Jaksch","year":"2010","journal-title":"J. Mach. Learn. Res."},{"key":"ref15","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2015"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11023-007-9079-x"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1613\/jair.5699"},{"key":"ref18","article-title":"Playing Atari with Deep Reinforcement Learning","volume-title":"Proc. NIPS Deep Learn. Workshop","author":"Mnih","year":"2013"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"1","author":"Ng","year":"2000"},{"key":"ref21","article-title":"Randomized prior functions for deep reinforcement learning","volume-title":"Adv. Neural Inf. Process. Syst.","volume":"31","author":"Osband","year":"2018"},{"key":"ref22","first-page":"4026","article-title":"Deep exploration via bootstrapped DQN","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Osband","year":"2016"},{"article-title":"Bootstrapped Thompson sampling and deep exploration","year":"2015","author":"Osband","key":"ref23"},{"issue":"124","key":"ref24","first-page":"1","article-title":"Deep exploration via randomized value functions","volume":"20","author":"Osband","year":"2019","journal-title":"J. Mach. Learn. Res."},{"key":"ref25","first-page":"2377","article-title":"Generalization and exploration via randomized value functions","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Osband","year":"2016"},{"key":"ref26","article-title":"Parameter space noise for exploration","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Plappert","year":"2018"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1561\/2200000070"},{"article-title":"Measuring intelligence through games","year":"2011","author":"Schaul","key":"ref28"},{"key":"ref29","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sekar","year":"2020"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.2307\/2332286"},{"key":"ref31","first-page":"255","article-title":"Issues in using function approximation for reinforcement learning","volume-title":"Proc. 4th Connectionist Models Summer Sch.","author":"Thrun","year":"1993"},{"key":"ref32","first-page":"422","article-title":"Randomized value functions via multiplicative normalizing flows","volume-title":"Proc. Uncertainty Artif. Intell.","author":"Touati","year":"2020"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref34","first-page":"1995","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Wang","year":"2016"},{"issue":"3\/4","key":"ref35","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1023\/A:1022676722315","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/http\/xplorestaging.ieee.org\/ielx7\/7782673\/10361571\/09804237.pdf?arnumber=9804237","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T01:54:31Z","timestamp":1705024471000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/ieeexplore.ieee.org\/document\/9804237\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":35,"journal-issue":{"issue":"4"},"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1109\/tg.2022.3185330","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"type":"print","value":"2475-1502"},{"type":"electronic","value":"2475-1510"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}