{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T17:34:19Z","timestamp":1761845659309},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T00:00:00Z","timestamp":1679270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T00:00:00Z","timestamp":1679270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1007\/s11063-023-11209-0","type":"journal-article","created":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T09:03:13Z","timestamp":1679302993000},"page":"9467-9482","update-policy":"https:\/\/linproxy.fan.workers.dev:443\/http\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Application of DQN-IRL Framework in Doudizhu\u2019s Sparse Reward"],"prefix":"10.1007","volume":"55","author":[{"given":"Yan","family":"Kong","sequence":"first","affiliation":[]},{"given":"Hongyuan","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Xiaocong","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yefeng","family":"Rui","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,20]]},"reference":[{"issue":"7587","key":"11209_CR1","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, 
Maddison CJ et al (2016) Mastering the game of Go with deep neural networks and tree search. Nature 529(7587):484\u2013489","journal-title":"Nature"},{"issue":"7676","key":"11209_CR2","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K et al (2017) Mastering the game of go without human knowledge. Nature 550(7676):354\u2013359","journal-title":"Nature"},{"key":"11209_CR3","unstructured":"Silver D, Hubert T, Schrittwieser J, et al. (2017) Mastering chess and shogi by self-play with a general reinforcement learning algorithm. arXiv preprint arXiv:1712.01815"},{"key":"11209_CR4","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1613\/jair.5699","volume":"61","author":"MC Machado","year":"2018","unstructured":"Machado MC, Bellemare MG, Talvitie E et al (2018) Revisiting the arcade learning environment: evaluation protocols and open problems for general agents. J Artif Intell Res 61:523\u2013562","journal-title":"J Artif Intell Res"},{"key":"11209_CR5","unstructured":"Vinyals O, Ewalds T, Bartunov S, et al. (2017) Starcraft ii: A new challenge for reinforcement learning. arXiv preprint arXiv:1708.04782"},{"issue":"1","key":"11209_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"CB Browne","year":"2012","unstructured":"Browne CB, Powley E, Whitehouse D et al (2012) A survey of Monte Carlo tree search methods. IEEE Trans Comput Intell AI in Games 4(1):1\u201343","journal-title":"IEEE Trans Comput Intell AI in Games"},{"issue":"6456","key":"11209_CR7","doi-asserted-by":"publisher","first-page":"885","DOI":"10.1126\/science.aay2400","volume":"365","author":"N Brown","year":"2019","unstructured":"Brown N, Sandholm T (2019) Superhuman AI for multiplayer poker. 
Science 365(6456):885\u2013890","journal-title":"Science"},{"issue":"6374","key":"11209_CR8","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1126\/science.aao1733","volume":"359","author":"N Brown","year":"2018","unstructured":"Brown N, Sandholm T (2018) Superhuman AI for heads-up no-limit poker: libratus beats top professionals. Science 359(6374):418\u2013424","journal-title":"Science"},{"key":"11209_CR9","doi-asserted-by":"crossref","unstructured":"Jiang Q, Li K, Du B, et al. (2019) DeltaDou: Expert-level Doudizhu AI through Self-play. IJCAI. pp 1265\u20131271.","DOI":"10.24963\/ijcai.2019\/176"},{"key":"11209_CR10","unstructured":"You Y, Li L, Guo B, et al. (2019) Combinational Q-Learning for Dou Di Zhu[J]. arXiv preprint arXiv:1901.08925"},{"key":"11209_CR11","unstructured":"Zha D, Xie J, Ma W, et al. (2021) DouZero: Mastering DouDizhu with Self-Play Deep Reinforcement Learning. arXiv preprint arXiv:2106.06135"},{"key":"11209_CR12","doi-asserted-by":"crossref","unstructured":"Zhang X, Wang H, Stojanovic V, et al. (2021) Asynchronous Fault Detection for Interval Type-2 Fuzzy Nonhomogeneous Higher-level Markov Jump Systems with Uncertain Transition Probabilities. IEEE Trans Fuzzy Syst, pp 1\u20131","DOI":"10.1109\/TFUZZ.2021.3086224"},{"key":"11209_CR13","doi-asserted-by":"crossref","unstructured":"Zxa B, Xla B, Vs C (2021) Exponential stability of nonlinear state-dependent delayed impulsive systems with applications. Nonlinear Anal Hybrid Syst, 42","DOI":"10.1016\/j.nahs.2021.101088"},{"issue":"1\u20133","key":"11209_CR14","doi-asserted-by":"crossref","first-page":"126537","DOI":"10.1016\/j.amc.2021.126537","volume":"412","author":"X Xin","year":"2022","unstructured":"Xin X, Tu Y, Stojanovic V et al (2022) Online reinforcement learning multiplayer non-zero sum games of continuous-time Markov jump linear systems. 
Appl Math Comput 412(1\u20133):126537","journal-title":"Appl Math Comput"},{"key":"11209_CR15","volume-title":"Policy invariance under reward transformations: Theory and application to reward shaping","author":"AY Ng","year":"1999","unstructured":"Ng AY, Harada D, Russell S (1999) Policy invariance under reward transformations: Theory and application to reward shaping. Morgan Kaufmann Publishers Inc., Burlington"},{"key":"11209_CR16","unstructured":"Jaderberg M, Mnih V, Czarnecki WM, et al. (2016) Reinforcement Learning with unsupervised auxiliary tasks"},{"key":"11209_CR17","unstructured":"Li S, Wang R, Tang M, et al. (2019) Hierarchical reinforcement learning with advantage-based auxiliary rewards"},{"key":"11209_CR18","unstructured":"Kulkarni TD, Narasimhan KR, Saeedi A, et al. (2016) Hierarchical deep reinforcement learning. Integr Temp Abstract Intrinsic Motiv"},{"key":"11209_CR19","unstructured":"Parr R, Russell S (1998) Reinforcement Learning with Hierarchies of Machines. In: Conference on advances in neural information processing systems. MIT Press"},{"key":"11209_CR20","volume-title":"Encyclopedia of machine learning","author":"P Abbeel","year":"2011","unstructured":"Abbeel P, Ng AY (2011) Inverse reinforcement learning. In: Webb GI, Sammut C (eds) Encyclopedia of machine learning. Springer, Boston MA"},{"key":"11209_CR21","unstructured":"Mnih V, Kavukcuoglu K, Silver D, et al. (2013) Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602"},{"issue":"4","key":"11209_CR22","doi-asserted-by":"publisher","first-page":"5355","DOI":"10.1109\/LRA.2020.3005126","volume":"5","author":"Z Wu","year":"2020","unstructured":"Wu Z, Sun L, Zhan W et al (2020) Efficient sampling-based maximum entropy inverse reinforcement learning with application to autonomous driving. 
IEEE Robot Automation Lett 5(4):5355\u20135362","journal-title":"IEEE Robot Automation Lett"},{"key":"11209_CR23","doi-asserted-by":"crossref","unstructured":"Abbeel P, Ng AY (2004) Apprenticeship learning via inverse reinforcement learning. In: Proceedings of the twenty-first international conference on Machine learning. 1","DOI":"10.1145\/1015330.1015430"},{"key":"11209_CR24","doi-asserted-by":"crossref","unstructured":"Zha D, Lai K H, Cao Y, et al. (2019) Rlcard: A toolkit for reinforcement learning in card games. arXiv preprint arXiv:1910.04376","DOI":"10.24963\/ijcai.2020\/764"},{"issue":"5","key":"11209_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11704-020-9307-6","volume":"15","author":"L Zhang","year":"2021","unstructured":"Zhang L, Chen Y, Wang W et al (2021) A Monte Carlo neural fictitious self-play approach to approximate Nash equilibrium in imperfect-information dynamic games. Front Comput Sci 15(5):1\u201314","journal-title":"Front Comput Sci"},{"key":"11209_CR26","doi-asserted-by":"crossref","unstructured":"Cho K, Merrienboer BV, Gulcehre C, et al. (2014) Learning phrase representations using RNN encoder-decoder for statistical machine translation. Comput Sci","DOI":"10.3115\/v1\/D14-1179"},{"key":"11209_CR27","doi-asserted-by":"publisher","unstructured":"Wang Z, Freitas ND, Lanctot M (2015) Dueling network architectures for deep reinforcement learning. JMLR. https:\/\/doi.org\/10.48550\/arXiv.1511.06581","DOI":"10.48550\/arXiv.1511.06581"},{"key":"11209_CR28","doi-asserted-by":"crossref","unstructured":"Zhang J, Li Y, Xiao W, et al. (2020) Non-iterative and fast deep learning: multilayer extreme learning machines. J Franklin Inst, 357(13)","DOI":"10.1016\/j.jfranklin.2020.04.033"},{"key":"11209_CR29","doi-asserted-by":"crossref","unstructured":"Zhang J, Li Y, Xiao W, et al. (2020) Robust extreme learning machine for modeling with unknown noise. 
J Franklin Inst, 357(14)","DOI":"10.1016\/j.jfranklin.2020.06.027"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/link.springer.com\/content\/pdf\/10.1007\/s11063-023-11209-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/link.springer.com\/article\/10.1007\/s11063-023-11209-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/link.springer.com\/content\/pdf\/10.1007\/s11063-023-11209-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,9]],"date-time":"2023-12-09T05:50:00Z","timestamp":1702101000000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/link.springer.com\/10.1007\/s11063-023-11209-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,20]]},"references-count":29,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,12]]}},"alternative-id":["11209"],"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1007\/s11063-023-11209-0","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,20]]},"assertion":[{"value":"24 February 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 March 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to 
disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}},{"value":"This is an observational study. The XYZ Research Ethics Committee has confirmed that no ethical approval is required.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Approval"}},{"value":"Informed consent was obtained from all individual participants included in the study.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Participate"}},{"value":"The authors affirm that human research participants provided informed consent for publication of the images in the paper.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}}]}}