{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:47:39Z","timestamp":1773377259156,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,3,13]],"date-time":"2023-03-13T00:00:00Z","timestamp":1678665600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-18-1-2776"],"award-info":[{"award-number":["N00014-18-1-2776"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2033413, 1955653, 1928448, 1936970, 1813651, 1928448, 2106690, and IIS-2106690"],"award-info":[{"award-number":["2033413, 1955653, 1928448, 1936970, 1813651, 1928448, 2106690, and IIS-2106690"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,3,13]]},"DOI":"10.1145\/3568162.3576983","type":"proceedings-article","created":{"date-parts":[[2023,3,9]],"date-time":"2023-03-09T18:08:48Z","timestamp":1678385328000},"page":"525-533","update-policy":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Interactive Policy Shaping for Human-Robot Collaboration with Transparent Matrix Overlays"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-6315-2293","authenticated-orcid":false,"given":"Jake","family":"Brawer","sequence":"first","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0001-8317-1865","authenticated-orcid":false,"given":"Debasmita","family":"Ghose","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-0152-053X","authenticated-orcid":false,"given":"Kate","family":"Candon","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-0983-2554","authenticated-orcid":false,"given":"Meiying","family":"Qin","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0001-7385-1875","authenticated-orcid":false,"given":"Alessandro","family":"Roncone","sequence":"additional","affiliation":[{"name":"University of Colorado, Boulder, CO, USA"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0003-0698-5472","authenticated-orcid":false,"given":"Marynel","family":"V\u00e1zquez","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-7671-7759","authenticated-orcid":false,"given":"Brian","family":"Scassellati","sequence":"additional","affiliation":[{"name":"Yale University, New Haven, CT, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,3,13]]},"reference":[{"key":"e_1_3_2_2_1_1","first-page":"22","volume-title":"PMLR.","author":"Joshua","year":"2017","unstructured":"Joshua Achiam et al. \"Constrained policy optimization\". In: International con-ference on machine learning. PMLR. 2017, pp. 22--31."},{"key":"e_1_3_2_2_2_1","first-page":"1745","volume-title":"Conference on Robot Learning. PMLR.","author":"Agrawal Pulkit","year":"2022","unstructured":"Pulkit Agrawal. \"The Task Specification Problem\". In: Conference on Robot Learning. PMLR. 2022, pp. 1745--1751."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33486-3_8"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-23780-5_11"},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence.","volume":"32","author":"Mohammed","year":"2018","unstructured":"Mohammed Alshiekh et al. \"Safe reinforcement learning via shielding\". In: Proceedings of the AAAI Conference on Artificial Intelligence. Vol. 32. 1. 2018."},{"key":"e_1_3_2_2_6_1","first-page":"519","volume-title":"Conference on robot learning. PMLR.","author":"Biyik Erdem","year":"2018","unstructured":"Erdem Biyik and Dorsa Sadigh. \"Batch active preference-based learning of reward functions\". In: Conference on robot learning. PMLR. 2018, pp. 519--528."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2019EDP7170"},{"key":"e_1_3_2_2_8_1","first-page":"827","volume-title":"2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE.","author":"Jake","year":"2018","unstructured":"Jake Brawer et al. \"Situated human--robot collaboration: predicting intent from grounded natural language\". In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE. 2018, pp. 827--833."},{"key":"e_1_3_2_2_9_1","volume-title":"Reshaping Robot Trajectories Using Natural Language Commands: A Study of Multi-Modal Data Alignment Using Transformers\". In: arXiv preprint arXiv:2203.13411","author":"Arthur Bucker","year":"2022","unstructured":"Arthur Bucker et al. \"Reshaping Robot Trajectories Using Natural Language Commands: A Study of Multi-Modal Data Alignment Using Transformers\". In: arXiv preprint arXiv:2203.13411 (2022)."},{"key":"e_1_3_2_2_10_1","volume-title":"Advances in neural information processing systems 32","author":"Micah Carroll","year":"2019","unstructured":"Micah Carroll et al. \"On the utility of learning about humans for human-ai coordination\". In: Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_2_11_1","volume-title":"Advances in neural information processing systems 30","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano et al. \"Deep reinforcement learning from human preferences\". In: Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_12_1","volume-title":"The empathic framework for task learning from implicit human feedback\". In: arXiv preprint arXiv:2009.13649","author":"Yuchen Cui","year":"2020","unstructured":"Yuchen Cui et al. \"The empathic framework for task learning from implicit human feedback\". In: arXiv preprint arXiv:2009.13649 (2020)."},{"key":"e_1_3_2_2_13_1","first-page":"6075","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence.","volume":"35","author":"Tyler","year":"2021","unstructured":"Tyler Frasca et al. \"Enabling fast instruction-based modification of learned robot skills\". In: Proceedings of the AAAI Conference on Artificial Intelligence. Vol. 35. 7. 2021, pp. 6075--6083."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"e_1_3_2_2_15_1","volume-title":"Shielding Atari games with bounded prescience\". In: arXiv preprint arXiv:2101.08153","author":"Mirco Giacobbe","year":"2021","unstructured":"Mirco Giacobbe et al. \"Shielding Atari games with bounded prescience\". In: arXiv preprint arXiv:2101.08153 (2021)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487760"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2909824.3020233"},{"key":"e_1_3_2_2_18_1","volume-title":"arXiv preprint arXiv:2212.03363","author":"Hejna Joey","year":"2022","unstructured":"Joey Hejna and Dorsa Sadigh. \"Few-Shot Preference Learning for Human-in-the-Loop RL\". In: arXiv preprint arXiv:2212.03363 (2022)."},{"key":"e_1_3_2_2_19_1","volume-title":"Inner Monologue: Embodied Reasoning through Planning with Language Models\". In: arXiv preprint arXiv:2207.05608","author":"Wenlong Huang","year":"2022","unstructured":"Wenlong Huang et al. \"Inner Monologue: Embodied Reasoning through Planning with Language Models\". In: arXiv preprint arXiv:2207.05608 (2022)."},{"key":"e_1_3_2_2_20_1","volume-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents\". In: arXiv preprint arXiv:2201.07207","author":"Wenlong Huang","year":"2022","unstructured":"Wenlong Huang et al. \"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents\". In: arXiv preprint arXiv:2201.07207 (2022)."},{"key":"e_1_3_2_2_21_1","volume-title":"Advances in neural information processing systems 31","author":"Borja Ibarz","year":"2018","unstructured":"Borja Ibarz et al. \"Reward learning from human preferences and demonstrations in atari\". In: Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_2_22_1","volume-title":"Safe reinforcement learning via probabilistic shields\". In: arXiv preprint arXiv:1807.06096","author":"Nils Jansen","year":"2018","unstructured":"Nils Jansen et al. \"Safe reinforcement learning via probabilistic shields\". In: arXiv preprint arXiv:1807.06096 (2018)."},{"key":"e_1_3_2_2_23_1","volume-title":"Shielded decision-making in MDPs\". In: arXiv preprint arXiv:1807.06096","author":"Nils Jansen","year":"2018","unstructured":"Nils Jansen et al. \"Shielded decision-making in MDPs\". In: arXiv preprint arXiv:1807.06096 (2018)."},{"key":"e_1_3_2_2_24_1","volume-title":"Housekeep: Tidying Virtual Households using Commonsense Reasoning\". In: arXiv preprint arXiv:2205.10712","author":"Yash Kant","year":"2022","unstructured":"Yash Kant et al. \"Housekeep: Tidying Virtual Households using Commonsense Reasoning\". In: arXiv preprint arXiv:2205.10712 (2022)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.5575"},{"key":"e_1_3_2_2_26_1","first-page":"231","volume-title":"NASA Formal Methods Symposium. Springer.","author":"Bettina","year":"2021","unstructured":"Bettina K\u00f6nighofer et al. \"Online shielding for stochastic systems\". In: NASA Formal Methods Symposium. Springer. 2021, pp. 231--248."},{"key":"e_1_3_2_2_27_1","volume-title":"Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training\". In: arXiv preprint arXiv:2106.05091","author":"Lee Kimin","year":"2021","unstructured":"Kimin Lee, Laura Smith, and Pieter Abbeel. \"Pebble: Feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training\". In: arXiv preprint arXiv:2106.05091 (2021)."},{"key":"e_1_3_2_2_28_1","volume-title":"B-pref: Benchmarking preference-based reinforcement learning\". In: arXiv preprint arXiv:2111.03026","author":"Kimin Lee","year":"2021","unstructured":"Kimin Lee et al. \"B-pref: Benchmarking preference-based reinforcement learning\". In: arXiv preprint arXiv:2111.03026 (2021)."},{"key":"e_1_3_2_2_29_1","volume-title":"Problems and solutions\". In: arXiv preprint arXiv:2201.08299","author":"Liu Minghuan","year":"2022","unstructured":"Minghuan Liu, Menghui Zhu, and Weinan Zhang. \"Goal-conditioned reinforcement learning: Problems and solutions\". In: arXiv preprint arXiv:2201.08299 (2022)."},{"key":"e_1_3_2_2_30_1","first-page":"30","volume-title":"Autonomous agents and multi-agent systems 30.1","author":"Robert Loftin","year":"2016","unstructured":"Robert Loftin et al. \"Learning behaviors via human-delivered discrete feedback: modeling implicit feedback strategies to speed up learning\". In: Autonomous agents and multi-agent systems 30.1 (2016), pp. 30--59."},{"key":"e_1_3_2_2_31_1","volume-title":"Frontiers in Robotics and AI","author":"Mangin Olivier","year":"2022","unstructured":"Olivier Mangin, Alessandro Roncone, and Brian Scassellati. \"How to be Helpful? Supportive Behaviors and Personalization for Human-Robot Collaboration\". In: Frontiers in Robotics and AI (2022), p. 426."},{"key":"e_1_3_2_2_32_1","first-page":"243","volume-title":"Proceedings of the International Conference on Automated Planning and Scheduling.","volume":"31","author":"Mazzi Giulio","year":"2021","unstructured":"Giulio Mazzi, Alberto Castellini, and Alessandro Farinelli. \"Rule-based Shielding for Partially Observable Monte-Carlo Planning\". In: Proceedings of the International Conference on Automated Planning and Scheduling. Vol. 31. 2021, pp. 243--251."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2021.584075"},{"key":"e_1_3_2_2_34_1","volume-title":"arXiv preprint arXiv:2003.10386","author":"Payani Ali","year":"2020","unstructured":"Ali Payani and Faramarz Fekri. \"Incorporating relational background knowledge into reinforcement learning via differentiable inductive logic programming\". In: arXiv preprint arXiv:2003.10386 (2020)."},{"key":"e_1_3_2_2_35_1","volume-title":"Guiding safe reinforcement learning policies using structured language constraints","author":"Bharat Prakash","year":"2020","unstructured":"Bharat Prakash et al. \"Guiding safe reinforcement learning policies using structured language constraints\". In: UMBC Student Collection (2020)."},{"key":"e_1_3_2_2_36_1","volume-title":"Active preference-based learning of reward functions","author":"Dorsa Sadigh","year":"2017","unstructured":"Dorsa Sadigh et al. Active preference-based learning of reward functions. 2017."},{"key":"e_1_3_2_2_37_1","volume-title":"LM-Nav: Robotic Navigation with Large Pre-Trained Models of Language, Vision, and Action\". In: arXiv preprint arXiv:2207.04429","author":"Dhruv Shah","year":"2022","unstructured":"Dhruv Shah et al. \"LM-Nav: Robotic Navigation with Large Pre-Trained Models of Language, Vision, and Action\". In: arXiv preprint arXiv:2207.04429 (2022)."},{"key":"e_1_3_2_2_38_1","first-page":"1365","volume-title":"Proceedings of the 29th ACM In-ternational Conference on Information & Knowledge Management.","author":"Shaoyun","year":"2020","unstructured":"Shaoyun Shi et al. \"Neural logic reasoning\". In: Proceedings of the 29th ACM In-ternational Conference on Information & Knowledge Management. 2020, pp. 1365--1374."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4471-4640-7_1"},{"key":"e_1_3_2_2_40_1","first-page":"477","volume-title":"Conference on Robot Learning. PMLR.","author":"Sanjana","year":"2022","unstructured":"Sanjana Srivastava et al. \"Behavior: Benchmark for everyday household activities in virtual, interactive, and ecological environments\". In: Conference on Robot Learning. PMLR. 2022, pp. 477--490."},{"key":"e_1_3_2_2_41_1","first-page":"1486","volume-title":"Conference on Robot Learning. PMLR.","author":"Elias","year":"2022","unstructured":"Elias Stengel-Eskin et al. \"Guiding Multi-Step Rearrangement Tasks with Natural Language Instructions\". In: Conference on Robot Learning. PMLR. 2022, pp. 1486--1501."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2015.7139553"},{"key":"e_1_3_2_2_43_1","first-page":"5045","volume-title":"International Conference on Machine Learning. PMLR.","author":"Abhinav","year":"2018","unstructured":"Abhinav Verma et al. \"Programmatically interpretable reinforcement learning\". In: International Conference on Machine Learning. PMLR. 2018, pp. 5045--5054."},{"key":"e_1_3_2_2_44_1","first-page":"9797","volume-title":"International Conference on Machine Learning. PMLR.","author":"Wachi Akifumi","year":"2020","unstructured":"Akifumi Wachi and Yanan Sui. \"Safe reinforcement learning in constrained Markov decision processes\". In: International Conference on Machine Learning. PMLR. 2020, pp. 9797--9806."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1080\/00140139"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"e_1_3_2_2_47_1","first-page":"1","volume-title":"Proceedings of the 17th ACM\/IEEE International Conference on Human-Robot Interaction.","author":"Sanne","year":"2022","unstructured":"Sanne van Waveren et al. \"Correct Me If I'm Wrong: Using Non-Experts to Repair Reinforcement Learning Policies\". In: Proceedings of the 17th ACM\/IEEE International Conference on Human-Robot Interaction. 2022, pp. 1--9."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1471068411000494"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364920910802"},{"key":"e_1_3_2_2_50_1","volume-title":"Advances in neural information processing systems 25","author":"Wilson Aaron","year":"2012","unstructured":"Aaron Wilson, Alan Fern, and Prasad Tadepalli. \"A bayesian approach for policy learning from trajectory preference queries\". In: Advances in neural information processing systems 25 (2012)."},{"issue":"136","key":"e_1_3_2_2_51_1","first-page":"1","article-title":"A survey of preference-based reinforcement learning methods","volume":"18","author":"Christian Wirth","year":"2017","unstructured":"Christian Wirth et al. \"A survey of preference-based reinforcement learning methods\". In: Journal of Machine Learning Research 18.136 (2017), pp. 1--46.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_52_1","first-page":"332","volume-title":"Conference on Robot Learning. PMLR.","author":"Yunkun","year":"2022","unstructured":"Yunkun Xu et al. \"Look Before You Leap: Safe Model-Based Reinforcement Learning with Human Intervention\". In: Conference on Robot Learning. PMLR. 2022, pp. 332--341."},{"key":"e_1_3_2_2_53_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence.","volume":"32","author":"Wei-Nan","year":"2018","unstructured":"Wei-Nan Zhang et al. \"Exploring implicit feedback for open domain conversation generation\". In: Proceedings of the AAAI Conference on Artificial Intelligence. Vol. 32. 1. 2018."},{"key":"e_1_3_2_2_54_1","unstructured":"Menghui Zhu et al. \"Mapgo: Model-assisted policy optimization for goal-oriented tasks\". In: arXiv preprint arXiv:2105.06350 (2021"}],"event":{"name":"HRI '23: ACM\/IEEE International Conference on Human-Robot Interaction","location":"Stockholm Sweden","acronym":"HRI '23","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/10.1145\/3568162.3576983","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/pdf\/10.1145\/3568162.3576983","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/pdf\/10.1145\/3568162.3576983","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T21:26:16Z","timestamp":1750281976000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/10.1145\/3568162.3576983"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,13]]},"references-count":54,"alternative-id":["10.1145\/3568162.3576983","10.1145\/3568162"],"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1145\/3568162.3576983","relation":{},"subject":[],"published":{"date-parts":[[2023,3,13]]},"assertion":[{"value":"2023-03-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}