{"id":"https://openalex.org/W4282038053","doi":"https://doi.org/10.1177/02783649221083331","title":"Hybrid control for combining model-based and model-free reinforcement learning","display_name":"Hybrid control for combining model-based and model-free reinforcement learning","publication_year":2022,"publication_date":"2022-06-02","ids":{"openalex":"https://openalex.org/W4282038053","doi":"https://doi.org/10.1177/02783649221083331"},"language":"en","primary_location":{"id":"doi:10.1177/02783649221083331","is_oa":true,"landing_page_url":"https://doi.org/10.1177/02783649221083331","pdf_url":"https://journals.sagepub.com/doi/pdf/10.1177/02783649221083331","source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://journals.sagepub.com/doi/pdf/10.1177/02783649221083331","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013330580","display_name":"Allison Pinosky","orcid":"https://orcid.org/0000-0002-3095-8856"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Allison Pinosky","raw_affiliation_strings":["Department of Mechanical Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062827877","display_name":"Ian Abraham","orcid":"https://orcid.org/0000-0003-0299-1760"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian Abraham","raw_affiliation_strings":["Department of Mechanical Engineering and Materials Science at Yale University, New Haven, CT, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering and Materials Science at Yale University, New Haven, CT, USA","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053581855","display_name":"Alexander Broad","orcid":"https://orcid.org/0000-0001-9230-7891"},"institutions":[{"id":"https://openalex.org/I4210143335","display_name":"Boston Dynamics (United States)","ror":"https://ror.org/0488ezv32","country_code":"US","type":"company","lineage":["https://openalex.org/I4210143335"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Broad","raw_affiliation_strings":["Boston Dynamics, Waltham, MA, USA"],"affiliations":[{"raw_affiliation_string":"Boston Dynamics, Waltham, MA, USA","institution_ids":["https://openalex.org/I4210143335"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051354595","display_name":"Brenna Argall","orcid":"https://orcid.org/0000-0002-4280-8492"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brenna Argall","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, Northwestern University, Evanston, IL, USA","Department of Mechanical Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]},{"raw_affiliation_string":"Department of Mechanical Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067725461","display_name":"Todd D. Murphey","orcid":"https://orcid.org/0000-0003-2262-8176"},"institutions":[{"id":"https://openalex.org/I111979921","display_name":"Northwestern University","ror":"https://ror.org/000e0be47","country_code":"US","type":"education","lineage":["https://openalex.org/I111979921"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Todd D Murphey","raw_affiliation_strings":["Department of Mechanical Engineering, Northwestern University, Evanston, IL, USA"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, Northwestern University, Evanston, IL, USA","institution_ids":["https://openalex.org/I111979921"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5013330580"],"corresponding_institution_ids":["https://openalex.org/I111979921"],"apc_list":null,"apc_paid":null,"fwci":3.8864,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.94184203,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"42","issue":"6","first_page":"337","last_page":"355"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10675","display_name":"Mechanical Circulatory Support Devices","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8023145794868469},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7391717433929443},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6903301477432251},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6508195400238037},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.632340669631958},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5912899971008301},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5475751161575317},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.41797778010368347},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14317432045936584}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8023145794868469},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7391717433929443},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6903301477432251},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6508195400238037},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.632340669631958},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5912899971008301},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5475751161575317},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.41797778010368347},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14317432045936584},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/02783649221083331","is_oa":true,"landing_page_url":"https://doi.org/10.1177/02783649221083331","pdf_url":"https://journals.sagepub.com/doi/pdf/10.1177/02783649221083331","source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1177/02783649221083331","is_oa":true,"landing_page_url":"https://doi.org/10.1177/02783649221083331","pdf_url":"https://journals.sagepub.com/doi/pdf/10.1177/02783649221083331","source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2609295724","display_name":null,"funder_award_id":"CNS 1837515","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4566812657","display_name":null,"funder_award_id":"N00014-21-1-2706","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4282038053.pdf","grobid_xml":"https://content.openalex.org/works/W4282038053.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W1578969866","https://openalex.org/W1966514629","https://openalex.org/W1981276685","https://openalex.org/W1986014385","https://openalex.org/W2073464235","https://openalex.org/W2158782408","https://openalex.org/W2410617946","https://openalex.org/W2509969228","https://openalex.org/W2529601334","https://openalex.org/W2559655401","https://openalex.org/W2735197468","https://openalex.org/W2738778707","https://openalex.org/W2807050419","https://openalex.org/W2909966514","https://openalex.org/W2962872206","https://openalex.org/W2963523627","https://openalex.org/W2963895143","https://openalex.org/W3006009338","https://openalex.org/W3101360817","https://openalex.org/W3102865626","https://openalex.org/W3106296277","https://openalex.org/W6600025664","https://openalex.org/W6814003322","https://openalex.org/W6815608828"],"related_works":["https://openalex.org/W4296474751","https://openalex.org/W1485630101","https://openalex.org/W3153007185","https://openalex.org/W2498017833","https://openalex.org/W4206669594","https://openalex.org/W2961085424","https://openalex.org/W3037422413","https://openalex.org/W2959276766","https://openalex.org/W4319083788","https://openalex.org/W2983785000"],"abstract_inverted_index":{"We":[0,45,95,109,152],"develop":[1],"an":[2,25],"approach":[3,49,62,79,155],"to":[4,47,86,113],"improve":[5],"the":[6,28,31,69,121,125,171,189],"learning":[7,56,93,102,107,160,195],"capabilities":[8],"of":[9,27,50,71,100,120,138,187,194,201],"robotic":[10],"systems":[11],"by":[12,103],"combining":[13,52],"learned":[14],"predictive":[15,72],"models":[16,23,73],"with":[17,158,174],"experience-based":[18,34,75],"state-action":[19],"policy":[20,36],"mappings.":[21],"Predictive":[22],"provide":[24],"understanding":[26],"task":[29],"and":[30,54,67,74,84,130,168,191],"dynamics,":[32],"while":[33],"(model-free)":[35],"mappings":[37],"encode":[38],"favorable":[39],"actions":[40],"that":[41,117,182],"override":[42],"planned":[43],"actions.":[44],"refer":[46],"our":[48,78,111,154,183],"systematically":[51],"model-based":[53,83],"model-free":[55],"methods":[57],"as":[58,145,147],"hybrid":[59,101],"learning.":[60],"Our":[61,128],"efficiently":[63],"learns":[64],"motor":[65,196],"skills":[66,197],"improves":[68],"performance":[70,190],"policies.":[76],"Moreover,":[77],"enables":[80],"policies":[81],"(both":[82],"model-free)":[85],"be":[87],"updated":[88],"using":[89],"any":[90],"off-policy":[91],"reinforcement":[92],"method.":[94],"derive":[96],"a":[97,114,136,148,175,199],"deterministic":[98,129],"method":[99,112,184],"optimally":[104],"switching":[105],"between":[106],"modalities.":[108],"adapt":[110],"stochastic":[115,131],"variation":[116],"relaxes":[118],"some":[119],"key":[122],"assumptions":[123],"in":[124,143,198],"original":[126],"derivation.":[127],"variations":[132],"are":[133],"tested":[134],"on":[135],"variety":[137,200],"robot":[139],"control":[140],"benchmark":[141],"tasks":[142],"simulation":[144],"well":[146],"hardware":[149],"manipulation":[150],"task.":[151,178],"extend":[153],"for":[156],"use":[157],"imitation":[159],"methods,":[161],"where":[162],"experience":[163],"is":[164,185],"provided":[165],"through":[166],"demonstrations,":[167],"we":[169],"test":[170],"expanded":[172],"capability":[173],"real-world":[176],"pick-and-place":[177],"The":[179],"results":[180],"show":[181],"capable":[186],"improving":[188],"sample":[192],"efficiency":[193],"experimental":[202],"domains.":[203]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
