{"id":"https://openalex.org/W4393161171","doi":"https://doi.org/10.1609/aaai.v38i17.29926","title":"Visual Hallucination Elevates Speech Recognition","display_name":"Visual Hallucination Elevates Speech Recognition","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393161171","doi":"https://doi.org/10.1609/aaai.v38i17.29926"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v38i17.29926","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i17.29926","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29926/31618","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29926/31618","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100367221","display_name":"Fang Zhang","orcid":"https://orcid.org/0000-0001-8892-7182"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fang Zhang","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768238","display_name":"Yongxin Zhu","orcid":"https://orcid.org/0000-0002-4757-543X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongxin Zhu","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101720266","display_name":"Xiangxiang Wang","orcid":"https://orcid.org/0000-0002-8031-9657"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangxiang Wang","raw_affiliation_strings":["Tencent YouTu Lab"],"affiliations":[{"raw_affiliation_string":"Tencent YouTu Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102398889","display_name":"Huang Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huang Chen","raw_affiliation_strings":["Tencent YouTu Lab"],"affiliations":[{"raw_affiliation_string":"Tencent YouTu Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101323125","display_name":"Xing Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Sun","raw_affiliation_strings":["Tencent YouTu Lab"],"affiliations":[{"raw_affiliation_string":"Tencent YouTu Lab","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009732907","display_name":"Linli Xu","orcid":"https://orcid.org/0000-0003-0227-3793"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linli Xu","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China\nState Key Laboratory of Cognitive Intelligence","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100367221"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.6254,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.7,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"38","issue":"17","first_page":"19542","last_page":"19550"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13397","display_name":"Hallucinations in medical conditions","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T13397","display_name":"Hallucinations in medical conditions","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.6456983089447021},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.5309716463088989},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43174561858177185},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.39150702953338623},{"id":"https://openalex.org/keywords/audiology","display_name":"Audiology","score":0.36313748359680176},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.3539718985557556},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.2863917946815491},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.14587059617042542}],"concepts":[{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.6456983089447021},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5309716463088989},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43174561858177185},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.39150702953338623},{"id":"https://openalex.org/C548259974","wikidata":"https://www.wikidata.org/wiki/Q569965","display_name":"Audiology","level":1,"score":0.36313748359680176},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.3539718985557556},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2863917946815491},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.14587059617042542},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v38i17.29926","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i17.29926","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29926/31618","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v38i17.29926","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i17.29926","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29926/31618","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2428307698","display_name":null,"funder_award_id":"2022YFB3103100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3085993365","display_name":null,"funder_award_id":"(Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G544562989","display_name":null,"funder_award_id":"62276245","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6481167376","display_name":null,"funder_award_id":"2008085J31","funder_id":"https://openalex.org/F4320334897","funder_display_name":"Natural Science Foundation of Anhui Province"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320334897","display_name":"Natural Science Foundation of Anhui Province","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4393161171.pdf"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W2096391593","https://openalex.org/W2559260703","https://openalex.org/W2892611740","https://openalex.org/W2901907199","https://openalex.org/W2973048981","https://openalex.org/W2987346570","https://openalex.org/W2997540481","https://openalex.org/W3003133812","https://openalex.org/W3010333154","https://openalex.org/W3081492798","https://openalex.org/W3090765191","https://openalex.org/W3114960734","https://openalex.org/W3175825020","https://openalex.org/W4221153068","https://openalex.org/W4221153521","https://openalex.org/W4313598304","https://openalex.org/W4327671461","https://openalex.org/W6679436768","https://openalex.org/W6682243771","https://openalex.org/W6749134952","https://openalex.org/W6754392867","https://openalex.org/W6758698360","https://openalex.org/W6777389884","https://openalex.org/W6790727354","https://openalex.org/W6898505805","https://openalex.org/W7027429494"],"related_works":["https://openalex.org/W2419430421","https://openalex.org/W2334811251","https://openalex.org/W2386464051","https://openalex.org/W32883749","https://openalex.org/W1972094787","https://openalex.org/W2308791691","https://openalex.org/W1971933602","https://openalex.org/W39816624","https://openalex.org/W2002427771","https://openalex.org/W2949208013"],"abstract_inverted_index":{"Due":[0],"to":[1,61,119,185,199,208,256],"the":[2,8,35,44,51,62,65,95,115,121,125,133,144,148,160,164,170,187,219,257,271],"detrimental":[3],"impact":[4],"of":[5,39,46,64,107,223],"noise":[6,209],"on":[7,230,253],"conventional":[9],"audio":[10,24,96,127,145,189],"speech":[11,16],"recognition":[12],"(ASR)":[13],"task,":[14],"audio-visual":[15],"recognition~(AVSR)":[17],"has":[18],"been":[19],"proposed":[20],"by":[21],"incorporating":[22],"both":[23],"and":[25,97,190,210,221],"visual":[26,37,67,98,103,122,165,175,191,212],"video":[27,277],"signals.":[28],"Although":[29],"existing":[30],"methods":[31],"have":[32],"demonstrated":[33],"that":[34,177,204,238],"aligned":[36],"input":[38],"lip":[40],"movements":[41],"can":[42],"enhance":[43],"robustness":[45,222],"AVSR":[47],"systems":[48],"against":[49],"noise,":[50],"paired":[52],"videos":[53,109],"are":[54,178,205],"not":[55],"always":[56],"available":[57,233],"during":[58,100,110],"inference,":[59],"leading":[60],"problem":[63],"missing":[66],"modality,":[68],"which":[69,90,152],"restricts":[70],"their":[71],"practicality":[72],"in":[73,105,147,169,248],"real-world":[74],"scenarios.":[75],"To":[76,112,136,217],"tackle":[77,137],"this":[78,138],"problem,":[79],"we":[80,140,182,226],"propose":[81],"a":[82,242],"Discrete":[83],"Feature":[84],"based":[85],"Visual":[86],"Generative":[87],"Model":[88],"(DFVGM)":[89],"exploits":[91],"semantic":[92,130,155,202],"correspondences":[93,131],"between":[94],"modalities":[99],"training,":[101],"generating":[102],"hallucinations":[104,213],"lieu":[106],"real":[108],"inference.":[111],"achieve":[113],"that,":[114],"primary":[116],"challenge":[117],"is":[118,167],"generate":[120,211],"hallucination":[123],"given":[124],"noisy":[126],"while":[128,263],"preserving":[129],"with":[132,142,159,214],"clean":[134],"speech.":[135],"challenge,":[139],"start":[141],"training":[143],"encoder":[146,166],"Audio-Only":[149,260],"(AO)":[150,261],"setting,":[151,173],"generates":[153],"continuous":[154,188],"features":[156,176],"closely":[157],"associated":[158],"linguistic":[161],"information.":[162],"Simultaneously,":[163],"trained":[168],"Visual-Only":[171],"(VO)":[172],"producing":[174],"phonetically":[179],"related.":[180],"Next,":[181],"employ":[183],"K-means":[184],"discretize":[186],"feature":[192],"spaces.":[193],"The":[194,235],"discretization":[195],"step":[196],"allows":[197],"DFVGM":[198],"capture":[200],"high-level":[201],"structures":[203],"more":[206],"resilient":[207],"high":[215],"quality.":[216],"evaluate":[218],"effectiveness":[220],"our":[224,239],"approach,":[225],"conduct":[227],"extensive":[228],"experiments":[229],"two":[231],"publicly":[232],"datasets.":[234],"results":[236,266],"demonstrate":[237],"method":[240],"achieves":[241],"remarkable":[243],"53%":[244],"relative":[245],"reduction":[246],"(30.5%-&gt;12.9%)":[247],"Word":[249],"Error":[250],"Rate":[251],"(WER)":[252],"average":[254],"compared":[255],"current":[258],"state-of-the-art":[259],"baselines":[262],"maintaining":[264],"comparable":[265],"(&lt;":[267],"5%":[268],"difference)":[269],"under":[270],"Audio-Visual":[272],"(AV)":[273],"setting":[274],"even":[275],"without":[276],"as":[278],"input.":[279]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
