{"id":"https://openalex.org/W4402853567","doi":"https://doi.org/10.48550/arxiv.2407.15672","title":"Computer Audition: From Task-Specific Machine Learning to Foundation Models","display_name":"Computer Audition: From Task-Specific Machine Learning to Foundation Models","publication_year":2024,"publication_date":"2024-07-22","ids":{"openalex":"https://openalex.org/W4402853567","doi":"https://doi.org/10.48550/arxiv.2407.15672"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2407.15672","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15672","pdf_url":"https://arxiv.org/pdf/2407.15672","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2407.15672","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012240826","display_name":"Andreas Triantafyllopoulos","orcid":"https://orcid.org/0000-0001-8338-617X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Triantafyllopoulos, Andreas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092639661","display_name":"Iosif Tsangko","orcid":"https://orcid.org/0009-0007-9614-5894"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsangko, Iosif","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023952045","display_name":"Alexander Gebhard","orcid":"https://orcid.org/0000-0003-4169-5403"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gebhard, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079981416","display_name":"Annamaria Mesaros","orcid":"https://orcid.org/0000-0002-6640-9752"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mesaros, Annamaria","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049691461","display_name":"Tuomas Virtanen","orcid":"https://orcid.org/0000-0002-4604-9729"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Virtanen, Tuomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5043060302","display_name":"Bj\u00f6rn W. Schuller","orcid":"https://orcid.org/0000-0002-6478-8699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schuller, Bj\u00f6rn","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5012240826"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6837000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.6837000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/foundation","display_name":"Foundation (evidence)","score":0.7041285634040833},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6553743481636047},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6165794134140015},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.45337554812431335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4519384801387787},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.43703967332839966},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42166879773139954},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3537111282348633},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.17817559838294983},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.07665395736694336},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07415568828582764}],"concepts":[{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.7041285634040833},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6553743481636047},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6165794134140015},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.45337554812431335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4519384801387787},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.43703967332839966},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42166879773139954},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3537111282348633},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.17817559838294983},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.07665395736694336},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07415568828582764},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2407.15672","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15672","pdf_url":"https://arxiv.org/pdf/2407.15672","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2407.15672","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2407.15672","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2407.15672","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.15672","pdf_url":"https://arxiv.org/pdf/2407.15672","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5104279231","display_name":null,"funder_award_id":"Koselleck","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G7189395530","display_name":null,"funder_award_id":"442218748","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4402853567.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2381393187","https://openalex.org/W2332779545","https://openalex.org/W2358060160","https://openalex.org/W2035483685","https://openalex.org/W1969764885","https://openalex.org/W596947562","https://openalex.org/W2793937822","https://openalex.org/W2790817834","https://openalex.org/W3196817267","https://openalex.org/W2951720331"],"abstract_inverted_index":{"Foundation":[0],"models":[1,88],"(FMs)":[2],"are":[3],"increasingly":[4],"spearheading":[5],"recent":[6],"advances":[7],"on":[8],"a":[9,45,78],"variety":[10],"of":[11,18,24,80,99],"tasks":[12,43,131],"that":[13,120,132],"fall":[14],"under":[15],"the":[16,22,38,48,57,71,92,116,133],"purview":[17],"computer":[19],"audition":[20],"--":[21],"use":[23],"machines":[25],"to":[26,40,50,77,83],"understand":[27],"sounds.":[28],"They":[29],"feature":[30],"several":[31],"advantages":[32],"over":[33],"traditional":[34,107],"pipelines:":[35],"among":[36],"others,":[37],"ability":[39],"consolidate":[41],"multiple":[42,130],"in":[44,70],"single":[46],"model,":[47],"option":[49],"leverage":[51],"knowledge":[52],"from":[53,106],"other":[54],"modalities,":[55],"and":[56,74,124],"readily-available":[58],"interaction":[59],"with":[60],"human":[61],"users.":[62],"Naturally,":[63],"these":[64],"promises":[65],"have":[66,75],"created":[67],"substantial":[68],"excitement":[69],"audio":[72,101,134],"community,":[73],"led":[76],"wave":[79],"early":[81],"attempts":[82],"build":[84],"new,":[85],"general-purpose":[86],"foundation":[87,111],"for":[89],"audio.":[90],"In":[91],"present":[93],"contribution,":[94],"we":[95],"give":[96],"an":[97],"overview":[98],"computational":[100],"analysis":[102],"as":[103],"it":[104],"transitions":[105],"pipelines":[108],"towards":[109],"auditory":[110],"models.":[112],"Our":[113],"work":[114],"highlights":[115],"key":[117],"operating":[118],"principles":[119],"underpin":[121],"those":[122],"models,":[123],"showcases":[125],"how":[126],"they":[127],"can":[128],"accommodate":[129],"community":[135],"previously":[136],"tackled":[137],"separately.":[138]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-21T08:13:44.787528","created_date":"2025-10-10T00:00:00"}
