{"id":"https://openalex.org/W4408894716","doi":"https://doi.org/10.1145/3676641.3715986","title":"CoServe: Efficient Collaboration-of-Experts (CoE) Model Inference with Limited Memory","display_name":"CoServe: Efficient Collaboration-of-Experts (CoE) Model Inference with Limited Memory","publication_year":2025,"publication_date":"2025-03-27","ids":{"openalex":"https://openalex.org/W4408894716","doi":"https://doi.org/10.1145/3676641.3715986"},"language":"en","primary_location":{"id":"doi:10.1145/3676641.3715986","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3676641.3715986","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.02354","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029600448","display_name":"Jiashun Suo","orcid":"https://orcid.org/0000-0002-5360-353X"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiashun Suo","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016873422","display_name":"Xiaojian Liao","orcid":"https://orcid.org/0000-0002-7924-9268"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojian Liao","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101586078","display_name":"Limin Xiao","orcid":"https://orcid.org/0000-0001-9438-9181"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Limin Xiao","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083200817","display_name":"Li Ruan","orcid":"https://orcid.org/0000-0002-2386-961X"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Ruan","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020576249","display_name":"Jinquan Wang","orcid":"https://orcid.org/0009-0000-0527-368X"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinquan Wang","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103031403","display_name":"Xiao Su","orcid":"https://orcid.org/0000-0001-5365-2537"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Su","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023297176","display_name":"Zhisheng Huo","orcid":"https://orcid.org/0000-0002-5366-0892"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhisheng Huo","raw_affiliation_strings":["State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of CCSE and School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5029600448"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06965581,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"178","last_page":"191"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7143025398254395},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6339656710624695},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.39531585574150085},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37383878231048584}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7143025398254395},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6339656710624695},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.39531585574150085},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37383878231048584}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3676641.3715986","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3676641.3715986","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.02354","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.02354","pdf_url":"https://arxiv.org/pdf/2503.02354","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.02354","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.02354","pdf_url":"https://arxiv.org/pdf/2503.02354","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5688315367","display_name":null,"funder_award_id":"2023YFB4503100","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G7297854774","display_name":null,"funder_award_id":"62272026, 62302257","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W2131806537","https://openalex.org/W2144424977","https://openalex.org/W2150884987","https://openalex.org/W2194775991","https://openalex.org/W2734941459","https://openalex.org/W2911057572","https://openalex.org/W3094809434","https://openalex.org/W3161819896","https://openalex.org/W4220967350","https://openalex.org/W4226515448","https://openalex.org/W4286962429","https://openalex.org/W4307079201","https://openalex.org/W4324301508","https://openalex.org/W4327810158","https://openalex.org/W4379539707","https://openalex.org/W4386185625","https://openalex.org/W4386273009","https://openalex.org/W4387010652","https://openalex.org/W4387321091","https://openalex.org/W4387389714","https://openalex.org/W4387964201","https://openalex.org/W4388747974","https://openalex.org/W4390438324","https://openalex.org/W4390723197","https://openalex.org/W4390833687","https://openalex.org/W4391800686","https://openalex.org/W4392489911","https://openalex.org/W4392781399","https://openalex.org/W4393399080","https://openalex.org/W4394780940","https://openalex.org/W4395073472","https://openalex.org/W4400024360","https://openalex.org/W4400409890","https://openalex.org/W4401211627","https://openalex.org/W4402588116","https://openalex.org/W4402683990","https://openalex.org/W4404954357","https://openalex.org/W4405095313"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,19],"like":[3],"GPT-4":[4],"are":[5],"resource-intensive,":[6],"but":[7],"recent":[8],"advancements":[9],"suggest":[10],"that":[11],"smaller,":[12],"specialized":[13],"experts":[14,67],"can":[15,70],"outperform":[16],"the":[17,32,63],"monolithic":[18],"on":[20],"specific":[21],"tasks.":[22],"The":[23],"Collaboration-of-Experts":[24],"(CoE)":[25],"approach":[26],"integrates":[27],"multiple":[28],"expert":[29,78],"models,":[30],"improving":[31],"accuracy":[33],"of":[34,66],"generated":[35],"results":[36],"and":[37,83],"offering":[38],"great":[39],"potential":[40],"for":[41],"precision-critical":[42],"applications,":[43],"such":[44],"as":[45],"automatic":[46],"circuit":[47],"board":[48],"quality":[49],"inspection.":[50],"However,":[51],"deploying":[52],"CoE":[53],"serving":[54],"systems":[55],"presents":[56],"challenges":[57],"to":[58,62,72],"memory":[59,82],"capacity":[60],"due":[61],"large":[64],"number":[65],"required,":[68],"which":[69],"lead":[71],"significant":[73],"performance":[74],"overhead":[75],"from":[76],"frequent":[77],"switching":[79],"across":[80],"different":[81],"storage":[84],"tiers.":[85]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
