{"id":"https://openalex.org/W4409264082","doi":"https://doi.org/10.1109/wacv61041.2025.00097","title":"Counting Guidance for High Fidelity Text-to-Image Synthesis","display_name":"Counting Guidance for High Fidelity Text-to-Image Synthesis","publication_year":2025,"publication_date":"2025-02-26","ids":{"openalex":"https://openalex.org/W4409264082","doi":"https://doi.org/10.1109/wacv61041.2025.00097"},"language":"en","primary_location":{"id":"doi:10.1109/wacv61041.2025.00097","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61041.2025.00097","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079003243","display_name":"Won Jun Kang","orcid":"https://orcid.org/0000-0002-2107-8160"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Wonjun Kang","raw_affiliation_strings":["Seoul National University"],"affiliations":[{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020064757","display_name":"Kevin Galim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kevin Galim","raw_affiliation_strings":["FuriosaAI"],"affiliations":[{"raw_affiliation_string":"FuriosaAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041283782","display_name":"Hyung Il Koo","orcid":"https://orcid.org/0000-0002-6955-8083"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyung Il Koo","raw_affiliation_strings":["FuriosaAI"],"affiliations":[{"raw_affiliation_string":"FuriosaAI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055171648","display_name":"Nam Ik Cho","orcid":"https://orcid.org/0000-0001-5297-4649"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Nam Ik Cho","raw_affiliation_strings":["Seoul National University"],"affiliations":[{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5079003243"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":2.6381,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.88980293,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"899","last_page":"908"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9833999872207642,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9833999872207642,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9510999917984009,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6648767590522766},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.49509397149086},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.48584792017936707},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.45281827449798584},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3983518183231354},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.06953221559524536}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6648767590522766},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.49509397149086},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.48584792017936707},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.45281827449798584},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3983518183231354},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.06953221559524536}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv61041.2025.00097","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61041.2025.00097","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W2962770929","https://openalex.org/W2963163163","https://openalex.org/W2963966654","https://openalex.org/W3035574324","https://openalex.org/W3174194560","https://openalex.org/W3174807077","https://openalex.org/W4214926101","https://openalex.org/W4221148940","https://openalex.org/W4221160666","https://openalex.org/W4226125322","https://openalex.org/W4312933868","https://openalex.org/W4322747315","https://openalex.org/W4385270985","https://openalex.org/W4385775296","https://openalex.org/W4386076027","https://openalex.org/W4386076390","https://openalex.org/W4386076532","https://openalex.org/W4387968301","https://openalex.org/W4390873054","https://openalex.org/W4390873321","https://openalex.org/W4402733577","https://openalex.org/W4404612908","https://openalex.org/W4405003291","https://openalex.org/W6730746255","https://openalex.org/W6765775151","https://openalex.org/W6779823529","https://openalex.org/W6783713337","https://openalex.org/W6784333009","https://openalex.org/W6786375611","https://openalex.org/W6791353385","https://openalex.org/W6795288823","https://openalex.org/W6809885388","https://openalex.org/W6811013733","https://openalex.org/W6838322790","https://openalex.org/W6838639034","https://openalex.org/W6840815571","https://openalex.org/W6841366371","https://openalex.org/W6850975553","https://openalex.org/W6852915159","https://openalex.org/W6852922754","https://openalex.org/W6854484269","https://openalex.org/W6857083971","https://openalex.org/W6861332544"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Recently,":[0],"there":[1],"have":[2],"been":[3],"significant":[4],"improvements":[5],"in":[6,45,53,148],"the":[7,17,35,47,54,62,100,106,125,128,133,141,149,167,171,185,190],"quality":[8],"and":[9,66,131,180],"performance":[10],"of":[11,50,82,127,143,146,192],"text-to-image":[12,25],"generation,":[13],"largely":[14],"due":[15],"to":[16,30,91,157,197],"impressive":[18],"results":[19],"attained":[20],"by":[21,74],"diffusion":[22,26,75,93,193],"models.":[23],"However,":[24],"models":[27,76,94,194],"sometimes":[28],"struggle":[29],"create":[31],"high-fidelity":[32],"content":[33],"for":[34,119,136,161,174],"given":[36,121],"input":[37,107],"prompt.":[38,56,108],"One":[39],"specific":[40],"issue":[41],"is":[42],"their":[43],"difficulty":[44],"generating":[46],"precise":[48],"number":[49,81],"objects":[51,147],"specified":[52],"text":[55],"For":[57],"example,":[58],"when":[59],"provided":[60],"with":[61,195],"prompt":[63],"\u201cfive":[64],"apples":[65],"ten":[67],"lemons":[68],"on":[69,105],"a":[70,89,111],"table,\u201d":[71],"images":[72],"generated":[73],"often":[77],"contain":[78],"an":[79],"incorrect":[80],"objects.":[83],"In":[84],"this":[85],"paper,":[86],"we":[87,151,165,182],"present":[88],"method":[90,187],"improve":[92],"so":[95],"that":[96,114,184],"they":[97],"accurately":[98],"produce":[99],"correct":[101],"object":[102,198],"count":[103],"based":[104],"We":[109,123],"adopt":[110],"counting":[112,118,129],"network":[113,130],"performs":[115],"reference-less":[116],"class-agnostic":[117],"any":[120],"image.":[122],"calculate":[124],"gradients":[126,173],"refine":[132],"predicted":[134],"noise":[135],"each":[137,162,175],"step.":[138],"To":[139],"address":[140],"presence":[142],"multiple":[144],"types":[145],"prompt,":[150],"utilize":[152],"novel":[153],"attention":[154],"map":[155],"guidance":[156],"obtain":[158],"high-quality":[159],"masks":[160],"object.":[163,176],"Finally,":[164],"guide":[166],"denoising":[168],"process":[169],"using":[170],"calculated":[172],"Through":[177],"extensive":[178],"experiments":[179],"evaluation,":[181],"demonstrate":[183],"proposed":[186],"significantly":[188],"enhances":[189],"fidelity":[191],"respect":[196],"count.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
