{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:35:51Z","timestamp":1776886551442,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754866","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:56:44Z","timestamp":1761375404000},"page":"9424-9433","update-policy":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["PMG: Progressive Motion Generation via Sparse Anchor Postures Curriculum Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0009-0001-8859-4856","authenticated-orcid":false,"given":"Yingjie","family":"Xi","sequence":"first","affiliation":[{"name":"University of Bournemouth, Bournemouth, Dorset, United Kingdom"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0002-7069-5771","authenticated-orcid":false,"given":"Jian Jun","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Bournemouth, Bournemouth, Dorset, United Kingdom"}]},{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0003-3815-0584","authenticated-orcid":false,"given":"Xiaosong","family":"Yang","sequence":"additional","affiliation":[{"name":"University of Bournemouth, Bournemouth, Dorset, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687559"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00916"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_2_1_4_1","volume-title":"Pay Attention and Move Better: Harnessing Attention for Interactive Motion Generation and Training-free Editing. arXiv preprint arXiv:2410.18977","author":"Chen Ling-Hao","year":"2024","unstructured":"Ling-Hao Chen, Shunlin Lu, Wenxun Dai, Zhiyang Dou, Xuan Ju, Jingbo Wang, Taku Komura, and Lei Zhang. 2024a. Pay Attention and Move Better: Harnessing Attention for Interactive Motion Generation and Training-free Editing. arXiv preprint arXiv:2410.18977 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657440"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00190"},{"key":"e_1_3_2_1_7_1","volume-title":"European Conference on Computer Vision. Springer, 18-36","author":"Chi Seunggeun","year":"2024","unstructured":"Seunggeun Chi, Hyung-gun Chi, Hengbo Ma, Nakul Agarwal, Faizan Siddiqui, Karthik Ramani, and Kwonjoon Lee. 2024. M2d2m: Multi-motion generation from text with discrete diffusion models. In European Conference on Computer Vision. Springer, 18-36."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3677388.3696327"},{"key":"e_1_3_2_1_9_1","volume-title":"ACM SIGGRAPH 2024 Conference Papers. 1-9.","author":"Cohan Setareh","unstructured":"Setareh Cohan, Guy Tevet, Daniele Reda, Xue Bin Peng, and Michiel van de Panne. 2024. Flexible motion in-betweening with diffusion models. In ACM SIGGRAPH 2024 Conference Papers. 1-9."},{"key":"e_1_3_2_1_10_1","volume-title":"Laserhuman: language-guided scene-aware human motion generation in free environment. arXiv preprint arXiv:2403.13307","author":"Cong Peishan","year":"2024","unstructured":"Peishan Cong, Ziyi Wang, Zhiyang Dou, Yiming Ren, Wei Yin, Kai Cheng, Yujing Sun, Xiaoxiao Long, Xinge Zhu, and Yuexin Ma. 2024. Laserhuman: language-guided scene-aware human motion generation in free environment. arXiv preprint arXiv:2403.13307 (2024)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00941"},{"key":"e_1_3_2_1_12_1","unstructured":"Minyue Dai Jingbo Wang Ke Fan Bin Ji Haoyu Zhao Junting Dong and Bo Dai. 2025. Towards Synthesized and Editable Motion In-Betweening Through Part-Wise Phase Representation. arXiv:2503.08180 [cs.CV] https:\/\/linproxy.fan.workers.dev:443\/https\/arxiv.org\/abs\/2503.08180"},{"key":"e_1_3_2_1_13_1","volume-title":"European Conference on Computer Vision. Springer, 390-408","author":"Dai Wenxun","year":"2024","unstructured":"Wenxun Dai, Ling-Hao Chen, Jingbo Wang, Jinpeng Liu, Bo Dai, and Yansong Tang. 2024. Motionlcm: Real-time controllable motion generation via latent consistency model. In European Conference on Computer Vision. Springer, 390-408."},{"key":"e_1_3_2_1_14_1","volume-title":"Human Motion Prediction, Reconstruction, and Generation. arXiv preprint arXiv:2502.15956","author":"Gang Canxuan","year":"2025","unstructured":"Canxuan Gang and Yiran Wang. 2025. Human Motion Prediction, Reconstruction, and Generation. arXiv preprint arXiv:2502.15956 (2025)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00186"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"e_1_3_2_1_17_1","volume-title":"MotionLab: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm. arXiv preprint arXiv:2502.02358","author":"Guo Ziyan","year":"2025","unstructured":"Ziyan Guo, Zeyu Hu, Na Zhao, and De Wen Soh. 2025. MotionLab: Unified Human Motion Generation and Editing via the Motion-Condition-Motion Paradigm. arXiv preprint arXiv:2502.02358 (2025)."},{"key":"e_1_3_2_1_18_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_19_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840-6851."},{"key":"e_1_3_2_1_20_1","first-page":"8633","article-title":"Video diffusion models","volume":"35","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, Tim Salimans, Alexey Gritsenko, William Chan, Mohammad Norouzi, and David J Fleet. 2022. Video diffusion models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 8633-8646.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681657"},{"key":"e_1_3_2_1_22_1","volume-title":"PackDiT: Joint Human Motion and Text Generation via Mutual Prompting. arXiv preprint arXiv:2501.16551","author":"Jiang Zhongyu","year":"2025","unstructured":"Zhongyu Jiang, Wenhao Chai, Zhuoran Zhou, Cheng-Yen Yang, Hsiang-Wei Huang, and Jenq-Neng Hwang. 2025. PackDiT: Joint Human Motion and Text Generation via Mutual Prompting. arXiv preprint arXiv:2501.16551 (2025)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00670"},{"key":"e_1_3_2_1_24_1","volume-title":"Trajevae: Controllable human motion generation from trajectories. arXiv preprint arXiv:2104.00351","author":"Kania Kacper","year":"2021","unstructured":"Kacper Kania, Marek Kowalski, and Tomasz Trzci'nski. 2021. Trajevae: Controllable human motion generation from trajectories. arXiv preprint arXiv:2104.00351 (2021)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00205"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData62323.2024.10825265"},{"key":"e_1_3_2_1_27_1","volume-title":"Sang-goo Lee, and Taeuk Kim.","author":"Kim Hyuhng Joon","year":"2024","unstructured":"Hyuhng Joon Kim, Youna Kim, Cheonbok Park, Junyeob Kim, Choonghyun Park, Kang Min Yoo, Sang-goo Lee, and Taeuk Kim. 2024. Aligning language models to explicitly handle ambiguity. arXiv preprint arXiv:2404.11972 (2024)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108894"},{"key":"e_1_3_2_1_29_1","volume-title":"Strategic Data Ordering: Enhancing Large Language Model Performance through Curriculum Learning. arXiv preprint arXiv:2405.07490","author":"Kim Jisu","year":"2024","unstructured":"Jisu Kim and Juhwan Lee. 2024. Strategic Data Ordering: Enhancing Large Language Model Performance through Curriculum Learning. arXiv preprint arXiv:2405.07490 (2024)."},{"key":"e_1_3_2_1_30_1","volume-title":"Multilingual Relative Clause Attachment Ambiguity Resolution in Large Language Models. arXiv preprint arXiv:2503.02971","author":"Lee So Young","year":"2025","unstructured":"So Young Lee, Russell Scheinberg, Amber Shore, and Ameeta Agrawal. 2025. Multilingual Relative Clause Attachment Ambiguity Resolution in Large Language Models. arXiv preprint arXiv:2503.02971 (2025)."},{"key":"e_1_3_2_1_31_1","volume-title":"Competence-based multimodal curriculum learning for medical report generation. arXiv preprint arXiv:2206.14579","author":"Liu Fenglin","year":"2022","unstructured":"Fenglin Liu, Shen Ge, Yuexian Zou, and Xian Wu. 2022. Competence-based multimodal curriculum learning for medical report generation. arXiv preprint arXiv:2206.14579 (2022)."},{"key":"e_1_3_2_1_32_1","volume-title":"Humantomato: Text-aligned whole-body motion generation. arXiv preprint arXiv:2310.12978","author":"Lu Shunlin","year":"2023","unstructured":"Shunlin Lu, Ling-Hao Chen, Ailing Zeng, Jing Lin, Ruimao Zhang, Lei Zhang, and Heung-Yeung Shum. 2023. Humantomato: Text-aligned whole-body motion generation. arXiv preprint arXiv:2310.12978 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"Contact-aware human motion generation from textual descriptions. arXiv preprint arXiv:2403.15709","author":"Ma Sihan","year":"2024","unstructured":"Sihan Ma, Qiong Cao, Jing Zhang, and Dacheng Tao. 2024a. Contact-aware human motion generation from textual descriptions. arXiv preprint arXiv:2403.15709 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Latte: Latent diffusion transformer for video generation. arXiv preprint arXiv:2401.03048","author":"Ma Xin","year":"2024","unstructured":"Xin Ma, Yaohui Wang, Gengyun Jia, Xinyuan Chen, Ziwei Liu, Yuan-Fang Li, Cunjian Chen, and Yu Qiao. 2024b. Latte: Latent diffusion transformer for video generation. arXiv preprint arXiv:2401.03048 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2021.10.011"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CIS-RAM61939.2024.10673231"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_28"},{"key":"e_1_3_2_1_39_1","volume-title":"GPT-Connect: Interaction between Text-Driven Human Motion Generator and 3D Scenes in a Training-free Manner. arXiv preprint arXiv:2403.14947","author":"Qu Haoxuan","year":"2024","unstructured":"Haoxuan Qu, Ziyan Guo, and Jun Liu. 2024. GPT-Connect: Interaction between Text-Driven Human Motion Generator and 3D Scenes in a Training-free Manner. arXiv preprint arXiv:2403.14947 (2024)."},{"key":"e_1_3_2_1_40_1","volume-title":"Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418","author":"Shafir Yonatan","year":"2023","unstructured":"Yonatan Shafir, Guy Tevet, Roy Kapon, and Amit H Bermano. 2023. Human motion diffusion as a generative prior. arXiv preprint arXiv:2303.01418 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"European Conference on Computer Vision. Springer, 67-86","author":"Shan Mengyi","year":"2024","unstructured":"Mengyi Shan, Lu Dong, Yutao Han, Yuan Yao, Tao Liu, Ifeoma Nwogu, Guo-Jun Qi, and Mitch Hill. 2024. Towards open domain text-driven synthesis of multi-person motions. In European Conference on Computer Vision. Springer, 67-86."},{"key":"e_1_3_2_1_42_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103166"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20047-2_21"},{"key":"e_1_3_2_1_45_1","volume-title":"Human motion diffusion model. arXiv preprint arXiv:2209.14916","author":"Tevet Guy","year":"2022","unstructured":"Guy Tevet, Sigal Raab, Brian Gordon, Yonatan Shafir, Daniel Cohen-Or, and Amit H Bermano. 2022b. Human motion diffusion model. arXiv preprint arXiv:2209.14916 (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_47_1","volume-title":"European Conference on Computer Vision. Springer, 37-54","author":"Wan Weilin","year":"2024","unstructured":"Weilin Wan, Zhiyang Dou, Taku Komura, Wenping Wang, Dinesh Jayaraman, and Lingjie Liu. 2024. Tlcontrol: Trajectory and language control for human motion synthesis. In European Conference on Computer Vision. Springer, 37-54."},{"key":"e_1_3_2_1_48_1","volume-title":"T2m-hifigpt: generating high quality human motion from textual descriptions with residual discrete representations. arXiv preprint arXiv:2312.10628","author":"Wang Congyi","year":"2023","unstructured":"Congyi Wang. 2023. T2m-hifigpt: generating high quality human motion from textual descriptions with residual discrete representations. arXiv preprint arXiv:2312.10628 (2023)."},{"key":"e_1_3_2_1_49_1","volume-title":"Grounded curriculum learning. arXiv preprint arXiv:2409.19816","author":"Wang Linji","year":"2024","unstructured":"Linji Wang, Zifan Xu, Peter Stone, and Xuesu Xiao. 2024b. Grounded curriculum learning. arXiv preprint arXiv:2409.19816 (2024)."},{"key":"e_1_3_2_1_50_1","volume-title":"Ziyao Zhang, and Xiaohui Liang.","author":"Wang Yin","year":"2025","unstructured":"Yin Wang, Mu Li, Jiapeng Liu, Zhiying Leng, Frederick WB Li, Ziyao Zhang, and Xiaohui Liang. 2025b. Fg-T2M: LLMs-Augmented Fine-Grained Text Driven Human Motion Generation. arXiv preprint arXiv:2502.05534 (2025)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00049"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2025.3526236"},{"key":"e_1_3_2_1_53_1","unstructured":"Yiming Xie Varun Jampani Lei Zhong Deqing Sun and Huaizu Jiang. 2024. OmniControl: Control Any Joint at Any Time for Human Motion Generation. arXiv:2310.08580 [cs.CV] https:\/\/linproxy.fan.workers.dev:443\/https\/arxiv.org\/abs\/2310.08580"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3554729"},{"key":"e_1_3_2_1_55_1","volume-title":"Cogvideox: Text-to-video diffusion models with an expert transformer. arXiv preprint arXiv:2408.06072","author":"Yang Zhuoyi","year":"2024","unstructured":"Zhuoyi Yang, Jiayan Teng, Wendi Zheng, Ming Ding, Shiyu Huang, Jiazheng Xu, Yuanming Yang, Wenyi Hong, Xiaohan Zhang, Guanyu Feng, et al., 2024. Cogvideox: Text-to-video diffusion models with an expert transformer. arXiv preprint arXiv:2408.06072 (2024)."},{"key":"e_1_3_2_1_56_1","unstructured":"Ziming Cheng Jiangfeiyang Wang Yihao Liao Yiyu Fu. 2024. AnimationGPT:An AIGC tool for generating game combat motion assets. https:\/\/linproxy.fan.workers.dev:443\/https\/github.com\/fyyakaxyy\/AnimationGPT."},{"key":"e_1_3_2_1_57_1","volume-title":"Flexmatch: Boosting semi-supervised learning with curriculum pseudo labeling. Advances in neural information processing systems","author":"Zhang Bowen","year":"2021","unstructured":"Bowen Zhang, Yidong Wang, Wenxin Hou, Hao Wu, Jindong Wang, Manabu Okumura, and Takahiro Shinozaki. 2021. Flexmatch: Boosting semi-supervised learning with curriculum pseudo labeling. Advances in neural information processing systems, Vol. 34 (2021), 18408-18419."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01415"},{"key":"e_1_3_2_1_59_1","first-page":"13981","article-title":"Finemogen: Fine-grained spatio-temporal motion generation and editing","volume":"36","author":"Zhang Mingyuan","year":"2023","unstructured":"Mingyuan Zhang, Huirong Li, Zhongang Cai, Jiawei Ren, Lei Yang, and Ziwei Liu. 2023a. Finemogen: Fine-grained spatio-temporal motion generation and editing. Advances in Neural Information Processing Systems, Vol. 36 (2023), 13981-13992.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_60_1","volume-title":"Curriculum learning for domain adaptation in neural machine translation. arXiv preprint arXiv:1905.05816","author":"Zhang Xuan","year":"2019","unstructured":"Xuan Zhang, Pamela Shapiro, Gaurav Kumar, Paul McNamee, Marine Carpuat, and Kevin Duh. 2019. Curriculum learning for domain adaptation in neural machine translation. arXiv preprint arXiv:1905.05816 (2019)."},{"key":"e_1_3_2_1_61_1","volume-title":"Motion Anything: Any to Motion Generation. arXiv preprint arXiv:2503.06955","author":"Zhang Zeyu","year":"2025","unstructured":"Zeyu Zhang, Yiran Wang, Wei Mao, Danning Li, Rui Zhao, Biao Wu, Zirui Song, Bohan Zhuang, Ian Reid, and Richard Hartley. 2025. Motion Anything: Any to Motion Generation. arXiv preprint arXiv:2503.06955 (2025)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17709"},{"key":"e_1_3_2_1_63_1","volume-title":"ExGes: Expressive Human Motion Retrieval and Modulation for Audio-Driven Gesture Synthesis. arXiv preprint arXiv:2503.06499","author":"Zhou Xukun","year":"2025","unstructured":"Xukun Zhou, Fengxin Li, Ming Chen, Yan Zhou, Pengfei Wan, Di Zhang, Hongyan Liu, Jun He, and Zhaoxin Fan. 2025. ExGes: Expressive Human Motion Retrieval and Modulation for Audio-Driven Gesture Synthesis. arXiv preprint arXiv:2503.06499 (2025)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3330935"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754866","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:40:08Z","timestamp":1765309208000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/10.1145\/3746027.3754866"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":64,"alternative-id":["10.1145\/3746027.3754866","10.1145\/3746027"],"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1145\/3746027.3754866","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}