{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:46Z","timestamp":1750220206866,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,30]],"date-time":"2022-10-30T00:00:00Z","timestamp":1667088000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF-1955909, CMMI-2038625, CCF-2106725"],"award-info":[{"award-number":["CCF-1955909, CMMI-2038625, CCF-2106725"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,30]]},"DOI":"10.1145\/3508352.3549402","type":"proceedings-article","created":{"date-parts":[[2022,12,22]],"date-time":"2022-12-22T12:10:54Z","timestamp":1671711054000},"page":"1-9","update-policy":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Deep Learning Toolkit-Accelerated Analytical Co-Optimization of CNN Hardware and Dataflow"],"prefix":"10.1145","author":[{"given":"Rongjian","family":"Liang","sequence":"first","affiliation":[{"name":"Nvdia"}]},{"given":"Jianfeng","family":"Song","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University"}]},{"given":"Yuan","family":"Bo","sequence":"additional","affiliation":[{"name":"Rutgers University"}]},{"given":"Jiang","family":"Hu","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University"}]}],"member":"320","published-online":{"date-parts":[[2022,12,22]]},"reference":[{"volume-title":"Design Automation Conference. 1--6.","author":"Abdelfattah M. S.","unstructured":"M. S. Abdelfattah, \u0141. Dudziak, T. Chau, R. Lee, H. Kim, and N. D. Lane. 2020. Best of Both Worlds: AutoML Codesign of a CNN and its Hardware Accelerator. In Design Automation Conference. 1--6.","key":"e_1_3_2_1_1_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_2_1","DOI":"10.1109\/JSSC.2016.2616357"},{"volume-title":"DANCE: Differentiable Accelerator\/Network Co-Exploration. In Design Automation Conference. 337--342","author":"Choi K.","unstructured":"K. Choi, D. Hong, H. Yoon, J. Yu, Y. Kim, and J. Lee. 2021. DANCE: Differentiable Accelerator\/Network Co-Exploration. In Design Automation Conference. 337--342.","key":"e_1_3_2_1_3_1"},{"volume-title":"Parallel and Pipeline Architecture. In Design Automation Conference. 1--6.","author":"Cong J.","unstructured":"J. Cong, P. Wei, C. H. Yu, and P. Zhang. 2018. Automated Accelerator Generation and Optimization with Composable, Parallel and Pipeline Architecture. In Design Automation Conference. 1--6.","key":"e_1_3_2_1_4_1"},{"volume-title":"GoSPA: An Energy-efficient High-performance Globally Optimized SParse Convolutional Neural Network Accelerator. In International Symposium on Computer Architecture. 1110--1123","author":"Deng C.","unstructured":"C. Deng, Y. Sui, S. Liao, X. Qian, and B. Yuan. 2021. GoSPA: An Energy-efficient High-performance Globally Optimized SParse Convolutional Neural Network Accelerator. In International Symposium on Computer Architecture. 1110--1123.","key":"e_1_3_2_1_5_1"},{"unstructured":"Y. Fu Y. A. Zhang Y. Zhang D. Cox and Y. Lin. 2021. Auto-NBA: Efficient and Effective Search Over the Joint Space of Networks Bitwidths and Accelerators. arXiv:2106.06575","key":"e_1_3_2_1_6_1"},{"volume-title":"ExTensor: An Accelerator for Sparse Tensor Algebra. In International Symposium on Microarchitecture. 319--333","author":"Hegde K.","unstructured":"K. Hegde, H. Asghari-Moghaddam, M. Pellauer, N. Crago, A. Jaleel, E. Solomonik, J. Emer, and C. Fletcher. 2019. ExTensor: An Accelerator for Sparse Tensor Algebra. In International Symposium on Microarchitecture. 319--333.","key":"e_1_3_2_1_7_1"},{"volume-title":"Mind Mappings: Enabling Efficient Algorithm-Accelerator Mapping Space Search. In International Conference on Architectural Support for Programming Languages and Operating Systems. 943--958","author":"Hegde K.","unstructured":"K. Hegde, P. Tsai, S. Huang, V. Chandra, A. Parashar, and C. Fletcher. 2021. Mind Mappings: Enabling Efficient Algorithm-Accelerator Mapping Space Search. In International Conference on Architectural Support for Programming Languages and Operating Systems. 943--958.","key":"e_1_3_2_1_8_1"},{"volume-title":"International Symposium on Computer Architecture. 554--566","author":"Huang Q.","unstructured":"Q. Huang, M. Kang, G. Dinh, T. Norell, A. Kalaiah, J. Demmel, J. Wawrzynek, and Y. Shao. 2021. CoSA: Scheduling by Constrained Optimization for Spatial Accelerators. In International Symposium on Computer Architecture. 554--566.","key":"e_1_3_2_1_9_1"},{"volume-title":"Conference on Neural Information Processing Systems. 103--112","author":"Huang Y.","unstructured":"Y. Huang, Y. Cheng, A. Bapna, O. Firat, M. X. Chen, D. Chen, H. Lee, J. Ngiam, Q. V. Le, Y. Wu, and Z. Chen. 2019. GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism. In Conference on Neural Information Processing Systems. 103--112.","key":"e_1_3_2_1_10_1"},{"unstructured":"E. Jiang S. Gu and B. Poole. 2017. Categorical Reparameterization with Gumbel-Softmax. arXiv:1611.01144","key":"e_1_3_2_1_11_1"},{"unstructured":"W. Jiang L. Yang E. Sha Q. Zhuge S. Gu S. Dasgupta Y. Shi and J. Hu. 2020. Hardware\/Software Co-Exploration of Neural Architectures. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (2020) 1--6.","key":"e_1_3_2_1_12_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1145\/3079856.3080246"},{"volume-title":"International Symposium on Microarchitecture. 622--636","author":"Kao S.","unstructured":"S. Kao, G. Jeong, and T. Krishna. 2020. ConfuciuX: Autonomous Hardware Resource Assignment for DNN Accelerators using Reinforcement Learning. In International Symposium on Microarchitecture. 622--636.","key":"e_1_3_2_1_14_1"},{"volume-title":"International Conference On Computer Aided Design. 1--9.","author":"Kao S.-C.","unstructured":"S.-C. Kao and T. Krishna. 2020. GAMMA: Automating the HW Mapping of DNN Models on Accelerators via Genetic Algorithm. In International Conference On Computer Aided Design. 1--9.","key":"e_1_3_2_1_15_1"},{"key":"e_1_3_2_1_16_1","volume-title":"International Symposium on Microarchitecture 40","author":"Kwon H.","year":"2020","unstructured":"H. Kwon, P. Chatarasi, V. Sarkar, T. Krishna, M. Pellauer, and A. Parashar. 2020. MAESTRO: A Data-Centric Approach to Understand Reuse, Performance, and Hardware Cost of DNN Mappings. International Symposium on Microarchitecture 40, 3 (2020), 20--29."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1145\/3296957.3173176"},{"volume-title":"EDD: Efficient Differentiable DNN Architecture and Implementation Co-search for Embedded AI Solutions. In Design Automation Conference. 1--6.","author":"Li Y.","unstructured":"Y. Li, C. Hao, X. Zhang, X. Liu, Y. Chen, J. Xiong, W. Hwu, and D. Chen. 2020. EDD: Efficient Differentiable DNN Architecture and Implementation Co-search for Embedded AI Solutions. In Design Automation Conference. 1--6.","key":"e_1_3_2_1_18_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1109\/TCAD.2020.3003843"},{"volume-title":"International Symposium on Performance Analysis of Systems and Software. 304--315","author":"Parashar A.","unstructured":"A. Parashar, P. Raina, Y.-S. Shao, Y.-H. Chen, V. A. Ying, A. Mukkara, R. Venkatesan, B. Khailany, S. W. Keckler, and J. Emer. 2019. Timeloop: A Systematic Approach to DNN Accelerator Evaluation. In International Symposium on Performance Analysis of Systems and Software. 304--315.","key":"e_1_3_2_1_20_1"},{"volume-title":"International Symposium on Computer Architecture. 27--40","author":"Parashar A.","unstructured":"A. Parashar, M. Rhu, A. Mukkara, A. Puglielli, R. Venkatesan, B. Khailany, J. Emer, S. Keckler, and S. Dally. 2017. SCNN: An accelerator for compressed-sparse convolutional neural networks. In International Symposium on Computer Architecture. 27--40.","key":"e_1_3_2_1_21_1"},{"doi-asserted-by":"crossref","unstructured":"M. Parsa J. P. Mitchell C. D. Schuman R. M. Patton T. E. Potok and K. Roy. 2020. Bayesian Multi-objective Hyperparameter Optimization for Accurate Fast and Efficient Neural Network Accelerator Design. Frontiers in Neuroscience 14 (2020).","key":"e_1_3_2_1_22_1","DOI":"10.3389\/fnins.2020.00667"},{"key":"e_1_3_2_1_23_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke A.","year":"2019","unstructured":"A. Paszke, S. Gross, F. Massa, A. Lerer, J. Bradbury, G. Chanan, T. Killeen, Z. Lin, N. Gimelshein, L. Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"volume-title":"International Symposium on Low Power Electronics and Design. 1--6.","author":"Reagen B.","unstructured":"B. Reagen, J. M. Hernandez-Lobato, R. Adolf, M. Gelbart, P. Whatmough, G.-Y. Wei, and D. Brooks. 2017. A Case for Efficient Accelerator Design Space Exploration via Bayesian Optimization. In International Symposium on Low Power Electronics and Design. 1--6.","key":"e_1_3_2_1_24_1"},{"unstructured":"O. Sener and V. Koltun. 2018. Multi-task learning as multi-objective optimization. Advances in neural information processing systems (2018) 525--536.","key":"e_1_3_2_1_25_1"},{"unstructured":"A. Stoutchinin F. Conti and L. Benini. 2019. Optimally Scheduling CNN Convolutions for Efficient Memory Access. arXiv:1902.01492","key":"e_1_3_2_1_26_1"},{"key":"e_1_3_2_1_27_1","volume-title":"DeepTools: Compiler and Execution Runtime Extensions for RaPiD AI Accelerator. International Symposium on Microarchitecture 39","author":"Venkataramani S.","year":"2019","unstructured":"S. Venkataramani, J. Choi, et al. 2019. DeepTools: Compiler and Execution Runtime Extensions for RaPiD AI Accelerator. International Symposium on Microarchitecture 39, 5 (2019), 102--111."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_28_1","DOI":"10.1109\/ACCESS.2021.3134930"},{"volume-title":"Energy-Focused Design Space Exploration Methodology for Sparse Tensor Accelerators. In International Symposium on Performance Analysis of Systems and Software. 232--234","author":"Wu Y. N.","unstructured":"Y. N. Wu, P. A. Tsai, A. Parashar, V. Sze, and J. S. Emer. 2021. Sparseloop: An Analytical, Energy-Focused Design Space Exploration Methodology for Sparse Tensor Accelerators. In International Symposium on Performance Analysis of Systems and Software. 232--234.","key":"e_1_3_2_1_29_1"},{"volume-title":"Co-Exploration of Neural Architectures and Heterogeneous ASIC Accelerator Designs Targeting Multiple Tasks. In Design Automation Conference. 1--6.","author":"Yang L.","unstructured":"L. Yang, Z. Yan, M. Li, H. Kwon, L. Lai, T. Krishna, V. Chandra, W. Jiang, and Y. Shi. 2020. Co-Exploration of Neural Architectures and Heterogeneous ASIC Accelerator Designs Targeting Multiple Tasks. In Design Automation Conference. 1--6.","key":"e_1_3_2_1_30_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1145\/3373376.3378514"},{"volume-title":"DNN-Chip Predictor: An Analytical Performance Predictor for DNN Accelerators with Various Dataflows and Hardware Architectures. In International Conference on Acoustics, Speech and Signal Processing.","author":"Zhao Y.","unstructured":"Y. Zhao, C. Li, Y. Wang, P. Xu, Y. Zhang, and Y. Lin. 2020. DNN-Chip Predictor: An Analytical Performance Predictor for DNN Accelerators with Various Dataflows and Hardware Architectures. In International Conference on Acoustics, Speech and Signal Processing.","key":"e_1_3_2_1_32_1"},{"unstructured":"Y. Zhou X. Dong B. Akin M. Tan D. Peng T. Meng A. Yazdanbakhsh D. Huang and R. Narayanaswami. 2021. Rethinking Co-design of Neural Architectures and Hardware Accelerators. arXiv:2102.08619","key":"e_1_3_2_1_33_1"}],"event":{"sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-EDS Electronic Devices Society","IEEE CAS","IEEE CEDA"],"acronym":"ICCAD '22","name":"ICCAD '22: IEEE\/ACM International Conference on Computer-Aided Design","location":"San Diego California"},"container-title":["Proceedings of the 41st IEEE\/ACM International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/10.1145\/3508352.3549402","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/pdf\/10.1145\/3508352.3549402","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/pdf\/10.1145\/3508352.3549402","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:57Z","timestamp":1750186977000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/dl.acm.org\/doi\/10.1145\/3508352.3549402"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,30]]},"references-count":33,"alternative-id":["10.1145\/3508352.3549402","10.1145\/3508352"],"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1145\/3508352.3549402","relation":{},"subject":[],"published":{"date-parts":[[2022,10,30]]},"assertion":[{"value":"2022-12-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}