


default search action
BibTeX records: Ganesh Dasika
% The coined name and the acronyms in the title are braced so that
% sentence-casing bibliography styles keep their capitalization.
@article{DBLP:journals/corr/abs-2508-20258,
  author     = {Arya Tschand and
                Muhammad A. Awad and
                Ryan Swann and
                Kesavan Ramakrishnan and
                Jeffrey Ma and
                Keith Lowery and
                Ganesh Dasika and
                Vijay Janapa Reddi},
  title      = {{SwizzlePerf}: Hardware-Aware {LLMs} for {GPU} Kernel Performance Optimization},
  journal    = {CoRR},
  volume     = {abs/2508.20258},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2508.20258},
  doi        = {10.48550/ARXIV.2508.20258},
  eprinttype = {arXiv},
  eprint     = {2508.20258},
  timestamp  = {Mon, 22 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2508-20258.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% All acronyms in the title (including the plural form) are braced so that
% sentence-casing bibliography styles keep their capitalization.
@article{DBLP:journals/corr/abs-2511-02132,
  author     = {Mansi Choudhary and
                Karthik Sangaiah and
                Sonali Singh and
                Muhammad Osama and
                Lisa Wu Wills and
                Ganesh Dasika},
  title      = {Optimizing Attention on {GPUs} by Exploiting {GPU} Architectural {NUMA}
                Effects},
  journal    = {CoRR},
  volume     = {abs/2511.02132},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2511.02132},
  doi        = {10.48550/ARXIV.2511.02132},
  eprinttype = {arXiv},
  eprint     = {2511.02132},
  timestamp  = {Sun, 23 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2511-02132.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The acronyms in the title are braced so that sentence-casing bibliography
% styles keep their capitalization.
@article{DBLP:journals/corr/abs-2511-02168,
  author     = {Octavian Alexandru Trifan and
                Karthik Sangaiah and
                Muhammad Awad and
                Muhammad Osama and
                Sumanth Gudaparthi and
                Alexandru Nicolau and
                Alexander V. Veidenbaum and
                Ganesh Dasika},
  title      = {Eliminating Multi-{GPU} Performance Taxes: {A} Systems Approach to Efficient
                Distributed {LLMs}},
  journal    = {CoRR},
  volume     = {abs/2511.02168},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2511.02168},
  doi        = {10.48550/ARXIV.2511.02168},
  eprinttype = {arXiv},
  eprint     = {2511.02168},
  timestamp  = {Sun, 01 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2511-02168.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The mixed-case library name and the proper noun in the title are braced so
% that sentence-casing bibliography styles keep their capitalization.
@article{DBLP:journals/corr/abs-2512-04226,
  author     = {Ryan Swann and
                Muhammad Osama and
                Xiaohu Guo and
                Bryant Nelson and
                Lixun Zhang and
                Alex Brown and
                Yen Ong and
                Ali Yazdani and
                Sean Siddens and
                Ganesh Dasika and
                Alex Underwood},
  title      = {{tritonBLAS}: {Triton}-based Analytical Approach for {GEMM} Kernel Parameter
                Selection},
  journal    = {CoRR},
  volume     = {abs/2512.04226},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2512.04226},
  doi        = {10.48550/ARXIV.2512.04226},
  eprinttype = {arXiv},
  eprint     = {2512.04226},
  timestamp  = {Fri, 23 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2512-04226.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Acronyms and the proper noun in the title are braced so that sentence-casing
% bibliography styles keep their capitalization.
@article{DBLP:journals/jetc/ThakkerFZGMDB21,
  author     = {Urmish Thakker and
                Igor Fedorov and
                Chu Zhou and
                Dibakar Gope and
                Matthew Mattina and
                Ganesh Dasika and
                Jesse G. Beu},
  title      = {Compressing {RNNs} to Kilobyte Budget for {IoT} Devices Using {Kronecker}
                Products},
  journal    = {{ACM} J. Emerg. Technol. Comput. Syst.},
  volume     = {17},
  number     = {4},
  pages      = {46:1--46:18},
  year       = {2021},
  url        = {https://doi.org/10.1145/3440016},
  doi        = {10.1145/3440016},
  timestamp  = {Mon, 11 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jetc/ThakkerFZGMDB21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SustaiNLP workshop paper (EMNLP 2020). A CoRR record with the same title and
% authors is keyed DBLP:journals/corr/abs-2010-03193.
@inproceedings{DBLP:conf/emnlp/ThakkerBGDM20,
  author     = {Urmish Thakker and
                Jesse G. Beu and
                Dibakar Gope and
                Ganesh Dasika and
                Matthew Mattina},
  editor     = {Nafise Sadat Moosavi and
                Angela Fan and
                Vered Shwartz and
                Goran Glavas and
                Shafiq R. Joty and
                Alex Wang and
                Thomas Wolf},
  title      = {Rank and run-time aware compression of {NLP} Applications},
  booktitle  = {Proceedings of SustaiNLP: Workshop on Simple and Efficient Natural
                Language Processing, SustaiNLP@EMNLP 2020, Online, November 20, 2020},
  pages      = {8--18},
  publisher  = {Association for Computational Linguistics},
  year       = {2020},
  url        = {https://doi.org/10.18653/v1/2020.sustainlp-1.2},
  doi        = {10.18653/V1/2020.SUSTAINLP-1.2},
  timestamp  = {Thu, 22 Sep 2022 17:53:15 +0200},
  biburl     = {https://dblp.org/rec/conf/emnlp/ThakkerBGDM20.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR record for arXiv 2010.03193. Same title and authors as
% DBLP:conf/emnlp/ThakkerBGDM20.
@article{DBLP:journals/corr/abs-2010-03193,
  author     = {Urmish Thakker and
                Jesse G. Beu and
                Dibakar Gope and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Rank and run-time aware compression of {NLP} Applications},
  journal    = {CoRR},
  volume     = {abs/2010.03193},
  year       = {2020},
  url        = {https://arxiv.org/abs/2010.03193},
  eprinttype = {arXiv},
  eprint     = {2010.03193},
  timestamp  = {Tue, 13 Oct 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2010-03193.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Proper nouns and the acronym in the title are braced so that sentence-casing
% bibliography styles keep their capitalization.
@inproceedings{DBLP:conf/emc2/MajiMDBMM19,
  author     = {Partha Maji and
                Andrew Mundy and
                Ganesh Dasika and
                Jesse G. Beu and
                Matthew Mattina and
                Robert Mullins},
  title      = {Efficient {Winograd} or {Cook-Toom} Convolution Kernel Implementation
                on Widely Used Mobile {CPUs}},
  booktitle  = {2nd Workshop on Energy Efficient Machine Learning and Cognitive Computing
                for Embedded Applications, EMC2@HPCA 2019, Washington, DC, USA, February
                17, 2019},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/EMC249363.2019.00008},
  doi        = {10.1109/EMC249363.2019.00008},
  timestamp  = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/emc2/MajiMDBMM19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EMC2 workshop paper (HPCA 2019). A CoRR record with the same title and
% authors is keyed DBLP:journals/corr/abs-1906-04886.
@inproceedings{DBLP:conf/emc2/ThakkerBGDM19,
  author     = {Urmish Thakker and
                Jesse G. Beu and
                Dibakar Gope and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Run-Time Efficient {RNN} Compression for Inference on Edge Devices},
  booktitle  = {2nd Workshop on Energy Efficient Machine Learning and Cognitive Computing
                for Embedded Applications, EMC2@HPCA 2019, Washington, DC, USA, February
                17, 2019},
  pages      = {26--30},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/EMC249363.2019.00013},
  doi        = {10.1109/EMC249363.2019.00013},
  timestamp  = {Tue, 19 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/emc2/ThakkerBGDM19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The mixed-case acronym in the title is braced so that sentence-casing
% bibliography styles keep its capitalization.
% NOTE(review): the url keeps its escaped underscore as exported; whether the
% backslash is wanted depends on the bibliography toolchain in use.
@inproceedings{DBLP:conf/mlsys/GopeDM19,
  author     = {Dibakar Gope and
                Ganesh Dasika and
                Matthew Mattina},
  editor     = {Ameet Talwalkar and
                Virginia Smith and
                Matei Zaharia},
  title      = {Ternary Hybrid Neural-Tree Networks for Highly Constrained {IoT} Applications},
  booktitle  = {Proceedings of the Second Conference on Machine Learning and Systems,
                SysML 2019, Stanford, CA, USA, March 31 - April 2, 2019},
  publisher  = {mlsys.org},
  year       = {2019},
  url        = {https://proceedings.mlsys.org/paper\_files/paper/2019/hash/4b1648906c25077d5232aa166af08eb0-Abstract.html},
  timestamp  = {Fri, 28 Jun 2024 10:42:01 +0200},
  biburl     = {https://dblp.org/rec/conf/mlsys/GopeDM19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EMC2 workshop paper (NeurIPS 2019 edition). A CoRR record with the same
% title and authors is keyed DBLP:journals/corr/abs-1910-02558.
@inproceedings{DBLP:conf/nips/ThakkerFBGZDM19,
  author     = {Urmish Thakker and
                Igor Fedorov and
                Jesse G. Beu and
                Dibakar Gope and
                Chu Zhou and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Pushing the limits of {RNN} Compression},
  booktitle  = {Fifth Workshop on Energy Efficient Machine Learning and Cognitive
                Computing - NeurIPS Edition, EMC2@NeurIPS 2019, Vancouver, Canada,
                December 13, 2019},
  pages      = {18--21},
  publisher  = {{IEEE}},
  year       = {2019},
  url        = {https://doi.org/10.1109/EMC2-NIPS53020.2019.00012},
  doi        = {10.1109/EMC2-NIPS53020.2019.00012},
  timestamp  = {Wed, 07 Jul 2021 15:49:53 +0200},
  biburl     = {https://dblp.org/rec/conf/nips/ThakkerFBGZDM19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SenSys-ML 2019 workshop paper.
@inproceedings{DBLP:conf/sensys/TaoTDB19,
  author     = {Jin Tao and
                Urmish Thakker and
                Ganesh Dasika and
                Jesse G. Beu},
  title      = {Skipping {RNN} State Updates without Retraining the Original Model},
  booktitle  = {Proceedings of the 1st Workshop on Machine Learning on Edge in Sensor
                Systems, SenSys-ML 2019, New York, NY, USA, November 10, 2019},
  pages      = {31--36},
  publisher  = {{ACM}},
  year       = {2019},
  url        = {https://doi.org/10.1145/3362743.3362965},
  doi        = {10.1145/3362743.3362965},
  timestamp  = {Wed, 04 Dec 2024 09:44:10 +0100},
  biburl     = {https://dblp.org/rec/conf/sensys/TaoTDB19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Proper nouns and the acronym in the title are braced so that sentence-casing
% bibliography styles keep their capitalization; the arXiv link uses https.
@article{DBLP:journals/corr/abs-1903-01521,
  author     = {Partha Maji and
                Andrew Mundy and
                Ganesh Dasika and
                Jesse G. Beu and
                Matthew Mattina and
                Robert Mullins},
  title      = {Efficient {Winograd} or {Cook-Toom} Convolution Kernel Implementation
                on Widely Used Mobile {CPUs}},
  journal    = {CoRR},
  volume     = {abs/1903.01521},
  year       = {2019},
  url        = {https://arxiv.org/abs/1903.01521},
  eprinttype = {arXiv},
  eprint     = {1903.01521},
  timestamp  = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1903-01521.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The mixed-case acronym in the title is braced so that sentence-casing
% bibliography styles keep its capitalization; the arXiv link uses https.
@article{DBLP:journals/corr/abs-1903-01531,
  author     = {Dibakar Gope and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Ternary Hybrid Neural-Tree Networks for Highly Constrained {IoT} Applications},
  journal    = {CoRR},
  volume     = {abs/1903.01531},
  year       = {2019},
  url        = {https://arxiv.org/abs/1903.01531},
  eprinttype = {arXiv},
  eprint     = {1903.01531},
  timestamp  = {Sat, 30 Mar 2019 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1903-01531.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Both acronyms in the title are braced so that sentence-casing bibliography
% styles keep their capitalization; the arXiv link uses https.
@article{DBLP:journals/corr/abs-1904-03302,
  author     = {Urmish Thakker and
                Ganesh Dasika and
                Jesse G. Beu and
                Matthew Mattina},
  title      = {Measuring scheduling efficiency of {RNNs} for {NLP} applications},
  journal    = {CoRR},
  volume     = {abs/1904.03302},
  year       = {2019},
  url        = {https://arxiv.org/abs/1904.03302},
  eprinttype = {arXiv},
  eprint     = {1904.03302},
  timestamp  = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-03302.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Acronyms and the proper noun in the title are braced so that sentence-casing
% bibliography styles keep their capitalization; the arXiv link uses https.
@article{DBLP:journals/corr/abs-1906-02876,
  author     = {Urmish Thakker and
                Jesse G. Beu and
                Dibakar Gope and
                Chu Zhou and
                Igor Fedorov and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Compressing {RNNs} for {IoT} devices by 15-38x using {Kronecker} Products},
  journal    = {CoRR},
  volume     = {abs/1906.02876},
  year       = {2019},
  url        = {https://arxiv.org/abs/1906.02876},
  eprinttype = {arXiv},
  eprint     = {1906.02876},
  timestamp  = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-02876.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR record for arXiv 1906.04886; the arXiv link uses https, matching the
% other https links in this file. Same title and authors as
% DBLP:conf/emc2/ThakkerBGDM19.
@article{DBLP:journals/corr/abs-1906-04886,
  author     = {Urmish Thakker and
                Jesse G. Beu and
                Dibakar Gope and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Run-Time Efficient {RNN} Compression for Inference on Edge Devices},
  journal    = {CoRR},
  volume     = {abs/1906.04886},
  year       = {2019},
  url        = {https://arxiv.org/abs/1906.04886},
  eprinttype = {arXiv},
  eprint     = {1906.04886},
  timestamp  = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1906-04886.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR record for arXiv 1910.02558; the arXiv link uses https, matching the
% other https links in this file. Same title and authors as
% DBLP:conf/nips/ThakkerFBGZDM19.
@article{DBLP:journals/corr/abs-1910-02558,
  author     = {Urmish Thakker and
                Igor Fedorov and
                Jesse G. Beu and
                Dibakar Gope and
                Chu Zhou and
                Ganesh Dasika and
                Matthew Mattina},
  title      = {Pushing the limits of {RNN} Compression},
  journal    = {CoRR},
  volume     = {abs/1910.02558},
  year       = {2019},
  url        = {https://arxiv.org/abs/1910.02558},
  eprinttype = {arXiv},
  eprint     = {1910.02558},
  timestamp  = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1910-02558.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Guest editors' introduction, ACM JETC volume 14, number 2 (2018).
@article{DBLP:journals/jetc/CaoLSD18,
  author     = {Yu Cao and
                Xin Li and
                Jae{-}sun Seo and
                Ganesh Dasika},
  title      = {Guest Editors' Introduction: Frontiers of Hardware and Algorithms
                for On-chip Learning},
  journal    = {{ACM} J. Emerg. Technol. Comput. Syst.},
  volume     = {14},
  number     = {2},
  pages      = {14:1--14:2},
  year       = {2018},
  url        = {https://doi.org/10.1145/3205944},
  doi        = {10.1145/3205944},
  timestamp  = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/jetc/CaoLSD18.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The second author is written in "Last, First" form: in "First Last" form
% BibTeX treats the lowercase "del" as a von particle and would fold "Milagro"
% into the surname, which affects sorting and abbreviated-name styles.
@inproceedings{DBLP:conf/cf/LlewellynnFDFSP17,
  author     = {Tim Llewellynn and
                Fern{\'{a}}ndez{-}Carrobles, Maria del Milagro and
                Oscar D{\'{e}}niz and
                Samuel Fricker and
                Amos J. Storkey and
                Nuria Pazos and
                Gordana Velikic and
                Kirsten Leufgen and
                Rozenn Dahyot and
                Sebastian Koller and
                Georgios I. Goumas and
                Peter Leitner and
                Ganesh Dasika and
                Lei Wang and
                Kurt Tutschku},
  title      = {{BONSEYES:} Platform for Open Development of Systems of Artificial
                Intelligence: Invited paper},
  booktitle  = {Proceedings of the Computing Frontiers Conference, CF'17, Siena, Italy,
                May 15-17, 2017},
  pages      = {299--304},
  publisher  = {{ACM}},
  year       = {2017},
  url        = {https://doi.org/10.1145/3075564.3076259},
  doi        = {10.1145/3075564.3076259},
  timestamp  = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cf/LlewellynnFDFSP17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The mixed-case API name and the acronym in the title are braced so that
% sentence-casing bibliography styles keep their capitalization.
@inproceedings{DBLP:conf/fpga/SudaCDMMVSC16,
  author     = {Naveen Suda and
                Vikas Chandra and
                Ganesh Dasika and
                Abinash Mohanty and
                Yufei Ma and
                Sarma B. K. Vrudhula and
                Jae{-}sun Seo and
                Yu Cao},
  editor     = {Deming Chen and
                Jonathan W. Greene},
  title      = {Throughput-Optimized {OpenCL}-based {FPGA} Accelerator for Large-Scale
                Convolutional Neural Networks},
  booktitle  = {Proceedings of the 2016 {ACM/SIGDA} International Symposium on Field-Programmable
                Gate Arrays, Monterey, CA, USA, February 21-23, 2016},
  pages      = {16--25},
  publisher  = {{ACM}},
  year       = {2016},
  url        = {https://doi.org/10.1145/2847263.2847276},
  doi        = {10.1145/2847263.2847276},
  timestamp  = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/fpga/SudaCDMMVSC16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% UbiComp 2016 conference paper.
@inproceedings{DBLP:conf/huc/RaykovODBL16,
  author     = {Yordan P. Raykov and
                Emre Ozer and
                Ganesh Dasika and
                Alexis Boukouvalas and
                Max A. Little},
  editor     = {Paul Lukowicz and
                Antonio Kr{\"{u}}ger and
                Andreas Bulling and
                Youn{-}Kyung Lim and
                Shwetak N. Patel},
  title      = {Predicting room occupancy with a single passive infrared {(PIR)} sensor
                through behavior extraction},
  booktitle  = {Proceedings of the 2016 {ACM} International Joint Conference on Pervasive
                and Ubiquitous Computing, UbiComp 2016, Heidelberg, Germany, September
                12-16, 2016},
  pages      = {1016--1027},
  publisher  = {{ACM}},
  year       = {2016},
  url        = {https://doi.org/10.1145/2971648.2971746},
  doi        = {10.1145/2971648.2971746},
  timestamp  = {Wed, 11 Oct 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/huc/RaykovODBL16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}

manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt in for them to become active. All settings here will be stored as cookies in your web browser. For more information, see our F.A.Q.


Google
Google Scholar
Semantic Scholar
Internet Archive Scholar
CiteSeerX
ORCID













