BibTeX records: Ganesh Dasika

download as .bib file

% arXiv preprint 2508.20258 (CoRR, 2025).
% Braced words in the title keep their capitalisation under sentence-casing styles.
@article{DBLP:journals/corr/abs-2508-20258,
  author       = {Arya Tschand and
                  Muhammad A. Awad and
                  Ryan Swann and
                  Kesavan Ramakrishnan and
                  Jeffrey Ma and
                  Keith Lowery and
                  Ganesh Dasika and
                  Vijay Janapa Reddi},
  title        = {{SwizzlePerf}: Hardware-Aware {LLMs} for {GPU} Kernel Performance Optimization},
  journal      = {CoRR},
  volume       = {abs/2508.20258},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2508.20258},
  doi          = {10.48550/ARXIV.2508.20258},
  eprinttype   = {arXiv},
  eprint       = {2508.20258},
  timestamp    = {Mon, 22 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2508-20258.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2511.02132 (CoRR, 2025).
% Braced acronyms in the title keep their capitalisation under sentence-casing styles.
@article{DBLP:journals/corr/abs-2511-02132,
  author       = {Mansi Choudhary and
                  Karthik Sangaiah and
                  Sonali Singh and
                  Muhammad Osama and
                  Lisa Wu Wills and
                  Ganesh Dasika},
  title        = {Optimizing Attention on {GPUs} by Exploiting {GPU} Architectural {NUMA}
                  Effects},
  journal      = {CoRR},
  volume       = {abs/2511.02132},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2511.02132},
  doi          = {10.48550/ARXIV.2511.02132},
  eprinttype   = {arXiv},
  eprint       = {2511.02132},
  timestamp    = {Sun, 23 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2511-02132.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2511.02168 (CoRR, 2025).
% Braced words in the title keep their capitalisation under sentence-casing styles.
@article{DBLP:journals/corr/abs-2511-02168,
  author       = {Octavian Alexandru Trifan and
                  Karthik Sangaiah and
                  Muhammad Awad and
                  Muhammad Osama and
                  Sumanth Gudaparthi and
                  Alexandru Nicolau and
                  Alexander V. Veidenbaum and
                  Ganesh Dasika},
  title        = {Eliminating {Multi-GPU} Performance Taxes: {A} Systems Approach to Efficient
                  Distributed {LLMs}},
  journal      = {CoRR},
  volume       = {abs/2511.02168},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2511.02168},
  doi          = {10.48550/ARXIV.2511.02168},
  eprinttype   = {arXiv},
  eprint       = {2511.02168},
  timestamp    = {Sun, 01 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2511-02168.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 2512.04226 (CoRR, 2025).
% The title starts with a lower-case, mixed-case name; it is braced so styles
% neither capitalise its first letter nor lowercase the rest.
@article{DBLP:journals/corr/abs-2512-04226,
  author       = {Ryan Swann and
                  Muhammad Osama and
                  Xiaohu Guo and
                  Bryant Nelson and
                  Lixun Zhang and
                  Alex Brown and
                  Yen Ong and
                  Ali Yazdani and
                  Sean Siddens and
                  Ganesh Dasika and
                  Alex Underwood},
  title        = {{tritonBLAS}: {Triton-based} Analytical Approach for {GEMM} Kernel Parameter
                  Selection},
  journal      = {CoRR},
  volume       = {abs/2512.04226},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2512.04226},
  doi          = {10.48550/ARXIV.2512.04226},
  eprinttype   = {arXiv},
  eprint       = {2512.04226},
  timestamp    = {Fri, 23 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2512-04226.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, ACM J. Emerg. Technol. Comput. Syst. 17(4), 2021.
% Braced words in the title keep their capitalisation under sentence-casing styles.
@article{DBLP:journals/jetc/ThakkerFZGMDB21,
  author       = {Urmish Thakker and
                  Igor Fedorov and
                  Chu Zhou and
                  Dibakar Gope and
                  Matthew Mattina and
                  Ganesh Dasika and
                  Jesse G. Beu},
  title        = {Compressing {RNNs} to Kilobyte Budget for {IoT} Devices Using {Kronecker}
                  Products},
  journal      = {{ACM} J. Emerg. Technol. Comput. Syst.},
  volume       = {17},
  number       = {4},
  pages        = {46:1--46:18},
  year         = {2021},
  url          = {https://doi.org/10.1145/3440016},
  doi          = {10.1145/3440016},
  timestamp    = {Mon, 11 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jetc/ThakkerFZGMDB21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (SustaiNLP, co-located with EMNLP 2020).
% DBLP:journals/corr/abs-2010-03193 carries the same title and author list (arXiv record);
% cite only one of the two unless both versions are intended.
@inproceedings{DBLP:conf/emnlp/ThakkerBGDM20,
  author       = {Urmish Thakker and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Ganesh Dasika and
                  Matthew Mattina},
  editor       = {Nafise Sadat Moosavi and
                  Angela Fan and
                  Vered Shwartz and
                  Goran Glavas and
                  Shafiq R. Joty and
                  Alex Wang and
                  Thomas Wolf},
  title        = {Rank and run-time aware compression of {NLP} Applications},
  booktitle    = {Proceedings of SustaiNLP: Workshop on Simple and Efficient Natural
                  Language Processing, SustaiNLP@EMNLP 2020, Online, November 20, 2020},
  pages        = {8--18},
  publisher    = {Association for Computational Linguistics},
  year         = {2020},
  url          = {https://doi.org/10.18653/v1/2020.sustainlp-1.2},
  doi          = {10.18653/V1/2020.SUSTAINLP-1.2},
  timestamp    = {Thu, 22 Sep 2022 17:53:15 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/ThakkerBGDM20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 2010.03193 (CoRR, 2020).
% Title and author list match DBLP:conf/emnlp/ThakkerBGDM20 (workshop version);
% cite only one of the two unless both versions are intended.
@article{DBLP:journals/corr/abs-2010-03193,
  author       = {Urmish Thakker and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Rank and run-time aware compression of {NLP} Applications},
  journal      = {CoRR},
  volume       = {abs/2010.03193},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.03193},
  eprinttype   = {arXiv},
  eprint       = {2010.03193},
  timestamp    = {Tue, 13 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-03193.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (EMC2, co-located with HPCA 2019).
% DBLP:journals/corr/abs-1903-01521 carries the same title and author list (arXiv record).
% Braced words in the title keep their capitalisation under sentence-casing styles.
@inproceedings{DBLP:conf/emc2/MajiMDBMM19,
  author       = {Partha Maji and
                  Andrew Mundy and
                  Ganesh Dasika and
                  Jesse G. Beu and
                  Matthew Mattina and
                  Robert Mullins},
  title        = {Efficient {Winograd} or {Cook-Toom} Convolution Kernel Implementation
                  on Widely Used Mobile {CPUs}},
  booktitle    = {2nd Workshop on Energy Efficient Machine Learning and Cognitive Computing
                  for Embedded Applications, EMC2@HPCA 2019, Washington, DC, USA, February
                  17, 2019},
  pages        = {1--5},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/EMC249363.2019.00008},
  doi          = {10.1109/EMC249363.2019.00008},
  timestamp    = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/emc2/MajiMDBMM19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (EMC2, co-located with HPCA 2019).
% DBLP:journals/corr/abs-1906-04886 carries the same title and author list (arXiv record).
@inproceedings{DBLP:conf/emc2/ThakkerBGDM19,
  author       = {Urmish Thakker and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Run-Time Efficient {RNN} Compression for Inference on Edge Devices},
  booktitle    = {2nd Workshop on Energy Efficient Machine Learning and Cognitive Computing
                  for Embedded Applications, EMC2@HPCA 2019, Washington, DC, USA, February
                  17, 2019},
  pages        = {26--30},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/EMC249363.2019.00013},
  doi          = {10.1109/EMC249363.2019.00013},
  timestamp    = {Tue, 19 Jul 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emc2/ThakkerBGDM19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (SysML 2019). No DOI is recorded, so the url field is the only locator.
% DBLP:journals/corr/abs-1903-01531 carries the same title and author list (arXiv record).
% The URL is stored raw (plain underscore); url-aware styles typeset it verbatim.
@inproceedings{DBLP:conf/mlsys/GopeDM19,
  author       = {Dibakar Gope and
                  Ganesh Dasika and
                  Matthew Mattina},
  editor       = {Ameet Talwalkar and
                  Virginia Smith and
                  Matei Zaharia},
  title        = {Ternary Hybrid Neural-Tree Networks for Highly Constrained {IoT} Applications},
  booktitle    = {Proceedings of the Second Conference on Machine Learning and Systems,
                  SysML 2019, Stanford, CA, USA, March 31 - April 2, 2019},
  publisher    = {mlsys.org},
  year         = {2019},
  url          = {https://proceedings.mlsys.org/paper_files/paper/2019/hash/4b1648906c25077d5232aa166af08eb0-Abstract.html},
  timestamp    = {Fri, 28 Jun 2024 10:42:01 +0200},
  biburl       = {https://dblp.org/rec/conf/mlsys/GopeDM19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (EMC2, NeurIPS 2019 edition).
% DBLP:journals/corr/abs-1910-02558 carries the same title and author list (arXiv record).
@inproceedings{DBLP:conf/nips/ThakkerFBGZDM19,
  author       = {Urmish Thakker and
                  Igor Fedorov and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Chu Zhou and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Pushing the limits of {RNN} Compression},
  booktitle    = {Fifth Workshop on Energy Efficient Machine Learning and Cognitive
                  Computing - NeurIPS Edition, EMC2@NeurIPS 2019, Vancouver, Canada,
                  December 13, 2019},
  pages        = {18--21},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/EMC2-NIPS53020.2019.00012},
  doi          = {10.1109/EMC2-NIPS53020.2019.00012},
  timestamp    = {Wed, 07 Jul 2021 15:49:53 +0200},
  biburl       = {https://dblp.org/rec/conf/nips/ThakkerFBGZDM19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Workshop paper (SenSys-ML 2019).
@inproceedings{DBLP:conf/sensys/TaoTDB19,
  author       = {Jin Tao and
                  Urmish Thakker and
                  Ganesh Dasika and
                  Jesse G. Beu},
  title        = {Skipping {RNN} State Updates without Retraining the Original Model},
  booktitle    = {Proceedings of the 1st Workshop on Machine Learning on Edge in Sensor
                  Systems, SenSys-ML 2019, New York, NY, USA, November 10, 2019},
  pages        = {31--36},
  publisher    = {{ACM}},
  year         = {2019},
  url          = {https://doi.org/10.1145/3362743.3362965},
  doi          = {10.1145/3362743.3362965},
  timestamp    = {Wed, 04 Dec 2024 09:44:10 +0100},
  biburl       = {https://dblp.org/rec/conf/sensys/TaoTDB19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 1903.01521 (CoRR, 2019).
% Title and author list match DBLP:conf/emc2/MajiMDBMM19 (workshop version);
% cite only one of the two unless both versions are intended.
@article{DBLP:journals/corr/abs-1903-01521,
  author       = {Partha Maji and
                  Andrew Mundy and
                  Ganesh Dasika and
                  Jesse G. Beu and
                  Matthew Mattina and
                  Robert Mullins},
  title        = {Efficient {Winograd} or {Cook-Toom} Convolution Kernel Implementation
                  on Widely Used Mobile {CPUs}},
  journal      = {CoRR},
  volume       = {abs/1903.01521},
  year         = {2019},
  url          = {https://arxiv.org/abs/1903.01521},
  eprinttype   = {arXiv},
  eprint       = {1903.01521},
  timestamp    = {Mon, 26 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-01521.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 1903.01531 (CoRR, 2019).
% Title and author list match DBLP:conf/mlsys/GopeDM19 (conference version);
% cite only one of the two unless both versions are intended.
@article{DBLP:journals/corr/abs-1903-01531,
  author       = {Dibakar Gope and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Ternary Hybrid Neural-Tree Networks for Highly Constrained {IoT} Applications},
  journal      = {CoRR},
  volume       = {abs/1903.01531},
  year         = {2019},
  url          = {https://arxiv.org/abs/1903.01531},
  eprinttype   = {arXiv},
  eprint       = {1903.01531},
  timestamp    = {Sat, 30 Mar 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1903-01531.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1904.03302 (CoRR, 2019).
% Braced acronyms in the title keep their capitalisation under sentence-casing styles.
@article{DBLP:journals/corr/abs-1904-03302,
  author       = {Urmish Thakker and
                  Ganesh Dasika and
                  Jesse G. Beu and
                  Matthew Mattina},
  title        = {Measuring scheduling efficiency of {RNNs} for {NLP} applications},
  journal      = {CoRR},
  volume       = {abs/1904.03302},
  year         = {2019},
  url          = {https://arxiv.org/abs/1904.03302},
  eprinttype   = {arXiv},
  eprint       = {1904.03302},
  timestamp    = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-03302.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1906.02876 (CoRR, 2019).
% Braced words in the title keep their capitalisation under sentence-casing styles.
@article{DBLP:journals/corr/abs-1906-02876,
  author       = {Urmish Thakker and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Chu Zhou and
                  Igor Fedorov and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Compressing {RNNs} for {IoT} devices by 15-38x using {Kronecker} Products},
  journal      = {CoRR},
  volume       = {abs/1906.02876},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.02876},
  eprinttype   = {arXiv},
  eprint       = {1906.02876},
  timestamp    = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-02876.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 1906.04886 (CoRR, 2019).
% Title and author list match DBLP:conf/emc2/ThakkerBGDM19 (workshop version);
% cite only one of the two unless both versions are intended.
@article{DBLP:journals/corr/abs-1906-04886,
  author       = {Urmish Thakker and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Run-Time Efficient {RNN} Compression for Inference on Edge Devices},
  journal      = {CoRR},
  volume       = {abs/1906.04886},
  year         = {2019},
  url          = {https://arxiv.org/abs/1906.04886},
  eprinttype   = {arXiv},
  eprint       = {1906.04886},
  timestamp    = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1906-04886.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv record 1910.02558 (CoRR, 2019).
% Title and author list match DBLP:conf/nips/ThakkerFBGZDM19 (workshop version);
% cite only one of the two unless both versions are intended.
@article{DBLP:journals/corr/abs-1910-02558,
  author       = {Urmish Thakker and
                  Igor Fedorov and
                  Jesse G. Beu and
                  Dibakar Gope and
                  Chu Zhou and
                  Ganesh Dasika and
                  Matthew Mattina},
  title        = {Pushing the limits of {RNN} Compression},
  journal      = {CoRR},
  volume       = {abs/1910.02558},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.02558},
  eprinttype   = {arXiv},
  eprint       = {1910.02558},
  timestamp    = {Sat, 23 Jan 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-02558.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Guest editors' introduction, ACM J. Emerg. Technol. Comput. Syst. 14(2), 2018.
@article{DBLP:journals/jetc/CaoLSD18,
  author       = {Yu Cao and
                  Xin Li and
                  Jae{-}sun Seo and
                  Ganesh Dasika},
  title        = {Guest Editors' Introduction: Frontiers of Hardware and Algorithms
                  for On-chip Learning},
  journal      = {{ACM} J. Emerg. Technol. Comput. Syst.},
  volume       = {14},
  number       = {2},
  pages        = {14:1--14:2},
  year         = {2018},
  url          = {https://doi.org/10.1145/3205944},
  doi          = {10.1145/3205944},
  timestamp    = {Fri, 09 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jetc/CaoLSD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Invited conference paper (Computing Frontiers 2017).
% The second author is written in "Last, First" form: in "First von Last" order BibTeX
% would read the lower-case "del" as a von-particle and take "Milagro ..." as the surname.
% NOTE(review): assumes "Maria del Milagro" is the given name - confirm against the paper.
@inproceedings{DBLP:conf/cf/LlewellynnFDFSP17,
  author       = {Tim Llewellynn and
                  Fern{\'{a}}ndez{-}Carrobles, Maria del Milagro and
                  Oscar D{\'{e}}niz and
                  Samuel Fricker and
                  Amos J. Storkey and
                  Nuria Pazos and
                  Gordana Velikic and
                  Kirsten Leufgen and
                  Rozenn Dahyot and
                  Sebastian Koller and
                  Georgios I. Goumas and
                  Peter Leitner and
                  Ganesh Dasika and
                  Lei Wang and
                  Kurt Tutschku},
  title        = {{BONSEYES:} Platform for Open Development of Systems of Artificial
                  Intelligence: Invited paper},
  booktitle    = {Proceedings of the Computing Frontiers Conference, CF'17, Siena, Italy,
                  May 15-17, 2017},
  pages        = {299--304},
  publisher    = {{ACM}},
  year         = {2017},
  url          = {https://doi.org/10.1145/3075564.3076259},
  doi          = {10.1145/3075564.3076259},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/cf/LlewellynnFDFSP17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (ACM/SIGDA FPGA symposium, 2016).
% Braced words in the title keep their capitalisation under sentence-casing styles.
@inproceedings{DBLP:conf/fpga/SudaCDMMVSC16,
  author       = {Naveen Suda and
                  Vikas Chandra and
                  Ganesh Dasika and
                  Abinash Mohanty and
                  Yufei Ma and
                  Sarma B. K. Vrudhula and
                  Jae{-}sun Seo and
                  Yu Cao},
  editor       = {Deming Chen and
                  Jonathan W. Greene},
  title        = {Throughput-Optimized {OpenCL-based} {FPGA} Accelerator for Large-Scale
                  Convolutional Neural Networks},
  booktitle    = {Proceedings of the 2016 {ACM/SIGDA} International Symposium on Field-Programmable
                  Gate Arrays, Monterey, CA, USA, February 21-23, 2016},
  pages        = {16--25},
  publisher    = {{ACM}},
  year         = {2016},
  url          = {https://doi.org/10.1145/2847263.2847276},
  doi          = {10.1145/2847263.2847276},
  timestamp    = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/fpga/SudaCDMMVSC16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper (UbiComp 2016).
@inproceedings{DBLP:conf/huc/RaykovODBL16,
  author       = {Yordan P. Raykov and
                  Emre Ozer and
                  Ganesh Dasika and
                  Alexis Boukouvalas and
                  Max A. Little},
  editor       = {Paul Lukowicz and
                  Antonio Kr{\"{u}}ger and
                  Andreas Bulling and
                  Youn{-}Kyung Lim and
                  Shwetak N. Patel},
  title        = {Predicting room occupancy with a single passive infrared {(PIR)} sensor
                  through behavior extraction},
  booktitle    = {Proceedings of the 2016 {ACM} International Joint Conference on Pervasive
                  and Ubiquitous Computing, UbiComp 2016, Heidelberg, Germany, September
                  12-16, 2016},
  pages        = {1016--1027},
  publisher    = {{ACM}},
  year         = {2016},
  url          = {https://doi.org/10.1145/2971648.2971746},
  doi          = {10.1145/2971648.2971746},
  timestamp    = {Wed, 11 Oct 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/huc/RaykovODBL16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}