% references.bib
@misc{song2023ds4sci,
  title         = {{DeepSpeed4Science} Initiative: Enabling Large-Scale Scientific
                   Discovery through Sophisticated {AI} System Technologies},
  author        = {Shuaiwen Leon Song and Bonnie Kruft and Minjia Zhang and Conglong
                   Li and Shiyang Chen and Chengming Zhang and Masahiro Tanaka and
                   Xiaoxia Wu and Jeff Rasley and Ammar Ahmad Awan and Connor Holmes
                   and Martin Cai and Adam Ghanem and Zhongzhu Zhou and Yuxiong He and
                   Pete Luferenko and Divya Kumar and Jonathan Weyn and Ruixiong Zhang
                   and Sylwester Klocek and Volodymyr Vragov and Mohammed AlQuraishi
                   and Gustaf Ahdritz and Christina Floristean and Cristina Negri and
                   Rao Kotamarthi and Venkatram Vishwanath and Arvind Ramanathan and
                   Sam Foreman and Kyle Hippe and Troy Arcomano and Romit Maulik and
                   Maxim Zvyagin and Alexander Brace and Bin Zhang and Cindy Orozco
                   Bohorquez and Austin Clyde and Bharat Kale and Danilo Perez-Rivera
                   and Heng Ma and Carla M. Mann and Michael Irvin and J. Gregory
                   Pauloski and Logan Ward and Valerie Hayot and Murali Emani and Zhen
                   Xie and Diangen Lin and Maulik Shukla and Ian Foster and James J.
                   Davis and Michael E. Papka and Thomas Brettin and Prasanna
                   Balaprakash and Gina Tourassi and John Gounley and Heidi Hanson and
                   Thomas E Potok and Massimiliano Lupo Pasini and Kate Evans and Dan
                   Lu and Dalton Lunga and Junqi Yin and Sajal Dash and Feiyi Wang and
                   Mallikarjun Shankar and Isaac Lyngaas and Xiao Wang and Guojing
                   Cong and Pei Zhang and Ming Fan and Siyan Liu and Adolfy Hoisie and
                   Shinjae Yoo and Yihui Ren and William Tang and Kyle Felker and
                   Alexey Svyatkovskiy and Hang Liu and Ashwin Aji and Angela Dalton
                   and Michael Schulte and Karl Schulz and Yuntian Deng and Weili Nie
                   and Josh Romero and Christian Dallago and Arash Vahdat and Chaowei
                   Xiao and Thomas Gibbs and Anima Anandkumar and Rick Stevens},
  year          = {2023},
  eprint        = {2310.04610},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2310.04610},
}
@misc{wei2022emergentabilitieslargelanguage,
  author        = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel
                   and Barret Zoph and Sebastian Borgeaud and Dani Yogatama
                   and Maarten Bosma and Denny Zhou and Donald Metzler
                   and Ed H. Chi and Tatsunori Hashimoto and Oriol Vinyals
                   and Percy Liang and Jeff Dean and William Fedus},
  title         = {Emergent Abilities of Large Language Models},
  year          = {2022},
  eprint        = {2206.07682},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2206.07682},
}
@misc{Burdi:2023climrr,
  title  = {The Climate Risk \& Resilience Portal ({ClimRR}) Metadata and Data
            Dictionary},
  author = {Burdi, C. and Branham, J. and Wall, T.},
  year   = {2023},
  note   = {Available at
            \url{https://anl.app.box.com/s/hmkkgkrkzxxocfe9kpgrzk2gfc4gizp8/file/1055145398460}},
  url    = {https://dub.sh/ClimRR-Metadata},
}
@misc{wittig2023progress,
  title         = {Progress on {$(g-2)_\mu$} from Lattice {QCD}},
  author        = {Wittig, Hartmut},
  year          = {2023},
  eprint        = {2306.04165},
  archivePrefix = {arXiv},
  primaryClass  = {hep-ph},
  url           = {https://arxiv.org/abs/2306.04165},
}
@article{Duane:1987de,
  title   = {{Hybrid Monte Carlo}},
  author  = {Duane, S. and Kennedy, A. D. and Pendleton, B. J. and Roweth, D.},
  journal = {Phys. Lett. B},
  volume  = {195},
  pages   = {216--222},
  year    = {1987},
  doi     = {10.1016/0370-2693(87)91197-X},
}
@article{Shanahan:2022ifi,
  author        = {Shanahan, Phiala and others},
  title         = {{Snowmass 2021 Computational Frontier CompF03 Topical Group Report:
                   Machine Learning}},
  eprint        = {2209.07559},
  archivePrefix = {arXiv},
  primaryClass  = {physics.comp-ph},
  reportNumber  = {FERMILAB-CONF-22-719-ND-PPD-QIS-SCD},
  month         = sep,
  year          = {2022},
}
@inproceedings{Boyda:2022nmh,
  author        = {Boyda, Denis and others},
  title         = {{Applications of Machine Learning to Lattice Quantum Field Theory}},
  booktitle     = {{Snowmass 2021}},
  eprint        = {2202.05838},
  archivePrefix = {arXiv},
  primaryClass  = {hep-lat},
  reportNumber  = {MIT-CTP/5405},
  month         = feb,
  year          = {2022},
}
@article{Foreman:2021ljl,
  title         = {{HMC with Normalizing Flows}},
  author        = {Foreman, Sam and Izubuchi, Taku and Jin, Luchang
                   and Jin, Xiao-Yong and Osborn, James C. and Tomiya, Akio},
  journal       = {PoS},
  volume        = {LATTICE2021},
  pages         = {073},
  year          = {2022},
  doi           = {10.22323/1.396.0073},
  eprint        = {2112.01586},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
@article{Foreman:2021rhs,
  author        = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.},
  title         = {{LeapfrogLayers: A Trainable Framework for Effective Topological
                   Sampling}},
  eprint        = {2112.01582},
  archivePrefix = {arXiv},
  primaryClass  = {hep-lat},
  doi           = {10.22323/1.396.0508},
  journal       = {PoS},
  volume        = {LATTICE2021},
  pages         = {508},
  month         = may,
  year          = {2022},
}
@inproceedings{Foreman:2021ixr,
  author        = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James C.},
  title         = {{Deep Learning Hamiltonian Monte Carlo}},
  booktitle     = {{9th International Conference on Learning Representations}},
  eprint        = {2105.03418},
  archivePrefix = {arXiv},
  primaryClass  = {hep-lat},
  month         = may,
  year          = {2021},
}
@online{foreman2023climate,
  title  = {Energy {Justice} {Analysis} of {Climate} {Data} with {ClimRR}},
  author = {Foreman, Sam},
  date   = {2023-08-07},
  url    = {https://saforem2.github.io/climate-analysis},
  langid = {en},
}
@misc{foreman2023-l2hmcqcd,
  title  = {l2hmc-qcd},
  author = {Foreman, Sam},
  date   = {2023-08-19},
  url    = {https://saforem2.github.io/l2hmc-qcd},
  langid = {en},
}
@misc{foreman2021deep,
  author        = {Sam Foreman and Xiao-Yong Jin and James C. Osborn},
  title         = {Deep Learning Hamiltonian Monte Carlo},
  year          = {2021},
  eprint        = {2105.03418},
  archivePrefix = {arXiv},
  primaryClass  = {hep-lat},
}
@inproceedings{foreman2023mlmc,
  title     = {{MLMC}: Machine Learning {Monte Carlo} for Lattice Gauge Theory},
  author    = {Foreman, Sam and Jin, Xiao-Yong and Osborn, James},
  booktitle = {40th International Symposium on Lattice Field Theory (Lattice 2023)},
  year      = {2023},
  note      = {Batavia, IL, United States, July 31 -- August 4, 2023},
}
@comment{Removed duplicate of wittig2023progress; an identical entry appears earlier in this file.}
@comment{Removed duplicate of Duane:1987de; an identical entry appears earlier in this file.}
@comment{Removed duplicate of Shanahan:2022ifi; an identical entry appears earlier in this file.}
@comment{Removed duplicate of Boyda:2022nmh; an identical entry appears earlier in this file.}
@comment{Removed duplicate of Foreman:2021rhs; an entry with the same key appears earlier in this file (this copy additionally carried month = may, i.e. "05").}
@comment{Removed duplicate of Foreman:2021ljl; an identical entry appears earlier in this file.}
@comment{Removed duplicate of Foreman:2021ixr; an identical entry appears earlier in this file.}
@comment{Source URL for the Montgomery_2023 entry below: https://towardsdatascience.com/mastering-language-models-32e1d891511a}
@misc{Montgomery_2023,
  title     = {Mastering Language Models},
  author    = {Montgomery, Samuel},
  year      = {2023},
  month     = oct,
  journal   = {Medium},
  publisher = {Towards Data Science},
  url       = {https://towardsdatascience.com/mastering-language-models-32e1d891511a},
}
@misc{yang2023harnessing,
  title         = {Harnessing the Power of {LLMs} in Practice: A Survey on {ChatGPT}
                   and Beyond},
  author        = {Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han
                   and Qizhang Feng and Haoming Jiang and Bing Yin and Xia Hu},
  year          = {2023},
  eprint        = {2304.13712},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
@article{Popel_2018,
  title     = {Training Tips for the {Transformer} Model},
  author    = {Martin Popel and Ond{\v{r}}ej Bojar},
  journal   = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {110},
  number    = {1},
  pages     = {43--70},
  month     = apr,
  year      = {2018},
  publisher = {Charles University in Prague, Karolinum Press},
  doi       = {10.2478/pralin-2018-0002},
  url       = {https://doi.org/10.2478/pralin-2018-0002},
}
@misc{vaswani2017attention,
  author        = {Ashish Vaswani and Noam Shazeer and Niki Parmar
                   and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez
                   and Lukasz Kaiser and Illia Polosukhin},
  title         = {Attention Is All You Need},
  year          = {2017},
  eprint        = {1706.03762},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
@misc{yao2023tree,
  title         = {{Tree of Thoughts}: Deliberate Problem Solving with Large
                   Language Models},
  author        = {Shunyu Yao and Dian Yu and Jeffrey Zhao and Izhak Shafran and
                   Thomas L. Griffiths and Yuan Cao and Karthik Narasimhan},
  year          = {2023},
  eprint        = {2305.10601},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
@article{Zvyagin2022.10.10.511571,
  author       = {Maxim Zvyagin and Alexander Brace and Kyle Hippe and Yuntian Deng
                  and Bin Zhang and Cindy Orozco Bohorquez and Austin Clyde and
                  Bharat Kale and Danilo Perez-Rivera and Heng Ma and Carla M. Mann
                  and Michael Irvin and J. Gregory Pauloski and Logan Ward and
                  Valerie Hayot-Sasson and Murali Emani and Sam Foreman and Zhen Xie
                  and Diangen Lin and Maulik Shukla and Weili Nie and Josh Romero and
                  Christian Dallago and Arash Vahdat and Chaowei Xiao and Thomas
                  Gibbs and Ian Foster and James J. Davis and Michael E. Papka and
                  Thomas Brettin and Rick Stevens and Anima Anandkumar and Venkatram
                  Vishwanath and Arvind Ramanathan},
  title        = {{GenSLMs}: Genome-scale language models reveal {SARS-CoV-2}
                  evolutionary dynamics},
  elocation-id = {2022.10.10.511571},
  year         = {2022},
  doi          = {10.1101/2022.10.10.511571},
  publisher    = {Cold Spring Harbor Laboratory},
  abstract     = {We seek to transform how new and emergent variants of
                  pandemic-causing viruses, specifically SARS-CoV-2, are identified
                  and classified. By adapting large language models (LLMs) for
                  genomic data, we build genome-scale language models (GenSLMs)
                  which can learn the evolutionary landscape of SARS-CoV-2 genomes.
                  By pretraining on over 110 million prokaryotic gene sequences and
                  finetuning a SARS-CoV-2-specific model on 1.5 million genomes, we
                  show that GenSLMs can accurately and rapidly identify variants of
                  concern. Thus, to our knowledge, GenSLMs represents one of the
                  first whole genome scale foundation models which can generalize
                  to other prediction tasks. We demonstrate scaling of GenSLMs on
                  GPU-based supercomputers and AI-hardware accelerators utilizing
                  1.63 Zettaflops in training runs with a sustained performance of
                  121 PFLOPS in mixed precision and peak of 850 PFLOPS. We present
                  initial scientific insights from examining GenSLMs in tracking
                  evolutionary dynamics of SARS-CoV-2, paving the path to realizing
                  this on large biological data. Competing Interest Statement: The
                  authors have declared no competing interest.},
  url          = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571},
  eprint       = {https://www.biorxiv.org/content/early/2022/11/23/2022.10.10.511571.full.pdf},
  journal      = {bioRxiv},
}