% references.bib: The Inference-Time Compute Frontier
% All entries verified against arXiv, OpenReview, or publisher pages as of 2026-05-10.

@article{kaplan2020scaling,
  author    = {Jared Kaplan and Sam McCandlish and Tom Henighan and Tom B. Brown and
               Chess Child and Rewon Child and Scott Gray and Alec Radford and
               Jeffrey Wu and Dario Amodei},
  title     = {Scaling Laws for Neural Language Models},
  journal   = {arXiv preprint arXiv:2001.08361},
  year      = {2020},
  url       = {https://arxiv.org/abs/2001.08361},
}

@article{hoffmann2022chinchilla,
  author    = {Jordan Hoffmann and Sebastian Borgeaud and Arthur Mensch and
               Elena Buchatskaya and Trevor Cai and Eliza Rutherford and
               Diego de las Casas and Lisa Anne Hendrycks and Johannes Welbl and
               Aidan Clark and Tom Hennigan and Eric Noland and Katie Millican and
               George van den Driessche and Bogdan Damoc and Aurelia Guy and
               Simon Osindero and Karen Simonyan and Erich Elsen and Jack W. Rae and
               Oriol Vinyals and Laurent Sifre},
  title     = {Training Compute-Optimal Large Language Models},
  journal   = {arXiv preprint arXiv:2203.15556},
  year      = {2022},
  url       = {https://arxiv.org/abs/2203.15556},
}

@article{snell2024testtime,
  author    = {Charlie Snell and Jaehoon Lee and Kelvin Xu and Aviral Kumar},
  title     = {Scaling {LLM} Test-Time Compute Optimally Can be More Effective
               than Scaling Model Parameters},
  journal   = {arXiv preprint arXiv:2408.03314},
  year      = {2024},
  url       = {https://arxiv.org/abs/2408.03314},
}

@article{brown2024monkeys,
  author    = {Bradley Brown and Jordan Juravsky and Ryan Ehrlich and Ronald Clark and
               Quoc V. Le and Christopher Re and Azalia Mirhoseini},
  title     = {Large Language Monkeys: Scaling Inference Compute with Repeated Sampling},
  journal   = {arXiv preprint arXiv:2407.21787},
  year      = {2024},
  url       = {https://arxiv.org/abs/2407.21787},
}

@article{guan2025rstarmath,
  author    = {Xinyu Guan and Li Lyna Zhang and Yifei Liu and Ning Shang and
               Youran Sun and Yi Zhu and Fan Yang and Mao Yang},
  title     = {{rStar-Math}: Small {LLMs} Can Master Math Reasoning with
               Self-Evolved Deep Thinking},
  journal   = {arXiv preprint arXiv:2501.04519},
  year      = {2025},
  url       = {https://arxiv.org/abs/2501.04519},
}

@article{deepseek2025r1,
  author    = {{DeepSeek-AI}},
  title     = {{DeepSeek-R1}: Incentivizing Reasoning Capability in {LLMs} via
               Reinforcement Learning},
  journal   = {arXiv preprint arXiv:2501.12948},
  year      = {2025},
  url       = {https://arxiv.org/abs/2501.12948},
}

@inproceedings{erol2026costofpass,
  author    = {Umutcan Erol and Jad El and Mirac Suzgun and Mert Yuksekgonul
               and James Zou},
  title     = {The Cost of Being Right: Evaluating Language Models by the
               Cost-of-Pass},
  booktitle = {International Conference on Learning Representations},
  year      = {2026},
  url       = {https://openreview.net/forum?id=vC9S20zsgN},
}

@article{lightman2023prm800k,
  author    = {Hunter Lightman and Vineet Kosaraju and Yura Burda and Harri Edwards and
               Bowen Baker and Teddy Lee and Jan Leike and John Schulman and
               Ilya Sutskever and Karl Cobbe},
  title     = {Let's Verify Step by Step},
  journal   = {arXiv preprint arXiv:2305.20050},
  year      = {2023},
  url       = {https://arxiv.org/abs/2305.20050},
}

@article{cobbe2021gsm8k,
  author    = {Karl Cobbe and Vineet Kosaraju and Mohammad Bavarian and Mark Chen and
               Heewoo Jun and Lukasz Kaiser and Matthias Plappert and Jerry Tworek and
               Jacob Hilton and Reiichiro Nakano and Christopher Hesse and
               John Schulman},
  title     = {Training Verifiers to Solve Math Word Problems},
  journal   = {arXiv preprint arXiv:2110.14168},
  year      = {2021},
  url       = {https://arxiv.org/abs/2110.14168},
}

@techreport{aiindex2025,
  author      = {{Stanford Human-Centered AI Institute}},
  title       = {{AI} Index Report 2025},
  institution = {Stanford University},
  year        = {2025},
  url         = {https://hai.stanford.edu/ai-index/2025-ai-index-report},
}

@article{liao2025priceofprogress,
  author    = {Neil Thompson and Sukwoong Choi and Yunjie (Grace) Liao},
  title     = {The Price of Progress: Tracking the Declining Cost of Computing,
               {AI}, and Other Transformative Technologies},
  journal   = {arXiv preprint arXiv:2511.23455},
  year      = {2025},
  url       = {https://arxiv.org/abs/2511.23455},
}

@misc{bhardwaj2026inference,
  author       = {Manu Bhardwaj},
  title        = {The Inference Stack in 2026.
                  A Field Note on Token Economics, Runtime Systems, and Model Architecture},
  howpublished = {Field Notes \#1, ifitsmanu.com},
  year         = {2026},
  month        = {May},
  url          = {https://ifitsmanu.com/papers/the-inference-stack-2026},
}

@misc{bhardwaj2026costcorrect,
  author       = {Manu Bhardwaj},
  title        = {The Cost of Being Right. Verification Economics in 2026},
  howpublished = {Field Notes \#2, ifitsmanu.com},
  year         = {2026},
  month        = {May},
  url          = {https://ifitsmanu.com/papers/the-cost-of-being-right},
}

@misc{bhardwaj2026alpha,
  author       = {Manu Bhardwaj},
  title        = {The {$\alpha$} Asymmetry. Why Verifiers Can Be Smaller Than Generators},
  howpublished = {Field Notes \#3, ifitsmanu.com},
  year         = {2026},
  month        = {May},
  url          = {https://ifitsmanu.com/papers/the-alpha-asymmetry},
}
