This paper introduces CodeGemma, a collection of specialized open code models built on top of Gemma, capable of a variety of code and natural language generation tasks. We release three model variants. CodeGemma 7B pretrained (PT) and instruction-tuned (IT) variants have remarkably resilient natural language understanding, excel in mathematical reasoning, and match code capabilities of other open models. CodeGemma 2B is a state-of-the-art code completion model designed for fast code infilling and open-ended generation in latency-sensitive settings.
@comment{arXiv preprint. The team name carries its own brace pair so BibTeX treats it as one indivisible corporate author rather than splitting it into first/last name parts. The arXiv identifier lives in eprint/archiveprefix instead of a journal field.}
@misc{arxiv.2406.11409,
  author        = {{CodeGemma Team} and Zhao, Heri and Hui, Jeffrey and Howland, Joshua and Nguyen, Nam and Zuo, Siqi and Hu, Andrea and Choquette-Choo, Christopher A. and Shen, Jingyue and Kelley, Joe and Bansal, Kshitij and Vilnis, Luke and Wirth, Mateo and Michel, Paul and Choy, Peter and Joshi, Pratik and Kumar, Ravin and Hashmi, Sarmad and Agrawal, Shubham and Gong, Zhitao and Fine, Jane and Warkentin, Tris and Hartman, Ale Jakse and Ni, Bin and Korevec, Kathy and Schaefer, Kelly and Huffman, Scott},
  title         = {{CodeGemma}: Open Code Models Based on {Gemma}},
  year          = {2024},
  eprint        = {2406.11409},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2406.11409},
}