Large language models (LLMs) are demonstrating significant promise as an alternative strategy to facilitate analyses and optimizations of high-performance computing programs, circumventing the need for resource-intensive manual tool creation. In this paper, we explore a novel LLM-based data race detection approach combining prompt engineering and fine-tuning techniques. We create a dedicated dataset named DRB-ML, which is derived from DataRaceBench, with fine-grained labels showing the presence of data race pairs and their associated variables, line numbers, and read/write information. DRB-ML is then used to evaluate representative LLMs and fine-tune open-source ones. Our experiment shows that LLMs can be a viable approach to data race detection. However, they still cannot compete with traditional data race detection tools when we need detailed information about variable pairs causing data races.
@comment{arXiv preprint 2308.07505. The arXiv identifier is stored in the eprint/archiveprefix fields (plus doi and url) instead of the journal field, so the bibliography style decides how to print it.}
@misc{arxiv.2308.07505,
  author        = {Chen, Le and Ding, Xianzhong and Emani, Murali and Vanderbruggen, Tristan and Lin, Pei-hung and Liao, Chunhua},
  title         = {Data Race Detection Using Large Language Models},
  year          = {2023},
  eprint        = {2308.07505},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2308.07505},
  url           = {https://arxiv.org/abs/2308.07505},
}