Language models are increasingly being deployed as user simulators, but their memory is far more reliable than that of real users. To measure this gap, we run a series of classic memory experiments from psychology on both humans and language models. Across tasks, we find that out-of-the-box language models exhibit better memory than humans, even when prompted to imitate human behavior. We then show that better prompting strategies and the use of a compactor can cause language models to forget content in a more human-like way. Using these methods, we show preliminary evidence that language models with human-like memory constraints can function as more effective user simulators in a downstream education task. Finally, we release human reference data and benchmarks to support future work on simulating human memory with language models.
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix (not in a journal field) so styles can format and link it.}
@misc{arxiv.2605.25680,
  author        = {Wang, Qihan and Tomlin, Nicholas and Hu, Michael and Dillon, Brian and Linzen, Tal},
  title         = {Simulating Human Memory with Language Models},
  year          = {2026},
  eprint        = {2605.25680},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.25680},
}