This paper considers a distributed reinforcement learning problem in which a network of multiple agents aims to cooperatively maximize the globally averaged return through communication with only local neighbors. A randomized communication-efficient multi-agent actor-critic algorithm is proposed for possibly unidirectional communication relationships depicted by a directed graph. It is shown that the algorithm can solve the problem for strongly connected graphs by allowing each agent to transmit only two scalar-valued variables at one time.
% arXiv preprint 1907.03053: the identifier is stored in the eprint fields (not in a journal field).
@misc{arxiv.1907.03053,
  author        = {Lin, Yixuan and Zhang, Kaiqing and Yang, Zhuoran and Wang, Zhaoran and Ba{\c{s}}ar, Tamer and Sandhu, Romeil and Liu, Ji},
  title         = {A Communication-Efficient Multi-Agent Actor-Critic Algorithm for Distributed Reinforcement Learning},
  year          = {2019},
  eprint        = {1907.03053},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1907.03053},
}