For distributions $P$ and $Q$ with different supports or undefined densities, the divergence $D(P \,\|\, Q)$ may not exist. We define a Spread Divergence $\tilde{D}(P \,\|\, Q)$ on modified $P$ and $Q$ and describe sufficient conditions for the existence of such a divergence. We demonstrate how to maximize the discriminatory power of a given divergence by parameterizing and learning the spread. We also give examples of using a Spread Divergence to train implicit generative models, including linear models (Independent Components Analysis) and non-linear models (Deep Generative Networks).
% arXiv preprint "Spread Divergence". The arXiv identifier is stored in the
% eprint/archiveprefix fields rather than being embedded in a journal name.
% NOTE(review): the identifier prefix 1811 indicates a first submission in
% November 2018, while year is 2022 -- confirm which version is being cited.
@misc{arxiv.1811.08968,
  author        = {Zhang, Mingtian and Hayes, Peter and Bird, Tom and Habib, Raza and Barber, David},
  title         = {Spread Divergence},
  year          = {2022},
  eprint        = {1811.08968},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1811.08968},
}