In this work we study parallelization of online learning, a core primitive in machine learning. In a parallel environment all known approaches for parallel online learning lead to delayed updates, where the model is updated using out-of-date information. In the worst case, or when examples are temporally correlated, delay can have a very adverse effect on the learning algorithm. Here, we analyze and present preliminary empirical results on a set of learning architectures based on a feature sharding approach that present various tradeoffs between delay, degree of parallelism, representation power and empirical performance.
% arXiv preprint (no journal venue): the identifier is stored in the eprint
% fields so that styles can format and link it themselves.
@misc{arxiv.1103.4204,
  author        = {Hsu, Daniel and Karampatziakis, Nikos and Langford, John and Smola, Alex},
  title         = {Parallel Online Learning},
  year          = {2011},
  eprint        = {1103.4204},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1103.4204},
}