In this work, we propose a novel and efficient method for articulated human pose estimation in videos using a convolutional network architecture, which incorporates both color and motion features. We propose a new human body pose dataset, FLIC-motion, that extends the FLIC dataset with additional motion features. We apply our architecture to this dataset and report significantly better performance than current state-of-the-art pose detection systems.
arXiv preprint: the identifier is stored in eprint/archiveprefix rather than in a journal field.
@misc{arxiv.1409.7963,
  author        = {Jain, Arjun and Tompson, Jonathan and LeCun, Yann and Bregler, Christoph},
  title         = {{MoDeep}: A Deep Learning Framework Using Motion Features for Human Pose Estimation},
  year          = {2014},
  eprint        = {1409.7963},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1409.7963},
}