In this paper, we present an approach for learning to imitate human behavior on a semantic level by markerless visual observation. We analyze a set of spatial constraints on human pose data extracted using convolutional pose machines and object information extracted from 2D image sequences. A scene analysis, based on an ontology of objects and affordances, is combined with continuous human pose estimation and spatial object relations. Using a set of constraints, we associate the observed human actions with a set of executable robot commands. We demonstrate our approach in a kitchen task, where the robot learns to prepare a meal.
% arXiv preprint: the identifier is stored in eprint/archiveprefix (not in a journal field).
@misc{arxiv.1807.11541,
  author        = {Memmesheimer, Raphael and Mykhalchyshyna, Ivanna and Seib, Viktor and Theisen, Nick and Paulus, Dietrich},
  title         = {Markerless Visual Robot Programming by Demonstration},
  year          = {2018},
  eprint        = {1807.11541},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1807.11541},
}