Data building for automatic post-editing (APE) requires extensive and expert-level human effort, as it contains an elaborate process that involves identifying errors in sentences and providing suitable revisions. Hence, we develop a self-supervised data generation tool, deployable as a web application, that minimizes human supervision and constructs personalized APE data from a parallel corpus for several language pairs with English as the target language. Data-centric APE research can be conducted using this tool, involving many language pairs that have not been studied thus far owing to the lack of suitable data.
@comment{
  arXiv preprint entry. The arXiv identifier is stored in the eprint and
  archiveprefix fields (plus the derived doi and url) instead of being
  embedded in a journal field, so that styles can format and link it.
  NOTE(review): the identifier prefix 2111 indicates a first submission in
  November 2021, while year is 2022; confirm which version is being cited.
}
@misc{arxiv.2111.12284,
  author        = {Moon, Hyeonseok and Park, Chanjun and Eo, Sugyeong and Seo, Jaehyung and Lee, SeungJun and Lim, Heuiseok},
  title         = {A Self-Supervised Automatic Post-Editing Data Generation Tool},
  year          = {2022},
  eprint        = {2111.12284},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2111.12284},
  url           = {https://arxiv.org/abs/2111.12284},
}