In the wake of a polarizing election, social media is laden with hateful content. To address various limitations of supervised hate speech classification methods including corpus bias and huge cost of annotation, we propose a weakly supervised two-path bootstrapping approach for an online hate speech detection model leveraging large-scale unlabeled data. This system significantly outperforms hate speech detection systems that are trained in a supervised manner using manually annotated data. Applying this model on a large quantity of tweets collected before, after, and on election day reveals motivations and patterns of inflammatory language.
@article{arxiv.1710.07394,
title = {Recognizing Explicit and Implicit Hate Speech Using a Weakly Supervised Two-path Bootstrapping Approach},
author = {Lei Gao and Alexis Kuppersmith and Ruihong Huang},
journal= {arXiv preprint arXiv:1710.07394},
year = {2018}
}