Topic models provide a flexible and principled framework for exploring hidden structure in high-dimensional co-occurrence data and are commonly used in natural language processing (NLP) of text. In this paper, we design and implement a Java package, TopicModel4J, which contains 13 kinds of representative algorithms for fitting topic models. TopicModel4J provides an easy-to-use interface in the Java programming environment for data analysts to run the algorithms, and allows them to easily input and output data. In addition, this package provides a few unstructured text preprocessing techniques, such as splitting textual data into words, lowercasing the words, performing lemmatization and removing the useless characters, URLs and stop words.
% arXiv preprint record for the TopicModel4J paper.
% The arXiv identifier is stored in eprint/archiveprefix (plus a plain url)
% instead of a made-up journal name, so eprint-aware styles can format and
% link it. Braces in the title protect names from style-driven lowercasing.
% NOTE(review): author names were converted to "Last, First" form exactly as
% given; verify the spellings against the arXiv record.
@misc{arxiv.2010.14707,
  author        = {Qian, Yang and Jiang, Yuanchun and Chai, Yidong and Liu, Yezheng and Sun, Jiansha},
  title         = {{TopicModel4J}: A {Java} Package for Topic Models},
  year          = {2020},
  eprint        = {2010.14707},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2010.14707},
}