The Microsoft Malware Classification Challenge was announced in 2015 along with the publication of a huge dataset of nearly 0.5 terabytes, consisting of the disassembly and bytecode of more than 20,000 malware samples. Apart from serving in the Kaggle competition, the dataset has become a standard benchmark for research on modeling malware behaviour. To date, the dataset has been cited in more than 50 research papers. Here we provide a high-level comparison of the publications citing the dataset. The comparison simplifies finding potential research directions in this field and supports future performance evaluation on the dataset.
% arXiv preprint: the identifier is stored in eprint/archiveprefix, not in a journal field.
@misc{arxiv.1802.10135,
  author        = {Ronen, Royi and Radu, Marian and Feuerstein, Corina and Yom-Tov, Elad and Ahmadi, Mansour},
  title         = {{Microsoft} Malware Classification Challenge},
  year          = {2018},
  eprint        = {1802.10135},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1802.10135},
}