It has recently been shown that sparse, nonnegative tensor factorization of multi-modal electronic health record data is a promising approach to high-throughput computational phenotyping. However, such approaches typically do not leverage available domain knowledge while extracting the phenotypes; hence, some of the suggested phenotypes may not map well to clinical concepts or may be very similar to other suggested phenotypes. To address these issues, we present a novel, automatic approach called PIVETed-Granite that mines existing biomedical literature (PubMed) to obtain cannot-link constraints that are then used as side-information during a tensor-factorization-based computational phenotyping process. The resulting improvements are clearly observed in experiments using a large dataset from VUMC to identify phenotypes for hypertensive patients.
@comment{arXiv preprint: the identifier is stored in eprint/archiveprefix (not in journal) so that the bibliography style decides how to render it.}
@misc{arxiv.1808.02602,
  author        = {Henderson, Jette and Malin, Bradley A. and Ho, Joyce C. and Ghosh, Joydeep},
  title         = {{PIVETed-Granite}: Computational Phenotypes through Constrained Tensor Factorization},
  year          = {2018},
  eprint        = {1808.02602},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1808.02602},
}