In this paper, we propose a monocular visual localization pipeline leveraging semantic and depth cues. We apply semantic consistency evaluation to rank the image retrieval results and a practical clustering technique to reject estimation outliers. In addition, we demonstrate a substantial performance boost achieved with a combination of multiple feature extractors. Furthermore, by using depth prediction with a deep neural network, we show that a significant number of falsely matched keypoints are identified and eliminated. The proposed pipeline outperforms most of the existing approaches on the Long-Term Visual Localization benchmark 2020.
% arXiv preprint, stored as a misc entry: the identifier lives in the
% eprint/archiveprefix fields rather than in a journal field, and the
% url is the arXiv abstract page for that same identifier.
@misc{arxiv.2005.11922,
  author        = {Fan, Huanhuan and Zhou, Yuhao and Li, Ang and Gao, Shuang and Li, Jijunnan and Guo, Yandong},
  title         = {Visual Localization Using Semantic Segmentation and Depth Prediction},
  year          = {2020},
  eprint        = {2005.11922},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2005.11922},
}