Autofocus is an important task for digital cameras, yet current approaches often exhibit poor performance. We propose a learning-based approach to this problem, and provide a realistic dataset of sufficient size for effective learning. Our dataset is labeled with per-pixel depths obtained from multi-view stereo, following "Learning single camera depth estimation using dual-pixels". Using this dataset, we apply modern deep classification models and an ordinal regression loss to obtain an efficient learning-based autofocus technique. We demonstrate that our approach provides a significant improvement compared with previous learned and non-learned methods: our model reduces the mean absolute error by a factor of 3.6 over the best comparable baseline algorithm. Our dataset and code are publicly available.
% arXiv preprint: the identifier lives in eprint/archiveprefix (not in a journal field) so styles can format and link it.
@misc{arxiv.2004.12260,
  author        = {Herrmann, Charles and Bowen, Richard Strong and Wadhwa, Neal and Garg, Rahul and He, Qiurui and Barron, Jonathan T. and Zabih, Ramin},
  title         = {Learning to Autofocus},
  year          = {2020},
  eprint        = {2004.12260},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2004.12260},
}