Kernel matrix-vector product is ubiquitous in many science and engineering applications. However, a naive method requires $O(N^2)$ operations, which becomes prohibitive for large-scale problems. We introduce a parallel method that provably requires $O(N)$ operations to reduce the computation cost. The distinct feature of our method is that it requires only the ability to evaluate the kernel function, offering a black-box interface to users. Our parallel approach targets multi-core shared-memory machines and is implemented using OpenMP. Numerical results demonstrate up to 19× speedup on 32 cores. We also present a real-world application in geostatistics, where our parallel method was used to deliver fast principal component analysis of covariance matrices.
% arXiv preprint: the identifier is stored in eprint/archiveprefix rather than in a fake journal field.
% NOTE(review): the arXiv ID prefix 1903 indicates a first submission in March 2019; year 2021 is kept as found -- confirm which version is being cited.
@misc{arxiv.1903.02153,
  author        = {Wang, Ruoxi and Chen, Chao and Lee, Jonghyun and Darve, Eric},
  title         = {{PBBFMM3D}: a parallel black-box algorithm for kernel matrix-vector multiplication},
  year          = {2021},
  eprint        = {1903.02153},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1903.02153},
}