In this work, we study the problem of co-optimizing communication, pre-computing, and computation cost in one-round multi-way join evaluation. We propose ADJ (Adaptive Distributed Join), a multi-way join approach for complex joins that finds an optimal query plan to process by exploring cost-effective partial results in terms of the trade-off between pre-computing, communication, and computation. We analyze the input relations for a given join query and find an optimal plan among a set of query plans of a specific form, with high-quality cost estimation by sampling. Our extensive experiments confirm that ADJ outperforms the existing multi-way join methods by up to orders of magnitude.
% arXiv preprint: the identifier is stored in eprint/archiveprefix, not in a journal field.
@misc{arxiv.2102.13370,
  author        = {Zhang, Hao and Qiao, Miao and Yu, Jeffrey Xu and Cheng, Hong},
  title         = {Fast Distributed Complex Join Processing},
  year          = {2021},
  eprint        = {2102.13370},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2102.13370},
}