In this paper, we introduce a software-defined framework that enables the parallel utilization of all the programmable processing resources available in heterogeneous systems-on-chip (SoCs), including FPGA-based hardware accelerators and programmable CPUs. Two platforms with different architectures are considered, and a single C/C++ source code is used in both of them for the CPU and FPGA resources. Instead of simply using the hardware accelerator to offload a task from the CPU, we propose a scheduler that dynamically distributes the tasks among all the resources to fully exploit all computing devices while minimizing load imbalance. The multi-architecture study compares an ARMv7 and an ARMv8 implementation with different numbers and types of CPU cores and also different FPGA micro-architectures and sizes. We measure that both platforms benefit from having the CPU cores assist FPGA execution at the same level of energy requirements.
@misc{arxiv.1802.03316,
  author        = {Nunez-Yanez, Jose and Hosseinabady, Mohammad and Amiri, Moslem and Rodr{\'\i}guez, Andr{\'e}s and Asenjo, Rafael and Navarro, Angeles and Gran-Tejero, Rub{\'e}n and Su{\'a}rez-Gracia, Dar{\'\i}o},
  title         = {Parallelizing Workload Execution in Embedded and High-Performance Heterogeneous Systems},
  year          = {2018},
  eprint        = {1802.03316},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1802.03316},
}