Abstract

We introduce the Bayesian Compiler Optimization framework (BaCO), a general-purpose autotuner for modern compilers targeting CPUs, GPUs, and FPGAs. BaCO provides the flexibility needed to handle the requirements of modern autotuning tasks. In particular, it deals with permutation, ordered, and continuous parameter types along with both known and unknown parameter constraints. To reason about these parameter types and efficiently deliver high-quality code, BaCO uses Bayesian optimization algorithms specialized towards the autotuning domain. We demonstrate BaCO's effectiveness on three modern compiler systems: TACO, RISE & ELEVATE, and HPVM2FPGA for CPUs, GPUs, and FPGAs, respectively. For these domains, BaCO outperforms current state-of-the-art autotuners by delivering on average 1.36x–1.56x faster code with a tiny search budget, and BaCO is able to reach expert-level performance 2.9x–3.9x faster.

Article

Article URL

Article

pdf

BibTeX

  @inproceedings{10.1145/3623278.3624770,
    author    = {Hellsten, Erik Orm and Souza, Artur and Lenfers, Johannes and
                 Lacouture, Rubens and Hsu, Olivia and Ejjeh, Adel and
                 Kjolstad, Fredrik and Steuwer, Michel and Olukotun, Kunle and
                 Nardi, Luigi},
    title     = {{BaCO}: A Fast and Portable {Bayesian} Compiler Optimization
                 Framework},
    year      = {2024},
    isbn      = {9798400703942},
    publisher = {Association for Computing Machinery},
    address   = {New York, NY, USA},
    url       = {https://doi.org/10.1145/3623278.3624770},
    doi       = {10.1145/3623278.3624770},
    abstract  = {We introduce the Bayesian Compiler Optimization framework (BaCO), a
                 general purpose autotuner for modern compilers targeting CPUs, GPUs, and FPGAs.
                 BaCO provides the flexibility needed to handle the requirements of modern
                 autotuning tasks. Particularly, it deals with permutation, ordered, and
                 continuous parameter types along with both known and unknown parameter
                 constraints. To reason about these parameter types and efficiently deliver
                 high-quality code, BaCO uses Bayesian optimization algorithms specialized
                 towards the autotuning domain. We demonstrate BaCO's effectiveness on three
                 modern compiler systems: TACO, RISE \& ELEVATE, and HPVM2FPGA for CPUs, GPUs,
                 and FPGAs respectively. For these domains, BaCO outperforms current
                 state-of-the-art auto-tuners by delivering on average 1.36X--1.56X faster code
                 with a tiny search budget, and BaCO is able to reach expert-level performance
                 2.9X--3.9X faster.},
    booktitle = {Proceedings of the 28th {ACM} International Conference on
                 Architectural Support for Programming Languages and Operating
                 Systems, Volume 4},
    pages     = {19--42},
    numpages  = {24},
    keywords  = {compiler optimizations, high-performance computing,
                 bayesian optimization, autotuning, autoscheduling},
    location  = {Vancouver, BC, Canada},
    series    = {ASPLOS '23}
  }