Optimization with preference feedback is an active research area with many applications in engineering systems where humans play a central role, such as building control and autonomous vehicles. While most existing studies focus on optimizing a static user utility, few have investigated the closed-loop behavior of such methods while accounting for system transients. In this work, we propose an online feedback optimization controller that optimizes user utility using pairwise comparison feedback with both optimality and closed-loop stability guarantees. By adding a random exploration signal, the controller estimates the descent direction based on the binary comparison feedback between two consecutive time steps. We analyze its closed-loop behavior when interacting with a nonlinear plant and show that, under mild assumptions, the controller converges to the optimal point without inducing instability. Theoretical findings are further validated through numerical experiments.
% arXiv preprint: the identifier is stored in eprint/archiveprefix instead of
% a made-up journal name, so the bibliography style can format and link it.
% NOTE(review): identifier 2506.02225 implies first submission in June 2025,
% but the year is recorded as 2026 -- confirm which version is being cited.
@misc{arxiv.2506.02225,
  author        = {Wang, Wenbin and Xu, Wenjie and Jones, Colin N.},
  title         = {Human-in-the-loop: Real-time Preference Optimization},
  year          = {2026},
  eprint        = {2506.02225},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2506.02225},
}