% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Export from the RWTH Aachen publication server. The fields reportid, cin,
% ddc, cid, pnm, pid, and typ are repository-internal metadata; standard
% BibTeX/biblatex styles silently ignore unknown fields, so they are kept
% for provenance. Requires a UTF-8-capable backend (bibtex8/biber), see the
% notice at the top of this file.
@phdthesis{Mayfrank:1022363,
  author            = {Mayfrank, Daniel Georg},
  othercontributors = {Mitsos, Alexander and Lucia, Sergio},
  % Title stored in Title Case; {Koopman} is braced so sentence-casing
  % styles keep the proper noun capitalized. (Previously the title used a
  % single-letter brace {E} and left "koopman" unprotected.)
  title             = {End-to-End Reinforcement Learning of {Koopman} Model
                       Predictive Control},
  % Series volume only; the year "(2025)" that the catalog export embedded
  % here belongs in the year field below.
  volume            = {42},
  school            = {Rheinisch-Westfälische Technische Hochschule Aachen},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {Aachener Verfahrenstechnik},
  reportid          = {RWTH-2025-09976},
  series            = {Aachener Verfahrenstechnik series - AVT.SVT -
                       Systemverfahrenstechnik - Dissertationen},
  pages             = {1 Online-Ressource : Illustrationen},
  year              = {2025},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University 2026; Dissertation, Rheinisch-Westfälische
                       Technische Hochschule Aachen, 2025},
  abstract          = {Model-based control methods such as Model Predictive
                       Control (MPC) and variants thereof, e.g., economic nonlinear
                       MPC (eNMPC), remain indispensable in the chemical industry.
                       However, mechanistic models are often unavailable or too
                       computationally expensive for use in (e)NMPC. Data-driven
                       models, usually trained using system identification (SI)
                       approaches, can serve as a computationally cheap alternative
                       to mechanistic models. However, SI focuses narrowly on
                       maximizing average prediction accuracy, which can result in
                       suboptimal performance when the model is used as part of a
                       policy. In contrast, recent research has explored training
                       data-driven models end-to-end for optimal performance in
                       predictive control policies using reinforcement learning
                       (RL) approaches. This thesis contributes to this emerging
                       research field by developing methods for RL-based end-to-end
                       learning of Koopman models for (e)NMPC policies. Koopman
                       models can accurately represent the dynamics of nonlinear
                       systems while resulting in convex optimal control problems
                       (OCPs) when used in (e)NMPC, thus striking a favorable
                       balance between representational capacity and computational
                       efficiency. By performing post-optimal sensitivity analysis
                       on the resulting OCPs, we develop a method for constructing
                       automatically-differentiable Koopman-based (e)NMPC policies,
                       which can be optimized via the learnable parameters of the
                       Koopman model. We optimize the (e)NMPC policies for specific
                       control tasks using the state-of-the-art actor-critic RL
                       algorithm Proximal Policy Optimization (PPO). Assuming the
                       availability of full state measurements, we demonstrate the
                       effectiveness of our method in NMPC (setpoint tracking) and
                       eNMPC (demand response) case studies. These are based on (i)
                       a small continuous stirred-tank reactor model with two
                       differential states and two control inputs and (ii) an air
                       separation unit with 119 differential states and
                       approximately 2300 algebraic states. The results show that
                       the proposed method performs favorably in terms of the
                       control performance of the resulting policies compared to
                       traditional benchmarks, including neural network policies
                       trained using RL and Koopman-based eNMPC policies trained
                       via system identification. Furthermore, we show that, in
                       contrast to the neural network policies, the (e)NMPC
                       policies can react to certain changes in the control setting
                       without retraining. However, we observe (i) convergence
                       problems resulting from inaccurate policy gradient estimates
                       and (ii) low sample efficiency. To address the former issue,
                       we exploit the automatic differentiability of training
                       environments based on mechanistic simulation models to aid
                       the policy optimization, resulting in substantially improved
                       convergence and control performance. Furthermore, we improve
                       the sample efficiency of the learning process by integrating
                       our method for RL-based training of Koopman (e)NMPC policies
                       with Dyna-style model-based RL. We also show that when
                       leveraging model-based RL, the sample efficiency can be
                       increased further by utilizing partial prior knowledge about
                       the system dynamics via physics-informed model learning. In
                       sum, this thesis contributes to the field of data-driven
                       control and shows avenues toward higher-performance,
                       real-time-capable, data-driven (e)NMPCs.},
  cin               = {416710},
  ddc               = {620},
  cid               = {$I:(DE-82)416710_20140620$},
  pnm               = {HDS LEE - Helmholtz School for Data Science in Life, Earth
                       and Energy (HDS LEE) (HDS-LEE-20190612) /
                       Doktorandenprogramm},
  pid               = {G:(DE-Juel1)HDS-LEE-20190612 /
                       G:(DE-HGF)PHD-PROGRAM-20170404},
  typ               = {PUB:(DE-HGF)11 / PUB:(DE-HGF)3},
  % Bare DOI (no resolver prefix) is correct; styles/URL packages add it.
  doi               = {10.18154/RWTH-2025-09976},
  url               = {https://publications.rwth-aachen.de/record/1022363},
}