% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral dissertation by Jan Gerald Rittig (RWTH Aachen University, 2025),
% exported from the RWTH Publications record given in the url field.
% The fields othercontributors, reportid, extent, cin, ddc, cid, pnm, pid and
% typ are not standard BibTeX fields; they are kept for provenance, and
% standard styles ignore field names they do not know.
% The extent field holds the catalogue's physical description; it is kept out
% of the pages field so that no style prints it as a page range.
@phdthesis{Rittig:1012098,
  author = {Rittig, Jan Gerald},
  othercontributors = {Mitsos, Alexander and Grohe, Martin},
  title = {Graph machine learning for molecular property prediction
    and design},
  volume = {35},
  school = {Rheinisch-Westfälische Technische Hochschule Aachen},
  type = {Dissertation},
  address = {Aachen},
  publisher = {RWTH Aachen University},
  reportid = {RWTH-2025-04861},
  series = {Aachener Verfahrenstechnik series - AVT.SVT - Process
    systems engineering},
  extent = {1 Online-Ressource : Illustrationen},
  year = {2025},
  note = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
    University; Dissertation, Rheinisch-Westfälische Technische
    Hochschule Aachen, 2025},
  abstract = {Molecules with optimal properties are essential for
    chemical engineering. However, the search for promising
    molecules with desired properties – which can also lead to
    more efficient chemical processes – is often limited by
    missing property data, leading to the need for predictive
    models. Based on a graph representation of molecules with
    atoms as nodes and bonds as edges, graph machine learning
    (ML) has recently emerged as a powerful approach for
    predicting molecular properties and exploring the chemical
    space. In this dissertation, we therefore utilize graph ML
    to advance the identification of optimal molecules for
    chemical engineering applications. We first develop graph
    neural networks (GNNs) to predict molecular properties that
    are highly relevant for chemical engineering. Our developed
    GNN models provide highly accurate predictions of pure
    component properties, such as normal boiling points and
    biodegradability, and mixture properties, e.g., activity
    coefficients. The GNNs are applicable to a wide spectrum of
    molecules and can be readily transferred to predict other
    properties of interest. To further enhance the predictive
    quality of GNNs, we incorporate thermodynamic relations into
    the model architecture and training. Specifically, we
    propose thermodynamics-informed GNNs that learn
    thermodynamics through regularization during model training,
    and thermodynamic-consistent GNNs that predict fundamental
    thermodynamic potentials, such as the Gibbs free energy,
    from which related properties can be deduced using automatic
    differentiation. Using activity coefficients as a prime
    example, we demonstrate that the GNNs provide thermodynamic
    consistent predictions with increased accuracy and
    generalization capabilities, paving the way for combining ML
    with thermodynamics. Targeting the design of molecules with
    desired properties, we develop and apply a graph ML
    computer-aided molecular design (CAMD) framework. The
    framework combines GNNs with generative graph ML and
    optimization in a modular way. We use generative models,
    i.e., variational autoencoders (VAEs) and generative
    adversarial network (GAN), to learn a continuous molecular
    space that enables strategic sampling of novel molecules
    using optimization approaches, such as Bayesian optimization
    (BO) and genetic algorithms. The properties of these
    molecules are then predicted by GNNs. Thereby, we provide a
    data-driven CAMD framework that enables automated design of
    molecules based on available property data. We apply our
    framework to the design of high-octane fuels and identify
    well-known octane enhancers as well as promising new fuel
    candidates, one of which we investigate in engine
    experiments, demonstrating an important step towards
    ML-driven molecular discovery. Building on our graph ML CAMD
    framework, we extend both generative ML models and
    optimization in molecular design. That is, we propose a
    generative graph transformer model, called GraphXForm, that
    constructs molecular graphs with desired properties by
    sequentially adding atoms and bonds to an initial structure
    in a self-improving loop. We apply GraphXForm to the design
    of solvents for liquid-liquid extraction processes,
    outperforming state-of-the-art generative ML methods while
    allowing for the consideration of molecular structure
    constraints and thus increasing flexibility in molecular
    design. We further propose an optimization-based CAMD
    approach by formulating ML-based molecular design as
    mixed-integer linear program to identify molecules with
    global optimal predicted properties, which is highly
    promising to increase the sample efficiency in molecular
    discovery. Overall, we provide predictive and generative
    graph ML methods to identify molecules with desired
    properties for energy and chemical systems. This
    dissertation thus advances both ML and the molecular scale
    in chemical engineering, while the process scale can be
    integrated in future work.},
  cin = {416710},
  ddc = {620},
  cid = {$I:(DE-82)416710_20140620$},
  pnm = {DFG project G:(GEPRIS)466417970 - Generatives
    graph-basiertes maschinelles Lernen für das integrierte
    Design von Molekülen und Prozessen (466417970) / HDS LEE -
    Helmholtz School for Data Science in Life, Earth and Energy
    (HDS LEE) (HDS-LEE-20190612) / SPP 2331: Maschinelles Lernen
    in der Verfahrenstechnik. Wissen trifft auf Daten:
    Interpretierbarkeit, Extrapolation, Verlässlichkeit,
    Vertrauen / Doktorandenprogramm},
  pid = {G:(GEPRIS)466417970 / G:(DE-Juel1)HDS-LEE-20190612 /
    G:(GEPRIS)441958259 / G:(DE-HGF)PHD-PROGRAM-20170404},
  typ = {PUB:(DE-HGF)11 / PUB:(DE-HGF)3},
  doi = {10.18154/RWTH-2025-04861},
  url = {https://publications.rwth-aachen.de/record/1012098},
}