% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Entry Kaufmann:1010220 -- doctoral dissertation, RWTH Aachen University, 2025.
% The fields othercontributors, reportid, cin, ddc, cid and typ are not part of
% the standard BibTeX field set (they look like repository export metadata --
% confirm before relying on them); unknown fields are ignored by standard styles.
@phdthesis{Kaufmann:1010220,
  author            = {Kaufmann, Lea},
  othercontributors = {Kateri, Maria and Moustaki, Irini and Kamps, Udo},
  title             = {High-dimensional logistic regression with fusion-type
                       penalties},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {RWTH Aachen University},
  reportid          = {RWTH-2025-03939},
  pages             = {1 Online-Ressource : Illustrationen},
  year              = {2025},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University; Dissertation, RWTH Aachen University, 2025},
  abstract          = {In this thesis, penalized regression in the framework of
                       logistic regression with categorical covariates (i.e.
                       factors) is discussed. Providing an overview of existing
                       penalized regression methods along with their
                       characteristics, theoretical properties given in the
                       literature for linear regression are transferred to the
                       setting of logistic regression. First, the focus lies on
                       penalized regression methods for levels fusion before those
                       introduced for the purpose of factor selection are examined.
                       Computational methods employed for obtaining the
                       corresponding estimates by solving the resulting
                       minimization problems are discussed. Finally, extensive
                       simulation studies are conducted using the statistical
                       software R, investigating the behavior of the presented
                       methods in different simulation designs, showing the
                       advantages and disadvantages of these methods. It turns out
                       that there exists no penalty function so far, which
                       simultaneously performs factor selection and levels fusion.
                       To close this gap, a novel penalty function, called L0-Fused
                       Group Lasso (L0-FGL) is introduced. The theoretical
                       investigation of L0-FGL is obtained, showing valuable
                       asymptotic properties. These properties justify that the new
                       method is a suitable choice for the purpose of obtaining
                       sparse models in penalized logistic regression with factors.
                       Then, convenient algorithms to calculate the L0-FGL
                       estimates are employed. The behavior of L0-FGL is
                       investigated in different simulation designs, showing that,
                       on the one hand, L0-FGL is able to improve the factor
                       selection performance of those penalties for levels fusion
                       and, on the other hand, L0-FGL is able to perform both
                       factor selection and levels fusion. Finally, statistical
                       inference analysis for L0-FGL is provided. In particular, a
                       two-stage method called two-stage L0-FGL is proposed,
                       including a step for dimension reduction through factor
                       selection and levels fusion, and an inferential step.
                       Generally speaking, the two-stage method first reduces the
                       dimension and, having that, those non-influential factors
                       that are still included in the model are removed through
                       statistical tests. Considering two different approaches for
                       corrections for multiplicity of testing, a single and a
                       multiple sample splitting approach is applied. Based on the
                       asymptotic properties of L0-FGL, convenient asymptotic error
                       control properties are shown for two-stage L0-FGL, yielding
                       that this approach is a reasonable choice with a solid
                       theoretical basis.},
  cin               = {116510 / 110000},
  ddc               = {510},
  cid               = {$I:(DE-82)116510_20140620$ / $I:(DE-82)110000_20140620$},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2025-03939},
  url               = {https://publications.rwth-aachen.de/record/1010220},
}