% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% PhD thesis record for Arthur Drichel (RWTH Aachen University, 2025).
% Standard bibliographic fields come first; the trailing fields (cin, ddc,
% cid, pnm, pid, typ), along with reportid, othercontributors and extent,
% are not standard BibTeX fields and are silently ignored by standard
% bibliography styles.
% The extent field holds the physical description of the work; it is kept
% out of the pages field on purpose because it is not a page range.
@phdthesis{Drichel:1018190,
  author            = {Drichel, Arthur},
  othercontributors = {Meyer, Ulrike Michaela and Desmet, Lieven},
  title             = {{Machine} learning for domain generation algorithm
    classification},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {RWTH Aachen University},
  reportid          = {RWTH-2025-07743},
  extent            = {1 Online-Ressource : Illustrationen},
  year              = {2025},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
    University; Dissertation, RWTH Aachen University, 2025},
  abstract          = {Botnets pose a significant threat to cybersecurity as they
    enable various malicious activities such as Distributed
    Denial-of-Service (DDoS) attacks and spam campaigns. The
    growing adoption of Domain Generation Algorithms (DGAs) by
    modern botnets to establish connections with their Command
    and Control (C\&C) servers complicates containment
    measures, creating a pronounced asymmetry where defenders
    must block all generated domains, while attackers require
    only a single unblocked domain to maintain control. A
    promising approach to combat DGA-based botnets involves
    utilizing Machine Learning (ML) classifiers, which can be
    trained to detect and block queries to potential C\&C
    domains, offering a significant advantage over traditional
    blocklists as they generalize to detect new domains not seen
    during training, thereby enabling the detection of even yet
    unknown DGAs. Especially, Deep Learning (DL) based
    classifiers have demonstrated unprecedented accuracy in
    detecting DGAs, yet they also exhibit notable drawbacks,
    including issues related to explainability, robustness, and
    privacy. This dissertation provides a comprehensive analysis
    of the applicability of ML for DGA detection, focusing on
    addressing the challenges that hinder the successful
    deployment of ML-based DGA classifiers in practice, thereby
    presenting a holistic view of the DGA detection problem and
    exploring solutions to bridge the gap between theoretical
    advancements and real-world applicability. In a
    comprehensive, large-scale study we first systematically
    quantify the current threat posed by DGA-based botnets,
    highlight the shortcomings of existing containment measures,
    and underscore the need for enhanced countermeasures to
    effectively combat the persistent and ongoing threat posed
    by botnets. In this dissertation, we propose a range of
    novel classification models that substantially improve the
    classification performance beyond the state of the art,
    including their ability to detect previously unknown DGAs.
    We also address the problem of class imbalance resulting
    from the significant disparity in available training samples
    across different DGAs and examine the models'
    generalizability in response to temporal and environmental
    changes. These aspects are critical factors that guide data
    selection and retraining strategies, thereby ensuring the
    long-term effectiveness of DGA classifiers in real-world
    deployments. To further improve classification performance,
    we conduct a comprehensive study on collaborative ML for DGA
    detection and demonstrate its potential to substantially
    reduce the False Positive Rate (FPR). At the same time, we
    investigate the associated privacy implications and explore
    the feasibility of privacy-preserving
    Classification-as-a-Service (CaaS). In our study on
    explainability, we conduct a critical analysis of the
    features used in DL-based DGA detection and reveal several
    biases inherent in state-of-the-art DGA classifier which can
    easily be exploited by an adversary to evade detection. To
    mitigate these issues, we propose a bias-reduced
    classification system that effectively addresses these
    biases while maintaining state-of-the-art detection
    performance, and introduce visual analytics systems that
    facilitate informed decision-making by providing insights
    into a classifier's reasoning. Moreover, we critically
    examine the robustness of DGA detection classifiers against
    adversarial attacks and propose a novel hardening approach
    that leverages adversarial latent space vectors and
    discretized adversarial domains to substantially improve
    their robustness. Finally, to bridge the gap between research
    and practical application, we propose a detection system
    that integrates our research findings and demonstrate its
    effectiveness and feasibility through a comprehensive case
    study in which we deploy the system to classify the DNS
    network traffic within a real-world network.},
  cin               = {123520 / 120000},
  ddc               = {004},
  cid               = {$I:(DE-82)123520_20140620$ / $I:(DE-82)120000_20140620$},
  pnm               = {SAPPAN - Sharing and Automation for Privacy Preserving
    Attack Neutralization (833418)},
  pid               = {G:(EU-Grant)833418},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2025-07743},
  url               = {https://publications.rwth-aachen.de/record/1018190},
}