% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Entry: doctoral dissertation of Markus Höhnerbach (RWTH Aachen University, 2020).
% The fields othercontributors, reportid, physicaldescription, cin, ddc, cid and
% typ are not part of the standard data model; bibliography styles ignore them.
% physicaldescription carries the physical-description text verbatim; it is kept
% out of the pages field because it is not a page number or page range.
% The citation key is left unchanged so existing citations keep resolving.
@PHDTHESIS{Hhnerbach:826069,
  author              = {Höhnerbach, Markus},
  othercontributors   = {Bientinesi, Paolo and Naumann, Uwe and Kelly, Paul},
  title               = {A framework for the vectorization of molecular dynamics
                         kernels},
  school              = {RWTH Aachen University},
  type                = {Dissertation},
  address             = {Aachen},
  publisher           = {RWTH Aachen University},
  reportid            = {RWTH-2021-08729},
  physicaldescription = {1 Online-Ressource : Illustrationen, Diagramme},
  year                = {2020},
  note                = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                         University 2021; Dissertation, RWTH Aachen University, 2020},
  abstract            = {This thesis introduces a domain-specific language (DSL) for
                         many-body potentials, which are used in molecular dynamics
                         (MD) simulations in the area of materials science. We also
                         introduce a compiler to translate the DSL into
                         high-performance code suitable for modern supercomputers. We
                         begin by studying ways to speed up potentials on
                         supercomputers using two case studies: The Tersoff and the
                         AIREBO potentials. In both case studies, we identify a
                         number of optimizations, both domain-specific and general,
                         to achieve speedups of up to 5x; we also introduce a method
                         to keep the resulting code performance portable. During the
                         AIREBO case study, we also discover that the existing code
                         contains a number of errors. This experience motivates us to
                         include the derivation step, the most error-prone step in
                         manual optimization, in our automation effort. After having
                         identified beneficial optimization techniques, we create a
                         “potential compiler”, short PotC, which generates
                         fully-usable performance-portable potential implementations
                         from specifications written in our DSL. DSL code is
                         significantly shorter (20x to 30x) than a manual code,
                         reducing both manual work and opportunities to introduce
                         bugs. We present performance results on five different
                         platforms: Three CPU platforms (Broadwell, Knights Landing,
                         and Skylake) and two GPU platforms (Pascal and Volta). While
                         the performance in some cases remains far below that of
                         hand-written code, it also manages to match or exceed
                         manually written implementations in other cases. For these
                         cases, we achieve speedups of up to 9x compared to
                         non-vectorized code.},
  cin                 = {123120 / 120000},
  ddc                 = {004},
  cid                 = {$I:(DE-82)123120_20140620$ / $I:(DE-82)120000_20140620$},
  typ                 = {PUB:(DE-HGF)11},
  doi                 = {10.18154/RWTH-2021-08729},
  url                 = {https://publications.rwth-aachen.de/record/826069},
}