% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@phdthesis{Terboven:667504,
  author            = {Terboven, Christian},
  othercontributors = {Müller, Mathias S. and Bischof, Christian and Chapman,
                       Barbara},
  title             = {{Abstractions} for {Performance} {Programming} on
                       {Multi-Core} {Architectures} with {Hierarchical}
                       {Memory}; 1. {Auflage}},
  volume            = {7},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {Apprimus Verlag},
  reportid          = {RWTH-2016-06261},
  isbn              = {978-3-86359-442-8},
  series            = {Ergebnisse aus der Informatik},
  pages             = {1 Online-Ressource (ii, 160 Seiten) : Illustrationen,
                       Diagramme},
  year              = {2016},
  note              = {Auch veröffentlicht auf dem Publikationsserver der RWTH
                       Aachen University; Dissertation, RWTH Aachen University,
                       2016},
  abstract          = {Shared memory parallel programming, for instance by
                       inserting OpenMP pragmas into program code, might look
                       simple at first sight. However, the achievable
                       performance of real-world applications depends on
                       certain machine properties - for instance the achievable
                       memory bandwidth within the memory hierarchy - and in
                       how far the application programmer has taken these into
                       account. This thesis presents solutions for designing
                       shared memory parallel applications targeting current
                       and future system architectures by following a
                       methodical approach. Therefore, it builds on a
                       successful strategy from the software engineering
                       discipline: the introduction of abstractions. With large
                       shared memory machines typically providing a non-uniform
                       memory access, and taking energy efficiency into
                       account, expressing and managing data locality is
                       important today and will be even more so on the next
                       generation of machines. The de-facto standard
                       parallelization paradigms MPI and OpenMP were not
                       well-equipped to allow for that, nor is this a task a
                       domain scientist is interested in solving. Suitable
                       abstractions for handling parallelism and data locality
                       have to be powerful enough to fulfill their purpose
                       while being simple enough to be applicable to existing
                       application codes. The means of abstraction in this work
                       are twofold: first, it relates to the methodical
                       selection of those architecture-specific properties that
                       are important to achieve performance, and second, it
                       relates to the design of parallel programming model
                       concepts and of software components to foster the
                       parallelization of simulation codes. For the
                       abstractions to be acceptable by end users, existing
                       data structures and code designs should be left
                       unmodified as much as possible, in particular in
                       object-oriented codes. Hence, the abstractions have to
                       be formulated in a broadly accepted form, for instance
                       they have to integrate well with common design patterns.
                       To achieve this goal, this work first identifies several
                       memory management idioms for NUMA machines. Second, a
                       powerful yet simple-to-use and flexible thread affinity
                       model for OpenMP is developed. Third, the memory
                       management idioms and the thread affinity model are
                       shown to support the parallelization with
                       object-oriented abstractions. To support all this, a set
                       of benchmarks and experiments to analyze OpenMP
                       implementation behavior is presented. This work as a
                       whole proposes a methodic approach to develop parallel
                       scientific software for multi- and many-core
                       architectures.},
  cin               = {123010 / 120000},
  ddc               = {004},
  cid               = {$I:(DE-82)123010_20140620$ / $I:(DE-82)120000_20140620$},
  typ               = {PUB:(DE-HGF)11 / PUB:(DE-HGF)3},
  urn               = {urn:nbn:de:hbz:82-rwth-2016-062610},
  doi               = {10.18154/RWTH-2016-06261},
  url               = {https://publications.rwth-aachen.de/record/667504},
}