% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@PHDTHESIS{Terboven:667504,
      author       = {Terboven, Christian},
      othercontributors = {Müller, Mathias S. and Bischof, Christian and Chapman,
                          Barbara},
      title        = {{A}bstractions for {P}erformance {P}rogramming on
                      {M}ulti-{C}ore {A}rchitectures with {H}ierarchical
                      {M}emory},
      edition      = {1},
      volume       = {7},
      school       = {RWTH Aachen University},
      type         = {Dissertation},
      address      = {Aachen},
      publisher    = {Apprimus Verlag},
      reportid     = {RWTH-2016-06261},
      isbn         = {978-3-86359-442-8},
      series       = {Ergebnisse aus der Informatik},
      pages        = {1 online resource (ii, 160 pages): illustrations,
                      diagrams},
      year         = {2016},
      note         = {Also published on the publication server of RWTH Aachen
                      University; Dissertation, RWTH Aachen University, 2016},
      abstract     = {Shared memory parallel programming, for instance by
                      inserting OpenMP pragmas into program code, might look
                      simple at first sight. However, the achievable
                      performance of real-world applications depends on
                      certain machine properties, for instance the achievable
                      memory bandwidth within the memory hierarchy, and on
                      the extent to which the application programmer has
                      taken these into account. This thesis presents
                      solutions for designing shared memory parallel
                      applications targeting current and future system
                      architectures by following a methodical approach. To
                      this end, it builds on a successful strategy from the
                      software engineering discipline: the introduction of
                      abstractions. With large shared memory machines
                      typically providing non-uniform memory access, and
                      taking energy efficiency into account, expressing and
                      managing data locality is important today and will be
                      even more so on the next generation of machines. The
                      de facto standard parallelization paradigms MPI and
                      OpenMP are not well-equipped to allow for this, nor is
                      it a task a domain scientist is interested in solving.
                      Suitable abstractions for handling parallelism and data
                      locality have to be powerful enough to fulfill their
                      purpose while being simple enough to be applicable to
                      existing application codes. The means of abstraction in
                      this work are twofold: first, the methodical selection
                      of those architecture-specific properties that are
                      important to achieve performance, and second, the
                      design of parallel programming model concepts and of
                      software components to foster the parallelization of
                      simulation codes. For the abstractions to be acceptable
                      to end users, existing data structures and code designs
                      should be left unmodified as much as possible, in
                      particular in object-oriented codes. Hence, the
                      abstractions have to be formulated in a broadly
                      accepted form; for instance, they have to integrate
                      well with common design patterns. To achieve this goal,
                      this work first identifies several memory management
                      idioms for NUMA machines. Second, a powerful yet
                      simple-to-use and flexible thread affinity model for
                      OpenMP is developed. Third, the memory management
                      idioms and the thread affinity model are shown to
                      support parallelization with object-oriented
                      abstractions. To support all this, a set of benchmarks
                      and experiments to analyze OpenMP implementation
                      behavior is presented. As a whole, this work proposes a
                      methodical approach to developing parallel scientific
                      software for multi- and many-core architectures.},
      cin          = {123010 / 120000},
      ddc          = {004},
      cid          = {$I:(DE-82)123010_20140620$ / $I:(DE-82)120000_20140620$},
      typ          = {PUB:(DE-HGF)11 / PUB:(DE-HGF)3},
      urn          = {urn:nbn:de:hbz:82-rwth-2016-062610},
      doi          = {10.18154/RWTH-2016-06261},
      url          = {https://publications.rwth-aachen.de/record/667504},
}
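
% The abstract above refers to NUMA memory management idioms and to a thread
% affinity model for OpenMP. As an illustrative aside (a minimal sketch, not
% code taken from the thesis), the following C/OpenMP fragment shows the
% widely used first-touch placement idiom combined with the OpenMP 4.0
% proc_bind affinity clause; the array size n and the spread binding are
% arbitrary choices for this example.
%
%   #include <stdio.h>
%   #include <stdlib.h>
%
%   int main(void)
%   {
%       const long n = 50000000;            /* ~400 MB per array */
%       double *a = malloc(n * sizeof *a);
%       double *b = malloc(n * sizeof *b);
%       if (!a || !b) return EXIT_FAILURE;
%
%       /* First-touch idiom: initialize in parallel with the same static
%          schedule (and the same binding) as the compute loop, so each
%          page is faulted in on the NUMA node of the thread that will
%          later work on it. */
%       #pragma omp parallel for schedule(static) proc_bind(spread)
%       for (long i = 0; i < n; ++i) {
%           a[i] = 0.0;
%           b[i] = (double)i;
%       }
%
%       /* Compute loop with the identical schedule and binding, so each
%          thread accesses the pages it placed above; places are taken
%          from the environment, e.g. OMP_PLACES=cores. */
%       #pragma omp parallel for schedule(static) proc_bind(spread)
%       for (long i = 0; i < n; ++i)
%           a[i] += 2.0 * b[i];
%
%       printf("a[n-1] = %f\n", a[n - 1]);
%       free(a);
%       free(b);
%       return EXIT_SUCCESS;
%   }
%
% Compile with, e.g., "gcc -O2 -fopenmp first_touch.c". On a NUMA system the
% parallel initialization loop determines page placement, which is why
% initializing serially (on a single node) is the classic pitfall this idiom
% avoids.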