% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral thesis record for the citation key Cramer:695879.
% The fields othercontributors, reportid, physicaldescription, cin, ddc, cid
% and typ are not standard BibTeX/biblatex fields; they carry repository
% metadata from the export and are kept unchanged for round-tripping.
% physicaldescription holds the catalogue string that the export had placed
% in the pages field; the page count itself is given in pagetotal.
@phdthesis{Cramer:695879,
  author              = {Cramer, Tim},
  othercontributors   = {Müller, Matthias S. and Katoen, Joost-Pieter},
  title               = {Analyzing Memory Accesses for Performance and
                         Correctness of Parallel Programs},
  school              = {RWTH Aachen University},
  type                = {Dissertation},
  address             = {Aachen},
  reportid            = {RWTH-2017-06527},
  pagetotal           = {viii, 151},
  physicaldescription = {1 Online-Ressource (viii, 151 Seiten) : Illustrationen,
                         Diagramme},
  year                = {2017},
  note                = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                         University},
  abstract            = {The demand for large compute capabilities in scientific
                         computing led to wide use and acceptance of highly-parallel
                         computer architectures during the last decade. This trend is
                         manifested in the TOP500, listing the fastest supercomputer
                         of the world, in which about $40\%$ of the performance share
                         results from accelerator-based systems. Programming for
                         these architectures in the past often required a
                         time-consuming rewrite of the compute-intensive application
                         parts, until more productive approaches like Open
                         Accelerators (OpenACC) or the target offloading features of
                         Open Multi-Processing (OpenMP) came to existence. However,
                         parallel programming for heterogeneous architectures is
                         still a complex and error-prone task, posing several
                         challenges to the programmer who wants to achieve high
                         application performance. One key factor for the
                         understanding of the performance and the correctness of a
                         parallel program is reflected in the analysis of the memory
                         accesses. This work takes a holistic view on the hardware
                         properties, the programming paradigm, its particular
                         implementation and the interfaces for an adequate tool
                         support with respect to both aspects. The improvement of the
                         performance and the validation of an application requires a
                         deep comprehension of the dynamic runtime behavior. Here,
                         the appropriate data and thread placement is essential for
                         the performance, and the order of the memory accesses is
                         essential for the deterministic behavior or rather the
                         correctness of the application. Therefore, this work will
                         first present a systematic methodology for the assessment of
                         OpenMP for target devices, patterns for the efficient usage
                         of task-based programming on Non-Uniform Memory Access
                         (NUMA) architectures, and the improvement of
                         standard-compliant tool support. Based on the gathered
                         insights, an OpenMP epoch model for correctness checking is
                         defined, which respects the OpenMP semantics including the
                         runtime and memory model. The evaluation of the developed
                         concepts is shown by application to real-world performance
                         analysis and correctness checking tools.},
  cin                 = {123010 / 120000},
  ddc                 = {004},
  cid                 = {$I:(DE-82)123010_20140620$ / $I:(DE-82)120000_20140620$},
  typ                 = {PUB:(DE-HGF)11},
  doi                 = {10.18154/RWTH-2017-06527},
  url                 = {https://publications.rwth-aachen.de/record/695879},
}