% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral thesis (RWTH Aachen University, 2018), issued as volume 35 of the
% IAS Series. The fields othercontributors, reportid, physdesc, cin, ddc, cid
% and typ are not part of the standard BibTeX/biblatex data model; they carry
% repository metadata and are ignored by standard styles.
% pagetotal holds the page count, while physdesc keeps the catalogue's
% physical-description statement verbatim (it is not a page range, so it must
% not live in the pages field).
@phdthesis{Hermanns:720165,
  author            = {Hermanns, Marc-André},
  othercontributors = {Müller, Matthias S. and Wolf, Felix Gerd Eugen},
  title             = {Understanding the formation of wait states in one-sided
                       communication},
  volume            = {35},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Jülich},
  publisher         = {Forschungszentrum Jülich GmbH Zentralbibliothek, Verlag},
  reportid          = {RWTH-2018-222635},
  series            = {IAS Series},
  pagetotal         = {xiv, 144},
  physdesc          = {1 Online-Ressource (xiv, 144 Seiten) : Diagramme},
  year              = {2018},
  note              = {Druckausgabe: 2018. - Onlineausgabe: 2018. - Auch
                       veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University; Dissertation, RWTH Aachen University, 2017},
  abstract          = {Due to the available concurrency in modern-day
                       supercomputers, the complexity of developing efficient
                       parallel applications for these platforms has grown rapidly
                       in the last years. Many applications use message passing for
                       parallelization, offering three main communication
                       paradigms: point-to-point, collective and one-sided
                       communication. Each paradigm fits certain domains of
                       algorithms and communication patterns best. The one-sided
                       paradigm decouples communication and synchronization and
                       allows a single process to define a complete communication.
                       These are important features for runtime systems of new
                       programming paradigms and state-of-the-art dynamic
                       load-balancing strategies. In any process interaction, wait
                       states can occur, where a process is waiting for
                       another---idling---before it proceeds with its local
                       computation. To eliminate such wait states, runtime and
                       application developers alike need support in detecting and
                       quantifying them and their root causes. However, tool
                       support for identifying complex wait states in one-sided
                       communication is scarce. This thesis contributes novel
                       methods for the scalable detection and quantification of
                       wait states in one-sided communication, the automatic
                       identification of their root causes, and the assessment of
                       optimization potential. The methods for wait-state detection
                       and quantification, as introduced by Böhme et al. and
                       extended by this thesis, build upon a parallel post-mortem
                       traversal of process-local event traces, modeling an
                       application's runtime behavior. Performance-relevant data is
                       exchanged just in time on the recorded communication paths.
                       Through the nature of one-sided communication, information
                       on such communication paths is not available on all
                       processes involved, impeding the use of this original
                       approach for one-sided communication. The use of a novel
                       high-level messaging framework enables the exchange of
                       messages on the implicit communication paths of one-sided
                       communication, while retaining the scalability of the
                       original approach. This enables the identification of
                       previously unstudied types of wait states unique to
                       one-sided communication: lack of remote progress and
                       resource contention. Beyond simple accounting of waiting
                       time, other contributed methods allow pinpointing root
                       causes of such wait states and identifying optimization
                       potential in one-sided applications. Furthermore, they
                       distinguish two fundamentally different classes of
                       wait-state root causes: delays for direct process
                       synchronization (similar to point-to-point and collective
                       communication) and contention in case of lock-based process
                       synchronization, whose resolution strategies are
                       diametrically opposed to each other. Finally, the
                       contributed methods enable the identification of the longest
                       wait-state-free execution path (i.e., critical path) in
                       parallel applications using one-sided communication. As only
                       optimization of functions on the critical path will yield
                       performance improvements, its identification is key to
                       choosing promising optimization targets. All of these
                       methods are integrated into the Scalasca performance
                       toolset. Their scalability and effectiveness are
                       demonstrated by evaluating a variety of applications using
                       one-sided communication interfaces running in configurations
                       with up to 65,536 processes.},
  cin               = {123010 / 120000 / 056500},
  ddc               = {004},
  cid               = {$I:(DE-82)123010_20140620$ / $I:(DE-82)120000_20140620$ /
                       $I:(DE-82)056500_20140620$},
  typ               = {PUB:(DE-HGF)11 / PUB:(DE-HGF)3},
  doi               = {10.18154/RWTH-2018-222635},
  url               = {https://publications.rwth-aachen.de/record/720165},
}