% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@PHDTHESIS{Springer:755345,
author = {Springer, Paul},
othercontributors = {Bientinesi, Paolo and Wellein, Gerhard},
title = {{H}igh-performance tensor operations: tensor
transpositions, spin summations, and tensor contractions},
school = {RWTH Aachen University},
type = {Dissertation},
address = {Aachen},
reportid = {RWTH-2019-01778},
pages = {1 online resource (xiii, 169 pages): illustrations},
year = {2019},
note = {Published on the publication server of RWTH Aachen
University; Dissertation, RWTH Aachen University, 2019},
abstract = {This dissertation is concerned with the development of
novel high-performance algorithms for tensor transpositions,
spin summations, and tensor contractions. A central
challenge common to these operations is their complex
memory access pattern, which stems from the multidimensional
nature of tensors and often leads to poor utilization of the
CPU’s rich cache hierarchy and, consequently, to low
performance. To overcome this inefficiency, the algorithms
presented in this dissertation pay special attention to
exploiting both spatial and temporal locality, resulting in
a favorable memory access pattern and thus high
performance. With tensor
transpositions, spin summations, and tensor contractions
being the major performance bottlenecks in many scientific
applications, the goal of this dissertation is to provide
significant speedups over other state-of-the-art software
solutions for such operations. We describe an approach to
tensor transpositions that is able to attain close-to-peak
memory bandwidth across multiple architectures. We also
present a high-performance perspective on spin summations
and propose an algorithm that exploits both the spatial
and the temporal locality inherent in the problem. Finally,
a novel GEMM-like methodology for tensor contractions is
introduced; this approach avoids the drawbacks of previous
approaches—namely excess memory accesses or an increased
memory footprint—and is able to close the performance gap
between tensor contractions and high-performance
matrix-matrix multiplications.},
ddc = {004},
doi = {10.18154/RWTH-2019-01778},
url = {https://publications.rwth-aachen.de/record/755345},
}