% NOTE(review): the stray "h1".."h6" lines that appeared here were a
% Markdown/HTML export artifact, not bibliographic data. BibTeX ignores
% text outside entries, but they have been removed to avoid confusion.
% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% PhD thesis record exported from the RWTH Aachen publication server.
% Non-standard repository fields (othercontributors, reportid, cin, ddc,
% cid, typ) are retained verbatim; standard styles silently ignore them.
% Title normalized to Title Case (styles can downcase but cannot restore
% lost capitals); removed the {H} single-letter brace (anti-pattern: no
% acronym needs protection here) and the catalog-style space before ":".
@phdthesis{Springer:755345,
      author       = {Springer, Paul},
      othercontributors = {Bientinesi, Paolo and Wellein, Gerhard},
      title        = {High-Performance Tensor Operations: Tensor
                      Transpositions, Spin Summations, and Tensor Contractions},
      school       = {RWTH Aachen University},
      type         = {Dissertation},
      address      = {Aachen},
      reportid     = {RWTH-2019-01778},
      pages        = {1 Online-Ressource (xiii, 169 Seiten) : Illustrationen},
      year         = {2019},
      note         = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                      University; Dissertation, RWTH Aachen University, 2019},
      abstract     = {This dissertation is concerned with the development of
                      novel high-performance algorithms for tensor transpositions,
                      spin summations, and tensor contractions. A central
                      challenge that is common to these operations is the complex
                      memory access pattern, which is due to the multidimensional
                      nature of tensors, and which often leads to a poor
                      utilization of the CPU’s rich cache hierarchy and
                      consequently to low performance. To overcome this
                      inefficiency, the algorithms presented in this dissertation
                      pay special attention to the exploitation of spatial as well
                      as temporal locality, resulting in a preferable memory
                      access pattern, and thus high performance. With tensor
                      transpositions, spin summations, and tensor contractions
                      being the major performance bottlenecks in many scientific
                      applications, the goal of this dissertation is to provide
                      significant speedups over other state-of-the-art software
                      solutions for such operations. We describe an approach to
                      tensor transpositions that is able to attain close-to-peak
                      memory bandwidth across multiple architectures. We also
                      present a high-performance perspective on spin summations
                      and propose an algorithm that exploits both the spatial as
                      well as temporal locality inherent to the problem. Finally,
                      a novel GEMM-like methodology for tensor contractions is
                      introduced; this approach avoids the drawbacks of previous
                      approaches—namely excess memory accesses or an increased
                      memory footprint—and is able to close the performance gap
                      between tensor contractions and high-performance
                      matrix-matrix multiplications.},
      cin          = {123620 / 120000 / 080003},
      ddc          = {004},
      cid          = {$I:(DE-82)123620_20140620$ / $I:(DE-82)120000_20140620$ /
                      $I:(DE-82)080003_20140620$},
      typ          = {PUB:(DE-HGF)11},
      doi          = {10.18154/RWTH-2019-01778},
      url          = {https://publications.rwth-aachen.de/record/755345},
}