h1

h2

h3

h4

h5
h6
% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Doctoral thesis record, cited as Malapally:1026256.
% - The title protects the whole word "Exascale" with braces instead of a
%   single letter, so the capital survives style recasing without splitting
%   the word.
% - The catalogue extent statement is stored in the non-standard "extent"
%   field rather than "pages", which is meant for page numbers or ranges.
% - Fields the bibliography style does not know are ignored when formatting;
%   presumably this covers othercontributors, reportid, extent, cin, ddc,
%   cid and typ (verify against the style in use).
@PHDTHESIS{Malapally:1026256,
      author       = {Malapally, Nitin},
      othercontributors = {Carloni, Paolo and Fyta, Maria},
      title        = {{Exascale}-ready molecular dynamics simulations with
                      efficient algorithms for extreme core counts},
      school       = {RWTH Aachen University},
      type         = {Dissertation},
      address      = {Aachen},
      publisher    = {RWTH Aachen University},
      reportid     = {RWTH-2026-00768},
      extent       = {1 Online-Ressource : Illustrationen},
      year         = {2026},
      note         = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                      University 2026; Dissertation, RWTH Aachen University, 2026},
      abstract     = {Biomolecular simulations, realized by molecular dynamics
                      (MD) and enhanced-sampling approaches, are very powerful
                      tools for studying the structural dynamics, kinetics, and
                      energetics of biological systems. In combination with
                      high-performance computing (HPC), increasingly larger
                      systems and longer timescales can be simulated. However, the
                      sequential nature of MD’s time evolution imposes a hard
                      parallel limit, resulting in reduced scalability and hence
                      under-utilization of HPC systems. As a result, standard MD
                      does not reach the typical timescale (millisecond and
                      beyond) required to study many biological processes.
                      Enhanced-sampling techniques, such as umbrella sampling,
                      metadynamics, and replica-exchange MD, do simulate these
                      long processes but often require various techniques to
                      retrieve kinetic and thermodynamic properties and do not
                      scale well. The arrival of exascale computers has made the
                      need for highly scalable algorithms for MD simulations even
                      more urgent. This doctoral thesis reports on my efforts to
                      address these important issues via algorithmic optimization,
                      design and development. The first was an attempt to speed up
                      MD simulations by means of an alternative parallel 3D
                      discrete Fourier transform (3D DFT) algorithm, which was
                      implemented and benchmarked on the JUWELS Cluster, showing
                      comparable scaling performance to the state-of-the-art. In
                      the second, the software apparatus required for a massively
                      parallel MD strategy was constructed within the highly
                      popular GROMACS code and the PLUMED library. The
                      implementation is capable of both multi-CPU and multi-GPU
                      parallelism and was optimized and benchmarked on the JUWELS
                      Booster. The results revealed its multi-modal scalability in
                      that simulations using it can be both efficiently sped up
                      and also greatly extended for a small increase in runtime.
                      The implementation was shown to scale up to $94\%$ of the
                      JUWELS Booster (3,500 GPUs and 42,000 CPUs) with excellent
                      parallel efficiency. Moreover, a plateauing of parallel
                      efficiency was observed at $50\%$ of the JUWELS Booster,
                      which hints at its ability to scale to even higher node
                      counts. This software has the potential to accelerate MD and
                      thereby enable the study of more complex biological
                      processes than was previously practicable.},
      cin          = {137810 / 130000},
      ddc          = {530},
      cid          = {$I:(DE-82)137810_20140620$ / $I:(DE-82)130000_20140620$},
      typ          = {PUB:(DE-HGF)11},
      doi          = {10.18154/RWTH-2026-00768},
      url          = {https://publications.rwth-aachen.de/record/1026256},
}