% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral thesis by Niko Zurstraßen (RWTH Aachen, 2025); RWTH Publications
% record 1020075. The fields othercontributors, reportid, cin, ddc, cid and typ
% are not standard BibTeX fields; standard styles ignore unknown fields.
% NOTE(review): "pages" holds a physical description rather than a page range;
% confirm how the target bibliography style renders it.
@phdthesis{Zurstraen:1020075,
  author            = {Zurstraßen, Niko},
  othercontributors = {Leupers, Rainer and Jung, Matthias},
  title             = {Enhancing full-system simulation: techniques for
                       maximizing performance and accuracy},
  school            = {Rheinisch-Westfälische Technische Hochschule Aachen},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {RWTH Aachen University},
  reportid          = {RWTH-2025-08754},
  pages             = {1 Online-Ressource : Illustrationen},
  year              = {2025},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University; Dissertation, Rheinisch-Westfälische Technische
                       Hochschule Aachen, 2025},
  abstract          = {Simulating compute systems by pure virtual means has become
                       a cornerstone of modern hardware and software development.
                       These virtual twins of computers, also referred to as
                       Full-System Simulators (FSSs), enable a plethora of unique
                       use cases. As virtual development platforms, FSSs enable the
                       development of software long before any hardware prototypes
                       are available, speeding up the time to market. When also
                       incorporating microarchitectural details, FSSs facilitate
                       early design space exploration by estimating a system’s
                       characteristics (performance, power consumption, cache
                       hit/miss rates, etc.). Ultimately, all use cases share one
                       thing in common: the FSS should be as fast as possible while
                       still providing the required accuracy. As a software
                       development platform, the performance directly affects the
                       developer’s productivity. When conducting design space
                       exploration, the performance determines the number of
                       explorable designs. Moreover, and much like its real-world
                       counterpart, a FSS can never be fast enough. In order to
                       meet the ever-increasing demand for more performance, this
                       thesis focuses on the development of methods that accelerate
                       the execution of simulations. Similar to the phases of a
                       compiler, many of the here presented challenges are
                       orthogonal but contribute to the same goal. More
                       specifically, this thesis first presents a parallelized
                       version of the popular open-source FSS gem5 (Chapter 3). By
                       leveraging modern multi-core systems, the parallelized
                       simulator attains speedups of up to 24.7× when simulating
                       multi-threaded benchmarks. Based on this, analytical models
                       for performance and accuracy prediction are presented
                       (Chapter 4). This is followed by introducing new methods for
                       the fast simulation of (vector) floating point instructions
                       (Chapter 5). By using the host FPU in a sophisticated way,
                       individual instructions see speedups of up to 5× compared
                       to a soft float implementation. Lastly, a global, static
                       register allocation for Dynamic Binary Translators (DBTs) is
                       presented (Chapter 6). Compared to local register allocation
                       methods, as used by the state-of-the-art FSS QEMU, the
                       method of this thesis achieves speedups of up to 1.4×.},
  cin               = {611910},
  ddc               = {621.3},
  cid               = {$I:(DE-82)611910_20140620$},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2025-08754},
  url               = {https://publications.rwth-aachen.de/record/1020075},
}