% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Dissertation record RWTH-2020-01767 (citation key EusseGiraldo:782269).
% The abstract field ends with the reference list ("Bibliography /
% Literaturverzeichnis", items [1]-[14]) that the bracketed citation markers
% inside the abstract text point to; it is kept as part of the field value.
@PHDTHESIS{EusseGiraldo:782269,
author = {Eusse Giraldo, Juan Fernando},
othercontributors = {Leupers, Rainer and Blume, Holger},
title = {{ASIP} algorithmic/architectural co-exploration based on
high level performance estimation},
school = {Rheinisch-Westfälische Technische Hochschule Aachen},
type = {Dissertation},
address = {Aachen},
reportid = {RWTH-2020-01767},
pages = {1 Online-Ressource (iv, 196 Seiten) : Illustrationen,
Diagramme},
year = {2019},
note = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
University 2020; Dissertation, Rheinisch-Westfälische
Technische Hochschule Aachen, 2019},
abstract = {The work presented in the thesis aims to improve
state-of-the-art methodologies for Application Specific
Instruction Set Processor (ASIP) design. ASIPs are
customized to efficiently execute an application, either in
terms of time, energy or power consumption [1,3,5,7,11].
Existing design methodologies are based on an iterative
process, in which the desired performance metrics are
achieved mostly by a combination of clever algorithmic
transformations, and careful tailoring of a processor
instruction set and its microarchitecture. Although current
methodologies semi-automate some of the steps required to
create a tailored processor, Algorithmic/Architectural
Co-Exploration remains subject to the designer’s ingenuity
and experience [4,8,10,12,13]. Moreover, quantitative
validation of the ASIP performance is only possible in a
late design stage, where not only an algorithm/architecture
pair has been fixed and implemented, but also the complete
software environment for the processor is available
[6,9,11]. This has an impact on design times, as
intermediate designs that do not satisfy the application
requirements are rendered invalid. While creating an ASIP,
several time consuming iterations are usually needed before
the application specification is met. This raises the need
for tools that enable designers to: (i) perform
Algorithmic/Architectural Co-Exploration before an
implementation of the actual ASIP is done; while (ii)
providing early feedback about the achievable performance
gains and potential bottlenecks for the tailored
architecture. The thesis strives to reduce the number of
design iterations that are needed to create an ASIP,
therefore improving the state-of-the-art methodologies. To
achieve this goal, a set of tools and design flows has been
created and evaluated. A highly configurable profiling
infrastructure conforms the core of the Multi-Grained
Profiling (MGP) approach. The proposed flow provides the
means to gain insight into the algorithms that compose the
application specification, for which the processor must be
tailored. The generated information is kept at the source
code level, and its degree of detail is configurable
according to the ASIP design stage. The generated output can
then be used directly by engineers to perform algorithmic
exploration, or by other tools to predict potential
performance gains from envisioned customization. Such is the
case of the presented pre-architectural datapath performance
estimation, which combines profiling information with a
parameterization of an abstract processor model. The output
of such tool consists of an approximation of the clock
cycles that the given application would consume on the input
processor model. Given the accuracy and agility of the
estimation engine to predict the consumed clock cycles, its
use within Design Space Exploration (DSE) tools is also
explored within the work presented in the thesis. Careful
ASIP datapath design must be accompanied with a proper
memory sub-system that is able to efficiently access data.
Therefore, an extension to the abstract processor model is
performed, and a framework to conduct concurrent memory
sub-system and application optimization is created. This
framework is capable of: (i) distribute application data to
optimally utilize the memory hierarchy; (ii) estimate the
amount of cycles that the application spends accessing data;
and (iii) provide visual feedback about the criticality of
each data object. These outputs not only ensure that the
designer knows how much time does the ASIP need to access
data, but also accounts for an optimized utilization of the
memory sub-system represented by the abstract model. The
work also shows that by using the proposed tools, engineers
are enabled to perform Algorithmic/Architectural
Co-Exploration, instruction set design, and processor class
selection early in the design cycle. This is illustrated
through two case studies, in which ASIPs targeted for
applications originating from the computer vision [2] and
communication domains [14] are designed and implemented.
Overall, the work contributes to reduce the total number of
design iterations needed to create an ASIP, while keeping
processor architects at the center of the design process. By
using the proposed tools, a designer would be able to
rapidly get insights on the underlying algorithms for an
application specification, perform basic optimizations and
simplifications on the algorithm, determine the best
architecture to execute it, and assess the impact of
envisioned datapath and memory architecture
customization. Bibliography / Literaturverzeichnis [1] C.
Galuzzi and K. Bertels, “The Instruction-Set Extension
Problem: A Survey,” ACM Trans. Reconfigurable Technol.
Syst., vol. 4, no. 2, pp. 18:1-18:28, May 2011. [2] C. Grana,
D. Borghesani, and R. Cucchiara, “Optimized Block-Based
Connected Components Labeling With Decision Trees,” Image
Processing, IEEE Transactions on, vol. 19, no. 6, pp.
1596-1609, 2010. [3] M. Gries and K. Keutzer, Building ASIPs:
The Mescal Methodology, 1st ed. Springer Publishing Company,
Incorporated, 2010. [4] J. Großschädl, P. Ienne, L. Pozzi,
S. Tillich, and A. K. Verma, “Combining Algorithm
Exploration with Instruction Set Design: A Case Study in
Elliptic Curve Cryptography,” in Proceedings of the
Conference on Design, Automation and Test in Europe:
Proceedings, ser. DATE ’06. 3001 Leuven, Belgium, Belgium:
European Design and Automation Association, 2006, pp.
218-223. [5] P. Ienne and R. Leupers, Customizable Embedded
Processors: Design Technologies and Applications. San
Francisco, CA, USA: Morgan Kaufmann Publishers Inc.,
2007. [6] R. Jordans, “Instruction-Set Architecture
Synthesis for VLIW Processors,” Ph.D. dissertation,
Technical University of Eindhoven, 2015. [7] L. Jóźwiak,
N. Nedjah, and M. Figueroa, “Modern Development Methods
and Tools for Embedded Reconfigurable Systems: A Survey,”
Integr. VLSI J., vol. 43, no. 1, pp. 1-33, January 2010. [8]
D. Kammler, “Memory architectures for ASIPs,” Ph.D.
dissertation, RWTH Aachen University, 2012. [9] K. Karuri and
R. Leupers, Application Analysis Tools for ASIP Design:
Application Profiling and Instruction-set Customization,
1st ed. Springer Publishing Company, Incorporated, 2011. [10]
Y. Meng, “Algorithm/Architecture Design Space
Co-exploration for Energy Efficient Wireless Communications
Systems,” Ph.D. dissertation, University of California at
Santa Barbara, Santa Barbara, CA, USA, 2006, aAI3233117. [11]
P. Mishra and N. Dutt, Processor Description Languages. San
Francisco, CA, USA: Morgan Kaufmann Publishers Inc.,
2008. [12] M. Shoaib, N. K. Jha, and N. Verma,
“Algorithm-Driven Architectural Design Space Exploration
of Domain-Specific Medical-Sensor Processors,” IEEE
Transactions on Very Large Scale Integration (VLSI) Systems,
vol. 21, no. 10, pp. 1849-1862, Oct 2013. [13] E. Tasdemir,
G. Kappen, and T. G. Noll, “Potential of Using Block
Floating Point Arithmetic in ASIP-Based GNSS-Receivers,”
in ASAP 2010 - 21st IEEE International Conference on
Application-specific Systems, Architectures and Processors,
July 2010, pp. 293-296. [14] G. Wang, P. Karanjekar, and G.
Ascheid, “Beamforming with Time-Delay Compensation for 60
GHz MIMO Frequency-Selective Channels,” in Proceedings of
IEEE International Symposium on Personal, Indoor, and Mobile
Radio Communications (PIMRC), August 2015.},
cin = {611910},
ddc = {621.3},
cid = {$I:(DE-82)611910_20140620$},
typ = {PUB:(DE-HGF)11},
doi = {10.18154/RWTH-2020-01767},
url = {https://publications.rwth-aachen.de/record/782269},
}