% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral dissertation by David Nolden (RWTH Aachen University, 2017) on
% decoding/search for large vocabulary continuous speech recognition;
% record 699368 on the RWTH publication server (see the url field).
%
% Notes for maintainers:
%  - The title is stored in plain Title Case without brace protection; it
%    contains no proper nouns or acronyms, so the bibliography style is free
%    to apply its own casing.
%  - othercontributors, reportid, cin, ddc, cid and typ are not standard
%    BibTeX fields; they carry repository metadata and are ignored by styles
%    that do not know them.
%  - pages holds a catalogue-style physical description rather than a page
%    range; it is kept verbatim from the record.
%  - note, abstract and the header of this file rely on UTF-8 characters.
@phdthesis{Nolden:699368,
  author            = {Nolden, David},
  othercontributors = {Ney, Hermann and Gauvain, Jean-Luc},
  title             = {Progress in Decoding for Large Vocabulary Continuous
                       Speech Recognition},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  reportid          = {RWTH-2017-08247},
  pages             = {1 Online-Ressource (8,iv, 208 Seiten) : Illustrationen,
                       Diagramme},
  year              = {2017},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University; Dissertation, RWTH Aachen University, 2017},
  abstract          = {The subject of this thesis is the search problem in
                       automatic speech recognition. The search is responsible
                       for matching an incoming acoustic speech signal with
                       statistical speech models, in order to find the word
                       sequence which is most likely to have been spoken. In
                       principle, it is necessary to enumerate all possible
                       word sequences, to compute a likelihood for each word
                       sequence according to the models, and to select the best
                       one. When the vocabulary is large, then such a
                       straightforward approach is not feasible, due to the
                       huge number of possible word sequences; instead,
                       state-of-the-art approaches transform the models into
                       compact search network structures, match the input
                       signal time-synchronously against the search network,
                       and exploit recombination and pruning to limit the
                       search effort. In this work, we analyze existing search
                       strategies, combine them, and introduce novel extensions
                       which further improve their efficiency and precision. We
                       give a holistic overview of the ingredients required for
                       efficient search. We investigate how the search network
                       should be structured, and how the search space can be
                       managed most efficiently. Normally, the search space
                       depends on the language model; we introduce a novel
                       search space management algorithm, which partially
                       decouples the search effort from the language model’s
                       order. We introduce a novel framework which explains why
                       pruning is possible, and which helps motivating and
                       finding effective pruning methods; it establishes a
                       direct relationship between pruning and recombination.
                       Then we analyze common pruning methods regarding
                       effectiveness and motivation, introduce novel pruning
                       methods, and propose improved look-ahead techniques
                       which make the pruning more effective. Pruning induces a
                       certain amount of search errors, and usually a specific
                       trade-off between precision and efficiency needs to be
                       selected manually. In a last step, we show how search
                       errors can be detected, and derive a search algorithm
                       which allows efficient search without search errors. All
                       methods are evaluated experimentally on a variety of
                       state-of-the-art speech recognition tasks. On all tasks,
                       a considerable reduction of the search space is achieved
                       using the new methods, and overall, a speedup of the
                       core search by a factor of more than 10 is achieved in
                       comparison to the baseline method.},
  cin               = {120000 / 122010},
  ddc               = {004},
  cid               = {$I:(DE-82)120000_20140620$ / $I:(DE-82)122010_20140620$},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2017-08247},
  url               = {https://publications.rwth-aachen.de/record/699368},
}