% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral thesis of Florian Erich Hilger (Aachen; examined 2004, published 2005).
% Fields such as reportid, cin, ddc, cid, typ and urn are repository metadata
% and are not standard BibTeX fields.
@phdthesis{Hilger:59262,
  author            = {Hilger, Florian Erich},
  othercontributors = {Ney, Hermann},
  title             = {{Quantile} based histogram equalization for noise robust
                       speech recognition},
  school            = {RWTH Aachen University},
  address           = {Aachen},
  publisher         = {Publikationsserver der RWTH Aachen University},
  reportid          = {RWTH-CONV-121065},
  pages             = {X, 150 S. : graph. Darst.},
  year              = {2004},
  note              = {Prüfungsjahr: 2004. - Publikationsjahr: 2005; Aachen,
                       Techn. Hochsch., Diss., 2004},
  abstract          = {In many practical applications automatic speech recognition
                       systems have to work in adverse acoustic environment
                       conditions. Automatic systems are much more sensitive to the
                       variabilities of the acoustic signal than humans. Whenever
                       noise causes a mismatch between the distribution of the
                       training data and the data that is to be recognized, the
                       recognition word error rates will increase. Quantile based
                       histogram equalization is a method to increase the noise
                       robustness. During the feature extraction it reduces an
                       eventual mismatch between the recognition and training data
                       distributions with a non-linear parametric transformation
                       function. This work describes the algorithm and presents
                       detailed experimental evaluations. Based on the quantiles of
                       the cumulative distributions, the parameters of the
                       transformation functions can be reliably estimated from
                       small amounts of data. The approach is integrated into a
                       modified Mel cepstrum feature extraction, in which the
                       logarithm is replaced by a root function to further increase
                       the noise robustness. The actual transformation that is
                       proposed in this work consists of two steps. First, a power
                       function transformation is applied to each output of the
                       Mel-scaled filter-bank, then neighboring filter channels are
                       combined linearly. To investigate the genericity of the
                       approach and the proposed setup experimental evaluations
                       have been carried out with different speech recognition
                       systems, on several databases with different levels of
                       complexity, ranging from digit strings (SpeechDat Car) to
                       larger vocabulary isolated word (Car Navigation) and
                       continuous speech recognition tasks (Wall Street Journal
                       with added noise). Consistent recognition results were
                       observed on all databases. The modified feature extraction,
                       with the root instead of the logarithm, already outperformed
                       the original baseline on noisy data. Filter channel specific
                       quantile equalization always improved these results,
                       yielding relative improvements between $5\%$ and $50\%$,
                       depending on the recognition task and the mismatch of the
                       data. Finally, the combination of neighboring filter
                       channels was able to reduce the error rates somewhat
                       further, especially if the noise, like car noise, was band
                       limited.},
  keywords          = {Automatische Spracherkennung (SWD) / Störgeräusch (SWD) /
                       Robustheit (SWD) / Merkmalsextraktion (SWD) / Histogramm
                       (SWD) / Quantil (SWD)},
  cin               = {100000},
  ddc               = {004},
  cid               = {$I:(DE-82)100000_20140620$},
  typ               = {PUB:(DE-HGF)11},
  urn               = {urn:nbn:de:hbz:82-20050567},
  doi               = {10.18154/RWTH-CONV-121065},
  url               = {https://publications.rwth-aachen.de/record/59262},
}