% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Doctoral thesis record for Stephan Peitz (RWTH Aachen University, 2017).
% The fields othercontributors, reportid, cin, ddc, cid and typ are not
% standard BibTeX fields; styles that do not know them ignore them.
% The abstract below has been repaired after text-extraction damage
% (dropped fi/ff ligatures, words fused at line wraps, missing spaces
% after sentence-ending periods); its wording is otherwise unchanged.
@PHDTHESIS{Peitz:709157,
  author = {Peitz, Stephan},
  othercontributors = {Ney, Hermann and Allauzen, Alexandre},
  title = {{G}enerative {T}raining and {S}moothing of {H}ierarchical
    {P}hrase-{B}ased {T}ranslation {M}odels},
  school = {RWTH Aachen University},
  type = {Dissertation},
  address = {Aachen},
  reportid = {RWTH-2017-09742},
  pages = {1 Online-Ressource (xi, 111 Seiten) : Illustrationen,
    Diagramme},
  year = {2017},
  note = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
    University; Dissertation, RWTH Aachen University, 2017},
  abstract = {Hierarchical phrase-based translation is a common machine
    translation approach for translating between languages with
    significantly different word order. The focus of the first
    part of this thesis is set on smoothing and training of the
    translation models used in hierarchical translation.
    Additionally, we present an improved implementation of the
    search algorithm and show that our implementation is
    competitive compared to other state-of-the-art hierarchical
    phrase-based translation engines. Within the second part of
    this work, we apply hierarchical phrase-based translation in
    the context of spoken language translation. In the
    state-of-the-art hierarchical translation model extraction
    process, translation rules and their corresponding
    translation probabilities are obtained from word-aligned
    training data by applying simple heuristics. A common issue
    is that even if a large set of training data is provided,
    the resulting translation model may suffer from data
    sparseness. Smoothing is an approach to remedy this problem
    and is well-known from other natural language processing
    tasks (e.g. language modeling). The goal of smoothing applied
    in the scope of machine translation is to model rarely seen
    translation rules better. In this thesis, we investigate and
    compare different smoothing techniques for hierarchical
    phrase-based translation. Furthermore, the extraction and
    translation processes are two separated steps. Therefore,
    the extraction does not take into account whether the
    obtained translation rules are actually needed in the
    translation process. To learn whether a translation rule is
    relevant for the translation process, we pursue the approach
    of force-decoding the training data. Given a sentence pair
    of the training data, the translation of the source sentence
    is constrained to produce the corresponding target sentence.
    The applied translation rules are then determined and the
    corresponding translation probabilities re-estimated. In
    order to be able to translate a large set of training data,
    an efficient and fast framework is needed. In this work, we
    introduce such a framework for re-estimating hierarchical
    translation models. This approach enables us to obtain
    smaller translation models while simultaneously improving
    the translation quality. We further compare our proposed
    scheme with another state-of-the-art translation model
    training approach, namely discriminative training, on a
    large-scale Chinese-to-English translation task. Spoken
    language translation is the task of translating
    automatically transcribed speech. Since most automatic
    speech recognition systems provide transcriptions without
    punctuation marks and case information, this information has
    to be re-introduced before the actual translation takes
    place. In this work, we show that performing punctuation
    prediction and re-casing by applying a machine translation
    system helps to improve the translation quality. In
    particular, we propose to apply hierarchical translation
    rather than phrase-based translation for this task. Finally,
    experiments were conducted on a large-scale
    English-to-French spoken language translation task. All
    methods described in this thesis have been made freely
    available to the research community as they were integrated
    into the open-source translation toolkit Jane.},
  cin = {120000 / 122010},
  ddc = {004},
  cid = {$I:(DE-82)120000_20140620$ / $I:(DE-82)122010_20140620$},
  typ = {PUB:(DE-HGF)11},
  doi = {10.18154/RWTH-2017-09742},
  url = {https://publications.rwth-aachen.de/record/709157},
}