% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@phdthesis{Peter:801694,
  author            = {Peter, Jan-Thorsten},
  othercontributors = {Ney, Hermann and van Genabith, Josef},
  title             = {An exploration of alignment concepts to bridge the gap
                       between phrase-based and neural machine translation},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  reportid          = {RWTH-2020-09034},
  pages             = {1 Online-Ressource (xi, 110 Seiten) : Illustrationen,
                       Diagramme},
  year              = {2020},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University; Dissertation, RWTH Aachen University, 2020},
  abstract          = {Machine translation, the task of automatically translating
                       text from one natural language into another, has seen
                       massive changes in recent years. After phrase-based systems
                       represented the state of the art for over a decade,
                       advancements were made in the structure of neural networks
                       and computational power. These advancements made it possible
                       to build neural machine translation systems which first
                       improved and later outperformed phrase-based systems. These
                       two approaches have their strength in different areas. The
                       well-known phrase-based systems allow fast translations on
                       CPU that can easily be explained by examining the
                       translation table. In contrast, neural machine translation
                       produces more fluent translations and is more robust to
                       small changes in the provided input. This thesis aims to
                       improve both systems by combining their advantages. The
                       first part of this thesis focuses on investigating the
                       integration of feed-forward neural models into phrase-based
                       systems. Small changes in the input of a phrase-based system
                       can turn an event that was seen in the training data into an
                       unseen event. Neural network models are by design able to
                       handle such cases due to the continuous space representation
                       of the input, whereas phrase-based systems are forced to
                       fall back to shorter phrases. This means a loss of knowledge
                       about the local context which results in a degradation of
                       the translation quality. We combine the flexibility provided
                       by feed-forward neural networks with phrase-based systems
                       while gaining a significant improvement over the
                       phrase-based baseline systems. We use feed-forward networks
                       since they are conceptually simple and computationally fast.
                       Commonly, their structure only utilizes local source and
                       target context. Due to this structure, they cannot capture
                       long-distance dependencies. We improve the performance of
                       feed-forward neural networks by efficiently incorporating
                       long-distance dependencies into their structure by using a
                       bag-of-words input. The second part of this thesis focuses
                       on the pure neural machine translation approach using the
                       encoder-decoder model with an attention mechanism. This
                       mechanism corresponds indirectly to a soft alignment. At
                       each translation step, this model relies only on its
                       previous internal state and the current decoder position to
                       compute the attention weights. There is no direct feedback
                       from the previously used attention. Inspired by hidden
                       Markov models where the prediction of the currently-aligned
                       position depends also on the previously-aligned position, we
                       improve the attention model by adding direct feedback from
                       previously-used attention to improve the overall model
                       performance. Additionally, we utilize word alignments for
                       neural networks to guide the neural network during training.
                       By incorporating the alignment as an additional cost
                       function, the network performs better as our experiments
                       show. Even though the state-of-the-art neural models do not
                       require word alignments anymore, there are still
                       applications that benefit from good alignments. These
                       include the visualization of parallel sentences, the
                       creation of dictionaries, the automatic segmentation of long
                       parallel sentences and the above-mentioned usage during
                       neural network training. We present a way to apply neural
                       models to create word alignments that improve over word
                       alignments trained with IBM and hidden Markov models. These
                       techniques are evaluated on various large-scale translation
                       tasks of public-evaluation campaigns. Applying new methods
                       with usually complex workflows to new translation tasks is a
                       cumbersome and error-prone exercise. We present a workflow
                       manager, which is developed as part of this thesis to
                       simplify this task and enable an easier knowledge transfer.},
  cin               = {122010 / 120000},
  ddc               = {004},
  cid               = {$I:(DE-82)122010_20140620$ / $I:(DE-82)120000_20140620$},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2020-09034},
  url               = {https://publications.rwth-aachen.de/record/801694},
}