% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
%
% Doctoral thesis by Markus Freitag (RWTH Aachen University, 2016);
% record 681715 on publications.rwth-aachen.de.
%
% NOTE(review): `pages` holds a catalogue-style physical description, not a
% page range. Styles that print `pages` would emit it verbatim; consider
% moving it to `pagetotal`/`note` for the target style -- TODO confirm.
% NOTE(review): `othercontributors`, `reportid`, `cin`, `ddc`, `cid`, `typ`
% and `urn` are not standard BibTeX fields; presumably repository metadata
% that standard styles ignore -- verify before relying on them.

@phdthesis{Freitag:681715,
  author            = {Freitag, Markus},
  othercontributors = {Ney, Hermann and Yvon, Francois},
  title             = {Investigations on machine translation system combination},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  reportid          = {RWTH-2017-00377},
  pages             = {1 Online-Ressource (x, 116 Seiten) : Illustrationen, Diagramme},
  year              = {2016},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen University 2017; Dissertation, RWTH Aachen University, 2016},
  abstract          = {Machine translation is a task in the field of natural
    language processing whose objective is to translate documents from one
    human language into another human language without any human interaction.
    There has been extensive research in the field of machine translation and
    many different machine translation approaches have emerged. Current
    machine translation systems are based on different paradigms, such as
    e.g. phrases, phrases with gaps, hand-written rules, syntactical rules or
    neural networks. All approaches have been proven to perform well on
    several international evaluation campaigns, but no one has emerged as the
    superior approach. In this thesis, we investigate the combination of
    different machine translation approaches to benefit from all of them.
    The combination of outputs from multiple machine translation systems has
    been successfully applied in state-of-the-art machine translation
    evaluations for several years. System combination is a reliable method to
    combine the benefits of different machine translation systems into one
    single translation output.
    System combination relies on the concept of majority voting and the
    assumption that different machine translation engines produce different
    errors at different positions, but the majority agrees on a correct
    translation. Confusion network decoding has emerged as one of the most
    successful approaches in combining machine translation outputs. The main
    goal of this thesis is to develop novel methods to improve the
    translation quality of confusion network system combination. In this
    thesis, we introduce a novel system combination implementation which has
    been made available as open-source toolkit to the research community. We
    extend previous invented approaches by the addition of several models and
    show that our methods produce better or similar translation results as
    the previous invented approaches. Moreover, compared to one single system
    combination approach, our implementation is significantly better in
    several translation tasks. On top of this high-level baseline, we extend
    the confusion network approach with an additional model learned by a
    neural network. The system combination output is typically a combination
    of the best available system engines and ignores the output of weaker
    translation systems, although they could be helpful in some situations.
    We show that our novel model also takes weaker systems into account and
    detects the positions where the weaker systems help to improve the
    quality of the combined translation. One of the most important steps in
    system combination is the pairwise alignment process between the
    different input systems. We introduce a novel alignment algorithm which
    is based on the source sentence and improves the translation quality of
    our combined translation. In addition to automatic evaluations, we also
    let humans evaluate our novel approach. Furthermore, we investigate the
    effect of decoding direction in the commonly used phrase-based and
    hierarchical phrase-based machine translation approaches.
    We show how to benefit from system combination and combine different
    machine translation setups that are based on different decoding
    directions. In addition, we investigate techniques to combine the
    different configurations in an earlier stage, e.g. after the alignment
    training or the phrase extraction step.
    Finally, we present our recent evaluation results that were obtained with
    our previously invented methods. We participated in the most recent
    international evaluation campaigns and demonstrate that our methods
    outperform the translation setups of all participating top-ranked
    international research labs in several language pairs.},
  cin               = {122010 / 120000},
  ddc               = {004},
  cid               = {$I:(DE-82)122010_20140620$ / $I:(DE-82)120000_20140620$},
  typ               = {PUB:(DE-HGF)11},
  urn               = {urn:nbn:de:hbz:82-rwth-2017-003774},
  doi               = {10.18154/RWTH-2017-00377},
  url               = {https://publications.rwth-aachen.de/record/681715},
}