% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

@PHDTHESIS{Nuhn:772331,
      author       = {Nuhn, Malte},
      othercontributors = {Ney, Hermann Josef and Juan, Alfons},
      title        = {{U}nsupervised training with applications in natural
                      language processing},
      school       = {RWTH Aachen University},
      type         = {Dissertation},
      address      = {Aachen},
      reportid     = {RWTH-2019-10638},
      pages        = {1 online resource (xiii, 139 pages) : illustrations,
                      diagrams},
      year         = {2019},
      note         = {Published on the publication server of RWTH Aachen
                      University in 2020; Dissertation, RWTH Aachen
                      University, 2019},
      abstract     = {The state-of-the-art algorithms for various natural
                      language processing tasks require large amounts of labeled
                      training data. At the same time, obtaining labeled data of
                      high quality is often the most costly step in setting up
                      natural language processing systems. In contrast, unlabeled
                      data is much cheaper to obtain and available in larger
                      amounts. Currently, only a few training algorithms make
                      use of unlabeled data. In practice, training with only
                      unlabeled data is not performed at all. In this thesis, we
                      study how unlabeled data can be used to train a variety of
                      models used in natural language processing. In particular,
                      we study models applicable to solving substitution ciphers,
                      spelling correction, and machine translation. This thesis
                      lays the groundwork for unsupervised training by presenting
                      and analyzing the corresponding models and unsupervised
                      training problems in a consistent manner. We show that the
                      unsupervised training problem that occurs when breaking
                      one-to-one substitution ciphers is equivalent to the
                      quadratic assignment problem (QAP) if a bigram language
                      model is incorporated, and is therefore NP-hard. Based on this
                      analysis, we present an effective algorithm for unsupervised
                      training for deterministic substitutions. In the case of
                      English one-to-one substitution ciphers, we show that our
                      novel algorithm achieves results close to human performance,
                      as presented in [Shannon 49]. Also, with this algorithm, we
                      present, to the best of our knowledge, the first automatic
                      decipherment of the second part of the Beale ciphers.
                      Further, for the task of spelling correction, we work out
                      the details of the EM algorithm [Dempster $\&$ Laird+ 77]
                      and experimentally show that the error rates achieved using
                      purely unsupervised training reach those of supervised
                      training. For handling large vocabularies, we introduce a
                      novel model initialization as well as multiple training
                      procedures that greatly speed up training without
                      significantly hurting the performance of the resulting
                      models. By incorporating an alignment model, we
                      further extend this model such that it can be applied to the
                      task of machine translation. We show that the true lexical
                      and alignment model parameters can be learned without any
                      labeled data: we demonstrate experimentally that the corresponding
                      likelihood function attains its maximum at the true model
                      parameters if a sufficient amount of unlabeled data is
                      available. Further, for the problem of spelling correction
                      with symbol substitutions and local swaps, we also show
                      experimentally that the performance achieved with purely
                      unsupervised EM training reaches that of supervised
                      training. Finally, using the methods developed in this
                      thesis, we present results on an unsupervised training task
                      for machine translation with a vocabulary ten times larger
                      than that of the tasks investigated in previous work.},
      cin          = {122010 / 120000},
      ddc          = {004},
      cid          = {$I:(DE-82)122010_20140620$ / $I:(DE-82)120000_20140620$},
      typ          = {PUB:(DE-HGF)11},
      doi          = {10.18154/RWTH-2019-10638},
      url          = {https://publications.rwth-aachen.de/record/772331},
}