% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
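%
% For illustration, a minimal LaTeX document that loads this file through
% biblatex with the biber backend might look as follows (a sketch only; the
% file name “references.bib” is an assumption):
%
%   \documentclass{article}
%   \usepackage[backend=biber]{biblatex}  % biber handles UTF-8 natively
%   \addbibresource{references.bib}       % this .bib file
%   \begin{document}
%   See the thesis by Kontogianni~\cite{Kontogianni:844228}.
%   \printbibliography
%   \end{document}
%
% Typical build sequence: pdflatex, then biber on the job name, then pdflatex again.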
@PHDTHESIS{Kontogianni:844228,
author = {Kontogianni, Theodora},
othercontributors = {Leibe, Bastian and Schindler, Konrad},
title = {{O}bject discovery, interactive and 3{D} segmentation for
large-scale computer vision tasks},
school = {RWTH Aachen University},
type = {Dissertation},
address = {Aachen},
publisher = {RWTH Aachen University},
reportid = {RWTH-2022-03753},
pages = {1 online resource : illustrations},
year = {2021},
note = {Published on the publication server of RWTH Aachen
University 2022; Dissertation, RWTH Aachen University, 2021},
abstract = {Computer vision has made tremendous leaps during the past
decade. One of the key factors behind this growth is the
vast amount of data that we can generate today: millions of
pictures are shared online daily, and new specialized sensors
make it easy to capture 3D data. Together with recent advances
in deep learning and the increased availability of
computational power, it is now possible to take advantage of
these large amounts of high-quality data. As a result,
computer vision has achieved impressive performance gains across
numerous fields and applications. However, the increased
amount of available data also introduces new challenges. To
exploit the large body of available data, we need either
efficient unsupervised algorithms that learn patterns from
unlabeled data or efficient labeling tools that enable the
creation of large-scale labeled datasets. Such labeled datasets
are essential for the success of most deep learning models.
In this thesis, we deal with issues arising from these
different aspects of computer vision: unsupervised
algorithms for landmark recognition, fully-supervised
methods for semantic segmentation on large-scale 3D point
clouds, and interactive object segmentation for out-of-domain
dataset labeling. More specifically, the main contributions
of this thesis are organized into three parts, each one
covering an individual computer vision topic: In the first
part, we address the problem of object discovery in
time-varying, large-scale image collections. We propose a novel
tree structure that closely approximates the Minimum
Spanning Tree and present an efficient construction approach
to incrementally update the tree structure when new data is
added to the image database, either in online-streaming or in
batch form. Our proposed tree structure
is created in a local neighborhood of the matching graph
during image retrieval and can be efficiently updated
whenever the image database is extended. We show how our
tree structure can be incorporated in existing clustering
approaches such as Single-Link and Iconoid Shift for
efficient large-scale object discovery in image collections.
In the second part of the thesis, we focus on defining novel
3D convolutional and recurrent operators over unstructured
3D point clouds. The goal is to learn point representations
for the task of 3D semantic segmentation. The recurrent
consolidation unit layer operates on multi-scale and grid
neighborhoods and allows our model to learn long-range
dependencies. Additionally, we introduce two types of local
neighborhoods for each 3D point that encode local geometry
to facilitate the definition and use of convolutions on 3D
point clouds. Finally, in the third part, we address the
task of interactive object segmentation. Aided by an algorithm,
a user segments an object mask in a given image by clicking
inside or outside the object. We present a method that
significantly reduces the number of required user clicks
compared to previous work. In particular, we look at
out-of-domain settings where the test datasets are
significantly different from the datasets used to train our
deep model. We propose to treat user corrections as sparse
supervision to adapt our model parameters on-the-fly. Our
adaptive method can significantly reduce the number of clicks
required to segment an object, handle distribution shifts from
small to large, specialize to a new class of objects introduced
at test time, and even cope with large domain changes from
commercial images to medical and aerial data.},
cin = {123710 / 120000},
ddc = {004},
cid = {$I:(DE-82)123710_20200205$ / $I:(DE-82)120000_20140620$},
typ = {PUB:(DE-HGF)11},
doi = {10.18154/RWTH-2022-03753},
url = {https://publications.rwth-aachen.de/record/844228},
}