% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@PHDTHESIS{Mller:63728,
author = {Müller, Emmanuel Alexander},
othercontributors = {Seidl, Thomas},
title = {{E}fficient knowledge discovery in subspaces of high
dimensional databases},
address = {Aachen},
publisher = {Publikationsserver der RWTH Aachen University},
reportid = {RWTH-CONV-125151},
pages = {270 S. : graph. Darst.},
year = {2010},
note = {Zsfassung in engl. und dt. Sprache; Aachen, Techn.
Hochsch., Diss., 2010},
abstract = {In many recent applications such as sensor network
analysis, customer segmentation or gene expression analysis
tremendous amount of data is gathered. As collecting and
storing of data is cheap, users tend to record everything
they can. Thus, in today's applications for each object one
uses many attributes to provide as much information as
possible. However, the valuable knowledge to be learned out
of this information is hidden in subsets of the given
attributes. Considering any of these subspaces one expands
the search space significantly. This poses novel challenges
to data mining techniques which aim at extracting this
knowledge out of high dimensional databases. This work has
its focus on clustering as one of the main data mining
tasks. Clustering is an established technique for grouping
objects based on mutual similarity. As traditional
clustering approaches are unable to detect clusters hidden
in subspaces of high dimensional databases, recent subspace
clustering models have been proposed that detect groups of
similar objects in any subset of the given attributes.
However, as the number of possible subspaces scales
exponentially with the number of attributes, development of
efficient techniques is crucial for knowledge discovery in
subspaces of high dimensional databases. In this work we
propose both novel subspace clustering models aiming at high
quality results and efficient processing schemes for these
models. We start with novel subspace cluster definitions
ensuring the detection of clusters in arbitrary subspaces.
We highlight the general challenges of redundancy in recent
subspace clustering models and propose novel non-redundant
subspace clustering definitions. In this context, our aim is
to reduce result sizes to all and only novel knowledge by
optimizing the overall subspace clustering result. According
to these models not all subspace clusters are valuable for
the final result. Based on this general observation we
propose efficient processing schemes. Our novel algorithmic
solutions overcome efficiency problems caused by exhaustive
search of almost all subspace projections and costly
database access. We select only the most promising subspace
regions for efficient subspace clustering. Overall, our
techniques are scalable to large and high dimensional
databases providing only few but high quality subspace
clusters. Furthermore, as a general contribution to the
community we provide a systematic evaluation study on a
broad set of approaches. We show both efficiency and quality
characteristics of major paradigms. As major aspect for
sustained scientific research we ensure repeatability and
comparability for all of our empirical results. Our
evaluation framework is available as open source project and
provides a basis for future enhancements in this research
area. Thus, this thesis provides not only novel methods for
efficient cluster and also outlier detection in subspaces of
high dimensional data, but it is a fundamental basis for
repeatable comparison of recent data mining approaches.},
keywords = {Data Mining (SWD) / Wissensextraktion (SWD) /
Cluster-Analyse (SWD) / Dichtebasiertes Clusterverfahren
(SWD) / Ausreißer <Statistik> (SWD) / Evaluation (SWD) /
Open Source (SWD)},
cin = {122510 / 120000},
ddc = {004},
cid = {$I:(DE-82)122510_20140620$ / $I:(DE-82)120000_20140620$},
shelfmark = {H.2.8},
typ = {PUB:(DE-HGF)11},
urn = {urn:nbn:de:hbz:82-opus-33895},
url = {https://publications.rwth-aachen.de/record/63728},
}