% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% PhD dissertation record (RWTH Aachen University, 2025) by Mingbo Cheng.
% The fields othercontributors, reportid, cin, ddc, cid and typ are not
% standard BibTeX fields; standard styles silently ignore unknown fields, so
% they act as repository metadata only.
% NOTE(review): pages holds a catalogue extent statement rather than a page
% range; a style that prints pages for theses would presumably emit it
% verbatim -- confirm whether it should move to a non-printing field.
% NOTE(review): note repeats the thesis type, school and year that are already
% given in their own fields; confirm the rendered reference is not redundant.
@phdthesis{Cheng:1020925,
  author            = {Cheng, Mingbo},
  othercontributors = {Berlage, Thomas Leo and Costa, Ivan G. and Decker, Stefan Josef},
  title             = {Computational integration and trajectory inference of
                       single cell multi-modal data},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {RWTH Aachen University},
  reportid          = {RWTH-2025-09334},
  pages             = {1 Online-Ressource : Illustrationen},
  year              = {2025},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen
                       University; Dissertation, RWTH Aachen University, 2025},
  abstract          = {Single-cell analysis provides a new approach to inspect
                       biological processes at a single-cell resolution. Recently,
                       new sequencing protocols have been developed to
                       simultaneously profile the transcriptome, epigenome, and
                       proteome features in single cells. With these technologies,
                       researchers can interpret two or more biological phenomena
                       such as gene regulation and transcription factor binding
                       events at the same time improving the ability to draw causal
                       relationships between these distinct molecular mechanisms.
                       However, a major challenge in single-cell multimodal
                       analysis is that the feature spaces of distinct modalities
                       are extremely different. The differences in feature sizes,
                       sparsity and distributions pose a significant challenge in
                       utilizing the features to obtain a shared latent space
                       across all modalities for downstream analysis. Although
                       several methods have been proposed to address this issue,
                       these methods either require prior knowledge to set
                       parameters or lack scalability. Moreover, several methods
                       only work for specific modality types or do not allow the
                       interpretation of inferred latent components. Another
                       critical issue when dealing with single-cell multimodal data
                       is to infer trajectories to capture cell lineage
                       development. Many methods have been developed to infer
                       trajectories for single-cell data, but few are designed for
                       single-cell multimodal analysis, which can offer molecular
                       information on gene expression and gene regulation at the
                       same cells. Moreover, current approaches have been only used
                       and applied to simpler cell differentiation trees and are
                       unlikely to scale to large trees. While cellular graphs are
                       widely used as representations of single cell data, most
                       methods only use signals on nodes (cells), but do not
                       consider signals associated to edges (differentiation
                       events) between cells. In this thesis, we propose MOJITOO
                       and PHLOWER, accounting respectively for single-cell
                       multimodal integration and trajectory inference. MOJITOO
                       explores Canonical Correlation Analysis (CCA) for an
                       effective and efficient integration of arbitrary modalities
                       into a common joint embedding. Moreover, MOJITOO has few
                       free parameters and allows interpretation, i.e. associates
                       latent spaces to variables. We performed comprehensive
                       benchmarking on multimodal single cell data, which evaluated
                       the MOJITOO properties in the preservation of information in
                       original modalities and in downstream tasks such as distance
                       estimation and clustering. This indicates that MOJITOO
                       performs quite well comparing to competing approaches and
                       has overall lower computational requirements among evaluated
                       methods. In a case study with blood cells, we demonstrate
                       that the latent space obtained by MOJITOO can capture major
                       blood cell types and demonstrate the relation of latent
                       dimensions to known molecular markers. Overall, these
                       results demonstrate that MOJITOO is a powerful computational
                       approach in biological studies for single-cell multimodal
                       integration analysis. PHLOWER is a novel trajectory
                       inference model for multimodal single cell data. It uses
                       simplicial complex and Hodge Laplacian (HL) decomposition to
                       find embedding at edge/trajectories spaces. A comprehensive
                       benchmarking on complex cell differentiation trees indicates
                       that PHLOWER has the best properties in the recovery of tree
                       topologies and associating cells to the trees than
                       state-of-the-art methods. Moreover, we explore the power of
                       multimodal analysis using PHLOWER on large-scale single-cell
                       multimodal data with transcriptome and epigenome in a kidney
                       organoid time course. PHLOWER infers trajectories related to
                       major kidney cells detected in the organoids. Moreover, it
                       detects transcription factors that regulate lineage gene
                       expression. Our analyses shed novel light on mechanisms of
                       kidney lineage development. Altogether, these results
                       demonstrate that PHLOWER is a powerful computational
                       approach in biological studies for single-cell multimodal
                       trajectory inference analysis.},
  cin               = {122620},
  ddc               = {004},
  cid               = {$I:(DE-82)122620_20140620$},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2025-09334},
  url               = {https://publications.rwth-aachen.de/record/1020925},
}