% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
%
% PhD thesis record, citation key Welten:1003572.
%
% Notes for maintainers:
% - reportid, cin, ddc, cid, typ, othercontributors and extent are not
%   standard BibTeX/biblatex fields; they are kept as record metadata.
%   NOTE(review): othercontributors presumably lists the thesis
%   supervisors/examiners -- confirm against the source record.
% - extent carries the physical description that this record previously
%   stored in the pages field. It is not a page range, and styles that print
%   the pages field would have rendered it as a page reference.
% - Annotations live out here because a percent sign is not a comment marker
%   inside an entry body.

@phdthesis{Welten:1003572,
  author            = {Welten, Sascha Martin},
  othercontributors = {Decker, Stefan Josef and Kirsten, Toralf},
  title             = {Methods for practical data sharing and decentralised analytics: an integrated approach},
  school            = {RWTH Aachen University},
  type              = {Dissertation},
  address           = {Aachen},
  publisher         = {RWTH Aachen University},
  reportid          = {RWTH-2025-01136},
  extent            = {1 Online-Ressource : Illustrationen},
  year              = {2025},
  note              = {Veröffentlicht auf dem Publikationsserver der RWTH Aachen University; Dissertation, RWTH Aachen University, 2025},
  abstract          = {Sharing data across organisational borders and domains has become indispensable for scientific progress and innovation, mainly due to the growing trend of Artificial Intelligence (AI) and data-driven approaches, which require large volumes of data for optimal performance. However, concerns about data privacy, security, and regulatory compliance often hinder effective data sharing. As a result, data remains siloed and largely inaccessible for research or industry purposes. Although numerous data-sharing concepts have been proposed, they often lack practical realisation and evaluation in real-life scenarios. This dissertation addresses these shortcomings by contributing an integrated framework that combines privacy-preserving data-sharing methods with Decentralised Analytics (DA). Based on current and emerging data-sharing policies and regulations, this dissertation first investigates and identifies several key requirements essential for data sharing. These include establishing trust, ensuring controlled data access, managing distributed data sources, and addressing issues related to data heterogeneity and utility.
    For each derived requirement, this work conceptualises, implements, and evaluates various proof of concepts, which are subsequently integrated into a lifecycle called ’DAOps’. This DevOps-inspired lifecycle offers a structured and seamless workflow for managing data analysis processes on shared data. To bring this lifecycle into operation and real-world application, a novel data-sharing platform called ’Platform for Analytics and Distributed Machine Learning for Enterprises’ (PADME) is developed, which implements the DAOps lifecycle. The last part of this dissertation covers the evaluation of PADME in its entirety across five research studies in healthcare and hydrology. The evaluation confirms the applicability and operational readiness of PADME in real-world research scenarios. The platform supports a broad spectrum of analysis types, from basic statistics to advanced Machine Learning (ML), while enabling multi-institutional collaborations and managing data with heterogeneous types and varying volumes. It adheres to established data management standards and addresses the increasing demand for ML applications by enabling decentralised model training. The findings demonstrate that the decentralised approaches achieve performance levels comparable to those developed with traditional centralised methods. This suggests that decentralised approaches offer a viable and privacy-preserving alternative to conventional data analysis techniques, which may lack privacy protection and encounter regulatory challenges. Additionally, the evaluation emphasises that common data schema standards and well-balanced data distributions are critical drivers for successful data sharing and DA. In conclusion, this dissertation contributes to the emerging need for data-sharing platforms and brings theoretical data-sharing concepts into practice.
    The research demonstrates the feasibility of data-driven research with DA through PADME and provides insights into how automated methods support systematic and secure data analysis. Ultimately, the outcomes of this dissertation fuel research collaborations, data-driven innovations, as well as privacy-preserving and data-sovereign sharing of data between stakeholders.},
  cin               = {124510 / 120000},
  ddc               = {004},
  cid               = {$I:(DE-82)124510_20160614$ / $I:(DE-82)120000_20140620$},
  typ               = {PUB:(DE-HGF)11},
  doi               = {10.18154/RWTH-2025-01136},
  url               = {https://publications.rwth-aachen.de/record/1003572},
}