2025
Miller, Catriona J; Golovina, Evgenija; Gokuladhas, Sreemol; Wicker, Jörg; Jacobson, Jessie C; O'Sullivan, Justin M
Unraveling ADHD: genes, co-occurring traits, and developmental dynamics Journal Article
In: Life Science Alliance, vol. 8, no. 5, 2025.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics, Biological Sciences, biomarkers, computational sustainability, machine learning
@article{miller2025unraveling,
  title = {Unraveling {ADHD}: genes, co-occurring traits, and developmental dynamics},
  author = {Miller, Catriona J and Golovina, Evgenija and Gokuladhas, Sreemol and Wicker, J{\"o}rg and Jacobson, Jessie C and O'Sullivan, Justin M},
  doi = {10.26508/lsa.202403029},
  year = {2025},
  date = {2025-02-25},
  journal = {Life Science Alliance},
  volume = {8},
  number = {5},
  abstract = {Attention-deficit/hyperactivity disorder (ADHD) is a heterogeneous neurodevelopmental condition with a high prevalence of co-occurring conditions, contributing to increased difficulty in long-term management. Genome-wide association studies have identified variants shared between ADHD and co-occurring psychiatric disorders; however, the genetic mechanisms are not fully understood. We integrated gene expression and spatial organization data into a two-sample Mendelian randomization study for putatively causal ADHD genes in fetal and adult cortical tissues. We identified four genes putatively causal for ADHD in cortical tissues (fetal: ST3GAL3, PTPRF, PIDD1; adult: ST3GAL3, TIE1). Protein--protein interaction databases seeded with the causal ADHD genes identified biological pathways linking these genes with conditions (e.g., rheumatoid arthritis) and biomarkers (e.g., lymphocyte counts) known to be associated with ADHD, but without previously shown genetic relationships. The analysis was repeated on adult liver tissue, where putatively causal ADHD gene ST3GAL3 was linked to cholesterol traits. This analysis provides insight into the tissue-dependent temporal relationships between ADHD, co-occurring traits, and biomarkers. Importantly, it delivers evidence for the genetic interplay between co-occurring conditions, both previously studied and unstudied, with ADHD. The multimorbid3D pipeline was created and run in Python (version 3.8.8). All visualizations and data analysis were performed in R (version 4.2.0) through RStudio (version 2022.02.2). Table S16 lists the datasets and software that have been used in our analyses. All scripts are available on GitHub (https://github.com/Catriona-Miller/ADHD_Co-occurring_Traits). Table S16. Software and datasets used for this analysis. Ethics statement: Ethics approval was obtained from the University of Auckland Human Participants Ethics Committee (Decoding SNPs in context, UAHPEC19373).},
  keywords = {bioinformatics, Biological Sciences, biomarkers, computational sustainability, machine learning},
  pubstate = {published},
  tppubtype = {article}
}
2024
Hafner, Jasmin; Lorsbach, Tim; Schmidt, Sebastian; Brydon, Liam; Dost, Katharina; Zhang, Kunyang; Fenner, Kathrin; Wicker, Jörg
Advancements in Biotransformation Pathway Prediction: Enhancements, Datasets, and Novel Functionalities in enviPath Journal Article
In: Journal of Cheminformatics, vol. 16, no. 1, pp. 93, 2024, ISSN: 1758-2946.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: applicability domain, biodegradation, bioinformatics, cheminformatics, computational sustainability, enviPath, linked data, machine learning, multi-label classification, Process-based modeling
@article{hafner2023advancements,
  title = {Advancements in Biotransformation Pathway Prediction: Enhancements, Datasets, and Novel Functionalities in {enviPath}},
  author = {Hafner, Jasmin and Lorsbach, Tim and Schmidt, Sebastian and Brydon, Liam and Dost, Katharina and Zhang, Kunyang and Fenner, Kathrin and Wicker, J{\"o}rg},
  url = {https://jcheminf.biomedcentral.com/articles/10.1186/s13321-024-00881-6
https://envipath.org},
  doi = {10.1186/s13321-024-00881-6},
  issn = {1758-2946},
  year = {2024},
  date = {2024-08-06},
  urldate = {2024-08-06},
  journal = {Journal of Cheminformatics},
  volume = {16},
  number = {1},
  pages = {93},
  abstract = {enviPath is a widely used database and prediction system for microbial biotransformation pathways of primarily xenobiotic compounds. Data and prediction system are freely available both via a web interface and a public REST API. Since its initial release in 2016, we extended the data available in enviPath and improved the performance of the prediction system and usability of the overall system. We now provide three diverse data sets, covering microbial biotransformation in different environments and under different experimental conditions. This also enabled developing a pathway prediction model that is applicable to a more diverse set of chemicals. In the prediction engine, we implemented a new evaluation tailored towards pathway prediction, which returns a more honest and holistic view on the performance. We also implemented a novel applicability domain algorithm, which allows the user to estimate how well the model will perform on their data. Finally, we improved the implementation to speed up the overall system and provide new functionality via a plugin system.},
  keywords = {applicability domain, biodegradation, bioinformatics, cheminformatics, computational sustainability, enviPath, linked data, machine learning, multi-label classification, Process-based modeling},
  pubstate = {published},
  tppubtype = {article}
}
2023
Miller, Catriona J; Golovina, Evgenija; Wicker, Jörg; Jacobson, Jessie C; O'Sullivan, Justin M
De novo network analysis reveals autism causal genes and developmental links to co-occurring traits Journal Article
In: Life Science Alliance, vol. 6, no. 10, 2023.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: autism, bioinformatics, genomics
@article{Miller2023denovo,
  title = {De novo network analysis reveals autism causal genes and developmental links to co-occurring traits},
  author = {Miller, Catriona J and Golovina, Evgenija and Wicker, J{\"o}rg and Jacobson, Jessie C and O'Sullivan, Justin M},
  url = {https://www.medrxiv.org/content/10.1101/2023.04.24.23289060v1},
  doi = {10.26508/lsa.202302142},
  year = {2023},
  date = {2023-08-08},
  urldate = {2023-08-08},
  journal = {Life Science Alliance},
  volume = {6},
  number = {10},
  abstract = {Autism is a complex neurodevelopmental condition that manifests in various ways. Autism is often accompanied by other conditions, such as attention-deficit/hyperactivity disorder and schizophrenia, which can complicate diagnosis and management. Although research has investigated the role of specific genes in autism, their relationship with co-occurring traits is not fully understood. To address this, we conducted a two-sample Mendelian randomisation analysis and identified four genes located at the 17q21.31 locus that are putatively causal for autism in fetal cortical tissue (LINC02210, LRRC37A4P, RP11-259G18.1, and RP11-798G7.6). LINC02210 was also identified as putatively causal for autism in adult cortical tissue. By integrating data from expression quantitative trait loci, genes and protein interactions, we identified that the 17q21.31 locus contributes to the intersection between autism and other neurological traits in fetal cortical tissue. We also identified a distinct cluster of co-occurring traits, including cognition and worry, linked to the genetic loci at 3p21.1. Our findings provide insights into the relationship between autism and co-occurring traits, which could be used to develop predictive models for more accurate diagnosis and better clinical management.},
  keywords = {autism, bioinformatics, genomics},
  pubstate = {published},
  tppubtype = {article}
}
2022
Poonawala-Lohani, Nooriyan; Riddle, Pat; Adnan, Mehnaz; Wicker, Jörg
Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains: Ensemble methods for spatio-temporal Time Series Forecasting of Influenza-like Illness Proceedings Article
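In: Proceedings of the 13th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics, pp. 1-7, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 9781450393867.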
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics, computational sustainability, dynamic time warping, forecasting, influenza, machine learning, medicine, time series
% NOTE(review): booktitle was missing (required for inproceedings); the value below is taken from the DOI/ISBN record as recalled -- please confirm against the ACM Digital Library.
@inproceedings{Poonawala-Lohani2022geographic,
  title = {Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains: Ensemble methods for spatio-temporal Time Series Forecasting of Influenza-like Illness},
  author = {Poonawala-Lohani, Nooriyan and Riddle, Pat and Adnan, Mehnaz and Wicker, J{\"o}rg},
  booktitle = {Proceedings of the 13th {ACM} International Conference on Bioinformatics, Computational Biology and Health Informatics},
  doi = {10.1145/3535508.3545562},
  isbn = {9781450393867},
  year = {2022},
  date = {2022-08-07},
  pages = {1--7},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  abstract = {Influenza is a communicable respiratory illness that can cause serious public health hazards. Flu surveillance in New Zealand tracks case counts from various District health boards (DHBs) in the country to monitor the spread of influenza in different geographic locations. Many factors contribute to the spread of the influenza across a geographic region, and it can be challenging to forecast cases in one region without taking into account case numbers in another region. This paper proposes a novel ensemble method called Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains (GEO-Reach). GEO-Reach is an ensemble technique that uses a two layer approach to utilise interdependence of historical case counts between geographic regions in New Zealand. This work extends a previously published method by the authors called Randomized Ensembles of Auto-regression chains (Reach). State-of-the-art forecasting models look at studying the spread of the virus. They focus on accurate forecasting of cases for a location using historical case counts for the same location and other data sources based on human behaviour such as movement of people across cities/geographic regions. This new approach is evaluated using Influenza like illness (ILI) case counts in 7 major regions in New Zealand from the years 2015-2019 and compares its performance with other standard methods such as Dante, ARIMA, Autoregression and Random Forests. The results demonstrate that the proposed method performed better than baseline methods when applied to this multi-variate time series forecasting problem.},
  keywords = {bioinformatics, computational sustainability, dynamic time warping, forecasting, influenza, machine learning, medicine, time series},
  pubstate = {published},
  tppubtype = {inproceedings}
}
2020
Cheng, Xinlai; Haeberle, Stefanie; Shytaj, Iart Luca; Gama-Brambila, Rodrigo A.; Theobald, Jannick; Ghafoory, Shahrouz; Wölker, Jessica; Basu, Uttara; Schmidt, Claudia; Timm, Annika; Taškova, Katerina; Bauer, Andrea S.; Hoheisel, Jörg; Tsopoulidis, Nikolaos; Fackler, Oliver T.; Savarino, Andrea; Andrade-Navarro, Miguel A.; Ott, Ingo; Lusic, Marina; Hadaschik, Eva N.; Wölfl, Stefan
NHC-gold compounds mediate immune suppression through induction of AHR-TGFβ1 signalling in vitro and in scurfy mice Journal Article
In: Communications Biology, vol. 3, art. 10, 2020, ISSN: 2399-3642.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics
% NOTE(review): the original pages value repeated the ISSN (2399-3642); replaced with the article number as recalled from the publisher record -- please confirm.
@article{Cheng2020NHC,
  title = {{NHC}-gold compounds mediate immune suppression through induction of {AHR-TGF$\beta$1} signalling in vitro and in scurfy mice},
  author = {Cheng, Xinlai and Haeberle, Stefanie and Shytaj, Iart Luca and Gama-Brambila, Rodrigo A. and Theobald, Jannick and Ghafoory, Shahrouz and W{\"o}lker, Jessica and Basu, Uttara and Schmidt, Claudia and Timm, Annika and Ta{\v{s}}kova, Katerina and Bauer, Andrea S. and Hoheisel, J{\"o}rg and Tsopoulidis, Nikolaos and Fackler, Oliver T. and Savarino, Andrea and Andrade-Navarro, Miguel A. and Ott, Ingo and Lusic, Marina and Hadaschik, Eva N. and W{\"o}lfl, Stefan},
  url = {https://www.nature.com/articles/s42003-019-0716-8},
  doi = {10.1038/s42003-019-0716-8},
  issn = {2399-3642},
  year = {2020},
  date = {2020-01-03},
  urldate = {2020-01-03},
  journal = {Communications Biology},
  volume = {3},
  pages = {10},
  abstract = {Gold compounds have a long history of use as immunosuppressants, but their precise mechanism of action is not completely understood. Using our recently developed liver-on-a-chip platform we now show that gold compounds containing planar N-heterocyclic carbene (NHC) ligands are potent ligands for the aryl hydrocarbon receptor (AHR). Further studies showed that the lead compound (MC3) activates TGFβ1 signaling and suppresses CD4+ T-cell activation in vitro, in human and mouse T cells. Conversely, genetic knockdown or chemical inhibition of AHR activity or of TGFβ1-SMAD-mediated signaling offsets the MC3-mediated immunosuppression. In scurfy mice, a mouse model of human immunodysregulation polyendocrinopathy enteropathy X-linked syndrome, MC3 treatment reduced autoimmune phenotypes and extended lifespan from 24 to 58 days. Our findings suggest that the immunosuppressive activity of gold compounds can be improved by introducing planar NHC ligands to activate the AHR-associated immunosuppressive pathway, thus expanding their potential clinical application for autoimmune diseases.},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
2019
Taškova, Katerina; Fontaine, Jean-Fred; Mrowka, Ralf; Andrade-Navarro, Miguel A.
Literature optimized integration of gene expression for organ-specific evaluation of toxicogenomics datasets Journal Article
In: PLOS ONE, vol. 14, no. 1, pp. 1-21, 2019.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics
@article{10.1371/journal.pone.0210467,
  title = {Literature optimized integration of gene expression for organ-specific evaluation of toxicogenomics datasets},
  author = {Ta{\v{s}}kova, Katerina and Fontaine, Jean-Fred and Mrowka, Ralf and Andrade-Navarro, Miguel A.},
  url = {https://doi.org/10.1371/journal.pone.0210467},
  doi = {10.1371/journal.pone.0210467},
  year = {2019},
  date = {2019-01-01},
  urldate = {2019-01-01},
  journal = {PLOS ONE},
  volume = {14},
  number = {1},
  pages = {1--21},
  publisher = {Public Library of Science},
  abstract = {The study of drug toxicity in human organs is complicated by their complex inter-relations and by the obvious difficulty to testing drug effects on biologically relevant material. Animal models and human cell cultures offer alternatives for systematic and large-scale profiling of drug effects on gene expression level, as typically found in the so-called toxicogenomics datasets. However, the complexity of these data, which includes variable drug doses, time points, and experimental setups, makes it difficult to choose and integrate the data, and to evaluate the appropriateness of one or another model system to study drug toxicity (of particular drugs) of particular human organs. Here, we define a protocol to integrate drug-wise rankings of gene expression changes in toxicogenomics data, which we apply to the TG-GATEs dataset, to prioritize genes for association to drug toxicity in liver or kidney. Contrast of the results with sets of known human genes associated to drug toxicity in the literature allows to compare different rank aggregation approaches for the task at hand. Collectively, ranks from multiple models point to genes not previously associated to toxicity, notably, the PCNA clamp associated factor (PCLAF), and genes regulated by the master regulator of the antioxidant response NFE2L2, such as NQO1 and SRXN1. In addition, comparing gene ranks from different models allowed us to evaluate striking differences in terms of toxicity-associated genes between human and rat hepatocytes or between rat liver and rat hepatocytes. We interpret these results to point to the different molecular functions associated to organ toxicity that are best described by each model. We conclude that the expected production of toxicogenomics panels with larger numbers of drugs and models, in combination with the ongoing increase of the experimental literature in organ toxicity, will lead to increasingly better associations of genes for organism toxicity.},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
2018
Theobald, Jannick; Ghanem, Ali; Wallisch, Patrick; Banaeiyan, Amin A.; Andrade-Navarro, Miguel A.; Taškova, Katerina; Haltmeier, Manuela; Kurtz, Andreas; Becker, Holger; Reuter, Stefanie; Mrowka, Ralf; Cheng, Xinlai; Wölfl, Stefan
Liver-Kidney-on-Chip To Study Toxicity of Drug Metabolites Journal Article
In: ACS Biomaterials Science & Engineering, vol. 4, no. 1, pp. 78-89, 2018, (PMID: 33418680).
Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics
@article{Theobald2018Liver,
  title = {Liver-Kidney-on-Chip To Study Toxicity of Drug Metabolites},
  author = {Theobald, Jannick and Ghanem, Ali and Wallisch, Patrick and Banaeiyan, Amin A. and Andrade-Navarro, Miguel A. and Ta{\v{s}}kova, Katerina and Haltmeier, Manuela and Kurtz, Andreas and Becker, Holger and Reuter, Stefanie and Mrowka, Ralf and Cheng, Xinlai and W{\"o}lfl, Stefan},
  url = {https://doi.org/10.1021/acsbiomaterials.7b00417},
  doi = {10.1021/acsbiomaterials.7b00417},
  year = {2018},
  date = {2018-01-01},
  urldate = {2018-01-01},
  journal = {ACS Biomaterials Science \& Engineering},
  volume = {4},
  number = {1},
  pages = {78--89},
  note = {PMID: 33418680},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
Mah, Nancy; Taskova, Katerina; El Amrani, Khadija; Hariharan, Krithika; Kurtz, Andreas; Andrade-Navarro, Miguel A.
Evaluating Cell Identity from Transcription Profiles Journal Article
In: bioRxiv, 2018.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics
@article{Mah250431,
  title = {Evaluating Cell Identity from Transcription Profiles},
  author = {Mah, Nancy and Taskova, Katerina and El Amrani, Khadija and Hariharan, Krithika and Kurtz, Andreas and Andrade-Navarro, Miguel A.},
  url = {https://www.biorxiv.org/content/early/2018/01/19/250431},
  doi = {10.1101/250431},
  year = {2018},
  date = {2018-01-01},
  urldate = {2018-01-01},
  journal = {bioRxiv},
  publisher = {Cold Spring Harbor Laboratory},
  abstract = {Induced pluripotent stem cells (iPS) and direct lineage programming offer promising autologous and patient-specific sources of cells for personalized drug-testing and cell-based therapy. Before these engineered cells can be widely used, it is important to evaluate how well the engineered cell types resemble their intended target cell types. We have developed a method to generate CellScore, a cell identity score that can be used to evaluate the success of an engineered cell type in relation to both its initial and desired target cell type, which are used as references. Of 20 cell transitions tested, the most successful transitions were the iPS cells (CellScore $>$ 0.9), while other transitions (e.g. induced hepatocytes or motor neurons) indicated incomplete transitions (CellScore $<$ 0.5). In principle, the method can be applied to any engineered cell undergoing a cell transition, where transcription profiles are available for the reference cell types and the engineered cell type. Highlights: A curated standard dataset of transcription profiles from normal cell types was created. CellScore evaluates the cell identity of engineered cell types, using the curated dataset. CellScore considers the initial and desired target cell type. CellScore identifies the most successfully engineered clones for further functional testing.},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
2013
Wicker, Jörg
Large Classifier Systems in Bio- and Cheminformatics PhD Thesis
Technische Universität München, 2013.
Abstract | Links | BibTeX | Tags: biodegradation, bioinformatics, cheminformatics, computational sustainability, data mining, enviPath, machine learning, multi-label classification, multi-relational learning, toxicity
@phdthesis{wicker2013large,
  title = {Large Classifier Systems in Bio- and Cheminformatics},
  author = {Wicker, J{\"o}rg},
  url = {http://mediatum.ub.tum.de/node?id=1165858},
  year = {2013},
  date = {2013-01-01},
  school = {Technische Universit{\"a}t M{\"u}nchen},
  abstract = {Large classifier systems are machine learning algorithms that use multiple
classifiers to improve the prediction of target values in advanced
classification tasks. Although learning problems in bio- and
cheminformatics commonly provide data in schemes suitable for large
classifier systems, they are rarely used in these domains. This thesis
introduces two new classifiers incorporating systems of classifiers
using Boolean matrix decomposition to handle data in a schema that
often occurs in bio- and cheminformatics.
The first approach, called MLC-BMaD (multi-label classification using
Boolean matrix decomposition), uses Boolean matrix decomposition to
decompose the labels in a multi-label classification task. The
decomposed matrices are a compact representation of the information
in the labels (first matrix) and the dependencies among the labels
(second matrix). The first matrix is used in a further multi-label
classification while the second matrix is used to generate the final
matrix from the predicted values of the first matrix.
MLC-BMaD was evaluated on six standard multi-label data sets, the
experiments showed that MLC-BMaD can perform particularly well on data
sets with a high number of labels and a small number of instances and
can outperform standard multi-label algorithms.
Subsequently, MLC-BMaD is extended to a special case of
multi-relational learning, by considering the labels not as simple
labels, but instances. The algorithm, called ClassFact
(Classification factorization), uses both matrices in a multi-label
classification. Each label represents a mapping between two
instances.
Experiments on three data sets from the domain of bioinformatics show
that ClassFact can outperform the baseline method, which merges the
relations into one, on hard classification tasks.
Furthermore, large classifier systems are used on two cheminformatics
data sets, the first one is used to predict the environmental fate of
chemicals by predicting biodegradation pathways. The second is a data
set from the domain of predictive toxicology. In biodegradation
pathway prediction, I extend a knowledge-based system and incorporate
a machine learning approach to predict a probability for
biotransformation products based on the structure- and knowledge-based
predictions of products, which are based on transformation rules. The
use of multi-label classification improves the performance of the
classifiers and extends the number of transformation rules that can be
covered.
For the prediction of toxic effects of chemicals, I applied large
classifier systems to the ToxCast{\texttrademark} data set, which maps
toxic effects to chemicals. As the given toxic effects are not easy to
predict due to missing information and a skewed class
distribution, I introduce a filtering step in the multi-label
classification, which finds labels that are usable in multi-label
prediction and does not take the others in the
prediction into account. Experiments show
that this approach can improve upon the baseline method using binary
classification, as well as multi-label approaches using no filtering.
The presented results show that large classifier systems can play a
role in future research challenges, especially in bio- and
cheminformatics, where data sets frequently consist of more complex
structures and data can be rather small in terms of the number of
instances compared to other domains.},
  keywords = {biodegradation, bioinformatics, cheminformatics, computational sustainability, data mining, enviPath, machine learning, multi-label classification, multi-relational learning, toxicity},
  pubstate = {published},
  tppubtype = {phdthesis}
}
classifiers to improve the prediction of target values in advanced
classification tasks. Although learning problems in bio- and
cheminformatics commonly provide data in schemes suitable for large
classifier systems, they are rarely used in these domains. This thesis
introduces two new classifiers incorporating systems of classifiers
using Boolean matrix decomposition to handle data in a schema that
often occurs in bio- and cheminformatics.
The first approach, called MLC-BMaD (multi-label classification using
Boolean matrix decomposition), uses Boolean matrix decomposition to
decompose the labels in a multi-label classification task. The
decomposed matrices are a compact representation of the information
in the labels (first matrix) and the dependencies among the labels
(second matrix). The first matrix is used in a further multi-label
classification while the second matrix is used to generate the final
matrix from the predicted values of the first matrix.
MLC-BMaD was evaluated on six standard multi-label data sets, the
experiments showed that MLC-BMaD can perform particularly well on data
sets with a high number of labels and a small number of instances and
can outperform standard multi-label algorithms.
Subsequently, MLC-BMaD is extended to a special case of
multi-relational learning, by considering the labels not as simple
labels, but instances. The algorithm, called ClassFact
(Classification factorization), uses both matrices in a multi-label
classification. Each label represents a mapping between two
instances.
Experiments on three data sets from the domain of bioinformatics show
that ClassFact can outperform the baseline method, which merges the
relations into one, on hard classification tasks.
Furthermore, large classifier systems are used on two cheminformatics
data sets, the first one is used to predict the environmental fate of
chemicals by predicting biodegradation pathways. The second is a data
set from the domain of predictive toxicology. In biodegradation
pathway prediction, I extend a knowledge-based system and incorporate
a machine learning approach to predict a probability for
biotransformation products based on the structure- and knowledge-based
predictions of products, which are based on transformation rules. The
use of multi-label classification improves the performance of the
classifiers and extends the number of transformation rules that can be
covered.
For the prediction of toxic effects of chemicals, I applied large
classifier systems to the ToxCast™ data set, which maps
toxic effects to chemicals. As the given toxic effects are not easy to
predict due to missing information and a skewed class
distribution, I introduce a filtering step in the multi-label
classification, which finds labels that are usable in multi-label
prediction and does not take the others in the
prediction into account. Experiments show
that this approach can improve upon the baseline method using binary
classification, as well as multi-label approaches using no filtering.
The presented results show that large classifier systems can play a
role in future research challenges, especially in bio- and
cheminformatics, where data sets frequently consist of more complex
structures and data can be rather small in terms of the number of
instances compared to other domains.