2024
Long, Derek; Eade, Liam; Dost, Katharina; Meier-Menches, Samuel M; Goldstone, David C; Sullivan, Matthew P; Hartinger, Christian; Wicker, Jörg; Taskova, Katerina
AdductHunter: Identifying Protein-Metal Complex Adducts in Mass Spectra Journal Article
In: Journal of Cheminformatics, vol. 16, iss. 1, 2024, ISSN: 1758-2946.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: cheminformatics, computational sustainability, data mining, dynamic time warping, machine learning, mass spectrometry
@comment{Journal article. The url field holds a single address (the project site); the preprint link is kept in the non-standard preprinturl field, which standard styles ignore. pubstate and tppubtype are retained as exported.}
@article{Long2023adducthunter,
  author      = {Long, Derek and Eade, Liam and Dost, Katharina and Meier-Menches, Samuel M. and Goldstone, David C. and Sullivan, Matthew P. and Hartinger, Christian and Wicker, J{\"o}rg and Taskova, Katerina},
  title       = {{AdductHunter}: Identifying Protein-Metal Complex Adducts in Mass Spectra},
  journal     = {Journal of Cheminformatics},
  volume      = {16},
  number      = {1},
  year        = {2024},
  date        = {2024-02-06},
  issn        = {1758-2946},
  doi         = {10.1186/s13321-023-00797-7},
  url         = {https://adducthunter.wickerlab.org},
  urldate     = {2024-02-06},
  preprinturl = {https://doi.org/10.21203/rs.3.rs-3322854/v1},
  abstract    = {Mass spectrometry (MS) is an analytical technique for molecule identification that can be used for investigating protein-metal complex interactions. Once the MS data is collected, the mass spectra are usually interpreted manually to identify the adducts formed as a result of the interactions between proteins and metal-based species. However, with increasing resolution, dataset size, and species complexity, the time required to identify adducts and the error-prone nature of manual assignment have become limiting factors in MS analysis. AdductHunter is an open-source web-based analysis tool that automates the peak identification process using constraint integer optimization to find feasible combinations of protein and fragments, and dynamic time warping to calculate the dissimilarity between the theoretical isotope pattern of a species and its experimental isotope peak distribution. Empirical evaluation on a collection of 22 unique MS datasets shows fast and accurate identification of protein-metal complex adducts in deconvoluted mass spectra.},
  keywords    = {cheminformatics, computational sustainability, data mining, dynamic time warping, machine learning, mass spectrometry},
  pubstate    = {published},
  tppubtype   = {article}
}
2022
Poonawala-Lohani, Nooriyan; Riddle, Pat; Adnan, Mehnaz; Wicker, Jörg
Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains: Ensemble methods for spatio-temporal Time Series Forecasting of Influenza-like Illness Proceedings Article
In: pp. 1-7, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 9781450393867.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: bioinformatics, computational sustainability, dynamic time warping, forecasting, influenza, machine learning, medicine, time series
@comment{Conference paper. NOTE(review): booktitle was missing from the export and has been filled in from the proceedings that DOI prefix 10.1145/3535508 belongs to; confirm against the publisher record.}
@inproceedings{Poonawala-Lohani2022geographic,
  author    = {Poonawala-Lohani, Nooriyan and Riddle, Pat and Adnan, Mehnaz and Wicker, J{\"o}rg},
  title     = {Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains: Ensemble methods for spatio-temporal Time Series Forecasting of Influenza-like Illness},
  booktitle = {Proceedings of the 13th {ACM} International Conference on Bioinformatics, Computational Biology and Health Informatics},
  pages     = {1--7},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2022},
  date      = {2022-08-07},
  isbn      = {9781450393867},
  doi       = {10.1145/3535508.3545562},
  abstract  = {Influenza is a communicable respiratory illness that can cause serious public health hazards. Flu surveillance in New Zealand tracks case counts from various District health boards (DHBs) in the country to monitor the spread of influenza in different geographic locations. Many factors contribute to the spread of the influenza across a geographic region, and it can be challenging to forecast cases in one region without taking into account case numbers in another region. This paper proposes a novel ensemble method called Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains (GEO-Reach). GEO-Reach is an ensemble technique that uses a two layer approach to utilise interdependence of historical case counts between geographic regions in New Zealand. This work extends a previously published method by the authors called Randomized Ensembles of Auto-regression chains (Reach). State-of-the-art forecasting models look at studying the spread of the virus. They focus on accurate forecasting of cases for a location using historical case counts for the same location and other data sources based on human behaviour such as movement of people across cities/geographic regions. This new approach is evaluated using Influenza like illness (ILI) case counts in 7 major regions in New Zealand from the years 2015-2019 and compares its performance with other standard methods such as Dante, ARIMA, Autoregression and Random Forests. The results demonstrate that the proposed method performed better than baseline methods when applied to this multi-variate time series forecasting problem.},
  keywords  = {bioinformatics, computational sustainability, dynamic time warping, forecasting, influenza, machine learning, medicine, time series},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2016
Raza, Atif; Wicker, Jörg; Kramer, Stefan
Trading Off Accuracy for Efficiency by Randomized Greedy Warping Proceedings Article
In: Proceedings of the 31st Annual ACM Symposium on Applied Computing, pp. 883-890, ACM, New York, NY, USA, 2016, ISBN: 978-1-4503-3739-7.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: data mining, dynamic time warping, time series
@comment{Conference paper. The url field holds a single address (the author-hosted access link); the publisher copy is reachable through the doi field.}
@inproceedings{raza2016trading,
  author    = {Raza, Atif and Wicker, J{\"o}rg and Kramer, Stefan},
  title     = {Trading Off Accuracy for Efficiency by Randomized Greedy Warping},
  booktitle = {Proceedings of the 31st Annual {ACM} Symposium on Applied Computing},
  series    = {SAC '16},
  pages     = {883--890},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2016},
  date      = {2016-01-01},
  isbn      = {978-1-4503-3739-7},
  doi       = {10.1145/2851613.2851651},
  url       = {https://wicker.nz/nwp-acm/authorize.php?id=N10030},
  abstract  = {Dynamic Time Warping (DTW) is a widely used distance measure for time series data mining. Its quadratic complexity requires the application of various techniques (e.g. warping constraints, lower-bounds) for deployment in real-time scenarios. In this paper we propose a randomized greedy warping algorithm for finding similarity between time series instances. We show that the proposed algorithm outperforms the simple greedy approach and also provides very good time series similarity approximation consistently, as compared to DTW. We show that the Randomized Time Warping (RTW) can be used in place of DTW as a fast similarity approximation technique by trading some classification accuracy for very fast classification.},
  keywords  = {data mining, dynamic time warping, time series},
  pubstate  = {published},
  tppubtype = {inproceedings}
}