2025
1.
Cheena, Asif; Dost, Katharina; Sarris, Theo; Straathof, Nina; Wicker, Jörg
Don't Swim in Data: Real-Time Microbial Forecasting for New Zealand Recreational Waters Unpublished Forthcoming
SSRN, Forthcoming.
Abstract | Links | BibTeX | Altmetric | PlumX | Tags: computational sustainability, machine learning, matrix decomposition, time series forecasting, water quality
@comment{Cheena2025dont: forthcoming preprint identified by DOI 10.2139/ssrn.5230457.
  tppubtype is a non-standard field kept as-is; standard BibTeX styles ignore unknown fields.}
@unpublished{Cheena2025dont,
  title        = {Don't Swim in Data: Real-Time Microbial Forecasting for {New Zealand} Recreational Waters},
  author       = {Cheena, Asif and Dost, Katharina and Sarris, Theo and Straathof, Nina and Wicker, J{\"o}rg},
  doi          = {10.2139/ssrn.5230457},
  year         = {2025},
  date         = {2025-04-30},
  urldate      = {2025-04-30},
  abstract     = {Traditional water quality monitoring, reliant on infrequent sampling and 48-hour laboratory delays, often fails to capture rapid contamination fluctuations, exposing recreational water users to significant health risks. We propose two novel machine learning frameworks for real-time forecasting of Enterococci concentrations in Canterbury, New Zealand. The Probabilistic Forecasting Framework uses an ensemble of quantile regression models with a gradient boosting meta-learner and Conformalized Quantile Regression (CQR) to produce accurate point forecasts and calibrated 90\% prediction intervals. In parallel, the Matrix Decomposition Framework employs Non-negative Matrix Factorization (NMF) to decompose spatio-temporal data into interpretable latent factors, modeled via multi-target Random Forests to enhance generalizability. Evaluated on data from 15 sites (2021\textendash2024, 1047 samples, 100 exceedance events), our frameworks exceed USGS guidelines, achieving exceedance sensitivities of 67.0\% and 61.0\%, with high precautionary sensitivities of 77.0\% and 74.0\%, respectively, and competitive performance relative to state-of-the-art systems such as Auckland's Safeswim.},
  howpublished = {SSRN},
  note         = {SSRN preprint},
  keywords     = {computational sustainability, machine learning, matrix decomposition, time series forecasting, water quality},
  pubstate     = {forthcoming},
  tppubtype    = {unpublished}
}
Traditional water quality monitoring, reliant on infrequent sampling and 48-hour laboratory delays, often fails to capture rapid contamination fluctuations, exposing recreational water users to significant health risks. We propose two novel machine learning frameworks for real-time forecasting of Enterococci concentrations in Canterbury, New Zealand. The Probabilistic Forecasting Framework uses an ensemble of quantile regression models with a gradient boosting meta-learner and Conformalized Quantile Regression (CQR) to produce accurate point forecasts and calibrated 90% prediction intervals. In parallel, the Matrix Decomposition Framework employs Non-negative Matrix Factorization (NMF) to decompose spatio-temporal data into interpretable latent factors, modeled via multi-target Random Forests to enhance generalizability. Evaluated on data from 15 sites (2021–2024, 1047 samples, 100 exceedance events), our frameworks exceed USGS guidelines, achieving exceedance sensitivities of 67.0% and 61.0%, with high precautionary sensitivities of 77.0% and 74.0%, respectively, and competitive performance relative to state-of-the-art systems such as Auckland’s Safeswim.