Publications

2022

Poonawala-Lohani, Nooriyan; Riddle, Pat; Adnan, Mehnaz; Wicker, Jörg

Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains: Ensemble methods for spatio-temporal Time Series Forecasting of Influenza-like Illness Proceedings Article

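In: Proceedings of the 13th ACM International Conference on Bioinformatics, Computational Biology and Health Informatics, pp. 1–7, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 9781450393867.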

@comment{Conference paper introducing GEO-Reach, an ensemble method for spatio-temporal forecasting of influenza-like illness.}
@inproceedings{Poonawala-Lohani2022geographic,
  title      = {Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains: Ensemble methods for spatio-temporal Time Series Forecasting of Influenza-like Illness},
  author     = {Poonawala-Lohani, Nooriyan and Riddle, Pat and Adnan, Mehnaz and Wicker, J{\"o}rg},
  doi        = {10.1145/3535508.3545562},
  isbn       = {9781450393867},
  year       = {2022},
  date       = {2022-08-07},
  booktitle  = {Proceedings of the 13th {ACM} International Conference on Bioinformatics, Computational Biology and Health Informatics},
  pages      = {1--7},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  abstract   = {Influenza is a communicable respiratory illness that can cause serious public health hazards. Flu surveillance in New Zealand tracks case counts from various District health boards (DHBs) in the country to monitor the spread of influenza in different geographic locations. Many factors contribute to the spread of the influenza across a geographic region, and it can be challenging to forecast cases in one region without taking into account case numbers in another region. This paper proposes a novel ensemble method called Geographic Ensembles of Observations using Randomised Ensembles of Autoregression Chains (GEO-Reach). GEO-Reach is an ensemble technique that uses a two layer approach to utilise interdependence of historical case counts between geographic regions in New Zealand. This work extends a previously published method by the authors called Randomized Ensembles of Auto-regression chains (Reach). State-of-the-art forecasting models look at studying the spread of the virus. They focus on accurate forecasting of cases for a location using historical case counts for the same location and other data sources based on human behaviour such as movement of people across cities/geographic regions. This new approach is evaluated using Influenza like illness (ILI) case counts in 7 major regions in New Zealand from the years 2015-2019 and compares its performance with other standard methods such as Dante, ARIMA, Autoregression and Random Forests. The results demonstrate that the proposed method performed better than baseline methods when applied to this multi-variate time series forecasting problem.},
  keywords   = {bioinformatics, computational sustainability, dynamic time warping, forecasting, influenza, machine learning, medicine, time series},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}

2020

Chester, Andrew; Koh, Yun Sing; Wicker, Jörg; Sun, Quan; Lee, Junjae

Balancing Utility and Fairness against Privacy in Medical Data Proceedings Article

In: IEEE Symposium Series on Computational Intelligence (SSCI), pp. 1226–1233, IEEE, 2020.

@comment{Conference paper on the trade-off between privacy, utility and fairness under de-identification of medical data.}
@inproceedings{chester2020balancing,
  title      = {Balancing Utility and Fairness against Privacy in Medical Data},
  author     = {Chester, Andrew and Koh, Yun Sing and Wicker, J{\"o}rg and Sun, Quan and Lee, Junjae},
  url        = {https://ieeexplore.ieee.org/abstract/document/9308226},
  doi        = {10.1109/SSCI47803.2020.9308226},
  year       = {2020},
  date       = {2020-12-01},
  booktitle  = {{IEEE} Symposium Series on Computational Intelligence ({SSCI})},
  pages      = {1226--1233},
  publisher  = {IEEE},
  abstract   = {There are numerous challenges when designing algorithms that interact with sensitive data, such as, medical or financial records. One of these challenges is privacy. However, there is a tension between privacy, utility (model accuracy), and fairness. While de-identification techniques, such as generalisation and suppression, have been proposed to enable privacy protection, it comes with a cost, specifically to fairness and utility. Recent work on fairness in algorithm design defines fairness as a guarantee of similar outputs for "similar" input data. This notion is discussed in connection to de-identification. This research investigates the trade-off between privacy, fairness, and utility. In contrast, other work investigates the trade-off between privacy and utility of the data or accuracy of the model overall. In this research, we investigate the effects of two standard de-identification techniques, k-anonymity and differential privacy, on both utility and fairness. We propose two measures to calculate the trade-off between privacy-utility and privacy-fairness. Although other research has provided guarantees for privacy regarding utility, this research focuses on the trade-offs given set de-identification levels and relies on guarantees provided by the privacy preservation methods. We discuss the effects of de-identification on data of different characteristics, class imbalance and outcome imbalance. We evaluated this is on synthetic datasets and standard real-world datasets. As a case study, we analysed the Medical Expenditure Panel Survey dataset.},
  keywords   = {accuracy, computational sustainability, data mining, fairness, imbalance, machine learning, medicine, privacy},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}