Abstract
Conditional Density Estimation (CDE) has wide-reaching applicability to various real-world problems, such as spatial density estimation and environmental modelling. CDE estimates the probability density of a random variable rather than a single value and can thus model uncertainty and inverse problems. This task is inherently more complex than regression, and many algorithms suffer from overfitting, particularly when modelled with few labelled data points. For applications where unlabelled data is abundant but labelled data is scarce, we propose Wasserstein Laplacian Regularisation, a semi-supervised learning framework that allows CDE algorithms to leverage these unlabelled data. The framework minimises an objective function which ensures that the learned model is smooth along the manifold of the underlying data, as measured by Wasserstein distance. When applying our framework to Mixture Density Networks, the resulting semi-supervised algorithm can achieve similar performance to a supervised model with up to three times as many labelled data points on baseline datasets. We additionally apply our technique to the problem of remote sensing for chlorophyll-a estimation in inland waters.
Links
BibTeX (Download)
@comment{Conference paper entry. Proper nouns and acronyms are brace-protected so sentence-casing styles keep their capitals. tppubtype is a non-standard field (presumably added by the exporting tool - confirm); unknown fields are ignored by BibTeX styles.}
@inproceedings{graffeuille2022semi,
  author     = {Graffeuille, Olivier and Koh, Yun Sing and Wicker, J{\"o}rg and Lehmann, Moritz},
  title      = {Semi-Supervised Conditional Density Estimation with {Wasserstein} {Laplacian} Regularisation},
  booktitle  = {Proceedings of the Thirty-Sixth {AAAI} Conference on Artificial Intelligence},
  volume     = {36},
  number     = {6},
  pages      = {6746--6754},
  year       = {2022},
  date       = {2022-06-28},
  doi        = {10.1609/aaai.v36i6.20630},
  url        = {https://ojs.aaai.org/index.php/AAAI/article/view/20630},
  urldate    = {2022-06-28},
  abstract   = {Conditional Density Estimation (CDE) has wide-reaching applicability to various real-world problems, such as spatial density estimation and environmental modelling. CDE estimates the probability density of a random variable rather than a single value and can thus model uncertainty and inverse problems. This task is inherently more complex than regression, and many algorithms suffer from overfitting, particularly when modelled with few labelled data points. For applications where unlabelled data is abundant but labelled data is scarce, we propose Wasserstein Laplacian Regularisation, a semi-supervised learning framework that allows CDE algorithms to leverage these unlabelled data. The framework minimises an objective function which ensures that the learned model is smooth along the manifold of the underlying data, as measured by Wasserstein distance. When applying our framework to Mixture Density Networks, the resulting semi-supervised algorithm can achieve similar performance to a supervised model with up to three times as many labelled data points on baseline datasets. We additionally apply our technique to the problem of remote sensing for chlorophyll-a estimation in inland waters.},
  keywords   = {classification, computational sustainability, machine learning, semi-supervised learning},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}