diff --git a/paper/paper.bib b/paper/paper.bib index d351d30..55a3545 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -7,14 +7,14 @@ @thesis{althoff2023conform } @article{athey2019generalized, - title={Generalized random forests}, + title={Generalized Random Forests}, author={Athey, Susan and Tibshirani, Julie and Wager, Stefan}, year={2019}, doi={10.1214/18-aos1709} } @article{behnel2010cython, - title={Cython: The best of both worlds}, + title={Cython: The Best of Both Worlds}, author={Behnel, Stefan and Bradshaw, Robert and Citro, Craig and Dalcin, Lisandro and Seljebotn, Dag Sverre and Smith, Kurt}, journal={Computing in Science \& Engineering}, volume={13}, @@ -26,7 +26,7 @@ @article{behnel2010cython } @article{biau2016random, - title={A random forest guided tour}, + title={A Random Forest Guided Tour}, author={Biau, G{\'e}rard and Scornet, Erwan}, journal={Test}, volume={25}, @@ -37,7 +37,7 @@ @article{biau2016random } @article{breiman2001random, - title={Random forests}, + title={Random Forests}, author={Breiman, Leo}, journal={Machine learning}, volume={45}, @@ -48,7 +48,7 @@ @article{breiman2001random } @article{cordoba2021spatially, - title={A spatially based quantile regression forest model for mapping rural land values}, + title={A Spatially Based Quantile Regression Forest Model for Mapping Rural Land Values}, author={C{\'o}rdoba, Mariano and Carranza, Juan Pablo and Piumetto, Mario and Monzani, Federico and Balzarini, M{\'o}nica}, journal={Journal of Environmental Management}, volume={289}, @@ -59,7 +59,7 @@ @article{cordoba2021spatially } @article{dean2022quantile, - title={Quantile regression forests for individualized surgery scheduling}, + title={Quantile Regression Forests for Individualized Surgery Scheduling}, author={Dean, Arlen and Meisami, Amirhossein and Lam, Henry and Van Oyen, Mark P and Stromblad, Christopher and Kastango, Nick}, journal={Health Care Management Science}, volume={25}, @@ -71,7 +71,7 @@ @article{dean2022quantile } 
@article{fang2018quantile, - title={A quantile regression forest based method to predict drug response and assess prediction reliability}, + title={A Quantile Regression Forest Based Method to Predict Drug Response and Assess Prediction Reliability}, author={Fang, Yun and Xu, Peirong and Yang, Jialiang and Qin, Yufang}, journal={PLoS One}, volume={13}, @@ -82,7 +82,7 @@ @article{fang2018quantile } @article{francke2008estimation, - title={Estimation of suspended sediment concentration and yield using linear models, random forests and quantile regression forests}, + title={Estimation of Suspended Sediment Concentration and Yield Using Linear Models, Random Forests and Quantile Regression Forests}, author={Francke, T and L{\'o}pez-Taraz{\'o}n, JA and Schr{\"o}der, B}, journal={Hydrological Processes}, volume={22}, @@ -94,7 +94,7 @@ @article{francke2008estimation } @article{gyamerah2020long, - title={Long-term exchange rate probability density forecasting using Gaussian kernel and quantile random forest}, + title={Long-Term Exchange Rate Probability Density Forecasting Using {G}aussian Kernel and Quantile Random Forest}, author={Gyamerah, Samuel Asante and Moyo, Edwin}, journal={Complexity}, volume={2020}, @@ -105,7 +105,7 @@ @article{gyamerah2020long } @article{hengl2018random, - title={Random forest as a generic framework for predictive modeling of spatial and spatio-temporal variables}, + title={Random Forest as a Generic Framework for Predictive Modeling of Spatial and Spatio-Temporal Variables}, author={Hengl, Tomislav and Nussbaum, Madlene and Wright, Marvin N and Heuvelink, Gerard BM and Gr{\"a}ler, Benedikt}, journal={PeerJ}, volume={6}, @@ -116,7 +116,7 @@ @article{hengl2018random } @book{koenker2005quantile, - title={Quantile regression}, + title={Quantile Regression}, author={Koenker, Roger}, year={2005}, series={Econometric Society Monographs}, @@ -125,7 +125,7 @@ @book{koenker2005quantile } @article{kramer2016scikit, - title={Scikit-learn}, + 
title={{scikit-learn}}, author={Kramer, Oliver}, journal={Machine Learning for Evolution Strategies}, pages={45--53}, @@ -135,7 +135,7 @@ @article{kramer2016scikit } @article{meinshausen2006quantile, - title={Quantile regression forests}, + title={Quantile Regression Forests}, author={Meinshausen, Nicolai}, journal={Journal of Machine Learning Research}, volume={7}, @@ -146,7 +146,7 @@ @article{meinshausen2006quantile } @article{molinder2020probabilistic, - title={Probabilistic forecasting of wind turbine icing related production losses using quantile regression forests}, + title={Probabilistic Forecasting of Wind Turbine Icing Related Production Losses Using Quantile Regression Forests}, author={Molinder, Jennie and Scher, Sebastian and Nilsson, Erik and K{\"o}rnich, Heiner and Bergstr{\"o}m, Hans and Sj{\"o}blom, Anna}, journal={Energies}, volume={14}, @@ -158,7 +158,7 @@ @article{molinder2020probabilistic } @article{petropoulos2022forecasting, - title={Forecasting: theory and practice}, + title={Forecasting: Theory and Practice}, author={Petropoulos, Fotios and Apiletti, Daniele and Assimakopoulos, Vassilios and Babai, Mohamed Zied and Barrow, Devon K and Taieb, Souhaib Ben and Bergmeir, Christoph and Bessa, Ricardo J and Bijak, Jakub and Boylan, John E and others}, journal={International Journal of Forecasting}, volume={38}, @@ -170,22 +170,22 @@ @article{petropoulos2022forecasting } @thesis{Prinzhorn2023, + title={Benchmarking Conformal Prediction Methods for Time Series Regression}, author={Derck Walther Eward Prinzhorn}, - title={Benchmarking conformal prediction methods for time series regression}, school={University of Amsterdam}, year={2023}, type={Bachelor's Thesis} } @misc{quantregforest2017, - title={quantregForest: Quantile regresion forests}, + title={{quantregForest}: Quantile Regression Forests}, author={Meinshausen, Nicolai}, year={2017}, url={https://cran.r-project.org/web/packages/quantregForest/index.html} } @thesis{saporta2023statistical, - 
title={Statistical tools for causal inference and forensic science}, + title={Statistical Tools for Causal Inference and Forensic Science}, author={Saporta, Jason}, year={2023}, school={Iowa State University}, @@ -193,7 +193,7 @@ @thesis{saporta2023statistical } @article{wager2018estimation, - title={Estimation and inference of heterogeneous treatment effects using random forests}, + title={Estimation and Inference of Heterogeneous Treatment Effects Using Random Forests}, author={Wager, Stefan and Athey, Susan}, journal={Journal of the American Statistical Association}, volume={113}, @@ -204,8 +204,20 @@ @article{wager2018estimation doi={10.1080/01621459.2017.1319839} } +@article{wright2017ranger, + title={{ranger}: A Fast Implementation of Random Forests for High Dimensional Data in {C++} and {R}}, + author={Wright, Marvin N and Ziegler, Andreas}, + journal={Journal of Statistical Software}, + volume={77}, + number={1}, + pages={1--17}, + year={2017}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v077i01}, + doi={10.18637/jss.v077.i01} +} + @article{zhang2018parallel, - title={Parallel and reliable probabilistic load forecasting via quantile regression forest and quantile determination}, + title={Parallel and Reliable Probabilistic Load Forecasting via Quantile Regression Forest and Quantile Determination}, author={Zhang, Wenjie and Quan, Hao and Srinivasan, Dipti}, journal={Energy}, volume={160}, diff --git a/paper/paper.md b/paper/paper.md index 62e6149..c4e790f 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -29,7 +29,7 @@ QRF, an extension of the random forest algorithm, provides a flexible, nonlinear Traditional prediction intervals often rely on assumptions such as normality, which may not hold in many real-world scenarios [@gyamerah2020long]. QRF, on the other hand, allows researchers to generate prediction intervals that are non-parametric, flexible, and adaptive to different data distributions. 
This capability is invaluable for quantifying uncertainties in a wide range of research areas, including finance [@cordoba2021spatially], environmental sciences [@francke2008estimation;@fang2018quantile;@zhang2018parallel], healthcare [@molinder2020probabilistic;@dean2022quantile], and more. A crucial difference between QRF and many other quantile regression approaches is that after training a QRF once, one has access to all the quantiles at inference time, whereas most approaches require retraining separately for each quantile. -As a cutting-edge statistical modeling technique, the QRF algorithm holds immense potential for researchers across many domains, providing them with a powerful tool to address complex problems involving quantile regression and uncertainty estimation. The QRF algorithm is broadly available in R, which is host to the canonical QRF implementation [@quantregforest2017] as well as established alternative implementations [@athey2019generalized]. However Python has emerged as a prevailing standard programming language within the scientific community, making it a popular option for researchers and practitioners. The absence of a comprehensive Python implementation of the QRF algorithm severely hampers researchers' ability to utilize and benefit from its wide-ranging applications. +As a cutting-edge statistical modeling technique, the QRF algorithm holds immense potential for researchers across many domains, providing them with a powerful tool to address complex problems involving quantile regression and uncertainty estimation. The QRF algorithm is broadly available in R, which is host to the canonical QRF implementation [@quantregforest2017] as well as established alternative implementations [@wright2017ranger;@athey2019generalized]. However, Python has emerged as a prevailing standard programming language within the scientific community, making it a popular option for researchers and practitioners. 
The absence of a comprehensive Python implementation of the QRF algorithm severely hampers researchers' ability to utilize and benefit from its wide-ranging applications. We seek to fill this need by providing a comprehensive Python-based implementation of the QRF algorithm. The QRF implementation provided in this package has been optimized for training and inference speed. It allows specifying prediction quantiles after training, permitting a trained model to be reused to estimate conditional quantiles as needed. In addition to this base prediction functionality, the package also includes utilities that enhance the algorithm's applicability and usefulness for researchers and practitioners. These utilities include: