diff --git a/content/publication/chen-sampling-2023/cite.bib b/content/publication/chen-sampling-2023/cite.bib new file mode 100644 index 0000000..d74f282 --- /dev/null +++ b/content/publication/chen-sampling-2023/cite.bib @@ -0,0 +1,68 @@ +@article{chen_sampling_2023, + abstract = {Abstract + +Background +The development of machine learning models for aiding in the diagnosis of mental disorder is recognized as a significant breakthrough in the field of psychiatry. However, clinical practice of such models remains a challenge, with poor generalizability being a major limitation. + + +Methods + +Here, we conducted a pre-registered meta-research assessment on neuroimaging-based models in the psychiatric literature, quantitatively examining global and regional sampling issues over recent decades, from a view that has been relatively underexplored. A total of 476 studies ( +n += 118,137) were included in the current assessment. Based on these findings, we built a comprehensive 5-star rating system to quantitatively evaluate the quality of existing machine learning models for psychiatric diagnoses. + + + +Results + +A global sampling inequality in these models was revealed quantitatively (sampling Gini coefficient ( +G +) = 0.81, +p +\textless .01), varying across different countries (regions) (e.g., China, +G += 0.47; the USA, +G += 0.58; Germany, +G += 0.78; the UK, +G += 0.87). Furthermore, the severity of this sampling inequality was significantly predicted by national economic levels ( +β +=  − 2.75, +p +\textless .001, +R + +2 + + +adj + += 0.40; +r +=  − .84, 95% CI: − .41 to − .97), and was plausibly predictable for model performance, with higher sampling inequality for reporting higher classification accuracy. 
Further analyses showed that lack of independent testing (84.24% of models, 95% CI: 81.0–87.5%), improper cross-validation (51.68% of models, 95% CI: 47.2–56.2%), and poor technical transparency (87.8% of models, 95% CI: 84.9–90.8%)/availability (80.88% of models, 95% CI: 77.3–84.4%) are prevailing in current diagnostic classifiers despite improvements over time. Relating to these observations, model performances were found decreased in studies with independent cross-country sampling validations (all +p +\textless .001, BF +10 +\textgreater 15). In light of this, we proposed a purpose-built quantitative assessment checklist, which demonstrated that the overall ratings of these models increased by publication year but were negatively associated with model performance. + + + +Conclusions +Together, improving sampling economic equality and hence the quality of machine learning models may be a crucial facet to plausibly translating neuroimaging-based diagnostic classifiers into clinical practice.}, + author = {Chen, Zhiyi and Hu, Bowen and Liu, Xuerong and Becker, Benjamin and Eickhoff, Simon B. and Miao, Kuan and Gu, Xingmei and Tang, Yancheng and Dai, Xin and Li, Chao and Leonov, Artemiy and Xiao, Zhibing and Feng, Zhengzhi and Chen, Ji and Chuan-Peng, Hu}, + doi = {10.1186/s12916-023-02941-4}, + file = {Chen et al. - 2023 - Sampling inequalities affect generalization of neu.pdf:/home/alpron/Zotero/storage/YTDHFA4C/Chen et al. 
- 2023 - Sampling inequalities affect generalization of neu.pdf:application/pdf}, + issn = {1741-7015}, + journal = {BMC Medicine}, + language = {en}, + month = {July}, + number = {1}, + pages = {241}, + title = {Sampling inequalities affect generalization of neuroimaging-based diagnostic classifiers in psychiatry}, + url = {https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-023-02941-4}, + urldate = {2024-10-21}, + volume = {21}, + year = {2023} +} diff --git a/content/publication/chen-sampling-2023/index.md b/content/publication/chen-sampling-2023/index.md new file mode 100644 index 0000000..06698e5 --- /dev/null +++ b/content/publication/chen-sampling-2023/index.md @@ -0,0 +1,58 @@ +--- +title: Sampling inequalities affect generalization of neuroimaging-based diagnostic + classifiers in psychiatry +authors: +- Zhiyi Chen +- Bowen Hu +- Xuerong Liu +- Benjamin Becker +- Simon B. Eickhoff +- Kuan Miao +- Xingmei Gu +- Yancheng Tang +- Xin Dai +- Chao Li +- Artemiy Leonov +- Zhibing Xiao +- Zhengzhi Feng +- Ji Chen +- Hu Chuan-Peng +date: '2023-07-01' +publishDate: '2024-10-25T13:25:50.233832Z' +publication_types: +- article-journal +publication: '*BMC Medicine*' +doi: 10.1186/s12916-023-02941-4 +abstract: 'Abstract Background The development of machine learning models for aiding + in the diagnosis of mental disorder is recognized as a significant breakthrough + in the field of psychiatry. However, clinical practice of such models remains a + challenge, with poor generalizability being a major limitation. Methods Here, + we conducted a pre-registered meta-research assessment on neuroimaging-based models + in the psychiatric literature, quantitatively examining global and regional sampling + issues over recent decades, from a view that has been relatively underexplored. + A total of 476 studies ( n = 118,137) were included in the current assessment. 
Based + on these findings, we built a comprehensive 5-star rating system to quantitatively + evaluate the quality of existing machine learning models for psychiatric diagnoses. Results A + global sampling inequality in these models was revealed quantitatively (sampling + Gini coefficient (G) = 0.81, p < .01), varying across different countries + (regions) (e.g., China, G = 0.47; the USA, G = 0.58; Germany, G = 0.78; the UK, + G = 0.87). Furthermore, the severity of this sampling inequality was significantly + predicted by national economic levels (β = −2.75, p < .001, adjusted R² = 0.40; + r = −.84, 95% CI: −.41 to −.97), and was plausibly predictable for model performance, + with higher sampling inequality for reporting higher classification accuracy. Further + analyses showed that lack of independent testing (84.24% of models, 95% CI: 81.0–87.5%), + improper cross-validation (51.68% of models, 95% CI: 47.2–56.2%), and poor technical + transparency (87.8% of models, 95% CI: 84.9–90.8%)/availability (80.88% of models, + 95% CI: 77.3–84.4%) are prevailing in current diagnostic classifiers despite improvements + over time. Relating to these observations, model performances were found decreased + in studies with independent cross-country sampling validations (all p < .001, + BF10 > 15). In light of this, we proposed a purpose-built quantitative + assessment checklist, which demonstrated that the overall ratings of these models + increased by publication year but were negatively associated with model performance. Conclusions + Together, improving sampling economic equality and hence the quality of machine + learning models may be a crucial facet to plausibly translating neuroimaging-based + diagnostic classifiers into clinical practice.' 
+links: +- name: URL + url: https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-023-02941-4 +--- diff --git a/content/publication/christodoulou-confidence-2024/cite.bib b/content/publication/christodoulou-confidence-2024/cite.bib new file mode 100644 index 0000000..9d7e17b --- /dev/null +++ b/content/publication/christodoulou-confidence-2024/cite.bib @@ -0,0 +1,15 @@ +@misc{christodoulou_confidence_2024, + abstract = {Medical imaging is spearheading the AI transformation of healthcare. Performance reporting is key to determine which methods should be translated into clinical practice. Frequently, broad conclusions are simply derived from mean performance values. In this paper, we argue that this common practice is often a misleading simplification as it ignores performance variability. Our contribution is threefold. (1) Analyzing all MICCAI segmentation papers (n = 221) published in 2023, we first observe that more than 50% of papers do not assess performance variability at all. Moreover, only one (0.5%) paper reported confidence intervals (CIs) for model performance. (2) To address the reporting bottleneck, we show that the unreported standard deviation (SD) in segmentation papers can be approximated by a second-order polynomial function of the mean Dice similarity coefficient (DSC). Based on external validation data from 56 previous MICCAI challenges, we demonstrate that this approximation can accurately reconstruct the CI of a method using information provided in publications. (3) Finally, we reconstructed 95% CIs around the mean DSC of MICCAI 2023 segmentation papers. The median CI width was 0.03 which is three times larger than the median performance gap between the first and second ranked method. For more than 60% of papers, the mean performance of the second-ranked method was within the CI of the first-ranked method. 
We conclude that current publications typically do not provide sufficient evidence to support which models could potentially be translated into clinical practice.}, + author = {Christodoulou, Evangelia and Reinke, Annika and Houhou, Rola and Kalinowski, Piotr and Erkan, Selen and Sudre, Carole H. and Burgos, Ninon and Boutaj, Sofiène and Loizillon, Sophie and Solal, Maëlys and Rieke, Nicola and Cheplygina, Veronika and Antonelli, Michela and Mayer, Leon D. and Tizabi, Minu D. and Cardoso, M. Jorge and Simpson, Amber and Jäger, Paul F. and Kopp-Schneider, Annette and Varoquaux, Gaël and Colliot, Olivier and Maier-Hein, Lena}, + file = {Christodoulou et al. - 2024 - Confidence intervals uncovered Are we ready for r.pdf:/home/alpron/Zotero/storage/DRN46WQP/Christodoulou et al. - 2024 - Confidence intervals uncovered Are we ready for r.pdf:application/pdf}, + keywords = {Computer Science - Artificial Intelligence, Computer Science - Computer Vision and Pattern Recognition, Computer Science - Machine Learning}, + language = {en}, + month = {September}, + note = {arXiv:2409.17763 [cs]}, + publisher = {arXiv}, + shorttitle = {Confidence intervals uncovered}, + title = {Confidence intervals uncovered: Are we ready for real-world medical imaging AI?}, + url = {http://arxiv.org/abs/2409.17763}, + urldate = {2024-10-21}, + year = {2024} +} diff --git a/content/publication/christodoulou-confidence-2024/index.md b/content/publication/christodoulou-confidence-2024/index.md new file mode 100644 index 0000000..3bc0c66 --- /dev/null +++ b/content/publication/christodoulou-confidence-2024/index.md @@ -0,0 +1,58 @@ +--- +title: 'Confidence intervals uncovered: Are we ready for real-world medical imaging + AI?' +authors: +- Evangelia Christodoulou +- Annika Reinke +- Rola Houhou +- Piotr Kalinowski +- Selen Erkan +- Carole H. Sudre +- Ninon Burgos +- Sofiène Boutaj +- Sophie Loizillon +- Maëlys Solal +- Nicola Rieke +- Veronika Cheplygina +- Michela Antonelli +- Leon D. 
Mayer +- Minu D. Tizabi +- M. Jorge Cardoso +- Amber Simpson +- Paul F. Jäger +- Annette Kopp-Schneider +- Gaël Varoquaux +- Olivier Colliot +- Lena Maier-Hein +date: '2024-09-01' +publishDate: '2024-10-25T13:25:50.225704Z' +publication_types: +- manuscript +publication: '*arXiv*' +abstract: Medical imaging is spearheading the AI transformation of healthcare. Performance + reporting is key to determine which methods should be translated into clinical practice. + Frequently, broad conclusions are simply derived from mean performance values. In + this paper, we argue that this common practice is often a misleading simplification + as it ignores performance variability. Our contribution is threefold. (1) Analyzing + all MICCAI segmentation papers (n = 221) published in 2023, we first observe that + more than 50% of papers do not assess performance variability at all. Moreover, + only one (0.5%) paper reported confidence intervals (CIs) for model performance. + (2) To address the reporting bottleneck, we show that the unreported standard deviation + (SD) in segmentation papers can be approximated by a second-order polynomial function + of the mean Dice similarity coefficient (DSC). Based on external validation data + from 56 previous MICCAI challenges, we demonstrate that this approximation can accurately + reconstruct the CI of a method using information provided in publications. (3) Finally, + we reconstructed 95% CIs around the mean DSC of MICCAI 2023 segmentation papers. + The median CI width was 0.03 which is three times larger than the median performance + gap between the first and second ranked method. For more than 60% of papers, the + mean performance of the second-ranked method was within the CI of the first-ranked + method. We conclude that current publications typically do not provide sufficient + evidence to support which models could potentially be translated into clinical practice. 
+tags: +- Computer Science - Artificial Intelligence +- Computer Science - Computer Vision and Pattern Recognition +- Computer Science - Machine Learning +links: +- name: URL + url: http://arxiv.org/abs/2409.17763 +--- diff --git a/content/publication/cokelaer-reprohackathons-2023/cite.bib b/content/publication/cokelaer-reprohackathons-2023/cite.bib new file mode 100644 index 0000000..af254d6 --- /dev/null +++ b/content/publication/cokelaer-reprohackathons-2023/cite.bib @@ -0,0 +1,18 @@ +@article{cokelaer_reprohackathons_2023, + abstract = {The reproducibility crisis has highlighted the importance of improving the way bioinformatics data analyses are implemented, executed, and shared. To address this, various tools such as content versioning systems, workflow management systems, and software environment management systems have been developed. While these tools are becoming more widely used, there is still much work to be done to increase their adoption. The most effective way to ensure reproducibility becomes a standard part of most bioinformatics data analysis projects is to integrate it into the curriculum of bioinformatics Master’s programs. In this article, we present the Reprohackathon, a Master’s course that we have been running for the last 3 years at Université Paris-Saclay (France), and that has been attended by a total of 123 students. The course is divided into two parts. The first part includes lessons on the challenges related to reproducibility, content versioning systems, container management, and workflow systems. In the second part, students work on a data analysis project for 3–4 months, reanalyzing data from a previously published study. The Reprohackathon has taught us many valuable lessons, such as the fact that implementing reproducible analyses is a complex and challenging task that requires significant effort. 
However, providing in-depth teaching of the concepts and the tools during a Master’s degree program greatly improves students’ understanding and abilities in this area.}, + author = {Cokelaer, Thomas and Cohen-Boulakia, Sarah and Lemoine, Frédéric}, + doi = {10.1093/bioinformatics/btad227}, + file = {Full Text PDF:/home/alpron/Zotero/storage/XGMI5F5J/Cokelaer et al. - 2023 - Reprohackathons promoting reproducibility in bioi.pdf:application/pdf;Snapshot:/home/alpron/Zotero/storage/5USUN9I3/7210451.html:text/html}, + issn = {1367-4811}, + journal = {Bioinformatics}, + keywords = {hackathon, training}, + month = {June}, + number = {Supplement_1}, + pages = {i11--i20}, + shorttitle = {Reprohackathons}, + title = {Reprohackathons: promoting reproducibility in bioinformatics through training}, + url = {https://doi.org/10.1093/bioinformatics/btad227}, + urldate = {2024-10-01}, + volume = {39}, + year = {2023} +} diff --git a/content/publication/cokelaer-reprohackathons-2023/index.md b/content/publication/cokelaer-reprohackathons-2023/index.md new file mode 100644 index 0000000..7d82884 --- /dev/null +++ b/content/publication/cokelaer-reprohackathons-2023/index.md @@ -0,0 +1,37 @@ +--- +title: 'Reprohackathons: promoting reproducibility in bioinformatics through training' +authors: +- Thomas Cokelaer +- Sarah Cohen-Boulakia +- Frédéric Lemoine +date: '2023-06-01' +publishDate: '2024-10-25T13:25:50.211524Z' +publication_types: +- article-journal +publication: '*Bioinformatics*' +doi: 10.1093/bioinformatics/btad227 +abstract: The reproducibility crisis has highlighted the importance of improving the + way bioinformatics data analyses are implemented, executed, and shared. To address + this, various tools such as content versioning systems, workflow management systems, + and software environment management systems have been developed. While these tools + are becoming more widely used, there is still much work to be done to increase their + adoption. 
The most effective way to ensure reproducibility becomes a standard part + of most bioinformatics data analysis projects is to integrate it into the curriculum + of bioinformatics Master’s programs. In this article, we present the Reprohackathon, + a Master’s course that we have been running for the last 3 years at Université Paris-Saclay + (France), and that has been attended by a total of 123 students. The course is divided + into two parts. The first part includes lessons on the challenges related to reproducibility, + content versioning systems, container management, and workflow systems. In the second + part, students work on a data analysis project for 3–4 months, reanalyzing data + from a previously published study. The Reprohackathon has taught us many valuable + lessons, such as the fact that implementing reproducible analyses is a complex and + challenging task that requires significant effort. However, providing in-depth teaching + of the concepts and the tools during a Master’s degree program greatly improves + students’ understanding and abilities in this area. +tags: +- hackathon +- training +links: +- name: URL + url: https://doi.org/10.1093/bioinformatics/btad227 +--- diff --git a/content/publication/cosmo-software-2023/cite.bib b/content/publication/cosmo-software-2023/cite.bib new file mode 100644 index 0000000..79e4447 --- /dev/null +++ b/content/publication/cosmo-software-2023/cite.bib @@ -0,0 +1,18 @@ +@incollection{cosmo_software_2023, + abstract = {Software Heritage is the largest public archive of software source code and associated development history, as captured by modern version control systems. As of July 2023, it has archived more than 16 billion unique source code files coming from more than 250 million collaborative development projects. 
In this chapter, we describe the Software Heritage ecosystem, focusing on research and open science use cases.}, + address = {Cham}, + author = {Di Cosmo, Roberto and Zacchiroli, Stefano}, + booktitle = {Software Ecosystems: Tooling and Analytics}, + doi = {10.1007/978-3-031-36060-2_2}, + editor = {Mens, Tom and De Roover, Coen and Cleve, Anthony}, + file = {Cosmo and Zacchiroli - 2023 - The Software Heritage Open Science Ecosystem.pdf:/home/alpron/Zotero/storage/X9QREIGC/Cosmo and Zacchiroli - 2023 - The Software Heritage Open Science Ecosystem.pdf:application/pdf}, + isbn = {978-3-031-36060-2}, + keywords = {Reproducibility, FAIR, open science, swh, swh data model, swh features}, + language = {en}, + pages = {33--61}, + publisher = {Springer International Publishing}, + title = {The Software Heritage Open Science Ecosystem}, + url = {https://doi.org/10.1007/978-3-031-36060-2_2}, + urldate = {2024-07-12}, + year = {2023} +} diff --git a/content/publication/cosmo-software-2023/index.md b/content/publication/cosmo-software-2023/index.md new file mode 100644 index 0000000..086285e --- /dev/null +++ b/content/publication/cosmo-software-2023/index.md @@ -0,0 +1,28 @@ +--- +title: The Software Heritage Open Science Ecosystem +authors: +- Roberto Di Cosmo +- Stefano Zacchiroli +date: '2023-01-01' +publishDate: '2024-10-25T13:25:50.260016Z' +publication_types: +- chapter +publication: '*Software Ecosystems: Tooling and Analytics*' +doi: 10.1007/978-3-031-36060-2_2 +abstract: Software Heritage is the largest public archive of software source code + and associated development history, as captured by modern version control systems. + As of July 2023, it has archived more than 16 billion unique source code files coming + from more than 250 million collaborative development projects. In this chapter, + we describe the Software Heritage ecosystem, focusing on research and open science + use cases. 
+tags: +- Reproducibility +- FAIR +- open science +- swh +- swh data model +- swh features +links: +- name: URL + url: https://doi.org/10.1007/978-3-031-36060-2_2 +--- diff --git a/content/publication/marelli-scrutinizing-2018/cite.bib b/content/publication/marelli-scrutinizing-2018/cite.bib new file mode 100644 index 0000000..dd64e4f --- /dev/null +++ b/content/publication/marelli-scrutinizing-2018/cite.bib @@ -0,0 +1,23 @@ +@article{marelli_scrutinizing_2018, + abstract = {How will new decentralized governance impact research? + + +On 25 May 2018, the European Union (EU) regulation 2016/679 on data protection, also known as the General Data Protection Regulation (GDPR), will take effect. The GDPR, which repeals previous European legislation on data protection (Directive 95/46/EC) ( +1 +), is bound to have major effects on biomedical research and digital health technologies, in Europe and beyond, given the global reach of EU-based research and the prominence of international research networks requiring interoperability of standards. Here we describe ways in which the GDPR will become a critical tool to structure flexible governance for data protection. 
As a timely forecast for its potential impact, we analyze the implications of the GDPR in an ongoing paradigmatic legal controversy involving the database originally assembled by one of the world's first genomic biobanks, Shardna.}, + author = {Marelli, Luca and Testa, Giuseppe}, + copyright = {http://www.sciencemag.org/about/science-licenses-journal-article-reuse}, + doi = {10.1126/science.aar5419}, + file = {Submitted Version:/home/alpron/Zotero/storage/LIQDLUX2/Marelli and Testa - 2018 - Scrutinizing the EU General Data Protection Regula.pdf:application/pdf}, + issn = {0036-8075, 1095-9203}, + journal = {Science}, + language = {en}, + month = {May}, + number = {6388}, + pages = {496--498}, + title = {Scrutinizing the EU General Data Protection Regulation}, + url = {https://www.science.org/doi/10.1126/science.aar5419}, + urldate = {2024-10-22}, + volume = {360}, + year = {2018} +} diff --git a/content/publication/marelli-scrutinizing-2018/index.md b/content/publication/marelli-scrutinizing-2018/index.md new file mode 100644 index 0000000..a3e10b1 --- /dev/null +++ b/content/publication/marelli-scrutinizing-2018/index.md @@ -0,0 +1,27 @@ +--- +title: Scrutinizing the EU General Data Protection Regulation +authors: +- Luca Marelli +- Giuseppe Testa +date: '2018-05-01' +publishDate: '2024-10-25T13:25:50.242277Z' +publication_types: +- article-journal +publication: '*Science*' +doi: 10.1126/science.aar5419 +abstract: How will new decentralized governance impact research? On 25 May 2018, + the European Union (EU) regulation 2016/679 on data protection, also known as the + General Data Protection Regulation (GDPR), will take effect. 
The GDPR, which repeals + previous European legislation on data protection (Directive 95/46/EC) ( 1 ), is + bound to have major effects on biomedical research and digital health technologies, + in Europe and beyond, given the global reach of EU-based research and the prominence + of international research networks requiring interoperability of standards. Here + we describe ways in which the GDPR will become a critical tool to structure flexible + governance for data protection. As a timely forecast for its potential impact, we + analyze the implications of the GDPR in an ongoing paradigmatic legal controversy + involving the database originally assembled by one of the world's first genomic + biobanks, Shardna. +links: +- name: URL + url: https://www.science.org/doi/10.1126/science.aar5419 +--- diff --git a/content/publication/peng-heterogenous-2024/cite.bib b/content/publication/peng-heterogenous-2024/cite.bib new file mode 100644 index 0000000..be4625c --- /dev/null +++ b/content/publication/peng-heterogenous-2024/cite.bib @@ -0,0 +1,17 @@ +@article{peng_heterogenous_2024, + author = {Peng, Shaoling and Cui, Zaixu and Zhong, Suyu and Zhang, Yanyang and Cohen, Alexander L. and Fox, Michael D. and Gong, Gaolang}, + doi = {10.1038/s42003-024-06969-x}, + file = {Peng et al. - 2024 - Heterogenous brain activations across individuals .pdf:/home/alpron/Zotero/storage/7PH942WX/Peng et al. 
- 2024 - Heterogenous brain activations across individuals .pdf:application/pdf}, + issn = {2399-3642}, + journal = {Communications Biology}, + keywords = {Important, to_read}, + language = {en}, + month = {October}, + number = {1}, + pages = {1270}, + title = {Heterogenous brain activations across individuals localize to a common network}, + url = {https://www.nature.com/articles/s42003-024-06969-x}, + urldate = {2024-10-21}, + volume = {7}, + year = {2024} +} diff --git a/content/publication/peng-heterogenous-2024/index.md b/content/publication/peng-heterogenous-2024/index.md new file mode 100644 index 0000000..9ac59f3 --- /dev/null +++ b/content/publication/peng-heterogenous-2024/index.md @@ -0,0 +1,23 @@ +--- +title: Heterogenous brain activations across individuals localize to a common network +authors: +- Shaoling Peng +- Zaixu Cui +- Suyu Zhong +- Yanyang Zhang +- Alexander L. Cohen +- Michael D. Fox +- Gaolang Gong +date: '2024-10-01' +publishDate: '2024-10-25T13:25:50.219157Z' +publication_types: +- article-journal +publication: '*Communications Biology*' +doi: 10.1038/s42003-024-06969-x +tags: +- Important +- to_read +links: +- name: URL + url: https://www.nature.com/articles/s42003-024-06969-x +--- diff --git a/content/publication/poldrack-past-2024/cite.bib b/content/publication/poldrack-past-2024/cite.bib new file mode 100644 index 0000000..1304df5 --- /dev/null +++ b/content/publication/poldrack-past-2024/cite.bib @@ -0,0 +1,18 @@ +@article{poldrack_past_2024, + abstract = {Abstract +The Brain Imaging Data Structure (BIDS) is a community-driven standard for the organization of data and metadata from a growing range of neuroscience modalities. This paper is meant as a history of how the standard has developed and grown over time. We outline the principles behind the project, the mechanisms by which it has been extended, and some of the challenges being addressed as it evolves. 
We also discuss the lessons learned through the project, with the aim of enabling researchers in other domains to learn from the success of BIDS.}, + author = {Poldrack, Russell A. and Markiewicz, Christopher J. and Appelhoff, Stefan and Ashar, Yoni K. and Auer, Tibor and Baillet, Sylvain and Bansal, Shashank and Beltrachini, Leandro and Benar, Christian G. and Bertazzoli, Giacomo and Bhogawar, Suyash and Blair, Ross W. and Bortoletto, Marta and Boudreau, Mathieu and Brooks, Teon L. and Calhoun, Vince D. and Castelli, Filippo Maria and Clement, Patricia and Cohen, Alexander L. and Cohen-Adad, Julien and D’Ambrosio, Sasha and De Hollander, Gilles and De La Iglesia-Vayá, María and De La Vega, Alejandro and Delorme, Arnaud and Devinsky, Orrin and Draschkow, Dejan and Duff, Eugene Paul and DuPre, Elizabeth and Earl, Eric and Esteban, Oscar and Feingold, Franklin W. and Flandin, Guillaume and Galassi, Anthony and Gallitto, Giuseppe and Ganz, Melanie and Gau, Rémi and Gholam, James and Ghosh, Satrajit S. and Giacomel, Alessio and Gillman, Ashley G. and Gleeson, Padraig and Gramfort, Alexandre and Guay, Samuel and Guidali, Giacomo and Halchenko, Yaroslav O. and Handwerker, Daniel A. and Hardcastle, Nell and Herholz, Peer and Hermes, Dora and Honey, Christopher J. and Innis, Robert B. and Ioanas, Horea-Ioan and Jahn, Andrew and Karakuzu, Agah and Keator, David B. and Kiar, Gregory and Kincses, Balint and Laird, Angela R. and Lau, Jonathan C. and Lazari, Alberto and Legarreta, Jon Haitz and Li, Adam and Li, Xiangrui and Love, Bradley C. and Lu, Hanzhang and Marcantoni, Eleonora and Maumet, Camille and Mazzamuto, Giacomo and Meisler, Steven L. and Mikkelsen, Mark and Mutsaerts, Henk and Nichols, Thomas E. and Nikolaidis, Aki and Nilsonne, Gustav and Niso, Guiomar and Norgaard, Martin and Okell, Thomas W. and Oostenveld, Robert and Ort, Eduard and Park, Patrick J. and Pawlik, Mateusz and Pernet, Cyril R. 
and Pestilli, Franco and Petr, Jan and Phillips, Christophe and Poline, Jean-Baptiste and Pollonini, Luca and Raamana, Pradeep Reddy and Ritter, Petra and Rizzo, Gaia and Robbins, Kay A. and Rockhill, Alexander P. and Rogers, Christine and Rokem, Ariel and Rorden, Chris and Routier, Alexandre and Saborit-Torres, Jose Manuel and Salo, Taylor and Schirner, Michael and Smith, Robert E. and Spisak, Tamas and Sprenger, Julia and Swann, Nicole C. and Szinte, Martin and Takerkart, Sylvain and Thirion, Bertrand and Thomas, Adam G. and Torabian, Sajjad and Varoquaux, Gael and Voytek, Bradley and Welzel, Julius and Wilson, Martin and Yarkoni, Tal and Gorgolewski, Krzysztof J.}, + doi = {10.1162/imag_a_00103}, + file = {Poldrack et al. - 2024 - The past, present, and future of the brain imaging.pdf:/home/alpron/Zotero/storage/QCTVGBUZ/Poldrack et al. - 2024 - The past, present, and future of the brain imaging.pdf:application/pdf}, + issn = {2837-6056}, + journal = {Imaging Neuroscience}, + keywords = {Important, Review, BIDS, read}, + language = {en}, + month = {March}, + pages = {1--19}, + title = {The past, present, and future of the brain imaging data structure (BIDS)}, + url = {https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00103/119672/The-past-present-and-future-of-the-brain-imaging}, + urldate = {2024-10-24}, + volume = {2}, + year = {2024} +} diff --git a/content/publication/poldrack-past-2024/index.md b/content/publication/poldrack-past-2024/index.md new file mode 100644 index 0000000..054295f --- /dev/null +++ b/content/publication/poldrack-past-2024/index.md @@ -0,0 +1,141 @@ +--- +title: The past, present, and future of the brain imaging data structure (BIDS) +authors: +- Russell A. Poldrack +- Christopher J. Markiewicz +- Stefan Appelhoff +- Yoni K. Ashar +- Tibor Auer +- Sylvain Baillet +- Shashank Bansal +- Leandro Beltrachini +- Christian G. Benar +- Giacomo Bertazzoli +- Suyash Bhogawar +- Ross W. 
Blair +- Marta Bortoletto +- Mathieu Boudreau +- Teon L. Brooks +- Vince D. Calhoun +- Filippo Maria Castelli +- Patricia Clement +- Alexander L. Cohen +- Julien Cohen-Adad +- Sasha D’Ambrosio +- Gilles De Hollander +- María De La Iglesia-Vayá +- Alejandro De La Vega +- Arnaud Delorme +- Orrin Devinsky +- Dejan Draschkow +- Eugene Paul Duff +- Elizabeth DuPre +- Eric Earl +- Oscar Esteban +- Franklin W. Feingold +- Guillaume Flandin +- Anthony Galassi +- Giuseppe Gallitto +- Melanie Ganz +- Rémi Gau +- James Gholam +- Satrajit S. Ghosh +- Alessio Giacomel +- Ashley G. Gillman +- Padraig Gleeson +- Alexandre Gramfort +- Samuel Guay +- Giacomo Guidali +- Yaroslav O. Halchenko +- Daniel A. Handwerker +- Nell Hardcastle +- Peer Herholz +- Dora Hermes +- Christopher J. Honey +- Robert B. Innis +- Horea-Ioan Ioanas +- Andrew Jahn +- Agah Karakuzu +- David B. Keator +- Gregory Kiar +- Balint Kincses +- Angela R. Laird +- Jonathan C. Lau +- Alberto Lazari +- Jon Haitz Legarreta +- Adam Li +- Xiangrui Li +- Bradley C. Love +- Hanzhang Lu +- Eleonora Marcantoni +- Camille Maumet +- Giacomo Mazzamuto +- Steven L. Meisler +- Mark Mikkelsen +- Henk Mutsaerts +- Thomas E. Nichols +- Aki Nikolaidis +- Gustav Nilsonne +- Guiomar Niso +- Martin Norgaard +- Thomas W. Okell +- Robert Oostenveld +- Eduard Ort +- Patrick J. Park +- Mateusz Pawlik +- Cyril R. Pernet +- Franco Pestilli +- Jan Petr +- Christophe Phillips +- Jean-Baptiste Poline +- Luca Pollonini +- Pradeep Reddy Raamana +- Petra Ritter +- Gaia Rizzo +- Kay A. Robbins +- Alexander P. Rockhill +- Christine Rogers +- Ariel Rokem +- Chris Rorden +- Alexandre Routier +- Jose Manuel Saborit-Torres +- Taylor Salo +- Michael Schirner +- Robert E. Smith +- Tamas Spisak +- Julia Sprenger +- Nicole C. Swann +- Martin Szinte +- Sylvain Takerkart +- Bertrand Thirion +- Adam G. Thomas +- Sajjad Torabian +- Gael Varoquaux +- Bradley Voytek +- Julius Welzel +- Martin Wilson +- Tal Yarkoni +- Krzysztof J. 
Gorgolewski +date: '2024-03-01' +publishDate: '2024-10-25T13:25:50.249074Z' +publication_types: +- article-journal +publication: '*Imaging Neuroscience*' +doi: 10.1162/imag_a_00103 +abstract: The Brain Imaging Data Structure (BIDS) is a community-driven standard + for the organization of data and metadata from a growing range of neuroscience modalities. + This paper is meant as a history of how the standard has developed and grown over + time. We outline the principles behind the project, the mechanisms by which it has + been extended, and some of the challenges being addressed as it evolves. We also + discuss the lessons learned through the project, with the aim of enabling researchers + in other domains to learn from the success of BIDS. +tags: +- Important +- Review +- BIDS +- read +links: +- name: URL + url: + https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00103/119672/The-past-present-and-future-of-the-brain-imaging +--- diff --git a/content/publication/varoquaux-hype-2024/cite.bib b/content/publication/varoquaux-hype-2024/cite.bib new file mode 100644 index 0000000..686e167 --- /dev/null +++ b/content/publication/varoquaux-hype-2024/cite.bib @@ -0,0 +1,14 @@ +@misc{varoquaux_hype_2024, + abstract = {With the growing attention and investment in recent AI approaches such as large language models, the narrative that the larger the AI system the more valuable, powerful and interesting it is is increasingly seen as common sense. But what is this assumption based on, and how are we measuring value, power, and performance? And what are the collateral consequences of this race to ever-increasing scale? Here, we scrutinize the current scaling trends and trade-offs across multiple axes and refute two common assumptions underlying the 'bigger-is-better' AI paradigm: 1) that improved performance is a product of increased scale, and 2) that all interesting problems addressed by AI require large-scale models.
Rather, we argue that this approach is not only fragile scientifically, but comes with undesirable consequences. First, it is not sustainable, as its compute demands increase faster than model performance, leading to unreasonable economic requirements and a disproportionate environmental footprint. Second, it implies focusing on certain problems at the expense of others, leaving aside important applications, e.g. health, education, or the climate. Finally, it exacerbates a concentration of power, which centralizes decision-making in the hands of a few actors while threatening to disempower others in the context of shaping both AI research and its applications throughout society.}, + author = {Varoquaux, Gaël and Luccioni, Alexandra Sasha and Whittaker, Meredith}, + doi = {10.48550/arXiv.2409.14160}, + file = {arXiv Fulltext PDF:/home/alpron/Zotero/storage/9WFZRACE/Varoquaux et al. - 2024 - Hype, Sustainability, and the Price of the Bigger-.pdf:application/pdf}, + keywords = {AI ethics, big, Computer Science - Computers and Society}, + month = {September}, + note = {arXiv:2409.14160 [cs]}, + publisher = {arXiv}, + title = {Hype, Sustainability, and the Price of the Bigger-is-Better Paradigm in AI}, + url = {http://arxiv.org/abs/2409.14160}, + urldate = {2024-10-01}, + year = {2024} +} diff --git a/content/publication/varoquaux-hype-2024/index.md b/content/publication/varoquaux-hype-2024/index.md new file mode 100644 index 0000000..f144ff0 --- /dev/null +++ b/content/publication/varoquaux-hype-2024/index.md @@ -0,0 +1,37 @@ +--- +title: Hype, Sustainability, and the Price of the Bigger-is-Better Paradigm in AI +authors: +- Gaël Varoquaux +- Alexandra Sasha Luccioni +- Meredith Whittaker +date: '2024-09-01' +publishDate: '2024-10-25T13:25:50.200758Z' +publication_types: +- manuscript +publication: '*arXiv*' +doi: 10.48550/arXiv.2409.14160 +abstract: "With the growing attention and investment in recent AI approaches such + as large language models, the narrative that 
the larger the AI system the more valuable, + powerful and interesting it is is increasingly seen as common sense. But what is + this assumption based on, and how are we measuring value, power, and performance? + And what are the collateral consequences of this race to ever-increasing scale? + Here, we scrutinize the current scaling trends and trade-offs across multiple axes + and refute two common assumptions underlying the 'bigger-is-better' AI paradigm: + 1) that improved performance is a product of increased scale, and 2) that all interesting + problems addressed by AI require large-scale models. Rather, we argue that this + approach is not only fragile scientifically, but comes with undesirable consequences. + First, it is not sustainable, as its compute demands increase faster than model + performance, leading to unreasonable economic requirements and a disproportionate + environmental footprint. Second, it implies focusing on certain problems at the + expense of others, leaving aside important applications, e.g. health, education, + or the climate. Finally, it exacerbates a concentration of power, which centralizes + decision-making in the hands of a few actors while threatening to disempower others + in the context of shaping both AI research and its applications throughout society." +tags: +- AI ethics +- big +- Computer Science - Computers and Society +links: +- name: URL + url: http://arxiv.org/abs/2409.14160 +---