Skip to content

Commit

Permalink
Merge pull request #20 from chhoumann/related-work
Browse files Browse the repository at this point in the history
Related work
  • Loading branch information
Ivikhostrup authored Nov 14, 2023
2 parents a7b44a7 + ae40495 commit cfa12f9
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 16 deletions.
2 changes: 1 addition & 1 deletion report_pre_thesis/src/_preamble.tex
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
\documentclass[acmtog]{acmart}
\usepackage{natbib}

\usepackage{todonotes}

\title{Identifying Limitations in the ChemCam Multivariate Oxide Composition Model for Elemental Quantification in Martian Geological Samples}
\author{Christian Bager Bach Houmann}
Expand Down
118 changes: 117 additions & 1 deletion report_pre_thesis/src/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,107 @@ @article{knight2000
langid = {english},
}


% Castorena et al. (2021): spectral CNN that pre-processes and calibrates LIBS signals.
% The acronym in the title is brace-protected so sentence-casing styles keep it upper-case.
@article{castorena_deep_2021,
  author       = {Castorena, Juan and Oyen, Diane and Ollila, Ann and Legett, Carey and Lanza, Nina},
  title        = {Deep spectral {CNN} for laser induced breakdown spectroscopy},
  journaltitle = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  shortjournal = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  volume       = {178},
  pages        = {106125},
  date         = {2021-04},
  issn         = {05848547},
  doi          = {10.1016/j.sab.2021.106125},
  url          = {https://linkinghub.elsevier.com/retrieve/pii/S0584854721000720},
  urldate      = {2023-10-30},
  langid       = {english},
  abstract     = {This work proposes a spectral convolutional neural network (CNN) operating on laser induced breakdown spectroscopy (LIBS) signals to learn to (1) disentangle spectral signals from the sources of sensor uncertainty (i.e., pre-process) and (2) get qualitative and quantitative measures of chemical content of a sample given a spectral signal (i.e., calibrate). Once the spectral {CNN} is trained, it can accomplish either task through a single feedforward pass, with real-time benefits and without any additional side information requirements including dark current, system response, temperature and detector-to-target range. Our experiments demonstrate that the proposed method outperforms the existing approaches used by the Mars Science Lab for pre-processing and calibration for remote sensing observations from the Mars rover, 'Curiosity'.},
}


% Bai et al. (2023): elastic net regression for major-element quantification from Martian LIBS data.
% "Martian" is brace-protected in the title so sentence-casing styles keep the capital.
@article{bai_application_2023,
  author       = {Bai, Hongchun and Liu, Ping and Fu, Xiaohui and Qiao, Le and Liu, Changqing and Xin, Yanqi and Ling, Zongcheng},
  title        = {Application of elastic net in quantitative analysis of major elements using {Martian} laser-induced breakdown spectroscopy datasets},
  journaltitle = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  shortjournal = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  volume       = {199},
  pages        = {106587},
  date         = {2023-01},
  issn         = {05848547},
  doi          = {10.1016/j.sab.2022.106587},
  url          = {https://linkinghub.elsevier.com/retrieve/pii/S0584854722002312},
  urldate      = {2023-11-07},
  langid       = {english},
  abstract     = {Multiple sets of laser-induced breakdown spectroscopy ({LIBS}) instruments, including {ChemCam} (Curiosity), {SuperCam} (Perseverance), and the Mars Surface Composition Detector ({MarSCoDe}, Zhurong), are working currently on the surface of Mars, a booming scene of {LIBS} technology application to planetary exploration. One of the primary challenges faced by those {LIBS} instruments is accurate and quantitative chemical composition determination of unexplored geological targets. Elastic net is a linear regression model that combines ridge regression and lasso model, which inherits the sparseness of lasso and the stability of ridge. In this work, we investigated the spectral features selected by elastic net model, the model performance and application to over 23,000 {LIBS} points. Selected features exhibit significant emission lines that are attributed to a certain single element and a few lines attributed to other related elements. These features enhance the interpretability of the model and weaken the matrix effect in {LIBS} quantitative analysis. In comparison, the results of elastic net are comparable and may be slightly better in some cases than other common linear regression models. For igneous calibration targets (norite, picrite, and shergottite), the predictions of elastic net are closer to the values measured in Earth laboratory than the final multivariate oxide composition values predicted by {ChemCam} team. Finally, we predicted the oxide abundances of 8 major elements from Mars {LIBS} spectra of over 23,000 points using elastic net. The predicted values of the elastic net models are highly correlated with those from the {ChemCam} team model. These results indicate that elastic net is viable for quantitative analysis Mars {LIBS} spectra. We propose that elastic net is an important candidate model for the quantitative analysis of the Zhurong {MarSCoDe} {LIBS} spectra.},
}


% Dyar and Ytsma (2021): effect of training-set size on LIBS geochemical quantification accuracy.
@article{dyar_effect_2021,
  author       = {Dyar, M. Darby and Ytsma, Cai R.},
  title        = {Effect of data set size on geochemical quantification accuracy with laser-induced breakdown spectroscopy},
  journaltitle = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  shortjournal = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  volume       = {177},
  pages        = {106073},
  date         = {2021-03},
  issn         = {05848547},
  doi          = {10.1016/j.sab.2021.106073},
  url          = {https://linkinghub.elsevier.com/retrieve/pii/S0584854721000094},
  urldate      = {2023-11-07},
  langid       = {english},
  abstract     = {Laser-induced breakdown spectroscopy ({LIBS}) data acquired from 2959 geochemical standards allow the effects of training set size on {LIBS} accuracy in geochemical analyses to be evaluated. In addition, {LIBS} prediction accuracies are quantified for 65 elements based on a typical benchtop instrument. Analyses used two equivalent randomly selected subsets of the full data set to compare prediction accuracies of partial least squares models using 75, 50, 25, 10, 5, 2.5, 1, and 0.5\% of the total data set for training and the remainder for testing. The number of components, a measure of complexity, in the {PLS} models was shown to increase with the size of the training set. Based on root mean square errors on unseen test data, our results show that the larger the training set, the better (lower) the prediction accuracy will be on unseen data. Calibration (training set) size was shown to have a first-order effect on prediction accuracy relative to spectral resolution and detector sensitivity. Different methods of assessing model accuracy using root mean square error ({RMSE}) are compared, including the error of the calibration ({RMSE}-C), the error of cross-validation ({RMSE}-{CV}), and the error of prediction ({RMSE}-P). Use of {RMSE}-C is inappropriate because the samples being predicted are those on which the model was trained. In data sets that are sufficiently large, use of test data ({RMSE}-P) provides the best measure of prediction accuracy, while {RMSE}-{CV} is useful only to provide an estimate of subsequent model performance. Increasing the number of crossvalidation folds for our large dataset yields surprisingly comparable {RMSE}-{CV} values for models with five or more (up to 100) folds, but this result is likely not applicable to smaller data sets and needs further evaluation.},
}


% Lepore, Ytsma and Dyar (2022): LIBS prediction accuracy with optimized multivariate submodels.
@article{lepore_quantitative_2022,
  author       = {Lepore, Kate H. and Ytsma, Caroline R. and Dyar, M. Darby},
  title        = {Quantitative prediction accuracies derived from laser-induced breakdown spectra using optimized multivariate submodels},
  journaltitle = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  shortjournal = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  volume       = {191},
  pages        = {106408},
  date         = {2022-05},
  issn         = {05848547},
  doi          = {10.1016/j.sab.2022.106408},
  url          = {https://linkinghub.elsevier.com/retrieve/pii/S0584854722000520},
  urldate      = {2023-11-07},
  langid       = {english},
  abstract     = {The accuracy of laser-induced breakdown spectroscopy ({LIBS}) methods for analyzing geological samples is improved when calibration standards and unknown targets are compositionally similar. A recent study suggests that customized submodels can be used to optimize calibration datasets to achieve more accurate predictions [1]. In practice, this is difficult to implement because the errors inherent in the methods used for sorting unknown targets by composition may affect how successfully this matching can occur. Moreover, creation of submodels intrinsically reduces the size of the dataset on which the model is trained, which has been shown to reduce prediction accuracy. This paper uses {LIBS} spectra of 2990 unique rock powder standards to compare the accuracy of 1) submodels generated for each element over its geochemical range, 2) submodels created using {SiO}2 content only, 3) submodels created using the ratio of Si({II})/Si(I) emission lines to group spectra by a proxy for approximate plasma temperature, and 4) models created using all data. Results indicate that prediction accuracies are not always improved by creating submodels because subdividing a dataset to optimize calibrations will always result in a smaller database available for each submodel, and the reduced training set size negatively affects accuracy. Customized {LIBS} standards for specific applications might overcome this problem in cases where the matrix is similar and the expected concentration range is known. But in a majority of geochemical applications, submodel approaches are only useful in improving prediction accuracies when the initial database is itself extensive enough to support large, robust submodel calibration suites.},
}


% Takahashi and Thornton (2017): review of matrix-effect and self-absorption compensation in LIBS of solids.
@article{takahashi_quantitative_2017,
  author       = {Takahashi, Tomoko and Thornton, Blair},
  title        = {Quantitative methods for compensation of matrix effects and self-absorption in Laser Induced Breakdown Spectroscopy signals of solids},
  journaltitle = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  shortjournal = {Spectrochimica Acta Part B: Atomic Spectroscopy},
  volume       = {138},
  pages        = {31--42},
  date         = {2017-12},
  issn         = {05848547},
  doi          = {10.1016/j.sab.2017.09.010},
  url          = {https://linkinghub.elsevier.com/retrieve/pii/S0584854716303299},
  urldate      = {2023-11-07},
  langid       = {english},
  abstract     = {This paper reviews methods to compensate for matrix effects and self-absorption during quantitative analysis of compositions of solids measured using Laser Induced Breakdown Spectroscopy ({LIBS}) and their applications to in-situ analysis. Methods to reduce matrix and self-absorption effects on calibration curves are first introduced. The conditions where calibration curves are applicable to quantification of compositions of solid samples and their limitations are discussed. While calibration-free {LIBS} ({CF}-{LIBS}), which corrects matrix effects theoretically based on the Boltzmann distribution law and Saha equation, has been applied in a number of studies, requirements need to be satisfied for the calculation of chemical compositions to be valid. Also, peaks of all elements contained in the target need to be detected, which is a bottleneck for in-situ analysis of unknown materials. Multivariate analysis techniques are gaining momentum in {LIBS} analysis. Among the available techniques, principal component regression ({PCR}) analysis and partial least squares ({PLS}) regression analysis, which can extract related information to compositions from all spectral data, are widely established methods and have been applied to various fields including in-situ applications in air and for planetary explorations. Artificial neural networks ({ANNs}), where non-linear effects can be modelled, have also been investigated as a quantitative method and their applications are introduced. The ability to make quantitative estimates based on {LIBS} signals is seen as a key element for the technique to gain wider acceptance as an analytical method, especially in in-situ applications. 
In order to accelerate this process, it is recommended that the accuracy should be described using common figures of merit which express the overall normalised accuracy, such as the normalised root mean square errors ({NRMSEs}), when comparing the accuracy obtained from different setups and analytical methods.},
}

% Chen and Guestrin (2016): XGBoost, a scalable tree boosting system (KDD '16).
@inproceedings{chen_xgboost_2016,
  author     = {Chen, Tianqi and Guestrin, Carlos},
  title      = {{XGBoost}: A Scalable Tree Boosting System},
  shorttitle = {{XGBoost}},
  booktitle  = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  eventtitle = {{KDD} '16: The 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  location   = {San Francisco, California, {USA}},
  publisher  = {{ACM}},
  pages      = {785--794},
  date       = {2016-08-13},
  isbn       = {978-1-4503-4232-2},
  doi        = {10.1145/2939672.2939785},
  url        = {https://dl.acm.org/doi/10.1145/2939672.2939785},
  urldate    = {2023-11-13},
  langid     = {english},
  abstract   = {Tree boosting is a highly effective and widely used machine learning method. In this paper, we describe a scalable end-to-end tree boosting system called {XGBoost}, which is used widely by data scientists to achieve state-of-the-art results on many machine learning challenges. We propose a novel sparsity-aware algorithm for sparse data and weighted quantile sketch for approximate tree learning. More importantly, we provide insights on cache access patterns, data compression and sharding to build a scalable tree boosting system. By combining these insights, {XGBoost} scales beyond billions of examples using far fewer resources than existing systems.},
}
@article{forniIndependentComponentAnalysis2013,
title = {Independent Component Analysis Classification of Laser Induced Breakdown Spectroscopy Spectra},
author = {Forni, Olivier and Maurice, Sylvestre and Gasnault, Olivier and Wiens, Roger C. and Cousin, Agnès and Clegg, Samuel M. and Sirven, Jean-Baptiste and Lasue, Jérémie},
Expand Down Expand Up @@ -134,5 +235,20 @@ @article{huang_progress_2023
urldate = {2023-11-13},
date = {2023-07},
langid = {english},
keywords = {📚}
}
% Hu et al. (2022): review of calibration-free LIBS (CF-LIBS).
@article{hu_review_2022,
  author       = {Hu, Zhenlin and Zhang, Deng and Wang, Weiliang and Chen, Feng and Xu, Yubin and Nie, Junfei and Chu, Yanwu and Guo, Lianbo},
  title        = {A review of calibration-free laser-induced breakdown spectroscopy},
  journaltitle = {{TrAC} Trends in Analytical Chemistry},
  shortjournal = {{TrAC} Trends in Analytical Chemistry},
  volume       = {152},
  pages        = {116618},
  date         = {2022-07},
  issn         = {01659936},
  doi          = {10.1016/j.trac.2022.116618},
  url          = {https://linkinghub.elsevier.com/retrieve/pii/S0165993622001017},
  urldate      = {2023-11-14},
  langid       = {english},
  abstract     = {As an important branch of laser-induced breakdown spectroscopy ({LIBS}), calibration-free {LIBS} ({CF}-{LIBS}) is a famous element quantitative analysis method without standards. This method has many advantages such as real-time, in-situ, on-site, single-point, and multi-elemental analysis, with excellent potential for geology, archaeology, industrial and environmental monitoring, and biomedicine. In this review, we summarized the development of {CF}-{LIBS}. It covered a brief description of the basic theory of {CF}-{LIBS}, several modified methods and variants, proposed to overcome the non-stoichiometric ablation, selfabsorption effect, and high algorithmic complexity. Furthermore, the applications of {CF}-{LIBS} in a variety of fields were reviewed. Finally, the existing problems of {CF}-{LIBS} and its potential were discussed.},
}
2 changes: 1 addition & 1 deletion report_pre_thesis/src/sections/introduction.tex
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ \section{Introduction}\label{sec:introduction}
The model is trained on a calibration dataset consisting of LIBS data from 408 terrestrial rock samples, simulated to mimic Martian conditions~\cite{cleggRecalibrationMarsScience2017}.

The Mars Science Laboratory has made notable progress in planetary exploration, largely relying on models like the Multivariate Oxide Composition (MOC) model to interpret Laser-Induced Breakdown Spectroscopy (LIBS) data from Martian geological samples.
Despite its utility, the existing MOC model shows limitations in predictive accuracy and robustness.
Despite its utility, a domain expert from the ChemCam team has observed that the existing MOC model exhibits limitations in both predictive accuracy and robustness.
Enhancing the predictive accuracy and robustness of the MOC model is crucial for achieving more reliable composition predictions, thereby furthering the scientific objectives of the Mars Science Laboratory in understanding Martian geology and potential habitability.
Accuracy, in this context, is measured as Root Mean Squared Error (RMSE).
Robustness refers to the model's ability to handle the variations in the data.
Expand Down
Loading

0 comments on commit cfa12f9

Please sign in to comment.