From 710fd4dfdd5313c37efa7e7f8a36a530a315539a Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 4 Apr 2024 10:37:16 +0200 Subject: [PATCH 1/7] delete graphnet.pisa --- src/graphnet/pisa/__init__.py | 1 - src/graphnet/pisa/fitting.py | 805 ------------------ src/graphnet/pisa/plotting.py | 173 ---- .../binning_config_template.cfg | 8 - .../pipeline_config_template.cfg | 106 --- .../pipeline_config_weight_template.cfg | 101 --- .../minimizer/graphnet_standard.json | 24 - 7 files changed, 1218 deletions(-) delete mode 100644 src/graphnet/pisa/__init__.py delete mode 100644 src/graphnet/pisa/fitting.py delete mode 100644 src/graphnet/pisa/plotting.py delete mode 100644 src/graphnet/pisa/resources/configuration_templates/binning_config_template.cfg delete mode 100644 src/graphnet/pisa/resources/configuration_templates/pipeline_config_template.cfg delete mode 100644 src/graphnet/pisa/resources/configuration_templates/pipeline_config_weight_template.cfg delete mode 100644 src/graphnet/pisa/resources/minimizer/graphnet_standard.json diff --git a/src/graphnet/pisa/__init__.py b/src/graphnet/pisa/__init__.py deleted file mode 100644 index d7034c3db..000000000 --- a/src/graphnet/pisa/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Modules for interfacing with PISA for oscillations analyses.""" diff --git a/src/graphnet/pisa/fitting.py b/src/graphnet/pisa/fitting.py deleted file mode 100644 index 5408f9bfc..000000000 --- a/src/graphnet/pisa/fitting.py +++ /dev/null @@ -1,805 +0,0 @@ -"""Functions and classes for fitting contours using PISA.""" - -import configparser -from contextlib import contextmanager -import io -import multiprocessing -import os -import random -from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING - -from configupdater import ConfigUpdater -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - -from graphnet.utilities.imports import has_pisa_package - -if has_pisa_package() or TYPE_CHECKING: - import pisa # pyright: reportMissingImports=false - from pisa.core.distribution_maker import DistributionMaker - from pisa.core.pipeline import Pipeline - from pisa.analysis.analysis import Analysis - from pisa import ureg - -from graphnet.data.utilities import create_table_and_save_to_sql - -mpl.use("pdf") -plt.rc("font", family="serif") - - -@contextmanager -def config_updater( - config_path: str, - new_config_path: Optional[str] = None, - dummy_section: str = "temp", -) -> ConfigUpdater: - """Update config files and saves them to file. - - Args: - config_path: Path to original config file. - new_config_path: Path to save updated config file. - dummy_section: Dummy section name to use for config files without - section headers. - - Yields: - ConfigUpdater instance for programatically updating config file. - """ - # Modify original config file is no new config path is provided. - if new_config_path is None: - new_config_path = config_path - - # Load config file - updater = ConfigUpdater() - has_dummy_section = False - try: - updater.read(config_path) - - # If it is missing section headers (e.g., binning.cfg), add a dummy section - # header before reading file contents. - except configparser.MissingSectionHeaderError: - with open(config_path, "r") as configfile: - updater.read_string(f"[{dummy_section}]\n" + configfile.read()) - has_dummy_section = True - - # Expose updater instance in contest (i.e., - # `with config_updater(...) as updater:``). 
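-    # The `finally` clause below runs even if the caller's block raises,
-    # so any edits made before the exception are still written to file.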
- try: - yield updater - - # Write new config to file - finally: - with open(new_config_path, "w") as configfile: - if has_dummy_section: - # Removing dummy section header if necessary - with io.StringIO() as buffer: - updater.write(buffer) - buffer.seek(0) - lines = buffer.readlines()[1:] - configfile.writelines(lines) - else: - updater.write(configfile) - - -class WeightFitter: - """Class for fitting weights using PISA.""" - - def __init__( - self, - database_path: str, - truth_table: str = "truth", - index_column: str = "event_no", - statistical_fit: bool = False, - ) -> None: - """Construct `WeightFitter`.""" - self._database_path = database_path - self._truth_table = truth_table - self._index_column = index_column - self._statistical_fit = statistical_fit - - def fit_weights( - self, - config_outdir: str, - weight_name: str = "", - pisa_config_dict: Optional[Dict] = None, - add_to_database: bool = False, - ) -> pd.DataFrame: - """Fit flux weights to each neutrino event in `self._database_path`. - - If `statistical_fit=True`, only statistical effects are accounted for. - If `True`, certain systematic effects are included, but not - hypersurfaces. - - Args: - config_outdir: The output directory in which to store the - configuration. - weight_name: The name of the weight. If `add_to_database=True`, - this will be the name of the table. - pisa_config_dict: The dictionary of PISA configurations. Can be - used to change assumptions regarding the fit. - add_to_database: If `True`, a table will be added to the database - called `weight_name` with two columns: - `[index_column, weight_name]` - - Returns: - A dataframe with columns `[index_column, weight_name]`. - """ - # If its a standard weight - if pisa_config_dict is None: - if not weight_name: - print(weight_name) - weight_name = "pisa_weight_graphnet_standard" - - # If it is a custom weight without name - elif pisa_config_dict is not None: - if not weight_name: - weight_name = "pisa_custom_weight" - - pisa_config_path = self._make_config( - config_outdir, weight_name, pisa_config_dict - ) - - model = Pipeline(pisa_config_path) - - if self._statistical_fit == "True": - # Only free parameters will be [aeff_scale] - corresponding to a statistical fit - free_params = model.params.free.names - for free_param in free_params: - if free_param not in ["aeff_scale"]: - model.params[free_param].is_fixed = True - - # for stage in range(len(model.stages)): - model.stages[-1].apply_mode = "events" - model.stages[-1].calc_mode = "events" - model.run() - - all_data = [] - for container in model.data: - data = pd.DataFrame(container["event_no"], columns=["event_no"]) - data[weight_name] = container["weights"] - all_data.append(data) - results = pd.concat(all_data) - - if add_to_database: - create_table_and_save_to_sql( - results.columns, weight_name, self._database_path - ) - return results.sort_values("event_no").reset_index(drop=True) - - def _make_config( - self, - config_outdir: str, - weight_name: str, - pisa_config_dict: Optional[Dict] = None, - ) -> str: - os.makedirs(config_outdir + "/" + weight_name, exist_ok=True) - if pisa_config_dict is None: - # Run on standard settings - pisa_config_dict = { - "reco_energy": {"num_bins": 8}, - "reco_coszen": {"num_bins": 8}, - "pid": {"bin_edges": [0, 0.5, 1]}, - "true_energy": {"num_bins": 200}, - "true_coszen": {"num_bins": 200}, - "livetime": 10 - * 0.01, # set to 1% of 10 years - correspond to the size of the oscNext burn sample - } - - pisa_config_dict["pipeline"] = self._database_path - 
pisa_config_dict["post_fix"] = None - pipeline_cfg_path = self._create_configs( - pisa_config_dict, config_outdir + "/" + weight_name - ) - return pipeline_cfg_path - - def _create_configs(self, config_dict: Dict, path: str) -> str: - # Update binning config - root = os.path.realpath( - os.path.join(os.getcwd(), os.path.dirname(__file__)) - ) - if config_dict["post_fix"] is not None: - config_name = "config%s" % config_dict["post_fix"] - else: - # config_dict["post_fix"] = '_pred' - config_name = "config" - - with config_updater( - root - + "/resources/configuration_templates/binning_config_template.cfg", - "%s/binning_%s.cfg" % (path, config_name), - dummy_section="binning", - ) as updater: - updater["binning"][ - "graphnet_dynamic_binning.reco_energy" - ].value = ( - "{'num_bins':%s, 'is_log':True, 'domain':[0.5,55] * units.GeV, 'tex': r'E_{\\rm reco}'}" - % config_dict["reco_energy"]["num_bins"] - ) # noqa: W605 - updater["binning"][ - "graphnet_dynamic_binning.reco_coszen" - ].value = ( - "{'num_bins':%s, 'is_lin':True, 'domain':[-1,1], 'tex':r'\\cos{\\theta}_{\\rm reco}'}" - % config_dict["reco_coszen"]["num_bins"] - ) # noqa: W605 - updater["binning"]["graphnet_dynamic_binning.pid"].value = ( - "{'bin_edges': %s, 'tex':r'{\\rm PID}'}" - % config_dict["pid"]["bin_edges"] - ) # noqa: W605 - updater["binning"]["true_allsky_fine.true_energy"].value = ( - "{'num_bins':%s, 'is_log':True, 'domain':[1,1000] * units.GeV, 'tex': r'E_{\\rm true}'}" - % config_dict["true_energy"]["num_bins"] - ) # noqa: W605 - updater["binning"]["true_allsky_fine.true_coszen"].value = ( - "{'num_bins':%s, 'is_lin':True, 'domain':[-1,1], 'tex':r'\\cos\,\\theta_{Z,{\\rm true}}'}" # noqa: W605 - % config_dict["true_coszen"]["num_bins"] - ) # noqa: W605 - - # Update pipeline config - with config_updater( - root - + "/resources/configuration_templates/pipeline_config_weight_template.cfg", - "%s/pipeline_%s.cfg" % (path, config_name), - ) as updater: - updater["pipeline"].add_before.comment( - "#include %s/binning_%s.cfg as binning" % (path, config_name) - ) - updater["data.sqlite_loader"]["post_fix"].value = config_dict[ - "post_fix" - ] - updater["data.sqlite_loader"]["database"].value = config_dict[ - "pipeline" - ] - if "livetime" in config_dict.keys(): - updater["aeff.aeff"]["param.livetime"].value = ( - "%s * units.common_year" % config_dict["livetime"] - ) - return "%s/pipeline_%s.cfg" % (path, config_name) - - -class ContourFitter: - """Class for fitting contours using PISA.""" - - def __init__( - self, - outdir: str, - pipeline_path: str, - post_fix: str = "_pred", - model_name: str = "gnn", - include_retro: bool = True, - statistical_fit: bool = False, - ): - """Construct `ContourFitter`.""" - self._outdir = outdir - self._pipeline_path = pipeline_path - self._post_fix = post_fix - self._model_name = model_name - self._include_retro = include_retro - self._statistical_fit = str(statistical_fit) - self._allowed_contour_types = ["1d", "2d"] - - def fit_1d_contour( - self, - run_name: str, - config_dict: Dict, - grid_size: int = 30, - n_workers: int = 1, - theta23_minmax: Tuple[float, float] = (36.0, 54.0), - dm31_minmax: Tuple[float, float] = (2.3, 2.7), - ) -> None: - """Fit 1D contours.""" - self._fit_contours( - config_dict=config_dict, - run_name=run_name, - grid_size=grid_size, - n_workers=n_workers, - theta23_minmax=theta23_minmax, - dm31_minmax=dm31_minmax, - contour_type="1d", - ) - - def fit_2d_contour( - self, - run_name: str, - config_dict: Dict, - grid_size: int = 30, - n_workers: int = 1, - 
theta23_minmax: Tuple[float, float] = (36.0, 54.0), - dm31_minmax: Tuple[float, float] = (2.3, 2.7), - ) -> None: - """Fit 2D contours.""" - self._fit_contours( - config_dict=config_dict, - run_name=run_name, - grid_size=grid_size, - n_workers=n_workers, - theta23_minmax=theta23_minmax, - dm31_minmax=dm31_minmax, - contour_type="2d", - ) - - def _check_inputs( - self, - contour_type: str, - dm31_minmax: Tuple[float, float], - theta23_minmax: Tuple[float, float], - n_workers: int, - ) -> bool: - """Check whether inputs are as expected.""" - if contour_type.lower() not in self._allowed_contour_types: - print( - "%s not recognized as valid contour type. Only %s is recognized" - % (contour_type, self._allowed_contour_types) - ) - return False - if ( - (len(theta23_minmax) != 2) - or (len(dm31_minmax) != 2) - or (dm31_minmax[0] > dm31_minmax[1]) - or (theta23_minmax[0] > theta23_minmax[1]) - ): - print( - "theta23 or dm31 min max values are not understood. Please provide a list on the form [min, max] for both variables" - ) - return False - if n_workers < 1: - print("found n_workers < 1. n_workers must be positive integers.") - return False - return True - - def _fit_contours( - self, - run_name: str, - config_dict: Dict, - grid_size: int, - n_workers: int, - contour_type: str, - theta23_minmax: Tuple[float, float], - dm31_minmax: Tuple[float, float], - ) -> None: - """Fit contours.""" - inputs_ok = self._check_inputs( - contour_type=contour_type, - dm31_minmax=dm31_minmax, - theta23_minmax=theta23_minmax, - n_workers=n_workers, - ) - if not inputs_ok: - return - - minimizer_cfg = self._get_minimizer_path(config_dict) - cfgs = self._setup_config_files(run_name, config_dict) - theta23_range = np.linspace( - theta23_minmax[0], theta23_minmax[1], grid_size - ) - dm31_range = ( - np.linspace(dm31_minmax[0], dm31_minmax[1], grid_size) * 1e-3 - ) - if contour_type.lower() == "1d": - settings = self._make_1d_settings( - cfgs=cfgs, - grid_size=grid_size, - run_name=run_name, - minimizer_cfg=minimizer_cfg, - theta23_range=theta23_range, - dm31_range=dm31_range, - n_workers=n_workers, - ) - p = multiprocessing.Pool(processes=len(settings)) - _ = p.map_async(self._parallel_fit_1d_contour, settings) - p.close() - p.join() - # self._parallel_fit_1d_contour(settings[0]) - elif contour_type.lower() == "2d": - settings = self._make_2d_settings( - cfgs=cfgs, - grid_size=grid_size, - run_name=run_name, - minimizer_cfg=minimizer_cfg, - theta23_range=theta23_range, - dm31_range=dm31_range, - n_workers=n_workers, - ) - p = multiprocessing.Pool(processes=len(settings)) - _ = p.map_async(self._parallel_fit_2d_contour, settings) - p.close() - p.join() - # self._parallel_fit_2d_contour(settings[0]) - df = self._merge_temporary_files(run_name) - df.to_csv(self._outdir + "/" + run_name + "/merged_results.csv") - - def _merge_temporary_files(self, run_name: str) -> pd.DataFrame: - files = os.listdir(self._outdir + "/" + run_name + "/tmp") - df = pd.concat( - [ - pd.read_csv(f"{self._outdir}/{run_name}/tmp/{file}") - for file in files - ], - ignore_index=True, - ) - return df - - def _parallel_fit_2d_contour(self, settings: List[List[Any]]) -> None: - """Fit 2D contours in parallel. - - Length of settings determines the amount of jobs this worker gets. - Results are saved to temporary .csv-files that are later merged. - - Args: - settings: A list of fitting settings. 
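-                Each entry unpacks to (cfg_path, model_name, outdir,
-                theta23_value, deltam31_value, id, run_name, fix_all,
-                minimizer_cfg).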
- """ - results = [] - for i in range(len(settings)): - ( - cfg_path, - model_name, - outdir, - theta23_value, - deltam31_value, - id, - run_name, - fix_all, - minimizer_cfg, - ) = settings[i] - minimizer_cfg = pisa.utils.fileio.from_file(minimizer_cfg) - model = DistributionMaker([cfg_path]) - data = model.get_outputs(return_sum=True) - ana = Analysis() - if fix_all == "True": - # Only free parameters will be [parameter, aeff_scale] - corresponding to a statistical fit - free_params = model.params.free.names - for free_param in free_params: - if free_param != "aeff_scale": - if free_param == "theta23": - model.params.theta23.is_fixed = True - model.params.theta23.nominal_value = ( - float(theta23_value) * ureg.degree - ) - elif free_param == "deltam31": - model.params.deltam31.is_fixed = True - model.params.deltam31.nominal_value = ( - float(deltam31_value) * ureg.electron_volt**2 - ) - else: - model.params[free_param].is_fixed = True - else: - # Only fixed parameters will be [parameter] - model.params.theta23.is_fixed = True - model.params.deltam31.is_fixed = True - model.params.theta23.nominal_value = ( - float(theta23_value) * ureg.degree - ) - model.params.deltam31.nominal_value = ( - float(deltam31_value) * ureg.electron_volt**2 - ) - model.reset_all() - result = ana.fit_hypo( - data, - model, - metric="mod_chi2", - minimizer_settings=minimizer_cfg, - fit_octants_separately=True, - ) - results.append( - [ - theta23_value, - deltam31_value, - result[0]["params"].theta23.value, - result[0]["params"].deltam31.value, - result[0]["metric_val"], - model_name, - id, - result[0]["minimizer_metadata"]["success"], - ] - ) - self._save_temporary_results( - outdir=outdir, run_name=run_name, results=results, id=id - ) - - def _parallel_fit_1d_contour(self, settings: List[List[Any]]) -> None: - """Fit 1D contours in parallel. - - Length of settings determines the amount of jobs this worker gets. - Results are saved to temporary .csv-files that are later merged. - - Args: - settings: A list of fitting settings. 
- """ - results = [] - for i in range(len(settings)): - ( - cfg_path, - model_name, - outdir, - theta23_value, - deltam31_value, - id, - run_name, - parameter, - fix_all, - minimizer_cfg, - ) = settings[i] - minimizer_cfg = pisa.utils.fileio.from_file(minimizer_cfg) - ana = Analysis() - model = DistributionMaker([cfg_path]) - data = model.get_outputs(return_sum=True) - if fix_all == "True": - # Only free parameters will be [parameter, aeff_scale] - corresponding to a statistical fit - free_params = model.params.free.names - for free_param in free_params: - if free_param not in ["aeff_scale", "theta23", "deltam31"]: - model.params[free_param].is_fixed = True - if parameter == "theta23": - model.params.theta23.is_fixed = True - model.params.theta23.nominal_value = ( - float(theta23_value) * ureg.degree - ) - elif parameter == "deltam31": - model.params.deltam31.is_fixed = True - model.params.deltam31.nominal_value = ( - float(deltam31_value) * ureg.electron_volt**2 - ) - else: - # Only fixed parameters will be [parameter] - if parameter == "theta23": - model.params.theta23.is_fixed = True - model.params.theta23.nominal_value = ( - float(theta23_value) * ureg.degree - ) - elif parameter == "deltam31": - model.params.deltam31.is_fixed = True - model.params.deltam31.nominal_value = ( - float(deltam31_value) * ureg.electron_volt**2 - ) - else: - print("parameter not supported: %s" % parameter) - model.reset_all() - result = ana.fit_hypo( - data, - model, - metric="mod_chi2", - minimizer_settings=minimizer_cfg, - fit_octants_separately=True, - ) - results.append( - [ - theta23_value, - deltam31_value, - result[0]["params"].theta23.value, - result[0]["params"].deltam31.value, - result[0]["metric_val"], - model_name, - id, - result[0]["minimizer_metadata"]["success"], - ] - ) - self._save_temporary_results( - outdir=outdir, run_name=run_name, results=results, id=id - ) - - def _save_temporary_results( - self, outdir: str, run_name: str, results: List[List[Any]], id: int - ) -> None: - os.makedirs(outdir + "/" + run_name + "/tmp", exist_ok=True) - results_df = pd.DataFrame( - data=results, - columns=[ - "theta23_fixed", - "dm31_fixed", - "theta23_best_fit", - "dm31_best_fit", - "mod_chi2", - "model", - "id", - "converged", - ], - ) - results_df.to_csv( - outdir + "/" + run_name + "/tmp" + "/tmp_%s.csv" % id - ) - - def _make_2d_settings( - self, - cfgs: Dict, - grid_size: int, - run_name: str, - minimizer_cfg: str, - theta23_range: Tuple[float, float], - dm31_range: Tuple[float, float], - n_workers: int, - ) -> List[np.ndarray]: - settings = [] - count = 0 - for model_name in cfgs.keys(): - for i in range(0, grid_size): - for k in range(0, grid_size): - settings.append( - [ - cfgs[model_name], - model_name, - self._outdir, - theta23_range[i], - dm31_range[k], - count, - run_name, - self._statistical_fit, - minimizer_cfg, - ] - ) - count += 1 - random.shuffle(settings) - return np.array_split(settings, n_workers) - - def _make_1d_settings( - self, - cfgs: Dict, - grid_size: int, - run_name: str, - minimizer_cfg: str, - theta23_range: Tuple[float, float], - dm31_range: Tuple[float, float], - n_workers: int, - ) -> List[np.ndarray]: - settings = [] - count = 0 - for model_name in cfgs.keys(): - for i in range(0, grid_size): - settings.append( - [ - cfgs[model_name], - model_name, - self._outdir, - theta23_range[i], - -1, - count, - run_name, - "theta23", - self._statistical_fit, - minimizer_cfg, - ] - ) - count += 1 - for i in range(0, grid_size): - settings.append( - [ - cfgs[model_name], - 
model_name, - self._outdir, - -1, - dm31_range[i], - count, - run_name, - "deltam31", - self._statistical_fit, - minimizer_cfg, - ] - ) - count += 1 - random.shuffle(settings) - return np.array_split(settings, n_workers) - - def _setup_config_files(self, run_name: str, config_dict: Dict) -> Dict: - cfgs = {} - cfgs[self._model_name] = self._make_configs( - outdir=self._outdir, - post_fix=self._post_fix, - run_name=run_name, - is_retro=False, - pipeline_path=self._pipeline_path, - config_dict=config_dict, - ) - if self._include_retro: - cfgs["retro"] = self._make_configs( - outdir=self._outdir, - post_fix=self._post_fix, - run_name=run_name, - is_retro=True, - pipeline_path=self._pipeline_path, - config_dict=config_dict, - ) - return cfgs - - def _get_minimizer_path(self, config_dict: Optional[Dict]) -> str: - if config_dict is not None and "minimizer_cfg" in config_dict.keys(): - minimizer_cfg = config_dict["minimizer_cfg"] - else: - root = os.path.realpath( - os.path.join(os.getcwd(), os.path.dirname(__file__)) - ) - minimizer_cfg = ( - root + "/resources/minimizer/graphnet_standard.json" - ) - return minimizer_cfg - - def _create_configs(self, config_dict: Dict, path: str) -> str: - # Update binning config - root = os.path.realpath( - os.path.join(os.getcwd(), os.path.dirname(__file__)) - ) - if config_dict["post_fix"] is not None: - config_name = "config%s" % config_dict["post_fix"] - else: - config_name = "config" - - with config_updater( - root - + "/resources/configuration_templates/binning_config_template.cfg", - "%s/binning_%s.cfg" % (path, config_name), - dummy_section="binning", - ) as updater: - updater["binning"][ - "graphnet_dynamic_binning.reco_energy" - ].value = ( - "{'num_bins':%s, 'is_log':True, 'domain':[0.5,55] * units.GeV, 'tex': r'E_{\\rm reco}'}" - % config_dict["reco_energy"]["num_bins"] - ) # noqa: W605 - updater["binning"][ - "graphnet_dynamic_binning.reco_coszen" - ].value = ( - "{'num_bins':%s, 'is_lin':True, 'domain':[-1,1], 'tex':r'\\cos{\\theta}_{\\rm reco}'}" - % config_dict["reco_coszen"]["num_bins"] - ) # noqa: W605 - updater["binning"]["graphnet_dynamic_binning.pid"].value = ( - "{'bin_edges': %s, 'tex':r'{\\rm PID}'}" - % config_dict["pid"]["bin_edges"] - ) # noqa: W605 - updater["binning"]["true_allsky_fine.true_energy"].value = ( - "{'num_bins':%s, 'is_log':True, 'domain':[1,1000] * units.GeV, 'tex': r'E_{\\rm true}'}" - % config_dict["true_energy"]["num_bins"] - ) # noqa: W605 - updater["binning"]["true_allsky_fine.true_coszen"].value = ( - "{'num_bins':%s, 'is_lin':True, 'domain':[-1,1], 'tex':r'\\cos\,\\theta_{Z,{\\rm true}}'}" # noqa: W605 - % config_dict["true_coszen"]["num_bins"] - ) # noqa: W605 - - # Update pipeline config - with config_updater( - root - + "/resources/configuration_templates/pipeline_config_template.cfg", - "%s/pipeline_%s.cfg" % (path, config_name), - ) as updater: - updater["pipeline"].add_before.comment( - "#include %s/binning_%s.cfg as binning" % (path, config_name) - ) - updater["data.sqlite_loader"]["post_fix"].value = config_dict[ - "post_fix" - ] - updater["data.sqlite_loader"]["database"].value = config_dict[ - "pipeline" - ] - if "livetime" in config_dict.keys(): - updater["aeff.aeff"]["param.livetime"].value = ( - "%s * units.common_year" % config_dict["livetime"] - ) - return "%s/pipeline_%s.cfg" % (path, config_name) - - def _make_configs( - self, - outdir: str, - run_name: str, - is_retro: bool, - pipeline_path: str, - post_fix: str = "_pred", - config_dict: Optional[Dict] = None, - ) -> str: - os.makedirs(outdir 
+ "/" + run_name, exist_ok=True) - if config_dict is None: - # Run on standard settings - config_dict = { - "reco_energy": {"num_bins": 8}, - "reco_coszen": {"num_bins": 8}, - "pid": {"bin_edges": [0, 0.5, 1]}, - "true_energy": {"num_bins": 200}, - "true_coszen": {"num_bins": 200}, - "livetime": 10, - } - - config_dict["pipeline"] = pipeline_path - if is_retro: - config_dict["post_fix"] = "_retro" - else: - config_dict["post_fix"] = post_fix - pipeline_cfg_path = self._create_configs( - config_dict, outdir + "/" + run_name - ) - return pipeline_cfg_path diff --git a/src/graphnet/pisa/plotting.py b/src/graphnet/pisa/plotting.py deleted file mode 100644 index 50b2684be..000000000 --- a/src/graphnet/pisa/plotting.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Functions for plotting contours from PISA fits.""" - -from typing import Any, Dict, List, Tuple - -import matplotlib.pyplot as plt -from matplotlib.figure import Figure -import numpy as np -import pandas as pd - - -def read_entry(entry: Dict) -> Tuple[Any, ...]: - """Parse the contents of `entry`.""" - path = entry["path"] - model_name = entry["model"] - try: - label = entry["label"] - except KeyError: - label = path.split("/")[-2] - try: - ls = entry["linestyle"] - except KeyError: - ls = "-" - try: - color = entry["color"] - except KeyError: - color = None - entry_data = pd.read_csv(path) - - return entry_data, model_name, label, ls, color - - -def plot_2D_contour( - contour_data: List[Dict], - xlim: Tuple[float, float] = (0.4, 0.6), - ylim: Tuple[float, float] = (2.38 * 1e-3, 2.55 * 1e-3), - chi2_critical_value: float = 4.605, - width: float = 3.176, - height: float = 2.388, -) -> Figure: - """Plot 2D contours from GraphNeT PISA fits. - - Args: - contour_data: List of dictionaries with plotting information. Format is - for each dictionary is: - {'path': path_to_pisa_fit_result, - 'model': 'name_of_my_model_in_fit'}. - One can specify optional fields in the dictionary: "label" - the - legend label, "color" - the color of the contour, "linestyle" - the - style of the contour line. - xlim: Lower and upper bound of x-axis. - ylim: Lower and upper bound of y-axis. - chi2_critical_value: The critical value of the chi2 fits. Defaults to - 4.605 (90% CL). @NOTE: This, and the below, can't both be right. - width: width of figure in inches. - height: height of figure in inches. - - Returns: - The figure with contours. 
- """ - fig, ax = plt.subplots(figsize=(width, height), constrained_layout=True) - proxy = [] - labels = [] - for entry in contour_data: - entry_data, model_name, label, ls, color = read_entry(entry) - model_idx = entry_data["model"] == model_name - model_data = entry_data.loc[model_idx] - x = pd.unique(model_data.sort_values("theta23_fixed")["theta23_fixed"]) - y = pd.unique(model_data.sort_values("dm31_fixed")["dm31_fixed"]) - z = np.zeros((len(y), len(x))) - for i in range(len(x)): - for k in range(len(y)): - idx = (model_data["theta23_fixed"] == x[i]) & ( - model_data["dm31_fixed"] == y[k] - ) - match = model_data["mod_chi2"][idx] - if len(match) > 0: - if model_data["converged"][idx].values is True: - match = float(match) - else: - match = 10000 # Sets the z value very high to exclude it from contour - else: - match = 10000 # Sets the z value very high to exclude it from contour - z[k, i] = match - - CS = ax.contour( - np.sin(np.deg2rad(x)) ** 2, - y, - z, - levels=[chi2_critical_value], - colors=color, - label=label, - linestyles=ls, - linewidths=2, - ) - # ax.clabel(CS, inline=1, fontsize=10) - proxy.extend( - [plt.Rectangle((0, 0), 1, 1, fc=color) for pc in CS.collections] - ) - if chi2_critical_value == 4.605: - label = label + " 90 $\\%$ CL" - labels.append(label) - plt.legend(proxy, labels, frameon=False, loc="upper right") - plt.xlim(xlim[0], xlim[1]) - plt.ylim(ylim[0], ylim[1]) - plt.xlabel("$\\sin^2(\\theta_{23})$", fontsize=12) - plt.ylabel("$\\Delta m_{31}^2 [eV^2]$", fontsize=12) - plt.ticklabel_format(axis="y", style="sci", scilimits=(0, 0)) - plt.title("Sensitivity (Simplified Analysis)") - return fig - - -def plot_1D_contour( - contour_data: List[Dict], - chi2_critical_value: float = 2.706, - width: float = 2 * 3.176, - height: float = 2.388, -) -> Figure: - """Plot 1D contours from GraphNeT PISA fits. - - Args: - contour_data: List of dictionaries with plotting information. Format is - for each dictionary is: - {'path': path_to_pisa_fit_result, - 'model': 'name_of_my_model_in_fit'}. - One can specify optional fields in the dictionary: "label" - the - legend label, "color" - the color of the contour, "linestyle" - the - style of the contour line. - chi2_critical_value: The critical value of the chi2 fits. Defaults to - 2.706 (90% CL). @NOTE: This, and the above, can't both be right. - width: width of figure in inches. - height: height of figure in inches. - - Returns: - The figure with contours. 
- """ - variables = ["theta23_fixed", "dm31_fixed"] - fig, ax = plt.subplots( - 1, 2, figsize=(width, height), constrained_layout=True - ) - ls = 0 - for entry in contour_data: - entry_data, model_name, label, ls, color = read_entry(entry) - for variable in variables: - model_idx = entry_data["model"] == model_name - padding_idx = entry_data[variable] != -1 - chi2_idx = entry_data["mod_chi2"] < chi2_critical_value - model_data = entry_data.loc[ - (model_idx) & (padding_idx) & (chi2_idx), : - ] - x = model_data.sort_values(variable) - if variable == "theta23_fixed": - ax[0].set_ylabel("$\\chi^2$", fontsize=12) - ax[0].plot( - np.sin(np.deg2rad(x[variable])) ** 2, - x["mod_chi2"], - color=color, - label=label, - ls=ls, - ) - ax[0].set_xlabel("$\\sin(\\theta_{23})^2$", fontsize=12) - elif variable == "dm31_fixed": - ax[1].plot( - x[variable], x["mod_chi2"], color=color, label=label, ls=ls - ) - ax[1].ticklabel_format(axis="x", style="sci", scilimits=(0, 0)) - ax[1].set_xlabel("$\\Delta m_{31}^2 [eV^2]$", fontsize=12) - h = [item.get_text() for item in ax[1].get_yticklabels()] - empty_string_labels = [""] * len(h) - ax[1].set_yticklabels(empty_string_labels) - ax[0].set_ylim(0, chi2_critical_value) - ax[1].set_ylim(0, chi2_critical_value) - ax[0].legend() - return fig diff --git a/src/graphnet/pisa/resources/configuration_templates/binning_config_template.cfg b/src/graphnet/pisa/resources/configuration_templates/binning_config_template.cfg deleted file mode 100644 index 78527840c..000000000 --- a/src/graphnet/pisa/resources/configuration_templates/binning_config_template.cfg +++ /dev/null @@ -1,8 +0,0 @@ -graphnet_dynamic_binning.order = reco_energy, reco_coszen, pid -graphnet_dynamic_binning.reco_energy = {'num_bins':15, 'is_log':True, 'domain':[0.5,55] * units.GeV, 'tex': r'E_{\rm reco}'} -graphnet_dynamic_binning.reco_coszen = {'num_bins':15, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'} -graphnet_dynamic_binning.pid = {'bin_edges': [0, 0.4, 0.8, 1], 'tex':r'{\rm PID}'} - -true_allsky_fine.order = true_energy, true_coszen -true_allsky_fine.true_energy = {'num_bins':200, 'is_log':True, 'domain':[1,1000] * units.GeV, 'tex': r'E_{\rm true}'} -true_allsky_fine.true_coszen = {'num_bins':200, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos\,\theta_{Z,{\rm true}}'} diff --git a/src/graphnet/pisa/resources/configuration_templates/pipeline_config_template.cfg b/src/graphnet/pisa/resources/configuration_templates/pipeline_config_template.cfg deleted file mode 100644 index 9b32584e4..000000000 --- a/src/graphnet/pisa/resources/configuration_templates/pipeline_config_template.cfg +++ /dev/null @@ -1,106 +0,0 @@ -#include settings/osc/nufitv20.cfg as osc -#include settings/osc/earth.cfg as earth -[pipeline] -order = data.sqlite_loader, flux.honda_ip, flux.barr_simple, osc.prob3, aeff.aeff, utils.hist -param_selections = nh -name = neutrinos -output_binning = graphnet_dynamic_binning -output_key = weights, errors - -[data.sqlite_loader] -calc_mode = events -apply_mode = events -output_names = nue_cc, numu_cc, nutau_cc, nue_nc, numu_nc, nutau_nc, nuebar_cc, numubar_cc, nutaubar_cc, nuebar_nc, numubar_nc, nutaubar_nc -post_fix = _pred -database = /mnt/scratch/rasmus_orsoe/databases/oscillations/dev_lvl7_robustness_muon_neutrino_0000/pipelines/pipeline_oscillation_final/pipeline_oscillation_final.db - -[flux.honda_ip] -calc_mode = true_allsky_fine -apply_mode = events -param.flux_table = flux/honda-2015-spl-solmin-aa.d - -[flux.barr_simple] -calc_mode = true_allsky_fine -apply_mode = events 
-param.nu_nubar_ratio = 1.0 +/- 0.1 -param.nu_nubar_ratio.fixed = True -param.nu_nubar_ratio.range = nominal + [-3., +3.] * sigma -param.nue_numu_ratio = 1.0 +/- 0.05 -param.nue_numu_ratio.fixed = False -param.nue_numu_ratio.range = nominal + [-0.5, +0.5] -param.Barr_uphor_ratio = 0.0 +/- 1.0 -param.Barr_uphor_ratio.fixed = False -param.Barr_uphor_ratio.range = nominal + [-3.0, +3.0] -param.Barr_nu_nubar_ratio = 0.0 +/- 1.0 -param.Barr_nu_nubar_ratio.fixed = False -param.Barr_nu_nubar_ratio.range = nominal + [-3.0, +3.0] -param.delta_index = 0.0 +/- 0.1 -param.delta_index.fixed = False -param.delta_index.range = nominal + [-5, +5] * sigma - -[osc.prob3] -calc_mode = true_allsky_fine -apply_mode = events -param.earth_model = osc/PREM_12layer.dat -param.YeI = ${earth:YeI} -param.YeM = ${earth:YeM} -param.YeO = ${earth:YeO} -param.detector_depth = ${earth:detector_depth} -param.prop_height = ${earth:prop_height} -param.theta12 = ${osc:theta12} -param.theta12.fixed = True -param.nh.theta13 = ${osc:theta13_nh} -param.nh.theta13.fixed = False -param.nh.theta13.range = ${osc:theta13_nh.range} -param.ih.theta13 = ${osc:theta13_ih} -param.ih.theta13.fixed = False -param.ih.theta13.range = ${osc:theta13_ih.range} -param.nh.theta23 = ${osc:theta23_nh} -param.nh.theta23.fixed = False -param.nh.theta23.range = ${osc:theta23_nh.range} -param.nh.theta23.prior = uniform -param.ih.theta23 = ${osc:theta23_ih} -param.ih.theta23.fixed = False -param.ih.theta23.range = ${osc:theta23_ih.range} -param.ih.theta23.prior = uniform -param.nh.deltacp = 0.0 * units.degree -param.nh.deltacp.fixed = True -param.nh.deltacp.range = ${osc:deltacp_nh.range} -param.nh.deltacp.prior = uniform -param.ih.deltacp = 0.0 * units.degree -param.ih.deltacp.fixed = True -param.deltam21 = ${osc:deltam21} -param.deltam21.fixed = True -param.nh.deltam31 = ${osc:deltam31_nh} -param.nh.deltam31.fixed = False -param.nh.deltam31.prior = uniform -param.nh.deltam31.range = [0.001, +0.007] * units.eV**2 -param.ih.deltam31 = ${osc:deltam31_ih} -param.ih.deltam31.fixed = False -param.ih.deltam31.prior = uniform -param.ih.deltam31.range = [-0.007, -0.001] * units.eV**2 - -[aeff.aeff] -calc_mode = events -apply_mode = events -param.livetime = 10 * units.common_year -param.aeff_scale = 1.0 -param.aeff_scale.fixed = False -param.aeff_scale.prior = uniform -param.aeff_scale.range = [0.,3.] 
* units.dimensionless -param.nutau_cc_norm = 1.0 -param.nutau_cc_norm.fixed = True -param.nutau_cc_norm.range = [0.2, 2.0] * units.dimensionless -param.nutau_cc_norm.prior = uniform -param.nutau_norm = 1.0 -param.nutau_norm.fixed = False -param.nutau_norm.range = [-1.0, 8.5] * units.dimensionless -param.nutau_norm.prior = uniform -param.nu_nc_norm = 1.0 +/- 0.2 -param.nu_nc_norm.fixed = False -param.nu_nc_norm.range = nominal + [-.5,+.5] - -[utils.hist] -calc_mode = events -apply_mode = graphnet_dynamic_binning -error_method = sumw2 \ No newline at end of file diff --git a/src/graphnet/pisa/resources/configuration_templates/pipeline_config_weight_template.cfg b/src/graphnet/pisa/resources/configuration_templates/pipeline_config_weight_template.cfg deleted file mode 100644 index c8e28ece7..000000000 --- a/src/graphnet/pisa/resources/configuration_templates/pipeline_config_weight_template.cfg +++ /dev/null @@ -1,101 +0,0 @@ -#include settings/osc/nufitv20.cfg as osc -#include settings/osc/earth.cfg as earth -[pipeline] -order = data.sqlite_loader, flux.honda_ip, flux.barr_simple, osc.prob3, aeff.aeff -param_selections = nh -name = neutrinos -output_binning = graphnet_dynamic_binning -output_key = weights, errors - -[data.sqlite_loader] -calc_mode = events -apply_mode = events -output_names = nue_cc, numu_cc, nutau_cc, nue_nc, numu_nc, nutau_nc, nuebar_cc, numubar_cc, nutaubar_cc, nuebar_nc, numubar_nc, nutaubar_nc -post_fix = _pred -database = /mnt/scratch/rasmus_orsoe/databases/oscillations/dev_lvl7_robustness_muon_neutrino_0000/pipelines/pipeline_oscillation_final/pipeline_oscillation_final.db - -[flux.honda_ip] -calc_mode = true_allsky_fine -apply_mode = events -param.flux_table = flux/honda-2015-spl-solmin-aa.d - -[flux.barr_simple] -calc_mode = true_allsky_fine -apply_mode = events -param.nu_nubar_ratio = 1.0 +/- 0.1 -param.nu_nubar_ratio.fixed = True -param.nu_nubar_ratio.range = nominal + [-3., +3.] 
* sigma -param.nue_numu_ratio = 1.0 +/- 0.05 -param.nue_numu_ratio.fixed = False -param.nue_numu_ratio.range = nominal + [-0.5, +0.5] -param.Barr_uphor_ratio = 0.0 +/- 1.0 -param.Barr_uphor_ratio.fixed = False -param.Barr_uphor_ratio.range = nominal + [-3.0, +3.0] -param.Barr_nu_nubar_ratio = 0.0 +/- 1.0 -param.Barr_nu_nubar_ratio.fixed = False -param.Barr_nu_nubar_ratio.range = nominal + [-3.0, +3.0] -param.delta_index = 0.0 +/- 0.1 -param.delta_index.fixed = False -param.delta_index.range = nominal + [-5, +5] * sigma - -[osc.prob3] -calc_mode = true_allsky_fine -apply_mode = events -param.earth_model = osc/PREM_12layer.dat -param.YeI = ${earth:YeI} -param.YeM = ${earth:YeM} -param.YeO = ${earth:YeO} -param.detector_depth = ${earth:detector_depth} -param.prop_height = ${earth:prop_height} -param.theta12 = ${osc:theta12} -param.theta12.fixed = True -param.nh.theta13 = ${osc:theta13_nh} -param.nh.theta13.fixed = False -param.nh.theta13.range = ${osc:theta13_nh.range} -param.ih.theta13 = ${osc:theta13_ih} -param.ih.theta13.fixed = False -param.ih.theta13.range = ${osc:theta13_ih.range} -param.nh.theta23 = ${osc:theta23_nh} -param.nh.theta23.fixed = False -param.nh.theta23.range = ${osc:theta23_nh.range} -param.nh.theta23.prior = uniform -param.ih.theta23 = ${osc:theta23_ih} -param.ih.theta23.fixed = False -param.ih.theta23.range = ${osc:theta23_ih.range} -param.ih.theta23.prior = uniform -param.nh.deltacp = 0.0 * units.degree -param.nh.deltacp.fixed = True -param.nh.deltacp.range = ${osc:deltacp_nh.range} -param.nh.deltacp.prior = uniform -param.ih.deltacp = 0.0 * units.degree -param.ih.deltacp.fixed = True -param.deltam21 = ${osc:deltam21} -param.deltam21.fixed = True -param.nh.deltam31 = ${osc:deltam31_nh} -param.nh.deltam31.fixed = False -param.nh.deltam31.prior = uniform -param.nh.deltam31.range = [0.001, +0.007] * units.eV**2 -param.ih.deltam31 = ${osc:deltam31_ih} -param.ih.deltam31.fixed = False -param.ih.deltam31.prior = uniform -param.ih.deltam31.range = [-0.007, -0.001] * units.eV**2 - -[aeff.aeff] -calc_mode = events -apply_mode = events -param.livetime = 10 * units.common_year -param.aeff_scale = 1.0 -param.aeff_scale.fixed = False -param.aeff_scale.prior = uniform -param.aeff_scale.range = [0.,3.] 
* units.dimensionless -param.nutau_cc_norm = 1.0 -param.nutau_cc_norm.fixed = True -param.nutau_cc_norm.range = [0.2, 2.0] * units.dimensionless -param.nutau_cc_norm.prior = uniform -param.nutau_norm = 1.0 -param.nutau_norm.fixed = False -param.nutau_norm.range = [-1.0, 8.5] * units.dimensionless -param.nutau_norm.prior = uniform -param.nu_nc_norm = 1.0 +/- 0.2 -param.nu_nc_norm.fixed = False -param.nu_nc_norm.range = nominal + [-.5,+.5] diff --git a/src/graphnet/pisa/resources/minimizer/graphnet_standard.json b/src/graphnet/pisa/resources/minimizer/graphnet_standard.json deleted file mode 100644 index eefd441ae..000000000 --- a/src/graphnet/pisa/resources/minimizer/graphnet_standard.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "method": { - "value": "L-BFGS-B", - "desc" : "TODO" - }, - "options":{ - "value": { - "disp" : 0, - "ftol" : 1.0e-6, - "gtol" : 1.0e-8, - "eps" : 1.0e-8, - "maxfun" : 15000, - "maxiter": 15000 - }, - "desc": { - "disp" : "TODO", - "ftol" : "TODO", - "gtol" : "TODO", - "eps" : "TODO", - "maxfun" : "TODO", - "maxiter": "TODO" - } - } -} \ No newline at end of file From 4366f1802f8788bee68d571cbcd1254e409d1371 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 4 Apr 2024 10:38:42 +0200 Subject: [PATCH 2/7] delete 05_pisa example folder --- examples/05_pisa/01_fit_pisa_weights.py | 57 --------- examples/05_pisa/02_make_pipeline_database.py | 109 ------------------ examples/05_pisa/03_contour_fitting.py | 76 ------------ examples/05_pisa/04_contour_plotting.py | 97 ---------------- examples/05_pisa/README.md | 6 - examples/05_pisa/_common_pisa.py | 10 -- 6 files changed, 355 deletions(-) delete mode 100644 examples/05_pisa/01_fit_pisa_weights.py delete mode 100644 examples/05_pisa/02_make_pipeline_database.py delete mode 100644 examples/05_pisa/03_contour_fitting.py delete mode 100644 examples/05_pisa/04_contour_plotting.py delete mode 100644 examples/05_pisa/README.md delete mode 100644 examples/05_pisa/_common_pisa.py diff --git a/examples/05_pisa/01_fit_pisa_weights.py b/examples/05_pisa/01_fit_pisa_weights.py deleted file mode 100644 index 9c95ab967..000000000 --- a/examples/05_pisa/01_fit_pisa_weights.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Example of using the PISA WeightFitter class.""" - -from graphnet.pisa.fitting import WeightFitter -from graphnet.utilities.argparse import ArgumentParser -from graphnet.utilities.imports import has_pisa_package -from graphnet.utilities.logging import Logger - -from _common_pisa import ERROR_MESSAGE_MISSING_PISA - - -def main() -> None: - """Run example.""" - # Configuration - outdir = "/mnt/scratch/rasmus_orsoe/weight_test" # @TEMP - database_path = ( # @TEMP - "/mnt/scratch/rasmus_orsoe/databases/dev_lvl3_genie_burnsample_v5/data" - "/dev_lvl3_genie_burnsample_v5.db" - ) - fitter = WeightFitter(database_path=database_path) - - pisa_config_dict = { - "reco_energy": {"num_bins": 8}, - "reco_coszen": {"num_bins": 8}, - "pid": {"bin_edges": [0, 0.5, 1]}, - "true_energy": {"num_bins": 200}, - "true_coszen": {"num_bins": 200}, - "livetime": 10 * 0.01, # 1% of 10 years, like oscNext burn sample. - } - - # By calling `fitter.fit_weights`` we get the weights returned per event. - # If `add_to_database = True`, a table will be added to the database. 
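-    # The returned dataframe has columns [event_no, weight_name] and is
-    # sorted by event number.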
- weights = fitter.fit_weights( - outdir, - add_to_database=True, - weight_name="weight_livetime10_1percent", - pisa_config_dict=pisa_config_dict, - ) - - print(weights) - - -if __name__ == "__main__": - - if not has_pisa_package(): - Logger(log_folder=None).error(ERROR_MESSAGE_MISSING_PISA) - - else: - # Parse command-line arguments - parser = ArgumentParser( - description=""" -Use the PISA WeightFitter class. -""" - ) - - args = parser.parse_args() - - main() diff --git a/examples/05_pisa/02_make_pipeline_database.py b/examples/05_pisa/02_make_pipeline_database.py deleted file mode 100644 index 722b997f3..000000000 --- a/examples/05_pisa/02_make_pipeline_database.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Example of building and running PISA Pipeline with SQLite inputs.""" - -from typing import Dict, List - -from graphnet.data.pipeline import InSQLitePipeline -from graphnet.data.constants import TRUTH, FEATURES -from graphnet.utilities.argparse import ArgumentParser -from graphnet.utilities.imports import has_pisa_package -from graphnet.utilities.logging import Logger -from graphnet.models.graphs import KNNGraph -from graphnet.models.detector.icecube import IceCubeDeepCore -from graphnet.models.graphs.nodes import NodesAsPulses - -from _common_pisa import ERROR_MESSAGE_MISSING_PISA - - -def get_output_column_names(target: str) -> List[str]: - """Return the relevant set of output colummn names for `target`.""" - if target in ["azimuth", "zenith"]: - output_column_names = [target + "_pred", target + "_kappa"] - if target in ["track", "neutrino", "energy"]: - output_column_names = [target + "_pred"] - if target == "XYZ": - output_column_names = [ - "position_x_pred", - "position_y_pred", - "position_z_pred", - ] - return output_column_names - - -def build_module_dictionary(targets: List[str]) -> Dict[str, Dict]: - """Build a dictionary of output paths and column names for `targets`.""" - module_dict: Dict[str, Dict] = {} - for target in targets: - module_dict[target] = {} - module_dict[target]["path"] = ( # @TEMP - "/home/iwsatlas1/oersoe/phd/oscillations/models/final/" - f"dynedge_oscillation_final_{target}.pth" - ) - module_dict[target]["output_column_names"] = get_output_column_names( - target - ) - return module_dict - - -def main() -> None: - """Run example.""" - # Configuration - features = FEATURES.ICECUBE86 - truth = TRUTH.ICECUBE86 - pulsemap = "SRTTWOfflinePulsesDC" - batch_size = 1024 * 4 - num_workers = 40 - device = "cuda:1" - targets = ["track", "energy", "zenith"] - pipeline_name = "pipeline_oscillation" - database = ( # @TEMP - "/mnt/scratch/rasmus_orsoe/databases/oscillations/" - "dev_lvl7_robustness_muon_neutrino_0000/data/" - "dev_lvl7_robustness_muon_neutrino_0000.db" - ) - - graph_definition = KNNGraph( - detector=IceCubeDeepCore(), - node_definition=NodesAsPulses(), - nb_nearest_neighbours=8, - input_feature_names=FEATURES.DEEPCORE, - ) - - # Remove `interaction_time` if it exists - try: - del truth[truth.index("interaction_time")] - except ValueError: - # not found in list - pass - - # Build Pipeline - pipeline = InSQLitePipeline( - module_dict=build_module_dictionary(targets), - features=features, - truth=truth, - device=device, - batch_size=batch_size, - n_workers=num_workers, - pipeline_name=pipeline_name, - ) - - # Run Pipeline - pipeline( - database=database, pulsemap=pulsemap, graph_definition=graph_definition - ) - - -if __name__ == "__main__": - if not has_pisa_package(): - Logger(log_folder=None).error(ERROR_MESSAGE_MISSING_PISA) - - else: - # Parse command-line 
arguments - parser = ArgumentParser( - description=""" -Build and run PISA Pipeline with SQLite inputs. -""" - ) - - args = parser.parse_args() - - main() diff --git a/examples/05_pisa/03_contour_fitting.py b/examples/05_pisa/03_contour_fitting.py deleted file mode 100644 index de868f62e..000000000 --- a/examples/05_pisa/03_contour_fitting.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Example of fitting oscillation parameter contours using PISA.""" - -from graphnet.pisa.fitting import ContourFitter -from graphnet.utilities.argparse import ArgumentParser -from graphnet.utilities.imports import has_pisa_package -from graphnet.utilities.logging import Logger - -from _common_pisa import ERROR_MESSAGE_MISSING_PISA - - -def main() -> None: - """Run example.""" - # This configuration dictionary overwrites our PISA standard with your - # preferences. - # @NOTE: num_bins should not be higer than 25 for reconstructions. - config_dict = { - "reco_energy": {"num_bins": 10}, - "reco_coszen": {"num_bins": 10}, - "pid": {"bin_edges": [0, 0.50, 1]}, - "true_energy": {"num_bins": 200}, - "true_coszen": {"num_bins": 200}, - } - - # Where you want the .csv-file with the results. - outdir = "/home/iwsatlas1/oersoe/phd/oscillations/sensitivities" # @TEMP - - # What you call your run. - run_name = "this_is_a_test_run" - - pipeline_path = ( - "/mnt/scratch/rasmus_orsoe/databases/oscillations/" - "dev_lvl7_robustness_muon_neutrino_0000/pipelines/" - "pipeline_oscillation_final/pipeline_oscillation_final.db" - ) - - fitter = ContourFitter( - outdir=outdir, - pipeline_path=pipeline_path, - post_fix="_pred", - model_name="dynedge", - include_retro=True, - statistical_fit=True, - ) - - # Fits 1D contours of dm31 and theta23 individually - fitter.fit_1d_contour( - run_name=run_name + "_1d", - config_dict=config_dict, - grid_size=5, - n_workers=30, - ) - - # Fits 2D contours of dm31 and theta23 together - fitter.fit_2d_contour( - run_name=run_name + "_2d", - config_dict=config_dict, - grid_size=5, - n_workers=30, - ) - - -if __name__ == "__main__": - if not has_pisa_package(): - Logger(log_folder=None).error(ERROR_MESSAGE_MISSING_PISA) - - else: - # Parse command-line arguments - parser = ArgumentParser( - description=""" -Fit oscillation parameter contours using PISA. -""" - ) - - args = parser.parse_args() - - main() diff --git a/examples/05_pisa/04_contour_plotting.py b/examples/05_pisa/04_contour_plotting.py deleted file mode 100644 index 2b156565e..000000000 --- a/examples/05_pisa/04_contour_plotting.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Example of plotting contours from PISA fit. - -Here we would like to plot two contours in one figure; one for our GNN and one -for RETRO. We build a dictionary for each contour. Each dictionary much contain -"path" and "model". "path" is the path to the .csv file containing the fit -result. "model" is the name of the model in the .csv file - some fits have more -than 1 model! The plotting script returns the figure object - remember to save -it yourself! 
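-The paths used below are site-specific placeholders; point them to your
-own merged fit results before running.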
-""" - -from graphnet.pisa.plotting import plot_2D_contour, plot_1D_contour -from graphnet.utilities.argparse import ArgumentParser -from graphnet.utilities.imports import has_pisa_package -from graphnet.utilities.logging import Logger - -from _common_pisa import ERROR_MESSAGE_MISSING_PISA - - -def example_plot_2d_contour() -> None: - """Plot 2D contour from PISA fit.""" - contour_data_2D = [ - { - "path": ( - "/mnt/scratch/rasmus_orsoe/oscillation/" - "30x30_std_config_final_num_bins_15_lbe_0.4_hbe_0.8/" - "merged_results.csv" - ), - "model": "dynedge", - "label": "dynedge", - "color": "tab:blue", - }, - { - "path": ( - "/mnt/scratch/rasmus_orsoe/oscillation/" - "30x30_oscNext_config_final_num_bins_15_lbe_0.5_hbe_0.85/" - "merged_results.csv" - ), - "model": "retro", - "label": "retro", - "color": "tab:orange", - }, - ] - - figure = plot_2D_contour(contour_data_2D, width=6.3, height=2.3 * 2) - figure.savefig( - "/home/iwsatlas1/oersoe/phd/oscillations/plots/2d_contour_test.pdf" - ) - - -def example_plot_1d_contour() -> None: - """Plot 1D contour from PISA fit.""" - contour_data_1D = [ - { - "path": ( - "/home/iwsatlas1/oersoe/phd/oscillations/sensitivities/" - "100x_bfgs_pid_bin_05_8by8_bins_fix_all_True_philipp_idea/" - "merged_results.csv" - ), - "color": "tab:orange", - "model": "retro", - "label": "retro - vanilla bin", - "ls": "--", - }, - { - "path": ( - "/home/iwsatlas1/oersoe/phd/oscillations/sensitivities/" - "100x_bfgs_pid_bin_05_8by8_bins_fix_all_True_philipp_idea/" - "merged_results.csv" - ), - "color": "tab:blue", - "model": "dynedge", - "label": "dynedge - vanilla bin", - "ls": "--", - }, - ] - - figure = plot_1D_contour(contour_data_1D) - figure.savefig( - "/home/iwsatlas1/oersoe/phd/oscillations/plots/1d_contour_test.pdf" - ) - - -if __name__ == "__main__": - - if not has_pisa_package(): - Logger(log_folder=None).error(ERROR_MESSAGE_MISSING_PISA) - - else: - # Parse command-line arguments - parser = ArgumentParser( - description=""" -Plot oscillation parameter contours using PISA. -""" - ) - - example_plot_2d_contour() - example_plot_1d_contour() diff --git a/examples/05_pisa/README.md b/examples/05_pisa/README.md deleted file mode 100644 index c3996b51c..000000000 --- a/examples/05_pisa/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# PISA examples - -Be aware that the examples in this folder presuppose that GraphNeT is installed with [PISA](https://github.com/icecube/pisa). -In addition, it should be noted that the use of PISA is a highly specialised case, albeit an important one for neutrino oscillation physics in IceCube, and that any oscillation analysis example would require an amount of statistics that is not feasible to bundle with a software package like GraphNeT. -For these reasons, examples in this folder are intentionally not self-contained, in contrast to the other provided examples. -However, the GraphNeT developers are happy to work with users who are interested in leveraging the PISA functionality that is included in this package. diff --git a/examples/05_pisa/_common_pisa.py b/examples/05_pisa/_common_pisa.py deleted file mode 100644 index 3ead1c495..000000000 --- a/examples/05_pisa/_common_pisa.py +++ /dev/null @@ -1,10 +0,0 @@ -ERROR_MESSAGE_MISSING_PISA = ( - "This example requires PISA to be installed, which doesn't seem to be the " - "case. Please install PISA or run an example script in one of the other " - "folders:" - "\n * examples/01_icetray/" - "\n * examples/02_data/" - "\n * examples/03_weights/" - "\n * examples/04_training/" - "\nExiting." 
-) From b4890561735ddff27e684630ff735dc79748e3da Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 4 Apr 2024 10:39:16 +0200 Subject: [PATCH 3/7] delete graphnet.data.pipeline --- src/graphnet/data/pipeline.py | 232 ---------------------------------- 1 file changed, 232 deletions(-) delete mode 100644 src/graphnet/data/pipeline.py diff --git a/src/graphnet/data/pipeline.py b/src/graphnet/data/pipeline.py deleted file mode 100644 index 9973c763f..000000000 --- a/src/graphnet/data/pipeline.py +++ /dev/null @@ -1,232 +0,0 @@ -"""Class(es) used for analysis in PISA.""" - -from abc import ABC -import dill -from functools import reduce -import os -from typing import Dict, List, Optional, Tuple - -import numpy as np -import pandas as pd -from pytorch_lightning import Trainer -import sqlite3 -import torch -from torch.utils.data import DataLoader - -from graphnet.data.utilities.sqlite_utilities import ( - create_table_and_save_to_sql, -) -from graphnet.training.utils import get_predictions, make_dataloader -from graphnet.models.graphs import GraphDefinition - -from graphnet.utilities.logging import Logger - - -class InSQLitePipeline(ABC, Logger): - """Create a SQLite database for PISA analysis. - - The database will contain truth and GNN predictions and, if available, - RETRO reconstructions. - """ - - def __init__( - self, - module_dict: Dict, - features: List[str], - truth: List[str], - device: torch.device, - retro_table_name: str = "retro", - outdir: Optional[str] = None, - batch_size: int = 100, - n_workers: int = 10, - pipeline_name: str = "pipeline", - ): - """Initialise the pipeline. - - Args: - module_dict: A dictionary with GNN modules from GraphNet. E.g. - {'energy': gnn_module_for_energy_regression} - features: List of input features for the GNN modules. - truth: List of truth for the GNN ModuleList. - device: The device used for computation. - retro_table_name: Name of the retro table for. - outdir: the directory in which the pipeline database will be - stored. - batch_size: Batch size for inference. - n_workers: Number of workers used in dataloading. - pipeline_name: Name of the pipeline. If such a pipeline already - exists, an error will be prompted to avoid overwriting. - """ - self._pipeline_name = pipeline_name - self._device = device - self.n_workers = n_workers - self._features = features - self._truth = truth - self._batch_size = batch_size - self._outdir = outdir - self._module_dict = module_dict - self._retro_table_name = retro_table_name - - # Base class constructor - super().__init__(name=__name__, class_name=self.__class__.__name__) - - def __call__( - self, - database: str, - pulsemap: str, - graph_definition: GraphDefinition, - chunk_size: int = 1000000, - ) -> None: - """Run inference of each field in self._module_dict[target]['']. - - Args: - database: Path to database with pulsemap and truth. - pulsemap: Name of pulsemaps. - graph_definition: GraphDefinition for Dataset - chunk_size: database will be sliced in chunks of size `chunk_size`. - Use this parameter to control memory usage. 
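-        Results are written to `<outdir>/<pipeline_name>.db`. If that
-        directory already exists, the pipeline aborts with a warning
-        rather than overwrite it.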
- """ - outdir = self._get_outdir(database) - if isinstance( - self._device, str - ): # Because pytorch lightning insists on breaking pytorch cuda device naming scheme - device = int(self._device[-1]) - if not os.path.isdir(outdir): - dataloaders, event_batches = self._setup_dataloaders( - graph_definition=graph_definition, - chunk_size=chunk_size, - db=database, - pulsemap=pulsemap, - selection=None, - persistent_workers=False, - ) - i = 0 - for dataloader in dataloaders: - self.info("CHUNK %s / %s" % (i, len(dataloaders))) - df = self._inference(device, dataloader) - truth = self._get_truth(database, event_batches[i].tolist()) - retro = self._get_retro(database, event_batches[i].tolist()) - self._append_to_pipeline(outdir, truth, retro, df) - i += 1 - else: - self.info(outdir) - self.info( - "WARNING - Pipeline named %s already exists! \n Please rename pipeline!" - % self._pipeline_name - ) - - def _setup_dataloaders( - self, - chunk_size: int, - db: str, - pulsemap: str, - graph_definition: GraphDefinition, - selection: Optional[List[int]] = None, - persistent_workers: bool = False, - ) -> Tuple[List[DataLoader], List[np.ndarray]]: - if selection is None: - selection = self._get_all_event_nos(db) - n_chunks = np.ceil(len(selection) / chunk_size) - event_batches = np.array_split(selection, n_chunks) - dataloaders = [] - for batch in event_batches: - dataloaders.append( - make_dataloader( - db=db, - graph_definition=graph_definition, - pulsemaps=pulsemap, - features=self._features, - truth=self._truth, - batch_size=self._batch_size, - shuffle=False, - selection=batch.tolist(), - num_workers=self.n_workers, - persistent_workers=persistent_workers, - ) - ) - return dataloaders, event_batches - - def _get_all_event_nos(self, db: str) -> List[int]: - with sqlite3.connect(db) as con: - query = "SELECT event_no FROM truth" - selection = pd.read_sql(query, con).values.ravel().tolist() - return selection - - def _combine_outputs(self, dataframes: List[pd.DataFrame]) -> pd.DataFrame: - return reduce(lambda x, y: pd.merge(x, y, on="event_no"), dataframes) - - def _inference( - self, device: torch.device, dataloader: DataLoader - ) -> pd.DataFrame: - dataframes = [] - for target in self._module_dict.keys(): - # dataloader = iter(dataloader) - trainer = Trainer(devices=[device], accelerator="gpu") - model = torch.load( - self._module_dict[target]["path"], - map_location="cpu", - pickle_module=dill, - ) - model.eval() - model.inference() - results = get_predictions( - trainer, - model, - dataloader, - self._module_dict[target]["output_column_names"], - additional_attributes=["event_no"], - ) - dataframes.append( - results.sort_values("event_no").reset_index(drop=True) - ) - df = self._combine_outputs(dataframes) - return df - - def _get_outdir(self, database: str) -> str: - if self._outdir is None: - database_name = database.split("/")[-3] - outdir = ( - database.split(database_name)[0] - + database_name - + "/pipelines/" - + self._pipeline_name - ) - else: - outdir = self._outdir - return outdir - - def _get_truth(self, database: str, selection: List[int]) -> pd.DataFrame: - with sqlite3.connect(database) as con: - query = "SELECT * FROM truth WHERE event_no in %s" % str( - tuple(selection) - ) - truth = pd.read_sql(query, con) - return truth - - def _get_retro(self, database: str, selection: List[int]) -> pd.DataFrame: - try: - with sqlite3.connect(database) as con: - query = "SELECT * FROM %s WHERE event_no in %s" % ( - self._retro_table_name, - str(tuple(selection)), - ) - retro = 
pd.read_sql(query, con) - return retro - except: # noqa: E722 - self.info("%s table does not exist" % self._retro_table_name) - - def _append_to_pipeline( - self, - outdir: str, - truth: pd.DataFrame, - retro: pd.DataFrame, - df: pd.DataFrame, - ) -> None: - os.makedirs(outdir, exist_ok=True) - pipeline_database = outdir + "/%s.db" % self._pipeline_name - create_table_and_save_to_sql(df, "reconstruction", pipeline_database) - create_table_and_save_to_sql(truth, "truth", pipeline_database) - if isinstance(retro, pd.DataFrame): - create_table_and_save_to_sql( - retro, self._retro_table_name, pipeline_database - ) From 835b783216f7b40aede95071c3eebfed23b776c6 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 4 Apr 2024 10:40:32 +0200 Subject: [PATCH 4/7] update example readme --- examples/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 577cfe849..ab971afe8 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,7 +7,6 @@ Examples are grouped into five numbered subfolders, roughly in order of how you 2. **Data.** Reading in data in intermediate formats, plotting feature distributions, and converting data between intermediate file formats. These examples are entirely self-contained and can be run by anyone. 3. **Weights.** Fitting per-event weights. 4. **Training.** Training GNN models on various physics tasks. -5. **PISA.** Fitting and plotting oscillation analysis contours. These examples presuppose that GraphNeT has been installed with [PISA](https://github.com/icecube/pisa), and the examples are intentionally not self-contained due to their specialised nature. Each subfolder contains similarly numbered example scripts. Each example script comes with a simple command-line interface and help functionality, e.g. From 690b3c2a6a831c1be570c14494d830f8ba5b114d Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Thu, 4 Apr 2024 10:43:52 +0200 Subject: [PATCH 5/7] remove `has_pisa_package` import checker --- src/graphnet/utilities/imports.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/graphnet/utilities/imports.py b/src/graphnet/utilities/imports.py index b0b692503..a490f413c 100644 --- a/src/graphnet/utilities/imports.py +++ b/src/graphnet/utilities/imports.py @@ -33,19 +33,6 @@ def has_torch_package() -> bool: return False -def has_pisa_package() -> bool: - """Check whether the `pisa` package is available.""" - try: - import pisa # pyright: reportMissingImports=false - - return True - except ImportError: - Logger(log_folder=None).warning_once( - "`pisa` not available. 
Some functionality may be missing.", - ) - return False - - def requires_icecube(test_function: Callable) -> Callable: """Decorate `test_function` for use only if `icecube` module is present.""" From d4d5652c99c22eabb821fb1ec41f1367c43eb86b Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 5 Apr 2024 09:50:29 +0200 Subject: [PATCH 6/7] rename liquido example folder --- examples/{06_liquido => 05_liquido}/01_convert_h5.py | 0 examples/{06_liquido => 05_liquido}/README.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename examples/{06_liquido => 05_liquido}/01_convert_h5.py (100%) rename examples/{06_liquido => 05_liquido}/README.md (100%) diff --git a/examples/06_liquido/01_convert_h5.py b/examples/05_liquido/01_convert_h5.py similarity index 100% rename from examples/06_liquido/01_convert_h5.py rename to examples/05_liquido/01_convert_h5.py diff --git a/examples/06_liquido/README.md b/examples/05_liquido/README.md similarity index 100% rename from examples/06_liquido/README.md rename to examples/05_liquido/README.md From 4bff0e1e540ce955305f4682136eac4a4aac3937 Mon Sep 17 00:00:00 2001 From: Rasmus Oersoe Date: Fri, 5 Apr 2024 09:51:04 +0200 Subject: [PATCH 7/7] update path in liquido unit test --- tests/examples/06_liquido/test_liquido_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/examples/06_liquido/test_liquido_examples.py b/tests/examples/06_liquido/test_liquido_examples.py index 45914b706..f668ad701 100644 --- a/tests/examples/06_liquido/test_liquido_examples.py +++ b/tests/examples/06_liquido/test_liquido_examples.py @@ -6,7 +6,7 @@ from graphnet.constants import GRAPHNET_ROOT_DIR -EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/06_data") +EXAMPLE_PATH = os.path.join(GRAPHNET_ROOT_DIR, "examples/05_liquido") examples = glob(EXAMPLE_PATH + "/*.py")