From caa83be7ca0fcff191b87a56fe078185ad890034 Mon Sep 17 00:00:00 2001 From: JP Swinski Date: Sat, 16 Nov 2024 19:28:06 +0000 Subject: [PATCH] updated ensemble model; calling track_stacker directly --- datasets/bathy/docker/oceaneyes/Dockerfile | 1 + datasets/bathy/docker/oceaneyes/runner.py | 49 ++++++++++++------- datasets/bathy/package/BathyFields.h | 17 ++++--- .../source/release_notes/release-v4-8-14.md | 21 ++++++++ targets/slideruleearth-aws/Makefile | 6 +++ 5 files changed, 68 insertions(+), 26 deletions(-) create mode 100644 docs/rtd/source/release_notes/release-v4-8-14.md diff --git a/datasets/bathy/docker/oceaneyes/Dockerfile b/datasets/bathy/docker/oceaneyes/Dockerfile index a1454f8b..dba410d3 100644 --- a/datasets/bathy/docker/oceaneyes/Dockerfile +++ b/datasets/bathy/docker/oceaneyes/Dockerfile @@ -37,6 +37,7 @@ COPY cshelph /cshelph COPY medianfilter /medianfilter COPY openoceans /openoceans COPY pointnet /pointnet +COPY ensemble /ensemble # container defaults SHELL ["/bin/bash", "-c"] diff --git a/datasets/bathy/docker/oceaneyes/runner.py b/datasets/bathy/docker/oceaneyes/runner.py index 409d9cbc..dd082785 100644 --- a/datasets/bathy/docker/oceaneyes/runner.py +++ b/datasets/bathy/docker/oceaneyes/runner.py @@ -42,6 +42,7 @@ from cshelph import c_shelph as CSHELPH from medianfilter import medianmodel as MEDIANFILTER +from ensemble import classify as ENSEMBLE from bathypathfinder.BathyPathFinder import BathyPathSearch from pointnet.pointnet2 import PointNet2 from openoceans.openoceans import OpenOceans @@ -56,6 +57,7 @@ RELEASE = "1" CLASSIFIERS = ['qtrees', 'coastnet', 'openoceanspp', 'medianfilter', 'cshelph', 'bathypathfinder', 'pointnet', 'openoceans', 'ensemble'] BEAMS = ["gt1l", "gt1r", "gt2l", "gt2r", "gt3l", "gt3r"] +CONFIDENCE_COLUMN = "ensemble_bathy_prob" # ##################### # Command Line Inputs @@ -75,6 +77,9 @@ profile = settings.get('profile', {}) format = settings.get('format', 'parquet') +# get ensemble model filename (required) 
+ensemble_model_filename = settings['ensemble']['ensemble_model_filename'] + # ##################### # Read In Data # ##################### @@ -251,17 +256,13 @@ def openoceans(spot, df): # ##################### def ensemble(spot, df): - ensemble_model_filename = settings['ensemble']['ensemble_model_filename'] - print(f'loading ensemble model: {ensemble_model_filename}') - df = df[['geoid_corr_h', 'surface_h', 'qtrees', 'cshelph', 'medianfilter', 'bathypathfinder', 'openoceanspp', 'coastnet']] - clf = xgb.XGBClassifier(device='cpu') - clf.load_model(ensemble_model_filename) - x = df.to_numpy() - p = clf.predict(x) - p[p == 1] = 40 - p[p == 2] = 41 - print(f'ensemble completed spot {spot}') - return p + try: + df = ENSEMBLE.classify(df.reset_index(drop=True, inplace=False), False, ensemble_model_filename) + print(f'ensemble completed spot {spot}') + return df + except Exception as e: + print(f'ensemble failed on spot {spot} with error: {e}') + return None # ##################### # Run Classifiers @@ -279,7 +280,11 @@ def runClassifier(classifier, classifier_func, num_processes=6): pool.close() pool.join() for i in range(len(beam_list)): - if results[i] is not None: + if isinstance(results[i], pd.DataFrame): # special case ensemble + beam_table[beam_list[i]][classifier] = results[i][classifier].to_numpy() + if CONFIDENCE_COLUMN in results[i]: + beam_table[beam_list[i]]["confidence"] = results[i][CONFIDENCE_COLUMN].to_numpy() + elif results[i] is not None: beam_table[beam_list[i]][classifier] = results[i] else: beam_failures[beam_list[i]].append(classifier) @@ -321,7 +326,12 @@ def runClassifier(classifier, classifier_func, num_processes=6): print("Concatenated data frames into a single data frame") # set processing flags -df["processing_flags"] = df["processing_flags"] + ((df["cshelph"] == 40) * 2**28) + ((df["medianfilter"] == 40) * 2**27) + ((df["bathypathfinder"] == 40) * 2**29) + ((df["pointnet"] == 40) * 2**30) +df["processing_flags"] = 
df["processing_flags"] + \ + ((df["confidence"] * 256).clip(0, 255).astype(np.uint32) * 256) + \ + ((df["cshelph"] == 40) * 2**28) + \ + ((df["medianfilter"] == 40) * 2**27) + \ + ((df["bathypathfinder"] == 40) * 2**29) + \ + ((df["pointnet"] == 40) * 2**30) # apply subaqueous corrections corrections_start_time = time.time() @@ -361,9 +371,11 @@ def runClassifier(classifier, classifier_func, num_processes=6): # read versions with open("cshelph/cshelph_version.txt") as file: - cshelph_version = file.read() + cshelph_version = file.read().strip() with open("medianfilter/medianfilter_version.txt") as file: - medianfilter_version = file.read() + medianfilter_version = file.read().strip() +with open("ensemble/ensemble_version.txt") as file: + ensemble_version = file.read().strip() # update profile profile["total_duration"] = time.time() - settings["latch"] @@ -371,11 +383,12 @@ def runClassifier(classifier, classifier_func, num_processes=6): # build metadata table metadata = { - "sliderule": json.dumps(rqst_parms), + "sliderule": json.dumps(rqst_parms | + {"cshelph": {"version": cshelph_version}} | + {"medianfilter": {"version": medianfilter_version}} | + {"ensemble": {"version": ensemble_version, "model": settings['ensemble']['ensemble_model_filename']}}), "profile": json.dumps(profile), "stats": json.dumps(stats), - "cshelph": cshelph_version, - "medianfilter": medianfilter_version, "errors": json.dumps(beam_failures) } diff --git a/datasets/bathy/package/BathyFields.h b/datasets/bathy/package/BathyFields.h index 32382d9d..b14995ca 100644 --- a/datasets/bathy/package/BathyFields.h +++ b/datasets/bathy/package/BathyFields.h @@ -47,7 +47,7 @@ #define COASTNET_MODEL "coastnet_model-20241111.json" #define QTREES_MODEL "qtrees_model-20241105.json" -#define ENSEMBLE_MODEL "ensemble_model-20241030.json" +#define ENSEMBLE_MODEL "ensemble_model-20241115.json" #define POINTNET_MODEL "pointnet2_model.pth" /****************************************************************************** @@ 
-226,13 +226,14 @@ class BathyFields: public Icesat2Fields /* Processing Flags */ typedef enum { - FLAGS_CLEAR = 0x00, - ON_BOUNDARY = 0x01, // set if photon is first after a spatial boundary - SENSOR_DEPTH_EXCEEDED = 0x02, - SEA_SURFACE_UNDETECTED = 0x04, - INVALID_KD = 0x08, - INVALID_WIND_SPEED = 0x10, - NIGHT_FLAG = 0x20, + FLAGS_CLEAR = 0x00000000, + ON_BOUNDARY = 0x00000001, // set if photon is first after a spatial boundary + SENSOR_DEPTH_EXCEEDED = 0x00000002, + SEA_SURFACE_UNDETECTED = 0x00000004, + INVALID_KD = 0x00000008, + INVALID_WIND_SPEED = 0x00000010, + NIGHT_FLAG = 0x00000020, + BATHY_CONFIDENCE = 0x0000FF00, BATHY_QTREES = 0x01000000, BATHY_COASTNET = 0x02000000, BATHY_OPENOCEANSPP = 0x04000000, diff --git a/docs/rtd/source/release_notes/release-v4-8-14.md b/docs/rtd/source/release_notes/release-v4-8-14.md new file mode 100644 index 00000000..4c31cad5 --- /dev/null +++ b/docs/rtd/source/release_notes/release-v4-8-14.md @@ -0,0 +1,21 @@ +# Release v4.8.14 + +2024-11-16 + +Version description of the v4.8.14 release of SlideRule Earth. + +Bathy Version #13. 
+ +## ATL24 Changes + +* Updated ensemble model +* track_stacker code (ensemble) now called directly +* track_stacker includes some blunder detection +* ensemble confidence included in output +* additional version information (track_stacker git commit id and ensemble model name) included in metadata + +## Known Issues and Remaining Tasks + +## Getting This Release + +[https://github.com/SlideRuleEarth/sliderule/releases/tag/v4.8.14](https://github.com/SlideRuleEarth/sliderule/releases/tag/v4.8.14) diff --git a/targets/slideruleearth-aws/Makefile b/targets/slideruleearth-aws/Makefile index 3928031c..79c99027 100644 --- a/targets/slideruleearth-aws/Makefile +++ b/targets/slideruleearth-aws/Makefile @@ -62,6 +62,9 @@ CSHELPH_VERSION = $(shell git --work-tree ${CSHELPH_SOURCE_DIR} --git-dir ${CSHE MEDIANFILTER_SOURCE_DIR ?= $(ROOT)/../ut-ATL24-medianfilter MEDIANFILTER_SOURCE_FILE = $(MEDIANFILTER_SOURCE_DIR)/medianmodel.py MEDIANFILTER_VERSION = $(shell git --work-tree ${MEDIANFILTER_SOURCE_DIR} --git-dir ${MEDIANFILTER_SOURCE_DIR}/.git describe --abbrev --dirty --always --tags --long) +ENSEMBLE_SOURCE_DIR ?= $(ROOT)/../ut-ATL24-track_stacker +ENSEMBLE_SOURCE_FILE = $(ENSEMBLE_SOURCE_DIR)/apps/classify.py +ENSEMBLE_VERSION = $(shell git --work-tree ${ENSEMBLE_SOURCE_DIR} --git-dir ${ENSEMBLE_SOURCE_DIR}/.git describe --abbrev --dirty --always --tags --long) COASTNET_DIR = $(ROOT)/../ut-ATL24-coastnet QTREES_DIR = $(ROOT)/../ut-ATL24-qtrees OPENOCEANSPP_DIR = $(ROOT)/../ut-ATL24-oopp @@ -188,6 +191,9 @@ oceaneyes-docker: mkdir $(OCEANEYES_STAGE_DIR)/medianfilter cp $(MEDIANFILTER_SOURCE_FILE) $(OCEANEYES_STAGE_DIR)/medianfilter @echo $(MEDIANFILTER_VERSION) > $(OCEANEYES_STAGE_DIR)/medianfilter/medianfilter_version.txt + mkdir $(OCEANEYES_STAGE_DIR)/ensemble + cp $(ENSEMBLE_SOURCE_FILE) $(OCEANEYES_STAGE_DIR)/ensemble + @echo $(ENSEMBLE_VERSION) > $(OCEANEYES_STAGE_DIR)/ensemble/ensemble_version.txt cp -R $(OCEANEYES_SRC_DIR)/bathypathfinder $(OCEANEYES_STAGE_DIR) cp 
-R $(OCEANEYES_SRC_DIR)/pointnet $(OCEANEYES_STAGE_DIR) cp -R $(OCEANEYES_SRC_DIR)/openoceans $(OCEANEYES_STAGE_DIR)