From f54f8822297e500646132bdf288a30d6f62cde29 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Fri, 3 Nov 2023 14:36:40 -0600 Subject: [PATCH 01/13] Update tests for new JSON response format Updated all the tests for the API to expect a smaller, non-GeoJSON response. This response format will be easier to build efficiently and we weren't deriving any benefits from the GeoJSON. It was just a larger payload and more expensive to generate. --- tests/test_unified_graphics.py | 294 +++++++++------------------------ 1 file changed, 80 insertions(+), 214 deletions(-) diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index 2a026464..a8572a26 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -114,36 +114,10 @@ def test_scalar_diag(t, client): response = client.get(f"/diag/{group}/") # Assert - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": 1.0, - "unadjusted": 1.0, - "observed": 1.0, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": -1.0, - "unadjusted": -1.0, - "observed": 0.0, - }, - "geometry": {"type": "Point", "coordinates": [91.0, 23.0]}, - }, - ], - } - + assert response.json == [ + {"obs_minus_forecast_adjusted": 1.0, "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 90.0, "latitude": 22.0}, + {"obs_minus_forecast_adjusted": -1.0, "obs_minus_forecast_unadjusted": -1.0, "observation": 0.0, "longitude": 91.0, "latitude": 23.0}, + ] def test_scalar_history(model, diag_parquet, client, test_dataset): # Arrange @@ -289,35 +263,10 @@ def test_wind_diag(uv, client): response = client.get(f"/diag/{group}/") # Assert - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "vector", - "variable": "wind", - "loop": "ges", - "adjusted": {"u": 0.0, "v": 1.0}, - "unadjusted": {"u": 0.0, "v": 1.0}, - "observed": {"u": 0.0, "v": 1.0}, - }, - "geometry": {"type": "Point", "coordinates": [90, 22]}, - }, - { - "type": "Feature", - "properties": { - "type": "vector", - "variable": "wind", - "loop": "ges", - "adjusted": {"u": 0.0, "v": -1.0}, - "unadjusted": {"u": 0.0, "v": -1.0}, - "observed": {"u": 1.0, "v": 0.0}, - }, - "geometry": {"type": "Point", "coordinates": [91.0, 23.0]}, - }, - ], - } + assert response.json == [ + {"obs_minus_forecast_adjusted": {"u": 0.0, "v": 1.0}, "obs_minus_forecast_unadjusted": {"u": 0.0, "v": 1.0}, "observation": {"u": 0.0, "v": 1.0}, "longitude": 90.0, "latitude": 22.0}, + {"obs_minus_forecast_adjusted": {"u": 0.0, "v": -1.0}, "obs_minus_forecast_unadjusted": {"u": 0.0, "v": -1.0}, "observation": {"u": 1.0, "v": 0.0}, "longitude": 91.0, "latitude": 23.0}, + ] def test_vector_magnitude(uv, client): @@ -328,35 +277,10 @@ def test_vector_magnitude(uv, client): response = client.get(f"/diag/{group}/magnitude/") # Assert - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "vector", - "variable": "wind", - "loop": "ges", - "adjusted": 1.0, - "unadjusted": 1.0, - "observed": 1.0, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - { - "type": "Feature", - "properties": { - "type": "vector", - "variable": "wind", - "loop": "ges", - "adjusted": 1.0, - "unadjusted": 1.0, - "observed": 1.0, - }, - "geometry": {"type": "Point", "coordinates": [91.0, 23.0]}, - }, - ], - } + assert response.json == [ + {"obs_minus_forecast_adjusted": 1.0, "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 90.0, "latitude": 22.0}, + {"obs_minus_forecast_adjusted": 1.0, "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 91.0, "latitude": 23.0}, + ] def test_region_filter_scalar(t, client): @@ -366,23 +290,15 @@ def test_region_filter_scalar(t, client): query = "longitude=89.9::90.5&latitude=27::22" response = client.get(f"{url}?{query}") - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": "ges", - "variable": "temperature", - "adjusted": 1.0, - "unadjusted": 1.0, - "observed": 1.0, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - ], - } + assert response.json == [ + { + "obs_minus_forecast_adjusted": 1.0, + "obs_minus_forecast_unadjusted": 1.0, + "observation": 1.0, + "longitude": 90.0, + "latitude": 22.0 + }, + ] def test_range_filter_scalar(t, client): @@ -395,23 +311,15 @@ def test_range_filter_scalar(t, client): response = client.get(f"{url}?{query}") # Assert - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": 1.0, - "unadjusted": 1.0, - "observed": 1.0, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - ], - } + assert response.json == [ + { + "obs_minus_forecast_adjusted": 1.0, + "obs_minus_forecast_unadjusted": 1.0, + "observation": 1.0, + "longitude": 90.0, + "latitude": 22.0, + }, + ] def test_region_filter_vector(uv, client): @@ -423,23 +331,15 @@ def test_region_filter_vector(uv, client): # Act response = client.get(f"{url}?{query}") - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "vector", - "variable": "wind", - "loop": uv.loop, - "adjusted": {"u": 0.0, "v": 1.0}, - "unadjusted": {"u": 0.0, "v": 1.0}, - "observed": {"u": 0.0, "v": 1.0}, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - ], - } + assert response.json == [ + { + "obs_minus_forecast_adjusted": {"u": 0.0, "v": 1.0}, + "obs_minus_forecast_unadjusted": {"u": 0.0, "v": 1.0}, + "observation": {"u": 0.0, "v": 1.0}, + "longitude": 90.0, + "latitude": 22.0 + }, + ] # BUG: This test is missing a test case @@ -472,23 +372,15 @@ def test_range_filter_vector(uv, client): response = client.get(f"{url}?{query}") # Assert - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "vector", - "variable": "wind", - "loop": uv.loop, - "adjusted": {"u": 0.0, "v": 1.0}, - "unadjusted": {"u": 0.0, "v": 1.0}, - "observed": {"u": 0.0, "v": 1.0}, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - ], - } + assert response.json == [ + { + "adjusted": {"u": 0.0, "v": 1.0}, + "unadjusted": {"u": 0.0, "v": 1.0}, + "observed": {"u": 0.0, "v": 1.0}, + "longitude": 90.0, + "latitude": 22.0, + }, + ] def test_unused_filter(t, client): @@ -501,23 +393,15 @@ def test_unused_filter(t, client): response = client.get(f"{url}?{query}") # Assert - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": 3.0, - "unadjusted": 3.0, - "observed": 2.0, - }, - "geometry": {"type": "Point", "coordinates": [89.0, 24.0]}, - }, - ], - } + assert response.json == [ + { + "obs_minus_forecast_adjusted": 3.0, + "obs_minus_forecast_unadjusted": 3.0, + "observation": 2.0, + "longitude": 89.0, + "latitude": 24.0, + }, + ] def test_all_obs_filter(t, client): @@ -528,47 +412,29 @@ def test_all_obs_filter(t, client): response = client.get(f"{url}?{query}") assert response.status_code == 200 - assert response.json == { - "type": "FeatureCollection", - "features": [ - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": 1.0, - "unadjusted": 1.0, - "observed": 1.0, - }, - "geometry": {"type": "Point", "coordinates": [90.0, 22.0]}, - }, - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": -1.0, - "unadjusted": -1.0, - "observed": 0.0, - }, - "geometry": {"type": "Point", "coordinates": [91.0, 23.0]}, - }, - { - "type": "Feature", - "properties": { - "type": "scalar", - "loop": t.loop, - "variable": "temperature", - "adjusted": 3.0, - "unadjusted": 3.0, - "observed": 2.0, - }, - "geometry": {"type": "Point", "coordinates": [89.0, 24.0]}, - }, - ], - } + assert response.json == [ + { + "obs_minus_forecast_adjusted": 1.0, + "obs_minus_forecast_unadjusted": 1.0, + "observation": 1.0, + "longitude": 90.0, + "latitude": 22.0, + }, + { + "obs_minus_forecast_adjusted": -1.0, + "obs_minus_forecast_unadjusted": -1.0, + "observation": 0.0, + "longitude": 91.0, + "latitude": 23.0, + }, + { + "obs_minus_forecast_adjusted": 3.0, + "obs_minus_forecast_unadjusted": 3.0, + "observation": 2.0, + "longitude": 89.0, + "latitude": 24.0, + }, + ] @pytest.mark.parametrize("variable", ["t", "q", "ps", "uv"]) From ce33c749553efabec6129e2d78a62dcbaf80e87d Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Fri, 3 Nov 2023 14:38:13 -0600 Subject: [PATCH 02/13] Update the scalar variable responses Rewrite the functions involved in the scalar variable diagnostics routes to stream the responses as a stripped down JSON format. We're now able to use generators so that we only have to jsonify small dictionaries and stream each chunk of JSON in the response, which is significantly faster than parsing the entire response from an in-memory data structure into JSON and then sending that all at once. --- src/unified_graphics/diag.py | 25 +++++-------------------- src/unified_graphics/routes.py | 24 ++++++++++++++++++------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index 9d41d4b3..682910a1 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -226,7 +226,7 @@ def scalar( initialization_time: str, loop: MinimLoop, filters: MultiDict, -) -> List[Observation]: +) -> pd.DataFrame: data = open_diagnostic( diag_zarr, model, @@ -240,22 +240,7 @@ def scalar( ) data = apply_filters(data, filters) - return [ - Observation( - variable.name.lower(), - VariableType.SCALAR, - loop, - adjusted=float(data["obs_minus_forecast_adjusted"].values[idx]), - unadjusted=float(data["obs_minus_forecast_unadjusted"].values[idx]), - observed=float(data["observation"].values[idx]), - position=Coordinate( - float(data["longitude"].values[idx]), - float(data["latitude"].values[idx]), - ), - ) - for idx in range(data.dims["nobs"]) - ] - + return data.to_dataframe() def temperature( diag_zarr: str, @@ -267,7 +252,7 @@ def temperature( initialization_time: str, loop: MinimLoop, filters: MultiDict, -) -> List[Observation]: +) -> pd.DataFrame: return scalar( diag_zarr, model, @@ -292,7 +277,7 @@ def moisture( initialization_time: str, loop: MinimLoop, filters: MultiDict, -) -> List[Observation]: +) -> pd.DataFrame: return scalar( diag_zarr, model, @@ -317,7 +302,7 @@ def pressure( initialization_time: str, loop: MinimLoop, filters: MultiDict, -) -> List[Observation]: +) -> pd.DataFrame: return scalar( diag_zarr, model, diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 4dff3833..d3c292ab 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -1,3 +1,5 @@ +import json + from flask import ( Blueprint, current_app, @@ -162,6 +164,21 @@ def history(model, system, domain, background, frequency, variable, loop): def diagnostics( model, system, domain, background, frequency, variable, initialization_time, loop ): + def generate(df): + yield "[" + for idx, obs in enumerate(df.itertuples()): + if idx > 0: + yield "," + yield json.dumps({ + "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, + "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, + "observation": obs.observation, + "longitude": obs.longitude, + "latitude": obs.latitude, + }) + + yield "]" + try: v = diag.Variable(variable) except ValueError: @@ -179,12 +196,7 @@ def diagnostics( diag.MinimLoop(loop), request.args, ) - - response = jsonify( - {"type": "FeatureCollection", "features": [obs.to_geojson() for obs in data]} - ) - - return response + return generate(data), {"Content-Type": "application/json"} @bp.route( From 8697cca812bccd4a000952b2fb448c763dc07a03 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Fri, 3 Nov 2023 15:08:39 -0600 Subject: [PATCH 03/13] Implement the vector diag observations --- src/unified_graphics/diag.py | 29 ++--------------------------- src/unified_graphics/routes.py | 29 +++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index 682910a1..94da78b0 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -349,7 +349,7 @@ def wind( initialization_time: str, loop: MinimLoop, filters: MultiDict, -) -> List[Observation]: +) -> pd.DataFrame | pd.Series: data = open_diagnostic( diag_zarr, model, @@ -364,32 +364,7 @@ def wind( data = apply_filters(data, filters) - omf_adj_u = data["obs_minus_forecast_adjusted"].sel(component="u").values - omf_adj_v = data["obs_minus_forecast_adjusted"].sel(component="v").values - omf_una_u = data["obs_minus_forecast_unadjusted"].sel(component="u").values - omf_una_v = data["obs_minus_forecast_unadjusted"].sel(component="v").values - obs_u = data["observation"].sel(component="u").values - obs_v = data["observation"].sel(component="v").values - lng = data["longitude"].values - lat = data["latitude"].values - - return [ - Observation( - "wind", - VariableType.VECTOR, - loop, - adjusted=Vector( - round(float(omf_adj_u[idx]), 5), round(float(omf_adj_v[idx]), 5) - ), - unadjusted=Vector( - round(float(omf_una_u[idx]), 5), round(float(omf_una_v[idx]), 5) - ), - observed=Vector(round(float(obs_u[idx]), 5), round(float(obs_v[idx]), 5)), - position=Coordinate(round(float(lng[idx]), 5), round(float(lat[idx]), 5)), - ) - for idx in range(data.dims["nobs"]) - ] - + return data.to_dataframe() def magnitude(dataset: List[Observation]) -> Generator[Observation, None, None]: for obs in dataset: diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index d3c292ab..13335b64 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -166,16 +166,25 @@ def diagnostics( ): def generate(df): yield "[" - for idx, obs in enumerate(df.itertuples()): + for idx, (_, obs) in enumerate(df.iterrows()): if idx > 0: yield "," - yield json.dumps({ - "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, - "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, - "observation": obs.observation, - "longitude": obs.longitude, - "latitude": obs.latitude, - }) + if "component" in obs.index.names: + yield json.dumps({ + "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted.to_dict(), + "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted.to_dict(), + "observation": obs.observation.to_dict(), + "longitude": obs.longitude["u"], + "latitude": obs.latitude["u"], + }) + else: + yield json.dumps({ + "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, + "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, + "observation": obs.observation, + "longitude": obs.longitude, + "latitude": obs.latitude, + }) yield "]" @@ -196,6 +205,10 @@ def generate(df): diag.MinimLoop(loop), request.args, ) + + if "component" in data.index.names: + data = data.unstack() + return generate(data), {"Content-Type": "application/json"} From 59201926d6b3f5fb9eeb757b8f3ef2b97f47fe02 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Mon, 6 Nov 2023 11:32:48 -0700 Subject: [PATCH 04/13] Implement magnitude route --- src/unified_graphics/diag.py | 34 +++++++++------------------------- src/unified_graphics/routes.py | 20 +++++++++++++++----- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index 94da78b0..5e68e102 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -366,32 +366,16 @@ def wind( return data.to_dataframe() -def magnitude(dataset: List[Observation]) -> Generator[Observation, None, None]: - for obs in dataset: - if isinstance(obs.adjusted, Vector): - adjusted = vector_magnitude(obs.adjusted.u, obs.adjusted.v) - else: - adjusted = abs(obs.adjusted) - - if isinstance(obs.unadjusted, Vector): - unadjusted = vector_magnitude(obs.unadjusted.u, obs.unadjusted.v) - else: - unadjusted = abs(obs.unadjusted) - if isinstance(obs.observed, Vector): - observed = vector_magnitude(obs.observed.u, obs.observed.v) - else: - observed = abs(obs.observed) - - yield Observation( - obs.variable, - obs.variable_type, - obs.loop, - adjusted=adjusted, - unadjusted=unadjusted, - observed=observed, - position=obs.position, - ) +def magnitude(dataset: pd.DataFrame) -> pd.DataFrame: + return dataset.groupby(level=0).aggregate({ + "obs_minus_forecast_adjusted": np.linalg.norm, + "obs_minus_forecast_unadjusted": np.linalg.norm, + "observation": np.linalg.norm, + "longitude": "first", + "latitude": "first", + "is_used": "first", + }) def get_model_run_list( diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 13335b64..9ecdd945 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -219,6 +219,20 @@ def generate(df): def magnitude( model, system, domain, background, frequency, variable, initialization_time, loop ): + def generate(df): + yield "[" + for idx, obs in enumerate(df.itertuples()): + if idx > 0: + yield "," + yield json.dumps({ + "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, + "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, + "observation": obs.observation, + "longitude": obs.longitude, + "latitude": obs.latitude, + }) + yield "]" + try: v = diag.Variable(variable) except ValueError: @@ -238,8 +252,4 @@ def magnitude( ) data = diag.magnitude(data) - response = jsonify( - {"type": "FeatureCollection", "features": [obs.to_geojson() for obs in data]} - ) - - return response + return generate(data), {"Content-Type": "application/json"} From 493e2c4e67b426c94680bbdd031a6c97a410e47c Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Mon, 6 Nov 2023 11:45:43 -0700 Subject: [PATCH 05/13] Fix assertion in test_range_filter_vector I made a mistake in the object property names when I updated this expected response value. --- tests/test_unified_graphics.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index a8572a26..38d11e4d 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -374,9 +374,9 @@ def test_range_filter_vector(uv, client): # Assert assert response.json == [ { - "adjusted": {"u": 0.0, "v": 1.0}, - "unadjusted": {"u": 0.0, "v": 1.0}, - "observed": {"u": 0.0, "v": 1.0}, + "obs_minus_forecast_adjusted": {"u": 0.0, "v": 1.0}, + "obs_minus_forecast_unadjusted": {"u": 0.0, "v": 1.0}, + "observation": {"u": 0.0, "v": 1.0}, "longitude": 90.0, "latitude": 22.0, }, From f1ed32cba7d1fdd800c80e3fac5f846ad24c739b Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Mon, 6 Nov 2023 15:27:22 -0700 Subject: [PATCH 06/13] Use itertuples not iterrows for performance We only see performance gains moving to this generator approach in our Flask routes if we use itertuples to generate our JSON response in the route. iterrows is too slow and the performance is basically the same as jsonify'ing the entire DataFrame at once before sending the response. --- src/unified_graphics/routes.py | 40 +++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 9ecdd945..61a8d1f7 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -164,27 +164,24 @@ def history(model, system, domain, background, frequency, variable, loop): def diagnostics( model, system, domain, background, frequency, variable, initialization_time, loop ): + def vec_to_dict(s) -> dict: + c = s.copy() + c.index = s.index.droplevel(0) + return c.to_dict() + def generate(df): yield "[" - for idx, (_, obs) in enumerate(df.iterrows()): + for idx, obs in enumerate(df.itertuples()): + print(obs) if idx > 0: yield "," - if "component" in obs.index.names: - yield json.dumps({ - "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted.to_dict(), - "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted.to_dict(), - "observation": obs.observation.to_dict(), - "longitude": obs.longitude["u"], - "latitude": obs.latitude["u"], - }) - else: - yield json.dumps({ - "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, - "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, - "observation": obs.observation, - "longitude": obs.longitude, - "latitude": obs.latitude, - }) + yield json.dumps({ + "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, + "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, + "observation": obs.observation, + "longitude": obs.longitude, + "latitude": obs.latitude, + }) yield "]" @@ -207,7 +204,14 @@ def generate(df): ) if "component" in data.index.names: - data = data.unstack() + data = data.groupby(level=0).aggregate({ + "obs_minus_forecast_adjusted": vec_to_dict, + "obs_minus_forecast_unadjusted": vec_to_dict, + "observation": vec_to_dict, + "longitude": "first", + "latitude": "first", + "is_used": "first", + }) return generate(data), {"Content-Type": "application/json"} From ba349c12dd166cf22afaab8568c316588558a379 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 13:17:49 -0700 Subject: [PATCH 07/13] Change vector tests to expect flattened columns We can more efficiently generate the JSON response for vector variables if we flatten the vector columns instead of having a nested dictionary containing the vector components for the key. Arguably, this flatter structure is better for the frontend, too, since we can just map x and y to properties on each record without having to implement any kind of JavaScript path parsing. --- tests/test_unified_graphics.py | 56 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index 38d11e4d..3b97643c 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -264,8 +264,30 @@ def test_wind_diag(uv, client): # Assert assert response.json == [ - {"obs_minus_forecast_adjusted": {"u": 0.0, "v": 1.0}, "obs_minus_forecast_unadjusted": {"u": 0.0, "v": 1.0}, "observation": {"u": 0.0, "v": 1.0}, "longitude": 90.0, "latitude": 22.0}, - {"obs_minus_forecast_adjusted": {"u": 0.0, "v": -1.0}, "obs_minus_forecast_unadjusted": {"u": 0.0, "v": -1.0}, "observation": {"u": 1.0, "v": 0.0}, "longitude": 91.0, "latitude": 23.0}, + { + "obs_minus_forecast_adjusted_u": 0.0, + "obs_minus_forecast_adjusted_v": 1.0, + "obs_minus_forecast_unadjusted_u": 0.0, + "obs_minus_forecast_unadjusted_v": 1.0, + "observation_u": 0.0, + "observation_v": 1.0, + "longitude_u": 90.0, + "longitude_v": 90.0, + "latitude_u": 22.0, + "latitude_v": 22.0, + }, + { + "obs_minus_forecast_adjusted_u": 0.0, + "obs_minus_forecast_adjusted_v": -1.0, + "obs_minus_forecast_unadjusted_u": 0.0, + "obs_minus_forecast_unadjusted_v": -1.0, + "observation_u": 1.0, + "observation_v": 0.0, + "longitude_u": 91.0, + "longitude_v": 91.0, + "latitude_u": 23.0, + "latitude_v": 23.0, + }, ] @@ -333,11 +355,16 @@ def test_region_filter_vector(uv, client): assert response.json == [ { - "obs_minus_forecast_adjusted": {"u": 0.0, "v": 1.0}, - "obs_minus_forecast_unadjusted": {"u": 0.0, "v": 1.0}, - "observation": {"u": 0.0, "v": 1.0}, - "longitude": 90.0, - "latitude": 22.0 + "obs_minus_forecast_adjusted_u": 0.0, + "obs_minus_forecast_adjusted_v": 1.0, + "obs_minus_forecast_unadjusted_u": 0.0, + "obs_minus_forecast_unadjusted_v": 1.0, + "observation_u": 0.0, + "observation_v": 1.0, + "longitude_u": 90.0, + "longitude_v": 90.0, + "latitude_u": 22.0, + "latitude_v": 22.0, }, ] @@ -374,11 +401,16 @@ def test_range_filter_vector(uv, client): # Assert assert response.json == [ { - "obs_minus_forecast_adjusted": {"u": 0.0, "v": 1.0}, - "obs_minus_forecast_unadjusted": {"u": 0.0, "v": 1.0}, - "observation": {"u": 0.0, "v": 1.0}, - "longitude": 90.0, - "latitude": 22.0, + "obs_minus_forecast_adjusted_u": 0.0, + "obs_minus_forecast_adjusted_v": 1.0, + "obs_minus_forecast_unadjusted_u": 0.0, + "obs_minus_forecast_unadjusted_v": 1.0, + "observation_u": 0.0, + "observation_v": 1.0, + "longitude_u": 90.0, + "longitude_v": 90.0, + "latitude_u": 22.0, + "latitude_v": 22.0, }, ] From be1359304b748533fe5a8553274d143d433dda8b Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 13:51:24 -0700 Subject: [PATCH 08/13] Use DataFrame.to_json to generate diag responses This is much faster and easier than the generator approach we were using before. Mainly because we handle vectors by just flattening the column hierarchy of the vector components instead of trying to merge the u and v columns into a single dictionary for each column. There's a decent chance that all of this hassle could be avoided by storing the vector data differently. Or perhaps this is for the best because it makes it easier to handle the vector columns when we visualize them because our chart components don't have to worry about parsing JavaScript object paths like "obs_minus_forecast_adjusted.u". --- src/unified_graphics/routes.py | 41 +++++++++------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 61a8d1f7..5f94e250 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -164,27 +164,6 @@ def history(model, system, domain, background, frequency, variable, loop): def diagnostics( model, system, domain, background, frequency, variable, initialization_time, loop ): - def vec_to_dict(s) -> dict: - c = s.copy() - c.index = s.index.droplevel(0) - return c.to_dict() - - def generate(df): - yield "[" - for idx, obs in enumerate(df.itertuples()): - print(obs) - if idx > 0: - yield "," - yield json.dumps({ - "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, - "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, - "observation": obs.observation, - "longitude": obs.longitude, - "latitude": obs.latitude, - }) - - yield "]" - try: v = diag.Variable(variable) except ValueError: @@ -201,19 +180,19 @@ def generate(df): initialization_time, diag.MinimLoop(loop), request.args, - ) + )[[ + "obs_minus_forecast_adjusted", + "obs_minus_forecast_unadjusted", + "observation", + "longitude", + "latitude", + ]] if "component" in data.index.names: - data = data.groupby(level=0).aggregate({ - "obs_minus_forecast_adjusted": vec_to_dict, - "obs_minus_forecast_unadjusted": vec_to_dict, - "observation": vec_to_dict, - "longitude": "first", - "latitude": "first", - "is_used": "first", - }) + data = data.unstack() + data.columns = ["_".join(col) for col in data.columns] - return generate(data), {"Content-Type": "application/json"} + return data.to_json(orient="records"), {"Content-Type": "application/json"} @bp.route( From 446c202b1dbd9e9193bf7970a1e3e7afd9d03647 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 13:54:18 -0700 Subject: [PATCH 09/13] Switch to DataFrame.to_json for the magnitude route The speed savings here are less significant than they are for the diagnostic data because of how we are calculating the magnitude, but we shave 5ish seconds off the response by simply using to_json instead of iterating over everything ourselves. --- src/unified_graphics/diag.py | 1 - src/unified_graphics/routes.py | 24 ++++++++---------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index 5e68e102..abaf0f18 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -374,7 +374,6 @@ def magnitude(dataset: pd.DataFrame) -> pd.DataFrame: "observation": np.linalg.norm, "longitude": "first", "latitude": "first", - "is_used": "first", }) diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 5f94e250..9b002cfa 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -202,20 +202,6 @@ def diagnostics( def magnitude( model, system, domain, background, frequency, variable, initialization_time, loop ): - def generate(df): - yield "[" - for idx, obs in enumerate(df.itertuples()): - if idx > 0: - yield "," - yield json.dumps({ - "obs_minus_forecast_adjusted": obs.obs_minus_forecast_adjusted, - "obs_minus_forecast_unadjusted": obs.obs_minus_forecast_unadjusted, - "observation": obs.observation, - "longitude": obs.longitude, - "latitude": obs.latitude, - }) - yield "]" - try: v = diag.Variable(variable) except ValueError: @@ -232,7 +218,13 @@ def generate(df): initialization_time, diag.MinimLoop(loop), request.args, - ) + )[[ + "obs_minus_forecast_adjusted", + "obs_minus_forecast_unadjusted", + "observation", + "longitude", + "latitude", + ]] data = diag.magnitude(data) - return generate(data), {"Content-Type": "application/json"} + return data.to_json(orient="records"), {"Content-Type": "application/json"} From 1fa9d947405bd33055b31a54e6cffcc5aea7cfbe Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 14:34:58 -0700 Subject: [PATCH 10/13] Fix 2D Histogram for new JSON responses --- .../static/js/component/Chart2DHistogram.js | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/unified_graphics/static/js/component/Chart2DHistogram.js b/src/unified_graphics/static/js/component/Chart2DHistogram.js index 8566d2cb..a733ef7e 100644 --- a/src/unified_graphics/static/js/component/Chart2DHistogram.js +++ b/src/unified_graphics/static/js/component/Chart2DHistogram.js @@ -138,11 +138,9 @@ export default class Chart2DHistogram extends ChartElement { } set data(value) { - if (Array.isArray(value)) { - this.#data = value; - } else { - this.#data = value.features.map((d) => d.properties.adjusted); - } + this.#data = value.map((record) => { + return { u: record["obs_minus_forecast_adjusted_u"], v: record["obs_minus_forecast_adjusted_v"] }; + }); // FIXME: This is duplicated across all charts to ensure that they fire // this event. This event is used by the ColorRamp component so that it From b6705ece4471bb18c0243570e42fc63a3cd55d45 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 14:48:41 -0700 Subject: [PATCH 11/13] Fix histogram for new JSON responses --- src/unified_graphics/static/js/component/ChartHistogram.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unified_graphics/static/js/component/ChartHistogram.js b/src/unified_graphics/static/js/component/ChartHistogram.js index a0664221..6b9c053c 100644 --- a/src/unified_graphics/static/js/component/ChartHistogram.js +++ b/src/unified_graphics/static/js/component/ChartHistogram.js @@ -116,7 +116,7 @@ export default class ChartHistogram extends ChartElement { case "src": fetch(newValue) .then((response) => response.json()) - .then((data) => data.features.map((d) => d.properties.adjusted)) + .then((data) => data.map((d) => d.obs_minus_forecast_adjusted)) .then((data) => (this.data = data)); break; default: From 185200400d8ce8b6d16af4b635eac08974ed569f Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 14:56:29 -0700 Subject: [PATCH 12/13] Fix map for the new JSON response type --- src/unified_graphics/static/js/component/ChartMap.js | 9 +++++---- src/unified_graphics/templates/layouts/diag.html | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/unified_graphics/static/js/component/ChartMap.js b/src/unified_graphics/static/js/component/ChartMap.js index 4c2dbd04..34ef9a20 100644 --- a/src/unified_graphics/static/js/component/ChartMap.js +++ b/src/unified_graphics/static/js/component/ChartMap.js @@ -149,7 +149,7 @@ export default class ChartMap extends ChartElement { if (!observations) return scaleQuantize().range(schemePurples[9]); /** @type number[] */ - const [lower, upper] = extent(observations.features, (feature) => + const [lower, upper] = extent(observations, (feature) => get(feature, this.fill) ); @@ -203,7 +203,7 @@ export default class ChartMap extends ChartElement { if (!(borders || observations)) return; - this.#projection.fitSize([width, height], observations ?? borders); + this.#projection.fitSize([width, height], borders); const ctx = canvas.getContext("2d"); if (!ctx) return; @@ -230,9 +230,10 @@ export default class ChartMap extends ChartElement { const fill = this.scale; const radius = 2; - observations.features.forEach((feature) => { + console.log(observations); + observations.forEach((feature) => { const value = get(feature, fillProp); - const [x, y] = this.#projection(feature.geometry.coordinates); + const [x, y] = this.#projection([feature.longitude, feature.latitude]); ctx.save(); ctx.fillStyle = fill(value); diff --git a/src/unified_graphics/templates/layouts/diag.html b/src/unified_graphics/templates/layouts/diag.html index 8c80c1a2..ff6edcab 100644 --- a/src/unified_graphics/templates/layouts/diag.html +++ b/src/unified_graphics/templates/layouts/diag.html @@ -41,7 +41,7 @@

{% if minim_loop == "ges" %}Guess{% else %}Analysis + fill="obs_minus_forecast_adjusted"> Observation − Forecast From 1e0fc1ae1d0e115d961fa9b59c233e7686d8972c Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 7 Nov 2023 15:08:38 -0700 Subject: [PATCH 13/13] Clean up --- src/unified_graphics/diag.py | 89 +++---------------- src/unified_graphics/routes.py | 34 +++---- .../static/js/component/Chart2DHistogram.js | 5 +- .../static/js/component/ChartMap.js | 4 +- tests/test_diag.py | 34 ------- tests/test_unified_graphics.py | 35 ++++++-- 6 files changed, 64 insertions(+), 137 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index abaf0f18..d0b568fe 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -1,8 +1,7 @@ import os from collections import namedtuple -from dataclasses import dataclass from enum import Enum -from typing import Generator, List, Union +from typing import Union from urllib.parse import urlparse import numpy as np @@ -22,11 +21,6 @@ class MinimLoop(Enum): ANALYSIS = "anl" -class ValueType(Enum): - OBSERVATION = "observation" - FORECAST = "forecast" - - class Variable(Enum): MOISTURE = "q" PRESSURE = "ps" @@ -34,48 +28,6 @@ class Variable(Enum): WIND = "uv" -class VariableType(Enum): - SCALAR = "scalar" - VECTOR = "vector" - - -Coordinate = namedtuple("Coordinate", "longitude latitude") -Vector = namedtuple("Vector", "u v") - - -@dataclass -class Observation: - variable: str - variable_type: VariableType - loop: MinimLoop - adjusted: Union[float, Vector] - unadjusted: Union[float, Vector] - observed: Union[float, Vector] - position: Coordinate - - def to_geojson(self): - properties = { - "type": self.variable_type.value, - "variable": self.variable, - "loop": self.loop.value, - } - - if isinstance(self.adjusted, float): - properties["adjusted"] = self.adjusted - properties["unadjusted"] = self.unadjusted - properties["observed"] = self.observed - else: - properties["adjusted"] = self.adjusted._asdict() - properties["unadjusted"] = self.unadjusted._asdict() - properties["observed"] = self.observed._asdict() - - return { - "type": "Feature", - "properties": properties, - "geometry": {"type": "Point", "coordinates": list(self.position)}, - } - - ModelMetadata = namedtuple( "ModelMetadata", ( @@ -242,6 +194,7 @@ def scalar( return data.to_dataframe() + def temperature( diag_zarr: str, model: str, @@ -317,28 +270,6 @@ def pressure( ) -def vector_direction(u, v): - direction = (90 - np.degrees(np.arctan2(-v, -u))) % 360 - - # Anywhere the magnitude of the vector is 0 - calm = (np.abs(u) == 0) & (np.abs(v) == 0) - - # numpy.arctan2 treats 0.0 and -0.0 differently. Whenever the second - # argument to the function is -0.0, it return pi or -pi depending on the - # sign of the first argument. Whenever the second argument is 0.0, it will - # return 0.0 or -0.0 depending on the sign of the first argument. We - # normalize all calm vectors (magnitude 0) to have a direction of 0.0, per - # the NCAR Command Language docs. - # http://ncl.ucar.edu/Document/Functions/Contributed/wind_direction.shtml - direction[calm] = 0.0 - - return direction - - -def vector_magnitude(u, v): - return np.sqrt(u**2 + v**2) - - def wind( diag_zarr: str, model: str, @@ -368,13 +299,15 @@ def wind( def magnitude(dataset: pd.DataFrame) -> pd.DataFrame: - return dataset.groupby(level=0).aggregate({ - "obs_minus_forecast_adjusted": np.linalg.norm, - "obs_minus_forecast_unadjusted": np.linalg.norm, - "observation": np.linalg.norm, - "longitude": "first", - "latitude": "first", - }) + return dataset.groupby(level=0).aggregate( + { + "obs_minus_forecast_adjusted": np.linalg.norm, + "obs_minus_forecast_unadjusted": np.linalg.norm, + "observation": np.linalg.norm, + "longitude": "first", + "latitude": "first", + } + ) def get_model_run_list( diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 9b002cfa..f796e90f 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -1,5 +1,3 @@ -import json - from flask import ( Blueprint, current_app, @@ -180,13 +178,15 @@ def diagnostics( initialization_time, diag.MinimLoop(loop), request.args, - )[[ - "obs_minus_forecast_adjusted", - "obs_minus_forecast_unadjusted", - "observation", - "longitude", - "latitude", - ]] + )[ + [ + "obs_minus_forecast_adjusted", + "obs_minus_forecast_unadjusted", + "observation", + "longitude", + "latitude", + ] + ] if "component" in data.index.names: data = data.unstack() @@ -218,13 +218,15 @@ def magnitude( initialization_time, diag.MinimLoop(loop), request.args, - )[[ - "obs_minus_forecast_adjusted", - "obs_minus_forecast_unadjusted", - "observation", - "longitude", - "latitude", - ]] + )[ + [ + "obs_minus_forecast_adjusted", + "obs_minus_forecast_unadjusted", + "observation", + "longitude", + "latitude", + ] + ] data = diag.magnitude(data) return data.to_json(orient="records"), {"Content-Type": "application/json"} diff --git a/src/unified_graphics/static/js/component/Chart2DHistogram.js b/src/unified_graphics/static/js/component/Chart2DHistogram.js index a733ef7e..a08716b5 100644 --- a/src/unified_graphics/static/js/component/Chart2DHistogram.js +++ b/src/unified_graphics/static/js/component/Chart2DHistogram.js @@ -139,7 +139,10 @@ export default class Chart2DHistogram extends ChartElement { set data(value) { this.#data = value.map((record) => { - return { u: record["obs_minus_forecast_adjusted_u"], v: record["obs_minus_forecast_adjusted_v"] }; + return { + u: record["obs_minus_forecast_adjusted_u"], + v: record["obs_minus_forecast_adjusted_v"], + }; }); // FIXME: This is duplicated across all charts to ensure that they fire diff --git a/src/unified_graphics/static/js/component/ChartMap.js b/src/unified_graphics/static/js/component/ChartMap.js index 34ef9a20..bb540b52 100644 --- a/src/unified_graphics/static/js/component/ChartMap.js +++ b/src/unified_graphics/static/js/component/ChartMap.js @@ -149,9 +149,7 @@ export default class ChartMap extends ChartElement { if (!observations) return scaleQuantize().range(schemePurples[9]); /** @type number[] */ - const [lower, upper] = extent(observations, (feature) => - get(feature, this.fill) - ); + const [lower, upper] = extent(observations, (feature) => get(feature, this.fill)); const isDiverging = lower / Math.abs(lower) !== upper / Math.abs(upper); const largestBound = Math.max(Math.abs(lower), Math.abs(upper)); diff --git a/tests/test_diag.py b/tests/test_diag.py index 00202062..5dcb0c7a 100644 --- a/tests/test_diag.py +++ b/tests/test_diag.py @@ -234,40 +234,6 @@ def test_open_diagnostic_s3(moto_server, test_dataset, monkeypatch): xr.testing.assert_equal(result, expected) -# Test cases taken from the examples at -# http://ncl.ucar.edu/Document/Functions/Contributed/wind_direction.shtml -@pytest.mark.parametrize( - "u,v,expected", - ( - [ - np.array([10, 0, 0, -10, 10, 10, -10, -10]), - np.array([0, 10, -10, 0, 10, -10, 10, -10]), - np.array([270, 180, 0, 90, 225, 315, 135, 45]), - ], - [ - np.array([0.0, -0.0, 0.0, -0.0]), - np.array([0.0, 0.0, -0.0, -0.0]), - np.array([0.0, 0.0, 0.0, 0.0]), - ], - ), -) -def test_vector_direction(u, v, expected): - result = diag.vector_direction(u, v) - - np.testing.assert_array_almost_equal(result, expected, decimal=5) - - -def test_vector_magnitude(): - u = np.array([1, 0, 1, 0]) - v = np.array([0, 1, 1, 0]) - - result = diag.vector_magnitude(u, v) - - np.testing.assert_array_almost_equal( - result, np.array([1, 1, 1.41421, 0]), decimal=5 - ) - - @pytest.mark.parametrize( "mapping,expected", [ diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index 3b97643c..275b98d4 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -115,10 +115,23 @@ def test_scalar_diag(t, client): # Assert assert response.json == [ - {"obs_minus_forecast_adjusted": 1.0, "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 90.0, "latitude": 22.0}, - {"obs_minus_forecast_adjusted": -1.0, "obs_minus_forecast_unadjusted": -1.0, "observation": 0.0, "longitude": 91.0, "latitude": 23.0}, + { + "obs_minus_forecast_adjusted": 1.0, + "obs_minus_forecast_unadjusted": 1.0, + "observation": 1.0, + "longitude": 90.0, + "latitude": 22.0, + }, + { + "obs_minus_forecast_adjusted": -1.0, + "obs_minus_forecast_unadjusted": -1.0, + "observation": 0.0, + "longitude": 91.0, + "latitude": 23.0, + }, ] + def test_scalar_history(model, diag_parquet, client, test_dataset): # Arrange run_list = [ @@ -300,8 +313,20 @@ def test_vector_magnitude(uv, client): # Assert assert response.json == [ - {"obs_minus_forecast_adjusted": 1.0, "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 90.0, "latitude": 22.0}, - {"obs_minus_forecast_adjusted": 1.0, "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 91.0, "latitude": 23.0}, + { + "obs_minus_forecast_adjusted": 1.0, + "obs_minus_forecast_unadjusted": 1.0, + "observation": 1.0, + "longitude": 90.0, + "latitude": 22.0, + }, + { + "obs_minus_forecast_adjusted": 1.0, + "obs_minus_forecast_unadjusted": 1.0, + "observation": 1.0, + "longitude": 91.0, + "latitude": 23.0, + }, ] @@ -318,7 +343,7 @@ def test_region_filter_scalar(t, client): "obs_minus_forecast_unadjusted": 1.0, "observation": 1.0, "longitude": 90.0, - "latitude": 22.0 + "latitude": 22.0, }, ]