From 0b04bf82b16be38802157b49f0e186474ee5f5b1 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 21 Jun 2024 15:07:24 +0200
Subject: [PATCH 01/16] Fixed bug in get_bufr

Configuration variables were too strictly validated.
* Made bufr_integration_test explicit
---
 src/pypromice/postprocess/get_bufr.py         |  20 +-
 .../bufr_export/test_get_bufr_integration.py  | 257 +++++++++++-------
 2 files changed, 169 insertions(+), 108 deletions(-)

diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 6b7bc217..0ad4b3a6 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -491,26 +491,30 @@ def get_bufr_variables(
     BUFRVariables used by bufr_utilities
 
     """
-    heightOfStationGroundAboveMeanSeaLevel = np.nan
-    if isinstance(station_configuration.height_of_gps_from_station_ground, float):
+    if station_configuration.height_of_gps_from_station_ground is None:
+        heightOfStationGroundAboveMeanSeaLevel = np.nan
+    else:
         heightOfStationGroundAboveMeanSeaLevel = (
                 data["gps_alt_fit"] - station_configuration.height_of_gps_from_station_ground
         )
 
-    heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
-    if isinstance(station_configuration.temperature_from_sonic_ranger, float):
+    if station_configuration.temperature_from_sonic_ranger is None:
+        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
+    else:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = (
                 data["z_boom_u_smooth"]+ station_configuration.temperature_from_sonic_ranger
         )
 
-    heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = np.nan
-    if isinstance(station_configuration.anemometer_from_sonic_ranger, float):
+    if station_configuration.anemometer_from_sonic_ranger is None:
+        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = np.nan
+    else:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = (
                 data["z_boom_u_smooth"] + station_configuration.anemometer_from_sonic_ranger
         )
 
-    heightOfBarometerAboveMeanSeaLevel = np.nan
-    if isinstance(station_configuration.barometer_from_gps, float):
+    if station_configuration.barometer_from_gps is None:
+        heightOfBarometerAboveMeanSeaLevel = np.nan
+    else:
         heightOfBarometerAboveMeanSeaLevel = (
                 data["gps_alt_fit"] + station_configuration.barometer_from_gps
         )
diff --git a/tests/unit/bufr_export/test_get_bufr_integration.py b/tests/unit/bufr_export/test_get_bufr_integration.py
index b60d235a..541962cd 100644
--- a/tests/unit/bufr_export/test_get_bufr_integration.py
+++ b/tests/unit/bufr_export/test_get_bufr_integration.py
@@ -3,7 +3,6 @@
 """
 
 import datetime
-import hashlib
 import logging
 import pickle
 import shutil
@@ -17,6 +16,7 @@
 import pandas as pd
 
 from pypromice.postprocess import get_bufr
+from pypromice.postprocess.bufr_utilities import read_bufr_message, BUFRVariables
 from pypromice.postprocess.get_bufr import (
     DEFAULT_STATION_CONFIGURATION_PATH,
     StationConfiguration,
@@ -38,10 +38,9 @@ def run_get_bufr(
     latest_timestamps: Optional[Dict[str, datetime.datetime]],
     station_configuration_mapping=None,
     **get_bufr_kwargs,
-) -> Dict[str, str]:
+) -> Optional[BUFRVariables]:
     """
     Run get_bufr using a temporary folder structure for input and output data
-    The output bufr files can be verified using the sha256 hashes.
 
     Parameters
     ----------
@@ -51,7 +50,8 @@ def run_get_bufr(
 
     Returns
     -------
-    mapping from station id to sha256 hashes
+    Optional[BUFRVariables]
+        BUFR variables if the output file was generated successfully
 
     """
     with TemporaryDirectory() as output_path:
@@ -88,13 +88,13 @@ def run_get_bufr(
             **get_bufr_kwargs,
         )
 
-        output_bufr_files = bufr_out.glob("*.bufr")
-        file_hashes = dict()
-        for p in output_bufr_files:
-            with p.open("rb") as fp:
-                file_hashes[p.stem] = hashlib.sha256(fp.read()).hexdigest()
+        output_path = bufr_out.joinpath(f"{stid}.bufr")
+        if not output_path.exists():
+            return None
+
+        with output_path.open("rb") as fp:
+            return read_bufr_message(fp)
 
-        return file_hashes
 
 
 class PreRefactoringBufrTestCase(TestCase):
@@ -104,10 +104,10 @@ def get_station_configuration_mapping(
         wmo_id: str,
         station_site: Optional[str] = None,
         station_type: str = "mobile",
-        barometer_from_gps: float = 0,
+        barometer_from_gps: float = 0.0,
         anemometer_from_sonic_ranger: float = 0.4,
         temperature_from_sonic_ranger: float = -0.1,
-        height_of_gps_from_station_ground: float = 0,
+        height_of_gps_from_station_ground: float = 0.0,
         skipped_variables=(),
         comment=None,
         export_bufr=True,
@@ -137,11 +137,8 @@ def test_get_bufr_has_new_data(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 8)
-        expected_file_hashes = {
-            stid: "2b94d2ef611cfddb6dd537ca63d0ec4fb5d8e880943f81a6d5e724c042ac8971"
-        }
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -150,10 +147,28 @@ def test_get_bufr_has_new_data(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
+        expected_bufr_variables = BUFRVariables(
+            wmo_id="04464",
+            station_type="mobile",
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
+            timestamp=datetime.datetime(2023, 12, 7, 23, 00),
+            relativeHumidity=69,
+            airTemperature=256.0,
+            pressure=77300.0,
+            windDirection=149,
+            windSpeed=14.9,
+            latitude=66.48249,
+            longitude=-46.29427,
+            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfBarometerAboveMeanSeaLevel=2124.7,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
         )
+        pd.testing.assert_series_equal(
+            bufr_data.as_series(),
+            expected_bufr_variables.as_series(),
+        )
+
 
     def test_get_bufr_has_new_data_dont_store_position(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
@@ -162,11 +177,8 @@ def test_get_bufr_has_new_data_dont_store_position(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 8)
-        expected_file_hashes = {
-            stid: "2b94d2ef611cfddb6dd537ca63d0ec4fb5d8e880943f81a6d5e724c042ac8971"
-        }
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -175,11 +187,29 @@ def test_get_bufr_has_new_data_dont_store_position(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
+        expected_bufr_variables = BUFRVariables(
+            wmo_id="04464",
+            station_type="mobile",
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
+            timestamp=datetime.datetime(2023, 12, 7, 23, 00),
+            relativeHumidity=69,
+            airTemperature=256.0,
+            pressure=77300.0,
+            windDirection=149,
+            windSpeed=14.9,
+            latitude=66.48249,
+            longitude=-46.29427,
+            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfBarometerAboveMeanSeaLevel=2124.7,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+        )
+        pd.testing.assert_series_equal(
+            bufr_data.as_series(),
+            expected_bufr_variables.as_series(),
         )
 
+
     def test_get_bufr_stid_to_skip(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
         l3_src = pd.read_csv(l3_src_filepath)
@@ -187,11 +217,10 @@ def test_get_bufr_stid_to_skip(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 6)
-        expected_file_hashes = {}
         mapping = self.get_station_configuration_mapping(
             stid, wmo_id="04464", export_bufr=False
         )
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -200,10 +229,7 @@ def test_get_bufr_stid_to_skip(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+        self.assertIsNone(bufr_data)
 
     def test_get_bufr_has_no_data_newer_than_latests_timestamps(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
@@ -212,10 +238,9 @@ def test_get_bufr_has_no_data_newer_than_latests_timestamps(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {stid: datetime.datetime(2023, 12, 7, 23, 00)}
         now_timestamp = datetime.datetime(2023, 12, 8)
-        expected_file_hashes = {}
 
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -224,10 +249,7 @@ def test_get_bufr_has_no_data_newer_than_latests_timestamps(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+        self.assertIsNone(bufr_data)
 
     def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
@@ -235,12 +257,8 @@ def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
         stid = "DY2"
         latest_timestamps = {}
         now_timestamp = datetime.datetime(2023, 12, 8)
-        expected_file_hashes = {
-            stid: "2b94d2ef611cfddb6dd537ca63d0ec4fb5d8e880943f81a6d5e724c042ac8971"
-        }
-
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -249,9 +267,27 @@ def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
+
+        expected_bufr_variables = BUFRVariables(
+            wmo_id="04464",
+            station_type="mobile",
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
+            timestamp=datetime.datetime(2023, 12, 7, 23, 00),
+            relativeHumidity=69,
+            airTemperature=256.0,
+            pressure=77300.0,
+            windDirection=149,
+            windSpeed=14.9,
+            latitude=66.48249,
+            longitude=-46.29427,
+            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfBarometerAboveMeanSeaLevel=2124.7,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+        )
+        pd.testing.assert_series_equal(
+            bufr_data.as_series(),
+            expected_bufr_variables.as_series(),
         )
 
     def test_get_bufr_has_old_data_compared_to_now(self):
@@ -261,10 +297,9 @@ def test_get_bufr_has_old_data_compared_to_now(self):
         l3_src = pd.read_csv(l3_src_filepath)
         latest_timestamps = {stid: datetime.datetime(2023, 12, 6)}
         now_timestamp = datetime.datetime(2023, 12, 20)
-        expected_file_hashes = {}
 
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -273,10 +308,7 @@ def test_get_bufr_has_old_data_compared_to_now(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+        self.assertIsNone(bufr_data)
 
     def test_invalid_value_at_last_index(self):
         stid = "DY2"
@@ -287,12 +319,8 @@ def test_invalid_value_at_last_index(self):
         l3_src.loc[140:, "p_i"] = np.nan
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 8)
-        expected_file_hashes = {
-            stid: "bb951e0245ce3f6fe656b9bb5c85f097753a6969cc60b2cf8b34e0764495e627"
-        }
-
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -301,9 +329,26 @@ def test_invalid_value_at_last_index(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
+        expected_bufr_variables = BUFRVariables(
+            wmo_id="04464",
+            station_type="mobile",
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
+            timestamp=datetime.datetime(2023, 12, 7, 23, 00),
+            relativeHumidity=69,
+            airTemperature=256.0,
+            pressure=np.nan,
+            windDirection=149,
+            windSpeed=14.9,
+            latitude=66.48249,
+            longitude=-46.29427,
+            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfBarometerAboveMeanSeaLevel=2124.7,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+        )
+        pd.testing.assert_series_equal(
+            bufr_data.as_series(),
+            expected_bufr_variables.as_series(),
         )
 
     def test_multiple_last_valid_indices_all_instantaneous_timestamps_are_none(self):
@@ -324,10 +369,8 @@ def test_multiple_last_valid_indices_all_instantaneous_timestamps_are_none(self)
         ] = np.nan
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 6)
-        expected_file_hashes = {}
-
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -336,10 +379,8 @@ def test_multiple_last_valid_indices_all_instantaneous_timestamps_are_none(self)
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+
+        self.assertIsNone(bufr_data)
 
     def test_multiple_last_valid_indices_all_older_than_2days(self):
         stid = "DY2"
@@ -350,10 +391,9 @@ def test_multiple_last_valid_indices_all_older_than_2days(self):
         l3_src.loc[140:, "p_i"] = np.nan
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 10)
-        expected_file_hashes = {}
 
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -362,10 +402,7 @@ def test_multiple_last_valid_indices_all_older_than_2days(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+        self.assertIsNone(bufr_data)
 
     def test_min_data_wx_failed(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
@@ -375,9 +412,8 @@ def test_min_data_wx_failed(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 6)
-        expected_file_hashes = {}
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -386,10 +422,8 @@ def test_min_data_wx_failed(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+
+        self.assertIsNone(bufr_data)
 
     def test_min_data_pos_failed(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
@@ -399,9 +433,8 @@ def test_min_data_pos_failed(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 6)
-        expected_file_hashes = {}
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -410,10 +443,7 @@ def test_min_data_pos_failed(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
-        )
+        self.assertIsNone(bufr_data)
 
     def test_ignore_newer_data_than_now_input(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
@@ -427,11 +457,8 @@ def test_ignore_newer_data_than_now_input(self):
             12,
             6,
         )
-        expected_file_hashes = {
-            stid: "976a24edef2d0e6e2f29fa13d6242419fa05b24905db715fe351c19a1aa1d577"
-        }
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -440,9 +467,26 @@ def test_ignore_newer_data_than_now_input(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
+        expected_bufr_variables = BUFRVariables(
+            wmo_id="04464",
+            station_type="mobile",
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00 but now_timestamp is 2023-12-06
+            timestamp=datetime.datetime(2023, 12, 6, 0, 0),
+            relativeHumidity=82,
+            airTemperature=250.8,
+            pressure=77370.0,
+            windDirection=153,
+            windSpeed=10.4,
+            latitude=66.48249,
+            longitude=-46.29426,
+            heightOfStationGroundAboveMeanSeaLevel=2124.3,
+            heightOfBarometerAboveMeanSeaLevel=2124.3,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+        )
+        pd.testing.assert_series_equal(
+            bufr_data.as_series(),
+            expected_bufr_variables.as_series(),
         )
 
     def test_land_station_export(self):
@@ -452,14 +496,10 @@ def test_land_station_export(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"WEG_B": datetime.datetime(2023, 12, 1)}
         now_timestamp = datetime.datetime(2023, 12, 8)
-        expected_file_hashes = {
-            stid: "eb42044f38326a295bcd18bd42fba5ed88800c5a688f885b87147aacaa5f5001"
-        }
-
         mapping = self.get_station_configuration_mapping(
             stid, wmo_id="460", station_type="land"
         )
-        file_hashes = run_get_bufr(
+        bufr_data = run_get_bufr(
             l3_data=l3_src,
             now_timestamp=now_timestamp,
             latest_timestamps=latest_timestamps,
@@ -468,7 +508,24 @@ def test_land_station_export(self):
             time_limit="91d",
             station_configuration_mapping=mapping,
         )
-        self.assertDictEqual(
-            expected_file_hashes,
-            file_hashes,
+        expected_bufr_variables = BUFRVariables(
+            wmo_id="460",
+            station_type="land",
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
+            timestamp=datetime.datetime(2023, 12, 7, 23, 00),
+            relativeHumidity=69,
+            airTemperature=256.0,
+            pressure=77300.0,
+            windDirection=149,
+            windSpeed=14.9,
+            latitude=66.48249,
+            longitude=-46.29427,
+            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfBarometerAboveMeanSeaLevel=2124.7,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+        )
+        pd.testing.assert_series_equal(
+            bufr_data.as_series(),
+            expected_bufr_variables.as_series(),
         )

From 76070b0f14ac92aec766f0384f81565f93186a17 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Tue, 11 Jun 2024 08:32:02 +0200
Subject: [PATCH 02/16] Added __all__ to get_bufr.py

---
 src/pypromice/postprocess/get_bufr.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 0ad4b3a6..f92bbabf 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -23,6 +23,14 @@
 from pypromice.postprocess.bufr_utilities import write_bufr_message, BUFRVariables
 from pypromice.postprocess.real_time_utilities import get_latest_data
 
+__all__ = [
+    'get_bufr',
+    'main',
+    'DEFAULT_STATION_CONFIGURATION_PATH',
+    'DEFAULT_POSITION_SEED_PATH',
+    'DEFAULT_LIN_REG_TIME_LIMIT',
+]
+
 logger = logging.getLogger(__name__)
 
 DEFAULT_STATION_CONFIGURATION_PATH = Path(__file__).parent.joinpath(

From fdc14071425a5843044b83c6754c39289ffbe7c1 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 21 Jun 2024 12:08:19 +0200
Subject: [PATCH 03/16] Applied black code formatting

---
 src/pypromice/postprocess/bufr_utilities.py   |  2 ++
 src/pypromice/postprocess/get_bufr.py         | 30 +++++++++++--------
 .../postprocess/real_time_utilities.py        |  4 +--
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/pypromice/postprocess/bufr_utilities.py b/src/pypromice/postprocess/bufr_utilities.py
index d53f2733..e189c4fe 100644
--- a/src/pypromice/postprocess/bufr_utilities.py
+++ b/src/pypromice/postprocess/bufr_utilities.py
@@ -45,6 +45,7 @@ def round(value: float):
 
     return round
 
+
 # Enforce precision
 # Note the sensor accuracies listed here:
 # https://essd.copernicus.org/articles/13/3819/2021/#section8
@@ -64,6 +65,7 @@ class BUFRVariables:
     * heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD: Corresponds to "#7#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform" which is height if anemometer relative to ground or deck of marine platform.
 
     """
+
     wmo_id: str
     station_type: str
     timestamp: datetime.datetime
diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index f92bbabf..433f94e2 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -24,11 +24,11 @@
 from pypromice.postprocess.real_time_utilities import get_latest_data
 
 __all__ = [
-    'get_bufr',
-    'main',
-    'DEFAULT_STATION_CONFIGURATION_PATH',
-    'DEFAULT_POSITION_SEED_PATH',
-    'DEFAULT_LIN_REG_TIME_LIMIT',
+    "get_bufr",
+    "main",
+    "DEFAULT_STATION_CONFIGURATION_PATH",
+    "DEFAULT_POSITION_SEED_PATH",
+    "DEFAULT_LIN_REG_TIME_LIMIT",
 ]
 
 logger = logging.getLogger(__name__)
@@ -39,6 +39,7 @@
 DEFAULT_POSITION_SEED_PATH = Path(__file__).parent.joinpath("positions_seed.csv")
 DEFAULT_LIN_REG_TIME_LIMIT = "91d"
 
+
 def parse_arguments_bufr() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
 
@@ -109,10 +110,10 @@ def parse_arguments_bufr() -> argparse.ArgumentParser:
     )
 
     parser.add_argument(
-        '--latest_timestamp',
+        "--latest_timestamp",
         default=datetime.utcnow(),
         type=pd.Timestamp,
-        help="Timestamp used to determine latest data. Default utcnow."
+        help="Timestamp used to determine latest data. Default utcnow.",
     )
 
     parser.add_argument("--verbose", "-v", default=False, action="store_true")
@@ -503,31 +504,32 @@ def get_bufr_variables(
         heightOfStationGroundAboveMeanSeaLevel = np.nan
     else:
         heightOfStationGroundAboveMeanSeaLevel = (
-                data["gps_alt_fit"] - station_configuration.height_of_gps_from_station_ground
+            data["gps_alt_fit"]
+            - station_configuration.height_of_gps_from_station_ground
         )
 
     if station_configuration.temperature_from_sonic_ranger is None:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
     else:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = (
-                data["z_boom_u_smooth"]+ station_configuration.temperature_from_sonic_ranger
+            data["z_boom_u_smooth"]
+            + station_configuration.temperature_from_sonic_ranger
         )
 
     if station_configuration.anemometer_from_sonic_ranger is None:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = np.nan
     else:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD = (
-                data["z_boom_u_smooth"] + station_configuration.anemometer_from_sonic_ranger
+            data["z_boom_u_smooth"] + station_configuration.anemometer_from_sonic_ranger
         )
 
     if station_configuration.barometer_from_gps is None:
         heightOfBarometerAboveMeanSeaLevel = np.nan
     else:
         heightOfBarometerAboveMeanSeaLevel = (
-                data["gps_alt_fit"] + station_configuration.barometer_from_gps
+            data["gps_alt_fit"] + station_configuration.barometer_from_gps
         )
 
-
     output_row = BUFRVariables(
         wmo_id=station_configuration.wmo_id,
         station_type=station_configuration.station_type,
@@ -604,6 +606,7 @@ def min_data_check(s):
 
     return min_data_wx_result, min_data_pos_result
 
+
 def main():
     args = parse_arguments_bufr().parse_args()
 
@@ -637,5 +640,6 @@ def main():
         positions_seed_path=args.position_seed,
     )
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/src/pypromice/postprocess/real_time_utilities.py b/src/pypromice/postprocess/real_time_utilities.py
index 952a69d8..bbc0b337 100644
--- a/src/pypromice/postprocess/real_time_utilities.py
+++ b/src/pypromice/postprocess/real_time_utilities.py
@@ -166,9 +166,7 @@ def find_positions(df, time_limit):
                 df_limited[f"{k}_fit"] = df.loc[df_limited.index, f"{k}_fit"]
             else:
                 logger.info(f"----> No data exists for {k}. Stubbing out with NaN.")
-                df_limited[f"{k}_fit"] = pd.Series(
-                    np.nan, index=df_limited.index
-                )
+                df_limited[f"{k}_fit"] = pd.Series(np.nan, index=df_limited.index)
 
     return df_limited
 

From 6ff1afa0c2cc15f56780ae70576737d78ce2e3c1 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Tue, 11 Jun 2024 08:33:21 +0200
Subject: [PATCH 04/16] Made bufr_to_csv a CLI script in setup.py

* Updated read_bufr_file to use wmo_id as index
---
 setup.py                                    | 1 +
 src/pypromice/postprocess/bufr_to_csv.py    | 7 ++++++-
 src/pypromice/postprocess/bufr_utilities.py | 5 +++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 52a9b216..3e39237b 100644
--- a/setup.py
+++ b/setup.py
@@ -47,6 +47,7 @@
         'get_l2tol3 = pypromice.process.get_l2tol3:main',
         'get_watsontx = pypromice.tx.get_watsontx:get_watsontx',
         'get_bufr = pypromice.postprocess.get_bufr:main',
+        'bufr_to_csv = pypromice.postprocess.bufr_to_csv:main',
         'get_msg = pypromice.tx.get_msg:get_msg'
     ],
 },
diff --git a/src/pypromice/postprocess/bufr_to_csv.py b/src/pypromice/postprocess/bufr_to_csv.py
index 788aef39..d80f99a3 100644
--- a/src/pypromice/postprocess/bufr_to_csv.py
+++ b/src/pypromice/postprocess/bufr_to_csv.py
@@ -3,9 +3,14 @@
 
 from pypromice.postprocess.bufr_utilities import read_bufr_file
 
-if __name__ == "__main__":
+
+def main():
     parser = argparse.ArgumentParser("BUFR to CSV converter")
     parser.add_argument("path", type=Path)
     args = parser.parse_args()
 
     print(read_bufr_file(args.path).to_csv())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/pypromice/postprocess/bufr_utilities.py b/src/pypromice/postprocess/bufr_utilities.py
index e189c4fe..8293ac86 100644
--- a/src/pypromice/postprocess/bufr_utilities.py
+++ b/src/pypromice/postprocess/bufr_utilities.py
@@ -487,5 +487,6 @@ def read_bufr_file(path: PathLike) -> pd.DataFrame:
             message_vars = read_bufr_message(fp)
             if message_vars is None:
                 break
-            lines.append(message_vars)
-    return pd.DataFrame(lines).rename_axis("message_index")
+            lines.append(message_vars.as_series())
+    data_frame = pd.DataFrame(lines).set_index("wmo_id")
+    return data_frame

From eb5ec232ce45702eac420478b1de86569835dcaf Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Tue, 11 Jun 2024 08:34:19 +0200
Subject: [PATCH 05/16] Added script to recreate bufr files

* Added corresponding unit tests
* Added flag to raise exceptions on errors
* Added create_bufr_files.py to setup
---
 setup.py                                      |   1 +
 .../postprocess/create_bufr_files.py          | 152 ++++++++++++++
 src/pypromice/postprocess/get_bufr.py         |   6 +
 .../postprocess/real_time_utilities.py        |   4 +
 .../bufr_export/test_create_bufr_files.py     | 187 ++++++++++++++++++
 5 files changed, 350 insertions(+)
 create mode 100644 src/pypromice/postprocess/create_bufr_files.py
 create mode 100644 tests/unit/bufr_export/test_create_bufr_files.py

diff --git a/setup.py b/setup.py
index 3e39237b..db9f6365 100644
--- a/setup.py
+++ b/setup.py
@@ -47,6 +47,7 @@
         'get_l2tol3 = pypromice.process.get_l2tol3:main',
         'get_watsontx = pypromice.tx.get_watsontx:get_watsontx',
         'get_bufr = pypromice.postprocess.get_bufr:main',
+        'create_bufr_files = pypromice.postprocess.create_bufr_files:main',
         'bufr_to_csv = pypromice.postprocess.bufr_to_csv:main',
         'get_msg = pypromice.tx.get_msg:get_msg'
     ],
diff --git a/src/pypromice/postprocess/create_bufr_files.py b/src/pypromice/postprocess/create_bufr_files.py
new file mode 100644
index 00000000..01a26770
--- /dev/null
+++ b/src/pypromice/postprocess/create_bufr_files.py
@@ -0,0 +1,152 @@
+import logging
+from pathlib import Path
+from typing import Sequence, List
+
+import pandas as pd
+
+from pypromice.postprocess.get_bufr import (
+    get_bufr,
+    DEFAULT_LIN_REG_TIME_LIMIT,
+    DEFAULT_STATION_CONFIGURATION_PATH,
+    DEFAULT_POSITION_SEED_PATH,
+)
+
+main_logger = logging.getLogger(__name__)
+
+
+def create_bufr_files(
+    input_files: Sequence[Path],
+    period_start: str,
+    period_end: str,
+    output_root: Path,
+    override: bool,
+    break_on_error: bool = False,
+    output_filename_suffix: str = "geus_",
+):
+    """
+    Generate hourly bufr files for all input files
+
+    :param input_files: Paths to csv l3 hourly data files
+    :param period_start: Datetime string for period start. Eg '2024-01-01T00:00' or '20240101'
+    :param period_end: Datetime string for period end
+    :param output_root: Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.
+    :param override: If False: Skip a period if the compiled output file exists.
+    :param break_on_error: If True: Stop processing if an error occurs
+    :return:
+    """
+    periods = pd.date_range(period_start, period_end, freq="H")
+    output_individual_root = output_root / "individual"
+    output_compiled_root = output_root / "compiled"
+    output_individual_root.mkdir(parents=True, exist_ok=True)
+    output_compiled_root.mkdir(parents=True, exist_ok=True)
+
+    for period in periods:
+        period: pd.Timestamp
+        date_str = period.strftime("%Y%m%dT%H%M")
+        main_logger.info(f"Processing {date_str}")
+        output_dir_path = output_individual_root / f"{date_str}"
+        output_file_path = (
+            output_compiled_root / f"{output_filename_suffix}{date_str}.bufr"
+        )
+
+        main_logger.info(f"{period}, {date_str}")
+        if override or not output_file_path.exists():
+            get_bufr(
+                bufr_out=output_dir_path,
+                input_files=input_files,
+                store_positions=False,
+                positions_filepath=None,
+                time_limit=DEFAULT_LIN_REG_TIME_LIMIT,
+                timestamps_pickle_filepath=None,
+                now_timestamp=period,
+                station_configuration_path=DEFAULT_STATION_CONFIGURATION_PATH,
+                positions_seed_path=DEFAULT_POSITION_SEED_PATH,
+                break_on_error=break_on_error,
+            )
+
+            with output_file_path.open("wb") as fp_dst:
+                for src_path in output_dir_path.glob("*.bufr"):
+                    with src_path.open("rb") as fp_src:
+                        fp_dst.write(fp_src.read())
+        else:
+            main_logger.info(f"Output file exists. Skipping {output_file_path}")
+
+
+# %%
+
+def main():
+    import argparse
+    import glob
+    import sys
+
+    logger_format_string = "%(asctime)s; %(levelname)s; %(name)s; %(message)s"
+    logging.basicConfig(
+        level=logging.ERROR,
+        stream=sys.stdout,
+        format=logger_format_string,
+    )
+
+    main_handler = logging.StreamHandler(sys.stdout)
+    main_handler.setLevel(logging.INFO)
+    formatter = logging.Formatter(logger_format_string)
+    main_handler.setFormatter(formatter)
+    main_logger.addHandler(main_handler)
+    main_logger.setLevel(logging.INFO)
+
+    parser = argparse.ArgumentParser(
+        "Create BUFR files from L3 tx .csv files."
+    )
+    parser.add_argument(
+        "--input_files",
+        "--l3-filepath",
+        "-i",
+        type=Path,
+        nargs="+",
+        required=True,
+        help="Path to L3 tx .csv files. Can be direct paths or glob patterns",
+    )
+    parser.add_argument(
+        "--period_start",
+        "-s",
+        required=True,
+        help="Datetime string for period start. Eg '2024-01-01T00:00' or '20240101",
+    )
+    parser.add_argument(
+        "--period_end", "-e", required=True, help="Datetime string for period end"
+    )
+    parser.add_argument(
+        "--output_root",
+        "-o",
+        required=True,
+        type=Path,
+        help="Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.",
+    )
+    parser.add_argument(
+        "--override",
+        "-f",
+        default=False,
+        action="store_true",
+        help="Recreate and overide existing output files",
+    )
+    args = parser.parse_args()
+
+    # Interpret all input file paths as glob patterns if they don't exist
+    input_files: List[Path] = list()
+    for path in args.input_files:
+        if path.exists():
+            input_files.append(path)
+        else:
+            # The input path might be a glob pattern
+            input_files += map(Path, glob.glob(path.as_posix()))
+
+    main_logger.info(f"Processing {len(input_files)} input files")
+    create_bufr_files(
+        input_files=input_files,
+        period_start=args.period_start,
+        period_end=args.period_end,
+        output_root=args.output_root,
+        override=args.override,
+    )
+
+if __name__ == "__main__":
+    main()
diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 433f94e2..1c5fbe09 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -300,6 +300,7 @@ def get_bufr(
     earliest_timestamp: datetime = None,
     store_positions: bool = False,
     time_limit: str = "91d",
+    break_on_error: bool = False,
 ):
     """
     Main function for generating BUFR files and determine latest positions from a sequence of csv files
@@ -330,6 +331,8 @@ def get_bufr(
         Flag determine if latest positions are exported.
     time_limit
         Previous time to limit dataframe before applying linear regression.
+    break_on_error
+        If True, the function will raise an exception if an error occurs during processing.
 
     """
     if now_timestamp is None:
@@ -378,6 +381,7 @@ def get_bufr(
 
     # Iterate through csv files
     for file_path in input_files:
+        # TODO: This split explicitly requires the filename to have the sample rate as suffix. This should be more robust
         stid = file_path.stem.rsplit("_", 1)[0]
         logger.info("####### Processing {} #######".format(stid))
 
@@ -405,6 +409,8 @@ def get_bufr(
             )
         except Exception:
             logger.exception(f"Failed processing {stid}")
+            if break_on_error:
+                raise
             continue
 
         if station_position is None:
diff --git a/src/pypromice/postprocess/real_time_utilities.py b/src/pypromice/postprocess/real_time_utilities.py
index bbc0b337..17efac92 100644
--- a/src/pypromice/postprocess/real_time_utilities.py
+++ b/src/pypromice/postprocess/real_time_utilities.py
@@ -66,6 +66,10 @@ def get_latest_data(
         lin_reg_time_limit,
     )
 
+    if last_valid_index not in df_limited.index:
+        logger.info("No valid data limited period")
+        return None
+
     # Apply smoothing to z_boom_u
     # require at least 2 hourly obs? Sometimes seeing once/day data for z_boom_u
     df_limited = rolling_window(df_limited, "z_boom_u", "72H", 2, 1)
diff --git a/tests/unit/bufr_export/test_create_bufr_files.py b/tests/unit/bufr_export/test_create_bufr_files.py
new file mode 100644
index 00000000..0b209fbf
--- /dev/null
+++ b/tests/unit/bufr_export/test_create_bufr_files.py
@@ -0,0 +1,187 @@
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Optional
+from unittest import TestCase
+
+from pypromice.postprocess.create_bufr_files import create_bufr_files
+
+DATA_DIR = Path(__file__).parent.absolute()
+
+
+def create_data_file(path: Path, src_path: Optional[Path] = None):
+    if src_path is None:
+        src_path = Path("/dev/null")
+
+    path.parent.mkdir(exist_ok=True, parents=True)
+    with src_path.open() as fp_src:
+        with path.open("w") as fp_out:
+            fp_out.write(fp_src.read())
+
+
+class TestCreateBufrFiles(TestCase):
+    def setUp(self):
+        self._temp_dir = TemporaryDirectory()
+        self.temp_dir = Path(self._temp_dir.name)
+
+    def tearDown(self):
+        self._temp_dir.cleanup()
+
+    def test_create_bufr_files(self):
+        """
+        Test the creation of bufr files and their output folder structure.
+        It does not test the content of the bufr files.
+        """
+        input_dir = self.temp_dir / "input"
+        output_dir = self.temp_dir / "output"
+        input_files = [
+            input_dir / "THU_L2_hourly.csv",
+            input_dir / "KAN_Lv3_hourly.csv",
+        ]
+        # Use the same data for all input files
+        for input_file in input_files:
+            create_data_file(
+                input_file,
+                src_path=DATA_DIR.joinpath("tx_l3_test1.csv"),
+            )
+
+        create_bufr_files(
+            input_files=input_files,
+            period_start="2023-12-06T00:00",
+            period_end="2023-12-06T04:00",
+            output_root=output_dir,
+            override=True,
+            break_on_error=True,
+        )
+
+        compiled_output_dir = output_dir / "compiled"
+        individual_output_root = output_dir / "individual"
+        self.assertTrue(compiled_output_dir.exists())
+        self.assertTrue(individual_output_root.exists())
+        expected_output_timestamps = [
+            "20231206T0000",
+            "20231206T0100",
+            "20231206T0200",
+            "20231206T0300",
+            "20231206T0400",
+        ]
+        compiled_output_files = sorted(compiled_output_dir.glob("*.bufr"))
+        expected_output_file_names = sorted(
+            [
+                f"geus_{timestamp_str}.bufr"
+                for timestamp_str in expected_output_timestamps
+            ]
+        )
+        self.assertListEqual(
+            expected_output_file_names, [p.name for p in compiled_output_files]
+        )
+        individual_output_dirs = sorted(individual_output_root.glob("*"))
+        self.assertListEqual(
+            expected_output_timestamps, [p.stem for p in individual_output_dirs]
+        )
+        for dir in individual_output_dirs:
+            # There should be a bufr file for each station
+            self.assertTrue((dir / "THU_L2.bufr").exists())
+            self.assertTrue((dir / "KAN_Lv3.bufr").exists())
+
+    def test_get_bufr_from_empty_data_file_raises_error(self):
+        input_dir = self.temp_dir / "input"
+        output_dir = self.temp_dir / "output"
+        input_file = input_dir / "THU_L2_hourly.csv"
+        create_data_file(input_file, src_path=None)
+
+        with self.assertRaises(ValueError):
+            create_bufr_files(
+                input_files=[input_file],
+                period_start="2023-12-06T00:00",
+                period_end="2023-12-06T04:00",
+                output_root=output_dir,
+                override=True,
+                break_on_error=True,
+            )
+
+    def test_get_gufr_continues_when_break_on_error_is_false(self):
+        input_dir = self.temp_dir / "input"
+        output_dir = self.temp_dir / "output"
+        input_file_without_data = input_dir / "THU_L2_hourly.csv"
+        create_data_file(input_file_without_data, src_path=None)
+        input_file_with_data = input_dir / "KAN_Lv3_hourly.csv"
+        create_data_file(
+            input_file_with_data, src_path=DATA_DIR.joinpath("tx_l3_test1.csv")
+        )
+        compiled_output_dir = output_dir / "compiled"
+        individual_output_root = output_dir / "individual"
+        expected_compiled_output_file = compiled_output_dir / "geus_20231206T0000.bufr"
+        expected_individual_output_dir = individual_output_root / "20231206T0000"
+        expected_individual_output_file = (
+            expected_individual_output_dir / "KAN_Lv3.bufr"
+        )
+
+        create_bufr_files(
+            input_files=[
+                input_file_without_data,
+                input_file_with_data,
+            ],
+            period_start="2023-12-06T00:00",
+            period_end="2023-12-06T00:00",
+            output_root=output_dir,
+            override=True,
+            break_on_error=False,
+        )
+
+        self.assertTrue(expected_compiled_output_file.exists())
+        # There should only be a single output file since the first input file is empty
+        self.assertEqual(1, len(list(expected_individual_output_dir.glob("*"))))
+        self.assertTrue(expected_individual_output_file.exists())
+        individual_data = expected_individual_output_file.read_bytes()
+        compiled_data = expected_compiled_output_file.read_bytes()
+        self.assertEqual(
+            individual_data,
+            compiled_data,
+        )
+
+    def test_get_bufr_where_period_does_not_exist(self):
+        input_dir = self.temp_dir / "input"
+        output_dir = self.temp_dir / "output"
+        input_file = input_dir / "THU_L2_hourly.csv"
+        create_data_file(input_file, src_path=DATA_DIR.joinpath("tx_l3_test1.csv"))
+
+        create_bufr_files(
+            input_files=[input_file],
+            period_start="2025-12-06T00:00",
+            period_end="2025-12-06T04:00",
+            output_root=output_dir,
+            override=True,
+            break_on_error=True,
+        )
+
+        compiled_output_dir = output_dir / "compiled"
+        individual_output_root = output_dir / "individual"
+        self.assertTrue(compiled_output_dir.exists())
+        self.assertTrue(individual_output_root.exists())
+        expected_output_timestamps = [
+            "20251206T0000",
+            "20251206T0100",
+            "20251206T0200",
+            "20251206T0300",
+            "20251206T0400",
+        ]
+        compiled_output_files = sorted(compiled_output_dir.glob("*.bufr"))
+        expected_output_file_names = sorted(
+            [
+                f"geus_{timestamp_str}.bufr"
+                for timestamp_str in expected_output_timestamps
+            ]
+        )
+        self.assertListEqual(
+            expected_output_file_names, [p.name for p in compiled_output_files]
+        )
+        for file in compiled_output_files:
+            # The compiled bufr files should be empty
+            self.assertEqual(0, file.stat().st_size)
+        individual_output_dirs = sorted(individual_output_root.glob("*"))
+        self.assertListEqual(
+            expected_output_timestamps, [p.stem for p in individual_output_dirs]
+        )
+        for dir in individual_output_dirs:
+            # There should be no bufr files in the individual directories
+            self.assertEqual(0, len(list(dir.glob("*.bufr"))))

From 40547e8031355c1e06b1937b5530ba333c5b846f Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Tue, 11 Jun 2024 15:28:54 +0200
Subject: [PATCH 06/16] Updated tests parameters

Updated station config:
* Added sonic_ranger_from_gps
* Changed height_of_gps_from_station_ground from 0 to 1
---
 .../bufr_export/test_get_bufr_integration.py  | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tests/unit/bufr_export/test_get_bufr_integration.py b/tests/unit/bufr_export/test_get_bufr_integration.py
index 541962cd..f7b3cde4 100644
--- a/tests/unit/bufr_export/test_get_bufr_integration.py
+++ b/tests/unit/bufr_export/test_get_bufr_integration.py
@@ -96,7 +96,6 @@ def run_get_bufr(
             return read_bufr_message(fp)
 
 
-
 class PreRefactoringBufrTestCase(TestCase):
     @staticmethod
     def get_station_configuration_mapping(
@@ -107,7 +106,8 @@ def get_station_configuration_mapping(
         barometer_from_gps: float = 0.0,
         anemometer_from_sonic_ranger: float = 0.4,
         temperature_from_sonic_ranger: float = -0.1,
-        height_of_gps_from_station_ground: float = 0.0,
+        height_of_gps_from_station_ground: float = 1.0,
+        sonic_ranger_from_gps: float = 1.5,
         skipped_variables=(),
         comment=None,
         export_bufr=True,
@@ -122,6 +122,7 @@ def get_station_configuration_mapping(
                 barometer_from_gps=barometer_from_gps,
                 anemometer_from_sonic_ranger=anemometer_from_sonic_ranger,
                 temperature_from_sonic_ranger=temperature_from_sonic_ranger,
+                sonic_ranger_from_gps=sonic_ranger_from_gps,
                 height_of_gps_from_station_ground=height_of_gps_from_station_ground,
                 skipped_variables=skipped_variables,
                 comment=comment,
@@ -159,7 +160,7 @@ def test_get_bufr_has_new_data(self):
             windSpeed=14.9,
             latitude=66.48249,
             longitude=-46.29427,
-            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
@@ -169,7 +170,6 @@ def test_get_bufr_has_new_data(self):
             expected_bufr_variables.as_series(),
         )
 
-
     def test_get_bufr_has_new_data_dont_store_position(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
         l3_src = pd.read_csv(l3_src_filepath)
@@ -199,7 +199,7 @@ def test_get_bufr_has_new_data_dont_store_position(self):
             windSpeed=14.9,
             latitude=66.48249,
             longitude=-46.29427,
-            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
@@ -209,7 +209,6 @@ def test_get_bufr_has_new_data_dont_store_position(self):
             expected_bufr_variables.as_series(),
         )
 
-
     def test_get_bufr_stid_to_skip(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
         l3_src = pd.read_csv(l3_src_filepath)
@@ -280,7 +279,7 @@ def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
             windSpeed=14.9,
             latitude=66.48249,
             longitude=-46.29427,
-            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
@@ -341,7 +340,7 @@ def test_invalid_value_at_last_index(self):
             windSpeed=14.9,
             latitude=66.48249,
             longitude=-46.29427,
-            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
@@ -479,7 +478,7 @@ def test_ignore_newer_data_than_now_input(self):
             windSpeed=10.4,
             latitude=66.48249,
             longitude=-46.29426,
-            heightOfStationGroundAboveMeanSeaLevel=2124.3,
+            heightOfStationGroundAboveMeanSeaLevel=2123.3,
             heightOfBarometerAboveMeanSeaLevel=2124.3,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
@@ -520,7 +519,7 @@ def test_land_station_export(self):
             windSpeed=14.9,
             latitude=66.48249,
             longitude=-46.29427,
-            heightOfStationGroundAboveMeanSeaLevel=2124.7,
+            heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
             heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,

From b37fbf46ab660f5ceb168181845c34f92bf02f81 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 21 Jun 2024 13:27:27 +0200
Subject: [PATCH 07/16] Added test for missing data in get_bufr

- Ensure get_bufr_variables raises AttributeError when station dimensions are missing
---
 src/pypromice/postprocess/get_bufr.py   | 33 ++++++++++++---------
 tests/unit/bufr_export/test_get_bufr.py | 39 +++++++------------------
 2 files changed, 30 insertions(+), 42 deletions(-)

diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 1c5fbe09..e6e248f1 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -492,7 +492,13 @@ def get_bufr_variables(
     station_configuration: StationConfiguration,
 ) -> BUFRVariables:
     """
-    Helper function for converting our  variables to the variables needed for bufr export.
+    Helper function for converting our variables to the variables needed for bufr export.
+
+    Raises AttributeError if station_configuration does not have the minimum dimension fields, since they are required to determine the station ground and barometer heights.
+    * height_of_gps_from_station_ground
+    * barometer_from_gps
+
+
 
     Parameters
     ----------
@@ -506,13 +512,21 @@ def get_bufr_variables(
     BUFRVariables used by bufr_utilities
 
     """
+
     if station_configuration.height_of_gps_from_station_ground is None:
-        heightOfStationGroundAboveMeanSeaLevel = np.nan
-    else:
-        heightOfStationGroundAboveMeanSeaLevel = (
-            data["gps_alt_fit"]
-            - station_configuration.height_of_gps_from_station_ground
+        raise AttributeError(
+            "height_of_gps_from_station_ground is required for BUFR export"
         )
+    if station_configuration.barometer_from_gps is None:
+        raise AttributeError("barometer_from_gps is required for BUFR export")
+
+    heightOfStationGroundAboveMeanSeaLevel = (
+        data["gps_alt_fit"] - station_configuration.height_of_gps_from_station_ground
+    )
+
+    heightOfBarometerAboveMeanSeaLevel = (
+        data["gps_alt_fit"] + station_configuration.barometer_from_gps
+    )
 
     if station_configuration.temperature_from_sonic_ranger is None:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
@@ -529,13 +543,6 @@ def get_bufr_variables(
             data["z_boom_u_smooth"] + station_configuration.anemometer_from_sonic_ranger
         )
 
-    if station_configuration.barometer_from_gps is None:
-        heightOfBarometerAboveMeanSeaLevel = np.nan
-    else:
-        heightOfBarometerAboveMeanSeaLevel = (
-            data["gps_alt_fit"] + station_configuration.barometer_from_gps
-        )
-
     output_row = BUFRVariables(
         wmo_id=station_configuration.wmo_id,
         station_type=station_configuration.station_type,
diff --git a/tests/unit/bufr_export/test_get_bufr.py b/tests/unit/bufr_export/test_get_bufr.py
index 5095c2d8..a987607b 100644
--- a/tests/unit/bufr_export/test_get_bufr.py
+++ b/tests/unit/bufr_export/test_get_bufr.py
@@ -199,7 +199,10 @@ def test_bufr_variables_promice_v3(self):
             heightOfBarometerAboveMeanSeaLevel=2126,
         )
 
-    def test_none_values_in_config(self):
+    def test_fails_on_missing_dimension_values(self):
+        """
+        Test that get_bufr_variables raises an AttributeError if the data is missing
+        """
         timestamp = datetime.datetime.now()
         data = pd.Series(
             data=dict(
@@ -220,36 +223,14 @@ def test_none_values_in_config(self):
             stid="A_STID",
             station_type="land",
             wmo_id="4201",
-            barometer_from_gps=0.2,
-            anemometer_from_sonic_ranger=0.1,
-            temperature_from_sonic_ranger=1.3,
-            height_of_gps_from_station_ground=2.1,
-        )
-
-        output = get_bufr_variables(
-            data,
-            station_configuration=station_config,
+            export_bufr=True,
         )
 
-        self.assertEqual(
-            BUFRVariables(
-                wmo_id=station_config.wmo_id,
-                station_type=station_config.station_type,
-                timestamp=timestamp,
-                relativeHumidity=1.0,
-                airTemperature=252.2,  # Converted to kelvin
-                pressure=199300.0,
-                windDirection=32.0,
-                windSpeed=5.3,
-                latitude=66.0,
-                longitude=-46.0,
-                heightOfStationGroundAboveMeanSeaLevel=1091.9,
-                heightOfBarometerAboveMeanSeaLevel=1094.2,
-                heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=3.4,
-                heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=2.2,
-            ),
-            output,
-        )
+        with self.assertRaises(AttributeError) as context:
+            get_bufr_variables(
+                data,
+                station_configuration=station_config,
+            )
 
     @mock.patch("pypromice.postprocess.get_bufr.write_bufr_message")
     def _test_bufr_variables(

From 7049c0e2577c912476be753d95f07ce00d9cc6b9 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 21 Jun 2024 11:31:46 +0200
Subject: [PATCH 08/16] Updated get_bufr to support static GPS heights.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Bedrock stations shouldn’t depend on the noisy GPS signal for elevation.
* Added station dimension values for WEG_B
* Added corresponding unittest
---
 src/pypromice/postprocess/get_bufr.py         | 13 +++-
 .../postprocess/station_configurations.toml   |  6 ++
 tests/unit/bufr_export/test_get_bufr.py       | 62 +++++++++++++++++++
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index e6e248f1..00125050 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -144,6 +144,7 @@ class StationConfiguration:
     temperature_from_sonic_ranger: Optional[float] = None
     height_of_gps_from_station_ground: Optional[float] = None
     sonic_ranger_from_gps: Optional[float] = None
+    static_height_of_gps_from_mean_sea_level: Optional[float] = None
 
     # The station data will be exported to BUFR if True. Otherwise, it will only export latest position
     export_bufr: bool = False
@@ -520,14 +521,22 @@ def get_bufr_variables(
     if station_configuration.barometer_from_gps is None:
         raise AttributeError("barometer_from_gps is required for BUFR export")
 
+    if station_configuration.static_height_of_gps_from_mean_sea_level is None:
+        height_of_gps_above_mean_sea_level = data["gps_alt_fit"]
+    else:
+        height_of_gps_above_mean_sea_level = (
+            station_configuration.static_height_of_gps_from_mean_sea_level
+        )
+
     heightOfStationGroundAboveMeanSeaLevel = (
-        data["gps_alt_fit"] - station_configuration.height_of_gps_from_station_ground
+        height_of_gps_above_mean_sea_level - station_configuration.height_of_gps_from_station_ground
     )
 
     heightOfBarometerAboveMeanSeaLevel = (
-        data["gps_alt_fit"] + station_configuration.barometer_from_gps
+        height_of_gps_above_mean_sea_level + station_configuration.barometer_from_gps
     )
 
+
     if station_configuration.temperature_from_sonic_ranger is None:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
     else:
diff --git a/src/pypromice/postprocess/station_configurations.toml b/src/pypromice/postprocess/station_configurations.toml
index 99bca21e..b6897327 100644
--- a/src/pypromice/postprocess/station_configurations.toml
+++ b/src/pypromice/postprocess/station_configurations.toml
@@ -514,6 +514,12 @@ station_site = "NUK_U"
 project = "Wegener"
 station_type = "land"
 wmo_id = "460"
+barometer_from_gps = 1.3
+anemometer_from_sonic_ranger = 0.4
+temperature_from_sonic_ranger = 0.0
+height_of_gps_from_station_ground = 0.9
+sonic_ranger_from_gps = 1.3
+static_height_of_gps_from_mean_sea_level = 17.5
 export_bufr = true
 skipped_variables = []
 positions_update_timestamp_only = false
diff --git a/tests/unit/bufr_export/test_get_bufr.py b/tests/unit/bufr_export/test_get_bufr.py
index a987607b..d9ffa078 100644
--- a/tests/unit/bufr_export/test_get_bufr.py
+++ b/tests/unit/bufr_export/test_get_bufr.py
@@ -199,6 +199,68 @@ def test_bufr_variables_promice_v3(self):
             heightOfBarometerAboveMeanSeaLevel=2126,
         )
 
+    def test_bufr_variables_static_gps_elevation(self):
+        timestamp = datetime.datetime.now()
+        data = pd.Series(
+            data=dict(
+                rh_i=0.93,
+                t_i=-21,
+                name="",
+                p_i=993,
+                wdir_i=32.1,
+                wspd_i=5.3,
+                gps_lon_fit=-46.0,
+                gps_lat_fit=66.0,
+                # This is an erroneous value that should be overridden by the static value
+                gps_alt_fit=142.1,
+                z_boom_u_smooth=2.1,
+            ),
+            name=timestamp,
+        )
+        station_config = StationConfiguration(
+            stid="A_STID",
+            station_type="land",
+            wmo_id="4201",
+            export_bufr=True,
+            barometer_from_gps=1.3,
+            height_of_gps_from_station_ground=0.9,
+            static_height_of_gps_from_mean_sea_level=17.5,
+            anemometer_from_sonic_ranger=None,
+            temperature_from_sonic_ranger=None,
+            sonic_ranger_from_gps=None,
+        )
+        # The elevations should be determined from the static variable
+        expected_station_ground_elevation = 17.5 - 0.9
+        expected_barometer_elevation = 17.5 + 1.3
+
+        expected_bufr_variables = BUFRVariables(
+            wmo_id=station_config.wmo_id,
+            station_type=station_config.station_type,
+            timestamp=timestamp,
+            relativeHumidity=1.0,
+            airTemperature=252.2,  # Converted to kelvin
+            pressure=199300.0,
+            windDirection=32.0,
+            windSpeed=5.3,
+            latitude=66.0,
+            longitude=-46.0,
+            heightOfStationGroundAboveMeanSeaLevel=expected_station_ground_elevation,
+            heightOfBarometerAboveMeanSeaLevel=expected_barometer_elevation,
+            # The sensor heights are ignored since the necessary dimension values are missing
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=np.nan,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=np.nan,
+        )
+
+        output = get_bufr_variables(
+            data,
+            station_configuration=station_config,
+        )
+
+        self.assertEqual(
+            expected_bufr_variables,
+            output,
+        )
+
     def test_fails_on_missing_dimension_values(self):
         """
         Test that get_bufr_variables raises an AttributeError if the data is missing

From f1285274172f1b7a844098256fc641eb859325e0 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 28 Jun 2024 14:23:51 +0200
Subject: [PATCH 09/16] Updated github/workflow to run unittests

Added eccodes installation
---
 .github/workflows/unit_test.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
index 14aa27e8..e0ebc350 100644
--- a/.github/workflows/unit_test.yml
+++ b/.github/workflows/unit_test.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
   
 jobs:
-  build:
+  test:
     name: unit_test
     runs-on: ubuntu-latest
     strategy:
@@ -19,6 +19,9 @@ jobs:
         uses: actions/checkout@v3
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Install eccodes
+        run : |
+          sudo apt-get install -y libeccodes-dev
       - name: Install dependencies
         shell: bash
         run: |
@@ -30,4 +33,4 @@ jobs:
       - name: Run unit tests
         shell: bash
         run: |
-          python3 -m unittest discover tests.e2e
+          python3 -m unittest discover tests

From 745f85b439cddef99f1c422a5b160f531ca737ec Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Mon, 8 Jul 2024 12:53:02 +0200
Subject: [PATCH 10/16] Updated get_bufr to support station config files in
 folder

* Removed station_configurations.toml from repository
* Updated bufr_utilities.set_station to validate wmo id
* Implemented StationConfig io tests
* Extracted StationConfiguration utils from get_bufr
* Added support for loading multiple station configuration files

Other
* Made ArgumentParser instantiation inline
---
 setup.py                                      |   2 +-
 src/pypromice/postprocess/bufr_utilities.py   |   5 +
 .../postprocess/create_bufr_files.py          |  15 +-
 src/pypromice/postprocess/get_bufr.py         | 268 ++----
 .../postprocess/station_configurations.toml   | 768 ------------------
 src/pypromice/station_configuration.py        |  99 +++
 .../bufr_export/test_create_bufr_files.py     |  45 +-
 tests/unit/bufr_export/test_get_bufr.py       | 208 ++---
 .../bufr_export/test_get_bufr_integration.py  |  21 +-
 tests/unit/test_station_config.py             | 143 ++++
 tests/utilities.py                            |  61 ++
 11 files changed, 514 insertions(+), 1121 deletions(-)
 delete mode 100644 src/pypromice/postprocess/station_configurations.toml
 create mode 100644 src/pypromice/station_configuration.py
 create mode 100644 tests/unit/test_station_config.py
 create mode 100644 tests/utilities.py

diff --git a/setup.py b/setup.py
index db9f6365..08b72656 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@
     package_data={
     	"pypromice.tx": ["payload_formats.csv", "payload_types.csv"],
         "pypromice.qc.percentiles": ["thresholds.csv"],
-        "pypromice.postprocess": ["station_configurations.toml", "positions_seed.csv"],
+        "pypromice.postprocess": ["positions_seed.csv"],
     },
     install_requires=['numpy~=1.23', 'pandas>=1.5.0', 'xarray>=2022.6.0', 'toml', 'scipy>=1.9.0', 'Bottleneck', 'netcdf4', 'pyDataverse==0.3.1', 'eccodes', 'scikit-learn>=1.1.0'],
 #    extras_require={'postprocess': ['eccodes','scikit-learn>=1.1.0']},
diff --git a/src/pypromice/postprocess/bufr_utilities.py b/src/pypromice/postprocess/bufr_utilities.py
index 8293ac86..00e036d8 100644
--- a/src/pypromice/postprocess/bufr_utilities.py
+++ b/src/pypromice/postprocess/bufr_utilities.py
@@ -248,6 +248,11 @@ def set_station(ibufr, station_type: str, wmo_id: str):
     elif station_type == "land":
         # StationNumber for land stations are integeres
         wmo_id_int = int(wmo_id)
+        if wmo_id_int >= 1024:
+            raise ValueError(
+                f"Invalid WMO ID {wmo_id}. Land station number must be less than 1024. "
+                "See https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/001002"
+            )
         station_config = dict(stationNumber=wmo_id_int)
     else:
         raise Exception(f"Unsupported station station type {station_type}")
diff --git a/src/pypromice/postprocess/create_bufr_files.py b/src/pypromice/postprocess/create_bufr_files.py
index 01a26770..2b9925c0 100644
--- a/src/pypromice/postprocess/create_bufr_files.py
+++ b/src/pypromice/postprocess/create_bufr_files.py
@@ -3,11 +3,11 @@
 from typing import Sequence, List
 
 import pandas as pd
+from pypromice.station_configuration import load_station_configuration_mapping
 
 from pypromice.postprocess.get_bufr import (
     get_bufr,
     DEFAULT_LIN_REG_TIME_LIMIT,
-    DEFAULT_STATION_CONFIGURATION_PATH,
     DEFAULT_POSITION_SEED_PATH,
 )
 
@@ -16,6 +16,7 @@
 
 def create_bufr_files(
     input_files: Sequence[Path],
+    station_configuration_root: Path,
     period_start: str,
     period_end: str,
     output_root: Path,
@@ -40,6 +41,8 @@ def create_bufr_files(
     output_individual_root.mkdir(parents=True, exist_ok=True)
     output_compiled_root.mkdir(parents=True, exist_ok=True)
 
+    station_configuration_mapping = load_station_configuration_mapping(station_configuration_root)
+
     for period in periods:
         period: pd.Timestamp
         date_str = period.strftime("%Y%m%dT%H%M")
@@ -59,7 +62,7 @@ def create_bufr_files(
                 time_limit=DEFAULT_LIN_REG_TIME_LIMIT,
                 timestamps_pickle_filepath=None,
                 now_timestamp=period,
-                station_configuration_path=DEFAULT_STATION_CONFIGURATION_PATH,
+                station_configuration_mapping=station_configuration_mapping,
                 positions_seed_path=DEFAULT_POSITION_SEED_PATH,
                 break_on_error=break_on_error,
             )
@@ -121,6 +124,13 @@ def main():
         type=Path,
         help="Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.",
     )
+    parser.add_argument(
+        "--station_configuration_root",
+        "-c",
+        required=True,
+        type=Path,
+        help="Root directory containing station configuration toml files",
+    )
     parser.add_argument(
         "--override",
         "-f",
@@ -146,6 +156,7 @@ def main():
         period_end=args.period_end,
         output_root=args.output_root,
         override=args.override,
+        station_configuration_root=args.station_configuration_root,
     )
 
 if __name__ == "__main__":
diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 00125050..ab198236 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 """
 Command-line script for running BUFR file generation
 
@@ -13,12 +11,10 @@
 import sys
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import List, Dict, Mapping, Optional, Collection, Sequence, Union, TextIO
+from typing import List, Dict, Optional, Collection, Sequence, Mapping
 
-import attrs
 import numpy as np
 import pandas as pd
-import toml
 
 from pypromice.postprocess.bufr_utilities import write_bufr_message, BUFRVariables
 from pypromice.postprocess.real_time_utilities import get_latest_data
@@ -26,184 +22,21 @@
 __all__ = [
     "get_bufr",
     "main",
-    "DEFAULT_STATION_CONFIGURATION_PATH",
     "DEFAULT_POSITION_SEED_PATH",
     "DEFAULT_LIN_REG_TIME_LIMIT",
 ]
 
+from pypromice.station_configuration import (
+    StationConfiguration,
+    load_station_configuration_mapping,
+)
+
 logger = logging.getLogger(__name__)
 
-DEFAULT_STATION_CONFIGURATION_PATH = Path(__file__).parent.joinpath(
-    "station_configurations.toml"
-)
 DEFAULT_POSITION_SEED_PATH = Path(__file__).parent.joinpath("positions_seed.csv")
 DEFAULT_LIN_REG_TIME_LIMIT = "91d"
 
 
-def parse_arguments_bufr() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--store_positions",
-        "--positions",
-        action="store_true",
-        required=False,
-        default=False,
-        help="If included (True), make a positions dict and output AWS_latest_locations.csv file.",
-    )
-
-    parser.add_argument(
-        "--positions-filepath",
-        "-p",
-        type=Path,
-        required=False,
-        help="Path to write AWS_latest_locations.csv file.",
-    )
-
-    parser.add_argument(
-        "--time-limit",
-        default=DEFAULT_LIN_REG_TIME_LIMIT,
-        type=str,
-        required=False,
-        help="Previous time to limit dataframe before applying linear regression.",
-    )
-
-    parser.add_argument(
-        "--input_files",
-        "--l3-filepath",
-        "-i",
-        type=Path,
-        nargs="+",
-        required=True,
-        help="Path to L3 tx .csv files. Can be direct paths or glob patterns",
-    )
-
-    parser.add_argument(
-        "--bufr-out",
-        "-o",
-        type=Path,
-        required=True,
-        help="Path to the BUFR out directory.",
-    )
-
-    parser.add_argument(
-        "--timestamps-pickle-filepath",
-        type=Path,
-        required=False,
-        help="Path to the latest_timestamps.pickle file.",
-    )
-
-    parser.add_argument(
-        "--station_configuration_mapping",
-        default=DEFAULT_STATION_CONFIGURATION_PATH,
-        type=Path,
-        required=False,
-        help="Path to csv file with station meta data and BUFR export configuration",
-    )
-
-    parser.add_argument(
-        "--position_seed",
-        default=DEFAULT_POSITION_SEED_PATH,
-        type=Path,
-        required=False,
-        help="Path to csv file with seed values for output positions.",
-    )
-
-    parser.add_argument(
-        "--latest_timestamp",
-        default=datetime.utcnow(),
-        type=pd.Timestamp,
-        help="Timestamp used to determine latest data. Default utcnow.",
-    )
-
-    parser.add_argument("--verbose", "-v", default=False, action="store_true")
-
-    return parser
-
-
-@attrs.define
-class StationConfiguration:
-    """
-    Helper class for storing station specific configurations with respect to
-
-    * Installation specific distance measurements such as height differences between instruments
-    * Reference strings such as stid, station_site and wmo_id
-    * BUFR export specific parameters
-
-    # TODO: The station related meta data should be fetched from a station specific configuration files in the future or
-    # from header data in data source.
-    """
-
-    stid: str
-    station_site: str = None
-    project: Optional[str] = None
-    station_type: Optional[str] = None
-    wmo_id: Optional[str] = None
-    barometer_from_gps: Optional[float] = None
-    anemometer_from_sonic_ranger: Optional[float] = None
-    temperature_from_sonic_ranger: Optional[float] = None
-    height_of_gps_from_station_ground: Optional[float] = None
-    sonic_ranger_from_gps: Optional[float] = None
-    static_height_of_gps_from_mean_sea_level: Optional[float] = None
-
-    # The station data will be exported to BUFR if True. Otherwise, it will only export latest position
-    export_bufr: bool = False
-    comment: Optional[str] = None
-
-    # skip specific variables for stations
-    # If a variable has known bad data, use this collection to skip the variable
-    # Note that if a station is not reporting both air temp and pressure it will be skipped,
-    # as currently implemented in csv2bufr.min_data_check().
-    # ['p_i'], # EXAMPLE
-    skipped_variables: List[str] = attrs.field(factory=list)
-
-    positions_update_timestamp_only: bool = False
-
-    def as_dict(self) -> Dict:
-        return attrs.asdict(self)
-
-
-def load_station_configuration_mapping(
-    fp: Union[str, Path, TextIO]
-) -> Mapping[str, StationConfiguration]:
-    """
-    Read station configurations from toml file
-
-    Parameters
-    ----------
-    fp :
-        Path to or open toml file
-
-    Returns
-    -------
-    Mapping from stid to StationConfiguration
-
-    """
-    return {
-        stid: StationConfiguration(**config_dict)
-        for stid, config_dict in toml.load(fp).items()
-    }
-
-
-def write_station_configuration_mapping(
-    config_mapping: Mapping[str, StationConfiguration], fp: TextIO
-):
-    """
-    Write station configuration to toml file
-
-    Parameters
-    ----------
-    config_mapping
-        Mapping from stid to StationConfiguration
-    fp
-        open writable TextIO
-    """
-    config_mapping = {
-        config.stid: config.as_dict() for config in config_mapping.values()
-    }
-    toml.dump(config_mapping, fp)
-
-
 def process_station(
     file_path: Path,
     output_path: Path,
@@ -295,7 +128,7 @@ def get_bufr(
     input_files: Sequence[Path],
     positions_filepath: Optional[Path],
     timestamps_pickle_filepath: Optional[Path],
-    station_configuration_path: Optional[Path],
+    station_configuration_mapping: Mapping[str, StationConfiguration],
     now_timestamp: Optional[datetime] = None,
     positions_seed_path: Optional[Path] = None,
     earliest_timestamp: datetime = None,
@@ -320,8 +153,8 @@ def get_bufr(
         Path to write latest positions. Used to retrieve a static set of positions to register stations with DMI/WMO
     timestamps_pickle_filepath
         Path to pickle file used for storing latest timestamp
-    station_configuration_path
-        Path to toml file with configuration entries for each station
+    station_configuration_mapping
+        Mapping of station id to StationConfiguration object
     now_timestamp
         get_bufr will export the latest data before now_timestamp. Default datetime.utcnow()
     positions_seed_path
@@ -351,14 +184,6 @@ def get_bufr(
         logger.info(f"Seed positions for {positions_seed.keys()}")
         positions.update(positions_seed)
 
-    # Prepare station configurations
-    if station_configuration_path is None:
-        station_configuration_mapping = dict()
-    else:
-        station_configuration_mapping = load_station_configuration_mapping(
-            station_configuration_path
-        )
-
     # Prepare bufr output dir
     bufr_out.mkdir(parents=True, exist_ok=True)
 
@@ -529,7 +354,8 @@ def get_bufr_variables(
         )
 
     heightOfStationGroundAboveMeanSeaLevel = (
-        height_of_gps_above_mean_sea_level - station_configuration.height_of_gps_from_station_ground
+        height_of_gps_above_mean_sea_level
+        - station_configuration.height_of_gps_from_station_ground
     )
 
     heightOfBarometerAboveMeanSeaLevel = (
@@ -630,7 +456,73 @@ def min_data_check(s):
 
 
 def main():
-    args = parse_arguments_bufr().parse_args()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--store_positions",
+        "--positions",
+        action="store_true",
+        required=False,
+        default=False,
+        help="If included (True), make a positions dict and output AWS_latest_locations.csv file.",
+    )
+    parser.add_argument(
+        "--positions-filepath",
+        "-p",
+        type=Path,
+        required=False,
+        help="Path to write AWS_latest_locations.csv file.",
+    )
+    parser.add_argument(
+        "--time-limit",
+        default=DEFAULT_LIN_REG_TIME_LIMIT,
+        type=str,
+        required=False,
+        help="Previous time to limit dataframe before applying linear regression.",
+    )
+    parser.add_argument(
+        "--input_files",
+        "--l3-filepath",
+        "-i",
+        type=Path,
+        nargs="+",
+        required=True,
+        help="Path to L3 tx .csv files. Can be direct paths or glob patterns",
+    )
+    parser.add_argument(
+        "--bufr-out",
+        "-o",
+        type=Path,
+        required=True,
+        help="Path to the BUFR out directory.",
+    )
+    parser.add_argument(
+        "--timestamps-pickle-filepath",
+        type=Path,
+        required=False,
+        help="Path to the latest_timestamps.pickle file.",
+    )
+    parser.add_argument(
+        "--station_configuration_root",
+        type=Path,
+        required=True,
+        help="Path to root directory containing station configuration toml files",
+    )
+    parser.add_argument(
+        "--position_seed",
+        default=DEFAULT_POSITION_SEED_PATH,
+        type=Path,
+        required=False,
+        help="Path to csv file with seed values for output positions.",
+    )
+    parser.add_argument(
+        "--latest_timestamp",
+        default=datetime.utcnow(),
+        type=pd.Timestamp,
+        help="Timestamp used to determine latest data. Default utcnow.",
+    )
+    parser.add_argument("--verbose", "-v", default=False, action="store_true")
+
+    args = parser.parse_args()
 
     log_level = logging.INFO
     if args.verbose:
@@ -650,6 +542,8 @@ def main():
             # The input path might be a glob pattern
             input_files += map(Path, glob.glob(path.as_posix()))
 
+    station_configuration_mapping = load_station_configuration_mapping(args.station_configuration_root)
+
     get_bufr(
         bufr_out=args.bufr_out,
         input_files=input_files,
@@ -658,7 +552,7 @@ def main():
         time_limit=args.time_limit,
         timestamps_pickle_filepath=args.timestamps_pickle_filepath,
         now_timestamp=args.latest_timestamp,
-        station_configuration_path=args.station_configuration_mapping,
+        station_configuration_mapping=station_configuration_mapping,
         positions_seed_path=args.position_seed,
     )
 
diff --git a/src/pypromice/postprocess/station_configurations.toml b/src/pypromice/postprocess/station_configurations.toml
deleted file mode 100644
index b6897327..00000000
--- a/src/pypromice/postprocess/station_configurations.toml
+++ /dev/null
@@ -1,768 +0,0 @@
-[CEN2]
-stid = "CEN2"
-station_site = "CEN"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04407"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[CP1]
-stid = "CP1"
-station_site = "CP1"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04442"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[DY2]
-stid = "DY2"
-station_site = "DY2"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04464"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[EGP]
-stid = "EGP"
-station_site = "EGP"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04451"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[HUM]
-stid = "HUM"
-station_site = "HUM"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04432"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NAE]
-stid = "NAE"
-station_site = "NAE"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04420"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NAU]
-stid = "NAU"
-station_site = "NAU"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04443"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NEM]
-stid = "NEM"
-station_site = "NEM"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04436"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NSE]
-stid = "NSE"
-station_site = "NSE"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04488"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[SDL]
-stid = "SDL"
-station_site = "SDL"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04485"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[SDM]
-stid = "SDM"
-station_site = "SDM"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04492"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[TUN]
-stid = "TUN"
-station_site = "TUN"
-project = "GC-Net"
-station_type = "mobile"
-wmo_id = "04425"
-barometer_from_gps = 0.55
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.4
-height_of_gps_from_station_ground = 1.5
-sonic_ranger_from_gps = 0.15
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KAN_M]
-stid = "KAN_M"
-station_site = "KAN_M"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04411"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KAN_U]
-stid = "KAN_U"
-station_site = "KAN_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04409"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[LYN_L]
-stid = "LYN_L"
-station_site = "LYN_L"
-project = "Disko"
-station_type = "mobile"
-wmo_id = "04450"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[LYN_T]
-stid = "LYN_T"
-station_site = "LYN_T"
-project = "Disko"
-station_type = "mobile"
-wmo_id = "04429"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[MIT]
-stid = "MIT"
-station_site = "MIT"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04430"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NUK_K]
-stid = "NUK_K"
-station_site = "NUK_K"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04437"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NUK_L]
-stid = "NUK_L"
-station_site = "NUK_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04403"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[SCO_L]
-stid = "SCO_L"
-station_site = "SCO_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04413"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[SCO_U]
-stid = "SCO_U"
-station_site = "SCO_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04421"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[TAS_A]
-stid = "TAS_A"
-station_site = "TAS_A"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04408"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[TAS_L]
-stid = "TAS_L"
-station_site = "TAS_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04404"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[THU_L]
-stid = "THU_L"
-station_site = "THU_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04424"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[THU_L2]
-stid = "THU_L2"
-station_site = "THU_L2"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04453"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[UPE_L]
-stid = "UPE_L"
-station_site = "UPE_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04423"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[UPE_U]
-stid = "UPE_U"
-station_site = "UPE_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04422"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KAN_Lv3]
-stid = "KAN_Lv3"
-station_site = "KAN_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04412"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NUK_Uv3]
-stid = "NUK_Uv3"
-station_site = "NUK_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04439"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_Lv3]
-stid = "QAS_Lv3"
-station_site = "QAS_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04401"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_Mv3]
-stid = "QAS_Mv3"
-station_site = "QAS_M"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04441"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_Uv3]
-stid = "QAS_Uv3"
-station_site = "QAS_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04402"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[ZAK_Lv3]
-stid = "ZAK_Lv3"
-station_site = "ZAK_L"
-project = "GlacioBasis"
-station_type = "mobile"
-wmo_id = "04461"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[ZAK_Uv3]
-stid = "ZAK_Uv3"
-station_site = "ZAK_U"
-project = "GlacioBasis"
-station_type = "mobile"
-wmo_id = "04462"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[WEG_B]
-stid = "WEG_B"
-station_site = "NUK_U"
-project = "Wegener"
-station_type = "land"
-wmo_id = "460"
-barometer_from_gps = 1.3
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-static_height_of_gps_from_mean_sea_level = 17.5
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KAN_B]
-stid = "KAN_B"
-station_site = "KAN_B"
-project = "Promice"
-station_type = "land"
-wmo_id = "445"
-export_bufr = false
-comment = "no_instantaneous"
-skipped_variables = []
-positions_update_timestamp_only = true
-
-[CEN1]
-stid = "CEN1"
-station_site = "CEN1"
-project = "GC-Net"
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[JAR_O]
-stid = "JAR_O"
-station_site = "JAR"
-project = "GC-Net"
-wmo_id = "04452"
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KAN_L]
-stid = "KAN_L"
-station_site = "KAN_L"
-project = "Promice"
-wmo_id = "04412"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KPC_Lv3]
-stid = "KPC_Lv3"
-station_site = "KPC_L"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04428"
-export_bufr = false
-comment = "v3_bad"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NUK_N]
-stid = "NUK_N"
-station_site = "NUK_N"
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[NUK_U]
-stid = "NUK_U"
-station_site = "NUK_U"
-project = "Promice"
-wmo_id = "04439"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_A]
-stid = "QAS_A"
-station_site = "QAS_A"
-project = "Promice"
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_L]
-stid = "QAS_L"
-station_site = "QAS_L"
-project = "Promice"
-wmo_id = "04401"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_M]
-stid = "QAS_M"
-station_site = "QAS_M"
-project = "Promice"
-wmo_id = "04441"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[QAS_U]
-stid = "QAS_U"
-station_site = "QAS_U"
-project = "Promice"
-wmo_id = "04402"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[SWC_O]
-stid = "SWC_O"
-station_site = "SWC"
-project = "GC-Net"
-wmo_id = "04458"
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[TAS_U]
-stid = "TAS_U"
-station_site = "TAS_U"
-project = "Promice"
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[THU_U]
-stid = "THU_U"
-station_site = "THU_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04454"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = false
-comment = "discontinued"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[UWN]
-stid = "UWN"
-station_site = "UWN"
-export_bufr = false
-comment = "not_registered"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[WEG_L]
-stid = "WEG_L"
-station_site = "WEG_L"
-project = "Wegener"
-export_bufr = false
-comment = "not_registered"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[XXX]
-stid = "XXX"
-station_site = "XXX"
-export_bufr = false
-comment = "test"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[ZAK_A]
-stid = "ZAK_A"
-station_site = "ZAK_A"
-project = "GlacioBasis"
-export_bufr = false
-comment = "not_registered"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[ZAK_L]
-stid = "ZAK_L"
-station_site = "ZAK_L"
-project = "GlacioBasis"
-wmo_id = "04461"
-export_bufr = false
-comment = "use_v3,no_instantaneous"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[ZAK_U]
-stid = "ZAK_U"
-station_site = "ZAK_U"
-project = "GlacioBasis"
-wmo_id = "04462"
-export_bufr = false
-comment = "use_v3,no_instantaneous"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KPC_Uv3]
-stid = "KPC_Uv3"
-station_site = "KPC_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04427"
-export_bufr = false
-comment = "v3_bad"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KPC_L]
-stid = "KPC_L"
-station_site = "KPC_L"
-project = "Promice"
-wmo_id = "04428"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[KPC_U]
-stid = "KPC_U"
-station_site = "KPC_U"
-project = "Promice"
-wmo_id = "04427"
-export_bufr = false
-comment = "use_v3"
-skipped_variables = []
-positions_update_timestamp_only = false
-
-[THU_U2]
-stid = "THU_U2"
-station_site = "THU_U"
-project = "Promice"
-station_type = "mobile"
-wmo_id = "04454"
-barometer_from_gps = -0.25
-anemometer_from_sonic_ranger = 0.4
-temperature_from_sonic_ranger = 0.0
-height_of_gps_from_station_ground = 0.9
-sonic_ranger_from_gps = 1.3
-export_bufr = true
-skipped_variables = []
-positions_update_timestamp_only = false
diff --git a/src/pypromice/station_configuration.py b/src/pypromice/station_configuration.py
new file mode 100644
index 00000000..34e85ff3
--- /dev/null
+++ b/src/pypromice/station_configuration.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+from typing import Optional, Dict, Mapping, Sequence
+
+import attrs
+import toml
+
+
+@attrs.define
+class StationConfiguration:
+    """
+    Helper class for storing station specific configurations with respect to
+
+    * Installation specific distance measurements such as height differences between instruments
+    * Reference strings such as stid, station_site and wmo_id
+    * BUFR export specific parameters
+
+    # TODO: The station related metadata should be fetched from station specific configuration files in the future or
+    # from header data in the data source.
+    """
+
+    stid: str
+    station_site: Optional[str] = None  # Optional: the default is None, like the other reference strings
+    project: Optional[str] = None
+    station_type: Optional[str] = None
+    wmo_id: Optional[str] = None
+    barometer_from_gps: Optional[float] = None
+    anemometer_from_sonic_ranger: Optional[float] = None
+    temperature_from_sonic_ranger: Optional[float] = None
+    height_of_gps_from_station_ground: Optional[float] = None
+    sonic_ranger_from_gps: Optional[float] = None
+    static_height_of_gps_from_mean_sea_level: Optional[float] = None
+    station_relocation: Sequence[str] = attrs.field(factory=list)
+
+    # The station data will be exported to BUFR if True. Otherwise, it will only export latest position
+    export_bufr: bool = False
+    comment: Optional[str] = None
+
+    # skip specific variables for stations
+    # If a variable has known bad data, use this collection to skip the variable
+    # Note that if a station is not reporting both air temp and pressure it will be skipped,
+    # as currently implemented in csv2bufr.min_data_check().
+    # ['p_i'], # EXAMPLE
+    skipped_variables: Sequence[str] = attrs.field(factory=list)
+
+    positions_update_timestamp_only: bool = False
+
+    @classmethod
+    def load_toml(cls, path):  # top-level TOML keys are passed as keyword arguments to the constructor
+        return cls(**toml.load(path))
+
+    def dump_toml(self, path: Path):
+        with path.open("w") as fp:
+            toml.dump(self.as_dict(), fp)
+
+    def as_dict(self) -> Dict:  # plain dict of all attributes, produced by attrs.asdict
+        return attrs.asdict(self)
+
+
+def load_station_configuration_mapping(
+    configuration_root_dir: Path,
+) -> Mapping[str, StationConfiguration]:
+    """
+    Load station configurations from the toml files directly inside configuration_root_dir
+
+    Parameters
+    ----------
+    configuration_root_dir
+        Root directory containing toml files, each holding a single station configuration
+
+    Returns
+    -------
+    Mapping from the stid stored in each file (not the file name) to StationConfiguration
+
+    """
+    configs = map(StationConfiguration.load_toml, configuration_root_dir.glob("*.toml"))
+    # Key on the stid inside each file so the result honours the documented
+    # "stid -> configuration" contract even when a file name differs from its stid.
+    return {config.stid: config for config in configs}
+
+
+def write_station_configuration_mapping(
+    station_configurations: Mapping[str, StationConfiguration],
+    configuration_root_dir: Path,
+) -> None:
+    """
+    Write station configurations to toml files in configuration_root_dir
+
+    Parameters
+    ----------
+    station_configurations
+        Mapping from stid to StationConfiguration; each entry is written to "<stid>.toml"
+    configuration_root_dir
+        Output directory, created (including parents) if it does not exist
+
+    """
+    configuration_root_dir.mkdir(parents=True, exist_ok=True)
+    for stid, station_configuration in station_configurations.items():
+        # Reuse the per-station serializer so single and bulk writes cannot drift apart
+        station_configuration.dump_toml(configuration_root_dir / f"{stid}.toml")
diff --git a/tests/unit/bufr_export/test_create_bufr_files.py b/tests/unit/bufr_export/test_create_bufr_files.py
index 0b209fbf..f9cf935e 100644
--- a/tests/unit/bufr_export/test_create_bufr_files.py
+++ b/tests/unit/bufr_export/test_create_bufr_files.py
@@ -3,7 +3,11 @@
 from typing import Optional
 from unittest import TestCase
 
+import toml
+from pypromice.station_configuration import write_station_configuration_mapping
+
 from pypromice.postprocess.create_bufr_files import create_bufr_files
+from tests.utilities import get_station_configuration
 
 DATA_DIR = Path(__file__).parent.absolute()
 
@@ -44,6 +48,17 @@ def test_create_bufr_files(self):
                 src_path=DATA_DIR.joinpath("tx_l3_test1.csv"),
             )
 
+        station_configuration_root = self.temp_dir / "station_configuration"
+        station_configuration_root.mkdir(parents=True, exist_ok=True)
+        station_configuration_mapping = {
+            "THU_L2": get_station_configuration(stid="THU_L2", export_bufr=True),
+            "KAN_Lv3": get_station_configuration(stid="KAN_Lv3", export_bufr=True),
+        }
+        write_station_configuration_mapping(
+            station_configurations=station_configuration_mapping,
+            configuration_root_dir=station_configuration_root,
+        )
+
         create_bufr_files(
             input_files=input_files,
             period_start="2023-12-06T00:00",
@@ -51,6 +66,7 @@ def test_create_bufr_files(self):
             output_root=output_dir,
             override=True,
             break_on_error=True,
+            station_configuration_root=station_configuration_root,
         )
 
         compiled_output_dir = output_dir / "compiled"
@@ -88,6 +104,14 @@ def test_get_bufr_from_empty_data_file_raises_error(self):
         output_dir = self.temp_dir / "output"
         input_file = input_dir / "THU_L2_hourly.csv"
         create_data_file(input_file, src_path=None)
+        station_configuration_root = self.temp_dir / "station_configuration"
+        station_configuration = get_station_configuration(
+            stid="KAN_Lv3", export_bufr=True
+        )
+        write_station_configuration_mapping(
+            station_configurations={station_configuration.stid: station_configuration},
+            configuration_root_dir=station_configuration_root,
+        )
 
         with self.assertRaises(ValueError):
             create_bufr_files(
@@ -97,9 +121,10 @@ def test_get_bufr_from_empty_data_file_raises_error(self):
                 output_root=output_dir,
                 override=True,
                 break_on_error=True,
+                station_configuration_root=station_configuration_root,
             )
 
-    def test_get_gufr_continues_when_break_on_error_is_false(self):
+    def test_get_bufr_continues_when_break_on_error_is_false(self):
         input_dir = self.temp_dir / "input"
         output_dir = self.temp_dir / "output"
         input_file_without_data = input_dir / "THU_L2_hourly.csv"
@@ -110,6 +135,14 @@ def test_get_gufr_continues_when_break_on_error_is_false(self):
         )
         compiled_output_dir = output_dir / "compiled"
         individual_output_root = output_dir / "individual"
+        station_configuration_root = self.temp_dir / "station_configuration"
+        write_station_configuration_mapping(
+            station_configurations={
+                "THU_L2": get_station_configuration(stid="THU_L2", export_bufr=True),
+                "KAN_Lv3": get_station_configuration(stid="KAN_Lv3", export_bufr=True),
+            },
+            configuration_root_dir=station_configuration_root,
+        )
         expected_compiled_output_file = compiled_output_dir / "geus_20231206T0000.bufr"
         expected_individual_output_dir = individual_output_root / "20231206T0000"
         expected_individual_output_file = (
@@ -126,6 +159,7 @@ def test_get_gufr_continues_when_break_on_error_is_false(self):
             output_root=output_dir,
             override=True,
             break_on_error=False,
+            station_configuration_root=station_configuration_root,
         )
 
         self.assertTrue(expected_compiled_output_file.exists())
@@ -144,6 +178,14 @@ def test_get_bufr_where_period_does_not_exist(self):
         output_dir = self.temp_dir / "output"
         input_file = input_dir / "THU_L2_hourly.csv"
         create_data_file(input_file, src_path=DATA_DIR.joinpath("tx_l3_test1.csv"))
+        station_configuration_root = self.temp_dir / "station_configuration"
+        station_configuration = get_station_configuration(
+            stid="THU_L2", export_bufr=True
+        )
+        write_station_configuration_mapping(
+            station_configurations={station_configuration.stid: station_configuration},
+            configuration_root_dir=station_configuration_root,
+        )
 
         create_bufr_files(
             input_files=[input_file],
@@ -152,6 +194,7 @@ def test_get_bufr_where_period_does_not_exist(self):
             output_root=output_dir,
             override=True,
             break_on_error=True,
+            station_configuration_root=station_configuration_root,
         )
 
         compiled_output_dir = output_dir / "compiled"
diff --git a/tests/unit/bufr_export/test_get_bufr.py b/tests/unit/bufr_export/test_get_bufr.py
index d9ffa078..2af06c4c 100644
--- a/tests/unit/bufr_export/test_get_bufr.py
+++ b/tests/unit/bufr_export/test_get_bufr.py
@@ -4,7 +4,6 @@
 import sys
 import unittest
 import uuid
-from io import StringIO
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from unittest import TestCase, mock
@@ -15,11 +14,11 @@
 from pypromice.postprocess.bufr_utilities import BUFRVariables
 from pypromice.postprocess.get_bufr import (
     process_station,
-    StationConfiguration,
     get_bufr,
     get_bufr_variables,
-    write_station_configuration_mapping,
-    load_station_configuration_mapping,
+)
+from pypromice.station_configuration import (
+    StationConfiguration,
 )
 from tests.unit.bufr_export.test_get_bufr_integration import (
     DATA_DIR,
@@ -35,121 +34,25 @@
 MOCK_BASE_STR = "pypromice.postprocess.get_bufr.{}"
 
 
-class StationConfigurationTestCase(TestCase):
-    def test_read(self):
-        source_lines = [
-            "[UPE_L]\n",
-            'stid = "UPE_L"\n',
-            'station_site = "UPE_L"\n',
-            'project = "Promice"\n',
-            'station_type = "mobile"\n',
-            'wmo_id = "04423"\n',
-            "barometer_from_gps = -0.25\n",
-            "anemometer_from_sonic_ranger = 0.4\n",
-            "temperature_from_sonic_ranger = 0.0\n",
-            "height_of_gps_from_station_ground = 0.9\n",
-            "sonic_ranger_from_gps = 1.3\n",
-            "export_bufr = true\n",
-            "skipped_variables = []\n",
-            "positions_update_timestamp_only = false\n",
-        ]
-        source_io = StringIO()
-        source_io.writelines(source_lines)
-        source_io.seek(0)
-        expected_configuration_mapping = {
-            "UPE_L": StationConfiguration(
-                stid="UPE_L",
-                station_site="UPE_L",
-                project="Promice",
-                station_type="mobile",
-                wmo_id="04423",
-                barometer_from_gps=-0.25,
-                anemometer_from_sonic_ranger=0.4,
-                temperature_from_sonic_ranger=0.0,
-                height_of_gps_from_station_ground=0.9,
-                sonic_ranger_from_gps=1.3,
-                export_bufr=True,
-                comment=None,
-                skipped_variables=[],
-                positions_update_timestamp_only=False,
-            )
-        }
-
-        station_configuration_mapping = load_station_configuration_mapping(source_io)
-
-        self.assertDictEqual(
-            expected_configuration_mapping,
-            station_configuration_mapping,
-        )
-
-    def test_write_read(self):
-        station_config = StationConfiguration(
-            stid="UPE_L",
-            station_site="UPE_L",
-            project="Promice",
+class BufrVariablesTestCase(TestCase):
+    def test_bufr_variables_gcnet(self):
+        station_configuration = StationConfiguration(
+            stid="DY2",
+            station_site="DY2",
+            project="GC-Net",
+            wmo_id="04464",
             station_type="mobile",
-            wmo_id="04423",
-            barometer_from_gps=-0.25,
+            barometer_from_gps=0.55,
             anemometer_from_sonic_ranger=0.4,
-            temperature_from_sonic_ranger=0.0,
-            height_of_gps_from_station_ground=0.9,
-            sonic_ranger_from_gps=1.3,
+            temperature_from_sonic_ranger=0.4,
+            height_of_gps_from_station_ground=1.5,
+            sonic_ranger_from_gps=0.15,
             export_bufr=True,
-            comment=None,
-            skipped_variables=[],
-            positions_update_timestamp_only=False,
-        )
-        config_mapping = {station_config.stid: station_config}
-        source_io = StringIO()
-
-        write_station_configuration_mapping(config_mapping, source_io)
-        source_io.seek(0)
-        read_mapping = load_station_configuration_mapping(source_io)
-
-        self.assertDictEqual(
-            config_mapping,
-            read_mapping,
         )
 
-    def test_write_read_minimal_config(self):
-        station_config = StationConfiguration(stid="UPE_L")
-        config_mapping = {station_config.stid: station_config}
-        source_io = StringIO()
-
-        write_station_configuration_mapping(config_mapping, source_io)
-        source_io.seek(0)
-        read_mapping = load_station_configuration_mapping(source_io)
-
-        self.maxDiff = None
-        self.assertEqual(
-            station_config,
-            config_mapping[station_config.stid],
-        )
-        self.assertDictEqual(
-            config_mapping,
-            read_mapping,
-        )
-
-    def test_write_read_empty_mapping(self):
-        config_mapping = {}
-        source_io = StringIO()
-
-        write_station_configuration_mapping(config_mapping, source_io)
-        source_io.seek(0)
-        read_mapping = load_station_configuration_mapping(source_io)
-
-        self.assertDictEqual(
-            config_mapping,
-            read_mapping,
-        )
-
-
-class BufrVariablesTestCase(TestCase):
-    def test_bufr_variables_gcnet(self):
         self._test_bufr_variables(
-            stid="DY2",
-            wmo_id="04464",
-            station_type="mobile",
+            stid=station_configuration.stid,
+            station_configuration=station_configuration,
             relativeHumidity=69.0,
             airTemperature=256.0,
             pressure=77300.0,
@@ -164,10 +67,22 @@ def test_bufr_variables_gcnet(self):
         )
 
     def test_bufr_variables_promice_v2(self):
-        self._test_bufr_variables(
+        station_configuration = StationConfiguration(
             stid="NUK_L",
-            wmo_id="04403",
+            station_site="NUK_L",
+            project="Promice",
             station_type="mobile",
+            wmo_id="04403",
+            barometer_from_gps=-0.25,
+            anemometer_from_sonic_ranger=0.4,
+            temperature_from_sonic_ranger=0.0,
+            height_of_gps_from_station_ground=0.9,
+            sonic_ranger_from_gps=1.3,
+            export_bufr=True,
+        )
+        self._test_bufr_variables(
+            stid=station_configuration.stid,
+            station_configuration=station_configuration,
             relativeHumidity=69.0,
             airTemperature=256.0,
             pressure=77300.0,
@@ -182,10 +97,22 @@ def test_bufr_variables_promice_v2(self):
         )
 
     def test_bufr_variables_promice_v3(self):
-        self._test_bufr_variables(
+        station_configuration = StationConfiguration(
             stid="QAS_Mv3",
-            wmo_id="04441",
+            station_site="QAS_M",
+            project="Promice",
             station_type="mobile",
+            wmo_id="04441",
+            barometer_from_gps=1.3,
+            anemometer_from_sonic_ranger=0.4,
+            temperature_from_sonic_ranger=0.0,
+            height_of_gps_from_station_ground=0.9,
+            sonic_ranger_from_gps=1.3,
+            export_bufr=True,
+        )
+        self._test_bufr_variables(
+            stid=station_configuration.stid,
+            station_configuration=station_configuration,
             relativeHumidity=69.0,
             airTemperature=256.0,
             pressure=77300.0,
@@ -299,7 +226,7 @@ def _test_bufr_variables(
         self,
         write_bufr_message_mock: mock.MagicMock,
         stid: str,
-        wmo_id: str,
+        station_configuration: StationConfiguration,
         relativeHumidity: float,
         airTemperature: float,
         pressure: float,
@@ -311,7 +238,6 @@ def _test_bufr_variables(
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH: float,
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD: float,
         heightOfBarometerAboveMeanSeaLevel: float,
-        station_type: str,
     ):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
         l3_src = pd.read_csv(l3_src_filepath)
@@ -325,14 +251,17 @@ def _test_bufr_variables(
             stid=stid,
             store_positions=True,
             time_limit="91d",
+            station_configuration_mapping={
+                station_configuration.stid: station_configuration
+            },
         )
 
         write_bufr_message_mock.assert_called_once()
         call = write_bufr_message_mock.call_args_list[0]
         expected_time = datetime.datetime(year=2023, month=12, day=7, hour=23)
         expected_bufr_variables = BUFRVariables(
-            wmo_id=wmo_id,
-            station_type=station_type,
+            wmo_id=station_configuration.wmo_id,
+            station_type=station_configuration.station_type,
             timestamp=expected_time,
             relativeHumidity=relativeHumidity,
             airTemperature=airTemperature,
@@ -719,7 +648,7 @@ def test_process_station_raises_exception(
             bufr_out=self.bufr_root,
             input_files=[input_file_path],
             positions_filepath=self.positions_file_path,
-            station_configuration_path=None,
+            station_configuration_mapping=dict(),
             timestamps_pickle_filepath=timestamps_pickle_filepath,
             now_timestamp=now_timestamp,
         )
@@ -753,23 +682,18 @@ def test_multiple_stations(self, process_station_mock: mock.MagicMock):
             self.root_path / f"{station_config02.stid}_hourly.csv",
             self.root_path / f"{station_config03.stid}_hourly.csv",
         ]
-        station_configs = {
+        station_config_mapping = {
             station_config01.stid: station_config01,
             station_config02.stid: station_config02,
             station_config03.stid: station_config03,
         }
-        with self.station_configuration_path.open("w") as fp:
-            write_station_configuration_mapping(
-                station_configs,
-                fp,
-            )
 
         get_bufr(
             store_positions=True,
             bufr_out=self.bufr_root,
             input_files=input_files,
             positions_filepath=self.positions_file_path,
-            station_configuration_path=self.station_configuration_path,
+            station_configuration_mapping=station_config_mapping,
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             positions_seed_path=None,
             now_timestamp=datetime.datetime.now(),
@@ -794,7 +718,7 @@ def test_no_stations(self):
             bufr_out=self.bufr_root,
             input_files=(),
             positions_filepath=self.positions_file_path,
-            station_configuration_path=None,
+            station_configuration_mapping=dict(),
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             now_timestamp=now_timestamp,
         )
@@ -816,11 +740,9 @@ def test_single_station(self, process_station_mock: mock.MagicMock):
         stid = "THE_STID_FOR_A_STATION"
         input_file_path = self.root_path / f"{stid}_hourly.csv"
         station_configuration = StationConfiguration(stid=stid, export_bufr=True)
-        with self.station_configuration_path.open("w") as fp:
-            write_station_configuration_mapping(
-                dict(stid=station_configuration),
-                fp,
-            )
+        station_configuration_mapping = {
+            stid: station_configuration,
+        }
         expected_output_path = self.bufr_root / f"{stid}.bufr"
         expected_latest_timestamp = now_timestamp - datetime.timedelta(days=2)
         expected_station_configuration = StationConfiguration(
@@ -832,7 +754,7 @@ def test_single_station(self, process_station_mock: mock.MagicMock):
             bufr_out=self.bufr_root,
             input_files=[input_file_path],
             positions_filepath=self.positions_file_path,
-            station_configuration_path=self.station_configuration_path,
+            station_configuration_mapping=station_configuration_mapping,
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             positions_seed_path=None,
             now_timestamp=now_timestamp,
@@ -861,7 +783,7 @@ def test_station_without_configuration(self, process_station_mock: mock.MagicMoc
             bufr_out=self.bufr_root,
             input_files=[input_file_path],
             positions_filepath=self.positions_file_path,
-            station_configuration_path=None,
+            station_configuration_mapping=dict(),
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             positions_seed_path=None,
             now_timestamp=now_timestamp,
@@ -892,7 +814,7 @@ def test_latest_timestamp(self, process_station_mock: mock.MagicMock):
             bufr_out=self.bufr_root,
             input_files=[input_file_path],
             positions_filepath=self.positions_file_path,
-            station_configuration_path=None,
+            station_configuration_mapping=dict(),
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             positions_seed_path=None,
             now_timestamp=now_timestamp,
@@ -915,11 +837,7 @@ def test_update_timestamp_only(self, process_station_mock: mock.MagicMock):
         station_config = StationConfiguration(
             stid=stid, positions_update_timestamp_only=True
         )
-        with self.station_configuration_path.open("w") as fp:
-            write_station_configuration_mapping(
-                config_mapping={station_config.stid: station_config},
-                fp=fp,
-            )
+        config_mapping = {station_config.stid: station_config}
         input_file_path = self.root_path / f"{stid}_hourly.csv"
         seed_timestamp = datetime.datetime(2021, 10, 2, 10, 0)
         now_timestamp = datetime.datetime(2023, 3, 3, 5, 0)
@@ -946,7 +864,7 @@ def test_update_timestamp_only(self, process_station_mock: mock.MagicMock):
             bufr_out=self.bufr_root,
             input_files=[input_file_path],
             positions_filepath=self.positions_file_path,
-            station_configuration_path=self.station_configuration_path,
+            station_configuration_mapping=config_mapping,
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             positions_seed_path=self.positions_seed_path,
             now_timestamp=now_timestamp,
@@ -977,7 +895,7 @@ def test_position_seed(self):
             bufr_out=self.bufr_root,
             input_files=(),
             positions_filepath=self.positions_file_path,
-            station_configuration_path=None,
+            station_configuration_mapping=dict(),
             timestamps_pickle_filepath=self.timestamps_pickle_filepath,
             positions_seed_path=self.positions_seed_path,
             now_timestamp=datetime.datetime.now(),
diff --git a/tests/unit/bufr_export/test_get_bufr_integration.py b/tests/unit/bufr_export/test_get_bufr_integration.py
index f7b3cde4..64e76f74 100644
--- a/tests/unit/bufr_export/test_get_bufr_integration.py
+++ b/tests/unit/bufr_export/test_get_bufr_integration.py
@@ -17,11 +17,11 @@
 
 from pypromice.postprocess import get_bufr
 from pypromice.postprocess.bufr_utilities import read_bufr_message, BUFRVariables
-from pypromice.postprocess.get_bufr import (
-    DEFAULT_STATION_CONFIGURATION_PATH,
+from pypromice.station_configuration import (
     StationConfiguration,
     write_station_configuration_mapping,
 )
+from tests.utilities import get_station_configuration
 
 logging.basicConfig(
     stream=sys.stdout,
@@ -36,7 +36,7 @@ def run_get_bufr(
     l3_data: pd.DataFrame,
     stid: str,
     latest_timestamps: Optional[Dict[str, datetime.datetime]],
-    station_configuration_mapping=None,
+    station_configuration_mapping: Dict[str, StationConfiguration],
     **get_bufr_kwargs,
 ) -> Optional[BUFRVariables]:
     """
@@ -59,22 +59,9 @@ def run_get_bufr(
         bufr_out = output_path.joinpath("BUFR_out")
         timestamps_pickle_filepath = output_path.joinpath("latest_timestamps.pickle")
         positions_filepath = output_path.joinpath("AWS_latest_locations.csv")
-        station_configuration_path = output_path.joinpath("station_configuration.toml")
         l3_filepath = output_path.joinpath(f"{stid}_hour.csv")
         l3_data.to_csv(l3_filepath)
 
-        if station_configuration_mapping is None:
-            shutil.copy(
-                DEFAULT_STATION_CONFIGURATION_PATH,
-                station_configuration_path,
-            )
-        else:
-            with station_configuration_path.open("w") as fp:
-                write_station_configuration_mapping(
-                    station_configuration_mapping,
-                    fp,
-                )
-
         if latest_timestamps is not None:
             with timestamps_pickle_filepath.open("wb") as fp:
                 pickle.dump(latest_timestamps, fp)
@@ -84,7 +71,7 @@ def run_get_bufr(
             input_files=[l3_filepath],
             timestamps_pickle_filepath=timestamps_pickle_filepath,
             positions_filepath=positions_filepath,
-            station_configuration_path=station_configuration_path,
+            station_configuration_mapping=station_configuration_mapping,
             **get_bufr_kwargs,
         )
 
diff --git a/tests/unit/test_station_config.py b/tests/unit/test_station_config.py
new file mode 100644
index 00000000..a2b117fd
--- /dev/null
+++ b/tests/unit/test_station_config.py
@@ -0,0 +1,143 @@
+from pathlib import Path
+from unittest import TestCase
+from tempfile import TemporaryDirectory
+
+from pypromice.station_configuration import (
+    StationConfiguration,
+    load_station_configuration_mapping,
+    write_station_configuration_mapping,
+)
+from tests.utilities import get_station_configuration
+
+
+class StationConfigurationTestCase(TestCase):
+    def test_read_toml(self):
+        with TemporaryDirectory() as temp_dir:
+            source_path = Path(temp_dir) / "UPE_L.toml"
+            source_str = """
+                stid = "UPE_L"
+                station_site = "UPE_L"
+                project = "Promice"
+                station_type = "mobile"
+                wmo_id = "04423"
+                barometer_from_gps = -0.25
+                anemometer_from_sonic_ranger = 0.4
+                temperature_from_sonic_ranger = 0.0
+                height_of_gps_from_station_ground = 0.9
+                sonic_ranger_from_gps = 1.3
+                export_bufr = true
+                skipped_variables = []
+                positions_update_timestamp_only = false
+            """
+            with source_path.open("w") as source_io:
+                source_io.writelines(source_str)
+
+            expected_configuration = StationConfiguration(
+                stid="UPE_L",
+                station_site="UPE_L",
+                project="Promice",
+                station_type="mobile",
+                wmo_id="04423",
+                barometer_from_gps=-0.25,
+                anemometer_from_sonic_ranger=0.4,
+                temperature_from_sonic_ranger=0.0,
+                height_of_gps_from_station_ground=0.9,
+                sonic_ranger_from_gps=1.3,
+                export_bufr=True,
+                comment=None,
+                skipped_variables=[],
+                positions_update_timestamp_only=False,
+            )
+
+            station_configuration = StationConfiguration.load_toml(source_path)
+            self.assertEqual(
+                expected_configuration,
+                station_configuration,
+            )
+
+    def test_write_read(self):
+        with TemporaryDirectory() as temp_dir:
+            output_path = Path(temp_dir) / "UPE_L.toml"
+            src_station_config = StationConfiguration(
+                stid="UPE_L",
+                station_site="UPE_L",
+                project="Promice",
+                station_type="mobile",
+                wmo_id="04423",
+                barometer_from_gps=-0.25,
+                anemometer_from_sonic_ranger=0.4,
+                temperature_from_sonic_ranger=0.0,
+                height_of_gps_from_station_ground=0.9,
+                sonic_ranger_from_gps=1.3,
+                export_bufr=True,
+                comment=None,
+                skipped_variables=[],
+                positions_update_timestamp_only=False,
+            )
+            src_station_config.dump_toml(output_path)
+
+            read_station_config = StationConfiguration.load_toml(output_path)
+            self.assertEqual(
+                src_station_config,
+                read_station_config,
+            )
+
+    def test_read_station_config_mapping(self):
+        with TemporaryDirectory() as temp_dir:
+            station_config_root = Path(temp_dir) / "station_configs"
+            station_config_root.mkdir()
+            source_mapping = {
+                "UPE_L": get_station_configuration(stid="UPE_L"),
+                "UPE_R": get_station_configuration(stid="UPE_R"),
+            }
+            for stid, station_config in source_mapping.items():
+                station_config.dump_toml(station_config_root / f"{stid}.toml")
+
+            read_mapping = load_station_configuration_mapping(station_config_root)
+            self.assertDictEqual(
+                source_mapping,
+                read_mapping,
+            )
+
+    def test_write_station_config_mapping(self):
+        with TemporaryDirectory() as temp_dir:
+            station_config_root = Path(temp_dir) / "station_configs"
+            station_config_root.mkdir()
+            source_mapping = {
+                "UPE_L": get_station_configuration(stid="UPE_L"),
+                "UPE_R": get_station_configuration(stid="UPE_R"),
+            }
+
+            write_station_configuration_mapping(source_mapping, station_config_root)
+
+            read_mapping = load_station_configuration_mapping(station_config_root)
+            self.assertDictEqual(
+                source_mapping,
+                read_mapping,
+            )
+
+    def test_read_station_config_mapping_empty(self):
+        with TemporaryDirectory() as temp_dir:
+            station_config_root = Path(temp_dir) / "station_configs"
+            station_config_root.mkdir()
+
+            read_mapping = load_station_configuration_mapping(station_config_root)
+            self.assertDictEqual(
+                {},
+                read_mapping,
+            )
+
+    def test_read_station_config_mapping_ingore_filenames(self):
+        def test_read_station_config_mapping(self):  # NOTE(review): nested and never called, so nothing below runs
+            with TemporaryDirectory() as temp_dir:
+                station_config_root = Path(temp_dir) / "station_configs"
+                station_config_root.mkdir()
+                station_config = get_station_configuration(stid="UPE_L")
+                station_config.dump_toml(station_config_root / "a_custom_filename.toml")  # file name differs from stid
+                expected_station_config_mapping = {station_config.stid: station_config}  # keyed by stid, not file name
+
+                read_mapping = load_station_configuration_mapping(station_config_root)
+                self.assertDictEqual(  # NOTE(review): un-nest to make this assertion effective — confirm intended keying
+                    expected_station_config_mapping,
+                    read_mapping,
+                )
diff --git a/tests/utilities.py b/tests/utilities.py
new file mode 100644
index 00000000..742f6861
--- /dev/null
+++ b/tests/utilities.py
@@ -0,0 +1,61 @@
+import random
+import uuid
+
+from pypromice.postprocess.bufr_utilities import BUFR_TEMPLATES
+from pypromice.station_configuration import StationConfiguration
+
+STATION_TYPE_STRINGS = tuple(BUFR_TEMPLATES.keys())
+
+
+def get_station_configuration(**kwargs) -> StationConfiguration:
+    """
+    Create a StationConfiguration object with random values.
+
+    Parameters
+    ----------
+    kwargs : dict
+        Keyword arguments to providie explicit values for the StationConfiguration object.
+    Returns
+    -------
+    """
+    stid = kwargs.get("stid", str(uuid.uuid4()))
+    station_site = kwargs.get("station_site", str(uuid.uuid4()))
+    project = kwargs.get("project", str(uuid.uuid4()))
+    station_type = kwargs.get("station_type", random.choice(STATION_TYPE_STRINGS))
+    # WMO Station number <1024 for land stations
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/001002
+    wmo_id = kwargs.get("wmo_id", "{:05}".format(random.randint(0, 1023)))
+    barometer_from_gps = kwargs.get("barometer_from_gps", random.random() * 3)
+    anemometer_from_sonic_ranger = kwargs.get(
+        "anemometer_from_sonic_ranger", random.random() * 3
+    )
+    temperature_from_sonic_ranger = kwargs.get(
+        "temperature_from_sonic_ranger", random.random() * 3
+    )
+    height_of_gps_from_station_ground = kwargs.get(
+        "height_of_gps_from_station_ground", random.random() * 3
+    )
+    sonic_ranger_from_gps = kwargs.get("sonic_ranger_from_gps", random.random() * 3)
+    export_bufr = kwargs.get("export_bufr", random.random() > 0.5)
+    skipped_variables = kwargs.get("skipped_variables", [])
+    positions_update_timestamp_only = kwargs.get(
+        "positions_update_timestamp_only", random.random() > 0.5
+    )
+    station_relocation = kwargs.get("station_relocation", [])
+
+    return StationConfiguration(
+        stid=stid,
+        station_site=station_site,
+        project=project,
+        station_type=station_type,
+        wmo_id=wmo_id,
+        barometer_from_gps=barometer_from_gps,
+        anemometer_from_sonic_ranger=anemometer_from_sonic_ranger,
+        temperature_from_sonic_ranger=temperature_from_sonic_ranger,
+        height_of_gps_from_station_ground=height_of_gps_from_station_ground,
+        sonic_ranger_from_gps=sonic_ranger_from_gps,
+        export_bufr=export_bufr,
+        skipped_variables=skipped_variables,
+        positions_update_timestamp_only=positions_update_timestamp_only,
+        station_relocation=station_relocation,
+    )

From 828cba29d3b18c767f074ec1a71d5ffce4f3c491 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Tue, 9 Jul 2024 09:38:33 +0200
Subject: [PATCH 11/16] Updated BUFRVariables with scales and descriptions

* Added detailed descriptions with references to the attributes in BUFRVariables
* Changed the attribute order to align with the exported schema
* Changed variable roundings to align with the scales defined in the BUFR schemas:
  * Latitude and longitude are set to 5. Was 6
  * heightOfStationGroundAboveMeanSeaLevel is set to 1. Was 2
  * heightOfBarometerAboveMeanSeaLevel is set to 1. Was 2
  * pressure is set to -1. Was 1. Note: The BUFRVariable unit is Pa and not hPa
  * airTemperature is set to 2. Was 1.
  * heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH is set to 2. Was 4
  * heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD is set to 2. Was 4
* Added unit tests to test the roundings
* Updated existing unit tests to align with corrected precision
---
 src/pypromice/postprocess/bufr_utilities.py   | 81 ++++++++++++++++---
 .../unit/bufr_export/test_bufr_utilitites.py  | 34 ++++++++
 .../bufr_export/test_create_bufr_files.py     |  2 +-
 tests/unit/bufr_export/test_get_bufr.py       |  8 +-
 .../bufr_export/test_get_bufr_integration.py  | 12 +--
 5 files changed, 113 insertions(+), 24 deletions(-)

diff --git a/src/pypromice/postprocess/bufr_utilities.py b/src/pypromice/postprocess/bufr_utilities.py
index 00e036d8..8537e7f2 100644
--- a/src/pypromice/postprocess/bufr_utilities.py
+++ b/src/pypromice/postprocess/bufr_utilities.py
@@ -66,28 +66,81 @@ class BUFRVariables:
 
     """
 
-    wmo_id: str
+    # Station type: "mobile" or "land"
+    # ===============================
+    # Fixed land station schema: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307080
+    # Mobile station schema: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307090
+
     station_type: str
+
+    # WMO station identifier
+    # Land stations: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/301090
+    # Mobile stations: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/301092
+    # ======================================================================================================
+    wmo_id: str
     timestamp: datetime.datetime
-    relativeHumidity: float = attrs.field(converter=round_converter(0))
-    airTemperature: float = attrs.field(converter=round_converter(1))
-    pressure: float = attrs.field(converter=round_converter(1))
-    windDirection: float = attrs.field(converter=round_converter(0))
-    windSpeed: float = attrs.field(converter=round_converter(1))
-    latitude: float = attrs.field(converter=round_converter(6))
-    longitude: float = attrs.field(converter=round_converter(6))
+
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/005001
+    # Scale: 5, unit: degrees
+    # TODO: Test if eccodes does the rounding as well. The rounding was 6, which is larger than the scale.
+    latitude: float = attrs.field(converter=round_converter(5))
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/006001
+    # Scale: 5, unit: degrees
+    longitude: float = attrs.field(converter=round_converter(5))
+
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007030
+    # Scale: 1, unit: m
     heightOfStationGroundAboveMeanSeaLevel: float = attrs.field(
-        converter=round_converter(2)
+        converter=round_converter(1)
     )
-    #
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007031
+    # Scale: 1, unit: m
     heightOfBarometerAboveMeanSeaLevel: float = attrs.field(
-        converter=round_converter(2),
+        converter=round_converter(1),
     )
+
+    # Pressure information
+    # ====================
+    # Definition table: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302031
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007004
+    # Scale: -1, unit: Pa
+    pressure: float = attrs.field(converter=round_converter(-1))
+    # There are two other pressure variables in the template: 302001 and 010062.
+
+    # Basic synoptic "instantaneous" data
+    # ===================================
+    # Definition table: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302035
+    # This section only includes the temperature and humidity data (302032).
+    # Precipitation and cloud data are currently ignored.
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007032
+    # Scale: 2, unit: m
+    # This is the first appearance of this variable id.
     heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH: float = attrs.field(
-        converter=round_converter(4),
+        converter=round_converter(2),
     )
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/012101
+    # Scale: 2, unit: K
+    airTemperature: float = attrs.field(converter=round_converter(2))
+    # There is also a Dewpoint temperature in this template: 012103 which is currently unused.
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/013003
+    # Scale: 0, unit: %
+    relativeHumidity: float = attrs.field(converter=round_converter(0))
+
+    # Basic synoptic "period" data
+    # ============================
+    # Definition table: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302043
+    # Wind data: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302042
+    # Wind direction: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/011001
+    # Scale: 0, unit: degrees
+    windDirection: float = attrs.field(converter=round_converter(0))
+    # Wind speed: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/011002
+    # Scale: 1, unit: m/s
+    windSpeed: float = attrs.field(converter=round_converter(1))
+    # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007032
+    # Scale: 2, unit: m
+    # This is the 7th appearance of this variable id.
     heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD: float = attrs.field(
-        converter=round_converter(4)
+        converter=round_converter(2)
     )
 
     def as_series(self) -> pd.Series:
@@ -131,6 +184,7 @@ def __eq__(self, other: "BUFRVariables"):
 
 BUFR_TEMPLATES = {
     "mobile": {
+        # Template definition: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307090
         "unexpandedDescriptors": (307090),  # message template, "synopMobil"
         "edition": 4,  # latest edition
         "masterTableNumber": 0,
@@ -146,6 +200,7 @@ def __eq__(self, other: "BUFRVariables"):
         "compressedData": 0,
     },
     "land": {
+        # Template definition: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307080
         "unexpandedDescriptors": (307080),  # message template, "synopLand"
         "edition": 4,  # latest edition
         "masterTableNumber": 0,
diff --git a/tests/unit/bufr_export/test_bufr_utilitites.py b/tests/unit/bufr_export/test_bufr_utilitites.py
index 2b9a19b1..49ecc203 100644
--- a/tests/unit/bufr_export/test_bufr_utilitites.py
+++ b/tests/unit/bufr_export/test_bufr_utilitites.py
@@ -181,3 +181,37 @@ def test_nan_value_serialization(self):
             variables_src,
             variables_read,
         )
+
+    def test_precision(self):
+        """
+        Test if the BUFRVariable rounding configurations align with the BUFR format.
+
+        Use np.random.random() to generate high precision random values.
+        """
+        variables_src = BUFRVariables(
+            wmo_id="04464",
+            station_type="mobile",
+            timestamp=datetime.datetime(2023, 12, 19, 10, 0),
+            relativeHumidity=np.random.random(),
+            airTemperature=np.random.random(),
+            pressure=1000*np.random.random(),
+            windDirection=np.random.random(),
+            windSpeed=np.random.random(),
+            latitude=np.random.random(),
+            longitude=np.random.random(),
+            heightOfStationGroundAboveMeanSeaLevel=np.random.random(),
+            heightOfBarometerAboveMeanSeaLevel=np.random.random(),
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=np.random.random(),
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=np.random.random(),
+        )
+        with tempfile.TemporaryFile("w+b") as fp:
+            write_bufr_message(variables=variables_src, file=fp)
+            fp.seek(0)
+            variables_read = read_bufr_message(
+                fp=fp,
+            )
+
+        self.assertEqual(
+            variables_src,
+            variables_read,
+        )
diff --git a/tests/unit/bufr_export/test_create_bufr_files.py b/tests/unit/bufr_export/test_create_bufr_files.py
index f9cf935e..2cb25afc 100644
--- a/tests/unit/bufr_export/test_create_bufr_files.py
+++ b/tests/unit/bufr_export/test_create_bufr_files.py
@@ -32,7 +32,7 @@ def tearDown(self):
 
     def test_create_bufr_files(self):
         """
-        Teste the creation of bufr files and their output folder structure.
+        Test the creation of bufr files and their output folder structure.
         It does not test the content of the bufr files.
         """
         input_dir = self.temp_dir / "input"
diff --git a/tests/unit/bufr_export/test_get_bufr.py b/tests/unit/bufr_export/test_get_bufr.py
index 2af06c4c..edb93b5b 100644
--- a/tests/unit/bufr_export/test_get_bufr.py
+++ b/tests/unit/bufr_export/test_get_bufr.py
@@ -54,7 +54,7 @@ def test_bufr_variables_gcnet(self):
             stid=station_configuration.stid,
             station_configuration=station_configuration,
             relativeHumidity=69.0,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149.0,
             windSpeed=14.9,
@@ -84,7 +84,7 @@ def test_bufr_variables_promice_v2(self):
             stid=station_configuration.stid,
             station_configuration=station_configuration,
             relativeHumidity=69.0,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149.0,
             windSpeed=14.9,
@@ -114,7 +114,7 @@ def test_bufr_variables_promice_v3(self):
             stid=station_configuration.stid,
             station_configuration=station_configuration,
             relativeHumidity=69.0,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149.0,
             windSpeed=14.9,
@@ -165,7 +165,7 @@ def test_bufr_variables_static_gps_elevation(self):
             station_type=station_config.station_type,
             timestamp=timestamp,
             relativeHumidity=1.0,
-            airTemperature=252.2,  # Converted to kelvin
+            airTemperature=252.15,  # Converted to kelvin
             pressure=199300.0,
             windDirection=32.0,
             windSpeed=5.3,
diff --git a/tests/unit/bufr_export/test_get_bufr_integration.py b/tests/unit/bufr_export/test_get_bufr_integration.py
index 64e76f74..fd755fd9 100644
--- a/tests/unit/bufr_export/test_get_bufr_integration.py
+++ b/tests/unit/bufr_export/test_get_bufr_integration.py
@@ -141,7 +141,7 @@ def test_get_bufr_has_new_data(self):
             # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
             timestamp=datetime.datetime(2023, 12, 7, 23, 00),
             relativeHumidity=69,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149,
             windSpeed=14.9,
@@ -180,7 +180,7 @@ def test_get_bufr_has_new_data_dont_store_position(self):
             # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
             timestamp=datetime.datetime(2023, 12, 7, 23, 00),
             relativeHumidity=69,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149,
             windSpeed=14.9,
@@ -260,7 +260,7 @@ def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
             # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
             timestamp=datetime.datetime(2023, 12, 7, 23, 00),
             relativeHumidity=69,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149,
             windSpeed=14.9,
@@ -321,7 +321,7 @@ def test_invalid_value_at_last_index(self):
             # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
             timestamp=datetime.datetime(2023, 12, 7, 23, 00),
             relativeHumidity=69,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=np.nan,
             windDirection=149,
             windSpeed=14.9,
@@ -459,7 +459,7 @@ def test_ignore_newer_data_than_now_input(self):
             # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00 but now_timestamp is 2023-12-06
             timestamp=datetime.datetime(2023, 12, 6, 0, 0),
             relativeHumidity=82,
-            airTemperature=250.8,
+            airTemperature=250.85,
             pressure=77370.0,
             windDirection=153,
             windSpeed=10.4,
@@ -500,7 +500,7 @@ def test_land_station_export(self):
             # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
             timestamp=datetime.datetime(2023, 12, 7, 23, 00),
             relativeHumidity=69,
-            airTemperature=256.0,
+            airTemperature=255.95,
             pressure=77300.0,
             windDirection=149,
             windSpeed=14.9,

From c6d15ea9b0bf2057e776c6f8c492a31797c585bb Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Tue, 9 Jul 2024 11:31:45 +0200
Subject: [PATCH 12/16] Increased the real_time_utilities rounding precisions

---
 .../postprocess/real_time_utilities.py        | 12 +++++-----
 tests/unit/bufr_export/test_get_bufr.py       | 16 ++++++-------
 .../bufr_export/test_get_bufr_integration.py  | 24 +++++++++----------
 .../bufr_export/test_realtime_utilitites.py   | 24 +++++++++----------
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/pypromice/postprocess/real_time_utilities.py b/src/pypromice/postprocess/real_time_utilities.py
index 17efac92..2352f30d 100644
--- a/src/pypromice/postprocess/real_time_utilities.py
+++ b/src/pypromice/postprocess/real_time_utilities.py
@@ -72,7 +72,7 @@ def get_latest_data(
 
     # Apply smoothing to z_boom_u
     # require at least 2 hourly obs? Sometimes seeing once/day data for z_boom_u
-    df_limited = rolling_window(df_limited, "z_boom_u", "72H", 2, 1)
+    df_limited = rolling_window(df_limited, "z_boom_u", "72H", 2, 3)
 
     # limit to single most recent valid row (convert to series)
     s_current = df_limited.loc[last_valid_index]
@@ -149,9 +149,9 @@ def find_positions(df, time_limit):
     logger.info(f"last transmission: {df_limited.index.max()}")
 
     # Extrapolate recommended for altitude, optional for lat and lon.
-    df_limited, lat_valid = linear_fit(df_limited, "gps_lat", 6)
-    df_limited, lon_valid = linear_fit(df_limited, "gps_lon", 6)
-    df_limited, alt_valid = linear_fit(df_limited, "gps_alt", 1)
+    df_limited, lat_valid = linear_fit(df_limited, "gps_lat", 7)
+    df_limited, lon_valid = linear_fit(df_limited, "gps_lon", 7)
+    df_limited, alt_valid = linear_fit(df_limited, "gps_alt", 4)
 
     # If we have no valid lat, lon or alt data in the df_limited window, then interpolate
     # using full tx dataset.
@@ -162,9 +162,9 @@ def find_positions(df, time_limit):
             logger.info(f"----> Using full history for linear extrapolation: {k}")
             logger.info(f"first transmission: {df.index.min()}")
             if k == "gps_alt":
-                df, valid = linear_fit(df, k, 1)
+                df, valid = linear_fit(df, k, 2)
             else:
-                df, valid = linear_fit(df, k, 6)
+                df, valid = linear_fit(df, k, 7)
             check_valid_again[k] = valid
             if check_valid_again[k] is True:
                 df_limited[f"{k}_fit"] = df.loc[df_limited.index, f"{k}_fit"]
diff --git a/tests/unit/bufr_export/test_get_bufr.py b/tests/unit/bufr_export/test_get_bufr.py
index edb93b5b..a100cd55 100644
--- a/tests/unit/bufr_export/test_get_bufr.py
+++ b/tests/unit/bufr_export/test_get_bufr.py
@@ -61,9 +61,9 @@ def test_bufr_variables_gcnet(self):
             latitude=66.482488,
             longitude=-46.294266,
             heightOfStationGroundAboveMeanSeaLevel=2123.2,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.6,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
-            heightOfBarometerAboveMeanSeaLevel=2125.25,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.59,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
+            heightOfBarometerAboveMeanSeaLevel=2125.3,
         )
 
     def test_bufr_variables_promice_v2(self):
@@ -91,9 +91,9 @@ def test_bufr_variables_promice_v2(self):
             latitude=66.482488,
             longitude=-46.294266,
             heightOfStationGroundAboveMeanSeaLevel=2123.8,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.2,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
-            heightOfBarometerAboveMeanSeaLevel=2124.45,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.19,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
+            heightOfBarometerAboveMeanSeaLevel=2124.5,
         )
 
     def test_bufr_variables_promice_v3(self):
@@ -121,8 +121,8 @@ def test_bufr_variables_promice_v3(self):
             latitude=66.482488,
             longitude=-46.294266,
             heightOfStationGroundAboveMeanSeaLevel=2123.8,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.2,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.19,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
             heightOfBarometerAboveMeanSeaLevel=2126,
         )
 
diff --git a/tests/unit/bufr_export/test_get_bufr_integration.py b/tests/unit/bufr_export/test_get_bufr_integration.py
index fd755fd9..1a21b3ee 100644
--- a/tests/unit/bufr_export/test_get_bufr_integration.py
+++ b/tests/unit/bufr_export/test_get_bufr_integration.py
@@ -149,8 +149,8 @@ def test_get_bufr_has_new_data(self):
             longitude=-46.29427,
             heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.09,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
         )
         pd.testing.assert_series_equal(
             bufr_data.as_series(),
@@ -188,8 +188,8 @@ def test_get_bufr_has_new_data_dont_store_position(self):
             longitude=-46.29427,
             heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.09,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
         )
         pd.testing.assert_series_equal(
             bufr_data.as_series(),
@@ -268,8 +268,8 @@ def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
             longitude=-46.29427,
             heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.09,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
         )
         pd.testing.assert_series_equal(
             bufr_data.as_series(),
@@ -329,8 +329,8 @@ def test_invalid_value_at_last_index(self):
             longitude=-46.29427,
             heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.09,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
         )
         pd.testing.assert_series_equal(
             bufr_data.as_series(),
@@ -467,8 +467,8 @@ def test_ignore_newer_data_than_now_input(self):
             longitude=-46.29426,
             heightOfStationGroundAboveMeanSeaLevel=2123.3,
             heightOfBarometerAboveMeanSeaLevel=2124.3,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.09,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
         )
         pd.testing.assert_series_equal(
             bufr_data.as_series(),
@@ -508,8 +508,8 @@ def test_land_station_export(self):
             longitude=-46.29427,
             heightOfStationGroundAboveMeanSeaLevel=2123.7,
             heightOfBarometerAboveMeanSeaLevel=2124.7,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.1,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.6,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.09,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
         )
         pd.testing.assert_series_equal(
             bufr_data.as_series(),
diff --git a/tests/unit/bufr_export/test_realtime_utilitites.py b/tests/unit/bufr_export/test_realtime_utilitites.py
index a7306a3f..1acdb5b7 100644
--- a/tests/unit/bufr_export/test_realtime_utilitites.py
+++ b/tests/unit/bufr_export/test_realtime_utilitites.py
@@ -50,10 +50,10 @@ def test_1(self):
                 "gps_lon": -46.294232,
                 "gps_alt": 2116.0,
                 "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482479,
-                "gps_lon_fit": -46.294269,
-                "gps_alt_fit": 2121.4,
-                "z_boom_u_smooth": 4.2,
+                "gps_lat_fit": 66.4824788,
+                "gps_lon_fit": -46.2942685,
+                "gps_alt_fit": 2121.4118,
+                "z_boom_u_smooth": 4.188,
             },
             name=datetime.datetime(2023, 12, 7, 6),
         )
@@ -94,10 +94,10 @@ def test_latest_data_row_is_invalid(self):
                 "gps_lon": -46.294335,
                 "gps_alt": 2125.0,
                 "z_boom_u": 4.1844,
-                "gps_lat_fit": 66.482483,
-                "gps_lon_fit": -46.294275,
-                "gps_alt_fit": 2123.3,
-                "z_boom_u_smooth": 4.2,
+                "gps_lat_fit": 66.4824828,
+                "gps_lon_fit": -46.2942753,
+                "gps_alt_fit": 2123.3088,
+                "z_boom_u_smooth": 4.187,
             },
             name=expected_output_timestamp,
         )
@@ -127,10 +127,10 @@ def test_latest_data_has_some_invalid_values(self):
                 "gps_lon": -46.294232,
                 "gps_alt": 2116.0,
                 "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482479,
-                "gps_lon_fit": -46.294269,
-                "gps_alt_fit": 2121.4,
-                "z_boom_u_smooth": 4.2,
+                "gps_lat_fit": 66.4824788,
+                "gps_lon_fit": -46.2942685,
+                "gps_alt_fit": 2121.4118,
+                "z_boom_u_smooth": 4.188,
             },
             name=datetime.datetime(2023, 12, 7, 6),
         )

From fb7c69270d05026eca29eaca2f132bd334fe8823 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Thu, 18 Jul 2024 14:25:38 +0200
Subject: [PATCH 13/16] Updated get_bufr to separate station position from bufr

* The station position determination (AWS_latest_locations) is separated from the bufr file export
* Updated the unit tests

Corrected minimum data check to allow p_i or t_i to be nan

Renamed process_station parameters for readability
* Rename now_timestamp -> target_timestamp
* Rename time_limit -> linear_regression_time_limit

Applied black
---
 .../postprocess/create_bufr_files.py          |    4 +-
 src/pypromice/postprocess/get_bufr.py         |  328 ++--
 .../postprocess/real_time_utilities.py        |   30 +-
 src/pypromice/station_configuration.py        |   18 +-
 .../unit/bufr_export/test_bufr_utilitites.py  |    2 +-
 .../bufr_export/test_create_bufr_files.py     |    8 +-
 tests/unit/bufr_export/test_get_bufr.py       | 1392 ++++++++---------
 .../bufr_export/test_get_bufr_integration.py  |  104 +-
 .../bufr_export/test_realtime_utilitites.py   |   32 +
 tests/utilities.py                            |    2 +-
 10 files changed, 940 insertions(+), 980 deletions(-)

diff --git a/src/pypromice/postprocess/create_bufr_files.py b/src/pypromice/postprocess/create_bufr_files.py
index 2b9925c0..1b6b4b78 100644
--- a/src/pypromice/postprocess/create_bufr_files.py
+++ b/src/pypromice/postprocess/create_bufr_files.py
@@ -59,9 +59,9 @@ def create_bufr_files(
                 input_files=input_files,
                 store_positions=False,
                 positions_filepath=None,
-                time_limit=DEFAULT_LIN_REG_TIME_LIMIT,
+                linear_regression_time_limit=DEFAULT_LIN_REG_TIME_LIMIT,
                 timestamps_pickle_filepath=None,
-                now_timestamp=period,
+                target_timestamp=period,
                 station_configuration_mapping=station_configuration_mapping,
                 positions_seed_path=DEFAULT_POSITION_SEED_PATH,
                 break_on_error=break_on_error,
diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index ab198236..c08b6b95 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -4,6 +4,13 @@
 Post-processing functions for AWS station data, such as converting PROMICE and GC-Net data files to WMO-compliant BUFR files
 
 """
+__all__ = [
+    "get_bufr",
+    "main",
+    "DEFAULT_POSITION_SEED_PATH",
+    "DEFAULT_LIN_REG_TIME_LIMIT",
+]
+
 import argparse
 import glob
 import logging
@@ -11,7 +18,7 @@
 import sys
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import List, Dict, Optional, Collection, Sequence, Mapping
+from typing import List, Dict, Optional, Collection, Sequence, Mapping, BinaryIO
 
 import numpy as np
 import pandas as pd
@@ -19,12 +26,6 @@
 from pypromice.postprocess.bufr_utilities import write_bufr_message, BUFRVariables
 from pypromice.postprocess.real_time_utilities import get_latest_data
 
-__all__ = [
-    "get_bufr",
-    "main",
-    "DEFAULT_POSITION_SEED_PATH",
-    "DEFAULT_LIN_REG_TIME_LIMIT",
-]
 
 from pypromice.station_configuration import (
     StationConfiguration,
@@ -35,79 +36,27 @@
 
 DEFAULT_POSITION_SEED_PATH = Path(__file__).parent.joinpath("positions_seed.csv")
 DEFAULT_LIN_REG_TIME_LIMIT = "91d"
+REQUIRED_KEYS = (
+    "t_i",
+    "p_i",
+    "rh_i",
+    "wdir_i",
+    "wspd_i",
+    "gps_lat_fit",
+    "gps_lon_fit",
+    "gps_alt_fit",
+    "z_boom_u_smooth",
+)
 
 
-def process_station(
-    file_path: Path,
-    output_path: Path,
-    now_timestamp: datetime,
-    latest_timestamp: Optional[datetime],
-    time_limit: str,
-    stid: str,
-    station_configuration: StationConfiguration,
-) -> Optional[Dict]:
-    df = load_data(file_path, now_timestamp)
-
-    # Select current data
-    latest_data = get_latest_data(
-        df,
-        lin_reg_time_limit=time_limit,
-    )
-
-    if latest_data is None:
-        logger.info("No valid instantaneous timestamps!")
-        return None
-
-    latest_data = filter_skipped_variables(
-        latest_data, vars_to_skip=station_configuration.skipped_variables
-    )
-
-    # Check that we have minimum required valid data
-    sufficient_wx_data, sufficient_position_data = min_data_check(latest_data)
-
-    station_position = dict()
-    station_position["timestamp"] = latest_data.name
-    if sufficient_position_data:
-        station_position["lon"] = latest_data.get("gps_lon_fit")
-        station_position["lat"] = latest_data.get("gps_lat_fit")
-        station_position["alt"] = latest_data.get("gps_alt_fit")
-    else:
-        logger.warning("Insufficient position data")
-        # Don't use any position attributes from latest_data
-        station_position["lon"] = None
-        station_position["lat"] = None
-        station_position["alt"] = None
-        return station_position
-
-    if station_configuration.export_bufr:
-        if not sufficient_wx_data:
-            logger.warning(f"Failed min data wx {stid}")
-            return station_position
-
-        # Store current timest
-        if latest_data.name <= latest_timestamp:
-            logger.info(f"No new data {latest_data.name} <= {latest_timestamp}")
-            return station_position
-
-        # Construct and export BUFR file
-        bufr_variables = get_bufr_variables(
-            data=latest_data,
-            station_configuration=station_configuration,
-        )
-        with output_path.open("bw") as fp:
-            write_bufr_message(variables=bufr_variables, file=fp)
-
-    return station_position
-
-
-def load_data(file_path: Path, now_timestamp: datetime) -> pd.DataFrame:
+def load_data(file_path: Path, latest_timestamp: datetime) -> pd.DataFrame:
     """
-    Read AWS data from csv file using time as index and filter all rows after now_timestamp
+    Read AWS data from csv file using time as index and filter all rows after latest_timestamp
 
     Parameters
     ----------
     file_path
-    now_timestamp
+    latest_timestamp
 
     Returns
     -------
@@ -119,7 +68,7 @@ def load_data(file_path: Path, now_timestamp: datetime) -> pd.DataFrame:
         .set_index("time")
         .sort_index()
     )
-    df = df[:now_timestamp]
+    df = df[:latest_timestamp]
     return df
 
 
@@ -129,11 +78,11 @@ def get_bufr(
     positions_filepath: Optional[Path],
     timestamps_pickle_filepath: Optional[Path],
     station_configuration_mapping: Mapping[str, StationConfiguration],
-    now_timestamp: Optional[datetime] = None,
+    target_timestamp: Optional[datetime] = None,
     positions_seed_path: Optional[Path] = None,
-    earliest_timestamp: datetime = None,
+    time_window_length: timedelta = timedelta(days=2),
     store_positions: bool = False,
-    time_limit: str = "91d",
+    linear_regression_time_limit: str = "91d",
     break_on_error: bool = False,
 ):
     """
@@ -148,38 +97,41 @@ def get_bufr(
     bufr_out
         Path to the BUFR out directory.
     input_files
-        List of L3 csv file paths.
+        List of csv file paths.
     positions_filepath
         Path to write latest positions. Used to retrieve a static set of positions to register stations with DMI/WMO
     timestamps_pickle_filepath
         Path to pickle file used for storing latest timestamp
     station_configuration_mapping
         Mapping of station id to StationConfiguration object
-    now_timestamp
-        get_bufr will export the latest data before now_timestamp. Default datetime.utcnow()
+    target_timestamp
+        get_bufr will export the latest data before target_timestamp. Default datetime.utcnow()
     positions_seed_path
         Path to csv file with position data used as default values for the output position.
-    earliest_timestamp
-        The earliest allowed timestamp for data to be included in the output. Default now_timestamp - 2 days
+    time_window_length
+        The length of the time window to consider for the latest data. Default 2 days
     store_positions
         Flag determine if latest positions are exported.
-    time_limit
+    linear_regression_time_limit
         Previous time to limit dataframe before applying linear regression.
     break_on_error
         If True, the function will raise an exception if an error occurs during processing.
 
     """
-    if now_timestamp is None:
-        now_timestamp = datetime.utcnow()
+    if target_timestamp is None:
+        target_timestamp = datetime.utcnow()
 
-    if earliest_timestamp is None:
-        earliest_timestamp = now_timestamp - timedelta(days=2)
+    # The earliest accepted timestamp is computed per station in the loop below
+    # as target_timestamp - time_window_length (replaces earliest_timestamp).
 
     # Prepare (latest) positions
     positions = dict()
     if positions_seed_path:
         positions_seed = pd.read_csv(
-            positions_seed_path, index_col=0, delimiter=",", parse_dates=["timestamp"]
+            positions_seed_path,
+            index_col="stid",
+            delimiter=",",
+            parse_dates=["timestamp"],
         ).to_dict(orient="index")
         logger.info(f"Seed positions for {positions_seed.keys()}")
         positions.update(positions_seed)
@@ -195,9 +147,6 @@ def get_bufr(
         logger.info("latest_timestamps.pickle not found!")
         latest_timestamps = {}
 
-    # Initiate a new dict for current timestamps
-    current_timestamps = {}
-
     # Setup diagnostic lists (logger.info at end)
     skipped = []
     no_recent_data = []
@@ -220,42 +169,60 @@ def get_bufr(
 
         output_path = bufr_out / f"{stid}.bufr"
         logger.info(f"Generating {output_path} from {file_path}")
-        latest_timestamp = latest_timestamps.get(stid, earliest_timestamp)
-        latest_timestamp = max(earliest_timestamp, latest_timestamp)
+
+        time_window_start = target_timestamp - time_window_length
+        # Use only newer data than the latest timestamp
+        if stid in latest_timestamps:
+            time_window_start = max(latest_timestamps[stid], time_window_start)
 
         try:
-            station_position = process_station(
-                file_path=file_path,
-                output_path=output_path,
-                now_timestamp=now_timestamp,
-                latest_timestamp=latest_timestamp,
-                time_limit=time_limit,
-                stid=stid,
-                station_configuration=station_configuration,
-            )
-        except Exception:
-            logger.exception(f"Failed processing {stid}")
-            if break_on_error:
-                raise
-            continue
+            input_data = load_data(file_path, target_timestamp)
 
-        if station_position is None:
-            logger.warning(f"No position information available for {stid}")
+            # Select current data
+            latest_data = get_latest_data(
+                input_data,
+                lin_reg_time_limit=linear_regression_time_limit,
+                vars_to_skip=station_configuration.skipped_variables,
+            )
+            if latest_data is None:
+                logger.info("No valid instantaneous timestamps!")
+                continue
 
-        else:
+            # Create station positions
+            station_position = get_station_positions(latest_data)
             if stid not in positions:
                 positions[stid] = dict()
-
             if station_configuration.positions_update_timestamp_only:
                 positions[stid]["timestamp"] = station_position["timestamp"]
             else:
                 positions[stid].update(station_position)
 
+            # Create BUFR File
+            if (
+                station_configuration.export_bufr
+                and latest_data.name > time_window_start
+            ):
+                latest_timestamps[stid] = latest_data.name
+                bufr_variables = get_bufr_variables(latest_data, station_configuration)
+                if bufr_variables:
+                    with output_path.open("bw") as output_file:
+                        write_bufr_message(bufr_variables, output_file)
+            else:
+                logger.info(f"No new data {latest_data.name} <= {time_window_start}")
+
+        except Exception:
+            logger.exception(f"Failed processing {stid}")
+            if output_path.exists():
+                output_path.unlink()
+            if break_on_error:
+                raise
+            continue
+
     # Write the most recent timestamps back to the pickle on disk
     logger.info(f"writing latest_timestamps to {timestamps_pickle_filepath}")
     if timestamps_pickle_filepath:
         with timestamps_pickle_filepath.open("wb") as handle:
-            pickle.dump(current_timestamps, handle, protocol=pickle.HIGHEST_PROTOCOL)
+            pickle.dump(latest_timestamps, handle, protocol=pickle.HIGHEST_PROTOCOL)
 
     if store_positions:
         positions_df = pd.DataFrame.from_dict(
@@ -289,38 +256,34 @@ def get_bufr(
     logger.info("--------------------------------")
 
 
-def filter_skipped_variables(
-    row: pd.Series, vars_to_skip: Collection[str]
-) -> pd.Series:
-    """
-    Mutate input series by setting var_to_skip to np.nan
-
-    Parameters
-    ----------
-    row
-    vars_to_skip
-        List of variable names to be skipped
-
-    Returns
-    -------
-    Input series
-
-    """
-    vars_to_skip = set(row.keys()) & set(vars_to_skip)
-    for var_key in vars_to_skip:
-        row[var_key] = np.nan
-        logger.info("----> Skipping var: {}".format(var_key))
-    return row
+def get_station_positions(latest_data: pd.Series) -> Dict:
+    station_position = dict()
+    station_position["timestamp"] = latest_data.name
+    station_position["lat"] = latest_data["gps_lat_fit"]
+    station_position["lon"] = latest_data["gps_lon_fit"]
+    station_position["alt"] = latest_data["gps_alt_fit"]
+    if any(
+        [
+            pd.isna(station_position["lat"]),
+            pd.isna(station_position["lon"]),
+            pd.isna(station_position["alt"]),
+        ]
+    ):
+        logger.warning("Insufficient position data")
+        station_position["lat"] = None
+        station_position["lon"] = None
+        station_position["alt"] = None
+    return station_position
 
 
 def get_bufr_variables(
     data: pd.Series,
     station_configuration: StationConfiguration,
-) -> BUFRVariables:
+) -> Optional[BUFRVariables]:
     """
     Helper function for converting our variables to the variables needed for bufr export.
 
-    Raises AttributeError if station_configuration dont have the minimum dimension fields since they are required to determine barometer heights.
+    Raises AttributeError if station_configuration doesn't have the minimum dimension fields since they are required to determine barometer heights.
     * height_of_gps_from_station_ground
     * barometer_from_gps
 
@@ -329,7 +292,7 @@ def get_bufr_variables(
     Parameters
     ----------
     data
-        Series with processed l3 variables from get_latest_datas
+        Series with processed variables from get_latest_data
 
     station_configuration
 
@@ -339,6 +302,24 @@ def get_bufr_variables(
 
     """
 
+    if not all(key in data.index for key in REQUIRED_KEYS):
+        raise ValueError(
+            f"Failed to process BUFRVariables. Missing required keys: {REQUIRED_KEYS}"
+        )
+
+    # Check that we have minimum required fields to proceed with writing to BUFR
+    # Always require minimum a valid air temp or a valid pressure.
+    # If both air temp and pressure are nan, do not submit.
+    # This will allow the case of having only one or the other.
+    if data[["t_i", "p_i"]].isna().all():
+        logger.warning("Failed to process BUFRVariables - insufficient data")
+        return None
+
+    # Always require a valid position data
+    if data[["gps_lat_fit", "gps_lon_fit", "gps_alt_fit"]].isna().any():
+        logger.warning("Failed to process BUFRVariables - insufficient position data")
+        return None
+
     if station_configuration.height_of_gps_from_station_ground is None:
         raise AttributeError(
             "height_of_gps_from_station_ground is required for BUFR export"
@@ -362,7 +343,6 @@ def get_bufr_variables(
         height_of_gps_above_mean_sea_level + station_configuration.barometer_from_gps
     )
 
-
     if station_configuration.temperature_from_sonic_ranger is None:
         heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH = np.nan
     else:
@@ -402,59 +382,6 @@ def get_bufr_variables(
     return output_row
 
 
-def min_data_check(s):
-    """Check that we have minimum required fields to proceed with writing to BUFR
-    For wx vars, we currently require both air temp and pressure to be non-NaN.
-    If you know a specific var is reporting bad data, you can ignore just that var
-    using the vars_to_skip dict in wmo_config.
-
-    Parameters
-    ----------
-    s : pandas series
-        The current obset we are working with (for BUFR submission)
-
-    Returns
-    -------
-    min_data_wx_result : bool
-        True (default), the test for min wx data passed. False, the test failed.
-    min_data_pos_result : bool
-        True (default), the test for min position data passed. False, the test failed.
-    """
-    min_data_wx_result = True
-    min_data_pos_result = True
-
-    # Can use pd.isna() or math.isnan() below...
-
-    # Always require valid air temp and valid pressure (both must be non-nan)
-    # if (pd.isna(s['t_i']) is False) and (pd.isna(s['p_i']) is False):
-    #     pass
-    # else:
-    #     print('----> Failed min_data_check for air temp and pressure!')
-    #     min_data_wx_result = False
-
-    # If both air temp and pressure are nan, do not submit.
-    # This will allow the case of having only one or the other.
-    if (pd.isna(s["t_i"]) is True) and (pd.isna(s["p_i"]) is True):
-        logger.warning("----> Failed min_data_check for air temp and pressure!")
-        min_data_wx_result = False
-
-    # Missing just elevation OK
-    # if (pd.isna(s['gps_lat_fit']) is False) and (pd.isna(s['gps_lon_fit']) is False):
-    #     pass
-    # Require all three: lat, lon, elev
-    if (
-        (pd.isna(s["gps_lat_fit"]) is False)
-        and (pd.isna(s["gps_lon_fit"]) is False)
-        and (pd.isna(s["gps_alt_fit"]) is False)
-    ):
-        pass
-    else:
-        logger.warning("----> Failed min_data_check for position!")
-        min_data_pos_result = False
-
-    return min_data_wx_result, min_data_pos_result
-
-
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -473,6 +400,7 @@ def main():
         help="Path to write AWS_latest_locations.csv file.",
     )
     parser.add_argument(
+        "--linear_regression_time_limit",
         "--time-limit",
         default=DEFAULT_LIN_REG_TIME_LIMIT,
         type=str,
@@ -481,12 +409,11 @@ def main():
     )
     parser.add_argument(
         "--input_files",
-        "--l3-filepath",
         "-i",
         type=Path,
         nargs="+",
         required=True,
-        help="Path to L3 tx .csv files. Can be direct paths or glob patterns",
+        help="Path to input files .csv files. Can be direct paths or glob patterns",
     )
     parser.add_argument(
         "--bufr-out",
@@ -502,7 +429,7 @@ def main():
         help="Path to the latest_timestamps.pickle file.",
     )
     parser.add_argument(
-        "--station_configuration_root",
+        "--station_configurations_root",
         type=Path,
         required=True,
         help="Path to root directory containing station configuration toml files",
@@ -515,7 +442,8 @@ def main():
         help="Path to csv file with seed values for output positions.",
     )
     parser.add_argument(
-        "--latest_timestamp",
+        "--target_timestamp",
+        "--now-timestamp",
         default=datetime.utcnow(),
         type=pd.Timestamp,
         help="Timestamp used to determine latest data. Default utcnow.",
@@ -542,17 +470,19 @@ def main():
             # The input path might be a glob pattern
             input_files += map(Path, glob.glob(path.as_posix()))
 
-    station_configuration_mapping = load_station_configuration_mapping(args.station_configuration_root)
+    station_configuration_mapping = load_station_configuration_mapping(
+        args.station_configurations_root
+    )
 
     get_bufr(
         bufr_out=args.bufr_out,
         input_files=input_files,
         store_positions=args.store_positions,
         positions_filepath=args.positions_filepath,
-        time_limit=args.time_limit,
+        linear_regression_time_limit=args.linear_regression_time_limit,
         timestamps_pickle_filepath=args.timestamps_pickle_filepath,
-        now_timestamp=args.latest_timestamp,
-        station_configuration_mapping=args.station_configuration_mapping,
+        target_timestamp=args.target_timestamp,
+        station_configuration_mapping=station_configuration_mapping,
         positions_seed_path=args.position_seed,
     )
 
diff --git a/src/pypromice/postprocess/real_time_utilities.py b/src/pypromice/postprocess/real_time_utilities.py
index 2352f30d..f79f9ca0 100644
--- a/src/pypromice/postprocess/real_time_utilities.py
+++ b/src/pypromice/postprocess/real_time_utilities.py
@@ -7,7 +7,7 @@
 
 """
 import logging
-from typing import Optional
+from typing import Optional, Collection
 
 import numpy as np
 import pandas as pd
@@ -22,6 +22,7 @@
 def get_latest_data(
     df: pd.DataFrame,
     lin_reg_time_limit: str,
+    vars_to_skip: Optional[Collection[str]] = None,
 ) -> Optional[pd.Series]:
     """
     Determine instantaneous values for the latest valid timestamp in the input dataframe
@@ -77,9 +78,36 @@ def get_latest_data(
     # limit to single most recent valid row (convert to series)
     s_current = df_limited.loc[last_valid_index]
 
+    if vars_to_skip is not None:
+        s_current = filter_skipped_variables(s_current, vars_to_skip)
+
     return s_current
 
 
+def filter_skipped_variables(
+    row: pd.Series, vars_to_skip: Collection[str]
+) -> pd.Series:
+    """
+    Mutate input series by setting the variables in vars_to_skip to np.nan
+
+    Parameters
+    ----------
+    row
+    vars_to_skip
+        List of variable names to be skipped
+
+    Returns
+    -------
+    Input series
+
+    """
+    vars_to_skip = set(row.keys()) & set(vars_to_skip)
+    for var_key in vars_to_skip:
+        row[var_key] = np.nan
+        logger.info("----> Skipping var: {}".format(var_key))
+    return row
+
+
 def rolling_window(df, column, window, min_periods, decimals) -> pd.DataFrame:
     """Apply a rolling window (smoothing) to the input column
 
diff --git a/src/pypromice/station_configuration.py b/src/pypromice/station_configuration.py
index 34e85ff3..fb8d5439 100644
--- a/src/pypromice/station_configuration.py
+++ b/src/pypromice/station_configuration.py
@@ -57,14 +57,14 @@ def as_dict(self) -> Dict:
 
 
 def load_station_configuration_mapping(
-    configuration_root_dir: Path,
+    configurations_root_dir: Path,
 ) -> Mapping[str, StationConfiguration]:
     """
-    Load station configurations from toml files in configuration_root_dir
+    Load station configurations from toml files in configurations_root_dir
 
     Parameters
     ----------
-    configuration_root_dir
+    configurations_root_dir
         Root directory containing toml files
 
     Returns
@@ -74,26 +74,26 @@ def load_station_configuration_mapping(
     """
     return {
         config_file.stem: StationConfiguration(**toml.load(config_file))
-        for config_file in configuration_root_dir.glob("*.toml")
+        for config_file in configurations_root_dir.glob("*.toml")
     }
 
 
 def write_station_configuration_mapping(
     station_configurations: Mapping[str, StationConfiguration],
-    configuration_root_dir: Path,
+    configurations_root_dir: Path,
 ) -> None:
     """
-    Write station configurations to toml files in configuration_root_dir
+    Write station configurations to toml files in configurations_root_dir
 
     Parameters
     ----------
     station_configurations
         Mapping from stid to StationConfiguration
-    configuration_root_dir
+    configurations_root_dir
         Output directory
 
     """
-    configuration_root_dir.mkdir(parents=True, exist_ok=True)
+    configurations_root_dir.mkdir(parents=True, exist_ok=True)
     for stid, station_configuration in station_configurations.items():
-        with (configuration_root_dir / f"{stid}.toml").open("w") as fp:
+        with (configurations_root_dir / f"{stid}.toml").open("w") as fp:
             toml.dump(station_configuration.as_dict(), fp)
diff --git a/tests/unit/bufr_export/test_bufr_utilitites.py b/tests/unit/bufr_export/test_bufr_utilitites.py
index 49ecc203..bd9ec586 100644
--- a/tests/unit/bufr_export/test_bufr_utilitites.py
+++ b/tests/unit/bufr_export/test_bufr_utilitites.py
@@ -194,7 +194,7 @@ def test_precision(self):
             timestamp=datetime.datetime(2023, 12, 19, 10, 0),
             relativeHumidity=np.random.random(),
             airTemperature=np.random.random(),
-            pressure=1000*np.random.random(),
+            pressure=1000 * np.random.random(),
             windDirection=np.random.random(),
             windSpeed=np.random.random(),
             latitude=np.random.random(),
diff --git a/tests/unit/bufr_export/test_create_bufr_files.py b/tests/unit/bufr_export/test_create_bufr_files.py
index 2cb25afc..1b79b421 100644
--- a/tests/unit/bufr_export/test_create_bufr_files.py
+++ b/tests/unit/bufr_export/test_create_bufr_files.py
@@ -56,7 +56,7 @@ def test_create_bufr_files(self):
         }
         write_station_configuration_mapping(
             station_configurations=station_configuration_mapping,
-            configuration_root_dir=station_configuration_root,
+            configurations_root_dir=station_configuration_root,
         )
 
         create_bufr_files(
@@ -110,7 +110,7 @@ def test_get_bufr_from_empty_data_file_raises_error(self):
         )
         write_station_configuration_mapping(
             station_configurations={station_configuration.stid: station_configuration},
-            configuration_root_dir=station_configuration_root,
+            configurations_root_dir=station_configuration_root,
         )
 
         with self.assertRaises(ValueError):
@@ -141,7 +141,7 @@ def test_get_bufr_continues_when_break_on_error_is_false(self):
                 "THU_L2": get_station_configuration(stid="THU_L2", export_bufr=True),
                 "KAN_Lv3": get_station_configuration(stid="KAN_Lv3", export_bufr=True),
             },
-            configuration_root_dir=station_configuration_root,
+            configurations_root_dir=station_configuration_root,
         )
         expected_compiled_output_file = compiled_output_dir / "geus_20231206T0000.bufr"
         expected_individual_output_dir = individual_output_root / "20231206T0000"
@@ -184,7 +184,7 @@ def test_get_bufr_where_period_does_not_exist(self):
         )
         write_station_configuration_mapping(
             station_configurations={station_configuration.stid: station_configuration},
-            configuration_root_dir=station_configuration_root,
+            configurations_root_dir=station_configuration_root,
         )
 
         create_bufr_files(
diff --git a/tests/unit/bufr_export/test_get_bufr.py b/tests/unit/bufr_export/test_get_bufr.py
index a100cd55..83b650b3 100644
--- a/tests/unit/bufr_export/test_get_bufr.py
+++ b/tests/unit/bufr_export/test_get_bufr.py
@@ -1,29 +1,23 @@
 import datetime
 import logging
-import pickle
+import random
 import sys
-import unittest
-import uuid
+import tempfile
+from io import BufferedWriter
 from pathlib import Path
-from tempfile import TemporaryDirectory
 from unittest import TestCase, mock
 
-import numpy as np
 import pandas as pd
 
 from pypromice.postprocess.bufr_utilities import BUFRVariables
 from pypromice.postprocess.get_bufr import (
-    process_station,
-    get_bufr,
+    get_station_positions,
     get_bufr_variables,
+    REQUIRED_KEYS,
+    get_bufr,
 )
-from pypromice.station_configuration import (
-    StationConfiguration,
-)
-from tests.unit.bufr_export.test_get_bufr_integration import (
-    DATA_DIR,
-    run_get_bufr,
-)
+from pypromice.station_configuration import StationConfiguration
+from tests.utilities import get_station_configuration
 
 logging.basicConfig(
     stream=sys.stdout,
@@ -31,12 +25,106 @@
     level=logging.WARNING,
 )
 
-MOCK_BASE_STR = "pypromice.postprocess.get_bufr.{}"
 
+class GetStationPositionsTestCase(TestCase):
+    def test_all_data_available(self):
+        """
+        Test the get_station_positions function
+        """
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
+        latest_data = pd.Series(
+            name=timestamp,
+            data={
+                "gps_lat_fit": 78.52901,
+                "gps_lon_fit": -56.8450358,
+                "gps_alt_fit": 1968.561,
+            },
+        )
+
+        positions = get_station_positions(latest_data=latest_data)
+
+        self.assertDictEqual(
+            positions,
+            dict(
+                timestamp=timestamp,
+                lat=78.52901,
+                lon=-56.8450358,
+                alt=1968.561,
+            ),
+        )
+
+    def test_missing_data(self):
+        """
+        Test the get_station_positions function with missing data
+        """
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
+        latest_data = pd.Series(
+            name=timestamp,
+            data={
+                "gps_lat_fit": 78.52901,
+                "gps_lon_fit": -56.8450358,
+            },
+        )
+
+        with self.assertRaises(KeyError):
+            get_station_positions(latest_data=latest_data)
+
+    def test_nan_latitude(self):
+        """
+        get_station_positions shall discard all position data if latitude is NaN
+        """
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
+        latest_data = pd.Series(
+            name=timestamp,
+            data={
+                "gps_lat_fit": float("nan"),
+                "gps_lon_fit": -56.8450358,
+                "gps_alt_fit": 1968.561,
+            },
+        )
 
-class BufrVariablesTestCase(TestCase):
+        positions = get_station_positions(latest_data=latest_data)
+
+        self.assertDictEqual(
+            positions,
+            dict(
+                timestamp=timestamp,
+                lat=None,
+                lon=None,
+                alt=None,
+            ),
+        )
+
+    def test_nan_altitude(self):
+        """
+        get_station_positions shall discard all position data if altitude is NaN
+        """
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
+        latest_data = pd.Series(
+            name=timestamp,
+            data={
+                "gps_lat_fit": 78.52901,
+                "gps_lon_fit": -56.8450358,
+                "gps_alt_fit": float("nan"),
+            },
+        )
+
+        positions = get_station_positions(latest_data=latest_data)
+
+        self.assertDictEqual(
+            positions,
+            dict(
+                timestamp=timestamp,
+                lat=None,
+                lon=None,
+                alt=None,
+            ),
+        )
+
+
+class TestGetBufrVariablesTestCase(TestCase):
     def test_bufr_variables_gcnet(self):
-        station_configuration = StationConfiguration(
+        config = StationConfiguration(
             stid="DY2",
             station_site="DY2",
             project="GC-Net",
@@ -49,85 +137,54 @@ def test_bufr_variables_gcnet(self):
             sonic_ranger_from_gps=0.15,
             export_bufr=True,
         )
-
-        self._test_bufr_variables(
-            stid=station_configuration.stid,
-            station_configuration=station_configuration,
-            relativeHumidity=69.0,
-            airTemperature=255.95,
-            pressure=77300.0,
-            windDirection=149.0,
-            windSpeed=14.9,
-            latitude=66.482488,
-            longitude=-46.294266,
-            heightOfStationGroundAboveMeanSeaLevel=2123.2,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.59,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
-            heightOfBarometerAboveMeanSeaLevel=2125.3,
-        )
-
-    def test_bufr_variables_promice_v2(self):
-        station_configuration = StationConfiguration(
-            stid="NUK_L",
-            station_site="NUK_L",
-            project="Promice",
-            station_type="mobile",
-            wmo_id="04403",
-            barometer_from_gps=-0.25,
-            anemometer_from_sonic_ranger=0.4,
-            temperature_from_sonic_ranger=0.0,
-            height_of_gps_from_station_ground=0.9,
-            sonic_ranger_from_gps=1.3,
-            export_bufr=True,
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
+        data = pd.Series(
+            name=timestamp,
+            data={
+                "t_i": -12.5,
+                "p_i": 3.1,
+                "rh_i": 0.5,
+                "wspd_i": 2.5,
+                "wdir_i": 182.1,
+                "z_boom_u_smooth": 1.6,
+                "gps_lat_fit": 78.52901,
+                "gps_lon_fit": -56.8450358,
+                "gps_alt_fit": 1968.561,
+            },
         )
-        self._test_bufr_variables(
-            stid=station_configuration.stid,
-            station_configuration=station_configuration,
-            relativeHumidity=69.0,
-            airTemperature=255.95,
-            pressure=77300.0,
-            windDirection=149.0,
-            windSpeed=14.9,
-            latitude=66.482488,
-            longitude=-46.294266,
-            heightOfStationGroundAboveMeanSeaLevel=2123.8,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.19,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
-            heightOfBarometerAboveMeanSeaLevel=2124.5,
+        expected_bufr_variables = BUFRVariables(
+            wmo_id=config.wmo_id,
+            station_type=config.station_type,
+            timestamp=timestamp,
+            relativeHumidity=data.rh_i,
+            airTemperature=data.t_i + 273.15,
+            pressure=100310,
+            windDirection=data.wdir_i,
+            windSpeed=data.wspd_i,
+            latitude=data.gps_lat_fit,
+            longitude=data.gps_lon_fit,
+            heightOfStationGroundAboveMeanSeaLevel=data.gps_alt_fit
+            - config.height_of_gps_from_station_ground,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=data.z_boom_u_smooth
+            + config.temperature_from_sonic_ranger,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=data.z_boom_u_smooth
+            + config.anemometer_from_sonic_ranger,
+            heightOfBarometerAboveMeanSeaLevel=data.gps_alt_fit
+            + config.barometer_from_gps,
+        )
+
+        bufr_variables = get_bufr_variables(
+            data=data,
+            station_configuration=config,
         )
 
-    def test_bufr_variables_promice_v3(self):
-        station_configuration = StationConfiguration(
-            stid="QAS_Mv3",
-            station_site="QAS_M",
-            project="Promice",
-            station_type="mobile",
-            wmo_id="04441",
-            barometer_from_gps=1.3,
-            anemometer_from_sonic_ranger=0.4,
-            temperature_from_sonic_ranger=0.0,
-            height_of_gps_from_station_ground=0.9,
-            sonic_ranger_from_gps=1.3,
-            export_bufr=True,
-        )
-        self._test_bufr_variables(
-            stid=station_configuration.stid,
-            station_configuration=station_configuration,
-            relativeHumidity=69.0,
-            airTemperature=255.95,
-            pressure=77300.0,
-            windDirection=149.0,
-            windSpeed=14.9,
-            latitude=66.482488,
-            longitude=-46.294266,
-            heightOfStationGroundAboveMeanSeaLevel=2123.8,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=4.19,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=4.59,
-            heightOfBarometerAboveMeanSeaLevel=2126,
+        pd.testing.assert_series_equal(
+            bufr_variables.as_series(),
+            expected_bufr_variables.as_series(),
         )
 
     def test_bufr_variables_static_gps_elevation(self):
-        timestamp = datetime.datetime.now()
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
         data = pd.Series(
             data=dict(
                 rh_i=0.93,
@@ -144,7 +201,7 @@ def test_bufr_variables_static_gps_elevation(self):
             ),
             name=timestamp,
         )
-        station_config = StationConfiguration(
+        config = StationConfiguration(
             stid="A_STID",
             station_type="land",
             wmo_id="4201",
@@ -159,10 +216,9 @@ def test_bufr_variables_static_gps_elevation(self):
         # The elevations should be determined from the static variable
         expected_station_ground_elevation = 17.5 - 0.9
         expected_barometer_elevation = 17.5 + 1.3
-
         expected_bufr_variables = BUFRVariables(
-            wmo_id=station_config.wmo_id,
-            station_type=station_config.station_type,
+            wmo_id=config.wmo_id,
+            station_type=config.station_type,
             timestamp=timestamp,
             relativeHumidity=1.0,
             airTemperature=252.15,  # Converted to kelvin
@@ -174,25 +230,25 @@ def test_bufr_variables_static_gps_elevation(self):
             heightOfStationGroundAboveMeanSeaLevel=expected_station_ground_elevation,
             heightOfBarometerAboveMeanSeaLevel=expected_barometer_elevation,
             # The sensor heights are ignored since the necessary dimension values are missing
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=np.nan,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=np.nan,
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=float("nan"),
+            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=float("nan"),
         )
 
-        output = get_bufr_variables(
-            data,
-            station_configuration=station_config,
+        bufr_variables = get_bufr_variables(
+            data=data,
+            station_configuration=config,
         )
 
-        self.assertEqual(
-            expected_bufr_variables,
-            output,
+        pd.testing.assert_series_equal(
+            bufr_variables.as_series(),
+            expected_bufr_variables.as_series(),
         )
 
     def test_fails_on_missing_dimension_values(self):
         """
         Test that get_bufr_variables raises an AttributeError if the data is missing
         """
-        timestamp = datetime.datetime.now()
+        timestamp = pd.to_datetime("2024-03-01 00:00:00")
         data = pd.Series(
             data=dict(
                 rh_i=0.93,
@@ -208,701 +264,597 @@ def test_fails_on_missing_dimension_values(self):
             ),
             name=timestamp,
         )
-        station_config = StationConfiguration(
+        config = StationConfiguration(
             stid="A_STID",
             station_type="land",
             wmo_id="4201",
             export_bufr=True,
         )
 
-        with self.assertRaises(AttributeError) as context:
+        with self.assertRaises(AttributeError):
             get_bufr_variables(
                 data,
-                station_configuration=station_config,
+                station_configuration=config,
             )
 
-    @mock.patch("pypromice.postprocess.get_bufr.write_bufr_message")
-    def _test_bufr_variables(
-        self,
-        write_bufr_message_mock: mock.MagicMock,
-        stid: str,
-        station_configuration: StationConfiguration,
-        relativeHumidity: float,
-        airTemperature: float,
-        pressure: float,
-        windDirection: float,
-        windSpeed: float,
-        latitude: float,
-        longitude: float,
-        heightOfStationGroundAboveMeanSeaLevel: float,
-        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH: float,
-        heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD: float,
-        heightOfBarometerAboveMeanSeaLevel: float,
-    ):
-        l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
-        l3_src = pd.read_csv(l3_src_filepath)
-        now_timestamp = datetime.datetime(2023, 12, 8)
-
-        timestamps = {}
-        run_get_bufr(
-            l3_data=l3_src,
-            now_timestamp=now_timestamp,
-            latest_timestamps=timestamps,
-            stid=stid,
-            store_positions=True,
-            time_limit="91d",
-            station_configuration_mapping={
-                station_configuration.stid: station_configuration
+    def test_nan_location_yields_none(self):
+        config = get_station_configuration(export_bufr=True)
+        data = pd.Series(
+            name=pd.to_datetime("2024-03-01 00:00:00"),
+            data={
+                "t_i": -12.5,
+                "p_i": 1003.1,
+                "rh_i": 0.5,
+                "wspd_i": 2.5,
+                "wdir_i": 182.1,
+                "z_boom_u_smooth": 1.6,
+                "gps_lat_fit": 78.52901,
+                "gps_lon_fit": float("nan"),
+                "gps_alt_fit": 1968.561,
             },
         )
 
-        write_bufr_message_mock.assert_called_once()
-        call = write_bufr_message_mock.call_args_list[0]
-        expected_time = datetime.datetime(year=2023, month=12, day=7, hour=23)
-        expected_bufr_variables = BUFRVariables(
-            wmo_id=station_configuration.wmo_id,
-            station_type=station_configuration.station_type,
-            timestamp=expected_time,
-            relativeHumidity=relativeHumidity,
-            airTemperature=airTemperature,
-            pressure=pressure,
-            windDirection=windDirection,
-            windSpeed=windSpeed,
-            latitude=latitude,
-            longitude=longitude,
-            heightOfStationGroundAboveMeanSeaLevel=heightOfStationGroundAboveMeanSeaLevel,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH=heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH,
-            heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD=heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD,
-            heightOfBarometerAboveMeanSeaLevel=heightOfBarometerAboveMeanSeaLevel,
+        return_value = get_bufr_variables(
+            data,
+            station_configuration=config,
         )
-        pd.testing.assert_series_equal(
-            pd.Series(expected_bufr_variables),
-            pd.Series(call.kwargs["variables"]),
+
+        self.assertIsNone(return_value)
+
+    def test_nan_t_i_and_p_i_yields_none(self):
+        config = get_station_configuration(export_bufr=True)
+        data = pd.Series(
+            name=pd.to_datetime("2024-03-01 00:00:00"),
+            data={
+                "t_i": float("nan"),
+                "p_i": float("nan"),
+                "rh_i": 0.5,
+                "wspd_i": 2.5,
+                "wdir_i": 182.1,
+                "z_boom_u_smooth": 1.6,
+                "gps_lat_fit": 78.52901,
+                "gps_lon_fit": -56.8450358,
+                "gps_alt_fit": 1968.561,
+            },
         )
 
+        return_value = get_bufr_variables(
+            data,
+            station_configuration=config,
+        )
+
+        self.assertIsNone(return_value)
+
+    def test_missing_keys(self):
+        config = get_station_configuration(export_bufr=True)
+        for key in REQUIRED_KEYS:
+            data = pd.Series(
+                name=pd.to_datetime("2024-03-01 00:00:00"),
+                data={
+                    "t_i": -12.5,
+                    "p_i": 1003.1,
+                    "rh_i": 0.5,
+                    "wspd_i": 2.5,
+                    "wdir_i": 182.1,
+                    "z_boom_u_smooth": 1.6,
+                    "gps_lat_fit": 78.52901,
+                    "gps_lon_fit": -56.8450358,
+                    "gps_alt_fit": 1968.561,
+                },
+            )
+            del data[key]
+
+            with self.assertRaises(ValueError, msg=f"Key: {key}"):
+                get_bufr_variables(
+                    data=data,
+                    station_configuration=config,
+                )
+
+
+MOCK_BASE_STR = "pypromice.postprocess.get_bufr.{}"
+
 
+@mock.patch(MOCK_BASE_STR.format("get_station_positions"))
 @mock.patch(MOCK_BASE_STR.format("get_bufr_variables"))
 @mock.patch(MOCK_BASE_STR.format("write_bufr_message"))
 @mock.patch(MOCK_BASE_STR.format("get_latest_data"))
 @mock.patch(MOCK_BASE_STR.format("load_data"))
-class ProcessStationTestCase(unittest.TestCase):
-    def setUp(self) -> None:
-        self.file_path = mock.create_autospec(Path)
-        self.output_path = mock.create_autospec(Path)
-        self.now_timestamp = mock.create_autospec(datetime.datetime)
-        self.time_limit = mock.create_autospec(str)
-        self.stid = str(uuid.uuid4())
-        self.station_configuration = mock.MagicMock()
-        self.earliest_timestamp = mock.MagicMock()
-
-    def test_process_station_no_new_data(
+class TestGetBufrTestCase(TestCase):
+    def test_has_new_data(
         self,
         load_data_mock: mock.MagicMock,
         get_latest_data_mock: mock.MagicMock,
         write_bufr_message_mock: mock.MagicMock,
         get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        self.earliest_timestamp = datetime.datetime(2023, 10, 3)
-        latest_data_datetime = datetime.datetime(2023, 10, 3)
-        get_latest_data_mock.return_value = pd.Series(
-            data={
-                "p_i": -227.1,
-                "t_i": -16.7,
-                "rh_i": 84.6,
-                "wspd_i": 14.83,
-                "wdir_i": 142.2,
-                "gps_lat": 66.482469,
-                "gps_lon": -46.294232,
-                "gps_alt": 2116.0,
-                "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482474,
-                "gps_lon_fit": -46.294261,
-                "gps_alt_fit": 2119.6,
-                "z_boom_u_smooth": 4.2,
-            },
-            name=latest_data_datetime,
-        )
-        expected_output = {
-            "timestamp": latest_data_datetime,
-            "lat": 66.482474,
-            "lon": -46.294261,
-            "alt": 2119.6,
-        }
-
-        output = process_station(
-            file_path=self.file_path,
-            output_path=self.output_path,
-            now_timestamp=self.now_timestamp,
-            latest_timestamp=self.earliest_timestamp,
-            time_limit=self.time_limit,
-            stid=self.stid,
-            station_configuration=self.station_configuration,
-        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            output_path = root_path / "bufr_out"
+            station_config = get_station_configuration(
+                export_bufr=True,
+                positions_update_timestamp_only=False,
+            )
+            input_file = root_path / "input" / f"{station_config.stid}_hour.csv"
+            positions_filepath = root_path / "positions.csv"
+            timestamps_pickle_filepath = root_path / "timestamps.pickle"
+            now_timestamp = pd.to_datetime("2024-03-01 00:12:00")
+            latest_timestamp = pd.to_datetime("2024-03-01 00:01:00")
+            get_latest_data_mock.return_value.name = latest_timestamp
+            get_station_positions_mock.return_value = dict(
+                timestamp=latest_timestamp,
+                lat=78.52901,
+                lon=-56.8450358,
+                alt=1968.561,
+            )
 
-        self.assertDictEqual(
-            output,
-            expected_output,
-        )
-        get_bufr_variables_mock.assert_not_called()
-        write_bufr_message_mock.assert_not_called()
+            get_bufr(
+                input_files=[input_file],
+                station_configuration_mapping={station_config.stid: station_config},
+                break_on_error=True,
+                bufr_out=output_path,
+                target_timestamp=now_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=timestamps_pickle_filepath,
+            )
 
-    def test_process_station_has_new_data(
+            load_data_mock.assert_called_once_with(input_file, now_timestamp)
+            get_latest_data_mock.assert_called_once_with(
+                load_data_mock.return_value,
+                lin_reg_time_limit="91d",
+                vars_to_skip=station_config.skipped_variables,
+            )
+            get_station_positions_mock.assert_called_once_with(
+                get_latest_data_mock.return_value
+            )
+            get_bufr_variables_mock.assert_called_once_with(
+                get_latest_data_mock.return_value,
+                station_config,
+            )
+            write_bufr_message_mock.assert_called_once_with(
+                get_bufr_variables_mock.return_value,
+                mock.ANY,
+            )
+            # Write bufr is invoked with an open file object. It is therefore necessary to check the path of the file
+            expected_output_file_path = output_path / f"{station_config.stid}.bufr"
+            output_file = write_bufr_message_mock.call_args[0][1]
+            self.assertIsInstance(output_file, BufferedWriter)
+            self.assertEqual(Path(output_file.name), expected_output_file_path)
+            written_positions = pd.read_csv(
+                positions_filepath, index_col=0, parse_dates=["timestamp"]
+            )
+            self.assertDictEqual(
+                get_station_positions_mock.return_value,
+                dict(written_positions.loc[station_config.stid]),
+            )
+            self.assertTrue(timestamps_pickle_filepath.exists())
+            timestamps = pd.read_pickle(timestamps_pickle_filepath)
+            self.assertDictEqual(
+                timestamps,
+                {station_config.stid: latest_timestamp},
+            )
+
+    def test_no_new_data(
         self,
         load_data_mock: mock.MagicMock,
         get_latest_data_mock: mock.MagicMock,
         write_bufr_message_mock: mock.MagicMock,
         get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        self.earliest_timestamp = datetime.datetime(2023, 10, 2)
-        latest_data_datetime = datetime.datetime(2023, 10, 3)
-        get_latest_data_mock.return_value = pd.Series(
-            data={
-                "p_i": -227.1,
-                "t_i": -16.7,
-                "rh_i": 84.6,
-                "wspd_i": 14.83,
-                "wdir_i": 142.2,
-                "gps_lat": 66.482469,
-                "gps_lon": -46.294232,
-                "gps_alt": 2116.0,
-                "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482474,
-                "gps_lon_fit": -46.294261,
-                "gps_alt_fit": 2119.6,
-                "z_boom_u_smooth": 4.2,
-            },
-            name=latest_data_datetime,
-        )
-        expected_output = {
-            "timestamp": latest_data_datetime,
-            "lat": 66.482474,
-            "lon": -46.294261,
-            "alt": 2119.6,
-        }
-
-        output = process_station(
-            file_path=self.file_path,
-            output_path=self.output_path,
-            now_timestamp=self.now_timestamp,
-            latest_timestamp=self.earliest_timestamp,
-            time_limit=self.time_limit,
-            stid=self.stid,
-            station_configuration=self.station_configuration,
-        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            output_path = root_path / "bur_out"
+            station_config = get_station_configuration(
+                export_bufr=True,
+                positions_update_timestamp_only=False,
+            )
+            input_file = root_path / "input" / f"{station_config.stid}_hour.csv"
+            positions_filepath = root_path / "positions.csv"
+            now_timestamp = pd.to_datetime("2024-03-01 00:12:00")
+            # The latest data is two months old
+            latest_timestamp = pd.to_datetime("2024-01-01 00:12:00")
+            get_latest_data_mock.return_value.name = latest_timestamp
+            get_station_positions_mock.return_value = dict(
+                timestamp=latest_timestamp,
+                lat=78.52901,
+                lon=-56.8450358,
+                alt=1968.561,
+            )
 
-        self.assertDictEqual(
-            output,
-            expected_output,
-        )
-        get_bufr_variables_mock.assert_called_once_with(
-            data=get_latest_data_mock.return_value,
-            station_configuration=self.station_configuration,
-        )
-        write_bufr_message_mock.assert_called_once_with(
-            variables=get_bufr_variables_mock.return_value,
-            file=self.output_path.open().__enter__(),
-        )
+            get_bufr(
+                input_files=[input_file],
+                station_configuration_mapping={station_config.stid: station_config},
+                break_on_error=True,
+                bufr_out=output_path,
+                target_timestamp=now_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
+
+            get_latest_data_mock.assert_called_once_with(
+                load_data_mock.return_value,
+                lin_reg_time_limit="91d",
+                vars_to_skip=station_config.skipped_variables,
+            )
+            get_station_positions_mock.assert_called_once_with(
+                get_latest_data_mock.return_value
+            )
+            get_bufr_variables_mock.assert_not_called()
+            write_bufr_message_mock.assert_not_called()
+            written_positions = pd.read_csv(
+                positions_filepath, index_col=0, parse_dates=["timestamp"]
+            )
+            self.assertDictEqual(
+                get_station_positions_mock.return_value,
+                dict(written_positions.loc[station_config.stid]),
+            )
 
-    def test_min_data_wx_failed(
+    def test_position_seed(
         self,
         load_data_mock: mock.MagicMock,
         get_latest_data_mock: mock.MagicMock,
         write_bufr_message_mock: mock.MagicMock,
         get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        self.earliest_timestamp = datetime.datetime(2023, 10, 2)
-        latest_data_datetime = datetime.datetime(2023, 10, 3)
-        get_latest_data_mock.return_value = pd.Series(
-            data={
-                "p_i": np.nan,
-                "t_i": np.nan,
-                "rh_i": 84.6,
-                "wspd_i": 14.83,
-                "wdir_i": 142.2,
-                "gps_lat": 66.482469,
-                "gps_lon": -46.294232,
-                "gps_alt": 2116.0,
-                "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482474,
-                "gps_lon_fit": -46.294261,
-                "gps_alt_fit": 2119.6,
-                "z_boom_u_smooth": 4.2,
-            },
-            name=latest_data_datetime,
-        )
-        expected_output = {
-            "timestamp": latest_data_datetime,
-            "lat": 66.482474,
-            "lon": -46.294261,
-            "alt": 2119.6,
-        }
-
-        output = process_station(
-            file_path=self.file_path,
-            output_path=self.output_path,
-            now_timestamp=self.now_timestamp,
-            latest_timestamp=self.earliest_timestamp,
-            time_limit=self.time_limit,
-            stid=self.stid,
-            station_configuration=self.station_configuration,
-        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            positions_filepath = root_path / "positions.csv"
+            positions_seed_path = root_path / "positions_seed.csv"
+            positions_seed = pd.DataFrame(
+                columns=["stid", "timestamp", "lat", "lon", "alt"],
+                data=[
+                    ["STATION_A", datetime.datetime(2021, 10, 2), 65.0, -40.0, 800],
+                    ["STATION_B", datetime.datetime(2023, 11, 12), 66.0, -50.0, 1100],
+                ],
+            ).set_index("stid")
+            positions_seed.to_csv(positions_seed_path, index=True)
+
+            get_bufr(
+                input_files=[],
+                station_configuration_mapping=dict(),
+                break_on_error=True,
+                bufr_out=mock.create_autospec(Path),
+                target_timestamp=mock.create_autospec(datetime.timedelta),
+                positions_filepath=positions_filepath,
+                positions_seed_path=positions_seed_path,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
 
-        # The BUFR export step shall be skipped
-        get_bufr_variables_mock.assert_not_called()
-        write_bufr_message_mock.assert_not_called()
-        self.assertDictEqual(
-            output,
-            expected_output,
-        )
+            written_positions = pd.read_csv(
+                positions_filepath, index_col="stid", parse_dates=["timestamp"]
+            )
+            pd.testing.assert_frame_equal(
+                positions_seed,
+                written_positions,
+            )
 
-    def test_min_data_pos_failed(
+    def test_no_input_paths(
         self,
         load_data_mock: mock.MagicMock,
         get_latest_data_mock: mock.MagicMock,
         write_bufr_message_mock: mock.MagicMock,
         get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        self.earliest_timestamp = datetime.datetime(2023, 10, 2)
-        latest_data_datetime = datetime.datetime(2023, 10, 3)
-        get_latest_data_mock.return_value = pd.Series(
-            data={
-                "p_i": -227.1,
-                "t_i": -16.7,
-                "rh_i": 84.6,
-                "wspd_i": 14.83,
-                "wdir_i": 142.2,
-                "gps_lat": 66.482469,
-                "gps_lon": -46.294232,
-                "gps_alt": 2116.0,
-                "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482474,
-                "gps_lon_fit": -46.294261,
-                "gps_alt_fit": np.nan,
-                "z_boom_u_smooth": 4.2,
-            },
-            name=latest_data_datetime,
-        )
-        expected_output = {
-            "timestamp": latest_data_datetime,
-            "lat": None,
-            "lon": None,
-            "alt": None,
-        }
-
-        output = process_station(
-            file_path=self.file_path,
-            output_path=self.output_path,
-            now_timestamp=self.now_timestamp,
-            latest_timestamp=self.earliest_timestamp,
-            time_limit=self.time_limit,
-            stid=self.stid,
-            station_configuration=self.station_configuration,
-        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            positions_filepath = root_path / "positions.csv"
+            get_bufr(
+                input_files=[],
+                station_configuration_mapping=dict(),
+                break_on_error=True,
+                bufr_out=mock.create_autospec(Path),
+                target_timestamp=mock.create_autospec(datetime.timedelta),
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
 
-        # The BUFR export step shall be skipped
-        get_bufr_variables_mock.assert_not_called()
-        write_bufr_message_mock.assert_not_called()
-        self.assertDictEqual(
-            output,
-            expected_output,
-        )
+            load_data_mock.assert_not_called()
+            get_latest_data_mock.assert_not_called()
+            get_station_positions_mock.assert_not_called()
+            get_bufr_variables_mock.assert_not_called()
+            write_bufr_message_mock.assert_not_called()
+            # The positions file should be created, but empty
+            self.assertTrue(positions_filepath.exists())
+            written_positions = pd.read_csv(
+                positions_filepath, index_col=0, parse_dates=["timestamp"]
+            )
+            self.assertEqual(0, len(written_positions))
 
-    def test_no_valid_data(
+    def test_get_latest_data_fails(
         self,
         load_data_mock: mock.MagicMock,
         get_latest_data_mock: mock.MagicMock,
         write_bufr_message_mock: mock.MagicMock,
         get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        get_latest_data_mock.return_value = None
-
-        output = process_station(
-            file_path=self.file_path,
-            output_path=self.output_path,
-            now_timestamp=self.now_timestamp,
-            latest_timestamp=self.earliest_timestamp,
-            time_limit=self.time_limit,
-            stid=self.stid,
-            station_configuration=self.station_configuration,
-        )
+        """
+        get_latest_data returns None when there are no valid data available for the station
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            positions_filepath = root_path / "positions.csv"
+            station_config = get_station_configuration(
+                export_bufr=True,
+                positions_update_timestamp_only=False,
+            )
+            input_file = root_path / "input" / f"{station_config.stid}_hour.csv"
+            target_timestamp = mock.create_autospec(datetime.timedelta)
+            get_latest_data_mock.return_value = None
+            get_bufr(
+                input_files=[input_file],
+                station_configuration_mapping={station_config.stid: station_config},
+                break_on_error=True,
+                bufr_out=mock.create_autospec(Path),
+                target_timestamp=target_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
 
-        load_data_mock.assert_called_once()
-        get_latest_data_mock.assert_called_once()
-        write_bufr_message_mock.assert_not_called()
-        get_bufr_variables_mock.assert_not_called()
-        self.assertIsNone(output)
+            load_data_mock.assert_called_once_with(input_file, target_timestamp)
+            get_latest_data_mock.assert_called_once_with(
+                load_data_mock.return_value,
+                lin_reg_time_limit="91d",
+                vars_to_skip=station_config.skipped_variables,
+            )
+            get_station_positions_mock.assert_not_called()
+            get_bufr_variables_mock.assert_not_called()
+            write_bufr_message_mock.assert_not_called()
+            self.assertTrue(positions_filepath.exists())
 
-    def test_skipped_variables(
+    def test_already_existing_in_latest_timestamps(
         self,
         load_data_mock: mock.MagicMock,
         get_latest_data_mock: mock.MagicMock,
         write_bufr_message_mock: mock.MagicMock,
         get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        self.earliest_timestamp = datetime.datetime(2023, 10, 2)
-        latest_data_datetime = datetime.datetime(2023, 10, 3)
-        original_p_i = 42.0
-        get_latest_data_mock.return_value = pd.Series(
-            data={
-                "p_i": original_p_i,
-                "t_i": -16.7,
-                "rh_i": 84.6,
-                "wspd_i": 14.83,
-                "wdir_i": 142.2,
-                "gps_lat": 66.482469,
-                "gps_lon": -46.294232,
-                "gps_alt": 2116.0,
-                "z_boom_u": 4.1901,
-                "gps_lat_fit": 66.482474,
-                "gps_lon_fit": -46.294261,
-                "gps_alt_fit": 2119.6,
-                "z_boom_u_smooth": 4.2,
-            },
-            name=latest_data_datetime,
-        )
-        self.station_configuration = StationConfiguration(
-            stid="A_STID",
-            station_site="A_STATION_SITE",
-            station_type="mobile",
-            wmo_id="04242",
-            skipped_variables=["p_i"],
-            height_of_gps_from_station_ground=1.4,
-            barometer_from_gps=0.1,
-            anemometer_from_sonic_ranger=0.1,
-            temperature_from_sonic_ranger=0.2,
-            export_bufr=True,
-        )
-        expected_output = {
-            "timestamp": latest_data_datetime,
-            "lat": 66.482474,
-            "lon": -46.294261,
-            "alt": 2119.6,
-        }
-        self.assertEqual(
-            original_p_i,
-            get_latest_data_mock.return_value["p_i"],
-        )
-
-        output = process_station(
-            file_path=self.file_path,
-            output_path=self.output_path,
-            now_timestamp=self.now_timestamp,
-            latest_timestamp=self.earliest_timestamp,
-            time_limit=self.time_limit,
-            stid=self.stid,
-            station_configuration=self.station_configuration,
-        )
-
-        self.assertTrue(
-            np.isnan(get_latest_data_mock.return_value["p_i"]),
-            "p_i shall be set to nan since it is in skipped_variables",
-        )
-        self.assertDictEqual(
-            output,
-            expected_output,
-        )
-        get_bufr_variables_mock.assert_called_once_with(
-            data=get_latest_data_mock.return_value,
-            station_configuration=self.station_configuration,
-        )
-        write_bufr_message_mock.assert_called_once_with(
-            variables=get_bufr_variables_mock.return_value,
-            file=self.output_path.open().__enter__(),
-        )
-
-
-class GetBufrTestCase(unittest.TestCase):
-    def setUp(self) -> None:
-        self.temporary_root = TemporaryDirectory()
-        self.root_path = Path(self.temporary_root.name)
-        self.l3_data_root = self.root_path / "l3"
-        self.l3_data_root.mkdir()
-        self.bufr_root = self.root_path / "bufr"
-        self.bufr_root.mkdir()
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            output_path = root_path / "bufr_out"
+            positions_filepath = root_path / "positions.csv"
+            station_config = get_station_configuration(
+                export_bufr=True,
+                positions_update_timestamp_only=False,
+            )
+            now_timestamp = pd.to_datetime("2024-03-01 00:12:00")
+            latest_timestamp = pd.to_datetime("2024-03-01 00:01:00")
+            input_file = root_path / "input" / f"{station_config.stid}_hour.csv"
+            timestamps_pickle_filepath = root_path / "timestamps.pickle"
+            latest_timestamps = {station_config.stid: latest_timestamp}
+            with timestamps_pickle_filepath.open("wb") as f:
+                pd.to_pickle(latest_timestamps, f)
+            get_latest_data_mock.return_value.name = latest_timestamp
+            get_station_positions_mock.return_value = dict(
+                timestamp=latest_timestamp,
+                lat=78.52901,
+                lon=-56.8450358,
+                alt=1968.561,
+            )
 
-        self.positions_file_path = self.root_path / "positions.csv"
-        self.positions_seed_path = self.root_path / "positions_seed.csv"
-        self.timestamps_pickle_filepath = self.root_path / "latest_timestamps.pickle"
-        self.station_configuration_path = self.root_path / "station_configuration.toml"
+            get_bufr(
+                input_files=[input_file],
+                station_configuration_mapping={station_config.stid: station_config},
+                break_on_error=True,
+                bufr_out=output_path,
+                target_timestamp=now_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=timestamps_pickle_filepath,
+            )
 
-    def tearDown(self) -> None:
-        self.temporary_root.cleanup()
+            get_station_positions_mock.assert_called_once()
+            # The BUFR export should be skipped since the latest timestamp is already in the timestamps
+            get_bufr_variables_mock.assert_not_called()
+            write_bufr_message_mock.assert_not_called()
+            self.assertTrue(positions_filepath.exists())
 
-    @mock.patch(MOCK_BASE_STR.format("process_station"))
-    def test_process_station_raises_exception(
-        self, process_station_mock: mock.MagicMock
+    def test_no_station_configuration(
+        self,
+        load_data_mock: mock.MagicMock,
+        get_latest_data_mock: mock.MagicMock,
+        write_bufr_message_mock: mock.MagicMock,
+        get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
     ):
-        """
-        get_bufr should skip stations where process_station raises exception
-        """
-        timestamps_pickle_filepath = self.root_path / "timestamps.pickle"
-        stid = "THE_STID_FOR_A_STATION"
-        input_file_path = self.root_path / f"{stid}_hourly.csv"
-        process_station_mock.side_effect = Exception("Test exception")
-        now_timestamp = datetime.datetime.now()
-        self.assertFalse(self.positions_file_path.exists())
-        self.assertFalse(timestamps_pickle_filepath.exists())
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=[input_file_path],
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=dict(),
-            timestamps_pickle_filepath=timestamps_pickle_filepath,
-            now_timestamp=now_timestamp,
-        )
-
-        self.assertTrue(self.positions_file_path.exists())
-        self.assertTrue(timestamps_pickle_filepath.exists())
-
-    @mock.patch(MOCK_BASE_STR.format("process_station"))
-    def test_multiple_stations(self, process_station_mock: mock.MagicMock):
-        station_config01 = StationConfiguration(stid="station_01", export_bufr=True)
-        station_config02 = StationConfiguration(stid="station_02", export_bufr=True)
-        station_config03 = StationConfiguration(stid="station_03", export_bufr=False)
-        process_station_return_values = {
-            station_config01.stid: dict(
-                timestamp=datetime.datetime(2023, 2, 1, 10), lat=1, lon=3, alt=31
-            ),
-            station_config02.stid: dict(
-                timestamp=datetime.datetime(2023, 2, 1, 10), lat=2, lon=3, alt=31
-            ),
-            station_config03.stid: dict(
-                timestamp=datetime.datetime(2023, 2, 1, 10), lat=3, lon=3, alt=31
-            ),
-        }
-        process_station_mock.side_effect = (
-            lambda **kwargs: process_station_return_values[
-                kwargs["station_configuration"].stid
-            ]
-        )
-        input_files = [
-            self.root_path / f"{station_config01.stid}_hourly.csv",
-            self.root_path / f"{station_config02.stid}_hourly.csv",
-            self.root_path / f"{station_config03.stid}_hourly.csv",
-        ]
-        station_config_mapping = {
-            station_config01.stid: station_config01,
-            station_config02.stid: station_config02,
-            station_config03.stid: station_config03,
-        }
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=input_files,
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=station_config_mapping,
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            positions_seed_path=None,
-            now_timestamp=datetime.datetime.now(),
-        )
-
-        self.assertEqual(3, process_station_mock.call_count)
-        read_positions = pd.read_csv(
-            self.positions_file_path, index_col=0, parse_dates=["timestamp"]
-        ).to_dict(orient="index")
-        self.assertDictEqual(
-            read_positions,
-            process_station_return_values,
-        )
-
-    def test_no_stations(self):
-        now_timestamp = datetime.datetime.now()
-        self.assertFalse(self.positions_file_path.exists())
-        self.assertFalse(self.timestamps_pickle_filepath.exists())
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=(),
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=dict(),
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            now_timestamp=now_timestamp,
-        )
-
-        self.assertTrue(self.positions_file_path.exists())
-        self.assertTrue(self.timestamps_pickle_filepath.exists())
-        positions = pd.read_csv(self.positions_file_path)
-        pd.testing.assert_frame_equal(
-            positions,
-            pd.DataFrame(columns=["stid", "timestamp", "lat", "lon", "alt"], data=[]),
-        )
-        with self.timestamps_pickle_filepath.open("br") as fp:
-            timestamps = pickle.load(fp)
-        self.assertDictEqual(dict(), timestamps)
-
-    @mock.patch(MOCK_BASE_STR.format("process_station"))
-    def test_single_station(self, process_station_mock: mock.MagicMock):
-        now_timestamp = datetime.datetime.now()
-        stid = "THE_STID_FOR_A_STATION"
-        input_file_path = self.root_path / f"{stid}_hourly.csv"
-        station_configuration = StationConfiguration(stid=stid, export_bufr=True)
-        station_configuration_mapping = {
-            stid: station_configuration,
-        }
-        expected_output_path = self.bufr_root / f"{stid}.bufr"
-        expected_latest_timestamp = now_timestamp - datetime.timedelta(days=2)
-        expected_station_configuration = StationConfiguration(
-            stid=stid, export_bufr=True
-        )
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=[input_file_path],
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=station_configuration_mapping,
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            positions_seed_path=None,
-            now_timestamp=now_timestamp,
-        )
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            output_path = root_path / "bufr_out"
+            positions_filepath = root_path / "positions.csv"
+            station_id = "A_STID"
+            now_timestamp = pd.to_datetime("2024-03-01 00:12:00")
+            latest_timestamp = pd.to_datetime("2024-03-01 00:01:00")
+            input_file = root_path / "input" / f"{station_id}_hour.csv"
+            get_latest_data_mock.return_value.name = latest_timestamp
+            get_station_positions_mock.return_value = dict(
+                timestamp=latest_timestamp,
+                lat=78.52901,
+                lon=-56.8450358,
+                alt=1968.561,
+            )
 
-        process_station_mock.assert_called_once_with(
-            file_path=input_file_path,
-            output_path=expected_output_path,
-            now_timestamp=now_timestamp,
-            latest_timestamp=expected_latest_timestamp,
-            time_limit="91d",
-            stid=stid,
-            station_configuration=expected_station_configuration,
-        )
+            get_bufr(
+                input_files=[input_file],
+                station_configuration_mapping=dict(),
+                break_on_error=True,
+                bufr_out=output_path,
+                target_timestamp=now_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+            )
 
-    @mock.patch(MOCK_BASE_STR.format("process_station"))
-    def test_station_without_configuration(self, process_station_mock: mock.MagicMock):
-        now_timestamp = datetime.datetime.now()
-        stid = "STATION_ID"
-        input_file_path = self.root_path / f"{stid}_hourly.csv"
-        expected_station_configuration = StationConfiguration(stid=stid)
-        expected_output_path = self.bufr_root / f"{stid}.bufr"
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=[input_file_path],
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=dict(),
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            positions_seed_path=None,
-            now_timestamp=now_timestamp,
-        )
+            get_station_positions_mock.assert_called_once()
+            get_bufr_variables_mock.assert_not_called()
+            write_bufr_message_mock.assert_not_called()
+            self.assertTrue(positions_filepath.exists())
 
-        process_station_mock.assert_called_once_with(
-            file_path=input_file_path,
-            output_path=expected_output_path,
-            now_timestamp=now_timestamp,
-            latest_timestamp=now_timestamp - datetime.timedelta(days=2),
-            time_limit="91d",
-            stid=stid,
-            station_configuration=expected_station_configuration,
-        )
+    def test_update_timestamps_only(
+        self,
+        load_data_mock: mock.MagicMock,
+        get_latest_data_mock: mock.MagicMock,
+        write_bufr_message_mock: mock.MagicMock,
+        get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
+    ):
+        pass
 
-    @mock.patch(MOCK_BASE_STR.format("process_station"))
-    def test_latest_timestamp(self, process_station_mock: mock.MagicMock):
-        stid = "STATION_ID"
-        now_timestamp = datetime.datetime(2022, 1, 5, 10, 21)
-        latest_timestamp = datetime.datetime(2022, 1, 5, 10, 0)
-        # Save latest timestamp to pickle file
-        with self.timestamps_pickle_filepath.open("wb") as fp:
-            pickle.dump({stid: latest_timestamp}, fp)
-        input_file_path = self.root_path / f"{stid}_hourly.csv"
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=[input_file_path],
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=dict(),
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            positions_seed_path=None,
-            now_timestamp=now_timestamp,
-        )
+    def test_cleans_up_when_on_exception(
+        self,
+        load_data_mock: mock.MagicMock,
+        get_latest_data_mock: mock.MagicMock,
+        write_bufr_message_mock: mock.MagicMock,
+        get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
+    ):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            positions_filepath = root_path / "positions.csv"
+            station_config = get_station_configuration(
+                export_bufr=True,
+                positions_update_timestamp_only=False,
+            )
+            input_file = root_path / "input" / f"{station_config.stid}_hour.csv"
+            target_timestamp = mock.create_autospec(datetime.timedelta)
+            get_latest_data_mock.side_effect = Exception("Test exception")
+
+            get_bufr(
+                input_files=[input_file],
+                station_configuration_mapping={station_config.stid: station_config},
+                break_on_error=False,
+                bufr_out=mock.create_autospec(Path),
+                target_timestamp=target_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
 
-        process_station_mock.assert_called_once_with(
-            file_path=input_file_path,
-            output_path=self.bufr_root / f"{stid}.bufr",
-            now_timestamp=now_timestamp,
-            latest_timestamp=latest_timestamp,
-            time_limit="91d",
-            stid=stid,
-            station_configuration=StationConfiguration(stid=stid),
-        )
+            load_data_mock.assert_called_once_with(input_file, target_timestamp)
+            get_latest_data_mock.assert_called_once_with(
+                load_data_mock.return_value,
+                lin_reg_time_limit="91d",
+                vars_to_skip=station_config.skipped_variables,
+            )
+            get_station_positions_mock.assert_not_called()
+            get_bufr_variables_mock.assert_not_called()
+            write_bufr_message_mock.assert_not_called()
+            self.assertTrue(positions_filepath.exists())
 
-    @mock.patch(MOCK_BASE_STR.format("process_station"))
-    def test_update_timestamp_only(self, process_station_mock: mock.MagicMock):
-        stid = "STATION_ID"
-        # Prepare station config
-        station_config = StationConfiguration(
-            stid=stid, positions_update_timestamp_only=True
-        )
-        config_mapping = {station_config.stid: station_config}
-        input_file_path = self.root_path / f"{stid}_hourly.csv"
-        seed_timestamp = datetime.datetime(2021, 10, 2, 10, 0)
-        now_timestamp = datetime.datetime(2023, 3, 3, 5, 0)
-        positions_seed = pd.DataFrame(
-            columns=["stid", "timestamp", "lat", "lon", "alt"],
-            data=[
-                [stid, seed_timestamp, 65.0, -40.0, 800],
-            ],
-        )
-        positions_seed.to_csv(self.positions_seed_path, index=False)
-        process_station_mock.return_value = {
-            "timestamp": now_timestamp,
-            # All position values should be ignored
-            "lat": None,
-            "lot": np.nan,
-            "alt": 2414.0,
-        }
-        # Only timestamp should be updated
-        expected_positions = positions_seed.copy()
-        expected_positions["timestamp"] = now_timestamp
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=[input_file_path],
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=config_mapping,
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            positions_seed_path=self.positions_seed_path,
-            now_timestamp=now_timestamp,
-        )
+    def test_multiple_stations(
+        self,
+        load_data_mock: mock.MagicMock,
+        get_latest_data_mock: mock.MagicMock,
+        write_bufr_message_mock: mock.MagicMock,
+        get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
+    ):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            positions_filepath = root_path / "positions.csv"
+            output_path = root_path / "bufr_out"
+            station_config1 = StationConfiguration(stid="station_01", export_bufr=True)
+            station_config2 = StationConfiguration(stid="station_02", export_bufr=True)
+            station_config3 = StationConfiguration(stid="station_03", export_bufr=False)
+            station_configs = [station_config1, station_config2, station_config3]
+            station_configuration_mapping = {
+                config.stid: config for config in station_configs
+            }
+            input_files = [
+                root_path / "input" / f"{config.stid}_hour.csv"
+                for config in station_configs
+            ]
+            target_timestamp = pd.to_datetime("2024-03-01 00:12:00")
+            latest_timestamp = pd.to_datetime("2024-03-01 00:01:00")
+            get_latest_data_mock.return_value.name = latest_timestamp
+            station_positions = [
+                dict(
+                    timestamp=latest_timestamp,
+                    lat=random.random() * 180 - 90,
+                    lon=random.random() * 360 - 180,
+                    alt=2000 * random.random(),
+                )
+                for _ in range(3)
+            ]
+            get_station_positions_mock.side_effect = station_positions
+
+            get_bufr(
+                input_files=input_files,
+                station_configuration_mapping=station_configuration_mapping,
+                break_on_error=True,
+                bufr_out=output_path,
+                target_timestamp=target_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
 
-        positions = pd.read_csv(self.positions_file_path, parse_dates=["timestamp"])
-        self.assertEqual(1, len(positions))
-        pd.testing.assert_series_equal(
-            positions.iloc[0],
-            expected_positions.iloc[0],
-        )
+            self.assertTrue(positions_filepath.exists())
+            self.assertEqual(3, get_station_positions_mock.call_count)
+            self.assertEqual(2, write_bufr_message_mock.call_count)
+            written_positions = pd.read_csv(
+                positions_filepath, index_col=0, parse_dates=["timestamp"]
+            )
+            self.assertSetEqual(
+                set(written_positions.index),
+                {config.stid for config in station_configs},
+            )
 
-    def test_position_seed(self):
-        """
-        There are no data files available. get_bufr should use the position_seed for output positions.
-        """
-        positions_seed = pd.DataFrame(
-            columns=["stid", "timestamp", "lat", "lon", "alt"],
-            data=[
-                ["STATION_A", datetime.datetime(2021, 10, 2), 65.0, -40.0, 800],
-                ["STATION_B", datetime.datetime(2023, 11, 12), 66.0, -50.0, 1100],
-            ],
-        )
-        positions_seed.to_csv(self.positions_seed_path, index=False)
-
-        get_bufr(
-            store_positions=True,
-            bufr_out=self.bufr_root,
-            input_files=(),
-            positions_filepath=self.positions_file_path,
-            station_configuration_mapping=dict(),
-            timestamps_pickle_filepath=self.timestamps_pickle_filepath,
-            positions_seed_path=self.positions_seed_path,
-            now_timestamp=datetime.datetime.now(),
-        )
+    def test_station_without_configuration(
+        self,
+        load_data_mock: mock.MagicMock,
+        get_latest_data_mock: mock.MagicMock,
+        write_bufr_message_mock: mock.MagicMock,
+        get_bufr_variables_mock: mock.MagicMock,
+        get_station_positions_mock: mock.MagicMock,
+    ):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir)
+            positions_filepath = root_path / "positions.csv"
+            output_path = root_path / "bufr_out"
+            target_timestamp = datetime.datetime.now()
+            stid = "STATION_ID"
+            input_file_path = root_path / f"{stid}_hourly.csv"
+            get_station_positions_mock.return_value = dict(
+                timestamp=target_timestamp,
+                lat=78.52901,
+                lon=-56.8450358,
+                alt=1968.561,
+            )
 
-        for p in self.root_path.glob("*"):
-            print(p)
+            get_bufr(
+                input_files=[input_file_path],
+                station_configuration_mapping={},
+                break_on_error=True,
+                bufr_out=output_path,
+                target_timestamp=target_timestamp,
+                positions_filepath=positions_filepath,
+                store_positions=True,
+                timestamps_pickle_filepath=None,
+                time_window_length=pd.to_timedelta("2d"),
+            )
 
-        positions = pd.read_csv(self.positions_file_path, parse_dates=["timestamp"])
-        pd.testing.assert_frame_equal(positions, positions_seed)
+            get_latest_data_mock.assert_called_once()
+            get_station_positions_mock.assert_called_once()
+            get_bufr_variables_mock.assert_not_called()
+            written_positions = pd.read_csv(
+                positions_filepath, index_col=0, parse_dates=["timestamp"]
+            )
+            self.assertDictEqual(
+                get_station_positions_mock.return_value,
+                dict(written_positions.loc[stid]),
+            )
diff --git a/tests/unit/bufr_export/test_get_bufr_integration.py b/tests/unit/bufr_export/test_get_bufr_integration.py
index 1a21b3ee..f03a60a5 100644
--- a/tests/unit/bufr_export/test_get_bufr_integration.py
+++ b/tests/unit/bufr_export/test_get_bufr_integration.py
@@ -5,7 +5,6 @@
 import datetime
 import logging
 import pickle
-import shutil
 import sys
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -19,9 +18,7 @@
 from pypromice.postprocess.bufr_utilities import read_bufr_message, BUFRVariables
 from pypromice.station_configuration import (
     StationConfiguration,
-    write_station_configuration_mapping,
 )
-from tests.utilities import get_station_configuration
 
 logging.basicConfig(
     stream=sys.stdout,
@@ -124,15 +121,15 @@ def test_get_bufr_has_new_data(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 8)
+        target_timestamp = datetime.datetime(2023, 12, 8)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         expected_bufr_variables = BUFRVariables(
@@ -163,15 +160,15 @@ def test_get_bufr_has_new_data_dont_store_position(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 8)
+        target_timestamp = datetime.datetime(2023, 12, 8)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=False,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         expected_bufr_variables = BUFRVariables(
@@ -202,17 +199,17 @@ def test_get_bufr_stid_to_skip(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 6)
+        target_timestamp = datetime.datetime(2023, 12, 6)
         mapping = self.get_station_configuration_mapping(
             stid, wmo_id="04464", export_bufr=False
         )
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         self.assertIsNone(bufr_data)
@@ -223,16 +220,16 @@ def test_get_bufr_has_no_data_newer_than_latests_timestamps(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {stid: datetime.datetime(2023, 12, 7, 23, 00)}
-        now_timestamp = datetime.datetime(2023, 12, 8)
+        target_timestamp = datetime.datetime(2023, 12, 8)
 
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         self.assertIsNone(bufr_data)
@@ -242,15 +239,15 @@ def test_get_bufr_includes_datasets_not_in_latests_timestamps(self):
         l3_src = pd.read_csv(l3_src_filepath)
         stid = "DY2"
         latest_timestamps = {}
-        now_timestamp = datetime.datetime(2023, 12, 8)
+        target_timestamp = datetime.datetime(2023, 12, 8)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
 
@@ -282,16 +279,16 @@ def test_get_bufr_has_old_data_compared_to_now(self):
         l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
         l3_src = pd.read_csv(l3_src_filepath)
         latest_timestamps = {stid: datetime.datetime(2023, 12, 6)}
-        now_timestamp = datetime.datetime(2023, 12, 20)
+        target_timestamp = datetime.datetime(2023, 12, 20)
 
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         self.assertIsNone(bufr_data)
@@ -304,15 +301,15 @@ def test_invalid_value_at_last_index(self):
         # Set some of instantanous values to nan
         l3_src.loc[140:, "p_i"] = np.nan
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 8)
+        target_timestamp = datetime.datetime(2023, 12, 8)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         expected_bufr_variables = BUFRVariables(
@@ -337,6 +334,27 @@ def test_invalid_value_at_last_index(self):
             expected_bufr_variables.as_series(),
         )
 
+    def test_invalid_position_data(self):
+        stid = "DY2"
+        # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00
+        l3_src_filepath = DATA_DIR.joinpath("tx_l3_test1.csv")
+        l3_src = pd.read_csv(l3_src_filepath)
+        # Set every gps_lat value to nan to make the position data invalid
+        l3_src.loc[:, "gps_lat"] = np.nan
+        latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
+        target_timestamp = datetime.datetime(2023, 12, 8)
+        mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
+        bufr_data = run_get_bufr(
+            l3_data=l3_src,
+            target_timestamp=target_timestamp,
+            latest_timestamps=latest_timestamps,
+            stid=stid,
+            store_positions=True,
+            linear_regression_time_limit="91d",
+            station_configuration_mapping=mapping,
+        )
+        self.assertIsNone(bufr_data)
+
     def test_multiple_last_valid_indices_all_instantaneous_timestamps_are_none(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
@@ -354,15 +372,15 @@ def test_multiple_last_valid_indices_all_instantaneous_timestamps_are_none(self)
             ],
         ] = np.nan
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 6)
+        target_timestamp = datetime.datetime(2023, 12, 6)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
 
@@ -376,16 +394,16 @@ def test_multiple_last_valid_indices_all_older_than_2days(self):
         # Set some of instantanous values to nan
         l3_src.loc[140:, "p_i"] = np.nan
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 10)
+        target_timestamp = datetime.datetime(2023, 12, 10)
 
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         self.assertIsNone(bufr_data)
@@ -397,15 +415,15 @@ def test_min_data_wx_failed(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 6)
+        target_timestamp = datetime.datetime(2023, 12, 6)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
 
@@ -418,15 +436,15 @@ def test_min_data_pos_failed(self):
         stid = "DY2"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"DY2": datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 6)
+        target_timestamp = datetime.datetime(2023, 12, 6)
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         self.assertIsNone(bufr_data)
@@ -438,7 +456,7 @@ def test_ignore_newer_data_than_now_input(self):
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {stid: datetime.datetime(2023, 12, 1)}
         # New is before the latest data
-        now_timestamp = datetime.datetime(
+        target_timestamp = datetime.datetime(
             2023,
             12,
             6,
@@ -446,17 +464,17 @@ def test_ignore_newer_data_than_now_input(self):
         mapping = self.get_station_configuration_mapping(stid, wmo_id="04464")
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         expected_bufr_variables = BUFRVariables(
             wmo_id="04464",
             station_type="mobile",
-            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00 but now_timestamp is 2023-12-06
+            # Newest measurement in tx_l3_test1.csv: 2023-12-07 23:00:00 but target_timestamp is 2023-12-06
             timestamp=datetime.datetime(2023, 12, 6, 0, 0),
             relativeHumidity=82,
             airTemperature=250.85,
@@ -481,17 +499,17 @@ def test_land_station_export(self):
         stid = "WEG_B"
         # Newest measurement in DY2_hour: 2023-12-07 23:00:00
         latest_timestamps = {"WEG_B": datetime.datetime(2023, 12, 1)}
-        now_timestamp = datetime.datetime(2023, 12, 8)
+        target_timestamp = datetime.datetime(2023, 12, 8)
         mapping = self.get_station_configuration_mapping(
             stid, wmo_id="460", station_type="land"
         )
         bufr_data = run_get_bufr(
             l3_data=l3_src,
-            now_timestamp=now_timestamp,
+            target_timestamp=target_timestamp,
             latest_timestamps=latest_timestamps,
             stid=stid,
             store_positions=True,
-            time_limit="91d",
+            linear_regression_time_limit="91d",
             station_configuration_mapping=mapping,
         )
         expected_bufr_variables = BUFRVariables(
diff --git a/tests/unit/bufr_export/test_realtime_utilitites.py b/tests/unit/bufr_export/test_realtime_utilitites.py
index 1acdb5b7..557ffe5c 100644
--- a/tests/unit/bufr_export/test_realtime_utilitites.py
+++ b/tests/unit/bufr_export/test_realtime_utilitites.py
@@ -173,3 +173,35 @@ def test_auxiliary_input_data(self):
         )
 
         self.assertEqual(expected_output, latest_data["auxiliary_data"])
+
+    def test_skipped_variables(self):
+        """
+        Test that the variables in vars_to_skip are set to nan if they are present in the input data.
+        """
+        data = self.get_data()
+        expected_output = pd.Series(
+            data={
+                "p_i": float("nan"),
+                "t_i": -16.7,
+                "rh_i": 84.6,
+                "wspd_i": 14.83,
+                "wdir_i": 142.2,
+                "gps_lat": 66.482469,
+                "gps_lon": -46.294232,
+                "gps_alt": 2116.0,
+                "z_boom_u": 4.1901,
+                "gps_lat_fit": 66.4824788,
+                "gps_lon_fit": -46.2942685,
+                "gps_alt_fit": 2121.4118,
+                "z_boom_u_smooth": 4.188,
+            },
+            name=datetime.datetime(2023, 12, 7, 6),
+        )
+
+        latest_data = get_latest_data(
+            df=data,
+            lin_reg_time_limit="1w",
+            vars_to_skip=["p_i", "a_non_existing_variable"],
+        )
+
+        pd.testing.assert_series_equal(latest_data, expected_output, rtol=1e-8)
diff --git a/tests/utilities.py b/tests/utilities.py
index 742f6861..a56301d4 100644
--- a/tests/utilities.py
+++ b/tests/utilities.py
@@ -13,7 +13,7 @@ def get_station_configuration(**kwargs) -> StationConfiguration:
 
     Parameters
     ----------
-    kwargs : dict
+    kwargs
         Keyword arguments to providie explicit values for the StationConfiguration object.
     Returns
     -------

From 805e6b365991389c20ed7e1c8d24ec793c7acf0a Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Wed, 7 Aug 2024 09:54:56 +0200
Subject: [PATCH 14/16] Minor cleanup

---
 src/pypromice/postprocess/get_bufr.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index c08b6b95..2da014d8 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -18,7 +18,7 @@
 import sys
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import List, Dict, Optional, Collection, Sequence, Mapping, BinaryIO
+from typing import List, Dict, Optional, Sequence, Mapping
 
 import numpy as np
 import pandas as pd
@@ -121,9 +121,6 @@ def get_bufr(
     if target_timestamp is None:
         target_timestamp = datetime.utcnow()
 
-    # if earliest_timestamp is None:
-    #     earliest_timestamp = now_timestamp - timedelta(days=2)
-
     # Prepare (latest) positions
     positions = dict()
     if positions_seed_path:
@@ -150,9 +147,6 @@ def get_bufr(
     # Setup diagnostic lists (logger.info at end)
     skipped = []
     no_recent_data = []
-    no_entry_latest_timestamps = []
-    failed_min_data_wx = []
-    failed_min_data_pos = []
 
     # Iterate through csv files
     for file_path in input_files:
@@ -186,6 +180,7 @@ def get_bufr(
             )
             if latest_data is None:
                 logger.info("No valid instantaneous timestamps!")
+                skipped.append(stid)
                 continue
 
             # Create station positions
@@ -209,6 +204,7 @@ def get_bufr(
                         write_bufr_message(bufr_variables, output_file)
             else:
                 logger.info(f"No new data {latest_data.name} <= {time_window_start}")
+                no_recent_data.append(stid)
 
         except Exception:
             logger.exception(f"Failed processing {stid}")
@@ -216,6 +212,7 @@ def get_bufr(
                 output_path.unlink()
             if break_on_error:
                 raise
+            skipped.append(stid)
             continue
 
     # Write the most recent timestamps back to the pickle on disk
@@ -235,12 +232,9 @@ def get_bufr(
         positions_df.to_csv(positions_filepath, index_label="stid")
 
     logger.info("--------------------------------")
-    not_processed_wx_pos = set(failed_min_data_wx + failed_min_data_pos)
     not_processed_count = (
         len(skipped)
         + len(no_recent_data)
-        + len(no_entry_latest_timestamps)
-        + len(not_processed_wx_pos)
     )
     logger.info(
         "BUFR exported for {} of {} fpaths.".format(
@@ -250,9 +244,6 @@ def get_bufr(
     logger.info("")
     logger.info("skipped: {}".format(skipped))
     logger.info("no_recent_data: {}".format(no_recent_data))
-    logger.info("no_entry_latest_timestamps: {}".format(no_entry_latest_timestamps))
-    logger.info("failed_min_data_wx: {}".format(failed_min_data_wx))
-    logger.info("failed_min_data_pos: {}".format(failed_min_data_pos))
     logger.info("--------------------------------")
 
 

From f94a7ff178e6c411d724f77cb0109a4a6bf1e322 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Wed, 7 Aug 2024 14:36:43 +0200
Subject: [PATCH 15/16] Updated StationConfiguration IO to handle unknown
 attributes from input

---
 .../postprocess/create_bufr_files.py          |  7 ++-
 src/pypromice/postprocess/get_bufr.py         |  3 +-
 src/pypromice/station_configuration.py        | 25 +++++++--
 tests/unit/test_station_config.py             | 51 +++++++++++++++++++
 4 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/src/pypromice/postprocess/create_bufr_files.py b/src/pypromice/postprocess/create_bufr_files.py
index 1b6b4b78..f542a8d0 100644
--- a/src/pypromice/postprocess/create_bufr_files.py
+++ b/src/pypromice/postprocess/create_bufr_files.py
@@ -28,11 +28,13 @@ def create_bufr_files(
     Generate hourly bufr files from the for all input files
 
     :param input_files: Paths to csv l3 hourly data files
+    :param station_configuration_root: Root directory containing station configuration toml files
     :param period_start: Datetime string for period start. Eg '2024-01-01T00:00' or '20240101
     :param period_end: Datetime string for period end
     :param output_root: Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.
     :param override: If False: Skip a period if the compiled output file exists.
     :param break_on_error: If True: Stop processing if an error occurs
+    :param output_filename_suffix: Suffix for the compiled output file
     :return:
     """
     periods = pd.date_range(period_start, period_end, freq="H")
@@ -41,7 +43,10 @@ def create_bufr_files(
     output_individual_root.mkdir(parents=True, exist_ok=True)
     output_compiled_root.mkdir(parents=True, exist_ok=True)
 
-    station_configuration_mapping = load_station_configuration_mapping(station_configuration_root)
+    station_configuration_mapping = load_station_configuration_mapping(
+        station_configuration_root,
+        skip_unexpected_fields=True,
+    )
 
     for period in periods:
         period: pd.Timestamp
diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 2da014d8..48efa656 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -462,7 +462,8 @@ def main():
             input_files += map(Path, glob.glob(path.as_posix()))
 
     station_configuration_mapping = load_station_configuration_mapping(
-        args.station_configurations_root
+        args.station_configurations_root,
+        skip_unexpected_fields=True,
     )
 
     get_bufr(
diff --git a/src/pypromice/station_configuration.py b/src/pypromice/station_configuration.py
index fb8d5439..4ec4baec 100644
--- a/src/pypromice/station_configuration.py
+++ b/src/pypromice/station_configuration.py
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 from typing import Optional, Dict, Mapping, Sequence
 
@@ -30,6 +31,7 @@ class StationConfiguration:
     sonic_ranger_from_gps: Optional[float] = None
     static_height_of_gps_from_mean_sea_level: Optional[float] = None
     station_relocation: Sequence[str] = attrs.field(factory=list)
+    location_type: Optional[str] = None
 
     # The station data will be exported to BUFR if True. Otherwise, it will only export latest position
     export_bufr: bool = False
@@ -45,8 +47,22 @@ class StationConfiguration:
     positions_update_timestamp_only: bool = False
 
     @classmethod
-    def load_toml(cls, path):
-        return cls(**toml.load(path))
+    def load_toml(cls, path, skip_unexpected_fields=False):
+        config_fields = {field.name for field in attrs.fields(cls)}
+        input_dict = toml.load(path)
+        unexpected_fields = set(input_dict.keys()) - config_fields
+        if unexpected_fields:
+            if skip_unexpected_fields:
+                logging.info(
+                    f"Skipping unexpected fields in toml file {path}: "
+                    + ", ".join(unexpected_fields)
+                )
+                for field in unexpected_fields:
+                    input_dict.pop(field)
+            else:
+                raise ValueError(f"Unexpected fields: {unexpected_fields}")
+
+        return cls(**input_dict)
 
     def dump_toml(self, path: Path):
         with path.open("w") as fp:
@@ -58,6 +74,7 @@ def as_dict(self) -> Dict:
 
 def load_station_configuration_mapping(
     configurations_root_dir: Path,
+    **kwargs,
 ) -> Mapping[str, StationConfiguration]:
     """
     Load station configurations from toml files in configurations_root_dir
@@ -66,6 +83,8 @@ def load_station_configuration_mapping(
     ----------
     configurations_root_dir
         Root directory containing toml files
+    kwargs
+        Additional arguments to pass to StationConfiguration.load_toml
 
     Returns
     -------
@@ -73,7 +92,7 @@ def load_station_configuration_mapping(
 
     """
     return {
-        config_file.stem: StationConfiguration(**toml.load(config_file))
+        config_file.stem: StationConfiguration.load_toml(config_file, **kwargs)
         for config_file in configurations_root_dir.glob("*.toml")
     }
 
diff --git a/tests/unit/test_station_config.py b/tests/unit/test_station_config.py
index a2b117fd..4788d019 100644
--- a/tests/unit/test_station_config.py
+++ b/tests/unit/test_station_config.py
@@ -55,6 +55,57 @@ def test_read_toml(self):
                 station_configuration,
             )
 
+    def test_read_toml_with_unexpected_field(self):
+        with TemporaryDirectory() as temp_dir:
+            source_path = Path(temp_dir) / "UPE_L.toml"
+            source_str = """
+                stid = "UPE_L"
+                station_site = "UPE_L"
+                project = "Promice"
+                station_type = "mobile"
+                wmo_id = "04423"
+                barometer_from_gps = -0.25
+                anemometer_from_sonic_ranger = 0.4
+                temperature_from_sonic_ranger = 0.0
+                height_of_gps_from_station_ground = 0.9
+                sonic_ranger_from_gps = 1.3
+                export_bufr = true
+                skipped_variables = []
+                positions_update_timestamp_only = false
+                an_unexpected_field = 42
+            """
+            with source_path.open("w") as source_io:
+                source_io.writelines(source_str)
+
+            expected_configuration = StationConfiguration(
+                stid="UPE_L",
+                station_site="UPE_L",
+                project="Promice",
+                station_type="mobile",
+                wmo_id="04423",
+                barometer_from_gps=-0.25,
+                anemometer_from_sonic_ranger=0.4,
+                temperature_from_sonic_ranger=0.0,
+                height_of_gps_from_station_ground=0.9,
+                sonic_ranger_from_gps=1.3,
+                export_bufr=True,
+                comment=None,
+                skipped_variables=[],
+                positions_update_timestamp_only=False,
+            )
+
+            with self.assertRaises(ValueError):
+                StationConfiguration.load_toml(source_path)
+
+            station_configuration = StationConfiguration.load_toml(source_path, skip_unexpected_fields=True)
+
+            self.assertEqual(
+                expected_configuration,
+                station_configuration,
+            )
+
+
+
     def test_write_read(self):
         with TemporaryDirectory() as temp_dir:
             output_path = Path(temp_dir) / "UPE_L.toml"

From 99659fa6c931b136a5209e2517fba615dd991c95 Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Mon, 12 Aug 2024 15:55:57 +0200
Subject: [PATCH 16/16] Updated docstring in create_bufr_files.py

---
 .../postprocess/create_bufr_files.py          | 34 ++++++++++++-------
 src/pypromice/postprocess/get_bufr.py         |  5 +--
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/pypromice/postprocess/create_bufr_files.py b/src/pypromice/postprocess/create_bufr_files.py
index f542a8d0..a6cb7842 100644
--- a/src/pypromice/postprocess/create_bufr_files.py
+++ b/src/pypromice/postprocess/create_bufr_files.py
@@ -27,15 +27,25 @@ def create_bufr_files(
     """
     Generate hourly bufr files from the for all input files
 
-    :param input_files: Paths to csv l3 hourly data files
-    :param station_configuration_root: Root directory containing station configuration toml files
-    :param period_start: Datetime string for period start. Eg '2024-01-01T00:00' or '20240101
-    :param period_end: Datetime string for period end
-    :param output_root: Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.
-    :param override: If False: Skip a period if the compiled output file exists.
-    :param break_on_error: If True: Stop processing if an error occurs
-    :param output_filename_suffix: Suffix for the compiled output file
-    :return:
+    Parameters
+    ----------
+    input_files
+        Paths to csv l3 hourly data files
+    station_configuration_root
+        Root directory containing station configuration toml files
+    period_start
+        Datetime string for period start. E.g. '2024-01-01T00:00' or '20240101'
+    period_end
+        Datetime string for period end
+    output_root
+        Output dir for both individual-station and compiled bufr files. Organized in two subdirectories.
+    override
+        If False: Skip a period if the compiled output file exists.
+    break_on_error
+        If True: Stop processing if an error occurs
+    output_filename_suffix
+        Suffix for the compiled output file
+
     """
     periods = pd.date_range(period_start, period_end, freq="H")
     output_individual_root = output_root / "individual"
@@ -82,6 +92,7 @@ def create_bufr_files(
 
 # %%
 
+
 def main():
     import argparse
     import glob
@@ -101,9 +112,7 @@ def main():
     main_logger.addHandler(main_handler)
     main_logger.setLevel(logging.INFO)
 
-    parser = argparse.ArgumentParser(
-        "Create BUFR files from L3 tx .csv files."
-    )
+    parser = argparse.ArgumentParser("Create BUFR files from L3 tx .csv files.")
     parser.add_argument(
         "--input_files",
         "--l3-filepath",
@@ -164,5 +173,6 @@ def main():
         station_configuration_root=args.station_configuration_root,
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/src/pypromice/postprocess/get_bufr.py b/src/pypromice/postprocess/get_bufr.py
index 48efa656..c59553e6 100644
--- a/src/pypromice/postprocess/get_bufr.py
+++ b/src/pypromice/postprocess/get_bufr.py
@@ -232,10 +232,7 @@ def get_bufr(
         positions_df.to_csv(positions_filepath, index_label="stid")
 
     logger.info("--------------------------------")
-    not_processed_count = (
-        len(skipped)
-        + len(no_recent_data)
-    )
+    not_processed_count = len(skipped) + len(no_recent_data)
     logger.info(
         "BUFR exported for {} of {} fpaths.".format(
             (len(input_files) - not_processed_count), len(input_files)