diff --git a/book/chapters/amsr.ipynb b/book/chapters/amsr.ipynb index 635c628..1be1c62 100644 --- a/book/chapters/amsr.ipynb +++ b/book/chapters/amsr.ipynb @@ -1,5 +1,93 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "aab13cdf", + "metadata": {}, + "source": [ + "## Utility Functions for Data Processing and Analysis Pipeline\n", + "\n", + "### 1. Library Imports and Setup for Snow Data\n", + "\n", + "- `KDTree`: From `scipy.spatial`, used for performing efficient nearest-neighbor searches in spatial datasets.\n", + "- `plot_all_variables_in_one_csv`: A custom function from `convert_results_to_images`, used for visualizing processed data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6d98378", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import h5py\n", + "import subprocess\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import datetime\n", + "from scipy.spatial import KDTree\n", + "import time\n", + "from datetime import datetime, timedelta, date\n", + "import warnings\n", + "import sys\n", + "# from convert_results_to_images import plot_all_variables_in_one_c\n", + "\n", + "homedir = os.path.expanduser('~')\n", + "work_dir = \"../data/gridmet_test_run\"\n", + "test_start_date = \"2024-07-18\"\n", + "western_us_coords = \"../data/dem_file.tif.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "2550b08d", + "metadata": {}, + "source": [ + "### 2. Identifying Binary Files\n", + "\n", + "Here we determine whether a given file is a binary file or a text file.\n", + "\n", + "- We attempt to open the file in binary mode (`'rb'`) and read a chunk of bytes (1024 bytes).\n", + "- Then we check for null bytes (`b'\\x00'`), which are common in binary files. If a null byte is found, the file is considered binary.\n", + "- Next, we check for a high percentage of non-printable ASCII characters by converting the byte chunk to characters and filtering out non-printable ones. 
def is_binary(file_path, chunk_size=1024):
    """
    Guess whether a file holds binary data by inspecting its leading bytes.

    Args:
        file_path: Path of the file to inspect.
        chunk_size: Number of leading bytes to examine (defaults to 1024).

    Returns:
        True when the leading chunk contains a null byte, or when it is
        non-empty but contains no printable ASCII or common whitespace
        bytes at all. False otherwise, including for an empty file.
        False is also returned, after printing a message, when the file
        does not exist or cannot be read.
    """
    # Tab, line feed and carriage return are ordinary text content even
    # though they fall outside the printable ASCII range 32..126.
    text_whitespace = b"\t\n\r"

    try:
        with open(file_path, 'rb') as file:
            # Read a chunk of bytes from the start of the file
            chunk = file.read(chunk_size)
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")
        return False
    except Exception as e:
        # Best-effort helper: report the problem and treat the file as non-binary
        print(f"An error occurred: {e}")
        return False

    # An empty file carries no binary indicator at all
    if not chunk:
        return False

    # Null bytes are a common indicator of binary data
    if b'\x00' in chunk:
        return True

    # Binary only if not a single byte looks like text
    return not any(
        32 <= byte <= 126 or byte in text_whitespace
        for byte in chunk
    )
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6d98378", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import h5py\n", - "import subprocess\n", - "import pandas as pd\n", - "import numpy as np\n", - "from datetime import datetime\n", - "from scipy.spatial import KDTree\n", - "import time\n", - "from datetime import datetime, timedelta, date\n", - "import warnings\n", - "import sys\n", - "# from convert_results_to_images import plot_all_variables_in_one_c\n", - "\n", - "homedir = os.path.expanduser('~')\n", - "work_dir = \"../data/gridmet_test_run\"\n", - "test_start_date = \"2024-07-18\"\n", - "western_us_coords = \"../data/dem_file.tif.csv\"" - ] - }, - { - "cell_type": "markdown", - "id": "2550b08d", - "metadata": {}, - "source": [ - "### 2. Identifying Binary Files\n", - "\n", - "Here we determine whether a given file is a binary file or a text file.\n", - "\n", - "- We attempt to open the file in binary mode (`'rb'`) and read a chunk of bytes (1024 bytes).\n", - "- And the we check for null bytes (`b'\\x00'`), which are common in binary files. If a null byte is found, then it is binary file.\n", - "- Next, we check for a high percentage of non-printable ASCII characters by converting the byte chunk to characters and filtering out non-printable ones. If the chunk has no printable characters, the file is considered binary.\n", - "- If neither of the above conditions are met, the function assumes the file is a text file." 
- ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "7004281e", - "metadata": {}, - "outputs": [], - "source": [ - "def is_binary(file_path):\n", - " try:\n", - " with open(file_path, 'rb') as file:\n", - " # Read a chunk of bytes from the file\n", - " chunk = file.read(1024)\n", - "\n", - " # Check for null bytes, a common indicator of binary data\n", - " if b'\\x00' in chunk:\n", - " return True\n", - "\n", - " # Check for a high percentage of non-printable ASCII characters\n", - " text_characters = \"\".join(chr(byte) for byte in chunk if 32 <= byte <= 126)\n", - " if not text_characters:\n", - " return True\n", - "\n", - " # If none of the binary indicators are found, assume it's a text file\n", - " return False\n", - "\n", - " except FileNotFoundError:\n", - " print(f\"File '{file_path}' not found.\")\n", - " return False\n", - " except Exception as e:\n", - " print(f\"An error occurred: {e}\")\n", - " return False\n", - " " - ] } ], "metadata": { diff --git a/book/chapters/fsCA.ipynb b/book/chapters/fsCA.ipynb index 2fff4b7..f626e96 100644 --- a/book/chapters/fsCA.ipynb +++ b/book/chapters/fsCA.ipynb @@ -79,6 +79,28 @@ "create an account in urs.earthdata.nasa.gov for earth access" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "377eb1f1", + "metadata": {}, + "outputs": [], + "source": [ + "work_dir = '../data/fsca'\n", + "def date_to_julian(date_str):\n", + " \"\"\"\n", + " Convert a date to Julian date.\n", + " \"\"\"\n", + " date_object = datetime.strptime(date_str, \"%Y-%m-%d\")\n", + " tt = date_object.timetuple()\n", + " \n", + "\n", + " # Format the result as 'YYYYDDD'\n", + " julian_format = str('%d%03d' % (tt.tm_year, tt.tm_yday))\n", + "\n", + " return julian_format" + ] + }, { "cell_type": "code", "execution_count": null, @@ -336,28 +358,6 @@ "## 3.5.6 Utility Functions" ] }, - { - "cell_type": "code", - "execution_count": 2, - "id": "377eb1f1", - "metadata": {}, - "outputs": [], - "source": [ - "work_dir = 
'../data/fsca'\n", - "def date_to_julian(date_str):\n", - " \"\"\"\n", - " Convert a date to Julian date.\n", - " \"\"\"\n", - " date_object = datetime.strptime(date_str, \"%Y-%m-%d\")\n", - " tt = date_object.timetuple()\n", - " \n", - "\n", - " # Format the result as 'YYYYDDD'\n", - " julian_format = str('%d%03d' % (tt.tm_year, tt.tm_yday))\n", - "\n", - " return julian_format" - ] - }, { "cell_type": "code", "execution_count": 12, diff --git a/book/chapters/gridmet.ipynb b/book/chapters/gridmet.ipynb index 3bb1811..84e667f 100644 --- a/book/chapters/gridmet.ipynb +++ b/book/chapters/gridmet.ipynb @@ -12,6 +12,54 @@ "GridMET is a high-resolution dataset that provides daily surface meteorological data for the contiguous United States" ] }, + { + "cell_type": "markdown", + "id": "524ca247", + "metadata": {}, + "source": [ + "## Calculate specific Operation Day\n", + "\n", + "Here we calculate the date exactly three days before the current date and return it as a formatted string.\n", + "\n", + "- `current_date`: A `datetime` object representing the current date and time.\n", + "- `three_days_ago`: A `datetime` object representing the date three days before the current date.\n", + "- `three_days_ago_string`: A string representing the date three days ago, formatted as \"YYYY-MM-DD\".\n", + "- `test_start_date`: A string that stores the returned value from `get_operation_day()`, representing the operation day used in other parts of the code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "29df8904", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-08-16\n" + ] + } + ], + "source": [ + "from datetime import datetime, timedelta\n", + "def get_operation_day():\n", + " # Get the current date and time\n", + " current_date = datetime.now()\n", + "\n", + " # Calculate three days ago\n", + " three_days_ago = current_date - timedelta(days=3)\n", + "\n", + " # Format the date as a string\n", + " three_days_ago_string = three_days_ago.strftime(\"%Y-%m-%d\")\n", + "\n", + " print(three_days_ago_string)\n", + "\n", + " return three_days_ago_string\n", + "\n", + "test_start_date = get_operation_day()" + ] + }, { "cell_type": "markdown", "id": "4aac33f5", @@ -1340,54 +1388,6 @@ " return (abs(array - value)).argmin()" ] }, - { - "cell_type": "markdown", - "id": "524ca247", - "metadata": {}, - "source": [ - "## 11. Calculate specific Operation Day\n", - "\n", - "Here we calculate the date exactly three days before the current date and returns it as a formatted string.\n", - "\n", - "- `current_date`: A `datetime` object representing the current date and time.\n", - "- `three_days_ago`: A `datetime` object representing the date three days before the current date.\n", - "- `three_days_ago_string`: A string representing the date three days ago, formatted as \"YYYY-MM-DD\".\n", - "- `test_start_date`: A string that stores the returned value from `get_operation_day()`, representing the operation day used in other parts of the code." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "29df8904", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-08-16\n" - ] - } - ], - "source": [ - "from datetime import datetime, timedelta\n", - "def get_operation_day():\n", - " # Get the current date and time\n", - " current_date = datetime.now()\n", - "\n", - " # Calculate three days ago\n", - " three_days_ago = current_date - timedelta(days=3)\n", - "\n", - " # Format the date as a string\n", - " three_days_ago_string = three_days_ago.strftime(\"%Y-%m-%d\")\n", - "\n", - " print(three_days_ago_string)\n", - "\n", - " return three_days_ago_string\n", - "\n", - "test_start_date = get_operation_day()" - ] - }, { "cell_type": "markdown", "id": "ea2c5e26",