Skip to content

Commit

Permalink
Merge pull request #24 from LCOGT/update/util-organization-comments
Browse files Browse the repository at this point in the history
split util into s3_utils and file_utils, updated imports, comment changes
  • Loading branch information
LTDakin authored Aug 21, 2024
2 parents 4737230 + c1a6866 commit 5eb56fc
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 117 deletions.
3 changes: 2 additions & 1 deletion datalab/datalab_session/analysis/get_tif.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datalab.datalab_session.util import create_tif, get_fits, add_file_to_bucket, key_exists, get_s3_url
from datalab.datalab_session.file_utils import create_tif, get_fits
from datalab.datalab_session.s3_utils import key_exists, add_file_to_bucket, get_s3_url

def get_tif(input: dict):
"""
Expand Down
3 changes: 1 addition & 2 deletions datalab/datalab_session/analysis/line_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from astropy.wcs import WCS
from astropy.wcs import WcsError

from datalab.datalab_session.util import scale_points
from datalab.datalab_session.util import get_hdu
from datalab.datalab_session.file_utils import scale_points, get_hdu

# For creating an array of brightness along a user drawn line
def line_profile(input: dict):
Expand Down
3 changes: 1 addition & 2 deletions datalab/datalab_session/analysis/source_catalog.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np

from datalab.datalab_session.util import get_hdu, scale_points

from datalab.datalab_session.file_utils import get_hdu, scale_points

def source_catalog(input: dict):
"""
Expand Down
2 changes: 1 addition & 1 deletion datalab/datalab_session/data_operations/data_operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from datalab.datalab_session.tasks import execute_data_operation
from datalab.datalab_session.util import get_hdu
from datalab.datalab_session.file_utils import get_hdu

CACHE_DURATION = 60 * 60 * 24 * 30 # cache for 30 days

Expand Down
3 changes: 2 additions & 1 deletion datalab/datalab_session/data_operations/median.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import numpy as np

from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
from datalab.datalab_session.util import create_fits, stack_arrays, create_jpgs, save_fits_and_thumbnails
from datalab.datalab_session.file_utils import create_fits, stack_arrays, create_jpgs
from datalab.datalab_session.s3_utils import save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down
3 changes: 2 additions & 1 deletion datalab/datalab_session/data_operations/rgb_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from astropy.io import fits

from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
from datalab.datalab_session.util import get_fits, stack_arrays, create_fits, save_fits_and_thumbnails, create_jpgs
from datalab.datalab_session.file_utils import get_fits, stack_arrays, create_fits, create_jpgs
from datalab.datalab_session.s3_utils import save_fits_and_thumbnails

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down
114 changes: 114 additions & 0 deletions datalab/datalab_session/file_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import tempfile
import logging

from astropy.io import fits
import numpy as np
from fits2image.conversions import fits_to_jpg, fits_to_tif

from datalab.datalab_session.s3_utils import get_fits, add_file_to_bucket

log = logging.getLogger()
log.setLevel(logging.INFO)

def get_hdu(basename: str, extension: str = 'SCI', source: str = 'archive'):
    """
    Return the named HDU (default 'SCI') for the given basename from the source.

    Downloads the file to a tmp directory (via get_fits) so future calls can
    open it directly.
    Warning: the returned HDU is backed by an open file that must be closed
    after use.

    Raises:
        KeyError: if the requested extension is not present in the FITS file.
    """

    basename_file_path = get_fits(basename, source)

    # keep the handle and the requested extension in separate names so the
    # parameter isn't shadowed and we can close the file on failure
    hdu_list = fits.open(basename_file_path)
    try:
        return hdu_list[extension]
    except KeyError:
        # close the open handle before propagating so it isn't leaked
        hdu_list.close()
        raise KeyError(f"{extension} Header not found in fits file {basename}")

def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
    """Return the data shape tuple (e.g. (height, width)) of the given extension, closing the file afterwards."""
    # context manager fixes the original's leaked fits.open handle
    with fits.open(fits_file) as hdu_list:
        return hdu_list[extension].shape

def create_fits(key: str, image_arr: np.ndarray) -> str:
    """
    Create a FITS file holding image_arr in a 'SCI' image extension.

    The key is stored in the primary header (KEY card) and used in the temp
    filename.
    Returns the path to the fits file.
    """

    header = fits.Header([('KEY', key)])
    primary_hdu = fits.PrimaryHDU(header=header)
    image_hdu = fits.ImageHDU(data=image_arr, name='SCI')

    hdu_list = fits.HDUList([primary_hdu, image_hdu])
    # delete=False keeps the reserved path on disk and overwrite=True lets
    # writeto reuse it — avoids the create/delete/recreate race of the bare
    # NamedTemporaryFile(...).name idiom
    with tempfile.NamedTemporaryFile(suffix=f'{key}.fits', delete=False) as tmp:
        fits_path = tmp.name
    hdu_list.writeto(fits_path, overwrite=True)

    return fits_path

def create_tif(key: str, fits_path: str) -> str:
    """
    Create a full sized TIFF file from a FITS file.

    fits_path is a filesystem path to the FITS file (the original annotation
    of np.ndarray was incorrect).
    Returns the path to the TIFF file.
    """
    height, width = get_fits_dimensions(fits_path)
    tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
    fits_to_tif(fits_path, tif_path, width=width, height=height)

    return tif_path

def create_jpgs(cache_key, fits_paths, color=False) -> tuple:
    """
    Create a full-size jpg and a small thumbnail jpg from one or more FITS files.

    fits_paths may be a single path or a list of paths; if using the color
    option fits_paths need to be in order R, G, B.

    Returns (large_jpg_path, thumbnail_jpg_path).
    """

    if not isinstance(fits_paths, list):
        fits_paths = [fits_paths]

    # reserve temp paths for the two output jpgs
    large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
    thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name

    # NOTE(review): max() compares (height, width) tuples lexicographically, so
    # this takes the dimensions of the tallest image rather than an elementwise
    # maximum — fine when inputs share a shape; confirm for mixed-size inputs
    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)

    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)

    return large_jpg_path, thumbnail_jpg_path

def stack_arrays(array_list: list):
    """
    Take a list of 2D numpy arrays, crop them to a common shape, and stack
    them into a 3D numpy array along a new last axis.

    Returns an array of shape (min_height, min_width, len(array_list)).
    """
    # Use the elementwise minimum of each dimension. The original used
    # min() over shape tuples, which is lexicographic and can pick a shape
    # (e.g. (2,5) from [(3,4), (2,5)]) that other arrays can't be cropped
    # to, making np.stack raise on mixed-size inputs.
    min_rows = min(arr.shape[0] for arr in array_list)
    min_cols = min(arr.shape[1] for arr in array_list)
    cropped_data_list = [arr[:min_rows, :min_cols] for arr in array_list]

    stacked = np.stack(cropped_data_list, axis=2)

    return stacked

def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=(), y_points=(), flip_y=False, flip_x=False):
    """
    Scale x_points and y_points from an image of (height_1, width_1) to an
    image of (height_2, width_2), optionally flipping them on the x or y axis.

    Returns the scaled (x_points, y_points) as integer numpy arrays.

    Raises:
        ValueError: if any of the heights or widths is zero.
    """
    if any(dim == 0 for dim in (height_1, width_1, height_2, width_2)):
        raise ValueError("height and width must be non-zero")

    # accept lists, tuples, or arrays; immutable () defaults replace the
    # mutable [] defaults of the original (behavior is unchanged since the
    # values were never mutated, only converted)
    x_points = (np.asarray(x_points) / width_1 * width_2).astype(int)
    y_points = (np.asarray(y_points) / height_1 * height_2).astype(int)

    if flip_y:
        y_points = height_2 - y_points

    if flip_x:
        x_points = width_2 - x_points

    return x_points, y_points
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import tempfile
import requests
import logging
import requests
import os
import urllib.request

import boto3
from astropy.io import fits
import numpy as np
from botocore.exceptions import ClientError

from django.conf import settings
from fits2image.conversions import fits_to_jpg, fits_to_tif

log = logging.getLogger()
log.setLevel(logging.INFO)
Expand Down Expand Up @@ -115,7 +111,7 @@ def get_archive_url(basename: str, archive: str = settings.ARCHIVE_API) -> dict:

def get_fits(basename: str, source: str = 'archive'):
"""
Returns a Fits File for the given basename from the source
Returns a Fits File for the given basename from the source bucket
"""
basename = basename.replace('-large', '').replace('-small', '')
basename_file_path = os.path.join(settings.TEMP_FITS_DIR, basename)
Expand All @@ -140,76 +136,9 @@ def get_fits(basename: str, source: str = 'archive'):

return basename_file_path

def get_hdu(basename: str, extension: str = 'SCI', source: str = 'archive') -> list[fits.HDUList]:
    """
    Returns a HDU for the given basename from the source
    Will download the file to a tmp directory so future calls can open it directly
    Warning: this function returns an opened file that must be closed after use

    Raises:
        KeyError: if the requested extension is not present in the FITS file.
    """

    basename_file_path = get_fits(basename, source)

    hdu = fits.open(basename_file_path)
    try:
        # NOTE(review): rebinding 'extension' shadows the parameter, and the
        # open handle 'hdu' is never closed on the KeyError path below
        extension = hdu[extension]
    except KeyError:
        raise KeyError(f"{extension} Header not found in fits file {basename}")

    return extension

def get_fits_dimensions(fits_file, extension: str = 'SCI') -> tuple:
    """Return the data shape tuple of the given extension (NOTE(review): the fits.open handle is never closed)."""
    return fits.open(fits_file)[extension].shape

def create_fits(key: str, image_arr: np.ndarray) -> str:
    """
    Creates a fits file with the given key and image array
    The key is stored in the primary header and used in the temp filename
    Returns the path to the fits_file
    """

    header = fits.Header([('KEY', key)])
    primary_hdu = fits.PrimaryHDU(header=header)
    image_hdu = fits.ImageHDU(data=image_arr, name='SCI')

    hdu_list = fits.HDUList([primary_hdu, image_hdu])
    # NOTE(review): NamedTemporaryFile(...).name creates then immediately frees
    # the path (the object is garbage-collected) — racy but functional
    fits_path = tempfile.NamedTemporaryFile(suffix=f'{key}.fits').name
    hdu_list.writeto(fits_path)

    return fits_path

def create_tif(key: str, fits_path: str) -> str:
    """
    Creates a full sized TIFF file from a FITs
    fits_path is a filesystem path to the FITS file
    Returns the path to the TIFF file
    """
    height, width = get_fits_dimensions(fits_path)
    tif_path = tempfile.NamedTemporaryFile(suffix=f'{key}.tif').name
    fits_to_tif(fits_path, tif_path, width=width, height=height)

    return tif_path

def create_jpgs(cache_key, fits_paths, color=False) -> tuple:
    """
    Create a large jpg and a small thumbnail jpg from one or more fits files
    If using the color option fits_paths need to be in order R, G, B
    Returns (large_jpg_path, thumbnail_jpg_path)
    """

    if not isinstance(fits_paths, list):
        fits_paths = [fits_paths]

    # create the jpgs from the fits files
    large_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-large.jpg').name
    thumbnail_jpg_path = tempfile.NamedTemporaryFile(suffix=f'{cache_key}-small.jpg').name

    # NOTE(review): max() over (height, width) tuples is lexicographic — picks
    # the tallest image's dims, not an elementwise max; confirm inputs share a shape
    max_height, max_width = max(get_fits_dimensions(path) for path in fits_paths)

    fits_to_jpg(fits_paths, large_jpg_path, width=max_width, height=max_height, color=color)
    fits_to_jpg(fits_paths, thumbnail_jpg_path, color=color)

    return large_jpg_path, thumbnail_jpg_path

def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg_path, index=None):
"""
Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an output object
Save Fits and Thumbnails in S3 Buckets, Returns the URLs in an data operation output ready object
"""
bucket_key = f'{cache_key}/{cache_key}-{index}' if index else f'{cache_key}/{cache_key}'

Expand All @@ -226,38 +155,3 @@ def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg
)

return output_file

def stack_arrays(array_list: list):
    """
    Takes a list of numpy arrays, crops them to an equal shape, and stacks them to be a 3d numpy array
    """
    # NOTE(review): min() over shape tuples is lexicographic, not elementwise;
    # shapes like (3,4) and (2,5) crop to different sizes and np.stack raises
    min_shape = min(arr.shape for arr in array_list)
    cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in array_list]

    stacked = np.stack(cropped_data_list, axis=2)

    return stacked

def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=[], y_points=[], flip_y = False, flip_x = False):
    """
    Scales x_points and y_points from img_1 height and width to img_2 height and width
    Optionally flips the points on the x or y axis
    Returns the scaled (x_points, y_points) as integer numpy arrays

    Raises:
        ValueError: if any of the heights or widths is zero
    """
    if any([dim == 0 for dim in [height_1, width_1, height_2, width_2]]):
        raise ValueError("height and width must be non-zero")

    # normalize the points to be lists in case tuples or other are passed
    # (the mutable [] defaults are harmless here only because they are
    # rebound, never mutated)
    x_points = np.array(x_points)
    y_points = np.array(y_points)

    x_points = (x_points / width_1 * width_2).astype(int)
    y_points = (y_points / height_1 * height_2).astype(int)

    if flip_y:
        y_points = height_2 - y_points

    if flip_x:
        x_points = width_2 - x_points

    return x_points, y_points

0 comments on commit 5eb56fc

Please sign in to comment.