diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100644 index 0000000..54fc11d --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,36 @@ +name: PEP8 + +on: + push: + branches: + - main + pull_request: + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + - name: Lint + run: | + ruff check --statistics *.py + ruff check --statistics apps/ + ruff check --ignore D205 tests/ + - name: Format + run: | + ruff format --check *.py + ruff format --check apps/ + ruff format --check tests/ diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml new file mode 100644 index 0000000..c1957ee --- /dev/null +++ b/.github/workflows/run_test.yml @@ -0,0 +1,27 @@ +name: Sentinel + +on: + push: + branches: + - main + pull_request: + +jobs: + test-suite: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.11] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python dependencies + run: | + pip install --upgrade pip setuptools wheel + pip install -r requirements.txt + - name: Run test suites + run: | + ./run_tests.sh --url ${{ secrets.APIURLDEV }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/README.md b/README.md index e331b37..a5a28b4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,11 @@ -# fink-object-api -Fink REST API to access object data stored in HBase +# Fink object API + +This repository contains the code source of the Fink REST API 
used to access object data stored in tables in Apache HBase. + +## Installation + +## Deployment + +## Tests + +## Adding a new route diff --git a/app.py b/app.py new file mode 100644 index 0000000..f1b9c87 --- /dev/null +++ b/app.py @@ -0,0 +1,35 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from flask import Flask + +from apps.utils.utils import extract_configuration + +from apps.routes.objects.api import bp as bp_objects +from apps.routes.cutouts.api import bp as bp_cutouts + +config = extract_configuration("config.yml") + +app = Flask("Fink REST API") + +# Server configuration +app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024 +app.config["JSONIFY_PRETTYPRINT_REGULAR"] = True +app.config["JSON_SORT_KEYS"] = False + +app.register_blueprint(bp_objects) +app.register_blueprint(bp_cutouts) + +if __name__ == "__main__": + app.run(config["HOST"], debug=True, port=int(config["PORT"])) diff --git a/apps/__init__.py b/apps/__init__.py new file mode 100644 index 0000000..20f0f46 --- /dev/null +++ b/apps/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +__version__ = "0.1" diff --git a/apps/routes/cutouts/__init__.py b/apps/routes/cutouts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/routes/cutouts/api.py b/apps/routes/cutouts/api.py new file mode 100644 index 0000000..6e4c0ea --- /dev/null +++ b/apps/routes/cutouts/api.py @@ -0,0 +1,104 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from flask import Blueprint, Response, jsonify, request +from apps.utils.utils import check_args + +from apps.routes.cutouts.utils import format_and_send_cutout + +bp = Blueprint("cutouts", __name__) + + +# Enable CORS for this blueprint +@bp.after_request +def after_request(response): + response.headers.add("Access-Control-Allow-Origin", "*") + response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") + response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") + return response + + +ARGS = [ + { + "name": "objectId", + "required": True, + "description": "ZTF Object ID", + }, + { + "name": "kind", + "required": True, + "description": "Science, Template, or Difference. For output-format=array, you can also specify `kind: All` to get the 3 cutouts.", + }, + { + "name": "output-format", + "required": False, + "description": "PNG[default], FITS, array", + }, + { + "name": "candid", + "required": False, + "description": "Candidate ID of the alert belonging to the object with `objectId`. If not filled, the cutouts of the latest alert is returned", + }, + { + "name": "stretch", + "required": False, + "description": "Stretch function to be applied. Available: sigmoid[default], linear, sqrt, power, log.", + }, + { + "name": "colormap", + "required": False, + "description": "Valid matplotlib colormap name (see matplotlib.cm). Default is grayscale.", + }, + { + "name": "pmin", + "required": False, + "description": "The percentile value used to determine the pixel value of minimum cut level. Default is 0.5. No effect for sigmoid.", + }, + { + "name": "pmax", + "required": False, + "description": "The percentile value used to determine the pixel value of maximum cut level. Default is 99.5. No effect for sigmoid.", + }, + { + "name": "convolution_kernel", + "required": False, + "description": "Convolve the image with a kernel (gauss or box). 
Default is None (not specified).", + }, +] + + +@bp.route("/api/v1/cutouts", methods=["GET"]) +def return_cutouts_arguments(): + """Obtain information about cutouts""" + if len(request.args) > 0: + # POST from query URL + return return_cutouts(payload=request.args) + else: + return jsonify({"args": ARGS}) + + +@bp.route("/api/v1/cutouts", methods=["POST"]) +def return_cutouts(payload=None): + """Retrieve object data""" + # get payload from the JSON + if payload is None: + payload = request.json + + rep = check_args(ARGS, payload) + if rep["status"] != "ok": + return Response(str(rep), 400) + + assert payload["kind"] in ["Science", "Template", "Difference", "All"] + + return format_and_send_cutout(payload) diff --git a/apps/routes/cutouts/utils.py b/apps/routes/cutouts/utils.py new file mode 100644 index 0000000..b2349d0 --- /dev/null +++ b/apps/routes/cutouts/utils.py @@ -0,0 +1,187 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from flask import send_file, jsonify + +import io +import json +import requests + +import numpy as np +from matplotlib import cm +from PIL import Image + +from apps.utils.client import connect_to_hbase_table +from apps.utils.plotting import sigmoid_normalizer, legacy_normalizer, convolve +from apps.utils.decoding import format_hbase_output +from apps.utils.utils import extract_configuration + +from line_profiler import profile + + +@profile +def format_and_send_cutout(payload: dict): + """Extract data returned by HBase and jsonify it + + Data is from /api/v1/cutouts + + Parameters + ---------- + payload: dict + See https://fink-portal.org/api/v1/cutouts + + Return + ---------- + out: pandas dataframe + """ + output_format = payload.get("output-format", "PNG") + + # default stretch is sigmoid + if "stretch" in payload: + stretch = payload["stretch"] + else: + stretch = "sigmoid" + + if payload["kind"] == "All" and payload["output-format"] != "array": + # TODO: error 400 + pass + + # default name based on parameters + filename = "{}_{}".format( + payload["objectId"], + payload["kind"], + ) + + if output_format == "PNG": + filename = filename + ".png" + elif output_format == "JPEG": + filename = filename + ".jpg" + elif output_format == "FITS": + filename = filename + ".fits" + + # Query the Database (object query) + client = connect_to_hbase_table("ztf.cutouts") + results = client.scan( + "", + "key:key:{}".format(payload["objectId"]), + "d:hdfs_path,i:jd,i:candid,i:objectId", + 0, + False, + False, + ) + + # Format the results + schema_client = client.schema() + client.close() + + pdf = format_hbase_output( + results, + schema_client, + group_alerts=False, + truncated=True, + extract_color=False, + ) + + json_payload = {} + # Extract only the alert of interest + if "candid" in payload: + mask = pdf["i:candid"].astype(str) == str(payload["candid"]) + json_payload.update({"candid": str(payload["candid"])}) + pos_target = np.where(mask)[0][0] + else: + # pdf has been 
sorted in `format_hbase_output` + pdf = pdf.iloc[0:1] + pos_target = 0 + + json_payload.update( + { + "hdfsPath": pdf["d:hdfs_path"].to_numpy()[pos_target].split("8020")[1], + "kind": payload["kind"], + "objectId": pdf["i:objectId"].to_numpy()[pos_target], + } + ) + + if pdf.empty: + return send_file( + io.BytesIO(), + mimetype="image/png", + as_attachment=True, + download_name=filename, + ) + # Extract cutouts + user_config = extract_configuration("config.yml") + if output_format == "FITS": + json_payload.update({"return_type": "FITS"}) + r0 = requests.post( + "{}/api/v1/cutouts".format(user_config["CUTOUTAPIURL"]), json=json_payload + ) + cutout = io.BytesIO(r0.content) + elif output_format in ["PNG", "array"]: + json_payload.update({"return_type": "array"}) + r0 = requests.post( + "{}/api/v1/cutouts".format(user_config["CUTOUTAPIURL"]), json=json_payload + ) + cutout = json.loads(r0.content) + + # send the FITS file + if output_format == "FITS": + return send_file( + cutout, + mimetype="application/octet-stream", + as_attachment=True, + download_name=filename, + ) + # send the array + elif output_format == "array": + if payload["kind"] != "All": + return jsonify({"b:cutout{}_stampData".format(payload["kind"]): cutout[0]}) + else: + out = { + "b:cutoutScience_stampData": cutout[0], + "b:cutoutTemplate_stampData": cutout[1], + "b:cutoutDifference_stampData": cutout[2], + } + return jsonify(out) + + array = np.nan_to_num(np.array(cutout[0], dtype=float)) + if stretch == "sigmoid": + array = sigmoid_normalizer(array, 0, 1) + elif stretch is not None: + pmin = 0.5 + if "pmin" in payload: + pmin = float(payload["pmin"]) + pmax = 99.5 + if "pmax" in payload: + pmax = float(payload["pmax"]) + array = legacy_normalizer(array, stretch=stretch, pmin=pmin, pmax=pmax) + + if "convolution_kernel" in payload: + assert payload["convolution_kernel"] in ["gauss", "box"] + array = convolve(array, smooth=1, kernel=payload["convolution_kernel"]) + + # colormap + if "colormap" in 
payload: + colormap = getattr(cm, payload["colormap"]) + else: + colormap = lambda x: x # noqa: E731 + array = np.uint8(colormap(array) * 255) + + # Convert to PNG + data = Image.fromarray(array) + datab = io.BytesIO() + data.save(datab, format="PNG") + datab.seek(0) + return send_file( + datab, mimetype="image/png", as_attachment=True, download_name=filename + ) diff --git a/apps/routes/objects/__init__.py b/apps/routes/objects/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/routes/objects/api.py b/apps/routes/objects/api.py new file mode 100644 index 0000000..025f4c2 --- /dev/null +++ b/apps/routes/objects/api.py @@ -0,0 +1,95 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from flask import Blueprint, Response, jsonify, request +from apps.utils.utils import check_args +from apps.utils.utils import send_tabular_data + +from apps.routes.objects.utils import extract_object_data + +bp = Blueprint("objects", __name__) + + +# Enable CORS for this blueprint +@bp.after_request +def after_request(response): + response.headers.add("Access-Control-Allow-Origin", "*") + response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") + response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") + return response + + +ARGS = [ + { + "name": "objectId", + "required": True, + "description": 'single ZTF Object ID, or a comma-separated list of object names, e.g. "ZTF19acmdpyr,ZTF21aaxtctv"', + }, + { + "name": "withupperlim", + "required": False, + "description": "If True, retrieve also upper limit measurements, and bad quality measurements. Use the column `d:tag` in your results: valid, upperlim, badquality.", + }, + { + "name": "withcutouts", + "required": False, + "description": "If True, retrieve also cutout data as 2D array. See also `cutout-kind`. More information on the original cutouts at https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_explanatory_supplement.pdf", + }, + { + "name": "cutout-kind", + "required": False, + "description": "`Science`, `Template`, or `Difference`. If not specified, returned all three.", + }, + { + "name": "columns", + "required": False, + "description": "Comma-separated data columns to transfer. 
Default is all columns.", + }, + { + "name": "output-format", + "required": False, + "description": "Output format among json[default], csv, parquet, votable", + }, +] + + +@bp.route("/api/v1/objects", methods=["GET"]) +def return_object_arguments(): + """Obtain information about retrieving object data""" + if len(request.args) > 0: + # POST from query URL + return return_object(payload=request.args) + else: + return jsonify({"args": ARGS}) + + +@bp.route("/api/v1/objects", methods=["POST"]) +def return_object(payload=None): + """Retrieve object data from the Fink database""" + # get payload from the JSON + if payload is None: + payload = request.json + + rep = check_args(ARGS, payload) + if rep["status"] != "ok": + return Response(str(rep), 400) + + out = extract_object_data(payload) + + # Error propagation + if isinstance(out, Response): + return out + + output_format = payload.get("output-format", "json") + return send_tabular_data(out, output_format) diff --git a/apps/routes/objects/utils.py b/apps/routes/objects/utils.py new file mode 100644 index 0000000..605d6aa --- /dev/null +++ b/apps/routes/objects/utils.py @@ -0,0 +1,178 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pandas as pd +from numpy import array as nparray +from apps.utils.utils import download_cutout +from apps.utils.client import connect_to_hbase_table +from apps.utils.decoding import format_hbase_output, hbase_to_dict + +from line_profiler import profile + + +@profile +def extract_object_data(payload: dict) -> pd.DataFrame: + """Extract data returned by HBase and format it in a Pandas dataframe + + Data is from /api/v1/objects + + Parameters + ---------- + payload: dict + See https://fink-portal.org/api/v1/objects + + Return + ---------- + out: pandas dataframe + """ + if "columns" in payload: + cols = payload["columns"].replace(" ", "") + else: + cols = "*" + + if "," in payload["objectId"]: + # multi-objects search + splitids = payload["objectId"].split(",") + objectids = [f"key:key:{i.strip()}" for i in splitids] + else: + # single object search + objectids = ["key:key:{}".format(payload["objectId"])] + + if "withcutouts" in payload and str(payload["withcutouts"]) == "True": + withcutouts = True + else: + withcutouts = False + + if "withupperlim" in payload and str(payload["withupperlim"]) == "True": + withupperlim = True + else: + withupperlim = False + + if cols == "*": + truncated = False + else: + truncated = True + + client = connect_to_hbase_table("ztf") + + # Get data from the main table + results = {} + for to_evaluate in objectids: + result = client.scan( + "", + to_evaluate, + cols, + 0, + True, + True, + ) + results.update(result) + + schema_client = client.schema() + + pdf = format_hbase_output( + results, + schema_client, + group_alerts=False, + truncated=truncated, + ) + + if withcutouts: + # Default `None` returns all 3 cutouts + cutout_kind = payload.get("cutout-kind", "All") + + if cutout_kind == "All": + cols = [ + "b:cutoutScience_stampData", + "b:cutoutTemplate_stampData", + "b:cutoutDifference_stampData", + ] + pdf[cols] = pdf[["i:objectId", "i:candid"]].apply( + lambda x: pd.Series(download_cutout(x.iloc[0], x.iloc[1], cutout_kind)), 
+ axis=1, + ) + else: + colname = "b:cutout{}_stampData".format(cutout_kind) + pdf[colname] = pdf[["i:objectId", "i:candid"]].apply( + lambda x: pd.Series( + [download_cutout(x.iloc[0], x.iloc[1], cutout_kind)] + ), + axis=1, + ) + + if withupperlim: + clientU = connect_to_hbase_table("ztf.upper") + # upper limits + resultsU = {} + for to_evaluate in objectids: + resultU = clientU.scan( + "", + to_evaluate, + "*", + 0, + False, + False, + ) + resultsU.update(resultU) + + # bad quality + clientUV = connect_to_hbase_table("ztf.uppervalid") + resultsUP = {} + for to_evaluate in objectids: + resultUP = clientUV.scan( + "", + to_evaluate, + "*", + 0, + False, + False, + ) + resultsUP.update(resultUP) + + pdfU = pd.DataFrame.from_dict(hbase_to_dict(resultsU), orient="index") + pdfUP = pd.DataFrame.from_dict(hbase_to_dict(resultsUP), orient="index") + + pdf["d:tag"] = "valid" + pdfU["d:tag"] = "upperlim" + pdfUP["d:tag"] = "badquality" + + if "i:jd" in pdfUP.columns: + # workaround -- see https://github.com/astrolabsoftware/fink-science-portal/issues/216 + mask = nparray( + [ + False if float(i) in pdf["i:jd"].to_numpy() else True + for i in pdfUP["i:jd"].to_numpy() + ] + ) + pdfUP = pdfUP[mask] + + # Hacky way to avoid converting concatenated column to float + pdfU["i:candid"] = -1 # None + pdfUP["i:candid"] = -1 # None + + pdf_ = pd.concat((pdf, pdfU, pdfUP), axis=0) + + # replace + if "i:jd" in pdf_.columns: + pdf_["i:jd"] = pdf_["i:jd"].astype(float) + pdf = pdf_.sort_values("i:jd", ascending=False) + else: + pdf = pdf_ + + clientU.close() + clientUV.close() + + client.close() + + return pdf diff --git a/apps/routes/template/README.md b/apps/routes/template/README.md new file mode 100644 index 0000000..9c3e507 --- /dev/null +++ b/apps/routes/template/README.md @@ -0,0 +1,71 @@ +# Route template + +This template shows how to simply create a new route. + +## Structure + +Simply create a new folder following the `template` folder: + +```bash +. 
+├── app.py +├── apps +│   ├── __init__.py +│   ├── routes +│   │   └── template +│   │   ├── api.py +│   │   ├── __init__.py +│   │   ├── README.md +│   │   └── utils.py +│   └── utils +│   └── utils.py +├── bin +├── config.yml +├── LICENSE +├── README.md +└── requirements.txt +``` + +It should contain `api.py`, `__init__.py`, and eventually `utils.py`. + +## Template + +Once, you have implemented the functionalities, launch the application: + +```bash +python app.py +``` + +and check available arguments (`GET` method): + +```bash +curl http://localhost:32000/api/v1/template +{ + "args": [ + { + "description": "explain me", + "name": "arg1", + "required": true + }, + { + "description": "Output format among json[default], csv, parquet, votable", + "name": "output-format", + "required": false + } + ] +} +``` + +Check a valid `POST`: + +```bash +curl http://localhost:32000/api/v1/template?arg1=1 +[{"1":1},{"1":2},{"1":3}] +``` + +Test that a missing argument is caught: + +```bash +curl http://localhost:32000/api/v1/template?arg3=1 +{'status': 'error', 'text': 'A value for `arg1` is required. Use GET to check arguments.\n'} +``` diff --git a/apps/routes/template/__init__.py b/apps/routes/template/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/routes/template/api.py b/apps/routes/template/api.py new file mode 100644 index 0000000..f6e334c --- /dev/null +++ b/apps/routes/template/api.py @@ -0,0 +1,75 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from flask import Blueprint, Response, jsonify, request +from apps.utils.utils import check_args +from apps.utils.utils import send_tabular_data + +from apps.routes.template.utils import my_function + +bp = Blueprint("template", __name__) + + +# Enable CORS for this blueprint +@bp.after_request +def after_request(response): + response.headers.add("Access-Control-Allow-Origin", "*") + response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") + response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") + return response + + +ARGS = [ + { + "name": "arg1", + "required": True, + "description": "explain me", + }, + { + "name": "output-format", + "required": False, + "description": "Output format among json[default], csv, parquet, votable", + }, +] + + +@bp.route("/api/v1/template", methods=["GET"]) +def return_template_arguments(): + """Obtain information about retrieving object data""" + if len(request.args) > 0: + # POST from query URL + return return_template(payload=request.args) + else: + return jsonify({"args": ARGS}) + + +@bp.route("/api/v1/template", methods=["POST"]) +def return_template(payload=None): + """Retrieve object data""" + # get payload from the JSON + if payload is None: + payload = request.json + + rep = check_args(ARGS, payload) + if rep["status"] != "ok": + return Response(str(rep), 400) + + out = my_function(payload) + + # Error propagation + if isinstance(out, Response): + return out + + output_format = payload.get("output-format", "json") + return send_tabular_data(out, output_format) diff --git a/apps/routes/template/utils.py b/apps/routes/template/utils.py new file mode 100644 index 0000000..89e5767 --- /dev/null +++ b/apps/routes/template/utils.py @@ -0,0 +1,19 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pandas as pd + + +def my_function(payload): + return pd.DataFrame({payload["arg1"]: [1, 2, 3]}) diff --git a/apps/utils/client.py b/apps/utils/client.py new file mode 100644 index 0000000..bfb4b62 --- /dev/null +++ b/apps/utils/client.py @@ -0,0 +1,162 @@ +# Copyright 2023-2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities to work with the Fink HBase client""" + +import os + +import jpype +import jpype.imports +import numpy as np +import yaml + +from apps import __file__ as apps_loc + +from line_profiler import profile + + +@profile +def initialise_jvm(path=None): + """Start a JVM + + Parameters + ---------- + path: str, optional + Path to the HBase client. 
Default is relative to apps/ + """ + if not jpype.isJVMStarted(): + if path is None: + path = os.path.dirname(apps_loc) + "/../bin/FinkBrowser.exe.jar" + jarpath = f"-Djava.class.path={path}" + jpype.startJVM(jpype.getDefaultJVMPath(), "-ea", jarpath, convertStrings=True) + + jpype.attachThreadToJVM() + + +@profile +def connect_to_hbase_table( + tablename: str, + schema_name=None, + nlimit=10000, + setphysicalrepo=False, + config_path=None, +): + """Return a client connected to a HBase table + + Parameters + ---------- + tablename: str + The name of the table + schema_name: str, optional + Name of the rowkey in the table containing the schema. Default is given by the config file. + nlimit: int, optional + Maximum number of objects to return. Default is 10000 + setphysicalrepo: bool, optional + If True, store cutouts queried on disk ("/tmp/Lomikel/HBaseClientBinaryDataRepository") + Needs client 02.01+. Default is False + config_path: str, optional + Path to the config file. Default is None (relative to the apps/ folder) + """ + initialise_jvm() + + if config_path is None: + config_path = os.path.dirname(apps_loc) + "/../config.yml" + args = yaml.load( + open(config_path), + yaml.Loader, + ) + + import com.Lomikel.HBaser + from com.astrolabsoftware.FinkBrowser.Utils import Init + + Init.init() + + client = com.Lomikel.HBaser.HBaseClient(args["HBASEIP"], args["ZOOPORT"]) + + if schema_name is None: + schema_name = args["SCHEMAVER"] + client.connect(tablename, schema_name) + if setphysicalrepo: + import com.Lomikel.HBaser.FilesBinaryDataRepository + + client.setRepository(com.Lomikel.HBaser.FilesBinaryDataRepository()) + client.setLimit(args["NLIMIT"]) + + return client + + +@profile +def create_or_update_hbase_table( + tablename: str, + families: list, + schema_name: str, + schema: dict, + create=False, + config_path=None, +): + """Create or update a table in HBase + + By default (create=False), it will only update the schema of the table + otherwise it will create 
the table in HBase and push the schema. The schema + has a rowkey `schema`. + + Currently accepts only a single family name + + Parameters + ---------- + tablename: str + The name of the table + families: list + List of family names, e.g. ['d'] + schema_name: str + Rowkey value for the schema + schema: dict + Dictionary with column names (keys) and column types (values) + create: bool + If true, create the table. Default is False (only update schema) + config_path: str, optional + Path to the config file. Default is None (relative to the apps/ folder) + """ + if len(np.unique(families)) != 1: + raise NotImplementedError("`create_hbase_table` only accepts one family name") + + initialise_jvm() + + if config_path is None: + config_path = os.path.dirname(apps_loc) + "/../config.yml" + args = yaml.load( + open(config_path), + yaml.Loader, + ) + + import com.Lomikel.HBaser + from com.astrolabsoftware.FinkBrowser.Utils import Init + + Init.init() + + client = com.Lomikel.HBaser.HBaseClient(args["HBASEIP"], args["ZOOPORT"]) + + if create: + # Create the table and connect without schema + client.create(tablename, families) + client.connect(tablename) + else: + # Connect by ignoring the current schema + client.connect(tablename, None) + + # Push the schema + out = [f"{families[0]}:{colname}:{coltype}" for colname, coltype in schema.items()] + client.put(schema_name, out) + + client.close() diff --git a/apps/utils/decoding.py b/apps/utils/decoding.py new file mode 100644 index 0000000..0470b6f --- /dev/null +++ b/apps/utils/decoding.py @@ -0,0 +1,274 @@ +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
@profile
def format_hbase_output(
    hbase_output,
    schema_client,
    group_alerts: bool,
    truncated: bool = False,
    extract_color: bool = True,
    with_constellation: bool = True,
):
    """Decode the raw HBase client output into an enriched Pandas DataFrame.

    Parameters
    ----------
    hbase_output: Java TreeMap
        Raw output of the HBase client (rows keyed by rowkey).
    schema_client: schema object
        Client schema, used to resolve HBase column types
        (via `schema_client.type(col)`).
    group_alerts: bool
        If True, keep only the latest alert per `i:objectId`.
    truncated: bool
        If True, skip the columns and value-added fields that require
        the full alert content. Default is False.
    extract_color: bool
        If True, add magnitude rate and color columns
        (see `extract_rate_and_color`). Default is True.
    with_constellation: bool
        If True, add the constellation name derived from the alert
        coordinates. Default is True.

    Returns
    -------
    pdfs: pd.DataFrame
        Decoded and enriched alert data, sorted by decreasing `i:jd`.
        Empty DataFrame if `hbase_output` is empty.
    """
    if len(hbase_output) == 0:
        return pd.DataFrame({})

    # Construct the dataframe
    pdfs = pd.DataFrame.from_dict(hbase_to_dict(hbase_output), orient="index")

    # Tracklet cell contains null if there is nothing
    # and so HBase won't transfer data -- ignoring the column
    if "d:tracklet" not in pdfs.columns and not truncated:
        pdfs["d:tracklet"] = np.zeros(len(pdfs), dtype="U20")

    if "d:tns" not in pdfs.columns and not truncated:
        pdfs["d:tns"] = ""

    # Remove hbase specific fields
    # NOTE: `_` was previously used as the loop variable although its
    # value is used -- renamed for clarity.
    for hbase_col in ["key:key", "key:time"]:
        if hbase_col in pdfs.columns:
            pdfs = pdfs.drop(columns=hbase_col)

    if "d:spicy_name" in pdfs.columns:
        pdfs = pdfs.drop(columns="d:spicy_name")

    # Remove cutouts if their fields are here but empty
    for kind in ["Difference", "Science", "Template"]:
        colname = f"b:cutout{kind}_stampData"
        if colname in pdfs.columns and pdfs[colname].to_numpy()[0].startswith(
            "binary:ZTF"
        ):
            pdfs = pdfs.drop(columns=colname)

    # Type conversion
    for col in pdfs.columns:
        pdfs[col] = convert_datatype(
            pdfs[col],
            hbase_type_converter[schema_client.type(col)],
        )

    # cast 'nan' into `[]` for easier json decoding
    for col in ["d:lc_features_g", "d:lc_features_r"]:
        if col in pdfs.columns:
            pdfs[col] = pdfs[col].replace("nan", "[]")

    pdfs = pdfs.copy()  # Fix Pandas' "DataFrame is highly fragmented" warning

    if not truncated:
        # Fink final classification
        classifications = extract_fink_classification_(
            pdfs["d:cdsxmatch"],
            pdfs["d:roid"],
            pdfs["d:mulens"],
            pdfs["d:snn_snia_vs_nonia"],
            pdfs["d:snn_sn_vs_all"],
            pdfs["d:rf_snia_vs_nonia"],
            pdfs["i:ndethist"],
            pdfs["i:drb"],
            pdfs["i:classtar"],
            pdfs["i:jd"],
            pdfs["i:jdstarthist"],
            pdfs["d:rf_kn_vs_nonkn"],
            pdfs["d:tracklet"],
        )

        pdfs["v:classification"] = classifications.to_numpy()

        if extract_color:
            # Extract color evolution
            pdfs = extract_rate_and_color(pdfs)

        # Human readable time
        pdfs["v:lastdate"] = convert_jd(pdfs["i:jd"])
        pdfs["v:firstdate"] = convert_jd(pdfs["i:jdstarthist"])
        pdfs["v:lapse"] = pdfs["i:jd"] - pdfs["i:jdstarthist"]

        if with_constellation:
            coords = SkyCoord(
                pdfs["i:ra"],
                pdfs["i:dec"],
                unit="deg",
            )
            constellations = get_constellation(coords)
            pdfs["v:constellation"] = constellations

    # Display only the last alert
    if group_alerts and ("i:jd" in pdfs.columns) and ("i:objectId" in pdfs.columns):
        pdfs["i:jd"] = pdfs["i:jd"].astype(float)
        pdfs = pdfs.loc[pdfs.groupby("i:objectId")["i:jd"].idxmax()]

    # sort values by time
    if "i:jd" in pdfs.columns:
        pdfs = pdfs.sort_values("i:jd", ascending=False)

    return pdfs
def convert_datatype(series: pd.Series, type_: type) -> pd.Series:
    """Cast a column decoded from HBase to its proper type.

    Parameters
    ----------
    series: pd.Series
        A single column of the DataFrame, as returned by HBase.
    type_: type
        Target type (Int64, int, str, float, bool).

    Returns
    -------
    out: pd.Series
        The same column, cast to `type_`.
    """
    converted = series.astype(type_)
    return converted
def convert_jd(jd, to="iso", format="jd"):
    """Convert a time value between astropy time representations.

    Parameters
    ----------
    jd: float or array-like
        Input time value(s), by default a Julian Date.
    to: str
        Target astropy format (default "iso", i.e. ISO date, UTC).
    format: str
        Input astropy format (default "jd"). Name kept for caller
        compatibility although it shadows the builtin.

    Returns
    -------
    out: str or array-like
        Converted time value(s).
    """
    time_obj = Time(jd, format=format)
    return time_obj.to_value(to)
def sigmoid(img: list) -> list:
    """Apply a standardized sigmoid mapping to an image.

    The image is first standardized (zero mean, unit variance),
    then passed through the logistic function, producing values
    in the open interval (0, 1).

    Parameters
    ----------
    img: float array
        A float array representing a non-normalized image.

    Returns
    -------
    out: float array
        Array of the same shape with values in (0, 1).
    """
    # Standardize: zero mean, unit variance
    standardized = (img - img.mean()) / img.std()
    # Logistic function 1 / (1 + exp(-x)) applied elementwise
    return 1.0 / (1.0 + np.exp(-standardized))
@profile
def convolve(image, smooth=3, kernel="gauss"):
    """Convolve 2D image. Hacked from aplpy.

    Parameters
    ----------
    image: 2D float array
        Image to convolve.
    smooth: int, optional
        Kernel width (stddev for "gauss", box width for "box").
        If None (with a named kernel), the image is returned unchanged.
        Default is 3.
    kernel: str or 2D array, optional
        Either "gauss", "box", or a custom 2D kernel array. Default "gauss".

    Returns
    -------
    out: 2D float array
        Convolved image.

    Raises
    ------
    ValueError
        If `smooth` is not a scalar, or `kernel` is an unknown string.
    """
    if smooth is None and isinstance(kernel, str) and kernel in ["box", "gauss"]:
        return image

    if smooth is not None and not np.isscalar(smooth):
        raise ValueError(
            "smooth= should be an integer - for more complex "
            "kernels, pass an array containing the kernel "
            "to the kernel= option"
        )

    # The Astropy convolution doesn't treat +/-Inf values correctly yet, so we
    # convert to NaN here.
    image_fixed = np.array(image, dtype=float, copy=True)
    image_fixed[np.isinf(image)] = np.nan

    if isinstance(kernel, str):
        if kernel == "gauss":
            kernel = Gaussian2DKernel(smooth, x_size=smooth * 5, y_size=smooth * 5)
        elif kernel == "box":
            kernel = Box2DKernel(smooth, x_size=smooth * 5, y_size=smooth * 5)
        else:
            raise ValueError(f"Unknown kernel: {kernel}")

    # Bug fix: convolve the Inf-sanitized copy; previously `image_fixed`
    # was computed but the original `image` was passed, making the
    # sanitization above dead code (upstream aplpy convolves the copy).
    return astropy_convolve(image_fixed, kernel, boundary="extend")
def extract_configuration(filename):
    """Extract user defined configuration.

    Parameters
    ----------
    filename: str
        Full path to the `config.yml` file.

    Returns
    -------
    out: dict
        Dictionary with user defined values, plus an extra `APIURL`
        key inferred from `HOST` and `PORT`.
    """
    # Bug fix: honor the `filename` argument -- previously the path
    # "config.yml" was hard-coded, silently ignoring the parameter.
    # Also use a context manager so the file handle is closed.
    with open(filename) as f:
        config = yaml.load(f, yaml.Loader)

    # Hosts ending in .org are assumed to be public HTTPS deployments;
    # anything else is treated as a local HTTP host with explicit port.
    if config["HOST"].endswith(".org"):
        config["APIURL"] = "https://" + config["HOST"]
    else:
        config["APIURL"] = "http://" + config["HOST"] + ":" + str(config["PORT"])
    return config
def check_args(args: list, payload: dict) -> dict:
    """Check all required arguments have been supplied.

    Parameters
    ----------
    args: list
        List of argument specifications (dict), each with at least a
        `name` key, and optionally a boolean `required` flag.
    payload: dict
        User supplied payload mapping argument names to values.

    Returns
    -------
    out: dict
        {"status": "ok"} if all required arguments are present in
        `payload`, otherwise {"status": "error", "text": ...} describing
        the first missing argument.
    """
    # Robustness fix: use .get() so an argument spec without a
    # `required` key is treated as optional instead of raising KeyError.
    required_args = [i["name"] for i in args if i.get("required", False)]
    for required_arg in required_args:
        if required_arg not in payload:
            rep = {
                "status": "error",
                "text": f"A value for `{required_arg}` is required. Use GET to check arguments.\n",
            }
            return rep
    return {"status": "ok"}
Choose among json, csv, or parquet\n", + } + return Response(str(rep), 400) diff --git a/bin/FinkBrowser.exe.jar b/bin/FinkBrowser.exe.jar new file mode 100644 index 0000000..a1af416 Binary files /dev/null and b/bin/FinkBrowser.exe.jar differ diff --git a/config.yml b/config.yml new file mode 100644 index 0000000..0d6c0a4 --- /dev/null +++ b/config.yml @@ -0,0 +1,17 @@ +# Host and port of the application +HOST: localhost +PORT: 32000 + +# URL of the fink_cutout_api +CUTOUTAPIURL: http://localhost + +# HBase configuration +HBASEIP: localhost +ZOOPORT: 2183 + +# Table schema (schema_{fink_broker}_{fink_science}) +SCHEMAVER: schema_3.1_5.21.14 + +# Maximum number of rows to +# return in one call +NLIMIT: 10000 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..834c7f4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +astropy +flask +pandas +numpy +fink-filters +fink-utils +line_profiler +requests +pyarrow +matplotlib diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000..526cf0c --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -e +message_help=""" +Run the test suite of the modules\n\n +Usage:\n + \t./run_tests.sh [--url]\n\n + +--url is the Science Portal URL you would like to test against. 
+""" +# Grab the command line arguments +while [ "$#" -gt 0 ]; do + case "$1" in + --url) + URL="$2" + shift 2 + ;; + -h) + echo -e $message_help + exit + ;; + esac +done + +if [[ -f $URL ]]; then + echo "You need to specify an URL" $URL + exit +fi + +# Run the test suite on the utilities +cd tests +for filename in ./*.py +do + echo $filename + # Run test suite + python $filename $URL +done diff --git a/tests/api_cutouts_test.py b/tests/api_cutouts_test.py new file mode 100644 index 0000000..8cf4aae --- /dev/null +++ b/tests/api_cutouts_test.py @@ -0,0 +1,196 @@ +# Copyright 2022-2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import requests +import numpy as np + +from astropy.io import fits + +from PIL import Image + +import io +import sys + +APIURL = sys.argv[1] + + +def cutouttest( + objectId="ZTF21aaxtctv", + kind="Science", + stretch="sigmoid", + colormap="viridis", + pmin=0.5, + pmax=99.5, + convolution_kernel=None, + output_format="PNG", + candid=None, +): + """Perform a cutout search in the Science Portal using the Fink REST API""" + payload = { + "objectId": objectId, + "kind": kind, # Science, Template, Difference + "stretch": stretch, # sigmoid[default], linear, sqrt, power, log, asinh + "colormap": colormap, # Valid matplotlib colormap name (see matplotlib.cm). Default is grayscale. + "pmin": pmin, # The percentile value used to determine the pixel value of minimum cut level. Default is 0.5. 
No effect for sigmoid. + "pmax": pmax, # The percentile value used to determine the pixel value of maximum cut level. Default is 99.5. No effect for sigmoid. + "output-format": output_format, + } + + if candid is not None: + payload.update({"candid": candid}) + + # Convolve the image with a kernel (gauss or box). Default is None (not specified). + if convolution_kernel is not None: + payload.update({"convolution_kernel": convolution_kernel}) + + r = requests.post("{}/api/v1/cutouts".format(APIURL), json=payload) + + assert r.status_code == 200, r.content + + if output_format == "PNG": + # Format output in a DataFrame + data = Image.open(io.BytesIO(r.content)) + elif output_format == "FITS": + data = fits.open(io.BytesIO(r.content), ignore_missing_simple=True) + elif output_format == "array": + data = r.json()["b:cutout{}_stampData".format(kind)] + + return data + + +def test_png_cutout() -> None: + """ + Examples + -------- + >>> test_png_cutout() + """ + data = cutouttest() + + assert data.format == "PNG" + assert data.size == (63, 63) + + +def test_fits_cutout() -> None: + """ + Examples + -------- + >>> test_fits_cutout() + """ + data = cutouttest(output_format="FITS") + + assert len(data) == 1 + assert np.shape(data[0].data) == (63, 63) + + +def test_array_cutout() -> None: + """ + Examples + -------- + >>> test_array_cutout() + """ + data = cutouttest(output_format="array") + + assert np.shape(data) == (63, 63), data + assert isinstance(data, list) + + +def test_kind_cutout() -> None: + """ + Examples + -------- + >>> test_kind_cutout() + """ + data1 = cutouttest(kind="Science", output_format="array") + data2 = cutouttest(kind="Template", output_format="array") + data3 = cutouttest(kind="Difference", output_format="array") + + assert data1 != data2 + assert data2 != data3 + + +def test_pvalues_cutout() -> None: + """ + Examples + -------- + >>> test_pvalues_cutout() + """ + # pmin and pmax have no effect if stretch = sigmoid + data1 = cutouttest() + data2 = 
cutouttest(pmin=0.1, pmax=0.5) + + assert data1.getextrema() == data2.getextrema() + + # pmin and pmax have an effect otherwise + data1 = cutouttest() + data2 = cutouttest(pmin=0.1, pmax=0.5, stretch="linear") + + assert data1.getextrema() != data2.getextrema() + + +def test_stretch_cutout() -> None: + """ + Examples + -------- + >>> test_stretch_cutout() + """ + # pmin and pmax have no effect if stretch = sigmoid + data1 = cutouttest(stretch="sigmoid") + + for stretch in ["linear", "sqrt", "power", "log"]: + data2 = cutouttest(stretch=stretch) + assert data1.getextrema() != data2.getextrema() + + +def test_colormap_cutout() -> None: + """ + Examples + -------- + >>> test_colormap_cutout() + """ + data1 = cutouttest() + data2 = cutouttest(colormap="Greys") + + assert data1.getextrema() != data2.getextrema() + + +def test_convolution_kernel_cutout() -> None: + """ + Examples + -------- + >>> test_convolution_kernel_cutout() + """ + data1 = cutouttest() + data2 = cutouttest(convolution_kernel="gauss") + + assert data1.getextrema() != data2.getextrema() + + +def test_candid_cutout() -> None: + """ + Examples + -------- + >>> test_candid_cutout() + """ + data1 = cutouttest() + data2 = cutouttest(candid="1622215345315015012") + + assert data1.getextrema() != data2.getextrema() + + +if __name__ == "__main__": + """ Execute the test suite """ + import sys + import doctest + + sys.exit(doctest.testmod()[0]) diff --git a/tests/api_single_object_test.py b/tests/api_single_object_test.py new file mode 100644 index 0000000..480481f --- /dev/null +++ b/tests/api_single_object_test.py @@ -0,0 +1,224 @@ +# Copyright 2022-2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def get_an_object(
    oid="ZTF21abfmbix",
    output_format="json",
    columns="*",
    withupperlim=False,
    withcutouts=False,
    cutout_kind=None,
):
    """Query an object from the Science Portal using the Fink REST API.

    Parameters
    ----------
    oid: str
        ZTF objectId, or comma-separated list of objectIds.
    output_format: str
        Output format: "json", "csv", or "parquet".
    columns: str
        Comma-separated list of columns, or "*" for all.
    withupperlim: bool
        If True, also request upper limits and bad quality data.
    withcutouts: bool
        If True, also request cutout data.
    cutout_kind: str, optional
        If given, restrict cutouts to this kind (e.g. "Science").

    Returns
    -------
    pdf: pd.DataFrame
        Query result as a DataFrame.
    """
    query = {
        "objectId": oid,
        "columns": columns,
        "output-format": output_format,
        "withupperlim": withupperlim,
        "withcutouts": withcutouts,
    }
    if cutout_kind is not None:
        query["cutout-kind"] = cutout_kind

    response = requests.post("{}/api/v1/objects".format(APIURL), json=query)

    assert response.status_code == 200, response.content

    buffer = io.BytesIO(response.content)
    if output_format == "json":
        pdf = pd.read_json(buffer)
    elif output_format == "csv":
        pdf = pd.read_csv(buffer)
    elif output_format == "parquet":
        pdf = pd.read_parquet(buffer)

    return pdf
test_column_selection() -> None: + """ + Examples + -------- + >>> test_column_selection() + """ + pdf = get_an_object(oid=OID, columns="i:jd,i:magpsf") + + assert len(pdf.columns) == 2, "I count {} columns".format(len(pdf.columns)) + + +def test_column_length() -> None: + """ + Examples + -------- + >>> test_column_length() + """ + pdf = get_an_object(oid=OID) + + assert len(pdf.columns) == 129, "I count {} columns".format(len(pdf.columns)) + + +def test_withupperlim() -> None: + """ + Examples + -------- + >>> test_withupperlim() + """ + pdf = get_an_object(oid=OID, withupperlim=True) + assert "d:tag" in pdf.columns + + +def test_withcutouts() -> None: + """ + Examples + -------- + >>> test_withcutouts() + """ + pdf = get_an_object(oid=OID, withcutouts=True) + + assert isinstance(pdf["b:cutoutScience_stampData"].to_numpy()[0], list) + assert isinstance(pdf["b:cutoutTemplate_stampData"].to_numpy()[0], list) + assert isinstance(pdf["b:cutoutDifference_stampData"].to_numpy()[0], list) + + +def test_withcutouts_single_field() -> None: + """ + Examples + -------- + >>> test_withcutouts_single_field() + """ + pdf = get_an_object(oid=OID, withcutouts=True, cutout_kind="Science") + + assert isinstance(pdf["b:cutoutScience_stampData"].to_numpy()[0], list) + assert "b:cutoutTemplate_stampData" not in pdf.columns + + +def test_formatting() -> None: + """ + Examples + -------- + >>> test_formatting() + """ + pdf = get_an_object(oid=OID) + + # stupid python cast... 
+ assert isinstance(pdf["i:fid"].to_numpy()[0], np.int64), type( + pdf["i:fid"].to_numpy()[0] + ) + assert isinstance(pdf["i:magpsf"].to_numpy()[0], np.double), type( + pdf["i:magpsf"].to_numpy()[0] + ) + + +def test_misc() -> None: + """ + Examples + -------- + >>> test_misc() + """ + pdf = get_an_object(oid=OID) + assert np.all(pdf["i:fid"].to_numpy() > 0) + assert np.all(pdf["i:magpsf"].to_numpy() > 6) + + +def test_bad_request() -> None: + """ + Examples + -------- + >>> test_bad_request() + """ + pdf = get_an_object(oid="ldfksjflkdsjf") + + assert pdf.empty + + +def test_multiple_objects() -> None: + """ + Examples + -------- + >>> test_multiple_objects() + """ + OIDS_ = ["ZTF21abfmbix", "ZTF21aaxtctv", "ZTF21abfaohe"] + OIDS = ",".join(OIDS_) + pdf = get_an_object(oid=OIDS) + + n_oids = len(np.unique(pdf.groupby("i:objectId").count()["i:ra"])) + assert n_oids == 3 + + n_oids_single = 0 + len_object = 0 + for oid in OIDS_: + pdf_ = get_an_object(oid=oid) + n_oid = len(np.unique(pdf_.groupby("i:objectId").count()["i:ra"])) + n_oids_single += n_oid + len_object += len(pdf_) + + assert n_oids == n_oids_single, "{} is not equal to {}".format( + n_oids, n_oids_single + ) + assert len_object == len(pdf), "{} is not equal to {}".format(len_object, len(pdf)) + + +if __name__ == "__main__": + """ Execute the test suite """ + import sys + import doctest + + sys.exit(doctest.testmod()[0])