diff --git a/README.md b/README.md index ccb421a..a057f78 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,15 @@ The Java Gateway enables the Flask application to communicate with a JVM using [ The Fink cutout API is a Flask application to access cutouts from the Fink datalake. We only store cutout metadata in HBase, and this API retrieves the data from the raw parquet files stored on HDFS. +_From 2019 to 2024, the development of this API was done in [fink-science-portal](https://github.com/astrolabsoftware/fink-science-portal). Check this repository for older issues and PR._ + ## Documentation -The user documentation can be found at this [link](https://fink-broker.readthedocs.io/en/latest/services/search/getting_started/#quick-start-api). Documentation for developpers and maintainers can be found on [GitLab](https://gitlab.in2p3.fr/fink/rubin-performance-check/-/blob/main/portal/README.md?ref_type=heads) (auth required). +There are several forms of documentation, depending on what you are looking for: + +- Tutorials/How-to guides: [Fink user manual](https://fink-broker.readthedocs.io/en/latest/services/search/getting_started/#quick-start-api) +- API Reference guide: [https://api.fink-portal.org](https://api.fink-portal.org) +- Notes for developpers and maintainers (auth required): [GitLab](https://gitlab.in2p3.fr/fink/rubin-performance-check/-/blob/main/portal/README.md?ref_type=heads) ## Requirements and installation @@ -24,7 +30,7 @@ The input parameters can be found in [config.yml](config.yml). Make sure that th ### Debug -After starting [fink-cutout-api](https://github.com/astrolabsoftware/fink-cutout-api), you can simply test the API using: +After starting the Fink Java Gateway and [fink-cutout-api](https://github.com/astrolabsoftware/fink-cutout-api) services, you can simply launch the API in debug mode using: ```bash python app.py @@ -32,7 +38,7 @@ python app.py ### Production -The application is simply managed by `gunicorn` and `systemd` (see [install](install/README.md)), and you can simply manage it using: +The application is simply managed by `gunicorn` and `systemd` (see [install](install/README.md)), and you can manage it using: ```bash # start the application diff --git a/app.py b/app.py index f1b9c87..df9e4fc 100644 --- a/app.py +++ b/app.py @@ -12,24 +12,51 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from flask import Flask +from flask import Flask, Blueprint +from flask_restx import Api + +from apps import __version__ from apps.utils.utils import extract_configuration -from apps.routes.objects.api import bp as bp_objects -from apps.routes.cutouts.api import bp as bp_cutouts +from apps.routes.objects.api import ns as ns_objects +from apps.routes.cutouts.api import ns as ns_cutouts config = extract_configuration("config.yml") app = Flask("Fink REST API") +# Master blueprint +blueprint = Blueprint("api", __name__, url_prefix="/") +api = Api( + blueprint, + version=__version__, + title="Fink object API", + description="REST API to access data from Fink", +) + + +# Enable CORS for this blueprint +@blueprint.after_request +def after_request(response): + response.headers.add("Access-Control-Allow-Origin", "*") + response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") + response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") + return response + + # Server configuration app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024 app.config["JSONIFY_PRETTYPRINT_REGULAR"] = True app.config["JSON_SORT_KEYS"] = False -app.register_blueprint(bp_objects) -app.register_blueprint(bp_cutouts) +# Register namespace +api.add_namespace(ns_objects) +api.add_namespace(ns_cutouts) + +# Register blueprint +app.register_blueprint(blueprint) + if __name__ == "__main__": app.run(config["HOST"], debug=True, port=int(config["PORT"])) diff --git a/apps/routes/cutouts/api.py b/apps/routes/cutouts/api.py index 6e4c0ea..3e42de8 100644 --- a/apps/routes/cutouts/api.py +++ b/apps/routes/cutouts/api.py @@ -12,93 +12,91 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from flask import Blueprint, Response, jsonify, request +from flask import Response, request +from flask_restx import Namespace, Resource, fields + from apps.utils.utils import check_args from apps.routes.cutouts.utils import format_and_send_cutout -bp = Blueprint("cutouts", __name__) - - -# Enable CORS for this blueprint -@bp.after_request -def after_request(response): - response.headers.add("Access-Control-Allow-Origin", "*") - response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") - response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") - return response +ns = Namespace("api/v1/cutouts", "Get cutout data based on ZTF ID") - -ARGS = [ - { - "name": "objectId", - "required": True, - "description": "ZTF Object ID", - }, - { - "name": "kind", - "required": True, - "description": "Science, Template, or Difference. For output-format=array, you can also specify `kind: All` to get the 3 cutouts.", - }, - { - "name": "output-format", - "required": False, - "description": "PNG[default], FITS, array", - }, - { - "name": "candid", - "required": False, - "description": "Candidate ID of the alert belonging to the object with `objectId`. If not filled, the cutouts of the latest alert is returned", - }, - { - "name": "stretch", - "required": False, - "description": "Stretch function to be applied. Available: sigmoid[default], linear, sqrt, power, log.", - }, - { - "name": "colormap", - "required": False, - "description": "Valid matplotlib colormap name (see matplotlib.cm). Default is grayscale.", - }, - { - "name": "pmin", - "required": False, - "description": "The percentile value used to determine the pixel value of minimum cut level. Default is 0.5. No effect for sigmoid.", - }, - { - "name": "pmax", - "required": False, - "description": "The percentile value used to determine the pixel value of maximum cut level. Default is 99.5. No effect for sigmoid.", - }, +ARGS = ns.model( + "cutouts", { - "name": "convolution_kernel", - "required": False, - "description": "Convolve the image with a kernel (gauss or box). Default is None (not specified).", + "objectId": fields.String( + description="ZTF Object ID", + example="ZTF24abuunge", + required=True, + ), + "kind": fields.String( + description="Science, Template, or Difference. For output-format=array, you can also specify `kind: All` to get the 3 cutouts.", + example="Science", + required=True, + ), + "output-format": fields.String( + description="PNG[default], FITS, array", example="array", required=False + ), + "candid": fields.Integer( + description="Candidate ID of the alert belonging to the object with `objectId`. If not filled, the cutouts of the latest alert is returned", + example=2890466950515015016, + required=False, + ), + "stretch": fields.String( + description="Stretch function to be applied. Available: sigmoid[default], linear, sqrt, power, log.", + example="sigmoid", + required=False, + ), + "colormap": fields.String( + description="Valid matplotlib colormap name (see matplotlib.cm). Default is grayscale.", + example="Blues", + required=False, + ), + "pmin": fields.Float( + description="The percentile value used to determine the pixel value of minimum cut level. Default is 0.5. No effect for sigmoid.", + example=0.5, + required=False, + ), + "pmax": fields.Float( + description="The percentile value used to determine the pixel value of maximum cut level. Default is 99.5. No effect for sigmoid.", + example=99.5, + required=False, + ), + "convolution_kernel": fields.String( + description="Convolve the image with a kernel (gauss or box). If not specified, no kernel is applied.", + example="gauss", + required=False, + ), }, -] +) -@bp.route("/api/v1/cutouts", methods=["GET"]) -def return_cutouts_arguments(): - """Obtain information about cutouts""" - if len(request.args) > 0: - # POST from query URL - return return_cutouts(payload=request.args) - else: - return jsonify({"args": ARGS}) +@ns.route("/") +@ns.doc(params={k: ARGS[k].description for k in ARGS}) +class Cutouts(Resource): + def get(self): + """Retrieve cutout data from the Fink/ZTF datalake""" + payload = request.args + if len(payload) > 0: + # POST from query URL + return self.post() + else: + return Response(ARGS.description, 200) + @ns.expect(ARGS, location="json", as_dict=True) + def post(self): + """Retrieve cutout data from the Fink/ZTF datalake""" + # get payload from the query URL + payload = request.args -@bp.route("/api/v1/cutouts", methods=["POST"]) -def return_cutouts(payload=None): - """Retrieve object data""" - # get payload from the JSON - if payload is None: - payload = request.json + if payload is None or len(payload) == 0: + # if no payload, try the JSON blob + payload = request.json - rep = check_args(ARGS, payload) - if rep["status"] != "ok": - return Response(str(rep), 400) + rep = check_args(ARGS, payload) + if rep["status"] != "ok": + return Response(str(rep), 400) - assert payload["kind"] in ["Science", "Template", "Difference", "All"] + assert payload["kind"] in ["Science", "Template", "Difference", "All"] - return format_and_send_cutout(payload) + return format_and_send_cutout(payload) diff --git a/apps/routes/cutouts/utils.py b/apps/routes/cutouts/utils.py index b2e13a2..04a57ae 100644 --- a/apps/routes/cutouts/utils.py +++ b/apps/routes/cutouts/utils.py @@ -91,6 +91,7 @@ def format_and_send_cutout(payload: dict): group_alerts=False, truncated=True, extract_color=False, + escape_slash=True, ) json_payload = {} diff --git a/apps/routes/objects/api.py b/apps/routes/objects/api.py index 025f4c2..a437c17 100644 --- a/apps/routes/objects/api.py +++ b/apps/routes/objects/api.py @@ -12,84 +12,84 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from flask import Blueprint, Response, jsonify, request +from flask import Response, request +from flask_restx import Namespace, Resource, fields + from apps.utils.utils import check_args from apps.utils.utils import send_tabular_data from apps.routes.objects.utils import extract_object_data -bp = Blueprint("objects", __name__) - - -# Enable CORS for this blueprint -@bp.after_request -def after_request(response): - response.headers.add("Access-Control-Allow-Origin", "*") - response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization") - response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") - return response - +ns = Namespace("api/v1/objects", "Get object data based on ZTF ID") -ARGS = [ - { - "name": "objectId", - "required": True, - "description": 'single ZTF Object ID, or a comma-separated list of object names, e.g. "ZTF19acmdpyr,ZTF21aaxtctv"', - }, - { - "name": "withupperlim", - "required": False, - "description": "If True, retrieve also upper limit measurements, and bad quality measurements. Use the column `d:tag` in your results: valid, upperlim, badquality.", - }, - { - "name": "withcutouts", - "required": False, - "description": "If True, retrieve also cutout data as 2D array. See also `cutout-kind`. More information on the original cutouts at https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_explanatory_supplement.pdf", - }, - { - "name": "cutout-kind", - "required": False, - "description": "`Science`, `Template`, or `Difference`. If not specified, returned all three.", - }, - { - "name": "columns", - "required": False, - "description": "Comma-separated data columns to transfer. Default is all columns.", - }, +ARGS = ns.model( + "objects", { - "name": "output-format", - "required": False, - "description": "Output format among json[default], csv, parquet, votable", + "objectId": fields.String( + description='single ZTF Object ID, or a comma-separated list of object names, e.g. "ZTF19acmdpyr,ZTF21aaxtctv"', + example="ZTF21abfmbix", + required=True, + ), + "withupperlim": fields.Boolean( + description="If True, retrieve also upper limit measurements, and bad quality measurements. Use the column `d:tag` in your results: valid, upperlim, badquality.", + example=False, + required=False, + ), + "withcutouts": fields.Boolean( + description="If True, retrieve also cutout data as 2D array. See also `cutout-kind`. More information on the original cutouts at https://irsa.ipac.caltech.edu/data/ZTF/docs/ztf_explanatory_supplement.pdf", + example=False, + required=False, + ), + "cutout-kind": fields.String( + description="`Science`, `Template`, or `Difference`. If not specified, returned all three.", + example="Science", + required=False, + ), + "columns": fields.String( + description="Comma-separated data columns to transfer, e.g. 'i:magpsf,i:jd'. If not specified, transfer all columns.", + example="i:jd,i:magpsf,i:fid", + required=False, + ), + "output-format": fields.String( + description="Output format among json[default], csv, parquet, votable.", + example="json", + required=False, + ), }, -] +) -@bp.route("/api/v1/objects", methods=["GET"]) -def return_object_arguments(): - """Obtain information about retrieving object data""" - if len(request.args) > 0: - # POST from query URL - return return_object(payload=request.args) - else: - return jsonify({"args": ARGS}) +@ns.route("/") +@ns.doc(params={k: ARGS[k].description for k in ARGS}) +class Objects(Resource): + def get(self): + """Retrieve object data from the Fink/ZTF database""" + payload = request.args + if len(payload) > 0: + # POST from query URL + return self.post() + else: + return Response(ARGS.description, 200) + @ns.expect(ARGS, location="json", as_dict=True) + def post(self): + """Retrieve object data from the Fink/ZTF database""" + # get payload from the query URL + payload = request.args -@bp.route("/api/v1/objects", methods=["POST"]) -def return_object(payload=None): - """Retrieve object data from the Fink database""" - # get payload from the JSON - if payload is None: - payload = request.json + if payload is None or len(payload) == 0: + # if no payload, try the JSON blob + payload = request.json - rep = check_args(ARGS, payload) - if rep["status"] != "ok": - return Response(str(rep), 400) + rep = check_args(ARGS, payload) + if rep["status"] != "ok": + return Response(str(rep), 400) - out = extract_object_data(payload) + out = extract_object_data(payload) - # Error propagation - if isinstance(out, Response): - return out + # Error propagation + if isinstance(out, Response): + return out - output_format = payload.get("output-format", "json") - return send_tabular_data(out, output_format) + output_format = payload.get("output-format", "json") + return send_tabular_data(out, output_format) diff --git a/apps/routes/objects/test.py b/apps/routes/objects/test.py index 480481f..1d2733b 100644 --- a/apps/routes/objects/test.py +++ b/apps/routes/objects/test.py @@ -146,7 +146,9 @@ def test_withcutouts_single_field() -> None: """ pdf = get_an_object(oid=OID, withcutouts=True, cutout_kind="Science") - assert isinstance(pdf["b:cutoutScience_stampData"].to_numpy()[0], list) + cutout = pdf["b:cutoutScience_stampData"].to_numpy()[0] + assert isinstance(cutout, list), cutout + assert len(cutout) != 0, cutout assert "b:cutoutTemplate_stampData" not in pdf.columns diff --git a/apps/utils/decoding.py b/apps/utils/decoding.py index 6468c2c..2aec8b9 100644 --- a/apps/utils/decoding.py +++ b/apps/utils/decoding.py @@ -47,13 +47,16 @@ def format_hbase_output( truncated: bool = False, extract_color: bool = True, with_constellation: bool = True, + escape_slash: bool = False, ): """ """ if len(hbase_output) == 0: return pd.DataFrame({}) # Construct the dataframe - pdfs = pd.DataFrame.from_dict(hbase_to_dict(hbase_output), orient="index") + pdfs = pd.DataFrame.from_dict( + hbase_to_dict(hbase_output, escape_slash=escape_slash), orient="index" + ) # Tracklet cell contains null if there is nothing # and so HBase won't transfer data -- ignoring the column @@ -144,13 +147,15 @@ def format_hbase_output( @profile -def hbase_to_dict(hbase_output): +def hbase_to_dict(hbase_output, escape_slash=False): """Optimize hbase output TreeMap for faster conversion to DataFrame""" gateway = JavaGateway(auto_convert=True) JSONObject = gateway.jvm.org.json.JSONObject # We do bulk export to JSON on Java side to avoid overheads of iterative access # and then parse it back to Dict in Python + if escape_slash: + hbase_output = str(hbase_output) optimized = json.loads(JSONObject(str(hbase_output)).toString()) return optimized diff --git a/apps/utils/utils.py b/apps/utils/utils.py index 2269b49..d4a78ad 100644 --- a/apps/utils/utils.py +++ b/apps/utils/utils.py @@ -18,6 +18,7 @@ import json import yaml import requests +import logging from flask import Response @@ -66,6 +67,9 @@ def download_cutout(objectId, candid, kind): data = json.loads(r.content) else: # TODO: different return based on `kind`? + logging.warning( + "Cutout retrieval failed with status {}: {}".format(r.status_code, r.text) + ) return [] if kind != "All": @@ -83,12 +87,12 @@ def check_args(args: list, payload: dict) -> dict: Parameters ---------- """ - required_args = [i["name"] for i in args if i["required"] is True] + required_args = [k for k in args if args[k].required is True] for required_arg in required_args: if required_arg not in payload: rep = { "status": "error", - "text": f"A value for `{required_arg}` is required. Use GET to check arguments.\n", + "text": f"A value for `{required_arg}` is required. See https://api.fink-portal.org \n", } return rep return {"status": "ok"} @@ -111,24 +115,33 @@ def send_tabular_data(pdf, output_format): case of error, returns `Response` object. """ if output_format == "json": - return pdf.to_json(orient="records") + response = Response(pdf.to_json(orient="records"), 200) + response.headers.set("Content-Type", "application/json") + return response elif output_format == "csv": - return pdf.to_csv(index=False) + # TODO: set header? + response = Response(pdf.to_csv(index=False), 200) + response.headers.set("Content-Type", "application/csv") + return response elif output_format == "votable": f = io.BytesIO() table = Table.from_pandas(pdf) vt = votable.from_table(table) votable.writeto(vt, f) f.seek(0) - return f.read() + response = Response(f.read(), 200) + response.headers.set("Content-Type", "votable") + return response elif output_format == "parquet": f = io.BytesIO() pdf.to_parquet(f) f.seek(0) - return f.read() + response = Response(f.read(), 200) + response.headers.set("Content-Type", "parquet") + return response rep = { "status": "error", - "text": f"Output format `{output_format}` is not supported. Choose among json, csv, or parquet\n", + "text": f"Output format `{output_format}` is not supported. Choose among json, csv, votable, or parquet\n", } return Response(str(rep), 400) diff --git a/requirements.txt b/requirements.txt index 871beb5..550205e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ astropy flask +flask-restx pandas numpy fink-filters