diff --git a/.gitignore b/.gitignore index bee8a64..0835ec5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ __pycache__ +bin/Lomikel*.jar diff --git a/apps/utils/client.py b/apps/utils/client.py index bfb4b62..1209292 100644 --- a/apps/utils/client.py +++ b/apps/utils/client.py @@ -14,10 +14,9 @@ # limitations under the License. """Utilities to work with the Fink HBase client""" +from py4j.java_gateway import JavaGateway import os -import jpype -import jpype.imports import numpy as np import yaml @@ -26,24 +25,6 @@ from line_profiler import profile -@profile -def initialise_jvm(path=None): - """Start a JVM - - Parameters - ---------- - path: str, optional - Path to the HBase client. Default is relative to apps/ - """ - if not jpype.isJVMStarted(): - if path is None: - path = os.path.dirname(apps_loc) + "/../bin/FinkBrowser.exe.jar" - jarpath = f"-Djava.class.path={path}" - jpype.startJVM(jpype.getDefaultJVMPath(), "-ea", jarpath, convertStrings=True) - - jpype.attachThreadToJVM() - - @profile def connect_to_hbase_table( tablename: str, @@ -62,14 +43,9 @@ def connect_to_hbase_table( Name of the rowkey in the table containing the schema. Default is given by the config file. nlimit: int, optional Maximum number of objects to return. Default is 10000 - setphysicalrepo: bool, optional - If True, store cutouts queried on disk ("/tmp/Lomikel/HBaseClientBinaryDataRepository") - Needs client 02.01+. Default is False config_path: str, optional Path to the config file. Default is None (relative to the apps/ folder) """ - initialise_jvm() - if config_path is None: config_path = os.path.dirname(apps_loc) + "/../config.yml" args = yaml.load( @@ -77,20 +53,14 @@ def connect_to_hbase_table( yaml.Loader, ) - import com.Lomikel.HBaser - from com.astrolabsoftware.FinkBrowser.Utils import Init - - Init.init() - - client = com.Lomikel.HBaser.HBaseClient(args["HBASEIP"], args["ZOOPORT"]) + gateway = JavaGateway(auto_convert=True) + client = gateway.jvm.com.Lomikel.HBaser.HBaseClient( + args["HBASEIP"], args["ZOOPORT"] + ) if schema_name is None: schema_name = args["SCHEMAVER"] client.connect(tablename, schema_name) - if setphysicalrepo: - import com.Lomikel.HBaser.FilesBinaryDataRepository - - client.setRepository(com.Lomikel.HBaser.FilesBinaryDataRepository()) client.setLimit(args["NLIMIT"]) return client @@ -131,8 +101,6 @@ def create_or_update_hbase_table( if len(np.unique(families)) != 1: raise NotImplementedError("`create_hbase_table` only accepts one family name") - initialise_jvm() - if config_path is None: config_path = os.path.dirname(apps_loc) + "/../config.yml" args = yaml.load( @@ -140,12 +108,10 @@ def create_or_update_hbase_table( yaml.Loader, ) - import com.Lomikel.HBaser - from com.astrolabsoftware.FinkBrowser.Utils import Init - - Init.init() - - client = com.Lomikel.HBaser.HBaseClient(args["HBASEIP"], args["ZOOPORT"]) + gateway = JavaGateway(auto_convert=True) + client = gateway.jvm.com.Lomikel.HBaser.HBaseClient( + args["HBASEIP"], args["ZOOPORT"] + ) if create: # Create the table and connect without schema diff --git a/apps/utils/decoding.py b/apps/utils/decoding.py index 0470b6f..6468c2c 100644 --- a/apps/utils/decoding.py +++ b/apps/utils/decoding.py @@ -14,6 +14,8 @@ # limitations under the License. """Utilities to decode data from the HBase client""" +from py4j.java_gateway import JavaGateway +import json import pandas as pd import numpy as np @@ -144,17 +146,12 @@ def format_hbase_output( @profile def hbase_to_dict(hbase_output): """Optimize hbase output TreeMap for faster conversion to DataFrame""" - # Naive Python implementation - # optimized = {i: dict(j) for i, j in hbase_output.items()} - - # Here we assume JPype is already initialized - import json - - from org.json import JSONObject + gateway = JavaGateway(auto_convert=True) + JSONObject = gateway.jvm.org.json.JSONObject # We do bulk export to JSON on Java side to avoid overheads of iterative access # and then parse it back to Dict in Python - optimized = json.loads(JSONObject(hbase_output).toString()) + optimized = json.loads(JSONObject(str(hbase_output)).toString()) return optimized diff --git a/bin/FinkBrowser.exe.jar b/bin/FinkBrowser.exe.jar deleted file mode 100644 index a1af416..0000000 Binary files a/bin/FinkBrowser.exe.jar and /dev/null differ diff --git a/bin/download_client.sh b/bin/download_client.sh new file mode 100755 index 0000000..7e8c4dc --- /dev/null +++ b/bin/download_client.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright 2024 AstroLab Software +# Author: Julien Peloton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +BASEURL=https://hrivnac.web.cern.ch/Activities/Packages/Lomikel +CLIENTVERSION=03.04.00x + + +files="Lomikel-$CLIENTVERSION-ext.jar Lomikel-$CLIENTVERSION.exe.jar Lomikel-$CLIENTVERSION-HBase.jar Lomikel-$CLIENTVERSION-HBase.exe.jar Lomikel-$CLIENTVERSION.jar" + +for file in $files; do + echo $file + wget --directory-prefix=. $BASEURL/$file +done diff --git a/bin/py4j0.10.9.7.jar b/bin/py4j0.10.9.7.jar new file mode 100644 index 0000000..0ac9307 Binary files /dev/null and b/bin/py4j0.10.9.7.jar differ diff --git a/config.yml b/config.yml index 0d6c0a4..78556b2 100644 --- a/config.yml +++ b/config.yml @@ -8,6 +8,7 @@ CUTOUTAPIURL: http://localhost # HBase configuration HBASEIP: localhost ZOOPORT: 2183 +CLIENTVERSION: 03.04.00x # Table schema (schema_{fink_broker}_{fink_science}) SCHEMAVER: schema_3.1_5.21.14 diff --git a/install/README.md b/install/README.md index c628460..7bc779f 100644 --- a/install/README.md +++ b/install/README.md @@ -14,14 +14,58 @@ pip install -r requirements.txt Follow instructions in the [fink-cutout-api](https://github.com/astrolabsoftware/fink-cutout-api/blob/main/install/README.md). +## Client installation and Fink gateway + +To access HBase tables, we use a client based on [Lomikel](https://github.com/hrivnac/Lomikel). To download the latest version of the client, go to `bin` and execute: + +```bash +cd bin +./download_client.sh +``` + +Do not forget to update the version in the `config.yml` file. Then install (as sudo) a new unit for systemd under `/etc/systemd/system/fink_gateway.service` (check the correct version numbers for JARs): + +```bash +[Unit] +Description=Start a JVM with Fink Java objects +After=network.target + +[Service] +User=almalinux +Group=almalinux +WorkingDirectory=/home/almalinux/fink-object-api/bin + +ExecStart=/bin/sh -c 'source /home/almalinux/.bashrc; exec java -cp "Lomikel-03.04.00x-HBase.exe.jar:py4j0.10.9.7.jar" com.Lomikel.Py4J.LomikelGatewayServer 2>&1 >> /tmp/fink_gateway.out' + +[Install] +WantedBy=multi-user.target +``` + +Reload daemon and start the service: + +```bash +sudo systemctl daemon-reload +sudo systemctl start fink_gateway +``` + +Check carefuly the status: + +```bash +sudo systemctl start fink_gateway +``` + +Note that having a JVM open all the time can lead to a memory leak, so it is probably wise to restart the service from time to time. + + ## Systemctl and gunicorn -Install a new unit for systemd under `/etc/systemd/system/fink_object_api.service`: +Install a new unit (as sudo) for systemd under `/etc/systemd/system/fink_object_api.service`: ```bash [Unit] Description=gunicorn daemon for fink_object_api -After=network.target +After=network.target fink_gateway.service fink_cutout_api.service +Requires=fink_gateway.service fink_cutout_api.service [Service] User=almalinux @@ -41,5 +85,4 @@ sudo systemctl daemon-reload sudo systemctl start fink_object_api ``` - -You are ready to use the API! +Note that this will automatically starts `fink_gateway.service` and `fink_cutout_api.service` if they were not started. You are ready to use the API! diff --git a/requirements.txt b/requirements.txt index e1b8ecc..871beb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,6 @@ line_profiler requests pyarrow matplotlib -JPype1 +py4j PyYAML pyspark