Skip to content

Commit

Permalink
Merge pull request #5 from astrolabsoftware/issue/04/py4j
Browse files Browse the repository at this point in the history
Switch to py4j
  • Loading branch information
JulienPeloton authored Dec 5, 2024
2 parents 5fd6186 + 4ac8a62 commit e680e5e
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 56 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
__pycache__
bin/Lomikel*.jar
52 changes: 9 additions & 43 deletions apps/utils/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@
# limitations under the License.
"""Utilities to work with the Fink HBase client"""

from py4j.java_gateway import JavaGateway
import os

import jpype
import jpype.imports
import numpy as np
import yaml

Expand All @@ -26,24 +25,6 @@
from line_profiler import profile


@profile
def initialise_jvm(path=None):
"""Start a JVM
Parameters
----------
path: str, optional
Path to the HBase client. Default is relative to apps/
"""
if not jpype.isJVMStarted():
if path is None:
path = os.path.dirname(apps_loc) + "/../bin/FinkBrowser.exe.jar"
jarpath = f"-Djava.class.path={path}"
jpype.startJVM(jpype.getDefaultJVMPath(), "-ea", jarpath, convertStrings=True)

jpype.attachThreadToJVM()


@profile
def connect_to_hbase_table(
tablename: str,
Expand All @@ -62,35 +43,24 @@ def connect_to_hbase_table(
Name of the rowkey in the table containing the schema. Default is given by the config file.
nlimit: int, optional
Maximum number of objects to return. Default is 10000
setphysicalrepo: bool, optional
If True, store cutouts queried on disk ("/tmp/Lomikel/HBaseClientBinaryDataRepository")
Needs client 02.01+. Default is False
config_path: str, optional
Path to the config file. Default is None (relative to the apps/ folder)
"""
initialise_jvm()

if config_path is None:
config_path = os.path.dirname(apps_loc) + "/../config.yml"
args = yaml.load(
open(config_path),
yaml.Loader,
)

import com.Lomikel.HBaser
from com.astrolabsoftware.FinkBrowser.Utils import Init

Init.init()

client = com.Lomikel.HBaser.HBaseClient(args["HBASEIP"], args["ZOOPORT"])
gateway = JavaGateway(auto_convert=True)
client = gateway.jvm.com.Lomikel.HBaser.HBaseClient(
args["HBASEIP"], args["ZOOPORT"]
)

if schema_name is None:
schema_name = args["SCHEMAVER"]
client.connect(tablename, schema_name)
if setphysicalrepo:
import com.Lomikel.HBaser.FilesBinaryDataRepository

client.setRepository(com.Lomikel.HBaser.FilesBinaryDataRepository())
client.setLimit(args["NLIMIT"])

return client
Expand Down Expand Up @@ -131,21 +101,17 @@ def create_or_update_hbase_table(
if len(np.unique(families)) != 1:
raise NotImplementedError("`create_hbase_table` only accepts one family name")

initialise_jvm()

if config_path is None:
config_path = os.path.dirname(apps_loc) + "/../config.yml"
args = yaml.load(
open(config_path),
yaml.Loader,
)

import com.Lomikel.HBaser
from com.astrolabsoftware.FinkBrowser.Utils import Init

Init.init()

client = com.Lomikel.HBaser.HBaseClient(args["HBASEIP"], args["ZOOPORT"])
gateway = JavaGateway(auto_convert=True)
client = gateway.jvm.com.Lomikel.HBaser.HBaseClient(
args["HBASEIP"], args["ZOOPORT"]
)

if create:
# Create the table and connect without schema
Expand Down
13 changes: 5 additions & 8 deletions apps/utils/decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# limitations under the License.
"""Utilities to decode data from the HBase client"""

from py4j.java_gateway import JavaGateway
import json
import pandas as pd
import numpy as np

Expand Down Expand Up @@ -144,17 +146,12 @@ def format_hbase_output(
@profile
def hbase_to_dict(hbase_output):
"""Optimize hbase output TreeMap for faster conversion to DataFrame"""
# Naive Python implementation
# optimized = {i: dict(j) for i, j in hbase_output.items()}

# Here we assume JPype is already initialized
import json

from org.json import JSONObject
gateway = JavaGateway(auto_convert=True)
JSONObject = gateway.jvm.org.json.JSONObject

# We do bulk export to JSON on Java side to avoid overheads of iterative access
# and then parse it back to Dict in Python
optimized = json.loads(JSONObject(hbase_output).toString())
optimized = json.loads(JSONObject(str(hbase_output)).toString())

return optimized

Expand Down
Binary file removed bin/FinkBrowser.exe.jar
Binary file not shown.
25 changes: 25 additions & 0 deletions bin/download_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2024 AstroLab Software
# Author: Julien Peloton
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
BASEURL=https://hrivnac.web.cern.ch/Activities/Packages/Lomikel
CLIENTVERSION=03.04.00x


files="Lomikel-$CLIENTVERSION-ext.jar Lomikel-$CLIENTVERSION.exe.jar Lomikel-$CLIENTVERSION-HBase.jar Lomikel-$CLIENTVERSION-HBase.exe.jar Lomikel-$CLIENTVERSION.jar"

for file in $files; do
echo $file
wget --directory-prefix=. $BASEURL/$file
done
Binary file added bin/py4j0.10.9.7.jar
Binary file not shown.
1 change: 1 addition & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CUTOUTAPIURL: http://localhost
# HBase configuration
HBASEIP: localhost
ZOOPORT: 2183
CLIENTVERSION: 03.04.00x

# Table schema (schema_{fink_broker}_{fink_science})
SCHEMAVER: schema_3.1_5.21.14
Expand Down
51 changes: 47 additions & 4 deletions install/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,58 @@ pip install -r requirements.txt

Follow instructions in the [fink-cutout-api](https://github.com/astrolabsoftware/fink-cutout-api/blob/main/install/README.md).

## Client installation and Fink gateway

To access HBase tables, we use a client based on [Lomikel](https://github.com/hrivnac/Lomikel). To download the latest version of the client, go to `bin` and execute:

```bash
cd bin
./download_client.sh
```

Do not forget to update the version in the `config.yml` file. Then install (as sudo) a new unit for systemd under `/etc/systemd/system/fink_gateway.service` (check the correct version numbers for JARs):

```bash
[Unit]
Description=Start a JVM with Fink Java objects
After=network.target

[Service]
User=almalinux
Group=almalinux
WorkingDirectory=/home/almalinux/fink-object-api/bin

ExecStart=/bin/sh -c 'source /home/almalinux/.bashrc; exec java -cp "Lomikel-03.04.00x-HBase.exe.jar:py4j0.10.9.7.jar" com.Lomikel.Py4J.LomikelGatewayServer 2>&1 >> /tmp/fink_gateway.out'

[Install]
WantedBy=multi-user.target
```

Reload daemon and start the service:

```bash
sudo systemctl daemon-reload
sudo systemctl start fink_gateway
```

Check carefuly the status:

```bash
sudo systemctl start fink_gateway
```

Note that having a JVM open all the time can lead to a memory leak, so it is probably wise to restart the service from time to time.


## Systemctl and gunicorn

Install a new unit for systemd under `/etc/systemd/system/fink_object_api.service`:
Install a new unit (as sudo) for systemd under `/etc/systemd/system/fink_object_api.service`:

```bash
[Unit]
Description=gunicorn daemon for fink_object_api
After=network.target
After=network.target fink_gateway.service fink_cutout_api.service
Requires=fink_gateway.service fink_cutout_api.service

[Service]
User=almalinux
Expand All @@ -41,5 +85,4 @@ sudo systemctl daemon-reload
sudo systemctl start fink_object_api
```


You are ready to use the API!
Note that this will automatically starts `fink_gateway.service` and `fink_cutout_api.service` if they were not started. You are ready to use the API!
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ line_profiler
requests
pyarrow
matplotlib
JPype1
py4j
PyYAML
pyspark

0 comments on commit e680e5e

Please sign in to comment.