Skip to content

Commit

Permalink
Issue/80/refactoring into fink mm (#81)
Browse files Browse the repository at this point in the history
* start the refactoring

* still refactoring

* refactoring the last occurence of fink_grb

* pep8 refactoring and requirements

* refactoring workflows

* restore fink_grb_bronze topics

* test

* rename fink-mm package

* print grb_data

* remove print

* add gcn_test data

* update gitignore

* change columns name

* update test data for distribution

* fix get_assoc_proba test

* pep8 requirements

* update test data for online, test only online in ci

* run all test

* fink_added_value are part of the fink_mm output now

* pep8 requirements

* fix parquet reading issue

* sort colums name before test

* fix test I hope

* test

* add fink added value to the hbase column filter for the offline mode

* new column filters for hbase

* pep8

* remove drop t2 in offline mode doctest

* new test data for offline mode

* -1.0 for offline data test

* test

* test

* test

* test

* pep8

* test

* drop t2 columns

* drop t2 columns

* run all tests

* load gcn data of the previous day for the online mode

* fix append in the online mode

* print test

* test

* restore online as previous

* all test

* pep8
  • Loading branch information
FusRoman authored Jul 17, 2023
1 parent 1508be4 commit 5404735
Show file tree
Hide file tree
Showing 120 changed files with 465 additions and 345 deletions.
14 changes: 7 additions & 7 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ exclude =
build,
dist
per-file-ignores =
../Fink_GRB/fink_grb/online/ztf_join_gcn.py:W503,E402
../Fink_GRB/fink_grb/offline/spark_offline.py:W503,W605
../Fink_GRB/fink_grb/utils/fun_utils.py:F811
../Fink_GRB/fink_grb/distribution/distribution.py:W503
../Fink_GRB/fink_grb/observatory/__init__.py:E402
../Fink_GRB/setup.py:W503
../Fink_GRB/fink_grb/observatory/LVK/LVK.py:W503
../Fink_MM/fink_mm/online/ztf_join_gcn.py:W503,E402
../Fink_MM/fink_mm/offline/spark_offline.py:W503,W605
../Fink_MM/fink_mm/utils/fun_utils.py:F811
../Fink_MM/fink_mm/distribution/distribution.py:W503
../Fink_MM/fink_mm/observatory/__init__.py:E402
../Fink_MM/setup.py:W503
../Fink_MM/fink_mm/observatory/LVK/LVK.py:W503
4 changes: 2 additions & 2 deletions .github/workflows/linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install flake8
- name: Flake8 Fink GRB
- name: Flake8 Fink MM
run: |
flake8 ../Fink_GRB
flake8 ../Fink_MM
67 changes: 35 additions & 32 deletions .github/workflows/run_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:

env:
NIGHT: 20190903
CONFIGFILE_PATH: fink_grb/conf/integration.conf
CONFIGFILE_PATH: fink_mm/conf/integration.conf
SECRET_ID: ${{ secrets.GCN_ID }}
SECRET_SECRET: ${{ secrets.GCN_SECRET }}

Expand All @@ -35,13 +35,13 @@ jobs:
- name: Set up env [1/3]
run: |
echo "ROOTPATH=`pwd`" >> $GITHUB_ENV
echo "FINK_GRB=$GITHUB_WORKSPACE" >> $GITHUB_ENV
echo "FINK_MM=$GITHUB_WORKSPACE" >> $GITHUB_ENV
echo "JAVA_HOME=$(dirname $(dirname $(readlink -f $(which java))))" >> $GITHUB_ENV
- name: Set up env [2/3]
run: |
echo "PYTHONPATH="${PYTHONPATH}:${SPARKLIB}:${FINK_GRB}"" >> $GITHUB_ENV
echo "PYTHONPATH="${PYTHONPATH}:${SPARKLIB}:${FINK_MM}"" >> $GITHUB_ENV
- name: Install fink-broker
run: |
Expand All @@ -50,7 +50,7 @@ jobs:
cd fink-broker
pip install .
echo "FINK_HOME=${HOME}/fink-broker" >> $GITHUB_ENV
cd $FINK_GRB
cd $FINK_MM
- name: Set up env [3/3]
run: |
Expand All @@ -67,7 +67,7 @@ jobs:
run: |
echo "GITHUB_PATH: $GITHUB_PATH"
echo "PATH: $PATH"
echo "FINK_GRB: $FINK_GRB"
echo "FINK_MM: $FINK_MM"
echo "SPARK_HOME: $SPARK_HOME"
echo "SPARKLIB: $SPARKLIB"
echo "PYTHONPATH: $PYTHONPATH"
Expand All @@ -76,24 +76,27 @@ jobs:
echo "FINK_PACKAGES: $FINK_PACKAGES"
echo "FINK_JARS: $FINK_JARS"
echo `python -V`
echo
echo
export
- name: Start services
run: |
cd $USRLIBS
source scripts/start_services.sh --kafka-version ${KAFKA_VERSION} --hbase-version ${HBASE_VERSION}
cd $FINK_GRB
cd $FINK_MM
- name: Call raw2science
run: |
fink start raw2science -c ${FINK_GRB}/fink_grb/test/test_data/with_hbase/fink_test.conf --night $NIGHT --exit_after 90
fink start raw2science -c ${FINK_MM}/fink_mm/test/test_data/with_hbase/fink_test.conf --night $NIGHT --exit_after 90
- name: Merge data
run: |
fink start merge -c ${FINK_GRB}/fink_grb/test/test_data/with_hbase/fink_test.conf --night $NIGHT
fink start merge -c ${FINK_MM}/fink_mm/test/test_data/with_hbase/fink_test.conf --night $NIGHT
- name: Push Hbase data
run: |
fink start index_archival -c ${FINK_GRB}/fink_grb/test/test_data/with_hbase/fink_test.conf --night $NIGHT --index_table jd_objectId
fink start index_archival -c ${FINK_MM}/fink_mm/test/test_data/with_hbase/fink_test.conf --night $NIGHT --index_table jd_objectId
- name: Check HBase table
run: |
Expand All @@ -104,11 +107,11 @@ jobs:
echo "PYTHONPATH="${SPARK_HOME}/python/test_coverage:$PYTHONPATH"" >> $GITHUB_ENV
echo "COVERAGE_PROCESS_START="${ROOTPATH}/.coveragerc"" >> $GITHUB_ENV
python -m pip install . --upgrade
python -m pip install -U fink_filters
pip install -U fink_filters
- name: Run test suites
run: |
coverage run --source=${ROOTPATH} --rcfile=${ROOTPATH}/.coveragerc -m pytest --doctest-modules fink_grb
coverage run --source=${ROOTPATH} --rcfile=${ROOTPATH}/.coveragerc -m pytest --doctest-modules fink_mm
curl -s https://codecov.io/bash | bash
run-integration-test:
Expand All @@ -128,12 +131,12 @@ jobs:
- name: Set up env [1/3]
run: |
echo "ROOTPATH=`pwd`" >> $GITHUB_ENV
echo "FINK_GRB=$GITHUB_WORKSPACE" >> $GITHUB_ENV
echo "FINK_MM=$GITHUB_WORKSPACE" >> $GITHUB_ENV
echo "JAVA_HOME=$(dirname $(dirname $(readlink -f $(which java))))" >> $GITHUB_ENV
- name: Set up env [2/3]
run: |
echo "PYTHONPATH="${PYTHONPATH}:${SPARKLIB}:${FINK_GRB}"" >> $GITHUB_ENV
echo "PYTHONPATH="${PYTHONPATH}:${SPARKLIB}:${FINK_MM}"" >> $GITHUB_ENV
echo "NIGHT=`date +"%Y%m%d" -d "now"`" >> $GITHUB_ENV
- name: Install fink-broker
Expand All @@ -143,7 +146,7 @@ jobs:
cd fink-broker
pip install .
echo "FINK_HOME=${HOME}/fink-broker" >> $GITHUB_ENV
cd $FINK_GRB
cd $FINK_MM
- name: Set up env [3/3]
run: |
Expand All @@ -162,70 +165,70 @@ jobs:
echo "COVERAGE_PROCESS_START="${ROOTPATH}/.coveragerc"" >> $GITHUB_ENV
python -m pip install . --upgrade
python -m pip install -U fink_filters
mkdir fink_grb/ci_gcn_test
mkdir fink_grb/ci_join_test
mkdir fink_mm/ci_gcn_test
mkdir fink_mm/ci_join_test
- name: Launch GCN stream
# don't put --test for the gcn_stream command otherwise it will miss the gw alerts
shell: bash
run: |
fink_grb gcn_stream start --config $CONFIGFILE_PATH --test > fink_grb_gcnstream_${NIGHT}.log 2>&1 &
fink_mm gcn_stream start --config $CONFIGFILE_PATH --test > fink_mm_gcnstream_${NIGHT}.log 2>&1 &
sleep 180s
- name: Start services
run: |
cd $USRLIBS
source scripts/start_services.sh --kafka-version ${KAFKA_VERSION} --hbase-version ${HBASE_VERSION}
cd $FINK_GRB
cd $FINK_MM
- name: Generate fake gcn counterparts
run: |
python fink_grb/test/prep_ztf_data.py
mv fink_grb/test/test_data/ztf_test/online/raw/year=2019 fink_grb
python fink_mm/test/prep_ztf_data.py
mv fink_mm/test/test_data/ztf_test/online/raw/year=2019 fink_mm
- name: Check data
run: |
echo "GCN ALERTS"
ls -dlth fink_grb/ci_gcn_test/*/*/*/*
ls -dlth fink_mm/ci_gcn_test/*/*/*/*
echo ""
echo "ZTF ALERTS"
echo "--- ONLINE SCIENCE"
ls -dlth fink_grb/test/test_data/ztf_test/online/science/*/*/*/*
ls -dlth fink_mm/test/test_data/ztf_test/online/science/*/*/*/*
echo "--- ARCHIVE SCIENCE"
ls -dlth fink_grb/test/test_data/ztf_test/archive/science/*/*/*/*
ls -dlth fink_mm/test/test_data/ztf_test/archive/science/*/*/*/*
# - name: Call raw2science
# run: |
# fink start raw2science -c ${FINK_GRB}/fink_grb/test/test_data/with_hbase/fink_test.conf --night $NIGHT --exit_after 90
# fink start raw2science -c ${FINK_MM}/fink_mm/test/test_data/with_hbase/fink_test.conf --night $NIGHT --exit_after 90

# - name: Merge data
# run: |
# fink start merge -c ${FINK_GRB}/fink_grb/test/test_data/with_hbase/fink_test.conf --night $NIGHT
# fink start merge -c ${FINK_MM}/fink_mm/test/test_data/with_hbase/fink_test.conf --night $NIGHT

- name: Push Hbase data
run: |
fink start index_archival -c ${FINK_GRB}/fink_grb/test/test_data/with_hbase/fink_test.conf --night $NIGHT --index_table jd_objectId
fink start index_archival -c ${FINK_MM}/fink_mm/test/test_data/with_hbase/fink_test.conf --night $NIGHT --index_table jd_objectId
- name: Run online join streaming
run: |
fink_grb join_stream online --config $CONFIGFILE_PATH --night $NIGHT --exit_after 180
fink_mm join_stream online --config $CONFIGFILE_PATH --night $NIGHT --exit_after 180
- name: Check online stream
run: |
ls -lth fink_grb/ci_join_test/online/*/*/*
ls -lth fink_mm/ci_join_test/online/*/*/*
- name: Run offline stream
run: |
fink_grb join_stream offline --night $NIGHT --config $CONFIGFILE_PATH --test
fink_mm join_stream offline --night $NIGHT --config $CONFIGFILE_PATH --test
- name: Check offline stream
run: |
ls -lth fink_grb/ci_join_test/offline/*/*/*
ls -lth fink_mm/ci_join_test/offline/*/*/*
# - name: Online Distribution
# run: |
# fink_grb distribute --config $CONFIGFILE_PATH --night $NIGHT --exit_after 60
# fink_mm distribute --config $CONFIGFILE_PATH --night $NIGHT --exit_after 60

# - name: Check Distribution
# run: |
#python fink_grb/test/display_distribution.py
#python fink_mm/test/display_distribution.py
8 changes: 6 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,5 +136,9 @@ gcn_test
inspect_testdata.py
tmp_alert
build_egg.sh
launch_offline_grb_locally.sh
science2grb_locally.sh
launch_offline_mm_locally.sh
science2mm_locally.sh
.vscode
clean_it.sh
export_unit_test.sh
it_test.sh
36 changes: 18 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Fink_GRB
# Fink_MM

[![PEP8](https://github.com/FusRoman/Fink_GRB/actions/workflows/linter.yml/badge.svg)](https://github.com/FusRoman/Fink_GRB/actions/workflows/linter.yml)
[![Sentinel](https://github.com/FusRoman/Fink_GRB/actions/workflows/run_test.yml/badge.svg)](https://github.com/FusRoman/Fink_GRB/actions/workflows/run_test.yml)
[![PEP8](https://github.com/FusRoman/Fink_MM/actions/workflows/linter.yml/badge.svg)](https://github.com/FusRoman/Fink_MM/actions/workflows/linter.yml)
[![Sentinel](https://github.com/FusRoman/Fink_MM/actions/workflows/run_test.yml/badge.svg)](https://github.com/FusRoman/Fink_MM/actions/workflows/run_test.yml)

Correlation of the [Fink](https://fink-broker.org/) alerts with multi-messenger instruments for real-time purposes
* Available Instruments
Expand All @@ -15,34 +15,34 @@ Correlation of the [Fink](https://fink-broker.org/) alerts with multi-messenger

* Install from GitHub with pip
```console
toto@linux:~$ pip install git+https://github.com/FusRoman/Fink_GRB@v0.6-beta
toto@linux:~$ pip install git+https://github.com/FusRoman/Fink_MM@v0.6-beta
```
* Download the default config file from GitHub:fink_grb/conf/fink_grb.conf and move it in a custom location
* Download the default config file from GitHub:fink_mm/conf/fink_mm.conf and move it in a custom location
Update your configuration file with your custom parameters.
Please note that the configuration file's paths must not end with a '/'.
* Once Fink_GRB has been installed via pip, the command `fink_grb` become available. Type `fink_grb --help` to see the help message and show what you can do with Fink_GRB.
* Once Fink_MM has been installed via pip, the command `fink_mm` becomes available. Type `fink_mm --help` to see the help message and show what you can do with Fink_MM.

### Setup the Fink_GRB daemons
### Setup the Fink_MM daemons
* Start listening to GCN
```console
toto@linux:~$ fink_grb gcn_stream start --config /config_path
toto@linux:~$ fink_mm gcn_stream start --config /config_path
```
The above command will start a daemon that will store the GCN issued from the instruments registered in the system. The GCN will be stored at the location specified in the configuration file by the entry named 'online_gcn_data_prefix'. The path can be a local path or an HDFS path. In the latter case, the path must start with hdfs://IP:PORT///your_path where IP and PORT refer to the hdfs driver.

> :warning: The GCN stream need to be restarted after each update of Fink_GRB. Use the `ps aux | grep fink_grb` command to identify the process number of the gcn stream and kill it then restart the gcn stream with the same command as above.
> :warning: The GCN stream needs to be restarted after each update of Fink_MM. Use the `ps aux | grep fink_mm` command to identify the process number of the gcn stream and kill it, then restart the gcn stream with the same command as above.
### Schedulers
Fink_GRB has multiples script in the scheduler folder to launch the different services.
Fink_MM has multiple scripts in the scheduler folder to launch the different services.
* science2grb.sh will launch the online services that will cross-match in real-time the alerts issued from the ZTF/LSST with incoming GCN. (latency: ZTF/LSST latency + 30 seconds max)
* science2grb_offline.sh launches the offline services that will cross-match all the latest alerts from ZTF/LSST with the GCN within the time window (in days) specified in your config file. (latency: 1 day)
* grb2distribution.sh launches the distribution services that will send the outputs of the online services in real-time to the registered users of the [fink-client](https://github.com/astrolabsoftware/fink-client). (latency: ZTF/LSST latency + 30 seconds + Network latency to reach fink-client)

#### **Modify the scripts**
Download the multiple scripts from GitHub:scheduler/
These scripts use some paths that have to be modified before the deployment.
* Two variables names `FINK_GRB_CONFIG` and `LOG_PATH` are common to all script. The first is the location of your configuration file, and the second is where to store log files. Either you modify the value of these variables directly in the scripts, science2grb.sh and science2grb_offline.sh or you remove the declaration in these scripts and export these variables within your ~/.bashrc or ~/.bash_profile.
* Two variable names, `FINK_MM_CONFIG` and `LOG_PATH`, are common to all scripts. The first is the location of your configuration file, and the second is where to store log files. Either you modify the value of these variables directly in the scripts, science2grb.sh and science2grb_offline.sh, or you remove the declaration in these scripts and export these variables within your ~/.bashrc or ~/.bash_profile.
```console
export FINK_GRB_CONFIG="path/to/conf/fink_grb.conf"
export FINK_MM_CONFIG="path/to/conf/fink_mm.conf"
export LOG_PATH="path/to/store/log"
```

Expand All @@ -69,7 +69,7 @@ The module output is pushed into the folder specified by the config entry named
The output could be local or in a HDFS cluster.
Two folders are created inside; one called 'online' and one called 'offline'. Inside these two folders, the data are partitioned following the same scheme: 'year=year/month=month/day=day'. At the end of the path, you will find ```.parquet``` files containing the data.

### Fink_GRB Output Schema
### Fink_MM Output Schema

|Field |Type |Contents |
|-------------------|------|----------------------------------------------------------------------------------|
Expand All @@ -85,11 +85,11 @@ Two folders are created inside; one called 'online' and one called 'offline'. In
|instrument_or_event|String|Triggering instruments (GBM, XRT, ...) |
|platform |String|Triggering platform (Fermi, Swift, IceCube, ...) |
|triggerId |String|Unique GCN identifier ([GCN viewer](https://heasarc.gsfc.nasa.gov/wsgi-scripts/tach/gcn_v2/tach.wsgi/) to retrieve quickly the notice)|
|grb_ra |float |GCN Event right ascension |
|grb_dec |float |GCN Event declination |
|grb_loc_error |float |GCN error location in arcminute |
|gcn_ra |float |GCN Event right ascension |
|gcn_dec |float |GCN Event declination |
|gcn_loc_error |float |GCN error location in arcminute |
|triggerTimeUTC |String|GCN TriggerTime in UTC |
|grb_proba |float |Serendipitous probability to associate the alerts with the GCN event |
|p_assoc |float |Serendipitous probability to associate the alerts with the GCN event |
|fink_class |String|[Fink Classification](https://fink-broker.readthedocs.io/en/latest/science/classification/) |
| |
|Field available only for the online mode |
Expand All @@ -99,7 +99,7 @@ Two folders are created inside; one called 'online' and one called 'offline'. In
|start_vartime |float |first variation time at 5 sigma of the object (in jd, only valid for 30 days) |
|diff_vartime |float |difference between start_vartime and jd (if above 30, start_vartime is not valid) |

### Fink-client topics related to Fink_GRB
### Fink-client topics related to Fink_MM

The connection to the distribution stream is made by the [fink-client](https://github.com/astrolabsoftware/fink-client). Three topics are available :

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ secret=

[PATH]
# Prefix path on disk where are save GCN live data. used by the joining stream
online_gcn_data_prefix=fink_grb/test/test_data/gcn_test
online_gcn_data_prefix=fink_mm/test/test_data/gcn_test

# same path as online_gcn_data_prefix without the URI part. used by the gcn_stream monitor
hdfs_gcn_storage=/user/roman.le-montagner/gcn_storage/raw
Expand All @@ -13,10 +13,10 @@ hdfs_gcn_storage=/user/roman.le-montagner/gcn_storage/raw
# They can be in local FS (/path/ or files:///path/) or
# in distributed FS (e.g. hdfs:///path/).
# Be careful though to have enough disk space!
online_ztf_data_prefix=fink_grb/test/test_data/distribution_test_data/
online_ztf_data_prefix=fink_mm/test/test_data/distribution_test_data/

# Prefix path on disk to save GRB join ZTF data (work for both online and offline).
online_grb_data_prefix=fink_grb/test/test_data
online_grb_data_prefix=fink_mm/test/test_data

# Path where are store the hbase catalog in order to query the hbase database
hbase_catalog=/home/roman.le-montagner/fink-broker/catalogs_hbase/ztf.jd.json
Expand Down
6 changes: 3 additions & 3 deletions fink_grb/conf/fink_grb.conf → fink_mm/conf/fink_mm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ secret=

[PATH]
# Prefix path on disk where are save GCN live data. used by the joining stream
online_gcn_data_prefix=fink_grb/test/test_data/gcn_test/raw
online_gcn_data_prefix=fink_mm/test/test_data/gcn_test/raw

# same path as online_gcn_data_prefix without the URI part. used by the gcn_stream monitor
hdfs_gcn_storage=/user/roman.le-montagner/gcn_storage/raw
Expand All @@ -13,10 +13,10 @@ hdfs_gcn_storage=/user/roman.le-montagner/gcn_storage/raw
# They can be in local FS (/path/ or files:///path/) or
# in distributed FS (e.g. hdfs:///path/).
# Be careful though to have enough disk space!
online_ztf_data_prefix=fink_grb/test/test_data/ztf_test/online
online_ztf_data_prefix=fink_mm/test/test_data/ztf_test/online

# Prefix path on disk to save GRB join ZTF data (work for both online and offline).
online_grb_data_prefix=fink_grb/test/test_output
online_grb_data_prefix=fink_mm/test/test_output

# Path where are store the hbase catalog in order to query the hbase database
hbase_catalog=/home/roman.le-montagner/fink-broker/catalogs_hbase/ztf.jd.json
Expand Down
Loading

0 comments on commit 5404735

Please sign in to comment.