-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(PR GH Action): Workflow and script to automatically validate and build the index
- Loading branch information
Showing
8 changed files
with
235 additions
and
89 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import argparse | ||
import json | ||
import os | ||
from pathlib import Path | ||
import yaml | ||
import jsonschema | ||
from datetime import datetime | ||
|
||
def read_yaml_files(base_path, folders):
    """Load every ``*.yaml`` document found directly inside the given folders.

    Args:
        base_path: ``pathlib.Path`` of the directory that contains the folders.
        folders: Iterable of folder names (relative to ``base_path``) to scan.

    Returns:
        A list of ``(folder, content)`` tuples, where ``content`` is whatever
        ``yaml.safe_load`` returned for one file. Files are visited in sorted
        path order so the result is reproducible from one run to the next.
    """
    yaml_files = []
    for folder in folders:
        folder_path = base_path.joinpath(folder)
        # glob() yields entries in a filesystem-dependent order; sorting keeps
        # the generated index stable instead of reshuffling entries per build.
        for yaml_file in sorted(folder_path.glob('*.yaml')):
            with open(yaml_file, 'r', encoding='utf-8') as f:
                yaml_files.append((folder, yaml.safe_load(f)))
    return yaml_files
|
||
def validate_yaml(yaml_content, schema_path):
    """Check a parsed YAML document against the JSON Schema stored on disk.

    Args:
        yaml_content: The already-parsed document to validate.
        schema_path: Location of the JSON Schema file (read as UTF-8).

    Raises:
        jsonschema.exceptions.ValidationError: If the document does not
            conform to the schema.
    """
    with open(schema_path, 'r', encoding='utf-8') as schema_file:
        loaded_schema = json.loads(schema_file.read())
    jsonschema.validate(instance=yaml_content, schema=loaded_schema)
|
||
def build_index(yaml_files, schemas_path):
    """Assemble the catalog index from parsed YAML documents.

    Each document is validated against ``<schemas_path>/<folder>.json``.
    Documents that fail validation are reported on stdout and left out of
    the index; valid ones are grouped under their folder name.

    Args:
        yaml_files: Iterable of ``(folder, content)`` tuples.
        schemas_path: ``pathlib.Path`` of the directory holding the schemas.

    Returns:
        A dict with two keys: ``index_timestamp`` (UTC, formatted as
        ``YYYY-MM-DDTHH:MM:SSZ``) and ``catalog`` (folder name -> list of
        valid documents).
    """
    # Local import: the module itself only imports ``datetime`` from datetime.
    from datetime import timezone

    # Emit an unambiguous UTC ISO-8601 timestamp rather than the naive,
    # local-time ``str(datetime.now())`` form.
    timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    index = {'index_timestamp': timestamp, 'catalog': {}}
    for folder, content in yaml_files:
        schema_path = schemas_path.joinpath(f"{folder}.json")
        try:
            validate_yaml(content, schema_path)
        except jsonschema.exceptions.ValidationError as e:
            # Best effort: report the invalid document and keep building.
            print(f"Validation error in {folder}: {e}")
            continue
        index['catalog'].setdefault(folder, []).append(content)
    return index
|
||
def main():
    """Command-line entry point: build the index and write it to both trees.

    Reads YAML files from ``<build-branch>/catalog/<folder>``, validates them
    against the schemas in ``<build-branch>/catalog/schemas``, and writes the
    resulting index to ``<deploy-branch>/index.json`` and
    ``<build-branch>/src/index.json``.
    """
    parser = argparse.ArgumentParser(description="Build and deploy index from YAML files.")
    parser.add_argument("--build-branch", "-b", type=str, required=True, help="Path to the build branch.")
    parser.add_argument("--deploy-branch", "-d", type=str, required=True, help="Path to the deploy branch.")
    parser.add_argument("--folders-to-scan", "-f", type=str, nargs='+', default=["datasets"], help="List of folders to scan for YAML files.")
    args = parser.parse_args()

    build_path = Path(args.build_branch)
    deploy_path = Path(args.deploy_branch)
    schemas_path = build_path.joinpath('catalog/schemas')

    yaml_files = read_yaml_files(build_path.joinpath('catalog'), args.folders_to_scan)
    index = build_index(yaml_files, schemas_path)

    # Same payload goes to both locations, deploy copy first.
    targets = (
        deploy_path.joinpath('index.json'),
        build_path.joinpath('src/index.json'),
    )
    for target in targets:
        # Create the parent directory for every target, so a missing ``src``
        # folder does not abort the run after the deploy copy was written.
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            json.dump(index, f, indent=4)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
name: Build index | ||
|
||
on: | ||
workflow_dispatch: | ||
push: | ||
branches: | ||
- main | ||
paths: | ||
- 'catalog/**.yaml' | ||
|
||
permissions: | ||
contents: write | ||
|
||
jobs: | ||
build-index: | ||
runs-on: ubuntu-latest | ||
if: github.event.repository.fork == false | ||
|
||
steps: | ||
- name: Checkout main | ||
uses: actions/checkout@v4 | ||
with: | ||
fetch-depth: 2 | ||
ref: 'main' | ||
path: main | ||
|
||
- name: Checkout gh-pages | ||
uses: actions/checkout@v4 | ||
with: | ||
fetch-depth: 2 | ||
ref: 'gh-pages' | ||
path: gh-pages | ||
|
||
- name: Setup python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: '3.11' | ||
|
||
- name: Install yaml & jsonschema | ||
run: pip install pyyaml jsonschema | ||
|
||
- name: Build index | ||
run: python -B main/.github/scripts/build_index.py --build-branch main --deploy-branch gh-pages --folders-to-scan datasets | ||
|
||
- name: Get last commit message - main | ||
id: last-commit-message-main | ||
run: echo "msg=$(git -C main log -1 --pretty=%s)" >> $GITHUB_OUTPUT | ||
|
||
- name: Get last commit message - gh-pages | ||
id: last-commit-message-gh-pages | ||
run: echo "msg=$(git -C gh-pages log -1 --pretty=%s)" >> $GITHUB_OUTPUT | ||
|
||
- name: Commit - main | ||
uses: stefanzweifel/git-auto-commit-action@v5 | ||
with: | ||
commit_message: ${{ steps.last-commit-message-main.outputs.msg }} | ||
commit_options: '--amend --no-edit' | ||
file_pattern: '*.json catalog/**.yaml' | ||
push_options: '--force' | ||
skip_fetch: true | ||
repository: main | ||
|
||
- name: Commit - gh-pages | ||
uses: stefanzweifel/git-auto-commit-action@v5 | ||
with: | ||
commit_message: ${{ steps.last-commit-message-gh-pages.outputs.msg }} | ||
commit_options: '--amend --no-edit' | ||
file_pattern: '*.json' | ||
push_options: '--force' | ||
skip_fetch: true | ||
repository: gh-pages |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
schema_version: "1.0.0" | ||
schema_type: "dataset" | ||
dataset_id: "0001-pneuma-dataset" | ||
title: "pNEUMA Dataset" | ||
doi: "10.5281/zenodo.7426506" | ||
version: "v1" | ||
description: "pNEUMA is an open large-scale dataset of naturalistic trajectories of half a million vehicles that have been collected by a one-of-a-kind experiment by a swarm of drones in the congested downtown area of Athens, Greece." | ||
created_at: "2022-12-08T16:26:11Z" | ||
updated_at: "2022-12-08T16:26:11Z" | ||
data_format: "zip" | ||
tags: | ||
- "traffic" | ||
- "drone" | ||
source: "Zenodo" | ||
authors: | ||
name: "Kim, Sohyeong" | ||
orcid: "0000-0000-0000-0000" | ||
license: "CC BY 4.0" | ||
access_url: "https://zenodo.org/record/7426506/" | ||
documentation_url: "https://zenodo.org/record/7426506" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
schema_version: "1.0.0" | ||
schema_type: "dataset" | ||
dataset_id: "0002-pneuma-vision-dataset" | ||
title: "pNEUMA vision dataset" | ||
doi: "10.5281/zenodo.10491409" | ||
version: "v1" | ||
description: "The pNEUMA dataset is the drone traffic imagery dataset that contains images of frame and vehicle annotations as positions. This dataset is the expansion of the pNEUMA, the urban trajectory dataset collected by swarms of drones in Athens. For more details about pNEUMA and pNEUMA Vision, please check our website at https://open-traffic.epfl.ch and github." | ||
created_at: "2023-08-30T15:12:56Z" | ||
updated_at: "2023-08-30T15:12:56Z" | ||
data_format: "csv" | ||
tags: | ||
- "traffic" | ||
- "GNSS" | ||
- "urban mobility" | ||
- "traffic safety" | ||
source: "Zenodo" | ||
authors: | ||
name: "Barmpounakis, Emmanouil" | ||
orcid: "0000-0000-0000-0000" | ||
license: "CC BY 4.0" | ||
access_url: "https://zenodo.org/record/10491409" | ||
documentation_url: "https://zenodo.org/record/10491409" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,92 +1,49 @@ | ||
{ | ||
"index_timestamp": "2024-08-30T12:00:00Z", | ||
"catalog": { | ||
"datasets": [ | ||
{ | ||
"version": "1.0.0", | ||
"dataset_id": "123e4567-e89b-12d3-a456-426614174000", | ||
"title": "Global Climate Data 2023", | ||
"doi": "10.1234/global-climate-2023", | ||
"description": "This dataset contains global climate data for the year 2023, including temperature, precipitation, and atmospheric pressure readings from various regions around the world.", | ||
"created_at": "2023-01-01T12:00:00Z", | ||
"updated_at": "2023-06-15T12:00:00Z", | ||
"data_format": "CSV", | ||
"tags": ["climate", "temperature", "precipitation", "atmospheric pressure"], | ||
"source": "National Meteorological Organization", | ||
"authors": { | ||
"name": "Dr. Jane Doe", | ||
"orcid": "0000-0002-1825-0097" | ||
}, | ||
"license": "CC BY 4.0", | ||
"access_url": "https://example.com/datasets/global-climate-2023", | ||
"documentation_url": "https://example.com/datasets/global-climate-2023/documentation", | ||
"access_endpoint": "https://api.example.com/climate/2023/data", | ||
"documentation_endpoint": "https://api.example.com/climate/2023/docs" | ||
"index_timestamp": "2024-08-30T12:00:00Z", | ||
"catalog": { | ||
"datasets": [ | ||
{ | ||
"schema_version": "1.0.0", | ||
"schema_type": "dataset", | ||
"dataset_id": "0001-pneuma-dataset", | ||
"title": "pNEUMA Dataset", | ||
"doi": "10.5281/zenodo.7426506", | ||
"version": "v1", | ||
"description": "pNEUMA is an open large-scale dataset of naturalistic trajectories of half a million vehicles that have been collected by a one-of-a-kind experiment by a swarm of drones in the congested downtown area of Athens, Greece. ", | ||
"created_at": "2022-12-08T16:26:11Z", | ||
"updated_at": "2022-12-08T16:26:11Z", | ||
"data_format": "zip", | ||
"tags": ["traffic", "drone"], | ||
"source": "Zenodo", | ||
"authors": { | ||
"name": "Kim, Sohyeong", | ||
"orcid": "0000-0000-0000-0000" | ||
}, | ||
{ | ||
"version": "1.0.0", | ||
"dataset_id": "223e4567-e89b-12d3-a456-426614174111", | ||
"title": "Regional Climate Data 2022", | ||
"doi": "10.1234/regional-climate-2022", | ||
"description": "This dataset provides detailed climate data for specific regions for the year 2022.", | ||
"created_at": "2022-01-01T12:00:00Z", | ||
"updated_at": "2022-12-15T12:00:00Z", | ||
"data_format": "JSON", | ||
"tags": ["climate", "regional data", "temperature"], | ||
"source": "Regional Weather Stations", | ||
"authors": { | ||
"name": "Dr. John Smith", | ||
"orcid": "0000-0002-3456-7890" | ||
}, | ||
"license": "MIT", | ||
"access_url": "https://example.com/datasets/regional-climate-2022", | ||
"documentation_url": "https://example.com/datasets/regional-climate-2022/documentation", | ||
"access_endpoint": "https://api.example.com/climate/2022/data", | ||
"documentation_endpoint": "https://api.example.com/climate/2022/docs" | ||
"license": "CC BY 4.0", | ||
"access_url": "https://zenodo.org/record/7426506/", | ||
"documentation_url": "https://zenodo.org/record/7426506" | ||
}, | ||
{ | ||
"schema_version": "1.0.0", | ||
"schema_type": "dataset", | ||
"dataset_id": "0002-pneuma-vision-dataset", | ||
"title": "pNEUMA vision dataset", | ||
"doi": "10.5281/zenodo.10491409", | ||
"version": "v1", | ||
"description": "The pNEUMA dataset is the drone traffic imagery dataset that contains images of frame and vehicle annotations as positions. This dataset is the expansion of the pNEUMA, the urban trajectory dataset collected by swarms of drones in Athens. For more details about pNEUMA and pNEUMA Vision, please check our website at https://open-traffic.epfl.ch and github. ", | ||
"created_at": "2023-08-30T15:12:56Z", | ||
"updated_at": "2023-08-30T15:12:56Z", | ||
"data_format": "csv", | ||
"tags": ["traffic", "GNSS", "urban mobility", "traffic safety"], | ||
"source": "Zenodo", | ||
"authors": { | ||
"name": "Barmpounakis, Emmanouil", | ||
"orcid": "0000-0000-0000-0000" | ||
}, | ||
{ | ||
"version": "1.0.0", | ||
"dataset_id": "323e4567-e89b-12d3-a456-426614174222", | ||
"title": "Global Economic Indicators 2023", | ||
"doi": "10.1234/global-economic-2023", | ||
"description": "This dataset includes global economic indicators such as GDP, inflation rates, and unemployment figures for the year 2023.", | ||
"created_at": "2023-02-01T12:00:00Z", | ||
"updated_at": "2023-07-01T12:00:00Z", | ||
"data_format": "Parquet", | ||
"tags": ["economy", "GDP", "inflation", "unemployment"], | ||
"source": "World Economic Forum", | ||
"authors": { | ||
"name": "Dr. Emily Johnson", | ||
"orcid": "0000-0003-1234-5678" | ||
}, | ||
"license": "CC BY-SA 4.0", | ||
"access_url": "https://example.com/datasets/global-economic-2023", | ||
"documentation_url": "https://example.com/datasets/global-economic-2023/documentation", | ||
"access_endpoint": "https://api.example.com/economy/2023/data", | ||
"documentation_endpoint": "https://api.example.com/economy/2023/docs" | ||
}, | ||
{ | ||
"version": "1.0.0", | ||
"dataset_id": "423e4567-e89b-12d3-a456-426614174333", | ||
"title": "Regional Economic Data 2022", | ||
"doi": "10.1234/regional-economic-2022", | ||
"description": "This dataset provides economic data for various regions in 2022, including GDP and unemployment rates.", | ||
"created_at": "2022-03-01T12:00:00Z", | ||
"updated_at": "2022-09-01T12:00:00Z", | ||
"data_format": "CSV", | ||
"tags": ["economy", "regional", "GDP", "unemployment"], | ||
"source": "Regional Economic Offices", | ||
"authors": { | ||
"name": "Dr. Michael Lee", | ||
"orcid": "0000-0004-5678-9101" | ||
}, | ||
"license": "Apache 2.0", | ||
"access_url": "https://example.com/datasets/regional-economic-2022", | ||
"documentation_url": "https://example.com/datasets/regional-economic-2022/documentation", | ||
"access_endpoint": "https://api.example.com/economy/2022/data", | ||
"documentation_endpoint": "https://api.example.com/economy/2022/docs" | ||
} | ||
] | ||
} | ||
"license": "CC BY 4.0", | ||
"access_url": "https://zenodo.org/record/10491409", | ||
"documentation_url": "https://zenodo.org/record/10491409" | ||
} | ||
] | ||
} | ||
} |