Add upload_fluxiae_to_pilotage command and cron schedule
Add pilotage_s3_client configuration with new environment variables

Prevent globbing in the bash script

Restore populate_metabase_fluxiae in the import-iae.sh script

Remove the PILOTAGE_S3_FLUX_IAE_OBJECT_KEY setting

Replace tqdm in the upload command

Rename the upload_data_to_pilotage command

Fix the asp_riae_shared_bucket directory

Add upload_to_pilotage.sh

Remove manual upload_data_to_pilotage usage

Fix double-quoting when referencing $ROOT in the bash script
calummackervoy committed Dec 12, 2024
1 parent 2c04ca2 commit 513ba00
Showing 6 changed files with 87 additions and 0 deletions.
1 change: 1 addition & 0 deletions clevercloud/cron.json
@@ -31,6 +31,7 @@
"0 0 * * 1 $ROOT/clevercloud/run_management_command.sh shorten_active_sessions",
"0 2 * * 1 $ROOT/clevercloud/crons/populate_metabase_matomo.sh",
"0 12 * * 1 $ROOT/clevercloud/run_management_command.sh import_ea_eatt --from-asp --wet-run",
"0 12 * * 1 $ROOT/clevercloud/upload_to_pilotage.sh",

"0 0 1 * * $ROOT/clevercloud/run_management_command.sh delete_old_emails --wet-run",
"0 0 1 * * $ROOT/clevercloud/run_management_command.sh sync_cities --wet-run",
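For readers double-checking the new schedule: "0 12 * * 1" fires at 12:00 on Mondays. A small standard-library sanity check (the date below is just an example Monday):

import datetime

# Cron numbers Monday as 1; Python's weekday() numbers it as 0.
example = datetime.datetime(2024, 12, 16, 12, 0)  # a Monday at 12:00
assert example.weekday() == 0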
13 changes: 13 additions & 0 deletions clevercloud/upload_to_pilotage.sh
@@ -0,0 +1,13 @@
#!/bin/bash -l

cd "$APP_HOME" || exit

FLUX_IAE_FILE_GLOB='fluxIAE_*.tar.gz'

FLUX_IAE_FILE=$(find asp_riae_shared_bucket/ -name "$FLUX_IAE_FILE_GLOB" -type f -mtime -5)
if [[ ! -f "$FLUX_IAE_FILE" ]]; then
    echo "Missing the flux IAE file."
    exit 0
fi

# The management command expects a file name inside asp_riae_shared_bucket/, not a path.
/bin/bash "$ROOT"/clevercloud/run_management_command.sh upload_data_to_pilotage "$(basename "$FLUX_IAE_FILE")"
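For context, a rough Python equivalent of the discovery step above (a sketch only; the cron job uses the bash version). "-mtime -5" keeps files modified within the last 5 days:

import glob
import os
import time

five_days_ago = time.time() - 5 * 24 * 3600
candidates = [
    path
    for path in glob.glob("asp_riae_shared_bucket/fluxIAE_*.tar.gz")
    if os.stat(path).st_mtime > five_days_ago
]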
7 changes: 7 additions & 0 deletions config/settings/base.py
@@ -594,6 +594,13 @@
AWS_S3_ACCESS_KEY_ID = os.getenv("CELLAR_ADDON_KEY_ID")
AWS_S3_SECRET_ACCESS_KEY = os.getenv("CELLAR_ADDON_KEY_SECRET")
AWS_STORAGE_BUCKET_NAME = os.getenv("S3_STORAGE_BUCKET_NAME")

# S3 store for communicating with the Pilotage.
PILOTAGE_DATASTORE_S3_ENDPOINT_URL = os.getenv("PILOTAGE_DATASTORE_S3_ENDPOINT_URL")
PILOTAGE_DATASTORE_S3_ACCESS_KEY = os.getenv("PILOTAGE_DATASTORE_S3_ACCESS_KEY")
PILOTAGE_DATASTORE_S3_SECRET_KEY = os.getenv("PILOTAGE_DATASTORE_S3_SECRET_KEY")
PILOTAGE_DATASTORE_S3_BUCKET_NAME = os.getenv("PILOTAGE_DATASTORE_S3_BUCKET_NAME")

# The maximum amount of memory (in bytes) a file can take up before being rolled over into a temporary file on disk.
# Picked 5 MB, the max size for a resume. Keep it fast for files under that size, and avoid filling up the RAM.
AWS_S3_MAX_MEMORY_SIZE = 5 * 1024 * 1024
5 changes: 5 additions & 0 deletions config/settings/test.py
@@ -39,6 +39,11 @@
AWS_S3_SECRET_ACCESS_KEY = "minioadmin"
AWS_STORAGE_BUCKET_NAME = "tests"

PILOTAGE_DATASTORE_S3_ENDPOINT_URL = AWS_S3_ENDPOINT_URL
PILOTAGE_DATASTORE_S3_ACCESS_KEY = AWS_S3_ACCESS_KEY_ID
PILOTAGE_DATASTORE_S3_SECRET_KEY = AWS_S3_SECRET_ACCESS_KEY
PILOTAGE_DATASTORE_S3_BUCKET_NAME = AWS_STORAGE_BUCKET_NAME

API_DATADOG_API_KEY = "abcde"
API_DATADOG_APPLICATION_KEY = "fghij"

51 changes: 51 additions & 0 deletions itou/metabase/management/commands/upload_data_to_pilotage.py
@@ -0,0 +1,51 @@
"""
The FluxIAE file contains data used by les emplois and is uploaded to us directly by a supporting organization.
The same file is also parsed by the Pilotage, which receives it via a shared S3 bucket.
This command uploads the file from where it has been stored to the S3 bucket for sharing.
"""

import os

from django.conf import settings
from django.core.management.base import CommandError

from itou.utils.command import BaseCommand
from itou.utils.storage.s3 import pilotage_s3_client


class Command(BaseCommand):
    help = "Upload FluxIAE to S3 for sharing."

    def add_arguments(self, parser):
        parser.add_argument("filename", type=str, help="The name of the FluxIAE import file in the import directory")

    def handle(self, filename, *args, **options):
        # Run from the home directory in production.
        filepath = os.path.join("asp_riae_shared_bucket", filename)
        # Confirm the file exists.
        if not os.path.exists(filepath):
            raise CommandError(f"For upload_data_to_pilotage to work, a file must exist at the given file path {filepath}")

        # Upload the data to the S3 bucket.
        file_size = os.stat(filepath).st_size
        bytes_transferred = 0
        previous_progress = 0

        def log_progress(chunk_size):
            """Log the progress of the byte transfer to the console."""
            nonlocal bytes_transferred
            nonlocal previous_progress

            bytes_transferred += chunk_size
            progress = int((bytes_transferred / file_size) * 100)
            if progress != previous_progress:
                print(f"{bytes_transferred}/{file_size} bytes transferred ({progress}%).")
                previous_progress = progress

        pilotage_s3_client().upload_file(
            Filename=filepath,
            Bucket=settings.PILOTAGE_DATASTORE_S3_BUCKET_NAME,
            Key=filename,
            Callback=log_progress,
        )
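For reference, a minimal sketch of invoking the command from Python, e.g. in a test (the filename is a hypothetical example; the command resolves it relative to asp_riae_shared_bucket/ as shown above):

from django.core.management import call_command

# The filename is a hypothetical example.
call_command("upload_data_to_pilotage", "fluxIAE_ITOU_20241212.tar.gz")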
10 changes: 10 additions & 0 deletions itou/utils/storage/s3.py
@@ -18,6 +18,16 @@ def s3_client():
)


def pilotage_s3_client():
    """There is an S3 bucket dedicated to sharing files with Pilotage."""
    return boto3.client(
        "s3",
        endpoint_url=settings.PILOTAGE_DATASTORE_S3_ENDPOINT_URL,
        aws_access_key_id=settings.PILOTAGE_DATASTORE_S3_ACCESS_KEY,
        aws_secret_access_key=settings.PILOTAGE_DATASTORE_S3_SECRET_KEY,
    )


class PublicStorage(S3Boto3Storage):
    # Not using the S3StaticStorage backend to ensure the listdir() operation remains forbidden.
    # Don’t sign URLs, objects are public.
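A quick way to sanity-check an upload with the new client (a sketch, assuming the PILOTAGE_DATASTORE_S3_* settings above are configured; the object key is hypothetical):

from django.conf import settings

from itou.utils.storage.s3 import pilotage_s3_client

# Confirm the shared object exists and report its size.
response = pilotage_s3_client().head_object(
    Bucket=settings.PILOTAGE_DATASTORE_S3_BUCKET_NAME,
    Key="fluxIAE_ITOU_20241212.tar.gz",  # hypothetical object key
)
print(f"{response['ContentLength']} bytes shared with the Pilotage.")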
