ref: remove backup from S3 to block storage code
paulmueller committed May 14, 2024
1 parent 3a3dbe8 commit 268b0ad
Showing 5 changed files with 4 additions and 96 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG
@@ -1,3 +1,6 @@
0.14.1
- ref: remove backup from S3 to block storage code
  (we now have a backup of the S3 data)
0.14.0
- BREAKING: remove `depotize` and `internal` submodules and functionalities
- feat: introduce job for backing up data from S3 to local block storage
4 changes: 0 additions & 4 deletions ckanext/dcor_depot/cli.py
@@ -260,10 +260,6 @@ def run_jobs_dcor_depot(modified_days=-1):
            if jobs.migrate_resource_to_s3_job(resource=res_dict):
                click_echo(f"Migrated to S3 {resource.name}", nl)
                nl = True
            if jobs.backup_resource_from_s3_to_block_storage_job(
                    resource=res_dict):
                click_echo(f"Backed up {resource.name} from S3", nl)
                nl = True
        except KeyboardInterrupt:
            raise
        except BaseException as e:
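Note on the context lines above: the CLI loop applies each maintenance job per resource and deliberately re-raises KeyboardInterrupt while reporting all other exceptions, so one broken resource cannot abort a long run. A minimal standalone sketch of that pattern (run_jobs_tolerantly and its print-based reporting are illustrative, not part of this codebase):

def run_jobs_tolerantly(resources, job_func):
    """Apply `job_func` to each resource dict; report failures, keep going."""
    for res_dict in resources:
        try:
            job_func(resource=res_dict)
        except KeyboardInterrupt:
            # Ctrl+C must still abort the entire maintenance run.
            raise
        except BaseException as e:
            # Anything else is reported and the loop moves on.
            print(f"Job failed for resource {res_dict.get('id')}: {e}")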
42 changes: 0 additions & 42 deletions ckanext/dcor_depot/jobs.py
@@ -20,48 +20,6 @@ def admin_context():
    return {'ignore_auth': True, 'user': 'default'}


def backup_resource_from_s3_to_block_storage_job(resource):
    """Copy resources from S3 to local block storage

    As long as we do not have a backup strategy for S3, make sure
    that there is a copy of each resource either in the "resources"
    directory or in the "dcor_object_store.local_backup_location"
    directory.

    TODO: remove this method once we have a backup strategy for S3.
    """
    rid = resource["id"]
    # Make sure the resource is available for processing
    wait_for_resource(rid)

    # Check the legacy local resource
    path_legacy = get_resource_path(rid)
    if not path_legacy.exists():
        # Check the local backup directory
        backup_loc = get_ckan_config_option(
            "dcor_object_store.local_backup_location")
        if backup_loc is not None:
            # We have this variable defined which means we can back up to it
            path_bu = pathlib.Path(backup_loc) / rid[:3] / rid[3:6] / rid[6:]
            if not path_bu.exists():
                path_bu.parent.mkdir(parents=True, exist_ok=True)
                # set up a temporary download file path
                path_tmp = path_bu.with_name(path_bu.name + "_temp")
                path_tmp.unlink(missing_ok=True)
                if s3.is_available():
                    # perform the download from s3
                    s3_client, _, _ = s3.get_s3()
                    bucket_name, object_name = \
                        s3cc.get_s3_bucket_object_for_artifact(rid)
                    s3_client.download_file(
                        bucket_name, object_name, str(path_tmp))
                    # if we got here, then everything went fine
                    path_tmp.rename(path_bu)
                    return path_bu

    return False


def patch_resource_noauth(package_id, resource_id, data_dict):
"""Patch a resource using package_revise"""
package_revise = logic.get_action("package_revise")
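For context on the removed job above: it mirrored S3 objects into a sharded directory tree derived from the resource ID (rid[:3]/rid[3:6]/rid[6:]) and downloaded atomically, writing to a "_temp" file first and renaming only on success. A self-contained sketch of that pattern, assuming a plain boto3 client in place of the dcor_shared helpers (s3.get_s3, s3cc.get_s3_bucket_object_for_artifact):

import pathlib

import boto3  # assumption: stands in for the dcor_shared S3 helpers


def download_backup(rid, bucket_name, object_name, backup_root):
    """Download one S3 object into a sharded local backup tree."""
    # Shard the resource ID into three path segments.
    path_bu = pathlib.Path(backup_root) / rid[:3] / rid[3:6] / rid[6:]
    if path_bu.exists():
        return path_bu
    path_bu.parent.mkdir(parents=True, exist_ok=True)
    # Download to a temporary name and rename afterwards, so that a
    # partial download never masquerades as a complete backup.
    path_tmp = path_bu.with_name(path_bu.name + "_temp")
    path_tmp.unlink(missing_ok=True)
    s3_client = boto3.client("s3")
    s3_client.download_file(bucket_name, object_name, str(path_tmp))
    path_tmp.rename(path_bu)
    return path_bu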
23 changes: 1 addition & 22 deletions ckanext/dcor_depot/plugin.py
@@ -6,10 +6,7 @@
from dcor_shared import s3, s3cc

from .cli import get_commands
from .jobs import (
    symlink_user_dataset_job, migrate_resource_to_s3_job,
    backup_resource_from_s3_to_block_storage_job
)
from .jobs import symlink_user_dataset_job, migrate_resource_to_s3_job


class DCORDepotPlugin(plugins.SingletonPlugin):
@@ -78,21 +75,3 @@ def after_resource_create(self, context, resource):
                           ]}
            )

        jid_backup_s3 = pkg_job_id + "backups3"
        if not Job.exists(jid_backup_s3, connection=ckan_jobs_connect()):
            toolkit.enqueue_job(
                backup_resource_from_s3_to_block_storage_job,
                [resource],
                title="Backup resource from S3 object store locally",
                queue="dcor-normal",
                rq_kwargs={"timeout": 3600,
                           "job_id": jid_backup_s3,
                           "depends_on": [
                               # general requirement
                               jid_symlink,
                               # requires SHA256 check
                               pkg_job_id + "sha256",
                               # just for the sake of sanity
                               jid_migrate_s3,
                           ]}
            )
28 changes: 0 additions & 28 deletions ckanext/dcor_depot/tests/test_jobs.py
@@ -28,34 +28,6 @@
data_path = pathlib.Path(__file__).parent / "data"


@pytest.mark.ckan_config('ckan.plugins', 'dcor_depot dcor_schemas')
@pytest.mark.usefixtures('clean_db', 'with_request_context')
@mock.patch('ckan.plugins.toolkit.enqueue_job',
            side_effect=synchronous_enqueue_job)
def test_backup_resource_from_s3_to_block_storage(
        enqueue_job_mock, monkeypatch, ckan_config, tmpdir):
    monkeypatch.setitem(ckan_config, 'ckan.storage_path', str(tmpdir))
    monkeypatch.setattr(ckan.lib.uploader,
                        'get_storage_path',
                        lambda: str(tmpdir))

    # Create a dataset via the S3 route
    ds_dict, res_dict = make_dataset_via_s3(
        resource_path=data_path / "calibration_beads_47.rtdc",
        activate=True,
        private=True
    )

    # After all background jobs are run, the resource should show up
    # in the local directory tree.
    backup_loc = pathlib.Path(dcor_shared.get_ckan_config_option(
        "dcor_object_store.local_backup_location"))
    rid = res_dict["id"]
    path_bu = backup_loc / rid[:3] / rid[3:6] / rid[6:]
    assert dcor_shared.sha256sum(path_bu) == dcor_shared.sha256sum(
        data_path / "calibration_beads_47.rtdc")


@pytest.mark.ckan_config('ckan.plugins', 'dcor_depot dcor_schemas')
@pytest.mark.usefixtures('clean_db', 'with_request_context')
@mock.patch('ckan.plugins.toolkit.enqueue_job',
