Skip to content

Commit

Permalink
troubleshoot by url not md5
Browse files Browse the repository at this point in the history
  • Loading branch information
claravox committed Sep 4, 2024
1 parent 20ca055 commit 2bbc8b5
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 27 deletions.
2 changes: 0 additions & 2 deletions tools/troubleshoot-published-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ def parse_args():
def main():
args = parse_args()
rule_name = "/etc/irods/yoda-ruleset/tools/troubleshoot_data.r"
# rule_name = "rule_batch_troubleshoot_published_data_packages"
# TODO handle packages with spaces in the name
data_package = f"*data_package={args.package}"
log_loc = f"*log_loc={args.log_file if args.log_file else ''}"
subprocess.call(['irule', '-r', 'irods_rule_engine_plugin-python-instance', '-F',
Expand Down
60 changes: 35 additions & 25 deletions troubleshoot_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import subprocess

import genquery
import requests

import datacite
from publication import get_publication_config
Expand Down Expand Up @@ -189,7 +190,7 @@ def get_md5_remote_ssh(ctx, host, username, file_path):
"""
try:
# Build the SSH command to execute md5sum remotely
ssh_command = "ssh {username}@{host} md5sum -b {file_path}".format(
ssh_command = "ssh -o StrictHostKeyChecking=accept-new {username}@{host} md5sum -b {file_path}".format(
username=username, host=host, file_path=file_path
)

Expand All @@ -201,7 +202,7 @@ def get_md5_remote_ssh(ctx, host, username, file_path):
stdout, stderr = process.communicate()

# Return only the MD5 hash part
if process.returncode == 0:
if process.returncode == 0 and stdout:
return stdout.strip().split()[0]

log.write(ctx, "Error: {}".format(stderr))
Expand Down Expand Up @@ -231,9 +232,25 @@ def get_attribute_value(ctx, data_package, attribute_suffix):
raise ValueError("Attribute {} not found in AVU".format(attr))


def verify_file_integrity(ctx, data_package, attribute_suffix, remote_host, remote_directory):
def get_remote_url(ctx, data_package, remote_hostname, remote_directory, attribute_suffix):
    """
    Build the URL at which a published file of a data package is expected to be found.

    The file name is the last component of the path stored in the data package
    attribute identified by attribute_suffix, with every '-combi' removed from it.
    Example of a resulting URL: https://public.yoda.test/allinone/UU01/PPQEBC.html

    :param ctx:              Combined type of a callback and rei struct
    :param data_package:     String representing the data package collection path.
    :param remote_hostname:  Key in the publication config whose value becomes the host part of the URL
    :param remote_directory: Not used when constructing the URL
    :param attribute_suffix: Suffix of the attribute that holds the file path

    :returns: String with the expected remote URL

    :raises KeyError: If remote_hostname is not a key of the publication config
    """
    local_path = get_attribute_value(ctx, data_package, attribute_suffix)
    log.write_stdout(ctx, "file path: {}".format(local_path))

    pub_config = get_publication_config(ctx)
    if remote_hostname not in pub_config:
        raise KeyError("Host {} does not exist in publication config".format(remote_hostname))

    # Keep only the last path component and drop the '-combi' marker from it.
    basename = local_path.rsplit("/", 1)[-1].replace('-combi', '')

    # URL layout: https://<host>/<yodaInstance>/<yodaPrefix>/<file name>
    url_parts = (
        pub_config[remote_hostname],
        pub_config['yodaInstance'],
        pub_config['yodaPrefix'],
        basename,
    )
    remote_url = "https://{}/{}/{}/{}".format(*url_parts)
    log.write_stdout(ctx, "url: {}".format(remote_url))
    return remote_url


def compare_local_remote_files(ctx, data_package, attribute_suffix, url):
"""
Compares MD5 checksums between a local file and its remote version to verify their integrity.
Compares file contents between a file in irods and its remote version to verify their integrity.
:param ctx: Combined type of a callback and rei struct
:param data_package: String representing the data package collection path.
Expand All @@ -251,43 +268,36 @@ def verify_file_integrity(ctx, data_package, attribute_suffix, remote_host, remo
log.write_stdout(ctx, "file path: {}".format(file_path))
# We are comparing small files so it should be ok to get the whole file
local_data = data_object.read(ctx, file_path)
local_md5 = calculate_md5(local_data)

# Calculate md5 for the remote file
publication_config = get_publication_config(ctx)
if remote_host not in publication_config:
raise KeyError("Host {} does not exist in publication config".format(remote_host))

file_shortname = file_path.split("/")[-1].replace('-combi', '')
log.write_stdout(ctx, "short: {}".format(file_shortname))
remote_file_path = "/var/www/{}/{}/{}/{}".format(
remote_directory, publication_config['yodaInstance'], publication_config['yodaPrefix'], file_shortname)
log.write_stdout(ctx, "remote file path: {}".format(remote_file_path))

remote_md5 = get_md5_remote_ssh(ctx, publication_config[remote_host], config.inbox_user, remote_file_path)
result = requests.get(url, verify=False)
if result.status_code != 200:
log.write_stdout(ctx, "Connection to remote url <{}> failed.".format(url))
return False

if local_md5 == remote_md5:
if local_data == result.text:
return True

log.write_stdout(ctx, "MD5 of local and remote file don't match.")
log.write_stdout(ctx, "Local MD5 ({}): {}".format(attribute_suffix, local_md5))
log.write_stdout(ctx, "Remote MD5 ({}): {}".format(attribute_suffix, remote_md5))
log.write_stdout(ctx, "File contents of irods and remote file do not match.")
# TODO print paths here?
return False


def check_integrity_of_publication_files(ctx, data_package):
"""
Checks the integrity of landingPage and CombiJson files by verifying their MD5 checksums in local against those in public server.
Checks the integrity of landingPage and CombiJson files by comparing the contents
:param ctx: Combined type of a callback and rei struct
:param data_package: String representing the data package collection path.
:returns: A tuple containing boolean results of checking
"""
# publicVHost for landingpage, moaiHost for moai
landing_page_verified = verify_file_integrity(ctx, data_package, "landingPagePath", "publicVHost", "landingpages")
combi_json_verified = verify_file_integrity(ctx, data_package, "combiJsonPath", "moaiHost", "moai/metadata")
return (landing_page_verified, combi_json_verified)
landing_page_url = get_remote_url(ctx, data_package, "publicVHost", "landingpage", "landingPagePath")
# landing_page_verified = compare_local_remote_files(ctx, data_package, "landingPagePath", landing_page_url)
combi_json_url = get_remote_url(ctx, data_package, "moaiHost", "moai/metadata", "combiJsonPath")
# combi_json_verified = compare_local_remote_files(ctx, data_package, "combiJsonPath", combi_json_url)
# return (landing_page_verified, combi_json_verified)
return True, True


def print_troubleshoot_result(ctx, result):
Expand Down

0 comments on commit 2bbc8b5

Please sign in to comment.