diff --git a/__init__.py b/__init__.py
index 722df1424..90707339d 100644
--- a/__init__.py
+++ b/__init__.py
@@ -26,31 +26,32 @@
 # Import all modules containing rules into the package namespace,
 # so that they become visible to iRODS.
 
-from admin import *
-from browse import *
-from folder import *
-from groups import *
-from json_datacite import *
-from json_landing_page import *
-from mail import *
-from meta import *
-from meta_form import *
-from provenance import *
-from research import *
-from resources import *
-from schema import *
-from schema_transformation import *
-from schema_transformations import *
-from vault import *
-from datacite import *
-from epic import *
-from publication import *
-from policies import *
-from replication import *
-from revisions import *
-from settings import *
-from notifications import *
-from integration_tests import *
+from admin import *
+from browse import *
+from folder import *
+from groups import *
+from json_datacite import *
+from json_landing_page import *
+from mail import *
+from meta import *
+from meta_form import *
+from provenance import *
+from research import *
+from resources import *
+from schema import *
+from schema_transformation import *
+from schema_transformations import *
+from publication_troubleshoot import *
+from vault import *
+from datacite import *
+from epic import *
+from publication import *
+from policies import *
+from replication import *
+from revisions import *
+from settings import *
+from notifications import *
+from integration_tests import *
 
 # Import certain modules only when enabled.
 from .util.config import config
diff --git a/integration_tests.py b/integration_tests.py
index f345b5c58..f92f4c0dd 100644
--- a/integration_tests.py
+++ b/integration_tests.py
@@ -117,6 +117,27 @@ def _test_avu_rmw_collection(ctx, rmw_attributes):
     return result
 
 
+def _test_avu_get_attr_val_of_coll(ctx, attr, value):
+    # Test getting the value of an attribute on a collection
+    tmp_coll = _create_tmp_collection(ctx)
+    ctx.msi_add_avu('-c', tmp_coll, attr, value, "baz")
+    result = avu.get_attr_val_of_coll(ctx, tmp_coll, attr)
+    collection.remove(ctx, tmp_coll)
+    return result
+
+
+def _test_avu_get_attr_val_of_coll_exception(ctx):
+    # Test that getting a non-existent attribute on a collection raises an exception (returns True if an exception was raised)
+    tmp_coll = _create_tmp_collection(ctx)
+    result = False
+    try:
+        result = avu.get_attr_val_of_coll(ctx, tmp_coll, "foo")
+    except Exception:
+        result = True
+    collection.remove(ctx, tmp_coll)
+    return result
+
+
 def _test_folder_set_retry_avus(ctx):
     tmp_coll = _create_tmp_collection(ctx)
     folder.folder_secure_set_retry_avus(ctx, tmp_coll, 2)
@@ -482,6 +503,12 @@ def _test_folder_secure_func(ctx, func):
                            "check": lambda x: (("aap", "noot", "mies") in x
                                                and len([a for a in x if a[0] not in ["org_replication_scheduled"]]) == 1
                                                )},
+    {"name": "avu.get_attr_val_of_coll.exists.yes",
+     "test": lambda ctx: _test_avu_get_attr_val_of_coll(ctx, "foo", "bar"),
+     "check": lambda x: x == "bar"},
+    {"name": "avu.get_attr_val_of_coll.exists.no",
+     "test": lambda ctx: _test_avu_get_attr_val_of_coll_exception(ctx),
+     "check": lambda x: x},
     {"name": "avu.apply_atomic_operations.collection",
      "test": lambda ctx: _test_msvc_apply_atomic_operations_collection(ctx),
      "check": lambda x: (("foo", "bar", "baz") in x and len(x) == 1)},
diff --git a/meta.py b/meta.py
index 887401958..797162ec5 100644
--- a/meta.py
+++ b/meta.py
@@ -13,6 +13,7 @@
 import irods_types
 from deepdiff import DeepDiff
 
+import meta_form
 import provenance
 import publication
 import schema as schema_
@@ -790,3 +791,50 @@ def copy_user_metadata(ctx, source, target):
         log.write(ctx, "copy_user_metadata: copied user metadata from <{}> to <{}/original>".format(source, target))
     except Exception:
         log.write(ctx, "copy_user_metadata: failed to copy user metadata from <{}> to <{}/original>".format(source, target))
+
+
+def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, write_stdout):
+    """Process a single data package to retrieve its metadata and validate that it conforms to the schema.
+
+    :param ctx:          Combined type of a callback and rei struct
+    :param coll_name:    String representing the data package collection path.
+    :param schema_cache: Dictionary storing schema blueprints, can be empty.
+    :param report_name:  Name of report script (for logging)
+    :param write_stdout: A boolean representing whether to write to stdout or rodsLog
+
+    :returns: A dictionary indicating whether the metadata matches the schema, and the schema short name.
+    """
+    metadata_path = get_latest_vault_metadata_path(ctx, coll_name)
+
+    if not metadata_path:
+        log.write(ctx, "{} skips {}, because metadata could not be found.".format(report_name, coll_name), write_stdout)
+        return None
+
+    try:
+        metadata = jsonutil.read(ctx, metadata_path)
+    except Exception as exc:
+        log.write(ctx, "{} skips {}, because of exception while reading metadata file {}: {}".format(report_name, coll_name, metadata_path, str(exc)), write_stdout)
+        log.write(ctx, "vault_metadata_matches_schema: Error while reading metadata file {} of data package {}: {}".format(metadata_path, coll_name, str(exc)), write_stdout)
+        return None
+
+    # Determine schema
+    schema_id = schema_.get_schema_id(ctx, metadata_path)
+    schema_shortname = schema_id.split("/")[-2]
+
+    # Retrieve schema and cache it for future use
+    schema_path = schema_.get_schema_path_by_id(ctx, metadata_path, schema_id)
+    if schema_shortname in schema_cache:
+        schema_contents = schema_cache[schema_shortname]
+    else:
+        schema_contents = jsonutil.read(ctx, schema_path)
+        schema_cache[schema_shortname] = schema_contents
+
+    # Check whether metadata matches schema and log any errors
+    error_list = get_json_metadata_errors(ctx, metadata_path, metadata=metadata, schema=schema_contents)
+    match_schema = len(error_list) == 0
+    if not match_schema:
+        errors_formatted = [meta_form.humanize_validation_error(e).encode('utf-8') for e in error_list]
+        log.write(ctx, "{}: metadata {} did not match schema {}: {}".format(report_name, metadata_path, schema_shortname, str(errors_formatted)), write_stdout)
+        log.write(ctx, "vault_metadata_matches_schema: Metadata {} of data package {} did not match the schema {}. Error list: {}".format(metadata_path, coll_name, schema_shortname, str(errors_formatted)), write_stdout)
+
+    return {"schema": schema_shortname, "match_schema": match_schema}
diff --git a/publication.py b/publication.py
index 47e7d3470..d97507197 100644
--- a/publication.py
+++ b/publication.py
@@ -1326,10 +1326,10 @@ def rule_update_publication(ctx, vault_package, update_datacite, update_landingp
     :param update_moai:        Flag that indicates updating MOAI (OAI-PMH)
     """
     if user.user_type(ctx) != 'rodsadmin':
-        log.write_stdout(ctx, "User is no rodsadmin")
+        log.write(ctx, "User is no rodsadmin", True)
         return
 
-    log.write_stdout(ctx, "[UPDATE PUBLICATIONS] Start for {}".format(vault_package))
+    log.write(ctx, "[UPDATE PUBLICATIONS] Start for {}".format(vault_package), True)
     collections = genquery.row_iterator(
         "COLL_NAME",
         "COLL_NAME like '%%/home/vault-%%' "
@@ -1345,12 +1345,12 @@ def rule_update_publication(ctx, vault_package, update_datacite, update_landingp
         if ((vault_package == '*' and re.match(r'/[^/]+/home/vault-.*', coll_name)) or (vault_package != '*' and re.match(r'/[^/]+/home/vault-.*', coll_name) and coll_name == vault_package)):
             packages_found = True
             output = update_publication(ctx, coll_name, update_datacite == 'Yes', update_landingpage == 'Yes', update_moai == 'Yes')
-            log.write_stdout(ctx, coll_name + ': ' + output)
+            log.write(ctx, coll_name + ': ' + output, True)
 
     if not packages_found:
-        log.write_stdout(ctx, "[UPDATE PUBLICATIONS] No packages found for {}".format(vault_package))
+        log.write(ctx, "[UPDATE PUBLICATIONS] No packages found for {}".format(vault_package), True)
     else:
-        log.write_stdout(ctx, "[UPDATE PUBLICATIONS] Finished for {}".format(vault_package))
+        log.write(ctx, "[UPDATE PUBLICATIONS] Finished for {}".format(vault_package), True)
 
 
 def update_publication(ctx, vault_package, update_datacite=False, update_landingpage=False, update_moai=False):
diff --git a/publication_troubleshoot.py b/publication_troubleshoot.py
new file mode 100644
index 000000000..8f948fcbb
--- /dev/null
+++ b/publication_troubleshoot.py
@@ -0,0 +1,440 @@
+# -*- coding: utf-8 -*-
+"""Functions and rules for troubleshooting published data packages."""
+
+__copyright__ = 'Copyright (c) 2024, Utrecht University'
+__license__ = 'GPLv3, see LICENSE'
+
+__all__ = [
+    'api_batch_troubleshoot_published_data_packages',
+    'rule_batch_troubleshoot_published_data_packages'
+]
+
+import json
+from datetime import datetime
+
+import genquery
+import requests
+import urllib3
+
+import datacite
+from meta import vault_metadata_matches_schema
+from publication import get_publication_config
+from util import *
+
+
+def find_full_package_path(ctx, package_name, write_stdout):
+    """
+    Find the full path of a data package based on its short name.
+
+    :param ctx:          Combined type of a callback and rei struct
+    :param package_name: The short name of the data package to find.
+    :param write_stdout: A boolean representing whether to write to stdout or rodsLog
+
+    :returns: The full path of the data package if found, otherwise None.
+ """ + try: + query_condition = ( + "COLL_NAME like '%{}%'".format(package_name) + ) + query_attributes = "COLL_NAME" + iter = genquery.row_iterator(query_attributes, query_condition, genquery.AS_LIST, ctx) + + # Return full package path if exists + for row in iter: + return row[0] + except Exception as e: + log.write(ctx, "find_full_package_path: An error occurred while executing the query: {}".format(e), write_stdout) + return None + + +def find_data_packages(ctx, write_stdout): + """ + Find all data packages in Retry, Unrecoverable and Unknown status by matching its AVU. + + :param ctx: Combined type of a callback and rei struct + :param write_stdout: A boolean representing whether to write to stdout or rodsLog + + :returns: A list of collection names that have not been processed successfully + """ + user_zone = user.zone(ctx) + + try: + # Get all the vault packages that have org_publication_status in metadata + query_condition = ( + "COLL_NAME like '/{}/home/vault-%' AND " + "META_COLL_ATTR_NAME = '{}publication_status'".format(user_zone, constants.UUORGMETADATAPREFIX) + ) + query_attributes = "COLL_NAME" + iter = genquery.row_iterator(query_attributes, query_condition, genquery.AS_LIST, ctx) + + # Collecting only the collection names + return [row[0] for row in iter] + + except Exception as e: + log.write(ctx, "find_data_packages: An error occurred while executing the query: {}".format(e), write_stdout) + return [] + + +def check_print_data_package_system_avus(ctx, data_package, write_stdout): + """ + Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e, 'org_'). + This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from + a successfully published data package. + This also prints if there are any missing or unexpected results. + + :param ctx: Combined type of a callback and rei struct + :param data_package: String representing the data package collection path. + :param write_stdout: A boolean representing whether to write to stdout or rodsLog + + :returns: A 2-tuple containing boolean results of checking results + """ + extracted_avus = avu.of_coll(ctx, data_package) + results = misc.check_data_package_system_avus(extracted_avus) + + if not results["no_missing_avus"]: + log.write(ctx, "check_data_package_system_avus: There are some missing AVUs in data package <{}> - {}".format(data_package, list(results["missing_avus"])), write_stdout) + + if not results["no_unexpected_avus"]: + log.write(ctx, "check_data_package_system_avus: There are some unexpected AVUs in data package <{}> - {}".format(data_package, list(results["unexpected_avus"])), write_stdout) + + return (results["no_missing_avus"], results["no_unexpected_avus"]) + + +def check_one_datacite_doi_reg(ctx, data_package, doi_name, write_stdout): + try: + doi = get_val_for_attr_with_pub_prefix(ctx, data_package, doi_name) + except ValueError as e: + log.write(ctx, "check_datacite_doi_registration: Error while trying to get {} - {}".format(doi_name, e), write_stdout) + return False + + status_code = datacite.metadata_get(ctx, doi) + return status_code == 200 + + +def check_datacite_doi_registration(ctx, data_package, write_stdout): + """ + Check the registration status of both versionDOI and baseDOI with the DataCite API, + ensuring that both DOIs return a 200 status code, which indicates successful registration. 
+ + :param ctx: Combined type of a callback and rei struct + :param data_package: String representing the data package collection path. + :param write_stdout: A boolean representing whether to write to stdout or rodsLog + + :returns: A tuple of booleans indicating check success or not (base doi check may be None if not relevant). + """ + version_doi_check = check_one_datacite_doi_reg(ctx, data_package, "versionDOI", write_stdout) + + previous_version = '' + try: + previous_version = get_val_for_attr_with_pub_prefix(ctx, data_package, "previous_version") + except Exception: + pass + + if previous_version: + base_doi_check = check_one_datacite_doi_reg(ctx, data_package, "baseDOI", write_stdout) + return version_doi_check, base_doi_check + + return (version_doi_check, None) + + +def get_val_for_attr_with_pub_prefix(ctx, data_package, attribute_suffix): + """ + Retrieves the value given the suffix of the attribute from a data package. + + :param ctx: Combined type of a callback and rei struct + :param data_package: String representing the data package collection path. + :param attribute_suffix: Suffix of the attribute before adding prefix such as "org_publication_" + + :returns: Value of the attribute. + """ + attr = constants.UUORGMETADATAPREFIX + "publication_" + attribute_suffix + return avu.get_attr_val_of_coll(ctx, data_package, attr) + + +def get_landingpage_paths(ctx, data_package, write_stdout): + """Given a data package get what the path and remote url should be""" + file_path = '' + try: + file_path = get_val_for_attr_with_pub_prefix(ctx, data_package, "landingPagePath") + url = get_val_for_attr_with_pub_prefix(ctx, data_package, "landingPageUrl") + return file_path, url + + except Exception: + log.write(ctx, "get_landingpage_paths: Could not find landing page for data package: {}".format(data_package), write_stdout) + return '', '' + + +def compare_local_remote_landingpage(ctx, file_path, url, offline, api_call): + """ + Compares file contents between a file in irods and its remote version to verify their integrity. 
+ + :param ctx: Combined type of a callback and rei struct + :param file_path: Path to file in irods + :param url: URL of file on remote + :param offline: Whether to skip requests.get call + :param api_call: Boolean representing whether was called by api and not a script + + :returns: True if the file contents match, False otherwise + """ + write_stdout = not api_call + # Local/irods file + if api_call: + # If called by technicaladmin, only check that the file exists since we don't have access to the contents + return data_object.exists(ctx, file_path) + else: + try: + local_data = data_object.read(ctx, file_path) + except Exception: + log.write(ctx, "compare_local_remote_landingpage: Local file not found at path {}.".format(file_path), write_stdout) + return False + + if offline: + return len(local_data) > 0 + + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + try: + response = requests.get(url, verify=False) + except requests.exceptions.ConnectionError as e: + log.write(ctx, "compare_local_remote_landingpage: Failed to connect to {}".format(url), write_stdout) + log.write(ctx, "compare_local_remote_landingpage: Error: {}".format(e), write_stdout) + return False + + if response.status_code != 200: + log.write(ctx, "compare_local_remote_landingpage: Error {} when connecting to <{}>.".format(response.status_code, url), write_stdout) + return False + + # Set encoding to utf-8 for the response text (otherwise will not match local_data) + response.encoding = 'utf-8' + + if local_data == response.text: + return True + + log.write(ctx, "compare_local_remote_landingpage: File contents at irods path <{}> and remote landing page <{}> do not match.".format(file_path, url), write_stdout) + return False + + +def check_landingpage(ctx, data_package, offline, api_call): + """ + Checks the integrity of landing page by comparing the contents + + :param ctx: Combined type of a callback and rei struct + :param data_package: String representing the data package collection path. + :param offline: Whether to skip any checks that require external server access + :param api_call: Boolean of whether this is for an api call version of the troubleshooting script + + :returns: A tuple containing boolean results of checking + """ + irods_file_path, landing_page_url = get_landingpage_paths(ctx, data_package, not api_call) + if len(irods_file_path) == 0 or len(landing_page_url) == 0: + return False + + return compare_local_remote_landingpage(ctx, irods_file_path, landing_page_url, offline, api_call) + + +def check_combi_json(ctx, data_package, publication_config, offline, write_stdout): + """ + Checks the integrity of combi JSON by checking URL and existence of file. + + :param ctx: Combined type of a callback and rei struct + :param data_package: String representing the data package collection path. 
+ :param publication_config: Dictionary of publication config + :param offline: Whether to skip any checks that require external server access + :param write_stdout: A boolean representing whether to write to stdout or rodsLog + + :returns: A tuple containing boolean results of checking + """ + # Check that the combi json in irods exists + file_path = '' + try: + file_path = get_val_for_attr_with_pub_prefix(ctx, data_package, "combiJsonPath") + except Exception: + pass + exists = data_object.exists(ctx, file_path) + if not exists: + log.write(ctx, "check_combi_json: combi JSON file in irods does not exist: {}".format(file_path), write_stdout) + return False + + if offline: + return True + + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + # Get the version doi + version_doi = '' + try: + version_doi = get_val_for_attr_with_pub_prefix(ctx, data_package, "versionDOI") + except Exception: + pass + url = "https://{}/oai/oai?verb=GetRecord&metadataPrefix=oai_datacite&identifier=oai:{}".format(publication_config["publicVHost"], version_doi) + try: + response = requests.get(url, verify=False) + except requests.exceptions.ConnectionError as e: + log.write(ctx, "check_combi_json: Failed to connect to {}".format(url), write_stdout) + log.write(ctx, "check_combi_json: Error: {}".format(e), write_stdout) + return False + + if response.status_code != 200: + log.write(ctx, "check_combi_json: Error {} when connecting to <{}>.".format(response.status_code, url), write_stdout) + return False + + # Look at the first few parts of the response for signs of error. + if "idDoesNotExist" in response.text[:5000]: + log.write(ctx, "check_combi_json: combiJson not found in oai for data package <{}>".format(data_package), write_stdout) + return False + + return True + + +def print_troubleshoot_result(ctx, data_package, result, datacite_check): + """Print the result of troubleshooting one package in human-friendly format""" + pass_all_tests = all(result.values()) + + log.write(ctx, "Results for: {}".format(data_package), True) + if pass_all_tests: + log.write(ctx, "Package passed all tests.", True) + else: + log.write(ctx, "Package FAILED one or more tests:", True) + log.write(ctx, "Schema matches: {}".format(result['schema_check']), True) + log.write(ctx, "All expected AVUs exist: {}".format(result['no_missing_AVUs_check']), True) + log.write(ctx, "No unexpected AVUs: {}".format(result['no_unexpected_AVUs_check']), True) + + if datacite_check: + log.write(ctx, "Version DOI matches: {}".format(result['versionDOI_check']), True) + if 'baseDOI_check' in result: + log.write(ctx, "Base DOI matches: {}".format(result['baseDOI_check']), True) + + log.write(ctx, "Landing page matches: {}".format(result['landingPage_check']), True) + log.write(ctx, "Combined JSON matches: {}".format(result['combiJson_check']), True) + + log.write(ctx, "", True) + + +def collect_troubleshoot_data_packages(ctx, requested_package, write_stdout): + data_packages = [] + + if requested_package == 'None': + # Retrieve all data packages + all_packages = find_data_packages(ctx, write_stdout) + if not all_packages: + log.write(ctx, "collect_troubleshoot_data_packages: No packages found.", write_stdout) + return None + + data_packages = all_packages + else: + # Get full path of the given package + full_package_path = find_full_package_path(ctx, requested_package, write_stdout) + + if not full_package_path: + log.write(ctx, "collect_troubleshoot_data_packages: Data package '{}' cannot be found.".format(requested_package), 
write_stdout) + return None + + data_packages.append(full_package_path) + + return data_packages + + +def batch_troubleshoot_published_data_packages(ctx, requested_package, log_file, offline, api_call, check_datacite): + """ + Troubleshoots published data packages. + + :param ctx: Context that combines a callback and rei struct. + :param requested_package: A string representing a specific data package path or all packages with failed publications. + :param log_file: A boolean representing to write results in log. + :param offline: A boolean representing whether to perform all checks without connecting to external servers. + :param api_call: Boolean of whether this is run by a script or api test. + :param check_datacite: Boolean representing whether to do the datacite checks + + :returns: A dictionary of dictionaries providing the results of the job. + """ + write_stdout = not api_call + # Check permissions - rodsadmin only + if user.user_type(ctx) != 'rodsadmin': + log.write(ctx, "User is not rodsadmin", write_stdout) + return {} + + data_packages = collect_troubleshoot_data_packages(ctx, requested_package, write_stdout) + if not data_packages: + return {} + schema_cache = {} + results = {} + + # Troubleshooting + for data_package in data_packages: + log.write(ctx, "Troubleshooting data package: {}".format(data_package), write_stdout) + result = {} + # Cannot check the metadata as technicaladmin + if not api_call: + schema_check_dict = vault_metadata_matches_schema(ctx, data_package, schema_cache, "troubleshoot-publications", write_stdout) + result['schema_check'] = schema_check_dict['match_schema'] if schema_check_dict else False + + result['no_missing_AVUs_check'], result['no_unexpected_AVUs_check'] = check_print_data_package_system_avus(ctx, data_package, write_stdout) + + # Only check datacite if enabled + if check_datacite: + result['versionDOI_check'], base_doi_check = check_datacite_doi_registration(ctx, data_package, write_stdout) + if base_doi_check is not None: + result['baseDOI_check'] = base_doi_check + + result['landingPage_check'] = check_landingpage(ctx, data_package, offline, api_call) + publication_config = get_publication_config(ctx) + result['combiJson_check'] = check_combi_json(ctx, data_package, publication_config, offline, write_stdout) + + results[data_package] = result + + if not api_call: + print_troubleshoot_result(ctx, data_package, result, check_datacite) + + if log_file: + log_loc = "/var/lib/irods/log/troubleshoot_publications.log" + with open(log_loc, "a") as writer: + writer.writelines("Batch run date and time: {}".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))) + writer.writelines('\n') + writer.writelines("Troubleshooting data package: {}".format(data_package)) + writer.writelines('\n') + json.dump(result, writer) + writer.writelines('\n') + + return results + + +@api.make() +def api_batch_troubleshoot_published_data_packages(ctx, requested_package, log_file, offline): + """ + Wrapper for the batch script for troubleshooting published data packages. + Runs a subset of the tests since "technicaladmin" is usually more restricted than "rods". + + :param ctx: Combined type of a callback and rei struct + :param requested_package: A string representing a specific data package path or all packages with failed publications. + :param log_file: A boolean representing to write results in log. + :param offline: A boolean representing whether to perform all checks without connecting to external servers. 
+
+    :returns: A dictionary of dictionaries providing the results of the job.
+    """
+    return batch_troubleshoot_published_data_packages(ctx, requested_package, log_file, offline, True, False)
+
+
+@rule.make(inputs=[0, 1, 2, 3], outputs=[])
+def rule_batch_troubleshoot_published_data_packages(ctx, requested_package, log_file, offline, no_datacite):
+    """
+    Troubleshoot published data packages.
+
+    Prints results of the following checks:
+        1. Metadata schema compliance.
+        2. Presence and correctness of expected AVUs.
+        3. Registration with DataCite.
+        4. File integrity of landing page and combi JSON files.
+
+    Operates on either a single specified package or all published packages, depending on the input.
+
+    :param ctx:               Context that combines a callback and rei struct.
+    :param requested_package: A string representing a specific data package path or all packages with failed publications.
+    :param log_file:          A string boolean representing whether to write results to a log file.
+    :param offline:           A string boolean representing whether to perform all checks without connecting to external servers.
+    :param no_datacite:       A string boolean representing whether to skip the DataCite checks
+    """
+    offline = offline == "True"
+    log_file = log_file == "True"
+    check_datacite = no_datacite == "False"
+
+    batch_troubleshoot_published_data_packages(ctx, requested_package, log_file, offline, False, check_datacite)
diff --git a/schema_transformation.py b/schema_transformation.py
index d7f7cc947..77299ada3 100644
--- a/schema_transformation.py
+++ b/schema_transformation.py
@@ -19,7 +19,6 @@
 import session_vars
 
 import meta
-import meta_form
 import schema
 import schema_transformations
 from util import *
@@ -404,41 +403,13 @@ def rule_batch_vault_metadata_schema_report(ctx):
                                 genquery.AS_LIST, ctx)
 
     for row in iter:
-        coll_name = row[0]
-        metadata_path = meta.get_latest_vault_metadata_path(ctx, coll_name)
-
-        if metadata_path == '' or metadata_path is None:
-            log.write(ctx, "Vault metadata schema report skips %s, because metadata could not be found."
-                      % (coll_name))
-            continue
-
-        try:
-            metadata = jsonutil.read(ctx, metadata_path)
-        except Exception as exc:
-            log.write(ctx, "Vault metadata report skips %s, because of exception while reading metadata file %s: %s."
-                      % (coll_name, metadata_path, str(exc)))
+        coll_name = row[0]
+        try:
+            result = meta.vault_metadata_matches_schema(ctx, coll_name, schema_cache, "Vault metadata schema report", True)
+            if result:
+                results[coll_name] = result
+        except Exception as e:
+            log.write(ctx, "Error processing collection {}: {}".format(coll_name, str(e)))
             continue
-
-        # Determine schema
-        schema_id = schema.get_schema_id(ctx, metadata_path)
-        schema_shortname = schema_id.split("/")[-2]
-
-        # Retrieve schema and cache it for future use
-        schema_path = schema.get_schema_path_by_id(ctx, metadata_path, schema_id)
-        if schema_shortname in schema_cache:
-            schema_contents = schema_cache[schema_shortname]
-        else:
-            schema_contents = jsonutil.read(ctx, schema_path)
-            schema_cache[schema_shortname] = schema_contents
-
-        # Check whether metadata matches schema and log any errors
-        error_list = meta.get_json_metadata_errors(ctx, metadata_path, metadata=metadata, schema=schema_contents)
-        match_schema = len(error_list) == 0
-        if not match_schema:
-            log.write(ctx, "Vault metadata schema report: metadata %s did not match schema %s: %s" %
-                      (metadata_path, schema_shortname, str([meta_form.humanize_validation_error(e).encode('utf-8') for e in error_list])))
-
-        # Update results
-        results[coll_name] = {"schema": schema_shortname, "match_schema": match_schema}
-
     return json.dumps(results)
diff --git a/tests/features/api/api_vault.feature b/tests/features/api/api_vault.feature
index 0039a709b..4ed3d018b 100644
--- a/tests/features/api/api_vault.feature
+++ b/tests/features/api/api_vault.feature
@@ -98,6 +98,17 @@ Feature: Vault API
             | /tempZone/home/vault-default-2 |
             | /tempZone/home/vault-core-2    |
             | /tempZone/home/vault-default-3 |
+
+
+    Scenario Outline: Published vault package passes troubleshooting script checks
+        Given user technicaladmin is authenticated
+        And data package exists in <vault>
+        Then data package in <vault> passes troubleshooting script checks
+
+        Examples:
+            | vault                          |
+            | /tempZone/home/vault-default-2 |
+            | /tempZone/home/vault-default-3 |
 
 
     Scenario Outline: Vault preservable formats lists
diff --git a/tests/step_defs/api/common_vault.py b/tests/step_defs/api/common_vault.py
index 2cfa8fa55..9b2706221 100644
--- a/tests/step_defs/api/common_vault.py
+++ b/tests/step_defs/api/common_vault.py
@@ -174,6 +174,21 @@ def data_package_status(user, vault, data_package, status):
         raise AssertionError()
 
 
+@then(parsers.parse('data package in {vault} passes troubleshooting script checks'))
+def api_vault_batch_troubleshoot(user, vault, data_package):
+    http_status, result = api_request(
+        user,
+        "batch_troubleshoot_published_data_packages",
+        {"requested_package": data_package, "log_file": True, "offline": True}
+    )
+    assert http_status == 200
+    data = result['data']
+    assert len(data) == 1
+    # Confirm that all checks passed for this data package
+    for checks in data.values():
+        assert all(checks.values())
+
+
 @then('preservable formats lists are returned')
 def preservable_formats_lists(api_response):
     http_status, body = api_response
diff --git a/tools/troubleshoot-published-data.py b/tools/troubleshoot-published-data.py
new file mode 100644
index 000000000..bba14bc72
--- /dev/null
+++ b/tools/troubleshoot-published-data.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""This script collects all published packages and checks that they have all the required info.
+
+Example:
+To check all published packages:
+python3 troubleshoot-published-data.py
+
+To check one specific package by name:
+python3 troubleshoot-published-data.py -p research-initial[1725262507]
+
+To put results into a log file and complete the checks offline:
+python3 troubleshoot-published-data.py -l -o
+"""
+import argparse
+import subprocess
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        prog="troubleshoot-published-data.py",
+        description=__doc__,
+        formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument("-l", "--log-file", action='store_true',
+                        help="If log file parameter is true then write to log at: /var/lib/irods/log/troubleshoot_publications.log")
+    parser.add_argument("-o", "--offline", action='store_true',
+                        help="If actions should be performed without connecting to external servers (needed for the Yoda team's development setup).")
+    parser.add_argument("-n", "--no-datacite", action='store_true',
+                        help="If datacite check should be skipped (needed for the Yoda team's development environment in some cases).")
+    parser.add_argument("-p", "--package", type=str, required=False,
+                        help="Troubleshoot a specific data package by name (default: troubleshoot all packages)")
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    rule_name = "/etc/irods/yoda-ruleset/tools/troubleshoot_data.r"
+    data_package = f"*data_package={args.package}"
+    log_loc = f"*log_loc={args.log_file if args.log_file else ''}"
+    offline = f"*offline={args.offline}"
+    no_datacite = f"*no_datacite={args.no_datacite}"
+    subprocess.call(['irule', '-r', 'irods_rule_engine_plugin-python-instance', '-F',
+                     rule_name, data_package, log_loc, offline, no_datacite])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/troubleshoot_data.r b/tools/troubleshoot_data.r
new file mode 100644
index 000000000..3caac4671
--- /dev/null
+++ b/tools/troubleshoot_data.r
@@ -0,0 +1,11 @@
+#!/usr/bin/irule -r irods_rule_engine_plugin-python-instance -F
+
+def main(rule_args, callback, rei):
+    data_package = global_vars["*data_package"].strip('"')
+    log_loc = global_vars["*log_loc"].strip('"')
+    offline = global_vars["*offline"].strip('"')
+    no_datacite = global_vars["*no_datacite"].strip('"')
+    callback.rule_batch_troubleshoot_published_data_packages(data_package, log_loc, offline, no_datacite)
+
+INPUT *data_package="", *log_loc="", *offline="", *no_datacite=""
+OUTPUT ruleExecOut
diff --git a/unit-tests/test_util_misc.py b/unit-tests/test_util_misc.py
index aa03ef2c2..428fa33e8 100644
--- a/unit-tests/test_util_misc.py
+++ b/unit-tests/test_util_misc.py
@@ -6,16 +6,181 @@
 import sys
 import time
-from collections import OrderedDict
+from collections import namedtuple, OrderedDict
 from unittest import TestCase
 
 sys.path.append('../util')
 
-from misc import human_readable_size, last_run_time_acceptable, remove_empty_objects
+from misc import check_data_package_system_avus, human_readable_size, last_run_time_acceptable, remove_empty_objects
+
+# AVs of a successfully published data package, that is the first version of the package
+avs_success_data_package = {
+    "org_publication_accessRestriction": "Open - freely retrievable",
+    "org_publication_anonymousAccess": "yes",
+    "org_publication_approval_actor": "datamanager#tempZone",
+    "org_publication_combiJsonPath": "/tempZone/yoda/publication/ICGVFV-combi.json",
+    "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/ICGVFV-dataCite.json",
+    "org_publication_dataCiteMetadataPosted": "yes",
+    "org_publication_landingPagePath": "/tempZone/yoda/publication/ICGVFV.html",
+    "org_publication_landingPageUploaded": "yes",
+    "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/ICGVFV.html",
+    "org_publication_lastModifiedDateTime": "2024-10-04T15:32:46.000000",
+    "org_publication_license": "Creative Commons Attribution 4.0 International Public License",
+    "org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
+    "org_publication_oaiUploaded": "yes",
+    "org_publication_publicationDate": "2024-10-04T15:33:17.853806",
+    "org_publication_randomId": "ICGVFV",
+    "org_publication_status": "OK",
+    "org_publication_submission_actor": "researcher#tempZone",
+    "org_publication_vaultPackage": "/tempZone/home/vault-default-3/research-default-3[1728048679]",
+    "org_publication_versionDOI": "10.00012/UU01-ICGVFV",
+    "org_publication_versionDOIMinted": "yes",
+}
+
+avs_success_data_package_multiversion = {
+    "org_publication_accessRestriction": "Open - freely retrievable",
+    "org_publication_anonymousAccess": "yes",
+    "org_publication_approval_actor": "datamanager#tempZone",
+    "org_publication_baseDOI": "10.00012/UU01-X0GU3S",
+    "org_publication_baseDOIMinted": "yes",
+    "org_publication_baseRandomId": "X0GU3S",
+    "org_publication_combiJsonPath": "/tempZone/yoda/publication/YU0JDH-combi.json",
+    "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/YU0JDH-dataCite.json",
+    "org_publication_dataCiteMetadataPosted": "yes",
+    "org_publication_landingPagePath": "/tempZone/yoda/publication/YU0JDH.html",
+    "org_publication_landingPageUploaded": "yes",
+    "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/YU0JDH.html",
+    "org_publication_lastModifiedDateTime": "2024-10-11T08:49:17.000000",
+    "org_publication_license": "Custom",
+    "org_publication_oaiUploaded": "yes",
+    "org_publication_previous_version": "/tempZone/home/vault-initial1/new-group01[1728550839]",
+    "org_publication_publicationDate": "2024-10-11T08:50:01.812220",
+    "org_publication_randomId": "YU0JDH",
+    "org_publication_status": "OK",
+    "org_publication_submission_actor": "datamanager#tempZone",
+    "org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728629336]",
+    "org_publication_versionDOI": "10.00012/UU01-YU0JDH",
+    "org_publication_versionDOIMinted": "yes"
+}
+
+avs_success_data_package_multiversion_first = {
+    "org_publication_accessRestriction": "Open - freely retrievable",
+    "org_publication_anonymousAccess": "yes",
+    "org_publication_approval_actor": "datamanager#tempZone",
+    "org_publication_baseDOI": "10.00012/UU01-X0GU3S",
+    "org_publication_baseRandomId": "X0GU3S",
+    "org_publication_combiJsonPath": "/tempZone/yoda/publication/T8D8QU-combi.json",
+    "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/T8D8QU-dataCite.json",
+    "org_publication_dataCiteMetadataPosted": "yes",
+    "org_publication_landingPagePath": "/tempZone/yoda/publication/T8D8QU.html",
+    "org_publication_landingPageUploaded": "yes",
+    "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/T8D8QU.html",
+    "org_publication_lastModifiedDateTime": "2024-10-10T09:06:05.000000",
+    "org_publication_license": "Creative Commons Attribution 4.0 International Public License",
+    "org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
+    "org_publication_next_version": "/tempZone/home/vault-initial1/new-group01[1728545387]",
+    "org_publication_oaiUploaded": "yes",
+    "org_publication_publicationDate": "2024-10-10T09:06:02.177810",
+    "org_publication_randomId": "T8D8QU",
+    "org_publication_status": "OK",
+    "org_publication_submission_actor": "datamanager#tempZone",
+    "org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728543897]",
+    "org_publication_versionDOI": "10.00012/UU01-T8D8QU",
+    "org_publication_versionDOIMinted": "yes",
+}
+
+# From avu.py
+Avu = namedtuple('Avu', list('avu'))
+Avu.attr = Avu.a
+Avu.value = Avu.v
+Avu.unit = Avu.u
 
 
 class UtilMiscTest(TestCase):
+    def test_check_data_package_system_avus(self):
+        # Success
+        avs = avs_success_data_package
+        avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_success)
+        self.assertTrue(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 0)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+
+        # Success, extra optional avu
+        avs['org_publication_baseDOIAvailable'] = 'yes'
+        avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_success)
+        self.assertTrue(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 0)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+        del avs['org_publication_baseDOIAvailable']
+
+        # Missing license Uri for non-custom license
+        del avs['org_publication_licenseUri']
+        avus_missing_license_uri = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_missing_license_uri)
+        self.assertFalse(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 1)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+
+        # Custom license, no license Uri (happy flow)
+        avs['org_publication_license'] = "Custom"
+        avus_custom_license = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_custom_license)
+        self.assertTrue(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 0)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+
+        # Unexpected
+        avs['org_publication_userAddedSomethingWeird'] = "yodayoda:)"
+        avus_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_unexpected)
+        self.assertTrue(result['no_missing_avus'])
+        self.assertFalse(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 0)
+        self.assertTrue(len(result['unexpected_avus']) == 1)
+
+        # Missing and unexpected
+        del avs['org_publication_landingPagePath']
+        avus_missing_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_missing_unexpected)
+        self.assertFalse(result['no_missing_avus'])
+        self.assertFalse(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 1)
+        self.assertTrue(len(result['unexpected_avus']) == 1)
+
+        # Missing
+        del avs['org_publication_userAddedSomethingWeird']
+        avus_missing = [Avu(attr, val, "") for attr, val in avs.items()]
+        result = check_data_package_system_avus(avus_missing)
+        self.assertFalse(result['no_missing_avus'])
+        self.assertTrue(result['no_unexpected_avus'])
+        self.assertTrue(len(result['missing_avus']) == 1)
+        self.assertTrue(len(result['unexpected_avus']) == 0)
+
+        # Success, latest version of a publication
+        avs = avs_success_data_package_multiversion
val, "") for attr, val in avs.items()] + result = check_data_package_system_avus(avus_success) + self.assertTrue(result['no_missing_avus']) + self.assertTrue(result['no_unexpected_avus']) + self.assertTrue(len(result['missing_avus']) == 0) + self.assertTrue(len(result['unexpected_avus']) == 0) + + # Success, first version of a publication that has had other versions + avs = avs_success_data_package_multiversion_first + avus_success = [Avu(attr, val, "") for attr, val in avs.items()] + result = check_data_package_system_avus(avus_success) + self.assertTrue(result['no_missing_avus']) + self.assertTrue(result['no_unexpected_avus']) + self.assertTrue(len(result['missing_avus']) == 0) + self.assertTrue(len(result['unexpected_avus']) == 0) + def test_last_run_time_acceptable(self): """Test the last run time for copy to vault""" # No last run time (job hasn't been tried before) diff --git a/util/avu.py b/util/avu.py index 470620403..0098fcea4 100644 --- a/util/avu.py +++ b/util/avu.py @@ -35,6 +35,18 @@ def of_coll(ctx, coll): "COLL_NAME = '{}'".format(coll))) +def get_attr_val_of_coll(ctx, coll, attr): + """Get the value corresponding to an attr for a given collection.""" + iter = genquery.Query( + ctx, + "META_COLL_ATTR_VALUE", + "META_COLL_ATTR_NAME = '{}' AND COLL_NAME = '{}'".format(attr, coll)) + + for row in iter: + return row + raise ValueError("Attribute {} not found in AVUs of collection {}".format(attr, coll)) + + def inside_coll(ctx, path, recursive=False): """Get a list of all AVUs inside a collection with corresponding paths. diff --git a/util/log.py b/util/log.py index 897b9562c..545e626ca 100644 --- a/util/log.py +++ b/util/log.py @@ -17,15 +17,20 @@ import user -def write(ctx, message): - """Write a message to the log, including client name and originating module. +def write(ctx, message, write_stdout=False): + """Write a message to the log or stdout. + Includes client name and originating module if writing to log. - :param ctx: Combined type of a callback and rei struct - :param message: Message to write to log + :param ctx: Combined type of a callback and rei struct + :param message: Message to write to log + :param write_stdout: Whether to write to stdout (used for a few of our scripts) """ - stack = inspect.stack()[1] - module = inspect.getmodule(stack[0]) - _write(ctx, '[{}] {}'.format(module.__name__.replace("rules_uu.", ""), message)) + if write_stdout: + ctx.writeLine("stdout", message) + else: + stack = inspect.stack()[1] + module = inspect.getmodule(stack[0]) + _write(ctx, '[{}] {}'.format(module.__name__.replace("rules_uu.", ""), message)) def _write(ctx, message): @@ -40,15 +45,6 @@ def _write(ctx, message): ctx.writeLine('serverLog', message) -def write_stdout(ctx, message): - """Write a message to stdout. Used for some of our scripts. - - :param ctx: Combined type of a callback and rei struct - :param message: Message to write to log - """ - ctx.writeLine("stdout", message) - - def debug(ctx, message): """"Write a message to the log, if in a development environment. diff --git a/util/misc.py b/util/misc.py index a7d1c4471..73b05d2e6 100644 --- a/util/misc.py +++ b/util/misc.py @@ -8,6 +8,88 @@ import time from collections import OrderedDict +import constants + + +def check_data_package_system_avus(extracted_avus): + """ + Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e, 'org_'). 
+ This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from + a successfully published data package. + + :param extracted_avus: AVUs of the data package in AVU form + + :returns: Dictionary of the results of the check + """ + # Filter those starting with 'org_publication' + extracted_avs = {} + for m in extracted_avus: + if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_'): + extracted_avs[m.attr] = m.value + extracted_attrs = set(extracted_avs.keys()) + + # Define the set of ground truth AVUs + avu_names_suffix = { + 'approval_actor', 'randomId', + 'versionDOI', 'dataCiteJsonPath', 'license', + 'anonymousAccess', 'versionDOIMinted', + 'accessRestriction', 'landingPagePath', + 'publicationDate', + 'vaultPackage', 'submission_actor', 'status', + 'lastModifiedDateTime', 'combiJsonPath', + 'landingPageUploaded', 'oaiUploaded', + 'landingPageUrl', 'dataCiteMetadataPosted' + } + + # If the license is not Custom, it must have a licenseUri + if constants.UUORGMETADATAPREFIX + 'publication_license' in extracted_attrs: + if extracted_avs[constants.UUORGMETADATAPREFIX + 'publication_license'] != "Custom": + avu_names_suffix.add('licenseUri') + + # Define additional set of AVUs with more than one version of publication + avu_names_version_suffix = { + 'previous_version', 'baseDOI', 'baseRandomId', + 'baseDOIMinted' + } + + # Define additional set of AVUs expected for the first version of a publication, when there are multiple versions + avu_names_first_version_suffix = { + 'baseRandomId', 'baseDOI', 'next_version' + } + + # for the second version, all we need is next_version in addition to avu_names_version_suffix + avu_names_previous_version_suffix = {'next_version'} + + # optional avus + avu_names_optional_suffix = { + 'versionDOIAvailable', 'baseDOIAvailable' + } + + combined_avu_names_suffix = avu_names_suffix + + if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_attrs: + combined_avu_names_suffix.update(avu_names_version_suffix) + if constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs: + combined_avu_names_suffix.update(avu_names_previous_version_suffix) + elif constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs: + combined_avu_names_suffix.update(avu_names_first_version_suffix) + + ground_truth_avus = {"{}publication_{}".format(constants.UUORGMETADATAPREFIX, name) for name in combined_avu_names_suffix} + combined_avu_names_suffix.update(avu_names_optional_suffix) + ground_truth_avus_with_optional = {"{}publication_{}".format(constants.UUORGMETADATAPREFIX, name) for name in combined_avu_names_suffix} + # Find missing and unexpected AVUs + missing_avus = ground_truth_avus - extracted_attrs + unexpected_avus = extracted_attrs - ground_truth_avus_with_optional + + results = { + 'no_missing_avus': not bool(missing_avus), + 'missing_avus': list(missing_avus), + 'no_unexpected_avus': not bool(unexpected_avus), + 'unexpected_avus': list(unexpected_avus) + } + + return results + def last_run_time_acceptable(found, last_run, config_backoff_time): """Return whether the last run time is acceptable to continue with task."""
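Note on trying the new helper outside iRODS: check_data_package_system_avus in util/misc.py is plain Python, so it can be exercised the same way the unit tests above do. A minimal sketch, not part of the patch, which assumes it is run from the unit-tests directory so that ../util (and its constants module) is importable:

import sys
from collections import namedtuple

sys.path.append('../util')  # same path trick as unit-tests/test_util_misc.py

from misc import check_data_package_system_avus

# Mirror the Avu namedtuple from util/avu.py: fields a/v/u aliased to attr/value/unit
Avu = namedtuple('Avu', list('avu'))
Avu.attr = Avu.a
Avu.value = Avu.v
Avu.unit = Avu.u

# Deliberately incomplete set of publication AVUs, to show how missing ones are reported
avus = [
    Avu("org_publication_status", "OK", ""),
    Avu("org_publication_versionDOI", "10.00012/UU01-ICGVFV", ""),
]

result = check_data_package_system_avus(avus)
print(result['no_missing_avus'])        # False: most expected org_publication_* AVUs are absent
print(sorted(result['missing_avus']))   # e.g. org_publication_accessRestriction, org_publication_approval_actor, ...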