diff --git a/.github/workflows/api-and-integration-tests.yml b/.github/workflows/api-and-integration-tests.yml index bdedd570f..e69d7b878 100644 --- a/.github/workflows/api-and-integration-tests.yml +++ b/.github/workflows/api-and-integration-tests.yml @@ -114,7 +114,7 @@ jobs: cd tests nohup bash -c 'while true ; do sleep 5 ; ../yoda/docker/run-cronjob.sh copytovault >> ../copytovault.log 2>&1 ; ../yoda/docker/run-cronjob.sh publication >> ../publication.log 2>&1 ; done' & test -d mycache || mkdir -p mycache - python3 -m pytest --skip-ui --intake --datarequest --deposit -o cache_dir=mycache --environment environments/docker.json + python3 -m pytest --skip-ui --datarequest --deposit -o cache_dir=mycache --environment environments/docker.json cat ../copytovault.log cat ../publication.log diff --git a/.github/workflows/api-documentation.yml b/.github/workflows/api-documentation.yml index ce1bf53dd..80a3c1e3a 100644 --- a/.github/workflows/api-documentation.yml +++ b/.github/workflows/api-documentation.yml @@ -55,13 +55,11 @@ jobs: export PYTHONPATH="${PYTHONPATH}:." python tools/api/generate-openapi.py rules_uu --module datarequest > build/api_datarequest.json python tools/api/generate-openapi.py rules_uu --module deposit > build/api_deposit.json - python tools/api/generate-openapi.py rules_uu --module intake > build/api_intake.json - name: Validate Yoda module API documentation run: | openapi-spec-validator build/api_datarequest.json openapi-spec-validator build/api_deposit.json - openapi-spec-validator build/api_intake.json - name: Deploy 🚀 uses: JamesIves/github-pages-deploy-action@releases/v3 diff --git a/__init__.py b/__init__.py index 90707339d..92c63a291 100644 --- a/__init__.py +++ b/__init__.py @@ -56,10 +56,6 @@ # Import certain modules only when enabled. from .util.config import config -if config.enable_intake: - from intake import * - from intake_vault import * - if config.enable_datarequest: from datarequest import * diff --git a/intake.py b/intake.py deleted file mode 100644 index d304a6e7a..000000000 --- a/intake.py +++ /dev/null @@ -1,924 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for intake module.""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import fnmatch -import itertools -import time -import traceback - -import genquery - -import intake_dataset -import intake_lock -import intake_scan -from util import * - - -__all__ = ['api_intake_list_studies', - 'api_intake_list_dm_studies', - 'api_intake_count_total_files', - 'api_intake_list_unrecognized_files', - 'api_intake_list_datasets', - 'api_intake_scan_for_datasets', - 'api_intake_lock_dataset', - 'api_intake_unlock_dataset', - 'api_intake_dataset_get_details', - 'api_intake_dataset_add_comment', - 'api_intake_report_vault_dataset_counts_per_study', - 'api_intake_report_vault_aggregated_info', - 'api_intake_report_export_study_data', - 'rule_intake_scan_for_datasets'] - -INTAKE_FILE_EXCLUSION_PATTERNS = ['*.abc', '*.PNG'] -""" List of file patterns not to take into account within INTAKE module.""" - - -@api.make() -def api_intake_list_studies(ctx): - """Get list of all studies current user is involved in. 
- - :param ctx: Combined type of a callback and rei struct - - :returns: List of studies - - """ - groups = [] - user_name = user.name(ctx) - user_zone = user.zone(ctx) - - iter = genquery.row_iterator( - "USER_GROUP_NAME", - "USER_NAME = '" + user_name + "' AND USER_ZONE = '" + user_zone + "'", - genquery.AS_LIST, ctx - ) - - for row in iter: - if row[0].startswith('grp-intake-'): - groups.append(row[0][11:]) - elif row[0].startswith('intake-'): - groups.append(row[0][7:]) - - groups.sort() - return groups - - -@api.make() -def api_intake_list_dm_studies(ctx): - """Return list of studies current user is datamanager of. - - :param ctx: Combined type of a callback and rei struct - - :returns: List of dm studies - """ - datamanager_groups = [] - user_name = user.name(ctx) - user_zone = user.zone(ctx) - - iter = genquery.row_iterator( - "USER_GROUP_NAME", - "USER_NAME = '" + user_name + "' AND USER_ZONE = '" + user_zone + "'", - genquery.AS_LIST, ctx - ) - - for row in iter: - study = '' - if row[0].startswith('grp-intake-'): - study = row[0][11:] - elif row[0].startswith('intake-'): - study = row[0][7:] - - if study: - # Is a member of this study ... check whether member of corresponding datamanager group - iter2 = genquery.row_iterator( - "USER_NAME", - "USER_TYPE = 'rodsgroup' AND USER_NAME like 'grp-datamanager-" + study + "'", - genquery.AS_LIST, ctx - ) - for row2 in iter2: - datamanager_group = row2[0] - if user.is_member_of(ctx, datamanager_group): - datamanager_groups.append(study) - - return datamanager_groups - - -@api.make() -def api_intake_count_total_files(ctx, coll): - """Get the total count of all files in collection - . - :param ctx: Combined type of a callback and rei struct - :param coll: Collection from which to count all datasets - - :returns: Total file count - """ - main_collection_iterator = genquery.row_iterator( - "COLL_NAME, DATA_NAME", - "COLL_NAME = '" + coll + "'", - genquery.AS_LIST, ctx - ) - - subcollection_iterator = genquery.row_iterator( - "COLL_NAME, DATA_NAME", - "COLL_NAME like '" + coll + "/%'", - genquery.AS_LIST, ctx - ) - - count = 0 - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - exclusion_matched = any(fnmatch.fnmatch(row[1], p) for p in INTAKE_FILE_EXCLUSION_PATTERNS) - if not exclusion_matched: - count += 1 - - return count - - -@api.make() -def api_intake_list_unrecognized_files(ctx, coll): - """Get list of all unrecognized files for given path including relevant metadata. - - :param ctx: Combined type of a callback and rei struct - :param coll: Collection from which to list all unrecognized files - - :returns: List of unrecognized files - """ - # check permissions - parts = coll.split('/') - group = parts[3] - datamanager_group = intake_group_to_datamanager_group(group) - - if user.is_member_of(ctx, group): - pass - elif user.is_member_of(ctx, datamanager_group): - pass - else: - return {} - - # Include coll name as equal names do occur and genquery delivers distinct results. 
- main_collection_iterator = genquery.row_iterator( - "COLL_NAME, DATA_NAME, COLL_CREATE_TIME, DATA_OWNER_NAME", - "COLL_NAME = '" + coll + "' AND META_DATA_ATTR_NAME = 'unrecognized'", - genquery.AS_LIST, ctx - ) - - subcollection_iterator = genquery.row_iterator( - "COLL_NAME, DATA_NAME, COLL_CREATE_TIME, DATA_OWNER_NAME", - "COLL_NAME like '" + coll + "/%' AND META_DATA_ATTR_NAME = 'unrecognized'", - genquery.AS_LIST, ctx - ) - - files = [] - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - # Check whether object type is within exclusion pattern - exclusion_matched = any(fnmatch.fnmatch(row[1], p) for p in INTAKE_FILE_EXCLUSION_PATTERNS) - if not exclusion_matched: - # Error is hardcoded! (like in the original) and initialize attributes already as empty strings. - file_data = {"name": row[1], - "path": row[0], - "date": time.strftime('%Y-%m-%d', time.localtime(int(row[2]))), - "creator": row[3], - "error": 'Experiment type, wave or pseudocode is missing from path', - "experiment_type": '', - "pseudocode": '', - "wave": '', - "version": ''} - - # per data object get relevant metadata (experiment type, version, wave, pseudocode) if present - iter2 = genquery.row_iterator( - "META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE", - "COLL_NAME = '" + row[0] + "' AND DATA_NAME = '" + row[1] + "' AND META_DATA_ATTR_NAME in ('experiment_type', 'pseudocode', 'wave', 'version')", - genquery.AS_LIST, ctx - ) - for row2 in iter2: - file_data[row2[0]] = row2[1] - - files.append(file_data) - - return files - - -@api.make() -def api_intake_list_datasets(ctx, coll): - """Get list of datasets for given path. - - A dataset is distinguished by attribute name 'dataset_toplevel' which can either reside on a collection or a data object. - That is why 2 separate queries have to be performed. - - :param ctx: Combined type of a callback and rei struct - :param coll: Collection from which to list all datasets - - :returns: list of datasets - """ - datasets = [] - - # 1) Query for datasets distinguished by collections - c_main_collection_iterator = genquery.row_iterator( - "META_COLL_ATTR_VALUE, COLL_NAME", - "COLL_NAME = '" + coll + "' AND META_COLL_ATTR_NAME = 'dataset_toplevel' ", - genquery.AS_LIST, ctx - ) - - c_subcollection_iterator = genquery.row_iterator( - "META_COLL_ATTR_VALUE, COLL_NAME", - "COLL_NAME LIKE '" + coll + "/%' AND META_COLL_ATTR_NAME = 'dataset_toplevel' ", - genquery.AS_LIST, ctx - ) - - for row in itertools.chain(c_main_collection_iterator, c_subcollection_iterator): - dataset = get_dataset_details(ctx, row[0], row[1]) - datasets.append(dataset) - - # 2) Query for datasets distinguished dataobjects - d_main_collection_iterator = genquery.row_iterator( - "META_DATA_ATTR_VALUE, COLL_NAME", - "COLL_NAME = '" + coll + "' AND META_DATA_ATTR_NAME = 'dataset_toplevel' ", - genquery.AS_LIST, ctx - ) - - d_subcollection_iterator = genquery.row_iterator( - "META_DATA_ATTR_VALUE, COLL_NAME", - "COLL_NAME LIKE '" + coll + "/%' AND META_DATA_ATTR_NAME = 'dataset_toplevel' ", - genquery.AS_LIST, ctx - ) - - for row in itertools.chain(d_main_collection_iterator, d_subcollection_iterator): - dataset = get_dataset_details(ctx, row[0], row[1]) - datasets.append(dataset) - - return datasets - - -def get_dataset_details(ctx, dataset_id, path): - """Get details of dataset based on dataset identifier. 
- - :param ctx: Combined type of a callback and rei struct - :param dataset_id: Identifier of dataset - :param path: Path to dataset - - :returns: Dict holding objects for the dataset - """ - # Inialise all attributes - dataset = {"dataset_id": dataset_id, - "path": path} - - # Parse dataset_id to get WEPV-items individually - dataset_parts = dataset_id.split('\t') - dataset['wave'] = dataset_parts[0] - dataset['experiment_type'] = dataset_parts[1] - dataset['pseudocode'] = dataset_parts[2] - dataset['version'] = dataset_parts[3] - dataset['datasetStatus'] = 'scanned' - dataset['datasetCreateName'] = '==UNKNOWN==' - dataset['datasetCreateDate'] = 0 - dataset['datasetCreateDateFormatted'] = '' - dataset['datasetErrors'] = 0 - dataset['datasetWarnings'] = 0 - dataset['datasetComments'] = 0 - dataset['objects'] = 0 - dataset['objectErrors'] = 0 - dataset['objectWarnings'] = 0 - - tl_info = get_dataset_toplevel_objects(ctx, path, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - if is_collection: - """ dataset is based on a collection """ - tl_collection = tl_objects[0] - iter = genquery.row_iterator( - "COLL_NAME, COLL_OWNER_NAME, COLL_CREATE_TIME", - "COLL_NAME = '" + tl_collection + "' ", - genquery.AS_LIST, ctx - ) - for row in iter: - dataset['datasetCreateName'] = row[1] - dataset['datasetCreateDate'] = int(row[2]) - dataset['datasetCreateDateFormatted'] = time.strftime('%Y-%m-%d', time.localtime(int(row[2]))) - dataset['datasetCreatedByWhen'] = row[1] + ':' + row[2] - - iter = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_NAME, count(META_COLL_ATTR_VALUE)", - "COLL_NAME = '" + tl_collection + "' ", - genquery.AS_LIST, ctx - ) - for row in iter: - if row[1] == 'dataset_error': - dataset['datasetErrors'] += int(row[2]) - if row[1] == 'dataset_warning': - dataset['datasetWarnings'] += int(row[2]) - if row[1] == 'comment': - dataset['datasetComments'] += int(row[2]) - if row[1] == 'to_vault_freeze': - dataset['datasetStatus'] = 'frozen' - if row[1] == 'to_vault_lock': - dataset['datasetStatus'] = 'locked' - - iter = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + tl_collection + "' ", - genquery.AS_LIST, ctx - ) - for row in iter: - if row[1] == 'object_count': - dataset['objects'] += int(row[2]) - if row[1] == 'object_errors': - dataset['objectErrors'] += int(row[2]) - if row[1] == 'object_warnings': - dataset['objectWarnings'] += int(row[2]) - else: - # Dataset is based on a dataobject - # Step through all data objects as found in tl_objects - objects = 0 - object_errors = 0 - object_warnings = 0 - for tl_object in tl_objects: - - # split tl_object - tlo = pathutil.chop(tl_object) - parent = tlo[0] - base_name = tlo[1] - - objects += 1 - if objects == 1: - iter = genquery.row_iterator( - "DATA_OWNER_NAME, DATA_CREATE_TIME", - "COLL_NAME = '" + parent + "' and DATA_NAME = '" + base_name + "' ", - genquery.AS_LIST, ctx - ) - for row in iter: - dataset['datasetCreateName'] = row[0] - dataset['datasetCreateDate'] = int(row[1]) - dataset['datasetCreateDateFormatted'] = time.strftime('%Y-%m-%d', time.localtime(int(row[1]))) - dataset['datasetCreatedByWhen'] = row[0] + ':' + row[1] - - iter = genquery.row_iterator( - "META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE", - "COLL_NAME = '" + parent + "' and DATA_NAME = '" + base_name + "' ", - genquery.AS_LIST, ctx - ) - for row in iter: - if row[0] == 'error': - object_errors += 1 - if row[0] == 'warning': - object_warnings += 1 - if objects == 1: - # 
Only look at these items when objects==1 as they are added to each toplevel object present - if row[0] == 'dataset_error': - dataset['datasetErrors'] += 1 - if row[0] == 'dataset_warning': - dataset['datasetWarnings'] += 1 - if row[0] == 'comment': - dataset['datasetComments'] += 1 - if row[0] == 'to_vault_freeze': - dataset['datasetStatus'] = 'frozen' - if row[0] == 'to_vault_lock': - dataset['datasetStatus'] = 'locked' - dataset['objects'] = objects - dataset['objectErrors'] = object_errors - dataset['objectWarnings'] = object_warnings - - return dataset - - -def get_dataset_toplevel_objects(ctx, root, dataset_id): - """Returns dict with toplevel object paths and whether is collection based dataset. - - If is a collection - only one object is returned (collection path). - If not a collection- all objects are returned with full object path. - - :param ctx: Combined type of a callback and rei struct - :param root: Path within which to search for datasets (e.g. an intake group collection) - :param dataset_id: Identifier of the dataset - - :returns: Dict holding top-level object paths for the dataset (in the 'objects' key) and a boolean value which - says whether it is a collection-based dataset (in the 'is_collection' key) - """ - c_main_collection_iterator = genquery.row_iterator( - "COLL_NAME", - "COLL_NAME = '" + root + "' AND META_COLL_ATTR_NAME = 'dataset_toplevel' " - "AND META_COLL_ATTR_VALUE = '" + dataset_id + "'", - genquery.AS_LIST, ctx - ) - - c_subcollection_iterator = genquery.row_iterator( - "COLL_NAME", - "COLL_NAME LIKE '" + root + "/%' AND META_COLL_ATTR_NAME = 'dataset_toplevel' " - "AND META_COLL_ATTR_VALUE = '" + dataset_id + "'", - genquery.AS_LIST, ctx - ) - - for row in itertools.chain(c_main_collection_iterator, c_subcollection_iterator): - return {'is_collection': True, - 'objects': [row[0]]} - - # For dataobject situation gather all object path strings as a list - d_main_collection_iterator = genquery.row_iterator( - "DATA_NAME, COLL_NAME", - "COLL_NAME = '" + root + "' AND META_DATA_ATTR_NAME = 'dataset_toplevel' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "'", - genquery.AS_LIST, ctx - ) - - d_subcollection_iterator = genquery.row_iterator( - "DATA_NAME, COLL_NAME", - "COLL_NAME LIKE '" + root + "/%' AND META_DATA_ATTR_NAME = 'dataset_toplevel' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "'", - genquery.AS_LIST, ctx - ) - - objects = [] - for row in itertools.chain(d_main_collection_iterator, d_subcollection_iterator): - objects.append(row[1] + '/' + row[0]) - return {'is_collection': False, - 'objects': objects} - - -@api.make() -def api_intake_scan_for_datasets(ctx, coll): - """The toplevel of a dataset can be determined by attribute 'dataset_toplevel' - and can either be a collection or a data_object. 
- - :param ctx: Combined type of a callback and rei struct - :param coll: Collection to scan for datasets - - :returns: indication correct - """ - - if _intake_check_authorized_to_scan(ctx, coll): - try: - _intake_scan_for_datasets(ctx, coll) - except Exception: - log.write(ctx, "Intake scan (API) failed with the following exception: " + traceback.format_exc()) - return {"proc_status": "NOK", "error_msg": "Error during scanning process"} - else: - return {"proc_status": "NOK", "error_msg": "No permissions to scan collection"} - - return {"proc_status": "OK"} - - -@rule.make(inputs=[0], outputs=[1]) -def rule_intake_scan_for_datasets(ctx, coll): - """The toplevel of a dataset can be determined by attribute 'dataset_toplevel' - and can either be a collection or a data_object. - - :param ctx: Combined type of a callback and rei struct - :param coll: Collection to scan for datasets - - :returns: 0=correct, 1=insufficient rights, 2=error during scanning process - """ - if not collection.exists(ctx, coll): - return "Non existing collection: " + coll - if _intake_check_authorized_to_scan(ctx, coll): - try: - _intake_scan_for_datasets(ctx, coll, tl_datasets_log_target='stdout') - except Exception: - log.write(ctx, "Intake scan (rule) failed with the following exception: " + traceback.format_exc()) - return "Error scanning for datasets for collection: " + coll - else: - return "Insufficient permissions for collection: " + coll - - return 0 - - -def _intake_check_authorized_to_scan(ctx, coll): - """Checks that user is authorized to scan intake group, either as - a data manager or as an intake group member. - - :param ctx: Combined type of a callback and rei struct - :param coll: Collection to scan for datasets - - :returns: boolean - whether user is authorized - """ - parts = coll.split('/') - group = parts[3] - datamanager_group = intake_group_to_datamanager_group(group) - - if (user.is_member_of(ctx, group) or user.is_member_of(ctx, datamanager_group)): - return True - else: - log.write(ctx, "No permissions to scan collection") - return False - - -def _intake_scan_for_datasets(ctx, coll, tl_datasets_log_target=''): - """Internal function for actually running intake scan - - :param ctx: Combined type of a callback and rei struct - :param coll: Collection to scan for datasets - :param tl_datasets_log_target: If in ['stdout', 'serverLog'] logging of toplevel datasets will take place to the specified target - - """ - scope = {"wave": "", - "experiment_type": "", - "pseudocode": ""} - found_datasets = [] - found_datasets = intake_scan.intake_scan_collection(ctx, coll, scope, False, found_datasets) - - if tl_datasets_log_target in ['stdout', 'serverLog']: - for subscope in found_datasets: - try: - version = subscope['version'] - except KeyError: - version = 'Raw' - ctx.writeLine(tl_datasets_log_target, ("Found dataset toplevel collection: " - + "W<" + subscope['wave'] - + "> E<" + subscope['experiment_type'] - + "> P<" + subscope['pseudocode'] - + "> V<" + version - + "> D<" + subscope['directory'] - + ">")) - - intake_scan.intake_check_datasets(ctx, coll) - - -@api.make() -def api_intake_lock_dataset(ctx, path, dataset_ids): - """Lock datasets as an indication it can be 'frozen' for it to progress to vault. 
- - Lock = datamanager only - - :param ctx: Combined type of a callback and rei struct - :param path: Collection for which to lock a specific dataset id - :param dataset_ids: Comma separated identifiers of datasets to be locked - - :returns: indication correct - """ - # check permissions - datamanager only - parts = path.split('/') - group = parts[3] - datamanager_group = intake_group_to_datamanager_group(group) - - if not user.is_member_of(ctx, datamanager_group): - log.write(ctx, "No permissions to lock dataset") - return {"proc_status": "NOK", - "error_msg": "No permissions to lock dataset(s)", - "error_dataset_ids": []} - - error_dataset_ids = [] - for dataset_id in dataset_ids.split(','): - # error_dataset_ids.append(dataset_id) - try: - intake_lock.intake_dataset_lock(ctx, path, dataset_id) - except Exception: - error_dataset_ids.append(dataset_id) - - if error_dataset_ids: - return {"proc_status": "NOK", - "error_msg": "Something went wrong locking datasets", - "error_dataset_ids": error_dataset_ids} - - return {"proc_status": "OK"} - - -@api.make() -def api_intake_unlock_dataset(ctx, path, dataset_ids): - """Unlock a dataset to remove the indication so it can be 'frozen' for it to progress to vault - - Unlock = datamanager only - - :param ctx: Combined type of a callback and rei struct - :param path: Collection for which to lock a specific dataset id - :param dataset_ids: Comma separated identifiers of datasets to be locked - - :returns: indication correct - """ - # check permissions - datamanager only - parts = path.split('/') - group = parts[3] - datamanager_group = intake_group_to_datamanager_group(group) - - if not user.is_member_of(ctx, datamanager_group): - log.write(ctx, "No permissions to unlock dataset(s)") - return {"proc_status": "NOK", - "error_msg": "No permissions to unlock dataset", - "error_dataset_ids": []} - - error_dataset_ids = [] - for dataset_id in dataset_ids.split(','): - # error_dataset_ids.append(dataset_id) - try: - intake_lock.intake_dataset_unlock(ctx, path, dataset_id) - except Exception: - error_dataset_ids.append(dataset_id) - - if error_dataset_ids: - return {"proc_status": "NOK", - "error_msg": "Something went wrong unlocking datasets", - "error_dataset_ids": error_dataset_ids} - - return {"proc_status": "OK"} - - -@api.make() -def api_intake_dataset_add_comment(ctx, study_id, dataset_id, comment): - """Add a comment to a dataset. 
- - :param ctx: Combined type of a callback and rei struct - :param study_id: Id of the study given dataset belongs to - :param dataset_id: Identifier of the dataset to add a comment to - :param comment: Comment as added by user - - :returns: indication correct - """ - coll = '/' + user.zone(ctx) + '/home/' + study_id - - # check permissions - can be researcher or datamanager - parts = coll.split('/') - group = parts[3] - datamanager_group = intake_group_to_datamanager_group(group) - - if not (user.is_member_of(ctx, group) or user.is_member_of(ctx, datamanager_group)): - log.write(ctx, "No permissions to scan collection") - return {} - - tl_info = get_dataset_toplevel_objects(ctx, coll, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - if not is_collection and len(tl_objects) == 0: - return {"proc_status": "NOK", - "error_msg": "Dataset does not exist"} - - timestamp = int(time.time()) # int(datetime.timestamp(datetime.now())) - comment_data = user.name(ctx) + ':' + str(timestamp) + ':' + comment - - for tl in tl_objects: - if is_collection: - avu.associate_to_coll(ctx, tl, 'comment', comment_data) - else: - avu.associate_to_data(ctx, tl, 'comment', comment_data) - - return {'user': user.name(ctx), 'timestamp': time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(timestamp)), 'comment': comment} - - -@api.make() -def api_intake_dataset_get_details(ctx, coll, dataset_id): - """Get all details for a dataset (errors/warnings, scanned by who/when, comments, file tree). - - 1) Errors/warnings - 2) Comments - 3) Tree view of files within dataset. - - :param ctx: Combined type of a callback and rei struct - :param coll: Collection to start from - :param dataset_id: Identifier of the dataset to get details for - - :returns: dictionary with all dataset data - """ - # check permissions - can be researcher or datamanager - parts = coll.split('/') - group = parts[3] - datamanager_group = intake_group_to_datamanager_group(group) - - if not (user.is_member_of(ctx, group) or user.is_member_of(ctx, datamanager_group)): - log.write(ctx, "No permissions to scan collection") - return {} - - tl_info = get_dataset_toplevel_objects(ctx, coll, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - scanned = '' - comments = [] - dataset_warnings = [] - dataset_errors = [] - files = {} - for tl in tl_objects: - if is_collection: - coll = tl - # Dataset based on a collection - iter = genquery.row_iterator( - "META_COLL_ATTR_VALUE, META_COLL_ATTR_NAME, order_asc(META_COLL_MODIFY_TIME)", - "COLL_NAME = '{}' and META_COLL_ATTR_NAME in ('dataset_error', 'dataset_warning', 'comment')".format(coll), - genquery.AS_LIST, ctx - ) - for row in iter: - if row[1] == 'dataset_error': - dataset_errors.append(row[0]) - elif row[1] == 'dataset_warning': - dataset_warnings.append(row[0]) - else: - comments.append(row[0]) - - # Scanned by/when - iter = genquery.row_iterator( - "META_DATA_ATTR_VALUE", - "META_DATA_ATTR_NAME = 'scanned' AND COLL_NAME = '{}'".format(coll), - genquery.AS_LIST, ctx - ) - for row in iter: - scanned = row[0] - break - - break - else: - # Dataset is based on a data object - parts = pathutil.chop(tl) - coll = parts[0] - file = parts[1] - iter = genquery.row_iterator( - "META_DATA_ATTR_VALUE, META_DATA_ATTR_NAME, order_asc(META_DATA_MODIFY_TIME)", - "COLL_NAME = '{}' AND DATA_NAME = '{}' and META_DATA_ATTR_NAME in ('dataset_error','dataset_warning','comment', 'scanned')".format(coll, file), - genquery.AS_LIST, ctx - ) - for row in 
iter: - if row[1] == 'dataset_error': - dataset_errors.append(row[0]) - elif row[1] == 'dataset_warning': - dataset_warnings.append(row[0]) - elif row[1] == 'scanned': - scanned = row[0] - else: - comments.append(row[0]) - - # do it only once - all data is gathered in the first run - break - - level = '0' - files = coll_objects(ctx, level, coll, dataset_id) - - if len(scanned.split(':')) != 2: - # Retrieve scannedby/when information in a different way - dataset = get_dataset_details(ctx, dataset_id, coll) - scanned = dataset.get('datasetCreatedByWhen', "unknown") - - return {"files": files, - # "is_collection": is_collection, - # "tlobj": tl_objects, - "scanned": scanned, - "comments": comments, - "dataset_warnings": dataset_warnings, - "dataset_errors": dataset_errors} - - -def coll_objects(ctx, level, coll, dataset_id): - """Recursive function to pass entire folder/file structure in such that frontend - can do something useful with it including errors/warnings on object level - - :param ctx: Combined type of a callback and rei struct - :param level: Level in hierarchy (tree) - :param coll: Collection to collect - :param dataset_id: id of the dataset involved - - :returns: Tree of collections and files - """ - # First get the sub collections - counter = 0 - files = {} - - # COLLECTIONS - iter = genquery.row_iterator( - "COLL_NAME, COLL_ID", - "COLL_PARENT_NAME = '{}' AND META_COLL_ATTR_NAME = 'dataset_id' AND META_COLL_ATTR_VALUE = '{}'".format(coll, dataset_id), - genquery.AS_LIST, ctx - ) - for row in iter: - # files(pathutil.basename(row[0])) - node = {} - node['name'] = pathutil.basename(row[0]) - node['isFolder'] = True - node['parent_id'] = level - warnings = [] - errors = [] - # Per collection add errors/warnings from scan process - iter2 = genquery.row_iterator( - "META_COLL_ATTR_VALUE, META_COLL_ATTR_NAME", - "META_COLL_ATTR_NAME in ('warning', 'error') AND COLL_ID = '{}'".format(row[1]), - genquery.AS_LIST, ctx - ) - for row2 in iter2: - if row[1] == 'error': - errors.append(row2[0]) - else: - warnings.append(row2[0]) - node['errors'] = errors - node['warnings'] = warnings - - files[level + "." + str(counter)] = node - - files.update(coll_objects(ctx, level + "." + str(counter), row[0], dataset_id)) - - counter += 1 - - # DATA OBJECTS - iter = genquery.row_iterator( - "DATA_NAME, DATA_ID", - "COLL_NAME = '{}' AND META_DATA_ATTR_NAME = 'dataset_id' AND META_DATA_ATTR_VALUE = '{}'".format(coll, dataset_id), - genquery.AS_LIST, ctx - ) - for row in iter: - node = {} - node['name'] = row[0] - node['isFolder'] = False - node['parent_id'] = level - # Per data object add errors/warnings from scan process - iter2 = genquery.row_iterator( - "META_DATA_ATTR_VALUE, META_DATA_ATTR_NAME", - "META_DATA_ATTR_NAME in ('warning', 'error') AND DATA_ID = '{}'".format(row[1]), - genquery.AS_LIST, ctx - ) - warnings = [] - errors = [] - for row2 in iter2: - if row2[1] == 'error': - errors.append(row2[0]) - else: - warnings.append(row2[0]) - node['errors'] = errors - node['warnings'] = warnings - - files[level + "." + str(counter)] = node - - counter += 1 - - return files - - -# Reporting / export functions -@api.make() -def api_intake_report_vault_dataset_counts_per_study(ctx, study_id): - """Get the count of datasets wave/experimenttype. - - In the vault a dataset is always located in a folder. - Therefore, looking at the folders only is enough. 
- - :param ctx: Combined type of a callback and rei struct - :param study_id: Study id - - :returns: Dictionary with relevant aggregated counts - """ - # check permissions - datamanager only - datamanager_group = "grp-datamanager-" + study_id - - if not user.is_member_of(ctx, datamanager_group): - log.write(ctx, "No permissions for reporting functionality") - return {} - - return intake_dataset.intake_youth_dataset_counts_per_study(ctx, study_id) - - -@api.make() -def api_intake_report_vault_aggregated_info(ctx, study_id): - """Collects the following information for Raw, Processed datasets. - Including a totalisation of this all (Raw/processed is kept in VERSION). - - -Total datasets - -Total files - -Total file size - -File size growth in a month - -Datasets growth in a month - -Pseudocodes (distinct) - - :param ctx: Combined type of a callback and rei struct - :param study_id: Study id - - :returns: Dictionary with data for analysis - """ - # check permissions - datamanager only - datamanager_group = "grp-datamanager-" + study_id - - if not user.is_member_of(ctx, datamanager_group): - log.write(ctx, "No permissions for reporting functionality") - return {} - - return intake_dataset.vault_aggregated_info(ctx, study_id) - - -@api.make() -def api_intake_report_export_study_data(ctx, study_id): - """Find all datasets in the vault for $studyID. - - Include file count and total file size as well as dataset meta data version, experiment type, pseudocode and wave - - :param ctx: Combined type of a callback and rei struct - :param study_id: Study id to get a report from - - :returns: Study report - """ - # check permissions - datamanager only - datamanager_group = "grp-datamanager-" + study_id - - if not user.is_member_of(ctx, datamanager_group): - log.write(ctx, "No permissions to export data for this study") - return {} - - return intake_dataset.intake_report_export_study_data(ctx, study_id) - - -def intake_group_to_datamanager_group(intake_group): - """Determines the name of the data manager group of a particular intake group. - - :param intake_group: name of intake group - - :returns: name of datamanager group - - :raises ValueError: if provided group name is not a valid intake group name - """ - if intake_group.startswith("grp-intake-"): - return intake_group.replace("-intake-", "-datamanager-", 1) - elif intake_group.startswith("intake-"): - return intake_group.replace("intake-", "grp-datamanager-", 1) - else: - raise ValueError("Unexpected intake group format for group " + intake_group) diff --git a/intake_checksums.py b/intake_checksums.py deleted file mode 100644 index 669d06afe..000000000 --- a/intake_checksums.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for intake checksums.""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import itertools - -import genquery - -from util import * - - -def chop_checksum(checksum): - """Chop iRODS checksum in checksum type and checksum string. - - Checksum format is ({type}:){checksum}, if type is missing then it is "md5". - - :param checksum: iRODS checksum string - :returns: type checksum - """ - checksum_split = checksum.split(":") - - if len(checksum_split) > 1: - type = checksum_split[0] - checksum = checksum_split[1] - - return type, checksum - - -def intake_generate_dataset_checksums(ctx, dataset_path, checksum_file): - """"Generate data object with all checksums of a dataset. 
- - :param ctx: Combined type of a callback and rei struct - :param dataset_path: Root collection of dataset to be indexed - :param checksum_file: Data object to write checksums to - """ - q_root = genquery.row_iterator("COLL_NAME, DATA_NAME, DATA_CHECKSUM, DATA_SIZE", - "COLL_NAME = '{}'".format(dataset_path), - genquery.AS_LIST, ctx) - - q_sub = genquery.row_iterator("COLL_NAME, DATA_NAME, DATA_CHECKSUM, DATA_SIZE", - "COLL_NAME like '{}/%'".format(dataset_path), - genquery.AS_LIST, ctx) - - # Create checksums file. - checksums = "" - for row in itertools.chain(q_root, q_sub): - type, checksum = chop_checksum(row[2]) - checksums += "{} {} {} {}/{}\n".format(type, checksum, row[3], row[0], row[1]) - - # Write checksums file. - data_object.write(ctx, checksum_file, checksums) diff --git a/intake_dataset.py b/intake_dataset.py deleted file mode 100644 index d8417fc71..000000000 --- a/intake_dataset.py +++ /dev/null @@ -1,284 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for intake datasets.""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import itertools - -import genquery - -from util import * - - -def intake_report_export_study_data(ctx, study_id): - """ Get the information for the export functionality - - Retrieved metadata for a study: - - dataset_date_created - - wave - - version - - experiment_type - - pseudocode - - number of files - - total file size - - :param ctx: Combined type of a callback and rei struct - :param study_id: Unique identifier op study - :returns: returns datasets - """ - zone = user.zone(ctx) - - main_collection_iterator = genquery.row_iterator("COLL_NAME, COLL_PARENT_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - " = '/{}/home/grp-vault-{}' AND META_COLL_ATTR_NAME IN ('dataset_id', 'dataset_date_created', 'wave', 'version', 'experiment_type', 'pseudocode')".format(zone, study_id), - genquery.AS_LIST, ctx) - - subcollection_iterator = genquery.row_iterator("COLL_NAME, COLL_PARENT_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME like '/{}/home/grp-vault-{}/%' AND META_COLL_ATTR_NAME IN ('dataset_id', 'dataset_date_created', 'wave', 'version', 'experiment_type', 'pseudocode')".format(zone, study_id), - genquery.AS_LIST, ctx) - - datasets = {} - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - path = row[0] - try: - datasets[path][row[2]] = row[3] - except KeyError: - datasets[path] = {row[2]: row[3]} - - real_datasets = {} - for set_path in datasets: - if 'dataset_date_created' in datasets[set_path]: - real_datasets[set_path] = datasets[set_path] - # collect total file size and total amount of files - real_datasets[set_path]['totalFileSize'] = 0 - real_datasets[set_path]['totalFiles'] = 0 - - # get the filesize and file count - stat_main_collection_iterator = genquery.row_iterator("count(DATA_ID), sum(DATA_SIZE)", - "COLL_NAME = '{}'".format(set_path), - genquery.AS_LIST, ctx) - - stat_subcollection_iterator = genquery.row_iterator("count(DATA_ID), sum(DATA_SIZE)", - "COLL_NAME like '{}/%'".format(set_path), - genquery.AS_LIST, ctx) - - for row in itertools.chain(stat_main_collection_iterator, stat_subcollection_iterator): - real_datasets[set_path]['totalFiles'] = int(row[0]) / 2 - totalFileSize = 0 - if row[1]: - totalFileSize = int(row[1]) - real_datasets[set_path]['totalFileSize'] = totalFileSize / 2 - - return real_datasets - - -def intake_youth_get_datasets_in_study(ctx, study_id): - """Get the of datasets (with relevant metadata) in a study. 
- - Retrieved metadata: - - 'dataset_id' - - 'dataset_date_created' - - 'wave' - - 'version' - - 'experiment_type' - - 'pseudocode' - - :param ctx: Combined type of a callback and rei struct - :param study_id: Unique identifier of study - - :returns: Dict with datasets and relevant metadata. - """ - zone = user.zone(ctx) - - main_collection_iterator = genquery.row_iterator("COLL_NAME, COLL_PARENT_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '/{}/home/grp-vault-{}' AND META_COLL_ATTR_NAME IN ('dataset_id', 'dataset_date_created', 'wave', 'version', 'experiment_type', 'pseudocode')".format(zone, study_id), - genquery.AS_LIST, ctx) - - subcollection_iterator = genquery.row_iterator("COLL_NAME, COLL_PARENT_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME LIKE '/{}/home/grp-vault-{}/*' AND META_COLL_ATTR_NAME IN ('dataset_id', 'dataset_date_created', 'wave', 'version', 'experiment_type', 'pseudocode')".format(zone, study_id), - genquery.AS_LIST, ctx) - - datasets = {} - - # Construct all datasets. - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - dataset = row[0] - attribute_name = row[2] - attribute_value = row[3] - - if attribute_name in ['dataset_date_created', 'wave', 'version', 'experiment_type', 'pseudocode']: - if attribute_name in ['version', 'experiment_type']: - val = attribute_value.lower() - else: - val = attribute_value - try: - datasets[dataset][attribute_name] = val - except KeyError: - datasets[dataset] = {attribute_name: val} - - return datasets - - -def intake_youth_dataset_counts_per_study(ctx, study_id): - """"Get the counts of datasets wave/experimenttype. - - In the vault a dataset is always located in a folder. - Therefore, looking at the folders only is enough. - - :param ctx: Combined type of a callback and rei struct - :param study_id: Unique identifier op study - - :returns: Dict with counts of datasets wave/experimenttype - """ - datasets = intake_youth_get_datasets_in_study(ctx, study_id) - - dataset_type_counts = {} - # Loop through datasets and count wave and experimenttype. - for dataset in datasets: - # Meta attribute 'dataset_date_created' defines that a folder holds a complete set. - if 'dataset_date_created' in datasets[dataset]: - type = datasets[dataset]['experiment_type'] - wave = datasets[dataset]['wave'] - version = datasets[dataset]['version'] - - try: - dataset_type_counts[type][wave][version] += 1 - except KeyError: - if type not in dataset_type_counts: - dataset_type_counts[type] = {wave: {version: 1}} - elif wave not in dataset_type_counts[type]: - dataset_type_counts[type][wave] = {version: 1} - else: - dataset_type_counts[type][wave][version] = 1 - - return dataset_type_counts - - -def vault_aggregated_info(ctx, study_id): - """Collects aggregated information for raw and processed datasets. - - Collects the following information for RAW and PROCESSED datasets. 
- Including a totalisation of this all (raw/processed is kept in VERSION) - - Total datasets - - Total files - - Total file size - - File size growth in a month - - Datasets growth in a month - - Pseudocodes (distinct) - - :param ctx: Combined type of a callback and rei struct - :param study_id: Unique identifier op study - - :returns: Dict with aggregated information for raw and processed datasets - """ - datasets = intake_youth_get_datasets_in_study(ctx, study_id) - - dataset_count = {'raw': 0, 'processed': 0} - dataset_growth = {'raw': 0, 'processed': 0} - dataset_file_count = {'raw': 0, 'processed': 0} - dataset_file_size = {'raw': 0, 'processed': 0} - dataset_file_growth = {'raw': 0, 'processed': 0} - dataset_pseudocodes = {'raw': [], 'processed': []} - - # Determine full last month reference point - import time - from datetime import datetime, date, timedelta - - last_day_of_prev_month = date.today().replace(day=1) - timedelta(days=1) - month = int(last_day_of_prev_month.strftime("%m")) - year = int(last_day_of_prev_month.strftime("%Y")) - - last_month = int(time.time() - int(datetime(year, month, int(date.today().strftime("%d")), 0, 0, 0).strftime('%s'))) - - dataset_paths = [] - for dataset in datasets: - # Meta attribute 'dataset_date_created' defines that a folder holds a complete set. - if 'dataset_date_created' in datasets[dataset]: - dataset_paths.append(dataset) - - if datasets[dataset]['version'].lower() == 'raw': - version = 'raw' - else: - version = 'processed' - - # if version in ['raw', 'processed']: - dataset_count[version] += 1 - - try: - date_created = int(datasets[dataset]['dataset_date_created']) - except Exception: - # This is nonsense and arose from an erroneous situation - date_created = last_month - - if date_created - last_month >= 0: - dataset_growth[version] += 1 - - try: - pseudocode = datasets[dataset]['pseudocode'] - if pseudocode not in dataset_pseudocodes[version]: - dataset_pseudocodes[version].append(pseudocode) - except KeyError: - continue - - zone = user.zone(ctx) - main_collection_iterator = genquery.row_iterator("DATA_NAME, COLL_NAME, DATA_SIZE, COLL_CREATE_TIME", - "COLL_NAME = '/{}/home/grp-vault-{}'".format(zone, study_id), - genquery.AS_LIST, ctx) - - subcollection_iterator = genquery.row_iterator("DATA_NAME, COLL_NAME, DATA_SIZE, COLL_CREATE_TIME", - "COLL_NAME like '/{}/home/grp-vault-{}/%'".format(zone, study_id), - genquery.AS_LIST, ctx) - - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - coll_name = row[1] - data_size = int(row[2]) - coll_create_time = int(row[3]) - - # Check whether the file is part of a dataset. - part_of_dataset = False - for dataset in dataset_paths: - if dataset in coll_name: - part_of_dataset = True - break - - # File is part of dataset. 
- if part_of_dataset: - # version = datasets[dataset]['version'] - - if datasets[dataset]['version'].lower() == 'raw': - version = 'raw' - else: - version = 'processed' - - dataset_file_count[version] += 1 - dataset_file_size[version] += data_size - - if coll_create_time - last_month >= 0: - dataset_file_growth[version] += data_size - - return { - 'total': { - 'totalDatasets': dataset_count['raw'] + dataset_count['processed'], - 'totalFiles': dataset_file_count['raw'] + dataset_file_count['processed'], - 'totalFileSize': dataset_file_size['raw'] + dataset_file_size['processed'], - 'totalFileSizeMonthGrowth': dataset_file_growth['raw'] + dataset_file_growth['processed'], - 'datasetsMonthGrowth': dataset_growth['raw'] + dataset_growth['processed'], - 'distinctPseudoCodes': len(dataset_pseudocodes['raw']) + len(dataset_pseudocodes['processed']), - }, - 'raw': { - 'totalDatasets': dataset_count['raw'], - 'totalFiles': dataset_file_count['raw'], - 'totalFileSize': dataset_file_size['raw'], - 'totalFileSizeMonthGrowth': dataset_file_growth['raw'], - 'datasetsMonthGrowth': dataset_growth['raw'], - 'distinctPseudoCodes': len(dataset_pseudocodes['raw']), - }, - 'notRaw': { - 'totalDatasets': dataset_count['processed'], - 'totalFiles': dataset_file_count['processed'], - 'totalFileSize': dataset_file_size['processed'], - 'totalFileSizeMonthGrowth': dataset_file_growth['processed'], - 'datasetsMonthGrowth': dataset_growth['processed'], - 'distinctPseudoCodes': len(dataset_pseudocodes['processed']), - }, - } diff --git a/intake_lock.py b/intake_lock.py deleted file mode 100644 index d31c202ce..000000000 --- a/intake_lock.py +++ /dev/null @@ -1,203 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for intake locking.""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import time - -import genquery - -import intake -from util import * - - -def intake_dataset_treewalk_change_status(ctx, collection, status, timestamp, remove): - """Treewalk dataset collection and change status. - - :param ctx: Combined type of a callback and rei struct - :param collection: Will change every time as it represents every collection that has to be processed - :param status: Status to set on dataset objects - :param timestamp: Timestamp of status change - :param remove: Boolean, set or remove status - """ - # 1. Change status on this collection. - if remove: - try: - avu.rmw_from_coll(ctx, collection, status, "%") - except msi.Error as e: - log.write(ctx, 'ERROR REMOVE') - log.write(ctx, e) - else: - log.write(ctx, 'step1 . set_on_col') - avu.set_on_coll(ctx, collection, status, timestamp) - - # 2. Change status on data objects located directly within the collection. - data_objects = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '{}'".format(collection), - genquery.AS_LIST, ctx - ) - - for row in data_objects: - if remove: - avu.rmw_from_data(ctx, "{}/{}".format(collection, row[0]), status, "%") - else: - log.write(ctx, 'step2 . set_on_data') - avu.set_on_data(ctx, "{}/{}".format(collection, row[0]), status, timestamp) - - # 3. Loop through subcollections. - subcollections = genquery.row_iterator( - "COLL_NAME", - "COLL_PARENT_NAME = '{}'".format(collection), - genquery.AS_LIST, ctx - ) - - for row in subcollections: - intake_dataset_treewalk_change_status(ctx, row[0], status, timestamp, remove) - - -def intake_dataset_change_status(ctx, object, is_collection, dataset_id, status, timestamp, remove): - """Change status on dataset. 
- - :param ctx: Combined type of a callback and rei struct - :param object: Will change every time as it represents every object of the dataset - :param is_collection: Indicator if dataset is within a collection - :param dataset_id: Dataset identifier - :param status: Status to set on dataset objects - :param timestamp: Timestamp of status change - :param remove: Boolean, set or remove status - """ - # Is dataset a collection? - if is_collection: - # Recursively change the status on all objects in the dataset - intake_dataset_treewalk_change_status(ctx, object, status, timestamp, remove) - else: - # Dataset is not a collection, find all the dataset objects. - data_objects = genquery.row_iterator("DATA_NAME", - "COLL_NAME = '{}' AND META_DATA_ATTR_NAME = 'dataset_toplevel' AND META_DATA_ATTR_VALUE = '{}'".format(object, dataset_id), - genquery.AS_LIST, ctx) - - # Change dataset status on all objects. - for row in data_objects: - if remove: - avu.rmw_from_data(ctx, "{}/{}".format(object, row[0]), status, "%") - else: - avu.set_on_data(ctx, "{}/{}".format(object, row[0]), status, timestamp) - - -def intake_dataset_lock(ctx, collection, dataset_id): - timestamp = str(int(time.time())) - - tl_info = intake.get_dataset_toplevel_objects(ctx, collection, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - log.write(ctx, tl_info) - - if not is_collection and len(tl_objects) == 0: - raise Exception("Dataset \"{}\" in collection {} not found".format(collection, dataset_id)) - - if is_collection: - intake_dataset_change_status(ctx, tl_objects[0], is_collection, dataset_id, "to_vault_lock", timestamp, False) - else: - # Dataset based on - for tl_object in tl_objects: - avu.set_on_data(ctx, tl_object, "to_vault_lock", timestamp) - - -def intake_dataset_unlock(ctx, collection, dataset_id): - timestamp = str(int(time.time())) - - tl_info = intake.get_dataset_toplevel_objects(ctx, collection, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - if not is_collection and len(tl_objects) == 0: - raise Exception("Dataset \"{}\" in collection {} not found".format(collection, dataset_id)) - - # It is possible that the status of the dataset status has moved on. 
- if is_collection: - intake_dataset_change_status(ctx, tl_objects[0], is_collection, dataset_id, "to_vault_lock", timestamp, True) - else: - # Dataset based on data objects - for tl_object in tl_objects: - avu.rmw_from_data(ctx, tl_object, "to_vault_lock", "%") - - -def intake_dataset_freeze(ctx, collection, dataset_id): - # timestamp = str(int(time.time())) - # top_collection = "" - # is_collection = "" - # ctx.uuYcDatasetGetTopLevel(collection, dataset_id, top_collection, is_collection) - - # intake_dataset_change_status(ctx, top_collection, is_collection, dataset_id, "to_vault_freeze", timestamp, False) - - timestamp = str(int(time.time())) - - tl_info = intake.get_dataset_toplevel_objects(ctx, collection, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - log.write(ctx, tl_info) - - if is_collection: - intake_dataset_change_status(ctx, tl_objects[0], is_collection, dataset_id, "to_vault_freeze", timestamp, False) - else: - # Dataset based on - for tl_object in tl_objects: - avu.set_on_data(ctx, tl_object, "to_vault_freeze", timestamp) - - -def intake_dataset_melt(ctx, collection, dataset_id): - # timestamp = str(int(time.time())) - # top_collection = "" - # is_collection = "" - # ctx.uuYcDatasetGetTopLevel(collection, dataset_id, top_collection, is_collection) - - # intake_dataset_change_status(ctx, top_collection, is_collection, dataset_id, "to_vault_freeze", timestamp, True) - - timestamp = str(int(time.time())) - - tl_info = intake.get_dataset_toplevel_objects(ctx, collection, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - # It is possible that the status of the dataset status has moved on. - if is_collection: - intake_dataset_change_status(ctx, tl_objects[0], is_collection, dataset_id, "to_vault_freeze", timestamp, True) - else: - # Dataset based on data objects - for tl_object in tl_objects: - avu.rmw_from_data(ctx, tl_object, "to_vault_freeze", "%") - - -def intake_dataset_object_get_status(ctx, path): - """Get the status of an object in a dataset. - - :param ctx: Combined type of a callback and rei struct - :param path: Path of dataset object - - :returns: Tuple booleans indicating if the object is locked or frozen - """ - locked = False - frozen = False - - if collection.exists(ctx, path): - attribute_names = genquery.row_iterator("META_COLL_ATTR_NAME", - "COLL_NAME = '{}'".format(path), - genquery.AS_LIST, ctx) - else: - coll_name, data_name = pathutil.chop(path) - attribute_names = genquery.row_iterator("META_DATA_ATTR_NAME", - "COLL_NAME = '{}' AND DATA_NAME = '{}'".format(coll_name, data_name), - genquery.AS_LIST, ctx) - - for row in attribute_names: - attribute_name = row[0] - if attribute_name in ["to_vault_lock", "to_vault_freeze"]: - locked = True - - if attribute_name == "to_vault_freeze": - frozen = True - break - - return locked, frozen diff --git a/intake_scan.py b/intake_scan.py deleted file mode 100644 index ba024c4cc..000000000 --- a/intake_scan.py +++ /dev/null @@ -1,462 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for intake scanning.""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import itertools -import time - -import genquery - -import intake -from intake_utils import dataset_parse_id, intake_scan_get_metadata_update -from util import * - - -def intake_scan_collection(ctx, root, scope, in_dataset, found_datasets): - """Recursively scan a directory in a Youth Cohort intake. 
- - :param ctx: Combined type of a callback and rei struct - :param root: the directory to scan - :param scope: a scoped kvlist buffer - :param in_dataset: whether this collection is within a dataset collection - :param found_datasets: collection of subscopes that were found in order to report toplevel datasets in the scanning process - - :returns: Found datasets - """ - - # Loop until pseudocode, experiment type and wave are complete. - # But the found values can be overwritten when deeper levels are found. - - # Scan files under root - iter = genquery.row_iterator( - "DATA_NAME, COLL_NAME", - "COLL_NAME = '" + root + "'", - genquery.AS_LIST, ctx - ) - for row in iter: - path = row[1] + '/' + row[0] - - # Determene lock state for object (no collectoin - locked_state = object_is_locked(ctx, path, False) - - if locked_state['locked'] or locked_state['frozen']: - continue - - remove_dataset_metadata(ctx, path, False) - scan_mark_scanned(ctx, path, False) - - parent_in_dataset = in_dataset - metadata_update = intake_scan_get_metadata_update(ctx, path, False, in_dataset, scope) - - if metadata_update["in_dataset"]: - apply_dataset_metadata(ctx, path, metadata_update["new_metadata"], False) - if not parent_in_dataset: - # We found a top-level dataset data object. - found_datasets.append(metadata_update["new_metadata"]) - else: - apply_partial_metadata(ctx, metadata_update["new_metadata"], path, False) - avu.set_on_data(ctx, path, "unrecognized", "Experiment type, wave or pseudocode missing from path") - - # Scan collections under root - iter = genquery.row_iterator( - "COLL_NAME", - "COLL_PARENT_NAME = '" + root + "'", - genquery.AS_LIST, ctx - ) - counter = 0 - for row in iter: - path = row[0] - counter = counter + 1 - dirname = pathutil.basename(path) - - if dirname != '/': - # get locked /frozen status - locked_state = object_is_locked(ctx, path, True) - - if locked_state['locked'] or locked_state['frozen']: - continue - - remove_dataset_metadata(ctx, path, True) - scan_mark_scanned(ctx, path, True) - - parent_in_dataset = in_dataset - metadata_update = intake_scan_get_metadata_update(ctx, path, True, in_dataset, scope) - - if metadata_update["in_dataset"]: - apply_dataset_metadata(ctx, path, metadata_update["new_metadata"], True) - if not parent_in_dataset: - # We found a new top-level dataset data object. 
- found_datasets.append(metadata_update["new_metadata"]) - else: - apply_partial_metadata(ctx, metadata_update["new_metadata"], path, True) - - found_datasets = intake_scan_collection(ctx, - path, - metadata_update["new_metadata"], - parent_in_dataset or metadata_update["in_dataset"], - found_datasets) - - return found_datasets - - -def object_is_locked(ctx, path, is_collection): - """Returns whether given object in path (collection or dataobject) is locked or frozen - - :param ctx: Combined type of a callback and rei struct - :param path: Path to object or collection - :param is_collection: Whether path contains a collection or data object - - :returns: Returns locked state - """ - locked_state = {"locked": False, - "frozen": False} - - if is_collection: - iter = genquery.row_iterator( - "META_COLL_ATTR_NAME", - "COLL_NAME = '" + path + "'", - genquery.AS_LIST, ctx - ) - for row in iter: - if row[0] in ['to_vault_lock', 'to_vault_freeze']: - locked_state['locked'] = True - if row[0] == 'to_vault_freeze': - locked_state['frozen'] = True - else: - parent_coll = pathutil.dirname(path) - iter = genquery.row_iterator( - "META_DATA_ATTR_NAME", - "COLL_NAME = '" + parent_coll + "' AND DATA_NAME = '" + pathutil.basename(path) + "'", - genquery.AS_LIST, ctx - ) - # return locked_state - for row in iter: - if row[0] in ['to_vault_lock', 'to_vault_freeze']: - locked_state['locked'] = True - if row[0] == 'to_vault_freeze': - locked_state['frozen'] = True - - return locked_state - - -def remove_dataset_metadata(ctx, path, is_collection): - """Remove all intake metadata from dataset. - - :param ctx: Combined type of a callback and rei struct - :param path: Path to collection or data object - :param is_collection: Whether is a collection or data object - """ - intake_metadata = ["wave", - "experiment_type", - "pseudocode", - "version", - "dataset_id", - "dataset_toplevel", - "error", - "warning", - "dataset_error", - "dataset_warning", - "unrecognized", - "object_count", - "object_errors", - "object_warnings"] - intake_metadata_set = set(intake_metadata) - - # Add the following two lines to remove accumulated metadata during testing. - # "comment" - # "scanned"] - - if is_collection: - iter = genquery.row_iterator( - "COLL_ID, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + path + "'", - genquery.AS_LIST, ctx - ) - else: - iter = genquery.row_iterator( - "DATA_ID, META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE", - "COLL_NAME = '" + pathutil.dirname(path) + "' AND DATA_NAME = '" + pathutil.basename(path) + "'", - genquery.AS_LIST, ctx - ) - - for _row in iter: - metadata_name = _row[1] - if metadata_name in intake_metadata_set: - if is_collection: - try: - avu.rmw_from_coll(ctx, path, metadata_name, '%') - except Exception as e: - log.write(ctx, "Warning: unable to remove metadata attr {} from {}".format(metadata_name, path)) - log.write(ctx, "Removing metadata failed with exception {}".format(str(e))) - else: - try: - avu.rmw_from_data(ctx, path, metadata_name, '%') - except Exception as e: - log.write(ctx, "Warning: unable to remove metadata attr {} from {}".format(metadata_name, path)) - log.write(ctx, "Removing metadata failed with exception {}".format(str(e))) - - -def scan_mark_scanned(ctx, path, is_collection): - """Sets the username of the scanner and a timestamp as metadata on the scanned object. - - :param ctx: Combined type of a callback and rei struct - :param path: Path on which to add scan indication to - :param is_collection: Is scanned object a collection? 
- """ - timestamp = int(time.time()) - user_and_timestamp = user.name(ctx) + ':' + str(timestamp) # str(datetime.date.today()) - - if is_collection: - avu.set_on_coll(ctx, path, 'scanned', user_and_timestamp) - else: - avu.set_on_data(ctx, path, 'scanned', user_and_timestamp) - - -def apply_dataset_metadata(ctx, path, scope, is_collection): - """Apply dataset metadata to an object in a dataset. - - :param ctx: Combined type of a callback and rei struct - :param path: Path to the object - :param scope: A scanner scope containing WEPV values - :param is_collection: Whether the object is a collection - """ - for key in scope: - if scope[key]: - if is_collection: - avu.set_on_coll(ctx, path, key, scope[key]) - else: - avu.set_on_data(ctx, path, key, scope[key]) - - -def apply_partial_metadata(ctx, scope, path, is_collection): - """Apply any available id component metadata to the given object. - - To be called only for objects outside datasets. When inside a dataset - (or at a dataset toplevel), use intake_apply_dataset_metadata() instead. - - :param ctx: Combined type of a callback and rei struct - :param scope: A scanner scope containing some WEPV values - :param path: Path to the object - :param is_collection: Whether the object is a collection - """ - keys = ['wave', 'experiment_type', 'pseudocode', 'version'] - for key in keys: - if key in scope: - if scope[key]: - if is_collection: - avu.set_on_coll(ctx, path, key, scope[key]) - else: - avu.set_on_data(ctx, path, key, scope[key]) - - -def dataset_add_error(ctx, top_levels, is_collection_toplevel, text, suppress_duplicate_avu_error=False): - """Add a dataset error to all given dataset toplevels. - - :param ctx: Combined type of a callback and rei struct - :param top_levels: A list of toplevel datasets - :param is_collection_toplevel: Indication of whether it is a collection or object - :param text: Error text - :param suppress_duplicate_avu_error: If an AVU already exists, suppress the irods-error. Allow for this situation - - :raises Exception: Raises exception when associating error to collection or data object fails - """ - for tl in top_levels: - if is_collection_toplevel: - try: - avu.associate_to_coll(ctx, tl, "dataset_error", text) - except msi.Error as e: - # iRODS errorcode 809000 (CATALOG_ALREADY_HAS_ITEM_BY_THAT_NAME) - if suppress_duplicate_avu_error and str(e).find("809000") > -1: - log.write(ctx, "Trying to associate dataset_error already present on collection: {}".format(tl)) - log.write(ctx, "Suppress error handling for AVU: dataset_error - {}".format(text)) - else: - raise Exception(e) - else: - try: - avu.associate_to_data(ctx, tl, "dataset_error", text) - except msi.Error as e: - # iRODS errorcode 809000 (CATALOG_ALREADY_HAS_ITEM_BY_THAT_NAME) - if suppress_duplicate_avu_error and str(e).find("809000") > -1: - log.write(ctx, "Trying to associate dataset_error already present on data object: {}".format(tl)) - log.write(ctx, "Suppress error handling for AVU: dataset_error - {}".format(text)) - else: - raise Exception(e) - - -def dataset_get_ids(ctx, coll): - """Find dataset ids under collection. 
- :param ctx: Combined type of a callback and rei struct - :param coll: Collection name for which to find dataset-ids - :returns: Returns a set of dataset ids - """ - data_ids = set() - - # Get distinct data_ids - main_collection_iterator = genquery.row_iterator( - "META_DATA_ATTR_VALUE", - "COLL_NAME = '" + coll + "' AND META_DATA_ATTR_NAME = 'dataset_id' ", - genquery.AS_LIST, ctx - ) - - subcollection_iterator = genquery.row_iterator( - "META_DATA_ATTR_VALUE", - "COLL_NAME LIKE '" + coll + "/%' AND META_DATA_ATTR_NAME = 'dataset_id' ", - genquery.AS_LIST, ctx - ) - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - if row[0]: - data_ids.add(row[0]) - - return data_ids - - -def intake_check_datasets(ctx, root): - """Run checks on all datasets under root. - - :param ctx: Combined type of a callback and rei struct - :param root: The collection to get datasets for - """ - dataset_ids = dataset_get_ids(ctx, root) - for dataset_id in dataset_ids: - intake_check_dataset(ctx, root, dataset_id) - - -def intake_check_dataset(ctx, root, dataset_id): - """Run checks on the dataset specified by the given dataset id. - - This function adds object counts and error counts to top-level objects within the dataset. - For historical reasons, it also adds a warning count, which is always 0. - - :param ctx: Combined type of a callback and rei struct - :param root: Collection name - :param dataset_id: Dataset identifier - """ - tl_info = intake.get_dataset_toplevel_objects(ctx, root, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - # Check validity of wav - waves = ["20w", "30w", "0m", "5m", "10m", "3y", "6y", "9y", "12y", "15y"] - components = dataset_parse_id(dataset_id) - if components['wave'] not in waves: - dataset_add_error(ctx, tl_objects, is_collection, "The wave '" + components['wave'] + "' is not in the list of accepted waves") - - # check presence of wave, pseudo-ID and experiment - if '' in [components['wave'], components['experiment_type'], components['pseudocode']]: - # Suppress error handing and continue normal processing should a situation arise where Wepv missing is already present on the dataobject/collection - dataset_add_error(ctx, tl_objects, is_collection, "Wave, experiment type or pseudo-ID missing", True) - - for tl in tl_objects: - # Save the aggregated counts of #objects, #warnings, #errors on object level - - count = get_aggregated_object_count(ctx, dataset_id, tl) - if is_collection: - avu.set_on_coll(ctx, tl, "object_count", str(count)) - else: - avu.set_on_data(ctx, tl, "object_count", str(count)) - - count = get_aggregated_object_error_count(ctx, tl) - if is_collection: - avu.set_on_coll(ctx, tl, "object_errors", str(count)) - else: - avu.set_on_data(ctx, tl, "object_errors", str(count)) - - count = 0 - if is_collection: - avu.set_on_coll(ctx, tl, "object_warnings", str(count)) - else: - avu.set_on_data(ctx, tl, "object_warnings", str(count)) - - -def get_rel_paths_objects(ctx, root, dataset_id): - """Get a list of relative paths to all data objects in a dataset. - - :param ctx: Combined type of a callback and rei struct - :param root: Root path of the dataset - :param dataset_id: Dataset identifier - - :returns: List of objects of relative object paths (e.g. file1.dat, some-subdir/file2.dat...) 
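The per-dataset checks above amount to two validations on the parsed dataset id: the wave must be one of ten accepted values, and wave, experiment type and pseudocode must all be non-empty. A standalone sketch of those rules, assuming a plain dict of parsed components (the helper name and return shape are illustrative):

ACCEPTED_WAVES = ["20w", "30w", "0m", "5m", "10m", "3y", "6y", "9y", "12y", "15y"]

def dataset_check_errors(components):
    """Return the error texts that the removed intake_check_dataset() would attach."""
    errors = []
    if components.get("wave") not in ACCEPTED_WAVES:
        errors.append("The wave '" + components.get("wave", "") + "' is not in the list of accepted waves")
    if "" in [components.get("wave", ""), components.get("experiment_type", ""), components.get("pseudocode", "")]:
        errors.append("Wave, experiment type or pseudo-ID missing")
    return errors

# An unknown wave and a missing pseudocode trigger both checks.
print(dataset_check_errors({"wave": "2y", "experiment_type": "echo", "pseudocode": ""}))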
- """ - tl_info = intake.get_dataset_toplevel_objects(ctx, root, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - rel_path_objects = [] - - # get the correct parent_collection - try: - if is_collection: - parent_coll = tl_objects[0] - else: - parent_coll = pathutil.dirname(tl_objects[0]) - except Exception: - parent_coll = '/' - - main_collection_iterator = genquery.row_iterator( - "DATA_NAME, COLL_NAME", - "COLL_NAME = '" + parent_coll + "' AND META_DATA_ATTR_NAME = 'dataset_id' AND META_DATA_ATTR_VALUE = '" + dataset_id + "' ", - genquery.AS_LIST, ctx - ) - - subcollection_iterator = genquery.row_iterator( - "DATA_NAME, COLL_NAME", - "COLL_NAME LIKE '" + parent_coll + "/%' AND META_DATA_ATTR_NAME = 'dataset_id' AND META_DATA_ATTR_VALUE = '" + dataset_id + "' ", - genquery.AS_LIST, ctx - ) - - for row in itertools.chain(main_collection_iterator, subcollection_iterator): - # Add objects including relative paths - rel_path_objects.append(row[1][len(parent_coll):] + '/' + row[0]) - - return rel_path_objects - - -def get_aggregated_object_count(ctx, dataset_id, tl_collection): - """Return total amounts of objects. - - :param ctx: Combined type of a callback and rei struct - :param dataset_id: Dataset id - :param tl_collection: Collection name of top level - - :returns: Aggregated object count - """ - main_collection_iterator = genquery.row_iterator( - "DATA_ID", - "COLL_NAME = '" + tl_collection + "' AND META_DATA_ATTR_NAME = 'dataset_id' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "' ", - genquery.AS_LIST, ctx - ) - - subcollection_iterator = genquery.row_iterator( - "DATA_ID", - "COLL_NAME like '" + tl_collection + "/%' AND META_DATA_ATTR_NAME = 'dataset_id' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "' ", - genquery.AS_LIST, ctx - ) - - return len(list(main_collection_iterator) + list(subcollection_iterator)) - - -def get_aggregated_object_error_count(ctx, tl_collection): - """Return total amount of object errors. - - :param ctx: Combined type of a callback and rei struct - :param tl_collection: Collection name of top level - - :returns: Total amount of object errors - """ - main_collection_iterator = genquery.row_iterator( - "DATA_ID", - "COLL_NAME = '" + tl_collection + "' AND META_DATA_ATTR_NAME = 'error' ", - genquery.AS_LIST, ctx - ) - - subcollection_iterator = genquery.row_iterator( - "DATA_ID", - "COLL_NAME like '" + tl_collection + "/%' AND META_DATA_ATTR_NAME = 'error' ", - genquery.AS_LIST, ctx - ) - - return len(list(main_collection_iterator) + list(subcollection_iterator)) diff --git a/intake_utils.py b/intake_utils.py deleted file mode 100644 index ff90cf7f6..000000000 --- a/intake_utils.py +++ /dev/null @@ -1,204 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Utility functions for the intake module. These are in a separate file so that - we can test the main logic without having iRODS-related dependencies in the way.""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import os -import re - - -def intake_tokens_identify_dataset(tokens): - """Check whether the tokens gathered so far are sufficient for identifying a dataset. 
- - :param tokens: A dictionary of tokens - - :returns: Returns whether a dataset is identified - """ - required = ['wave', 'experiment_type', 'pseudocode'] # version is optional - - missing = 0 - for req_token in required: - # required tokens must be present and must have a value - if req_token not in tokens or tokens[req_token] == "": - missing = missing + 1 - - return (missing == 0) - - -def intake_ensure_version_present(ctx, metadata): - """Adds a version attribute with a default value to metadata if it is not yet present. - - :param ctx: Combined type of a callback and rei struct - :param metadata: Dictionary with intake module metadata - """ - if "version" not in metadata: - metadata["version"] = "Raw" - - -def intake_extract_tokens_from_name(ctx, path, scoped_buffer): - """Extract one or more tokens from a file / directory name and add dataset information as metadata. - :param ctx: Combined type of a callback and rei struct - :param path: Full path of the data object or collection - :param scoped_buffer: Holds dataset buffer with prefilled keys - :returns: Returns extended scope buffer - """ - basename = os.path.basename(path) - name_without_ext = os.path.splitext(basename)[0] - parts = re.split("[_-]", name_without_ext) - for part in parts: - scoped_buffer.update(intake_extract_tokens(ctx, part)) - return scoped_buffer - - -def intake_extract_tokens(ctx, string): - """Extract tokens from a string and return as dict. - - :param ctx: Combined type of a callback and rei struct - :param string: Token of which to be determined whether experiment type, version etc - - :returns: Returns found kv's - """ - exp_types = ["pci", - "echo", - "facehouse", - "faceemo", - "coherence", - "infprogap", - "infsgaze", - "infpop", - # "mriinhibition", - # "mriemotion", - # "mockinhibition", - "chprogap", - "chantigap", - "chsgaze", - "pciconflict", - "pcivacation", - "peabody", - "discount", - "cyberball", - "trustgame", - "other", - # MRI: - "inhibmockbehav", - "inhibmribehav", - "emotionmribehav", - "emotionmriscan", - "anatomymriscan", - "restingstatemriscan", - "dtiamriscan", - "dtipmriscan", - "mriqcreport", - "mriqceval", - "vasmri", - "vasmock", - # - "looklisten", - "handgame", - "infpeabody", - "delaygratification", - "dtimriscan", - "inhibmriscan", - # 16-Apr-2019 fbyoda email request new exp type: - "chdualet", - # 15-Feb-2021 fbyoda email request new exp type: - "functionalmriscan", - "infdualet", - "vrbartbehav", - "infssat"] - - str_lower = string.lower() - str_upper = string.upper() - str_for_pseudocode_test = string.split('.')[0] - str_for_version_test = string.translate(None, ".") - - foundKVs = {} - if re.match('^[0-9]{1,2}[wmy]$', str_lower) is not None: - # String contains a wave. - # Wave validity is checked later on in the dataset checks. - foundKVs["wave"] = str_lower - elif re.match('^[bap][0-9]{5}$', str_for_pseudocode_test.lower()) is not None: - # String contains a pseudocode. - foundKVs["pseudocode"] = str_upper[0:len(str_for_pseudocode_test)] - elif re.match('^[Vv][Ee][Rr][A-Z][a-zA-Z0-9-]*$', str_for_version_test) is not None: - foundKVs["version"] = string[3:len(string)] - elif str_lower in exp_types: - foundKVs["experiment_type"] = str_lower - - return foundKVs - - -def intake_scan_get_metadata_update(ctx, path, is_collection, in_dataset, parent_metadata): - """Determine metadata to be updated for a particular collection or data object, based - on its name and parent metadata. 
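The token extraction removed above classifies each underscore- or hyphen-separated part of a name (extension stripped) with regular expressions: a wave such as 10m, a pseudocode such as B00001, a version prefixed with "Ver", or a known experiment type. The deleted code runs on Python 2 only (str.translate(None, ".") does not work on Python 3 strings); a minimal Python 3 sketch of the same classification, with the experiment-type list abbreviated for brevity:

import re

EXPERIMENT_TYPES = {"pci", "echo", "chantigap", "discount"}  # abbreviated for the example

def classify_token(token):
    """Classify one name part as wave, pseudocode, version or experiment type."""
    lowered = token.lower()
    pseudocode_part = token.split(".")[0]
    version_part = token.replace(".", "")  # Python 3 equivalent of str.translate(None, ".")
    if re.match(r"^[0-9]{1,2}[wmy]$", lowered):
        return {"wave": lowered}
    if re.match(r"^[bap][0-9]{5}$", pseudocode_part.lower()):
        return {"pseudocode": token.upper()[0:len(pseudocode_part)]}
    if re.match(r"^[Vv][Ee][Rr][A-Z][a-zA-Z0-9-]*$", version_part):
        return {"version": token[3:]}
    if lowered in EXPERIMENT_TYPES:
        return {"experiment_type": lowered}
    return {}

tokens = {}
for part in re.split("[_-]", "B00001_10m_echo_VerA"):
    tokens.update(classify_token(part))
print(tokens)  # {'pseudocode': 'B00001', 'wave': '10m', 'experiment_type': 'echo', 'version': 'A'}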
- - This function is separate from the function that actually performs the updates, so - that we can test the logic separately. - - :param ctx: Combined type of a callback and rei struct - :param path: Full path of the data object or collection - :param is_collection: true if it's a collection, false if it's a data object - :param in_dataset: true if the parent already has complete WEP(V) attributes. Otherwise false. - :param parent_metadata: dict containing the intake module metadata of the parent collection ( if any) - - :returns: Returns a dictionary with the following keys / values: - new_metadata: dictionary of new metadata to apply to this data object or collection - in_dataset: true if current object (along with values passed from parents) has complete WEP(V) values. - otherwise false. - """ - - local_metadata = parent_metadata.copy() - - result = {"new_metadata": local_metadata, "in_dataset": in_dataset} - - if in_dataset: - # If we already are in a dataset, we get all the metadata from the parent. We - # cannot override attributes in this case. However we need to remove the top-level - # attribute, because the present object is within in a dataset, and thus not a top-level - # data object. - if "dataset_toplevel" in local_metadata: - del [local_metadata["dataset_toplevel"]] - else: - intake_extract_tokens_from_name(ctx, path, local_metadata) - if intake_tokens_identify_dataset(local_metadata): - intake_ensure_version_present(ctx, local_metadata) - local_metadata["directory"] = path if is_collection else os.path.dirname(path) - local_metadata["dataset_id"] = dataset_make_id(local_metadata) - local_metadata["dataset_toplevel"] = dataset_make_id(local_metadata) - result["in_dataset"] = True - else: - # result["in_dataset"] is already set to false - pass - - return result - - -def dataset_make_id(scope): - """Construct a dataset based on WEPV and directory. - - :param scope: Create a dataset id - - :returns: Dataset identifier - """ - return scope['wave'] + '\t' + scope['experiment_type'] + '\t' + scope['pseudocode'] + '\t' + scope['version'] + '\t' + scope['directory'] - - -def dataset_parse_id(dataset_id): - """Parse a dataset into its consructive data. - - :param dataset_id: Dataset identifier - - :returns: Dataset as a dict - """ - dataset_parts = dataset_id.split('\t') - dataset = {} - dataset['wave'] = dataset_parts[0] - dataset['experiment_type'] = dataset_parts[1] - dataset['pseudocode'] = dataset_parts[2] - dataset['version'] = dataset_parts[3] - dataset['directory'] = dataset_parts[4] - - return dataset diff --git a/intake_vault.py b/intake_vault.py deleted file mode 100644 index bc0e85258..000000000 --- a/intake_vault.py +++ /dev/null @@ -1,412 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for intake vault.""" - -__copyright__ = 'Copyright (c) 2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import itertools -import time - -import genquery - -import intake -import intake_lock -import intake_scan -from util import * - -__all__ = ['rule_intake_to_vault'] - - -@rule.make(inputs=range(2), outputs=range(2, 2)) -def rule_intake_to_vault(ctx, intake_root, vault_root): - # 1. add to_vault_freeze metadata lock to the dataset - # 2. check that dataset does not yet exist in the vault - # 3. copy dataset to vault with its metadata - # 4. 
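A dataset id in the removed module is simply the WEPV values plus the directory joined by tab characters, and dataset_parse_id() is the exact inverse of dataset_make_id(). A small round-trip sketch with illustrative values:

FIELDS = ["wave", "experiment_type", "pseudocode", "version", "directory"]

def make_id(scope):
    # Tab-separated WEPV + directory, as in the removed dataset_make_id().
    return "\t".join(scope[field] for field in FIELDS)

def parse_id(dataset_id):
    # Exact inverse, as in the removed dataset_parse_id().
    return dict(zip(FIELDS, dataset_id.split("\t")))

scope = {"wave": "3y", "experiment_type": "discount", "pseudocode": "B00000",
         "version": "Raw", "directory": "/tempZone/home/grp-intake-initial/dir"}
assert parse_id(make_id(scope)) == scope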
remove dataset from intake - # upon any error: - # - delete partial data from vault - # - add error to intake dataset metadata - # - remove locks on intake dataset (to_vault_freeze, to_vault_lock) - - # note that we have to allow for multiple types of datasets: - # type A: a single toplevel collection with a tree underneath - # type B: one or more datafiles located within the same collection - # processing varies slightly between them, so process each type in turn - # - - # status: 0 is success, nonzero is error - status = 0 - # counter of datasets moved to the vault area - datasets_moved = 0 - - # TYPE A: - c_main_collection_iterator = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "META_COLL_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME = '" + intake_root + "'", - genquery.AS_LIST, ctx) - - for row in itertools.chain(c_main_collection_iterator): - toplevel_collection = row[0] - dataset_id = row[1] - # Get status ( locked / frozen ) - locked_state = intake_scan.object_is_locked(ctx, toplevel_collection, True) - if locked_state['locked']: - # Freeze the dataset - intake_lock.intake_dataset_freeze(ctx, toplevel_collection, dataset_id) - - # Dataset frozen, now move to vault and remove from intake area - status = dataset_collection_move_2_vault(ctx, toplevel_collection, dataset_id, vault_root) - if status == 0: - datasets_moved += 1 - - # TYPE B: - d_main_collection_iterator = genquery.row_iterator( - "COLL_NAME, META_DATA_ATTR_VALUE", - "META_DATA_ATTR_NAME = 'dataset_toplevel' AND COLL_NAME = '" + intake_root + "'", - genquery.AS_LIST, ctx) - - for row in itertools.chain(d_main_collection_iterator): - toplevel_collection = row[0] - dataset_id = row[1] - # check if to_vault_lock exists on all the dataobjects of this dataset - all_locked = True - iter2 = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '" + toplevel_collection + "' " - "AND META_DATA_ATTR_NAME = 'dataset_toplevel' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "'", - genquery.AS_LIST, ctx) - - for row2 in iter2: - locked_state = intake_scan.object_is_locked(ctx, toplevel_collection + '/' + row2[0], False) - all_locked = all_locked and locked_state['locked'] - if not all_locked: - break - - if all_locked: - # Freeze the dataset - intake_lock.intake_dataset_freeze(ctx, toplevel_collection, dataset_id) - - # Dataset frozen, now move to fault and remove from intake area - status = dataset_objects_only_move_2_vault(ctx, toplevel_collection, dataset_id, vault_root) - if status == 0: - datasets_moved += 1 - - if datasets_moved: - log.write(ctx, "Datasets moved to the vault: " + str(datasets_moved)) - - return 0 - - -def dataset_collection_move_2_vault(ctx, toplevel_collection, dataset_id, vault_root): - """Move intake datasets consisting of collections to the vault - - :param ctx: Combined type of a callback and rei struct - :param toplevel_collection: Toplevel collection - :param dataset_id: Identifier of dataset - :param vault_root: Root path of vault - - :returns: Status - """ - status = 0 - if vault_dataset_exists(ctx, vault_root, dataset_id): - # duplicate dataset, signal error and throw out of vault queue - log.write(ctx, "INFO: version already exists in vault: " + dataset_id) - message = "Duplicate dataset, version already exists in vault" - intake_scan.dataset_add_error(ctx, [toplevel_collection], True, message) - intake_lock.intake_dataset_melt(ctx, toplevel_collection, dataset_id) - intake_lock.intake_dataset_unlock(ctx, toplevel_collection, dataset_id) - return 1 - - # Dataset does not exist - move 
from research to vault area - vault_path = get_dataset_path(vault_root, dataset_id) - - vault_parent = pathutil.chop(vault_path)[0] - try: - collection.create(ctx, vault_parent, "1") - except Exception: - log.write(ctx, "ERROR: parent collection could not be created " + vault_parent) - return 2 - - # variable for treewalk interface - buffer = {} - buffer["source"] = toplevel_collection - buffer["destination"] = vault_path - - status = vault_tree_walk_collection(ctx, toplevel_collection, buffer, vault_walk_ingest_object) - - # reset buffer - buffer = {} - if status == 0: - # stamp the vault dataset collection with additional metadata - avu.set_on_coll(ctx, vault_path, "dataset_date_created", str(int(time.time()))) - - # and finally remove the dataset original in the intake area - try: - collection.remove(ctx, toplevel_collection) - except Exception: - log.write(ctx, "ERROR: unable to remove intake collection " + toplevel_collection) - return 3 - else: - # move failed (partially), cleanup vault - # NB: keep the dataset in the vault queue so we can retry some other time - log.write("ERROR: Ingest failed for " + dataset_id + ", error = " + status) - status = vault_tree_walk_collection(ctx, vault_path, buffer, vault_walk_remove_object) - - return status - - -def dataset_objects_only_move_2_vault(ctx, toplevel_collection, dataset_id, vault_root): - """Move intake datasets consisting of data objects to the vault - - :param ctx: Combined type of a callback and rei struct - :param toplevel_collection: Toplevel collection - :param dataset_id: Identifier of dataset - :param vault_root: Root path of vault - - :returns: Status - """ - status = 0 - if vault_dataset_exists(ctx, vault_root, dataset_id): - # duplicate dataset, signal error and throw out of vault queue - log.write(ctx, "INFO: version already exists in vault: " + dataset_id) - message = "Duplicate dataset, version already exists in vault" - - tl_info = intake.get_dataset_toplevel_objects(ctx, toplevel_collection, dataset_id) - is_collection = tl_info['is_collection'] - tl_objects = tl_info['objects'] - - # dataset_add_error(ctx, tl_objects, is_collection, "The wave '" + components['wave'] + "' is not in the list of accepted waves") - - intake_scan.dataset_add_error(ctx, tl_objects, is_collection, message) - intake_lock.intake_dataset_melt(ctx, toplevel_collection, dataset_id) - intake_lock.intake_dataset_unlock(ctx, toplevel_collection, dataset_id) - return 1 - - # Dataset does not exist - move it from research to vault space - # new dataset(version) we can safely ingest into vault - vault_path = get_dataset_path(vault_root, dataset_id) - - # create path to and including the toplevel collection (will create in-between levels) - try: - collection.create(ctx, vault_path, "1") - except Exception: - log.write(ctx, "ERROR: parent collection could not be created " + vault_path) - return 2 - - # stamp the vault dataset collection with default metadata - try: - vault_dataset_add_default_metadata(ctx, vault_path, dataset_id) - except Exception: - log.write(ctx, "ERROR: default metadata could not be added to " + vault_path) - return 3 - - # copy data objects to the vault - iter = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '" + toplevel_collection + "' " - "AND META_DATA_ATTR_NAME = 'dataset_toplevel' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "' ", - genquery.AS_LIST, ctx) - - for row in iter: - intake_path = toplevel_collection + '/' + row[0] - - status = vault_ingest_object(ctx, intake_path, False, vault_path + "/" + row[0]) - if 
status: - break - - # data ingested, what's left is to delete the original in intake area - # this will also melt/unfreeze etc because metadata is removed too - iter = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '" + toplevel_collection + "' " - "AND META_DATA_ATTR_NAME = 'dataset_toplevel' " - "AND META_DATA_ATTR_VALUE = '" + dataset_id + "' ", - genquery.AS_LIST, ctx) - - for row in iter: - intake_path = toplevel_collection + "/" + row[0] - # Now remove data object in intake - try: - data_object.remove(ctx, intake_path, force=True) - except Exception: - log.write(ctx, "ERROR: unable to remove intake object " + intake_path) - # error occurred during ingest, cleanup vault area and relay the error to user - # NB: keep the dataset in the vault queue so we can retry some other time - log.write(ctx, "ERROR: Ingest failed for *datasetId error = *status") - - # reset buffer interface - buffer = {} - status = vault_tree_walk_collection(ctx, vault_path, buffer, vault_walk_remove_object) - - # Finally return status - return status - - -def vault_ingest_object(ctx, object_path, is_collection, vault_path): - # from the original object only the below list is copied to the vault object, other info is ignored - copied_metadata = ["wave", "experiment_type", "pseudocode", "version", - "error", "warning", "comment", "dataset_error", - "dataset_warning", "datasetid"] - - if not is_collection: - # first chksum the original file then use it to verify the vault copy - try: - ctx.msiDataObjChksum(object_path, "forceChksum=", 0) - ctx.msiDataObjCopy(object_path, vault_path, 'verifyChksum=', 0) - except msi.Error: - return 1 - - coll, dataname = pathutil.chop(object_path) - - iter = genquery.row_iterator( - "META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE", - "COLL_NAME = '" + coll + "' AND DATA_NAME = '" + dataname + "' ", - genquery.AS_LIST, ctx) - - for row in iter: - if row[0] in copied_metadata: - avu.set_on_data(ctx, vault_path, row[0], row[1]) - - # add metadata found in system info - iter = genquery.row_iterator( - "DATA_OWNER_NAME, DATA_OWNER_ZONE, DATA_CREATE_TIME", - "COLL_NAME = '" + coll + "' AND DATA_NAME = '" + dataname + "' ", - genquery.AS_LIST, ctx) - - for row in iter: - avu.set_on_data(ctx, vault_path, "submitted_by=", row[0] + '#' + row[1]) - avu.set_on_data(ctx, vault_path, "submitted_date", row[2]) - else: - # CREATE COLLECTION - try: - collection.create(ctx, vault_path, "1") - except msi.Error: - return 1 - - iter = genquery.row_iterator( - "META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + object_path + "' ", - genquery.AS_LIST, ctx) - - for row in iter: - if row[0] in copied_metadata: - avu.set_on_coll(ctx, vault_path, row[0], row[1]) - - # add metadata found in system info - iter = genquery.row_iterator( - "COLL_OWNER_NAME, COLL_OWNER_ZONE, COLL_CREATE_TIME", - "COLL_NAME = '" + object_path + "' ", - genquery.AS_LIST, ctx) - - for row in iter: - avu.set_on_coll(ctx, vault_path, "submitted_by=", row[0] + '#' + row[1]) - avu.set_on_coll(ctx, vault_path, "submitted_date", row[2]) - - return 0 - - -def vault_walk_remove_object(ctx, item_parent, item_name, is_collection): - status = 0 - try: - if is_collection: - collection.remove(ctx, item_parent + '/' + item_name) - else: - data_object.remove(ctx, item_parent + '/' + item_name, force=True) - except Exception: - status = 1 - - return status - - -def vault_walk_ingest_object(ctx, item_parent, item_name, is_collection, buffer): - source_path = item_parent + '/' + item_name - dest_path = buffer["destination"] - if source_path 
!= buffer["source"]: - # rewrite path to copy objects that are located underneath the toplevel collection - source_length = len(source_path) - relative_path = source_path[(len(buffer["source"]) + 1): source_length] - dest_path = buffer["destination"] + '/' + relative_path - - return vault_ingest_object(ctx, source_path, is_collection, dest_path) - - -def vault_tree_walk_collection(ctx, path, buffer, rule_to_process): - """Walk a subtree and perform 'rule_to_process' per item. - - :param ctx: Combined type of a callback and rei struct - :param path: Path of collection to treewalk - :param buffer: Exclusively to be used by the rule we will can - :param rule_to_process: Name of the rule to be executed in the context of a tree-item - - :returns: Error status - """ - parent_collection, collection = pathutil.chop(path) - - error = 0 - # first deal with any subcollections within this collection - iter = genquery.row_iterator( - "COLL_NAME", - "COLL_PARENT_NAME = '" + path + "' ", - genquery.AS_LIST, ctx) - for row in iter: - error = vault_tree_walk_collection(ctx, row[0], buffer, rule_to_process) - if error: - break - - # when done then process the dataobjects directly located within this collection - if error == 0: - iter = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '" + path + "' ", - genquery.AS_LIST, ctx) - for row in iter: - error = rule_to_process(ctx, path, row[0], False, buffer) - if error: - break - - # and lastly process the collection itself - if error == 0: - error = rule_to_process(ctx, parent_collection, collection, True, buffer) - - return error - - -def vault_dataset_add_default_metadata(ctx, vault_path, dataset_id): - id_components = intake_scan.dataset_parse_id(dataset_id) - # my_date = datetime.now() - # id_components["dataset_date_created"] = my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z') - id_components["dataset_date_created"] = str(int(time.time())) - - keys = ["wave", "experiment_type", "pseudocode", "version", "dataset_date_created"] - for key in keys: - try: - avu.set_on_data(ctx, vault_path, key, id_components[key]) - except Exception: - avu.set_on_coll(ctx, vault_path, key, id_components[key]) - - -def vault_dataset_exists(ctx, vault_root, dataset_id): - id_components = intake_scan.dataset_parse_id(dataset_id) - # Beware! extra 'ver' before version from original code: *wepv = *wave ++ *sep ++ *experimentType ++ *sep ++ *pseudocode ++ *sep ++ "ver*version"; - wepv = id_components["wave"] + "_" + id_components["experiment_type"] + "_" + id_components["pseudocode"] + "_ver" + id_components["version"] - dataset_path = vault_root + '/' + id_components["wave"] + "/" + id_components["experiment_type"] + "/" + id_components["pseudocode"] + "/" + wepv - - iter = genquery.row_iterator( - "COLL_NAME", - "COLL_NAME = '" + dataset_path + "' ", - genquery.AS_LIST, ctx) - - for _row in iter: - return True - - return False - - -def get_dataset_path(root, dataset_id): - id_components = intake_scan.dataset_parse_id(dataset_id) - # Beware! 
extra 'ver' before version from original code: *wepv = *wave ++ *sep ++ *experimentType ++ *sep ++ *pseudocode ++ *sep ++ "ver*version"; - wepv = id_components["wave"] + "_" + id_components["experiment_type"] + "_" + id_components["pseudocode"] + "_ver" + id_components["version"] - - return root + '/' + id_components["wave"] + "/" + id_components["experiment_type"] + "/" + id_components["pseudocode"] + "/" + wepv diff --git a/policies_intake.py b/policies_intake.py index 5b490e63e..159ddbca9 100644 --- a/policies_intake.py +++ b/policies_intake.py @@ -1,15 +1,54 @@ # -*- coding: utf-8 -*- -"""iRODS policy implementations.""" +"""Policies for intake.""" -__copyright__ = 'Copyright (c) 2021, Utrecht University' +__copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import genquery -import intake_scan from util import * +def object_is_locked(ctx, path, is_collection): + """Returns whether given object in path (collection or dataobject) is locked or frozen + + :param ctx: Combined type of a callback and rei struct + :param path: Path to object or collection + :param is_collection: Whether path contains a collection or data object + + :returns: Returns locked state + """ + locked_state = {"locked": False, + "frozen": False} + + if is_collection: + iter = genquery.row_iterator( + "META_COLL_ATTR_NAME", + "COLL_NAME = '" + path + "'", + genquery.AS_LIST, ctx + ) + for row in iter: + if row[0] in ['to_vault_lock', 'to_vault_freeze']: + locked_state['locked'] = True + if row[0] == 'to_vault_freeze': + locked_state['frozen'] = True + else: + parent_coll = pathutil.dirname(path) + iter = genquery.row_iterator( + "META_DATA_ATTR_NAME", + "COLL_NAME = '" + parent_coll + "' AND DATA_NAME = '" + pathutil.basename(path) + "'", + genquery.AS_LIST, ctx + ) + # return locked_state + for row in iter: + if row[0] in ['to_vault_lock', 'to_vault_freeze']: + locked_state['locked'] = True + if row[0] == 'to_vault_freeze': + locked_state['frozen'] = True + + return locked_state + + def is_data_in_locked_dataset(ctx, actor, path): """ Check whether given data object is within a locked dataset """ dataset_id = '' @@ -64,7 +103,7 @@ def is_data_in_locked_dataset(ctx, actor, path): toplevel_is_collection = False if toplevel_collection: - locked_state = intake_scan.object_is_locked(ctx, toplevel_collection, toplevel_is_collection) + locked_state = object_is_locked(ctx, toplevel_collection, toplevel_is_collection) log.debug(ctx, locked_state) return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor) else: @@ -117,7 +156,7 @@ def is_coll_in_locked_dataset(ctx, actor, coll): toplevel_is_collection = False if toplevel_collection: - locked_state = intake_scan.object_is_locked(ctx, toplevel_collection, toplevel_is_collection) + locked_state = object_is_locked(ctx, toplevel_collection, toplevel_is_collection) log.debug(ctx, locked_state) return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor) else: @@ -169,7 +208,7 @@ def coll_in_path_of_locked_dataset(ctx, actor, coll): toplevel_is_collection = False if toplevel_collection: - locked_state = intake_scan.object_is_locked(ctx, toplevel_collection, toplevel_is_collection) + locked_state = object_is_locked(ctx, toplevel_collection, toplevel_is_collection) log.debug(ctx, locked_state) return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor) else: diff --git a/rules_uu.cfg.template b/rules_uu.cfg.template index 50a1b863d..8524920ff 
100644 --- a/rules_uu.cfg.template +++ b/rules_uu.cfg.template @@ -36,7 +36,6 @@ eus_api_tls_verify = enable_deposit = enable_open_search = -enable_intake = enable_datarequest = yoda_portal_fqdn = diff --git a/setup.cfg b/setup.cfg index ab6ee494a..a9ef75804 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,4 +5,4 @@ strictness=short docstring_style=sphinx max-line-length=127 exclude=__init__.py,tools,tests/env/ -application-import-names=avu,conftest,util,api,config,constants,data_access_token,datacite,datarequest,data_object,epic,error,folder,groups,groups_import,intake,intake_dataset,intake_lock,intake_scan,intake_utils,intake_vault,json_datacite,json_landing_page,jsonutil,log,mail,meta,meta_form,msi,notifications,schema,schema_transformation,schema_transformations,settings,pathutil,provenance,policies_intake,policies_datamanager,policies_datapackage_status,policies_folder_status,policies_datarequest_status,publication,query,replication,revisions,revision_strategies,revision_utils,rule,user,vault,sram,arb_data_manager,cached_data_manager,resource,yoda_names,policies_utils +application-import-names=avu,conftest,util,api,config,constants,data_access_token,datacite,datarequest,data_object,epic,error,folder,groups,groups_import,json_datacite,json_landing_page,jsonutil,log,mail,meta,meta_form,msi,notifications,schema,schema_transformation,schema_transformations,settings,pathutil,provenance,policies_intake,policies_datamanager,policies_datapackage_status,policies_folder_status,policies_datarequest_status,publication,query,replication,revisions,revision_strategies,revision_utils,rule,user,vault,sram,arb_data_manager,cached_data_manager,resource,yoda_names,policies_utils diff --git a/tests/conftest.py b/tests/conftest.py index a7c983b15..a13b3b9da 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,6 @@ datarequest = False deposit = False -intake = False archive = False smoke = False skip_api = False @@ -45,7 +44,6 @@ def pytest_addoption(parser): parser.addoption("--datarequest", action="store_true", default=False, help="Run datarequest tests") parser.addoption("--deposit", action="store_true", default=False, help="Run deposit tests") - parser.addoption("--intake", action="store_true", default=False, help="Run intake tests") parser.addoption("--archive", action="store_true", default=False, help="Run vault archive tests") parser.addoption("--no-env-csrf", action="store_true", default=False, help="Do not get CSRF token from environment (this is enabled by default for smoke tests)") parser.addoption("--smoke", action="store_true", default=False, help="Run Smoke tests") @@ -59,7 +57,6 @@ def pytest_addoption(parser): def pytest_configure(config): config.addinivalue_line("markers", "datarequest: Run datarequest tests") config.addinivalue_line("markers", "deposit: Run deposit tests") - config.addinivalue_line("markers", "intake: Run intake tests") config.addinivalue_line("markers", "archive: Run vault archive tests") config.addinivalue_line("markers", "all: Run all tests") config.addinivalue_line("markers", "ui: UI test") @@ -86,10 +83,9 @@ def pytest_configure(config): global verbose_test verbose_test = config.getoption("--verbose-test") - global datarequest, deposit, intake, archive, smoke, run_all, skip_api, skip_ui, no_env_csrf + global datarequest, deposit, archive, smoke, run_all, skip_api, skip_ui, no_env_csrf datarequest = config.getoption("--datarequest") deposit = config.getoption("--deposit") - intake = config.getoption("--intake") archive = config.getoption("--archive") smoke = 
config.getoption("--smoke") skip_ui = config.getoption("--skip-ui") @@ -109,7 +105,6 @@ def pytest_configure(config): if run_all: datarequest = True deposit = True - intake = True archive = True # Store cookies for each user. @@ -131,10 +126,6 @@ def pytest_bdd_apply_tag(tag, function): marker = pytest.mark.skip(reason="Skip deposit") marker(function) return True - elif tag == 'intake' and not intake: - marker = pytest.mark.skip(reason="Skip intake") - marker(function) - return True elif tag == 'archive' and not archive: marker = pytest.mark.skip(reason="Skip vault archive") marker(function) diff --git a/tests/features/api/api_intake.feature b/tests/features/api/api_intake.feature deleted file mode 100644 index 7f3e1e007..000000000 --- a/tests/features/api/api_intake.feature +++ /dev/null @@ -1,195 +0,0 @@ -@api @intake -Feature: Intake API - - Scenario Outline: Find all studies a user is involved with - Given user is authenticated - And the Yoda intake list studies API is queried - Then the response status code is "200" - And study is returned - - Examples: - | user | study | - | researcher | initial | - | researcher | test | - | datamanager | initial | - | datamanager | test | - - - Scenario Outline: Find all studies a user is datamanager of - Given user is authenticated - And the Yoda intake list datamanager studies API is queried - Then the response status code is "200" - And study is returned - - Examples: - | user | study | - | datamanager | initial | - | datamanager | test | - - - Scenario Outline: Get the total count of all files in a collection - Given user is authenticated - And the Yoda intake count total files API is queried with collection - Then the response status code is "200" - # And ... - - Examples: - | user | collection | - | datamanager | /tempZone/home/grp-intake-initial | - | researcher | /tempZone/home/grp-intake-initial | - - - Scenario Outline: Get list of all unrecognized and unscanned files - Given user is authenticated - And the Yoda intake list unrecognized files API is queried with collection - Then the response status code is "200" - # And ... - - Examples: - | user | collection | - | datamanager | /tempZone/yoda/grp-intake-initial | - | researcher | /tempZone/yoda/grp-intake-initial | - - - Scenario Outline: Get list of all datasets - Given user is authenticated - And the Yoda intake list datasets API is queried with collection - Then the response status code is "200" - # And ... - - Examples: - | user | collection | - | datamanager | /tempZone/home/grp-intake-initial | - | researcher | /tempZone/home/grp-intake-initial | - - - Scenario Outline: Scan for and recognize datasets in study intake area - Given user is authenticated - And the Yoda intake scan for datasets API is queried with collection - Then the response status code is "200" - # And ... - - Examples: - | user | collection | - | datamanager | /tempZone/home/grp-intake-initial | - | researcher | /tempZone/home/grp-intake-initial | - - - Scenario Outline: Lock dataset in study intake area - Given user is authenticated - And the Yoda intake lock API is queried with dataset id and collection - Then the response status code is "200" - # And ... 
- - Examples: - | user | collection | dataset_id | - | datamanager | /tempZone/home/grp-intake-initial | 3y*discount*B00000*Raw | - | researcher | /tempZone/home/grp-intake-initial | 3y*discount*B00001*Raw | - - - Scenario Outline: Cannot lock non-existent dataset - Given user is authenticated - And the Yoda intake lock API is queried with dataset id and collection - # Errors during locking individual datasets do not result in an error status code. This test - # codifies current behaviour of this API endpoint. - Then the response status code is "200" - And the result is equivalent to {"error_dataset_ids": ["3y\ndiscount\nB99999\nRaw"], "error_msg": "Something went wrong locking datasets", "proc_status": "NOK"} - - Examples: - | user | collection | dataset_id | - | datamanager | /tempZone/home/grp-intake-initial | 3y*discount*B99999*Raw | - - - Scenario Outline: Unlock dataset in study intake area - Given user is authenticated - And the Yoda intake unlock API is queried with dataset id and collection - Then the response status code is "200" - # And ... - - Examples: - | user | collection | dataset_id | - | datamanager | /tempZone/home/grp-intake-initial | 3y*discount*B00000*Raw | - | researcher | /tempZone/home/grp-intake-initial | 3y*discount*B00001*Raw | - - - Scenario Outline: Cannot unlock non-existent dataset - Given user is authenticated - And the Yoda intake unlock API is queried with dataset id and collection - # Errors during unlocking individual datasets do not result in an error status code. This test - # codifies current behaviour of this API endpoint. - Then the response status code is "200" - And the result is equivalent to {"error_dataset_ids": ["3y\ndiscount\nB99999\nRaw"], "error_msg": "Something went wrong unlocking datasets", "proc_status": "NOK"} - - Examples: - | user | collection | dataset_id | - | datamanager | /tempZone/home/grp-intake-initial | 3y*discount*B99999*Raw | - - - Scenario Outline: Get all details for a dataset - Given user is authenticated - And the Yoda intake dataset get details API is queried with dataset id and collection - Then the response status code is "200" - # And ... - - Examples: - | user | collection | dataset_id | - | datamanager | /tempZone/home/grp-intake-initial | 3y*discount*B00000*Raw | - | researcher | /tempZone/home/grp-intake-initial | 3y*discount*B00001*Raw | - - - Scenario Outline: Add a comment to a dataset - Given user is authenticated - And the Yoda intake dataset add comment API is queried with dataset id , study id and comment - Then the response status code is "200" - # And ... - - Examples: - | user | study_id | comment | dataset_id | - | datamanager | grp-intake-initial | comment1 | 3y*discount*B00000*Raw | - | researcher | grp-intake-initial | comment2 | 3y*discount*B00001*Raw | - - - Scenario Outline: Cannot add comment to nonexistent dataset - Given user is authenticated - And the Yoda intake dataset add comment API is queried with dataset id , study id and comment - # Adding a comment to a nonexistent dataset currently does not result in an error status code. This test - # codifies current behaviour of this API endpoint. 
- Then the response status code is "200" - And the result is equivalent to {"error_msg": "Dataset does not exist", "proc_status": "NOK"} - - Examples: - | user | study_id | comment | dataset_id | - | datamanager | grp-intake-initial | comment1 | 3y*discount*B99999*Raw | - - - Scenario Outline: Get vault dataset related counts for reporting for a study - Given user is authenticated - And the Yoda intake report vault dataset counts per study API is queried with study id - Then the response status code is "200" - # And ... - - Examples: - | user | study_id | - | datamanager | grp-intake-initial | - - - Scenario Outline: Get aggregated vault dataset info for reporting for a study - Given user is authenticated - And the Yoda intake report vault aggregated info API is queried with study id - Then the response status code is "200" - # And ... - - Examples: - | user | study_id | - | datamanager | grp-intake-initial | - - - Scenario Outline: Get vault data for export of a study - Given user is authenticated - And the Yoda intake report export study data API is queried with study id - Then the response status code is "200" - # And ... - - Examples: - | user | study_id | - | datamanager | grp-intake-initial | diff --git a/tests/features/api/api_resources.feature b/tests/features/api/api_resources.feature index 52ab3356e..ba3c35432 100644 --- a/tests/features/api/api_resources.feature +++ b/tests/features/api/api_resources.feature @@ -21,7 +21,7 @@ Feature: Resources API And only 1 group is found Examples: - | user | group | + | user | group | | researcher | research-core-1 | | datamanager | research-core-1 | @@ -40,7 +40,6 @@ Feature: Resources API | datamanager | deposit-pilot1 | - @intake Scenario Outline: Get paginated result when searching for one specific intake / grp group Given user is authenticated And the Yoda resources API is queried for a paginated range of research groups filtered on group @@ -82,36 +81,35 @@ Feature: Resources API | datamanager | deposit-pilot | - @intake Scenario Outline: Get a full year of storage data for intake group Given user is authenticated And the Yoda resources full year differentiated group data API is queried with - Then the response status code is "200" - And storage data for group is found + Then the response status code is "200" + And storage data for group is found Examples: | user | group | | researcher | research-initial | | datamanager | research-initial | - + @deposit Scenario Outline: Get a full year of differentiated storage data starting from current month and look back one year Given user is authenticated And the Yoda resources full year differentiated group data API is queried with - Then the response status code is "200" - And storage data for group is found + Then the response status code is "200" + And storage data for group is found Examples: | user | group | | researcher | research-deposit-test | | datamanager | research-deposit-test | - @intake + Scenario Outline: Get a full year of differentiated storage data starting from current month and look back one year Given user is authenticated And the Yoda resources full year differentiated group data API is queried with - Then the response status code is "200" - And storage data for group is found + Then the response status code is "200" + And storage data for group is found Examples: | user | group | @@ -136,8 +134,8 @@ Feature: Resources API Scenario Outline: Collect storage stats for all twelve months based upon categories a user is datamanager of Given user is authenticated And the Yoda 
resources monthly category stats API is queried - Then the response status code is "200" - And storage data for export is found + Then the response status code is "200" + And storage data for export is found Examples: | user | @@ -152,7 +150,7 @@ Feature: Resources API And group data are sorted by in order Examples: - | user | sort_on | sort_order | + | user | sort_on | sort_order | | researcher | name | asc | | researcher | name | desc | | researcher | size | asc | diff --git a/tests/features/ui/ui_intake.feature b/tests/features/ui/ui_intake.feature deleted file mode 100644 index fea264427..000000000 --- a/tests/features/ui/ui_intake.feature +++ /dev/null @@ -1,43 +0,0 @@ -@ui @intake -Feature: Intake UI - - @fail - Scenario: Intake scan only and find datasets and unrecognized files - Given user datamanager is logged in - And module "intake" is shown - When activate study "test" - And total datasets is "0" - When activate study "initial" - And total datasets is "0" - And unscanned files are present - When scanned for datasets - Then scan button is disabled - When scanning for datasets is successful - And total datasets is "3" - And unrecognized files are present - - When click for details of first dataset row - - When add "COMMENTS" to comment field and press comment button - - When check first dataset for locking - And lock and unlock buttons are "enabled" - - When uncheck first dataset for locking - And lock and unlock buttons are "disabled" - - When check all datasets for locking - - Then click lock button - And wait for all datasets to be in locked state successfully - And wait for all datasets to be in frozen state - And wait for frozen sets to be added to vault - - Scenario: Intake reporting - Given user datamanager is logged in - And module "intake" is shown - - When open intake reporting area - When check reporting result - When export all data and download file - When return to intake area diff --git a/tests/features/ui/ui_statistics.feature b/tests/features/ui/ui_statistics.feature index 516e7d47f..843880d77 100644 --- a/tests/features/ui/ui_statistics.feature +++ b/tests/features/ui/ui_statistics.feature @@ -28,7 +28,6 @@ Feature: Statistics UI | datamanager | deposit-pilot | - @intake Scenario Outline: Viewing storage details of a intake / grp group Given user is logged in And module "stats" is shown @@ -55,7 +54,6 @@ Feature: Statistics UI | datamanager | test-automation | - @intake Scenario Outline: Viewing intake category storage details as a technicaladmin or datamanager Given user is logged in When module "stats" is shown diff --git a/tests/step_defs/api/test_api_intake.py b/tests/step_defs/api/test_api_intake.py deleted file mode 100644 index b26223004..000000000 --- a/tests/step_defs/api/test_api_intake.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding=utf-8 -"""Intake API feature tests.""" - -__copyright__ = 'Copyright (c) 2020-2022, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import json - -from deepdiff import DeepDiff -from pytest_bdd import ( - given, - parsers, - scenarios, - then, -) - -from conftest import api_request - -scenarios('../../features/api/api_intake.feature') - - -@given('the Yoda intake list studies API is queried', target_fixture="api_response") -def api_intake_list_studies(user): - return api_request( - user, - "intake_list_studies", - {} - ) - - -@given('the Yoda intake list datamanager studies API is queried', target_fixture="api_response") -def api_intake_list_dm_studies(user): - return api_request( - user, - 
"intake_list_dm_studies", - {} - ) - - -@given(parsers.parse("the Yoda intake count total files API is queried with collection {collection}"), target_fixture="api_response") -def api_intake_count_total_files(user, collection): - return api_request( - user, - "intake_count_total_files", - {"coll": collection} - ) - - -@given(parsers.parse("the Yoda intake list unrecognized files API is queried with collection {collection}"), target_fixture="api_response") -def api_intake_list_unrecognized_files(user, collection): - return api_request( - user, - "intake_list_unrecognized_files", - {"coll": collection} - ) - - -@given(parsers.parse("the Yoda intake list datasets API is queried with collection {collection}"), target_fixture="api_response") -def api_intake_list_datasets(user, collection): - return api_request( - user, - "intake_list_datasets", - {"coll": collection} - ) - - -@given(parsers.parse("the Yoda intake scan for datasets API is queried with collection {collection}"), target_fixture="api_response") -def api_intake_scan_for_datasets(user, collection): - return api_request( - user, - "intake_scan_for_datasets", - {"coll": collection} - ) - - -@given(parsers.parse("the Yoda intake lock API is queried with dataset id {dataset_id} and collection {collection}"), target_fixture="api_response") -def api_intake_lock_dataset(user, dataset_id, collection): - return api_request( - user, - "intake_lock_dataset", - {"path": collection, "dataset_ids": dataset_id.replace("*", "\n")} - ) - - -@given(parsers.parse("the Yoda intake unlock API is queried with dataset id {dataset_id} and collection {collection}"), target_fixture="api_response") -def api_intake_unlock_dataset(user, dataset_id, collection): - return api_request( - user, - "intake_unlock_dataset", - {"path": collection, "dataset_ids": dataset_id.replace("*", "\n")} - ) - - -@given(parsers.parse("the Yoda intake dataset get details API is queried with dataset id {dataset_id} and collection {collection}"), target_fixture="api_response") -def api_intake_dataset_get_details(user, dataset_id, collection): - return api_request( - user, - "intake_dataset_get_details", - {"coll": collection, "dataset_id": dataset_id.replace("*", "\t")} - ) - - -@given(parsers.parse("the Yoda intake dataset add comment API is queried with dataset id {dataset_id}, study id {study_id} and comment {comment}"), target_fixture="api_response") -def api_intake_dataset_add_comment(user, dataset_id, study_id, comment): - return api_request( - user, - "intake_dataset_add_comment", - {"study_id": study_id, "dataset_id": dataset_id.replace("*", "\n"), "comment": comment} - ) - - -@given(parsers.parse("the Yoda intake report vault dataset counts per study API is queried with study id {study_id}"), target_fixture="api_response") -def api_intake_report_vault_dataset_counts_per_study(user, study_id): - return api_request( - user, - "intake_report_vault_dataset_counts_per_study", - {"study_id": study_id} - ) - - -@given(parsers.parse("the Yoda intake report vault aggregated info API is queried with study id {study_id}"), target_fixture="api_response") -def api_intake_report_vault_aggregated_info(user, study_id): - return api_request( - user, - "intake_report_vault_aggregated_info", - {"study_id": study_id} - ) - - -@given(parsers.parse("the Yoda intake report export study data API is queried with study id {study_id}"), target_fixture="api_response") -def api_intake_report_export_study_data(user, study_id): - return api_request( - user, - "intake_report_export_study_data", - 
{"study_id": study_id} - ) - - -@then(parsers.parse("study {study} is returned")) -def study_returned(api_response, study): - _, body = api_response - - assert study in body['data'] - - -@then('debug') -def debug(api_response): - _, body = api_response - - assert 0, body - - -@then(parsers.parse("the result is equivalent to {result}")) -def result_equivalent_to(api_response, result): - _, body = api_response - - assert DeepDiff(json.loads(result), body['data']) == {} diff --git a/tests/step_defs/ui/test_ui_intake.py b/tests/step_defs/ui/test_ui_intake.py deleted file mode 100644 index d6d0f226d..000000000 --- a/tests/step_defs/ui/test_ui_intake.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding =utf-8 -"""Vault UI feature tests.""" - -__copyright__ = 'Copyright (c) 2020-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import time - -from pytest_bdd import ( - parsers, - scenarios, - then, - when, -) - -scenarios('../../features/ui/ui_intake.feature') - - -# GENERIC FUNCTIONS -def get_unscanned_from_error_area_text(browser): - # Unrecognised and unscanned (17) files or Unrecognised (12) and unscanned (-) files - error_area_text = browser.find_by_id('scan_result_text') - parts = error_area_text.value.split(' and ') - s = parts[1] - return s[s.find("(") + 1:s.find(")")] - - -def get_unrecognized_from_error_area_text(browser): - error_area_text = browser.find_by_id('scan_result_text') - parts = error_area_text.value.split(' and ') - s = parts[0] - first_bracket = s.find("(") - if first_bracket == -1: - return "0" - return s[first_bracket + 1:s.find(")")] - - -# SCENARIO 1 -@when(parsers.parse('activate study "{study}"')) -def ui_intake_activate_study(browser, study): - dropdown = browser.find_by_id('dropdown-select-study') - dropdown.click() - table = browser.find_by_id('select-study') - rows = table.find_by_tag('tr') - for row in rows: - if row.has_class('ta-' + study): - row.find_by_tag('td').click() - return True - assert False - - -@when(parsers.parse('total datasets is "{dataset_count}"')) -def ui_intake_total_dataset_count(browser, dataset_count): - dataset_count_area = browser.find_by_id('datatable_info') - if dataset_count == '0': - assert dataset_count_area.value == 'No datasets present' - else: - assert dataset_count_area.value == "Total datasets: " + dataset_count - - -@when('unscanned files are present') # ben ik hier niet de prerequisite aan het testen??? 
-def ui_intake_unscanned_files_present(browser): - assert int(get_unscanned_from_error_area_text(browser)) > 0 - - -@when('scanned for datasets') -def ui_intake_scanned_for_datasets(browser): - browser.find_by_id('btn-start-scan').click() - - -@then('scan button is disabled') -def ui_intake_scan_button_is_disabled(browser): - assert browser.find_by_id('btn-start-scan').has_class('disabled') - - -@when('scanning for datasets is successful') -def ui_intake_scanning_is_successful(browser): - assert browser.is_text_present('Successfully scanned for datasets.', wait_time=20) - - -@when('unrecognized files are present') -def ui_intake_unrecognized_files_are_present(browser): - assert int(get_unrecognized_from_error_area_text(browser)) > 0 - - -@when('click for details of first dataset row') -def ui_intake_click_for_details_of_first_dataset_row(browser): - browser.find_by_id('datatable')[0].click() - - -@when(parsers.parse('add "{comments}" to comment field and press comment button')) -def ui_intake_add_comments_to_dataset(browser, comments): - browser.find_by_name('comments').fill(comments) - browser.find_by_css(".btn-add-comment").click() - - -@when('check first dataset for locking') -def ui_check_first_dataset_for_locking(browser): - browser.find_by_css('.cbDataSet')[0].click() - - -@when(parsers.parse('lock and unlock buttons are "{enabled_state}"')) -def ui_intake_lock_and_unlock_buttons_are(browser, enabled_state): - if enabled_state == 'enabled': - assert not browser.find_by_id('btn-unlock').has_class('disabled') - assert not browser.find_by_id('btn-lock').has_class('disabled') - else: - assert browser.find_by_id('btn-unlock').has_class('disabled') - assert browser.find_by_id('btn-lock').has_class('disabled') - - -@when('uncheck first dataset for locking') -def ui_uncheck_first_dataset_for_locking(browser): - # if not checkbox.is_selected() meenemen hier - browser.find_by_css('.cbDataSet')[0].click() - - -@when('check all datasets for locking') -def ui_check_all_datasets_for_locking(browser): - browser.find_by_css('.control-all-cbDataSets').click() - - -@then('click lock button') -def ui_intake_click_lock_button(browser): - browser.find_by_id("btn-lock").click() - - -@then('wait for all datasets to be in locked state successfully') -def ui_intake_wait_all_datasets_in_locked_state(browser): - assert browser.is_text_present('Successfully locked the selected dataset(s).', wait_time=30) - - assert len(browser.find_by_css('.datasetstatus_locked', wait_time=30)) == 2 - - -@then('wait for all datasets to be in frozen state') -def ui_intake_wait_all_datasets_in_frozen_state(browser): - i = 0 - no_more_locked_datasets_present = False - while i < 20: - time.sleep(20) - browser.visit(browser.url) - # if there are no longer datasets in locked state -> frozen or error - if len(browser.find_by_css('.datasetstatus_locked', wait_time=5)) == 0: # .datasetstatus_frozen - no_more_locked_datasets_present = True - # either datasets are frozen now. 
Or have been marked errorenous - break - i = i + 1 - assert no_more_locked_datasets_present - - -@then('wait for frozen sets to be added to vault') -def ui_intake_wait_frozen_datasets_to_vault(browser): - # When all frozen datasets have been moved to the vault only 1 will remain with dataset_status_scanned - i = 0 - no_more_frozen_datasets_present = False - while i < 20: - time.sleep(20) - browser.visit(browser.url) - # if there are no longer datasets in locked state -> frozen or error - if len(browser.find_by_css('.datasetstatus_scanned', wait_time=5)) == 3: # .datasetstatus_frozen - no_more_frozen_datasets_present = True - # either datasets are frozen now. Or have been marked errorenous - break - i = i + 1 - assert no_more_frozen_datasets_present - - -# SCENARIO 2 -@when('open intake reporting area') -def ui_intake_open_intake_reporting_area(browser): - browser.find_by_css('.btn-goto-reports').click() - - -@when('check reporting result') -def ui_intake_check_reporting_result(browser): - # classes are part of rows in result table. - assert len(browser.find_by_css('.dataset-type-counts-raw')) > 0 - assert len(browser.find_by_css('.dataset-type-counts-processed')) == 0 - assert len(browser.find_by_css('.dataset-aggregated-version-raw')) > 0 - assert len(browser.find_by_css('.dataset-aggregated-version-processed')) > 0 - assert len(browser.find_by_css('.dataset-aggregated-version-total')) > 0 - - -@when('export all data and download file') -def ui_intake_export_all_data_and_download_file(browser): - browser.find_by_css('.btn-export-data').click() - - -@when('return to intake area') -def ui_intake_return_to_intake_area(browser): - browser.find_by_css('.btn-goto-intake').click() diff --git a/tools/api/generate-openapi.py b/tools/api/generate-openapi.py index c3393aafc..b922b86e1 100755 --- a/tools/api/generate-openapi.py +++ b/tools/api/generate-openapi.py @@ -282,7 +282,7 @@ def gen_fn_spec(name, fn): name = re.sub('^api_', '', name) if core: - modules = ['datarequest', 'deposit', 'intake'] + modules = ['datarequest', 'deposit'] if name.startswith(tuple(modules)): continue diff --git a/tools/intake/ExportDatasetErrorsAndWarnings.r b/tools/intake/ExportDatasetErrorsAndWarnings.r deleted file mode 100644 index ce2bedf1a..000000000 --- a/tools/intake/ExportDatasetErrorsAndWarnings.r +++ /dev/null @@ -1,115 +0,0 @@ -# Date: 2019-01-16 -# Functionality: -# Find files within the dynamic area of an intake study that have errors and/or warnings at file level. -# A check for errors/warnings is performed ONLY on file level. -# Errors that can be found on dataset-toplevel or on collection level within a dataset, are NOT reported - -# Parameters: -# - Study: Name of the study the export has to search - -# Run with DatasetErrorsAndWarnins.sh script to have the export added to a csv file. - -ExportDatasetErrorsAndWarnings { - ## OVERRULE PARAMS FOR NOW as I wasn't able to add multiple input params -# *studyParam="test"; - - # Possibly use uuClientFullName as user, or $userNameClienterNameClient; ???????????????????????? 
- # writeLine("stdout", "uuClientFullName: " ++ uuClientFullName); - - - # Initialisation of variables based on command line parameters -# *user="datamanager"; - *user = uuClientFullName - *study = *studyParam; - *datamanagerGroup = 'grp-datamanager-' ++ *study; - *studyFolder = "/" ++ $rodsZoneClient ++ "/" ++ 'home/grp-intake-' ++ *studyParam; - - # Check whether user is a datamanager for the study involved - *isDatamanager = false; - foreach (*row in - SELECT USER_NAME - WHERE USER_TYPE = 'rodsgroup' - AND USER_NAME = *datamanagerGroup ) { - - uuGroupUserExists(*datamanagerGroup, *user, true, *membership) - if (*membership) { - *isDatamanager = true; - } - } - - if (!*isDatamanager) { - writeLine("stdout", 'Not the datamanager of current group'); - succeed; # the journey ends here - } - - - # Setup list of dataset ids that are later used to find data objects having this dataset_id's - *datasetList = list(); - foreach(*row in SELECT META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE WHERE COLL_NAME like '*studyFolder%%' AND META_DATA_ATTR_NAME='dataset_toplevel') { - msiGetValByKey(*row, "META_DATA_ATTR_VALUE", *datasetId); - *datasetList = cons(*datasetId, *datasetList); - } - - foreach(*row in SELECT COLL_ID, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE WHERE COLL_NAME like '*studyFolder%%' AND META_COLL_ATTR_NAME='dataset_toplevel') { - msiGetValByKey(*row, "META_COLL_ATTR_VALUE", *datasetId); - *datasetList = cons(*datasetId, *datasetList); - } - - # Write header row for the export table - writeLine('stdout', "Wave,Experiment type,Pseudocode,Version,Bestand,Errors,Warnings"); - - # At first find datasets, designated by presence of metadata attribute 'dataset_toplevel'. - # The value of the datasetId is combination of wepv and path to make it unique. - foreach(*datasetId in *datasetList) { - # Collect all data objects with a given datasetId - # And per data object find out whether it contains errors or warnings in its metadata - foreach(*row2 in SELECT META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, DATA_NAME, DATA_ID, COLL_NAME WHERE META_DATA_ATTR_VALUE='*datasetId' AND META_DATA_ATTR_NAME='dataset_id') { - msiGetValByKey(*row2, "DATA_NAME", *dataName); - msiGetValByKey(*row2, "COLL_NAME", *collName); - msiGetValByKey(*row2, "DATA_ID", *dataId); - - # Given 1 object step thtough all its metadata attributes. - - msiString2KeyValPair("", *kvp); - - # build list of all attributes that are involved - *attrList = list('wave', 'experiment_type', 'pseudocode', 'version', 'error', 'warning'); - # initialize all attributes to empty strings - foreach (*attr in *attrList) { - *kvp."*attr" = ''; - } - - foreach(*row3 in SELECT META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE WHERE DATA_ID=*dataId ) { - msiGetValByKey(*row3, "META_DATA_ATTR_NAME", *attrName); - msiGetValByKey(*row3, "META_DATA_ATTR_VALUE", *attrValue); - - foreach (*attr in *attrList) { - #writeLine('stdout', 'attrLIST: ' ++ *attr); - if (*attrName==*attr) { - if (*attr=='error' || *attr=='warning') { # must be concatination as there can be more errors/warnings on 1 data object - if (strlen(*kvp."*attr")>0) { - *kvp."*attr" = *kvp."*attr" ++ ' - ' ++ *attrValue; - } - else { - *kvp."*attr" = *attrValue; - } - } - else { - *kvp."*attr" = *attrValue; - } - } - } - } - # Add data object to file - only if errors or warnins present. 
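The loop above collects the tracked attributes of one data object into *kvp, concatenating repeated error and warning values, and the statement just below writes a CSV row only when an error or warning was found. A rough stand-alone Python equivalent of that aggregation (illustration only; summarise_object and write_report are invented names, and the per-object metadata rows are plain (name, value) tuples rather than a genquery result):

import csv
import sys

TRACKED = ('wave', 'experiment_type', 'pseudocode', 'version', 'error', 'warning')


def summarise_object(avu_rows):
    """Collapse one data object's AVUs into a dict; join repeated errors/warnings with ' - '."""
    kvp = {attr: '' for attr in TRACKED}
    for name, value in avu_rows:
        if name not in TRACKED:
            continue
        if name in ('error', 'warning') and kvp[name]:
            kvp[name] += ' - ' + value
        else:
            kvp[name] = value
    return kvp


def write_report(objects, out=sys.stdout):
    """objects: iterable of (path, avu_rows); only objects with errors or warnings are reported."""
    writer = csv.writer(out)
    writer.writerow(['Wave', 'Experiment type', 'Pseudocode', 'Version', 'Bestand', 'Errors', 'Warnings'])
    for path, avu_rows in objects:
        kvp = summarise_object(avu_rows)
        if kvp['error'] or kvp['warning']:
            writer.writerow([kvp['wave'], kvp['experiment_type'], kvp['pseudocode'],
                             kvp['version'], path, kvp['error'], kvp['warning']])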
- if (strlen(*kvp.'error')>0 || strlen(*kvp.'warning')>0) { - *dataPath = *collName ++ '/' ++ *dataName; - writeLine('stdout', *kvp."wave" ++ "," ++ *kvp."experiment_type" ++ "," ++ *kvp."pseudocode"++ "," ++ *kvp."version" ++ "," ++ *dataPath ++ "," ++ *kvp."error" ++ "," ++ *kvp."warning"); - } - } - } -} - - -input *studyParam="test" -output ruleExecOut - diff --git a/tools/intake/ExportDatasetErrorsAndWarnings.sh b/tools/intake/ExportDatasetErrorsAndWarnings.sh deleted file mode 100755 index e63ddce41..000000000 --- a/tools/intake/ExportDatasetErrorsAndWarnings.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh -# /Date: 2019-01-16 -# /Functionality: -# /Find files within the dynamic area of an intake study that have errors and/or warnings at file level. -# /A check for errors/warnings is performed ONLY on file level. -# /Errors that can be found on dataset-toplevel or on collection level within a dataset, are NOT reported - -# /Parameters: -# /Study: Name of the study the export has to search - -# /Run with DatasetErrorsAndWarnins.sh script to have the export added to a csv file. - -irule -r irods_rule_engine_plugin-irods_rule_language-instance -F ExportDatasetErrorsAndWarnings.r "*studyParam='$1'" > DatasetErrorsAndWarnings.csv diff --git a/tools/intake/collCopyPseudo.r b/tools/intake/collCopyPseudo.r deleted file mode 100644 index 6264a99f6..000000000 --- a/tools/intake/collCopyPseudo.r +++ /dev/null @@ -1,31 +0,0 @@ -#Author Harm de Raaff -#Date: 2019-01-16 - -collCopyPseudo { - #changes YYYY-MM-DD.hh:mm:ss into seconds since epoch format - msiHumanToSystemTime(*datefrom, *datefrom) - msiHumanToSystemTime(*datetill, *datetill) - - # pseudocodes are passes as a comma-separated list. - *pseudoList = split(*pseudoCodes,','); - - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - foreach(*pc in *pseudoList) { - foreach(*row2 in SELECT COLL_NAME - WHERE COLL_NAME like '/*zone/home/grp-vault-%' - AND META_COLL_ATTR_NAME = 'pseudocode' - AND META_COLL_ATTR_VALUE = *pc - AND COLL_CREATE_TIME between *datefrom *datetill - #datefrom must be the same amount of digits as datetill - #wont be a problem if chosing times from yodas existence till future - ) { - *name=*row2.COLL_NAME; - writeLine('stdout', *name); - } - } - } -} - -input *pseudoCodes="", *datefrom="", *datetill="" -output ruleExecOut diff --git a/tools/intake/collCopyPseudo.sh b/tools/intake/collCopyPseudo.sh deleted file mode 100755 index a6a752a07..000000000 --- a/tools/intake/collCopyPseudo.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# \author Niek Bats -# \date 2019-01-19 -# \file collCopyPseudo.sh -# \brief copies all collections which matches pseudocodes as passed in a file ($3) and in between datefrom ($4) and datetill ($5) to a folder ($1) -# \how to use store the .sh file and .r file to your linux folder and make it the current directory (using cd) -# \ if you want to copy the collections to your linux subfolder, specify iget ($2). The folder ($1) is created in your current linux folder. -# \ if you want to copy the collections to a yoda subfolder, specify icp ($2) instead. The folder ($1) should be preceeded by the yoda -# \ group-folder (e.g. research-copiedcollections/pseudocodelist1, the folder pseudocodelist1 is created by the script) -# \copyright Copyright (c) 2018, Utrecht University. All rights reserved -# \dependencies requires login on an irods user (e.g. 
datamanager) with execution right to this script and permission to execute user icommands -# \usage bash randomCollCopy.sh - -#invalid input handling - -if [[ $1 = "" || $2 = "" || $3 = "" || $4 = "" || $5 = "" ]] ; then - echo "the usage of this script is: " - echo "bash randomCollCopy.sh " - echo "where folder, howtoCopy is text. dateFrom and dateTill is text in YYYY-MM-DD.HH:mm:ss format" - echo "folder is the created subfolder, when using iget. For icp, the folder to be created should be preceeded by the yoda research-name " - echo "e.g. 'research-copiedcollections/pseudocodelist1' and you must be a user of research-copiedcollection." - exit 1 -fi - -#convert input params to named variables for readability also insta docu of what they are -folder="$1" #is text -copyHow="$2" #iget or icp -pseudocodeCsvFile="$3" #is filename of file holding pseudocodes -dateFrom="$4" #is text in YYYY-MM-DD.HH:mm:ss format -dateTill="$5" #is text in YYYY-MM-DD.HH:mm:ss format - -if [[ $copyHow != "iget" && $copyHow != "icp" ]] ; then - echo "Your copy method is not correct. It must either be 'iget' or 'icp'" - echo "Now it is $copyHow" - exit 1 -fi - -#Collect comma separated pseudocodes from file -pseudoCodes=`cat $pseudocodeCsvFile` -echo "pseudocodes: $pseudoCodes" - -#run rule put output in an array -read -ra array <<< $(irule -r irods_rule_engine_plugin-irods_rule_language-instance -F collCopyPseudo.r "'$pseudoCodes'" "'$dateFrom'" "'$dateTill'") - -#if array is empty give notice and exit -if [ ${#array[@]} -eq 0 ]; then - echo "couldnt find any collections matching your parameters at the moment" - echo "possible causes there arent any matches, the servers are down or you dont have a connection" - exit 1 -fi - -#make folder -if [[ "$copyHow" == "iget" ]] ; then - mkdir "$folder" - cd "$folder" -fi -if [[ "$copyHow" == "icp" ]] ; then - imkdir ../"$folder" - icd ../"$folder" -fi - - -echo "Copy selection: " -for item in ${array[@]} -do - echo "$item" - - if [[ "$copyHow" == "iget" ]] ; then - iget -r "$item" - fi - if [[ "$copyHow" == "icp" ]] ; then - icp -r "$item" . 
- fi -done - diff --git a/tools/intake/intakeDataCheck.sh b/tools/intake/intakeDataCheck.sh deleted file mode 100644 index e9bcb8e60..000000000 --- a/tools/intake/intakeDataCheck.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -#Author Niek Bats -#$1 wave -#$2 experiment -#$3 pseudocode -#lists all files, when found any grp-intake-folder using specified parameter(s) - -#input check and build query -if [[ "$1" != "" ]] #if no wave dont do anything -then - query="like '%/grp-intake-%' AND DATA_PATH like '%$1%'" - if [[ "$2" != "" ]] - then - query="$query AND DATA_PATH like '%$2%'" - if [[ "$3" != "" ]] - then - query="$query AND DATA_PATH like '%$3%'" - fi - elif [[ "$3" != "" ]] - then - exit 1 - fi - -echo $query - -#icommand format query is in printf format -output=$(iquest ""%s";%s" "SELECT DATA_PATH, DATA_SIZE WHERE DATA_PATH $query") - -#echo $output - -printf ""Filepath/name";"filesize"\n" > outputIntake.csv -printf "$output" >> outputIntake.csv - -fi diff --git a/tools/intake/randomCollCopy.r b/tools/intake/randomCollCopy.r deleted file mode 100644 index 2ab5c9e4c..000000000 --- a/tools/intake/randomCollCopy.r +++ /dev/null @@ -1,36 +0,0 @@ -#Author Niek Bats -#Date: 2019-01-16 - -randomCollCopy { - #changes YYYY-MM-DD.hh:mm:ss into seconds since epoch format - msiHumanToSystemTime(*datefrom, *datefrom) - msiHumanToSystemTime(*datetill, *datetill) - - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - foreach(*row2 in SELECT COLL_NAME - WHERE COLL_NAME like '/*zone/home/grp-vault-%' - AND META_COLL_ATTR_NAME = 'wave' - AND META_COLL_ATTR_VALUE = *wave - # AND COLL_CREATE_TIME between *datefrom *datetill - #datefrom must be the same amount of digits as datetill - #wont be a problem if chosing times from yodas existence till future - ) { - *name=*row2.COLL_NAME; - foreach(*row3 in SELECT COLL_CREATE_TIME - WHERE COLL_NAME = *name - AND META_COLL_ATTR_NAME = 'experiment_type' - AND META_COLL_ATTR_VALUE = *experiment - ) { - *collCreateTime=int(*row3.COLL_CREATE_TIME); - writeLine("stdout", "*name"); - - # test if already present in list - we do not want multiples. - } - } - } -} - -input *wave="", *experiment="", *datefrom="", *datetill="" -output ruleExecOut - diff --git a/tools/intake/randomCollCopy.sh b/tools/intake/randomCollCopy.sh deleted file mode 100755 index 9d6ac9dfc..000000000 --- a/tools/intake/randomCollCopy.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash -# \author Niek Bats -# \date 2019-01-16 -# \file randomCollCopy.sh -# \brief copies random collections which matches selected wave ($3) experiment ($4) in between datefrom ($5) and datetill ($6) to a folder ($1) -# \ with a maximum $6 collections, if specified. -# \how to use store the .sh file and .r file to your linux folder and make it the current directory (using cd) -# \ if you want to copy the collections to your linux subfolder, specify iget ($2). The folder ($1) is created in your current linux folder. -# \ if you want to copy the collections to a yoda subfolder, specify icp ($2) instead. The folder ($1) should be preceeded by the yoda -# \ group-folder (e.g. research-collection/30w-pci, the folder 30w-pci is created by the script) -# \ will be created and the collections copied -# \copyright Copyright (c) 2018, Utrecht University. All rights reserved -# \dependencies requires login on an irods user (e.g. 
datamanager) with execution right to this script and permission to execute user icommands -# \usage bash randomCollCopy.sh <(optional) amount> - -#invalid input handling - -if [[ $1 = "" || $2 = "" || $3 = "" || $4 = "" || $5 = "" || $6 = "" ]] || [[ ! $7 -gt 0 && ! $7 = "" ]] ; then -#[[ ! $6 -gt 0 ]] check if = a number and more then 0 - echo "the usage of this script is: " - echo "bash randomCollCopy.sh <(optional) amount>" - echo "where folder, wave, experimentType is text. dateFrom and dateTill is text in YYYY-MM-DD.HH:mm:ss format and amount is an number" - echo "folder is the created subfolder, when using iget. For icp, the folder to be created should be preceeded by the yoda research-name" - echo "e.g. 'research-copiedcollection/30w-pci' and you should be a user of research-copiedcollection." - exit 1 -fi - -#convert input params to named variables for readability also insta docu of what they are -folder="$1" #is text -copyHow="$2" #iget or icp -wave="$3" #is text -experimentType="$4" #is text -dateFrom="$5" #is text in YYYY-MM-DD.HH:mm:ss format -dateTill="$6" #is text in YYYY-MM-DD.HH:mm:ss format -amount=10 #is a positive number default=10 -if [[ $7 != "" ]] ; then - amount="$7" -fi - -if [[ $copyHow != "iget" && $copyHow != "icp" ]] ; then - echo "Your copy method is not correct. It must either be 'iget' or 'icp'" - echo "Now it is $copyHow" - exit 1 -fi - -#run rule put output in an array -read -ra array <<< $(irule -r irods_rule_engine_plugin-irods_rule_language-instance -F randomCollCopy.r "'$wave'" "'$experimentType'" "'$dateFrom'" "'$dateTill'") - -#if array is empty give notice and exit -if [ ${#array[@]} -eq 0 ]; then - echo "couldnt find any collections matching your parameters at the moment" - echo "possible causes there arent any matches, the servers are down or you dont have a connection" - exit 1 -fi - -echo "Selecting $amount items from following list: " -for item in ${array[@]} -do - echo "$item" -done - -#make folder -if [[ "$copyHow" == "iget" ]] ; then - mkdir "$folder" - cd "$folder" -fi -if [[ "$copyHow" == "icp" ]] ; then - imkdir ../"$folder" - icd ../"$folder" - fi - -echo "selected: " -#make loop to select amount collections from array -for (( i=0; i<$amount; i++ )); -do - #select a random collection from list - - if [[ ${#array[@]} -ne 0 ]] ; then - randomNr=$(( RANDOM % ${#array[@]} )) - #echo which one is copied and copy - echo "${array[$randomNr]}" - if [[ "$copyHow" == "iget" ]] ; then - iget -r "${array[$randomNr]}" - fi - if [[ "$copyHow" == "icp" ]] ; then - icp -r "${array[$randomNr]}" . 
- fi - - #remove from list - unset array[$randomNr] - array=( "${array[@]}" ) - fi -done diff --git a/tools/intake/vaultedDataCheck.sh b/tools/intake/vaultedDataCheck.sh deleted file mode 100644 index 97da7c2f8..000000000 --- a/tools/intake/vaultedDataCheck.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#Author Niek Bats -#$1 wave -#$2 experiment -#$3 pseudocode -#lists all files, when found any grp-vault-folder using specified parameter(s) - -#input check -if("$1" == "") do #if no wave kill script - exit 1 -done - -#build iquest query -query="%" -for arg in "$@" #add per argument: "$argument/" -do - if [ "$arg" != "" ] - then - query="$query$arg/" - fi -done - -query="$query%" - -#icommand format query is in printf format -output=$(iquest ""%s";%s" "SELECT DATA_PATH, DATA_SIZE WHERE DATA_PATH like '$query'") - -printf ""Filepath/name";"filesize"\n" > outputVault.csv -printf "$output" >> outputVault.csv diff --git a/tools/intake/youthIntakeCheck.r b/tools/intake/youthIntakeCheck.r deleted file mode 100644 index 585e92dcc..000000000 --- a/tools/intake/youthIntakeCheck.r +++ /dev/null @@ -1,91 +0,0 @@ -#Author Niek Bats - -youthIntakeCheck { - *intakeOrVault="intake"; #intake vault - - #non empty *wave, *experiment and *pseudocode - if ((*wave != "") && (*experiment != "") && (*pseudocode != "")) then { - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - - foreach(*row2 in SELECT COLL_NAME - WHERE COLL_NAME like '/*zone/home/grp-*intakeOrVault-%' - AND META_DATA_ATTR_NAME = 'wave' - AND META_DATA_ATTR_VALUE = *wave) { - *path=*row2.COLL_NAME; - - foreach(*row3 in SELECT DATA_NAME - WHERE COLL_NAME = *path - AND META_DATA_ATTR_NAME = 'experiment_type' - AND META_DATA_ATTR_VALUE = *experiment) { - *nameExtension=*row3.DATA_NAME; - - foreach(*row4 in SELECT DATA_SIZE - WHERE DATA_NAME = *nameExtension - AND COLL_NAME = *path - AND META_DATA_ATTR_NAME = 'pseudocode' - AND META_DATA_ATTR_VALUE = *pseudocode) { - *size=*row4.DATA_SIZE; - *name=trimr(*nameExtension, "."); - *extension=triml(*nameExtension, *name); - - writeLine("stdout", "\"*path\";\"*name\";\"*extension\";\"*size\""); - } - } - } - } - } - - #non empty *wave and *experiment - else if ((*wave != "") && (*experiment != "")) then { - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - - foreach(*row2 in SELECT COLL_NAME - WHERE COLL_NAME like '/*zone/home/grp-*intakeOrVault-%' - AND META_DATA_ATTR_NAME = 'wave' - AND META_DATA_ATTR_VALUE = *wave) { - *path=*row2.COLL_NAME; - - foreach(*row3 in SELECT DATA_NAME, DATA_SIZE - WHERE COLL_NAME = *path - AND META_DATA_ATTR_NAME = 'experiment_type' - AND META_DATA_ATTR_VALUE = *experiment) { - *nameExtension=*row3.DATA_NAME; - *size=*row3.DATA_SIZE; - *name=trimr(*nameExtension, "."); - *extension=triml(*nameExtension, *name); - - writeLine("stdout", "\"*path\";\"*name\";\"*extension\";\"*size\""); - } - } - } - } - - #non empty wave pseudocode is empty - else if (*wave != "" && *pseudocode == "") then { - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - - foreach(*row2 in SELECT COLL_NAME, DATA_NAME, DATA_SIZE - WHERE COLL_NAME like '/*zone/home/grp-*intakeOrVault-%' - AND META_DATA_ATTR_NAME ='wave' - AND META_DATA_ATTR_VALUE = *wave) { - *path=*row2.COLL_NAME; - *nameExtension=*row2.DATA_NAME; - *size=*row2.DATA_SIZE; - *name=trimr(*nameExtension, "."); - *extension=triml(*nameExtension, *name); - - writeLine("stdout", "\"*path\";\"*name\";\"*extension\";\"*size\""); - } - } - } - - else { - writeLine("stdout", 
"Invalid input"); - } -} - -input *wave="", *experiment="", *pseudocode="" -output ruleExecOut \ No newline at end of file diff --git a/tools/intake/youthIntakeCheck.sh b/tools/intake/youthIntakeCheck.sh deleted file mode 100644 index bb24b4821..000000000 --- a/tools/intake/youthIntakeCheck.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#Author Niek Bats -#$1 wave -#$2 experiment -#$3 pseudocode -#lists all files, when found any grp-intake-folder using specified parameter(s) - -output=$(irule -r irods_rule_engine_plugin-irods_rule_language-instance -F youthIntakeCheck.r "'$1'" "'$2'" "'$3'") -#echo $output -if [[ "$output" == "" ]] -then - echo "no results with parameters $1 $2 $3" - echo "please note that files have to be scanned to be found" - -elif [[ $output == "Invalid input" ]] -then - echo "$output" - -else - outputFile="intake-$1" - if [[ "$2" != "" ]] - then - outputFile="$outputFile-$2" - fi - if [[ "$3" != "" ]] - then - outputFile="$outputFile-$3" - fi - outputFile="$outputFile.csv" - - printf "\"Filepath\";\"name\";\"extension\";\"filesize\"\n" > "$outputFile" - printf "$output" >> "$outputFile" -fi diff --git a/tools/intake/youthVaultCheck.r b/tools/intake/youthVaultCheck.r deleted file mode 100644 index 1b2dbbdb1..000000000 --- a/tools/intake/youthVaultCheck.r +++ /dev/null @@ -1,91 +0,0 @@ -#Author Niek Bats - -youthVaultCheck { - *intakeOrVault="vault"; #intake vault - - #non empty *wave, *experiment and *pseudocode - if ((*wave != "") && (*experiment != "") && (*pseudocode != "")) then { - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - - foreach(*row2 in SELECT COLL_NAME - WHERE COLL_NAME like '/*zone/home/grp-*intakeOrVault-%' - AND META_DATA_ATTR_NAME = 'wave' - AND META_DATA_ATTR_VALUE = *wave) { - *path=*row2.COLL_NAME; - - foreach(*row3 in SELECT DATA_NAME - WHERE COLL_NAME = *path - AND META_DATA_ATTR_NAME = 'experiment_type' - AND META_DATA_ATTR_VALUE = *experiment) { - *nameExtension=*row3.DATA_NAME; - - foreach(*row4 in SELECT DATA_SIZE - WHERE DATA_NAME = *nameExtension - AND COLL_NAME = *path - AND META_DATA_ATTR_NAME = 'pseudocode' - AND META_DATA_ATTR_VALUE = *pseudocode) { - *size=*row4.DATA_SIZE; - *name=trimr(*nameExtension, "."); - *extension=triml(*nameExtension, *name); - - writeLine("stdout", "\"*path\";\"*name\";\"*extension\";\"*size\""); - } - } - } - } - } - - #non empty *wave and *experiment - else if ((*wave != "") && (*experiment != "")) then { - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - - foreach(*row2 in SELECT COLL_NAME - WHERE COLL_NAME like '/*zone/home/grp-*intakeOrVault-%' - AND META_DATA_ATTR_NAME = 'wave' - AND META_DATA_ATTR_VALUE = *wave) { - *path=*row2.COLL_NAME; - - foreach(*row3 in SELECT DATA_NAME, DATA_SIZE - WHERE COLL_NAME = *path - AND META_DATA_ATTR_NAME = 'experiment_type' - AND META_DATA_ATTR_VALUE = *experiment) { - *nameExtension=*row3.DATA_NAME; - *size=*row3.DATA_SIZE; - *name=trimr(*nameExtension, "."); - *extension=triml(*nameExtension, *name); - - writeLine("stdout", "\"*path\";\"*name\";\"*extension\";\"*size\""); - } - } - } - } - - #non empty wave pseudocode is empty - else if (*wave != "" && *pseudocode == "") then { - foreach(*row in SELECT COLL_OWNER_ZONE) { - *zone=*row.COLL_OWNER_ZONE; - - foreach(*row2 in SELECT COLL_NAME, DATA_NAME, DATA_SIZE - WHERE COLL_NAME like '/*zone/home/grp-*intakeOrVault-%' - AND META_DATA_ATTR_NAME ='wave' - AND META_DATA_ATTR_VALUE = *wave) { - *path=*row2.COLL_NAME; - *nameExtension=*row2.DATA_NAME; - 
*size=*row2.DATA_SIZE; - *name=trimr(*nameExtension, "."); - *extension=triml(*nameExtension, *name); - - writeLine("stdout", "\"*path\";\"*name\";\"*extension\";\"*size\""); - } - } - } - - else { - writeLine("stdout", "Invalid input"); - } -} - -input *wave="", *experiment="", *pseudocode="" -output ruleExecOut \ No newline at end of file diff --git a/tools/intake/youthVaultCheck.sh b/tools/intake/youthVaultCheck.sh deleted file mode 100644 index 2f4d9c6c7..000000000 --- a/tools/intake/youthVaultCheck.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#Author Niek Bats -#$1 wave -#$2 experiment -#$3 pseudocode -#lists all files, when found any grp-vault-folder using specified parameter(s) - -output=$(irule -r irods_rule_engine_plugin-irods_rule_language-instance -F youthVaultCheck.r "'$1'" "'$2'" "'$3'") -#echo $output -if [[ "$output" == "" ]] -then - echo "no results with parameters $1 $2 $3" - -elif [[ $output == "Invalid input" ]] -then - echo "$output" - -else - outputFile="vault-$1" - if [[ "$2" != "" ]] - then - outputFile="$outputFile-$2" - fi - if [[ "$3" != "" ]] - then - outputFile="$outputFile-$3" - fi - outputFile="$outputFile.csv" - - printf "\"Filepath\";\"name\";\"extension\";\"filesize\"\n" > "$outputFile" - printf "$output" >> "$outputFile" -fi diff --git a/tools/job_scan.r b/tools/job_scan.r deleted file mode 100644 index dd9b2377c..000000000 --- a/tools/job_scan.r +++ /dev/null @@ -1,43 +0,0 @@ -# \file -# \brief job -# \author Ton Smeele, Sietse Snel -# \copyright Copyright (c) 2015-2021, Utrecht university. All rights reserved -# \license GPLv3, see LICENSE -# -# This file can be executed manually or scheduled e.g. once a day. -# It scans an intake collection for datasets and checks the sets, if no collection -# is provided, it will scan a predefined list on intake groups (*groupList) -# -# Prerequisite: the irods user should have write access on the collection and its objects -# -# - - -uuYcRunIntakeScan { - *collectionList = list(); - # intake areas can be added to the group list as needed - *groupList = list('youth'); - *zone = $rodsZoneClient; - - if ( *intakeRoot == 'dummy' ) { - foreach (*grp in *groupList) { - *root = "/*zone/home/grp-intake-*grp"; - *collectionList = cons( *root, *collectionList); - } - } - else { - *collectionList = cons (*intakeRoot, *collectionList); - } - - foreach (*coll in *collectionList) { - writeLine("stdout","Running intake scan for *coll ..."); - *status = "0"; - rule_intake_scan_for_datasets(*coll, *status); - if (*status == "0" ) then *result = "ok" else *result = "ERROR (*status)"; - writeLine("stdout","RunIntakeScan for *intakeRoot result = *result"); - } - -} - -input *intakeRoot='dummy' -output ruleExecOut diff --git a/unit-tests/test_intake.py b/unit-tests/test_intake.py deleted file mode 100644 index 43d737dcc..000000000 --- a/unit-tests/test_intake.py +++ /dev/null @@ -1,159 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Unit tests for the intake module -""" - -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' -__license__ = 'GPLv3, see LICENSE' - -import os -import sys -from unittest import TestCase - -sys.path.append('..') - -from intake_utils import dataset_make_id, dataset_parse_id, intake_extract_tokens, intake_extract_tokens_from_name, intake_scan_get_metadata_update, intake_tokens_identify_dataset - - -class IntakeTest(TestCase): - - def test_intake_tokens_identify_dataset(self): - empty_data = dict() - self.assertFalse(intake_tokens_identify_dataset(empty_data)) - missing_key_data = {"wave": "1", "pseudocode": 
"2"} - self.assertFalse(intake_tokens_identify_dataset(missing_key_data)) - missing_value_data = {"wave": "1", "pseudocode": "2", "experiment_type": ""} - self.assertFalse(intake_tokens_identify_dataset(missing_value_data)) - complete_data = {"wave": "1", "pseudocode": "2", "experiment_type": "3"} - self.assertTrue(intake_tokens_identify_dataset(complete_data)) - - def test_intake_extract_tokens(self): - no_token_data = intake_extract_tokens(None, "") - self.assertEquals(len(no_token_data), 0) - wave_data = intake_extract_tokens(None, "20w") - self.assertEquals(len(wave_data), 1) - self.assertEquals(wave_data["wave"], "20w") - et_data = intake_extract_tokens(None, "chantigap") - self.assertEquals(len(et_data), 1) - self.assertEquals(et_data["experiment_type"], "chantigap") - pseudocode_data = intake_extract_tokens(None, "B12345") - self.assertEquals(len(pseudocode_data), 1) - self.assertEquals(pseudocode_data["pseudocode"], "B12345") - version_data = intake_extract_tokens(None, "VerABC") - self.assertEquals(len(version_data), 1) - self.assertEquals(version_data["version"], "ABC") - - def test_intake_extract_tokens_from_name(self): - buffer = dict() - output = intake_extract_tokens_from_name(None, "20w_chantigap_B12345_VerABC.txt", buffer) - self.assertEquals(len(output), 4) - self.assertEquals(output["wave"], "20w") - self.assertEquals(output["experiment_type"], "chantigap") - self.assertEquals(output["version"], "ABC") - self.assertEquals(output["pseudocode"], "B12345") - - def test_intake_scan_get_metadata_update_coll_in_dataset(self): - parent_path = "/foo/bar/chantigap_10w_B12345" - path = parent_path + "/chantigap_20w_B12346" - complete_metadata = {"wave": "1", - "pseudocode": "2", - "experiment_type": "3", - "version": "Raw", - "directory": parent_path, - "dataset_id": "4", - "dataset_toplevel": "5"} - - output = intake_scan_get_metadata_update(None, path, True, True, complete_metadata) - self.assertEquals(output["in_dataset"], True) - self.assertEquals(len(output["new_metadata"]), 6) - self.assertEquals(output["new_metadata"]["directory"], parent_path) - self.assertEquals(output["new_metadata"]["wave"], "1") - self.assertEquals(output["new_metadata"]["pseudocode"], "2") - self.assertEquals(output["new_metadata"]["experiment_type"], "3") - self.assertEquals(output["new_metadata"]["version"], "Raw") - self.assertEquals(output["new_metadata"]["dataset_id"], "4") - self.assertTrue("dataset_toplevel" not in output["new_metadata"]) - - def test_intake_scan_get_metadata_update_coll_out_dataset_complete(self): - incomplete_metadata = {"wave": "1", "pseudocode": "2"} - path = "/foo/bar/chantigap_10w_B12345/chantigap_B12346" - output = intake_scan_get_metadata_update(None, path, True, False, incomplete_metadata) - self.assertEquals(output["in_dataset"], True) - self.assertEquals(len(output["new_metadata"]), 7) - self.assertEquals(output["new_metadata"]["directory"], path) - self.assertEquals(output["new_metadata"]["dataset_toplevel"], dataset_make_id(output["new_metadata"])) - self.assertEquals(output["new_metadata"]["dataset_id"], dataset_make_id(output["new_metadata"])) - self.assertEquals(output["new_metadata"]["wave"], "1") - self.assertEquals(output["new_metadata"]["version"], "Raw") - self.assertEquals(output["new_metadata"]["pseudocode"], "B12346") - self.assertEquals(output["new_metadata"]["experiment_type"], "chantigap") - - def test_intake_scan_get_metadata_update_coll_out_dataset_incomplete(self): - incomplete_metadata = {"wave": "1"} - path = "/foo/bar/chantigap_10w_B12345/B12346" 
- output = intake_scan_get_metadata_update(None, path, True, False, incomplete_metadata) - self.assertEquals(output["in_dataset"], False) - self.assertEquals(len(output["new_metadata"]), 2) - self.assertEquals(output["new_metadata"]["wave"], "1") - self.assertEquals(output["new_metadata"]["pseudocode"], "B12346") - - def test_intake_scan_get_metadata_update_do_in_dataset(self): - complete_metadata = {"wave": "1", - "pseudocode": "2", - "experiment_type": "3", - "version": "Raw", - "dataset_id": "4", - "dataset_toplevel": "5", - "directory": "6"} - path = "/foo/bar/chantigap_10w_B12345/chantigap_20w_B12346.txt" - output = intake_scan_get_metadata_update(None, path, False, True, complete_metadata) - self.assertEquals(output["in_dataset"], True) - self.assertEquals(len(output["new_metadata"]), 6) - self.assertEquals(output["new_metadata"]["wave"], "1") - self.assertEquals(output["new_metadata"]["pseudocode"], "2") - self.assertEquals(output["new_metadata"]["experiment_type"], "3") - self.assertEquals(output["new_metadata"]["version"], "Raw") - self.assertEquals(output["new_metadata"]["dataset_id"], "4") - self.assertTrue("dataset_toplevel" not in output["new_metadata"]) - - def test_intake_scan_get_metadata_update_do_out_dataset_complete(self): - incomplete_metadata = {"wave": "1", "pseudocode": "2"} - path = "/foo/bar/chantigap_10w_B12345/chantigap_B12346.txt" - coll = os.path.dirname(path) - output = intake_scan_get_metadata_update(None, path, False, False, incomplete_metadata) - self.assertEquals(output["in_dataset"], True) - self.assertEquals(len(output["new_metadata"]), 7) - self.assertEquals(output["new_metadata"]["directory"], coll) - self.assertEquals(output["new_metadata"]["dataset_id"], dataset_make_id(output["new_metadata"])) - self.assertEquals(output["new_metadata"]["dataset_toplevel"], dataset_make_id(output["new_metadata"])) - self.assertEquals(output["new_metadata"]["wave"], "1") - self.assertEquals(output["new_metadata"]["version"], "Raw") - self.assertEquals(output["new_metadata"]["pseudocode"], "B12346") - self.assertEquals(output["new_metadata"]["experiment_type"], "chantigap") - - def test_intake_scan_get_metadata_update_do_out_dataset_incomplete(self): - incomplete_metadata = {"wave": "1"} - path = "/foo/bar/chantigap_10w_B12345/B12346.txt" - output = intake_scan_get_metadata_update(None, path, False, False, incomplete_metadata) - self.assertEquals(output["in_dataset"], False) - self.assertEquals(len(output["new_metadata"]), 2) - self.assertEquals(output["new_metadata"]["wave"], "1") - self.assertEquals(output["new_metadata"]["pseudocode"], "B12346") - - def test_dataset_make_id(self): - input = {"wave": "20w", - "experiment_type": "echo", - "pseudocode": "B12345", - "version": "Raw", - "directory": "/foo/bar/baz"} - self.assertEquals(dataset_make_id(input), - "20w\techo\tB12345\tRaw\t/foo/bar/baz") - - def test_dataset_parse_id(self): - input = "20w\techo\tB12345\tRaw\t/foo/bar/baz" - output = dataset_parse_id(input) - self.assertEquals(output.get("wave"), "20w") - self.assertEquals(output.get("experiment_type"), "echo") - self.assertEquals(output.get("pseudocode"), "B12345") - self.assertEquals(output.get("version"), "Raw") - self.assertEquals(output.get("directory"), "/foo/bar/baz") diff --git a/unit-tests/unit_tests.py b/unit-tests/unit_tests.py index 3bd9d873e..8af940d91 100644 --- a/unit-tests/unit_tests.py +++ b/unit-tests/unit_tests.py @@ -6,7 +6,6 @@ from unittest import makeSuite, TestSuite from test_group_import import GroupImportTest -from test_intake import 
IntakeTest from test_policies import PoliciesTest from test_revisions import RevisionTest from test_schema_transformations import CorrectifyIsniTest, CorrectifyOrcidTest, CorrectifyScopusTest @@ -21,7 +20,6 @@ def suite(): test_suite.addTest(makeSuite(CorrectifyOrcidTest)) test_suite.addTest(makeSuite(CorrectifyScopusTest)) test_suite.addTest(makeSuite(GroupImportTest)) - test_suite.addTest(makeSuite(IntakeTest)) test_suite.addTest(makeSuite(PoliciesTest)) test_suite.addTest(makeSuite(RevisionTest)) test_suite.addTest(makeSuite(UtilMiscTest)) diff --git a/util/config.py b/util/config.py index 731d131d9..4827294c1 100644 --- a/util/config.py +++ b/util/config.py @@ -99,7 +99,6 @@ def __repr__(self): enable_deposit=False, enable_open_search=False, enable_inactivity_notification=False, - enable_intake=False, enable_datarequest=False, enable_data_package_archive=False, enable_data_package_download=False, diff --git a/uuGroupPolicies.r b/uuGroupPolicies.r index 7bf09aec9..c2cea29a3 100644 --- a/uuGroupPolicies.r +++ b/uuGroupPolicies.r @@ -59,14 +59,14 @@ uuGroupPreSudoGroupAdd(*groupName, *initialAttr, *initialValue, *initialUnit, *p uuGetBaseGroup(*groupName, *baseName); if (*baseName == *groupName) { # Do not allow creating a standalone "read-" or "vault-" group. - # There must always be a corresponding "intake-" or "research-" group. + # There must always be a corresponding "research-" group. fail; } uuGroupUserIsManager(*baseName, uuClientFullName, *isManagerInBaseGroup); if (!*isManagerInBaseGroup) { # Only allow creation of a read or vault group if the creator is a - # manager in the base group. (research or intake). + # manager in the research group. fail; } @@ -469,14 +469,14 @@ uuPostSudoGroupAdd(*groupName, *initialAttr, *initialValue, *initialUnit, *polic # taken after automatic creation of vault groups. } else { - # This is a group manager managed group (i.e. 'research-', 'deposit-','grp-', 'intake-', 'priv-', 'datamanager-'). + # This is a group manager managed group (i.e. 'research-', 'deposit-', 'priv-', 'datamanager-'). # Add the creator as a member. errorcode(msiSudoGroupMemberAdd(*groupName, uuClientFullName, "")); - # Perform group prefix-dependent actions (e.g. create vaults for intake/research groups). + # Perform group prefix-dependent actions (e.g. create vaults for research groups). - if (*groupName like regex "(intake|research)-.*") { + if (*groupName like regex "research-.*") { # Create a corresponding RO group. uuChop(*groupName, *_, *baseName, "-", true); *roGroupName = "read-*baseName"; @@ -502,7 +502,7 @@ uuPostSudoGroupAdd(*groupName, *initialAttr, *initialValue, *initialUnit, *polic } else if (*groupName like "datamanager-*") { # Give the newly created datamanager group read access to all - # existing intake/research home dirs and vaults in its category. + # existing research home dirs and vaults in its category. *category = *policyKv."category"; foreach ( @@ -513,9 +513,9 @@ uuPostSudoGroupAdd(*groupName, *initialAttr, *initialValue, *initialUnit, *polic AND META_USER_ATTR_NAME = 'category' AND META_USER_ATTR_VALUE = '*category' ) { - # Filter down to intake/research groups and get their vault groups. + # Filter down to research groups and get their vault groups. 
*catGroup = *row."USER_GROUP_NAME"; - if (*catGroup like regex "(intake|research)-.*") { + if (*catGroup like regex "research-.*") { *aclKv."forGroup" = *catGroup; msiSudoObjAclSet("recursive", "read", *groupName, "/$rodsZoneClient/home/*catGroup", *aclKv); @@ -548,7 +548,7 @@ uuPostSudoGroupAdd(*groupName, *initialAttr, *initialValue, *initialUnit, *polic # Set group manager-managed group metadata. # - # Note: Setting the category of an intake/research group will trigger + # Note: Setting the category of an research group will trigger # an ACL change: The datamanager group in the category, if it exists # will get read access to this group an its accompanying vault. # See uuPostSudoObjMetaSet. diff --git a/uuGroupPolicyChecks.r b/uuGroupPolicyChecks.r index 54d08ee04..374b84e44 100644 --- a/uuGroupPolicyChecks.r +++ b/uuGroupPolicyChecks.r @@ -38,7 +38,7 @@ uuUserNameIsValid(*name) # # Group names must: # -# - be prefixed with 'intake-' or 'research-' or 'deposit-' +# - be prefixed with 'research-' or 'deposit-' # - contain only lowercase characters, numbers and hyphens # - not start or end with a hyphen # @@ -49,7 +49,7 @@ uuUserNameIsValid(*name) # \param[in] name # uuGroupNameIsValid(*name) - = *name like regex ``(intake|research|deposit)-([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])``; + = *name like regex ``(research|deposit)-([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])``; uuGroupNameIsDatamanager(*name) = *name like regex ``(datamanager)-([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])``; @@ -198,7 +198,7 @@ uuGroupPolicyCanGroupAdd(*actor, *groupName, *category, *subcategory, *expiratio uuChop(*groupName, *prefix, *base, "-", true); - # For research and intake groups: Make sure their ro and + # For research groups: Make sure their ro and # vault groups do not exist yet. *roName = "read-*base"; uuGroupExists(*roName, *roExists); @@ -274,7 +274,7 @@ uuGroupPolicyCanGroupAdd(*actor, *groupName, *category, *subcategory, *expiratio *reason = "You must have priv-group-add and priv-cat-add to add a datamanger group" } } else { - *reason = "Group names must start with one of 'intake-', 'research-', 'deposit-', or 'datamanager-' and may only contain lowercase letters (a-z) and hyphens (-)."; + *reason = "Group names must start with one of 'research-', 'deposit-', or 'datamanager-' and may only contain lowercase letters (a-z) and hyphens (-)."; } } else { *reason = "You cannot create groups because you are not a member of the priv-group-add group."; diff --git a/uuLock.r b/uuLock.r deleted file mode 100644 index 2f16e4820..000000000 --- a/uuLock.r +++ /dev/null @@ -1,140 +0,0 @@ -# \file uuLock.r -# \brief Locking functions. -# \author Ton Smeele -# \copyright Copyright (c) 2015, Utrecht University. All rights reserved. -# \license GPLv3, see LICENSE. - -# \brief Obtain a lock on a collection. -# -# \param[in] collection name of the collection to be locked -# \param[out] status 0 = locked, nonzero = lock failed (e.g. in use) -# -uuLock(*collection, *status) { - msiGetIcatTime(*dateTime, "unix"); - *lockId = $userNameClient ++ ":" ++ *dateTime; - # let everyone know we need a lock - # NB: a race condition could happen when another process owned by - # the same user requests a lock at the very same second. 
- # to minimize the risk we include username in the lockid - msiString2KeyValPair("uuLockRequest=*lockId",*kvLockRequest); - msiAssociateKeyValuePairsToObj(*kvLockRequest, *collection, "-C"); - # check upstream and on collection itself if lock (request) exists - *path = ""; - *lockFound = false; - foreach (*segment in split(*collection, '/')) { - *path = "*path/*segment"; - if (*path != *collection) { - uuLockExists(*path, *lockFound); - if (*lockFound) { - break; - } - } else { - # TODO check collection itself yet ignore our own request - foreach (*row in SELECT META_COLL_ATTR_NAME,META_COLL_ATTR_VALUE - WHERE COLL_NAME = *collection - AND META_COLL_ATTR_NAME LIKE "uuLock%" - ) { - msiGetValByKey(*row, "META_COLL_ATTR_NAME", *key); - msiGetValByKey(*row, "META_COLL_ATTR_VALUE", *value); - if ("*key=*value" != "uuLockRequest=*lockId"){ - *lockFound = true; - } - } - } - } - if (!*lockFound) { - # also check downstream if other have (requested) a lock - # we can check all subcollections in one go - foreach (*rows in SELECT META_COLL_ATTR_NAME,COLL_NAME - WHERE COLL_PARENT_NAME LIKE '*collection%' - AND META_COLL_ATTR_NAME LIKE 'uuLock%' - ){ - # SELECT does not support 'OR' construct, therefore we need to - # check and ignore collections that start with similar prefix - # yet are in a different tree - # e.g. /zone/home/col/col2 and /zone/home/cola/col2 - # both cases col2 appears to have parent "col%" - msiGetValByKey(*rows, "COLL_NAME", *thisCollection); - if (*thisCollection like "*collection/\*") { - # we have an existing lock - *lockFound = true; - break; - } - } - } - if (*lockFound) { - *status = 1; - # retract our lock request, someone else got a lock - msiRemoveKeyValuePairsFromObj(*kvLockRequest, *collection, "-C"); - } else { - # change our request into a real lock - msiString2KeyValPair("uuLocked=*lockId",*kvLock); - msiAssociateKeyValuePairsToObj(*kvLock, *collection, "-C"); - msiRemoveKeyValuePairsFromObj(*kvLockRequest, *collection, "-C"); - *status = 0; - } -} - -# -# \brief uuUnlock unlocks a collection -# -# \param[in] collection name of the collection to unlock -uuUnlock(*collection) { - # NB: always succeeds regardless if lock actually exists - foreach (*rows in SELECT META_COLL_ATTR_VALUE - WHERE COLL_NAME = '*collection' - AND META_COLL_ATTR_NAME = 'uuLocked' - ){ - # should return max 1 row, otherwise we have multiple locks?? - msiGetValByKey(*rows,"META_COLL_ATTR_VALUE",*lockValue); - msiString2KeyValPair("uuLocked=*lockValue",*kvLocked); - msiRemoveKeyValuePairsFromObj(*kvLocked, *collection, "-C") - } -} - -# \brief See if a collection has a lock on it. 
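The lock values written by these rules are plain strings of the form "<username>:<epoch seconds>", stored as uuLockRequest / uuLocked AVUs, and uuLockExists below treats entries older than one week as expired. A minimal pure-Python sketch of that convention (make_lock_id, lock_user, lock_timestamp and lock_expired are invented names):

import time

LOCK_MAX_AGE = 7 * 86400  # locks and lock requests are considered expired after one week


def make_lock_id(username):
    """Compose a lock value as '<username>:<epoch seconds>'."""
    return '{}:{}'.format(username, int(time.time()))


def lock_user(lock_id):
    """Username part of a lock value (everything before the first ':')."""
    return lock_id.split(':', 1)[0]


def lock_timestamp(lock_id):
    """Timestamp part of a lock value, as seconds since the epoch."""
    return int(lock_id.split(':', 1)[1])


def lock_expired(lock_id, now=None):
    """True when the lock (request) is older than LOCK_MAX_AGE."""
    now = int(time.time()) if now is None else now
    return lock_timestamp(lock_id) + LOCK_MAX_AGE < now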
-# -# \param[in] collection name of the collection -# \param[out] isLocked true if collection has a lock(request) -# -uuLockExists(*collection, *isLocked) { - # NB: reports true for both existing locks and lock requests - *isLocked = false; - msiGetIcatTime(*currentTime, "unix"); - foreach (*row in SELECT META_COLL_ATTR_NAME,META_COLL_ATTR_VALUE - WHERE COLL_NAME = *collection - AND META_COLL_ATTR_NAME LIKE "uuLock%" - ) { - # rows found means there is an existing lock (request) - # our last hope is that this is an expired request that we can ignore - msiGetValByKey(*row,"META_COLL_ATTR_NAME",*lockKey); - msiGetValByKey(*row,"META_COLL_ATTR_VALUE",*lockValue); - *lockTime = double(uuLockGetDateTime(*lockValue)); - if ( - ((*lockTime + 7 * 86400 ) < *currentTime) - # remove locks/requests after expire time of 1 week - # && (*lockKey == "lockRequest") - ) { - # cleanup lock requests older than 5 minutes - msiString2KeyValPair("*lockKey=*lockValue",*kvExpiredLock); - msiRemoveKeyValuePairsFromObj(*kvExpiredLock, *collection, "-C"); - } else { - # there is a valid existing lock - *isLocked = true; - } - } -} - -# \brief Function to get the username part of a lock. -# -# \param[in] lock name of the lock -# \return username -# -uuLockGetUser(*lock) = substr(*lock, 0, strlen(*lock) - strlen(triml(*lock,":")) -1); - -# \brief Function to get the datestamp part of a lock. -# -# \param[in] lock name of the lock -# \return datetimestamp (in seconds since epoch) -# -uuLockGetDateTime(*lock) = triml(*lock,":"); diff --git a/yc2Vault.r b/yc2Vault.r deleted file mode 100644 index ce9331e6a..000000000 --- a/yc2Vault.r +++ /dev/null @@ -1,388 +0,0 @@ -# \file -# \brief move selected datasets from intake area to the vault area -# this rule is to be executed by a background process with write access to vault -# and read access to the intake area -# \author Ton Smeele -# \copyright Copyright (c) 2015, Utrecht university. 
All rights reserved -# \license GPLv3, see LICENSE -# -#test { -# *intakeRoot = '/nluu1ot/home/grp-intake-youth'; -# *vaultRoot = '/nluu1ot/home/grp-vault-youth'; -# uuYc2Vault(*intakeRoot, *vaultRoot, *status); -# writeLine("serverLog","result status of yc2Vault is *status"); -#} - - -# \brief -# -# \param[in] path pathname of the tree-item -# \param[in] name segment of path, name of collection or data object -# \param[in] isCol true if the object is a collection, otherwise false -# \param[in,out] buffer -# -#uuTreeMyRule(*parent, *objectName, *isCol, *buffer) { -# writeLine("serverLog","parent = *parent"); -# writeLine("serverLog","name = *objectName"); -# writeLine("serverLog","isCol = *isCol"); -# writeLine("serverLog","buffer[path]= " ++ *buffer."path"); -# if (*isCol) { -# *buffer."path" = *buffer."path"++"="; -# } -#} - - - - -uuYcVaultDatasetGetPath(*vaultRoot, *datasetId, *datasetPath) { - uuYcDatasetParseId(*datasetId, *datasetComponents); - *wave = *datasetComponents."wave"; - *experimentType = *datasetComponents."experiment_type"; - *pseudocode = *datasetComponents."pseudocode"; - *version = *datasetComponents."version"; - *sep = "_"; - *wepv = *wave ++ *sep ++ *experimentType ++ *sep ++ *pseudocode ++ *sep ++ "ver*version"; - *datasetPath = "*vaultRoot/*wave/*experimentType/*pseudocode/*wepv"; -} - -uuYcVaultDatasetExists(*vaultRoot, *datasetId, *exists) { - *exists = false; - uuYcVaultDatasetGetPath(*vaultRoot, *datasetId, *datasetPath); - foreach (*row in SELECT COLL_NAME WHERE COLL_NAME = '*datasetPath') { - *exists = true; - break; - } -} - - -uuYcVaultDatasetAddMeta(*vaultPath, *datasetId) { - uuYcDatasetParseId(*datasetId, *datasetComponents); - *wave = *datasetComponents."wave"; - *experimentType = *datasetComponents."experiment_type"; - *pseudocode = *datasetComponents."pseudocode"; - *version = *datasetComponents."version"; - msiGetIcatTime(*date, "unix"); - msiAddKeyVal(*kv, "wave", *wave); - msiAddKeyVal(*kv, "experiment_type", *experimentType); - msiAddKeyVal(*kv, "pseudocode", *pseudocode); - msiAddKeyVal(*kv, "version", *version); - msiAddKeyVal(*kv, "dataset_date_created", *date); - msiAssociateKeyValuePairsToObj(*kv, *vaultPath, "-C"); -} - -uuYcVaultWalkRemoveObject(*itemParent, *itemName, *itemIsCollection, *buffer, *status) { -# writeLine("serverLog", "...removing *itemParent/*itemName"); - if (*itemIsCollection) { - msiRmColl("*itemParent/*itemName", "forceFlag=", *status); - } else { - msiDataObjUnlink("objPath=*itemParent/*itemName++++forceFlag=", *status); - } -} - - -uuYcVaultIngestObject(*objectPath, *isCollection, *vaultPath, *status) { - # from the original object only the below list '*copiedMetadata' of metadata keys - # is copied to the vault object, other info is ignored - *copiedMetadata = list("wave", "experiment_type", "pseudocode", "version", - "error", "warning", "comment", "dataset_error", - "dataset_warning", "datasetid"); - *status = 0; - if (*isCollection) { - msiCollCreate(*vaultPath, "1", *status); - if (*status == 0) { - foreach (*row in SELECT META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE - WHERE COLL_NAME = '*objectPath' - ) { - msiGetValByKey(*row, "META_COLL_ATTR_NAME", *key); - msiGetValByKey(*row, "META_COLL_ATTR_VALUE", *value); - msiString2KeyValPair("*key=*value",*kv); - # add relevant kvlist to vault collection object - foreach (*meta in *copiedMetadata) { - if (*key == *meta) { - msiAssociateKeyValuePairsToObj(*kv, *vaultPath, "-C"); - } - } - } - foreach (*row in SELECT COLL_OWNER_NAME, COLL_OWNER_ZONE, COLL_CREATE_TIME - WHERE 
COLL_NAME = '*objectPath' - ) { - msiGetValByKey(*row, "COLL_OWNER_NAME", *ownerName); - msiGetValByKey(*row, "COLL_OWNER_ZONE", *ownerZone); - msiGetValByKey(*row, "COLL_CREATE_TIME", *createTime); - msiString2KeyValPair("submitted_by=*ownerName#*ownerZone",*kvSubmittedBy); - msiString2KeyValPair("submitted_date=*createTime",*kvSubmittedDate); - msiAssociateKeyValuePairsToObj(*kvSubmittedBy, *vaultPath, "-C"); - msiAssociateKeyValuePairsToObj(*kvSubmittedDate, *vaultPath, "-C"); - } - } - } else { # its not a collection but a data object - # first chksum the original file, then use it to verify the vault copy - msiDataObjChksum(*objectPath, "forceChksum=", *checksum); - msiDataObjCopy(*objectPath, *vaultPath, "verifyChksum=", *status); - if (*status == 0) { - uuChopPath(*objectPath, *collection, *dataName); - foreach (*row in SELECT META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE - WHERE COLL_NAME = '*collection' - AND DATA_NAME = '*dataName' - ) { - msiGetValByKey(*row, "META_DATA_ATTR_NAME", *key); - msiGetValByKey(*row, "META_DATA_ATTR_VALUE", *value); - # add relevant kvlist to vault collection object - msiString2KeyValPair("*key=*value",*kv); - foreach (*meta in *copiedMetadata) { - if (*key == *meta) { - msiAssociateKeyValuePairsToObj(*kv, *vaultPath, "-d"); - } - } - } - # add metadata found in system info - foreach (*row in SELECT DATA_OWNER_NAME, DATA_OWNER_ZONE, DATA_CREATE_TIME - WHERE COLL_NAME = '*collection' - AND DATA_NAME = '*dataName' - ) { - msiGetValByKey(*row, "DATA_OWNER_NAME", *ownerName); - msiGetValByKey(*row, "DATA_OWNER_ZONE", *ownerZone); - msiGetValByKey(*row, "DATA_CREATE_TIME", *createTime); - msiString2KeyValPair("submitted_by=*ownerName#*ownerZone",*kvSubmittedBy); - msiString2KeyValPair("submitted_date=*createTime",*kvSubmittedDate); - msiAssociateKeyValuePairsToObj(*kvSubmittedBy, *vaultPath, "-d"); - msiAssociateKeyValuePairsToObj(*kvSubmittedDate, *vaultPath, "-d"); - # Skip duplicas - break; - } - } - } -} - - - -uuYcVaultWalkIngestObject(*itemParent, *itemName, *itemIsCollection, *buffer, *status) { - *sourcePath = "*itemParent/*itemName"; - *destPath = *buffer."destination"; # top level destination is specified - if (*sourcePath != *buffer."source") { - # rewrite path to copy objects that are located underneath the toplevel collection - *sourceLength = strlen(*sourcePath); - *relativePath = substr(*sourcePath, strlen(*buffer."source") + 1, *sourceLength); - *destPath = *buffer."destination" ++ "/" ++ *relativePath; - } -# writeLine("serverLog","VLT from = *sourcePath"); -# writeLine("serverLog","VLT to = *destPath"); - uuYcVaultIngestObject(*sourcePath, *itemIsCollection, *destPath, *status); -} - - -uuYcDatasetCollectionMove2Vault(*intakeRoot,*topLevelCollection, *datasetId, *vaultRoot, *status) { - writeLine("serverLog","\nmoving dataset-typeA *datasetId from *topLevelCollection to vault"); - *status = 0; - uuYcVaultDatasetExists(*vaultRoot, *datasetId, *exists); - if (!*exists) { - uuYcVaultDatasetGetPath(*vaultRoot, *datasetId, *vaultPath); - # create the in-between levels of the path to the toplevel collection - uuChopPath(*vaultPath, *vaultParent, *vaultCollection); - msiCollCreate(*vaultParent, "1", *status); -# writeLine("serverLog","VAULT: dataset created *datasetId status=*status path=*vaultPath"); - if (*status == 0) { - # copy the dataset tree to the vault - uuChopPath(*topLevelCollection, *intakeParent, *intakeCollection); - *buffer."source" = *topLevelCollection; - *buffer."destination" = *vaultPath; -# writeLine("serverLog","VAULT: source = 
*topLevelCollection"); -# writeLine("serverLog","VAULT: dest = *vaultPath"); - uuTreeWalk( - "forward", - *topLevelCollection, - "uuYcVaultWalkIngestObject", - *buffer, - *status - ); - uuKvClear(*buffer); - if (*status == 0) { - # stamp the vault dataset collection with additional metadata - msiGetIcatTime(*date, "unix"); - msiAddKeyVal(*kv, "dataset_date_created", *date); - msiAssociateKeyValuePairsToObj(*kv, *vaultPath, "-C"); - # and finally remove the dataset original in the intake area - msiRmColl(*topLevelCollection, "forceFlag=", *error); -# uuTreeWalk( -# "reverse", -# *topLevelCollection, -# "uuYcVaultWalkRemoveObject", -# *buffer, -# *error -# ); - if (*error != 0) { - writeLine("serverLog", - "ERROR: unable to remove intake collection *topLevelCollection"); - } - } else { - # move failed (partially), cleanup vault - # NB: keep the dataset in the vault queue so we can retry some other time - writeLine("serverLog","ERROR: Ingest failed for *datasetId error = *status"); - uuTreeWalk("reverse", *vaultPath, "uuYcVaultWalkRemoveObject", *buffer, *error); - } - - } - } else { - writeLine("serverLog","INFO: version already exists in vault: *datasetId"); - # duplicate dataset, signal error and throw out of vault queue - *message = "Duplicate dataset, version already exists in vault"; - uuYcDatasetErrorAdd(*intakeRoot, *datasetId,*message); - uuYcDatasetMelt(*topLevelCollection, *datasetId, *status); - uuYcDatasetUnlock(*topLevelCollection, *datasetId, *status); - *status = 1; # duplicate dataset version error - } -} - -uuYcDatasetObjectsOnlyMove2Vault(*intakeRoot, *topLevelCollection, *datasetId, *vaultRoot, *status) { - writeLine("serverLog","\nmoving dataset-typeB *datasetId from *topLevelCollection to vault"); - uuYcVaultDatasetExists(*vaultRoot, *datasetId, *exists); - if (!*exists) { - # new dataset(version) we can safely ingest into vault - uuYcVaultDatasetGetPath(*vaultRoot, *datasetId, *vaultPath); - # create path to and including the toplevel collection (will create in-between levels) - msiCollCreate(*vaultPath, "1", *status); -# writeLine("serverLog","VAULT: dataset created *datasetId status=*status path=*vaultPath"); - if (*status == 0) { - # stamp the vault dataset collection with default metadata - uuYcVaultDatasetAddMeta(*vaultPath, *datasetId); - # copy data objects to the vault - foreach (*dataRow in SELECT DATA_NAME - WHERE COLL_NAME = '*topLevelCollection' - AND META_DATA_ATTR_NAME = 'dataset_toplevel' - AND META_DATA_ATTR_VALUE = '*datasetId' - ) { - msiGetValByKey(*dataRow, "DATA_NAME", *dataName); - *intakePath = "*topLevelCollection/*dataName"; - uuYcVaultIngestObject(*intakePath, false, "*vaultPath/*dataName", *status); - if (*status != 0) { - break; - } - } - if (*status == 0) { - # data ingested, what's left is to delete the original in intake area - # this will also melt/unfreeze etc because metadata is removed too - foreach (*dataRow in SELECT DATA_NAME - WHERE COLL_NAME = '*topLevelCollection' - AND META_DATA_ATTR_NAME = 'dataset_toplevel' - AND META_DATA_ATTR_VALUE = '*datasetId' - ) { - msiGetValByKey(*dataRow, "DATA_NAME", *dataName); - *intakePath = "*topLevelCollection/*dataName"; -# writeLine("serverLog","removing intake file: *intakePath"); - msiDataObjUnlink("objPath=*intakePath++++forceFlag=", *error); - if (*error != 0) { - writeLine("serverLog","ERROR: unable to remove intake object *intakePath"); - } - } - } else { - # error occurred during ingest, cleanup vault area and relay the error to user - # NB: keep the dataset in the vault queue so we can 
retry some other time
-                writeLine("serverLog","ERROR: Ingest failed for *datasetId error = *status");
-                *buffer = "required yet dummy parameter";
-                uuTreeWalk("reverse", *vaultPath, "uuYcVaultWalkRemoveObject", *buffer, *error);
-            }
-        }
-    } else {
-        # duplicate dataset, signal error and throw out of vault queue
-        writeLine("serverLog","INFO: version already exists in vault: *datasetId");
-        *message = "Duplicate dataset, version already exists in vault";
-        uuYcDatasetErrorAdd(*intakeRoot, *datasetId,*message);
-        uuYcDatasetMelt(*topLevelCollection, *datasetId, *status);
-        uuYcDatasetUnlock(*topLevelCollection, *datasetId, *status);
-        *status = 1; # duplicate dataset version error
-    }
-}
-
-
-# \brief move all locked datasets to the vault
-#
-# \param[in] intakeCollection pathname root of intake area
-# \param[in] vaultCollection pathname root of vault area
-# \param[out] status result of operation either "ok" or "error"
-#
-uuYc2Vault(*intakeRoot, *vaultRoot, *status) {
-    # 1. add to_vault_freeze metadata lock to the dataset
-    # 2. check that dataset does not yet exist in the vault
-    # 3. copy dataset to vault with its metadata
-    # 4. remove dataset from intake
-    # upon any error:
-    # - delete partial data from vault
-    # - add error to intake dataset metadata
-    # - remove locks on intake dataset (to_vault_freeze, to_vault_lock)
-    *status = 0; # 0 is success, nonzero is error
-    *datasets_moved = 0;
-
-    # note that we have to allow for multiple types of datasets:
-    # type A: a single toplevel collection with a tree underneath
-    # type B: one or more data files located within the same collection
-    # processing varies slightly between them, so process each type in turn
-    #
-    # TYPE A:
-    foreach (*row in SELECT COLL_NAME, META_COLL_ATTR_VALUE
-                     WHERE META_COLL_ATTR_NAME = 'dataset_toplevel'
-                     AND COLL_NAME like '*intakeRoot/%') {
-        msiGetValByKey(*row, "COLL_NAME", *topLevelCollection);
-        msiGetValByKey(*row, "META_COLL_ATTR_VALUE", *datasetId);
-        uuYcObjectIsLocked(*topLevelCollection, true, *locked, *frozen);
-        if (*locked) {
-            uuYcDatasetFreeze(*topLevelCollection, *datasetId, *status);
-            if (*status == 0) {
-                # dataset frozen; now move to vault and remove from intake area
-                uuYcDatasetCollectionMove2Vault(
-                    *intakeRoot,
-                    *topLevelCollection,
-                    *datasetId,
-                    *vaultRoot,
-                    *status
-                );
-                if (*status == 0) {
-                    *datasets_moved = *datasets_moved + 1;
-                }
-            }
-        }
-    }
-    # TYPE B:
-    foreach (*row in SELECT COLL_NAME, META_DATA_ATTR_VALUE
-                     WHERE META_DATA_ATTR_NAME = 'dataset_toplevel'
-                     AND COLL_NAME like '*intakeRoot%'
-# fixme: skip collnames that are not in the same tree yet share the prefix
-            ) {
-
-        msiGetValByKey(*row, "COLL_NAME", *topLevelCollection);
-        msiGetValByKey(*row, "META_DATA_ATTR_VALUE", *datasetId);
-        # check if to_vault_lock exists on all the dataobjects of this dataset
-        *allLocked = true;
-        foreach (*dataRow in SELECT DATA_NAME
-                             WHERE COLL_NAME = '*topLevelCollection'
-                             AND META_DATA_ATTR_NAME = 'dataset_toplevel'
-                             AND META_DATA_ATTR_VALUE = '*datasetId'
-                ) {
-            msiGetValByKey(*dataRow, "DATA_NAME", *dataName);
-            uuYcObjectIsLocked("*topLevelCollection/*dataName", false, *locked, *frozen);
-            *allLocked = *allLocked && *locked;
-        }
-        if (*allLocked) {
-            uuYcDatasetFreeze(*topLevelCollection, *datasetId, *status);
-            if (*status == 0) {
-                # dataset frozen, now move to fault and remove from intake area
-                uuYcDatasetObjectsOnlyMove2Vault(
-                    *intakeRoot,
-                    *topLevelCollection,
-                    *datasetId,
-                    *vaultRoot,
-                    *status
-                );
-                if (*status == 0) {
-                    *datasets_moved = *datasets_moved + 1;
-                }
-            }
-        }
-    }
-    if (*datasets_moved > 0) {
-        writeLine("serverLog","\nmoved in total *datasets_moved dataset(s) to the vault");
-    }
-}
-
-#input null
-#output ruleExecOut
diff --git a/ycDataset.r b/ycDataset.r
deleted file mode 100644
index 4cb09dab7..000000000
--- a/ycDataset.r
+++ /dev/null
@@ -1,175 +0,0 @@
-# \file
-# \brief Youth Cohort - Dataset related functions.
-# \author Chris Smeele
-# \copyright Copyright (c) 2015, Utrecht University. All rights reserved.
-# \license GPLv3, see LICENSE
-
-# \brief Generate a dataset identifier based on WEPV values.
-#
-# \param[in] idComponents a kvList containing WEPV values
-# \param[out] id a dataset id string
-#
-uuYcDatasetMakeId(*idComponents, *id){
-    *id =
-           *idComponents."wave"
-        ++ "\t" ++ *idComponents."experiment_type"
-        ++ "\t" ++ *idComponents."pseudocode"
-        ++ "\t" ++ *idComponents."version"
-        ++ "\t" ++ *idComponents."directory";
-}
-
-# \brief Parse a dataset identifier and return WEPV values.
-#
-# \param[in] id a dataset id string
-# \param[out] idComponents a kvList containing WEPV values
-#
-uuYcDatasetParseId(*id, *idComponents){
-    *idParts = split(*id, "\t");
-    *idComponents."wave" = elem(*idParts, 0);
-    *idComponents."experiment_type" = elem(*idParts, 1);
-    *idComponents."pseudocode" = elem(*idParts, 2);
-    *idComponents."version" = elem(*idParts, 3);
-    *idComponents."directory" = elem(*idParts, 4);
-}
-
-# \brief Find dataset ids under *root.
-#
-# \param[in] root
-# \param[out] ids a list of dataset ids
-#
-uuYcDatasetGetIds(*root, *ids) {
-    *idsString = "";
-    foreach (*item in SELECT META_DATA_ATTR_VALUE WHERE COLL_NAME = "*root" AND META_DATA_ATTR_NAME = 'dataset_id') {
-        # Datasets directly under *root need to be checked for separately due to limitations on the general query system.
-        if (strlen(*idsString) > 0) {
-            *idsString = *idsString ++ "\n";
-        }
-        *idsString = *idsString ++ *item."META_DATA_ATTR_VALUE";
-    }
-    foreach (*item in SELECT META_DATA_ATTR_VALUE WHERE COLL_NAME LIKE "*root/%" AND META_DATA_ATTR_NAME = 'dataset_id') {
-        if (strlen(*idsString) > 0) {
-            *idsString = *idsString ++ "\n";
-        }
-        *idsString = *idsString ++ *item."META_DATA_ATTR_VALUE";
-    }
-    *ids = split(*idsString, "\n");
-}
-
-# \brief Get a list of toplevel objects that belong to the given dataset id.
-#
-# \param[in] root
-# \param[in] id
-# \param[out] objects a list of toplevel object paths
-# \param[out] isCollection whether this dataset consists of a single toplevel collection
-#
-uuYcDatasetGetToplevelObjects(*root, *id, *objects, *isCollection) {
-    *isCollection = false;
-
-    *objectsString = "";
-    foreach (*item in SELECT COLL_NAME WHERE COLL_NAME LIKE "*root/%" AND META_COLL_ATTR_NAME = 'dataset_toplevel' AND META_COLL_ATTR_VALUE = "*id") {
-        *isCollection = true;
-        *objectsString = *item."COLL_NAME";
-    }
-    if (!*isCollection) {
-        foreach (*item in SELECT DATA_NAME, COLL_NAME WHERE COLL_NAME = "*root" AND META_DATA_ATTR_NAME = 'dataset_toplevel' AND META_DATA_ATTR_VALUE = "*id") {
-            # Datasets directly under *root need to be checked for separately due to limitations on the general query system.
-            if (strlen(*objectsString) > 0) {
-                *objectsString = *objectsString ++ "\n";
-            }
-            *objectsString = *objectsString ++ *item."COLL_NAME" ++ "/" ++ *item."DATA_NAME";
-        }
-        foreach (*item in SELECT DATA_NAME, COLL_NAME WHERE COLL_NAME LIKE "*root/%" AND META_DATA_ATTR_NAME = 'dataset_toplevel' AND META_DATA_ATTR_VALUE = "*id") {
-            if (strlen(*objectsString) > 0) {
-                *objectsString = *objectsString ++ "\n";
-            }
-            *objectsString = *objectsString ++ *item."COLL_NAME" ++ "/" ++ *item."DATA_NAME";
-        }
-    }
-    *objects = split(*objectsString, "\n");
-    #writeLine("stdout", "Got dataset toplevel objects for <*id>: *objectsString");
-}
-
-# \brief Get a list of relative paths to all data objects in a dataset.
-#
-# \param[in] root
-# \param[in] id
-# \param[out] objects a list of relative object paths (e.g. file1.dat, some-subdir/file2.dat...)
-#
-uuYcDatasetGetDataObjectRelPaths(*root, *id, *objects) {
-
-    uuYcDatasetGetToplevelObjects(*root, *id, *toplevelObjects, *isCollection);
-
-    # NOTE: This will crash when an invalid dataset id is provided.
-    if (*isCollection) {
-        *parentCollection = elem(*toplevelObjects, 0);
-    } else {
-        uuChopPath(elem(*toplevelObjects, 0), *dataObjectParent, *dataObjectName);
-        *parentCollection = *dataObjectParent;
-    }
-
-    *objectsString = "";
-    foreach (*item in SELECT DATA_NAME, COLL_NAME WHERE COLL_NAME = "*parentCollection" AND META_DATA_ATTR_NAME = 'dataset_id' AND META_DATA_ATTR_VALUE = "*id") {
-        # Datasets directly under *root need to be checked for separately due to limitations on the general query system.
-        if (strlen(*objectsString) > 0) {
-            *objectsString = *objectsString ++ "\n";
-        }
-        *objectsString = *objectsString ++ *item."DATA_NAME";
-    }
-    foreach (*item in SELECT DATA_NAME, COLL_NAME WHERE COLL_NAME LIKE "*parentCollection/%" AND META_DATA_ATTR_NAME = 'dataset_id' AND META_DATA_ATTR_VALUE = "*id") {
-        if (strlen(*objectsString) > 0) {
-            *objectsString = *objectsString ++ "\n";
-        }
-        *objectsString = *objectsString
-            ++ substr(*item."COLL_NAME", strlen(*parentCollection)+1, strlen(*item."COLL_NAME"))
-            ++ "/"
-            ++ *item."DATA_NAME";
-    }
-    *objects = split(*objectsString, "\n");
-}
-
-# \brief Check if a dataset id is locked.
-#
-# \param[in] root
-# \param[in] id
-# \param[out] isLocked
-# \param[out] isFrozen
-#
-uuYcDatasetIsLocked(*root, *id, *isLocked, *isFrozen) {
-    uuYcDatasetGetToplevelObjects(*root, *id, *toplevelObjects, *isCollection);
-
-    *isLocked = false;
-    *isFrozen = false;
-    foreach (*item in *toplevelObjects) {
-        uuYcObjectIsLocked(*item, *isCollection, *isLocked, *isFrozen);
-        if (*isLocked || *isFrozen) {
-            break;
-        }
-    }
-}
-
-
-# \brief Adds an error to the dataset specified by *datasetId.
-#
-# \param[in] root
-# \param[in] datasetId
-# \param[in] message
-#
-uuYcDatasetErrorAdd(*root, *datasetId, *message) {
-
-    uuYcDatasetGetToplevelObjects(*root, *datasetId, *toplevelObjects, *isCollection);
-
-    foreach (*toplevel in *toplevelObjects) {
-        msiAddKeyVal(*kv, "dataset_error", "*message");
-        # note that we want to silently ignore any duplicates of the message (using errorcode)
-        errorcode(msiAssociateKeyValuePairsToObj(*kv, *toplevel, if *isCollection then "-C" else "-d"));
-
-        # This does not work for some reason.
-        #uuSetMetaData(
-        #    *toplevel,
-        #    "comment",
-        #    *comment,
-        #    if *isCollection then "-C" else "-d"
-        #);
-    }
-}
-
diff --git a/ycDatasetGetToplevel.r b/ycDatasetGetToplevel.r
deleted file mode 100644
index 54817e1fe..000000000
--- a/ycDatasetGetToplevel.r
+++ /dev/null
@@ -1,76 +0,0 @@
-# \file
-# \brief dataset lookup function
-# \author Ton Smeele
-# \copyright Copyright (c) 2015, Utrecht university. All rights reserved
-# \license GPLv3, see LICENSE
-#
-
-#test {
-# uuYcDatasetGetTopLevel("/tsm/home/rods", "x", *collection, *isCol);
-# writeLine("stdout","coll = *collection and isCol = *isCol");
-#}
-
-
-# \brief uuYcDatasetGetTopLevel retrieves the collection path and dataset type for a dataset
-#
-# \param[in] rootcollection path of a tree to search for the dataset
-# \param[in] datasetid unique identifier of the dataset
-# \param[out] topLevelCollection collection that has the dataset
-#             if dataset is not found an empty string is returned
-# \param[out] topLevelIsCollection type of dataset: true = collection false = data objects
-#
-uuYcDatasetGetTopLevel(*rootCollection, *datasetId, *topLevelCollection, *topLevelIsCollection) {
-    # datasets can be
-    # A) one collection with a subtree
-    # B) one or more data objects located (possibly with other objects) in same collection
-    *topLevelIsCollection = false;
-    *topLevelCollection = "";
-    # try to find a collection. note we will expect 0 or 1 rows:
-    foreach (*row in SELECT COLL_NAME
-             WHERE META_COLL_ATTR_NAME = 'dataset_toplevel'
-             AND META_COLL_ATTR_VALUE = '*datasetId'
-             AND COLL_NAME LIKE '*rootCollection/%'
-            ) {
-        *topLevelIsCollection = true;
-        msiGetValByKey(*row, "COLL_NAME", *topLevelCollection);
-    }
-    if (! *topLevelIsCollection) {
-        # also try the root itself
-        foreach (*row in SELECT COLL_NAME
-                 WHERE META_COLL_ATTR_NAME = 'dataset_toplevel'
-                 AND META_COLL_ATTR_VALUE = '*datasetId'
-                 AND COLL_NAME = '*rootCollection'
-                ) {
-            *topLevelIsCollection = true;
-            msiGetValByKey(*row, "COLL_NAME", *topLevelCollection);
-        }
-    }
-    if (! *topLevelIsCollection) {
-        # apparently not a collection, let's search for data objects instead
-        foreach (*row in SELECT COLL_NAME,DATA_NAME
-                 WHERE META_DATA_ATTR_NAME = 'dataset_toplevel'
-                 AND META_DATA_ATTR_VALUE = '*datasetId'
-                 AND COLL_NAME LIKE '*rootCollection/%'
-                ) {
-            msiGetValByKey(*row, "COLL_NAME", *topLevelCollection);
-            break;
-        }
-        if (*topLevelCollection == "") {
-            # not found yet, maybe data object(s) in the rootcollection itself?
-
-            foreach (*row in SELECT COLL_NAME,DATA_NAME
-                     WHERE META_DATA_ATTR_NAME = 'dataset_toplevel'
-                     AND META_DATA_ATTR_VALUE = '*datasetId'
-                     AND COLL_NAME = '*rootCollection'
-                    ) {
-                msiGetValByKey(*row, "COLL_NAME", *topLevelCollection);
-                break;
-            }
-        } else {
-            # dataset not found!
-        }
-    }
-}
-
-#input null
-#output ruleExecOut
diff --git a/ycDatasetLock.r b/ycDatasetLock.r
deleted file mode 100644
index f0497d59b..000000000
--- a/ycDatasetLock.r
+++ /dev/null
@@ -1,253 +0,0 @@
-# \file
-# \brief lock/freeze and unlock/unfreeze datasets within a collection
-# \author Ton Smeele
-# \copyright Copyright (c) 2015, Utrecht university. All rights reserved
-# \license GPLv3, see LICENSE
-#
-
-#test {
-#*collection = "/nluu1ot/home/ton";
-#*datasetId = "y";
-#uuYcDatasetLock(*collection, *datasetId, *result);
-#writeLine("stdout","lock result = *result");
-#uuYcDatasetFreeze(*collection, *datasetId, *result);
-#writeLine("stdout","freeze result = *result");
-#uuYcObjectIsLocked("*collection/Newfile.txt",false, *locked, *frozen);
-#writeLine("stdout","locked = *locked and frozen = *frozen");
-
-#uuYcDatasetUnlock(*collection, *datasetId, *result);
-#writeLine("stdout","unlock result = *result");
-#uuYcDatasetMelt(*collection, *datasetId, *result);
-#writeLine("stdout","melt result = *result");
-#uuYcDatasetUnlock(*collection, *datasetId, *result);
-#writeLine("stdout","unlock result = *result");
-#}
-
-uuYcDatasetLockChangeObject(*parentCollection, *objectName, *isCollection,
-                            *lockName, *lockIt, *dateTime,*result) {
-    *objectType = "-d";
-    *path = "*parentCollection/*objectName";
-    if (*isCollection) {
-        *objectType = "-C";
-        *collection = *objectName;
-    }
-    if (*lockIt) {
-        msiString2KeyValPair("*lockName=*dateTime",*kvPair);
-        *result = errorcode(msiSetKeyValuePairsToObj(*kvPair, *path, *objectType));
-    } else { # unlock it
-        #
-        # if the lock is of type to_vault_lock this operation is
-        # disallowed if the object also has a to_vault_freeze lock
-        uuYcObjectIsLocked(*path,*isCollection,*locked,*frozen);
-        *allowed = (*lockName == "to_vault_freeze") || !*frozen;
-        if (*allowed) {
-            *result = 0;
-            # in order to remove the key we need to lookup its value(s)
-            if (*isCollection) {
-                # remove lock from collection
-                foreach (*row in SELECT META_COLL_ATTR_VALUE
-                         WHERE COLL_NAME = '*path'
-                         AND META_COLL_ATTR_NAME = '*lockName') {
-                    msiGetValByKey(*row, "META_COLL_ATTR_VALUE", *value);
-                    msiString2KeyValPair("*lockName=*value", *kvPair);
-                    *result = errorcode(
-                        msiRemoveKeyValuePairsFromObj(*kvPair, *path, "-C")
-                    );
-                    if (*result != 0) {
-                        break;
-                    }
-                }
-            } else {
-                # remove lock from data object
-                foreach (*row in SELECT META_DATA_ATTR_VALUE
-                         WHERE DATA_NAME = '*objectName'
-                         AND COLL_NAME = '*parentCollection'
-                         AND META_DATA_ATTR_NAME = '*lockName'
-                        ) {
-                    msiGetValByKey(*row,"META_DATA_ATTR_VALUE",*value);
-                    msiString2KeyValPair("*lockName=*value",*kvPair);
-                    *result = errorcode(
-                        msiRemoveKeyValuePairsFromObj(
-                            *kvPair,
-                            "*parentCollection/*objectName",
-                            "-d"
-                        )
-                    );
-                    if (*result != 0) {
-                        break;
-                    }
-                }
-            } # end else remove lock from dataobject
-        } else { # unlock not allowed
-            *result = -1;
-        }
-    }
-}
-
-uuYcDatasetWalkVaultLock(*itemCollection, *itemName, *itemIsCollection, *buffer, *error) {
-    msiGetIcatTime(*dateTime,"unix");
-    uuYcDatasetLockChangeObject(*itemCollection, *itemName, *itemIsCollection,
-                                "to_vault_lock", true, *dateTime, *error);
-}
-
-uuYcDatasetWalkVaultUnlock(*itemCollection, *itemName, *itemIsCollection, *buffer, *error) {
-    msiGetIcatTime(*dateTime,"unix");
-    uuYcDatasetLockChangeObject(*itemCollection, *itemName, *itemIsCollection,
-                                "to_vault_lock", false, *dateTime, *error);
-}
-
-uuYcDatasetWalkFreezeLock(*itemCollection, *itemName, *itemIsCollection, *buffer, *error) {
-    msiGetIcatTime(*dateTime,"unix");
-    uuYcDatasetLockChangeObject(*itemCollection, *itemName, *itemIsCollection,
-                                "to_vault_freeze", true, *dateTime, *error);
-}
-
-
-uuYcDatasetWalkFreezeUnlock(*itemCollection, *itemName, *itemIsCollection, *buffer, *error) {
-    msiGetIcatTime(*dateTime,"unix");
-    uuYcDatasetLockChangeObject(*itemCollection, *itemName, *itemIsCollection,
-                                "to_vault_freeze", false, *dateTime, *error);
-}
-
-
-uuYcDatasetLockChange(*rootCollection, *datasetId, *lockName, *lockIt, *status){
-    *status = -1;
-    *lock = "Unlock";
-    if (*lockIt) {
-        *lock = "Lock";
-    }
-    *lockProcedure = "Vault";
-    if (*lockName == "to_vault_freeze") {
-        *lockProcedure = "Freeze";
-    }
-    # find the toplevel collection for this dataset
-    uuYcDatasetGetTopLevel(*rootCollection, *datasetId, *collection, *isCollection);
-    if (*collection != "") {
-        # we found the dataset, now change the lock on each object
-        if (*isCollection) {
-            *buffer = "dummy";
-            uuTreeWalk("forward", *collection, "uuYcDatasetWalk*lockProcedure*lock", *buffer, *error);
-            *status = *error;
-#            if (*error == "0") {
-#                *status = 0;
-#            }
-        } else {
-            # dataset is not a collection, let's find the objects and make the change
-            msiGetIcatTime(*dateTime,"unix");
-            *status = 0;
-            foreach (*row in SELECT DATA_NAME
-                     WHERE COLL_NAME = '*collection'
-                     AND META_DATA_ATTR_NAME = 'dataset_toplevel'
-                     AND META_DATA_ATTR_VALUE = '*datasetId'
-                    ) {
-                msiGetValByKey(*row,"DATA_NAME",*dataName);
-                # now change it ....
-                uuYcDatasetLockChangeObject(
-                    *collection,
-                    *dataName,
-                    false,
-                    *lockName,
-                    *lockIt,
-                    *dateTime,
-                    *error);
-                if (*error != 0 ) {
-                    *status = *error;
-                    break;
-                }
-            }
-        }
-
-    } else {
-        # result is false "dataset not found"
-    }
-}
-
-
-# \brief uuYcDatasetLock locks (all objects of) a dataset
-#
-# \param[in] collection collection that may have datasets
-# \param[in] datasetId identifier to depict the dataset
-# \param[out] status 0 upon success, otherwise nonzero
-#
-uuYcDatasetLock(*collection, *datasetId, *status) {
-    uuYcDatasetLockChange(*collection, *datasetId,"to_vault_lock", true, *status);
-}
-
-# \brief uuYcDatasetUnlock unlocks (all objects of) a dataset
-#
-# \param[in] collection collection that may have datasets
-# \param[in] datasetId identifier to depict the dataset
-# \param[out] result "true" upon success, otherwise "false"
-# \param[out] status 0 upon success, otherwise nonzero
-#
-uuYcDatasetUnlock(*collection, *datasetId, *status) {
-    uuYcDatasetLockChange(*collection, *datasetId, "to_vault_lock", false, *status);
-}
-
-# \brief uuYcDatasetFreeze freeze-locks (all objects of) a dataset
-#
-# \param[in] collection collection that may have datasets
-# \param[in] datasetId identifier to depict the dataset
-# \param[out] status 0 upon success, otherwise nonzero
-#
-uuYcDatasetFreeze(*collection, *datasetId, *status) {
-    uuYcDatasetLockChange(*collection, *datasetId,"to_vault_freeze", true, *status);
-}
-
-# \brief uuYcDatasetUnfreeze undo freeze-locks (all objects of) a dataset
-#
-# \param[in] collection collection that may have datasets
-# \param[in] datasetId identifier to depict the dataset
-# \param[out] status 0 upon success, otherwise nonzero
-#
-uuYcDatasetMelt(*collection, *datasetId, *status) {
-    uuYcDatasetLockChange(*collection, *datasetId, "to_vault_freeze", false, *status);
-}
-
-# \brief uuYcObjectIsLocked query an object to see if it is locked
-#
-# \param[in] objectPath full path to collection of data object
-# \param[in] isCollection true if path references a collection
-# \param[out] locked true if the object is vault-locked
-# \param[out] frozen true if the object is vault-frozen
-
-uuYcObjectIsLocked(*objectPath, *isCollection, *locked, *frozen) {
-    *locked = false;
-    *frozen = false;
-    if (*isCollection) {
-        foreach (*row in SELECT META_COLL_ATTR_NAME
-                 WHERE COLL_NAME = '*objectPath'
-                ) {
-            msiGetValByKey(*row, "META_COLL_ATTR_NAME", *key);
-            if ( *key == "to_vault_lock"
-                 || *key == "to_vault_freeze"
-               ) {
-                *locked = true;
-                if (*key == "to_vault_freeze") {
-                    *frozen = true;
-                    break;
-                }
-            }
-        }
-    } else {
-        uuChopPath(*objectPath, *parentCollection, *dataName);
-        foreach (*row in SELECT META_DATA_ATTR_NAME
-                 WHERE COLL_NAME = '*parentCollection'
-                 AND DATA_NAME = '*dataName'
-                ) {
-            msiGetValByKey(*row, "META_DATA_ATTR_NAME", *key);
-            if ( *key == "to_vault_lock"
-                 || *key == "to_vault_freeze"
-               ) {
-                *locked = true;
-                if (*key == "to_vault_freeze") {
-                    *frozen = true;
-                    break;
-                }
-            }
-        }
-    }
-}
-
-#input null
-#output ruleExecOut
diff --git a/ycModule.r b/ycModule.r
deleted file mode 100644
index e37ea9afa..000000000
--- a/ycModule.r
+++ /dev/null
@@ -1,194 +0,0 @@
-# \file ycModule.r
-# \brief Youth Cohort module
-# \copyright Copyright (c) 2016-2021, Utrecht University. All rights reserved.
-# \license GPLv3, see LICENSE
-
-
-# \brief (over)write data object with a list of vault object checksums
-#
-# \param[in] vaultRoot root collection to be indexed
-# \param[in] destinationObject dataobject that will be written to
-# \param[out] status 0 = success, nonzero is error
-uuYcGenerateDatasetsIndex(*vaultRoot, *destinationObject, *status) {
-    *status = 0;
-    msiDataObjCreate(*destinationObject, "forceFlag=", *FHANDLE);
-
-    foreach (*row in SELECT COLL_NAME, DATA_NAME, DATA_CHECKSUM, DATA_SIZE
-             WHERE COLL_NAME = "*vaultRoot" ) {
-        *checksum = *row."DATA_CHECKSUM";
-        *name = *row."DATA_NAME";
-        *col = *row."COLL_NAME";
-        *size = *row."DATA_SIZE";
-        uuChopChecksum(*checksum, *type, *checksumOut);
-        *textLine = "*type *checksumOut *size *col/*name\n";
-        msiStrlen(*textLine, *length);
-        msiStrToBytesBuf(*textLine, *buffer);
-        msiDataObjWrite(*FHANDLE, *buffer, *bytesWritten);
-        if (int(*length) != *bytesWritten) then {
-            *status = 1;
-        }
-    }
-    foreach (*row in SELECT COLL_NAME, DATA_NAME, DATA_CHECKSUM, DATA_SIZE
-             WHERE COLL_NAME like '*vaultRoot/%' ) {
-        *checksum = *row."DATA_CHECKSUM";
-        *name = *row."DATA_NAME";
-        *col = *row."COLL_NAME";
-        *size = *row."DATA_SIZE";
-        uuChopChecksum(*checksum, *type, *checksumOut);
-        *textLine = "*type *checksumOut *size *col/*name\n";
-        msiStrlen(*textLine, *length);
-        msiStrToBytesBuf(*textLine, *buffer);
-        msiDataObjWrite(*FHANDLE, *buffer, *bytesWritten);
-        if (int(*length) != *bytesWritten) then {
-            *status = 1;
-        }
-    }
-    msiDataObjClose(*FHANDLE, *status2);
-    *status;
-}
-
-# \brief Add a dataset warning to all given dataset toplevels.
-#
-# \param[in] toplevels
-# \param[in] isCollectionToplevel
-# \param[in] text
-#
-uuYcIntakeCheckAddDatasetWarning(*toplevels, *isCollectionToplevel, *text) {
-    msiAddKeyVal(*kv, "dataset_warning", *text);
-
-    foreach (*toplevel in *toplevels) {
-        msiAssociateKeyValuePairsToObj(*kv, *toplevel, if *isCollectionToplevel then "-C" else "-d");
-    }
-}
-
-# \brief Add a dataset error to all given dataset toplevels.
-#
-# \param[in] toplevels
-# \param[in] isCollectionToplevel
-# \param[in] text
-#
-uuYcIntakeCheckAddDatasetError(*toplevels, *isCollectionToplevel, *text) {
-    msiAddKeyVal(*kv, "dataset_error", *text);
-
-    foreach (*toplevel in *toplevels) {
-        msiAssociateKeyValuePairsToObj(*kv, *toplevel, if *isCollectionToplevel then "-C" else "-d");
-    }
-}
-
-# Reusable check utilities {{{
-
-# \brief Check if a certain filename pattern has enough occurrences in a dataset.
-#
-# Adds a warning if the match count is out of range.
-#
-# NOTE: Currently, patterns must match the full relative object path.
-#       At the time of writing, Echo is the only experiment type we run this
-#       check for, and it is a flat dataset without subdirectories, so it makes
-#       no difference there.
-#
-# For other experiment types it may be desirable to match patterns with
-# basenames instead of paths. In this case the currently commented-out
-# code in this function can be used.
-#
-# \param[in] datasetParent either the dataset collection or the first parent of a data-object dataset toplevel
-# \param[in] toplevels a list of toplevel objects
-# \param[in] isCollectionToplevel
-# \param[in] objects a list of dataset object paths relative to the datasetParent parameter
-# \param[in] patternHuman a human-readable pattern (e.g.: 'I0000000.raw')
-# \param[in] patternRegex a regular expression that matches filenames (e.g.: 'I[0-9]{7}\.raw')
-# \param[in] min the minimum amount of occurrences. set to -1 to disable minimum check.
-# \param[in] max the maximum amount of occurrences. set to -1 to disable maximum check.
-#
-uuYcIntakeCheckFileCount(*datasetParent, *toplevels, *isCollectionToplevel, *objects, *patternHuman, *patternRegex, *min, *max) {
-    *count = 0;
-    foreach (*path in *objects) {
-        *name = *path;
-
-        #if (*path like "*/*") {
-        #    # We might want to match basenames instead of paths relative to the dataset root.
-        #    uuChopPath(*path, *parent, *name);
-        #} else {
-        #    *name = *path;
-        #}
-        if (*name like regex *patternRegex) {
-            *count = *count + 1;
-        }
-    }
-
-    if (*min != -1 && *count < *min) {
-        uuYcIntakeCheckAddDatasetWarning(*toplevels, *isCollectionToplevel, "Expected at least *min files of type '*patternHuman', found *count");
-    }
-    if (*max != -1 && *count > *max) {
-        uuYcIntakeCheckAddDatasetWarning(*toplevels, *isCollectionToplevel, "Expected at most *max files of type '*patternHuman', found *count");
-    }
-}
-
-# }}}
-# Generic checks {{{
-
-# \brief Check if a dataset's wave is a valid one.
-#
-# \param[in] root
-# \param[in] id the dataset id to check
-# \param[in] toplevels a list of toplevel objects for this dataset id
-# \param[in] isCollectionToplevel
-#
-uuYcIntakeCheckWaveValidity(*root, *id, *toplevels, *isCollectionToplevel) {
-    # Note: It might be cleaner to grab the wave metadata tag from the toplevel instead.
-    uuYcDatasetParseId(*id, *idComponents);
-    uuStrToLower(*idComponents."wave", *wave);
-
-    *waves = list(
-        "20w", "30w",
-        "0m", "5m", "10m",
-        "3y", "6y", "9y", "12y", "15y"
-    );
-
-    uuListContains(*waves, *wave, *waveIsValid);
-    if (!*waveIsValid) {
-        uuYcIntakeCheckAddDatasetError(*toplevels, *isCollectionToplevel, "The wave '*wave' is not in the list of accepted waves");
-    }
-}
-
-# \brief Run checks that must be applied to all datasets regardless of WEPV values.
-#
-# Call any generic checks you make in this function.
-#
-# \param[in] root
-# \param[in] id the dataset id to check
-# \param[in] toplevels a list of toplevel objects for this dataset id
-# \param[in] isCollection
-#
-uuYcIntakeCheckGeneric(*root, *id, *toplevels, *isCollection) {
-    uuYcIntakeCheckWaveValidity(*root, *id, *toplevels, *isCollection);
-}
-
-# }}}
-# Experiment type specific checks {{{
-# Echo {{{
-
-# \brief Run checks specific to the Echo experiment type.
-#
-# \param[in] root
-# \param[in] id the dataset id to check
-# \param[in] toplevels a list of toplevel objects for this dataset id
-# \param[in] isCollection
-#
-uuYcIntakeCheckEtEcho(*root, *id, *toplevels, *isCollection) {
-    if (*isCollection) {
-        *datasetParent = elem(*toplevels, 0);
-    } else {
-        uuChopPath(elem(*toplevels, 0), *dataObjectParent, *dataObjectName);
-        *datasetParent = *dataObjectParent;
-    }
-
-    uuYcDatasetGetDataObjectRelPaths(*root, *id, *objects);
-
-    uuYcIntakeCheckFileCount(*datasetParent, *toplevels, *isCollection, *objects, ``I0000000.index.jpg``, ``(.*/)?I[0-9]{7}\.index\.jpe?g``, 13, -1);
-    uuYcIntakeCheckFileCount(*datasetParent, *toplevels, *isCollection, *objects, ``I0000000.raw``, ``(.*/)?I[0-9]{7}\.raw``, 7, -1);
-    uuYcIntakeCheckFileCount(*datasetParent, *toplevels, *isCollection, *objects, ``I0000000.dcm``, ``(.*/)?I[0-9]{7}\.dcm``, 6, -1);
-    uuYcIntakeCheckFileCount(*datasetParent, *toplevels, *isCollection, *objects, ``I0000000.vol``, ``(.*/)?I[0-9]{7}\.vol``, 6, -1);
-}
-
-# }}}
-# }}}
diff --git a/ycUtil.r b/ycUtil.r
deleted file mode 100644
index 5fa8f4d19..000000000
--- a/ycUtil.r
+++ /dev/null
@@ -1,36 +0,0 @@
-# Youth cohort utility functions
-
-# \brief Clears a kv-list's contents.
-#
-# \param kvList
-#
-uuKvClear(*kvList) {
-    *kvList."." = ".";
-    foreach (*key in *kvList) {
-        *kvList.*key = ".";
-    }
-}
-
-uuYcObjectIsLocked(*objPath, *locked) {
-    msiGetObjType(*objPath, *objType);
-    *locked = false;
-    if (*objType == '-d') {
-        uuChopPath(*objPath, *collection, *dataName);
-        foreach (*row in SELECT META_DATA_ATTR_VALUE
-                 WHERE COLL_NAME = '*collection'
-                 AND DATA_NAME = '*dataName'
-                 AND META_DATA_ATTR_NAME = 'to_vault_lock'
-                ) {
-            *locked = true;
-            break;
-        }
-    } else {
-        foreach (*row in SELECT META_COLL_ATTR_VALUE
-                 WHERE COLL_NAME = '*objPath'
-                 AND META_COLL_ATTR_NAME = 'to_vault_lock'
-                ) {
-            *locked = true;
-            break;
-        }
-    }
-}