From 139f2fee663aaf7a8c5e7f2f258c8b9883e864ff Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Fri, 1 Nov 2024 15:19:03 -0400 Subject: [PATCH 1/9] Implemented --- mongo_db_script/updateDataRecordsScript.js | 29 ++++++++++++++++++++++ src/common/mongo_dao.py | 25 +++++-------------- src/validator.py | 6 +++-- 3 files changed, 39 insertions(+), 21 deletions(-) create mode 100644 mongo_db_script/updateDataRecordsScript.js diff --git a/mongo_db_script/updateDataRecordsScript.js b/mongo_db_script/updateDataRecordsScript.js new file mode 100644 index 0000000..eea25a3 --- /dev/null +++ b/mongo_db_script/updateDataRecordsScript.js @@ -0,0 +1,29 @@ +db.dataRecords.aggregate([ + { + // Lookup to join submissions collection based on submissionID + $lookup: { + from: "submissions", + localField: "submissionID", + foreignField: "_id", + as: "submission_info" + } + }, + { + // Unwind to access the submission_info document as a single object + $unwind: "$submission_info" + }, + { + // Set the studyID in dataRecords based on the studyID from the submissions + $set: { + "studyID": "$submission_info.studyID" + } + }, + { + // Merge the updates back into the dataRecords collection + $merge: { + into: "dataRecords", + whenMatched: "merge", + whenNotMatched: "discard" + } + } +]); diff --git a/src/common/mongo_dao.py b/src/common/mongo_dao.py index 8b82095..c4d407e 100644 --- a/src/common/mongo_dao.py +++ b/src/common/mongo_dao.py @@ -596,7 +596,7 @@ def find_submissions(self, query): """ set dataRecords search index, 'submissionID_nodeType_nodeID' """ - def set_search_index_dataRecords(self, submission_index, crdc_index): + def set_search_index_dataRecords(self, submission_index, crdc_index, study_entity_type_index): db = self.client[self.db_name] data_collection = db[DATA_COLlECTION] try: @@ -607,6 +607,9 @@ def set_search_index_dataRecords(self, submission_index, crdc_index): if not index_dict.get(crdc_index): result = data_collection.create_index([(DATA_COMMON_NAME), (NODE_TYPE),(NODE_ID)], \ name=crdc_index) + if not index_dict.get(study_entity_type_index): + result = data_collection.create_index([(STUDY_ID), (ENTITY_TYPE),(NODE_ID)], \ + name=study_entity_type_index) return True except errors.PyMongoError as pe: self.log.exception(pe) @@ -789,25 +792,9 @@ def search_node_by_study(self, studyID, entity_type, node_id): :return: """ db = self.client[self.db_name] - data_collection = db[RELEASE_COLLECTION] + data_collection = db[DATA_COLlECTION] try: - submissions = self.find_submissions({STUDY_ID: studyID}) - if len(submissions) < 2: #if there is only one submission that's own submission, skip. 
- return None - submission_id_list = [item[ID] for item in submissions] - results = data_collection.find({ENTITY_TYPE: entity_type, NODE_ID: node_id, SUBMISSION_ID: {"$in": submission_id_list}}) - released_nodes = [node for node in results if node.get(SUBMISSION_REL_STATUS) != SUBMISSION_REL_STATUS_DELETED ] - if len(released_nodes) == 0: - deleted_submission_ids = [rel[SUBMISSION_ID] for rel in results if rel.get(SUBMISSION_REL_STATUS) == SUBMISSION_REL_STATUS_DELETED ] - submission_id_list = [item for item in submission_id_list if item not in deleted_submission_ids] - if len(submission_id_list) < 2: - return None - # search dataRecords - data_collection = db[DATA_COLlECTION] - rtn_val = data_collection.find_one({ENTITY_TYPE: entity_type, NODE_ID: node_id, SUBMISSION_ID: {"$in": submission_id_list}}) - else: - rtn_val = released_nodes[0] - return rtn_val + return data_collection.find_one({STUDY_ID: studyID, ENTITY_TYPE: entity_type, NODE_ID: node_id}) except errors.PyMongoError as pe: self.log.exception(pe) self.log.exception(f"Failed to search node for study: {get_exception_msg()}") diff --git a/src/validator.py b/src/validator.py index da437e1..de6598a 100644 --- a/src/validator.py +++ b/src/validator.py @@ -20,9 +20,11 @@ DATA_RECORDS_SEARCH_INDEX = "submissionID_nodeType_nodeID" DATA_RECORDS_CRDC_SEARCH_INDEX = "dataCommons_nodeType_nodeID" +DATA_RECORDS_STUDY_ENTITY_INDEX = 'studyID_entityType_nodeID' RELEASE_SEARCH_INDEX = "dataCommons_nodeType_nodeID" CRDCID_SEARCH_INDEX = "CRDC_ID" CDE_SEARCH_INDEX = 'CDECode_1_CDEVersion_1' + #Set log file prefix for bento logger if LOG_PREFIX not in os.environ: os.environ[LOG_PREFIX] = 'Validation Service' @@ -47,11 +49,11 @@ def controller(): job_queue = Queue(configs[SQS_NAME]) mongo_dao = MongoDao(configs[MONGO_DB], configs[DB]) # set dataRecord search index - if not mongo_dao.set_search_index_dataRecords(DATA_RECORDS_SEARCH_INDEX, DATA_RECORDS_CRDC_SEARCH_INDEX): + if not mongo_dao.set_search_index_dataRecords(DATA_RECORDS_SEARCH_INDEX, DATA_RECORDS_CRDC_SEARCH_INDEX, DATA_RECORDS_STUDY_ENTITY_INDEX): log.error("Failed to set dataRecords search index!") return 1 # set release search index - if not mongo_dao.set_search_release_index(RELEASE_SEARCH_INDEX , CRDCID_SEARCH_INDEX): + if not mongo_dao.set_search_release_index(RELEASE_SEARCH_INDEX, CRDCID_SEARCH_INDEX): log.error("Failed to set release search index!") return 1 From e2025aedad31458cab8ddaa58d2fd7885bcadc98 Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:52:45 -0500 Subject: [PATCH 2/9] add db script for release collection --- mongo_db_script/updateReleaseScript.js | 59 ++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 mongo_db_script/updateReleaseScript.js diff --git a/mongo_db_script/updateReleaseScript.js b/mongo_db_script/updateReleaseScript.js new file mode 100644 index 0000000..0918f56 --- /dev/null +++ b/mongo_db_script/updateReleaseScript.js @@ -0,0 +1,59 @@ +db.release.aggregate([ + { + // Lookup to join submissions collection based on submissionID + $lookup: { + from: "submissions", + localField: "submissionID", + foreignField: "_id", + as: "submission_info" + } + }, + { + // Unwind to access the submission_info document as a single object + $unwind: "$submission_info" + }, + { + // Set the studyID in dataRecords based on the studyID from the submissions + $set: { + "studyID": "$submission_info.studyID" + } + }, + { + // Merge the updates back into the dataRecords collection + $merge: 
{ + into: "dataRecords", + whenMatched: "merge", + whenNotMatched: "discard" + } + } +]); + + +db.release.updateMany({"nodeType": "program"}, + {"$set": {"entityType": "Program"}} +); + +db.release.updateMany({"nodeType": "study"}, + {"$set": {"entityType": "Study"}} +); + +db.release.updateMany({"nodeType": {"$in": [ "participant", "subject", "case"]} +}, + {"$set": {"entityType": "Participant"}} +); + +db.release.updateMany({"nodeType": {"$in": [ "sample", "specimen"]} +}, + {"$set": {"entityType": "Sample"}} +); + +db.release.updateMany({"nodeType": {"$in": [ "principal_investigator", "study_personnel"]} +}, + {"$set": {"entityType": "Principal Investigator"}} +); + +db.release.updateMany({"nodeType": {"$in": [ "file", "data_file", "clinical_measure_file", "cytogenomic_file", "radiology_file", "methylation_array_file", "sequencing_file"]} +}, {"$set": {"entityType": "File"}} +); + + From 5b112efe406731cef22f4c08505b4b6ce202f26b Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:15:14 -0500 Subject: [PATCH 3/9] updated --- mongo_db_script/updateReleaseScript.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mongo_db_script/updateReleaseScript.js b/mongo_db_script/updateReleaseScript.js index 0918f56..b3f9c61 100644 --- a/mongo_db_script/updateReleaseScript.js +++ b/mongo_db_script/updateReleaseScript.js @@ -19,9 +19,9 @@ db.release.aggregate([ } }, { - // Merge the updates back into the dataRecords collection + // Merge the updates back into the release collection $merge: { - into: "dataRecords", + into: "release", whenMatched: "merge", whenNotMatched: "discard" } From cbb2151f74ee05fbc4675510b97231e6523bb549 Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:34:09 -0500 Subject: [PATCH 4/9] Fixed 2014 --- src/common/constants.py | 1 + src/metadata_validator.py | 40 +++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/common/constants.py b/src/common/constants.py index df86faf..14e4cf8 100644 --- a/src/common/constants.py +++ b/src/common/constants.py @@ -189,3 +189,4 @@ VALIDATION_TYPE_METADATA = "metadata" VALIDATION_TYPE_FILE= "data file" BATCH_IDS = "batchIDs" +QC_VALIDATE_DATE = "validatedDate" diff --git a/src/metadata_validator.py b/src/metadata_validator.py index 51ef6e9..d91102f 100644 --- a/src/metadata_validator.py +++ b/src/metadata_validator.py @@ -10,7 +10,7 @@ VALIDATED_AT, SERVICE_TYPE_METADATA, NODE_ID, PROPERTIES, PARENTS, KEY, NODE_ID, PARENT_TYPE, PARENT_ID_NAME, PARENT_ID_VAL, \ SUBMISSION_INTENTION, SUBMISSION_INTENTION_NEW_UPDATE, SUBMISSION_INTENTION_DELETE, TYPE_METADATA_VALIDATE, TYPE_CROSS_SUBMISSION, \ SUBMISSION_REL_STATUS_RELEASED, VALIDATION_ID, VALIDATION_ENDED, CDE_TERM, TERM_CODE, TERM_VERSION, CDE_PERMISSIVE_VALUES, \ - QC_RESULT_ID, BATCH_IDS, VALIDATION_TYPE_METADATA, S3_FILE_INFO, VALIDATION_TYPE_FILE, QC_SEVERITY + QC_RESULT_ID, BATCH_IDS, VALIDATION_TYPE_METADATA, S3_FILE_INFO, VALIDATION_TYPE_FILE, QC_SEVERITY, QC_VALIDATE_DATE from common.utils import current_datetime, get_exception_msg, dump_dict_to_json, create_error, get_uuid_str from common.model_store import ModelFactory from common.model_reader import valid_prop_types @@ -163,33 +163,33 @@ def validate_nodes(self, data_records): if record.get(QC_RESULT_ID): qc_result = self.mongo_dao.get_qcRecord(record[QC_RESULT_ID]) status, errors, warnings = self.validate_node(record) - if errors and 
len(errors) > 0: - self.isError = True - if not qc_result: - qc_result = get_qc_result(record, self.submission, VALIDATION_TYPE_METADATA, self.mongo_dao) - qc_result[ERRORS] = errors - else: - if qc_result: - qc_result[ERRORS] = [] - if warnings and len(warnings)> 0: - self.isWarning = True - if not qc_result: - qc_result = get_qc_result(record, self.submission, VALIDATION_TYPE_METADATA, self.mongo_dao) - qc_result[WARNINGS] = warnings - else: - if qc_result: - qc_result[WARNINGS] = [] - if status == STATUS_PASSED: if qc_result: self.mongo_dao.delete_qcRecord(qc_result[ID]) qc_result = None record[QC_RESULT_ID] = None else: - qc_result[QC_SEVERITY] = STATUS_ERROR if self.isError else STATUS_WARNING - qc_result["validatedDate"] = current_datetime() + if not qc_result: + record[QC_RESULT_ID] = None + qc_result = get_qc_result(record, VALIDATION_TYPE_METADATA, self.mongo_dao) + if errors and len(errors) > 0: + self.isError = True + qc_result[ERRORS] = errors + qc_result[QC_SEVERITY] = STATUS_ERROR + else: + qc_result[ERRORS] = [] + if warnings and len(warnings)> 0: + self.isWarning = True + qc_result[WARNINGS] = warnings + if not errors or len(errors) == 0: + qc_result[QC_SEVERITY] = STATUS_WARNING + else: + qc_result[WARNINGS] = [] + + qc_result[QC_VALIDATE_DATE] = current_datetime() qc_results.append(qc_result) record[QC_RESULT_ID] = qc_result[ID] + record[STATUS] = status record[UPDATED_AT] = record[VALIDATED_AT] = current_datetime() updated_records.append(record) From 192b44f78afda2ba96c17aa3ee713ff3b3e9dbed Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Wed, 13 Nov 2024 16:12:32 -0500 Subject: [PATCH 5/9] update --- src/file_validator.py | 6 +++--- src/metadata_validator.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/file_validator.py b/src/file_validator.py index e6e0c83..27056ae 100644 --- a/src/file_validator.py +++ b/src/file_validator.py @@ -8,7 +8,7 @@ from common.constants import ERRORS, WARNINGS, STATUS, S3_FILE_INFO, ID, SIZE, MD5, UPDATED_AT, \ FILE_NAME, SQS_TYPE, SQS_NAME, FILE_ID, STATUS_ERROR, STATUS_WARNING, STATUS_PASSED, SUBMISSION_ID, \ BATCH_BUCKET, SERVICE_TYPE_FILE, LAST_MODIFIED, CREATED_AT, TYPE, SUBMISSION_INTENTION, SUBMISSION_INTENTION_DELETE,\ - VALIDATION_ID, VALIDATION_ENDED, QC_RESULT_ID, BATCH_IDS, VALIDATION_TYPE_FILE, QC_SEVERITY + VALIDATION_ID, VALIDATION_ENDED, QC_RESULT_ID, VALIDATION_TYPE_FILE, QC_SEVERITY, QC_VALIDATE_DATE from common.utils import get_exception_msg, current_datetime, get_s3_file_info, get_s3_file_md5, create_error, get_uuid_str from service.ecs_agent import set_scale_in_protection from metadata_validator import get_qc_result @@ -406,7 +406,7 @@ def save_qc_result(self, fileRecord, status, error): qc_result = self.mongo_dao.get_qcRecord(fileRecord[S3_FILE_INFO][QC_RESULT_ID]) if status == STATUS_ERROR or status == STATUS_WARNING: if not qc_result: - qc_result = get_qc_result(fileRecord, self.submission, VALIDATION_TYPE_FILE, self.mongo_dao) + qc_result = get_qc_result(fileRecord, VALIDATION_TYPE_FILE, self.mongo_dao) self.set_status(fileRecord, qc_result, status, error) if status == STATUS_PASSED and qc_result: self.mongo_dao.delete_qcRecord(qc_result[ID]) @@ -414,5 +414,5 @@ def save_qc_result(self, fileRecord, status, error): fileRecord[S3_FILE_INFO][QC_RESULT_ID] = None if qc_result: # save QC result fileRecord[S3_FILE_INFO][QC_RESULT_ID] = qc_result[ID] - qc_result["validatedDate"] = current_datetime() + qc_result[QC_VALIDATE_DATE] = 
current_datetime()
         self.mongo_dao.save_qc_results([qc_result])
\ No newline at end of file
diff --git a/src/metadata_validator.py b/src/metadata_validator.py
index d91102f..6dbc409 100644
--- a/src/metadata_validator.py
+++ b/src/metadata_validator.py
@@ -595,18 +595,18 @@ def check_boundary(value, min, max, msg_prefix, prop_name):
 """
 get qc result for the node record by qc_id
 """
-def get_qc_result(node, submission, validation_type, mongo_dao):
+def get_qc_result(node, validation_type, mongo_dao):
     qc_id = node.get(QC_RESULT_ID) if validation_type == VALIDATION_TYPE_METADATA else node[S3_FILE_INFO].get(QC_RESULT_ID)
     rc_result = None
     if not qc_id:
-        rc_result = create_new_qc_result(node, submission, validation_type)
+        rc_result = create_new_qc_result(node, validation_type)
     else:
         rc_result = mongo_dao.get_qcRecord(qc_id)
         if not rc_result:
-            rc_result = create_new_qc_result(node, submission, validation_type)
+            rc_result = create_new_qc_result(node, validation_type)
     return rc_result
 
-def create_new_qc_result(node, submission, validation_type):
+def create_new_qc_result(node, validation_type):
     qc_result = {
         ID: get_uuid_str(),
         SUBMISSION_ID: node[SUBMISSION_ID],

From 21d91aaf3c533e7effe04fe99924e9f71970169a Mon Sep 17 00:00:00 2001
From: vshand11 <105606628+vshand11@users.noreply.github.com>
Date: Thu, 14 Nov 2024 15:33:20 -0500
Subject: [PATCH 6/9] Enhanced

---
 src/metadata_validator.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/metadata_validator.py b/src/metadata_validator.py
index 51ef6e9..439bf83 100644
--- a/src/metadata_validator.py
+++ b/src/metadata_validator.py
@@ -540,10 +540,13 @@ def validate_prop_value(self, prop_name, value, prop_def, msg_prefix):
     def get_permissive_value(self, prop_def):
         permissive_vals = prop_def.get("permissible_values")
         if prop_def.get(CDE_TERM) and len(prop_def.get(CDE_TERM)) > 0:
-            # retrieve permissible values from DB
-            cde_term = prop_def[CDE_TERM][0]
-            cde_code = cde_term.get(TERM_CODE)
-            cde_version = cde_term.get(TERM_VERSION)
+            # retrieve permissible values from DB or cde site
+            cde_code = None
+            cde_terms = [ct for ct in prop_def[CDE_TERM] if 'caDSR' in ct.get('Origin', '')]
+            if cde_terms and len(cde_terms):
+                cde_code = cde_terms[0].get(TERM_CODE)
+                cde_version = cde_terms[0].get(TERM_VERSION)
+
 
             if not cde_code:
                 return permissive_vals
@@ -565,8 +568,7 @@ def get_permissive_value(self, prop_def):
                 permissive_vals = None #escape validation
             self.mongo_dao.insert_cde([cde])
         return permissive_vals
-    
-
+  
 """util functions"""
 def check_permissive(value, permissive_vals, msg_prefix, prop_name):
     result = True,

From 42179a456a2ab170829e0679d35eb619113602b7 Mon Sep 17 00:00:00 2001
From: knockknockyoo
Date: Tue, 26 Nov 2024 13:16:04 -0500
Subject: [PATCH 7/9] crdc id should be generated for a new node.

---
 src/data_loader.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/data_loader.py b/src/data_loader.py
index 568c094..a3c8270 100644
--- a/src/data_loader.py
+++ b/src/data_loader.py
@@ -187,6 +187,10 @@ def get_crdc_id(self, exist_node, node_type, node_id, studyID):
         if studyID and node_id and entity_type:
             result = self.mongo_dao.search_node_by_study(studyID, entity_type, node_id)
             crdc_id = result.get(CRDC_ID) if result else None
+            # if the crdc_id can't be identified from the existing dataset, a new crdc_id should be generated.
+ if not crdc_id: + crdc_id = get_uuid_str() + else: crdc_id = exist_node.get(CRDC_ID) return crdc_id From e6aa0271e7d0eefa0e8c814d469941245553726f Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:11:23 -0500 Subject: [PATCH 8/9] Fixed --- src/common/mongo_dao.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/common/mongo_dao.py b/src/common/mongo_dao.py index 58cdd16..4c4918c 100644 --- a/src/common/mongo_dao.py +++ b/src/common/mongo_dao.py @@ -7,7 +7,7 @@ VALUE_PROP, ERRORS, WARNINGS, VALIDATED_AT, STATUS_ERROR, STATUS_WARNING, PARENT_ID_NAME, \ SUBMISSION_REL_STATUS, SUBMISSION_REL_STATUS_DELETED, STUDY_ABBREVIATION, SUBMISSION_STATUS, STUDY_ID, \ CROSS_SUBMISSION_VALIDATION_STATUS, ADDITION_ERRORS, VALIDATION_COLLECTION, VALIDATION_ENDED, CONFIG_COLLECTION, \ - BATCH_BUCKET, CDE_COLLECTION, CDE_CODE, CDE_VERSION, ENTITY_TYPE, QC_COLLECTION, QC_RESULT_ID + BATCH_BUCKET, CDE_COLLECTION, CDE_CODE, CDE_VERSION, ENTITY_TYPE, QC_COLLECTION, QC_RESULT_ID, SUBMISSION_REL_STATUS_RELEASED from common.utils import get_exception_msg, current_datetime MAX_SIZE = 10000 @@ -774,7 +774,7 @@ def search_node(self, data_commons, node_type, node_id): released_nodes = [node for node in results if node.get(SUBMISSION_REL_STATUS) != SUBMISSION_REL_STATUS_DELETED ] if len(released_nodes) == 0: # search dataRecords - deleted_submission_ids = [rel[SUBMISSION_ID] for rel in results if rel.get(SUBMISSION_REL_STATUS) == SUBMISSION_REL_STATUS_DELETED ] + deleted_submission_ids = [rel[SUBMISSION_ID] for rel in results if SUBMISSION_REL_STATUS_DELETED in rel.get(SUBMISSION_REL_STATUS)] rtn_val = self.search_node_by_index_crdc(data_commons, node_type, node_id, deleted_submission_ids) else: rtn_val = released_nodes[0] @@ -799,7 +799,10 @@ def search_node_by_study(self, studyID, entity_type, node_id): db = self.client[self.db_name] data_collection = db[DATA_COLlECTION] try: - return data_collection.find_one({STUDY_ID: studyID, ENTITY_TYPE: entity_type, NODE_ID: node_id}) + existed_node = self.search_released_node_by_study(studyID, entity_type, node_id) + if not existed_node: + existed_node = data_collection.find_one({STUDY_ID: studyID, ENTITY_TYPE: entity_type, NODE_ID: node_id}) + return existed_node except errors.PyMongoError as pe: self.log.exception(pe) self.log.exception(f"Failed to search node for study: {get_exception_msg()}") @@ -808,6 +811,32 @@ def search_node_by_study(self, studyID, entity_type, node_id): self.log.exception(e) self.log.exception(f"Failed to search node for study {get_exception_msg()}") return None + + def search_released_node_by_study(self, studyID, entity_type, node_id): + """ + Search release collection for given node + :param data_commons: + :param node_type: + :param node_id: + :return: + """ + db = self.client[self.db_name] + data_collection = db[RELEASE_COLLECTION] + try: + query = {STUDY_ID: studyID, ENTITY_TYPE: entity_type, NODE_ID: node_id} + results = list(data_collection.find(query)) + if not results or len(results) == 0: + return None + released_node = next(node for node in results if SUBMISSION_REL_STATUS_DELETED not in node.get(SUBMISSION_REL_STATUS)) + return released_node + except errors.PyMongoError as pe: + self.log.exception(pe) + self.log.exception(f"Failed to find release record for {studyID}/{entity_type}/{node_id}: {get_exception_msg()}") + return None + except Exception as e: + self.log.exception(e) + self.log.exception(f"Failed to find release 
record for {studyID}/{entity_type}/{node_id}: {get_exception_msg()}") + return None def search_released_node(self, data_commons, node_type, node_id): """ From b2fd05c621521188efc2b89ab2deb5cbaa4648db Mon Sep 17 00:00:00 2001 From: vshand11 <105606628+vshand11@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:23:39 -0500 Subject: [PATCH 9/9] add index in release collection --- src/common/mongo_dao.py | 5 ++++- src/validator.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/common/mongo_dao.py b/src/common/mongo_dao.py index 4c4918c..a744181 100644 --- a/src/common/mongo_dao.py +++ b/src/common/mongo_dao.py @@ -628,7 +628,7 @@ def set_search_index_dataRecords(self, submission_index, crdc_index, study_entit """ set release search index, 'dataCommons_nodeType_nodeID' """ - def set_search_release_index(self, dataCommon_index, crdcID_index): + def set_search_release_index(self, dataCommon_index, crdcID_index, study_entity_type_index): db = self.client[self.db_name] data_collection = db[RELEASE_COLLECTION] try: @@ -639,6 +639,9 @@ def set_search_release_index(self, dataCommon_index, crdcID_index): if not index_dict or not index_dict.get(crdcID_index): result = data_collection.create_index([(CRDC_ID)], \ name=crdcID_index) + if not index_dict.get(study_entity_type_index): + result = data_collection.create_index([(STUDY_ID), (ENTITY_TYPE),(NODE_ID)], \ + name=study_entity_type_index) return True except errors.PyMongoError as pe: self.log.exception(pe) diff --git a/src/validator.py b/src/validator.py index de6598a..473c1a1 100644 --- a/src/validator.py +++ b/src/validator.py @@ -53,7 +53,7 @@ def controller(): log.error("Failed to set dataRecords search index!") return 1 # set release search index - if not mongo_dao.set_search_release_index(RELEASE_SEARCH_INDEX, CRDCID_SEARCH_INDEX): + if not mongo_dao.set_search_release_index(RELEASE_SEARCH_INDEX, CRDCID_SEARCH_INDEX, DATA_RECORDS_STUDY_ENTITY_INDEX): log.error("Failed to set release search index!") return 1
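

Note on the study-based lookup introduced in this series (illustrative only, not part of the patches): the sketch below shows how the new studyID_entityType_nodeID index and the released-node-first search from PATCH 8/9 are expected to be exercised. The connection string, database name, field literals, and sample documents are assumptions for illustration; the service itself resolves collection names and field keys through its constants and configuration.

# A minimal pymongo sketch, assuming a throwaway local MongoDB instance.
from pymongo import MongoClient, ASCENDING

client = MongoClient("mongodb://localhost:27017")   # assumed test instance
db = client["crdc_datahub_test"]                    # hypothetical database name

# Same compound key pattern the series creates on both dataRecords and release.
for coll in ("dataRecords", "release"):
    db[coll].create_index(
        [("studyID", ASCENDING), ("entityType", ASCENDING), ("nodeID", ASCENDING)],
        name="studyID_entityType_nodeID",
    )

# Seed one released node and one in-flight dataRecord for the same study/entity/node.
# "crdc_id" below stands in for whatever field the CRDC_ID constant maps to.
db["release"].insert_one({"studyID": "S-001", "entityType": "Participant",
                          "nodeID": "P-01", "crdc_id": "existing-crdc-id"})
db["dataRecords"].insert_one({"studyID": "S-001", "entityType": "Participant",
                              "nodeID": "P-01", "submissionID": "sub-123"})

# Simplified mirror of search_node_by_study() after PATCH 8/9: prefer a released
# node, then fall back to dataRecords (the real method also skips Deleted releases).
query = {"studyID": "S-001", "entityType": "Participant", "nodeID": "P-01"}
node = db["release"].find_one(query) or db["dataRecords"].find_one(query)

# get_crdc_id() in PATCH 7/9 reuses an ID found this way and only mints a new
# UUID when no existing node for the study carries one.
print(node.get("crdc_id") if node else None)   # -> "existing-crdc-id"

With the index in place on both collections, these lookups run as indexed queries rather than collection scans, which is the motivation for creating the same key pattern on dataRecords and release.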