Skip to content

Commit

Permalink
filter by taxonomy when exporting data
Browse files (browse the repository at this point in the history)
  • Loading branch information
nitin-ebi committed Aug 22, 2023
1 parent cfaa462 commit 344503e
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,11 +40,15 @@
"clusteredVariantOperationEntity": "inactiveObjects.asm"
}

submitted_collections_taxonomy_attribute_map = {
collections_taxonomy_attribute_map = {
"dbsnpSubmittedVariantEntity": "tax",
"dbsnpSubmittedVariantOperationEntity": "inactiveObjects.tax",
"submittedVariantEntity": "tax",
"submittedVariantOperationEntity": "inactiveObjects.tax"
"submittedVariantOperationEntity": "inactiveObjects.tax",
"dbsnpClusteredVariantEntity": "tax",
"dbsnpClusteredVariantOperationEntity": "inactiveObjects.tax",
"clusteredVariantEntity": "tax",
"clusteredVariantOperationEntity": "inactiveObjects.tax"
}


Expand All @@ -62,12 +66,8 @@ def mongo_data_copy_to_remote_host(local_forwarded_port, private_config_xml_file

for collection, collection_assembly_attribute_name in sorted(collections_to_copy_map.items()):
logger.info("Begin processing collection: " + collection)
if "clustered" in collection.lower():
# Curly braces when they are not positional parameters
query = """'{{"{0}": {{"$in":["{1}"]}}}}'""".format(collection_assembly_attribute_name, assembly_accession)
else:
taxonomy_attribute_path = submitted_collections_taxonomy_attribute_map[collection]
query = """'{{"{0}": {{"$in":["{1}"]}}, "{2}": {{"$in":[{3}]}}}}'""".format(
taxonomy_attribute_path = collections_taxonomy_attribute_map[collection]
query = """'{{"{0}": {{"$in":["{1}"]}}, "{2}": {{"$in":[{3}]}}}}'""".format(
collection_assembly_attribute_name, assembly_accession, taxonomy_attribute_path, taxonomy_id)
sharded_db_name = "eva_accession_sharded"
mongodump_args = {"db": sharded_db_name, "host": mongo_host,
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo, \
get_release_db_name_in_tempmongo_instance
from run_release_in_embassy.copy_accessioning_collections_to_embassy import collections_assembly_attribute_map, \
submitted_collections_taxonomy_attribute_map
collections_taxonomy_attribute_map
from pymongo import MongoClient

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -425,7 +425,7 @@ def export_unique_rs_ids_from_mongo(mongo_database_handle, taxonomy_id, assembly
collection_rs_ids_file = mongo_unique_rs_ids_file.replace(".txt", "_{0}.txt".format(collection))
agg_pipeline = []
for sve_coll in (sve_collection_name, dbsnp_sve_collection_name):
taxonomy_attribute_path = submitted_collections_taxonomy_attribute_map[sve_coll]
taxonomy_attribute_path = collections_taxonomy_attribute_map[sve_coll]
assembly_attribute_path = collections_assembly_attribute_map[sve_coll]
agg_pipeline.append({
"$lookup": {
Expand Down

0 comments on commit 344503e

Please sign in to comment.