From 74585044222519d8f5e4317b087fc1bc6903710a Mon Sep 17 00:00:00 2001 From: Qiao Qiao <68757394+qiaouchicago@users.noreply.github.com> Date: Fri, 14 Oct 2022 13:24:54 -0500 Subject: [PATCH] DEV-1484: Add isort ymlfmt toml sort and reformat (#390) Add isort, ymlfmt and toml-sort to pre-commit-config and reformat. --- .pre-commit-config.yaml | 65 +- .secrets.baseline | 8 +- .travis.yml | 3 +- bin/update_related_case_caches.py | 2 +- docker-compose-ci.yaml | 9 +- docs/bin/schemata_to_graphviz.py | 4 +- gdcdatamodel/__main__.py | 8 +- gdcdatamodel/gdc_postgres_admin.py | 6 +- gdcdatamodel/models/__init__.py | 40 +- gdcdatamodel/models/indexes.py | 3 +- gdcdatamodel/models/versioned_nodes.py | 6 +- gdcdatamodel/models/versioning.py | 1 + gdcdatamodel/query.py | 2 +- gdcdatamodel/validators/__init__.py | 2 +- gdcdatamodel/validators/json_validators.py | 3 +- .../xml_mappings/tcga_biospecimen.yaml | 26 +- gdcdatamodel/xml_mappings/tcga_clinical.yaml | 751 +++++++++--------- migrations/index_secondary_keys.py | 8 +- migrations/notifications.py | 4 +- migrations/set_null_edge_columns.py | 4 +- migrations/update_case_cache_append_only.py | 4 +- migrations/update_legacy_states.py | 8 +- setup.py | 2 +- test/conftest.py | 9 +- test/helpers.py | 2 +- test/sample.yaml | 1 + test/schema/basic.yaml | 1 + test/schema/data/sample.yaml | 1 + test/test_cache_related_cases.py | 5 +- test/test_datamodel.py | 3 +- test/test_dictionary_loadiing.py | 1 + test/test_gdc_postgres_admin.py | 6 +- test/test_node_tagging.py | 3 +- test/test_update_case_cache.py | 2 +- test/test_validators.py | 5 +- test/test_versioned_nodes.py | 4 +- test/unit/test_tagging.py | 3 +- 37 files changed, 512 insertions(+), 503 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a605e22..7fd0f026 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,21 +1,46 @@ +--- repos: - - repo: git@github.com:Yelp/detect-secrets - rev: v0.13.0 - hooks: - - id: detect-secrets - args: [ '--baseline', '.secrets.baseline' ] - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 # last version to support py36 - hooks: - - id: check-json - - id: check-toml - - id: check-yaml - - id: end-of-file-fixer - - id: fix-encoding-pragma - args: [ --remove ] - - id: no-commit-to-branch - args: [ --branch, develop, --branch, master, --pattern, release/.* ] - - id: pretty-format-json - args: [ --autofix ] - - id: trailing-whitespace - args: [ --markdown-linebreak-ext=md ] + - repo: git@github.com:Yelp/detect-secrets + rev: v0.13.0 + hooks: + - id: detect-secrets + args: [--baseline, .secrets.baseline] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 # last version to support py36 + hooks: + - id: check-json + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: fix-encoding-pragma + args: [--remove] + - id: no-commit-to-branch + args: [--branch, develop, --branch, master, --pattern, release/.*] + - id: pretty-format-json + args: [--autofix] + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - repo: https://github.com/pycqa/isort + rev: 5.6.4 # last version to support pre-commit 1.21.0 in Jenkins + hooks: + - id: isort + name: isort (python) + args: [--profile, black] + - id: isort + name: isort (cython) + types: [cython] + args: [--profile, black] + - id: isort + name: isort (pyi) + types: [pyi] + args: [--profile, black] + - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt + rev: 0.2.1 + hooks: + - id: yamlfmt + args: [--mapping, '2', --sequence, '4', --offset, '2', --width, '80'] + - repo: https://github.com/pappasam/toml-sort + rev: v0.19.0 # last version to support py36 + hooks: + - id: toml-sort + args: [--in-place] diff --git a/.secrets.baseline b/.secrets.baseline index d24135dd..9e872771 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2022-09-14T17:17:24Z", + "generated_at": "2022-10-11T18:55:31Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -51,7 +51,7 @@ "hashed_secret": "5d0fa74acf95d1d6bebd0d37f76a94e77d604fd9", "is_secret": false, "is_verified": false, - "line_number": 67, + "line_number": 63, "type": "Basic Auth Credentials" } ], @@ -60,7 +60,7 @@ "hashed_secret": "5d0fa74acf95d1d6bebd0d37f76a94e77d604fd9", "is_secret": false, "is_verified": false, - "line_number": 42, + "line_number": 41, "type": "Basic Auth Credentials" } ], @@ -69,7 +69,7 @@ "hashed_secret": "5d0fa74acf95d1d6bebd0d37f76a94e77d604fd9", "is_secret": false, "is_verified": false, - "line_number": 37, + "line_number": 33, "type": "Basic Auth Credentials" } ] diff --git a/.travis.yml b/.travis.yml index bf4f6ab9..28522faa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,4 @@ +--- language: python dist: focal @@ -9,7 +10,7 @@ python: - 3.8 addons: - postgresql: "13" + postgresql: '13' apt: sources: - sourceline: deb http://apt.postgresql.org/pub/repos/apt/ focal-pgdg main 13 diff --git a/bin/update_related_case_caches.py b/bin/update_related_case_caches.py index 802290b5..9cd660d9 100644 --- a/bin/update_related_case_caches.py +++ b/bin/update_related_case_caches.py @@ -10,9 +10,9 @@ import getpass import logging -from gdcdatamodel import models as md from psqlgraph import PsqlGraphDriver +from gdcdatamodel import models as md logging.basicConfig() logger = logging.getLogger("update_related_cases_caches") diff --git a/docker-compose-ci.yaml b/docker-compose-ci.yaml index 8e4a3dae..6f7ca2c0 100644 --- a/docker-compose-ci.yaml +++ b/docker-compose-ci.yaml @@ -1,4 +1,5 @@ -version: "3.3" +--- +version: '3.3' services: postgres: image: postgres:9.4 @@ -16,9 +17,9 @@ services: HTTPS_PROXY: http://cloud-proxy:3128 HTTP_PROXY: http://cloud-proxy:3128 volumes: - - .:/home/jenkins - - $SSH_AUTH_SOCK:$SSH_AUTH_SOCK + - .:/home/jenkins + - $SSH_AUTH_SOCK:$SSH_AUTH_SOCK command: bash -c "./wait-for-it.sh localhost:5432 -t 120 && tox" - network_mode: "service:postgres" + network_mode: service:postgres depends_on: - postgres diff --git a/docs/bin/schemata_to_graphviz.py b/docs/bin/schemata_to_graphviz.py index 482fd15b..e5bb2407 100644 --- a/docs/bin/schemata_to_graphviz.py +++ b/docs/bin/schemata_to_graphviz.py @@ -1,7 +1,9 @@ import os -from gdcdatamodel import models as m + from graphviz import Digraph +from gdcdatamodel import models as m + def build_visualization(): print('Building schema documentation...') diff --git a/gdcdatamodel/__main__.py b/gdcdatamodel/__main__.py index cf685bda..f3a9e387 100644 --- a/gdcdatamodel/__main__.py +++ b/gdcdatamodel/__main__.py @@ -1,11 +1,11 @@ import argparse import getpass -import psqlgraph -from models import * # noqa -from psqlgraph import * # noqa -from sqlalchemy import * # noqa +import psqlgraph +from models import * # noqa from models.versioned_nodes import VersionedNode # noqa +from psqlgraph import * # noqa +from sqlalchemy import * # noqa try: import IPython diff --git a/gdcdatamodel/gdc_postgres_admin.py b/gdcdatamodel/gdc_postgres_admin.py index 9d64474b..21f40ad8 100644 --- a/gdcdatamodel/gdc_postgres_admin.py +++ b/gdcdatamodel/gdc_postgres_admin.py @@ -11,6 +11,7 @@ import time import sqlalchemy as sa +from psqlgraph import create_all, ext from psqlgraph.base import ORMBase from sqlalchemy import create_engine from sqlalchemy.exc import OperationalError @@ -18,11 +19,6 @@ #: Required but 'unused' import to register GDC models from . import models # noqa -from psqlgraph import ( - create_all, - ext, -) - logging.basicConfig() logger = logging.getLogger("gdc_postgres_admin") logger.setLevel(logging.INFO) diff --git a/gdcdatamodel/models/__init__.py b/gdcdatamodel/models/__init__.py index 2b7faafb..10aa9e03 100644 --- a/gdcdatamodel/models/__init__.py +++ b/gdcdatamodel/models/__init__.py @@ -23,54 +23,40 @@ except ImportError: from functools32 import lru_cache -from types import ModuleType +import hashlib import logging - from collections import defaultdict +from types import ModuleType +from psqlgraph import Edge, Node, ext, pg_property +from sqlalchemy import and_, event +from sqlalchemy.ext.hybrid import Comparator, hybrid_property from sqlalchemy.orm import configure_mappers -import hashlib from gdcdatamodel.models import ( - versioned_nodes, + batch, notifications, - submission, - redaction, qcreport, + redaction, released_data, studyrule, - batch, + submission, + versioned_nodes, versioning, ) - -from sqlalchemy import event, and_ - -from psqlgraph import Node, Edge, pg_property - -from psqlgraph import ext - -from sqlalchemy.ext.hybrid import ( - Comparator, - hybrid_property, -) - from gdcdatamodel.models.caching import ( NOT_RELATED_CASES_CATEGORIES, RELATED_CASES_LINK_NAME, - cache_related_cases_on_update, - cache_related_cases_on_insert, cache_related_cases_on_delete, + cache_related_cases_on_insert, + cache_related_cases_on_update, related_cases_from_cache, related_cases_from_parents, ) - -from gdcdatamodel.models.indexes import ( - cls_add_indexes, - get_secondary_key_indexes, -) +from gdcdatamodel.models.indexes import cls_add_indexes, get_secondary_key_indexes from gdcdatamodel.models.misc import FileReport # noqa -from gdcdatamodel.models.versioned_nodes import VersionedNode # noqa from gdcdatamodel.models.utils import py3_to_bytes +from gdcdatamodel.models.versioned_nodes import VersionedNode # noqa logger = logging.getLogger("gdcdatamodel") diff --git a/gdcdatamodel/models/indexes.py b/gdcdatamodel/models/indexes.py index 2100f224..eb500cba 100644 --- a/gdcdatamodel/models/indexes.py +++ b/gdcdatamodel/models/indexes.py @@ -10,14 +10,13 @@ """ +import hashlib import logging from sqlalchemy import Index, func -import hashlib from gdcdatamodel.models.utils import py3_to_bytes - logger = logging.getLogger(__name__) diff --git a/gdcdatamodel/models/versioned_nodes.py b/gdcdatamodel/models/versioned_nodes.py index 529766b5..24bcfea9 100644 --- a/gdcdatamodel/models/versioned_nodes.py +++ b/gdcdatamodel/models/versioned_nodes.py @@ -1,8 +1,8 @@ -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.dialects.postgresql import ARRAY, JSONB -from sqlalchemy import Column, Text, DateTime, BigInteger, text, Index from copy import copy +from sqlalchemy import BigInteger, Column, DateTime, Index, Text, text +from sqlalchemy.dialects.postgresql import ARRAY, JSONB +from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() diff --git a/gdcdatamodel/models/versioning.py b/gdcdatamodel/models/versioning.py index 4b29ff16..d382c658 100644 --- a/gdcdatamodel/models/versioning.py +++ b/gdcdatamodel/models/versioning.py @@ -3,6 +3,7 @@ import six from sqlalchemy import and_, event, select + try: from functools import lru_cache except ImportError: diff --git a/gdcdatamodel/query.py b/gdcdatamodel/query.py index e21cc09d..3a4f9ba6 100644 --- a/gdcdatamodel/query.py +++ b/gdcdatamodel/query.py @@ -1,4 +1,4 @@ -from psqlgraph import Node, Edge +from psqlgraph import Edge, Node traversals = {} terminal_nodes = ['annotations', 'centers', 'archives', 'tissue_source_sites', diff --git a/gdcdatamodel/validators/__init__.py b/gdcdatamodel/validators/__init__.py index 51e0a0bd..88d4e232 100644 --- a/gdcdatamodel/validators/__init__.py +++ b/gdcdatamodel/validators/__init__.py @@ -1,2 +1,2 @@ -from .json_validators import GDCJSONValidator from .graph_validators import GDCGraphValidator +from .json_validators import GDCJSONValidator diff --git a/gdcdatamodel/validators/json_validators.py b/gdcdatamodel/validators/json_validators.py index 376d2e39..13923170 100644 --- a/gdcdatamodel/validators/json_validators.py +++ b/gdcdatamodel/validators/json_validators.py @@ -1,6 +1,7 @@ +import re + from gdcdictionary import gdcdictionary from jsonschema import Draft4Validator -import re missing_prop_re = re.compile("\'([a-zA-Z_-]+)\' is a required property") extra_prop_re = re.compile("Additional properties are not allowed \(u\'([a-zA-Z_-]+)\' was unexpected\)") diff --git a/gdcdatamodel/xml_mappings/tcga_biospecimen.yaml b/gdcdatamodel/xml_mappings/tcga_biospecimen.yaml index 897aa7ec..ddddbee1 100644 --- a/gdcdatamodel/xml_mappings/tcga_biospecimen.yaml +++ b/gdcdatamodel/xml_mappings/tcga_biospecimen.yaml @@ -1,3 +1,4 @@ +--- # example: # - # root: xpath @@ -26,8 +27,7 @@ # year: xpath aliquot: - - - root: //bio:aliquot + - root: //bio:aliquot id: .//bio:bcr_aliquot_uuid edges: analytes: ancestor::bio:analyte/bio:bcr_analyte_uuid @@ -72,8 +72,7 @@ aliquot: type: float analyte: - - - root: //bio:analyte + - root: //bio:analyte id: .//bio:bcr_analyte_uuid edges: portions: ancestor::bio:portion/bio:bcr_portion_uuid @@ -109,8 +108,7 @@ analyte: type: str portion: - - - root: //bio:portion + - root: //bio:portion id: .//bio:bcr_portion_uuid edges: samples: ancestor::bio:sample/bio:bcr_sample_uuid @@ -132,8 +130,7 @@ portion: day: ./bio:day_of_creation month: ./bio:month_of_creation year: ./bio:year_of_creation - - - root: //bio:shipment_portion + - root: //bio:shipment_portion id: .//bio:bcr_shipment_portion_uuid edges: samples: ancestor::bio:sample/bio:bcr_sample_uuid @@ -183,8 +180,7 @@ portion: year: ./bio:shipment_portion_year_of_shipment sample: - - - root: + - root: - //bio_model:sample - //bio:sample id: @@ -301,12 +297,11 @@ sample: - ./bio_model:time_between_excision_and_freezing - ./bio:time_between_excision_and_freezing type: float - tumor_code_id: null - tumor_code: null + tumor_code_id: + tumor_code: case: - - - root: //*[local-name()='patient'] + - root: //*[local-name()='patient'] id: .//shared:bcr_patient_uuid edges_by_property: tissue_source_sites: @@ -319,8 +314,7 @@ case: type: str slide: - - - root: //bio:slide + - root: //bio:slide id: .//shared:bcr_slide_uuid edges: portions: ancestor::bio:portion/bio:bcr_portion_uuid diff --git a/gdcdatamodel/xml_mappings/tcga_clinical.yaml b/gdcdatamodel/xml_mappings/tcga_clinical.yaml index e42c1194..f5af1e92 100644 --- a/gdcdatamodel/xml_mappings/tcga_clinical.yaml +++ b/gdcdatamodel/xml_mappings/tcga_clinical.yaml @@ -1,3 +1,4 @@ +--- # Demographic demographic: - root: //*[local-name()='patient'] @@ -36,7 +37,8 @@ demographic: days_to_death: evaluator: name: filter - path: (//shared:days_to_death|//clin_shared:days_to_death)[not(@procurement_status = "Not Applicable" or @procurement_status = "Not Available")] + path: (//shared:days_to_death|//clin_shared:days_to_death)[not(@procurement_status + = "Not Applicable" or @procurement_status = "Not Available")] type: int maximum: 32872 minimum: -32872 @@ -54,15 +56,16 @@ demographic: type: int minimum: -32872 maximum: 0 - default: null + default: values: -32872: - - "-32873" + - '-32873' vital_status: evaluator: name: vital_status - dead_vital_status_search: (//clin_shared:vital_status|//shared:vital_status)[translate(text(), 'DEA', 'dea') = 'dead'] + dead_vital_status_search: (//clin_shared:vital_status|//shared:vital_status)[translate(text(), + 'DEA', 'dea') = 'dead'] follow_up_path: //shared:days_to_last_followup|//clin_shared:days_to_last_followup path: ./shared:vital_status|./clin_shared:vital_status type: str.title @@ -80,397 +83,397 @@ diagnosis: nullable: false type: str.lower properties: - submitter_id: - path: ./shared:bcr_patient_barcode - suffix: _diagnosis - type: str - - age_at_diagnosis: - path: ./shared:days_to_birth|./clin_shared:days_to_birth * -1 - type: int - maximum: 32872 - minimum: 0 - values: - 32872: - - 32873 - - days_to_last_follow_up: - evaluator: - name: last_follow_up - path: //shared:days_to_last_followup|//clin_shared:days_to_last_followup - type: int - maximum: 32872 - minimum: -32872 - default: null + submitter_id: + path: ./shared:bcr_patient_barcode + suffix: _diagnosis + type: str + + age_at_diagnosis: + path: ./shared:days_to_birth|./clin_shared:days_to_birth * -1 + type: int + maximum: 32872 + minimum: 0 + values: + 32872: + - 32873 + + days_to_last_follow_up: + evaluator: + name: last_follow_up + path: //shared:days_to_last_followup|//clin_shared:days_to_last_followup + type: int + maximum: 32872 + minimum: -32872 + default: # mapped to Not Reported see https://jira.opensciencedatacloud.org/browse/DAT-2409 - primary_diagnosis: - path: "na" - type: str - default: Not Reported - - morphology: - path: "na" - type: str - default: Not Reported - - tissue_or_organ_of_origin: - path: "na" - type: str - default: Not Reported - - site_of_resection_or_biopsy: - path: "na" - type: str - default: Not Reported - - classification_of_tumor: - path: "primary" - type: str - default: not reported - - prior_malignancy: - evaluator: - name: unique_value - path: //shared:other_dx/text() - default: not reported - type: str.lower - values: - 'No': - - 'no' - - yes, history of synchronous/bilateral malignancy - 'Yes': - - 'yes' - - yes, history of prior malignancy - - synchronous_malignancy: - evaluator: - name: unique_value - path: //shared:other_dx/text() - default: not reported - type: str.title - values: - 'Yes': - - yes, history of synchronous/bilateral malignancy - 'No': - - 'no' - - yes, history of prior malignancy - not reported: - - 'yes' - - tumor_grade: - path: ./shared_stage:gleason_grading/shared_stage:gleason_score - type: str - default: not reported - - progression_or_recurrence: - path: "unknown" - type: str - default: not reported - - days_to_recurrence: - path: "unknown" - type: int - maximum: 32872 - minimum: -32872 - default: null - - last_known_disease_status: - path: "unknown" - type: str - default: not reported - - days_to_last_known_disease_status: - path: "unknown" - type: int - maximum: 32872 - minimum: -32872 - default: null - - year_of_diagnosis: - evaluator: - name: unique_value - path: //clin_shared:year_of_initial_pathologic_diagnosis/text() - type: int - maximum: 32872 - minimum: -32872 - - icd_10_code: - path: ./clin_shared:icd_10 - type: str - - days_to_diagnosis: - evaluator: + primary_diagnosis: + path: na + type: str + default: Not Reported + + morphology: + path: na + type: str + default: Not Reported + + tissue_or_organ_of_origin: + path: na + type: str + default: Not Reported + + site_of_resection_or_biopsy: + path: na + type: str + default: Not Reported + + classification_of_tumor: + path: primary + type: str + default: not reported + + prior_malignancy: + evaluator: + name: unique_value + path: //shared:other_dx/text() + default: not reported + type: str.lower + values: + No: + - no + - yes, history of synchronous/bilateral malignancy + Yes: + - yes + - yes, history of prior malignancy + + synchronous_malignancy: + evaluator: + name: unique_value + path: //shared:other_dx/text() + default: not reported + type: str.title + values: + Yes: + - yes, history of synchronous/bilateral malignancy + No: + - no + - yes, history of prior malignancy + not reported: + - yes + + tumor_grade: + path: ./shared_stage:gleason_grading/shared_stage:gleason_score + type: str + default: not reported + + progression_or_recurrence: + path: unknown + type: str + default: not reported + + days_to_recurrence: + path: unknown + type: int + maximum: 32872 + minimum: -32872 + default: + + last_known_disease_status: + path: unknown + type: str + default: not reported + + days_to_last_known_disease_status: + path: unknown + type: int + maximum: 32872 + minimum: -32872 + default: + + year_of_diagnosis: + evaluator: + name: unique_value + path: //clin_shared:year_of_initial_pathologic_diagnosis/text() + type: int + maximum: 32872 + minimum: -32872 + + icd_10_code: + path: ./clin_shared:icd_10 + type: str + + days_to_diagnosis: + evaluator: # description: search all elements for path and pick value when: # all elements have the same value, or only one element has a value - name: unique_value - path: //clin_shared:days_to_initial_pathologic_diagnosis/text() - type: int - - prior_treatment: - path: ./shared:history_of_neoadjuvant_treatment - type: str.title - default: not reported - values: - 'No': - - 'no' - 'Yes': - - 'yes' - - yes, radiation prior to resection - - yes, pharmaceutical treatment prior to resection + name: unique_value + path: //clin_shared:days_to_initial_pathologic_diagnosis/text() + type: int + + prior_treatment: + path: ./shared:history_of_neoadjuvant_treatment + type: str.title + default: not reported + values: + No: + - no + Yes: + - yes + - yes, radiation prior to resection + - yes, pharmaceutical treatment prior to resection # see https://jira.opensciencedatacloud.org/browse/DAT-2398 - ajcc_staging_system_edition: - path: //shared_stage:stage_event[not(@system) or @system='AJCC']/shared_stage:system_version - type: str.lower + ajcc_staging_system_edition: + path: //shared_stage:stage_event[not(@system) or @system='AJCC']/shared_stage:system_version + type: str.lower # see https://jira.opensciencedatacloud.org/browse/DAT-2399 - ann_arbor_b_symptoms: - path: //shared_stage:b_symptoms - type: str.title + ann_arbor_b_symptoms: + path: //shared_stage:b_symptoms + type: str.title - ann_arbor_extranodal_involvement: - path: //shared_stage:extranodal_involvement - type: str.title + ann_arbor_extranodal_involvement: + path: //shared_stage:extranodal_involvement + type: str.title - ajcc_pathologic_t: - path: //shared_stage:pathologic_T - type: str + ajcc_pathologic_t: + path: //shared_stage:pathologic_T + type: str - ajcc_pathologic_n: - path: //shared_stage:pathologic_N - type: str + ajcc_pathologic_n: + path: //shared_stage:pathologic_N + type: str - ajcc_pathologic_m: - path: //shared_stage:pathologic_M - type: str + ajcc_pathologic_m: + path: //shared_stage:pathologic_M + type: str - ajcc_clinical_t: - path: //shared_stage:clinical_T - type: str + ajcc_clinical_t: + path: //shared_stage:clinical_T + type: str - ajcc_clinical_n: - path: //shared_stage:clinical_N - type: str + ajcc_clinical_n: + path: //shared_stage:clinical_N + type: str - ajcc_clinical_m: - path: //shared_stage:clinical_M - type: str + ajcc_clinical_m: + path: //shared_stage:clinical_M + type: str # see https://jira.opensciencedatacloud.org/browse/DAT-2402 - ajcc_clinical_stage: - path: //shared_stage:stage_event[not(@system) or @system='AJCC']/shared_stage:clinical_stage - type: str - values: - Stage 0: - - stage 0 - Stage I: - - stage i - Stage IA: - - stage ia - Stage IB: - - stage ib - Stage IC: - - stage ic - Stage II: - - stage ii - Stage IIA: - - stage iia - Stage IIB: - - stage iib - Stage IIC: - - stage iic - Stage III: - - stage iii - Stage IIIA: - - stage iiia - Stage IIIB: - - stage iiib - Stage IIIC: - - stage iiic - Stage IS: - - is - - stage is - Stage IV: - - stage iv - Stage IVA: - - stage iva - Stage IVB: - - stage ivb - Stage IVC: - - stage ivc - Stage X: - - stage x - Not Reported: - - i/ii nos + ajcc_clinical_stage: + path: //shared_stage:stage_event[not(@system) or @system='AJCC']/shared_stage:clinical_stage + type: str + values: + Stage 0: + - stage 0 + Stage I: + - stage i + Stage IA: + - stage ia + Stage IB: + - stage ib + Stage IC: + - stage ic + Stage II: + - stage ii + Stage IIA: + - stage iia + Stage IIB: + - stage iib + Stage IIC: + - stage iic + Stage III: + - stage iii + Stage IIIA: + - stage iiia + Stage IIIB: + - stage iiib + Stage IIIC: + - stage iiic + Stage IS: + - is + - stage is + Stage IV: + - stage iv + Stage IVA: + - stage iva + Stage IVB: + - stage ivb + Stage IVC: + - stage ivc + Stage X: + - stage x + Not Reported: + - i/ii nos # see https://jira.opensciencedatacloud.org/browse/DAT-2403 - ann_arbor_clinical_stage: - path: //shared_stage:stage_event[@system='ANN_ARBOR']/shared_stage:clinical_stage - type: str - values: - Stage I: - - stage i - Stage II: - - stage ii - Stage III: - - stage iii - Stage IV: - - stage iv + ann_arbor_clinical_stage: + path: //shared_stage:stage_event[@system='ANN_ARBOR']/shared_stage:clinical_stage + type: str + values: + Stage I: + - stage i + Stage II: + - stage ii + Stage III: + - stage iii + Stage IV: + - stage iv # see https://jira.opensciencedatacloud.org/browse/DAT-2400 - ajcc_pathologic_stage: - path: //shared_stage:stage_event[not(@system) or @system='AJCC']/shared_stage:pathologic_stage - type: str - values: - Stage 0: - - stage 0 - Stage I: - - stage i - Stage IA: - - stage ia - Stage IB: - - stage ib - Stage II: - - stage ii - Stage IS: - - is - Stage IIA: - - stage iia - Stage IIB: - - stage iib - Stage IIC: - - stage iic - Stage III: - - stage iii - Stage IIIA: - - stage iiia - Stage IIIB: - - stage iiib - Stage IIIC: - - stage iiic - Stage IV: - - stage iv - Stage IVA: - - stage iva - Stage IVB: - - stage ivb - Stage IVC: - - stage ivc - Stage X: - - stage x - Not Reported: - - i/ii nos + ajcc_pathologic_stage: + path: //shared_stage:stage_event[not(@system) or @system='AJCC']/shared_stage:pathologic_stage + type: str + values: + Stage 0: + - stage 0 + Stage I: + - stage i + Stage IA: + - stage ia + Stage IB: + - stage ib + Stage II: + - stage ii + Stage IS: + - is + Stage IIA: + - stage iia + Stage IIB: + - stage iib + Stage IIC: + - stage iic + Stage III: + - stage iii + Stage IIIA: + - stage iiia + Stage IIIB: + - stage iiib + Stage IIIC: + - stage iiic + Stage IV: + - stage iv + Stage IVA: + - stage iva + Stage IVB: + - stage ivb + Stage IVC: + - stage ivc + Stage X: + - stage x + Not Reported: + - i/ii nos # see https://jira.opensciencedatacloud.org/browse/DAT-2401 - figo_stage: - path: //shared_stage:stage_event[@system='FIGO']/shared_stage:clinical_stage - type: str - values: - Stage I: - - stage i - Stage IA: - - stage ia - Stage IA1: - - stage ia1 - Stage IA2: - - stage ia2 - Stage IB: - - stage ib - Stage IB1: - - stage ib1 - Stage IB2: - - stage ib2 - Stage IC: - - stage ic - Stage II: - - stage ii - Stage IIA: - - stage iia - Stage IIA1: - - stage iia1 - Stage IIA2: - - stage iia2 - Stage IIB: - - stage iib - Stage IIC: - - stage iic - Stage III: - - stage iii - Stage IIIA: - - stage iiia - Stage IIIB: - - stage iiib - Stage IIIC: - - stage iiic - Stage IIIC1: - - stage iiic1 - Stage IIIC2: - - stage iiic2 - Stage IV: - - stage iv - Stage IVA: - - stage iva - Stage IVB: - - stage ivb + figo_stage: + path: //shared_stage:stage_event[@system='FIGO']/shared_stage:clinical_stage + type: str + values: + Stage I: + - stage i + Stage IA: + - stage ia + Stage IA1: + - stage ia1 + Stage IA2: + - stage ia2 + Stage IB: + - stage ib + Stage IB1: + - stage ib1 + Stage IB2: + - stage ib2 + Stage IC: + - stage ic + Stage II: + - stage ii + Stage IIA: + - stage iia + Stage IIA1: + - stage iia1 + Stage IIA2: + - stage iia2 + Stage IIB: + - stage iib + Stage IIC: + - stage iic + Stage III: + - stage iii + Stage IIIA: + - stage iiia + Stage IIIB: + - stage iiib + Stage IIIC: + - stage iiic + Stage IIIC1: + - stage iiic1 + Stage IIIC2: + - stage iiic2 + Stage IV: + - stage iv + Stage IVA: + - stage iva + Stage IVB: + - stage ivb # see https://jira.opensciencedatacloud.org/browse/DAT-2404 - primary_gleason_grade: - path: //shared_stage:primary_pattern - type: str - values: - Pattern 2: - - "2" - Pattern 3: - - "3" - Pattern 4: - - "4" - Pattern 5: - - "5" + primary_gleason_grade: + path: //shared_stage:primary_pattern + type: str + values: + Pattern 2: + - '2' + Pattern 3: + - '3' + Pattern 4: + - '4' + Pattern 5: + - '5' # see https://jira.opensciencedatacloud.org/browse/DAT-2405 - secondary_gleason_grade: - path: //shared_stage:secondary_pattern - type: str - values: - Pattern 2: - - "2" - Pattern 3: - - "3" - Pattern 4: - - "4" - Pattern 5: - - "5" + secondary_gleason_grade: + path: //shared_stage:secondary_pattern + type: str + values: + Pattern 2: + - '2' + Pattern 3: + - '3' + Pattern 4: + - '4' + Pattern 5: + - '5' # see https://jira.opensciencedatacloud.org/browse/DAT-2406 - igcccg_stage: - path: //shared_stage:igcccg_stage - type: str - values: - Poor Prognosis: - - poor - Good Prognosis: - - good - Intermediate Prognosis: - - intermediate + igcccg_stage: + path: //shared_stage:igcccg_stage + type: str + values: + Poor Prognosis: + - poor + Good Prognosis: + - good + Intermediate Prognosis: + - intermediate # see https://jira.opensciencedatacloud.org/browse/DAT-2407 - masaoka_stage: - path: //shared_stage:masaoka_stage - type: str - values: - Stage I: - - i - Stage IIa: - - iia - Stage IIb: - - iib - Stage III: - - iii - Stage IVa: - - iva - Stage IVb: - - ivb + masaoka_stage: + path: //shared_stage:masaoka_stage + type: str + values: + Stage I: + - i + Stage IIa: + - iia + Stage IIb: + - iib + Stage III: + - iii + Stage IVa: + - iva + Stage IVb: + - ivb exposure: - root: //*[local-name()='patient'] @@ -495,20 +498,20 @@ exposure: type: float alcohol_history: - path: "./clin_shared:alcohol_history_documented" + path: ./clin_shared:alcohol_history_documented type: str.title default: Not Reported alcohol_intensity: - path: "unknown" + path: unknown type: str height: - path: "./clin_shared:height" + path: ./clin_shared:height type: float weight: - path: "./clin_shared:weight" + path: ./clin_shared:weight type: float pack_years_smoked: @@ -560,8 +563,8 @@ treatment: - Recurrence - Regional lymph node path: - - //clin_shared:radiation_therapy - - //clin_shared:postoperative_rx_tx + - //clin_shared:radiation_therapy + - //clin_shared:postoperative_rx_tx term: TBD enum: - yes @@ -571,24 +574,24 @@ treatment: default: not reported treatment_intent_type: - path: "unknown" + path: unknown term: TBD type: str therapeutic_agents: - path: "unknown" + path: unknown term: TBD type: str days_to_treatment_start: - path: "unknown" + path: unknown term: TBD type: int maximum: 32872 minimum: -32872 days_to_treatment_end: - path: "unknown" + path: unknown term: TBD type: int maximum: 32872 diff --git a/migrations/index_secondary_keys.py b/migrations/index_secondary_keys.py index 7f257f61..aa60fc1c 100644 --- a/migrations/index_secondary_keys.py +++ b/migrations/index_secondary_keys.py @@ -11,13 +11,13 @@ """ +import logging + from psqlgraph import Node -from gdcdatamodel.models import get_secondary_key_indexes -from gdcdatamodel.models.submission import TransactionLog from sqlalchemy import Index - -import logging +from gdcdatamodel.models import get_secondary_key_indexes +from gdcdatamodel.models.submission import TransactionLog logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/migrations/notifications.py b/migrations/notifications.py index 30eff750..74857dfb 100644 --- a/migrations/notifications.py +++ b/migrations/notifications.py @@ -5,10 +5,10 @@ Create `notifications` table. """ -from gdcdatamodel import models - import logging +from gdcdatamodel import models + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/migrations/set_null_edge_columns.py b/migrations/set_null_edge_columns.py index cd5eaac4..0e97d176 100644 --- a/migrations/set_null_edge_columns.py +++ b/migrations/set_null_edge_columns.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -from psqlgraph import Node, Edge -from gdcdatamodel import models as md +from psqlgraph import Edge, Node +from gdcdatamodel import models as md CACHE_EDGES = { Node.get_subclass_named(edge.__src_class__): edge diff --git a/migrations/update_case_cache_append_only.py b/migrations/update_case_cache_append_only.py index b10a44bd..ae20bfe4 100644 --- a/migrations/update_case_cache_append_only.py +++ b/migrations/update_case_cache_append_only.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -from psqlgraph import Node, Edge -from gdcdatamodel import models as md +from psqlgraph import Edge, Node +from gdcdatamodel import models as md CACHE_EDGES = { Node.get_subclass_named(edge.__src_class__): edge diff --git a/migrations/update_legacy_states.py b/migrations/update_legacy_states.py index 4dfa8372..8faed978 100644 --- a/migrations/update_legacy_states.py +++ b/migrations/update_legacy_states.py @@ -35,13 +35,13 @@ """ import logging +from collections import namedtuple +from multiprocessing import Process, Queue, cpu_count -from sqlalchemy import not_, or_, and_ from psqlgraph import Node, PsqlGraphDriver -from gdcdatamodel import models as md -from multiprocessing import Process, cpu_count, Queue -from collections import namedtuple +from sqlalchemy import and_, not_, or_ +from gdcdatamodel import models as md CLS_WITH_PROJECT_ID = { cls for cls in Node.get_subclasses() diff --git a/setup.py b/setup.py index 99eee56e..bf8d52d6 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name="gdcdatamodel", diff --git a/test/conftest.py b/test/conftest.py index aa3f9a3f..726d7ea4 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -7,17 +7,16 @@ import random import unittest import uuid -import pkg_resources +from test.helpers import create_tables, truncate +from test.models import BasicDictionary +import pkg_resources import pytest import yaml -from gdcdatamodel import models from psqlgraph import PsqlGraphDriver, mocks from sqlalchemy import create_engine -from test.helpers import truncate, create_tables -from test.models import BasicDictionary - +from gdcdatamodel import models models.load_dictionary(BasicDictionary, "basic") from gdcdatamodel.models import basic # noqa diff --git a/test/helpers.py b/test/helpers.py index 9aed970d..b8ce1c46 100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -1,5 +1,5 @@ import psqlgraph -from psqlgraph import Node, Edge, create_all, ext +from psqlgraph import Edge, Node, create_all, ext from gdcdatamodel import models diff --git a/test/sample.yaml b/test/sample.yaml index 7229e6b3..ba1043ff 100644 --- a/test/sample.yaml +++ b/test/sample.yaml @@ -1,3 +1,4 @@ +--- nodes: - label: program name: GDC diff --git a/test/schema/basic.yaml b/test/schema/basic.yaml index 0a084d97..abeef823 100644 --- a/test/schema/basic.yaml +++ b/test/schema/basic.yaml @@ -1,3 +1,4 @@ +--- # dummy dictionary for testing purposes program: id: program diff --git a/test/schema/data/sample.yaml b/test/schema/data/sample.yaml index 731d6534..7215b571 100644 --- a/test/schema/data/sample.yaml +++ b/test/schema/data/sample.yaml @@ -1,3 +1,4 @@ +--- nodes: - label: program name: GDC diff --git a/test/test_cache_related_cases.py b/test/test_cache_related_cases.py index c501e1b5..bb3ab094 100644 --- a/test/test_cache_related_cases.py +++ b/test/test_cache_related_cases.py @@ -1,7 +1,8 @@ -from gdcdatamodel import models as md +from test.conftest import BaseTestCase + from psqlgraph import Node -from test.conftest import BaseTestCase +from gdcdatamodel import models as md class TestCacheRelatedCases(BaseTestCase): diff --git a/test/test_datamodel.py b/test/test_datamodel.py index 89fd544b..c0c322d3 100644 --- a/test/test_datamodel.py +++ b/test/test_datamodel.py @@ -1,9 +1,10 @@ -from datetime import datetime import logging import unittest +from datetime import datetime from psqlgraph import Edge, Node, PsqlGraphDriver from psqlgraph.exc import ValidationError + from gdcdatamodel import models as md logging.basicConfig(level=logging.INFO) diff --git a/test/test_dictionary_loadiing.py b/test/test_dictionary_loadiing.py index 2b2180c8..87a5018e 100644 --- a/test/test_dictionary_loadiing.py +++ b/test/test_dictionary_loadiing.py @@ -1,4 +1,5 @@ import pytest + from gdcdatamodel import models diff --git a/test/test_gdc_postgres_admin.py b/test/test_gdc_postgres_admin.py index 33e3679d..e08d080e 100644 --- a/test/test_gdc_postgres_admin.py +++ b/test/test_gdc_postgres_admin.py @@ -5,11 +5,7 @@ import logging import unittest -from psqlgraph import ( - Edge, - Node, - PsqlGraphDriver, -) +from psqlgraph import Edge, Node, PsqlGraphDriver from sqlalchemy.exc import ProgrammingError from gdcdatamodel import gdc_postgres_admin as pgadmin diff --git a/test/test_node_tagging.py b/test/test_node_tagging.py index 8bde0f29..65bbb46b 100644 --- a/test/test_node_tagging.py +++ b/test/test_node_tagging.py @@ -1,8 +1,9 @@ +from test.helpers import create_tables, truncate + import pytest from psqlgraph import PsqlGraphDriver from gdcdatamodel.models import basic, versioning # noqa -from test.helpers import create_tables, truncate @pytest.fixture(scope='module') diff --git a/test/test_update_case_cache.py b/test/test_update_case_cache.py index a2eb24dd..87170de0 100644 --- a/test/test_update_case_cache.py +++ b/test/test_update_case_cache.py @@ -7,8 +7,8 @@ """ import pytest -from gdcdatamodel import models as md +from gdcdatamodel import models as md from migrations import update_case_cache diff --git a/test/test_validators.py b/test/test_validators.py index 624b29fa..6a4c5d79 100644 --- a/test/test_validators.py +++ b/test/test_validators.py @@ -1,10 +1,9 @@ import uuid from copy import copy +from test.conftest import BaseTestCase -from gdcdatamodel.validators import GDCJSONValidator, GDCGraphValidator from gdcdatamodel.models import * - -from test.conftest import BaseTestCase +from gdcdatamodel.validators import GDCGraphValidator, GDCJSONValidator class MockSubmissionEntity(object): diff --git a/test/test_versioned_nodes.py b/test/test_versioned_nodes.py index c6e63206..82f82856 100644 --- a/test/test_versioned_nodes.py +++ b/test/test_versioned_nodes.py @@ -1,7 +1,7 @@ -from gdcdatamodel import models as md - from test.conftest import BaseTestCase +from gdcdatamodel import models as md + class TestValidators(BaseTestCase): diff --git a/test/unit/test_tagging.py b/test/unit/test_tagging.py index f68a87ce..c59e0e1a 100644 --- a/test/unit/test_tagging.py +++ b/test/unit/test_tagging.py @@ -1,8 +1,7 @@ import pytest -from gdcdatamodel.models import versioning as v from gdcdatamodel.models import basic # noqa - +from gdcdatamodel.models import versioning as v EXPECTED_TAGS = { "be66197b-f6cc-4366-bded-365856ec4f63": "84044bd2-54a4-5837-b83d-f920eb97c18d",