Skip to content

Commit

Permalink
Feat/aliquot 2.0.0 (#48)
Browse files Browse the repository at this point in the history
* feat(aliquot-2): UNIPROT_ISOFORM

* feat(aliquot-2): MANE, APPRIS, FLAGS

* fix(aliquot-2): shift RefSeq

* feat(aliquot-2): 100G columns

* feat(aliquot-2): ESP columns

* feat(aliquot-2): gnomAD columns

* feat(aliquot-2): VEP max AF columns

* feat(aliquot-2): non-cancer gnomAD columns

* feat(aliquot-2): TF and miRNA columns

* feat(aliquot-2): drop exac

* feat(genie): genie 2.0.0

Co-authored-by: Charles Czysz <[email protected]>
  • Loading branch information
Kyle Hernandez and czyszCTDS authored Jun 25, 2021
1 parent b203de9 commit 04753bc
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 0 deletions.
118 changes: 118 additions & 0 deletions maflib/schemas/gdc-2.0.0-aliquot.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
{
"version" : "gdc-1.0.0",
"annotation-spec" : "gdc-2.0.0-aliquot",
"extends" : "gdc-1.0.0",
"columns" : [
[ "HGVSc", "NullableStringColumn", "The coding sequence of the variant in HGVS recommended format" ],
[ "HGVSp", "NullableStringColumn", "The protein sequence of the variant in HGVS recommended format. \"p.=\" signifies no change in the protein" ],
[ "HGVSp_Short", "NullableStringColumn", "Same as the HGVSp column, but using 1-letter amino-acid codes" ],
[ "Transcript_ID", "NullableStringColumn", "Ensembl ID of the transcript affected by the variant" ],
[ "Exon_Number", "NullableStringColumn", "The exon number (out of total number)" ],
[ "t_depth", "ZeroBasedIntegerColumn", "Read depth across this locus in tumor BAM" ],
[ "t_ref_count", "ZeroBasedIntegerColumn", "Read depth supporting the reference allele in tumor BAM" ],
[ "t_alt_count", "ZeroBasedIntegerColumn", "Read depth supporting the variant allele in tumor BAM" ],
[ "n_depth", "NullableZeroBasedIntegerColumn", "Read depth across this locus in normal BAM" ],
[ "n_ref_count", "NullableZeroBasedIntegerColumn", "Read depth supporting the reference allele in normal BAM (cleared in somatic MAF)" ],
[ "n_alt_count", "NullableZeroBasedIntegerColumn", "Read depth supporting the variant allele in normal BAM (cleared in somatic MAF)" ],
[ "all_effects", "SequenceOfStrings", "A semicolon delimited list of all possible variant effects, sorted by priority ([Symbol,Consequence,HGVSp_Short,Transcript_ID,RefSeq,HGVSc,Impact,Canonical,Sift,PolyPhen,Strand])" ],
[ "Allele", "StringColumn", "The variant allele used to calculate the consequence" ],
[ "Gene", "NullableStringColumn", "Stable Ensembl ID of affected gene" ],
[ "Feature", "NullableStringColumn", "Stable Ensembl ID of feature (transcript, regulatory, motif)" ],
[ "Feature_type", "FeatureType", "Type of feature. Currently one of Transcript, RegulatoryFeature, MotifFeature (or blank)" ],
[ "One_Consequence", "StringColumn", "The single consequence of the canonical transcript in sequence ontology terms" ],
[ "Consequence", "SequenceOfStrings", "Consequence type of this variant" ],
[ "cDNA_position", "NullableStringColumn", "Relative position of base pair in the cDNA sequence as a fraction. A \"-\" symbol is displayed as the numerator if the variant does not appear in cDNA" ],
[ "CDS_position", "NullableStringColumn", "Relative position of base pair in coding sequence. A \"-\" symbol is displayed as the numerator if the variant does not appear in coding sequence" ],
[ "Protein_position", "NullableStringColumn", "Relative position of affected amino acid in protein. A \"-\" symbol is displayed as the numerator if the variant does not appear in coding sequence" ],
[ "Amino_acids", "NullableStringColumn", "Only given if the variation affects the protein-coding sequence" ],
[ "Codons", "NullableStringColumn", "The alternative codons with the variant base in upper case" ],
[ "Existing_variation", "SequenceOfStrings", "Known identifier of existing variation" ],
[ "ALLELE_NUM", "ZeroBasedIntegerColumn", "Allele number from input; 0 is reference, 1 is first alternate etc." ],
[ "DISTANCE", "NullableIntegerColumn", "Shortest distance from the variant to transcript" ],
[ "TRANSCRIPT_STRAND", "TranscriptStrand", "The DNA strand (1 or -1) on which the transcript/feature lies" ],
[ "SYMBOL", "NullableStringColumn", "The gene symbol" ],
[ "SYMBOL_SOURCE", "NullableStringColumn", "The source of the gene symbol" ],
[ "HGNC_ID", "NullableStringColumn", "Gene identifier from the HUGO Gene Nomenclature Committee if applicable" ],
[ "BIOTYPE", "NullableStringColumn", "Biotype of transcript" ],
[ "CANONICAL", "Canonical", "A flag (YES) indicating that the VEP-based canonical transcript, the longest translation, was used for this gene. If not, the value is null" ],
[ "CCDS", "NullableStringColumn", "The CCDS identifier for this transcript, where applicable" ],
[ "ENSP", "NullableStringColumn", "The Ensembl protein identifier of the affected transcript" ],
[ "SWISSPROT", "SequenceOfStrings", "UniProtKB/Swiss-Prot accession" ],
[ "TREMBL", "SequenceOfStrings", "UniProtKB/TrEMBL identifier of protein product" ],
[ "UNIPARC", "SequenceOfStrings", "UniParc identifier of protein product" ],
[ "UNIPROT_ISOFORM", "SequenceOfStrings", "Direct mappings to UniProtKB isoforms" ],
[ "RefSeq", "SequenceOfStrings", "RefSeq identifier for this transcript" ],
[ "MANE", "SequenceOfStrings", "MANE (Matched Annotation by NCBI and EMBL-EBI) Transcript" ],
[ "APPRIS", "NullableStringColumn", "Annotates alternatively spliced transcripts as primary or alternate based on a range of computational methods" ],
[ "FLAGS", "SequenceOfStrings", "Transcript quality flags" ],
[ "SIFT", "NullableStringColumn", "The SIFT prediction and/or score, with both given as prediction (score)" ],
[ "PolyPhen", "NullableStringColumn", "The PolyPhen prediction and/or score" ],
[ "EXON", "NullableStringColumn", "The exon number (out of total number)" ],
[ "INTRON", "NullableStringColumn", "The intron number (out of total number)" ],
[ "DOMAINS", "SequenceOfStrings", "The source and identifier of any overlapping protein domains" ],
[ "1000G_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in 1000 Genomes" ],
[ "1000G_AFR_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in 1000 Genomes combined African population" ],
[ "1000G_AMR_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in 1000 Genomes combined American population" ],
[ "1000G_EAS_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in 1000 Genomes combined East Asian population" ],
[ "1000G_EUR_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in 1000 Genomes combined European population" ],
[ "1000G_SAS_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in 1000 Genomes combined South Asian population" ],
[ "ESP_AA_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in NHLBI-ESP African American population" ],
[ "ESP_EA_AF", "NullableFloatColumn", "Non-reference allele and frequency of existing variant in NHLBI-ESP European American population" ],
[ "gnomAD_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes combined population" ],
[ "gnomAD_AFR_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes African/American population" ],
[ "gnomAD_AMR_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes American population" ],
[ "gnomAD_ASJ_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes Ashkenazi Jewish population" ],
[ "gnomAD_EAS_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes East Asian population" ],
[ "gnomAD_FIN_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes Finnish population" ],
[ "gnomAD_NFE_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes Non-Finnish European population" ],
[ "gnomAD_OTH_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes other combined population" ],
[ "gnomAD_SAS_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD exomes South Asian population" ],
[ "MAX_AF", "NullableFloatColumn", "Maximum observed allele frequency in 1000 Genomes, ESP and ExAC/gnomAD" ],
[ "MAX_AF_POPS", "SequenceOfStrings", "Populations in which maximum allele frequency was observed" ],
[ "gnomAD_non_cancer_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes combined non-cancer population" ],
[ "gnomAD_non_cancer_AFR_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer African/American population" ],
[ "gnomAD_non_cancer_AMI_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Amish population" ],
[ "gnomAD_non_cancer_AMR_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Latino population" ],
[ "gnomAD_non_cancer_ASJ_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Ashkenazi Jewish population" ],
[ "gnomAD_non_cancer_EAS_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer East Asian population" ],
[ "gnomAD_non_cancer_FIN_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Finnish population" ],
[ "gnomAD_non_cancer_MID_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Middle Eastern population" ],
[ "gnomAD_non_cancer_NFE_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Non-Finnish European population" ],
[ "gnomAD_non_cancer_OTH_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer Other population" ],
[ "gnomAD_non_cancer_SAS_AF", "NullableFloatColumn", "Frequency of existing variant in gnomAD genomes non-cancer South Asian population" ],
[ "gnomAD_non_cancer_MAX_AF_adj", "NullableFloatColumn", "Maximum observed allele frequency in non-cancer gnomAD genomes populations after removing subpopulations with less than 2 allele counts" ],
[ "gnomAD_non_cancer_MAX_AF_POPS_adj", "SequenceOfStrings", "Non-cancer gnomAD genomes populations in which the maximum allele frequency was observed after removing those with less than 2 allele counts" ],
[ "CLIN_SIG", "SequenceOfStrings", "Clinical significance of variant from dbSNP" ],
[ "SOMATIC", "SequenceOfNullableYesOrNo", "Somatic status of each ID reported under Existing_variation (0, 1, or null)" ],
[ "PUBMED", "SequenceOfIntegers", "Pubmed ID(s) of publications that cite existing variant" ],
[ "TRANSCRIPTION_FACTORS", "SequenceOfStrings", "List of transcription factors which bind to the transcription factor binding profile" ],
[ "MOTIF_NAME", "NullableStringColumn", "The stable identifier of a transcription factor binding profile (TFBP) aligned at this position" ],
[ "MOTIF_POS", "NullableIntegerColumn", "The relative position of the variation in the aligned TFBP" ],
[ "HIGH_INF_POS", "NullableYOrN", "A flag indicating if the variant falls in a high information position of a transcription factor binding profile (TFBP) (Y, N, or null)" ],
[ "MOTIF_SCORE_CHANGE", "NullableFloatColumn", "The difference in motif score of the reference and variant sequences for the TFBP" ],
[ "miRNA", "SequenceOfStrings", "SO terms of overlapped miRNA secondary structure feature(s)" ],
[ "IMPACT", "Impact", "The impact modifier for the consequence type" ],
[ "PICK", "PickColumn", "Indicates if this block of consequence data was picked by VEP's pick feature (1 or null)" ],
[ "VARIANT_CLASS", "NullableStringColumn", "Sequence Ontology variant class" ],
[ "TSL", "NullableIntegerColumn", "Transcript support level, which is based on independent RNA analyses" ],
[ "HGVS_OFFSET", "NullableIntegerColumn", "Indicates by how many bases the HGVS notations for this variant have been shifted" ],
[ "PHENO", "SequenceOfNullableYesOrNo", "Indicates if existing variant is associated with a phenotype, disease or trait (0, 1, or null)" ],
[ "MINIMISED", "PickColumn", "Alleles in this variant have been converted to minimal representation before consequence calculation (1 or null)" ],
[ "GENE_PHENO", "NullableYesOrNo", "Indicates if gene that the variant maps to is associated with a phenotype, disease or trait (0, 1, or null)" ],
[ "FILTER", "SequenceOfStrings", "Copied from input VCF. This includes filters implemented directly by the variant caller and other external software used in the DNA-Seq pipeline. See below for additional details." ],
[ "CONTEXT", "StringColumn", "The reference allele per VCF specs, and its five flanking base pairs" ],
[ "src_vcf_id", "UUIDColumn", "GDC UUID for the input VCF file" ],
[ "tumor_bam_uuid", "UUIDColumn", "GDC UUID for the tumor bam file" ],
[ "normal_bam_uuid", "NullableUUIDColumn", "GDC UUID for the normal bam file" ],
[ "case_id", "UUIDColumn", "GDC UUID for the case" ],
[ "GDC_FILTER", "SequenceOfStrings", "GDC filters applied universally across all MAFs" ],
[ "COSMIC", "SequenceOfStrings", "Overlapping COSMIC variants" ],
[ "hotspot", "NullableYOrN", "A flag indicating if the variant is a known hotspot (Y, N, or null)" ],
[ "vcf_region", "StringColumn", "Colon separated string containing the CHROM, POS, ID, REF, and ALT columns from the VCF file (e.g., chrZ:20:rs1234:A:T) (not in somatic MAF)" ],
[ "vcf_info", "NullableStringColumn", "INFO column from VCF (not in somatic MAF)" ],
[ "vcf_format", "StringColumn", "FORMAT column from VCF (not in somatic MAF)" ],
[ "vcf_tumor_gt", "StringColumn", "Tumor sample genotype column from VCF (not in somatic MAF)" ],
[ "vcf_normal_gt", "NullableStringColumn", "Normal sample genotype column from VCF (not in somatic MAF)" ]
],
"filtered" : "None"
}
Loading

0 comments on commit 04753bc

Please sign in to comment.