Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C14 schema #1182

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
8 changes: 7 additions & 1 deletion .github/workflows/check_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install amdirt==1.6.1
pip install amdirt==1.6.2
pip install jsonschema==4.17.0
- name: Make validation results directory
run: mkdir validation/
Expand Down Expand Up @@ -55,6 +55,12 @@ jobs:
run: |
echo "## Ancient Metagenome Environmental" >> validation/validation_results_raw.txt
AMDirT validate -s -d -c -m ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries.tsv ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries_schema.json &>> validation/validation_results_raw.txt
- name: RADIOCARBON DATES test ancient single genomes (e.g. pathogens)
if: always()
run: |
echo "# Radiocarbon Dates" >> validation/validation_results_raw.txt
echo "## Ancient Single Genome Host Associated" >> validation/validation_results_raw.txt
AMDirT validate -s -d -c -m ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates.tsv ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json &>> validation/validation_results_raw.txt
- name: cleanup validation results from streamlit warnings
if: always()
run: |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
project_name publication_year data_publication_doi sample_name archive_project archive_sample_accession date_information_present date_is_radiocarbon multiple_dates reference_location reference_citation_depth primary_secondary_reference_citation_doi direct_dating radiocarbon_lab_code spectrometry_type sample_material delta_13c uncalibrated_date uncalibrated_uncertainty_plus_minus calibration_reported calibration_curve calibration_software calibration_software_version calibrated_range_lower calibrated_range_upper calibrated_range_median calibrated_range_suffix reservoir_offset_mentioned reservoir_offset_applied reservoir_offset_reported
AndradesValtuena2017 2017 10.1016/j.cub.2017.10.025 1343UnTal85 PRJEB19335 ERS1892067 true true false main text 2 10.1371/journal.pone.0139705 true MAMS-18949 AMS tooth -20.5 3819 24 true IntCal13 OxCal v4.2.24 4346 4098 NA cal AD true false NA
AndradesValtuena2017 2017 10.1016/j.cub.2017.10.025 6Post PRJEB19335 ERS1892066 true true false main text 2 10.1371/journal.pone.0139705 true MAMS-18955 AMS tooth -20.7 3574 19 true IntCal13 OxCal v4.2.24 3957 3832 NA cal AD true false NA
Spyrou2018 2018 10.1038/s41467-018-04550-9 RT5 PRJEB24296 ERS2106903 true true false main text 1 10.1038/s41467-018-04550-9 true MAMS-29430 NA tooth -99999 3517 27 true NR NR NR 3868 3704 NA cal BP false NA NA
Spyrou2018 2018 10.1038/s41467-018-04550-9 RT6 PRJEB24296 ERS2106904 true true false main text 1 10.1038/s41467-018-04550-9 true MAMS-29431 NA tooth -99999 3499 25 true NR NR NR 3842 3696 NA cal BP false NA NA
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://spaam-community.github.io/AncientMetagenomeDir/ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json",
"type": "array",
"title": "JSON schema for AncientMetagenomeDir ancient host-associated single-genome C14 data",
"description": "The JSON schema for AncientMetagenomeDir ancient host-associated radiocarbon date (C14) information",
"additionalItems": false,
"items": {
"$id": "#/items",
"type": "object",
"title": "The items schema",
"description": "One radiocarbon date record for a sample (one row of the radiocarbon dates table).",
"default": {},
"required": [
"project_name",
"publication_year",
"data_publication_doi",
"sample_name",
"archive_project",
"archive_sample_accession"
],
"properties": {
"project_name": {
"$id": "#/items/properties/project_name",
"type": "string",
"title": "AncientMetagenomeDir key of the publication",
"description": "Format: surnameYYYY (if duplicate key but different publication, add b,c,d etc. as necessary). Must match a AncientMetagenomeDir samples table entry",
"pattern": "^[a-zA-Z]+\\d{4}[b-z]?$",
"examples": ["Warinner2014", "Muhlemann2018", "Muhlemann2018b"]
},
"publication_year": {
"$id": "#/items/properties/publication_year",
"type": "integer",
"minimum": 1950,
"maximum": 2100,
"title": "Year of publication",
"description": "Format: YYYY",
"examples": [2014]
},
"data_publication_doi": {
  "$id": "#/items/properties/data_publication_doi",
  "type": "string",
  "pattern": "^10\\.\\d{4,9}\\/[^,]+$",
  "title": "Digital Object Identifier (DOI) of the publication.",
  "description": "A valid DOI code (not as a URL). Must match an AncientMetagenomeDir samples table entry",
  "examples": ["10.1038/ng.2906"]
},
"sample_name": {
"$id": "#/items/properties/sample_name",
"type": "string",
"title": "Name of the sample",
"description": "In most cases this should be the name of the host individual. Must match a AncientMetagenomeDir samples table entry",
"examples": ["B61"]
},
"archive_project": {
"$id": "#/items/properties/archive_project",
"type": "string",
"title": "Archive project accession",
"description": "Project accession code in the nucleotide data archive. Must match an AncientMetagenomeDir samples table entry",
"examples": ["PRJNA438985", "mgp13354"]
},
"archive_sample_accession": {
"$id": "#/items/properties/archive_sample_accession",
"type": "string",
"pattern": "^[\\S]+$",
"title": "Archive accession number",
"description": "Samples archive accession numbers, multiple records can be separated with commas. No spaces allowed. Must match an AncientMetagenomeDir samples table entry",
"examples": ["SRS473742,SRS473743,SRS473744,SRS473745"]
},
"date_information_present": {
"$id": "#/items/properties/date_information_present",
"type": "boolean",
"title": "Is Date Information Present?",
"description": "Whether a sample has a specific year-date reported (e.g. 1245, not 15th Century or similar), If false, all other downstream fields should be set to NA",
"examples": [true, false]
},
"date_is_radiocarbon": {
"$id": "#/items/properties/date_is_radiocarbon",
"type": "string",
"title": "Is Date Radiocarbon?",
"enum": ["true", "false", "NA", "NR"],
"description": "Whether the date reported for the sample is a radiocarbon (C14) date. If false, all other downstream fields should be set to NA",
"examples": ["true", "false"]
},
"multiple_dates": {
"$id": "#/items/properties/multiple_dates",
"type": "string",
"title": "Multiple Direct Dates Present?",
"enum": ["true", "false", "NA"],
"description": "Whether multiple (direct) dates are present for this sample; if so make multiple rows for the sample with one date per row.",
"examples": ["true", "false", "NA"]
},
"reference_location": {
  "$id": "#/items/properties/reference_location",
  "type": "string",
  "title": "Location of Reference to Date",
  "enum": ["main text", "supplement text", "supplement table", "NA"],
  "description": "First place where the precise radiocarbon date was recorded in the primary citation publication (i.e., the publication in AncientMetagenomeDir). Order of preference: main text > supplement text > supplement table. NA if no date information is present.",
  "examples": ["main text", "supplement text", "supplement table", "NA"]
},
"reference_citation_depth": {
  "$id": "#/items/properties/reference_citation_depth",
  "type": "string",
  "title": "Reference Citation Depth",
  "pattern": "^(\\d+|NA)$",
  "description": "Number of publications that had to be followed to reach the original report of the date, e.g. 1 if the date is reported in the data publication itself, 2 if it is reported in a publication cited by the data publication. NA if no date information is present.",
  "examples": ["1", "2", "3", "9", "NA"]
},
"primary_secondary_reference_citation_doi": {
  "$id": "#/items/properties/primary_secondary_reference_citation_doi",
  "type": "string",
  "pattern": "^10\\.\\d{4,9}\\/[^,]+$",
  "title": "Digital Object Identifier (DOI) of the publication in which the date was originally reported.",
  "description": "DOI of the primary or secondary reference (i.e. the DOI of the publication in which the date was originally reported)",
  "examples": ["10.1038/ng.2906"]
},
"direct_dating": {
"$id": "#/items/properties/direct_dating",
"type": "string",
"title": "Date Directly from Sample?",
"enum": ["true", "false", "NA"],
"description": "Whether the date of the ancient metagenomic sample was directly from the same skeleton (or similar), or whether inferred from other samples in the same context",
"examples": ["true", "false", "NA"]
},
"radiocarbon_lab_code": {
  "$id": "#/items/properties/radiocarbon_lab_code",
  "type": "string",
  "title": "Radiocarbon Lab Code",
  "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/c14_lab_code.json",
  "description": "Lab code of the date, from https://radiocarbon.webhost.uits.arizona.edu/laboratories from Labs-2023_02_17.pdf. NA is no date available, NR is date available but no lab code",
  "examples": ["OxA", "ANAS", "Beta", "NR", "NA"]
},
"radiocarbon_lab_sample_id": {
"$id": "#/items/properties/radiocarbon_lab_sample_id",
"type": "integer",
"title": "Radiocarbon Lab Sample ID",
"description": "C14 sample code of the radiocarbon date from the lab",
"examples": [12355, 44034]
},
"spectrometry_type": {
  "$id": "#/items/properties/spectrometry_type",
  "type": "string",
  "title": "Spectrometry Type",
  "description": "Type of spectrometry used to generate the radiocarbon date. NA is no date available, NR is date available but spectrometry type not reported",
  "examples": ["AMS", "IMRS", "NR", "NA"]
},
"sample_material": {
  "$id": "#/items/properties/sample_material",
  "type": "string",
  "title": "Sample Material Used For Dating",
  "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/material.json",
  "description": "Sample material used for extraction of e.g. collagen for generating the radiocarbon date",
  "examples": ["tooth", "enamel"]
},
"delta_13c": {
"$id": "#/items/properties/delta_13c",
"type": "number",
"title": "δ13C value",
"description": "The δ13C value of the dating in per mil (‰). Not reported should be represented as -99999",
"examples": [-20.5, -17.6, -99999]
},
"uncalibrated_date": {
  "$id": "#/items/properties/uncalibrated_date",
  "type": "integer",
  "minimum": 0,
  "maximum": 50000,
  "title": "Uncalibrated Date Year",
  "description": "The uncalibrated radiocarbon date in years Before Present (BP)",
  "examples": [934, 3960, 13000]
},
"uncalibrated_uncertainty_plus_minus": {
"$id": "#/items/properties/uncalibrated_uncertainty_plus_minus",
"type": "integer",
"title": "Uncalibrated Date Year Uncertainty",
"description": "Uncertainty around the uncalibrated date in radiocarbon years, typically indicated by ±",
"examples": [32, 5, 150]
},
"calibration_reported": {
"$id": "#/items/properties/calibration_reported",
"type": "boolean",
"title": "Is Calibration Reported?",
"description": "Whether the date has additionally been calibrated.",
"examples": [true, false]
},
"calibration_curve": {
  "$id": "#/items/properties/calibration_curve",
  "type": "string",
  "enum": ["IntCal13", "IntCal20", "CalPal2007_HULU", "SHCal20", "Marine20", "NR"],
  "title": "Calibration Curve",
  "description": "The calibration curve used for calibration (set NR if not reported).",
  "examples": ["IntCal13", "IntCal20", "NR"]
},
"calibration_software": {
"$id": "#/items/properties/calibration_software",
"type": "string",
"enum": ["OxCal", "CalPal", "NR"],
"title": "Calibration Software",
"description": "Software used for radiocarbon calibration."
},
"calibration_software_version": {
"$id": "#/items/properties/calibration_software_version",
"type": "string",
"title": "Calibration Software Version",
"description": "Which version of the calibration software used, (set NR if not reported)",
"examples": ["v1.20", "0.35", "NR"]
},
"calibrated_range_lower": {
"$id": "#/items/properties/calibrated_range_lower",
"type": "integer",
"title": "Lower Date of Calibrated Date Range",
"description": "The lower range of the calibrated date",
"examples": [1650]
},
"calibrated_range_upper": {
"$id": "#/items/properties/calibrated_range_upper",
"type": "integer",
"title": "Upper Date of Calibrated Date Range",
"description": "The upper range of the calibrated date",
"examples": [1450]
},
"calibrated_range_median": {
"$id": "#/items/properties/calibrated_range_median",
"type": "integer",
"title": "Median Date of Calibrated Date Range",
"description": "The median date of the calibrated date range",
"examples": [1550]
},
"calibrated_range_suffix": {
"$id": "#/items/properties/calibrated_range_suffix",
"type": "string",
"enum": ["cal AD", "cal BC", "cal CE", "cal BCE", "cal BP"],
"title": "Suffix of the calibrated date range",
"description": "The suffix of the calibrated date range",
"examples": ["cal BP"]
},
"reservoir_offset_mentioned": {
"$id": "#/items/properties/reservoir_offset_mentioned",
"type": "boolean",
"title": "Is Reservoir Offset Mentioned?",
"description": "If radiocarbon C14 reservoir offset mentioned in any form. False here corresponds to not recorded (NR)",
"examples": [true, false]
},
"reservoir_offset_applied": {
"$id": "#/items/properties/reservoir_offset_applied",
"type": "boolean",
"title": "Is Reservoir Offset Applied?",
"description": "If an offset correction or recalibration has been reported to have been applied",
"examples": [true, false]
},
"reservoir_offset_reported": {
"$id": "#/items/properties/reservoir_offset_reported",
"type": "integer",
"title": "Reservoir Offset Reported",
"description": "If the actual value of the offset has been reported (set NR if applied but actual value of offset not reported)",
"examples": [250, 400]
}
}
}
}
Loading
Loading