From 8d4c7c5881f8681b2f65090e36ec8589c056d73f Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 27 Feb 2023 08:34:48 +0100 Subject: [PATCH 01/11] New example dir --- ...genome-hostassociated_radiocarbondates.tsv | 1 + ...ostassociated_radiocarbondates_schema.json | 96 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv create mode 100644 ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv new file mode 100644 index 000000000..d7019ec4b --- /dev/null +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv @@ -0,0 +1 @@ +project_name publication_year data_publication_doi sample_name archive_project archive_sample_accession date_information_present date_is_radiocarbon multiple_dates reference_location reference_citation_depth primary_secondary_reference_citation_doi direct_dating radiocarbon_lab_code spectrometry_type sample_material delta_13c uncalibrated_date uncalibrated_uncertainty_plus_minus calibration_reported calibration_curve calibration_software calibration_software_version calibrated_range_lower calibrated_range_upper calibrated_range_median calibrated_range_suffix reservoir_offset_mentioned reservoir_offset_applied reservoir_offset_reported Notes and comments \ No newline at end of file diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json new file mode 100644 index 000000000..812000dc1 --- /dev/null +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://spaam-community.github.io/AncientMetagenomeDir/ancientsinglegenome-hostassociated/ancientsinglegenome-hostassociated_radiocarbondates_schema.json", + "type": "array", + "title": "JSON schema for AncientMetagenomeDir ancient host-associated single-genome C14 data", + "description": "The JSON schema for AncientMetagenomeDir ancient host-associated radiocarbon date (C14) information", + "additionalItems": false, + "items": { + "$id": "#/items", + "type": "object", + "title": "The items schema", + "description": "An explanation about the purpose of this instance.", + "default": {}, + "required": [ + "project_name", + "publication_year", + "data_publication_doi", + "sample_name", + "archive_project", + "archive_sample_accession" + ], + "properties": { + "project_name": { + "$id": "#/items/properties/project_name", + "type": "string", + "title": "AncientMetagenomeDir key of the publication", + "description": "Format: surnameYYYY (if duplicate key but different publication, add b,c,d etc. as necessary). Must match a AncientMetagenomeDir samples table entry", + "pattern": "^[a-zA-Z]+\\d{4}[b-z]?$", + "examples": ["Warinner2014", "Muhlemann2018", "Muhlemann2018a"] + }, + "publication_year": { + "$id": "#/items/properties/publication_year", + "type": "integer", + "minimum": 1950, + "maximum": 2100, + "title": "Year of publication", + "description": "Format: YYYY", + "examples": [2014] + }, + "data_publication_doi": { + "$id": "#/items/properties/data_publication_doi", + "type": "string", + "pattern": "^10.\\d{4,9}\\/[^,]+$", + "title": "Digital Object Identifier (DOI) of the publication.", + "description": "A valid DOI code (not as an URL). Must match a AncientMetagenomeDir samples table entry", + "examples": ["10.1038/ng.2906"] + }, + "sample_name": { + "$id": "#/items/properties/sample_name", + "type": "string", + "title": "Name of the sample", + "description": "In most cases this should be the name of the host individual. Must match a AncientMetagenomeDir samples table entry", + "examples": ["B61"] + }, + "archive_project": { + "$id": "#/items/properties/archive_project", + "type": "string", + "title": "Archive project accession platform", + "description": "Name of the nucleotide data archiving platform. Must match a AncientMetagenomeDir samples table entry", + "examples": ["PRJNA438985", "mgp13354"] + }, + "archive_sample_accession": { + "$id": "#/items/properties/archive_sample_accession", + "type": "string", + "pattern": "^[\\S]+$", + "title": "Archive accession number", + "description": "Samples archive accession numbers, multiple records can be separated with commas. No spaces allowed. . Must match a AncientMetagenomeDir samples table entry", + "examples": ["SRS473742,SRS473743,SRS473744,SRS473745"] + }, + "date_information_present": {}, + "date_is_radiocarbon": {}, + "multiple_dates": {}, + "reference_location": {}, + "reference_citation_depth": {}, + "primary_secondary_reference_citation_doi": {}, + "direct_dating": {}, + "radiocarbon_lab_code": {}, + "spectrometry_type": {}, + "sample_material": {}, + "delta_13c": {}, + "uncalibrated_date ": {}, + "uncalibrated_uncertainty_plus_minus": {}, + "calibration_reported": {}, + "calibration_curve": {}, + "calibration_software": {}, + "calibration_software_version": {}, + "calibrated_range_lower": {}, + "calibrated_range_upper": {}, + "calibrated_range_median": {}, + "calibrated_range_suffix": {}, + "reservoir_offset_mentioned": {}, + "reservoir_offset_applied": {}, + "reservoir_offset_reported": {} + } + } +} From 266d5a1e3d8e9d0176132238053fce8ea2daf78d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 3 Mar 2023 16:13:35 +0100 Subject: [PATCH 02/11] Continue extending the schema --- ...ostassociated_radiocarbondates_schema.json | 87 +++++- assets/enums/c14_lab_code.json | 291 ++++++++++++++++++ 2 files changed, 369 insertions(+), 9 deletions(-) create mode 100644 assets/enums/c14_lab_code.json diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json index 812000dc1..aadd9bb32 100644 --- a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json @@ -67,15 +67,84 @@ "description": "Samples archive accession numbers, multiple records can be separated with commas. No spaces allowed. . Must match a AncientMetagenomeDir samples table entry", "examples": ["SRS473742,SRS473743,SRS473744,SRS473745"] }, - "date_information_present": {}, - "date_is_radiocarbon": {}, - "multiple_dates": {}, - "reference_location": {}, - "reference_citation_depth": {}, - "primary_secondary_reference_citation_doi": {}, - "direct_dating": {}, - "radiocarbon_lab_code": {}, - "spectrometry_type": {}, + "date_information_present": { + "$id": "#/items/properties/date_information_present", + "type": "boolean", + "title": "Is Date Information Present?", + "description": "Whether a sample has a specific year-date reported (e.g. 1245, not 15th Century or similar), If false, all other downstream fields should be set to NA", + "examples": ["true", "false"] + }, + "date_is_radiocarbon": { + "$id": "#/items/properties/date_is_radiocarbon", + "type": "string", + "title": "Is Date Radiocarbon?", + "enum": ["true", "false", "NA", "NR"], + "description": "Whether a sample has a specific year-date reported (e.g. 1245, not 15th Century or similar), If false, all other downstream fields should be set to NA", + "examples": ["true", "false"] + }, + "multiple_dates": { + "$id": "#/items/properties/multiple_dates", + "type": "string", + "title": "Multiple Direct Dates Present?", + "enum": ["true", "false", "NA"], + "description": "Whether multiple (direct) dates are present for this sample; if so make multiple rows for the sample with one date per row.", + "examples": ["true", "false", "NA"] + }, + "reference_location": { + "$id": "#/items/properties/reference_location", + "type": "string", + "title": "Location of Reference to Date", + "enum": ["main text", "supplement text", "supplement table"], + "description": "First place where the precise-radiocarbon date was recorded in the primary citation publication (i.e, the publication in ancientMetagenomeDir). main text > supplementary text > supplmentary table.", + "examples": ["main text", "supplement text", "supplement table", "NA"] + }, + "reference_citation_depth": { + "$id": "#/items/properties/reference_citation_depth", + "type": "string", + "title": "Reference Citation Depth?", + "enum": ["main text", "supplement text", "supplement table"], + "description": "First place where the precise-radiocarbon date was recorded in the primary citation publication (i.e, the publication in ancientMetagenomeDir). main text > supplementary text > supplmentary table.", + "examples": ["1", "2", "3", "9", "NA"] + }, + "primary_secondary_reference_citation_doi": { + "$id": "#/items/properties/primary_secondary_reference_citation_doi", + "type": "string", + "pattern": "^10.\\d{4,9}\\/[^,]+$", + "title": "Digital Object Identifier (DOI) of the publication that the date was originally reported.", + "description": "DOI of the primary or secondary reference (i.e. the DOI of the publication in which the date was original reported)", + "examples": ["10.1038/ng.2906"] + }, + "direct_dating": { + "$id": "#/items/properties/direct_dating", + "type": "string", + "title": "Date Directly from Sample?", + "enum": ["true", "false", "NA"], + "description": "Whether the date of the ancient metagenomic sample was directly from the same skeleton (or similar), or whether inferred from other samples in the same context", + "examples": ["true", "false", "NA"] + }, + "radiocarbon_lab_code": { + "$id": "#/items/properties/radiocarbon_lab_sample_id", + "type": "string", + "title": "Radiocarbon Lab Sample ID", + "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/c14_lab_code.json", + "description": "Lab code of the date, from https://radiocarbon.webhost.uits.arizona.edu/laboratories from Labs-2023_02_17.pdf. NA is no date available, NR is date available but no lab code", + "examples": ["OxA", "ANAS", "Beta", "NR", "NA"] + }, + "radiocarbon_lab_sample_id": { + "$id": "#/items/properties/radiocarbon_lab_sample_id", + "type": "integer", + "title": "Radiocarbon Lab Sample ID", + "description": "C14 sample code of the radiocarbon date from the lab", + "examples": ["12355", "44034"] + }, + "spectrometry_type": { + "$id": "#/items/properties/radiocarbon_lab_sample_id", + "type": "string", + "title": "Radiocarbon Lab Sample ID", + "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/c14_lab_code.json", + "description": "Lab code of the date, from https://radiocarbon.webhost.uits.arizona.edu/laboratories from Labs-2023_02_17.pdf. NA is no date available, NR is date available but no lab code", + "examples": ["AMS", "IMRS", "NR" "NA"] + }, "sample_material": {}, "delta_13c": {}, "uncalibrated_date ": {}, diff --git a/assets/enums/c14_lab_code.json b/assets/enums/c14_lab_code.json new file mode 100644 index 000000000..970ef6310 --- /dev/null +++ b/assets/enums/c14_lab_code.json @@ -0,0 +1,291 @@ +{ + "enum": [ + "NA", + "NR", + "A", + "AA", + "AAR", + "AC", + "AECV", + "AERIK", + "ALG", + "ANAS", + "ANL", + "ANTW", + "ANU", + "ANUA", + "AU", + "AURIS", + "B", + "Ba", + "BC", + "BE", + "Beta", + "BGS", + "BIOCAMS", + "Birm", + "Bln", + "BM", + "BONN", + "BRAMS", + "BS", + "C", + "CAMS", + "CAR", + "CENA", + "CG", + "CH", + "CIRAM", + "CN-XX", + "CNA", + "COL", + "CRCA", + "CRL", + "CSIC", + "CSM", + "CT", + "CU", + "D-AMS", + "D", + "Dak", + "DAL", + "DE", + "Deb", + "DebA", + "DEM", + "DGC", + "DIC", + "DK", + "DRI", + "DSA", + "ENEA", + "Erl", + "ETH", + "F", + "Fi", + "Fr", + "Fra", + "FSU", + "FTMC", + "FZ", + "G", + "GAK", + "Gd", + "GD", + "GdA,", + "Gif", + "GifA", + "GIN", + "GL", + "GrA", + "GrM", + "GrN", + "GrO", + "GSC", + "GU", + "GV", + "GX", + "GXNUAMS", + "H", + "HAM", + "HAR", + "Hd", + "Hel", + "Hela", + "HIG", + "HL", + "HNS", + "Hv", + "I", + "IAA", + "IAAA", + "IAEA-MEL", + "IAEA", + "ICA", + "ICEN", + "IEMAE", + "IFAO", + "IGAN", + "IGS", + "IGSB", + "IHME", + "II", + "IMTA", + "IOAN", + "IORAN", + "IRPA", + "ISGS", + "IUACD", + "IVAN", + "IVIC", + "IWP", + "JAT", + "K", + "KATRI", + "KEEA", + "KGM", + "Ki", + "KI", + "KIA", + "KIK", + "KN", + "KR", + "KRIL", + "KSU", + "L", + "LACUFF", + "LAEC", + "LAR", + "LE", + "LEMA", + "LIH", + "LJ", + "LTL", + "LU", + "Lu", + "LuA", + "LuS", + "Lv", + "Ly", + "LZ", + "LZU", + "M", + "Ma", + "MAG", + "MAMS", + "MC", + "METU", + "MKL", + "MTC", + "N", + "NIST", + "NPL", + "NS", + "NSRL", + "NSTF", + "NSW", + "NTU", + "NU", + "NUTA", + "Ny", + "NZ", + "NZA", + "O", + "OBDY", + "OR", + "ORINS", + "OS", + "OWU", + "OX", + "OxA", + "OZ", + "P", + "P", + "PAL", + "Pi", + "PI", + "PIC", + "PITT", + "PKU", + "PKUAMS", + "PL", + "PLD", + "Poz", + "Pr", + "PRI", + "PRL", + "PRLCH", + "PSU", + "PSUAMS", + "Pta", + "Q", + "QC", + "QL", + "QU", + "R", + "RCD", + "RCMib", + "RI", + "RICH", + "RIDDL", + "Riga", + "RL", + "RoAMS", + "RT", + "RTK", + "RU", + "S", + "Sa", + "Sac", + "SacA", + "SANU", + "SFU", + "Sh", + "SI", + "SL", + "SM", + "SMU", + "SNU", + "SPb", + "T", + "TB", + "TBNC", + "TEM", + "TF", + "TK", + "TKA", + "TKa", + "TKU", + "Tln", + "TO", + "TRa", + "TUa", + "TUBITAK", + "TUNC", + "Tx", + "U", + "Ua", + "UB", + "UBA", + "UBAR", + "UCD", + "UCI", + "UCLA", + "UCR", + "UD", + "UGa", + "UGAMS", + "UGRA", + "UL", + "ULA", + "UM", + "UNAM", + "und", + "UNSW", + "UOC", + "UQ", + "URCRM", + "URU", + "USGS", + "UtC", + "UTCAG", + "UW", + "UZH", + "V", + "VERA", + "VRI", + "Vs", + "W", + "WAT", + "WIS", + "Wk", + "WRD", + "WSU", + "X", + "XLLQ", + "Y", + "Ya", + "YU", + "Z" + ] +} From e91b6ab3ceaf19e95c04f52bdd62fcae5d672bf0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 3 Mar 2023 16:13:49 +0100 Subject: [PATCH 03/11] Prettier --- ...ientsinglegenome-hostassociated_radiocarbondates_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json index aadd9bb32..6ca217443 100644 --- a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json @@ -143,7 +143,7 @@ "title": "Radiocarbon Lab Sample ID", "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/c14_lab_code.json", "description": "Lab code of the date, from https://radiocarbon.webhost.uits.arizona.edu/laboratories from Labs-2023_02_17.pdf. NA is no date available, NR is date available but no lab code", - "examples": ["AMS", "IMRS", "NR" "NA"] + "examples": ["AMS", "IMRS", "NR", "NA"] }, "sample_material": {}, "delta_13c": {}, From 1a466cd3f434fb797376d8d22ec01d01b0597b37 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 30 Mar 2023 14:27:26 +0200 Subject: [PATCH 04/11] More progress --- ...ostassociated_radiocarbondates_schema.json | 91 ++++++++++++++++--- 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json index 6ca217443..4f7fa3105 100644 --- a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json @@ -138,24 +138,91 @@ "examples": ["12355", "44034"] }, "spectrometry_type": { - "$id": "#/items/properties/radiocarbon_lab_sample_id", + "$id": "#/items/properties/spectrometry_type", "type": "string", "title": "Radiocarbon Lab Sample ID", "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/c14_lab_code.json", "description": "Lab code of the date, from https://radiocarbon.webhost.uits.arizona.edu/laboratories from Labs-2023_02_17.pdf. NA is no date available, NR is date available but no lab code", "examples": ["AMS", "IMRS", "NR", "NA"] }, - "sample_material": {}, - "delta_13c": {}, - "uncalibrated_date ": {}, - "uncalibrated_uncertainty_plus_minus": {}, - "calibration_reported": {}, - "calibration_curve": {}, - "calibration_software": {}, - "calibration_software_version": {}, - "calibrated_range_lower": {}, - "calibrated_range_upper": {}, - "calibrated_range_median": {}, + "material": { + "$id": "#/items/properties/material", + "type": "string", + "title": "Sample Material Used For Dating", + "$ref": "https://spaam-community.github.io/AncientMetagenomeDir/assets/enums/material.json", + "description": "Sample material used for extraction of e.g. collagen for generating the radiocarbon date", + "examples": ["both", "enamel"] + }, + "delta_13c": { + "$id": "#/items/properties/delta_13c", + "type": "number", + "title": "δ13C value", + "description": "The δ13C value of the dating in ppm (‰)", + "examples": [-20.5, -17.6] + }, + "uncalibrated_date ": { + "$id": "#/items/properties/delta_13c", + "type": "integer", + "minimum": 0, + "maximum": 50000, + "title": "Uncalibrated Date Year", + "description": "The uncalibrated date in calendar year date Before Present notation", + "examples": [934, 3960, 13000] + }, + "uncalibrated_uncertainty_plus_minus": { + "$id": "#/items/properties/uncalibrated_uncertainty_plus_minus", + "type": "integer", + "title": "Uncalibrated Date Year Uncertainty", + "description": "Uncertainty value around uncalibrated date in calendar year date Before Present notation, typically indicated by ±", + "examples": [32, 5, 150] + }, + "calibration_reported": { + "$id": "#/items/properties/calibration_reported", + "type": "boolean", + "title": "Is Calibration Reported?", + "description": "Whether the date has additionally been calibrated.", + "examples": ["true", "false"] + }, + "calibration_curve": { + "$id": "#/items/properties/calibration_curve", + "enum": ["IntCal20", "CalPal2007_HULU", "SHCal20","Marine20", "NR"], + "title": "Calibration Curve", + "description": "The tree-ring calibration curve used for calibration.", + }, + "calibration_software": { + "$id": "#/items/properties/calibration_software", + "enum": ["OxCal", "CalPal", "NR"], + "title": "Calibration Software", + "description": "Software used for radiocarbon calibration." + }, + "calibration_software_version": { + "$id": "#/items/properties/calibration_software_version", + "type": "string", + "title": "Calibration Software", + "description": "Which version of the calibration software used, (set NR if not reported)", + "examples": ["v1.20", "0.35", "NR"] + }, + "calibrated_range_lower": { + "$id": "#/items/properties/calibrated_range_lower", + "type": "integer", + "title": "Lower Date of Calibrated Date Range", + "description": "The lower range of the calibrated date", + "examples": ["1450"] + }, + "calibrated_range_upper": { + "$id": "#/items/properties/calibrated_range_upper", + "type": "integer", + "title": "Upper Date of Calibrated Date Range", + "description": "The upper range of the calibrated date", + "examples": ["1450"] + }, + "calibrated_range_median": { + "$id": "#/items/properties/calibrated_range_median", + "type": "integer", + "title": "Median Date of Calibrated Date Range", + "description": "The upper range of the calibrated date", + "examples": ["1450"] + }, "calibrated_range_suffix": {}, "reservoir_offset_mentioned": {}, "reservoir_offset_applied": {}, From ec5bcae867ec14b0b7dd12794ea52e0ba5990e77 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 16 Jun 2023 08:42:37 +0200 Subject: [PATCH 05/11] Set -99999 as NR value for numeric columns --- ...genome-hostassociated_radiocarbondates.tsv | 6 ++- ...ostassociated_radiocarbondates_schema.json | 49 ++++++++++++++----- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv index d7019ec4b..052c22c39 100644 --- a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv @@ -1 +1,5 @@ -project_name publication_year data_publication_doi sample_name archive_project archive_sample_accession date_information_present date_is_radiocarbon multiple_dates reference_location reference_citation_depth primary_secondary_reference_citation_doi direct_dating radiocarbon_lab_code spectrometry_type sample_material delta_13c uncalibrated_date uncalibrated_uncertainty_plus_minus calibration_reported calibration_curve calibration_software calibration_software_version calibrated_range_lower calibrated_range_upper calibrated_range_median calibrated_range_suffix reservoir_offset_mentioned reservoir_offset_applied reservoir_offset_reported Notes and comments \ No newline at end of file +project_name publication_year data_publication_doi sample_name archive_project archive_sample_accession date_information_present date_is_radiocarbon multiple_dates reference_location reference_citation_depth primary_secondary_reference_citation_doi direct_dating radiocarbon_lab_code spectrometry_type sample_material delta_13c uncalibrated_date uncalibrated_uncertainty_plus_minus calibration_reported calibration_curve calibration_software calibration_software_version calibrated_range_lower calibrated_range_upper calibrated_range_median calibrated_range_suffix reservoir_offset_mentioned reservoir_offset_applied reservoir_offset_reported +AndradesValtuena2017 2017 10.1016/j.cub.2017.10.025 1343UnTal85 PRJEB19335 ERS1892067 true true false main text 2 10.1371/journal.pone.0139705 true MAMS-18949 AMS tooth -20.5 3819 24 true IntCal13 OxCal v4.2.24 4346 4098 NA cal AD true false NA +AndradesValtuena2017 2017 10.1016/j.cub.2017.10.025 6Post PRJEB19335 ERS1892066 true true false main text 2 10.1371/journal.pone.0139705 true MAMS-18955 AMS tooth -20.7 3574 19 true IntCal13 OxCal v4.2.24 3957 3832 NA cal AD true false NA +Spyrou2018 2018 10.1038/s41467-018-04550-9 RT5 PRJEB24296 ERS2106903 true true false main text 1 10.1038/s41467-018-04550-9 true MAMS-29430 NA tooth -99999 3517 27 true NR NR NR 3868 3704 NA cal BP false NA NA +Spyrou2018 2018 10.1038/s41467-018-04550-9 RT6 PRJEB24296 ERS2106904 true true false main text 1 10.1038/s41467-018-04550-9 true MAMS-29431 NA tooth -99999 3499 25 true NR NR NR 3842 3696 NA cal BP false NA NA \ No newline at end of file diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json index 4f7fa3105..e87c2b86d 100644 --- a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json +++ b/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json @@ -157,8 +157,8 @@ "$id": "#/items/properties/delta_13c", "type": "number", "title": "δ13C value", - "description": "The δ13C value of the dating in ppm (‰)", - "examples": [-20.5, -17.6] + "description": "The δ13C value of the dating in ppm (‰). Not reported should be represented as -99999", + "examples": [-20.5, -17.6, -99999] }, "uncalibrated_date ": { "$id": "#/items/properties/delta_13c", @@ -185,12 +185,14 @@ }, "calibration_curve": { "$id": "#/items/properties/calibration_curve", - "enum": ["IntCal20", "CalPal2007_HULU", "SHCal20","Marine20", "NR"], + "type": "string", + "enum": ["IntCal20", "CalPal2007_HULU", "SHCal20", "Marine20", "NR"], "title": "Calibration Curve", - "description": "The tree-ring calibration curve used for calibration.", + "description": "The tree-ring calibration curve used for calibration." }, "calibration_software": { "$id": "#/items/properties/calibration_software", + "type": "string", "enum": ["OxCal", "CalPal", "NR"], "title": "Calibration Software", "description": "Software used for radiocarbon calibration." @@ -207,7 +209,7 @@ "type": "integer", "title": "Lower Date of Calibrated Date Range", "description": "The lower range of the calibrated date", - "examples": ["1450"] + "examples": ["1650"] }, "calibrated_range_upper": { "$id": "#/items/properties/calibrated_range_upper", @@ -220,13 +222,38 @@ "$id": "#/items/properties/calibrated_range_median", "type": "integer", "title": "Median Date of Calibrated Date Range", - "description": "The upper range of the calibrated date", - "examples": ["1450"] + "description": "The median date of the calibrated date range", + "examples": ["1550"] + }, + "calibrated_range_suffix": { + "$id": "#/items/properties/calibrated_range_suffix", + "type": "string", + "enum": ["cal AD", "cal BC", "cal CE", "cal BCE", "cal BP"], + "title": "Suffix of the calibrated date range", + "description": "The suffix of the calibrated date range", + "examples": ["cal BP"] }, - "calibrated_range_suffix": {}, - "reservoir_offset_mentioned": {}, - "reservoir_offset_applied": {}, - "reservoir_offset_reported": {} + "reservoir_offset_mentioned": { + "$id": "#/items/properties/reservoir_offset_mentioned", + "type": "boolean", + "title": "Is Reservoir Offset Mentioned?", + "description": "If radiocarbon C14 reservoir offset mentioned in any form. False here corresponds to not recorded (NR)", + "examples": ["true", "false"] + }, + "reservoir_offset_applied": { + "$id": "#/items/properties/reservoir_offset_applied", + "type": "boolean", + "title": "Is Reservoir Offset Applied?", + "description": "If an offset correction or recalibration has been reported to have been applied ", + "examples": ["true", "false"] + }, + "reservoir_offset_reported": { + "$id": "#/items/properties/reservoir_offset_reported", + "type": "integer", + "title": "Reservoir Offset Reported", + "description": "If the actual value of the offset has been reported (set NR if applied but actual value of offset not reported)", + "examples": [250, 400] + } } } } From 903ccfaaff7824aa3c5fba3794f413718949374d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 16 Jun 2023 08:48:29 +0200 Subject: [PATCH 06/11] Add auto schema check --- .github/workflows/check_dataset.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/check_dataset.yml b/.github/workflows/check_dataset.yml index 11f4d097e..969ee76aa 100644 --- a/.github/workflows/check_dataset.yml +++ b/.github/workflows/check_dataset.yml @@ -54,6 +54,12 @@ jobs: run: | echo "## Ancient Metagenome Environmental" >> validation/validation_results_raw.txt AMDirT validate -s -d -c -m ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries.tsv ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries_schema.json &>> validation/validation_results_raw.txt + - name: C14 DATES test ancient single genomes (e.g. pathogens) + if: always() + run: | + echo "# C14 Dates" >> validation/validation_results_raw.txt + echo "## Ancient Single Genome Host Associated" >> validation/validation_results_raw.txt + AMDirT validate -s -d -c -m ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_dates.tsv ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_dates_schema.json &>> validation/validation_results_raw.txt - name: cleanup validation results from streamlit warnings if: always() run: | From 6529edaf013acfcced5115fe4f23dca82a30dfbc Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 16 Jun 2023 08:49:21 +0200 Subject: [PATCH 07/11] Fix indent --- .github/workflows/check_dataset.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check_dataset.yml b/.github/workflows/check_dataset.yml index 969ee76aa..dcc7f3b6d 100644 --- a/.github/workflows/check_dataset.yml +++ b/.github/workflows/check_dataset.yml @@ -54,7 +54,7 @@ jobs: run: | echo "## Ancient Metagenome Environmental" >> validation/validation_results_raw.txt AMDirT validate -s -d -c -m ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries.tsv ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries_schema.json &>> validation/validation_results_raw.txt - - name: C14 DATES test ancient single genomes (e.g. pathogens) + - name: C14 DATES test ancient single genomes (e.g. pathogens) if: always() run: | echo "# C14 Dates" >> validation/validation_results_raw.txt @@ -63,7 +63,7 @@ jobs: - name: cleanup validation results from streamlit warnings if: always() run: | - grep -v streamlit validation/validation_results_raw.txt > validation/validation_results.txt + grep -v streamlit validation/validation_results_raw.txt > validation/validation_results.txt - uses: actions/upload-artifact@v3 if: always() with: From e9f8432976412f14ff79141e2211a6ff22330f00 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 16 Jun 2023 08:58:17 +0200 Subject: [PATCH 08/11] Fix paths --- .github/workflows/check_dataset.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check_dataset.yml b/.github/workflows/check_dataset.yml index dcc7f3b6d..e6ad0fc09 100644 --- a/.github/workflows/check_dataset.yml +++ b/.github/workflows/check_dataset.yml @@ -54,12 +54,12 @@ jobs: run: | echo "## Ancient Metagenome Environmental" >> validation/validation_results_raw.txt AMDirT validate -s -d -c -m ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries.tsv ancientmetagenome-environmental/libraries/ancientmetagenome-environmental_libraries_schema.json &>> validation/validation_results_raw.txt - - name: C14 DATES test ancient single genomes (e.g. pathogens) + - name: RADIOCARBON DATES test ancient single genomes (e.g. pathogens) if: always() run: | - echo "# C14 Dates" >> validation/validation_results_raw.txt + echo "# Radiocarbon Dates" >> validation/validation_results_raw.txt echo "## Ancient Single Genome Host Associated" >> validation/validation_results_raw.txt - AMDirT validate -s -d -c -m ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_dates.tsv ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_dates_schema.json &>> validation/validation_results_raw.txt + AMDirT validate -s -d -c -m /home/james/git/jfy133/AncientMetagenomeDir/ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates.tsv ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json &>> validation/validation_results_raw.txt - name: cleanup validation results from streamlit warnings if: always() run: | From 30eebcb487cb5ee5cd6b653358240302091ee3a0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 16 Jun 2023 08:58:23 +0200 Subject: [PATCH 09/11] Fix paths --- .../ancientsinglegenome-hostassociated_radiocarbondates.tsv | 0 ...ncientsinglegenome-hostassociated_radiocarbondates_schema.json | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename ancientsinglegenome-hostassociated/{dates => radiocarbondates}/ancientsinglegenome-hostassociated_radiocarbondates.tsv (100%) rename ancientsinglegenome-hostassociated/{dates => radiocarbondates}/ancientsinglegenome-hostassociated_radiocarbondates_schema.json (100%) diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv b/ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates.tsv similarity index 100% rename from ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates.tsv rename to ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates.tsv diff --git a/ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json b/ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json similarity index 100% rename from ancientsinglegenome-hostassociated/dates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json rename to ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json From d78093cbfb0dbc21580a628a9b5d3693f132bbc3 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 16 Jun 2023 09:04:57 +0200 Subject: [PATCH 10/11] Remove local path typo --- .github/workflows/check_dataset.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check_dataset.yml b/.github/workflows/check_dataset.yml index e6ad0fc09..74fcb077d 100644 --- a/.github/workflows/check_dataset.yml +++ b/.github/workflows/check_dataset.yml @@ -59,7 +59,7 @@ jobs: run: | echo "# Radiocarbon Dates" >> validation/validation_results_raw.txt echo "## Ancient Single Genome Host Associated" >> validation/validation_results_raw.txt - AMDirT validate -s -d -c -m /home/james/git/jfy133/AncientMetagenomeDir/ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates.tsv ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json &>> validation/validation_results_raw.txt + AMDirT validate -s -d -c -m ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates.tsv ancientsinglegenome-hostassociated/radiocarbondates/ancientsinglegenome-hostassociated_radiocarbondates_schema.json &>> validation/validation_results_raw.txt - name: cleanup validation results from streamlit warnings if: always() run: | From a15f29869348cfb1273b168abfcf72611a8485e5 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 24 Jul 2024 17:59:22 +0200 Subject: [PATCH 11/11] Update check_dataset.yml --- .github/workflows/check_dataset.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check_dataset.yml b/.github/workflows/check_dataset.yml index 997770c8f..5a2cc9671 100644 --- a/.github/workflows/check_dataset.yml +++ b/.github/workflows/check_dataset.yml @@ -16,7 +16,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install amdirt==1.6.1 + pip install amdirt==1.6.2 pip install jsonschema==4.17.0 - name: Make validation results directory run: mkdir validation/