From ce27ca375ad8720b8313f6a222d471fba3d942ae Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 1 Oct 2024 00:24:29 -0500 Subject: [PATCH 1/4] Add passthrough options for BCO Signed-off-by: Ben Sherman --- BCO.md | 147 ++++++++++++++++++ README.md | 2 +- .../src/main/nextflow/prov/BcoRenderer.groovy | 17 +- 3 files changed, 163 insertions(+), 3 deletions(-) create mode 100644 BCO.md diff --git a/BCO.md b/BCO.md new file mode 100644 index 0000000..04adcab --- /dev/null +++ b/BCO.md @@ -0,0 +1,147 @@ +# Additional BCO configuration + +The `bco` format supports additional "pass-through" options for certain BCO fields. These fields cannot be inferred automatically from a pipeline or run, and so must be entered through the config. External systems can use these config options to inject fields automatically. + +The following config options are supported: + +- `prov.formats.bco.provenance_domain.review` +- `prov.formats.bco.provenance_domain.derived_from` +- `prov.formats.bco.provenance_domain.obsolete_after` +- `prov.formats.bco.provenance_domain.embargo` +- `prov.formats.bco.usability_domain` +- `prov.formats.bco.description_domain.keywords` +- `prov.formats.bco.description_domain.xref` + +These options correspond exactly to fields in the BCO JSON schema. Refer to the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) for more information about these fields. + +Here is an example config based on the BCO User Guide: + +```groovy +prov { + formats { + bco { + provenance_domain { + review = [ + [ + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": [ + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": "curatedBy", + "orcid": "https://orcid.org/0000-0003-1409-4549" + ] + ], + [ + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": [ + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": "curatedBy" + ] + ] + ] + derived_from = 'https://example.com/BCO_948701/1.0' + obsolete_after = '2118-09-26T14:43:43-0400' + embargo = [ + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + ] + } + usability_domain = [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ] + description_domain { + keywords = [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ] + xref = [ + [ + "namespace": "pubchem.compound", + "name": "PubChem-compound", + "ids": ["67505836"], + "access_time": "2018-13-02T10:15-05:00" + ], + [ + "namespace": "pubmed", + "name": "PubMed", + "ids": ["26508693"], + "access_time": "2018-13-02T10:15-05:00" + ], + [ + "namespace": "so", + "name": "Sequence Ontology", + "ids": ["SO:000002", "SO:0000694", 
"SO:0000667", "SO:0000045"], + "access_time": "2018-13-02T10:15-05:00" + ], + [ + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": ["31646"], + "access_time": "2018-13-02T10:15-05:00" + ] + ] + } + } + } +} +``` + +Alternatively, you can use params to make it easier for an external system: + +```groovy +prov { + formats { + bco { + provenance_domain { + review = params.bco_provenance_domain_review + derived_from = params.bco_provenance_domain_derived_from + obsolete_after = params.bco_provenance_domain_obsolete_after + embargo = params.bco_provenance_domain_embargo + } + usability_domain = params.bco_usability_domain + description_domain { + keywords = params.bco_description_domain_keywords + xref = params.bco_description_domain_xref + } + } + } +} +``` + +This way, the pass-through options can be provided as JSON in a [params file](https://nextflow.io/docs/latest/reference/cli.html#run): + +```jsonc +{ + "bco_provenance_domain_review": [ + // ... + ], + "bco_provenance_domain_derived_from": "...", + "bco_provenance_domain_obsolete_after": "...", + "bco_provenance_domain_embargo": { + "start_time": "...", + "end_time": "..." + }, + "bco_usability_domain": [ + // ... + ], + "bco_description_domain_keywords": [ + // ... + ], + "bco_description_domain_xref": [ + // ... + ] +} +``` diff --git a/README.md b/README.md index e4e815c..b26df7e 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Configuration scope for the desired output formats. The following formats are av - `bco`: Render a [BioCompute Object](https://biocomputeobject.org/). Supports the `file` and `overwrite` options. - Visit the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) to learn more about this format and how to extend it with information that isn't available to Nextflow. + *New in version 1.3.0*: additional "pass-through" options are available for BCO fields that can't be inferred from the pipeline. See [BCO.md](./BCO.md) for more information. - `dag`: Render the task graph as a Mermaid diagram embedded in an HTML document. 
Supports the `file` and `overwrite` options. diff --git a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy index 6350e05..2321142 100644 --- a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy +++ b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy @@ -67,6 +67,15 @@ class BcoRenderer implements Renderer { final nextflowVersion = nextflowMeta.version.toString() final params = session.config.params as Map + final config = session.config + final review = config.navigate('prov.formats.bco.provenance_domain.review', []) as List> + final derived_from = config.navigate('prov.formats.bco.provenance_domain.derived_from') as String + final obsolete_after = config.navigate('prov.formats.bco.provenance_domain.obsolete_after') as String + final embargo = config.navigate('prov.formats.bco.provenance_domain.embargo') as Map + final usability = config.navigate('prov.formats.bco.usability_domain', []) as List + final keywords = config.navigate('prov.formats.bco.description_domain.keywords', []) as List + final xref = config.navigate('prov.formats.bco.description_domain.xref', []) as List> + // create BCO manifest final bco = [ "object_id": null, @@ -75,6 +84,9 @@ class BcoRenderer implements Renderer { "provenance_domain": [ "name": manifest.name ?: "", "version": manifest.version ?: "", + "review": review, + "obsolete_after": obsolete_after, + "embargo": embargo, "created": dateCreated, "modified": dateCreated, "contributors": authors.collect( name -> [ @@ -83,10 +95,11 @@ class BcoRenderer implements Renderer { ] ), "license": "" ], - "usability_domain": [], + "usability_domain": usability, "extension_domain": [], "description_domain": [ - "keywords": [], + "keywords": keywords, + "xref": xref, "platform": ["Nextflow"], "pipeline_steps": tasks.sort( (task) -> task.id ).collect { task -> [ "step_number": task.id, From 459447778002d44313af647b7a1a7ac4eec25718 Mon Sep 17 00:00:00 2001 From: Ben 
Sherman Date: Mon, 21 Oct 2024 12:37:14 -0500 Subject: [PATCH 2/4] Add options for external_data_endpoints and environment_variables Signed-off-by: Ben Sherman --- BCO.md | 33 +++++++++++++++++++ .../src/main/nextflow/prov/BcoRenderer.groovy | 25 +++++++++----- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/BCO.md b/BCO.md index 04adcab..c709ab9 100644 --- a/BCO.md +++ b/BCO.md @@ -1,5 +1,7 @@ # Additional BCO configuration +*New in version 1.3.0* + The `bco` format supports additional "pass-through" options for certain BCO fields. These fields cannot be inferred automatically from a pipeline or run, and so must be entered through the config. External systems can use these config options to inject fields automatically. The following config options are supported: @@ -11,9 +13,13 @@ The following config options are supported: - `prov.formats.bco.usability_domain` - `prov.formats.bco.description_domain.keywords` - `prov.formats.bco.description_domain.xref` +- `prov.formats.bco.execution_domain.external_data_endpoints` +- `prov.formats.bco.execution_domain.environment_variables` These options correspond exactly to fields in the BCO JSON schema. Refer to the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) for more information about these fields. +*NOTE: The `environment_variables` setting differs from the BCO standard in that it only specifies the variable names. 
Only the variables specified in this list will be populated in the BCO, if they are present in the execution environment.* + Here is an example config based on the BCO User Guide: ```groovy @@ -94,6 +100,23 @@ prov { ] ] } + execution_domain { + external_data_endpoints = [ + [ + "url": "protocol://domain:port/application/path", + "name": "generic name" + ], + [ + "url": "ftp://data.example.com:21/", + "name": "access to ftp server" + ], + [ + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils", + "name": "access to e-utils web service" + ] + ] + environment_variables = ["HOSTTYPE", "EDITOR"] + } } } } @@ -116,6 +139,10 @@ prov { keywords = params.bco_description_domain_keywords xref = params.bco_description_domain_xref } + execution_domain { + external_data_endpoints = params.bco_execution_domain_external_data_endpoints + environment_variables = params.bco_execution_domain_environment_variables + } } } } @@ -142,6 +169,12 @@ This way, the pass-through options can be provided as JSON in a [params file](ht ], "bco_description_domain_xref": [ // ... + ], + "bco_execution_domain_external_data_endpoints": [ + // ... + ], + "bco_execution_domain_environment_variables": [ + // ... 
] } ``` diff --git a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy index 2321142..130a3a4 100644 --- a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy +++ b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy @@ -23,6 +23,7 @@ import java.time.format.DateTimeFormatter import groovy.json.JsonOutput import groovy.transform.CompileStatic import nextflow.Session +import nextflow.SysEnv import nextflow.processor.TaskRun import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper @@ -68,13 +69,15 @@ class BcoRenderer implements Renderer { final params = session.config.params as Map final config = session.config - final review = config.navigate('prov.formats.bco.provenance_domain.review', []) as List> - final derived_from = config.navigate('prov.formats.bco.provenance_domain.derived_from') as String - final obsolete_after = config.navigate('prov.formats.bco.provenance_domain.obsolete_after') as String - final embargo = config.navigate('prov.formats.bco.provenance_domain.embargo') as Map - final usability = config.navigate('prov.formats.bco.usability_domain', []) as List - final keywords = config.navigate('prov.formats.bco.description_domain.keywords', []) as List - final xref = config.navigate('prov.formats.bco.description_domain.xref', []) as List> + final review = config.navigate('prov.formats.bco.provenance_domain.review', []) as List> + final derived_from = config.navigate('prov.formats.bco.provenance_domain.derived_from') as String + final obsolete_after = config.navigate('prov.formats.bco.provenance_domain.obsolete_after') as String + final embargo = config.navigate('prov.formats.bco.provenance_domain.embargo') as Map + final usability = config.navigate('prov.formats.bco.usability_domain', []) as List + final keywords = config.navigate('prov.formats.bco.description_domain.keywords', []) as List + final xref = 
config.navigate('prov.formats.bco.description_domain.xref', []) as List> + final external_data_endpoints = config.navigate('prov.formats.bco.execution_domain.external_data_endpoints', []) as List> + final environment_variables = config.navigate('prov.formats.bco.execution_domain.environment_variables', []) as List // create BCO manifest final bco = [ @@ -125,8 +128,12 @@ class BcoRenderer implements Renderer { ] ] ], - "external_data_endpoints": [], - "environment_variables": [:] + "external_data_endpoints": external_data_endpoints, + "environment_variables": environment_variables.inject([:]) { acc, name -> + if( SysEnv.containsKey(name) ) + acc.put(name, SysEnv.get(name)) + acc + } ], "parametric_domain": params.toConfigObject().flatten().collect( (k, v) -> [ "param": k, From f2c9416e3017b0b91705e1b91c22ee9acbf210d2 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 5 Nov 2024 18:37:33 +0100 Subject: [PATCH 3/4] Add manifest contributors and license Signed-off-by: Ben Sherman --- plugins/nf-prov/build.gradle | 19 +++++------- .../src/main/nextflow/prov/BcoRenderer.groovy | 29 +++++++++++++++---- .../src/main/nextflow/prov/DagRenderer.groovy | 16 +++++----- .../src/resources/META-INF/MANIFEST.MF | 2 +- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/plugins/nf-prov/build.gradle b/plugins/nf-prov/build.gradle index 3eee961..6065014 100644 --- a/plugins/nf-prov/build.gradle +++ b/plugins/nf-prov/build.gradle @@ -56,21 +56,18 @@ sourceSets { dependencies { // This dependency is exported to consumers, that is to say found on their compile classpath. 
- compileOnly 'io.nextflow:nextflow:23.04.0' + compileOnly 'io.nextflow:nextflow:24.10.0' compileOnly 'org.slf4j:slf4j-api:1.7.10' - compileOnly 'org.pf4j:pf4j:3.4.1' - // add here plugins depepencies + compileOnly 'org.pf4j:pf4j:3.12.0' // test configuration - testImplementation "org.codehaus.groovy:groovy:3.0.8" - testImplementation "org.codehaus.groovy:groovy-nio:3.0.8" - testImplementation 'io.nextflow:nextflow:23.04.0' - testImplementation ("org.codehaus.groovy:groovy-test:3.0.8") { exclude group: 'org.codehaus.groovy' } + testImplementation 'io.nextflow:nextflow:24.10.0' + testImplementation ("org.codehaus.groovy:groovy-test:4.0.23") { exclude group: 'org.codehaus.groovy' } testImplementation ("cglib:cglib-nodep:3.3.0") - testImplementation ("org.objenesis:objenesis:3.1") - testImplementation ("org.spockframework:spock-core:2.0-M3-groovy-3.0") { exclude group: 'org.codehaus.groovy'; exclude group: 'net.bytebuddy' } - testImplementation ('org.spockframework:spock-junit4:2.0-M3-groovy-3.0') { exclude group: 'org.codehaus.groovy'; exclude group: 'net.bytebuddy' } - testImplementation ('com.google.jimfs:jimfs:1.1') + testImplementation ("org.objenesis:objenesis:3.2") + testImplementation ("org.spockframework:spock-core:2.3-groovy-4.0") { exclude group: 'org.codehaus.groovy'; exclude group: 'net.bytebuddy' } + testImplementation ('org.spockframework:spock-junit4:2.3-groovy-4.0') { exclude group: 'org.codehaus.groovy'; exclude group: 'net.bytebuddy' } + testImplementation ('com.google.jimfs:jimfs:1.2') // see https://docs.gradle.org/4.1/userguide/dependency_management.html#sec:module_replacement modules { diff --git a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy index 130a3a4..aededdb 100644 --- a/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy +++ b/plugins/nf-prov/src/main/nextflow/prov/BcoRenderer.groovy @@ -24,10 +24,13 @@ import groovy.json.JsonOutput import 
groovy.transform.CompileStatic import nextflow.Session import nextflow.SysEnv +import nextflow.config.Manifest import nextflow.processor.TaskRun import nextflow.script.WorkflowMetadata import nextflow.util.CacheHelper +import static nextflow.config.Manifest.ContributionType + /** * Renderer for the BioCompute Object (BCO) format. * @@ -64,7 +67,7 @@ class BcoRenderer implements Renderer { final nextflowMeta = metadata.nextflow final dateCreated = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(metadata.start) - final authors = (manifest.author ?: '').tokenize(',')*.trim() + final contributors = getContributors(manifest) final nextflowVersion = nextflowMeta.version.toString() final params = session.config.params as Map @@ -88,15 +91,13 @@ class BcoRenderer implements Renderer { "name": manifest.name ?: "", "version": manifest.version ?: "", "review": review, + "derived_from": derived_from, "obsolete_after": obsolete_after, "embargo": embargo, "created": dateCreated, "modified": dateCreated, - "contributors": authors.collect( name -> [ - "contribution": ["authoredBy"], - "name": name - ] ), - "license": "" + "contributors": contributors, + "license": manifest.license ], "usability_domain": usability, "extension_domain": [], @@ -191,4 +192,20 @@ class BcoRenderer implements Renderer { path.text = JsonOutput.prettyPrint(JsonOutput.toJson(bco)) } + private List getContributors(Manifest manifest) { + manifest.contributors.collect { c -> [ + "name": c.name, + "affiliation": c.affiliation, + "email": c.email, + "contribution": c.contribution.collect { ct -> CONTRIBUTION_TYPES[ct] }, + "orcid": c.orcid + ] } + } + + private static Map CONTRIBUTION_TYPES = [ + (ContributionType.AUTHOR) : "authoredBy", + (ContributionType.MAINTAINER) : "curatedBy", + (ContributionType.CONTRIBUTOR) : "curatedBy", + ] + } diff --git a/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy b/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy index 6cd3448..af23828 100644 --- 
a/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy +++ b/plugins/nf-prov/src/main/nextflow/prov/DagRenderer.groovy @@ -64,8 +64,8 @@ class DagRenderer implements Renderer { } private Map getVertices(Set tasks) { - def result = [:] - for( def task : tasks ) { + Map result = [:] + for( final task : tasks ) { final inputs = task.getInputFilesMap() final outputs = ProvHelper.getTaskOutputs(task) @@ -154,7 +154,7 @@ class DagRenderer implements Renderer { } // render task outputs - final outputs = [:] as Map + Map outputs = [:] dag.vertices.each { task, vertex -> vertex.outputs.each { path -> @@ -184,11 +184,11 @@ class DagRenderer implements Renderer { * @param vertices */ private Map getTaskTree(Map vertices) { - def taskTree = [:] + final taskTree = [:] - for( def entry : vertices ) { - def task = entry.key - def vertex = entry.value + for( final entry : vertices ) { + final task = entry.key + final vertex = entry.value // infer subgraph keys from fully qualified process name final result = getSubgraphKeys(task.processor.name) @@ -200,7 +200,7 @@ class DagRenderer implements Renderer { // navigate to given subgraph def subgraph = taskTree - for( def key : keys ) { + for( final key : keys ) { if( key !in subgraph ) subgraph[key] = [:] subgraph = subgraph[key] diff --git a/plugins/nf-prov/src/resources/META-INF/MANIFEST.MF b/plugins/nf-prov/src/resources/META-INF/MANIFEST.MF index 1bb2525..0497576 100644 --- a/plugins/nf-prov/src/resources/META-INF/MANIFEST.MF +++ b/plugins/nf-prov/src/resources/META-INF/MANIFEST.MF @@ -3,4 +3,4 @@ Plugin-Id: nf-prov Plugin-Version: 1.2.4 Plugin-Class: nextflow.prov.ProvPlugin Plugin-Provider: nextflow -Plugin-Requires: >=23.04.0 +Plugin-Requires: >=24.10.0 From 837f06c6a1007f704bae9ccdf10d91520d3b3555 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 5 Nov 2024 19:08:05 +0100 Subject: [PATCH 4/4] Remove broken test dependency Signed-off-by: Ben Sherman --- plugins/nf-prov/build.gradle | 1 - 1 file changed, 1 deletion(-) 
diff --git a/plugins/nf-prov/build.gradle b/plugins/nf-prov/build.gradle index 6065014..4432fcf 100644 --- a/plugins/nf-prov/build.gradle +++ b/plugins/nf-prov/build.gradle @@ -62,7 +62,6 @@ dependencies { // test configuration testImplementation 'io.nextflow:nextflow:24.10.0' - testImplementation ("org.codehaus.groovy:groovy-test:4.0.23") { exclude group: 'org.codehaus.groovy' } testImplementation ("cglib:cglib-nodep:3.3.0") testImplementation ("org.objenesis:objenesis:3.2") testImplementation ("org.spockframework:spock-core:2.3-groovy-4.0") { exclude group: 'org.codehaus.groovy'; exclude group: 'net.bytebuddy' }