Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sources endpoint #124

Merged
merged 8 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 105 additions & 1 deletion dd-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,38 @@ paths:
description: No Concepts with terms matching the specified string.
'5XX':
description: Unknown error
/sources:
get:
operationId: sources_get
summary: Returns information on the sources (SABs) in the UBKG, filtered by source (SAB) and UBKG context.
parameters:
- name: sab
in: query
required: true
description: A source (SAB) to which to limit the response.
schema:
type: string
example: UBERON
- name: context
in: query
required: true
description: A UBKG context to which to limit the response. Possible values are base_context, data_distillery_context, hubmap_sennet_context
schema:
type: string
example: base_context
responses:
'200':
description: An object containing an array of information on the sources that match the specified SAB and UBKG context
content:
application/json:
schema:
$ref: '#/components/schemas/Source'
'400':
description: Invalid parameter name; Invalid parameter value
'404':
description: No sources.
'5XX':
description: Unknown error
components:
securitySchemes:
api_key:
Expand Down Expand Up @@ -1476,4 +1508,76 @@ components:
items:
type: string
description: a Concept Unique Identifier (CUI)
example: C0794811
example: C0794811
Source: #Schema name
type: object
description: An object that contains an array (sources) of information on the sources that can be imported into a UBKG context.
properties:
sources:
type: array
items:
type: object
description: array of source objects
properties:
citations:
type: array
description: References to the source recommended by the source's steward
items:
type: object
description: citation reference
properties:
PMID:
description: PubMedID, if applicable
example: 37953324
url:
description: full URL to the citation
example: "https://pubmed.ncbi.nlm.nih.gov/37953324"
contexts:
type: array
description: UBKG contexts that contain the source
items:
type: string
example: base_context
description:
description: longform description of the source
example: "An ontology for describing the classification of human diseases organized by etiology."
download_date:
description: Date when the source was obtained, in format YYYY_MM_DD
example: "2024_02_24"
home_urls:
type: array
description: URLs to steward web sites
items:
type: string
example: "https://obofoundry.org/ontology/doid.html"
licenses:
type: array
description: applicable licenses or terms of use of the source specified by stewards
items:
type: object
description: license
properties:
type:
description: the type of license
example: Creative Commons license (creativecommons.org)
subtype:
description: the subtype of license
example: CCO
version:
description: the version of the license
example: version 4
name:
description: name of the source
example: Human Disease Ontology
sab:
description: acronym for Source ABbreviation
example: DOID
source_etl:
description: the parameter provided to the UBKG generation framework ETL script. For sources that are from OWL files, source_etl is the URL to the OWL; for other sources, source_etl is a command for one of the python scripts that the generation framework uses to import the source.
example: "http://purl.obolibrary.org/obo/doid.owl"
source_type:
description: The type of source file. Possible values are owl, api, ftp, ubkg_edge_node, umls, and simpleknowledge.
example: owl
source_version:
description: A reference to the version of the source. If the source is an OWL file, the usual version is the date extracted from the VersionIRI of the file; if from a website, the file date. Dates are in format YYYY_MM_DD.
example: "2024_04_22"
2 changes: 2 additions & 0 deletions src/ubkg_api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from common_routes.property_types.property_types_controller import property_types_blueprint
from common_routes.relationship_types.relationship_types_controller import relationship_types_blueprint
from common_routes.sabs.sabs_controller import sabs_blueprint
from common_routes.sources.sources_controller import sources_blueprint

logging.basicConfig(format='[%(asctime)s] %(levelname)s in %(module)s: %(message)s', level=logging.DEBUG,
datefmt='%Y-%m-%d %H:%M:%S')
Expand Down Expand Up @@ -61,6 +62,7 @@ def __init__(self, config, package_base_dir):
self.app.register_blueprint(property_types_blueprint)
self.app.register_blueprint(relationship_types_blueprint)
self.app.register_blueprint(sabs_blueprint)
self.app.register_blueprint(sources_blueprint)

self.app.neo4jConnectionHelper = None

Expand Down
44 changes: 44 additions & 0 deletions src/ubkg_api/common_routes/common_neo4j_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,3 +1039,47 @@ def sab_term_type_get_logic(neo4j_instance, sab=None, skip=None, limit=None) ->
# The query returns a single record.

return termtype

def sources_get_logic(neo4j_instance, sab=None, context=None) -> list:
    """
    Obtains information on sources, or nodes in the UBKGSOURCE ontology.

    The return from the query is simple, and there is no need for a model class.

    :param neo4j_instance: neo4j connection
    :param sab: list of sources (SABs) by which to filter. None or an empty
                list means that no SAB filter is applied.
    :param context: list of UBKG contexts by which to filter. None or an empty
                    list means that no context filter is applied.
    :return: list of the 'response' values of the records returned by the
             sources.cypher query

    """
    sab_values = _filter_values(sab)
    context_values = _filter_values(context)

    # Load the query. The $sabfilter and $contextfilter placeholders are
    # replaced with WHERE-clause text; the filter values themselves are passed
    # to the driver as Cypher parameters instead of being written into the
    # query text, so request values cannot alter the query.
    querytxt = loadquerystring('sources.cypher')
    parameters = {}

    # Filter by code SAB.
    if sab_values:
        querytxt = querytxt.replace('$sabfilter', ' AND t.name IN $sabs')
        parameters['sabs'] = sab_values
    else:
        querytxt = querytxt.replace('$sabfilter', '')

    # Filter by ubkg context.
    # In the subquery that contains $contextfilter, t is the term of the source
    # itself and tContext is the term of the context, so the filter must test
    # tContext.name.
    if context_values:
        querytxt = querytxt.replace('$contextfilter', ' AND tContext.name IN $contexts')
        parameters['contexts'] = context_values
    else:
        querytxt = querytxt.replace('$contextfilter', '')

    # Set timeout for query based on value in app.cfg.
    query = neo4j.Query(text=querytxt, timeout=neo4j_instance.timeout)

    with neo4j_instance.driver.session() as session:
        # The query returns a single record, with the sources nested in the
        # 'response' value. Records must be consumed while the session is open.
        return [record.get('response') for record in session.run(query, parameters)]


def _filter_values(values) -> list:
    """Returns filter values as a list: [] for None/empty, [s] for a bare string."""
    if not values:
        return []
    if isinstance(values, str):
        return [values]
    return list(values)
Empty file.
39 changes: 39 additions & 0 deletions src/ubkg_api/common_routes/sources/sources_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from flask import Blueprint, jsonify, current_app, make_response, request
from ..common_neo4j_logic import sources_get_logic
from utils.http_error_string import get_404_error_string, validate_query_parameter_names, \
validate_parameter_value_in_enum, validate_required_parameters, validate_parameter_is_numeric, \
validate_parameter_is_nonnegative, validate_parameter_range_order, check_payload_size
from utils.http_parameter import parameter_as_list, set_default_minimum, set_default_maximum

# Flask blueprint for the sources endpoint; its routes are registered under the /sources URL prefix.
sources_blueprint = Blueprint('sources', __name__, url_prefix='/sources')


@sources_blueprint.route('', methods=['GET'])
def sources_get():
    """
    Handles GET requests to the sources endpoint.

    Returns information on sources, filtered by the values of the sab and
    context query parameters.

    Responses:
    400 - a query parameter other than sab or context was supplied
    404 - the query returned no sources
    otherwise, the JSON form of the query result
    """
    neo4j_instance = current_app.neo4jConnectionHelper.instance()

    # Only sab and context are recognized as query parameter names.
    name_check = validate_query_parameter_names(parameter_name_list=['sab', 'context'])
    if name_check != 'ok':
        return make_response(name_check, 400)

    result = sources_get_logic(neo4j_instance,
                               sab=parameter_as_list(param_name='sab'),
                               context=parameter_as_list(param_name='context'))

    # Nothing to return if there is no result at all, or if the first
    # (only) response has an empty sources list.
    nothing_found = (result is None
                     or result == []
                     or len(result[0].get('sources')) == 0)
    if nothing_found:
        return make_response(get_404_error_string(prompt_string="No sources"), 404)

    return jsonify(result)
160 changes: 160 additions & 0 deletions src/ubkg_api/cypher/sources.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Used by the sources endpoint.
// Returns information on sources--i.e., nodes in the UBKGSOURCE ontology.
// The $contextfilter and $sabfilter tokens are placeholders: the calling function replaces them
// with additional WHERE-clause text (or an empty string) before the query is run.
// Each CALL subquery below looks up one property of a source, keyed by the source's CUI (CUISource).

// Get the source nodes, which are children of the "Source" node.
// Filter by UBKG context in the calling function.
// NOTE(review): in this subquery t is the term of the source itself and tContext is the term of the
// context, so text substituted for $contextfilter needs to test tContext.name to filter by context.
CALL
{
MATCH (t:Term)<-[r:PT]-(c:Code)<-[:CODE]-(p:Concept)-[:isa]->(pParent:Concept),(p:Concept)-[:in_ubkg_context]->(pContext:Concept)-[:CODE]->(cContext:Code)-[rContext:PT]-(tContext:Term)
WHERE pParent.CUI = 'UBKGSOURCE:C000001 CUI'
AND r.CUI=p.CUI
$contextfilter
RETURN DISTINCT p.CUI as CUISource, c.CodeID As CodeIDSource, t.name AS nameSource
}
// SAB
// Filter by SAB in the calling function.
// This is a non-optional MATCH, so a source without a matching SAB term is dropped from the result.
CALL
{
WITH CUISource
MATCH (pSource:Concept)-[:has_sab]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
$sabfilter
AND r.CUI = p.CUI
RETURN t.name AS sab
}
// Source name
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_name]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN t.name AS source_name
}
// Source description
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_description]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN DISTINCT t.name AS source_description
}
// home urls
// All home URL terms of the source are aggregated into one list.
CALL
{
WITH CUISource
MATCH (pSource:Concept)-[:has_home_url]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN COLLECT(t.name) AS home_urls
}
// citations as both PMID and URL
// The PMID is the second ':'-delimited segment of the term name
// (presumably the term has the form 'PMID:<id>' -- confirm against the source data).
CALL
{
WITH CUISource
MATCH (pSource:Concept)-[:has_citation]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN COLLECT(DISTINCT {PMID:split(t.name,':')[1], url:'https://pubmed.ncbi.nlm.nih.gov/'+split(t.name,':')[1]}) AS citations
}
// ETL command
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_etl_command]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN t.name AS source_etl_command
}
// ETL OWL
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_etl_owl]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN t.name AS source_etl_owl
}
// source_version
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_source_version]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN t.name AS source_version
}
// source_type, translated
// The returned value is the text that follows 'source_type_' in the term name.
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_source_type]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN split(t.name,'source_type_')[1] AS source_type
}
// download_date
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_download_date]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
AND r.CUI = p.CUI
RETURN t.name AS source_download_date
}
// license
// Returns one row per license concept of the source; the license subqueries below are keyed by CUILicense.
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:has_license]->(pLicense:Concept)
WHERE pSource.CUI = CUISource
RETURN pLicense.CUI AS CUILicense
}
// license type
CALL
{
WITH CUILicense
OPTIONAL MATCH (pLicense:Concept)-[:has_license_type]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pLicense.CUI=CUILicense
RETURN t.name AS license_type, p.CUI AS CUILicenseType
}
// license definition, which is linked to the license type
CALL
{
WITH CUILicenseType
OPTIONAL MATCH (pLicense:Concept)-[:DEF]->(d:Definition)
WHERE pLicense.CUI=CUILicenseType
RETURN d.DEF AS license_definition
}
// license subtype
CALL
{
WITH CUILicense
OPTIONAL MATCH (pLicense:Concept)-[:has_license_subtype]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pLicense.CUI=CUILicense
RETURN t.name AS license_subtype
}
// license version
CALL
{
WITH CUILicense
OPTIONAL MATCH (pLicense:Concept)-[:has_license_version]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pLicense.CUI=CUILicense
RETURN t.name AS license_version
}
// ubkg context
CALL
{
WITH CUISource
OPTIONAL MATCH (pSource:Concept)-[:in_ubkg_context]->(p:Concept)-[:CODE]->(c:Code)-[r:PT]->(t:Term)
WHERE pSource.CUI=CUISource
RETURN t.name AS context
}

// Collapse to one row per source: the license and context subqueries return one row per
// license/context, so those values are aggregated with COLLECT(DISTINCT ...).
// source_etl is the ETL command when there is one; otherwise the ETL OWL value.
// The license type is reported as the license type's definition when there is one; otherwise the type's term name.
WITH CUISource,sab,source_name,source_description, home_urls, citations,
CASE WHEN source_etl_command IS NULL THEN source_etl_owl ELSE source_etl_command END as source_etl,
source_version, source_type,source_download_date, COLLECT(DISTINCT CASE WHEN license_type is NULL THEN NULL ELSE {type: CASE WHEN license_definition IS NULL THEN license_type ELSE license_definition END,subtype:license_subtype,version:license_version} END) AS licenses, COLLECT(DISTINCT context) AS contexts
// Build one map per source, then gather all of the maps into a single response record.
WITH {sab:sab,name:source_name,description:source_description,home_urls:home_urls,citations:citations,source_etl:source_etl,source_version:source_version, source_type:source_type, download_date:source_download_date,licenses:licenses,contexts:contexts} AS source
WITH COLLECT(source) AS sources
RETURN {sources:sources} AS response
Loading
Loading