From 4a364a65844f54ff18c2b1734b9593f05af8b27a Mon Sep 17 00:00:00 2001 From: katie stahl Date: Tue, 5 Sep 2023 15:36:06 -0400 Subject: [PATCH 1/2] fix: adding validation to disallow same-gene fusions (#251) * fix: adding validation to disallow same-gene fusions * remove console log * fix: adding validation to disallow same-gene fusions * fix: fixing console error for unique keys --- .../Pages/Summary/Invalid/Invalid.tsx | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/client/src/components/Pages/Summary/Invalid/Invalid.tsx b/client/src/components/Pages/Summary/Invalid/Invalid.tsx index 793dc850..97bca2dd 100644 --- a/client/src/components/Pages/Summary/Invalid/Invalid.tsx +++ b/client/src/components/Pages/Summary/Invalid/Invalid.tsx @@ -49,6 +49,26 @@ export const Invalid: React.FC = ({ })); const classes = useStyles(); + const duplicateGeneError = (duplicateGenes: string[]) => { + return ( + + Duplicate gene element(s) detected: {duplicateGenes.join(", ")}. Per the{" "} + + Gene Fusion Specification + + , Internal Tandem Duplications are not considered gene fusions, as they do not involve an interaction + between two or more genes.{" "} + setVisibleTab(0)}> + Edit elements to resolve. + + + ) + }; + const elementNumberError = ( Insufficient number of structural and regulatory elements. Per the{" "} @@ -146,6 +166,10 @@ export const Invalid: React.FC = ({ const CURIE_PATTERN = /^\w[^:]*:.+$/; + const geneElements = fusion.structural_elements.filter(el => el.type === "GeneElement").map(el => { return el.nomenclature }) + const findDuplicates = arr => arr.filter((item, index) => arr.indexOf(item) !== index) + const duplicateGenes = findDuplicates(geneElements) + const checkErrors = () => { const errorElements: React.ReactFragment[] = []; if ( @@ -166,6 +190,9 @@ export const Invalid: React.FC = ({ errorElements.push(noGeneElementsError); } } + if (duplicateGenes.length > 0) { + errorElements.push(duplicateGeneError(duplicateGenes)) + } if (fusion.type == "AssayedFusion") { if ( !( @@ -210,10 +237,10 @@ export const Invalid: React.FC = ({ {checkErrors().map((error, index: number) => ( - <> + {index > 0 ? : <>} {error} - + ))} From 85ba7ff4fa93032a8e8762e8765b9fdd0b84ebc9 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 28 Sep 2023 12:23:57 -0400 Subject: [PATCH 2/2] Autocomplete updates (#245) --- .../Input/StructuralElementInputAccordion.tsx | 5 +- .../TxSegmentElementInput.tsx | 3 +- .../GetCoordinates/GetCoordinates.tsx | 3 +- client/src/components/main/App/App.scss | 2 +- client/src/components/main/App/App.tsx | 5 +- .../GeneAutocomplete/GeneAutocomplete.tsx | 179 ++++++++++------- client/src/global/styles/theme.ts | 2 +- client/src/services/ResponseModels.ts | 183 +++++++++++------- server/curfu/cli.py | 6 +- server/curfu/devtools/build_gene_suggest.py | 148 ++++++++++++++ .../{interpro.py => build_interpro.py} | 3 +- server/curfu/devtools/gene.py | 124 ------------ server/curfu/gene_services.py | 123 ++++++------ server/curfu/main.py | 1 - server/curfu/routers/complete.py | 26 ++- server/curfu/schemas.py | 15 +- server/curfu/utils.py | 22 ++- server/setup.cfg | 1 + server/tests/integration/test_complete.py | 58 ++++-- 19 files changed, 529 insertions(+), 380 deletions(-) create mode 100644 server/curfu/devtools/build_gene_suggest.py rename server/curfu/devtools/{interpro.py => build_interpro.py} (99%) delete mode 100644 server/curfu/devtools/gene.py diff --git a/client/src/components/Pages/Structure/Input/StructuralElementInputAccordion.tsx b/client/src/components/Pages/Structure/Input/StructuralElementInputAccordion.tsx index 0753b150..f47cc2ca 100644 --- a/client/src/components/Pages/Structure/Input/StructuralElementInputAccordion.tsx +++ b/client/src/components/Pages/Structure/Input/StructuralElementInputAccordion.tsx @@ -115,7 +115,10 @@ const StructuralElementInputAccordion: React.FC< > {validated ? ( - + ) : ( diff --git a/client/src/components/Pages/Structure/Input/TxSegmentElementInput/TxSegmentElementInput.tsx b/client/src/components/Pages/Structure/Input/TxSegmentElementInput/TxSegmentElementInput.tsx index 1fb31dd2..5b8abe46 100644 --- a/client/src/components/Pages/Structure/Input/TxSegmentElementInput/TxSegmentElementInput.tsx +++ b/client/src/components/Pages/Structure/Input/TxSegmentElementInput/TxSegmentElementInput.tsx @@ -443,7 +443,8 @@ const TxSegmentCompInput: React.FC = ({ tooltipDirection="bottom" geneText={txGeneText} setGeneText={setTxGeneText} - style={{ width: 125 }} + setChromosome={setTxChrom} + setStrand={setTxStrand} /> {genomicCoordinateInfo} diff --git a/client/src/components/Utilities/GetCoordinates/GetCoordinates.tsx b/client/src/components/Utilities/GetCoordinates/GetCoordinates.tsx index f0b36c30..5e74cdb6 100644 --- a/client/src/components/Utilities/GetCoordinates/GetCoordinates.tsx +++ b/client/src/components/Utilities/GetCoordinates/GetCoordinates.tsx @@ -284,7 +284,8 @@ const GetCoordinates: React.FC = () => { setGene={setGene} geneText={geneText} setGeneText={setGeneText} - style={{ width: 125 }} + setChromosome={setChromosome} + setStrand={setStrand} /> {genomicCoordinateInfo} diff --git a/client/src/components/main/App/App.scss b/client/src/components/main/App/App.scss index 2068049e..910af101 100644 --- a/client/src/components/main/App/App.scss +++ b/client/src/components/main/App/App.scss @@ -37,7 +37,7 @@ h3 { .MuiDrawer-paper { width: 160px; overflow-x: hidden !important; - background-color: #18252B; + background-color: #18252b; color: white !important; } } diff --git a/client/src/components/main/App/App.tsx b/client/src/components/main/App/App.tsx index c64152ad..586b3ee5 100644 --- a/client/src/components/main/App/App.tsx +++ b/client/src/components/main/App/App.tsx @@ -161,7 +161,10 @@ const App = (): JSX.Element => { * readability. */ const fusionIsEmpty = () => { - if (fusion?.structural_elements.length === 0 && fusion?.regulatory_element === undefined) { + if ( + fusion?.structural_elements.length === 0 && + fusion?.regulatory_element === undefined + ) { return true; } else if (fusion.structural_elements.length > 0) { return false; diff --git a/client/src/components/main/shared/GeneAutocomplete/GeneAutocomplete.tsx b/client/src/components/main/shared/GeneAutocomplete/GeneAutocomplete.tsx index 710dc80a..85821a70 100644 --- a/client/src/components/main/shared/GeneAutocomplete/GeneAutocomplete.tsx +++ b/client/src/components/main/shared/GeneAutocomplete/GeneAutocomplete.tsx @@ -1,13 +1,12 @@ -import React, { useState, useEffect } from "react"; -import { TextField, Typography } from "@material-ui/core"; -import Autocomplete from "@material-ui/lab/Autocomplete"; -import { getGeneId, getGeneSuggestions } from "../../../../services/main"; -import { CSSProperties } from "@material-ui/core/styles/withStyles"; -import { - NormalizeGeneResponse, - SuggestGeneResponse, -} from "../../../../services/ResponseModels"; +import React, { useState, useEffect, ReactNode } from "react"; +import { TextField, Typography, makeStyles } from "@material-ui/core"; +import Autocomplete, { + AutocompleteRenderGroupParams, +} from "@material-ui/lab/Autocomplete"; +import { getGeneSuggestions } from "../../../../services/main"; +import { SuggestGeneResponse } from "../../../../services/ResponseModels"; import HelpTooltip from "../HelpTooltip/HelpTooltip"; +import { useColorTheme } from "../../../../global/contexts/Theme/ColorThemeContext"; export enum GeneSuggestionType { conceptId = "Concept ID", @@ -16,7 +15,13 @@ export enum GeneSuggestionType { prevSymbol = "Previous Symbol", none = "", } -export type SuggestedGeneOption = { value: string; type: GeneSuggestionType }; + +export type SuggestedGeneOption = { + value: string; + type: GeneSuggestionType | string; + chromosome?: string; + strand?: string; +}; const defaultGeneOption: SuggestedGeneOption = { value: "", @@ -43,6 +48,8 @@ interface Props { | "top-start" | undefined; promptText?: string | undefined; + setChromosome?: CallableFunction; + setStrand?: CallableFunction; } export const GeneAutocomplete: React.FC = ({ @@ -52,6 +59,8 @@ export const GeneAutocomplete: React.FC = ({ setGeneText, tooltipDirection, promptText, + setChromosome, + setStrand, }) => { const existingGeneOption = gene ? { value: gene, type: GeneSuggestionType.symbol } @@ -59,6 +68,17 @@ export const GeneAutocomplete: React.FC = ({ const [geneOptions, setGeneOptions] = useState([]); const [geneValue, setGeneValue] = useState(existingGeneOption); const [inputValue, setInputValue] = useState(existingGeneOption); + const [loading, setLoading] = useState(false); + + const { colorTheme } = useColorTheme(); + const useStyles = makeStyles(() => ({ + autocompleteGroupHeader: { + paddingLeft: "8px", + color: colorTheme["--dark-gray"], + fontSizeAdjust: "0.5", + }, + })); + const classes = useStyles(); /** * Simple wrapper around state setters to ensure updates to local selected value are reflected @@ -68,6 +88,12 @@ export const GeneAutocomplete: React.FC = ({ const updateSelection = (selection: SuggestedGeneOption) => { setGene(selection.value); setGeneValue(selection); + if (setChromosome) { + setChromosome(selection.chromosome); + } + if (setStrand) { + setStrand(selection.strand); + } }; // Update options @@ -75,23 +101,19 @@ export const GeneAutocomplete: React.FC = ({ if (inputValue.value === "") { setGeneText(""); setGeneOptions([]); + setLoading(false); } else { - const delayDebounce = setTimeout(() => { - getGeneSuggestions(inputValue.value).then((suggestResponseJson) => { - if ( - !suggestResponseJson.symbols && - !suggestResponseJson.prev_symbols && - !suggestResponseJson.aliases - ) { - setGeneText("Unrecognized term"); - setGeneOptions([]); - } else { - setGeneText(""); - setGeneOptions(buildOptions(suggestResponseJson)); - } - }); - }, 300); - return () => clearTimeout(delayDebounce); + setLoading(true); + getGeneSuggestions(inputValue.value).then((suggestResponseJson) => { + setLoading(false); + if (suggestResponseJson.matches_count === 0) { + setGeneText("Unrecognized term"); + setGeneOptions([]); + } else { + setGeneText(""); + setGeneOptions(buildOptions(suggestResponseJson, inputValue.value)); + } + }); } }, [inputValue]); @@ -103,48 +125,43 @@ export const GeneAutocomplete: React.FC = ({ }, [gene]); /** - * Attempt exact match for entered text. Should be called if user-submitted text - * isn't specific enough to narrow options down to a reasonable number (the - * `MAX_SUGGESTIONS` value set server-side), in case their entered value - * happens to match a real gene term. - * No return value, but updates dropdown options if successful. + * Generate group HTML element. Needed to properly display text about # of other possible completions. + * @param params group object processed by autocomplete + * @returns group node to render */ - const tryExactMatch = (input: string) => { - getGeneId(input).then((geneResponseJson: NormalizeGeneResponse) => { - // just provide entered term, but correctly-cased - setGeneText(""); - if (geneResponseJson.cased) { - setGeneOptions([ - { - value: geneResponseJson.cased, - type: geneResponseJson.cased.match(/^\w[^:]*:.+$/) - ? GeneSuggestionType.conceptId - : GeneSuggestionType.symbol, - }, - ]); - } - }); + const makeGroup = (params: AutocompleteRenderGroupParams): ReactNode => { + const children = params.group.includes("possible") ? [] : params.children; + const groupElement = ( +
+ {params.group} +
+ ); + return [groupElement, children]; }; - // if geneOptions is empty, try an exact match (note: keep this useEffect separately, as we want to do this after all of the autocomplete lookups) - useEffect(() => { - if (!geneOptions.length) { - tryExactMatch(inputValue.value); - } - }, [geneOptions]); - - /** - * Construct options for use in MUI Autocomplete GroupBy - * @param suggestResponse response from suggestions API received from server - * @returns array of option objects - */ const buildOptions = ( - suggestResponse: SuggestGeneResponse + suggestResponse: SuggestGeneResponse, + inputValue: string ): SuggestedGeneOption[] => { const options: SuggestedGeneOption[] = []; - if (suggestResponse.symbols) { - suggestResponse.symbols.map((suggestion) => - options.push({ value: suggestion[0], type: GeneSuggestionType.symbol }) + if (suggestResponse.concept_id) { + suggestResponse.concept_id.map((suggestion) => + options.push({ + value: suggestion[0], + type: GeneSuggestionType.conceptId, + chromosome: suggestion[3], + strand: suggestion[4], + }) + ); + } + if (suggestResponse.symbol) { + suggestResponse.symbol.map((suggestion) => + options.push({ + value: suggestion[0], + type: GeneSuggestionType.symbol, + chromosome: suggestion[3], + strand: suggestion[4], + }) ); } if (suggestResponse.prev_symbols) { @@ -152,22 +169,50 @@ export const GeneAutocomplete: React.FC = ({ options.push({ value: suggestion[0], type: GeneSuggestionType.prevSymbol, + chromosome: suggestion[3], + strand: suggestion[4], }) ); } if (suggestResponse.aliases) { suggestResponse.aliases.map((suggestion) => - options.push({ value: suggestion[0], type: GeneSuggestionType.alias }) + options.push({ + value: suggestion[0], + type: GeneSuggestionType.alias, + chromosome: suggestion[3], + strand: suggestion[4], + }) ); } + // slightly hack-y way to insert message about number of possible options: create an option group + // with the message as the group title, and then in `makeGroup()`, remove all of its child elements. + // `value` needs to be set to `inputValue` (or another valid completion of user text) for the autocomplete object + // to render the group at all + if (suggestResponse.warnings) { + suggestResponse.warnings.map((warn: string) => { + if (warn.startsWith("Exceeds max matches")) { + const maxExceededMsg = + options.length > 0 + ? `+ ${suggestResponse.matches_count} possible options` + : `${suggestResponse.matches_count} possible options`; + options.push({ + value: inputValue, + type: maxExceededMsg, + }); + } + }); + } return options; }; return ( { if (newValue) { updateSelection(newValue); @@ -181,13 +226,11 @@ export const GeneAutocomplete: React.FC = ({ }} options={geneOptions} groupBy={(option) => (option ? option.type : "")} + renderGroup={makeGroup} getOptionLabel={(option) => (option.value ? option.value : "")} getOptionSelected={(option, selected) => { return option.value === selected.value; }} - clearOnBlur={false} - clearOnEscape - disableClearable={inputValue.value === ""} renderInput={(params) => ( = ({ variant="standard" label={promptText ? promptText : "Gene Symbol"} margin="dense" - style={{minWidth: "250px !important"}} + style={{ minWidth: "250px !important" }} error={geneText !== ""} helperText={geneText ? geneText : null} /> diff --git a/client/src/global/styles/theme.ts b/client/src/global/styles/theme.ts index 944c8bbc..4561b371 100644 --- a/client/src/global/styles/theme.ts +++ b/client/src/global/styles/theme.ts @@ -40,7 +40,7 @@ const theme = createTheme({ secondary: { main: COLORTHEMES.light["--secondary"], contrastText: COLORTHEMES.light["--white"], - } + }, }, }); diff --git a/client/src/services/ResponseModels.ts b/client/src/services/ResponseModels.ts index 52033882..2b822703 100644 --- a/client/src/services/ResponseModels.ts +++ b/client/src/services/ResponseModels.ts @@ -7,9 +7,8 @@ /** * Define possible classes of Regulatory Elements. Options are the possible values - * for /regulatory_class value property in the INSDC controlled vocabulary: - * https://www.insdc.org/controlled-vocabulary-regulatoryclass - * + * for /regulatory_class value property in the INSDC controlled vocabulary: + * https://www.insdc.org/controlled-vocabulary-regulatoryclass */ export type RegulatoryClass = | "attenuator" @@ -32,20 +31,7 @@ export type RegulatoryClass = | "terminator" | "other"; /** - * A string that refers to an object uniquely. The lifetime and scope of - * an id is defined by the sender. VRS does not impose any constraints on - * strings used as ids in messages. However, to maximize sharability of data, - * VRS RECOMMENDS that implementations use [W3C Compact URI (CURIE)] - * (https://www.w3.org/TR/curie/) syntax. String CURIEs are represented as - * `prefix`:`reference` (W3C terminology), but often referred to as ` - * namespace`:`accession` or `namespace`:`local id` colloquially. VRS also - * RECOMMENDS that `prefix` be defined in identifiers.org. The `reference` - * component is an unconstrained string. A CURIE is a URI. URIs may *locate* - * objects (i.e., specify where to retrieve them) or *name* objects - * conceptually. VRS uses CURIEs primarily as a naming mechanism. - * Implementations MAY provide CURIE resolution mechanisms for prefixes to - * make these objects locatable. Using internal ids in public messages is - * strongly discouraged. + * A `W3C Compact URI `_ formatted string. A CURIE string has the structure ``prefix``:``reference``, as defined by the W3C syntax. */ export type CURIE = string; /** @@ -53,9 +39,7 @@ export type CURIE = string; */ export type Comparator = "<=" | ">="; /** - * A interval on a stained metaphase chromosome specified by cytobands. - * CytobandIntervals include the regions described by the start and end - * cytobands. + * A character string representing cytobands derived from the *International System for Human Cytogenomic Nomenclature* (ISCN) `guidelines `_. */ export type HumanCytoband = string; /** @@ -63,16 +47,12 @@ export type HumanCytoband = string; */ export type Strand = "+" | "-"; /** - * A character string of residues that represents a biological sequence - * using the conventional sequence order (5’-to-3’ for nucleic acid sequences, - * and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes - * are permitted in Sequences. + * A character string of Residues that represents a biological sequence using the conventional sequence order (5'-to-3' for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes are permitted in Sequences. */ export type Sequence = string; /** * Permissible values for describing the underlying causative event driving an - * assayed fusion. - * + * assayed fusion. */ export type EventType = "rearrangement" | "read-through" | "trans-splicing"; /** @@ -143,13 +123,15 @@ export interface Extension { value?: unknown; } /** - * A gene is an authoritative representation of one or more heritable - * :ref:`Locations ` that includes all sequence elements necessary - * to perform a biological function. A gene may include regulatory, - * transcribed, and/or other functional Locations. + * A reference to a Gene as defined by an authority. For human genes, the use of + * `hgnc `_ as the gene authority is + * RECOMMENDED. */ export interface Gene { type?: "Gene"; + /** + * A CURIE reference to a Gene concept + */ gene_id: CURIE; } /** @@ -170,77 +152,128 @@ export interface LocationDescriptor { * A Location defined by an interval on a referenced Sequence. */ export interface SequenceLocation { + /** + * Variation Id. MUST be unique within document. + */ _id?: CURIE; type?: "SequenceLocation"; + /** + * A VRS Computed Identifier for the reference Sequence. + */ sequence_id: CURIE; + /** + * Reference sequence region defined by a SequenceInterval. + */ interval: SequenceInterval | SimpleInterval; } /** - * A SequenceInterval represents a span of sequence. Positions are always - * represented by contiguous spans using interbase coordinates. - * SequenceInterval is intended to be compatible with that in Sequence - * Ontology ([SO:0000001](http://www.sequenceontology.org/browser/ - * current_svn/term/SO:0000001)), with the exception that the GA4GH VRS - * SequenceInterval may be zero-width. The SO definition is for an extent - * greater than zero. + * A SequenceInterval represents a span on a Sequence. Positions are always + * represented by contiguous spans using interbase coordinates or coordinate ranges. */ export interface SequenceInterval { type?: "SequenceInterval"; - start: Number | IndefiniteRange | DefiniteRange; - end: Number | IndefiniteRange | DefiniteRange; + /** + * The start coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than the value of `end`. + */ + start: DefiniteRange | IndefiniteRange | Number; + /** + * The end coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than the value of `start`. + */ + end: DefiniteRange | IndefiniteRange | Number; } /** - * A simple number value as a VRS class. + * A bounded, inclusive range of numbers. */ -export interface Number { - type?: "Number"; - value: number; +export interface DefiniteRange { + type?: "DefiniteRange"; + /** + * The minimum value; inclusive + */ + min: number; + /** + * The maximum value; inclusive + */ + max: number; } /** - * An indefinite range represented as a number and associated comparator. - * The bound operator is interpreted as follows: `>=` are all values greater - * than and including the value, `<=` are all numbers less than and including - * the value. + * A half-bounded range of numbers represented as a number bound and associated + * comparator. The bound operator is interpreted as follows: '>=' are all numbers + * greater than and including `value`, '<=' are all numbers less than and including + * `value`. */ export interface IndefiniteRange { type?: "IndefiniteRange"; + /** + * The bounded value; inclusive + */ value: number; + /** + * MUST be one of '<=' or '>=', indicating which direction the range is indefinite + */ comparator: Comparator; } /** - * A bounded, inclusive range of numbers. + * A simple integer value as a VRS class. */ -export interface DefiniteRange { - type?: "DefiniteRange"; - min: number; - max: number; +export interface Number { + type?: "Number"; + /** + * The value represented by Number + */ + value: number; } /** - * DEPRECATED: A SimpleInterval represents a span of sequence. Positions - * are always represented by contiguous spans using interbase coordinates. + * DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always + * represented by contiguous spans using interbase coordinates. * This class is deprecated. Use SequenceInterval instead. */ export interface SimpleInterval { type?: "SimpleInterval"; + /** + * The start coordinate + */ start: number; + /** + * The end coordinate + */ end: number; } /** * A Location on a chromosome defined by a species and chromosome name. */ export interface ChromosomeLocation { - type?: "ChromosomeLocation"; + /** + * Location Id. MUST be unique within document. + */ _id?: CURIE; - species_id: CURIE; + type?: "ChromosomeLocation"; + /** + * CURIE representing a species from the `NCBI species taxonomy `_. Default: 'taxonomy:9606' (human) + */ + species_id?: CURIE & string; + /** + * The symbolic chromosome name. For humans, For humans, chromosome names MUST be one of 1..22, X, Y (case-sensitive) + */ chr: string; + /** + * The chromosome region defined by a CytobandInterval + */ interval: CytobandInterval; } /** - * A contiguous region specified by chromosomal bands features. + * A contiguous span on a chromosome defined by cytoband features. The span includes + * the constituent regions described by the start and end cytobands, as well as any + * intervening regions. */ export interface CytobandInterval { type?: "CytobandInterval"; + /** + * The start cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome p-arm than `end`. + */ start: HumanCytoband; + /** + * The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`. + */ end: HumanCytoband; } /** @@ -395,7 +428,7 @@ export interface FunctionalDomain { */ export interface ClientAssayedFusion { type?: "AssayedFusion"; - regulatory_element?: RegulatoryElement; + regulatory_element?: ClientRegulatoryElement; structural_elements: ( | ClientTranscriptSegmentElement | ClientGeneElement @@ -406,6 +439,18 @@ export interface ClientAssayedFusion { causative_event: CausativeEvent; assay: Assay; } +/** + * Define regulatory element object used client-side. + */ +export interface ClientRegulatoryElement { + type?: "RegulatoryElement"; + regulatory_class: RegulatoryClass; + feature_id?: string; + associated_gene?: GeneDescriptor; + feature_location?: LocationDescriptor; + display_class: string; + nomenclature: string; +} /** * TranscriptSegment element class used client-side. */ @@ -478,7 +523,7 @@ export interface ClientUnknownGeneElement { */ export interface ClientCategoricalFusion { type?: "CategoricalFusion"; - regulatory_element?: RegulatoryElement; + regulatory_element?: ClientRegulatoryElement; structural_elements: ( | ClientTranscriptSegmentElement | ClientGeneElement @@ -509,18 +554,6 @@ export interface ClientFunctionalDomain { sequence_location?: LocationDescriptor; domain_id: string; } -/** - * Define regulatory element object used client-side. - */ -export interface ClientRegulatoryElement { - type?: "RegulatoryElement"; - regulatory_class: RegulatoryClass; - feature_id?: string; - associated_gene?: GeneDescriptor; - feature_location?: LocationDescriptor; - display_class: string; - nomenclature: string; -} /** * Abstract class to provide identification properties used by client. */ @@ -663,9 +696,11 @@ export interface ServiceInfoResponse { export interface SuggestGeneResponse { warnings?: string[]; term: string; - symbols?: [string, string, string][]; - prev_symbols?: [string, string, string][]; - aliases?: [string, string, string][]; + matches_count: number; + concept_id?: [string, string, string, string, string][]; + symbol?: [string, string, string, string, string][]; + prev_symbols?: [string, string, string, string, string][]; + aliases?: [string, string, string, string, string][]; } /** * Response model for transcript segment element construction endpoint. diff --git a/server/curfu/cli.py b/server/curfu/cli.py index d5c22595..15f7a25e 100644 --- a/server/curfu/cli.py +++ b/server/curfu/cli.py @@ -8,8 +8,8 @@ from curfu.devtools import DEFAULT_INTERPRO_TYPES from curfu.devtools.build_client_types import build_client_types -from curfu.devtools.interpro import build_gene_domain_maps -from curfu.devtools.gene import GeneSuggestionBuilder +from curfu.devtools.build_interpro import build_gene_domain_maps +from curfu.devtools.build_gene_suggest import GeneSuggestionBuilder @click.command() @@ -93,7 +93,7 @@ def domains( def genes() -> None: """Build gene mappings for use in Fusion Curation gene autocomplete.""" builder = GeneSuggestionBuilder() - builder.build_gene_suggest_maps() + builder.build_gene_suggestion_file() @devtools.command() diff --git a/server/curfu/devtools/build_gene_suggest.py b/server/curfu/devtools/build_gene_suggest.py new file mode 100644 index 00000000..88ec206b --- /dev/null +++ b/server/curfu/devtools/build_gene_suggest.py @@ -0,0 +1,148 @@ +"""Provide tools to build backend data relating to gene identification.""" +import csv +from typing import Dict, List, Optional +from pathlib import Path +from datetime import datetime as dt +from timeit import default_timer as timer +from biocommons.seqrepo.seqrepo import SeqRepo + +from gene.database import create_db +from gene.schemas import RecordType +import click + +from curfu import APP_ROOT, SEQREPO_DATA_PATH, logger + + +class GeneSuggestionBuilder: + """Provide build tools for gene autosuggest mappings. + + Implemented as a class for easier sharing of database resources between methods. + """ + + def __init__(self): + """Initialize class.""" + self.gene_db = create_db() + self.sr = SeqRepo(SEQREPO_DATA_PATH) + self.genes = [] + + def _get_chromosome(self, record: Dict) -> Optional[str]: + """Extract readable chromosome identifier from gene extensions. + + :param record: stored normalized record + :return: NC_ chromosome ID if successful + """ + for source in ("ncbi_locations", "ensembl_locations", "locations"): + for location in record.get(source, []): + if location["type"] == "SequenceLocation": + identifiers = self.sr.translate_identifier( + location["sequence_id"], "NCBI" + ) + if identifiers: + return identifiers[0] + return None + + @staticmethod + def _make_list_column(values: List[str]) -> str: + """Convert a list of strings into a comma-separated string, filtering out + non-alphabetic values. + + This static method takes a list of strings as input and converts it into a + comma-separated string. The method filters out non-alphabetic values and + ensures that only unique, alphabetic values are included in the result. + + Note: + - The method performs a case-insensitive comparison when filtering unique + values. + - If the input list contains non-alphabetic values or duplicates, they will be + excluded from the result. + - The result will be a comma-separated string with no leading or trailing + commas. + + :param values: A list of strings to be converted into a comma-separated string. + :return: A comma-separated string containing unique, alphabetic values from the + input list. + """ + unique = {v.upper() for v in values} + filtered = {v for v in unique if any(char.isalpha() for char in v)} + return ",".join(filtered) + + def _process_gene_record(self, record: Dict) -> None: + """Add the gene record to processed suggestions. + + :param record: gene record object retrieved from DB + """ + symbol = record.get("symbol") + chromosome = self._get_chromosome(record) + strand = record.get("strand") + if not all([symbol, chromosome, strand]): + return + gene_data = { + "concept_id": record["concept_id"], + "symbol": symbol, + "aliases": self._make_list_column(record.get("aliases", [])), + "previous_symbols": self._make_list_column( + record.get("previous_symbols", []) + ), + "chromosome": self._get_chromosome(record), + "strand": record.get("strand"), + } + self.genes.append(gene_data) + + def _save_suggest_file(self, output_dir: Path) -> None: + """Save the gene suggestions table to a CSV file. + + This method takes the processed gene suggestions stored in the `self.genes` + attribute and saves them to a CSV file. The CSV file will have the following + columns: + + - `concept_id`: The unique identifier for the gene concept. + - `symbol`: The primary gene symbol. + - `aliases`: Comma-separated list of gene aliases. + - `previous_symbols`: Comma-separated list of previous gene symbols. + - `chromosome`: The chromosome where the gene is located. + - `strand`: The genomic strand where the gene is located. + + The CSV file will be named using the current date in the format + "gene_suggest_YYYYMMDD.csv" and will be saved in the specified `output_dir`. + + :param output_dir: The directory where the gene suggestions table file will be + saved. + """ + fieldnames = [ + "concept_id", + "symbol", + "aliases", + "previous_symbols", + "chromosome", + "strand", + ] + today = dt.strftime(dt.today(), "%Y%m%d") + with open(output_dir / f"gene_suggest_{today}.csv", "w") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for row in self.genes: + writer.writerow(row) + + def build_gene_suggestion_file(self, output_dir: Path = APP_ROOT / "data") -> None: + """ + Build the gene suggestions table file by processing gene records from the gene + database. + + - The gene database should be initialized before calling this method. + - The gene suggestions table file will be saved in CSV format. + + :param output_dir: The directory where the gene suggestions table file will be + saved. Default is the 'data' directory within the application root. + :return: None + """ + start = timer() + + for record in self.gene_db.get_all_records(RecordType.MERGER): + self._process_gene_record(record) + + self._save_suggest_file(output_dir) + + stop = timer() + msg = f"Built gene suggestions table in {(stop - start):.5f} seconds." + click.echo(msg) + logger.info(msg) diff --git a/server/curfu/devtools/interpro.py b/server/curfu/devtools/build_interpro.py similarity index 99% rename from server/curfu/devtools/interpro.py rename to server/curfu/devtools/build_interpro.py index 9f88c79b..b1337dbb 100644 --- a/server/curfu/devtools/interpro.py +++ b/server/curfu/devtools/build_interpro.py @@ -8,6 +8,7 @@ import os import shutil import xml.etree.ElementTree as ET +from gene.database import create_db from gene.query import QueryHandler import click @@ -63,7 +64,7 @@ def get_uniprot_refs() -> UniprotRefs: start = timer() # scanning on DynamoDB_Local is extremely slow - q = QueryHandler() + q = QueryHandler(create_db()) # must be dynamodb genes = q.db.genes uniprot_ids: UniprotRefs = {} diff --git a/server/curfu/devtools/gene.py b/server/curfu/devtools/gene.py deleted file mode 100644 index 94b1e884..00000000 --- a/server/curfu/devtools/gene.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Provide tools to build backend data relating to gene identification.""" -from typing import Dict, Tuple -from pathlib import Path -from datetime import datetime as dt -from timeit import default_timer as timer - -from gene.query import QueryHandler -import click - -from curfu import APP_ROOT, logger - - -# type stub -Map = Dict[str, Tuple[str, str, str]] - - -class GeneSuggestionBuilder: - """Provide build tools for gene autosuggest mappings. - - Implemented as a class for easier sharing of DynamoDB resources between methods. - """ - - xrefs_map = {} - symbol_map = {} - label_map = {} - prev_symbol_map = {} - alias_map = {} - assoc_with_map = {} - - def __init__(self): - """Initialize class. - - TODO: think about how best to force prod environment - """ - self.q = QueryHandler() - self.genes = self.q.db.genes - - @staticmethod - def write_map_to_file(mapping: Map, outfile_path: Path) -> None: - """Save individual gene mapping to file. - :param Map mapping: dictionary keying values of a specific item_type set - to normalized gene data - :param outfile_path Path: path to save mapping at - """ - with open(outfile_path, "w") as fp: - for normed in mapping.values(): - fp.write(f"{normed[0]}\t{normed[1]}\t{normed[2]}\n") - - def update_maps(self, record: Dict) -> None: - """Add map entries for relevant data in given DB record. - :param Dict record: individual identity or merged record from DDB. Ideally, - should not duplicate previous records (i.e., `record` should not be a record - for which an associated merged record exists). - """ - norm_id = record["concept_id"] - norm_symbol = record["symbol"] - - for xref in [norm_id] + record.get("xrefs", []): - self.xrefs_map[xref.lower()] = (xref, norm_id, norm_symbol) - - self.symbol_map[norm_symbol.lower()] = (norm_symbol, norm_id, "") - - for prev_symbol in record.get("previous_symbols", []): - self.prev_symbol_map[prev_symbol.lower()] = ( - prev_symbol, - norm_id, - norm_symbol, - ) - - for assoc_with in record.get("associated_with", []): - self.assoc_with_map[assoc_with.lower()] = (assoc_with, norm_id, norm_symbol) - - label = record.get("label") - if label: - self.label_map[label.lower()] = (label, norm_id, norm_symbol) - - for alias in record.get("aliases", []): - self.alias_map[alias.lower()] = (alias, norm_id, norm_symbol) - - def build_gene_suggest_maps(self, output_dir: Path = APP_ROOT / "data") -> None: - """Construct gene autocomplete suggestion mappings. - Scan existing gene_concepts table and gather all possible terms that can be - used to look up normalized concepts. Then, link them with their associated - normalized concept IDs/labels and save them. - - :param Path output_dir: path to directory to save output files in - """ - start = timer() - - last_evaluated_key = None - valid_item_types = ("identity", "merger") - while True: - if last_evaluated_key: - response = self.genes.scan(ExclusiveStartKey=last_evaluated_key) - else: - response = self.genes.scan() - last_evaluated_key = response.get("LastEvaluatedKey") - records = response["Items"] - - for record in records: - if record["item_type"] not in valid_item_types: - continue - elif "merge_ref" in record: - continue - self.update_maps(record) - - if not last_evaluated_key: - break - - today = dt.strftime(dt.today(), "%Y%m%d") - for (map, name) in ( - (self.xrefs_map, "xrefs"), - (self.symbol_map, "symbols"), - (self.label_map, "labels"), - (self.prev_symbol_map, "prev_symbols"), - (self.alias_map, "aliases"), - (self.assoc_with_map, "assoc_with"), - ): - self.write_map_to_file(map, output_dir / f"gene_{name}_suggest_{today}.tsv") - - stop = timer() - msg = f"Built gene suggestions table in {(stop - start):.5f} seconds." - click.echo(msg) - logger.info(msg) diff --git a/server/curfu/gene_services.py b/server/curfu/gene_services.py index f70ed5ce..a39c691d 100644 --- a/server/curfu/gene_services.py +++ b/server/curfu/gene_services.py @@ -1,39 +1,48 @@ """Wrapper for required Gene Normalization services.""" -from typing import List, Tuple, Dict, Union +from pathlib import Path +from typing import List, Optional, Tuple, Dict, Union import csv from gene.query import QueryHandler from gene.schemas import MatchType from ga4gh.vrsatile.pydantic.vrsatile_models import CURIE -from curfu import logger, ServiceWarning, MAX_SUGGESTIONS +from curfu import logger, ServiceWarning from curfu.utils import get_data_file - -# term -> (normalized ID, normalized label) -Map = Dict[str, Tuple[str, str, str]] +# term, symbol, concept ID, chromosome, strand +Suggestion = Tuple[str, str, str, str, str] class GeneService: """Provide gene ID resolution and term autocorrect suggestions.""" - symbols_map: Map = {} - prev_symbols_map: Map = {} - aliases_map: Map = {} + def __init__(self, suggestions_file: Optional[Path] = None): + """Initialize gene service provider class. - def load_mapping(self) -> None: - """Load mapping files for use in autocomplete.""" - map_pairs = ( - ("symbols", self.symbols_map), - ("prev_symbols", self.prev_symbols_map), - ("aliases", self.aliases_map), - ) - for name, map in map_pairs: - map_file = get_data_file(f"gene_{name}") - with open(map_file, "r") as m: - reader = csv.reader(m, delimiter="\t") - for term, normalized_id, normalized_label in reader: - map[term.lower()] = (term, normalized_id, normalized_label) + :param suggestions_file: path to existing suggestions file. If not provided, + will use newest available file in expected location. + """ + if not suggestions_file: + suggestions_file = get_data_file("gene_suggest") + + self.concept_id_map: Dict[str, Suggestion] = {} + self.symbol_map: Dict[str, Suggestion] = {} + self.aliases_map: Dict[str, Suggestion] = {} + self.prev_symbols_map: Dict[str, Suggestion] = {} + + for row in csv.DictReader(open(suggestions_file, "r")): + symbol = row["symbol"] + concept_id = row["concept_id"] + suggestion = [symbol, concept_id, row["chromosome"], row["strand"]] + self.concept_id_map[concept_id.upper()] = tuple([concept_id] + suggestion) + self.symbol_map[symbol.upper()] = tuple([symbol] + suggestion) + for alias in row.get("aliases", []): + self.aliases_map[alias.upper()] = tuple([alias] + suggestion) + for prev_symbol in row.get("previous_symbols", []): + self.prev_symbols_map[prev_symbol.upper()] = tuple( + [prev_symbol] + suggestion + ) @staticmethod def get_normalized_gene( @@ -102,57 +111,41 @@ def get_normalized_gene( logger.warning(warn) raise ServiceWarning(warn) - def suggest_genes(self, query: str) -> Dict[str, List[Tuple[str, str, str]]]: - """Provide autocomplete suggestions based on submitted term. + @staticmethod + def _get_completion_results(term: str, lookup: Dict) -> List[Suggestion]: + """Filter valid completions for term. - Outstanding questions: - * Where to make decisions about item types -- in client? provide as route - parameter? in gene services? All of the above? - * how to safely reduce redundant suggestions + :param term: user-entered text + :param lookup: stored mapping where key is a name (e.g. symbol or alias) and + value is the complete suggestion + :return: List of suggested completions along with relevant metadata + """ + matches = [] + for key, data in lookup.items(): + if key.startswith(term): + matches.append(data) + matches = sorted(matches, key=lambda s: s[0]) + return matches + + def suggest_genes(self, query: str) -> Dict[str, List[Suggestion]]: + """Provide autocomplete suggestions based on submitted term. :param str query: text entered by user :returns: dict returning list containing any number of suggestion tuples, where - each is the correctly-cased term, normalized ID, normalized label, for each - item type + each is the correctly-cased term, normalized ID, normalized label, for each + item type :raises ServiceWarning: if number of matching suggestions exceeds - MAX_SUGGESTIONS + MAX_SUGGESTIONS """ - # tentatively, just search terms - q_lower = query.lower() + q_upper = query.upper() suggestions = {} - suggestions["symbols"] = sorted( - [ - (v[0], v[1], v[0]) - for t, v in self.symbols_map.items() - if t.startswith(q_lower) - ], - key=lambda s: s[0], + suggestions["concept_id"] = self._get_completion_results( + q_upper, self.concept_id_map ) - suggestions["prev_symbols"] = sorted( - [ - (v[0], v[1], v[2]) - for t, v in self.prev_symbols_map.items() - if t.startswith(q_lower) - ], - key=lambda s: s[0], - ) - suggestions["aliases"] = sorted( - [ - (v[0], v[1], v[2]) - for t, v in self.aliases_map.items() - if t.startswith(q_lower) - ], - key=lambda s: s[0], + suggestions["symbol"] = self._get_completion_results(q_upper, self.symbol_map) + suggestions["prev_symbols"] = self._get_completion_results( + q_upper, self.prev_symbols_map ) + suggestions["aliases"] = self._get_completion_results(q_upper, self.aliases_map) - n = ( - len(suggestions["symbols"]) - + len(suggestions["prev_symbols"]) - + len(suggestions["aliases"]) - ) - if n > MAX_SUGGESTIONS: - warn = f"Exceeds max matches: Got {n} possible matches for {query} (limit: {MAX_SUGGESTIONS})" # noqa: E501 - logger.warning(warn) - raise ServiceWarning(warn) - else: - return suggestions + return suggestions diff --git a/server/curfu/main.py b/server/curfu/main.py index 7f083a98..6010bd68 100644 --- a/server/curfu/main.py +++ b/server/curfu/main.py @@ -103,7 +103,6 @@ def get_gene_services() -> GeneService: :return: GeneService instance """ gene_services = GeneService() - gene_services.load_mapping() return gene_services diff --git a/server/curfu/routers/complete.py b/server/curfu/routers/complete.py index b879a5d4..b7039663 100644 --- a/server/curfu/routers/complete.py +++ b/server/curfu/routers/complete.py @@ -3,7 +3,7 @@ from fastapi import Query, APIRouter, Request -from curfu import ServiceWarning +from curfu import MAX_SUGGESTIONS, ServiceWarning from curfu.schemas import ResponseDict, AssociatedDomainResponse, SuggestGeneResponse @@ -26,11 +26,25 @@ def suggest_gene(request: Request, term: str = Query("")) -> ResponseDict: provide suggestions. """ response: ResponseDict = {"term": term} - try: - possible_matches = request.app.state.genes.suggest_genes(term) - response.update(possible_matches) - except ServiceWarning as e: - response["warnings"] = [str(e)] + possible_matches = request.app.state.genes.suggest_genes(term) + n = ( + len(possible_matches["concept_id"]) + + len(possible_matches["symbol"]) + + len(possible_matches["prev_symbols"]) + + len(possible_matches["aliases"]) + ) + + response["matches_count"] = n + if n > MAX_SUGGESTIONS: + warn = f"Exceeds max matches: Got {n} possible matches for {term} (limit: {MAX_SUGGESTIONS})" # noqa: E501 + response["warnings"] = [warn] + term_upper = term.upper() + for match_type in ("concept_id", "symbol", "prev_symbols", "aliases"): + reduced = [ + m for m in possible_matches[match_type] if m[0].upper() == term_upper + ] + possible_matches[match_type] = reduced + response.update(possible_matches) return response diff --git a/server/curfu/schemas.py b/server/curfu/schemas.py index 15971ec3..0f3d1dcb 100644 --- a/server/curfu/schemas.py +++ b/server/curfu/schemas.py @@ -23,7 +23,10 @@ ResponseWarnings = Optional[List[StrictStr]] ResponseDict = Dict[ - str, Union[str, CURIE, List[str], List[Tuple[str, str, str, str]], FunctionalDomain] + str, + Union[ + str, int, CURIE, List[str], List[Tuple[str, str, str, str]], FunctionalDomain + ], ] Warnings = List[str] @@ -149,10 +152,12 @@ class SuggestGeneResponse(Response): """Response model for gene autocomplete suggestions endpoint.""" term: StrictStr - # complete term, normalized ID, normalized label - symbols: Optional[List[Tuple[str, str, str]]] - prev_symbols: Optional[List[Tuple[str, str, str]]] - aliases: Optional[List[Tuple[str, str, str]]] + matches_count: int + # complete term, normalized symbol, normalized concept ID, chromosome ID, strand + concept_id: Optional[List[Tuple[str, str, str, str, str]]] + symbol: Optional[List[Tuple[str, str, str, str, str]]] + prev_symbols: Optional[List[Tuple[str, str, str, str, str]]] + aliases: Optional[List[Tuple[str, str, str, str, str]]] class DomainParams(BaseModel): diff --git a/server/curfu/utils.py b/server/curfu/utils.py index b2dabaaf..2b56208e 100644 --- a/server/curfu/utils.py +++ b/server/curfu/utils.py @@ -16,11 +16,11 @@ def get_latest_s3_file(file_prefix: str) -> ObjectSummary: """Get latest S3 object representation for data file + :param file_prefix: filename prefix for data file :return: boto3 ObjectSummary - :raise: - ResourceLoadException: if Boto3 S3 initialization fails - FileNotFoundError: if no matching files exist in the bucket + :raise ResourceLoadException: if Boto3 S3 initialization fails + :raise FileNotFoundError: if no matching files exist in the bucket """ logger.info(f"Attempting S3 lookup for data file pattern {file_prefix}...") s3 = boto3.resource("s3", config=Config(region_name="us-east-2")) @@ -42,6 +42,7 @@ def get_latest_s3_file(file_prefix: str) -> ObjectSummary: def download_s3_file(bucket_object: ObjectSummary) -> Path: """Download local copy of file from S3 + :param bucket_object: boto object representation of S3 file :return: Path to downloaded file """ @@ -58,9 +59,9 @@ def download_s3_file(bucket_object: ObjectSummary) -> Path: def get_latest_data_file(file_prefix: str, local_files: List[Path]) -> Path: - """ - Get path to latest version of given data file. Download from S3 if not + """Get path to latest version of given data file. Download from S3 if not available locally. + :param file_prefix: leading pattern in filename (eg `gene_aliases`) :param local_files: local files matching pattern :return: path to up-to-date file @@ -74,15 +75,16 @@ def get_latest_data_file(file_prefix: str, local_files: List[Path]) -> Path: def get_data_file(filename_prefix: str) -> Path: - """ - Acquire most recent version of static data file. Download from S3 if not available locally. - :param filename_prefix: leading text of filename, eg `gene_aliases_suggest`. Should not - include filetype or date information. + """Acquire most recent version of static data file. Download from S3 if not + available locally. + + :param filename_prefix: leading text of filename, eg `gene_aliases_suggest`. Should + not include filetype or date information. :return: Path to acquired file. """ data_dir = APP_ROOT / "data" data_dir.mkdir(exist_ok=True) - file_glob = f"{filename_prefix}*.tsv" + file_glob = f"{filename_prefix}*sv" files = list(data_dir.glob(file_glob)) if not files: return download_s3_file(get_latest_s3_file(filename_prefix)) diff --git a/server/setup.cfg b/server/setup.cfg index 3c635989..9127d2bd 100644 --- a/server/setup.cfg +++ b/server/setup.cfg @@ -37,6 +37,7 @@ dev = flake8-docstrings black pre-commit + gene-normalizer ~= 0.1.39 pydantic-to-typescript diff --git a/server/tests/integration/test_complete.py b/server/tests/integration/test_complete.py index d99ee099..08c51394 100644 --- a/server/tests/integration/test_complete.py +++ b/server/tests/integration/test_complete.py @@ -10,31 +10,55 @@ async def test_normalize_gene(async_client: AsyncClient): assert response.status_code == 200 assert response.json() == { "term": "NTRK", - "symbols": [ - ["NTRK1", "hgnc:8031", "NTRK1"], - ["NTRK2", "hgnc:8032", "NTRK2"], - ["NTRK3", "hgnc:8033", "NTRK3"], - ["NTRK3-AS1", "hgnc:27532", "NTRK3-AS1"], + "matches_count": 4, + "concept_id": [], + "symbol": [ + ["NTRK1", "NTRK1", "hgnc:8031", "NCBI:NC_000001.11", "+"], + ["NTRK2", "NTRK2", "hgnc:8032", "NCBI:NC_000009.12", "+"], + ["NTRK3", "NTRK3", "hgnc:8033", "NCBI:NC_000015.10", "-"], + ["NTRK3-AS1", "NTRK3-AS1", "hgnc:27532", "NCBI:NC_000015.10", "+"], ], + "prev_symbols": [], + "aliases": [], + } + + response = await async_client.get("/api/complete/gene?term=a") + assert response.status_code == 200 + assert response.json() == { + "warnings": [ + "Exceeds max matches: Got 2096 possible matches for a (limit: 50)" + ], + "term": "a", + "matches_count": 2096, + "concept_id": [], + "symbol": [], "prev_symbols": [ - ["NTRK4", "hgnc:2730", "DDR1"], - ["NTRKR1", "hgnc:10256", "ROR1"], - ["NTRKR2", "hgnc:10257", "ROR2"], - ["NTRKR3", "hgnc:2731", "DDR2"], + ["A", "LOC100420587", "ncbigene:100420587", "NCBI:NC_000019.10", "-"] ], "aliases": [ - ["NTRK4", "hgnc:2730", "DDR1"], - ["NTRKR1", "hgnc:10256", "ROR1"], - ["NTRKR2", "hgnc:10257", "ROR2"], - ["NTRKR3", "hgnc:2731", "DDR2"], + ["A", "LOC110467529", "ncbigene:110467529", "NCBI:NC_000021.9", "+"] ], } - response = await async_client.get("/api/complete/gene?term=a") + response = await async_client.get("/api/complete/gene?term=hgnc:1097") assert response.status_code == 200 assert response.json() == { - "term": "a", - "warnings": [ - "Exceeds max matches: Got 6645 possible matches for a (limit: 50)" + "term": "hgnc:1097", + "matches_count": 11, + "concept_id": [ + ["hgnc:1097", "BRAF", "hgnc:1097", "NCBI:NC_000007.14", "-"], + ["hgnc:10970", "SLC22A6", "hgnc:10970", "NCBI:NC_000011.10", "-"], + ["hgnc:10971", "SLC22A7", "hgnc:10971", "NCBI:NC_000006.12", "+"], + ["hgnc:10972", "SLC22A8", "hgnc:10972", "NCBI:NC_000011.10", "-"], + ["hgnc:10973", "SLC23A2", "hgnc:10973", "NCBI:NC_000020.11", "-"], + ["hgnc:10974", "SLC23A1", "hgnc:10974", "NCBI:NC_000005.10", "-"], + ["hgnc:10975", "SLC24A1", "hgnc:10975", "NCBI:NC_000015.10", "+"], + ["hgnc:10976", "SLC24A2", "hgnc:10976", "NCBI:NC_000009.12", "-"], + ["hgnc:10977", "SLC24A3", "hgnc:10977", "NCBI:NC_000020.11", "+"], + ["hgnc:10978", "SLC24A4", "hgnc:10978", "NCBI:NC_000014.9", "+"], + ["hgnc:10979", "SLC25A1", "hgnc:10979", "NCBI:NC_000022.11", "-"], ], + "symbol": [], + "prev_symbols": [], + "aliases": [], }