From 2555349e5662ae74d5f32f920182e50c7c76ef4b Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 13:27:14 -0600 Subject: [PATCH 01/22] add sample meta endpont --- src/service/data_products/common_functions.py | 30 ++++++++++ .../data_products/genome_attributes.py | 56 +++++++++---------- src/service/data_products/samples.py | 34 ++++++++++- 3 files changed, 87 insertions(+), 33 deletions(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 23a6c8857..79010eee5 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -4,6 +4,7 @@ from typing import Any, Callable, NamedTuple import src.common.storage.collection_and_field_names as names +from src.common.product_models import columnar_attribs_common_models as col_models from src.common.storage.db_doc_conversions import ( collection_load_version_key, collection_data_id_key, @@ -109,6 +110,35 @@ async def get_collection_singleton_from_db( ) +async def get_columnar_attribs_meta( + storage: ArangoStorage, + collection: str, + collection_id: str, + load_ver: str, + load_ver_override: bool +) -> col_models.ColumnarAttributesMeta: + """ + Get the columnar attributes meta document for a collection. The document is expected to be + a singleton per collection load version. + + storage - the storage system. + collection - the arango collection containing the document. + collection_id - the KBase collection containing the document. + load_ver - the load version of the collection. + load_ver_override - whether to override the load version. + """ + doc = await get_collection_singleton_from_db( + storage, + collection, + collection_id, + load_ver, + bool(load_ver_override) + ) + doc[col_models.FIELD_COLUMNS] = [col_models.AttributesColumn(**d) + for d in doc[col_models.FIELD_COLUMNS]] + return col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) + + async def get_doc_from_collection_by_unique_id( store: ArangoStorage, collection: str, diff --git a/src/service/data_products/genome_attributes.py b/src/service/data_products/genome_attributes.py index 673dcece4..6158620de 100644 --- a/src/service/data_products/genome_attributes.py +++ b/src/service/data_products/genome_attributes.py @@ -2,25 +2,25 @@ The genome_attribs data product, which provides genome attributes for a collection. """ -from collections import defaultdict import logging +from collections import defaultdict from typing import Any, Callable, Annotated +import numpy as np from fastapi import APIRouter, Request, Depends, Query from pydantic import BaseModel from pydantic import Field -import numpy as np - import src.common.storage.collection_and_field_names as names from src.common.product_models import columnar_attribs_common_models as col_models from src.service import app_state -from src.service.app_state_data_structures import CollectionsState, PickleableDependencies from src.service import errors from src.service import kb_auth -from src.service import processing_matches from src.service import models +from src.service import processing_matches from src.service import processing_selections +from src.service.app_state_data_structures import CollectionsState, PickleableDependencies +from src.service.data_products import common_models from src.service.data_products.common_functions import ( get_load_version, remove_collection_keys, @@ -29,9 +29,8 @@ override_load_version, query_table, query_simple_collection_list, - get_collection_singleton_from_db, + get_columnar_attribs_meta, ) -from src.service.data_products import common_models from src.service.data_products.data_product_processing import ( MATCH_ID_PREFIX, SELECTION_ID_PREFIX, @@ -266,27 +265,12 @@ async def get_genome_attributes_meta( ) -> col_models.ColumnarAttributesMeta: storage = app_state.get_app_state(r).arangostorage _, load_ver = await get_load_version(storage, collection_id, ID, load_ver_override, user) - meta = await _get_genome_attributes_meta_internal( - storage, collection_id, load_ver, load_ver_override) + meta = await get_columnar_attribs_meta( + storage, names.COLL_GENOME_ATTRIBS_META, collection_id, load_ver, load_ver_override) meta.columns = [c for c in meta.columns if not c.non_visible] return meta -async def _get_genome_attributes_meta_internal( - storage: ArangoStorage, collection_id: str, load_ver: str, load_ver_override: bool -) -> col_models.ColumnarAttributesMeta: - doc = await get_collection_singleton_from_db( - storage, - names.COLL_GENOME_ATTRIBS_META, - collection_id, - load_ver, - bool(load_ver_override) - ) - doc[col_models.FIELD_COLUMNS] = [col_models.AttributesColumn(**d) - for d in doc[col_models.FIELD_COLUMNS]] - return col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) - - @_ROUTER.get( "/", response_model=TableAttributes, @@ -336,8 +320,12 @@ async def get_genome_attributes( load_ver, load_ver_override, ID, - (await _get_genome_attributes_meta_internal( - appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns, + (await get_columnar_attribs_meta( + appstate.arangostorage, + names.COLL_GENOME_ATTRIBS_META, + collection_id, + load_ver, + load_ver_override)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, count=count, sort_on=sort_on, @@ -415,8 +403,12 @@ async def get_histogram( load_ver, load_ver_override, ID, - (await _get_genome_attributes_meta_internal( - appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns, + (await get_columnar_attribs_meta( + appstate.arangostorage, + names.COLL_GENOME_ATTRIBS_META, + collection_id, + load_ver, + load_ver_override)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, filter_conjunction=conjunction, match_spec=match_spec, @@ -498,8 +490,12 @@ async def get_xy_scatter( load_ver, load_ver_override, ID, - (await _get_genome_attributes_meta_internal( - appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns, + (await get_columnar_attribs_meta( + appstate.arangostorage, + names.COLL_GENOME_ATTRIBS_META, + collection_id, + load_ver, + load_ver_override)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, filter_conjunction=conjunction, match_spec=match_spec, diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index 2cf3276a9..b281762fa 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -7,16 +7,18 @@ from pydantic import BaseModel, Field import src.common.storage.collection_and_field_names as names +from src.common.product_models import columnar_attribs_common_models as col_models from src.common.product_models.common_models import SubsetProcessStates from src.service import app_state, errors, kb_auth, models +from src.service.data_products import common_models from src.service.data_products.common_functions import ( remove_collection_keys, remove_marked_subset, query_table, get_load_version, - QueryTableResult + QueryTableResult, + get_columnar_attribs_meta ) -from src.service.data_products import common_models from src.service.data_products.data_product_processing import ( MATCH_ID_PREFIX, SELECTION_ID_PREFIX, @@ -38,7 +40,7 @@ # reworked later to remove the many duplicate sample records and instead have some kind of # M:1 kbase_id:sample relationship. -ID = "samples" +ID = names.SAMPLES_PRODUCT_ID _ROUTER = APIRouter(tags=["Samples"], prefix=f"/{ID}") @@ -72,8 +74,13 @@ async def delete_selection(self, storage: ArangoStorage, internal_selection_id: data_product=ID, router=_ROUTER, db_collections=[ + common_models.DBCollection( + name=names.COLL_SAMPLES_META, + indexes=[] # lookup is by key + ), common_models.DBCollection( name=names.COLL_SAMPLES, + view_required=True, indexes=[ [ names.FLD_COLLECTION_ID, @@ -159,6 +166,27 @@ class Samples(BaseModel): ) +@_ROUTER.get( + "/meta", + response_model=col_models.ColumnarAttributesMeta, + description= +""" +Get metadata about the samples table including column names, type, +minimum and maximum values, etc. +""") +async def get_samples_meta( + r: Request, + collection_id: str = PATH_VALIDATOR_COLLECTION_ID, + load_ver_override: common_models.QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = None, + user: kb_auth.KBaseUser = Depends(_OPT_AUTH) +) -> col_models.ColumnarAttributesMeta: + storage = app_state.get_app_state(r).arangostorage + _, load_ver = await get_load_version(storage, collection_id, ID, load_ver_override, user) + meta = await get_columnar_attribs_meta(storage, names.COLL_SAMPLES_META, collection_id, load_ver, load_ver_override) + meta.columns = [c for c in meta.columns if not c.non_visible] + return meta + + # At some point we're going to want to filter/sort on fields. We may want a list of fields # somewhere to check input fields are ok... but really we could just fetch the first document # in the collection and check the fields From f64762a763bdbe96c62a5bb3bd6fb86ce080006e Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Mon, 29 Jan 2024 17:45:05 -0600 Subject: [PATCH 02/22] Update src/service/data_products/genome_attributes.py Co-authored-by: MrCreosote --- src/service/data_products/genome_attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/service/data_products/genome_attributes.py b/src/service/data_products/genome_attributes.py index 6158620de..d1459c1cb 100644 --- a/src/service/data_products/genome_attributes.py +++ b/src/service/data_products/genome_attributes.py @@ -266,7 +266,7 @@ async def get_genome_attributes_meta( storage = app_state.get_app_state(r).arangostorage _, load_ver = await get_load_version(storage, collection_id, ID, load_ver_override, user) meta = await get_columnar_attribs_meta( - storage, names.COLL_GENOME_ATTRIBS_META, collection_id, load_ver, load_ver_override) + storage, names.COLL_GENOME_ATTRIBS_META, collection_id, load_ver, bool(load_ver_override)) meta.columns = [c for c in meta.columns if not c.non_visible] return meta From 50f3a631cce98e6257df555ae99c726eba08685a Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Mon, 29 Jan 2024 17:45:10 -0600 Subject: [PATCH 03/22] Update src/service/data_products/samples.py Co-authored-by: MrCreosote --- src/service/data_products/samples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index b281762fa..6820564e1 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -182,7 +182,7 @@ async def get_samples_meta( ) -> col_models.ColumnarAttributesMeta: storage = app_state.get_app_state(r).arangostorage _, load_ver = await get_load_version(storage, collection_id, ID, load_ver_override, user) - meta = await get_columnar_attribs_meta(storage, names.COLL_SAMPLES_META, collection_id, load_ver, load_ver_override) + meta = await get_columnar_attribs_meta(storage, names.COLL_SAMPLES_META, collection_id, load_ver, bool(load_ver_override)) meta.columns = [c for c in meta.columns if not c.non_visible] return meta From af6d5531fcd3d62741fbd732c4080fdfa78ec2fc Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 18:58:40 -0600 Subject: [PATCH 04/22] abstract more for get_columnar_attribs_meta --- src/service/data_products/common_functions.py | 37 ++++++++++++++----- .../data_products/genome_attributes.py | 36 ++++++++++-------- src/service/data_products/samples.py | 14 +++++-- 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 79010eee5..093bc34a0 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -3,13 +3,15 @@ """ from typing import Any, Callable, NamedTuple +from fastapi import Request + import src.common.storage.collection_and_field_names as names from src.common.product_models import columnar_attribs_common_models as col_models from src.common.storage.db_doc_conversions import ( collection_load_version_key, collection_data_id_key, ) -from src.service import errors, kb_auth, models +from src.service import errors, kb_auth, models, app_state from src.service.filtering.filters import FilterSet from src.service.storage_arango import ArangoStorage @@ -111,22 +113,34 @@ async def get_collection_singleton_from_db( async def get_columnar_attribs_meta( - storage: ArangoStorage, + r: Request, collection: str, collection_id: str, - load_ver: str, - load_ver_override: bool + data_product: str, + load_ver_override, + user: kb_auth.KBaseUser, + return_only_visible: bool = False + ) -> col_models.ColumnarAttributesMeta: """ Get the columnar attributes meta document for a collection. The document is expected to be a singleton per collection load version. - storage - the storage system. + r - the request. collection - the arango collection containing the document. - collection_id - the KBase collection containing the document. - load_ver - the load version of the collection. - load_ver_override - whether to override the load version. + collection_id - the ID of the Collection from which to retrieve the load version and possibly + collection object. + data_product - the ID of the data product from which to retrieve the load version. + load_ver_override - an override for the load version. If provided: + * the user must be a service administrator + * the collection is not checked for the existence of the data product. + user - the user. Ignored if load_ver is not provided; must be a service administrator. + return_only_visible - whether to return only visible columns. Default false. + """ + storage = app_state.get_app_state(r).arangostorage + _, load_ver = await get_load_version(storage, collection_id, data_product, load_ver_override, user) + doc = await get_collection_singleton_from_db( storage, collection, @@ -136,7 +150,12 @@ async def get_columnar_attribs_meta( ) doc[col_models.FIELD_COLUMNS] = [col_models.AttributesColumn(**d) for d in doc[col_models.FIELD_COLUMNS]] - return col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) + + meta = col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) + if return_only_visible: + meta.columns = [c for c in meta.columns if not c.non_visible] + + return meta async def get_doc_from_collection_by_unique_id( diff --git a/src/service/data_products/genome_attributes.py b/src/service/data_products/genome_attributes.py index d1459c1cb..a372d28b1 100644 --- a/src/service/data_products/genome_attributes.py +++ b/src/service/data_products/genome_attributes.py @@ -262,12 +262,15 @@ async def get_genome_attributes_meta( collection_id: str = PATH_VALIDATOR_COLLECTION_ID, load_ver_override: common_models.QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = None, user: kb_auth.KBaseUser = Depends(_OPT_AUTH) - ) -> col_models.ColumnarAttributesMeta: - storage = app_state.get_app_state(r).arangostorage - _, load_ver = await get_load_version(storage, collection_id, ID, load_ver_override, user) - meta = await get_columnar_attribs_meta( - storage, names.COLL_GENOME_ATTRIBS_META, collection_id, load_ver, bool(load_ver_override)) - meta.columns = [c for c in meta.columns if not c.non_visible] +) -> col_models.ColumnarAttributesMeta: + meta = await get_columnar_attribs_meta(r, + names.COLL_GENOME_ATTRIBS_META, + collection_id, + ID, + load_ver_override, + user, + return_only_visible=True) + # TODO: remote non_visible field from meta return meta @@ -321,11 +324,12 @@ async def get_genome_attributes( load_ver_override, ID, (await get_columnar_attribs_meta( - appstate.arangostorage, + r, names.COLL_GENOME_ATTRIBS_META, collection_id, - load_ver, - load_ver_override)).columns, + ID, + load_ver_override, + user)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, count=count, sort_on=sort_on, @@ -404,11 +408,12 @@ async def get_histogram( load_ver_override, ID, (await get_columnar_attribs_meta( - appstate.arangostorage, + r, names.COLL_GENOME_ATTRIBS_META, collection_id, - load_ver, - load_ver_override)).columns, + ID, + load_ver_override, + user)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, filter_conjunction=conjunction, match_spec=match_spec, @@ -491,11 +496,12 @@ async def get_xy_scatter( load_ver_override, ID, (await get_columnar_attribs_meta( - appstate.arangostorage, + r, names.COLL_GENOME_ATTRIBS_META, collection_id, - load_ver, - load_ver_override)).columns, + ID, + load_ver_override, + user)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, filter_conjunction=conjunction, match_spec=match_spec, diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index 6820564e1..deb7f9c61 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -180,10 +180,16 @@ async def get_samples_meta( load_ver_override: common_models.QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = None, user: kb_auth.KBaseUser = Depends(_OPT_AUTH) ) -> col_models.ColumnarAttributesMeta: - storage = app_state.get_app_state(r).arangostorage - _, load_ver = await get_load_version(storage, collection_id, ID, load_ver_override, user) - meta = await get_columnar_attribs_meta(storage, names.COLL_SAMPLES_META, collection_id, load_ver, bool(load_ver_override)) - meta.columns = [c for c in meta.columns if not c.non_visible] + + meta = await get_columnar_attribs_meta(r, + names.COLL_SAMPLES_META, + collection_id, + ID, + load_ver_override, + user, + return_only_visible=True) + # TODO: remote non_visible field from meta + return meta From 8331cafb23bbe0359ea07a8405cc62558a0bba2e Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 19:14:43 -0600 Subject: [PATCH 05/22] inline returns --- src/service/data_products/genome_attributes.py | 6 +++--- src/service/data_products/samples.py | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/service/data_products/genome_attributes.py b/src/service/data_products/genome_attributes.py index a372d28b1..c8660c487 100644 --- a/src/service/data_products/genome_attributes.py +++ b/src/service/data_products/genome_attributes.py @@ -263,15 +263,15 @@ async def get_genome_attributes_meta( load_ver_override: common_models.QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = None, user: kb_auth.KBaseUser = Depends(_OPT_AUTH) ) -> col_models.ColumnarAttributesMeta: - meta = await get_columnar_attribs_meta(r, + + return await get_columnar_attribs_meta(r, names.COLL_GENOME_ATTRIBS_META, collection_id, ID, load_ver_override, user, return_only_visible=True) - # TODO: remote non_visible field from meta - return meta + @_ROUTER.get( diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index deb7f9c61..5fc27a3ce 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -181,16 +181,13 @@ async def get_samples_meta( user: kb_auth.KBaseUser = Depends(_OPT_AUTH) ) -> col_models.ColumnarAttributesMeta: - meta = await get_columnar_attribs_meta(r, + return await get_columnar_attribs_meta(r, names.COLL_SAMPLES_META, collection_id, ID, load_ver_override, user, return_only_visible=True) - # TODO: remote non_visible field from meta - - return meta # At some point we're going to want to filter/sort on fields. We may want a list of fields From 60968e71622124966d01ee651a49215afc031022 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 19:15:24 -0600 Subject: [PATCH 06/22] remove extra empty lines --- src/service/data_products/genome_attributes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/service/data_products/genome_attributes.py b/src/service/data_products/genome_attributes.py index c8660c487..04325b78f 100644 --- a/src/service/data_products/genome_attributes.py +++ b/src/service/data_products/genome_attributes.py @@ -273,7 +273,6 @@ async def get_genome_attributes_meta( return_only_visible=True) - @_ROUTER.get( "/", response_model=TableAttributes, From 725673babfb841bef437a89fa4a43737d7d77109 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 19:50:52 -0600 Subject: [PATCH 07/22] abstract only /meta endpoints --- src/service/data_products/common_functions.py | 67 +++++++++++++++++-- .../data_products/genome_attributes.py | 46 ++++++------- src/service/data_products/samples.py | 19 +++--- 3 files changed, 91 insertions(+), 41 deletions(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 093bc34a0..f180af831 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -113,18 +113,45 @@ async def get_collection_singleton_from_db( async def get_columnar_attribs_meta( + storage: ArangoStorage, + collection: str, + collection_id: str, + load_ver: str, + load_ver_override: bool +) -> col_models.ColumnarAttributesMeta: + """ + Get the columnar attributes meta document for a collection. The document is expected to be + a singleton per collection load version. + + storage - the storage system. + collection - the arango collection containing the document. + collection_id - the KBase collection containing the document. + load_ver - the load version of the collection. + load_ver_override - whether to override the load version. + """ + doc = await get_collection_singleton_from_db( + storage, + collection, + collection_id, + load_ver, + bool(load_ver_override) + ) + doc[col_models.FIELD_COLUMNS] = [col_models.AttributesColumn(**d) + for d in doc[col_models.FIELD_COLUMNS]] + return col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) + + +async def get_product_meta( r: Request, collection: str, collection_id: str, data_product: str, load_ver_override, user: kb_auth.KBaseUser, - return_only_visible: bool = False ) -> col_models.ColumnarAttributesMeta: """ - Get the columnar attributes meta document for a collection. The document is expected to be - a singleton per collection load version. + Get the columnar attributes meta document for a collection used by /meta endpoint. r - the request. collection - the arango collection containing the document. @@ -135,7 +162,6 @@ async def get_columnar_attribs_meta( * the user must be a service administrator * the collection is not checked for the existence of the data product. user - the user. Ignored if load_ver is not provided; must be a service administrator. - return_only_visible - whether to return only visible columns. Default false. """ storage = app_state.get_app_state(r).arangostorage @@ -152,8 +178,37 @@ async def get_columnar_attribs_meta( for d in doc[col_models.FIELD_COLUMNS]] meta = col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) - if return_only_visible: - meta.columns = [c for c in meta.columns if not c.non_visible] + meta.columns = [c for c in meta.columns if not c.non_visible] + + return meta + + +async def get_table_meta( + r: Request, + collection: str, + collection_id: str, + data_product: str, + load_ver_override, + user: kb_auth.KBaseUser, + +) -> col_models.ColumnarAttributesMeta: + """ + Get the columnar attributes meta document for a collection used by /meta endpoint. + + r - the request. + collection - the arango collection containing the document. + collection_id - the ID of the Collection from which to retrieve the load version and possibly + collection object. + data_product - the ID of the data product from which to retrieve the load version. + load_ver_override - an override for the load version. If provided: + * the user must be a service administrator + * the collection is not checked for the existence of the data product. + user - the user. Ignored if load_ver is not provided; must be a service administrator. + """ + storage = app_state.get_app_state(r).arangostorage + _, load_ver = await get_load_version(storage, collection_id, data_product, load_ver_override, user) + meta = await get_columnar_attribs_meta(storage, collection, collection_id, load_ver, bool(load_ver_override)) + meta.columns = [c for c in meta.columns if not c.non_visible] return meta diff --git a/src/service/data_products/genome_attributes.py b/src/service/data_products/genome_attributes.py index 04325b78f..666fe6be5 100644 --- a/src/service/data_products/genome_attributes.py +++ b/src/service/data_products/genome_attributes.py @@ -30,6 +30,7 @@ query_table, query_simple_collection_list, get_columnar_attribs_meta, + get_product_meta, ) from src.service.data_products.data_product_processing import ( MATCH_ID_PREFIX, @@ -262,15 +263,13 @@ async def get_genome_attributes_meta( collection_id: str = PATH_VALIDATOR_COLLECTION_ID, load_ver_override: common_models.QUERY_VALIDATOR_LOAD_VERSION_OVERRIDE = None, user: kb_auth.KBaseUser = Depends(_OPT_AUTH) -) -> col_models.ColumnarAttributesMeta: - - return await get_columnar_attribs_meta(r, - names.COLL_GENOME_ATTRIBS_META, - collection_id, - ID, - load_ver_override, - user, - return_only_visible=True) + ) -> col_models.ColumnarAttributesMeta: + return await get_product_meta(r, + names.COLL_GENOME_ATTRIBS_META, + collection_id, + ID, + load_ver_override, + user) @_ROUTER.get( @@ -323,12 +322,11 @@ async def get_genome_attributes( load_ver_override, ID, (await get_columnar_attribs_meta( - r, + appstate.arangostorage, names.COLL_GENOME_ATTRIBS_META, collection_id, - ID, - load_ver_override, - user)).columns, + load_ver, + load_ver_override)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, count=count, sort_on=sort_on, @@ -352,7 +350,7 @@ async def get_genome_attributes( class Histogram(BaseModel): - + bins: Annotated[list[float], Field( example=[2.5, 3.5, 4.5, 5.5], description="The location of the histogram bins. Each bin starts at index i, " @@ -407,12 +405,11 @@ async def get_histogram( load_ver_override, ID, (await get_columnar_attribs_meta( - r, + appstate.arangostorage, names.COLL_GENOME_ATTRIBS_META, collection_id, - ID, - load_ver_override, - user)).columns, + load_ver, + load_ver_override)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, filter_conjunction=conjunction, match_spec=match_spec, @@ -436,7 +433,7 @@ async def get_histogram( class XYScatter(BaseModel): - + xcolumn: Annotated[str, Field( example="Completeness", description="The name of the x column." @@ -495,12 +492,11 @@ async def get_xy_scatter( load_ver_override, ID, (await get_columnar_attribs_meta( - r, + appstate.arangostorage, names.COLL_GENOME_ATTRIBS_META, collection_id, - ID, - load_ver_override, - user)).columns, + load_ver, + load_ver_override)).columns, view_name=coll.get_data_product(ID).search_view if coll else None, filter_conjunction=conjunction, match_spec=match_spec, @@ -737,7 +733,7 @@ async def process_subset_documents( RETURN KEEP(d, @keep) """ bind_vars["keep"] = fields - else: + else: aql += """ RETURN d """ @@ -747,4 +743,4 @@ async def process_subset_documents( async for d in cur: acceptor(d) finally: - await cur.close(ignore_missing=True) + await cur.close(ignore_missing=True) \ No newline at end of file diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index 5fc27a3ce..8191cfc49 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -17,7 +17,7 @@ query_table, get_load_version, QueryTableResult, - get_columnar_attribs_meta + get_columnar_attribs_meta, get_product_meta ) from src.service.data_products.data_product_processing import ( MATCH_ID_PREFIX, @@ -181,18 +181,17 @@ async def get_samples_meta( user: kb_auth.KBaseUser = Depends(_OPT_AUTH) ) -> col_models.ColumnarAttributesMeta: - return await get_columnar_attribs_meta(r, - names.COLL_SAMPLES_META, - collection_id, - ID, - load_ver_override, - user, - return_only_visible=True) + return await get_product_meta(r, + names.COLL_SAMPLES_META, + collection_id, + ID, + load_ver_override, + user) # At some point we're going to want to filter/sort on fields. We may want a list of fields # somewhere to check input fields are ok... but really we could just fetch the first document -# in the collection and check the fields +# in the collection and check the fields @_ROUTER.get( "/", response_model=SamplesTable, @@ -510,4 +509,4 @@ async def get_missing_ids( selection_id=selection_id, user=user, multiple_ids=True, - ) + ) \ No newline at end of file From eb16fc6afc46890c809580d63888c79a2ca06513 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 19:53:16 -0600 Subject: [PATCH 08/22] remove unused method --- src/service/data_products/common_functions.py | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index f180af831..d5001a6d6 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -164,47 +164,7 @@ async def get_product_meta( user - the user. Ignored if load_ver is not provided; must be a service administrator. """ - storage = app_state.get_app_state(r).arangostorage - _, load_ver = await get_load_version(storage, collection_id, data_product, load_ver_override, user) - - doc = await get_collection_singleton_from_db( - storage, - collection, - collection_id, - load_ver, - bool(load_ver_override) - ) - doc[col_models.FIELD_COLUMNS] = [col_models.AttributesColumn(**d) - for d in doc[col_models.FIELD_COLUMNS]] - - meta = col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) - meta.columns = [c for c in meta.columns if not c.non_visible] - - return meta - - -async def get_table_meta( - r: Request, - collection: str, - collection_id: str, - data_product: str, - load_ver_override, - user: kb_auth.KBaseUser, -) -> col_models.ColumnarAttributesMeta: - """ - Get the columnar attributes meta document for a collection used by /meta endpoint. - - r - the request. - collection - the arango collection containing the document. - collection_id - the ID of the Collection from which to retrieve the load version and possibly - collection object. - data_product - the ID of the data product from which to retrieve the load version. - load_ver_override - an override for the load version. If provided: - * the user must be a service administrator - * the collection is not checked for the existence of the data product. - user - the user. Ignored if load_ver is not provided; must be a service administrator. - """ storage = app_state.get_app_state(r).arangostorage _, load_ver = await get_load_version(storage, collection_id, data_product, load_ver_override, user) meta = await get_columnar_attribs_meta(storage, collection, collection_id, load_ver, bool(load_ver_override)) From 15d9e6379eced40a869a29d0d60e3ae8942020b8 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Mon, 29 Jan 2024 20:12:17 -0600 Subject: [PATCH 09/22] remove non_visible field --- .../columnar_attribs_common_models.py | 24 ++++++++++++------- src/service/data_products/common_functions.py | 21 ++++++++++++---- src/service/data_products/samples.py | 2 +- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/common/product_models/columnar_attribs_common_models.py b/src/common/product_models/columnar_attribs_common_models.py index cd27c54be..fa7e91241 100644 --- a/src/common/product_models/columnar_attribs_common_models.py +++ b/src/common/product_models/columnar_attribs_common_models.py @@ -13,6 +13,7 @@ FIELD_COLUMNS = "columns" +NON_VISIBLE = "non_visible" class ColumnType(str, Enum): @@ -49,9 +50,9 @@ class FilterStrategy(str, Enum): """ A search based on ngram matching. """ -class AttributesColumnSpec(BaseModel): +class AttributesColumnBase(BaseModel): """ - A specification for a column in an attributes table. + A base class for a specification for a column in an attributes table. """ key: str = Field( example="checkm_completeness", @@ -66,11 +67,7 @@ class AttributesColumnSpec(BaseModel): description="The filter strategy for the column if any. Not all column types need " + "a filter strategy." )] = None - non_visible: Annotated[bool, Field( - example=False, - description="Whether the column is visible to the user. " - + "If True, the display name and category fields are not required" - )] = False + display_name: Annotated[str | None, Field( example="Completeness", description="The display name of the column. " @@ -95,6 +92,17 @@ def _check_filter_strategy(self) -> Self: raise ValueError("Only string types may have a filter strategy") return self + +class AttributesColumnSpec(AttributesColumnBase): + """ + A specification for a column in an attributes table. + """ + non_visible: Annotated[bool, Field( + example=False, + description="Whether the column is visible to the user. " + + "If True, the display name and category fields are not required" + )] = False + @model_validator(mode="after") def _check_visible_col(self) -> Self: if not self.non_visible: @@ -115,7 +123,7 @@ class ColumnarAttributesSpec(BaseModel): )] = list() -class AttributesColumn(AttributesColumnSpec): +class AttributesColumn(AttributesColumnBase): min_value: Annotated[int | float | str | None, Field( example="2023-08-25T22:08:30.576+0000", description="The minimum value for the column for numeric and date columns. " diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index d5001a6d6..f2a8a3cc1 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -117,7 +117,8 @@ async def get_columnar_attribs_meta( collection: str, collection_id: str, load_ver: str, - load_ver_override: bool + load_ver_override: bool, + return_only_visible: bool = False ) -> col_models.ColumnarAttributesMeta: """ Get the columnar attributes meta document for a collection. The document is expected to be @@ -128,6 +129,8 @@ async def get_columnar_attribs_meta( collection_id - the KBase collection containing the document. load_ver - the load version of the collection. load_ver_override - whether to override the load version. + return_only_visible - whether to return only visible columns. + """ doc = await get_collection_singleton_from_db( storage, @@ -136,8 +139,12 @@ async def get_columnar_attribs_meta( load_ver, bool(load_ver_override) ) - doc[col_models.FIELD_COLUMNS] = [col_models.AttributesColumn(**d) - for d in doc[col_models.FIELD_COLUMNS]] + + doc[col_models.FIELD_COLUMNS] = [ + col_models.AttributesColumn(**d) for d in doc[col_models.FIELD_COLUMNS] if + not return_only_visible or not d[col_models.NON_VISIBLE] + ] + return col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) @@ -167,8 +174,12 @@ async def get_product_meta( storage = app_state.get_app_state(r).arangostorage _, load_ver = await get_load_version(storage, collection_id, data_product, load_ver_override, user) - meta = await get_columnar_attribs_meta(storage, collection, collection_id, load_ver, bool(load_ver_override)) - meta.columns = [c for c in meta.columns if not c.non_visible] + meta = await get_columnar_attribs_meta(storage, + collection, + collection_id, + load_ver, + bool(load_ver_override), + return_only_visible=True) return meta diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index 8191cfc49..b81847b30 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -17,7 +17,7 @@ query_table, get_load_version, QueryTableResult, - get_columnar_attribs_meta, get_product_meta + get_product_meta ) from src.service.data_products.data_product_processing import ( MATCH_ID_PREFIX, From bdfaaf5fbd168009441a0ac890710b2bf47d2ff7 Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Tue, 30 Jan 2024 08:12:22 -0600 Subject: [PATCH 10/22] Update src/service/data_products/common_functions.py Co-authored-by: MrCreosote --- src/service/data_products/common_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index f2a8a3cc1..3556d0fd8 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -137,7 +137,7 @@ async def get_columnar_attribs_meta( collection, collection_id, load_ver, - bool(load_ver_override) + load_ver_override ) doc[col_models.FIELD_COLUMNS] = [ From 6e414d39a9d3e0ea80a10b7f5dd7be7b1474d0bc Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Tue, 30 Jan 2024 08:12:29 -0600 Subject: [PATCH 11/22] Update src/service/data_products/common_functions.py Co-authored-by: MrCreosote --- src/service/data_products/common_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 3556d0fd8..fcc04fadd 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -141,8 +141,8 @@ async def get_columnar_attribs_meta( ) doc[col_models.FIELD_COLUMNS] = [ - col_models.AttributesColumn(**d) for d in doc[col_models.FIELD_COLUMNS] if - not return_only_visible or not d[col_models.NON_VISIBLE] + col_models.AttributesColumn(**d) for d in doc[col_models.FIELD_COLUMNS] + if not return_only_visible or not d[col_models.NON_VISIBLE] ] return col_models.ColumnarAttributesMeta(**remove_collection_keys(doc)) From 8ff344521886688ec9b7cc69c9fc2e9a7e38d4d9 Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Tue, 30 Jan 2024 08:12:37 -0600 Subject: [PATCH 12/22] Update src/service/data_products/common_functions.py Co-authored-by: MrCreosote --- src/service/data_products/common_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index fcc04fadd..6e9c39f72 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -153,7 +153,7 @@ async def get_product_meta( collection: str, collection_id: str, data_product: str, - load_ver_override, + load_ver_override: str, user: kb_auth.KBaseUser, ) -> col_models.ColumnarAttributesMeta: From d065fab5b06c77a375c3dfaee0a909812db6f27d Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Tue, 30 Jan 2024 08:12:47 -0600 Subject: [PATCH 13/22] Update src/service/data_products/common_functions.py Co-authored-by: MrCreosote --- src/service/data_products/common_functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 6e9c39f72..9e600d4f0 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -162,8 +162,7 @@ async def get_product_meta( r - the request. collection - the arango collection containing the document. - collection_id - the ID of the Collection from which to retrieve the load version and possibly - collection object. + collection_id - the ID of the Collection for which to retrieve the meta information. data_product - the ID of the data product from which to retrieve the load version. load_ver_override - an override for the load version. If provided: * the user must be a service administrator From f5a7a47b0fd38f1a668f40f9d493394de0ec509f Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Tue, 30 Jan 2024 08:12:55 -0600 Subject: [PATCH 14/22] Update src/service/data_products/common_functions.py Co-authored-by: MrCreosote --- src/service/data_products/common_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 9e600d4f0..088c423c3 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -167,7 +167,7 @@ async def get_product_meta( load_ver_override - an override for the load version. If provided: * the user must be a service administrator * the collection is not checked for the existence of the data product. - user - the user. Ignored if load_ver is not provided; must be a service administrator. + user - the user. Ignored if load_ver_override is not provided; must be a service administrator. """ From 893d9816f2d130160d5f686944b1d2ca9e2377b0 Mon Sep 17 00:00:00 2001 From: Tianhao Gu Date: Tue, 30 Jan 2024 08:13:03 -0600 Subject: [PATCH 15/22] Update src/service/data_products/common_functions.py Co-authored-by: MrCreosote --- src/service/data_products/common_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/service/data_products/common_functions.py b/src/service/data_products/common_functions.py index 088c423c3..7ab3c4a6d 100644 --- a/src/service/data_products/common_functions.py +++ b/src/service/data_products/common_functions.py @@ -161,7 +161,7 @@ async def get_product_meta( Get the columnar attributes meta document for a collection used by /meta endpoint. r - the request. - collection - the arango collection containing the document. + collection - the arango collection containing the meta information. collection_id - the ID of the Collection for which to retrieve the meta information. data_product - the ID of the data product from which to retrieve the load version. load_ver_override - an override for the load version. If provided: From 155117f0d3451cd8a427d8b9e0e2d0829a008a99 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 11:44:57 -0600 Subject: [PATCH 16/22] add update samples filter set to enable filtering --- src/service/data_products/samples.py | 38 +++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index b81847b30..b6a3c00f5 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -17,7 +17,8 @@ query_table, get_load_version, QueryTableResult, - get_product_meta + get_product_meta, + get_columnar_attribs_meta ) from src.service.data_products.data_product_processing import ( MATCH_ID_PREFIX, @@ -26,7 +27,7 @@ get_missing_ids as _get_missing_ids ) from src.service.data_products.table_models import TableAttributes -from src.service.filtering.filters import FilterSet +from src.service.filtering.filtering_processing import get_filters, FILTER_STRATEGY_TEXT from src.service.http_bearer import KBaseHTTPBearer from src.service.processing import SubsetSpecification from src.service.routes_common import PATH_VALIDATOR_COLLECTION_ID @@ -46,6 +47,19 @@ _MAX_SAMPLE_IDS = 1000 +_FILTERING_TEXT = """ +**FILTERING:** + +The returned data can be filtered by column content by adding query parameters of the format +``` +filter_= +``` +For example: +``` +GET /collections/ENIGMA/data_products/samples/?filter_genome_count=[5,10] +``` +""" + FILTER_STRATEGY_TEXT + class SamplesSpec(common_models.DataProductSpec): @@ -199,6 +213,7 @@ async def get_samples_meta( + "which may differ from collection to collection.\n\n" + "Authentication is not required unless submitting a match ID or overriding the load " + "version; in the latter case service administration permissions are required.\n\n" + + _FILTERING_TEXT # TODO SAMPLES - how should we support creating a selection from samples? ) async def get_samples( @@ -239,17 +254,28 @@ async def get_samples( ) if status_only: return _response(dp_match=dp_match, dp_sel=dp_sel) - filters = FilterSet( + coll = await appstate.arangostorage.get_collection_active(collection_id) + filters = await get_filters( + r, + names.COLL_SAMPLES, collection_id, load_ver, - collection=names.COLL_SAMPLES, + load_ver_override, + ID, + (await get_columnar_attribs_meta( + appstate.arangostorage, + names.COLL_SAMPLES_META, + collection_id, + load_ver, + load_ver_override)).columns, + view_name=coll.get_data_product(ID).search_view if coll else None, count=count, + sort_on=sort_on, + sort_desc=sort_desc, match_spec=SubsetSpecification( subset_process=dp_match, mark_only=match_mark, prefix=MATCH_ID_PREFIX), selection_spec=SubsetSpecification( subset_process=dp_sel, mark_only=selection_mark, prefix=SELECTION_ID_PREFIX), - sort_on=sort_on, - sort_descending=sort_desc, skip=skip, limit=limit, ) From c6e9e62f7319cb9f9b6366f9ca42f49927abaf18 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 16:42:10 -0600 Subject: [PATCH 17/22] fix workspace download bug --- src/loaders/workspace_downloader/workspace_downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/loaders/workspace_downloader/workspace_downloader.py b/src/loaders/workspace_downloader/workspace_downloader.py index ffb804e84..79f8f3e28 100644 --- a/src/loaders/workspace_downloader/workspace_downloader.py +++ b/src/loaders/workspace_downloader/workspace_downloader.py @@ -126,7 +126,7 @@ def _process_object_info( loader_common_names.FLD_KB_OBJ_TIMESTAMP: obj_info[3], loader_common_names.FLD_KB_OBJ_GENOME_UPA: "{6}/{0}/{4}".format(*genome_info), loader_common_names.ASSEMBLY_OBJ_INFO_KEY: obj_info, - loader_common_names.GENOME_METADATA_FILE: genome_info} + loader_common_names.GENOME_OBJ_INFO_KEY: genome_info} return res_dict From 081f91c8c576eb6535a69129126bb374e47f2f42 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 18:54:17 -0600 Subject: [PATCH 18/22] fix both assembly and genome obj links to the same sample --- src/loaders/workspace_downloader/workspace_downloader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/loaders/workspace_downloader/workspace_downloader.py b/src/loaders/workspace_downloader/workspace_downloader.py index 79f8f3e28..c45d5db75 100644 --- a/src/loaders/workspace_downloader/workspace_downloader.py +++ b/src/loaders/workspace_downloader/workspace_downloader.py @@ -238,14 +238,15 @@ def _find_sample_upa( # find one and only one sample associated upa from input upas and retrieve the sample data # raise error if multiple samples are found - found_sample, sample_ret, sample_upa, sample_effective_time = False, None, None, None + found_sample_id, sample_ret, sample_upa, sample_effective_time = None, None, None, None for upa in upas: try: sample_ret, sample_effective_time = _retrieve_sample(conf, upa) - if found_sample: + sample_id = sample_ret['id'] + if found_sample_id and found_sample_id != sample_id: raise ValueError(f"Found multiple samples in input {upas}") - found_sample, sample_upa = True, upa + found_sample_id, sample_upa = sample_id, upa except NoDataLinkError: pass From 92e5b5355ff831a96193614076b42137bb32c4b4 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 20:17:31 -0600 Subject: [PATCH 19/22] make get_load_version_and_processes return coll --- src/service/data_products/data_product_processing.py | 6 +++--- src/service/data_products/heatmap.py | 2 +- src/service/data_products/samples.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/service/data_products/data_product_processing.py b/src/service/data_products/data_product_processing.py index 690a387cc..51e2f2eb8 100644 --- a/src/service/data_products/data_product_processing.py +++ b/src/service/data_products/data_product_processing.py @@ -43,7 +43,7 @@ async def get_load_version_and_processes( # pretty huge method sig here match_id: str | None = None, selection_id: str | None = None, multiple_ids: bool = False, -) -> tuple[str, models.DataProductProcess, models.DataProductProcess]: +) -> tuple[str, models.DataProductProcess, models.DataProductProcess, models.ActiveCollection | None]: f""" Get the appropriate load version to use when querying data along with match and / or selection processes, if match and selection IDs are provided. @@ -93,7 +93,7 @@ async def get_load_version_and_processes( # pretty huge method sig here appstate, coll, selection_id, data_product, partial(_process_subset, collection, multiple_ids) ) - return load_ver, dp_match, dp_sel + return load_ver, dp_match, dp_sel, coll async def _process_subset( @@ -154,7 +154,7 @@ async def get_missing_ids( if not match_id and not selection_id: raise errors.IllegalParameterError( "At last one of a match ID or selection ID must be supplied") - _, dp_match, dp_sel = await get_load_version_and_processes( + _, dp_match, dp_sel, _ = await get_load_version_and_processes( appstate, user, collection, diff --git a/src/service/data_products/heatmap.py b/src/service/data_products/heatmap.py index 3cc27a136..5ce38a900 100644 --- a/src/service/data_products/heatmap.py +++ b/src/service/data_products/heatmap.py @@ -345,7 +345,7 @@ async def get_heatmap( # For some reason returning the data as a model slows down the endpoint by ~10x. # Serializing manually and returning a plain response is much faster appstate = app_state.get_app_state(r) - load_ver, dp_match, dp_sel = await get_load_version_and_processes( + load_ver, dp_match, dp_sel, _ = await get_load_version_and_processes( appstate, user, self._colname_data, diff --git a/src/service/data_products/samples.py b/src/service/data_products/samples.py index b6a3c00f5..780389c65 100644 --- a/src/service/data_products/samples.py +++ b/src/service/data_products/samples.py @@ -241,7 +241,7 @@ async def get_samples( # we have a max limit of 1000, which means sorting is O(n log2 1000). # Otherwise we need indexes for every sort appstate = app_state.get_app_state(r) - load_ver, dp_match, dp_sel = await get_load_version_and_processes( + load_ver, dp_match, dp_sel, coll = await get_load_version_and_processes( appstate, user, names.COLL_SAMPLES, @@ -254,7 +254,7 @@ async def get_samples( ) if status_only: return _response(dp_match=dp_match, dp_sel=dp_sel) - coll = await appstate.arangostorage.get_collection_active(collection_id) + filters = await get_filters( r, names.COLL_SAMPLES, @@ -352,7 +352,7 @@ async def get_sample_locations( # might need to return a bare Response if the pydantic checking gets too expensive # might need some sort of pagination appstate = app_state.get_app_state(r) - load_ver, dp_match, dp_sel = await get_load_version_and_processes( + load_ver, dp_match, dp_sel, _ = await get_load_version_and_processes( appstate, user, names.COLL_SAMPLES, From bb5dea94d9a5467318fda7a9b8692984741814e2 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 21:53:53 -0600 Subject: [PATCH 20/22] include non_visible field for parser script --- src/loaders/common/loader_helper.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/loaders/common/loader_helper.py b/src/loaders/common/loader_helper.py index 4b407d0f1..0a9efb84f 100644 --- a/src/loaders/common/loader_helper.py +++ b/src/loaders/common/loader_helper.py @@ -19,7 +19,6 @@ from src.common.collection_column_specs.load_specs import load_spec from src.common.product_models.columnar_attribs_common_models import ( ColumnType, - AttributesColumn, ColumnarAttributesMeta, ) from src.common.storage.db_doc_conversions import ( @@ -51,7 +50,7 @@ def _convert_to_iso8601(date_string: str) -> str: # Convert a date string to ISO 8601 format formats_to_try = ["%Y/%m/%d", "%Y-%m-%d", - "%m/%d/%y",] # Add more formats as needed + "%m/%d/%y", ] # Add more formats as needed # The current code always leaves the date in day precision with no time zone information as that's # all that's available from the current data. # If higher precision dates are encountered in the future the code should be adapted to @@ -139,17 +138,17 @@ def process_columnar_meta( enum_values = list(set(values)) enum_values.sort() - attri_column = AttributesColumn( - **col_spec.model_dump(), - min_value=min_value, - max_value=max_value, - enum_values=enum_values - ) + attri_column = { + 'min_value': min_value, + 'max_value': max_value, + 'enum_values': enum_values, + **col_spec.model_dump() + } + columns.append(attri_column) - columnar_attri_meta = ColumnarAttributesMeta(columns=columns, count=len(docs)) + meta_doc = {'columns': columns, 'count': len(docs)} - meta_doc = columnar_attri_meta.model_dump() meta_doc.update({ names.FLD_ARANGO_KEY: collection_load_version_key(kbase_collection, load_ver), names.FLD_COLLECTION_ID: kbase_collection, From 60d7ee22af59b13524d1c358816c4efcc0453f22 Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 22:01:37 -0600 Subject: [PATCH 21/22] using AttributesColumnSpec instead --- src/loaders/common/loader_helper.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/loaders/common/loader_helper.py b/src/loaders/common/loader_helper.py index 0a9efb84f..7f662e52e 100644 --- a/src/loaders/common/loader_helper.py +++ b/src/loaders/common/loader_helper.py @@ -19,7 +19,8 @@ from src.common.collection_column_specs.load_specs import load_spec from src.common.product_models.columnar_attribs_common_models import ( ColumnType, - ColumnarAttributesMeta, + AttributesColumnSpec, + ColumnarAttributesSpec, ) from src.common.storage.db_doc_conversions import ( collection_data_id_key, @@ -138,17 +139,18 @@ def process_columnar_meta( enum_values = list(set(values)) enum_values.sort() - attri_column = { - 'min_value': min_value, - 'max_value': max_value, - 'enum_values': enum_values, - **col_spec.model_dump() - } + attri_column = AttributesColumnSpec( + **col_spec.model_dump(), + min_value=min_value, + max_value=max_value, + enum_values=enum_values + ) columns.append(attri_column) - meta_doc = {'columns': columns, 'count': len(docs)} + columnar_attri_meta = ColumnarAttributesSpec(columns=columns, count=len(docs)) + meta_doc = columnar_attri_meta.model_dump() meta_doc.update({ names.FLD_ARANGO_KEY: collection_load_version_key(kbase_collection, load_ver), names.FLD_COLLECTION_ID: kbase_collection, From e26297b0a41b6dbfd723933306f9a1fefd426a6f Mon Sep 17 00:00:00 2001 From: Tianhao-Gu Date: Tue, 30 Jan 2024 22:05:51 -0600 Subject: [PATCH 22/22] revert using ColumnarAttributesSpec --- src/loaders/common/loader_helper.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/loaders/common/loader_helper.py b/src/loaders/common/loader_helper.py index 7f662e52e..ecc2ff702 100644 --- a/src/loaders/common/loader_helper.py +++ b/src/loaders/common/loader_helper.py @@ -19,8 +19,6 @@ from src.common.collection_column_specs.load_specs import load_spec from src.common.product_models.columnar_attribs_common_models import ( ColumnType, - AttributesColumnSpec, - ColumnarAttributesSpec, ) from src.common.storage.db_doc_conversions import ( collection_data_id_key, @@ -139,18 +137,16 @@ def process_columnar_meta( enum_values = list(set(values)) enum_values.sort() - attri_column = AttributesColumnSpec( - **col_spec.model_dump(), - min_value=min_value, - max_value=max_value, - enum_values=enum_values - ) + attri_column = { + 'min_value': min_value, + 'max_value': max_value, + 'enum_values': enum_values, + **col_spec.model_dump() + } columns.append(attri_column) - columnar_attri_meta = ColumnarAttributesSpec(columns=columns, count=len(docs)) - - meta_doc = columnar_attri_meta.model_dump() + meta_doc = {'columns': columns, 'count': len(docs)} meta_doc.update({ names.FLD_ARANGO_KEY: collection_load_version_key(kbase_collection, load_ver), names.FLD_COLLECTION_ID: kbase_collection,