diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 22ad1e8d3..d18a861d1 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -44,6 +44,7 @@ ImageFile, ImageFilePreparer, MIN_UPLOAD_PART_SIZE, + PublicCollOut, ) from .utils import dt_now @@ -244,7 +245,8 @@ async def get_collection_out( async def list_collections( self, - oid: UUID, + org: Organization, + public_colls_out: bool = False, page_size: int = DEFAULT_PAGE_SIZE, page: int = 1, sort_by: Optional[str] = None, @@ -259,16 +261,17 @@ async def list_collections( page = page - 1 skip = page * page_size - match_query: dict[str, object] = {"oid": oid} + match_query: dict[str, object] = {"oid": org.id} if name: match_query["name"] = name - elif name_prefix: regex_pattern = f"^{name_prefix}" match_query["name"] = {"$regex": regex_pattern, "$options": "i"} - if access: + if public_colls_out: + match_query["access"] = CollAccessType.PUBLIC + elif access: match_query["access"] = access aggregate = [{"$match": match_query}] @@ -307,7 +310,22 @@ async def list_collections( except (IndexError, ValueError): total = 0 - collections = [CollOut.from_dict(res) for res in items] + collections: List[Union[CollOut, PublicCollOut]] = [] + + for res in items: + if public_colls_out: + res["resources"] = await self.get_collection_crawl_resources(res["_id"]) + + thumbnail = res.get("thumbnail") + if thumbnail: + image_file = ImageFile(**thumbnail) + res["thumbnail"] = await image_file.get_public_image_file_out( + org, self.storage_ops + ) + + collections.append(PublicCollOut.from_dict(res)) + else: + collections.append(CollOut.from_dict(res)) return collections, total @@ -446,7 +464,14 @@ async def add_successful_crawl_to_collections(self, crawl_id: str, cid: UUID): ) await self.update_crawl_collections(crawl_id) - async def get_org_public_collections(self, org_slug: str): + async def get_org_public_collections( + self, + org_slug: str, + page_size: int = DEFAULT_PAGE_SIZE, + page: int = 1, + sort_by: Optional[str] = None, + sort_direction: int = 1, + ): """List public collections for org""" try: org = await self.orgs.get_org_by_slug(org_slug) @@ -459,7 +484,12 @@ async def get_org_public_collections(self, org_slug: str): raise HTTPException(status_code=404, detail="public_profile_not_found") collections, _ = await self.list_collections( - org.id, access=CollAccessType.PUBLIC + org, + page_size=page_size, + page=page, + sort_by=sort_by, + sort_direction=sort_direction, + public_colls_out=True, ) public_org_details = PublicOrgDetails( @@ -658,7 +688,7 @@ async def list_collection_all( access: Optional[str] = None, ): collections, total = await colls.list_collections( - org.id, + org, page_size=pageSize, page=page, sort_by=sortBy, @@ -677,7 +707,7 @@ async def list_collection_all( async def get_collection_all(org: Organization = Depends(org_viewer_dep)): results = {} try: - all_collections, _ = await colls.list_collections(org.id, page_size=10_000) + all_collections, _ = await colls.list_collections(org, page_size=10_000) for collection in all_collections: results[collection.name] = await colls.get_collection_crawl_resources( collection.id @@ -811,8 +841,20 @@ async def download_collection( tags=["collections"], response_model=OrgPublicCollections, ) - async def get_org_public_collections(org_slug: str): - return await colls.get_org_public_collections(org_slug) + async def get_org_public_collections( + org_slug: str, + pageSize: int = DEFAULT_PAGE_SIZE, + page: int = 1, + sortBy: Optional[str] = None, + sortDirection: int = 1, + ): + return await colls.get_org_public_collections( + org_slug, + page_size=pageSize, + page=page, + sort_by=sortBy, + sort_direction=sortDirection, + ) @app.get( "/orgs/{oid}/collections/{coll_id}/urls", diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index e7bcc4474..1e53ebdc0 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1115,15 +1115,15 @@ class ImageFileOut(BaseModel): # ============================================================================ -# class PublicImageFileOut(BaseModel): -# """public output for user-upload imaged file (conformance to Data Resource Spec)""" +class PublicImageFileOut(BaseModel): + """public output for user-upload imaged file (conformance to Data Resource Spec)""" -# name: str -# path: str -# hash: str -# size: int + name: str + path: str + hash: str + size: int -# mime: str + mime: str # ============================================================================ @@ -1154,19 +1154,19 @@ async def get_image_file_out(self, org, storage_ops) -> ImageFileOut: created=self.created, ) - # async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut: - # """Get PublicImageFileOut with new presigned url""" - # presigned_url = await storage_ops.get_presigned_url( - # org, self, PRESIGN_DURATION_SECONDS - # ) + async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut: + """Get PublicImageFileOut with new presigned url""" + presigned_url = await storage_ops.get_presigned_url( + org, self, PRESIGN_DURATION_SECONDS + ) - # return PublicImageFileOut( - # name=self.filename, - # path=presigned_url or "", - # hash=self.hash, - # size=self.size, - # mime=self.mime, - # ) + return PublicImageFileOut( + name=self.filename, + path=presigned_url or "", + hash=self.hash, + size=self.size, + mime=self.mime, + ) # ============================================================================ @@ -1285,6 +1285,24 @@ class CollOut(BaseMongoModel): thumbnail: Optional[ImageFileOut] = None +# ============================================================================ +class PublicCollOut(BaseMongoModel): + """Collection output model with annotations.""" + + name: str + description: Optional[str] = None + # caption: Optional[str] = None + + # earliestDate: Optional[datetime] = None + # latestDate: Optional[datetime] = None + + homeUrl: Optional[AnyHttpUrl] = None + homeUrlTs: Optional[datetime] = None + + resources: List[CrawlFileOut] = [] + thumbnail: Optional[PublicImageFileOut] = None + + # ============================================================================ class UpdateColl(BaseModel): """Update collection""" @@ -1358,7 +1376,7 @@ class OrgPublicCollections(BaseModel): org: PublicOrgDetails - collections: List[CollOut] = [] + collections: List[PublicCollOut] = [] # ============================================================================ diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index f2a41c00e..5f3c5d262 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -16,8 +16,10 @@ _coll_id = None _second_coll_id = None _public_coll_id = None +_second_public_coll_id = None upload_id = None modified = None +default_org_slug = None curr_dir = os.path.dirname(os.path.realpath(__file__)) @@ -742,11 +744,14 @@ def test_list_public_collections( json={ "crawlIds": [crawler_crawl_id], "name": "Second public collection", + "description": "Lorem ipsum", "access": "public", }, ) assert r.status_code == 200 - second_public_coll_id = r.json()["id"] + + global _second_public_coll_id + _second_public_coll_id = r.json()["id"] # Get default org slug r = requests.get( @@ -755,7 +760,10 @@ def test_list_public_collections( ) assert r.status_code == 200 data = r.json() - org_slug = data["slug"] + + global default_org_slug + default_org_slug = data["slug"] + org_name = data["name"] # Verify that public profile isn't enabled @@ -764,7 +772,7 @@ def test_list_public_collections( assert data["publicUrl"] == "" # Try listing public collections without org public profile enabled - r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") assert r.status_code == 404 assert r.json()["detail"] == "public_profile_not_found" @@ -795,7 +803,7 @@ def test_list_public_collections( assert data["publicUrl"] == public_url # List public collections with no auth (no public profile) - r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") assert r.status_code == 200 data = r.json() @@ -807,8 +815,8 @@ def test_list_public_collections( collections = data["collections"] assert len(collections) == 2 for collection in collections: - assert collection["id"] in (_public_coll_id, second_public_coll_id) - assert collection["access"] == "public" + assert collection["id"] in (_public_coll_id, _second_public_coll_id) + assert collection["name"] # Test non-existing slug - it should return a 404 but not reveal # whether or not an org exists with that slug @@ -940,6 +948,53 @@ def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id): assert thumbnail["created"] +def test_list_public_colls_home_url_thumbnail(): + # Check we get expected data for each public collection + # and nothing we don't expect + non_public_fields = ( + "oid", + "modified", + "crawlCount", + "pageCount", + "totalSize", + "tags", + "access", + "homeUrlPageId", + ) + non_public_image_fields = ("originalFilename", "userid", "userName", "created") + + r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}") + assert r.status_code == 200 + collections = r.json()["collections"] + assert len(collections) == 2 + + for coll in collections: + assert coll["id"] in (_public_coll_id, _second_public_coll_id) + assert coll["name"] + assert coll["resources"] + + for field in non_public_fields: + assert field not in coll + + if coll["id"] == _public_coll_id: + assert coll["homeUrl"] + assert coll["homeUrlTs"] + + if coll["id"] == _second_public_coll_id: + assert coll["description"] + thumbnail = coll["thumbnail"] + assert thumbnail + + assert thumbnail["name"] + assert thumbnail["path"] + assert thumbnail["hash"] + assert thumbnail["size"] + assert thumbnail["mime"] + + for field in non_public_image_fields: + assert field not in thumbnail + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete(