Skip to content

Commit

Permalink
WIP: Only share subset of coll and image data for public endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
tw4l committed Nov 28, 2024
1 parent 233bdd1 commit 3d78c6b
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 37 deletions.
64 changes: 53 additions & 11 deletions backend/btrixcloud/colls.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
ImageFile,
ImageFilePreparer,
MIN_UPLOAD_PART_SIZE,
PublicCollOut,
)
from .utils import dt_now

Expand Down Expand Up @@ -244,7 +245,8 @@ async def get_collection_out(

async def list_collections(
self,
oid: UUID,
org: Organization,
public_colls_out: bool = False,
page_size: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sort_by: Optional[str] = None,
Expand All @@ -259,16 +261,17 @@ async def list_collections(
page = page - 1
skip = page * page_size

match_query: dict[str, object] = {"oid": oid}
match_query: dict[str, object] = {"oid": org.id}

if name:
match_query["name"] = name

elif name_prefix:
regex_pattern = f"^{name_prefix}"
match_query["name"] = {"$regex": regex_pattern, "$options": "i"}

if access:
if public_colls_out:
match_query["access"] = CollAccessType.PUBLIC
elif access:
match_query["access"] = access

aggregate = [{"$match": match_query}]
Expand Down Expand Up @@ -307,7 +310,22 @@ async def list_collections(
except (IndexError, ValueError):
total = 0

collections = [CollOut.from_dict(res) for res in items]
collections: List[Union[CollOut, PublicCollOut]] = []

for res in items:
if public_colls_out:
res["resources"] = await self.get_collection_crawl_resources(res["_id"])

thumbnail = res.get("thumbnail")
if thumbnail:
image_file = ImageFile(**thumbnail)
res["thumbnail"] = await image_file.get_public_image_file_out(
org, self.storage_ops
)

collections.append(PublicCollOut.from_dict(res))
else:
collections.append(CollOut.from_dict(res))

return collections, total

Expand Down Expand Up @@ -446,7 +464,14 @@ async def add_successful_crawl_to_collections(self, crawl_id: str, cid: UUID):
)
await self.update_crawl_collections(crawl_id)

async def get_org_public_collections(self, org_slug: str):
async def get_org_public_collections(
self,
org_slug: str,
page_size: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sort_by: Optional[str] = None,
sort_direction: int = 1,
):
"""List public collections for org"""
try:
org = await self.orgs.get_org_by_slug(org_slug)
Expand All @@ -459,7 +484,12 @@ async def get_org_public_collections(self, org_slug: str):
raise HTTPException(status_code=404, detail="public_profile_not_found")

collections, _ = await self.list_collections(
org.id, access=CollAccessType.PUBLIC
org,
page_size=page_size,
page=page,
sort_by=sort_by,
sort_direction=sort_direction,
public_colls_out=True,
)

public_org_details = PublicOrgDetails(
Expand Down Expand Up @@ -658,7 +688,7 @@ async def list_collection_all(
access: Optional[str] = None,
):
collections, total = await colls.list_collections(
org.id,
org,
page_size=pageSize,
page=page,
sort_by=sortBy,
Expand All @@ -677,7 +707,7 @@ async def list_collection_all(
async def get_collection_all(org: Organization = Depends(org_viewer_dep)):
results = {}
try:
all_collections, _ = await colls.list_collections(org.id, page_size=10_000)
all_collections, _ = await colls.list_collections(org, page_size=10_000)
for collection in all_collections:
results[collection.name] = await colls.get_collection_crawl_resources(
collection.id
Expand Down Expand Up @@ -811,8 +841,20 @@ async def download_collection(
tags=["collections"],
response_model=OrgPublicCollections,
)
async def get_org_public_collections(org_slug: str):
return await colls.get_org_public_collections(org_slug)
async def get_org_public_collections(
    org_slug: str,
    pageSize: int = DEFAULT_PAGE_SIZE,
    page: int = 1,
    sortBy: Optional[str] = None,
    sortDirection: int = 1,
):
    """Unauthenticated endpoint: list an org's public collections by org slug.

    Query parameters use camelCase (matching this router's convention) and
    are mapped onto the snake_case kwargs of the collections service layer.

    NOTE(review): sortDirection presumably follows Mongo convention
    (1 ascending, -1 descending) — confirm against list_collections.
    """
    return await colls.get_org_public_collections(
        org_slug,
        page_size=pageSize,
        page=page,
        sort_by=sortBy,
        sort_direction=sortDirection,
    )

@app.get(
"/orgs/{oid}/collections/{coll_id}/urls",
Expand Down
58 changes: 38 additions & 20 deletions backend/btrixcloud/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,15 +1115,15 @@ class ImageFileOut(BaseModel):


# ============================================================================
# class PublicImageFileOut(BaseModel):
# """public output for user-upload imaged file (conformance to Data Resource Spec)"""
class PublicImageFileOut(BaseModel):
    """Public output for a user-uploaded image file (conformance to Data Resource Spec).

    Deliberately omits the internal fields that ImageFileOut exposes
    (originalFilename, userid, userName, created) so public endpoints
    do not leak uploader information.
    """

    # stored filename of the image
    name: str
    # presigned download URL; empty string if presigning returned no URL
    path: str
    # file hash
    hash: str
    # file size in bytes
    size: int

    # MIME type of the image, e.g. "image/png"
    mime: str


# ============================================================================
Expand Down Expand Up @@ -1154,19 +1154,19 @@ async def get_image_file_out(self, org, storage_ops) -> ImageFileOut:
created=self.created,
)

async def get_public_image_file_out(self, org, storage_ops) -> PublicImageFileOut:
    """Build the public-facing view of this image with a fresh presigned URL.

    Only public-safe fields are copied into the output; the path falls
    back to an empty string when presigning yields no URL.
    """
    signed_url = await storage_ops.get_presigned_url(
        org, self, PRESIGN_DURATION_SECONDS
    )

    public_out = PublicImageFileOut(
        name=self.filename,
        path=signed_url or "",
        hash=self.hash,
        size=self.size,
        mime=self.mime,
    )
    return public_out


# ============================================================================
Expand Down Expand Up @@ -1285,6 +1285,24 @@ class CollOut(BaseMongoModel):
thumbnail: Optional[ImageFileOut] = None


# ============================================================================
class PublicCollOut(BaseMongoModel):
    """Public (unauthenticated) output model for a collection.

    Exposes only public-safe fields; the internal fields present on CollOut
    (oid, modified, crawlCount, pageCount, totalSize, tags, access,
    homeUrlPageId) are intentionally absent.
    """

    name: str
    description: Optional[str] = None
    # caption: Optional[str] = None

    # earliestDate: Optional[datetime] = None
    # latestDate: Optional[datetime] = None

    homeUrl: Optional[AnyHttpUrl] = None
    homeUrlTs: Optional[datetime] = None

    # replay resources for the collection's crawls
    resources: List[CrawlFileOut] = []
    # public-safe thumbnail (presigned URL only, no uploader info)
    thumbnail: Optional[PublicImageFileOut] = None


# ============================================================================
class UpdateColl(BaseModel):
"""Update collection"""
Expand Down Expand Up @@ -1358,7 +1376,7 @@ class OrgPublicCollections(BaseModel):

org: PublicOrgDetails

collections: List[CollOut] = []
collections: List[PublicCollOut] = []


# ============================================================================
Expand Down
67 changes: 61 additions & 6 deletions backend/test/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
_coll_id = None
_second_coll_id = None
_public_coll_id = None
_second_public_coll_id = None
upload_id = None
modified = None
default_org_slug = None

curr_dir = os.path.dirname(os.path.realpath(__file__))

Expand Down Expand Up @@ -742,11 +744,14 @@ def test_list_public_collections(
json={
"crawlIds": [crawler_crawl_id],
"name": "Second public collection",
"description": "Lorem ipsum",
"access": "public",
},
)
assert r.status_code == 200
second_public_coll_id = r.json()["id"]

global _second_public_coll_id
_second_public_coll_id = r.json()["id"]

# Get default org slug
r = requests.get(
Expand All @@ -755,7 +760,10 @@ def test_list_public_collections(
)
assert r.status_code == 200
data = r.json()
org_slug = data["slug"]

global default_org_slug
default_org_slug = data["slug"]

org_name = data["name"]

# Verify that public profile isn't enabled
Expand All @@ -764,7 +772,7 @@ def test_list_public_collections(
assert data["publicUrl"] == ""

# Try listing public collections without org public profile enabled
r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}")
r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}")
assert r.status_code == 404
assert r.json()["detail"] == "public_profile_not_found"

Expand Down Expand Up @@ -795,7 +803,7 @@ def test_list_public_collections(
assert data["publicUrl"] == public_url

# List public collections with no auth (no public profile)
r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}")
r = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}")
assert r.status_code == 200
data = r.json()

Expand All @@ -807,8 +815,8 @@ def test_list_public_collections(
collections = data["collections"]
assert len(collections) == 2
for collection in collections:
assert collection["id"] in (_public_coll_id, second_public_coll_id)
assert collection["access"] == "public"
assert collection["id"] in (_public_coll_id, _second_public_coll_id)
assert collection["name"]

# Test non-existing slug - it should return a 404 but not reveal
# whether or not an org exists with that slug
Expand Down Expand Up @@ -940,6 +948,53 @@ def test_upload_collection_thumbnail(crawler_auth_headers, default_org_id):
assert thumbnail["created"]


def test_list_public_colls_home_url_thumbnail():
    """Public listing returns only public-safe collection and thumbnail data."""
    hidden_coll_fields = {
        "oid",
        "modified",
        "crawlCount",
        "pageCount",
        "totalSize",
        "tags",
        "access",
        "homeUrlPageId",
    }
    hidden_image_fields = {"originalFilename", "userid", "userName", "created"}

    resp = requests.get(f"{API_PREFIX}/public-collections/{default_org_slug}")
    assert resp.status_code == 200
    colls = resp.json()["collections"]
    assert len(colls) == 2

    for coll in colls:
        assert coll["id"] in (_public_coll_id, _second_public_coll_id)
        assert coll["name"]
        assert coll["resources"]

        # none of the internal-only fields may leak into the public view
        assert hidden_coll_fields.isdisjoint(coll)

        if coll["id"] == _public_coll_id:
            assert coll["homeUrl"]
            assert coll["homeUrlTs"]

        if coll["id"] == _second_public_coll_id:
            assert coll["description"]
            thumb = coll["thumbnail"]
            assert thumb

            for key in ("name", "path", "hash", "size", "mime"):
                assert thumb[key]

            assert hidden_image_fields.isdisjoint(thumb)


def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
# Delete second collection
r = requests.delete(
Expand Down

0 comments on commit 3d78c6b

Please sign in to comment.