Skip to content

Commit

Permalink
python(feature): file attachments upload + download service (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
solidiquis authored Aug 23, 2024
1 parent 43d1ec9 commit 4c3cf15
Show file tree
Hide file tree
Showing 11 changed files with 660 additions and 0 deletions.
7 changes: 7 additions & 0 deletions python/lib/sift_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* [Ingestion Performance](#ingestion-performance)
- [Buffered Ingestion](#buffered-ingestion)
* [Downloading Telemetry](#downloading-telemetry)
* [File attachments](#file-attachments)
* [More Examples](#more-examples)
## Introduction
Expand Down Expand Up @@ -909,6 +910,12 @@ async def channel_demo():
asyncio.run(example())
```
## File attachments
See the module-level documentation for `sift_py.file_attachment` to learn about uploading and downloading
file attachments to various entities such as runs, annotations, and annotation logs. Once file attachments
are created they become viewable in the Sift application.
## More Examples
For more comprehensive examples demonstrating a little bit of everything, you may
Expand Down
File renamed without changes.
88 changes: 88 additions & 0 deletions python/lib/sift_py/file_attachment/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""
This module contains services to facilitate uploading and downloading file attachments.
It also provides utilities to easily query all file attachments for a given entity
which could be a run, annotation, or annotation logs. File attachment deletion is also supported.
Once files have been attached, they should be viewable on the Sift application, attached to their
respective entities. Below are various examples on how to leverage the `sift_py.file_attachment.service.FileAttachmentService`.
## Initializing the file attachment service
Unlike other services throughout `sift_py`, the `sift_py.file_attachment.service.FileAttachmentService` relies on both
REST and gRPC APIs, so it must be initialized with a gRPC channel as well as a REST configuration:
```python
from sift_py.grpc.transport import SiftChannelConfig, use_sift_channel
from sift_py.file_attachment.service import FileAttachmentService
from sift_py.file_attachment.entity import Entity, EntityType
from sift_py.file_attachment.metadata import ImageMetadata
from sift_py.rest import SiftRestConfig
from sift.remote_files.v1.remote_files_pb2 import GetRemoteFileRequest
from sift.remote_files.v1.remote_files_pb2_grpc import RemoteFileServiceStub
rest_config: SiftRestConfig = {
# Be sure to exclude the "https://" or "http://" scheme out of the uri
"uri": rest_base_uri,
"apikey": apikey,
}
sift_channel_config = SiftChannelConfig(uri=grpc_base_uri, apikey=apikey)
with use_sift_channel(sift_channel_config) as channel:
file_attachment_service = FileAttachmentService(channel, rest_config)
...
```
With the service initialized we can now interact with the file attachments API.
## Various Examples
For demonstrative purposes we will upload an `mp4` file and attach it to the run identified by `run_id`.
Once it is uploaded we will query all file attachments for a particular run and re-download
what we just uploaded.
```python
from sift_py.grpc.transport import SiftChannelConfig, use_sift_channel
from sift_py.file_attachment.service import FileAttachmentService
from sift_py.file_attachment.entity import Entity, EntityType
from sift_py.file_attachment.metadata import VideoMetadata
from sift_py.rest import SiftRestConfig
from sift.remote_files.v1.remote_files_pb2 import GetRemoteFileRequest
from sift.remote_files.v1.remote_files_pb2_grpc import RemoteFileServiceStub
...
with use_sift_channel(sift_channel_config) as channel:
file_attachment_service = FileAttachmentService(channel, rest_config)
run = Entity(
entity_id=run_id, # some arbitrary run ID that refers to an existing run
entity_type=EntityType.RUN,
)
# uploading the file attachment and attaching it to a run of `run_id`
remote_file = file_attachment_service.upload_attachment(
path="path/to/foo.mp4",
entity=run,
# Metadata: optional but recommended for optimal viewing in the application
metadata=VideoMetadata(height=2160, width=3840, duration_seconds=5.5),
description="thrusters getting too hot" ,
)
# retrieving all of the file attachments for our run
all_file_attachments = file_attachment_service.retrieve_attachments(run)
# downloading our file_attachment and saving it to our current working dir
file_attachment_service.download_attachment(remote_file)
# downloading our file_attachment and saving it somewhere else with a different name
file_attachment_service.download_attachment(remote_file, "somewhere/else/foo.mp4")
# deleting our file attachments from Sift
file_attachment_service.delete_file_attachments(remote_file_1, remote_file_2, remote_file_etc)
```
"""
Empty file.
13 changes: 13 additions & 0 deletions python/lib/sift_py/file_attachment/_internal/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import io
from pathlib import Path

import requests


def download_remote_file(url: str, out: Path):
    """
    Stream the resource at `url` to the local file `out`.

    The response body is consumed in chunks of `io.DEFAULT_BUFFER_SIZE` bytes rather
    than being loaded in full. `raise_for_status` is called before the output file is
    opened, so a failing HTTP status raises without creating or truncating `out`.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        with open(out, "wb") as destination:
            chunks = response.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE)
            for data in chunks:
                # Empty chunks carry no payload; skip them.
                if not data:
                    continue
                destination.write(data)
113 changes: 113 additions & 0 deletions python/lib/sift_py/file_attachment/_internal/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import mimetypes
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union
from urllib.parse import urljoin, urlparse

import requests
from requests_toolbelt import MultipartEncoder

from sift_py._internal.convert.json import to_json
from sift_py.file_attachment.entity import Entity
from sift_py.file_attachment.metadata import Metadata
from sift_py.rest import SiftRestConfig


class UploadService:
    """
    Uploads local files to the remote-files REST endpoints as attachments of an entity.

    The target URIs are derived once, at construction time, from the provided REST
    configuration; every request is authorized with the configured API key.
    """

    UPLOAD_PATH = "/api/v0/remote-files/upload"
    UPLOAD_BULK_PATH = "/api/v0/remote-files/upload:bulk"

    _upload_uri: str
    _upload_bulk_uri: str
    _apikey: str

    def __init__(self, restconf: SiftRestConfig):
        """
        Build the upload URIs from `restconf["uri"]` and remember `restconf["apikey"]`.

        Raises an `Exception` if the configured URI includes a URL scheme.
        """
        base_uri = self.__class__._compute_uri(restconf)
        self._upload_uri = urljoin(base_uri, self.UPLOAD_PATH)
        self._upload_bulk_uri = urljoin(base_uri, self.UPLOAD_BULK_PATH)
        self._apikey = restconf["apikey"]

    def upload_attachment(
        self,
        path: Union[str, Path],
        entity: Entity,
        metadata: Optional[Metadata] = None,
        description: Optional[str] = None,
        organization_id: Optional[str] = None,
    ) -> str:
        """
        Upload the file at `path` as a multipart form and attach it to `entity`.

        - `path`: Location of the file to upload; must point to a regular file.
        - `entity`: The entity whose ID and type are sent as `entityId` / `entityType`.
        - `metadata`: Optional metadata, serialized to JSON and sent as `metadata`.
        - `description`: Optional text sent as `description`.
        - `organization_id`: Optional ID sent as `organizationId`.

        Returns the `remoteFileId` found under `remoteFile` in the JSON response.

        Raises an `Exception` if `path` is not a regular file, if its MIME-type cannot be
        guessed, or if the response status code is not 200.
        """
        posix_path = Path(path) if isinstance(path, str) else path

        if not posix_path.is_file():
            raise Exception(f"Provided path, '{path}', does not point to a regular file.")

        file_name, mimetype, content_encoding = self.__class__._mime_and_content_type_from_path(
            posix_path
        )

        if not mimetype:
            raise Exception(f"The MIME-type of '{posix_path}' could not be computed.")

        # The file handle must stay open until the request has been sent because the
        # encoder reads from it while the body is being transmitted.
        with open(path, "rb") as file:
            form_fields: Dict[str, Any] = {
                "entityId": entity.entity_id,
                "entityType": entity.entity_type.value,
            }

            if content_encoding:
                # Compressed files (e.g. `.gz`) additionally advertise their encoding
                # as a per-part header.
                form_fields["file"] = (
                    file_name,
                    file,
                    mimetype,
                    {
                        "Content-Encoding": content_encoding,
                    },
                )
            else:
                form_fields["file"] = (file_name, file, mimetype)

            if metadata:
                form_fields["metadata"] = to_json(metadata)

            if organization_id:
                form_fields["organizationId"] = organization_id

            if description:
                form_fields["description"] = description

            form_data = MultipartEncoder(fields=form_fields)

            # https://github.com/requests/toolbelt/issues/312
            # Issue above is reason for the type ignoring
            response = requests.post(
                url=self._upload_uri,
                data=form_data,  # type: ignore
                headers={
                    "Authorization": f"Bearer {self._apikey}",
                    "Content-Type": form_data.content_type,
                },
            )

            if response.status_code != 200:
                raise Exception(
                    f"Request failed with status code {response.status_code} ({response.reason})."
                )

            return response.json().get("remoteFile").get("remoteFileId")

    @staticmethod
    def _mime_and_content_type_from_path(path: Path) -> Tuple[str, Optional[str], Optional[str]]:
        """
        Return `(file name, guessed MIME-type, guessed content encoding)` for `path`.

        The MIME-type and encoding come from `mimetypes.guess_type` and are `None` when
        they cannot be guessed.
        """
        file_name = path.name
        mime, encoding = mimetypes.guess_type(path)
        return file_name, mime, encoding

    @staticmethod
    def _compute_uri(restconf: SiftRestConfig) -> str:
        """
        Turn the scheme-less `restconf["uri"]` into a base URI with an explicit scheme.

        `https` is used unless `restconf["use_ssl"]` is present and falsy, in which case
        `http` is used.

        Raises an `Exception` if the configured URI already includes a URL scheme.
        """
        uri = restconf["uri"]
        parsed_uri = urlparse(uri)

        # `urlparse` may report the host of a scheme-less `host:port` value (for example
        # "localhost:8080") as the scheme. Only treat the value as carrying a scheme when
        # it has the `scheme://authority` form or when the scheme is explicitly HTTP(S).
        includes_scheme = parsed_uri.scheme != "" and (
            parsed_uri.netloc != "" or parsed_uri.scheme in ("http", "https")
        )

        if includes_scheme:
            raise Exception(f"The URL scheme '{parsed_uri.scheme}' should not be included")

        if restconf.get("use_ssl", True):
            return f"https://{uri}"

        return f"http://{uri}"
160 changes: 160 additions & 0 deletions python/lib/sift_py/file_attachment/_service_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import json

import pytest
from pytest_mock import MockFixture
from sift.remote_files.v1.remote_files_pb2 import GetRemoteFileResponse, RemoteFile

from sift_py._internal.test_util.channel import MockChannel
from sift_py.file_attachment.entity import Entity, EntityType
from sift_py.file_attachment.metadata import ImageMetadata
from sift_py.file_attachment.service import FileAttachmentService


class MockResponse:
    """Test double for an HTTP response: carries a status code and a text body."""

    status_code: int
    text: str

    def __init__(self, status_code: int, text: str):
        self.text = text
        self.status_code = status_code

    def json(self):
        """Decode the stored text body as JSON and return the result."""
        decoded = json.loads(self.text)
        return decoded


class MockMultipartEncoder:
    """Test double for a multipart encoder; it only exposes a `content_type`."""

    @property
    def content_type(self):
        """The fixed content type reported by this mock."""
        fixed_content_type = "multipart/form-data"
        return fixed_content_type


def test_file_attachments_service_upload_validate_uri():
    """A URI that includes a scheme is rejected; a scheme-less URI is accepted."""
    channel = MockChannel()

    uri_with_scheme = {
        "uri": "https://some_uri.com",
        "apikey": "123123123",
    }
    uri_without_scheme = {
        "uri": "some_uri.com",
        "apikey": "123123123",
    }

    # Construction itself is expected to fail when the scheme is present.
    with pytest.raises(Exception, match="URL scheme"):
        FileAttachmentService(channel, uri_with_scheme)

    svc = FileAttachmentService(channel, uri_without_scheme)

    assert svc is not None


def test_file_attachments_service_upload_validate_path(mocker: MockFixture):
    """Uploading from a path that is not a regular file raises an error."""
    mock_channel = MockChannel()

    mock_path_is_file = mocker.patch("sift_py.file_attachment._internal.upload.Path.is_file")
    mock_path_is_file.return_value = False

    # Build the service outside of `pytest.raises` so that only the upload call
    # itself can satisfy the expected failure.
    svc = FileAttachmentService(
        mock_channel,
        {
            "uri": "some_uri.com",
            "apikey": "123123123",
        },
    )

    with pytest.raises(Exception, match="does not point to a regular file"):
        svc.upload_attachment(
            path="some_image.png.gz",
            entity=Entity(
                entity_id="123-123-123",
                entity_type=EntityType.ANNOTATION_LOG,
            ),
            metadata=ImageMetadata(
                width=16,
                height=9,
            ),
        )


def test_file_attachments_service_upload_validate_mimetype(mocker: MockFixture):
    """Uploading a file whose MIME-type cannot be guessed raises an error."""
    mock_channel = MockChannel()

    # Pretend the path exists so the upload proceeds to the MIME-type check.
    mock_path_is_file = mocker.patch("sift_py.file_attachment._internal.upload.Path.is_file")
    mock_path_is_file.return_value = True

    # Build the service outside of `pytest.raises` so that only the upload call
    # itself can satisfy the expected failure.
    svc = FileAttachmentService(
        mock_channel,
        {
            "uri": "some_uri.com",
            "apikey": "123123123",
        },
    )

    with pytest.raises(Exception, match="MIME"):
        svc.upload_attachment(
            path="some_image.asdlkjfh",
            entity=Entity(
                entity_id="123-123-123",
                entity_type=EntityType.ANNOTATION_LOG,
            ),
            metadata=ImageMetadata(
                width=16,
                height=9,
            ),
        )


def test_file_attachments_service_upload_returns_remote_file(mocker: MockFixture):
    """A successful upload yields the remote file fetched for the returned ID."""
    channel = MockChannel()

    # Filesystem: the path "exists" and opening it yields a few PNG header bytes.
    mocker.patch(
        "sift_py.file_attachment._internal.upload.Path.is_file",
        return_value=True,
    )
    mocker.patch(
        "sift_py.file_attachment._internal.upload.open",
        mocker.mock_open(read_data=b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR"),
    )

    # HTTP: the encoder and the POST request are both replaced by test doubles.
    encoder_patch = mocker.patch(
        "sift_py.file_attachment._internal.upload.MultipartEncoder",
        return_value=MockMultipartEncoder(),
    )
    upload_response = MockResponse(
        status_code=200, text=json.dumps({"remoteFile": {"remoteFileId": "abc"}})
    )
    post_patch = mocker.patch(
        "sift_py.file_attachment._internal.upload.requests.post",
        return_value=upload_response,
    )

    rest_config = {
        "uri": "some_uri.com",
        "apikey": "123123123",
    }
    svc = FileAttachmentService(channel, rest_config)

    # gRPC: the stub's lookup is patched to return a remote file with the uploaded ID.
    get_remote_file_patch = mocker.patch.object(
        svc._remote_file_service_stub,
        "GetRemoteFile",
        return_value=GetRemoteFileResponse(remote_file=RemoteFile(remote_file_id="abc")),
    )

    target_entity = Entity(
        entity_id="123-123-123",
        entity_type=EntityType.ANNOTATION_LOG,
    )
    image_metadata = ImageMetadata(
        width=16,
        height=9,
    )

    remote_file = svc.upload_attachment(
        path="some_image.png.gz",
        entity=target_entity,
        metadata=image_metadata,
    )

    get_remote_file_patch.assert_called_once()
    encoder_patch.assert_called_once()
    post_patch.assert_called_once()

    assert remote_file.remote_file_id == "abc"
Loading

0 comments on commit 4c3cf15

Please sign in to comment.