bug-1905455: Enable anonymous access to S3 buckets.
smarnach committed Jul 10, 2024
1 parent 6e5a0e5 commit 00f092b
Showing 3 changed files with 21 additions and 69 deletions.
86 changes: 19 additions & 67 deletions tecken/ext/s3/storage.py
@@ -9,95 +9,47 @@
 from urllib.parse import quote, urlparse
 
 import boto3.session
+from botocore import UNSIGNED
 from botocore.config import Config
 from botocore.exceptions import BotoCoreError, ClientError
 
+from everett.manager import parse_bool
 from django.conf import settings
 
 from tecken.libstorage import ObjectMetadata, StorageBackend, StorageError
 
 
-ALL_POSSIBLE_S3_REGIONS: tuple[str] = tuple(
-    boto3.session.Session().get_available_regions("s3")
-)
-
-
 class S3Storage(StorageBackend):
     """
     An implementation of the StorageBackend interface for Amazon S3.
     """
 
+    accepted_hostnames = (".amazonaws.com", "localstack", "s3.example.com")
+
-    # A substring match of the domain is used to recognize storage backends.
-    # For emulated backends, the name should be present in the docker compose
-    # service name.
-    _URL_FINGERPRINT: list[str] = {
-        # AWS S3, like bucket-name.s3.amazonaws.com
-        "s3": ".amazonaws.com",
-        # Localstack S3 Emulator
-        "emulated-s3": "localstack",
-        # S3 test domain
-        "test-s3": "s3.example.com",
-    }
-
-    def __init__(self, url: str, try_symbols: bool = False, file_prefix: str = "v1"):
+    def __init__(self, url: str, try_symbols: bool = False, anonymous: str = "false"):
         url = url.removesuffix("/")
         self.url = url
-        parsed = urlparse(url)
-        self.scheme = parsed.scheme
-        self.netloc = parsed.netloc
-
-        # Determine the backend from the netloc (domain plus port)
-        self.backend = None
-        for backend, fingerprint in self._URL_FINGERPRINT.items():
-            if fingerprint in self.netloc:
-                self.backend = backend
-                break
-        if self.backend is None:
-            raise ValueError(f"Storage backend not recognized in {url!r}")
-
-        try:
-            name, prefix = parsed.path[1:].split("/", 1)
-            if prefix.endswith("/"):
-                prefix = prefix[:-1]
-        except ValueError:
-            prefix = ""
-            name = parsed.path[1:]
-        self.name = name
-        if file_prefix:
-            if prefix:
-                prefix += f"/{file_prefix}"
-            else:
-                prefix = file_prefix
-        self.prefix = prefix
+        parsed_url = urlparse(url)
+        self.endpoint_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+        self.name, _, self.prefix = parsed_url.path[1:].partition("/")
+        self.prefix = (self.prefix + "/v1").removeprefix("/")
         self.try_symbols = try_symbols
-        self.endpoint_url = None
-        self.region = None
-        if not self.backend == "s3":
-            # the endpoint_url will be all but the path
-            self.endpoint_url = f"{parsed.scheme}://{parsed.netloc}"
-        region = re.findall(r"s3[.-](.*)\.amazonaws\.com", parsed.netloc)
+        self.anonymous = parse_bool(anonymous)
+        self.clients = threading.local()
+        region = re.findall(r"s3[.-](.*)\.amazonaws\.com", parsed_url.netloc)
         if region:
-            if region[0] not in ALL_POSSIBLE_S3_REGIONS:
-                raise ValueError(f"Not valid S3 region {region[0]}")
             self.region = region[0]
-        self.clients = threading.local()
+        else:
+            self.region = None
 
-    @property
-    def base_url(self):
-        """Return base url for objects managed by this storage backend
-        For objects in S3, this includes the domain and bucket name.
-        """
-        return f"{self.scheme}://{self.netloc}/{self.name}"
-
-    def get_storage_client(self):
+    def _get_client(self):
         """Return a backend-specific client."""
         if not hasattr(self.clients, "storage"):
             options = {
                 "config": Config(
                     read_timeout=settings.S3_READ_TIMEOUT,
                     connect_timeout=settings.S3_CONNECT_TIMEOUT,
+                    signature_version=UNSIGNED if self.anonymous else None,
                 )
             }
             if self.endpoint_url:
@@ -114,7 +66,7 @@ def exists(self) -> bool:
         :raises StorageError: an unexpected backend-specific error was raised
         """
-        client = self.get_storage_client()
+        client = self._get_client()
 
         try:
             client.head_bucket(Bucket=self.name)
@@ -142,7 +94,7 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]:
         :raises StorageError: an unexpected backend-specific error was raised
         """
-        client = self.get_storage_client()
+        client = self._get_client()
         try:
             response = client.head_object(Bucket=self.name, Key=f"{self.prefix}/{key}")
         except ClientError as exc:
@@ -159,7 +111,7 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]:
         except ValueError:
             original_content_length = None
         metadata = ObjectMetadata(
-            download_url=f"{self.base_url}/{self.prefix}/{quote(key)}",
+            download_url=f"{self.url}/v1/{quote(key)}",
             content_type=response.get("ContentType"),
             content_length=response["ContentLength"],
             content_encoding=response.get("ContentEncoding"),
@@ -200,7 +152,7 @@ def upload(self, key: str, body: BufferedReader, metadata: ObjectMetadata):
         if metadata.content_length:
             kwargs["ContentLength"] = metadata.content_length
 
-        client = self.get_storage_client()
+        client = self._get_client()
         try:
             client.put_object(**kwargs)
         except (ClientError, BotoCoreError) as exc:
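
Note: the rewritten __init__ above derives the endpoint, bucket name, and key prefix straight from the URL path, replacing the old file_prefix plumbing. A minimal sketch of that parsing, using a hypothetical bucket URL (not taken from the repo):

from urllib.parse import urlparse

url = "https://s3.us-east-1.amazonaws.com/example-bucket/try"  # hypothetical
parsed_url = urlparse(url)
endpoint_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
name, _, prefix = parsed_url.path[1:].partition("/")
prefix = (prefix + "/v1").removeprefix("/")  # "try" -> "try/v1"; "" -> "v1"
print(endpoint_url, name, prefix)
# https://s3.us-east-1.amazonaws.com example-bucket try/v1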
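The signature_version=UNSIGNED config is what actually enables anonymous access: botocore then skips request signing entirely, so reading a public bucket needs no AWS credentials. A self-contained sketch of the same mechanism, with a hypothetical public bucket and key:

import boto3
from botocore import UNSIGNED
from botocore.config import Config

# UNSIGNED makes botocore send unsigned (anonymous) requests.
client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
# Bucket and key are hypothetical placeholders.
response = client.head_object(
    Bucket="example-public-symbols", Key="v1/xul.pdb/ABC123/xul.sym"
)
print(response["ContentLength"])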
2 changes: 1 addition & 1 deletion tecken/libdockerflow.py
@@ -27,7 +27,7 @@ def check_storage_urls(app_configs, **kwargs):
             errors.append(
                 checks.Error(
                     f"Unable to connect to {backend.url} (bucket={backend.name!r}), "
-                    f"due to {error.backend_msg}",
+                    f"due to {error}",
                     id="tecken.health.E002",
                 )
             )
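
Formatting the exception itself relies on StorageError's string conversion rather than a separate backend_msg attribute. A generic sketch of that pattern with a hypothetical constructor (the real class lives in tecken.libstorage and may differ):

class StorageError(Exception):
    """Sketch only; not the repo's implementation."""
    def __init__(self, message: str, url: str):
        super().__init__(f"{message} (storage url: {url})")

# str(error) now carries the full detail, so f"due to {error}" suffices.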
2 changes: 1 addition & 1 deletion tecken/tests/conftest.py
@@ -106,7 +106,7 @@ def clear_s3_storage(self: S3Storage):
"""Make sure the S3 bucket exists and delete all files under the prefix."""
# NOTE(smarnach): This gets patched into S3Storage as a method. I don't want this to exist in
# production code, since it should never get called there.
client = self.get_storage_client()
client = self._get_client()
client.create_bucket(Bucket=self.name)
response = client.list_objects_v2(Bucket=self.name, Prefix=self.prefix)
for object in response.get("Contents", []):
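
The deletion loop is cut off by the collapsed diff; the usual pattern pairs list_objects_v2 with delete_object per key. A generic sketch (not the repo's code), assuming the listing fits in a single page of at most 1000 keys, as in a small test bucket:

def clear_prefix(client, bucket: str, prefix: str) -> None:
    # Sketch only: delete every object under prefix, assuming one
    # list_objects_v2 page (<= 1000 keys).
    response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)
    for obj in response.get("Contents", []):
        client.delete_object(Bucket=bucket, Key=obj["Key"])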
