Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug-1827506: Implement the storage interface for GCS. #2957

Merged
merged 19 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
97c609b
bug-1564452: Simplify StorageError class.
smarnach Jul 3, 2024
ce4b234
bug-1827506: Add GCS implementation of StorageInterface.
smarnach Jul 2, 2024
1f6fa08
bug-1827506: Add basic GCSStorage test.
smarnach Jul 3, 2024
03f783f
bug-1827506: Make SymbolStorage backend-agnostic.
smarnach Jul 4, 2024
6e90872
bug-1827506: Refactor storage backend tests.
smarnach Jul 8, 2024
190f926
bug-1827506: Make SymbolStorage tests run with both GCS and S3 backends.
smarnach Jul 8, 2024
1301d70
bug-1827506: Make download tests run with both GCS and S3 backends.
smarnach Jul 8, 2024
67bad26
bug-1827506: Inline and remove create_storage_backend fixture.
smarnach Jul 9, 2024
a699f44
bug-1827506: Make upload tests run with both GCS and S3 backends.
smarnach Jul 9, 2024
655585a
bug-1908868: Use structured configuration for backends.
smarnach Jul 19, 2024
1dddeeb
fixup! bug-1908868: Use structured configuration for backends.
smarnach Jul 23, 2024
8a82839
bug-1827506: Prevent decompressive transcoding for GCS downloads.
smarnach Jul 23, 2024
2ec435d
fixup! bug-1908868: Use structured configuration for backends.
smarnach Jul 24, 2024
b34a5be
bug-1827506: de-obscure StorageError
willkg Jul 25, 2024
f94f899
bug-1827506: move test data into subdirectory
willkg Jul 25, 2024
e90ff03
bug-1827506: move sym header extraction, rework tests
willkg Jul 25, 2024
08c8c25
bug-1827506: Include the try_symbols flag in the storage backend repr.
smarnach Jul 26, 2024
669f9fc
bug-1827506: Log backend errors in the Dockerflow health check.
smarnach Jul 29, 2024
ea0ff03
bug-1827506: Rename storage timeout settings.
smarnach Jul 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions bin/setup-services.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
set -eo pipefail

# Set up S3
python bin/s3_cli.py delete "${UPLOAD_DEFAULT_URL}"
python bin/s3_cli.py create "${UPLOAD_DEFAULT_URL}"
python bin/s3_cli.py delete "${UPLOAD_S3_BUCKET}"
python bin/s3_cli.py create "${UPLOAD_S3_BUCKET}"

# Set up GCS
# FIXME bug 1827506: update argument as needed once GCS is
# implemented in the source code.
python bin/gcs_cli.py delete publicbucket
python bin/gcs_cli.py create publicbucket
python bin/gcs_cli.py delete "${UPLOAD_GCS_BUCKET}"
python bin/gcs_cli.py create "${UPLOAD_GCS_BUCKET}"

# Set up db
python bin/db.py drop || true
Expand Down
6 changes: 3 additions & 3 deletions docker/config/local_dev.env
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ AWS_ENDPOINT_URL=http://localstack:4566/

DEBUG=true
LOCAL_DEV_ENV=true
SYMBOL_URLS=http://localstack:4566/publicbucket/
UPLOAD_DEFAULT_URL=http://localstack:4566/publicbucket/
UPLOAD_TRY_SYMBOLS_URL=http://localstack:4566/publicbucket/try/
CLOUD_SERVICE_PROVIDER=GCS
UPLOAD_GCS_BUCKET=publicbucket
UPLOAD_S3_BUCKET=publicbucket

# Default to the test oidcprovider container for Open ID Connect
#
Expand Down
4 changes: 2 additions & 2 deletions tecken/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ def possible_upload_urls(request):
context = {
"urls": [
{
"url": upload_backend.url,
"bucket_name": upload_backend.name,
"bucket_name": upload_backend.bucket,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gets used by the "upload now" ui view. I bet we could remove this ui view and the API endpoints it uses. I wrote up a bug to consider doing that.

https://bugzilla.mozilla.org/show_bug.cgi?id=1909013

"prefix": upload_backend.prefix,
"default": True,
}
]
Expand Down
25 changes: 7 additions & 18 deletions tecken/base/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import json
import time
from urllib.parse import urlparse, urlunparse

Expand All @@ -22,6 +21,8 @@

import redis.exceptions

from tecken.base import symbolstorage


ACTION_TO_NAME = {ADDITION: "add", CHANGE: "change", DELETION: "delete"}

Expand Down Expand Up @@ -117,25 +118,13 @@ def clean_url(value):
for key in keys:
value = getattr(settings, key)
context["settings"].append({"key": key, "value": value})

# Now for some oddballs
context["settings"].append(
{"key": "UPLOAD_DEFAULT_URL", "value": clean_url(settings.UPLOAD_DEFAULT_URL)}
)
context["settings"].append(
{
"key": "UPLOAD_TRY_SYMBOLS_URL",
"value": clean_url(settings.UPLOAD_TRY_SYMBOLS_URL),
}
)
context["settings"].append(
{
"key": "SYMBOL_URLS",
"value": json.dumps([clean_url(x) for x in settings.SYMBOL_URLS]),
}
)
context["settings"].sort(key=lambda x: x["key"])

context["backends"] = [
(backend.__class__.__name__, backend)
for backend in symbolstorage.SYMBOL_STORAGE.get_download_backends(True)
]

# Get some table counts
tables = [
"auth_user",
Expand Down
7 changes: 1 addition & 6 deletions tecken/base/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

from django.apps import AppConfig
from django.conf import settings

from tecken.base import symbolstorage

Expand All @@ -12,8 +11,4 @@ class BaseAppConfig(AppConfig):
name = "tecken.base"

def ready(self):
symbolstorage.SYMBOL_STORAGE = symbolstorage.SymbolStorage(
upload_url=settings.UPLOAD_DEFAULT_URL,
download_urls=settings.SYMBOL_URLS,
try_url=settings.UPLOAD_TRY_SYMBOLS_URL,
)
symbolstorage.SYMBOL_STORAGE = symbolstorage.SymbolStorage.from_settings()
61 changes: 29 additions & 32 deletions tecken/base/symbolstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,58 @@
import logging
from typing import Optional

from django.conf import settings
from django.utils import timezone

from tecken.libmarkus import METRICS
from tecken.ext.s3.storage import S3Storage
from tecken.libstorage import ObjectMetadata, StorageBackend
from tecken.libstorage import ObjectMetadata, StorageBackend, backend_from_config


logger = logging.getLogger("tecken")


class SymbolStorage:
"""Persistent wrapper around multiple StorageBackend instances."""
"""Persistent wrapper around multiple StorageBackend instances.

:arg upload_backend: The upload and download backend for regular storage.
:arg try_upload_backend: The upload and download backend for try storage.
:arg download_backends: Additional download backends.
"""

def __init__(
self, upload_url: str, download_urls: list[str], try_url: Optional[str] = None
self,
upload_backend: StorageBackend,
try_upload_backend: StorageBackend,
download_backends: list[StorageBackend],
):
# The upload backend for regular storage.
self.upload_backend: StorageBackend = S3Storage(upload_url)

# All additional download backends, except for the regular upload backend.
download_backends = [
S3Storage(url) for url in download_urls if url != upload_url
]

# All backends
self.backends: list[StorageBackend] = [self.upload_backend, *download_backends]

# The try storage backend for both upload and download, if any.
if try_url is None:
self.try_backend: Optional[StorageBackend] = None
else:
self.try_backend: Optional[StorageBackend] = S3Storage(
try_url, try_symbols=True
)
self.backends.append(self.try_backend)
self.upload_backend = upload_backend
self.try_upload_backend = try_upload_backend
self.backends = [upload_backend, try_upload_backend, *download_backends]

@classmethod
def from_settings(cls):
upload_backend = backend_from_config(settings.UPLOAD_BACKEND)
try_upload_backend = backend_from_config(settings.TRY_UPLOAD_BACKEND)
download_backends = list(map(backend_from_config, settings.DOWNLOAD_BACKENDS))
return cls(upload_backend, try_upload_backend, download_backends)

def __repr__(self):
urls = [backend.url for backend in self.backends]
return f"<{self.__class__.__name__} urls={urls}>"
backend_reprs = " ".join(map(repr, self.backends))
return f"<{self.__class__.__name__} backends: {backend_reprs}>"

def get_download_backends(self, try_storage: bool) -> list[StorageBackend]:
"""Return a list of all download backends.

Includes the try backend if `try_storage` is set to `True`.
"""
return [
backend
for backend in self.backends
if try_storage or not backend.try_symbols
]
if try_storage:
return self.backends
return [backend for backend in self.backends if not backend.try_symbols]

def get_upload_backend(self, try_storage: bool) -> StorageBackend:
"""Return either the regular or the try upload backends."""
if try_storage:
return self.try_backend
return self.try_upload_backend
return self.upload_backend

@staticmethod
Expand Down Expand Up @@ -100,6 +97,6 @@ def get_metadata(


def symbol_storage() -> Optional[SymbolStorage]:
    """Return the global SymbolStorage instance."""
    # This function exists to make it easier to patch the SymbolStorage singleton in tests.
    return SYMBOL_STORAGE
22 changes: 22 additions & 0 deletions tecken/base/templates/admin/site_status.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,28 @@ <h2>Settings</h2>
</tbody>
</table>

<h2>Backends</h2>
<table>
<thead>
<tr>
<th>class</th>
<th>bucket</th>
<th>prefix</th>
<th>try_symbols</th>
</tr>
</thead>
<tbody>
{% for class_name, item in backends %}
<tr>
<td>{{ class_name }}</td>
<td>{{ item.bucket }}</td>
<td>{{ item.prefix }}</td>
<td>{{ item.try_symbols }}</td>
</tr>
{% endfor %}
</tbody>
</table>

<h2>Table counts</h2>
<table>
<thead>
Expand Down
3 changes: 3 additions & 0 deletions tecken/ext/gcs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
148 changes: 148 additions & 0 deletions tecken/ext/gcs/storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

from io import BufferedReader
import threading
from typing import Optional
from urllib.parse import quote

from django.conf import settings

from google.api_core.client_options import ClientOptions
from google.api_core.exceptions import ClientError, NotFound
from google.cloud import storage

from tecken.libstorage import ObjectMetadata, StorageBackend, StorageError


class GCSStorage(StorageBackend):
    """
    An implementation of the StorageBackend interface for Google Cloud Storage.
    """

    def __init__(
        self,
        bucket: str,
        prefix: str,
        try_symbols: bool = False,
        endpoint_url: Optional[str] = None,
    ):
        """
        :arg bucket: the name of the GCS bucket backing this storage
        :arg prefix: the key prefix under which all symbol files live
        :arg try_symbols: whether this backend stores "try" symbols
        :arg endpoint_url: optional API endpoint override (e.g. for a local emulator)
        """
        self.bucket = bucket
        self.prefix = prefix
        self.try_symbols = try_symbols
        self.endpoint_url = endpoint_url
        # Client/bucket objects are kept per-thread via threading.local.
        self.clients = threading.local()
        # The Cloud Storage client doesn't support setting global timeouts for all requests, so we
        # need to pass the timeout for every single request. The default timeout is 60 seconds for
        # both connecting and reading from the socket.
        self.timeout = (settings.STORAGE_CONNECT_TIMEOUT, settings.STORAGE_READ_TIMEOUT)

    def __repr__(self):
        # FIX: keep the try flag inside the angle brackets so the whole repr reads as one
        # unit (the closing ">" previously appeared before "try:...").
        return (
            f"<{self.__class__.__name__} gs://{self.bucket}/{self.prefix}"
            f" try:{self.try_symbols}>"
        )

    def _get_client(self) -> storage.Client:
        """Return a thread-local low-level storage client."""
        if not hasattr(self.clients, "client"):
            options = ClientOptions(api_endpoint=self.endpoint_url)
            self.clients.client = storage.Client(client_options=options)
        return self.clients.client

    def _get_bucket(self) -> storage.Bucket:
        """Return a thread-local low-level storage bucket client.

        :raises StorageError: if the bucket does not exist
        """
        if not hasattr(self.clients, "bucket"):
            client = self._get_client()
            try:
                self.clients.bucket = client.get_bucket(
                    self.bucket, timeout=self.timeout
                )
            except NotFound as exc:
                raise StorageError(str(exc), backend=self) from exc
        return self.clients.bucket

    def exists(self) -> bool:
        """Check that this storage exists.

        :returns: True if the storage exists and False if not

        :raises StorageError: an unexpected backend-specific error was raised
        """
        try:
            # _get_bucket() raises StorageError when the bucket is missing (NotFound).
            self._get_bucket()
        except StorageError:
            return False
        except ClientError as exc:
            raise StorageError(str(exc), backend=self) from exc
        return True

    def get_download_url(self, key: str) -> str:
        """Return the download URL for the given key."""
        # NOTE(review): `.meta.endpoint_url` is a boto3 idiom; confirm that
        # google.cloud.storage.Client exposes it, otherwise this fallback path
        # (endpoint_url unset) raises AttributeError.
        endpoint_url = self.endpoint_url or self._get_client().meta.endpoint_url
        endpoint_url = endpoint_url.removesuffix("/")
        return f"{endpoint_url}/{self.bucket}/{self.prefix}/{quote(key)}"

    def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]:
        """Return object metadata for the object with the given key.

        :arg key: the key of the symbol file not including the prefix, i.e. the key in the format
            ``<debug-file>/<debug-id>/<symbols-file>``.

        :returns: An ObjectMetadata instance if the object exists, None otherwise.

        :raises StorageError: an unexpected backend-specific error was raised
        """
        bucket = self._get_bucket()
        gcs_key = f"{self.prefix}/{key}"
        try:
            blob = bucket.get_blob(gcs_key, timeout=self.timeout)
            if not blob:
                return None
        except ClientError as exc:
            raise StorageError(str(exc), backend=self) from exc
        gcs_metadata = blob.metadata or {}
        # "original_size" was stored as a string on upload; tolerate bad values.
        original_content_length = gcs_metadata.get("original_size")
        if original_content_length is not None:
            try:
                original_content_length = int(original_content_length)
            except ValueError:
                original_content_length = None
        metadata = ObjectMetadata(
            download_url=blob.public_url,
            content_type=blob.content_type,
            content_length=blob.size,
            content_encoding=blob.content_encoding,
            original_content_length=original_content_length,
            original_md5_sum=gcs_metadata.get("original_md5_hash"),
            last_modified=blob.updated,
        )
        return metadata

    def upload(self, key: str, body: BufferedReader, metadata: ObjectMetadata):
        """Upload the object with the given key and body to the storage backend.

        :arg key: the key of the symbol file not including the prefix, i.e. the key in the format
            ``<debug-file>/<debug-id>/<symbols-file>``.
        :arg body: A stream yielding the symbols file contents.
        :arg metadata: An ObjectMetadata instance with the metadata.

        :raises StorageError: an unexpected backend-specific error was raised
        """
        bucket = self._get_bucket()
        blob = bucket.blob(f"{self.prefix}/{key}")
        gcs_metadata = {}
        if metadata.original_content_length:
            # All metadata values must be strings.
            gcs_metadata["original_size"] = str(metadata.original_content_length)
        if metadata.original_md5_sum:
            gcs_metadata["original_md5_hash"] = metadata.original_md5_sum
        blob.metadata = gcs_metadata
        blob.content_type = metadata.content_type
        blob.content_encoding = metadata.content_encoding
        # Prevent "decompressive transcoding" on download. See bug 1827506.
        blob.cache_control = "no-transform"
        try:
            blob.upload_from_file(
                body, size=metadata.content_length, timeout=self.timeout
            )
        except ClientError as exc:
            raise StorageError(str(exc), backend=self) from exc
Loading