BACK-1791: ensure prop IPFS gateway gets used instead of pinata #94

Merged 1 commit on Nov 10, 2023
3 changes: 2 additions & 1 deletion offchain/metadata/adapters/__init__.py
@@ -1,5 +1,6 @@
from .arweave import ARWeaveAdapter
from .base_adapter import AdapterConfig, BaseAdapter
from .base_adapter import Adapter, AdapterConfig, BaseAdapter
from .data_uri import DataURIAdapter
from .default_adapter_configs import DEFAULT_ADAPTER_CONFIGS
from .http_adapter import HTTPAdapter
from .ipfs import IPFSAdapter
3 changes: 2 additions & 1 deletion offchain/metadata/adapters/data_uri.py
@@ -62,7 +62,8 @@ def send(self, request: PreparedRequest, *args, **kwargs): # type: ignore[no-untyped-def]
newResponse.status_code = 200
newResponse.headers = response.headers
newResponse.raw = response
newResponse.encoding = "utf-8"
newResponse._content = response.read()
newResponse.encoding = response.info().get_param("charset") or "utf-8"
self.response = response
finally:
return newResponse
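The replaced encoding line above derives the charset from the data URI's declared media type instead of hard-coding UTF-8. A minimal standalone sketch of that lookup, assuming the adapter resolves `data:` URIs through `urllib` (which is what the `response.info().get_param(...)` call suggests); the URI below is an illustrative value, not one taken from this PR:

```python
import urllib.request

# urllib's built-in DataHandler serves data: URLs and exposes the declared
# media type via response.info(), an email.message.Message-like object.
uri = "data:text/plain;charset=iso-8859-1,caf%E9"
response = urllib.request.urlopen(uri)

# Same fallback as the adapter: use the declared charset, else UTF-8.
charset = response.info().get_param("charset") or "utf-8"
body = response.read()

print(charset)               # iso-8859-1
print(body.decode(charset))  # café
```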
30 changes: 30 additions & 0 deletions offchain/metadata/adapters/default_adapter_configs.py
@@ -0,0 +1,30 @@
from offchain.metadata.adapters.arweave import ARWeaveAdapter
from offchain.metadata.adapters.base_adapter import AdapterConfig
from offchain.metadata.adapters.data_uri import DataURIAdapter
from offchain.metadata.adapters.http_adapter import HTTPAdapter
from offchain.metadata.adapters.ipfs import IPFSAdapter

DEFAULT_ADAPTER_CONFIGS: list[AdapterConfig] = [
    AdapterConfig(
        adapter_cls=ARWeaveAdapter,
        mount_prefixes=["ar://"],
        host_prefixes=["https://arweave.net/"],
        kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
    ),
    AdapterConfig(adapter_cls=DataURIAdapter, mount_prefixes=["data:"]),
    AdapterConfig(
        adapter_cls=IPFSAdapter,
        mount_prefixes=[
            "ipfs://",
            "https://gateway.pinata.cloud/",
            "https://ipfs.io/",
        ],
        host_prefixes=["https://gateway.pinata.cloud/ipfs/"],
        kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
    ),
    AdapterConfig(
        adapter_cls=HTTPAdapter,
        mount_prefixes=["https://", "http://"],
        kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
    ),
]
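This module is what lets callers route IPFS traffic somewhere other than Pinata: start from `DEFAULT_ADAPTER_CONFIGS` and swap only the IPFS entry. A hedged sketch of that pattern; the gateway URL is a hypothetical placeholder, and the `AdapterConfig` fields are assumed to be readable as attributes exactly as they are constructed above:

```python
from offchain.metadata.adapters import (
    DEFAULT_ADAPTER_CONFIGS,
    AdapterConfig,
    IPFSAdapter,
)

# Hypothetical gateway URL; substitute the IPFS provider you actually run.
MY_IPFS_GATEWAY = "https://my-ipfs-gateway.example.com/ipfs/"

custom_configs: list[AdapterConfig] = []
for config in DEFAULT_ADAPTER_CONFIGS:
    if config.adapter_cls is IPFSAdapter:
        # Keep the default mount prefixes but send the traffic to our gateway.
        config = AdapterConfig(
            adapter_cls=IPFSAdapter,
            mount_prefixes=config.mount_prefixes,
            host_prefixes=[MY_IPFS_GATEWAY],
            kwargs=config.kwargs,
        )
    custom_configs.append(config)
```

The resulting list can be handed to `MetadataFetcher(async_adapter_configs=...)` or `MetadataPipeline(adapter_configs=...)`, which is what the changes below wire up.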
3 changes: 2 additions & 1 deletion offchain/metadata/fetchers/base_fetcher.py
@@ -40,7 +40,8 @@ def set_max_retries(self, new_max_retries: int): # type: ignore[no-untyped-def]
pass

def register_adapter(self, adapter: Adapter, url_prefix: str): # type: ignore[no-untyped-def] # noqa: E501
"""Register an adapter to a url prefix.
"""Register an adapter to a url prefix. Note this only affects synchronous http
requests (via the requests library).

Args:
adapter (Adapter): an Adapter instance to register.
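The docstring addition is the key caveat: `register_adapter` only touches the underlying `requests` session, so async fetches are unaffected by it. A brief sketch of the sync-only path, assuming the adapter constructor accepts the same keyword arguments used in the default configs; the gateway choice is illustrative:

```python
from offchain.metadata.adapters import IPFSAdapter
from offchain.metadata.fetchers.metadata_fetcher import MetadataFetcher

fetcher = MetadataFetcher()

# Mounted on the internal requests.Session only; async requests are routed
# through async_adapter_configs instead (see metadata_fetcher.py below).
fetcher.register_adapter(
    IPFSAdapter(
        host_prefixes=["https://ipfs.io/ipfs/"],  # illustrative gateway choice
        pool_connections=100,
        pool_maxsize=1000,
        max_retries=0,
    ),
    "ipfs://",
)
```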
58 changes: 34 additions & 24 deletions offchain/metadata/fetchers/metadata_fetcher.py
@@ -5,7 +5,7 @@
import requests

from offchain.logger.logging import logger
from offchain.metadata.adapters.base_adapter import Adapter, AdapterConfig
from offchain.metadata.adapters import Adapter, AdapterConfig, DEFAULT_ADAPTER_CONFIGS
from offchain.metadata.fetchers.base_fetcher import BaseFetcher
from offchain.metadata.registries.fetcher_registry import FetcherRegistry

@@ -24,7 +24,7 @@ def __init__(
self,
timeout: int = 30,
max_retries: int = 0,
async_adapter_configs: Optional[list[AdapterConfig]] = None,
async_adapter_configs: Optional[list[AdapterConfig]] = DEFAULT_ADAPTER_CONFIGS,
) -> None:
self.timeout = timeout
self.max_retries = max_retries
@@ -33,7 +33,8 @@ def __init__(
self.async_adapter_configs = async_adapter_configs

def register_adapter(self, adapter: Adapter, url_prefix: str): # type: ignore[no-untyped-def] # noqa: E501
"""Register an adapter to a url prefix.
"""Register an adapter to a url prefix. Note this only affects synchronous http
requests (via the requests library).

Args:
adapter (Adapter): an Adapter instance to register.
@@ -57,35 +58,44 @@ def set_timeout(self, timeout: int): # type: ignore[no-untyped-def]
"""
self.timeout = timeout

def _get_async_adapter_for_uri(self, uri: str) -> Optional[Adapter]:
if self.async_adapter_configs is None:
logger.error("Async adapter config doesn't exist. This shouldn't happen!")
return None

for async_adapter_config in self.async_adapter_configs:
if any(
uri.startswith(prefix) for prefix in async_adapter_config.mount_prefixes
):
logger.debug(
f"Selected {async_adapter_config.adapter_cls.__name__} for making async http requests for uri={uri}" # noqa: E501
)
return async_adapter_config.adapter_cls(
host_prefixes=async_adapter_config.host_prefixes,
**async_adapter_config.kwargs,
)
logger.warning(
f"Unable to selected an adapter for async http requests for uri={uri}"
)
return None

def _head(self, uri: str): # type: ignore[no-untyped-def]
return self.sess.head(uri, timeout=self.timeout, allow_redirects=True)

def _get(self, uri: str): # type: ignore[no-untyped-def]
return self.sess.get(uri, timeout=self.timeout, allow_redirects=True)

async def _gen(self, uri: str, method: Optional[str] = "GET") -> httpx.Response:
from offchain.metadata.pipelines.metadata_pipeline import (
DEFAULT_ADAPTER_CONFIGS,
)

configs = DEFAULT_ADAPTER_CONFIGS

if self.async_adapter_configs:
configs = self.async_adapter_configs

for adapter_config in configs:
if any(uri.startswith(prefix) for prefix in adapter_config.mount_prefixes):
adapter = adapter_config.adapter_cls(
host_prefixes=adapter_config.host_prefixes, **adapter_config.kwargs
async_adapter = self._get_async_adapter_for_uri(uri)
if async_adapter is not None:
if method == "HEAD":
return await async_adapter.gen_head(
url=uri, timeout=self.timeout, sess=self.async_sess
)
else:
return await async_adapter.gen_send(
url=uri, timeout=self.timeout, sess=self.async_sess
)
if method == "HEAD":
return await adapter.gen_head(
url=uri, timeout=self.timeout, sess=self.async_sess
)
else:
return await adapter.gen_send(
url=uri, timeout=self.timeout, sess=self.async_sess
)
return await self.async_sess.get(
uri, timeout=self.timeout, follow_redirects=True
)
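Taken together, the new `_get_async_adapter_for_uri` helper and the `DEFAULT_ADAPTER_CONFIGS` default mean async fetches now honor the configured IPFS gateway rather than silently falling back to Pinata. A hedged usage sketch; `gen_fetch_content` is the coroutine the integration test below exercises, and the CID is the one used in that test:

```python
import asyncio

from offchain.metadata.adapters import DEFAULT_ADAPTER_CONFIGS
from offchain.metadata.fetchers.metadata_fetcher import MetadataFetcher


async def main() -> None:
    # Explicit here for clarity; DEFAULT_ADAPTER_CONFIGS is now the default anyway.
    fetcher = MetadataFetcher(async_adapter_configs=DEFAULT_ADAPTER_CONFIGS)

    # "ipfs://" matches the IPFSAdapter config, so the request is rewritten to
    # that config's host prefix before being sent over httpx.
    content = await fetcher.gen_fetch_content(
        "ipfs://QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485"
    )
    print(content)


asyncio.run(main())
```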
17 changes: 11 additions & 6 deletions offchain/metadata/pipelines/metadata_pipeline.py
@@ -23,6 +23,7 @@
from offchain.metadata.registries.parser_registry import ParserRegistry
from offchain.web3.contract_caller import ContractCaller

# TODO(luke): move the data repo's usage of this symbol to the new file, then remove this
DEFAULT_ADAPTER_CONFIGS: list[AdapterConfig] = [
AdapterConfig(
adapter_cls=ARWeaveAdapter,
@@ -31,11 +32,6 @@
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(adapter_cls=DataURIAdapter, mount_prefixes=["data:"]),
AdapterConfig(
adapter_cls=HTTPAdapter,
mount_prefixes=["https://", "http://"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(
adapter_cls=IPFSAdapter,
mount_prefixes=[
@@ -46,6 +42,11 @@
host_prefixes=["https://gateway.pinata.cloud/ipfs/"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(
adapter_cls=HTTPAdapter,
mount_prefixes=["https://", "http://"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
]

DEFAULT_PARSERS = (
@@ -66,7 +67,7 @@ class MetadataPipeline(BasePipeline):
mime type, and size by making network requests.
parsers (list[BaseParser], optional): a list of parser instances for parsing token metadata.
adapter_configs: (list[AdapterConfig], optional): a list of adapter configs used to register adapters
to specified url prefixes.
to specified url prefixes. This configuration affects both sync and async requests.
""" # noqa: E501

def __init__(
@@ -79,6 +80,10 @@ def __init__(
self.contract_caller = contract_caller or ContractCaller()
self.fetcher = fetcher or MetadataFetcher(async_adapter_configs=adapter_configs)
if adapter_configs is None:
# TODO(luke): move the line below to the file's import section once this
# file's DEFAULT_ADAPTER_CONFIGS is gone
from offchain.metadata.adapters import DEFAULT_ADAPTER_CONFIGS

adapter_configs = DEFAULT_ADAPTER_CONFIGS
for adapter_config in adapter_configs:
self.mount_adapter(
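Because the constructor now forwards `adapter_configs` to the `MetadataFetcher` and also mounts each config on the sync session, one list configures both request paths, as the updated docstring notes. A short sketch under that assumption; the gateway is the one from the integration test below, and in practice you would likely start from `DEFAULT_ADAPTER_CONFIGS` and swap the IPFS entry as shown earlier:

```python
from offchain.metadata.adapters import AdapterConfig, IPFSAdapter
from offchain.metadata.pipelines.metadata_pipeline import MetadataPipeline

ipfs_config = AdapterConfig(
    adapter_cls=IPFSAdapter,
    mount_prefixes=["ipfs://", "https://gateway.pinata.cloud/", "https://ipfs.io/"],
    host_prefixes=["https://ipfs.decentralized-content.com/ipfs/"],
    kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
)

# The same config list is mounted on the requests session (sync) and passed to
# the fetcher for async adapter selection (httpx).
pipeline = MetadataPipeline(adapter_configs=[ipfs_config])
```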
2 changes: 1 addition & 1 deletion offchain/utils/utils.py
@@ -33,7 +33,7 @@ async def wrapped(*args, **kwargs): # type: ignore[no-untyped-def]
logger.error(msg)
if not silent:
raise
logger.warn(msg)
logger.warning(msg)
await asyncio.sleep(retry_delay)
return None

237 changes: 137 additions & 100 deletions tests/metadata/fetchers/test_metadata_fetcher.py

Large diffs are not rendered by default.

71 changes: 65 additions & 6 deletions tests/metadata/pipelines/test_metadata_pipeline.py
@@ -1,12 +1,17 @@
# flake8: noqa: E501

from pytest_httpx import HTTPXMock
from typing import Tuple
from unittest.mock import AsyncMock, MagicMock

import pytest

from offchain.metadata.adapters.http_adapter import HTTPAdapter
from offchain.metadata.adapters.ipfs import IPFSAdapter
from offchain.metadata.adapters import (
AdapterConfig,
DEFAULT_ADAPTER_CONFIGS,
HTTPAdapter,
IPFSAdapter,
)
from offchain.metadata.fetchers.metadata_fetcher import MetadataFetcher
from offchain.metadata.models.metadata import (
Attribute,
@@ -18,10 +23,8 @@
)
from offchain.metadata.models.metadata_processing_error import MetadataProcessingError
from offchain.metadata.models.token import Token
from offchain.metadata.pipelines.metadata_pipeline import ( # type: ignore[attr-defined]
AdapterConfig,
MetadataPipeline,
)
from offchain.metadata.pipelines.metadata_pipeline import MetadataPipeline

from offchain.web3.contract_caller import ContractCaller
from offchain.web3.jsonrpc import EthereumJSONRPC

@@ -59,6 +62,62 @@ def test_metadata_pipeline_mounts_adapters(self): # type: ignore[no-untyped-def]
== ipfs_adapter
)

@pytest.mark.asyncio
async def test_ipfs_adapter_uses_specified_ipfs_provider(
self, httpx_mock: HTTPXMock
):
# integration test, the following setup reflects usage in prod
IPFS_PROVIDER = "https://ipfs.decentralized-content.com/ipfs/"

def set_async_adapters() -> list[AdapterConfig]:
async_adapters = []
for adapter in DEFAULT_ADAPTER_CONFIGS:
if adapter.adapter_cls is IPFSAdapter:
ipfs_adapter = AdapterConfig(
adapter_cls=IPFSAdapter,
mount_prefixes=[
"ipfs://",
"https://gateway.pinata.cloud/",
"https://ipfs.io/",
"https://ipfs.decentralized-content.com/",
],
host_prefixes=[IPFS_PROVIDER],
)
async_adapters.append(ipfs_adapter)

else:
async_adapters.append(adapter)

return async_adapters

adapters = set_async_adapters()
pipeline = MetadataPipeline(adapter_configs=adapters)

httpx_mock.add_response(
json=[
{
"name": "Beast #485",
"image": "https://gateway.pinata.cloud/ipfs/QmcimtwbWGKXLJ3pTMRu2ncEeeuK9DUwYye6uhJhZC9C6A/beast485.png",
"external_url": "https://tierzeronft.com/",
"attributes": [
{"trait_type": "Background", "value": "Blue"},
{"trait_type": "Fur", "value": "Dark Grey"},
{"trait_type": "Shoes", "value": "Feet"},
{"trait_type": "Eyes", "value": "Green"},
{"trait_type": "Hat", "value": "Headset"},
{"trait_type": "Unit", "value": "Unit I"},
],
}
],
url=f"{IPFS_PROVIDER}QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485",
)
content = await pipeline.fetcher.gen_fetch_content(
"https://gateway.pinata.cloud/ipfs/QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485"
)
assert (
content is not None
), "Call to gateway.pinata.cloud did not get redirected to ipfs.decentralized-content.com"

def test_metadata_pipeline_fetch_token_uri(self, raw_crypto_coven_metadata): # type: ignore[no-untyped-def]
token = Token(
chain_identifier="ETHEREUM-MAINNET",