Skip to content

Commit

Permalink
Merge pull request #94 from ourzora/BACK-1791
Browse files Browse the repository at this point in the history
BACK-1791: ensure prop IPFS gateway gets used instead of pinata
  • Loading branch information
zylora authored Nov 10, 2023
2 parents 199fec0 + d607902 commit b88c916
Show file tree
Hide file tree
Showing 9 changed files with 284 additions and 140 deletions.
3 changes: 2 additions & 1 deletion offchain/metadata/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .arweave import ARWeaveAdapter
from .base_adapter import AdapterConfig, BaseAdapter
from .base_adapter import Adapter, AdapterConfig, BaseAdapter
from .data_uri import DataURIAdapter
from .default_adapter_configs import DEFAULT_ADAPTER_CONFIGS
from .http_adapter import HTTPAdapter
from .ipfs import IPFSAdapter
3 changes: 2 additions & 1 deletion offchain/metadata/adapters/data_uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def send(self, request: PreparedRequest, *args, **kwargs): # type: ignore[no-un
newResponse.status_code = 200
newResponse.headers = response.headers
newResponse.raw = response
newResponse.encoding = "utf-8"
newResponse._content = response.read()
newResponse.encoding = response.info().get_param("charset") or "utf-8"
self.response = response
finally:
return newResponse
Expand Down
30 changes: 30 additions & 0 deletions offchain/metadata/adapters/default_adapter_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from offchain.metadata.adapters.arweave import ARWeaveAdapter
from offchain.metadata.adapters.base_adapter import AdapterConfig
from offchain.metadata.adapters.data_uri import DataURIAdapter
from offchain.metadata.adapters.http_adapter import HTTPAdapter
from offchain.metadata.adapters.ipfs import IPFSAdapter

# Default adapter configs shared by the metadata pipeline and the metadata fetcher.
#
# Order matters for any consumer that picks the first config whose mount prefix
# matches a uri: the IPFS entry mounts on https gateway urls
# ("https://gateway.pinata.cloud/", "https://ipfs.io/") that the catch-all
# "https://" prefix of the HTTP entry would also match, so the IPFS entry has to
# be listed before the HTTP entry.
DEFAULT_ADAPTER_CONFIGS: list[AdapterConfig] = [
AdapterConfig(
adapter_cls=ARWeaveAdapter,
mount_prefixes=["ar://"],
host_prefixes=["https://arweave.net/"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(adapter_cls=DataURIAdapter, mount_prefixes=["data:"]),
# Must stay ahead of the generic HTTP entry below (see the note above the list).
AdapterConfig(
adapter_cls=IPFSAdapter,
mount_prefixes=[
"ipfs://",
"https://gateway.pinata.cloud/",
"https://ipfs.io/",
],
host_prefixes=["https://gateway.pinata.cloud/ipfs/"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
# Catch-all for plain http(s) urls; keep it last.
AdapterConfig(
adapter_cls=HTTPAdapter,
mount_prefixes=["https://", "http://"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
]
3 changes: 2 additions & 1 deletion offchain/metadata/fetchers/base_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def set_max_retries(self, new_max_retries: int): # type: ignore[no-untyped-def]
pass

def register_adapter(self, adapter: Adapter, url_prefix: str): # type: ignore[no-untyped-def] # noqa: E501
"""Register an adapter to a url prefix.
"""Register an adapter to a url prefix. Note this only affects synchronous http
requests (via the requests library).
Args:
adapter (Adapter): an Adapter instance to register.
Expand Down
58 changes: 34 additions & 24 deletions offchain/metadata/fetchers/metadata_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import requests

from offchain.logger.logging import logger
from offchain.metadata.adapters.base_adapter import Adapter, AdapterConfig
from offchain.metadata.adapters import Adapter, AdapterConfig, DEFAULT_ADAPTER_CONFIGS
from offchain.metadata.fetchers.base_fetcher import BaseFetcher
from offchain.metadata.registries.fetcher_registry import FetcherRegistry

Expand All @@ -24,7 +24,7 @@ def __init__(
self,
timeout: int = 30,
max_retries: int = 0,
async_adapter_configs: Optional[list[AdapterConfig]] = None,
async_adapter_configs: Optional[list[AdapterConfig]] = DEFAULT_ADAPTER_CONFIGS,
) -> None:
self.timeout = timeout
self.max_retries = max_retries
Expand All @@ -33,7 +33,8 @@ def __init__(
self.async_adapter_configs = async_adapter_configs

def register_adapter(self, adapter: Adapter, url_prefix: str): # type: ignore[no-untyped-def] # noqa: E501
"""Register an adapter to a url prefix.
"""Register an adapter to a url prefix. Note this only affects synchronous http
requests (via the requests library).
Args:
adapter (Adapter): an Adapter instance to register.
Expand All @@ -57,35 +58,44 @@ def set_timeout(self, timeout: int): # type: ignore[no-untyped-def]
"""
self.timeout = timeout

def _get_async_adapter_for_uri(self, uri: str) -> Optional[Adapter]:
    """Select and instantiate the async adapter whose mount prefix matches a uri.

    Adapter configs are checked in list order and the first config with a
    matching mount prefix wins, so specific prefixes should be listed before
    generic ones such as "https://".

    Args:
        uri (str): the uri that is about to be requested.

    Returns:
        Optional[Adapter]: a newly constructed adapter for the first matching
            config, or None if no adapter configs are set or none matches.
    """
    if self.async_adapter_configs is None:
        logger.error("Async adapter config doesn't exist. This shouldn't happen!")
        return None

    for async_adapter_config in self.async_adapter_configs:
        # str.startswith accepts a tuple of candidates, which replaces the
        # explicit any(...) scan over the mount prefixes.
        if uri.startswith(tuple(async_adapter_config.mount_prefixes)):
            logger.debug(
                f"Selected {async_adapter_config.adapter_cls.__name__} for making async http requests for uri={uri}"  # noqa: E501
            )
            return async_adapter_config.adapter_cls(
                host_prefixes=async_adapter_config.host_prefixes,
                **async_adapter_config.kwargs,
            )

    logger.warning(
        f"Unable to select an adapter for async http requests for uri={uri}"
    )
    return None

def _head(self, uri: str): # type: ignore[no-untyped-def]
return self.sess.head(uri, timeout=self.timeout, allow_redirects=True)

def _get(self, uri: str): # type: ignore[no-untyped-def]
return self.sess.get(uri, timeout=self.timeout, allow_redirects=True)

async def _gen(self, uri: str, method: Optional[str] = "GET") -> httpx.Response:
from offchain.metadata.pipelines.metadata_pipeline import (
DEFAULT_ADAPTER_CONFIGS,
)

configs = DEFAULT_ADAPTER_CONFIGS

if self.async_adapter_configs:
configs = self.async_adapter_configs

for adapter_config in configs:
if any(uri.startswith(prefix) for prefix in adapter_config.mount_prefixes):
adapter = adapter_config.adapter_cls(
host_prefixes=adapter_config.host_prefixes, **adapter_config.kwargs
async_adapter = self._get_async_adapter_for_uri(uri)
if async_adapter is not None:
if method == "HEAD":
return await async_adapter.gen_head(
url=uri, timeout=self.timeout, sess=self.async_sess
)
else:
return await async_adapter.gen_send(
url=uri, timeout=self.timeout, sess=self.async_sess
)
if method == "HEAD":
return await adapter.gen_head(
url=uri, timeout=self.timeout, sess=self.async_sess
)
else:
return await adapter.gen_send(
url=uri, timeout=self.timeout, sess=self.async_sess
)
return await self.async_sess.get(
uri, timeout=self.timeout, follow_redirects=True
)
Expand Down
17 changes: 11 additions & 6 deletions offchain/metadata/pipelines/metadata_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from offchain.metadata.registries.parser_registry import ParserRegistry
from offchain.web3.contract_caller import ContractCaller

# TODO(luke): move the data repo's usage of this symbol to the new file, then remove this
DEFAULT_ADAPTER_CONFIGS: list[AdapterConfig] = [
AdapterConfig(
adapter_cls=ARWeaveAdapter,
Expand All @@ -31,11 +32,6 @@
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(adapter_cls=DataURIAdapter, mount_prefixes=["data:"]),
AdapterConfig(
adapter_cls=HTTPAdapter,
mount_prefixes=["https://", "http://"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(
adapter_cls=IPFSAdapter,
mount_prefixes=[
Expand All @@ -46,6 +42,11 @@
host_prefixes=["https://gateway.pinata.cloud/ipfs/"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
AdapterConfig(
adapter_cls=HTTPAdapter,
mount_prefixes=["https://", "http://"],
kwargs={"pool_connections": 100, "pool_maxsize": 1000, "max_retries": 0},
),
]

DEFAULT_PARSERS = (
Expand All @@ -66,7 +67,7 @@ class MetadataPipeline(BasePipeline):
mime type, and size by making network requests.
parsers (list[BaseParser], optional): a list of parser instances for parsing token metadata.
adapter_configs: (list[AdapterConfig], optional): a list of adapter configs used to register adapters
to specified url prefixes.
to specified url prefixes. This configuration affects both sync and async requests.
""" # noqa: E501

def __init__(
Expand All @@ -79,6 +80,10 @@ def __init__(
self.contract_caller = contract_caller or ContractCaller()
self.fetcher = fetcher or MetadataFetcher(async_adapter_configs=adapter_configs)
if adapter_configs is None:
# TODO(luke): move the line below to the file's import section once this
# file's DEFAULT_ADAPTER_CONFIGS is gone
from offchain.metadata.adapters import DEFAULT_ADAPTER_CONFIGS

adapter_configs = DEFAULT_ADAPTER_CONFIGS
for adapter_config in adapter_configs:
self.mount_adapter(
Expand Down
2 changes: 1 addition & 1 deletion offchain/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ async def wrapped(*args, **kwargs): # type: ignore[no-untyped-def]
logger.error(msg)
if not silent:
raise
logger.warn(msg)
logger.warning(msg)
await asyncio.sleep(retry_delay)
return None

Expand Down
237 changes: 137 additions & 100 deletions tests/metadata/fetchers/test_metadata_fetcher.py

Large diffs are not rendered by default.

71 changes: 65 additions & 6 deletions tests/metadata/pipelines/test_metadata_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# flake8: noqa: E501

from pytest_httpx import HTTPXMock
from typing import Tuple
from unittest.mock import AsyncMock, MagicMock

import pytest

from offchain.metadata.adapters.http_adapter import HTTPAdapter
from offchain.metadata.adapters.ipfs import IPFSAdapter
from offchain.metadata.adapters import (
AdapterConfig,
DEFAULT_ADAPTER_CONFIGS,
HTTPAdapter,
IPFSAdapter,
)
from offchain.metadata.fetchers.metadata_fetcher import MetadataFetcher
from offchain.metadata.models.metadata import (
Attribute,
Expand All @@ -18,10 +23,8 @@
)
from offchain.metadata.models.metadata_processing_error import MetadataProcessingError
from offchain.metadata.models.token import Token
from offchain.metadata.pipelines.metadata_pipeline import ( # type: ignore[attr-defined]
AdapterConfig,
MetadataPipeline,
)
from offchain.metadata.pipelines.metadata_pipeline import MetadataPipeline

from offchain.web3.contract_caller import ContractCaller
from offchain.web3.jsonrpc import EthereumJSONRPC

Expand Down Expand Up @@ -59,6 +62,62 @@ def test_metadata_pipeline_mounts_adapters(self): # type: ignore[no-untyped-def
== ipfs_adapter
)

@pytest.mark.asyncio
async def test_ipfs_adapter_uses_specified_ipfs_provider(
    self, httpx_mock: HTTPXMock
):
    """A pinata gateway url must be fetched through the configured IPFS provider.

    Only the custom provider url is mocked, so content can only come back if the
    request for the pinata url was rewritten to that provider.
    """
    # integration test, the following setup reflects usage in prod
    IPFS_PROVIDER = "https://ipfs.decentralized-content.com/ipfs/"
    PINATA_URI = "https://gateway.pinata.cloud/ipfs/QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485"

    def with_custom_ipfs_provider(config: AdapterConfig) -> AdapterConfig:
        # Non-IPFS configs pass through untouched; the IPFS config is replaced
        # by one that points at the custom provider.
        if config.adapter_cls is not IPFSAdapter:
            return config
        return AdapterConfig(
            adapter_cls=IPFSAdapter,
            mount_prefixes=[
                "ipfs://",
                "https://gateway.pinata.cloud/",
                "https://ipfs.io/",
                "https://ipfs.decentralized-content.com/",
            ],
            host_prefixes=[IPFS_PROVIDER],
        )

    adapter_configs = [
        with_custom_ipfs_provider(config) for config in DEFAULT_ADAPTER_CONFIGS
    ]
    pipeline = MetadataPipeline(adapter_configs=adapter_configs)

    mocked_metadata = [
        {
            "name": "Beast #485",
            "image": "https://gateway.pinata.cloud/ipfs/QmcimtwbWGKXLJ3pTMRu2ncEeeuK9DUwYye6uhJhZC9C6A/beast485.png",
            "external_url": "https://tierzeronft.com/",
            "attributes": [
                {"trait_type": "Background", "value": "Blue"},
                {"trait_type": "Fur", "value": "Dark Grey"},
                {"trait_type": "Shoes", "value": "Feet"},
                {"trait_type": "Eyes", "value": "Green"},
                {"trait_type": "Hat", "value": "Headset"},
                {"trait_type": "Unit", "value": "Unit I"},
            ],
        }
    ]
    httpx_mock.add_response(
        json=mocked_metadata,
        url=f"{IPFS_PROVIDER}QmY3Lz7DfQPtPkK4n5StZcqc2zA6cmJC7wcAgzYXvGQLGm/485",
    )

    content = await pipeline.fetcher.gen_fetch_content(PINATA_URI)
    assert (
        content is not None
    ), "Call to gateway.pinata.cloud did not get redirected to ipfs.decentralized-content.com"

def test_metadata_pipeline_fetch_token_uri(self, raw_crypto_coven_metadata): # type: ignore[no-untyped-def]
token = Token(
chain_identifier="ETHEREUM-MAINNET",
Expand Down

0 comments on commit b88c916

Please sign in to comment.