Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BACK-1720: implement HEAD requests for metadata fetcher #92

Merged
merged 2 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions offchain/metadata/adapters/arweave.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def parse_ar_url(self, url: str) -> str:
return url

async def gen_send(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -> httpx.Response: # type: ignore[no-untyped-def, valid-type] # noqa: E501
"""Format and send async request to ARWeave host.
"""Format and send an async `GET` request to ARWeave host at parsed url.

Args:
url (str): url to send request to
Expand All @@ -72,7 +72,7 @@ async def gen_send(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -
return await sess.get(self.parse_ar_url(url), timeout=self.timeout, follow_redirects=True) # type: ignore[no-any-return] # noqa: E501

def send(self, request: PreparedRequest, *args, **kwargs) -> Response: # type: ignore[no-untyped-def] # noqa: E501
"""Format and send request to ARWeave host.
"""Format and send a `GET` request to ARWeave host at parsed url.

Args:
request (PreparedRequest): incoming request
Expand All @@ -83,3 +83,15 @@ def send(self, request: PreparedRequest, *args, **kwargs) -> Response: # type:
request.url = self.parse_ar_url(request.url) # type: ignore[arg-type]
kwargs["timeout"] = self.timeout
return super().send(request, *args, **kwargs)

async def gen_head(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -> httpx.Response:  # type: ignore[no-untyped-def, valid-type] # noqa: E501
    """Issue an async `HEAD` request against the ARWeave host.

    The incoming url is first passed through `parse_ar_url`; the result is
    requested with this adapter's timeout, following redirects.

    Args:
        url (str): url to send request to
        sess (httpx.AsyncClient()): async client used to issue the request

    Returns:
        httpx.Response: response from ARWeave host.
    """
    parsed_url = self.parse_ar_url(url)
    head_response = await sess.head(parsed_url, timeout=self.timeout, follow_redirects=True)
    return head_response  # type: ignore[no-any-return]
31 changes: 29 additions & 2 deletions offchain/metadata/adapters/base_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,22 @@ def __init__(self, *args, **kwargs): # type: ignore[no-untyped-def]
super().__init__()

async def gen_send(self, url: str, *args, **kwargs) -> httpx.Response: # type: ignore[no-untyped-def] # noqa: E501
"""Format and send async request to url host.
"""
Format and send an async `GET` request to url host.
Abstract method, implemented in subclasses.

Args:
url (str): url to send request to

Returns:
httpx.Response: response from host.
"""
raise NotImplementedError

async def gen_head(self, url: str, *args, **kwargs) -> httpx.Response: # type: ignore[no-untyped-def] # noqa: E501
"""
Format and send an async `HEAD` request to url host.
Abstract method, implemented in subclasses.

Args:
url (str): url to send request to
Expand All @@ -40,7 +55,7 @@ def __init__( # type: ignore[no-untyped-def]
super().__init__(pool_connections, pool_maxsize, max_retries, pool_block)

async def gen_send(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -> httpx.Response: # type: ignore[no-untyped-def, valid-type] # noqa: E501
"""Format and send async request to url host.
"""Format and send an async `GET` request to url host.

Args:
url (str): url to send request to
Expand All @@ -50,6 +65,18 @@ async def gen_send(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -
"""
return await sess.get(url, follow_redirects=True) # type: ignore[no-any-return]

async def gen_head(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -> httpx.Response:  # type: ignore[no-untyped-def, valid-type] # noqa: E501
    """Issue an async `HEAD` request to the url host, following redirects.

    The url is used as given and no explicit timeout is passed to the client.

    Args:
        url (str): url to send request to
        sess (httpx.AsyncClient()): async client used to issue the request

    Returns:
        httpx.Response: response from host.
    """
    head_response = await sess.head(url, follow_redirects=True)
    return head_response  # type: ignore[no-any-return]


Adapter = Union[BaseAdapter, HTTPAdapter]

Expand Down
25 changes: 23 additions & 2 deletions offchain/metadata/adapters/data_uri.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import base64
from email.message import Message
from urllib.request import urlopen

import httpx
Expand Down Expand Up @@ -28,7 +29,7 @@ def __init__(self, *args, **kwargs): # type: ignore[no-untyped-def]
super().__init__(*args, **kwargs) # type: ignore[no-untyped-call]

async def gen_send(self, url: str, *args, **kwargs) -> httpx.Response: # type: ignore[no-untyped-def] # noqa: E501
"""Handle async data uri request.
"""Handle async data uri `GET` request.

Args:
url (str): url
Expand All @@ -44,7 +45,7 @@ async def gen_send(self, url: str, *args, **kwargs) -> httpx.Response: # type:
return response

def send(self, request: PreparedRequest, *args, **kwargs): # type: ignore[no-untyped-def] # noqa: E501
"""Handle data uri request.
"""Handle data uri `GET` request.

Args:
request (PreparedRequest): incoming request
Expand All @@ -66,5 +67,25 @@ def send(self, request: PreparedRequest, *args, **kwargs): # type: ignore[no-un
finally:
return newResponse

async def gen_head(self, url: str, *args, **kwargs) -> httpx.Response:  # type: ignore[no-untyped-def] # noqa: E501
    """Handle async data uri `HEAD` request.

    The uri is opened with `urlopen` and only the header fields it reports
    are copied into a synthetic response; the body is never read.

    Args:
        url (str): url

    Returns:
        httpx.Response: body-less 200 response carrying the data uri headers.
    """
    with urlopen(url) as r:
        message: Message = r.info()
        # Use the public Message API instead of the private `_headers` list;
        # both yield the same (name, value) pairs.
        response_headers = dict(message.items())
    return httpx.Response(
        status_code=200,
        headers=response_headers,
        request=httpx.Request(method="HEAD", url=url),
    )

def close(self): # type: ignore[no-untyped-def]
self.response.close()
14 changes: 13 additions & 1 deletion offchain/metadata/adapters/ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def make_request_url(self, request_url: str, gateway: Optional[str] = None) -> s
return build_request_url(gateway=gateway, request_url=request_url)

async def gen_send(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -> httpx.Response: # type: ignore[no-untyped-def, valid-type] # noqa: E501
"""Format and send async request to IPFS host.
"""Format and send an async `GET` request to IPFS host.

Args:
url (str): url to send request to
Expand All @@ -121,3 +121,15 @@ def send(self, request: PreparedRequest, *args, **kwargs) -> Response: # type:

kwargs["timeout"] = self.timeout
return super().send(request, *args, **kwargs)

async def gen_head(self, url: str, sess: httpx.AsyncClient(), *args, **kwargs) -> httpx.Response:  # type: ignore[no-untyped-def, valid-type] # noqa: E501
    """Issue an async `HEAD` request against the IPFS host.

    The incoming url is first converted with `make_request_url`; the result
    is requested with this adapter's timeout, following redirects.

    Args:
        url (str): url to send request to
        sess (httpx.AsyncClient()): async client session

    Returns:
        httpx.Response: response from IPFS host.
    """
    request_url = self.make_request_url(url)
    head_response = await sess.head(request_url, timeout=self.timeout, follow_redirects=True)
    return head_response  # type: ignore[no-any-return]
25 changes: 16 additions & 9 deletions offchain/metadata/fetchers/metadata_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _head(self, uri: str): # type: ignore[no-untyped-def]
def _get(self, uri: str): # type: ignore[no-untyped-def]
return self.sess.get(uri, timeout=self.timeout, allow_redirects=True)

async def _gen(self, uri: str) -> httpx.Response:
async def _gen(self, uri: str, method: Optional[str] = "GET") -> httpx.Response:
from offchain.metadata.pipelines.metadata_pipeline import (
DEFAULT_ADAPTER_CONFIGS,
)
Expand All @@ -78,13 +78,21 @@ async def _gen(self, uri: str) -> httpx.Response:
adapter = adapter_config.adapter_cls(
host_prefixes=adapter_config.host_prefixes, **adapter_config.kwargs
)
return await adapter.gen_send(
url=uri, timeout=self.timeout, sess=self.async_sess
)
if method == "HEAD":
return await adapter.gen_head(
url=uri, timeout=self.timeout, sess=self.async_sess
)
else:
return await adapter.gen_send(
url=uri, timeout=self.timeout, sess=self.async_sess
)
return await self.async_sess.get(
uri, timeout=self.timeout, follow_redirects=True
)

async def _gen_head(self, uri: str) -> httpx.Response:
    """Send an async `HEAD` request for `uri` by delegating to `_gen` with `method="HEAD"`."""
    head_response = await self._gen(uri=uri, method="HEAD")
    return head_response

def fetch_mime_type_and_size(self, uri: str) -> Tuple[str, int]:
"""Fetch the mime type and size of the content at a given uri.

Expand Down Expand Up @@ -123,11 +131,10 @@ async def gen_fetch_mime_type_and_size(self, uri: str) -> Tuple[str, int]:
tuple[str, int]: mime type and size
"""
try:
# try skip head request
# res = await self._gen_head(uri)
# # For any error status, try a get
# if 300 <= res.status_code < 600:
res = await self._gen(uri)
res = await self._gen_head(uri)
# For any error status, try a get
if 300 <= res.status_code < 600:
res = await self._gen(uri)
res.raise_for_status()
headers = res.headers
size = headers.get("content-length", 0)
Expand Down
Loading
Loading