From 6b6096ce4f04b00fca80117d36a68968ac1b630c Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Mon, 1 Jul 2024 20:48:35 -0400 Subject: [PATCH 01/39] feat(wrappers/python): draft python wrapper --- wrappers/python/.editorconfig | 5 + wrappers/python/README.md | 1 + wrappers/python/pyproject.toml | 19 ++ .../python/src/pagefind_python/__init__.py | 18 ++ .../src/pagefind_python/index/__init__.py | 134 +++++++++++++++ wrappers/python/src/pagefind_python/py.typed | 0 .../src/pagefind_python/service/__init__.py | 144 ++++++++++++++++ .../src/pagefind_python/service/types.py | 162 ++++++++++++++++++ 8 files changed, 483 insertions(+) create mode 100644 wrappers/python/.editorconfig create mode 100644 wrappers/python/README.md create mode 100644 wrappers/python/pyproject.toml create mode 100644 wrappers/python/src/pagefind_python/__init__.py create mode 100644 wrappers/python/src/pagefind_python/index/__init__.py create mode 100644 wrappers/python/src/pagefind_python/py.typed create mode 100644 wrappers/python/src/pagefind_python/service/__init__.py create mode 100644 wrappers/python/src/pagefind_python/service/types.py diff --git a/wrappers/python/.editorconfig b/wrappers/python/.editorconfig new file mode 100644 index 00000000..4b8143be --- /dev/null +++ b/wrappers/python/.editorconfig @@ -0,0 +1,5 @@ +[*.{py,toml}] +indent_size = 4 +indent_style = space +trim_trailing_whitespace = true +insert_final_newline = true diff --git a/wrappers/python/README.md b/wrappers/python/README.md new file mode 100644 index 00000000..f03dd564 --- /dev/null +++ b/wrappers/python/README.md @@ -0,0 +1 @@ + diff --git a/wrappers/python/pyproject.toml b/wrappers/python/pyproject.toml new file mode 100644 index 00000000..c339b421 --- /dev/null +++ b/wrappers/python/pyproject.toml @@ -0,0 +1,19 @@ +[tool.poetry] +name = "pagefind_python" +version = "0.1.0" +description = "Python API for Pagefind" +authors = ["Your Name "] # TODO: add name and email +license = "MIT" +readme = "README.md" 
+include = [] # TODO: figure out path for local binaries **IN CI** + +[tool.poetry.dependencies] +python = ">=3.8" + +[tool.poetry.group.dev.dependencies] +ruff = "^0.5.0" +mypy = "^1.10.1" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/wrappers/python/src/pagefind_python/__init__.py b/wrappers/python/src/pagefind_python/__init__.py new file mode 100644 index 00000000..6f3619d0 --- /dev/null +++ b/wrappers/python/src/pagefind_python/__init__.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +# assume the python version is >= 3.8, which is the oldest LTS version as of +# the time of writing, 2024-06-29 +from types import Union, Optional +from pathlib import Path +import subprocess +import enum +import sys +import platform +import asyncio + +from .service import * + +# TODO: __all__ = [] + + +# https://docs.python.org/3/reference/datamodel.html#async-context-managers +# https://docs.python.org/3/library/contextlib.html#contextlib.asynccontextmanager diff --git a/wrappers/python/src/pagefind_python/index/__init__.py b/wrappers/python/src/pagefind_python/index/__init__.py new file mode 100644 index 00000000..22511af2 --- /dev/null +++ b/wrappers/python/src/pagefind_python/index/__init__.py @@ -0,0 +1,134 @@ +from typing import Dict, List, Optional, NamedTuple, cast + +from ..service.types import ( + InternalAddFileRequest, + InternalAddRecordRequest, + InternalDeleteIndexRequest, + InternalGetFilesRequest, + InternalIndexedFileResponse, + InternalAddDirRequest, + InternalIndexedDirResponse, +) +from ..service import Service + + +class HtmlFile(NamedTuple): + content: str + """The source HTML content of the file to be parsed.""" + + source_path: Optional[str] = None + """ + The source path of the HTML file if it were to exist on disk. + Must be a relative path, or an absolute path within the current working directory. + Pagefind will compute the result URL from this path. + + If not supplied, url must be supplied. 
+ + @example "about/index.html" + @example "/Users/user/Documents/site/about/index.html" + """ + + url: Optional[str] = None + """ + An explicit URL to use, instead of having Pagefind + compute the URL based on the sourcePath. + + If not supplied, source_path must be supplied. + + @example "/about/" + """ + + +class CustomRecord(NamedTuple): + """ + The data required for Pagefind to index a custom record that isn't backed by an HTML file + """ + + url: str + """The output URL of this record. Pagefind will not alter this.""" + + content: str + """The raw content of this record""" + + language: str + """What language is this record written in. Multiple languages will be split into separate indexes. Expects an ISO 639-1 code.""" + + meta: Optional[Dict[str, str]] = None + """The metadata to attach to this record. Supplying a `title` is highly recommended.""" + + filters: Optional[Dict[str, List[str]]] = None + """The filters to attach to this record. Filters are used to group records together.""" + + sort: Optional[Dict[str, str]] = None + """The sort keys to attach to this record.""" + + +class SiteDirectory(NamedTuple): + path: str + """The path to the directory to index. If relative, it's relative to the current working directory.""" + glob: Optional[str] = None + """Optionally, a custom glob to evaluate for finding files. 
Default to all HTML files.""" + + +class PagefindIndex: + def __init__(self, service: Service, index_id: int): + self._service = service + self.index_id = index_id + + async def add_html_file(self, html_file: HtmlFile) -> InternalIndexedFileResponse: + result = await self._service.send( + InternalAddFileRequest( + type="AddFile", + index_id=self.index_id, + url=html_file.url, + file_contents=html_file.content, + file_path=html_file.source_path, + ) + ) + assert result["type"] == "IndexedFile" + return cast(InternalIndexedFileResponse, result) + + async def add_directory( + self, directory: SiteDirectory + ) -> InternalIndexedDirResponse: + result = await self._service.send( + InternalAddDirRequest( + type="AddDir", + index_id=self.index_id, + path=directory.path, + glob=directory.glob, + ) + ) + assert result["type"] == "IndexedDir" + return cast(InternalIndexedDirResponse, result) + + async def get_files(self): + result = await self._service.send( + InternalGetFilesRequest(type="GetFiles", index_id=self.index_id) + ) + assert result["type"] == "GetFiles" + return cast(List[InternalIndexedFileResponse], result) + + async def delete_index(self): + result = await self._service.send( + InternalDeleteIndexRequest(type="DeleteIndex", index_id=self.index_id) + ) + assert result["type"] == "DeletedIndex" + + async def add_custom_record( + self, record: CustomRecord + ) -> InternalIndexedFileResponse: + result = await self._service.send( + InternalAddRecordRequest( + type="AddRecord", + index_id=self.index_id, + url=record.url, + content=record.content, + language=record.language, + meta=record.meta, + filters=record.filters, + sort=record.sort, + ) + ) + assert result["type"] == "IndexedFile" + return cast(InternalIndexedFileResponse, result) diff --git a/wrappers/python/src/pagefind_python/py.typed b/wrappers/python/src/pagefind_python/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/wrappers/python/src/pagefind_python/service/__init__.py 
b/wrappers/python/src/pagefind_python/service/__init__.py new file mode 100644 index 00000000..7be3e495 --- /dev/null +++ b/wrappers/python/src/pagefind_python/service/__init__.py @@ -0,0 +1,144 @@ +import os +import platform + +from pathlib import Path +import json +from contextlib import AbstractAsyncContextManager +from typing import Any, Dict, Optional, Union, cast + +from .types import ( + InternalRequestPayload, + InternalResponsePayload, + InternalResponseError, + InternalServiceRequest, + InternalServiceResponse, + InternalResponseType, +) +import asyncio +import base64 + + +# _bin: Optional[Path] = None + + +def _find_binary() -> Union[Path, None]: + # TODO: verify this is the correct path + this_dir = Path(__file__).parent + names = ["pagefind_extended", "pagefind"] + extensions = [""] + if platform.system().lower() == "Windows": + extensions.append(".exe") + result = None + for name in [n + ext for n in names for ext in extensions]: + if (bin := this_dir / name).exists(): + if not bin.is_file(): + raise FileNotFoundError(f"{bin} is not a file") + result = bin + break + return result + + +def _encode(req: InternalServiceRequest) -> bytes: + return base64.b64encode(json.dumps(req).encode("utf-8")) + + +class Service(AbstractAsyncContextManager["Service"]): + _bin: Path + _backend: asyncio.subprocess.Process + _message_id: int = 0 + _responses: Dict[int, asyncio.Future[InternalResponsePayload]] + _loop: asyncio.AbstractEventLoop + _poll_task: asyncio.Task[None] + + # _messages + def __init__(self): + self._loop = asyncio.get_event_loop() + _bin = _find_binary() + if _bin is None: + raise FileNotFoundError( + "Could not find `pagefind` or `pagefind_extended` binary" + ) + self._bin = _bin + + async def launch(self) -> "Service": + # TODO: detach process on windows? 
+ # creation_flags: int = 0 + # if platform.system().lower() == "windows": + # creation_flags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_DETACHED + self._backend = await asyncio.create_subprocess_exec( + self._bin, + "--service", + cwd=os.getcwd(), + stdin=None, + stdout=None, + # creationflags=creation_flags, + ) + self._poll_task = self._loop.create_task(self.wait_for_responses()) + + return self + + async def send(self, payload: InternalRequestPayload) -> InternalResponsePayload: + self._message_id += 1 + message_id = self._message_id + if (_ := self._responses.get(message_id)) is not None: + raise KeyError(f"message_id {message_id} already in use") + else: + future: asyncio.Future[InternalResponsePayload] = self._loop.create_future() + self._responses[message_id] = future + # FIXME: check stdin not none? + if self._backend.stdin is None: + ... # restart the backend + await self.launch() + assert self._backend.stdin is not None + + req = InternalServiceRequest(message_id=message_id, payload=payload) + self._backend.stdin.write(_encode(req)) + await self._backend.stdin.drain() + + return await future + + async def wait_for_responses(self) -> None: + """ + Poll the subprocess's stdout for responses + """ + while True: + assert self._backend.stdout is not None + output = await self._backend.stdout.readuntil(b",") + if (resp := json.loads(base64.b64decode(output[:-1]))) is None: + continue + resp = cast(InternalServiceResponse, resp) + if (message_id := resp.get("message_id")) is not None: + assert self._message_id <= message_id, "message_id out of order" + if (future := self._responses.get(message_id)) is not None: + payload = resp["payload"] + if payload["type"] == InternalResponseType.ERROR.value: + exc = cast(InternalResponseError, payload) + future.set_exception( + Exception(exc["message"], exc.get("original_message")) + ) + else: + future.set_result(cast(InternalResponsePayload, payload)) + else: + payload = cast(InternalResponseError, resp["payload"]) 
+ # assert ( + # payload["type"] == InternalResponseType.ERROR.value + # ), f"unexpected message type: {payload['type']}" + # FIXME: figure out how to surface the error + + async def close(self): + # wait for all _responses to be resolved + await asyncio.gather(*self._responses.values()) # IDEA: add timeout? + self._poll_task.cancel() + self._backend.terminate() + await self._backend.wait() + + async def __aenter__(self) -> "Service": + return await self.launch() + + async def __aexit__( + self, + exc_type: Optional[Any], + exc_value: Optional[Any], + traceback: Optional[Any], + ) -> None: + await self.close() diff --git a/wrappers/python/src/pagefind_python/service/types.py b/wrappers/python/src/pagefind_python/service/types.py new file mode 100644 index 00000000..a6bfc6e2 --- /dev/null +++ b/wrappers/python/src/pagefind_python/service/types.py @@ -0,0 +1,162 @@ +from enum import StrEnum +from typing import Dict, List, Union, Optional, TypedDict, Sequence, Literal + + +class InternalRequestType(StrEnum): + NEW_INDEX = "NewIndex" + ADD_FILE = "AddFile" + ADD_RECORD = "AddRecord" + ADD_DIR = "AddDir" + WRITE_FILES = "WriteFiles" + GET_FILES = "GetFiles" + DELETE_INDEX = "DeleteIndex" + + +class InternalPagefindServiceConfig(TypedDict): + root_selector: Optional[str] + exclude_selectors: Optional[Sequence[str]] + force_language: Optional[str] + verbose: Optional[bool] + logfile: Optional[str] + keep_index_url: Optional[bool] + + +class InternalNewIndexRequest(TypedDict): + type: Literal["NewIndex"] + config: Optional[InternalPagefindServiceConfig] + + +class InternalAddFileRequest(TypedDict): + type: Literal["AddFile"] + index_id: int + """index_id must be positive.""" + file_path: Optional[str] + url: Optional[str] + file_contents: str + + +class InternalAddRecordRequest(TypedDict): + type: Literal["AddRecord"] + index_id: int + """index_id must be positive.""" + url: str + content: str + language: str + meta: Optional[Dict[str, str]] + filters: Optional[Dict[str, 
List[str]]] + sort: Optional[Dict[str, str]] + + +class InternalAddDirRequest(TypedDict, total=False): + type: Literal["AddDir"] + index_id: int + path: str # TODO: support Path + glob: Optional[str] + + +class InternalWriteFilesRequest(TypedDict, total=False): + type: Literal["WriteFiles"] + index_id: int + """index_id must be positive.""" + output_path: Optional[str] + + +class InternalGetFilesRequest(TypedDict): + type: Literal["GetFiles"] + index_id: int + """index_id must be positive.""" + + +class InternalDeleteIndexRequest(TypedDict): + type: Literal["DeleteIndex"] + index_id: int + """index_id must be positive.""" + + +InternalRequestPayload = Union[ + InternalNewIndexRequest, + InternalAddFileRequest, + InternalAddRecordRequest, + InternalAddDirRequest, + InternalWriteFilesRequest, + InternalGetFilesRequest, + InternalDeleteIndexRequest, +] + + +class InternalServiceRequest(TypedDict): + message_id: Optional[int] + payload: InternalRequestPayload + + +class InternalResponseType(StrEnum): + NEW_INDEX = "NewIndex" + INDEXED_FILE = "IndexedFile" + INDEXED_DIR = "IndexedDir" + WRITE_FILES = "WriteFiles" + GET_FILES = "GetFiles" + DELETE_INDEX = "DeleteIndex" + ERROR = "Error" + + +class InternalResponseError(TypedDict): + type: Literal["Error"] + message: str + original_message: Optional[str] + + +class InternalNewIndexResponse(TypedDict): + type: Literal["NewIndex"] + index_id: int + + +class InternalIndexedFileResponse(TypedDict): + type: Literal["IndexedFile"] + page_word_count: int + page_url: str + page_meta: Dict[str, str] + + +class InternalIndexedDirResponse(TypedDict): + type: str + page_count: int + + +class InternalWriteFilesResponse(TypedDict): + type: Literal["IndexedFile"] + output_path: str + + +class InternalSyntheticFile(TypedDict): + path: str + content: str + + +class InternalGetFilesResponse(TypedDict): + type: Literal["GetFiles"] + files: List[InternalSyntheticFile] + + +class InternalDeleteIndexResponse(TypedDict): + type: 
Literal["DeleteIndex"] + + +InternalResponsePayload = Union[ + InternalNewIndexResponse, + InternalIndexedFileResponse, + InternalIndexedDirResponse, + InternalWriteFilesResponse, + InternalGetFilesResponse, + InternalDeleteIndexResponse, +] + + +class InternalServiceResponse(TypedDict): + message_id: Optional[int] + payload: Union[InternalResponsePayload, InternalResponseError] + + +class InternalResponseCallback(TypedDict, total=False): + exception: Optional[Exception] + err: Optional[InternalResponseError] + result: Optional[InternalResponsePayload] From 1f83df3c14a83d0442f59610a22b35b8e40d09b3 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Fri, 5 Jul 2024 15:19:24 -0400 Subject: [PATCH 02/39] feat(wrappers/python): complete integration test --- .../python/src/pagefind_python/__init__.py | 11 - .../src/pagefind_python/index/__init__.py | 232 +++++++++++------- .../src/pagefind_python/service/__init__.py | 114 ++++++--- .../src/pagefind_python/service/types.py | 3 +- wrappers/python/src/tests/integration.py | 44 ++++ 5 files changed, 273 insertions(+), 131 deletions(-) create mode 100644 wrappers/python/src/tests/integration.py diff --git a/wrappers/python/src/pagefind_python/__init__.py b/wrappers/python/src/pagefind_python/__init__.py index 6f3619d0..0a64f367 100644 --- a/wrappers/python/src/pagefind_python/__init__.py +++ b/wrappers/python/src/pagefind_python/__init__.py @@ -1,17 +1,6 @@ #!/usr/bin/env python3 # assume the python version is >= 3.8, which is the oldest LTS version as of # the time of writing, 2024-06-29 -from types import Union, Optional -from pathlib import Path -import subprocess -import enum -import sys -import platform -import asyncio - -from .service import * - -# TODO: __all__ = [] # https://docs.python.org/3/reference/datamodel.html#async-context-managers diff --git a/wrappers/python/src/pagefind_python/index/__init__.py b/wrappers/python/src/pagefind_python/index/__init__.py index 22511af2..c534ea76 100644 --- 
a/wrappers/python/src/pagefind_python/index/__init__.py +++ b/wrappers/python/src/pagefind_python/index/__init__.py @@ -1,134 +1,198 @@ -from typing import Dict, List, Optional, NamedTuple, cast - +from typing import Any, Dict, List, Optional, Sequence, TypedDict, cast +import logging from ..service.types import ( InternalAddFileRequest, InternalAddRecordRequest, InternalDeleteIndexRequest, InternalGetFilesRequest, + InternalGetFilesResponse, InternalIndexedFileResponse, InternalAddDirRequest, InternalIndexedDirResponse, + InternalSyntheticFile, + InternalWriteFilesRequest, ) -from ..service import Service - - -class HtmlFile(NamedTuple): - content: str - """The source HTML content of the file to be parsed.""" - - source_path: Optional[str] = None - """ - The source path of the HTML file if it were to exist on disk. - Must be a relative path, or an absolute path within the current working directory. - Pagefind will compute the result URL from this path. - - If not supplied, url must be supplied. - - @example "about/index.html" - @example "/Users/user/Documents/site/about/index.html" - """ - - url: Optional[str] = None - """ - An explicit URL to use, instead of having Pagefind - compute the URL based on the sourcePath. - - If not supplied, source_path must be supplied. - - @example "/about/" - """ - - -class CustomRecord(NamedTuple): - """ - The data required for Pagefind to index a custom record that isn't backed by an HTML file - """ - - url: str - """The output URL of this record. Pagefind will not alter this.""" - - content: str - """The raw content of this record""" - language: str - """What language is this record written in. Multiple languages will be split into separate indexes. Expects an ISO 639-1 code.""" +from ..service import PagefindService - meta: Optional[Dict[str, str]] = None - """The metadata to attach to this record. 
Supplying a `title` is highly recommended.""" +log = logging.getLogger(__name__) - filters: Optional[Dict[str, List[str]]] = None - """The filters to attach to this record. Filters are used to group records together.""" - sort: Optional[Dict[str, str]] = None - """The sort keys to attach to this record.""" - - -class SiteDirectory(NamedTuple): - path: str - """The path to the directory to index. If relative, it's relative to the current working directory.""" - glob: Optional[str] = None - """Optionally, a custom glob to evaluate for finding files. Default to all HTML files.""" +class IndexConfig(TypedDict, total=False): + root_selector: Optional[str] + exclude_selectors: Optional[Sequence[str]] + force_language: Optional[str] + verbose: Optional[bool] + logfile: Optional[str] + keep_index_url: Optional[bool] + output_path: Optional[str] class PagefindIndex: - def __init__(self, service: Service, index_id: int): - self._service = service - self.index_id = index_id - - async def add_html_file(self, html_file: HtmlFile) -> InternalIndexedFileResponse: + _service: Optional["PagefindService"] = None + _index_id: Optional[int] = None + config: Optional[IndexConfig] = None + """Note that config is immutable after initialization.""" + + def __init__( + self, + config: Optional[IndexConfig] = None, + *, + _service: Optional["PagefindService"] = None, + _index_id: Optional[int] = None, + # TODO: cache config + ): + self._service = _service + self._index_id = _index_id + self.config = config + + async def _start(self) -> "PagefindIndex": + assert self._index_id is None + assert self._service is None + self._service = await PagefindService().launch() + _index = await self._service.create_index(self.config) + self._index_id = _index._index_id + return self + + async def add_html_file( + self, + *, + content: str, + source_path: Optional[str] = None, + url: Optional[str] = None, + ) -> InternalIndexedFileResponse: + """ + ARGS: + content: The source HTML content of the file to 
be parsed. + source_path: The source path of the HTML file if it were to exist on disk. \ + Must be a relative path, or an absolute path within the current working directory. \ + Pagefind will compute the result URL from this path. + url: an explicit URL to use, instead of having Pagefind compute the URL \ + based on the source_path. If not supplied, source_path must be supplied. + """ + assert self._service is not None + assert self._index_id is not None result = await self._service.send( InternalAddFileRequest( type="AddFile", - index_id=self.index_id, - url=html_file.url, - file_contents=html_file.content, - file_path=html_file.source_path, + index_id=self._index_id, + url=url, + file_contents=content, + file_path=source_path, ) ) assert result["type"] == "IndexedFile" return cast(InternalIndexedFileResponse, result) async def add_directory( - self, directory: SiteDirectory + self, path: str, *, glob: Optional[str] = None ) -> InternalIndexedDirResponse: + assert self._service is not None + assert self._index_id is not None result = await self._service.send( InternalAddDirRequest( type="AddDir", - index_id=self.index_id, - path=directory.path, - glob=directory.glob, + index_id=self._index_id, + path=path, + glob=glob, ) ) assert result["type"] == "IndexedDir" return cast(InternalIndexedDirResponse, result) - async def get_files(self): - result = await self._service.send( - InternalGetFilesRequest(type="GetFiles", index_id=self.index_id) + async def get_files(self) -> List[InternalSyntheticFile]: + """ + WATCH OUT: this method emits all files. This can be a lot of data, and + this amount of data can cause reading from the subprocess pipes to deadlock. + + STRICTLY PREFER calling `self.write_files()`. 
+ """ + assert self._service is not None + assert self._index_id is not None + + response = await self._service.send( + InternalGetFilesRequest(type="GetFiles", index_id=self._index_id) ) - assert result["type"] == "GetFiles" - return cast(List[InternalIndexedFileResponse], result) + assert response["type"] == "GetFiles" + result = cast(InternalGetFilesResponse, response)["files"] + return result async def delete_index(self): + assert self._service is not None + assert self._index_id is not None result = await self._service.send( - InternalDeleteIndexRequest(type="DeleteIndex", index_id=self.index_id) + InternalDeleteIndexRequest(type="DeleteIndex", index_id=self._index_id) ) assert result["type"] == "DeletedIndex" + self._index_id = None + self._service = None async def add_custom_record( - self, record: CustomRecord + self, + *, + url: str, + content: str, + language: str, + meta: Optional[Dict[str, str]] = None, + filters: Optional[Dict[str, List[str]]] = None, + sort: Optional[Dict[str, str]] = None, ) -> InternalIndexedFileResponse: + """ + ARGS: + content: the raw content of this record. + url: the output URL of this record. Pagefind will not alter this. + language: ISO 639-1 code of the language this record is written in. + meta: the metadata to attach to this record. Supplying a `title` is highly recommended. + filters: the filters to attach to this record. Filters are used to group records together. + sort: the sort keys to attach to this record. 
+ """ + assert self._service is not None + assert self._index_id is not None result = await self._service.send( InternalAddRecordRequest( type="AddRecord", - index_id=self.index_id, - url=record.url, - content=record.content, - language=record.language, - meta=record.meta, - filters=record.filters, - sort=record.sort, + index_id=self._index_id, + url=url, + content=content, + language=language, + meta=meta, + filters=filters, + sort=sort, ) ) assert result["type"] == "IndexedFile" return cast(InternalIndexedFileResponse, result) + + async def write_files(self): + assert self._service is not None + assert self._index_id is not None + if not self.config: + output_path = None + else: + output_path = self.config.get("output_path") + + result = await self._service.send( + InternalWriteFilesRequest( + type="WriteFiles", + index_id=self._index_id, + output_path=output_path, + ) + ) + assert result["type"] == "WriteFiles" + + async def __aenter__(self) -> "PagefindIndex": + assert self._service is None + assert self._index_id is None + return await self._start() + + async def __aexit__( + self, + exc_type: Optional[Any], + exc_value: Optional[Any], + traceback: Optional[Any], + ) -> None: + assert self._service is not None + assert self._index_id is not None + if exc_type is None: + await self.write_files() + await self._service.close() diff --git a/wrappers/python/src/pagefind_python/service/__init__.py b/wrappers/python/src/pagefind_python/service/__init__.py index 7be3e495..e984b1fa 100644 --- a/wrappers/python/src/pagefind_python/service/__init__.py +++ b/wrappers/python/src/pagefind_python/service/__init__.py @@ -4,9 +4,15 @@ from pathlib import Path import json from contextlib import AbstractAsyncContextManager -from typing import Any, Dict, Optional, Union, cast +from typing import Any, Dict, List, Optional, cast, TYPE_CHECKING +import asyncio +import base64 +import logging + from .types import ( + InternalNewIndexRequest, + InternalNewIndexResponse, 
InternalRequestPayload, InternalResponsePayload, InternalResponseError, @@ -14,35 +20,41 @@ InternalServiceResponse, InternalResponseType, ) -import asyncio -import base64 +if TYPE_CHECKING: + from ..index import IndexConfig, PagefindIndex -# _bin: Optional[Path] = None +log = logging.getLogger(__name__) -def _find_binary() -> Union[Path, None]: - # TODO: verify this is the correct path +def _binary_candidates() -> List[Path]: this_dir = Path(__file__).parent + package_root = this_dir.parent names = ["pagefind_extended", "pagefind"] extensions = [""] if platform.system().lower() == "Windows": extensions.append(".exe") - result = None - for name in [n + ext for n in names for ext in extensions]: - if (bin := this_dir / name).exists(): - if not bin.is_file(): - raise FileNotFoundError(f"{bin} is not a file") - result = bin - break - return result + + return [package_root / (n + ext) for n in names for ext in extensions] + + +def _must_find_binary() -> Path: + # TODO: verify this is the correct path + candidates = _binary_candidates() + for candidate in candidates: + if candidate.exists(): + if not candidate.is_file(): + raise FileNotFoundError(f"{candidate} is not a file") + else: + return candidate + raise FileNotFoundError(f"Could not find any of {candidates}") def _encode(req: InternalServiceRequest) -> bytes: return base64.b64encode(json.dumps(req).encode("utf-8")) -class Service(AbstractAsyncContextManager["Service"]): +class PagefindService(AbstractAsyncContextManager["PagefindService"]): _bin: Path _backend: asyncio.subprocess.Process _message_id: int = 0 @@ -53,14 +65,11 @@ class Service(AbstractAsyncContextManager["Service"]): # _messages def __init__(self): self._loop = asyncio.get_event_loop() - _bin = _find_binary() - if _bin is None: - raise FileNotFoundError( - "Could not find `pagefind` or `pagefind_extended` binary" - ) - self._bin = _bin - - async def launch(self) -> "Service": + self._bin = _must_find_binary() + self._responses = dict() + + async 
def launch(self) -> "PagefindService": + log.debug(f"launching {self._bin}") # TODO: detach process on windows? # creation_flags: int = 0 # if platform.system().lower() == "windows": @@ -68,13 +77,19 @@ async def launch(self) -> "Service": self._backend = await asyncio.create_subprocess_exec( self._bin, "--service", + "--verbose", cwd=os.getcwd(), - stdin=None, - stdout=None, - # creationflags=creation_flags, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + limit=2**21, # <- 2MiB + # anything less and the _wait_for_responses loop will hang + # due to the stdout pipes deadlocking due to the buffer filling up ) - self._poll_task = self._loop.create_task(self.wait_for_responses()) - + log.debug(f"launched {self._bin}: {self._backend}.") + log.debug("polling for responses") + self._poll_task = self._loop.create_task(self._wait_for_responses()) + log.debug(f"polling task created: {self._poll_task}") return self async def send(self, payload: InternalRequestPayload) -> InternalResponsePayload: @@ -87,23 +102,32 @@ async def send(self, payload: InternalRequestPayload) -> InternalResponsePayload self._responses[message_id] = future # FIXME: check stdin not none? if self._backend.stdin is None: - ... 
# restart the backend + # restart the backend + log.debug("restarting backend") await self.launch() + log.debug("backend restarted") assert self._backend.stdin is not None - req = InternalServiceRequest(message_id=message_id, payload=payload) - self._backend.stdin.write(_encode(req)) + log.debug(f"sending request: {req}") + self._backend.stdin.write(_encode(req) + b",") + # backend waits for a comma before responding await self._backend.stdin.drain() + log.debug(f"request sent: {req}") - return await future + result = await future + log.debug(f"received response: {result}") + return result - async def wait_for_responses(self) -> None: + async def _wait_for_responses(self) -> None: """ Poll the subprocess's stdout for responses """ while True: + await asyncio.sleep(0.1) assert self._backend.stdout is not None + log.debug("checking for data") output = await self._backend.stdout.readuntil(b",") + log.debug(f"received data: {output}") if (resp := json.loads(base64.b64decode(output[:-1]))) is None: continue resp = cast(InternalServiceResponse, resp) @@ -119,11 +143,11 @@ async def wait_for_responses(self) -> None: else: future.set_result(cast(InternalResponsePayload, payload)) else: + # FIXME: figure out how to surface the error payload = cast(InternalResponseError, resp["payload"]) # assert ( # payload["type"] == InternalResponseType.ERROR.value # ), f"unexpected message type: {payload['type']}" - # FIXME: figure out how to surface the error async def close(self): # wait for all _responses to be resolved @@ -132,7 +156,7 @@ async def close(self): self._backend.terminate() await self._backend.wait() - async def __aenter__(self) -> "Service": + async def __aenter__(self) -> "PagefindService": return await self.launch() async def __aexit__( @@ -142,3 +166,23 @@ async def __aexit__( traceback: Optional[Any], ) -> None: await self.close() + + async def create_index( + self, config: Optional["IndexConfig"] = None + ) -> "PagefindIndex": + from ..index import PagefindIndex + 
+ _config: Optional["IndexConfig"] = None + if config is not None: + _config = {**config} + _ = _config.pop("output_path", None) + else: + _config = None + log.debug(f"creating index with config: {_config}") + result = await self.send( + InternalNewIndexRequest(type="NewIndex", config=_config) + ) + log.debug(f"received response: {result}") + assert result["type"] == "NewIndex" + result = cast(InternalNewIndexResponse, result) + return PagefindIndex(config=config, _service=self, _index_id=result["index_id"]) diff --git a/wrappers/python/src/pagefind_python/service/types.py b/wrappers/python/src/pagefind_python/service/types.py index a6bfc6e2..53d061b7 100644 --- a/wrappers/python/src/pagefind_python/service/types.py +++ b/wrappers/python/src/pagefind_python/service/types.py @@ -12,7 +12,8 @@ class InternalRequestType(StrEnum): DELETE_INDEX = "DeleteIndex" -class InternalPagefindServiceConfig(TypedDict): +class InternalPagefindServiceConfig(TypedDict, total=False): + # FIXME: document root_selector: Optional[str] exclude_selectors: Optional[Sequence[str]] force_language: Optional[str] diff --git a/wrappers/python/src/tests/integration.py b/wrappers/python/src/tests/integration.py new file mode 100644 index 00000000..d5c6ee79 --- /dev/null +++ b/wrappers/python/src/tests/integration.py @@ -0,0 +1,44 @@ +from pagefind_python.index import PagefindIndex, IndexConfig + + +async def main(): + config = IndexConfig( + root_selector="main", logfile="index.log", output_path="./output", verbose=True + ) + async with PagefindIndex(config=config) as index: + await index.add_directory("./public") + new_file = await index.add_html_file( + content=( + "" + " " + "
" + "

Example HTML

" + "

This is an example HTML page.

" + "
" + " " + "" + ), + url="https://example.com", + source_path="other/example.html", + ) + print(f"new_file={new_file}") + new_record = await index.add_custom_record( + url="/elephants/", + content="Some testing content regarding elephants", + language="en", + meta={"title": "Elephants"}, + ) + print(f"new_record={new_record}") + + new_dir = await index.add_directory("./public") + print(f"new_dir={new_dir}") + + files = await index.get_files() + for f in files: + print(f"files= {len(f['content']):10}B {f['path']}") + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) From 1551d80a2b1cc91c3657af0f3a3048c6fd1394b2 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sun, 4 Aug 2024 12:27:46 -0400 Subject: [PATCH 03/39] feat(python): script building binary-only wheels --- .gitignore | 8 + .vscode/settings.json | 9 +- wrappers/python/README.md | 70 ++++- wrappers/python/build_binary_only_wheel.py | 240 ++++++++++++++++++ wrappers/python/poetry.lock | 142 +++++++++++ wrappers/python/pyproject.toml | 31 ++- .../python/src/pagefind_python/__main__.py | 12 + .../src/pagefind_python/service/__init__.py | 59 +++-- .../python/src/pagefind_python_bin/README.md | 13 + .../src/pagefind_python_bin/__init__.py | 51 ++++ wrappers/python/src/tests/integration.py | 67 +++-- 11 files changed, 644 insertions(+), 58 deletions(-) create mode 100644 wrappers/python/build_binary_only_wheel.py create mode 100644 wrappers/python/poetry.lock create mode 100644 wrappers/python/src/pagefind_python/__main__.py create mode 100644 wrappers/python/src/pagefind_python_bin/README.md create mode 100644 wrappers/python/src/pagefind_python_bin/__init__.py diff --git a/.gitignore b/.gitignore index 488090a3..2d966778 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,11 @@ vendor # Node node_modules + +# Python +__pycache__/ +*.pyc +dist +*.whl +*.egg-info +*.log diff --git a/.vscode/settings.json b/.vscode/settings.json index 4d9636b5..781bbf65 100644 --- a/.vscode/settings.json +++ 
b/.vscode/settings.json @@ -1,3 +1,8 @@ { - "rust-analyzer.showUnlinkedFileNotification": false -} \ No newline at end of file + "rust-analyzer.showUnlinkedFileNotification": false, + "python.analysis.typeCheckingMode": "standard", + "[python]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff" + } +} diff --git a/wrappers/python/README.md b/wrappers/python/README.md index f03dd564..97c9162b 100644 --- a/wrappers/python/README.md +++ b/wrappers/python/README.md @@ -1 +1,69 @@ - +# `pagefind_python` +An async python API for the [pagefind](https://pagefind.app) binary. + +## Installation + +## Usage + +```py +import asyncio +import logging +from pagefind_python.index import PagefindIndex, IndexConfig + +logging.basicConfig(level=logging.DEBUG) +log = logging.getLogger(__name__) +html_content = ( + "" + " " + "
" + "

Example HTML

" + "

This is an example HTML page.

" + "
" + " " + "" +) + + +async def main(): + config = IndexConfig( + root_selector="main", logfile="index.log", output_path="./output", verbose=True + ) + async with PagefindIndex(config=config) as index: + log.debug("opened index") + new_file, new_record, new_dir = await asyncio.gather( + index.add_html_file( + content=html_content, + url="https://example.com", + source_path="other/example.html", + ), + index.add_custom_record( + url="/elephants/", + content="Some testing content regarding elephants", + language="en", + meta={"title": "Elephants"}, + ), + index.add_directory("./public"), + ) + print(f"new_file={new_file}") + print(f"new_record={new_record}") + print(f"new_dir={new_dir}") + + files = await index.get_files() + for f in files: + print(f"files= {len(f['content']):10}B {f['path']}") + + +if __name__ == "__main__": + asyncio.run(main()) + +``` + diff --git a/wrappers/python/build_binary_only_wheel.py b/wrappers/python/build_binary_only_wheel.py new file mode 100644 index 00000000..ba141709 --- /dev/null +++ b/wrappers/python/build_binary_only_wheel.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +# Adapted from https://github.com/ziglang/zig-pypi/blob/a0ca0d8b2d5104498f4eececff09ed2b1ede2d0b/make_wheels.py +# See also https://simonwillison.net/2022/May/23/bundling-binary-tools-in-python-wheels/ +# +# Note that this script assumes that the relevant files are on disk and either +# the files hashes have been verified or we trust the files. 
+import argparse +import logging +from typing import Any, Dict, List, Optional, Tuple, Union +from pathlib import Path +from email.message import EmailMessage +import wheel +from wheel.wheelfile import WheelFile +from zipfile import ZipInfo, ZIP_DEFLATED + +import wheel.wheelfile + +# constants +HOMEPAGE = "https://pagefind.app" +REPO = "https://github.com/CloudCannon/pagefind/" +REQUIRED_PYTHON_VERSION = "~=3.9" + +this_dir = Path(__file__).parent + + +# as of the time of writing, these are the supported platforms: +# See https://doc.rust-lang.org/nightly/rustc/platform-support.html +# wheel name format: {dist}-{version}(-{build})?-{python}-{abi}-{platform}.whl +# this dict helps look up the last part of the wheel name: ^^^^^^^^^^ +LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS = { + # LLVM triple: Python platform + # only the LLVM triples that are produced in CI are listed here; see + # https://github.com/CloudCannon/pagefind/releases/latest + # the python platform mapping is copied from zig-pypi's script. + # See also: https://github.com/PyO3/maturin/blob/main/src/auditwheel/manylinux-policy.json + # See also: https://github.com/PyO3/maturin/blob/main/src/auditwheel/musllinux-policy.json + # TODO: check the python platforms are correct. 
+ "aarch64-apple-darwin": "macosx_12_0_arm64", + "aarch64-unknown-linux-musl": "manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64", + "x86_64-apple-darwin": "macosx_12_0_arm64", + "x86_64-pc-windows-msvc": "win_amd64", + "x86_64-unknown-linux-musl": "manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64", +} + +def as_zip_info(file: Path, *, alias: str)-> Tuple[ZipInfo, bytes]: + zip_info = ZipInfo(alias or file.name, (1980, 1, 1, 0, 0, 0)) + zip_info.external_attr = file.stat().st_mode << 16 + with file.open("rb") as f: + data = f.read() + zip_info.file_size = len(data) + return zip_info, data + +class ReproducibleWheelFile(wheel.wheelfile.WheelFile): + + def writestr( + self, zip_info_or_arc_name: Union[ZipInfo, str], data: Any, *args, **kwargs + ): + if isinstance(zip_info_or_arc_name, ZipInfo): + zip_info = zip_info_or_arc_name + else: + assert isinstance(zip_info_or_arc_name, str) + zip_info = ZipInfo(zip_info_or_arc_name) + zip_info.file_size = len(data) + zip_info.external_attr = 0o0644 << 16 + if zip_info_or_arc_name.endswith(".dist-info/RECORD"): + zip_info.external_attr = 0o0664 << 16 + + zip_info.compress_type = ZIP_DEFLATED + zip_info.date_time = (1980, 1, 1, 0, 0, 0) + zip_info.create_system = 3 + WheelFile.writestr(self, zip_info, data, *args, **kwargs) + + +def make_message( + headers: Dict[str, Union[str, List[str]]], + payload: Optional[Union[str, bytes]] = None, +): + msg = EmailMessage() + for name, value in headers.items(): + if isinstance(value, list): + for value_part in value: + msg[name] = value_part + else: + msg[name] = value + if payload: + msg.set_payload(payload) + return msg + + +def write_wheel_file( + filename: Path, + contents: Dict[str, Union[str, bytes, EmailMessage, ZipInfo]] +) -> Path: + with ReproducibleWheelFile(filename, "w") as wheel: + for member_info, member_source in contents.items(): + if isinstance(member_source, str): + data = member_source.encode("utf-8") + elif isinstance(member_source, 
bytes): + data = member_source + elif isinstance(member_source, EmailMessage): + data = member_source.as_bytes( + policy=member_source.policy.clone(linesep="\n"), unixfrom=False + ) + elif isinstance(member_source, Path): + member_info, data = as_zip_info(member_source, alias=member_info) + else: + raise ValueError(f"unexpected content: {type(member_source)}") + wheel.writestr(member_info, data) + return filename + + +def write_wheel( + out_dir: Path, + *, + name: str, + version: str, + tag: str, + metadata: Dict[str, Any], + description: str, + contents, +) -> Path: + wheel_name = f"{name}-{version}-{tag}.whl" + dist_info = f"{name}-{version}.dist-info" + return write_wheel_file( + (out_dir / wheel_name), + { + **contents, + f"{dist_info}/METADATA": make_message( + { + # see https://packaging.python.org/en/latest/specifications/core-metadata/ + "Metadata-Version": "2.1", + "Name": name, + "Version": version, + **metadata, + }, + description, + ), + f"{dist_info}/WHEEL": make_message( + { + "Wheel-Version": "1.0", + "Generator": "build_binary_only_wheel.py", + "Root-Is-Purelib": "false", # see https://packaging.python.org/en/latest/specifications/binary-distribution-format/#what-s-the-deal-with-purelib-vs-platlib + "Tag": tag, + } + ), + }, + ) + + +def write_pagefind_bin_only_wheel( + *, + executable: Path, + output_dir: Path, + version: str, + platform: str, +) -> Path: + # FIXME: update when package support is stabilized + if "extended" in executable.name: + name = "experimental_pagefind_python_bin_extended" + else: + name = "experimental_pagefind_python_bin" + src_dir = this_dir / "src" / "pagefind_python_bin" + contents = { + f"{name}/__init__.py": (src_dir / "__init__.py"), + f"{name}/{executable.name}": executable, + } + + # Load in static files + with (src_dir / "README.md").open() as f: + description = f.read().replace("pagefind_python_bin", name) + + return write_wheel( + output_dir, + name=name, + version=version, + tag=f"py3-none-{platform}", + 
metadata={ + "Summary": "Pagefind is a library for performant, low-bandwidth, fully static search.", + "Description-Content-Type": "text/markdown", + "License": "MIT", + "Author": "CloudCannon", + "Classifier": [ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", # FIXME: update when package name stabilized + "Intended Audience :: Developers", + ], + "Project-URL": [ + f"Homepage, {HOMEPAGE}", + f"Source Code, {REPO}", + f"Bug Tracker, {REPO}/issues", + ], + "Requires-Python": REQUIRED_PYTHON_VERSION, + }, + description=description, + contents=contents, + ) + + +def get_arg_parser(): + parser = argparse.ArgumentParser( + prog=__file__, description="Repackage Pagefind binaries as Python wheels" + ) + parser.add_argument( + "--version", + default=None, + help="version to package", + ) + parser.add_argument("--suffix", default="", help="wheel version suffix") + parser.add_argument("--bin-path", help="path to the binary to embed", required=True) + parser.add_argument( + "--output-dir", + default="dist/", + help="Output directory in which to place the built wheel", + ) + parser.add_argument( + "--llvm-triple", + required=True, + choices=list(LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.keys()), + help="platform to build for", + ) + return parser + + +def main(): + args = get_arg_parser().parse_args() + platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(args.llvm_triple) + if platform is None: + raise ValueError(f"Unsupported platform: {args.llvm_triple}") + + logging.getLogger(wheel.__name__).setLevel(logging.WARNING) + write_pagefind_bin_only_wheel( + output_dir=Path(args.output_dir), + executable=Path(args.bin_path), + version=args.version, + platform=platform, + ) + + +if __name__ == "__main__": + main() diff --git a/wrappers/python/poetry.lock b/wrappers/python/poetry.lock new file mode 100644 index 00000000..a914fddc --- /dev/null +++ b/wrappers/python/poetry.lock @@ -0,0 +1,142 @@ +# This file is automatically @generated by Poetry 1.8.3 
and should not be changed by hand. + +[[package]] +name = "cogapp" +version = "3.4.1" +description = "Cog: A content generator for executing Python snippets in source files." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cogapp-3.4.1-py3-none-any.whl", hash = "sha256:1daba7b6c8bb23b733c64833de7aa3a42476c05afba19cff937e1b522216859d"}, + {file = "cogapp-3.4.1.tar.gz", hash = "sha256:a806d5db9e318a1a2d3fce988008179168e7db13e5e55b19b79763f9bb9d2982"}, +] + +[[package]] +name = "mypy" +version = "1.10.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02"}, + {file = "mypy-1.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7"}, + {file = "mypy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a"}, + {file = "mypy-1.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9"}, + {file = "mypy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d"}, + {file = "mypy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a"}, + {file = "mypy-1.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84"}, + {file = "mypy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f"}, + {file = "mypy-1.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b"}, + {file = 
"mypy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e"}, + {file = "mypy-1.10.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7"}, + {file = "mypy-1.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3"}, + {file = "mypy-1.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e"}, + {file = "mypy-1.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04"}, + {file = "mypy-1.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31"}, + {file = "mypy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c"}, + {file = "mypy-1.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade"}, + {file = "mypy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37"}, + {file = "mypy-1.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7"}, + {file = "mypy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d"}, + {file = "mypy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3"}, + {file = "mypy-1.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf"}, + {file = "mypy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531"}, + {file = "mypy-1.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3"}, + {file = "mypy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f"}, + {file = "mypy-1.10.1-py3-none-any.whl", hash = "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a"}, + {file = "mypy-1.10.1.tar.gz", hash = "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "ruff" +version = "0.5.2" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.5.2-py3-none-linux_armv6l.whl", hash = "sha256:7bab8345df60f9368d5f4594bfb8b71157496b44c30ff035d1d01972e764d3be"}, + {file = "ruff-0.5.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1aa7acad382ada0189dbe76095cf0a36cd0036779607c397ffdea16517f535b1"}, + {file = "ruff-0.5.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:aec618d5a0cdba5592c60c2dee7d9c865180627f1a4a691257dea14ac1aa264d"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b62adc5ce81780ff04077e88bac0986363e4a3260ad3ef11ae9c14aa0e67ef"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc42ebf56ede83cb080a50eba35a06e636775649a1ffd03dc986533f878702a3"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c15c6e9f88c67ffa442681365d11df38afb11059fc44238e71a9d9f1fd51de70"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d3de9a5960f72c335ef00763d861fc5005ef0644cb260ba1b5a115a102157251"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe5a968ae933e8f7627a7b2fc8893336ac2be0eb0aace762d3421f6e8f7b7f83"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04f54a9018f75615ae52f36ea1c5515e356e5d5e214b22609ddb546baef7132"}, + {file = "ruff-0.5.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ed02fb52e3741f0738db5f93e10ae0fb5c71eb33a4f2ba87c9a2fa97462a649"}, + {file = "ruff-0.5.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3cf8fe659f6362530435d97d738eb413e9f090e7e993f88711b0377fbdc99f60"}, + {file = "ruff-0.5.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:237a37e673e9f3cbfff0d2243e797c4862a44c93d2f52a52021c1a1b0899f846"}, + {file = "ruff-0.5.2-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:2a2949ce7c1cbd8317432ada80fe32156df825b2fd611688814c8557824ef060"}, + {file = "ruff-0.5.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:481af57c8e99da92ad168924fd82220266043c8255942a1cb87958b108ac9335"}, + {file = "ruff-0.5.2-py3-none-win32.whl", hash = "sha256:f1aea290c56d913e363066d83d3fc26848814a1fed3d72144ff9c930e8c7c718"}, + {file = "ruff-0.5.2-py3-none-win_amd64.whl", hash = "sha256:8532660b72b5d94d2a0a7a27ae7b9b40053662d00357bb2a6864dd7e38819084"}, + {file = "ruff-0.5.2-py3-none-win_arm64.whl", hash = "sha256:73439805c5cb68f364d826a5c5c4b6c798ded6b7ebaa4011f01ce6c94e4d5583"}, + {file = "ruff-0.5.2.tar.gz", hash = "sha256:2c0df2d2de685433794a14d8d2e240df619b748fbe3367346baa519d8e6f1ca2"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "wheel" +version = "0.43.0" +description = "A built-package format for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "wheel-0.43.0-py3-none-any.whl", hash = "sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81"}, + {file = "wheel-0.43.0.tar.gz", hash = "sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85"}, +] + +[package.extras] +test = 
["pytest (>=6.0.0)", "setuptools (>=65)"] + +[extras] +bin = [] +extended = [] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.9" +content-hash = "98add445221126c3eb81877368002a05d210383c508828620637170af2d55258" diff --git a/wrappers/python/pyproject.toml b/wrappers/python/pyproject.toml index c339b421..e72550ba 100644 --- a/wrappers/python/pyproject.toml +++ b/wrappers/python/pyproject.toml @@ -2,18 +2,43 @@ name = "pagefind_python" version = "0.1.0" description = "Python API for Pagefind" -authors = ["Your Name "] # TODO: add name and email +authors = ["CloudCannon"] license = "MIT" readme = "README.md" -include = [] # TODO: figure out path for local binaries **IN CI** +include = [] +classifiers = [ + "License :: OSI Approved :: MIT License", + "Topic :: Text Processing :: Indexing", + "Topic :: Text Processing :: Markup :: HTML", +] + +# Note: we *aren't* including an `entry-points` section here to avoid clobbering +# the user's natively-installed `pagefind` binary. Using `python3 -m pagefind_python` +# is an informatively-namespaced alternative that doesn't add too many keystrokes. +# See https://packaging.python.org/en/latest/specifications/entry-points/ [tool.poetry.dependencies] -python = ">=3.8" +python = ">=3.9" +# FIXME: update bin-package names once those stabilize +# experimental_pagefind_python_bin = { version = "*", optional = true } +# experimental_pagefind_python_bin_extended = { version = "*", optional = true } + +[tool.poetry.extras] +bin = ["experimental_pagefind_python_bin"] +extended = ["experimental_pagefind_python_bin_extended"] [tool.poetry.group.dev.dependencies] ruff = "^0.5.0" mypy = "^1.10.1" +wheel = "^0.43.0" +cogapp = "^3.4.1" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" +# note that poetry can currently only build `purelib`s, or pure-python wheels. 
+# (see https://python-poetry.org/docs/cli#build) +# This means poetry can't handle building wheels that contain pagefind's binaries, +# which are necessarily platform-dependent. +# For more information on purelibs/pure-python wheels, see +# https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib diff --git a/wrappers/python/src/pagefind_python/__main__.py b/wrappers/python/src/pagefind_python/__main__.py new file mode 100644 index 00000000..31f85556 --- /dev/null +++ b/wrappers/python/src/pagefind_python/__main__.py @@ -0,0 +1,12 @@ +import os +import sys +from .service import _must_find_binary + +bin = _must_find_binary().absolute() +argv = [bin, *sys.argv[1:]] +if os.name == "posix": + os.execv(bin, argv) +else: + import subprocess + + sys.exit(subprocess.call(argv)) diff --git a/wrappers/python/src/pagefind_python/service/__init__.py b/wrappers/python/src/pagefind_python/service/__init__.py index e984b1fa..6e8675f1 100644 --- a/wrappers/python/src/pagefind_python/service/__init__.py +++ b/wrappers/python/src/pagefind_python/service/__init__.py @@ -1,14 +1,13 @@ import os -import platform from pathlib import Path import json from contextlib import AbstractAsyncContextManager -from typing import Any, Dict, List, Optional, cast, TYPE_CHECKING +from typing import Any, Dict, Optional, cast, TYPE_CHECKING import asyncio import base64 import logging - +import shutil from .types import ( InternalNewIndexRequest, @@ -27,27 +26,30 @@ log = logging.getLogger(__name__) -def _binary_candidates() -> List[Path]: - this_dir = Path(__file__).parent - package_root = this_dir.parent - names = ["pagefind_extended", "pagefind"] - extensions = [""] - if platform.system().lower() == "Windows": - extensions.append(".exe") +def _must_find_binary() -> Path: + try: + from experimental_pagefind_python_bin_extended import get_executable # type: ignore - return [package_root / (n + ext) for n in names for ext in extensions] + executable: Path = get_executable() + 
log.debug(f"using {executable}") + return executable + except ImportError: + log.debug("unable to import experimental_pagefind_python_bin_extended") + try: + from experimental_pagefind_python_bin import get_executable # type: ignore -def _must_find_binary() -> Path: - # TODO: verify this is the correct path - candidates = _binary_candidates() - for candidate in candidates: - if candidate.exists(): - if not candidate.is_file(): - raise FileNotFoundError(f"{candidate} is not a file") - else: - return candidate - raise FileNotFoundError(f"Could not find any of {candidates}") + executable: Path = get_executable() + log.debug(f"using {executable}") + return executable + except ImportError: + log.debug("unable to import experimental_pagefind_python_bin") + + exe: Optional[str] = shutil.which("pagefind_extended") or shutil.which("pagefind") + if exe is None: + raise FileNotFoundError("Could not find pagefind binary") + else: + return Path(exe) def _encode(req: InternalServiceRequest) -> bytes: @@ -113,7 +115,6 @@ async def send(self, payload: InternalRequestPayload) -> InternalResponsePayload # backend waits for a comma before responding await self._backend.stdin.drain() log.debug(f"request sent: {req}") - result = await future log.debug(f"received response: {result}") return result @@ -127,13 +128,22 @@ async def _wait_for_responses(self) -> None: assert self._backend.stdout is not None log.debug("checking for data") output = await self._backend.stdout.readuntil(b",") - log.debug(f"received data: {output}") + if len(output) <= 100: + log.debug(f"received data: {output}") + else: + log.debug( + f"received data: {output[:30]}...{len(output) - 40}B...{output[-10:]}" + ) if (resp := json.loads(base64.b64decode(output[:-1]))) is None: continue resp = cast(InternalServiceResponse, resp) if (message_id := resp.get("message_id")) is not None: - assert self._message_id <= message_id, "message_id out of order" + log.debug(f"received response for message {message_id}") + assert ( + 
self._message_id >= message_id + ), f"message_id out of order: incoming {message_id} > current: {self._message_id}" if (future := self._responses.get(message_id)) is not None: + log.debug(f"resolving future for message {message_id}") payload = resp["payload"] if payload["type"] == InternalResponseType.ERROR.value: exc = cast(InternalResponseError, payload) @@ -143,6 +153,7 @@ async def _wait_for_responses(self) -> None: else: future.set_result(cast(InternalResponsePayload, payload)) else: + log.debug(f"no receiving future for message {message_id}") # FIXME: figure out how to surface the error payload = cast(InternalResponseError, resp["payload"]) # assert ( diff --git a/wrappers/python/src/pagefind_python_bin/README.md b/wrappers/python/src/pagefind_python_bin/README.md new file mode 100644 index 00000000..d77859e9 --- /dev/null +++ b/wrappers/python/src/pagefind_python_bin/README.md @@ -0,0 +1,13 @@ + +# `pagefind_python_bin` +A python wrapper for the pagefind binary. + +## Usage + +```py +from pagefind_python_bin import get_executable +print(get_executable()) # yields absolute path to the binary +``` +```sh +python3 -m pagefind_python_bin --help +``` diff --git a/wrappers/python/src/pagefind_python_bin/__init__.py b/wrappers/python/src/pagefind_python_bin/__init__.py new file mode 100644 index 00000000..21cbe924 --- /dev/null +++ b/wrappers/python/src/pagefind_python_bin/__init__.py @@ -0,0 +1,51 @@ +import logging +from pathlib import Path +import platform +from typing import List +import os +import sys + +__all__ = ["get_executable", "cli"] + + +this_dir = Path(__file__).parent +log = logging.getLogger(__name__) + + +def get_candidate_paths() -> List[Path]: + names = ["pagefind_extended", "pagefind"] + extensions = [""] + if platform.system().lower() == "Windows": + extensions.append(".exe") + bin_names = [n + ext for n in names for ext in extensions] + paths = [this_dir / bin for bin in bin_names] + return paths + + +def get_executable() -> Path: + candidates 
= get_candidate_paths() + for candidate in candidates: + if candidate.exists(): + log.debug(f"{candidate} found") + if candidate.is_file(): + return candidate + else: + raise FileNotFoundError(f"{candidate} is not a file") + else: + log.debug(f"{candidate} not found") + raise FileNotFoundError(f"Could not find any of {candidates}") + + +def cli(): + bin = get_executable().absolute() + argv = [bin, *sys.argv[1:]] + if os.name == "posix": + os.execv(bin, argv) + else: + import subprocess + + sys.exit(subprocess.call(argv)) + + +if __name__ == "__main__": + cli() diff --git a/wrappers/python/src/tests/integration.py b/wrappers/python/src/tests/integration.py index d5c6ee79..a434acca 100644 --- a/wrappers/python/src/tests/integration.py +++ b/wrappers/python/src/tests/integration.py @@ -1,44 +1,55 @@ +import asyncio +import json +import logging +import os from pagefind_python.index import PagefindIndex, IndexConfig +logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO")) +log = logging.getLogger(__name__) +html_content = ( + "" + " " + "
" + "

Example HTML

" + "

This is an example HTML page.

" + "
" + " " + "" +) + + +def prefix(pre: str, s: str) -> str: + return pre + s.replace("\n", f"\n{pre}") + async def main(): config = IndexConfig( root_selector="main", logfile="index.log", output_path="./output", verbose=True ) async with PagefindIndex(config=config) as index: - await index.add_directory("./public") - new_file = await index.add_html_file( - content=( - "" - " " - "
" - "

Example HTML

" - "

This is an example HTML page.

" - "
" - " " - "" + log.debug("opened index") + new_file, new_record, new_dir = await asyncio.gather( + index.add_html_file( + content=html_content, + url="https://example.com", + source_path="other/example.html", ), - url="https://example.com", - source_path="other/example.html", - ) - print(f"new_file={new_file}") - new_record = await index.add_custom_record( - url="/elephants/", - content="Some testing content regarding elephants", - language="en", - meta={"title": "Elephants"}, + index.add_custom_record( + url="/elephants/", + content="Some testing content regarding elephants", + language="en", + meta={"title": "Elephants"}, + ), + index.add_directory("./public"), ) - print(f"new_record={new_record}") - - new_dir = await index.add_directory("./public") - print(f"new_dir={new_dir}") + print(prefix("new_file ", json.dumps(new_file, indent=2))) + print(prefix("new_record ", json.dumps(new_record, indent=2))) + print(prefix("new_dir ", json.dumps(new_dir, indent=2))) files = await index.get_files() - for f in files: - print(f"files= {len(f['content']):10}B {f['path']}") + for file in files: + print(prefix("files", f"{len(file['content']):10}B {file['path']}")) if __name__ == "__main__": - import asyncio - asyncio.run(main()) From 8478ccbc6aed98ec340c1109c11b20a0a84a1a88 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sun, 4 Aug 2024 12:58:51 -0400 Subject: [PATCH 04/39] refactor(python): remove 'experimental' prefix from python package names --- wrappers/python/README.md | 24 ++++++++++++------- wrappers/python/build_binary_only_wheel.py | 17 +++++++------ wrappers/python/pyproject.toml | 7 +++--- .../{pagefind_python => pagefind}/__init__.py | 0 .../{pagefind_python => pagefind}/__main__.py | 0 .../index/__init__.py | 0 .../{pagefind_python => pagefind}/py.typed | 0 .../service/__init__.py | 8 +++---- .../service/types.py | 0 wrappers/python/src/pagefind_bin/README.md | 13 ++++++++++ .../__init__.py | 0 .../python/src/pagefind_python_bin/README.md | 13 ---------- 
wrappers/python/src/tests/integration.py | 2 +- 13 files changed, 45 insertions(+), 39 deletions(-) rename wrappers/python/src/{pagefind_python => pagefind}/__init__.py (100%) rename wrappers/python/src/{pagefind_python => pagefind}/__main__.py (100%) rename wrappers/python/src/{pagefind_python => pagefind}/index/__init__.py (100%) rename wrappers/python/src/{pagefind_python => pagefind}/py.typed (100%) rename wrappers/python/src/{pagefind_python => pagefind}/service/__init__.py (95%) rename wrappers/python/src/{pagefind_python => pagefind}/service/types.py (100%) create mode 100644 wrappers/python/src/pagefind_bin/README.md rename wrappers/python/src/{pagefind_python_bin => pagefind_bin}/__init__.py (100%) delete mode 100644 wrappers/python/src/pagefind_python_bin/README.md diff --git a/wrappers/python/README.md b/wrappers/python/README.md index 97c9162b..7be0138b 100644 --- a/wrappers/python/README.md +++ b/wrappers/python/README.md @@ -2,12 +2,12 @@ An async python API for the [pagefind](https://pagefind.app) binary. 
## Installation - + ## Usage ```py import asyncio +import json import logging -from pagefind_python.index import PagefindIndex, IndexConfig +import os +from pagefind.index import PagefindIndex, IndexConfig -logging.basicConfig(level=logging.DEBUG) +logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO")) log = logging.getLogger(__name__) html_content = ( "" @@ -33,6 +35,10 @@ html_content = ( ) +def prefix(pre: str, s: str) -> str: + return pre + s.replace("\n", f"\n{pre}") + + async def main(): config = IndexConfig( root_selector="main", logfile="index.log", output_path="./output", verbose=True @@ -53,13 +59,13 @@ async def main(): ), index.add_directory("./public"), ) - print(f"new_file={new_file}") - print(f"new_record={new_record}") - print(f"new_dir={new_dir}") + print(prefix("new_file ", json.dumps(new_file, indent=2))) + print(prefix("new_record ", json.dumps(new_record, indent=2))) + print(prefix("new_dir ", json.dumps(new_dir, indent=2))) files = await index.get_files() - for f in files: - print(f"files= {len(f['content']):10}B {f['path']}") + for file in files: + print(prefix("files", f"{len(file['content']):10}B {file['path']}")) if __name__ == "__main__": diff --git a/wrappers/python/build_binary_only_wheel.py b/wrappers/python/build_binary_only_wheel.py index ba141709..2f7167db 100644 --- a/wrappers/python/build_binary_only_wheel.py +++ b/wrappers/python/build_binary_only_wheel.py @@ -42,7 +42,8 @@ "x86_64-unknown-linux-musl": "manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64", } -def as_zip_info(file: Path, *, alias: str)-> Tuple[ZipInfo, bytes]: + +def as_zip_info(file: Path, *, alias: str) -> Tuple[ZipInfo, bytes]: zip_info = ZipInfo(alias or file.name, (1980, 1, 1, 0, 0, 0)) zip_info.external_attr = file.stat().st_mode << 16 with file.open("rb") as f: @@ -50,8 +51,8 @@ def as_zip_info(file: Path, *, alias: str)-> Tuple[ZipInfo, bytes]: zip_info.file_size = len(data) return zip_info, data -class 
ReproducibleWheelFile(wheel.wheelfile.WheelFile): +class ReproducibleWheelFile(wheel.wheelfile.WheelFile): def writestr( self, zip_info_or_arc_name: Union[ZipInfo, str], data: Any, *args, **kwargs ): @@ -88,8 +89,7 @@ def make_message( def write_wheel_file( - filename: Path, - contents: Dict[str, Union[str, bytes, EmailMessage, ZipInfo]] + filename: Path, contents: Dict[str, Union[str, bytes, EmailMessage, ZipInfo]] ) -> Path: with ReproducibleWheelFile(filename, "w") as wheel: for member_info, member_source in contents.items(): @@ -155,11 +155,10 @@ def write_pagefind_bin_only_wheel( platform: str, ) -> Path: # FIXME: update when package support is stabilized + name = "pagefind_bin" if "extended" in executable.name: - name = "experimental_pagefind_python_bin_extended" - else: - name = "experimental_pagefind_python_bin" - src_dir = this_dir / "src" / "pagefind_python_bin" + name += "_extended" + src_dir = this_dir / "src" / "pagefind_bin" contents = { f"{name}/__init__.py": (src_dir / "__init__.py"), f"{name}/{executable.name}": executable, @@ -167,7 +166,7 @@ def write_pagefind_bin_only_wheel( # Load in static files with (src_dir / "README.md").open() as f: - description = f.read().replace("pagefind_python_bin", name) + description = f.read().replace("pagefind_bin", name) return write_wheel( output_dir, diff --git a/wrappers/python/pyproject.toml b/wrappers/python/pyproject.toml index e72550ba..42a2bd65 100644 --- a/wrappers/python/pyproject.toml +++ b/wrappers/python/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "pagefind_python" +name = "pagefind" version = "0.1.0" description = "Python API for Pagefind" authors = ["CloudCannon"] @@ -19,13 +19,14 @@ classifiers = [ [tool.poetry.dependencies] python = ">=3.9" +# TODO: uncomment # FIXME: update bin-package names once those stabilize # experimental_pagefind_python_bin = { version = "*", optional = true } # experimental_pagefind_python_bin_extended = { version = "*", optional = true } [tool.poetry.extras] 
-bin = ["experimental_pagefind_python_bin"] -extended = ["experimental_pagefind_python_bin_extended"] +bin = ["pagefind_bin"] +extended = ["pagefind_bin_extended"] [tool.poetry.group.dev.dependencies] ruff = "^0.5.0" diff --git a/wrappers/python/src/pagefind_python/__init__.py b/wrappers/python/src/pagefind/__init__.py similarity index 100% rename from wrappers/python/src/pagefind_python/__init__.py rename to wrappers/python/src/pagefind/__init__.py diff --git a/wrappers/python/src/pagefind_python/__main__.py b/wrappers/python/src/pagefind/__main__.py similarity index 100% rename from wrappers/python/src/pagefind_python/__main__.py rename to wrappers/python/src/pagefind/__main__.py diff --git a/wrappers/python/src/pagefind_python/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py similarity index 100% rename from wrappers/python/src/pagefind_python/index/__init__.py rename to wrappers/python/src/pagefind/index/__init__.py diff --git a/wrappers/python/src/pagefind_python/py.typed b/wrappers/python/src/pagefind/py.typed similarity index 100% rename from wrappers/python/src/pagefind_python/py.typed rename to wrappers/python/src/pagefind/py.typed diff --git a/wrappers/python/src/pagefind_python/service/__init__.py b/wrappers/python/src/pagefind/service/__init__.py similarity index 95% rename from wrappers/python/src/pagefind_python/service/__init__.py rename to wrappers/python/src/pagefind/service/__init__.py index 6e8675f1..87e286d2 100644 --- a/wrappers/python/src/pagefind_python/service/__init__.py +++ b/wrappers/python/src/pagefind/service/__init__.py @@ -28,22 +28,22 @@ def _must_find_binary() -> Path: try: - from experimental_pagefind_python_bin_extended import get_executable # type: ignore + from pagefind_bin_extended import get_executable # type: ignore executable: Path = get_executable() log.debug(f"using {executable}") return executable except ImportError: - log.debug("unable to import experimental_pagefind_python_bin_extended") + 
log.debug("unable to import pagefind_bin_extended") try: - from experimental_pagefind_python_bin import get_executable # type: ignore + from pagefind_bin import get_executable # type: ignore executable: Path = get_executable() log.debug(f"using {executable}") return executable except ImportError: - log.debug("unable to import experimental_pagefind_python_bin") + log.debug("unable to import pagefind_bin") exe: Optional[str] = shutil.which("pagefind_extended") or shutil.which("pagefind") if exe is None: diff --git a/wrappers/python/src/pagefind_python/service/types.py b/wrappers/python/src/pagefind/service/types.py similarity index 100% rename from wrappers/python/src/pagefind_python/service/types.py rename to wrappers/python/src/pagefind/service/types.py diff --git a/wrappers/python/src/pagefind_bin/README.md b/wrappers/python/src/pagefind_bin/README.md new file mode 100644 index 00000000..8652aaa8 --- /dev/null +++ b/wrappers/python/src/pagefind_bin/README.md @@ -0,0 +1,13 @@ + +# `pagefind_bin` +A python wrapper for the pagefind binary. + +## Usage + +```py +from pagefind_bin import get_executable +print(get_executable()) # yields absolute path to the binary +``` +```sh +python3 -m pagefind_bin --help +``` diff --git a/wrappers/python/src/pagefind_python_bin/__init__.py b/wrappers/python/src/pagefind_bin/__init__.py similarity index 100% rename from wrappers/python/src/pagefind_python_bin/__init__.py rename to wrappers/python/src/pagefind_bin/__init__.py diff --git a/wrappers/python/src/pagefind_python_bin/README.md b/wrappers/python/src/pagefind_python_bin/README.md deleted file mode 100644 index d77859e9..00000000 --- a/wrappers/python/src/pagefind_python_bin/README.md +++ /dev/null @@ -1,13 +0,0 @@ - -# `pagefind_python_bin` -A python wrapper for the pagefind binary. 
- -## Usage - -```py -from pagefind_python_bin import get_executable -print(get_executable()) # yields absolute path to the binary -``` -```sh -python3 -m pagefind_python_bin --help -``` diff --git a/wrappers/python/src/tests/integration.py b/wrappers/python/src/tests/integration.py index a434acca..2ac78141 100644 --- a/wrappers/python/src/tests/integration.py +++ b/wrappers/python/src/tests/integration.py @@ -2,7 +2,7 @@ import json import logging import os -from pagefind_python.index import PagefindIndex, IndexConfig +from pagefind.index import PagefindIndex, IndexConfig logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO")) log = logging.getLogger(__name__) From ecdb22ccc4acb3e8cd3216e6726ba442dd64eb43 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Tue, 20 Aug 2024 08:26:45 -0400 Subject: [PATCH 05/39] chore(python): WIP backport of changes from skalt/pagefind_python --- .shellcheckrc | 4 + test_ci.sh | 4 +- wrappers/python/.gitignore | 2 + wrappers/python/poetry.toml | 2 + wrappers/python/pyproject.toml | 37 ++- wrappers/python/scripts/__init__.py | 0 wrappers/python/scripts/build/__init__.py | 16 ++ wrappers/python/scripts/build/all.py | 70 +++++ wrappers/python/scripts/build/api_package.py | 28 ++ .../python/scripts/build/binary_only_wheel.py | 254 ++++++++++++++++++ .../scripts/build/download_verification.py | 38 +++ .../scripts/build/get_pagefind_release.py | 83 ++++++ wrappers/python/scripts/ci/cog/check.sh | 9 + wrappers/python/scripts/ci/cog/files.sh | 7 + wrappers/python/scripts/ci/cog/update.sh | 9 + .../ci/github/install_dev_dependencies.sh | 6 + .../scripts/ci/github/integration_tests.sh | 66 +++++ .../ci/github/scrape_upstream_version.sh | 20 ++ .../python/scripts/ci/github/setup_poetry.sh | 10 + wrappers/python/scripts/ci/python_lints.sh | 5 + wrappers/python/scripts/ci/shellcheck.sh | 3 + .../python/scripts/publish_to_test_pypi.sh | 5 + wrappers/python/src/pagefind/__init__.py | 15 +- wrappers/python/src/pagefind/__main__.py | 5 +- 
.../python/src/pagefind/index/__init__.py | 14 +- .../python/src/pagefind/service/__init__.py | 72 +++-- wrappers/python/src/pagefind/service/types.py | 8 +- .../README.md | 7 +- .../__init__.py | 16 +- .../src/pagefind_python_bin/__main__.py | 3 + 30 files changed, 756 insertions(+), 62 deletions(-) create mode 100644 .shellcheckrc create mode 100644 wrappers/python/.gitignore create mode 100644 wrappers/python/poetry.toml create mode 100644 wrappers/python/scripts/__init__.py create mode 100644 wrappers/python/scripts/build/__init__.py create mode 100644 wrappers/python/scripts/build/all.py create mode 100644 wrappers/python/scripts/build/api_package.py create mode 100644 wrappers/python/scripts/build/binary_only_wheel.py create mode 100644 wrappers/python/scripts/build/download_verification.py create mode 100644 wrappers/python/scripts/build/get_pagefind_release.py create mode 100755 wrappers/python/scripts/ci/cog/check.sh create mode 100755 wrappers/python/scripts/ci/cog/files.sh create mode 100755 wrappers/python/scripts/ci/cog/update.sh create mode 100755 wrappers/python/scripts/ci/github/install_dev_dependencies.sh create mode 100755 wrappers/python/scripts/ci/github/integration_tests.sh create mode 100755 wrappers/python/scripts/ci/github/scrape_upstream_version.sh create mode 100755 wrappers/python/scripts/ci/github/setup_poetry.sh create mode 100755 wrappers/python/scripts/ci/python_lints.sh create mode 100644 wrappers/python/scripts/ci/shellcheck.sh create mode 100755 wrappers/python/scripts/publish_to_test_pypi.sh rename wrappers/python/src/{pagefind_bin => pagefind_python_bin}/README.md (65%) rename wrappers/python/src/{pagefind_bin => pagefind_python_bin}/__init__.py (89%) create mode 100644 wrappers/python/src/pagefind_python_bin/__main__.py diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 00000000..470b279e --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1,4 @@ +external-sources=true +source-path=SCRIPTDIR +disable=SC2002 +# SC2002: 
ignore "useless cat" warning: starting pipes with `cat` improves composability diff --git a/test_ci.sh b/test_ci.sh index 9e35da2c..543e4277 100755 --- a/test_ci.sh +++ b/test_ci.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash - +set -eu SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -cd $SCRIPT_DIR +cd "$SCRIPT_DIR" PAGEFIND=$(realpath "$SCRIPT_DIR/target/$1/pagefind") REPO_WD=$(realpath "$SCRIPT_DIR") diff --git a/wrappers/python/.gitignore b/wrappers/python/.gitignore new file mode 100644 index 00000000..594ed2a7 --- /dev/null +++ b/wrappers/python/.gitignore @@ -0,0 +1,2 @@ +output +# ^ from src/tests/integration.py diff --git a/wrappers/python/poetry.toml b/wrappers/python/poetry.toml new file mode 100644 index 00000000..ab1033bd --- /dev/null +++ b/wrappers/python/poetry.toml @@ -0,0 +1,2 @@ +[virtualenvs] +in-project = true diff --git a/wrappers/python/pyproject.toml b/wrappers/python/pyproject.toml index 42a2bd65..32ed4ae0 100644 --- a/wrappers/python/pyproject.toml +++ b/wrappers/python/pyproject.toml @@ -1,11 +1,22 @@ [tool.poetry] -name = "pagefind" -version = "0.1.0" +name = "pagefind_python" +version = "0.0.0a0" +# note that ^this^ is the version number of the python API, not the version of +# the pagefind executable. 
description = "Python API for Pagefind" authors = ["CloudCannon"] license = "MIT" readme = "README.md" include = [] +exclude = [ + "dist", + "output", + "*.whl", + "*.egg-info", + "*.log", + ".venv", + "pagefind_python_bin" +] classifiers = [ "License :: OSI Approved :: MIT License", "Topic :: Text Processing :: Indexing", @@ -19,10 +30,22 @@ classifiers = [ [tool.poetry.dependencies] python = ">=3.9" -# TODO: uncomment -# FIXME: update bin-package names once those stabilize -# experimental_pagefind_python_bin = { version = "*", optional = true } -# experimental_pagefind_python_bin_extended = { version = "*", optional = true } + +# during the building of the pagefind_python package, the pagefind binary packages +# aren't yet published. Thus, `poetry lock` will fail if we include them here. +# However, `poetry build` fails to include the binary package extras in +# `pagefind_python`'s distribution info if these lines are commented out. Thus, +# we temporarily uncomment these lines during the build process, and then re-comment +# them afterwards + +# [[[cog +# version = open("pagefind_version.txt").read().strip() +# print(f"# pagefind_bin = {{ version = \"~={version}\", optional = true }} #!!opt") +# print(f"# pagefind_bin_extended = {{ version = \"~={version}\", optional = true }} #!!opt") +# ]]] +# pagefind_bin = { version = "~=1.1.0", optional = true } #!!opt +# pagefind_bin_extended = { version = "~=1.1.0", optional = true } #!!opt +# [[[end]]] [tool.poetry.extras] bin = ["pagefind_bin"] @@ -33,6 +56,8 @@ ruff = "^0.5.0" mypy = "^1.10.1" wheel = "^0.43.0" cogapp = "^3.4.1" +twine = "^5.1.1" +docutils = "^0.21.2" [build-system] requires = ["poetry-core"] diff --git a/wrappers/python/scripts/__init__.py b/wrappers/python/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wrappers/python/scripts/build/__init__.py b/wrappers/python/scripts/build/__init__.py new file mode 100644 index 00000000..b7066541 --- /dev/null +++ 
b/wrappers/python/scripts/build/__init__.py @@ -0,0 +1,16 @@ +import logging +import os +from pathlib import Path + +this_file = Path(__file__) +this_dir = Path(__file__).parent +python_root = this_dir.parent.parent.resolve().absolute() +upstream_version_file = python_root / "pagefind_version.txt" +dist_dir = python_root / "dist" +vendor_dir = python_root / "vendor" + + +def setup_logging() -> None: + logging.basicConfig( + level=os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL") or logging.INFO + ) diff --git a/wrappers/python/scripts/build/all.py b/wrappers/python/scripts/build/all.py new file mode 100644 index 00000000..2dc5358d --- /dev/null +++ b/wrappers/python/scripts/build/all.py @@ -0,0 +1,70 @@ +import tarfile +import tempfile +from pathlib import Path +from typing import List + +from . import dist_dir, setup_logging +from .binary_only_wheel import ( + LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS, + write_pagefind_bin_only_wheel, +) +from .get_pagefind_release import download + +__candidates = ( + "pagefind", + "pagefind.exe", + "pagefind_extended", + "pagefind_extended.exe", +) + + +def find_bin(dir: Path) -> Path: + for file in dir.iterdir(): + if file.is_file() and file.name in __candidates: + return file + raise FileNotFoundError(f"Could not find any of {__candidates} in {dir}") + + +def get_llvm_triple(tar_gz: Path) -> str: + assert tar_gz.name.endswith(".tar.gz") + # parse the llvm triple from the archive name + llvm_triple = tar_gz.name + llvm_triple = llvm_triple.removesuffix(".tar.gz") + llvm_triple = llvm_triple.removeprefix(f"pagefind-{tag_name}-") + llvm_triple = llvm_triple.removeprefix(f"pagefind_extended-{tag_name}-") + return llvm_triple + + +def check_platforms(certified: List[Path]) -> None: + for compressed_archive in certified: + llvm_triple = get_llvm_triple(compressed_archive) + platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(llvm_triple) + if platform is None: + raise ValueError(f"Unsupported platform: {llvm_triple}") + + +if __name__ == 
"__main__": + setup_logging() + certified, tag_name = download("latest", dry_run=False) + # create a temp directory to hold the extracted binaries + check_platforms(certified) + dist_dir.mkdir(exist_ok=True) + for tar_gz in certified: + llvm_triple = get_llvm_triple(tar_gz) + platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(llvm_triple) + if platform is None: + raise ValueError(f"Unsupported platform: {llvm_triple}") + + # FIXME: avoid writing the extracted bin to disk + # unpack the tar.gz archive + name = tar_gz.name.removesuffix(".tar.gz") + with tempfile.TemporaryDirectory(prefix=name + "~") as _temp_dir: + temp_dir = Path(_temp_dir) + with tarfile.open(tar_gz, "r:gz") as tar: + tar.extractall(_temp_dir) + write_pagefind_bin_only_wheel( + executable=find_bin(temp_dir), + output_dir=dist_dir, + version=tag_name.removeprefix("v"), + platform=platform, + ) diff --git a/wrappers/python/scripts/build/api_package.py b/wrappers/python/scripts/build/api_package.py new file mode 100644 index 00000000..0473ecfc --- /dev/null +++ b/wrappers/python/scripts/build/api_package.py @@ -0,0 +1,28 @@ +# HACK: This script is a hack to build the API package without using poetry to lock the +# optional dependencies. It might be preferable to use setuptools directly rather than +# work around poetry. + +from . 
import python_root, setup_logging +import subprocess + +pyproject_toml = python_root / "pyproject.toml" + + +def main() -> None: + original = pyproject_toml.read_text() + temp = "" + for line in original.splitlines(): + if line.endswith("#!!opt"): + temp += line.removeprefix("# ") + "\n" + else: + temp += line + "\n" + with pyproject_toml.open("w") as f: + f.write(temp) + subprocess.run(["poetry", "build"], check=True) + with pyproject_toml.open("w") as f: + f.write(original) + + +if __name__ == "__main__": + setup_logging() + main() diff --git a/wrappers/python/scripts/build/binary_only_wheel.py b/wrappers/python/scripts/build/binary_only_wheel.py new file mode 100644 index 00000000..faec258d --- /dev/null +++ b/wrappers/python/scripts/build/binary_only_wheel.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# Adapted from https://github.com/ziglang/zig-pypi/blob/a0ca0d8b2d5104498f4eececff09ed2b1ede2d0b/make_wheels.py +# See also https://simonwillison.net/2022/May/23/bundling-binary-tools-in-python-wheels/ +# +# Note that this script assumes that the relevant files are on disk and either +# the files hashes have been verified or we trust the files. +import argparse +import logging +from email.message import EmailMessage +from pathlib import Path +from typing import Any, Dict, List, Mapping, Optional, Tuple, Union +from zipfile import ZIP_DEFLATED, ZipInfo + +import wheel # type: ignore +import wheel.wheelfile # type: ignore + +from . 
import python_root, setup_logging + +log = logging.getLogger(__name__) +# constants +HOMEPAGE = "https://pagefind.app" +REPO = "https://github.com/CloudCannon/pagefind/" +REQUIRED_PYTHON_VERSION = "~=3.9" + + +src_dir = python_root / "src" / "pagefind_python_bin" +assert src_dir.is_dir(), f"{src_dir} is not a directory" + + +# as of the time of writing, these are the supported platforms: +# See https://doc.rust-lang.org/nightly/rustc/platform-support.html +# wheel name format: {dist}-{version}(-{build})?-{python}-{abi}-{platform}.whl +# this dict helps look up the last part of the wheel name: ^^^^^^^^^^ +LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS = { + # LLVM triple: Python platform + # only the LLVM triples that are produced in CI are listed here; see + # https://github.com/CloudCannon/pagefind/releases/latest + # the python platform mapping is copied from zig-pypi's script. + # See also: https://github.com/PyO3/maturin/blob/main/src/auditwheel/manylinux-policy.json + # See also: https://github.com/PyO3/maturin/blob/main/src/auditwheel/musllinux-policy.json + # TODO: check the python platforms are correct. 
+ "aarch64-apple-darwin": "macosx_12_0_arm64", + "aarch64-unknown-linux-musl": "manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64", + "x86_64-apple-darwin": "macosx_12_0_arm64", + "x86_64-pc-windows-msvc": "win_amd64", + "x86_64-unknown-linux-musl": "manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64", +} + + +def as_zip_info(file: Path, *, alias: str) -> Tuple[ZipInfo, bytes]: + zip_info = ZipInfo(alias or file.name, (1980, 1, 1, 0, 0, 0)) + zip_info.external_attr = file.stat().st_mode << 16 + with file.open("rb") as f: + data = f.read() + zip_info.file_size = len(data) + return zip_info, data + + +class ReproducibleWheelFile(wheel.wheelfile.WheelFile): # type: ignore + def writestr( + self, + zip_info_or_arc_name: Union[ZipInfo, str], + data: Any, + *args: Any, + **kwargs: Any, + ) -> None: + if isinstance(zip_info_or_arc_name, ZipInfo): + zip_info = zip_info_or_arc_name + else: + assert isinstance(zip_info_or_arc_name, str) + zip_info = ZipInfo(zip_info_or_arc_name) + zip_info.file_size = len(data) + zip_info.external_attr = 0o0644 << 16 + if zip_info_or_arc_name.endswith(".dist-info/RECORD"): + zip_info.external_attr = 0o0664 << 16 + + zip_info.compress_type = ZIP_DEFLATED + zip_info.date_time = (1980, 1, 1, 0, 0, 0) + zip_info.create_system = 3 + wheel.wheelfile.WheelFile.writestr(self, zip_info, data, *args, **kwargs) + + +def make_message( + headers: Dict[str, Union[str, List[str]]], + payload: Optional[Union[str, bytes]] = None, +) -> EmailMessage: + msg = EmailMessage() + for name, value in headers.items(): + if isinstance(value, list): + for value_part in value: + msg[name] = value_part + else: + msg[name] = value + if payload: + msg.set_payload(payload) + return msg + + +def write_wheel_file( + filename: Path, + contents: Mapping[ + Union[str, ZipInfo], Union[str, bytes, EmailMessage, ZipInfo, Path] + ], +) -> Path: + with ReproducibleWheelFile(filename, "w") as wheel: + for member_info, member_source in contents.items(): + 
if isinstance(member_source, str): + data = member_source.encode("utf-8") + elif isinstance(member_source, bytes): + data = member_source + elif isinstance(member_source, EmailMessage): + data = member_source.as_bytes( + policy=member_source.policy.clone(linesep="\n"), unixfrom=False + ) + elif isinstance(member_source, Path): + assert type(member_info) is str + member_info, data = as_zip_info(member_source, alias=member_info) + else: + raise ValueError(f"unexpected content: {type(member_source)}") + wheel.writestr(member_info, data) + return filename + + +def write_wheel( + out_dir: Path, + *, + name: str, + version: str, + tag: str, + metadata: Dict[str, Any], + description: str, + contents: Mapping[ + Union[str, ZipInfo], Union[str, bytes, EmailMessage, ZipInfo, Path] + ], +) -> Path: + wheel_name = f"{name}-{version}-{tag}.whl" + dist_info = f"{name}-{version}.dist-info" + return write_wheel_file( + (out_dir / wheel_name), + { + **contents, + f"{dist_info}/METADATA": make_message( + { + # see https://packaging.python.org/en/latest/specifications/core-metadata/ + "Metadata-Version": "2.1", + "Name": name, + "Version": version, + **metadata, + }, + description, + ), + f"{dist_info}/WHEEL": make_message( + { + "Wheel-Version": "1.0", + "Generator": "build_binary_only_wheel.py", + "Root-Is-Purelib": "false", # see https://packaging.python.org/en/latest/specifications/binary-distribution-format/#what-s-the-deal-with-purelib-vs-platlib + "Tag": tag, + } + ), + }, + ) + + +def write_pagefind_bin_only_wheel( + *, + executable: Path, + output_dir: Path, + version: str, + platform: str, +) -> Path: + # FIXME: update when package support is stabilized + name = "pagefind_bin" + if "extended" in executable.name: + name += "_extended" + contents: Mapping[Union[str, ZipInfo], Path] = { + f"{name}/__init__.py": (src_dir / "__init__.py"), + f"{name}/__main__.py": (src_dir / "__main__.py"), + f"{name}/{executable.name}": executable, + } + + # Load in static files + with (src_dir 
/ "README.md").open() as f: + description = f.read().replace("pagefind_bin", name) + + return write_wheel( + output_dir, + name=name, + version=version, + tag=f"py3-none-{platform}", + metadata={ + "Summary": "Pagefind is a library for performant, low-bandwidth, fully static search.", + "Description-Content-Type": "text/markdown", + "License": "MIT", + "Author": "CloudCannon", + "Classifier": [ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", # FIXME: update when package name stabilized + "Intended Audience :: Developers", + ], + "Project-URL": [ + f"Homepage, {HOMEPAGE}", + f"Source Code, {REPO}", + f"Bug Tracker, {REPO}/issues", + ], + "Requires-Python": REQUIRED_PYTHON_VERSION, + }, + description=description, + contents=contents, + ) + + +def get_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog=__file__, description="Repackage Pagefind binaries as Python wheels" + ) + parser.add_argument( + "--version", + default=None, + help="version to package", + ) + parser.add_argument("--suffix", default="", help="wheel version suffix") + parser.add_argument("--bin-path", help="path to the binary to embed", required=True) + parser.add_argument( + "--output-dir", + default="dist/", + help="Output directory in which to place the built wheel", + ) + parser.add_argument( + "--llvm-triple", + required=True, + choices=list(LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.keys()), + help="platform to build for", + ) + return parser + + +def main() -> None: + setup_logging() + args = get_arg_parser().parse_args() + platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(args.llvm_triple) + if platform is None: + raise ValueError(f"Unsupported platform: {args.llvm_triple}") + + logging.getLogger(wheel.__name__).setLevel(logging.WARNING) + write_pagefind_bin_only_wheel( + output_dir=Path(args.output_dir), + executable=Path(args.bin_path), + version=args.version, + platform=platform, + ) + + +if __name__ == "__main__": + main() 
diff --git a/wrappers/python/scripts/build/download_verification.py b/wrappers/python/scripts/build/download_verification.py new file mode 100644 index 00000000..fe324f2e --- /dev/null +++ b/wrappers/python/scripts/build/download_verification.py @@ -0,0 +1,38 @@ +import hashlib +import logging +from pathlib import Path +from typing import Dict, List + +log = logging.getLogger(__name__) + + +def verify_hashes(version_vendor_dir: Path, name_to_hash: Dict[str, str]) -> List[Path]: + verified = [] + assert ( + version_vendor_dir.is_dir() + ), f"{version_vendor_dir} is not a directory; pwd={Path.cwd()}" + for name, hash_name in name_to_hash.items(): + to_verify = version_vendor_dir / name + hash_file = version_vendor_dir / hash_name + + assert hash_name.endswith(".sha256"), f"{hash_name} does not end with .sha256" + assert to_verify.is_file(), f"{to_verify} is not a file" + assert hash_file.is_file(), f"{hash_file} is not a file" + + with hash_file.open() as f: + expected_hash, expected_name = f.read().strip().split() + expected_name = expected_name.removeprefix("*") + with to_verify.open("rb") as f: + actual_hash = hashlib.sha256(f.read()).hexdigest() + if name != expected_name: + raise ValueError( + f"name mismatch: actual {to_verify.name} != expected {expected_name}" + ) + if actual_hash != expected_hash: + raise ValueError( + f"hash mismatch: actual {actual_hash} != expected {expected_hash}" + ) + else: + verified.append(to_verify) + log.info(f"hash {actual_hash} verified for {name}") + return verified diff --git a/wrappers/python/scripts/build/get_pagefind_release.py b/wrappers/python/scripts/build/get_pagefind_release.py new file mode 100644 index 00000000..6295519a --- /dev/null +++ b/wrappers/python/scripts/build/get_pagefind_release.py @@ -0,0 +1,83 @@ +import json +import logging +import os +import sys +from pathlib import Path +from typing import Any, Dict, List, Tuple, Union +from urllib.request import urlopen + +from . 
import vendor_dir, upstream_version_file +from .download_verification import verify_hashes + +log = logging.getLogger(__name__) +logging.basicConfig(level=os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL") or logging.INFO) + +if sys.argv[1:]: + version = sys.argv[1] +elif "PAGEFIND_VERSION" in os.environ: + version = os.environ["PAGEFIND_VERSION"] +else: + version = "latest" + + +def get_version_downloads( + version: str, +) -> tuple[ + List[str], # urls + Dict[str, str], # file: hash_file mapping + str, # tag_name +]: + url = f"https://api.github.com/repos/CloudCannon/pagefind/releases/{version}" + response = urlopen(url) + data = json.loads(response.read()) + all_assets: Dict[str, Dict[str, Any]] = dict() + for asset in data["assets"]: + all_assets[asset["name"]] = asset + tag_name = data["tag_name"] + + files: Dict[str, str] = dict() + urls = [] + for name in all_assets: + if name.endswith(".sha256"): + name = name + file_name = name.removesuffix(".sha256") + files[file_name] = name + urls.append(all_assets[name]["browser_download_url"]) + urls.append(all_assets[file_name]["browser_download_url"]) + + return urls, files, tag_name + + +def download( + version: Union[str, None] = None, dry_run: bool = True +) -> Tuple[List[Path], str]: + urls, files, tag_name = get_version_downloads(version or "latest") + target_dir = vendor_dir / tag_name + if dry_run: + log.info(f"would download {len(urls)} assets to {target_dir}") + for url in urls: + log.info(f" - {url}") + return [], tag_name + target_dir.mkdir(parents=True, exist_ok=True) + log.info(f"downloading {len(urls)} assets to {target_dir}") + for i, url in enumerate(urls): + name = url.split("/")[-1] + with urlopen(url) as response: + target_file = target_dir / name + with target_file.open("wb") as local_artifact: + local_artifact.write(response.read()) + log.info(f"{i}/{len(urls)} downloaded {name} to {target_file}") + log.info(f"downloaded {len(urls)} assets to {target_dir}") + with (target_dir / "files.json").open("w") 
as files_json: + json.dump(files, files_json) + certified = verify_hashes(target_dir, files) + return certified, tag_name + + +if __name__ == "__main__": + _urls, _files, tag_name = get_version_downloads("latest") + version = tag_name.removeprefix("v") + with upstream_version_file.open("w") as f: + f.write(version + "\n") + # to avoid IDEs adding a trailing newline and causing a diff, we add one here. + print(version) diff --git a/wrappers/python/scripts/ci/cog/check.sh b/wrappers/python/scripts/ci/cog/check.sh new file mode 100755 index 00000000..532c8008 --- /dev/null +++ b/wrappers/python/scripts/ci/cog/check.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -eu +if [[ "${BASH_SOURCE[0]}" = */* ]]; then this_dir="${BASH_SOURCE[0]%/*}"; # bash +else this_dir=.; +fi +# shellcheck source=./files.sh +. "$this_dir"/files.sh +cog -PUe --check "${files_to_cog[@]}" + diff --git a/wrappers/python/scripts/ci/cog/files.sh b/wrappers/python/scripts/ci/cog/files.sh new file mode 100755 index 00000000..4fe33c84 --- /dev/null +++ b/wrappers/python/scripts/ci/cog/files.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +export files_to_cog=( + README.md + src/pagefind_python/__init__.py + pyproject.toml +) +# you can check this list by running `rg -l '\[\[\[cog' ./` in the repo root diff --git a/wrappers/python/scripts/ci/cog/update.sh b/wrappers/python/scripts/ci/cog/update.sh new file mode 100755 index 00000000..42ae347a --- /dev/null +++ b/wrappers/python/scripts/ci/cog/update.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2296 +if [[ "${BASH_SOURCE[0]}" = */* ]]; then this_dir="${BASH_SOURCE[0]%/*}"; # bash +else this_dir=.; +fi +# shellcheck source=./files.sh +. 
"$this_dir"/files.sh + +cog -PUre "${files_to_cog[@]}" diff --git a/wrappers/python/scripts/ci/github/install_dev_dependencies.sh b/wrappers/python/scripts/ci/github/install_dev_dependencies.sh new file mode 100755 index 00000000..b171ceb4 --- /dev/null +++ b/wrappers/python/scripts/ci/github/install_dev_dependencies.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -eu +python3 -m poetry install --only=dev --no-root +export VIRTUAL_ENV=$PWD/.venv +echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> "$GITHUB_ENV" +echo "PATH=$VIRTUAL_ENV/bin:$PATH" >> "$GITHUB_ENV" diff --git a/wrappers/python/scripts/ci/github/integration_tests.sh b/wrappers/python/scripts/ci/github/integration_tests.sh new file mode 100755 index 00000000..9a86c458 --- /dev/null +++ b/wrappers/python/scripts/ci/github/integration_tests.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -eu +# ensure pagefind is not installed +if command -v pagefind; then + exit 1 +fi +# ensure pagefind_python is not installed in the current python environment +if python3 -c "import pagefind_python"; then + echo "dirty python environment: unexpectedly found pagefind_python" + exit 1 +fi + +# use pagefind installed from the officially-maintained node.js channel +pagefind_version="$(cat ./pagefind_version.txt)" #~ +npm i "pagefind@$pagefind_version" +_prev_path="$PATH" +export PATH="$PWD/node_modules/.bin:$PATH" + + +# remove_src_from_pythonpath=" +# import os +# import sys +# from pathlib import Path +# repo_root = Path(os.getcwd()) +# src = repo_root / 'src' +# sys.path.remove(str(src)) +# " +_get_executable=' +import logging +import os +from pagefind_python.service import get_executable + +logging.basicConfig(level=os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL", "INFO")) +print(get_executable()) +' +export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG + + +python3 -m pip install \ + --no-index --find-links=dist \ + --only-binary :all: \ + pagefind_python + +python3 -c "$_get_executable" +python3 -m pagefind_python --help +echo "starting integration tests 
using system pagefind" +python3 src/tests/integration.py + +# remove the externally installed pagefind binary +rm -rf node_modules output +export PATH="$_prev_path" +if command -v pagefind; then + echo "dirty PATH: unexpectedly found pagefind" + exit 1 +fi + +python3 -m pip install \ + --no-index --find-links=dist \ + --only-binary :all: \ + 'pagefind_python[bin]' + +python3 -c "$_get_executable" +python3 -m pagefind_python --help +echo "starting integration tests using pagefind_bin python module" +python3 src/tests/integration.py + diff --git a/wrappers/python/scripts/ci/github/scrape_upstream_version.sh b/wrappers/python/scripts/ci/github/scrape_upstream_version.sh new file mode 100755 index 00000000..5dcfec44 --- /dev/null +++ b/wrappers/python/scripts/ci/github/scrape_upstream_version.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# fetch the current version of the pagefind executable +# see https://simonwillison.net/2020/Oct/9/git-scraping/ +set -eu +export PATH="$PWD/.venv/bin:$PATH" +file="pagefind_version.txt" + +python3 -m scripts.build.get_pagefind_release +pagefind_version=""; pagefind_version=$(cat ./"$file") + +if ! 
git --no-pager diff --exit-code -- "$file"; then # there's a new version + ./scripts/ci/cog/update.sh # note that $PWD is the repo root + git add -u + git config user.name "Automated" + git config user.email "actions@users.noreply.github.com" + git commit -m "chore: update pagefind binary to $pagefind_version" + git tag "bin/$pagefind_version" + git push + git push --tags --follow-tags +fi diff --git a/wrappers/python/scripts/ci/github/setup_poetry.sh b/wrappers/python/scripts/ci/github/setup_poetry.sh new file mode 100755 index 00000000..9a244ef4 --- /dev/null +++ b/wrappers/python/scripts/ci/github/setup_poetry.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -eu +python3 -m pip install poetry +command -v poetry || true # <- debugging: check if poetry is installed on $PATH + +# not using pipx since this is a CI environment that will be reset -- +# there's not much risk of poetry's dependencies conflicting with ours + +# python3 -m pip install pipx +# python3 -m pipx install poetry diff --git a/wrappers/python/scripts/ci/python_lints.sh b/wrappers/python/scripts/ci/python_lints.sh new file mode 100755 index 00000000..eaad2b7e --- /dev/null +++ b/wrappers/python/scripts/ci/python_lints.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -eu +mypy src scripts +ruff check +ruff format --check diff --git a/wrappers/python/scripts/ci/shellcheck.sh b/wrappers/python/scripts/ci/shellcheck.sh new file mode 100644 index 00000000..bba03767 --- /dev/null +++ b/wrappers/python/scripts/ci/shellcheck.sh @@ -0,0 +1,3 @@ +#!/bin/sh +set -eu +shellcheck diff --git a/wrappers/python/scripts/publish_to_test_pypi.sh b/wrappers/python/scripts/publish_to_test_pypi.sh new file mode 100755 index 00000000..df21f202 --- /dev/null +++ b/wrappers/python/scripts/publish_to_test_pypi.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +export TWINE_REPOSITORY=testpypi +export TWINE_USERNAME=__token__ +export TWINE_PASSWORD="${TEST_PYPI_API_TOKEN:?missing TEST_PYPI_API_TOKEN}" +python3 -m twine upload --verbose 
./dist/* diff --git a/wrappers/python/src/pagefind/__init__.py b/wrappers/python/src/pagefind/__init__.py index 0a64f367..f7c09204 100644 --- a/wrappers/python/src/pagefind/__init__.py +++ b/wrappers/python/src/pagefind/__init__.py @@ -1,7 +1,18 @@ #!/usr/bin/env python3 -# assume the python version is >= 3.8, which is the oldest LTS version as of -# the time of writing, 2024-06-29 +# assume the python version is >= 3.9, which is the oldest LTS version with +# more 2 months of life as of the time of writing, 2024-08-18 # https://docs.python.org/3/reference/datamodel.html#async-context-managers # https://docs.python.org/3/library/contextlib.html#contextlib.asynccontextmanager + +# [[[cog +# import tomllib # ok since the development environment must be python >= 3.11 +# from pathlib import Path +# pyproject = Path("pyproject.toml") # note the CWD is the project root +# assert pyproject.is_file(), f"expected {pyproject.absolute()} to be a file" +# version = tomllib.load(pyproject.open("rb"))["tool"]["poetry"]["version"] +# print(f'__version__ = "{version}"') +# ]]] +__version__ = "0.0.0a0" +# [[[end]]] diff --git a/wrappers/python/src/pagefind/__main__.py b/wrappers/python/src/pagefind/__main__.py index 31f85556..a3910ba8 100644 --- a/wrappers/python/src/pagefind/__main__.py +++ b/wrappers/python/src/pagefind/__main__.py @@ -1,8 +1,9 @@ import os import sys -from .service import _must_find_binary -bin = _must_find_binary().absolute() +from .service import _must_get_executable + +bin = str(_must_get_executable().resolve().absolute()) argv = [bin, *sys.argv[1:]] if os.name == "posix": os.execv(bin, argv) diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index c534ea76..8bcc3cc8 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -1,20 +1,20 @@ -from typing import Any, Dict, List, Optional, Sequence, TypedDict, cast import logging +from typing import 
Any, Dict, List, Optional, Sequence, TypedDict, cast + +from ..service import PagefindService from ..service.types import ( + InternalAddDirRequest, InternalAddFileRequest, InternalAddRecordRequest, InternalDeleteIndexRequest, InternalGetFilesRequest, InternalGetFilesResponse, - InternalIndexedFileResponse, - InternalAddDirRequest, InternalIndexedDirResponse, + InternalIndexedFileResponse, InternalSyntheticFile, InternalWriteFilesRequest, ) -from ..service import PagefindService - log = logging.getLogger(__name__) @@ -117,7 +117,7 @@ async def get_files(self) -> List[InternalSyntheticFile]: result = cast(InternalGetFilesResponse, response)["files"] return result - async def delete_index(self): + async def delete_index(self) -> None: assert self._service is not None assert self._index_id is not None result = await self._service.send( @@ -163,7 +163,7 @@ async def add_custom_record( assert result["type"] == "IndexedFile" return cast(InternalIndexedFileResponse, result) - async def write_files(self): + async def write_files(self) -> None: assert self._service is not None assert self._index_id is not None if not self.config: diff --git a/wrappers/python/src/pagefind/service/__init__.py b/wrappers/python/src/pagefind/service/__init__.py index 87e286d2..8302d7e2 100644 --- a/wrappers/python/src/pagefind/service/__init__.py +++ b/wrappers/python/src/pagefind/service/__init__.py @@ -1,23 +1,23 @@ -import os - -from pathlib import Path -import json -from contextlib import AbstractAsyncContextManager -from typing import Any, Dict, Optional, cast, TYPE_CHECKING import asyncio import base64 +import json import logging +import os import shutil +from contextlib import AbstractAsyncContextManager +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast from .types import ( InternalNewIndexRequest, InternalNewIndexResponse, InternalRequestPayload, - InternalResponsePayload, InternalResponseError, + InternalResponsePayload, + 
InternalResponseType, InternalServiceRequest, InternalServiceResponse, - InternalResponseType, + InternalSyntheticFile, ) if TYPE_CHECKING: @@ -26,30 +26,42 @@ log = logging.getLogger(__name__) -def _must_find_binary() -> Path: +__all__ = ["PagefindService", "get_executable"] + + +def get_executable() -> Optional[Path]: try: from pagefind_bin_extended import get_executable # type: ignore - executable: Path = get_executable() - log.debug(f"using {executable}") - return executable + extended: Path = get_executable() + log.debug(f"using {extended}") + return extended except ImportError: log.debug("unable to import pagefind_bin_extended") try: from pagefind_bin import get_executable # type: ignore - executable: Path = get_executable() - log.debug(f"using {executable}") - return executable + bin: Path = get_executable() + log.debug(f"using {bin}") + return bin except ImportError: log.debug("unable to import pagefind_bin") - exe: Optional[str] = shutil.which("pagefind_extended") or shutil.which("pagefind") - if exe is None: - raise FileNotFoundError("Could not find pagefind binary") + external: Optional[str] = shutil.which("pagefind_extended") + external = external or shutil.which("pagefind") + if external is None: + log.debug("Could not find externally-installed pagefind binary") + return None else: - return Path(exe) + log.debug(f"using {external}") + return Path(external) + + +def _must_get_executable() -> Path: + if (bin := get_executable()) is None: + raise FileNotFoundError("Could not find pagefind binary") + return bin def _encode(req: InternalServiceRequest) -> bytes: @@ -65,9 +77,9 @@ class PagefindService(AbstractAsyncContextManager["PagefindService"]): _poll_task: asyncio.Task[None] # _messages - def __init__(self): + def __init__(self) -> None: self._loop = asyncio.get_event_loop() - self._bin = _must_find_binary() + self._bin = _must_get_executable() self._responses = dict() async def launch(self) -> "PagefindService": @@ -79,7 +91,7 @@ async def 
launch(self) -> "PagefindService": self._backend = await asyncio.create_subprocess_exec( self._bin, "--service", - "--verbose", + # "--verbose", # <- verbose emits debug logs to stdout, which is also used for IPC cwd=os.getcwd(), stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, @@ -116,7 +128,13 @@ async def send(self, payload: InternalRequestPayload) -> InternalResponsePayload await self._backend.stdin.drain() log.debug(f"request sent: {req}") result = await future - log.debug(f"received response: {result}") + if result["type"] == InternalResponseType.GET_FILES.value: # these are HUGE + if (files := result.get("files")) is not None: + files = cast(List[InternalSyntheticFile], files) + base64_ch = sum(len(file["content"]) for file in files) + log.debug(f"received response: <{len(files)} files, {base64_ch} chars>") + else: + log.debug(f"received response: {result}") return result async def _wait_for_responses(self) -> None: @@ -128,11 +146,11 @@ async def _wait_for_responses(self) -> None: assert self._backend.stdout is not None log.debug("checking for data") output = await self._backend.stdout.readuntil(b",") - if len(output) <= 100: - log.debug(f"received data: {output}") + if len(output) <= 200: + log.debug(f"received data: {output!r}") else: log.debug( - f"received data: {output[:30]}...{len(output) - 40}B...{output[-10:]}" + f"received data: {output[:30]!r}...{len(output) - 40}B...{output[-10:]!r}" ) if (resp := json.loads(base64.b64decode(output[:-1]))) is None: continue @@ -160,7 +178,7 @@ async def _wait_for_responses(self) -> None: # payload["type"] == InternalResponseType.ERROR.value # ), f"unexpected message type: {payload['type']}" - async def close(self): + async def close(self) -> None: # wait for all _responses to be resolved await asyncio.gather(*self._responses.values()) # IDEA: add timeout? 
self._poll_task.cancel() diff --git a/wrappers/python/src/pagefind/service/types.py b/wrappers/python/src/pagefind/service/types.py index 53d061b7..fa76aec7 100644 --- a/wrappers/python/src/pagefind/service/types.py +++ b/wrappers/python/src/pagefind/service/types.py @@ -1,8 +1,8 @@ -from enum import StrEnum -from typing import Dict, List, Union, Optional, TypedDict, Sequence, Literal +from enum import Enum +from typing import Dict, List, Literal, Optional, Sequence, TypedDict, Union -class InternalRequestType(StrEnum): +class InternalRequestType(Enum): NEW_INDEX = "NewIndex" ADD_FILE = "AddFile" ADD_RECORD = "AddRecord" @@ -90,7 +90,7 @@ class InternalServiceRequest(TypedDict): payload: InternalRequestPayload -class InternalResponseType(StrEnum): +class InternalResponseType(Enum): NEW_INDEX = "NewIndex" INDEXED_FILE = "IndexedFile" INDEXED_DIR = "IndexedDir" diff --git a/wrappers/python/src/pagefind_bin/README.md b/wrappers/python/src/pagefind_python_bin/README.md similarity index 65% rename from wrappers/python/src/pagefind_bin/README.md rename to wrappers/python/src/pagefind_python_bin/README.md index 8652aaa8..369f7441 100644 --- a/wrappers/python/src/pagefind_bin/README.md +++ b/wrappers/python/src/pagefind_python_bin/README.md @@ -1,13 +1,16 @@ # `pagefind_bin` -A python wrapper for the pagefind binary. +A python wrapper for the `pagefind` executable. 
## Usage ```py +#!/usr/bin/env python3 from pagefind_bin import get_executable -print(get_executable()) # yields absolute path to the binary +print(get_executable()) # yields absolute path to the executable ``` + ```sh +#!/usr/bin/env bash python3 -m pagefind_bin --help ``` diff --git a/wrappers/python/src/pagefind_bin/__init__.py b/wrappers/python/src/pagefind_python_bin/__init__.py similarity index 89% rename from wrappers/python/src/pagefind_bin/__init__.py rename to wrappers/python/src/pagefind_python_bin/__init__.py index 21cbe924..0dd7b0b5 100644 --- a/wrappers/python/src/pagefind_bin/__init__.py +++ b/wrappers/python/src/pagefind_python_bin/__init__.py @@ -1,9 +1,9 @@ import logging -from pathlib import Path -import platform -from typing import List import os +import platform import sys +from pathlib import Path +from typing import List __all__ = ["get_executable", "cli"] @@ -15,7 +15,7 @@ def get_candidate_paths() -> List[Path]: names = ["pagefind_extended", "pagefind"] extensions = [""] - if platform.system().lower() == "Windows": + if platform.system().lower() == "windows": extensions.append(".exe") bin_names = [n + ext for n in names for ext in extensions] paths = [this_dir / bin for bin in bin_names] @@ -36,8 +36,8 @@ def get_executable() -> Path: raise FileNotFoundError(f"Could not find any of {candidates}") -def cli(): - bin = get_executable().absolute() +def cli() -> None: + bin = str(get_executable().absolute()) argv = [bin, *sys.argv[1:]] if os.name == "posix": os.execv(bin, argv) @@ -45,7 +45,3 @@ def cli(): import subprocess sys.exit(subprocess.call(argv)) - - -if __name__ == "__main__": - cli() diff --git a/wrappers/python/src/pagefind_python_bin/__main__.py b/wrappers/python/src/pagefind_python_bin/__main__.py new file mode 100644 index 00000000..bae58a86 --- /dev/null +++ b/wrappers/python/src/pagefind_python_bin/__main__.py @@ -0,0 +1,3 @@ +from . 
import cli + +cli() From 6cdcdf2000628876c568f9fc934deeb135c62a9e Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 24 Aug 2024 11:49:32 -0400 Subject: [PATCH 06/39] build(wrappers/python): add python build scripts for GH Actions --- .github/workflows/release.yml | 62 ++ .github/workflows/test.yml | 38 + wrappers/python/README.md | 2 +- wrappers/python/build_binary_only_wheel.py | 239 ------ wrappers/python/poetry.lock | 762 ++++++++++++++++-- wrappers/python/pyproject.toml | 23 +- wrappers/python/scripts/build/__init__.py | 1 - wrappers/python/scripts/build/all.py | 41 +- wrappers/python/scripts/build/api_package.py | 11 +- .../python/scripts/build/binary_only_wheel.py | 2 +- .../scripts/build/get_pagefind_release.py | 17 +- wrappers/python/scripts/ci/cog/files.sh | 3 +- wrappers/python/scripts/ci/github/README.md | 1 + .../scripts/ci/github/debug_python_paths.sh | 12 + .../ci/github/install_dev_dependencies.sh | 5 +- .../scripts/ci/github/integration_tests.sh | 63 +- .../ci/github/scrape_upstream_version.sh | 20 - .../python/scripts/ci/github/setup_poetry.sh | 1 - .../python/src/pagefind/service/__init__.py | 7 +- .../python/src/pagefind_python_bin/README.md | 4 +- 20 files changed, 908 insertions(+), 406 deletions(-) delete mode 100644 wrappers/python/build_binary_only_wheel.py create mode 100644 wrappers/python/scripts/ci/github/README.md create mode 100644 wrappers/python/scripts/ci/github/debug_python_paths.sh delete mode 100755 wrappers/python/scripts/ci/github/scrape_upstream_version.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4479650f..01dbd2d2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -87,6 +87,67 @@ jobs: env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + publish-pypi-packages: + name: Publish PyPi packages + runs-on: ubuntu-latest # ok since none of the scripts depend on version-specific features + defaults: + run: + shell: bash + needs: 
publish-github-release + steps: + - name: Clone + uses: actions/checkout@v4 + - name: Download CLI binaries + uses: actions/download-artifact@v4 + with: + pattern: release-* + merge-multiple: true + path: ./wrappers/python/vendor + - name: Set up python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Set up poetry + run: bash ./wrappers/python/scripts/ci/github/setup_poetry.sh + - name: cache venv + uses: actions/cache@v4 + with: + path: .venv + key: ${{ runner.os }}-poetry-3.12-${{ hashFiles('**/poetry.lock') }} + - name: Install dev dependencies + run: bash ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh + working-directory: ./wrappers/python + - name: debug + run: | + set -x + echo "$PATH" | tr ':' '\n' + command -v python + command -v python3 + command -v poetry || echo "missing poetry" + stat .venv/bin/python + .venv/bin/python --version + - name: package binaries + working-directory: ./wrappers/python + run: | + export PATH="$PWD/.venv/bin:$PATH" + python3 -m scripts.build.all ./vendor # should take ~30s + - name: package python api + run: | + export PATH="$PWD/.venv/bin:$PATH" + python3 -m scripts.build.api_package + - name: Archive dist + uses: actions/upload-artifact@v4 + with: + path: dist + name: python-packages + if-no-files-found: error + # TODO: once we have a TEST_PYPI_TOKEN, test publishing the packages. 
+ # - name: Publish python packages + # working-directory: ./wrappers/python + # run: | + # export PATH="$PWD/.venv/bin:$PATH" + # ./scripts/publish_to_test_pypi.sh + publish-npm-package: name: Publish NPM packages runs-on: ubuntu-20.04 @@ -145,6 +206,7 @@ jobs: env: NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + publish-binary-npm-packages: name: Publish NPM binaries runs-on: ubuntu-20.04 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 36a80957..03aa488e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -111,3 +111,41 @@ jobs: - name: Test CLI run: ./test_ci.sh "release" + + - name: Set up python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + # NOTE: ^this strategy leaves older python versions intentionally + # without test coverage to keep CI fast. + + - name: Set up poetry + run: ./wrappers/python/scripts/ci/github/setup_poetry.sh + + - name: cache venv + uses: actions/cache@v4 + with: + path: wrappers/python/.venv + key: ${{ runner.os }}-poetry-3.12-${{ hashFiles('**/poetry.lock') }} + + - name: Install dev dependencies + run: ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh + + - name: debug python paths + run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh + + - name: Lint python + working-directory: ./wrappers/python + run: | + export VIRTUAL_ENV="$PWD/.venv" + export PATH="$VIRTUAL_ENV/bin:$PATH" + bash ./scripts/ci/python_lints.sh + + - name: ensure cog up-to-date + working-directory: ./wrappers/python + run: ./scripts/ci/github/cog/check.sh + + - name: Test python API + timeout-minutes: 1 + run: ./wrappers/python/scripts/ci/github/integration_tests.sh + diff --git a/wrappers/python/README.md b/wrappers/python/README.md index 7be0138b..b7738640 100644 --- a/wrappers/python/README.md +++ b/wrappers/python/README.md @@ -1,4 +1,4 @@ -# `pagefind_python` +# `pagefind` An async python API for the [pagefind](https://pagefind.app) binary. 
## Installation diff --git a/wrappers/python/build_binary_only_wheel.py b/wrappers/python/build_binary_only_wheel.py deleted file mode 100644 index 2f7167db..00000000 --- a/wrappers/python/build_binary_only_wheel.py +++ /dev/null @@ -1,239 +0,0 @@ -#!/usr/bin/env python3 -# Adapted from https://github.com/ziglang/zig-pypi/blob/a0ca0d8b2d5104498f4eececff09ed2b1ede2d0b/make_wheels.py -# See also https://simonwillison.net/2022/May/23/bundling-binary-tools-in-python-wheels/ -# -# Note that this script assumes that the relevant files are on disk and either -# the files hashes have been verified or we trust the files. -import argparse -import logging -from typing import Any, Dict, List, Optional, Tuple, Union -from pathlib import Path -from email.message import EmailMessage -import wheel -from wheel.wheelfile import WheelFile -from zipfile import ZipInfo, ZIP_DEFLATED - -import wheel.wheelfile - -# constants -HOMEPAGE = "https://pagefind.app" -REPO = "https://github.com/CloudCannon/pagefind/" -REQUIRED_PYTHON_VERSION = "~=3.9" - -this_dir = Path(__file__).parent - - -# as of the time of writing, these are the supported platforms: -# See https://doc.rust-lang.org/nightly/rustc/platform-support.html -# wheel name format: {dist}-{version}(-{build})?-{python}-{abi}-{platform}.whl -# this dict helps look up the last part of the wheel name: ^^^^^^^^^^ -LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS = { - # LLVM triple: Python platform - # only the LLVM triples that are produced in CI are listed here; see - # https://github.com/CloudCannon/pagefind/releases/latest - # the python platform mapping is copied from zig-pypi's script. - # See also: https://github.com/PyO3/maturin/blob/main/src/auditwheel/manylinux-policy.json - # See also: https://github.com/PyO3/maturin/blob/main/src/auditwheel/musllinux-policy.json - # TODO: check the python platforms are correct. 
- "aarch64-apple-darwin": "macosx_12_0_arm64", - "aarch64-unknown-linux-musl": "manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64", - "x86_64-apple-darwin": "macosx_12_0_arm64", - "x86_64-pc-windows-msvc": "win_amd64", - "x86_64-unknown-linux-musl": "manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64", -} - - -def as_zip_info(file: Path, *, alias: str) -> Tuple[ZipInfo, bytes]: - zip_info = ZipInfo(alias or file.name, (1980, 1, 1, 0, 0, 0)) - zip_info.external_attr = file.stat().st_mode << 16 - with file.open("rb") as f: - data = f.read() - zip_info.file_size = len(data) - return zip_info, data - - -class ReproducibleWheelFile(wheel.wheelfile.WheelFile): - def writestr( - self, zip_info_or_arc_name: Union[ZipInfo, str], data: Any, *args, **kwargs - ): - if isinstance(zip_info_or_arc_name, ZipInfo): - zip_info = zip_info_or_arc_name - else: - assert isinstance(zip_info_or_arc_name, str) - zip_info = ZipInfo(zip_info_or_arc_name) - zip_info.file_size = len(data) - zip_info.external_attr = 0o0644 << 16 - if zip_info_or_arc_name.endswith(".dist-info/RECORD"): - zip_info.external_attr = 0o0664 << 16 - - zip_info.compress_type = ZIP_DEFLATED - zip_info.date_time = (1980, 1, 1, 0, 0, 0) - zip_info.create_system = 3 - WheelFile.writestr(self, zip_info, data, *args, **kwargs) - - -def make_message( - headers: Dict[str, Union[str, List[str]]], - payload: Optional[Union[str, bytes]] = None, -): - msg = EmailMessage() - for name, value in headers.items(): - if isinstance(value, list): - for value_part in value: - msg[name] = value_part - else: - msg[name] = value - if payload: - msg.set_payload(payload) - return msg - - -def write_wheel_file( - filename: Path, contents: Dict[str, Union[str, bytes, EmailMessage, ZipInfo]] -) -> Path: - with ReproducibleWheelFile(filename, "w") as wheel: - for member_info, member_source in contents.items(): - if isinstance(member_source, str): - data = member_source.encode("utf-8") - elif isinstance(member_source, 
bytes): - data = member_source - elif isinstance(member_source, EmailMessage): - data = member_source.as_bytes( - policy=member_source.policy.clone(linesep="\n"), unixfrom=False - ) - elif isinstance(member_source, Path): - member_info, data = as_zip_info(member_source, alias=member_info) - else: - raise ValueError(f"unexpected content: {type(member_source)}") - wheel.writestr(member_info, data) - return filename - - -def write_wheel( - out_dir: Path, - *, - name: str, - version: str, - tag: str, - metadata: Dict[str, Any], - description: str, - contents, -) -> Path: - wheel_name = f"{name}-{version}-{tag}.whl" - dist_info = f"{name}-{version}.dist-info" - return write_wheel_file( - (out_dir / wheel_name), - { - **contents, - f"{dist_info}/METADATA": make_message( - { - # see https://packaging.python.org/en/latest/specifications/core-metadata/ - "Metadata-Version": "2.1", - "Name": name, - "Version": version, - **metadata, - }, - description, - ), - f"{dist_info}/WHEEL": make_message( - { - "Wheel-Version": "1.0", - "Generator": "build_binary_only_wheel.py", - "Root-Is-Purelib": "false", # see https://packaging.python.org/en/latest/specifications/binary-distribution-format/#what-s-the-deal-with-purelib-vs-platlib - "Tag": tag, - } - ), - }, - ) - - -def write_pagefind_bin_only_wheel( - *, - executable: Path, - output_dir: Path, - version: str, - platform: str, -) -> Path: - # FIXME: update when package support is stabilized - name = "pagefind_bin" - if "extended" in executable.name: - name += "_extended" - src_dir = this_dir / "src" / "pagefind_bin" - contents = { - f"{name}/__init__.py": (src_dir / "__init__.py"), - f"{name}/{executable.name}": executable, - } - - # Load in static files - with (src_dir / "README.md").open() as f: - description = f.read().replace("pagefind_bin", name) - - return write_wheel( - output_dir, - name=name, - version=version, - tag=f"py3-none-{platform}", - metadata={ - "Summary": "Pagefind is a library for performant, low-bandwidth, 
fully static search.", - "Description-Content-Type": "text/markdown", - "License": "MIT", - "Author": "CloudCannon", - "Classifier": [ - "License :: OSI Approved :: MIT License", - "Development Status :: 3 - Alpha", # FIXME: update when package name stabilized - "Intended Audience :: Developers", - ], - "Project-URL": [ - f"Homepage, {HOMEPAGE}", - f"Source Code, {REPO}", - f"Bug Tracker, {REPO}/issues", - ], - "Requires-Python": REQUIRED_PYTHON_VERSION, - }, - description=description, - contents=contents, - ) - - -def get_arg_parser(): - parser = argparse.ArgumentParser( - prog=__file__, description="Repackage Pagefind binaries as Python wheels" - ) - parser.add_argument( - "--version", - default=None, - help="version to package", - ) - parser.add_argument("--suffix", default="", help="wheel version suffix") - parser.add_argument("--bin-path", help="path to the binary to embed", required=True) - parser.add_argument( - "--output-dir", - default="dist/", - help="Output directory in which to place the built wheel", - ) - parser.add_argument( - "--llvm-triple", - required=True, - choices=list(LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.keys()), - help="platform to build for", - ) - return parser - - -def main(): - args = get_arg_parser().parse_args() - platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(args.llvm_triple) - if platform is None: - raise ValueError(f"Unsupported platform: {args.llvm_triple}") - - logging.getLogger(wheel.__name__).setLevel(logging.WARNING) - write_pagefind_bin_only_wheel( - output_dir=Path(args.output_dir), - executable=Path(args.bin_path), - version=args.version, - platform=platform, - ) - - -if __name__ == "__main__": - main() diff --git a/wrappers/python/poetry.lock b/wrappers/python/poetry.lock index a914fddc..bd24997c 100644 --- a/wrappers/python/poetry.lock +++ b/wrappers/python/poetry.lock @@ -1,5 +1,209 @@ # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
+[[package]] +name = "backports-tarfile" +version = "1.2.0" +description = "Backport of CPython tarfile module" +optional = false +python-versions = ">=3.8" +files = [ + {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"}, + {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"] + +[[package]] +name = "certifi" +version = "2024.7.4" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, +] + +[[package]] +name = "cffi" +version = "1.17.0" +description = "Foreign Function Interface for Python calling C code." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9338cc05451f1942d0d8203ec2c346c830f8e86469903d5126c1f0a13a2bcbb"}, + {file = "cffi-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0ce71725cacc9ebf839630772b07eeec220cbb5f03be1399e0457a1464f8e1a"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c815270206f983309915a6844fe994b2fa47e5d05c4c4cef267c3b30e34dbe42"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6bdcd415ba87846fd317bee0774e412e8792832e7805938987e4ede1d13046d"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a98748ed1a1df4ee1d6f927e151ed6c1a09d5ec21684de879c7ea6aa96f58f2"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a048d4f6630113e54bb4b77e315e1ba32a5a31512c31a273807d0027a7e69ab"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24aa705a5f5bd3a8bcfa4d123f03413de5d86e497435693b638cbffb7d5d8a1b"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:856bf0924d24e7f93b8aee12a3a1095c34085600aa805693fb7f5d1962393206"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4304d4416ff032ed50ad6bb87416d802e67139e31c0bde4628f36a47a3164bfa"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:331ad15c39c9fe9186ceaf87203a9ecf5ae0ba2538c9e898e3a6967e8ad3db6f"}, + {file = "cffi-1.17.0-cp310-cp310-win32.whl", hash = "sha256:669b29a9eca6146465cc574659058ed949748f0809a2582d1f1a324eb91054dc"}, + {file = "cffi-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:48b389b1fd5144603d61d752afd7167dfd205973a43151ae5045b35793232aa2"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:c5d97162c196ce54af6700949ddf9409e9833ef1003b4741c2b39ef46f1d9720"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ba5c243f4004c750836f81606a9fcb7841f8874ad8f3bf204ff5e56332b72b9"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb9333f58fc3a2296fb1d54576138d4cf5d496a2cc118422bd77835e6ae0b9cb"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:435a22d00ec7d7ea533db494da8581b05977f9c37338c80bc86314bec2619424"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1df34588123fcc88c872f5acb6f74ae59e9d182a2707097f9e28275ec26a12d"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df8bb0010fdd0a743b7542589223a2816bdde4d94bb5ad67884348fa2c1c67e8"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b5b9712783415695663bd463990e2f00c6750562e6ad1d28e072a611c5f2a6"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ffef8fd58a36fb5f1196919638f73dd3ae0db1a878982b27a9a5a176ede4ba91"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e67d26532bfd8b7f7c05d5a766d6f437b362c1bf203a3a5ce3593a645e870b8"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45f7cd36186db767d803b1473b3c659d57a23b5fa491ad83c6d40f2af58e4dbb"}, + {file = "cffi-1.17.0-cp311-cp311-win32.whl", hash = "sha256:a9015f5b8af1bb6837a3fcb0cdf3b874fe3385ff6274e8b7925d81ccaec3c5c9"}, + {file = "cffi-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:b50aaac7d05c2c26dfd50c3321199f019ba76bb650e346a6ef3616306eed67b0"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aec510255ce690d240f7cb23d7114f6b351c733a74c279a84def763660a2c3bc"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:2770bb0d5e3cc0e31e7318db06efcbcdb7b31bcb1a70086d3177692a02256f59"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db9a30ec064129d605d0f1aedc93e00894b9334ec74ba9c6bdd08147434b33eb"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a47eef975d2b8b721775a0fa286f50eab535b9d56c70a6e62842134cf7841195"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3e0992f23bbb0be00a921eae5363329253c3b86287db27092461c887b791e5e"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6107e445faf057c118d5050560695e46d272e5301feffda3c41849641222a828"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb862356ee9391dc5a0b3cbc00f416b48c1b9a52d252d898e5b7696a5f9fe150"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c1c13185b90bbd3f8b5963cd8ce7ad4ff441924c31e23c975cb150e27c2bf67a"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17c6d6d3260c7f2d94f657e6872591fe8733872a86ed1345bda872cfc8c74885"}, + {file = "cffi-1.17.0-cp312-cp312-win32.whl", hash = "sha256:c3b8bd3133cd50f6b637bb4322822c94c5ce4bf0d724ed5ae70afce62187c492"}, + {file = "cffi-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:dca802c8db0720ce1c49cce1149ff7b06e91ba15fa84b1d59144fef1a1bc7ac2"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6ce01337d23884b21c03869d2f68c5523d43174d4fc405490eb0091057943118"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cab2eba3830bf4f6d91e2d6718e0e1c14a2f5ad1af68a89d24ace0c6b17cced7"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b9cbc8f7ac98a739558eb86fabc283d4d564dafed50216e7f7ee62d0d25377"}, + {file 
= "cffi-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b00e7bcd71caa0282cbe3c90966f738e2db91e64092a877c3ff7f19a1628fdcb"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41f4915e09218744d8bae14759f983e466ab69b178de38066f7579892ff2a555"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4760a68cab57bfaa628938e9c2971137e05ce48e762a9cb53b76c9b569f1204"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:011aff3524d578a9412c8b3cfaa50f2c0bd78e03eb7af7aa5e0df59b158efb2f"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a003ac9edc22d99ae1286b0875c460351f4e101f8c9d9d2576e78d7e048f64e0"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ef9528915df81b8f4c7612b19b8628214c65c9b7f74db2e34a646a0a2a0da2d4"}, + {file = "cffi-1.17.0-cp313-cp313-win32.whl", hash = "sha256:70d2aa9fb00cf52034feac4b913181a6e10356019b18ef89bc7c12a283bf5f5a"}, + {file = "cffi-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:b7b6ea9e36d32582cda3465f54c4b454f62f23cb083ebc7a94e2ca6ef011c3a7"}, + {file = "cffi-1.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:964823b2fc77b55355999ade496c54dde161c621cb1f6eac61dc30ed1b63cd4c"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:516a405f174fd3b88829eabfe4bb296ac602d6a0f68e0d64d5ac9456194a5b7e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dec6b307ce928e8e112a6bb9921a1cb00a0e14979bf28b98e084a4b8a742bd9b"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4094c7b464cf0a858e75cd14b03509e84789abf7b79f8537e6a72152109c76e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2404f3de742f47cb62d023f0ba7c5a916c9c653d5b368cc966382ae4e57da401"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa9d43b02a0c681f0bfbc12d476d47b2b2b6a3f9287f11ee42989a268a1833c"}, + {file = "cffi-1.17.0-cp38-cp38-win32.whl", hash = "sha256:0bb15e7acf8ab35ca8b24b90af52c8b391690ef5c4aec3d31f38f0d37d2cc499"}, + {file = "cffi-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:93a7350f6706b31f457c1457d3a3259ff9071a66f312ae64dc024f049055f72c"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a2ddbac59dc3716bc79f27906c010406155031a1c801410f1bafff17ea304d2"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6327b572f5770293fc062a7ec04160e89741e8552bf1c358d1a23eba68166759"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbc183e7bef690c9abe5ea67b7b60fdbca81aa8da43468287dae7b5c046107d4"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bdc0f1f610d067c70aa3737ed06e2726fd9d6f7bfee4a351f4c40b6831f4e82"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6d872186c1617d143969defeadac5a904e6e374183e07977eedef9c07c8953bf"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d46ee4764b88b91f16661a8befc6bfb24806d885e27436fdc292ed7e6f6d058"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f76a90c345796c01d85e6332e81cab6d70de83b829cf1d9762d0a3da59c7932"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e60821d312f99d3e1569202518dddf10ae547e799d75aef3bca3a2d9e8ee693"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:eb09b82377233b902d4c3fbeeb7ad731cdab579c6c6fda1f763cd779139e47c3"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:24658baf6224d8f280e827f0a50c46ad819ec8ba380a42448e24459daf809cf4"}, + {file = "cffi-1.17.0-cp39-cp39-win32.whl", hash = "sha256:0fdacad9e0d9fc23e519efd5ea24a70348305e8d7d85ecbb1a5fa66dc834e7fb"}, + {file = "cffi-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cbc78dc018596315d4e7841c8c3a7ae31cc4d638c9b627f87d52e8abaaf2d29"}, + {file = "cffi-1.17.0.tar.gz", hash = "sha256:f3157624b7558b914cb039fd1af735e5e8049a87c817cc215109ad1c8779df76"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file 
= "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "cogapp" version = "3.4.1" @@ -11,46 +215,276 @@ files = [ {file = "cogapp-3.4.1.tar.gz", hash = "sha256:a806d5db9e318a1a2d3fce988008179168e7db13e5e55b19b79763f9bb9d2982"}, ] +[[package]] +name = "cryptography" +version = "43.0.0" +description = "cryptography is a package which provides cryptographic recipes 
and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-43.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:64c3f16e2a4fc51c0d06af28441881f98c5d91009b8caaff40cf3548089e9c74"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dcdedae5c7710b9f97ac6bba7e1052b95c7083c9d0e9df96e02a1932e777895"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9a1eca329405219b605fac09ecfc09ac09e595d6def650a437523fcd08dd22"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ea9e57f8ea880eeea38ab5abf9fbe39f923544d7884228ec67d666abd60f5a47"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9a8d6802e0825767476f62aafed40532bd435e8a5f7d23bd8b4f5fd04cc80ecf"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cc70b4b581f28d0a254d006f26949245e3657d40d8857066c2ae22a61222ef55"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a997df8c1c2aae1e1e5ac49c2e4f610ad037fc5a3aadc7b64e39dea42249431"}, + {file = "cryptography-43.0.0-cp37-abi3-win32.whl", hash = "sha256:6e2b11c55d260d03a8cf29ac9b5e0608d35f08077d8c087be96287f43af3ccdc"}, + {file = "cryptography-43.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:31e44a986ceccec3d0498e16f3d27b2ee5fdf69ce2ab89b52eaad1d2f33d8778"}, + {file = "cryptography-43.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:7b3f5fe74a5ca32d4d0f302ffe6680fcc5c28f8ef0dc0ae8f40c0f3a1b4fca66"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac1955ce000cb29ab40def14fd1bbfa7af2017cca696ee696925615cafd0dce5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299d3da8e00b7e2b54bb02ef58d73cd5f55fb31f33ebbf33bd00d9aa6807df7e"}, + {file = 
"cryptography-43.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ee0c405832ade84d4de74b9029bedb7b31200600fa524d218fc29bfa371e97f5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb013933d4c127349b3948aa8aaf2f12c0353ad0eccd715ca789c8a0f671646f"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fdcb265de28585de5b859ae13e3846a8e805268a823a12a4da2597f1f5afc9f0"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2905ccf93a8a2a416f3ec01b1a7911c3fe4073ef35640e7ee5296754e30b762b"}, + {file = "cryptography-43.0.0-cp39-abi3-win32.whl", hash = "sha256:47ca71115e545954e6c1d207dd13461ab81f4eccfcb1345eac874828b5e3eaaf"}, + {file = "cryptography-43.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:0663585d02f76929792470451a5ba64424acc3cd5227b03921dab0e2f27b1709"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c6d112bf61c5ef44042c253e4859b3cbbb50df2f78fa8fae6747a7814484a70"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:844b6d608374e7d08f4f6e6f9f7b951f9256db41421917dfb2d003dde4cd6b66"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:51956cf8730665e2bdf8ddb8da0056f699c1a5715648c1b0144670c1ba00b48f"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:aae4d918f6b180a8ab8bf6511a419473d107df4dbb4225c7b48c5c9602c38c7f"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:232ce02943a579095a339ac4b390fbbe97f5b5d5d107f8a08260ea2768be8cc2"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5bcb8a5620008a8034d39bce21dc3e23735dfdb6a33a06974739bfa04f853947"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:08a24a7070b2b6804c1940ff0f910ff728932a9d0e80e7814234269f9d46d069"}, + {file = 
"cryptography-43.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e9c5266c432a1e23738d178e51c2c7a5e2ddf790f248be939448c0ba2021f9d1"}, + {file = "cryptography-43.0.0.tar.gz", hash = "sha256:b88075ada2d51aa9f18283532c9f60e72170041bba88d7f37e49cbb10275299e"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "cryptography-vectors (==43.0.0)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + +[[package]] +name = "docutils" +version = "0.21.2" +description = "Docutils -- Python Documentation Utilities" +optional = false +python-versions = ">=3.9" +files = [ + {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, + {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, +] + +[[package]] +name = "idna" +version = "3.8" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"}, + {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"}, +] + +[[package]] +name = "importlib-metadata" +version = "8.4.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-8.4.0-py3-none-any.whl", hash = "sha256:66f342cc6ac9818fc6ff340576acd24d65ba0b3efabb2b4ac08b598965a4a2f1"}, + {file = 
"importlib_metadata-8.4.0.tar.gz", hash = "sha256:9a547d3bc3608b025f93d403fdd1aae741c24fbb8314df4b155675742ce303c5"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "jaraco-classes" +version = "3.4.0" +description = "Utility functions for Python class constructs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790"}, + {file = "jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd"}, +] + +[package.dependencies] +more-itertools = "*" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "jaraco-context" +version = "6.0.1" +description = "Useful decorators and context managers" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jaraco.context-6.0.1-py3-none-any.whl", hash = "sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4"}, + {file = "jaraco_context-6.0.1.tar.gz", hash = "sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3"}, +] + +[package.dependencies] +"backports.tarfile" = {version = "*", markers = "python_version < \"3.12\""} + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker 
(>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["portend", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "jaraco-functools" +version = "4.0.2" +description = "Functools like those found in stdlib" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jaraco.functools-4.0.2-py3-none-any.whl", hash = "sha256:c9d16a3ed4ccb5a889ad8e0b7a343401ee5b2a71cee6ed192d3f68bc351e94e3"}, + {file = "jaraco_functools-4.0.2.tar.gz", hash = "sha256:3460c74cd0d32bf82b9576bbb3527c4364d5b27a21f5158a62aed6c4b42e23f5"}, +] + +[package.dependencies] +more-itertools = "*" + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["jaraco.classes", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "jeepney" +version = "0.8.0" +description = "Low-level, pure Python DBus protocol wrapper." +optional = false +python-versions = ">=3.7" +files = [ + {file = "jeepney-0.8.0-py3-none-any.whl", hash = "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755"}, + {file = "jeepney-0.8.0.tar.gz", hash = "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806"}, +] + +[package.extras] +test = ["async-timeout", "pytest", "pytest-asyncio (>=0.17)", "pytest-trio", "testpath", "trio"] +trio = ["async_generator", "trio"] + +[[package]] +name = "keyring" +version = "25.3.0" +description = "Store and access your passwords safely." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "keyring-25.3.0-py3-none-any.whl", hash = "sha256:8d963da00ccdf06e356acd9bf3b743208878751032d8599c6cc89eb51310ffae"}, + {file = "keyring-25.3.0.tar.gz", hash = "sha256:8d85a1ea5d6db8515b59e1c5d1d1678b03cf7fc8b8dcfb1651e8c4a524eb42ef"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""} +"jaraco.classes" = "*" +"jaraco.context" = "*" +"jaraco.functools" = "*" +jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""} +pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""} +SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} + +[package.extras] +completion = ["shtab (>=1.1.0)"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "more-itertools" +version = "10.4.0" +description = "More routines for operating on iterables, beyond itertools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "more-itertools-10.4.0.tar.gz", hash = "sha256:fe0e63c4ab068eac62410ab05cccca2dc71ec44ba8ef29916a0090df061cf923"}, + {file = "more_itertools-10.4.0-py3-none-any.whl", hash = "sha256:0f7d9f83a0a8dcfa8a2694a770590d98a67ea943e3d9f5298309a484758c4e27"}, +] + [[package]] name = "mypy" -version = "1.10.1" +version = "1.11.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" files = [ - {file = 
"mypy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02"}, - {file = "mypy-1.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7"}, - {file = "mypy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a"}, - {file = "mypy-1.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9"}, - {file = "mypy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d"}, - {file = "mypy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a"}, - {file = "mypy-1.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84"}, - {file = "mypy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f"}, - {file = "mypy-1.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b"}, - {file = "mypy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e"}, - {file = "mypy-1.10.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7"}, - {file = "mypy-1.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3"}, - {file = "mypy-1.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e"}, - {file = "mypy-1.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04"}, - {file = "mypy-1.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31"}, - {file = "mypy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c"}, - {file = "mypy-1.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade"}, - {file = "mypy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37"}, - {file = "mypy-1.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7"}, - {file = "mypy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d"}, - {file = "mypy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3"}, - {file = "mypy-1.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf"}, - {file = "mypy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531"}, - {file = "mypy-1.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3"}, - {file = "mypy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f"}, - {file = "mypy-1.10.1-py3-none-any.whl", hash = "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a"}, - {file = "mypy-1.10.1.tar.gz", hash = "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0"}, + {file = "mypy-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:a32fc80b63de4b5b3e65f4be82b4cfa362a46702672aa6a0f443b4689af7008c"}, + {file = "mypy-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1952f5ea8a5a959b05ed5f16452fddadbaae48b5d39235ab4c3fc444d5fd411"}, + {file = "mypy-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1e30dc3bfa4e157e53c1d17a0dad20f89dc433393e7702b813c10e200843b03"}, + {file = "mypy-1.11.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c63350af88f43a66d3dfeeeb8d77af34a4f07d760b9eb3a8697f0386c7590b4"}, + {file = "mypy-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:a831671bad47186603872a3abc19634f3011d7f83b083762c942442d51c58d58"}, + {file = "mypy-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7b6343d338390bb946d449677726edf60102a1c96079b4f002dedff375953fc5"}, + {file = "mypy-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4fe9f4e5e521b458d8feb52547f4bade7ef8c93238dfb5bbc790d9ff2d770ca"}, + {file = "mypy-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:886c9dbecc87b9516eff294541bf7f3655722bf22bb898ee06985cd7269898de"}, + {file = "mypy-1.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca4a60e1dd9fd0193ae0067eaeeb962f2d79e0d9f0f66223a0682f26ffcc809"}, + {file = "mypy-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:0bd53faf56de9643336aeea1c925012837432b5faf1701ccca7fde70166ccf72"}, + {file = "mypy-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f39918a50f74dc5969807dcfaecafa804fa7f90c9d60506835036cc1bc891dc8"}, + {file = "mypy-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bc71d1fb27a428139dd78621953effe0d208aed9857cb08d002280b0422003a"}, + {file = "mypy-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b868d3bcff720dd7217c383474008ddabaf048fad8d78ed948bb4b624870a417"}, + {file = "mypy-1.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:a707ec1527ffcdd1c784d0924bf5cb15cd7f22683b919668a04d2b9c34549d2e"}, + {file = "mypy-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:64f4a90e3ea07f590c5bcf9029035cf0efeae5ba8be511a8caada1a4893f5525"}, + {file = "mypy-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:749fd3213916f1751fff995fccf20c6195cae941dc968f3aaadf9bb4e430e5a2"}, + {file = "mypy-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b639dce63a0b19085213ec5fdd8cffd1d81988f47a2dec7100e93564f3e8fb3b"}, + {file = "mypy-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c956b49c5d865394d62941b109728c5c596a415e9c5b2be663dd26a1ff07bc0"}, + {file = "mypy-1.11.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45df906e8b6804ef4b666af29a87ad9f5921aad091c79cc38e12198e220beabd"}, + {file = "mypy-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:d44be7551689d9d47b7abc27c71257adfdb53f03880841a5db15ddb22dc63edb"}, + {file = "mypy-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2684d3f693073ab89d76da8e3921883019ea8a3ec20fa5d8ecca6a2db4c54bbe"}, + {file = "mypy-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:79c07eb282cb457473add5052b63925e5cc97dfab9812ee65a7c7ab5e3cb551c"}, + {file = "mypy-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11965c2f571ded6239977b14deebd3f4c3abd9a92398712d6da3a772974fad69"}, + {file = "mypy-1.11.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a2b43895a0f8154df6519706d9bca8280cda52d3d9d1514b2d9c3e26792a0b74"}, + {file = "mypy-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:1a81cf05975fd61aec5ae16501a091cfb9f605dc3e3c878c0da32f250b74760b"}, + {file = "mypy-1.11.1-py3-none-any.whl", hash = "sha256:0624bdb940255d2dd24e829d99a13cfeb72e4e9031f9492148f410ed30bcab54"}, + {file = "mypy-1.11.1.tar.gz", hash = "sha256:f404a0b069709f18bbdb702eb3dcfe51910602995de00bd39cea3050b5772d08"}, ] [package.dependencies] mypy-extensions = ">=1.0.0" tomli = 
{version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=4.1.0" +typing-extensions = ">=4.6.0" [package.extras] dmypy = ["psutil (>=4.0)"] @@ -69,33 +503,209 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nh3" +version = "0.2.18" +description = "Python bindings to the ammonia HTML sanitization library." +optional = false +python-versions = "*" +files = [ + {file = "nh3-0.2.18-cp37-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86"}, + {file = "nh3-0.2.18-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307"}, + {file = "nh3-0.2.18-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f"}, + {file = 
"nh3-0.2.18-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe"}, + {file = "nh3-0.2.18-cp37-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a"}, + {file = "nh3-0.2.18-cp37-abi3-musllinux_1_2_i686.whl", hash = "sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50"}, + {file = "nh3-0.2.18-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204"}, + {file = "nh3-0.2.18-cp37-abi3-win32.whl", hash = "sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be"}, + {file = "nh3-0.2.18-cp37-abi3-win_amd64.whl", hash = "sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844"}, + {file = "nh3-0.2.18.tar.gz", hash = "sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4"}, +] + +[[package]] +name = "pkginfo" +version = "1.10.0" +description = "Query metadata from sdists / bdists / installed packages." +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkginfo-1.10.0-py3-none-any.whl", hash = "sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097"}, + {file = "pkginfo-1.10.0.tar.gz", hash = "sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297"}, +] + +[package.extras] +testing = ["pytest", "pytest-cov", "wheel"] + +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pywin32-ctypes" +version = "0.2.3" +description = "A (partial) reimplementation of pywin32 using ctypes/cffi" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755"}, + {file = "pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8"}, +] + +[[package]] +name = "readme-renderer" +version = "44.0" +description = "readme_renderer is a library for rendering readme descriptions for Warehouse" +optional = false +python-versions = ">=3.9" +files = [ + {file = "readme_renderer-44.0-py3-none-any.whl", hash = "sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151"}, + {file = "readme_renderer-44.0.tar.gz", hash = "sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1"}, +] + +[package.dependencies] +docutils = ">=0.21.2" +nh3 = ">=0.2.14" +Pygments = ">=2.5.1" + +[package.extras] +md = ["cmarkgfm (>=0.8.0)"] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "rfc3986" +version = "2.0.0" +description = "Validating URI References per RFC 3986" +optional = false +python-versions = ">=3.7" +files = [ + {file = "rfc3986-2.0.0-py2.py3-none-any.whl", hash = "sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd"}, + {file = "rfc3986-2.0.0.tar.gz", hash = "sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c"}, +] + +[package.extras] +idna2008 = ["idna"] + +[[package]] +name = "rich" +version = "13.7.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, + {file = "rich-13.7.1.tar.gz", hash = 
"sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "ruff" -version = "0.5.2" +version = "0.5.7" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.5.2-py3-none-linux_armv6l.whl", hash = "sha256:7bab8345df60f9368d5f4594bfb8b71157496b44c30ff035d1d01972e764d3be"}, - {file = "ruff-0.5.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1aa7acad382ada0189dbe76095cf0a36cd0036779607c397ffdea16517f535b1"}, - {file = "ruff-0.5.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:aec618d5a0cdba5592c60c2dee7d9c865180627f1a4a691257dea14ac1aa264d"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b62adc5ce81780ff04077e88bac0986363e4a3260ad3ef11ae9c14aa0e67ef"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc42ebf56ede83cb080a50eba35a06e636775649a1ffd03dc986533f878702a3"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c15c6e9f88c67ffa442681365d11df38afb11059fc44238e71a9d9f1fd51de70"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d3de9a5960f72c335ef00763d861fc5005ef0644cb260ba1b5a115a102157251"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe5a968ae933e8f7627a7b2fc8893336ac2be0eb0aace762d3421f6e8f7b7f83"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04f54a9018f75615ae52f36ea1c5515e356e5d5e214b22609ddb546baef7132"}, - {file = "ruff-0.5.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ed02fb52e3741f0738db5f93e10ae0fb5c71eb33a4f2ba87c9a2fa97462a649"}, - {file = 
"ruff-0.5.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3cf8fe659f6362530435d97d738eb413e9f090e7e993f88711b0377fbdc99f60"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:237a37e673e9f3cbfff0d2243e797c4862a44c93d2f52a52021c1a1b0899f846"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2a2949ce7c1cbd8317432ada80fe32156df825b2fd611688814c8557824ef060"}, - {file = "ruff-0.5.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:481af57c8e99da92ad168924fd82220266043c8255942a1cb87958b108ac9335"}, - {file = "ruff-0.5.2-py3-none-win32.whl", hash = "sha256:f1aea290c56d913e363066d83d3fc26848814a1fed3d72144ff9c930e8c7c718"}, - {file = "ruff-0.5.2-py3-none-win_amd64.whl", hash = "sha256:8532660b72b5d94d2a0a7a27ae7b9b40053662d00357bb2a6864dd7e38819084"}, - {file = "ruff-0.5.2-py3-none-win_arm64.whl", hash = "sha256:73439805c5cb68f364d826a5c5c4b6c798ded6b7ebaa4011f01ce6c94e4d5583"}, - {file = "ruff-0.5.2.tar.gz", hash = "sha256:2c0df2d2de685433794a14d8d2e240df619b748fbe3367346baa519d8e6f1ca2"}, + {file = "ruff-0.5.7-py3-none-linux_armv6l.whl", hash = "sha256:548992d342fc404ee2e15a242cdbea4f8e39a52f2e7752d0e4cbe88d2d2f416a"}, + {file = "ruff-0.5.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:00cc8872331055ee017c4f1071a8a31ca0809ccc0657da1d154a1d2abac5c0be"}, + {file = "ruff-0.5.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eaf3d86a1fdac1aec8a3417a63587d93f906c678bb9ed0b796da7b59c1114a1e"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a01c34400097b06cf8a6e61b35d6d456d5bd1ae6961542de18ec81eaf33b4cb8"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fcc8054f1a717e2213500edaddcf1dbb0abad40d98e1bd9d0ad364f75c763eea"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f70284e73f36558ef51602254451e50dd6cc479f8b6f8413a95fcb5db4a55fc"}, + {file = 
"ruff-0.5.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a78ad870ae3c460394fc95437d43deb5c04b5c29297815a2a1de028903f19692"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ccd078c66a8e419475174bfe60a69adb36ce04f8d4e91b006f1329d5cd44bcf"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e31c9bad4ebf8fdb77b59cae75814440731060a09a0e0077d559a556453acbb"}, + {file = "ruff-0.5.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d796327eed8e168164346b769dd9a27a70e0298d667b4ecee6877ce8095ec8e"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a09ea2c3f7778cc635e7f6edf57d566a8ee8f485f3c4454db7771efb692c499"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a36d8dcf55b3a3bc353270d544fb170d75d2dff41eba5df57b4e0b67a95bb64e"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9369c218f789eefbd1b8d82a8cf25017b523ac47d96b2f531eba73770971c9e5"}, + {file = "ruff-0.5.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b88ca3db7eb377eb24fb7c82840546fb7acef75af4a74bd36e9ceb37a890257e"}, + {file = "ruff-0.5.7-py3-none-win32.whl", hash = "sha256:33d61fc0e902198a3e55719f4be6b375b28f860b09c281e4bdbf783c0566576a"}, + {file = "ruff-0.5.7-py3-none-win_amd64.whl", hash = "sha256:083bbcbe6fadb93cd86709037acc510f86eed5a314203079df174c40bbbca6b3"}, + {file = "ruff-0.5.7-py3-none-win_arm64.whl", hash = "sha256:2dca26154ff9571995107221d0aeaad0e75a77b5a682d6236cf89a58c70b76f4"}, + {file = "ruff-0.5.7.tar.gz", hash = "sha256:8dfc0a458797f5d9fb622dd0efc52d796f23f0a1493a9527f4e49a550ae9a7e5"}, +] + +[[package]] +name = "secretstorage" +version = "3.3.3" +description = "Python bindings to FreeDesktop.org Secret Service API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "SecretStorage-3.3.3-py3-none-any.whl", hash = 
"sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99"}, + {file = "SecretStorage-3.3.3.tar.gz", hash = "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77"}, ] +[package.dependencies] +cryptography = ">=2.0" +jeepney = ">=0.6" + [[package]] name = "tomli" version = "2.0.1" @@ -107,6 +717,28 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "twine" +version = "5.1.1" +description = "Collection of utilities for publishing packages on PyPI" +optional = false +python-versions = ">=3.8" +files = [ + {file = "twine-5.1.1-py3-none-any.whl", hash = "sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997"}, + {file = "twine-5.1.1.tar.gz", hash = "sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db"}, +] + +[package.dependencies] +importlib-metadata = ">=3.6" +keyring = ">=15.1" +pkginfo = ">=1.8.1,<1.11" +readme-renderer = ">=35.0" +requests = ">=2.20" +requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" +rfc3986 = ">=1.4.0" +rich = ">=12.0.0" +urllib3 = ">=1.26.0" + [[package]] name = "typing-extensions" version = "4.12.2" @@ -118,6 +750,23 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "urllib3" +version = "2.2.2" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "wheel" version = "0.43.0" @@ -132,6 +781,21 @@ files = [ [package.extras] test = ["pytest (>=6.0.0)", "setuptools (>=65)"] +[[package]] +name = "zipp" +version = "3.20.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.20.0-py3-none-any.whl", hash = "sha256:58da6168be89f0be59beb194da1250516fdaa062ccebd30127ac65d30045e10d"}, + {file = "zipp-3.20.0.tar.gz", hash = "sha256:0145e43d89664cfe1a2e533adc75adafed82fe2da404b4bbb6b026c0157bdb31"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + [extras] bin = [] extended = [] @@ -139,4 +803,4 @@ extended = [] [metadata] lock-version = "2.0" python-versions = ">=3.9" -content-hash = "98add445221126c3eb81877368002a05d210383c508828620637170af2d55258" +content-hash = "e092308a951e575c41d91b84aed8b90a20449e970bdb27a30502be4aa2a1b7fc" diff --git a/wrappers/python/pyproject.toml b/wrappers/python/pyproject.toml index 32ed4ae0..68742ea7 100644 --- a/wrappers/python/pyproject.toml +++ b/wrappers/python/pyproject.toml @@ -1,5 +1,5 @@ [tool.poetry] -name = "pagefind_python" +name = 
"pagefind" version = "0.0.0a0" # note that ^this^ is the version number of the python API, not the version of # the pagefind executable. @@ -15,7 +15,9 @@ exclude = [ "*.egg-info", "*.log", ".venv", - "pagefind_python_bin" + "pagefind_python_bin" # poetry has a *.pth file in its .venv that causes + # directories in src/ to be preferentially imported. To allow testing + # `import pagefind_bin`, we use ./src/pagefind_python_bin as a workaround. ] classifiers = [ "License :: OSI Approved :: MIT License", @@ -24,28 +26,23 @@ classifiers = [ ] # Note: we *aren't* including an `entry-points` section here to avoid clobbering -# the user's natively-installed `pagefind` binary. Using `python3 -m pagefind_python` +# the user's natively-installed `pagefind` binary. Using `python3 -m pagefind` # is an informatively-namespaced alternative that doesn't add too many keystrokes. # See https://packaging.python.org/en/latest/specifications/entry-points/ [tool.poetry.dependencies] python = ">=3.9" -# during the building of the pagefind_python package, the pagefind binary packages +# during the building of the `pagefind` python package, the pagefind binary packages # aren't yet published. Thus, `poetry lock` will fail if we include them here. # However, `poetry build` fails to include the binary package extras in -# `pagefind_python`'s distribution info if these lines are commented out. Thus, +# `pagefind`'s distribution info if these lines are commented out. 
Thus, # we temporarily uncomment these lines during the build process, and then re-comment # them afterwards -# [[[cog -# version = open("pagefind_version.txt").read().strip() -# print(f"# pagefind_bin = {{ version = \"~={version}\", optional = true }} #!!opt") -# print(f"# pagefind_bin_extended = {{ version = \"~={version}\", optional = true }} #!!opt") -# ]]] -# pagefind_bin = { version = "~=1.1.0", optional = true } #!!opt -# pagefind_bin_extended = { version = "~=1.1.0", optional = true } #!!opt -# [[[end]]] +# these next two lines are owned by ./scripts/build/api_package.py +# pagefind_bin = { version = "~=1", optional = true } #!!opt +# pagefind_bin_extended = { version = "~=1", optional = true } #!!opt [tool.poetry.extras] bin = ["pagefind_bin"] diff --git a/wrappers/python/scripts/build/__init__.py b/wrappers/python/scripts/build/__init__.py index b7066541..76899ba8 100644 --- a/wrappers/python/scripts/build/__init__.py +++ b/wrappers/python/scripts/build/__init__.py @@ -5,7 +5,6 @@ this_file = Path(__file__) this_dir = Path(__file__).parent python_root = this_dir.parent.parent.resolve().absolute() -upstream_version_file = python_root / "pagefind_version.txt" dist_dir = python_root / "dist" vendor_dir = python_root / "vendor" diff --git a/wrappers/python/scripts/build/all.py b/wrappers/python/scripts/build/all.py index 2dc5358d..7c73d6cc 100644 --- a/wrappers/python/scripts/build/all.py +++ b/wrappers/python/scripts/build/all.py @@ -1,14 +1,16 @@ +import os import tarfile import tempfile from pathlib import Path -from typing import List +from typing import List, Optional +from argparse import ArgumentParser from . 
import dist_dir, setup_logging from .binary_only_wheel import ( LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS, write_pagefind_bin_only_wheel, ) -from .get_pagefind_release import download +from .get_pagefind_release import download, find_bins __candidates = ( "pagefind", @@ -36,27 +38,48 @@ def get_llvm_triple(tar_gz: Path) -> str: def check_platforms(certified: List[Path]) -> None: + unsupported = [] for compressed_archive in certified: llvm_triple = get_llvm_triple(compressed_archive) platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(llvm_triple) if platform is None: - raise ValueError(f"Unsupported platform: {llvm_triple}") + unsupported.append(llvm_triple) + if unsupported: + err_message = "Unsupported platforms:\n" + "\n".join(sorted(unsupported)) + raise ValueError(err_message) + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("DIR", type=Path, default=None, nargs="?") + args = parser.parse_args() + dry_run: bool = args.dry_run + bin_dir: Optional[Path] = args.DIR + return dry_run, bin_dir if __name__ == "__main__": + dry_run, bin_dir = parse_args() setup_logging() - certified, tag_name = download("latest", dry_run=False) - # create a temp directory to hold the extracted binaries + if bin_dir is None: + certified, tag_name = download("latest", dry_run=False) + else: + if (tag_name := os.environ.get("GIT_VERSION")) is None: + raise KeyError("Missing DIR argument and GIT_VERSION environment variable") + certified = find_bins(bin_dir) check_platforms(certified) + + if not dry_run: + dist_dir.rmdir() dist_dir.mkdir(exist_ok=True) + for tar_gz in certified: llvm_triple = get_llvm_triple(tar_gz) - platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(llvm_triple) + platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS[llvm_triple] if platform is None: raise ValueError(f"Unsupported platform: {llvm_triple}") - - # FIXME: avoid writing the extracted bin to disk - # unpack the tar.gz archive + # 
TODO: avoid writing the extracted bin to disk name = tar_gz.name.removesuffix(".tar.gz") with tempfile.TemporaryDirectory(prefix=name + "~") as _temp_dir: temp_dir = Path(_temp_dir) diff --git a/wrappers/python/scripts/build/api_package.py b/wrappers/python/scripts/build/api_package.py index 0473ecfc..9a26e994 100644 --- a/wrappers/python/scripts/build/api_package.py +++ b/wrappers/python/scripts/build/api_package.py @@ -4,18 +4,23 @@ from . import python_root, setup_logging import subprocess +import re +import os pyproject_toml = python_root / "pyproject.toml" def main() -> None: + version = os.environ.get("PAGEFIND_VERSION") + if version is None: + version = "1" original = pyproject_toml.read_text() temp = "" for line in original.splitlines(): if line.endswith("#!!opt"): - temp += line.removeprefix("# ") + "\n" - else: - temp += line + "\n" + line = line.removeprefix("# ") + "\n" + line = re.sub(r'version = "[^"]+"', f'version = "~={version}"', line) + temp += line + "\n" with pyproject_toml.open("w") as f: f.write(temp) subprocess.run(["poetry", "build"], check=True) diff --git a/wrappers/python/scripts/build/binary_only_wheel.py b/wrappers/python/scripts/build/binary_only_wheel.py index faec258d..3712cffe 100644 --- a/wrappers/python/scripts/build/binary_only_wheel.py +++ b/wrappers/python/scripts/build/binary_only_wheel.py @@ -152,7 +152,7 @@ def write_wheel( f"{dist_info}/WHEEL": make_message( { "Wheel-Version": "1.0", - "Generator": "build_binary_only_wheel.py", + "Generator": "scripts/build/binary_only_wheel.py", "Root-Is-Purelib": "false", # see https://packaging.python.org/en/latest/specifications/binary-distribution-format/#what-s-the-deal-with-purelib-vs-platlib "Tag": tag, } diff --git a/wrappers/python/scripts/build/get_pagefind_release.py b/wrappers/python/scripts/build/get_pagefind_release.py index 6295519a..394a5987 100644 --- a/wrappers/python/scripts/build/get_pagefind_release.py +++ b/wrappers/python/scripts/build/get_pagefind_release.py @@ -6,7 
+6,7 @@ from typing import Any, Dict, List, Tuple, Union from urllib.request import urlopen -from . import vendor_dir, upstream_version_file +from . import vendor_dir from .download_verification import verify_hashes log = logging.getLogger(__name__) @@ -48,11 +48,20 @@ def get_version_downloads( return urls, files, tag_name +def find_bins(target_dir: Path) -> List[Path]: + assert target_dir.is_dir() + name_to_hash = {} + for hash_file in vendor_dir.glob("*.sha256"): + if (file := vendor_dir / hash_file.name.removesuffix(".sha256")).exists(): + name_to_hash[file.name] = hash_file.name + return verify_hashes(target_dir, name_to_hash) + + def download( version: Union[str, None] = None, dry_run: bool = True ) -> Tuple[List[Path], str]: urls, files, tag_name = get_version_downloads(version or "latest") - target_dir = vendor_dir / tag_name + target_dir = vendor_dir / tag_name # TODO: rm -rf this to ensure it's clean if dry_run: log.info(f"would download {len(urls)} assets to {target_dir}") for url in urls: @@ -60,6 +69,7 @@ def download( return [], tag_name target_dir.mkdir(parents=True, exist_ok=True) log.info(f"downloading {len(urls)} assets to {target_dir}") + # TODO: parallelize downloads for i, url in enumerate(urls): name = url.split("/")[-1] with urlopen(url) as response: @@ -77,7 +87,4 @@ def download( if __name__ == "__main__": _urls, _files, tag_name = get_version_downloads("latest") version = tag_name.removeprefix("v") - with upstream_version_file.open("w") as f: - f.write(version + "\n") - # to avoid IDEs adding a trailing newline and causing a diff, we add one here. 
print(version) diff --git a/wrappers/python/scripts/ci/cog/files.sh b/wrappers/python/scripts/ci/cog/files.sh index 4fe33c84..6a4ca389 100755 --- a/wrappers/python/scripts/ci/cog/files.sh +++ b/wrappers/python/scripts/ci/cog/files.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash export files_to_cog=( README.md - src/pagefind_python/__init__.py - pyproject.toml + src/pagefind/__init__.py ) # you can check this list by running `rg -l '\[\[\[cog' ./` in the repo root diff --git a/wrappers/python/scripts/ci/github/README.md b/wrappers/python/scripts/ci/github/README.md new file mode 100644 index 00000000..37c09b9b --- /dev/null +++ b/wrappers/python/scripts/ci/github/README.md @@ -0,0 +1 @@ +CI scripts that are specific to GitHub Actions. diff --git a/wrappers/python/scripts/ci/github/debug_python_paths.sh b/wrappers/python/scripts/ci/github/debug_python_paths.sh new file mode 100644 index 00000000..5d0236f4 --- /dev/null +++ b/wrappers/python/scripts/ci/github/debug_python_paths.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -eu +cd wrappers/python +export VIRTUAL_ENV="$PWD/.venv" +export PATH="$VIRTUAL_ENV/bin:$PATH" +set -x +echo "$PATH" | tr ':' '\n' +command -v python +command -v python3 +command -v poetry || echo "missing poetry" +stat ./.venv/bin/python +./.venv/bin/python --version diff --git a/wrappers/python/scripts/ci/github/install_dev_dependencies.sh b/wrappers/python/scripts/ci/github/install_dev_dependencies.sh index b171ceb4..52c86e44 100755 --- a/wrappers/python/scripts/ci/github/install_dev_dependencies.sh +++ b/wrappers/python/scripts/ci/github/install_dev_dependencies.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash set -eu +cd wrappers/python python3 -m poetry install --only=dev --no-root export VIRTUAL_ENV=$PWD/.venv -echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> "$GITHUB_ENV" -echo "PATH=$VIRTUAL_ENV/bin:$PATH" >> "$GITHUB_ENV" +# echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> "$GITHUB_ENV" +# echo "PATH=$VIRTUAL_ENV/bin:$PATH" >> "$GITHUB_ENV" diff --git 
a/wrappers/python/scripts/ci/github/integration_tests.sh b/wrappers/python/scripts/ci/github/integration_tests.sh index 9a86c458..83be21cc 100755 --- a/wrappers/python/scripts/ci/github/integration_tests.sh +++ b/wrappers/python/scripts/ci/github/integration_tests.sh @@ -1,66 +1,19 @@ #!/usr/bin/env bash set -eu -# ensure pagefind is not installed -if command -v pagefind; then - exit 1 -fi -# ensure pagefind_python is not installed in the current python environment -if python3 -c "import pagefind_python"; then - echo "dirty python environment: unexpectedly found pagefind_python" - exit 1 -fi - -# use pagefind installed from the officially-maintained node.js channel -pagefind_version="$(cat ./pagefind_version.txt)" #~ -npm i "pagefind@$pagefind_version" -_prev_path="$PATH" -export PATH="$PWD/node_modules/.bin:$PATH" - +export PATH="$PWD/target/release:$PATH" +export PYTHONPATH="$PWD/wrappers/python/src:$PYTHONPATH" +export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG -# remove_src_from_pythonpath=" -# import os -# import sys -# from pathlib import Path -# repo_root = Path(os.getcwd()) -# src = repo_root / 'src' -# sys.path.remove(str(src)) -# " -_get_executable=' +cd wrappers/python +python3 -c 'import sys; print("pythonpath"\n" + "\n".join(sys.path))' +python3 -c ' import logging import os -from pagefind_python.service import get_executable +from pagefind.service import get_executable logging.basicConfig(level=os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL", "INFO")) print(get_executable()) ' -export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG - - -python3 -m pip install \ - --no-index --find-links=dist \ - --only-binary :all: \ - pagefind_python -python3 -c "$_get_executable" -python3 -m pagefind_python --help -echo "starting integration tests using system pagefind" +python3 -m pagefind --help python3 src/tests/integration.py - -# remove the externally installed pagefind binary -rm -rf node_modules output -export PATH="$_prev_path" -if command -v pagefind; then - echo "dirty PATH: unexpectedly 
found pagefind" - exit 1 -fi - -python3 -m pip install \ - --no-index --find-links=dist \ - --only-binary :all: \ - 'pagefind_python[bin]' - -python3 -c "$_get_executable" -python3 -m pagefind_python --help -echo "starting integration tests using pagefind_bin python module" -python3 src/tests/integration.py - diff --git a/wrappers/python/scripts/ci/github/scrape_upstream_version.sh b/wrappers/python/scripts/ci/github/scrape_upstream_version.sh deleted file mode 100755 index 5dcfec44..00000000 --- a/wrappers/python/scripts/ci/github/scrape_upstream_version.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -# fetch the current version of the pagefind executable -# see https://simonwillison.net/2020/Oct/9/git-scraping/ -set -eu -export PATH="$PWD/.venv/bin:$PATH" -file="pagefind_version.txt" - -python3 -m scripts.build.get_pagefind_release -pagefind_version=""; pagefind_version=$(cat ./"$file") - -if ! git --no-pager diff --exit-code -- "$file"; then # there's a new version - ./scripts/ci/cog/update.sh # note that $PWD is the repo root - git add -u - git config user.name "Automated" - git config user.email "actions@users.noreply.github.com" - git commit -m "chore: update pagefind binary to $pagefind_version" - git tag "bin/$pagefind_version" - git push - git push --tags --follow-tags -fi diff --git a/wrappers/python/scripts/ci/github/setup_poetry.sh b/wrappers/python/scripts/ci/github/setup_poetry.sh index 9a244ef4..731e882b 100755 --- a/wrappers/python/scripts/ci/github/setup_poetry.sh +++ b/wrappers/python/scripts/ci/github/setup_poetry.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash set -eu python3 -m pip install poetry -command -v poetry || true # <- debugging: check if poetry is installed on $PATH # not using pipx since this is a CI environment that will be reset -- # there's not much risk of poetry's dependencies conflicting with ours diff --git a/wrappers/python/src/pagefind/service/__init__.py b/wrappers/python/src/pagefind/service/__init__.py index 
8302d7e2..bd4e39f8 100644 --- a/wrappers/python/src/pagefind/service/__init__.py +++ b/wrappers/python/src/pagefind/service/__init__.py @@ -203,10 +203,9 @@ async def create_index( _config: Optional["IndexConfig"] = None if config is not None: - _config = {**config} - _ = _config.pop("output_path", None) - else: - _config = None + _config = {**config} # clone the config to avoid modifying the original + _config.pop("output_path", None) + log.debug(f"creating index with config: {_config}") result = await self.send( InternalNewIndexRequest(type="NewIndex", config=_config) diff --git a/wrappers/python/src/pagefind_python_bin/README.md b/wrappers/python/src/pagefind_python_bin/README.md index 369f7441..e4147cd7 100644 --- a/wrappers/python/src/pagefind_python_bin/README.md +++ b/wrappers/python/src/pagefind_python_bin/README.md @@ -1,4 +1,6 @@ - + # `pagefind_bin` A python wrapper for the `pagefind` executable. From dab6f27ba454b8935e95d20f90427cf910a0fc67 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 24 Aug 2024 12:11:54 -0400 Subject: [PATCH 07/39] feat(wrappers/python): add logging, timeout to process shutdown --- wrappers/python/src/pagefind/service/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/wrappers/python/src/pagefind/service/__init__.py b/wrappers/python/src/pagefind/service/__init__.py index bd4e39f8..75c2c788 100644 --- a/wrappers/python/src/pagefind/service/__init__.py +++ b/wrappers/python/src/pagefind/service/__init__.py @@ -180,10 +180,18 @@ async def _wait_for_responses(self) -> None: async def close(self) -> None: # wait for all _responses to be resolved - await asyncio.gather(*self._responses.values()) # IDEA: add timeout? 
+ log.debug("waiting for all responses to be resolved") + try: + # wait at most 5s for all responses to be resolved + async with asyncio.timeout(5): + await asyncio.gather(*self._responses.values()) + log.debug("all responses resolved") + except asyncio.TimeoutError: + log.error("timed out waiting for responses to be resolved") self._poll_task.cancel() self._backend.terminate() await self._backend.wait() + log.debug("backend terminated") async def __aenter__(self) -> "PagefindService": return await self.launch() From e6704542a4fc8c2310cedd062a9eed91a57cca96 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 24 Aug 2024 12:52:19 -0400 Subject: [PATCH 08/39] ci(wrappers/python): make sure scripts are executable --- wrappers/python/scripts/ci/github/debug_python_paths.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 wrappers/python/scripts/ci/github/debug_python_paths.sh diff --git a/wrappers/python/scripts/ci/github/debug_python_paths.sh b/wrappers/python/scripts/ci/github/debug_python_paths.sh old mode 100644 new mode 100755 From 3e1637f7842a75af99ff92ba1e7bc07399268cc0 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sun, 25 Aug 2024 09:28:00 -0400 Subject: [PATCH 09/39] chore(wrappers/python): update poetry.lock --- wrappers/python/poetry.lock | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/wrappers/python/poetry.lock b/wrappers/python/poetry.lock index bd24997c..0ac42b68 100644 --- a/wrappers/python/poetry.lock +++ b/wrappers/python/poetry.lock @@ -447,38 +447,38 @@ files = [ [[package]] name = "mypy" -version = "1.11.1" +version = "1.11.2" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" files = [ - {file = "mypy-1.11.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a32fc80b63de4b5b3e65f4be82b4cfa362a46702672aa6a0f443b4689af7008c"}, - {file = "mypy-1.11.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:c1952f5ea8a5a959b05ed5f16452fddadbaae48b5d39235ab4c3fc444d5fd411"}, - {file = "mypy-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1e30dc3bfa4e157e53c1d17a0dad20f89dc433393e7702b813c10e200843b03"}, - {file = "mypy-1.11.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2c63350af88f43a66d3dfeeeb8d77af34a4f07d760b9eb3a8697f0386c7590b4"}, - {file = "mypy-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:a831671bad47186603872a3abc19634f3011d7f83b083762c942442d51c58d58"}, - {file = "mypy-1.11.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7b6343d338390bb946d449677726edf60102a1c96079b4f002dedff375953fc5"}, - {file = "mypy-1.11.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4fe9f4e5e521b458d8feb52547f4bade7ef8c93238dfb5bbc790d9ff2d770ca"}, - {file = "mypy-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:886c9dbecc87b9516eff294541bf7f3655722bf22bb898ee06985cd7269898de"}, - {file = "mypy-1.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca4a60e1dd9fd0193ae0067eaeeb962f2d79e0d9f0f66223a0682f26ffcc809"}, - {file = "mypy-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:0bd53faf56de9643336aeea1c925012837432b5faf1701ccca7fde70166ccf72"}, - {file = "mypy-1.11.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f39918a50f74dc5969807dcfaecafa804fa7f90c9d60506835036cc1bc891dc8"}, - {file = "mypy-1.11.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bc71d1fb27a428139dd78621953effe0d208aed9857cb08d002280b0422003a"}, - {file = "mypy-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b868d3bcff720dd7217c383474008ddabaf048fad8d78ed948bb4b624870a417"}, - {file = "mypy-1.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a707ec1527ffcdd1c784d0924bf5cb15cd7f22683b919668a04d2b9c34549d2e"}, - {file = "mypy-1.11.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:64f4a90e3ea07f590c5bcf9029035cf0efeae5ba8be511a8caada1a4893f5525"}, - {file = "mypy-1.11.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:749fd3213916f1751fff995fccf20c6195cae941dc968f3aaadf9bb4e430e5a2"}, - {file = "mypy-1.11.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b639dce63a0b19085213ec5fdd8cffd1d81988f47a2dec7100e93564f3e8fb3b"}, - {file = "mypy-1.11.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c956b49c5d865394d62941b109728c5c596a415e9c5b2be663dd26a1ff07bc0"}, - {file = "mypy-1.11.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45df906e8b6804ef4b666af29a87ad9f5921aad091c79cc38e12198e220beabd"}, - {file = "mypy-1.11.1-cp38-cp38-win_amd64.whl", hash = "sha256:d44be7551689d9d47b7abc27c71257adfdb53f03880841a5db15ddb22dc63edb"}, - {file = "mypy-1.11.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2684d3f693073ab89d76da8e3921883019ea8a3ec20fa5d8ecca6a2db4c54bbe"}, - {file = "mypy-1.11.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:79c07eb282cb457473add5052b63925e5cc97dfab9812ee65a7c7ab5e3cb551c"}, - {file = "mypy-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11965c2f571ded6239977b14deebd3f4c3abd9a92398712d6da3a772974fad69"}, - {file = "mypy-1.11.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a2b43895a0f8154df6519706d9bca8280cda52d3d9d1514b2d9c3e26792a0b74"}, - {file = "mypy-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:1a81cf05975fd61aec5ae16501a091cfb9f605dc3e3c878c0da32f250b74760b"}, - {file = "mypy-1.11.1-py3-none-any.whl", hash = "sha256:0624bdb940255d2dd24e829d99a13cfeb72e4e9031f9492148f410ed30bcab54"}, - {file = "mypy-1.11.1.tar.gz", hash = "sha256:f404a0b069709f18bbdb702eb3dcfe51910602995de00bd39cea3050b5772d08"}, + {file = "mypy-1.11.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d42a6dd818ffce7be66cce644f1dff482f1d97c53ca70908dff0b9ddc120b77a"}, + {file = 
"mypy-1.11.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:801780c56d1cdb896eacd5619a83e427ce436d86a3bdf9112527f24a66618fef"}, + {file = "mypy-1.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41ea707d036a5307ac674ea172875f40c9d55c5394f888b168033177fce47383"}, + {file = "mypy-1.11.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e658bd2d20565ea86da7d91331b0eed6d2eee22dc031579e6297f3e12c758c8"}, + {file = "mypy-1.11.2-cp310-cp310-win_amd64.whl", hash = "sha256:478db5f5036817fe45adb7332d927daa62417159d49783041338921dcf646fc7"}, + {file = "mypy-1.11.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:75746e06d5fa1e91bfd5432448d00d34593b52e7e91a187d981d08d1f33d4385"}, + {file = "mypy-1.11.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a976775ab2256aadc6add633d44f100a2517d2388906ec4f13231fafbb0eccca"}, + {file = "mypy-1.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd953f221ac1379050a8a646585a29574488974f79d8082cedef62744f0a0104"}, + {file = "mypy-1.11.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:57555a7715c0a34421013144a33d280e73c08df70f3a18a552938587ce9274f4"}, + {file = "mypy-1.11.2-cp311-cp311-win_amd64.whl", hash = "sha256:36383a4fcbad95f2657642a07ba22ff797de26277158f1cc7bd234821468b1b6"}, + {file = "mypy-1.11.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e8960dbbbf36906c5c0b7f4fbf2f0c7ffb20f4898e6a879fcf56a41a08b0d318"}, + {file = "mypy-1.11.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06d26c277962f3fb50e13044674aa10553981ae514288cb7d0a738f495550b36"}, + {file = "mypy-1.11.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e7184632d89d677973a14d00ae4d03214c8bc301ceefcdaf5c474866814c987"}, + {file = "mypy-1.11.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a66169b92452f72117e2da3a576087025449018afc2d8e9bfe5ffab865709ca"}, + {file = 
"mypy-1.11.2-cp312-cp312-win_amd64.whl", hash = "sha256:969ea3ef09617aff826885a22ece0ddef69d95852cdad2f60c8bb06bf1f71f70"}, + {file = "mypy-1.11.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:37c7fa6121c1cdfcaac97ce3d3b5588e847aa79b580c1e922bb5d5d2902df19b"}, + {file = "mypy-1.11.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a8a53bc3ffbd161b5b2a4fff2f0f1e23a33b0168f1c0778ec70e1a3d66deb86"}, + {file = "mypy-1.11.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ff93107f01968ed834f4256bc1fc4475e2fecf6c661260066a985b52741ddce"}, + {file = "mypy-1.11.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:edb91dded4df17eae4537668b23f0ff6baf3707683734b6a818d5b9d0c0c31a1"}, + {file = "mypy-1.11.2-cp38-cp38-win_amd64.whl", hash = "sha256:ee23de8530d99b6db0573c4ef4bd8f39a2a6f9b60655bf7a1357e585a3486f2b"}, + {file = "mypy-1.11.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:801ca29f43d5acce85f8e999b1e431fb479cb02d0e11deb7d2abb56bdaf24fd6"}, + {file = "mypy-1.11.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af8d155170fcf87a2afb55b35dc1a0ac21df4431e7d96717621962e4b9192e70"}, + {file = "mypy-1.11.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7821776e5c4286b6a13138cc935e2e9b6fde05e081bdebf5cdb2bb97c9df81d"}, + {file = "mypy-1.11.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:539c570477a96a4e6fb718b8d5c3e0c0eba1f485df13f86d2970c91f0673148d"}, + {file = "mypy-1.11.2-cp39-cp39-win_amd64.whl", hash = "sha256:3f14cd3d386ac4d05c5a39a51b84387403dadbd936e17cb35882134d4f8f0d24"}, + {file = "mypy-1.11.2-py3-none-any.whl", hash = "sha256:b499bc07dbdcd3de92b0a8b29fdf592c111276f6a12fe29c30f6c417dd546d12"}, + {file = "mypy-1.11.2.tar.gz", hash = "sha256:7f9993ad3e0ffdc95c2a14b66dee63729f021968bff8ad911867579c65d13a79"}, ] [package.dependencies] From 54f70ad751fe85b54e0f2ab0194c7c94bf402fc9 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sun, 25 Aug 2024 09:37:25 
-0400 Subject: [PATCH 10/39] fix(wrappers/python): trivial type error --- wrappers/python/scripts/build/all.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wrappers/python/scripts/build/all.py b/wrappers/python/scripts/build/all.py index 7c73d6cc..65c53317 100644 --- a/wrappers/python/scripts/build/all.py +++ b/wrappers/python/scripts/build/all.py @@ -65,8 +65,10 @@ def parse_args(): if bin_dir is None: certified, tag_name = download("latest", dry_run=False) else: - if (tag_name := os.environ.get("GIT_VERSION")) is None: + if os.environ.get("GIT_VERSION") is None: raise KeyError("Missing DIR argument and GIT_VERSION environment variable") + else: + tag_name = os.environ["GIT_VERSION"] certified = find_bins(bin_dir) check_platforms(certified) From 2a5512a3203be83441505ef269fbcf8dea322cf6 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sun, 25 Aug 2024 11:00:32 -0400 Subject: [PATCH 11/39] ci(wrappers/python): correct path to non-github-specific script --- .github/workflows/test.yml | 5 ++++- wrappers/python/scripts/ci/github/debug_python_paths.sh | 4 ++-- wrappers/python/scripts/ci/github/integration_tests.sh | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 03aa488e..b7116138 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -143,7 +143,10 @@ jobs: - name: ensure cog up-to-date working-directory: ./wrappers/python - run: ./scripts/ci/github/cog/check.sh + run: | + export VIRTUAL_ENV="$PWD/.venv" + export PATH="$VIRTUAL_ENV/bin:$PATH" + ./scripts/ci/cog/check.sh - name: Test python API timeout-minutes: 1 diff --git a/wrappers/python/scripts/ci/github/debug_python_paths.sh b/wrappers/python/scripts/ci/github/debug_python_paths.sh index 5d0236f4..5e5e5ccc 100755 --- a/wrappers/python/scripts/ci/github/debug_python_paths.sh +++ b/wrappers/python/scripts/ci/github/debug_python_paths.sh @@ -8,5 +8,5 @@ echo "$PATH" | tr ':' '\n' command -v 
python command -v python3 command -v poetry || echo "missing poetry" -stat ./.venv/bin/python -./.venv/bin/python --version +stat ./.venv/bin/python || stat ./.venv/bin/python.exe || echo "missing .venv/bin/python{.exe}" +python --version diff --git a/wrappers/python/scripts/ci/github/integration_tests.sh b/wrappers/python/scripts/ci/github/integration_tests.sh index 83be21cc..5029d23d 100755 --- a/wrappers/python/scripts/ci/github/integration_tests.sh +++ b/wrappers/python/scripts/ci/github/integration_tests.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash set -eu export PATH="$PWD/target/release:$PATH" -export PYTHONPATH="$PWD/wrappers/python/src:$PYTHONPATH" +export PYTHONPATH="$PWD/wrappers/python/src:${PYTHONPATH:-}" export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG cd wrappers/python -python3 -c 'import sys; print("pythonpath"\n" + "\n".join(sys.path))' +python3 -c 'import sys; print("pythonpath\n" + "\n".join(sys.path))' python3 -c ' import logging import os From ef41fa58f52993e62e7169714f2fc3a476fe6499 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Tue, 24 Sep 2024 21:27:47 -0400 Subject: [PATCH 12/39] ci(wrappers/python): debug path to mypy on windows --- .../python/scripts/ci/github/debug_python_paths.sh | 13 +++++++++++-- wrappers/python/scripts/ci/python_lints.sh | 6 +++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/wrappers/python/scripts/ci/github/debug_python_paths.sh b/wrappers/python/scripts/ci/github/debug_python_paths.sh index 5e5e5ccc..580cb601 100755 --- a/wrappers/python/scripts/ci/github/debug_python_paths.sh +++ b/wrappers/python/scripts/ci/github/debug_python_paths.sh @@ -3,10 +3,19 @@ set -eu cd wrappers/python export VIRTUAL_ENV="$PWD/.venv" export PATH="$VIRTUAL_ENV/bin:$PATH" -set -x -echo "$PATH" | tr ':' '\n' +# shellcheck disable=SC2016 +echo '$PATH:' +echo "$PATH" | tr ':' '\n - ' + command -v python command -v python3 command -v poetry || echo "missing poetry" +if ! 
command -v mypy; then + if command -v mypy.exe; then + echo "missing mypy, but found mypy.exe" + else + echo "missing mypy{.exe}" + fi +fi stat ./.venv/bin/python || stat ./.venv/bin/python.exe || echo "missing .venv/bin/python{.exe}" python --version diff --git a/wrappers/python/scripts/ci/python_lints.sh b/wrappers/python/scripts/ci/python_lints.sh index eaad2b7e..5206b242 100755 --- a/wrappers/python/scripts/ci/python_lints.sh +++ b/wrappers/python/scripts/ci/python_lints.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash set -eu -mypy src scripts -ruff check -ruff format --check +python3 -m mypy src scripts +python3 -m ruff check +python3 -m ruff format --check From 112f4a8c05e4191ab25bdf346543133ac94e3bfd Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Tue, 24 Sep 2024 22:48:38 -0400 Subject: [PATCH 13/39] ci(wrappers/python): avoid invoking external python packages on windows --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b7116138..4a8172a6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -135,6 +135,7 @@ jobs: run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh - name: Lint python + if: runner.os == 'Linux' working-directory: ./wrappers/python run: | export VIRTUAL_ENV="$PWD/.venv" @@ -142,6 +143,7 @@ jobs: bash ./scripts/ci/python_lints.sh - name: ensure cog up-to-date + if: runner.os == 'Linux' working-directory: ./wrappers/python run: | export VIRTUAL_ENV="$PWD/.venv" From 8109fdd6c486457193d95d0af25a0b60290587a6 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Tue, 24 Sep 2024 23:50:40 -0400 Subject: [PATCH 14/39] docs(wrappers/python): document python API The structure is more-or-less entirely plagiarized from node-api.md, but with the Python-specific details filled in. 
--- docs/content/docs/py-api.md | 292 ++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 docs/content/docs/py-api.md diff --git a/docs/content/docs/py-api.md b/docs/content/docs/py-api.md new file mode 100644 index 00000000..08de3f14 --- /dev/null +++ b/docs/content/docs/py-api.md @@ -0,0 +1,292 @@ +--- +title: "Indexing content using the Python API" +nav_title: "Using the Python API" +nav_section: References +weight: 54 +--- + +Pagefind provides an interface to the indexing binary as a Python package you can install and import. + +There are situations where using this Python package is beneficial: +- Integrating Pagefind into an existing Python project, e.g. writing a plugin for a static site generator that can pass in-memory HTML files to Pagefind. + Pagefind can also return the search index in-memory, to be hosted via the dev mode alongside the files. +- Users looking to index their site and augment that index with extra non-HTML pages can run a standard Pagefind crawl with [`add_directory`](#indexadddirectory) and augment it with [`add_custom_record`](#indexaddcustomrecord). +- Users looking to use Pagefind's engine for searching miscellaneous content such as PDFs or subtitles, where [`add_custom_record`](#indexaddcustomrecord) can be used to build the entire index from scratch. + +## Example Usage + + + +```py +import asyncio +import json +import logging +import os +from pagefind.index import PagefindIndex, IndexConfig + +logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO")) +log = logging.getLogger(__name__) +html_content = ( + "" + " " + "
        <main>"
+    "            <h1>Example HTML</h1>"
+    "            <p>This is an example HTML page.</p>"
+    "        </main>"
+    "    </body>
" + " " + "" +) + + +def prefix(pre: str, s: str) -> str: + return pre + s.replace("\n", f"\n{pre}") + + +async def main(): + config = IndexConfig( + root_selector="main", logfile="index.log", output_path="./output", verbose=True + ) + async with PagefindIndex(config=config) as index: + log.debug("opened index") + new_file, new_record, new_dir = await asyncio.gather( + index.add_html_file( + content=html_content, + url="https://example.com", + source_path="other/example.html", + ), + index.add_custom_record( + url="/elephants/", + content="Some testing content regarding elephants", + language="en", + meta={"title": "Elephants"}, + ), + index.add_directory("./public"), + ) + print(prefix("new_file ", json.dumps(new_file, indent=2))) + print(prefix("new_record ", json.dumps(new_record, indent=2))) + print(prefix("new_dir ", json.dumps(new_dir, indent=2))) + + files = await index.get_files() + for file in files: + print(prefix("files", f"{len(file['content']):10}B {file['path']}")) + + +if __name__ == "__main__": + asyncio.run(main()) +``` + +All interactions with Pagefind are asynchronous, as they communicate with the native Pagefind binary in the background. + +## PagefindIndex + +`pagefind.index.PagefindIndex` manages a pagefind index. + +`PagefindIndex` operates as an async contextmanager. +Entering the context starts a backing Pagefind service and creates an in-memory index in the backing service. +Exiting the context writes the in-memory index to disk and then shuts down the backing Pagefind service. + +```py +from pagefind.index import PagefindIndex + +async def main(): + async with PagefindIndex() as index: # open the index + ... # write to the index + # the index is closed here and files are written to disk. +``` + +`PagefindIndex` optionally takes a configuration dictionary that can apply parts of the [Pagefind CLI config](/docs/config-options/). 
The options available at this level are: + +```py +from pagefind.index import PagefindIndex, IndexConfig +config = IndexConfig( + root_selector="main", + exclude_selectors="nav", + force_language="en", + verbose=True, + logfile="index.log", + keep_index_url=True, + output_path="./output", +) + +async def main(): + async with PagefindIndex(config=config) as index: + ... +``` + +See the relevant documentation for these configuration options in the [Configuring the Pagefind CLI](/docs/config-options/) documentation. + +## index.add_directory + +Indexes a directory from disk using the standard Pagefind indexing behaviour. +This is equivalent to running the Pagefind binary with `--site `. + +```py +# Index all the HTML files in the public directory +indexed_dir = await index.add_directory("./public") +page_count: int = new_dir["page_count"] +``` +If the `path` provided is relative, it will be relative to the current working directory of your Python process. + +```py +# Index files in a directory matching a given glob pattern. +indexed_dir = await index.add_directory("./public", glob="**.{html}") +``` + +Optionally, a custom `glob` can be supplied which controls which files Pagefind will consume within the directory. The default is shown, and the `glob` option can be omitted entirely. +See [Wax patterns documentation](https://github.com/olson-sean-k/wax#patterns) for more details. + + + +## index.add_html_file + +Adds a virtual HTML file to the Pagefind index. Useful for files that don't exist on disk, for example a static site generator that is serving files from memory. + +```py +html_content = ( + "" + "

A Full HTML Document

" + "

...

" + "" +) + +# Index a file as if Pagefind was indexing from disk +new_file = await index.add_html_file( + content=html_content, + source_path="other/example.html", +) + +# Index HTML content, giving it a specific URL +new_file = await index.add_html_file( + content=html_content, + url="https://example.com", +) +``` + +The `source_path` should represent the path of this HTML file if it were to exist on disk. Pagefind will use this path to generate the URL. It should be relative, or absolute to a path within the current working directory. + +Instead of `source_path`, a `url` may be supplied to explicitly set the URL of this search result. + +The `content` should be the full HTML source, including the outer ` ` tags. This will be run through Pagefind's standard HTML indexing process, and should contain any required Pagefind attributes to control behaviour. + + +If successful, the `file` object is returned containing metadata about the completed indexing. + +## index.add_custom_record +Adds a direct record to the Pagefind index. +Useful for adding non-HTML content to the search results. + +```py +custom_record = await index.add_custom_record( + url="/contact/", + content=( + "My raw content to be indexed for search. " + "Will be lightly processed by Pagefind." + ), + language="en", + meta={ + "title": "Contact", + "category": "Landing Page" + }, + filters={"tags": ["landing", "company"]}, + sort={"weight": "20"}, +) + +page_word_count: int = custom_record["page_word_count"] +page_url: str = custom_record["page_url"] +page_meta: dict[str, str] = custom_record["page_meta"] +``` + +The `url`, `content`, and `language` fields are all required. `language` should be an [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). + +`meta` is optional, and is strictly a flat object of keys to string values. +See the [Metadata documentation](https://pagefind.app/docs/metadata/) for semantics. 
+ +`filters` is optional, and is strictly a flat object of keys to arrays of string values. +See the [Filters documentation](https://pagefind.app/docs/filtering/) for semantics. + +`sort` is optional, and is strictly a flat object of keys to string values. +See the [Sort documentation](https://pagefind.app/docs/sorts/) for semantics. +*When Pagefind is processing an index, number-like strings will be sorted numerically rather than alphabetically. As such, the value passed in should be `"20"` and not `20`* + + + +If successful, the `file` object is returned containing metadata about the completed indexing. + +## index.get_files + +Get raw data of all files in the Pagefind index. +Useful for integrating a Pagefind index into the development mode of a static site generator and hosting these files yourself. + +**WATCH OUT**: these files can be large enough to clog the pipe reading from the `pagefind` binary's subprocess, causing a deadlock. + +```py +for file in (await index.get_files()): + path: str = file["path"] + content: str = file["content"] + ... +``` + +## index.write_files + +Closing the `PagefindIndex`'s context automatically calls `index.write_files`. + +If you aren't using `PagefindIndex` as a context manager, calling `index.write_files()` writes the index files to disk, as they would be written when running the standard Pagefind binary directly. + +```py +await index.write_files("./public/pagefind") +``` + +The `output_path` option should contain the path to the desired Pagefind bundle directory. If relative, is relative to the current working directory of your Python process. + +## index.delete_index + +Deletes the data for the given index from its backing Pagefind service. +Doesn't affect any written files or data returned by `get_files()`. + +```python +await index.delete_index(); +``` + +Calling `index.get_files()` or `index.write_files()` doesn't consume the index, and further modifications can be made. 
In situations where many indexes are being created, the `delete_index` call helps clear out memory from a shared Pagefind binary service. + +Reusing an `PagefindIndex` object after calling `index.delete_index()` will cause errors to be returned. + +Not calling this method is fine — these indexes will be cleaned up when your `PagefindIndex`'s context closes, its backing Pagefind service closes, or your Python process exits. + +## PagefindService + +`PagefindService` manages a pagefind service running in a subprocess. + +`PagefindService` operates as an async context manager: when the context is entered, the backing service starts, and when the context exits, the backing service shuts down. + +```py +from pagefind.service import PagefindService + +async def main(): + # or you can write + service = await PagefindService().launch() + ... + await service.close() + + async with PagefindService() as service: # the service launches + ... + # the service closes +``` + +You should invoke `PagefindService` directly when you want to use the same backing service for many indexes: + +```py +async with PagefindService() as service: + default_index = await service.create_index() + other_index = await service.create_index( + config=IndexConfig(output_path="./search/nonstandard"), + ) + await asyncio.gather( + default_index.add_directory("./a"), + other_index.add_directory("./b"), + ) + await asyncio.gather( + default_index.write_files(), + other_index.write_files(), + ) +``` From 6cdf2d95a627dfdb30d84b21a35c12f2c62e5b5c Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Wed, 25 Sep 2024 00:07:26 -0400 Subject: [PATCH 15/39] ci(wrappers/python): explore powershell venv activation script --- .github/workflows/test.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a8172a6..61baecdd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -131,11 +131,17 @@ jobs: - name: Install 
dev dependencies run: ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh + - name: activate venv on windows + if: runner.os == 'Windows' + shell: pwsh + working-directory: ./wrappers/python + run: .\.venv\Scripts\Activate.ps1 + - name: debug python paths run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh - name: Lint python - if: runner.os == 'Linux' + # if: runner.os == 'Linux' working-directory: ./wrappers/python run: | export VIRTUAL_ENV="$PWD/.venv" From a6f5c578e3d0e72e8fbfbc244220c1be7d281a9e Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 28 Sep 2024 09:21:17 -0400 Subject: [PATCH 16/39] ci(wrappers/python): fix windows venv activation It turns out windows virtualenvs have a different directory structure than normal OSs: binaries are placed in `Scripts` instead of `bin`, and only `python` is available as an executable (no `python3`). --- .github/workflows/test.yml | 27 ++++++++---------- wrappers/python/scripts/build/all.py | 6 ++-- .../python/scripts/ci/github/activate_venv.sh | 25 +++++++++++++++++ .../scripts/ci/github/debug_python_paths.sh | 28 +++++++++++++++---- .../scripts/ci/github/integration_tests.sh | 9 +++--- wrappers/python/scripts/ci/python_lints.sh | 6 ++-- 6 files changed, 71 insertions(+), 30 deletions(-) create mode 100755 wrappers/python/scripts/ci/github/activate_venv.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 61baecdd..4fbf377a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -131,32 +131,27 @@ jobs: - name: Install dev dependencies run: ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh - - name: activate venv on windows - if: runner.os == 'Windows' - shell: pwsh - working-directory: ./wrappers/python - run: .\.venv\Scripts\Activate.ps1 - + - name: activate venv + run: ./wrappers/python/scripts/ci/github/activate_venv.sh + - name: debug python paths run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh - name: Lint python - 
# if: runner.os == 'Linux' + # avoid duplicating linting work on different OSes + if: runner.os == 'Linux' working-directory: ./wrappers/python - run: | - export VIRTUAL_ENV="$PWD/.venv" - export PATH="$VIRTUAL_ENV/bin:$PATH" - bash ./scripts/ci/python_lints.sh - + run: ./scripts/ci/python_lints.sh + - name: ensure cog up-to-date + # avoid duplicating linting work on different OSes if: runner.os == 'Linux' working-directory: ./wrappers/python - run: | - export VIRTUAL_ENV="$PWD/.venv" - export PATH="$VIRTUAL_ENV/bin:$PATH" - ./scripts/ci/cog/check.sh + run: ./scripts/ci/cog/check.sh - name: Test python API timeout-minutes: 1 + # ^ guard against the tests getting deadlock if the subprocess pipe + # gets clogged run: ./wrappers/python/scripts/ci/github/integration_tests.sh diff --git a/wrappers/python/scripts/build/all.py b/wrappers/python/scripts/build/all.py index 65c53317..946e615d 100644 --- a/wrappers/python/scripts/build/all.py +++ b/wrappers/python/scripts/build/all.py @@ -1,8 +1,10 @@ +"""A script that builds all the pagefind binary-only wheels.""" + import os import tarfile import tempfile from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Tuple from argparse import ArgumentParser from . import dist_dir, setup_logging @@ -49,7 +51,7 @@ def check_platforms(certified: List[Path]) -> None: raise ValueError(err_message) -def parse_args(): +def parse_args() -> Tuple[bool, Optional[Path]]: parser = ArgumentParser() parser.add_argument("--dry-run", action="store_true") parser.add_argument("DIR", type=Path, default=None, nargs="?") diff --git a/wrappers/python/scripts/ci/github/activate_venv.sh b/wrappers/python/scripts/ci/github/activate_venv.sh new file mode 100755 index 00000000..1b0c681b --- /dev/null +++ b/wrappers/python/scripts/ci/github/activate_venv.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -eu + +cd wrappers/python + +VIRTUAL_ENV="$PWD/.venv" +echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> "$GITHUB_ENV" + +if ! 
[ -d "$VIRTUAL_ENV" ]; then + echo "No virtualenv found at $VIRTUAL_ENV" + exit 127 +fi + +# Ensure binaries from the virtualenv are available at the start of $PATH +# see https://docs.python.org/3/library/venv.html#creating-virtual-environments +if [ -d "$VIRTUAL_ENV/bin" ]; then + # on unix systems, virtualenv puts executables in .venv/bin + venv_bin_path="$VIRTUAL_ENV/bin" +elif [ -d "$VIRTUAL_ENV/Scripts" ]; then + # on windows, virtualenv places executables in .venv/Scripts + venv_bin_path="$VIRTUAL_ENV/Scripts" +fi + +echo "$venv_bin_path" >> "$GITHUB_PATH" +# see https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-system-path diff --git a/wrappers/python/scripts/ci/github/debug_python_paths.sh b/wrappers/python/scripts/ci/github/debug_python_paths.sh index 580cb601..1cee4911 100755 --- a/wrappers/python/scripts/ci/github/debug_python_paths.sh +++ b/wrappers/python/scripts/ci/github/debug_python_paths.sh @@ -1,15 +1,34 @@ #!/usr/bin/env bash set -eu cd wrappers/python -export VIRTUAL_ENV="$PWD/.venv" -export PATH="$VIRTUAL_ENV/bin:$PATH" + +echo "VIRTUAL_ENV=$VIRTUAL_ENV" + # shellcheck disable=SC2016 echo '$PATH:' -echo "$PATH" | tr ':' '\n - ' +echo "$PATH" | tr ':' '\n' | sed 's/^/ - /g' + +echo +echo " python ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " +echo +python --version command -v python command -v python3 +stat ./.venv/bin/python \ + || stat ./.venv/Scripts/python.exe \ + || echo "missing .venv/bin/python{.exe}" + +echo +echo " poetry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " +echo + command -v poetry || echo "missing poetry" + +echo +echo " mypy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " +echo + if ! command -v mypy; then if command -v mypy.exe; then echo "missing mypy, but found mypy.exe" @@ -17,5 +36,4 @@ if ! 
command -v mypy; then echo "missing mypy{.exe}" fi fi -stat ./.venv/bin/python || stat ./.venv/bin/python.exe || echo "missing .venv/bin/python{.exe}" -python --version + diff --git a/wrappers/python/scripts/ci/github/integration_tests.sh b/wrappers/python/scripts/ci/github/integration_tests.sh index 5029d23d..c7887b23 100755 --- a/wrappers/python/scripts/ci/github/integration_tests.sh +++ b/wrappers/python/scripts/ci/github/integration_tests.sh @@ -2,11 +2,12 @@ set -eu export PATH="$PWD/target/release:$PATH" export PYTHONPATH="$PWD/wrappers/python/src:${PYTHONPATH:-}" +# ^ ensure `import pagefind` imports wrappers/python/src/pagefind/__init__.py export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG cd wrappers/python -python3 -c 'import sys; print("pythonpath\n" + "\n".join(sys.path))' -python3 -c ' +python -c 'import sys; print("pythonpath\n - " + "\n - ".join(sys.path))' +python -c ' import logging import os from pagefind.service import get_executable @@ -15,5 +16,5 @@ logging.basicConfig(level=os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL", "INFO")) print(get_executable()) ' -python3 -m pagefind --help -python3 src/tests/integration.py +python -m pagefind --help +python src/tests/integration.py diff --git a/wrappers/python/scripts/ci/python_lints.sh b/wrappers/python/scripts/ci/python_lints.sh index 5206b242..eaad2b7e 100755 --- a/wrappers/python/scripts/ci/python_lints.sh +++ b/wrappers/python/scripts/ci/python_lints.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash set -eu -python3 -m mypy src scripts -python3 -m ruff check -python3 -m ruff format --check +mypy src scripts +ruff check +ruff format --check From 8f58c1d1e97027c5d8dae317d39d96137f75d2f5 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 28 Sep 2024 10:55:58 -0400 Subject: [PATCH 17/39] ci(wrappers/python): prepare to publish added comments, links to docs about publication to pypi, plus the venv activation script that we can now rely on. 
--- .github/workflows/release.yml | 47 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 01dbd2d2..33489496 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -87,13 +87,15 @@ jobs: env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - publish-pypi-packages: - name: Publish PyPi packages + publish-python-packages: + name: Publish python packages runs-on: ubuntu-latest # ok since none of the scripts depend on version-specific features defaults: run: shell: bash needs: publish-github-release + permissions: # required for trusted publishing to pypi/test-pypi + id-token: write steps: - name: Clone uses: actions/checkout@v4 @@ -117,36 +119,33 @@ jobs: - name: Install dev dependencies run: bash ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh working-directory: ./wrappers/python - - name: debug - run: | - set -x - echo "$PATH" | tr ':' '\n' - command -v python - command -v python3 - command -v poetry || echo "missing poetry" - stat .venv/bin/python - .venv/bin/python --version + - name: activate venv + run: ./wrappers/python/scripts/ci/github/activate_venv.sh + - name: debug python paths + run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh - name: package binaries working-directory: ./wrappers/python - run: | - export PATH="$PWD/.venv/bin:$PATH" - python3 -m scripts.build.all ./vendor # should take ~30s + run: python -m scripts.build.all ./vendor # should take ~30s - name: package python api - run: | - export PATH="$PWD/.venv/bin:$PATH" - python3 -m scripts.build.api_package + working-directory: ./wrappers/python + run: python3 -m scripts.build.api_package - name: Archive dist uses: actions/upload-artifact@v4 with: - path: dist + path: wrappers/python/dist name: python-packages if-no-files-found: error - # TODO: once we have a TEST_PYPI_TOKEN, test publishing the packages. 
- # - name: Publish python packages - # working-directory: ./wrappers/python - # run: | - # export PATH="$PWD/.venv/bin:$PATH" - # ./scripts/publish_to_test_pypi.sh + + - name: Publish to pypi + uses: pypa/gh-action-pypi-publish@release/v1 + # Note: this action requires test-pypi / pypi trusted publishing to be + # configured in the target repository. For instructions, see + # - https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/configuring-openid-connect-in-pypi + # - https://docs.pypi.org/trusted-publishers/ + with: # see https://github.com/pypa/gh-action-pypi-publish/tree/release/v1/?tab=readme-ov-file#customizing-target-package-dists-directory + packages-dir: wrappers/python/dist + repository-url: https://test.pypi.org/ # FIXME: comment this out when ready to publish to pypi + verbose: true # can be commented out once the action is working as expected publish-npm-package: name: Publish NPM packages From 89c795c62649544598ea2743f04ea34ad1b4103a Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 28 Sep 2024 12:07:43 -0400 Subject: [PATCH 18/39] docs(wrappers/python): add docstrings, check links Note that the python docstrings are written using reStructuredText (see https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#rst-primer, https://sphinx-rtd-tutorial.readthedocs.io/en/latest/docstrings.html). This has some notable differences from markdown: ```rst links: `link text ` inline code: ``code`` ``` As a drive-by fix, I made the `PagefindIndex.config -> _config` private instead of noting that it should be immutable -- I think this sends a clearer message. 
Finally, I checked that all the documentation site links were correct: ```sh cd docs npm i hugo # build the docs lychee --include-fragments public/ # check the links ``` This validated the link in ./docs/content/docs/py-api.md work, but it turned up another interesting finding: there's a broken link to https://github.com/CloudCannon/pagefind/blob/main/pagefind/features/compound_filtering.feature. --- docs/content/docs/py-api.md | 35 ++++-- .../python/src/pagefind/index/__init__.py | 110 ++++++++++++++---- 2 files changed, 110 insertions(+), 35 deletions(-) diff --git a/docs/content/docs/py-api.md b/docs/content/docs/py-api.md index 08de3f14..ef19afb0 100644 --- a/docs/content/docs/py-api.md +++ b/docs/content/docs/py-api.md @@ -2,7 +2,7 @@ title: "Indexing content using the Python API" nav_title: "Using the Python API" nav_section: References -weight: 54 +weight: 54 # slightly less weight than the node API --- Pagefind provides an interface to the indexing binary as a Python package you can install and import. @@ -10,12 +10,12 @@ Pagefind provides an interface to the indexing binary as a Python package you ca There are situations where using this Python package is beneficial: - Integrating Pagefind into an existing Python project, e.g. writing a plugin for a static site generator that can pass in-memory HTML files to Pagefind. Pagefind can also return the search index in-memory, to be hosted via the dev mode alongside the files. -- Users looking to index their site and augment that index with extra non-HTML pages can run a standard Pagefind crawl with [`add_directory`](#indexadddirectory) and augment it with [`add_custom_record`](#indexaddcustomrecord). -- Users looking to use Pagefind's engine for searching miscellaneous content such as PDFs or subtitles, where [`add_custom_record`](#indexaddcustomrecord) can be used to build the entire index from scratch. 
+- Users looking to index their site and augment that index with extra non-HTML pages can run a standard Pagefind crawl with [`add_directory`](#indexadd_directory) and augment it with [`add_custom_record`](#indexadd_custom_record). +- Users looking to use Pagefind's engine for searching miscellaneous content such as PDFs or subtitles, where [`add_custom_record`](#indexadd_custom_record) can be used to build the entire index from scratch. ## Example Usage - + ```py @@ -90,10 +90,21 @@ from pagefind.index import PagefindIndex async def main(): async with PagefindIndex() as index: # open the index - ... # write to the index + ... # update the index # the index is closed here and files are written to disk. ``` +Each method of `PagefindIndex` that talks to the backing Pagefind service can raise errors. +If an error is thrown inside `PagefindIndex`'s context, the context closes without writing the index files to disk. + +```py +async def main(): + async with PagefindIndex() as index: # open the index + await index.add_directory("./public") + raise Exception("not today") + # the index closes without writing anything to disk +``` + `PagefindIndex` optionally takes a configuration dictionary that can apply parts of the [Pagefind CLI config](/docs/config-options/). The options available at this level are: ```py @@ -135,8 +146,6 @@ indexed_dir = await index.add_directory("./public", glob="**.{html}") Optionally, a custom `glob` can be supplied which controls which files Pagefind will consume within the directory. The default is shown, and the `glob` option can be omitted entirely. See [Wax patterns documentation](https://github.com/olson-sean-k/wax#patterns) for more details. - - ## index.add_html_file Adds a virtual HTML file to the Pagefind index. Useful for files that don't exist on disk, for example a static site generator that is serving files from memory. 
@@ -168,7 +177,6 @@ Instead of `source_path`, a `url` may be supplied to explicitly set the URL of t The `content` should be the full HTML source, including the outer ` ` tags. This will be run through Pagefind's standard HTML indexing process, and should contain any required Pagefind attributes to control behaviour. - If successful, the `file` object is returned containing metadata about the completed indexing. ## index.add_custom_record @@ -208,8 +216,6 @@ See the [Filters documentation](https://pagefind.app/docs/filtering/) for semant See the [Sort documentation](https://pagefind.app/docs/sorts/) for semantics. *When Pagefind is processing an index, number-like strings will be sorted numerically rather than alphabetically. As such, the value passed in should be `"20"` and not `20`* - - If successful, the `file` object is returned containing metadata about the completed indexing. ## index.get_files @@ -233,7 +239,12 @@ Closing the `PagefindIndex`'s context automatically calls `index.write_files`. If you aren't using `PagefindIndex` as a context manager, calling `index.write_files()` writes the index files to disk, as they would be written when running the standard Pagefind binary directly. ```py -await index.write_files("./public/pagefind") +index = PagefindIndex( + IndexConfig( + output_path="./public/pagefind", + ), +) +await index.write_files() ``` The `output_path` option should contain the path to the desired Pagefind bundle directory. If relative, is relative to the current working directory of your Python process. @@ -244,7 +255,7 @@ Deletes the data for the given index from its backing Pagefind service. Doesn't affect any written files or data returned by `get_files()`. ```python -await index.delete_index(); +await index.delete_index() ``` Calling `index.get_files()` or `index.write_files()` doesn't consume the index, and further modifications can be made. 
In situations where many indexes are being created, the `delete_index` call helps clear out memory from a shared Pagefind binary service. diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index 8bcc3cc8..19c72960 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -20,19 +20,61 @@ class IndexConfig(TypedDict, total=False): root_selector: Optional[str] + """ + The root selector to use for the index. + If not supplied, Pagefind will use the ```` tag. + """ exclude_selectors: Optional[Sequence[str]] + """Extra element selectors that Pagefind should ignore when indexing.""" force_language: Optional[str] + """ + Ignores any detected languages and creates a single index for the entire site as the + provided language. Expects an ISO 639-1 code, such as ``en`` or ``pt``. + """ verbose: Optional[bool] + """ + Prints extra logging while indexing the site. Only affects the CLI, does not impact + web-facing search. + """ logfile: Optional[str] + """ + A path to a file to log indexing output to in addition to stdout. + The file will be created if it doesn't exist and overwritten on each run. + """ keep_index_url: Optional[bool] + """Whether to keep ``index.html`` at the end of search result paths. + + By default, a file at ``animals/cat/index.html`` will be given the URL + ``/animals/cat/``. Setting this option to ``true`` will result in the URL + ``/animals/cat/index.html``. + """ output_path: Optional[str] + """ + The folder to output the search bundle into, relative to the processed site. + Defaults to ``pagefind``. + """ class PagefindIndex: + """Manages a Pagefind index. + + ``PagefindIndex`` operates as an async contextmanager. + Entering the context starts a backing Pagefind service and creates an in-memory index in the backing service. + Exiting the context writes the in-memory index to disk and then shuts down the backing Pagefind service. 
+ + Each method of ``PagefindIndex`` that talks to the backing Pagefind service can raise errors. + If an exception is raised inside ``PagefindIndex``'s context, the context closes without writing the index files to disk. + + ``PagefindIndex`` optionally takes a configuration dictionary that can apply parts of the [Pagefind CLI config](/docs/config-options/). The options available at this level are: + + See the relevant documentation for these configuration options in the + `Configuring the Pagefind CLI ` documentation. + """ + _service: Optional["PagefindService"] = None _index_id: Optional[int] = None - config: Optional[IndexConfig] = None - """Note that config is immutable after initialization.""" + _config: Optional[IndexConfig] = None + """Note that config should be immutable.""" def __init__( self, @@ -40,17 +82,17 @@ def __init__( *, _service: Optional["PagefindService"] = None, _index_id: Optional[int] = None, - # TODO: cache config ): self._service = _service self._index_id = _index_id - self.config = config + self._config = config async def _start(self) -> "PagefindIndex": + """Start the backing Pagefind service and create an in-memory index.""" assert self._index_id is None assert self._service is None self._service = await PagefindService().launch() - _index = await self._service.create_index(self.config) + _index = await self._service.create_index(self._config) self._index_id = _index._index_id return self @@ -61,14 +103,14 @@ async def add_html_file( source_path: Optional[str] = None, url: Optional[str] = None, ) -> InternalIndexedFileResponse: - """ - ARGS: - content: The source HTML content of the file to be parsed. - source_path: The source path of the HTML file if it were to exist on disk. \ + """Add an HTML file to the index. + + :param content: The source HTML content of the file to be parsed. + :param source_path: The source path the HTML file would have on disk. 
\ Must be a relative path, or an absolute path within the current working directory. \ Pagefind will compute the result URL from this path. - url: an explicit URL to use, instead of having Pagefind compute the URL \ - based on the source_path. If not supplied, source_path must be supplied. + :param url: an explicit URL to use, instead of having Pagefind compute the \ + URL based on the source_path. If not supplied, source_path must be supplied. """ assert self._service is not None assert self._index_id is not None @@ -87,6 +129,16 @@ async def add_html_file( async def add_directory( self, path: str, *, glob: Optional[str] = None ) -> InternalIndexedDirResponse: + """Indexes a directory from disk using the standard Pagefind indexing behaviour. + + This is equivalent to running the Pagefind binary with ``--site ``. + + :param path: the path to the directory to index. If the `path` provided is relative, \ + it will be relative to the current working directory of your Python process. + :param glob: a glob pattern to filter files in the directory. If not provided, all \ + files matching ``**.{html}`` are indexed. For more information on glob patterns, \ + see the `Wax patterns documentation `. + """ assert self._service is not None assert self._index_id is not None result = await self._service.send( @@ -101,11 +153,12 @@ async def add_directory( return cast(InternalIndexedDirResponse, result) async def get_files(self) -> List[InternalSyntheticFile]: - """ + """Get raw data of all files in the Pagefind index. + WATCH OUT: this method emits all files. This can be a lot of data, and this amount of data can cause reading from the subprocess pipes to deadlock. - STRICTLY PREFER calling `self.write_files()`. + STRICTLY PREFER calling ``self.write_files()``. 
""" assert self._service is not None assert self._index_id is not None @@ -118,6 +171,10 @@ async def get_files(self) -> List[InternalSyntheticFile]: return result async def delete_index(self) -> None: + """ + Deletes the data for the given index from its backing Pagefind service. + Doesn't affect any written files or data returned by ``get_files()``. + """ assert self._service is not None assert self._index_id is not None result = await self._service.send( @@ -137,14 +194,16 @@ async def add_custom_record( filters: Optional[Dict[str, List[str]]] = None, sort: Optional[Dict[str, str]] = None, ) -> InternalIndexedFileResponse: - """ - ARGS: - content: the raw content of this record. - url: the output URL of this record. Pagefind will not alter this. - language: ISO 639-1 code of the language this record is written in. - meta: the metadata to attach to this record. Supplying a `title` is highly recommended. - filters: the filters to attach to this record. Filters are used to group records together. - sort: the sort keys to attach to this record. + """Add a direct record to the Pagefind index. + + This method is useful for adding non-HTML content to the search results. + + :param content: the raw content of this record. + :param url: the output URL of this record. Pagefind will not alter this. + :param language: ISO 639-1 code of the language this record is written in. + :param meta: the metadata to attach to this record. Supplying a ``title`` is highly recommended. + :param filters: the filters to attach to this record. Filters are used to group records together. + :param sort: the sort keys to attach to this record. """ assert self._service is not None assert self._index_id is not None @@ -164,12 +223,17 @@ async def add_custom_record( return cast(InternalIndexedFileResponse, result) async def write_files(self) -> None: + """Write the index files to disk. 
+ + If you're using PagefindIndex as a context manager, there's no need to call this method: + if no error occurred, closing the context automatically writes the index files to disk. + """ assert self._service is not None assert self._index_id is not None - if not self.config: + if not self._config: output_path = None else: - output_path = self.config.get("output_path") + output_path = self._config.get("output_path") result = await self._service.send( InternalWriteFilesRequest( From 7f670f062d3dd1bb240acb420bf11e2a9accf306 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 28 Sep 2024 13:04:19 -0400 Subject: [PATCH 19/39] ci(wrappers/python): fix PYTHONPATH before windows integration tests --- .github/workflows/test.yml | 7 ++++ .../ci/github/add_src_to_pythonpath.py | 19 ++++++++++ .../scripts/ci/github/integration_tests.sh | 38 ++++++++++++++++--- 3 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 wrappers/python/scripts/ci/github/add_src_to_pythonpath.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4fbf377a..9603dd43 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -149,6 +149,13 @@ jobs: working-directory: ./wrappers/python run: ./scripts/ci/cog/check.sh + - name: set PYTHONPATH + shell: bash + working-directory: ./wrappers/python + run: python ./scripts/ci/github/add_src_to_pythonpath.py + - name: add target/release to windows PATH + shell: bash + run: echo $PWD/target/release >> "$GITHUB_PATH" - name: Test python API timeout-minutes: 1 # ^ guard against the tests getting deadlock if the subprocess pipe diff --git a/wrappers/python/scripts/ci/github/add_src_to_pythonpath.py b/wrappers/python/scripts/ci/github/add_src_to_pythonpath.py new file mode 100644 index 00000000..1df6c370 --- /dev/null +++ b/wrappers/python/scripts/ci/github/add_src_to_pythonpath.py @@ -0,0 +1,19 @@ +""" +Prepend wrappers/python/src to PYTHONPATH. 
+""" + +import os +from pathlib import Path + + +new_pythonpath = str(Path("src").absolute()) +if old_pythonpath := os.environ.get("PYTHONPATH"): + new_pythonpath = os.pathsep.join( + [ # os.pathsep is ":" for unix, ";" for windows + new_pythonpath, + old_pythonpath, + ] + ) + +with open(os.environ["GITHUB_ENV"], "a") as f: + f.write(f"PYTHONPATH={new_pythonpath}\n") diff --git a/wrappers/python/scripts/ci/github/integration_tests.sh b/wrappers/python/scripts/ci/github/integration_tests.sh index c7887b23..884d6bc6 100755 --- a/wrappers/python/scripts/ci/github/integration_tests.sh +++ b/wrappers/python/scripts/ci/github/integration_tests.sh @@ -1,20 +1,46 @@ #!/usr/bin/env bash set -eu -export PATH="$PWD/target/release:$PATH" -export PYTHONPATH="$PWD/wrappers/python/src:${PYTHONPATH:-}" -# ^ ensure `import pagefind` imports wrappers/python/src/pagefind/__init__.py -export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG +# starting in repo root cd wrappers/python -python -c 'import sys; print("pythonpath\n - " + "\n - ".join(sys.path))' + +echo "PATH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo "$PATH" | tr ':' '\n' | sed 's/^/ - /g' + +if ! 
command -v pagefind; then + echo "pagefind not found in PATH" + exit 1 +fi + +# check that PYTHONPATH is set correctly +echo +echo "PYTHONPATH: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo + +python -c 'import sys;print(" - " + "\n - ".join(sys.path))' +# ^ wrappers/python/src should be at the front of the path + +echo +echo "testing import ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo + +export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG python -c ' import logging import os from pagefind.service import get_executable logging.basicConfig(level=os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL", "INFO")) -print(get_executable()) +print(f"exe={get_executable()}") ' +echo +echo "python -m pagefind --help ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo python -m pagefind --help + +echo +echo "running integration tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" +echo + python src/tests/integration.py From 27032b25c52c59cb07c5a4383104809a621c9f95 Mon Sep 17 00:00:00 2001 From: Steven Kalt Date: Sat, 28 Sep 2024 14:57:06 -0400 Subject: [PATCH 20/39] ci(wrappers/python): more logging for python build scripts --- .github/workflows/release.yml | 10 ++- wrappers/python/pyproject.toml | 4 +- .../{all.py => all_binary_only_wheels.py} | 42 +++++++--- wrappers/python/scripts/build/api_package.py | 78 +++++++++++++++++-- 4 files changed, 111 insertions(+), 23 deletions(-) rename wrappers/python/scripts/build/{all.py => all_binary_only_wheels.py} (69%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 33489496..165b62ed 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -125,10 +125,16 @@ jobs: run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh - name: package binaries working-directory: ./wrappers/python - run: python -m scripts.build.all ./vendor # should take ~30s + run: | # should take ~30s; writes wheels to wrappers/python/dist + export 
PAGEFIND_PYTHON_LOG_LEVEL=DEBUG
+          python -m scripts.build.all_binary_only_wheels \
+            --git-tag "${{ github.ref_name }}" \
+            --bin-dir ./vendor
       - name: package python api
         working-directory: ./wrappers/python
-        run: python3 -m scripts.build.api_package
+        run: | # writes sdist + wheel to wrappers/python/dist
+          export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG
+          python -m scripts.build.api_package --tag "${{ github.ref_name }}"
       - name: Archive dist
         uses: actions/upload-artifact@v4
         with:
diff --git a/wrappers/python/pyproject.toml b/wrappers/python/pyproject.toml
index 68742ea7..67f39657 100644
--- a/wrappers/python/pyproject.toml
+++ b/wrappers/python/pyproject.toml
@@ -1,8 +1,8 @@
+
 [tool.poetry]
 name = "pagefind"
 version = "0.0.0a0"
-# note that ^this^ is the version number of the python API, not the version of
-# the pagefind executable.
+# note this^^^^^^^ version will be replaced by scripts/build/api_package.py
 description = "Python API for Pagefind"
 authors = ["CloudCannon"]
 license = "MIT"
diff --git a/wrappers/python/scripts/build/all.py b/wrappers/python/scripts/build/all_binary_only_wheels.py
similarity index 69%
rename from wrappers/python/scripts/build/all.py
rename to wrappers/python/scripts/build/all_binary_only_wheels.py
index 946e615d..e0d6a179 100644
--- a/wrappers/python/scripts/build/all.py
+++ b/wrappers/python/scripts/build/all_binary_only_wheels.py
@@ -1,10 +1,11 @@
 """A script that builds all the pagefind binary-only wheels."""
 
-import os
+import logging
+import re
 import tarfile
 import tempfile
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import List, NamedTuple, Optional
 from argparse import ArgumentParser
 
 from . 
import dist_dir, setup_logging @@ -21,9 +22,12 @@ "pagefind_extended.exe", ) +log = logging.getLogger(__name__) + def find_bin(dir: Path) -> Path: for file in dir.iterdir(): + log.debug("Checking for executable @ %s", (dir / file).absolute()) if file.is_file() and file.name in __candidates: return file raise FileNotFoundError(f"Could not find any of {__candidates} in {dir}") @@ -36,6 +40,7 @@ def get_llvm_triple(tar_gz: Path) -> str: llvm_triple = llvm_triple.removesuffix(".tar.gz") llvm_triple = llvm_triple.removeprefix(f"pagefind-{tag_name}-") llvm_triple = llvm_triple.removeprefix(f"pagefind_extended-{tag_name}-") + log.debug(f"derived llvm_triple {llvm_triple} from {tar_gz.name}") return llvm_triple @@ -51,27 +56,39 @@ def check_platforms(certified: List[Path]) -> None: raise ValueError(err_message) -def parse_args() -> Tuple[bool, Optional[Path]]: +class Args(NamedTuple): + dry_run: bool + bin_dir: Optional[Path] + tag: Optional[str] + + +def parse_args() -> Args: parser = ArgumentParser() + parser.add_argument("--tag", type=str, default=None) parser.add_argument("--dry-run", action="store_true") - parser.add_argument("DIR", type=Path, default=None, nargs="?") + parser.add_argument("--bin-dir", type=Path, default=None) args = parser.parse_args() dry_run: bool = args.dry_run - bin_dir: Optional[Path] = args.DIR - return dry_run, bin_dir + bin_dir: Optional[Path] = args.bin_dir + tag: Optional[str] = args.tag + return Args(dry_run=dry_run, bin_dir=bin_dir, tag=tag) if __name__ == "__main__": - dry_run, bin_dir = parse_args() + dry_run, bin_dir, tag_name = parse_args() + log.debug("args: dry_run=%s; bin_dir=%s; tag_name=%s", dry_run, bin_dir, tag_name) setup_logging() if bin_dir is None: + log.debug("no bin_dir specified, downloading latest release") + assert tag_name is None, f"--tag={tag_name} conflicts with downloading" certified, tag_name = download("latest", dry_run=False) else: - if os.environ.get("GIT_VERSION") is None: - raise KeyError("Missing DIR 
argument and GIT_VERSION environment variable")
-        else:
-            tag_name = os.environ["GIT_VERSION"]
         certified = find_bins(bin_dir)
+    if tag_name is None:
+        raise ValueError("tag_name is None")
+    assert re.match(
+        r"^v\d+\.\d+\.\d+(-\w+)?", tag_name
+    ), f"Invalid tag_name: {tag_name}"
 
     check_platforms(certified)
     if not dry_run:
@@ -79,8 +96,11 @@ def parse_args() -> Tuple[bool, Optional[Path]]:
     dist_dir.mkdir(exist_ok=True)
 
     for tar_gz in certified:
+        log.info("Processing %s", tar_gz)
         llvm_triple = get_llvm_triple(tar_gz)
+        log.debug("llvm_triple=%s", llvm_triple)
         platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS[llvm_triple]
+        log.debug("platform=%s", platform)
         if platform is None:
             raise ValueError(f"Unsupported platform: {llvm_triple}")
         # TODO: avoid writing the extracted bin to disk
diff --git a/wrappers/python/scripts/build/api_package.py b/wrappers/python/scripts/build/api_package.py
index 9a26e994..99de3364 100644
--- a/wrappers/python/scripts/build/api_package.py
+++ b/wrappers/python/scripts/build/api_package.py
@@ -2,30 +2,92 @@
 # optional dependencies. It might be preferable to use setuptools directly rather than
 # work around poetry.
 
-from . import python_root, setup_logging
+import logging
 import subprocess
 import re
-import os
+from argparse import ArgumentParser
+
+from . import python_root, setup_logging
 
 pyproject_toml = python_root / "pyproject.toml"
 
+cli = ArgumentParser()
+cli.add_argument("--dry-run", action="store_true")
+cli.add_argument("--tag", required=True, help="The version to build.")
+log = logging.getLogger(__name__)
+
+
+def process_tag(tag: str) -> str:
+    """Convert a git tag to a version string compliant with PEP 440.
+    See https://peps.python.org/pep-0440/#public-version-identifiers
+    """
+    pattern = (
+        # note that this pattern accepts a superset of the tagging pattern used
+        # in this repository.
+        r"^v(?P<major>\d+)"
+        r"\.(?P<minor>\d+)"
+        r"\.(?P<patch>\d+)"
+        r"(-"
+        r"(?P<prerelease_kind>alpha|beta|rc)"
+        r"\.?(?P<prerelease_number>\d+)"
+        ")?"
+ ) + parts = re.match(pattern, tag) + if parts is None: + raise ValueError(f"Invalid tag: `{tag}` does not match pattern `{pattern}`") + major = int(parts["major"]) + minor = int(parts["minor"]) + patch = int(parts["patch"]) + suffix = "" + + if (prerelease_kind := parts["prerelease_kind"]) is not None: + if prerelease_kind == "rc": + suffix = "rc" + elif prerelease_kind.startswith("alpha"): + suffix = "a" + elif prerelease_kind.startswith("beta"): + suffix = "b" + if (prerelease_number := parts["prerelease_number"]) is not None: + suffix += str(int(prerelease_number)) + + return f"{major}.{minor}.{patch}{suffix}" + def main() -> None: - version = os.environ.get("PAGEFIND_VERSION") - if version is None: - version = "1" + setup_logging() + args = cli.parse_args() + tag: str = args.tag + dry_run: bool = args.dry_run + log.debug("args: dry_run=%s; tag=%s", dry_run, tag) + version = process_tag(tag) + + log.info("Building version %s", version) + # create a pyproject.toml with updated versions original = pyproject_toml.read_text() temp = "" for line in original.splitlines(): - if line.endswith("#!!opt"): - line = line.removeprefix("# ") + "\n" + if "0.0.0a0" in line: + line = line.replace("0.0.0a0", version) + log.debug("patching: %s", line) + elif line.endswith("#!!opt"): + line = line.removeprefix("# ").removesuffix("#!!opt") line = re.sub(r'version = "[^"]+"', f'version = "~={version}"', line) + log.debug("patching: %s", line) temp += line + "\n" + log.debug("patched pyproject.toml", extra={"updated": temp}) + + if dry_run: + return + with pyproject_toml.open("w") as f: f.write(temp) + log.debug("wrote patched pyproject.toml") + + log.info("Building API package") subprocess.run(["poetry", "build"], check=True) - with pyproject_toml.open("w") as f: + with pyproject_toml.open("w") as f: # restore the original f.write(original) + log.debug("restored original pyproject.toml") if __name__ == "__main__": From c929fce215547288d1b64c1f8acdb091ee97a2f6 Mon Sep 17 00:00:00 
2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:27:53 +1300 Subject: [PATCH 21/39] Decode the files returned from `get_files` in the Python API --- wrappers/python/src/pagefind/index/__init__.py | 14 +++++++++++--- wrappers/python/src/pagefind/service/types.py | 3 +++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index 19c72960..fcfb4023 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -1,4 +1,5 @@ import logging +import base64 from typing import Any, Dict, List, Optional, Sequence, TypedDict, cast from ..service import PagefindService @@ -12,6 +13,7 @@ InternalIndexedDirResponse, InternalIndexedFileResponse, InternalSyntheticFile, + InternalDecodedFile, InternalWriteFilesRequest, ) @@ -152,7 +154,7 @@ async def add_directory( assert result["type"] == "IndexedDir" return cast(InternalIndexedDirResponse, result) - async def get_files(self) -> List[InternalSyntheticFile]: + async def get_files(self) -> List[InternalDecodedFile]: """Get raw data of all files in the Pagefind index. WATCH OUT: this method emits all files. 
This can be a lot of data, and @@ -167,8 +169,14 @@ async def get_files(self) -> List[InternalSyntheticFile]: InternalGetFilesRequest(type="GetFiles", index_id=self._index_id) ) assert response["type"] == "GetFiles" - result = cast(InternalGetFilesResponse, response)["files"] - return result + files = cast(InternalGetFilesResponse, response)["files"] + + decoded_files = [ + {'path': file['path'], 'content': base64.b64decode(file['content'])} + for file in files + ] + + return cast(List[InternalDecodedFile], decoded_files) async def delete_index(self) -> None: """ diff --git a/wrappers/python/src/pagefind/service/types.py b/wrappers/python/src/pagefind/service/types.py index fa76aec7..1930a8d9 100644 --- a/wrappers/python/src/pagefind/service/types.py +++ b/wrappers/python/src/pagefind/service/types.py @@ -132,6 +132,9 @@ class InternalSyntheticFile(TypedDict): path: str content: str +class InternalDecodedFile(TypedDict): + path: str + content: bytes class InternalGetFilesResponse(TypedDict): type: Literal["GetFiles"] From afd90744de72444447ec73370821937c8240b5ba Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:28:45 +1300 Subject: [PATCH 22/39] Check `PAGEFIND_BINARY_PATH` in the Python API --- wrappers/python/src/pagefind/service/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/wrappers/python/src/pagefind/service/__init__.py b/wrappers/python/src/pagefind/service/__init__.py index 75c2c788..5a0239b5 100644 --- a/wrappers/python/src/pagefind/service/__init__.py +++ b/wrappers/python/src/pagefind/service/__init__.py @@ -30,6 +30,11 @@ def get_executable() -> Optional[Path]: + env_bin_path = os.getenv("PAGEFIND_BINARY_PATH") + if env_bin_path is not None: + log.debug(f"using {env_bin_path}") + return Path(env_bin_path) + try: from pagefind_bin_extended import get_executable # type: ignore From d66e88731e625cca0cac661a1b63d45f75afbbe2 Mon Sep 17 00:00:00 2001 From: Liam Bigelow 
<40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:29:13 +1300 Subject: [PATCH 23/39] Update toolproof --- test_ci.sh | 2 +- test_interactive.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test_ci.sh b/test_ci.sh index 543e4277..b6591c1a 100755 --- a/test_ci.sh +++ b/test_ci.sh @@ -6,4 +6,4 @@ cd "$SCRIPT_DIR" PAGEFIND=$(realpath "$SCRIPT_DIR/target/$1/pagefind") REPO_WD=$(realpath "$SCRIPT_DIR") -npx -y toolproof@0.3.0 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 1 +npx -y toolproof@0.4.0 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 1 diff --git a/test_interactive.sh b/test_interactive.sh index d4b42cfb..2e6a4bac 100755 --- a/test_interactive.sh +++ b/test_interactive.sh @@ -10,4 +10,4 @@ cd $SCRIPT_DIR PAGEFIND=$(realpath "$SCRIPT_DIR/target/release/pagefind") REPO_WD=$(realpath "$SCRIPT_DIR") -npx -y toolproof@0.3.0 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 4 -i +npx -y toolproof@0.4.0 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 4 -i From 0d495b318588d86b1f235803435e6214cd4204df Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:31:53 +1300 Subject: [PATCH 24/39] Add first Python API Toolproof test --- .../python_api/background.toolproof.yml | 13 ++++++ ...-index-to-memory-via-the-api.toolproof.yml | 43 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 pagefind/integration_tests/python_api/background.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-memory-via-the-api.toolproof.yml diff --git a/pagefind/integration_tests/python_api/background.toolproof.yml b/pagefind/integration_tests/python_api/background.toolproof.yml new file mode 100644 index 00000000..2331011b --- /dev/null +++ b/pagefind/integration_tests/python_api/background.toolproof.yml @@ -0,0 +1,13 @@ +# Requirements: +# venv +# 
python>=3.11 + +name: Python API Setup +type: reference +steps: + - step: I have a "public/index.html" file with the content {html} + html: >- +

Nothing

+ - step: I run 'python3 -m venv "$PWD/.venv"' + - step: I run 'source "$PWD/.venv/bin/activate"' diff --git a/pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-memory-via-the-api.toolproof.yml b/pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-memory-via-the-api.toolproof.yml new file mode 100644 index 00000000..45bc9441 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-memory-via-the-api.toolproof.yml @@ -0,0 +1,43 @@ +name: Python API > Build a synthetic index to memory via the api +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + await index.add_html_file( + content="

Testing, testing

", + source_path="dogs/index.html", + ) + + files = await index.get_files() + + for file in files: + if "pagefind.js" in file["path"]: + print(file["content"]) + print(f"JS is at {file["path"]}") + + fragments = [file for file in files if "fragment" in file["path"]] + print(f"{len(fragments)} fragment(s)") + + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "pagefind_version=" + - step: stdout should contain "JS is at pagefind.js" + - step: stdout should contain "1 fragment(s)" + - step: I run "ls -lh public/pagefind/pagefind.js" and expect it to fail + notes: "TODO: Build a file existence check into toolproof" From 72595ca17bbf9292c37d3bc653524ac8a6e77c02 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:43:03 +1300 Subject: [PATCH 25/39] Remove `source` command --- pagefind/integration_tests/python_api/background.toolproof.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/pagefind/integration_tests/python_api/background.toolproof.yml b/pagefind/integration_tests/python_api/background.toolproof.yml index 2331011b..70b9ad61 100644 --- a/pagefind/integration_tests/python_api/background.toolproof.yml +++ b/pagefind/integration_tests/python_api/background.toolproof.yml @@ -10,4 +10,3 @@ steps:

Nothing

- step: I run 'python3 -m venv "$PWD/.venv"' - - step: I run 'source "$PWD/.venv/bin/activate"' From 8ad5f0e7e0fb5162b2f87109641cec24bc632149 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:56:12 +1300 Subject: [PATCH 26/39] Install Python earlier in CI --- .github/workflows/test.yml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9603dd43..15f54e70 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -65,6 +65,13 @@ jobs: default: true components: rustfmt, clippy + - name: Set up python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + # NOTE: ^this strategy leaves older python versions intentionally + # without test coverage to keep CI fast. + - name: Check versions run: | cargo --version @@ -112,16 +119,9 @@ jobs: - name: Test CLI run: ./test_ci.sh "release" - - name: Set up python 3.12 - uses: actions/setup-python@v5 - with: - python-version: "3.12" - # NOTE: ^this strategy leaves older python versions intentionally - # without test coverage to keep CI fast. 
- - name: Set up poetry run: ./wrappers/python/scripts/ci/github/setup_poetry.sh - + - name: cache venv uses: actions/cache@v4 with: @@ -142,7 +142,7 @@ jobs: if: runner.os == 'Linux' working-directory: ./wrappers/python run: ./scripts/ci/python_lints.sh - + - name: ensure cog up-to-date # avoid duplicating linting work on different OSes if: runner.os == 'Linux' @@ -161,4 +161,3 @@ jobs: # ^ guard against the tests getting deadlock if the subprocess pipe # gets clogged run: ./wrappers/python/scripts/ci/github/integration_tests.sh - From cf1e868edb27654234e3a1e51ce9f9ce95c53bc8 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:56:56 +1300 Subject: [PATCH 27/39] Python API: Allow `write_files` to take an optional `output_path` --- wrappers/python/src/pagefind/index/__init__.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index fcfb4023..905e9c41 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -230,18 +230,24 @@ async def add_custom_record( assert result["type"] == "IndexedFile" return cast(InternalIndexedFileResponse, result) - async def write_files(self) -> None: + async def write_files( + self, + output_path: Optional[str] = None + ) -> None: """Write the index files to disk. If you're using PagefindIndex as a context manager, there's no need to call this method: if no error occurred, closing the context automatically writes the index files to disk. + + :param output_path: a path to override the configured output path for the index. 
""" assert self._service is not None assert self._index_id is not None - if not self._config: - output_path = None - else: - output_path = self._config.get("output_path") + if not output_path: + if not self._config: + output_path = None + else: + output_path = self._config.get("output_path") result = await self._service.send( InternalWriteFilesRequest( From 39613f6384858e75c4f21bf4a749a0ad386a9de4 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 09:57:41 +1300 Subject: [PATCH 28/39] Add Python API toolproof test --- ...dex-is-not-consumed-on-write.toolproof.yml | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 pagefind/integration_tests/python_api/py-an-index-is-not-consumed-on-write.toolproof.yml diff --git a/pagefind/integration_tests/python_api/py-an-index-is-not-consumed-on-write.toolproof.yml b/pagefind/integration_tests/python_api/py-an-index-is-not-consumed-on-write.toolproof.yml new file mode 100644 index 00000000..289cf689 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-an-index-is-not-consumed-on-write.toolproof.yml @@ -0,0 +1,83 @@ +name: Python API > An index is not consumed on write +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "output/index.html" file with the content {html} + html: >- +

Nothing

+ - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + config = IndexConfig( + output_path="./pagefind", + ) + + async with PagefindIndex(config=config) as index: + await index.add_html_file( + content="

Testing, testing

", + source_path="dogs/index.html", + ) + await index.write_files(output_path="../output/pagefind"); + + await index.add_html_file( + content="

Testing, testing

", + source_path="rabbits/index.html", + ) + + files = await index.get_files() + + fragments = [file for file in files if "fragment" in file["path"]] + print(f"{len(fragments)} fragment(s)") + + await index.add_html_file( + content="

Testing, testing

", + source_path="cats/index.html", + ) + + print("Finished") + + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Finished" + - step: stdout should contain "2 fragment(s)" + - step: The file "output/pagefind/pagefind.js" should not be empty + - step: I serve the directory "output" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let pages = await Promise.all(search.results.map(r => r.data())); + let matches = pages.map(p => p.url).sort().join(', '); + + toolproof.assert_eq(matches, `/dogs/`); + - step: In my browser, the console should be empty + - step: I serve the directory "public" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let pages = await Promise.all(search.results.map(r => r.data())); + let matches = pages.map(p => p.url).sort().join(', '); + + toolproof.assert_eq(matches, `/cats/, /dogs/, /rabbits/`); + - step: In my browser, the console should be empty From 3d81b6427b648634d798ddd3e78d3efec7c71f1d Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:06:41 +1300 Subject: [PATCH 29/39] Fix Python lint --- wrappers/python/src/pagefind/index/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index 905e9c41..4b108182 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -12,7 +12,6 @@ InternalGetFilesResponse, InternalIndexedDirResponse, InternalIndexedFileResponse, - InternalSyntheticFile, InternalDecodedFile, 
InternalWriteFilesRequest, ) From 6b4684ae0a9c08f021ecff29ba9258c4e405170c Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:07:01 +1300 Subject: [PATCH 30/39] Toolproof: "Python API > Build a blended index to memory via the api" --- ...-index-to-memory-via-the-api.toolproof.yml | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 pagefind/integration_tests/python_api/py-build-a-blended-index-to-memory-via-the-api.toolproof.yml diff --git a/pagefind/integration_tests/python_api/py-build-a-blended-index-to-memory-via-the-api.toolproof.yml b/pagefind/integration_tests/python_api/py-build-a-blended-index-to-memory-via-the-api.toolproof.yml new file mode 100644 index 00000000..43798e2a --- /dev/null +++ b/pagefind/integration_tests/python_api/py-build-a-blended-index-to-memory-via-the-api.toolproof.yml @@ -0,0 +1,66 @@ +name: Python API > Build a blended index to memory via the api +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: >- + I have a "public/custom_files/real/index.html" file with the content + {html} + html: >- +

A testing file that + exists on disk

+ - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + await index.add_directory( + path="custom_files" + ) + await index.add_custom_record( + url="/synth/", + content="A testing file that doesn't exist.", + language="en" + ) + + files = await index.get_files() + + for file in files: + output_path = os.path.join("pagefind", file["path"]) + dir = os.path.dirname(output_path) + if not os.path.exists(dir): + os.makedirs(dir, exist_ok=True) + + with open(output_path, 'wb') as f: + f.write(file["content"]) + + print("Donezo!") + + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Donezo!" + - step: The file "public/pagefind/pagefind.js" should not be empty + - step: I serve the directory "public" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let pages = await Promise.all(search.results.map(r => r.data())); + let matches = pages.map(p => p.url).sort().join(', '); + + toolproof.assert_eq(matches, `/real/, /synth/`); + - step: In my browser, the console should be empty From 213986101853ea70ce8c30ff0f3deb820b46eb91 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:44:59 +1300 Subject: [PATCH 31/39] Python formatting --- wrappers/python/src/pagefind/index/__init__.py | 7 ++----- wrappers/python/src/pagefind/service/types.py | 2 ++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/wrappers/python/src/pagefind/index/__init__.py 
b/wrappers/python/src/pagefind/index/__init__.py index 4b108182..68e04371 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -171,7 +171,7 @@ async def get_files(self) -> List[InternalDecodedFile]: files = cast(InternalGetFilesResponse, response)["files"] decoded_files = [ - {'path': file['path'], 'content': base64.b64decode(file['content'])} + {"path": file["path"], "content": base64.b64decode(file["content"])} for file in files ] @@ -229,10 +229,7 @@ async def add_custom_record( assert result["type"] == "IndexedFile" return cast(InternalIndexedFileResponse, result) - async def write_files( - self, - output_path: Optional[str] = None - ) -> None: + async def write_files(self, output_path: Optional[str] = None) -> None: """Write the index files to disk. If you're using PagefindIndex as a context manager, there's no need to call this method: diff --git a/wrappers/python/src/pagefind/service/types.py b/wrappers/python/src/pagefind/service/types.py index 1930a8d9..03ee6943 100644 --- a/wrappers/python/src/pagefind/service/types.py +++ b/wrappers/python/src/pagefind/service/types.py @@ -132,10 +132,12 @@ class InternalSyntheticFile(TypedDict): path: str content: str + class InternalDecodedFile(TypedDict): path: str content: bytes + class InternalGetFilesResponse(TypedDict): type: Literal["GetFiles"] files: List[InternalSyntheticFile] From e6c6f5e4a978d59ebbdff6f21da66d4f5543e175 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:10:51 +1300 Subject: [PATCH 32/39] Python API: Fix `DeleteIndex` response --- wrappers/python/src/pagefind/index/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index 68e04371..ff86adc4 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py 
@@ -187,7 +187,7 @@ async def delete_index(self) -> None: result = await self._service.send( InternalDeleteIndexRequest(type="DeleteIndex", index_id=self._index_id) ) - assert result["type"] == "DeletedIndex" + assert result["type"] == "DeleteIndex" self._index_id = None self._service = None From 982708c64b9e44aa0880d9ab3c19c4a42d2516b3 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:18:55 +1300 Subject: [PATCH 33/39] Allow deleting a Pagefind index in a context, update docs to match --- docs/content/docs/py-api.md | 25 ++++++++---- ...y-close-the-pagefind-backend.toolproof.yml | 38 +++++++++++++++++++ .../python/src/pagefind/index/__init__.py | 6 ++- 3 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 pagefind/integration_tests/python_api/py-close-the-pagefind-backend.toolproof.yml diff --git a/docs/content/docs/py-api.md b/docs/content/docs/py-api.md index ef19afb0..c12758bd 100644 --- a/docs/content/docs/py-api.md +++ b/docs/content/docs/py-api.md @@ -234,17 +234,28 @@ for file in (await index.get_files()): ## index.write_files -Closing the `PagefindIndex`'s context automatically calls `index.write_files`. +Calling `index.write_files()` writes the index files to disk, as they would be written when running the standard Pagefind binary directly. -If you aren't using `PagefindIndex` as a context manager, calling `index.write_files()` writes the index files to disk, as they would be written when running the standard Pagefind binary directly. +Closing the `PagefindIndex`'s context automatically calls `index.write_files`, so calling this function is not necessary in normal operation. + +Calling this function won't prevent files being written when the context closes, which may cause duplicate files to be written. +If calling this function manually, you probably want to also call `index.delete_index()`. 
```py -await index = PagefindIndex( - IndexConfig( - output_path="./public/pagefind", - ), +config = IndexConfig( + output_path="./public/pagefind", ) -await index.write_files() +async with PagefindIndex(config=config) as index: + # ... add content to index + + # write files to the configured output path for the index: + await index.write_files() + + # write files to a different output path: + await index.write_files(output_path="./custom/pagefind") + + # prevent also writing files when closing the `PagefindIndex`: + await index.delete_index() ``` The `output_path` option should contain the path to the desired Pagefind bundle directory. If relative, is relative to the current working directory of your Python process. diff --git a/pagefind/integration_tests/python_api/py-close-the-pagefind-backend.toolproof.yml b/pagefind/integration_tests/python_api/py-close-the-pagefind-backend.toolproof.yml new file mode 100644 index 00000000..8eba935f --- /dev/null +++ b/pagefind/integration_tests/python_api/py-close-the-pagefind-backend.toolproof.yml @@ -0,0 +1,38 @@ +name: Python API > Close the Pagefind backend +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + files = await index.get_files() + + for file in files: + print(file["path"]) + + await index.delete_index() + + try: + files = await index.get_files() + except AssertionError: + print("errored getting files after close") + + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "pagefind.js" + - step: stdout should contain "pagefind-ui.js" + - step: stdout should 
contain "errored getting files after close" diff --git a/wrappers/python/src/pagefind/index/__init__.py b/wrappers/python/src/pagefind/index/__init__.py index ff86adc4..3791c097 100644 --- a/wrappers/python/src/pagefind/index/__init__.py +++ b/wrappers/python/src/pagefind/index/__init__.py @@ -265,8 +265,10 @@ async def __aexit__( exc_value: Optional[Any], traceback: Optional[Any], ) -> None: - assert self._service is not None - assert self._index_id is not None + if self._service is None: + return + if self._index_id is None: + return if exc_type is None: await self.write_files() await self._service.close() From a53f3c2586edf6b169627ebc675fae2292fd2b16 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:51:20 +1300 Subject: [PATCH 34/39] Python API: Improve error handling for messages that failed to parse --- .../py-pagefind-error-handling.toolproof.yml | 38 +++++++++++++++++++ .../python/src/pagefind/service/__init__.py | 9 ++++- 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 pagefind/integration_tests/python_api/py-pagefind-error-handling.toolproof.yml diff --git a/pagefind/integration_tests/python_api/py-pagefind-error-handling.toolproof.yml b/pagefind/integration_tests/python_api/py-pagefind-error-handling.toolproof.yml new file mode 100644 index 00000000..49c11d48 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-pagefind-error-handling.toolproof.yml @@ -0,0 +1,38 @@ +name: Python API > Pagefind error handling +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + await index.delete_index() + try: + files = await index.get_files() 
+ except AssertionError: + print("Index deleted") + + try: + async with PagefindIndex(IndexConfig(root_selector=5)) as index: + await index.delete_index() + except Exception as e: + print(f"Caught error {e}") + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: 'stdout should contain "invalid type: integer `5`"' + - step: stdout should contain "Index deleted" diff --git a/wrappers/python/src/pagefind/service/__init__.py b/wrappers/python/src/pagefind/service/__init__.py index 5a0239b5..dba1864b 100644 --- a/wrappers/python/src/pagefind/service/__init__.py +++ b/wrappers/python/src/pagefind/service/__init__.py @@ -160,7 +160,14 @@ async def _wait_for_responses(self) -> None: if (resp := json.loads(base64.b64decode(output[:-1]))) is None: continue resp = cast(InternalServiceResponse, resp) - if (message_id := resp.get("message_id")) is not None: + message_id = resp.get("message_id") + if message_id is None: + # If the backend service failed to parse the message, it won't return the ID + # However it does return the message itself, so we can retrieve the ID we sent + if (orginal := resp["payload"].get("original_message")) is not None: + if (sent := json.loads(orginal)) is not None: + message_id = sent.get("message_id") + if message_id is not None: log.debug(f"received response for message {message_id}") assert ( self._message_id >= message_id From 035f7a7e2fe1dd4eef82c77aabeac96710f79410 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:55:41 +1300 Subject: [PATCH 35/39] Final tranche of Toolproof Python API tests to meet Node API parity --- ...ic-index-to-disk-via-the-api.toolproof.yml | 42 ++++++++++++++++ ...den-urls-to-disk-via-the-api.toolproof.yml | 43 ++++++++++++++++ ...ue-index-to-disk-via-the-api.toolproof.yml | 47 +++++++++++++++++ ...om-disk-location-via-the-api.toolproof.yml | 50 
+++++++++++++++++++ ...akes-precedence-over-records.toolproof.yml | 47 +++++++++++++++++ ...d-empty-index-returns-assets.toolproof.yml | 38 ++++++++++++++ .../py-pagefind-service-config.toolproof.yml | 48 ++++++++++++++++++ 7 files changed, 315 insertions(+) create mode 100644 pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-disk-via-the-api.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-build-a-synthetic-index-with-overridden-urls-to-disk-via-the-api.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-build-a-true-index-to-disk-via-the-api.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-build-an-index-to-a-custom-disk-location-via-the-api.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-force-language-takes-precedence-over-records.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-pagefind-empty-index-returns-assets.toolproof.yml create mode 100644 pagefind/integration_tests/python_api/py-pagefind-service-config.toolproof.yml diff --git a/pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-disk-via-the-api.toolproof.yml b/pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-disk-via-the-api.toolproof.yml new file mode 100644 index 00000000..3f0f78d3 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-build-a-synthetic-index-to-disk-via-the-api.toolproof.yml @@ -0,0 +1,42 @@ +name: Python API > Build a synthetic index to disk via the api +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + await index.add_html_file( + content="

Testing, testing

", + source_path="dogs/index.html", + ) + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: The file "public/pagefind/pagefind.js" should not be empty + - step: I serve the directory "public" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let data = await search.results[0].data(); + toolproof.assert_eq(data.url, `/dogs/`); + - step: In my browser, the console should be empty diff --git a/pagefind/integration_tests/python_api/py-build-a-synthetic-index-with-overridden-urls-to-disk-via-the-api.toolproof.yml b/pagefind/integration_tests/python_api/py-build-a-synthetic-index-with-overridden-urls-to-disk-via-the-api.toolproof.yml new file mode 100644 index 00000000..6e815be7 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-build-a-synthetic-index-with-overridden-urls-to-disk-via-the-api.toolproof.yml @@ -0,0 +1,43 @@ +name: Python API > Build a synthetic index with overridden urls to disk via the api + +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + await index.add_html_file( + content="

Testing, testing

", + url="/my-custom-url/", + ) + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: The file "public/pagefind/pagefind.js" should not be empty + - step: I serve the directory "public" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let data = await search.results[0].data(); + toolproof.assert_eq(data.url, `/my-custom-url/`); + - step: In my browser, the console should be empty diff --git a/pagefind/integration_tests/python_api/py-build-a-true-index-to-disk-via-the-api.toolproof.yml b/pagefind/integration_tests/python_api/py-build-a-true-index-to-disk-via-the-api.toolproof.yml new file mode 100644 index 00000000..850f7e83 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-build-a-true-index-to-disk-via-the-api.toolproof.yml @@ -0,0 +1,47 @@ +name: Python API > Build a true index to disk via the api +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: >- + I have a "public/custom_files/real/index.html" file with the content + {html} + html: >- +

A testing file that + exists on disk

+ - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + await index.add_directory( + path="custom_files", + ) + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: The file "public/pagefind/pagefind.js" should not be empty + - step: I serve the directory "public" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let data = await search.results[0].data(); + toolproof.assert_eq(data.url, `/real/`); + - step: In my browser, the console should be empty diff --git a/pagefind/integration_tests/python_api/py-build-an-index-to-a-custom-disk-location-via-the-api.toolproof.yml b/pagefind/integration_tests/python_api/py-build-an-index-to-a-custom-disk-location-via-the-api.toolproof.yml new file mode 100644 index 00000000..14cec42c --- /dev/null +++ b/pagefind/integration_tests/python_api/py-build-an-index-to-a-custom-disk-location-via-the-api.toolproof.yml @@ -0,0 +1,50 @@ +name: Python API > Build an index to a custom disk location via the api +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "output/index.html" file with the content {html} + html: >- +

Nothing

+ - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + config = IndexConfig( + output_path="../output/pagefind", + ) + + async with PagefindIndex(config=config) as index: + await index.add_html_file( + content="

Testing, testing

", + source_path="dogs/index.html", + ) + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: The file "output/pagefind/pagefind.js" should not be empty + - step: I serve the directory "output" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let data = await search.results[0].data(); + toolproof.assert_eq(data.url, `/dogs/`); + - step: In my browser, the console should be empty diff --git a/pagefind/integration_tests/python_api/py-force-language-takes-precedence-over-records.toolproof.yml b/pagefind/integration_tests/python_api/py-force-language-takes-precedence-over-records.toolproof.yml new file mode 100644 index 00000000..1c966ee1 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-force-language-takes-precedence-over-records.toolproof.yml @@ -0,0 +1,47 @@ +name: Python API > Force language takes precedence over records +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + config = IndexConfig( + force_language="fr", + ) + async with PagefindIndex(config=config) as index: + await index.add_custom_record( + url="/one/", + content="Testing file #1", + language="pt", + ) + await index.add_html_file( + source_path="two/index.html", + content="

Testing file #2

", + ) + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: The file "public/pagefind/pagefind.js" should not be empty + - step: I run "ls -lh public/pagefind/wasm.unknown.pagefind" + notes: "TODO: Build a file existence check into toolproof" + - step: I run "ls -lh public/pagefind/wasm.fr.pagefind" + notes: "TODO: Build a file existence check into toolproof" + - step: I run "ls -lh public/pagefind/wasm.pt.pagefind" and expect it to fail + notes: "TODO: Build a file existence check into toolproof" + - step: I run "ls -lh public/pagefind/wasm.en.pagefind" and expect it to fail + notes: "TODO: Build a file existence check into toolproof" diff --git a/pagefind/integration_tests/python_api/py-pagefind-empty-index-returns-assets.toolproof.yml b/pagefind/integration_tests/python_api/py-pagefind-empty-index-returns-assets.toolproof.yml new file mode 100644 index 00000000..3f067e37 --- /dev/null +++ b/pagefind/integration_tests/python_api/py-pagefind-empty-index-returns-assets.toolproof.yml @@ -0,0 +1,38 @@ +name: Python API > Pagefind empty index returns assets +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + async with PagefindIndex() as index: + files = await index.get_files() + + for file in files: + print(file["path"]) + + await index.delete_index() + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: stdout should contain "pagefind.js" + - 
step: stdout should contain "pagefind-ui.js" + - step: stdout should contain "pagefind-ui.css" + - step: stdout should contain "pagefind-modular-ui.js" + - step: stdout should contain "pagefind-modular-ui.css" + - step: stdout should contain "wasm.unknown.pagefind" diff --git a/pagefind/integration_tests/python_api/py-pagefind-service-config.toolproof.yml b/pagefind/integration_tests/python_api/py-pagefind-service-config.toolproof.yml new file mode 100644 index 00000000..d8314e7f --- /dev/null +++ b/pagefind/integration_tests/python_api/py-pagefind-service-config.toolproof.yml @@ -0,0 +1,48 @@ +name: Python API > Pagefind service config +platforms: + - linux + - mac + +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/run.py" file with the content {python} + python: |2- + import sys + sys.path.append('%repo_wd%/wrappers/python/src') + + import asyncio + import json + import logging + import os + from pagefind.index import PagefindIndex, IndexConfig + + async def main(): + config = IndexConfig( + root_selector="h1", + exclude_selectors=["span"], + keep_index_url=True + ) + async with PagefindIndex(config=config) as index: + await index.add_html_file( + content="

Testing, testing

", + source_path="dogs/index.html", + ) + + print("Complete") + if __name__ == "__main__": + asyncio.run(main()) + - step: I run "cd public && PAGEFIND_BINARY_PATH=%pagefind_exec_path% python3 run.py" + - step: stdout should contain "Complete" + - step: The file "public/pagefind/pagefind.js" should not be empty + - step: I serve the directory "public" + - step: In my browser, I load "/" + - step: In my browser, I evaluate {js} + js: |- + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search("testing"); + + let data = await search.results[0].data(); + toolproof.assert_eq(data.url, `/dogs/index.html`); + toolproof.assert_eq(data.content, `Testing,`); + - step: In my browser, the console should be empty From 51118da2c004322b5789d1f5621c27995f1f66d1 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:56:03 +1300 Subject: [PATCH 36/39] Note down useful testing script for local Python API integration tests --- wrappers/python/src/tests/README.md | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 wrappers/python/src/tests/README.md diff --git a/wrappers/python/src/tests/README.md b/wrappers/python/src/tests/README.md new file mode 100644 index 00000000..d4e08ae3 --- /dev/null +++ b/wrappers/python/src/tests/README.md @@ -0,0 +1,37 @@ +Script to run tests from the repo root on an M* macOS: + +```py +bin="$PWD/target/release/pagefind" +ext="$PWD/target/release/pagefind_extended" + +cd wrappers/python + +# set up the python virtual environment +poetry install --no-root # for dev dependencies +export VIRTUAL_ENV="${PWD}/.venv" +export PATH="$VIRTUAL_ENV/bin:$PATH" + +# build and install the binary-only wheels + +python3 -m scripts.build.binary_only_wheel \ + --llvm-triple="aarch64-apple-darwin" \ + --bin-path=$bin \ + --version=1.1.0 + +python3 -m scripts.build.binary_only_wheel \ + --llvm-triple="aarch64-apple-darwin" \ + --bin-path=$ext \ + 
--version=1.1.0 + +python3 -m scripts.build.api_package + +poetry build # build the source-only distribution for the python API +# install all the wheels +pip install ./dist/*.whl --force-reinstall +pip show --verbose pagefind +pip show --verbose pagefind_bin +pip show --verbose pagefind_bin_extended +python3 --version + +LOG_LEVEL="DEBUG" python3 ./src/tests/integration.py 2>&1 | tee /tmp/integration_test.log +``` From 38b3461777cd82e67d5d4b79979db752be2dd8d1 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Wed, 2 Oct 2024 21:32:11 +1300 Subject: [PATCH 37/39] Flesh out documentation around Python installs --- docs/content/docs/_index.md | 13 ++++++++--- docs/content/docs/installation.md | 38 +++++++++++++++++++++++++++---- docs/content/docs/py-api.md | 27 ++++++++++++++++++---- 3 files changed, 66 insertions(+), 12 deletions(-) diff --git a/docs/content/docs/_index.md b/docs/content/docs/_index.md index 274c81fc..87fdf08c 100644 --- a/docs/content/docs/_index.md +++ b/docs/content/docs/_index.md @@ -30,15 +30,22 @@ Now build your site to an output directory — this guide assumes that you're ru ## Indexing your site -The easiest way to run pagefind is through npx. If you don't have Node and npm installed, or want to install Pagefind another way, see the [Installing Pagefind](/docs/installation/) guide. +The easiest way to run Pagefind is through one of the official wrapper packages. If you don't have Node or Python installed, or want to install Pagefind another way, see the [Installing Pagefind](/docs/installation/) guide. -Run the following command from your terminal, where `--site` points to the output directory of your static site generator. We'll also add `--serve` so that we can view our final site right away. +To use the Node wrapper, run the following command from your terminal, where `--site` points to the output directory of your static site generator. 
We'll also add `--serve` so that we can view our final site right away. ```bash npx -y pagefind --site public --serve ``` -You should see some output along the lines of: +Using the Python wrapper is similar, but requires an initial install: + +```bash +python3 -m pip install 'pagefind[extended]' +python3 -m pagefind --site public --serve +``` + +Regardless of the command you choose, after Pagefind has downloaded you should see some output along the lines of: ``` Indexed 2496 pages Indexed 22852 words diff --git a/docs/content/docs/installation.md b/docs/content/docs/installation.md index e4ffcfea..f63fcfbb 100644 --- a/docs/content/docs/installation.md +++ b/docs/content/docs/installation.md @@ -5,25 +5,55 @@ nav_section: References weight: 49 --- -Pagefind is a static binary with no dynamic dependencies, so in most cases will be simple to install and run. Pagefind is currently supported on Windows, macOS, and x86-64 Linux distributions. +Pagefind is a static binary with no dynamic dependencies, so in most cases will be simple to install and run. Pagefind is currently supported on Windows, macOS, and Linux distributions. ## Running via npx +For users with a NodeJS toolchain already installed, Pagefind publishes a [wrapper package through npm](https://www.npmjs.com/package/pagefind): + ```bash npx pagefind --site "public" ``` -Pagefind publishes a [wrapper package through npm](https://www.npmjs.com/package/pagefind), which is the easiest way to get started. This package will download the correct [binary of the latest release](https://github.com/CloudCannon/pagefind/releases) as an npm dependency for your platform and run it. +This package includes the correct [binary of the relevant release](https://github.com/CloudCannon/pagefind/releases) as a dependency for your platform. 
Specific versions can be run by passing a version tag: ```bash npx pagefind@latest --site "public" -npx pagefind@v0.2.0 --site "public" +npx pagefind@v1.1.1 --site "public" +``` + +Running Pagefind via npx will always download the `pagefind_extended` release, which includes specialized support for indexing Chinese and Japanese pages. + +> Pagefind's npm package can also be imported and controlled from a script. See the [Node API documentation](/docs/node-api/) for details. + +## Running via Python + +For users with a Python toolchain already installed, Pagefind publishes a [wrapper package through pypi](https://pypi.org/project/pagefind/): + +```bash +python3 -m pip install 'pagefind[extended]' +python3 -m pagefind --site "public" +``` + +This package includes the correct [binary of the relevant release](https://github.com/CloudCannon/pagefind/releases) as a dependency for your platform. + +Specific versions can be installed by passing a version: + +```bash +python3 -m pip install 'pagefind[extended]==1.1.1' +``` + +The above example shows installing the `pagefind_extended` release, which includes specialized support for indexing Chinese and Japanese pages. +To install the smaller standard release, run: + +```bash +python3 -m pip install 'pagefind[bin]' ``` -> Running Pagefind via npx will download the `pagefind_extended` release, which includes specialized support for indexing Chinese and Japanese pages. +> Pagefind's Python package can also be imported and controlled from a script. See the [Python API documentation](/docs/py-api/) for details. 
## Downloading a precompiled binary diff --git a/docs/content/docs/py-api.md b/docs/content/docs/py-api.md index c12758bd..dcd11aa4 100644 --- a/docs/content/docs/py-api.md +++ b/docs/content/docs/py-api.md @@ -13,6 +13,23 @@ There are situations where using this Python package is beneficial: - Users looking to index their site and augment that index with extra non-HTML pages can run a standard Pagefind crawl with [`add_directory`](#indexadd_directory) and augment it with [`add_custom_record`](#indexadd_custom_record). - Users looking to use Pagefind's engine for searching miscellaneous content such as PDFs or subtitles, where [`add_custom_record`](#indexadd_custom_record) can be used to build the entire index from scratch. +## Installation + +To install just the Python wrapper, and use a `pagefind` executable from your system: +```bash +python3 -m pip install 'pagefind' +``` + +To install the Python wrapper as well as the standard binary for your platform: +```bash +python3 -m pip install 'pagefind[bin]' +``` + +To install the Python wrapper as well as the extended binary for your platform: +```bash +python3 -m pip install 'pagefind[extended]' +``` + ## Example Usage @@ -143,7 +160,7 @@ If the `path` provided is relative, it will be relative to the current working d indexed_dir = await index.add_directory("./public", glob="**.{html}") ``` -Optionally, a custom `glob` can be supplied which controls which files Pagefind will consume within the directory. The default is shown, and the `glob` option can be omitted entirely. +Optionally, a custom `glob` can be supplied which controls which files Pagefind will consume within the directory. The default is shown, and the `glob` option can be omitted entirely. See [Wax patterns documentation](https://github.com/olson-sean-k/wax#patterns) for more details. ## index.add_html_file @@ -206,14 +223,14 @@ page_meta: dict[str, str] = custom_record["page_meta"] The `url`, `content`, and `language` fields are all required. 
`language` should be an [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). -`meta` is optional, and is strictly a flat object of keys to string values. +`meta` is optional, and is strictly a flat object of keys to string values. See the [Metadata documentation](https://pagefind.app/docs/metadata/) for semantics. -`filters` is optional, and is strictly a flat object of keys to arrays of string values. +`filters` is optional, and is strictly a flat object of keys to arrays of string values. See the [Filters documentation](https://pagefind.app/docs/filtering/) for semantics. -`sort` is optional, and is strictly a flat object of keys to string values. -See the [Sort documentation](https://pagefind.app/docs/sorts/) for semantics. +`sort` is optional, and is strictly a flat object of keys to string values. +See the [Sort documentation](https://pagefind.app/docs/sorts/) for semantics. *When Pagefind is processing an index, number-like strings will be sorted numerically rather than alphabetically. As such, the value passed in should be `"20"` and not `20`* If successful, the `file` object is returned containing metadata about the completed indexing. 
From 9bdbe2d42c1b0a019b14cfe9c428185ad3870751 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Mon, 30 Sep 2024 21:42:32 +1300 Subject: [PATCH 38/39] Finalize Python release scripts and workflow --- .github/workflows/release.yml | 35 ++++++++--------- test_ci.sh | 2 +- test_interactive.sh | 2 +- .../scripts/build/all_binary_only_wheels.py | 10 +++-- wrappers/python/scripts/build/api_package.py | 38 +------------------ wrappers/python/scripts/build/versioning.py | 36 ++++++++++++++++++ 6 files changed, 61 insertions(+), 62 deletions(-) create mode 100644 wrappers/python/scripts/build/versioning.py diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 165b62ed..27de7d2c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,6 @@ on: env: CARGO_TERM_COLOR: always - HUMANE_VERSION: "0.9.0" WASM_PACK_VERSION: "v0.10.3" jobs: @@ -109,26 +108,31 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" + - name: Set up poetry - run: bash ./wrappers/python/scripts/ci/github/setup_poetry.sh + run: ./wrappers/python/scripts/ci/github/setup_poetry.sh + - name: cache venv uses: actions/cache@v4 with: - path: .venv + path: wrappers/python/.venv key: ${{ runner.os }}-poetry-3.12-${{ hashFiles('**/poetry.lock') }} + - name: Install dev dependencies - run: bash ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh - working-directory: ./wrappers/python + run: ./wrappers/python/scripts/ci/github/install_dev_dependencies.sh + - name: activate venv run: ./wrappers/python/scripts/ci/github/activate_venv.sh + - name: debug python paths run: ./wrappers/python/scripts/ci/github/debug_python_paths.sh + - name: package binaries working-directory: ./wrappers/python run: | # should take ~30s; writes wheels to wrappers/python/dist export PAGEFIND_PYTHON_LOG_LEVEL=DEBUG python -m scripts.build.all_binary_only_wheels \ - --git-tag "${{ github.ref_name }}" \ + --tag "${{ 
github.ref_name }}" \ --bin-dir ./vendor - name: package python api working-directory: ./wrappers/python @@ -141,7 +145,7 @@ jobs: path: wrappers/python/dist name: python-packages if-no-files-found: error - + - name: Publish to pypi uses: pypa/gh-action-pypi-publish@release/v1 # Note: this action requires test-pypi / pypi trusted publishing to be @@ -150,7 +154,7 @@ jobs: # - https://docs.pypi.org/trusted-publishers/ with: # see https://github.com/pypa/gh-action-pypi-publish/tree/release/v1/?tab=readme-ov-file#customizing-target-package-dists-directory packages-dir: wrappers/python/dist - repository-url: https://test.pypi.org/ # FIXME: comment this out when ready to publish to pypi + # repository-url: https://test.pypi.org/legacy/ # Use to publish test packages verbose: true # can be commented out once the action is working as expected publish-npm-package: @@ -211,7 +215,6 @@ jobs: env: NPM_TOKEN: ${{ secrets.NPM_TOKEN }} - publish-binary-npm-packages: name: Publish NPM binaries runs-on: ubuntu-20.04 @@ -388,35 +391,30 @@ jobs: os: windows-latest rust: stable target: x86_64-pc-windows-msvc - humane_target: x86_64-pc-windows-msvc cross: false run_tests: true - build: linux os: ubuntu-latest rust: stable target: x86_64-unknown-linux-musl - humane_target: x86_64-unknown-linux-musl cross: false run_tests: true - build: linux os: ubuntu-latest rust: stable target: aarch64-unknown-linux-musl - humane_target: x86_64-unknown-linux-musl cross: false run_tests: false - build: macos os: macos-latest rust: stable target: x86_64-apple-darwin - humane_target: x86_64-apple-darwin cross: false run_tests: true - build: macos-m1 os: macos-latest rust: stable target: aarch64-apple-darwin - humane_target: x86_64-apple-darwin cross: false run_tests: false steps: @@ -484,13 +482,10 @@ jobs: with: version: ${{env.WASM_PACK_VERSION}} - - name: Install humane - if: matrix.run_tests == true - uses: supplypike/setup-bin@v3 + - name: Set up python 3.12 + uses: actions/setup-python@v5 with: - 
uri: "https://github.com/CloudCannon/humane/releases/download/v${{env.HUMANE_VERSION}}/humane-v${{env.HUMANE_VERSION}}-${{matrix.humane_target}}.tar.gz" - name: "humane" - version: ${{env.HUMANE_VERSION}} + python-version: "3.12" - name: Prepare Git run: | diff --git a/test_ci.sh b/test_ci.sh index b6591c1a..f16b819c 100755 --- a/test_ci.sh +++ b/test_ci.sh @@ -6,4 +6,4 @@ cd "$SCRIPT_DIR" PAGEFIND=$(realpath "$SCRIPT_DIR/target/$1/pagefind") REPO_WD=$(realpath "$SCRIPT_DIR") -npx -y toolproof@0.4.0 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 1 +npx -y toolproof@0.4.1 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 1 diff --git a/test_interactive.sh b/test_interactive.sh index 2e6a4bac..26aee2a7 100755 --- a/test_interactive.sh +++ b/test_interactive.sh @@ -10,4 +10,4 @@ cd $SCRIPT_DIR PAGEFIND=$(realpath "$SCRIPT_DIR/target/release/pagefind") REPO_WD=$(realpath "$SCRIPT_DIR") -npx -y toolproof@0.4.0 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 4 -i +npx -y toolproof@0.4.1 --placeholders pagefind_exec_path="$PAGEFIND" repo_wd="$REPO_WD" -c 4 -i diff --git a/wrappers/python/scripts/build/all_binary_only_wheels.py b/wrappers/python/scripts/build/all_binary_only_wheels.py index e0d6a179..205f060a 100644 --- a/wrappers/python/scripts/build/all_binary_only_wheels.py +++ b/wrappers/python/scripts/build/all_binary_only_wheels.py @@ -14,6 +14,7 @@ write_pagefind_bin_only_wheel, ) from .get_pagefind_release import download, find_bins +from .versioning import process_tag __candidates = ( "pagefind", @@ -87,14 +88,17 @@ def parse_args() -> Args: if tag_name is None: raise ValueError("tag_name is None") assert re.match( - r"^v\d+\.\d+\.\d+(-\w+)?", tag_name + r"^v\d+\.\d+\.\d+(-\w+\.?\d*)?", tag_name ), f"Invalid tag_name: {tag_name}" check_platforms(certified) if not dry_run: - dist_dir.rmdir() + if dist_dir.exists(): + dist_dir.rmdir() dist_dir.mkdir(exist_ok=True) + version = process_tag(tag_name) + for 
tar_gz in certified: log.info("Processing %s", tar_gz) llvm_triple = get_llvm_triple(tar_gz) @@ -112,6 +116,6 @@ def parse_args() -> Args: write_pagefind_bin_only_wheel( executable=find_bin(temp_dir), output_dir=dist_dir, - version=tag_name.removeprefix("v"), + version=version, platform=platform, ) diff --git a/wrappers/python/scripts/build/api_package.py b/wrappers/python/scripts/build/api_package.py index 99de3364..b9b22283 100644 --- a/wrappers/python/scripts/build/api_package.py +++ b/wrappers/python/scripts/build/api_package.py @@ -8,6 +8,7 @@ from argparse import ArgumentParser from . import python_root, setup_logging +from .versioning import process_tag pyproject_toml = python_root / "pyproject.toml" @@ -16,43 +17,6 @@ cli.add_argument("--tag", required=True, help="The version to build.") log = logging.getLogger(__name__) - -def process_tag(tag: str) -> str: - """Convert a git tag to a version string compliant with PEP 440. - See https://peps.python.org/pep-0440/#public-version-identifiers - """ - pattern = ( - # note that this pattern accepts a superset of the tagging pattern used - # in this repository. - r"^v(?P<major>\d+)" - r"\.(?P<minor>\d+)" - r"\.(?P<patch>\d+)" - r"(-" - r"(?P<prerelease_kind>alpha|beta|rc)" - r"\.?(?P<prerelease_number>\d+)" ")?" 
- ) - parts = re.match(pattern, tag) - if parts is None: - raise ValueError(f"Invalid tag: `{tag}` does not match pattern `{pattern}`") - major = int(parts["major"]) - minor = int(parts["minor"]) - patch = int(parts["patch"]) - suffix = "" - - if (prerelease_kind := parts["prerelease_kind"]) is not None: - if prerelease_kind == "rc": - suffix = "rc" - elif prerelease_kind.startswith("alpha"): - suffix = "a" - elif prerelease_kind.startswith("beta"): - suffix = "b" - if (prerelease_number := parts["prerelease_number"]) is not None: - suffix += str(int(prerelease_number)) - - return f"{major}.{minor}.{patch}{suffix}" - - def main() -> None: setup_logging() args = cli.parse_args() diff --git a/wrappers/python/scripts/build/versioning.py b/wrappers/python/scripts/build/versioning.py new file mode 100644 index 00000000..efb19b63 --- /dev/null +++ b/wrappers/python/scripts/build/versioning.py @@ -0,0 +1,36 @@ +import re + +def process_tag(tag: str) -> str: + """Convert a git tag to a version string compliant with PEP 440. + See https://peps.python.org/pep-0440/#public-version-identifiers + """ + pattern = ( + # note that this pattern accepts a superset of the tagging pattern used + # in this repository. + r"^v(?P<major>\d+)" + r"\.(?P<minor>\d+)" + r"\.(?P<patch>\d+)" + r"(-" + r"(?P<prerelease_kind>alpha|beta|rc)" + r"\.?(?P<prerelease_number>\d+)" + ")?" 
+ ) + parts = re.match(pattern, tag) + if parts is None: + raise ValueError(f"Invalid tag: `{tag}` does not match pattern `{pattern}`") + major = int(parts["major"]) + minor = int(parts["minor"]) + patch = int(parts["patch"]) + suffix = "" + + if (prerelease_kind := parts["prerelease_kind"]) is not None: + if prerelease_kind == "rc": + suffix = "rc" + elif prerelease_kind.startswith("alpha"): + suffix = "a" + elif prerelease_kind.startswith("beta"): + suffix = "b" + if (prerelease_number := parts["prerelease_number"]) is not None: + suffix += str(int(prerelease_number)) + + return f"{major}.{minor}.{patch}{suffix}" From be66cf5f377081ad1a3951c97f83010538dd3117 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Wed, 2 Oct 2024 22:51:23 +1300 Subject: [PATCH 39/39] Python formatting --- wrappers/python/scripts/build/api_package.py | 1 + wrappers/python/scripts/build/versioning.py | 1 + 2 files changed, 2 insertions(+) diff --git a/wrappers/python/scripts/build/api_package.py b/wrappers/python/scripts/build/api_package.py index b9b22283..a68ba935 100644 --- a/wrappers/python/scripts/build/api_package.py +++ b/wrappers/python/scripts/build/api_package.py @@ -17,6 +17,7 @@ cli.add_argument("--tag", required=True, help="The version to build.") log = logging.getLogger(__name__) + def main() -> None: setup_logging() args = cli.parse_args() diff --git a/wrappers/python/scripts/build/versioning.py b/wrappers/python/scripts/build/versioning.py index efb19b63..b0426bb8 100644 --- a/wrappers/python/scripts/build/versioning.py +++ b/wrappers/python/scripts/build/versioning.py @@ -1,5 +1,6 @@ import re + def process_tag(tag: str) -> str: """Convert a git tag to a version string compliant with PEP 440. See https://peps.python.org/pep-0440/#public-version-identifiers