From 1a7499df89fdc1ac2ee867552ee65c25a309a86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=87=83=E5=A4=8F?= Date: Tue, 4 Jun 2024 17:56:29 +0800 Subject: [PATCH 1/6] tabular reader --- pyproject.toml | 1 + .../integrations/readers/pai_csv_reader.py | 167 ++++++++++++++++++ .../integrations/readers/pai_excel_reader.py | 77 ++++++++ .../modules/datareader/datareader_factory.py | 11 ++ tests/data_readers/test_csv_reader.py | 48 +++++ tests/data_readers/test_excel_reader.py | 32 ++++ 6 files changed, 336 insertions(+) create mode 100644 src/pai_rag/integrations/readers/pai_csv_reader.py create mode 100644 src/pai_rag/integrations/readers/pai_excel_reader.py create mode 100644 tests/data_readers/test_csv_reader.py create mode 100644 tests/data_readers/test_excel_reader.py diff --git a/pyproject.toml b/pyproject.toml index 5398cebc..aa7ce73b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ openinference-instrumentation = "^0.1.7" llama-index-llms-huggingface = "^0.2.0" pytest-asyncio = "^0.23.7" pytest-cov = "^5.0.0" +xlrd = "^2.0.1" [tool.poetry.scripts] pai_rag = "pai_rag.main:main" diff --git a/src/pai_rag/integrations/readers/pai_csv_reader.py b/src/pai_rag/integrations/readers/pai_csv_reader.py new file mode 100644 index 00000000..70b76d7c --- /dev/null +++ b/src/pai_rag/integrations/readers/pai_csv_reader.py @@ -0,0 +1,167 @@ +"""Tabular parser-CSV parser. + +Contains parsers for tabular data files. + +""" + +from pathlib import Path +from typing import Any, Dict, List, Optional +from fsspec import AbstractFileSystem + +import pandas as pd +from llama_index.core.readers.base import BaseReader +from llama_index.core.schema import Document + + +class PaiCSVReader(BaseReader): + """CSV parser. + + Args: + concat_rows (bool): whether to concatenate all rows into one document. + If set to False, a Document will be created for each row. + True by default. 
+ csv_config (dict): Options for the reader.Set to empty dict by default, + this means reader will try to figure + out the separators, table head, etc. on its own. + + """ + + def __init__( + self, *args: Any, concat_rows: bool = True, csv_config: dict = {}, **kwargs: Any + ) -> None: + """Init params.""" + super().__init__(*args, **kwargs) + self._concat_rows = concat_rows + self._csv_config = csv_config + + def load_data( + self, file: Path, extra_info: Optional[Dict] = None + ) -> List[Document]: + """Parse csv file. + + Returns: + Union[str, List[str]]: a string or a List of strings. + + """ + try: + import csv + except ImportError: + raise ImportError("csv module is required to read CSV files.") + text_list = [] + headers = [] + data_lines = [] + data_line_start_index = 1 + if ( + "header" in self._csv_config + and self._csv_config["header"] is not None + and isinstance(self._csv_config["header"], list) + ): + data_line_start_index = max(self._csv_config["header"]) + 1 + elif ( + "header" in self._csv_config + and self._csv_config["header"] is not None + and isinstance(self._csv_config["header"], int) + ): + data_line_start_index = self._csv_config["header"] + 1 + self._csv_config["header"] = [self._csv_config["header"]] + + with open(file) as fp: + has_header = csv.Sniffer().has_header(fp.read(2048)) + fp.seek(0) + + if "header" not in self._csv_config and has_header: + self._csv_config["header"] = [0] + elif "header" not in self._csv_config and not has_header: + self._csv_config["header"] = None + + csv_reader = csv.reader(fp) + + if self._csv_config["header"] is None: + for row in csv_reader: + text_list.append(", ".join(row)) + else: + for i, row in enumerate(csv_reader): + if i in self._csv_config["header"]: + headers.append(row) + elif i >= data_line_start_index: + data_lines.append(row) + headers = [tuple(group) for group in zip(*headers)] + for line in data_lines: + if len(line) == len(headers): + data_entry = str(dict(zip(headers, line))) + 
text_list.append(data_entry) + + metadata = {"filename": file.name, "extension": file.suffix} + if extra_info: + metadata = {**metadata, **extra_info} + + if self._concat_rows: + return [Document(text="\n".join(text_list), metadata=metadata)] + else: + return [Document(text=text, metadata=metadata) for text in text_list] + + +class PaiPandasCSVReader(BaseReader): + r"""Pandas-based CSV parser. + + Parses CSVs using the separator detection from Pandas `read_csv`function. + If special parameters are required, use the `pandas_config` dict. + + Args: + concat_rows (bool): whether to concatenate all rows into one document. + If set to False, a Document will be created for each row. + True by default. + + row_joiner (str): Separator to use for joining each row. + Only used when `concat_rows=True`. + Set to "\n" by default. + + pandas_config (dict): Options for the `pandas.read_csv` function call. + Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html + for more information. + Set to empty dict by default, this means pandas will try to figure + out the separators, table head, etc. on its own. 
+ + """ + + def __init__( + self, + *args: Any, + concat_rows: bool = True, + row_joiner: str = "\n", + pandas_config: dict = {}, + **kwargs: Any + ) -> None: + """Init params.""" + super().__init__(*args, **kwargs) + self._concat_rows = concat_rows + self._row_joiner = row_joiner + self._pandas_config = pandas_config + + def load_data( + self, + file: Path, + extra_info: Optional[Dict] = None, + fs: Optional[AbstractFileSystem] = None, + ) -> List[Document]: + """Parse csv file.""" + if fs: + with fs.open(file) as f: + df = pd.read_csv(f, **self._pandas_config) + else: + df = pd.read_csv(file, **self._pandas_config) + + text_list = df.apply( + lambda row: str(dict(zip(df.columns, row.astype(str)))), axis=1 + ).tolist() + + if self._concat_rows: + return [ + Document( + text=(self._row_joiner).join(text_list), metadata=extra_info or {} + ) + ] + else: + return [ + Document(text=text, metadata=extra_info or {}) for text in text_list + ] diff --git a/src/pai_rag/integrations/readers/pai_excel_reader.py b/src/pai_rag/integrations/readers/pai_excel_reader.py new file mode 100644 index 00000000..462ad5af --- /dev/null +++ b/src/pai_rag/integrations/readers/pai_excel_reader.py @@ -0,0 +1,77 @@ +"""Tabular parser-Excel parser. + +Contains parsers for tabular data files. + +""" + +from pathlib import Path +from typing import Any, Dict, List, Optional +from fsspec import AbstractFileSystem + +import pandas as pd +from llama_index.core.readers.base import BaseReader +from llama_index.core.schema import Document + + +class PaiPandasExcelReader(BaseReader): + r"""Pandas-based Excel parser. + + + Args: + concat_rows (bool): whether to concatenate all rows into one document. + If set to False, a Document will be created for each row. + True by default. + + row_joiner (str): Separator to use for joining each row. + Only used when `concat_rows=True`. + Set to "\n" by default. + + pandas_config (dict): Options for the `pandas.read_excel` function call. 
+ Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html + for more information. + Set to empty dict by default, this means pandas will try to figure + out the separators, table head, etc. on its own. + + """ + + def __init__( + self, + *args: Any, + concat_rows: bool = True, + row_joiner: str = "\n", + pandas_config: dict = {}, + **kwargs: Any + ) -> None: + """Init params.""" + super().__init__(*args, **kwargs) + self._concat_rows = concat_rows + self._row_joiner = row_joiner + self._pandas_config = pandas_config + + def load_data( + self, + file: Path, + extra_info: Optional[Dict] = None, + fs: Optional[AbstractFileSystem] = None, + ) -> List[Document]: + """Parse Excel file. only process the first sheet""" + if fs: + with fs.open(file) as f: + df = pd.read_excel(f, sheet_name=0, **self._pandas_config) + else: + df = pd.read_excel(file, sheet_name=0, **self._pandas_config) + + text_list = df.apply( + lambda row: str(dict(zip(df.columns, row.astype(str)))), axis=1 + ).tolist() + + if self._concat_rows: + return [ + Document( + text=(self._row_joiner).join(text_list), metadata=extra_info or {} + ) + ] + else: + return [ + Document(text=text, metadata=extra_info or {}) for text in text_list + ] diff --git a/src/pai_rag/modules/datareader/datareader_factory.py b/src/pai_rag/modules/datareader/datareader_factory.py index abb46105..8c0b0c60 100644 --- a/src/pai_rag/modules/datareader/datareader_factory.py +++ b/src/pai_rag/modules/datareader/datareader_factory.py @@ -4,6 +4,8 @@ from pai_rag.integrations.readers.pai_pdf_reader import PaiPDFReader from pai_rag.integrations.readers.llama_parse_reader import LlamaParseDirectoryReader from pai_rag.integrations.readers.html.html_reader import HtmlReader +from pai_rag.integrations.readers.pai_csv_reader import PaiPandasCSVReader +from pai_rag.integrations.readers.pai_excel_reader import PaiPandasExcelReader from llama_index.readers.database import DatabaseReader from llama_index.core import 
SimpleDirectoryReader import logging @@ -25,6 +27,15 @@ def _create_new_instance(self, new_params: Dict[str, Any]): enable_image_ocr=self.reader_config.get("enable_image_ocr", False), model_dir=self.reader_config.get("easyocr_model_dir", None), ), + ".csv": PaiPandasCSVReader( + concat_rows=self.reader_config.get("concat_rows", False), + ), + ".xlsx": PaiPandasExcelReader( + concat_rows=self.reader_config.get("concat_rows", False), + ), + ".xls": PaiPandasExcelReader( + concat_rows=self.reader_config.get("concat_rows", False), + ), } return self diff --git a/tests/data_readers/test_csv_reader.py b/tests/data_readers/test_csv_reader.py new file mode 100644 index 00000000..9da1c2b0 --- /dev/null +++ b/tests/data_readers/test_csv_reader.py @@ -0,0 +1,48 @@ +import os +from pathlib import Path +from pai_rag.core.rag_configuration import RagConfiguration +from pai_rag.modules.module_registry import module_registry +from pai_rag.integrations.readers.pai_csv_reader import PaiCSVReader, PaiPandasCSVReader +from llama_index.core import SimpleDirectoryReader + +BASE_DIR = Path(__file__).parent.parent.parent + + +def test_csv_reader(): + config_file = os.path.join(BASE_DIR, "src/pai_rag/config/settings.toml") + config = RagConfiguration.from_file(config_file).get_value() + module_registry.init_modules(config) + reader_config = config["data_reader"] + directory_reader = SimpleDirectoryReader( + input_dir="tests/testdata/data/csv_data", + file_extractor={ + ".csv": PaiCSVReader( + concat_rows=reader_config.get("concat_rows", False), + csv_config={"header": [0, 1]}, + ) + }, + ) + documents = directory_reader.load_data() + for doc in documents: + print(doc) + assert len(documents) > 0 + + +def test_pandas_csv_reader(): + config_file = os.path.join(BASE_DIR, "src/pai_rag/config/settings.toml") + config = RagConfiguration.from_file(config_file).get_value() + module_registry.init_modules(config) + reader_config = config["data_reader"] + directory_reader = SimpleDirectoryReader( + 
input_dir="tests/testdata/data/csv_data", + file_extractor={ + ".csv": PaiPandasCSVReader( + concat_rows=reader_config.get("concat_rows", False), + pandas_config={"header": [0, 1]}, + ) + }, + ) + documents = directory_reader.load_data() + for doc in documents: + print(doc) + assert len(documents) > 0 diff --git a/tests/data_readers/test_excel_reader.py b/tests/data_readers/test_excel_reader.py new file mode 100644 index 00000000..b572735f --- /dev/null +++ b/tests/data_readers/test_excel_reader.py @@ -0,0 +1,32 @@ +import os +from pathlib import Path +from pai_rag.core.rag_configuration import RagConfiguration +from pai_rag.modules.module_registry import module_registry +from pai_rag.integrations.readers.pai_excel_reader import PaiPandasExcelReader +from llama_index.core import SimpleDirectoryReader + +BASE_DIR = Path(__file__).parent.parent.parent + + +def test_pandas_excel_reader(): + config_file = os.path.join(BASE_DIR, "src/pai_rag/config/settings.toml") + config = RagConfiguration.from_file(config_file).get_value() + module_registry.init_modules(config) + reader_config = config["data_reader"] + directory_reader = SimpleDirectoryReader( + input_dir="tests/testdata/data/excel_data", + file_extractor={ + ".xlsx": PaiPandasExcelReader( + concat_rows=reader_config.get("concat_rows", False), + pandas_config={"header": [0, 1]}, + ), + ".xls": PaiPandasExcelReader( + concat_rows=reader_config.get("concat_rows", False), + pandas_config={"header": [0, 1]}, + ), + }, + ) + documents = directory_reader.load_data() + for doc in documents: + print(doc) + assert len(documents) > 0 From 56ae8e08868517d67b6c66d5ac58e79fd971c720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=87=83=E5=A4=8F?= Date: Tue, 4 Jun 2024 18:08:03 +0800 Subject: [PATCH 2/6] tabular reader --- poetry.lock | 244 ++++++++++++++++++++++++++++------------------------ 1 file changed, 130 insertions(+), 114 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9bfd357d..4e338697 100644 --- a/poetry.lock +++ 
b/poetry.lock @@ -1282,13 +1282,13 @@ files = [ [[package]] name = "duckduckgo-search" -version = "6.1.4" +version = "6.1.5" description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." optional = false python-versions = ">=3.8" files = [ - {file = "duckduckgo_search-6.1.4-py3-none-any.whl", hash = "sha256:2248def55da5982a5d055a22972e880646de6b6e5f77d1e4fdb971aed151bd05"}, - {file = "duckduckgo_search-6.1.4.tar.gz", hash = "sha256:4b7baf2aebf05302bbb6791df05a968c4c5a3835e5f3f422b5048fed230d2699"}, + {file = "duckduckgo_search-6.1.5-py3-none-any.whl", hash = "sha256:f0a18fe5f20323ba6bb11865ce32d4520bb90086a6ae62f5da510865f5a7dca8"}, + {file = "duckduckgo_search-6.1.5.tar.gz", hash = "sha256:10e5c4d09a4243fd9d85007dc4fe637456c3c3995fd2e1e9b49ffd6f75bb0afb"}, ] [package.dependencies] @@ -1828,17 +1828,17 @@ tool = ["click (>=6.0.0)"] [[package]] name = "googleapis-common-protos" -version = "1.63.0" +version = "1.63.1" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" files = [ - {file = "googleapis-common-protos-1.63.0.tar.gz", hash = "sha256:17ad01b11d5f1d0171c06d3ba5c04c54474e883b66b949722b4938ee2694ef4e"}, - {file = "googleapis_common_protos-1.63.0-py2.py3-none-any.whl", hash = "sha256:ae45f75702f7c08b541f750854a678bd8f534a1a6bace6afe975f1d0a82d6632"}, + {file = "googleapis-common-protos-1.63.1.tar.gz", hash = "sha256:c6442f7a0a6b2a80369457d79e6672bb7dcbaab88e0848302497e3ec80780a6a"}, + {file = "googleapis_common_protos-1.63.1-py2.py3-none-any.whl", hash = "sha256:0e1c2cdfcbc354b76e4a211a35ea35d6926a835cba1377073c4861db904a1877"}, ] [package.dependencies] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || 
>4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] @@ -3963,13 +3963,13 @@ sympy = "*" [[package]] name = "openai" -version = "1.30.5" +version = "1.31.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.30.5-py3-none-any.whl", hash = "sha256:2ad95e926de0d2e09cde632a9204b0a6dca4a03c2cdcc84329b01f355784355a"}, - {file = "openai-1.30.5.tar.gz", hash = "sha256:5366562eb2c5917e6116ae0391b7ae6e3acd62b0ae3f565ada32b35d8fcfa106"}, + {file = "openai-1.31.0-py3-none-any.whl", hash = "sha256:82044ee3122113f2a468a1f308a8882324d09556ba5348687c535d3655ee331c"}, + {file = "openai-1.31.0.tar.gz", hash = "sha256:54ae0625b005d6a3b895db2b8438dae1059cffff0cd262a26e9015c13a29ab06"}, ] [package.dependencies] @@ -3986,48 +3986,48 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "opencv-python" -version = "4.9.0.80" +version = "4.10.0.82" description = "Wrapper package for OpenCV python bindings." 
optional = false python-versions = ">=3.6" files = [ - {file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"}, - {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"}, - {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"}, - {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"}, - {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"}, - {file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"}, - {file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"}, + {file = "opencv-python-4.10.0.82.tar.gz", hash = "sha256:dbc021eaa310c4145c47cd648cb973db69bb5780d6e635386cd53d3ea76bd2d5"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:5f78652339957ec24b80a782becfb32f822d2008a865512121fad8c3ce233e9a"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:e6be19a0615aa8c4e0d34e0c7b133e26e386f4b7e9b557b69479104ab2c133ec"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b49e530f7fd86f671514b39ffacdf5d14ceb073bc79d0de46bbb6b0cad78eaf"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955c5ce8ac90c9e4636ad7f5c0d9c75b80abbe347182cfd09b0e3eec6e50472c"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-win32.whl", hash = 
"sha256:ff54adc9e4daaf438e669664af08bec4a268c7b7356079338b8e4fae03810f2c"}, + {file = "opencv_python-4.10.0.82-cp37-abi3-win_amd64.whl", hash = "sha256:2e3c2557b176f1e528417520a52c0600a92c1bb1c359f3df8e6411ab4293f065"}, ] [package.dependencies] numpy = [ - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, ] [[package]] name = "opencv-python-headless" -version = "4.9.0.80" +version = "4.10.0.82" description = "Wrapper package for OpenCV python bindings." optional = false python-versions = ">=3.6" files = [ - {file = "opencv-python-headless-4.9.0.80.tar.gz", hash = "sha256:71a4cd8cf7c37122901d8e81295db7fb188730e33a0e40039a4e59c1030b0958"}, - {file = "opencv_python_headless-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:2ea8a2edc4db87841991b2fbab55fc07b97ecb602e0f47d5d485bd75cee17c1a"}, - {file = "opencv_python_headless-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e0ee54e27be493e8f7850847edae3128e18b540dac1d7b2e4001b8944e11e1c6"}, - {file = "opencv_python_headless-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57ce2865e8fec431c6f97a81e9faaf23fa5be61011d0a75ccf47a3c0d65fa73d"}, - {file = "opencv_python_headless-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:976656362d68d9f40a5c66f83901430538002465f7db59142784f3893918f3df"}, - {file = "opencv_python_headless-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:11e3849d83e6651d4e7699aadda9ec7ed7c38957cbbcb99db074f2a2d2de9670"}, - {file = "opencv_python_headless-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:a8056c2cb37cd65dfcdf4153ca16f7362afcf3a50d600d6bb69c660fc61ee29c"}, + {file = 
"opencv-python-headless-4.10.0.82.tar.gz", hash = "sha256:de9e742c1b9540816fbd115b0b03841d41ed0c65566b0d7a5371f98b131b7e6d"}, + {file = "opencv_python_headless-4.10.0.82-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a09ed50ba21cc5bf5d436cb0e784ad09c692d6b1d1454252772f6c8f2c7b4088"}, + {file = "opencv_python_headless-4.10.0.82-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:977a5fd21e1fe0d3d2134887db4441f8725abeae95150126302f31fcd9f548fa"}, + {file = "opencv_python_headless-4.10.0.82-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4ec6755838b0be12510bfc9ffb014779c612418f11f4f7e6f505c36124a3aa"}, + {file = "opencv_python_headless-4.10.0.82-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10a37fa5276967ecf6eb297295b16b28b7a2eb3b568ca0ee469fb1a5954de298"}, + {file = "opencv_python_headless-4.10.0.82-cp37-abi3-win32.whl", hash = "sha256:94736e9b322d13db4768fd35588ad5e8995e78e207263076bfbee18aac835ad5"}, + {file = "opencv_python_headless-4.10.0.82-cp37-abi3-win_amd64.whl", hash = "sha256:c1822fa23d1641c0249ed5eb906f4c385f7959ff1bd601a776d56b0c18914af4"}, ] [package.dependencies] numpy = [ - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, ] [[package]] @@ -4394,8 +4394,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4956,18 +4956,18 @@ files = [ [[package]] name = "pydantic" -version = "2.7.2" +version = "2.7.3" description = "Data 
validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.2-py3-none-any.whl", hash = "sha256:834ab954175f94e6e68258537dc49402c4a5e9d0409b9f1b86b7e934a8372de7"}, - {file = "pydantic-2.7.2.tar.gz", hash = "sha256:71b2945998f9c9b7919a45bde9a50397b289937d215ae141c1d0903ba7149fd7"}, + {file = "pydantic-2.7.3-py3-none-any.whl", hash = "sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"}, + {file = "pydantic-2.7.3.tar.gz", hash = "sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.18.3" +pydantic-core = "2.18.4" typing-extensions = ">=4.6.1" [package.extras] @@ -4975,90 +4975,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.18.3" +version = "2.18.4" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:744697428fcdec6be5670460b578161d1ffe34743a5c15656be7ea82b008197c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b40c05ced1ba4218b14986fe6f283d22e1ae2ff4c8e28881a70fb81fbfcda7"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a9a75622357076efb6b311983ff190fbfb3c12fc3a853122b34d3d358126c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2e253af04ceaebde8eb201eb3f3e3e7e390f2d275a88300d6a1959d710539e2"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:855ec66589c68aa367d989da5c4755bb74ee92ccad4fdb6af942c3612c067e34"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:3d3e42bb54e7e9d72c13ce112e02eb1b3b55681ee948d748842171201a03a98a"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6ac9ffccc9d2e69d9fba841441d4259cb668ac180e51b30d3632cd7abca2b9b"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c56eca1686539fa0c9bda992e7bd6a37583f20083c37590413381acfc5f192d6"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:17954d784bf8abfc0ec2a633108207ebc4fa2df1a0e4c0c3ccbaa9bb01d2c426"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:98ed737567d8f2ecd54f7c8d4f8572ca7c7921ede93a2e52939416170d357812"}, - {file = "pydantic_core-2.18.3-cp310-none-win32.whl", hash = "sha256:9f9e04afebd3ed8c15d67a564ed0a34b54e52136c6d40d14c5547b238390e779"}, - {file = "pydantic_core-2.18.3-cp310-none-win_amd64.whl", hash = "sha256:45e4ffbae34f7ae30d0047697e724e534a7ec0a82ef9994b7913a412c21462a0"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b9ebe8231726c49518b16b237b9fe0d7d361dd221302af511a83d4ada01183ab"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8e20e15d18bf7dbb453be78a2d858f946f5cdf06c5072453dace00ab652e2b2"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0d9ff283cd3459fa0bf9b0256a2b6f01ac1ff9ffb034e24457b9035f75587cb"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f7ef5f0ebb77ba24c9970da18b771711edc5feaf00c10b18461e0f5f5949231"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73038d66614d2e5cde30435b5afdced2b473b4c77d4ca3a8624dd3e41a9c19be"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:6afd5c867a74c4d314c557b5ea9520183fadfbd1df4c2d6e09fd0d990ce412cd"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd7df92f28d351bb9f12470f4c533cf03d1b52ec5a6e5c58c65b183055a60106"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80aea0ffeb1049336043d07799eace1c9602519fb3192916ff525b0287b2b1e4"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaee40f25bba38132e655ffa3d1998a6d576ba7cf81deff8bfa189fb43fd2bbe"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9128089da8f4fe73f7a91973895ebf2502539d627891a14034e45fb9e707e26d"}, - {file = "pydantic_core-2.18.3-cp311-none-win32.whl", hash = "sha256:fec02527e1e03257aa25b1a4dcbe697b40a22f1229f5d026503e8b7ff6d2eda7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_amd64.whl", hash = "sha256:58ff8631dbab6c7c982e6425da8347108449321f61fe427c52ddfadd66642af7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_arm64.whl", hash = "sha256:3fc1c7f67f34c6c2ef9c213e0f2a351797cda98249d9ca56a70ce4ebcaba45f4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f0928cde2ae416a2d1ebe6dee324709c6f73e93494d8c7aea92df99aab1fc40f"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bee9bb305a562f8b9271855afb6ce00223f545de3d68560b3c1649c7c5295e9"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e862823be114387257dacbfa7d78547165a85d7add33b446ca4f4fae92c7ff5c"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a36f78674cbddc165abab0df961b5f96b14461d05feec5e1f78da58808b97e7"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba905d184f62e7ddbb7a5a751d8a5c805463511c7b08d1aca4a3e8c11f2e5048"}, - {file = 
"pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fdd362f6a586e681ff86550b2379e532fee63c52def1c666887956748eaa326"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b214b7ee3bd3b865e963dbed0f8bc5375f49449d70e8d407b567af3222aae4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691018785779766127f531674fa82bb368df5b36b461622b12e176c18e119022"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:60e4c625e6f7155d7d0dcac151edf5858102bc61bf959d04469ca6ee4e8381bd"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4e651e47d981c1b701dcc74ab8fec5a60a5b004650416b4abbef13db23bc7be"}, - {file = "pydantic_core-2.18.3-cp312-none-win32.whl", hash = "sha256:ffecbb5edb7f5ffae13599aec33b735e9e4c7676ca1633c60f2c606beb17efc5"}, - {file = "pydantic_core-2.18.3-cp312-none-win_amd64.whl", hash = "sha256:2c8333f6e934733483c7eddffdb094c143b9463d2af7e6bd85ebcb2d4a1b82c6"}, - {file = "pydantic_core-2.18.3-cp312-none-win_arm64.whl", hash = "sha256:7a20dded653e516a4655f4c98e97ccafb13753987434fe7cf044aa25f5b7d417"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:eecf63195be644b0396f972c82598cd15693550f0ff236dcf7ab92e2eb6d3522"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c44efdd3b6125419c28821590d7ec891c9cb0dff33a7a78d9d5c8b6f66b9702"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e59fca51ffbdd1638b3856779342ed69bcecb8484c1d4b8bdb237d0eb5a45e2"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70cf099197d6b98953468461d753563b28e73cf1eade2ffe069675d2657ed1d5"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:63081a49dddc6124754b32a3774331467bfc3d2bd5ff8f10df36a95602560361"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:370059b7883485c9edb9655355ff46d912f4b03b009d929220d9294c7fd9fd60"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a64faeedfd8254f05f5cf6fc755023a7e1606af3959cfc1a9285744cc711044"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d2e725de0f90d8671f89e420d36c3dd97639b98145e42fcc0e1f6d492a46dc"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:67bc078025d70ec5aefe6200ef094576c9d86bd36982df1301c758a9fff7d7f4"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:adf952c3f4100e203cbaf8e0c907c835d3e28f9041474e52b651761dc248a3c0"}, - {file = "pydantic_core-2.18.3-cp38-none-win32.whl", hash = "sha256:9a46795b1f3beb167eaee91736d5d17ac3a994bf2215a996aed825a45f897558"}, - {file = "pydantic_core-2.18.3-cp38-none-win_amd64.whl", hash = "sha256:200ad4e3133cb99ed82342a101a5abf3d924722e71cd581cc113fe828f727fbc"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:304378b7bf92206036c8ddd83a2ba7b7d1a5b425acafff637172a3aa72ad7083"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c826870b277143e701c9ccf34ebc33ddb4d072612683a044e7cce2d52f6c3fef"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e201935d282707394f3668380e41ccf25b5794d1b131cdd96b07f615a33ca4b1"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5560dda746c44b48bf82b3d191d74fe8efc5686a9ef18e69bdabccbbb9ad9442"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b32c2a1f8032570842257e4c19288eba9a2bba4712af542327de9a1204faff8"}, - 
{file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:929c24e9dea3990bc8bcd27c5f2d3916c0c86f5511d2caa69e0d5290115344a9"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a8376fef60790152564b0eab376b3e23dd6e54f29d84aad46f7b264ecca943"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dccf3ef1400390ddd1fb55bf0632209d39140552d068ee5ac45553b556780e06"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41dbdcb0c7252b58fa931fec47937edb422c9cb22528f41cb8963665c372caf6"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:666e45cf071669fde468886654742fa10b0e74cd0fa0430a46ba6056b24fb0af"}, - {file = "pydantic_core-2.18.3-cp39-none-win32.whl", hash = "sha256:f9c08cabff68704a1b4667d33f534d544b8a07b8e5d039c37067fceb18789e78"}, - {file = "pydantic_core-2.18.3-cp39-none-win_amd64.whl", hash = "sha256:4afa5f5973e8572b5c0dcb4e2d4fda7890e7cd63329bd5cc3263a25c92ef0026"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:77319771a026f7c7d29c6ebc623de889e9563b7087911b46fd06c044a12aa5e9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:df11fa992e9f576473038510d66dd305bcd51d7dd508c163a8c8fe148454e059"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d531076bdfb65af593326ffd567e6ab3da145020dafb9187a1d131064a55f97c"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33ce258e4e6e6038f2b9e8b8a631d17d017567db43483314993b3ca345dcbbb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f9cd7f5635b719939019be9bda47ecb56e165e51dd26c9a217a433e3d0d59a9"}, - {file = 
"pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cd4a032bb65cc132cae1fe3e52877daecc2097965cd3914e44fbd12b00dae7c5"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f2718430098bcdf60402136c845e4126a189959d103900ebabb6774a5d9fdb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0037a92cf0c580ed14e10953cdd26528e8796307bb8bb312dc65f71547df04d"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b95a0972fac2b1ff3c94629fc9081b16371dad870959f1408cc33b2f78ad347a"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a62e437d687cc148381bdd5f51e3e81f5b20a735c55f690c5be94e05da2b0d5c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b367a73a414bbb08507da102dc2cde0fa7afe57d09b3240ce82a16d608a7679c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ecce4b2360aa3f008da3327d652e74a0e743908eac306198b47e1c58b03dd2b"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4435b8d83f0c9561a2a9585b1de78f1abb17cb0cef5f39bf6a4b47d19bafe3"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:616221a6d473c5b9aa83fa8982745441f6a4a62a66436be9445c65f241b86c94"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7e6382ce89a92bc1d0c0c5edd51e931432202b9080dc921d8d003e616402efd1"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff58f379345603d940e461eae474b6bbb6dab66ed9a851ecd3cb3709bf4dcf6a"}, - {file = "pydantic_core-2.18.3.tar.gz", hash = "sha256:432e999088d85c8f36b9a3f769a8e2b57aabd817bbb729a90d1fe7f18f6f1f39"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = 
"sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a55b5b16c839df1070bc113c1f7f94a0af4433fcfa1b41799ce7606e5c79ce0a"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d0dcc59664fcb8974b356fe0a18a672d6d7cf9f54746c05f43275fc48636851"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8951eee36c57cd128f779e641e21eb40bc5073eb28b2d23f33eb0ef14ffb3f5d"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4701b19f7e3a06ea655513f7938de6f108123bf7c86bbebb1196eb9bd35cf724"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00a3f196329e08e43d99b79b286d60ce46bed10f2280d25a1718399457e06be"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:97736815b9cc893b2b7f663628e63f436018b75f44854c8027040e05230eeddb"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6891a2ae0e8692679c07728819b6e2b822fb30ca7445f67bbf6509b25a96332c"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bc4ff9805858bd54d1a20efff925ccd89c9d2e7cf4986144b30802bf78091c3e"}, + {file = "pydantic_core-2.18.4-cp310-none-win32.whl", hash = "sha256:1b4de2e51bbcb61fdebd0ab86ef28062704f62c82bbf4addc4e37fa4b00b7cbc"}, + {file = "pydantic_core-2.18.4-cp310-none-win_amd64.whl", hash = "sha256:6a750aec7bf431517a9fd78cb93c97b9b0c496090fee84a47a0d23668976b4b0"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:942ba11e7dfb66dc70f9ae66b33452f51ac7bb90676da39a7345e99ffb55402d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2ebef0e0b4454320274f5e83a41844c63438fdc874ea40a8b5b4ecb7693f1c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642295cd0c8df1b86fc3dced1d067874c353a188dc8e0f744626d49e9aa51c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f09baa656c904807e832cf9cce799c6460c450c4ad80803517032da0cd062e2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98906207f29bc2c459ff64fa007afd10a8c8ac080f7e4d5beff4c97086a3dabd"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19894b95aacfa98e7cb093cd7881a0c76f55731efad31073db4521e2b6ff5b7d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fbbdc827fe5e42e4d196c746b890b3d72876bdbf160b0eafe9f0334525119c8"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f85d05aa0918283cf29a30b547b4df2fbb56b45b135f9e35b6807cb28bc47951"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e85637bc8fe81ddb73fda9e56bab24560bdddfa98aa64f87aaa4e4b6730c23d2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2f5966897e5461f818e136b8451d0551a2e77259eb0f73a837027b47dc95dab9"}, + {file = "pydantic_core-2.18.4-cp311-none-win32.whl", hash = "sha256:44c7486a4228413c317952e9d89598bcdfb06399735e49e0f8df643e1ccd0558"}, + {file = "pydantic_core-2.18.4-cp311-none-win_amd64.whl", hash = "sha256:8a7164fe2005d03c64fd3b85649891cd4953a8de53107940bf272500ba8a788b"}, + {file = "pydantic_core-2.18.4-cp311-none-win_arm64.whl", hash = 
"sha256:4e99bc050fe65c450344421017f98298a97cefc18c53bb2f7b3531eb39bc7805"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6f5c4d41b2771c730ea1c34e458e781b18cc668d194958e0112455fff4e402b2"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fdf2156aa3d017fddf8aea5adfba9f777db1d6022d392b682d2a8329e087cef"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4748321b5078216070b151d5271ef3e7cc905ab170bbfd27d5c83ee3ec436695"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:847a35c4d58721c5dc3dba599878ebbdfd96784f3fb8bb2c356e123bdcd73f34"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c40d4eaad41f78e3bbda31b89edc46a3f3dc6e171bf0ecf097ff7a0ffff7cb1"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21a5e440dbe315ab9825fcd459b8814bb92b27c974cbc23c3e8baa2b76890077"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01dd777215e2aa86dfd664daed5957704b769e726626393438f9c87690ce78c3"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4b06beb3b3f1479d32befd1f3079cc47b34fa2da62457cdf6c963393340b56e9"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:564d7922e4b13a16b98772441879fcdcbe82ff50daa622d681dd682175ea918c"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0eb2a4f660fcd8e2b1c90ad566db2b98d7f3f4717c64fe0a83e0adb39766d5b8"}, + {file = "pydantic_core-2.18.4-cp312-none-win32.whl", hash = "sha256:8b8bab4c97248095ae0c4455b5a1cd1cdd96e4e4769306ab19dda135ea4cdb07"}, + {file = "pydantic_core-2.18.4-cp312-none-win_amd64.whl", hash = 
"sha256:14601cdb733d741b8958224030e2bfe21a4a881fb3dd6fbb21f071cabd48fa0a"}, + {file = "pydantic_core-2.18.4-cp312-none-win_arm64.whl", hash = "sha256:c1322d7dd74713dcc157a2b7898a564ab091ca6c58302d5c7b4c07296e3fd00f"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:823be1deb01793da05ecb0484d6c9e20baebb39bd42b5d72636ae9cf8350dbd2"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ebef0dd9bf9b812bf75bda96743f2a6c5734a02092ae7f721c048d156d5fabae"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae1d6df168efb88d7d522664693607b80b4080be6750c913eefb77e34c12c71a"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9899c94762343f2cc2fc64c13e7cae4c3cc65cdfc87dd810a31654c9b7358cc"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99457f184ad90235cfe8461c4d70ab7dd2680e28821c29eca00252ba90308c78"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18f469a3d2a2fdafe99296a87e8a4c37748b5080a26b806a707f25a902c040a8"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cdf28938ac6b8b49ae5e92f2735056a7ba99c9b110a474473fd71185c1af5d"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:938cb21650855054dc54dfd9120a851c974f95450f00683399006aa6e8abb057"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:44cd83ab6a51da80fb5adbd9560e26018e2ac7826f9626bc06ca3dc074cd198b"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:972658f4a72d02b8abfa2581d92d59f59897d2e9f7e708fdabe922f9087773af"}, + {file = "pydantic_core-2.18.4-cp38-none-win32.whl", hash = "sha256:1d886dc848e60cb7666f771e406acae54ab279b9f1e4143babc9c2258213daa2"}, + 
{file = "pydantic_core-2.18.4-cp38-none-win_amd64.whl", hash = "sha256:bb4462bd43c2460774914b8525f79b00f8f407c945d50881568f294c1d9b4443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:44a688331d4a4e2129140a8118479443bd6f1905231138971372fcde37e43528"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a2fdd81edd64342c85ac7cf2753ccae0b79bf2dfa063785503cb85a7d3593223"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86110d7e1907ab36691f80b33eb2da87d780f4739ae773e5fc83fb272f88825f"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:46387e38bd641b3ee5ce247563b60c5ca098da9c56c75c157a05eaa0933ed154"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:123c3cec203e3f5ac7b000bd82235f1a3eced8665b63d18be751f115588fea30"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1803ac5c32ec324c5261c7209e8f8ce88e83254c4e1aebdc8b0a39f9ddb443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53db086f9f6ab2b4061958d9c276d1dbe3690e8dd727d6abf2321d6cce37fa94"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abc267fa9837245cc28ea6929f19fa335f3dc330a35d2e45509b6566dc18be23"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a0d829524aaefdebccb869eed855e2d04c21d2d7479b6cada7ace5448416597b"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:509daade3b8649f80d4e5ff21aa5673e4ebe58590b25fe42fac5f0f52c6f034a"}, + {file = "pydantic_core-2.18.4-cp39-none-win32.whl", hash = "sha256:ca26a1e73c48cfc54c4a76ff78df3727b9d9f4ccc8dbee4ae3f73306a591676d"}, + {file = "pydantic_core-2.18.4-cp39-none-win_amd64.whl", hash = 
"sha256:c67598100338d5d985db1b3d21f3619ef392e185e71b8d52bceacc4a7771ea7e"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:574d92eac874f7f4db0ca653514d823a0d22e2354359d0759e3f6a406db5d55d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1f4d26ceb5eb9eed4af91bebeae4b06c3fb28966ca3a8fb765208cf6b51102ab"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77450e6d20016ec41f43ca4a6c63e9fdde03f0ae3fe90e7c27bdbeaece8b1ed4"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d323a01da91851a4f17bf592faf46149c9169d68430b3146dcba2bb5e5719abc"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43d447dd2ae072a0065389092a231283f62d960030ecd27565672bd40746c507"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:578e24f761f3b425834f297b9935e1ce2e30f51400964ce4801002435a1b41ef"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:81b5efb2f126454586d0f40c4d834010979cb80785173d1586df845a632e4e6d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ab86ce7c8f9bea87b9d12c7f0af71102acbf5ecbc66c17796cff45dae54ef9a5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:90afc12421df2b1b4dcc975f814e21bc1754640d502a2fbcc6d41e77af5ec312"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:51991a89639a912c17bef4b45c87bd83593aee0437d8102556af4885811d59f5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:293afe532740370aba8c060882f7d26cfd00c94cae32fd2e212a3a6e3b7bc15e"}, + {file = 
"pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48ece5bde2e768197a2d0f6e925f9d7e3e826f0ad2271120f8144a9db18d5c8"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eae237477a873ab46e8dd748e515c72c0c804fb380fbe6c85533c7de51f23a8f"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:834b5230b5dfc0c1ec37b2fda433b271cbbc0e507560b5d1588e2cc1148cf1ce"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e858ac0a25074ba4bce653f9b5d0a85b7456eaddadc0ce82d3878c22489fa4ee"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2fd41f6eff4c20778d717af1cc50eca52f5afe7805ee530a4fbd0bae284f16e9"}, + {file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"}, ] [package.dependencies] @@ -7601,6 +7601,22 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[[package]] +name = "xlrd" +version = "2.0.1" +description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd"}, + {file = "xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88"}, +] + +[package.extras] +build = ["twine", "wheel"] +docs = ["sphinx"] +test = ["pytest", "pytest-cov"] + [[package]] name = "xxhash" version = "3.4.1" @@ -7839,4 +7855,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it [metadata] lock-version = "2.0" python-versions = ">=3.10.0,<3.12" -content-hash = 
"5c891b42861023d77d87bf4027b2b3e53fe7eaad67d81c893f7f5be7e45a8e69" +content-hash = "efc14eedd786e12d84883e0b2dd2319a4b538a1a88891d995cd4c3fde14da2c3" From d057c2516cbf8d54791750090883cbf3ef15890b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=87=83=E5=A4=8F?= Date: Tue, 4 Jun 2024 19:22:34 +0800 Subject: [PATCH 3/6] tabular reader --- ...\345\255\230\347\216\207_csv_two_header.csv" | 9 +++++++++ ...5\255\230\347\216\207_excel_one_header.xlsx" | Bin 0 -> 9065 bytes ...45\255\230\347\216\207_excel_two_header.xls" | Bin 0 -> 25600 bytes 3 files changed, 9 insertions(+) create mode 100644 "tests/testdata/data/csv_data/30\345\244\251\347\225\231\345\255\230\347\216\207_csv_two_header.csv" create mode 100644 "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_one_header.xlsx" create mode 100644 "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xls" diff --git "a/tests/testdata/data/csv_data/30\345\244\251\347\225\231\345\255\230\347\216\207_csv_two_header.csv" "b/tests/testdata/data/csv_data/30\345\244\251\347\225\231\345\255\230\347\216\207_csv_two_header.csv" new file mode 100644 index 00000000..b0585488 --- /dev/null +++ "b/tests/testdata/data/csv_data/30\345\244\251\347\225\231\345\255\230\347\216\207_csv_two_header.csv" @@ -0,0 +1,9 @@ +time,metric +date,rate +20240101,0.9375 +20240201,0.9744 +20240301,0.9767 +20240401,0.9375 +20240501,0.9091 +20240601,0.9474 +20240701,0.9667 \ No newline at end of file diff --git "a/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_one_header.xlsx" "b/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_one_header.xlsx" new file mode 100644 index 0000000000000000000000000000000000000000..7f4e5079e2ad5edca300dfa01c107a3e0905ca49 GIT binary patch literal 9065 zcmeHNg*;u&hIa{cg?f+dggid_w5zmyWX|lZ)>TdpppR40hj;)fB|5+muqW^1OU831pw{= 
zFp-VqT%0{Y&YosZ{aisFCOp1Qj`s^uk=gPA$cX3vSN@AfpfYt-0|q2iIDdRCx6ZAy zRQCu|Xg6$-m_u8tyDw#++{y@IXLkpQz9N(@Ahj2HM?A3z%ORz{6w3|J!bHZ9g-(AQzN zeA426`C7HpgRNQMgSi#*GBDy*2R?j!1--3Pgic+Ms_t#fNx9*`D+kW5K;uCFF1Cc& z6RpcuVNzQ&ZX?K}P=>JSd4QHfYy(*pzw<;;pEPNa+UP;=efSm2>OGpi6>95NNa8DN z4GKmluFu7T5P3~85st9n(xKsI(QeIME1E#(k|~UJRE3K>k~5;s*5htj!r^WP=Ag*k z&qJFJ19m1ij9%;=rf-7zZ_xpOyE_zs)?Z-Rs0(B|LByIG0(CeDSem(m96fk>e%}8F z$Nyps{;BJg$?BRgAa2C|gtDqWGLPFBo(6{~FA+4x7co=AyB^RGKBanr zq6F>;z8+bGOT=#tF`TUlR>l&OJY<6UR7JeLbo0XCV0BMdbgNwLBl7-m`r#~7NyU%N zyCadKqPeUtcBV5)hdeBwFH{E(IWnC7pwiW`6|<|ggt&_{`k7ffb|1@Ucip=Q zl49(RSX>rhsDyDEh8A}?;r`4N_~Bd7?D;4 zNT>jqNWPA|zvINm#ofWu#l_(#d;J?TNC*l;c=_)xl~2@F`+$TUIJc2NpKNbZq6JT0 z#vR>l0?gqC_C;0(f#8!BYK~SD{W(=$6qg9U{jovsb0PdSlzXQ=oMkb@sQ&jH_aCCU zj2(}mU|Q~0(S<3Y-6P)F+OF8e!O!xz*CLfX#04P{{d&R1LK#_9!YF^zG94jkKMwTb zxA&N&9YMs*Xv2EQWw_2xRaiq))BP-Vn~l2`xy3W|yf8odsbfkkWeGe#z|{9y2F{Bf zg)_{f_G=q;zj242qD$NjnhI6Aav{mU2+#YGLrU~u_GFKZ!pt_^{Z&uds}c6D#UrlK zlYN`@;lU3ph?)L-$mC|cQOiRC0ICqX9|FV^h>-bnuvF@*yL=HQ>>&RRCiH#wcqEw% zN1H6&x&SFz;j?eb83~Y>mS`$cRr}ow^O^v5|D~C|#*fg~g{8OI!nSt`k#!r)*@dq4 zLe@{1BNJV0mbI>G^gtF%t_xAE*o1JVq0hR8&kI|zC$kMK{fBHxD){wgNeL#oT_!7D z@OI6c%^h9VRoCDF%CZ0(D%^o_*@E9#9L~1TYRo~Vqsit1cZnO0*!Uol&F>+Yi@ z?ULJ7U0qdggOEkeEecMoL>#b6{=3T*^UVeA;>RHPtVbipq_6sA=3T0}N?y46sRVi( z!-D=1b%94Shy_kw=ia2hheE-U7q2K|n5!3vxr{TF!!3eCyECECGch+UG;evBl_su( z#*aN$rp!t*fh-tq(Xp<{YTbruq}|O?9PW!r@3;}j7#}xbU{pql*%FB%_mS2h0g7r7 zhtm)0CrR8=3B?x)4yyhtzQa}{vpSFpUc??QsC{JinX`fGbOGei`_#Fi4wEo=CgfOj zgPj)@7NjjG`YuO0_-6HdMQZKJtn=!)^!$7u+HwBfIsuE>z{ls%;?EML>iNCW89=b@9XB zerahHe^VTd_MImN&US(+1e^3w%brgV!>*dA8BAAFCQg9M%oFt{Qh14Q0=NB(w#rwZ zu0TnuUEw}_iV-KLlt;iK6Z|VDcFreSU3=nkqC-@6IGsAprkb)-Mc{3#^&A zArO(e`;sW5aXa%_qA+C%EZSVU8_>aB;?=vEt`)Gm;AoDrPzqN^1i^>Y3Awot$Q(Y> z%AmE}AkHv=Ei8n1ri2Mu+skPtIwe#9HBCKdJ+#sreA~M?41+t5KgCI_h=o#LwWcPowD|JgY4K!Kr{wd-m{vW z8y!faWBOptIz4!}RC@E-TKRQ@ z0%N4FPy5wa;PF^o(Tt*NLvgE(%D}wzA+V5NM=S*tXJ%GBPEz!VF!NqDbNpe#+=Zau 
zC3E>$!px{pF*;IsrB%CdNmiS4Zq?iSr_tqQ;UX`m!zkvrrmHyBl}#5_!`ec>sT<2= z^wIVjTc0Bl$#=n@YLv(;;2QCbzpw7^eQbRk`-DSXk4Dz%nr~R^i>7A?Rfo^C;r;~? z*G#$IOEDzlXQXJO-{G#$_ujKC5MjNdk6e$eCSO`x{;tFrZ=&Axek3xbOlx|UO^4df zQeq2F`DvG&QLoH0piMcIy;_hzpO|^LiA))FS7Ryghv)HWb|a}guyeZ--Y7IBR0d~= zA2`#h@B-@shuVc&g=2Wqi?$cQM*?rkMs?$6W*xxihb!(QbVK#UMkr?F;c&@^zfrAoe zt5>}{Q-*!}Y=olhka>iXJ&k!u1==;m8=|2o*3}?+ThjazPV(4Up5wg&omB>z-p&ni z)X6A*%_1|=YGrDe3L9NH-l!wDO>MDBhaj3f@Z_T(y(n}uu-R&sgdpm(&jpg}wa_~h zDMsCXA9m|r;n|xX$S|2gm*O9<@U62223Bfnz=5}zMWv;Wa}?vy@_aZ0meunaWrs%_lRtR(o9K`taH zB{8K~Pthf9CXvlOG1HytANy8-ZfY{~Wo+@igbA`ykoYn3{tnC>cn8yW?qbqnzV$Wr zpjd8wu!sTnYv}i_-RyN~!|tUUkiubIKAt?!@m&8g?kig|?#YjxD|qr^!@6CRV8)d& z;9}@-x21C>2-ERkbqOdVmJ(2PhK*NspV;y4Q7@cxl}>tkSo0r2Q&@XPV2S|D*!`d9 z{oe@C(-!0e;{EN;_Y*oh21c>eA|%1QC(^jbdl$lAMu}Yxg%+(vunZm)CmI@^u~kq~ zDlf-lg(gd-$rijyZzoYPyVO#_;TCKlxZ#B-PmU&jPqNubnscwtPELoLkR@K{78Q9U zNac#PUX15v_$y`L^d_g9b!r_+=ETisFwdleJw(|A12rF~#u0(AoPXeOUHd>+g0~qz zB@-pQ!#Ikbm*tjDc8BV6#Sq~#xrL6oBCn`ZBqoI`NpUBLiKBP$aqFoR8}cbKA9kSj z=bwGDmQ|d;x|!ky2c8}@5lBTyg9F5r@_OZ?N$kz#e{^Oc8HG_jl;YwH*N@`aSSBju zrZ1tc|H9D86qLxwuo|0gHZ2|Oc4bNA1d@H~{7PsY{qjM|DC$5YspNw^x=L2OFko5Z zvrD0n5ijGe<(g9m&LFA~MJ9ba``D^_(@k{BIY2QzZEL`(X`|yR-pvQ0?8So1#sjL< zErPj&7c%7SwyQ#H{gDnAwKF>>jyF`PYYq+<{%_$pTj{}gF>K2CA18}z^>%rlwS}os zz}nzet>|?N0*Gr6JrO{L_4gR=bf z-S%Pz^T2oOgZuQ-*XxNcbfOJSV&cG6Iby3-lm5YFJTe$E?b#QUTT5aXCXp(#LjGbw zD=!m{uUBXp&c_}oj*FW)CHcYhH59ZR-1jY6)TWcTpUgFp?~}#SW(cP}4-5wLZVi|0OcR(78SQD{((h znjDGF%xc?XMINSe@<8jgOIg`Br5XG+(gU*1*41wMHAE+z-N3!t{V4@UrWWQ%p4_%5 zzIzK+k#Nj@fzmPNXtkZTHNqz(ejhVEd(JaDkXWc;L9ynXOB1YN*^jDAl>NfW zT)r+5jjCoWT)D|Sz--em8MBHtS}amg&O=^AwKjatr&6sPCv}MU~5g}iJ#fI>05%LY#|ZUj6h3VctcM&>+{ zlzu3hvQI?0t7MB_RZ!<_uAxxk&(ThKZ<22s)>;&!xj7e9>eoIvU{_jitYR;h}Tx)V!bp=Lv^?M0z&hA&G3u?aU_$kdo!6=`WhW_w&U_IReEe~Q4{Oe8H!UV;0G zo;nMyZhrP&cIA>vZ@8J_JnQ3u)4(wDRj!9)=~o5=t(lRt{F*J6E!6G;Tk#)>926K! 
zn?qSybtd~BI{#?2$9&DnBe5$x{ zipB^!Us6UuY9F^5H#iMXaEhkTiStr4f@bk zood(MiheV?O6CQc-VXZs#10hkZz6c@-t(&)v}?d(>g%-^ z8w9v8aW!X!FPyv-MI{gG9!L-6jvIPC;@Vcpy}10k)c(N+qbXFv-xVxc`sBM-Z?2G% z)=9C*D|V-DvKFR#Dk3RA4-Lax_>+13*0fq9Hlf~NhBEYGStiG4IJ>~g^ec;&+3lx$ zJw1#?%`_!VXDH3R`LR2DrinH1mBCS?EF2~>^32XWUVGT-O;F2PYZrZaw%-cVBXJ*x zG@1HOJG0nh_5oU${<(?;3sqj&9fnQI)F-VLMPkk00+gs|8YfM+X*hTxC&tp)BXnA%=dt z@pW@HeZ#^6dN%_vTlSZM7Gb8oI&|hnCV1G9i@g)G*X@*Jv=iQB`3f^yG|W|nc>_By z{JpW&_tvktb6LAAY(BEMm|KCj(A6qQhh!6&vc30}u(=@=!q)P6uSkh6nJZwK!^xIC z5I$%tODRqT?!TqWgPk0cCow-)G%{&RdOghkZS+VdnRo8d@9pZDx;odDHI*0^8uF1^{dqZ$tz)P0IN?3;|P%+DFt;M<%>vc2F>I&u4@ zzkTgCysi9IsQ4g(HdF;@@c!!7Z^N~AWngIPh#b=#KQIons9I9B$_L*2T$>I_GG z>X!C>nZR&JFnu11<0V}%U)dWnZ0=>>4J!KnJ5JJBPu?ZXAYbH8r<}Z8{{zI%>Fy3O zU3YhZGe+1ifP`q1RgcLgCE}F1`6j31qcCsmV~N^sY*eNw1***4JJx~sa5se$jvMo? zC`tw^vcwA%(jYgI8%-%he7J-VhMU0bhe__O^0e#3FK9aBRTs1z#ha70d!9( zQdTs4!f7tVciN?DE*LC!$#PQ&$eAob^}G0=OFYU@?7ifub?H-^;J_Yo)&|M>nqerM3JzfOc|i$L zkB0SL&Llw}=)<6k9R=rE8BW&-9Q<9enKTDi-eXkxQSkXWAH_X=?Z7jv z+V4IiL4FddB(I#Z_sP37B@xwr2q*d_{;cd_=jX%0aaT`8b?;r2hZ5;4``$!4lBAW8oUT9@2# zK=y5>oZMpQ2!saPnXU8Xx;KunvT`B3v%{|K!w3lj@37dp&qN2a*$#*{FzlUd1u~0v z9I}?0M*$S=Q|0SbA?(Vcm>EM(wn-!|!_?R0Vh)%y=#V()Zc!xR17;AG19pQ)H3Nka z6h+x-opvNPO*mN`EM&B#b*9l2G9wDH!b))8V%rO`5Fnl+8eWKUlv0DEh{Cj9pl)z! z6rzFyXesQ#+~3opb+CPGuhj;db%dY45N$oziyMGC6$x)!U0h=Yn@CEW7e&Z=wONLY zPxqy{1J!!SnvKSlLZy)ev7Y{%OXU8NBxGLClur z$L2~{hU3A@j0yLo0+E7-V56w)vB(sUBo<_|FdRCZcZ1A4C)>~LN#$abxJu%<13Bc? 
zXL;VJl?(>aZ{3#jxtKnZ$QhzpVM=5H&#!rkKRoRtG8eyOEq*or zRT%xlxDEGr0{xFn`m5=$^3Wfq2Z(eD;=R9$MZY@ubwm4y12Fl28veKK?XO;bCH+6V z6j1%%gI~7*zk2v9$^Kyv0C*rs`EQi_tNCA-#y^{1)Bef)Ukjy{DjH&T0RU{oF9gBr JGxvY~`#+s}y-olC literal 0 HcmV?d00001 diff --git "a/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xls" "b/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xls" new file mode 100644 index 0000000000000000000000000000000000000000..039af979200fd9e3bc86df554c09d57665353835 GIT binary patch literal 25600 zcmeHQcU%+6+nRIp9yw9^C$tKw#{K_Bi`)2se?#?qa-{+ZUpP8L`b~aqSV(>6$ zuUQq53;slnd=qLB4H}&V*Y4u9K7o9p5R>Aw8(afP75*1#K#L1EQbV2m6kMu#lCX&e zmyjwr@8MvIkZOowWD;pfNWZaB@e>m!Mo-}dM@Ns167U8k2;zDFr4T^L5XnLrPJrS0 z!4->S1J!_-Qj)cxj)v56H&yN!b@U@GNDBL@+KIS#ex0B;fqFyPU8r(PsG~J?)Pp07 z9HQ#np(yQ%5qS;exg-Nko%}-BtUj}j&;C!+3jkM`{O{uPNDN5mC6SX$$;l%@WGop& zV#pMNGCQ+aQ05q_P^K*$CG@+X52o^{69Wk^S)CXuq5CSK`!mv6gwG)7Ymlb{Bb@-9 zNrx<=rKG$zV|g}d#Yks?4Jhgj^`IqJ-jdUrr~?YI&a<|0wy|-KaP$dt5GTSMEjevq z3~eowKRi@0I$XApHPIv%(n7B8vN(`ND0>jgTE3JVT76M((vEl#5`m($A zgfK^+dV;l!HOL*|&?N%6xDrSBgR1m;?RfM`?RYXZ_{5oXmSrYtfh<2tG#X1jU&_>r zIDySdnTqR6_=+o2tm!q)K>IK{h1rT58Rk%bjViMuCd7=MTdyu!5grUE=8_^J#u5yQ zttH0}KzMRxWRa+X524p#crS!eRT56{RCH9ve_jtPLen!_gs%ylDE;G5i-EI&(!U~o zw-S1~68c&t^mR(;+mz6ED4{EpXQh(z+m+Cj;VUb@R|(F4Tb?8Je6>XP>yzkqiR}a? 
zA|m=bS-O^(euIR-rIon6wk+LPOm8No^9V^XQJ~A=>x#=u>}Mw>r;Y+$bbo=7NX{E1 zk`RB1Jen{lQg$fUE=?Gjndk;$oD~KJtD3E9MmVDTZYZHgM<2I3ZbdkUD0yJaW7Ln4 z5_$_U{RSBcmoO-b`->B!Jeu$jV3Jc$j3d#H$gZ#_QsqH@k(?s=o73o`c8Tl?o|&TZ z5}ZG%b}7;iD4~n=pjdvtlJX)uS1d2K!+LqL4GfH%8H4^s{T)|8)Hs6%i2=r$M6MA@ z6sIX7pZM$^)-s9fLXu?YR0X07&jmHO#AzkqzX6!HK_s)WlgGpeY;5&uyZ%5 zg1@8!wJd?f(0BR+Ok$hH!Lz9qoSIs}xv3Rk&fc^(*vM4C4olY(D~a1xZkVx!B5IOR zL%sr=TR;9_PZmw{c|1iByT*VB`3-}#Z48Kqr!0v4sA&%%QnONMj-u$WUa4C`%WL&n(p2w{I1M;`8|m zLeXhrp^#QD6A=qVrzr~MA|uq_NlMhsBotjM|DLAgdm}7lY1XJ9l$ecEnC>8ksAHmC!M&2T0mWAMK{1ihU@H{n zKjKyhg(V8D*tc(=g3-`v;#MFHu&E$68ahpJj)jhDX)DkUWK^5P9VFKu+d&16WO!9* z1?nA}M<3Ks?_!#`KPrw}-cUV|@`Jy8@98810tsfF;3QMIPH;8#-R20Q8wSCp!goPRf*J+^x8Cbem+X&A(`F(4&D4TFH=;QNwzcs2~;BLe~nW}V<_NV!hXgU|?_Gzg{ys=aP{SZ?8Uy0tNe5ZZHU&qiHeq?d z`&&E0BQ6lTOpl+-1D|^qw--bJtq&}YrBEB*9VnJW;aNn7XmSM66;*GA>UfQh8<+;z zp*I8h7%=nkVBllT$OjjE^?cL?(O>?&#muJ#GapX|K5ZHK;5xCMkA@)n<;$1Md<>cS zbYS4qj**XOK|twEQxGkDTf)r8h?!4E20pHgd~kYR-yW_Y`d;lvWx(ydDFj|DRyCVR7G zS+HxEx?MKm)X9y1>% zdvj;xL$@~%&vmCBGxKS~%!kR|IxzB~+na~y)~idI_;84Iin=h1+Zu*V7!T_MOI=;4 zMrKdb2)!{5Nz2(<#0FNHEX|nJtFyRHvW3-}3)uv7a4||fOJS+kePAs0EM+Zc>k?a7 zQBrk9&uKro`uai+RUdQFcI7eHnnVvC)&LrR1{#5gc2Y#NSb0PP38KhKl^_cFxreLj z5!*@;(Kh7~TSyQ^_NW9=)Zy5ajP_DQv_dzzRvStX(ee~E2eS}O;s6sB)~oO=F4`B~ z3Q=IO%!MrSDqP&t)zO0i?mUzVuhHjMKa&Bqr4YEoayN}d;HGsNaVA~DT6O}2Z_R*p z(lABVX23dW7@ak`BgL%Y<)N6h7Q7(@qv;jrGg9GUsb&iNF$cX3lrri3HE1`S1%4Ol z?biU+a)?t32byFis}pq&K-w>1jX)h?Dd!YTD8-X2r3s>9so^ljf&!_sS}>CYNplvh znw78m7?IS{6jvrsOD46P-JH0B5?Ci< zYQ_v2(5=63Ocr;loD{8qpERE?3wq8d4MJ0J$DWKV=q}TsG>Ec@Qes&|DX}b~6w2a; zvOG$^m}x(9zQB1*78fVil%}Wyi`^0UN%O@PByMcI;pu{Yq1#6<>N`+`BG&=f&7cE_ zbLki$r`Up;!-xQ~d|QzhCDsdHP8Ji8MVc=jK2R3PeL?#{0{SUM_T!PF29{wd3#G0G z0(BeEFIXdV#Y$``+K^|829&gwE2$yWl$4aUqYh|C>C(M$?(pRX<3LhRvr_$#`%bguvA_G7)m6CFomIy~s#v+ieja3FPIWPHcAL{MJ{;&?{o3hdKA|fBBF|das8Q1R&t|s zR?>2G7qdx7pc=;r=y5YEH9O`I@AFkwo>z?qW-3E{3Y#>ef* zy!7K~cl{n8dw-c%KUl*?3v$^M&p3hA@abb_51d||o!@u&+W;rqTJv7vc@MmUD|QY}pJQg?8=2R9^}E79jyZVGPwA7s 
zHZ*V6<4I$J_W3Q%h_}kveEsiE>`NV|wN3e&qE|Y#|6P;X^vq6=io2E9s2$v6G1Tu> z&F9s&)fokae6JsO^zFu-xBo=ejTc)!_bfIJaJ_iM{=MsFr}UiCVW$RuHr?}dgrMWK zj$7{Y-Zop@;`AP!^oT2Cc&09okNd1oe^GgBSlqFDOZP2J^cqp}<4K{>$oKrGhGiwm z!;?WHLYNiH>MQS7{*x1+Coi~XxDO&CUJ#ey=8DJEWmjTPyX(xaODmdpEvf(Y(!pUFQ^DU7P>27^tEFB>wSAv zb{J)woS!*4qTFirdeetqL1piaL(9EK?pn5`Sh)YRd&&CXlgYcCYkF`dc&`W=`{$^f zEpCqn6lH7?Y`bB3^}ylJx4uGC6dO8j=1Svv(7=Td@$JZz7{Pdf8|~OHazf-7K|ETB zPDwxQ(>(nyRY^J1Gb+5Jjym)hweQldqaR4OY_qQCH(uXRSzhyKp7-SkMmN_5>JD?x z*3?}nD0N+M=yjrVzWJ6ry<4Akj;lJ~^6|47CdCCy3?KBlkkCKL=4M9P&I9*&w+=Yn zY*XiaZ^RMXSieQRhCD6%=vdvO)~T&FyA!kUzxETntc2cj$chl`4K%SuO=yo|zZiCJ zoOv((WVd(8on95~9bIeVl{xQyOZzviGPAPI|5{Kud|cg!+u;@0hWB3IHz%Zb;0WzJ zw-TmQEabb}xg1<_?d8|#$A>%UDmjcdpa+v;cu!fbEAOlO7wVC3mt- zdhzx1^FxOo_z54qdGh+n#LIC74kiopLYgIA^ts4B$muxEy>yKWU8Iv|WZuM^2@qII+)W-*{EPF(fxw=<3bkhc7C`$ZD;h>*k|P%GqPqBq`0nd z8n>ikg6D;dYvb(`i^5`u49UNkU^Keh%J7S4GS0>i+tuS~#v3nUlwsXxRPc{2e7h(0 zeyowZ`F#4JZgcfpxOHuBxF>9=OF^@7uOq&$d}nXbzhF*go1HUQhI{vH{xl=^N+0WS z-z=^^H_s^FJ>oatnKR%1xoUZSaPays9S-aqG%UW-K`-dY^ashGKYA}KIzIc`hkN^X zhnD>{BeMFzmroVzB9fCg{Cs=ztA*te$7lTY_H6Bwh+|)?)^z*FxR;;deLDl=`uI9&eZbN(nlx8XD{n_*DrNx*g6!$D!3HaAvI zoMts+YR~Ya6RK^@wU(*p>b(sQa`C<&5S4Dd>Z0qyEze$C*mi4Gd^z=Fzp%jG3#Ozv z{`HfZb82AphSRf`Y*@~#9}IJMwb64JVfIRsGrY>bNdNBq>e$eWhTZR(CiHTi7#UqNaZJE`?$K=} z#>;j*FFc#>>GS;2iM}r_7CUzCskXT2pF(g@*~3x=5y@IVP2EN zN1n%S+MlV_7CFI5oWxD01(E%W zpA87kwqUhwlQ?j^p3cz2DK;*TCb#9=dAvw-WbPe{vu z6thbA`tr-8`u`UD`)`AIYj(G~SkXB*M_CYkn^Ex|+Yklil}&udPm$S9(Xy z^yhK?f9SK}+1#?$w(l;xrkyREcXn#tY~yX}#$8+%X(!fvc(~~2QnyuIIH?ca_0Ih@ zXW;;|=fkEiSNs|xVS|MV4cR_h#%e(}K@LnCX zt`%%rebzUvHgx^9YeUvFU%dU<`ja`o1pdBcPpd5&<2v7b{`;`<-ak0MEX;4_c*V4b zXWg2?gZhV;g`ah9w%^PB$)!adXU@A>ayF!_L~p~&SD&+v`fs0_kPvXo?nKoZVe9aZ zI=Uw}t~B4j#qr!C!O)v8o^8A56K(FBc4yey)c)_CJP#zyzVvfyS!?GJTmAxt-8yxB zlGl*f;9Bn|kB5)H<&#={E!A;)P~O#?)akccpX_qunrENEUIkAd^WkBq_$=4Gma*X= zIJ)43BTmKEw}w!OFD>dqT#seg9V_2S$1 zfALaxmowLOe&io(#vWbuTd`r3$Nly(+swF~pZz*{?aA?nUo7nsYW!n*h0o|cBj+xi 
z=>4+gn9M+jer;_-9bDV5j@PsAyFB~q!hxStc6HjcZ1A7fn||+l;po-gAEzd5e6x;~ zc-FRPl>V%n#B^oITH*e$oy^k5xLFy8f9u~hp(0J)A@^L#Lz{Kmf;bn47Z#p*AZa>_ z?E*VTl+AFV7%ec&cS8bY0pzEA(%SBzPuc`fzM}jTN?Oqr=9UU`5}Nwuk>uwyQlwi< zNYv~`9z)&;q!=~u%9Son111p?sE&IZV4*Z(Uq7!V-rKOSI8!h zA3@R4??~WFEIiJoC@Ite2a*u%z^_9{7uFoeISGp&g5O<{c~t3C>ev=aM^oW3A3s(T zUpq)pp7I@O*E#Sxn;Ia3#}xixhZQWWJgHE|7oVFS&*ruz*>XAn#**NRz5*%?0x3W{ zY~06;-|At&ws6_FM+&dIpi*Hy*_@(bLY{`@Q$pZ0dG<6}Oy$e$b>A<73o5~RG%P7LRDt>Ysqw$7jQ94U*6?eizM zB1N?{s9Mef)9RtEUQ(#oR(}#5CCW!vus@j;p?TS~|0^mV+wD(2j27i{sr;JAoLe30 zUh&Ie*>M;ex)Ih+t>Kynf1;y5y9GH&^`S0MA47*aLKZ=9StMgA9(5LA^J@5V1ic7q z@Taa`2cw!gxXK6_!)*l-tBV436m);7brX%(>A>Q92}#+ zJ(EkE?DtpT=Fbv{cthQX+Wg`l?GHAP-#G=f&Wh$XlVV;DrF(XWQ<{}71y0{}xI;-_+>A>zw?TzjLxZ3;wO_a{Qc*T<<4(I2oFq7K9j z5b6E^^d(>rtDh;SKpwfU!11b%uH1PjZ1Gp;21v9R>as7?kByd{=E}rq7KCbm~ zzY(szam|kFcidfr>waA08$iT&{W5afcH#H8Uozo$vF6+2;~?jCO7&F zdz~cwQHI7=wDc2|REJNL@S~;i@KF=Cy$@(1nvw}AQ{Y3Ui9k^v9DqOeA?!bm e*m)qRlkUJFijqM<-GQdb)9GJmKWx8|@c$o>eF$Ly literal 0 HcmV?d00001 From 4cd41767f89eec89ee452d79ab3360a2248fffed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=87=83=E5=A4=8F?= Date: Wed, 5 Jun 2024 13:54:33 +0800 Subject: [PATCH 4/6] tabular reader --- src/pai_rag/docs/tabular_doc.md | 82 ++++++++++++++++++ .../integrations/readers/pai_csv_reader.py | 8 +- tests/data_readers/test_csv_reader.py | 4 +- tests/data_readers/test_excel_reader.py | 2 +- ...\255\230\347\216\207_excel_two_header.xls" | Bin 25600 -> 0 bytes ...255\230\347\216\207_excel_two_header.xlsx" | Bin 9065 -> 9100 bytes 6 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 src/pai_rag/docs/tabular_doc.md delete mode 100644 "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xls" rename "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_one_header.xlsx" => "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" (64%) diff --git 
a/src/pai_rag/docs/tabular_doc.md b/src/pai_rag/docs/tabular_doc.md new file mode 100644 index 00000000..142179f5 --- /dev/null +++ b/src/pai_rag/docs/tabular_doc.md @@ -0,0 +1,82 @@ +# Tabular processing with PAI-RAG + +## PaiCSVReader + +PaiCSVReader(concat_rows=True, row_joiner="\n", csv_config={}) + +### Parameters: + +**concat_rows:** _bool, default=True._ +Whether to concatenate rows into one document. + +**row_joiner:** _str, default="\n"._ +The separator used to join rows. + +**csv_config:** _dict, default={}._ +The configuration of csv reader +Set to empty dict by default, this means pandas will try to figure out the separators, table head, etc. on its own. + +#### one important parameter: + +**header:** _None or int, list of int, default 0._ +Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row +positions will be combined into a MultiIndex. Use None if there is no header. + +### Functions: + +load_data(file: Path, extra_info: Optional[Dict] = None, fs: Optional[AbstractFileSystem] = None) + +## PaiPandasCSVReader + +PaiPandasCSVReader(concat_rows=True, row_joiner="\n", pandas_config={}) + +### Parameters: + +**concat_rows:** _bool, default=True._ +Whether to concatenate rows into one document. + +**row_joiner:** _str, default="\n"._ +The separator used to join rows. + +**pandas_config:** _dict, default={}._ +The configuration of pandas.read_csv. +Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html for more information. +Set to empty dict by default, this means pandas will try to figure out the separators, table head, etc. on its own. + +#### one important parameter: + +**header:** _None or int, list of int, default 0._ +Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row +positions will be combined into a MultiIndex. Use None if there is no header. 
+ +### Functions: + +load_data(file: Path, extra_info: Optional[Dict] = None, fs: Optional[AbstractFileSystem] = None) + +## PaiPandasExcelReader + +PaiPandasExcelReader(concat_rows=True, row_joiner="\n", pandas_config={}) + +### Parameters: + +**concat_rows:** _bool, default=True._ +Whether to concatenate rows into one document. + +**row_joiner:** _str, default="\n"._ +The separator used to join rows. + +**pandas_config:** _dict, default={}._ +The configuration of pandas.read_excel. +Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html for more information. +Set to empty dict by default, this means pandas will try to figure out the separators, table head, etc. on its own. + +#### one important parameter: + +**header:** _None or int, list of int, default 0._ +Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row +positions will be combined into a MultiIndex. Use None if there is no header. + +### Functions: + +load_data(file: Path, extra_info: Optional[Dict] = None, fs: Optional[AbstractFileSystem] = None) +only process the first sheet diff --git a/src/pai_rag/integrations/readers/pai_csv_reader.py b/src/pai_rag/integrations/readers/pai_csv_reader.py index 70b76d7c..67e9093a 100644 --- a/src/pai_rag/integrations/readers/pai_csv_reader.py +++ b/src/pai_rag/integrations/readers/pai_csv_reader.py @@ -20,9 +20,11 @@ class PaiCSVReader(BaseReader): concat_rows (bool): whether to concatenate all rows into one document. If set to False, a Document will be created for each row. True by default. - csv_config (dict): Options for the reader.Set to empty dict by default, - this means reader will try to figure - out the separators, table head, etc. on its own. + csv_config (dict): Options for the reader.Set to empty dict by default. + one important parameter: + "header": None or int, list of int, default 0. + Row (0-indexed) to use for the column labels of the parsed DataFrame. 
If a list of integers is passed those row + positions will be combined into a MultiIndex. Use None if there is no header. """ diff --git a/tests/data_readers/test_csv_reader.py b/tests/data_readers/test_csv_reader.py index 9da1c2b0..58a365ec 100644 --- a/tests/data_readers/test_csv_reader.py +++ b/tests/data_readers/test_csv_reader.py @@ -25,7 +25,7 @@ def test_csv_reader(): documents = directory_reader.load_data() for doc in documents: print(doc) - assert len(documents) > 0 + assert len(documents) == 7 def test_pandas_csv_reader(): @@ -45,4 +45,4 @@ def test_pandas_csv_reader(): documents = directory_reader.load_data() for doc in documents: print(doc) - assert len(documents) > 0 + assert len(documents) == 7 diff --git a/tests/data_readers/test_excel_reader.py b/tests/data_readers/test_excel_reader.py index b572735f..c98aad42 100644 --- a/tests/data_readers/test_excel_reader.py +++ b/tests/data_readers/test_excel_reader.py @@ -29,4 +29,4 @@ def test_pandas_excel_reader(): documents = directory_reader.load_data() for doc in documents: print(doc) - assert len(documents) > 0 + assert len(documents) == 7 diff --git "a/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xls" "b/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xls" deleted file mode 100644 index 039af979200fd9e3bc86df554c09d57665353835..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25600 zcmeHQcU%+6+nRIp9yw9^C$tKw#{K_Bi`)2se?#?qa-{+ZUpP8L`b~aqSV(>6$ zuUQq53;slnd=qLB4H}&V*Y4u9K7o9p5R>Aw8(afP75*1#K#L1EQbV2m6kMu#lCX&e zmyjwr@8MvIkZOowWD;pfNWZaB@e>m!Mo-}dM@Ns167U8k2;zDFr4T^L5XnLrPJrS0 z!4->S1J!_-Qj)cxj)v56H&yN!b@U@GNDBL@+KIS#ex0B;fqFyPU8r(PsG~J?)Pp07 z9HQ#np(yQ%5qS;exg-Nko%}-BtUj}j&;C!+3jkM`{O{uPNDN5mC6SX$$;l%@WGop& zV#pMNGCQ+aQ05q_P^K*$CG@+X52o^{69Wk^S)CXuq5CSK`!mv6gwG)7Ymlb{Bb@-9 zNrx<=rKG$zV|g}d#Yks?4Jhgj^`IqJ-jdUrr~?YI&a<|0wy|-KaP$dt5GTSMEjevq 
z3~eowKRi@0I$XApHPIv%(n7B8vN(`ND0>jgTE3JVT76M((vEl#5`m($A zgfK^+dV;l!HOL*|&?N%6xDrSBgR1m;?RfM`?RYXZ_{5oXmSrYtfh<2tG#X1jU&_>r zIDySdnTqR6_=+o2tm!q)K>IK{h1rT58Rk%bjViMuCd7=MTdyu!5grUE=8_^J#u5yQ zttH0}KzMRxWRa+X524p#crS!eRT56{RCH9ve_jtPLen!_gs%ylDE;G5i-EI&(!U~o zw-S1~68c&t^mR(;+mz6ED4{EpXQh(z+m+Cj;VUb@R|(F4Tb?8Je6>XP>yzkqiR}a? zA|m=bS-O^(euIR-rIon6wk+LPOm8No^9V^XQJ~A=>x#=u>}Mw>r;Y+$bbo=7NX{E1 zk`RB1Jen{lQg$fUE=?Gjndk;$oD~KJtD3E9MmVDTZYZHgM<2I3ZbdkUD0yJaW7Ln4 z5_$_U{RSBcmoO-b`->B!Jeu$jV3Jc$j3d#H$gZ#_QsqH@k(?s=o73o`c8Tl?o|&TZ z5}ZG%b}7;iD4~n=pjdvtlJX)uS1d2K!+LqL4GfH%8H4^s{T)|8)Hs6%i2=r$M6MA@ z6sIX7pZM$^)-s9fLXu?YR0X07&jmHO#AzkqzX6!HK_s)WlgGpeY;5&uyZ%5 zg1@8!wJd?f(0BR+Ok$hH!Lz9qoSIs}xv3Rk&fc^(*vM4C4olY(D~a1xZkVx!B5IOR zL%sr=TR;9_PZmw{c|1iByT*VB`3-}#Z48Kqr!0v4sA&%%QnONMj-u$WUa4C`%WL&n(p2w{I1M;`8|m zLeXhrp^#QD6A=qVrzr~MA|uq_NlMhsBotjM|DLAgdm}7lY1XJ9l$ecEnC>8ksAHmC!M&2T0mWAMK{1ihU@H{n zKjKyhg(V8D*tc(=g3-`v;#MFHu&E$68ahpJj)jhDX)DkUWK^5P9VFKu+d&16WO!9* z1?nA}M<3Ks?_!#`KPrw}-cUV|@`Jy8@98810tsfF;3QMIPH;8#-R20Q8wSCp!goPRf*J+^x8Cbem+X&A(`F(4&D4TFH=;QNwzcs2~;BLe~nW}V<_NV!hXgU|?_Gzg{ys=aP{SZ?8Uy0tNe5ZZHU&qiHeq?d z`&&E0BQ6lTOpl+-1D|^qw--bJtq&}YrBEB*9VnJW;aNn7XmSM66;*GA>UfQh8<+;z zp*I8h7%=nkVBllT$OjjE^?cL?(O>?&#muJ#GapX|K5ZHK;5xCMkA@)n<;$1Md<>cS zbYS4qj**XOK|twEQxGkDTf)r8h?!4E20pHgd~kYR-yW_Y`d;lvWx(ydDFj|DRyCVR7G zS+HxEx?MKm)X9y1>% zdvj;xL$@~%&vmCBGxKS~%!kR|IxzB~+na~y)~idI_;84Iin=h1+Zu*V7!T_MOI=;4 zMrKdb2)!{5Nz2(<#0FNHEX|nJtFyRHvW3-}3)uv7a4||fOJS+kePAs0EM+Zc>k?a7 zQBrk9&uKro`uai+RUdQFcI7eHnnVvC)&LrR1{#5gc2Y#NSb0PP38KhKl^_cFxreLj z5!*@;(Kh7~TSyQ^_NW9=)Zy5ajP_DQv_dzzRvStX(ee~E2eS}O;s6sB)~oO=F4`B~ z3Q=IO%!MrSDqP&t)zO0i?mUzVuhHjMKa&Bqr4YEoayN}d;HGsNaVA~DT6O}2Z_R*p z(lABVX23dW7@ak`BgL%Y<)N6h7Q7(@qv;jrGg9GUsb&iNF$cX3lrri3HE1`S1%4Ol z?biU+a)?t32byFis}pq&K-w>1jX)h?Dd!YTD8-X2r3s>9so^ljf&!_sS}>CYNplvh znw78m7?IS{6jvrsOD46P-JH0B5?Ci< zYQ_v2(5=63Ocr;loD{8qpERE?3wq8d4MJ0J$DWKV=q}TsG>Ec@Qes&|DX}b~6w2a; zvOG$^m}x(9zQB1*78fVil%}Wyi`^0UN%O@PByMcI;pu{Yq1#6<>N`+`BG&=f&7cE_ 
zbLki$r`Up;!-xQ~d|QzhCDsdHP8Ji8MVc=jK2R3PeL?#{0{SUM_T!PF29{wd3#G0G z0(BeEFIXdV#Y$``+K^|829&gwE2$yWl$4aUqYh|C>C(M$?(pRX<3LhRvr_$#`%bguvA_G7)m6CFomIy~s#v+ieja3FPIWPHcAL{MJ{;&?{o3hdKA|fBBF|das8Q1R&t|s zR?>2G7qdx7pc=;r=y5YEH9O`I@AFkwo>z?qW-3E{3Y#>ef* zy!7K~cl{n8dw-c%KUl*?3v$^M&p3hA@abb_51d||o!@u&+W;rqTJv7vc@MmUD|QY}pJQg?8=2R9^}E79jyZVGPwA7s zHZ*V6<4I$J_W3Q%h_}kveEsiE>`NV|wN3e&qE|Y#|6P;X^vq6=io2E9s2$v6G1Tu> z&F9s&)fokae6JsO^zFu-xBo=ejTc)!_bfIJaJ_iM{=MsFr}UiCVW$RuHr?}dgrMWK zj$7{Y-Zop@;`AP!^oT2Cc&09okNd1oe^GgBSlqFDOZP2J^cqp}<4K{>$oKrGhGiwm z!;?WHLYNiH>MQS7{*x1+Coi~XxDO&CUJ#ey=8DJEWmjTPyX(xaODmdpEvf(Y(!pUFQ^DU7P>27^tEFB>wSAv zb{J)woS!*4qTFirdeetqL1piaL(9EK?pn5`Sh)YRd&&CXlgYcCYkF`dc&`W=`{$^f zEpCqn6lH7?Y`bB3^}ylJx4uGC6dO8j=1Svv(7=Td@$JZz7{Pdf8|~OHazf-7K|ETB zPDwxQ(>(nyRY^J1Gb+5Jjym)hweQldqaR4OY_qQCH(uXRSzhyKp7-SkMmN_5>JD?x z*3?}nD0N+M=yjrVzWJ6ry<4Akj;lJ~^6|47CdCCy3?KBlkkCKL=4M9P&I9*&w+=Yn zY*XiaZ^RMXSieQRhCD6%=vdvO)~T&FyA!kUzxETntc2cj$chl`4K%SuO=yo|zZiCJ zoOv((WVd(8on95~9bIeVl{xQyOZzviGPAPI|5{Kud|cg!+u;@0hWB3IHz%Zb;0WzJ zw-TmQEabb}xg1<_?d8|#$A>%UDmjcdpa+v;cu!fbEAOlO7wVC3mt- zdhzx1^FxOo_z54qdGh+n#LIC74kiopLYgIA^ts4B$muxEy>yKWU8Iv|WZuM^2@qII+)W-*{EPF(fxw=<3bkhc7C`$ZD;h>*k|P%GqPqBq`0nd z8n>ikg6D;dYvb(`i^5`u49UNkU^Keh%J7S4GS0>i+tuS~#v3nUlwsXxRPc{2e7h(0 zeyowZ`F#4JZgcfpxOHuBxF>9=OF^@7uOq&$d}nXbzhF*go1HUQhI{vH{xl=^N+0WS z-z=^^H_s^FJ>oatnKR%1xoUZSaPays9S-aqG%UW-K`-dY^ashGKYA}KIzIc`hkN^X zhnD>{BeMFzmroVzB9fCg{Cs=ztA*te$7lTY_H6Bwh+|)?)^z*FxR;;deLDl=`uI9&eZbN(nlx8XD{n_*DrNx*g6!$D!3HaAvI zoMts+YR~Ya6RK^@wU(*p>b(sQa`C<&5S4Dd>Z0qyEze$C*mi4Gd^z=Fzp%jG3#Ozv z{`HfZb82AphSRf`Y*@~#9}IJMwb64JVfIRsGrY>bNdNBq>e$eWhTZR(CiHTi7#UqNaZJE`?$K=} z#>;j*FFc#>>GS;2iM}r_7CUzCskXT2pF(g@*~3x=5y@IVP2EN zN1n%S+MlV_7CFI5oWxD01(E%W zpA87kwqUhwlQ?j^p3cz2DK;*TCb#9=dAvw-WbPe{vu z6thbA`tr-8`u`UD`)`AIYj(G~SkXB*M_CYkn^Ex|+Yklil}&udPm$S9(Xy z^yhK?f9SK}+1#?$w(l;xrkyREcXn#tY~yX}#$8+%X(!fvc(~~2QnyuIIH?ca_0Ih@ zXW;;|=fkEiSNs|xVS|MV4cR_h#%e(}K@LnCX zt`%%rebzUvHgx^9YeUvFU%dU<`ja`o1pdBcPpd5&<2v7b{`;`<-ak0MEX;4_c*V4b 
zXWg2?gZhV;g`ah9w%^PB$)!adXU@A>ayF!_L~p~&SD&+v`fs0_kPvXo?nKoZVe9aZ zI=Uw}t~B4j#qr!C!O)v8o^8A56K(FBc4yey)c)_CJP#zyzVvfyS!?GJTmAxt-8yxB zlGl*f;9Bn|kB5)H<&#={E!A;)P~O#?)akccpX_qunrENEUIkAd^WkBq_$=4Gma*X= zIJ)43BTmKEw}w!OFD>dqT#seg9V_2S$1 zfALaxmowLOe&io(#vWbuTd`r3$Nly(+swF~pZz*{?aA?nUo7nsYW!n*h0o|cBj+xi z=>4+gn9M+jer;_-9bDV5j@PsAyFB~q!hxStc6HjcZ1A7fn||+l;po-gAEzd5e6x;~ zc-FRPl>V%n#B^oITH*e$oy^k5xLFy8f9u~hp(0J)A@^L#Lz{Kmf;bn47Z#p*AZa>_ z?E*VTl+AFV7%ec&cS8bY0pzEA(%SBzPuc`fzM}jTN?Oqr=9UU`5}Nwuk>uwyQlwi< zNYv~`9z)&;q!=~u%9Son111p?sE&IZV4*Z(Uq7!V-rKOSI8!h zA3@R4??~WFEIiJoC@Ite2a*u%z^_9{7uFoeISGp&g5O<{c~t3C>ev=aM^oW3A3s(T zUpq)pp7I@O*E#Sxn;Ia3#}xixhZQWWJgHE|7oVFS&*ruz*>XAn#**NRz5*%?0x3W{ zY~06;-|At&ws6_FM+&dIpi*Hy*_@(bLY{`@Q$pZ0dG<6}Oy$e$b>A<73o5~RG%P7LRDt>Ysqw$7jQ94U*6?eizM zB1N?{s9Mef)9RtEUQ(#oR(}#5CCW!vus@j;p?TS~|0^mV+wD(2j27i{sr;JAoLe30 zUh&Ie*>M;ex)Ih+t>Kynf1;y5y9GH&^`S0MA47*aLKZ=9StMgA9(5LA^J@5V1ic7q z@Taa`2cw!gxXK6_!)*l-tBV436m);7brX%(>A>Q92}#+ zJ(EkE?DtpT=Fbv{cthQX+Wg`l?GHAP-#G=f&Wh$XlVV;DrF(XWQ<{}71y0{}xI;-_+>A>zw?TzjLxZ3;wO_a{Qc*T<<4(I2oFq7K9j z5b6E^^d(>rtDh;SKpwfU!11b%uH1PjZ1Gp;21v9R>as7?kByd{=E}rq7KCbm~ zzY(szam|kFcidfr>waA08$iT&{W5afcH#H8Uozo$vF6+2;~?jCO7&F zdz~cwQHI7=wDc2|REJNL@S~;i@KF=Cy$@(1nvw}AQ{Y3Ui9k^v9DqOeA?!bm e*m)qRlkUJFijqM<-GQdb)9GJmKWx8|@c$o>eF$Ly diff --git "a/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_one_header.xlsx" "b/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" similarity index 64% rename from "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_one_header.xlsx" rename to "tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" index 7f4e5079e2ad5edca300dfa01c107a3e0905ca49..ef7ec96f94bfe4af8ecb542400a2a6a57f8cf5bf 100644 GIT binary patch delta 2270 zcmV<42qE|BMvO_Dx*bRF<-xBjsD``+i&(4wYa{l>uU`Fs>auIe<|l4 
zLQR^pk~VwwoK>(gt3^pA48;{{=7Q0z$r+PnflQ1YR8n5)D+EvMf>FgSxV-H^b*)Uj!>g(m^s)fAm&DeV0v-qc z1Vdx?C&ALzQ6*1B-PAL=2heCme|pP*f)OE4La^M9!EQ(v66}Vn9Bx<%=b5aW4VLo& zM(|go5s+4=6%T^VS&EDS7>SK(aYw9%3Nzj;p^yC1J0wArL5o-^+nO3#I*nX?c*Q6 ze?a%Y{`mRlclYmpx&P_YU+>?dN1CPE1@a$MOH&mJ1hH80UA=!97oaV?UYRFSXz+d* z9YJcTtYY|%UjMh&&) zIj^#M;p>hCKHc^s5El#=-P$?NWbW(c5IpMVzGM6<2g1a59aSB|e^kVL{Vt50FmN$3 zg21&63tP4^3QPz>WV!?=!PprD9U=6HQFpTfv#!^wt{P6&ZA;(^SA7>_T@#)0wJ%2h?%jXl*S>)FlZY&EgY)05KkCXtH! zdiTE>o`UxU+tD~V+tDz5c|1Md(e@~wUYsX88jmNF@#|^)5PiP|{bPVomq68avO|5^ z50j4r7PH9=#0m)poUL320{{T1la?1Cf0IjZ;xG`$?xQt z9cMx;{K(qoQLXm7cM=4x#06EdrSaJQ{bxMm#p6jUascP0H3dyZF(qJxt)|JJ zsHBqNWq?wm#r%0|tmBo!yiZBS1vxn!DbDdlX7JxAX(^obwh1F7vvbV1>E|qGTwF=F z`QL>}GUnhQ`vSh9(vO9dJX~4ne;aK4LD=L9>z~Wb_p&bNzj%|cCwV-LzO0fmnvJtO zn$4zVl$TErS-GCBSLyUWT`p>g74EYl4w`~KCG%BI*>W-T&uW3>;d>Rn+v8V|+cuzgg1}b*0096000030|9sP14uUWc1<Z>>a^<+YmCASIe9G+Y7Ad93ZjlihKw1}V|f@@3nDEB_YHvjC(7 zs0#pB5+ECe$Jvh~8Rx`mkP^)m8m=VGda_kNw1(9nC7LfZTuGX92tGRt)&8S?0FxgY zkPdWrDBP<6008m;000yKlfWVte{7CH3c@fDMfZYt2(!1TA|jHc3&9iU0YW?4Kr^YC zsp##Eh`7+@d;fpBqh9}=O32QoBnJ~Pas z%9hwdFEZ}5=C;S^xk@8aNzN%pF9q{O^O(_h+#^aH^s25mI{HAM+BA_^CRl@-C|oDp zf7lrg&WOui&}eZ+zxeICKTbmY=|z+r+V5JI&+ZA6kpmP2qcRb?vxOTr0e@o)Z3ilQ zawyoL_!g4eE!ZX{iFW(;rtMZMQ+!O$xxbuqNxC}JmG=qS2qiNV1tIc4GR1}5WoTQh z{RQ%jrIOQ1NyyL<3|i&aS1D@C9k%&%6&PHVOp{pLi8HNgKkk;TRi14@9g09W4 z1HCeF*1{ee*Xa~@%I0hkNB6;q(b%?a&?bEhmp{UD+VY}IU`I1wMyq3 z!L%}}wBBP`3I^WRfKKtm(QUJ$#%|o=O9A}$_}9>-(Y@3c8=eF1xjm?4D0Ws}lDqq2 zjq*5*XMPy@;j)Ngk|ZR)dFyignD3$w7xgL4zr?v8&WdnBqB)r@C&V)#Pw~Zf$-e-T zkpmR73LcvS1tOh2Tb+}tAyNUkll>t&0q&DHA|@OLoUL320{{T12LJ#V0000000000 z000000DTven-?0B6(bNEbayD+s{jB1@&Et;6aWAK0000000000006m@zal3aqcRb? sQUL$}djbFe5dZ)H0000000000005Sg0V6&FA0CrIBO(U&AOHXW08<_)tN;K2 delta 2249 zcmY+GdpOgN7sqFY*(5Z%EB9$+!d$*|aVwW{DQY2eStgWAE@}A)B{jt8HYT?yxz~u? 
zW9GUwUs)`aRV2BUZMc0 z;!Ymv%hrRt?Yt9MD3onC6-WMoK+C8|vr?VhYLju*mv!SO*bC3!(C!wM2`CuCrd?_s zPm@vO%J-or^fZI8gRVgy1 z>FdEPw?~3$4-cmpY~na-P97zm2?rb>)~=IrvmCe5BY5se)N?Ur+|hO5{$>u&u^(Ci)=WxAS|y{kZynIbiOAM`VP*@(>H~y%n#dj43EAx8S`Iy zkZW;VWFKs_+(8>L3^q>jYngG;4s3>cxwY>pO^Ji<3gr>i(@DcvuW|+Gi&Qb2H!9c+ z8pMwT@5Xl{GUptxN&=<{SE@c*%p!yI-w*O^Urg1-2t`~6CcZx=$ zU-=KlZU}s&u+G?Asit+it%UnY^J5Rpx*ROURq|Cg@Qa(7{91tBHVH%;_vbm7ko8)wZ~r@I*k&}5 zIw~nNNe{H4!x^Uvzv%i^!Bz?cZ?Ji(HBj6OMl z(Dq&PT09^MjTIjW)A?(nY-8#Ih@2&isHaQ3f=Tw2PRq}OK_Ck;5J(TZxd^j0-VsD=;3b3(mnxUk?wX(-yG!A>RpSDkh=$s?b6yLE%vaIA zs-*|N;q>0@p|g;$pa{0?$#Lb)pDp4tq^HGCimXw+OJb*9djD#q?0UxGh_ZeO*aZ+D|P zqOk*f!6jaJ&16q6YgrD8lO&R$#Z^{Lm9gB;paZh;tRHX-mOy2o+=!8wu&Y2gbX_&U zu)Jg^O#fXRPjOc_u!)i#xo9xqawQJj8kSd(AHTQ*1lrsLZ6zYx3zm$Q5@A*z*~zpd zIp`ZQU#H)IZk)R%-Mn$uk)>c%=^NQHgWe%Ai^*7dUci@sM6Y;YQD~P--LT>{kfjYJ zq^N2eM9N=TkxjNLGcw7_t?5o2*ASbJ^(WhFKqa7dfMu^x@$4^QCg%{M3#Va(p@1+Ys$R99$&s`>ZyK6ay?-`X(!sZ79_oU;8Hwji zROyTTD^EprS>A=yj-yX?PJXxft4^`RcIDc26gFz;TTj+FuqygzG~>YzSSMNvLO!Ah zh%<3O#lznd_mY{4O2CPXLtDshEh!dS-|*Py#*ys35SNEy&_ROmV9D7>MGdCvd%auN zE-fD2bSB5+rtdL2X~Boowwc>DVH@*YC|1awE*&oSnVoQj_=YnIXWgD_(a<*s#40bz z8(uoUOgeElWG}<1JCbcFw|e!ktpj=CZA=+C6LPK>o_0-AX~cS|b!v$|6n@d(dnw8O zmeG2bubd#S%4I@n>yjWpZd;P;%at%ffQ`-g)jp~+>^!pd+Dr@_72WA<{?nF zD3x@+-Ao9xsEHR3fjx_=Qj_8nzN|NlJs7zi@h1Rso;phDk7$1P5r5cLQAmETuA{WICU+qa=<;vBll~8nB5P{w2mewb2WqH` JvUhK>e*rFlC(Zx> From 8929e296486b472b24dcf11f5666989c443bb622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=87=83=E5=A4=8F?= Date: Wed, 5 Jun 2024 15:31:09 +0800 Subject: [PATCH 5/6] tabular reader --- .../integrations/readers/pai_excel_reader.py | 54 ++++++++++++++++-- ...255\230\347\216\207_excel_two_header.xlsx" | Bin 9100 -> 9117 bytes 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/src/pai_rag/integrations/readers/pai_excel_reader.py b/src/pai_rag/integrations/readers/pai_excel_reader.py index 462ad5af..c300985a 100644 --- a/src/pai_rag/integrations/readers/pai_excel_reader.py +++ 
b/src/pai_rag/integrations/readers/pai_excel_reader.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional from fsspec import AbstractFileSystem +from openpyxl import load_workbook import pandas as pd from llama_index.core.readers.base import BaseReader @@ -48,6 +49,52 @@ def __init__( self._row_joiner = row_joiner self._pandas_config = pandas_config + def read_xlsx( + self, + file: Path, + fs: Optional[AbstractFileSystem] = None, + ): + """Parse Excel file。""" + if fs: + with fs.open(file) as f: + excel = pd.ExcelFile(load_workbook(f), engine="openpyxl") + else: + excel = pd.ExcelFile(load_workbook(file), engine="openpyxl") + sheet_name = excel.sheet_names[0] + sheet = excel.book[sheet_name] + df = excel.parse(sheet_name, **self._pandas_config) + + header_max = 0 + if ( + "header" in self._pandas_config + and self._pandas_config["header"] is not None + and isinstance(self._pandas_config["header"], list) + ): + header_max = max(self._pandas_config["header"]) + elif ( + "header" in self._pandas_config + and self._pandas_config["header"] is not None + and isinstance(self._pandas_config["header"], int) + ): + header_max = self._pandas_config["header"] + + for item in sheet.merged_cells: + top_col, top_row, bottom_col, bottom_row = item.bounds + base_value = item.start_cell.value + # Convert 1-based index to 0-based index + top_row -= 1 + top_col -= 1 + # Since the previous lines are set as headers, the coordinates need to be adjusted here. + if ( + "header" in self._pandas_config + and self._pandas_config["header"] is not None + ) or "header" not in self._pandas_config: + top_row -= header_max + 1 + bottom_row -= header_max + 1 + + df.iloc[top_row:bottom_row, top_col:bottom_col] = base_value + return df + def load_data( self, file: Path, @@ -55,11 +102,8 @@ def load_data( fs: Optional[AbstractFileSystem] = None, ) -> List[Document]: """Parse Excel file. 
only process the first sheet""" - if fs: - with fs.open(file) as f: - df = pd.read_excel(f, sheet_name=0, **self._pandas_config) - else: - df = pd.read_excel(file, sheet_name=0, **self._pandas_config) + + df = self.read_xlsx(file, fs) text_list = df.apply( lambda row: str(dict(zip(df.columns, row.astype(str)))), axis=1 diff --git "a/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" "b/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" index ef7ec96f94bfe4af8ecb542400a2a6a57f8cf5bf..50f23a76b733415239358cef59c43690f08d6097 100644 GIT binary patch delta 2925 zcmV-z3zGDVN1aEo{Rn^J3Ro|V0{{TP2mk;N0001ZY%h0ja%*C5Z)+}iZEUPnU2mf} z6n$T5{sZEj+U6?+m5v~g-AL^yN;B>5Q{{qFJmn*6Op~gb|324FNZZM3mF;XqY;0d2 zpL_4Q=KS-vu8b!pI^H&sNzSlouqJN{-aJI+*X7g-Orw*uDQJJyHY_rqS!aH}`1ITP zrWKF(ZTn~dK+{ELDdjpusLM-M)9$QYvj$dHt*EJlp?E;uS}sd((yR^HZYfVk%sm(Lp1H0nH@ zJv6PL_Z7IkC7yq=74Z1*Cm0%gI0=@Hj;eVs+OA#6Gk``r(ntOij0kxbg6(k(PD65$ zU{74-@W4v=A7tepu>4mrf`1u}fV8w$LI~am(YymU+q^hm@rvE-q-)Ui`U|a9rYh5@ zXeYCROIAeY2!`#3y_*pII_Y@@tAt=2nCRjl=eaNnwxWN1C6^%N0Uiu}e{6fo5`+;~ zl8J^&Hfb9PI^6s9tD4i;ld=Vc8w>V#&jo|R0)2t6(9%4l_uZVz(&$AMndoZ=&ZC^= ztl|{SpYxK6dM?^;ESDXc$1`gY|BfX1-s!fWk|N!(E$q3|uWw{bk89>X2$|+8ItaqE zb7A-XbEtp76A=z#ol9ZB`z*bLgk94oNFRu#*jM*V<;S^eauJfd7vlIN#dc~1Bj2%{ z%q3RrOft)zII)MbG@atncW_4dVcybSmV0F==*V<*>Q~Z#dg@uNqQAV|&Y+qTNKM?}AUy!w7%$2E&bByXA!}BhwxQ%49e92jdSV z5GG#WtAY`>A{Lo1lhmIi0VYp*M{5Wed!7Z#23< zuGIxh;8s_?jPP99=jIK z9H4&}jBP7$9ow3?shxSFEX@*6rAK`%!vEvS)FlxPUm?X3)GwEU=8y30TCi0@JJ8e4 zA>jL#Y~lq8c7UQgB~!~KV+>LCUCT?SjyEFdB=e@PWR?4?4_;US(z_XzJruL5V?7R4 zJKfJ7CRe+OeV*QxUbskA-1ocx&F~t$uh=b##?zY<4JTi&mRBd*US`X?+v$nM@nRAG zwaoq;wxE9u5Yh=$t(F}c(thy+laT`y3N)Lcb0Gr&0Nx0b(F_!SR?CjlFcjS*@eh{w znLL`5wn{RhbRvy3NJukCY~93ZE&PZaca(zIvEfTtu^+MFYY=<}*GbxD#H%xdn8nn- zwa>Zdo@=}DX(Kqgrn+WICfGmmFrqS71(T};zr4zv2}ZRcvLIYZn&2(1@oD<_QCyoX zrJwE_3Ez}T3u0iZS8iQtN{ZTU65I_uZ|n-fB<>Jq 
zB0mO15dC^nIk^%QF>J{=GusvmBazR}S5j%RNIhal?i#HcCnkiqZp1JL+uVS9W; zs)}zjsDwRGN9p}%Y#sO0U4sHY$L$&e$@+h ztxOAlk>JDl!k#C8Y&mFSE^4^!;1g{$OYqyX`IG75EDM~8H=j7+h(^vdnlGFvd^TS! zvZ)t%&)y9lqX#@i?b`_l!w+XQhX+gdV*BJT53vLfCSCgNO#*2TbDsud?`h(_4?CFcb&y_f7UYG=0YcItoUdjJRg! zZL`dKaiDm>_4`(K{-SAd52v}??Yl@= z-62s)CBw^rQliEDd26iWmBPAD$%G4Xa(JY8jxRC8{7sUU!dY*dFhZG~*L+)k&T_`Z zRl2SJE+&~U2M5`2;2V{GY)Q$()k@!gu<-}6=@sigmz(cpUC@8=CSOnUcouzGC1sS2 zCwY`*vogxdr-w8u>_1&DYKc4CS4A8&1$|29tDLgsV(6dW5{~}em;_$^00kld zW+asK?O9p7>mPi^sTf5+JkX0=1bKiGltQ@g@W1DbtPfz=eOQ00030{{R30|NnH;TMmLS5Czb!m@dFRX~h&0 zjT<3^#4k0J;P&lMLtg`5-{j+yKpa7&fzzd*mFdUF504V_5e`i85&PpMzuyZR- zg98c$Ak6?#0cphpMk$tD3fCG`gR~>CAioM2M^;*>go#1ZCe`Bb*>*IlLD~@+HGh1? zdO~H_4iC?u0HhhvR6wZg0Bd_Gd((ES zl_@?Z=iFb;xg=fg>&p8CZG@5;ih>Y%AerJqZZotgR{jEc#!|^?r6gqN00u4d%ZrpX zgeeVAS~Z}p0OL85G=w!7+F9EWj1AjCO-|ojqHf0{{T62mk;N0001ZY%h0ja%*C5Z)+}iZEUPnO>g5i z5WN@Ze-Lz6NXn8d3sDx<7aAa01dcc9o&rXqBO>~6DJpgl>~$|ik9*p^?seNs(Nq6T z8*KkYhqC1)>!NV8HDppGhmUV&-cb85mSv&cFwyY3@^!L@b&Y>jX`S)v%GX~_6T{WD zM$#&yMO`spzh#a7;^5nF_7}CdzO3tO4FIaf*K;Z79zso;vywJ@^_*3(GOI;NB@D$C zYUYB`tjQUZWr0kL9aK_Y=_>?J?1E9xW;|t4owg;bfY*9zLmIOCFKzCMIuyET3)~^AsD+lI@rkhL}(eC z(YBCN5ONO>hK@5bZDk3<7#EU>ib@vNl>{BG{rXzXb=YBEgTl25`=RB6L1BTuz*lH# z>e0*QgvwlNMd9n{RRhkWl%=fT6!k^Xpv1KQ{O#i(zkfjYzyA37=Xdw-e!2hY(_io3 zqDPvg+Xa8}A5=?I6$%8gSn*xGe;F5`ExcZtCsJteei$7=c24OH{{4Jhy`!96&kT)CSfu%lO(}!z!~9qX-!+1uVtX1ecghbZ!I0u zWxqnO*YfOv#vNY&4ApV$n zInQM7>*f$V>gT>={3-{+#C9E39l}(^eElwroG@@PF@nIg4GUYgF$zowLS(uGC&Abm z1RWvth*5X50<*5ys;(MN)on}Q3Ris>V_g%TIzt?0q|0HyEu}>Y>Q`N72nU%XWxk63s-#3ReV@c`(#BhzrLfoX(RWXAR|j^e;p=}}(>@Bg?TJNJpwD4t%NCp#LCCzJ8(Y5aY^1^r`yP?tc}cCtf#+7FYD0~QMGxabTD0{{T(2a~}J z6n|CCZqq;zz9aDttG&DaNgR^Mc14<6sss{Ji^O$fubmb9W34xl1LDSoC*j1s;=GN2M9$+s3s1no}TeED?q^_#U7L@>VV ztlhOW(KV>uA&tiNP^)!+RWQiBHM%y6E|#O~*?;mgq^QePjMLO>fA{g^dav!z=D zU&i?%_kqnl7?KEz)KD!`*r0xWwSTK1^Ah^0OILgDFQ-dQc76~JF1W43Bw8t*L;vno z_rY^#N+M1R1A=H)uIBHg8^#Tj z9YHJM;Rayl%~3SZ){Ld)K{ni69bU%xxPD1Wgn(a@QNi=`8e&lbyNI`abW?89I% 
z|HD{rzavm2{&-$<7~Z-k+XsJnw8eO^>B4Wj3c?=7J_{z^^U(L4bmaNYctWPmbTSH^ zH1vbzWV|>J({K=b_{j0?xxU|h2yF1-+%Q2olY6c93Jt9w5^VoK!U^3g;T}KI$xi^2 zkpmR70}zh~1qPh0Tn3Yu7bJg^OK;*Z5XbK;?RT(z4^Bb~1QDtd7Hw~LM;5q z+U8NM_Pciy1g*pcRkEe=*#7-zJmbaVNh@*y=cP3TO-3;#V1%uu*%tKI&rLL=#0PF_ zuB?HAzJRBX%g>({N9%UJ1qcKQ#uu~=p_?=2MGKnyk?p|XPGg)*x`Q%4Kz`JT@~Kj7q!jM(BFR?skn#Isz0(0ElT3Y zsiEO}6~5c!SCHE_pm&16R{#J2|Nj600RR7d(_0RLFc1aMt(Y!A`l1z6nrPezAtZjO z0fO7NLk!HcqkNIe4Tot;OnhN}NIu4RA2Y>g_tdjm8JKuQ6X0cAt-fMEeh0a(9XYAcn@1zeHG#`u~CEDJzN z09^pMk^pb5M3?2Yn$;jBnkh6~0nK@=?ZcDZajXU@(bDo|+k-3r7SFQ)qy(r709O(q z8->T&k0Tl9#A=Wd%@rE1B+YuVRX?`}U^oRx49{OwPH#oO4OKI@Fc-3EBuH zGZY0O@<1}hh1_LmTde&B@{FaD(@IIm&=Cw;<=0m!YY0;sp0sK}TLH#%Bxwk1GPJk0 zAs8FBhnkweIiwRyrE6;4QSWd=**o0B+{Yp{QmQ%i9N^qY~0PDsv1Wb>h`9>5uf z3Tlwn;3$ajx7UKM&94K!GI7?z9vj!`6nDzzY!FBH!HCh=wr$WReGHdB!mpc$=f0{h zWVb5@C{H;fOarw_=NrMaGOD!RV_6CY-qwIl@x;+>v!cds+~P|C{Py_Q(52D6)E66` z1MWV#sAMR1R$h|3`(lmqIE-h082RC{h+>i?B))m;a{QR@q7N7KDb2scxgXAoa6zIu znJp*8Gayg##dpcS0F#je6tg}an*#|VojqHf0{{T6lj0#X8|=8~3$-bR#1M J_8 Date: Wed, 5 Jun 2024 16:28:14 +0800 Subject: [PATCH 6/6] tabular reader --- src/pai_rag/docs/tabular_doc.md | 8 +--- .../integrations/readers/pai_csv_reader.py | 38 +++++------------- tests/data_readers/test_csv_reader.py | 2 +- ...255\230\347\216\207_excel_two_header.xlsx" | Bin 9117 -> 9147 bytes 4 files changed, 12 insertions(+), 36 deletions(-) diff --git a/src/pai_rag/docs/tabular_doc.md b/src/pai_rag/docs/tabular_doc.md index 142179f5..8791cf64 100644 --- a/src/pai_rag/docs/tabular_doc.md +++ b/src/pai_rag/docs/tabular_doc.md @@ -12,14 +12,8 @@ Whether to concatenate rows into one document. **row_joiner:** _str, default="\n"._ The separator used to join rows. -**csv_config:** _dict, default={}._ -The configuration of csv reader -Set to empty dict by default, this means pandas will try to figure out the separators, table head, etc. on its own. 
- -#### one important parameter: - **header:** _None or int, list of int, default 0._ -Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row +row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row positions will be combined into a MultiIndex. Use None if there is no header. ### Functions: diff --git a/src/pai_rag/integrations/readers/pai_csv_reader.py b/src/pai_rag/integrations/readers/pai_csv_reader.py index 67e9093a..3d1bde60 100644 --- a/src/pai_rag/integrations/readers/pai_csv_reader.py +++ b/src/pai_rag/integrations/readers/pai_csv_reader.py @@ -20,21 +20,19 @@ class PaiCSVReader(BaseReader): concat_rows (bool): whether to concatenate all rows into one document. If set to False, a Document will be created for each row. True by default. - csv_config (dict): Options for the reader.Set to empty dict by default. - one important parameter: - "header": None or int, list of int, default 0. + header (object): None or int, list of int, default 0. Row (0-indexed) to use for the column labels of the parsed DataFrame. If a list of integers is passed those row positions will be combined into a MultiIndex. Use None if there is no header. 
""" def __init__( - self, *args: Any, concat_rows: bool = True, csv_config: dict = {}, **kwargs: Any + self, *args: Any, concat_rows: bool = True, header: object = 0, **kwargs: Any ) -> None: """Init params.""" super().__init__(*args, **kwargs) self._concat_rows = concat_rows - self._csv_config = csv_config + self._header = header def load_data( self, file: Path, extra_info: Optional[Dict] = None @@ -53,37 +51,21 @@ def load_data( headers = [] data_lines = [] data_line_start_index = 1 - if ( - "header" in self._csv_config - and self._csv_config["header"] is not None - and isinstance(self._csv_config["header"], list) - ): - data_line_start_index = max(self._csv_config["header"]) + 1 - elif ( - "header" in self._csv_config - and self._csv_config["header"] is not None - and isinstance(self._csv_config["header"], int) - ): - data_line_start_index = self._csv_config["header"] + 1 - self._csv_config["header"] = [self._csv_config["header"]] + if isinstance(self._header, list): + data_line_start_index = max(self._header) + 1 + elif isinstance(self._header, int): + data_line_start_index = self._header + 1 + self._header = [self._header] with open(file) as fp: - has_header = csv.Sniffer().has_header(fp.read(2048)) - fp.seek(0) - - if "header" not in self._csv_config and has_header: - self._csv_config["header"] = [0] - elif "header" not in self._csv_config and not has_header: - self._csv_config["header"] = None - csv_reader = csv.reader(fp) - if self._csv_config["header"] is None: + if self._header is None: for row in csv_reader: text_list.append(", ".join(row)) else: for i, row in enumerate(csv_reader): - if i in self._csv_config["header"]: + if i in self._header: headers.append(row) elif i >= data_line_start_index: data_lines.append(row) diff --git a/tests/data_readers/test_csv_reader.py b/tests/data_readers/test_csv_reader.py index 58a365ec..f3de5b14 100644 --- a/tests/data_readers/test_csv_reader.py +++ b/tests/data_readers/test_csv_reader.py @@ -18,7 +18,7 @@ def 
test_csv_reader(): file_extractor={ ".csv": PaiCSVReader( concat_rows=reader_config.get("concat_rows", False), - csv_config={"header": [0, 1]}, + header=[0, 1], ) }, ) diff --git "a/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" "b/tests/testdata/data/excel_data/30\345\244\251\347\225\231\345\255\230\347\216\207_excel_two_header.xlsx" index 50f23a76b733415239358cef59c43690f08d6097..dc548004539cff0dff3adf34dfcef10705d628c3 100644 GIT binary patch delta 2257 zcmV;?2rl=XN4rO`{Rn?XE=qHb0{{TP2mk;N0001ZY%h0ja%*C5Z)+}iZEUPnQE#I- z5Pm;t{{!Nk8WSJ{m97GWyGZRSO0(_VQ{{qFyak*yrb$)pf8VhQX}dX{a@~uFjqUO0 zZ)UzR=bv|VWjs;QGTuZcKC?}OHaRa?^AMR|SD6)f&*a?4leE4G<8fQ2OmX409SuS|X*YXUYQBV5Fe{3U+--Tdz9D~!4TqNid zQ#m}a68;BS`3Efj6%53O9wjG%0;vnaRFbcXRT_t~4Amjla41Is%c*+ulF|8yO z4UshAjRYO;{rXkSY3!urpm1YJ|L&NeP*|WZ@D*B;hvdFp5Lp_Xs3H@6ZNYhz)0|d} zpv7}u5>YP%|3-7!qQ!J>EvLUD3BI?wEr=vYH*^Pk?)2*$8I$9h`42)Sxrz>g@boV9 z?>~nMJQ084Al8Kx2E5PHOGwx?d4lwTNQ!-R&sBa#cTFxreD{(i>3A{@Vk-!|zzR}# zWCgP{v4YXqNz%lgi~{@}oDqJQbJEFjuM7npnJ&oSXz7aVh81jw9V=dGyx9AnrTU+m zVePweSlxj)jBeXk4XL5AyJbzmw~^_);M4Ok0=<91aI4pDSs}~FbjE=)>F55S?4bn0 z*b97BFv3y9BJ(9l{Ui=-Y{hZlS+4DR)+BaPYnnO%wzGKZk77*-yv?>I^Y2u+HJIEhkkD1)(}^2nRV6WkGlHvX#0AEi#~oU3)sVC$44B zMnHcrm^fD8jvOm-Q)lLlXXz~VRC?6MBK$wDOkEP;@D)-lLH%+iNd5@lt|eW^qy;_o z4gue{WMeOg?GaFPGn`p2p4bpo-?h9n8+l`#CNnR4C9B+DfAGQzklsy+?4X!c9qVzZ z+H602m|XW0`#ilXy>OYTxbJuWo8dKhU(qfTjkB8*4U;cdtE&@jFK4T}+w4T+>2f*! 
zYc>0G*n<8sKu9N0wOV#)Nc+VP0F#je6tlMs+X@Na(9?o10{{TRlbaVIf6I=WFcb#h zeWkvG&Y0%1r57Qyj~J$|rGSkQ2Mw*iAQlw& zxPI5j&RsPv9^o{%`$HE9qdO!@sic1yP)fAOo_DRWyi{2CIht@m&K8dt&+#>8n7>QX zQdr|m?MEoH%bM@YPg%;ixJmc*Kg2{6X5lFN4Sb{Gk1Z*h-mLfye;a=go87SfbJ^@5 ztDOD|x9Mhc9TmZRJV}H3JSl>-SWPFzCfTgxtbBv1`0rFu^Mmr=&FL%1UE^_fhTtmz0096000030 z|8$c<4uc>NMXzEwKtVvL4Un|yMT{}&s%_NR>zC1185v!f$Im?As~a9&H{P4E39f&A zir%x56~nuE4)N_GS#T}DE6{mu^lXT93exc@INtsew>+WNe*}-SSAir2K(V0Q3G#%X z1&R~^1;M;`A+;#Q-jGJbsaCe41)3B9MFN=#(nu6otwr)^+E$S^pq@cNu?jej%4|zy zIWgFbO>NuZQ9FjT0rgBV{%?B$>+Hj;vgA0RXxswUaiT8Mt!|$h+Ul8{4HVU2vWrP0 z@nW<^H5}M#f6@jN*=%M@BhhAz-Sy13pYRI+0RR6000960M2=Ak!Y~j-uM+kER*_PW zG*J5Ghu{@zyG?>^Hf&bK+uI1yzj?z9GkQg~*yFM!HPRzU3kNXz=PJgzg|0qz4OB)~ z|G3f7Q9$wImDNl^&QtCxc}Hv^P*35BoIHhGjDy9?Ex^`?q!ygFnf4*-*q0~C|k8r=nD6QzStv$`850e?|T!!Q(u z-v$3e$$Oi$bwf+rfy$m73T`OAh2(Y%wn<2$-Tu94yVc4RACq(Lmvb)37yG6TKEYU_ zWP##{pa3LOT*z&KHtS`0jRNba5uf z8XAz!;y8-&x7UGb?5_j8GI2J-9klOsj5}p>Hi)BpZ^dYA+cs*`K8DX9v!>Rq{NgJC{7Uxl*U-7qztk5S zo&z5EsM%2Lq`ahe_v}Q&63$$m*dBL7k#*Bj%ofS&OYoeV6hJlaT`yvpyc10|iDdN^_2r-XT%}s*^DyIs*0&lMxUVEKo}U0xbX( z00saE0000X0N>Elf-eIA0Kx|V02lxO00000000000001m7n7hD8k5K(Dgo+~^CBk& fWfP@?P?H}c6#>VSH6uO&PLp&aBL*cQ00000z?&;{ delta 2226 zcmV;j2u=69N1aEo{Rn^J3Ro|V0{{TP2mk;N0001ZY%h0ja%*C5Z)+}iZEUPnU2mf} z6n$T5{sZEj+U6?+m5v~g-AL^yN;B>5Q{{qFJmn*6Op~gb|324FNZZM3mF;XqY;0d2 zpL_4Q=KS-vu8b!pI^H&sNzSlouqJN{-aJI+*X7g-Orw*uDQJJyHY_rqS!aH}`1ITP zrWKF(ZTn~dK+{ELDdjpusLM-M)9$QYvj$dHt*EJlp?E;uS}sd((yR^HZYfVk%sm(Lp1H0nH@ zJv6PL_Z7IkC7yq=74Z1*Cm0%gI0=@Hj;eVs+OA#6Gk``r(ntOij0kxbg6(k(PD65$ zU{74-@W4v=A7tepu>4mrf`1u}fV8w$LI~am(YymU+q^hm@rvE-q-)Ui`U|a9rYh5@ zXeYCROIAeY2!`#3y_*pII_Y@@tAt=2nCRjl=eaNnwxWN1C6^%N0Uiu}e{6fo5`+;~ zl8J^&Hfb9PI^6s9tD4i;ld=Vc8w>V#&jo|R0)2t6(9%4l_uZVz(&$AMndoZ=&ZC^= ztl|{SpYxK6dM?^;ESDXc$1`gY|BfX1-s!fWk|N!(E$q3|uWw{bk89>X2$|+8ItaqE zb7A-XbEtp76A=z#ol9ZB`z*bLgk94oNFRu#*jM*V<;S^eauJfd7vlIN#dc~1Bj2%{ z%q3RrOft)zII)MbG@atncW_4dVcybSmV0F==*V<*>Q~Z#dg@uNqQAV|&Y+qTNKM?}AUy!w7%$2E&bByXA!}BhwxQ%49e92jdSV 
z5GG#WtAY`>A{Lo1lhmIi0VYp*M{5Wed!7Z#23< zuGIxh;8s_?jPP99=jIK z9H4&}jBP7$9ow3?shxSFEX@*6rAK`%!vEvS)FlxPUm?X3)GwEU=8y30TCi0@JJ8e4 zA>jL#Y~lq8c7UQgB~!~KV+>LCUCT?SjyEFdB=e@PWR?4?4_;US(z_XzJruL5V?7R4 zJKfJ7CRe+OeV*QxUbskA-1ocx&F~t$uh=b##?zY<4JTi&mRBd*US`X?+v$nM@nRAG zwaoq;wxE9u5Yh=$t(F}c(thy+laT`yv$zb~3JGOA+a(YK005|yoEIT~TaThJ6bJD4 zP4+u9ea8Yi3PzlaxMt^Vv)AWR4${~Q+t$luv)_FV7@bLcF_S=23jF>(^ssn5X+;j; zytJmE$tb1-jIgyd+k*c3xrs7LeBh?$${Hx>3wZjt{QPNgv~K5HfIv_%zMyRg-JCHm zTF~5&YzGGKG}dVz@X~F6neQC%dU#1Imd5dvX)X;t3v+jmVVgz@SX;5zV8R)3pm@Ob z`&M@TqG@ptr@7nhyGU5wAyG*s!^?nDqQ(4qYpmmy!n#k%gbQ+Vc%*oaFEPXXO_G+v zS#O&#LYbY{d|Q6ba>m6~x~=~%CYdk?2ib4n8Lb&hnzvqms4`A1X z_v1_2W@zYdj#S)#Luu6?S%(%Q@yDs5;d>Rn+v8V|+cx0r6v0;j0096000030|8&z^ z4uUWc1<HKBOGlhf*Irn<_Lh$S#=UOHy+Qqdvv)wC0His<3!rW=9FQmg zDFEAlXF@T~N+GSVb1O}Q0}2Ho%>YpWX~hFZDVAIc*BVrVv?H(}zX}*fR$8cpi9ypQ z)#C8kb~LI%+7THwe|*JyLS@$u56_?gq#4juK&b2hYk4|Od>ylKE~o}+wklm|X~iF& zGTEvQv=-HWAk8#WX=%l4TASispz%Zf0RRC1{{R30|Nk`2!3x4K5CqU)3Ht$75$UDr z#ghl2zYyDP64afrSrq@?D&#hA4l_eZ5Vkx|>Q;zd7EcFq_9^U1aE82CYlcua4kbjs z28~&&d+=(W_Pd^(=9pSGlz)37cY_(8O^L(8`qPmPwkKG_w%fg-_nMe(`~s7a0~C|^ z8r=n|m)u5Dv)UUb0e@dh!!Q)Z-vz%z$$OjradU>Y1C>2F6zouZ3(4&kY?G2iyM24p zcB_>sJ|^efU(UHCUGD43`vh%-k{ODE5P2Y(;zDjSv?*5p0(r(#$!Vn|Wat0}E%VEZ zlr@AY4NqD%psfJoIg&JlH5uAj+YpQm+d)lD;2hG4rP4LE?tiGaxS{NwZUM((IL9?u z%BiKe1NzNKL`NiNBeMC>RS)0{Lj^TRYj6}q_}gni*XGxOUYR&+VGoV#bc{P?b2f;h zdvC;OY}+k_c^t-9ei-@TO%cT;Nk}|< z>vH^<@1hSE^)b!A#JL~Nitw7m3zEzy#1kM-@!5CDzW|ew0~E7d9-9LN;tE(Vjg$N# zQUR%xPa--2^^=MsCLCov+a(YK005{5000;O000000000000000gcp;c7aEi1A}RsO zlMy2)8>*MwMp6L)0DA%e01*HH00000000000001|lT#x;0Wg!2BO?X}ApigX0J(%H AZU6uP