From b492801d48a8914400fbf3610a42db2e33d83531 Mon Sep 17 00:00:00 2001 From: Ceceliachenen <162673161+Ceceliachenen@users.noreply.github.com> Date: Wed, 12 Jun 2024 20:14:10 +0800 Subject: [PATCH] Bugfix: handle files whose encoding cannot be detected by chardet (#61) --- src/pai_rag/integrations/readers/pai_csv_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pai_rag/integrations/readers/pai_csv_reader.py b/src/pai_rag/integrations/readers/pai_csv_reader.py index 0b653395..6653f501 100644 --- a/src/pai_rag/integrations/readers/pai_csv_reader.py +++ b/src/pai_rag/integrations/readers/pai_csv_reader.py @@ -141,7 +141,7 @@ def load_data( with fs.open(file) as f: encoding = chardet.detect(f.read(100000))["encoding"] f.seek(0) - if "GB" in encoding.upper(): + if encoding is not None and "GB" in encoding.upper(): self._pandas_config["encoding"] = "GB18030" try: df = pd.read_csv(f, **self._pandas_config) @@ -155,7 +155,7 @@ def load_data( with open(file, "rb") as f: encoding = chardet.detect(f.read(100000))["encoding"] f.seek(0) - if "GB" in encoding.upper(): + if encoding is not None and "GB" in encoding.upper(): self._pandas_config["encoding"] = "GB18030" try: df = pd.read_csv(file, **self._pandas_config)