From b492801d48a8914400fbf3610a42db2e33d83531 Mon Sep 17 00:00:00 2001
From: Ceceliachenen <162673161+Ceceliachenen@users.noreply.github.com>
Date: Wed, 12 Jun 2024 20:14:10 +0800
Subject: [PATCH] Bugfix: handle files whose encoding cannot be detected by
 chardet (#61)

---
 src/pai_rag/integrations/readers/pai_csv_reader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pai_rag/integrations/readers/pai_csv_reader.py b/src/pai_rag/integrations/readers/pai_csv_reader.py
index 0b653395..6653f501 100644
--- a/src/pai_rag/integrations/readers/pai_csv_reader.py
+++ b/src/pai_rag/integrations/readers/pai_csv_reader.py
@@ -141,7 +141,7 @@ def load_data(
             with fs.open(file) as f:
                 encoding = chardet.detect(f.read(100000))["encoding"]
                 f.seek(0)
-                if "GB" in encoding.upper():
+                if encoding is not None and "GB" in encoding.upper():
                     self._pandas_config["encoding"] = "GB18030"
                 try:
                     df = pd.read_csv(f, **self._pandas_config)
@@ -155,7 +155,7 @@ def load_data(
             with open(file, "rb") as f:
                 encoding = chardet.detect(f.read(100000))["encoding"]
                 f.seek(0)
-                if "GB" in encoding.upper():
+                if encoding is not None and "GB" in encoding.upper():
                     self._pandas_config["encoding"] = "GB18030"
                 try:
                     df = pd.read_csv(file, **self._pandas_config)