Skip to content

Commit

Permalink
Bugfix: handle the case where a file's encoding cannot be detected by chardet (#61)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ceceliachenen authored Jun 12, 2024
1 parent a707e6d commit b492801
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/pai_rag/integrations/readers/pai_csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def load_data(
with fs.open(file) as f:
encoding = chardet.detect(f.read(100000))["encoding"]
f.seek(0)
- if "GB" in encoding.upper():
+ if encoding is not None and "GB" in encoding.upper():
self._pandas_config["encoding"] = "GB18030"
try:
df = pd.read_csv(f, **self._pandas_config)
Expand All @@ -155,7 +155,7 @@ def load_data(
with open(file, "rb") as f:
encoding = chardet.detect(f.read(100000))["encoding"]
f.seek(0)
- if "GB" in encoding.upper():
+ if encoding is not None and "GB" in encoding.upper():
self._pandas_config["encoding"] = "GB18030"
try:
df = pd.read_csv(file, **self._pandas_config)
Expand Down

0 comments on commit b492801

Please sign in to comment.