From cbd47667ef1866d2c2fa27d1ea3c0d9a0c2cfd17 Mon Sep 17 00:00:00 2001
From: BBC-Esq <bbc@chintellalaw.com>
Date: Sat, 6 Jan 2024 19:31:19 -0500
Subject: [PATCH] Switch .docx loader to UnstructuredWordDocumentLoader and map .doc to it

---
 src/constants.py          | 3 ++-
 src/document_processor.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/constants.py b/src/constants.py
index 7268cfbf..65b3cf90 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -273,7 +273,8 @@
 
 DOCUMENT_LOADERS = {
     ".pdf": "PyMuPDFLoader",
-    ".docx": "Docx2txtLoader",
+    ".docx": "UnstructuredWordDocumentLoader",
+    ".doc": "UnstructuredWordDocumentLoader",
     ".txt": "TextLoader",
     ".enex": "EverNoteLoader",
     ".epub": "UnstructuredEPubLoader",
diff --git a/src/document_processor.py b/src/document_processor.py
index bdf14e9e..3742fa78 100644
--- a/src/document_processor.py
+++ b/src/document_processor.py
@@ -7,7 +7,7 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.document_loaders import (
     PyMuPDFLoader,
-    Docx2txtLoader,
+    UnstructuredWordDocumentLoader,
     TextLoader,
     EverNoteLoader,
     UnstructuredEPubLoader,
@@ -67,7 +67,7 @@ def load_single_document(file_path: Path) -> Document:
         elif file_extension == ".epub":
             loader = UnstructuredEPubLoader(str(file_path), mode="single", strategy="fast")
         elif file_extension == ".docx":
-            loader = Docx2txtLoader(str(file_path), mode="single", strategy="fast")
+            loader = UnstructuredWordDocumentLoader(str(file_path), mode="single", strategy="fast")
         elif file_extension == ".rtf":
             loader = UnstructuredRTFLoader(str(file_path), mode="single", strategy="fast")
         elif file_extension == ".odt":