From 2e3cdd3eea7df42ce23400669f4fec25c10a2567 Mon Sep 17 00:00:00 2001 From: Carlos Salgado Date: Thu, 18 Apr 2024 17:11:12 -0400 Subject: [PATCH] rework io bug --- app.py | 12 +++++++++--- scripts.py | 13 ++++++++----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 2d3e6f4..87dd9bb 100644 --- a/app.py +++ b/app.py @@ -12,11 +12,17 @@ if uploaded_file is not None: try: - file_ext = uploaded_file.name.split('.')[-1].lower() - pdf_file = io.BytesIO(uploaded_file.read()) - docs = ingest(pdf_file, file_ext) + with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp: + tmp.write(uploaded_file.read()) + file_path = tmp.name + + docs = ingest(file_path) metadata = generate_metadata(docs) st.write('## Converted Text') st.write(metadata) + + # Clean up the temporary file + os.remove(file_path) + except Exception as e: st.error(f'Error: {e}') \ No newline at end of file diff --git a/scripts.py b/scripts.py index 646ef04..d4060c8 100644 --- a/scripts.py +++ b/scripts.py @@ -15,11 +15,13 @@ import io -def ingest(file_obj, file_ext='pdf'): - if file_ext == 'pdf': - loader = UnstructuredPDFLoader(file_obj) - elif file_ext == 'txt': - loader = TextLoader(file_obj) +def ingest(file_path): + extension = os.path.splitext(file_path)[1].lower() + + if extension == '.pdf': + loader = UnstructuredPDFLoader(file_path) + elif extension == '.txt': + loader = TextLoader(file_path) else: raise NotImplementedError('Only .txt or .pdf files are supported') @@ -43,6 +45,7 @@ def ingest(file_obj, file_ext='pdf'): return docs + def generate_metadata(docs): prompt_template = """ BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']