-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathlongimg.py
136 lines (103 loc) · 5.03 KB
/
longimg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps
import streamlit as st
from langchain_upstage import ChatUpstage as Chat
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AIMessage, HumanMessage
from langchain_upstage import UpstageDocumentParseLoader
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import tempfile, os
from PIL import Image
import math
# Per-session defaults. Streamlit re-executes this script on every
# interaction, so each key is seeded only when it is not already present.
_SESSION_DEFAULTS = {
    # Most recent list of per-section extracted texts (None until a run).
    "all_doc_contents": None,
    # Instruction sent to the LLM when merging the overlapping sections.
    "basic_prompt": """You are processing text extracted from a long image that was split into overlapping sections. Your task is to:
1. Analyze multiple sections of text that have ~50% overlap with adjacent sections
2. Identify and remove redundant content from the overlapping areas
3. Maintain the correct sequence and flow of the text
4. Preserve all unique information
5. Ensure proper paragraph breaks and formatting
6. Return a single, coherent document that reads naturally
7. Please keep the original text and do not revise or translate it.
The following sections contain the extracted text, with overlapping content between them. Please combine them into one complete, non-redundant text while maintaining the original text and flow.""",
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def split_images(img_file_path, temp_dir):
    """
    Split a long image into overlapping square sections.

    Each section is as tall as the image is wide, and consecutive sections
    overlap by half of that height. The last section is anchored to the
    bottom edge of the image so that no trailing pixels are lost. An image
    that is shorter than it is wide yields exactly one section covering the
    whole image (no padding, never an empty result).

    Args:
        img_file_path: Path of the image to split.
        temp_dir: Directory under which a "splits" subdirectory is created
            to hold the section files.

    Returns:
        A list of paths to the split image sections (saved as PNG files),
        ordered from top to bottom.
    """
    # Create directory for split images
    split_dir = os.path.join(temp_dir, "splits")
    os.makedirs(split_dir, exist_ok=True)

    split_image_paths = []
    # The context manager releases the underlying file handle once all
    # sections are written, so the caller can delete the source file.
    with Image.open(img_file_path) as img:
        width, height = img.size

        # Make sections square using the width as the height
        section_height = width
        overlap = section_height // 2
        step = section_height - overlap  # vertical distance between tops

        # Number of sections needed (accounting for overlap). Clamp to at
        # least one: for height <= overlap the formula gives 0 or less,
        # which would silently drop the whole image.
        num_sections = max(1, math.ceil((height - overlap) / step))

        for i in range(num_sections):
            top = i * step
            if i == num_sections - 1:
                # Anchor the last section to the bottom edge; clamp at 0 so
                # an image shorter than one section is not cropped with a
                # negative offset (which would pad it with blank rows).
                top = max(0, height - section_height)
            bottom = min(top + section_height, height)

            # Crop and save section
            section = img.crop((0, top, width, bottom))
            section_path = os.path.join(split_dir, f"section_{i}.png")
            section.save(section_path)
            split_image_paths.append(section_path)

    return split_image_paths
def img_to_doc_content(img_file_path):
    """Run the Upstage document parser on one image and return its text.

    The loader is asked to split by page; the text of every returned
    document is joined with newlines into a single string.
    """
    loader = UpstageDocumentParseLoader(img_file_path, split="page")
    page_texts = [page.page_content for page in loader.load()]
    return "\n".join(page_texts)
def combine_doc_contents(all_doc_contents, llm):
    """Combine multiple text sections into a coherent document using LLM.

    Args:
        all_doc_contents: Either a single string, or an iterable of
            per-section texts in top-to-bottom order.
        llm: Chat model that the prompt is piped into.

    Returns:
        The iterator produced by streaming the chain; it yields the combined
        document incrementally as string chunks.
    """
    if isinstance(all_doc_contents, str):
        sections_text = all_doc_contents
    else:
        # Interpolating a list into the prompt would send its Python repr
        # (brackets, quotes, escaped "\n") to the model. Join the sections
        # into plain text with explicit markers so the real line breaks and
        # the section order are preserved.
        sections = [str(text) for text in (all_doc_contents or [])]
        total = len(sections)
        sections_text = "\n\n".join(
            f"--- Section {number} of {total} ---\n{text}"
            for number, text in enumerate(sections, start=1)
        )

    eval_prompt = ChatPromptTemplate.from_messages([
        ("human", "{basic_prompt}"),
        ("human", "{all_doc_contents}"),
    ])
    llm_chain = eval_prompt | llm | StrOutputParser()
    return llm_chain.stream({
        "basic_prompt": st.session_state.basic_prompt,
        "all_doc_contents": sections_text,
    })
# ---- Page layout and main flow ------------------------------------------
st.title("Solar Long Image")
st.markdown("""
This app processes long images by:
1. Splitting them into overlapping sections
2. Extracting text from each section
3. Intelligently combining the text to remove duplicates
4. Producing a single coherent document
Upload your image below to get started.
""")

llm = Chat(model="solar-pro")
uploaded_file = st.file_uploader(
    "Choose your long image file",
    type=["png", "jpg", "jpeg", "tiff", "bmp", "webp"],
)

if uploaded_file and uploaded_file.name:
    # Everything written below lives inside this temporary directory, which
    # is deleted (including the split sections) when the block exits, so no
    # manual per-file cleanup is needed.
    with tempfile.TemporaryDirectory() as temp_dir:
        # The upload's name is client-supplied. Keep only its extension and
        # store it under a fixed name so it cannot escape temp_dir (absolute
        # path or "..") or collide with the "splits" subdirectory.
        upload_ext = os.path.splitext(os.path.basename(uploaded_file.name))[1]
        file_path = os.path.join(temp_dir, f"upload{upload_ext}")
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())

        # Process image in steps with status indicators
        with st.status("Splitting the image..."):
            split_img_paths = split_images(file_path, temp_dir)
            st.write(f"Split into {len(split_img_paths)} sections")

        # Process each section: show the crop, extract its text, show the text
        all_doc_contents = []
        for i, path in enumerate(split_img_paths):
            with st.status(f"Processing section {i+1}/{len(split_img_paths)}..."):
                st.image(path)
                doc_content = img_to_doc_content(path)
                st.write(doc_content)
                all_doc_contents.append(doc_content)

        # Combine all sections and stream the merged document to the page
        with st.status("Combining sections...", expanded=True):
            st.session_state.all_doc_contents = all_doc_contents
            combined_doc_content = combine_doc_contents(all_doc_contents, llm)
            st.write_stream(combined_doc_content)