Skip to content

Commit

Permalink
chunk formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
sandeshit committed Dec 10, 2024
1 parent 1036a85 commit dbd532e
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion chatbotcore/contextual_chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,22 @@ def generate_contextualized_chunks(self, document: str, chunks: List[Document]):
contextualized_chunks = []
for chunk in chunks:
context = self._generate_context(document, chunk.page_content)
contextualized_content = f"""{context.strip()}. {chunk.page_content.strip()}"""

# Strip leading/trailing whitespace from both the context and the chunk content
context = context.strip()

chunk_content = chunk.page_content.strip()

if context.startswith('"'):
context = context[1:] # Remove the first character (the opening quote)
if context.endswith('"'):
context = context[:-1]
if context.endswith("."):
context = context[:-1]

# Join context and chunk content with ". "; one trailing period was trimmed above so the separator does not double it
contextualized_content = f"{context}. {chunk_content}"
# Add the cleaned-up content to the list of contextualized chunks
contextualized_chunks.append(Document(page_content=contextualized_content, metadata=chunk.metadata))

return contextualized_chunks

0 comments on commit dbd532e

Please sign in to comment.