Skip to content

Commit

Permalink
v6.1
Browse files Browse the repository at this point in the history
  • Loading branch information
BBC-Esq authored Jul 19, 2024
1 parent bab6762 commit 3cacc39
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 58 deletions.
2 changes: 2 additions & 0 deletions src/database_interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ def run(self):
if len(audio_documents) > 0:
print(f"Loaded {len(audio_documents)} audio transcription(s)...")

texts = [] # list created to hold split documents

# split documents
if isinstance(documents, list) and documents:
texts = split_documents(documents)
Expand Down
70 changes: 35 additions & 35 deletions src/document_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,29 +122,29 @@ def split_documents(documents):
chunk_overlap = config["database"]["chunk_overlap"]

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
print(f"Text splitter type: {type(text_splitter)}, content: {text_splitter.__dict__}")
# print(f"Text splitter type: {type(text_splitter)}, content: {text_splitter.__dict__}")

# Summarize documents before conversion
type_count = defaultdict(int)
exceptions = []
for i, doc in enumerate(documents):
doc_type = type(doc).__name__
content_type = type(doc.page_content).__name__
type_key = f"{doc_type}, content type: {content_type}"
type_count[type_key] += 1
# type_count = defaultdict(int)
# exceptions = []
# for i, doc in enumerate(documents):
# doc_type = type(doc).__name__
# content_type = type(doc.page_content).__name__
# type_key = f"{doc_type}, content type: {content_type}"
# type_count[type_key] += 1

if content_type != 'str':
exceptions.append(f"Document {i} has unexpected content type: {content_type}")
# if content_type != 'str':
# exceptions.append(f"Document {i} has unexpected content type: {content_type}")

print("Document summary before conversion:")
print(f"Total documents: {len(documents)}")
for type_key, count in type_count.items():
print(f"{count} documents of type: {type_key}")
# print("Document summary before conversion:")
# print(f"Total documents: {len(documents)}")
# for type_key, count in type_count.items():
# print(f"{count} documents of type: {type_key}")

if exceptions:
print("\nExceptions found:")
for exception in exceptions:
print(exception)
# if exceptions:
# print("\nExceptions found:")
# for exception in exceptions:
# print(exception)

# Convert "page content" within each document object to a string if it isn't already
for i, doc in enumerate(documents):
Expand All @@ -153,26 +153,26 @@ def split_documents(documents):
documents[i].page_content = str(doc.page_content)

# Summarize documents after conversion
type_count.clear()
exceptions.clear()
for i, doc in enumerate(documents):
doc_type = type(doc).__name__
content_type = type(doc.page_content).__name__
type_key = f"{doc_type}, content type: {content_type}"
type_count[type_key] += 1
# type_count.clear()
# exceptions.clear()
# for i, doc in enumerate(documents):
# doc_type = type(doc).__name__
# content_type = type(doc.page_content).__name__
# type_key = f"{doc_type}, content type: {content_type}"
# type_count[type_key] += 1

if content_type != 'str':
exceptions.append(f"Document {i} has unexpected content type: {content_type}")
# if content_type != 'str':
# exceptions.append(f"Document {i} has unexpected content type: {content_type}")

print("\nDocument summary after conversion:")
print(f"Total documents: {len(documents)}")
for type_key, count in type_count.items():
print(f"{count} documents of type: {type_key}")
# print("\nDocument summary after conversion:")
# print(f"Total documents: {len(documents)}")
# for type_key, count in type_count.items():
# print(f"{count} documents of type: {type_key}")

if exceptions:
print("\nExceptions found:")
for exception in exceptions:
print(exception)
# if exceptions:
# print("\nExceptions found:")
# for exception in exceptions:
# print(exception)

try:
print(f"\nSplitting {len(documents)} documents.")
Expand Down
42 changes: 20 additions & 22 deletions src/gui_tabs_settings_database_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,15 @@
class ChunkSettingsTab(QWidget):
def __init__(self):
super(ChunkSettingsTab, self).__init__()

with open('config.yaml', 'r', encoding='utf-8') as f:
config_data = yaml.safe_load(f)
self.database_config = config_data['database']
self.compute_device_options = config_data['Compute_Device']['available']
self.database_creation_device = config_data['Compute_Device']['database_creation']

grid_layout = QGridLayout()

# Device selection and current setting
self.device_label = QLabel("Create Device:")
self.device_label = QLabel("Device:")
grid_layout.addWidget(self.device_label, 0, 0)
self.device_combo = QComboBox()
self.device_combo.addItems(self.compute_device_options)
Expand All @@ -26,29 +24,29 @@ def __init__(self):
grid_layout.addWidget(self.device_combo, 0, 2)
self.current_device_label = QLabel(f"{self.database_creation_device}")
grid_layout.addWidget(self.current_device_label, 0, 1)

# Chunk overlap and current setting
self.chunk_overlap_label = QLabel("Chunk Overlap:")
grid_layout.addWidget(self.chunk_overlap_label, 0, 3)
self.chunk_overlap_edit = QLineEdit()
self.chunk_overlap_edit.setPlaceholderText("Enter new chunk_overlap...")
self.chunk_overlap_edit.setValidator(QIntValidator())
grid_layout.addWidget(self.chunk_overlap_edit, 0, 5)
current_overlap = self.database_config.get('chunk_overlap', '')
self.current_overlap_label = QLabel(f"{current_overlap}")
grid_layout.addWidget(self.current_overlap_label, 0, 4)

# Chunk size and current setting
self.chunk_size_label = QLabel("Chunk Size:")
grid_layout.addWidget(self.chunk_size_label, 0, 6)

# Chunk size and current setting (moved to the left)
self.chunk_size_label = QLabel("Chunk Size (# characters):")
grid_layout.addWidget(self.chunk_size_label, 0, 3)
self.chunk_size_edit = QLineEdit()
self.chunk_size_edit.setPlaceholderText("Enter new chunk_size...")
self.chunk_size_edit.setValidator(QIntValidator())
grid_layout.addWidget(self.chunk_size_edit, 0, 8)
grid_layout.addWidget(self.chunk_size_edit, 0, 5)
current_size = self.database_config.get('chunk_size', '')
self.current_size_label = QLabel(f"{current_size}")
grid_layout.addWidget(self.current_size_label, 0, 7)

grid_layout.addWidget(self.current_size_label, 0, 4)

# Chunk overlap and current setting (moved to the right)
self.chunk_overlap_label = QLabel("Overlap (# characters):")
grid_layout.addWidget(self.chunk_overlap_label, 0, 6)
self.chunk_overlap_edit = QLineEdit()
self.chunk_overlap_edit.setPlaceholderText("Enter new chunk_overlap...")
self.chunk_overlap_edit.setValidator(QIntValidator())
grid_layout.addWidget(self.chunk_overlap_edit, 0, 8)
current_overlap = self.database_config.get('chunk_overlap', '')
self.current_overlap_label = QLabel(f"{current_overlap}")
grid_layout.addWidget(self.current_overlap_label, 0, 7)

self.setLayout(grid_layout)

def update_config(self):
Expand Down
2 changes: 1 addition & 1 deletion src/gui_tabs_settings_database_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self):
self.field_data = {}
self.label_data = {}

self.query_device_label = QLabel(f"Query Device: {self.database_query_device}")
self.query_device_label = QLabel(f"Device: {self.database_query_device}")
self.query_device_combo = QComboBox()
self.query_device_combo.addItems(self.compute_device_options)
if self.database_query_device in self.compute_device_options:
Expand Down

0 comments on commit 3cacc39

Please sign in to comment.