Commit

v6.8.0
BBC-Esq authored Sep 5, 2024
1 parent 6169b45 commit d4528d6
Showing 5 changed files with 26 additions and 41 deletions.
16 changes: 13 additions & 3 deletions src/extract_metadata.py
@@ -49,9 +49,19 @@ def extract_audio_metadata(file_path):

def add_pymupdf_page_metadata(doc: Document, chunk_size: int = 1200, chunk_overlap: int = 600) -> List[Document]:
"""
Splits and adds page metadata to each chunk of a pdf document. Relies on the custom implementation of pymupdfparser
Called by document_processor.py.
"""
Called by document_processor.py. Chunks the body of text returned by the custom pymupdfparser script.
Uses a helper method named `split_text` to assign the appropriate page metadata to each chunk.
Detailed Process:
1. The method first identifies the positions of the custom page markers within the text using a regular expression.
These markers denote the start of a new page (e.g., `[[page1]]`).
2. The text is then cleaned by removing the page markers, resulting in a continuous block of text.
3. The cleaned text is split into chunks based on the specified `chunk_size`. If the chunk size exceeds the
remaining length of the text, the last chunk is adjusted to include the remaining text.
4. For each chunk, the method determines the appropriate page number by finding the nearest preceding page
marker position.
5. The method returns a list of tuples where each tuple contains a chunk of text and the page number associated with that chunk.
"""
def split_text(text: str, chunk_size: int, chunk_overlap: int) -> List[Tuple[str, int]]:
page_markers = [(m.start(), int(m.group(1))) for m in re.finditer(r'\[\[page(\d+)\]\]', text)]
clean_text = re.sub(r'\[\[page\d+\]\]', '', text)
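To make the docstring's five steps concrete, here is a minimal, self-contained sketch of how a `split_text` helper along these lines could work, written as a standalone function rather than the nested helper the repository uses. It assumes the `[[pageN]]` marker format shown in the diff, a default of page 1 when no marker precedes a chunk, and that `chunk_overlap` steps the window back between chunks; it is an illustration, not the repository's exact implementation.

```python
import re
from typing import List, Tuple

def split_text(text: str, chunk_size: int = 1200, chunk_overlap: int = 600) -> List[Tuple[str, int]]:
    # 1. Record each [[pageN]] marker, adjusting its offset to the cleaned text
    #    (the text with all markers removed).
    page_markers = []
    removed = 0
    for m in re.finditer(r'\[\[page(\d+)\]\]', text):
        page_markers.append((m.start() - removed, int(m.group(1))))
        removed += len(m.group(0))

    # 2. Strip the markers to get a continuous block of text.
    clean_text = re.sub(r'\[\[page\d+\]\]', '', text)

    chunks = []
    start = 0
    while start < len(clean_text):
        # 3. Take up to chunk_size characters; the final chunk takes whatever remains.
        end = min(start + chunk_size, len(clean_text))

        # 4. The chunk's page is the nearest marker at or before its start (default page 1).
        page = 1
        for pos, num in page_markers:
            if pos <= start:
                page = num
            else:
                break

        # 5. Collect (chunk, page number) pairs.
        chunks.append((clean_text[start:end], page))
        if end == len(clean_text):
            break
        start = max(end - chunk_overlap, start + 1)  # overlap between chunks, but always advance

    return chunks
```

The enclosing `add_pymupdf_page_metadata` presumably then wraps each `(chunk, page)` pair in a `Document` with the page number attached as metadata before returning the list.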
2 changes: 1 addition & 1 deletion src/gui_tabs_settings_vision.py
@@ -79,7 +79,7 @@ def populate_model_combobox(self):
else:
available_models.append(model)
else:
available_models.append(model) # Add non-CUDA models even if CUDA is available
available_models.append(model)
else:
if not requires_cuda:
available_models.append(model)
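For context, the branches shown here sit inside logic that filters which vision models are offered in the combobox; the commit only drops the inline comment. A simplified reconstruction of the filtering pattern follows. The condition names (`requires_cuda`, the CUDA check via `torch.cuda.is_available()`) and the overall loop structure are assumptions based on the visible lines, since the hunk shows only the inner branches.

```python
import torch  # assumption: CUDA availability is detected via torch

def filter_available_models(vision_models: dict) -> list:
    # Hypothetical helper mirroring the visible branches: offer every model when CUDA
    # is present; otherwise keep only models that do not require CUDA.
    cuda_available = torch.cuda.is_available()
    available_models = []
    for model, info in vision_models.items():
        requires_cuda = info.get('requires_cuda', False)
        if cuda_available:
            available_models.append(model)  # non-CUDA models are added even if CUDA is available
        elif not requires_cuda:
            available_models.append(model)
    return available_models
```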
21 changes: 0 additions & 21 deletions src/module_process_images.py
@@ -26,20 +26,6 @@

set_logging_level()

# warnings.filterwarnings("ignore", category=FutureWarning)
# warnings.filterwarnings("ignore", category=UserWarning)
# warnings.filterwarnings("ignore", category=DeprecationWarning)
# warnings.filterwarnings("ignore", message=".*Torch was not compiled with flash attention.*")

# datasets_logger = logging.getLogger('datasets')
# datasets_logger.setLevel(logging.WARNING)
# logging.getLogger("transformers").setLevel(logging.CRITICAL)
# logging.getLogger("transformers").setLevel(logging.ERROR)
# logging.getLogger("transformers").setLevel(logging.WARNING)
# logging.getLogger("transformers").setLevel(logging.INFO)
# logging.getLogger("transformers").setLevel(logging.DEBUG)
# logging.getLogger().setLevel(logging.WARNING)

ALLOWED_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff']

current_directory = Path(__file__).parent
@@ -202,7 +188,6 @@ def process_single_image(self, raw_image):
model_response = full_response.split("ASSISTANT: ")[-1]
return model_response


class loader_llava_next(BaseLoader):
def initialize_model_and_tokenizer(self):
chosen_model = self.config['vision']['chosen_model']
@@ -247,7 +232,6 @@ def process_single_image(self, raw_image):

return model_response


class loader_falcon(BaseLoader):
def initialize_model_and_tokenizer(self):
chosen_model = self.config['vision']['chosen_model']
@@ -302,8 +286,6 @@ def process_single_image(self, raw_image):

return model_response



class loader_moondream(BaseLoader):
def initialize_model_and_tokenizer(self):
chosen_model = self.config['vision']['chosen_model']
@@ -329,7 +311,6 @@ def process_single_image(self, raw_image):
summary = self.model.answer_question(enc_image, "Describe what this image depicts in as much detail as possible.", self.tokenizer)
return summary


class loader_florence2(BaseLoader):
def __init__(self, config):
super().__init__(config)
@@ -392,7 +373,6 @@ def process_single_image(self, raw_image):

return parsed_answer['<MORE_DETAILED_CAPTION>']


class loader_phi3vision(BaseLoader):
def initialize_model_and_tokenizer(self):
chosen_model = self.config['vision']['chosen_model']
@@ -465,7 +445,6 @@ def process_single_image(self, raw_image):

return response


class loader_minicpm_V_2_6(BaseLoader):
def initialize_model_and_tokenizer(self):
chosen_model = self.config['vision']['chosen_model']
21 changes: 10 additions & 11 deletions src/module_transcribe.py
@@ -112,23 +112,22 @@ def convert_to_wav(self, audio_file):
output_file = f"{Path(audio_file).stem}_converted.wav"
output_path = Path(__file__).parent / output_file

with av.open(audio_file) as container:
stream = next(s for s in container.streams if s.type == 'audio')

resampler = av.AudioResampler(
format='s16',
layout='mono',
rate=16000,
)
with av.open(audio_file) as input_container:
input_stream = input_container.streams.audio[0]

output_container = av.open(str(output_path), mode='w')
output_stream = output_container.add_stream('pcm_s16le', rate=16000)
output_stream.layout = 'mono'
output_stream.channels = 1

resampler = av.AudioResampler(format='s16', layout='mono', rate=16000)

# Determine optimal chunk size (adjust as needed)
chunk_size = 1024 * 32 # 32KB chunks

for frame in container.decode(audio=0):
for frame in input_container.decode(audio=0):
frame.pts = None
resampled_frames = resampler.resample(frame)
if resampled_frames is not None:
if resampled_frames:
for resampled_frame in resampled_frames:
for packet in output_stream.encode(resampled_frame):
output_container.mux(packet)
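The rewritten hunk follows the standard PyAV decode → resample → encode loop. Since the diff is cut off before the end of the method, here is a self-contained sketch of the full pattern as a standalone function; the trailing resampler/encoder flush and the `output_container.close()` call are assumptions about the unshown tail, not lines from the commit.

```python
from pathlib import Path
import av

def convert_to_wav(audio_file: str) -> Path:
    # Decode any audio container, resample to 16 kHz mono 16-bit PCM, and write a WAV file.
    output_path = Path(__file__).parent / f"{Path(audio_file).stem}_converted.wav"

    with av.open(audio_file) as input_container:
        output_container = av.open(str(output_path), mode='w')
        output_stream = output_container.add_stream('pcm_s16le', rate=16000)
        output_stream.layout = 'mono'

        resampler = av.AudioResampler(format='s16', layout='mono', rate=16000)

        for frame in input_container.decode(audio=0):
            frame.pts = None  # let the muxer assign timestamps
            for resampled_frame in resampler.resample(frame) or []:
                for packet in output_stream.encode(resampled_frame):
                    output_container.mux(packet)

        # Assumed tail (not visible in the diff): flush buffered frames, then close the output.
        for resampled_frame in resampler.resample(None) or []:
            for packet in output_stream.encode(resampled_frame):
                output_container.mux(packet)
        for packet in output_stream.encode(None):
            output_container.mux(packet)
        output_container.close()

    return output_path
```

Fixing the output at 16 kHz mono s16 presumably matches what the downstream Whisper-style transcription model expects, which would explain why those parameters are hard-coded here.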
7 changes: 2 additions & 5 deletions src/setup_windows.py
@@ -253,7 +253,6 @@ def install_libraries(libraries):
"protobuf==5.27.2",
"psutil==6.0.0",
"pyarrow==17.0.0",
"pyarrow-hotfix==0.6",
"pycparser==2.22",
"pydantic==2.7.4",
"pydantic_core==2.18.4",
@@ -317,10 +316,8 @@ def install_libraries(libraries):
"zipp==3.19.2"
]

# matplotlib==3.9.2
# pyparsing==3.1.2
# cycler==0.12.1
# kiwisolver==1.4.5
# pip install matplotlib==3.9.2 pyparsing==3.1.2 cycler==0.12.1 kiwisolver==1.4.5 --no-deps
# matplotlib will still show conflicts re missing libraries, but these are not needed to run my specific plots

full_install_libraries = [
"pyside6==6.7.2",
