logging? pls?

natsukashiixo · Oct 5, 2023 · 1edfeb6
1 parent aee1794
commit 1edfeb6
Show file tree

Hide file tree

Showing 14 changed files with 622 additions and 495 deletions.
diff --git a/app/src/auto_specs.spec b/app/src/auto_specs.spec
@@ -9,7 +9,7 @@ a = Analysis(
     pathex=['./', './app/src/modules'],
     binaries=[],
     datas=[],
-    hiddenimports = ['app.src.modules.functions_ui', 'app.src.modules.delete_files', 'app.src.modules.rewrite_docx', 'app.src.modules.fix_mistakes', 'app.src.modules.hocr_parser', 'app.src.modules.run_tesseract', 'app.src.modules.rotate_and_split_image', 'app.src.modules.doublepage_img_rename', 'app.src.modules.singlepage_img_rename', 'app.src.modules.verify_folders', 'app.src.modules.setup_functions', 'app.src.modules.is_image'],
+    hiddenimports = ['app.src.modules.functions_ui', 'app.src.modules.delete_files', 'app.src.modules.rewrite_docx', 'app.src.modules.fix_mistakes', 'app.src.modules.hocr_parser', 'app.src.modules.run_tesseract', 'app.src.modules.rotate_and_split_image', 'app.src.modules.doublepage_img_rename', 'app.src.modules.singlepage_img_rename', 'app.src.modules.verify_folders', 'app.src.modules.setup_functions', 'app.src.modules.is_image', 'app.src.modules.logger_mod'],
     hookspath=[],
     hooksconfig={},
     runtime_hooks=[],

diff --git a/app/src/modules/delete_files.py b/app/src/modules/delete_files.py
@@ -1,32 +1,37 @@
 import os
 from pathlib import Path
 from app.src.modules.is_image import is_image as IsImage
+from app.src.modules.logger_mod import write_log as WriteLog
 
 rootfolder = './'
 txt_file = Path('./TesseractOutput/less_mistakes.txt')
 docx_file = Path('./TesseractOutput/to_be_parsed.docx')
 
 def delete_irrelevant_files(rootfolder=rootfolder, txt_file=txt_file, docx_file=docx_file):
-    if txt_file.is_file():
-        txt_file.unlink(missing_ok=True)
-    if docx_file.is_file():
-        docx_file.unlink(missing_ok=True)
-    # Create a list of all visible files
-    allfiles = []
-    excluded_folders = {'.', 'assets', '__'}
-    for foldername, subfolders, filenames in os.walk(rootfolder):
-        subfolders[:] = [subfolder for subfolder in subfolders if not subfolder.startswith(tuple(excluded_folders))]
-        for filename in filenames:
-            #print(f'cwd is {foldername}') # debug statement
-            if not filename.startswith('.'):
-                allfiles.append(Path(foldername) / filename)
-    for file in allfiles:
-        if IsImage(file):
-            Path.unlink(file)
-        if str(file).endswith('.xml'):
-            Path.unlink(file)
-
-    print('Images and hOCR data deleted.')
+    try:
+        if txt_file.is_file():
+            txt_file.unlink(missing_ok=True)
+        if docx_file.is_file():
+            docx_file.unlink(missing_ok=True)
+        # Create a list of all visible files
+        allfiles = []
+        excluded_folders = {'.', 'assets', '__'}
+        for foldername, subfolders, filenames in os.walk(rootfolder):
+            subfolders[:] = [subfolder for subfolder in subfolders if not subfolder.startswith(tuple(excluded_folders))]
+            for filename in filenames:
+                #print(f'cwd is {foldername}') # debug statement
+                if not filename.startswith('.'):
+                    allfiles.append(Path(foldername) / filename)
+        for file in allfiles:
+            if IsImage(file):
+                Path.unlink(file)
+            if str(file).endswith('.xml'):
+                Path.unlink(file)
+
+        print('Images and hOCR data deleted.')
+    except Exception as e:
+        WriteLog(e)
+
 
 if __name__ == "__main__":
     delete_irrelevant_files(rootfolder, txt_file, docx_file)
diff --git a/app/src/modules/doublepage_img_rename.py b/app/src/modules/doublepage_img_rename.py
@@ -1,68 +1,73 @@
 from pathlib import Path
-from app.src.modules.is_image import is_image as IsImage
+import re
+from natsort import os_sorted
 import os
 import shutil
+from app.src.modules.is_image import is_image as IsImage
 from app.src.modules.functions_ui import ProgressCounter
-import re
-from natsort import os_sorted
+from app.src.modules.logger_mod import write_log as WriteLog
 
 rootfolder = './ImportFolder' 
 destination = Path('./SplitterInput/')
 
 def double_rename(rootfolder=rootfolder, destination=destination):
-    allfiles = list(Path(rootfolder).rglob('*.*'))
-    ImageList = []
-    RenamedFiles = []
-    Counter = '0001'
-    has_numbers = re.compile(r'[0-9]')
+    try:
+        allfiles = list(Path(rootfolder).rglob('*.*'))
+        ImageList = []
+        RenamedFiles = []
+        Counter = '0001'
+        has_numbers = re.compile(r'[0-9]')
+
+        if all(has_numbers.search(str(file)) for file in allfiles):
+            print('All images are numbered, using Windows sorting')
+            ImageList = [file for file in allfiles if IsImage(file)]
+            ImageList = os_sorted(ImageList)
+
+        else:
+            print("Images aren't ordered, ordering based on file creation date")
+            try:
+                for file in allfiles:
+                    if IsImage(file): 
+                        os.path.getctime(file)
+                        #print(file, 'was created at:', os.path.getctime(file), 'in UNIX time') # Read file creation time for each image file using os.path.getctime
+                        ImageList.append(file)
+                    else:
+                        print(file, "is not an image")
+            except BaseException as error:
+                    print('An exception occurred while processing {}: {}'.format(file, error))
+            ImageList = sorted(ImageList, key=os.path.getctime)
 
-    if all(has_numbers.search(str(file)) for file in allfiles):
-        print('All images are numbered, using Windows sorting')
-        ImageList = [file for file in allfiles if IsImage(file)]
-        ImageList = os_sorted(ImageList)
+        # Rename each file sequentially using the order found in ImageList, incrementing Counter by 1 with each file read
 
-    else:
-        print("Images aren't ordered, ordering based on file creation date")
-        try:
-            for file in allfiles:
-                if IsImage(file): 
-                    os.path.getctime(file)
-                    #print(file, 'was created at:', os.path.getctime(file), 'in UNIX time') # Read file creation time for each image file using os.path.getctime
-                    ImageList.append(file)
-                else:
-                    print(file, "is not an image")
-        except BaseException as error:
-                print('An exception occurred while processing {}: {}'.format(file, error))
-        ImageList = sorted(ImageList, key=os.path.getctime)
-
-    # Rename each file sequentially using the order found in ImageList, incrementing Counter by 1 with each file read
-
-    files = list(filter(IsImage, allfiles))
-
-    with ProgressCounter(len(files)) as progress:
-        for file in ImageList:
+        files = list(filter(IsImage, allfiles))
+
+        with ProgressCounter(len(files)) as progress:
+            for file in ImageList:
+                try:
+                #print(file, os.path.getctime(file)) #Debug statement
+                    ext = os.path.splitext(file)[1] 
+                    NewFile = os.path.join(rootfolder, f"{Counter}{ext}") 
+                #print(file, 'saved as:', NewFile)
+                    shutil.copy(file, NewFile) #Creates a copy of the original file with a new name and metadata
+                    Counter = '{:04d}'.format(int(Counter) + 1)
+                    RenamedFiles.append(NewFile)
+                    progress.update_progress()
+                except BaseException as error:
+                    print('An exception occurred while processing {}: {}'.format(file, error))
+            progress.finalize()
+
+        # Move processed files to the image splitting folder
+        for file in RenamedFiles:
             try:
-            #print(file, os.path.getctime(file)) #Debug statement
-                ext = os.path.splitext(file)[1] 
-                NewFile = os.path.join(rootfolder, f"{Counter}{ext}") 
-            #print(file, 'saved as:', NewFile)
-                shutil.copy(file, NewFile) #Creates a copy of the original file with a new name and metadata
-                Counter = '{:04d}'.format(int(Counter) + 1)
-                RenamedFiles.append(NewFile)
-                progress.update_progress()
+                #print(RenamedFiles) #Debug to make sure the right file was added to this list
+                #print(file, 'saved to:', destination)
+                shutil.move(file, destination) 
             except BaseException as error:
                 print('An exception occurred while processing {}: {}'.format(file, error))
-        progress.finalize()
-
-    # Move processed files to the image splitting folder
-    for file in RenamedFiles:
-        try:
-            #print(RenamedFiles) #Debug to make sure the right file was added to this list
-            #print(file, 'saved to:', destination)
-            shutil.move(file, destination) 
-        except BaseException as error:
-            print('An exception occurred while processing {}: {}'.format(file, error))
-    print(f'{len(RenamedFiles)} images renamed and moved')
+        print(f'{len(RenamedFiles)} images renamed and moved')
+    except Exception as e:
+        WriteLog(e)
+
 
 if __name__ == "__main__":
     double_rename(rootfolder, destination)
diff --git a/app/src/modules/fix_mistakes.py b/app/src/modules/fix_mistakes.py
@@ -1,74 +1,79 @@
 import docx2txt
 from pathlib import Path
 import re
+from app.src.modules.logger_mod import write_log as WriteLog
 
 input_file = Path('./TesseractOutput/to_be_parsed.docx')
 output_file = Path('./TesseractOutput/less_mistakes.txt')
 
 def regex_corrector(input_file=input_file, output_file=output_file):
-    print('Starting regex correction of hOCR data')
-    linebreak = re.compile('[a-z]- ')
-    pagenumber = re.compile('(\n\d \n)|(\n\d\d \n)|(\n\d\d\d \n)')    
-    stupid_french_e_l = re.compile('è')
-    stupid_french_e_u = re.compile('È')
-
-    text = docx2txt.process(input_file)
-
-    mistakes_no = 0
-
-    updated_text = ""
-
-    with open(output_file, 'wt') as f:
-        f.write(text)
-
-    with open(output_file, 'rt') as f: #Running a loop to count the number of mistakes because I'm inefficient
-        text = f.read()
-        for sentence in re.split('[.]', text):
-            lm = linebreak.search(sentence)
-            pm = pagenumber.search(sentence)
-            sfelm = stupid_french_e_l.search(sentence)
-            sfeum = stupid_french_e_u.search(sentence)
-            if lm:
-                mistakes_no += 1
-            if pm:
-                mistakes_no += 1
-            if sfelm:
-                mistakes_no += 1
-            if sfeum:
-                mistakes_no += 1
-            else:
-                pass
-        f.close()
-
-        with open(output_file, 'rt') as f:
+    try:
+        print('Starting regex correction of hOCR data')
+        linebreak = re.compile('[a-z]- ')
+        pagenumber = re.compile('(\n\d \n)|(\n\d\d \n)|(\n\d\d\d \n)')    
+        stupid_french_e_l = re.compile('è')
+        stupid_french_e_u = re.compile('È')
+
+        text = docx2txt.process(input_file)
+
+        mistakes_no = 0
+
+        updated_text = ""
+
+        with open(output_file, 'wt') as f:
+            f.write(text)
+
+        with open(output_file, 'rt') as f: #Running a loop to count the number of mistakes because I'm inefficient
             text = f.read()
-            for sentence in re.split('[.]', text): #Iterates through sentences and replaces regex matches
+            for sentence in re.split('[.]', text):
                 lm = linebreak.search(sentence)
                 pm = pagenumber.search(sentence)
                 sfelm = stupid_french_e_l.search(sentence)
                 sfeum = stupid_french_e_u.search(sentence)
                 if lm:
-                    re_match = lm.group()
-                    sentence = sentence.replace('- ', '')
+                    mistakes_no += 1
                 if pm:
-                    re_match = pm.group()
-                    sentence = sentence.replace(re_match, '')
+                    mistakes_no += 1
                 if sfelm:
-                    re_match = sfelm.group()
-                    sentence = sentence.replace(re_match, 'e')
+                    mistakes_no += 1
                 if sfeum:
-                    re_match = sfeum.group()
-                    sentence = sentence.replace(re_match, 'E')
+                    mistakes_no += 1
                 else:
                     pass
-                updated_text += sentence + "."
             f.close()
-
-    with open(output_file, 'wt') as f:
-        f.write(updated_text)
-        f.close()    
-
-    print(f'{mistakes_no} mistakes found and corrected')
+
+            with open(output_file, 'rt') as f:
+                text = f.read()
+                for sentence in re.split('[.]', text): #Iterates through sentences and replaces regex matches
+                    lm = linebreak.search(sentence)
+                    pm = pagenumber.search(sentence)
+                    sfelm = stupid_french_e_l.search(sentence)
+                    sfeum = stupid_french_e_u.search(sentence)
+                    if lm:
+                        re_match = lm.group()
+                        sentence = sentence.replace('- ', '')
+                    if pm:
+                        re_match = pm.group()
+                        sentence = sentence.replace(re_match, '')
+                    if sfelm:
+                        re_match = sfelm.group()
+                        sentence = sentence.replace(re_match, 'e')
+                    if sfeum:
+                        re_match = sfeum.group()
+                        sentence = sentence.replace(re_match, 'E')
+                    else:
+                        pass
+                    updated_text += sentence + "."
+                f.close()
+
+        with open(output_file, 'wt') as f:
+            f.write(updated_text)
+            f.close()    
+
+        print(f'{mistakes_no} mistakes found and corrected')
+    except Exception as e:
+        WriteLog(e)
+
 
 if __name__ == "__main__":
     regex_corrector(input_file, output_file)
diff --git a/app/src/modules/functions_ui.py b/app/src/modules/functions_ui.py
@@ -2,23 +2,32 @@
 from typing import Union
 import os
 import math
+from app.src.modules.logger_mod import write_log as WriteLog
 
 def clicked():
     print("buttonclick")
 
 def open_folder(path: Union[str, os.PathLike]):
-    realpath = os.path.realpath(path)
-    os.startfile(realpath)
+    try:
+        realpath = os.path.realpath(path)
+        os.startfile(realpath)
+    except Exception as e:
+        WriteLog(e)
+
 
 def exit_button():
     #sys.exit(app.exec_() #this probably needs to be in the actual UI file I guess
     pass
 
 def round_seconds(seconds):
-        seconds = int(seconds)
-        m, s = divmod(seconds, 60)
-        h, m = divmod(m, 60)
-        return h, m, s
+        try:
+            seconds = int(seconds)
+            m, s = divmod(seconds, 60)
+            h, m = divmod(m, 60)
+            return h, m, s
+        except Exception as e:
+            WriteLog(e)
+
 
 class ProgressCounter:
     def __init__(self, total):
@@ -33,14 +42,18 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         pass
 
     def update_progress(self):
-        self.counter += 1
-        percent_done = (self.counter / self.total) * 100
-        floored_percent = math.floor(percent_done)
-        if self.total < 100:
-            print(f"{percent_done:.2f}% done")
-        elif floored_percent%5 == 0 and floored_percent != self.previous_percent:
-            self.previous_percent = floored_percent
-            print(f"{percent_done:.2f}% done")
+        try:
+            self.counter += 1
+            percent_done = (self.counter / self.total) * 100
+            floored_percent = math.floor(percent_done)
+            if self.total < 100:
+                print(f"{percent_done:.2f}% done")
+            elif floored_percent%5 == 0 and floored_percent != self.previous_percent:
+                self.previous_percent = floored_percent
+                print(f"{percent_done:.2f}% done")
+        except Exception as e:
+            WriteLog(e)
+
 
     def finalize(self):
         print('Operation completed')