diff --git a/batchalign/version b/batchalign/version index 290543c..69889a4 100644 --- a/batchalign/version +++ b/batchalign/version @@ -1,3 +1,3 @@ -0.4.0-alpha.19.post2 +0.4.0-alpha.21 Jan 17th, 2024 Fix some multilingual tagging issues \ No newline at end of file diff --git a/scratchpad.py b/scratchpad.py index 1d8caf4..c90cb5c 100644 --- a/scratchpad.py +++ b/scratchpad.py @@ -83,6 +83,7 @@ # raise e + # pipeline = BatchalignPipeline.new("fa", lang="eng", num_speakers=len(doc.tiers)) # doc = pipeline(doc) @@ -125,16 +126,16 @@ ########## The Batchalign CHAT Test Tarness ########## -# from batchalign.formats.chat.parser import chat_parse_utterance -# from batchalign.formats.chat.lexer import lex +from batchalign.formats.chat.parser import chat_parse_utterance +from batchalign.formats.chat.lexer import lex -# from batchalign.formats.chat.utils import annotation_clean +from batchalign.formats.chat.utils import annotation_clean -# main = "+< <太 高 了> [/] 太 高 了 . •125000_126823•" -# mor = None -# gra = None +main = " [//] [//] and the boy heard a crying sound so he look back and said ." +mor = None +gra = None -# chat_parse_utterance(main, mor, gra, None, None) +chat_parse_utterance(main, mor, gra, None, None)