Skip to content

Commit

Permalink
more patches
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Jan 18, 2024
1 parent f5f1f78 commit 9c7e84e
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
1 change: 1 addition & 0 deletions batchalign/formats/chat/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ def chat_parse_utterance(text, mor, gra, wor, additional):
if (len(phonated_words) > 0 and
phonated_words[-1][1][1] == TokenType.PUNCT and # because we don't track last ending PUNCT
(len(phonated_words)-1 != len(wor))) and (len(phonated_words) != len(wor)):
breakpoint()
raise CHATValidationException(f"Lengths of main and wor tiers are unaligned: lens main (filtered for phonation)={len(phonated_words)} wor={len(wor)}; line: '{text}'")

# insert morphology into the parsed forms
Expand Down
2 changes: 1 addition & 1 deletion batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.4.0-alpha.19.post2
0.4.0-alpha.21
Jan 17th, 2024
Fix some multilingual tagging issues
15 changes: 8 additions & 7 deletions scratchpad.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
# raise e



# pipeline = BatchalignPipeline.new("fa", lang="eng", num_speakers=len(doc.tiers))
# doc = pipeline(doc)

Expand Down Expand Up @@ -125,16 +126,16 @@

########## The Batchalign CHAT Test Harness ##########

# from batchalign.formats.chat.parser import chat_parse_utterance
# from batchalign.formats.chat.lexer import lex
from batchalign.formats.chat.parser import chat_parse_utterance
from batchalign.formats.chat.lexer import lex

# from batchalign.formats.chat.utils import annotation_clean
from batchalign.formats.chat.utils import annotation_clean

# main = "+< <太 高 了> [/] 太 高 了 . •125000_126823•"
# mor = None
# gra = None
main = "<and &+f> [//] <and the boy was &+kr> [//] and the boy heard a crying sound so he look back and said ."
mor = None
gra = None

# chat_parse_utterance(main, mor, gra, None, None)
chat_parse_utterance(main, mor, gra, None, None)



0 comments on commit 9c7e84e

Please sign in to comment.