Skip to content

Commit

Permalink
more slapdash asr fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Jemoka committed Jun 17, 2024
1 parent 655aea7 commit 2dece77
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
7 changes: 6 additions & 1 deletion batchalign/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,14 @@ def __str__(self):
t = self._detokenize()

t = t.replace(". . .", "+...")
t = t.replace(" ' ", "'")
t = t.replace("¿", "").replace("¡", "")
t = re.sub(r"^\+\.\.\.", "", t.strip()).strip()
t = re.sub(r"^\W+", "", t.strip()).strip()
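# Repeated three times on purpose: the pattern is anchored at ^, so each
# pass removes only one leading run of punctuation; the .strip() that
# follows can expose another run (e.g. ", . word") for the next pass.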
t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
t = re.sub(r",", " , ", t.strip()).strip()
t = re.sub(r" +", " ", t.strip()).strip()
return t
Expand Down
4 changes: 2 additions & 2 deletions batchalign/version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.7.3-beta.8
June 14, 2024
0.7.3-beta.9
June 17, 2024
more asr fixes
12 changes: 6 additions & 6 deletions scratchpad.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
########### The Batchalign Core Test Harness ###########
from batchalign.formats.chat.parser import chat_parse_utterance

# ng = NgramRetraceEngine()
# disf = DisfluencyReplacementEngine()
# doc = Document.new("tu dois la manger maman, maman, à fin", lang="eng")
ng = NgramRetraceEngine()
disf = DisfluencyReplacementEngine()
doc = Document.new("I ' m such an idiot", lang="eng")
# # doc[0].content[4].text = "maman,"
# # doc[0].content[5].text = "maman,"
# pipe = BatchalignPipeline(ng, disf)
# tmp = pipe(doc)
# tmp
pipe = BatchalignPipeline(ng, disf)
tmp = pipe(doc)
tmp


# tmp[0].content
Expand Down

0 comments on commit 2dece77

Please sign in to comment.