Skip to content

Commit

Permalink
Merge pull request #33 from letuananh/main
Browse files Browse the repository at this point in the history
speach-0.1a15.post1 - maintenance release
  • Loading branch information
letuananh authored Mar 17, 2022
2 parents 9b24aa2 + 0653b53 commit f6018b5
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 28 deletions.
4 changes: 2 additions & 2 deletions speach/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
__issue__ = "https://github.com/neocl/speach/issues/"
__maintainer__ = "Le Tuan Anh"
__version_major__ = "0.1" # follow PEP-0440
__version__ = "{}a15".format(__version_major__)
__version_long__ = "{} - Alpha 15".format(__version_major__)
__version__ = "{}a15.post1".format(__version_major__)
__version_long__ = "{} - Alpha 15.post1".format(__version_major__)
__status__ = "3 - Alpha"
22 changes: 16 additions & 6 deletions speach/elan.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def _xml_tostring(root, encoding='utf-8',
""" [Internal] Generate XML content as bytes """
if XML_PARSER == 'lxml':
# short_empty_elements is not supported
kwargs['pretty_print'] = pretty_print
return etree.tostring(root, encoding=encoding,
pretty_print=pretty_print,
*args, **kwargs)
else:
# does not support pretty_print
Expand Down Expand Up @@ -424,11 +424,15 @@ def ref(self):

@property
def from_ts(self):
return self.__ref.from_ts if self.__ref is not None else None
return self.__ref.from_ts if self.ref is not None else None

@property
def to_ts(self):
return self.__ref.to_ts if self.__ref is not None else None
return self.__ref.to_ts if self.ref is not None else None

@property
def duration(self):
return self.__ref.duration if self.ref is not None else None

def resolve(self, elan_doc):
_ref_ann = elan_doc.annotation(self.__ref_id)
Expand Down Expand Up @@ -790,6 +794,7 @@ def new_annotation(self, value, from_ts=None, to_ts=None, ann_ref_id=None, value
self.__xml_node.append(ann_node)
ann_obj = self._add_annotation_xml(ann_node)
self.doc._register_ann(ann_obj)

ann_objs.append(ann_obj)
return ann_objs
# create new annotation
Expand All @@ -808,6 +813,7 @@ def new_annotation(self, value, from_ts=None, to_ts=None, ann_ref_id=None, value
ann_info.set('ANNOTATION_ID', self.doc.new_annotation_id())
self.__xml_node.append(ann_node)
ann_obj = self._add_annotation_xml(ann_node)
ann_obj.resolve(self.doc)
self.doc._register_ann(ann_obj)
return ann_obj
else:
Expand Down Expand Up @@ -1639,9 +1645,9 @@ def to_csv_rows(self) -> List[List[str]]:
rows = []
for tier in self.tiers():
for anno in tier.annotations:
_from_ts = f"{anno.from_ts.sec:.3f}" if anno.from_ts else ''
_to_ts = f"{anno.to_ts.sec:.3f}" if anno.to_ts else ''
_duration = f"{anno.duration:.3f}" if anno.duration else ''
_from_ts = f"{anno.from_ts.sec:.3f}" if anno.from_ts is not None else ''
_to_ts = f"{anno.to_ts.sec:.3f}" if anno.to_ts is not None else ''
_duration = f"{anno.duration:.3f}" if anno.duration is not None else ''
rows.append((tier.ID, tier.participant, _from_ts, _to_ts, _duration, anno.value))
return rows

Expand Down Expand Up @@ -1675,6 +1681,10 @@ def save(self, path, encoding='utf-8', xml_declaration=None,
*args, **kwargs)
chio.write_file(path, _content, encoding=encoding)

def clone(self, *args, **kwargs):
    """ Return a copy of this ELAN document built by an XML round-trip

    The document is serialised with ``to_xml_str()`` and the resulting text
    is parsed again with ``Doc.parse_string()``.
    Extra positional/keyword arguments are accepted but are not used.
    """
    xml_text = self.to_xml_str()
    return Doc.parse_string(xml_text)

def cut(self, section, outfile, media_file=None):
""" Cut the source media with timestamps defined in section object
Expand Down
40 changes: 20 additions & 20 deletions test/data/test.eaf.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@
"Person1 (Chunk)" "P001" "13.915" "14.711" "0.796" "おいしい"
"Person1 (Chunk)" "P001" "14.882" "15.908" "1.026" "means ""delicious"""
"Person1 (Chunk)" "P001" "16.615" "17.485" "0.870" "oh thanks"
"Person1 (ChunkLanguage)" "P001" "1.040" "2.330" "" "en"
"Person1 (ChunkLanguage)" "P001" "3.200" "5.050" "" "jp"
"Person1 (ChunkLanguage)" "P001" "5.510" "6.350" "" "en"
"Person1 (ChunkLanguage)" "P001" "7.070" "9.390" "" "en"
"Person1 (ChunkLanguage)" "P001" "9.731" "10.281" "" "jp"
"Person1 (ChunkLanguage)" "P001" "10.554" "11.240" "" "en"
"Person1 (ChunkLanguage)" "P001" "11.870" "12.303" "" "jp"
"Person1 (ChunkLanguage)" "P001" "12.498" "13.041" "" "en"
"Person1 (ChunkLanguage)" "P001" "13.660" "13.915" "" "en"
"Person1 (ChunkLanguage)" "P001" "13.915" "14.711" "" "jp"
"Person1 (ChunkLanguage)" "P001" "14.882" "15.908" "" "en"
"Person1 (ChunkLanguage)" "P001" "16.615" "17.485" "" "en"
"Person1 (Language)" "P001" "1.040" "2.330" "" "en"
"Person1 (Language)" "P001" "3.200" "5.050" "" "jp"
"Person1 (Language)" "P001" "5.510" "6.350" "" "en"
"Person1 (Language)" "P001" "7.070" "9.390" "" "en"
"Person1 (Language)" "P001" "9.670" "11.340" "" "en"
"Person1 (Language)" "P001" "11.780" "13.110" "" "en"
"Person1 (Language)" "P001" "13.490" "16.090" "" "en"
"Person1 (Language)" "P001" "16.615" "17.485" "" "en"
"Person1 (ChunkLanguage)" "P001" "1.040" "2.330" "1.290" "en"
"Person1 (ChunkLanguage)" "P001" "3.200" "5.050" "1.850" "jp"
"Person1 (ChunkLanguage)" "P001" "5.510" "6.350" "0.840" "en"
"Person1 (ChunkLanguage)" "P001" "7.070" "9.390" "2.320" "en"
"Person1 (ChunkLanguage)" "P001" "9.731" "10.281" "0.550" "jp"
"Person1 (ChunkLanguage)" "P001" "10.554" "11.240" "0.686" "en"
"Person1 (ChunkLanguage)" "P001" "11.870" "12.303" "0.433" "jp"
"Person1 (ChunkLanguage)" "P001" "12.498" "13.041" "0.543" "en"
"Person1 (ChunkLanguage)" "P001" "13.660" "13.915" "0.255" "en"
"Person1 (ChunkLanguage)" "P001" "13.915" "14.711" "0.796" "jp"
"Person1 (ChunkLanguage)" "P001" "14.882" "15.908" "1.026" "en"
"Person1 (ChunkLanguage)" "P001" "16.615" "17.485" "0.870" "en"
"Person1 (Language)" "P001" "1.040" "2.330" "1.290" "en"
"Person1 (Language)" "P001" "3.200" "5.050" "1.850" "jp"
"Person1 (Language)" "P001" "5.510" "6.350" "0.840" "en"
"Person1 (Language)" "P001" "7.070" "9.390" "2.320" "en"
"Person1 (Language)" "P001" "9.670" "11.340" "1.670" "en"
"Person1 (Language)" "P001" "11.780" "13.110" "1.330" "en"
"Person1 (Language)" "P001" "13.490" "16.090" "2.600" "en"
"Person1 (Language)" "P001" "16.615" "17.485" "0.870" "en"
30 changes: 30 additions & 0 deletions test/test_elan.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,36 @@ def test_cv_check_add_annotation_ref(self):
self.assertEqual(tl[-1].cve_ref, en_id)
self.assertEqual(tl[-1].ref.value, 'test')

def test_ref_time_info(self):
    """ Ref annotations report the time info of the annotation they point to,
    both right after creation and after the document has been cloned """
    # build a three-level tier hierarchy: Utterance <- Translate <- TranslateType
    doc = elan.create()
    doc.new_linguistic_type('Utterance')
    doc.new_linguistic_type('Translate', 'Symbolic_Association')
    doc.new_linguistic_type('TranslateType', 'Symbolic_Association')
    utterance_tier = doc.new_tier('Baby (Utterance)', 'Utterance')
    translate_tier = doc.new_tier('Baby (Translate)', 'Translate', 'Baby (Utterance)')
    type_tier = doc.new_tier('Baby (TranslateType)', 'TranslateType', 'Baby (Translate)')
    # only the top-level annotation carries explicit timestamps
    utterance = utterance_tier.new_annotation(
        'ano ringo tabetai',
        elan.ts2msec("00:00:01.123"),
        elan.ts2msec("00:00:02.456"))
    translation = translate_tier.new_annotation(
        '(I) want to eat that apple', ann_ref_id=utterance.ID)
    translation_type = type_tier.new_annotation('mock', ann_ref_id=translation.ID)
    # references must be available on the freshly created annotations
    self.assertIsNotNone(translation.ref)
    self.assertIsNotNone(translation_type.ref)
    # the second-level ref annotation exposes the time info of the utterance
    self.assertEqual(translation_type.from_ts, elan.ts2msec('00:00:01.123'))
    self.assertEqual(translation_type.to_ts, elan.ts2msec('00:00:02.456'))
    self.assertEqual(translation_type.duration, 1.333)
    # the same time info is expected after cloning the document
    doc = doc.clone()
    texts = ('ano ringo tabetai', '(I) want to eat that apple', 'mock')
    expected = [[(text, 1123, 2456, 1.333)] for text in texts]
    actual = []
    for tier in doc:
        if not len(tier):
            continue
        rows = []
        for ann in tier:
            rows.append((ann.value, ann.from_ts.value, ann.to_ts.value, ann.duration))
        actual.append(rows)
    self.assertEqual(expected, actual)

def test_clone(self):
    """ A cloned document serialises to the same XML string as its source """
    source_doc = read_eaf()
    cloned_doc = source_doc.clone()
    source_xml = source_doc.to_xml_str()
    cloned_xml = cloned_doc.to_xml_str()
    self.assertEqual(source_xml, cloned_xml)


# -------------------------------------------------------------------------------
# MAIN
Expand Down

0 comments on commit f6018b5

Please sign in to comment.