From d7c75e102a6749699bd95081b4ee2e2d8f9edf76 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Wed, 16 Mar 2022 14:01:35 +0800 Subject: [PATCH 1/8] add clone() function --- speach/elan.py | 4 ++++ test/test_elan.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/speach/elan.py b/speach/elan.py index 8be5f05..8deb46c 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -1672,6 +1672,10 @@ def save(self, path, encoding='utf-8', xml_declaration=None, *args, **kwargs) chio.write_file(path, _content, encoding=encoding) + def clone(self, *args, **kwargs): + """ Clone this ELAN object by using the save() action """ + return Doc.parse_string(self.to_xml_str()) + def cut(self, section, outfile, media_file=None): """ Cut the source media with timestamps defined in section object diff --git a/test/test_elan.py b/test/test_elan.py index aedaaad..5ac4678 100644 --- a/test/test_elan.py +++ b/test/test_elan.py @@ -463,6 +463,11 @@ def test_cv_check_add_annotation_ref(self): self.assertEqual(tl[-1].cve_ref, en_id) self.assertEqual(tl[-1].ref.value, 'test') + def test_clone(self): + eaf1 = read_eaf() + c1 = eaf1.clone() + self.assertEqual(eaf1.to_xml_str(), c1.to_xml_str()) + # ------------------------------------------------------------------------------- # MAIN From 03e6496477996dfe7585fd5937ce2a12f8a10109 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Wed, 16 Mar 2022 14:23:04 +0800 Subject: [PATCH 2/8] use explicit None checking for timestamps and duration info --- speach/elan.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/speach/elan.py b/speach/elan.py index 8deb46c..94d354a 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -1636,9 +1636,9 @@ def to_csv_rows(self) -> List[List[str]]: rows = [] for tier in self.tiers(): for anno in tier.annotations: - _from_ts = f"{anno.from_ts.sec:.3f}" if anno.from_ts else '' - _to_ts = f"{anno.to_ts.sec:.3f}" if anno.to_ts else '' - _duration = f"{anno.duration:.3f}" if anno.duration else '' + _from_ts 
= f"{anno.from_ts.sec:.3f}" if anno.from_ts is not None else '' + _to_ts = f"{anno.to_ts.sec:.3f}" if anno.to_ts is not None else '' + _duration = f"{anno.duration:.3f}" if anno.duration is not None else '' rows.append((tier.ID, tier.participant, _from_ts, _to_ts, _duration, anno.value)) return rows From b4ef297990050e7464432cc687f063571595ffa9 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Wed, 16 Mar 2022 14:23:34 +0800 Subject: [PATCH 3/8] fix Symbolic Association linking bug --- speach/elan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/speach/elan.py b/speach/elan.py index 94d354a..2ed84d7 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -805,6 +805,7 @@ def new_annotation(self, value, from_ts=None, to_ts=None, ann_ref_id=None, value ann_info.set('ANNOTATION_ID', self.doc.new_annotation_id()) self.__xml_node.append(ann_node) ann_obj = self._add_annotation_xml(ann_node) + ann_obj.resolve(self.doc) self.doc._register_ann(ann_obj) return ann_obj else: From ba90080090a576b23cc7058dd0a6793d51aa273d Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Wed, 16 Mar 2022 14:24:01 +0800 Subject: [PATCH 4/8] infer timestamps and duration of ref_annotations from linked annotation --- speach/elan.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/speach/elan.py b/speach/elan.py index 2ed84d7..e80cb81 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -421,11 +421,15 @@ def ref(self): @property def from_ts(self): - return self.__ref.from_ts if self.__ref is not None else None + return self.__ref.from_ts if self.ref is not None else None @property def to_ts(self): - return self.__ref.to_ts if self.__ref is not None else None + return self.__ref.to_ts if self.ref is not None else None + + @property + def duration(self): + return self.__ref.duration if self.ref is not None else None def resolve(self, elan_doc): _ref_ann = elan_doc.annotation(self.__ref_id) @@ -787,6 +791,7 @@ def new_annotation(self, value, from_ts=None, to_ts=None, 
ann_ref_id=None, value self.__xml_node.append(ann_node) ann_obj = self._add_annotation_xml(ann_node) self.doc._register_ann(ann_obj) + ann_objs.append(ann_obj) return ann_objs # create new annotation From 8b89f99f293d652a559ae96661e668e4c65459df Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Wed, 16 Mar 2022 14:25:07 +0800 Subject: [PATCH 5/8] add time info inference test case (multi-level linking) --- test/test_elan.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/test_elan.py b/test/test_elan.py index 5ac4678..2655f9e 100644 --- a/test/test_elan.py +++ b/test/test_elan.py @@ -463,6 +463,31 @@ def test_cv_check_add_annotation_ref(self): self.assertEqual(tl[-1].cve_ref, en_id) self.assertEqual(tl[-1].ref.value, 'test') + def test_ref_time_info(self): + eaf = elan.create() + eaf.new_linguistic_type('Utterance') + eaf.new_linguistic_type('Translate', 'Symbolic_Association') + eaf.new_linguistic_type('TranslateType', 'Symbolic_Association') + tu = eaf.new_tier('Baby (Utterance)', 'Utterance') + tt = eaf.new_tier('Baby (Translate)', 'Translate', 'Baby (Utterance)') + ttt = eaf.new_tier('Baby (TranslateType)', 'TranslateType', 'Baby (Translate)') + ann = tu.new_annotation('ano ringo tabetai', + elan.ts2msec("00:00:01.123"), + elan.ts2msec("00:00:02.456")) + ann_t = tt.new_annotation('(I) want to eat that apple', ann_ref_id=ann.ID) + ann_tt = ttt.new_annotation('mock', ann_ref_id=ann_t.ID) + self.assertIsNotNone(ann_t.ref) + self.assertIsNotNone(ann_tt.ref) + self.assertEqual(ann_tt.from_ts, elan.ts2msec('00:00:01.123')) + self.assertEqual(ann_tt.to_ts, elan.ts2msec('00:00:02.456')) + self.assertEqual(ann_tt.duration, 1.333) + eaf = eaf.clone() + expected = [[('ano ringo tabetai', 1123, 2456, 1.333)], + [('(I) want to eat that apple', 1123, 2456, 1.333)], + [('mock', 1123, 2456, 1.333)]] + actual = [[(u.value, u.from_ts.value, u.to_ts.value, u.duration) for u in t] for t in eaf if len(t)] + self.assertEqual(expected, actual) + def 
test_clone(self): eaf1 = read_eaf() c1 = eaf1.clone() From 7b0e3d5d18c4e6fa5f37abf77cb660e87440d1d9 Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 17 Mar 2022 15:49:18 +0800 Subject: [PATCH 6/8] maintenance version speach-0.1a15.post1 --- speach/__version__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/speach/__version__.py b/speach/__version__.py index c98acc9..7206b0d 100644 --- a/speach/__version__.py +++ b/speach/__version__.py @@ -14,6 +14,6 @@ __issue__ = "https://github.com/neocl/speach/issues/" __maintainer__ = "Le Tuan Anh" __version_major__ = "0.1" # follow PEP-0440 -__version__ = "{}a15".format(__version_major__) -__version_long__ = "{} - Alpha 15".format(__version_major__) +__version__ = "{}a15.post1".format(__version_major__) +__version_long__ = "{} - Alpha 15.post1".format(__version_major__) __status__ = "3 - Alpha" From ab441a6ccaf65c8606b7d2f41a830ff5c67becbb Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 17 Mar 2022 16:05:50 +0800 Subject: [PATCH 7/8] lgtm --- speach/elan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speach/elan.py b/speach/elan.py index ae67796..bd9373f 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -85,8 +85,8 @@ def _xml_tostring(root, encoding='utf-8', """ [Internal] Generate XML content as bytes """ if XML_PARSER == 'lxml': # short_empty_elements is not supported + kwargs['pretty_print'] = pretty_print return etree.tostring(root, encoding=encoding, - pretty_print=pretty_print, *args, **kwargs) else: # does not support pretty_print From d1add015f2d5d8c73733df0d7d0723b31c6ce45d Mon Sep 17 00:00:00 2001 From: Le Tuan Anh Date: Thu, 17 Mar 2022 16:08:29 +0800 Subject: [PATCH 8/8] update TSV expectation --- test/data/test.eaf.tsv | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/test/data/test.eaf.tsv b/test/data/test.eaf.tsv index c0b3113..e8e8441 100644 --- a/test/data/test.eaf.tsv +++ 
b/test/data/test.eaf.tsv @@ -21,23 +21,23 @@ "Person1 (Chunk)" "P001" "13.915" "14.711" "0.796" "おいしい" "Person1 (Chunk)" "P001" "14.882" "15.908" "1.026" "means ""delicious""" "Person1 (Chunk)" "P001" "16.615" "17.485" "0.870" "oh thanks" -"Person1 (ChunkLanguage)" "P001" "1.040" "2.330" "" "en" -"Person1 (ChunkLanguage)" "P001" "3.200" "5.050" "" "jp" -"Person1 (ChunkLanguage)" "P001" "5.510" "6.350" "" "en" -"Person1 (ChunkLanguage)" "P001" "7.070" "9.390" "" "en" -"Person1 (ChunkLanguage)" "P001" "9.731" "10.281" "" "jp" -"Person1 (ChunkLanguage)" "P001" "10.554" "11.240" "" "en" -"Person1 (ChunkLanguage)" "P001" "11.870" "12.303" "" "jp" -"Person1 (ChunkLanguage)" "P001" "12.498" "13.041" "" "en" -"Person1 (ChunkLanguage)" "P001" "13.660" "13.915" "" "en" -"Person1 (ChunkLanguage)" "P001" "13.915" "14.711" "" "jp" -"Person1 (ChunkLanguage)" "P001" "14.882" "15.908" "" "en" -"Person1 (ChunkLanguage)" "P001" "16.615" "17.485" "" "en" -"Person1 (Language)" "P001" "1.040" "2.330" "" "en" -"Person1 (Language)" "P001" "3.200" "5.050" "" "jp" -"Person1 (Language)" "P001" "5.510" "6.350" "" "en" -"Person1 (Language)" "P001" "7.070" "9.390" "" "en" -"Person1 (Language)" "P001" "9.670" "11.340" "" "en" -"Person1 (Language)" "P001" "11.780" "13.110" "" "en" -"Person1 (Language)" "P001" "13.490" "16.090" "" "en" -"Person1 (Language)" "P001" "16.615" "17.485" "" "en" +"Person1 (ChunkLanguage)" "P001" "1.040" "2.330" "1.290" "en" +"Person1 (ChunkLanguage)" "P001" "3.200" "5.050" "1.850" "jp" +"Person1 (ChunkLanguage)" "P001" "5.510" "6.350" "0.840" "en" +"Person1 (ChunkLanguage)" "P001" "7.070" "9.390" "2.320" "en" +"Person1 (ChunkLanguage)" "P001" "9.731" "10.281" "0.550" "jp" +"Person1 (ChunkLanguage)" "P001" "10.554" "11.240" "0.686" "en" +"Person1 (ChunkLanguage)" "P001" "11.870" "12.303" "0.433" "jp" +"Person1 (ChunkLanguage)" "P001" "12.498" "13.041" "0.543" "en" +"Person1 (ChunkLanguage)" "P001" "13.660" "13.915" "0.255" "en" +"Person1 (ChunkLanguage)" "P001" "13.915" 
"14.711" "0.796" "jp" +"Person1 (ChunkLanguage)" "P001" "14.882" "15.908" "1.026" "en" +"Person1 (ChunkLanguage)" "P001" "16.615" "17.485" "0.870" "en" +"Person1 (Language)" "P001" "1.040" "2.330" "1.290" "en" +"Person1 (Language)" "P001" "3.200" "5.050" "1.850" "jp" +"Person1 (Language)" "P001" "5.510" "6.350" "0.840" "en" +"Person1 (Language)" "P001" "7.070" "9.390" "2.320" "en" +"Person1 (Language)" "P001" "9.670" "11.340" "1.670" "en" +"Person1 (Language)" "P001" "11.780" "13.110" "1.330" "en" +"Person1 (Language)" "P001" "13.490" "16.090" "2.600" "en" +"Person1 (Language)" "P001" "16.615" "17.485" "0.870" "en"