Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
letuananh authored Jan 14, 2022
2 parents 2c84ad9 + 98d00d3 commit ed888e9
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 8 deletions.
8 changes: 7 additions & 1 deletion docs/api_elan.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ ELAN module

``speach`` supports reading and manipulating multi-tier transcriptions from ELAN directly.

.. note::

For better security, ``speach`` will use the package ``defusedxml`` automatically if available to parse XML streams (instead of Python's default parser).
When ``defusedxml`` is available, the flag ``speach.elan.SAFE_MODE`` will be set to ``True``.


For common code samples for processing ELAN files, see the :ref:`recipe_elan` page.

.. contents:: Table of Contents
Expand Down Expand Up @@ -55,4 +61,4 @@ or an annotation sequence in the case of symbolic subdivision tiers.

.. autoclass:: TimeSlot
:members:
:member-order: groupwise
:member-order: groupwise
9 changes: 8 additions & 1 deletion docs/updates.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
Speach Changelog
================

Speach 01.a12
Speach 0.1a13
-------------

- 2022-01-14

- Use ``defusedxml`` automatically when available to parse XML for better security

Speach 0.1a12
-------------

- 2021-11-03
Expand Down
4 changes: 2 additions & 2 deletions speach/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
__issue__ = "https://github.com/neocl/speach/issues/"
__maintainer__ = "Le Tuan Anh"
__version_major__ = "0.1" # follow PEP-0440
__version__ = "{}a12".format(__version_major__)
__version_long__ = "{} - Alpha 12".format(__version_major__)
__version__ = "{}a13".format(__version_major__)
__version_long__ = "{} - Alpha 13".format(__version_major__)
__status__ = "3 - Alpha"
15 changes: 11 additions & 4 deletions speach/elan.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@
from collections import defaultdict as dd
from typing import List, Tuple
import xml.etree.ElementTree as etree
# Choose the XML parser module used for reading XML input.
# ``defusedxml`` is preferred when it is installed; otherwise fall back to the
# standard-library ElementTree module imported above as ``etree``.
# ``SAFE_MODE`` records which of the two was selected.
try:
    import defusedxml.ElementTree as best_parser
    SAFE_MODE = True
except ModuleNotFoundError:
    # Only a missing package triggers the fallback; any other import failure
    # propagates instead of silently switching parsers.
    best_parser = etree
    SAFE_MODE = False

import warnings

from chirptext import DataObject
Expand Down Expand Up @@ -848,7 +855,7 @@ def parse_stream(cls, ecv_stream, *args, **kwargs):
:param ecv_stream: ECV text input stream
:rtype: speach.elan.ExternalControlledVocabResource
"""
_root = etree.fromstring(ecv_stream.read())
_root = best_parser.fromstring(ecv_stream.read())
ecv = ExternalControlledVocabResource(xml_node=_root, **kwargs)
return ecv

Expand Down Expand Up @@ -1176,8 +1183,8 @@ def to_xml_bin(self, encoding='utf-8', default_namespace=None, short_empty_eleme
:returns: EAF content
:rtype: bytes
"""
_content = etree.tostring(self.__xml_root, encoding=encoding, method="xml",
short_empty_elements=short_empty_elements, *args, **kwargs)
_content = best_parser.tostring(self.__xml_root, encoding=encoding, method="xml",
short_empty_elements=short_empty_elements, *args, **kwargs)
return _content

def to_xml_str(self, encoding='utf-8', *args, **kwargs):
Expand Down Expand Up @@ -1248,7 +1255,7 @@ def parse_eaf_stream(cls, eaf_stream, *args, **kwargs):
:param eaf_stream: EAF text input stream
:rtype: speach.elan.Doc
"""
_root = etree.fromstring(eaf_stream.read())
_root = best_parser.fromstring(eaf_stream.read())
_doc = Doc()
_doc.__xml_root = _root
_doc._update_info_xml(_root)
Expand Down
4 changes: 4 additions & 0 deletions test/test_elan.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ def read_eaf():

class TestELAN(unittest.TestCase):

def test_safe_mode(self):
    """Report the selected XML parser and check it agrees with SAFE_MODE."""
    print(f"ELAN safe mode: {elan.SAFE_MODE}")
    print(f"ELAN XML parser: {elan.best_parser.__name__}")
    # The flag must be a real boolean, not merely truthy/falsy.
    self.assertIsInstance(elan.SAFE_MODE, bool)
    # Whichever module was selected must provide the parsing entry point.
    self.assertTrue(hasattr(elan.best_parser, "fromstring"))
    # SAFE_MODE is True exactly when the defusedxml parser was selected.
    self.assertEqual(elan.SAFE_MODE,
                     elan.best_parser.__name__ == "defusedxml.ElementTree")

def test_read_elan(self):
eaf = read_eaf()
expected_tiernames = ['Person1 (Utterance)', 'marker', 'Person1 (Chunk)', 'Person1 (ChunkLanguage)',
Expand Down

0 comments on commit ed888e9

Please sign in to comment.