From 2f79aa772963668cd852b702ae900c613d64fd3e Mon Sep 17 00:00:00 2001 From: AKB Date: Sat, 1 Jun 2019 11:46:17 +0200 Subject: [PATCH] - fixed bug on frameid.sh script following modifications to add support for frameid.embed.sh - added specific dependencies required by SIMPLEFRAMEID in README - removed useless simpleFrameID.sh script in scripts - specified encoding in CoNLLizer file opening for robustness on Linux --- README.md | 6 ++++++ scripts/CoNLLizer.py | 16 ++++++++-------- scripts/frameid.sh | 4 ++-- scripts/simpleFrameID.sh | 0 setup.py | 4 ++-- 5 files changed, 18 insertions(+), 12 deletions(-) delete mode 100755 scripts/simpleFrameID.sh diff --git a/README.md b/README.md index 01e95ce..1937352 100644 --- a/README.md +++ b/README.md @@ -299,6 +299,12 @@ export save_every_k_batches=400 # for argument identification export num_models_to_save=60 # for argument identification ``` +### Setup SIMPLEFRAMEID +If you intend to use SIMPLEFRAMEID for frame identification, you will need to install the following packages (on python 2.7): +``` +pip install keras==2.0.6 lightfm==1.13 sklearn numpy==1.13.1 networkx==1.11 tensorflow==1.3.0 +``` + ### Using the SEMEVAL PERL evaluation scripts If you intend to use the SEMEVAL perl evaluation scripts, make sure diff --git a/scripts/CoNLLizer.py b/scripts/CoNLLizer.py index 751a6de..33d68cb 100755 --- a/scripts/CoNLLizer.py +++ b/scripts/CoNLLizer.py @@ -92,7 +92,7 @@ def transform_fields(fields, prefixes = []): return final_fields def brown_to_conll(bdelim, sdelim, cdelim, filepath, insert_ids=False): - with open(filepath) as stream: + with open(filepath, 'r', encoding='utf-8') as stream: for line in stream: line = line.strip() items = re.split(sdelim, line) @@ -109,7 +109,7 @@ def conllize(delim, fields, files, cols = [], withs = []): replacements = dict(zip([int(c) for c in cols], withs)) with contextlib.ExitStack() as stack: - files = [stack.enter_context(open(fname)) for fname in files] + files = 
[stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in files] for lines in zip(*files): lines = [line.strip() for line in lines] lines = filter(lambda l: l != '', lines) @@ -135,7 +135,7 @@ def flatten(delim, cdelim, fields, files, cols = [], withs = [], count_tokens = replacements = dict(zip([int(c) for c in cols], withs)) with contextlib.ExitStack() as stack: - files = [stack.enter_context(open(fname)) for fname in files] + files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in files] selection = defaultdict(list) for lines in zip(*files): @@ -182,7 +182,7 @@ def apply(content): if is_conll: final_fields = transform_fields(fields) - with open(filepath) as stream: + with open(filepath, 'r', encoding='utf-8') as stream: for line in stream: line = line.strip() if is_conll: @@ -205,8 +205,8 @@ def bios(conll_files, bios_files, fields): final_fields = transform_fields(fields, prefixes = ['b', 'c']) with contextlib.ExitStack() as stack: - conll_files = [stack.enter_context(open(fname)) for fname in conll_files] - bios_files = [stack.enter_context(open(fname)) for fname in bios_files] + conll_files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in conll_files] + bios_files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in bios_files] conll_sentences = [] conll_sentence = [] @@ -249,13 +249,13 @@ def bios(conll_files, bios_files, fields): def merger(conll_path, predicted_path, cplaceholder, cdelim, fdelim, ccolnum, fcolnum): predictions = [] - with open(predicted_path) as fstream: + with open(predicted_path, 'r', encoding='utf-8') as fstream: for line in fstream: line = line.strip() items = line.split(fdelim) predictions.append(items) - with open(conll_path) as cstream: + with open(conll_path, 'r', encoding='utf-8') as cstream: anno_set = 0 for line in cstream: line = line.strip() diff --git a/scripts/frameid.sh b/scripts/frameid.sh index 32d1bc7..84d6b50 100755 --- a/scripts/frameid.sh +++ 
b/scripts/frameid.sh @@ -100,7 +100,7 @@ prepare() { if [ "${mode}" = train ]; then prepare echo "Training frame identification on all models..." - python ${SIMFRAMEID_HOME}/simpleFrameId/main.py train ${XP_DIR}/${xp}/frameid + python ${SIMFRAMEID_HOME}/simpleFrameId/main.py train ${XP_DIR}/${xp}/frameid deps.words.txt echo "Done" fi @@ -118,7 +118,7 @@ if [ "${mode}" = decode ]; then esac prepare echo "Predicting frames..." - python ${SIMFRAMEID_HOME}/simpleFrameId/main.py decode ${XP_DIR}/${xp}/frameid + python ${SIMFRAMEID_HOME}/simpleFrameId/main.py decode ${XP_DIR}/${xp}/frameid deps.words.txt echo "Done" if [ "${parser}" = semafor ]; then cut -f 1-3 ${XP_DIR}/${xp}/data/test.frames > ${XP_DIR}/${xp}/data/test.frames.cut.1.txt diff --git a/scripts/simpleFrameID.sh b/scripts/simpleFrameID.sh deleted file mode 100755 index e69de29..0000000 diff --git a/setup.py b/setup.py index a841012..b65d492 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import setup -with open('README.md', 'r') as fh: +with open('README.md', 'r', encoding='utf-8') as fh: long_description = fh.read() setup( @@ -16,7 +16,7 @@ author_email='akb@3azouz.net', long_description=long_description, long_description_content_type='text/markdown', - version='1.3.0', + version='1.3.3', url='https://gitlab.com/akb89/pyfn', download_url='https://pypi.org/project/pyfn/#files', license='MIT',