Merge pull request #14 from akb89/develop

- fixed bug on frameid.sh script following modifications to add suppo…
akb89 · Jun 1, 2019 · 2cea8a1
2 parents e1bc052 + 2f79aa7
commit 2cea8a1
Show file tree

Hide file tree

Showing 5 changed files with 18 additions and 12 deletions.
diff --git a/README.md b/README.md
@@ -299,6 +299,12 @@ export save_every_k_batches=400 # for argument identification
 export num_models_to_save=60 # for argument identification
 ```
 
+### Setup SIMPLEFRAMEID
+If you intend to use SIMPLEFRAMEID for frame identification, you will need to install the following packages (under Python 2.7):
+```
+pip install keras==2.0.6 lightfm==1.13 sklearn numpy==1.13.1 networkx==1.11 tensorflow==1.3.0
+```
+
 ### Using the SEMEVAL PERL evaluation scripts
 
 If you intend to use the SEMEVAL perl evaluation scripts, make sure

diff --git a/scripts/CoNLLizer.py b/scripts/CoNLLizer.py
@@ -92,7 +92,7 @@ def transform_fields(fields, prefixes = []):
     return final_fields
 
 def brown_to_conll(bdelim, sdelim, cdelim, filepath, insert_ids=False):
-    with open(filepath) as stream:
+    with open(filepath, 'r', encoding='utf-8') as stream:
         for line in stream:
             line = line.strip()
             items = re.split(sdelim, line)
@@ -109,7 +109,7 @@ def conllize(delim, fields, files, cols = [], withs = []):
     replacements = dict(zip([int(c) for c in cols], withs))
 
     with contextlib.ExitStack() as stack:
-        files = [stack.enter_context(open(fname)) for fname in files]
+        files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in files]
         for lines in zip(*files):
             lines = [line.strip() for line in lines]
             lines = filter(lambda l: l != '', lines)
@@ -135,7 +135,7 @@ def flatten(delim, cdelim, fields, files, cols = [], withs = [], count_tokens =
     replacements = dict(zip([int(c) for c in cols], withs))
 
     with contextlib.ExitStack() as stack:
-        files = [stack.enter_context(open(fname)) for fname in files]
+        files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in files]
 
         selection = defaultdict(list)
         for lines in zip(*files):
@@ -182,7 +182,7 @@ def apply(content):
     if is_conll:
         final_fields = transform_fields(fields)
 
-    with open(filepath) as stream:
+    with open(filepath, 'r', encoding='utf-8') as stream:
         for line in stream:
             line = line.strip()
             if is_conll:
@@ -205,8 +205,8 @@ def bios(conll_files, bios_files, fields):
     final_fields = transform_fields(fields, prefixes = ['b', 'c'])
 
     with contextlib.ExitStack() as stack:
-        conll_files = [stack.enter_context(open(fname)) for fname in conll_files]
-        bios_files = [stack.enter_context(open(fname)) for fname in bios_files]
+        conll_files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in conll_files]
+        bios_files = [stack.enter_context(open(fname, 'r', encoding='utf-8')) for fname in bios_files]
 
         conll_sentences = []
         conll_sentence  = []
@@ -249,13 +249,13 @@ def bios(conll_files, bios_files, fields):
 
 def merger(conll_path, predicted_path, cplaceholder, cdelim, fdelim, ccolnum, fcolnum):
     predictions = []
-    with open(predicted_path) as fstream:
+    with open(predicted_path, 'r', encoding='utf-8') as fstream:
         for line in fstream:
             line = line.strip()
             items = line.split(fdelim)
             predictions.append(items)
 
-    with open(conll_path) as cstream:
+    with open(conll_path, 'r', encoding='utf-8') as cstream:
         anno_set = 0
         for line in cstream:
             line = line.strip()

diff --git a/scripts/frameid.sh b/scripts/frameid.sh
@@ -100,7 +100,7 @@ prepare() {
 if [ "${mode}" = train ]; then
   prepare
   echo "Training frame identification on all models..."
-  python ${SIMFRAMEID_HOME}/simpleFrameId/main.py train ${XP_DIR}/${xp}/frameid
+  python ${SIMFRAMEID_HOME}/simpleFrameId/main.py train ${XP_DIR}/${xp}/frameid deps.words.txt
   echo "Done"
 fi
 
@@ -118,7 +118,7 @@ if [ "${mode}" = decode ]; then
   esac
   prepare
   echo "Predicting frames..."
-  python ${SIMFRAMEID_HOME}/simpleFrameId/main.py decode ${XP_DIR}/${xp}/frameid
+  python ${SIMFRAMEID_HOME}/simpleFrameId/main.py decode ${XP_DIR}/${xp}/frameid deps.words.txt
   echo "Done"
   if [ "${parser}" = semafor ]; then
     cut -f 1-3 ${XP_DIR}/${xp}/data/test.frames > ${XP_DIR}/${xp}/data/test.frames.cut.1.txt

diff --git a/scripts/simpleFrameID.sh b/scripts/simpleFrameID.sh
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 from setuptools import setup
 
-with open('README.md', 'r') as fh:
+with open('README.md', 'r', encoding='utf-8') as fh:
     long_description = fh.read()
 
 setup(
@@ -16,7 +16,7 @@
     author_email='[email protected]',
     long_description=long_description,
     long_description_content_type='text/markdown',
-    version='1.3.0',
+    version='1.3.3',
     url='https://gitlab.com/akb89/pyfn',
     download_url='https://pypi.org/project/pyfn/#files',
     license='MIT',