-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from akb89/develop
Develop
- Loading branch information
Showing 4 changed files with 317 additions and 2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#!/bin/bash | ||
|
||
source "$(dirname "${BASH_SOURCE[0]}")/setup.sh" | ||
|
||
# Print usage information for the frame identification script to stdout.
# The usage line lists every option accepted by the argument parser,
# including -e/--embed.
show_help() {
cat << EOF
Usage: ${0##*/} [-h] -m {train,decode} -x XP_NUM [-p {semafor,open-sesame}] [-e EMBEDDINGS]
Perform frame identification.
  -h, --help                           display this help and exit
  -m, --mode {train,decode}            train on all models or decode using a single model
  -x, --xp XP_NUM                      xp number written as 3 digits (e.g. 001)
  -p, --parser {semafor,open-sesame}   formalize decoded frames for specified parser
                                       (required in decode mode)
  -e, --embed EMBEDDINGS               name of embeddings to use
EOF
}
|
||
# Option state: each is_*_set flag records whether its option was supplied.
is_xp_set=FALSE
is_mode_set=FALSE
is_parser_set=FALSE
is_embed_set=FALSE

# Consume leading options from the positional parameters. Options taking a
# value shift once inside their arm and once more at the bottom of the loop.
# Parsing stops at "--" or at the first word that is not an option.
while true; do
  case "$1" in
    -h | -\? | --help)
      show_help
      exit
      ;;
    -x | --xp)
      if [[ -z "$2" ]]; then
        die "ERROR: '--xp' requires a non-empty option argument"
      else
        is_xp_set=TRUE
        xp="xp_$2"
        shift
      fi
      ;;
    -m | --mode)
      if [[ -z "$2" ]]; then
        die "ERROR: '--mode' requires a non-empty option argument"
      else
        is_mode_set=TRUE
        mode=$2
        shift
      fi
      ;;
    -p | --parser)
      if [[ -z "$2" ]]; then
        die "ERROR: '--parser' requires a non-empty option argument"
      else
        is_parser_set=TRUE
        parser=$2
        shift
      fi
      ;;
    -e | --embed)
      if [[ -z "$2" ]]; then
        die "ERROR: '--embed' requires a non-empty option argument"
      else
        is_embed_set=TRUE
        embed=$2
        shift
      fi
      ;;
    --)
      # Explicit end of options.
      shift
      break
      ;;
    -?*)
      # Anything else that looks like an option is reported and skipped.
      printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
      ;;
    *)
      break
      ;;
  esac
  shift
done
|
||
# --xp and --mode are both mandatory.
if [[ "${is_xp_set}" = FALSE ]]; then
  die "ERROR: '--xp' parameter is required."
fi

if [[ "${is_mode_set}" = FALSE ]]; then
  die "ERROR: '--mode' parameter is required."
fi

# Reject unknown modes up front. Without this check a mistyped mode matches
# neither the train nor the decode branch further down, and the script ends
# successfully without having done anything.
case "${mode}" in
  train | decode) ;;
  *)
    die "Invalid mode '${mode}': should be 'train' or 'decode'"
    ;;
esac
|
||
#######################################
# Stage the inputs of the frame identification step under
# ${XP_DIR}/${xp}/frameid/data: copy the corpora and the embeddings, flatten
# the test sentences and generate the lexicon.
# Globals (read): XP_DIR, xp, embed, RESOURCES_DIR, SCRIPTS_DIR,
#                 SIMFRAMEID_HOME
# Outputs: progress messages on stdout, errors on stderr
# Returns: 0 on success, 1 as soon as one staging step fails ("Done" is only
#          printed on success)
#######################################
prepare() {
  local xp_root="${XP_DIR}/${xp}"
  local data_dir="${xp_root}/frameid/data"
  local corpora_dir="${data_dir}/corpora"

  echo "Preparing files for frame identification..."

  # Do not create a fresh experiment tree for a mistyped xp number.
  if [[ ! -d "${xp_root}" ]]; then
    printf 'ERROR: experiment directory not found: %s\n' "${xp_root}" >&2
    return 1
  fi

  # -p tolerates directories left by a previous run while still reporting
  # real failures (permissions, full disk, ...).
  mkdir -p -- "${data_dir}/embeddings" "${corpora_dir}" "${data_dir}/lexicons" \
    || return 1

  cp -- "${xp_root}/data/test.sentences.conllx" \
    "${xp_root}/data/train.frame.elements" \
    "${xp_root}/data/train.sentences.conllx.flattened" \
    "${corpora_dir}/" || return 1

  # test.frames is staged under the name test.frame.elements.
  cp -- "${xp_root}/data/test.frames" "${corpora_dir}/test.frame.elements" \
    || return 1

  cp -- "${RESOURCES_DIR}/${embed}" "${data_dir}/embeddings/" || return 1

  bash "${SCRIPTS_DIR}/flatten.sh" -f "${corpora_dir}/test.sentences.conllx" \
    || return 1

  python3 "${SIMFRAMEID_HOME}/generate.py" \
    "${corpora_dir}/train.frame.elements" \
    "${data_dir}/lexicons/fn_lexicon" || return 1

  echo "Done"
}
|
||
# Train mode: stage the data, then train frame identification on all models.
# ${embed:+...} passes the embeddings name as a single word when it is set
# and passes nothing at all when it is empty.
if [[ "${mode}" = train ]]; then
  prepare || die "ERROR: failed to prepare files for frame identification"
  echo "Training frame identification on all models..."
  python "${SIMFRAMEID_HOME}/simpleFrameId/main.py" train \
    "${XP_DIR}/${xp}/frameid" ${embed:+"${embed}"}
  echo "Done"
fi

# Decode mode: predict frames, then merge the predictions into the data
# files of the requested parser.
if [[ "${mode}" = decode ]]; then
  if [[ "${is_parser_set}" = FALSE ]]; then
    die "ERROR: '--parser' parameter is required."
  fi
  case "${parser}" in
    semafor | open-sesame) ;;
    *)
      die "Invalid frame semantic parser '${parser}': Should be 'semafor' or 'open-sesame'"
      ;;
  esac

  prepare || die "ERROR: failed to prepare files for frame identification"
  echo "Predicting frames..."
  python "${SIMFRAMEID_HOME}/simpleFrameId/main.py" decode \
    "${XP_DIR}/${xp}/frameid" ${embed:+"${embed}"}
  echo "Done"

  xp_data_dir="${XP_DIR}/${xp}/data"
  predicted_frames="${XP_DIR}/${xp}/frameid/test.frames.predicted"

  if [[ "${parser}" = semafor ]]; then
    # Rebuild test.frames as: columns 1-3, the predicted column(s), then
    # columns 5-8. The two cut files are written first because the final
    # redirection truncates test.frames.
    cut -f 1-3 "${xp_data_dir}/test.frames" > "${xp_data_dir}/test.frames.cut.1.txt"
    cut -f 5-8 "${xp_data_dir}/test.frames" > "${xp_data_dir}/test.frames.cut.2.txt"
    # Lines made only of tabs become empty, and runs of empty lines are
    # squeezed into one by cat -s.
    paste "${xp_data_dir}/test.frames.cut.1.txt" \
      "${predicted_frames}" \
      "${xp_data_dir}/test.frames.cut.2.txt" \
      | perl -pe 's/^\t+$//g' \
      | cat -s > "${xp_data_dir}/test.frames"
    rm "${xp_data_dir}/test.frames.cut.1.txt"
    rm "${xp_data_dir}/test.frames.cut.2.txt"
  fi

  if [[ "${parser}" = open-sesame ]]; then
    # NOTE(review): CoNLLizer.py is resolved relative to the current working
    # directory - confirm callers always run from the directory holding it.
    python3 CoNLLizer.py merger \
      -c "${xp_data_dir}/test.bios.semeval" \
      -P "${predicted_frames}" \
      -n 14 -N 1 > "${xp_data_dir}/test.bios.semeval.merged"
    mv "${xp_data_dir}/test.bios.semeval.merged" "${xp_data_dir}/test.bios.semeval"
  fi
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
#!/bin/bash | ||
|
||
source "$(dirname "${BASH_SOURCE[0]}")/setup.sh" | ||
|
||
# Print usage information for the open-sesame wrapper script to stdout.
# The usage line includes -e/--embed, which the argument parser accepts.
show_help() {
cat << EOF
Usage: ${0##*/} [-h] -m {train,decode} -x XP_NUM [-s {dev,test}] [-d] [-u] [-e EMBEDDINGS]
Train or decode with the OPEN-SESAME parser.
  -h, --help                   display this help and exit
  -m, --mode {train,decode}    open-sesame mode to use: train or decode
  -x, --xp XP_NUM              xp number written as 3 digits (e.g. 001)
  -s, --splits {dev,test}      which splits to use in decode mode: dev or test
                               (required in decode mode)
  -d, --with_dep_parses        if specified, parser will use dependency parses
  -u, --with_hierarchy         if specified, parser will use the hierarchy feature
  -e, --embed EMBEDDINGS       name of embeddings to use
EOF
}
|
||
# Option state. with_dep_parses is a plain switch; the is_*_set flags record
# whether the corresponding option was supplied.
is_mode_set=FALSE
is_xp_set=FALSE
is_splits_set=FALSE
with_dep_parses=FALSE
is_embed_set=FALSE

# Consume leading options from the positional parameters. Options taking a
# value shift once inside their arm and once more at the bottom of the loop.
# NOTE(review): show_help advertises -u/--with_hierarchy, but no arm below
# handles it, so it currently ends up in the "Unknown option" warning.
while :; do
  case "$1" in
    -h|-\?|--help)
      show_help
      exit
      ;;
    -m|--mode)
      if [ "$2" ]; then
        is_mode_set=TRUE
        mode=$2
        shift
      else
        die "ERROR: '--mode' requires a non-empty option argument"
      fi
      ;;
    -x|--xp)
      if [ "$2" ]; then
        is_xp_set=TRUE
        xp="xp_$2"
        shift
      else
        # The option is spelled --xp; the message used to name '--xpdir'.
        die "ERROR: '--xp' requires a non-empty option argument"
      fi
      ;;
    -s|--splits)
      if [ "$2" ]; then
        is_splits_set=TRUE
        splits=$2
        shift
      else
        die "ERROR: '--splits' requires a non-empty option argument"
      fi
      ;;
    -d|--with_dep_parses)
      with_dep_parses=TRUE
      ;;
    -e|--embed)
      if [ "$2" ]; then
        is_embed_set=TRUE
        embed=$2
        shift
      else
        die "ERROR: '--embed' requires a non-empty option argument"
      fi
      ;;
    --)
      # Explicit end of options.
      shift
      break
      ;;
    -?*)
      printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
      ;;
    *)
      break
      ;;
  esac
  shift
done
|
||
# Mandatory options.
if [[ "${is_mode_set}" = FALSE ]]; then
  die "ERROR: '--mode' parameter is required"
fi

if [[ "${is_xp_set}" = FALSE ]]; then
  die "ERROR: '--xp' parameter is required"
fi

# Only the two known modes are accepted.
case "${mode}" in
  train | decode) ;;
  *)
    die "Invalid mode '${mode}': should be 'train' or 'decode'"
    ;;
esac

# Decoding additionally needs a valid --splits value.
if [[ "${mode}" = decode ]]; then
  if [[ "${is_splits_set}" = FALSE ]]; then
    die "ERROR: '--splits' parameter is required for decoding"
  fi
  case "${splits}" in
    dev | test) ;;
    *)
      die "Invalid splits '${splits}': should be 'dev' or 'test'"
      ;;
  esac
fi
|
||
# Create the model directory unless it already exists. Checking first keeps
# the "already exists" case quiet without hiding real mkdir failures
# (missing experiment directory, permissions) behind 2>/dev/null.
if [[ ! -d "${XP_DIR}/${xp}/model" ]]; then
  mkdir -- "${XP_DIR}/${xp}/model"
fi
|
||
#######################################
# Rewrite a decoded file in place so that each line holds columns 1-14 of
# the input BIOS file followed by column 15 of the decoded file. Lines that
# end up containing only tabs are emptied and consecutive empty lines are
# squeezed into one.
# Arguments:
#   $1 - BIOS file providing columns 1-14
#   $2 - decoded file providing column 15; overwritten with the result
# Returns: 0 on success, 1 if the scratch directory cannot be created or a
#          processing step fails (the decoded file is left untouched when
#          column extraction fails)
#######################################
postprocess_decoded_file() {
  local bios_file=$1
  local decoded_file=$2
  local tmp_dir
  local status=0

  # A private mktemp directory replaces the fixed /tmp/biospost path, so
  # concurrent runs cannot delete or clobber each other's scratch files.
  tmp_dir=$(mktemp -d) || return 1

  # Both columns are extracted to scratch files first: the final redirection
  # truncates the decoded file, so it cannot be read in the same pipeline.
  if cut -f 1-14 -- "${bios_file}" > "${tmp_dir}/cut.1.txt" \
    && cut -f 15 -- "${decoded_file}" > "${tmp_dir}/cut.2.txt"; then
    # $'...' hands sed a literal tab character, which works in every sed.
    paste "${tmp_dir}/cut.1.txt" "${tmp_dir}/cut.2.txt" \
      | sed -E $'s/^\t+$//' \
      | cat -s > "${decoded_file}" || status=1
  else
    status=1
  fi

  rm -rf -- "${tmp_dir}"
  return "${status}"
}
|
||
# Train mode: run segrnn-argid.py on the train/dev BIOS files. The argument
# list is built once as an array, so every path stays a single word and the
# dependency-parse switch only appends "--syn dep".
if [[ "${mode}" = train ]]; then
  train_args=(
    --model "${XP_DIR}/${xp}/model/segrnn.argid.model"
    --trainf "${XP_DIR}/${xp}/data/train.bios"
    --devf "${XP_DIR}/${xp}/data/dev.bios"
    --vecf "${RESOURCES_DIR}/${embed}"
  )
  if [[ "${with_dep_parses}" = TRUE ]]; then
    train_args+=(--syn dep)
  fi
  python "${OPEN_SESAME_HOME}/src/segrnn-argid.py" "${train_args[@]}"
fi
|
||
# Decode mode: run segrnn-argid.py in test mode on the selected split, then
# post-process the decoded output. The argument list is built once as an
# array, so every path stays a single word and the dependency-parse switch
# only appends "--syn dep".
if [[ "${mode}" = decode ]]; then
  decode_args=(
    --mode test
    --model "${XP_DIR}/${xp}/model/segrnn.argid.model"
    --trainf "${XP_DIR}/${xp}/data/train.bios"
    --testf "${XP_DIR}/${xp}/data/${splits}.bios.semeval"
    --vecf "${RESOURCES_DIR}/${embed}"
  )
  if [[ "${with_dep_parses}" = TRUE ]]; then
    decode_args+=(--syn dep)
  fi
  python "${OPEN_SESAME_HOME}/src/segrnn-argid.py" "${decode_args[@]}"

  postprocess_decoded_file \
    "${XP_DIR}/${xp}/data/${splits}.bios.semeval" \
    "${XP_DIR}/${xp}/data/${splits}.bios.semeval.decoded"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
author_email='[email protected]', | ||
long_description=long_description, | ||
long_description_content_type='text/markdown', | ||
version='1.2.6', | ||
version='1.3.0', | ||
url='https://gitlab.com/akb89/pyfn', | ||
download_url='https://pypi.org/project/pyfn/#files', | ||
license='MIT', | ||
|