Skip to content

Commit

Permalink
Merge pull request #12 from akb89/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
akb89 authored May 3, 2019
2 parents 1c1ae4b + b80841a commit e1bc052
Show file tree
Hide file tree
Showing 4 changed files with 317 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ matrix:

install:
- python setup.py -q install
- pip install pylint pydocstyle pytest pytest-cov coveralls
- pip install pylint pydocstyle pytest pytest-cov==2.5.0 coveralls

script:
- python -m pytest --cov=pyfn tests/
Expand Down
145 changes: 145 additions & 0 deletions scripts/frameid.embed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/bin/bash

source "$(dirname "${BASH_SOURCE[0]}")/setup.sh"

# Print the command-line usage summary for this script to stdout.
# The usage line lists every option the parser below accepts, including
# -e/--embed.
show_help() {
cat << EOF
Usage: ${0##*/} [-h] -m {train,decode} -x XP_NUM [-p {semafor,open-sesame}] [-e EMBED]
Perform frame identification.
-h, --help display this help and exit
-m, --mode train on all models or decode using a single model
-x, --xp XP_NUM xp number written as 3 digits (e.g. 001)
-p, --parser {semafor,open-sesame} formalize decoded frames for specified parser
-e, --embed name of embeddings to use
EOF
}

# Option-parsing state: each is_*_set flag flips to TRUE once the matching
# option has been seen on the command line.
is_embed_set=FALSE
is_parser_set=FALSE
is_mode_set=FALSE
is_xp_set=FALSE

# Consume options from the positional parameters until '--' or the first
# word that is not an option. Options taking a value shift twice in total:
# once inside their arm and once at the bottom of the loop.
while true; do
  case "$1" in
    -h | -\? | --help)
      show_help
      exit
      ;;
    -x | --xp)
      if [ -z "$2" ]; then
        die "ERROR: '--xp' requires a non-empty option argument"
      else
        xp="xp_$2"
        is_xp_set=TRUE
        shift
      fi
      ;;
    -m | --mode)
      if [ -z "$2" ]; then
        die "ERROR: '--mode' requires a non-empty option argument"
      else
        mode=$2
        is_mode_set=TRUE
        shift
      fi
      ;;
    -p | --parser)
      if [ -z "$2" ]; then
        die "ERROR: '--parser' requires a non-empty option argument"
      else
        parser=$2
        is_parser_set=TRUE
        shift
      fi
      ;;
    -e | --embed)
      if [ -z "$2" ]; then
        die "ERROR: '--embed' requires a non-empty option argument"
      else
        embed=$2
        is_embed_set=TRUE
        shift
      fi
      ;;
    --)
      # Explicit end of options: drop the marker and stop parsing.
      shift
      break
      ;;
    -?*)
      # Unknown options are reported on stderr but do not abort the run.
      printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
      ;;
    *)
      # Empty or non-option word: nothing more to parse.
      break
      ;;
  esac
  shift
done

# --xp and --mode are mandatory: abort early when either one is missing.
if [ "${is_xp_set}" = FALSE ]; then
  die "ERROR: '--xp' parameter is required."
fi

if [ "${is_mode_set}" = FALSE ]; then
  die "ERROR: '--mode' parameter is required."
fi

# Only 'train' and 'decode' are handled further down. Reject anything else
# here so that a mistyped mode fails loudly instead of doing nothing.
case "${mode}" in
  train | decode) ;;
  *)
    die "Invalid mode '${mode}': should be 'train' or 'decode'"
    ;;
esac

# Stage the input files for frame identification under
# ${XP_DIR}/${xp}/frameid/data.
# Globals read: XP_DIR, xp, RESOURCES_DIR, embed, SCRIPTS_DIR, SIMFRAMEID_HOME
# Steps:
#   1. create the embeddings/, corpora/ and lexicons/ directories
#   2. copy the test/train corpus files from ${XP_DIR}/${xp}/data into corpora/
#   3. copy the embeddings file ${RESOURCES_DIR}/${embed} into embeddings/
#   4. rename corpora/test.frames to corpora/test.frame.elements
#   5. run flatten.sh on the copied test.sentences.conllx
#   6. run generate.py on train.frame.elements with lexicons/fn_lexicon as its
#      second argument (presumably the output path; confirm against generate.py)
# All paths are quoted so directories containing whitespace work.
prepare() {
  local xp_root="${XP_DIR}/${xp}"
  local data_dir="${xp_root}/frameid/data"
  local corpora_dir="${data_dir}/corpora"

  echo "Preparing files for frame identification..."

  # -p keeps re-runs quiet when the directories already exist, while still
  # reporting genuine failures (permissions, read-only filesystem, ...).
  mkdir -p -- "${data_dir}/embeddings" "${corpora_dir}" "${data_dir}/lexicons"

  cp -- "${xp_root}/data/test.frames" "${corpora_dir}/"
  cp -- "${xp_root}/data/test.sentences.conllx" "${corpora_dir}/"
  cp -- "${xp_root}/data/train.frame.elements" "${corpora_dir}/"
  cp -- "${xp_root}/data/train.sentences.conllx.flattened" "${corpora_dir}/"

  cp -- "${RESOURCES_DIR}/${embed}" "${data_dir}/embeddings/"

  mv -- "${corpora_dir}/test.frames" "${corpora_dir}/test.frame.elements"

  bash "${SCRIPTS_DIR}/flatten.sh" -f "${corpora_dir}/test.sentences.conllx"

  python3 "${SIMFRAMEID_HOME}/generate.py" \
    "${corpora_dir}/train.frame.elements" \
    "${data_dir}/lexicons/fn_lexicon"

  echo "Done"
}

# Train mode: stage the inputs, then run main.py in 'train' mode on the
# experiment's frameid directory.
if [ "${mode}" = train ]; then
  prepare
  echo "Training frame identification on all models..."
  # Paths are quoted so directories with whitespace work. The embeddings name
  # is passed only when non-empty, so omitting --embed still yields no extra
  # argument (as before) rather than an empty one.
  python "${SIMFRAMEID_HOME}/simpleFrameId/main.py" train \
    "${XP_DIR}/${xp}/frameid" ${embed:+"${embed}"}
  echo "Done"
fi

# Decode mode: validate the target parser, stage the inputs, run main.py in
# 'decode' mode, then splice the predicted frames back into the test file
# used by the selected parser.
if [ "${mode}" = decode ]; then
  if [ "${is_parser_set}" = FALSE ]; then
    die "ERROR: '--parser' parameter is required."
  fi
  case "${parser}" in
    semafor | open-sesame) ;;
    *)
      die "Invalid frame semantic parser '${parser}': Should be 'semafor' or 'open-sesame'"
      ;;
  esac
  prepare
  echo "Predicting frames..."
  # The embeddings name is passed only when non-empty, so omitting --embed
  # still yields no extra argument rather than an empty one.
  python "${SIMFRAMEID_HOME}/simpleFrameId/main.py" decode \
    "${XP_DIR}/${xp}/frameid" ${embed:+"${embed}"}
  echo "Done"
  if [ "${parser}" = semafor ]; then
    # Rebuild test.frames as: original columns 1-3, the predicted file, then
    # original columns 5-8 (original column 4 is dropped). Rows made only of
    # tabs are blanked and repeated blank lines are squeezed by 'cat -s'.
    cut -f 1-3 "${XP_DIR}/${xp}/data/test.frames" \
      > "${XP_DIR}/${xp}/data/test.frames.cut.1.txt"
    cut -f 5-8 "${XP_DIR}/${xp}/data/test.frames" \
      > "${XP_DIR}/${xp}/data/test.frames.cut.2.txt"
    paste "${XP_DIR}/${xp}/data/test.frames.cut.1.txt" \
      "${XP_DIR}/${xp}/frameid/test.frames.predicted" \
      "${XP_DIR}/${xp}/data/test.frames.cut.2.txt" \
      | perl -pe "s/^\t+$//g" \
      | cat -s > "${XP_DIR}/${xp}/data/test.frames"
    rm "${XP_DIR}/${xp}/data/test.frames.cut.1.txt"
    rm "${XP_DIR}/${xp}/data/test.frames.cut.2.txt"
  fi
  if [ "${parser}" = open-sesame ]; then
    # NOTE(review): CoNLLizer.py is resolved relative to the current working
    # directory, not the scripts directory; confirm this is intended.
    python3 CoNLLizer.py merger \
      -c "${XP_DIR}/${xp}/data/test.bios.semeval" \
      -P "${XP_DIR}/${xp}/frameid/test.frames.predicted" \
      -n 14 -N 1 > "${XP_DIR}/${xp}/data/test.bios.semeval.merged"
    mv "${XP_DIR}/${xp}/data/test.bios.semeval.merged" \
      "${XP_DIR}/${xp}/data/test.bios.semeval"
  fi
fi
170 changes: 170 additions & 0 deletions scripts/open-sesame.embed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#!/bin/bash

source "$(dirname "${BASH_SOURCE[0]}")/setup.sh"

# Print the command-line usage summary for this script to stdout.
# The usage line includes -e/--embed, which the option parser accepts.
show_help() {
cat << EOF
Usage: ${0##*/} [-h] -m {train,decode} -x XP_NUM [-s {dev,test}] [-d] [-u] [-e EMBED]
Train or decode with the OPEN-SESAME parser.
-h, --help display this help and exit
-m, --mode {train,decode} open-sesame mode to use: train or decode
-x, --xp XP_NUM xp number written as 3 digits (e.g. 001)
-s, --splits {dev,test} which splits to use in decode mode: dev or test
-d, --with_dep_parses if specified, parser will use dependency parses
-u, --with_hierarchy if specified, parser will use the hierarchy feature
-e, --embed name of embeddings to use
EOF
}

# Option-parsing state. The is_*_set flags record whether an option carrying
# a value was supplied; with_dep_parses is a plain on/off switch.
is_mode_set=FALSE
is_xp_set=FALSE
is_splits_set=FALSE
with_dep_parses=FALSE
is_embed_set=FALSE

# Consume options from the positional parameters until '--' or the first
# word that is not an option. Options taking a value shift twice in total:
# once inside their arm and once at the bottom of the loop.
# NOTE(review): the help text advertises -u/--with_hierarchy, but no arm
# handles it here, so it is reported as an unknown option.
while :; do
  case $1 in
    -h | -\? | --help)
      show_help
      exit
      ;;
    -m | --mode)
      if [ "$2" ]; then
        is_mode_set=TRUE
        mode=$2
        shift
      else
        die "ERROR: '--mode' requires a non-empty option argument"
      fi
      ;;
    -x | --xp)
      if [ "$2" ]; then
        is_xp_set=TRUE
        xp="xp_$2"
        shift
      else
        # The option is spelled --xp; the message previously named a
        # non-existent '--xpdir'.
        die "ERROR: '--xp' requires a non-empty option argument"
      fi
      ;;
    -s | --splits)
      if [ "$2" ]; then
        is_splits_set=TRUE
        splits=$2
        shift
      else
        die "ERROR: '--splits' requires a non-empty option argument"
      fi
      ;;
    -d | --with_dep_parses)
      with_dep_parses=TRUE
      ;;
    -e | --embed)
      if [ "$2" ]; then
        is_embed_set=TRUE
        embed=$2
        shift
      else
        die "ERROR: '--embed' requires a non-empty option argument"
      fi
      ;;
    --)
      # Explicit end of options: drop the marker and stop parsing.
      shift
      break
      ;;
    -?*)
      # Unknown options are reported on stderr but do not abort the run.
      printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
      ;;
    *)
      # Empty or non-option word: nothing more to parse.
      break
      ;;
  esac
  shift
done

# Mandatory options: --mode and --xp must both have been supplied.
if [ "${is_mode_set}" = FALSE ]; then die "ERROR: '--mode' parameter is required"; fi

if [ "${is_xp_set}" = FALSE ]; then die "ERROR: '--xp' parameter is required"; fi

# Only the two documented modes are accepted.
case "${mode}" in
  train | decode) ;;
  *) die "Invalid mode '${mode}': should be 'train' or 'decode'" ;;
esac

# Decoding additionally needs --splits, restricted to 'dev' or 'test'.
if [ "${mode}" = decode ]; then
  if [ "${is_splits_set}" = FALSE ]; then die "ERROR: '--splits' parameter is required for decoding"; fi
  case "${splits}" in
    dev | test) ;;
    *) die "Invalid splits '${splits}': should be 'dev' or 'test'" ;;
  esac
fi

# Ensure the experiment's model directory exists. -p makes this a no-op when
# it is already there, while genuine failures (e.g. permissions) are still
# reported instead of being discarded.
mkdir -p -- "${XP_DIR}/${xp}/model"

postprocess_decoded_file() {
BIOS_FILE=$1
DECODED_FILE=$2
OUTPUT_TMP_DIR="/tmp/biospost"

rm -rf $OUTPUT_TMP_DIR 2> /dev/null
mkdir $OUTPUT_TMP_DIR 2> /dev/null

cut -f 1-14 ${BIOS_FILE} > ${OUTPUT_TMP_DIR}/cut.1.txt
cut -f 15 ${DECODED_FILE} > ${OUTPUT_TMP_DIR}/cut.2.txt

paste ${OUTPUT_TMP_DIR}/cut.1.txt ${OUTPUT_TMP_DIR}/cut.2.txt | perl -pe "s/^\t+$//g" | cat -s > ${DECODED_FILE}

rm -rf $OUTPUT_TMP_DIR;
}

# Train mode: run segrnn-argid.py on the train/dev BIOS files with the chosen
# embeddings, writing the model under ${XP_DIR}/${xp}/model.
if [ "${mode}" = train ]; then
  # '--syn dep' is the only difference between the two training variants, so
  # it is collected as an optional argument list instead of duplicating the
  # whole command.
  train_syn_args=()
  if [ "${with_dep_parses}" = TRUE ]; then
    train_syn_args=(--syn dep)
  fi
  # The ${arr[@]+...} form keeps an empty array safe even if a sourced file
  # enables 'set -u' on older bash versions.
  python "${OPEN_SESAME_HOME}/src/segrnn-argid.py" \
    --model "${XP_DIR}/${xp}/model/segrnn.argid.model" \
    --trainf "${XP_DIR}/${xp}/data/train.bios" \
    --devf "${XP_DIR}/${xp}/data/dev.bios" \
    --vecf "${RESOURCES_DIR}/${embed}" \
    ${train_syn_args[@]+"${train_syn_args[@]}"}
fi

# Decode mode: run segrnn-argid.py in test mode on the selected split, then
# post-process the resulting .decoded file.
if [ "${mode}" = decode ]; then
  # '--syn dep' is the only difference between the two decoding variants, so
  # it is collected as an optional argument list instead of duplicating the
  # whole command.
  decode_syn_args=()
  if [ "${with_dep_parses}" = TRUE ]; then
    decode_syn_args=(--syn dep)
  fi
  # The ${arr[@]+...} form keeps an empty array safe even if a sourced file
  # enables 'set -u' on older bash versions.
  python "${OPEN_SESAME_HOME}/src/segrnn-argid.py" \
    --mode test \
    --model "${XP_DIR}/${xp}/model/segrnn.argid.model" \
    --trainf "${XP_DIR}/${xp}/data/train.bios" \
    --testf "${XP_DIR}/${xp}/data/${splits}.bios.semeval" \
    --vecf "${RESOURCES_DIR}/${embed}" \
    ${decode_syn_args[@]+"${decode_syn_args[@]}"}
  postprocess_decoded_file \
    "${XP_DIR}/${xp}/data/${splits}.bios.semeval" \
    "${XP_DIR}/${xp}/data/${splits}.bios.semeval.decoded"
fi
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
author_email='[email protected]',
long_description=long_description,
long_description_content_type='text/markdown',
version='1.2.6',
version='1.3.0',
url='https://gitlab.com/akb89/pyfn',
download_url='https://pypi.org/project/pyfn/#files',
license='MIT',
Expand Down

0 comments on commit e1bc052

Please sign in to comment.