From ebb3188053bde18b0a04d4355b321ee4fdb8f8df Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 7 Jul 2020 10:12:20 -0700 Subject: [PATCH] Fix bad merge --- scripts/train_parser_and_tagger.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/scripts/train_parser_and_tagger.py b/scripts/train_parser_and_tagger.py index 11e8ea6d..d8faf8c4 100644 --- a/scripts/train_parser_and_tagger.py +++ b/scripts/train_parser_and_tagger.py @@ -152,6 +152,7 @@ def train_parser_and_tagger(train_json_path: str, cpu_wps = nwords/(end_time-start_time) if ontonotes_path: + # Ignoring misaligned docs because the ontonotes raw text does not always match the tokenized text onto_dev_docs = list([doc for doc in onto_train_corpus.dev_docs(nlp_loaded, ignore_misaligned=True) if len(doc[0]) > 0]) onto_scorer = nlp_loaded.evaluate(onto_dev_docs) @@ -200,18 +201,8 @@ def train_parser_and_tagger(train_json_path: str, meta_fp.write(json.dumps(meta)) if ontonotes_path: -<<<<<<< HEAD # Ignoring misaligned docs because the ontonotes raw text does not always match the tokenized text - onto_test_docs = list( - [ - doc - for doc in onto_test_corpus.dev_docs(nlp_loaded, ignore_misaligned=True) - if len(doc[0]) > 0 - ] - ) -======= - onto_test_docs = list([doc for doc in onto_test_corpus.dev_docs(nlp_loaded) if len(doc[0]) > 0]) ->>>>>>> parent of ae9a36e... Black format train_parser_and_tagger.py + onto_test_docs = list([doc for doc in onto_test_corpus.dev_docs(nlp_loaded, ignore_misaligned=True) if len(doc[0]) > 0]) print("Retrained ontonotes evaluation") scorer_onto_retrained = nlp_loaded.evaluate(onto_test_docs) print("Test results:")