diff --git a/Dockerfile b/Dockerfile
index a3f69d14..662bd814 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,7 @@ COPY tests/ tests/
 COPY .flake8 .flake8
 
 RUN pip install -r requirements.in
-RUN pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz
+RUN pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz
 RUN python -m spacy download en_core_web_sm
 RUN python -m spacy download en_core_web_md
diff --git a/README.md b/README.md
index acae4fa8..63cc2d77 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ pip install scispacy
 to install a model (see our full selection of available models below), run a command like the following:
 
 ```bash
-pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz
+pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz
 ```
 
 Note: We strongly recommend that you use an isolated Python environment (such as virtualenv or conda) to install scispacy.
@@ -77,13 +77,13 @@ pip install CMD-V(to paste the copied URL)
 
 | Model | Description | Install URL
 |:---------------|:------------------|:----------|
-| en_core_sci_sm | A full spaCy pipeline for biomedical data with a ~100k vocabulary. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz)|
-| en_core_sci_md | A full spaCy pipeline for biomedical data with a ~360k vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_md-0.2.4.tar.gz)|
-| en_core_sci_lg | A full spaCy pipeline for biomedical data with a ~785k vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_lg-0.2.4.tar.gz)|
-| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_craft_md-0.2.4.tar.gz)|
-| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_jnlpba_md-0.2.4.tar.gz)|
-| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bc5cdr_md-0.2.4.tar.gz)|
-| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bionlp13cg_md-0.2.4.tar.gz)|
+| en_core_sci_sm | A full spaCy pipeline for biomedical data with a ~100k vocabulary. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz)|
+| en_core_sci_md | A full spaCy pipeline for biomedical data with a ~360k vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_md-0.2.5.tar.gz)|
+| en_core_sci_lg | A full spaCy pipeline for biomedical data with a ~785k vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_lg-0.2.5.tar.gz)|
+| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_craft_md-0.2.5.tar.gz)|
+| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_jnlpba_md-0.2.5.tar.gz)|
+| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bc5cdr_md-0.2.5.tar.gz)|
+| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bionlp13cg_md-0.2.5.tar.gz)|
 
 ## Additional Pipeline Components
diff --git a/docs/index.md b/docs/index.md
index e56067df..9f97758e 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -17,13 +17,13 @@ pip install
 
 | Model | Description | Install URL
 |:---------------|:------------------|:----------|
-| en_core_sci_sm | A full spaCy pipeline for biomedical data. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_sm-0.2.4.tar.gz)|
-| en_core_sci_md | A full spaCy pipeline for biomedical data with a larger vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_md-0.2.4.tar.gz)|
-| en_core_sci_lg | A full spaCy pipeline for biomedical data with a larger vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_core_sci_lg-0.2.4.tar.gz)|
-| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_craft_md-0.2.4.tar.gz)|
-| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_jnlpba_md-0.2.4.tar.gz)|
-| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bc5cdr_md-0.2.4.tar.gz)|
-| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.4/en_ner_bionlp13cg_md-0.2.4.tar.gz)|
+| en_core_sci_sm | A full spaCy pipeline for biomedical data. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz)|
+| en_core_sci_md | A full spaCy pipeline for biomedical data with a larger vocabulary and 50k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_md-0.2.5.tar.gz)|
+| en_core_sci_lg | A full spaCy pipeline for biomedical data with a larger vocabulary and 600k word vectors. |[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_lg-0.2.5.tar.gz)|
+| en_ner_craft_md| A spaCy NER model trained on the CRAFT corpus.|[Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_craft_md-0.2.5.tar.gz)|
+| en_ner_jnlpba_md | A spaCy NER model trained on the JNLPBA corpus.| [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_jnlpba_md-0.2.5.tar.gz)|
+| en_ner_bc5cdr_md | A spaCy NER model trained on the BC5CDR corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bc5cdr_md-0.2.5.tar.gz)|
+| en_ner_bionlp13cg_md | A spaCy NER model trained on the BIONLP13CG corpus. | [Download](https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_ner_bionlp13cg_md-0.2.5.tar.gz)|
@@ -34,17 +34,17 @@ Our models achieve performance within 3% of published state of the art dependenc
 
 | model | UAS | LAS | POS | Mentions (F1) | Web UAS |
 |:---------------|:----|:------|:------|:---|:---|
-| en_core_sci_sm | 89.36| 87.41 | 98.30 | 67.12 | 85.46 |
-| en_core_sci_md | 90.08| 88.26 | 98.51 | 69.17 | 86.88 |
-| en_core_sci_lg | 90.11| 88.31 | 98.52 | 69.08 | 85.16 |
+| en_core_sci_sm | 89.26| 87.38 | 98.38 | 67.14 | 87.18 |
+| en_core_sci_md | 89.92| 88.01 | 98.54 | 69.46 | 88.20 |
+| en_core_sci_lg | 89.81| 88.02 | 98.57 | 69.29 | 88.11 |
 
 | model | F1 | Entity Types|
 |:---------------|:-----|:--------|
-| en_ner_craft_md | 76.60|GGP, SO, TAXON, CHEBI, GO, CL|
-| en_ner_jnlpba_md | 74.26| DNA, CELL_TYPE, CELL_LINE, RNA, PROTEIN |
-| en_ner_bc5cdr_md | 85.02| DISEASE, CHEMICAL|
-| en_ner_bionlp13cg_md | 78.28|CANCER, ORGAN, TISSUE, ORGANISM, CELL, AMINO_ACID, GENE_OR_GENE_PRODUCT, SIMPLE_CHEMICAL, ANATOMICAL_SYSTEM, IMMATERIAL_ANATOMICAL_ENTITY, MULTI-TISSUE_STRUCTURE, DEVELOPING_ANATOMICAL_STRUCTURE, ORGANISM_SUBDIVISION, CELLULAR_COMPONENT|
+| en_ner_craft_md | 75.02|GGP, SO, TAXON, CHEBI, GO, CL|
+| en_ner_jnlpba_md | 73.56| DNA, CELL_TYPE, CELL_LINE, RNA, PROTEIN |
+| en_ner_bc5cdr_md | 84.94| DISEASE, CHEMICAL|
+| en_ner_bionlp13cg_md | 78.09|CANCER, ORGAN, TISSUE, ORGANISM, CELL, AMINO_ACID, GENE_OR_GENE_PRODUCT, SIMPLE_CHEMICAL, ANATOMICAL_SYSTEM, IMMATERIAL_ANATOMICAL_ENTITY, MULTI-TISSUE_STRUCTURE, DEVELOPING_ANATOMICAL_STRUCTURE, ORGANISM_SUBDIVISION, CELLULAR_COMPONENT|
 
 ### Example Usage
diff --git a/requirements.in b/requirements.in
index 79559a37..fede57e6 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,5 +1,5 @@
 numpy
-spacy>=2.2.1
+spacy>=2.3.0,<3.0.0
 spacy-lookups-data
 pandas
 requests>=2.0.0,<3.0.0
diff --git a/scispacy/file_cache.py b/scispacy/file_cache.py
index 5a519da0..744c6d23 100644
--- a/scispacy/file_cache.py
+++ b/scispacy/file_cache.py
@@ -126,7 +126,7 @@ def get_from_cache(url: str, cache_dir: str = None) -> str:
     if not os.path.exists(cache_path):
         # Download to temporary file, then copy to cache dir once finished.
         # Otherwise you get corrupt cache entries if the download gets interrupted.
-        with tempfile.NamedTemporaryFile() as temp_file:
+        with tempfile.NamedTemporaryFile() as temp_file:  # type: IO
            print(f"{url} not found in cache, downloading to {temp_file.name}")
 
            # GET file object
diff --git a/scispacy/version.py b/scispacy/version.py
index 53df94b1..07e77d87 100644
--- a/scispacy/version.py
+++ b/scispacy/version.py
@@ -1,6 +1,6 @@
 _MAJOR = "0"
 _MINOR = "2"
-_REVISION = "4-unreleased"
+_REVISION = "5-unreleased"
 
 VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
 VERSION = "{0}.{1}.{2}".format(_MAJOR, _MINOR, _REVISION)
diff --git a/scripts/init_model.py b/scripts/init_model.py
index 9bea83be..1bd71a23 100644
--- a/scripts/init_model.py
+++ b/scripts/init_model.py
@@ -105,7 +105,6 @@ def create_model(lang, probs, oov_prob, vectors_data, vector_keys, expand_vector
         lexeme = nlp.vocab[word]
         lexeme.rank = i
         lexeme.prob = prob
-        lexeme.is_oov = False
         # Decode as a little-endian string, so that we can do & 15 to get
         # the first 4 bits. See _parse_features.pyx
         lexeme.cluster = 0
@@ -117,7 +116,6 @@
     for i, word in enumerate(vector_keys):
         if word not in nlp.vocab and expand_vectors:
             lexeme = nlp.vocab[word]
-            lexeme.is_oov = False
             lex_added += 1
         elif word in nlp.vocab and not expand_vectors:
             new_keys.append(word)
diff --git a/scripts/parser.sh b/scripts/parser.sh
index 26dd350e..90cec465 100644
--- a/scripts/parser.sh
+++ b/scripts/parser.sh
@@ -1,4 +1,4 @@
-#!/user/bin/env bash
+#!/usr/bin/env bash
 
 set -e
 
diff --git a/scripts/train_parser_and_tagger.py b/scripts/train_parser_and_tagger.py
index 3591212c..d8faf8c4 100644
--- a/scripts/train_parser_and_tagger.py
+++ b/scripts/train_parser_and_tagger.py
@@ -102,13 +102,13 @@ def train_parser_and_tagger(train_json_path: str,
     train_docs = train_corpus.train_docs(nlp)
     train_docs = list(train_docs)
-    train_mixture = train_docs
     if ontonotes_path:
-        onto_train_docs = onto_train_corpus.train_docs(nlp)
+        # Ignoring misaligned because the ontonotes raw text does not always match the tokenized text
+        onto_train_docs = onto_train_corpus.train_docs(nlp, ignore_misaligned=True)
         onto_train_docs = list([doc for doc in onto_train_docs if len(doc[0]) > 0])
         num_onto_docs = int(float(ontonotes_train_percent)*len(onto_train_docs))
         randomly_sampled_onto = random.sample(onto_train_docs, num_onto_docs)
-        train_mixture += randomly_sampled_onto
+        train_docs += randomly_sampled_onto
 
     row_head, output_stats = _configure_training_output(nlp.pipe_names, -1, False)
     row_widths = [len(w) for w in row_head]
@@ -121,7 +121,7 @@
     best_epoch = 0
     best_epoch_uas = 0.0
     for i in range(20):
-        random.shuffle(train_mixture)
+        random.shuffle(train_docs)
         with nlp.disable_pipes(*other_pipes):
             with tqdm(total=n_train_words, leave=False) as pbar:
                 losses = {}
@@ -152,7 +152,8 @@
         cpu_wps = nwords/(end_time-start_time)
 
         if ontonotes_path:
-            onto_dev_docs = list([doc for doc in onto_train_corpus.dev_docs(nlp_loaded) if len(doc[0]) > 0])
+            # Ignoring misaligned docs because the ontonotes raw text does not always match the tokenized text
+            onto_dev_docs = list([doc for doc in onto_train_corpus.dev_docs(nlp_loaded, ignore_misaligned=True) if len(doc[0]) > 0])
 
             onto_scorer = nlp_loaded.evaluate(onto_dev_docs)
@@ -200,7 +201,8 @@
         meta_fp.write(json.dumps(meta))
 
     if ontonotes_path:
-        onto_test_docs = list([doc for doc in onto_test_corpus.dev_docs(nlp_loaded) if len(doc[0]) > 0])
+        # Ignoring misaligned docs because the ontonotes raw text does not always match the tokenized text
+        onto_test_docs = list([doc for doc in onto_test_corpus.dev_docs(nlp_loaded, ignore_misaligned=True) if len(doc[0]) > 0])
         print("Retrained ontonotes evaluation")
         scorer_onto_retrained = nlp_loaded.evaluate(onto_test_docs)
         print("Test results:")
diff --git a/scripts/train_specialised_ner.py b/scripts/train_specialised_ner.py
index 9f5c93f4..a37d2d22 100644
--- a/scripts/train_specialised_ner.py
+++ b/scripts/train_specialised_ner.py
@@ -90,7 +90,8 @@ def train(model, train_data, dev_data, test_data, output_dir, n_iter, meta_overr
                                 util.env_opt('batch_to', 32),
                                 util.env_opt('batch_compound', 1.001))
 
-    optimizer = nlp.begin_training()
+    with nlp.disable_pipes(*other_pipes):
+        optimizer = nlp.begin_training()
     best_epoch = 0
     best_f1 = 0
     for i in range(n_iter):
diff --git a/setup.py b/setup.py
index 979df6f9..adc99867 100644
--- a/setup.py
+++ b/setup.py
@@ -21,41 +21,34 @@ exec(version_file.read(), VERSION)
 
 setup(
-    name = 'scispacy',
-    version = VERSION["VERSION"],
-    url = 'https://allenai.github.io/SciSpaCy/',
-    author = 'Allen Institute for Artificial Intelligence',
-    author_email = 'ai2-info@allenai.org',
-    description = 'A full SpaCy pipeline and models for scientific/biomedical documents.',
+    name="scispacy",
+    version=VERSION["VERSION"],
+    url="https://allenai.github.io/SciSpaCy/",
+    author="Allen Institute for Artificial Intelligence",
+    author_email="ai2-info@allenai.org",
+    description="A full SpaCy pipeline and models for scientific/biomedical documents.",
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",
-    keywords = ["bioinformatics nlp spacy SpaCy biomedical"],
+    keywords=["bioinformatics nlp spacy SpaCy biomedical"],
     classifiers=[
-        'Intended Audience :: Science/Research',
-        'Development Status :: 3 - Alpha',
-        'License :: OSI Approved :: Apache Software License',
-        'Programming Language :: Python :: 3.6',
-        'Topic :: Scientific/Engineering :: Artificial Intelligence',
-        'Topic :: Scientific/Engineering :: Bio-Informatics',
+        "Intended Audience :: Science/Research",
+        "Development Status :: 3 - Alpha",
+        "License :: OSI Approved :: Apache Software License",
+        "Programming Language :: Python :: 3.6",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Scientific/Engineering :: Bio-Informatics",
     ],
-    packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
+    packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
     license="Apache",
     install_requires=[
-        "spacy>=2.2.1",
-        "requests>=2.0.0,<3.0.0"
-        "conllu",
+        "spacy>=2.3.0,<3.0.0",
+        "requests>=2.0.0,<3.0.0" "conllu",
         "numpy",
         "joblib",
         "nmslib>=1.7.3.6",
         "scikit-learn>=0.20.3",
-        "pysbd"
-    ],
-    tests_require=[
-        "pytest",
-        "pytest-cov",
-        "flake8",
-        "black",
-        "mypy"
-    ],
-    python_requires='>=3.6.0',
+        "pysbd",
+    ],
+    tests_require=["pytest", "pytest-cov", "flake8", "black", "mypy"],
+    python_requires=">=3.6.0",
 )
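
For readers unfamiliar with the two spaCy 2.x idioms used in the training-script changes above (`ignore_misaligned=True` on `GoldCorpus.train_docs` and wrapping `begin_training()` in `disable_pipes`), here is a minimal sketch of how they fit together. This is illustrative only and is not part of the diff; the corpus paths and the pipe name are hypothetical placeholders.

```python
# Sketch only -- not code from this repository. Illustrates the spaCy 2.x APIs
# touched by the training-script changes above; paths and pipe names are hypothetical.
import random

import spacy
from spacy.gold import GoldCorpus

nlp = spacy.blank("en")
nlp.add_pipe(nlp.create_pipe("ner"))

# ignore_misaligned=True tells GoldCorpus to skip examples whose raw text cannot
# be aligned with the gold tokenization, rather than raising an error.
corpus = GoldCorpus("train.json", "dev.json")  # hypothetical paths
train_docs = list(corpus.train_docs(nlp, ignore_misaligned=True))

# Disabling the pipes that are not being trained before begin_training() keeps
# their existing weights from being (re)initialised.
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes):
    optimizer = nlp.begin_training()

random.shuffle(train_docs)
```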