diff --git a/2-svd-nmf-topic-modeling.ipynb b/2-svd-nmf-topic-modeling.ipynb index f9d50bf..eec8da3 100644 --- a/2-svd-nmf-topic-modeling.ipynb +++ b/2-svd-nmf-topic-modeling.ipynb @@ -549,7 +549,7 @@ "\n", "You will then need to download the English model:\n", "```\n", - "spacy -m download en_core_web_sm\n", + "python -m spacy download en_core_web_sm\n", "```" ] }, @@ -564,26 +564,48 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ + "nlp = spacy.load(\"en_core_web_sm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that in the video we used\n", + "```\n", "from spacy.lemmatizer import Lemmatizer\n", - "lemmatizer = Lemmatizer()" + "lemmatizer = Lemmatizer()\n", + "```\n", + "which creates an *empty* lemmatizer.\n", + "\n", + "This has been replaced with an English-specific lemmatizer." ] }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "lemmatizer = nlp.Defaults.create_lemmatizer()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['feet', 'foot', 'foots', 'footing']" + "['foot', 'foot', 'foot', 'foot']" ] }, - "execution_count": 81, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -606,15 +628,6 @@ "Stop words vary from library to library" ] }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "nlp = spacy.load(\"en_core_web_sm\")" - ] - }, { "cell_type": "code", "execution_count": 13,