
Commit

mv download logic in the index nb
deven367 committed May 31, 2024
1 parent bc64a16 commit 8863afb
Showing 2 changed files with 27 additions and 60 deletions.
66 changes: 6 additions & 60 deletions nbs/00_utils.ipynb
@@ -377,60 +377,6 @@
" nltk.download(\"omw-1.4\")\n"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a59f9b5a",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from nltk.corpus import stopwords\n",
- "import nltk"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "29c07c57",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Downloading dependencies\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[nltk_data] Downloading package punkt to /home/deven/nltk_data...\n",
- "[nltk_data] Unzipping tokenizers/punkt.zip.\n",
- "[nltk_data] Downloading package stopwords to /home/deven/nltk_data...\n",
- "[nltk_data] Unzipping corpora/stopwords.zip.\n",
- "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
- "[nltk_data] /home/deven/nltk_data...\n",
- "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n",
- "[nltk_data] Downloading package wordnet to /home/deven/nltk_data...\n",
- "[nltk_data] Downloading package omw-1.4 to /home/deven/nltk_data...\n"
- ]
- }
- ],
- "source": [
- "#| hide\n",
- "try:\n",
- " nltk.data.find('tokenizers/punkt')\n",
- " nltk.data.find('corpora/stopwords')\n",
- " nltk.data.find('corpora/wordnet')\n",
- " nltk.data.find('corpora/omw-1.4')\n",
- " nltk.data.find('taggers/averaged_perceptron_tagger')\n",
- "except:\n",
- " print('Downloading dependencies')\n",
- " download_nltk_dep()"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -1158,7 +1104,7 @@
" name = str(self)\n",
" if name.endswith('.npy'):\n",
" return np.load(self).shape\n",
- " raise AssertionError('not a npy array') "
+ " raise AssertionError('not a npy array')"
]
},
{
@@ -1223,7 +1169,7 @@
"outputs": [],
"source": [
"#| local\n",
- "with working_directory('/home/deven'): \n",
+ "with working_directory('/home/deven'):\n",
" shp = Path('test.npy').shape\n",
" test_eq(arr.shape, Path('test.npy').shape)"
]
@@ -1260,7 +1206,7 @@
"def text(self: Path):\n",
" if str(self).endswith('.txt'):\n",
" with open(self) as f: return f.read()\n",
- " raise AssertionError('not a txt file') "
+ " raise AssertionError('not a txt file')"
]
},
{
@@ -1305,11 +1251,11 @@
"def sentences(self: Path):\n",
" name = str(self)\n",
" if name.endswith('.txt'):\n",
- " if '_cleaned' in name: \n",
+ " if '_cleaned' in name:\n",
" return split_by_newline(self.text)\n",
- " else: \n",
+ " else:\n",
" return make_sentences(self.text)\n",
- " raise AssertionError('not a txt file') "
+ " raise AssertionError('not a txt file')"
]
},
{
21 changes: 21 additions & 0 deletions nbs/index.ipynb
@@ -22,6 +22,27 @@
"%autoreload 2"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "from nltk.corpus import stopwords\n",
+ "import nltk\n",
+ "\n",
+ "try:\n",
+ " nltk.data.find('tokenizers/punkt')\n",
+ " nltk.data.find('corpora/stopwords')\n",
+ " nltk.data.find('corpora/wordnet')\n",
+ " nltk.data.find('corpora/omw-1.4')\n",
+ " nltk.data.find('taggers/averaged_perceptron_tagger')\n",
+ "except:\n",
+ " print('Downloading dependencies')\n",
+ " download_nltk_dep()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
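
For reference, the cell moved into nbs/index.ipynb boils down to the guard below once lifted out of the notebook JSON. This is a minimal standalone sketch, not the notebook's exact code: download_nltk_dep is assumed to wrap nltk.download for the five packages named in the removed cell's download log (punkt, stopwords, averaged_perceptron_tagger, wordnet, omw-1.4), and the cell's bare except: is narrowed here to except LookupError:, which is what nltk.data.find raises when a resource is missing.

# Standalone sketch of the dependency guard this commit moves into nbs/index.ipynb.
# download_nltk_dep is assumed to wrap nltk.download for the packages listed below;
# the actual notebook cell uses a bare `except:` rather than `except LookupError:`.
import nltk


def download_nltk_dep():
    # Assumed behaviour, inferred from the download log in the removed cell.
    for pkg in ("punkt", "stopwords", "averaged_perceptron_tagger", "wordnet", "omw-1.4"):
        nltk.download(pkg)


try:
    # nltk.data.find raises LookupError when a resource is not installed locally.
    nltk.data.find("tokenizers/punkt")
    nltk.data.find("corpora/stopwords")
    nltk.data.find("corpora/wordnet")
    nltk.data.find("corpora/omw-1.4")
    nltk.data.find("taggers/averaged_perceptron_tagger")
except LookupError:
    print("Downloading dependencies")
    download_nltk_dep()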
