Skip to content

Commit

Permalink
Merge pull request #2260 from vaharoni:tutorial_text_fix_2
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 563950264
  • Loading branch information
copybara-github committed Sep 9, 2023
2 parents a0fe048 + f2ea5cf commit 6c7e49a
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions site/en/tutorials/load_data/text.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1140,8 +1140,9 @@
},
"outputs": [],
"source": [
"keys = vocab\n",
"values = range(2, len(vocab) + 2) # Reserve `0` for padding, `1` for OOV tokens.\n",
"# Reserve `0` for padding, `1` for OOV tokens.\n",
"keys = ['', '[UNK]'] + vocab\n",
"values = range(len(keys))\n",
"\n",
"init = tf.lookup.KeyValueTensorInitializer(\n",
" keys, values, key_dtype=tf.string, value_dtype=tf.int64)\n",
Expand Down Expand Up @@ -1171,6 +1172,8 @@
" standardized = tf_text.case_fold_utf8(text)\n",
" tokenized = tokenizer.tokenize(standardized)\n",
" vectorized = vocab_table.lookup(tokenized)\n",
" # StaticVocabularyTable returns the OOV token as vocab_size + 2. We overwrite it to be 1.\n",
" vectorized = tf.where(vectorized == len(keys), tf.constant(1, dtype=tf.int64), vectorized)\n",
" return vectorized, label"
]
},
Expand Down

0 comments on commit 6c7e49a

Please sign in to comment.