Skip to content

Commit

Permalink
working on that database-enabled autocomplete
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Apr 4, 2024
1 parent 079c5c8 commit d62e0ec
Showing 1 changed file with 153 additions and 20 deletions.
173 changes: 153 additions & 20 deletions site/files/10-shakespeare.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1395,7 +1395,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "89f07236-7114-493c-8efa-5c5ad5df104a",
"metadata": {},
"outputs": [],
Expand All @@ -1421,14 +1421,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "aab02184-31e0-4e16-9abc-ba9f70a8e52e",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div style=\"margin: 50px; font-size: 20px;\"><a href=\"http://192.168.1.224:12345/19xx?w1=I&w2=am&w3=not&w4=the&top=20\">http://192.168.1.224:12345/19xx?w1=I&w2=am&w3=not&w4=the&top=20</a></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import IPython\n",
"\n",
"sample_query = f\"{url}/17xx?w1=I&w2=am&w3=not&w4=the&top=20\"\n",
"sample_query = f\"{url}/19xx?w1=I&w2=am&w3=not&w4=the&top=20\"\n",
"\n",
"IPython.display.HTML(f'<div style=\"margin: 50px; font-size: 20px;\"><a href=\"{sample_query}\">{sample_query}</a></div>')"
]
Expand Down Expand Up @@ -1469,7 +1483,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "e4124960-e366-4ea7-ba30-5fcf7200408a",
"metadata": {},
"outputs": [],
Expand All @@ -1479,62 +1493,181 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "77f36b96-8f05-4011-a721-d22fcc9ef9cd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'completions': [['I', 'am', 'not', 'the', 'only', 10029],\n",
" ['I', 'am', 'not', 'the', 'first', 5657],\n",
" ['I', 'am', 'not', 'the', 'man', 5416],\n",
" ['I', 'am', 'not', 'the', 'one', 3452],\n",
" ['I', 'am', 'not', 'the', 'same', 2521],\n",
" ['I', 'am', 'not', 'the', 'least', 2470],\n",
" ['I', 'am', 'not', 'the', 'kind', 1911],\n",
" ['I', 'am', 'not', 'the', 'person', 1812],\n",
" ['I', 'am', 'not', 'the', 'sort', 1764],\n",
" ['I', 'am', 'not', 'the', 'less', 1260],\n",
" ['I', 'am', 'not', 'the', 'Christ', 1140],\n",
" ['I', 'am', 'not', 'the', 'author', 980],\n",
" ['I', 'am', 'not', 'the', 'best', 911],\n",
" ['I', 'am', 'not', 'the', 'body', 897],\n",
" ['I', 'am', 'not', 'the', 'son', 682],\n",
" ['I', 'am', 'not', 'the', 'type', 628],\n",
" ['I', 'am', 'not', 'the', 'cause', 588],\n",
" ['I', 'am', 'not', 'the', 'most', 571],\n",
" ['I', 'am', 'not', 'the', 'woman', 502],\n",
" ['I', 'am', 'not', 'the', 'master', 499]]}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"requests.get(url + \"/19xx\", params={\"w1\": \"I\", \"w2\": \"am\", \"w3\": \"not\", \"w4\": \"the\", \"top\": 20}).json()"
]
},
{
"cell_type": "markdown",
"id": "6c65e8c7-4ebd-4aa7-9de0-e302ef479b3d",
"metadata": {},
"source": [
"<br><br><br>"
]
},
{
"cell_type": "code",
"execution_count": 967,
"id": "7c8301de-83e3-4d72-852d-95db8e85e3b1",
"metadata": {},
"outputs": [],
"source": [
"def choose_randomly(completions, temperature):\n",
" index = np.random.randint(0, len(completions))\n",
"\n",
" return completions[index][-2]"
]
},
{
"cell_type": "code",
"execution_count": 968,
"id": "5351706d-3898-4678-b669-82c94e81b5fe",
"metadata": {},
"outputs": [],
"source": [
"def choose_most_likely(completions, temperature):\n",
"    \"\"\"Return the word of the completion that has the largest count.\n",
"\n",
"    The count is the last item of each completion and the word is the item\n",
"    just before it. Ties go to the earliest completion, because `np.argmax`\n",
"    reports the first maximum. `temperature` is accepted but not used.\n",
"    \"\"\"\n",
"    tallies = np.array([entry[-1] for entry in completions], dtype=float)\n",
"    best = completions[np.argmax(tallies)]\n",
"    return best[-2]"
]
},
{
"cell_type": "code",
"execution_count": 970,
"id": "0445c799-430b-41a2-aa6f-039bfcc5b784",
"metadata": {},
"outputs": [],
"source": [
"# https://medium.com/@balci.pelin/llm-temperature-659d443b855a\n",
"def choose_by_softmax(completions, temperature):\n",
"    \"\"\"Sample the next word with probabilities from a tempered softmax.\n",
"\n",
"    Each completion ends with `[..., word, count]`. The counts are normalized\n",
"    to relative frequencies, divided by `temperature`, and passed through a\n",
"    softmax; one completion is then drawn with exactly those probabilities.\n",
"    A temperature of 0 is treated as the greedy limit (largest count wins).\n",
"\n",
"    Returns the word (second-to-last item) of the drawn completion.\n",
"    Raises ValueError if `completions` is empty.\n",
"    \"\"\"\n",
"    if len(completions) == 0:\n",
"        raise ValueError('no completions to choose from')\n",
"\n",
"    counts = np.array([completion[-1] for completion in completions], dtype=float)\n",
"    if temperature == 0:\n",
"        # dividing by zero is undefined; the limit is a plain argmax\n",
"        return completions[np.argmax(counts)][-2]\n",
"\n",
"    total = np.sum(counts)\n",
"    # all-zero counts carry no preference, so every completion is weighted alike\n",
"    weight = counts / total if total > 0 else np.zeros_like(counts)\n",
"\n",
"    logits = weight / temperature\n",
"    # subtracting the maximum leaves the softmax unchanged but keeps np.exp\n",
"    # from overflowing to inf (and the ratio to nan) at low temperatures\n",
"    numerators = np.exp(logits - np.max(logits))\n",
"    softmax = numerators / np.sum(numerators)\n",
"\n",
"    # draw index i with probability softmax[i]; this does not depend on the\n",
"    # order in which the completions are listed\n",
"    index = np.random.choice(len(completions), p=softmax)\n",
"\n",
"    return completions[index][-2]"
]
},
{
"cell_type": "markdown",
"id": "6c65e8c7-4ebd-4aa7-9de0-e302ef479b3d",
"id": "2e8d8ebb-3fc8-4035-8758-7bb1eada1b80",
"metadata": {},
"source": [
"<br><br><br>"
]
},
{
"cell_type": "code",
"execution_count": 756,
"id": "cafbaea3-33a1-4a9e-b8d3-25d3d4ccb2a1",
"metadata": {},
"outputs": [],
"source": [
"history = [\"I\", \"am\", \"not\", \"the\"]\n",
"\n",
"def autocomplete(history, century, temperature, top=100):\n",
"    \"\"\"Ask the server at `url` for a next word and return the extended history.\n",
"\n",
"    The last four words of `history` are sent as `w1`..`w4` to\n",
"    `{url}/{century}` together with `top`; one of the returned completions\n",
"    is then picked by `choose_by_softmax` at the given `temperature`.\n",
"\n",
"    Returns a new list with the chosen word appended, or `history` itself\n",
"    when the server returns an empty list of completions.\n",
"\n",
"    Raises ValueError if `history` has fewer than four words or if the\n",
"    response contains no \"completions\" entry.\n",
"    \"\"\"\n",
"    if len(history) < 4:\n",
"        raise ValueError(f\"need at least 4 words of history, got {len(history)}\")\n",
"\n",
"    w1, w2, w3, w4 = history[-4:]\n",
"    params = {\"w1\": w1, \"w2\": w2, \"w3\": w3, \"w4\": w4, \"top\": top}\n",
"    # without a timeout, requests waits indefinitely on an unresponsive server\n",
"    output = requests.get(f\"{url}/{century}\", params=params, timeout=30).json()\n",
"    if \"completions\" not in output:\n",
"        # .get: an unexpected response may not carry an 'error' key either\n",
"        raise ValueError(f\"server returned: {output.get('error', output)}\")\n",
"    if len(output[\"completions\"]) == 0:\n",
"        return history\n",
"    return history + [choose_by_softmax(output[\"completions\"], temperature)]"
]
},
{
"cell_type": "markdown",
"id": "1bbf66d9-bf97-46df-b16d-3f5d3bd63bde",
"metadata": {},
"source": [
"<br><br><br>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b1a539a-f233-476e-be89-e7f4631878b1",
"execution_count": 887,
"id": "2a6609f5-4b83-401b-942c-bb5638249465",
"metadata": {},
"outputs": [],
"source": [
"history = autocomplete(history, \"17xx\", 20)\n",
"history = [\"I\", \"am\", \"not\", \"the\"]"
]
},
{
"cell_type": "code",
"execution_count": 966,
"id": "1b1a539a-f233-476e-be89-e7f4631878b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" I am not the one who is in the habit of reading the Bible. The first of these is the one at the top of the hill and was out of sight. The idea of the king as the one who was'the first man said.'I'll put a girdle round the waist. In some the whole the of county government The the the the the the the the the the the the the the\n"
]
}
],
"source": [
"history = autocomplete(history, \"19xx\", 0.5)\n",
"\n",
"no_space = [\"'\", \"-\"]\n",
"\n",
"previous = \"\"\n",
"for token in history:\n",
" if token in \",;:—.!?“”‘\\\"'&\" or token.startswith(\"'\"):\n",
" if token in \",;:—.!?“”‘\\\"'&\" or any(token.startswith(x) or previous.endswith(x) for x in no_space):\n",
" prefix = \"\"\n",
" else:\n",
" prefix = \" \"\n",
" previous = token\n",
" print(prefix + token, end=\"\")\n",
"print()"
]
Expand All @@ -1544,7 +1677,7 @@
"id": "3ce50126-df5c-4b02-9b94-68472a070ea0",
"metadata": {},
"source": [
"<br><br><br>"
"<br><br><br><br><br><br>"
]
},
{
Expand Down

0 comments on commit d62e0ec

Please sign in to comment.