diff --git a/ml-model/mathpix/ml-benchmarking/benchmark.ipynb b/ml-model/mathpix/ml-benchmarking/benchmark.ipynb index 3fccad014..823c9c10c 100644 --- a/ml-model/mathpix/ml-benchmarking/benchmark.ipynb +++ b/ml-model/mathpix/ml-benchmarking/benchmark.ipynb @@ -2,18 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 18, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "from datasets import load_dataset\n", @@ -25,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -61,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -77,7 +68,7 @@ "text": [ "<>:2: SyntaxWarning: invalid escape sequence '\\m'\n", "<>:2: SyntaxWarning: invalid escape sequence '\\m'\n", - "/var/folders/zj/kpptzq657ns6c10nk77xfh0w0000gp/T/ipykernel_86230/2606140733.py:2: SyntaxWarning: invalid escape sequence '\\m'\n", + "/var/folders/zj/kpptzq657ns6c10nk77xfh0w0000gp/T/ipykernel_86230/2163082617.py:2: SyntaxWarning: invalid escape sequence '\\m'\n", " \"\"\"\n" ] } @@ -144,6 +135,37 @@ " print()\n", " return zss_tree" ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Started: \\begin{align*}A_m = 1+m+(1-(-1)^m)\\kappa_1 + 2\\kappa_2.\\end{align*}\n" + ] + }, + { + "ename": "TypeError", + "evalue": "'NoneType' object is not callable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[23], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m zss_0 \u001b[38;5;241m=\u001b[39m \u001b[43msource_to_zss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlatex_formula\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[20], line 54\u001b[0m, in \u001b[0;36msource_to_zss\u001b[0;34m(latex_expr)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msource_to_zss\u001b[39m(latex_expr):\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mStarted: \u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m+\u001b[39m latex_expr)\n\u001b[0;32m---> 54\u001b[0m sympy_expr \u001b[38;5;241m=\u001b[39m \u001b[43mparse_latex\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlatex_expr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSymPy\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 56\u001b[0m zss_tree \u001b[38;5;241m=\u001b[39m sympy_to_zss(sympy_expr)\n", + "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sympy/parsing/latex/__init__.py:35\u001b[0m, in \u001b[0;36mparse_latex\u001b[0;34m(s)\u001b[0m\n\u001b[1;32m 30\u001b[0m _latex \u001b[38;5;241m=\u001b[39m import_module(\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msympy.parsing.latex._parse_latex_antlr\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 32\u001b[0m import_kwargs\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfromlist\u001b[39m\u001b[38;5;124m'\u001b[39m: [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mX\u001b[39m\u001b[38;5;124m'\u001b[39m]})\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _latex \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_latex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse_latex\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sympy/parsing/latex/_parse_latex_antlr.py:73\u001b[0m, in \u001b[0;36mparse_latex\u001b[0;34m(sympy)\u001b[0m\n\u001b[1;32m 70\u001b[0m matherror \u001b[38;5;241m=\u001b[39m MathErrorListener(sympy)\n\u001b[1;32m 72\u001b[0m stream \u001b[38;5;241m=\u001b[39m antlr4\u001b[38;5;241m.\u001b[39mInputStream(sympy)\n\u001b[0;32m---> 73\u001b[0m lex \u001b[38;5;241m=\u001b[39m \u001b[43mLaTeXLexer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 74\u001b[0m lex\u001b[38;5;241m.\u001b[39mremoveErrorListeners()\n\u001b[1;32m 75\u001b[0m lex\u001b[38;5;241m.\u001b[39maddErrorListener(matherror)\n", + "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object is not callable" + ] + } + ], + "source": [ + "zss_0 = source_to_zss(df['latex_formula'][0])" + ] } ], "metadata": { diff --git a/ml-model/mathpix/ml-benchmarking/requirements.txt b/ml-model/mathpix/ml-benchmarking/requirements.txt new file mode 100644 index 000000000..79f1c4393 --- /dev/null +++ b/ml-model/mathpix/ml-benchmarking/requirements.txt @@ -0,0 +1,18 @@ +antlr4-python3-runtime==4.11.0 +contourpy==1.2.1 +cycler==0.12.1 +fonttools==4.51.0 +kiwisolver==1.4.5 +matplotlib==3.8.4 +mpmath==1.3.0 +numpy==1.26.4 +packaging==24.0 +pandas==2.2.2 +pillow==10.3.0 +pyparsing==3.1.2 +python-dateutil==2.9.0.post0 +pytz==2024.1 +six==1.16.0 +sympy==1.12 +tzdata==2024.1 +zss==1.2.0