From 6f038bbcd7dbe7dc46736d6fd0a0008c33751234 Mon Sep 17 00:00:00 2001
From: arnaudbergeron <58529583+arnaudbergeron@users.noreply.github.com>
Date: Sun, 19 Nov 2023 11:51:17 -0500
Subject: [PATCH 01/13] new logo

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a70e604..67e150b 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
-![logo_banner](https://github.com/arnaudbergeron/NumGI/assets/58529583/4b700c9f-b58f-4448-af5d-50b637cd9d86)
+![logo_banner_3](https://github.com/arnaudbergeron/NumGI/assets/58529583/c23ba24c-5686-495b-89bb-1c3139290f9d)
 
 -----------------
+
 # NumGI - Differential Equation Solver using Transformer-based AI
 NumGI is an open-source project that aims to solve differential equations using transformer-based AI models. It provides a user-friendly interface for solving a wide range of ordinary and partial differential equations.
 

From d35dd45e7676eb8acf9755ea646b13830effdf2d Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Tue, 21 Nov 2023 09:10:14 -0500
Subject: [PATCH 02/13] l2 loss module

---
 .gitignore                |  2 ++
 NumGI/Loss/LossDataset.py | 65 +++++++++++++++++++++++++++++++++++++++
 NumGI/Loss/__init__.py    |  0
 3 files changed, 67 insertions(+)
 create mode 100644 NumGI/Loss/LossDataset.py
 create mode 100644 NumGI/Loss/__init__.py

diff --git a/.gitignore b/.gitignore
index d931115..af723d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,5 @@ coverage.xml
 
 # Sphinx documentation
 docs/_build/
+
+.vscode
diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py
new file mode 100644
index 0000000..fa33666
--- /dev/null
+++ b/NumGI/Loss/LossDataset.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import random
+
+import sympy as sp
+import torch
+
+from NumGI.DatasetTokenizer import DatasetTokenizer
+
+
+class LossDataset(DatasetTokenizer):
+    """Docstring for LossDataset.
+
+    Args:
+        DatasetTokenizer (DatasetTokenizer): DatasetTokenizer to create loss dataset from.
+    """
+
+    def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1):
+        self.eq_dataset = eq_dataset
+        self.var_dict = self.create_var_dict()
+        self.loss = self.calculate_n_pairwise_loss(N, ell_norm)
+
+    def create_var_dict(self):
+        var_dict = {}
+        equations = self.eq_dataset.y_tokenized.tolist()
+        for i, eq in enumerate(equations):
+            solution = self.eq_dataset.tokens_to_sympy(eq)
+            if frozenset(solution.free_symbols) not in var_dict:
+                var_dict[frozenset(solution.free_symbols)] = [[solution, i]]
+            else:
+                var_dict[frozenset(solution.free_symbols)].append([solution, i])
+        return var_dict
+
+    def calculate_n_pairwise_loss(self, N, ell_norm):
+        loss = torch.zeros((3, N))
+        possible_symbols = self.var_dict.keys()
+
+        first_batch = int(0.9 * N)
+        second_batch = N - first_batch
+        for i in range(first_batch):
+            chosen_symbols = random.choice(list(possible_symbols))
+
+            sol_sympy_1 = random.choice(self.var_dict[chosen_symbols])
+            sol_sympy_2 = random.choice(self.var_dict[chosen_symbols])
+            integral = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm
+            for symbol in chosen_symbols:
+                integral = sp.integrate(integral, (symbol, -sp.oo, sp.oo))
+
+            loss[0, i] = sol_sympy_1[1]
+            loss[1, i] = sol_sympy_2[1]
+            if integral.is_number:
+                loss[2, i] = float(integral)
+            else:
+                loss[2, i] = torch.inf
+
+        for i in range(second_batch):
+            chosen_symbols = random.sample(possible_symbols, 2)
+            sol_sympy_1 = random.choice(self.var_dict[chosen_symbols[0]])
+            sol_sympy_2 = random.choice(self.var_dict[chosen_symbols[1]])
+
+            loss[0, i] = sol_sympy_1[1]
+            loss[1, i] = sol_sympy_2[1]
+            loss[2, i] = torch.inf
+
+        return loss
diff --git a/NumGI/Loss/__init__.py b/NumGI/Loss/__init__.py
new file mode 100644
index 0000000..e69de29

From 7ed0c647551d779dc3c706bf889fb242d7de1751 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Sun, 24 Dec 2023 12:37:59 -0500
Subject: [PATCH 03/13] numpy to sympy

---
 .gitignore                 |  2 ++
 NumGI/EquationTokenizer.py | 54 ++++++++++++++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index d931115..ab50832 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,5 +52,7 @@ coverage.xml
 *.log
 *.pot
 
+.vscode
+
 # Sphinx documentation
 docs/_build/
diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py
index 5212c14..59c17ac 100644
--- a/NumGI/EquationTokenizer.py
+++ b/NumGI/EquationTokenizer.py
@@ -7,6 +7,8 @@
 from sympy.core.numbers import Rational
 from torch.nn.utils.rnn import pad_sequence
 
+from NumGI.ConstantDictionaries import SP_TO_NP
+
 
 class EquationTokenizer:
     """Tokenizer for equations.
@@ -53,11 +55,7 @@ def sympy_to_list(self, sympy_equation) -> list:
         for ind, arg in enumerate(eq_args):
             sub_arg_list = self.sympy_to_list(arg)
             for _sub in sub_arg_list:
-                if (
-                    isinstance(_sub, Float)
-                    or isinstance(_sub, Integer)
-                    or isinstance(_sub, Rational)
-                ):
+                if self.is_number(_sub):
                     # perhaps not general enough should allow for more types
                     # the idea is we want to tokenize '12.2' as '1','2,'.','2' and not '12.2'
                     for i in str(_sub):
@@ -72,6 +70,45 @@ def sympy_to_list(self, sympy_equation) -> list:
 
         return eq_list
 
+    def _utils_exec_numpy(self, sympy_list, **kwargs):
+        """Converts a sympy list to a numpy list.
+
+        This is a util func.
+        """
+        function = sympy_list[0]
+        numpy_function = SP_TO_NP[function]
+        args_list = []
+        for i in sympy_list[1:]:
+            if isinstance(i, list):
+                args_list.append(self._utils_exec_numpy(i, **kwargs))
+            elif isinstance(i, sp.Symbol):
+                args_list.append(kwargs[str(i)])
+            elif self.is_number(i):
+                args_list.append(i.evalf())
+            else:
+                raise ValueError(f"Unknown type: {type(i)}, for {i}")
+
+        return numpy_function(*args_list)
+
+    def sympy_to_numpy(self, sympy_equation):
+        """Converts a sympy equation to a numpy function.
+
+        This is a util func.
+        """
+        simplified_eq = sympy_equation.rhs.simplify()
+
+        return sp.lambdify(list(simplified_eq.free_symbols), simplified_eq, "numpy")
+        # sympy_list = self.sympy_to_list(simplified_eq)
+        # grouped_num_list = self._regroup_numbers(sympy_list)
+        # parsed_list = self._parantheses_to_list(grouped_num_list)[0][0]
+
+        # variables = list(sympy_equation.free_symbols)
+        # variables = [str(i) for i in variables]
+        # def np_func(**kwargs):
+        #     return self._utils_exec_numpy(parsed_list, **kwargs)
+
+        # return np_func, variables
+
     def _parantheses_to_list(self, eq_list):
         """Converts a list with parentheses to a list of lists according to parentheses.
 
@@ -226,6 +263,13 @@ def tensorize_and_pad_by_len(self, list_of_token_list, max_len):
 
         return output[:-1]
 
+    def is_number(self, sp_class):
+        return (
+            isinstance(sp_class, Float)
+            or isinstance(sp_class, Integer)
+            or isinstance(sp_class, Rational)
+        )
+
 
 def defaultTokenizer():
     """Returns a default tokenizer. Because of issues with pickling."""

From 7d08e7f609c5be8294064dc6ad84574a7f204639 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Sun, 24 Dec 2023 12:39:26 -0500
Subject: [PATCH 04/13] new numpy to sympy using lambdify

---
 NumGI/EquationTokenizer.py | 36 +-----------------------------------
 1 file changed, 1 insertion(+), 35 deletions(-)

diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py
index 59c17ac..47046aa 100644
--- a/NumGI/EquationTokenizer.py
+++ b/NumGI/EquationTokenizer.py
@@ -7,8 +7,6 @@
 from sympy.core.numbers import Rational
 from torch.nn.utils.rnn import pad_sequence
 
-from NumGI.ConstantDictionaries import SP_TO_NP
-
 
 class EquationTokenizer:
     """Tokenizer for equations.
@@ -70,44 +68,12 @@ def sympy_to_list(self, sympy_equation) -> list:
 
         return eq_list
 
-    def _utils_exec_numpy(self, sympy_list, **kwargs):
-        """Converts a sympy list to a numpy list.
-
-        This is a util func.
-        """
-        function = sympy_list[0]
-        numpy_function = SP_TO_NP[function]
-        args_list = []
-        for i in sympy_list[1:]:
-            if isinstance(i, list):
-                args_list.append(self._utils_exec_numpy(i, **kwargs))
-            elif isinstance(i, sp.Symbol):
-                args_list.append(kwargs[str(i)])
-            elif self.is_number(i):
-                args_list.append(i.evalf())
-            else:
-                raise ValueError(f"Unknown type: {type(i)}, for {i}")
-
-        return numpy_function(*args_list)
-
     def sympy_to_numpy(self, sympy_equation):
         """Converts a sympy equation to a numpy function.
 
         This is a util func.
         """
-        simplified_eq = sympy_equation.rhs.simplify()
-
-        return sp.lambdify(list(simplified_eq.free_symbols), simplified_eq, "numpy")
-        # sympy_list = self.sympy_to_list(simplified_eq)
-        # grouped_num_list = self._regroup_numbers(sympy_list)
-        # parsed_list = self._parantheses_to_list(grouped_num_list)[0][0]
-
-        # variables = list(sympy_equation.free_symbols)
-        # variables = [str(i) for i in variables]
-        # def np_func(**kwargs):
-        #     return self._utils_exec_numpy(parsed_list, **kwargs)
-
-        # return np_func, variables
+        return sp.lambdify(list(sympy_equation.free_symbols), sympy_equation, "numpy")
 
     def _parantheses_to_list(self, eq_list):
         """Converts a list with parentheses to a list of lists according to parentheses.

From 5daf030cb04b0388d11a27354f4ef013a38b2cb7 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Sun, 24 Dec 2023 12:43:01 -0500
Subject: [PATCH 05/13] added file containing all constants dicts and lists

---
 NumGI/ConstantDictionaries.py | 112 ++++++++++++++++++++++++++++++++++
 NumGI/EquationTokenizer.py    |  69 +--------------------
 NumGI/SolutionGenerator.py    |  55 +++--------------
 3 files changed, 125 insertions(+), 111 deletions(-)
 create mode 100644 NumGI/ConstantDictionaries.py

diff --git a/NumGI/ConstantDictionaries.py b/NumGI/ConstantDictionaries.py
new file mode 100644
index 0000000..a17969d
--- /dev/null
+++ b/NumGI/ConstantDictionaries.py
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+import sympy as sp
+
+DIFFERENTIAL_FUNCTIONS = [
+    sp.sin,
+    sp.cos,
+    sp.tan,
+    sp.cot,
+    sp.sec,
+    sp.csc,
+    sp.exp,
+    sp.log,
+    sp.sqrt,
+    sp.asin,
+    sp.acos,
+    sp.atan,
+    sp.acot,
+    sp.asec,
+    sp.acsc,
+    sp.sinh,
+    sp.cosh,
+    sp.tanh,
+    sp.coth,
+    sp.sech,
+    sp.csch,
+    sp.asinh,
+    sp.acosh,
+    sp.atanh,
+    sp.acoth,
+    sp.asech,
+    sp.acsch,
+]
+
+OPERATIONS = [
+    ("multiplication", "arithmetic"),
+    ("addition", "arithmetic"),
+    ("subtraction", "arithmetic"),
+    ("division", "arithmetic"),
+    ("differential", "differential"),
+    # ("integration", "integration"),
+    ("exponent", "exponent"),
+]
+
+VARIABLES = ["x", "y", "z", "beta", "gamma"]
+
+DEFAULT_DICT = {
+    ")": 0,
+    sp.acsc: 1,
+    sp.acot: 2,
+    sp.asech: 3,
+    sp.core.containers.Tuple: 4,
+    "/": 5,
+    sp.sech: 6,
+    "END": 7,
+    sp.exp: 8,
+    "7": 9,
+    "0": 10,
+    sp.asin: 11,
+    "5": 12,
+    sp.core.function.Derivative: 13,
+    "8": 14,
+    sp.asec: 15,
+    sp.core.add.Add: 16,
+    sp.core.power.Pow: 17,
+    sp.csch: 18,
+    "START": 19,
+    sp.csc: 20,
+    "PAD": 21,
+    sp.sin: 22,
+    ",": 23,
+    sp.acsch: 24,
+    sp.core.relational.Equality: 25,
+    "(": 26,
+    "2": 27,
+    sp.Symbol("x"): 28,
+    sp.coth: 29,
+    sp.Symbol("y"): 30,
+    sp.log: 31,
+    sp.cos: 32,
+    "6": 33,
+    sp.core.mul.Mul: 34,
+    sp.acos: 35,
+    "9": 36,
+    sp.Function("f"): 37,
+    "-": 38,
+    sp.sqrt: 39,
+    sp.cosh: 40,
+    sp.tan: 41,
+    sp.tanh: 42,
+    sp.Symbol("z"): 43,
+    "4": 44,
+    "3": 45,
+    sp.cot: 46,
+    sp.asinh: 47,
+    sp.atan: 48,
+    sp.acosh: 49,
+    "1": 50,
+    sp.atanh: 51,
+    ".": 52,
+    sp.sinh: 53,
+    sp.acoth: 54,
+    sp.sec: 55,
+    sp.Symbol("beta"): 56,
+    sp.Symbol("gamma"): 57,
+    sp.Symbol("delta"): 58,
+    sp.Symbol("a"): 59,
+    sp.Symbol("b"): 60,
+    sp.Symbol("c"): 61,
+    sp.Symbol("d"): 62,
+    sp.Symbol("epsilon"): 63,
+}
diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py
index 47046aa..93424eb 100644
--- a/NumGI/EquationTokenizer.py
+++ b/NumGI/EquationTokenizer.py
@@ -7,6 +7,8 @@
 from sympy.core.numbers import Rational
 from torch.nn.utils.rnn import pad_sequence
 
+from NumGI.ConstantDictionaries import DEFAULT_DICT
+
 
 class EquationTokenizer:
     """Tokenizer for equations.
@@ -239,72 +241,7 @@ def is_number(self, sp_class):
 
 def defaultTokenizer():
     """Returns a default tokenizer. Because of issues with pickling."""
-    tokenize_dict = {
-        ")": 0,
-        sp.acsc: 1,
-        sp.acot: 2,
-        sp.asech: 3,
-        sp.core.containers.Tuple: 4,
-        "/": 5,
-        sp.sech: 6,
-        "END": 7,
-        sp.exp: 8,
-        "7": 9,
-        "0": 10,
-        sp.asin: 11,
-        "5": 12,
-        sp.core.function.Derivative: 13,
-        "8": 14,
-        sp.asec: 15,
-        sp.core.add.Add: 16,
-        sp.core.power.Pow: 17,
-        sp.csch: 18,
-        "START": 19,
-        sp.csc: 20,
-        "PAD": 21,
-        sp.sin: 22,
-        ",": 23,
-        sp.acsch: 24,
-        sp.core.relational.Equality: 25,
-        "(": 26,
-        "2": 27,
-        sp.Symbol("x"): 28,
-        sp.coth: 29,
-        sp.Symbol("y"): 30,
-        sp.log: 31,
-        sp.cos: 32,
-        "6": 33,
-        sp.core.mul.Mul: 34,
-        sp.acos: 35,
-        "9": 36,
-        sp.Function("f"): 37,
-        "-": 38,
-        sp.sqrt: 39,
-        sp.cosh: 40,
-        sp.tan: 41,
-        sp.tanh: 42,
-        sp.Symbol("z"): 43,
-        "4": 44,
-        "3": 45,
-        sp.cot: 46,
-        sp.asinh: 47,
-        sp.atan: 48,
-        sp.acosh: 49,
-        "1": 50,
-        sp.atanh: 51,
-        ".": 52,
-        sp.sinh: 53,
-        sp.acoth: 54,
-        sp.sec: 55,
-        sp.Symbol("beta"): 56,
-        sp.Symbol("gamma"): 57,
-        sp.Symbol("delta"): 58,
-        sp.Symbol("a"): 59,
-        sp.Symbol("b"): 60,
-        sp.Symbol("c"): 61,
-        sp.Symbol("d"): 62,
-        sp.Symbol("epsilon"): 63,
-    }
+    tokenize_dict = DEFAULT_DICT
 
     # invert tokenizer_dict into decode_dict
     decode_dict = {v: k for k, v in tokenize_dict.items()}
diff --git a/NumGI/SolutionGenerator.py b/NumGI/SolutionGenerator.py
index e187579..5a1921d 100644
--- a/NumGI/SolutionGenerator.py
+++ b/NumGI/SolutionGenerator.py
@@ -4,6 +4,10 @@
 
 import sympy as sp
 
+from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS
+from NumGI.ConstantDictionaries import OPERATIONS
+from NumGI.ConstantDictionaries import VARIABLES
+
 
 class SolutionGenerator:
     """Generates solutions for contrived questions to be used for training data.
@@ -14,6 +18,9 @@ class SolutionGenerator:
     def __init__(
         self,
     ):
+        self.OPERATIONS = OPERATIONS
+        self.DIFFERENTIAL_FUNCTIONS = DIFFERENTIAL_FUNCTIONS
+        self.VARIABLES = VARIABLES
         self.PROB_NEW_SYMBOL = 0.3
         self.PROB_USED_SYMBOL = 1 - self.PROB_NEW_SYMBOL
         self.NEW_VARS = self.VARIABLES
@@ -301,48 +308,6 @@ def __str__(self):
     def choose_op_noarithmetic(self, ops: list):
         return random.choice(ops[3:])
 
-    DIFFERENTIAL_FUNCTIONS = [
-        sp.sin,
-        sp.cos,
-        sp.tan,
-        sp.cot,
-        sp.sec,
-        sp.csc,
-        sp.exp,
-        sp.log,
-        sp.sqrt,
-        sp.asin,
-        sp.acos,
-        sp.atan,
-        sp.acot,
-        sp.asec,
-        sp.acsc,
-        sp.sinh,
-        sp.cosh,
-        sp.tanh,
-        sp.coth,
-        sp.sech,
-        sp.csch,
-        sp.asinh,
-        sp.acosh,
-        sp.atanh,
-        sp.acoth,
-        sp.asech,
-        sp.acsch,
-    ]
-    # must not be rordered need first three to be arithmetic because
-    #  I am lazy and chooseop_noarithmetic is not implemented well
-    OPERATIONS = [
-        ("multiplication", "arithmetic"),
-        ("addition", "arithmetic"),
-        ("subtraction", "arithmetic"),
-        ("division", "arithmetic"),
-        ("differential", "differential"),
-        # ("integration", "integration"),
-        ("exponent", "exponent"),
-    ]
-    VARIABLES = ["x", "y", "z", "beta", "gamma"]
-
 
 if __name__ == "__main__":
     sg = SolutionGenerator()
@@ -350,8 +315,8 @@ def choose_op_noarithmetic(self, ops: list):
         ops_sol=(3, 5),
         ops_eq=(2, 5),
         num_eqs=1000,
-        vars=sg.VARIABLES,
-        funcs=sg.DIFFERENTIAL_FUNCTIONS,
-        ops=sg.OPERATIONS,
+        vars=VARIABLES,
+        funcs=DIFFERENTIAL_FUNCTIONS,
+        ops=OPERATIONS,
     )
     print(eqs)

From 18e9d394c39c3b5dfb9c5a8d2a77928ee6a50659 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Sun, 24 Dec 2023 15:40:40 -0500
Subject: [PATCH 06/13] added test for sp to np and torch

---
 test/EquationTests/test_numpy_sympy_torch.py | 69 ++++++++++++++++++++
 test/ModelTests/test_inference.py            |  4 +-
 test/ModelTests/test_model.py                |  2 +-
 3 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 test/EquationTests/test_numpy_sympy_torch.py

diff --git a/test/EquationTests/test_numpy_sympy_torch.py b/test/EquationTests/test_numpy_sympy_torch.py
new file mode 100644
index 0000000..6f54e93
--- /dev/null
+++ b/test/EquationTests/test_numpy_sympy_torch.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+import numpy as np
+import sympy as sp
+import torch
+
+from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS
+from NumGI.ConstantDictionaries import OPERATIONS
+from NumGI.EquationTokenizer import EquationTokenizer
+from NumGI.SolutionGenerator import SolutionGenerator
+
+sg = SolutionGenerator()
+sg.PROB_NEW_SYMBOL = 0
+n_eqs = 30
+sols = [
+    sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify()
+    for i in range(n_eqs)
+]
+
+tokenizer = EquationTokenizer()
+
+test_arr = [1, 2, 5, 10, 20]
+np_test = np.array(test_arr)
+torch_test = torch.tensor(test_arr, device=tokenizer.device)
+x = sp.Symbol("x")
+
+cnt = 0
+
+for i in sols:
+    try:
+        np_func, var = tokenizer.sympy_to_numpy(i)
+        np_res = np_func(np_test).tolist()
+    except TypeError:
+        cnt += 1
+        continue
+
+    if cnt > n_eqs / 2:
+        raise Exception(
+            "Too many equations with TypeError are equations correctly generated \
+                or error in sp to np func"
+        )
+
+    sp_res = []
+    for idx, j in enumerate(test_arr):
+        try:
+            sp_res.append(float(i.replace(x, j).evalf()))
+        except Exception as e:
+            print(e)
+            sp_res.append(np_res[idx])
+
+    torch_func, var = tokenizer.sympy_to_torch(i)
+    torch_res = torch_func(**{_arg: torch_test for _arg in var}).tolist()
+
+    tol = 1e-3
+    for k in range(len(sp_res)):  # separate index so ``i`` keeps naming the equation
+        try:
+            if not np.all(np.isfinite([sp_res[k], np_res[k], torch_res[k]])):
+                continue  # nan/inf cannot be checked against a tolerance
+            elif sp_res[k] == 0:
+                assert abs(sp_res[k] - np_res[k]) < tol
+                assert abs(sp_res[k] - torch_res[k]) < tol
+            else:
+                assert abs((sp_res[k] - np_res[k]) / sp_res[k]) < tol
+                assert abs((sp_res[k] - torch_res[k]) / sp_res[k]) < tol
+        except Exception as e:
+            print(
+                f"eq:{i}, sp_res: {sp_res[k]}, np_res: {np_res[k]}, torch_res: {torch_res[k]}, {e}"
+            )
+            raise
diff --git a/test/ModelTests/test_inference.py b/test/ModelTests/test_inference.py
index ecb8468..33199e6 100644
--- a/test/ModelTests/test_inference.py
+++ b/test/ModelTests/test_inference.py
@@ -2,8 +2,8 @@
 
 import torch
 
-from NumGI.model.Inference import batch_inference
-from NumGI.model.Model import TransformerNet
+from NumGI.Model.Inference import batch_inference
+from NumGI.Model.Model import TransformerNet
 
 
 def test_batch_inference():
diff --git a/test/ModelTests/test_model.py b/test/ModelTests/test_model.py
index 5428494..78ba91d 100644
--- a/test/ModelTests/test_model.py
+++ b/test/ModelTests/test_model.py
@@ -2,7 +2,7 @@
 
 import torch
 
-from NumGI.model.Model import TransformerNet
+from NumGI.Model.Model import TransformerNet
 
 
 def test_transformer_net():

From 4336312c61d4d50d9592db888fec561b87713bfb Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Mon, 25 Dec 2023 12:36:47 -0500
Subject: [PATCH 07/13] new loss dataset functions

---
 NumGI/Loss/LossDataset.py | 53 ++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py
index fa33666..56fd9cd 100644
--- a/NumGI/Loss/LossDataset.py
+++ b/NumGI/Loss/LossDataset.py
@@ -8,7 +8,7 @@
 from NumGI.DatasetTokenizer import DatasetTokenizer
 
 
-class LossDataset(DatasetTokenizer):
+class LossDataset:
     """Docstring for LossDataset.
 
     Args:
@@ -17,39 +17,55 @@ class LossDataset(DatasetTokenizer):
 
     def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1):
         self.eq_dataset = eq_dataset
+        self.grid_size = (100, 100, 1000)  # (start, end, steps); NOTE(review): start == end?
         self.var_dict = self.create_var_dict()
+        self.max_integral_value = 10e10  # tunable cap; must be set before the loss is computed
         self.loss = self.calculate_n_pairwise_loss(N, ell_norm)
 
     def create_var_dict(self):
+        """Group the dataset's decoded solutions by their set of free symbols.
+
+        Returns:
+            dict: frozenset of free symbols -> [[solution, index], ...]; fills self.solutions.
+        """
         var_dict = {}
         equations = self.eq_dataset.y_tokenized.tolist()
+        self.solutions = []
         for i, eq in enumerate(equations):
-            solution = self.eq_dataset.tokens_to_sympy(eq)
-            if frozenset(solution.free_symbols) not in var_dict:
-                var_dict[frozenset(solution.free_symbols)] = [[solution, i]]
+            sol = self.eq_dataset.tokens_to_sympy(eq)
+            self.solutions.append(sol)
+            if frozenset(sol.free_symbols) not in var_dict:
+                var_dict[frozenset(sol.free_symbols)] = [[sol, i]]
             else:
-                var_dict[frozenset(solution.free_symbols)].append([solution, i])
+                var_dict[frozenset(sol.free_symbols)].append([sol, i])
         return var_dict
 
     def calculate_n_pairwise_loss(self, N, ell_norm):
         loss = torch.zeros((3, N))
         possible_symbols = self.var_dict.keys()
 
+        possible_symbols = [i for i in possible_symbols if len(self.var_dict[i]) > 1]
+
         first_batch = int(0.9 * N)
         second_batch = N - first_batch
         for i in range(first_batch):
+            # Draw two distinct equations that share the same set of free symbols.
             chosen_symbols = random.choice(list(possible_symbols))
 
-            sol_sympy_1 = random.choice(self.var_dict[chosen_symbols])
-            sol_sympy_2 = random.choice(self.var_dict[chosen_symbols])
-            integral = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm
-            for symbol in chosen_symbols:
-                integral = sp.integrate(integral, (symbol, -sp.oo, sp.oo))
+            possible_equations = [entry[1] for entry in self.var_dict[chosen_symbols]]
+
+            idx_sympy_1, idx_sympy_2 = random.sample(possible_equations, 2)
+            sol_sympy_1 = [self.solutions[idx_sympy_1], idx_sympy_1]
+            sol_sympy_2 = [self.solutions[idx_sympy_2], idx_sympy_2]
+
+            integrand = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm
+            # The grid mean of the integrand stands in for the symbolic integral.
+            integral = self.compute_integral(integrand)
 
             loss[0, i] = sol_sympy_1[1]
             loss[1, i] = sol_sympy_2[1]
-            if integral.is_number:
-                loss[2, i] = float(integral)
+            if integral < self.max_integral_value:
+                loss[2, i] = integral.item()
             else:
                 loss[2, i] = torch.inf
 
@@ -63,3 +79,16 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
             loss[2, i] = torch.inf
 
         return loss
+
+    def compute_integral(self, sympy_eq):
+        """Return the mean of ``sympy_eq`` evaluated on the discrete grid (integral proxy)."""
+        func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq)
+        grids = self.create_discrete_grids(symbols)
+        _arg = dict(zip(symbols, grids))
+        return torch.mean(func(**_arg))
+
+    def create_discrete_grids(self, symbols):
+        """Return one mesh tensor per symbol, each built from the same 1-D linspace grid."""
+        grid = torch.linspace(*self.grid_size, device=self.eq_dataset.device)
+        # "ij" matches the implicit default; naming it silences torch's meshgrid warning.
+        return torch.meshgrid(*([grid] * len(symbols)), indexing="ij")

From 7cbb696173cdd0b632fb68195aea1d41d005c064 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Mon, 25 Dec 2023 12:40:22 -0500
Subject: [PATCH 08/13] renamed Model folder

---
 NumGI/ConstantDictionaries.py      | 16 ++----
 NumGI/EquationTokenizer.py         | 82 +++++++++++++++++++++++++++---
 NumGI/LoadTokenizer.py             | 10 ++--
 NumGI/ParallelEquationGenerator.py | 41 ++-------------
 NumGI/__init__.py                  | 22 ++++++++
 5 files changed, 111 insertions(+), 60 deletions(-)

diff --git a/NumGI/ConstantDictionaries.py b/NumGI/ConstantDictionaries.py
index 9e97ec3..4da2899 100644
--- a/NumGI/ConstantDictionaries.py
+++ b/NumGI/ConstantDictionaries.py
@@ -7,46 +7,38 @@
     sp.sin: torch.sin,
     sp.cos: torch.cos,
     sp.tan: torch.tan,
-    sp.csc: torch.csc,
     sp.exp: torch.exp,
     sp.log: torch.log,
-    sp.sqrt: torch.sqrt,
     sp.asin: torch.asin,
     sp.acos: torch.acos,
     sp.atan: torch.atan,
-    sp.acsc: torch.acsc,
     sp.sinh: torch.sinh,
     sp.cosh: torch.cosh,
     sp.tanh: torch.tanh,
-    sp.csch: torch.csch,
     sp.asinh: torch.asinh,
     sp.acosh: torch.acosh,
     sp.atanh: torch.atanh,
-    sp.acsch: torch.acsch,
+    sp.Mul: torch.mul,
+    sp.Add: torch.add,
+    sp.Pow: torch.pow,
+    sp.Abs: torch.abs,
 }
 
 DIFFERENTIAL_FUNCTIONS = [
     sp.sin,
     sp.cos,
     sp.tan,
-    sp.csc,
     sp.exp,
     sp.log,
-    sp.sqrt,
     sp.asin,
     sp.acos,
     sp.atan,
-    sp.acot,
-    sp.asec,
-    sp.acsc,
     sp.sinh,
     sp.cosh,
     sp.tanh,
-    sp.csch,
     sp.asinh,
     sp.acosh,
     sp.atanh,
-    sp.acsch,
 ]
 
 OPERATIONS = [
diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py
index 93424eb..ad96562 100644
--- a/NumGI/EquationTokenizer.py
+++ b/NumGI/EquationTokenizer.py
@@ -8,6 +8,7 @@
 from torch.nn.utils.rnn import pad_sequence
 
 from NumGI.ConstantDictionaries import DEFAULT_DICT
+from NumGI.ConstantDictionaries import SP_TO_TORCH
 
 
 class EquationTokenizer:
@@ -32,6 +33,13 @@ def __init__(self, useDefaultTokenizer=False):
             self.dict_size = len(self.tokenize_dict)
             self.char_set = set(self.tokenize_dict.keys())
 
+        if torch.cuda.is_available():
+            self.device = "cuda"
+        elif torch.backends.mps.is_available():
+            self.device = "mps"
+        else:
+            self.device = "cpu"
+
     def sympy_to_list(self, sympy_equation) -> list:
         """Converts a sympy equation to a list that will be tokenized.
 
@@ -75,7 +83,59 @@ def sympy_to_numpy(self, sympy_equation):
 
         This is a util func.
         """
-        return sp.lambdify(list(sympy_equation.free_symbols), sympy_equation, "numpy")
+        symbols = list(sympy_equation.free_symbols)
+        return sp.lambdify(symbols, sympy_equation, "numpy"), symbols
+
+    def sympy_to_torch(self, sympy_equation):
+        """Converts a sympy equation to a pytorch function.
+
+        This is a util func.
+        """
+        # simplified_eq = sympy_equation.simplify()
+        simplified_eq = sympy_equation
+        sympy_list = self.sympy_to_list(simplified_eq)
+        grouped_num_list = self._regroup_numbers(sympy_list)
+        parsed_list = self._parantheses_to_list(grouped_num_list)[0][0]
+
+        variables = list(simplified_eq.free_symbols)
+        variables = [str(i) for i in variables]
+
+        def torch_func(**kwargs):
+            return self._utils_exec_torch(parsed_list, **kwargs)
+
+        return torch_func, variables
+
+    def _utils_exec_torch(self, sympy_list, **kwargs):
+        """Evaluate a nested ``[function, *args]`` list with the matching torch ops.
+
+        Symbols are looked up by name in ``kwargs``; numbers become tensors on ``self.device``.
+        """
+        function = sympy_list[0]
+        torch_function = SP_TO_TORCH[function]
+        args_list = []
+        for i in sympy_list[1:]:
+            if isinstance(i, list):
+                args_list.append(self._utils_exec_torch(i, **kwargs))
+            elif isinstance(i, sp.Symbol):
+                args_list.append(kwargs[str(i)])
+            elif self.is_number(i):
+                args_list.append(torch.tensor(float(i), device=self.device))
+            elif i is sp.core.numbers.Pi or isinstance(i, sp.core.numbers.Pi):
+                args_list.append(torch.tensor(torch.pi, device=self.device))
+            else:
+                raise ValueError(f"Unknown type: {type(i)}, for {i}")
+
+        if len(args_list) > 2 and (function == sp.Add or function == sp.Mul):
+            return self.call_multi_input_torch(torch_function, args_list)
+
+        else:
+            return torch_function(*args_list)
+
+    def call_multi_input_torch(self, func, args):
+        if len(args) > 2:
+            return func(args[0], self.call_multi_input_torch(func, args[1:]))
+        else:
+            return func(args[0], args[1])
 
     def _parantheses_to_list(self, eq_list):
         """Converts a list with parentheses to a list of lists according to parentheses.
@@ -213,8 +273,7 @@ def tensorize_and_pad(self, list_of_token_list):
         """Takes in a list of tokenized lists and outputs a padded tensor of tensors."""
         pad_val = self.tokenize_dict["PAD"]
 
-        list_of_token_list = [torch.tensor(i) for i in list_of_token_list]
-
+        list_of_token_list = [torch.tensor(i, device=self.device) for i in list_of_token_list]
         output = pad_sequence(list_of_token_list, batch_first=True, padding_value=pad_val)
 
         return output
@@ -222,14 +281,23 @@ def tensorize_and_pad(self, list_of_token_list):
     def tensorize_and_pad_by_len(self, list_of_token_list, max_len):
         """Takes in a list of tokenized lists and outputs a padded tensor of defined length."""
         pad_val = self.tokenize_dict["PAD"]
+        list_of_token_list = [torch.tensor(i, device=self.device) for i in list_of_token_list]
 
-        list_of_token_list = [torch.tensor(i) for i in list_of_token_list]
-        _extra = torch.zeros(max_len)
+        return self._pad_tensors(list_of_token_list, max_len, pad_val)
+
+    def pad_by_len(self, list_of_token_list, max_len):
+        """Takes in a list of tokenized lists and outputs a padded tensor of defined length."""
+        pad_val = self.tokenize_dict["PAD"]
+        list_of_token_list = [i.to(self.device) for i in list_of_token_list]
+
+        return self._pad_tensors(list_of_token_list, max_len, pad_val)
+
+    def _pad_tensors(self, list_of_token_list, max_len, pad_val):
+        _extra = torch.zeros(max_len, device=self.device)
         list_of_token_list.append(_extra)
 
         output = pad_sequence(list_of_token_list, batch_first=True, padding_value=pad_val)
-
-        return output[:-1]
+        return output[torch.max((output != _extra), axis=1).values]
 
     def is_number(self, sp_class):
         return (
diff --git a/NumGI/LoadTokenizer.py b/NumGI/LoadTokenizer.py
index e3128f2..2980c26 100644
--- a/NumGI/LoadTokenizer.py
+++ b/NumGI/LoadTokenizer.py
@@ -13,7 +13,9 @@ def __init__(self, x_files, y_files):
         default_tokenized_y = []
 
         temp_data = [["1", "2"]]
-        tempTokenizer = DatasetTokenizer(temp_data, temp_data, True, False)
+        tempTokenizer = DatasetTokenizer(
+            temp_data, temp_data, useDefaultTokenizer=True, isSympy=False
+        )
 
         # load files
         max_length = 0
@@ -26,8 +28,8 @@ def __init__(self, x_files, y_files):
             max_length = max(max_length, _torch_y.shape[1])
 
         for idx, (x, y) in enumerate(zip(default_tokenized_x, default_tokenized_y)):
-            default_tokenized_x[idx] = tempTokenizer.tensorize_and_pad_by_len(x, max_length)
-            default_tokenized_y[idx] = tempTokenizer.tensorize_and_pad_by_len(y, max_length)
+            default_tokenized_x[idx] = tempTokenizer.pad_by_len(x, max_length)
+            default_tokenized_y[idx] = tempTokenizer.pad_by_len(y, max_length)
 
         default_combined_x_torch = torch.cat(default_tokenized_x, axis=0)
         default_combined_y_torch = torch.cat(default_tokenized_y, axis=0)
@@ -35,4 +37,4 @@ def __init__(self, x_files, y_files):
         new_x = [tempTokenizer.tokens_to_list(i) for i in default_combined_x_torch.tolist()]
         new_y = [tempTokenizer.tokens_to_list(i) for i in default_combined_y_torch.tolist()]
 
-        super().__init__(new_x, new_y, False, False)
+        super().__init__(new_x, new_y, useDefaultTokenizer=False, isSympy=False)
diff --git a/NumGI/ParallelEquationGenerator.py b/NumGI/ParallelEquationGenerator.py
index 8e9e1fd..9b5d74d 100644
--- a/NumGI/ParallelEquationGenerator.py
+++ b/NumGI/ParallelEquationGenerator.py
@@ -6,6 +6,8 @@
 import sympy as sp
 import torch
 
+from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS
+from NumGI.ConstantDictionaries import OPERATIONS
 from NumGI.DatasetTokenizer import DatasetTokenizer
 from NumGI.EquationTokenizer import EquationTokenizer
 from NumGI.SolutionGenerator import SolutionGenerator
@@ -70,43 +72,8 @@ def generate_eq_parallel(gen_args: list, path: str, num_thousands: int):
 
 
 if __name__ == "__main__":
-    diff_func = [
-        sp.sin,
-        sp.cos,
-        sp.tan,
-        sp.cot,
-        sp.sec,
-        sp.csc,
-        sp.exp,
-        sp.log,
-        sp.sqrt,
-        sp.asin,
-        sp.acos,
-        sp.atan,
-        sp.acot,
-        sp.asec,
-        sp.acsc,
-        sp.sinh,
-        sp.cosh,
-        sp.tanh,
-        sp.coth,
-        sp.sech,
-        sp.csch,
-        sp.asinh,
-        sp.acosh,
-        sp.atanh,
-        sp.acoth,
-        sp.asech,
-        sp.acsch,
-    ]
-    ops = [
-        ("multiplication", "arithmetic"),
-        ("addition", "arithmetic"),
-        ("subtraction", "arithmetic"),
-        ("division", "arithmetic"),
-        ("differential", "differential"),
-        ("exponent", "exponent"),
-    ]
+    diff_func = DIFFERENTIAL_FUNCTIONS
+    ops = OPERATIONS
     vars = ["x", "y", "z", "beta", "gamma", "delta", "a", "b", "c", "d", "epsilon"]
     gen_args = [
         (3, 10),
diff --git a/NumGI/__init__.py b/NumGI/__init__.py
index e69de29..ee5b081 100644
--- a/NumGI/__init__.py
+++ b/NumGI/__init__.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+import numpy as np
+import sympy as sp
+
+sp_function_to_numpy_function = {
+    sp.Mul: np.multiply,
+    sp.Add: np.add,
+    sp.Pow: np.power,
+    sp.exp: np.exp,
+    sp.log: np.log,
+    sp.sin: np.sin,
+    sp.cos: np.cos,
+    sp.tan: np.tan,
+    sp.asin: np.arcsin,
+    sp.acos: np.arccos,
+    sp.atan: np.arctan,
+    sp.sqrt: np.sqrt,
+    sp.Abs: np.abs,
+    sp.sign: np.sign,
+    sp.Eq: np.equal,
+}

From 1799f3c7901d14d28fcd48401f0b937c34c37f21 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Mon, 25 Dec 2023 12:42:17 -0500
Subject: [PATCH 09/13] renamed folder

---
 NumGI/{model => Model}/Inference.py                | 0
 NumGI/{model => Model}/Model.py                    | 0
 NumGI/{model => Model}/__init__.py                 | 0
 NumGI/{model => Model}/performance/LossAnalyzer.py | 0
 NumGI/{model => Model}/performance/__init__.py     | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename NumGI/{model => Model}/Inference.py (100%)
 rename NumGI/{model => Model}/Model.py (100%)
 rename NumGI/{model => Model}/__init__.py (100%)
 rename NumGI/{model => Model}/performance/LossAnalyzer.py (100%)
 rename NumGI/{model => Model}/performance/__init__.py (100%)

diff --git a/NumGI/model/Inference.py b/NumGI/Model/Inference.py
similarity index 100%
rename from NumGI/model/Inference.py
rename to NumGI/Model/Inference.py
diff --git a/NumGI/model/Model.py b/NumGI/Model/Model.py
similarity index 100%
rename from NumGI/model/Model.py
rename to NumGI/Model/Model.py
diff --git a/NumGI/model/__init__.py b/NumGI/Model/__init__.py
similarity index 100%
rename from NumGI/model/__init__.py
rename to NumGI/Model/__init__.py
diff --git a/NumGI/model/performance/LossAnalyzer.py b/NumGI/Model/performance/LossAnalyzer.py
similarity index 100%
rename from NumGI/model/performance/LossAnalyzer.py
rename to NumGI/Model/performance/LossAnalyzer.py
diff --git a/NumGI/model/performance/__init__.py b/NumGI/Model/performance/__init__.py
similarity index 100%
rename from NumGI/model/performance/__init__.py
rename to NumGI/Model/performance/__init__.py

From de94e0b7b248a96755a10133e33df2f2be34b5b3 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Mon, 25 Dec 2023 15:09:38 -0500
Subject: [PATCH 10/13] fixed misc bugs

---
 NumGI/ConstantDictionaries.py                |  28 +++++
 NumGI/Loss/LossDataset.py                    |   3 +-
 test/EquationTests/test_numpy_sympy_torch.py | 126 +++++++++++--------
 3 files changed, 101 insertions(+), 56 deletions(-)

diff --git a/NumGI/ConstantDictionaries.py b/NumGI/ConstantDictionaries.py
index 4da2899..23f96dc 100644
--- a/NumGI/ConstantDictionaries.py
+++ b/NumGI/ConstantDictionaries.py
@@ -22,6 +22,22 @@
     sp.Add: torch.add,
     sp.Pow: torch.pow,
     sp.Abs: torch.abs,
+    sp.cot: lambda x: torch.divide(1, torch.tan(x)),
+    sp.acot: lambda x: torch.atan(torch.divide(1, x)),
+    sp.sec: lambda x: torch.divide(1, torch.cos(x)),
+    sp.asec: lambda x: torch.acos(torch.divide(1, x)),
+    sp.csc: lambda x: torch.divide(1, torch.sin(x)),
+    sp.acsc: lambda x: torch.asin(torch.divide(1, x)),
+    sp.coth: lambda x: torch.divide(1, torch.tanh(x)),
+    sp.acoth: lambda x: torch.atanh(torch.divide(1, x)),
+    sp.sech: lambda x: torch.divide(1, torch.cosh(x)),
+    sp.asech: lambda x: torch.log(
+        torch.add(torch.divide(1, x), torch.sqrt(torch.sub(torch.pow(torch.divide(1, x), 2), 1)))
+    ),
+    sp.csch: lambda x: torch.divide(1, torch.sinh(x)),
+    sp.acsch: lambda x: torch.log(
+        torch.add(torch.divide(1, x), torch.sqrt(torch.add(torch.pow(torch.divide(1, x), 2), 1)))
+    ),
 }
 
 DIFFERENTIAL_FUNCTIONS = [
@@ -39,6 +55,18 @@
     sp.asinh,
     sp.acosh,
     sp.atanh,
+    sp.cot,
+    sp.acot,
+    sp.sec,
+    sp.asec,
+    sp.csc,
+    sp.acsc,
+    sp.coth,
+    sp.acoth,
+    sp.sech,
+    sp.asech,
+    sp.csch,
+    sp.acsch,
 ]
 
 OPERATIONS = [
diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py
index 56fd9cd..872ae03 100644
--- a/NumGI/Loss/LossDataset.py
+++ b/NumGI/Loss/LossDataset.py
@@ -46,7 +46,7 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
 
         possible_symbols = [i for i in possible_symbols if len(self.var_dict[i]) > 1]
 
-        first_batch = int(0.9 * N)
+        first_batch = int(0.95 * N)
         second_batch = N - first_batch
         for i in range(first_batch):
             print(i)
@@ -59,7 +59,6 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
             sol_sympy_2 = [self.solutions[idx_sympy_2], idx_sympy_2]
 
             integrand = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm
-            print(integrand)
             integral = self.compute_integral(integrand)
 
             loss[0, i] = sol_sympy_1[1]
diff --git a/test/EquationTests/test_numpy_sympy_torch.py b/test/EquationTests/test_numpy_sympy_torch.py
index 6f54e93..16bad8f 100644
--- a/test/EquationTests/test_numpy_sympy_torch.py
+++ b/test/EquationTests/test_numpy_sympy_torch.py
@@ -1,69 +1,87 @@
 from __future__ import annotations
 
+import math
+
 import numpy as np
 import sympy as sp
 import torch
 
 from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS
-from NumGI.ConstantDictionaries import OPERATIONS
 from NumGI.EquationTokenizer import EquationTokenizer
 from NumGI.SolutionGenerator import SolutionGenerator
 
-sg = SolutionGenerator()
-sg.PROB_NEW_SYMBOL = 0
-n_eqs = 30
-sols = [
-    sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify()
-    for i in range(n_eqs)
-]
-
-tokenizer = EquationTokenizer()
-
-test_arr = [1, 2, 5, 10, 20]
-np_test = np.array(test_arr)
-torch_test = torch.tensor(test_arr, device=tokenizer.device)
-x = sp.Symbol("x")
-
-cnt = 0
-
-for i in sols:
-    try:
-        np_func, var = tokenizer.sympy_to_numpy(i)
-        np_res = np_func(np_test).tolist()
-    except TypeError:
-        cnt += 1
-        continue
-
-    if cnt > n_eqs / 2:
-        raise Exception(
-            "Too many equations with TypeError are equations correctly generated \
-                or error in sp to np func"
-        )
-
-    sp_res = []
-    for idx, j in enumerate(test_arr):
-        try:
-            sp_res.append(float(i.replace(x, j).evalf()))
-        except Exception as e:
-            print(e)
-            sp_res.append(np_res[idx])
 
-    torch_func, var = tokenizer.sympy_to_torch(i)
-    torch_res = torch_func(**{_arg: torch_test for _arg in var}).tolist()
+def test_sp_np_torch():
+    sg = SolutionGenerator()
+    sg.PROB_NEW_SYMBOL = 0
+    n_eqs = 30
+    sols = [
+        # sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify()
+        # for i in range(n_eqs)
+    ]
+
+    for func in DIFFERENTIAL_FUNCTIONS:
+        sols.append(func(sp.Symbol("x")))
+
+    tokenizer = EquationTokenizer()
 
-    tol = 1e-3
-    for i in range(len(sp_res)):
+    test_arr = [-10, -5, -2, -1, 0, 1, 2, 5, 10, 20]
+    np_test = np.array(test_arr)
+    torch_test = torch.tensor(test_arr, device=tokenizer.device)
+    x = sp.Symbol("x")
+
+    cnt = 0
+
+    for i in sols:
         try:
-            if sp_res[i] is not None and np_res[i] is not None and torch_res[i] is not None:
-                continue
-            elif sp_res[i] == 0:
-                assert (sp_res[i] - np_res[i]) < tol
-                assert (sp_res[i] - torch_res[i]) < tol
-            else:
-                assert (sp_res[i] - np_res[i]) / sp_res[i] < tol
-                assert (sp_res[i] - torch_res[i]) / sp_res[i] < tol
-        except Exception as e:
+            np_func, var = tokenizer.sympy_to_numpy(i)
+            np_res = np_func(np_test).tolist()
+        except TypeError:
+            cnt += 1
+            print("typeerr")
+            continue
+
+        if cnt > n_eqs / 2:
+            raise Exception(
+                "Too many equations with TypeError are equations correctly generated \
+                    or error in sp to np func"
+            )
+
+        sp_res = []
+        for idx, j in enumerate(test_arr):
+            try:
+                sp_res.append(float(i.replace(x, j).evalf()))
+            except Exception as e:
+                print(e)
+                sp_res.append(np_res[idx])
+
+        torch_func, var = tokenizer.sympy_to_torch(i)
+        torch_res = torch_func(**{_arg: torch_test for _arg in var}).tolist()
+
+        tol = 1e-4
+        for idx in range(len(sp_res)):
             print(
-                f"eq:{i}, sp_res: {sp_res[i]}, np_res: {np_res[i]}, torch_res: {torch_res[i]}, {e}"
+                f"eq:{i}, sp_res: {sp_res[idx]}, np_res: {np_res[idx]}, torch_res: {torch_res[idx]}"
             )
-            raise
+            try:
+                if math.isnan(sp_res[idx]) or math.isnan(np_res[idx]) or math.isnan(torch_res[idx]):
+                    continue
+                elif sp_res[idx] == 0:
+                    assert (sp_res[idx] - np_res[idx]) < tol
+                    assert (sp_res[idx] - torch_res[idx]) < tol
+                elif math.isinf(np_res[idx]):
+                    assert np_res[idx] == sp_res[idx]
+                    assert np_res[idx] == torch_res[idx]
+                else:
+                    assert (sp_res[idx] - np_res[idx]) / sp_res[idx] < tol
+                    assert (sp_res[idx] - torch_res[idx]) / sp_res[idx] < tol
+            except Exception as e:
+                print(
+                    f"eq:{i}, sp_res: {sp_res[idx]}, np_res: {np_res[idx]}, \
+                        torch_res: {torch_res[idx]}, {e}"
+                )
+                raise
+
+
+if __name__ == "__main__":
+    test_sp_np_torch()

From ecb3687241402f1ec5a6e011da9e033dcd78b0fb Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Mon, 25 Dec 2023 16:25:10 -0500
Subject: [PATCH 11/13] loss calculator should be stable

---
 NumGI/Loss/LossDataset.py                    | 8 ++++----
 test/EquationTests/test_numpy_sympy_torch.py | 5 +++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py
index 872ae03..3ee44a6 100644
--- a/NumGI/Loss/LossDataset.py
+++ b/NumGI/Loss/LossDataset.py
@@ -18,9 +18,9 @@ class LossDataset:
     def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1):
         self.eq_dataset = eq_dataset
         self.grid_size = (100, 100, 1000)
+        self.max_integral_value = 10e10  # we can play with this value
         self.var_dict = self.create_var_dict()
         self.loss = self.calculate_n_pairwise_loss(N, ell_norm)
-        self.max_integral_value = 10e10  # we can play with this value
 
     def create_var_dict(self):
         """Creates a dictionary of different variables and their corresponding equations.
@@ -49,7 +49,6 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
         first_batch = int(0.95 * N)
         second_batch = N - first_batch
         for i in range(first_batch):
-            print(i)
             chosen_symbols = random.choice(list(possible_symbols))
 
             possible_equations = {i[1] for i in self.var_dict[chosen_symbols]}
@@ -82,9 +81,10 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
     def compute_integral(self, sympy_eq):
         func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq)
         grids = self.create_discrete_grids(symbols)
-        print(grids[0])
         _arg = {sym: _grid for sym, _grid in zip(symbols, grids)}
-        return torch.mean(func(**_arg))
+        result = torch.mean(func(**_arg))
+        del grids
+        return result
 
     def create_discrete_grids(self, symbols):
         grid = torch.linspace(*self.grid_size, device=self.eq_dataset.device)
diff --git a/test/EquationTests/test_numpy_sympy_torch.py b/test/EquationTests/test_numpy_sympy_torch.py
index 16bad8f..4d88549 100644
--- a/test/EquationTests/test_numpy_sympy_torch.py
+++ b/test/EquationTests/test_numpy_sympy_torch.py
@@ -7,6 +7,7 @@
 import torch
 
 from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS
+from NumGI.ConstantDictionaries import OPERATIONS
 from NumGI.EquationTokenizer import EquationTokenizer
 from NumGI.SolutionGenerator import SolutionGenerator
 
@@ -16,8 +17,8 @@ def test_sp_np_torch():
     sg.PROB_NEW_SYMBOL = 0
     n_eqs = 30
     sols = [
-        # sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify()
-        # for i in range(n_eqs)
+        sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify()
+        for i in range(n_eqs)
     ]
 
     for func in DIFFERENTIAL_FUNCTIONS:

From fbe0039a05676d53f489c50335568957b64712d0 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Fri, 12 Jan 2024 18:01:34 -0500
Subject: [PATCH 12/13] Added decaying exp scaling to step size in integral to
 remove out-of-memory errors

---
 NumGI/Loss/LossDataset.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py
index 3ee44a6..e9af741 100644
--- a/NumGI/Loss/LossDataset.py
+++ b/NumGI/Loss/LossDataset.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import math
 import random
 
 import sympy as sp
@@ -15,12 +16,11 @@ class LossDataset:
         DatasetTokenizer (DatasetTokenizer): DatasetTokenizer to create loss dataset from.
     """
 
-    def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1):
+    def __init__(self, eq_dataset: DatasetTokenizer):
         self.eq_dataset = eq_dataset
-        self.grid_size = (100, 100, 1000)
+        self.grid_size = (-100, 100, 1000)
         self.max_integral_value = 10e10  # we can play with this value
         self.var_dict = self.create_var_dict()
-        self.loss = self.calculate_n_pairwise_loss(N, ell_norm)
 
     def create_var_dict(self):
         """Creates a dictionary of different variables and their corresponding equations.
@@ -50,7 +50,6 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
         second_batch = N - first_batch
         for i in range(first_batch):
             chosen_symbols = random.choice(list(possible_symbols))
-
             possible_equations = {i[1] for i in self.var_dict[chosen_symbols]}
 
             idx_sympy_1, idx_sympy_2 = random.sample(possible_equations, 2)
@@ -76,7 +75,7 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
             loss[1, i] = sol_sympy_2[1]
             loss[2, i] = torch.inf
 
-        return loss
+        self.loss = loss
 
     def compute_integral(self, sympy_eq):
         func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq)
@@ -87,7 +86,10 @@ def compute_integral(self, sympy_eq):
         return result
 
     def create_discrete_grids(self, symbols):
-        grid = torch.linspace(*self.grid_size, device=self.eq_dataset.device)
+        grid_low, grid_high, num_grid = self.grid_size
+        # scale grid down with dimesion
+        num_grid = int(num_grid * math.exp(-len(symbols)))
+        grid = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device)
         grids = [grid for i in symbols]
         mesh = torch.meshgrid(grids)
         return mesh

From e5aa1f8906f0fa3ddd90fbabea939b3b20be8140 Mon Sep 17 00:00:00 2001
From: Arnaud Bergeron <bergerona62@gmail.com>
Date: Tue, 30 Jan 2024 19:25:15 -0500
Subject: [PATCH 13/13] added integration scope to complex numbers

---
 NumGI/Loss/LossDataset.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py
index e9af741..910a9bf 100644
--- a/NumGI/Loss/LossDataset.py
+++ b/NumGI/Loss/LossDataset.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
-import math
 import random
 
+import numpy as np
 import sympy as sp
 import torch
 
@@ -18,7 +18,7 @@ class LossDataset:
 
     def __init__(self, eq_dataset: DatasetTokenizer):
         self.eq_dataset = eq_dataset
-        self.grid_size = (-100, 100, 1000)
+        self.grid_size = (-1, 1, 1000)
         self.max_integral_value = 10e10  # we can play with this value
         self.var_dict = self.create_var_dict()
 
@@ -61,10 +61,11 @@ def calculate_n_pairwise_loss(self, N, ell_norm):
 
             loss[0, i] = sol_sympy_1[1]
             loss[1, i] = sol_sympy_2[1]
-            if integral < self.max_integral_value:
+            integral_val = integral.item()
+            if np.abs(integral_val) < self.max_integral_value:
                 loss[2, i] = integral.item()
             else:
-                loss[2, i] = torch.inf
+                loss[2, i] = np.sign(integral_val) * self.max_integral_value
 
         for i in range(second_batch):
             chosen_symbols = random.sample(possible_symbols, 2)
@@ -81,15 +82,19 @@ def compute_integral(self, sympy_eq):
         func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq)
         grids = self.create_discrete_grids(symbols)
         _arg = {sym: _grid for sym, _grid in zip(symbols, grids)}
-        result = torch.mean(func(**_arg))
+        complex_result = func(**_arg)
+        result = (complex_result * complex_result.conj()) ** 0.5
+        result = torch.nanmean(result.real)
         del grids
         return result
 
     def create_discrete_grids(self, symbols):
         grid_low, grid_high, num_grid = self.grid_size
         # scale grid down with dimesion
-        num_grid = int(num_grid * math.exp(-len(symbols)))
-        grid = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device)
+        num_grid = int(num_grid * np.exp(-len(symbols)))
+        grid_real = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device)
+        grid_im = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device)
+        grid = torch.complex(grid_real, grid_im)
         grids = [grid for i in symbols]
         mesh = torch.meshgrid(grids)
         return mesh