From 6f038bbcd7dbe7dc46736d6fd0a0008c33751234 Mon Sep 17 00:00:00 2001 From: arnaudbergeron <58529583+arnaudbergeron@users.noreply.github.com> Date: Sun, 19 Nov 2023 11:51:17 -0500 Subject: [PATCH 01/13] new logo --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a70e604..67e150b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ -![logo_banner](https://github.com/arnaudbergeron/NumGI/assets/58529583/4b700c9f-b58f-4448-af5d-50b637cd9d86) +![logo_banner_3](https://github.com/arnaudbergeron/NumGI/assets/58529583/c23ba24c-5686-495b-89bb-1c3139290f9d) ----------------- + # NumGI - Differential Equation Solver using Transformer-based AI NumGI is an open-source project that aims to solve differential equations using transformer-based AI models. It provides a user-friendly interface for solving a wide range of ordinary and partial differential equations. From d35dd45e7676eb8acf9755ea646b13830effdf2d Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Tue, 21 Nov 2023 09:10:14 -0500 Subject: [PATCH 02/13] l2 loss module --- .gitignore | 2 ++ NumGI/Loss/LossDataset.py | 65 +++++++++++++++++++++++++++++++++++++++ NumGI/Loss/__init__.py | 0 3 files changed, 67 insertions(+) create mode 100644 NumGI/Loss/LossDataset.py create mode 100644 NumGI/Loss/__init__.py diff --git a/.gitignore b/.gitignore index d931115..af723d1 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,5 @@ coverage.xml # Sphinx documentation docs/_build/ + +.vscode diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py new file mode 100644 index 0000000..fa33666 --- /dev/null +++ b/NumGI/Loss/LossDataset.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import random + +import sympy as sp +import torch + +from NumGI.DatasetTokenizer import DatasetTokenizer + + +class LossDataset(DatasetTokenizer): + """Docstring for LossDataset. + + Args: + DatasetTokenizer (DatasetTokenizer): DatasetTokenizer to create loss dataset from. + """ + + def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1): + self.eq_dataset = eq_dataset + self.var_dict = self.create_var_dict() + self.loss = self.calculate_n_pairwise_loss(N, ell_norm) + + def create_var_dict(self): + var_dict = {} + equations = self.eq_dataset.y_tokenized.tolist() + for i, eq in enumerate(equations): + solution = self.eq_dataset.tokens_to_sympy(eq) + if frozenset(solution.free_symbols) not in var_dict: + var_dict[frozenset(solution.free_symbols)] = [[solution, i]] + else: + var_dict[frozenset(solution.free_symbols)].append([solution, i]) + return var_dict + + def calculate_n_pairwise_loss(self, N, ell_norm): + loss = torch.zeros((3, N)) + possible_symbols = self.var_dict.keys() + + first_batch = int(0.9 * N) + second_batch = N - first_batch + for i in range(first_batch): + chosen_symbols = random.choice(list(possible_symbols)) + + sol_sympy_1 = random.choice(self.var_dict[chosen_symbols]) + sol_sympy_2 = random.choice(self.var_dict[chosen_symbols]) + integral = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm + for symbol in chosen_symbols: + integral = sp.integrate(integral, (symbol, -sp.oo, sp.oo)) + + loss[0, i] = sol_sympy_1[1] + loss[1, i] = sol_sympy_2[1] + if integral.is_number: + loss[2, i] = float(integral) + else: + loss[2, i] = torch.inf + + for i in range(second_batch): + chosen_symbols = random.sample(possible_symbols, 2) + sol_sympy_1 = random.choice(self.var_dict[chosen_symbols[0]]) + sol_sympy_2 = random.choice(self.var_dict[chosen_symbols[1]]) + + loss[0, i] = sol_sympy_1[1] + loss[1, i] = sol_sympy_2[1] + loss[2, i] = torch.inf + + return loss diff --git a/NumGI/Loss/__init__.py b/NumGI/Loss/__init__.py new file mode 100644 index 0000000..e69de29 From 7ed0c647551d779dc3c706bf889fb242d7de1751 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Sun, 24 Dec 2023 12:37:59 -0500 Subject: [PATCH 03/13] numpy to sympy --- .gitignore | 2 ++ NumGI/EquationTokenizer.py | 54 ++++++++++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index d931115..ab50832 100644 --- a/.gitignore +++ b/.gitignore @@ -52,5 +52,7 @@ coverage.xml *.log *.pot +.vscode + # Sphinx documentation docs/_build/ diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py index 5212c14..59c17ac 100644 --- a/NumGI/EquationTokenizer.py +++ b/NumGI/EquationTokenizer.py @@ -7,6 +7,8 @@ from sympy.core.numbers import Rational from torch.nn.utils.rnn import pad_sequence +from NumGI.ConstantDictionaries import SP_TO_NP + class EquationTokenizer: """Tokenizer for equations. @@ -53,11 +55,7 @@ def sympy_to_list(self, sympy_equation) -> list: for ind, arg in enumerate(eq_args): sub_arg_list = self.sympy_to_list(arg) for _sub in sub_arg_list: - if ( - isinstance(_sub, Float) - or isinstance(_sub, Integer) - or isinstance(_sub, Rational) - ): + if self.is_number(_sub): # perhaps not general enough should allow for more types # the idea is we want to tokenize '12.2' as '1','2,'.','2' and not '12.2' for i in str(_sub): @@ -72,6 +70,45 @@ def sympy_to_list(self, sympy_equation) -> list: return eq_list + def _utils_exec_numpy(self, sympy_list, **kwargs): + """Converts a sympy list to a numpy list. + + This is a util func. + """ + function = sympy_list[0] + numpy_function = SP_TO_NP[function] + args_list = [] + for i in sympy_list[1:]: + if isinstance(i, list): + args_list.append(self._utils_exec_numpy(i, **kwargs)) + elif isinstance(i, sp.Symbol): + args_list.append(kwargs[str(i)]) + elif self.is_number(i): + args_list.append(i.evalf()) + else: + raise ValueError(f"Unknown type: {type(i)}, for {i}") + + return numpy_function(*args_list) + + def sympy_to_numpy(self, sympy_equation): + """Converts a sympy equation to a numpy function. + + This is a util func. + """ + simplified_eq = sympy_equation.rhs.simplify() + + return sp.lambdify(list(simplified_eq.free_symbols), simplified_eq, "numpy") + # sympy_list = self.sympy_to_list(simplified_eq) + # grouped_num_list = self._regroup_numbers(sympy_list) + # parsed_list = self._parantheses_to_list(grouped_num_list)[0][0] + + # variables = list(sympy_equation.free_symbols) + # variables = [str(i) for i in variables] + # def np_func(**kwargs): + # return self._utils_exec_numpy(parsed_list, **kwargs) + + # return np_func, variables + def _parantheses_to_list(self, eq_list): """Converts a list with parentheses to a list of lists according to parentheses. @@ -226,6 +263,13 @@ def tensorize_and_pad_by_len(self, list_of_token_list, max_len): return output[:-1] + def is_number(self, sp_class): + return ( + isinstance(sp_class, Float) + or isinstance(sp_class, Integer) + or isinstance(sp_class, Rational) + ) + def defaultTokenizer(): """Returns a default tokenizer. Because of issues with pickling.""" From 7d08e7f609c5be8294064dc6ad84574a7f204639 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Sun, 24 Dec 2023 12:39:26 -0500 Subject: [PATCH 04/13] new numpy to sympy using lambdify --- NumGI/EquationTokenizer.py | 36 +----------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py index 59c17ac..47046aa 100644 --- a/NumGI/EquationTokenizer.py +++ b/NumGI/EquationTokenizer.py @@ -7,8 +7,6 @@ from sympy.core.numbers import Rational from torch.nn.utils.rnn import pad_sequence -from NumGI.ConstantDictionaries import SP_TO_NP - class EquationTokenizer: """Tokenizer for equations. @@ -70,44 +68,12 @@ def sympy_to_list(self, sympy_equation) -> list: return eq_list - def _utils_exec_numpy(self, sympy_list, **kwargs): - """Converts a sympy list to a numpy list. - - This is a util func. - """ - function = sympy_list[0] - numpy_function = SP_TO_NP[function] - args_list = [] - for i in sympy_list[1:]: - if isinstance(i, list): - args_list.append(self._utils_exec_numpy(i, **kwargs)) - elif isinstance(i, sp.Symbol): - args_list.append(kwargs[str(i)]) - elif self.is_number(i): - args_list.append(i.evalf()) - else: - raise ValueError(f"Unknown type: {type(i)}, for {i}") - - return numpy_function(*args_list) - def sympy_to_numpy(self, sympy_equation): """Converts a sympy equation to a numpy function. This is a util func. """ - simplified_eq = sympy_equation.rhs.simplify() - - return sp.lambdify(list(simplified_eq.free_symbols), simplified_eq, "numpy") - # sympy_list = self.sympy_to_list(simplified_eq) - # grouped_num_list = self._regroup_numbers(sympy_list) - # parsed_list = self._parantheses_to_list(grouped_num_list)[0][0] - - # variables = list(sympy_equation.free_symbols) - # variables = [str(i) for i in variables] - # def np_func(**kwargs): - # return self._utils_exec_numpy(parsed_list, **kwargs) - - # return np_func, variables + return sp.lambdify(list(sympy_equation.free_symbols), sympy_equation, "numpy") def _parantheses_to_list(self, eq_list): """Converts a list with parentheses to a list of lists according to parentheses. From 5daf030cb04b0388d11a27354f4ef013a38b2cb7 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Sun, 24 Dec 2023 12:43:01 -0500 Subject: [PATCH 05/13] added file containing all constants dicts and lists --- NumGI/ConstantDictionaries.py | 112 ++++++++++++++++++++++++++++++++++ NumGI/EquationTokenizer.py | 69 +-------------------- NumGI/SolutionGenerator.py | 55 +++-------------- 3 files changed, 125 insertions(+), 111 deletions(-) create mode 100644 NumGI/ConstantDictionaries.py diff --git a/NumGI/ConstantDictionaries.py b/NumGI/ConstantDictionaries.py new file mode 100644 index 0000000..a17969d --- /dev/null +++ b/NumGI/ConstantDictionaries.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import sympy as sp + +DIFFERENTIAL_FUNCTIONS = [ + sp.sin, + sp.cos, + sp.tan, + sp.cot, + sp.sec, + sp.csc, + sp.exp, + sp.log, + sp.sqrt, + sp.asin, + sp.acos, + sp.atan, + sp.acot, + sp.asec, + sp.acsc, + sp.sinh, + sp.cosh, + sp.tanh, + sp.coth, + sp.sech, + sp.csch, + sp.asinh, + sp.acosh, + sp.atanh, + sp.acoth, + sp.asech, + sp.acsch, +] + +OPERATIONS = [ + ("multiplication", "arithmetic"), + ("addition", "arithmetic"), + ("subtraction", "arithmetic"), + ("division", "arithmetic"), + ("differential", "differential"), + # ("integration", "integration"), + ("exponent", "exponent"), +] + +VARIABLES = ["x", "y", "z", "beta", "gamma"] + +DEFAULT_DICT = { + ")": 0, + sp.acsc: 1, + sp.acot: 2, + sp.asech: 3, + sp.core.containers.Tuple: 4, + "/": 5, + sp.sech: 6, + "END": 7, + sp.exp: 8, + "7": 9, + "0": 10, + sp.asin: 11, + "5": 12, + sp.core.function.Derivative: 13, + "8": 14, + sp.asec: 15, + sp.core.add.Add: 16, + sp.core.power.Pow: 17, + sp.csch: 18, + "START": 19, + sp.csc: 20, + "PAD": 21, + sp.sin: 22, + ",": 23, + sp.acsch: 24, + sp.core.relational.Equality: 25, + "(": 26, + "2": 27, + sp.Symbol("x"): 28, + sp.coth: 29, + sp.Symbol("y"): 30, + sp.log: 31, + sp.cos: 32, + "6": 33, + sp.core.mul.Mul: 34, + sp.acos: 35, + "9": 36, + sp.Function("f"): 37, + "-": 38, + sp.sqrt: 39, + sp.cosh: 40, + sp.tan: 41, + sp.tanh: 42, + sp.Symbol("z"): 43, + "4": 44, + "3": 45, + sp.cot: 46, + sp.asinh: 47, + sp.atan: 48, + sp.acosh: 49, + "1": 50, + sp.atanh: 51, + ".": 52, + sp.sinh: 53, + sp.acoth: 54, + sp.sec: 55, + sp.Symbol("beta"): 56, + sp.Symbol("gamma"): 57, + sp.Symbol("delta"): 58, + sp.Symbol("a"): 59, + sp.Symbol("b"): 60, + sp.Symbol("c"): 61, + sp.Symbol("d"): 62, + sp.Symbol("epsilon"): 63, +} diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py index 47046aa..93424eb 100644 --- a/NumGI/EquationTokenizer.py +++ b/NumGI/EquationTokenizer.py @@ -7,6 +7,8 @@ from sympy.core.numbers import Rational from torch.nn.utils.rnn import pad_sequence +from NumGI.ConstantDictionaries import DEFAULT_DICT + class EquationTokenizer: """Tokenizer for equations. @@ -239,72 +241,7 @@ def is_number(self, sp_class): def defaultTokenizer(): """Returns a default tokenizer. Because of issues with pickling.""" - tokenize_dict = { - ")": 0, - sp.acsc: 1, - sp.acot: 2, - sp.asech: 3, - sp.core.containers.Tuple: 4, - "/": 5, - sp.sech: 6, - "END": 7, - sp.exp: 8, - "7": 9, - "0": 10, - sp.asin: 11, - "5": 12, - sp.core.function.Derivative: 13, - "8": 14, - sp.asec: 15, - sp.core.add.Add: 16, - sp.core.power.Pow: 17, - sp.csch: 18, - "START": 19, - sp.csc: 20, - "PAD": 21, - sp.sin: 22, - ",": 23, - sp.acsch: 24, - sp.core.relational.Equality: 25, - "(": 26, - "2": 27, - sp.Symbol("x"): 28, - sp.coth: 29, - sp.Symbol("y"): 30, - sp.log: 31, - sp.cos: 32, - "6": 33, - sp.core.mul.Mul: 34, - sp.acos: 35, - "9": 36, - sp.Function("f"): 37, - "-": 38, - sp.sqrt: 39, - sp.cosh: 40, - sp.tan: 41, - sp.tanh: 42, - sp.Symbol("z"): 43, - "4": 44, - "3": 45, - sp.cot: 46, - sp.asinh: 47, - sp.atan: 48, - sp.acosh: 49, - "1": 50, - sp.atanh: 51, - ".": 52, - sp.sinh: 53, - sp.acoth: 54, - sp.sec: 55, - sp.Symbol("beta"): 56, - sp.Symbol("gamma"): 57, - sp.Symbol("delta"): 58, - sp.Symbol("a"): 59, - sp.Symbol("b"): 60, - sp.Symbol("c"): 61, - sp.Symbol("d"): 62, - sp.Symbol("epsilon"): 63, - } + tokenize_dict = DEFAULT_DICT # invert tokenizer_dict into decode_dict decode_dict = {v: k for k, v in tokenize_dict.items()} diff --git a/NumGI/SolutionGenerator.py b/NumGI/SolutionGenerator.py index e187579..5a1921d 100644 --- a/NumGI/SolutionGenerator.py +++ b/NumGI/SolutionGenerator.py @@ -4,6 +4,10 @@ import sympy as sp +from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS +from NumGI.ConstantDictionaries import OPERATIONS +from NumGI.ConstantDictionaries import VARIABLES + class SolutionGenerator: """Generates solutions for contrived questions to be used for training data. @@ -14,6 +18,9 @@ class SolutionGenerator: def __init__( self, ): + self.OPERATIONS = OPERATIONS + self.DIFFERENTIAL_FUNCTIONS = DIFFERENTIAL_FUNCTIONS + self.VARIABLES = VARIABLES self.PROB_NEW_SYMBOL = 0.3 self.PROB_USED_SYMBOL = 1 - self.PROB_NEW_SYMBOL self.NEW_VARS = self.VARIABLES @@ -301,48 +308,6 @@ def __str__(self): def choose_op_noarithmetic(self, ops: list): return random.choice(ops[3:]) - DIFFERENTIAL_FUNCTIONS = [ - sp.sin, - sp.cos, - sp.tan, - sp.cot, - sp.sec, - sp.csc, - sp.exp, - sp.log, - sp.sqrt, - sp.asin, - sp.acos, - sp.atan, - sp.acot, - sp.asec, - sp.acsc, - sp.sinh, - sp.cosh, - sp.tanh, - sp.coth, - sp.sech, - sp.csch, - sp.asinh, - sp.acosh, - sp.atanh, - sp.acoth, - sp.asech, - sp.acsch, - ] - # must not be rordered need first three to be arithmetic because - # I am lazy and chooseop_noarithmetic is not implemented well - OPERATIONS = [ - ("multiplication", "arithmetic"), - ("addition", "arithmetic"), - ("subtraction", "arithmetic"), - ("division", "arithmetic"), - ("differential", "differential"), - # ("integration", "integration"), - ("exponent", "exponent"), - ] - VARIABLES = ["x", "y", "z", "beta", "gamma"] - if __name__ == "__main__": sg = SolutionGenerator() @@ -350,8 +315,8 @@ def choose_op_noarithmetic(self, ops: list): ops_sol=(3, 5), ops_eq=(2, 5), num_eqs=1000, - vars=sg.VARIABLES, - funcs=sg.DIFFERENTIAL_FUNCTIONS, - ops=sg.OPERATIONS, + vars=VARIABLES, + funcs=DIFFERENTIAL_FUNCTIONS, + ops=OPERATIONS, ) print(eqs) From 18e9d394c39c3b5dfb9c5a8d2a77928ee6a50659 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Sun, 24 Dec 2023 15:40:40 -0500 Subject: [PATCH 06/13] added test for sp to np and torch --- test/EquationTests/test_numpy_sympy_torch.py | 69 ++++++++++++++++++++ test/ModelTests/test_inference.py | 4 +- test/ModelTests/test_model.py | 2 +- 3 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 test/EquationTests/test_numpy_sympy_torch.py diff --git a/test/EquationTests/test_numpy_sympy_torch.py b/test/EquationTests/test_numpy_sympy_torch.py new file mode 100644 index 0000000..6f54e93 --- /dev/null +++ b/test/EquationTests/test_numpy_sympy_torch.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import numpy as np +import sympy as sp +import torch + +from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS +from NumGI.ConstantDictionaries import OPERATIONS +from NumGI.EquationTokenizer import EquationTokenizer +from NumGI.SolutionGenerator import SolutionGenerator + +sg = SolutionGenerator() +sg.PROB_NEW_SYMBOL = 0 +n_eqs = 30 +sols = [ + sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify() + for i in range(n_eqs) +] + +tokenizer = EquationTokenizer() + +test_arr = [1, 2, 5, 10, 20] +np_test = np.array(test_arr) +torch_test = torch.tensor(test_arr, device=tokenizer.device) +x = sp.Symbol("x") + +cnt = 0 + +for i in sols: + try: + np_func, var = tokenizer.sympy_to_numpy(i) + np_res = np_func(np_test).tolist() + except TypeError: + cnt += 1 + continue + + if cnt > n_eqs / 2: + raise Exception( + "Too many equations with TypeError are equations correctly generated \ + or error in sp to np func" + ) + + sp_res = [] + for idx, j in enumerate(test_arr): + try: + sp_res.append(float(i.replace(x, j).evalf())) + except Exception as e: + print(e) + sp_res.append(np_res[idx]) + + torch_func, var = tokenizer.sympy_to_torch(i) + torch_res = torch_func(**{_arg: torch_test for _arg in var}).tolist() + + tol = 1e-3 + for i in range(len(sp_res)): + try: + if sp_res[i] is not None and np_res[i] is not None and torch_res[i] is not None: + continue + elif sp_res[i] == 0: + assert (sp_res[i] - np_res[i]) < tol + assert (sp_res[i] - torch_res[i]) < tol + else: + assert (sp_res[i] - np_res[i]) / sp_res[i] < tol + assert (sp_res[i] - torch_res[i]) / sp_res[i] < tol + except Exception as e: + print( + f"eq:{i}, sp_res: {sp_res[i]}, np_res: {np_res[i]}, torch_res: {torch_res[i]}, {e}" + ) + raise diff --git a/test/ModelTests/test_inference.py b/test/ModelTests/test_inference.py index ecb8468..33199e6 100644 --- a/test/ModelTests/test_inference.py +++ b/test/ModelTests/test_inference.py @@ -2,8 +2,8 @@ import torch -from NumGI.model.Inference import batch_inference -from NumGI.model.Model import TransformerNet +from NumGI.Model.Inference import batch_inference +from NumGI.Model.Model import TransformerNet def test_batch_inference(): diff --git a/test/ModelTests/test_model.py b/test/ModelTests/test_model.py index 5428494..78ba91d 100644 --- a/test/ModelTests/test_model.py +++ b/test/ModelTests/test_model.py @@ -2,7 +2,7 @@ import torch -from NumGI.model.Model import TransformerNet +from NumGI.Model.Model import TransformerNet def test_transformer_net(): From 4336312c61d4d50d9592db888fec561b87713bfb Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 25 Dec 2023 12:36:47 -0500 Subject: [PATCH 07/13] new loss dataset functions --- NumGI/Loss/LossDataset.py | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py index fa33666..56fd9cd 100644 --- a/NumGI/Loss/LossDataset.py +++ b/NumGI/Loss/LossDataset.py @@ -8,7 +8,7 @@ from NumGI.DatasetTokenizer import DatasetTokenizer -class LossDataset(DatasetTokenizer): +class LossDataset: """Docstring for LossDataset. Args: @@ -17,39 +17,55 @@ class LossDataset(DatasetTokenizer): def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1): self.eq_dataset = eq_dataset + self.grid_size = (100, 100, 1000) self.var_dict = self.create_var_dict() self.loss = self.calculate_n_pairwise_loss(N, ell_norm) + self.max_integral_value = 10e10 # we can play with this value def create_var_dict(self): + """Creates a dictionary of different variables and their corresponding equations. + + Returns: + _type_: _description_ + """ var_dict = {} equations = self.eq_dataset.y_tokenized.tolist() + self.solutions = [] for i, eq in enumerate(equations): - solution = self.eq_dataset.tokens_to_sympy(eq) - if frozenset(solution.free_symbols) not in var_dict: - var_dict[frozenset(solution.free_symbols)] = [[solution, i]] + sol = self.eq_dataset.tokens_to_sympy(eq) + self.solutions.append(sol) + if frozenset(sol.free_symbols) not in var_dict: + var_dict[frozenset(sol.free_symbols)] = [[sol, i]] else: - var_dict[frozenset(solution.free_symbols)].append([solution, i]) + var_dict[frozenset(sol.free_symbols)].append([sol, i]) return var_dict def calculate_n_pairwise_loss(self, N, ell_norm): loss = torch.zeros((3, N)) possible_symbols = self.var_dict.keys() + possible_symbols = [i for i in possible_symbols if len(self.var_dict[i]) > 1] + first_batch = int(0.9 * N) second_batch = N - first_batch for i in range(first_batch): + print(i) chosen_symbols = random.choice(list(possible_symbols)) - sol_sympy_1 = random.choice(self.var_dict[chosen_symbols]) - sol_sympy_2 = random.choice(self.var_dict[chosen_symbols]) - integral = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm - for symbol in chosen_symbols: - integral = sp.integrate(integral, (symbol, -sp.oo, sp.oo)) + possible_equations = {i[1] for i in self.var_dict[chosen_symbols]} + + idx_sympy_1, idx_sympy_2 = random.sample(possible_equations, 2) + sol_sympy_1 = [self.solutions[idx_sympy_1], idx_sympy_1] + sol_sympy_2 = [self.solutions[idx_sympy_2], idx_sympy_2] + + integrand = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm + print(integrand) + integral = self.compute_integral(integrand) loss[0, i] = sol_sympy_1[1] loss[1, i] = sol_sympy_2[1] - if integral.is_number: - loss[2, i] = float(integral) + if integral < self.max_integral_value: + loss[2, i] = integral.item() else: loss[2, i] = torch.inf @@ -63,3 +79,16 @@ def calculate_n_pairwise_loss(self, N, ell_norm): loss[2, i] = torch.inf return loss + + def compute_integral(self, sympy_eq): + func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq) + grids = self.create_discrete_grids(symbols) + print(grids[0]) + _arg = {sym: _grid for sym, _grid in zip(symbols, grids)} + return torch.mean(func(**_arg)) + + def create_discrete_grids(self, symbols): + grid = torch.linspace(*self.grid_size, device=self.eq_dataset.device) + grids = [grid for i in symbols] + mesh = torch.meshgrid(grids) + return mesh From 7cbb696173cdd0b632fb68195aea1d41d005c064 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 25 Dec 2023 12:40:22 -0500 Subject: [PATCH 08/13] renamed Model folder --- NumGI/ConstantDictionaries.py | 16 ++---- NumGI/EquationTokenizer.py | 82 +++++++++++++++++++++++++++--- NumGI/LoadTokenizer.py | 10 ++-- NumGI/ParallelEquationGenerator.py | 41 ++------------- NumGI/__init__.py | 22 ++++++++ 5 files changed, 111 insertions(+), 60 deletions(-) diff --git a/NumGI/ConstantDictionaries.py b/NumGI/ConstantDictionaries.py index 9e97ec3..4da2899 100644 --- a/NumGI/ConstantDictionaries.py +++ b/NumGI/ConstantDictionaries.py @@ -7,46 +7,38 @@ sp.sin: torch.sin, sp.cos: torch.cos, sp.tan: torch.tan, - sp.csc: torch.csc, sp.exp: torch.exp, sp.log: torch.log, - sp.sqrt: torch.sqrt, sp.asin: torch.asin, sp.acos: torch.acos, sp.atan: torch.atan, - sp.acsc: torch.acsc, sp.sinh: torch.sinh, sp.cosh: torch.cosh, sp.tanh: torch.tanh, - sp.csch: torch.csch, sp.asinh: torch.asinh, sp.acosh: torch.acosh, sp.atanh: torch.atanh, - sp.acsch: torch.acsch, + sp.Mul: torch.mul, + sp.Add: torch.add, + sp.Pow: torch.pow, + sp.Abs: torch.abs, } DIFFERENTIAL_FUNCTIONS = [ sp.sin, sp.cos, sp.tan, - sp.csc, sp.exp, sp.log, - sp.sqrt, sp.asin, sp.acos, sp.atan, - sp.acot, - sp.asec, - sp.acsc, sp.sinh, sp.cosh, sp.tanh, - sp.csch, sp.asinh, sp.acosh, sp.atanh, - sp.acsch, ] OPERATIONS = [ diff --git a/NumGI/EquationTokenizer.py b/NumGI/EquationTokenizer.py index 93424eb..ad96562 100644 --- a/NumGI/EquationTokenizer.py +++ b/NumGI/EquationTokenizer.py @@ -8,6 +8,7 @@ from torch.nn.utils.rnn import pad_sequence from NumGI.ConstantDictionaries import DEFAULT_DICT +from NumGI.ConstantDictionaries import SP_TO_TORCH class EquationTokenizer: @@ -32,6 +33,13 @@ def __init__(self, useDefaultTokenizer=False): self.dict_size = len(self.tokenize_dict) self.char_set = set(self.tokenize_dict.keys()) + if torch.cuda.is_available(): + self.device = "cuda" + elif torch.backends.mps.is_available(): + self.device = "mps" + else: + self.device = "cpu" + def sympy_to_list(self, sympy_equation) -> list: """Converts a sympy equation to a list that will be tokenized. @@ -75,7 +83,59 @@ def sympy_to_numpy(self, sympy_equation): This is a util func. """ - return sp.lambdify(list(sympy_equation.free_symbols), sympy_equation, "numpy") + symbols = list(sympy_equation.free_symbols) + return sp.lambdify(symbols, sympy_equation, "numpy"), symbols + + def sympy_to_torch(self, sympy_equation): + """Converts a sympy equation to a pytorch function. + + This is a util func. + """ + # simplified_eq = sympy_equation.simplify() + simplified_eq = sympy_equation + sympy_list = self.sympy_to_list(simplified_eq) + grouped_num_list = self._regroup_numbers(sympy_list) + parsed_list = self._parantheses_to_list(grouped_num_list)[0][0] + + variables = list(simplified_eq.free_symbols) + variables = [str(i) for i in variables] + + def torch_func(**kwargs): + return self._utils_exec_torch(parsed_list, **kwargs) + + return torch_func, variables + + def _utils_exec_torch(self, sympy_list, **kwargs): + """Converts a sympy list to a torch function. + + This is a util func. + """ + function = sympy_list[0] + torch_function = SP_TO_TORCH[function] + args_list = [] + for i in sympy_list[1:]: + if isinstance(i, list): + args_list.append(self._utils_exec_torch(i, **kwargs)) + elif isinstance(i, sp.Symbol): + args_list.append(kwargs[str(i)]) + elif self.is_number(i): + args_list.append(torch.tensor(float(i), device=self.device)) + elif i == sp.core.numbers.Pi: + args_list.append(torch.tensor(torch.pi, device=self.device)) + else: + raise ValueError(f"Unknown type: {type(i)}, for {i}") + + if len(args_list) > 2 and (function == sp.Add or function == sp.Mul): + return self.call_multi_input_torch(torch_function, args_list) + + else: + return torch_function(*args_list) + + def call_multi_input_torch(self, func, args): + if len(args) > 2: + return func(args[0], self.call_multi_input_torch(func, args[1:])) + else: + return func(args[0], args[1]) def _parantheses_to_list(self, eq_list): """Converts a list with parentheses to a list of lists according to parentheses. @@ -213,8 +273,7 @@ def tensorize_and_pad(self, list_of_token_list): """Takes in a list of tokenized lists and outputs a padded tensor of tensors.""" pad_val = self.tokenize_dict["PAD"] - list_of_token_list = [torch.tensor(i) for i in list_of_token_list] - + list_of_token_list = [torch.tensor(i, device=self.device) for i in list_of_token_list] output = pad_sequence(list_of_token_list, batch_first=True, padding_value=pad_val) return output @@ -222,14 +281,23 @@ def tensorize_and_pad(self, list_of_token_list): def tensorize_and_pad_by_len(self, list_of_token_list, max_len): """Takes in a list of tokenized lists and outputs a padded tensor of defined length.""" pad_val = self.tokenize_dict["PAD"] + list_of_token_list = [torch.tensor(i, device=self.device) for i in list_of_token_list] - list_of_token_list = [torch.tensor(i) for i in list_of_token_list] - _extra = torch.zeros(max_len) + return self._pad_tensors(list_of_token_list, max_len, pad_val) + + def pad_by_len(self, list_of_token_list, max_len): + """Takes in a list of tokenized lists and outputs a padded tensor of defined length.""" + pad_val = self.tokenize_dict["PAD"] + list_of_token_list = [i.to(self.device) for i in list_of_token_list] + + return self._pad_tensors(list_of_token_list, max_len, pad_val) + + def _pad_tensors(self, list_of_token_list, max_len, pad_val): + _extra = torch.zeros(max_len, device=self.device) list_of_token_list.append(_extra) output = pad_sequence(list_of_token_list, batch_first=True, padding_value=pad_val) - - return output[:-1] + return output[torch.max((output != _extra), axis=1).values] def is_number(self, sp_class): return ( diff --git a/NumGI/LoadTokenizer.py b/NumGI/LoadTokenizer.py index e3128f2..2980c26 100644 --- a/NumGI/LoadTokenizer.py +++ b/NumGI/LoadTokenizer.py @@ -13,7 +13,9 @@ def __init__(self, x_files, y_files): default_tokenized_y = [] temp_data = [["1", "2"]] - tempTokenizer = DatasetTokenizer(temp_data, temp_data, True, False) + tempTokenizer = DatasetTokenizer( + temp_data, temp_data, useDefaultTokenizer=True, isSympy=False + ) # load files max_length = 0 @@ -26,8 +28,8 @@ def __init__(self, x_files, y_files): max_length = max(max_length, _torch_y.shape[1]) for idx, (x, y) in enumerate(zip(default_tokenized_x, default_tokenized_y)): - default_tokenized_x[idx] = tempTokenizer.tensorize_and_pad_by_len(x, max_length) - default_tokenized_y[idx] = tempTokenizer.tensorize_and_pad_by_len(y, max_length) + default_tokenized_x[idx] = tempTokenizer.pad_by_len(x, max_length) + default_tokenized_y[idx] = tempTokenizer.pad_by_len(y, max_length) default_combined_x_torch = torch.cat(default_tokenized_x, axis=0) default_combined_y_torch = torch.cat(default_tokenized_y, axis=0) @@ -35,4 +37,4 @@ def __init__(self, x_files, y_files): new_x = [tempTokenizer.tokens_to_list(i) for i in default_combined_x_torch.tolist()] new_y = [tempTokenizer.tokens_to_list(i) for i in default_combined_y_torch.tolist()] - super().__init__(new_x, new_y, False, False) + super().__init__(new_x, new_y, useDefaultTokenizer=False, isSympy=False) diff --git a/NumGI/ParallelEquationGenerator.py b/NumGI/ParallelEquationGenerator.py index 8e9e1fd..9b5d74d 100644 --- a/NumGI/ParallelEquationGenerator.py +++ b/NumGI/ParallelEquationGenerator.py @@ -6,6 +6,8 @@ import sympy as sp import torch +from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS +from NumGI.ConstantDictionaries import OPERATIONS from NumGI.DatasetTokenizer import DatasetTokenizer from NumGI.EquationTokenizer import EquationTokenizer from NumGI.SolutionGenerator import SolutionGenerator @@ -70,43 +72,8 @@ def generate_eq_parallel(gen_args: list, path: str, num_thousands: int): if __name__ == "__main__": - diff_func = [ - sp.sin, - sp.cos, - sp.tan, - sp.cot, - sp.sec, - sp.csc, - sp.exp, - sp.log, - sp.sqrt, - sp.asin, - sp.acos, - sp.atan, - sp.acot, - sp.asec, - sp.acsc, - sp.sinh, - sp.cosh, - sp.tanh, - sp.coth, - sp.sech, - sp.csch, - sp.asinh, - sp.acosh, - sp.atanh, - sp.acoth, - sp.asech, - sp.acsch, - ] - ops = [ - ("multiplication", "arithmetic"), - ("addition", "arithmetic"), - ("subtraction", "arithmetic"), - ("division", "arithmetic"), - ("differential", "differential"), - ("exponent", "exponent"), - ] + diff_func = DIFFERENTIAL_FUNCTIONS + ops = OPERATIONS vars = ["x", "y", "z", "beta", "gamma", "delta", "a", "b", "c", "d", "epsilon"] gen_args = [ (3, 10), diff --git a/NumGI/__init__.py b/NumGI/__init__.py index e69de29..ee5b081 100644 --- a/NumGI/__init__.py +++ b/NumGI/__init__.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import numpy as np +import sympy as sp + +sp_function_to_numpy_function = { + sp.Mul: np.multiply, + sp.Add: np.add, + sp.Pow: np.power, + sp.exp: np.exp, + sp.log: np.log, + sp.sin: np.sin, + sp.cos: np.cos, + sp.tan: np.tan, + sp.asin: np.arcsin, + sp.acos: np.arccos, + sp.atan: np.arctan, + sp.sqrt: np.sqrt, + sp.Abs: np.abs, + sp.sign: np.sign, + sp.Eq: np.equal, +} From 1799f3c7901d14d28fcd48401f0b937c34c37f21 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 25 Dec 2023 12:42:17 -0500 Subject: [PATCH 09/13] renamed folder --- NumGI/{model => Model}/Inference.py | 0 NumGI/{model => Model}/Model.py | 0 NumGI/{model => Model}/__init__.py | 0 NumGI/{model => Model}/performance/LossAnalyzer.py | 0 NumGI/{model => Model}/performance/__init__.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename NumGI/{model => Model}/Inference.py (100%) rename NumGI/{model => Model}/Model.py (100%) rename NumGI/{model => Model}/__init__.py (100%) rename NumGI/{model => Model}/performance/LossAnalyzer.py (100%) rename NumGI/{model => Model}/performance/__init__.py (100%) diff --git a/NumGI/model/Inference.py b/NumGI/Model/Inference.py similarity index 100% rename from NumGI/model/Inference.py rename to NumGI/Model/Inference.py diff --git a/NumGI/model/Model.py b/NumGI/Model/Model.py similarity index 100% rename from NumGI/model/Model.py rename to NumGI/Model/Model.py diff --git a/NumGI/model/__init__.py b/NumGI/Model/__init__.py similarity index 100% rename from NumGI/model/__init__.py rename to NumGI/Model/__init__.py diff --git a/NumGI/model/performance/LossAnalyzer.py b/NumGI/Model/performance/LossAnalyzer.py similarity index 100% rename from NumGI/model/performance/LossAnalyzer.py rename to NumGI/Model/performance/LossAnalyzer.py diff --git a/NumGI/model/performance/__init__.py b/NumGI/Model/performance/__init__.py similarity index 100% rename from NumGI/model/performance/__init__.py rename to NumGI/Model/performance/__init__.py From de94e0b7b248a96755a10133e33df2f2be34b5b3 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 25 Dec 2023 15:09:38 -0500 Subject: [PATCH 10/13] fixed misc bugs --- NumGI/ConstantDictionaries.py | 28 +++++ NumGI/Loss/LossDataset.py | 3 +- test/EquationTests/test_numpy_sympy_torch.py | 126 +++++++++++-------- 3 files changed, 101 insertions(+), 56 deletions(-) diff --git a/NumGI/ConstantDictionaries.py b/NumGI/ConstantDictionaries.py index 4da2899..23f96dc 100644 --- a/NumGI/ConstantDictionaries.py +++ b/NumGI/ConstantDictionaries.py @@ -22,6 +22,22 @@ sp.Add: torch.add, sp.Pow: torch.pow, sp.Abs: torch.abs, + sp.cot: lambda x: torch.divide(1, torch.tan(x)), + sp.acot: lambda x: torch.atan(torch.divide(1, x)), + sp.sec: lambda x: torch.divide(1, torch.cos(x)), + sp.asec: lambda x: torch.acos(torch.divide(1, x)), + sp.csc: lambda x: torch.divide(1, torch.sin(x)), + sp.acsc: lambda x: torch.asin(torch.divide(1, x)), + sp.coth: lambda x: torch.divide(1, torch.tanh(x)), + sp.acoth: lambda x: torch.atanh(torch.divide(1, x)), + sp.sech: lambda x: torch.divide(1, torch.cosh(x)), + sp.asech: lambda x: torch.log( + torch.add(torch.divide(1, x), torch.sqrt(torch.sub(torch.pow(torch.divide(1, x), 2), 1))) + ), + sp.csch: lambda x: torch.divide(1, torch.sinh(x)), + sp.acsch: lambda x: torch.log( + torch.add(torch.divide(1, x), torch.sqrt(torch.add(torch.pow(torch.divide(1, x), 2), 1))) + ), } DIFFERENTIAL_FUNCTIONS = [ @@ -39,6 +55,18 @@ sp.asinh, sp.acosh, sp.atanh, + sp.cot, + sp.acot, + sp.sec, + sp.asec, + sp.csc, + sp.acsc, + sp.coth, + sp.acoth, + sp.sech, + sp.asech, + sp.csch, + sp.acsch, ] OPERATIONS = [ diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py index 56fd9cd..872ae03 100644 --- a/NumGI/Loss/LossDataset.py +++ b/NumGI/Loss/LossDataset.py @@ -46,7 +46,7 @@ def calculate_n_pairwise_loss(self, N, ell_norm): possible_symbols = [i for i in possible_symbols if len(self.var_dict[i]) > 1] - first_batch = int(0.9 * N) + first_batch = int(0.95 * N) second_batch = N - first_batch for i in range(first_batch): print(i) @@ -59,7 +59,6 @@ def calculate_n_pairwise_loss(self, N, ell_norm): sol_sympy_2 = [self.solutions[idx_sympy_2], idx_sympy_2] integrand = sp.Abs(sol_sympy_1[0].rhs - sol_sympy_2[0].rhs) ** ell_norm - print(integrand) integral = self.compute_integral(integrand) loss[0, i] = sol_sympy_1[1] diff --git a/test/EquationTests/test_numpy_sympy_torch.py b/test/EquationTests/test_numpy_sympy_torch.py index 6f54e93..16bad8f 100644 --- a/test/EquationTests/test_numpy_sympy_torch.py +++ b/test/EquationTests/test_numpy_sympy_torch.py @@ -1,69 +1,87 @@ from __future__ import annotations +import math + import numpy as np import sympy as sp import torch from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS -from NumGI.ConstantDictionaries import OPERATIONS from NumGI.EquationTokenizer import EquationTokenizer from NumGI.SolutionGenerator import SolutionGenerator -sg = SolutionGenerator() -sg.PROB_NEW_SYMBOL = 0 -n_eqs = 30 -sols = [ - sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify() - for i in range(n_eqs) -] - -tokenizer = EquationTokenizer() - -test_arr = [1, 2, 5, 10, 20] -np_test = np.array(test_arr) -torch_test = torch.tensor(test_arr, device=tokenizer.device) -x = sp.Symbol("x") - -cnt = 0 - -for i in sols: - try: - np_func, var = tokenizer.sympy_to_numpy(i) - np_res = np_func(np_test).tolist() - except TypeError: - cnt += 1 - continue - - if cnt > n_eqs / 2: - raise Exception( - "Too many equations with TypeError are equations correctly generated \ - or error in sp to np func" - ) - - sp_res = [] - for idx, j in enumerate(test_arr): - try: - sp_res.append(float(i.replace(x, j).evalf())) - except Exception as e: - print(e) - sp_res.append(np_res[idx]) - torch_func, var = tokenizer.sympy_to_torch(i) - torch_res = torch_func(**{_arg: torch_test for _arg in var}).tolist() +def test_sp_np_torch(): + sg = SolutionGenerator() + sg.PROB_NEW_SYMBOL = 0 + n_eqs = 30 + sols = [ + # sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify() + # for i in range(n_eqs) + ] + + for func in DIFFERENTIAL_FUNCTIONS: + sols.append(func(sp.Symbol("x"))) + + tokenizer = EquationTokenizer() - tol = 1e-3 - for i in range(len(sp_res)): + test_arr = [-10, -5, -2, -1, 0, 1, 2, 5, 10, 20] + np_test = np.array(test_arr) + torch_test = torch.tensor(test_arr, device=tokenizer.device) + x = sp.Symbol("x") + + cnt = 0 + + for i in sols: try: - if sp_res[i] is not None and np_res[i] is not None and torch_res[i] is not None: - continue - elif sp_res[i] == 0: - assert (sp_res[i] - np_res[i]) < tol - assert (sp_res[i] - torch_res[i]) < tol - else: - assert (sp_res[i] - np_res[i]) / sp_res[i] < tol - assert (sp_res[i] - torch_res[i]) / sp_res[i] < tol - except Exception as e: + np_func, var = tokenizer.sympy_to_numpy(i) + np_res = np_func(np_test).tolist() + except TypeError: + cnt += 1 + print("typeerr") + continue + + if cnt > n_eqs / 2: + raise Exception( + "Too many equations with TypeError are equations correctly generated \ + or error in sp to np func" + ) + + sp_res = [] + for idx, j in enumerate(test_arr): + try: + sp_res.append(float(i.replace(x, j).evalf())) + except Exception as e: + print(e) + sp_res.append(np_res[idx]) + + torch_func, var = tokenizer.sympy_to_torch(i) + torch_res = torch_func(**{_arg: torch_test for _arg in var}).tolist() + + tol = 1e-4 + for idx in range(len(sp_res)): print( - f"eq:{i}, sp_res: {sp_res[i]}, np_res: {np_res[i]}, torch_res: {torch_res[i]}, {e}" + f"eq:{i}, sp_res: {sp_res[idx]}, np_res: {np_res[idx]}, torch_res: {torch_res[idx]}" ) - raise + try: + if math.isnan(sp_res[idx]) or math.isnan(np_res[idx]) or math.isnan(torch_res[idx]): + continue + elif sp_res[idx] == 0: + assert (sp_res[idx] - np_res[idx]) < tol + assert (sp_res[idx] - torch_res[idx]) < tol + elif math.isinf(np_res[idx]): + assert np_res[idx] == sp_res[idx] + assert np_res[idx] == torch_res[idx] + else: + assert (sp_res[idx] - np_res[idx]) / sp_res[idx] < tol + assert (sp_res[idx] - torch_res[idx]) / sp_res[idx] < tol + except Exception as e: + print( + f"eq:{i}, sp_res: {sp_res[idx]}, np_res: {np_res[idx]}, \ + torch_res: {torch_res[idx]}, {e}" + ) + raise + + +if __name__ == "__main__": + test_sp_np_torch() From ecb3687241402f1ec5a6e011da9e033dcd78b0fb Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Mon, 25 Dec 2023 16:25:10 -0500 Subject: [PATCH 11/13] loss calculator should be stable --- NumGI/Loss/LossDataset.py | 8 ++++---- test/EquationTests/test_numpy_sympy_torch.py | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py index 872ae03..3ee44a6 100644 --- a/NumGI/Loss/LossDataset.py +++ b/NumGI/Loss/LossDataset.py @@ -18,9 +18,9 @@ class LossDataset: def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1): self.eq_dataset = eq_dataset self.grid_size = (100, 100, 1000) + self.max_integral_value = 10e10 # we can play with this value self.var_dict = self.create_var_dict() self.loss = self.calculate_n_pairwise_loss(N, ell_norm) - self.max_integral_value = 10e10 # we can play with this value def create_var_dict(self): """Creates a dictionary of different variables and their corresponding equations. @@ -49,7 +49,6 @@ def calculate_n_pairwise_loss(self, N, ell_norm): first_batch = int(0.95 * N) second_batch = N - first_batch for i in range(first_batch): - print(i) chosen_symbols = random.choice(list(possible_symbols)) possible_equations = {i[1] for i in self.var_dict[chosen_symbols]} @@ -82,9 +81,10 @@ def calculate_n_pairwise_loss(self, N, ell_norm): def compute_integral(self, sympy_eq): func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq) grids = self.create_discrete_grids(symbols) - print(grids[0]) _arg = {sym: _grid for sym, _grid in zip(symbols, grids)} - return torch.mean(func(**_arg)) + result = torch.mean(func(**_arg)) + del grids + return result def create_discrete_grids(self, symbols): grid = torch.linspace(*self.grid_size, device=self.eq_dataset.device) diff --git a/test/EquationTests/test_numpy_sympy_torch.py b/test/EquationTests/test_numpy_sympy_torch.py index 16bad8f..4d88549 100644 --- a/test/EquationTests/test_numpy_sympy_torch.py +++ b/test/EquationTests/test_numpy_sympy_torch.py @@ -7,6 +7,7 @@ import torch from NumGI.ConstantDictionaries import DIFFERENTIAL_FUNCTIONS +from NumGI.ConstantDictionaries import OPERATIONS from NumGI.EquationTokenizer import EquationTokenizer from NumGI.SolutionGenerator import SolutionGenerator @@ -16,8 +17,8 @@ def test_sp_np_torch(): sg.PROB_NEW_SYMBOL = 0 n_eqs = 30 sols = [ - # sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify() - # for i in range(n_eqs) + sg.generate_solution(4, ["x"], DIFFERENTIAL_FUNCTIONS, OPERATIONS)[0].simplify() + for i in range(n_eqs) ] for func in DIFFERENTIAL_FUNCTIONS: From fbe0039a05676d53f489c50335568957b64712d0 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Fri, 12 Jan 2024 18:01:34 -0500 Subject: [PATCH 12/13] Added decaying exp scaling to step size in integral to remove oom errors --- NumGI/Loss/LossDataset.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py index 3ee44a6..e9af741 100644 --- a/NumGI/Loss/LossDataset.py +++ b/NumGI/Loss/LossDataset.py @@ -1,5 +1,6 @@ from __future__ import annotations +import math import random import sympy as sp @@ -15,12 +16,11 @@ class LossDataset: DatasetTokenizer (DatasetTokenizer): DatasetTokenizer to create loss dataset from. """ - def __init__(self, eq_dataset: DatasetTokenizer, N: int, ell_norm: int = 1): + def __init__(self, eq_dataset: DatasetTokenizer): self.eq_dataset = eq_dataset - self.grid_size = (100, 100, 1000) + self.grid_size = (-100, 100, 1000) self.max_integral_value = 10e10 # we can play with this value self.var_dict = self.create_var_dict() - self.loss = self.calculate_n_pairwise_loss(N, ell_norm) def create_var_dict(self): """Creates a dictionary of different variables and their corresponding equations. @@ -50,7 +50,6 @@ def calculate_n_pairwise_loss(self, N, ell_norm): second_batch = N - first_batch for i in range(first_batch): chosen_symbols = random.choice(list(possible_symbols)) - possible_equations = {i[1] for i in self.var_dict[chosen_symbols]} idx_sympy_1, idx_sympy_2 = random.sample(possible_equations, 2) @@ -76,7 +75,7 @@ def calculate_n_pairwise_loss(self, N, ell_norm): loss[1, i] = sol_sympy_2[1] loss[2, i] = torch.inf - return loss + self.loss = loss def compute_integral(self, sympy_eq): func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq) @@ -87,7 +86,10 @@ def compute_integral(self, sympy_eq): return result def create_discrete_grids(self, symbols): - grid = torch.linspace(*self.grid_size, device=self.eq_dataset.device) + grid_low, grid_high, num_grid = self.grid_size + # scale grid down with dimesion + num_grid = int(num_grid * math.exp(-len(symbols))) + grid = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device) grids = [grid for i in symbols] mesh = torch.meshgrid(grids) return mesh From e5aa1f8906f0fa3ddd90fbabea939b3b20be8140 Mon Sep 17 00:00:00 2001 From: Arnaud Bergeron Date: Tue, 30 Jan 2024 19:25:15 -0500 Subject: [PATCH 13/13] added integration scope to complex nbs --- NumGI/Loss/LossDataset.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/NumGI/Loss/LossDataset.py b/NumGI/Loss/LossDataset.py index e9af741..910a9bf 100644 --- a/NumGI/Loss/LossDataset.py +++ b/NumGI/Loss/LossDataset.py @@ -1,8 +1,8 @@ from __future__ import annotations -import math import random +import numpy as np import sympy as sp import torch @@ -18,7 +18,7 @@ class LossDataset: def __init__(self, eq_dataset: DatasetTokenizer): self.eq_dataset = eq_dataset - self.grid_size = (-100, 100, 1000) + self.grid_size = (-1, 1, 1000) self.max_integral_value = 10e10 # we can play with this value self.var_dict = self.create_var_dict() @@ -61,10 +61,11 @@ def calculate_n_pairwise_loss(self, N, ell_norm): loss[0, i] = sol_sympy_1[1] loss[1, i] = sol_sympy_2[1] - if integral < self.max_integral_value: + integral_val = integral.item() + if np.abs(integral_val) < self.max_integral_value: loss[2, i] = integral.item() else: - loss[2, i] = torch.inf + loss[2, i] = np.sign(integral_val) * self.max_integral_value for i in range(second_batch): chosen_symbols = random.sample(possible_symbols, 2) @@ -81,15 +82,19 @@ def compute_integral(self, sympy_eq): func, symbols = self.eq_dataset.sympy_to_torch(sympy_eq) grids = self.create_discrete_grids(symbols) _arg = {sym: _grid for sym, _grid in zip(symbols, grids)} - result = torch.mean(func(**_arg)) + complex_result = func(**_arg) + result = (complex_result * complex_result.conj()) ** 0.5 + result = torch.nanmean(result.real) del grids return result def create_discrete_grids(self, symbols): grid_low, grid_high, num_grid = self.grid_size # scale grid down with dimesion - num_grid = int(num_grid * math.exp(-len(symbols))) - grid = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device) + num_grid = int(num_grid * np.exp(-len(symbols))) + grid_real = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device) + grid_im = torch.linspace(grid_low, grid_high, num_grid, device=self.eq_dataset.device) + grid = torch.complex(grid_real, grid_im) grids = [grid for i in symbols] mesh = torch.meshgrid(grids) return mesh