From 54e41fd6e9014fca92ef73436d9e35e36782703a Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Tue, 11 Jun 2024 02:06:54 +0000 Subject: [PATCH 1/7] refactor: rebase branch off of future; introduce custom errors (UnparallelizableError and AdjLineNotImplementedError) caught before general errors, introduce custom errors to ast_to_ir and ir in compiler at appropriate places with more detailed error messages Signed-off-by: YUUU23 --- compiler/ast_to_ir.py | 4 ++-- compiler/custom_error.py | 5 +++++ compiler/ir.py | 15 ++++++++------- compiler/pash_compiler.py | 9 ++++++++- 4 files changed, 23 insertions(+), 10 deletions(-) create mode 100644 compiler/custom_error.py diff --git a/compiler/ast_to_ir.py b/compiler/ast_to_ir.py index 8d6f755a4..f815f9ca0 100644 --- a/compiler/ast_to_ir.py +++ b/compiler/ast_to_ir.py @@ -159,7 +159,7 @@ def combine_pipe(ast_nodes): else: ## If any part of the pipe is not an IR, the compilation must fail. log("Node: {} is not pure".format(ast_nodes[0])) - raise Exception("Not pure node in pipe") + raise UnparallelizableError("Node: {} is not a pure node in pipe".format(ast_nodes[0])) ## Combine the rest of the nodes for ast_node in ast_nodes[1:]: @@ -168,7 +168,7 @@ def combine_pipe(ast_nodes): else: ## If any part of the pipe is not an IR, the compilation must fail. 
log("Node: {} is not pure".format(ast_nodes)) - raise Exception("Not pure node in pipe") + raise UnparallelizableError("This specific node: {} is not a pure node in pipe".format(ast_node)) return [combined_nodes] diff --git a/compiler/custom_error.py b/compiler/custom_error.py new file mode 100644 index 000000000..eedb6f738 --- /dev/null +++ b/compiler/custom_error.py @@ -0,0 +1,5 @@ +class UnparallelizableError(Exception): + pass + +class AdjLineNotImplementedError(Exception): + pass \ No newline at end of file diff --git a/compiler/ir.py b/compiler/ir.py index c1534494a..b7319cc23 100644 --- a/compiler/ir.py +++ b/compiler/ir.py @@ -38,6 +38,7 @@ from shell_ast.ast_util import * from util import * +from custom_error import * import config @@ -242,11 +243,11 @@ def compile_command_to_DFG(fileIdGen, command, options, redirections=None): command_invocation ) if io_info is None: - raise Exception( + raise UnparallelizableError( f"InputOutputInformation for {format_arg_chars(command)} not provided so considered side-effectful." ) if io_info.has_other_outputs(): - raise Exception( + raise UnparallelizableError( f"Command {format_arg_chars(command)} has outputs other than streaming." ) para_info: ParallelizabilityInfo = ( @@ -840,7 +841,7 @@ def apply_parallelization_to_node( node_id, parallelizer, fileIdGen, fan_out ) else: - raise Exception("Splitter not yet implemented") + raise UnparallelizableError("Splitter not yet implemented for command: {}".format(self.get_node(node_id=node_id).cmd_invocation_with_io_vars.cmd_name)) def apply_round_robin_parallelization_to_node( self, node_id, parallelizer, fileIdGen, fan_out, r_split_batch_size @@ -849,11 +850,11 @@ def apply_round_robin_parallelization_to_node( # currently, this cannot be done since splitter etc. would be added... 
aggregator_spec = parallelizer.get_aggregator_spec() if aggregator_spec.is_aggregator_spec_adj_lines_merge(): - raise Exception("adj_lines_merge not yet implemented in PaSh") + raise AdjLineNotImplementedError("adj_lines_merge not yet implemented in PaSh") elif aggregator_spec.is_aggregator_spec_adj_lines_seq(): - raise Exception("adj_lines_seq not yet implemented in PaSh") + raise AdjLineNotImplementedError("adj_lines_seq not yet implemented in PaSh") elif aggregator_spec.is_aggregator_spec_adj_lines_func(): - raise Exception("adj_lines_func not yet implemented in PaSh") + raise AdjLineNotImplementedError("adj_lines_func not yet implemented in PaSh") # END of what to move node = self.get_node(node_id) @@ -1192,7 +1193,7 @@ def introduce_aggregators_for_consec_chunks( fileIdGen, ) else: - raise Exception("aggregator kind not yet implemented") + raise UnparallelizableError("aggregator kind not yet implemented for command: {}".format(original_cmd_invocation_with_io_vars.cmd_name)) else: # we got auxiliary information assert parallelizer.core_aggregator_spec.is_aggregator_spec_custom_2_ary() map_in_aggregator_ids = in_aggregator_ids diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index c4fc7282e..fb76a868b 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -11,6 +11,7 @@ from ir_to_ast import to_shell from pash_graphviz import maybe_generate_graphviz from util import * +from custom_error import * from definitions.ir.aggregator_node import * @@ -92,9 +93,15 @@ def compile_ir(ir_filename, compiled_script_file, args, compiler_config): ret = compile_optimize_output_script( ir_filename, compiled_script_file, args, compiler_config ) + except UnparallelizableError as e: + log("WARNING: Exception caught because some region(s) are unparallelizable:", e) + # log(traceback.format_exc()) # uncomment for exact trace report (PaSh user should see informative messages for unparellizable regions) + except (AdjLineNotImplementedError, 
NotImplementedError) as e: + log("WARNING: Exception caught because some part is not implemented:", e) + log(traceback.format_exc()) except Exception as e: log("WARNING: Exception caught:", e) - # traceback.print_exc() + log(traceback.format_exc()) return ret From 41f2e09085510c24fc413021c41fb544f0c0e1b5 Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Tue, 11 Jun 2024 02:10:40 +0000 Subject: [PATCH 2/7] refactor: import custom error to ast_to_ir, raise unparallelizable err in pash_compiler Signed-off-by: YUUU23 --- compiler/ast_to_ir.py | 2 ++ compiler/pash_compiler.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler/ast_to_ir.py b/compiler/ast_to_ir.py index f815f9ca0..c1e753fa3 100644 --- a/compiler/ast_to_ir.py +++ b/compiler/ast_to_ir.py @@ -8,6 +8,8 @@ from util import * from parse import from_ast_objects_to_shell +from custom_error import * + ## TODO: Separate the ir stuff to the bare minimum and ## try to move this to the shell_ast folder. diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index fb76a868b..8153036ed 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -149,7 +149,7 @@ def compile_optimize_output_script( ret = optimized_ast_or_ir else: - raise Exception("Script failed to compile!") + raise UnparallelizableError("Script failed to compile!") return ret From acad33d4cc759b06d38aa2756f6eae3eb69668db Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Tue, 11 Jun 2024 03:08:06 +0000 Subject: [PATCH 3/7] refactor: put all expansion custom error from the sh_expand library (expand.py file) under one ExpansionError class in custom_error to catch and log these errors separately Signed-off-by: YUUU23 --- compiler/custom_error.py | 5 +- compiler/pash_compiler.py | 4 +- python_pkgs/sh_expand/expand.py | 518 ++++++++++++++++++++++++++++++++ 3 files changed, 525 insertions(+), 2 deletions(-) create mode 100644 python_pkgs/sh_expand/expand.py diff --git a/compiler/custom_error.py b/compiler/custom_error.py 
index eedb6f738..c94643310 100644 --- a/compiler/custom_error.py +++ b/compiler/custom_error.py @@ -2,4 +2,7 @@ class UnparallelizableError(Exception): pass class AdjLineNotImplementedError(Exception): - pass \ No newline at end of file + pass + +class ExpansionError(RuntimeError): + pass \ No newline at end of file diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 8153036ed..1c1ce56d9 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -93,7 +93,9 @@ def compile_ir(ir_filename, compiled_script_file, args, compiler_config): ret = compile_optimize_output_script( ir_filename, compiled_script_file, args, compiler_config ) - except UnparallelizableError as e: + except ExpansionError as e: + log("WARNING: Exception caught because some region(s) are not expandable and therefore unparallelizable:", e) + except (ExpansionError, UnparallelizableError) as e: log("WARNING: Exception caught because some region(s) are unparallelizable:", e) # log(traceback.format_exc()) # uncomment for exact trace report (PaSh user should see informative messages for unparellizable regions) except (AdjLineNotImplementedError, NotImplementedError) as e: diff --git a/python_pkgs/sh_expand/expand.py b/python_pkgs/sh_expand/expand.py new file mode 100644 index 000000000..d4801a99b --- /dev/null +++ b/python_pkgs/sh_expand/expand.py @@ -0,0 +1,518 @@ +import copy + +from shasta.ast_node import * + +from sh_expand.util import log + +from custom_error import * + +################################################################################ +# SAFE EXPANSION ANALYSIS +################################################################################ + +## This contains all necessary state of the expansion +class ExpansionState: + variables: dict + def __init__(self, variables: dict): + self.variables = variables + + def __repr__(self): + return f'ExpansionState: {self.variables}' + +## This function checks if a word is safe to expand (i.e. 
if it will +## not have unpleasant side-effects) +def safe_to_expand(arg_char: ArgChar): + if isinstance(arg_char, VArgChar): # Variable + return True + return False + +def guess_arg(arg): + res = "" + for arg_char in arg: + if isinstance(arg_char, CArgChar) \ + or isinstance(arg_char, EArgChar): + res += chr(arg_char.char) + else: + return None + return res + +def safe_arg(arg): + return all([safe_arg_char(arg_char) for arg_char in arg]) + +def safe_args(args): + return all([safe_arg(arg) for arg in args]) + +def safe_arg_char(arg_char: ArgChar): + # character, escaped---noop, but safe + if isinstance(arg_char, CArgChar) \ + or isinstance(arg_char, EArgChar): + return True + # tilde --- only reads system state, safe to do early assuming no writes to HOME prior + elif isinstance(arg_char, TArgChar): + return True # TODO 2020-11-24 MMG modified variable set? take in/output written vars... + # arithmetic -- depends on what we have + elif isinstance(arg_char, AArgChar): + return safe_arith(arg_char.arg) + # quoted -- safe if its contents are safe + elif isinstance(arg_char, QArgChar): + return safe_arg(arg_char.arg) + # variables -- safe if the format is safe as are the remaining words + elif isinstance(arg_char, VArgChar): + return safe_var(fmt=arg_char.fmt, + null=arg_char.null, + var=arg_char.var, + arg=arg_char.arg) + # command substitution -- depends on the command + elif isinstance(arg_char, BArgChar): + return safe_command(arg_char.node) + + raise ValueError("bad object {}, expected one of CETAVQB".format(arg_char)) + +def safe_var(fmt, null, var, arg): + if (fmt in ['Normal', 'Length']): + return True + elif (fmt in ['Minus', 'Plus', 'TrimR', 'TrimRMax', 'TrimL', 'TrimLMax']): + return safe_arg(arg) + elif (fmt in ['Question']): + return False # TODO 2020-12-10 MMG unless we know `var` is set + elif (fmt in ['Assign']): + return False # TODO 2020-11-24 MMG unless we know `var` is set + + raise ValueError("bad parameter format {}".format(fmt)) + +def 
safe_arith(arg): + # operations are safe + # `+=` and `=` and family are UNSAFE + # NONPOSIX: `++` and `--` are UNSAFE + # `op="+=1"; $((x $op))` is UNSAFE + + # to determine safety, we: + # (a) check that every arg_char here is safe + # (b) pre-parse it symbolically well enough to ensure that no mutating operations occur + expr = guess_arg(arg) + + if (arg is None): + # TODO 2020-11-25 MMG symbolic pre-parse? + return False + elif ('=' in expr or '++' in expr or '--' in expr): + # TODO 2020-11-25 MMG false negatives: ==, >=, <= + return False + else: + # it's a concrete string that doesn't have mutation operations in it... go for it! + return True + +safe_cases = { + "Pipe": (lambda: + lambda ast_node: safe_pipe(ast_node)), + "Command": (lambda: + lambda ast_node: safe_simple(ast_node)), + "And": (lambda: + lambda ast_node: safe_and_or_semi(ast_node)), + "Or": (lambda: + lambda ast_node: safe_and_or_semi(ast_node)), + "Semi": (lambda: + lambda ast_node: safe_and_or_semi(ast_node)), + "Redir": (lambda: + lambda ast_node: safe_redir_subshell(ast_node)), + "Subshell": (lambda: + lambda ast_node: safe_redir_subshell(ast_node)), + "Background": (lambda: + lambda ast_node: safe_background(ast_node)), + "Defun": (lambda: + lambda ast_node: safe_defun(ast_node)), + "For": (lambda: + lambda ast_node: safe_for(ast_node)), + "While": (lambda: + lambda ast_node: safe_while(ast_node)), + "Case": (lambda: + lambda ast_node: safe_case(ast_node)), + "If": (lambda: + lambda ast_node: safe_if(ast_node)) + } + +def safe_command(command): + # TODO 2020-11-24 MMG which commands are safe to run in advance? + # TODO 2020-11-24 MMG how do we differentiate it being safe to do nested expansions? 
+ global safe_cases + return ast_match(command, safe_cases) + +def safe_pipe(node): + return False + +safe_commands = ["echo", ":"] + +def safe_simple(node: CommandNode): + # TODO 2020-11-25 check redirs, assignments + + if (len(node.arguments) <= 0): + return True + + cmd = guess_arg(node.arguments[0]) + if (cmd is None or cmd not in safe_commands): + return False + else: + return safe_args(node.arguments[1:]) + +def safe_and_or_semi(node): + return False + +def safe_redir_subshell(node): + return False + +def safe_background(node): + return False + +def safe_defun(node): + return False + +def safe_for(node): + return False + +def safe_while(node): + return False + +def safe_case(node): + return False + +def safe_if(node): + return False + +################################################################################ +# EARLY EXPANSION +################################################################################ + +# General approach: +# +# - expand_* functions try to expand the AST +# + words return a string when it works, raises when it doesn't +# TODO MMG 2020-12-14 really should return (intermediate?) 
fields, not a single string +# + commands just set the structural bits appropriately + +# when early expansion detects an error +class EarlyError(ExpansionError): + def __init__(self, arg): + self.arg = arg + +class StuckExpansion(ExpansionError): + def __init__(self, reason, *info): + self.reason = reason + self.info = info + +class ImpureExpansion(ExpansionError): + def __init__(self, reason, *info): + self.reason = reason + self.info = info + +class Unimplemented(ExpansionError): + def __init__(self, msg, ast): + self.msg = msg + self.ast = ast + +class InvalidVariable(ExpansionError): + def __init__(self, var, reason): + self.var = var + self.reason = reason + +def lookup_variable(var, exp_state): + expanded_var = lookup_variable_inner(var, exp_state) + + return None, expanded_var + +## Looksup a variable and flattens it if it is an array +def lookup_variable_inner(varname, exp_state: ExpansionState): + value = lookup_variable_inner_core(varname, exp_state) + if value is not None and not isinstance(value, str): + ## TODO: This is not handled at the moment (and it is unclear if it should be). + ## + ## This is only returned when we are in an array + raise Unimplemented("Expanded value is not None or a string", (varname, value)) + return value + +## Looks up the variable and if it is unset it raises an error +def lookup_variable_inner_core(varname, exp_state: ExpansionState): + value = lookup_variable_inner_unsafe(varname, exp_state) + if value is None and is_u_set(exp_state): + raise StuckExpansion("-u is set and variable was unset", varname) + return value + + +def lookup_variable_inner_unsafe(varname, exp_state: ExpansionState): + ## TODO: Is it in there? If we have -u and it is in there. 
+ _type, value = exp_state.variables.get(varname, [None, None]) + return value + +## This function checks if the -u flag is set +def is_u_set(exp_state: ExpansionState): + value = lookup_variable_inner_unsafe('-', exp_state) + # log(f'Previous set status is: {value}') + return value is not None and "u" in value + + +def invalidate_variable(var, reason, exp_state): + exp_state.variables[var] = [None, InvalidVariable(var, reason)] + return exp_state + + +def expand_args(args, exp_state, quoted = False): + res = [] + for arg in args: + new = expand_arg(arg, exp_state, quoted = quoted) + + # expanded! add the string in + res.append(new) + + splitted_args = split_args(res, exp_state) + + return splitted_args + +def split_args(args, exp_state): + _, ifs = lookup_variable("IFS", exp_state) + + if ifs is None: + ifs = "\n\t " + + ifs = [ord(c) for c in ifs] + + res = [] + for arg in args: + cur = [] + + for c in arg: + if isinstance(c, CArgChar) and c.char in ifs: + # split! + if len(cur) > 0: # TODO(mmg): or if val isn't IFS whitespace + res.append(cur) + cur = [] + else: + cur.append(c) + + if len(cur) > 0: + res.append(cur) + + return res + +def char_code(c) -> ArgChar: + if c in "'\\\"()${}[]*?": + return EArgChar(ord(c)) + else: + return CArgChar(ord(c)) + +def expand_arg(arg_chars, exp_state, quoted = False): + # log(f'expanding arg {arg_chars}") + res = [] + for arg_char in arg_chars: + new = expand_arg_char(arg_char, quoted, exp_state) + + if isinstance(new, str): + res += [char_code(c) for c in list(new)] + else: + res.extend(new) + + return res + +def expand_arg_char(arg_char: ArgChar, quoted, exp_state): + if isinstance(arg_char, CArgChar): + if arg_char.char in ['*', '?', '{', '}', '[', ']'] and not quoted: + raise Unimplemented("globbing", arg_char) + + return [arg_char] + elif isinstance(arg_char, EArgChar): + ## 2021-09-15 MMG Just guessing here + if arg_char.char in ['*', '?', '{', '}', '[', ']'] and not quoted: + raise Unimplemented("globbing", arg_char) + 
return [arg_char] + elif isinstance(arg_char, TArgChar): + val = arg_char.string + if val is None or val == "" or val == "None": + _type, val = lookup_variable("HOME", exp_state) + + if isinstance(val, InvalidVariable): + raise StuckExpansion("HOME invalid for ~", arg_char, val) + elif val is None: + return "~" + else: + return val + else: + # TODO 2020-12-10 getpwnam + raise Unimplemented("~ with prefix", arg_char) + elif isinstance(arg_char, AArgChar): + # TODO 2020-12-10 arithmetic parser and evaluator + raise Unimplemented("arithmetic", arg_char) + elif isinstance(arg_char, QArgChar): + return [QArgChar(expand_arg(arg_char.arg, exp_state, quoted = True))] + # return [['Q', expand_arg(arg_char.arg, exp_state, quoted = True)]] + elif isinstance(arg_char, VArgChar): + return expand_var(fmt=arg_char.fmt, + null=arg_char.null, + var=arg_char.var, + arg=arg_char.arg, + quoted=quoted, + exp_state=exp_state) + elif isinstance(arg_char, BArgChar): + # TODO 2020-12-10 run commands? + raise ImpureExpansion("command substitution", arg_char) + else: + raise Unimplemented("weird object", arg_char) + +def expand_var(fmt, null, var, arg, quoted, exp_state): + # TODO 2020-12-10 special variables + + _type, value = lookup_variable(var, exp_state) + + log(f'Var: {var} value: {value}') + + if isinstance(value, InvalidVariable): + raise StuckExpansion("couldn't expand invalid variable", value) + + if fmt == 'Normal': + if value is None: + return "" + else: + return value + elif fmt == 'Length': + if value is None: + return "0" + else: + return str(len(value)) + elif fmt == 'Minus': + if value is None or (null and value == ""): + return expand_arg(arg, exp_state, quoted = quoted) + else: + return value + elif fmt == 'Assign': + if value is None or (null and value == ""): + raise ImpureExpansion("assignment format on unset/null variable", value, arg) + else: + return value + elif fmt == 'Plus': + if value is None or (null and value == ""): + return "" + else: + return expand_arg(arg, 
exp_state, quoted = quoted) + elif fmt == 'Question': + if value is None or (null and value == ""): + # TODO 2020-12-10 more context probably helpful here + raise EarlyError(expand_arg(arg, exp_state, quoted = quoted)) + else: + return value + elif fmt in ['TrimR', 'TrimRMax', 'TrimL', 'TrimLMax']: + # TODO need patterns + raise Unimplemented("patterns", [fmt, null, var, arg]) + else: + raise ValueError("bad parameter format {}".format(fmt)) + +expand_cases = { + "Pipe": (lambda exp_state: + lambda ast_node: expand_pipe(ast_node, exp_state)), + "Command": (lambda exp_state: + lambda ast_node: expand_simple(ast_node, exp_state)), + "And": (lambda exp_state: + lambda ast_node: expand_and_or_semi(ast_node, exp_state)), + "Or": (lambda exp_state: + lambda ast_node: expand_and_or_semi(ast_node, exp_state)), + "Semi": (lambda exp_state: + lambda ast_node: expand_and_or_semi(ast_node, exp_state)), + "Redir": (lambda exp_state: + lambda ast_node: expand_redir_subshell(ast_node, exp_state)), + "Subshell": (lambda exp_state: + lambda ast_node: expand_redir_subshell(ast_node, exp_state)), + "Background": (lambda exp_state: + lambda ast_node: expand_background(ast_node, exp_state)), + "Defun": (lambda exp_state: + lambda ast_node: expand_defun(ast_node, exp_state)), + "For": (lambda exp_state: + lambda ast_node: expand_for(ast_node, exp_state)), + "While": (lambda exp_state: + lambda ast_node: expand_while(ast_node, exp_state)), + "Case": (lambda exp_state: + lambda ast_node: expand_case(ast_node, exp_state)), + "If": (lambda exp_state: + lambda ast_node: expand_if(ast_node, exp_state)) + } + +def expand_command(command, exp_state: ExpansionState): + # TODO 2020-11-24 MMG which commands are safe to run in advance? + # TODO 2020-11-24 MMG how do we differentiate it being safe to do nested expansions? 
+ global expand_cases + return ast_match(command, expand_cases, exp_state) + +def expand_pipe(node, exp_state): + for i, n in enumerate(node.items): + # copy environment to simulate subshell (no outer effect) + node.items[i] = expand_command(n, copy.deepcopy(exp_state)) + + return node + +def expand_simple(node, exp_state): + # TODO 2020-11-25 MMG is this the order bash does? + node.redir_list = expand_redir_list(node.redir_list, exp_state) + + if len(node.assignments) > 0: + raise ImpureExpansion('assignment', node.assignments) + + node.arguments = expand_args(node.arguments, exp_state) + + return node + +def expand_redir_list(redir_list, exp_state): + for (i, r) in enumerate(redir_list): + redir_list[i] = expand_redir(r, exp_state) + + return redir_list + +def expand_redir(redirection: RedirectionNode, exp_state): + file_arg = expand_arg(redirection.arg, exp_state) + + redirection.arg = file_arg + return redirection + +def expand_and_or_semi(node, exp_state): + node.left_operand = expand_command(node.left_operand, exp_state) + node.right_operand = expand_command(node.right_operand, exp_state) + + return node + +def expand_redir_subshell(node, exp_state): + # copy environment to simulate subshell (no outer effect) + node.node = expand_command(node.node, copy.deepcopy(exp_state)) + + return node + +def expand_background(node, exp_state): + # copy environment to simulate subshell (no outer effect) + node.node = expand_command(node.node, copy.deepcopy(exp_state)) + + return node + +def expand_defun(node, exp_state): + # TODO 2020-11-24 MMG invalidate postional args + node.body = expand_command(node.body, copy.deepcopy(exp_state)) + + return node + +def expand_for(node, exp_state): + node.argument = expand_arg(node.argument, exp_state) + + # TODO 2020-11-24 if node.argument is fully expanded, we can just unroll the loop + exp_state = invalidate_variable(node.variable, "variable of for loop", exp_state) + node.body = expand_command(node.body, exp_state) + + return node 
+ +def expand_while(node, exp_state): + node.test = expand_command(node.test, exp_state) + node.body = expand_command(node.body, exp_state) + + return node + +def expand_case(node, exp_state): + # TODO 2020-11-24 preprocess scrutinee, each pattern, each case + + raise Unimplemented("case statements", node) + +def expand_if(node, exp_state): + node.cond = expand_command(node.cond, exp_state) + node.then_b = expand_command(node.then_b, exp_state) + node.else_b = expand_command(node.else_b, exp_state) + + return node From bdbcb94883cd1ee2b06cb3a74a61e1606202231c Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Tue, 11 Jun 2024 13:55:30 +0000 Subject: [PATCH 4/7] fix: remove duplicated ExpansionError in excepts (compile_ir) Signed-off-by: YUUU23 --- compiler/pash_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 1c1ce56d9..4992740a4 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -95,7 +95,7 @@ def compile_ir(ir_filename, compiled_script_file, args, compiler_config): ) except ExpansionError as e: log("WARNING: Exception caught because some region(s) are not expandable and therefore unparallelizable:", e) - except (ExpansionError, UnparallelizableError) as e: + except UnparallelizableError as e: log("WARNING: Exception caught because some region(s) are unparallelizable:", e) # log(traceback.format_exc()) # uncomment for exact trace report (PaSh user should see informative messages for unparellizable regions) except (AdjLineNotImplementedError, NotImplementedError) as e: From 38aada793f1bc0121174b4b86d8b61e530bb41e5 Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Tue, 11 Jun 2024 19:08:40 -0500 Subject: [PATCH 5/7] refactor: import ExpansionError (ExpansionError class initiated within expand package Signed-off-by: YUUU23 --- compiler/custom_error.py | 5 +---- compiler/pash_compiler.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git 
a/compiler/custom_error.py b/compiler/custom_error.py index c94643310..eedb6f738 100644 --- a/compiler/custom_error.py +++ b/compiler/custom_error.py @@ -2,7 +2,4 @@ class UnparallelizableError(Exception): pass class AdjLineNotImplementedError(Exception): - pass - -class ExpansionError(RuntimeError): - pass \ No newline at end of file + pass \ No newline at end of file diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 4992740a4..080681d88 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -4,6 +4,7 @@ from datetime import datetime from sh_expand import env_vars_util +from sh_expand import ExpansionError import config from ir import * From 7d3dad75ccb6e8a9772bbeee60921c22b412fc02 Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Tue, 11 Jun 2024 19:14:19 -0500 Subject: [PATCH 6/7] delete: remove expand.py changes as it will be changed in the original package Signed-off-by: YUUU23 --- python_pkgs/sh_expand/expand.py | 518 -------------------------------- 1 file changed, 518 deletions(-) delete mode 100644 python_pkgs/sh_expand/expand.py diff --git a/python_pkgs/sh_expand/expand.py b/python_pkgs/sh_expand/expand.py deleted file mode 100644 index d4801a99b..000000000 --- a/python_pkgs/sh_expand/expand.py +++ /dev/null @@ -1,518 +0,0 @@ -import copy - -from shasta.ast_node import * - -from sh_expand.util import log - -from custom_error import * - -################################################################################ -# SAFE EXPANSION ANALYSIS -################################################################################ - -## This contains all necessary state of the expansion -class ExpansionState: - variables: dict - def __init__(self, variables: dict): - self.variables = variables - - def __repr__(self): - return f'ExpansionState: {self.variables}' - -## This function checks if a word is safe to expand (i.e. 
if it will -## not have unpleasant side-effects) -def safe_to_expand(arg_char: ArgChar): - if isinstance(arg_char, VArgChar): # Variable - return True - return False - -def guess_arg(arg): - res = "" - for arg_char in arg: - if isinstance(arg_char, CArgChar) \ - or isinstance(arg_char, EArgChar): - res += chr(arg_char.char) - else: - return None - return res - -def safe_arg(arg): - return all([safe_arg_char(arg_char) for arg_char in arg]) - -def safe_args(args): - return all([safe_arg(arg) for arg in args]) - -def safe_arg_char(arg_char: ArgChar): - # character, escaped---noop, but safe - if isinstance(arg_char, CArgChar) \ - or isinstance(arg_char, EArgChar): - return True - # tilde --- only reads system state, safe to do early assuming no writes to HOME prior - elif isinstance(arg_char, TArgChar): - return True # TODO 2020-11-24 MMG modified variable set? take in/output written vars... - # arithmetic -- depends on what we have - elif isinstance(arg_char, AArgChar): - return safe_arith(arg_char.arg) - # quoted -- safe if its contents are safe - elif isinstance(arg_char, QArgChar): - return safe_arg(arg_char.arg) - # variables -- safe if the format is safe as are the remaining words - elif isinstance(arg_char, VArgChar): - return safe_var(fmt=arg_char.fmt, - null=arg_char.null, - var=arg_char.var, - arg=arg_char.arg) - # command substitution -- depends on the command - elif isinstance(arg_char, BArgChar): - return safe_command(arg_char.node) - - raise ValueError("bad object {}, expected one of CETAVQB".format(arg_char)) - -def safe_var(fmt, null, var, arg): - if (fmt in ['Normal', 'Length']): - return True - elif (fmt in ['Minus', 'Plus', 'TrimR', 'TrimRMax', 'TrimL', 'TrimLMax']): - return safe_arg(arg) - elif (fmt in ['Question']): - return False # TODO 2020-12-10 MMG unless we know `var` is set - elif (fmt in ['Assign']): - return False # TODO 2020-11-24 MMG unless we know `var` is set - - raise ValueError("bad parameter format {}".format(fmt)) - -def 
safe_arith(arg): - # operations are safe - # `+=` and `=` and family are UNSAFE - # NONPOSIX: `++` and `--` are UNSAFE - # `op="+=1"; $((x $op))` is UNSAFE - - # to determine safety, we: - # (a) check that every arg_char here is safe - # (b) pre-parse it symbolically well enough to ensure that no mutating operations occur - expr = guess_arg(arg) - - if (arg is None): - # TODO 2020-11-25 MMG symbolic pre-parse? - return False - elif ('=' in expr or '++' in expr or '--' in expr): - # TODO 2020-11-25 MMG false negatives: ==, >=, <= - return False - else: - # it's a concrete string that doesn't have mutation operations in it... go for it! - return True - -safe_cases = { - "Pipe": (lambda: - lambda ast_node: safe_pipe(ast_node)), - "Command": (lambda: - lambda ast_node: safe_simple(ast_node)), - "And": (lambda: - lambda ast_node: safe_and_or_semi(ast_node)), - "Or": (lambda: - lambda ast_node: safe_and_or_semi(ast_node)), - "Semi": (lambda: - lambda ast_node: safe_and_or_semi(ast_node)), - "Redir": (lambda: - lambda ast_node: safe_redir_subshell(ast_node)), - "Subshell": (lambda: - lambda ast_node: safe_redir_subshell(ast_node)), - "Background": (lambda: - lambda ast_node: safe_background(ast_node)), - "Defun": (lambda: - lambda ast_node: safe_defun(ast_node)), - "For": (lambda: - lambda ast_node: safe_for(ast_node)), - "While": (lambda: - lambda ast_node: safe_while(ast_node)), - "Case": (lambda: - lambda ast_node: safe_case(ast_node)), - "If": (lambda: - lambda ast_node: safe_if(ast_node)) - } - -def safe_command(command): - # TODO 2020-11-24 MMG which commands are safe to run in advance? - # TODO 2020-11-24 MMG how do we differentiate it being safe to do nested expansions? 
- global safe_cases - return ast_match(command, safe_cases) - -def safe_pipe(node): - return False - -safe_commands = ["echo", ":"] - -def safe_simple(node: CommandNode): - # TODO 2020-11-25 check redirs, assignments - - if (len(node.arguments) <= 0): - return True - - cmd = guess_arg(node.arguments[0]) - if (cmd is None or cmd not in safe_commands): - return False - else: - return safe_args(node.arguments[1:]) - -def safe_and_or_semi(node): - return False - -def safe_redir_subshell(node): - return False - -def safe_background(node): - return False - -def safe_defun(node): - return False - -def safe_for(node): - return False - -def safe_while(node): - return False - -def safe_case(node): - return False - -def safe_if(node): - return False - -################################################################################ -# EARLY EXPANSION -################################################################################ - -# General approach: -# -# - expand_* functions try to expand the AST -# + words return a string when it works, raises when it doesn't -# TODO MMG 2020-12-14 really should return (intermediate?) 
fields, not a single string -# + commands just set the structural bits appropriately - -# when early expansion detects an error -class EarlyError(ExpansionError): - def __init__(self, arg): - self.arg = arg - -class StuckExpansion(ExpansionError): - def __init__(self, reason, *info): - self.reason = reason - self.info = info - -class ImpureExpansion(ExpansionError): - def __init__(self, reason, *info): - self.reason = reason - self.info = info - -class Unimplemented(ExpansionError): - def __init__(self, msg, ast): - self.msg = msg - self.ast = ast - -class InvalidVariable(ExpansionError): - def __init__(self, var, reason): - self.var = var - self.reason = reason - -def lookup_variable(var, exp_state): - expanded_var = lookup_variable_inner(var, exp_state) - - return None, expanded_var - -## Looksup a variable and flattens it if it is an array -def lookup_variable_inner(varname, exp_state: ExpansionState): - value = lookup_variable_inner_core(varname, exp_state) - if value is not None and not isinstance(value, str): - ## TODO: This is not handled at the moment (and it is unclear if it should be). - ## - ## This is only returned when we are in an array - raise Unimplemented("Expanded value is not None or a string", (varname, value)) - return value - -## Looks up the variable and if it is unset it raises an error -def lookup_variable_inner_core(varname, exp_state: ExpansionState): - value = lookup_variable_inner_unsafe(varname, exp_state) - if value is None and is_u_set(exp_state): - raise StuckExpansion("-u is set and variable was unset", varname) - return value - - -def lookup_variable_inner_unsafe(varname, exp_state: ExpansionState): - ## TODO: Is it in there? If we have -u and it is in there. 
- _type, value = exp_state.variables.get(varname, [None, None]) - return value - -## This function checks if the -u flag is set -def is_u_set(exp_state: ExpansionState): - value = lookup_variable_inner_unsafe('-', exp_state) - # log(f'Previous set status is: {value}') - return value is not None and "u" in value - - -def invalidate_variable(var, reason, exp_state): - exp_state.variables[var] = [None, InvalidVariable(var, reason)] - return exp_state - - -def expand_args(args, exp_state, quoted = False): - res = [] - for arg in args: - new = expand_arg(arg, exp_state, quoted = quoted) - - # expanded! add the string in - res.append(new) - - splitted_args = split_args(res, exp_state) - - return splitted_args - -def split_args(args, exp_state): - _, ifs = lookup_variable("IFS", exp_state) - - if ifs is None: - ifs = "\n\t " - - ifs = [ord(c) for c in ifs] - - res = [] - for arg in args: - cur = [] - - for c in arg: - if isinstance(c, CArgChar) and c.char in ifs: - # split! - if len(cur) > 0: # TODO(mmg): or if val isn't IFS whitespace - res.append(cur) - cur = [] - else: - cur.append(c) - - if len(cur) > 0: - res.append(cur) - - return res - -def char_code(c) -> ArgChar: - if c in "'\\\"()${}[]*?": - return EArgChar(ord(c)) - else: - return CArgChar(ord(c)) - -def expand_arg(arg_chars, exp_state, quoted = False): - # log(f'expanding arg {arg_chars}") - res = [] - for arg_char in arg_chars: - new = expand_arg_char(arg_char, quoted, exp_state) - - if isinstance(new, str): - res += [char_code(c) for c in list(new)] - else: - res.extend(new) - - return res - -def expand_arg_char(arg_char: ArgChar, quoted, exp_state): - if isinstance(arg_char, CArgChar): - if arg_char.char in ['*', '?', '{', '}', '[', ']'] and not quoted: - raise Unimplemented("globbing", arg_char) - - return [arg_char] - elif isinstance(arg_char, EArgChar): - ## 2021-09-15 MMG Just guessing here - if arg_char.char in ['*', '?', '{', '}', '[', ']'] and not quoted: - raise Unimplemented("globbing", arg_char) - 
return [arg_char] - elif isinstance(arg_char, TArgChar): - val = arg_char.string - if val is None or val == "" or val == "None": - _type, val = lookup_variable("HOME", exp_state) - - if isinstance(val, InvalidVariable): - raise StuckExpansion("HOME invalid for ~", arg_char, val) - elif val is None: - return "~" - else: - return val - else: - # TODO 2020-12-10 getpwnam - raise Unimplemented("~ with prefix", arg_char) - elif isinstance(arg_char, AArgChar): - # TODO 2020-12-10 arithmetic parser and evaluator - raise Unimplemented("arithmetic", arg_char) - elif isinstance(arg_char, QArgChar): - return [QArgChar(expand_arg(arg_char.arg, exp_state, quoted = True))] - # return [['Q', expand_arg(arg_char.arg, exp_state, quoted = True)]] - elif isinstance(arg_char, VArgChar): - return expand_var(fmt=arg_char.fmt, - null=arg_char.null, - var=arg_char.var, - arg=arg_char.arg, - quoted=quoted, - exp_state=exp_state) - elif isinstance(arg_char, BArgChar): - # TODO 2020-12-10 run commands? - raise ImpureExpansion("command substitution", arg_char) - else: - raise Unimplemented("weird object", arg_char) - -def expand_var(fmt, null, var, arg, quoted, exp_state): - # TODO 2020-12-10 special variables - - _type, value = lookup_variable(var, exp_state) - - log(f'Var: {var} value: {value}') - - if isinstance(value, InvalidVariable): - raise StuckExpansion("couldn't expand invalid variable", value) - - if fmt == 'Normal': - if value is None: - return "" - else: - return value - elif fmt == 'Length': - if value is None: - return "0" - else: - return str(len(value)) - elif fmt == 'Minus': - if value is None or (null and value == ""): - return expand_arg(arg, exp_state, quoted = quoted) - else: - return value - elif fmt == 'Assign': - if value is None or (null and value == ""): - raise ImpureExpansion("assignment format on unset/null variable", value, arg) - else: - return value - elif fmt == 'Plus': - if value is None or (null and value == ""): - return "" - else: - return expand_arg(arg, 
exp_state, quoted = quoted) - elif fmt == 'Question': - if value is None or (null and value == ""): - # TODO 2020-12-10 more context probably helpful here - raise EarlyError(expand_arg(arg, exp_state, quoted = quoted)) - else: - return value - elif fmt in ['TrimR', 'TrimRMax', 'TrimL', 'TrimLMax']: - # TODO need patterns - raise Unimplemented("patterns", [fmt, null, var, arg]) - else: - raise ValueError("bad parameter format {}".format(fmt)) - -expand_cases = { - "Pipe": (lambda exp_state: - lambda ast_node: expand_pipe(ast_node, exp_state)), - "Command": (lambda exp_state: - lambda ast_node: expand_simple(ast_node, exp_state)), - "And": (lambda exp_state: - lambda ast_node: expand_and_or_semi(ast_node, exp_state)), - "Or": (lambda exp_state: - lambda ast_node: expand_and_or_semi(ast_node, exp_state)), - "Semi": (lambda exp_state: - lambda ast_node: expand_and_or_semi(ast_node, exp_state)), - "Redir": (lambda exp_state: - lambda ast_node: expand_redir_subshell(ast_node, exp_state)), - "Subshell": (lambda exp_state: - lambda ast_node: expand_redir_subshell(ast_node, exp_state)), - "Background": (lambda exp_state: - lambda ast_node: expand_background(ast_node, exp_state)), - "Defun": (lambda exp_state: - lambda ast_node: expand_defun(ast_node, exp_state)), - "For": (lambda exp_state: - lambda ast_node: expand_for(ast_node, exp_state)), - "While": (lambda exp_state: - lambda ast_node: expand_while(ast_node, exp_state)), - "Case": (lambda exp_state: - lambda ast_node: expand_case(ast_node, exp_state)), - "If": (lambda exp_state: - lambda ast_node: expand_if(ast_node, exp_state)) - } - -def expand_command(command, exp_state: ExpansionState): - # TODO 2020-11-24 MMG which commands are safe to run in advance? - # TODO 2020-11-24 MMG how do we differentiate it being safe to do nested expansions? 
- global expand_cases - return ast_match(command, expand_cases, exp_state) - -def expand_pipe(node, exp_state): - for i, n in enumerate(node.items): - # copy environment to simulate subshell (no outer effect) - node.items[i] = expand_command(n, copy.deepcopy(exp_state)) - - return node - -def expand_simple(node, exp_state): - # TODO 2020-11-25 MMG is this the order bash does? - node.redir_list = expand_redir_list(node.redir_list, exp_state) - - if len(node.assignments) > 0: - raise ImpureExpansion('assignment', node.assignments) - - node.arguments = expand_args(node.arguments, exp_state) - - return node - -def expand_redir_list(redir_list, exp_state): - for (i, r) in enumerate(redir_list): - redir_list[i] = expand_redir(r, exp_state) - - return redir_list - -def expand_redir(redirection: RedirectionNode, exp_state): - file_arg = expand_arg(redirection.arg, exp_state) - - redirection.arg = file_arg - return redirection - -def expand_and_or_semi(node, exp_state): - node.left_operand = expand_command(node.left_operand, exp_state) - node.right_operand = expand_command(node.right_operand, exp_state) - - return node - -def expand_redir_subshell(node, exp_state): - # copy environment to simulate subshell (no outer effect) - node.node = expand_command(node.node, copy.deepcopy(exp_state)) - - return node - -def expand_background(node, exp_state): - # copy environment to simulate subshell (no outer effect) - node.node = expand_command(node.node, copy.deepcopy(exp_state)) - - return node - -def expand_defun(node, exp_state): - # TODO 2020-11-24 MMG invalidate postional args - node.body = expand_command(node.body, copy.deepcopy(exp_state)) - - return node - -def expand_for(node, exp_state): - node.argument = expand_arg(node.argument, exp_state) - - # TODO 2020-11-24 if node.argument is fully expanded, we can just unroll the loop - exp_state = invalidate_variable(node.variable, "variable of for loop", exp_state) - node.body = expand_command(node.body, exp_state) - - return node 
- -def expand_while(node, exp_state): - node.test = expand_command(node.test, exp_state) - node.body = expand_command(node.body, exp_state) - - return node - -def expand_case(node, exp_state): - # TODO 2020-11-24 preprocess scrutinee, each pattern, each case - - raise Unimplemented("case statements", node) - -def expand_if(node, exp_state): - node.cond = expand_command(node.cond, exp_state) - node.then_b = expand_command(node.then_b, exp_state) - node.else_b = expand_command(node.else_b, exp_state) - - return node From 828bec32c55bdbfb567b717c14b848d8bcb12a98 Mon Sep 17 00:00:00 2001 From: YUUU23 Date: Wed, 12 Jun 2024 02:04:57 +0000 Subject: [PATCH 7/7] fix: import expansion error Signed-off-by: YUUU23 --- compiler/pash_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/pash_compiler.py b/compiler/pash_compiler.py index 080681d88..a949457ba 100644 --- a/compiler/pash_compiler.py +++ b/compiler/pash_compiler.py @@ -4,7 +4,7 @@ from datetime import datetime from sh_expand import env_vars_util -from sh_expand import ExpansionError +from sh_expand.expand import ExpansionError import config from ir import *