Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate FileIdGenerator to separate file + refactorings to IR #703

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2931c6c
Move another module to the expansion library
angelhof May 31, 2023
2ee9188
update requirements to have the correct sh_expand version
angelhof May 31, 2023
7a50167
Merge pull request #685 from binpash/move-env_vars_util
angelhof May 31, 2023
61de0e5
Support speculative unsafe commands (like break and continue)
angelhof Jun 1, 2023
cece488
comment
angelhof Jun 1, 2023
050198e
sc warning disable
angelhof Jun 1, 2023
29ccd26
Merge pull request #686 from binpash/support-spec-unsafe
angelhof Jun 1, 2023
3844e91
add airflow variants to transformation types
Forthoney Oct 26, 2023
d26df3c
move transformation state classes to separate module
Forthoney Oct 26, 2023
029c335
refactor to accommodate new organization
Forthoney Oct 26, 2023
3d5fc1e
run formatter + add signatures
Forthoney Oct 26, 2023
4436c96
change starting comment into module docstring
Forthoney Oct 26, 2023
5fd6cdb
Merge pull request #694 from binpash/future-airflow
angelhof Oct 30, 2023
2c4dc86
add black formatter CI
Forthoney Nov 20, 2023
c4803a3
run black formatter on compiler directory
Forthoney Nov 20, 2023
f657d68
apply black formatter to scripts
Forthoney Nov 20, 2023
cb5b85b
change comments (kind of) describing the module into module docstrings
Forthoney Nov 20, 2023
3c0d7a7
move FileIdGen to separate file
Forthoney Nov 24, 2023
86f808f
change function call to method call on fileidgen
Forthoney Nov 24, 2023
0191365
clean file_id_gen
Forthoney Nov 24, 2023
bd155d1
fix references to file_id_gen
Forthoney Nov 25, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/black.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
name: Lint

on: [push, pull_request]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: psf/black@stable
with:
options: "--extend-exclude 'evaluations/'"
73 changes: 55 additions & 18 deletions compiler/annotations_utils/util_cmd_invocations.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,43 @@
from pash_annotations.datatypes.BasicDatatypes import Flag, ArgStringType, Operand
from pash_annotations.datatypes.BasicDatatypesWithIO import OptionWithIO
from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial
from pash_annotations.annotation_generation.datatypes.InputOutputInfo import InputOutputInfo
from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import ParallelizabilityInfo
from pash_annotations.annotation_generation.datatypes.CommandProperties import CommandProperties
from pash_annotations.annotation_generation.AnnotationGeneration import get_input_output_info_from_cmd_invocation, \
get_parallelizability_info_from_cmd_invocation
from pash_annotations.datatypes.CommandInvocationWithIOVars import CommandInvocationWithIOVars
from pash_annotations.annotation_generation.datatypes.InputOutputInfo import (
InputOutputInfo,
)
from pash_annotations.annotation_generation.datatypes.ParallelizabilityInfo import (
ParallelizabilityInfo,
)
from pash_annotations.annotation_generation.datatypes.CommandProperties import (
CommandProperties,
)
from pash_annotations.annotation_generation.AnnotationGeneration import (
get_input_output_info_from_cmd_invocation,
get_parallelizability_info_from_cmd_invocation,
)
from pash_annotations.datatypes.CommandInvocationWithIOVars import (
CommandInvocationWithIOVars,
)

from definitions.ir.arg import Arg

# for typing
from pash_annotations.datatypes.CommandInvocationPrefix import CommandInvocationPrefix

from shell_ast.ast_util import string_to_argument, redir_stdout_to_file, redir_file_to_stdin, make_command
from shell_ast.ast_util import (
string_to_argument,
redir_stdout_to_file,
redir_file_to_stdin,
make_command,
)


def get_command_invocation_prefix_from_dfg_node(dfg_node):
return CommandInvocationPrefix(cmd_name = dfg_node.com_name,
flag_option_list = dfg_node.flag_option_list,
positional_config_list = dfg_node.positional_config_list)
return CommandInvocationPrefix(
cmd_name=dfg_node.com_name,
flag_option_list=dfg_node.flag_option_list,
positional_config_list=dfg_node.positional_config_list,
)


# TODO: ideally methods in the respective classes but requires refactoring of parsing infrastructure
# TODO: isn't this `to_ast`?
Expand Down Expand Up @@ -48,55 +67,70 @@ def to_node_cmd_inv_with_io_vars(cmd_inv, edges, redirs, assignments):
node = make_command(cmd_asts, redirections=new_redirs, assignments=assignments)
return node


def to_ast_flagoption(flagoption, edges):
if isinstance(flagoption, Flag):
return [string_to_argument(flagoption.get_name())]
elif isinstance(flagoption, OptionWithIO): # retype to IOVar
elif isinstance(flagoption, OptionWithIO): # retype to IOVar
opt_name_ast = string_to_argument(flagoption.get_name())
opt_arg_ast = translate_io_var_if_applicable(flagoption.get_arg(), edges)
return [opt_name_ast, opt_arg_ast]


def to_ast_operand(operand, edges):
    """Translate an operand into its AST form.

    When `operand` is an `Operand`, the value returned by its `get_name()`
    is translated; any other value is translated as-is. The translation
    itself is delegated to `translate_io_var_if_applicable`.
    """
    payload = operand.get_name() if isinstance(operand, Operand) else operand
    return translate_io_var_if_applicable(payload, edges)


def translate_io_var_if_applicable(pot_io_var, edges):
# TODO: this is currently a hack but eventually every possible type gets their own to_ast-function
if isinstance(pot_io_var, int):
return dereference_io_var(pot_io_var, edges)
elif isinstance(pot_io_var, ArgStringType):
return to_ast_arg_string_type(pot_io_var)
elif isinstance(pot_io_var, CommandInvocationWithIOVars):
assert(False)
assert False
# only happens as r-wrapped node
return to_node_cmd_inv_with_io_vars(pot_io_var, edges, [], [])
elif isinstance(pot_io_var, Arg):
return pot_io_var.to_ast()
else:
raise Exception("Unhandled type for operand in to_ast!")


def to_ast_arg_string_type(arg_string_type):
return arg_string_type.get_name().arg_char_list # is of type Arg
return arg_string_type.get_name().arg_char_list # is of type Arg


# assumes io_var is an edge id
def dereference_io_var(io_var, edges):
    """Return the AST of the first element stored under `io_var` in `edges`.

    `edges[io_var]` must unpack into exactly three items; only the first
    one is used, and its `to_ast()` result is returned.
    """
    edge_fid, _second, _third = edges[io_var]
    fid_ast = edge_fid.to_ast()
    return fid_ast

def get_input_output_info_from_cmd_invocation_util(cmd_invocationInitial : CommandInvocationInitial) -> InputOutputInfo:

def get_input_output_info_from_cmd_invocation_util(
cmd_invocationInitial: CommandInvocationInitial,
) -> InputOutputInfo:
return get_input_output_info_from_cmd_invocation(cmd_invocationInitial)

def get_parallelizability_info_from_cmd_invocation_util(cmd_invocationInitial : CommandInvocationInitial) -> ParallelizabilityInfo:

def get_parallelizability_info_from_cmd_invocation_util(
cmd_invocationInitial: CommandInvocationInitial,
) -> ParallelizabilityInfo:
return get_parallelizability_info_from_cmd_invocation(cmd_invocationInitial)


def construct_property_container_from_list_of_properties(list_properties):
    """Wrap `dict(list_properties)` in a `CommandProperties` container."""
    property_map = dict(list_properties)
    return CommandProperties(property_map)


# this function is needed to wrap a node in `r_wrap`
def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping(cmd_inv, edges):
def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wrapping(
cmd_inv, edges
):
# we already expand here
whole_cmd = Arg.string_to_arg("\'")
whole_cmd = Arg.string_to_arg("'")
arg_cmd_name = Arg.string_to_arg(cmd_inv.cmd_name)
arg_flagoptions = []
for flagoption in cmd_inv.flag_option_list:
Expand All @@ -107,9 +141,10 @@ def to_arg_from_cmd_inv_with_io_vars_without_streaming_inputs_or_outputs_for_wra
all_cmd_parts_arg.extend(arg_operands)
for part in all_cmd_parts_arg:
whole_cmd.concatenate(part)
whole_cmd.concatenate(Arg.string_to_arg("\'"))
whole_cmd.concatenate(Arg.string_to_arg("'"))
return whole_cmd


def to_arg_flagoption(flagoption, edges):
if isinstance(flagoption, Flag):
return [Arg.string_to_arg(flagoption.get_name())]
Expand All @@ -118,11 +153,13 @@ def to_arg_flagoption(flagoption, edges):
opt_arg_arg = translate_io_var_to_arg_if_applicable(flagoption.get_arg(), edges)
return [opt_name_arg, opt_arg_arg]


def to_arg_operand(operand, edges):
    """Translate an operand via `translate_io_var_to_arg_if_applicable`.

    When `operand` is an `Operand`, the value returned by its `get_name()`
    is translated; any other value is translated as-is.
    """
    payload = operand.get_name() if isinstance(operand, Operand) else operand
    return translate_io_var_to_arg_if_applicable(payload, edges)


def translate_io_var_to_arg_if_applicable(pot_io_var, edges):
if isinstance(pot_io_var, int):
return Arg(dereference_io_var(pot_io_var, edges))
Expand Down
13 changes: 8 additions & 5 deletions compiler/annotations_utils/util_file_descriptors.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
from util import log
from definitions.ir.resource import FileResource, Resource, FileDescriptorResource
from pash_annotations.datatypes.BasicDatatypesWithIO import FileNameWithIOInfo, StdDescriptorWithIOInfo
from pash_annotations.datatypes.BasicDatatypesWithIO import (
FileNameWithIOInfo,
StdDescriptorWithIOInfo,
)


def resource_from_file_descriptor(file_descriptor) -> Resource:
if isinstance(file_descriptor, FileNameWithIOInfo):
arg = file_descriptor.get_name()
log(f'filedes name: {file_descriptor.get_name()}')
log(f'filedes name type: {type(file_descriptor.get_name())}')
log(f'arg: {arg}')
log(f"filedes name: {file_descriptor.get_name()}")
log(f"filedes name type: {type(file_descriptor.get_name())}")
log(f"arg: {arg}")
return FileResource(file_descriptor.get_name())
elif isinstance(file_descriptor, StdDescriptorWithIOInfo):
resource = ("fd", file_descriptor.get_type().value)
return FileDescriptorResource(resource)
else:
assert(False)
assert False
# unreachable
68 changes: 51 additions & 17 deletions compiler/annotations_utils/util_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,20 @@
from definitions.ir.arg import Arg

from pash_annotations.datatypes.CommandInvocationInitial import CommandInvocationInitial
from pash_annotations.datatypes.BasicDatatypes import Option, ArgStringType, Flag, Operand
from pash_annotations.parser.parser import parse, get_set_of_all_flags, get_dict_flag_to_primary_repr, get_set_of_all_options, \
get_dict_option_to_primary_repr, are_all_individually_flags
from pash_annotations.datatypes.BasicDatatypes import (
Option,
ArgStringType,
Flag,
Operand,
)
from pash_annotations.parser.parser import (
parse,
get_set_of_all_flags,
get_dict_flag_to_primary_repr,
get_set_of_all_options,
get_dict_option_to_primary_repr,
are_all_individually_flags,
)
from pash_annotations.parser.util_parser import get_json_data


Expand All @@ -18,40 +29,51 @@ def merge_to_single_string_with_space(list_str):
else:
return " ".join(list_str)


def get_command_invocation(command, options) -> CommandInvocationInitial:
command_as_string: str = format_arg_chars(command)
options_and_operands_as_string: str = merge_to_single_string_with_space([format_arg_chars(option) for option in options])
command_invocation_as_string: str = f'{command_as_string} {options_and_operands_as_string}'
options_and_operands_as_string: str = merge_to_single_string_with_space(
[format_arg_chars(option) for option in options]
)
command_invocation_as_string: str = (
f"{command_as_string} {options_and_operands_as_string}"
)
command_invocation: CommandInvocationInitial = parse(command_invocation_as_string)
return command_invocation


def get_ast_for_flagoption(flagoption):
    """Return the AST argument built from the name of `flagoption`.

    Only the name is converted (through `string_to_argument`). `Option`
    instances are not supported yet and trip an assertion.
    """
    name_ast = string_to_argument(flagoption.get_name())
    # TODO: add argument here as well but eventually also fid
    assert not isinstance(flagoption, Option)
    return name_ast


def get_ast_for_argstringtype(arg):
    """Return `string_to_argument` applied to the value of `arg.get_name()`."""
    arg_name = arg.get_name()
    return string_to_argument(arg_name)


# TODO: this is a hack to fix the wrong parsing of "
def fix_parsing_newline(arg):
if arg.get_name() == '\\n':
if arg.get_name() == "\\n":
return ArgStringType(r'"\n"')
else:
return arg


def parse_arg_list_to_command_invocation(command, flags_options_operands) -> CommandInvocationInitial:

def parse_arg_list_to_command_invocation(
command, flags_options_operands
) -> CommandInvocationInitial:
cmd_name = format_arg_chars(command)
json_data = get_json_data(cmd_name)

set_of_all_flags: Set[str] = get_set_of_all_flags(json_data)
dict_flag_to_primary_repr: dict[str, str] = get_dict_flag_to_primary_repr(json_data)
set_of_all_options: Set[str] = get_set_of_all_options(json_data)
dict_option_to_primary_repr: dict[str, str] = get_dict_option_to_primary_repr(json_data)
dict_option_to_primary_repr: dict[str, str] = get_dict_option_to_primary_repr(
json_data
)
# we keep the Arg for everything but flag and option names

# parse list of command invocation terms
Expand All @@ -61,20 +83,30 @@ def parse_arg_list_to_command_invocation(command, flags_options_operands) -> Com
potential_flag_or_option_arg = flags_options_operands[i]
potential_flag_or_option_name = format_arg_chars(potential_flag_or_option_arg)
if potential_flag_or_option_name in set_of_all_flags:
flag_name_as_string: str = dict_flag_to_primary_repr.get(potential_flag_or_option_name, potential_flag_or_option_name)
flag_name_as_string: str = dict_flag_to_primary_repr.get(
potential_flag_or_option_name, potential_flag_or_option_name
)
flag: Flag = Flag(flag_name_as_string)
flag_option_list.append(flag)
elif (potential_flag_or_option_name in set_of_all_options) and ((i+1) < len(flags_options_operands)):
option_name_as_string: str = dict_option_to_primary_repr.get(potential_flag_or_option_name, potential_flag_or_option_name)
option_arg_as_arg: Arg = Arg(flags_options_operands[i+1])
elif (potential_flag_or_option_name in set_of_all_options) and (
(i + 1) < len(flags_options_operands)
):
option_name_as_string: str = dict_option_to_primary_repr.get(
potential_flag_or_option_name, potential_flag_or_option_name
)
option_arg_as_arg: Arg = Arg(flags_options_operands[i + 1])
option = Option(option_name_as_string, option_arg_as_arg)
flag_option_list.append(option)
i += 1 # since we consumed another term for the argument
elif potential_flag_or_option_name == "-": # switch to operand mode (interpreted as hyphen-stdin)
elif (
potential_flag_or_option_name == "-"
): # switch to operand mode (interpreted as hyphen-stdin)
break
elif are_all_individually_flags(potential_flag_or_option_name, set_of_all_flags):
elif are_all_individually_flags(
potential_flag_or_option_name, set_of_all_flags
):
for split_el in list(potential_flag_or_option_name[1:]):
flag: Flag = Flag(f'-{split_el}')
flag: Flag = Flag(f"-{split_el}")
flag_option_list.append(flag)
else:
break # next one is Operand, and we keep these in separate list
Expand All @@ -85,7 +117,9 @@ def parse_arg_list_to_command_invocation(command, flags_options_operands) -> Com
# if parsed_elements_list[i] == '--':
# i += 1

operand_list = [Operand(Arg(operand_arg)) for operand_arg in flags_options_operands[i:]]
operand_list = [
Operand(Arg(operand_arg)) for operand_arg in flags_options_operands[i:]
]
# log("type of operand_list[0].get_name()", type(operand_list[0].get_name())) can only be used if there are operands

return CommandInvocationInitial(cmd_name, flag_option_list, operand_list)
Loading
Loading