From 1054e35f9faa65f87c3c5c623b9d0516a27441e5 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Mon, 29 Jul 2024 17:09:59 +0200 Subject: [PATCH 1/3] perf: disabled threading --- .../pattern_detectors/do_all_detector.py | 12 ++++++++---- .../pattern_detectors/reduction_detector.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/discopop_explorer/pattern_detectors/do_all_detector.py b/discopop_explorer/pattern_detectors/do_all_detector.py index 7d7de95ee..88001ae8c 100644 --- a/discopop_explorer/pattern_detectors/do_all_detector.py +++ b/discopop_explorer/pattern_detectors/do_all_detector.py @@ -101,10 +101,14 @@ def run_detection( nodes = cast(List[LoopNode], filter_for_hotspots(pet, cast(List[Node], nodes), hotspots)) param_list = [(node) for node in nodes] - with Pool(initializer=__initialize_worker, initargs=(pet,)) as pool: - tmp_result = list(tqdm.tqdm(pool.imap_unordered(__check_node, param_list), total=len(param_list))) - for local_result in tmp_result: - result += local_result + # with Pool(initializer=__initialize_worker, initargs=(pet,)) as pool: + # tmp_result = list(tqdm.tqdm(pool.imap_unordered(__check_node, param_list), total=len(param_list))) + # for local_result in tmp_result: + # result += local_result + + for tuple in param_list: + result += __check_node(tuple) + print("GLOBAL RES: ", [r.start_line for r in result]) for pattern in result: diff --git a/discopop_explorer/pattern_detectors/reduction_detector.py b/discopop_explorer/pattern_detectors/reduction_detector.py index 9fced17fa..2229e965e 100644 --- a/discopop_explorer/pattern_detectors/reduction_detector.py +++ b/discopop_explorer/pattern_detectors/reduction_detector.py @@ -87,10 +87,14 @@ def run_detection( warnings.warn("REDUCTION DETECTION CURRENTLY ASSUMES THE EXISTENCE OF DEPENDENCY METADATA!") param_list = [(node) for node in nodes] - with Pool(initializer=__initialize_worker, initargs=(pet,)) as pool: - tmp_result = list(tqdm.tqdm(pool.imap_unordered(__check_node, param_list), total=len(param_list))) - for local_result in tmp_result: - result += local_result + # with Pool(initializer=__initialize_worker, initargs=(pet,)) as pool: + # tmp_result = list(tqdm.tqdm(pool.imap_unordered(__check_node, param_list), total=len(param_list))) + # for local_result in tmp_result: + # result += local_result + + for tuple in param_list: + result += __check_node(tuple) + print("GLOBAL RES: ", [r.start_line for r in result]) for pattern in result: From ed1a50e91e5d649afd968ed18d656b42d5763197 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 30 Jul 2024 09:48:21 +0200 Subject: [PATCH 2/3] perf: PEGraphX.get_variables --- discopop_explorer/PEGraphX.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/discopop_explorer/PEGraphX.py b/discopop_explorer/PEGraphX.py index d7840b6b2..461a99461 100644 --- a/discopop_explorer/PEGraphX.py +++ b/discopop_explorer/PEGraphX.py @@ -173,9 +173,9 @@ class Node: end_line: int type: NodeType name: str - parent_function_id: Optional[NodeID] = ( - None # metadata to speedup some calculations (TODO FunctionNodes have themselves as parent) - ) + parent_function_id: Optional[ + NodeID + ] = None # metadata to speedup some calculations (TODO FunctionNodes have themselves as parent) workload: Optional[int] = None # properties of CU Nodes @@ -1008,10 +1008,12 @@ def node_at(self, node_id: NodeID) -> Node: NodeT = TypeVar("NodeT", bound=Node) @overload - def all_nodes(self) -> List[Node]: ... + def all_nodes(self) -> List[Node]: + ... @overload - def all_nodes(self, type: Union[Type[NodeT], Tuple[Type[NodeT], ...]]) -> List[NodeT]: ... + def all_nodes(self, type: Union[Type[NodeT], Tuple[Type[NodeT], ...]]) -> List[NodeT]: + ... def all_nodes(self, type: Any = Node) -> List[NodeT]: """List of all nodes of specified type @@ -1054,10 +1056,12 @@ def in_edges( return [t for t in self.g.in_edges(node_id, data="data") if t[2].etype == etype] @overload - def subtree_of_type(self, root: Node) -> List[Node]: ... + def subtree_of_type(self, root: Node) -> List[Node]: + ... @overload - def subtree_of_type(self, root: Node, type: Union[Type[NodeT], Tuple[Type[NodeT], ...]]) -> List[NodeT]: ... + def subtree_of_type(self, root: Node, type: Union[Type[NodeT], Tuple[Type[NodeT], ...]]) -> List[NodeT]: + ... def subtree_of_type(self, root: Node, type: Any = Node) -> List[NodeT]: """Gets all nodes in subtree of specified type including root @@ -1069,12 +1073,14 @@ def subtree_of_type(self, root: Node, type: Any = Node) -> List[NodeT]: return self.subtree_of_type_rec(root, set(), type) @overload - def subtree_of_type_rec(self, root: Node, visited: Set[Node]) -> List[Node]: ... + def subtree_of_type_rec(self, root: Node, visited: Set[Node]) -> List[Node]: + ... @overload def subtree_of_type_rec( self, root: Node, visited: Set[Node], type: Union[Type[NodeT], Tuple[Type[NodeT], ...]] - ) -> List[NodeT]: ... + ) -> List[NodeT]: + ... def subtree_of_type_rec(self, root: Node, visited: Set[Node], type: Any = Node) -> List[NodeT]: """recursive helper function for subtree_of_type""" @@ -1308,12 +1314,14 @@ def get_variables(self, nodes: Sequence[Node]) -> Dict[Variable, Set[MemoryRegio for v in node.global_vars: if v not in res: res[v] = set() + out_data_edges = self.out_edges(node.id, EdgeType.DATA) + in_data_edges = self.in_edges(node.id, EdgeType.DATA) # try to identify memory regions for var_name in res: # since the variable name is checked for equality afterwards, # it is safe to consider incoming dependencies at this point as well. # Note that INIT type edges are considered as well! - for _, _, dep in self.out_edges(node.id, EdgeType.DATA) + self.in_edges(node.id, EdgeType.DATA): + for _, _, dep in out_data_edges + in_data_edges: if dep.var_name == var_name.name: if dep.memory_region is not None: res[var_name].add(dep.memory_region) From 00bc5816dbaa5226df57e3ea894f4f02c8c874c9 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 30 Jul 2024 13:05:08 +0200 Subject: [PATCH 3/3] perf: buffer in_data_edges and out_data_edges --- discopop_explorer/PEGraphX.py | 120 +++++++++++++++++- .../pattern_detectors/reduction_detector.py | 27 +++- 2 files changed, 140 insertions(+), 7 deletions(-) diff --git a/discopop_explorer/PEGraphX.py b/discopop_explorer/PEGraphX.py index 461a99461..22c4cdca3 100644 --- a/discopop_explorer/PEGraphX.py +++ b/discopop_explorer/PEGraphX.py @@ -1316,13 +1316,68 @@ def get_variables(self, nodes: Sequence[Node]) -> Dict[Variable, Set[MemoryRegio res[v] = set() out_data_edges = self.out_edges(node.id, EdgeType.DATA) in_data_edges = self.in_edges(node.id, EdgeType.DATA) + + # split depdendencies by variable names + var_name_to_deps: Dict[Optional[str], List[Dependency]] = dict() + for _, _, dep in out_data_edges + in_data_edges: + if dep.var_name not in var_name_to_deps: + var_name_to_deps[dep.var_name] = [] + var_name_to_deps[dep.var_name].append(dep) + + # try to identify memory regions + for var_name in res: + # since the variable name is checked for equality afterwards, + # it is safe to consider incoming dependencies at this point as well. + # Note that INIT type edges are considered as well! + # for _, _, dep in out_data_edges + in_data_edges: + # if dep.var_name == var_name.name: + # if dep.memory_region is not None: + # res[var_name].add(dep.memory_region) + if var_name.name in var_name_to_deps: + for dep in var_name_to_deps[var_name.name]: + if dep.memory_region is not None: + res[var_name].add(dep.memory_region) + return res + + def get_variables_using_buffered_dependencies( + self, + nodes: Sequence[Node], + in_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], + out_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], + ) -> Dict[Variable, Set[MemoryRegion]]: + """Gets all variables and corresponding memory regions in nodes + + :param nodes: nodes + :return: Set of variables + """ + res: Dict[Variable, Set[MemoryRegion]] = dict() + for node in nodes: + if isinstance(node, CUNode): + for v in node.local_vars: + if v not in res: + res[v] = set() + for v in node.global_vars: + if v not in res: + res[v] = set() + + # split depdendencies by variable names + var_name_to_deps: Dict[Optional[str], List[Dependency]] = dict() + for _, _, dep in out_data_dependencies[node.id] + in_data_dependencies[node.id]: + if dep.var_name not in var_name_to_deps: + var_name_to_deps[dep.var_name] = [] + var_name_to_deps[dep.var_name].append(dep) + # try to identify memory regions for var_name in res: # since the variable name is checked for equality afterwards, # it is safe to consider incoming dependencies at this point as well. # Note that INIT type edges are considered as well! - for _, _, dep in out_data_edges + in_data_edges: - if dep.var_name == var_name.name: + # for _, _, dep in out_data_dependencies[node.id] + in_data_dependencies[node.id]: + # if dep.var_name == var_name.name: + # if dep.memory_region is not None: + # res[var_name].add(dep.memory_region) + if var_name.name in var_name_to_deps: + for dep in var_name_to_deps[var_name.name]: if dep.memory_region is not None: res[var_name].add(dep.memory_region) return res @@ -1427,6 +1482,33 @@ def is_loop_index(self, var_name: Optional[str], loops_start_lines: List[LineID] return False + def is_loop_index_using_buffered_dependencies( + self, + var_name: Optional[str], + loops_start_lines: List[LineID], + children: Sequence[Node], + out_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], + ) -> bool: + """Checks, whether the variable is a loop index. + + :param var_name: name of the variable + :param loops_start_lines: start lines of the loops + :param children: children nodes of the loops + :return: true if edge represents loop index + """ + + # If there is a raw dependency for var, the source cu is part of the loop + # and the dependency occurs in loop header, then var is loop index+ + + for c in children: + for t, d in [ + (t, d) for s, t, d in out_data_dependencies[c.id] if d.dtype == DepType.RAW and d.var_name == var_name + ]: + if d.sink_line == d.source_line and d.source_line in loops_start_lines and self.node_at(t) in children: + return True + + return False + def is_readonly_inside_loop_body( self, dep: Dependency, @@ -1461,6 +1543,40 @@ def is_readonly_inside_loop_body( return False return True + def is_readonly_inside_loop_body_using_buffered_dependencies( + self, + dep: Dependency, + root_loop: Node, + children_cus: Sequence[Node], + children_loops: Sequence[Node], + in_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], + out_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], + loops_start_lines: Optional[List[LineID]] = None, + ) -> bool: + """Checks, whether a variable is read-only in loop body + + :param dep: dependency variable + :param root_loop: root loop + :return: true if variable is read-only in loop body + """ + if loops_start_lines is None: + loops_start_lines = [v.start_position() for v in children_loops] + + for v in children_cus: + for t, d in [ + (t, d) for s, t, d in out_data_dependencies[v.id] if d.dtype == DepType.WAR or d.dtype == DepType.WAW + ]: + # If there is a waw dependency for var, then var is written in loop + # (sink is always inside loop for waw/war) + if dep.memory_region == d.memory_region and not (d.sink_line in loops_start_lines): + return False + for t, d in [(t, d) for s, t, d in in_data_dependencies[v.id] if d.dtype == DepType.RAW]: + # If there is a reverse raw dependency for var, then var is written in loop + # (source is always inside loop for reverse raw) + if dep.memory_region == d.memory_region and not (d.source_line in loops_start_lines): + return False + return True + def get_parent_function(self, node: Node) -> FunctionNode: """Finds the parent of a node diff --git a/discopop_explorer/pattern_detectors/reduction_detector.py b/discopop_explorer/pattern_detectors/reduction_detector.py index 2229e965e..f96557e5f 100644 --- a/discopop_explorer/pattern_detectors/reduction_detector.py +++ b/discopop_explorer/pattern_detectors/reduction_detector.py @@ -156,9 +156,18 @@ def __detect_reduction(pet: PEGraphX, root: LoopNode) -> bool: parent_function_lineid = pet.get_parent_function(root).start_position() called_functions_lineids = __get_called_functions(pet, root) + # get in and out data dependencies for CUs in loop + in_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]] = dict() + out_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]] = dict() + for rc_cu in root_children_cus: + in_data_dependencies[rc_cu.id] = pet.in_edges(rc_cu.id, EdgeType.DATA) + out_data_dependencies[rc_cu.id] = pet.out_edges(rc_cu.id, EdgeType.DATA) + # get variables which are defined inside the loop defined_inside_loop: List[Tuple[Variable, Set[MemoryRegion]]] = [] - tmp_loop_variables = pet.get_variables(root_children_cus) + tmp_loop_variables = pet.get_variables_using_buffered_dependencies( + root_children_cus, in_data_dependencies, out_data_dependencies + ) for var in tmp_loop_variables: if ":" in var.defLine: file_id = int(var.defLine.split(":")[0]) @@ -181,6 +190,8 @@ def __detect_reduction(pet: PEGraphX, root: LoopNode) -> bool: parent_loops, parent_function_lineid, called_functions_lineids, + in_data_dependencies, + out_data_dependencies, ): return False @@ -206,6 +217,8 @@ def __check_loop_dependencies( parent_loops: List[LineID], parent_function_lineid: LineID, called_functions_lineids: List[LineID], + in_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], + out_data_dependencies: Dict[NodeID, List[Tuple[NodeID, NodeID, Dependency]]], ) -> bool: """Returns True, if dependencies between the respective subgraphs chave been found. Returns False otherwise, which results in the potential suggestion of a Reduction pattern.""" @@ -215,8 +228,8 @@ def __check_loop_dependencies( # get dependency edges between children nodes deps = set() for n in loop_children_ids: - deps.update([(s, t, d) for s, t, d in pet.in_edges(n, EdgeType.DATA) if s in loop_children_ids]) - deps.update([(s, t, d) for s, t, d in pet.out_edges(n, EdgeType.DATA) if t in loop_children_ids]) + deps.update([(s, t, d) for s, t, d in in_data_dependencies[n] if s in loop_children_ids]) + deps.update([(s, t, d) for s, t, d in out_data_dependencies[n] if t in loop_children_ids]) # get memory regions which are defined inside the loop memory_regions_defined_in_loop = set() @@ -225,18 +238,22 @@ def __check_loop_dependencies( for source, target, dep in deps: # check if targeted variable is readonly inside loop - if pet.is_readonly_inside_loop_body( + if pet.is_readonly_inside_loop_body_using_buffered_dependencies( dep, root_loop, root_children_cus, root_children_loops, + in_data_dependencies, + out_data_dependencies, loops_start_lines=loop_start_lines, ): # variable is readonly -> no problem continue # check if targeted variable is loop index - if pet.is_loop_index(dep.var_name, loop_start_lines, root_children_cus): + if pet.is_loop_index_using_buffered_dependencies( + dep.var_name, loop_start_lines, root_children_cus, out_data_dependencies + ): continue # ignore dependencies where either source or sink do not lie within root_loop