From a9b2059d76bb82b183403ac4f5198936d16e11af Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Fri, 25 Jun 2021 07:56:40 -0500 Subject: [PATCH 01/22] first draft of a controller-worker wrapper for heat --- heat/cw4heat/__init__.py | 256 ++++++++++++++++++++++++++++++++ heat/cw4heat/arrayapi.py | 288 ++++++++++++++++++++++++++++++++++++ heat/cw4heat/distributor.py | 175 ++++++++++++++++++++++ 3 files changed, 719 insertions(+) create mode 100644 heat/cw4heat/__init__.py create mode 100644 heat/cw4heat/arrayapi.py create mode 100644 heat/cw4heat/distributor.py diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py new file mode 100644 index 0000000000..28698df781 --- /dev/null +++ b/heat/cw4heat/__init__.py @@ -0,0 +1,256 @@ +# MIT License + +# Copyright (c) 2021 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +############################################################################### +# This provides a wrapper around SPMD-based HeAT +# (github.com/helmholtz-analytics/heat) to operate in controller-worker mode. + +# The goal is to provide a compliant implementation of the array API +# (github.com/data-apis/arra-api). + +# Returned array (DNDArray) objects are handles/futures only. Their content is +# available through __int__ etc., through __partitioned__ or heat(). Notice: this +# allows for delayed execution and optimizations of the workflow/task-graph and +# communication. + +# For a function/method of the array-API that is executed on the controller +# process, this wrapper generates the equivalent source code to be executed on +# the worker processes. The code is then sent to each remote worker and +# executed there. + +# It's up to the distribution layer (e.g. distributor) to make sure the code is +# executed in the right order on each process/worker so that collective +# communication in HeAT can operate correctly without dead-locks. + +# To allow workflow optimizations array dependences and to avoid +# pickle-dependencies to the array inputs we separate scalar/non-array arguments +# from array arguments. For this we assume that array arguments never occur +# after non-array arguments. Each function.task handles and passes array-typed +# and non-array-types arguments separately. +############################################################################### + +from . 
import distributor +from .arrayapi import ( + aa_attributes, + aa_tlfuncs, + aa_datatypes, + aa_constants, + aa_methods_s, + aa_methods_a, + aa_inplace_operators, + aa_reflected_operators, +) + +# just in case we find another SPMD/MPI implementation of numpy... +import heat as impl +from heat import DNDarray as dndarray +impl_str = "impl" +dndarray_str = "impl.DNDarray" + +def init(): + ''' + Initialize distribution engine. + For now we assume all ranks (controller and workers) are started through mpirun, + workers will never leave distributor.start() and so this function. + Call this as the very first thing in your program. For now it is recommended + to start your program with + + import heat.cw4heat as ht + ht.init() + + Also call fini() before exiting. + ''' + distributor.init() + distributor.start() + + +def fini(): + ''' + Finalize/shutdown distribution engine. + When called on controller, workers will sys.exit from init(). + ''' + distributor.fini() + + +class _Task: + 'A work item, executing functions provided as code.' + def __init__(self, func, args, kwargs, unwrap='*'): + self._func = func + self._args = args + self._kwargs = kwargs + self._unwrap = unwrap + + def run(self, deps): + if deps: + return eval(f"{self._func}({self._unwrap}deps, *self._args, **self._kwargs)") + else: + return eval(f"{self._func}(*self._args, **self._kwargs)") + + +class _PropertyTask: + 'A work item, executing class properties provided as code.' + def __init__(self, func): + self._func = func + + def run(self, deps): + return eval(f"deps[0].{self._func}") + + +def _submit(name, args, kwargs, unwrap='*'): + ''' + Create a _Task and submit, return PManager/Future. + ''' + scalar_args = tuple(x for x in args if not isinstance(x, DDParray)) + deps = [x._handle.getId() for x in args if isinstance(x, DDParray)] + return distributor.submitPP(_Task(name, scalar_args, kwargs, unwrap=unwrap), deps) + + +def _submitProperty(name, self): + ''' + Create a _PropertyTask (property) and submit, return PManager/Future. + ''' + t = _PropertyTask(name) + try: + res = distributor.submitPP(t, [self._handle.getId()]) + except: + assert False + return res + + +# setitem has scalar arg key before array arg value +# we need to provide a function accepting the inverse order +def _setitem_normalized(self, value, key): + self.__setitem__(key, value) + + +####################################################################### +# Our array is just a wrapper. Actual array is stored as a handle to +# allow delayed execution. +####################################################################### +class DDParray: + ''' + Shallow wrapper class representing a distributed array. + It will be filled dynamically from lists extracted from the array-API. + All functionality is delegated to the underlying implementation, + executed in tasks. + ''' + + ####################################################################### + # first define methods/properties which need special care. + ####################################################################### + + def __init__(self, handle): + 'Do not use this array. Use creator functions instead.' + self._handle = handle + + def heat(self): + ''' + Return heat native array. + With delayed execution, triggers computation as needed and blocks until array is available. + ''' + return self._handle.get() + + def __getitem__(self, key): + 'Return item/slice as array.' 
+ return DDParray(_submit(f'{dndarray_str}.__getitem__', (self, key), {})) + + # bring args in the order we can process and feed into normal process + # using global normalized version + def __setitem__(self, key, value): + 'set item/slice to given value' + _submit(f'_setitem_normalized', (self, value, key), {}) + + @property + def T(self): + return DDParray(_submitProperty('T', self)) + + + ####################################################################### + # Now we add methods/properties through the standard process. + ####################################################################### + + # dynamically generate class methods from list of methods in array-API + # we simply make lambdas which submit appropriate Tasks + # FIXME: aa_inplace_operators,others? + fixme_afuncs = ['squeeze', 'astype', 'balance',] + for method in aa_methods_a + aa_reflected_operators + fixme_afuncs: + if method not in ['__getitem__', '__setitem__'] and hasattr(dndarray, method): + exec(f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))") + + for method in aa_methods_s: + if hasattr(dndarray, method): + exec(f"{method} = lambda self, *args, **kwargs: _submit('{dndarray_str}.{method}', (self, *args), kwargs).get()") + + for attr in aa_attributes: + if attr != 'T' and hasattr(dndarray, attr): + exec(f"{attr} = property(lambda self: self._handle.get().{attr})") + + def __getattr__(self, attr): + # attributes are special + if not attr in aa_attributes: + raise Exception(f"unknown method/attribute {attr} requested") + + +####################################################################### +# first define top-level functions which need special care. +####################################################################### + +# np.concatenate accepts a list of arrays (not individual arrays) +# so we let the task not unwrap the list of deps +def concatenate(*args, **kwargs): + return DDParray(_submit(f'{impl_str}.concatenate', *args, kwargs, unwrap='')) + + +####################################################################### +# first define top-level functions through the standard process. 
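+# The loops below generate these functions dynamically; each generated
+# function is roughly equivalent to this hand-written version
+# (illustration only, shown here for "ones"):
+#
+#   def ones(*args, **kwargs):
+#       return DDParray(_submit("impl.ones", args, kwargs))
+#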
+####################################################################### +# - creating arrays +# - elementswise operations +# - statistical operations +# (lists taken from list of methods in array-API) +# Again, we simply make lambdas which submit appropriate Tasks + +fixme_funcs = ['load_csv'] +for func in aa_tlfuncs + fixme_funcs: + exec(f"{func} = lambda *args, **kwargs: DDParray(_submit('{impl_str}.{func}', args, kwargs))") + + +def concatenate(*args, **kwargs): + return DDParray(_submit(f'{impl_str}.concatenate', *args, kwargs, unwrap='')) + + +# Here we data types and constants +for attr in aa_datatypes + aa_constants: + if hasattr(impl, attr): + exec(f"{attr} = {impl_str}.{attr}") + else: + print(f"{impl.__name__} has no {attr}") + + +####################################################################### +# quick hack to provide random features +####################################################################### +class random: + for method, obj in impl.random.__dict__.items(): + if callable(obj): + exec(f"{method} = staticmethod(lambda *args, **kwargs: DDParray(_submit('{impl_str}.random.{method}', args, kwargs)))") diff --git a/heat/cw4heat/arrayapi.py b/heat/cw4heat/arrayapi.py new file mode 100644 index 0000000000..aaf18a3177 --- /dev/null +++ b/heat/cw4heat/arrayapi.py @@ -0,0 +1,288 @@ +__all__ = ['aa_creators', 'aa_attributes', 'aa_methods', 'aa_elementwises', 'aa_statisticals', + 'aa_inplace_operators', 'aa_reflected_operators', 'aa_datatypes', 'aa_datatype_functions', + 'aa_searching', 'aa_sorting', 'aa_set', 'aa_utility', 'aa_constants', + 'aa_arraydir', 'aa_tldir', 'aa_tlfuncs', 'aa_arrayfuncs', 'aa_methods_s', 'aa_methods_a'] + +aa_creators = [ + 'arange', #(start, /, stop=None, step=1, *, dtype=None, device=None) + 'asarray', #(obj, /, *, dtype=None, device=None, copy=None) + 'empty', #(shape, *, dtype=None, device=None) + 'empty_like', #(x, /, *, dtype=None, device=None) + 'eye', #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) + 'from_dlpack', #(x, /) + 'full', #(shape, fill_value, *, dtype=None, device=None) + 'full_like', #(x, /, fill_value, *, dtype=None, device=None) + 'linspace', #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) + 'meshgrid', #(*arrays, indexing=’xy’) + 'ones', #(shape, *, dtype=None, device=None) + 'ones_like', #(x, /, *, dtype=None, device=None) + 'zeros', #(shape, *, dtype=None, device=None) + 'zeros_like', #(x, /, *, dtype=None, device=None) +] + +aa_attributes = [ + 'dtype', + 'device', + 'ndim', + 'shape', + 'size', + 'T', +] + +aa_inplace_operators = [ + '__iadd__', + '__isub__', + '__imul__', + '__itruediv__', + '__iflowdiv__', + '__ipow__', + '__imatmul__', + '__imod__', + '__iand__', + '__ior__', + '__ixor__', + '__ilshift__', + '__irshift__', +] + +aa_reflected_operators = [ + '__radd__', + '__rsub__', + '__rmul__', + '__rtruediv__', + '__rflowdiv__', + '__rpow__', + '__rmatmul__', + '__rmod__', + '__rand__', + '__ror__', + '__rxor__', + '__rlshift__', + '__rrshift__', +] + +aa_datatypes = [ + 'bool', + 'int8', + 'int16', + 'int32', + 'int64', + 'uint8', + 'uint16', + 'uint32', + 'uint64', + 'float32', + 'float64', +] + +aa_datatype_functions = [ + 'broadcast_arrays', #(*arrays) + 'broadcast_to', #(x, /, shape) + 'can_cast', #(from_, to, /) + 'finfo', #(type, /) + 'iinfo', #(type, /) + 'result_type', #(*arrays_and_dtypes) +] + +aa_methods = [ + '__abs__', #(self, /) + '__add__', #(self, other, /) + '__and__', #(self, other, /) + '__array_namespace__', #(self, /, *, api_version=None) + '__bool__', #(self, 
/) + '__dlpack__', #(self, /, *, stream=None) + '__dlpack_device__', #(self, /) + '__eq__', #(self, other, /) + '__float__', #(self, /) + '__floordiv__', #(self, other, /) + '__ge__', #(self, other, /) + '__getitem__', #(self, key, /) + '__gt__', #(self, other, /) + '__int__', #(self, /) + '__invert__', #(self, /) + '__le__', #(self, other, /) + '__len__', #(self, /) + '__lshift__', #(self, other, /) + '__lt__', #(self, other, /) + '__matmul__', #(self, other, /) + '__mod__', #(self, other, /) + '__mul__', #(self, other, /) + '__ne__', #(self, other, /) + '__neg__', #(self, /) + '__or__', #(self, other, /) + '__pos__', #(self, /) + '__pow__', #(self, other, /) + '__rshift__', #(self, other, /) + '__setitem__', #(self, key, value, /) + '__sub__', #(self, other, /) + '__truediv__', #(self, other, /) + '__xor__', #(self, other, /) +] + +aa_creators = [ + 'arange', #(start, /, stop=None, step=1, *, dtype=None, device=None) + 'asarray', #(obj, /, *, dtype=None, device=None, copy=None) + 'empty', #(shape, *, dtype=None, device=None) + 'empty_like', #(x, /, *, dtype=None, device=None) + 'eye', #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) + 'from_dlpack', #(x, /) + 'full', #(shape, fill_value, *, dtype=None, device=None) + 'full_like', #(x, /, fill_value, *, dtype=None, device=None) + 'linspace', #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) + 'meshgrid', #(*arrays, indexing=’xy’) + 'ones', #(shape, *, dtype=None, device=None) + 'ones_like', #(x, /, *, dtype=None, device=None) + 'zeros', #(shape, *, dtype=None, device=None) + 'zeros_like', #(x, /, *, dtype=None, device=None) +] + +aa_attributes = [ + 'dtype', + 'device', + 'ndim', + 'shape', + 'size', + 'T', +] + +aa_methods_a = [ + '__abs__', #(self, /) + '__add__', #(self, other, /) + '__floordiv__', #(self, other, /) + '__invert__', #(self, /) + '__lshift__', #(self, other, /) + '__matmul__', #(self, other, /) + '__mod__', #(self, other, /) + '__mul__', #(self, other, /) + '__neg__', #(self, /) + '__pos__', #(self, /) + '__pow__', #(self, other, /) + '__rshift__', #(self, other, /) + '__sub__', #(self, other, /) + '__truediv__', #(self, other, /) + '__getitem__', #(self, key, /) + '__setitem__', #(self, key, value, /) + '__eq__', #(self, other, /) + '__ge__', #(self, other, /) + '__gt__', #(self, other, /) + '__le__', #(self, other, /) + '__lt__', #(self, other, /) + '__ne__', #(self, other, /) + '__and__', #(self, other, /) + '__or__', #(self, other, /) + '__xor__', #(self, other, /) +] + +aa_methods_s = [ + '__array_namespace__', #(self, /, *, api_version=None) + '__bool__', #(self, /) + '__dlpack__', #(self, /, *, stream=None) + '__dlpack_device__', #(self, /) + '__float__', #(self, /) + '__int__', #(self, /) + '__len__', #(self, /) +] + +aa_methods = aa_methods_s + aa_methods_a + +aa_elementwises = [ + 'abs', #(x, /) + 'acos', #(x, /) + 'acosh', #(x, /) + 'add', #(x1, x2, /) + 'asin', #(x, /) + 'asinh', #(x, /) + 'atan', #(x, /) + 'atan2', #(x1, x2, /) + 'atanh', #(x, /) + 'bitwise_and', #(x1, x2, /) + 'bitwise_left_shift', #(x1, x2, /) + 'bitwise_invert', #(x, /) + 'bitwise_or', #(x1, x2, /) + 'bitwise_right_shift', #(x1, x2, /) + 'bitwise_xor', #(x1, x2, /) + 'ceil', #(x, /) + 'cos', #(x, /) + 'cosh', #(x, /) + 'divide', #(x1, x2, /) + 'equal', #(x1, x2, /) + 'exp', #(x, /) + 'expm1', #(x, /) + 'floor', #(x, /) + 'floor_divide', #(x1, x2, /) + 'greater', #(x1, x2, /) + 'greater_equal', #(x1, x2, /) + 'isfinite', #(x, /) + 'isinf', #(x, /) + 'isnan', #(x, /) + 'less', #(x1, x2, /) + 'less_equal', #(x1, x2, 
/) + 'log', #(x, /) + 'log1p', #(x, /) + 'log2', #(x, /) + 'log10', #(x, /) + 'logaddexp', #(x1, x2) + 'logical_and', #(x1, x2, /) + 'logical_not', #(x, /) + 'logical_or', #(x1, x2, /) + 'logical_xor', #(x1, x2, /) + 'multiply', #(x1, x2, /) + 'negative', #(x, /) + 'not_equal', #(x1, x2, /) + 'positive', #(x, /) + 'pow', #(x1, x2, /) + 'remainder', #(x1, x2, /) + 'round', #(x, /) + 'sign', #(x, /) + 'sin', #(x, /) + 'sinh', #(x, /) + 'square', #(x, /) + 'sqrt', #(x, /) + 'subtract', #(x1, x2, /) + 'tan', #(x, /) + 'tanh', #(x, /) + 'trunc', #(x, /) +] + +aa_statisticals = [ + 'max', #(x, /, *, axis=None, keepdims=False) + 'mean', #(x, /, *, axis=None, keepdims=False) + 'min', #(x, /, *, axis=None, keepdims=False) + 'prod', #(x, /, *, axis=None, keepdims=False) + 'std', #(x, /, *, axis=None, correction=0.0, keepdims=False) + 'sum', #(x, /, *, axis=None, keepdims=False) + 'var', #(x, /, *, axis=None, correction=0.0, keepdims=False) +] + +aa_searching = [ + 'argmax', + 'argmin', + 'nonzero', + 'where', +] + +aa_sorting = [ + 'argsort', + 'sort', +] + +aa_set = [ + 'unique', +] + +aa_utility = [ + 'all', + 'any', +] + +aa_constants = [ + 'e', + 'inf', + 'nan', + 'pi', +] + +aa_tlfuncs = aa_creators + aa_elementwises + aa_statisticals + aa_datatype_functions + aa_searching + aa_sorting + aa_set + aa_utility +aa_tldir = aa_tlfuncs + aa_datatypes + aa_constants +aa_arrayfuncs = aa_methods + aa_inplace_operators + aa_reflected_operators +aa_arraydir = aa_attributes + aa_arrayfuncs diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py new file mode 100644 index 0000000000..5153c231cf --- /dev/null +++ b/heat/cw4heat/distributor.py @@ -0,0 +1,175 @@ +# MIT License + +# Copyright (c) 2021 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +############################################################################### +# Distribution engine. +# - schedules same tasks on all workers +# - handles dependences seperately +# This currently is a very simple eagerly executing machinery. +# We can make this better over time. A low hanging fruit seems might +# be to delay distribution until go() is called. This would allow aggregating +# multiple distribution messages into one. +# +# Dependent objects have a unique identifier, assigned when a handle to it is +# created. We assume that all workers execute handle-creation in identical order. +# Such dependences are assumed to be global entities, e.g. each worker holds +# a handle/reference to it (e.g. 
a heat.DNDarray). The local handles
+# exist on each rank, stored in a worker-local dictionary. This allows identifying
+# dependences through simple integers.
+#
+# Notice: mpi4py does not provide ibcast, so we cannot overlap. This makes the
+# above aggregation particularly promising. Another option would be to write
+# this in C/C++ and use ibcast.
+###############################################################################
+
+
+import sys
+from mpi4py import MPI
+_comm = MPI.COMM_WORLD
+
+# define identifiers
+END = 0
+TASK = 1
+GO = 2
+
+
+def init():
+    'Init distributor'
+    pass
+
+
+def start():
+    '''
+    Start distribution engine.
+    Controller inits and returns.
+    Workers enter recv-loop and exit program when fini is called.
+    '''
+    if _comm.rank != 0:
+        done = False
+        header = None
+        rtask = None
+        while not done:
+            # wait in bcast for work
+            header = _comm.bcast(header, 0)
+            # then see what we need to do
+            if header[0] == END:
+                done = True
+                break
+            elif header[0] == TASK:
+                header[1].submit()
+            elif header[0] == GO:
+                # no delayed execution for now -> nothing to do
+                pass
+            else:
+                raise Exception("Worker received unknown tag")
+        sys.exit()
+
+
+def fini():
+    'Control sends end-tag. Workers will sys.exit'
+    header = [END]
+    header = _comm.bcast(header, 0)
+
+
+def go():
+    'Trigger execution of all tasks that are still in flight'
+    header = [GO]
+    header = _comm.bcast(header, 0)
+
+
+def submitPP(task, deps, in_order=True):
+    '''
+    Submit a process-parallel task and return a handle/future.
+    '''
+    rtask = _RemoteTask(task, deps)
+    header = [TASK, rtask]
+    _, rtask = _comm.bcast(header, 0)
+    return rtask.submit()
+
+
+class Handle:
+    '''
+    A future representing an object that will be available eventually.
+    get() will return None as long as the value is not available.
+    '''
+
+    # this defines the next free and globally unique identifier
+    _nextId = 1
+
+    def __init__(self):
+        '''
+        Initialize handle.
+        We assume all workers create handles to objects in identical order.
+        This allows us to assign a simple integer as the unique id.
+        '''
+        self._obj = None
+        self._id = Handle._nextId
+        Handle._nextId += 1
+
+    def set(self, obj):
+        'Make object available.'
+        self._obj = obj
+
+    def getId(self):
+        'Return future/handle id'
+        return self._id
+
+    def get(self):
+        'Return object or None'
+        return self._obj
+
+
+class _RemoteTask:
+    '''
+    A task which is executed remotely on a worker.
+    It accepts a task with a run-method that it will execute at some point.
+    It also accepts dependences explicitly and so allows creating
+    task-graphs etc.
+
+    We keep a static dictionary mapping globally unique identifiers to dependent
+    global objects (like heat.DNDarrays). This keeps the objects alive and allows
+    communicating through simple integers.
+    '''
+
+    def __init__(self, task, deps, inorder=True):
+        self._depIds = deps
+        self._task = task
+        self._inorder = inorder
+
+    # here we store objects that are input dependences to tasks
+    s_pms = {}
+
+    def submit(self):
+        '''
+        Submit task to local task scheduler.
+        For now we execute eagerly, this is much simpler to implement.
+        Later, we might consider lazy evaluation, task-graph-optimizations etc.
+        FIXME: We currently assign a new id and store the result even when there is no result
+        or the result is not a global object.
+ ''' + deps = [_RemoteTask.s_pms[i] for i in self._depIds] + res = self._task.run(deps) + hndl = Handle() + hndl.set(res) + _RemoteTask.s_pms[hndl.getId()] = res + return hndl From 634398815f3b262f87ab2e659cf1b706b2639848 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Fri, 25 Jun 2021 10:13:48 -0500 Subject: [PATCH 02/22] auto-init and auto-fini --- heat/cw4heat/__init__.py | 18 +++++++++--------- heat/cw4heat/distributor.py | 5 +++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 28698df781..1057e2438a 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -49,6 +49,7 @@ # and non-array-types arguments separately. ############################################################################### +import atexit from . import distributor from .arrayapi import ( aa_attributes, @@ -69,16 +70,9 @@ def init(): ''' - Initialize distribution engine. + Initialize distribution engine. Automatically when when importing cw4heat. For now we assume all ranks (controller and workers) are started through mpirun, workers will never leave distributor.start() and so this function. - Call this as the very first thing in your program. For now it is recommended - to start your program with - - import heat.cw4heat as ht - ht.init() - - Also call fini() before exiting. ''' distributor.init() distributor.start() @@ -86,7 +80,7 @@ def init(): def fini(): ''' - Finalize/shutdown distribution engine. + Finalize/shutdown distribution engine. Automatically called at exit. When called on controller, workers will sys.exit from init(). ''' distributor.fini() @@ -254,3 +248,9 @@ class random: for method, obj in impl.random.__dict__.items(): if callable(obj): exec(f"{method} = staticmethod(lambda *args, **kwargs: DDParray(_submit('{impl_str}.random.{method}', args, kwargs)))") + + +####################################################################### +####################################################################### +atexit.register(fini) +init() diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 5153c231cf..8a1e88f6b6 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -87,8 +87,9 @@ def start(): def fini(): 'Control sends end-tag. Workers will sys.exit' - header = [END] - header = _comm.bcast(header, 0) + if _comm.rank == 0: + header = [END] + header = _comm.bcast(header, 0) def go(): From 7c780ccdd445f2577f67415cb1efe92277b07535 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Mon, 5 Jul 2021 03:38:13 -0500 Subject: [PATCH 03/22] using dealyed execution; using double-quotes --- heat/cw4heat/__init__.py | 65 +++--- heat/cw4heat/arrayapi.py | 454 ++++++++++++++++++------------------ heat/cw4heat/distributor.py | 149 ++++++++---- 3 files changed, 365 insertions(+), 303 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 1057e2438a..3b434dcb8c 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -69,26 +69,26 @@ dndarray_str = "impl.DNDarray" def init(): - ''' + """ Initialize distribution engine. Automatically when when importing cw4heat. For now we assume all ranks (controller and workers) are started through mpirun, workers will never leave distributor.start() and so this function. - ''' + """ distributor.init() distributor.start() def fini(): - ''' + """ Finalize/shutdown distribution engine. Automatically called at exit. When called on controller, workers will sys.exit from init(). 
- ''' + """ distributor.fini() class _Task: - 'A work item, executing functions provided as code.' - def __init__(self, func, args, kwargs, unwrap='*'): + "A work item, executing functions provided as code." + def __init__(self, func, args, kwargs, unwrap="*"): self._func = func self._args = args self._kwargs = kwargs @@ -102,7 +102,7 @@ def run(self, deps): class _PropertyTask: - 'A work item, executing class properties provided as code.' + "A work item, executing class properties provided as code." def __init__(self, func): self._func = func @@ -110,19 +110,19 @@ def run(self, deps): return eval(f"deps[0].{self._func}") -def _submit(name, args, kwargs, unwrap='*'): - ''' +def _submit(name, args, kwargs, unwrap="*", numout=1): + """ Create a _Task and submit, return PManager/Future. - ''' + """ scalar_args = tuple(x for x in args if not isinstance(x, DDParray)) deps = [x._handle.getId() for x in args if isinstance(x, DDParray)] - return distributor.submitPP(_Task(name, scalar_args, kwargs, unwrap=unwrap), deps) + return distributor.submitPP(_Task(name, scalar_args, kwargs, unwrap=unwrap), deps, numout) def _submitProperty(name, self): - ''' + """ Create a _PropertyTask (property) and submit, return PManager/Future. - ''' + """ t = _PropertyTask(name) try: res = distributor.submitPP(t, [self._handle.getId()]) @@ -142,41 +142,41 @@ def _setitem_normalized(self, value, key): # allow delayed execution. ####################################################################### class DDParray: - ''' + """ Shallow wrapper class representing a distributed array. It will be filled dynamically from lists extracted from the array-API. All functionality is delegated to the underlying implementation, executed in tasks. - ''' + """ ####################################################################### # first define methods/properties which need special care. ####################################################################### def __init__(self, handle): - 'Do not use this array. Use creator functions instead.' + "Do not use this array. Use creator functions instead." self._handle = handle def heat(self): - ''' + """ Return heat native array. With delayed execution, triggers computation as needed and blocks until array is available. - ''' + """ return self._handle.get() def __getitem__(self, key): - 'Return item/slice as array.' - return DDParray(_submit(f'{dndarray_str}.__getitem__', (self, key), {})) + "Return item/slice as array." + return DDParray(_submit(f"{dndarray_str}.__getitem__", (self, key), {})) # bring args in the order we can process and feed into normal process # using global normalized version def __setitem__(self, key, value): - 'set item/slice to given value' - _submit(f'_setitem_normalized', (self, value, key), {}) + "set item/slice to given value" + _submit(f"_setitem_normalized", (self, value, key), {}) @property def T(self): - return DDParray(_submitProperty('T', self)) + return DDParray(_submitProperty("T", self)) ####################################################################### @@ -186,9 +186,9 @@ def T(self): # dynamically generate class methods from list of methods in array-API # we simply make lambdas which submit appropriate Tasks # FIXME: aa_inplace_operators,others? 
- fixme_afuncs = ['squeeze', 'astype', 'balance',] + fixme_afuncs = ["squeeze", "astype", "balance", "resplit",] for method in aa_methods_a + aa_reflected_operators + fixme_afuncs: - if method not in ['__getitem__', '__setitem__'] and hasattr(dndarray, method): + if method not in ["__getitem__", "__setitem__"] and hasattr(dndarray, method): exec(f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))") for method in aa_methods_s: @@ -196,7 +196,7 @@ def T(self): exec(f"{method} = lambda self, *args, **kwargs: _submit('{dndarray_str}.{method}', (self, *args), kwargs).get()") for attr in aa_attributes: - if attr != 'T' and hasattr(dndarray, attr): + if attr != "T" and hasattr(dndarray, attr): exec(f"{attr} = property(lambda self: self._handle.get().{attr})") def __getattr__(self, attr): @@ -212,7 +212,7 @@ def __getattr__(self, attr): # np.concatenate accepts a list of arrays (not individual arrays) # so we let the task not unwrap the list of deps def concatenate(*args, **kwargs): - return DDParray(_submit(f'{impl_str}.concatenate', *args, kwargs, unwrap='')) + return DDParray(_submit(f"{impl_str}.concatenate", *args, kwargs, unwrap="")) ####################################################################### @@ -224,13 +224,16 @@ def concatenate(*args, **kwargs): # (lists taken from list of methods in array-API) # Again, we simply make lambdas which submit appropriate Tasks -fixme_funcs = ['load_csv'] +fixme_funcs = ["load_csv", "array", "triu"] for func in aa_tlfuncs + fixme_funcs: - exec(f"{func} = lambda *args, **kwargs: DDParray(_submit('{impl_str}.{func}', args, kwargs))") + if func == "meshgrid": + exec(f"{func} = lambda *args, **kwargs: list(DDParray(x) for x in _submit('{impl_str}.{func}', args, kwargs, numout=len(args)))") + else: + exec(f"{func} = lambda *args, **kwargs: DDParray(_submit('{impl_str}.{func}', args, kwargs))") -def concatenate(*args, **kwargs): - return DDParray(_submit(f'{impl_str}.concatenate', *args, kwargs, unwrap='')) +for func in ["concatenate", "hstack",]: + exec(f"{func} = lambda *args, **kwargs: DDParray(_submit(f'{impl_str}.{func}', *args, kwargs, unwrap=''))") # Here we data types and constants diff --git a/heat/cw4heat/arrayapi.py b/heat/cw4heat/arrayapi.py index aaf18a3177..5203e9e4c8 100644 --- a/heat/cw4heat/arrayapi.py +++ b/heat/cw4heat/arrayapi.py @@ -1,285 +1,285 @@ -__all__ = ['aa_creators', 'aa_attributes', 'aa_methods', 'aa_elementwises', 'aa_statisticals', - 'aa_inplace_operators', 'aa_reflected_operators', 'aa_datatypes', 'aa_datatype_functions', - 'aa_searching', 'aa_sorting', 'aa_set', 'aa_utility', 'aa_constants', - 'aa_arraydir', 'aa_tldir', 'aa_tlfuncs', 'aa_arrayfuncs', 'aa_methods_s', 'aa_methods_a'] +__all__ = ["aa_creators", "aa_attributes", "aa_methods", "aa_elementwises", "aa_statisticals", + "aa_inplace_operators", "aa_reflected_operators", "aa_datatypes", "aa_datatype_functions", + "aa_searching", "aa_sorting", "aa_set", "aa_utility", "aa_constants", + "aa_arraydir", "aa_tldir", "aa_tlfuncs", "aa_arrayfuncs", "aa_methods_s", "aa_methods_a"] aa_creators = [ - 'arange', #(start, /, stop=None, step=1, *, dtype=None, device=None) - 'asarray', #(obj, /, *, dtype=None, device=None, copy=None) - 'empty', #(shape, *, dtype=None, device=None) - 'empty_like', #(x, /, *, dtype=None, device=None) - 'eye', #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) - 'from_dlpack', #(x, /) - 'full', #(shape, fill_value, *, dtype=None, device=None) - 'full_like', #(x, /, fill_value, *, 
dtype=None, device=None) - 'linspace', #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) - 'meshgrid', #(*arrays, indexing=’xy’) - 'ones', #(shape, *, dtype=None, device=None) - 'ones_like', #(x, /, *, dtype=None, device=None) - 'zeros', #(shape, *, dtype=None, device=None) - 'zeros_like', #(x, /, *, dtype=None, device=None) + "arange", #(start, /, stop=None, step=1, *, dtype=None, device=None) + "asarray", #(obj, /, *, dtype=None, device=None, copy=None) + "empty", #(shape, *, dtype=None, device=None) + "empty_like", #(x, /, *, dtype=None, device=None) + "eye", #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) + "from_dlpack", #(x, /) + "full", #(shape, fill_value, *, dtype=None, device=None) + "full_like", #(x, /, fill_value, *, dtype=None, device=None) + "linspace", #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) + "meshgrid", #(*arrays, indexing=’xy’) + "ones", #(shape, *, dtype=None, device=None) + "ones_like", #(x, /, *, dtype=None, device=None) + "zeros", #(shape, *, dtype=None, device=None) + "zeros_like", #(x, /, *, dtype=None, device=None) ] aa_attributes = [ - 'dtype', - 'device', - 'ndim', - 'shape', - 'size', - 'T', + "dtype", + "device", + "ndim", + "shape", + "size", + "T", ] aa_inplace_operators = [ - '__iadd__', - '__isub__', - '__imul__', - '__itruediv__', - '__iflowdiv__', - '__ipow__', - '__imatmul__', - '__imod__', - '__iand__', - '__ior__', - '__ixor__', - '__ilshift__', - '__irshift__', + "__iadd__", + "__isub__", + "__imul__", + "__itruediv__", + "__iflowdiv__", + "__ipow__", + "__imatmul__", + "__imod__", + "__iand__", + "__ior__", + "__ixor__", + "__ilshift__", + "__irshift__", ] aa_reflected_operators = [ - '__radd__', - '__rsub__', - '__rmul__', - '__rtruediv__', - '__rflowdiv__', - '__rpow__', - '__rmatmul__', - '__rmod__', - '__rand__', - '__ror__', - '__rxor__', - '__rlshift__', - '__rrshift__', + "__radd__", + "__rsub__", + "__rmul__", + "__rtruediv__", + "__rflowdiv__", + "__rpow__", + "__rmatmul__", + "__rmod__", + "__rand__", + "__ror__", + "__rxor__", + "__rlshift__", + "__rrshift__", ] aa_datatypes = [ - 'bool', - 'int8', - 'int16', - 'int32', - 'int64', - 'uint8', - 'uint16', - 'uint32', - 'uint64', - 'float32', - 'float64', + "bool", + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float32", + "float64", ] aa_datatype_functions = [ - 'broadcast_arrays', #(*arrays) - 'broadcast_to', #(x, /, shape) - 'can_cast', #(from_, to, /) - 'finfo', #(type, /) - 'iinfo', #(type, /) - 'result_type', #(*arrays_and_dtypes) + "broadcast_arrays", #(*arrays) + "broadcast_to", #(x, /, shape) + "can_cast", #(from_, to, /) + "finfo", #(type, /) + "iinfo", #(type, /) + "result_type", #(*arrays_and_dtypes) ] aa_methods = [ - '__abs__', #(self, /) - '__add__', #(self, other, /) - '__and__', #(self, other, /) - '__array_namespace__', #(self, /, *, api_version=None) - '__bool__', #(self, /) - '__dlpack__', #(self, /, *, stream=None) - '__dlpack_device__', #(self, /) - '__eq__', #(self, other, /) - '__float__', #(self, /) - '__floordiv__', #(self, other, /) - '__ge__', #(self, other, /) - '__getitem__', #(self, key, /) - '__gt__', #(self, other, /) - '__int__', #(self, /) - '__invert__', #(self, /) - '__le__', #(self, other, /) - '__len__', #(self, /) - '__lshift__', #(self, other, /) - '__lt__', #(self, other, /) - '__matmul__', #(self, other, /) - '__mod__', #(self, other, /) - '__mul__', #(self, other, /) - '__ne__', #(self, other, /) - '__neg__', #(self, /) - '__or__', #(self, other, /) - 
'__pos__', #(self, /) - '__pow__', #(self, other, /) - '__rshift__', #(self, other, /) - '__setitem__', #(self, key, value, /) - '__sub__', #(self, other, /) - '__truediv__', #(self, other, /) - '__xor__', #(self, other, /) + "__abs__", #(self, /) + "__add__", #(self, other, /) + "__and__", #(self, other, /) + "__array_namespace__", #(self, /, *, api_version=None) + "__bool__", #(self, /) + "__dlpack__", #(self, /, *, stream=None) + "__dlpack_device__", #(self, /) + "__eq__", #(self, other, /) + "__float__", #(self, /) + "__floordiv__", #(self, other, /) + "__ge__", #(self, other, /) + "__getitem__", #(self, key, /) + "__gt__", #(self, other, /) + "__int__", #(self, /) + "__invert__", #(self, /) + "__le__", #(self, other, /) + "__len__", #(self, /) + "__lshift__", #(self, other, /) + "__lt__", #(self, other, /) + "__matmul__", #(self, other, /) + "__mod__", #(self, other, /) + "__mul__", #(self, other, /) + "__ne__", #(self, other, /) + "__neg__", #(self, /) + "__or__", #(self, other, /) + "__pos__", #(self, /) + "__pow__", #(self, other, /) + "__rshift__", #(self, other, /) + "__setitem__", #(self, key, value, /) + "__sub__", #(self, other, /) + "__truediv__", #(self, other, /) + "__xor__", #(self, other, /) ] aa_creators = [ - 'arange', #(start, /, stop=None, step=1, *, dtype=None, device=None) - 'asarray', #(obj, /, *, dtype=None, device=None, copy=None) - 'empty', #(shape, *, dtype=None, device=None) - 'empty_like', #(x, /, *, dtype=None, device=None) - 'eye', #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) - 'from_dlpack', #(x, /) - 'full', #(shape, fill_value, *, dtype=None, device=None) - 'full_like', #(x, /, fill_value, *, dtype=None, device=None) - 'linspace', #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) - 'meshgrid', #(*arrays, indexing=’xy’) - 'ones', #(shape, *, dtype=None, device=None) - 'ones_like', #(x, /, *, dtype=None, device=None) - 'zeros', #(shape, *, dtype=None, device=None) - 'zeros_like', #(x, /, *, dtype=None, device=None) + "arange", #(start, /, stop=None, step=1, *, dtype=None, device=None) + "asarray", #(obj, /, *, dtype=None, device=None, copy=None) + "empty", #(shape, *, dtype=None, device=None) + "empty_like", #(x, /, *, dtype=None, device=None) + "eye", #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) + "from_dlpack", #(x, /) + "full", #(shape, fill_value, *, dtype=None, device=None) + "full_like", #(x, /, fill_value, *, dtype=None, device=None) + "linspace", #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) + "meshgrid", #(*arrays, indexing=’xy’) + "ones", #(shape, *, dtype=None, device=None) + "ones_like", #(x, /, *, dtype=None, device=None) + "zeros", #(shape, *, dtype=None, device=None) + "zeros_like", #(x, /, *, dtype=None, device=None) ] aa_attributes = [ - 'dtype', - 'device', - 'ndim', - 'shape', - 'size', - 'T', + "dtype", + "device", + "ndim", + "shape", + "size", + "T", ] aa_methods_a = [ - '__abs__', #(self, /) - '__add__', #(self, other, /) - '__floordiv__', #(self, other, /) - '__invert__', #(self, /) - '__lshift__', #(self, other, /) - '__matmul__', #(self, other, /) - '__mod__', #(self, other, /) - '__mul__', #(self, other, /) - '__neg__', #(self, /) - '__pos__', #(self, /) - '__pow__', #(self, other, /) - '__rshift__', #(self, other, /) - '__sub__', #(self, other, /) - '__truediv__', #(self, other, /) - '__getitem__', #(self, key, /) - '__setitem__', #(self, key, value, /) - '__eq__', #(self, other, /) - '__ge__', #(self, other, /) - '__gt__', #(self, other, /) - '__le__', #(self, 
other, /) - '__lt__', #(self, other, /) - '__ne__', #(self, other, /) - '__and__', #(self, other, /) - '__or__', #(self, other, /) - '__xor__', #(self, other, /) + "__abs__", #(self, /) + "__add__", #(self, other, /) + "__floordiv__", #(self, other, /) + "__invert__", #(self, /) + "__lshift__", #(self, other, /) + "__matmul__", #(self, other, /) + "__mod__", #(self, other, /) + "__mul__", #(self, other, /) + "__neg__", #(self, /) + "__pos__", #(self, /) + "__pow__", #(self, other, /) + "__rshift__", #(self, other, /) + "__sub__", #(self, other, /) + "__truediv__", #(self, other, /) + "__getitem__", #(self, key, /) + "__setitem__", #(self, key, value, /) + "__eq__", #(self, other, /) + "__ge__", #(self, other, /) + "__gt__", #(self, other, /) + "__le__", #(self, other, /) + "__lt__", #(self, other, /) + "__ne__", #(self, other, /) + "__and__", #(self, other, /) + "__or__", #(self, other, /) + "__xor__", #(self, other, /) ] aa_methods_s = [ - '__array_namespace__', #(self, /, *, api_version=None) - '__bool__', #(self, /) - '__dlpack__', #(self, /, *, stream=None) - '__dlpack_device__', #(self, /) - '__float__', #(self, /) - '__int__', #(self, /) - '__len__', #(self, /) + "__array_namespace__", #(self, /, *, api_version=None) + "__bool__", #(self, /) + "__dlpack__", #(self, /, *, stream=None) + "__dlpack_device__", #(self, /) + "__float__", #(self, /) + "__int__", #(self, /) + "__len__", #(self, /) ] aa_methods = aa_methods_s + aa_methods_a aa_elementwises = [ - 'abs', #(x, /) - 'acos', #(x, /) - 'acosh', #(x, /) - 'add', #(x1, x2, /) - 'asin', #(x, /) - 'asinh', #(x, /) - 'atan', #(x, /) - 'atan2', #(x1, x2, /) - 'atanh', #(x, /) - 'bitwise_and', #(x1, x2, /) - 'bitwise_left_shift', #(x1, x2, /) - 'bitwise_invert', #(x, /) - 'bitwise_or', #(x1, x2, /) - 'bitwise_right_shift', #(x1, x2, /) - 'bitwise_xor', #(x1, x2, /) - 'ceil', #(x, /) - 'cos', #(x, /) - 'cosh', #(x, /) - 'divide', #(x1, x2, /) - 'equal', #(x1, x2, /) - 'exp', #(x, /) - 'expm1', #(x, /) - 'floor', #(x, /) - 'floor_divide', #(x1, x2, /) - 'greater', #(x1, x2, /) - 'greater_equal', #(x1, x2, /) - 'isfinite', #(x, /) - 'isinf', #(x, /) - 'isnan', #(x, /) - 'less', #(x1, x2, /) - 'less_equal', #(x1, x2, /) - 'log', #(x, /) - 'log1p', #(x, /) - 'log2', #(x, /) - 'log10', #(x, /) - 'logaddexp', #(x1, x2) - 'logical_and', #(x1, x2, /) - 'logical_not', #(x, /) - 'logical_or', #(x1, x2, /) - 'logical_xor', #(x1, x2, /) - 'multiply', #(x1, x2, /) - 'negative', #(x, /) - 'not_equal', #(x1, x2, /) - 'positive', #(x, /) - 'pow', #(x1, x2, /) - 'remainder', #(x1, x2, /) - 'round', #(x, /) - 'sign', #(x, /) - 'sin', #(x, /) - 'sinh', #(x, /) - 'square', #(x, /) - 'sqrt', #(x, /) - 'subtract', #(x1, x2, /) - 'tan', #(x, /) - 'tanh', #(x, /) - 'trunc', #(x, /) + "abs", #(x, /) + "acos", #(x, /) + "acosh", #(x, /) + "add", #(x1, x2, /) + "asin", #(x, /) + "asinh", #(x, /) + "atan", #(x, /) + "atan2", #(x1, x2, /) + "atanh", #(x, /) + "bitwise_and", #(x1, x2, /) + "bitwise_left_shift", #(x1, x2, /) + "bitwise_invert", #(x, /) + "bitwise_or", #(x1, x2, /) + "bitwise_right_shift", #(x1, x2, /) + "bitwise_xor", #(x1, x2, /) + "ceil", #(x, /) + "cos", #(x, /) + "cosh", #(x, /) + "divide", #(x1, x2, /) + "equal", #(x1, x2, /) + "exp", #(x, /) + "expm1", #(x, /) + "floor", #(x, /) + "floor_divide", #(x1, x2, /) + "greater", #(x1, x2, /) + "greater_equal", #(x1, x2, /) + "isfinite", #(x, /) + "isinf", #(x, /) + "isnan", #(x, /) + "less", #(x1, x2, /) + "less_equal", #(x1, x2, /) + "log", #(x, /) + "log1p", #(x, /) + "log2", #(x, /) + "log10", #(x, /) 
+    "logaddexp", #(x1, x2)
+    "logical_and", #(x1, x2, /)
+    "logical_not", #(x, /)
+    "logical_or", #(x1, x2, /)
+    "logical_xor", #(x1, x2, /)
+    "multiply", #(x1, x2, /)
+    "negative", #(x, /)
+    "not_equal", #(x1, x2, /)
+    "positive", #(x, /)
+    "pow", #(x1, x2, /)
+    "remainder", #(x1, x2, /)
+    "round", #(x, /)
+    "sign", #(x, /)
+    "sin", #(x, /)
+    "sinh", #(x, /)
+    "square", #(x, /)
+    "sqrt", #(x, /)
+    "subtract", #(x1, x2, /)
+    "tan", #(x, /)
+    "tanh", #(x, /)
+    "trunc", #(x, /)
 ]
 
 aa_statisticals = [
-    'max', #(x, /, *, axis=None, keepdims=False)
-    'mean', #(x, /, *, axis=None, keepdims=False)
-    'min', #(x, /, *, axis=None, keepdims=False)
-    'prod', #(x, /, *, axis=None, keepdims=False)
-    'std', #(x, /, *, axis=None, correction=0.0, keepdims=False)
-    'sum', #(x, /, *, axis=None, keepdims=False)
-    'var', #(x, /, *, axis=None, correction=0.0, keepdims=False)
+    "max", #(x, /, *, axis=None, keepdims=False)
+    "mean", #(x, /, *, axis=None, keepdims=False)
+    "min", #(x, /, *, axis=None, keepdims=False)
+    "prod", #(x, /, *, axis=None, keepdims=False)
+    "std", #(x, /, *, axis=None, correction=0.0, keepdims=False)
+    "sum", #(x, /, *, axis=None, keepdims=False)
+    "var", #(x, /, *, axis=None, correction=0.0, keepdims=False)
 ]
 
 aa_searching = [
-    'argmax',
-    'argmin',
-    'nonzero',
-    'where',
+    "argmax",
+    "argmin",
+    "nonzero",
+    "where",
 ]
 
 aa_sorting = [
-    'argsort',
-    'sort',
+    "argsort",
+    "sort",
 ]
 
 aa_set = [
-    'unique',
+    "unique",
 ]
 
 aa_utility = [
-    'all',
-    'any',
+    "all",
+    "any",
 ]
 
 aa_constants = [
-    'e',
-    'inf',
-    'nan',
-    'pi',
+    "e",
+    "inf",
+    "nan",
+    "pi",
 ]
 
 aa_tlfuncs = aa_creators + aa_elementwises + aa_statisticals + aa_datatype_functions + aa_searching + aa_sorting + aa_set + aa_utility
diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py
index 8a1e88f6b6..54f1137b19 100644
--- a/heat/cw4heat/distributor.py
+++ b/heat/cw4heat/distributor.py
@@ -25,10 +25,17 @@
 # Distribution engine.
 # - schedules same tasks on all workers
 # - handles dependences seperately
-# This currently is a very simple eagerly executing machinery.
-# We can make this better over time. A low hanging fruit seems might
-# be to delay distribution until go() is called. This would allow aggregating
-# multiple distribution messages into one.
+#
+# When tasks are submitted on the root rank they are pushed onto a queue and a
+# handle/future is returned. When computation is requested by calling go()
+# all tasks on the queue are sent to workers and executed on all ranks
+# sequentially.
+#
+# We store tasks in the same order as they are submitted on the root rank.
+# For any valid program this must be a legal ordering, so there is no need to
+# check if dependent objects are ready when a task is executed. A more
+# sophisticated scheduler could potentially try to parallelize. It remains to
+# be investigated if this would be a profitable feature, though.
 #
 # Dependent objects have a unique identifier, assigned when a handle to it is
 # created. We assume that all workers execute handle-creation in identical order.
@@ -43,8 +50,10 @@
 ###############################################################################
 
 
-import sys
 from mpi4py import MPI
+import sys
+from collections import deque
+
 _comm = MPI.COMM_WORLD
 
 # define identifiers
 END = 0
 TASK = 1
 GO = 2
 
 
+class _TaskQueue:
+    """
+    A task queue; each rank holds one for queuing up local tasks.
+    We currently disallow submitting tasks on non-root ranks.
+    Non-root ranks get their TaskQueue set in the recv-loop of start().
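+
+    Controller-side sketch of the intended flow (illustration only):
+
+        h = submitPP(task, deps)  # queue a _RemoteTask, get its Handle
+        v = h.get()               # triggers go(): broadcast queue, run tasks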
+    """
+
+    def __init__(self):
+        # here we store all tasks that have not been executed yet
+        self._taskQueue = deque()
+
+    def submit(self, rtask):
+        """
+        Submit a task to the queue. Will not run it.
+        """
+        assert _comm.rank == 0
+        self._taskQueue.append(rtask)
+        return rtask._handle
+
+    def go(self):
+        """
+        Run all tasks in the queue.
+        We assume tasks were submitted in a valid order, i.e. in an order that
+        guarantees no task depends on another task that is behind it in the queue.
+        """
+        while len(self._taskQueue):
+            self._taskQueue.popleft().go()
+
+
+# Our queue of tasks.
+_tQueue = _TaskQueue()
+
+
 def init():
-    'Init distributor'
+    """
+    Init distributor.
+    """
     pass
 
 
 def start():
-    '''
+    """
     Start distribution engine.
     Controller inits and returns.
     Workers enter recv-loop and exit program when fini is called.
-    '''
+    """
     if _comm.rank != 0:
         done = False
         header = None
         rtask = None
         while not done:
             # wait in bcast for work
             header = _comm.bcast(header, 0)
             # then see what we need to do
             if header[0] == END:
@@ -76,72 +119,79 @@ def start():
                 done = True
                 break
             elif header[0] == TASK:
-                header[1].submit()
+                _tQueue._taskQueue = header[1]
             elif header[0] == GO:
                 # no delayed execution for now -> nothing to do
-                pass
+                _tQueue.go()
             else:
                 raise Exception("Worker received unknown tag")
         sys.exit()
-    
+
 
 def fini():
-    'Control sends end-tag. Workers will sys.exit'
+    """
+    Controller sends end-tag. Workers will sys.exit.
+    """
     if _comm.rank == 0:
         header = [END]
         header = _comm.bcast(header, 0)
 
 
 def go():
-    'Trigger execution of all tasks that are still in flight'
+    """
+    Trigger execution of all tasks that are still in flight.
+    """
+    assert _comm.rank == 0
+    header = [TASK, _tQueue._taskQueue ]
+    _, _ = _comm.bcast(header, 0)
     header = [GO]
-    header = _comm.bcast(header, 0)
+    _ = _comm.bcast(header, 0)
+    _tQueue.go()
 
 
-def submitPP(task, deps, in_order=True):
-    '''
+def submitPP(task, deps, numout=1):
+    """
     Submit a process-parallel task and return a handle/future.
-    '''
-    rtask = _RemoteTask(task, deps)
-    header = [TASK, rtask]
-    _, rtask = _comm.bcast(header, 0)
-    return rtask.submit()
+    """
+    rtask = _RemoteTask(task, deps, numout)
+    return _tQueue.submit(rtask)
 
 
 class Handle:
-    '''
+    """
     A future representing an object that will be available eventually.
     get() will return None as long as the value is not available.
-    '''
+    """
 
     # this defines the next free and globally unique identifier
     _nextId = 1
 
     def __init__(self):
-        '''
+        """
         Initialize handle.
         We assume all workers create handles to objects in identical order.
         This allows us to assign a simple integer as the unique id.
-        '''
+        """
         self._obj = None
         self._id = Handle._nextId
         Handle._nextId += 1
 
     def set(self, obj):
-        'Make object available.'
+        "Make object available."
         self._obj = obj
 
     def getId(self):
-        'Return future/handle id'
+        "Return future/handle id"
         return self._id
 
     def get(self):
-        'Return object or None'
+        "Return object or None"
+        go()
         return self._obj
-    
+
 
 class _RemoteTask:
-    '''
+    """
     A task which is executed remotely on a worker.
     It accepts a task with a run-method that it will execute at some point.
     It also accepts dependences explicitly and so allows creating
     task-graphs etc.
 
     We keep a static dictionary mapping globally unique identifiers to dependent
     global objects (like heat.DNDarrays). This keeps the objects alive and allows
     communicating through simple integers.
- ''' + """ - def __init__(self, task, deps, inorder=True): + def __init__(self, task, deps, numout): self._depIds = deps self._task = task - self._inorder = inorder + self._nOut = numout + # FIXME: We currently assign a new id and store the result even when there is no result + # or the result is not a global object. + if self._nOut == 1: + self._handle = Handle() + else: + self._handle = tuple(Handle() for _ in range(self._nOut)) + # here we store objects that are input dependences to tasks s_pms = {} - def submit(self): - ''' - Submit task to local task scheduler. - For now we execute eagerly, this is much simpler to implement. - Later, we might consider lazy evaluation, task-graph-optimizations etc. - FIXME: We currently assign a new id and store the result even when there is no result - or the result is not a global object. - ''' + def go(self): + """ + Actually run the task. + """ deps = [_RemoteTask.s_pms[i] for i in self._depIds] res = self._task.run(deps) - hndl = Handle() - hndl.set(res) - _RemoteTask.s_pms[hndl.getId()] = res - return hndl + if self._nOut == 1: + self._handle.set(res) + _RemoteTask.s_pms[self._handle.getId()] = res + else: + i = 0 + for h in self._handle: + h.set(res[i]) + _RemoteTask.s_pms[h.getId()] = res[i] + i += 1 + return self._handle From 04919dc714810e699622d6690d6ee91f7d43bc99 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Mon, 5 Jul 2021 04:09:41 -0500 Subject: [PATCH 04/22] making code flake8-, black-, and pydocstyle-compliant --- heat/cw4heat/__init__.py | 121 +++++++----- heat/cw4heat/arrayapi.py | 366 ++++++++++++++++++++---------------- heat/cw4heat/distributor.py | 73 +++---- 3 files changed, 318 insertions(+), 242 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 3b434dcb8c..ba033b75dc 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -22,31 +22,33 @@ ############################################################################### -# This provides a wrapper around SPMD-based HeAT -# (github.com/helmholtz-analytics/heat) to operate in controller-worker mode. - -# The goal is to provide a compliant implementation of the array API -# (github.com/data-apis/arra-api). - -# Returned array (DNDArray) objects are handles/futures only. Their content is -# available through __int__ etc., through __partitioned__ or heat(). Notice: this -# allows for delayed execution and optimizations of the workflow/task-graph and -# communication. - -# For a function/method of the array-API that is executed on the controller -# process, this wrapper generates the equivalent source code to be executed on -# the worker processes. The code is then sent to each remote worker and -# executed there. - -# It's up to the distribution layer (e.g. distributor) to make sure the code is -# executed in the right order on each process/worker so that collective -# communication in HeAT can operate correctly without dead-locks. - -# To allow workflow optimizations array dependences and to avoid -# pickle-dependencies to the array inputs we separate scalar/non-array arguments -# from array arguments. For this we assume that array arguments never occur -# after non-array arguments. Each function.task handles and passes array-typed -# and non-array-types arguments separately. +""" +This provides a wrapper around SPMD-based HeAT +(github.com/helmholtz-analytics/heat) to operate in controller-worker mode. + +The goal is to provide a compliant implementation of the array API +(github.com/data-apis/arra-api). 
+ +Returned array (DNDArray) objects are handles/futures only. Their content is +available through __int__ etc., through __partitioned__ or heat(). Notice: this +allows for delayed execution and optimizations of the workflow/task-graph and +communication. + +For a function/method of the array-API that is executed on the controller +process, this wrapper generates the equivalent source code to be executed on +the worker processes. The code is then sent to each remote worker and +executed there. + +It's up to the distribution layer (e.g. distributor) to make sure the code is +executed in the right order on each process/worker so that collective +communication in HeAT can operate correctly without dead-locks. + +To allow workflow optimizations array dependences and to avoid +pickle-dependencies to the array inputs we separate scalar/non-array arguments +from array arguments. For this we assume that array arguments never occur +after non-array arguments. Each function.task handles and passes array-typed +and non-array-types arguments separately. +""" ############################################################################### import atexit @@ -65,9 +67,11 @@ # just in case we find another SPMD/MPI implementation of numpy... import heat as impl from heat import DNDarray as dndarray + impl_str = "impl" dndarray_str = "impl.DNDarray" + def init(): """ Initialize distribution engine. Automatically when when importing cw4heat. @@ -87,7 +91,10 @@ def fini(): class _Task: - "A work item, executing functions provided as code." + """ + A work item, executing functions provided as code. + """ + def __init__(self, func, args, kwargs, unwrap="*"): self._func = func self._args = args @@ -102,7 +109,10 @@ def run(self, deps): class _PropertyTask: - "A work item, executing class properties provided as code." + """ + A work item, executing class properties provided as code. + """ + def __init__(self, func): self._func = func @@ -126,7 +136,7 @@ def _submitProperty(name, self): t = _PropertyTask(name) try: res = distributor.submitPP(t, [self._handle.getId()]) - except: + except Exception: assert False return res @@ -154,7 +164,9 @@ class DDParray: ####################################################################### def __init__(self, handle): - "Do not use this array. Use creator functions instead." + """ + Do not use this array. Use creator functions instead. + """ self._handle = handle def heat(self): @@ -165,20 +177,26 @@ def heat(self): return self._handle.get() def __getitem__(self, key): - "Return item/slice as array." + """ + Return item/slice as array. + """ return DDParray(_submit(f"{dndarray_str}.__getitem__", (self, key), {})) # bring args in the order we can process and feed into normal process # using global normalized version def __setitem__(self, key, value): - "set item/slice to given value" - _submit(f"_setitem_normalized", (self, value, key), {}) + """ + Set item/slice to given value. + """ + _submit("_setitem_normalized", (self, value, key), {}) @property def T(self): + """ + Transpose. + """ return DDParray(_submitProperty("T", self)) - ####################################################################### # Now we add methods/properties through the standard process. ####################################################################### @@ -186,14 +204,18 @@ def T(self): # dynamically generate class methods from list of methods in array-API # we simply make lambdas which submit appropriate Tasks # FIXME: aa_inplace_operators,others? 
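+    # DNDarray methods that are not covered by the array-API lists in arrayapi.py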
- fixme_afuncs = ["squeeze", "astype", "balance", "resplit",] + fixme_afuncs = ["squeeze", "astype", "balance", "resplit"] for method in aa_methods_a + aa_reflected_operators + fixme_afuncs: if method not in ["__getitem__", "__setitem__"] and hasattr(dndarray, method): - exec(f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))") + exec( + f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" + ) for method in aa_methods_s: if hasattr(dndarray, method): - exec(f"{method} = lambda self, *args, **kwargs: _submit('{dndarray_str}.{method}', (self, *args), kwargs).get()") + exec( + f"{method} = lambda self, *args, **kwargs: _submit('{dndarray_str}.{method}', (self, *args), kwargs).get()" + ) for attr in aa_attributes: if attr != "T" and hasattr(dndarray, attr): @@ -201,7 +223,7 @@ def T(self): def __getattr__(self, attr): # attributes are special - if not attr in aa_attributes: + if attr not in aa_attributes: raise Exception(f"unknown method/attribute {attr} requested") @@ -212,6 +234,9 @@ def __getattr__(self, attr): # np.concatenate accepts a list of arrays (not individual arrays) # so we let the task not unwrap the list of deps def concatenate(*args, **kwargs): + """ + Wrapper for impl.concatenate. + """ return DDParray(_submit(f"{impl_str}.concatenate", *args, kwargs, unwrap="")) @@ -227,13 +252,19 @@ def concatenate(*args, **kwargs): fixme_funcs = ["load_csv", "array", "triu"] for func in aa_tlfuncs + fixme_funcs: if func == "meshgrid": - exec(f"{func} = lambda *args, **kwargs: list(DDParray(x) for x in _submit('{impl_str}.{func}', args, kwargs, numout=len(args)))") + exec( + f"{func} = lambda *args, **kwargs: list(DDParray(x) for x in _submit('{impl_str}.{func}', args, kwargs, numout=len(args)))" + ) else: - exec(f"{func} = lambda *args, **kwargs: DDParray(_submit('{impl_str}.{func}', args, kwargs))") + exec( + f"{func} = lambda *args, **kwargs: DDParray(_submit('{impl_str}.{func}', args, kwargs))" + ) -for func in ["concatenate", "hstack",]: - exec(f"{func} = lambda *args, **kwargs: DDParray(_submit(f'{impl_str}.{func}', *args, kwargs, unwrap=''))") +for func in ["concatenate", "hstack"]: + exec( + f"{func} = lambda *args, **kwargs: DDParray(_submit(f'{impl_str}.{func}', *args, kwargs, unwrap=''))" + ) # Here we data types and constants @@ -248,9 +279,15 @@ def concatenate(*args, **kwargs): # quick hack to provide random features ####################################################################### class random: + """ + Wrapper class for random. 
+ """ + for method, obj in impl.random.__dict__.items(): if callable(obj): - exec(f"{method} = staticmethod(lambda *args, **kwargs: DDParray(_submit('{impl_str}.random.{method}', args, kwargs)))") + exec( + f"{method} = staticmethod(lambda *args, **kwargs: DDParray(_submit('{impl_str}.random.{method}', args, kwargs)))" + ) ####################################################################### diff --git a/heat/cw4heat/arrayapi.py b/heat/cw4heat/arrayapi.py index 5203e9e4c8..d1ac6fc85b 100644 --- a/heat/cw4heat/arrayapi.py +++ b/heat/cw4heat/arrayapi.py @@ -1,23 +1,46 @@ -__all__ = ["aa_creators", "aa_attributes", "aa_methods", "aa_elementwises", "aa_statisticals", - "aa_inplace_operators", "aa_reflected_operators", "aa_datatypes", "aa_datatype_functions", - "aa_searching", "aa_sorting", "aa_set", "aa_utility", "aa_constants", - "aa_arraydir", "aa_tldir", "aa_tlfuncs", "aa_arrayfuncs", "aa_methods_s", "aa_methods_a"] +""" +Functions, data-types etc. defined by Array API. +See https://data-apis.org/array-api/latest +""" + +__all__ = [ + "aa_creators", + "aa_attributes", + "aa_methods", + "aa_elementwises", + "aa_statisticals", + "aa_inplace_operators", + "aa_reflected_operators", + "aa_datatypes", + "aa_datatype_functions", + "aa_searching", + "aa_sorting", + "aa_set", + "aa_utility", + "aa_constants", + "aa_arraydir", + "aa_tldir", + "aa_tlfuncs", + "aa_arrayfuncs", + "aa_methods_s", + "aa_methods_a", +] aa_creators = [ - "arange", #(start, /, stop=None, step=1, *, dtype=None, device=None) - "asarray", #(obj, /, *, dtype=None, device=None, copy=None) - "empty", #(shape, *, dtype=None, device=None) - "empty_like", #(x, /, *, dtype=None, device=None) - "eye", #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) - "from_dlpack", #(x, /) - "full", #(shape, fill_value, *, dtype=None, device=None) - "full_like", #(x, /, fill_value, *, dtype=None, device=None) - "linspace", #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) - "meshgrid", #(*arrays, indexing=’xy’) - "ones", #(shape, *, dtype=None, device=None) - "ones_like", #(x, /, *, dtype=None, device=None) - "zeros", #(shape, *, dtype=None, device=None) - "zeros_like", #(x, /, *, dtype=None, device=None) + "arange", # (start, /, stop=None, step=1, *, dtype=None, device=None) + "asarray", # (obj, /, *, dtype=None, device=None, copy=None) + "empty", # (shape, *, dtype=None, device=None) + "empty_like", # (x, /, *, dtype=None, device=None) + "eye", # (n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) + "from_dlpack", # (x, /) + "full", # (shape, fill_value, *, dtype=None, device=None) + "full_like", # (x, /, fill_value, *, dtype=None, device=None) + "linspace", # (start, stop, /, num, *, dtype=None, device=None, endpoint=True) + "meshgrid", # (*arrays, indexing=’xy’) + "ones", # (shape, *, dtype=None, device=None) + "ones_like", # (x, /, *, dtype=None, device=None) + "zeros", # (shape, *, dtype=None, device=None) + "zeros_like", # (x, /, *, dtype=None, device=None) ] aa_attributes = [ @@ -76,64 +99,64 @@ ] aa_datatype_functions = [ - "broadcast_arrays", #(*arrays) - "broadcast_to", #(x, /, shape) - "can_cast", #(from_, to, /) - "finfo", #(type, /) - "iinfo", #(type, /) - "result_type", #(*arrays_and_dtypes) + "broadcast_arrays", # (*arrays) + "broadcast_to", # (x, /, shape) + "can_cast", # (from_, to, /) + "finfo", # (type, /) + "iinfo", # (type, /) + "result_type", # (*arrays_and_dtypes) ] - + aa_methods = [ - "__abs__", #(self, /) - "__add__", #(self, other, /) - "__and__", #(self, other, /) - "__array_namespace__", 
#(self, /, *, api_version=None) - "__bool__", #(self, /) - "__dlpack__", #(self, /, *, stream=None) - "__dlpack_device__", #(self, /) - "__eq__", #(self, other, /) - "__float__", #(self, /) - "__floordiv__", #(self, other, /) - "__ge__", #(self, other, /) - "__getitem__", #(self, key, /) - "__gt__", #(self, other, /) - "__int__", #(self, /) - "__invert__", #(self, /) - "__le__", #(self, other, /) - "__len__", #(self, /) - "__lshift__", #(self, other, /) - "__lt__", #(self, other, /) - "__matmul__", #(self, other, /) - "__mod__", #(self, other, /) - "__mul__", #(self, other, /) - "__ne__", #(self, other, /) - "__neg__", #(self, /) - "__or__", #(self, other, /) - "__pos__", #(self, /) - "__pow__", #(self, other, /) - "__rshift__", #(self, other, /) - "__setitem__", #(self, key, value, /) - "__sub__", #(self, other, /) - "__truediv__", #(self, other, /) - "__xor__", #(self, other, /) + "__abs__", # (self, /) + "__add__", # (self, other, /) + "__and__", # (self, other, /) + "__array_namespace__", # (self, /, *, api_version=None) + "__bool__", # (self, /) + "__dlpack__", # (self, /, *, stream=None) + "__dlpack_device__", # (self, /) + "__eq__", # (self, other, /) + "__float__", # (self, /) + "__floordiv__", # (self, other, /) + "__ge__", # (self, other, /) + "__getitem__", # (self, key, /) + "__gt__", # (self, other, /) + "__int__", # (self, /) + "__invert__", # (self, /) + "__le__", # (self, other, /) + "__len__", # (self, /) + "__lshift__", # (self, other, /) + "__lt__", # (self, other, /) + "__matmul__", # (self, other, /) + "__mod__", # (self, other, /) + "__mul__", # (self, other, /) + "__ne__", # (self, other, /) + "__neg__", # (self, /) + "__or__", # (self, other, /) + "__pos__", # (self, /) + "__pow__", # (self, other, /) + "__rshift__", # (self, other, /) + "__setitem__", # (self, key, value, /) + "__sub__", # (self, other, /) + "__truediv__", # (self, other, /) + "__xor__", # (self, other, /) ] aa_creators = [ - "arange", #(start, /, stop=None, step=1, *, dtype=None, device=None) - "asarray", #(obj, /, *, dtype=None, device=None, copy=None) - "empty", #(shape, *, dtype=None, device=None) - "empty_like", #(x, /, *, dtype=None, device=None) - "eye", #(n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) - "from_dlpack", #(x, /) - "full", #(shape, fill_value, *, dtype=None, device=None) - "full_like", #(x, /, fill_value, *, dtype=None, device=None) - "linspace", #(start, stop, /, num, *, dtype=None, device=None, endpoint=True) - "meshgrid", #(*arrays, indexing=’xy’) - "ones", #(shape, *, dtype=None, device=None) - "ones_like", #(x, /, *, dtype=None, device=None) - "zeros", #(shape, *, dtype=None, device=None) - "zeros_like", #(x, /, *, dtype=None, device=None) + "arange", # (start, /, stop=None, step=1, *, dtype=None, device=None) + "asarray", # (obj, /, *, dtype=None, device=None, copy=None) + "empty", # (shape, *, dtype=None, device=None) + "empty_like", # (x, /, *, dtype=None, device=None) + "eye", # (n_rows, n_cols=None, /, *, k=0, dtype=None, device=None) + "from_dlpack", # (x, /) + "full", # (shape, fill_value, *, dtype=None, device=None) + "full_like", # (x, /, fill_value, *, dtype=None, device=None) + "linspace", # (start, stop, /, num, *, dtype=None, device=None, endpoint=True) + "meshgrid", # (*arrays, indexing=’xy’) + "ones", # (shape, *, dtype=None, device=None) + "ones_like", # (x, /, *, dtype=None, device=None) + "zeros", # (shape, *, dtype=None, device=None) + "zeros_like", # (x, /, *, dtype=None, device=None) ] aa_attributes = [ @@ -146,112 +169,112 @@ ] aa_methods_a 
= [ - "__abs__", #(self, /) - "__add__", #(self, other, /) - "__floordiv__", #(self, other, /) - "__invert__", #(self, /) - "__lshift__", #(self, other, /) - "__matmul__", #(self, other, /) - "__mod__", #(self, other, /) - "__mul__", #(self, other, /) - "__neg__", #(self, /) - "__pos__", #(self, /) - "__pow__", #(self, other, /) - "__rshift__", #(self, other, /) - "__sub__", #(self, other, /) - "__truediv__", #(self, other, /) - "__getitem__", #(self, key, /) - "__setitem__", #(self, key, value, /) - "__eq__", #(self, other, /) - "__ge__", #(self, other, /) - "__gt__", #(self, other, /) - "__le__", #(self, other, /) - "__lt__", #(self, other, /) - "__ne__", #(self, other, /) - "__and__", #(self, other, /) - "__or__", #(self, other, /) - "__xor__", #(self, other, /) + "__abs__", # (self, /) + "__add__", # (self, other, /) + "__floordiv__", # (self, other, /) + "__invert__", # (self, /) + "__lshift__", # (self, other, /) + "__matmul__", # (self, other, /) + "__mod__", # (self, other, /) + "__mul__", # (self, other, /) + "__neg__", # (self, /) + "__pos__", # (self, /) + "__pow__", # (self, other, /) + "__rshift__", # (self, other, /) + "__sub__", # (self, other, /) + "__truediv__", # (self, other, /) + "__getitem__", # (self, key, /) + "__setitem__", # (self, key, value, /) + "__eq__", # (self, other, /) + "__ge__", # (self, other, /) + "__gt__", # (self, other, /) + "__le__", # (self, other, /) + "__lt__", # (self, other, /) + "__ne__", # (self, other, /) + "__and__", # (self, other, /) + "__or__", # (self, other, /) + "__xor__", # (self, other, /) ] aa_methods_s = [ - "__array_namespace__", #(self, /, *, api_version=None) - "__bool__", #(self, /) - "__dlpack__", #(self, /, *, stream=None) - "__dlpack_device__", #(self, /) - "__float__", #(self, /) - "__int__", #(self, /) - "__len__", #(self, /) + "__array_namespace__", # (self, /, *, api_version=None) + "__bool__", # (self, /) + "__dlpack__", # (self, /, *, stream=None) + "__dlpack_device__", # (self, /) + "__float__", # (self, /) + "__int__", # (self, /) + "__len__", # (self, /) ] aa_methods = aa_methods_s + aa_methods_a aa_elementwises = [ - "abs", #(x, /) - "acos", #(x, /) - "acosh", #(x, /) - "add", #(x1, x2, /) - "asin", #(x, /) - "asinh", #(x, /) - "atan", #(x, /) - "atan2", #(x1, x2, /) - "atanh", #(x, /) - "bitwise_and", #(x1, x2, /) - "bitwise_left_shift", #(x1, x2, /) - "bitwise_invert", #(x, /) - "bitwise_or", #(x1, x2, /) - "bitwise_right_shift", #(x1, x2, /) - "bitwise_xor", #(x1, x2, /) - "ceil", #(x, /) - "cos", #(x, /) - "cosh", #(x, /) - "divide", #(x1, x2, /) - "equal", #(x1, x2, /) - "exp", #(x, /) - "expm1", #(x, /) - "floor", #(x, /) - "floor_divide", #(x1, x2, /) - "greater", #(x1, x2, /) - "greater_equal", #(x1, x2, /) - "isfinite", #(x, /) - "isinf", #(x, /) - "isnan", #(x, /) - "less", #(x1, x2, /) - "less_equal", #(x1, x2, /) - "log", #(x, /) - "log1p", #(x, /) - "log2", #(x, /) - "log10", #(x, /) - "logaddexp", #(x1, x2) - "logical_and", #(x1, x2, /) - "logical_not", #(x, /) - "logical_or", #(x1, x2, /) - "logical_xor", #(x1, x2, /) - "multiply", #(x1, x2, /) - "negative", #(x, /) - "not_equal", #(x1, x2, /) - "positive", #(x, /) - "pow", #(x1, x2, /) - "remainder", #(x1, x2, /) - "round", #(x, /) - "sign", #(x, /) - "sin", #(x, /) - "sinh", #(x, /) - "square", #(x, /) - "sqrt", #(x, /) - "subtract", #(x1, x2, /) - "tan", #(x, /) - "tanh", #(x, /) - "trunc", #(x, /) + "abs", # (x, /) + "acos", # (x, /) + "acosh", # (x, /) + "add", # (x1, x2, /) + "asin", # (x, /) + "asinh", # (x, /) + "atan", # (x, /) + "atan2", # 
(x1, x2, /) + "atanh", # (x, /) + "bitwise_and", # (x1, x2, /) + "bitwise_left_shift", # (x1, x2, /) + "bitwise_invert", # (x, /) + "bitwise_or", # (x1, x2, /) + "bitwise_right_shift", # (x1, x2, /) + "bitwise_xor", # (x1, x2, /) + "ceil", # (x, /) + "cos", # (x, /) + "cosh", # (x, /) + "divide", # (x1, x2, /) + "equal", # (x1, x2, /) + "exp", # (x, /) + "expm1", # (x, /) + "floor", # (x, /) + "floor_divide", # (x1, x2, /) + "greater", # (x1, x2, /) + "greater_equal", # (x1, x2, /) + "isfinite", # (x, /) + "isinf", # (x, /) + "isnan", # (x, /) + "less", # (x1, x2, /) + "less_equal", # (x1, x2, /) + "log", # (x, /) + "log1p", # (x, /) + "log2", # (x, /) + "log10", # (x, /) + "logaddexp", # (x1, x2) + "logical_and", # (x1, x2, /) + "logical_not", # (x, /) + "logical_or", # (x1, x2, /) + "logical_xor", # (x1, x2, /) + "multiply", # (x1, x2, /) + "negative", # (x, /) + "not_equal", # (x1, x2, /) + "positive", # (x, /) + "pow", # (x1, x2, /) + "remainder", # (x1, x2, /) + "round", # (x, /) + "sign", # (x, /) + "sin", # (x, /) + "sinh", # (x, /) + "square", # (x, /) + "sqrt", # (x, /) + "subtract", # (x1, x2, /) + "tan", # (x, /) + "tanh", # (x, /) + "trunc", # (x, /) ] aa_statisticals = [ - "max", #(x, /, *, axis=None, keepdims=False) - "mean", #(x, /, *, axis=None, keepdims=False) - "min", #(x, /, *, axis=None, keepdims=False) - "prod", #(x, /, *, axis=None, keepdims=False) - "std", #(x, /, *, axis=None, correction=0.0, keepdims=False) - "sum", #(x, /, *, axis=None, keepdims=False) - "var", #(x, /, *, axis=None, correction=0.0, keepdims=False) + "max", # (x, /, *, axis=None, keepdims=False) + "mean", # (x, /, *, axis=None, keepdims=False) + "min", # (x, /, *, axis=None, keepdims=False) + "prod", # (x, /, *, axis=None, keepdims=False) + "std", # (x, /, *, axis=None, correction=0.0, keepdims=False) + "sum", # (x, /, *, axis=None, keepdims=False) + "var", # (x, /, *, axis=None, correction=0.0, keepdims=False) ] aa_searching = [ @@ -282,7 +305,16 @@ "pi", ] -aa_tlfuncs = aa_creators + aa_elementwises + aa_statisticals + aa_datatype_functions + aa_searching + aa_sorting + aa_set + aa_utility +aa_tlfuncs = ( + aa_creators + + aa_elementwises + + aa_statisticals + + aa_datatype_functions + + aa_searching + + aa_sorting + + aa_set + + aa_utility +) aa_tldir = aa_tlfuncs + aa_datatypes + aa_constants aa_arrayfuncs = aa_methods + aa_inplace_operators + aa_reflected_operators aa_arraydir = aa_attributes + aa_arrayfuncs diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 54f1137b19..f2cdd549c0 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -22,31 +22,33 @@ ############################################################################### -# Distribution engine. -# - schedules same tasks on all workers -# - handles dependences seperately -# -# Whe tasks are submitted on root rank they are pushed on a queue and a -# handle/future is returned. When computation is requested by calling go() -# all tasks on the queue are sent to workers and executed on all ranks -# sequentially. -# -# We store tasks in the same order as they are submitted on the root rank. -# For any valid program this must be a legal ordering there is no need to check -# if dependent objects are ready when a task is executed. A more sophisticated -# scheduler could potentially try to parallelize. It remains to be invistigated -# if this would be a profitable feature, though. -# -# Dependent objects have a unique identifier, assigned when a handle to it is -# created. 
We assume that all workers execute handle-creation in identical order.
-# Such dependences are assumed to be global entities, e.g. each worker holds
-# a handle/reference to it (e.g. like a heat.DNDarray). The local handles
-# exist on each, stored in a worker-local dictionary. Thsi allows identifying
-# dependences through simple integers.
-#
-# Notice, mpi4py does not provide ibcast, so we cannot overlap. This makes the
-# above aggregation particularly promising. Another option woujld be to write
-# this in C/C++ and use ibcast.
+"""
+Distribution engine.
+ - schedules same tasks on all workers
+ - handles dependences separately
+
+When tasks are submitted on root rank they are pushed on a queue and a
+handle/future is returned. When computation is requested by calling go()
+all tasks on the queue are sent to workers and executed on all ranks
+sequentially.
+
+We store tasks in the same order as they are submitted on the root rank.
+For any valid program this must be a legal ordering, so there is no need to check
+if dependent objects are ready when a task is executed. A more sophisticated
+scheduler could potentially try to parallelize. It remains to be investigated
+if this would be a profitable feature, though.
+
+Dependent objects have a unique identifier, assigned when a handle to it is
+created. We assume that all workers execute handle-creation in identical order.
+Such dependences are assumed to be global entities, e.g. each worker holds
+a handle/reference to it (e.g. like a heat.DNDarray). The local handles
+exist on each, stored in a worker-local dictionary. This allows identifying
+dependences through simple integers.
+
+Notice, mpi4py does not provide ibcast, so we cannot overlap. This makes the
+above aggregation particularly promising. Another option would be to write
+this in C/C++ and use ibcast.
+"""
 ###############################################################################
@@ -68,9 +70,10 @@ class _TaskQueue:
     We currently dissallow submitting tasks by on-root ranks.
     Non-root ranks get their TaskQueue set in the recv-lop if init().
     """
+
     def __init__(self):
        # here we store all tasks that have not been executed yet
-        self._taskQueue = deque() 
+        self._taskQueue = deque()

     def submit(self, rtask):
         """
@@ -110,8 +113,7 @@ def start():
     if _comm.rank != 0:
         done = False
         header = None
-        rtask = None
-        while(not done):
+        while not done:
             # wait in bcast for work
             header = _comm.bcast(header, 0)
             # then see what we need to do
@@ -142,7 +144,7 @@ def go():
     Trigger execution of all tasks which are still in flight.
     """
     assert _comm.rank == 0
-    header = [TASK, _tQueue._taskQueue ]
+    header = [TASK, _tQueue._taskQueue]
     _, _ = _comm.bcast(header, 0)
     header = [GO]
     _ = _comm.bcast(header, 0)
@@ -177,15 +179,21 @@ def __init__(self):
         Handle._nextId += 1

     def set(self, obj):
-        "Make object available."
+        """
+        Make object available.
+ """ self._obj = obj def getId(self): - "Return future/handle id" + """ + Return future/handle id + """ return self._id def get(self): - "Return object or None" + """ + Return object or None + """ go() return self._obj @@ -213,7 +221,6 @@ def __init__(self, task, deps, numout): else: self._handle = tuple(Handle() for _ in range(self._nOut)) - # here we store objects that are input dependences to tasks s_pms = {} From 533194abdd081892b1504000739e60f7d0e8fb46 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Mon, 5 Jul 2021 05:01:10 -0500 Subject: [PATCH 05/22] serving picky black --- heat/cw4heat/arrayapi.py | 46 ++++++---------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/heat/cw4heat/arrayapi.py b/heat/cw4heat/arrayapi.py index d1ac6fc85b..785da687c1 100644 --- a/heat/cw4heat/arrayapi.py +++ b/heat/cw4heat/arrayapi.py @@ -43,14 +43,7 @@ "zeros_like", # (x, /, *, dtype=None, device=None) ] -aa_attributes = [ - "dtype", - "device", - "ndim", - "shape", - "size", - "T", -] +aa_attributes = ["dtype", "device", "ndim", "shape", "size", "T"] aa_inplace_operators = [ "__iadd__", @@ -159,14 +152,7 @@ "zeros_like", # (x, /, *, dtype=None, device=None) ] -aa_attributes = [ - "dtype", - "device", - "ndim", - "shape", - "size", - "T", -] +aa_attributes = ["dtype", "device", "ndim", "shape", "size", "T"] aa_methods_a = [ "__abs__", # (self, /) @@ -277,33 +263,15 @@ "var", # (x, /, *, axis=None, correction=0.0, keepdims=False) ] -aa_searching = [ - "argmax", - "argmin", - "nonzero", - "where", -] +aa_searching = ["argmax", "argmin", "nonzero", "where"] -aa_sorting = [ - "argsort", - "sort", -] +aa_sorting = ["argsort", "sort"] -aa_set = [ - "unique", -] +aa_set = ["unique"] -aa_utility = [ - "all", - "any", -] +aa_utility = ["all", "any"] -aa_constants = [ - "e", - "inf", - "nan", - "pi", -] +aa_constants = ["e", "inf", "nan", "pi"] aa_tlfuncs = ( aa_creators From ee414d9253769c684a0fdf2c42d9b4adfcd68233 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 7 Jul 2021 05:24:32 -0500 Subject: [PATCH 06/22] first cut for supporting ray actors. Controller no longer a worker. --- heat/cw4heat/__init__.py | 99 ++++++++++++++--------- heat/cw4heat/distributor.py | 155 ++++++++++++++++++++++-------------- heat/cw4heat/ray_runner.py | 137 +++++++++++++++++++++++++++++++ 3 files changed, 293 insertions(+), 98 deletions(-) create mode 100644 heat/cw4heat/ray_runner.py diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index ba033b75dc..e679972a80 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -51,8 +51,10 @@ """ ############################################################################### +from mpi4py import MPI +from os import getenv, getpid import atexit -from . import distributor +from .distributor import Distributor from .arrayapi import ( aa_attributes, aa_tlfuncs, @@ -71,6 +73,10 @@ impl_str = "impl" dndarray_str = "impl.DNDarray" +_distributor = None +_comm = None +_fini = None + def init(): """ @@ -78,8 +84,31 @@ def init(): For now we assume all ranks (controller and workers) are started through mpirun, workers will never leave distributor.start() and so this function. 
""" - distributor.init() - distributor.start() + global _distributor + global _comm + global _fini + + if _distributor is not None: + return + + _launcher = getenv("CW4H_LAUNCHER", default="mpi").lower() + + def _setComm(c): + return impl.use_comm(impl.MPICommunication(c.Create(c.group.Excl([0])))) + + # atexit.register(fini) + if _launcher == "ray": + from .ray_runner import init as ray_init, fini as ray_fini + + _comm, _distributor, _futures = ray_init(_setComm) + _distributor.start(initImpl=_setComm) + _fini = ray_fini + elif _launcher == "mpi": + _comm = MPI.COMM_WORLD + _distributor = Distributor(_comm) + _distributor.start(initImpl=_setComm) + else: + raise Exception(f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', or 'ray'.") def fini(): @@ -87,7 +116,9 @@ def fini(): Finalize/shutdown distribution engine. Automatically called at exit. When called on controller, workers will sys.exit from init(). """ - distributor.fini() + _distributor.fini() + if _fini: + _fini() class _Task: @@ -126,7 +157,7 @@ def _submit(name, args, kwargs, unwrap="*", numout=1): """ scalar_args = tuple(x for x in args if not isinstance(x, DDParray)) deps = [x._handle.getId() for x in args if isinstance(x, DDParray)] - return distributor.submitPP(_Task(name, scalar_args, kwargs, unwrap=unwrap), deps, numout) + return _distributor.submitPP(_Task(name, scalar_args, kwargs, unwrap=unwrap), deps, numout) def _submitProperty(name, self): @@ -135,7 +166,7 @@ def _submitProperty(name, self): """ t = _PropertyTask(name) try: - res = distributor.submitPP(t, [self._handle.getId()]) + res = _distributor.submitPP(t, [self._handle.getId()]) except Exception: assert False return res @@ -168,13 +199,14 @@ def __init__(self, handle): Do not use this array. Use creator functions instead. """ self._handle = handle + self._attributes = None - def heat(self): - """ - Return heat native array. - With delayed execution, triggers computation as needed and blocks until array is available. - """ - return self._handle.get() + # def heat(self): + # """ + # Return heat native array. + # With delayed execution, triggers computation as needed and blocks until array is available. + # """ + # return _distributor.get(self._handle) def __getitem__(self, key): """ @@ -211,40 +243,31 @@ def T(self): f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" ) - for method in aa_methods_s: + for method in aa_methods_s + ["__str__"]: if hasattr(dndarray, method): exec( - f"{method} = lambda self, *args, **kwargs: _submit('{dndarray_str}.{method}', (self, *args), kwargs).get()" + f"{method} = lambda self, *args, **kwargs: _distributor.get(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" ) - for attr in aa_attributes: - if attr != "T" and hasattr(dndarray, attr): - exec(f"{attr} = property(lambda self: self._handle.get().{attr})") - def __getattr__(self, attr): - # attributes are special - if attr not in aa_attributes: - raise Exception(f"unknown method/attribute {attr} requested") - - -####################################################################### -# first define top-level functions which need special care. -####################################################################### - -# np.concatenate accepts a list of arrays (not individual arrays) -# so we let the task not unwrap the list of deps -def concatenate(*args, **kwargs): - """ - Wrapper for impl.concatenate. 
- """ - return DDParray(_submit(f"{impl_str}.concatenate", *args, kwargs, unwrap="")) + """ + Get attributes. + Caches attributes from workers, so we communicate only once. + """ + if self._attributes is None: + self._attributes = _distributor.get( + _submit( + "(lambda a: {x: getattr(a, x) for x in aa_attributes if x != 'T'})", (self,), {} + ) + ) + return self._attributes[attr] ####################################################################### # first define top-level functions through the standard process. ####################################################################### # - creating arrays -# - elementswise operations +# - elementwise operations # - statistical operations # (lists taken from list of methods in array-API) # Again, we simply make lambdas which submit appropriate Tasks @@ -261,13 +284,15 @@ def concatenate(*args, **kwargs): ) +# np.concatenate/hstack accept a list of arrays (not individual arrays) +# so we let the task not unwrap the list of deps for func in ["concatenate", "hstack"]: exec( f"{func} = lambda *args, **kwargs: DDParray(_submit(f'{impl_str}.{func}', *args, kwargs, unwrap=''))" ) -# Here we data types and constants +# Here we define data types and constants for attr in aa_datatypes + aa_constants: if hasattr(impl, attr): exec(f"{attr} = {impl_str}.{attr}") @@ -292,5 +317,3 @@ class random: ####################################################################### ####################################################################### -atexit.register(fini) -init() diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index f2cdd549c0..21d3358698 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -56,12 +56,12 @@ import sys from collections import deque -_comm = MPI.COMM_WORLD # define identifiers END = 0 TASK = 1 GO = 2 +GET = 3 class _TaskQueue: @@ -79,7 +79,6 @@ def submit(self, rtask): """ Sumbit a task to queue. Will not run it. """ - assert _comm.rank == 0 self._taskQueue.append(rtask) return rtask._handle @@ -89,74 +88,107 @@ def go(self): We assume tasks were submitted in in a valid order, e.g. in an order that guarntees no task is dependent on another task that is behind it in the queue. """ + print("Executing tasks", len(self._taskQueue), flush=True) while len(self._taskQueue): self._taskQueue.popleft().go() + def len(self): + return len(self._taskQueue) -# Our queue of tasks. -_tQueue = _TaskQueue() + def clear(self): + self._taskQueue.clear() -def init(): +class Distributor: """ - Init distributor. + Instances of this class distribute work from controller to workers. + Work-items are treated as dependent tasks. """ - pass + def __init__(self, comm=MPI.COMM_WORLD): + """ + Init distributor, optionally accepts MPI communicator. + """ + self._comm = comm + # Our queue of tasks. + self._tQueue = _TaskQueue() -def start(): - """ - Start distribution engine. - Controller inits and returns. - Workers enter recv-loop and exit program when fini si called. - """ - if _comm.rank != 0: - done = False - header = None - while not done: - # wait in bcast for work - header = _comm.bcast(header, 0) - # then see what we need to do - if header[0] == END: - done = True - break - elif header[0] == TASK: - _tQueue._taskQueue = header[1] - elif header[0] == GO: - # no delayed execution for now -> nothing to do - _tQueue.go() - else: - raise Exception("Worker received unknown tag") - sys.exit() - - -def fini(): - """ - Control sends end-tag. Workers will sys.exit. 
- """ - if _comm.rank == 0: - header = [END] - header = _comm.bcast(header, 0) - - -def go(): - """ - Trigger execution of all tasks which are still in flight. - """ - assert _comm.rank == 0 - header = [TASK, _tQueue._taskQueue] - _, _ = _comm.bcast(header, 0) - header = [GO] - _ = _comm.bcast(header, 0) - _tQueue.go() - + def start(self, doExit=True, initImpl=None): + """ + Start distribution engine. + Controller inits and returns. + Workers enter recv-loop and exit program when fini is called. + """ + if initImpl: + initImpl(self._comm) + if self._comm.rank != 0: + done = False + header = None + while not done: + # wait in bcast for work + header = self._comm.bcast(header, 0) + # then see what we need to do + if header[0] == END: + done = True + break + elif header[0] == TASK: + self._tQueue._taskQueue = header[1] + elif header[0] == GO: + self._tQueue.go() + elif header[0] == GET: + if self._comm.rank == 1: + val = _RemoteTask.getVal(header[1]) + self._comm.send(val, dest=0, tag=GET) + else: + raise Exception("Worker received unknown tag") + self._comm.Barrier() + MPI.Finalize() + if doExit: + sys.exit() + + def fini(self): + """ + Control sends end-tag. Workers will sys.exit. + """ + if MPI.Is_initialized() and self._comm.rank == 0: + header = [END] + header = self._comm.bcast(header, 0) + self._comm.Barrier() + MPI.Finalize() -def submitPP(task, deps, numout=1): - """ - Submit a process-parallel task and return a handle/future. - """ - rtask = _RemoteTask(task, deps, numout) - return _tQueue.submit(rtask) + def go(self): + """ + Trigger execution of all tasks which are still in flight. + """ + assert self._comm.rank == 0 + if self._tQueue.len(): + header = [TASK, self._tQueue._taskQueue] + _, _ = self._comm.bcast(header, 0) + header = [GO] + _ = self._comm.bcast(header, 0) + self._tQueue.clear() + + def get(self, handle): + """ + Get actualy value from handle. + Requires communication. + We get the value from worker 0 (rank 1 in global comm). + Does not work for arrays (yet). + """ + assert self._comm.rank == 0 + self.go() + header = [GET, handle.getId()] + _ = self._comm.bcast(header, 0) + val = self._comm.recv(source=1, tag=GET) + handle.set(val) + return val + + def submitPP(self, task, deps, numout=1): + """ + Submit a process-parallel task and return a handle/future. + """ + rtask = _RemoteTask(task, deps, numout) + return self._tQueue.submit(rtask) class Handle: @@ -194,7 +226,6 @@ def get(self): """ Return object or None """ - go() return self._obj @@ -240,3 +271,7 @@ def go(self): _RemoteTask.s_pms[h.getId()] = res[i] i += 1 return self._handle + + @staticmethod + def getVal(id): + return _RemoteTask.s_pms[id] diff --git a/heat/cw4heat/ray_runner.py b/heat/cw4heat/ray_runner.py new file mode 100644 index 0000000000..f318c25fec --- /dev/null +++ b/heat/cw4heat/ray_runner.py @@ -0,0 +1,137 @@ +# =============================================================================== +# Copyright 2014-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================== + +""" +A Ray backend for HeAT controller-worker wrapper. + +1. Init() nitializes actors + - one for each node in the existing ray cluster + - actors connect through MPI +2. Start actors + - actors will sit in recv-loop and wait for work +3. fini() kills all actors. + - Make sure you let distributor end recv-loop before calling this. +""" + +from mpi4py import MPI +import ray +from ray.services import get_node_ip_address as getIP +from .distributor import Distributor +import os + +_actors = {} + + +@ray.remote +class RayActor: + """ + A ray actor which connects to other actors and controller through MPI. + """ + + def __init__(self, node): + self.node = node + self._commWorld = MPI.COMM_SELF + self._distributor = None + print("Actor up", flush=True) + + def connect(self, port, nWorkers): + """ + Let nWorkers-many processes connect to controller process. + """ + print("Actor connecting", flush=True) + # workers go here + # initial connect + intercomm = self._commWorld.Connect(port) + # merge communicator + self._commWorld = intercomm.Merge(1) + intercomm.Disconnect() + rank = self._commWorld.rank + print(f"Yey, rank {rank} connected!") + # collectively accept connections from all (other) clients + for i in range(rank, nWorkers): + # connect to next worker (collectively) + intercomm = self._commWorld.Accept(port) + # merge communicators + self._commWorld = intercomm.Merge(0) + intercomm.Disconnect() + # setup our distributor + assert self._distributor is None + self._distributor = Distributor(self._commWorld) + return None + + def start(self, initImpl=None): + """ + Enter receive-loop as provided by distributor. + """ + print("actor.start", self._distributor, flush=True) + self._distributor.start(doExit=False, initImpl=initImpl) + print("Actor done!") + + +def _initActors(initImpl=None): + """ + Initalize our (SPMD) actors, one per node in ray cluster and make them + connect through MPI. + Controller (calling process) gets connection config and then + passes it to init function on each actor. + """ + global _actors + if not ray.is_initialized(): + ray.init(address="auto") + # first create one actor per node in the ray cluster + for node in ray.cluster_resources(): + if "node" in node: + name = node.split(":")[-1] + print(os.getpid(), "starting", name, flush=True) + _actors[name] = RayActor.options(resources={node: 1}).remote( + name + ) # runtime_env={"I_MPI_FABRICS": "ofi"} + nw = len(_actors) # number of workers + print(nw, flush=True) + comm = MPI.COMM_SELF + # Get Port for MPI connections + port = MPI.Open_port(MPI.INFO_NULL) + # make all actors connect + x = [_actors[a].connect.remote(port, nw) for a in _actors] + for i in range(nw): + # connect to next worker (collectively) + intercomm = comm.Accept(port) + # merge communicators + comm = intercomm.Merge(0) + intercomm.Disconnect() + print("Connected", i, flush=True) + # wait for connections to be established + r = ray.get(x) + print("All connected", r, _actors, flush=True) + x = [_actors[a].start.remote(initImpl) for a in _actors] + print("All started", flush=True) + # setup our distributor + return (comm, Distributor(comm), x) + + +def _finiActors(): + """ + Finalize Ray Actors: killing actor processes. 
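The port-based bootstrap used by connect() and _initActors() is the standard MPI dynamic-process dance; both sides in isolation (a sketch, assuming the MPI implementation supports Open_port/Accept/Connect; error handling omitted):

    from mpi4py import MPI

    def controller_accept(n_workers):
        # controller: open a port, absorb each worker into a growing comm
        comm = MPI.COMM_SELF
        port = MPI.Open_port(MPI.INFO_NULL)
        for _ in range(n_workers):
            inter = comm.Accept(port)   # collective over the current comm
            comm = inter.Merge(False)   # controller keeps the low ranks
            inter.Disconnect()
        return comm, port

    def worker_join(port, n_workers):
        # worker: connect once, then help accept everyone arriving later
        inter = MPI.COMM_SELF.Connect(port)
        comm = inter.Merge(True)        # appended after the existing ranks
        inter.Disconnect()
        for _ in range(comm.rank, n_workers):
            inter = comm.Accept(port)
            comm = inter.Merge(False)
            inter.Disconnect()
        return comm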
+ """ + global _actors + if ray.is_initialized(): + print("Killing actors") + for a in _actors.values(): + ray.kill(a) + + +init = _initActors +fini = _finiActors From c0c8c1658aa04da91a506a7a50d98ea11f9b293e Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Fri, 9 Jul 2021 03:23:55 -0500 Subject: [PATCH 07/22] adding __partitioned__ --- heat/core/dndarray.py | 92 +++++++++++++++++++++++++++++++++++++ heat/cw4heat/__init__.py | 52 +++++++++++++++++++-- heat/cw4heat/distributor.py | 38 +++++++++++---- 3 files changed, 168 insertions(+), 14 deletions(-) diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py index a7edcf7e7b..fe3cbd289d 100644 --- a/heat/core/dndarray.py +++ b/heat/core/dndarray.py @@ -611,6 +611,98 @@ def create_lshape_map(self, force_check: bool = True) -> torch.Tensor: self.__lshape_map = lshape_map return lshape_map + def create_partition_interface(self, no_data=False): + """ + Create a partition interface in line with the DPPY proposal. This is subject to change. + The intention of this to facilitate the usage of a general format for the referencing of + distributed datasets. + An example of the output and shape is shown below. + __partitioned__ = { + 'shape': (27, 3, 2), + 'partition_tiling': (4, 1, 1), + 'partitions': { + (0, 0, 0): { + 'start': (0, 0, 0), + 'shape': (7, 3, 2), + 'data': tensor([...], dtype=torch.int32), + 'location': 0, + 'dtype': torch.int32, + 'device': 'cpu' + }, + (1, 0, 0): { + 'start': (7, 0, 0), + 'shape': (7, 3, 2), + 'data': None, + 'location': 1, + 'dtype': torch.int32, + 'device': 'cpu' + }, + (2, 0, 0): { + 'start': (14, 0, 0), + 'shape': (7, 3, 2), + 'data': None, + 'location': 2, + 'dtype': torch.int32, + 'device': 'cpu' + }, + (3, 0, 0): { + 'start': (21, 0, 0), + 'shape': (6, 3, 2), + 'data': None, + 'location': 3, + 'dtype': torch.int32, + 'device': 'cpu' + } + }, + 'locals': [(rank, 0, 0)], + } + Returns + ------- + dictionary containing the partition interface as shown above. + """ + # sp = + lshape_map = self.create_lshape_map() + start_idx_map = torch.zeros_like(lshape_map) + + part_tiling = [1] * self.ndim + lcls = [0] * self.ndim + + z = torch.tensor([0], device=self.device.torch_device, dtype=self.dtype.torch_type()) + if self.split is not None: + starts = torch.cat((z, torch.cumsum(lshape_map[:, self.split], dim=0)[:-1]), dim=0) + lcls[self.split] = self.comm.rank + part_tiling[self.split] = self.comm.size + else: + starts = torch.zeros(self.ndim, dtype=torch.int, device=self.device.torch_device) + + start_idx_map[:, self.split] = starts + + partitions = {} + base_key = [0] * self.ndim + for r in range(self.comm.size): + if self.split is not None: + base_key[self.split] = r + dat = None if no_data or r != self.comm.rank else self.larray + else: + dat = self.larray + + partitions[tuple(base_key)] = { + "start": tuple(start_idx_map[r].tolist()), + "shape": tuple(lshape_map[r].tolist()), + "data": dat, + "location": r, + "dtype": self.dtype.torch_type(), + "device": self.device.torch_device, + } + + partition_dict = { + "shape": self.gshape, + "partition_tiling": tuple(part_tiling), + "partitions": partitions, + "locals": [tuple(lcls)], + } + return partition_dict + def __float__(self) -> DNDarray: """ Float scalar casting. 
diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index e679972a80..41a4000bf2 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -53,6 +53,7 @@ from mpi4py import MPI from os import getenv, getpid +from collections import namedtuple import atexit from .distributor import Distributor from .arrayapi import ( @@ -78,7 +79,11 @@ _fini = None -def init(): +def _setComm(c): + return impl.use_comm(impl.MPICommunication(c.Create(c.group.Excl([0])))) + + +def init(doStart=True): """ Initialize distribution engine. Automatically when when importing cw4heat. For now we assume all ranks (controller and workers) are started through mpirun, @@ -93,9 +98,6 @@ def init(): _launcher = getenv("CW4H_LAUNCHER", default="mpi").lower() - def _setComm(c): - return impl.use_comm(impl.MPICommunication(c.Create(c.group.Excl([0])))) - # atexit.register(fini) if _launcher == "ray": from .ray_runner import init as ray_init, fini as ray_fini @@ -106,11 +108,21 @@ def _setComm(c): elif _launcher == "mpi": _comm = MPI.COMM_WORLD _distributor = Distributor(_comm) - _distributor.start(initImpl=_setComm) + if doStart: + _distributor.start(initImpl=_setComm) else: raise Exception(f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', or 'ray'.") +def asController(): + """ + Enter controller-worker region. + Rank 0 becomes controller, all others act as workers. + """ + init(False) + return _distributor.start(initImpl=_setComm, doExit=False) + + def fini(): """ Finalize/shutdown distribution engine. Automatically called at exit. @@ -249,6 +261,36 @@ def T(self): f"{method} = lambda self, *args, **kwargs: _distributor.get(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" ) + partRef = namedtuple("partRef", ("id", "rank")) + + # @property + def __partitioned__(self): + """ + Return partitioning meta data. + """ + + def getPartForRef(pref): + """ + Return actual partition data for given partRef. + """ + # FIXME Ray + # only supported on root rank right now + # Notice: HeAT does not use COMM_WORLD, we have to translate to global rank + assert MPI.COMM_WORLD.rank == 0 + return _distributor.getPart(pref, "larray") + + parts = _distributor.get( + _submit(f"{dndarray_str}.create_partition_interface", (self, True), {}) + ) + # Provide all data as handle/reference + for _, p in parts["partitions"].items(): + p["data"] = self.partRef(self._handle._id, p["location"] + 1) + # set getter + parts["get"] = getPartForRef + # remove SPMD local key + del parts["locals"] + return parts + def __getattr__(self, attr): """ Get attributes. 
diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 21d3358698..82137a2e47 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -62,6 +62,7 @@ TASK = 1 GO = 2 GET = 3 +GETPART = 4 class _TaskQueue: @@ -121,30 +122,38 @@ def start(self, doExit=True, initImpl=None): """ if initImpl: initImpl(self._comm) - if self._comm.rank != 0: + if self._comm.rank == 0: + return True + else: done = False header = None while not done: # wait in bcast for work header = self._comm.bcast(header, 0) # then see what we need to do - if header[0] == END: - done = True - break - elif header[0] == TASK: + if header[0] == TASK: self._tQueue._taskQueue = header[1] - elif header[0] == GO: - self._tQueue.go() elif header[0] == GET: if self._comm.rank == 1: val = _RemoteTask.getVal(header[1]) self._comm.send(val, dest=0, tag=GET) + elif header[0] == GO: + self._tQueue.go() + elif header[0] == GETPART: + if self._comm.rank == header[1]: + val = _RemoteTask.getVal(header[2]) + attr = getattr(val, header[3]) + self._comm.send(attr, dest=0, tag=GETPART) + elif header[0] == END: + done = True + break else: raise Exception("Worker received unknown tag") self._comm.Barrier() - MPI.Finalize() + # MPI.Finalize() if doExit: sys.exit() + return False def fini(self): """ @@ -154,7 +163,7 @@ def fini(self): header = [END] header = self._comm.bcast(header, 0) self._comm.Barrier() - MPI.Finalize() + # MPI.Finalize() def go(self): """ @@ -183,6 +192,17 @@ def get(self, handle): handle.set(val) return val + def getPart(self, handle, attr): + """ + Get local raw partition data for given handle. + """ + assert self._comm.rank == 0 + self.go() + header = [GETPART, handle.rank, handle.id, attr] + _ = self._comm.bcast(header, 0) + val = self._comm.recv(source=handle.rank, tag=GETPART) + return val + def submitPP(self, task, deps, numout=1): """ Submit a process-parallel task and return a handle/future. From dbc3056b9c3f6d46ef715585ceb0ca58935324c6 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Fri, 9 Jul 2021 08:04:02 -0500 Subject: [PATCH 08/22] demoing cw/region (MPI backend) --- heat/cw4heat/__init__.py | 82 +++++++++++++++++++++++----------- heat/cw4heat/distributor.py | 30 ++++++------- heat/cw4heat/examples/tcw4h.py | 39 ++++++++++++++++ heat/cw4heat/ray_runner.py | 26 +++++------ 4 files changed, 121 insertions(+), 56 deletions(-) create mode 100644 heat/cw4heat/examples/tcw4h.py diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 41a4000bf2..3989a68c0f 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -53,7 +53,6 @@ from mpi4py import MPI from os import getenv, getpid -from collections import namedtuple import atexit from .distributor import Distributor from .arrayapi import ( @@ -80,10 +79,11 @@ def _setComm(c): - return impl.use_comm(impl.MPICommunication(c.Create(c.group.Excl([0])))) + # return impl.use_comm(impl.MPICommunication(c.Create(c.group.Excl([0])))) + return impl.use_comm(impl.MPICommunication(c)) -def init(doStart=True): +def init(doStart=True, ctxt=False): """ Initialize distribution engine. Automatically when when importing cw4heat. For now we assume all ranks (controller and workers) are started through mpirun, @@ -100,6 +100,7 @@ def init(doStart=True): # atexit.register(fini) if _launcher == "ray": + assert ctxt is False, "Controller-worker context is useless with ray launcher." 
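The choice between the two launch modes is driven entirely by the environment; a hypothetical recipe for each (sketch, not normative):

    # SPMD start, rank 0 becomes the controller:
    #   CW4H_LAUNCHER=mpi mpirun -n 4 python app.py
    # Ray cluster, workers are actors that join via MPI ports:
    #   CW4H_LAUNCHER=ray python app.py
    import os

    os.environ.setdefault("CW4H_LAUNCHER", "mpi")
    import heat.cw4heat as ht

    ht.init()
    # ... array work ...
    ht.fini()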
from .ray_runner import init as ray_init, fini as ray_fini _comm, _distributor, _futures = ray_init(_setComm) @@ -114,15 +115,6 @@ def init(doStart=True): raise Exception(f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', or 'ray'.") -def asController(): - """ - Enter controller-worker region. - Rank 0 becomes controller, all others act as workers. - """ - init(False) - return _distributor.start(initImpl=_setComm, doExit=False) - - def fini(): """ Finalize/shutdown distribution engine. Automatically called at exit. @@ -133,6 +125,40 @@ def fini(): _fini() +class cw4h: + """ + Contextmanager to establish controller-worker regions within SPMD runs. + Not that useful for HeAT, but demonstrates the concept. + + >>> import heat.cw4heat as ht + >>> with ht.cw4h() as cw: + >>> if cw.controller(): + >>> a = ht.arange(8) + """ + + def __init__(self): + init(False, True) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + if _comm.rank == 0: + fini() + + def controller(self): + """ + Sends non root ranks/workers into reicv-loop and lets root rank execute + the code block protected as controller. + Non-root workers will not finish until self gets deleted. + """ + if _comm.rank == 0: + return True + else: + _distributor.start(doExit=False, initImpl=_setComm) + return False + + class _Task: """ A work item, executing functions provided as code. @@ -190,6 +216,14 @@ def _setitem_normalized(self, value, key): self.__setitem__(key, value) +def _getPartForRef(pref): + """ + Return actual partition data for given partRef. + """ + # FIXME Ray + return _distributor.getPart(pref, "larray") + + ####################################################################### # Our array is just a wrapper. Actual array is stored as a handle to # allow delayed execution. @@ -261,32 +295,28 @@ def T(self): f"{method} = lambda self, *args, **kwargs: _distributor.get(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" ) - partRef = namedtuple("partRef", ("id", "rank")) + class partRef: + """ + Handle used in __partitioned__. Identifies one chunk of a distributed array. + """ + + def __init__(self, id_, rank_): + self.id = id_ + self.rank = rank_ # @property def __partitioned__(self): """ Return partitioning meta data. """ - - def getPartForRef(pref): - """ - Return actual partition data for given partRef. - """ - # FIXME Ray - # only supported on root rank right now - # Notice: HeAT does not use COMM_WORLD, we have to translate to global rank - assert MPI.COMM_WORLD.rank == 0 - return _distributor.getPart(pref, "larray") - parts = _distributor.get( _submit(f"{dndarray_str}.create_partition_interface", (self, True), {}) ) # Provide all data as handle/reference for _, p in parts["partitions"].items(): - p["data"] = self.partRef(self._handle._id, p["location"] + 1) + p["data"] = self.partRef(self._handle._id, p["location"]) # set getter - parts["get"] = getPartForRef + parts["get"] = _getPartForRef # remove SPMD local key del parts["locals"] return parts diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 82137a2e47..0971d1f071 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -89,7 +89,6 @@ def go(self): We assume tasks were submitted in in a valid order, e.g. in an order that guarntees no task is dependent on another task that is behind it in the queue. 
""" - print("Executing tasks", len(self._taskQueue), flush=True) while len(self._taskQueue): self._taskQueue.popleft().go() @@ -134,9 +133,8 @@ def start(self, doExit=True, initImpl=None): if header[0] == TASK: self._tQueue._taskQueue = header[1] elif header[0] == GET: - if self._comm.rank == 1: - val = _RemoteTask.getVal(header[1]) - self._comm.send(val, dest=0, tag=GET) + # We do not support arrays yet, scalars do not need communication + assert False elif header[0] == GO: self._tQueue.go() elif header[0] == GETPART: @@ -146,10 +144,10 @@ def start(self, doExit=True, initImpl=None): self._comm.send(attr, dest=0, tag=GETPART) elif header[0] == END: done = True + self._comm.Barrier() break else: raise Exception("Worker received unknown tag") - self._comm.Barrier() # MPI.Finalize() if doExit: sys.exit() @@ -175,7 +173,7 @@ def go(self): _, _ = self._comm.bcast(header, 0) header = [GO] _ = self._comm.bcast(header, 0) - self._tQueue.clear() + self._tQueue.go() def get(self, handle): """ @@ -186,21 +184,21 @@ def get(self, handle): """ assert self._comm.rank == 0 self.go() - header = [GET, handle.getId()] - _ = self._comm.bcast(header, 0) - val = self._comm.recv(source=1, tag=GET) - handle.set(val) - return val + return handle.get() def getPart(self, handle, attr): """ Get local raw partition data for given handle. """ - assert self._comm.rank == 0 - self.go() - header = [GETPART, handle.rank, handle.id, attr] - _ = self._comm.bcast(header, 0) - val = self._comm.recv(source=handle.rank, tag=GETPART) + if handle.rank == self._comm.rank: + val = _RemoteTask.getVal(handle.id) + val = getattr(val, attr) + else: + # FIXME what if left CW-context (SPMD mode) ? + assert self._comm.rank == 0 + header = [GETPART, handle.rank, handle.id, attr] + _ = self._comm.bcast(header, 0) + val = self._comm.recv(source=handle.rank, tag=GETPART) return val def submitPP(self, task, deps, numout=1): diff --git a/heat/cw4heat/examples/tcw4h.py b/heat/cw4heat/examples/tcw4h.py new file mode 100644 index 0000000000..90ed7e5bc7 --- /dev/null +++ b/heat/cw4heat/examples/tcw4h.py @@ -0,0 +1,39 @@ +from mpi4py import MPI + +comm = MPI.COMM_WORLD + +import heat.cw4heat as ht + + +with ht.cw4h() as cw: + if cw.controller(): + a = ht.arange(8, split=0) + b = ht.ones(8, split=0) + c = a @ b + assert hasattr(c, "__partitioned__") + p = a.__partitioned__() + print(c.shape, c, p) + for k, v in p["partitions"].items(): + print(k, p["get"](v["data"])) + +print("hello") + +with ht.cw4h() as cw: + if cw.controller(): + a = ht.arange(8, split=0) + b = ht.ones(8, split=0) + c = a @ b + assert hasattr(c, "__partitioned__") + p = a.__partitioned__() + print(c.shape, c, p) + for k, v in p["partitions"].items(): + print(k, p["get"](v["data"])) + else: + p = None + +p = comm.bcast(p, 0) +for v in p["partitions"].values(): + if v["location"] == comm.rank: + print("My part:", p["get"](v["data"])) + +print("bye") diff --git a/heat/cw4heat/ray_runner.py b/heat/cw4heat/ray_runner.py index f318c25fec..4d7758220a 100644 --- a/heat/cw4heat/ray_runner.py +++ b/heat/cw4heat/ray_runner.py @@ -31,6 +31,7 @@ from ray.services import get_node_ip_address as getIP from .distributor import Distributor import os +from os import getenv, getpid _actors = {} @@ -45,13 +46,11 @@ def __init__(self, node): self.node = node self._commWorld = MPI.COMM_SELF self._distributor = None - print("Actor up", flush=True) def connect(self, port, nWorkers): """ Let nWorkers-many processes connect to controller process. 
""" - print("Actor connecting", flush=True) # workers go here # initial connect intercomm = self._commWorld.Connect(port) @@ -59,7 +58,6 @@ def connect(self, port, nWorkers): self._commWorld = intercomm.Merge(1) intercomm.Disconnect() rank = self._commWorld.rank - print(f"Yey, rank {rank} connected!") # collectively accept connections from all (other) clients for i in range(rank, nWorkers): # connect to next worker (collectively) @@ -76,9 +74,7 @@ def start(self, initImpl=None): """ Enter receive-loop as provided by distributor. """ - print("actor.start", self._distributor, flush=True) self._distributor.start(doExit=False, initImpl=initImpl) - print("Actor done!") def _initActors(initImpl=None): @@ -91,16 +87,20 @@ def _initActors(initImpl=None): global _actors if not ray.is_initialized(): ray.init(address="auto") + ppn = int(getenv("CW4H_PPN", default="1")) + assert ppn >= 1 + my_ip = getIP() # first create one actor per node in the ray cluster for node in ray.cluster_resources(): if "node" in node: name = node.split(":")[-1] - print(os.getpid(), "starting", name, flush=True) - _actors[name] = RayActor.options(resources={node: 1}).remote( - name - ) # runtime_env={"I_MPI_FABRICS": "ofi"} + _ppn = ppn - 1 if name == my_ip else ppn + if _ppn >= 1: + for i in range(_ppn): + _actors[name] = RayActor.options(resources={node: 1}).remote( + name + ) # runtime_env={"I_MPI_FABRICS": "ofi"} nw = len(_actors) # number of workers - print(nw, flush=True) comm = MPI.COMM_SELF # Get Port for MPI connections port = MPI.Open_port(MPI.INFO_NULL) @@ -112,12 +112,10 @@ def _initActors(initImpl=None): # merge communicators comm = intercomm.Merge(0) intercomm.Disconnect() - print("Connected", i, flush=True) # wait for connections to be established - r = ray.get(x) - print("All connected", r, _actors, flush=True) + _ = ray.get(x) x = [_actors[a].start.remote(initImpl) for a in _actors] - print("All started", flush=True) + print("All actors started", flush=True) # setup our distributor return (comm, Distributor(comm), x) From e8a70117fbb1a61928ae34650d5991003def9a2c Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Thu, 22 Jul 2021 05:41:10 -0500 Subject: [PATCH 09/22] refactoring ray_runner and let it create ray ObjRefs in __partitioned__ --- heat/cw4heat/__init__.py | 151 ++++++++++++++++++--------------- heat/cw4heat/distributor.py | 17 ++++ heat/cw4heat/examples/t1.py | 17 ++++ heat/cw4heat/examples/tcw4h.py | 9 +- heat/cw4heat/ray_runner.py | 132 +++++++++++++++++----------- 5 files changed, 205 insertions(+), 121 deletions(-) create mode 100644 heat/cw4heat/examples/t1.py diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 3989a68c0f..1e5c441402 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -73,9 +73,26 @@ impl_str = "impl" dndarray_str = "impl.DNDarray" -_distributor = None -_comm = None -_fini = None +_runner = None + + +class _partRef: + """ + Handle used in __partitioned__. Identifies one chunk of a distributed array. + """ + + def __init__(self, id_, rank_): + self.id = id_ + self.rank = rank_ + + +def _getPartForRef(pref): + """ + Return actual partition data for given _partRef. + """ + # FIXME Ray + ret = _runner.distributor.getPart(pref, "larray") + return ret def _setComm(c): @@ -89,11 +106,9 @@ def init(doStart=True, ctxt=False): For now we assume all ranks (controller and workers) are started through mpirun, workers will never leave distributor.start() and so this function. 
""" - global _distributor - global _comm - global _fini + global _runner - if _distributor is not None: + if _runner is not None: return _launcher = getenv("CW4H_LAUNCHER", default="mpi").lower() @@ -101,16 +116,28 @@ def init(doStart=True, ctxt=False): # atexit.register(fini) if _launcher == "ray": assert ctxt is False, "Controller-worker context is useless with ray launcher." - from .ray_runner import init as ray_init, fini as ray_fini + from .ray_runner import init as ray_init - _comm, _distributor, _futures = ray_init(_setComm) - _distributor.start(initImpl=_setComm) - _fini = ray_fini + _runner = ray_init(_setComm) + _runner.distributor.start(initImpl=_setComm) elif _launcher == "mpi": - _comm = MPI.COMM_WORLD - _distributor = Distributor(_comm) + + class MPIRunner: + def __init__(self, dist, comm): + self.comm = comm + self.distributor = dist + self.publish = lambda id, distributor: [ + (i, _partRef(id, i)) for i in range(self.comm.size) + ] + self.get = _getPartForRef + + def fini(self): + pass + + c = MPI.COMM_WORLD + _runner = MPIRunner(Distributor(c), c) if doStart: - _distributor.start(initImpl=_setComm) + _runner.distributor.start(initImpl=_setComm) else: raise Exception(f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', or 'ray'.") @@ -120,9 +147,10 @@ def fini(): Finalize/shutdown distribution engine. Automatically called at exit. When called on controller, workers will sys.exit from init(). """ - _distributor.fini() - if _fini: - _fini() + global _runner + _runner.distributor.fini() + if _runner: + _runner.fini() class cw4h: @@ -143,7 +171,7 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_value, exc_traceback): - if _comm.rank == 0: + if _runner.comm.rank == 0: fini() def controller(self): @@ -152,10 +180,10 @@ def controller(self): the code block protected as controller. Non-root workers will not finish until self gets deleted. """ - if _comm.rank == 0: + if _runner.comm.rank == 0: return True else: - _distributor.start(doExit=False, initImpl=_setComm) + _runner.distributor.start(doExit=False, initImpl=_setComm) return False @@ -195,7 +223,9 @@ def _submit(name, args, kwargs, unwrap="*", numout=1): """ scalar_args = tuple(x for x in args if not isinstance(x, DDParray)) deps = [x._handle.getId() for x in args if isinstance(x, DDParray)] - return _distributor.submitPP(_Task(name, scalar_args, kwargs, unwrap=unwrap), deps, numout) + return _runner.distributor.submitPP( + _Task(name, scalar_args, kwargs, unwrap=unwrap), deps, numout + ) def _submitProperty(name, self): @@ -204,7 +234,7 @@ def _submitProperty(name, self): """ t = _PropertyTask(name) try: - res = _distributor.submitPP(t, [self._handle.getId()]) + res = _runner.distributor.submitPP(t, [self._handle.getId()]) except Exception: assert False return res @@ -216,14 +246,6 @@ def _setitem_normalized(self, value, key): self.__setitem__(key, value) -def _getPartForRef(pref): - """ - Return actual partition data for given partRef. - """ - # FIXME Ray - return _distributor.getPart(pref, "larray") - - ####################################################################### # Our array is just a wrapper. Actual array is stored as a handle to # allow delayed execution. @@ -252,7 +274,7 @@ def __init__(self, handle): # Return heat native array. # With delayed execution, triggers computation as needed and blocks until array is available. 
# """ - # return _distributor.get(self._handle) + # return _runner.distributor.get(self._handle) def __getitem__(self, key): """ @@ -275,48 +297,23 @@ def T(self): """ return DDParray(_submitProperty("T", self)) - ####################################################################### - # Now we add methods/properties through the standard process. - ####################################################################### - - # dynamically generate class methods from list of methods in array-API - # we simply make lambdas which submit appropriate Tasks - # FIXME: aa_inplace_operators,others? - fixme_afuncs = ["squeeze", "astype", "balance", "resplit"] - for method in aa_methods_a + aa_reflected_operators + fixme_afuncs: - if method not in ["__getitem__", "__setitem__"] and hasattr(dndarray, method): - exec( - f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" - ) - - for method in aa_methods_s + ["__str__"]: - if hasattr(dndarray, method): - exec( - f"{method} = lambda self, *args, **kwargs: _distributor.get(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" - ) - - class partRef: - """ - Handle used in __partitioned__. Identifies one chunk of a distributed array. - """ - - def __init__(self, id_, rank_): - self.id = id_ - self.rank = rank_ - - # @property + @property def __partitioned__(self): """ Return partitioning meta data. """ - parts = _distributor.get( + global _runner + + parts = _runner.distributor.get( _submit(f"{dndarray_str}.create_partition_interface", (self, True), {}) ) # Provide all data as handle/reference - for _, p in parts["partitions"].items(): - p["data"] = self.partRef(self._handle._id, p["location"]) + futures = _runner.publish(self._handle._id, _runner.distributor) + for i, p in enumerate(parts["partitions"].values()): + p["location"] = futures[i][0] + p["data"] = futures[i][1] # set getter - parts["get"] = _getPartForRef + parts["get"] = _runner.get # remove SPMD local key del parts["locals"] return parts @@ -327,13 +324,33 @@ def __getattr__(self, attr): Caches attributes from workers, so we communicate only once. """ if self._attributes is None: - self._attributes = _distributor.get( + self._attributes = _runner.distributor.get( _submit( "(lambda a: {x: getattr(a, x) for x in aa_attributes if x != 'T'})", (self,), {} ) ) return self._attributes[attr] + ####################################################################### + # Now we add methods/properties through the standard process. + ####################################################################### + + # dynamically generate class methods from list of methods in array-API + # we simply make lambdas which submit appropriate Tasks + # FIXME: aa_inplace_operators,others? + fixme_afuncs = ["squeeze", "astype", "balance", "resplit", "reshape"] + for method in aa_methods_a + aa_reflected_operators + fixme_afuncs: + if method not in ["__getitem__", "__setitem__"] and hasattr(dndarray, method): + exec( + f"{method} = lambda self, *args, **kwargs: DDParray(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" + ) + + for method in aa_methods_s + ["__str__"]: + if hasattr(dndarray, method): + exec( + f"{method} = lambda self, *args, **kwargs: _runner.distributor.get(_submit('{dndarray_str}.{method}', (self, *args), kwargs))" + ) + ####################################################################### # first define top-level functions through the standard process. 
@@ -344,7 +361,7 @@ def __getattr__(self, attr): # (lists taken from list of methods in array-API) # Again, we simply make lambdas which submit appropriate Tasks -fixme_funcs = ["load_csv", "array", "triu"] +fixme_funcs = ["load_csv", "array", "triu", "copy", "repeat"] for func in aa_tlfuncs + fixme_funcs: if func == "meshgrid": exec( diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 0971d1f071..961652bf7f 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -63,6 +63,7 @@ GO = 2 GET = 3 GETPART = 4 +PUBPART = 5 class _TaskQueue: @@ -124,6 +125,7 @@ def start(self, doExit=True, initImpl=None): if self._comm.rank == 0: return True else: + print("Entering worker loop", flush=True) done = False header = None while not done: @@ -142,6 +144,10 @@ def start(self, doExit=True, initImpl=None): val = _RemoteTask.getVal(header[2]) attr = getattr(val, header[3]) self._comm.send(attr, dest=0, tag=GETPART) + elif header[0] == PUBPART: + val = _RemoteTask.getVal(header[1]) + attr = header[3](getattr(val, header[2])) + self._comm.gather(attr, root=0) elif header[0] == END: done = True self._comm.Barrier() @@ -201,6 +207,16 @@ def getPart(self, handle, attr): val = self._comm.recv(source=handle.rank, tag=GETPART) return val + def publishParts(self, id, attr, publish): + """ + Publish array's attribute for each partition and gather handles on root. + """ + assert self._comm.rank == 0 + header = [PUBPART, id, attr, publish] + _ = self._comm.bcast(header, 0) + val = publish(getattr(_RemoteTask.getVal(id), attr)) + return self._comm.gather(val, root=0) + def submitPP(self, task, deps, numout=1): """ Submit a process-parallel task and return a handle/future. @@ -277,6 +293,7 @@ def go(self): """ Actually run the task. 
""" + # print(self._task._func) deps = [_RemoteTask.s_pms[i] for i in self._depIds] res = self._task.run(deps) if self._nOut == 1: diff --git a/heat/cw4heat/examples/t1.py b/heat/cw4heat/examples/t1.py new file mode 100644 index 0000000000..b9080ddd76 --- /dev/null +++ b/heat/cw4heat/examples/t1.py @@ -0,0 +1,17 @@ +import pickle +import heat.cw4heat as ht + +ht.init() + +a = ht.arange(8, split=0) +b = ht.ones(8, split=0) +c = a @ b +# assert hasattr(c, "__partitioned__") +print(type(c)) +p = a.__partitioned__() +print(a.shape, a, p) +for k, v in p["partitions"].items(): + print(33) + print(k, p["get"](v["data"])) +print("kkkkkk") +ht.fini() diff --git a/heat/cw4heat/examples/tcw4h.py b/heat/cw4heat/examples/tcw4h.py index 90ed7e5bc7..9ecf540e50 100644 --- a/heat/cw4heat/examples/tcw4h.py +++ b/heat/cw4heat/examples/tcw4h.py @@ -10,8 +10,9 @@ a = ht.arange(8, split=0) b = ht.ones(8, split=0) c = a @ b - assert hasattr(c, "__partitioned__") - p = a.__partitioned__() + # assert hasattr(c, "__partitioned__") + print(type(c)) + p = c.__partitioned__() print(c.shape, c, p) for k, v in p["partitions"].items(): print(k, p["get"](v["data"])) @@ -23,8 +24,8 @@ a = ht.arange(8, split=0) b = ht.ones(8, split=0) c = a @ b - assert hasattr(c, "__partitioned__") - p = a.__partitioned__() + # assert hasattr(c, "__partitioned__") + p = c.__partitioned__() print(c.shape, c, p) for k, v in p["partitions"].items(): print(k, p["get"](v["data"])) diff --git a/heat/cw4heat/ray_runner.py b/heat/cw4heat/ray_runner.py index 4d7758220a..de3854de91 100644 --- a/heat/cw4heat/ray_runner.py +++ b/heat/cw4heat/ray_runner.py @@ -28,13 +28,12 @@ from mpi4py import MPI import ray +import ray.cloudpickle from ray.services import get_node_ip_address as getIP from .distributor import Distributor import os from os import getenv, getpid -_actors = {} - @ray.remote class RayActor: @@ -77,59 +76,92 @@ def start(self, initImpl=None): self._distributor.start(doExit=False, initImpl=initImpl) -def _initActors(initImpl=None): +def _pub(x): + return ray.cloudpickle.dumps((getIP(), ray.put(x))) + + +def _ray_publish(id, distributor): """ - Initalize our (SPMD) actors, one per node in ray cluster and make them - connect through MPI. - Controller (calling process) gets connection config and then - passes it to init function on each actor. + Return ray ObjRef for obj to be used in ray. 
""" - global _actors - if not ray.is_initialized(): - ray.init(address="auto") - ppn = int(getenv("CW4H_PPN", default="1")) - assert ppn >= 1 - my_ip = getIP() - # first create one actor per node in the ray cluster - for node in ray.cluster_resources(): - if "node" in node: - name = node.split(":")[-1] - _ppn = ppn - 1 if name == my_ip else ppn - if _ppn >= 1: - for i in range(_ppn): - _actors[name] = RayActor.options(resources={node: 1}).remote( - name - ) # runtime_env={"I_MPI_FABRICS": "ofi"} - nw = len(_actors) # number of workers - comm = MPI.COMM_SELF - # Get Port for MPI connections - port = MPI.Open_port(MPI.INFO_NULL) - # make all actors connect - x = [_actors[a].connect.remote(port, nw) for a in _actors] - for i in range(nw): - # connect to next worker (collectively) - intercomm = comm.Accept(port) - # merge communicators - comm = intercomm.Merge(0) - intercomm.Disconnect() - # wait for connections to be established - _ = ray.get(x) - x = [_actors[a].start.remote(initImpl) for a in _actors] - print("All actors started", flush=True) - # setup our distributor - return (comm, Distributor(comm), x) + vals = distributor.publishParts(id, "larray", _pub) + return [ray.cloudpickle.loads(x) for x in vals] + +def _ray_get(x): + return ray.get(x) -def _finiActors(): + +class RayRunner: """ - Finalize Ray Actors: killing actor processes. + Using ray to launch ranks by using ray actors. """ - global _actors - if ray.is_initialized(): - print("Killing actors") - for a in _actors.values(): - ray.kill(a) + + def __init__(self, initImpl=None): + """ + Initalize our (SPMD) actors, one per node in ray cluster and make them + connect through MPI. + Controller (calling process) gets connection config and then + passes it to init function on each actor. + """ + self.publish = _ray_publish + self.get = _ray_get + self._actors = {} + self._init(initImpl) + + def fini(self): + """ + Finalize Ray Actors: killing actor processes. + """ + if ray.is_initialized(): + print("Killing actors") + if self._handles: + ray.get(self._handles) + if self._actors: + for a in self._actors.values(): + ray.kill(a) + + def _init(self, initImpl=None): + if not ray.is_initialized(): + ray.init(address="auto") + ppn = int(getenv("CW4H_PPN", default="1")) + assert ppn >= 1 + my_ip = getIP() + # first create one actor per node in the ray cluster + for node in ray.cluster_resources(): + if "node" in node: + name = node.split(":")[-1] + _ppn = ppn - 1 if name == my_ip else ppn + if _ppn >= 1: + for i in range(_ppn): + self._actors[f"{name}{i}"] = RayActor.options(resources={node: 1}).remote( + name + ) # runtime_env={"I_MPI_FABRICS": "ofi"} + nw = len(self._actors) # number of workers + self.comm = MPI.COMM_SELF + # Get Port for MPI connections + port = MPI.Open_port(MPI.INFO_NULL) + # make all actors connect + x = [a.connect.remote(port, nw) for a in self._actors.values()] + for i in range(nw): + # connect to next worker (collectively) + intercomm = self.comm.Accept(port) + # merge communicators + self.comm = intercomm.Merge(0) + intercomm.Disconnect() + # wait for connections to be established + _ = ray.get(x) + self._handles = [a.start.remote(initImpl) for a in self._actors.values()] + print("All actors started", flush=True) + # setup our distributor + self.distributor = Distributor(self.comm) + + return self -init = _initActors -fini = _finiActors +def init(initImpl=None): + """ + Return a Ray Runner. + Ray runner will launch actors and connect them throuh MPI. 
+ """ + return RayRunner(initImpl) From 0ea1705c8c588fa16e979f7b35849eac57af76a0 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Thu, 22 Jul 2021 05:47:57 -0500 Subject: [PATCH 10/22] making location a list --- heat/cw4heat/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 1e5c441402..780f33e3f1 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -310,7 +310,7 @@ def __partitioned__(self): # Provide all data as handle/reference futures = _runner.publish(self._handle._id, _runner.distributor) for i, p in enumerate(parts["partitions"].values()): - p["location"] = futures[i][0] + p["location"] = [futures[i][0]] p["data"] = futures[i][1] # set getter parts["get"] = _runner.get From 53fc1587cb192dfc34937d091a0ac39047f1e590 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 18 Aug 2021 06:18:03 -0500 Subject: [PATCH 11/22] fixes --- heat/cw4heat/__init__.py | 3 +++ heat/cw4heat/arrayapi.py | 35 ----------------------------------- heat/cw4heat/distributor.py | 25 +++++++++++++++++-------- 3 files changed, 20 insertions(+), 43 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 780f33e3f1..1d07413f73 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -135,9 +135,12 @@ def fini(self): pass c = MPI.COMM_WORLD + if c.size <= 1: + raise Exception("At least 2 ranks required for cw4heat") _runner = MPIRunner(Distributor(c), c) if doStart: _runner.distributor.start(initImpl=_setComm) + atexit.register(fini) else: raise Exception(f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', or 'ray'.") diff --git a/heat/cw4heat/arrayapi.py b/heat/cw4heat/arrayapi.py index 785da687c1..40766c6111 100644 --- a/heat/cw4heat/arrayapi.py +++ b/heat/cw4heat/arrayapi.py @@ -100,41 +100,6 @@ "result_type", # (*arrays_and_dtypes) ] -aa_methods = [ - "__abs__", # (self, /) - "__add__", # (self, other, /) - "__and__", # (self, other, /) - "__array_namespace__", # (self, /, *, api_version=None) - "__bool__", # (self, /) - "__dlpack__", # (self, /, *, stream=None) - "__dlpack_device__", # (self, /) - "__eq__", # (self, other, /) - "__float__", # (self, /) - "__floordiv__", # (self, other, /) - "__ge__", # (self, other, /) - "__getitem__", # (self, key, /) - "__gt__", # (self, other, /) - "__int__", # (self, /) - "__invert__", # (self, /) - "__le__", # (self, other, /) - "__len__", # (self, /) - "__lshift__", # (self, other, /) - "__lt__", # (self, other, /) - "__matmul__", # (self, other, /) - "__mod__", # (self, other, /) - "__mul__", # (self, other, /) - "__ne__", # (self, other, /) - "__neg__", # (self, /) - "__or__", # (self, other, /) - "__pos__", # (self, /) - "__pow__", # (self, other, /) - "__rshift__", # (self, other, /) - "__setitem__", # (self, key, value, /) - "__sub__", # (self, other, /) - "__truediv__", # (self, other, /) - "__xor__", # (self, other, /) -] - aa_creators = [ "arange", # (start, /, stop=None, step=1, *, dtype=None, device=None) "asarray", # (obj, /, *, dtype=None, device=None, copy=None) diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 961652bf7f..45007810a9 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -154,7 +154,7 @@ def start(self, doExit=True, initImpl=None): break else: raise Exception("Worker received unknown tag") - # MPI.Finalize() + MPI.Finalize() if doExit: sys.exit() return False @@ -262,6 +262,18 @@ def get(self): """ return self._obj + def 
__getstate__(self):
+        # we do not pickle the actual object
+        return {"_id": self._id}
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self._obj = None
+
+
+# here we store objects that are input dependencies to tasks
+_s_pms = {}
+
 
 class _RemoteTask:
     """
@@ -286,27 +298,24 @@ def __init__(self, task, deps, numout):
         else:
             self._handle = tuple(Handle() for _ in range(self._nOut))
 
-    # here we store objects that are input dependences to tasks
-    s_pms = {}
-
     def go(self):
         """
         Actually run the task.
         """
         # print(self._task._func)
-        deps = [_RemoteTask.s_pms[i] for i in self._depIds]
+        deps = [_s_pms[i] for i in self._depIds]
         res = self._task.run(deps)
         if self._nOut == 1:
             self._handle.set(res)
-            _RemoteTask.s_pms[self._handle.getId()] = res
+            _s_pms[self._handle.getId()] = res
         else:
             i = 0
             for h in self._handle:
                 h.set(res[i])
-                _RemoteTask.s_pms[h.getId()] = res[i]
+                _s_pms[h.getId()] = res[i]
                 i += 1
         return self._handle
 
     @staticmethod
     def getVal(id):
-        return _RemoteTask.s_pms[id]
+        return _s_pms[id]

From 40f7f444cf47e0568be7ae9085654098842a2af6 Mon Sep 17 00:00:00 2001
From: Frank Schlimbach
Date: Wed, 18 Aug 2021 06:40:08 -0500
Subject: [PATCH 12/22] adding reset()

---
 heat/cw4heat/__init__.py    |  8 ++++++++
 heat/cw4heat/distributor.py | 39 +++++++++++++++++++++++++++++++++----
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py
index 1d07413f73..2ead4e216e 100644
--- a/heat/cw4heat/__init__.py
+++ b/heat/cw4heat/__init__.py
@@ -156,6 +156,14 @@ def fini():
     _runner.fini()
 
 
+def reset():
+    """
+    Reset all internal state.
+    Distributed objects created before calling reset cannot be used afterwards.
+    """
+    _runner.distributor.reset()
+
+
 class cw4h:
     """
     Contextmanager to establish controller-worker regions within SPMD runs.
diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py
index 45007810a9..7fe8550f83 100644
--- a/heat/cw4heat/distributor.py
+++ b/heat/cw4heat/distributor.py
@@ -64,6 +64,10 @@
 GET = 3
 GETPART = 4
 PUBPART = 5
+RESET = 6
+
+# here we store objects that are input dependencies to tasks
+_s_pms = {}
 
 
 class _TaskQueue:
@@ -148,6 +152,11 @@ def start(self, doExit=True, initImpl=None):
                     val = _RemoteTask.getVal(header[1])
                     attr = header[3](getattr(val, header[2]))
                     self._comm.gather(attr, root=0)
+                elif header[0] == RESET:
+                    print("reset", flush=True)
+                    _RemoteTask.reset()
+                    self._tQueue.clear()
+                    Handle.reset()
                 elif header[0] == END:
                     done = True
                     self._comm.Barrier()
@@ -159,9 +168,20 @@ def start(self, doExit=True, initImpl=None):
             sys.exit()
         return False
 
+    def reset(self):
+        """
+        Reset task queues.
+        """
+        assert self._comm.rank == 0
+        header = [RESET]
+        header = self._comm.bcast(header, 0)
+        _RemoteTask.reset()
+        self._tQueue.clear()
+        Handle.reset()
+
     def fini(self):
         """
-        Control sends end-tag. Workers will sys.exit.
+        Controller sends end-tag. Workers will sys.exit.
         """
         if MPI.Is_initialized() and self._comm.rank == 0:
             header = [END]
             header = self._comm.bcast(header, 0)
@@ -270,9 +290,12 @@ def __setstate__(self, state):
         self.__dict__.update(state)
         self._obj = None
 
-
-# here we store objects that are input dependencies to tasks
-_s_pms = {}
+    @staticmethod
+    def reset():
+        """
+        Reset internal state.
+        """
+        Handle._nextId = 1
 
 
 class _RemoteTask:
@@ -319,3 +342,11 @@ def go(self):
     @staticmethod
     def getVal(id):
         return _s_pms[id]
+
+    @staticmethod
+    def reset():
+        """
+        Reset internal state.
+ """ + global _s_pms + _s_pms = {} From e7e439864a7a09677fbe37e8832bc651eb1de5ac Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 18 Aug 2021 06:42:37 -0500 Subject: [PATCH 13/22] using clear() --- heat/cw4heat/distributor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 7fe8550f83..dc8713c3e7 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -349,4 +349,4 @@ def reset(): Reset internal state. """ global _s_pms - _s_pms = {} + _s_pms.clear() From d659c2ebddeae915e1635f68b7c9e97d77259e12 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Fri, 20 Aug 2021 06:32:04 -0500 Subject: [PATCH 14/22] adding dot --- heat/cw4heat/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 2ead4e216e..668ac0b85a 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -372,7 +372,7 @@ def __getattr__(self, attr): # (lists taken from list of methods in array-API) # Again, we simply make lambdas which submit appropriate Tasks -fixme_funcs = ["load_csv", "array", "triu", "copy", "repeat"] +fixme_funcs = ["load_csv", "array", "triu", "copy", "repeat", "dot"] for func in aa_tlfuncs + fixme_funcs: if func == "meshgrid": exec( From 1b0a4bf4ed69c9792eec36ec17704773579b3070 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 1 Sep 2021 04:34:28 -0500 Subject: [PATCH 15/22] quick workaround to have __localop in cw4heat --- heat/cw4heat/__init__.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 668ac0b85a..1ded5dcc56 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -135,8 +135,8 @@ def fini(self): pass c = MPI.COMM_WORLD - if c.size <= 1: - raise Exception("At least 2 ranks required for cw4heat") + # if c.size <= 1: + # raise Exception("At least 2 ranks required for cw4heat") _runner = MPIRunner(Distributor(c), c) if doStart: _runner.distributor.start(initImpl=_setComm) @@ -392,6 +392,14 @@ def __getattr__(self, attr): ) +def __local_op_normalized(a, f): + return impl.core._operations.__local_op(f, a) + + +def __local_op(*args, **kwargs): + return DDParray(_submit("__local_op_normalized", args, kwargs)) + + # Here we define data types and constants for attr in aa_datatypes + aa_constants: if hasattr(impl, attr): From 06be72ea796fe84534d3a58d5f5030874fabdd9b Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 1 Sep 2021 09:45:32 -0500 Subject: [PATCH 16/22] fixed GC --- heat/cw4heat/__init__.py | 7 +++++++ heat/cw4heat/distributor.py | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 1ded5dcc56..6b5e038a44 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -257,6 +257,10 @@ def _setitem_normalized(self, value, key): self.__setitem__(key, value) +def _release(hdl): + hdl._release() + + ####################################################################### # Our array is just a wrapper. Actual array is stored as a handle to # allow delayed execution. @@ -287,6 +291,9 @@ def __init__(self, handle): # """ # return _runner.distributor.get(self._handle) + def __del__(self): + _submit("_release", (self._handle,), {}) + def __getitem__(self, key): """ Return item/slice as array. 
diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index dc8713c3e7..abbaa05fee 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -153,7 +153,6 @@ def start(self, doExit=True, initImpl=None): attr = header[3](getattr(val, header[2])) self._comm.gather(attr, root=0) elif header[0] == RESET: - print("reset", flush=True) _RemoteTask.reset() self._tQueue.clear() Handle.reset() @@ -290,6 +289,13 @@ def __setstate__(self, state): self.__dict__.update(state) self._obj = None + def _release(self): + """ + Release handle from dict to make it available for GC. + """ + global _s_pms + del _s_pms[self._id] + @staticmethod def reset(): """ From df6a193526c4ed266f5789b5f9936ebbd364955a Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 1 Sep 2021 11:08:53 -0500 Subject: [PATCH 17/22] fixing GC issues --- heat/cw4heat/__init__.py | 2 +- heat/cw4heat/distributor.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 6b5e038a44..5a8ce3fd4f 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -292,7 +292,7 @@ def __init__(self, handle): # return _runner.distributor.get(self._handle) def __del__(self): - _submit("_release", (self._handle,), {}) + _submit("_release", (self._handle,), {}, numout=0) def __getitem__(self, key): """ diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index abbaa05fee..1188215427 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -155,7 +155,7 @@ def start(self, doExit=True, initImpl=None): elif header[0] == RESET: _RemoteTask.reset() self._tQueue.clear() - Handle.reset() + # Handle._reset() elif header[0] == END: done = True self._comm.Barrier() @@ -176,7 +176,7 @@ def reset(self): header = self._comm.bcast(header, 0) _RemoteTask.reset() self._tQueue.clear() - Handle.reset() + # Handle.reset() def fini(self): """ @@ -294,10 +294,11 @@ def _release(self): Release handle from dict to make it available for GC. """ global _s_pms - del _s_pms[self._id] + if self._id in _s_pms: + del _s_pms[self._id] @staticmethod - def reset(): + def _reset(): """ Reset internal state. """ @@ -324,8 +325,10 @@ def __init__(self, task, deps, numout): # or the result is not a global object. 
if self._nOut == 1: self._handle = Handle() - else: + elif self._nOut > 0: self._handle = tuple(Handle() for _ in range(self._nOut)) + else: + self._handle = None def go(self): """ @@ -337,7 +340,7 @@ def go(self): if self._nOut == 1: self._handle.set(res) _s_pms[self._handle.getId()] = res - else: + elif self._nOut > 0: i = 0 for h in self._handle: h.set(res[i]) From 9a68af9788c5de1a69580a6da08baa8535535508 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Thu, 2 Sep 2021 05:13:40 -0500 Subject: [PATCH 18/22] quick hack to have random.normal --- heat/cw4heat/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 5a8ce3fd4f..6ee5cfa15a 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -418,6 +418,18 @@ def __local_op(*args, **kwargs): ####################################################################### # quick hack to provide random features ####################################################################### + +if not hasattr(impl.random, "normal"): + import torch + + def _normal(mean, std, size): + ret = impl.empty(size) + torch.normal(mean, std, ret.lshape, out=ret.larray) + return ret + + impl.random.normal = _normal + + class random: """ Wrapper class for random. From b9c9315ee561ea436cccc15ec25ca7da631f7208 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Tue, 21 Sep 2021 04:16:45 -0700 Subject: [PATCH 19/22] allow barrier after go --- heat/cw4heat/__init__.py | 10 +++++++++- heat/cw4heat/distributor.py | 10 +++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 6ee5cfa15a..0c26eae199 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -120,6 +120,7 @@ def init(doStart=True, ctxt=False): _runner = ray_init(_setComm) _runner.distributor.start(initImpl=_setComm) + atexit.register(fini) elif _launcher == "mpi": class MPIRunner: @@ -134,7 +135,7 @@ def __init__(self, dist, comm): def fini(self): pass - c = MPI.COMM_WORLD + c = MPI.COMM_WORLD.Dup() # if c.size <= 1: # raise Exception("At least 2 ranks required for cw4heat") _runner = MPIRunner(Distributor(c), c) @@ -164,6 +165,13 @@ def reset(): _runner.distributor.reset() +def sync(): + """ + Trigger all computation. + """ + _runner.distributor.go(True) + + class cw4h: """ Contextmanager to establish controller-worker regions within SPMD runs. diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index 1188215427..f263bbee93 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -143,6 +143,8 @@ def start(self, doExit=True, initImpl=None): assert False elif header[0] == GO: self._tQueue.go() + if header[1]: + self._comm.Barrier() elif header[0] == GETPART: if self._comm.rank == header[1]: val = _RemoteTask.getVal(header[2]) @@ -188,7 +190,7 @@ def fini(self): self._comm.Barrier() # MPI.Finalize() - def go(self): + def go(self, barrier=False): """ Trigger execution of all tasks which are still in flight. """ @@ -196,9 +198,11 @@ def go(self): if self._tQueue.len(): header = [TASK, self._tQueue._taskQueue] _, _ = self._comm.bcast(header, 0) - header = [GO] + header = [GO, barrier] _ = self._comm.bcast(header, 0) self._tQueue.go() + if barrier: + self._comm.Barrier() def get(self, handle): """ @@ -334,7 +338,7 @@ def go(self): """ Actually run the task. 
""" - # print(self._task._func) + #print(self._task._func) deps = [_s_pms[i] for i in self._depIds] res = self._task.run(deps) if self._nOut == 1: From af939d57c84431d9fb670f751d01c3d299e66355 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 22 Sep 2021 09:43:57 -0500 Subject: [PATCH 20/22] allow spmd mode in cw4heat --- heat/cw4heat/__init__.py | 22 ++++++----- heat/cw4heat/arrayapi.py | 12 ++++++ heat/cw4heat/distributor.py | 73 ++++++++++++++++++++++++++----------- heat/cw4heat/ray_runner.py | 2 +- setup.py | 2 +- 5 files changed, 78 insertions(+), 33 deletions(-) diff --git a/heat/cw4heat/__init__.py b/heat/cw4heat/__init__.py index 0c26eae199..2e92f4e874 100644 --- a/heat/cw4heat/__init__.py +++ b/heat/cw4heat/__init__.py @@ -111,8 +111,8 @@ def init(doStart=True, ctxt=False): if _runner is not None: return - _launcher = getenv("CW4H_LAUNCHER", default="mpi").lower() - + _launcher = getenv("CW4H_LAUNCHER", default="spmd").lower() + print("launcher:", _launcher) # atexit.register(fini) if _launcher == "ray": assert ctxt is False, "Controller-worker context is useless with ray launcher." @@ -121,7 +121,8 @@ def init(doStart=True, ctxt=False): _runner = ray_init(_setComm) _runner.distributor.start(initImpl=_setComm) atexit.register(fini) - elif _launcher == "mpi": + else: + c = MPI.COMM_WORLD.Dup() class MPIRunner: def __init__(self, dist, comm): @@ -135,15 +136,18 @@ def __init__(self, dist, comm): def fini(self): pass - c = MPI.COMM_WORLD.Dup() - # if c.size <= 1: - # raise Exception("At least 2 ranks required for cw4heat") - _runner = MPIRunner(Distributor(c), c) + _runner = MPIRunner(Distributor(c, _launcher == "spmd"), c) + + if _launcher == "spmd": + _runner.publish = None + elif _launcher != "mpi": + raise Exception( + f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', 'spmd', or 'ray'." + ) + if doStart: _runner.distributor.start(initImpl=_setComm) atexit.register(fini) - else: - raise Exception(f"unknown launcher {_launcher}. CW4H_LAUNCHER must be 'mpi', or 'ray'.") def fini(): diff --git a/heat/cw4heat/arrayapi.py b/heat/cw4heat/arrayapi.py index 40766c6111..827d65b265 100644 --- a/heat/cw4heat/arrayapi.py +++ b/heat/cw4heat/arrayapi.py @@ -24,6 +24,7 @@ "aa_arrayfuncs", "aa_methods_s", "aa_methods_a", + "aa_manips", ] aa_creators = [ @@ -159,6 +160,16 @@ aa_methods = aa_methods_s + aa_methods_a +aa_manips = [ + "concat", # (arrays, /, *, axis=0) + "expand_dims", # (x, /, *, axis) + "flip", # (x, /, *, axis=None) + "reshape", # (x, /, shape) + "roll", # (x, /, shift, *, axis=None) + "squeeze", # (x, /, axis) + "stack", # (arrays, /, *, axis=0) +] + aa_elementwises = [ "abs", # (x, /) "acos", # (x, /) @@ -247,6 +258,7 @@ + aa_sorting + aa_set + aa_utility + + aa_manips ) aa_tldir = aa_tlfuncs + aa_datatypes + aa_constants aa_arrayfuncs = aa_methods + aa_inplace_operators + aa_reflected_operators diff --git a/heat/cw4heat/distributor.py b/heat/cw4heat/distributor.py index f263bbee93..054df4e066 100644 --- a/heat/cw4heat/distributor.py +++ b/heat/cw4heat/distributor.py @@ -110,22 +110,36 @@ class Distributor: Work-items are treated as dependent tasks. """ - def __init__(self, comm=MPI.COMM_WORLD): + def __init__(self, comm, spmd=True): """ Init distributor, optionally accepts MPI communicator. """ self._comm = comm + self._spmd = spmd # Our queue of tasks. 
        self._tQueue = _TaskQueue()
 
-    def start(self, doExit=True, initImpl=None):
+        self.start = self._start if spmd else self._cw_start
+        self.reset = self._reset if spmd else self._cw_reset
+        self.fini = self._fini if spmd else self._cw_fini
+        self.go = self._go if spmd else self._cw_go
+        self.get = self._get if spmd else self._cw_get
+        self.getPart = self._getPart if spmd else self._cw_getPart
+        if not spmd:
+            self.publishParts = self._cw_publishParts
+
+    def _start(self, doExit=True, initImpl=None):
+        if initImpl:
+            initImpl(self._comm)
+
+    def _cw_start(self, doExit=True, initImpl=None):
         """
         Start distribution engine.
         Controller inits and returns.
         Workers enter recv-loop and exit program when fini is called.
         """
-        if initImpl:
-            initImpl(self._comm)
+        self._start(doExit, initImpl)
+
         if self._comm.rank == 0:
             return True
         else:
@@ -169,28 +183,38 @@ def start(self, doExit=True, initImpl=None):
             sys.exit()
         return False
 
-    def reset(self):
+    def _reset(self):
+        _RemoteTask.reset()
+        self._tQueue.clear()
+        # Handle.reset()
+
+    def _cw_reset(self):
         """
         Reset task queues.
         """
         assert self._comm.rank == 0
         header = [RESET]
         header = self._comm.bcast(header, 0)
-        _RemoteTask.reset()
-        self._tQueue.clear()
-        # Handle.reset()
+        self._reset()
 
-    def fini(self):
+    def _fini(self):
+        self._comm.Barrier()
+
+    def _cw_fini(self):
         """
         Controller sends end-tag. Workers will sys.exit.
         """
         if MPI.Is_initialized() and self._comm.rank == 0:
             header = [END]
             header = self._comm.bcast(header, 0)
+        self._fini()
+
+    def _go(self, barrier=False):
+        self._tQueue.go()
+        if barrier:
             self._comm.Barrier()
-        # MPI.Finalize()
 
-    def go(self, barrier=False):
+    def _cw_go(self, barrier=False):
         """
         Trigger execution of all tasks which are still in flight.
         """
@@ -200,11 +224,13 @@ def go(self, barrier=False):
             _, _ = self._comm.bcast(header, 0)
             header = [GO, barrier]
             _ = self._comm.bcast(header, 0)
-        self._tQueue.go()
-        if barrier:
-            self._comm.Barrier()
+        self._go(barrier)
 
-    def get(self, handle):
+    def _get(self, handle):
+        self.go()
+        return handle.get()
+
+    def _cw_get(self, handle):
         """
         Get actual value from handle.
         Requires communication.
         Does not work for arrays (yet).
         """
         assert self._comm.rank == 0
-        self.go()
-        return handle.get()
+        return self._get(handle)
+
+    def _getPart(self, handle, attr):
+        assert handle.rank == self._comm.rank
+        val = _RemoteTask.getVal(handle.id)
+        return getattr(val, attr)
 
-    def getPart(self, handle, attr):
+    def _cw_getPart(self, handle, attr):
         """
         Get local raw partition data for given handle.
         """
         if handle.rank == self._comm.rank:
-            val = _RemoteTask.getVal(handle.id)
-            val = getattr(val, attr)
+            val = self._getPart(handle, attr)
         else:
             # FIXME what if left CW-context (SPMD mode) ?
             assert self._comm.rank == 0
@@ -230,7 +259,7 @@ def getPart(self, handle, attr):
             val = self._comm.recv(source=handle.rank, tag=GETPART)
         return val
 
-    def publishParts(self, id, attr, publish):
+    def _cw_publishParts(self, id, attr, publish):
         """
         Publish array's attribute for each partition and gather handles on root.
         """
@@ -338,7 +367,7 @@ def go(self):
         """
         Actually run the task. 
""" - #print(self._task._func) + # print(self._task._func) deps = [_s_pms[i] for i in self._depIds] res = self._task.run(deps) if self._nOut == 1: diff --git a/heat/cw4heat/ray_runner.py b/heat/cw4heat/ray_runner.py index de3854de91..a3de73456d 100644 --- a/heat/cw4heat/ray_runner.py +++ b/heat/cw4heat/ray_runner.py @@ -154,7 +154,7 @@ def _init(self, initImpl=None): self._handles = [a.start.remote(initImpl) for a in self._actors.values()] print("All actors started", flush=True) # setup our distributor - self.distributor = Distributor(self.comm) + self.distributor = Distributor(self.comm, False) return self diff --git a/setup.py b/setup.py index 740f6660f7..00f4e6c414 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ install_requires=[ "mpi4py>=3.0.0", "numpy>=1.13.0", - "torch>=1.7.0, <1.9", + "torch>=1.7.0, <=1.9", "scipy>=0.14.0", "pillow>=6.0.0", "torchvision>=0.8.0", From b7afb5706ea2cb0b23473e01b09e387014752ece Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Jun 2022 07:30:17 +0000 Subject: [PATCH 21/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index e32a87a384..aef16d1152 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -25,7 +25,7 @@ body: id: logs attributes: label: Error message or erroneous outcome - description: Please copy and paste your error. + description: Please copy and paste your error. render: shell - type: dropdown id: version From 68dcc20e913904debf10883e9a63a18ddc2e26df Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 1 Jun 2022 09:42:20 +0200 Subject: [PATCH 22/22] Add type hints to `create_partition_interface` --- heat/core/dndarray.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py index 42932abd23..fb43d1c96c 100644 --- a/heat/core/dndarray.py +++ b/heat/core/dndarray.py @@ -9,7 +9,7 @@ from inspect import stack from mpi4py import MPI from pathlib import Path -from typing import List, Union, Tuple, TypeVar, Optional +from typing import List, Union, Tuple, TypeVar, Optional, Dict warnings.simplefilter("always", ResourceWarning) @@ -599,7 +599,7 @@ def create_lshape_map(self, force_check: bool = False) -> torch.Tensor: self.__lshape_map = lshape_map return lshape_map.clone() - def create_partition_interface(self, no_data=False): + def create_partition_interface(self, no_data: bool = False) -> Dict: """ Create a partition interface in line with the DPPY proposal. This is subject to change. The intention of this to facilitate the usage of a general format for the referencing of @@ -644,9 +644,6 @@ def create_partition_interface(self, no_data=False): }, 'locals': [(rank, 0, 0)], } - Returns - ------- - dictionary containing the partition interface as shown above. """ # sp = lshape_map = self.create_lshape_map()