From 633df41cc6eed717a66a680f09a21eec7371d273 Mon Sep 17 00:00:00 2001 From: Diego Tavares Date: Tue, 19 Nov 2024 14:54:47 -0800 Subject: [PATCH] [cuegui/pycue] Fix Local Booking widget (#1581) This feature has been inactive on opencue since the beginning. Changes to port from Ice to Grpc were not properly tested and this widget never really worked. Local Rendering is a feature that allows users to claim the ownership of a host (their workstation) and assign a job to execute frames on that host. This is very useful in situations where the farm is busy but user workstations aren't. To access the feature, right-click on a job/layer and select "Use local cores..". On the opened widget, the user can select how many cores, memory and gpu to allocate to execute cue jobs. When confirmed, cuebot will start dispatching frames to that host. --------- Signed-off-by: Diego Tavares --- VERSION.in | 2 +- cuegui/cuegui/LocalBooking.py | 77 ++++++++++++++++------ proto/renderPartition.proto | 2 +- pycue/opencue/wrappers/frame.py | 19 +++--- pycue/opencue/wrappers/host.py | 5 +- pycue/opencue/wrappers/layer.py | 19 +++--- pycue/opencue/wrappers/render_partition.py | 42 ++++++++++++ pycue/tests/wrappers/host_test.py | 2 +- 8 files changed, 126 insertions(+), 42 deletions(-) create mode 100644 pycue/opencue/wrappers/render_partition.py diff --git a/VERSION.in b/VERSION.in index b123147e2..5625e59da 100644 --- a/VERSION.in +++ b/VERSION.in @@ -1 +1 @@ -1.1 \ No newline at end of file +1.2 diff --git a/cuegui/cuegui/LocalBooking.py b/cuegui/cuegui/LocalBooking.py index b3c905a1a..7b8eced77 100644 --- a/cuegui/cuegui/LocalBooking.py +++ b/cuegui/cuegui/LocalBooking.py @@ -63,7 +63,7 @@ def __init__(self, target, parent=None): try: owner = opencue.api.getOwner(os.environ["USER"]) for host in owner.getHosts(): - if host.data.lockState != opencue.api.host_pb2.OPEN: + if host.lockState() != opencue.api.host_pb2.OPEN: self.__select_host.addItem(host.data.name) except 
opencue.exception.CueException: pass @@ -104,6 +104,20 @@ def __init__(self, target, parent=None): self.__text_num_mem.setValue(4) self.__text_num_mem.setSuffix("GB") + self.__num_gpu_mem = QtWidgets.QSlider(self) + self.__num_gpu_mem.setValue(0) + self.__num_gpu_mem.setMaximum(256) + self.__num_gpu_mem.setOrientation(QtCore.Qt.Horizontal) + self.__num_gpu_mem.setTickPosition(QtWidgets.QSlider.TicksBelow) + self.__num_gpu_mem.setTickInterval(1) + + self.__text_num_gpu_mem = QtWidgets.QSpinBox(self) + self.__text_num_gpu_mem.setValue(0) + self.__text_num_gpu_mem.setSuffix("GB") + + self.__num_gpus = QtWidgets.QLineEdit(self) + self.__num_gpus.setText("0") + # # Next layout is if the deed is in use. # @@ -115,7 +129,7 @@ def __init__(self, target, parent=None): self.__run_mem = QtWidgets.QSlider(self) self.__run_mem.setValue(4) - self.__num_mem.setMaximum(256) + self.__run_mem.setMaximum(256) self.__run_mem.setOrientation(QtCore.Qt.Horizontal) self.__run_mem.setTickPosition(QtWidgets.QSlider.TicksBelow) self.__run_mem.setTickInterval(1) @@ -136,6 +150,8 @@ def __init__(self, target, parent=None): self.__num_frames.valueChanged.connect(self.__calculateCores) self.__run_mem.valueChanged.connect(self.__text_run_mem.setValue) self.__text_run_mem.valueChanged.connect(self.__run_mem.setValue) + self.__num_gpu_mem.valueChanged.connect(self.__text_num_gpu_mem.setValue) + self.__text_num_gpu_mem.valueChanged.connect(self.__num_gpu_mem.setValue) # pylint: enable=no-member self.layout().addWidget(QtWidgets.QLabel("Target Host:")) @@ -154,10 +170,16 @@ def __init__(self, target, parent=None): layout.addWidget(self.__num_cores, 3, 1) layout.addWidget(self.__frame_warn, 3, 2, 1, 2) - layout.addWidget(QtWidgets.QLabel("Memory (GB): "), 4, 0) + layout.addWidget(QtWidgets.QLabel("GPU Cores: "), 4, 0) + layout.addWidget(self.__num_gpus, 4, 1) + + layout.addWidget(QtWidgets.QLabel("Memory (GB): "), 5, 0) + layout.addWidget(self.__num_mem, 5, 1, 1, 2) + 
layout.addWidget(self.__text_num_mem, 5, 3) - layout.addWidget(self.__num_mem, 4, 1, 1, 2) - layout.addWidget(self.__text_num_mem, 4, 3) + layout.addWidget(QtWidgets.QLabel("GPU Memory (GB): "), 6, 0) + layout.addWidget(self.__num_gpu_mem, 6, 1, 1, 2) + layout.addWidget(self.__text_num_gpu_mem, 6, 3) # # Layout 2 @@ -208,24 +230,32 @@ def __host_changed(self, hostname): return host = opencue.api.findHost(str(hostname)) try: - rp = [r for r in host.getRenderPartitions() if r.job == self.jobName] + rp = [r for r in host.getRenderPartitions() if r.data.job == self.jobName] if rp: rp = rp[0] self.__stack.setCurrentIndex(1) self.__btn_clear.setText("Clear") self.__btn_clear.setDisabled(False) - self.__run_cores.setRange(1, int(host.data.idleCores) + rp.maxCores // 100) - self.__run_cores.setValue(rp.maxCores // 100) - self.__run_mem.setRange(1, int(host.data.totalMemory / 1024 / 1024)) - self.__run_mem.setValue(int(rp.maxMemory / 1024 / 1024)) + self.__run_cores.setRange(1, int(host.data.idle_cores) + rp.data.max_cores // 100) + self.__run_cores.setValue(rp.data.max_cores // 100) + self.__run_mem.setRange(1, int(host.data.total_memory / 1024 / 1024)) + self.__run_mem.setValue(int(rp.data.max_memory / 1024 / 1024)) else: self.__stack.setCurrentIndex(0) - self.__num_frames.setRange(1, host.data.idleCores) - self.__num_threads.setRange(1, host.data.idleCores) - self.__num_mem.setRange(1, int(host.data.totalMemory / 1024 / 1024)) - self.__num_threads.setRange(1, host.data.idleCores) + self.__num_frames.setRange(1, host.data.idle_cores) + self.__num_threads.setRange(1, host.data.idle_cores) + self.__num_mem.setRange(1, int(host.data.total_memory / 1024 / 1024)) + + # Automatically disable num_gpus field if the host is not reporting GPU + gpu_memory_available = int(host.data.total_gpu_memory / 1024 / 1024) + if gpu_memory_available == 0: + self.__num_gpus.setText("0") + self.__num_gpus.setReadOnly(True) + + self.__num_gpu_mem.setRange(0, gpu_memory_available) + 
self.__num_threads.setRange(1, host.data.idle_cores) except opencue.exception.CueException as e: list(map(logger.warning, cuegui.Utils.exceptionOutput(e))) @@ -308,7 +338,7 @@ def clearCurrentHost(self): self.__btn_clear.setDisabled(True) host = opencue.api.findHost(str(hostname)) - rp = [r for r in host.getRenderPartitions() if r.job == self.jobName] + rp = [r for r in host.getRenderPartitions() if r.data.job == self.jobName] if rp: rp = rp[0] @@ -318,7 +348,8 @@ def clearCurrentHost(self): for _ in range(0, 10): # pylint: disable=broad-except try: - rp = [r for r in host.getRenderPartitions() if r.job == self.jobName][0] + rp = [r for r in host.getRenderPartitions() + if r.data.job == self.jobName][0] time.sleep(1) except Exception: break @@ -334,16 +365,20 @@ def bookCurrentHost(self): return host = opencue.api.findHost(str(self.__select_host.currentText())) - rp = [r for r in host.getRenderPartitions() if r.job == self.jobName] + rp = [r for r in host.getRenderPartitions() if r.data.job == self.jobName] if rp: # A render partition already exists on this hosts and user is modifying rp[0].setMaxResources(int(self.__run_cores.value() * 100), - int(self.__run_mem.value()) * 1024 * 1024, - 0) + int(self.__run_mem.value()) * 1024 * 1024, + 0, 0) else: self.__target.addRenderPartition( - str(self.__select_host.currentText()), int(self.__num_threads.value()), - int(self.__num_cores.text()), int(self.__num_mem.value() * 1048576), 0) + str(self.__select_host.currentText()), + int(self.__num_threads.value()), + int(self.__num_cores.text()), + int(self.__num_mem.value() * 1048576), + int(self.__num_gpu_mem.value() * 1048576), + int(self.__num_gpus.text())) class LocalBookingDialog(QtWidgets.QDialog): diff --git a/proto/renderPartition.proto b/proto/renderPartition.proto index 29c64e9c2..ced7a7706 100644 --- a/proto/renderPartition.proto +++ b/proto/renderPartition.proto @@ -54,8 +54,8 @@ message RenderPartitionSeq { // -------- Requests & Responses --------] - message 
RenderPartDeleteRequest { + RenderPartition render_partition = 1; } diff --git a/pycue/opencue/wrappers/frame.py b/pycue/opencue/wrappers/frame.py index d996d51f9..d6da5486e 100644 --- a/pycue/opencue/wrappers/frame.py +++ b/pycue/opencue/wrappers/frame.py @@ -90,7 +90,7 @@ def retry(self): if self.data.state != job_pb2.FrameState.Value('WAITING'): self.stub.Retry(job_pb2.FrameRetryRequest(frame=self.data), timeout=Cuebot.Timeout) - def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu): + def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus): """Adds a render partition to the frame. :type hostname: str @@ -99,10 +99,12 @@ def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu): :param threads: number of threads of the partition :type max_cores: int :param max_cores: max cores enabled for the partition - :type num_mem: int - :param num_mem: amount of memory reserved for the partition - :type max_gpu: int - :param max_gpu: max gpu cores enabled for the partition + :type max_mem: int + :param max_mem: amount of memory reserved for the partition + :type max_gpu_memory: int + :param max_gpu_memory: max gpu memory enabled for the partition + :type max_gpus: int + :param max_gpus: max number of gpus enabled for the partition """ self.stub.AddRenderPartition( job_pb2.FrameAddRenderPartitionRequest( @@ -110,9 +112,10 @@ def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu): host=hostname, threads=threads, max_cores=max_cores, - max_memory=num_mem, - max_gpu=max_gpu, - username=os.getenv("USER", "unknown"))) + max_memory=max_mem, + max_gpu_memory=max_gpu_memory, + username=os.getenv("USER", "unknown"), + max_gpu=max_gpus)) def getWhatDependsOnThis(self): """Returns a list of dependencies that depend directly on this frame. 
diff --git a/pycue/opencue/wrappers/host.py b/pycue/opencue/wrappers/host.py index b0608dfe1..dbfd9893b 100644 --- a/pycue/opencue/wrappers/host.py +++ b/pycue/opencue/wrappers/host.py @@ -28,6 +28,7 @@ import opencue.wrappers.comment # pylint: disable=cyclic-import import opencue.wrappers.proc +import opencue.wrappers.render_partition class Host(object): @@ -109,8 +110,8 @@ def getRenderPartitions(self): """ response = self.stub.GetRenderPartitions(host_pb2.HostGetRenderPartitionsRequest( host=self.data), timeout=Cuebot.Timeout) - partitionSeq = response.render_partitions - return partitionSeq.render_partitions + return [opencue.wrappers.render_partition.RenderPartition(p) + for p in response.render_partitions.render_partitions] def rebootWhenIdle(self): """Sets the machine to reboot once idle. diff --git a/pycue/opencue/wrappers/layer.py b/pycue/opencue/wrappers/layer.py index acb5ed8d5..264ac8bf3 100644 --- a/pycue/opencue/wrappers/layer.py +++ b/pycue/opencue/wrappers/layer.py @@ -211,7 +211,7 @@ def setTimeoutLLU(self, timeout_llu): layer=self.data, timeout_llu=timeout_llu), timeout=Cuebot.Timeout) - def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu): + def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus): """Adds a render partition to the layer. 
:type hostname: str @@ -220,19 +220,22 @@ def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu): :param threads: number of threads of the partition :type max_cores: int :param max_cores: max cores enabled for the partition - :type num_mem: int - :param num_mem: amount of memory reserved for the partition - :type max_gpu: int - :param max_gpu: max gpu cores enabled for the partition + :type max_mem: int + :param max_mem: amount of memory reserved for the partition + :type max_gpu_memory: int + :param max_gpu_memory: max gpu memory enabled for the partition + :type max_gpus: int + :param max_gpus: max gpus enabled for the partition """ self.stub.AddRenderPartition( job_pb2.LayerAddRenderPartitionRequest(layer=self.data, host=hostname, threads=threads, max_cores=max_cores, - max_memory=num_mem, - max_gpu=max_gpu, - username=os.getenv("USER", "unknown"))) + max_memory=max_mem, + max_gpu_memory=max_gpu_memory, + username=os.getenv("USER", "unknown"), + max_gpus=max_gpus)) def getWhatDependsOnThis(self): """Gets a list of dependencies that depend directly on this layer. diff --git a/pycue/opencue/wrappers/render_partition.py b/pycue/opencue/wrappers/render_partition.py new file mode 100644 index 000000000..021dad744 --- /dev/null +++ b/pycue/opencue/wrappers/render_partition.py @@ -0,0 +1,42 @@ +# Copyright Contributors to the OpenCue Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Module for classes related to Render Partition.""" + +from opencue.compiled_proto import renderPartition_pb2 +from opencue.cuebot import Cuebot + + +class RenderPartition(object): + """This class contains the grpc implementation related to a Task.""" + + def __init__(self, render_partition=None): + self.data = render_partition + self.stub = Cuebot.getStub('renderPartition') + + def delete(self): + """Deletes the render partition.""" + self.stub.Delete(renderPartition_pb2.RenderPartDeleteRequest( + render_partition=self.data), timeout=Cuebot.Timeout) + + + def setMaxResources(self, cores, memory, gpuMemory, gpuCores): + """Deletes the render partition.""" + self.stub.SetMaxResources(renderPartition_pb2.RenderPartSetMaxResourcesRequest( + render_partition=self.data, + cores=cores, + memory=memory, + gpu_memory=gpuMemory, + gpus=gpuCores + ), timeout=Cuebot.Timeout) diff --git a/pycue/tests/wrappers/host_test.py b/pycue/tests/wrappers/host_test.py index 74f406490..21abda36b 100644 --- a/pycue/tests/wrappers/host_test.py +++ b/pycue/tests/wrappers/host_test.py @@ -110,7 +110,7 @@ def testGetRenderPartitions(self, getStubMock): host_pb2.HostGetRenderPartitionsRequest(host=host.data), timeout=mock.ANY) self.assertEqual(len(renderParts), 1) - self.assertEqual(renderParts[0].id, renderPartId) + self.assertEqual(renderParts[0].data.id, renderPartId) def testRebootWhenIdle(self, getStubMock): stubMock = mock.Mock()