Skip to content

Commit

Permalink
[cuegui/pycue] Fix Local Booking widget (#1581)
Browse files Browse the repository at this point in the history
This feature has been inactive on opencue since the beginning. Changes
to port from Ice to Grpc were not properly tested and this widget never
really worked.

Local Rendering is a feature that allows users to claim ownership of
a host (their workstation) and assign a job to execute frames on that
host. This is very useful in situations where the farm is busy but user
workstations aren't.

To access the feature, right-click on a job/layer and select "Use local
cores..". On the opened widget, the user can select how many cores,
memory and gpu to allocate to execute cue jobs. When confirmed, cuebot
will start dispatching frames to that host.

---------

Signed-off-by: Diego Tavares <[email protected]>
  • Loading branch information
DiegoTavares authored Nov 19, 2024
1 parent 94f7219 commit 633df41
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 42 deletions.
2 changes: 1 addition & 1 deletion VERSION.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1
1.2
77 changes: 56 additions & 21 deletions cuegui/cuegui/LocalBooking.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(self, target, parent=None):
try:
owner = opencue.api.getOwner(os.environ["USER"])
for host in owner.getHosts():
if host.data.lockState != opencue.api.host_pb2.OPEN:
if host.lockState() != opencue.api.host_pb2.OPEN:
self.__select_host.addItem(host.data.name)
except opencue.exception.CueException:
pass
Expand Down Expand Up @@ -104,6 +104,20 @@ def __init__(self, target, parent=None):
self.__text_num_mem.setValue(4)
self.__text_num_mem.setSuffix("GB")

self.__num_gpu_mem = QtWidgets.QSlider(self)
self.__num_gpu_mem.setValue(0)
self.__num_gpu_mem.setMaximum(256)
self.__num_gpu_mem.setOrientation(QtCore.Qt.Horizontal)
self.__num_gpu_mem.setTickPosition(QtWidgets.QSlider.TicksBelow)
self.__num_gpu_mem.setTickInterval(1)

self.__text_num_gpu_mem = QtWidgets.QSpinBox(self)
self.__text_num_gpu_mem.setValue(0)
self.__text_num_gpu_mem.setSuffix("GB")

self.__num_gpus = QtWidgets.QLineEdit(self)
self.__num_gpus.setText("0")

#
# Next layout is if the deed is in use.
#
Expand All @@ -115,7 +129,7 @@ def __init__(self, target, parent=None):

self.__run_mem = QtWidgets.QSlider(self)
self.__run_mem.setValue(4)
self.__num_mem.setMaximum(256)
self.__run_mem.setMaximum(256)
self.__run_mem.setOrientation(QtCore.Qt.Horizontal)
self.__run_mem.setTickPosition(QtWidgets.QSlider.TicksBelow)
self.__run_mem.setTickInterval(1)
Expand All @@ -136,6 +150,8 @@ def __init__(self, target, parent=None):
self.__num_frames.valueChanged.connect(self.__calculateCores)
self.__run_mem.valueChanged.connect(self.__text_run_mem.setValue)
self.__text_run_mem.valueChanged.connect(self.__run_mem.setValue)
self.__num_gpu_mem.valueChanged.connect(self.__text_num_gpu_mem.setValue)
self.__text_num_gpu_mem.valueChanged.connect(self.__num_gpu_mem.setValue)
# pylint: enable=no-member

self.layout().addWidget(QtWidgets.QLabel("Target Host:"))
Expand All @@ -154,10 +170,16 @@ def __init__(self, target, parent=None):
layout.addWidget(self.__num_cores, 3, 1)
layout.addWidget(self.__frame_warn, 3, 2, 1, 2)

layout.addWidget(QtWidgets.QLabel("Memory (GB): "), 4, 0)
layout.addWidget(QtWidgets.QLabel("GPU Cores: "), 4, 0)
layout.addWidget(self.__num_gpus, 4, 1)

layout.addWidget(QtWidgets.QLabel("Memory (GB): "), 5, 0)
layout.addWidget(self.__num_mem, 5, 1, 1, 2)
layout.addWidget(self.__text_num_mem, 5, 3)

layout.addWidget(self.__num_mem, 4, 1, 1, 2)
layout.addWidget(self.__text_num_mem, 4, 3)
layout.addWidget(QtWidgets.QLabel("GPU Memory (GB): "), 6, 0)
layout.addWidget(self.__num_gpu_mem, 6, 1, 1, 2)
layout.addWidget(self.__text_num_gpu_mem, 6, 3)

#
# Layout 2
Expand Down Expand Up @@ -208,24 +230,32 @@ def __host_changed(self, hostname):
return
host = opencue.api.findHost(str(hostname))
try:
rp = [r for r in host.getRenderPartitions() if r.job == self.jobName]
rp = [r for r in host.getRenderPartitions() if r.data.job == self.jobName]

if rp:
rp = rp[0]
self.__stack.setCurrentIndex(1)
self.__btn_clear.setText("Clear")
self.__btn_clear.setDisabled(False)
self.__run_cores.setRange(1, int(host.data.idleCores) + rp.maxCores // 100)
self.__run_cores.setValue(rp.maxCores // 100)
self.__run_mem.setRange(1, int(host.data.totalMemory / 1024 / 1024))
self.__run_mem.setValue(int(rp.maxMemory / 1024 / 1024))
self.__run_cores.setRange(1, int(host.data.idle_cores) + rp.data.max_cores // 100)
self.__run_cores.setValue(rp.data.max_cores // 100)
self.__run_mem.setRange(1, int(host.data.total_memory / 1024 / 1024))
self.__run_mem.setValue(int(rp.data.max_memory / 1024 / 1024))

else:
self.__stack.setCurrentIndex(0)
self.__num_frames.setRange(1, host.data.idleCores)
self.__num_threads.setRange(1, host.data.idleCores)
self.__num_mem.setRange(1, int(host.data.totalMemory / 1024 / 1024))
self.__num_threads.setRange(1, host.data.idleCores)
self.__num_frames.setRange(1, host.data.idle_cores)
self.__num_threads.setRange(1, host.data.idle_cores)
self.__num_mem.setRange(1, int(host.data.total_memory / 1024 / 1024))

# Automatically disable num_gpus field if the host is not reporting GPU
gpu_memory_available = int(host.data.total_gpu_memory / 1024 / 1024)
if gpu_memory_available == 0:
self.__num_gpus.setText("0")
self.__num_gpus.setReadOnly(True)

self.__num_gpu_mem.setRange(0, gpu_memory_available)
self.__num_threads.setRange(1, host.data.idle_cores)
except opencue.exception.CueException as e:
list(map(logger.warning, cuegui.Utils.exceptionOutput(e)))

Expand Down Expand Up @@ -308,7 +338,7 @@ def clearCurrentHost(self):
self.__btn_clear.setDisabled(True)
host = opencue.api.findHost(str(hostname))

rp = [r for r in host.getRenderPartitions() if r.job == self.jobName]
rp = [r for r in host.getRenderPartitions() if r.data.job == self.jobName]
if rp:
rp = rp[0]

Expand All @@ -318,7 +348,8 @@ def clearCurrentHost(self):
for _ in range(0, 10):
# pylint: disable=broad-except
try:
rp = [r for r in host.getRenderPartitions() if r.job == self.jobName][0]
rp = [r for r in host.getRenderPartitions()
if r.data.job == self.jobName][0]
time.sleep(1)
except Exception:
break
Expand All @@ -334,16 +365,20 @@ def bookCurrentHost(self):
return

host = opencue.api.findHost(str(self.__select_host.currentText()))
rp = [r for r in host.getRenderPartitions() if r.job == self.jobName]
rp = [r for r in host.getRenderPartitions() if r.data.job == self.jobName]
if rp:
# A render partition already exists on this hosts and user is modifying
rp[0].setMaxResources(int(self.__run_cores.value() * 100),
int(self.__run_mem.value()) * 1024 * 1024,
0)
int(self.__run_mem.value()) * 1024 * 1024,
0, 0)
else:
self.__target.addRenderPartition(
str(self.__select_host.currentText()), int(self.__num_threads.value()),
int(self.__num_cores.text()), int(self.__num_mem.value() * 1048576), 0)
str(self.__select_host.currentText()),
int(self.__num_threads.value()),
int(self.__num_cores.text()),
int(self.__num_mem.value() * 1048576),
int(self.__num_gpu_mem.value() * 1048576),
int(self.__num_gpus.text()))


class LocalBookingDialog(QtWidgets.QDialog):
Expand Down
2 changes: 1 addition & 1 deletion proto/renderPartition.proto
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ message RenderPartitionSeq {


// -------- Requests & Responses --------]

message RenderPartDeleteRequest {

RenderPartition render_partition = 1;
}

Expand Down
19 changes: 11 additions & 8 deletions pycue/opencue/wrappers/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def retry(self):
if self.data.state != job_pb2.FrameState.Value('WAITING'):
self.stub.Retry(job_pb2.FrameRetryRequest(frame=self.data), timeout=Cuebot.Timeout)

def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu):
def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus):
"""Adds a render partition to the frame.
:type hostname: str
Expand All @@ -99,20 +99,23 @@ def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu):
:param threads: number of threads of the partition
:type max_cores: int
:param max_cores: max cores enabled for the partition
:type num_mem: int
:param num_mem: amount of memory reserved for the partition
:type max_gpu: int
:param max_gpu: max gpu cores enabled for the partition
:type max_mem: int
:param max_mem: amount of memory reserved for the partition
:type max_gpu_memory: int
:param max_gpu_memory: max gpu memory enabled for the partition
:type max_gpus: int
:param max_gpus: max number of gpus enabled for the partition
"""
self.stub.AddRenderPartition(
job_pb2.FrameAddRenderPartitionRequest(
frame=self.data,
host=hostname,
threads=threads,
max_cores=max_cores,
max_memory=num_mem,
max_gpu=max_gpu,
username=os.getenv("USER", "unknown")))
max_memory=max_mem,
max_gpu_memory=max_gpu_memory,
username=os.getenv("USER", "unknown"),
max_gpu=max_gpus))

def getWhatDependsOnThis(self):
"""Returns a list of dependencies that depend directly on this frame.
Expand Down
5 changes: 3 additions & 2 deletions pycue/opencue/wrappers/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import opencue.wrappers.comment
# pylint: disable=cyclic-import
import opencue.wrappers.proc
import opencue.wrappers.render_partition


class Host(object):
Expand Down Expand Up @@ -109,8 +110,8 @@ def getRenderPartitions(self):
"""
response = self.stub.GetRenderPartitions(host_pb2.HostGetRenderPartitionsRequest(
host=self.data), timeout=Cuebot.Timeout)
partitionSeq = response.render_partitions
return partitionSeq.render_partitions
return [opencue.wrappers.render_partition.RenderPartition(p)
for p in response.render_partitions.render_partitions]

def rebootWhenIdle(self):
"""Sets the machine to reboot once idle.
Expand Down
19 changes: 11 additions & 8 deletions pycue/opencue/wrappers/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def setTimeoutLLU(self, timeout_llu):
layer=self.data, timeout_llu=timeout_llu),
timeout=Cuebot.Timeout)

def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu):
def addRenderPartition(self, hostname, threads, max_cores, max_mem, max_gpu_memory, max_gpus):
"""Adds a render partition to the layer.
:type hostname: str
Expand All @@ -220,19 +220,22 @@ def addRenderPartition(self, hostname, threads, max_cores, num_mem, max_gpu):
:param threads: number of threads of the partition
:type max_cores: int
:param max_cores: max cores enabled for the partition
:type num_mem: int
:param num_mem: amount of memory reserved for the partition
:type max_gpu: int
:param max_gpu: max gpu cores enabled for the partition
:type max_mem: int
:param max_mem: amount of memory reserved for the partition
:type max_gpu_memory: int
:param max_gpu_memory: max gpu memory enabled for the partition
:type max_gpus: int
:param max_gpus: max gpus enabled for the partition
"""
self.stub.AddRenderPartition(
job_pb2.LayerAddRenderPartitionRequest(layer=self.data,
host=hostname,
threads=threads,
max_cores=max_cores,
max_memory=num_mem,
max_gpu=max_gpu,
username=os.getenv("USER", "unknown")))
max_memory=max_mem,
max_gpu_memory=max_gpu_memory,
username=os.getenv("USER", "unknown"),
max_gpus=max_gpus))

def getWhatDependsOnThis(self):
"""Gets a list of dependencies that depend directly on this layer.
Expand Down
42 changes: 42 additions & 0 deletions pycue/opencue/wrappers/render_partition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright Contributors to the OpenCue Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for classes related to Render Partition."""

from opencue.compiled_proto import renderPartition_pb2
from opencue.cuebot import Cuebot


class RenderPartition(object):
    """This class contains the grpc implementation related to a RenderPartition.

    A render partition represents a slice of a host's resources (cores,
    memory, gpus) reserved for local rendering of a specific job/layer/frame.
    """

    def __init__(self, render_partition=None):
        """Wraps a render partition protobuf message.

        :type  render_partition: renderPartition_pb2.RenderPartition
        :param render_partition: render partition protobuf message to wrap
        """
        self.data = render_partition
        self.stub = Cuebot.getStub('renderPartition')

    def delete(self):
        """Deletes the render partition."""
        self.stub.Delete(renderPartition_pb2.RenderPartDeleteRequest(
            render_partition=self.data), timeout=Cuebot.Timeout)


    def setMaxResources(self, cores, memory, gpuMemory, gpuCores):
        """Sets the maximum resources the render partition may consume.

        :type  cores: int
        :param cores: max cores enabled for the partition (in core units * 100)
        :type  memory: int
        :param memory: max memory reserved for the partition, in kB
        :type  gpuMemory: int
        :param gpuMemory: max gpu memory reserved for the partition, in kB
        :type  gpuCores: int
        :param gpuCores: max number of gpus enabled for the partition
        """
        self.stub.SetMaxResources(renderPartition_pb2.RenderPartSetMaxResourcesRequest(
            render_partition=self.data,
            cores=cores,
            memory=memory,
            gpu_memory=gpuMemory,
            gpus=gpuCores
        ), timeout=Cuebot.Timeout)
2 changes: 1 addition & 1 deletion pycue/tests/wrappers/host_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def testGetRenderPartitions(self, getStubMock):
host_pb2.HostGetRenderPartitionsRequest(host=host.data),
timeout=mock.ANY)
self.assertEqual(len(renderParts), 1)
self.assertEqual(renderParts[0].id, renderPartId)
self.assertEqual(renderParts[0].data.id, renderPartId)

def testRebootWhenIdle(self, getStubMock):
stubMock = mock.Mock()
Expand Down

0 comments on commit 633df41

Please sign in to comment.