Skip to content

Commit

Permalink
more classad foo
Browse files (browse the repository at this point in the history)
  • Loading branch information
dsschult committed Nov 1, 2024
1 parent da7bddd commit 57d0da5
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions iceprod/server/plugins/condor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -170,7 +170,9 @@ class CondorSubmit:
}

_GENERIC_ADS = ['Iwd', 'IceProdDatasetId', 'IceProdTaskId', 'IceProdTaskInstanceId', 'MATCH_EXP_JOBGLIDEIN_ResourceName']
AD_INFO = ['RemotePool', 'RemoteHost', 'RemoteWallClockTime', 'HoldReason', 'LastHoldReason', 'RemoveReason', 'MachineAttrGLIDEIN_Site0'] + _GENERIC_ADS
AD_INFO = ['RemotePool', 'RemoteHost', 'RemoteWallClockTime', 'ResidentSetSize_RAW', 'DiskUsage_RAW',
'HoldReason', 'RemoveReason', 'Reason', 'MachineAttrGLIDEIN_Site0'
] + _GENERIC_ADS
AD_PROJECTION_QUEUE = ['JobStatus', 'RemotePool', 'RemoteHost'] + _GENERIC_ADS
AD_PROJECTION_HISTORY = [
'JobStatus', 'ExitCode', 'RemoveReason', 'LastHoldReason', 'CpusUsage', 'RemoteSysCpu', 'RemoteUserCpu',
Expand Down Expand Up @@ -664,11 +666,17 @@ async def wait(self, timeout):
# get stats
cpu = event.get('CpusUsage', None)
gpu = event.get('GpusUsage', None)
memory = event.get('MemoryUsage', None) # MB
disk = event.get('DiskUsage', None) # KB
memory = event.get('ResidentSetSize_RAW', None) # KB
if memory is None:
memory = event.get('MemoryUsage', None)*1000 # MB
disk = event.get('DiskUsage_RAW', None) # KB
if disk is None:
disk = event.get('DiskUsage', None) # KB
time_ = event.get('RemoteWallClockTime', None) # seconds
if not time_:
time_ = parse_usage(event.get('RunRemoteUsage', ''))
if time_ is None:
time_ = parse_usage(event.get('RunRemoteUsage', '')) / event.get('RequestCpus', 1)
elif cpu is None and time_:
cpu = parse_usage(event.get('RunRemoteUsage', '')) / time_
# data_in = event['ReceivedBytes'] # KB
# data_out = event['SentBytes'] # KB

Expand All @@ -678,7 +686,7 @@ async def wait(self, timeout):
if gpu is not None:
resources['gpu'] = gpu
if memory is not None:
resources['memory'] = memory/1000.
resources['memory'] = memory/1000000.
if disk is not None:
resources['disk'] = disk/1000000.
if time_ is not None:
Expand All @@ -696,10 +704,10 @@ async def wait(self, timeout):
reason = None
if r := event.get('HoldReason'):
reason = r
elif r := event.get('LastHoldReason'):
reason = r
elif r := event.get('RemoveReason'):
reason = r
elif r := event.get('Reason'):
reason = r

# finish job
await self.finish(job_id, success=success, resources=resources, stats=stats, reason=reason)
Expand Down

0 comments on commit 57d0da5

Please sign in to comment.