Skip to content

Commit

Permalink
Added metrics information to session card
Browse files Browse the repository at this point in the history
  • Loading branch information
abujeda committed Nov 13, 2024
1 parent e1eb709 commit 51fdb12
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ clean:
rm -rf ./ondemand/apps/dashboard/node_modules
rm -rf ./ondemand/apps/dashboard/vendor/bundle
rm -rf ./ondemand/apps/dashboard/app_overrides
rm -rf ./ondemand/apps/dashboard/plugins
rm -rf ./ondemand/apps/dashboard/.env*

build_latest_ood:
Expand Down
30 changes: 30 additions & 0 deletions dev/metrics/lib/slurm_metrics/metrics_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,21 @@ def read_metrics
SlurmMetrics::MetricsSummary.new(slurm_metrics[:metrics])
end

# Returns the efficiency metrics for the job behind a batch connect session.
#
# Metrics are cached in a per-session YAML file located by job_metrics_path.
# When that file does not exist, or when it cannot be read or parsed, the
# metrics are rebuilt through refresh_job_metrics. Callers therefore always
# receive a metrics object and never a bare Hash.
#
# @param session [#id, #job_id] session whose job metrics are requested
# @return [Object] a SlurmMetrics::MetricsSummary built from the cached YAML,
#   or whatever refresh_job_metrics returns when the cache is missing/unusable
def read_job_metrics(session)
  file_path = job_metrics_path(session.id)
  return refresh_job_metrics(session) unless file_path.exist?

  begin
    # An empty file parses to nil; treat it as an empty set of metrics.
    yml = YAML.safe_load(file_path.read) || {}
    SlurmMetrics::MetricsSummary.new(yml.symbolize_keys)
  rescue StandardError => e
    Rails.logger.error("Can't read or parse job metrics: #{file_path} because of error #{e}")
    # The cache is derived data. Rebuild it instead of returning `{}`, which
    # does not respond to the metric readers (ave_cpu_eff, ...) callers use.
    refresh_job_metrics(session)
  end
end

def read_fairshare
slurm_fairshare = user_settings.fetch(:slurm_fairshare, {})
slurm_fairshare = refresh_fairshare if expired?(slurm_fairshare[:timestamp])
Expand All @@ -41,6 +56,17 @@ def refresh_metrics
set_metrics(metrics_summary)
end

# Rebuilds the metrics for a session's job and stores them in the session's
# metrics cache file.
#
# The raw job data is requested from the cluster's job adapter for the single
# job id of the session, turned into a metrics object by
# SlurmMetrics::MetricsProcessor, and written as YAML (string keys) to the
# path given by job_metrics_path. Both the raw data and the computed hash are
# logged at info level.
#
# @param session [#id, #job_id] session whose job metrics are recalculated
# @return [Object] the result of MetricsProcessor#calculate_metrics
def refresh_job_metrics(session)
  scheduler_data = cluster.job_adapter.metrics(job_ids: [session.job_id])
  Rails.logger.info(scheduler_data)

  # NOTE(review): both time arguments are "now"; their meaning is defined by
  # MetricsProcessor#calculate_metrics, which is not visible here.
  summary = SlurmMetrics::MetricsProcessor.new.calculate_metrics(
    Time.now, Time.now, scheduler_data, ignore_cancelled: false
  )
  Rails.logger.info(summary.to_hash)

  job_metrics_path(session.id).write(summary.to_hash.stringify_keys.to_yaml)
  summary
end

def refresh_fairshare
data = cluster.job_adapter.fairshare
fair_share = {
Expand Down Expand Up @@ -75,5 +101,9 @@ def expired?(date_string)
Time.now - Time.parse(date_string) > 24 * 60 * 60
end

# Builds the location of the metrics cache file for one session.
#
# The file lives in a "metrics" directory under BatchConnect::Session.dataroot;
# that directory is created when it does not exist yet. The file itself is not
# created here.
#
# @param session_id [String] id of the session, used as the file name
# @return [Object] path object for <dataroot>/metrics/<session_id>
#   (presumably a Pathname, given the join/mkpath/exist? calls)
def job_metrics_path(session_id)
  metrics_dir = BatchConnect::Session.dataroot.join('metrics')
  metrics_dir.mkpath unless metrics_dir.exist?
  metrics_dir.join(session_id)
end

end
end
13 changes: 13 additions & 0 deletions dev/metrics/views/batch_connect/sessions/card/_card_body.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<%# Body of a batch connect session card: the cancel/delete control followed by the individual card partials. %>
<div class="card-body">
<div>
<div class="float-end"><%= cancel_or_delete(session) %></div>
<%= render_card_partial('host', session) %>
<%= render_card_partial('created', session) %>
<%= render_card_partial('session_time', session) %>
<%= render_card_partial('id', session) %>
<%# The support ticket partial is only rendered when the feature is enabled in Configuration. %>
<%= render_card_partial('support_ticket', session) if Configuration.support_ticket_enabled? %>
<%= render_card_partial('display_choices', session) %>
<%# Job metrics are only rendered once the session reports completed?. %>
<%= render_card_partial('session_job_metrics', session) if session.completed?%>
<%= render_connection(session) %>
</div>
</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<%
  # Locals for the job-efficiency section of a session card.
  # `session` is the object handed to this partial; its metrics are read
  # through SlurmMetrics::MetricsService#read_job_metrics.
  metrics_service = SlurmMetrics::MetricsService.new
  job_metrics = metrics_service.read_job_metrics(session)
  metrics_helper = SlurmMetrics::MetricsHelper.new

  # DOM id of the collapsible details element. It is also used as a `#id`
  # selector in the data-target / data-bs-target attributes below, and a CSS
  # identifier must not start with a digit. The literal prefix therefore comes
  # first, so the selector stays valid even when session.id begins with a digit
  # (presumably possible, e.g. for UUID-like ids -- confirm).
  metric_details_id = "metric_#{session.id}"
%>

<style>

/* Font sizes for the metrics table header and body cells. */
table.metrics th {
font-size: 0.9rem;
}
table.metrics td {
font-size: 0.8rem;
}

/* Sizing and colour of the plus/minus toggle icons in the summary line. */
.job-metrics i.app-icon {
width: 0.8rem;
height: 0.8rem;
font-size: 0.8rem;
vertical-align: 1px;
color: #6c757d;
}

div.job-metric-details {
overflow: hidden;
}

/* Elements carrying a title attribute keep the default cursor. */
div.job-metric-details [title]{
cursor: default;
}

div.job-metric-details table {
margin-top: 0.5rem;
}

div.job-metric-details .text-dark {
--bs-text-opacity: 0.8;
}

/* The summary paragraph acts as the click target for expanding the details. */
p.job-metrics-description {
cursor: pointer;
margin: 0;
}

/* Show only the icon matching the toggle state: .closed is hidden while */
/* aria-expanded is true, .open is hidden while it is false. */
p[aria-expanded=true] .closed {
display: none;
}

p[aria-expanded=false] .open {
display: none;
}

</style>
<%# Job efficiency section: a clickable summary line plus a collapsible table with the detailed metrics. %>
<hr>
<div class="job-metrics">
<%# Summary line and collapse toggle. Both data-toggle/data-target and data-bs-toggle/data-bs-target are set (presumably to work with either Bootstrap attribute naming; confirm). Starts collapsed (aria-expanded="false"). %>
<p class="job-metrics-description" data-toggle="collapse" data-target="#<%= metric_details_id %>" data-bs-toggle="collapse" data-bs-target="#<%= metric_details_id %>" aria-expanded="false">
<i class="fa fa-plus fa-fw app-icon closed" aria-hidden="true"></i>
<i class="fa fa-minus fa-fw app-icon open" aria-hidden="true"></i>
<strong>Job Efficiency: </strong>
<%# Summary badges show the efficiencies rounded up to one decimal place (ceil(1)). %>
<span class="badge bg-light text-dark" title="">CPU <%= job_metrics.ave_cpu_eff.ceil(1) %>%</span>
<span class="card-text"> | </span>
<span class="badge bg-light text-dark" title="">Memory <%= job_metrics.ave_mem_eff.ceil(1) %>%</span>
<span class="card-text"> | </span>
<span class="badge bg-light text-dark" title="">Time <%= job_metrics.ave_time_eff.ceil(1) %>%</span>
</p>

<div class="job-metric-details">
<%# Collapsible target of the toggle above; its id must match the data-target selectors. %>
<div id="<%= metric_details_id %>" class="collapse">

<%# Detail table: numeric values are rounded up to two decimal places (ceil(2)); durations are formatted by metrics_helper.format_duration. %>
<table class="table table-sm table-striped table-condensed metrics">
<thead>
<tr>
<th class="fs-5">Metrics</th>
<th title="">Efficiency</th>
<th title="">Allocated</th>
<th title="">Used</th>
<th title="">Other</th>
</tr>
</thead>
<tbody>
<tr>
<td>CPU</td>
<td><%= "#{job_metrics.ave_cpu_eff.ceil(2)}%" %></td>
<td><%= "#{job_metrics.ave_cpu_req.ceil(2)} CPU Hrs" %></td>
<td><%= "#{job_metrics.ave_cpu_use.ceil(2)} CPU Hrs" %></td>
<td>Walltime <%= "#{job_metrics.tot_cpu_walltime.ceil(2)} CPU Hrs" %></td>
</tr>
<tr>
<td>Memory</td>
<td><%= "#{job_metrics.ave_mem_eff.ceil(2)}%" %></td>
<td><%= "#{job_metrics.ave_mem_req.ceil(2)}G" %></td>
<td><%= "#{job_metrics.ave_mem_use.ceil(2)}G" %></td>
<td>N/A</td>
</tr>
<tr>
<td>Time</td>
<td><%= "#{job_metrics.ave_time_eff.ceil(2)}%" %></td>
<td><%= metrics_helper.format_duration(job_metrics.ave_time_req) %></td>
<td><%= metrics_helper.format_duration(job_metrics.ave_time_use) %></td>
<td>Waiting Time <%= metrics_helper.format_duration(job_metrics.ave_wait_time) %></td>
</tr>
<%# GPU row: no efficiency or usage value is displayed, only the requested amount and total GPU hours. %>
<tr>
<td>GPU</td>
<td>N/A</td>
<td><%= "#{job_metrics.ave_gpu_req.ceil(2)} GPU Hrs" %></td>
<td>N/A</td>
<td>Walltime <%= "#{job_metrics.tot_gpu_hours.ceil(2)} GPU Hrs" %></td>
</tr>
</tbody>
</table>

</div>
</div>

</div>

0 comments on commit 51fdb12

Please sign in to comment.