diff --git a/config/local/.env b/config/local/.env index 1efc1f50..90360bbe 100644 --- a/config/local/.env +++ b/config/local/.env @@ -3,7 +3,10 @@ OOD_DASHBOARD_SUPPORT_EMAIL=your@email.edu OOD_DASHBOARD_SUPPORT_URL=your@email.edu OOD_APP_CATALOG_URL=https://link.to.online/app/catalog ENABLE_NATIVE_VNC=true +OOD_ANNOUNCEMENT_PATH=/var/www/ood/public/announcements OOD_CONFIG_D_DIRECTORY_BAK=/etc/ood/config OOD_LOAD_EXTERNAL_CONFIG=true OOD_APP_CONFIG_ROOT=/etc/ood/config/apps/ood OOD_DATAROOT=~/ondemand/data/sys/dashboard +OOD_XDMOD_HOST=https://localhost:4443 +OOD_XDMOD_HOST_BAK=https://localhost:33000 diff --git a/config/local/app_overrides/lib/slurm_metrics/metrics_cache.rb b/config/local/app_overrides/lib/slurm_metrics/metrics_cache.rb deleted file mode 100644 index 9e78229c..00000000 --- a/config/local/app_overrides/lib/slurm_metrics/metrics_cache.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module SlurmMetrics - # Manages the stored metrics within the user settings store. - # The cache expires after 24hours. 
- class MetricsCache - include UserSettingStore - - def set_metrics(from, to, metrics) - slurm_metrics = { - slurm_metrics: { - timestamp: Time.now.strftime('%Y-%m-%dT%H:%M:%S'), - from: from, - to: to, - metrics: metrics.to_hash - } - } - update_user_settings(slurm_metrics) - end - - def set_fairshare(timestamp, fairshare) - slurm_fairshare = { - slurm_fairshare: { - timestamp: timestamp.strftime('%Y-%m-%dT%H:%M:%S'), - fairshare: fairshare - } - } - update_user_settings(slurm_fairshare) - end - - def get_metrics - slurm_metrics = user_settings.fetch(:slurm_metrics, {}) - return nil if expired?(slurm_metrics[:timestamp]) - - [slurm_metrics[:from], slurm_metrics[:to], SlurmMetrics::MetricsSummary.new(slurm_metrics[:metrics])] - end - - def get_fairshare - slurm_fairshare = user_settings.fetch(:slurm_fairshare, {}) - return nil if expired?(slurm_fairshare[:timestamp]) - - [Time.parse(slurm_fairshare[:timestamp]), slurm_fairshare[:fairshare]] - end - - private - - def expired?(date_string) - return true if date_string.blank? 
- - # Parse the date string and compare the time difference with 24 hours (in seconds) - Time.now - Time.parse(date_string) > 24 * 60 * 60 - end - - end -end diff --git a/config/local/app_overrides/lib/slurm_metrics/metrics_processor.rb b/config/local/app_overrides/lib/slurm_metrics/metrics_processor.rb index 26fb6c57..99b684f7 100644 --- a/config/local/app_overrides/lib/slurm_metrics/metrics_processor.rb +++ b/config/local/app_overrides/lib/slurm_metrics/metrics_processor.rb @@ -6,8 +6,11 @@ module SlurmMetrics # Based on: https://github.com/fasrc/puppet-slurm_stats class MetricsProcessor - def calculate_metrics(user_metrics) + def calculate_metrics(from, to, user_metrics) metrics_summary = SlurmMetrics::MetricsSummary.new + metrics_summary.from = from + metrics_summary.to = to + user_max_rss = 0.0 # REVERSE METRICS TO PROCESS FIRST THE JOB STEPS AND THEN THE MAIN JOB user_metrics.reverse_each do |metric_hash| diff --git a/config/local/app_overrides/lib/slurm_metrics/metrics_service.rb b/config/local/app_overrides/lib/slurm_metrics/metrics_service.rb new file mode 100644 index 00000000..172b93df --- /dev/null +++ b/config/local/app_overrides/lib/slurm_metrics/metrics_service.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +module SlurmMetrics + # Service that manages retrieving and storing the user metrics within the user settings store. + # It uses a 24-hour cache for the Slurm data. 
+ class MetricsService + include UserSettingStore + METRICS_PERIOD = 7.days + + attr_reader :cluster + + def initialize + @cluster = Configuration.job_clusters.select(&:slurm?).first + end + + def read_metrics + slurm_metrics = user_settings.fetch(:slurm_metrics, {}) + slurm_metrics = refresh_metrics if expired?(slurm_metrics[:timestamp]) + + SlurmMetrics::MetricsSummary.new(slurm_metrics[:metrics]) + end + + def read_fairshare + slurm_fairshare = user_settings.fetch(:slurm_fairshare, {}) + slurm_fairshare = refresh_fairshare if expired?(slurm_fairshare[:timestamp]) + fairshare = slurm_fairshare[:fairshare] + fairshare[:from] = Time.at(fairshare[:from]) + + OpenStruct.new(fairshare) + end + + private + + def refresh_metrics + from = Time.now - METRICS_PERIOD + to = Time.now + job_data = cluster.job_adapter.metrics(from: from.strftime('%Y-%m-%dT00:00:00'), to: to.strftime('%Y-%m-%dT23:59:59')) + processor = SlurmMetrics::MetricsProcessor.new + metrics_summary = processor.calculate_metrics(from, to, job_data) + + set_metrics(metrics_summary) + end + + def refresh_fairshare + data = cluster.job_adapter.fairshare + fair_share = { + from: Time.now.to_i, + data: data + } + set_fairshare(fair_share) + end + + def set_metrics(metrics) + slurm_metrics = { + timestamp: Time.now.strftime('%Y-%m-%dT%H:%M:%S'), + metrics: metrics.to_hash + } + update_user_settings({ slurm_metrics: slurm_metrics }) + slurm_metrics + end + + def set_fairshare(fairshare) + slurm_fairshare = { + timestamp: Time.now.strftime('%Y-%m-%dT%H:%M:%S'), + fairshare: fairshare + } + update_user_settings({ slurm_fairshare: slurm_fairshare }) + slurm_fairshare + end + + def expired?(date_string) + return true if date_string.blank? 
+ + # Parse the date string and compare the time difference with 24 hours (in seconds) + Time.now - Time.parse(date_string) > 24 * 60 * 60 + end + + end +end diff --git a/config/local/app_overrides/lib/slurm_metrics/metrics_summary.rb b/config/local/app_overrides/lib/slurm_metrics/metrics_summary.rb index 8d5b5e13..541794eb 100644 --- a/config/local/app_overrides/lib/slurm_metrics/metrics_summary.rb +++ b/config/local/app_overrides/lib/slurm_metrics/metrics_summary.rb @@ -3,6 +3,7 @@ module SlurmMetrics # Class that holds all the metrics data class MetricsSummary + attr_accessor :from, :to attr_accessor :num_jobs, :num_jgpu attr_accessor :tot_cpu_walltime, :tot_gpu_hours, :ave_cpu_use, :ave_cpu_req, :ave_cpu_eff, :ave_gpu_req attr_accessor :tot_mem_use, :ave_mem_use, :ave_mem_req, :ave_mem_eff @@ -10,6 +11,11 @@ class MetricsSummary attr_accessor :ntotal_cpu, :nca_cpu, :ncd_cpu, :nf_cpu, :noom_cpu, :nto_cpu, :ntotal_gpu, :nca_gpu, :ncd_gpu, :nf_gpu, :noom_gpu, :nto_gpu def initialize(data = {}) + @from = data.fetch(:from, nil) + @to = data.fetch(:to, nil) + @from = Time.at(@from) unless @from.blank? + @to = Time.at(@to) unless @to.blank? + @num_jobs = data.fetch(:num_jobs, 0) @num_jgpu = data.fetch(:num_jgpu, 0) @@ -55,6 +61,8 @@ def to_hash instance_variables.each do |var| hash[var.to_s.delete('@').to_sym] = instance_variable_get(var) end + hash[:from] = hash[:from].to_i + hash[:to] = hash[:to].to_i hash end end diff --git a/config/local/app_overrides/views/widgets/metrics/_fairshare.html.erb b/config/local/app_overrides/views/widgets/metrics/_fairshare.html.erb index 4bdcf07e..d34f8f40 100644 --- a/config/local/app_overrides/views/widgets/metrics/_fairshare.html.erb +++ b/config/local/app_overrides/views/widgets/metrics/_fairshare.html.erb @@ -1,43 +1,44 @@ <% - metrics_cache = SlurmMetrics::MetricsCache.new - timestamp, fairshare = metrics_cache.get_fairshare - if true || fairshare.blank? 
- cluster = Configuration.job_clusters.select(&:slurm?).first - fairshare = cluster.job_adapter.fairshare - timestamp = Time.now - metrics_cache.set_fairshare(timestamp, fairshare) - end + metrics_service = SlurmMetrics::MetricsService.new + slurm_faishare = metrics_service.read_fairshare + timestamp = slurm_faishare.from + fairshare = slurm_faishare.data metrics_helper = SlurmMetrics::MetricsHelper.new %> -
-
- <%= timestamp.strftime('%Y-%m-%d') %> -

Fairshare

-
-
- <% unless fairshare.empty? %> - - - - - - - - <% fairshare.each do |fairshare_data| %> +
+
+
+ <%= timestamp.strftime('%Y-%m-%d') %> +

Fairshare

+
+
+ <% unless fairshare.blank? %> +
Account Fairshare
+ - + - <% end %> - -
- <%= fairshare_data[:account] %> - <%= fairshare_data[:fairshare].to_s.to_f.round(4) %> - Account Fairshare
- <% else %> -
- No fairshare data available for user <%= @user.name %> -
- <% end %> + + + <% fairshare.each do |fairshare_data| %> + + + <%= fairshare_data[:account] %> + <%= fairshare_data[:fairshare].to_s.to_f.round(4) %> + + + <% end %> + + + <% else %> +

+ No fairshare data available for user <%= @user.name %> +

+ <% end %> + + FASRC Fairshare info + +
\ No newline at end of file diff --git a/config/local/app_overrides/views/widgets/metrics/_jobs_summary.html.erb b/config/local/app_overrides/views/widgets/metrics/_jobs_summary.html.erb index e2e99c2f..3ea0e38e 100644 --- a/config/local/app_overrides/views/widgets/metrics/_jobs_summary.html.erb +++ b/config/local/app_overrides/views/widgets/metrics/_jobs_summary.html.erb @@ -28,41 +28,43 @@ metrics_helper.metrics_ceil(total, failed), ] %> -
-
- Total Jobs: <%= total %> -

<%= title %>

- <% if subtitle %> - <%= subtitle %> - <% end %> -
-
- - - - - - - - - - - - - - - - - - - -
CompletedTimed OutCanceledOOMFailed
<%= completed %><%= timeout %><%= canceled %><%= memory %><%= failed %>
-
-
<%= percentages[0] %>%
-
<%=percentages[1] %>%
-
<%= percentages[2] %>%
-
<%= percentages[3] %>%
-
<%= percentages[4] %>%
+
+
+
+ Total Jobs: <%= total %> +

<%= title %>

+ <% if subtitle %> + <%= subtitle %> + <% end %> +
+
+ + + + + + + + + + + + + + + + + + + +
CompletedTimed OutCanceledOOMFailed
<%= completed %><%= timeout %><%= canceled %><%= memory %><%= failed %>
+
+
<%= percentages[0] %>%
+
<%=percentages[1] %>%
+
<%= percentages[2] %>%
+
<%= percentages[3] %>%
+
<%= percentages[4] %>%
+
\ No newline at end of file diff --git a/config/local/app_overrides/views/widgets/metrics/_metric_summary.html.erb b/config/local/app_overrides/views/widgets/metrics/_metric_details.html.erb similarity index 100% rename from config/local/app_overrides/views/widgets/metrics/_metric_summary.html.erb rename to config/local/app_overrides/views/widgets/metrics/_metric_details.html.erb diff --git a/config/local/app_overrides/views/widgets/metrics/_metrics.html.erb b/config/local/app_overrides/views/widgets/metrics/_metrics.html.erb index eb6afb82..3b9da8d4 100644 --- a/config/local/app_overrides/views/widgets/metrics/_metrics.html.erb +++ b/config/local/app_overrides/views/widgets/metrics/_metrics.html.erb @@ -1,17 +1,9 @@ <% - cluster = Configuration.job_clusters.select(&:slurm?).first - metrics_cache = SlurmMetrics::MetricsCache.new - from_text, to_text, summary = metrics_cache.get_metrics - if summary.blank? - from = Time.now - 7.days - to = Time.now - last_7_days = cluster.job_adapter.metrics(from: from.strftime('%Y-%m-%dT00:00:00'), to: to.strftime('%Y-%m-%dT23:59:59')) - processor = SlurmMetrics::MetricsProcessor.new - summary = processor.calculate_metrics(last_7_days) - from_text = from.strftime('%Y-%m-%d') - to_text = to.strftime('%Y-%m-%d') - metrics_cache.set_metrics(from_text, to_text, summary) - end + metrics_service = SlurmMetrics::MetricsService.new + summary = metrics_service.read_metrics + + from_text = summary.from.strftime('%Y-%m-%d') + to_text = summary.to.strftime('%Y-%m-%d') %> -<% if summary %>
-

<%= cluster.metadata.title %> Metrics

- - <%= render(partial: '/widgets/metrics/fairshare') %> - <%= render(partial: '/widgets/metrics/jobs_summary', locals: { title: 'CPU Jobs by State', subtitle: "Period: #{from_text} to #{to_text}", total: summary.ntotal_cpu, completed: summary.ncd_cpu, timeout: summary.nto_cpu, canceled: summary.nca_cpu, memory: summary.noom_cpu, failed: summary.nf_cpu } ) %> - <%= render(partial: '/widgets/metrics/jobs_summary', locals: { title: 'GPU Jobs by State', subtitle: "Period: #{from_text} to #{to_text}", total: summary.ntotal_gpu, completed: summary.ncd_gpu, timeout: summary.nto_gpu, canceled: summary.nca_gpu, memory: summary.noom_gpu, failed: summary.nf_gpu } ) %> - -
-
- Total Jobs: <%= summary.total_jobs %> -

Summary Job Stats

- <%= "Period: #{from_text} to #{to_text}" %> -
- -
- <%= render(partial: '/widgets/metrics/metric_summary', - locals: { id: 'cpu-metric-summary', - title: 'CPU Efficiency:', - efficiency: summary.ave_cpu_eff.round(1), - items: [['Average Used', "#{summary.ave_cpu_use.ceil(1)} CPU Hrs"], - ['Average Allocated', "#{summary.ave_cpu_req.ceil(1)} CPU Hrs"], - ['Total Walltime', "#{summary.tot_cpu_walltime.ceil(1)} CPU Hrs"]] } ) %> - <%= render(partial: '/widgets/metrics/metric_summary', - locals: { id: 'gpu-metric-summary', - title: 'GPU Efficiency:', - items: [['Average Allocated', "#{summary.ave_gpu_req.ceil(1)} CPU Hrs"], - ['Total Walltime', "#{summary.tot_gpu_hours.ceil(1)} CPU Hrs"]] } ) %> - <%= render(partial: '/widgets/metrics/metric_summary', - locals: { id: 'memory-metric-summary', - title: 'Memory Efficiency:', - efficiency: summary.ave_mem_eff.ceil(1), - items: [['Average Used', "#{summary.ave_mem_use.ceil(1)}G"], - ['Average Allocated', "#{summary.ave_mem_req.ceil(1)}G"], - ['Total Used', "#{summary.tot_mem_use.ceil(1)}G"]] } ) %> - <%= render(partial: '/widgets/metrics/metric_summary', - locals: { id: 'time-metric-summary', - title: 'Time Efficiency:', - efficiency: summary.ave_time_eff.ceil(1), - items: [['Average Used', "#{summary.ave_mem_use.ceil(1)}H"], - ['Average Allocated', "#{summary.ave_mem_req.ceil(1)}H"], - ['Average Waiting Time', "#{summary.ave_wait_time.ceil(1)}H"]] } ) %> - -
+

<%= metrics_service.cluster.metadata.title %> Metrics

+
+ <%= render(partial: '/widgets/metrics/fairshare') %> + + <%= render(partial: '/widgets/metrics/jobs_summary', locals: { title: 'CPU Jobs by State', subtitle: "Period: #{from_text} to #{to_text}", total: summary.ntotal_cpu, completed: summary.ncd_cpu, timeout: summary.nto_cpu, canceled: summary.nca_cpu, memory: summary.noom_cpu, failed: summary.nf_cpu } ) %> + <%= render(partial: '/widgets/metrics/jobs_summary', locals: { title: 'GPU Jobs by State', subtitle: "Period: #{from_text} to #{to_text}", total: summary.ntotal_gpu, completed: summary.ncd_gpu, timeout: summary.nto_gpu, canceled: summary.nca_gpu, memory: summary.noom_gpu, failed: summary.nf_gpu } ) %> + + <%= render(partial: '/widgets/metrics/metrics_summary', locals: { summary: summary, from_text: from_text, to_text: to_text } ) %>
-<% end %> diff --git a/config/local/app_overrides/views/widgets/metrics/_metrics_summary.html.erb b/config/local/app_overrides/views/widgets/metrics/_metrics_summary.html.erb new file mode 100644 index 00000000..caef5d17 --- /dev/null +++ b/config/local/app_overrides/views/widgets/metrics/_metrics_summary.html.erb @@ -0,0 +1,45 @@ +<% + summary = local_assigns.fetch(:summary, nil) + from_text = local_assigns.fetch(:from_text, nil) + to_text = local_assigns.fetch(:to_text, nil) +%> + +
+
+
+ Total Jobs: <%= summary.total_jobs %> +

Summary Job Stats

+ <%= "Period: #{from_text} to #{to_text}" %> +
+ +
+ <%= render(partial: '/widgets/metrics/metric_details', + locals: { id: 'cpu-metric-summary', + title: 'CPU Efficiency:', + efficiency: summary.ave_cpu_eff.round(1), + items: [['Average Used', "#{summary.ave_cpu_use.ceil(1)} CPU Hrs"], + ['Average Allocated', "#{summary.ave_cpu_req.ceil(1)} CPU Hrs"], + ['Total Walltime', "#{summary.tot_cpu_walltime.ceil(1)} CPU Hrs"]] } ) %> + <%= render(partial: '/widgets/metrics/metric_details', + locals: { id: 'gpu-metric-summary', + title: 'GPU Efficiency:', + items: [['Average Allocated', "#{summary.ave_gpu_req.ceil(1)} CPU Hrs"], + ['Total Walltime', "#{summary.tot_gpu_hours.ceil(1)} CPU Hrs"]] } ) %> + <%= render(partial: '/widgets/metrics/metric_details', + locals: { id: 'memory-metric-summary', + title: 'Memory Efficiency:', + efficiency: summary.ave_mem_eff.ceil(1), + items: [['Average Used', "#{summary.ave_mem_use.ceil(1)}G"], + ['Average Allocated', "#{summary.ave_mem_req.ceil(1)}G"], + ['Total Used', "#{summary.tot_mem_use.ceil(1)}G"]] } ) %> + <%= render(partial: '/widgets/metrics/metric_details', + locals: { id: 'time-metric-summary', + title: 'Time Efficiency:', + efficiency: summary.ave_time_eff.ceil(1), + items: [['Average Used', "#{summary.ave_mem_use.ceil(1)}H"], + ['Average Allocated', "#{summary.ave_mem_req.ceil(1)}H"], + ['Average Waiting Time', "#{summary.ave_wait_time.ceil(1)}H"]] } ) %> + +
+
+
\ No newline at end of file diff --git a/config/local/ondemand.d/fasrc.yml b/config/local/ondemand.d/fasrc.yml index d650142f..9af6d3d5 100644 --- a/config/local/ondemand.d/fasrc.yml +++ b/config/local/ondemand.d/fasrc.yml @@ -17,8 +17,13 @@ profiles: dashboard_layout: rows: - columns: - - width: 10 + - width: 8 widgets: - - "saved_settings" + - "metrics/metrics" - "pinned_apps" - - "motd" \ No newline at end of file + - "motd" + - width: 4 + widgets: + - "metrics/metrics" + - "xdmod_widget_job_efficiency" + - "xdmod_widget_jobs" diff --git a/config/local/ondemand.d/root.yml b/config/local/ondemand.d/root.yml index 175862a1..77e02a2e 100644 --- a/config/local/ondemand.d/root.yml +++ b/config/local/ondemand.d/root.yml @@ -8,11 +8,22 @@ jobs_app_alpha: true bc_dynamic_js: true bc_saved_settings: true cancel_session_enabled: true +sessions_poll_delay: 900000 +status_delay: 900000 navbar_type: dark default_profile: fasrc +globus_endpoints: + - path: "/home" + endpoint: "716de4ac-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + endpoint_path: "/home" + + - path: "/var/www/ood/apps/sys/dataverse" + endpoint: "9f1fe759-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + endpoint_path: "/demo/dataverse" + help_menu: - group: "Docs" - title: "Documentation" diff --git a/config/local/ondemand.d/sid.yml b/config/local/ondemand.d/sid.yml index b14aa1ab..ed7613bb 100644 --- a/config/local/ondemand.d/sid.yml +++ b/config/local/ondemand.d/sid.yml @@ -59,7 +59,6 @@ profiles: custom_pages: docs: - layout: rows: - columns: - width: 12 diff --git a/ondemand b/ondemand index eb9ce2ec..2485ad0d 160000 --- a/ondemand +++ b/ondemand @@ -1 +1 @@ -Subproject commit eb9ce2eca0de45fb7720bb3a839438d4528979cc +Subproject commit 2485ad0d87bc039fb7a06dc07b34ab6e30d2bf6d