From f6d4cb256c1d4e8c1ff3c5bf43d34565173ea9a4 Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Fri, 29 Sep 2023 14:59:24 +0100 Subject: [PATCH] HPCC-30413 Add option to always capture post-mortem info Enabled either globally (i.e. for all components), or for individual components (e.g. Thor). Usage: expert: alwaysPostMortem: true Signed-off-by: Jake Smith --- helm/hpcc/templates/_helpers.tpl | 17 ++++++++++++----- initfiles/bin/check_executes | 5 ++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/helm/hpcc/templates/_helpers.tpl b/helm/hpcc/templates/_helpers.tpl index cb2d8e1a78c..eed1336e2b7 100644 --- a/helm/hpcc/templates/_helpers.tpl +++ b/helm/hpcc/templates/_helpers.tpl @@ -689,13 +689,13 @@ Check that the storage and spill planes for a component exist Add command for a component */}} {{- define "hpcc.componentCommand" -}} -{{- if .me.valgrind -}} + {{- if .me.valgrind -}} valgrind -{{- else if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}} + {{- else if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}} check_executes -{{- else -}} + {{- else -}} {{ .process }} -{{- end }} + {{- end }} {{- end -}} {{/* @@ -713,6 +713,9 @@ Add extra args for a component {{- $debugPlane := .me.debugPlane | default (include "hpcc.getFirstPlaneForCategory" (dict "root" .root "category" "debug")) -}} {{- include "hpcc.checkPlaneExists" (dict "root" .root "planeName" $debugPlane) -}} {{- $prefix := include "hpcc.getPlanePrefix" (dict "root" .root "planeName" $debugPlane) -}} + {{- if or (and (hasKey .me "expert") .me.expert.alwaysPostMortem) (and (hasKey .root.Values.global "expert") .root.Values.global.expert.alwaysPostMortem) -}} +"-a",{{ "\n" }} + {{- end -}} "-d", {{ $prefix }}, "--", {{ .process | quote }}, @@ -1670,7 +1673,11 @@ args: {{- $debugPlane := .me.debugPlane | default (include "hpcc.getFirstPlaneForCategory" (dict "root" .root "category" "debug")) -}} {{- include "hpcc.checkPlaneExists" (dict "root" .root "planeName" $debugPlane) -}} {{- $prefix := include "hpcc.getPlanePrefix" (dict "root" .root "planeName" $debugPlane) -}} - {{- $_ := set $check_cmd "command" (printf "check_executes -d %s -- %s" $prefix .command) -}} + {{- $pmd_always_opt := "" -}} + {{- if or (and (hasKey .me "expert") .me.expert.alwaysPostMortem) (and (hasKey .root.Values.global "expert") .root.Values.global.expert.alwaysPostMortem) -}} + {{- $pmd_always_opt = "-a " -}} + {{- end -}} + {{- $_ := set $check_cmd "command" (printf "check_executes %s-d %s -- %s" $pmd_always_opt $prefix .command) -}} {{- end }} - >- {{ $check_cmd.command }}; diff --git a/initfiles/bin/check_executes b/initfiles/bin/check_executes index 6a470b86053..9299015de8d 100755 --- a/initfiles/bin/check_executes +++ b/initfiles/bin/check_executes @@ -11,6 +11,7 @@ PMD_PROGNAME= PMD_COPYFILES=() PMD_DALISERVER= PMD_WORKUNIT= +PMD_ALWAYS=false while [ "$#" -gt 0 ]; do arg=$1 @@ -27,6 +28,8 @@ while [ "$#" -gt 0 ]; do f) shift; PMD_COPYFILES+=($1) ;; + a) PMD_ALWAYS=true + ;; *) usage exit ;; @@ -66,7 +69,7 @@ ${PMD_PROGNAME} --logging.postMortem=1000 "$@" # If it did not exit cleanly, copy some post-mortem info retVal=$? -if [ $retVal -ne 0 ]; then +if [ $PMD_ALWAYS = true ] || [ $retVal -ne 0 ]; then POST_MORTEM_DIR=${PMD_DIRECTORYBASE}/$(hostname)/$(date -Iseconds) mkdir -p ${POST_MORTEM_DIR} echo "Post-mortem info gathered in $POST_MORTEM_DIR"