-
Notifications
You must be signed in to change notification settings - Fork 151
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
328 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
#!/bin/bash | ||
|
||
# Description: | ||
# This script collects debugging information from a Kubernetes cluster. | ||
# It retrieves networking, firewall, security policies, custom resource definitions (CRDs), | ||
# and logs from specified pods and secrets (sanitized). The outputs are saved to files for each namespace and object type. | ||
# This helps in diagnosing and troubleshooting cluster configurations. | ||
# Finally, it compresses all the collected files into a ZIP archive. | ||
# | ||
# Input Parameters: | ||
# - NAMESPACES: Comma-separated list of namespaces to collect data from. If not specified, the script collects data from all namespaces. | ||
# - K8S_OBJECT_NAME_FILTER: Filter for Kubernetes object names (default: 'splunk|collector|otel|certmanager|test|sck|sock'). | ||
# | ||
# Usage: | ||
# 1. Ensure you have `kubectl`, `yq`, and `helm` installed and configured to access your Kubernetes cluster. | ||
# 2. Save the script to a file called `splunk_kubernetes_debug_info.sh`. | ||
# 3. Make the script executable: | ||
# chmod +x splunk_kubernetes_debug_info.sh | ||
# 4. Run the script: | ||
# 4.1. Via Terminal and Curl: | ||
# curl -s https://raw.githubusercontent.com/signalfx/splunk-otel-collector-chart/main/tools/splunk_kubernetes_debug_info.sh | bash | ||
# 4.2. Via Terminal and Local Code: | ||
# ./splunk_kubernetes_debug_info.sh [NAMESPACES=namespace1,namespace2,...] [K8S_OBJECT_NAME_FILTER=splunk|collector|otel|certmanager|test|sck|sock|customname] | ||
# Note: If no namespaces are specified, the script will collect information from all namespaces. | ||
# Sensitive Data Handling: | ||
# The script attempts to redact sensitive information where possible, including tokens, passwords, and certificates. | ||
# However, users should review the files for any sensitive data before sharing. | ||
# | ||
# Objects Scraped: | ||
# - Pod logs for agent, cluster-receiver, certmanager, operator, gateway, splunk pods | ||
# - Deployments, daemonsets, secrets, Helm releases matching K8S_OBJECT_NAME_FILTER | ||
# - NetworkPolicies, Services, Ingress resources, Endpoints, Roles, RoleBindings, Security contexts | ||
# - OpenTelemetry Instrumentation objects | ||
# - Custom Resource Definitions (CRDs), Pod Security Policies (PSPs), Security Context Constraints (SCCs) | ||
# - Cert-manager related objects | ||
# - MutatingWebhookConfiguration objects | ||
|
||
# Helper function to write output to a file | ||
write_output() { | ||
local output="$1" | ||
local file_name="$2" | ||
local cmd="$3" | ||
|
||
# Check if output is empty, starts with "No resources found", or "error" | ||
if [[ -z "$output" || "$output" == "No resources found"* || "$output" == "error"* || "$output" == "Error"* ]]; then | ||
echo "[$(date)] Skipping $file_name: $output" >> "$temp_dir/errors.txt" | ||
return | ||
fi | ||
|
||
# Check if output is in YAML format | ||
if echo "$output" | yq eval '.' - > /dev/null 2>&1; then | ||
# Check if output contains empty list using yq | ||
if [[ $(echo "$output" | yq eval '.kind' -) == "List" ]] && [[ $(echo "$output" | yq eval '.items | length' -) -eq 0 ]]; then | ||
echo "[$(date)] Skipping $file_name: Empty list" >> "$temp_dir/errors.txt" | ||
return | ||
fi | ||
fi | ||
|
||
# Redact sensitive information | ||
output=$(echo "$output" | awk ' | ||
/BEGIN CERTIFICATE/,/END CERTIFICATE/ { | ||
if (/BEGIN CERTIFICATE/) print; | ||
else if (/END CERTIFICATE/) print; | ||
else print " [CERTIFICATE REDACTED]"; | ||
next; | ||
} | ||
/ca\.crt|client\.crt|client\.key/ { | ||
print " [SENSITIVE DATA REDACTED]"; | ||
next; | ||
} | ||
/[Tt][Oo][Kk][Ee][Nn]/ { | ||
print " [TOKEN REDACTED]"; | ||
next; | ||
} | ||
/[Pp][Aa][Ss][Ss][Ww][Oo][Rr][Dd]/ { | ||
print " [PASSWORD REDACTED]"; | ||
next; | ||
} | ||
{print}') | ||
|
||
# Write command and output to file | ||
echo "# Command: $cmd" > "$file_name" | ||
echo "$output" >> "$file_name" | ||
} | ||
|
||
# Function to collect data for a given namespace | ||
collect_data_namespace() { | ||
local ns=$1 | ||
|
||
object_types=("deployments" "daemonsets" "configmaps" "secrets" "networkpolicies" "svc" "ingress" "endpoints" "roles" "rolebindings" "otelinst") | ||
for type in "${object_types[@]}"; do | ||
stdbuf -oL echo "Collecting $type data for $ns namespace with $k8s_object_name_filter name filter" | ||
if [[ "$type" == "deployment" || "$type" == "daemonset" || "$type" == "configmaps" || "$type" == "secrets" ]]; then | ||
kubectl get "$type" -n "$ns" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep -E "$k8s_object_name_filter" | while read object; do | ||
cmd="kubectl get $type $object -n $ns -o yaml" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_${type}_${object}.yaml" "$cmd" | ||
done | ||
else | ||
kubectl get "$type" -n "$ns" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | while read object; do | ||
cmd="kubectl get $type $object -n $ns -o yaml" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_${type}_${object}.yaml" "$cmd" | ||
done | ||
fi | ||
done | ||
|
||
# Collect logs from specific pods | ||
pods=$(kubectl get pods -n "$ns" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep -E "$k8s_object_name_filter") | ||
# Collect logs from a single agent pod | ||
agent_pod=$(echo "$pods" | grep "agent" | head -n 1) | ||
if [ -n "$agent_pod" ]; then | ||
cmd="kubectl logs $agent_pod -n $ns" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_logs_pod_${agent_pod}.log" "$cmd" | ||
pods=$(echo "$pods" | grep -v "$agent_pod") | ||
fi | ||
|
||
# Collect logs from a single cluster-receiver pod | ||
cluster_receiver_pod=$(echo "$pods" | grep "cluster-receiver" | head -n 1) | ||
if [ -n "$cluster_receiver_pod" ]; then | ||
cmd="kubectl logs $cluster_receiver_pod -n $ns" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_logs_pod_${cluster_receiver_pod}.log" "$cmd" | ||
pods=$(echo "$pods" | grep -v "$cluster_receiver_pod") | ||
fi | ||
|
||
# Collect logs from all certmanager pods | ||
certmanager_pods=$(echo "$pods" | grep "certmanager") | ||
for pod in $certmanager_pods; do | ||
cmd="kubectl logs $pod -n $ns" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_logs_pod_${pod}.log" "$cmd" | ||
done | ||
pods=$(echo "$pods" | grep -v "certmanager") | ||
|
||
# Collect logs from all operator pods | ||
operator_pods=$(echo "$pods" | grep "operator") | ||
for pod in $operator_pods; do | ||
cmd="kubectl logs $pod -n $ns" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_logs_pod_${pod}.log" "$cmd" | ||
done | ||
pods=$(echo "$pods" | grep -v "operator") | ||
|
||
# Collect logs from a single Splunk pod | ||
splunk_pod=$(kubectl get pods -n "$ns" -l app=splunk -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) | ||
if [ -n "$splunk_pod" ]; then | ||
echo "Getting logs for pod $splunk_pod in namespace ${ns}" | ||
cmd="kubectl logs -n ${ns} $splunk_pod" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_logs_pod_${splunk_pod}.log" "$cmd" | ||
fi | ||
|
||
# Collect pod spec and logs for specific annotations | ||
annotations=( | ||
"instrumentation.opentelemetry.io/inject-java" | ||
"instrumentation.opentelemetry.io/inject-python" | ||
"instrumentation.opentelemetry.io/inject-dotnet" | ||
"instrumentation.opentelemetry.io/inject-go" | ||
"instrumentation.opentelemetry.io/inject-nodejs" | ||
"instrumentation.opentelemetry.io/inject-nginx" | ||
"instrumentation.opentelemetry.io/inject-sdk" | ||
"instrumentation.opentelemetry.io/inject-apache-httpd" | ||
) | ||
|
||
for annotation in "${annotations[@]}"; do | ||
pod_with_annotation=$(kubectl get pods -n "$ns" -o jsonpath="{range .items[?(@.metadata.annotations['$annotation'])]}{.metadata.name}{'\n'}{end}" | head -n 1) | ||
if [ -n "$pod_with_annotation" ]; then | ||
cmd="kubectl get pod $pod_with_annotation -n $ns -o yaml" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_pod_spec_${pod_with_annotation}.yaml" "$cmd" | ||
cmd="kubectl logs $pod_with_annotation -n $ns" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/namespace_${ns}_logs_pod_${pod_with_annotation}.log" "$cmd" | ||
fi | ||
done | ||
} | ||
|
||
# Function to collect cluster-wide data | ||
collect_data_cluster() { | ||
echo "Collecting cluster-wide data..." | ||
|
||
echo "Basic Cluster Configurations:" >> "$output_file" | ||
echo "Cluster Name: $(kubectl config view --minify -o jsonpath='{.clusters[].name}')" >> "$output_file" | ||
echo "Kubernetes Version:" >> "$output_file" | ||
kubectl version >> "$output_file" | ||
echo "Number of Namespaces:" >> "$output_file" | ||
kubectl get namespaces | wc -l >> "$output_file" | ||
echo "Namespaces: $(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}')" >> "$output_file" | ||
echo "Number of Running Nodes:" >> "$output_file" | ||
kubectl get nodes | wc -l >> "$output_file" | ||
echo "Number of Running Pods:" >> "$output_file" | ||
kubectl get pods --all-namespaces --field-selector=status.phase=Running | wc -l >> "$output_file" | ||
echo "Splunk Related Pods:" >> "$output_file" | ||
kubectl get pods --all-namespaces | (head -n 1 && grep -E "$k8s_object_name_filter") >> "$output_file" | ||
echo "---" >> "$output_file" | ||
|
||
echo "Collecting custom resource definitions..." | ||
cmd="kubectl get crds -o yaml" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/cluster_custom_resource_definitions.yaml" "$cmd" | ||
|
||
echo "Collecting pod security policies..." | ||
cmd="kubectl get psp -o yaml" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/cluster_pod_security_policies.yaml" "$cmd" | ||
|
||
echo "Collecting security context constraints..." | ||
cmd="kubectl get scc -o yaml" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/cluster_security_context_constraints.yaml" "$cmd" | ||
|
||
echo "Collecting MutatingWebhookConfiguration objects..." | ||
cmd="kubectl get mutatingwebhookconfiguration.admissionregistration.k8s.io -o yaml; kubectl describe mutatingwebhookconfiguration.admissionregistration.k8s.io; kubectl get --raw /metrics | grep apiserver_admission_webhook_rejection_count;" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/cluster_webhooks.yaml" "$cmd" | ||
|
||
echo "Checking for cert-manager installation..." | ||
cert_manager_pods=$(kubectl get pods --all-namespaces -l app=cert-manager --no-headers) | ||
if [ -n "$cert_manager_pods" ]; then | ||
echo "Cert-manager is installed. Collecting related objects..." | ||
cmd="kubectl get Issuers,ClusterIssuers,Certificates,CertificateRequests,Orders,Challenges --all-namespaces -o yaml; kubectl describe Issuers,ClusterIssuers,Certificates,CertificateRequests,Orders,Challenges --all-namespaces" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/cluster_cert_manager_objects.yaml" "$cmd" | ||
fi | ||
|
||
echo "Collecting Helm values for relevant releases..." | ||
helm list -A | grep -E "$k8s_object_name_filter" | awk '{print $1, $2}' | while read release namespace; do | ||
cmd="helm get values $release -n $namespace" | ||
output=$(eval "$cmd") | ||
write_output "$output" "$temp_dir/helm_values_${release}_${namespace}.yaml" "$cmd" | ||
done | ||
} | ||
|
||
# Parse input parameters | ||
namespaces="" | ||
k8s_object_name_filter="splunk|collector|otel|certmanager|test|sck|sock" | ||
|
||
for arg in "$@"; do | ||
case $arg in | ||
NAMESPACES=*) | ||
namespaces="${arg#*=}" | ||
;; | ||
K8S_OBJECT_NAME_FILTER=*) | ||
k8s_object_name_filter="${arg#*=}" | ||
;; | ||
*) | ||
echo "Unknown parameter: $arg" | ||
exit 1 | ||
;; | ||
esac | ||
done | ||
|
||
# Collect data from all namespaces if no namespaces are specified | ||
if [[ -z "$namespaces" ]]; then | ||
# Get all namespaces and convert the string into an array | ||
IFS=' ' read -r -a namespaces_array <<< "$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}')" | ||
else | ||
# Split the specified namespaces string into an array | ||
IFS=',' read -r -a namespaces_array <<< "$namespaces" | ||
fi | ||
|
||
echo "Namespaces: ${namespaces_array[@]}" | ||
echo "Kubernetes object name filter: $k8s_object_name_filter" | ||
|
||
# Create a temporary directory with a unique name | ||
temp_dir=$(mktemp -d -t splunk_kubernetes_debug_info_XXXXXX) | ||
if [[ ! -d "$temp_dir" ]]; then | ||
echo "Failed to create temporary directory" | ||
exit 1 | ||
fi | ||
|
||
# Output file for basic cluster information | ||
output_file="$temp_dir/cluster.txt" | ||
|
||
# Print script start time | ||
script_start_time=$(date +"%Y-%m-%d %H:%M:%S") | ||
echo "Script start time: $script_start_time" | ||
echo "Script start time: $script_start_time" >> "$output_file" | ||
|
||
# Collect cluster-wide data | ||
collect_data_cluster | ||
|
||
# Function to manage parallel processing of namespaces | ||
collect_data_namespace_namespaces() { | ||
local parallelism=20 | ||
local pids=() | ||
|
||
for ns in "${namespaces_array[@]}"; do | ||
collect_data_namespace "$ns" & | ||
pids+=($!) | ||
|
||
if [[ ${#pids[@]} -ge $parallelism ]]; then | ||
for pid in "${pids[@]}"; do | ||
wait "$pid" | ||
done | ||
pids=() | ||
fi | ||
done | ||
|
||
# Wait for any remaining background processes to complete | ||
for pid in "${pids[@]}"; do | ||
wait "$pid" | ||
done | ||
} | ||
|
||
# Process namespaces in parallel | ||
collect_data_namespace_namespaces | ||
|
||
# Print script end time | ||
script_end_time=$(date +"%Y-%m-%d %H:%M:%S") | ||
echo "Script end time: $script_end_time" | ||
echo "Script end time: $script_end_time" >> "$output_file" | ||
|
||
# Create a ZIP archive of all the collected YAML files | ||
output_zip="splunk_kubernetes_debug_info_$(date +%Y%m%d_%H%M%S).zip" | ||
echo "Creating ZIP archive: $output_zip" | ||
|
||
# Find and delete empty files before creating the ZIP archive | ||
find "$temp_dir" -type f -empty -delete | ||
|
||
zip -j -r "$output_zip" "$temp_dir" | ||
|
||
# Clean up the temporary directory | ||
rm -rf "$temp_dir" | ||
|
||
echo "Data collection complete. Output files are available in the ZIP archive: $output_zip" |