diff --git a/.github/workflows/installation-cli.yaml b/.github/workflows/installation-cli.yaml index 3fe30b13f5da..cd7d9be7ea86 100644 --- a/.github/workflows/installation-cli.yaml +++ b/.github/workflows/installation-cli.yaml @@ -44,7 +44,7 @@ jobs: hack/cli-testing-environment.sh # run a single e2e - export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config + export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/ - name: export logs @@ -87,7 +87,7 @@ jobs: hack/cli-testing-init-with-config.sh # run a single e2e - export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config + export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/ - name: export logs for config test diff --git a/go.mod b/go.mod index f7128c17c233..8aa69d3012fd 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/onsi/gomega v1.34.1 github.com/opensearch-project/opensearch-go v1.1.0 github.com/prometheus/client_golang v1.19.1 + github.com/prometheus/common v0.55.0 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.9.0 @@ -134,7 +135,6 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/rivo/uniseg v0.4.2 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect diff --git a/test/e2e/framework/cluster.go b/test/e2e/framework/cluster.go index e2a2280c7457..9c5c458403ba 100644 --- a/test/e2e/framework/cluster.go +++ b/test/e2e/framework/cluster.go @@ -310,7 +310,15 @@ func WaitClusterFitWith(c client.Client, clusterName string, fit func(cluster *c // LoadRESTClientConfig creates a rest.Config using the passed kubeconfig. If context is empty, current context in kubeconfig will be used. func LoadRESTClientConfig(kubeconfig string, context string) (*rest.Config, error) { - loader := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig} + var loader *clientcmd.ClientConfigLoadingRules + if strings.Contains(kubeconfig, ":") { + // kubeconfig is a list of kubeconfig files in form of "file1:file2:file3" + loader = &clientcmd.ClientConfigLoadingRules{Precedence: strings.Split(kubeconfig, ":")} + } else { + // kubeconfig is a single file + loader = &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig} + } + loadedConfig, err := loader.Load() if err != nil { return nil, err diff --git a/test/e2e/framework/metrics.go b/test/e2e/framework/metrics.go new file mode 100644 index 000000000000..3ba1afae493b --- /dev/null +++ b/test/e2e/framework/metrics.go @@ -0,0 +1,203 @@ +/* +Copyright 2024 The Karmada Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package framework + +import ( + "context" + "fmt" + "regexp" + "time" + + "github.com/prometheus/common/model" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-base/metrics/testutil" + "k8s.io/klog/v2" + + "github.com/karmada-io/karmada/pkg/util/names" +) + +const ( + karmadaNamespace = "karmada-system" + metricsBindPort = 8080 + leaderPodMetric = "leader_election_master_status" + queryTimeout = 10 * time.Second +) + +// following refers to https://github.com/kubernetes/kubernetes/blob/master/test/e2e/framework/metrics/metrics_grabber.go + +// Grabber is used to grab metrics from karmada components +type Grabber struct { + hostKubeClient clientset.Interface + controllerManagerPods []string + schedulerPods []string + deschedulerPods []string + metricsAdapterPods []string + schedulerEstimatorPods []string + webhookPods []string +} + +// NewMetricsGrabber creates a new metrics grabber +func NewMetricsGrabber(ctx context.Context, c clientset.Interface) (*Grabber, error) { + grabber := Grabber{hostKubeClient: c} + regKarmadaControllerManager := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*") + regKarmadaScheduler := regexp.MustCompile(names.KarmadaSchedulerComponentName + "-.*") + regKarmadaDescheduler := regexp.MustCompile(names.KarmadaDeschedulerComponentName + "-.*") + regKarmadaMetricsAdapter := regexp.MustCompile(names.KarmadaMetricsAdapterComponentName + "-.*") + regKarmadaSchedulerEstimator := regexp.MustCompile(names.KarmadaSchedulerEstimatorComponentName + "-" + ClusterNames()[0] + "-.*") + regKarmadaWebhook := regexp.MustCompile(names.KarmadaWebhookComponentName + "-.*") + + podList, err := c.CoreV1().Pods(karmadaNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + if len(podList.Items) < 1 { + klog.Warningf("Can't find any pods in namespace %s to grab metrics from", karmadaNamespace) + } + for _, pod := range podList.Items { + if regKarmadaControllerManager.MatchString(pod.Name) { + grabber.controllerManagerPods = append(grabber.controllerManagerPods, pod.Name) + continue + } + if regKarmadaDescheduler.MatchString(pod.Name) { + grabber.deschedulerPods = append(grabber.deschedulerPods, pod.Name) + continue + } + if regKarmadaMetricsAdapter.MatchString(pod.Name) { + grabber.metricsAdapterPods = append(grabber.metricsAdapterPods, pod.Name) + continue + } + if regKarmadaSchedulerEstimator.MatchString(pod.Name) { + grabber.schedulerEstimatorPods = append(grabber.schedulerEstimatorPods, pod.Name) + continue + } + if regKarmadaScheduler.MatchString(pod.Name) { + grabber.schedulerPods = append(grabber.schedulerPods, pod.Name) + continue + } + if regKarmadaWebhook.MatchString(pod.Name) { + grabber.webhookPods = append(grabber.webhookPods, pod.Name) + } + } + return &grabber, nil +} + +// GrabMetricsFromComponent fetch metrics from the leader of a specified Karmada component +func (g *Grabber) GrabMetricsFromComponent(ctx context.Context, component string) (map[string]testutil.Metrics, error) { + pods, fromLeader := make([]string, 0), false + switch component { + case names.KarmadaControllerManagerComponentName: + pods, fromLeader = g.controllerManagerPods, true + case names.KarmadaSchedulerComponentName: + pods, fromLeader = g.schedulerPods, true + case names.KarmadaDeschedulerComponentName: + pods, fromLeader = g.deschedulerPods, true + case names.KarmadaMetricsAdapterComponentName: + pods = g.metricsAdapterPods + case names.KarmadaSchedulerEstimatorComponentName: + pods = g.schedulerEstimatorPods + case names.KarmadaWebhookComponentName: + pods = g.webhookPods + } + return g.grabMetricsFromPod(ctx, component, pods, fromLeader) +} + +// grabMetricsFromPod fetch metrics from the leader pod +func (g *Grabber) grabMetricsFromPod(ctx context.Context, component string, pods []string, fromLeader bool) (map[string]testutil.Metrics, error) { + var output string + var lastMetricsFetchErr error + + result := make(map[string]testutil.Metrics) + for _, podName := range pods { + if metricsWaitErr := wait.PollUntilContextTimeout(ctx, time.Second, queryTimeout, true, func(ctx context.Context) (bool, error) { + output, lastMetricsFetchErr = GetMetricsFromPod(ctx, g.hostKubeClient, podName, karmadaNamespace, metricsBindPort) + return lastMetricsFetchErr == nil, nil + }); metricsWaitErr != nil { + klog.Errorf("error waiting for %s to expose metrics: %v; %v", podName, metricsWaitErr, lastMetricsFetchErr) + continue + } + + podMetrics := testutil.Metrics{} + metricsParseErr := testutil.ParseMetrics(output, &podMetrics) + if metricsParseErr != nil { + klog.Errorf("failed to parse metrics for %s: %v", podName, metricsParseErr) + continue + } + + // judge which pod is the leader pod + if fromLeader && !isLeaderPod(podMetrics[leaderPodMetric]) { + klog.Infof("skip fetch %s since it is not the leader pod", podName) + continue + } + + result[podName] = podMetrics + klog.Infof("successfully grabbed metrics of %s", podName) + } + + if len(result) == 0 { + return nil, fmt.Errorf("failed to fetch metrics from the pod of %s", component) + } + return result, nil +} + +// GetMetricsFromPod retrieves metrics data. +func GetMetricsFromPod(ctx context.Context, client clientset.Interface, podName string, namespace string, port int) (string, error) { + rawOutput, err := client.CoreV1().RESTClient().Get(). + Namespace(namespace). + Resource("pods"). + SubResource("proxy"). + Name(fmt.Sprintf("%s:%d", podName, port)). + Suffix("metrics"). + Do(ctx).Raw() + if err != nil { + return "", err + } + return string(rawOutput), nil +} + +func isLeaderPod(samples model.Samples) bool { + for _, sample := range samples { + if sample.Value > 0 { + return true + } + } + return false +} + +// GetMetricByName returns the metric value with the given name. +func GetMetricByName(samples model.Samples, name string) *model.Sample { + for _, sample := range samples { + if sample.Metric["name"] == model.LabelValue(name) { + return sample + } + } + return nil +} + +// PrintMetricSample prints the metric sample +func PrintMetricSample(podName string, sample model.Samples) { + if sample.Len() == 0 { + return + } + if podName != "" { + klog.Infof("metrics from pod: %s", podName) + } + for _, s := range sample { + klog.Infof("metric: %v, value: %v, timestamp: %v", s.Metric, s.Value, s.Timestamp) + } +} diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go new file mode 100644 index 000000000000..d322591138e3 --- /dev/null +++ b/test/e2e/metrics_test.go @@ -0,0 +1,126 @@ +/* +Copyright 2023 The Karmada Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/klog/v2" + + policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" + "github.com/karmada-io/karmada/pkg/util/names" + "github.com/karmada-io/karmada/test/e2e/framework" + testhelper "github.com/karmada-io/karmada/test/helper" +) + +var _ = ginkgo.Describe("metrics testing", func() { + var grabber *framework.Grabber + + var componentMetrics = map[string][]string{ + names.KarmadaControllerManagerComponentName: { + "workqueue_queue_duration_seconds_sum", // workqueue metrics + "cluster_ready_state", // custom ClusterCollectors metrics + "work_sync_workload_duration_seconds_sum", // custom ResourceCollectors metrics + }, + names.KarmadaSchedulerComponentName: { + "workqueue_queue_duration_seconds_sum", // workqueue metrics + "karmada_scheduler_schedule_attempts_total", // scheduler custom metrics + }, + names.KarmadaDeschedulerComponentName: { + "workqueue_queue_duration_seconds_sum", // workqueue metrics + }, + names.KarmadaMetricsAdapterComponentName: { + "workqueue_queue_duration_seconds_sum", // workqueue metrics + }, + names.KarmadaSchedulerEstimatorComponentName: { + "karmada_scheduler_estimator_estimating_request_total", // scheduler estimator custom metrics + }, + names.KarmadaWebhookComponentName: { + "controller_runtime_webhook_requests_total", // controller runtime hook server metrics + }, + } + + ginkgo.BeforeEach(func() { + var err error + grabber, err = framework.NewMetricsGrabber(context.TODO(), hostKubeClient) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + }) + + ginkgo.Context("metrics presence testing", func() { + ginkgo.It("metrics presence testing for each component", func() { + ginkgo.By("do a simple scheduling to ensure above metrics exist", func() { + name := deploymentNamePrefix + rand.String(RandomStrLength) + deployment := testhelper.NewDeployment(testNamespace, name) + policy := testhelper.NewPropagationPolicy(testNamespace, name, []policyv1alpha1.ResourceSelector{ + { + APIVersion: deployment.APIVersion, + Kind: deployment.Kind, + Name: deployment.Name, + }, + }, policyv1alpha1.Placement{ + ClusterAffinity: &policyv1alpha1.ClusterAffinity{ + ClusterNames: framework.ClusterNames(), + }, + }) + framework.CreateDeployment(kubeClient, deployment) + framework.CreatePropagationPolicy(karmadaClient, policy) + ginkgo.DeferCleanup(func() { + framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name) + framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name) + }) + framework.WaitDeploymentPresentOnClustersFitWith(framework.ClusterNames(), deployment.Namespace, deployment.Name, func(_ *appsv1.Deployment) bool { return true }) + }) + + for component, metricNameList := range componentMetrics { + ginkgo.By("judge metrics presence of component: "+component, func() { + podsMetrics, err := grabber.GrabMetricsFromComponent(context.TODO(), component) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + + for _, metricName := range metricNameList { + metricExist := false + for podName, metrics := range podsMetrics { + // the output format of `metrics` is like: + // { + // "workqueue_queue_duration_seconds_sum": [{ + // "metric": { + // "__name__": "workqueue_queue_duration_seconds_sum", + // "controller": "work-status-controller", + // "name": "work-status-controller" + // }, + // "value": [0, "0.12403110800000001"] + // }] + // } + framework.PrintMetricSample(podName, metrics[metricName]) + if metrics[metricName].Len() > 0 { + metricExist = true + break + } + } + if !metricExist { + klog.Errorf("metric %s not found in component %s", metricName, component) + gomega.Expect(metricExist).ShouldNot(gomega.BeFalse()) + } + } + }) + } + }) + }) +}) diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go index f82dd72e6599..a5f8e06772cf 100644 --- a/test/e2e/suite_test.go +++ b/test/e2e/suite_test.go @@ -103,11 +103,13 @@ var ( ) var ( + hostContext string karmadaContext string kubeconfig string karmadactlPath string restConfig *rest.Config karmadaHost string + hostKubeClient kubernetes.Interface kubeClient kubernetes.Interface karmadaClient karmada.Interface dynamicClient dynamic.Interface @@ -125,7 +127,8 @@ func init() { // eg. ginkgo -v --race --trace --fail-fast -p --randomize-all ./test/e2e/ -- --poll-interval=5s --poll-timeout=5m flag.DurationVar(&pollInterval, "poll-interval", 5*time.Second, "poll-interval defines the interval time for a poll operation") flag.DurationVar(&pollTimeout, "poll-timeout", 300*time.Second, "poll-timeout defines the time which the poll operation times out") - flag.StringVar(&karmadaContext, "karmada-context", karmadaContext, "Name of the cluster context in control plane kubeconfig file.") + flag.StringVar(&hostContext, "host-context", "karmada-host", "Name of the host cluster context in control plane kubeconfig file.") + flag.StringVar(&karmadaContext, "karmada-context", "karmada-apiserver", "Name of the karmada cluster context in control plane kubeconfig file.") } func TestE2E(t *testing.T) { @@ -148,6 +151,13 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte { gomega.Expect(karmadactlPath).ShouldNot(gomega.BeEmpty()) clusterProvider = cluster.NewProvider() + + restConfig, err = framework.LoadRESTClientConfig(kubeconfig, hostContext) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + + hostKubeClient, err = kubernetes.NewForConfig(restConfig) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + restConfig, err = framework.LoadRESTClientConfig(kubeconfig, karmadaContext) gomega.Expect(err).ShouldNot(gomega.HaveOccurred())