diff --git a/Makefile b/Makefile
index 47ce69da76..f844732d9a 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,6 @@
 ## generate-drone              Generate the Drone YAML from Jsonnet.
 ## generate-helm-docs          Generate Helm chart documentation.
 ## generate-helm-tests         Generate Helm chart tests.
-## generate-protos             Generate protobuf files.
 ## generate-ui                 Generate the UI assets.
 ## generate-versioned-files    Generate versioned files.
 ##
@@ -219,8 +218,8 @@ agent-boringcrypto-image:
 # Targets for generating assets
 #
 
-.PHONY: generate generate-drone generate-helm-docs generate-helm-tests generate-protos generate-ui generate-versioned-files
-generate: generate-drone generate-helm-docs generate-helm-tests generate-protos generate-ui generate-versioned-files generate-docs
+.PHONY: generate generate-drone generate-helm-docs generate-helm-tests generate-ui generate-versioned-files
+generate: generate-drone generate-helm-docs generate-helm-tests generate-ui generate-versioned-files generate-docs
 
 generate-drone:
 	drone jsonnet -V BUILD_IMAGE_VERSION=$(BUILD_IMAGE_VERSION) --stream --format --source .drone/drone.jsonnet --target .drone/drone.yml
@@ -239,13 +238,6 @@ else
 	bash ./operations/helm/scripts/rebuild-tests.sh
 endif
 
-generate-protos:
-ifeq ($(USE_CONTAINER),1)
-	$(RERUN_IN_CONTAINER)
-else
-	go generate ./internal/static/agentproto/
-endif
-
 generate-ui:
 ifeq ($(USE_CONTAINER),1)
 	$(RERUN_IN_CONTAINER)
diff --git a/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river b/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river
index f7c22ade64..b8a1b67208 100644
--- a/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river
+++ b/internal/converter/internal/staticconvert/testdata-v2/integrations_v2.river
@@ -21,6 +21,32 @@ logging {
 	format = "json"
 }
 
+faro.receiver "integrations_app_agent_receiver" {
+	extra_log_labels = {}
+
+	server {
+		listen_address           = "localhost"
+		listen_port              = 55678
+		max_allowed_payload_size = "4MiB786KiB832B"
+
+		rate_limiting {
+			enabled    = true
+			rate       = 100
+			burst_size = 50
+		}
+	}
+
+	sourcemaps {
+		download_from_origins = ["*"]
+		download_timeout      = "1s"
+	}
+
+	output {
+		logs   = [loki.write.logs_log_config.receiver]
+		traces = []
+	}
+}
+
 loki.relabel "integrations_eventhandler" {
 	forward_to = [loki.write.logs_log_config.receiver]
 
@@ -237,27 +263,6 @@ prometheus.scrape "integrations_elasticsearch" {
 	job_name   = "integrations/elasticsearch"
 }
 
-prometheus.exporter.gcp "integrations_gcp_exporter" {
-	project_ids      = [""]
-	metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"]
-	extra_filters    = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""]
-}
-
-discovery.relabel "integrations_gcp" {
-	targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets
-
-	rule {
-		target_label = "job"
-		replacement  = "integrations/gcp"
-	}
-}
-
-prometheus.scrape "integrations_gcp" {
-	targets    = discovery.relabel.integrations_gcp.output
-	forward_to = [prometheus.remote_write.metrics_default.receiver]
-	job_name   = "integrations/gcp"
-}
-
 prometheus.exporter.github "integrations_github_exporter" {
 	repositories = ["grafana/agent", "grafana/agent-modules"]
 	api_token    = "ABCDEFGH-1234-ABCD-1234-ABCDEFGHIJKL"
@@ -680,32 +685,6 @@ prometheus.scrape "integrations_apache2" {
 	job_name   = "integrations/apache2"
 }
 
-faro.receiver "integrations_app_agent_receiver" {
-	extra_log_labels = {}
-
-	server {
-		listen_address           = "localhost"
-		listen_port              = 55678
-		max_allowed_payload_size = "4MiB786KiB832B"
-
-		rate_limiting {
-			enabled    = true
-			rate       = 100
-			burst_size = 50
-		}
-	}
-
-	sourcemaps {
-		download_from_origins = ["*"]
-		download_timeout      = "1s"
-	}
-
-	output {
-		logs   = [loki.write.logs_log_config.receiver]
-		traces = []
-	}
-}
-
 prometheus.exporter.blackbox "integrations_blackbox" {
 	config = "modules:\n  http_2xx:\n    prober: http\n    timeout: 5s\n    http:\n      method: POST\n      headers:\n        Content-Type: application/json\n      body: '{}'\n      preferred_ip_protocol: ip4\n"
 
@@ -762,3 +741,24 @@ prometheus.scrape "integrations_snmp" {
 	forward_to = [prometheus.remote_write.metrics_default.receiver]
 	job_name   = "integrations/snmp"
 }
+
+prometheus.exporter.gcp "integrations_gcp_exporter" {
+	project_ids      = [""]
+	metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"]
+	extra_filters    = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""]
+}
+
+discovery.relabel "integrations_gcp" {
+	targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets
+
+	rule {
+		target_label = "job"
+		replacement  = "integrations/gcp"
+	}
+}
+
+prometheus.scrape "integrations_gcp" {
+	targets    = discovery.relabel.integrations_gcp.output
+	forward_to = [prometheus.remote_write.metrics_default.receiver]
+	job_name   = "integrations/gcp"
+}
diff --git a/internal/converter/internal/staticconvert/testdata-v2/unsupported.river b/internal/converter/internal/staticconvert/testdata-v2/unsupported.river
index c9585a88c5..c854f1bd9d 100644
--- a/internal/converter/internal/staticconvert/testdata-v2/unsupported.river
+++ b/internal/converter/internal/staticconvert/testdata-v2/unsupported.river
@@ -16,10 +16,6 @@ loki.write "logs_log_config" {
 	external_labels = {}
 }
 
-loki.source.kubernetes_events "integrations_eventhandler" {
-	forward_to = [loki.write.logs_log_config.receiver]
-}
-
 faro.receiver "integrations_app_agent_receiver" {
 	extra_log_labels = {}
 
@@ -45,3 +41,7 @@ faro.receiver "integrations_app_agent_receiver" {
 		traces = []
 	}
 }
+
+loki.source.kubernetes_events "integrations_eventhandler" {
+	forward_to = [loki.write.logs_log_config.receiver]
+}
diff --git a/internal/converter/internal/staticconvert/testdata/integrations.river b/internal/converter/internal/staticconvert/testdata/integrations.river
index 201f5e99e1..0c7bdaee61 100644
--- a/internal/converter/internal/staticconvert/testdata/integrations.river
+++ b/internal/converter/internal/staticconvert/testdata/integrations.river
@@ -324,33 +324,6 @@ prometheus.scrape "integrations_elasticsearch_exporter" {
 	}
 }
 
-prometheus.exporter.gcp "integrations_gcp_exporter" {
-	project_ids      = [""]
-	metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"]
-	extra_filters    = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""]
-}
-
-discovery.relabel "integrations_gcp_exporter" {
-	targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets
-
-	rule {
-		target_label = "job"
-		replacement  = "integrations/gcp_exporter"
-	}
-}
-
-prometheus.scrape "integrations_gcp_exporter" {
-	targets    = discovery.relabel.integrations_gcp_exporter.output
-	forward_to = [prometheus.remote_write.integrations.receiver]
-	job_name   = "integrations/gcp_exporter"
-
-	tls_config {
-		ca_file   = "/something7.cert"
-		cert_file = "/something8.cert"
-		key_file  = "/something9.cert"
-	}
-}
-
 prometheus.exporter.github "integrations_github_exporter" {
 	repositories = ["grafana/agent", "grafana/agent-modules"]
 	api_token    = "ABCDEFGH-1234-ABCD-1234-ABCDEFGHIJKL"
@@ -780,3 +753,30 @@ prometheus.scrape "integrations_statsd_exporter" {
 		key_file  = "/something9.cert"
 	}
 }
+
+prometheus.exporter.gcp "integrations_gcp_exporter" {
+	project_ids      = [""]
+	metrics_prefixes = ["loadbalancing.googleapis.com/https/request_bytes_count", "loadbalancing.googleapis.com/https/total_latencies"]
+	extra_filters    = ["loadbalancing.googleapis.com:resource.labels.backend_target_name=\"sample-value\""]
+}
+
+discovery.relabel "integrations_gcp_exporter" {
+	targets = prometheus.exporter.gcp.integrations_gcp_exporter.targets
+
+	rule {
+		target_label = "job"
+		replacement  = "integrations/gcp_exporter"
+	}
+}
+
+prometheus.scrape "integrations_gcp_exporter" {
+	targets    = discovery.relabel.integrations_gcp_exporter.output
+	forward_to = [prometheus.remote_write.integrations.receiver]
+	job_name   = "integrations/gcp_exporter"
+
+	tls_config {
+		ca_file   = "/something7.cert"
+		cert_file = "/something8.cert"
+		key_file  = "/something9.cert"
+	}
+}
diff --git a/internal/converter/internal/staticconvert/testdata/integrations.yaml b/internal/converter/internal/staticconvert/testdata/integrations.yaml
index ced11cf6d9..337eaa01cf 100644
--- a/internal/converter/internal/staticconvert/testdata/integrations.yaml
+++ b/internal/converter/internal/staticconvert/testdata/integrations.yaml
@@ -148,7 +148,7 @@ integrations:
       scrape_integration: true
   postgres_exporter:
     enabled: true
-    data_source_names: 
+    data_source_names:
     - postgres://postgres:password@localhost:5432/postgres?sslmode=disable
     relabel_configs:
     - source_labels: [__address__]
diff --git a/internal/static/agentctl/sync.go b/internal/static/agentctl/sync.go
deleted file mode 100644
index 3246405921..0000000000
--- a/internal/static/agentctl/sync.go
+++ /dev/null
@@ -1,136 +0,0 @@
-package agentctl
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	"github.com/grafana/agent/internal/static/client"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-)
-
-// ConfigSync loads YAML files from a directory and syncs them to the
-// provided PrometheusClient API. All YAML files will be synced and
-// must be valid.
-//
-// The base name of the YAML file (i.e., without the file extension)
-// is used as the config name.
-//
-// ConfigSync will completely overwrite the set of active configs
-// present in the provided PrometheusClient - configs present in the
-// API but not in the directory will be deleted.
-func ConfigSync(logger log.Logger, cli client.PrometheusClient, dir string, dryRun bool) error {
-	if logger == nil {
-		logger = log.NewNopLogger()
-	}
-
-	ctx := context.Background()
-	cfgs, err := ConfigsFromDirectory(dir)
-	if err != nil {
-		return err
-	}
-
-	if dryRun {
-		level.Info(logger).Log("msg", "config files validated successfully")
-		return nil
-	}
-
-	uploaded := make(map[string]struct{}, len(cfgs))
-	var hadErrors bool
-
-	for _, cfg := range cfgs {
-		level.Info(logger).Log("msg", "uploading config", "name", cfg.Name)
-		err := cli.PutConfiguration(ctx, cfg.Name, cfg)
-		if err != nil {
-			level.Error(logger).Log("msg", "failed to upload config", "name", cfg.Name, "err", err)
-			hadErrors = true
-		}
-		uploaded[cfg.Name] = struct{}{}
-	}
-
-	existing, err := cli.ListConfigs(ctx)
-	if err != nil {
-		return fmt.Errorf("could not list configs: %w", err)
-	}
-
-	// Delete configs from the existing API list that we didn't upload.
-	for _, existing := range existing.Configs {
-		if _, existsLocally := uploaded[existing]; !existsLocally {
-			level.Info(logger).Log("msg", "deleting config", "name", existing)
-			err := cli.DeleteConfiguration(ctx, existing)
-			if err != nil {
-				level.Error(logger).Log("msg", "failed to delete outdated config", "name", existing, "err", err)
-				hadErrors = true
-			}
-		}
-	}
-
-	if hadErrors {
-		return errors.New("one or more configurations failed to be modified; check the logs for more details")
-	}
-
-	return nil
-}
-
-// ConfigsFromDirectory parses all YAML files from a directory and
-// loads each as an instance.Config.
-func ConfigsFromDirectory(dir string) ([]*instance.Config, error) {
-	var files []string
-	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
-		if err != nil {
-			return err
-		}
-		if info.IsDir() {
-			if dir == path {
-				return nil
-			}
-			return filepath.SkipDir
-		}
-
-		if strings.HasSuffix(path, ".yaml") || strings.HasSuffix(path, ".yml") {
-			files = append(files, path)
-		}
-		return nil
-	})
-	if err != nil {
-		return nil, err
-	}
-
-	var configs []*instance.Config
-	for _, file := range files {
-		cfg, err := configFromFile(file)
-		if err != nil {
-			return nil, err
-		}
-		configs = append(configs, cfg)
-	}
-
-	return configs, nil
-}
-
-func configFromFile(path string) (*instance.Config, error) {
-	var (
-		fileName   = filepath.Base(path)
-		configName = strings.TrimSuffix(fileName, filepath.Ext(fileName))
-	)
-
-	f, err := os.Open(path)
-	if f != nil {
-		defer f.Close()
-	}
-	if err != nil {
-		return nil, err
-	}
-
-	cfg, err := instance.UnmarshalConfig(f)
-	if err != nil {
-		return nil, err
-	}
-	cfg.Name = configName
-	return cfg, nil
-}
diff --git a/internal/static/agentctl/sync_test.go b/internal/static/agentctl/sync_test.go
deleted file mode 100644
index 8cd490256c..0000000000
--- a/internal/static/agentctl/sync_test.go
+++ /dev/null
@@ -1,137 +0,0 @@
-package agentctl
-
-import (
-	"context"
-	"errors"
-	"testing"
-
-	"github.com/grafana/agent/internal/static/metrics/cluster/configapi"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/stretchr/testify/require"
-)
-
-func TestConfigSync_EmptyStore(t *testing.T) {
-	cli := &mockFuncPromClient{}
-	cli.ListConfigsFunc = func(_ context.Context) (*configapi.ListConfigurationsResponse, error) {
-		return &configapi.ListConfigurationsResponse{}, nil
-	}
-
-	var putConfigs []string
-	cli.PutConfigurationFunc = func(_ context.Context, name string, _ *instance.Config) error {
-		putConfigs = append(putConfigs, name)
-		return nil
-	}
-
-	err := ConfigSync(nil, cli, "./testdata", false)
-	require.NoError(t, err)
-
-	expect := []string{
-		"agent-1",
-		"agent-2",
-		"agent-3",
-	}
-	require.Equal(t, expect, putConfigs)
-}
-
-func TestConfigSync_PrepopulatedStore(t *testing.T) {
-	cli := &mockFuncPromClient{}
-	cli.ListConfigsFunc = func(_ context.Context) (*configapi.ListConfigurationsResponse, error) {
-		return &configapi.ListConfigurationsResponse{
-			Configs: []string{"delete-a", "agent-1", "delete-b", "delete-c"},
-		}, nil
-	}
-
-	var putConfigs []string
-	cli.PutConfigurationFunc = func(_ context.Context, name string, _ *instance.Config) error {
-		putConfigs = append(putConfigs, name)
-		return nil
-	}
-
-	var deletedConfigs []string
-	cli.DeleteConfigurationFunc = func(_ context.Context, name string) error {
-		deletedConfigs = append(deletedConfigs, name)
-		return nil
-	}
-
-	err := ConfigSync(nil, cli, "./testdata", false)
-	require.NoError(t, err)
-
-	expectUpdated := []string{
-		"agent-1",
-		"agent-2",
-		"agent-3",
-	}
-	require.Equal(t, expectUpdated, putConfigs)
-
-	expectDeleted := []string{
-		"delete-a",
-		"delete-b",
-		"delete-c",
-	}
-	require.Equal(t, expectDeleted, deletedConfigs)
-}
-
-func TestConfigSync_DryRun(t *testing.T) {
-	cli := &mockFuncPromClient{}
-	cli.ListConfigsFunc = func(_ context.Context) (*configapi.ListConfigurationsResponse, error) {
-		return &configapi.ListConfigurationsResponse{
-			Configs: []string{"delete-a", "agent-1", "delete-b", "delete-c"},
-		}, nil
-	}
-
-	cli.PutConfigurationFunc = func(_ context.Context, name string, _ *instance.Config) error {
-		t.FailNow()
-		return nil
-	}
-
-	cli.DeleteConfigurationFunc = func(_ context.Context, name string) error {
-		t.FailNow()
-		return nil
-	}
-
-	err := ConfigSync(nil, cli, "./testdata", true)
-	require.NoError(t, err)
-}
-
-type mockFuncPromClient struct {
-	InstancesFunc           func(ctx context.Context) ([]string, error)
-	ListConfigsFunc         func(ctx context.Context) (*configapi.ListConfigurationsResponse, error)
-	GetConfigurationFunc    func(ctx context.Context, name string) (*instance.Config, error)
-	PutConfigurationFunc    func(ctx context.Context, name string, cfg *instance.Config) error
-	DeleteConfigurationFunc func(ctx context.Context, name string) error
-}
-
-func (m mockFuncPromClient) Instances(ctx context.Context) ([]string, error) {
-	if m.InstancesFunc != nil {
-		return m.InstancesFunc(ctx)
-	}
-	return nil, errors.New("not implemented")
-}
-
-func (m mockFuncPromClient) ListConfigs(ctx context.Context) (*configapi.ListConfigurationsResponse, error) {
-	if m.ListConfigsFunc != nil {
-		return m.ListConfigsFunc(ctx)
-	}
-	return nil, errors.New("not implemented")
-}
-
-func (m mockFuncPromClient) GetConfiguration(ctx context.Context, name string) (*instance.Config, error) {
-	if m.GetConfigurationFunc != nil {
-		return m.GetConfigurationFunc(ctx, name)
-	}
-	return nil, errors.New("not implemented")
-}
-
-func (m mockFuncPromClient) PutConfiguration(ctx context.Context, name string, cfg *instance.Config) error {
-	if m.PutConfigurationFunc != nil {
-		return m.PutConfigurationFunc(ctx, name, cfg)
-	}
-	return errors.New("not implemented")
-}
-
-func (m mockFuncPromClient) DeleteConfiguration(ctx context.Context, name string) error {
-	if m.DeleteConfigurationFunc != nil {
-		return m.DeleteConfigurationFunc(ctx, name)
-	}
-	return errors.New("not implemented")
-}
diff --git a/internal/static/agentctl/testdata/agent-1.yaml b/internal/static/agentctl/testdata/agent-1.yaml
deleted file mode 100644
index d62ce80496..0000000000
--- a/internal/static/agentctl/testdata/agent-1.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-host_filter: false
-write_stale_on_shutdown: true
-scrape_configs:
-  - job_name: agent-1
-    static_configs:
-      - targets: ['agent-1:12345']
-        labels:
-          cluster: 'testdata'
-          origin: 'agent'
-          container: 'agent-1'
-remote_write:
-  - url: http://cortex:9009/api/prom/push
diff --git a/internal/static/agentctl/testdata/agent-2.yaml b/internal/static/agentctl/testdata/agent-2.yaml
deleted file mode 100644
index d38252fb52..0000000000
--- a/internal/static/agentctl/testdata/agent-2.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-host_filter: false
-write_stale_on_shutdown: true
-scrape_configs:
-  - job_name: agent-2
-    static_configs:
-      - targets: ['agent-2:12345']
-        labels:
-          cluster: 'testdata'
-          origin: 'agent'
-          container: 'agent-2'
-remote_write:
-  - url: http://cortex:9009/api/prom/push
diff --git a/internal/static/agentctl/testdata/agent-3.yaml b/internal/static/agentctl/testdata/agent-3.yaml
deleted file mode 100644
index 9312e87078..0000000000
--- a/internal/static/agentctl/testdata/agent-3.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-host_filter: false
-write_stale_on_shutdown: true
-scrape_configs:
-  - job_name: agent-3
-    static_configs:
-      - targets: ['agent-3:12345']
-        labels:
-          cluster: 'testdata'
-          origin: 'agent'
-          container: 'agent-3'
-remote_write:
-  - url: http://cortex:9009/api/prom/push
diff --git a/internal/static/agentproto/agent.pb.go b/internal/static/agentproto/agent.pb.go
deleted file mode 100644
index 2f5c8bcfff..0000000000
--- a/internal/static/agentproto/agent.pb.go
+++ /dev/null
@@ -1,416 +0,0 @@
-// Code generated by protoc-gen-gogo. DO NOT EDIT.
-// source: agent.proto
-
-package agentproto
-
-import (
-	context "context"
-	fmt "fmt"
-	proto "github.com/gogo/protobuf/proto"
-	grpc "google.golang.org/grpc"
-	codes "google.golang.org/grpc/codes"
-	status "google.golang.org/grpc/status"
-	emptypb "google.golang.org/protobuf/types/known/emptypb"
-	io "io"
-	math "math"
-	math_bits "math/bits"
-	reflect "reflect"
-	strings "strings"
-)
-
-// Reference imports to suppress errors if they are not otherwise used.
-var _ = proto.Marshal
-var _ = fmt.Errorf
-var _ = math.Inf
-
-// This is a compile-time assertion to ensure that this generated file
-// is compatible with the proto package it is being compiled against.
-// A compilation error at this line likely means your copy of the
-// proto package needs to be updated.
-const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
-
-type ReshardRequest struct {
-}
-
-func (m *ReshardRequest) Reset()      { *m = ReshardRequest{} }
-func (*ReshardRequest) ProtoMessage() {}
-func (*ReshardRequest) Descriptor() ([]byte, []int) {
-	return fileDescriptor_56ede974c0020f77, []int{0}
-}
-func (m *ReshardRequest) XXX_Unmarshal(b []byte) error {
-	return m.Unmarshal(b)
-}
-func (m *ReshardRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
-	if deterministic {
-		return xxx_messageInfo_ReshardRequest.Marshal(b, m, deterministic)
-	} else {
-		b = b[:cap(b)]
-		n, err := m.MarshalToSizedBuffer(b)
-		if err != nil {
-			return nil, err
-		}
-		return b[:n], nil
-	}
-}
-func (m *ReshardRequest) XXX_Merge(src proto.Message) {
-	xxx_messageInfo_ReshardRequest.Merge(m, src)
-}
-func (m *ReshardRequest) XXX_Size() int {
-	return m.Size()
-}
-func (m *ReshardRequest) XXX_DiscardUnknown() {
-	xxx_messageInfo_ReshardRequest.DiscardUnknown(m)
-}
-
-var xxx_messageInfo_ReshardRequest proto.InternalMessageInfo
-
-func init() {
-	proto.RegisterType((*ReshardRequest)(nil), "agentproto.ReshardRequest")
-}
-
-func init() { proto.RegisterFile("agent.proto", fileDescriptor_56ede974c0020f77) }
-
-var fileDescriptor_56ede974c0020f77 = []byte{
-	// 220 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4e, 0x4c, 0x4f, 0xcd,
-	0x2b, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0xe2, 0x02, 0x73, 0xc0, 0x6c, 0x29, 0xe9, 0xf4,
-	0xfc, 0xfc, 0xf4, 0x9c, 0x54, 0x7d, 0x30, 0x2f, 0xa9, 0x34, 0x4d, 0x3f, 0x35, 0xb7, 0xa0, 0xa4,
-	0x12, 0xa2, 0x50, 0x49, 0x80, 0x8b, 0x2f, 0x28, 0xb5, 0x38, 0x23, 0xb1, 0x28, 0x25, 0x28, 0xb5,
-	0xb0, 0x34, 0xb5, 0xb8, 0xc4, 0x28, 0x80, 0x8b, 0x3f, 0x38, 0xb9, 0x28, 0xb1, 0x20, 0x33, 0x2f,
-	0x3d, 0x38, 0xb5, 0xa8, 0x2c, 0x33, 0x39, 0x55, 0xc8, 0x96, 0x8b, 0x1d, 0xaa, 0x48, 0x48, 0x4a,
-	0x0f, 0x61, 0xb2, 0x1e, 0xaa, 0x4e, 0x29, 0x31, 0x3d, 0x88, 0x4d, 0x7a, 0x30, 0x9b, 0xf4, 0x5c,
-	0x41, 0x36, 0x39, 0xc5, 0x5e, 0x78, 0x28, 0xc7, 0x70, 0xe3, 0xa1, 0x1c, 0xc3, 0x87, 0x87, 0x72,
-	0x8c, 0x0d, 0x8f, 0xe4, 0x18, 0x57, 0x3c, 0x92, 0x63, 0x3c, 0xf1, 0x48, 0x8e, 0xf1, 0xc2, 0x23,
-	0x39, 0xc6, 0x07, 0x8f, 0xe4, 0x18, 0x5f, 0x3c, 0x92, 0x63, 0xf8, 0xf0, 0x48, 0x8e, 0x71, 0xc2,
-	0x63, 0x39, 0x86, 0x0b, 0x8f, 0xe5, 0x18, 0x6e, 0x3c, 0x96, 0x63, 0x88, 0x52, 0x4f, 0xcf, 0x2c,
-	0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0x2f, 0x4a, 0x4c, 0x4b, 0xcc, 0x4b, 0xd4,
-	0x07, 0xdb, 0xad, 0x5f, 0x90, 0x9d, 0xae, 0x8f, 0x70, 0x45, 0x12, 0x1b, 0x98, 0x32, 0x06, 0x04,
-	0x00, 0x00, 0xff, 0xff, 0x15, 0xe9, 0x8a, 0xfc, 0x01, 0x01, 0x00, 0x00,
-}
-
-func (this *ReshardRequest) Equal(that interface{}) bool {
-	if that == nil {
-		return this == nil
-	}
-
-	that1, ok := that.(*ReshardRequest)
-	if !ok {
-		that2, ok := that.(ReshardRequest)
-		if ok {
-			that1 = &that2
-		} else {
-			return false
-		}
-	}
-	if that1 == nil {
-		return this == nil
-	} else if this == nil {
-		return false
-	}
-	return true
-}
-func (this *ReshardRequest) GoString() string {
-	if this == nil {
-		return "nil"
-	}
-	s := make([]string, 0, 4)
-	s = append(s, "&agentproto.ReshardRequest{")
-	s = append(s, "}")
-	return strings.Join(s, "")
-}
-func valueToGoStringAgent(v interface{}, typ string) string {
-	rv := reflect.ValueOf(v)
-	if rv.IsNil() {
-		return "nil"
-	}
-	pv := reflect.Indirect(rv).Interface()
-	return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv)
-}
-
-// Reference imports to suppress errors if they are not otherwise used.
-var _ context.Context
-var _ grpc.ClientConn
-
-// This is a compile-time assertion to ensure that this generated file
-// is compatible with the grpc package it is being compiled against.
-const _ = grpc.SupportPackageIsVersion4
-
-// ScrapingServiceClient is the client API for ScrapingService service.
-//
-// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
-type ScrapingServiceClient interface {
-	// Reshard tells the implementing service to reshard all of its running
-	// configs.
-	Reshard(ctx context.Context, in *ReshardRequest, opts ...grpc.CallOption) (*emptypb.Empty, error)
-}
-
-type scrapingServiceClient struct {
-	cc *grpc.ClientConn
-}
-
-func NewScrapingServiceClient(cc *grpc.ClientConn) ScrapingServiceClient {
-	return &scrapingServiceClient{cc}
-}
-
-func (c *scrapingServiceClient) Reshard(ctx context.Context, in *ReshardRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) {
-	out := new(emptypb.Empty)
-	err := c.cc.Invoke(ctx, "/agentproto.ScrapingService/Reshard", in, out, opts...)
-	if err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-// ScrapingServiceServer is the server API for ScrapingService service.
-type ScrapingServiceServer interface {
-	// Reshard tells the implementing service to reshard all of its running
-	// configs.
-	Reshard(context.Context, *ReshardRequest) (*emptypb.Empty, error)
-}
-
-// UnimplementedScrapingServiceServer can be embedded to have forward compatible implementations.
-type UnimplementedScrapingServiceServer struct {
-}
-
-func (*UnimplementedScrapingServiceServer) Reshard(ctx context.Context, req *ReshardRequest) (*emptypb.Empty, error) {
-	return nil, status.Errorf(codes.Unimplemented, "method Reshard not implemented")
-}
-
-func RegisterScrapingServiceServer(s *grpc.Server, srv ScrapingServiceServer) {
-	s.RegisterService(&_ScrapingService_serviceDesc, srv)
-}
-
-func _ScrapingService_Reshard_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
-	in := new(ReshardRequest)
-	if err := dec(in); err != nil {
-		return nil, err
-	}
-	if interceptor == nil {
-		return srv.(ScrapingServiceServer).Reshard(ctx, in)
-	}
-	info := &grpc.UnaryServerInfo{
-		Server:     srv,
-		FullMethod: "/agentproto.ScrapingService/Reshard",
-	}
-	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
-		return srv.(ScrapingServiceServer).Reshard(ctx, req.(*ReshardRequest))
-	}
-	return interceptor(ctx, in, info, handler)
-}
-
-var _ScrapingService_serviceDesc = grpc.ServiceDesc{
-	ServiceName: "agentproto.ScrapingService",
-	HandlerType: (*ScrapingServiceServer)(nil),
-	Methods: []grpc.MethodDesc{
-		{
-			MethodName: "Reshard",
-			Handler:    _ScrapingService_Reshard_Handler,
-		},
-	},
-	Streams:  []grpc.StreamDesc{},
-	Metadata: "agent.proto",
-}
-
-func (m *ReshardRequest) Marshal() (dAtA []byte, err error) {
-	size := m.Size()
-	dAtA = make([]byte, size)
-	n, err := m.MarshalToSizedBuffer(dAtA[:size])
-	if err != nil {
-		return nil, err
-	}
-	return dAtA[:n], nil
-}
-
-func (m *ReshardRequest) MarshalTo(dAtA []byte) (int, error) {
-	size := m.Size()
-	return m.MarshalToSizedBuffer(dAtA[:size])
-}
-
-func (m *ReshardRequest) MarshalToSizedBuffer(dAtA []byte) (int, error) {
-	i := len(dAtA)
-	_ = i
-	var l int
-	_ = l
-	return len(dAtA) - i, nil
-}
-
-func encodeVarintAgent(dAtA []byte, offset int, v uint64) int {
-	offset -= sovAgent(v)
-	base := offset
-	for v >= 1<<7 {
-		dAtA[offset] = uint8(v&0x7f | 0x80)
-		v >>= 7
-		offset++
-	}
-	dAtA[offset] = uint8(v)
-	return base
-}
-func (m *ReshardRequest) Size() (n int) {
-	if m == nil {
-		return 0
-	}
-	var l int
-	_ = l
-	return n
-}
-
-func sovAgent(x uint64) (n int) {
-	return (math_bits.Len64(x|1) + 6) / 7
-}
-func sozAgent(x uint64) (n int) {
-	return sovAgent(uint64((x << 1) ^ uint64((int64(x) >> 63))))
-}
-func (this *ReshardRequest) String() string {
-	if this == nil {
-		return "nil"
-	}
-	s := strings.Join([]string{`&ReshardRequest{`,
-		`}`,
-	}, "")
-	return s
-}
-func valueToStringAgent(v interface{}) string {
-	rv := reflect.ValueOf(v)
-	if rv.IsNil() {
-		return "nil"
-	}
-	pv := reflect.Indirect(rv).Interface()
-	return fmt.Sprintf("*%v", pv)
-}
-func (m *ReshardRequest) Unmarshal(dAtA []byte) error {
-	l := len(dAtA)
-	iNdEx := 0
-	for iNdEx < l {
-		preIndex := iNdEx
-		var wire uint64
-		for shift := uint(0); ; shift += 7 {
-			if shift >= 64 {
-				return ErrIntOverflowAgent
-			}
-			if iNdEx >= l {
-				return io.ErrUnexpectedEOF
-			}
-			b := dAtA[iNdEx]
-			iNdEx++
-			wire |= uint64(b&0x7F) << shift
-			if b < 0x80 {
-				break
-			}
-		}
-		fieldNum := int32(wire >> 3)
-		wireType := int(wire & 0x7)
-		if wireType == 4 {
-			return fmt.Errorf("proto: ReshardRequest: wiretype end group for non-group")
-		}
-		if fieldNum <= 0 {
-			return fmt.Errorf("proto: ReshardRequest: illegal tag %d (wire type %d)", fieldNum, wire)
-		}
-		switch fieldNum {
-		default:
-			iNdEx = preIndex
-			skippy, err := skipAgent(dAtA[iNdEx:])
-			if err != nil {
-				return err
-			}
-			if skippy < 0 {
-				return ErrInvalidLengthAgent
-			}
-			if (iNdEx + skippy) < 0 {
-				return ErrInvalidLengthAgent
-			}
-			if (iNdEx + skippy) > l {
-				return io.ErrUnexpectedEOF
-			}
-			iNdEx += skippy
-		}
-	}
-
-	if iNdEx > l {
-		return io.ErrUnexpectedEOF
-	}
-	return nil
-}
-func skipAgent(dAtA []byte) (n int, err error) {
-	l := len(dAtA)
-	iNdEx := 0
-	depth := 0
-	for iNdEx < l {
-		var wire uint64
-		for shift := uint(0); ; shift += 7 {
-			if shift >= 64 {
-				return 0, ErrIntOverflowAgent
-			}
-			if iNdEx >= l {
-				return 0, io.ErrUnexpectedEOF
-			}
-			b := dAtA[iNdEx]
-			iNdEx++
-			wire |= (uint64(b) & 0x7F) << shift
-			if b < 0x80 {
-				break
-			}
-		}
-		wireType := int(wire & 0x7)
-		switch wireType {
-		case 0:
-			for shift := uint(0); ; shift += 7 {
-				if shift >= 64 {
-					return 0, ErrIntOverflowAgent
-				}
-				if iNdEx >= l {
-					return 0, io.ErrUnexpectedEOF
-				}
-				iNdEx++
-				if dAtA[iNdEx-1] < 0x80 {
-					break
-				}
-			}
-		case 1:
-			iNdEx += 8
-		case 2:
-			var length int
-			for shift := uint(0); ; shift += 7 {
-				if shift >= 64 {
-					return 0, ErrIntOverflowAgent
-				}
-				if iNdEx >= l {
-					return 0, io.ErrUnexpectedEOF
-				}
-				b := dAtA[iNdEx]
-				iNdEx++
-				length |= (int(b) & 0x7F) << shift
-				if b < 0x80 {
-					break
-				}
-			}
-			if length < 0 {
-				return 0, ErrInvalidLengthAgent
-			}
-			iNdEx += length
-		case 3:
-			depth++
-		case 4:
-			if depth == 0 {
-				return 0, ErrUnexpectedEndOfGroupAgent
-			}
-			depth--
-		case 5:
-			iNdEx += 4
-		default:
-			return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
-		}
-		if iNdEx < 0 {
-			return 0, ErrInvalidLengthAgent
-		}
-		if depth == 0 {
-			return iNdEx, nil
-		}
-	}
-	return 0, io.ErrUnexpectedEOF
-}
-
-var (
-	ErrInvalidLengthAgent        = fmt.Errorf("proto: negative length found during unmarshaling")
-	ErrIntOverflowAgent          = fmt.Errorf("proto: integer overflow")
-	ErrUnexpectedEndOfGroupAgent = fmt.Errorf("proto: unexpected end of group")
-)
diff --git a/internal/static/agentproto/agent.proto b/internal/static/agentproto/agent.proto
deleted file mode 100644
index 405f7779c4..0000000000
--- a/internal/static/agentproto/agent.proto
+++ /dev/null
@@ -1,20 +0,0 @@
-syntax = "proto3";
-
-package agentproto;
-option go_package = "github.com/grafana/agent/internal/static/agentproto";
-
-import "google/protobuf/empty.proto";
-
-// ScrapingService holds methods that can be called against a Prometheus
-// Scraping Service instance.
-//
-// These methods are only available when the agent config file has enabled the
-// scraping service mode. If the scraping service mode is not enabling,
-// invoking any of the RPCs here will return a not found error.
-service ScrapingService {
-  // Reshard tells the implementing service to reshard all of its running
-  // configs.
-  rpc Reshard(ReshardRequest) returns (google.protobuf.Empty);
-}
-
-message ReshardRequest {}
diff --git a/internal/static/agentproto/func.go b/internal/static/agentproto/func.go
deleted file mode 100644
index 64bf9cc204..0000000000
--- a/internal/static/agentproto/func.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package agentproto
-
-import (
-	"context"
-
-	empty "github.com/golang/protobuf/ptypes/empty"
-)
-
-// FuncScrapingServiceServer is an implementation of ScrapingServiceServer that
-// uses function fields to implement the interface. Useful for tests.
-type FuncScrapingServiceServer struct {
-	ReshardFunc func(context.Context, *ReshardRequest) (*empty.Empty, error)
-}
-
-// Reshard implements ScrapingServiceServer.
-func (f *FuncScrapingServiceServer) Reshard(ctx context.Context, req *ReshardRequest) (*empty.Empty, error) {
-	if f.ReshardFunc != nil {
-		return f.ReshardFunc(ctx, req)
-	}
-	panic("ReshardFunc is nil")
-}
diff --git a/internal/static/agentproto/gen.go b/internal/static/agentproto/gen.go
deleted file mode 100644
index 591102b899..0000000000
--- a/internal/static/agentproto/gen.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package agentproto
-
-//go:generate protoc --gogoslick_out=Mgoogle/protobuf/timestamp.proto=github.com/gogo/protobuf/types,plugins=grpc,paths=source_relative:./ ./agent.proto
diff --git a/internal/static/client/client.go b/internal/static/client/client.go
deleted file mode 100644
index 68048cbce3..0000000000
--- a/internal/static/client/client.go
+++ /dev/null
@@ -1,179 +0,0 @@
-// Package client provides a client interface to the Agent HTTP
-// API.
-package client
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"net/http"
-	"strings"
-
-	"github.com/grafana/agent/internal/static/metrics/cluster/configapi"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"gopkg.in/yaml.v2"
-)
-
-// Client is a collection of all subsystem clients.
-type Client struct {
-	PrometheusClient
-}
-
-// New creates a new Client.
-func New(addr string) *Client {
-	return &Client{
-		PrometheusClient: &prometheusClient{addr: addr},
-	}
-}
-
-// PrometheusClient is the client interface to the API exposed by the
-// Prometheus subsystem of the Grafana Agent.
-type PrometheusClient interface {
-	// Instances runs the list of currently running instances.
-	Instances(ctx context.Context) ([]string, error)
-
-	// The following methods are for the scraping service mode
-	// only and will fail when not enabled on the Agent.
-
-	// ListConfigs runs the list of instance configs stored in the config
-	// management KV store.
-	ListConfigs(ctx context.Context) (*configapi.ListConfigurationsResponse, error)
-
-	// GetConfiguration returns a named configuration from the config
-	// management KV store.
-	GetConfiguration(ctx context.Context, name string) (*instance.Config, error)
-
-	// PutConfiguration adds or updates a named configuration into the
-	// config management KV store.
-	PutConfiguration(ctx context.Context, name string, cfg *instance.Config) error
-
-	// DeleteConfiguration removes a named configuration from the config
-	// management KV store.
-	DeleteConfiguration(ctx context.Context, name string) error
-}
-
-type prometheusClient struct {
-	addr string
-}
-
-func (c *prometheusClient) Instances(ctx context.Context) ([]string, error) {
-	url := fmt.Sprintf("%s/agent/api/v1/metrics/instances", c.addr)
-
-	resp, err := c.doRequest(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	var data []string
-	err = unmarshalPrometheusAPIResponse(resp.Body, &data)
-	return data, err
-}
-
-func (c *prometheusClient) ListConfigs(ctx context.Context) (*configapi.ListConfigurationsResponse, error) {
-	url := fmt.Sprintf("%s/agent/api/v1/configs", c.addr)
-
-	resp, err := c.doRequest(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	var data configapi.ListConfigurationsResponse
-	err = unmarshalPrometheusAPIResponse(resp.Body, &data)
-	return &data, err
-}
-
-func (c *prometheusClient) GetConfiguration(ctx context.Context, name string) (*instance.Config, error) {
-	url := fmt.Sprintf("%s/agent/api/v1/configs/%s", c.addr, name)
-
-	resp, err := c.doRequest(ctx, "GET", url, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	var data configapi.GetConfigurationResponse
-	if err := unmarshalPrometheusAPIResponse(resp.Body, &data); err != nil {
-		return nil, err
-	}
-
-	var config instance.Config
-	err = yaml.NewDecoder(strings.NewReader(data.Value)).Decode(&config)
-	return &config, err
-}
-
-func (c *prometheusClient) PutConfiguration(ctx context.Context, name string, cfg *instance.Config) error {
-	url := fmt.Sprintf("%s/agent/api/v1/config/%s", c.addr, name)
-
-	bb, err := instance.MarshalConfig(cfg, false)
-	if err != nil {
-		return err
-	}
-
-	resp, err := c.doRequest(ctx, "POST", url, bytes.NewReader(bb))
-	if err != nil {
-		return err
-	}
-
-	return unmarshalPrometheusAPIResponse(resp.Body, nil)
-}
-
-func (c *prometheusClient) DeleteConfiguration(ctx context.Context, name string) error {
-	url := fmt.Sprintf("%s/agent/api/v1/config/%s", c.addr, name)
-
-	resp, err := c.doRequest(ctx, "DELETE", url, nil)
-	if err != nil {
-		return err
-	}
-
-	return unmarshalPrometheusAPIResponse(resp.Body, nil)
-}
-
-func (c *prometheusClient) doRequest(ctx context.Context, method string, url string, body io.Reader) (*http.Response, error) {
-	req, err := http.NewRequestWithContext(ctx, method, url, body)
-	if err != nil {
-		return nil, err
-	}
-	return http.DefaultClient.Do(req)
-}
-
-// unmarshalPrometheusAPIResponse will unmarshal a response from the Prometheus
-// subsystem API.
-//
-// r will be closed after this method is called.
-func unmarshalPrometheusAPIResponse(r io.ReadCloser, v interface{}) error {
-	defer func() {
-		_ = r.Close()
-	}()
-
-	resp := struct {
-		Status string          `json:"status"`
-		Data   json.RawMessage `json:"data"`
-	}{}
-
-	err := json.NewDecoder(r).Decode(&resp)
-	if err != nil {
-		return fmt.Errorf("could not read response: %w", err)
-	}
-
-	if v != nil && resp.Status == "success" {
-		err := json.Unmarshal(resp.Data, v)
-		if err != nil {
-			return fmt.Errorf("unmarshaling response: %w", err)
-		}
-	} else if resp.Status == "error" {
-		var errResp configapi.ErrorResponse
-		err := json.Unmarshal(resp.Data, &errResp)
-		if err != nil {
-			return fmt.Errorf("unmarshaling error: %w", err)
-		}
-
-		return fmt.Errorf("%s", errResp.Error)
-	}
-
-	if resp.Status != "success" && resp.Status != "error" {
-		return fmt.Errorf("unknown API response status: %s", resp.Status)
-	}
-
-	return nil
-}
diff --git a/internal/static/config/agent_management_remote_config_test.go b/internal/static/config/agent_management_remote_config_test.go
deleted file mode 100644
index 820801cf70..0000000000
--- a/internal/static/config/agent_management_remote_config_test.go
+++ /dev/null
@@ -1,364 +0,0 @@
-package config
-
-import (
-	"testing"
-	"time"
-
-	process_exporter "github.com/grafana/agent/internal/static/integrations/process_exporter"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/prometheus/common/model"
-	"github.com/prometheus/prometheus/model/labels"
-	"github.com/stretchr/testify/require"
-)
-
-func TestBuildRemoteConfig(t *testing.T) {
-	baseConfig := `
-server:
-  log_level: debug
-`
-	metricsSnippets := []Snippet{{
-		Config: `
-metrics_scrape_configs:
-  - job_name: 'prometheus'
-    scrape_interval: 15s
-    static_configs:
-      - targets: ['localhost:9090']
-`,
-	}}
-	logsSnippets := []Snippet{{
-		Config: `
-logs_scrape_configs:
-  - job_name: 'loki'
-    static_configs:
-      - targets: ['localhost:3100']
-`,
-	}}
-	integrationSnippets := []Snippet{{
-		Config: `
-integration_configs:
-  agent:
-    enabled: true
-    relabel_configs:
-      - action: replace
-        source_labels:
-          - agent_hostname
-        target_label: instance
-`,
-	}}
-
-	allSnippets := []Snippet{}
-	allSnippets = append(allSnippets, metricsSnippets...)
-	allSnippets = append(allSnippets, logsSnippets...)
-	allSnippets = append(allSnippets, integrationSnippets...)
- - t.Run("only metrics snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: metricsSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, len(c.Metrics.Configs), 1) - require.Empty(t, c.Logs) - require.Empty(t, c.Integrations.ConfigV1.Integrations) - }) - - t.Run("only log snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: logsSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, len(c.Logs.Configs), 1) - require.Empty(t, c.Metrics.Configs) - require.Empty(t, c.Integrations.ConfigV1.Integrations) - }) - - t.Run("only integration snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: integrationSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Empty(t, c.Metrics.Configs) - require.Empty(t, c.Logs) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - }) - - t.Run("base with already logs, metrics and integrations provided", func(t *testing.T) { - fullConfig := ` -metrics: - configs: - - name: default - scrape_configs: - - job_name: default-prom - static_configs: - - targets: - - localhost:9090 -logs: - positions_directory: /tmp/grafana-agent-positions - configs: - - name: default - scrape_configs: - - job_name: default-loki - static_configs: - - targets: - - localhost:3100 -integrations: - node_exporter: - enabled: true -` - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(fullConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, len(c.Logs.Configs), 2) - require.Equal(t, len(c.Metrics.Configs), 2) - require.Equal(t, 2, len(c.Integrations.ConfigV1.Integrations)) - }) - - t.Run("do not override integrations defined in base config with the ones defined in snippets", func(t *testing.T) { - baseConfig := ` -integrations: - node_exporter: - enabled: false -` - - snippets := []Snippet{{ - Config: ` -integration_configs: - node_exporter: - enabled: true`, - }} - - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: snippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - require.False(t, c.Integrations.ConfigV1.Integrations[0].Common.Enabled) - }) - - t.Run("all snippets provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Metrics.Configs)) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - - // check some fields to make sure the config was parsed correctly - require.Equal(t, "prometheus", c.Metrics.Configs[0].ScrapeConfigs[0].JobName) - require.Equal(t, "loki", c.Logs.Configs[0].ScrapeConfig[0].JobName) - require.Equal(t, "agent", c.Integrations.ConfigV1.Integrations[0].Name()) - - // make sure defaults for metric snippets are applied - require.Equal(t, instance.DefaultConfig.WALTruncateFrequency, c.Metrics.Configs[0].WALTruncateFrequency) - require.Equal(t, instance.DefaultConfig.HostFilter, c.Metrics.Configs[0].HostFilter) - require.Equal(t, instance.DefaultConfig.MinWALTime, c.Metrics.Configs[0].MinWALTime) - require.Equal(t, instance.DefaultConfig.MaxWALTime, 
c.Metrics.Configs[0].MaxWALTime) - require.Equal(t, instance.DefaultConfig.RemoteFlushDeadline, c.Metrics.Configs[0].RemoteFlushDeadline) - require.Equal(t, instance.DefaultConfig.WriteStaleOnShutdown, c.Metrics.Configs[0].WriteStaleOnShutdown) - require.Equal(t, instance.DefaultGlobalConfig, c.Metrics.Global) - - // make sure defaults for log snippets are applied - require.Equal(t, 10*time.Second, c.Logs.Configs[0].PositionsConfig.SyncPeriod) - require.Equal(t, "", c.Logs.Configs[0].PositionsConfig.PositionsFile) - require.Equal(t, false, c.Logs.Configs[0].PositionsConfig.IgnoreInvalidYaml) - require.Equal(t, false, c.Logs.Configs[0].TargetConfig.Stdin) - - // make sure defaults for integration snippets are applied - require.Equal(t, true, c.Integrations.ConfigV1.ScrapeIntegrations) - require.Equal(t, true, c.Integrations.ConfigV1.UseHostnameLabel) - require.Equal(t, true, c.Integrations.ConfigV1.ReplaceInstanceLabel) - require.Equal(t, 5*time.Second, c.Integrations.ConfigV1.IntegrationRestartBackoff) - }) - - t.Run("template variables provided", func(t *testing.T) { - baseConfig := ` -server: - log_level: {{.log_level}} -` - templateInsideTemplate := "`{{ .template_inside_template }}`" - snippet := Snippet{ - Config: ` -integration_configs: - process_exporter: - enabled: true - process_names: - - name: "grafana-agent" - cmdline: - - 'grafana-agent' - - name: "{{.nonexistent.foo.bar.baz.bat}}" - cmdline: - - "{{ ` + templateInsideTemplate + ` }}" - # Custom process monitors - {{- range $key, $value := .process_exporter_processes }} - - name: "{{ $value.name }}" - cmdline: - - "{{ $value.cmdline }}" - {{if $value.exe}} - exe: - - "{{ $value.exe }}" - {{end}} - {{- end }} -`, - } - - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: []Snippet{snippet}, - AgentMetadata: AgentMetadata{ - TemplateVariables: map[string]any{ - "log_level": "debug", - "process_exporter_processes": []map[string]string{ - { - "name": "java_processes", - "cmdline": ".*/java", - }, - { - "name": "{{.ExeFull}}:{{.Matches.Cfgfile}}", - "cmdline": `-config.path\\s+(?P\\S+)`, - "exe": "/usr/local/bin/process-exporter", - }, - }, - }, - }, - } - - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Integrations.ConfigV1.Integrations)) - processExporterConfig := c.Integrations.ConfigV1.Integrations[0].Config.(*process_exporter.Config) - - require.Equal(t, 4, len(processExporterConfig.ProcessExporter)) - - require.Equal(t, "grafana-agent", processExporterConfig.ProcessExporter[0].Name) - require.Equal(t, "grafana-agent", processExporterConfig.ProcessExporter[0].CmdlineRules[0]) - require.Equal(t, 0, len(processExporterConfig.ProcessExporter[0].ExeRules)) - - require.Equal(t, "", processExporterConfig.ProcessExporter[1].Name) - require.Equal(t, "{{ .template_inside_template }}", processExporterConfig.ProcessExporter[1].CmdlineRules[0]) - require.Equal(t, 0, len(processExporterConfig.ProcessExporter[1].ExeRules)) - - require.Equal(t, "java_processes", processExporterConfig.ProcessExporter[2].Name) - require.Equal(t, ".*/java", processExporterConfig.ProcessExporter[2].CmdlineRules[0]) - require.Equal(t, 0, len(processExporterConfig.ProcessExporter[2].ExeRules)) - - require.Equal(t, "{{.ExeFull}}:{{.Matches.Cfgfile}}", processExporterConfig.ProcessExporter[3].Name) - require.Equal(t, `-config.path\s+(?P\S+)`, processExporterConfig.ProcessExporter[3].CmdlineRules[0]) - require.Equal(t, "/usr/local/bin/process-exporter", 
processExporterConfig.ProcessExporter[3].ExeRules[0]) - }) - - t.Run("no external labels provided", func(t *testing.T) { - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Empty(t, c.Metrics.Global.Prometheus.ExternalLabels) - }) - - t.Run("no external labels provided in remote config", func(t *testing.T) { - baseConfig := ` -server: - log_level: debug -metrics: - global: - external_labels: - foo: bar -logs: - global: - clients: - - external_labels: - foo: bar -` - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Logs.Global.ClientConfigs)) - require.Equal(t, c.Logs.Global.ClientConfigs[0].ExternalLabels.LabelSet, model.LabelSet{"foo": "bar"}) - require.Equal(t, 1, len(c.Metrics.Global.Prometheus.ExternalLabels)) - require.Contains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "bar"}) - }) - - t.Run("external labels provided", func(t *testing.T) { - baseConfig := ` -server: - log_level: debug -metrics: - global: - remote_write: - - url: http://localhost:9090/api/prom/push -logs: - global: - clients: - - url: http://localhost:3100/loki/api/v1/push -` - - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - AgentMetadata: AgentMetadata{ - ExternalLabels: map[string]string{ - "foo": "bar", - }, - }, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Metrics.Configs)) - require.Equal(t, 1, len(c.Logs.Global.ClientConfigs)) - require.Equal(t, c.Logs.Global.ClientConfigs[0].ExternalLabels.LabelSet, model.LabelSet{"foo": "bar"}) - require.Contains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "bar"}) - }) - - t.Run("external labels don't override base config", func(t *testing.T) { - baseConfig := ` -server: - log_level: debug -metrics: - global: - external_labels: - foo: bar -logs: - global: - clients: - - external_labels: - foo: bar -` - rc := RemoteConfig{ - BaseConfig: BaseConfigContent(baseConfig), - Snippets: allSnippets, - AgentMetadata: AgentMetadata{ - ExternalLabels: map[string]string{ - "foo": "baz", - }, - }, - } - c, err := rc.BuildAgentConfig() - require.NoError(t, err) - require.Equal(t, 1, len(c.Logs.Configs)) - require.Equal(t, 1, len(c.Metrics.Configs)) - require.Equal(t, 1, len(c.Logs.Global.ClientConfigs)) - require.Equal(t, c.Logs.Global.ClientConfigs[0].ExternalLabels.LabelSet, model.LabelSet{"foo": "bar"}) - require.Contains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "bar"}) - require.NotContains(t, c.Metrics.Global.Prometheus.ExternalLabels, labels.Label{Name: "foo", Value: "baz"}) - }) -} diff --git a/internal/static/config/agentmanagement.go b/internal/static/config/agentmanagement.go index 9f91ba21a7..7b9c686d8a 100644 --- a/internal/static/config/agentmanagement.go +++ b/internal/static/config/agentmanagement.go @@ -1,202 +1,19 @@ package config import ( - "crypto/sha256" - "encoding/hex" - "encoding/json" "errors" - "flag" "fmt" - "math/rand" - "net/url" - "os" - "path/filepath" "time" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/config/instrumentation" - 
"github.com/grafana/agent/internal/static/server" "github.com/prometheus/common/config" - "gopkg.in/yaml.v2" -) - -const ( - cacheFilename = "remote-config-cache.yaml" - apiPath = "/agent-management/api/agent/v2" - labelManagementEnabledHeader = "X-LabelManagementEnabled" - agentIDHeader = "X-AgentID" - agentNamespaceVersionHeader = "X-AgentNamespaceVersion" - agentInfoVersionHeader = "X-AgentInfoVersion" - acceptNotModifiedHeader = "X-AcceptHTTPNotModified" ) var ( - agentInfoVersion string - agentNamespaceVersion string defaultRemoteConfiguration = RemoteConfiguration{ AcceptHTTPNotModified: true, } ) -type remoteConfigProvider interface { - GetCachedRemoteConfig() ([]byte, error) - CacheRemoteConfig(remoteConfigBytes []byte) error - FetchRemoteConfig() ([]byte, error) - GetPollingInterval() time.Duration -} - -type remoteConfigHTTPProvider struct { - InitialConfig *AgentManagementConfig -} - -func newRemoteConfigHTTPProvider(c *Config) (*remoteConfigHTTPProvider, error) { - err := c.AgentManagement.Validate() - if err != nil { - return nil, err - } - return &remoteConfigHTTPProvider{ - InitialConfig: &c.AgentManagement, - }, nil -} - -type remoteConfigCache struct { - InitialConfigHash string `json:"initial_config_hash"` - Config string `json:"config"` -} - -func hashInitialConfig(am AgentManagementConfig) (string, error) { - marshalled, err := yaml.Marshal(am) - if err != nil { - return "", fmt.Errorf("could not marshal initial config: %w", err) - } - hashed := sha256.Sum256(marshalled) - return hex.EncodeToString(hashed[:]), nil -} - -// initialConfigHashCheck checks if the hash of initialConfig matches what is stored in configCache.InitialConfigHash. -// If an error is encountered while hashing initialConfig or the hashes do not match, initialConfigHashCheck -// returns an error. Otherwise, it returns nil. 
-func initialConfigHashCheck(initialConfig AgentManagementConfig, configCache remoteConfigCache) error { - initialConfigHash, err := hashInitialConfig(initialConfig) - if err != nil { - return err - } - - if !(configCache.InitialConfigHash == initialConfigHash) { - return errors.New("invalid remote config cache: initial config hashes don't match") - } - return nil -} - -// GetCachedRemoteConfig retrieves the cached remote config from the location specified -// in r.AgentManagement.CacheLocation -func (r remoteConfigHTTPProvider) GetCachedRemoteConfig() ([]byte, error) { - cachePath := filepath.Join(r.InitialConfig.RemoteConfiguration.CacheLocation, cacheFilename) - - var configCache remoteConfigCache - buf, err := os.ReadFile(cachePath) - - if err != nil { - return nil, fmt.Errorf("error reading remote config cache: %w", err) - } - - if err := json.Unmarshal(buf, &configCache); err != nil { - return nil, fmt.Errorf("error trying to load cached remote config from file: %w", err) - } - - if err = initialConfigHashCheck(*r.InitialConfig, configCache); err != nil { - return nil, err - } - - return []byte(configCache.Config), nil -} - -// CacheRemoteConfig caches the remote config to the location specified in -// r.AgentManagement.CacheLocation -func (r remoteConfigHTTPProvider) CacheRemoteConfig(remoteConfigBytes []byte) error { - cachePath := filepath.Join(r.InitialConfig.RemoteConfiguration.CacheLocation, cacheFilename) - initialConfigHash, err := hashInitialConfig(*r.InitialConfig) - if err != nil { - return err - } - configCache := remoteConfigCache{ - InitialConfigHash: initialConfigHash, - Config: string(remoteConfigBytes), - } - marshalled, err := json.Marshal(configCache) - if err != nil { - return fmt.Errorf("could not marshal remote config cache: %w", err) - } - return os.WriteFile(cachePath, marshalled, 0666) -} - -// FetchRemoteConfig fetches the raw bytes of the config from a remote API using -// the values in r.AgentManagement. -func (r remoteConfigHTTPProvider) FetchRemoteConfig() ([]byte, error) { - httpClientConfig := &r.InitialConfig.HTTPClientConfig - - dir, err := os.Getwd() - if err != nil { - return nil, fmt.Errorf("failed to get current working directory: %w", err) - } - httpClientConfig.SetDirectory(dir) - - remoteOpts := &remoteOpts{ - HTTPClientConfig: httpClientConfig, - } - - if r.InitialConfig.RemoteConfiguration.LabelManagementEnabled && r.InitialConfig.RemoteConfiguration.AgentID != "" { - remoteOpts.headers = map[string]string{ - labelManagementEnabledHeader: "1", - agentIDHeader: r.InitialConfig.RemoteConfiguration.AgentID, - } - - if agentNamespaceVersion != "" { - remoteOpts.headers[agentNamespaceVersionHeader] = agentNamespaceVersion - } - if agentInfoVersion != "" { - remoteOpts.headers[agentInfoVersionHeader] = agentInfoVersion - } - if r.InitialConfig.RemoteConfiguration.AcceptHTTPNotModified { - remoteOpts.headers[acceptNotModifiedHeader] = "1" - } - } - - url, err := r.InitialConfig.fullUrl() - if err != nil { - return nil, fmt.Errorf("error trying to create full url: %w", err) - } - rc, err := newRemoteProvider(url, remoteOpts) - if err != nil { - return nil, fmt.Errorf("error reading remote config: %w", err) - } - - bb, headers, err := rc.retrieve() - - // If the server returns a 304, return it and the caller will handle it. 
- var nme notModifiedError - if errors.Is(err, nme) { - return nil, nme - } - - if err != nil { - return nil, fmt.Errorf("error retrieving remote config: %w", err) - } - - nsVersion := headers.Get(agentNamespaceVersionHeader) - infoVersion := headers.Get(agentInfoVersionHeader) - if nsVersion != "" && infoVersion != "" { - agentNamespaceVersion = nsVersion - agentInfoVersion = infoVersion - } - - return bb, nil -} - -func (r remoteConfigHTTPProvider) GetPollingInterval() time.Duration { - return r.InitialConfig.PollingInterval -} - type labelMap map[string]string type RemoteConfiguration struct { @@ -226,131 +43,6 @@ type AgentManagementConfig struct { RemoteConfiguration RemoteConfiguration `yaml:"remote_configuration"` } -// getRemoteConfig gets the remote config specified in the initial config, falling back to a local, cached copy -// of the remote config if the request to the remote fails. If both fail, an empty config and an -// error will be returned. -func getRemoteConfig(expandEnvVars bool, configProvider remoteConfigProvider, log *server.Logger, fs *flag.FlagSet, retry bool) (*Config, error) { - remoteConfigBytes, err := configProvider.FetchRemoteConfig() - if errors.Is(err, notModifiedError{}) { - level.Info(log).Log("msg", "remote config has not changed since last fetch, using cached copy") - remoteConfigBytes, err = configProvider.GetCachedRemoteConfig() - } - if err != nil { - var retryAfterErr retryAfterError - if errors.As(err, &retryAfterErr) && retry { - // In the case that the server is telling us to retry after a time greater than our polling interval, - // the agent should sleep for the duration of the retry-after header. - // - // If the duration of the retry-after is lower than the polling interval, the agent will simply - // fall back to the cache and continue polling at the polling interval, effectively skipping - // this poll. - if retryAfterErr.retryAfter > configProvider.GetPollingInterval() { - level.Info(log).Log("msg", "received retry-after from API, sleeping and falling back to cache", "retry-after", retryAfterErr.retryAfter) - time.Sleep(retryAfterErr.retryAfter) - } else { - level.Info(log).Log("msg", "received retry-after from API, falling back to cache", "retry-after", retryAfterErr.retryAfter) - } - // Return the cached config, as this is the last known good config and a config must be returned here. - return getCachedRemoteConfig(expandEnvVars, configProvider, fs, log) - } - level.Error(log).Log("msg", "could not fetch from API, falling back to cache", "err", err) - return getCachedRemoteConfig(expandEnvVars, configProvider, fs, log) - } - - config, err := loadRemoteConfig(remoteConfigBytes, expandEnvVars, fs) - if err != nil { - level.Error(log).Log("msg", "could not load remote config, falling back to cache", "err", err) - return getCachedRemoteConfig(expandEnvVars, configProvider, fs, log) - } - - level.Info(log).Log("msg", "fetched and loaded remote config from API") - - if err = configProvider.CacheRemoteConfig(remoteConfigBytes); err != nil { - level.Error(log).Log("err", fmt.Errorf("could not cache config locally: %w", err)) - } - return config, nil -} - -// getCachedRemoteConfig gets the cached remote config, falling back to the default config if the cache is invalid or not found. 
-func getCachedRemoteConfig(expandEnvVars bool, configProvider remoteConfigProvider, fs *flag.FlagSet, log *server.Logger) (*Config, error) { - rc, err := configProvider.GetCachedRemoteConfig() - if err != nil { - level.Error(log).Log("msg", "could not get cached remote config, falling back to default (empty) config", "err", err) - d := DefaultConfig() - instrumentation.InstrumentAgentManagementConfigFallback("empty_config") - return &d, nil - } - instrumentation.InstrumentAgentManagementConfigFallback("cache") - return loadRemoteConfig(rc, expandEnvVars, fs) -} - -// loadRemoteConfig parses and validates the remote config, both syntactically and semantically. -func loadRemoteConfig(remoteConfigBytes []byte, expandEnvVars bool, fs *flag.FlagSet) (*Config, error) { - expandedRemoteConfigBytes, err := performEnvVarExpansion(remoteConfigBytes, expandEnvVars) - if err != nil { - instrumentation.InstrumentInvalidRemoteConfig("env_var_expansion") - return nil, fmt.Errorf("could not expand env vars for remote config: %w", err) - } - - remoteConfig, err := NewRemoteConfig(expandedRemoteConfigBytes) - if err != nil { - instrumentation.InstrumentInvalidRemoteConfig("invalid_yaml") - return nil, fmt.Errorf("could not unmarshal remote config: %w", err) - } - - config, err := remoteConfig.BuildAgentConfig() - if err != nil { - instrumentation.InstrumentInvalidRemoteConfig("invalid_remote_config") - return nil, fmt.Errorf("could not build agent config: %w", err) - } - - if err = config.Validate(fs); err != nil { - instrumentation.InstrumentInvalidRemoteConfig("semantically_invalid_agent_config") - return nil, fmt.Errorf("semantically invalid config received from the API: %w", err) - } - return config, nil -} - -// newRemoteConfigProvider creates a remoteConfigProvider based on the protocol -// specified in c.AgentManagement -func newRemoteConfigProvider(c *Config) (*remoteConfigHTTPProvider, error) { - switch p := c.AgentManagement.Protocol; { - case p == "https" || p == "http": - return newRemoteConfigHTTPProvider(c) - default: - return nil, fmt.Errorf("unsupported protocol for agent management api: %s", p) - } -} - -// fullUrl creates and returns the URL that should be used when querying the Agent Management API, -// including the namespace, base config id, and any labels that have been specified. -func (am *AgentManagementConfig) fullUrl() (string, error) { - fullPath, err := url.JoinPath(am.Protocol+"://", am.Host, apiPath, "namespace", am.RemoteConfiguration.Namespace, "remote_config") - if err != nil { - return "", fmt.Errorf("error trying to join url: %w", err) - } - u, err := url.Parse(fullPath) - if err != nil { - return "", fmt.Errorf("error trying to parse url: %w", err) - } - q := u.Query() - for label, value := range am.RemoteConfiguration.Labels { - q.Add(label, value) - } - u.RawQuery = q.Encode() - return u.String(), nil -} - -// SleepTime returns the duration in between config fetches. -func (am *AgentManagementConfig) SleepTime() time.Duration { - return am.PollingInterval -} - -// jitterTime returns a random duration in the range [0, am.PollingInterval). -func (am *AgentManagementConfig) JitterTime() time.Duration { - return time.Duration(rand.Int63n(int64(am.PollingInterval))) -} - // Validate checks that necessary portions of the config have been set. 
func (am *AgentManagementConfig) Validate() error { if am.HTTPClientConfig.BasicAuth == nil || am.HTTPClientConfig.BasicAuth.Username == "" || am.HTTPClientConfig.BasicAuth.PasswordFile == "" { diff --git a/internal/static/config/agentmanagement_remote_config.go b/internal/static/config/agentmanagement_remote_config.go deleted file mode 100644 index 6e658e7053..0000000000 --- a/internal/static/config/agentmanagement_remote_config.go +++ /dev/null @@ -1,179 +0,0 @@ -package config - -import ( - "bytes" - "text/template" - - "github.com/grafana/agent/internal/static/integrations" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/prometheus/common/model" - pc "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/model/labels" - "gopkg.in/yaml.v2" -) - -type ( - RemoteConfig struct { - BaseConfig BaseConfigContent `json:"base_config" yaml:"base_config"` - Snippets []Snippet `json:"snippets" yaml:"snippets"` - AgentMetadata AgentMetadata `json:"agent_metadata,omitempty" yaml:"agent_metadata,omitempty"` - } - - // BaseConfigContent is the content of a base config - BaseConfigContent string - - // Snippet is a snippet of configuration returned by the config API. - Snippet struct { - // Config is the snippet of config to be included. - Config string `json:"config" yaml:"config"` - } - - AgentMetadata struct { - ExternalLabels map[string]string `json:"external_labels,omitempty" yaml:"external_labels,omitempty"` - TemplateVariables map[string]any `json:"template_variables,omitempty" yaml:"template_variables,omitempty"` - } - - // SnippetContent defines the internal structure of a snippet configuration. - SnippetContent struct { - // MetricsScrapeConfigs is a YAML containing list of metrics scrape configs. - MetricsScrapeConfigs []*pc.ScrapeConfig `yaml:"metrics_scrape_configs,omitempty"` - - // LogsScrapeConfigs is a YAML containing list of logs scrape configs. - LogsScrapeConfigs []scrapeconfig.Config `yaml:"logs_scrape_configs,omitempty"` - - // IntegrationConfigs is a YAML containing list of integrations. 
- IntegrationConfigs integrations.ManagerConfig `yaml:"integration_configs,omitempty"` - } -) - -func NewRemoteConfig(buf []byte) (*RemoteConfig, error) { - rc := &RemoteConfig{} - err := yaml.Unmarshal(buf, rc) - if err != nil { - return nil, err - } - return rc, nil -} - -// BuildAgentConfig builds an agent configuration from a base config and a list of snippets -func (rc *RemoteConfig) BuildAgentConfig() (*Config, error) { - baseConfig, err := evaluateTemplate(string(rc.BaseConfig), rc.AgentMetadata.TemplateVariables) - if err != nil { - return nil, err - } - - c := DefaultConfig() - err = yaml.Unmarshal([]byte(baseConfig), &c) - if err != nil { - return nil, err - } - - // For now Agent Management only supports integrations v1 - if err := c.Integrations.setVersion(IntegrationsVersion1); err != nil { - return nil, err - } - - err = appendSnippets(&c, rc.Snippets, rc.AgentMetadata.TemplateVariables) - if err != nil { - return nil, err - } - appendExternalLabels(&c, rc.AgentMetadata.ExternalLabels) - return &c, nil -} - -func appendSnippets(c *Config, snippets []Snippet, templateVars map[string]any) error { - metricsConfigs := instance.DefaultConfig - metricsConfigs.Name = "snippets" - logsConfigs := logs.InstanceConfig{ - Name: "snippets", - ScrapeConfig: []scrapeconfig.Config{}, - } - logsConfigs.Initialize() - integrationConfigs := integrations.DefaultManagerConfig() - - // Map used to identify if an integration is already configured and avoid overriding it - configuredIntegrations := map[string]bool{} - for _, itg := range c.Integrations.ConfigV1.Integrations { - configuredIntegrations[itg.Name()] = true - } - - for _, snippet := range snippets { - snippetConfig, err := evaluateTemplate(snippet.Config, templateVars) - if err != nil { - return err - } - - var snippetContent SnippetContent - err = yaml.Unmarshal([]byte(snippetConfig), &snippetContent) - if err != nil { - return err - } - metricsConfigs.ScrapeConfigs = append(metricsConfigs.ScrapeConfigs, snippetContent.MetricsScrapeConfigs...) - logsConfigs.ScrapeConfig = append(logsConfigs.ScrapeConfig, snippetContent.LogsScrapeConfigs...) - - for _, snip := range snippetContent.IntegrationConfigs.Integrations { - if _, ok := configuredIntegrations[snip.Name()]; !ok { - integrationConfigs.Integrations = append(integrationConfigs.Integrations, snip) - configuredIntegrations[snip.Name()] = true - } - } - } - - if len(metricsConfigs.ScrapeConfigs) > 0 { - c.Metrics.Configs = append(c.Metrics.Configs, metricsConfigs) - } - - if len(logsConfigs.ScrapeConfig) > 0 { - // rc.Config.Logs is initialized as nil, so we need to check if it's nil before appending - if c.Logs == nil { - c.Logs = &logs.Config{ - Configs: []*logs.InstanceConfig{}, - } - } - c.Logs.Configs = append(c.Logs.Configs, &logsConfigs) - } - - c.Integrations.ConfigV1.Integrations = append(c.Integrations.ConfigV1.Integrations, integrationConfigs.Integrations...) 
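
The configuredIntegrations set above is a plain first-writer-wins merge: integrations from the base config are recorded first, and snippets may only contribute names not yet seen. A standalone sketch of that idea, with invented names:

package main

import "fmt"

// mergeUnique appends items from extra to base, skipping any name already
// present; earlier entries win, mirroring how snippet integrations must not
// override ones configured in the base config.
func mergeUnique(base, extra []string) []string {
	seen := make(map[string]bool, len(base))
	for _, name := range base {
		seen[name] = true
	}
	for _, name := range extra {
		if !seen[name] {
			base = append(base, name)
			seen[name] = true
		}
	}
	return base
}

func main() {
	fmt.Println(mergeUnique(
		[]string{"agent", "node_exporter"},
		[]string{"node_exporter", "redis"},
	)) // [agent node_exporter redis]
}
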
- return nil -} - -func appendExternalLabels(c *Config, externalLabels map[string]string) { - // Avoid doing anything if there are no external labels - if len(externalLabels) == 0 { - return - } - // Start off with the existing external labels, which will only be added to (not replaced) - metricsExternalLabels := c.Metrics.Global.Prometheus.ExternalLabels.Map() - for k, v := range externalLabels { - if _, ok := metricsExternalLabels[k]; !ok { - metricsExternalLabels[k] = v - } - } - - logsExternalLabels := make(model.LabelSet) - for k, v := range externalLabels { - logsExternalLabels[model.LabelName(k)] = model.LabelValue(v) - } - - c.Metrics.Global.Prometheus.ExternalLabels = labels.FromMap(metricsExternalLabels) - for i, cc := range c.Logs.Global.ClientConfigs { - c.Logs.Global.ClientConfigs[i].ExternalLabels.LabelSet = logsExternalLabels.Merge(cc.ExternalLabels.LabelSet) - } -} - -func evaluateTemplate(config string, templateVariables map[string]any) (string, error) { - tpl, err := template.New("config").Parse(config) - if err != nil { - return "", err - } - - var buf bytes.Buffer - err = tpl.Execute(&buf, templateVariables) - if err != nil { - return "", err - } - - return buf.String(), nil -} diff --git a/internal/static/config/agentmanagement_test.go b/internal/static/config/agentmanagement_test.go index 2cae67cfa6..28da655d27 100644 --- a/internal/static/config/agentmanagement_test.go +++ b/internal/static/config/agentmanagement_test.go @@ -1,57 +1,14 @@ package config import ( - "crypto/sha256" - "encoding/hex" - "errors" - "flag" "testing" "time" - "github.com/grafana/agent/internal/static/config/features" - "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/util" "github.com/prometheus/common/config" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) -// testRemoteConfigProvider is an implementation of remoteConfigProvider that can be -// used for testing. It allows setting the values to return for both fetching the -// remote config bytes & errors as well as the cached config & errors. 
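
The test double below follows the usual Go pattern of a hand-rolled fake: a struct holding canned return values plus call counters, satisfying the same interface as the production provider. Reduced to a sketch with a hypothetical one-method interface:

package main

import (
	"errors"
	"fmt"
)

// fetcher is a toy stand-in for the remoteConfigProvider interface.
type fetcher interface {
	Fetch() ([]byte, error)
}

// fakeFetcher returns canned values and records how often it was called,
// the way the test provider below counts fetch and cache lookups.
type fakeFetcher struct {
	bytesToReturn []byte
	errToReturn   error
	calls         int
}

func (f *fakeFetcher) Fetch() ([]byte, error) {
	f.calls++
	return f.bytesToReturn, f.errToReturn
}

func main() {
	f := &fakeFetcher{errToReturn: errors.New("connection refused")}
	_, err := f.Fetch()
	fmt.Println(err, f.calls) // connection refused 1
}
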
-type testRemoteConfigProvider struct { - InitialConfig *AgentManagementConfig - - fetchedConfigBytesToReturn []byte - fetchedConfigErrorToReturn error - fetchRemoteConfigCallCount int - - cachedConfigToReturn []byte - cachedConfigErrorToReturn error - getCachedConfigCallCount int - didCacheRemoteConfig bool -} - -func (t *testRemoteConfigProvider) GetCachedRemoteConfig() ([]byte, error) { - t.getCachedConfigCallCount += 1 - return t.cachedConfigToReturn, t.cachedConfigErrorToReturn -} - -func (t *testRemoteConfigProvider) FetchRemoteConfig() ([]byte, error) { - t.fetchRemoteConfigCallCount += 1 - return t.fetchedConfigBytesToReturn, t.fetchedConfigErrorToReturn -} - -func (t *testRemoteConfigProvider) CacheRemoteConfig(r []byte) error { - t.didCacheRemoteConfig = true - return nil -} - -func (t *testRemoteConfigProvider) GetPollingInterval() time.Duration { - return t.InitialConfig.PollingInterval -} - var validAgentManagementConfig = AgentManagementConfig{ Enabled: true, Host: "localhost:1234", @@ -71,8 +28,6 @@ var validAgentManagementConfig = AgentManagementConfig{ }, } -var cachedConfig = []byte(`{"base_config":"","snippets":[]}`) - func TestUnmarshalDefault(t *testing.T) { cfg := `host: "localhost:1234" protocol: "https" @@ -162,418 +117,3 @@ func TestValidateLabelManagement(t *testing.T) { cfg.RemoteConfiguration.AgentID = "test_agent_id" assert.NoError(t, cfg.Validate()) } - -func TestSleepTime(t *testing.T) { - cfg := ` -api_url: "http://localhost" -basic_auth: - username: "initial_user" -protocol: "http" -polling_interval: "1m" -remote_configuration: - namespace: "new_namespace" - cache_location: "/etc"` - - var am AgentManagementConfig - yaml.Unmarshal([]byte(cfg), &am) - assert.Equal(t, time.Minute, am.SleepTime()) -} - -func TestFuzzJitterTime(t *testing.T) { - am := validAgentManagementConfig - pollingInterval := 2 * time.Minute - am.PollingInterval = pollingInterval - - zero := time.Duration(0) - - for i := 0; i < 10_000; i++ { - j := am.JitterTime() - assert.GreaterOrEqual(t, j, zero) - assert.Less(t, j, pollingInterval) - } -} - -func TestFullUrl(t *testing.T) { - c := validAgentManagementConfig - actual, err := c.fullUrl() - assert.NoError(t, err) - assert.Equal(t, "https://localhost:1234/agent-management/api/agent/v2/namespace/test_namespace/remote_config?a=A&b=B", actual) -} - -func TestRemoteConfigHashCheck(t *testing.T) { - // not a truly valid Agent Management config, but used for testing against - // precomputed sha256 hash - ic := AgentManagementConfig{ - Protocol: "http", - } - marshalled, err := yaml.Marshal(ic) - require.NoError(t, err) - icHashBytes := sha256.Sum256(marshalled) - icHash := hex.EncodeToString(icHashBytes[:]) - - rcCache := remoteConfigCache{ - InitialConfigHash: icHash, - Config: "server:\\n log_level: debug", - } - - require.NoError(t, initialConfigHashCheck(ic, rcCache)) - rcCache.InitialConfigHash = "abc" - require.Error(t, initialConfigHashCheck(ic, rcCache)) - - differentIc := validAgentManagementConfig - require.Error(t, initialConfigHashCheck(differentIc, rcCache)) -} - -func TestNewRemoteConfigProvider_ValidInitialConfig(t *testing.T) { - invalidAgentManagementConfig := &AgentManagementConfig{ - Enabled: true, - Host: "localhost:1234", - HTTPClientConfig: config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: "test", - PasswordFile: "/test/path", - }, - }, - Protocol: "https", - PollingInterval: time.Minute, - RemoteConfiguration: RemoteConfiguration{ - Labels: labelMap{"b": "B", "a": "A"}, - Namespace: "test_namespace", - 
CacheLocation: "/test/path/", - }, - } - - cfg := Config{ - AgentManagement: *invalidAgentManagementConfig, - } - _, err := newRemoteConfigProvider(&cfg) - assert.NoError(t, err) -} - -func TestNewRemoteConfigProvider_InvalidProtocol(t *testing.T) { - invalidAgentManagementConfig := &AgentManagementConfig{ - Enabled: true, - Host: "localhost:1234", - HTTPClientConfig: config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: "test", - PasswordFile: "/test/path", - }, - }, - Protocol: "ws", - PollingInterval: time.Minute, - RemoteConfiguration: RemoteConfiguration{ - Labels: labelMap{"b": "B", "a": "A"}, - Namespace: "test_namespace", - CacheLocation: "/test/path/", - }, - } - - cfg := Config{ - AgentManagement: *invalidAgentManagementConfig, - } - _, err := newRemoteConfigProvider(&cfg) - assert.Error(t, err) -} - -func TestNewRemoteConfigHTTPProvider_InvalidInitialConfig(t *testing.T) { - // this is invalid because it is missing the password file - invalidAgentManagementConfig := &AgentManagementConfig{ - Enabled: true, - Host: "localhost:1234", - HTTPClientConfig: config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: "test", - }, - }, - Protocol: "https", - PollingInterval: time.Minute, - RemoteConfiguration: RemoteConfiguration{ - Labels: labelMap{"b": "B", "a": "A"}, - Namespace: "test_namespace", - CacheLocation: "/test/path/", - }, - } - - cfg := Config{ - AgentManagement: *invalidAgentManagementConfig, - } - _, err := newRemoteConfigHTTPProvider(&cfg) - assert.Error(t, err) -} - -func TestGetRemoteConfig_UnmarshallableRemoteConfig(t *testing.T) { - defaultCfg := DefaultConfig() - brokenCfg := `completely invalid config (maybe it got corrupted, maybe it was somehow set this way)` - - invalidCfgBytes := []byte(brokenCfg) - - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigBytesToReturn = invalidCfgBytes - testProvider.cachedConfigToReturn = cachedConfig - - // flagset is required because some default values are extracted from it. - // In addition, some flags are defined as dependencies for validation - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.False(t, testProvider.didCacheRemoteConfig) - - // check that the returned config is the cached one - // Note: Validate is required for the comparison as it mutates the config - expected := defaultCfg - expected.Validate(fs) - assert.True(t, util.CompareYAML(*cfg, expected)) -} - -func TestGetRemoteConfig_RemoteFetchFails(t *testing.T) { - defaultCfg := DefaultConfig() - - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigErrorToReturn = errors.New("connection refused") - testProvider.cachedConfigToReturn = cachedConfig - - // flagset is required because some default values are extracted from it. 
- // In addition, some flags are defined as dependencies for validation - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.False(t, testProvider.didCacheRemoteConfig) - - // check that the returned config is the cached one - // Note: Validate is required for the comparison as it mutates the config - expected := defaultCfg - expected.Validate(fs) - assert.True(t, util.CompareYAML(*cfg, expected)) -} - -func TestGetRemoteConfig_SemanticallyInvalidBaseConfig(t *testing.T) { - defaultCfg := DefaultConfig() - - // this is semantically invalid because it has two scrape_configs with - // the same job_name - invalidConfig := ` -{ - "base_config": "metrics:\n configs:\n - name: Metrics Snippets\n scrape_configs:\n - job_name: 'prometheus'\n scrape_interval: 15s\n static_configs:\n - targets: ['localhost:12345']\n - job_name: 'prometheus'\n scrape_interval: 15s\n static_configs:\n - targets: ['localhost:12345']\n", - "snippets": [] -}` - invalidCfgBytes := []byte(invalidConfig) - - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigBytesToReturn = invalidCfgBytes - testProvider.cachedConfigToReturn = cachedConfig - - // flagset is required because some default values are extracted from it. - // In addition, some flags are defined as dependencies for validation - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.False(t, testProvider.didCacheRemoteConfig) - - // check that the returned config is the cached one - // Note: Validate is required for the comparison as it mutates the config - expected := defaultCfg - expected.Validate(fs) - assert.True(t, util.CompareYAML(*cfg, expected)) -} - -func TestGetRemoteConfig_InvalidSnippet(t *testing.T) { - defaultCfg := DefaultConfig() - - // this is semantically invalid because it has two scrape_configs with - // the same job_name - invalidConfig := ` -{ - "base_config": "server:\n log_level: info\n log_format: logfmt\n", - "snippets": [ - { - "config": "metrics_scrape_configs:\n- job_name: 'prometheus'\n- job_name: 'prometheus'\n" - } - ] -}` - invalidCfgBytes := []byte(invalidConfig) - - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigBytesToReturn = invalidCfgBytes - testProvider.cachedConfigToReturn = cachedConfig - - // flagset is required because some default values are extracted from it. 
- // In addition, some flags are defined as dependencies for validation - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.False(t, testProvider.didCacheRemoteConfig) - - // check that the returned config is the cached one - // Note: Validate is required for the comparison as it mutates the config - expected := defaultCfg - expected.Validate(fs) - assert.True(t, util.CompareYAML(*cfg, expected)) -} - -func TestGetRemoteConfig_EmptyBaseConfig(t *testing.T) { - defaultCfg := DefaultConfig() - - validConfig := ` -{ - "base_config": "", - "snippets": [] -}` - cfgBytes := []byte(validConfig) - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigBytesToReturn = cfgBytes - testProvider.cachedConfigToReturn = cachedConfig - - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.True(t, testProvider.didCacheRemoteConfig) - - // check that the returned config is not the cached one - assert.NotEqual(t, "debug", cfg.Server.LogLevel.String()) -} - -func TestGetRemoteConfig_ValidBaseConfig(t *testing.T) { - defaultCfg := DefaultConfig() - validConfig := ` -{ - "base_config": "server:\n log_level: debug\n log_format: logfmt\nlogs:\n positions_directory: /tmp\n global:\n clients:\n - basic_auth:\n password_file: key.txt\n username: 278220\n url: https://logs-prod-eu-west-0.grafana.net/loki/api/v1/push\nintegrations:\n agent:\n enabled: false\n", - "snippets": [ - { - "config": "metrics_scrape_configs:\n- job_name: 'prometheus'\n scrape_interval: 15s\n static_configs:\n - targets: ['localhost:12345']\nlogs_scrape_configs:\n- job_name: yologs\n static_configs:\n - targets: [localhost]\n labels:\n job: yologs\n __path__: /tmp/yo.log\n", - "selector": { - "hostname": "machine-1", - "team": "team-a" - } - } - ] -}` - cfgBytes := []byte(validConfig) - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigBytesToReturn = cfgBytes - testProvider.cachedConfigToReturn = cachedConfig - - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.True(t, testProvider.didCacheRemoteConfig) - - // check that the returned config is not the cached one - assert.False(t, util.CompareYAML(*cfg, defaultCfg)) - - // check some fields to make sure the config was parsed correctly - assert.Equal(t, "debug", cfg.Server.LogLevel.String()) - assert.Equal(t, "278220", cfg.Logs.Global.ClientConfigs[0].Client.BasicAuth.Username) - assert.Equal(t, "prometheus", cfg.Metrics.Configs[0].ScrapeConfigs[0].JobName) - assert.Equal(t, "yologs", cfg.Logs.Configs[0].ScrapeConfig[0].JobName) - assert.Equal(t, 1, len(cfg.Integrations.ConfigV1.Integrations)) -} - -func TestGetRemoteConfig_ExpandsEnvVars(t *testing.T) { - defaultCfg := DefaultConfig() - validConfig := ` -{ - "base_config": "server:\n log_level: info\n log_format: ${LOG_FORMAT}\nlogs:\n positions_directory: /tmp\n global:\n clients:\n - basic_auth:\n 
password_file: key.txt\n username: 278220\n url: https://logs-prod-eu-west-0.grafana.net/loki/api/v1/push\nintegrations:\n agent:\n enabled: false\n", - "snippets": [ - { - "config": "metrics_scrape_configs:\n- job_name: 'prometheus'\n scrape_interval: ${SCRAPE_INTERVAL}\n static_configs:\n - targets: ['localhost:12345']\n", - "selector": { - "hostname": "machine-1", - "team": "team-a" - } - } - ] -}` - t.Setenv("SCRAPE_INTERVAL", "15s") - t.Setenv("LOG_FORMAT", "json") - - cfgBytes := []byte(validConfig) - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigBytesToReturn = cfgBytes - testProvider.cachedConfigToReturn = cachedConfig - - fs := flag.NewFlagSet("test", flag.ExitOnError) - var configExpandEnv bool - fs.BoolVar(&configExpandEnv, "config.expand-env", false, "") - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getRemoteConfig(true, &testProvider, logger, fs, false) - assert.NoError(t, err) - assert.Equal(t, "15s", cfg.Metrics.Configs[0].ScrapeConfigs[0].ScrapeInterval.String()) - assert.Equal(t, "json", cfg.Server.LogFormat) -} - -func TestGetCachedConfig_DefaultConfigFallback(t *testing.T) { - defaultCfg := DefaultConfig() - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.cachedConfigErrorToReturn = errors.New("no cached config") - - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - cfg, err := getCachedRemoteConfig(true, &testProvider, fs, logger) - assert.NoError(t, err) - - // check that the returned config is the default one - assert.True(t, util.CompareYAML(*cfg, defaultCfg)) -} - -func TestGetCachedConfig_RetryAfter(t *testing.T) { - defaultCfg := DefaultConfig() - am := validAgentManagementConfig - logger := server.NewLogger(defaultCfg.Server) - testProvider := testRemoteConfigProvider{InitialConfig: &am} - testProvider.fetchedConfigErrorToReturn = retryAfterError{retryAfter: time.Duration(0)} - testProvider.cachedConfigToReturn = cachedConfig - - fs := flag.NewFlagSet("test", flag.ExitOnError) - features.Register(fs, allFeatures) - defaultCfg.RegisterFlags(fs) - - _, err := getRemoteConfig(true, &testProvider, logger, fs, true) - assert.NoError(t, err) - assert.False(t, testProvider.didCacheRemoteConfig) - - // check that FetchRemoteConfig was called only once on the TestProvider - assert.Equal(t, 1, testProvider.fetchRemoteConfigCallCount) - - // the cached config should have been retrieved once, on the second - // attempt to fetch the remote config - assert.Equal(t, 1, testProvider.getCachedConfigCallCount) -} diff --git a/internal/static/config/config.go b/internal/static/config/config.go index d6f732c2af..2ec8f4f590 100644 --- a/internal/static/config/config.go +++ b/internal/static/config/config.go @@ -10,18 +10,14 @@ import ( "unicode" "github.com/drone/envsubst/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/agent/internal/build" "github.com/grafana/agent/internal/static/config/encoder" "github.com/grafana/agent/internal/static/config/features" - "github.com/grafana/agent/internal/static/config/instrumentation" "github.com/grafana/agent/internal/static/logs" "github.com/grafana/agent/internal/static/metrics" "github.com/grafana/agent/internal/static/server" 
"github.com/grafana/agent/internal/static/traces" "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/config" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -166,14 +162,6 @@ func (c Config) MarshalYAML() (interface{}, error) { return m, nil } -// LogDeprecations will log use of any deprecated fields to l as warn-level -// messages. -func (c *Config) LogDeprecations(l log.Logger) { - for _, d := range c.Deprecations { - level.Warn(l).Log("msg", fmt.Sprintf("DEPRECATION NOTICE: %s", d)) - } -} - // Validate validates the config, flags, and sets default values. func (c *Config) Validate(fs *flag.FlagSet) error { if c.Server == nil { @@ -243,101 +231,6 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&c.EnableConfigEndpoints, "config.enable-read-api", false, "Enables the /-/config and /agent/api/v1/configs/{name} APIs. Be aware that secrets could be exposed by enabling these endpoints!") } -// LoadFile reads a file and passes the contents to Load -func LoadFile(filename string, expandEnvVars bool, c *Config) error { - buf, err := os.ReadFile(filename) - if err != nil { - return fmt.Errorf("error reading config file %w", err) - } - instrumentation.InstrumentConfig(buf) - return LoadBytes(buf, expandEnvVars, c) -} - -// loadFromAgentManagementAPI loads and merges a config from an Agent Management API. -// 1. Read local initial config. -// 2. Get the remote config. -// a) Fetch from remote. If this fails or is invalid: -// b) Read the remote config from cache. If this fails, return an error. -// 4. Merge the initial and remote config into c. -func loadFromAgentManagementAPI(path string, expandEnvVars bool, c *Config, log *server.Logger, fs *flag.FlagSet) error { - // Load the initial config from disk without instrumenting the config hash - buf, err := os.ReadFile(path) - if err != nil { - return fmt.Errorf("error reading initial config file %w", err) - } - - err = LoadBytes(buf, expandEnvVars, c) - if err != nil { - return fmt.Errorf("failed to load initial config: %w", err) - } - - configProvider, err := newRemoteConfigProvider(c) - if err != nil { - return err - } - remoteConfig, err := getRemoteConfig(expandEnvVars, configProvider, log, fs, true) - if err != nil { - return err - } - mergeEffectiveConfig(c, remoteConfig) - - effectiveConfigBytes, err := yaml.Marshal(c) - if err != nil { - level.Warn(log).Log("msg", "error marshalling config for instrumenting config version", "err", err) - } else { - instrumentation.InstrumentConfig(effectiveConfigBytes) - } - - return nil -} - -// mergeEffectiveConfig overwrites any values in initialConfig with those in remoteConfig -func mergeEffectiveConfig(initialConfig *Config, remoteConfig *Config) { - initialConfig.Server = remoteConfig.Server - initialConfig.Metrics = remoteConfig.Metrics - initialConfig.Integrations = remoteConfig.Integrations - initialConfig.Traces = remoteConfig.Traces - initialConfig.Logs = remoteConfig.Logs -} - -// LoadRemote reads a config from url -func LoadRemote(url string, expandEnvVars bool, c *Config) error { - remoteOpts := &remoteOpts{} - if c.BasicAuthUser != "" && c.BasicAuthPassFile != "" { - remoteOpts.HTTPClientConfig = &config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: c.BasicAuthUser, - PasswordFile: c.BasicAuthPassFile, - }, - } - } - - if remoteOpts.HTTPClientConfig != nil { - dir, err := os.Getwd() - if err != nil { - return fmt.Errorf("failed to get current working directory: %w", err) - } - remoteOpts.HTTPClientConfig.SetDirectory(dir) - } - 
- rc, err := newRemoteProvider(url, remoteOpts) - if err != nil { - return fmt.Errorf("error reading remote config: %w", err) - } - // fall back to file if no scheme is passed - if rc == nil { - return LoadFile(url, expandEnvVars, c) - } - bb, _, err := rc.retrieve() - if err != nil { - return fmt.Errorf("error retrieving remote config: %w", err) - } - - instrumentation.InstrumentConfig(bb) - - return LoadBytes(bb, expandEnvVars, c) -} - func performEnvVarExpansion(buf []byte, expandEnvVars bool) ([]byte, error) { utf8Buf, err := encoder.EnsureUTF8(buf, false) if err != nil { @@ -385,29 +278,6 @@ func getenv(name string) string { return os.Getenv(name) } -// Load loads a config file from a flagset. Flags will be registered -// to the flagset before parsing them with the values specified by -// args. -func Load(fs *flag.FlagSet, args []string, log *server.Logger) (*Config, error) { - cfg, error := LoadFromFunc(fs, args, func(path, fileType string, expandEnvVars bool, c *Config) error { - switch fileType { - case fileTypeYAML: - if features.Enabled(fs, featRemoteConfigs) { - return LoadRemote(path, expandEnvVars, c) - } - if features.Enabled(fs, featAgentManagement) { - return loadFromAgentManagementAPI(path, expandEnvVars, c, log, fs) - } - return LoadFile(path, expandEnvVars, c) - default: - return fmt.Errorf("unknown file type %q. accepted values: %s", fileType, strings.Join(fileTypes, ", ")) - } - }) - - instrumentation.InstrumentLoad(error == nil) - return cfg, error -} - type loaderFunc func(path string, fileType string, expandEnvVars bool, target *Config) error func applyIntegrationValuesFromFlagset(fs *flag.FlagSet, args []string, path string, cfg *Config) error { diff --git a/internal/static/config/config_test.go b/internal/static/config/config_test.go index 8b7e7aef72..364f2f2513 100644 --- a/internal/static/config/config_test.go +++ b/internal/static/config/config_test.go @@ -13,13 +13,11 @@ import ( "github.com/grafana/agent/internal/static/config/encoder" "github.com/grafana/agent/internal/static/metrics" "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/server" "github.com/grafana/agent/internal/util" commonCfg "github.com/prometheus/common/config" "github.com/prometheus/common/model" promCfg "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -457,71 +455,18 @@ func TestAgent_OmitEmptyFields(t *testing.T) { require.Equal(t, "{}\n", string(yml)) } -func TestAgentManagement_MergeEffectiveConfig(t *testing.T) { - initialCfg := ` -server: - log_level: info -logs: - positions_directory: /tmp -agent_management: - host: "localhost" - basic_auth: - username: "initial_user" - protocol: "http" - polling_interval: "1m" - remote_configuration: - namespace: "new_namespace" - cache_location: "/etc"` - - remoteCfg := ` -server: - log_level: debug -metrics: - wal_directory: /tmp - global: - scrape_interval: 5m -integrations: - scrape_integrations: true - -agent_management: - host: "localhost:80" - basic_auth: - username: "new_user" - protocol: "http" - polling_interval: "10s" - remote_configuration: - namespace: "new_namespace" - cache_location: "/etc"` - - var ic, rc Config - err := LoadBytes([]byte(initialCfg), false, &ic) - assert.NoError(t, err) - err = LoadBytes([]byte(remoteCfg), false, &rc) - assert.NoError(t, err) - - // keep a copy of the initial config's agent management block to ensure it 
isn't - // overwritten by the remote config's - initialAgentManagement := ic.AgentManagement - mergeEffectiveConfig(&ic, &rc) - - // agent_management configuration should not be overwritten by the remote config - assert.Equal(t, initialAgentManagement, ic.AgentManagement) - - // since these elements are purposefully different for the previous portion of the test, - // unset them before comparing the rest of the config - ic.AgentManagement = AgentManagementConfig{} - rc.AgentManagement = AgentManagementConfig{} - - assert.True(t, util.CompareYAML(ic, rc)) -} - func TestConfig_EmptyServerConfigFails(t *testing.T) { // Since we are testing defaults via config.Load, we need a file instead of a string. // This test file has an empty server stanza, we expect default values out. - defaultServerCfg := server.DefaultConfig() - logger := server.NewLogger(&defaultServerCfg) fs := flag.NewFlagSet("", flag.ExitOnError) - _, err := Load(fs, []string{"--config.file", "./testdata/server_empty.yml"}, logger) + + _, err := LoadFromFunc(fs, []string{"--config.file", "./testdata/server_empty.yml"}, func(path, fileType string, expandEnvVars bool, target *Config) error { + bb, err := os.ReadFile(path) + if err != nil { + return err + } + return LoadBytes(bb, expandEnvVars, target) + }) require.Error(t, err) } diff --git a/internal/static/config/integrations.go b/internal/static/config/integrations.go index c4ebb70ec3..f0d2652e6d 100644 --- a/internal/static/config/integrations.go +++ b/internal/static/config/integrations.go @@ -4,15 +4,11 @@ import ( "fmt" "reflect" - "github.com/go-kit/log" - "github.com/gorilla/mux" v1 "github.com/grafana/agent/internal/static/integrations" v2 "github.com/grafana/agent/internal/static/integrations/v2" "github.com/grafana/agent/internal/static/metrics" "github.com/grafana/agent/internal/static/server" "github.com/grafana/agent/internal/util" - "github.com/prometheus/statsd_exporter/pkg/level" - "golang.org/x/exp/maps" "gopkg.in/yaml.v2" ) @@ -129,64 +125,3 @@ func (c *VersionedIntegrations) setVersion(v IntegrationsVersion) error { panic(fmt.Sprintf("unknown integrations version %d", c.Version)) } } - -// EnabledIntegrations returns a slice of enabled integrations -func (c *VersionedIntegrations) EnabledIntegrations() []string { - integrations := map[string]struct{}{} - if c.ConfigV1 != nil { - for _, integration := range c.ConfigV1.Integrations { - integrations[integration.Name()] = struct{}{} - } - } - if c.ConfigV2 != nil { - for _, integration := range c.ConfigV2.Configs { - integrations[integration.Name()] = struct{}{} - } - } - return maps.Keys(integrations) -} - -// IntegrationsGlobals is a global struct shared across integrations. -type IntegrationsGlobals = v2.Globals - -// Integrations is an abstraction over both the v1 and v2 systems. -type Integrations interface { - ApplyConfig(*VersionedIntegrations, IntegrationsGlobals) error - WireAPI(*mux.Router) - Stop() -} - -// NewIntegrations creates a new subsystem. globals should be provided regardless -// of useV2. globals.SubsystemOptions will be automatically set if cfg.Version -// is set to IntegrationsVersion2. 
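
NewIntegrations, removed below, was essentially a version-switching factory returning one of two adapters behind a common interface. The shape of that pattern, sketched with invented types:

package main

import "fmt"

// runner abstracts two incompatible subsystem versions behind one interface,
// the way Integrations wraps the v1 Manager and v2 Subsystem.
type runner interface{ Run() string }

type v1Runner struct{}
type v2Runner struct{}

func (v1Runner) Run() string { return "v1" }
func (v2Runner) Run() string { return "v2" }

// newRunner picks an implementation from a version flag, mirroring the
// version switch in NewIntegrations.
func newRunner(useV2 bool) runner {
	if useV2 {
		return v2Runner{}
	}
	return v1Runner{}
}

func main() {
	fmt.Println(newRunner(false).Run(), newRunner(true).Run()) // v1 v2
}
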
-func NewIntegrations(logger log.Logger, cfg *VersionedIntegrations, globals IntegrationsGlobals) (Integrations, error) { - if cfg.Version != IntegrationsVersion2 { - instance, err := v1.NewManager(*cfg.ConfigV1, logger, globals.Metrics.InstanceManager(), globals.Metrics.Validate) - if err != nil { - return nil, err - } - return &v1Integrations{Manager: instance}, nil - } - - level.Warn(logger).Log("msg", "integrations-next is enabled. integrations-next is subject to change") - - globals.SubsystemOpts = *cfg.ConfigV2 - instance, err := v2.NewSubsystem(logger, globals) - if err != nil { - return nil, err - } - return &v2Integrations{Subsystem: instance}, nil -} - -type v1Integrations struct{ *v1.Manager } - -func (s *v1Integrations) ApplyConfig(cfg *VersionedIntegrations, _ IntegrationsGlobals) error { - return s.Manager.ApplyConfig(*cfg.ConfigV1) -} - -type v2Integrations struct{ *v2.Subsystem } - -func (s *v2Integrations) ApplyConfig(cfg *VersionedIntegrations, globals IntegrationsGlobals) error { - globals.SubsystemOpts = *cfg.ConfigV2 - return s.Subsystem.ApplyConfig(globals) -} diff --git a/internal/static/config/integrations_test.go b/internal/static/config/integrations_test.go index b0854f3219..d3537997e0 100644 --- a/internal/static/config/integrations_test.go +++ b/internal/static/config/integrations_test.go @@ -2,7 +2,6 @@ package config import ( "flag" - "sort" "testing" "github.com/stretchr/testify/require" @@ -46,74 +45,6 @@ integrations: require.NotNil(t, c.Integrations.ConfigV2) } -func TestEnabledIntegrations_v1(t *testing.T) { - cfg := ` -metrics: - wal_directory: /tmp/wal - -integrations: - agent: - enabled: true - node_exporter: - enabled: true` - - fs := flag.NewFlagSet("test", flag.ExitOnError) - c, err := LoadFromFunc(fs, []string{"-config.file", "test"}, func(_, _ string, _ bool, c *Config) error { - return LoadBytes([]byte(cfg), false, c) - }) - require.NoError(t, err) - - actual := c.Integrations.EnabledIntegrations() - sort.Strings(actual) - expected := []string{"agent", "node_exporter"} - sort.Strings(expected) - require.Equal(t, actual, expected) -} - -func TestEnabledIntegrations_v2(t *testing.T) { - cfg := ` -metrics: - wal_directory: /tmp/wal - -integrations: - agent: - autoscrape: - enable: false - node_exporter: - autoscrape: - enable: false` - - fs := flag.NewFlagSet("test", flag.ExitOnError) - c, err := LoadFromFunc(fs, []string{"-config.file", "test", "-enable-features=integrations-next"}, func(_, _ string, _ bool, c *Config) error { - return LoadBytes([]byte(cfg), false, c) - }) - require.NoError(t, err) - - actual := c.Integrations.EnabledIntegrations() - sort.Strings(actual) - expected := []string{"agent", "node_exporter"} - sort.Strings(expected) - require.Equal(t, actual, expected) -} - -func TestEnabledIntegrations_v2MultipleInstances(t *testing.T) { - cfg := ` -metrics: - wal_directory: /tmp/wal - -integrations: - redis_configs: - - redis_addr: "redis-0:6379" - - redis_addr: "redis-1:6379"` - - fs := flag.NewFlagSet("test", flag.ExitOnError) - c, err := LoadFromFunc(fs, []string{"-config.file", "test", "-enable-features=integrations-next"}, func(_, _ string, _ bool, c *Config) error { - return LoadBytes([]byte(cfg), false, c) - }) - require.NoError(t, err) - require.Equal(t, c.Integrations.EnabledIntegrations(), []string{"redis"}) -} - func TestSetVersionDoesNotOverrideExistingV1Integrations(t *testing.T) { cfg := ` integrations: diff --git a/internal/static/config/remote_config.go b/internal/static/config/remote_config.go deleted file mode 100644 
index 6b23c89ea1..0000000000 --- a/internal/static/config/remote_config.go +++ /dev/null @@ -1,145 +0,0 @@ -package config - -import ( - "fmt" - "io" - "net/http" - "net/url" - "time" - - "github.com/grafana/agent/internal/static/config/instrumentation" - "github.com/prometheus/common/config" -) - -// supported remote config provider schemes -const ( - httpScheme = "http" - httpsScheme = "https" -) - -// remoteOpts struct contains agent remote config options -type remoteOpts struct { - url *url.URL - HTTPClientConfig *config.HTTPClientConfig - headers map[string]string -} - -// remoteProvider interface should be implemented by config providers -type remoteProvider interface { - retrieve() ([]byte, http.Header, error) -} - -// newRemoteProvider constructs a new remote configuration provider. The rawURL is parsed -// and a provider is constructed based on the URL's scheme. -func newRemoteProvider(rawURL string, opts *remoteOpts) (remoteProvider, error) { - u, err := url.Parse(rawURL) - if err != nil { - return nil, fmt.Errorf("error parsing rawURL %s: %w", rawURL, err) - } - if opts == nil { - // Default provider opts - opts = &remoteOpts{} - } - opts.url = u - - switch u.Scheme { - case "": - // if no scheme, assume local file path, return nil and let caller handle. - return nil, nil - case httpScheme, httpsScheme: - httpP, err := newHTTPProvider(opts) - if err != nil { - return nil, fmt.Errorf("error constructing httpProvider: %w", err) - } - return httpP, nil - default: - return nil, fmt.Errorf("remote config scheme not supported: %s", u.Scheme) - } -} - -// Remote Config Providers -// httpProvider - http/https provider -type httpProvider struct { - myURL *url.URL - headers map[string]string - httpClient *http.Client -} - -// newHTTPProvider constructs a new httpProvider -func newHTTPProvider(opts *remoteOpts) (*httpProvider, error) { - httpClientConfig := config.HTTPClientConfig{} - if opts.HTTPClientConfig != nil { - err := opts.HTTPClientConfig.Validate() - if err != nil { - return nil, err - } - httpClientConfig = *opts.HTTPClientConfig - } - httpClient, err := config.NewClientFromConfig(httpClientConfig, "remote-config") - if err != nil { - return nil, err - } - return &httpProvider{ - myURL: opts.url, - httpClient: httpClient, - headers: opts.headers, - }, nil -} - -type retryAfterError struct { - retryAfter time.Duration -} - -func (r retryAfterError) Error() string { - return fmt.Sprintf("server indicated to retry after %s", r.retryAfter) -} - -type notModifiedError struct{} - -func (n notModifiedError) Error() string { - return "server indicated no changes" -} - -// retrieve implements remoteProvider and fetches the config -func (p httpProvider) retrieve() ([]byte, http.Header, error) { - req, err := http.NewRequest(http.MethodGet, p.myURL.String(), nil) - if err != nil { - return nil, nil, fmt.Errorf("error creating request: %w", err) - } - for header, headerVal := range p.headers { - req.Header.Set(header, headerVal) - } - response, err := p.httpClient.Do(req) - if err != nil { - instrumentation.InstrumentRemoteConfigFetchError() - return nil, nil, fmt.Errorf("request failed: %w", err) - } - defer response.Body.Close() - - instrumentation.InstrumentRemoteConfigFetch(response.StatusCode) - - if response.StatusCode == http.StatusTooManyRequests || response.StatusCode == http.StatusServiceUnavailable { - retryAfter := response.Header.Get("Retry-After") - if retryAfter == "" { - return nil, nil, fmt.Errorf("server indicated to retry, but no Retry-After header was provided") - } - 
retryAfterDuration, err := time.ParseDuration(retryAfter) - if err != nil { - return nil, nil, fmt.Errorf("server indicated to retry, but Retry-After header was not a valid duration: %w", err) - } - return nil, nil, retryAfterError{retryAfter: retryAfterDuration} - } - - if response.StatusCode == http.StatusNotModified { - return nil, nil, notModifiedError{} - } - - if response.StatusCode/100 != 2 { - return nil, nil, fmt.Errorf("error fetching config: status code: %d", response.StatusCode) - } - bb, err := io.ReadAll(response.Body) - if err != nil { - return nil, nil, err - } - return bb, response.Header, nil -} diff --git a/internal/static/config/remote_config_test.go b/internal/static/config/remote_config_test.go deleted file mode 100644 index f8b5b046ce..0000000000 --- a/internal/static/config/remote_config_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package config - -import ( - "fmt" - "net/http" - "net/http/httptest" - "os" - "testing" - - "github.com/prometheus/common/config" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -const configPath = "/agent.yml" - -func TestRemoteConfigHTTP(t *testing.T) { - testCfg := ` -metrics: - global: - scrape_timeout: 33s -` - - svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == configPath { - _, _ = w.Write([]byte(testCfg)) - } - })) - - svrWithBasicAuth := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - user, pass, _ := r.BasicAuth() - if user != "foo" && pass != "bar" { - w.WriteHeader(http.StatusUnauthorized) - return - } - if r.URL.Path == configPath { - _, _ = w.Write([]byte(testCfg)) - } - })) - - svrWithHeaders := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == configPath { - w.Header().Add("X-Test-Header", "test") - w.Header().Add("X-Other-Header", "test2") - _, _ = w.Write([]byte(testCfg)) - } - })) - - tempDir := t.TempDir() - err := os.WriteFile(fmt.Sprintf("%s/password-file.txt", tempDir), []byte("bar"), 0644) - require.NoError(t, err) - - passwdFileCfg := &config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: "foo", - PasswordFile: fmt.Sprintf("%s/password-file.txt", tempDir), - }, - } - dir, err := os.Getwd() - require.NoError(t, err) - passwdFileCfg.SetDirectory(dir) - - type args struct { - rawURL string - opts *remoteOpts - } - tests := []struct { - name string - args args - want []byte - wantErr bool - wantHeaders map[string][]string - }{ - { - name: "httpScheme config", - args: args{ - rawURL: fmt.Sprintf("%s/agent.yml", svr.URL), - }, - want: []byte(testCfg), - wantErr: false, - }, - { - name: "httpScheme config with basic auth", - args: args{ - rawURL: fmt.Sprintf("%s/agent.yml", svrWithBasicAuth.URL), - opts: &remoteOpts{ - HTTPClientConfig: &config.HTTPClientConfig{ - BasicAuth: &config.BasicAuth{ - Username: "foo", - Password: "bar", - }, - }, - }, - }, - want: []byte(testCfg), - wantErr: false, - }, - { - name: "httpScheme config with basic auth password file", - args: args{ - rawURL: fmt.Sprintf("%s/agent.yml", svrWithBasicAuth.URL), - opts: &remoteOpts{ - HTTPClientConfig: passwdFileCfg, - }, - }, - want: []byte(testCfg), - wantErr: false, - }, - { - name: "unsupported scheme throws error", - args: args{ - rawURL: "ssh://unsupported/scheme", - }, - want: nil, - wantErr: true, - }, - { - name: "invalid url throws error", - args: args{ - rawURL: "://invalid/url", - }, - want: nil, - wantErr: true, - }, - { - name: "response 
headers are returned", - args: args{ - rawURL: fmt.Sprintf("%s/agent.yml", svrWithHeaders.URL), - }, - want: []byte(testCfg), - wantErr: false, - wantHeaders: map[string][]string{ - "X-Test-Header": {"test"}, - "X-Other-Header": {"test2"}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - rc, err := newRemoteProvider(tt.args.rawURL, tt.args.opts) - if tt.wantErr { - assert.Error(t, err) - return - } - assert.NoError(t, err) - bb, header, err := rc.retrieve() - assert.NoError(t, err) - assert.Equal(t, string(tt.want), string(bb)) - for k, v := range tt.wantHeaders { - assert.Equal(t, v, header[k]) - } - }) - } -} diff --git a/internal/static/integrations/cadvisor/cadvisor_stub.go b/internal/static/integrations/cadvisor/cadvisor_stub.go index 80a038b85d..35bdd3455b 100644 --- a/internal/static/integrations/cadvisor/cadvisor_stub.go +++ b/internal/static/integrations/cadvisor/cadvisor_stub.go @@ -3,7 +3,11 @@ package cadvisor //nolint:golint import ( + "context" + "net/http" + "github.com/grafana/agent/internal/static/integrations" + "github.com/grafana/agent/internal/static/integrations/config" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -12,5 +16,24 @@ import ( // NewIntegration creates a new cadvisor integration func (c *Config) NewIntegration(logger log.Logger) (integrations.Integration, error) { level.Warn(logger).Log("msg", "the cadvisor integration only works on linux; enabling it on other platforms will do nothing") - return &integrations.StubIntegration{}, nil + return &stubIntegration{}, nil +} + +// stubIntegration implements a no-op integration for use on platforms not supported by an integration +type stubIntegration struct{} + +// MetricsHandler returns an http.NotFoundHandler to satisfy the Integration interface +func (i *stubIntegration) MetricsHandler() (http.Handler, error) { + return http.NotFoundHandler(), nil +} + +// ScrapeConfigs returns an empty list of scrape configs, since there is nothing to scrape +func (i *stubIntegration) ScrapeConfigs() []config.ScrapeConfig { + return []config.ScrapeConfig{} +} + +// Run just waits for the context to finish +func (i *stubIntegration) Run(ctx context.Context) error { + <-ctx.Done() + return ctx.Err() } diff --git a/internal/static/integrations/manager.go b/internal/static/integrations/manager.go index de22be37e8..59760b9c7f 100644 --- a/internal/static/integrations/manager.go +++ b/internal/static/integrations/manager.go @@ -1,39 +1,18 @@ package integrations import ( - "context" "fmt" - "net/http" - "path" - "strings" - "sync" "time" config_util "github.com/prometheus/common/config" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" "github.com/grafana/agent/internal/static/metrics" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/instance/configstore" "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/common/model" promConfig "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/relabel" ) -var ( - integrationAbnormalExits = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "agent_metrics_integration_abnormal_exits_total", - Help: "Total number of times an agent integration exited unexpectedly, causing it to be restarted.", - }, 
[]string{"integration_name"}) -) - var CurrentManagerConfig ManagerConfig = DefaultManagerConfig() // DefaultManagerConfig holds the default settings for integrations. @@ -153,400 +132,3 @@ func (c *ManagerConfig) ApplyDefaults(sflags *server.Flags, mcfg *metrics.Config return nil } - -// Manager manages a set of integrations and runs them. -type Manager struct { - logger log.Logger - - cfgMut sync.RWMutex - cfg ManagerConfig - - hostname string - - ctx context.Context - cancel context.CancelFunc - wg sync.WaitGroup - - im instance.Manager - validator configstore.Validator - - integrationsMut sync.RWMutex - integrations map[string]*integrationProcess - - handlerMut sync.Mutex - handlerCache map[string]handlerCacheEntry -} - -// NewManager creates a new integrations manager. NewManager must be given an -// InstanceManager which is responsible for accepting instance configs to -// scrape and send metrics from running integrations. -func NewManager(cfg ManagerConfig, logger log.Logger, im instance.Manager, validate configstore.Validator) (*Manager, error) { - ctx, cancel := context.WithCancel(context.Background()) - - m := &Manager{ - logger: logger, - - ctx: ctx, - cancel: cancel, - - im: im, - validator: validate, - - integrations: make(map[string]*integrationProcess, len(cfg.Integrations)), - - handlerCache: make(map[string]handlerCacheEntry), - } - - var err error - m.hostname, err = instance.Hostname() - if err != nil { - return nil, err - } - - if err := m.ApplyConfig(cfg); err != nil { - return nil, fmt.Errorf("failed applying config: %w", err) - } - return m, nil -} - -// ApplyConfig updates the configuration of the integrations subsystem. -func (m *Manager) ApplyConfig(cfg ManagerConfig) error { - var failed bool - - m.cfgMut.Lock() - defer m.cfgMut.Unlock() - - m.integrationsMut.Lock() - defer m.integrationsMut.Unlock() - - // The global prometheus config settings don't get applied to integrations until later. This - // causes us to skip reload when those settings change. - if util.CompareYAML(m.cfg, cfg) && util.CompareYAML(m.cfg.PrometheusGlobalConfig, cfg.PrometheusGlobalConfig) { - level.Debug(m.logger).Log("msg", "Integrations config is unchanged skipping apply") - return nil - } - level.Debug(m.logger).Log("msg", "Applying integrations config changes") - - select { - case <-m.ctx.Done(): - return fmt.Errorf("Manager already stopped") - default: - // No-op - } - - // Iterate over our integrations. New or changed integrations will be - // started, with their existing counterparts being shut down. - for _, ic := range cfg.Integrations { - if !ic.Common.Enabled { - continue - } - // Key is used to identify the instance of this integration within the - // instance manager and within our set of running integrations. - key := integrationKey(ic.Name()) - - // Look for an existing integration with the same key. If it exists and - // is unchanged, we have nothing to do. Otherwise, we're going to recreate - // it with the new settings, so we'll need to stop it. - if p, exist := m.integrations[key]; exist { - if util.CompareYAMLWithHook(p.cfg, ic, noScrubbedSecretsHook) { - continue - } - p.stop() - delete(m.integrations, key) - } - - l := log.With(m.logger, "integration", ic.Name()) - i, err := ic.NewIntegration(l) - if err != nil { - level.Error(m.logger).Log("msg", "failed to initialize integration. 
it will not run or be scraped", "integration", ic.Name(), "err", err) - failed = true - - // If this integration was running before, its instance won't be cleaned - // up since it's now removed from the map. We need to clean it up here. - _ = m.im.DeleteConfig(key) - continue - } - - // Find what instance label should be used to represent this integration. - var instanceKey string - if kp := ic.Common.InstanceKey; kp != nil { - // Common config takes precedence. - instanceKey = strings.TrimSpace(*kp) - } else { - instanceKey, err = ic.InstanceKey(fmt.Sprintf("%s:%d", m.hostname, cfg.ListenPort)) - if err != nil { - level.Error(m.logger).Log("msg", "failed to get instance key for integration. it will not run or be scraped", "integration", ic.Name(), "err", err) - failed = true - - // If this integration was running before, its instance won't be cleaned - // up since it's now removed from the map. We need to clean it up here. - _ = m.im.DeleteConfig(key) - continue - } - } - - // Create, start, and register the new integration. - ctx, cancel := context.WithCancel(m.ctx) - p := &integrationProcess{ - log: m.logger, - cfg: ic, - i: i, - instanceKey: instanceKey, - - ctx: ctx, - stop: cancel, - - wg: &m.wg, - wait: m.instanceBackoff, - } - go p.Run() - m.integrations[key] = p - } - - // Delete instances and processed that have been removed in between calls to - // ApplyConfig. - for key, process := range m.integrations { - foundConfig := false - for _, ic := range cfg.Integrations { - if integrationKey(ic.Name()) == key { - // If this is disabled then we should delete from integrations - if !ic.Common.Enabled { - break - } - foundConfig = true - break - } - } - if foundConfig { - continue - } - - _ = m.im.DeleteConfig(key) - process.stop() - delete(m.integrations, key) - } - - // Re-apply configs to our instance manager for all running integrations. - // Generated scrape configs may change in between calls to ApplyConfig even - // if the configs for the integration didn't. - for key, p := range m.integrations { - shouldCollect := cfg.ScrapeIntegrations - if common := p.cfg.Common; common.ScrapeIntegration != nil { - shouldCollect = *common.ScrapeIntegration - } - - switch shouldCollect { - case true: - instanceConfig := m.instanceConfigForIntegration(p, cfg) - if err := m.validator(&instanceConfig); err != nil { - level.Error(p.log).Log("msg", "failed to validate generated scrape config for integration. integration will not be scraped", "err", err, "integration", p.cfg.Name()) - failed = true - break - } - - if err := m.im.ApplyConfig(instanceConfig); err != nil { - level.Error(p.log).Log("msg", "failed to apply integration. integration will not be scraped", "err", err, "integration", p.cfg.Name()) - failed = true - } - case false: - // If a previous instance of the config was being scraped, we need to - // delete it here. Calling DeleteConfig when nothing is running is a safe - // operation. - _ = m.im.DeleteConfig(key) - } - } - - m.cfg = cfg - - if failed { - return fmt.Errorf("not all integrations were correctly updated") - } - return nil -} - -func noScrubbedSecretsHook(in interface{}) (ok bool, out interface{}, err error) { - switch v := in.(type) { - case config_util.Secret: - return true, string(v), nil - case *config_util.URL: - return true, v.String(), nil - default: - return false, nil, nil - } -} - -// integrationProcess is a running integration. 
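
integrationProcess.Run, removed below, is a small supervision loop: rerun the integration until it stops cleanly or its context is canceled, backing off between failures. A self-contained sketch of that loop, assuming a made-up supervise helper:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// supervise reruns run until it returns nil or the context is canceled,
// sleeping between failures, like Run and instanceBackoff below.
func supervise(ctx context.Context, run func(context.Context) error, backoff time.Duration) {
	for {
		err := run(ctx)
		if err == nil || errors.Is(err, context.Canceled) {
			fmt.Println("stopped integration")
			return
		}
		fmt.Println("integration stopped abnormally, restarting after backoff:", err)
		time.Sleep(backoff)
	}
}

func main() {
	attempts := 0
	supervise(context.Background(), func(context.Context) error {
		attempts++
		if attempts < 3 {
			return errors.New("boom")
		}
		return nil
	}, 10*time.Millisecond)
}
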
-type integrationProcess struct { - log log.Logger - ctx context.Context - stop context.CancelFunc - cfg UnmarshaledConfig - instanceKey string // Value for the `instance` label - i Integration - - wg *sync.WaitGroup - wait func(cfg Config, err error) -} - -// Run runs the integration until the process is canceled. -func (p *integrationProcess) Run() { - defer func() { - if r := recover(); r != nil { - err := fmt.Errorf("%v", r) - level.Error(p.log).Log("msg", "integration has panicked. THIS IS A BUG!", "err", err, "integration", p.cfg.Name()) - } - }() - - p.wg.Add(1) - defer p.wg.Done() - - for { - err := p.i.Run(p.ctx) - if err != nil && err != context.Canceled { - p.wait(p.cfg, err) - } else { - level.Info(p.log).Log("msg", "stopped integration", "integration", p.cfg.Name()) - break - } - } -} - -func (m *Manager) instanceBackoff(cfg Config, err error) { - m.cfgMut.RLock() - defer m.cfgMut.RUnlock() - - integrationAbnormalExits.WithLabelValues(cfg.Name()).Inc() - level.Error(m.logger).Log("msg", "integration stopped abnormally, restarting after backoff", "err", err, "integration", cfg.Name(), "backoff", m.cfg.IntegrationRestartBackoff) - time.Sleep(m.cfg.IntegrationRestartBackoff) -} - -func (m *Manager) instanceConfigForIntegration(p *integrationProcess, cfg ManagerConfig) instance.Config { - common := p.cfg.Common - relabelConfigs := append(cfg.DefaultRelabelConfigs(p.instanceKey), common.RelabelConfigs...) - - schema := "http" - // Check for HTTPS support - var httpClientConfig config_util.HTTPClientConfig - if cfg.ServerUsingTLS { - schema = "https" - httpClientConfig.TLSConfig = cfg.TLSConfig - } - - var scrapeConfigs []*promConfig.ScrapeConfig - - for _, isc := range p.i.ScrapeConfigs() { - sc := &promConfig.ScrapeConfig{ - JobName: fmt.Sprintf("integrations/%s", isc.JobName), - MetricsPath: path.Join("/integrations", p.cfg.Name(), isc.MetricsPath), - Params: isc.QueryParams, - Scheme: schema, - HonorLabels: false, - HonorTimestamps: true, - ScrapeInterval: model.Duration(common.ScrapeInterval), - ScrapeTimeout: model.Duration(common.ScrapeTimeout), - ServiceDiscoveryConfigs: m.scrapeServiceDiscovery(cfg), - RelabelConfigs: relabelConfigs, - MetricRelabelConfigs: common.MetricRelabelConfigs, - HTTPClientConfig: httpClientConfig, - } - - scrapeConfigs = append(scrapeConfigs, sc) - } - - instanceCfg := instance.DefaultConfig - instanceCfg.Name = integrationKey(p.cfg.Name()) - instanceCfg.ScrapeConfigs = scrapeConfigs - instanceCfg.RemoteWrite = cfg.PrometheusRemoteWrite - if common.WALTruncateFrequency > 0 { - instanceCfg.WALTruncateFrequency = common.WALTruncateFrequency - } - return instanceCfg -} - -// integrationKey returns the key for an integration Config, used for its -// instance name and name in the process cache. -func integrationKey(name string) string { - return fmt.Sprintf("integration/%s", name) -} - -func (m *Manager) scrapeServiceDiscovery(cfg ManagerConfig) discovery.Configs { - // A blank host somehow works, but it then requires a server name to be set under tls.
- newHost := cfg.ListenHost - if newHost == "" { - newHost = "127.0.0.1" - } - localAddr := fmt.Sprintf("%s:%d", newHost, cfg.ListenPort) - labels := model.LabelSet{} - labels[model.LabelName("agent_hostname")] = model.LabelValue(m.hostname) - for k, v := range cfg.Labels { - labels[k] = v - } - - return discovery.Configs{ - discovery.StaticConfig{{ - Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(localAddr)}}, - Labels: labels, - }}, - } -} - -// WireAPI hooks up /metrics routes per-integration. -func (m *Manager) WireAPI(r *mux.Router) { - r.HandleFunc("/integrations/{name}/metrics", func(rw http.ResponseWriter, r *http.Request) { - m.integrationsMut.RLock() - defer m.integrationsMut.RUnlock() - - key := integrationKey(mux.Vars(r)["name"]) - handler := m.loadHandler(key) - handler.ServeHTTP(rw, r) - }) -} - -// loadHandler will perform a dynamic lookup of an HTTP handler for an -// integration. loadHandler should be called with a read lock on the -// integrations mutex. -func (m *Manager) loadHandler(key string) http.Handler { - m.handlerMut.Lock() - defer m.handlerMut.Unlock() - - // Search the integration by name to see if it's still running. - p, ok := m.integrations[key] - if !ok { - delete(m.handlerCache, key) - return http.NotFoundHandler() - } - - // Now look in the cache for a handler for the running process. - cacheEntry, ok := m.handlerCache[key] - if ok && cacheEntry.process == p { - return cacheEntry.handler - } - - // New integration process that hasn't been scraped before. Generate - // a handler for it and cache it. - handler, err := p.i.MetricsHandler() - if err != nil { - level.Error(m.logger).Log("msg", "could not create http handler for integration", "integration", p.cfg.Name(), "err", err) - return http.HandlerFunc(internalServiceError) - } - - cacheEntry = handlerCacheEntry{handler: handler, process: p} - m.handlerCache[key] = cacheEntry - return cacheEntry.handler -} - -func internalServiceError(w http.ResponseWriter, r *http.Request) { - http.Error(w, "500 Internal Server Error", http.StatusInternalServerError) -} - -// Stop stops the manager and all of its integrations. Blocks until all running -// integrations exit. -func (m *Manager) Stop() { - m.cancel() - m.wg.Wait() -} - -type handlerCacheEntry struct { - handler http.Handler - process *integrationProcess -} diff --git a/internal/static/integrations/manager_test.go b/internal/static/integrations/manager_test.go deleted file mode 100644 index e44dfb6c09..0000000000 --- a/internal/static/integrations/manager_test.go +++ /dev/null @@ -1,433 +0,0 @@ -package integrations - -import ( - "context" - "fmt" - "net/http" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/integrations/config" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/prometheus/common/model" - promConfig "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/relabel" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" - "gopkg.in/yaml.v2" -) - -const mockIntegrationName = "integration/mock" - -func noOpValidator(*instance.Config) error { return nil } - -// TestConfig_MarshalEmptyIntegrations ensures that an empty set of integrations -// can be marshaled correctly. 
-func TestConfig_MarshalEmptyIntegrations(t *testing.T) { - cfgText := ` -scrape_integrations: true -replace_instance_label: true -integration_restart_backoff: 5s -use_hostname_label: true -` - var ( - cfg ManagerConfig - listenPort = 12345 - listenHost = "127.0.0.1" - ) - require.NoError(t, yaml.Unmarshal([]byte(cfgText), &cfg)) - - // Listen port must be set before applying defaults. Normally applied by the - // config package. - cfg.ListenPort = listenPort - cfg.ListenHost = listenHost - - outBytes, err := yaml.Marshal(cfg) - require.NoError(t, err, "Failed creating integration") - require.YAMLEq(t, cfgText, string(outBytes)) -} - -// Test that embedded integration fields in the struct can be unmarshaled and -// remarshaled back out to text. -func TestConfig_Remarshal(t *testing.T) { - RegisterIntegration(&testIntegrationA{}) - cfgText := ` -scrape_integrations: true -replace_instance_label: true -integration_restart_backoff: 5s -use_hostname_label: true -test: - text: Hello, world! - truth: true -` - var ( - cfg ManagerConfig - listenPort = 12345 - listenHost = "127.0.0.1" - ) - require.NoError(t, yaml.Unmarshal([]byte(cfgText), &cfg)) - - // Listen port must be set before applying defaults. Normally applied by the - // config package. - cfg.ListenPort = listenPort - cfg.ListenHost = listenHost - - outBytes, err := yaml.Marshal(cfg) - require.NoError(t, err, "Failed creating integration") - require.YAMLEq(t, cfgText, string(outBytes)) -} - -func TestConfig_AddressRelabels(t *testing.T) { - cfgText := ` -agent: - enabled: true -` - - var ( - cfg ManagerConfig - listenPort = 12345 - listenHost = "127.0.0.1" - ) - require.NoError(t, yaml.Unmarshal([]byte(cfgText), &cfg)) - - // Listen port must be set before applying defaults. Normally applied by the - // config package. - cfg.ListenPort = listenPort - cfg.ListenHost = listenHost - - expectHostname, _ := instance.Hostname() - relabels := cfg.DefaultRelabelConfigs(expectHostname + ":12345") - - // Ensure that the relabel configs are functional - require.Len(t, relabels, 1) - result, _ := relabel.Process(labels.FromStrings("__address__", "127.0.0.1"), relabels...) - - require.Equal(t, result.Get("instance"), expectHostname+":12345") -} - -func TestManager_instanceConfigForIntegration(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(mockManagerConfig(), log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - p := &integrationProcess{instanceKey: "key", cfg: makeUnmarshaledConfig(icfg, true), i: mock} - cfg := m.instanceConfigForIntegration(p, mockManagerConfig()) - - // Validate that the generated MetricsPath is a valid URL path - require.Len(t, cfg.ScrapeConfigs, 1) - require.Equal(t, "/integrations/mock/metrics", cfg.ScrapeConfigs[0].MetricsPath) -} - -func makeUnmarshaledConfig(cfg Config, enabled bool) UnmarshaledConfig { - return UnmarshaledConfig{Config: cfg, Common: config.Common{Enabled: enabled}} -} - -// TestManager_NoIntegrationsScrape ensures that configs don't get generated -// when the ScrapeIntegrations flag is disabled.
-func TestManager_NoIntegrationsScrape(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - - cfg := mockManagerConfig() - cfg.ScrapeIntegrations = false - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(&icfg, true)) - - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - // Normally we'd use util.Eventually here, but since im.ListConfigs starts - // out with a length of zero, util.Eventually would immediately pass. Instead - // we want to wait for a bit to make sure that the length of ListConfigs - // doesn't become non-zero. - time.Sleep(time.Second) - require.Zero(t, len(im.ListConfigs())) -} - -// TestManager_NoIntegrationScrape ensures that configs don't get generated -// when the ScrapeIntegration flag is disabled on the integration. -func TestManager_NoIntegrationScrape(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - noScrape := false - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, UnmarshaledConfig{ - Config: icfg, - Common: config.Common{ScrapeIntegration: &noScrape}, - }) - - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - time.Sleep(time.Second) - require.Zero(t, len(im.ListConfigs())) -} - -// TestManager_StartsIntegrations tests that, when given an integration to -// launch, the Manager applies a config and runs the integration. -func TestManager_StartsIntegrations(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, len(im.ListConfigs())) - }) - - // Check that the instance was set to run - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, int(mock.startedCount.Load())) - }) -} - -func TestManager_RestartsIntegrations(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - defer m.Stop() - - mock.err <- fmt.Errorf("I can't believe this horrible error happened") - - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 2, int(mock.startedCount.Load())) - }) -} - -func TestManager_GracefulStop(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - -
util.Eventually(t, func(t require.TestingT) { - require.Equal(t, 1, int(mock.startedCount.Load())) - }) - - m.Stop() - - time.Sleep(500 * time.Millisecond) - require.Equal(t, 1, int(mock.startedCount.Load()), "graceful shutdown should not have restarted the Integration") - - util.Eventually(t, func(t require.TestingT) { - require.Equal(t, false, mock.running.Load()) - }) -} - -func TestManager_IntegrationEnabledToDisabledReload(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - - // Test for Enabled -> Disabled - _ = m.ApplyConfig(generateMockConfigWithEnabledFlag(false)) - require.Len(t, m.integrations, 0, "Integration was disabled so should be removed from map") - _, err = m.im.GetInstance(mockIntegrationName) - require.Error(t, err, "This mock should not exist") - - // test for Disabled -> Enabled - _ = m.ApplyConfig(generateMockConfigWithEnabledFlag(true)) - require.Len(t, m.integrations, 1, "Integration was enabled so should be here") - _, err = m.im.GetInstance(mockIntegrationName) - require.NoError(t, err, "This mock should exist") - require.Len(t, m.im.ListInstances(), 1, "This instance should exist") -} - -func TestManager_IntegrationDisabledToEnabledReload(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, UnmarshaledConfig{ - Config: icfg, - Common: config.Common{Enabled: false}, - }) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - m, err := NewManager(cfg, log.NewNopLogger(), im, noOpValidator) - require.NoError(t, err) - require.Len(t, m.integrations, 0, "Integration was disabled so should be removed from map") - _, err = m.im.GetInstance(mockIntegrationName) - require.Error(t, err, "This mock should not exist") - - // test for Disabled -> Enabled - - _ = m.ApplyConfig(generateMockConfigWithEnabledFlag(true)) - require.Len(t, m.integrations, 1, "Integration was enabled so should be here") - _, err = m.im.GetInstance(mockIntegrationName) - require.NoError(t, err, "This mock should exist") - require.Len(t, m.im.ListInstances(), 1, "This instance should exist") -} - -type PromDefaultsValidator struct { - PrometheusGlobalConfig promConfig.GlobalConfig -} - -func (i *PromDefaultsValidator) validate(c *instance.Config) error { - instanceConfig := instance.GlobalConfig{ - Prometheus: i.PrometheusGlobalConfig, - } - return c.ApplyDefaults(instanceConfig) -} - -func TestManager_PromConfigChangeReloads(t *testing.T) { - mock := newMockIntegration() - icfg := mockConfig{Integration: mock} - - cfg := mockManagerConfig() - cfg.Integrations = append(cfg.Integrations, makeUnmarshaledConfig(icfg, true)) - - im := instance.NewBasicManager(instance.DefaultBasicManagerConfig, log.NewNopLogger(), mockInstanceFactory) - - startingPromConfig := mockPromConfigWithValues(model.Duration(30*time.Second), model.Duration(25*time.Second)) - cfg.PrometheusGlobalConfig = startingPromConfig - validator := PromDefaultsValidator{startingPromConfig} - - m, err := NewManager(cfg, log.NewNopLogger(), im, validator.validate) - require.NoError(t, err) - require.Len(t, m.im.ListConfigs(), 1, 
"Integration was enabled so should be here") - //The integration never has the prom config overrides happen so go after the running instance config instead - for _, c := range m.im.ListConfigs() { - for _, scrape := range c.ScrapeConfigs { - require.Equal(t, startingPromConfig.ScrapeInterval, scrape.ScrapeInterval) - require.Equal(t, startingPromConfig.ScrapeTimeout, scrape.ScrapeTimeout) - } - } - - newPromConfig := mockPromConfigWithValues(model.Duration(60*time.Second), model.Duration(55*time.Second)) - cfg.PrometheusGlobalConfig = newPromConfig - validator.PrometheusGlobalConfig = newPromConfig - - err = m.ApplyConfig(cfg) - require.NoError(t, err) - - require.Len(t, m.im.ListConfigs(), 1, "Integration was enabled so should be here") - //The integration never has the prom config overrides happen so go after the running instance config instead - for _, c := range m.im.ListConfigs() { - for _, scrape := range c.ScrapeConfigs { - require.Equal(t, newPromConfig.ScrapeInterval, scrape.ScrapeInterval) - require.Equal(t, newPromConfig.ScrapeTimeout, scrape.ScrapeTimeout) - } - } -} - -func generateMockConfigWithEnabledFlag(enabled bool) ManagerConfig { - enabledMock := newMockIntegration() - enabledConfig := mockConfig{Integration: enabledMock} - enabledManagerConfig := mockManagerConfig() - enabledManagerConfig.Integrations = append( - enabledManagerConfig.Integrations, - makeUnmarshaledConfig(enabledConfig, enabled), - ) - return enabledManagerConfig -} - -type mockConfig struct { - Integration *mockIntegration `yaml:"mock"` -} - -// Equal is used for cmp.Equal, since otherwise mockConfig can't be compared to itself. -func (c mockConfig) Equal(other mockConfig) bool { return c.Integration == other.Integration } - -func (c mockConfig) Name() string { return "mock" } -func (c mockConfig) InstanceKey(agentKey string) (string, error) { return agentKey, nil } - -func (c mockConfig) NewIntegration(_ log.Logger) (Integration, error) { - return c.Integration, nil -} - -type mockIntegration struct { - startedCount *atomic.Uint32 - running *atomic.Bool - err chan error -} - -func newMockIntegration() *mockIntegration { - return &mockIntegration{ - running: atomic.NewBool(true), - startedCount: atomic.NewUint32(0), - err: make(chan error), - } -} - -func (i *mockIntegration) MetricsHandler() (http.Handler, error) { - return promhttp.Handler(), nil -} - -func (i *mockIntegration) ScrapeConfigs() []config.ScrapeConfig { - return []config.ScrapeConfig{{ - JobName: "mock", - MetricsPath: "/metrics", - }} -} - -func (i *mockIntegration) Run(ctx context.Context) error { - i.startedCount.Inc() - i.running.Store(true) - defer i.running.Store(false) - - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-i.err: - return err - } -} - -func mockInstanceFactory(_ instance.Config) (instance.ManagedInstance, error) { - return instance.NoOpInstance{}, nil -} - -func mockManagerConfig() ManagerConfig { - listenPort := 0 - listenHost := "127.0.0.1" - return ManagerConfig{ - ScrapeIntegrations: true, - IntegrationRestartBackoff: 0, - ListenPort: listenPort, - ListenHost: listenHost, - } -} - -func mockPromConfigWithValues(scrapeInterval model.Duration, scrapeTimeout model.Duration) promConfig.GlobalConfig { - return promConfig.GlobalConfig{ - ScrapeInterval: scrapeInterval, - ScrapeTimeout: scrapeTimeout, - } -} diff --git a/internal/static/integrations/stub_integration.go b/internal/static/integrations/stub_integration.go deleted file mode 100644 index 2d118ff82c..0000000000 --- 
a/internal/static/integrations/stub_integration.go +++ /dev/null @@ -1,27 +0,0 @@ -package integrations - -import ( - "context" - "net/http" - - "github.com/grafana/agent/internal/static/integrations/config" -) - -// StubIntegration implements a no-op integration for use on platforms not supported by an integration -type StubIntegration struct{} - -// MetricsHandler returns an http.NotFoundHandler to satisfy the Integration interface -func (i *StubIntegration) MetricsHandler() (http.Handler, error) { - return http.NotFoundHandler(), nil -} - -// ScrapeConfigs returns an empty list of scrape configs, since there is nothing to scrape -func (i *StubIntegration) ScrapeConfigs() []config.ScrapeConfig { - return []config.ScrapeConfig{} -} - -// Run just waits for the context to finish -func (i *StubIntegration) Run(ctx context.Context) error { - <-ctx.Done() - return ctx.Err() -} diff --git a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go b/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go index f1bdd00adb..9145115fd5 100644 --- a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go +++ b/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver.go @@ -1,191 +1,17 @@ package app_agent_receiver //nolint:golint import ( - "context" "fmt" - "net/http" "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" "github.com/grafana/agent/internal/static/integrations/v2" - "github.com/grafana/agent/internal/static/integrations/v2/metricsutils" - "github.com/grafana/agent/internal/static/traces/pushreceiver" - "github.com/grafana/dskit/instrument" - "github.com/grafana/dskit/middleware" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/consumer" ) -type appAgentReceiverIntegration struct { - integrations.MetricsIntegration - appAgentReceiverHandler AppAgentReceiverHandler - logger log.Logger - conf *Config - reg prometheus.Registerer - - requestDurationCollector *prometheus.HistogramVec - receivedMessageSizeCollector *prometheus.HistogramVec - sentMessageSizeCollector *prometheus.HistogramVec - inflightRequestsCollector *prometheus.GaugeVec +func init() { + integrations.Register(&Config{}, integrations.TypeMultiplex) } -// Static typecheck tests -var ( - _ integrations.Integration = (*appAgentReceiverIntegration)(nil) - _ integrations.HTTPIntegration = (*appAgentReceiverIntegration)(nil) - _ integrations.MetricsIntegration = (*appAgentReceiverIntegration)(nil) -) - // NewIntegration converts this config into an instance of an integration func (c *Config) NewIntegration(l log.Logger, globals integrations.Globals) (integrations.Integration, error) { - reg := prometheus.NewRegistry() - sourcemapLogger := log.With(l, "subcomponent", "sourcemaps") - sourcemapStore := NewSourceMapStore(sourcemapLogger, c.SourceMaps, reg, nil, nil) - - receiverMetricsExporter := NewReceiverMetricsExporter(reg) - - var exp = []AppAgentReceiverExporter{ - receiverMetricsExporter, - } - - if len(c.LogsInstance) > 0 { - getLogsInstance := func() (logsInstance, error) { - instance := globals.Logs.Instance(c.LogsInstance) - if instance == nil { - return nil, fmt.Errorf("logs instance \"%s\" not found", c.LogsInstance) - } - return instance, nil - } - - if _, err := getLogsInstance(); err != nil { - return nil, err - } - - lokiExporter := NewLogsExporter( - l, - LogsExporterConfig{ - 
GetLogsInstance: getLogsInstance, - Labels: c.LogsLabels, - SendEntryTimeout: c.LogsSendTimeout, - }, - sourcemapStore, - ) - exp = append(exp, lokiExporter) - } - - if len(c.TracesInstance) > 0 { - getTracesConsumer := func() (consumer.Traces, error) { - tracesInstance := globals.Tracing.Instance(c.TracesInstance) - if tracesInstance == nil { - return nil, fmt.Errorf("traces instance \"%s\" not found", c.TracesInstance) - } - factory := tracesInstance.GetFactory(component.KindReceiver, pushreceiver.TypeStr) - if factory == nil { - return nil, fmt.Errorf("push receiver factory not found for traces instance \"%s\"", c.TracesInstance) - } - consumer := factory.(*pushreceiver.Factory).Consumer - if consumer == nil { - return nil, fmt.Errorf("consumer not set for push receiver factory on traces instance \"%s\"", c.TracesInstance) - } - return consumer, nil - } - if _, err := getTracesConsumer(); err != nil { - return nil, err - } - tracesExporter := NewTracesExporter(getTracesConsumer) - exp = append(exp, tracesExporter) - } - - handler := NewAppAgentReceiverHandler(c, exp, reg) - - metricsIntegration, err := metricsutils.NewMetricsHandlerIntegration(l, c, c.Common, globals, promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) - if err != nil { - return nil, err - } - - requestDurationCollector := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "app_agent_receiver_request_duration_seconds", - Help: "Time (in seconds) spent serving HTTP requests.", - Buckets: instrument.DefBuckets, - }, []string{"method", "route", "status_code", "ws"}) - reg.MustRegister(requestDurationCollector) - - receivedMessageSizeCollector := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "app_agent_receiver_request_message_bytes", - Help: "Size (in bytes) of messages received in the request.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - reg.MustRegister(receivedMessageSizeCollector) - - sentMessageSizeCollector := prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "app_agent_receiver_response_message_bytes", - Help: "Size (in bytes) of messages sent in response.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - reg.MustRegister(sentMessageSizeCollector) - - inflightRequestsCollector := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "app_agent_receiver_inflight_requests", - Help: "Current number of inflight requests.", - }, []string{"method", "route"}) - reg.MustRegister(inflightRequestsCollector) - - return &appAgentReceiverIntegration{ - MetricsIntegration: metricsIntegration, - appAgentReceiverHandler: handler, - logger: l, - conf: c, - reg: reg, - - requestDurationCollector: requestDurationCollector, - receivedMessageSizeCollector: receivedMessageSizeCollector, - sentMessageSizeCollector: sentMessageSizeCollector, - inflightRequestsCollector: inflightRequestsCollector, - }, nil -} - -// RunIntegration implements Integration -func (i *appAgentReceiverIntegration) RunIntegration(ctx context.Context) error { - r := mux.NewRouter() - r.Handle("/collect", i.appAgentReceiverHandler.HTTPHandler(i.logger)).Methods("POST", "OPTIONS") - - mw := middleware.Instrument{ - RouteMatcher: r, - Duration: i.requestDurationCollector, - RequestBodySize: i.receivedMessageSizeCollector, - ResponseBodySize: i.sentMessageSizeCollector, - InflightRequests: i.inflightRequestsCollector, - } - - srv := &http.Server{ - Addr: fmt.Sprintf("%s:%d", i.conf.Server.Host, i.conf.Server.Port), - Handler: mw.Wrap(r), - } - errChan := make(chan error, 1) - - 
go func() { - level.Info(i.logger).Log("msg", "starting app agent receiver", "host", i.conf.Server.Host, "port", i.conf.Server.Port) - if err := srv.ListenAndServe(); err != http.ErrServerClosed { - errChan <- err - } - }() - - select { - case <-ctx.Done(): - if err := srv.Shutdown(ctx); err != nil { - return err - } - case err := <-errChan: - close(errChan) - return err - } - - return nil -} - -func init() { - integrations.Register(&Config{}, integrations.TypeMultiplex) + return nil, fmt.Errorf("app_agent_receiver integration code has been replaced by faro.receiver component") } diff --git a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go b/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go deleted file mode 100644 index f44db4c038..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/app_agent_receiver_test.go +++ /dev/null @@ -1,169 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "context" - "fmt" - "io" - "net/http" - "strings" - "testing" - "time" - - "github.com/grafana/agent/internal/static/integrations/v2" - "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/static/traces" - "github.com/grafana/agent/internal/static/traces/traceutils" - "github.com/grafana/agent/internal/util" - "github.com/phayes/freeport" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/pdata/ptrace" - "gopkg.in/yaml.v2" -) - -func Test_ReceiveTracesAndRemoteWrite(t *testing.T) { - var err error - - // - // Prepare the traces instance - // - tracesCh := make(chan ptrace.Traces) - tracesAddr := traceutils.NewTestServer(t, func(t ptrace.Traces) { - tracesCh <- t - }) - - tracesCfgText := util.Untab(fmt.Sprintf(` -configs: -- name: TEST_TRACES - receivers: - jaeger: - protocols: - thrift_compact: - remote_write: - - endpoint: %s - insecure: true - batch: - timeout: 100ms - send_batch_size: 1 - `, tracesAddr)) - - var tracesCfg traces.Config - dec := yaml.NewDecoder(strings.NewReader(tracesCfgText)) - dec.SetStrict(true) - err = dec.Decode(&tracesCfg) - require.NoError(t, err) - - traces, err := traces.New(nil, nil, prometheus.NewRegistry(), tracesCfg, &server.HookLogger{}) - require.NoError(t, err) - t.Cleanup(traces.Stop) - - // - // Prepare the app_agent_receiver integration - // - integrationPort, err := freeport.GetFreePort() - require.NoError(t, err) - - var integrationCfg Config - cb := fmt.Sprintf(` -instance: TEST_APP_AGENT_RECEIVER -server: - cors_allowed_origins: - - '*' - host: '0.0.0.0' - max_allowed_payload_size: 5e+07 - port: %d - rate_limiting: - burstiness: 100 - enabled: true - rps: 100 -sourcemaps: - download: true -traces_instance: TEST_TRACES -`, integrationPort) - err = yaml.Unmarshal([]byte(cb), &integrationCfg) - require.NoError(t, err) - - logger := util.TestLogger(t) - globals := integrations.Globals{ - Tracing: traces, - } - - integration, err := integrationCfg.NewIntegration(logger, globals) - require.NoError(t, err) - - ctx := context.Background() - t.Cleanup(func() { ctx.Done() }) - // - // Start the app_agent_receiver integration - // - go func() { - err = integration.RunIntegration(ctx) - require.NoError(t, err) - }() - - // - // Send data to the integration's /collect endpoint - // - const PAYLOAD = ` -{ - "traces": { - "resourceSpans": [{ - "scopeSpans": [{ - "spans": [{ - "name": "TestSpan", - "attributes": [{ - "key": "foo", - "value": { "intValue": 
"11111" } - }, - { - "key": "boo", - "value": { "intValue": "22222" } - }, - { - "key": "user.email", - "value": { "stringValue": "user@email.com" } - }] - }] - }] - }] - }, - "logs": [], - "exceptions": [], - "measurements": [], - "meta": {} -} -` - - integrationURL := fmt.Sprintf("http://127.0.0.1:%d/collect", integrationPort) - - var httpResponse *http.Response - require.EventuallyWithT(t, func(c *assert.CollectT) { - req, err := http.NewRequest("POST", integrationURL, bytes.NewBuffer([]byte(PAYLOAD))) - assert.NoError(c, err) - - httpResponse, err = http.DefaultClient.Do(req) - assert.NoError(c, err) - }, 5*time.Second, 250*time.Millisecond) - - // - // Check that the data was received by the integration - // - resBody, err := io.ReadAll(httpResponse.Body) - require.NoError(t, err) - require.Equal(t, "ok", string(resBody[:])) - - require.Equal(t, http.StatusAccepted, httpResponse.StatusCode) - - // - // Check that the traces subsystem remote wrote the integration - // - select { - case <-time.After(10 * time.Second): - require.Fail(t, "failed to receive a span after 10 seconds") - case tr := <-tracesCh: - require.Equal(t, 1, tr.SpanCount()) - // Nothing to do, send succeeded. - } -} diff --git a/internal/static/integrations/v2/app_agent_receiver/handler.go b/internal/static/integrations/v2/app_agent_receiver/handler.go deleted file mode 100644 index c430e90993..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/handler.go +++ /dev/null @@ -1,126 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "sync" - - "crypto/subtle" - "encoding/json" - "net/http" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/rs/cors" - "golang.org/x/time/rate" -) - -const apiKeyHeader = "x-api-key" - -type AppAgentReceiverExporter interface { - Name() string - Export(ctx context.Context, payload Payload) error -} - -// AppAgentReceiverHandler struct controls the data ingestion http handler of the receiver -type AppAgentReceiverHandler struct { - exporters []AppAgentReceiverExporter - config *Config - rateLimiter *rate.Limiter - exporterErrorsCollector *prometheus.CounterVec -} - -// NewAppAgentReceiverHandler creates a new AppReceiver instance based on the given configuration -func NewAppAgentReceiverHandler(conf *Config, exporters []AppAgentReceiverExporter, reg prometheus.Registerer) AppAgentReceiverHandler { - var rateLimiter *rate.Limiter - if conf.Server.RateLimiting.Enabled { - var rps float64 - if conf.Server.RateLimiting.RPS > 0 { - rps = conf.Server.RateLimiting.RPS - } - - var b int - if conf.Server.RateLimiting.Burstiness > 0 { - b = conf.Server.RateLimiting.Burstiness - } - rateLimiter = rate.NewLimiter(rate.Limit(rps), b) - } - - exporterErrorsCollector := prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_exporter_errors_total", - Help: "Total number of errors produced by a receiver exporter", - }, []string{"exporter"}) - - reg.MustRegister(exporterErrorsCollector) - - return AppAgentReceiverHandler{ - exporters: exporters, - config: conf, - rateLimiter: rateLimiter, - exporterErrorsCollector: exporterErrorsCollector, - } -} - -// HTTPHandler is the http.Handler for the receiver. It will do the following -// 0. Enable CORS for the configured hosts -// 1. Check if the request should be rate limited -// 2. Verify that the payload size is within limits -// 3. Start two go routines for exporters processing and exporting data respectively -// 4. 
Respond with 202 once all the work is done -func (ar *AppAgentReceiverHandler) HTTPHandler(logger log.Logger) http.Handler { - var handler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Check rate limiting state - if ar.config.Server.RateLimiting.Enabled { - if ok := ar.rateLimiter.Allow(); !ok { - http.Error(w, http.StatusText(http.StatusTooManyRequests), http.StatusTooManyRequests) - return - } - } - - // check API key if one is provided - if len(ar.config.Server.APIKey) > 0 && subtle.ConstantTimeCompare([]byte(r.Header.Get(apiKeyHeader)), []byte(ar.config.Server.APIKey)) == 0 { - http.Error(w, "api key not provided or incorrect", http.StatusUnauthorized) - return - } - - // Verify content length. We trust net/http to give us the correct number - if ar.config.Server.MaxAllowedPayloadSize > 0 && r.ContentLength > ar.config.Server.MaxAllowedPayloadSize { - http.Error(w, http.StatusText(http.StatusRequestEntityTooLarge), http.StatusRequestEntityTooLarge) - return - } - - var p Payload - err := json.NewDecoder(r.Body).Decode(&p) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - var wg sync.WaitGroup - - for _, exporter := range ar.exporters { - wg.Add(1) - go func(exp AppAgentReceiverExporter) { - defer wg.Done() - if err := exp.Export(r.Context(), p); err != nil { - level.Error(logger).Log("msg", "exporter error", "exporter", exp.Name(), "error", err) - ar.exporterErrorsCollector.WithLabelValues(exp.Name()).Inc() - } - }(exporter) - } - - wg.Wait() - w.WriteHeader(http.StatusAccepted) - _, _ = w.Write([]byte("ok")) - }) - - if len(ar.config.Server.CORSAllowedOrigins) > 0 { - c := cors.New(cors.Options{ - AllowedOrigins: ar.config.Server.CORSAllowedOrigins, - AllowedHeaders: []string{apiKeyHeader, "content-type", "x-faro-session-id"}, - }) - handler = c.Handler(handler) - } - - return handler -} diff --git a/internal/static/integrations/v2/app_agent_receiver/handler_test.go b/internal/static/integrations/v2/app_agent_receiver/handler_test.go deleted file mode 100644 index ac0e5438c8..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/handler_test.go +++ /dev/null @@ -1,356 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "context" - "errors" - "net/http" - "net/http/httptest" - "testing" - - "github.com/go-kit/log" - "github.com/stretchr/testify/require" - - "github.com/prometheus/client_golang/prometheus" -) - -const PAYLOAD = ` -{ - "traces": { - "resourceSpans": [] - }, - "logs": [], - "exceptions": [], - "measurements": [], - "meta": {} -} -` - -type TestExporter struct { - name string - broken bool - payloads []Payload -} - -func (te *TestExporter) Name() string { - return te.name -} - -func (te *TestExporter) Export(ctx context.Context, payload Payload) error { - if te.broken { - return errors.New("this exporter is broken") - } - te.payloads = append(te.payloads, payload) - return nil -} - -func TestMultipleExportersAllSucceed(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - reg := prometheus.NewRegistry() - - require.NoError(t, err) - - exporter1 := TestExporter{ - name: "exporter1", - broken: false, - payloads: []Payload{}, - } - exporter2 := TestExporter{ - name: "exporter2", - broken: false, - payloads: []Payload{}, - } - - conf := &Config{} - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{&exporter1, &exporter2}, reg) - handler := fr.HTTPHandler(log.NewNopLogger()) - - rr := httptest.NewRecorder() - - 
handler.ServeHTTP(rr, req) - - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) - - require.Len(t, exporter1.payloads, 1) - require.Len(t, exporter2.payloads, 1) -} - -func TestMultipleExportersOneFails(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - require.NoError(t, err) - - reg := prometheus.NewRegistry() - - exporter1 := TestExporter{ - name: "exporter1", - broken: true, - payloads: []Payload{}, - } - exporter2 := TestExporter{ - name: "exporter2", - broken: false, - payloads: []Payload{}, - } - - conf := &Config{} - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{&exporter1, &exporter2}, reg) - handler := fr.HTTPHandler(log.NewNopLogger()) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - - metrics, err := reg.Gather() - require.NoError(t, err) - - metric := metrics[0] - require.Equal(t, "app_agent_receiver_exporter_errors_total", *metric.Name) - require.Len(t, metric.Metric, 1) - require.Equal(t, 1.0, *metric.Metric[0].Counter.Value) - require.Len(t, metric.Metric[0].Label, 1) - require.Equal(t, *metric.Metric[0].Label[0].Value, "exporter1") - require.Len(t, metrics, 1) - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) - require.Len(t, exporter1.payloads, 0) - require.Len(t, exporter2.payloads, 1) -} - -func TestMultipleExportersAllFail(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - reg := prometheus.NewRegistry() - - require.NoError(t, err) - - exporter1 := TestExporter{ - name: "exporter1", - broken: true, - payloads: []Payload{}, - } - exporter2 := TestExporter{ - name: "exporter2", - broken: true, - payloads: []Payload{}, - } - - conf := &Config{} - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{&exporter1, &exporter2}, reg) - handler := fr.HTTPHandler(log.NewNopLogger()) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - - metrics, err := reg.Gather() - require.NoError(t, err) - - require.Len(t, metrics, 1) - metric := metrics[0] - - require.Equal(t, "app_agent_receiver_exporter_errors_total", *metric.Name) - require.Len(t, metric.Metric, 2) - require.Equal(t, 1.0, *metric.Metric[0].Counter.Value) - require.Equal(t, 1.0, *metric.Metric[1].Counter.Value) - require.Len(t, metric.Metric[0].Label, 1) - require.Len(t, metric.Metric[1].Label, 1) - require.Equal(t, *metric.Metric[0].Label[0].Value, "exporter1") - require.Equal(t, *metric.Metric[1].Label[0].Value, "exporter2") - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) - require.Len(t, exporter1.payloads, 0) - require.Len(t, exporter2.payloads, 0) -} - -func TestNoContentLengthLimitSet(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - require.NoError(t, err) - reg := prometheus.NewRegistry() - - conf := &Config{} - - req.ContentLength = 89348593894 - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{}, reg) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) -} - -func TestLargePayload(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - require.NoError(t, err) - reg := prometheus.NewRegistry() - - conf := &Config{ - Server: ServerConfig{ - MaxAllowedPayloadSize: 10, - }, - } - - fr := NewAppAgentReceiverHandler(conf, []AppAgentReceiverExporter{}, reg) - handler := 
fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusRequestEntityTooLarge, rr.Result().StatusCode) -} - -func TestAPIKeyRequiredButNotProvided(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - APIKey: "foo", - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusUnauthorized, rr.Result().StatusCode) -} - -func TestAPIKeyWrong(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - req.Header.Set("x-api-key", "bar") - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - APIKey: "foo", - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusUnauthorized, rr.Result().StatusCode) -} - -func TestAPIKeyCorrect(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - req.Header.Set("x-api-key", "foo") - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - APIKey: "foo", - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) -} - -func TestRateLimiterNoReject(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - RateLimiting: RateLimitingConfig{ - Burstiness: 10, - RPS: 10, - Enabled: true, - }, - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, http.StatusAccepted, rr.Result().StatusCode) -} - -func TestRateLimiterReject(t *testing.T) { - conf := &Config{ - Server: ServerConfig{ - RateLimiting: RateLimitingConfig{ - Burstiness: 2, - RPS: 1, - Enabled: true, - }, - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - makeRequest := func() *httptest.ResponseRecorder { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - require.NoError(t, err) - rr := httptest.NewRecorder() - handler.ServeHTTP(rr, req) - return rr - } - - r1 := makeRequest() - r2 := makeRequest() - r3 := makeRequest() - - require.Equal(t, http.StatusAccepted, r1.Result().StatusCode) - require.Equal(t, http.StatusAccepted, r2.Result().StatusCode) - require.Equal(t, http.StatusTooManyRequests, r3.Result().StatusCode) -} - -func TestRateLimiterDisabled(t *testing.T) { - req, err := http.NewRequest("POST", "/collect", bytes.NewBuffer([]byte(PAYLOAD))) - - if err != nil { - t.Fatal(err) - } - - conf := &Config{ - Server: ServerConfig{ - RateLimiting: RateLimitingConfig{ - Burstiness: 0, - RPS: 0, - Enabled: false, - }, - }, - } - - fr := NewAppAgentReceiverHandler(conf, nil, prometheus.NewRegistry()) - handler := fr.HTTPHandler(nil) - - rr := httptest.NewRecorder() - - handler.ServeHTTP(rr, req) - require.Equal(t, 
http.StatusAccepted, rr.Result().StatusCode) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/logs_exporter.go b/internal/static/integrations/v2/app_agent_receiver/logs_exporter.go deleted file mode 100644 index 31295a5060..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/logs_exporter.go +++ /dev/null @@ -1,140 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "fmt" - "time" - - kitlog "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/go-logfmt/logfmt" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - prommodel "github.com/prometheus/common/model" -) - -// logsInstance is an interface with capability to send log entries -type logsInstance interface { - SendEntry(entry api.Entry, dur time.Duration) bool -} - -// logsInstanceGetter is a function that returns a LogsInstance to send log entries to -type logsInstanceGetter func() (logsInstance, error) - -// LogsExporterConfig holds the configuration of the logs exporter -type LogsExporterConfig struct { - SendEntryTimeout time.Duration - GetLogsInstance logsInstanceGetter - Labels map[string]string -} - -// LogsExporter will send logs & errors to loki -type LogsExporter struct { - getLogsInstance logsInstanceGetter - sendEntryTimeout time.Duration - logger kitlog.Logger - labels map[string]string - sourceMapStore SourceMapStore -} - -// NewLogsExporter creates a new logs exporter with the given -// configuration -func NewLogsExporter(logger kitlog.Logger, conf LogsExporterConfig, sourceMapStore SourceMapStore) AppAgentReceiverExporter { - return &LogsExporter{ - logger: logger, - getLogsInstance: conf.GetLogsInstance, - sendEntryTimeout: conf.SendEntryTimeout, - labels: conf.Labels, - sourceMapStore: sourceMapStore, - } -} - -// Name of the exporter, for logging purposes -func (le *LogsExporter) Name() string { - return "logs exporter" -} - -// Export implements the AppDataExporter interface -func (le *LogsExporter) Export(ctx context.Context, payload Payload) error { - meta := payload.Meta.KeyVal() - - var err error - - // log events - for _, logItem := range payload.Logs { - kv := logItem.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - // exceptions - for _, exception := range payload.Exceptions { - transformedException := TransformException(le.sourceMapStore, le.logger, &exception, payload.Meta.App.Release) - kv := transformedException.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - // measurements - for _, measurement := range payload.Measurements { - kv := measurement.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - // events - for _, event := range payload.Events { - kv := event.KeyVal() - MergeKeyVal(kv, meta) - err = le.sendKeyValsToLogsPipeline(kv) - } - - return err -} - -func (le *LogsExporter) sendKeyValsToLogsPipeline(kv *KeyVal) error { - line, err := logfmt.MarshalKeyvals(KeyValToInterfaceSlice(kv)...) 
- if err != nil { - level.Error(le.logger).Log("msg", "failed to logfmt a frontend log event", "err", err) - return err - } - instance, err := le.getLogsInstance() - if err != nil { - return err - } - sent := instance.SendEntry(api.Entry{ - Labels: le.labelSet(kv), - Entry: logproto.Entry{ - Timestamp: time.Now(), - Line: string(line), - }, - }, le.sendEntryTimeout) - if !sent { - level.Warn(le.logger).Log("msg", "failed to log frontend log event to logs pipeline") - return fmt.Errorf("failed to send app event to logs pipeline") - } - return nil -} - -func (le *LogsExporter) labelSet(kv *KeyVal) prommodel.LabelSet { - set := make(prommodel.LabelSet, len(le.labels)) - - for k, v := range le.labels { - if len(v) > 0 { - set[prommodel.LabelName(k)] = prommodel.LabelValue(v) - } else { - if val, ok := kv.Get(k); ok { - set[prommodel.LabelName(k)] = prommodel.LabelValue(fmt.Sprint(val)) - } - } - } - - return set -} - -// Static typecheck tests -var ( - _ AppAgentReceiverExporter = (*LogsExporter)(nil) - _ logsInstance = (*logs.Instance)(nil) -) diff --git a/internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go b/internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go deleted file mode 100644 index 784e2c85bf..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/logs_exporter_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "encoding/json" - "os" - "testing" - "time" - - kitlog "github.com/go-kit/log" - "github.com/grafana/loki/clients/pkg/promtail/api" - prommodel "github.com/prometheus/common/model" - - "github.com/stretchr/testify/require" -) - -func loadTestPayload(t *testing.T) Payload { - t.Helper() - // Safe to disable, this is a test. - // nolint:gosec - content, err := os.ReadFile("./testdata/payload.json") - require.NoError(t, err, "expected to be able to read file") - require.True(t, len(content) > 0) - var payload Payload - err = json.Unmarshal(content, &payload) - require.NoError(t, err) - return payload -} - -type testLogsInstance struct { - Entries []api.Entry -} - -func (i *testLogsInstance) SendEntry(entry api.Entry, dur time.Duration) bool { - i.Entries = append(i.Entries, entry) - return true -} - -type MockSourceMapStore struct{} - -func (store *MockSourceMapStore) GetSourceMap(sourceURL string, release string) (*SourceMap, error) { - return nil, nil -} - -func TestExportLogs(t *testing.T) { - ctx := context.Background() - inst := &testLogsInstance{ - Entries: []api.Entry{}, - } - - logger := kitlog.NewNopLogger() - - logsExporter := NewLogsExporter( - logger, - LogsExporterConfig{ - GetLogsInstance: func() (logsInstance, error) { return inst, nil }, - Labels: map[string]string{ - "app": "frontend", - "kind": "", - }, - SendEntryTimeout: 100, - }, - &MockSourceMapStore{}, - ) - - payload := loadTestPayload(t) - - err := logsExporter.Export(ctx, payload) - require.NoError(t, err) - - require.Len(t, inst.Entries, 6) - - // log1 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("log"), - }, inst.Entries[0].Labels) - expectedLine := "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=log message=\"opened pricing page\" level=info context_component=AppRoot context_page=Pricing traceID=abcd spanID=def sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 
user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[0].Line) - - // log2 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("log"), - }, inst.Entries[1].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=log message=\"loading price list\" level=trace context_component=AppRoot context_page=Pricing traceID=abcd spanID=ghj sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[1].Line) - - // exception - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("exception"), - }, inst.Entries[2].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=exception type=Error value=\"Cannot read property 'find' of undefined\" stacktrace=\"Error: Cannot read property 'find' of undefined\\n at ? (http://fe:3002/static/js/vendors~main.chunk.js:8639:42)\\n at dispatchAction (http://fe:3002/static/js/vendors~main.chunk.js:268095:9)\\n at scheduleUpdateOnFiber (http://fe:3002/static/js/vendors~main.chunk.js:273726:13)\\n at flushSyncCallbackQueue (http://fe:3002/static/js/vendors~main.chunk.js:263362:7)\\n at flushSyncCallbackQueueImpl (http://fe:3002/static/js/vendors~main.chunk.js:263374:13)\\n at runWithPriority$1 (http://fe:3002/static/js/vendors~main.chunk.js:263325:14)\\n at unstable_runWithPriority (http://fe:3002/static/js/vendors~main.chunk.js:291265:16)\\n at ? (http://fe:3002/static/js/vendors~main.chunk.js:263379:30)\\n at performSyncWorkOnRoot (http://fe:3002/static/js/vendors~main.chunk.js:274126:22)\\n at renderRootSync (http://fe:3002/static/js/vendors~main.chunk.js:274509:11)\\n at workLoopSync (http://fe:3002/static/js/vendors~main.chunk.js:274543:9)\\n at performUnitOfWork (http://fe:3002/static/js/vendors~main.chunk.js:274606:16)\\n at beginWork$1 (http://fe:3002/static/js/vendors~main.chunk.js:275746:18)\\n at beginWork (http://fe:3002/static/js/vendors~main.chunk.js:270944:20)\\n at updateFunctionComponent (http://fe:3002/static/js/vendors~main.chunk.js:269291:24)\\n at renderWithHooks (http://fe:3002/static/js/vendors~main.chunk.js:266969:22)\\n at ? (http://fe:3002/static/js/main.chunk.js:2600:74)\\n at useGetBooksQuery (http://fe:3002/static/js/main.chunk.js:1299:65)\\n at Module.useQuery (http://fe:3002/static/js/vendors~main.chunk.js:8495:85)\\n at useBaseQuery (http://fe:3002/static/js/vendors~main.chunk.js:8656:83)\\n at useDeepMemo (http://fe:3002/static/js/vendors~main.chunk.js:8696:14)\\n at ? 
(http://fe:3002/static/js/vendors~main.chunk.js:8657:55)\\n at QueryData.execute (http://fe:3002/static/js/vendors~main.chunk.js:7883:47)\\n at QueryData.getExecuteResult (http://fe:3002/static/js/vendors~main.chunk.js:7944:23)\\n at QueryData._this.getQueryResult (http://fe:3002/static/js/vendors~main.chunk.js:7790:19)\\n at new ApolloError (http://fe:3002/static/js/vendors~main.chunk.js:5164:24)\" hash=2735541995122471342 sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[2].Line) - - // measurement - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("measurement"), - }, inst.Entries[3].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=measurement type=foobar ttfb=14.000000 ttfcp=22.120000 ttfp=20.120000 traceID=abcd spanID=def context_hello=world sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[3].Line) - - // event 1 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("event"), - }, inst.Entries[4].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=event event_name=click_login_button event_domain=frontend event_data_foo=bar event_data_one=two traceID=abcd spanID=def sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[4].Line) - - // event 2 - require.Equal(t, prommodel.LabelSet{ - prommodel.LabelName("app"): prommodel.LabelValue("frontend"), - prommodel.LabelName("kind"): prommodel.LabelValue("event"), - }, inst.Entries[5].Labels) - expectedLine = "timestamp=\"2021-09-30 10:46:17.68 +0000 UTC\" kind=event event_name=click_reset_password_button sdk_name=grafana-frontend-agent sdk_version=1.0.0 app_name=testapp app_release=0.8.2 app_version=abcdefg app_environment=production user_email=geralt@kaermorhen.org user_id=123 user_username=domasx2 user_attr_foo=bar session_id=abcd session_attr_time_elapsed=100s page_url=https://example.com/page browser_name=chrome browser_version=88.12.1 browser_os=linux browser_mobile=false view_name=foobar" - require.Equal(t, expectedLine, inst.Entries[5].Line) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/payload.go b/internal/static/integrations/v2/app_agent_receiver/payload.go deleted file mode 100644 index 
ca91a8842d..0000000000
--- a/internal/static/integrations/v2/app_agent_receiver/payload.go
+++ /dev/null
@@ -1,420 +0,0 @@
-package app_agent_receiver
-
-import (
-	"fmt"
-	"sort"
-	"strconv"
-	"strings"
-	"time"
-
-	"go.opentelemetry.io/collector/pdata/pcommon"
-	"go.opentelemetry.io/collector/pdata/ptrace"
-
-	"github.com/zeebo/xxh3"
-)
-
-// Payload is the body of the receiver request
-type Payload struct {
-	Exceptions   []Exception   `json:"exceptions,omitempty"`
-	Logs         []Log         `json:"logs,omitempty"`
-	Measurements []Measurement `json:"measurements,omitempty"`
-	Events       []Event       `json:"events,omitempty"`
-	Meta         Meta          `json:"meta,omitempty"`
-	Traces       *Traces       `json:"traces,omitempty"`
-}
-
-// Frame struct represents a single stacktrace frame
-type Frame struct {
-	Function string `json:"function,omitempty"`
-	Module   string `json:"module,omitempty"`
-	Filename string `json:"filename,omitempty"`
-	Lineno   int    `json:"lineno,omitempty"`
-	Colno    int    `json:"colno,omitempty"`
-}
-
-// String converts a Frame into a human-readable string
-func (frame Frame) String() string {
-	module := ""
-	if len(frame.Module) > 0 {
-		module = frame.Module + "|"
-	}
-	return fmt.Sprintf("\n at %s (%s%s:%v:%v)", frame.Function, module, frame.Filename, frame.Lineno, frame.Colno)
-}
-
-// Stacktrace is a collection of Frames
-type Stacktrace struct {
-	Frames []Frame `json:"frames,omitempty"`
-}
-
-// Exception struct controls all the data regarding an exception
-type Exception struct {
-	Type       string           `json:"type,omitempty"`
-	Value      string           `json:"value,omitempty"`
-	Stacktrace *Stacktrace      `json:"stacktrace,omitempty"`
-	Timestamp  time.Time        `json:"timestamp"`
-	Trace      TraceContext     `json:"trace,omitempty"`
-	Context    ExceptionContext `json:"context,omitempty"`
-}
-
-// Message returns the concatenation of Exception.Type and Exception.Value
-func (e Exception) Message() string {
-	return fmt.Sprintf("%s: %s", e.Type, e.Value)
-}
-
-// String is the string representation of an Exception
-func (e Exception) String() string {
-	var stacktrace = e.Message()
-	if e.Stacktrace != nil {
-		for _, frame := range e.Stacktrace.Frames {
-			stacktrace += frame.String()
-		}
-	}
-	return stacktrace
-}
-
-// KeyVal representation of the exception object
-func (e Exception) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "timestamp", e.Timestamp.String())
-	KeyValAdd(kv, "kind", "exception")
-	KeyValAdd(kv, "type", e.Type)
-	KeyValAdd(kv, "value", e.Value)
-	KeyValAdd(kv, "stacktrace", e.String())
-	KeyValAdd(kv, "hash", strconv.FormatUint(xxh3.HashString(e.Value), 10))
-	MergeKeyValWithPrefix(kv, KeyValFromMap(e.Context), "context_")
-	MergeKeyVal(kv, e.Trace.KeyVal())
-	return kv
-}
-
-// ExceptionContext is a string to string map structure that
-// represents the context of an exception
-type ExceptionContext map[string]string
-
-// TraceContext holds the trace id and span id associated to an entity (log, exception, measurement...).
-type TraceContext struct {
-	TraceID string `json:"trace_id"`
-	SpanID  string `json:"span_id"`
-}
-
-// KeyVal representation of the trace context object.
-func (tc TraceContext) KeyVal() *KeyVal {
-	retv := NewKeyVal()
-	KeyValAdd(retv, "traceID", tc.TraceID)
-	KeyValAdd(retv, "spanID", tc.SpanID)
-	return retv
-}
-
-// Traces wraps the otel traces model.
-type Traces struct {
-	ptrace.Traces
-}
-
-// UnmarshalJSON unmarshals Traces model.
-func (t *Traces) UnmarshalJSON(b []byte) error {
-	unmarshaler := &ptrace.JSONUnmarshaler{}
-	td, err := unmarshaler.UnmarshalTraces(b)
-	if err != nil {
-		return err
-	}
-	*t = Traces{td}
-	return nil
-}
-
-// MarshalJSON marshals Traces model to json.
-func (t Traces) MarshalJSON() ([]byte, error) {
-	marshaler := &ptrace.JSONMarshaler{}
-	return marshaler.MarshalTraces(t.Traces)
-}
-
-// SpanSlice unpacks Traces entity into a slice of Spans.
-func (t Traces) SpanSlice() []ptrace.Span {
-	spans := make([]ptrace.Span, 0)
-	rss := t.ResourceSpans()
-	for i := 0; i < rss.Len(); i++ {
-		rs := rss.At(i)
-		ilss := rs.ScopeSpans()
-		for j := 0; j < ilss.Len(); j++ {
-			s := ilss.At(j).Spans()
-			for si := 0; si < s.Len(); si++ {
-				spans = append(spans, s.At(si))
-			}
-		}
-	}
-	return spans
-}
-
-// SpanToKeyVal returns KeyVal representation of a Span.
-func SpanToKeyVal(s ptrace.Span) *KeyVal {
-	kv := NewKeyVal()
-	if s.StartTimestamp() > 0 {
-		KeyValAdd(kv, "timestamp", s.StartTimestamp().AsTime().String())
-	}
-	if s.EndTimestamp() > 0 {
-		KeyValAdd(kv, "end_timestamp", s.EndTimestamp().AsTime().String())
-	}
-	KeyValAdd(kv, "kind", "span")
-	KeyValAdd(kv, "traceID", s.TraceID().String())
-	KeyValAdd(kv, "spanID", s.SpanID().String())
-	KeyValAdd(kv, "span_kind", s.Kind().String())
-	KeyValAdd(kv, "name", s.Name())
-	KeyValAdd(kv, "parent_spanID", s.ParentSpanID().String())
-	s.Attributes().Range(func(k string, v pcommon.Value) bool {
-		KeyValAdd(kv, "attr_"+k, fmt.Sprintf("%v", v))
-		return true
-	})
-
-	return kv
-}
-
-// LogLevel is log level enum for incoming app logs
-type LogLevel string
-
-const (
-	// LogLevelTrace is "trace"
-	LogLevelTrace LogLevel = "trace"
-	// LogLevelDebug is "debug"
-	LogLevelDebug LogLevel = "debug"
-	// LogLevelInfo is "info"
-	LogLevelInfo LogLevel = "info"
-	// LogLevelWarning is "warning"
-	LogLevelWarning LogLevel = "warning"
-	// LogLevelError is "error"
-	LogLevelError LogLevel = "error"
-)
-
-// LogContext is a string to string map structure that
-// represents the context of a log message
-type LogContext map[string]string
-
-// Log struct controls the data that come into a Log message
-type Log struct {
-	Message   string       `json:"message,omitempty"`
-	LogLevel  LogLevel     `json:"level,omitempty"`
-	Context   LogContext   `json:"context,omitempty"`
-	Timestamp time.Time    `json:"timestamp"`
-	Trace     TraceContext `json:"trace,omitempty"`
-}
-
-// KeyVal representation of a Log object
-func (l Log) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "timestamp", l.Timestamp.String())
-	KeyValAdd(kv, "kind", "log")
-	KeyValAdd(kv, "message", l.Message)
-	KeyValAdd(kv, "level", string(l.LogLevel))
-	MergeKeyValWithPrefix(kv, KeyValFromMap(l.Context), "context_")
-	MergeKeyVal(kv, l.Trace.KeyVal())
-	return kv
-}
-
-// MeasurementContext is a string to string map structure that
-// represents the context of a measurement
-type MeasurementContext map[string]string
-
-// Measurement holds the data for user provided measurements
-type Measurement struct {
-	Type      string             `json:"type,omitempty"`
-	Values    map[string]float64 `json:"values,omitempty"`
-	Timestamp time.Time          `json:"timestamp,omitempty"`
-	Trace     TraceContext       `json:"trace,omitempty"`
-	Context   MeasurementContext `json:"context,omitempty"`
-}
-
-// KeyVal representation of the measurement object
-func (m Measurement) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-
-	KeyValAdd(kv, "timestamp", m.Timestamp.String())
-	KeyValAdd(kv, "kind", "measurement")
-	KeyValAdd(kv, "type", m.Type)
-
-	keys := make([]string, 0, len(m.Values))
-	for k := range m.Values {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-	for _, k := range keys {
-		KeyValAdd(kv, k, fmt.Sprintf("%f", m.Values[k]))
-	}
-	MergeKeyVal(kv, m.Trace.KeyVal())
-	MergeKeyValWithPrefix(kv, KeyValFromMap(m.Context), "context_")
-	return kv
-}
-
-// SDK holds metadata about the app agent that produced the event
-type SDK struct {
-	Name         string           `json:"name,omitempty"`
-	Version      string           `json:"version,omitempty"`
-	Integrations []SDKIntegration `json:"integrations,omitempty"`
-}
-
-// KeyVal produces key->value representation of SDK metadata
-func (sdk SDK) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "name", sdk.Name)
-	KeyValAdd(kv, "version", sdk.Version)
-
-	if len(sdk.Integrations) > 0 {
-		integrations := make([]string, len(sdk.Integrations))
-
-		for i, integration := range sdk.Integrations {
-			integrations[i] = integration.String()
-		}
-
-		KeyValAdd(kv, "integrations", strings.Join(integrations, ","))
-	}
-
-	return kv
-}
-
-// SDKIntegration holds metadata about a plugin/integration on the app agent that collected and sent the event
-type SDKIntegration struct {
-	Name    string `json:"name,omitempty"`
-	Version string `json:"version,omitempty"`
-}
-
-func (i SDKIntegration) String() string {
-	return fmt.Sprintf("%s:%s", i.Name, i.Version)
-}
-
-// User holds metadata about the user related to an app event
-type User struct {
-	Email      string            `json:"email,omitempty"`
-	ID         string            `json:"id,omitempty"`
-	Username   string            `json:"username,omitempty"`
-	Attributes map[string]string `json:"attributes,omitempty"`
-}
-
-// KeyVal produces a key->value representation of User metadata
-func (u User) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "email", u.Email)
-	KeyValAdd(kv, "id", u.ID)
-	KeyValAdd(kv, "username", u.Username)
-	MergeKeyValWithPrefix(kv, KeyValFromMap(u.Attributes), "attr_")
-	return kv
-}
-
-// Meta holds metadata about an app event
-type Meta struct {
-	SDK     SDK     `json:"sdk,omitempty"`
-	App     App     `json:"app,omitempty"`
-	User    User    `json:"user,omitempty"`
-	Session Session `json:"session,omitempty"`
-	Page    Page    `json:"page,omitempty"`
-	Browser Browser `json:"browser,omitempty"`
-	View    View    `json:"view,omitempty"`
-}
-
-// KeyVal produces key->value representation of the app event metadata
-func (m Meta) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	MergeKeyValWithPrefix(kv, m.SDK.KeyVal(), "sdk_")
-	MergeKeyValWithPrefix(kv, m.App.KeyVal(), "app_")
-	MergeKeyValWithPrefix(kv, m.User.KeyVal(), "user_")
-	MergeKeyValWithPrefix(kv, m.Session.KeyVal(), "session_")
-	MergeKeyValWithPrefix(kv, m.Page.KeyVal(), "page_")
-	MergeKeyValWithPrefix(kv, m.Browser.KeyVal(), "browser_")
-	MergeKeyValWithPrefix(kv, m.View.KeyVal(), "view_")
-	return kv
-}
-
-// Session holds metadata about the browser session the event originates from
-type Session struct {
-	ID         string            `json:"id,omitempty"`
-	Attributes map[string]string `json:"attributes,omitempty"`
-}
-
-// KeyVal produces key->value representation of the Session metadata
-func (s Session) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "id", s.ID)
-	MergeKeyValWithPrefix(kv, KeyValFromMap(s.Attributes), "attr_")
-	return kv
-}
-
-// Page holds metadata about the web page the event originates from
-type Page struct {
-	ID         string            `json:"id,omitempty"`
-	URL        string            `json:"url,omitempty"`
-	Attributes map[string]string `json:"attributes,omitempty"`
-}
-
-// KeyVal produces key->value representation of Page metadata
-func (p Page) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "id", p.ID)
-	KeyValAdd(kv, "url", p.URL)
-	MergeKeyValWithPrefix(kv, KeyValFromMap(p.Attributes), "attr_")
-	return kv
-}
-
-// App holds metadata about the application the event originates from
-type App struct {
-	Name        string `json:"name,omitempty"`
-	Release     string `json:"release,omitempty"`
-	Version     string `json:"version,omitempty"`
-	Environment string `json:"environment,omitempty"`
-}
-
-// Event holds RUM event data
-type Event struct {
-	Name       string            `json:"name"`
-	Domain     string            `json:"domain,omitempty"`
-	Attributes map[string]string `json:"attributes,omitempty"`
-	Timestamp  time.Time         `json:"timestamp,omitempty"`
-	Trace      TraceContext      `json:"trace,omitempty"`
-}
-
-// KeyVal produces key->value representation of Event metadata
-func (e Event) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "timestamp", e.Timestamp.String())
-	KeyValAdd(kv, "kind", "event")
-	KeyValAdd(kv, "event_name", e.Name)
-	KeyValAdd(kv, "event_domain", e.Domain)
-	if e.Attributes != nil {
-		MergeKeyValWithPrefix(kv, KeyValFromMap(e.Attributes), "event_data_")
-	}
-	MergeKeyVal(kv, e.Trace.KeyVal())
-	return kv
-}
-
-// KeyVal produces key->value representation of App metadata
-func (a App) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "name", a.Name)
-	KeyValAdd(kv, "release", a.Release)
-	KeyValAdd(kv, "version", a.Version)
-	KeyValAdd(kv, "environment", a.Environment)
-	return kv
-}
-
-// Browser holds metadata about a client's browser
-type Browser struct {
-	Name    string `json:"name,omitempty"`
-	Version string `json:"version,omitempty"`
-	OS      string `json:"os,omitempty"`
-	Mobile  bool   `json:"mobile,omitempty"`
-}
-
-// KeyVal produces key->value representation of the Browser metadata
-func (b Browser) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "name", b.Name)
-	KeyValAdd(kv, "version", b.Version)
-	KeyValAdd(kv, "os", b.OS)
-	KeyValAdd(kv, "mobile", fmt.Sprintf("%v", b.Mobile))
-	return kv
-}
-
-// View holds metadata about a view
-type View struct {
-	Name string `json:"name,omitempty"`
-}
-
-// KeyVal produces key->value representation of View metadata
-func (v View) KeyVal() *KeyVal {
-	kv := NewKeyVal()
-	KeyValAdd(kv, "name", v.Name)
-	return kv
-}
diff --git a/internal/static/integrations/v2/app_agent_receiver/payload_test.go b/internal/static/integrations/v2/app_agent_receiver/payload_test.go
deleted file mode 100644
index b66792547a..0000000000
--- a/internal/static/integrations/v2/app_agent_receiver/payload_test.go
+++ /dev/null
@@ -1,142 +0,0 @@
-package app_agent_receiver
-
-import (
-	"encoding/json"
-	"os"
-	"path/filepath"
-	"testing"
-	"time"
-
-	"github.com/stretchr/testify/require"
-)
-
-func loadTestData(t *testing.T, file string) []byte {
-	t.Helper()
-	// Safe to disable, this is a test.
-	// nolint:gosec
-	content, err := os.ReadFile(filepath.Join("testdata", file))
-	require.NoError(t, err, "expected to be able to read file")
-	require.True(t, len(content) > 0)
-	return content
-}
-
-func TestUnmarshalPayloadJSON(t *testing.T) {
-	content := loadTestData(t, "payload.json")
-	var payload Payload
-	err := json.Unmarshal(content, &payload)
-	require.NoError(t, err)
-
-	now, err := time.Parse("2006-01-02T15:04:05Z0700", "2021-09-30T10:46:17.680Z")
-	require.NoError(t, err)
-
-	require.Equal(t, Meta{
-		SDK: SDK{
-			Name:    "grafana-frontend-agent",
-			Version: "1.0.0",
-		},
-		App: App{
-			Name:        "testapp",
-			Release:     "0.8.2",
-			Version:     "abcdefg",
-			Environment: "production",
-		},
-		User: User{
-			Username:   "domasx2",
-			ID:         "123",
-			Email:      "geralt@kaermorhen.org",
-			Attributes: map[string]string{"foo": "bar"},
-		},
-		Session: Session{
-			ID:         "abcd",
-			Attributes: map[string]string{"time_elapsed": "100s"},
-		},
-		Page: Page{
-			URL: "https://example.com/page",
-		},
-		Browser: Browser{
-			Name:    "chrome",
-			Version: "88.12.1",
-			OS:      "linux",
-			Mobile:  false,
-		},
-		View: View{
-			Name: "foobar",
-		},
-	}, payload.Meta)
-
-	require.Len(t, payload.Exceptions, 1)
-	require.Len(t, payload.Exceptions[0].Stacktrace.Frames, 26)
-	require.Equal(t, "Error", payload.Exceptions[0].Type)
-	require.Equal(t, "Cannot read property 'find' of undefined", payload.Exceptions[0].Value)
-	require.EqualValues(t, ExceptionContext{"ReactError": "Annoying Error", "component": "ReactErrorBoundary"}, payload.Exceptions[0].Context)
-
-	require.Equal(t, []Log{
-		{
-			Message:  "opened pricing page",
-			LogLevel: LogLevelInfo,
-			Context: map[string]string{
-				"component": "AppRoot",
-				"page":      "Pricing",
-			},
-			Timestamp: now,
-			Trace: TraceContext{
-				TraceID: "abcd",
-				SpanID:  "def",
-			},
-		},
-		{
-			Message:  "loading price list",
-			LogLevel: LogLevelTrace,
-			Context: map[string]string{
-				"component": "AppRoot",
-				"page":      "Pricing",
-			},
-			Timestamp: now,
-			Trace: TraceContext{
-				TraceID: "abcd",
-				SpanID:  "ghj",
-			},
-		},
-	}, payload.Logs)
-
-	require.Equal(t, []Event{
-		{
-			Name:      "click_login_button",
-			Domain:    "frontend",
-			Timestamp: now,
-			Attributes: map[string]string{
-				"foo": "bar",
-				"one": "two",
-			},
-			Trace: TraceContext{
-				TraceID: "abcd",
-				SpanID:  "def",
-			},
-		},
-		{
-			Name:      "click_reset_password_button",
-			Timestamp: now,
-		},
-	}, payload.Events)
-
-	require.Len(t, payload.Measurements, 1)
-
-	require.Equal(t, []Measurement{
-		{
-			Type: "foobar",
-			Values: map[string]float64{
-				"ttfp":  20.12,
-				"ttfcp": 22.12,
-				"ttfb":  14,
-			},
-			Timestamp: now,
-			Trace: TraceContext{
-				TraceID: "abcd",
-				SpanID:  "def",
-			},
-			Context: MeasurementContext{
-				"hello": "world",
-			},
-		},
-	}, payload.Measurements)
-}
diff --git a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go b/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go
deleted file mode 100644
index ea74c97fdf..0000000000
--- a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_exporter.go
+++ /dev/null
@@ -1,61 +0,0 @@
-package app_agent_receiver
-
-import (
-	"context"
-
-	"github.com/prometheus/client_golang/prometheus"
-)
-
-// ReceiverMetricsExporter is an app agent receiver exporter that will capture metrics
-// about counts of logs, exceptions, measurements, traces being ingested
-type ReceiverMetricsExporter struct {
-	totalLogs         prometheus.Counter
-	totalMeasurements prometheus.Counter
-	totalExceptions   prometheus.Counter
-	totalEvents       prometheus.Counter
-}
-
-// NewReceiverMetricsExporter creates a new ReceiverMetricsExporter
-func NewReceiverMetricsExporter(reg prometheus.Registerer) AppAgentReceiverExporter {
-	exp := &ReceiverMetricsExporter{
-		totalLogs: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "app_agent_receiver_logs_total",
-			Help: "Total number of ingested logs",
-		}),
-		totalMeasurements: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "app_agent_receiver_measurements_total",
-			Help: "Total number of ingested measurements",
-		}),
-		totalExceptions: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "app_agent_receiver_exceptions_total",
-			Help: "Total number of ingested exceptions",
-		}),
-		totalEvents: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "app_agent_receiver_events_total",
-			Help: "Total number of ingested events",
-		}),
-	}
-
-	reg.MustRegister(exp.totalLogs, exp.totalExceptions, exp.totalMeasurements, exp.totalEvents)
-
-	return exp
-}
-
-// Name of the exporter, for logging purposes
-func (re *ReceiverMetricsExporter) Name() string {
-	return "receiver metrics exporter"
-}
-
-// Export implements the AppDataExporter interface
-func (re *ReceiverMetricsExporter) Export(ctx context.Context, payload Payload) error {
-	re.totalExceptions.Add(float64(len(payload.Exceptions)))
-	re.totalLogs.Add(float64(len(payload.Logs)))
-	re.totalMeasurements.Add(float64(len(payload.Measurements)))
-	re.totalEvents.Add(float64(len(payload.Events)))
-	return nil
-}
-
-// Static typecheck tests
-var (
-	_ AppAgentReceiverExporter = (*ReceiverMetricsExporter)(nil)
-)
diff --git a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go b/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go
deleted file mode 100644
index 5fde03caad..0000000000
--- a/internal/static/integrations/v2/app_agent_receiver/receiver_metrics_test.go
+++ /dev/null
@@ -1,141 +0,0 @@
-package app_agent_receiver
-
-import (
-	"context"
-	"fmt"
-	"testing"
-
-	"github.com/prometheus/client_golang/prometheus"
-
-	"github.com/stretchr/testify/require"
-)
-
-type metricAssertion struct {
-	name  string
-	value float64
-}
-
-func testcase(t *testing.T, payload Payload, assertions []metricAssertion) {
-	ctx := context.Background()
-
-	reg := prometheus.NewRegistry()
-
-	exporter := NewReceiverMetricsExporter(reg)
-
-	err := exporter.Export(ctx, payload)
-	require.NoError(t, err)
-
-	metrics, err := reg.Gather()
-	require.NoError(t, err)
-
-	for _, assertion := range assertions {
-		found := false
-		for _, metric := range metrics {
-			if *metric.Name == assertion.name {
-				found = true
-				require.Len(t, metric.Metric, 1)
-				val := metric.Metric[0].Counter.Value
-				require.Equal(t, assertion.value, *val)
-				break
-			}
-		}
-		if !found {
-			require.Fail(t, fmt.Sprintf("metric [%s] not found", assertion.name))
-		}
-	}
-}
-
-func TestReceiverMetricsExport(t *testing.T) {
-	var payload Payload
-	payload.Logs = make([]Log, 2)
-	payload.Measurements = make([]Measurement, 3)
-	payload.Exceptions = make([]Exception, 4)
-	payload.Events = make([]Event, 5)
-	testcase(t, payload, []metricAssertion{
-		{
-			name:  "app_agent_receiver_logs_total",
-			value: 2,
-		},
-		{
-			name:  "app_agent_receiver_measurements_total",
-			value: 3,
-		},
-		{
-			name:  "app_agent_receiver_exceptions_total",
-			value: 4,
-		},
-		{
-			name:  "app_agent_receiver_events_total",
-			value: 5,
-		},
-	})
-}
-
-func TestReceiverMetricsExportLogsOnly(t *testing.T) {
-	var payload Payload
-	payload.Logs = []Log{
-		{},
-		{},
-	}
-	testcase(t, payload, []metricAssertion{
-		{
"app_agent_receiver_logs_total", - value: 2, - }, - { - name: "app_agent_receiver_measurements_total", - value: 0, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 0, - }, - }) -} - -func TestReceiverMetricsExportExceptionsOnly(t *testing.T) { - var payload Payload - payload.Exceptions = []Exception{ - {}, - {}, - {}, - {}, - } - testcase(t, payload, []metricAssertion{ - { - name: "app_agent_receiver_logs_total", - value: 0, - }, - { - name: "app_agent_receiver_measurements_total", - value: 0, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 4, - }, - }) -} - -func TestReceiverMetricsExportMeasurementsOnly(t *testing.T) { - var payload Payload - payload.Measurements = []Measurement{ - {}, - {}, - {}, - } - testcase(t, payload, []metricAssertion{ - { - name: "app_agent_receiver_logs_total", - value: 0, - }, - { - name: "app_agent_receiver_measurements_total", - value: 3, - }, - { - name: "app_agent_receiver_exceptions_total", - value: 0, - }, - }) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/sourcemaps.go b/internal/static/integrations/v2/app_agent_receiver/sourcemaps.go deleted file mode 100644 index fe8935dd0b..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/sourcemaps.go +++ /dev/null @@ -1,357 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "fmt" - "io" - "io/fs" - "net/http" - "net/url" - "os" - "path/filepath" - "regexp" - "strings" - "sync" - "text/template" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/go-sourcemap/sourcemap" - "github.com/prometheus/client_golang/prometheus" - "github.com/vincent-petithory/dataurl" -) - -// SourceMapStore is interface for a sourcemap service capable of transforming -// minified source locations to original source location -type SourceMapStore interface { - GetSourceMap(sourceURL string, release string) (*SourceMap, error) -} - -type httpClient interface { - Get(url string) (resp *http.Response, err error) -} - -// FileService is interface for a service that can be used to load source maps -// from file system -type fileService interface { - Stat(name string) (fs.FileInfo, error) - ReadFile(name string) ([]byte, error) -} - -type osFileService struct{} - -func (s *osFileService) Stat(name string) (fs.FileInfo, error) { - return os.Stat(name) -} - -func (s *osFileService) ReadFile(name string) ([]byte, error) { - return os.ReadFile(name) -} - -var reSourceMap = "//[#@]\\s(source(?:Mapping)?URL)=\\s*(?P\\S+)\r?\n?$" - -// SourceMap is a wrapper for go-sourcemap consumer -type SourceMap struct { - consumer *sourcemap.Consumer -} - -type sourceMapMetrics struct { - cacheSize *prometheus.CounterVec - downloads *prometheus.CounterVec - fileReads *prometheus.CounterVec -} - -type sourcemapFileLocation struct { - SourceMapFileLocation - pathTemplate *template.Template -} - -// RealSourceMapStore is an implementation of SourceMapStore -// that can download source maps or read them from file system -type RealSourceMapStore struct { - sync.Mutex - l log.Logger - httpClient httpClient - fileService fileService - config SourceMapConfig - cache map[string]*SourceMap - fileLocations []*sourcemapFileLocation - metrics *sourceMapMetrics -} - -// NewSourceMapStore creates an instance of SourceMapStore. 
-// httpClient and fileService will be instantiated to defaults if nil is provided -func NewSourceMapStore(l log.Logger, config SourceMapConfig, reg prometheus.Registerer, httpClient httpClient, fileService fileService) SourceMapStore { - if httpClient == nil { - httpClient = &http.Client{ - Timeout: config.DownloadTimeout, - } - } - - if fileService == nil { - fileService = &osFileService{} - } - - metrics := &sourceMapMetrics{ - cacheSize: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_sourcemap_cache_size", - Help: "number of items in source map cache, per origin", - }, []string{"origin"}), - downloads: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_sourcemap_downloads_total", - Help: "downloads by the source map service", - }, []string{"origin", "http_status"}), - fileReads: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "app_agent_receiver_sourcemap_file_reads_total", - Help: "source map file reads from file system, by origin and status", - }, []string{"origin", "status"}), - } - reg.MustRegister(metrics.cacheSize, metrics.downloads, metrics.fileReads) - - fileLocations := []*sourcemapFileLocation{} - - for _, configLocation := range config.FileSystem { - tpl, err := template.New(configLocation.Path).Parse(configLocation.Path) - if err != nil { - panic(err) - } - - fileLocations = append(fileLocations, &sourcemapFileLocation{ - SourceMapFileLocation: configLocation, - pathTemplate: tpl, - }) - } - - return &RealSourceMapStore{ - l: l, - httpClient: httpClient, - fileService: fileService, - config: config, - cache: make(map[string]*SourceMap), - metrics: metrics, - fileLocations: fileLocations, - } -} - -func (store *RealSourceMapStore) downloadFileContents(url string) ([]byte, error) { - resp, err := store.httpClient.Get(url) - if err != nil { - store.metrics.downloads.WithLabelValues(getOrigin(url), "?").Inc() - return nil, err - } - defer resp.Body.Close() - store.metrics.downloads.WithLabelValues(getOrigin(url), fmt.Sprint(resp.StatusCode)).Inc() - if resp.StatusCode != 200 { - return nil, fmt.Errorf("unexpected status %v", resp.StatusCode) - } - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - return body, nil -} - -func (store *RealSourceMapStore) downloadSourceMapContent(sourceURL string) (content []byte, resolvedSourceMapURL string, err error) { - level.Debug(store.l).Log("msg", "attempting to download source file", "url", sourceURL) - - result, err := store.downloadFileContents(sourceURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to download source file", "url", sourceURL, "err", err) - return nil, "", err - } - r := regexp.MustCompile(reSourceMap) - match := r.FindAllStringSubmatch(string(result), -1) - if len(match) == 0 { - level.Debug(store.l).Log("msg", "no source map url found in source", "url", sourceURL) - return nil, "", nil - } - sourceMapURL := match[len(match)-1][2] - - // inline sourcemap - if strings.HasPrefix(sourceMapURL, "data:") { - dataURL, err := dataurl.DecodeString(sourceMapURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to parse inline source map data url", "url", sourceURL, "err", err) - return nil, "", err - } - - level.Info(store.l).Log("msg", "successfully parsed inline source map data url", "url", sourceURL) - return dataURL.Data, sourceURL + ".map", nil - } - // remote sourcemap - resolvedSourceMapURL = sourceMapURL - - // if url is relative, attempt to resolve absolute - if !strings.HasPrefix(resolvedSourceMapURL, 
"http") { - base, err := url.Parse(sourceURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to parse source url", "url", sourceURL, "err", err) - return nil, "", err - } - relative, err := url.Parse(sourceMapURL) - if err != nil { - level.Debug(store.l).Log("msg", "failed to parse source map url", "url", sourceURL, "sourceMapURL", sourceMapURL, "err", err) - return nil, "", err - } - resolvedSourceMapURL = base.ResolveReference(relative).String() - level.Debug(store.l).Log("msg", "resolved absolute source map url", "url", sourceURL, "sourceMapURL", resolvedSourceMapURL) - } - level.Debug(store.l).Log("msg", "attempting to download source map file", "url", resolvedSourceMapURL) - result, err = store.downloadFileContents(resolvedSourceMapURL) - if err != nil { - level.Debug(store.l).Log("failed to download source map file", "url", resolvedSourceMapURL, "err", err) - return nil, "", err - } - return result, resolvedSourceMapURL, nil -} - -func (store *RealSourceMapStore) getSourceMapFromFileSystem(sourceURL string, release string, fileconf *sourcemapFileLocation) (content []byte, sourceMapURL string, err error) { - if len(sourceURL) == 0 || !strings.HasPrefix(sourceURL, fileconf.MinifiedPathPrefix) || strings.HasSuffix(sourceURL, "/") { - return nil, "", nil - } - - var rootPath bytes.Buffer - - err = fileconf.pathTemplate.Execute(&rootPath, struct{ Release string }{Release: cleanFilePathPart(release)}) - if err != nil { - return nil, "", err - } - - pathParts := []string{rootPath.String()} - for _, part := range strings.Split(strings.TrimPrefix(strings.Split(sourceURL, "?")[0], fileconf.MinifiedPathPrefix), "/") { - if len(part) > 0 && part != "." && part != ".." { - pathParts = append(pathParts, part) - } - } - mapFilePath := filepath.Join(pathParts...) 
+ ".map" - - if _, err := store.fileService.Stat(mapFilePath); err != nil { - store.metrics.fileReads.WithLabelValues(getOrigin(sourceURL), "not_found").Inc() - level.Debug(store.l).Log("msg", "source map not found on filesystem", "url", sourceURL, "file_path", mapFilePath) - return nil, "", nil - } - level.Debug(store.l).Log("msg", "source map found on filesystem", "url", mapFilePath, "file_path", mapFilePath) - - content, err = store.fileService.ReadFile(mapFilePath) - if err != nil { - store.metrics.fileReads.WithLabelValues(getOrigin(sourceURL), "error").Inc() - } else { - store.metrics.fileReads.WithLabelValues(getOrigin(sourceURL), "ok").Inc() - } - return content, sourceURL, err -} - -func (store *RealSourceMapStore) getSourceMapContent(sourceURL string, release string) (content []byte, sourceMapURL string, err error) { - //attempt to find in fs - for _, fileconf := range store.fileLocations { - content, sourceMapURL, err = store.getSourceMapFromFileSystem(sourceURL, release, fileconf) - if content != nil || err != nil { - return content, sourceMapURL, err - } - } - - //attempt to download - if strings.HasPrefix(sourceURL, "http") && urlMatchesOrigins(sourceURL, store.config.DownloadFromOrigins) { - return store.downloadSourceMapContent(sourceURL) - } - return nil, "", nil -} - -// GetSourceMap returns sourcemap for a given source url -func (store *RealSourceMapStore) GetSourceMap(sourceURL string, release string) (*SourceMap, error) { - store.Lock() - defer store.Unlock() - - cacheKey := fmt.Sprintf("%s__%s", sourceURL, release) - - if smap, ok := store.cache[cacheKey]; ok { - return smap, nil - } - content, sourceMapURL, err := store.getSourceMapContent(sourceURL, release) - if err != nil || content == nil { - store.cache[cacheKey] = nil - return nil, err - } - if content != nil { - consumer, err := sourcemap.Parse(sourceMapURL, content) - if err != nil { - store.cache[cacheKey] = nil - level.Debug(store.l).Log("msg", "failed to parse source map", "url", sourceMapURL, "release", release, "err", err) - return nil, err - } - level.Info(store.l).Log("msg", "successfully parsed source map", "url", sourceMapURL, "release", release) - smap := &SourceMap{ - consumer: consumer, - } - store.cache[cacheKey] = smap - store.metrics.cacheSize.WithLabelValues(getOrigin(sourceURL)).Inc() - return smap, nil - } - return nil, nil -} - -// ResolveSourceLocation resolves minified source location to original source location -func ResolveSourceLocation(store SourceMapStore, frame *Frame, release string) (*Frame, error) { - smap, err := store.GetSourceMap(frame.Filename, release) - if err != nil { - return nil, err - } - if smap == nil { - return nil, nil - } - - file, function, line, col, ok := smap.consumer.Source(frame.Lineno, frame.Colno) - if !ok { - return nil, nil - } - // unfortunately in many cases go-sourcemap fails to determine the original function name. - // not a big issue as long as file, line and column are correct - if len(function) == 0 { - function = "?" 
- } - return &Frame{ - Filename: file, - Lineno: line, - Colno: col, - Function: function, - }, nil -} - -// TransformException will attempt to resolve all minified source locations in the stacktrace with original source locations -func TransformException(store SourceMapStore, log log.Logger, ex *Exception, release string) *Exception { - if ex.Stacktrace == nil { - return ex - } - frames := []Frame{} - - for _, frame := range ex.Stacktrace.Frames { - mappedFrame, err := ResolveSourceLocation(store, &frame, release) - if err != nil { - level.Error(log).Log("msg", "Error resolving stack trace frame source location", "err", err) - frames = append(frames, frame) - } else if mappedFrame != nil { - frames = append(frames, *mappedFrame) - } else { - frames = append(frames, frame) - } - } - - return &Exception{ - Type: ex.Type, - Value: ex.Value, - Stacktrace: &Stacktrace{Frames: frames}, - Timestamp: ex.Timestamp, - } -} - -func cleanFilePathPart(x string) string { - return strings.TrimLeft(strings.ReplaceAll(strings.ReplaceAll(x, "\\", ""), "/", ""), ".") -} - -func getOrigin(URL string) string { - parsed, err := url.Parse(URL) - if err != nil { - return "?" - } - return fmt.Sprintf("%s://%s", parsed.Scheme, parsed.Host) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go b/internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go deleted file mode 100644 index e9f7a5bfd6..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/sourcemaps_test.go +++ /dev/null @@ -1,495 +0,0 @@ -package app_agent_receiver - -import ( - "bytes" - "errors" - "io" - "io/fs" - "net/http" - "path/filepath" - "testing" - - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" -) - -type mockHTTPClient struct { - responses []struct { - *http.Response - error - } - requests []string -} - -func (cl *mockHTTPClient) Get(url string) (resp *http.Response, err error) { - if len(cl.responses) > len(cl.requests) { - r := cl.responses[len(cl.requests)] - cl.requests = append(cl.requests, url) - return r.Response, r.error - } - return nil, errors.New("mockHTTPClient got more requests than expected") -} - -type mockFileService struct { - files map[string][]byte - stats []string - reads []string -} - -func (s *mockFileService) Stat(name string) (fs.FileInfo, error) { - s.stats = append(s.stats, name) - _, ok := s.files[name] - if !ok { - return nil, errors.New("file not found") - } - return nil, nil -} - -func (s *mockFileService) ReadFile(name string) ([]byte, error) { - s.reads = append(s.reads, name) - content, ok := s.files[name] - if ok { - return content, nil - } - return nil, errors.New("file not found") -} - -func newResponseFromTestData(t *testing.T, file string) *http.Response { - return &http.Response{ - Body: io.NopCloser(bytes.NewReader(loadTestData(t, file))), - StatusCode: 200, - } -} - -func mockException() *Exception { - return &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://localhost:1234/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://localhost:1234/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } -} - -func Test_RealSourceMapStore_DownloadSuccess(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"*"}, - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {newResponseFromTestData(t, "foo.js"), nil}, - 
{newResponseFromTestData(t, "foo.js.map"), nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, &mockFileService{}) - - exception := mockException() - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{"http://localhost:1234/foo.js", "http://localhost:1234/foo.js.map"}, httpClient.requests) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 37, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 6, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_DownloadError(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"*"}, - } - - resp := &http.Response{ - StatusCode: 500, - Body: io.NopCloser(bytes.NewReader([]byte{})), - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {resp, nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, &mockFileService{}) - - exception := mockException() - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{"http://localhost:1234/foo.js"}, httpClient.requests) - require.Equal(t, exception, transformed) -} - -func Test_RealSourceMapStore_DownloadHTTPOriginFiltering(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"http://bar.com/"}, - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {newResponseFromTestData(t, "foo.js"), nil}, - {newResponseFromTestData(t, "foo.js.map"), nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, &mockFileService{}) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://bar.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{"http://bar.com/foo.js", "http://bar.com/foo.js.map"}, httpClient.requests) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_ReadFromFileSystem(t *testing.T) { - conf := SourceMapConfig{ - Download: false, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: "http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - { - MinifiedPathPrefix: "http://bar.com/", - Path: filepath.FromSlash("/var/build/{{ .Release }}/"), - }, - }, - } - - mapFile := loadTestData(t, "foo.js.map") - - fileService := &mockFileService{ - files: map[string][]byte{ - filepath.FromSlash("/var/build/latest/foo.js.map"): mapFile, - filepath.FromSlash("/var/build/123/foo.js.map"): mapFile, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, 
conf, prometheus.NewRegistry(), &mockHTTPClient{}, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 6, - Filename: "http://foo.com/bar.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://bar.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - { - Colno: 5, - Filename: "http://baz.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/foo.js.map"), - filepath.FromSlash("/var/build/latest/bar.js.map"), - filepath.FromSlash("/var/build/123/foo.js.map"), - }, fileService.stats) - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/foo.js.map"), - filepath.FromSlash("/var/build/123/foo.js.map"), - }, fileService.reads) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 37, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 6, - }, - { - Colno: 6, - Filename: "http://foo.com/bar.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - { - Colno: 5, - Filename: "http://baz.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_ReadFromFileSystemAndDownload(t *testing.T) { - conf := SourceMapConfig{ - Download: true, - DownloadFromOrigins: []string{"*"}, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: "http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - }, - } - - mapFile := loadTestData(t, "foo.js.map") - - fileService := &mockFileService{ - files: map[string][]byte{ - filepath.FromSlash("/var/build/latest/foo.js.map"): mapFile, - }, - } - - httpClient := &mockHTTPClient{ - responses: []struct { - *http.Response - error - }{ - {newResponseFromTestData(t, "foo.js"), nil}, - {newResponseFromTestData(t, "foo.js.map"), nil}, - }, - } - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), httpClient, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/foo.js", - Function: "eval", - Lineno: 5, - }, - { - Colno: 5, - Filename: "http://bar.com/foo.js", - Function: "callUndefined", - Lineno: 6, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{filepath.FromSlash("/var/build/latest/foo.js.map")}, fileService.stats) - require.Equal(t, []string{filepath.FromSlash("/var/build/latest/foo.js.map")}, fileService.reads) - require.Equal(t, []string{"http://bar.com/foo.js", "http://bar.com/foo.js.map"}, httpClient.requests) - - expected := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 37, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 6, - }, - { - Colno: 2, - Filename: "/__parcel_source_root/demo/src/actions.ts", - Function: "?", - Lineno: 7, - }, - }, - }, - } - - require.Equal(t, *expected, *transformed) -} - -func Test_RealSourceMapStore_FilepathSanitized(t *testing.T) { - conf := SourceMapConfig{ - Download: false, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: 
"http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - }, - } - - fileService := &mockFileService{} - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), &mockHTTPClient{}, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/../../../etc/passwd", - Function: "eval", - Lineno: 5, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/etc/passwd.map"), - }, fileService.stats) - require.Len(t, fileService.reads, 0) - - require.Equal(t, *exception, *transformed) -} - -func Test_RealSourceMapStore_FilepathQueryParamsOmitted(t *testing.T) { - conf := SourceMapConfig{ - Download: false, - FileSystem: []SourceMapFileLocation{ - { - MinifiedPathPrefix: "http://foo.com/", - Path: filepath.FromSlash("/var/build/latest/"), - }, - }, - } - - fileService := &mockFileService{} - - logger := log.NewNopLogger() - - sourceMapStore := NewSourceMapStore(logger, conf, prometheus.NewRegistry(), &mockHTTPClient{}, fileService) - - exception := &Exception{ - Stacktrace: &Stacktrace{ - Frames: []Frame{ - { - Colno: 6, - Filename: "http://foo.com/static/foo.js?v=1233", - Function: "eval", - Lineno: 5, - }, - }, - }, - } - - transformed := TransformException(sourceMapStore, logger, exception, "123") - - require.Equal(t, []string{ - filepath.FromSlash("/var/build/latest/static/foo.js.map"), - }, fileService.stats) - require.Len(t, fileService.reads, 0) - - require.Equal(t, *exception, *transformed) -} diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js b/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js deleted file mode 100644 index b38652a4ee..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js +++ /dev/null @@ -1,39 +0,0 @@ -function throwError() { - throw new Error('This is a thrown error'); -} -function callUndefined() { - // eslint-disable-next-line no-eval - eval('test();'); -} -function callConsole(method) { - // eslint-disable-next-line no-console - console[method](`This is a console ${method} message`); -} -function fetchError() { - fetch('http://localhost:12345', { - method: 'POST' - }); -} -function promiseReject() { - new Promise((_accept, reject)=>{ - reject('This is a rejected promise'); - }); -} -function fetchSuccess() { - fetch('http://localhost:1234'); -} -function sendCustomMetric() { - window.grafanaJavaScriptAgent.api.pushMeasurement({ - type: 'custom', - values: { - my_custom_metric: Math.random() - } - }); -} -window.addEventListener('load', ()=>{ - window.grafanaJavaScriptAgent.api.pushLog([ - 'Manual event from Home' - ]); -}); - -//# sourceMappingURL=foo.js.map diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map b/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map deleted file mode 100644 index 0cd4998974..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/foo.js.map +++ /dev/null @@ -1 +0,0 @@ 
-{"mappings":"SAAS,UAAU,GAAG,CAAC;IACrB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAwB;AAC1C,CAAC;SAEQ,aAAa,GAAG,CAAC;IACxB,EAAmC,AAAnC,iCAAmC;IACnC,IAAI,CAAC,CAAS;AAChB,CAAC;SAEQ,WAAW,CAAC,MAAmD,EAAE,CAAC;IACzE,EAAsC,AAAtC,oCAAsC;IACtC,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,MAAM,CAAC,QAAQ;AACtD,CAAC;SAEQ,UAAU,GAAG,CAAC;IACrB,KAAK,CAAC,CAAwB,yBAAE,CAAC;QAC/B,MAAM,EAAE,CAAM;IAChB,CAAC;AACH,CAAC;SAEQ,aAAa,GAAG,CAAC;IACxB,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,GAAK,CAAC;QAChC,MAAM,CAAC,CAA4B;IACrC,CAAC;AACH,CAAC;SAEQ,YAAY,GAAG,CAAC;IACvB,KAAK,CAAC,CAAuB;AAC/B,CAAC;SAEQ,gBAAgB,GAAG,CAAC;IAC1B,MAAM,CAAS,sBAAsB,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;QAC1D,IAAI,EAAE,CAAQ;QACd,MAAM,EAAE,CAAC;YACP,gBAAgB,EAAE,IAAI,CAAC,MAAM;QAC/B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,gBAAgB,CAAC,CAAM,WAAQ,CAAC;IACpC,MAAM,CAAS,sBAAsB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAAA,CAAwB;IAAA,CAAC;AAC/E,CAAC","sources":["demo/src/actions.ts"],"sourcesContent":["function throwError() {\n throw new Error('This is a thrown error');\n}\n\nfunction callUndefined() {\n // eslint-disable-next-line no-eval\n eval('test();');\n}\n\nfunction callConsole(method: 'trace' | 'info' | 'log' | 'warn' | 'error') {\n // eslint-disable-next-line no-console\n console[method](`This is a console ${method} message`);\n}\n\nfunction fetchError() {\n fetch('http://localhost:12345', {\n method: 'POST',\n });\n}\n\nfunction promiseReject() {\n new Promise((_accept, reject) => {\n reject('This is a rejected promise');\n });\n}\n\nfunction fetchSuccess() {\n fetch('http://localhost:1234');\n}\n\nfunction sendCustomMetric() {\n (window as any).grafanaJavaScriptAgent.api.pushMeasurement({\n type: 'custom',\n values: {\n my_custom_metric: Math.random(),\n },\n });\n}\n\nwindow.addEventListener('load', () => {\n (window as any).grafanaJavaScriptAgent.api.pushLog(['Manual event from Home']);\n});\n"],"names":[],"version":3,"file":"index.28a7d598.js.map","sourceRoot":"/__parcel_source_root/"} \ No newline at end of file diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/payload.json b/internal/static/integrations/v2/app_agent_receiver/testdata/payload.json deleted file mode 100644 index b6ac7efce0..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/payload.json +++ /dev/null @@ -1,330 +0,0 @@ -{ - "logs": [ - { - "message": "opened pricing page", - "level": "info", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - }, - { - "message": "loading price list", - "level": "trace", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "ghj" - } - } - ], - "exceptions": [ - { - "type": "Error", - "value": "Cannot read property 'find' of undefined", - "stacktrace": { - "frames": [ - { - "colno": 42, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 8639 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "dispatchAction", - "in_app": true, - "lineno": 268095 - }, - { - "colno": 13, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "scheduleUpdateOnFiber", - "in_app": true, - "lineno": 273726 - }, - { - "colno": 7, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueue", - "in_app": true, - "lineno": 263362 - }, - { - "colno": 13, - "filename": 
"http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueueImpl", - "in_app": true, - "lineno": 263374 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "runWithPriority$1", - "lineno": 263325 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "unstable_runWithPriority", - "lineno": 291265 - }, - { - "colno": 30, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 263379 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performSyncWorkOnRoot", - "lineno": 274126 - }, - { - "colno": 11, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderRootSync", - "lineno": 274509 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "workLoopSync", - "lineno": 274543 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performUnitOfWork", - "lineno": 274606 - }, - { - "colno": 18, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork$1", - "in_app": true, - "lineno": 275746 - }, - { - "colno": 20, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork", - "lineno": 270944 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "updateFunctionComponent", - "lineno": 269291 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderWithHooks", - "lineno": 266969 - }, - { - "colno": 74, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 2600 - }, - { - "colno": 65, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "useGetBooksQuery", - "lineno": 1299 - }, - { - "colno": 85, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "Module.useQuery", - "lineno": 8495 - }, - { - "colno": 83, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useBaseQuery", - "in_app": true, - "lineno": 8656 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useDeepMemo", - "lineno": 8696 - }, - { - "colno": 55, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 8657 - }, - { - "colno": 47, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.execute", - "in_app": true, - "lineno": 7883 - }, - { - "colno": 23, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.getExecuteResult", - "lineno": 7944 - }, - { - "colno": 19, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData._this.getQueryResult", - "lineno": 7790 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "new ApolloError", - "in_app": true, - "lineno": 5164 - } - ] - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - }, - "context": { - "component": "ReactErrorBoundary", - "ReactError": "Annoying Error" - } - } - ], - "measurements": [ - { - "type": "foobar", - "values": { - "ttfp": 20.12, - "ttfcp": 22.12, - "ttfb": 14 - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - }, - 
"context": { - "hello": "world" - } - } - ], - "events": [ - { - "name": "click_login_button", - "domain": "frontend", - "attributes": { - "foo": "bar", - "one": "two" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - }, - { - "name": "click_reset_password_button", - "timestamp": "2021-09-30T10:46:17.680Z" - } - ], - "meta": { - "sdk": { - "name": "grafana-frontend-agent", - "version": "1.0.0" - }, - "app": { - "name": "testapp", - "release": "0.8.2", - "version": "abcdefg", - "environment": "production" - }, - "user": { - "username": "domasx2", - "id": "123", - "email": "geralt@kaermorhen.org", - "attributes": { - "foo": "bar" - } - }, - "session": { - "id": "abcd", - "attributes": { - "time_elapsed": "100s" - } - }, - "page": { - "url": "https://example.com/page" - }, - "browser": { - "name": "chrome", - "version": "88.12.1", - "os": "linux", - "mobile": false - }, - "view": { - "name": "foobar" - } - }, - "traces": { - "resourceSpans": [ - { - "resource": { - "attributes": [ - { - "key": "host.name", - "value": { - "stringValue": "testHost" - } - } - ] - }, - "instrumentationLibrarySpans": [ - { - "instrumentationLibrary": { - "name": "name", - "version": "version" - }, - "spans": [ - { - "traceId": "", - "spanId": "", - "parentSpanId": "", - "name": "testSpan", - "status": {} - }, - { - "traceId": "", - "spanId": "", - "parentSpanId": "", - "name": "testSpan2", - "status": {} - } - ] - } - ] - } - ] - } -} diff --git a/internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json b/internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json deleted file mode 100644 index eb8b18e565..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/testdata/payload_2.json +++ /dev/null @@ -1,393 +0,0 @@ -{ - "logs": [ - { - "message": "opened pricing page", - "level": "info", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - }, - { - "message": "loading price list", - "level": "trace", - "context": { - "component": "AppRoot", - "page": "Pricing" - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "ghj" - } - } - ], - "exceptions": [ - { - "type": "Error", - "value": "Cannot read property 'find' of undefined", - "stacktrace": { - "frames": [ - { - "colno": 42, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 8639 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "dispatchAction", - "in_app": true, - "lineno": 268095 - }, - { - "colno": 13, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "scheduleUpdateOnFiber", - "in_app": true, - "lineno": 273726 - }, - { - "colno": 7, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueue", - "in_app": true, - "lineno": 263362 - }, - { - "colno": 13, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "flushSyncCallbackQueueImpl", - "in_app": true, - "lineno": 263374 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "runWithPriority$1", - "lineno": 263325 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "unstable_runWithPriority", - "lineno": 291265 - }, - { - "colno": 30, - "filename": 
"http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 263379 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performSyncWorkOnRoot", - "lineno": 274126 - }, - { - "colno": 11, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderRootSync", - "lineno": 274509 - }, - { - "colno": 9, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "workLoopSync", - "lineno": 274543 - }, - { - "colno": 16, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "performUnitOfWork", - "lineno": 274606 - }, - { - "colno": 18, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork$1", - "in_app": true, - "lineno": 275746 - }, - { - "colno": 20, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "beginWork", - "lineno": 270944 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "updateFunctionComponent", - "lineno": 269291 - }, - { - "colno": 22, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "renderWithHooks", - "lineno": 266969 - }, - { - "colno": 74, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "?", - "in_app": true, - "lineno": 2600 - }, - { - "colno": 65, - "filename": "http://fe:3002/static/js/main.chunk.js", - "function": "useGetBooksQuery", - "lineno": 1299 - }, - { - "colno": 85, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "Module.useQuery", - "lineno": 8495 - }, - { - "colno": 83, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useBaseQuery", - "in_app": true, - "lineno": 8656 - }, - { - "colno": 14, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "useDeepMemo", - "lineno": 8696 - }, - { - "colno": 55, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "?", - "lineno": 8657 - }, - { - "colno": 47, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.execute", - "in_app": true, - "lineno": 7883 - }, - { - "colno": 23, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData.getExecuteResult", - "lineno": 7944 - }, - { - "colno": 19, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "QueryData._this.getQueryResult", - "lineno": 7790 - }, - { - "colno": 24, - "filename": "http://fe:3002/static/js/vendors~main.chunk.js", - "function": "new ApolloError", - "in_app": true, - "lineno": 5164 - } - ] - }, - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - } - ], - "measurements": [ - { - "values": { - "ttfp": 20.12, - "ttfcp": 22.12, - "ttfb": 14 - }, - "type": "page load", - "timestamp": "2021-09-30T10:46:17.680Z", - "trace": { - "trace_id": "abcd", - "span_id": "def" - } - } - ], - "meta": { - "sdk": { - "name": "grafana-frontend-agent", - "version": "1.0.0" - }, - "app": { - "name": "testapp", - "release": "0.8.2", - "version": "abcdefg", - "environment": "production" - }, - "user": { - "username": "domasx2", - "attributes": { - "foo": "bar" - } - }, - "session": { - "id": "abcd", - "attributes": { - "time_elapsed": "100s" - } - }, - "page": { - "url": "https://example.com/page" - }, - "browser": { - "name": "chrome", - "version": "88.12.1", - "os": "linux", - "mobile": false - }, - "view": { - "name": "foobar" - } - 
}, - "traces": { - "resourceSpans": [ - { - "resource": { - "attributes": [ - { - "key": "service.name", - "value": { - "stringValue": "unknown_service" - } - }, - { - "key": "telemetry.sdk.language", - "value": { - "stringValue": "webjs" - } - }, - { - "key": "telemetry.sdk.name", - "value": { - "stringValue": "opentelemetry" - } - }, - { - "key": "telemetry.sdk.version", - "value": { - "stringValue": "1.0.1" - } - } - ], - "droppedAttributesCount": 0 - }, - "instrumentationLibrarySpans": [ - { - "spans": [ - { - "traceId": "2d6f18da2663c7e477df23d8a8ad95b7", - "spanId": "50e64e3fac969cbb", - "parentSpanId": "9d9da6529d56706c", - "name": "documentFetch", - "kind": 1, - "startTimeUnixNano": 1646228314336100000, - "endTimeUnixNano": 1646228314351000000, - "attributes": [ - { - "key": "component", - "value": { - "stringValue": "document-load" - } - }, - { - "key": "http.response_content_length", - "value": { - "intValue": 1326 - } - } - ], - "droppedAttributesCount": 0, - "events": [ - { - "timeUnixNano": 1646228314336100000, - "name": "fetchStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342000000, - "name": "domainLookupStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342000000, - "name": "domainLookupEnd", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342000000, - "name": "connectStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314330100000, - "name": "secureConnectionStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342500000, - "name": "connectEnd", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314342700000, - "name": "requestStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314347000000, - "name": "responseStart", - "attributes": [], - "droppedAttributesCount": 0 - }, - { - "timeUnixNano": 1646228314351000000, - "name": "responseEnd", - "attributes": [], - "droppedAttributesCount": 0 - } - ], - "droppedEventsCount": 0, - "status": { - "code": 0 - }, - "links": [], - "droppedLinksCount": 0 - } - ], - "instrumentationLibrary": { - "name": "@opentelemetry/instrumentation-document-load", - "version": "0.27.1" - } - } - ] - } - ] - } -} diff --git a/internal/static/integrations/v2/app_agent_receiver/traces_exporter.go b/internal/static/integrations/v2/app_agent_receiver/traces_exporter.go deleted file mode 100644 index 941f829452..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/traces_exporter.go +++ /dev/null @@ -1,41 +0,0 @@ -package app_agent_receiver - -import ( - "context" - - "go.opentelemetry.io/collector/consumer" -) - -type tracesConsumerGetter func() (consumer.Traces, error) - -// TracesExporter will send traces to a traces instance -type TracesExporter struct { - getTracesConsumer tracesConsumerGetter -} - -// NewTracesExporter creates a trace exporter for the app agent receiver. 
-func NewTracesExporter(getTracesConsumer tracesConsumerGetter) AppAgentReceiverExporter { - return &TracesExporter{getTracesConsumer} -} - -// Name of the exporter, for logging purposes -func (te *TracesExporter) Name() string { - return "traces exporter" -} - -// Export implements the AppDataExporter interface -func (te *TracesExporter) Export(ctx context.Context, payload Payload) error { - if payload.Traces == nil { - return nil - } - consumer, err := te.getTracesConsumer() - if err != nil { - return err - } - return consumer.ConsumeTraces(ctx, payload.Traces.Traces) -} - -// Static typecheck tests -var ( - _ AppAgentReceiverExporter = (*TracesExporter)(nil) -) diff --git a/internal/static/integrations/v2/app_agent_receiver/traces_test.go b/internal/static/integrations/v2/app_agent_receiver/traces_test.go deleted file mode 100644 index 3e46227c45..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/traces_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package app_agent_receiver - -import ( - "context" - "errors" - "testing" - - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/consumer" - "go.opentelemetry.io/collector/pdata/ptrace" -) - -type mockTracesConsumer struct { - consumed []ptrace.Traces -} - -func (c *mockTracesConsumer) Capabilities() consumer.Capabilities { - return consumer.Capabilities{MutatesData: false} -} - -func (c *mockTracesConsumer) ConsumeTraces(ctx context.Context, td ptrace.Traces) error { - c.consumed = append(c.consumed, td) - return nil -} - -func Test_exportTraces_success(t *testing.T) { - ctx := context.Background() - tracesConsumer := &mockTracesConsumer{} - exporter := NewTracesExporter(func() (consumer.Traces, error) { return tracesConsumer, nil }) - payload := loadTestPayload(t) - err := exporter.Export(ctx, payload) - require.NoError(t, err) - require.Len(t, tracesConsumer.consumed, 1) -} - -func Test_exportTraces_noTracesInpayload(t *testing.T) { - ctx := context.Background() - tracesConsumer := &mockTracesConsumer{consumed: nil} - exporter := NewTracesExporter(func() (consumer.Traces, error) { return tracesConsumer, nil }) - payload := loadTestPayload(t) - payload.Traces = nil - err := exporter.Export(ctx, payload) - require.NoError(t, err) - require.Len(t, tracesConsumer.consumed, 0) -} - -func Test_exportTraces_noConsumer(t *testing.T) { - ctx := context.Background() - exporter := NewTracesExporter(func() (consumer.Traces, error) { return nil, errors.New("it dont work") }) - payload := loadTestPayload(t) - err := exporter.Export(ctx, payload) - require.Error(t, err, "it don't work") -} diff --git a/internal/static/integrations/v2/app_agent_receiver/utils.go b/internal/static/integrations/v2/app_agent_receiver/utils.go deleted file mode 100644 index e716cb2043..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/utils.go +++ /dev/null @@ -1,84 +0,0 @@ -package app_agent_receiver - -import ( - "fmt" - "sort" - - "github.com/grafana/agent/internal/util/wildcard" - om "github.com/wk8/go-ordered-map" -) - -// KeyVal is an ordered map of string to interface -type KeyVal = om.OrderedMap - -// NewKeyVal creates new empty KeyVal -func NewKeyVal() *KeyVal { - return om.New() -} - -// KeyValFromMap will instantiate KeyVal from a map[string]string -func KeyValFromMap(m map[string]string) *KeyVal { - kv := NewKeyVal() - keys := make([]string, 0, len(m)) - for k := range m { - keys = append(keys, k) - } - sort.Strings(keys) - for _, k := range keys { - KeyValAdd(kv, k, m[k]) - } - return kv -} - -// MergeKeyVal will 
merge source in target -func MergeKeyVal(target *KeyVal, source *KeyVal) { - for el := source.Oldest(); el != nil; el = el.Next() { - target.Set(el.Key, el.Value) - } -} - -// MergeKeyValWithPrefix will merge source in target, adding a prefix to each key being merged in -func MergeKeyValWithPrefix(target *KeyVal, source *KeyVal, prefix string) { - for el := source.Oldest(); el != nil; el = el.Next() { - target.Set(fmt.Sprintf("%s%s", prefix, el.Key), el.Value) - } -} - -// KeyValAdd adds a key + value string pair to kv -func KeyValAdd(kv *KeyVal, key string, value string) { - if len(value) > 0 { - kv.Set(key, value) - } -} - -// KeyValToInterfaceSlice converts KeyVal to []interface{}, typically used for logging -func KeyValToInterfaceSlice(kv *KeyVal) []interface{} { - slice := make([]interface{}, kv.Len()*2) - idx := 0 - for el := kv.Oldest(); el != nil; el = el.Next() { - slice[idx] = el.Key - idx++ - slice[idx] = el.Value - idx++ - } - return slice -} - -// KeyValToInterfaceMap converts KeyVal to map[string]interface -func KeyValToInterfaceMap(kv *KeyVal) map[string]interface{} { - retv := make(map[string]interface{}) - for el := kv.Oldest(); el != nil; el = el.Next() { - retv[fmt.Sprint(el.Key)] = el.Value - } - return retv -} - -// URLMatchesOrigins returns true if URL matches at least one of origin prefix. Wildcard '*' and '?' supported -func urlMatchesOrigins(URL string, origins []string) bool { - for _, origin := range origins { - if origin == "*" || wildcard.Match(origin+"*", URL) { - return true - } - } - return false -} diff --git a/internal/static/integrations/v2/app_agent_receiver/utils_test.go b/internal/static/integrations/v2/app_agent_receiver/utils_test.go deleted file mode 100644 index 6e32dd9626..0000000000 --- a/internal/static/integrations/v2/app_agent_receiver/utils_test.go +++ /dev/null @@ -1,36 +0,0 @@ -package app_agent_receiver - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func testCase(t *testing.T, URL string, origins []string, expected bool) { - result := urlMatchesOrigins(URL, origins) - require.Equal(t, expected, result) -} - -func Test_Origin_WildcardAlwaysMatches(t *testing.T) { - testCase(t, "http://example.com/static/foo.js", []string{"https://foo.com/", "*"}, true) -} - -func Test_Origin_Matches(t *testing.T) { - testCase(t, "http://example.com/static/foo.js", []string{"https://foo.com/", "http://example.com/"}, true) -} - -func Test_Origin_MatchesWithWildcard(t *testing.T) { - testCase(t, "http://foo.bar.com/static/foo.js", []string{"https://foo.com/", "http://*.bar.com/"}, true) -} - -func Test_Origin_DoesNotMatch(t *testing.T) { - testCase(t, "http://example.com/static/foo.js", []string{"https://foo.com/", "http://test.com/"}, false) -} - -func Test_Origin_DoesNotMatchWithWildcard(t *testing.T) { - testCase(t, "http://foo.bar.com/static/foo.js", []string{"https://foo.com/", "http://*.baz.com/"}, false) -} - -func Test_Origin_MatchesWithWildcardNoProtocol(t *testing.T) { - testCase(t, "http://foo.bar.com/static/foo.js", []string{"https://foo.com/", "*.bar.com/"}, true) -} diff --git a/internal/static/integrations/v2/autoscrape/appender.go b/internal/static/integrations/v2/autoscrape/appender.go deleted file mode 100644 index 04be1c7d6a..0000000000 --- a/internal/static/integrations/v2/autoscrape/appender.go +++ /dev/null @@ -1,42 +0,0 @@ -package autoscrape - -import ( - "fmt" - - "github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" - 
"github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" -) - -// failedAppender is used as the appender when an instance couldn't be found. -type failedAppender struct { - instanceName string -} - -var _ storage.Appender = (*failedAppender)(nil) - -func (fa *failedAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) Commit() error { - return fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) Rollback() error { - return fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} - -func (fa *failedAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return 0, fmt.Errorf("no such instance %s", fa.instanceName) -} diff --git a/internal/static/integrations/v2/autoscrape/autoscrape.go b/internal/static/integrations/v2/autoscrape/autoscrape.go index 8d1bd02ae0..5415f269ac 100644 --- a/internal/static/integrations/v2/autoscrape/autoscrape.go +++ b/internal/static/integrations/v2/autoscrape/autoscrape.go @@ -2,22 +2,9 @@ package autoscrape import ( - "context" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/server" - "github.com/oklog/run" - config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" prom_config "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/relabel" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" ) // DefaultGlobal holds default values for Global. @@ -53,262 +40,9 @@ type Config struct { MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"` // Relabel individual autoscrape metrics } -// InstanceStore is used to find instances to send metrics to. It is a subset -// of the pkg/metrics/instance.Manager interface. -type InstanceStore interface { - // GetInstance retrieves a ManagedInstance by name. - GetInstance(name string) (instance.ManagedInstance, error) -} - // ScrapeConfig bind a Prometheus scrape config with an instance to send // scraped metrics to. type ScrapeConfig struct { Instance string Config prom_config.ScrapeConfig } - -// Scraper is a metrics autoscraper. -type Scraper struct { - ctx context.Context - cancel context.CancelFunc - - log log.Logger - is InstanceStore - - // Prometheus doesn't pass contextual information at scrape time that could - // be used to change the behavior of generating an appender. This means that - // it's not yet possible for us to just run a single SD + scrape manager for - // all of our integrations, and we instead need to launch a pair of each for - // every instance we're writing to. 
- - iscrapersMut sync.RWMutex - iscrapers map[string]*instanceScraper - dialerFunc server.DialContextFunc -} - -// NewScraper creates a new autoscraper. Scraper will run until Stop is called. -// Instances to send scraped metrics to will be looked up via im. Scraping will -// use the provided dialerFunc to make connections if non-nil. -func NewScraper(l log.Logger, is InstanceStore, dialerFunc server.DialContextFunc) *Scraper { - l = log.With(l, "component", "autoscraper") - - ctx, cancel := context.WithCancel(context.Background()) - - s := &Scraper{ - ctx: ctx, - cancel: cancel, - - log: l, - is: is, - iscrapers: map[string]*instanceScraper{}, - dialerFunc: dialerFunc, - } - return s -} - -// ApplyConfig will apply the given jobs. An error will be returned for any -// jobs that failed to be applied. -func (s *Scraper) ApplyConfig(jobs []*ScrapeConfig) error { - s.iscrapersMut.Lock() - defer s.iscrapersMut.Unlock() - - var firstError error - saveError := func(e error) { - if firstError == nil { - firstError = e - } - } - - // Shard our jobs by target instance. - shardedJobs := map[string][]*prom_config.ScrapeConfig{} - for _, j := range jobs { - _, err := s.is.GetInstance(j.Instance) - if err != nil { - level.Error(s.log).Log("msg", "cannot autoscrape integration", "name", j.Config.JobName, "err", err) - saveError(err) - continue - } - - shardedJobs[j.Instance] = append(shardedJobs[j.Instance], &j.Config) - } - - // Then pass the jobs to instanceScraper, creating them if we need to. - for instance, jobs := range shardedJobs { - is, ok := s.iscrapers[instance] - if !ok { - is = newInstanceScraper(s.ctx, s.log, s.is, instance, config_util.DialContextFunc(s.dialerFunc)) - s.iscrapers[instance] = is - } - if err := is.ApplyConfig(jobs); err != nil { - // Not logging here; is.ApplyConfig already logged the errors. - saveError(err) - } - } - - // Garbage collect: If there's a key in s.scrapers that wasn't in - // shardedJobs, stop that unused scraper. - for instance, is := range s.iscrapers { - _, current := shardedJobs[instance] - if !current { - is.Stop() - delete(s.iscrapers, instance) - } - } - - return firstError -} - -// TargetsActive returns the set of active scrape targets for all target -// instances. -func (s *Scraper) TargetsActive() map[string]metrics.TargetSet { - s.iscrapersMut.RLock() - defer s.iscrapersMut.RUnlock() - - allTargets := make(map[string]metrics.TargetSet, len(s.iscrapers)) - for instance, is := range s.iscrapers { - allTargets[instance] = is.sm.TargetsActive() - } - return allTargets -} - -// Stop stops the Scraper. -func (s *Scraper) Stop() { - s.iscrapersMut.Lock() - defer s.iscrapersMut.Unlock() - - for instance, is := range s.iscrapers { - is.Stop() - delete(s.iscrapers, instance) - } - - s.cancel() -} - -// instanceScraper is a Scraper which always sends to the same instance. -type instanceScraper struct { - log log.Logger - - sd *discovery.Manager - sm *scrape.Manager - cancel context.CancelFunc - exited chan struct{} -} - -// newInstanceScraper runs a new instanceScraper. Must be stopped by calling -// Stop. 
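A usage sketch of the scraper API above, assuming this package's deleted identifiers (logger, instanceStore, jobA, and jobB stand in for real values; jobA and jobB are prom_config.ScrapeConfig):

	scraper := NewScraper(logger, instanceStore, nil) // nil dialer falls back to Go's default
	defer scraper.Stop()

	// Jobs are sharded by Instance, so both land in the same instanceScraper;
	// an unknown instance is logged and surfaces through the returned error.
	err := scraper.ApplyConfig([]*ScrapeConfig{
		{Instance: "primary", Config: jobA},
		{Instance: "primary", Config: jobB},
	})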
-func newInstanceScraper( - ctx context.Context, - l log.Logger, - s InstanceStore, - instanceName string, - dialerFunc config_util.DialContextFunc, -) *instanceScraper { - - ctx, cancel := context.WithCancel(ctx) - l = log.With(l, "target_instance", instanceName) - - sdOpts := []func(*discovery.Manager){ - discovery.Name("autoscraper/" + instanceName), - discovery.HTTPClientOptions( - // If dialerFunc is nil, scrape.NewManager will use Go's default dialer. - config_util.WithDialContextFunc(dialerFunc), - ), - } - sd := discovery.NewManager(ctx, l, sdOpts...) - sm := scrape.NewManager(&scrape.Options{ - HTTPClientOptions: []config_util.HTTPClientOption{ - // If dialerFunc is nil, scrape.NewManager will use Go's default dialer. - config_util.WithDialContextFunc(dialerFunc), - }, - }, l, &agentAppender{ - inst: instanceName, - is: s, - }) - - is := &instanceScraper{ - log: l, - - sd: sd, - sm: sm, - cancel: cancel, - exited: make(chan struct{}), - } - - go is.run() - return is -} - -type agentAppender struct { - inst string - is InstanceStore -} - -func (aa *agentAppender) Appender(ctx context.Context) storage.Appender { - mi, err := aa.is.GetInstance(aa.inst) - if err != nil { - return &failedAppender{instanceName: aa.inst} - } - return mi.Appender(ctx) -} - -func (is *instanceScraper) run() { - defer close(is.exited) - var rg run.Group - - rg.Add(func() error { - // Service discovery will stop whenever our parent context is canceled or - // if is.cancel is called. - err := is.sd.Run() - if err != nil { - level.Error(is.log).Log("msg", "autoscrape service discovery exited with error", "err", err) - } - return err - }, func(_ error) { - is.cancel() - }) - - rg.Add(func() error { - err := is.sm.Run(is.sd.SyncCh()) - if err != nil { - level.Error(is.log).Log("msg", "autoscrape scrape manager exited with error", "err", err) - } - return err - }, func(_ error) { - is.sm.Stop() - }) - - _ = rg.Run() -} - -func (is *instanceScraper) ApplyConfig(jobs []*prom_config.ScrapeConfig) error { - var firstError error - saveError := func(e error) { - if firstError == nil && e != nil { - firstError = e - } - } - - var ( - scrapeConfigs = make([]*prom_config.ScrapeConfig, 0, len(jobs)) - sdConfigs = make(map[string]discovery.Configs, len(jobs)) - ) - for _, job := range jobs { - sdConfigs[job.JobName] = job.ServiceDiscoveryConfigs - scrapeConfigs = append(scrapeConfigs, job) - } - if err := is.sd.ApplyConfig(sdConfigs); err != nil { - level.Error(is.log).Log("msg", "error when applying SD to autoscraper", "err", err) - saveError(err) - } - if err := is.sm.ApplyConfig(&prom_config.Config{ScrapeConfigs: scrapeConfigs}); err != nil { - level.Error(is.log).Log("msg", "error when applying jobs to scraper", "err", err) - saveError(err) - } - - return firstError -} - -func (is *instanceScraper) Stop() { - is.cancel() - <-is.exited -} diff --git a/internal/static/integrations/v2/autoscrape/autoscrape_test.go b/internal/static/integrations/v2/autoscrape/autoscrape_test.go deleted file mode 100644 index 9aaa148ecd..0000000000 --- a/internal/static/integrations/v2/autoscrape/autoscrape_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package autoscrape - -import ( - "context" - "net/http/httptest" - "testing" - "time" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/prometheus/common/model" - prom_config "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" - 
"github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -// TestAutoscrape is a basic end-to-end test of the autoscraper. -func TestAutoscrape(t *testing.T) { - srv := httptest.NewServer(promhttp.Handler()) - defer srv.Close() - - wt := util.NewWaitTrigger() - - noop := noOpAppender - noop.AppendFunc = func(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - wt.Trigger() - return noOpAppender.AppendFunc(ref, l, t, v) - } - - im := instance.MockManager{ - GetInstanceFunc: func(name string) (instance.ManagedInstance, error) { - assert.Equal(t, t.Name(), name) - return &mockInstance{app: &noop}, nil - }, - } - as := NewScraper(util.TestLogger(t), im, nil) - defer as.Stop() - - err := as.ApplyConfig([]*ScrapeConfig{{ - Instance: t.Name(), - Config: func() prom_config.ScrapeConfig { - cfg := prom_config.DefaultScrapeConfig - cfg.JobName = t.Name() - cfg.ScrapeInterval = model.Duration(time.Second) - cfg.ScrapeTimeout = model.Duration(time.Second / 2) - cfg.ServiceDiscoveryConfigs = discovery.Configs{ - discovery.StaticConfig{{ - Targets: []model.LabelSet{{ - model.AddressLabel: model.LabelValue(srv.Listener.Addr().String()), - }}, - Source: t.Name(), - }}, - } - return cfg - }(), - }}) - require.NoError(t, err, "failed to apply configs") - - // NOTE(rfratto): SD won't start sending targets until after 5 seconds. We'll - // need to at least wait that long. - time.Sleep(5 * time.Second) - - require.NoError(t, wt.Wait(5*time.Second), "timed out waiting for scrape") -} - -var globalRef atomic.Uint64 -var noOpAppender = mockAppender{ - AppendFunc: func(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - return storage.SeriesRef(globalRef.Inc()), nil - }, - CommitFunc: func() error { return nil }, - RollbackFunc: func() error { return nil }, - AppendExemplarFunc: func(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return storage.SeriesRef(globalRef.Inc()), nil - }, - AppendHistogramFunc: func(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return storage.SeriesRef(globalRef.Inc()), nil - }, -} - -type mockAppender struct { - AppendFunc func(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) - CommitFunc func() error - RollbackFunc func() error - AppendExemplarFunc func(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) - UpdateMetadataFunc func(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) - AppendHistogramFunc func(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) -} - -func (ma *mockAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - return ma.AppendFunc(ref, l, t, v) -} -func (ma *mockAppender) Commit() error { return ma.CommitFunc() } -func (ma *mockAppender) Rollback() error { return ma.RollbackFunc() } -func (ma *mockAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return 
ma.AppendExemplarFunc(ref, l, e) -} -func (ma *mockAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { - return ma.UpdateMetadataFunc(ref, l, m) -} -func (ma *mockAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return ma.AppendHistogramFunc(ref, l, t, h, fh) -} - -type mockInstance struct { - instance.NoOpInstance - app storage.Appender -} - -func (mi *mockInstance) Appender(ctx context.Context) storage.Appender { return mi.app } diff --git a/internal/static/integrations/v2/controller.go b/internal/static/integrations/v2/controller.go deleted file mode 100644 index b01a666119..0000000000 --- a/internal/static/integrations/v2/controller.go +++ /dev/null @@ -1,444 +0,0 @@ -package integrations - -import ( - "context" - "errors" - "fmt" - "net/http" - "net/url" - "path" - "sort" - "strings" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" - "github.com/prometheus/prometheus/discovery" - http_sd "github.com/prometheus/prometheus/discovery/http" - "go.uber.org/atomic" -) - -// controllerConfig holds a set of integration configs. -type controllerConfig []Config - -// controller manages a set of integrations. -type controller struct { - logger log.Logger - - mut sync.Mutex - cfg controllerConfig - globals Globals - integrations []*controlledIntegration // Running integrations - - runIntegrations chan []*controlledIntegration // Schedule integrations to run -} - -// newController creates a new Controller. Controller is intended to be -// embedded inside of integrations that may want to multiplex other -// integrations. -func newController(l log.Logger, cfg controllerConfig, globals Globals) (*controller, error) { - c := &controller{ - logger: l, - runIntegrations: make(chan []*controlledIntegration, 1), - } - if err := c.UpdateController(cfg, globals); err != nil { - return nil, err - } - return c, nil -} - -// run starts the controller and blocks until ctx is canceled. -func (c *controller) run(ctx context.Context) { - pool := newWorkerPool(ctx, c.logger) - defer pool.Close() - - for { - select { - case <-ctx.Done(): - level.Debug(c.logger).Log("msg", "controller exiting") - return - case newIntegrations := <-c.runIntegrations: - pool.Reload(newIntegrations) - } - } -} - -// controlledIntegration is a running Integration. A running integration is -// identified uniquely by its id. -type controlledIntegration struct { - id integrationID - i Integration - c Config // Config that generated i. Used for changing to see if a config changed. - running atomic.Bool -} - -func (ci *controlledIntegration) Running() bool { - return ci.running.Load() -} - -// integrationID uses a tuple of Name and Identifier to uniquely identify an -// integration. -type integrationID struct{ Name, Identifier string } - -func (id integrationID) String() string { - return fmt.Sprintf("%s/%s", id.Name, id.Identifier) -} - -// UpdateController updates the Controller with new Controller and -// IntegrationOptions. -// -// UpdateController updates running integrations. Extensions can be -// recalculated by calling relevant methods like Handler or Targets. 
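Integrations are keyed by the (name, identifier) tuple above; a small illustration of the deleted integrationID type, which UpdateController below relies on to reject duplicates:

	id := integrationID{Name: "mysqld_exporter", Identifier: "server-a"}
	_ = id.String() // "mysqld_exporter/server-a"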
-func (c *controller) UpdateController(cfg controllerConfig, globals Globals) error {
-	c.mut.Lock()
-	defer c.mut.Unlock()
-
-	// Ensure that no singleton integration is defined twice
-	var (
-		duplicatedSingletons []string
-		singletonSet         = make(map[string]struct{})
-	)
-	for _, cfg := range cfg {
-		t, _ := RegisteredType(cfg)
-		if t != TypeSingleton {
-			continue
-		}
-
-		if _, exists := singletonSet[cfg.Name()]; exists {
-			duplicatedSingletons = append(duplicatedSingletons, cfg.Name())
-			continue
-		}
-		singletonSet[cfg.Name()] = struct{}{}
-	}
-	if len(duplicatedSingletons) == 1 {
-		return fmt.Errorf("integration %q may only be defined once", duplicatedSingletons[0])
-	} else if len(duplicatedSingletons) > 1 {
-		list := strings.Join(duplicatedSingletons, ", ")
-		return fmt.Errorf("the following integrations may only be defined once each: %s", list)
-	}
-
-	integrationIDMap := map[integrationID]struct{}{}
-
-	integrations := make([]*controlledIntegration, 0, len(cfg))
-
-NextConfig:
-	for _, ic := range cfg {
-		name := ic.Name()
-
-		identifier, err := ic.Identifier(globals)
-		if err != nil {
-			return fmt.Errorf("could not build identifier for integration %q: %w", name, err)
-		}
-
-		if err := ic.ApplyDefaults(globals); err != nil {
-			return fmt.Errorf("failed to apply defaults for %s/%s: %w", name, identifier, err)
-		}
-
-		id := integrationID{Name: name, Identifier: identifier}
-		if _, exist := integrationIDMap[id]; exist {
-			return fmt.Errorf("multiple instance names %q in integration %q", identifier, name)
-		}
-		integrationIDMap[id] = struct{}{}
-
-		// Now that we know the ID for an integration, we can check to see if it's
-		// running and can be dynamically updated.
-		for _, ci := range c.integrations {
-			if ci.id != id {
-				continue
-			}
-
-			// If the configs haven't changed, then we don't need to do anything.
-			if CompareConfigs(ci.c, ic) {
-				integrations = append(integrations, ci)
-				continue NextConfig
-			}
-
-			if ui, ok := ci.i.(UpdateIntegration); ok {
-				if err := ui.ApplyConfig(ic, globals); errors.Is(err, ErrInvalidUpdate) {
-					level.Warn(c.logger).Log("msg", "failed to dynamically update integration; will recreate", "integration", name, "instance", identifier, "err", err)
-					break
-				} else if err != nil {
-					return fmt.Errorf("failed to update %s integration %q: %w", name, identifier, err)
-				} else {
-					// Update succeeded; re-use the running one and go to the next
-					// integration to process.
-					integrations = append(integrations, ci)
-					continue NextConfig
-				}
-			}
-
-			// We found the integration to update: we can stop this loop now.
-			break
-		}
-
-		logger := log.With(c.logger, "integration", name, "instance", identifier)
-		integration, err := ic.NewIntegration(logger, globals)
-		if err != nil {
-			return fmt.Errorf("failed to construct %s integration %q: %w", name, identifier, err)
-		}
-
-		// Create a new controlled integration.
-		integrations = append(integrations, &controlledIntegration{
-			id: id,
-			i:  integration,
-			c:  ic,
-		})
-	}
-
-	// Schedule integrations to run
-	c.runIntegrations <- integrations
-
-	c.cfg = cfg
-	c.globals = globals
-	c.integrations = integrations
-	return nil
-}
-
-// Handler returns an HTTP handler for the controller and its integrations.
-// Handler will pass through requests to other running integrations. Handler
-// always returns an http.Handler regardless of error.
-//
-// Handler is expensive to compute and should only be done after reloading the
-// config.
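A mounting sketch, assuming the deleted package-internal API (logger, cfgs, and globals stand in for real values; httpMux is a hypothetical *http.ServeMux):

	ctrl, err := newController(logger, cfgs, globals)
	if err != nil {
		return err
	}
	h, err := ctrl.Handler("/integrations/")
	if err != nil {
		level.Warn(logger).Log("msg", "building integrations handler", "err", err)
	}
	httpMux.Handle("/integrations/", h) // h is non-nil even when err != nil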
-func (c *controller) Handler(prefix string) (http.Handler, error) {
-	var firstErr error
-	saveFirstErr := func(err error) {
-		if firstErr == nil {
-			firstErr = err
-		}
-	}
-
-	r := mux.NewRouter()
-
-	err := c.forEachIntegration(prefix, func(ci *controlledIntegration, iprefix string) {
-		id := ci.id
-
-		i, ok := ci.i.(HTTPIntegration)
-		if !ok {
-			return
-		}
-
-		handler, err := i.Handler(iprefix + "/")
-		if err != nil {
-			saveFirstErr(fmt.Errorf("could not generate HTTP handler for %s integration %q: %w", id.Name, id.Identifier, err))
-			return
-		} else if handler == nil {
-			return
-		}
-
-		// Anything that matches the integrationPrefix should be passed to the handler.
-		// The two registrations below are separate because if one instance name is a
-		// prefix of another (i.e., localhost and localhost2), localhost2 would never
-		// be called since localhost always takes precedence. Appending / fixes this,
-		// but to keep the old behavior we must ensure /localhost and localhost2 also
-		// work, hence the second HandleFunc below this one.
-		// https://github.com/grafana/agent/issues/1718
-		hfunc := func(rw http.ResponseWriter, r *http.Request) {
-			if !ci.Running() {
-				http.Error(rw, fmt.Sprintf("%s integration instance %q not running", id.Name, id.Identifier), http.StatusServiceUnavailable)
-				return
-			}
-			handler.ServeHTTP(rw, r)
-		}
-		r.PathPrefix(iprefix + "/").HandlerFunc(hfunc)
-		// Handle calling the iprefix itself
-		r.HandleFunc(iprefix, hfunc)
-	})
-	if err != nil {
-		level.Warn(c.logger).Log("msg", "error when iterating over integrations to build HTTP handlers", "err", err)
-	}
-
-	// TODO(rfratto): navigation page for exact prefix match
-
-	return r, firstErr
-}
-
-// forEachIntegration calculates the prefix for each integration and calls f.
-// prefix will not end in /.
-func (c *controller) forEachIntegration(basePrefix string, f func(ci *controlledIntegration, iprefix string)) error {
-	c.mut.Lock()
-	defer c.mut.Unlock()
-
-	// Pre-populate a mapping of integration name -> identifier. If there are
-	// two instances of the same integration, we want to ensure unique routing.
-	//
-	// This special logic is done for backwards compatibility with the original
-	// design of integrations.
-	identifiersMap := map[string][]string{}
-	for _, i := range c.integrations {
-		identifiersMap[i.id.Name] = append(identifiersMap[i.id.Name], i.id.Identifier)
-	}
-
-	usedPrefixes := map[string]struct{}{}
-
-	for _, ci := range c.integrations {
-		id := ci.id
-		multipleInstances := len(identifiersMap[id.Name]) > 1
-
-		var integrationPrefix string
-		if multipleInstances {
-			// i.e., /integrations/mysqld_exporter/server-a
-			integrationPrefix = path.Join(basePrefix, id.Name, id.Identifier)
-		} else {
-			// i.e., /integrations/node_exporter
-			integrationPrefix = path.Join(basePrefix, id.Name)
-		}
-
-		f(ci, integrationPrefix)
-
-		if _, exist := usedPrefixes[integrationPrefix]; exist {
-			return fmt.Errorf("BUG: duplicate integration prefix %q", integrationPrefix)
-		}
-		usedPrefixes[integrationPrefix] = struct{}{}
-	}
-	return nil
-}
-
-// Targets returns the current set of targets across all integrations. Use opts
-// to customize which targets are returned.
-func (c *controller) Targets(ep Endpoint, opts TargetOptions) []*targetGroup {
-	// Grab the integrations as fast as possible. We don't want to spend too much
-	// time holding the mutex.
- type prefixedMetricsIntegration struct { - id integrationID - i MetricsIntegration - ep Endpoint - } - var mm []prefixedMetricsIntegration - - err := c.forEachIntegration(ep.Prefix, func(ci *controlledIntegration, iprefix string) { - // Best effort liveness check. They might stop running when we request - // their targets, which is fine, but we should save as much work as we - // can. - if !ci.Running() { - return - } - if mi, ok := ci.i.(MetricsIntegration); ok { - ep := Endpoint{Host: ep.Host, Prefix: iprefix} - mm = append(mm, prefixedMetricsIntegration{id: ci.id, i: mi, ep: ep}) - } - }) - if err != nil { - level.Warn(c.logger).Log("msg", "error when iterating over integrations to get targets", "err", err) - } - - var tgs []*targetGroup - for _, mi := range mm { - // If we're looking for a subset of integrations, filter out anything that doesn't match. - if len(opts.Integrations) > 0 && !stringSliceContains(opts.Integrations, mi.id.Name) { - continue - } - // If we're looking for a specific instance, filter out anything that doesn't match. - if opts.Instance != "" && mi.id.Identifier != opts.Instance { - continue - } - - for _, tgt := range mi.i.Targets(mi.ep) { - tgs = append(tgs, (*targetGroup)(tgt)) - } - } - sort.Slice(tgs, func(i, j int) bool { - return tgs[i].Source < tgs[j].Source - }) - return tgs -} - -func stringSliceContains(ss []string, s string) bool { - for _, check := range ss { - if check == s { - return true - } - } - return false -} - -// TargetOptions controls which targets should be returned by the subsystem. -type TargetOptions struct { - // Integrations is the set of integrations to return. An empty slice will - // default to returning all integrations. - Integrations []string - // Instance matches a specific instance from all integrations. An empty - // string will match any instance. - Instance string -} - -// TargetOptionsFromParams creates TargetOptions from parsed URL query parameters. -func TargetOptionsFromParams(u url.Values) (TargetOptions, error) { - var to TargetOptions - - rawIntegrations := u.Get("integrations") - if rawIntegrations != "" { - rawIntegrations, err := url.QueryUnescape(rawIntegrations) - if err != nil { - return to, fmt.Errorf("invalid value for integrations: %w", err) - } - to.Integrations = strings.Split(rawIntegrations, ",") - } - - rawInstance := u.Get("instance") - if rawInstance != "" { - rawInstance, err := url.QueryUnescape(rawInstance) - if err != nil { - return to, fmt.Errorf("invalid value for instance: %w", err) - } - to.Instance = rawInstance - } - - return to, nil -} - -// ToParams will convert to into URL query parameters. -func (to TargetOptions) ToParams() url.Values { - p := make(url.Values) - if len(to.Integrations) != 0 { - p.Set("integrations", url.QueryEscape(strings.Join(to.Integrations, ","))) - } - if to.Instance != "" { - p.Set("instance", url.QueryEscape(to.Instance)) - } - return p -} - -// ScrapeConfigs returns a set of scrape configs to use for self-scraping. -// sdConfig should contain the full URL where the integrations SD API is -// exposed. ScrapeConfigs will inject unique query parameters per integration -// to limit what will be discovered. -func (c *controller) ScrapeConfigs(prefix string, sdConfig *http_sd.SDConfig) []*autoscrape.ScrapeConfig { - // Grab the integrations as fast as possible. We don't want to spend too much - // time holding the mutex. 
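For illustration (not in the original change), the per-integration SD URL built below looks like this, assuming sdConfig.URL points at the targets API:

	opts := TargetOptions{Integrations: []string{"node_exporter"}, Instance: "node_exporter"}
	u := sdConfig.URL + "?" + opts.ToParams().Encode()
	// e.g. <targets API URL>?instance=node_exporter&integrations=node_exporter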
- type prefixedMetricsIntegration struct { - id integrationID - i MetricsIntegration - prefix string - } - var mm []prefixedMetricsIntegration - - err := c.forEachIntegration(prefix, func(ci *controlledIntegration, iprefix string) { - if mi, ok := ci.i.(MetricsIntegration); ok { - mm = append(mm, prefixedMetricsIntegration{id: ci.id, i: mi, prefix: iprefix}) - } - }) - if err != nil { - level.Warn(c.logger).Log("msg", "error when iterating over integrations to get scrape configs", "err", err) - } - - var cfgs []*autoscrape.ScrapeConfig - for _, mi := range mm { - // sdConfig will be pointing to the targets API. By default, this returns absolutely everything. - // We want to use the query parameters to inform the API to only return - // specific targets. - opts := TargetOptions{ - Integrations: []string{mi.id.Name}, - Instance: mi.id.Identifier, - } - - integrationSDConfig := *sdConfig - integrationSDConfig.URL = sdConfig.URL + "?" + opts.ToParams().Encode() - sds := discovery.Configs{&integrationSDConfig} - cfgs = append(cfgs, mi.i.ScrapeConfigs(sds)...) - } - sort.Slice(cfgs, func(i, j int) bool { - return cfgs[i].Config.JobName < cfgs[j].Config.JobName - }) - return cfgs -} diff --git a/internal/static/integrations/v2/controller_httpintegration_test.go b/internal/static/integrations/v2/controller_httpintegration_test.go deleted file mode 100644 index ee817a1c53..0000000000 --- a/internal/static/integrations/v2/controller_httpintegration_test.go +++ /dev/null @@ -1,259 +0,0 @@ -package integrations - -import ( - "fmt" - "io" - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/go-kit/log" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" -) - -// -// Tests for controller's utilization of the HTTPIntegration interface. -// - -func Test_controller_Handler_Sync(t *testing.T) { - httpConfigFromID := func(t *testing.T, name, identifier string) Config { - t.Helper() - - cfg := mockConfigNameTuple(t, name, identifier) - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - return http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) { - // We should never reach here since we don't run the integrations. - rw.WriteHeader(http.StatusBadRequest) - }), nil - }, - } - return i, nil - } - - return cfg - } - - cfg := controllerConfig{httpConfigFromID(t, "foo", "bar")} - ctrl, err := newController(util.TestLogger(t), cfg, Globals{}) - require.NoError(t, err) - - handler, err := ctrl.Handler("/integrations/") - require.NoError(t, err) - - srv := httptest.NewServer(handler) - - resp, err := srv.Client().Get(srv.URL + "/integrations/foo/bar") - require.NoError(t, err) - require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) -} - -// Test_controller_HTTPIntegration_Prefixes ensures that the controller will assign -// appropriate prefixes to HTTPIntegrations. 
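The scheme under test is the one forEachIntegration (earlier in this diff) implements; concretely:

	unique names ("foo","bar"), ("fizz","buzz"), ("hello","world") -> /integrations/foo/, /integrations/fizz/, /integrations/hello/
	duplicate name ("foo","bar"), ("foo","buzz")                   -> /integrations/foo/bar/, /integrations/foo/buzz/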
-func Test_controller_HTTPIntegration_Prefixes(t *testing.T) { - httpConfigFromID := func(t *testing.T, prefixes *[]string, name, identifier string) Config { - t.Helper() - - cfg := mockConfigNameTuple(t, name, identifier) - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - *prefixes = append(*prefixes, prefix) - return http.NotFoundHandler(), nil - }, - } - return i, nil - } - - return cfg - } - - t.Run("fully unique", func(t *testing.T) { - var prefixes []string - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - httpConfigFromID(t, &prefixes, "foo", "bar"), - httpConfigFromID(t, &prefixes, "fizz", "buzz"), - httpConfigFromID(t, &prefixes, "hello", "world"), - }, - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - _, err = ctrl.Handler("/integrations/") - require.NoError(t, err) - - expect := []string{ - "/integrations/foo/", - "/integrations/fizz/", - "/integrations/hello/", - } - require.ElementsMatch(t, prefixes, expect) - }) - - t.Run("multiple instances", func(t *testing.T) { - var prefixes []string - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - httpConfigFromID(t, &prefixes, "foo", "bar"), - httpConfigFromID(t, &prefixes, "foo", "buzz"), - httpConfigFromID(t, &prefixes, "hello", "world"), - }, - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - _, err = ctrl.Handler("/integrations/") - require.NoError(t, err) - - expect := []string{ - "/integrations/foo/bar/", - "/integrations/foo/buzz/", - "/integrations/hello/", - } - require.ElementsMatch(t, prefixes, expect) - }) -} - -// Test_controller_HTTPIntegration_Routing ensures that the controller will route -// requests to the appropriate integration. 
-func Test_controller_HTTPIntegration_Routing(t *testing.T) { - httpConfigFromID := func(t *testing.T, name, identifier string) Config { - t.Helper() - - cfg := mockConfigNameTuple(t, name, identifier) - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - fmt.Fprintf(rw, "prefix=%s, path=%s", prefix, r.URL.Path) - }), nil - }, - } - return i, nil - } - - return cfg - } - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - httpConfigFromID(t, "foo", "bar"), - httpConfigFromID(t, "foo", "buzz"), - httpConfigFromID(t, "hello", "world"), - }, - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - handler, err := ctrl.Handler("/integrations/") - require.NoError(t, err) - - srv := httptest.NewServer(handler) - - getResponse := func(t *testing.T, path string) string { - t.Helper() - resp, err := srv.Client().Get(srv.URL + path) - require.NoError(t, err) - defer resp.Body.Close() - - var sb strings.Builder - _, err = io.Copy(&sb, resp.Body) - require.NoError(t, err) - return sb.String() - } - - tt := []struct { - path, expect string - }{ - {"/integrations/foo/bar", "prefix=/integrations/foo/bar/, path=/integrations/foo/bar"}, - {"/integrations/foo/bar/", "prefix=/integrations/foo/bar/, path=/integrations/foo/bar/"}, - {"/integrations/foo/bar/extra", "prefix=/integrations/foo/bar/, path=/integrations/foo/bar/extra"}, - } - - for _, tc := range tt { - require.Equal(t, tc.expect, getResponse(t, tc.path)) - } -} - -// Test_controller_HTTPIntegration_NestedRouting ensures that the controller -// will work with nested routers. 
-func Test_controller_HTTPIntegration_NestedRouting(t *testing.T) { - cfg := mockConfigNameTuple(t, "test", "test") - cfg.NewIntegrationFunc = func(log.Logger, Globals) (Integration, error) { - i := mockHTTPIntegration{ - Integration: NoOpIntegration, - HandlerFunc: func(prefix string) (http.Handler, error) { - r := mux.NewRouter() - r.StrictSlash(true) - - r.HandleFunc(prefix, func(rw http.ResponseWriter, r *http.Request) { - fmt.Fprintf(rw, "prefix=%s, path=%s", prefix, r.URL.Path) - }) - - r.HandleFunc(prefix+"greet", func(rw http.ResponseWriter, _ *http.Request) { - fmt.Fprintf(rw, "Hello, world!") - }) - return r, nil - }, - } - return i, nil - } - - ctrl, err := newController(util.TestLogger(t), controllerConfig{cfg}, Globals{}) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - handler, err := ctrl.Handler("/integrations/") - require.NoError(t, err) - - srv := httptest.NewServer(handler) - - getResponse := func(t *testing.T, path string) string { - t.Helper() - resp, err := srv.Client().Get(srv.URL + path) - require.NoError(t, err) - defer resp.Body.Close() - - var sb strings.Builder - _, err = io.Copy(&sb, resp.Body) - require.NoError(t, err) - return sb.String() - } - - tt := []struct { - path, expect string - }{ - {"/integrations/test", "prefix=/integrations/test/, path=/integrations/test/"}, - {"/integrations/test/", "prefix=/integrations/test/, path=/integrations/test/"}, - {"/integrations/test/greet", "Hello, world!"}, - } - - for _, tc := range tt { - require.Equal(t, tc.expect, getResponse(t, tc.path)) - } -} - -type mockHTTPIntegration struct { - Integration - HandlerFunc func(prefix string) (http.Handler, error) -} - -func (m mockHTTPIntegration) Handler(prefix string) (http.Handler, error) { - return m.HandlerFunc(prefix) -} diff --git a/internal/static/integrations/v2/controller_metricsintegration_test.go b/internal/static/integrations/v2/controller_metricsintegration_test.go deleted file mode 100644 index 67222e61fe..0000000000 --- a/internal/static/integrations/v2/controller_metricsintegration_test.go +++ /dev/null @@ -1,184 +0,0 @@ -package integrations - -import ( - "context" - nethttp "net/http" - "testing" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/model" - prom_config "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/http" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/stretchr/testify/require" -) - -// -// Tests for controller's utilization of the MetricsIntegration interface. 
-// - -func Test_controller_MetricsIntegration_Targets(t *testing.T) { - integrationWithTarget := func(targetName string) Integration { - return mockMetricsIntegration{ - HTTPIntegration: newWaitStartedIntegration(), - TargetsFunc: func(Endpoint) []*targetgroup.Group { - return []*targetgroup.Group{{ - Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(targetName)}}, - }} - }, - ScrapeConfigsFunc: func(c discovery.Configs) []*autoscrape.ScrapeConfig { return nil }, - } - } - - integrations := []Config{ - mockConfigNameTuple(t, "a", "instanceA").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("a"), nil - }), - mockConfigNameTuple(t, "b", "instanceB").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("b"), nil - }), - } - - // waitIntegrations starts a controller and waits for all of its integrations - // to run. - waitIntegrations := func(t *testing.T, ctrl *controller) { - t.Helper() - _ = newSyncController(t, ctrl) - err := ctrl.forEachIntegration("/", func(ci *controlledIntegration, _ string) { - wsi := ci.i.(mockMetricsIntegration).HTTPIntegration.(*waitStartedIntegration) - _ = wsi.trigger.WaitContext(context.Background()) - }) - require.NoError(t, err) - } - - t.Run("All", func(t *testing.T) { - ctrl, err := newController( - util.TestLogger(t), - controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - waitIntegrations(t, ctrl) - - result := ctrl.Targets(Endpoint{Prefix: "/"}, TargetOptions{}) - expect := []*targetGroup{ - {Targets: []model.LabelSet{{model.AddressLabel: "a"}}}, - {Targets: []model.LabelSet{{model.AddressLabel: "b"}}}, - } - require.Equal(t, expect, result) - }) - - t.Run("All by Integration", func(t *testing.T) { - ctrl, err := newController( - util.TestLogger(t), - controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - waitIntegrations(t, ctrl) - - result := ctrl.Targets(Endpoint{Prefix: "/"}, TargetOptions{ - Integrations: []string{"a", "b"}, - }) - expect := []*targetGroup{ - {Targets: []model.LabelSet{{model.AddressLabel: "a"}}}, - {Targets: []model.LabelSet{{model.AddressLabel: "b"}}}, - } - require.Equal(t, expect, result) - }) - - t.Run("Specific Integration", func(t *testing.T) { - ctrl, err := newController( - util.TestLogger(t), - controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - waitIntegrations(t, ctrl) - - result := ctrl.Targets(Endpoint{Prefix: "/"}, TargetOptions{ - Integrations: []string{"a"}, - }) - expect := []*targetGroup{ - {Targets: []model.LabelSet{{model.AddressLabel: "a"}}}, - } - require.Equal(t, expect, result) - }) -} - -func Test_controller_MetricsIntegration_ScrapeConfig(t *testing.T) { - integrationWithTarget := func(targetName string) Integration { - return mockMetricsIntegration{ - HTTPIntegration: NoOpIntegration, - ScrapeConfigsFunc: func(c discovery.Configs) []*autoscrape.ScrapeConfig { - return []*autoscrape.ScrapeConfig{{ - Instance: "default", - Config: prom_config.ScrapeConfig{JobName: targetName}, - }} - }, - } - } - - integrations := []Config{ - mockConfigNameTuple(t, "a", "instanceA").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("a"), nil - }), - mockConfigNameTuple(t, "b", "instanceB").WithNewIntegrationFunc(func(l log.Logger, g Globals) (Integration, error) { - return integrationWithTarget("b"), nil - }), - } - - ctrl, err := newController( - util.TestLogger(t), - 
controllerConfig(integrations), - Globals{}, - ) - require.NoError(t, err) - _ = newSyncController(t, ctrl) - - result := ctrl.ScrapeConfigs("/", &http.DefaultSDConfig) - expect := []*autoscrape.ScrapeConfig{ - {Instance: "default", Config: prom_config.ScrapeConfig{JobName: "a"}}, - {Instance: "default", Config: prom_config.ScrapeConfig{JobName: "b"}}, - } - require.Equal(t, expect, result) -} - -// -// Tests for controller's utilization of the MetricsIntegration interface. -// - -type waitStartedIntegration struct { - trigger *util.WaitTrigger -} - -func newWaitStartedIntegration() *waitStartedIntegration { - return &waitStartedIntegration{trigger: util.NewWaitTrigger()} -} - -func (i *waitStartedIntegration) RunIntegration(ctx context.Context) error { - i.trigger.Trigger() - <-ctx.Done() - return nil -} - -func (i *waitStartedIntegration) Handler(prefix string) (nethttp.Handler, error) { - return nil, nil -} - -type mockMetricsIntegration struct { - HTTPIntegration - TargetsFunc func(ep Endpoint) []*targetgroup.Group - ScrapeConfigsFunc func(discovery.Configs) []*autoscrape.ScrapeConfig -} - -func (m mockMetricsIntegration) Targets(ep Endpoint) []*targetgroup.Group { - return m.TargetsFunc(ep) -} - -func (m mockMetricsIntegration) ScrapeConfigs(cfgs discovery.Configs) []*autoscrape.ScrapeConfig { - return m.ScrapeConfigsFunc(cfgs) -} diff --git a/internal/static/integrations/v2/controller_test.go b/internal/static/integrations/v2/controller_test.go deleted file mode 100644 index dcae71c56f..0000000000 --- a/internal/static/integrations/v2/controller_test.go +++ /dev/null @@ -1,286 +0,0 @@ -package integrations - -import ( - "context" - "strings" - "sync" - "testing" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -// -// Tests for Controller's utilization of the core Integration interface. -// - -// Test_controller_UniqueIdentifier ensures that integrations must not share a (name, id) tuple. -func Test_controller_UniqueIdentifier(t *testing.T) { - controllerFromConfigs := func(t *testing.T, cc []Config) (*controller, error) { - t.Helper() - return newController(util.TestLogger(t), controllerConfig(cc), Globals{}) - } - - t.Run("different name, identifier", func(t *testing.T) { - _, err := controllerFromConfigs(t, []Config{ - mockConfigNameTuple(t, "foo", "bar"), - mockConfigNameTuple(t, "fizz", "buzz"), - }) - require.NoError(t, err) - }) - - t.Run("same name, different identifier", func(t *testing.T) { - _, err := controllerFromConfigs(t, []Config{ - mockConfigNameTuple(t, "foo", "bar"), - mockConfigNameTuple(t, "foo", "buzz"), - }) - require.NoError(t, err) - }) - - t.Run("same name, same identifier", func(t *testing.T) { - _, err := controllerFromConfigs(t, []Config{ - mockConfigNameTuple(t, "foo", "bar"), - mockConfigNameTuple(t, "foo", "bar"), - }) - require.Error(t, err, `multiple instance names "bar" in integration "foo"`) - }) -} - -// Test_controller_RunsIntegration ensures that integrations -// run. -func Test_controller_RunsIntegration(t *testing.T) { - var wg sync.WaitGroup - wg.Add(1) - - ctx, cancel := context.WithCancel(context.Background()) - - ctrl, err := newController( - util.TestLogger(t), - controllerConfig{ - mockConfigForIntegration(t, FuncIntegration(func(ctx context.Context) error { - defer wg.Done() - cancel() - <-ctx.Done() - return nil - })), - }, - Globals{}, - ) - require.NoError(t, err, "failed to create controller") - - // Run the controller. 
The controller should immediately run our fake integration - // which will cancel ctx and cause ctrl to exit. - ctrl.run(ctx) - - // Make sure that our integration exited too. - wg.Wait() -} - -// Test_controller_ConfigChanges ensures that integrations only get restarted -// when configs are no longer equal. -func Test_controller_ConfigChanges(t *testing.T) { - tc := func(t *testing.T, changed bool) (timesRan uint64) { - t.Helper() - - var integrationsWg sync.WaitGroup - var starts atomic.Uint64 - - mockIntegration := FuncIntegration(func(ctx context.Context) error { - integrationsWg.Done() - starts.Inc() - <-ctx.Done() - return nil - }) - - cfg := controllerConfig{ - mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ConfigEqualsFunc: func(Config) bool { return !changed }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - integrationsWg.Add(1) - return mockIntegration, nil - }, - }, - } - - globals := Globals{} - ctrl, err := newController(util.TestLogger(t), cfg, globals) - require.NoError(t, err, "failed to create controller") - - sc := newSyncController(t, ctrl) - require.NoError(t, sc.UpdateController(cfg, globals), "failed to re-apply config") - - // Wait for our integrations to have been started - integrationsWg.Wait() - - sc.Stop() - return starts.Load() - } - - t.Run("Unchanged", func(t *testing.T) { - starts := tc(t, false) - require.Equal(t, uint64(1), starts, "integration should only have started exactly once") - }) - - t.Run("Changed", func(t *testing.T) { - starts := tc(t, true) - require.Equal(t, uint64(2), starts, "integration should have started exactly twice") - }) -} - -func Test_controller_SingletonCheck(t *testing.T) { - var integrationsWg sync.WaitGroup - var starts atomic.Uint64 - - mockIntegration := FuncIntegration(func(ctx context.Context) error { - integrationsWg.Done() - starts.Inc() - <-ctx.Done() - return nil - }) - c1 := mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ConfigEqualsFunc: func(Config) bool { return true }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - integrationsWg.Add(1) - return mockIntegration, nil - }, - } - configMap := make(map[Config]Type) - configMap[&c1] = TypeSingleton - setRegistered(t, configMap) - cfg := controllerConfig{ - c1, - c1, - } - - globals := Globals{} - _, err := newController(util.TestLogger(t), cfg, globals) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), `integration "mock" may only be defined once`)) -} - -type syncController struct { - inner *controller - pool *workerPool -} - -// newSyncController pairs an unstarted controller with a manually managed -// worker pool to synchronously apply integrations. -func newSyncController(t *testing.T, inner *controller) *syncController { - t.Helper() - - sc := &syncController{ - inner: inner, - pool: newWorkerPool(context.Background(), inner.logger), - } - - // There's always immediately one queued integration set from any - // successfully created controller. 
- sc.refresh() - return sc -} - -func (sc *syncController) refresh() { - sc.inner.mut.Lock() - defer sc.inner.mut.Unlock() - - newIntegrations := <-sc.inner.runIntegrations - sc.pool.Reload(newIntegrations) - sc.inner.integrations = newIntegrations -} - -func (sc *syncController) UpdateController(c controllerConfig, g Globals) error { - err := sc.inner.UpdateController(c, g) - if err != nil { - return err - } - sc.refresh() - return nil -} - -func (sc *syncController) Stop() { - sc.pool.Close() -} - -const mockIntegrationName = "mock" - -type mockConfig struct { - NameFunc func() string - ApplyDefaultsFunc func(Globals) error - ConfigEqualsFunc func(Config) bool - IdentifierFunc func(Globals) (string, error) - NewIntegrationFunc func(log.Logger, Globals) (Integration, error) -} - -func (mc mockConfig) Name() string { - return mc.NameFunc() -} - -func (mc mockConfig) ConfigEquals(c Config) bool { - if mc.ConfigEqualsFunc != nil { - return mc.ConfigEqualsFunc(c) - } - return false -} - -func (mc mockConfig) ApplyDefaults(g Globals) error { - return mc.ApplyDefaultsFunc(g) -} - -func (mc mockConfig) Identifier(g Globals) (string, error) { - return mc.IdentifierFunc(g) -} - -func (mc mockConfig) NewIntegration(l log.Logger, g Globals) (Integration, error) { - return mc.NewIntegrationFunc(l, g) -} - -func (mc mockConfig) WithNewIntegrationFunc(f func(log.Logger, Globals) (Integration, error)) mockConfig { - return mockConfig{ - NameFunc: mc.NameFunc, - ApplyDefaultsFunc: mc.ApplyDefaultsFunc, - ConfigEqualsFunc: mc.ConfigEqualsFunc, - IdentifierFunc: mc.IdentifierFunc, - NewIntegrationFunc: f, - } -} - -func mockConfigNameTuple(t *testing.T, name, id string) mockConfig { - t.Helper() - - return mockConfig{ - NameFunc: func() string { return name }, - IdentifierFunc: func(_ Globals) (string, error) { return id, nil }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - return NoOpIntegration, nil - }, - } -} - -// mockConfigForIntegration returns a Config that will always return i. -func mockConfigForIntegration(t *testing.T, i Integration) mockConfig { - t.Helper() - - return mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - return i, nil - }, - } -} diff --git a/internal/static/integrations/v2/controller_updateintegration_test.go b/internal/static/integrations/v2/controller_updateintegration_test.go deleted file mode 100644 index 4f0940ed6d..0000000000 --- a/internal/static/integrations/v2/controller_updateintegration_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package integrations - -import ( - "context" - "sync" - "testing" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -// -// Tests for controller's utilization of the UpdateIntegration interface. -// - -// Test_controller_UpdateIntegration ensures that the controller will call -// UpdateIntegration for integrations that support it. 
-func Test_controller_UpdateIntegration(t *testing.T) { - var ( - integrationStartWg sync.WaitGroup - applies, starts atomic.Uint64 - ) - - mockIntegration := mockUpdateIntegration{ - Integration: FuncIntegration(func(ctx context.Context) error { - starts.Inc() - integrationStartWg.Done() - <-ctx.Done() - return nil - }), - ApplyConfigFunc: func(Config, Globals) error { - applies.Inc() - return nil - }, - } - - cfg := controllerConfig{ - mockConfig{ - NameFunc: func() string { return mockIntegrationName }, - ConfigEqualsFunc: func(Config) bool { return false }, - ApplyDefaultsFunc: func(g Globals) error { return nil }, - IdentifierFunc: func(Globals) (string, error) { - return mockIntegrationName, nil - }, - NewIntegrationFunc: func(log.Logger, Globals) (Integration, error) { - integrationStartWg.Add(1) - return mockIntegration, nil - }, - }, - } - - ctrl, err := newController(util.TestLogger(t), cfg, Globals{}) - require.NoError(t, err, "failed to create controller") - - sc := newSyncController(t, ctrl) - - // Wait for our integration to start. - integrationStartWg.Wait() - - // Try to apply again. - require.NoError(t, sc.UpdateController(cfg, ctrl.globals), "failed to re-apply config") - integrationStartWg.Wait() - - sc.Stop() - - require.Equal(t, uint64(1), applies.Load(), "dynamic reload should have occurred") - require.Equal(t, uint64(1), starts.Load(), "restart should not have occurred") -} - -type mockUpdateIntegration struct { - Integration - ApplyConfigFunc func(Config, Globals) error -} - -func (m mockUpdateIntegration) ApplyConfig(c Config, g Globals) error { - return m.ApplyConfigFunc(c, g) -} diff --git a/internal/static/integrations/v2/eventhandler/eventhandler.go b/internal/static/integrations/v2/eventhandler/eventhandler.go deleted file mode 100644 index efb94e206d..0000000000 --- a/internal/static/integrations/v2/eventhandler/eventhandler.go +++ /dev/null @@ -1,472 +0,0 @@ -// Package eventhandler watches for Kubernetes Event objects and hands them off to -// Agent's Logs subsystem (embedded promtail) -package eventhandler - -import ( - "context" - "encoding/json" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "time" - - v1 "k8s.io/api/core/v1" - "k8s.io/client-go/informers" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/clientcmd" - "k8s.io/client-go/util/homedir" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/integrations/v2" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" -) - -const ( - cacheFileMode = 0600 - logFormatJson = "json" - logFormatFmt = "logfmt" -) - -// EventHandler watches for Kubernetes Event objects and hands them off to -// Agent's logs subsystem (embedded promtail). -type EventHandler struct { - LogsClient *logs.Logs - LogsInstance string - Log log.Logger - CachePath string - LastEvent *ShippedEvents - InitEvent *ShippedEvents - EventInformer cache.SharedIndexInformer - SendTimeout time.Duration - ticker *time.Ticker - instance string - extraLabels labels.Labels - logFormat string - sync.Mutex -} - -// ShippedEvents stores a timestamp and map of event ResourceVersions shipped for that timestamp. -// Used to avoid double-shipping events upon restart. 
-type ShippedEvents struct { - // shipped event's timestamp - Timestamp time.Time `json:"ts"` - // map of event RVs (resource versions) already "shipped" (handed off) for this timestamp. - // this is to handle the case of a timestamp having multiple events, - // which happens quite frequently. - RvMap map[string]struct{} `json:"resourceVersion"` -} - -func newEventHandler(l log.Logger, globals integrations.Globals, c *Config) (integrations.Integration, error) { - var ( - config *rest.Config - err error - factory informers.SharedInformerFactory - id string - ) - - // Try using KubeconfigPath or inClusterConfig - config, err = clientcmd.BuildConfigFromFlags("", c.KubeconfigPath) - if err != nil { - level.Error(l).Log("msg", "Loading from KubeconfigPath or inClusterConfig failed", "err", err) - // Trying default home location - if home := homedir.HomeDir(); home != "" { - kubeconfigPath := filepath.Join(home, ".kube", "config") - config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) - if err != nil { - level.Error(l).Log("msg", "Could not load a kubeconfig", "err", err) - return nil, err - } - } else { - err = fmt.Errorf("could not load a kubeconfig") - return nil, err - } - } - - clientset, err := kubernetes.NewForConfig(config) - if err != nil { - return nil, err - } - - // get an informer - if c.Namespace == "" { - factory = informers.NewSharedInformerFactory(clientset, time.Duration(c.InformerResync)*time.Second) - } else { - factory = informers.NewSharedInformerFactoryWithOptions(clientset, time.Duration(c.InformerResync)*time.Second, informers.WithNamespace(c.Namespace)) - } - - eventInformer := factory.Core().V1().Events().Informer() - id, _ = c.Identifier(globals) - - eh := &EventHandler{ - LogsClient: globals.Logs, - LogsInstance: c.LogsInstance, - Log: l, - CachePath: c.CachePath, - EventInformer: eventInformer, - SendTimeout: time.Duration(c.SendTimeout) * time.Second, - instance: id, - extraLabels: c.ExtraLabels, - logFormat: c.LogFormat, - } - // set the resource handler fns - if err := eh.initInformer(eventInformer); err != nil { - return nil, err - } - eh.ticker = time.NewTicker(time.Duration(c.FlushInterval) * time.Second) - return eh, nil -} - -// Initialize informer by setting event handler fns -func (eh *EventHandler) initInformer(eventsInformer cache.SharedIndexInformer) error { - _, err := eventsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: eh.addEvent, - UpdateFunc: eh.updateEvent, - DeleteFunc: eh.deleteEvent, - }) - return err -} - -// Handles new event objects -func (eh *EventHandler) addEvent(obj interface{}) { - event, _ := obj.(*v1.Event) - - err := eh.handleEvent(event) - if err != nil { - level.Error(eh.Log).Log("msg", "Error handling event", "err", err, "event", event) - } -} - -// Handles event object updates. 
Note that this gets triggered on informer resyncs and also
-// events occurring more than once (in which case .count is incremented)
-func (eh *EventHandler) updateEvent(objOld interface{}, objNew interface{}) {
-	eOld, _ := objOld.(*v1.Event)
-	eNew, _ := objNew.(*v1.Event)
-
-	if eOld.GetResourceVersion() == eNew.GetResourceVersion() {
-		// ignore resync updates
-		level.Debug(eh.Log).Log("msg", "Event RV didn't change, ignoring", "eRV", eNew.ResourceVersion)
-		return
-	}
-
-	err := eh.handleEvent(eNew)
-	if err != nil {
-		level.Error(eh.Log).Log("msg", "Error handling event", "err", err, "event", eNew)
-	}
-}
-
-func (eh *EventHandler) handleEvent(event *v1.Event) error {
-	eventTs := getTimestamp(event)
-
-	// if event is older than the one stored in cache on startup, we've shipped it
-	if eventTs.Before(eh.InitEvent.Timestamp) {
-		return nil
-	}
-	// if event is equal and is in map, we've shipped it
-	if eventTs.Equal(eh.InitEvent.Timestamp) {
-		if _, ok := eh.InitEvent.RvMap[event.ResourceVersion]; ok {
-			return nil
-		}
-	}
-
-	labels, msg, err := eh.extractEvent(event)
-	if err != nil {
-		return err
-	}
-
-	entry := newEntry(msg, eventTs, labels)
-	ok := eh.LogsClient.Instance(eh.LogsInstance).SendEntry(entry, eh.SendTimeout)
-	if !ok {
-		err = fmt.Errorf("msg=%s entry=%s", "error handing entry off to promtail", entry)
-		return err
-	}
-
-	// update cache with new "last" event
-	err = eh.updateLastEvent(event, eventTs)
-	if err != nil {
-		return err
-	}
-	return nil
-}
-
-// Called when event objects are removed from etcd, can safely ignore this
-func (eh *EventHandler) deleteEvent(obj interface{}) {
-}
-
-// extract data from event fields and create labels, etc.
-// TODO: ship JSON blobs and allow users to configure using pipelines etc.
-// instead of hardcoding labels here
-func (eh *EventHandler) extractEvent(event *v1.Event) (model.LabelSet, string, error) {
-	var (
-		msg      strings.Builder
-		fields   = make(map[string]any)
-		labels   = make(model.LabelSet)
-		appender = appendTextMsg
-	)
-
-	if eh.logFormat == logFormatJson {
-		appender = appendJsonMsg
-	}
-
-	obj := event.InvolvedObject
-	if obj.Name == "" {
-		return nil, "", fmt.Errorf("no involved object for event")
-	}
-	appender(&msg, fields, "name", obj.Name, "%s")
-
-	labels[model.LabelName("namespace")] = model.LabelValue(obj.Namespace)
-	// TODO(hjet) omit "kubernetes"
-	labels[model.LabelName("job")] = model.LabelValue("integrations/kubernetes/eventhandler")
-	labels[model.LabelName("instance")] = model.LabelValue(eh.instance)
-	labels[model.LabelName("agent_hostname")] = model.LabelValue(eh.instance)
-	for _, lbl := range eh.extraLabels {
-		labels[model.LabelName(lbl.Name)] = model.LabelValue(lbl.Value)
-	}
-
-	// we add these fields to the log line to reduce label bloat and cardinality
-	if obj.Kind != "" {
-		appender(&msg, fields, "kind", obj.Kind, "%s")
-	}
-	if event.Action != "" {
-		appender(&msg, fields, "action", event.Action, "%s")
-	}
-	if obj.APIVersion != "" {
-		appender(&msg, fields, "objectAPIversion", obj.APIVersion, "%s")
-	}
-	if obj.ResourceVersion != "" {
-		appender(&msg, fields, "objectRV", obj.ResourceVersion, "%s")
-	}
-	if event.ResourceVersion != "" {
-		appender(&msg, fields, "eventRV", event.ResourceVersion, "%s")
-	}
-	if event.ReportingInstance != "" {
-		appender(&msg, fields, "reportinginstance", event.ReportingInstance, "%s")
-	}
-	if event.ReportingController != "" {
-		appender(&msg, fields, "reportingcontroller", event.ReportingController, "%s")
-	}
-	if event.Source.Component != "" {
-
appender(&msg, fields, "sourcecomponent", event.Source.Component, "%s") - } - if event.Source.Host != "" { - appender(&msg, fields, "sourcehost", event.Source.Host, "%s") - } - if event.Reason != "" { - appender(&msg, fields, "reason", event.Reason, "%s") - } - if event.Type != "" { - appender(&msg, fields, "type", event.Type, "%s") - } - if event.Count != 0 { - appender(&msg, fields, "count", event.Count, "%d") - } - - appender(&msg, fields, "msg", event.Message, "%q") - - if eh.logFormat == logFormatJson { - bb, err := json.Marshal(fields) - if err != nil { - return nil, "", fmt.Errorf("failed to marshal Event to JSON: %w", err) - } - msg.WriteString(string(bb)) - } - - return labels, strings.TrimSpace(msg.String()), nil -} - -// Appends the "fields" map with an entry for the provided event field -// Signatures of "appendJsonMsg" and "appendTextMsg" must match -func appendJsonMsg(msg *strings.Builder, fields map[string]any, key string, value any, format string) { - fields[key] = value -} - -// Appends the message builder with the provided event field -// Signatures of "appendJsonMsg" and "appendTextMsg" must match -func appendTextMsg(msg *strings.Builder, fields map[string]any, key string, value any, format string) { - msg.WriteString(key) - msg.WriteByte('=') - msg.WriteString(fmt.Sprintf(format, value)) - msg.WriteByte(' ') -} - -func getTimestamp(event *v1.Event) time.Time { - if !event.LastTimestamp.IsZero() { - return event.LastTimestamp.Time - } - return event.EventTime.Time -} - -func newEntry(msg string, ts time.Time, labels model.LabelSet) api.Entry { - entry := logproto.Entry{Timestamp: ts, Line: msg} - return api.Entry{Labels: labels, Entry: entry} -} - -// maintain "last event" state -func (eh *EventHandler) updateLastEvent(e *v1.Event, eventTs time.Time) error { - eh.Lock() - defer eh.Unlock() - - eventRv := e.ResourceVersion - - if eh.LastEvent == nil { - // startup - eh.LastEvent = &ShippedEvents{Timestamp: eventTs, RvMap: make(map[string]struct{})} - eh.LastEvent.RvMap[eventRv] = struct{}{} - return nil - } - - // if timestamp is the same, add to map - if eh.LastEvent != nil && eventTs.Equal(eh.LastEvent.Timestamp) { - eh.LastEvent.RvMap[eventRv] = struct{}{} - return nil - } - - // if timestamp is different, create a new ShippedEvents struct - eh.LastEvent = &ShippedEvents{Timestamp: eventTs, RvMap: make(map[string]struct{})} - eh.LastEvent.RvMap[eventRv] = struct{}{} - return nil -} - -func (eh *EventHandler) writeOutLastEvent() error { - level.Info(eh.Log).Log("msg", "Flushing last event to disk") - - eh.Lock() - defer eh.Unlock() - - if eh.LastEvent == nil { - level.Info(eh.Log).Log("msg", "No last event to flush, returning") - return nil - } - - temp := eh.CachePath + "-new" - buf, err := json.Marshal(&eh.LastEvent) - if err != nil { - return err - } - - err = os.WriteFile(temp, buf, os.FileMode(cacheFileMode)) - if err != nil { - return err - } - - if err = os.Rename(temp, eh.CachePath); err != nil { - return err - } - level.Info(eh.Log).Log("msg", "Flushed last event to disk") - return nil -} - -// RunIntegration runs the eventhandler integration -func (eh *EventHandler) RunIntegration(ctx context.Context) error { - var wg sync.WaitGroup - - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // Quick check to make sure logs instance exists - if i := eh.LogsClient.Instance(eh.LogsInstance); i == nil { - level.Error(eh.Log).Log("msg", "Logs instance not configured", "instance", eh.LogsInstance) - cancel() - } - - cacheDir := filepath.Dir(eh.CachePath) - if 
err := os.MkdirAll(cacheDir, 0755); err != nil { - level.Error(eh.Log).Log("msg", "Failed to create cache dir", "err", err) - cancel() - } - - // cache file to store events shipped (prevents double shipping on restart) - cacheFile, err := os.OpenFile(eh.CachePath, os.O_RDWR|os.O_CREATE, cacheFileMode) - if err != nil { - level.Error(eh.Log).Log("msg", "Failed to open or create cache file", "err", err) - cancel() - } - - // attempt to read last timestamp from cache file into a ShippedEvents struct - initEvent, err := readInitEvent(cacheFile, eh.Log) - if err != nil { - level.Error(eh.Log).Log("msg", "Failed to read last event from cache file", "err", err) - cancel() - } - eh.InitEvent = initEvent - - if err = cacheFile.Close(); err != nil { - level.Error(eh.Log).Log("msg", "Failed to close cache file", "err", err) - cancel() - } - - go func() { - level.Info(eh.Log).Log("msg", "Waiting for cache to sync (initial List of events)") - isSynced := cache.WaitForCacheSync(ctx.Done(), eh.EventInformer.HasSynced) - if !isSynced { - level.Error(eh.Log).Log("msg", "Failed to sync informer cache") - // maybe want to bail here - return - } - level.Info(eh.Log).Log("msg", "Informer cache synced") - }() - - // start the informer - // technically we should prob use the factory here, but since we - // only have one informer atm, this likely doesn't matter - go eh.EventInformer.Run(ctx.Done()) - - // wait for last event to flush before returning - wg.Add(1) - go func() { - defer wg.Done() - eh.runTicker(ctx.Done()) - }() - wg.Wait() - - return nil -} - -// write out last event every FlushInterval -func (eh *EventHandler) runTicker(stopCh <-chan struct{}) { - for { - select { - case <-stopCh: - if err := eh.writeOutLastEvent(); err != nil { - level.Error(eh.Log).Log("msg", "Failed to flush last event", "err", err) - } - return - case <-eh.ticker.C: - if err := eh.writeOutLastEvent(); err != nil { - level.Error(eh.Log).Log("msg", "Failed to flush last event", "err", err) - } - } - } -} - -func readInitEvent(file *os.File, logger log.Logger) (*ShippedEvents, error) { - var ( - initEvent = new(ShippedEvents) - ) - - stat, err := file.Stat() - if err != nil { - return nil, err - } - if stat.Size() == 0 { - level.Info(logger).Log("msg", "Cache file empty, setting zero-valued initEvent") - return initEvent, nil - } - - dec := json.NewDecoder(file) - err = dec.Decode(&initEvent) - if err != nil { - err = fmt.Errorf("could not read init event from cache: %s. 
Please delete the cache file", err) - return nil, err - } - level.Info(logger).Log("msg", "Loaded init event from cache file", "initEventTime", initEvent.Timestamp) - return initEvent, nil -} diff --git a/internal/static/integrations/v2/eventhandler/eventhandler_test.go b/internal/static/integrations/v2/eventhandler/eventhandler_test.go deleted file mode 100644 index 9d6e08a32a..0000000000 --- a/internal/static/integrations/v2/eventhandler/eventhandler_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package eventhandler - -import ( - "os" - "testing" - "time" - - v1 "k8s.io/api/core/v1" - - "github.com/go-kit/log" - "github.com/stretchr/testify/require" -) - -func TestCacheLoad(t *testing.T) { - l := log.NewNopLogger() - testTime, _ := time.Parse(time.RFC3339, "2022-01-26T13:39:40-05:00") - expectedEvents := &ShippedEvents{ - Timestamp: testTime, - RvMap: map[string]struct{}{"58588": {}}, - } - cacheFile, err := os.OpenFile("testdata/eventhandler.cache", os.O_RDWR|os.O_CREATE, cacheFileMode) - require.NoError(t, err, "Failed to open test eventhandler cache file") - actualEvents, err := readInitEvent(cacheFile, l) - require.NoError(t, err, "Failed to parse last event from eventhandler cache file") - require.Equal(t, expectedEvents, actualEvents) -} - -func TestExtractEventJson(t *testing.T) { - var eh = new(EventHandler) - eh.logFormat = logFormatJson - var event = new(v1.Event) - event.InvolvedObject = v1.ObjectReference{ - Name: "test-object", - } - event.Message = "Event Message" - - _, msg, err := eh.extractEvent(event) - require.NoError(t, err, "Failed to extract test event") - require.Equal(t, "{\"msg\":\"Event Message\",\"name\":\"test-object\"}", msg) -} - -func TestExtractEventText(t *testing.T) { - var eh = new(EventHandler) - eh.logFormat = "logfmt" - var event = new(v1.Event) - event.InvolvedObject = v1.ObjectReference{ - Name: "test-object", - } - event.Message = "Event Message" - - _, msg, err := eh.extractEvent(event) - require.NoError(t, err, "Failed to extract test event") - require.Equal(t, "name=test-object msg=\"Event Message\"", msg) -} diff --git a/internal/static/integrations/v2/eventhandler/integration.go b/internal/static/integrations/v2/eventhandler/integration.go index caba0084e2..4453aeefce 100644 --- a/internal/static/integrations/v2/eventhandler/integration.go +++ b/internal/static/integrations/v2/eventhandler/integration.go @@ -1,6 +1,8 @@ package eventhandler import ( + "context" + "github.com/go-kit/log" "github.com/grafana/agent/internal/static/integrations/v2" "github.com/prometheus/prometheus/model/labels" @@ -13,7 +15,7 @@ var DefaultConfig = Config{ LogsInstance: "default", InformerResync: 120, FlushInterval: 10, - LogFormat: logFormatFmt, + LogFormat: "logfmt", } // Config configures the eventhandler integration @@ -71,9 +73,20 @@ func (c *Config) Identifier(globals integrations.Globals) (string, error) { // NewIntegration converts this config into an instance of an integration. func (c *Config) NewIntegration(l log.Logger, globals integrations.Globals) (integrations.Integration, error) { - return newEventHandler(l, globals, c) + // NOTE(rfratto): the eventhandler integration is never run, and all the + // logic has been moved to the loki.source.kubernetes_events component. + // + // This function is never called, but still exists for config conversion. 
+ return stubIntegration{}, nil } func init() { integrations.Register(&Config{}, integrations.TypeSingleton) } + +type stubIntegration struct{} + +func (stubIntegration) RunIntegration(ctx context.Context) error { + <-ctx.Done() + return nil +} diff --git a/internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache b/internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache deleted file mode 100644 index dc5814de08..0000000000 --- a/internal/static/integrations/v2/eventhandler/testdata/eventhandler.cache +++ /dev/null @@ -1 +0,0 @@ -{"ts":"2022-01-26T13:39:40-05:00","resourceVersion":{"58588":{}}} \ No newline at end of file diff --git a/internal/static/integrations/v2/integrations.go b/internal/static/integrations/v2/integrations.go index 1896280e3f..3c5ba8d3ab 100644 --- a/internal/static/integrations/v2/integrations.go +++ b/internal/static/integrations/v2/integrations.go @@ -26,10 +26,7 @@ import ( "github.com/go-kit/log" "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/metrics" "github.com/grafana/agent/internal/static/server" - "github.com/grafana/agent/internal/static/traces" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -84,14 +81,6 @@ type Globals struct { // TODO(rfratto): flag to override identifier at agent level? AgentIdentifier string - // Some integrations may wish to interact with various subsystems for their - // implementation if the desired behavior is not supported natively by the - // integration manager. - - Metrics *metrics.Agent // Metrics subsystem - Logs *logs.Logs // Logs subsystem - Tracing *traces.Traces // Traces subsystem - // Options the integrations subsystem is using. SubsystemOpts SubsystemOptions // BaseURL to use to invoke methods against the embedded HTTP server. diff --git a/internal/static/integrations/v2/subsystem.go b/internal/static/integrations/v2/subsystem.go index ca128a1926..ce501b37c6 100644 --- a/internal/static/integrations/v2/subsystem.go +++ b/internal/static/integrations/v2/subsystem.go @@ -1,19 +1,8 @@ package integrations import ( - "context" - "encoding/json" - "fmt" - "net/http" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/gorilla/mux" "github.com/grafana/agent/internal/static/integrations/v2/autoscrape" "github.com/grafana/agent/internal/static/metrics" - "github.com/prometheus/common/model" - http_sd "github.com/prometheus/prometheus/discovery/http" ) const ( @@ -76,172 +65,3 @@ func (o *SubsystemOptions) UnmarshalYAML(unmarshal func(interface{}) error) erro *o = DefaultSubsystemOptions return UnmarshalYAML(o, unmarshal) } - -// Subsystem runs the integrations subsystem, managing a set of integrations. -type Subsystem struct { - logger log.Logger - - mut sync.RWMutex - globals Globals - apiHandler http.Handler // generated from controller - autoscraper *autoscrape.Scraper - - ctrl *controller - stopController context.CancelFunc - controllerExited chan struct{} -} - -// NewSubsystem creates and starts a new integrations Subsystem. Every field in -// IntegrationOptions must be filled out. 
-func NewSubsystem(l log.Logger, globals Globals) (*Subsystem, error) {
-	autoscraper := autoscrape.NewScraper(l, globals.Metrics.InstanceManager(), globals.DialContextFunc)
-
-	l = log.With(l, "component", "integrations")
-
-	ctrl, err := newController(l, controllerConfig(globals.SubsystemOpts.Configs), globals)
-	if err != nil {
-		autoscraper.Stop()
-		return nil, err
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-
-	ctrlExited := make(chan struct{})
-	go func() {
-		ctrl.run(ctx)
-		close(ctrlExited)
-	}()
-
-	s := &Subsystem{
-		logger: l,
-
-		globals:     globals,
-		autoscraper: autoscraper,
-
-		ctrl:             ctrl,
-		stopController:   cancel,
-		controllerExited: ctrlExited,
-	}
-	if err := s.ApplyConfig(globals); err != nil {
-		cancel()
-		autoscraper.Stop()
-		return nil, err
-	}
-	return s, nil
-}
-
-// ApplyConfig updates the configuration of the integrations subsystem.
-func (s *Subsystem) ApplyConfig(globals Globals) error {
-	const prefix = "/integrations/"
-
-	s.mut.Lock()
-	defer s.mut.Unlock()
-
-	if err := s.ctrl.UpdateController(controllerConfig(globals.SubsystemOpts.Configs), globals); err != nil {
-		return fmt.Errorf("error applying integrations: %w", err)
-	}
-
-	var firstErr error
-	saveFirstErr := func(err error) {
-		if firstErr == nil {
-			firstErr = err
-		}
-	}
-
-	// Set up HTTP wiring
-	{
-		handler, err := s.ctrl.Handler(prefix)
-		if err != nil {
-			saveFirstErr(fmt.Errorf("HTTP handler update failed: %w", err))
-		}
-		s.apiHandler = handler
-	}
-
-	// Set up self-scraping
-	{
-		httpSDConfig := http_sd.DefaultSDConfig
-		httpSDConfig.RefreshInterval = model.Duration(time.Second * 5) // TODO(rfratto): make configurable?
-
-		apiURL := globals.CloneAgentBaseURL()
-		apiURL.Path = IntegrationsSDEndpoint
-		httpSDConfig.URL = apiURL.String()
-
-		scrapeConfigs := s.ctrl.ScrapeConfigs(prefix, &httpSDConfig)
-		if err := s.autoscraper.ApplyConfig(scrapeConfigs); err != nil {
-			saveFirstErr(fmt.Errorf("configuring autoscraper failed: %w", err))
-		}
-	}
-
-	s.globals = globals
-	return firstErr
-}
-
-// WireAPI hooks up integration endpoints to r.
-func (s *Subsystem) WireAPI(r *mux.Router) {
-	const prefix = "/integrations"
-	r.PathPrefix(prefix).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
-		s.mut.RLock()
-		handler := s.apiHandler
-		s.mut.RUnlock()
-
-		if handler == nil {
-			rw.WriteHeader(http.StatusServiceUnavailable)
-			fmt.Fprintf(rw, "Integrations HTTP endpoints not yet available")
-			return
-		}
-		handler.ServeHTTP(rw, r)
-	})
-
-	r.HandleFunc(IntegrationsSDEndpoint, func(rw http.ResponseWriter, r *http.Request) {
-		targetOptions, err := TargetOptionsFromParams(r.URL.Query())
-		if err != nil {
-			http.Error(rw, fmt.Sprintf("invalid query parameters: %s", err), http.StatusBadRequest)
-			return
-		}
-
-		rw.Header().Set("Content-Type", "application/json")
-		rw.WriteHeader(http.StatusOK)
-
-		tgs := s.ctrl.Targets(Endpoint{
-			Host:   r.Host,
-			Prefix: prefix,
-		}, targetOptions)
-
-		// Normalize targets. We may have targets in the group with non-address
-		// labels. These need to be retained, so we'll just split everything up
-		// into multiple groups.
-		//
-		// TODO(rfratto): optimize to remove redundant groups
-		finalTgs := []*targetGroup{}
-		for _, group := range tgs {
-			for _, target := range group.Targets {
-				// Create the final labels for the group. This will be everything from
-				// the group and the target (except for model.AddressLabel). Labels
-				// from the target take precedence over labels from the group.
-				groupLabels := group.Labels.Merge(target)
-				delete(groupLabels, model.AddressLabel)
-
-				finalTgs = append(finalTgs, &targetGroup{
-					Targets: []model.LabelSet{{model.AddressLabel: target[model.AddressLabel]}},
-					Labels:  groupLabels,
-				})
-			}
-		}
-
-		enc := json.NewEncoder(rw)
-		_ = enc.Encode(finalTgs)
-	})
-
-	r.HandleFunc(IntegrationsAutoscrapeTargetsEndpoint, func(rw http.ResponseWriter, r *http.Request) {
-		allTargets := s.autoscraper.TargetsActive()
-		metrics.ListTargetsHandler(allTargets).ServeHTTP(rw, r)
-	})
-}
-
-// Stop stops the manager and all running integrations. Blocks until all
-// running integrations exit.
-func (s *Subsystem) Stop() {
-	s.autoscraper.Stop()
-	s.stopController()
-	<-s.controllerExited
-}
diff --git a/internal/static/integrations/v2/targetgroup.go b/internal/static/integrations/v2/targetgroup.go
deleted file mode 100644
index 4400105c4f..0000000000
--- a/internal/static/integrations/v2/targetgroup.go
+++ /dev/null
@@ -1,28 +0,0 @@
-package integrations
-
-import (
-	"encoding/json"
-
-	"github.com/prometheus/common/model"
-	"github.com/prometheus/prometheus/discovery/targetgroup"
-)
-
-// targetGroup implements json.Marshaler for targetgroup.Group. This is
-// required due to an issue with Prometheus: HTTP SD expects to be unmarshaled
-// as JSON, but the form it expects to unmarshal the target groups in is not the form
-// it marshals out to JSON as.
-type targetGroup targetgroup.Group
-
-func (tg *targetGroup) MarshalJSON() ([]byte, error) {
-	g := &struct {
-		Targets []string       `json:"targets"`
-		Labels  model.LabelSet `json:"labels,omitempty"`
-	}{
-		Targets: make([]string, 0, len(tg.Targets)),
-		Labels:  tg.Labels,
-	}
-	for _, t := range tg.Targets {
-		g.Targets = append(g.Targets, string(t[model.AddressLabel]))
-	}
-	return json.Marshal(g)
-}
diff --git a/internal/static/integrations/v2/workers.go b/internal/static/integrations/v2/workers.go
deleted file mode 100644
index 4315710c49..0000000000
--- a/internal/static/integrations/v2/workers.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package integrations
-
-import (
-	"context"
-	"sync"
-
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-)
-
-type workerPool struct {
-	log       log.Logger
-	parentCtx context.Context
-
-	mut     sync.Mutex
-	workers map[*controlledIntegration]worker
-
-	runningWorkers sync.WaitGroup
-}
-
-type worker struct {
-	ci     *controlledIntegration
-	stop   context.CancelFunc
-	exited chan struct{}
-}
-
-func newWorkerPool(ctx context.Context, l log.Logger) *workerPool {
-	return &workerPool{
-		log:       l,
-		parentCtx: ctx,
-
-		workers: make(map[*controlledIntegration]worker),
-	}
-}
-
-func (p *workerPool) Reload(newIntegrations []*controlledIntegration) {
-	p.mut.Lock()
-	defer p.mut.Unlock()
-
-	level.Debug(p.log).Log("msg", "updating running integrations", "prev_count", len(p.workers), "new_count", len(newIntegrations))
-
-	// Shut down workers whose integrations have gone away.
-	var stopped []worker
-	for ci, w := range p.workers {
-		var found bool
-		for _, current := range newIntegrations {
-			if ci == current {
-				found = true
-				break
-			}
-		}
-		if !found {
-			w.stop()
-			stopped = append(stopped, w)
-		}
-	}
-	for _, w := range stopped {
-		// Wait for stopped integrations to fully exit. We do this in a separate
-		// loop so context cancellations can be handled simultaneously, allowing
-		// the wait to complete faster.
-		<-w.exited
-	}
-
-	// Spawn new workers for integrations that don't have them.
- for _, current := range newIntegrations { - if _, workerExists := p.workers[current]; workerExists { - continue - } - // This integration doesn't have an existing worker; schedule a new one. - p.scheduleWorker(current) - } -} - -func (p *workerPool) Close() { - p.mut.Lock() - defer p.mut.Unlock() - - level.Debug(p.log).Log("msg", "stopping all integrations") - - defer p.runningWorkers.Wait() - for _, w := range p.workers { - w.stop() - } -} - -func (p *workerPool) scheduleWorker(ci *controlledIntegration) { - p.runningWorkers.Add(1) - - ctx, cancel := context.WithCancel(p.parentCtx) - - w := worker{ - ci: ci, - stop: cancel, - exited: make(chan struct{}), - } - p.workers[ci] = w - - go func() { - ci.running.Store(true) - - // When the integration stops running, we want to free any of our - // resources that will notify watchers waiting for the worker to stop. - // - // Afterwards, we'll block until we remove ourselves from the map; having - // a worker remove itself on shutdown allows exited integrations to - // re-start when the config is reloaded. - defer func() { - ci.running.Store(false) - close(w.exited) - p.runningWorkers.Done() - - p.mut.Lock() - defer p.mut.Unlock() - delete(p.workers, ci) - }() - - err := ci.i.RunIntegration(ctx) - if err != nil { - level.Error(p.log).Log("msg", "integration exited with error", "id", ci.id, "err", err) - } - }() -} diff --git a/internal/static/logs/http.go b/internal/static/logs/http.go deleted file mode 100644 index b3e7a00d88..0000000000 --- a/internal/static/logs/http.go +++ /dev/null @@ -1,84 +0,0 @@ -package logs - -import ( - "net/http" - "sort" - - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/prometheus/common/model" -) - -// WireAPI adds API routes to the provided mux router. -func (l *Logs) WireAPI(r *mux.Router) { - r.HandleFunc("/agent/api/v1/logs/instances", l.ListInstancesHandler).Methods("GET") - r.HandleFunc("/agent/api/v1/logs/targets", l.ListTargetsHandler).Methods("GET") -} - -// ListInstancesHandler writes the set of currently running instances to the http.ResponseWriter. -func (l *Logs) ListInstancesHandler(w http.ResponseWriter, _ *http.Request) { - instances := l.instances - instanceNames := make([]string, 0, len(instances)) - for instance := range instances { - instanceNames = append(instanceNames, instance) - } - sort.Strings(instanceNames) - - err := configapi.WriteResponse(w, http.StatusOK, instanceNames) - if err != nil { - level.Error(l.l).Log("msg", "failed to write response", "err", err) - } -} - -// ListTargetsHandler retrieves the full set of targets across all instances and shows -// information on them. 
-func (l *Logs) ListTargetsHandler(w http.ResponseWriter, r *http.Request) { - instances := l.instances - allTargets := make(map[string]TargetSet, len(instances)) - for instName, inst := range instances { - allTargets[instName] = inst.promtail.ActiveTargets() - } - listTargetsHandler(allTargets).ServeHTTP(w, r) -} - -func listTargetsHandler(targets map[string]TargetSet) http.Handler { - return http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) { - resp := ListTargetsResponse{} - for instance, tset := range targets { - for key, targets := range tset { - for _, tgt := range targets { - resp = append(resp, TargetInfo{ - InstanceName: instance, - TargetGroup: key, - Type: tgt.Type(), - DiscoveredLabels: tgt.DiscoveredLabels(), - Labels: tgt.Labels(), - Ready: tgt.Ready(), - Details: tgt.Details(), - }) - } - } - } - _ = configapi.WriteResponse(rw, http.StatusOK, resp) - }) -} - -// TargetSet is a set of targets for an individual scraper. -type TargetSet map[string][]target.Target - -// ListTargetsResponse is returned by the ListTargetsHandler. -type ListTargetsResponse []TargetInfo - -// TargetInfo describes a specific target. -type TargetInfo struct { - InstanceName string `json:"instance"` - TargetGroup string `json:"target_group"` - - Type target.TargetType `json:"type"` - Labels model.LabelSet `json:"labels"` - DiscoveredLabels model.LabelSet `json:"discovered_labels"` - Ready bool `json:"ready"` - Details interface{} `json:"details"` -} diff --git a/internal/static/logs/http_test.go b/internal/static/logs/http_test.go deleted file mode 100644 index e37110f205..0000000000 --- a/internal/static/logs/http_test.go +++ /dev/null @@ -1,177 +0,0 @@ -package logs - -import ( - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/grafana/agent/internal/util" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" -) - -func TestAgent_ListInstancesHandler(t *testing.T) { - cfgText := util.Untab(` -configs: -- name: instance-a - positions: - filename: /tmp/positions.yaml - clients: - - url: http://127.0.0.1:80/loki/api/v1/push - `) - - var cfg Config - - logger := util.TestLogger(t) - l, err := New(prometheus.NewRegistry(), &cfg, logger, false) - require.NoError(t, err) - defer l.Stop() - - r := httptest.NewRequest("GET", "/agent/api/v1/logs/instances", nil) - - t.Run("no instances", func(t *testing.T) { - rr := httptest.NewRecorder() - l.ListInstancesHandler(rr, r) - expect := `{"status":"success","data":[]}` - require.Equal(t, expect, rr.Body.String()) - }) - - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - t.Run("non-empty", func(t *testing.T) { - require.NoError(t, l.ApplyConfig(&cfg, false)) - - expect := `{"status":"success","data":["instance-a"]}` - - util.Eventually(t, func(t require.TestingT) { - rr := httptest.NewRecorder() - l.ListInstancesHandler(rr, r) - require.Equal(t, expect, rr.Body.String()) - }) - }) -} - -func TestAgent_ListTargetsHandler(t *testing.T) { - cfgText := util.Untab(` -configs: -- name: instance-a - positions: - filename: /tmp/positions.yaml - clients: - - url: http://127.0.0.1:80/loki/api/v1/push - `) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - - logger := util.TestLogger(t) - l, err := New(prometheus.NewRegistry(), &cfg, 
logger, false) - require.NoError(t, err) - defer l.Stop() - - r := httptest.NewRequest("GET", "/agent/api/v1/logs/targets", nil) - - t.Run("scrape manager not ready", func(t *testing.T) { - rr := httptest.NewRecorder() - l.ListTargetsHandler(rr, r) - expect := `{"status": "success", "data": []}` - require.JSONEq(t, expect, rr.Body.String()) - require.Equal(t, http.StatusOK, rr.Result().StatusCode) - }) - - t.Run("scrape manager targets", func(t *testing.T) { - rr := httptest.NewRecorder() - targets := map[string]TargetSet{ - "instance-a": mockActiveTargets(), - } - listTargetsHandler(targets).ServeHTTP(rr, r) - expect := `{ - "status": "success", - "data": [ - { - "instance": "instance-a", - "target_group": "varlogs", - "type": "File", - "labels": { - "job": "varlogs" - }, - "discovered_labels": { - "__address__": "localhost", - "__path__": "/var/log/*log", - "job": "varlogs" - }, - "ready": true, - "details": { - "/var/log/alternatives.log": 13386, - "/var/log/apport.log": 0, - "/var/log/auth.log": 37009, - "/var/log/bootstrap.log": 107347, - "/var/log/dpkg.log": 374420, - "/var/log/faillog": 0, - "/var/log/fontconfig.log": 11629, - "/var/log/gpu-manager.log": 1541, - "/var/log/kern.log": 782582, - "/var/log/lastlog": 0, - "/var/log/syslog": 788450 - } - } - ] - }` - require.JSONEq(t, expect, rr.Body.String()) - require.Equal(t, http.StatusOK, rr.Result().StatusCode) - }) -} - -func mockActiveTargets() map[string][]target.Target { - return map[string][]target.Target{ - "varlogs": {&mockTarget{}}, - } -} - -type mockTarget struct { -} - -func (mt *mockTarget) Type() target.TargetType { - return target.TargetType("File") -} - -func (mt *mockTarget) DiscoveredLabels() model.LabelSet { - return map[model.LabelName]model.LabelValue{ - "__address__": "localhost", - "__path__": "/var/log/*log", - "job": "varlogs", - } -} - -func (mt *mockTarget) Labels() model.LabelSet { - return map[model.LabelName]model.LabelValue{ - "job": "varlogs", - } -} - -func (mt *mockTarget) Ready() bool { - return true -} - -func (mt *mockTarget) Details() interface{} { - return map[string]int{ - "/var/log/alternatives.log": 13386, - "/var/log/apport.log": 0, - "/var/log/auth.log": 37009, - "/var/log/bootstrap.log": 107347, - "/var/log/dpkg.log": 374420, - "/var/log/faillog": 0, - "/var/log/fontconfig.log": 11629, - "/var/log/gpu-manager.log": 1541, - "/var/log/kern.log": 782582, - "/var/log/lastlog": 0, - "/var/log/syslog": 788450, - } -} diff --git a/internal/static/logs/logs.go b/internal/static/logs/logs.go index 2d6c478fe5..8dd2035341 100644 --- a/internal/static/logs/logs.go +++ b/internal/static/logs/logs.go @@ -2,142 +2,20 @@ package logs import ( - "fmt" - "os" - "path/filepath" - "sync" - "time" _ "time/tzdata" // embed timezone data - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/agentseed" "github.com/grafana/agent/internal/useragent" - "github.com/grafana/agent/internal/util" - "github.com/grafana/loki/clients/pkg/promtail" - "github.com/grafana/loki/clients/pkg/promtail/api" "github.com/grafana/loki/clients/pkg/promtail/client" "github.com/grafana/loki/clients/pkg/promtail/config" "github.com/grafana/loki/clients/pkg/promtail/server" - "github.com/grafana/loki/clients/pkg/promtail/targets/file" "github.com/grafana/loki/clients/pkg/promtail/wal" "github.com/grafana/loki/pkg/tracing" - "github.com/prometheus/client_golang/prometheus" ) func init() { client.UserAgent = useragent.Get() } -// Logs is a Logs log collection. 
It uses multiple distinct sets of Logs -// Promtail agents to collect logs and send them to a Logs server. -type Logs struct { - mut sync.Mutex - - reg prometheus.Registerer - l log.Logger - instances map[string]*Instance -} - -// New creates and starts Loki log collection. -func New(reg prometheus.Registerer, c *Config, l log.Logger, dryRun bool) (*Logs, error) { - logs := &Logs{ - instances: make(map[string]*Instance), - reg: reg, - l: log.With(l, "component", "logs"), - } - if err := logs.ApplyConfig(c, dryRun); err != nil { - return nil, err - } - return logs, nil -} - -// ApplyConfig updates Logs with a new Config. -func (l *Logs) ApplyConfig(c *Config, dryRun bool) error { - l.mut.Lock() - defer l.mut.Unlock() - - if c == nil { - c = &Config{} - } - - newInstances := make(map[string]*Instance, len(c.Configs)) - - for _, ic := range c.Configs { - // If an old instance existed, update it and move it to the new map. - if old, ok := l.instances[ic.Name]; ok { - err := old.ApplyConfig(ic, c.Global, dryRun) - if err != nil { - return err - } - - newInstances[ic.Name] = old - continue - } - - inst, err := NewInstance(l.reg, ic, c.Global, l.l, dryRun) - if err != nil { - return fmt.Errorf("unable to apply config for %s: %w", ic.Name, err) - } - newInstances[ic.Name] = inst - } - - // Any promtail in l.instances that isn't in newInstances has been removed - // from the config. Stop them before replacing the map. - for key, i := range l.instances { - if _, exist := newInstances[key]; exist { - continue - } - i.Stop() - } - l.instances = newInstances - - return nil -} - -// Stop stops the log collector. -func (l *Logs) Stop() { - l.mut.Lock() - defer l.mut.Unlock() - - for _, i := range l.instances { - i.Stop() - } -} - -// Instance is used to retrieve a named Logs instance -func (l *Logs) Instance(name string) *Instance { - l.mut.Lock() - defer l.mut.Unlock() - - return l.instances[name] -} - -// Instance is an individual Logs instance. -type Instance struct { - mut sync.Mutex - - cfg *InstanceConfig - log log.Logger - reg *util.Unregisterer - - promtail *promtail.Promtail -} - -// NewInstance creates and starts a Logs instance. -func NewInstance(reg prometheus.Registerer, c *InstanceConfig, g GlobalConfig, l log.Logger, dryRun bool) (*Instance, error) { - instReg := prometheus.WrapRegistererWith(prometheus.Labels{"logs_config": c.Name}, reg) - - inst := Instance{ - reg: util.WrapWithUnregisterer(instReg), - log: log.With(l, "logs_config", c.Name), - } - if err := inst.ApplyConfig(c, g, dryRun); err != nil { - return nil, err - } - return &inst, nil -} - // DefaultConfig returns a default config for a Logs instance. func DefaultConfig() config.Config { return config.Config{ @@ -146,103 +24,3 @@ func DefaultConfig() config.Config { WAL: wal.Config{Enabled: false}, } } - -// ApplyConfig will apply a new InstanceConfig. If the config hasn't changed, -// then nothing will happen, otherwise the old Promtail will be stopped and -// then replaced with a new one. -func (i *Instance) ApplyConfig(c *InstanceConfig, g GlobalConfig, dryRun bool) error { - i.mut.Lock() - defer i.mut.Unlock() - - // No-op if the configs haven't changed. - if util.CompareYAML(c, i.cfg) { - level.Debug(i.log).Log("msg", "instance config hasn't changed, not recreating Promtail") - return nil - } - i.cfg = c - - positionsDir := filepath.Dir(c.PositionsConfig.PositionsFile) - err := os.MkdirAll(positionsDir, 0775) - if err != nil { - level.Warn(i.log).Log("msg", "failed to create the positions directory. 
logs may be unable to save their position", "path", positionsDir, "err", err) - } - - if i.promtail != nil { - i.promtail.Shutdown() - i.promtail = nil - } - - // Unregister all existing metrics before trying to create a new instance. - if !i.reg.UnregisterAll() { - // If UnregisterAll fails, we need to abort, otherwise the new promtail - // would try to re-register an existing metric and might panic. - return fmt.Errorf("failed to unregister all metrics from previous promtail. THIS IS A BUG") - } - - if len(c.ClientConfigs) == 0 { - level.Debug(i.log).Log("msg", "skipping creation of a promtail because no client_configs are present") - return nil - } - - uid := agentseed.Get().UID - for i := range c.ClientConfigs { - // ClientConfigs is a slice of struct, so we set values with the index - if c.ClientConfigs[i].Headers == nil { - c.ClientConfigs[i].Headers = map[string]string{} - } - c.ClientConfigs[i].Headers[agentseed.HeaderName] = uid - } - - clientMetrics := client.NewMetrics(i.reg) - cfg := DefaultConfig() - cfg.Global = config.GlobalConfig{ - FileWatch: file.WatchConfig{ - MinPollFrequency: g.FileWatch.MinPollFrequency, - MaxPollFrequency: g.FileWatch.MaxPollFrequency, - }, - } - cfg.ClientConfigs = c.ClientConfigs - cfg.PositionsConfig = c.PositionsConfig - cfg.ScrapeConfig = c.ScrapeConfig - cfg.TargetConfig = c.TargetConfig - cfg.LimitsConfig = c.LimitsConfig - - p, err := promtail.New(cfg, nil, clientMetrics, dryRun, promtail.WithLogger(i.log), promtail.WithRegisterer(i.reg)) - if err != nil { - return fmt.Errorf("unable to create logs instance: %w", err) - } - - i.promtail = p - return nil -} - -// SendEntry passes an entry to the internal promtail client and returns true if successfully sent. It is -// best effort and not guaranteed to succeed. -func (i *Instance) SendEntry(entry api.Entry, dur time.Duration) bool { - i.mut.Lock() - defer i.mut.Unlock() - - // promtail is nil it has been stopped - if i.promtail != nil { - // send non blocking so we don't block the mutex. this is best effort - select { - case i.promtail.Client().Chan() <- entry: - return true - case <-time.After(dur): - } - } - - return false -} - -// Stop stops the Promtail instance. 
-func (i *Instance) Stop() { - i.mut.Lock() - defer i.mut.Unlock() - - if i.promtail != nil { - i.promtail.Shutdown() - i.promtail = nil - } - i.reg.UnregisterAll() -} diff --git a/internal/static/logs/logs_test.go b/internal/static/logs/logs_test.go deleted file mode 100644 index 255c99b55f..0000000000 --- a/internal/static/logs/logs_test.go +++ /dev/null @@ -1,206 +0,0 @@ -//go:build !race - -package logs - -import ( - "fmt" - "net" - "net/http" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/grafana/loki/pkg/loghttp/push" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/grafana/loki/pkg/logproto" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" -) - -func TestLogs_NilConfig(t *testing.T) { - l, err := New(prometheus.NewRegistry(), nil, util.TestLogger(t), false) - require.NoError(t, err) - require.NoError(t, l.ApplyConfig(nil, false)) - - defer l.Stop() -} - -func TestLogs(t *testing.T) { - // - // Create a temporary file to tail - // - positionsDir := t.TempDir() - - tmpFile, err := os.CreateTemp(os.TempDir(), "*.log") - require.NoError(t, err) - t.Cleanup(func() { - _ = os.RemoveAll(tmpFile.Name()) - }) - - // - // Listen for push requests and pass them through to a channel - // - pushes := make(chan *logproto.PushRequest) - - lis, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, lis.Close()) - }) - go func() { - _ = http.Serve(lis, http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - req, err := push.ParseRequest(log.NewNopLogger(), "user_id", r, nil, nil, push.ParseLokiRequest) - require.NoError(t, err) - - pushes <- req - _, _ = rw.Write(nil) - })) - }() - - // - // Launch Loki so it starts tailing the file and writes to our server. - // - cfgText := util.Untab(fmt.Sprintf(` -positions_directory: %s -configs: -- name: default - clients: - - url: http://%s/loki/api/v1/push - batchwait: 50ms - batchsize: 1 - scrape_configs: - - job_name: system - static_configs: - - targets: [localhost] - labels: - job: test - __path__: %s - `, positionsDir, lis.Addr().String(), tmpFile.Name())) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - require.NoError(t, cfg.ApplyDefaults()) - logger := log.NewSyncLogger(log.NewNopLogger()) - l, err := New(prometheus.NewRegistry(), &cfg, logger, false) - require.NoError(t, err) - defer l.Stop() - - // - // Write a log line and wait for it to come through. - // - fmt.Fprintf(tmpFile, "Hello, world!\n") - select { - case <-time.After(time.Second * 30): - require.FailNow(t, "timed out waiting for data to be pushed") - case req := <-pushes: - require.Equal(t, "Hello, world!", req.Streams[0].Entries[0].Line) - } - - // - // Apply a new config and write a new line. 
- // - cfgText = util.Untab(fmt.Sprintf(` -positions_directory: %s -configs: -- name: default - clients: - - url: http://%s/loki/api/v1/push - batchwait: 50ms - batchsize: 5 - scrape_configs: - - job_name: system - static_configs: - - targets: [localhost] - labels: - job: test-2 - __path__: %s - `, positionsDir, lis.Addr().String(), tmpFile.Name())) - - var newCfg Config - dec = yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&newCfg)) - require.NoError(t, newCfg.ApplyDefaults()) - require.NoError(t, l.ApplyConfig(&newCfg, false)) - - fmt.Fprintf(tmpFile, "Hello again!\n") - select { - case <-time.After(time.Second * 30): - require.FailNow(t, "timed out waiting for data to be pushed") - case req := <-pushes: - require.Equal(t, "Hello again!", req.Streams[0].Entries[0].Line) - } - - t.Run("update to nil", func(t *testing.T) { - // Applying a nil config should remove all instances. - err := l.ApplyConfig(nil, false) - require.NoError(t, err) - require.Len(t, l.instances, 0) - }) - - t.Run("re-apply previous config", func(t *testing.T) { - // Applying a nil config should remove all instances. - l.ApplyConfig(nil, false) - - // Re-Apply the previous config and write a new line. - var newCfg Config - dec = yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&newCfg)) - require.NoError(t, newCfg.ApplyDefaults()) - require.NoError(t, l.ApplyConfig(&newCfg, false)) - - fmt.Fprintf(tmpFile, "Hello again!\n") - select { - case <-time.After(time.Second * 30): - require.FailNow(t, "timed out waiting for data to be pushed") - case req := <-pushes: - require.Equal(t, "Hello again!", req.Streams[0].Entries[0].Line) - } - }) -} - -func TestLogs_PositionsDirectory(t *testing.T) { - // - // Create a temporary file to tail - // - positionsDir := t.TempDir() - - // - // Launch Loki so it starts tailing the file and writes to our server. 
- // - cfgText := util.Untab(fmt.Sprintf(` -positions_directory: %[1]s/positions -configs: -- name: instance-a - clients: - - url: http://127.0.0.1:80/loki/api/v1/push -- name: instance-b - positions: - filename: %[1]s/other-positions/instance.yml - clients: - - url: http://127.0.0.1:80/loki/api/v1/push - `, positionsDir)) - - var cfg Config - dec := yaml.NewDecoder(strings.NewReader(cfgText)) - dec.SetStrict(true) - require.NoError(t, dec.Decode(&cfg)) - require.NoError(t, cfg.ApplyDefaults()) - logger := util.TestLogger(t) - l, err := New(prometheus.NewRegistry(), &cfg, logger, false) - require.NoError(t, err) - defer l.Stop() - - _, err = os.Stat(filepath.Join(positionsDir, "positions")) - require.NoError(t, err, "default shared positions directory did not get created") - _, err = os.Stat(filepath.Join(positionsDir, "other-positions")) - require.NoError(t, err, "instance-specific positions directory did not get created") -} diff --git a/internal/static/metrics/agent.go b/internal/static/metrics/agent.go index 0c2c745300..6ef123c1ec 100644 --- a/internal/static/metrics/agent.go +++ b/internal/static/metrics/agent.go @@ -7,29 +7,21 @@ import ( "errors" "flag" "fmt" - "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "go.uber.org/atomic" - "google.golang.org/grpc" - "github.com/grafana/agent/internal/static/metrics/cluster" "github.com/grafana/agent/internal/static/metrics/cluster/client" "github.com/grafana/agent/internal/static/metrics/instance" "github.com/grafana/agent/internal/util" - "github.com/prometheus/prometheus/discovery" ) // DefaultConfig is the default settings for the Prometheus-lite client. var DefaultConfig = Config{ Global: instance.DefaultGlobalConfig, - InstanceRestartBackoff: instance.DefaultBasicManagerConfig.InstanceRestartBackoff, + InstanceRestartBackoff: 5 * time.Second, WALDir: "data-agent/", - WALCleanupAge: DefaultCleanupAge, - WALCleanupPeriod: DefaultCleanupPeriod, + WALCleanupAge: 12 * time.Hour, + WALCleanupPeriod: 30 * time.Minute, ServiceConfig: cluster.DefaultConfig, ServiceClientConfig: client.DefaultConfig, InstanceMode: instance.DefaultMode, @@ -123,257 +115,3 @@ func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { c.ServiceConfig.RegisterFlagsWithPrefix(prefix+"service.", f) c.ServiceClientConfig.RegisterFlagsWithPrefix(prefix, f) } - -// Agent is an agent for collecting Prometheus metrics. It acts as a -// Prometheus-lite; only running the service discovery, remote_write, and WAL -// components of Prometheus. It is broken down into a series of Instances, each -// of which perform metric collection. -type Agent struct { - mut sync.RWMutex - cfg Config - logger log.Logger - reg prometheus.Registerer - - // Store both the basic manager and the modal manager, so we can update their - // settings independently. Only the ModalManager should be used for mutating - // configs. - bm *instance.BasicManager - mm *instance.ModalManager - cleaner *WALCleaner - - instanceFactory instanceFactory - - cluster *cluster.Cluster - - stopped bool - stopOnce sync.Once - actor chan func() - - initialBootDone atomic.Bool -} - -// New creates and starts a new Agent. -func New(reg prometheus.Registerer, cfg Config, logger log.Logger) (*Agent, error) { - // This registers discovery metrics with the default registry which should be the reg specified above. 
-	discovery.RegisterMetrics()
-	return newAgent(reg, cfg, logger, defaultInstanceFactory)
-}
-
-func newAgent(reg prometheus.Registerer, cfg Config, logger log.Logger, fact instanceFactory) (*Agent, error) {
-	a := &Agent{
-		logger:          log.With(logger, "agent", "prometheus"),
-		instanceFactory: fact,
-		reg:             reg,
-		actor:           make(chan func(), 1),
-	}
-
-	a.bm = instance.NewBasicManager(instance.BasicManagerConfig{
-		InstanceRestartBackoff: cfg.InstanceRestartBackoff,
-	}, a.logger, a.newInstance)
-
-	var err error
-	a.mm, err = instance.NewModalManager(a.reg, a.logger, a.bm, cfg.InstanceMode)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create modal instance manager: %w", err)
-	}
-
-	a.cluster, err = cluster.New(a.logger, reg, cfg.ServiceConfig, a.mm, a.Validate)
-	if err != nil {
-		return nil, err
-	}
-
-	if err := a.ApplyConfig(cfg); err != nil {
-		return nil, err
-	}
-	go a.run()
-	return a, nil
-}
-
-// newInstance creates a new Instance given a config.
-func (a *Agent) newInstance(c instance.Config) (instance.ManagedInstance, error) {
-	a.mut.RLock()
-	defer a.mut.RUnlock()
-
-	// Controls the label
-	instanceLabel := "instance_name"
-	if a.cfg.InstanceMode == instance.ModeShared {
-		instanceLabel = "instance_group_name"
-	}
-
-	reg := prometheus.WrapRegistererWith(prometheus.Labels{
-		instanceLabel: c.Name,
-	}, a.reg)
-
-	return a.instanceFactory(reg, c, a.cfg.WALDir, a.logger)
-}
-
-// Validate will validate the incoming Config and mutate it to apply defaults.
-func (a *Agent) Validate(c *instance.Config) error {
-	a.mut.RLock()
-	defer a.mut.RUnlock()
-
-	if a.cfg.WALDir == "" {
-		return fmt.Errorf("no wal_directory configured")
-	}
-
-	if err := c.ApplyDefaults(a.cfg.Global); err != nil {
-		return fmt.Errorf("failed to apply defaults to %q: %w", c.Name, err)
-	}
-	return nil
-}
-
-// ApplyConfig applies config changes to the Agent.
-func (a *Agent) ApplyConfig(cfg Config) error {
-	a.mut.Lock()
-	defer a.mut.Unlock()
-
-	if util.CompareYAML(a.cfg, cfg) {
-		return nil
-	}
-
-	if a.stopped {
-		return fmt.Errorf("agent stopped")
-	}
-
-	// The ordering here is done to minimize the number of instances that need to
-	// be restarted. We update components from lowest to highest level:
-	//
-	// 1. WAL Cleaner
-	// 2. Basic manager
-	// 3. Modal Manager
-	// 4. Cluster
-	// 5. Local configs
-
-	if a.cleaner != nil {
-		a.cleaner.Stop()
-		a.cleaner = nil
-	}
-	if cfg.WALDir != "" {
-		a.cleaner = NewWALCleaner(
-			a.logger,
-			a.mm,
-			cfg.WALDir,
-			cfg.WALCleanupAge,
-			cfg.WALCleanupPeriod,
-		)
-	}
-
-	a.bm.UpdateManagerConfig(instance.BasicManagerConfig{
-		InstanceRestartBackoff: cfg.InstanceRestartBackoff,
-	})
-
-	if err := a.mm.SetMode(cfg.InstanceMode); err != nil {
-		return err
-	}
-
-	if err := a.cluster.ApplyConfig(cfg.ServiceConfig); err != nil {
-		return fmt.Errorf("failed to apply cluster config: %w", err)
-	}
-
-	// Queue an actor in the background to sync the instances. This is required
-	// because both this function and newInstance grab the mutex.
-	oldConfig := a.cfg
-
-	a.actor <- func() {
-		a.syncInstances(oldConfig, cfg)
-		a.initialBootDone.Store(true)
-	}
-
-	a.cfg = cfg
-	return nil
-}
-
-// syncInstances syncs the state of the instance manager to newConfig by
-// applying all configs from newConfig and deleting any configs from oldConfig
-// that are not in newConfig.
-func (a *Agent) syncInstances(oldConfig, newConfig Config) { - // Apply the new configs - for _, c := range newConfig.Configs { - if err := a.mm.ApplyConfig(c); err != nil { - level.Error(a.logger).Log("msg", "failed to apply config", "name", c.Name, "err", err) - } - } - - // Remove any configs from oldConfig that aren't in newConfig. - for _, oc := range oldConfig.Configs { - foundConfig := false - for _, nc := range newConfig.Configs { - if nc.Name == oc.Name { - foundConfig = true - break - } - } - if foundConfig { - continue - } - - if err := a.mm.DeleteConfig(oc.Name); err != nil { - level.Error(a.logger).Log("msg", "failed to delete old config", "name", oc.Name, "err", err) - } - } -} - -// run calls received actor functions in the background. -func (a *Agent) run() { - for f := range a.actor { - f() - } -} - -// Ready returns true if both the agent and all instances -// spawned by a Manager have completed startup. -func (a *Agent) Ready() bool { - // Wait for the initial load to complete so the instance manager has at least - // the base set of expected instances. - if !a.initialBootDone.Load() { - return false - } - - for _, inst := range a.mm.ListInstances() { - if !inst.Ready() { - return false - } - } - - return true -} - -// WireGRPC wires gRPC services into the provided server. -func (a *Agent) WireGRPC(s *grpc.Server) { - a.cluster.WireGRPC(s) -} - -// Config returns the configuration of this Agent. -func (a *Agent) Config() Config { return a.cfg } - -// InstanceManager returns the instance manager used by this Agent. -func (a *Agent) InstanceManager() instance.Manager { return a.mm } - -// Stop stops the agent and all its instances. -func (a *Agent) Stop() { - a.mut.Lock() - defer a.mut.Unlock() - - // Close the actor channel to stop run. - a.stopOnce.Do(func() { - close(a.actor) - }) - - a.cluster.Stop() - - if a.cleaner != nil { - a.cleaner.Stop() - } - - // Only need to stop the ModalManager, which will pass through everything to the - // BasicManager. 
-	a.mm.Stop()
-
-	a.stopped = true
-}
-
-type instanceFactory = func(reg prometheus.Registerer, cfg instance.Config, walDir string, logger log.Logger) (instance.ManagedInstance, error)
-
-func defaultInstanceFactory(reg prometheus.Registerer, cfg instance.Config, walDir string, logger log.Logger) (instance.ManagedInstance, error) {
-	return instance.New(reg, cfg, walDir, logger)
-}
diff --git a/internal/static/metrics/agent_test.go b/internal/static/metrics/agent_test.go
index bd311a07cd..2d1d063b20 100644
--- a/internal/static/metrics/agent_test.go
+++ b/internal/static/metrics/agent_test.go
@@ -1,22 +1,11 @@
 package metrics
 
 import (
-	"context"
 	"errors"
-	"fmt"
-	"net/http"
-	"sync"
 	"testing"
-	"time"
 
-	"github.com/go-kit/log"
 	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/util"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/prometheus/scrape"
-	"github.com/prometheus/prometheus/storage"
 	"github.com/stretchr/testify/require"
-	"go.uber.org/atomic"
 	"gopkg.in/yaml.v2"
 )
 
@@ -113,221 +102,6 @@ configs:
 	require.Greater(t, int64(scrapeConfig.ScrapeInterval), int64(0))
 }
 
-func TestAgent(t *testing.T) {
-	// Launch two instances
-	cfg := Config{
-		WALDir: "/tmp/wal",
-		Configs: []instance.Config{
-			makeInstanceConfig("instance_a"),
-			makeInstanceConfig("instance_b"),
-		},
-		InstanceRestartBackoff: time.Duration(0),
-		InstanceMode:           instance.ModeDistinct,
-	}
-
-	fact := newFakeInstanceFactory()
-
-	a, err := newAgent(prometheus.NewRegistry(), cfg, log.NewNopLogger(), fact.factory)
-	require.NoError(t, err)
-
-	util.Eventually(t, func(t require.TestingT) {
-		require.NotNil(t, fact.created)
-		require.Equal(t, 2, int(fact.created.Load()))
-		require.Equal(t, 2, len(a.mm.ListInstances()))
-	})
-
-	t.Run("instances should be running", func(t *testing.T) {
-		for _, mi := range fact.Mocks() {
-			// Each instance should have wait called on it
-			util.Eventually(t, func(t require.TestingT) {
-				require.True(t, mi.running.Load())
-			})
-		}
-	})
-
-	t.Run("instances should be restarted when stopped", func(t *testing.T) {
-		for _, mi := range fact.Mocks() {
-			util.Eventually(t, func(t require.TestingT) {
-				require.Equal(t, 1, int(mi.startedCount.Load()))
-			})
-		}
-
-		for _, mi := range fact.Mocks() {
-			mi.err <- fmt.Errorf("really bad error")
-		}
-
-		for _, mi := range fact.Mocks() {
-			util.Eventually(t, func(t require.TestingT) {
-				require.Equal(t, 2, int(mi.startedCount.Load()))
-			})
-		}
-	})
-}
-
-func TestAgent_NormalInstanceExits(t *testing.T) {
-	tt := []struct {
-		name          string
-		simulateError error
-	}{
-		{"no error", nil},
-		{"context cancelled", context.Canceled},
-	}
-
-	cfg := Config{
-		WALDir: "/tmp/wal",
-		Configs: []instance.Config{
-			makeInstanceConfig("instance_a"),
-			makeInstanceConfig("instance_b"),
-		},
-		InstanceRestartBackoff: time.Duration(0),
-		InstanceMode:           instance.ModeDistinct,
-	}
-
-	for _, tc := range tt {
-		t.Run(tc.name, func(t *testing.T) {
-			fact := newFakeInstanceFactory()
-
-			a, err := newAgent(prometheus.NewRegistry(), cfg, log.NewNopLogger(), fact.factory)
-			require.NoError(t, err)
-
-			util.Eventually(t, func(t require.TestingT) {
-				require.NotNil(t, fact.created)
-				require.Equal(t, 2, int(fact.created.Load()))
-				require.Equal(t, 2, len(a.mm.ListInstances()))
-			})
-			for _, mi := range fact.Mocks() {
-				mi.err <- tc.simulateError
-			}
-
-			time.Sleep(time.Millisecond * 100)
-
-			// Get the new total number of instance starts; the value should
-			// be unchanged.
- var startedCount int64 - for _, i := range fact.Mocks() { - startedCount += i.startedCount.Load() - } - - // There should only be two instances that started. If there's more, something - // restarted despite our error. - require.Equal(t, int64(2), startedCount, "instances should not have restarted") - }) - } -} - -func TestAgent_Stop(t *testing.T) { - // Launch two instances - cfg := Config{ - WALDir: "/tmp/wal", - Configs: []instance.Config{ - makeInstanceConfig("instance_a"), - makeInstanceConfig("instance_b"), - }, - InstanceRestartBackoff: time.Duration(0), - InstanceMode: instance.ModeDistinct, - } - - fact := newFakeInstanceFactory() - - a, err := newAgent(prometheus.NewRegistry(), cfg, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - - util.Eventually(t, func(t require.TestingT) { - require.NotNil(t, fact.created) - require.Equal(t, 2, int(fact.created.Load())) - require.Equal(t, 2, len(a.mm.ListInstances())) - }) - - a.Stop() - - time.Sleep(time.Millisecond * 100) - - for _, mi := range fact.Mocks() { - require.False(t, mi.running.Load(), "instance should not have been restarted") - } -} - -type fakeInstance struct { - cfg instance.Config - - err chan error - startedCount *atomic.Int64 - running *atomic.Bool -} - -func (i *fakeInstance) Run(ctx context.Context) error { - i.startedCount.Inc() - i.running.Store(true) - defer i.running.Store(false) - - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-i.err: - return err - } -} - -func (i *fakeInstance) Ready() bool { - return true -} - -func (i *fakeInstance) Update(_ instance.Config) error { - return instance.ErrInvalidUpdate{ - Inner: fmt.Errorf("can't dynamically update fakeInstance"), - } -} - -func (i *fakeInstance) TargetsActive() map[string][]*scrape.Target { - return nil -} - -func (i *fakeInstance) StorageDirectory() string { - return "" -} - -func (i *fakeInstance) WriteHandler() http.Handler { - return nil -} - -func (i *fakeInstance) Appender(ctx context.Context) storage.Appender { - return nil -} - -type fakeInstanceFactory struct { - mut sync.Mutex - mocks []*fakeInstance - - created *atomic.Int64 -} - -func newFakeInstanceFactory() *fakeInstanceFactory { - return &fakeInstanceFactory{created: atomic.NewInt64(0)} -} - -func (f *fakeInstanceFactory) Mocks() []*fakeInstance { - f.mut.Lock() - defer f.mut.Unlock() - return f.mocks -} - -func (f *fakeInstanceFactory) factory(_ prometheus.Registerer, cfg instance.Config, _ string, _ log.Logger) (instance.ManagedInstance, error) { - f.created.Add(1) - - f.mut.Lock() - defer f.mut.Unlock() - - inst := &fakeInstance{ - cfg: cfg, - running: atomic.NewBool(false), - startedCount: atomic.NewInt64(0), - err: make(chan error), - } - - f.mocks = append(f.mocks, inst) - return inst, nil -} - func makeInstanceConfig(name string) instance.Config { cfg := instance.DefaultConfig cfg.Name = name diff --git a/internal/static/metrics/cleaner.go b/internal/static/metrics/cleaner.go deleted file mode 100644 index 0bf577a5b6..0000000000 --- a/internal/static/metrics/cleaner.go +++ /dev/null @@ -1,271 +0,0 @@ -package metrics - -import ( - "fmt" - "os" - "path/filepath" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/wal" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - promwal "github.com/prometheus/prometheus/tsdb/wlog" -) - -// Default settings for the WAL cleaner. 
-const ( - DefaultCleanupAge = 12 * time.Hour - DefaultCleanupPeriod = 30 * time.Minute -) - -var ( - discoveryError = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_storage_error_total", - Help: "Errors encountered discovering local storage paths", - }, - []string{"storage"}, - ) - - segmentError = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_segment_error_total", - Help: "Errors encountered finding most recent WAL segments", - }, - []string{"storage"}, - ) - - managedStorage = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: "agent_metrics_cleaner_managed_storage", - Help: "Number of storage directories associated with managed instances", - }, - ) - - abandonedStorage = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: "agent_metrics_cleaner_abandoned_storage", - Help: "Number of storage directories not associated with any managed instance", - }, - ) - - cleanupRunsSuccess = promauto.NewCounter( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_success_total", - Help: "Number of successfully removed abandoned WALs", - }, - ) - - cleanupRunsErrors = promauto.NewCounter( - prometheus.CounterOpts{ - Name: "agent_metrics_cleaner_errors_total", - Help: "Number of errors removing abandoned WALs", - }, - ) - - cleanupTimes = promauto.NewHistogram( - prometheus.HistogramOpts{ - Name: "agent_metrics_cleaner_cleanup_seconds", - Help: "Time spent performing each periodic WAL cleanup", - }, - ) -) - -// lastModifiedFunc gets the last modified time of the most recent segment of a WAL -type lastModifiedFunc func(path string) (time.Time, error) - -func lastModified(path string) (time.Time, error) { - existing, err := promwal.Open(nil, path) - if err != nil { - return time.Time{}, err - } - - // We don't care if there are errors closing the abandoned WAL - defer func() { _ = existing.Close() }() - - _, last, err := promwal.Segments(existing.Dir()) - if err != nil { - return time.Time{}, fmt.Errorf("unable to open WAL: %w", err) - } - - if last == -1 { - return time.Time{}, fmt.Errorf("unable to determine most recent segment for %s", path) - } - - // full path to the most recent segment in this WAL - lastSegment := promwal.SegmentName(path, last) - segmentFile, err := os.Stat(lastSegment) - if err != nil { - return time.Time{}, fmt.Errorf("unable to determine mtime for %s segment: %w", lastSegment, err) - } - - return segmentFile.ModTime(), nil -} - -// WALCleaner periodically checks for Write Ahead Logs (WALs) that are not associated -// with any active instance.ManagedInstance and have not been written to in some configured -// amount of time and deletes them. -type WALCleaner struct { - logger log.Logger - instanceManager instance.Manager - walDirectory string - walLastModified lastModifiedFunc - minAge time.Duration - period time.Duration - done chan bool -} - -// NewWALCleaner creates a new cleaner that looks for abandoned WALs in the given -// directory and removes them if they haven't been modified in over minAge. 
Starts
-// a goroutine to periodically run the cleanup method in a loop.
-func NewWALCleaner(logger log.Logger, manager instance.Manager, walDirectory string, minAge time.Duration, period time.Duration) *WALCleaner {
-	c := &WALCleaner{
-		logger:          log.With(logger, "component", "cleaner"),
-		instanceManager: manager,
-		walDirectory:    filepath.Clean(walDirectory),
-		walLastModified: lastModified,
-		minAge:          DefaultCleanupAge,
-		period:          DefaultCleanupPeriod,
-		done:            make(chan bool),
-	}
-
-	if minAge > 0 {
-		c.minAge = minAge
-	}
-
-	// We allow a period of 0 here because '0' means "don't run the task". This
-	// is handled by not running a ticker at all in the run method.
-	if period >= 0 {
-		c.period = period
-	}
-
-	go c.run()
-	return c
-}
-
-// getManagedStorage gets storage directories used for each ManagedInstance
-func (c *WALCleaner) getManagedStorage(instances map[string]instance.ManagedInstance) map[string]bool {
-	out := make(map[string]bool)
-
-	for _, inst := range instances {
-		out[inst.StorageDirectory()] = true
-	}
-
-	return out
-}
-
-// getAllStorage gets all storage directories under walDirectory
-func (c *WALCleaner) getAllStorage() []string {
-	var out []string
-
-	_ = filepath.Walk(c.walDirectory, func(p string, info os.FileInfo, err error) error {
-		if os.IsNotExist(err) {
-			// The root WAL directory doesn't exist. Maybe this Agent isn't responsible for any
-			// instances yet. Log at debug since this isn't a big deal. We'll just try to crawl
-			// the directory again on the next periodic run.
-			level.Debug(c.logger).Log("msg", "WAL storage path does not exist", "path", p, "err", err)
-		} else if err != nil {
-			// Just log any errors traversing the WAL directory. This will potentially result
-			// in a WAL (that has incorrect permissions or some similar problem) not being cleaned
-			// up. This is better than preventing *all* other WALs from being cleaned up.
-			discoveryError.WithLabelValues(p).Inc()
-			level.Warn(c.logger).Log("msg", "unable to traverse WAL storage path", "path", p, "err", err)
-		} else if info.IsDir() && filepath.Dir(p) == c.walDirectory {
-			// Single level below the root are instance storage directories (including WALs)
-			out = append(out, p)
-		}
-
-		return nil
-	})
-
-	return out
-}
-
-// getAbandonedStorage gets the full path of storage directories that aren't associated with
-// an active instance and haven't been written to within a configured duration (usually several
-// hours or more).
-func (c *WALCleaner) getAbandonedStorage(all []string, managed map[string]bool, now time.Time) []string {
-	var out []string
-
-	for _, dir := range all {
-		if managed[dir] {
-			level.Debug(c.logger).Log("msg", "active WAL", "name", dir)
-			continue
-		}
-
-		walDir := wal.SubDirectory(dir)
-		mtime, err := c.walLastModified(walDir)
-		if err != nil {
-			segmentError.WithLabelValues(dir).Inc()
-			level.Warn(c.logger).Log("msg", "unable to find segment mtime of WAL", "name", dir, "err", err)
-			continue
-		}
-
-		diff := now.Sub(mtime)
-		if diff > c.minAge {
-			// The last segment for this WAL was modified more than $minAge (positive number of hours)
-			// in the past. This makes it a candidate for deletion since it's also not associated with
-			// any Instances this agent knows about.
-			out = append(out, dir)
-			level.Debug(c.logger).Log("msg", "abandoned WAL", "name", dir, "mtime", mtime, "diff", diff)
-		}
-	}
-
-	return out
-}
-
-// run periodically cleans up abandoned WALs in a loop (if period != 0) until stopped
-func (c *WALCleaner) run() {
-	// A period of 0 means don't run a cleanup task
-	if c.period == 0 {
-		return
-	}
-
-	ticker := time.NewTicker(c.period)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-c.done:
-			level.Debug(c.logger).Log("msg", "stopping cleaner...")
-			return
-		case <-ticker.C:
-			c.cleanup()
-		}
-	}
-}
-
-// cleanup removes any abandoned and unused WAL directories. Note that it shouldn't be
-// necessary to call this method explicitly in most cases since it will be run periodically
-// in a goroutine (started when WALCleaner is created).
-func (c *WALCleaner) cleanup() {
-	start := time.Now()
-	all := c.getAllStorage()
-	managed := c.getManagedStorage(c.instanceManager.ListInstances())
-	abandoned := c.getAbandonedStorage(all, managed, time.Now())
-
-	managedStorage.Set(float64(len(managed)))
-	abandonedStorage.Set(float64(len(abandoned)))
-
-	for _, a := range abandoned {
-		level.Info(c.logger).Log("msg", "deleting abandoned WAL", "name", a)
-		err := os.RemoveAll(a)
-		if err != nil {
-			level.Error(c.logger).Log("msg", "failed to delete abandoned WAL", "name", a, "err", err)
-			cleanupRunsErrors.Inc()
-		} else {
-			cleanupRunsSuccess.Inc()
-		}
-	}
-
-	cleanupTimes.Observe(time.Since(start).Seconds())
-}
-
-// Stop the cleaner and any background tasks running
-func (c *WALCleaner) Stop() {
-	close(c.done)
-}
diff --git a/internal/static/metrics/cleaner_test.go b/internal/static/metrics/cleaner_test.go
deleted file mode 100644
index f8aeac7fa7..0000000000
--- a/internal/static/metrics/cleaner_test.go
+++ /dev/null
@@ -1,146 +0,0 @@
-package metrics
-
-import (
-	"os"
-	"path/filepath"
-	"testing"
-	"time"
-
-	"github.com/go-kit/log"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/stretchr/testify/require"
-)
-
-func TestWALCleaner_getAllStorageNoRoot(t *testing.T) {
-	walRoot := filepath.Join(os.TempDir(), "getAllStorageNoRoot")
-	logger := log.NewLogfmtLogger(os.Stderr)
-	cleaner := NewWALCleaner(
-		logger,
-		&instance.MockManager{},
-		walRoot,
-		DefaultCleanupAge,
-		DefaultCleanupPeriod,
-	)
-
-	// Bogus WAL root that doesn't exist.
Method should return no results - wals := cleaner.getAllStorage() - - require.Empty(t, wals) -} - -func TestWALCleaner_getAllStorageSuccess(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - DefaultCleanupAge, - DefaultCleanupPeriod, - ) - wals := cleaner.getAllStorage() - - require.Equal(t, []string{walDir}, wals) -} - -func TestWALCleaner_getAbandonedStorageBeforeCutoff(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - all := []string{walDir} - managed := make(map[string]bool) - now := time.Now() - - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - 5*time.Minute, - DefaultCleanupPeriod, - ) - - cleaner.walLastModified = func(path string) (time.Time, error) { - return now, nil - } - - // Last modification time on our WAL directory is the same as "now" - // so there shouldn't be any results even though it's not part of the - // set of "managed" directories. - abandoned := cleaner.getAbandonedStorage(all, managed, now) - require.Empty(t, abandoned) -} - -func TestWALCleaner_getAbandonedStorageAfterCutoff(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - all := []string{walDir} - managed := make(map[string]bool) - now := time.Now() - - logger := log.NewLogfmtLogger(os.Stderr) - cleaner := NewWALCleaner( - logger, - &instance.MockManager{}, - walRoot, - 5*time.Minute, - DefaultCleanupPeriod, - ) - - cleaner.walLastModified = func(path string) (time.Time, error) { - return now.Add(-30 * time.Minute), nil - } - - // Last modification time on our WAL directory is 30 minutes in the past - // compared to "now" and we've set the cutoff for our cleaner to be 5 - // minutes: our WAL directory should show up as abandoned - abandoned := cleaner.getAbandonedStorage(all, managed, now) - require.Equal(t, []string{walDir}, abandoned) -} - -func TestWALCleaner_cleanup(t *testing.T) { - walRoot := t.TempDir() - - walDir := filepath.Join(walRoot, "instance-1") - err := os.MkdirAll(walDir, 0755) - require.NoError(t, err) - - now := time.Now() - logger := log.NewLogfmtLogger(os.Stderr) - manager := &instance.MockManager{} - manager.ListInstancesFunc = func() map[string]instance.ManagedInstance { - return make(map[string]instance.ManagedInstance) - } - - cleaner := NewWALCleaner( - logger, - manager, - walRoot, - 5*time.Minute, - DefaultCleanupPeriod, - ) - - cleaner.walLastModified = func(path string) (time.Time, error) { - return now.Add(-30 * time.Minute), nil - } - - // Last modification time on our WAL directory is 30 minutes in the past - // compared to "now" and we've set the cutoff for our cleaner to be 5 - // minutes: our WAL directory should be removed since it's abandoned - cleaner.cleanup() - _, err = os.Stat(walDir) - require.Error(t, err) - require.True(t, os.IsNotExist(err)) -} diff --git a/internal/static/metrics/cluster/client/client.go b/internal/static/metrics/cluster/client/client.go index 1b90feb99f..b4180ab3b0 100644 --- a/internal/static/metrics/cluster/client/client.go +++ b/internal/static/metrics/cluster/client/client.go @@ -2,25 +2,12 @@ package client import ( "flag" - "io" "reflect" - 
"github.com/grafana/agent/internal/static/agentproto" "github.com/grafana/agent/internal/util" "github.com/grafana/dskit/grpcclient" - "github.com/grafana/dskit/middleware" - otgrpc "github.com/opentracing-contrib/go-grpc" - "github.com/opentracing/opentracing-go" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" ) -// ScrapingServiceClient wraps agentproto.ScrapingServiceClient with a Close method. -type ScrapingServiceClient interface { - agentproto.ScrapingServiceClient - io.Closer -} - var ( // DefaultConfig provides default Config values. DefaultConfig = *util.DefaultConfigFromFlags(&Config{}).(*Config) @@ -54,40 +41,3 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { c.GRPCClientConfig.RegisterFlagsWithPrefix(prefix+"service-client", f) } - -// New returns a new scraping service client. -func New(cfg Config, addr string) (ScrapingServiceClient, error) { - opts := []grpc.DialOption{ - grpc.WithTransportCredentials(insecure.NewCredentials()), - grpc.WithDefaultCallOptions(cfg.GRPCClientConfig.CallOptions()...), - } - grpcDialOpts, err := cfg.GRPCClientConfig.DialOption(instrumentation()) - if err != nil { - return nil, err - } - opts = append(opts, grpcDialOpts...) - conn, err := grpc.Dial(addr, opts...) - if err != nil { - return nil, err - } - - return struct { - agentproto.ScrapingServiceClient - io.Closer - }{ - ScrapingServiceClient: agentproto.NewScrapingServiceClient(conn), - Closer: conn, - }, nil -} - -func instrumentation() ([]grpc.UnaryClientInterceptor, []grpc.StreamClientInterceptor) { - unary := []grpc.UnaryClientInterceptor{ - otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer()), - middleware.ClientUserHeaderInterceptor, - } - stream := []grpc.StreamClientInterceptor{ - otgrpc.OpenTracingStreamClientInterceptor(opentracing.GlobalTracer()), - middleware.StreamClientUserHeaderInterceptor, - } - return unary, stream -} diff --git a/internal/static/metrics/cluster/cluster.go b/internal/static/metrics/cluster/cluster.go deleted file mode 100644 index 9ab498f7e4..0000000000 --- a/internal/static/metrics/cluster/cluster.go +++ /dev/null @@ -1,179 +0,0 @@ -package cluster - -import ( - "context" - "fmt" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/golang/protobuf/ptypes/empty" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/agentproto" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/instance/configstore" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "google.golang.org/grpc" -) - -// Cluster connects an Agent to other Agents and allows them to distribute -// workload. -type Cluster struct { - mut sync.RWMutex - - log log.Logger - cfg Config - baseValidation ValidationFunc - - // - // Internally, Cluster glues together four separate pieces of logic. - // See comments below to get an understanding of what is going on. - // - - // node manages membership in the cluster and performs cluster-wide reshards. - node *node - - // store connects to a configstore for changes. storeAPI is an HTTP API for it. - store *configstore.Remote - storeAPI *configstore.API - - // watcher watches the store and applies changes to an instance.Manager, - // triggering metrics to be collected and sent. configWatcher also does a - // complete refresh of its state on an interval. 
- watcher *configWatcher -} - -// New creates a new Cluster. -func New( - l log.Logger, - reg prometheus.Registerer, - cfg Config, - im instance.Manager, - validate ValidationFunc, -) (*Cluster, error) { - - l = log.With(l, "component", "cluster") - - var ( - c = &Cluster{log: l, cfg: cfg, baseValidation: validate} - err error - ) - - // Hold the lock for the initialization. This is necessary since newNode will - // eventually call Reshard, and we want c.watcher to be initialized when that - // happens. - c.mut.Lock() - defer c.mut.Unlock() - - c.node, err = newNode(reg, l, cfg, c) - if err != nil { - return nil, fmt.Errorf("failed to initialize node membership: %w", err) - } - - c.store, err = configstore.NewRemote(l, reg, cfg.KVStore.Config, cfg.Enabled) - if err != nil { - return nil, fmt.Errorf("failed to initialize configstore: %w", err) - } - c.storeAPI = configstore.NewAPI(l, c.store, c.storeValidate, cfg.APIEnableGetConfiguration) - reg.MustRegister(c.storeAPI) - - c.watcher, err = newConfigWatcher(l, cfg, c.store, im, c.node.Owns, validate) - if err != nil { - return nil, fmt.Errorf("failed to initialize configwatcher: %w", err) - } - - // NOTE(rfratto): ApplyConfig isn't necessary for the initialization but must - // be called for any changes to the configuration. - return c, nil -} - -func (c *Cluster) storeValidate(cfg *instance.Config) error { - c.mut.RLock() - defer c.mut.RUnlock() - - if err := c.baseValidation(cfg); err != nil { - return err - } - - if c.cfg.DangerousAllowReadingFiles { - return nil - } - - // If configs aren't allowed to read from the store, we need to make sure no - // configs coming in from the API set files for passwords. - return validateNofiles(cfg) -} - -// Reshard implements agentproto.ScrapingServiceServer, and syncs the state of -// configs with the configstore. -func (c *Cluster) Reshard(ctx context.Context, _ *agentproto.ReshardRequest) (*empty.Empty, error) { - c.mut.RLock() - defer c.mut.RUnlock() - - level.Info(c.log).Log("msg", "received reshard notification, requesting refresh") - c.watcher.RequestRefresh() - return &empty.Empty{}, nil -} - -// ApplyConfig applies configuration changes to Cluster. -func (c *Cluster) ApplyConfig(cfg Config) error { - c.mut.Lock() - defer c.mut.Unlock() - - if util.CompareYAML(c.cfg, cfg) { - return nil - } - - if err := c.node.ApplyConfig(cfg); err != nil { - return fmt.Errorf("failed to apply config to node membership: %w", err) - } - - if err := c.store.ApplyConfig(cfg.Lifecycler.RingConfig.KVStore, cfg.Enabled); err != nil { - return fmt.Errorf("failed to apply config to config store: %w", err) - } - - if err := c.watcher.ApplyConfig(cfg); err != nil { - return fmt.Errorf("failed to apply config to watcher: %w", err) - } - - c.cfg = cfg - - // Force a refresh so all the configs get updated with new defaults. - level.Info(c.log).Log("msg", "cluster config changed, queueing refresh") - c.watcher.RequestRefresh() - return nil -} - -// WireAPI injects routes into the provided mux router for the config -// management API. -func (c *Cluster) WireAPI(r *mux.Router) { - c.storeAPI.WireAPI(r) - c.node.WireAPI(r) -} - -// WireGRPC injects gRPC server handlers into the provided gRPC server. -func (c *Cluster) WireGRPC(srv *grpc.Server) { - agentproto.RegisterScrapingServiceServer(srv, c) -} - -// Stop stops the cluster and all of its dependencies. 
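Stop below walks a fixed list of named closers and only logs failures, so one misbehaving dependency cannot block the rest of the shutdown. A self-contained sketch of the same pattern, with placeholder closers standing in for the node, store, and watcher:

package main

import (
	"errors"
	"fmt"
)

func main() {
	deps := []struct {
		name   string
		closer func() error
	}{
		{"node", func() error { return nil }},
		{"config store", func() error { return errors.New("kv unreachable") }},
		{"config watcher", func() error { return nil }},
	}

	// Every closer runs even when an earlier one fails; errors are logged,
	// not returned, so shutdown always makes it through the whole list.
	for _, dep := range deps {
		if err := dep.closer(); err != nil {
			fmt.Printf("failed to stop dependency %s: %v\n", dep.name, err)
		}
	}
}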
-func (c *Cluster) Stop() {
-	c.mut.Lock()
-	defer c.mut.Unlock()
-
-	deps := []struct {
-		name   string
-		closer func() error
-	}{
-		{"node", c.node.Stop},
-		{"config store", c.store.Close},
-		{"config watcher", c.watcher.Stop},
-	}
-	for _, dep := range deps {
-		err := dep.closer()
-		if err != nil {
-			level.Error(c.log).Log("msg", "failed to stop dependency", "dependency", dep.name, "err", err)
-		}
-	}
-}
diff --git a/internal/static/metrics/cluster/config_watcher.go b/internal/static/metrics/cluster/config_watcher.go
deleted file mode 100644
index 2544975c8d..0000000000
--- a/internal/static/metrics/cluster/config_watcher.go
+++ /dev/null
@@ -1,340 +0,0 @@
-package cluster
-
-import (
-	"context"
-	"fmt"
-	"sync"
-	"time"
-
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/static/metrics/instance/configstore"
-	"github.com/grafana/agent/internal/util"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-)
-
-var (
-	reshardDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
-		Name: "agent_metrics_scraping_service_reshard_duration",
-		Help: "How long it took for resharding to run.",
-	}, []string{"success"})
-)
-
-// configWatcher connects to a configstore and will apply configs to an
-// instance.Manager.
-type configWatcher struct {
-	log log.Logger
-
-	mut     sync.Mutex
-	cfg     Config
-	stopped bool
-	stop    context.CancelFunc
-
-	store    configstore.Store
-	im       instance.Manager
-	owns     OwnershipFunc
-	validate ValidationFunc
-
-	refreshCh   chan struct{}
-	instanceMut sync.Mutex
-	instances   map[string]struct{}
-}
-
-// OwnershipFunc should determine if a given key is owned by the caller.
-type OwnershipFunc = func(key string) (bool, error)
-
-// ValidationFunc should validate a config.
-type ValidationFunc = func(*instance.Config) error
-
-// newConfigWatcher watches store for changes and checks each config against
-// owns. It will also poll the configstore at a configurable interval.
-func newConfigWatcher(log log.Logger, cfg Config, store configstore.Store, im instance.Manager, owns OwnershipFunc, validate ValidationFunc) (*configWatcher, error) {
-	ctx, cancel := context.WithCancel(context.Background())
-
-	w := &configWatcher{
-		log: log,
-
-		stop: cancel,
-
-		store:    store,
-		im:       im,
-		owns:     owns,
-		validate: validate,
-
-		refreshCh: make(chan struct{}, 1),
-		instances: make(map[string]struct{}),
-	}
-	if err := w.ApplyConfig(cfg); err != nil {
-		return nil, err
-	}
-	// Delay the start of the run loop to prevent a race condition; see run for details.
-	delay := cfg.Lifecycler.HeartbeatPeriod * 3
-	go w.run(ctx, delay)
-	return w, nil
-}
-
-func (w *configWatcher) ApplyConfig(cfg Config) error {
-	w.mut.Lock()
-	defer w.mut.Unlock()
-
-	if util.CompareYAML(w.cfg, cfg) {
-		return nil
-	}
-
-	if w.stopped {
-		return fmt.Errorf("configWatcher already stopped")
-	}
-
-	w.cfg = cfg
-	return nil
-}
-
-func (w *configWatcher) run(ctx context.Context, delay time.Duration) {
-	defer level.Info(w.log).Log("msg", "config watcher run loop exiting")
-	// This is due to a race condition between the heartbeat and config ring in a very narrow set of circumstances
-	// https://gist.github.com/mattdurham/c15f27de17a6da97bf2e6a870991c7f2
-	time.Sleep(delay)
-	lastReshard := time.Now()
-
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-w.nextReshard(lastReshard):
-			level.Debug(w.log).Log("msg", "reshard timer ticked, scheduling refresh")
-			w.RequestRefresh()
-			lastReshard = time.Now()
-		case <-w.refreshCh:
-			err := w.refresh(ctx)
-			if err != nil {
-				level.Error(w.log).Log("msg", "refresh failed", "err", err)
-			}
-		case ev := <-w.store.Watch():
-			level.Debug(w.log).Log("msg", "handling event from config store")
-			if err := w.handleEvent(ev); err != nil {
-				level.Error(w.log).Log("msg", "failed to handle changed or deleted config", "key", ev.Key, "err", err)
-			}
-		}
-	}
-}
-
-// nextReshard returns a channel that will receive a value when the reshard
-// interval has elapsed.
-func (w *configWatcher) nextReshard(lastReshard time.Time) <-chan time.Time {
-	w.mut.Lock()
-	nextReshard := lastReshard.Add(w.cfg.ReshardInterval)
-	w.mut.Unlock()
-
-	remaining := time.Until(nextReshard)
-
-	// NOTE(rfratto): clamping to 0 isn't necessary for time.After,
-	// but it makes the log message clearer to always use "0s" as
-	// "next reshard will be scheduled immediately."
-	if remaining < 0 {
-		remaining = 0
-	}
-
-	level.Debug(w.log).Log("msg", "waiting for next reshard interval", "last_reshard", lastReshard, "next_reshard", nextReshard, "remaining", remaining)
-	return time.After(remaining)
-}
-
-// RequestRefresh will queue a refresh. No more than one refresh can be queued at a time.
-func (w *configWatcher) RequestRefresh() {
-	select {
-	case w.refreshCh <- struct{}{}:
-		level.Debug(w.log).Log("msg", "successfully scheduled a refresh")
-	default:
-		level.Debug(w.log).Log("msg", "ignoring request refresh: refresh already scheduled")
-	}
-}
-
-// refresh reloads all configs from the configstore. Deleted configs will be
-// removed. refresh may not be called concurrently and must only be invoked from run.
-// Call RequestRefresh to queue a call to refresh.
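RequestRefresh above coalesces bursts of requests: refreshCh has capacity one, so the non-blocking send either schedules a refresh or is dropped because one is already pending. A self-contained sketch of that coalescing queue, with print statements standing in for the debug logs:

package main

import "fmt"

func main() {
	refreshCh := make(chan struct{}, 1)

	request := func() {
		select {
		case refreshCh <- struct{}{}:
			fmt.Println("refresh scheduled")
		default: // a refresh is already queued; coalesce this request into it
			fmt.Println("refresh already scheduled, ignoring")
		}
	}

	request()   // schedules
	request()   // coalesced with the pending request
	<-refreshCh // the run loop would consume this and call refresh
	request()   // schedules again now that the slot is free
}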
-func (w *configWatcher) refresh(ctx context.Context) (err error) { - w.mut.Lock() - enabled := w.cfg.Enabled - refreshTimeout := w.cfg.ReshardTimeout - w.mut.Unlock() - - if !enabled { - level.Debug(w.log).Log("msg", "refresh skipped because clustering is disabled") - return nil - } - level.Info(w.log).Log("msg", "starting refresh") - - if refreshTimeout > 0 { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, refreshTimeout) - defer cancel() - } - - start := time.Now() - defer func() { - success := "1" - if err != nil { - success = "0" - } - duration := time.Since(start) - level.Info(w.log).Log("msg", "refresh finished", "duration", duration, "success", success, "err", err) - reshardDuration.WithLabelValues(success).Observe(duration.Seconds()) - }() - - // This is used to determine if the context was already exceeded before calling the kv provider - if err = ctx.Err(); err != nil { - level.Error(w.log).Log("msg", "context deadline exceeded before calling store.all", "err", err) - return err - } - deadline, _ := ctx.Deadline() - level.Debug(w.log).Log("msg", "deadline before store.all", "deadline", deadline) - configs, err := w.store.All(ctx, func(key string) bool { - owns, err := w.owns(key) - if err != nil { - level.Error(w.log).Log("msg", "failed to check for ownership, instance will be deleted if it is running", "key", key, "err", err) - return false - } - return owns - }) - level.Debug(w.log).Log("msg", "count of configs from store.all", "count", len(configs)) - - if err != nil { - return fmt.Errorf("failed to get configs from store: %w", err) - } - - var ( - keys = make(map[string]struct{}) - firstError error - ) - -Outer: - for { - select { - case <-ctx.Done(): - return ctx.Err() - case cfg, ok := <-configs: - // w.store.All will close configs when all of them have been read. - if !ok { - break Outer - } - - if err := w.handleEvent(configstore.WatchEvent{Key: cfg.Name, Config: &cfg}); err != nil { - level.Error(w.log).Log("msg", "failed to process changed config", "key", cfg.Name, "err", err) - if firstError == nil { - firstError = err - } - } - - keys[cfg.Name] = struct{}{} - } - } - - // Any config we used to be running that disappeared from this most recent - // iteration should be deleted. We hold the lock just for the duration of - // populating deleted because handleEvent also grabs a hold on the lock. - var deleted []string - w.instanceMut.Lock() - for key := range w.instances { - if _, exist := keys[key]; exist { - continue - } - deleted = append(deleted, key) - } - w.instanceMut.Unlock() - - // Send a deleted event for any key that has gone away. - for _, key := range deleted { - if err := w.handleEvent(configstore.WatchEvent{Key: key, Config: nil}); err != nil { - level.Error(w.log).Log("msg", "failed to process changed config", "key", key, "err", err) - } - } - - return firstError -} - -func (w *configWatcher) handleEvent(ev configstore.WatchEvent) error { - w.mut.Lock() - defer w.mut.Unlock() - - if w.stopped { - return fmt.Errorf("configWatcher stopped") - } - - w.instanceMut.Lock() - defer w.instanceMut.Unlock() - - owned, err := w.owns(ev.Key) - if err != nil { - level.Error(w.log).Log("msg", "failed to see if config is owned. instance will be deleted if it is running", "err", err) - } - - var ( - _, isRunning = w.instances[ev.Key] - isDeleted = ev.Config == nil - ) - - switch { - // Two deletion scenarios: - // 1. A config we're running got moved to a new owner. - // 2. 
A config we're running got deleted - case (isRunning && !owned) || (isDeleted && isRunning): - if isDeleted { - level.Info(w.log).Log("msg", "untracking deleted config", "key", ev.Key) - } else { - level.Info(w.log).Log("msg", "untracking config that changed owners", "key", ev.Key) - } - - err := w.im.DeleteConfig(ev.Key) - delete(w.instances, ev.Key) - if err != nil { - return fmt.Errorf("failed to delete: %w", err) - } - - case !isDeleted && owned: - if err := w.validate(ev.Config); err != nil { - return fmt.Errorf( - "failed to validate config. %[1]s cannot run until the global settings are adjusted or the config is adjusted to operate within the global constraints. error: %[2]w", - ev.Key, err, - ) - } - - if _, exist := w.instances[ev.Key]; !exist { - level.Info(w.log).Log("msg", "tracking new config", "key", ev.Key) - } - - if err := w.im.ApplyConfig(*ev.Config); err != nil { - return fmt.Errorf("failed to apply config: %w", err) - } - w.instances[ev.Key] = struct{}{} - } - - return nil -} - -// Stop stops the configWatcher. Cannot be called more than once. -func (w *configWatcher) Stop() error { - w.mut.Lock() - defer w.mut.Unlock() - - if w.stopped { - return fmt.Errorf("already stopped") - } - w.stop() - w.stopped = true - - // Shut down all the instances that this configWatcher managed. It *MUST* - // happen after w.stop() is called to prevent the run loop from applying any - // new configs. - w.instanceMut.Lock() - defer w.instanceMut.Unlock() - - for key := range w.instances { - if err := w.im.DeleteConfig(key); err != nil { - level.Warn(w.log).Log("msg", "failed deleting config on shutdown", "key", key, "err", err) - } - } - w.instances = make(map[string]struct{}) - - return nil -} diff --git a/internal/static/metrics/cluster/config_watcher_test.go b/internal/static/metrics/cluster/config_watcher_test.go deleted file mode 100644 index e91bffe5d8..0000000000 --- a/internal/static/metrics/cluster/config_watcher_test.go +++ /dev/null @@ -1,267 +0,0 @@ -package cluster - -import ( - "context" - "testing" - "time" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/static/metrics/instance/configstore" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -func Test_configWatcher_Refresh(t *testing.T) { - var ( - log = util.TestLogger(t) - - cfg = DefaultConfig - store = configstore.Mock{ - WatchFunc: func() <-chan configstore.WatchEvent { - return make(chan configstore.WatchEvent) - }, - } - - im mockConfigManager - - validate = func(*instance.Config) error { return nil } - owned = func(key string) (bool, error) { return true, nil } - ) - cfg.Enabled = true - cfg.ReshardInterval = time.Hour - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // First: return a "hello" config. - store.AllFunc = func(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - ch := make(chan instance.Config) - go func() { - ch <- instance.Config{Name: "hello"} - close(ch) - }() - return ch, nil - } - - err = w.refresh(context.Background()) - require.NoError(t, err) - - // Then: return a "new" config. 
- store.AllFunc = func(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - ch := make(chan instance.Config, 1) - go func() { - ch <- instance.Config{Name: "new"} - close(ch) - }() - return ch, nil - } - - err = w.refresh(context.Background()) - require.NoError(t, err) - - // "hello" and "new" should've been applied, and "hello" should've been deleted - // from the second refresh. - im.AssertCalled(t, "ApplyConfig", instance.Config{Name: "hello"}) - im.AssertCalled(t, "ApplyConfig", instance.Config{Name: "new"}) - im.AssertCalled(t, "DeleteConfig", "hello") -} - -func Test_configWatcher_handleEvent(t *testing.T) { - var ( - cfg = DefaultConfig - store = configstore.Mock{ - WatchFunc: func() <-chan configstore.WatchEvent { - return make(chan configstore.WatchEvent) - }, - } - - validate = func(*instance.Config) error { return nil } - - owned = func(key string) (bool, error) { return true, nil } - unowned = func(key string) (bool, error) { return false, nil } - ) - cfg.Enabled = true - - t.Run("new owned config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - err = w.handleEvent(configstore.WatchEvent{Key: "new", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 1) - }) - - t.Run("updated owned config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, one for update - err = w.handleEvent(configstore.WatchEvent{Key: "update", Config: &instance.Config{}}) - require.NoError(t, err) - - err = w.handleEvent(configstore.WatchEvent{Key: "update", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 2) - }) - - t.Run("new unowned config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, unowned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, one for update - err = w.handleEvent(configstore.WatchEvent{Key: "unowned", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 0) - }) - - t.Run("lost ownership", func(t *testing.T) { - var ( - log = util.TestLogger(t) - - im mockConfigManager - - isOwned = true - owns = func(key string) (bool, error) { return isOwned, nil } - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owns, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, then one for ownership change - err = w.handleEvent(configstore.WatchEvent{Key: "disappear", Config: &instance.Config{}}) - require.NoError(t, err) - - // Mark the config as unowned. The re-apply should then delete it. 
- isOwned = false - - err = w.handleEvent(configstore.WatchEvent{Key: "disappear", Config: &instance.Config{}}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 1) - im.AssertNumberOfCalls(t, "DeleteConfig", 1) - }) - - t.Run("deleted running config", func(t *testing.T) { - var ( - log = util.TestLogger(t) - - im mockConfigManager - ) - - w, err := newConfigWatcher(log, cfg, &store, &im, owned, validate) - require.NoError(t, err) - t.Cleanup(func() { _ = w.Stop() }) - - im.On("ApplyConfig", mock.Anything).Return(nil) - im.On("DeleteConfig", mock.Anything).Return(nil) - - // One for create, then one for deleted. - err = w.handleEvent(configstore.WatchEvent{Key: "new-key", Config: &instance.Config{}}) - require.NoError(t, err) - - err = w.handleEvent(configstore.WatchEvent{Key: "new-key", Config: nil}) - require.NoError(t, err) - - im.AssertNumberOfCalls(t, "ApplyConfig", 1) - im.AssertNumberOfCalls(t, "DeleteConfig", 1) - }) -} - -func Test_configWatcher_nextReshard(t *testing.T) { - watcher := &configWatcher{ - log: util.TestLogger(t), - cfg: Config{ReshardInterval: time.Second}, - } - - t.Run("past time", func(t *testing.T) { - select { - case <-watcher.nextReshard(time.Time{}): - case <-time.After(250 * time.Millisecond): - require.FailNow(t, "nextReshard did not return an already ready channel") - } - }) - - t.Run("future time", func(t *testing.T) { - select { - case <-watcher.nextReshard(time.Now()): - case <-time.After(1500 * time.Millisecond): - require.FailNow(t, "nextReshard took too long to return") - } - }) -} - -type mockConfigManager struct { - mock.Mock -} - -func (m *mockConfigManager) GetInstance(name string) (instance.ManagedInstance, error) { - args := m.Mock.Called() - return args.Get(0).(instance.ManagedInstance), args.Error(1) -} - -func (m *mockConfigManager) ListInstances() map[string]instance.ManagedInstance { - args := m.Mock.Called() - return args.Get(0).(map[string]instance.ManagedInstance) -} - -// ListConfigs implements Manager. -func (m *mockConfigManager) ListConfigs() map[string]instance.Config { - args := m.Mock.Called() - return args.Get(0).(map[string]instance.Config) -} - -// ApplyConfig implements Manager. -func (m *mockConfigManager) ApplyConfig(c instance.Config) error { - args := m.Mock.Called(c) - return args.Error(0) -} - -// DeleteConfig implements Manager. -func (m *mockConfigManager) DeleteConfig(name string) error { - args := m.Mock.Called(name) - return args.Error(0) -} - -// Stop implements Manager. -func (m *mockConfigManager) Stop() { - m.Mock.Called() -} diff --git a/internal/static/metrics/cluster/configapi/types.go b/internal/static/metrics/cluster/configapi/types.go deleted file mode 100644 index bf16b72bdb..0000000000 --- a/internal/static/metrics/cluster/configapi/types.go +++ /dev/null @@ -1,73 +0,0 @@ -package configapi - -import ( - "encoding/json" - "fmt" - "net/http" -) - -// APIResponse is the base object returned for any API call. -// The Data field will be set to either nil or a value of -// another *Response type value from this package. -type APIResponse struct { - Status string `json:"status"` - Data interface{} `json:"data,omitempty"` -} - -// WriteTo writes the response to the given ResponseWriter with the provided -// statusCode. -func (r *APIResponse) WriteTo(w http.ResponseWriter, statusCode int) error { - bb, err := json.Marshal(r) - if err != nil { - // If we fail here, we should at least write a 500 back. 
- w.WriteHeader(http.StatusInternalServerError) - return err - } - - w.WriteHeader(statusCode) - n, err := w.Write(bb) - if err != nil { - return err - } else if n != len(bb) { - return fmt.Errorf("could not write full response. expected %d, wrote %d", len(bb), n) - } - - return nil -} - -// ErrorResponse is contained inside an APIResponse and returns -// an error string. Returned by any API call that can fail. -type ErrorResponse struct { - Error string `json:"error"` -} - -// ListConfigurationsResponse is contained inside an APIResponse -// and provides the list of configurations known to the KV store. -// Returned by ListConfigurations. -type ListConfigurationsResponse struct { - // Configs is the list of configuration names. - Configs []string `json:"configs"` -} - -// GetConfigurationResponse is contained inside an APIResponse -// and provides a single configuration known to the KV store. -// Returned by GetConfiguration. -type GetConfigurationResponse struct { - // Value is the stringified YAML configuration. - Value string `json:"value"` -} - -// WriteResponse writes a response object to the provided ResponseWriter w and with a -// status code of statusCode. resp is marshaled to JSON. -func WriteResponse(w http.ResponseWriter, statusCode int, resp interface{}) error { - apiResp := &APIResponse{Status: "success", Data: resp} - w.Header().Set("Content-Type", "application/json") - return apiResp.WriteTo(w, statusCode) -} - -// WriteError writes an error response back to the ResponseWriter. -func WriteError(w http.ResponseWriter, statusCode int, err error) error { - resp := &APIResponse{Status: "error", Data: &ErrorResponse{Error: err.Error()}} - w.Header().Set("Content-Type", "application/json") - return resp.WriteTo(w, statusCode) -} diff --git a/internal/static/metrics/cluster/node.go b/internal/static/metrics/cluster/node.go deleted file mode 100644 index fab9bc6b94..0000000000 --- a/internal/static/metrics/cluster/node.go +++ /dev/null @@ -1,381 +0,0 @@ -package cluster - -import ( - "context" - "fmt" - "hash/fnv" - "net/http" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - pb "github.com/grafana/agent/internal/static/agentproto" - "github.com/grafana/agent/internal/static/metrics/cluster/client" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/backoff" - "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/services" - "github.com/grafana/dskit/user" - "github.com/prometheus/client_golang/prometheus" -) - -const ( - // agentKey is the key used for storing the hash ring. - agentKey = "agent" -) - -var backoffConfig = backoff.Config{ - MinBackoff: time.Second, - MaxBackoff: 2 * time.Minute, - MaxRetries: 10, -} - -// node manages membership within a ring. when a node joins or leaves the ring, -// it will inform other nodes to reshard their workloads. After a node joins -// the ring, it will inform the local service to reshard. -type node struct { - log log.Logger - reg *util.Unregisterer - srv pb.ScrapingServiceServer - - mut sync.RWMutex - cfg Config - ring *ring.Ring - lc *ring.Lifecycler - - exited bool - reload chan struct{} -} - -// newNode creates a new node and registers it to the ring. 
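A node decides whether it owns a scrape config by hashing the config key and looking the token up in the ring; see Owns and keyHash at the end of this file. A self-contained sketch of just the hashing half, using the same 32-bit FNV-1 scheme (the sample keys are made up):

package main

import (
	"fmt"
	"hash/fnv"
)

// keyHash mirrors the helper below: a 32-bit FNV-1 hash of a config key,
// used as the token that is looked up in the ring.
func keyHash(key string) uint32 {
	h := fnv.New32()
	_, _ = h.Write([]byte(key))
	return h.Sum32()
}

func main() {
	for _, key := range []string{"instance-a", "instance-b"} {
		fmt.Printf("%s -> ring token %d\n", key, keyHash(key))
	}
}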
-func newNode(reg prometheus.Registerer, log log.Logger, cfg Config, s pb.ScrapingServiceServer) (*node, error) { - n := &node{ - reg: util.WrapWithUnregisterer(reg), - srv: s, - log: log, - - reload: make(chan struct{}, 1), - } - if err := n.ApplyConfig(cfg); err != nil { - return nil, err - } - go n.run() - return n, nil -} - -func (n *node) ApplyConfig(cfg Config) error { - n.mut.Lock() - defer n.mut.Unlock() - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - // Detect if the config changed. - if util.CompareYAML(n.cfg, cfg) { - return nil - } - - if n.exited { - return fmt.Errorf("node already exited") - } - - level.Info(n.log).Log("msg", "applying config") - - // Shut down old components before re-creating the updated ones. - n.reg.UnregisterAll() - - if n.lc != nil { - // Note that this will call performClusterReshard and will block until it - // completes. - err := services.StopAndAwaitTerminated(ctx, n.lc) - if err != nil { - return fmt.Errorf("failed to stop lifecycler: %w", err) - } - n.lc = nil - } - - if n.ring != nil { - err := services.StopAndAwaitTerminated(ctx, n.ring) - if err != nil { - return fmt.Errorf("failed to stop ring: %w", err) - } - n.ring = nil - } - - if !cfg.Enabled { - n.cfg = cfg - return nil - } - - r, err := newRing(cfg.Lifecycler.RingConfig, "agent_viewer", agentKey, n.reg, n.log) - if err != nil { - return fmt.Errorf("failed to create ring: %w", err) - } - - if err := services.StartAndAwaitRunning(context.Background(), r); err != nil { - return fmt.Errorf("failed to start ring: %w", err) - } - n.ring = r - - lc, err := ring.NewLifecycler(cfg.Lifecycler.LifecyclerConfig, n, "agent", agentKey, false, n.log, prometheus.WrapRegistererWithPrefix("agent_dskit_", n.reg)) - if err != nil { - return fmt.Errorf("failed to create lifecycler: %w", err) - } - if err := services.StartAndAwaitRunning(context.Background(), lc); err != nil { - if err := services.StopAndAwaitTerminated(ctx, r); err != nil { - level.Error(n.log).Log("msg", "failed to stop ring when returning error. next config reload will fail", "err", err) - } - return fmt.Errorf("failed to start lifecycler: %w", err) - } - n.lc = lc - - n.cfg = cfg - - // Reload and reshard the cluster. - n.reload <- struct{}{} - return nil -} - -// newRing creates a new Cortex Ring that ignores unhealthy nodes. -func newRing(cfg ring.Config, name, key string, reg prometheus.Registerer, log log.Logger) (*ring.Ring, error) { - codec := ring.GetCodec() - store, err := kv.NewClient( - cfg.KVStore, - codec, - kv.RegistererWithKVName(reg, name+"-ring"), - log, - ) - if err != nil { - return nil, err - } - return ring.NewWithStoreClientAndStrategy(cfg, name, key, store, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("agent_dskit_", reg), log) -} - -// run waits for connection to the ring and kickstarts the join process. -func (n *node) run() { - for range n.reload { - n.mut.RLock() - - if err := n.performClusterReshard(context.Background(), true); err != nil { - level.Warn(n.log).Log("msg", "dynamic cluster reshard did not succeed", "err", err) - } - - n.mut.RUnlock() - } - - level.Info(n.log).Log("msg", "node run loop exiting") -} - -// performClusterReshard informs the cluster to immediately trigger a reshard -// of their workloads. if joining is true, the server provided to newNode will -// also be informed. 
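performClusterReshard and notifyReshard below share a retry shape built on dskit's backoff helper: loop while Ongoing, break on success, otherwise Wait. A self-contained sketch of that loop; the flaky call here is a stand-in for something like ring.GetAllHealthy:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/grafana/dskit/backoff"
)

func main() {
	cfg := backoff.Config{
		MinBackoff: 100 * time.Millisecond,
		MaxBackoff: time.Second,
		MaxRetries: 5,
	}

	attempts := 0
	flakyCall := func() error { // stand-in for a ring or gRPC call
		attempts++
		if attempts < 3 {
			return errors.New("not ready")
		}
		return nil
	}

	b := backoff.New(context.Background(), cfg)
	for b.Ongoing() {
		if err := flakyCall(); err == nil {
			break
		}
		b.Wait() // sleeps with capped exponential backoff and counts a retry
	}

	// Err is non-nil only when retries were exhausted or the context ended.
	fmt.Println("attempts:", attempts, "err:", b.Err())
}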
-func (n *node) performClusterReshard(ctx context.Context, joining bool) error {
-	if n.ring == nil || n.lc == nil {
-		level.Info(n.log).Log("msg", "node disabled, not resharding")
-		return nil
-	}
-
-	if n.cfg.ClusterReshardEventTimeout > 0 {
-		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, n.cfg.ClusterReshardEventTimeout)
-		defer cancel()
-	}
-
-	var (
-		rs  ring.ReplicationSet
-		err error
-	)
-
-	backoff := backoff.New(ctx, backoffConfig)
-	for backoff.Ongoing() {
-		if ctx.Err() != nil {
-			return ctx.Err()
-		}
-		rs, err = n.ring.GetAllHealthy(ring.Read)
-		if err == nil {
-			break
-		}
-		backoff.Wait()
-	}
-
-	if len(rs.Instances) > 0 {
-		level.Info(n.log).Log("msg", "informing remote nodes to reshard")
-	}
-
-	// These are not in the goroutine below due to a potential race condition with n.lc.Addr
-	_, err = rs.Do(ctx, 500*time.Millisecond, func(c context.Context, id *ring.InstanceDesc) (interface{}, error) {
-		// Skip over ourselves.
-		if id.Addr == n.lc.Addr {
-			return nil, nil
-		}
-
-		notifyCtx := user.InjectOrgID(c, "fake")
-		return nil, n.notifyReshard(notifyCtx, id)
-	})
-
-	if err != nil {
-		level.Error(n.log).Log("msg", "notifying other nodes failed", "err", err)
-	}
-
-	if joining {
-		level.Info(n.log).Log("msg", "running local reshard")
-		if _, err := n.srv.Reshard(ctx, &pb.ReshardRequest{}); err != nil {
-			level.Warn(n.log).Log("msg", "dynamic local reshard did not succeed", "err", err)
-		}
-	}
-	return err
-}
-
-// notifyReshard informs an individual node to reshard.
-func (n *node) notifyReshard(ctx context.Context, id *ring.InstanceDesc) error {
-	cli, err := client.New(n.cfg.Client, id.Addr)
-	if err != nil {
-		return err
-	}
-	defer cli.Close()
-
-	level.Info(n.log).Log("msg", "attempting to notify remote agent to reshard", "addr", id.Addr)
-
-	backoff := backoff.New(ctx, backoffConfig)
-	for backoff.Ongoing() {
-		if ctx.Err() != nil {
-			return ctx.Err()
-		}
-		_, err := cli.Reshard(ctx, &pb.ReshardRequest{})
-		if err == nil {
-			break
-		}
-
-		level.Warn(n.log).Log("msg", "reshard notification attempt failed", "addr", id.Addr, "err", err, "attempt", backoff.NumRetries())
-		backoff.Wait()
-	}
-
-	return backoff.Err()
-}
-
-// WaitJoined waits for the node to join the cluster and enter the
-// ACTIVE state.
-func (n *node) WaitJoined(ctx context.Context) error {
-	n.mut.RLock()
-	defer n.mut.RUnlock()
-
-	level.Info(n.log).Log("msg", "waiting for the node to join the cluster")
-	defer level.Info(n.log).Log("msg", "node has joined the cluster")
-
-	if n.ring == nil || n.lc == nil {
-		return fmt.Errorf("node disabled")
-	}
-
-	return waitJoined(ctx, agentKey, n.ring.KVClient, n.lc.ID)
-}
-
-func waitJoined(ctx context.Context, key string, kvClient kv.Client, id string) error {
-	kvClient.WatchKey(ctx, key, func(value interface{}) bool {
-		if value == nil {
-			return true
-		}
-
-		desc := value.(*ring.Desc)
-		for ingID, ing := range desc.Ingesters {
-			if ingID == id && ing.State == ring.ACTIVE {
-				return false
-			}
-		}
-
-		return true
-	})
-
-	return ctx.Err()
-}
-
-func (n *node) WireAPI(r *mux.Router) {
-	r.HandleFunc("/debug/ring", func(rw http.ResponseWriter, r *http.Request) {
-		n.mut.RLock()
-		defer n.mut.RUnlock()
-
-		if n.ring == nil {
-			http.NotFoundHandler().ServeHTTP(rw, r)
-			return
-		}
-
-		n.ring.ServeHTTP(rw, r)
-	})
-}
-
-// Stop stops the node and its background tasks. The node cannot be used
-// again once Stop is called.
-func (n *node) Stop() error { - n.mut.Lock() - defer n.mut.Unlock() - - if n.exited { - return fmt.Errorf("node already exited") - } - n.exited = true - - level.Info(n.log).Log("msg", "shutting down node") - - // Shut down dependencies. The lifecycler *MUST* be shut down first since n.ring is - // used during the shutdown process to inform other nodes to reshard. - // - // Note that stopping the lifecycler will call performClusterReshard and will block - // until it completes. - var ( - firstError error - deps []services.Service - ) - - if n.lc != nil { - deps = append(deps, n.lc) - } - if n.ring != nil { - deps = append(deps, n.ring) - } - for _, dep := range deps { - err := services.StopAndAwaitTerminated(context.Background(), dep) - if err != nil && firstError == nil { - firstError = err - } - } - - close(n.reload) - level.Info(n.log).Log("msg", "node shut down") - return firstError -} - -// Flush implements ring.FlushTransferer. It's a no-op. -func (n *node) Flush() {} - -// TransferOut implements ring.FlushTransferer. It connects to all other healthy agents and -// tells them to reshard. TransferOut should NOT be called manually unless the mutex is -// held. -func (n *node) TransferOut(ctx context.Context) error { - return n.performClusterReshard(ctx, false) -} - -// Owns checks to see if a key is owned by this node. owns will return -// an error if the ring is empty or if there aren't enough healthy nodes. -func (n *node) Owns(key string) (bool, error) { - n.mut.RLock() - defer n.mut.RUnlock() - - rs, err := n.ring.Get(keyHash(key), ring.Write, nil, nil, nil) - if err != nil { - return false, err - } - for _, r := range rs.Instances { - if r.Addr == n.lc.Addr { - return true, nil - } - } - return false, nil -} - -func keyHash(key string) uint32 { - h := fnv.New32() - _, _ = h.Write([]byte(key)) - return h.Sum32() -} diff --git a/internal/static/metrics/cluster/node_test.go b/internal/static/metrics/cluster/node_test.go deleted file mode 100644 index 15df4ab7fb..0000000000 --- a/internal/static/metrics/cluster/node_test.go +++ /dev/null @@ -1,223 +0,0 @@ -package cluster - -import ( - "context" - "flag" - "fmt" - "math/rand" - "net" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/golang/protobuf/ptypes/empty" - "github.com/grafana/agent/internal/static/agentproto" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/services" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" - "google.golang.org/grpc" - "gopkg.in/yaml.v2" -) - -func Test_node_Join(t *testing.T) { - var ( - reg = prometheus.NewRegistry() - logger = util.TestLogger(t) - - localReshard = make(chan struct{}, 2) - remoteReshard = make(chan struct{}, 2) - ) - - local := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - localReshard <- struct{}{} - return &empty.Empty{}, nil - }, - } - - remote := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - remoteReshard <- struct{}{} - return &empty.Empty{}, nil - }, - } - startNode(t, remote, logger) - - nodeConfig := DefaultConfig - nodeConfig.Enabled = true - nodeConfig.Lifecycler.LifecyclerConfig = testLifecyclerConfig(t) - - n, err := newNode(reg, logger, nodeConfig, local) - require.NoError(t, err) - t.Cleanup(func() { _ = n.Stop() }) - - 
require.NoError(t, n.WaitJoined(context.Background())) - - waitAll(t, remoteReshard, localReshard) -} - -// waitAll waits for a message on all channels. -func waitAll(t *testing.T, chs ...chan struct{}) { - timeoutCh := time.After(5 * time.Second) - for _, ch := range chs { - select { - case <-timeoutCh: - require.FailNow(t, "timeout exceeded") - case <-ch: - } - } -} - -func Test_node_Leave(t *testing.T) { - var ( - reg = prometheus.NewRegistry() - logger = util.TestLogger(t) - - sendReshard = atomic.NewBool(false) - remoteReshard = make(chan struct{}, 2) - ) - - local := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - return &empty.Empty{}, nil - }, - } - - remote := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - if sendReshard.Load() { - remoteReshard <- struct{}{} - } - return &empty.Empty{}, nil - }, - } - startNode(t, remote, logger) - - nodeConfig := DefaultConfig - nodeConfig.Enabled = true - nodeConfig.Lifecycler.LifecyclerConfig = testLifecyclerConfig(t) - - n, err := newNode(reg, logger, nodeConfig, local) - require.NoError(t, err) - require.NoError(t, n.WaitJoined(context.Background())) - - // Update the reshard function to write to remoteReshard on shutdown. - sendReshard.Store(true) - - // Stop the node so it transfers data outward. - require.NoError(t, n.Stop(), "failed to stop the node") - - level.Info(logger).Log("msg", "waiting for remote reshard to occur") - waitAll(t, remoteReshard) -} - -func Test_node_ApplyConfig(t *testing.T) { - var ( - reg = prometheus.NewRegistry() - logger = util.TestLogger(t) - - localReshard = make(chan struct{}, 10) - ) - - local := &agentproto.FuncScrapingServiceServer{ - ReshardFunc: func(c context.Context, rr *agentproto.ReshardRequest) (*empty.Empty, error) { - localReshard <- struct{}{} - return &empty.Empty{}, nil - }, - } - - nodeConfig := DefaultConfig - nodeConfig.Enabled = true - nodeConfig.Lifecycler.LifecyclerConfig = testLifecyclerConfig(t) - - n, err := newNode(reg, logger, nodeConfig, local) - require.NoError(t, err) - t.Cleanup(func() { _ = n.Stop() }) - require.NoError(t, n.WaitJoined(context.Background())) - - // Wait for the initial join to trigger. - waitAll(t, localReshard) - - // An ApplyConfig working correctly should re-join the cluster, which can be - // detected by local resharding applying twice. - nodeConfig.Lifecycler.NumTokens = 1 - require.NoError(t, n.ApplyConfig(nodeConfig), "failed to apply new config") - require.NoError(t, n.WaitJoined(context.Background())) - - waitAll(t, localReshard) -} - -// startNode launches srv as a gRPC server and registers it to the ring. -func startNode(t *testing.T, srv agentproto.ScrapingServiceServer, logger log.Logger) { - t.Helper() - - l, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - grpcServer := grpc.NewServer() - agentproto.RegisterScrapingServiceServer(grpcServer, srv) - - go func() { - _ = grpcServer.Serve(l) - }() - t.Cleanup(func() { grpcServer.Stop() }) - - lcConfig := testLifecyclerConfig(t) - lcConfig.Addr = l.Addr().(*net.TCPAddr).IP.String() - lcConfig.Port = l.Addr().(*net.TCPAddr).Port - - lc, err := ring.NewLifecycler(lcConfig, ring.NewNoopFlushTransferer(), "agent", "agent", false, logger, nil) - require.NoError(t, err) - - err = services.StartAndAwaitRunning(context.Background(), lc) - require.NoError(t, err) - - // Wait for the new node to be in the ring. 
- joinWaitCtx, joinWaitCancel := context.WithTimeout(context.Background(), 5*time.Second) - defer joinWaitCancel() - err = waitJoined(joinWaitCtx, agentKey, lc.KVStore, lc.ID) - require.NoError(t, err) - - t.Cleanup(func() { - _ = services.StopAndAwaitTerminated(context.Background(), lc) - }) -} - -func testLifecyclerConfig(t *testing.T) ring.LifecyclerConfig { - t.Helper() - - cfgText := util.Untab(fmt.Sprintf(` -ring: - kvstore: - store: inmemory - prefix: tests/%s -final_sleep: 0s -min_ready_duration: 0s - `, t.Name())) - - // Apply default values by registering to a fake flag set. - var lc ring.LifecyclerConfig - lc.RegisterFlagsWithPrefix("", flag.NewFlagSet("", flag.ContinueOnError), log.NewNopLogger()) - - err := yaml.Unmarshal([]byte(cfgText), &lc) - require.NoError(t, err) - - // Assign a random default ID. - var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") - name := make([]rune, 10) - for i := range name { - name[i] = letters[rand.Intn(len(letters))] - } - lc.ID = string(name) - - // Add an invalid default address/port. Tests can override if they expect - // incoming traffic. - lc.Addr = "x.x.x.x" - lc.Port = -1 - - return lc -} diff --git a/internal/static/metrics/cluster/validation.go b/internal/static/metrics/cluster/validation.go deleted file mode 100644 index 6821a8beeb..0000000000 --- a/internal/static/metrics/cluster/validation.go +++ /dev/null @@ -1,150 +0,0 @@ -package cluster - -import ( - "fmt" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/loki/clients/pkg/promtail/discovery/consulagent" - "github.com/prometheus/common/config" - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/aws" - "github.com/prometheus/prometheus/discovery/azure" - "github.com/prometheus/prometheus/discovery/consul" - "github.com/prometheus/prometheus/discovery/digitalocean" - "github.com/prometheus/prometheus/discovery/dns" - "github.com/prometheus/prometheus/discovery/eureka" - "github.com/prometheus/prometheus/discovery/file" - "github.com/prometheus/prometheus/discovery/gce" - "github.com/prometheus/prometheus/discovery/hetzner" - "github.com/prometheus/prometheus/discovery/http" - "github.com/prometheus/prometheus/discovery/kubernetes" - "github.com/prometheus/prometheus/discovery/linode" - "github.com/prometheus/prometheus/discovery/marathon" - "github.com/prometheus/prometheus/discovery/moby" - "github.com/prometheus/prometheus/discovery/openstack" - "github.com/prometheus/prometheus/discovery/scaleway" - "github.com/prometheus/prometheus/discovery/triton" - "github.com/prometheus/prometheus/discovery/zookeeper" -) - -func validateNofiles(c *instance.Config) error { - for i, rw := range c.RemoteWrite { - if err := validateHTTPNoFiles(&rw.HTTPClientConfig); err != nil { - return fmt.Errorf("failed to validate remote_write at index %d: %w", i, err) - } - } - - for i, sc := range c.ScrapeConfigs { - if err := validateHTTPNoFiles(&sc.HTTPClientConfig); err != nil { - return fmt.Errorf("failed to validate scrape_config at index %d: %w", i, err) - } - - for j, disc := range sc.ServiceDiscoveryConfigs { - if err := validateDiscoveryNoFiles(disc); err != nil { - return fmt.Errorf("failed to validate service discovery at index %d within scrape_config at index %d: %w", j, i, err) - } - } - } - - return nil -} - -func validateHTTPNoFiles(cfg *config.HTTPClientConfig) error { - checks := []struct { - name string - check func() bool - }{ - {"bearer_token_file", func() bool { return 
cfg.BearerTokenFile != "" }}, - {"password_file", func() bool { return cfg.BasicAuth != nil && cfg.BasicAuth.PasswordFile != "" }}, - {"credentials_file", func() bool { return cfg.Authorization != nil && cfg.Authorization.CredentialsFile != "" }}, - {"ca_file", func() bool { return cfg.TLSConfig.CAFile != "" }}, - {"cert_file", func() bool { return cfg.TLSConfig.CertFile != "" }}, - {"key_file", func() bool { return cfg.TLSConfig.KeyFile != "" }}, - } - for _, check := range checks { - if check.check() { - return fmt.Errorf("%s must be empty unless dangerous_allow_reading_files is set", check.name) - } - } - return nil -} - -func validateDiscoveryNoFiles(disc discovery.Config) error { - switch d := disc.(type) { - case discovery.StaticConfig: - // no-op - case *azure.SDConfig: - // no-op - case *consul.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.HTTPClientConfig.TLSConfig}); err != nil { - return err - } - case *consulagent.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.TLSConfig}); err != nil { - return err - } - case *digitalocean.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *dns.SDConfig: - // no-op - case *moby.DockerSwarmSDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *aws.EC2SDConfig: - // no-op - case *eureka.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *file.SDConfig: - // no-op - case *gce.SDConfig: - // no-op - case *hetzner.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *kubernetes.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *marathon.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - if d.AuthTokenFile != "" { - return fmt.Errorf("auth_token_file must be empty unless dangerous_allow_reading_files is set") - } - case *openstack.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.TLSConfig}); err != nil { - return err - } - case *scaleway.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *triton.SDConfig: - if err := validateHTTPNoFiles(&config.HTTPClientConfig{TLSConfig: d.TLSConfig}); err != nil { - return err - } - case *http.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *linode.SDConfig: - if err := validateHTTPNoFiles(&d.HTTPClientConfig); err != nil { - return err - } - case *zookeeper.NerveSDConfig: - // no-op - case *zookeeper.ServersetSDConfig: - // no-op - default: - return fmt.Errorf("unknown service discovery %s; rejecting config for safety. 
set dangerous_allow_reading_files to ignore", d.Name()) - } - - return nil -} diff --git a/internal/static/metrics/cluster/validation_test.go b/internal/static/metrics/cluster/validation_test.go deleted file mode 100644 index 180a0bfabb..0000000000 --- a/internal/static/metrics/cluster/validation_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package cluster - -import ( - "fmt" - "strings" - "testing" - - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/stretchr/testify/require" -) - -func Test_validateNoFiles(t *testing.T) { - tt := []struct { - name string - input string - expect error - }{ - { - name: "valid config", - input: util.Untab(` - scrape_configs: - - job_name: innocent_scrape - static_configs: - - targets: ['127.0.0.1:12345'] - remote_write: - - url: http://localhost:9009/api/prom/push - `), - expect: nil, - }, - { - name: "all SDs", - input: util.Untab(` - scrape_configs: - - job_name: basic_sds - static_configs: - - targets: ['localhost'] - azure_sd_configs: - - subscription_id: fake - tenant_id: fake - client_id: fake - client_secret: fake - consul_sd_configs: - - {} - dns_sd_configs: - - names: ['fake'] - ec2_sd_configs: - - region: fake - eureka_sd_configs: - - server: http://localhost:80/eureka - file_sd_configs: - - files: ['fake.json'] - digitalocean_sd_configs: - - {} - dockerswarm_sd_configs: - - host: localhost - role: nodes - gce_sd_configs: - - project: fake - zone: fake - hetzner_sd_configs: - - role: hcloud - kubernetes_sd_configs: - - role: pod - marathon_sd_configs: - - servers: ['localhost'] - nerve_sd_configs: - - servers: ['localhost'] - paths: ['/'] - openstack_sd_configs: - - role: instance - region: fake - scaleway_sd_configs: - - role: instance - project_id: ffffffff-ffff-ffff-ffff-ffffffffffff - secret_key: ffffffff-ffff-ffff-ffff-ffffffffffff - access_key: SCWXXXXXXXXXXXXXXXXX - serverset_sd_configs: - - servers: ['localhost'] - paths: ['/'] - triton_sd_configs: - - account: fake - dns_suffix: fake - endpoint: fake - `), - expect: nil, - }, - { - name: "invalid http client config", - input: util.Untab(` - scrape_configs: - - job_name: malicious_scrape - static_configs: - - targets: ['badsite.com'] - basic_auth: - username: file_leak - password_file: /etc/password - remote_write: - - url: http://localhost:9009/api/prom/push - `), - expect: fmt.Errorf("failed to validate scrape_config at index 0: password_file must be empty unless dangerous_allow_reading_files is set"), - }, - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - cfg, err := instance.UnmarshalConfig(strings.NewReader(tc.input)) - require.NoError(t, err) - - actual := validateNofiles(cfg) - if tc.expect == nil { - require.NoError(t, actual) - } else { - require.EqualError(t, actual, tc.expect.Error()) - } - }) - } -} diff --git a/internal/static/metrics/http.go b/internal/static/metrics/http.go deleted file mode 100644 index 51e5aa64e6..0000000000 --- a/internal/static/metrics/http.go +++ /dev/null @@ -1,166 +0,0 @@ -package metrics - -import ( - "fmt" - "net/http" - "net/url" - "sort" - "time" - - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/scrape" -) - -// WireAPI adds API routes to the provided mux router. 
-func (a *Agent) WireAPI(r *mux.Router) {
-	a.cluster.WireAPI(r)
-
-	// Backwards-compatible endpoints. Use the endpoints with the `metrics` prefix instead.
-	r.HandleFunc("/agent/api/v1/instances", a.ListInstancesHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/targets", a.ListTargetsHandler).Methods("GET")
-
-	r.HandleFunc("/agent/api/v1/metrics/instances", a.ListInstancesHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/metrics/targets", a.ListTargetsHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/metrics/instance/{instance}/write", a.PushMetricsHandler).Methods("POST")
-}
-
-// ListInstancesHandler writes the set of currently running instances to the http.ResponseWriter.
-func (a *Agent) ListInstancesHandler(w http.ResponseWriter, _ *http.Request) {
-	cfgs := a.mm.ListConfigs()
-	instanceNames := make([]string, 0, len(cfgs))
-	for k := range cfgs {
-		instanceNames = append(instanceNames, k)
-	}
-	sort.Strings(instanceNames)
-
-	err := configapi.WriteResponse(w, http.StatusOK, instanceNames)
-	if err != nil {
-		level.Error(a.logger).Log("msg", "failed to write response", "err", err)
-	}
-}
-
-// ListTargetsHandler retrieves the full set of targets across all instances and shows
-// information on them.
-func (a *Agent) ListTargetsHandler(w http.ResponseWriter, r *http.Request) {
-	instances := a.mm.ListInstances()
-	allTargets := make(map[string]TargetSet, len(instances))
-	for instName, inst := range instances {
-		allTargets[instName] = inst.TargetsActive()
-	}
-	ListTargetsHandler(allTargets).ServeHTTP(w, r)
-}
-
-// ListTargetsHandler renders a mapping of instance to target set.
-func ListTargetsHandler(targets map[string]TargetSet) http.Handler {
-	return http.HandlerFunc(func(rw http.ResponseWriter, _ *http.Request) {
-		resp := ListTargetsResponse{}
-
-		for instance, tset := range targets {
-			for key, targets := range tset {
-				for _, tgt := range targets {
-					var lastError string
-					if scrapeError := tgt.LastError(); scrapeError != nil {
-						lastError = scrapeError.Error()
-					}
-
-					resp = append(resp, TargetInfo{
-						InstanceName: instance,
-						TargetGroup:  key,
-
-						Endpoint:         tgt.URL().String(),
-						State:            string(tgt.Health()),
-						DiscoveredLabels: tgt.DiscoveredLabels(),
-						Labels:           tgt.Labels(),
-						LastScrape:       tgt.LastScrape(),
-						ScrapeDuration:   tgt.LastScrapeDuration().Milliseconds(),
-						ScrapeError:      lastError,
-					})
-				}
-			}
-		}
-
-		sort.Slice(resp, func(i, j int) bool {
-			// Sort by instance, then target group, then job label, then instance label.
-			var (
-				iInstance      = resp[i].InstanceName
-				iTargetGroup   = resp[i].TargetGroup
-				iJobLabel      = resp[i].Labels.Get(model.JobLabel)
-				iInstanceLabel = resp[i].Labels.Get(model.InstanceLabel)
-
-				jInstance      = resp[j].InstanceName
-				jTargetGroup   = resp[j].TargetGroup
-				jJobLabel      = resp[j].Labels.Get(model.JobLabel)
-				jInstanceLabel = resp[j].Labels.Get(model.InstanceLabel)
-			)
-
-			switch {
-			case iInstance != jInstance:
-				return iInstance < jInstance
-			case iTargetGroup != jTargetGroup:
-				return iTargetGroup < jTargetGroup
-			case iJobLabel != jJobLabel:
-				return iJobLabel < jJobLabel
-			default:
-				return iInstanceLabel < jInstanceLabel
-			}
-		})
-
-		_ = configapi.WriteResponse(rw, http.StatusOK, resp)
-	})
-}
-
-// TargetSet is a set of targets for an individual scraper.
-type TargetSet map[string][]*scrape.Target
-
-// ListTargetsResponse is returned by the ListTargetsHandler.
-type ListTargetsResponse []TargetInfo
-
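The sort in ListTargetsHandler above orders targets by up to four keys, falling through the switch to the next key only when the earlier keys tie. The same sort.Slice pattern in isolation, using a hypothetical three-key record in place of TargetInfo:

package main

import (
	"fmt"
	"sort"
)

// row is a hypothetical stand-in for TargetInfo with three sort keys.
type row struct{ instance, group, job string }

func main() {
	rows := []row{
		{"b", "g1", "job2"},
		{"a", "g2", "job1"},
		{"a", "g1", "job1"},
	}

	sort.Slice(rows, func(i, j int) bool {
		switch {
		case rows[i].instance != rows[j].instance:
			return rows[i].instance < rows[j].instance // primary key
		case rows[i].group != rows[j].group:
			return rows[i].group < rows[j].group // secondary key
		default:
			return rows[i].job < rows[j].job // final tiebreaker
		}
	})

	fmt.Println(rows) // [{a g1 job1} {a g2 job1} {b g1 job2}]
}

-// TargetInfo describes a specific target.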
-type TargetInfo struct { - InstanceName string `json:"instance"` - TargetGroup string `json:"target_group"` - - Endpoint string `json:"endpoint"` - State string `json:"state"` - Labels labels.Labels `json:"labels"` - DiscoveredLabels labels.Labels `json:"discovered_labels"` - LastScrape time.Time `json:"last_scrape"` - ScrapeDuration int64 `json:"scrape_duration_ms"` - ScrapeError string `json:"scrape_error"` -} - -// PushMetricsHandler provides a way to POST data directly into -// an instance's WAL. -func (a *Agent) PushMetricsHandler(w http.ResponseWriter, r *http.Request) { - // Get instance name. - instanceName, err := getInstanceName(r) - if err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - // Get the metrics instance and serve the request. - managedInstance, err := a.InstanceManager().GetInstance(instanceName) - if err != nil || managedInstance == nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - managedInstance.WriteHandler().ServeHTTP(w, r) -} - -// getInstanceName uses gorilla/mux's route variables to extract the -// "instance" variable. If not found, getInstanceName will return an error. -func getInstanceName(r *http.Request) (string, error) { - vars := mux.Vars(r) - name := vars["instance"] - name, err := url.PathUnescape(name) - if err != nil { - return "", fmt.Errorf("could not decode instance name: %w", err) - } - return name, nil -} diff --git a/internal/static/metrics/http_test.go b/internal/static/metrics/http_test.go deleted file mode 100644 index 7f557a5a40..0000000000 --- a/internal/static/metrics/http_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package metrics - -import ( - "fmt" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/scrape" - "github.com/stretchr/testify/require" -) - -func TestAgent_ListInstancesHandler(t *testing.T) { - fact := newFakeInstanceFactory() - a, err := newAgent(prometheus.NewRegistry(), Config{ - WALDir: "/tmp/agent", - }, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - defer a.Stop() - - r := httptest.NewRequest("GET", "/agent/api/v1/metrics/instances", nil) - - t.Run("no instances", func(t *testing.T) { - rr := httptest.NewRecorder() - a.ListInstancesHandler(rr, r) - expect := `{"status":"success","data":[]}` - require.Equal(t, expect, rr.Body.String()) - }) - - t.Run("non-empty", func(t *testing.T) { - require.NoError(t, a.mm.ApplyConfig(makeInstanceConfig("foo"))) - require.NoError(t, a.mm.ApplyConfig(makeInstanceConfig("bar"))) - - expect := `{"status":"success","data":["bar","foo"]}` - util.Eventually(t, func(t require.TestingT) { - rr := httptest.NewRecorder() - a.ListInstancesHandler(rr, r) - require.Equal(t, expect, rr.Body.String()) - }) - }) -} - -func TestAgent_ListTargetsHandler(t *testing.T) { - fact := newFakeInstanceFactory() - a, err := newAgent(prometheus.NewRegistry(), Config{ - WALDir: "/tmp/agent", - }, log.NewNopLogger(), fact.factory) - require.NoError(t, err) - - mockManager := &instance.MockManager{ - ListInstancesFunc: func() map[string]instance.ManagedInstance { return nil }, - ListConfigsFunc: func() map[string]instance.Config { return nil }, - ApplyConfigFunc: func(_ instance.Config) error { return nil }, - DeleteConfigFunc: func(name 
string) error { return nil },
-		StopFunc:         func() {},
-	}
-	a.mm, err = instance.NewModalManager(prometheus.NewRegistry(), a.logger, mockManager, instance.ModeDistinct)
-	require.NoError(t, err)
-
-	r := httptest.NewRequest("GET", "/agent/api/v1/metrics/targets", nil)
-
-	t.Run("scrape manager not ready", func(t *testing.T) {
-		mockManager.ListInstancesFunc = func() map[string]instance.ManagedInstance {
-			return map[string]instance.ManagedInstance{
-				"test_instance": &mockInstanceScrape{},
-			}
-		}
-
-		rr := httptest.NewRecorder()
-		a.ListTargetsHandler(rr, r)
-		expect := `{"status": "success", "data": []}`
-		require.JSONEq(t, expect, rr.Body.String())
-		require.Equal(t, http.StatusOK, rr.Result().StatusCode)
-	})
-
-	t.Run("scrape manager targets", func(t *testing.T) {
-		tgt := scrape.NewTarget(labels.FromMap(map[string]string{
-			model.JobLabel:         "job",
-			model.InstanceLabel:    "instance",
-			"foo":                  "bar",
-			model.SchemeLabel:      "http",
-			model.AddressLabel:     "localhost:12345",
-			model.MetricsPathLabel: "/metrics",
-		}), labels.FromMap(map[string]string{
-			"__discovered__": "yes",
-		}), nil)
-
-		startTime := time.Date(1994, time.January, 12, 0, 0, 0, 0, time.UTC)
-		tgt.Report(startTime, time.Minute, fmt.Errorf("something went wrong"))
-
-		mockManager.ListInstancesFunc = func() map[string]instance.ManagedInstance {
-			return map[string]instance.ManagedInstance{
-				"test_instance": &mockInstanceScrape{
-					tgts: map[string][]*scrape.Target{
-						"group_a": {tgt},
-					},
-				},
-			}
-		}
-
-		rr := httptest.NewRecorder()
-		a.ListTargetsHandler(rr, r)
-		expect := `{
-			"status": "success",
-			"data": [{
-				"instance": "test_instance",
-				"target_group": "group_a",
-				"endpoint": "http://localhost:12345/metrics",
-				"state": "down",
-				"labels": {
-					"foo": "bar",
-					"instance": "instance",
-					"job": "job"
-				},
-				"discovered_labels": {
-					"__discovered__": "yes"
-				},
-				"last_scrape": "1994-01-12T00:00:00Z",
-				"scrape_duration_ms": 60000,
-				"scrape_error":"something went wrong"
-			}]
-		}`
-		require.JSONEq(t, expect, rr.Body.String())
-		require.Equal(t, http.StatusOK, rr.Result().StatusCode)
-	})
-}
-
-type mockInstanceScrape struct {
-	instance.NoOpInstance
-	tgts map[string][]*scrape.Target
-}
-
-func (i *mockInstanceScrape) TargetsActive() map[string][]*scrape.Target {
-	return i.tgts
-}
diff --git a/internal/static/metrics/instance/configstore/api.go b/internal/static/metrics/instance/configstore/api.go
deleted file mode 100644
index 552b6ba404..0000000000
--- a/internal/static/metrics/instance/configstore/api.go
+++ /dev/null
@@ -1,268 +0,0 @@
-package configstore
-
-import (
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-	"net/url"
-	"strings"
-	"sync"
-
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	"github.com/gorilla/mux"
-	"github.com/grafana/agent/internal/static/metrics/cluster/configapi"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/prometheus/client_golang/prometheus"
-)
-
-// API is an HTTP API to interact with a configstore.
-type API struct {
-	log       log.Logger
-	storeMut  sync.Mutex
-	store     Store
-	validator Validator
-
-	totalCreatedConfigs prometheus.Counter
-	totalUpdatedConfigs prometheus.Counter
-	totalDeletedConfigs prometheus.Counter
-
-	enableGet bool
-}
-
-// Validator validates a config before putting it into the store.
-// Validator is allowed to mutate the config and will only be given a copy.
-type Validator = func(c *instance.Config) error
-
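A Validator, as defined above, receives its own copy of the config, so it can reject (or even rewrite) the config before the store accepts a Put. A hypothetical sketch of one, written against the instance package as it exists before this removal; the rule it enforces is purely illustrative:

package configstore

import (
	"fmt"

	"github.com/grafana/agent/internal/static/metrics/instance"
)

// rejectHostFilter is a hypothetical Validator that refuses any config
// with host filtering enabled.
func rejectHostFilter(c *instance.Config) error {
	if c.HostFilter {
		return fmt.Errorf("host_filter is not allowed in config %q", c.Name)
	}
	return nil
}

-// NewAPI creates a new API. Store can be applied later with SetStore.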
-func NewAPI(l log.Logger, store Store, v Validator, enableGet bool) *API {
-	return &API{
-		log:       l,
-		store:     store,
-		validator: v,
-
-		totalCreatedConfigs: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "agent_metrics_ha_configs_created_total",
-			Help: "Total number of created scraping service configs",
-		}),
-		totalUpdatedConfigs: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "agent_metrics_ha_configs_updated_total",
-			Help: "Total number of updated scraping service configs",
-		}),
-		totalDeletedConfigs: prometheus.NewCounter(prometheus.CounterOpts{
-			Name: "agent_metrics_ha_configs_deleted_total",
-			Help: "Total number of deleted scraping service configs",
-		}),
-		enableGet: enableGet,
-	}
-}
-
-// WireAPI injects routes into the provided mux router for the config
-// store API.
-func (api *API) WireAPI(r *mux.Router) {
-	// Support URL-encoded config names. The handlers will need to decode the
-	// name when reading the path variable.
-	r = r.UseEncodedPath()
-
-	r.HandleFunc("/agent/api/v1/configs", api.ListConfigurations).Methods("GET")
-	getConfigHandler := messageHandlerFunc(http.StatusNotFound, "404 - config endpoint is disabled")
-	if api.enableGet {
-		getConfigHandler = api.GetConfiguration
-	}
-	r.HandleFunc("/agent/api/v1/configs/{name}", getConfigHandler).Methods("GET")
-	r.HandleFunc("/agent/api/v1/config/{name}", api.PutConfiguration).Methods("PUT", "POST")
-	r.HandleFunc("/agent/api/v1/config/{name}", api.DeleteConfiguration).Methods("DELETE")
-}
-
-// Describe implements prometheus.Collector.
-func (api *API) Describe(ch chan<- *prometheus.Desc) {
-	ch <- api.totalCreatedConfigs.Desc()
-	ch <- api.totalUpdatedConfigs.Desc()
-	ch <- api.totalDeletedConfigs.Desc()
-}
-
-// Collect implements prometheus.Collector.
-func (api *API) Collect(mm chan<- prometheus.Metric) {
-	mm <- api.totalCreatedConfigs
-	mm <- api.totalUpdatedConfigs
-	mm <- api.totalDeletedConfigs
-}
-
-// ListConfigurations returns a list of configurations.
-func (api *API) ListConfigurations(rw http.ResponseWriter, r *http.Request) {
-	api.storeMut.Lock()
-	defer api.storeMut.Unlock()
-	if api.store == nil {
-		api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running"))
-		return
-	}
-
-	keys, err := api.store.List(r.Context())
-	if errors.Is(err, ErrNotConnected) {
-		api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running"))
-		return
-	} else if err != nil {
-		api.writeError(rw, http.StatusInternalServerError, fmt.Errorf("failed to list configs: %w", err))
-		return
-	}
-	api.writeResponse(rw, http.StatusOK, configapi.ListConfigurationsResponse{Configs: keys})
-}
-
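Because API implements prometheus.Collector through the Describe/Collect pair above, all three counters register as a unit rather than one by one. A minimal sketch of that wiring, using the package as it existed before this removal; the Mock store and nil validator mirror the tests further down:

package main

import (
	"github.com/go-kit/log"
	"github.com/grafana/agent/internal/static/metrics/instance/configstore"
	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	api := configstore.NewAPI(log.NewNopLogger(), &configstore.Mock{}, nil, true)

	reg := prometheus.NewRegistry()
	// One Register call exposes the created/updated/deleted counters.
	if err := reg.Register(api); err != nil {
		panic(err)
	}
}

-// GetConfiguration gets an individual configuration.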
-func (api *API) GetConfiguration(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - configKey, err := getConfigName(r) - if err != nil { - api.writeError(rw, http.StatusBadRequest, err) - return - } - - cfg, err := api.store.Get(r.Context(), configKey) - switch { - case errors.Is(err, ErrNotConnected): - api.writeError(rw, http.StatusNotFound, err) - case errors.As(err, &NotExistError{}): - api.writeError(rw, http.StatusNotFound, err) - case err != nil: - api.writeError(rw, http.StatusInternalServerError, err) - case err == nil: - bb, err := instance.MarshalConfig(&cfg, true) - if err != nil { - api.writeError(rw, http.StatusInternalServerError, fmt.Errorf("could not marshal config for response: %w", err)) - return - } - api.writeResponse(rw, http.StatusOK, &configapi.GetConfigurationResponse{ - Value: string(bb), - }) - } -} - -// PutConfiguration creates or updates a configuration. -func (api *API) PutConfiguration(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - configName, err := getConfigName(r) - if err != nil { - api.writeError(rw, http.StatusBadRequest, err) - return - } - - var config strings.Builder - if _, err := io.Copy(&config, r.Body); err != nil { - api.writeError(rw, http.StatusInternalServerError, err) - return - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(config.String())) - if err != nil { - api.writeError(rw, http.StatusBadRequest, fmt.Errorf("could not unmarshal config: %w", err)) - return - } - cfg.Name = configName - - if api.validator != nil { - validateCfg, err := instance.UnmarshalConfig(strings.NewReader(config.String())) - if err != nil { - api.writeError(rw, http.StatusBadRequest, fmt.Errorf("could not unmarshal config: %w", err)) - return - } - validateCfg.Name = configName - - if err := api.validator(validateCfg); err != nil { - api.writeError(rw, http.StatusBadRequest, fmt.Errorf("failed to validate config: %w", err)) - return - } - } - - created, err := api.store.Put(r.Context(), *cfg) - switch { - case errors.Is(err, ErrNotConnected): - api.writeError(rw, http.StatusNotFound, err) - case errors.As(err, &NotUniqueError{}): - api.writeError(rw, http.StatusBadRequest, err) - case err != nil: - api.writeError(rw, http.StatusInternalServerError, err) - default: - if created { - api.totalCreatedConfigs.Inc() - api.writeResponse(rw, http.StatusCreated, nil) - } else { - api.totalUpdatedConfigs.Inc() - api.writeResponse(rw, http.StatusOK, nil) - } - } -} - -// DeleteConfiguration deletes a configuration. 
-func (api *API) DeleteConfiguration(rw http.ResponseWriter, r *http.Request) { - api.storeMut.Lock() - defer api.storeMut.Unlock() - if api.store == nil { - api.writeError(rw, http.StatusNotFound, fmt.Errorf("no config store running")) - return - } - - configKey, err := getConfigName(r) - if err != nil { - api.writeError(rw, http.StatusBadRequest, err) - return - } - - err = api.store.Delete(r.Context(), configKey) - switch { - case errors.Is(err, ErrNotConnected): - api.writeError(rw, http.StatusNotFound, err) - case errors.As(err, &NotExistError{}): - api.writeError(rw, http.StatusNotFound, err) - case err != nil: - api.writeError(rw, http.StatusInternalServerError, err) - default: - api.totalDeletedConfigs.Inc() - api.writeResponse(rw, http.StatusOK, nil) - } -} - -func (api *API) writeError(rw http.ResponseWriter, statusCode int, writeErr error) { - err := configapi.WriteError(rw, statusCode, writeErr) - if err != nil { - level.Error(api.log).Log("msg", "failed to write response", "err", err) - } -} - -func (api *API) writeResponse(rw http.ResponseWriter, statusCode int, v interface{}) { - err := configapi.WriteResponse(rw, statusCode, v) - if err != nil { - level.Error(api.log).Log("msg", "failed to write response", "err", err) - } -} - -// getConfigName uses gorilla/mux's route variables to extract the -// "name" variable. If not found, getConfigName will return an error. -func getConfigName(r *http.Request) (string, error) { - vars := mux.Vars(r) - name := vars["name"] - name, err := url.PathUnescape(name) - if err != nil { - return "", fmt.Errorf("could not decode config name: %w", err) - } - return name, nil -} - -func messageHandlerFunc(statusCode int, msg string) http.HandlerFunc { - return func(rw http.ResponseWriter, r *http.Request) { - rw.WriteHeader(statusCode) - _, _ = rw.Write([]byte(msg)) - } -} diff --git a/internal/static/metrics/instance/configstore/api_test.go b/internal/static/metrics/instance/configstore/api_test.go deleted file mode 100644 index 71bd558810..0000000000 --- a/internal/static/metrics/instance/configstore/api_test.go +++ /dev/null @@ -1,408 +0,0 @@ -package configstore - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/static/client" - "github.com/grafana/agent/internal/static/metrics/cluster/configapi" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestAPI_ListConfigurations(t *testing.T) { - s := &Mock{ - ListFunc: func(ctx context.Context) ([]string, error) { - return []string{"a", "b", "c"}, nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - - expect := `{ - "status": "success", - "data": { - "configs": ["a", "b", "c"] - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - apiResp, err := cli.ListConfigs(context.Background()) - require.NoError(t, err) - - expect := &configapi.ListConfigurationsResponse{Configs: []string{"a", "b", "c"}} - require.Equal(t, expect, apiResp) - }) -} - -func TestAPI_GetConfiguration_Invalid(t *testing.T) { - s 
:= &Mock{ - GetFunc: func(ctx context.Context, key string) (instance.Config, error) { - return instance.Config{}, NotExistError{Key: key} - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/does-not-exist") - require.NoError(t, err) - require.Equal(t, http.StatusNotFound, resp.StatusCode) - - expect := `{ - "status": "error", - "data": { - "error": "configuration does-not-exist does not exist" - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - _, err := cli.GetConfiguration(context.Background(), "does-not-exist") - require.NotNil(t, err) - require.Equal(t, "configuration does-not-exist does not exist", err.Error()) - }) -} - -func TestAPI_GetConfiguration(t *testing.T) { - s := &Mock{ - GetFunc: func(ctx context.Context, key string) (instance.Config, error) { - return instance.Config{ - Name: key, - HostFilter: true, - RemoteFlushDeadline: 10 * time.Minute, - }, nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/exists") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - - expect := `{ - "status": "success", - "data": { - "value": "name: exists\nhost_filter: true\nremote_flush_deadline: 10m0s\n" - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - actual, err := cli.GetConfiguration(context.Background(), "exists") - require.NoError(t, err) - - // The client will apply defaults, so we need to start with the DefaultConfig - // as a base here. 
- expect := instance.DefaultConfig - expect.Name = "exists" - expect.HostFilter = true - expect.RemoteFlushDeadline = 10 * time.Minute - require.Equal(t, &expect, actual) - }) -} - -func TestAPI_GetConfiguration_ScrubSecrets(t *testing.T) { - rawConfig := `name: exists -scrape_configs: -- job_name: local_scrape - follow_redirects: true - enable_http2: true - honor_timestamps: true - metrics_path: /metrics - scheme: http - track_timestamps_staleness: true - static_configs: - - targets: - - 127.0.0.1:12345 - labels: - cluster: localhost - basic_auth: - username: admin - password: SCRUBME -remote_write: -- url: http://localhost:9009/api/prom/push - remote_timeout: 30s - name: test-d0f32c - send_exemplars: true - basic_auth: - username: admin - password: SCRUBME - queue_config: - capacity: 500 - max_shards: 1000 - min_shards: 1 - max_samples_per_send: 100 - batch_send_deadline: 5s - min_backoff: 30ms - max_backoff: 100ms - retry_on_http_429: true - follow_redirects: true - enable_http2: true - metadata_config: - send: true - send_interval: 1m - max_samples_per_send: 500 -wal_truncate_frequency: 1m0s -min_wal_time: 5m0s -max_wal_time: 4h0m0s -remote_flush_deadline: 1m0s -` - scrubbedConfig := strings.ReplaceAll(rawConfig, "SCRUBME", "") - - s := &Mock{ - GetFunc: func(ctx context.Context, key string) (instance.Config, error) { - c, err := instance.UnmarshalConfig(strings.NewReader(rawConfig)) - if err != nil { - return instance.Config{}, err - } - return *c, nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/exists") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - respBytes, err := io.ReadAll(resp.Body) - require.NoError(t, err) - - var apiResp struct { - Status string `json:"status"` - Data struct { - Value string `json:"value"` - } `json:"data"` - } - err = json.Unmarshal(respBytes, &apiResp) - require.NoError(t, err) - require.Equal(t, "success", apiResp.Status) - require.YAMLEq(t, scrubbedConfig, apiResp.Data.Value) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - actual, err := cli.GetConfiguration(context.Background(), "exists") - require.NoError(t, err) - - // Marshal the retrieved config _without_ scrubbing. This means - // that if the secrets weren't scrubbed from GetConfiguration, something - // bad happened at the API level. 
- actualBytes, err := instance.MarshalConfig(actual, false) - require.NoError(t, err) - require.YAMLEq(t, scrubbedConfig, string(actualBytes)) - }) -} - -func TestServer_GetConfiguration_Disabled(t *testing.T) { - api := NewAPI(log.NewNopLogger(), nil, nil, false) - env := newAPITestEnvironment(t, api) - resp, err := http.Get(env.srv.URL + "/agent/api/v1/configs/exists") - require.NoError(t, err) - require.Equal(t, http.StatusNotFound, resp.StatusCode) - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.Equal(t, []byte("404 - config endpoint is disabled"), body) -} - -func TestServer_PutConfiguration(t *testing.T) { - var s Mock - - api := NewAPI(log.NewNopLogger(), &s, nil, true) - env := newAPITestEnvironment(t, api) - - cfg := instance.Config{Name: "newconfig"} - bb, err := instance.MarshalConfig(&cfg, false) - require.NoError(t, err) - - t.Run("Created", func(t *testing.T) { - // Created configs should return http.StatusCreated - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - return true, nil - } - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/newconfig", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusCreated, resp.StatusCode) - }) - - t.Run("Updated", func(t *testing.T) { - // Updated configs should return http.StatusOK - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - return false, nil - } - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/newconfig", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - }) -} - -func TestServer_PutConfiguration_Invalid(t *testing.T) { - var s Mock - - api := NewAPI(log.NewNopLogger(), &s, func(c *instance.Config) error { - return fmt.Errorf("custom validation error") - }, true) - env := newAPITestEnvironment(t, api) - - cfg := instance.Config{Name: "newconfig"} - bb, err := instance.MarshalConfig(&cfg, false) - require.NoError(t, err) - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/newconfig", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusBadRequest, resp.StatusCode) - - expect := `{ - "status": "error", - "data": { - "error": "failed to validate config: custom validation error" - } - }` - body, err := io.ReadAll(resp.Body) - require.NoError(t, err) - require.JSONEq(t, expect, string(body)) -} - -func TestServer_PutConfiguration_WithClient(t *testing.T) { - var s Mock - api := NewAPI(log.NewNopLogger(), &s, nil, true) - env := newAPITestEnvironment(t, api) - - cfg := instance.DefaultConfig - cfg.Name = "newconfig-withclient" - cfg.HostFilter = true - cfg.RemoteFlushDeadline = 10 * time.Minute - - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - assert.Equal(t, cfg, c) - return true, nil - } - - cli := client.New(env.srv.URL) - err := cli.PutConfiguration(context.Background(), "newconfig-withclient", &cfg) - require.NoError(t, err) -} - -func TestServer_DeleteConfiguration(t *testing.T) { - s := &Mock{ - DeleteFunc: func(ctx context.Context, key string) error { - assert.Equal(t, "deleteme", key) - return nil - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - req, err := http.NewRequest(http.MethodDelete, env.srv.URL+"/agent/api/v1/config/deleteme", nil) - require.NoError(t, err) - resp, err := http.DefaultClient.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - - t.Run("With 
Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - err := cli.DeleteConfiguration(context.Background(), "deleteme") - require.NoError(t, err) - }) -} - -func TestServer_DeleteConfiguration_Invalid(t *testing.T) { - s := &Mock{ - DeleteFunc: func(ctx context.Context, key string) error { - assert.Equal(t, "deleteme", key) - return NotExistError{Key: key} - }, - } - - api := NewAPI(log.NewNopLogger(), s, nil, true) - env := newAPITestEnvironment(t, api) - - req, err := http.NewRequest(http.MethodDelete, env.srv.URL+"/agent/api/v1/config/deleteme", nil) - require.NoError(t, err) - resp, err := http.DefaultClient.Do(req) - require.NoError(t, err) - require.Equal(t, http.StatusNotFound, resp.StatusCode) - - t.Run("With Client", func(t *testing.T) { - cli := client.New(env.srv.URL) - err := cli.DeleteConfiguration(context.Background(), "deleteme") - require.Error(t, err) - }) -} - -func TestServer_URLEncoded(t *testing.T) { - var s Mock - - api := NewAPI(log.NewNopLogger(), &s, nil, true) - env := newAPITestEnvironment(t, api) - - var cfg instance.Config - bb, err := instance.MarshalConfig(&cfg, false) - require.NoError(t, err) - - s.PutFunc = func(ctx context.Context, c instance.Config) (created bool, err error) { - assert.Equal(t, "url/encoded", c.Name) - return true, nil - } - - resp, err := http.Post(env.srv.URL+"/agent/api/v1/config/url%2Fencoded", "", bytes.NewReader(bb)) - require.NoError(t, err) - require.Equal(t, http.StatusCreated, resp.StatusCode) - - s.GetFunc = func(ctx context.Context, key string) (instance.Config, error) { - assert.Equal(t, "url/encoded", key) - return instance.Config{Name: "url/encoded"}, nil - } - - resp, err = http.Get(env.srv.URL + "/agent/api/v1/configs/url%2Fencoded") - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) -} - -type apiTestEnvironment struct { - srv *httptest.Server - router *mux.Router -} - -func newAPITestEnvironment(t *testing.T, api *API) apiTestEnvironment { - t.Helper() - - router := mux.NewRouter() - srv := httptest.NewServer(router) - t.Cleanup(srv.Close) - - api.WireAPI(router) - - return apiTestEnvironment{srv: srv, router: router} -} diff --git a/internal/static/metrics/instance/configstore/codec.go b/internal/static/metrics/instance/configstore/codec.go deleted file mode 100644 index 38a837c5be..0000000000 --- a/internal/static/metrics/instance/configstore/codec.go +++ /dev/null @@ -1,65 +0,0 @@ -package configstore - -import ( - "bytes" - "compress/gzip" - "fmt" - "io" - "strings" - - "github.com/grafana/dskit/kv/codec" -) - -// GetCodec returns the codec for encoding and decoding instance.Configs -// in the Remote store. -func GetCodec() codec.Codec { - return &yamlCodec{} -} - -type yamlCodec struct{} - -func (*yamlCodec) Decode(bb []byte) (interface{}, error) { - // Decode is called by kv.Clients with an empty slice when a - // key is deleted. We should stop early here and don't return - // an error so the deletion event propagates to watchers. 
- if len(bb) == 0 { - return nil, nil - } - - r, err := gzip.NewReader(bytes.NewReader(bb)) - if err != nil { - return nil, err - } - - var sb strings.Builder - if _, err := io.Copy(&sb, r); err != nil { - return nil, err - } - return sb.String(), nil -} - -func (*yamlCodec) Encode(v interface{}) ([]byte, error) { - var buf bytes.Buffer - - var cfg string - - switch v := v.(type) { - case string: - cfg = v - default: - panic(fmt.Sprintf("unexpected type %T passed to yamlCodec.Encode", v)) - } - - w := gzip.NewWriter(&buf) - - if _, err := io.Copy(w, strings.NewReader(cfg)); err != nil { - return nil, err - } - - w.Close() - return buf.Bytes(), nil -} - -func (*yamlCodec) CodecID() string { - return "agentConfig/yaml" -} diff --git a/internal/static/metrics/instance/configstore/codec_test.go b/internal/static/metrics/instance/configstore/codec_test.go deleted file mode 100644 index ab717c0bb6..0000000000 --- a/internal/static/metrics/instance/configstore/codec_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package configstore - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestCodec(t *testing.T) { - exampleConfig := `name: 'test' -host_filter: false -scrape_configs: - - job_name: process-1 - static_configs: - - targets: ['process-1:80'] - labels: - cluster: 'local' - origin: 'agent'` - - c := &yamlCodec{} - bb, err := c.Encode(exampleConfig) - require.NoError(t, err) - - out, err := c.Decode(bb) - require.NoError(t, err) - require.Equal(t, exampleConfig, out) -} - -// TestCodec_Decode_Nil makes sure that if Decode is called with an empty value, -// which may happen when a key is deleted, that no error occurs and instead a -// nil value is returned. -func TestCodec_Decode_Nil(t *testing.T) { - c := &yamlCodec{} - - input := [][]byte{nil, make([]byte, 0)} - for _, bb := range input { - out, err := c.Decode(bb) - require.Nil(t, err) - require.Nil(t, out) - } -} diff --git a/internal/static/metrics/instance/configstore/errors.go b/internal/static/metrics/instance/configstore/errors.go deleted file mode 100644 index 8d668a5ee3..0000000000 --- a/internal/static/metrics/instance/configstore/errors.go +++ /dev/null @@ -1,27 +0,0 @@ -package configstore - -import "fmt" - -// ErrNotConnected is used when a store operation was called but no connection -// to the store was active. -var ErrNotConnected = fmt.Errorf("not connected to store") - -// NotExistError is used when a config doesn't exist. -type NotExistError struct { - Key string -} - -// Error implements error. -func (e NotExistError) Error() string { - return fmt.Sprintf("configuration %s does not exist", e.Key) -} - -// NotUniqueError is used when two scrape jobs have the same name. -type NotUniqueError struct { - ScrapeJob string -} - -// Error implements error. -func (e NotUniqueError) Error() string { - return fmt.Sprintf("found multiple scrape configs in config store with job name %q", e.ScrapeJob) -} diff --git a/internal/static/metrics/instance/configstore/mock.go b/internal/static/metrics/instance/configstore/mock.go deleted file mode 100644 index 5ff303669c..0000000000 --- a/internal/static/metrics/instance/configstore/mock.go +++ /dev/null @@ -1,74 +0,0 @@ -package configstore - -import ( - "context" - - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// Mock is a Mock Store. Useful primarily for testing. 
-type Mock struct { - ListFunc func(ctx context.Context) ([]string, error) - GetFunc func(ctx context.Context, key string) (instance.Config, error) - PutFunc func(ctx context.Context, c instance.Config) (created bool, err error) - DeleteFunc func(ctx context.Context, key string) error - AllFunc func(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) - WatchFunc func() <-chan WatchEvent - CloseFunc func() error -} - -// List implements Store. -func (s *Mock) List(ctx context.Context) ([]string, error) { - if s.ListFunc != nil { - return s.ListFunc(ctx) - } - panic("List not implemented") -} - -// Get implements Store. -func (s *Mock) Get(ctx context.Context, key string) (instance.Config, error) { - if s.GetFunc != nil { - return s.GetFunc(ctx, key) - } - panic("Get not implemented") -} - -// Put implements Store. -func (s *Mock) Put(ctx context.Context, c instance.Config) (created bool, err error) { - if s.PutFunc != nil { - return s.PutFunc(ctx, c) - } - panic("Put not implemented") -} - -// Delete implements Store. -func (s *Mock) Delete(ctx context.Context, key string) error { - if s.DeleteFunc != nil { - return s.DeleteFunc(ctx, key) - } - panic("Delete not implemented") -} - -// All implements Store. -func (s *Mock) All(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if s.AllFunc != nil { - return s.AllFunc(ctx, keep) - } - panic("All not implemented") -} - -// Watch implements Store. -func (s *Mock) Watch() <-chan WatchEvent { - if s.WatchFunc != nil { - return s.WatchFunc() - } - panic("Watch not implemented") -} - -// Close implements Store. -func (s *Mock) Close() error { - if s.CloseFunc != nil { - return s.CloseFunc() - } - panic("Close not implemented") -} diff --git a/internal/static/metrics/instance/configstore/remote.go b/internal/static/metrics/instance/configstore/remote.go deleted file mode 100644 index a97df01e25..0000000000 --- a/internal/static/metrics/instance/configstore/remote.go +++ /dev/null @@ -1,471 +0,0 @@ -package configstore - -import ( - "context" - "errors" - "fmt" - "net/http" - "strings" - "sync" - - "github.com/grafana/dskit/instrument" - - "github.com/hashicorp/go-cleanhttp" - - "github.com/hashicorp/consul/api" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/kv" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" -) - -/*********************************************************************************************************************** -The consul code skipping the cortex handler is due to performance issue with a large number of configs and overloading -consul. See issue https://github.com/grafana/agent/issues/789. The long term method will be to refactor and extract -the cortex code so other stores can also benefit from this. @mattdurham -***********************************************************************************************************************/ - -var consulRequestDuration = instrument.NewHistogramCollector(promauto.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_configstore_consul_request_duration_seconds", - Help: "Time spent on consul requests when listing configs.", - Buckets: prometheus.DefBuckets, -}, []string{"operation", "status_code"})) - -// Remote loads instance files from a remote KV store. The KV store -// can be swapped out in real time. 
-type Remote struct { - log log.Logger - reg *util.Unregisterer - - kvMut sync.RWMutex - kv *agentRemoteClient - reloadKV chan struct{} - - cancelCtx context.Context - cancelFunc context.CancelFunc - - configsMut sync.Mutex - configsCh chan WatchEvent -} - -// agentRemoteClient is a simple wrapper to allow the shortcircuit of consul, while being backwards compatible with non -// consul kv stores -type agentRemoteClient struct { - kv.Client - consul *api.Client - config kv.Config -} - -// NewRemote creates a new Remote store that uses a Key-Value client to store -// and retrieve configs. If enable is true, the store will be immediately -// connected to. Otherwise, it can be lazily loaded by enabling later through -// a call to Remote.ApplyConfig. -func NewRemote(l log.Logger, reg prometheus.Registerer, cfg kv.Config, enable bool) (*Remote, error) { - cancelCtx, cancelFunc := context.WithCancel(context.Background()) - - r := &Remote{ - log: l, - reg: util.WrapWithUnregisterer(reg), - - reloadKV: make(chan struct{}, 1), - - cancelCtx: cancelCtx, - cancelFunc: cancelFunc, - - configsCh: make(chan WatchEvent), - } - if err := r.ApplyConfig(cfg, enable); err != nil { - return nil, fmt.Errorf("failed to apply config for config store: %w", err) - } - - go r.run() - return r, nil -} - -// ApplyConfig applies the config for a kv client. -func (r *Remote) ApplyConfig(cfg kv.Config, enable bool) error { - r.kvMut.Lock() - defer r.kvMut.Unlock() - - if r.cancelCtx.Err() != nil { - return fmt.Errorf("remote store already stopped") - } - - // Unregister all metrics that the previous kv may have registered. - r.reg.UnregisterAll() - - if !enable { - r.setClient(nil, nil, kv.Config{}) - return nil - } - - cli, err := kv.NewClient(cfg, GetCodec(), kv.RegistererWithKVName(r.reg, "agent_configs"), r.log) - // This is a hack to get a consul client, the client above has it embedded but it's not exposed - var consulClient *api.Client - if cfg.Store == "consul" { - consulClient, err = api.NewClient(&api.Config{ - Address: cfg.Consul.Host, - Token: cfg.Consul.ACLToken.String(), - Scheme: "http", - HttpClient: &http.Client{ - Transport: cleanhttp.DefaultPooledTransport(), - // See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/ - Timeout: cfg.Consul.HTTPClientTimeout, - }, - }) - if err != nil { - return err - } - } - - if err != nil { - return fmt.Errorf("failed to create kv client: %w", err) - } - - r.setClient(cli, consulClient, cfg) - return nil -} - -// setClient sets the active client and notifies run to restart the -// kv watcher. -func (r *Remote) setClient(client kv.Client, consulClient *api.Client, config kv.Config) { - if client == nil && consulClient == nil { - r.kv = nil - } else { - r.kv = &agentRemoteClient{ - Client: client, - consul: consulClient, - config: config, - } - } - r.reloadKV <- struct{}{} -} - -func (r *Remote) run() { - var ( - kvContext context.Context - kvCancel context.CancelFunc - ) - -Outer: - for { - select { - case <-r.cancelCtx.Done(): - break Outer - case <-r.reloadKV: - r.kvMut.RLock() - kv := r.kv - r.kvMut.RUnlock() - - if kvCancel != nil { - kvCancel() - } - kvContext, kvCancel = context.WithCancel(r.cancelCtx) - go r.watchKV(kvContext, kv) - } - } - - if kvCancel != nil { - kvCancel() - } -} - -func (r *Remote) watchKV(ctx context.Context, client *agentRemoteClient) { - // Edge case: client was unset, nothing to do here. 
- if client == nil { - level.Info(r.log).Log("msg", "not watching the KV, none set") - return - } - - client.WatchPrefix(ctx, "", func(key string, v interface{}) bool { - if ctx.Err() != nil { - return false - } - - r.configsMut.Lock() - defer r.configsMut.Unlock() - - switch { - case v == nil: - r.configsCh <- WatchEvent{Key: key, Config: nil} - default: - cfg, err := instance.UnmarshalConfig(strings.NewReader(v.(string))) - if err != nil { - level.Error(r.log).Log("msg", "could not unmarshal config from store", "name", key, "err", err) - break - } - - r.configsCh <- WatchEvent{Key: key, Config: cfg} - } - - return true - }) -} - -// List returns the list of all configs in the KV store. -func (r *Remote) List(ctx context.Context) ([]string, error) { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - if r.kv == nil { - return nil, ErrNotConnected - } - - return r.kv.List(ctx, "") -} - -// listConsul returns Key Value Pairs instead of []string -func (r *Remote) listConsul(ctx context.Context) (api.KVPairs, error) { - if r.kv == nil { - return nil, ErrNotConnected - } - - var pairs api.KVPairs - options := &api.QueryOptions{ - AllowStale: !r.kv.config.Consul.ConsistentReads, - RequireConsistent: r.kv.config.Consul.ConsistentReads, - } - // This is copied from cortex list so that stats stay the same - err := instrument.CollectedRequest(ctx, "List", consulRequestDuration, instrument.ErrorCode, func(ctx context.Context) error { - var err error - pairs, _, err = r.kv.consul.KV().List(r.kv.config.Prefix, options.WithContext(ctx)) - return err - }) - - if err != nil { - return nil, err - } - // This mirrors the previous behavior of returning a blank array as opposed to nil. - if pairs == nil { - blankPairs := make(api.KVPairs, 0) - return blankPairs, nil - } - for _, kvp := range pairs { - kvp.Key = strings.TrimPrefix(kvp.Key, r.kv.config.Prefix) - } - return pairs, nil -} - -// Get retrieves an individual config from the KV store. -func (r *Remote) Get(ctx context.Context, key string) (instance.Config, error) { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - if r.kv == nil { - return instance.Config{}, ErrNotConnected - } - - v, err := r.kv.Get(ctx, key) - if err != nil { - return instance.Config{}, fmt.Errorf("failed to get config %s: %w", key, err) - } else if v == nil { - return instance.Config{}, NotExistError{Key: key} - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(v.(string))) - if err != nil { - return instance.Config{}, fmt.Errorf("failed to unmarshal config %s: %w", key, err) - } - return *cfg, nil -} - -// Put adds or updates a config in the KV store. -func (r *Remote) Put(ctx context.Context, c instance.Config) (bool, error) { - // We need to use a write lock here since two Applies can't run concurrently - // (given the current need to perform a store-wide validation.) 
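	// (A read lock would not be enough here: Put validates the incoming
	// config against every stored config before writing, so two concurrent
	// Puts could each pass validation and then both land. Holding the write
	// lock serializes the whole validate-then-CAS sequence.)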
- r.kvMut.Lock() - defer r.kvMut.Unlock() - if r.kv == nil { - return false, ErrNotConnected - } - - bb, err := instance.MarshalConfig(&c, false) - if err != nil { - return false, fmt.Errorf("failed to marshal config: %w", err) - } - - cfgCh, err := r.all(ctx, nil) - if err != nil { - return false, fmt.Errorf("failed to check validity of config: %w", err) - } - if err := checkUnique(cfgCh, &c); err != nil { - return false, fmt.Errorf("failed to check uniqueness of config: %w", err) - } - - var created bool - err = r.kv.CAS(ctx, c.Name, func(in interface{}) (out interface{}, retry bool, err error) { - // The configuration is new if there's no previous value from the CAS - created = (in == nil) - return string(bb), false, nil - }) - if err != nil { - return false, fmt.Errorf("failed to put config: %w", err) - } - return created, nil -} - -// Delete deletes a config from the KV store. It returns NotExistError if -// the config doesn't exist. -func (r *Remote) Delete(ctx context.Context, key string) error { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - if r.kv == nil { - return ErrNotConnected - } - - // Some KV stores don't return an error if something failed to be - // deleted, so we'll try to get it first. This isn't perfect, and - // it may fail, so we'll silently ignore any errors here unless - // we know for sure the config doesn't exist. - v, err := r.kv.Get(ctx, key) - if err != nil { - level.Warn(r.log).Log("msg", "error validating key existence for deletion", "err", err) - } else if v == nil { - return NotExistError{Key: key} - } - - err = r.kv.Delete(ctx, key) - if err != nil { - return fmt.Errorf("error deleting configuration: %w", err) - } - - return nil -} - -// All retrieves the set of all configs in the store. -func (r *Remote) All(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - r.kvMut.RLock() - defer r.kvMut.RUnlock() - return r.all(ctx, keep) -} - -// all can only be called if the kvMut lock is already held. -func (r *Remote) all(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if r.kv == nil { - return nil, ErrNotConnected - } - - // If we are using a consul client then do the short circuit way, this is done so that we receive all the key value pairs - // in one call then, operate on them in memory. Previously we retrieved the list (which stripped the values) - // then ran a goroutine to get each individual value from consul. In situations with an extremely large number of - // configs this overloaded the consul instances. This reduces that to one call, that was being made anyways. - if r.kv.consul != nil { - return r.allConsul(ctx, keep) - } - - return r.allOther(ctx, keep) -} - -// allConsul is ONLY usable when consul is the keystore. This is a performance improvement in using the client directly -// -// instead of the cortex multi store kv interface. That interface returns the list then each value must be retrieved -// individually. 
This returns all the keys and values in one call and works on them in memory -func (r *Remote) allConsul(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if r.kv.consul == nil { - level.Error(r.log).Log("err", "allConsul called but consul client nil") - return nil, errors.New("allConsul called but consul client nil") - } - var configs []*instance.Config - c := GetCodec() - - pairs, err := r.listConsul(ctx) - - if err != nil { - return nil, err - } - for _, kvp := range pairs { - if keep != nil && !keep(kvp.Key) { - level.Debug(r.log).Log("msg", "skipping key that was filtered out", "key", kvp.Key) - continue - } - value, err := c.Decode(kvp.Value) - if err != nil { - level.Error(r.log).Log("msg", "failed to decode config from store", "key", kvp.Key, "err", err) - continue - } - if value == nil { - // Config was deleted since we called list, skip it. - level.Debug(r.log).Log("msg", "skipping key that was deleted after list was called", "key", kvp.Key) - continue - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(value.(string))) - if err != nil { - level.Error(r.log).Log("msg", "failed to unmarshal config from store", "key", kvp.Key, "err", err) - continue - } - configs = append(configs, cfg) - } - ch := make(chan instance.Config, len(configs)) - for _, cfg := range configs { - ch <- *cfg - } - close(ch) - return ch, nil -} - -func (r *Remote) allOther(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) { - if r.kv == nil { - return nil, ErrNotConnected - } - - keys, err := r.kv.List(ctx, "") - if err != nil { - return nil, fmt.Errorf("failed to list configs: %w", err) - } - - ch := make(chan instance.Config) - - var wg sync.WaitGroup - wg.Add(len(keys)) - go func() { - wg.Wait() - close(ch) - }() - - for _, key := range keys { - go func(key string) { - defer wg.Done() - - if keep != nil && !keep(key) { - level.Debug(r.log).Log("msg", "skipping key that was filtered out", "key", key) - return - } - - // TODO(rfratto): retries might be useful here - v, err := r.kv.Get(ctx, key) - if err != nil { - level.Error(r.log).Log("msg", "failed to get config with key", "key", key, "err", err) - return - } else if v == nil { - // Config was deleted since we called list, skip it. - level.Debug(r.log).Log("msg", "skipping key that was deleted after list was called", "key", key) - return - } - - cfg, err := instance.UnmarshalConfig(strings.NewReader(v.(string))) - if err != nil { - level.Error(r.log).Log("msg", "failed to unmarshal config from store", "key", key, "err", err) - return - } - ch <- *cfg - }(key) - } - - return ch, nil -} - -// Watch watches the Store for changes. -func (r *Remote) Watch() <-chan WatchEvent { - return r.configsCh -} - -// Close closes the Remote store. 
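// allOther above follows a reusable fan-out/fan-in shape: list the keys,
// fetch each one concurrently, and close the output channel only after the
// last worker finishes so the consumer's range loop terminates. A minimal
// sketch of that shape, with fetch standing in for the per-key Get and
// errors reduced to a boolean for brevity:

package fanout

import "sync"

// fetchAll fans one goroutine out per key and lets a WaitGroup close the
// results channel once every worker is done.
func fetchAll(keys []string, fetch func(key string) (string, bool)) <-chan string {
	ch := make(chan string)

	var wg sync.WaitGroup
	wg.Add(len(keys))
	// Close the channel only after every worker has returned, so all sends
	// are guaranteed to have completed first.
	go func() {
		wg.Wait()
		close(ch)
	}()

	for _, key := range keys {
		go func(key string) {
			defer wg.Done()
			if v, ok := fetch(key); ok {
				ch <- v
			}
		}(key)
	}
	return ch
}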
-func (r *Remote) Close() error { - r.kvMut.Lock() - defer r.kvMut.Unlock() - r.cancelFunc() - return nil -} diff --git a/internal/static/metrics/instance/configstore/remote_test.go b/internal/static/metrics/instance/configstore/remote_test.go deleted file mode 100644 index 682438dd27..0000000000 --- a/internal/static/metrics/instance/configstore/remote_test.go +++ /dev/null @@ -1,271 +0,0 @@ -package configstore - -import ( - "context" - "fmt" - "sort" - "strings" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/agent/internal/static/metrics/instance" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/kv" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" -) - -func TestRemote_List(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - cfgs := []string{"a", "b", "c"} - for _, cfg := range cfgs { - err := remote.kv.CAS(context.Background(), cfg, func(in interface{}) (out interface{}, retry bool, err error) { - return fmt.Sprintf("name: %s", cfg), false, nil - }) - require.NoError(t, err) - } - - list, err := remote.List(context.Background()) - require.NoError(t, err) - sort.Strings(list) - require.Equal(t, cfgs, list) -} - -func TestRemote_Get(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - err = remote.kv.CAS(context.Background(), "someconfig", func(in interface{}) (out interface{}, retry bool, err error) { - return "name: someconfig", false, nil - }) - require.NoError(t, err) - - cfg, err := remote.Get(context.Background(), "someconfig") - require.NoError(t, err) - - expect := instance.DefaultConfig - expect.Name = "someconfig" - require.Equal(t, expect, cfg) -} - -func TestRemote_Put(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - cfg := instance.DefaultConfig - cfg.Name = "newconfig" - - created, err := remote.Put(context.Background(), cfg) - require.NoError(t, err) - require.True(t, created) - - actual, err := remote.Get(context.Background(), "newconfig") - require.NoError(t, err) - require.Equal(t, cfg, actual) - - t.Run("Updating", func(t *testing.T) { - cfg := instance.DefaultConfig - cfg.Name = "newconfig" - cfg.HostFilter = true - - created, err := remote.Put(context.Background(), cfg) - require.NoError(t, err) - require.False(t, created) - }) -} - -func TestRemote_Put_NonUnique(t *testing.T) { - var ( - conflictingA = util.Untab(` -name: conflicting-a -scrape_configs: -- job_name: foobar - `) - conflictingB = util.Untab(` -name: conflicting-b -scrape_configs: -- job_name: fizzbuzz -- job_name: foobar - `) - ) - - conflictingACfg, err := instance.UnmarshalConfig(strings.NewReader(conflictingA)) - require.NoError(t, err) - - conflictingBCfg, err := instance.UnmarshalConfig(strings.NewReader(conflictingB)) - require.NoError(t, err) - - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - 
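	// ("inmemory" selects dskit's in-process KV implementation, which keeps
	// these tests hermetic: the full Remote code path runs without a Consul
	// or etcd dependency.)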
require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - created, err := remote.Put(context.Background(), *conflictingACfg) - require.NoError(t, err) - require.True(t, created) - - _, err = remote.Put(context.Background(), *conflictingBCfg) - require.EqualError(t, err, fmt.Sprintf("failed to check uniqueness of config: found multiple scrape configs in config store with job name %q", "foobar")) -} - -func TestRemote_Delete(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - var cfg instance.Config - cfg.Name = "deleteme" - - created, err := remote.Put(context.Background(), cfg) - require.NoError(t, err) - require.True(t, created) - - err = remote.Delete(context.Background(), "deleteme") - require.NoError(t, err) - - _, err = remote.Get(context.Background(), "deleteme") - require.EqualError(t, err, "configuration deleteme does not exist") - - err = remote.Delete(context.Background(), "deleteme") - require.EqualError(t, err, "configuration deleteme does not exist") -} - -func TestRemote_All(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "all-configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - cfgs := []string{"a", "b", "c"} - for _, cfg := range cfgs { - err := remote.kv.CAS(context.Background(), cfg, func(in interface{}) (out interface{}, retry bool, err error) { - return fmt.Sprintf("name: %s", cfg), false, nil - }) - require.NoError(t, err) - } - - configCh, err := remote.All(context.Background(), nil) - require.NoError(t, err) - - var gotConfigs []string - for gotConfig := range configCh { - gotConfigs = append(gotConfigs, gotConfig.Name) - } - sort.Strings(gotConfigs) - - require.Equal(t, cfgs, gotConfigs) -} - -func TestRemote_Watch(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "watch-configs/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - _, err = remote.Put(context.Background(), instance.Config{Name: "watch"}) - require.NoError(t, err) - - select { - case cfg := <-remote.Watch(): - require.Equal(t, "watch", cfg.Key) - require.NotNil(t, cfg.Config) - require.Equal(t, "watch", cfg.Config.Name) - case <-time.After(3 * time.Second): - require.FailNow(t, "failed to watch for config") - } - - // Make sure Watch gets other updates. 
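	// (Each assertion below uses the select-with-timeout idiom so that a
	// Watch regression fails the test after three seconds instead of hanging
	// the entire run.)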
- _, err = remote.Put(context.Background(), instance.Config{Name: "watch2"}) - require.NoError(t, err) - - select { - case cfg := <-remote.Watch(): - require.Equal(t, "watch2", cfg.Key) - require.NotNil(t, cfg.Config) - require.Equal(t, "watch2", cfg.Config.Name) - case <-time.After(3 * time.Second): - require.FailNow(t, "failed to watch for config") - } -} - -func TestRemote_ApplyConfig(t *testing.T) { - remote, err := NewRemote(log.NewNopLogger(), prometheus.NewRegistry(), kv.Config{ - Store: "inmemory", - Prefix: "test-applyconfig/", - }, true) - require.NoError(t, err) - t.Cleanup(func() { - err := remote.Close() - require.NoError(t, err) - }) - - err = remote.ApplyConfig(kv.Config{ - Store: "inmemory", - Prefix: "test-applyconfig2/", - }, true) - require.NoError(t, err, "failed to apply a new config") - - err = remote.ApplyConfig(kv.Config{ - Store: "inmemory", - Prefix: "test-applyconfig2/", - }, true) - require.NoError(t, err, "failed to re-apply the current config") - - // Make sure watch still works - _, err = remote.Put(context.Background(), instance.Config{Name: "watch"}) - require.NoError(t, err) - - select { - case cfg := <-remote.Watch(): - require.Equal(t, "watch", cfg.Key) - require.NotNil(t, cfg.Config) - require.Equal(t, "watch", cfg.Config.Name) - case <-time.After(3 * time.Second): - require.FailNow(t, "failed to watch for config") - } -} diff --git a/internal/static/metrics/instance/configstore/store.go b/internal/static/metrics/instance/configstore/store.go deleted file mode 100644 index 799bafc882..0000000000 --- a/internal/static/metrics/instance/configstore/store.go +++ /dev/null @@ -1,49 +0,0 @@ -// Package configstore abstracts the concepts of where instance files get -// retrieved. -package configstore - -import ( - "context" - - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// Store is some interface to retrieving instance configurations. -type Store interface { - // List gets the list of config names. - List(ctx context.Context) ([]string, error) - - // Get gets an individual config by name. - Get(ctx context.Context, key string) (instance.Config, error) - - // Put applies a new instance Config to the store. - // If the config already exists, created will be false to indicate an - // update. - Put(ctx context.Context, c instance.Config) (created bool, err error) - - // Delete deletes a config from the store. - Delete(ctx context.Context, key string) error - - // All retrieves the entire list of instance configs currently - // in the store. A filtering "keep" function can be provided to ignore some - // configs, which can significantly speed up the operation in some cases. - All(ctx context.Context, keep func(key string) bool) (<-chan instance.Config, error) - - // Watch watches for changed instance Configs. - // All callers of Watch receive the same Channel. - // - // It is not guaranteed that Watch will emit all store events, and Watch - // should only be used for best-effort quick convergence with the remote - // store. Watch should always be paired with polling All. - Watch() <-chan WatchEvent - - // Close closes the store. - Close() error -} - -// WatchEvent is returned by Watch. The Key is the name of the config that was -// added, updated, or deleted. If the Config was deleted, Config will be nil. 
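// The contract above (best-effort Watch, authoritative All) implies a
// consumer loop that applies watch events as they arrive but still re-lists
// on a timer. A minimal sketch of such a loop, using local stand-ins for the
// real Store and Config types and eliding deletion handling:

package reconcile

import (
	"context"
	"time"
)

type config struct{ Name string }

type watchEvent struct {
	Key    string
	Config *config // nil signals a deletion
}

type store interface {
	All(ctx context.Context, keep func(key string) bool) (<-chan config, error)
	Watch() <-chan watchEvent
}

// run converges on the store's contents: quickly via Watch events, and
// authoritatively via a periodic All, since Watch may drop events.
func run(ctx context.Context, s store, apply func(config), interval time.Duration) error {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case ev := <-s.Watch():
			if ev.Config != nil {
				apply(*ev.Config)
			}
		case <-ticker.C:
			ch, err := s.All(ctx, nil)
			if err != nil {
				return err
			}
			for cfg := range ch {
				apply(cfg)
			}
		}
	}
}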
-type WatchEvent struct { - Key string - Config *instance.Config -} diff --git a/internal/static/metrics/instance/configstore/unique.go b/internal/static/metrics/instance/configstore/unique.go deleted file mode 100644 index 203b77ba45..0000000000 --- a/internal/static/metrics/instance/configstore/unique.go +++ /dev/null @@ -1,35 +0,0 @@ -package configstore - -import ( - "github.com/grafana/agent/internal/static/metrics/instance" -) - -// checkUnique validates that cfg is unique from all, ensuring that no two -// configs share a job_name. -func checkUnique(all <-chan instance.Config, cfg *instance.Config) error { - defer func() { - // Drain the channel, which is necessary if we're returning an error. - for range all { - } - }() - - newJobNames := make(map[string]struct{}, len(cfg.ScrapeConfigs)) - for _, sc := range cfg.ScrapeConfigs { - newJobNames[sc.JobName] = struct{}{} - } - - for otherConfig := range all { - // If the other config is the one we're validating, skip it. - if otherConfig.Name == cfg.Name { - continue - } - - for _, otherScrape := range otherConfig.ScrapeConfigs { - if _, exist := newJobNames[otherScrape.JobName]; exist { - return NotUniqueError{ScrapeJob: otherScrape.JobName} - } - } - } - - return nil -} diff --git a/internal/static/metrics/instance/errors.go b/internal/static/metrics/instance/errors.go deleted file mode 100644 index e025abf5bb..0000000000 --- a/internal/static/metrics/instance/errors.go +++ /dev/null @@ -1,44 +0,0 @@ -package instance - -import "fmt" - -// ErrInvalidUpdate is returned whenever Update is called against an instance -// but an invalid field is changed between configs. If ErrInvalidUpdate is -// returned, the instance must be fully stopped and replaced with a new one -// with the new config. -type ErrInvalidUpdate struct { - Inner error -} - -// Error implements the error interface. -func (e ErrInvalidUpdate) Error() string { return e.Inner.Error() } - -// Is returns true if err is an ErrInvalidUpdate. -func (e ErrInvalidUpdate) Is(err error) bool { - switch err.(type) { - case ErrInvalidUpdate, *ErrInvalidUpdate: - return true - default: - return false - } -} - -// As will set the err object to ErrInvalidUpdate provided err -// is a pointer to ErrInvalidUpdate. -func (e ErrInvalidUpdate) As(err interface{}) bool { - switch v := err.(type) { - case *ErrInvalidUpdate: - *v = e - default: - return false - } - return true -} - -// errImmutableField is the error describing a field that cannot be changed. It -// is wrapped inside of a ErrInvalidUpdate. -type errImmutableField struct{ Field string } - -func (e errImmutableField) Error() string { - return fmt.Sprintf("%s cannot be changed dynamically", e.Field) -} diff --git a/internal/static/metrics/instance/group_manager.go b/internal/static/metrics/instance/group_manager.go deleted file mode 100644 index 072675d92f..0000000000 --- a/internal/static/metrics/instance/group_manager.go +++ /dev/null @@ -1,358 +0,0 @@ -package instance - -import ( - "crypto/md5" - "encoding/hex" - "fmt" - "sort" - "sync" - - "github.com/prometheus/prometheus/config" -) - -// A GroupManager wraps around another Manager and groups all incoming Configs -// into a smaller set of configs, causing less managed instances to be spawned. -// -// Configs are grouped by all settings for a Config *except* scrape configs. -// Any difference found in any flag will cause a Config to be placed in another -// group. 
One exception to this rule is that remote_writes are compared -// unordered, but the sets of remote_writes should otherwise be identical. -// -// GroupManagers drastically improve the performance of the Agent when a -// significant number of instances are spawned, as the overhead of each -// instance having its own service discovery, WAL, and remote_write can be -// significant. -// -// The config names of instances within the group will be represented by -// that group's hash of settings. -type GroupManager struct { - inner Manager - - mtx sync.Mutex - - // groups is a map of group name to the grouped configs. - groups map[string]groupedConfigs - - // groupLookup is a map of config name to group name. - groupLookup map[string]string -} - -// groupedConfigs holds a set of grouped configs, keyed by the config name. -// They are stored in a map rather than a slice to make overriding an existing -// config within the group less error-prone. -type groupedConfigs map[string]Config - -// Copy returns a shallow copy of the groupedConfigs. -func (g groupedConfigs) Copy() groupedConfigs { - res := make(groupedConfigs, len(g)) - for k, v := range g { - res[k] = v - } - return res -} - -// NewGroupManager creates a new GroupManager for combining instances of the -// same "group." -func NewGroupManager(inner Manager) *GroupManager { - return &GroupManager{ - inner: inner, - groups: make(map[string]groupedConfigs), - groupLookup: make(map[string]string), - } -} - -// GetInstance gets the underlying grouped instance for a given name. -func (m *GroupManager) GetInstance(name string) (ManagedInstance, error) { - m.mtx.Lock() - defer m.mtx.Unlock() - - group, ok := m.groupLookup[name] - if !ok { - return nil, fmt.Errorf("instance %s does not exist", name) - } - - inst, err := m.inner.GetInstance(group) - if err != nil { - return nil, fmt.Errorf("failed to get instance for %s: %w", name, err) - } - return inst, nil -} - -// ListInstances returns all currently grouped managed instances. The key -// will be the group's hash of shared settings. -func (m *GroupManager) ListInstances() map[string]ManagedInstance { - return m.inner.ListInstances() -} - -// ListConfigs returns the UNGROUPED instance configs with their original -// settings. To see the grouped instances, call ListInstances instead. -func (m *GroupManager) ListConfigs() map[string]Config { - m.mtx.Lock() - defer m.mtx.Unlock() - - cfgs := make(map[string]Config) - for _, groupedConfigs := range m.groups { - for _, cfg := range groupedConfigs { - cfgs[cfg.Name] = cfg - } - } - return cfgs -} - -// ApplyConfig will determine the group of the Config before applying it to -// the group. If no group exists, one will be created. If a group already -// exists, the group will have its settings merged with the Config and -// will be updated. -func (m *GroupManager) ApplyConfig(c Config) error { - m.mtx.Lock() - defer m.mtx.Unlock() - return m.applyConfig(c) -} - -func (m *GroupManager) applyConfig(c Config) (err error) { - groupName, err := hashConfig(c) - if err != nil { - return fmt.Errorf("failed to get group name for config %s: %w", c.Name, err) - } - - grouped := m.groups[groupName] - if grouped == nil { - grouped = make(groupedConfigs) - } else { - grouped = grouped.Copy() - } - - // Add the config to the group. If the config already exists within this - // group, it'll be overwritten. 
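	// (grouped is a copy of the stored group, so m.groups stays untouched
	// until the inner ApplyConfig below succeeds; any failure leaves the
	// previous grouping intact.)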
- grouped[c.Name] = c - mergedConfig, err := groupConfigs(groupName, grouped) - if err != nil { - err = fmt.Errorf("failed to group configs for %s: %w", c.Name, err) - return - } - - // If this config already exists in another group, we have to delete it. - // If we can't delete it from the old group, we also can't apply it. - if oldGroup, ok := m.groupLookup[c.Name]; ok && oldGroup != groupName { - // There's a few cases here where if something fails, it's safer to crash - // out and restart the Agent from scratch than it would be to continue as - // normal. The panics here are for truly exceptional cases, otherwise if - // something is recoverable, we'll return an error like normal. - - // If we can't find the old config, something got messed up when applying - // the config. But it also means that we're not going to be able to restore - // the config if something fails. Preemptively we should panic, since the - // internal state has gotten messed up and can't be fixed. - oldConfig, ok := m.groups[oldGroup][c.Name] - if !ok { - panic("failed to properly move config to new group. THIS IS A BUG!") - } - - err = m.deleteConfig(c.Name) - if err != nil { - err = fmt.Errorf("cannot apply config %s because deleting it from the old group failed: %w", c.Name, err) - return - } - - // Now that the config is deleted, we need to restore it in case applying - // the new one happens to fail. - defer func() { - if err == nil { - return - } - - // If restoring a config fails, we've left the Agent in a really bad - // state: the new config can't be applied and the old config can't be - // brought back. Just crash and let the Agent start fresh. - // - // Restoring the config _shouldn't_ fail here since applies only fail - // if the config is invalid. Since the config was running before, it - // should already be valid. If it does happen to fail, though, the - // internal state is left corrupted since we've completely lost a - // config. - restoreError := m.applyConfig(oldConfig) - if restoreError != nil { - panic(fmt.Sprintf("failed to properly restore config. THIS IS A BUG! error: %s", restoreError)) - } - }() - } - - err = m.inner.ApplyConfig(mergedConfig) - if err != nil { - err = fmt.Errorf("failed to apply grouped configs for config %s: %w", c.Name, err) - return - } - - // If the inner apply succeeded, we can update our group and the lookup. - m.groups[groupName] = grouped - m.groupLookup[c.Name] = groupName - return -} - -// DeleteConfig will remove a Config from its associated group. If there are -// no more Configs within that group after this Config is deleted, the managed -// instance will be stopped. Otherwise, the managed instance will be updated -// with the new grouped Config that doesn't include the removed one. -func (m *GroupManager) DeleteConfig(name string) error { - m.mtx.Lock() - defer m.mtx.Unlock() - return m.deleteConfig(name) -} - -func (m *GroupManager) deleteConfig(name string) error { - groupName, ok := m.groupLookup[name] - if !ok { - return fmt.Errorf("config does not exist") - } - - // Grab a copy of the stored group and delete our entry. We can - // persist it after we successfully remove the config. - group := m.groups[groupName].Copy() - delete(group, name) - - if len(group) == 0 { - // We deleted the last remaining config in that group; we can delete it in - // its entirety now. 
- if err := m.inner.DeleteConfig(groupName); err != nil { - return fmt.Errorf("failed to delete empty group %s after removing config %s: %w", groupName, name, err) - } - } else { - // We deleted the config but there's still more in the group; apply the new - // group that holds the remainder of the configs (minus the one we just - // deleted). - mergedConfig, err := groupConfigs(groupName, group) - if err != nil { - return fmt.Errorf("failed to regroup configs without %s: %w", name, err) - } - - err = m.inner.ApplyConfig(mergedConfig) - if err != nil { - return fmt.Errorf("failed to apply new group without %s: %w", name, err) - } - } - - // Update the stored group and remove the entry from the lookup table. - if len(group) == 0 { - delete(m.groups, groupName) - } else { - m.groups[groupName] = group - } - - delete(m.groupLookup, name) - return nil -} - -// Stop stops the Manager and all of its managed instances. -func (m *GroupManager) Stop() { - m.mtx.Lock() - defer m.mtx.Unlock() - - m.inner.Stop() - m.groupLookup = make(map[string]string) - m.groups = make(map[string]groupedConfigs) -} - -// hashConfig determines the hash of a Config used for grouping. It ignores -// the name and scrape_configs and also orders remote_writes by name prior to -// hashing. -func hashConfig(c Config) (string, error) { - // We need a deep copy since we're going to mutate the remote_write - // pointers. - groupable, err := c.Clone() - if err != nil { - return "", err - } - - // Ignore name and scrape configs when hashing - groupable.Name = "" - groupable.ScrapeConfigs = nil - - // Assign names to remote_write configs if they're not present already. - // This is also done in AssignDefaults but is duplicated here for the sake - // of simplifying responsibility of GroupManager. - for _, cfg := range groupable.RemoteWrite { - if cfg != nil { - // We don't care if the names are different, just that the other settings - // are the same. Blank out the name here before hashing the remote - // write config. - cfg.Name = "" - - hash, err := getHash(cfg) - if err != nil { - return "", err - } - cfg.Name = hash[:6] - } - } - - // Now sort remote_writes by name and nil-ness. - sort.Slice(groupable.RemoteWrite, func(i, j int) bool { - switch { - case groupable.RemoteWrite[i] == nil: - return true - case groupable.RemoteWrite[j] == nil: - return false - default: - return groupable.RemoteWrite[i].Name < groupable.RemoteWrite[j].Name - } - }) - - bb, err := MarshalConfig(&groupable, false) - if err != nil { - return "", err - } - hash := md5.Sum(bb) - return hex.EncodeToString(hash[:]), nil -} - -// groupConfig creates a grouped Config where all fields are copied from -// the first config except for scrape_configs, which are appended together. -func groupConfigs(groupName string, grouped groupedConfigs) (Config, error) { - if len(grouped) == 0 { - return Config{}, fmt.Errorf("no configs") - } - - // Move the map into a slice and sort it by name so this function - // consistently does the same thing. - cfgs := make([]Config, 0, len(grouped)) - for _, cfg := range grouped { - cfgs = append(cfgs, cfg) - } - sort.Slice(cfgs, func(i, j int) bool { return cfgs[i].Name < cfgs[j].Name }) - - combined, err := cfgs[0].Clone() - if err != nil { - return Config{}, err - } - combined.Name = groupName - combined.ScrapeConfigs = []*config.ScrapeConfig{} - - // Assign all remote_write configs in the group a consistent set of remote_names. 
- // If the grouped configs are coming from the scraping service, defaults will have - // been applied and the remote names will be prefixed with the old instance config name. - for _, rwc := range combined.RemoteWrite { - // Blank out the existing name before getting the hash so it doesn't take into - // account any existing name. - rwc.Name = "" - - hash, err := getHash(rwc) - if err != nil { - return Config{}, err - } - - rwc.Name = groupName[:6] + "-" + hash[:6] - } - - // Combine all the scrape configs. It's possible that two different ungrouped - // configs had a matching job name, but this will be detected and rejected - // (as it should be) when the underlying Manager eventually validates the - // combined config. - // - // TODO(rfratto): should we prepend job names with the name of the original - // config? (e.g., job_name = "config_name/job_name"). - for _, cfg := range cfgs { - combined.ScrapeConfigs = append(combined.ScrapeConfigs, cfg.ScrapeConfigs...) - } - - return combined, nil -} diff --git a/internal/static/metrics/instance/group_manager_test.go b/internal/static/metrics/instance/group_manager_test.go deleted file mode 100644 index 48b87236f4..0000000000 --- a/internal/static/metrics/instance/group_manager_test.go +++ /dev/null @@ -1,446 +0,0 @@ -package instance - -import ( - "fmt" - "strings" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestGroupManager_ListInstances_Configs(t *testing.T) { - gm := NewGroupManager(newFakeManager()) - - // Create two configs in the same group and one in another - // group. - configs := []string{ - ` -name: configA -scrape_configs: [] -remote_write: []`, - ` -name: configB -scrape_configs: [] -remote_write: []`, - ` -name: configC -scrape_configs: [] -remote_write: -- url: http://localhost:9090`, - } - - for _, cfg := range configs { - c := testUnmarshalConfig(t, cfg) - err := gm.ApplyConfig(c) - require.NoError(t, err) - } - - // ListInstances should return our grouped instances - insts := gm.ListInstances() - require.Equal(t, 2, len(insts)) - - // ...but ListConfigs should return the ungrouped configs. - confs := gm.ListConfigs() - require.Equal(t, 3, len(confs)) - require.Containsf(t, confs, "configA", "configA not in confs") - require.Containsf(t, confs, "configB", "configB not in confs") - require.Containsf(t, confs, "configC", "configC not in confs") -} - -func testUnmarshalConfig(t *testing.T, cfg string) Config { - c, err := UnmarshalConfig(strings.NewReader(cfg)) - require.NoError(t, err) - return *c -} - -func TestGroupManager_ApplyConfig(t *testing.T) { - t.Run("combining configs", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configB -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 2, len(gm.groupLookup)) - - // Check the underlying grouped config and make sure it was updated. 
- expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`, gm.groupLookup["configA"])) - - innerConfigs := inner.ListConfigs() - require.Equal(t, 1, len(innerConfigs)) - require.Equal(t, expect, innerConfigs[gm.groupLookup["configA"]]) - }) - - t.Run("updating existing config within group", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - - // Check the underlying grouped config and make sure it was updated. - expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`, gm.groupLookup["configA"])) - actual := inner.ListConfigs()[gm.groupLookup["configA"]] - require.Equal(t, expect, actual) - }) - - t.Run("updating existing config to new group", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - oldGroup := gm.groupLookup["configA"] - - // Reapply the config but give it a setting change that would - // force it into a new group. We should still have only one - // group and only one entry in the group lookup table. - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -host_filter: true -scrape_configs: [] -remote_write: [] -`)) - require.NoError(t, err) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - newGroup := gm.groupLookup["configA"] - - // Check the underlying grouped config and make sure it was updated. - expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -host_filter: true -scrape_configs: [] -remote_write: [] -`, gm.groupLookup["configA"])) - actual := inner.ListConfigs()[newGroup] - require.Equal(t, expect, actual) - - // The old underlying ngroup should be gone. - require.NotContains(t, inner.ListConfigs(), oldGroup) - require.Equal(t, 1, len(inner.ListConfigs())) - }) -} - -func TestGroupManager_ApplyConfig_RemoteWriteName(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: [] -remote_write: -- name: rw-cfg-a - url: http://localhost:9009/api/prom/push -`)) - require.NoError(t, err) - - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - - // Check the underlying grouped config and make sure the group_name - // didn't get copied from the remote_name of A. 
- innerConfigs := inner.ListConfigs() - require.Equal(t, 1, len(innerConfigs)) - - cfg := innerConfigs[gm.groupLookup["configA"]] - require.NotEqual(t, "rw-cfg-a", cfg.RemoteWrite[0].Name) -} - -func TestGroupManager_DeleteConfig(t *testing.T) { - t.Run("partial delete", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - - // Apply two configs in the same group and then delete one. The group - // should still be active with the one config inside of it. - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configB -scrape_configs: -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.DeleteConfig("configA") - require.NoError(t, err) - - expect := testUnmarshalConfig(t, fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: []`, gm.groupLookup["configB"])) - actual := inner.ListConfigs()[gm.groupLookup["configB"]] - require.Equal(t, expect, actual) - require.Equal(t, 1, len(gm.groups)) - require.Equal(t, 1, len(gm.groupLookup)) - }) - - t.Run("full delete", func(t *testing.T) { - inner := newFakeManager() - gm := NewGroupManager(inner) - - // Apply a single config but delete the entire group. - err := gm.ApplyConfig(testUnmarshalConfig(t, ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: [] -`)) - require.NoError(t, err) - - err = gm.DeleteConfig("configA") - require.NoError(t, err) - require.Equal(t, 0, len(inner.ListConfigs())) - require.Equal(t, 0, len(inner.ListInstances())) - require.Equal(t, 0, len(gm.groups)) - require.Equal(t, 0, len(gm.groupLookup)) - }) -} - -func newFakeManager() Manager { - instances := make(map[string]ManagedInstance) - configs := make(map[string]Config) - - return &MockManager{ - ListInstancesFunc: func() map[string]ManagedInstance { - return instances - }, - ListConfigsFunc: func() map[string]Config { - return configs - }, - ApplyConfigFunc: func(c Config) error { - instances[c.Name] = &mockInstance{} - configs[c.Name] = c - return nil - }, - DeleteConfigFunc: func(name string) error { - delete(instances, name) - delete(configs, name) - return nil - }, - StopFunc: func() {}, - } -} - -func Test_hashConfig(t *testing.T) { - t.Run("name and scrape configs are ignored", func(t *testing.T) { - configAText := ` -name: configA -scrape_configs: [] -remote_write: []` - - configBText := ` -name: configB -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: []` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.Equal(t, hashA, hashB) - }) - - t.Run("remote_writes are unordered", func(t *testing.T) { - configAText := ` -name: configA -scrape_configs: [] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2` - - configBText := ` -name: configB -scrape_configs: [] -remote_write: -- url: http://localhost:9009/api/prom/push2 -- url: http://localhost:9009/api/prom/push1` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.Equal(t, hashA, hashB) - }) - - t.Run("remote_writes must match", func(t *testing.T) { - configAText := ` -name: configA -scrape_configs: [] 
-remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2` - - configBText := ` -name: configB -scrape_configs: [] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push1` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.NotEqual(t, hashA, hashB) - }) - - t.Run("other fields must match", func(t *testing.T) { - configAText := ` -name: configA -host_filter: true -scrape_configs: [] -remote_write: []` - - configBText := ` -name: configB -host_filter: false -scrape_configs: [] -remote_write: []` - - hashA, hashB := getHashesFromConfigs(t, configAText, configBText) - require.NotEqual(t, hashA, hashB) - }) -} - -func getHashesFromConfigs(t *testing.T, configAText, configBText string) (string, string) { - configA := testUnmarshalConfig(t, configAText) - configB := testUnmarshalConfig(t, configBText) - - hashA, err := hashConfig(configA) - require.NoError(t, err) - - hashB, err := hashConfig(configB) - require.NoError(t, err) - - return hashA, hashB -} - -func Test_groupConfigs(t *testing.T) { - configAText := ` -name: configA -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2` - - configBText := ` -name: configB -scrape_configs: -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: -- url: http://localhost:9009/api/prom/push2 -- url: http://localhost:9009/api/prom/push1` - - configA := testUnmarshalConfig(t, configAText) - configB := testUnmarshalConfig(t, configBText) - - groupName, err := hashConfig(configA) - require.NoError(t, err) - - expectText := fmt.Sprintf(` -name: %s -scrape_configs: -- job_name: test_job - static_configs: - - targets: [127.0.0.1:12345] -- job_name: test_job2 - static_configs: - - targets: [127.0.0.1:12345] -remote_write: -- url: http://localhost:9009/api/prom/push1 -- url: http://localhost:9009/api/prom/push2`, groupName) - - expect, err := UnmarshalConfig(strings.NewReader(expectText)) - require.NoError(t, err) - - // Generate expected remote_write names - for _, rwConfig := range expect.RemoteWrite { - hash, err := getHash(rwConfig) - require.NoError(t, err) - rwConfig.Name = groupName[:6] + "-" + hash[:6] - } - - group := groupedConfigs{ - "configA": configA, - "configB": configB, - } - actual, err := groupConfigs(groupName, group) - require.NoError(t, err) - require.Equal(t, *expect, actual) - - // Consistency check: groupedConfigs is a map and we want to always have - // groupConfigs return the same thing regardless of how the map - // is iterated over. Run through groupConfigs a bunch of times and - // make sure it always returns the same thing. 
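	// (Go deliberately randomizes map iteration order between runs, so
	// repeating the call below is a cheap way to surface any accidental
	// order-dependence in the grouping.)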
- for i := 0; i < 100; i++ { - actual, err = groupConfigs(groupName, group) - require.NoError(t, err) - require.Equal(t, *expect, actual) - } -} diff --git a/internal/static/metrics/instance/host_filter.go b/internal/static/metrics/instance/host_filter.go deleted file mode 100644 index 2328f6feff..0000000000 --- a/internal/static/metrics/instance/host_filter.go +++ /dev/null @@ -1,238 +0,0 @@ -package instance - -import ( - "context" - "fmt" - "net" - "sync" - - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery/kubernetes" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/relabel" -) - -// HostFilterLabelMatchers are the set of labels that will be used to match -// against an incoming target. -var HostFilterLabelMatchers = []string{ - // Consul - "__meta_consul_node", - - // Dockerswarm - "__meta_dockerswarm_node_id", - "__meta_dockerswarm_node_hostname", - "__meta_dockerswarm_node_address", - - // Kubernetes node labels. Labels for `role: service` are omitted as - // service targets have labels merged with discovered pods. - "__meta_kubernetes_pod_node_name", - "__meta_kubernetes_node_name", - - // Generic (applied by host_filter_relabel_configs) - "__host__", -} - -// DiscoveredGroups is a set of groups found via service discovery. -type DiscoveredGroups = map[string][]*targetgroup.Group - -// GroupChannel is a channel that provides discovered target groups. -type GroupChannel = <-chan DiscoveredGroups - -// HostFilter acts as a MITM between the discovery manager and the -// scrape manager, filtering out discovered targets that are not -// running on the same node as the agent itself. -type HostFilter struct { - ctx context.Context - cancel context.CancelFunc - - host string - - inputCh GroupChannel - outputCh chan map[string][]*targetgroup.Group - - relabelMut sync.Mutex - relabels []*relabel.Config -} - -// NewHostFilter creates a new HostFilter. -func NewHostFilter(host string, relabels []*relabel.Config) *HostFilter { - ctx, cancel := context.WithCancel(context.Background()) - f := &HostFilter{ - ctx: ctx, - cancel: cancel, - - host: host, - relabels: relabels, - - outputCh: make(chan map[string][]*targetgroup.Group), - } - return f -} - -// PatchSD patches services discoveries to optimize performance for host -// filtering. The discovered targets will be pruned to as close to the set -// that HostFilter will output as possible. -func (f *HostFilter) PatchSD(scrapes []*config.ScrapeConfig) { - for _, sc := range scrapes { - for _, d := range sc.ServiceDiscoveryConfigs { - switch d := d.(type) { - case *kubernetes.SDConfig: - if d.Role == kubernetes.RolePod { - d.Selectors = []kubernetes.SelectorConfig{{ - Role: kubernetes.RolePod, - Field: fmt.Sprintf("spec.nodeName=%s", f.host), - }} - } - } - } - } -} - -// SetRelabels updates the relabeling rules used by the HostFilter. -func (f *HostFilter) SetRelabels(relabels []*relabel.Config) { - f.relabelMut.Lock() - defer f.relabelMut.Unlock() - f.relabels = relabels -} - -// Run starts the HostFilter. It only exits when the HostFilter is stopped. -// Run will continually read from syncCh and filter groups discovered down to -// targets that are colocated on the same node as the one the HostFilter is -// running in. 
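// Run below is the middle stage of a discovery-to-scrape pipeline: receive a
// full snapshot, filter it, forward it. The same shape in miniature, with
// transform standing in for FilterGroups:

package pipe

import "context"

// pipe forwards transformed snapshots downstream. The unbuffered output
// means a slow consumer exerts backpressure all the way back to discovery,
// and each snapshot simply supersedes the previous one; nothing is queued
// beyond the channels themselves.
func pipe[T any](ctx context.Context, in <-chan T, out chan<- T, transform func(T) T) {
	for {
		select {
		case <-ctx.Done():
			return
		case v := <-in:
			select {
			case <-ctx.Done():
				return
			case out <- transform(v):
			}
		}
	}
}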
-func (f *HostFilter) Run(syncCh GroupChannel) { - f.inputCh = syncCh - - for { - select { - case <-f.ctx.Done(): - return - case data := <-f.inputCh: - f.relabelMut.Lock() - relabels := f.relabels - f.relabelMut.Unlock() - - f.outputCh <- FilterGroups(data, f.host, relabels) - } - } -} - -// Stop stops the host filter from processing more target updates. -func (f *HostFilter) Stop() { - f.cancel() -} - -// SyncCh returns a read only channel used by all the clients to receive -// target updates. -func (f *HostFilter) SyncCh() GroupChannel { - return f.outputCh -} - -// FilterGroups takes a set of DiscoveredGroups as input and filters out -// any Target that is not running on the host machine provided by host. -// -// This is done by looking at HostFilterLabelMatchers and __address__. -// -// If the discovered address is localhost or 127.0.0.1, the group is never -// filtered out. -func FilterGroups(in DiscoveredGroups, host string, configs []*relabel.Config) DiscoveredGroups { - out := make(DiscoveredGroups, len(in)) - - for name, groups := range in { - groupList := make([]*targetgroup.Group, 0, len(groups)) - - for _, group := range groups { - newGroup := &targetgroup.Group{ - Targets: make([]model.LabelSet, 0, len(group.Targets)), - Labels: group.Labels, - Source: group.Source, - } - - for _, target := range group.Targets { - allLabels := mergeSets(target, group.Labels) - processedLabels, _ := relabel.Process(toLabelSlice(allLabels), configs...) - - if !shouldFilterTarget(processedLabels, host) { - newGroup.Targets = append(newGroup.Targets, target) - } - } - - groupList = append(groupList, newGroup) - } - - out[name] = groupList - } - - return out -} - -// shouldFilterTarget returns true when the target labels (combined with the set of common -// labels) should be filtered out by FilterGroups. -func shouldFilterTarget(lbls labels.Labels, host string) bool { - shouldFilterTargetByLabelValue := func(labelValue string) bool { - if addr, _, err := net.SplitHostPort(labelValue); err == nil { - labelValue = addr - } - - // Special case: always allow localhost/127.0.0.1 - if labelValue == "localhost" || labelValue == "127.0.0.1" { - return false - } - - return labelValue != host - } - - lset := labels.New(lbls...) - addressLabel := lset.Get(model.AddressLabel) - if addressLabel == "" { - // No address label. This is invalid and will generate an error by the scrape - // manager, so we'll pass it on for now. - return false - } - - // If the __address__ label matches, we can quit early. - if !shouldFilterTargetByLabelValue(addressLabel) { - return false - } - - // Fall back to checking metalabels as long as their values are nonempty. - for _, check := range HostFilterLabelMatchers { - // If any of the checked labels match for not being filtered out, we can - // return before checking any of the other matchers. - if addr := lset.Get(check); addr != "" && !shouldFilterTargetByLabelValue(addr) { - return false - } - } - - // Nothing matches, filter it out. - return true -} - -// mergeSets merges the sets of labels together. Earlier sets take priority for label names. 
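// The per-label comparison inside shouldFilterTarget above normalizes away
// ports with net.SplitHostPort and whitelists loopback so the agent can
// always scrape itself. The same logic isolated into a small helper, where
// sameHost reports whether a target should be kept:

package hostmatch

import "net"

// sameHost strips an optional ":port" suffix, always accepts loopback, and
// otherwise requires an exact hostname match.
func sameHost(labelValue, host string) bool {
	if addr, _, err := net.SplitHostPort(labelValue); err == nil {
		labelValue = addr
	}
	// Loopback targets are never filtered out.
	if labelValue == "localhost" || labelValue == "127.0.0.1" {
		return true
	}
	return labelValue == host
}

// For example:
//   sameHost("myhost:12345", "myhost")   == true
//   sameHost("notmyhost", "myhost")      == false
//   sameHost("127.0.0.1:9090", "myhost") == true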
-func mergeSets(sets ...model.LabelSet) model.LabelSet { - sz := 0 - for _, set := range sets { - sz += len(set) - } - result := make(model.LabelSet, sz) - - for _, set := range sets { - for labelName, labelValue := range set { - if _, exist := result[labelName]; exist { - continue - } - result[labelName] = labelValue - } - } - - return result -} - -func toLabelSlice(set model.LabelSet) labels.Labels { - slice := make(labels.Labels, 0, len(set)) - for name, value := range set { - slice = append(slice, labels.Label{Name: string(name), Value: string(value)}) - } - return slice -} diff --git a/internal/static/metrics/instance/host_filter_test.go b/internal/static/metrics/instance/host_filter_test.go deleted file mode 100644 index 8eca3a3f51..0000000000 --- a/internal/static/metrics/instance/host_filter_test.go +++ /dev/null @@ -1,201 +0,0 @@ -package instance - -import ( - "testing" - - "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/prometheus/prometheus/model/relabel" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v3" -) - -func makeGroup(labels []model.LabelSet) *targetgroup.Group { - return &targetgroup.Group{ - Targets: labels, - Labels: model.LabelSet{}, - } -} - -func TestFilterGroups(t *testing.T) { - tt := []struct { - name string - labelHost string - inputHost string - shouldRemove bool - }{ - { - name: "complete match", - labelHost: "myhost", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch", - labelHost: "notmyhost", - inputHost: "myhost", - shouldRemove: true, - }, - { - name: "match with port", - labelHost: "myhost:12345", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch with port", - labelHost: "notmyhost:12345", - inputHost: "myhost", - shouldRemove: true, - }, - } - - // Sets of labels we want to test against. - labels := []model.LabelName{ - model.AddressLabel, - model.LabelName("__meta_consul_node"), - model.LabelName("__meta_dockerswarm_node_id"), - model.LabelName("__meta_dockerswarm_node_hostname"), - model.LabelName("__meta_dockerswarm_node_address"), - model.LabelName("__meta_kubernetes_pod_node_name"), - model.LabelName("__meta_kubernetes_node_name"), - model.LabelName("__host__"), - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - for _, label := range labels { - t.Run(string(label), func(t *testing.T) { - lset := model.LabelSet{ - label: model.LabelValue(tc.labelHost), - } - - // Special case: if label is not model.AddressLabel, we need to give - // it a fake value. model.AddressLabel is always expected to be present and - // is considered an error if it isn't. 
- if label != model.AddressLabel { - lset[model.AddressLabel] = "fake" - } - - group := makeGroup([]model.LabelSet{lset}) - - groups := DiscoveredGroups{"test": []*targetgroup.Group{group}} - result := FilterGroups(groups, tc.inputHost, nil) - - require.NotNil(t, result["test"]) - if tc.shouldRemove { - require.NotEqual(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } else { - require.Equal(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } - }) - } - }) - } -} - -func TestFilterGroups_Relabel(t *testing.T) { - tt := []struct { - name string - labelHost string - inputHost string - shouldRemove bool - }{ - { - name: "complete match", - labelHost: "myhost", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch", - labelHost: "notmyhost", - inputHost: "myhost", - shouldRemove: true, - }, - { - name: "match with port", - labelHost: "myhost:12345", - inputHost: "myhost", - shouldRemove: false, - }, - { - name: "mismatch with port", - labelHost: "notmyhost:12345", - inputHost: "myhost", - shouldRemove: true, - }, - } - - relabelConfig := []*relabel.Config{{ - SourceLabels: model.LabelNames{"__internal_label"}, - Action: relabel.Replace, - Separator: ";", - Regex: relabel.MustNewRegexp("(.*)"), - Replacement: "$1", - TargetLabel: "__host__", - }} - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - lset := model.LabelSet{ - model.AddressLabel: "fake_target", - "__internal_label": model.LabelValue(tc.labelHost), - } - - group := makeGroup([]model.LabelSet{lset}) - - groups := DiscoveredGroups{"test": []*targetgroup.Group{group}} - result := FilterGroups(groups, tc.inputHost, relabelConfig) - - require.NotNil(t, result["test"]) - if tc.shouldRemove { - require.NotEqual(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } else { - require.Equal(t, len(result["test"][0].Targets), len(groups["test"][0].Targets)) - } - }) - } -} - -func TestHostFilter_PatchSD(t *testing.T) { - rawInput := util.Untab(` -- job_name: default - kubernetes_sd_configs: - - role: service - - role: pod`) - - expect := util.Untab(` -- job_name: default - honor_timestamps: true - metrics_path: /metrics - scheme: http - track_timestamps_staleness: false - follow_redirects: true - enable_http2: true - kubernetes_sd_configs: - - role: service - kubeconfig_file: "" - follow_redirects: true - enable_http2: true - - role: pod - follow_redirects: true - enable_http2: true - kubeconfig_file: "" - selectors: - - role: pod - field: spec.nodeName=myhost - `) - - var input []*config.ScrapeConfig - err := yaml.Unmarshal([]byte(rawInput), &input) - require.NoError(t, err) - - NewHostFilter("myhost", nil).PatchSD(input) - - output, err := yaml.Marshal(input) - require.NoError(t, err) - require.YAMLEq(t, expect, string(output)) -} diff --git a/internal/static/metrics/instance/instance.go b/internal/static/metrics/instance/instance.go index a0de217627..db8e22109c 100644 --- a/internal/static/metrics/instance/instance.go +++ b/internal/static/metrics/instance/instance.go @@ -2,37 +2,18 @@ package instance import ( - "bytes" - "context" "crypto/md5" "encoding/hex" "encoding/json" "errors" "fmt" - "math" - "net/http" - "os" - "path/filepath" - "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/agent/internal/agentseed" - "github.com/grafana/agent/internal/static/metrics/wal" "github.com/grafana/agent/internal/useragent" - "github.com/grafana/agent/internal/util" - "github.com/oklog/run" - 
"github.com/prometheus/client_golang/prometheus" - config_util "github.com/prometheus/common/config" "github.com/prometheus/prometheus/config" - "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/model/relabel" - "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" - "go.uber.org/atomic" "gopkg.in/yaml.v2" ) @@ -195,578 +176,6 @@ func (c *Config) ApplyDefaults(global GlobalConfig) error { return nil } -// Clone makes a deep copy of the config along with global settings. -func (c *Config) Clone() (Config, error) { - bb, err := MarshalConfig(c, false) - if err != nil { - return Config{}, err - } - cp, err := UnmarshalConfig(bytes.NewReader(bb)) - if err != nil { - return Config{}, err - } - cp.global = c.global - - // Some tests will trip up on this; the marshal/unmarshal cycle might set - // an empty slice to nil. Set it back to an empty slice if we detect this - // happening. - if cp.ScrapeConfigs == nil && c.ScrapeConfigs != nil { - cp.ScrapeConfigs = []*config.ScrapeConfig{} - } - if cp.RemoteWrite == nil && c.RemoteWrite != nil { - cp.RemoteWrite = []*config.RemoteWriteConfig{} - } - - return *cp, nil -} - -type walStorageFactory func(reg prometheus.Registerer) (walStorage, error) - -// Instance is an individual metrics collector and remote_writer. -type Instance struct { - // All fields in the following block may be accessed and modified by - // concurrently running goroutines. - // - // Note that all Prometheus components listed here may be nil at any - // given time; methods reading them should take care to do nil checks. - mut sync.Mutex - cfg Config - wal walStorage - discovery *discoveryService - readyScrapeManager *readyScrapeManager - remoteStore *remote.Storage - storage storage.Storage - - // ready is set to true after the initialization process finishes - ready atomic.Bool - - hostFilter *HostFilter - - logger log.Logger - - reg prometheus.Registerer - newWal walStorageFactory - writeHandler http.Handler -} - -// New creates a new Instance with a directory for storing the WAL. The instance -// will not start until Run is called on the instance. -func New(reg prometheus.Registerer, cfg Config, walDir string, logger log.Logger) (*Instance, error) { - logger = log.With(logger, "instance", cfg.Name) - - instWALDir := filepath.Join(walDir, cfg.Name) - - newWal := func(reg prometheus.Registerer) (walStorage, error) { - return wal.NewStorage(logger, reg, instWALDir) - } - - return newInstance(cfg, reg, logger, newWal) -} - -func newInstance(cfg Config, reg prometheus.Registerer, logger log.Logger, newWal walStorageFactory) (*Instance, error) { - hostname, err := Hostname() - if err != nil { - return nil, fmt.Errorf("failed to get hostname: %w", err) - } - - i := &Instance{ - cfg: cfg, - logger: logger, - hostFilter: NewHostFilter(hostname, cfg.HostFilterRelabelConfigs), - - reg: reg, - newWal: newWal, - - readyScrapeManager: &readyScrapeManager{}, - } - - return i, nil -} - -// Run starts the instance, initializing Prometheus components, and will -// continue to run until an error happens during execution or the provided -// context is cancelled. -// -// Run may be re-called after exiting, as components will be reinitialized each -// time Run is called. 
-func (i *Instance) Run(ctx context.Context) error {
-	// i.cfg may change at any point in the middle of this method but not in a way
-	// that affects any of the code below; rather than grabbing a mutex every time
-	// we want to read the config, we'll simplify the access and just grab a copy
-	// now.
-	i.mut.Lock()
-	cfg := i.cfg
-	i.mut.Unlock()
-
-	level.Debug(i.logger).Log("msg", "initializing instance", "name", cfg.Name)
-
-	// trackingReg wraps the register for the instance to make sure that if Run
-	// exits, any metrics Prometheus registers are removed and can be
-	// re-registered if Run is called again.
-	trackingReg := util.WrapWithUnregisterer(i.reg)
-	defer trackingReg.UnregisterAll()
-
-	if err := i.initialize(ctx, trackingReg, &cfg); err != nil {
-		level.Error(i.logger).Log("msg", "failed to initialize instance", "err", err)
-		return fmt.Errorf("failed to initialize instance: %w", err)
-	}
-
-	// The actors defined here are defined in the order we want them to shut down.
-	// Primarily, we want to ensure that the following shutdown order is
-	// maintained:
-	//	1. The scrape manager stops
-	//	2. WAL storage is closed
-	//	3. Remote write storage is closed
-	// This is done to allow the instance to write stale markers for all active
-	// series.
-	rg := runGroupWithContext(ctx)
-
-	{
-		// Target Discovery
-		rg.Add(i.discovery.Run, i.discovery.Stop)
-	}
-	{
-		// Truncation loop
-		ctx, contextCancel := context.WithCancel(context.Background())
-		defer contextCancel()
-		rg.Add(
-			func() error {
-				i.truncateLoop(ctx, i.wal, &cfg)
-				level.Info(i.logger).Log("msg", "truncation loop stopped")
-				return nil
-			},
-			func(err error) {
-				level.Info(i.logger).Log("msg", "stopping truncation loop...")
-				contextCancel()
-			},
-		)
-	}
-	{
-		sm, err := i.readyScrapeManager.Get()
-		if err != nil {
-			level.Error(i.logger).Log("msg", "failed to get scrape manager")
-			return err
-		}
-
-		// Scrape manager
-		rg.Add(
-			func() error {
-				err := sm.Run(i.discovery.SyncCh())
-				level.Info(i.logger).Log("msg", "scrape manager stopped")
-				return err
-			},
-			func(err error) {
-				// The scrape manager is closed first to allow us to write staleness
-				// markers without receiving new samples from scraping in the meantime.
-				level.Info(i.logger).Log("msg", "stopping scrape manager...")
-				sm.Stop()
-
-				// On a graceful shutdown, write staleness markers. If something went
-				// wrong, then the instance will be relaunched.
-				if err == nil && cfg.WriteStaleOnShutdown {
-					level.Info(i.logger).Log("msg", "writing staleness markers...")
-					err := i.wal.WriteStalenessMarkers(i.getRemoteWriteTimestamp)
-					if err != nil {
-						level.Error(i.logger).Log("msg", "error writing staleness markers", "err", err)
-					}
-				}
-
-				// Closing the storage closes both the WAL storage and remote write
-				// storage.
-				level.Info(i.logger).Log("msg", "closing storage...")
-				if err := i.storage.Close(); err != nil {
-					level.Error(i.logger).Log("msg", "error stopping storage", "err", err)
-				}
-			},
-		)
-	}
-
-	level.Debug(i.logger).Log("msg", "running instance", "name", cfg.Name)
-	i.ready.Store(true)
-	err := rg.Run()
-	if err != nil {
-		level.Error(i.logger).Log("msg", "agent instance stopped with error", "err", err)
-	}
-	return err
-}
-
-// initialize sets up the various Prometheus components with their initial
-// settings. initialize will be called each time the Instance is run. Prometheus
-// components cannot be reused after they are stopped so we need to recreate them
-// each run. 
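
The shutdown-ordering comment in the removed Run method above relies on how github.com/oklog/run behaves: when the first actor's execute function returns, every registered interrupt function is invoked, and Run only returns once all actors have finished. A stripped-down sketch of that pattern (the actor names are illustrative stand-ins for the scrape manager and storage):

    package main

    import (
        "context"
        "fmt"

        "github.com/oklog/run"
    )

    func main() {
        ctx, cancel := context.WithCancel(context.Background())

        var g run.Group

        // First actor: analogous to the scrape manager. Its interrupt runs as
        // part of the interrupt sweep once any actor exits.
        g.Add(func() error {
            <-ctx.Done()
            return nil
        }, func(error) {
            fmt.Println("stopping scrape-like actor")
        })

        // Second actor: analogous to storage, registered after the first so
        // its interrupt is signalled after the scrape-like actor's.
        g.Add(func() error {
            <-ctx.Done()
            return nil
        }, func(error) {
            fmt.Println("closing storage-like actor")
        })

        cancel() // trigger shutdown immediately for the demo
        _ = g.Run()
    }
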
-func (i *Instance) initialize(ctx context.Context, reg prometheus.Registerer, cfg *Config) error { - i.mut.Lock() - defer i.mut.Unlock() - - if cfg.HostFilter { - i.hostFilter.PatchSD(cfg.ScrapeConfigs) - } - - var err error - - i.wal, err = i.newWal(reg) - if err != nil { - return fmt.Errorf("error creating WAL: %w", err) - } - - i.writeHandler = remote.NewWriteHandler(i.logger, reg, i.wal) - - i.discovery, err = i.newDiscoveryManager(ctx, cfg) - if err != nil { - return fmt.Errorf("error creating discovery manager: %w", err) - } - - i.readyScrapeManager = &readyScrapeManager{} - - // Set up the remote storage - remoteLogger := log.With(i.logger, "component", "remote") - i.remoteStore = remote.NewStorage(remoteLogger, reg, i.wal.StartTime, i.wal.Directory(), cfg.RemoteFlushDeadline, i.readyScrapeManager) - uid := agentseed.Get().UID - for _, rw := range cfg.RemoteWrite { - if rw.Headers == nil { - rw.Headers = map[string]string{} - } - rw.Headers[agentseed.HeaderName] = uid - } - err = i.remoteStore.ApplyConfig(&config.Config{ - GlobalConfig: cfg.global.Prometheus, - RemoteWriteConfigs: cfg.RemoteWrite, - }) - if err != nil { - return fmt.Errorf("failed applying config to remote storage: %w", err) - } - - i.storage = storage.NewFanout(i.logger, i.wal, i.remoteStore) - - opts := &scrape.Options{ - ExtraMetrics: cfg.global.ExtraMetrics, - HTTPClientOptions: []config_util.HTTPClientOption{}, - } - - if cfg.global.DisableKeepAlives { - opts.HTTPClientOptions = append(opts.HTTPClientOptions, config_util.WithKeepAlivesDisabled()) - } - if cfg.global.IdleConnTimeout > 0 { - opts.HTTPClientOptions = append(opts.HTTPClientOptions, config_util.WithIdleConnTimeout(cfg.global.IdleConnTimeout)) - } - scrapeManager := newScrapeManager(opts, log.With(i.logger, "component", "scrape manager"), i.storage) - err = scrapeManager.ApplyConfig(&config.Config{ - GlobalConfig: cfg.global.Prometheus, - ScrapeConfigs: cfg.ScrapeConfigs, - }) - if err != nil { - return fmt.Errorf("failed applying config to scrape manager: %w", err) - } - - i.readyScrapeManager.Set(scrapeManager) - - return nil -} - -// Ready returns true if the Instance has been initialized and is ready -// to start scraping and delivering metrics. -func (i *Instance) Ready() bool { - return i.ready.Load() -} - -// Update accepts a new Config for the Instance and will dynamically update any -// running Prometheus components with the new values from Config. Update will -// return an ErrInvalidUpdate if the Update could not be applied. -func (i *Instance) Update(c Config) (err error) { - i.mut.Lock() - defer i.mut.Unlock() - - // It's only (currently) valid to update scrape_configs and remote_write, so - // if any other field has changed here, return the error. - switch { - // This first case should never happen in practice but it's included here for - // completion’s sake. - case i.cfg.Name != c.Name: - err = errImmutableField{Field: "name"} - case i.cfg.HostFilter != c.HostFilter: - err = errImmutableField{Field: "host_filter"} - case i.cfg.WALTruncateFrequency != c.WALTruncateFrequency: - err = errImmutableField{Field: "wal_truncate_frequency"} - case i.cfg.RemoteFlushDeadline != c.RemoteFlushDeadline: - err = errImmutableField{Field: "remote_flush_deadline"} - case i.cfg.WriteStaleOnShutdown != c.WriteStaleOnShutdown: - err = errImmutableField{Field: "write_stale_on_shutdown"} - } - if err != nil { - return ErrInvalidUpdate{Inner: err} - } - - // Check to see if the components exist yet. 
- if i.discovery == nil || i.remoteStore == nil || i.readyScrapeManager == nil { - return ErrInvalidUpdate{ - Inner: fmt.Errorf("cannot dynamically update because instance is not running"), - } - } - - // NOTE(rfratto): Prometheus applies configs in a specific order to ensure - // flow from service discovery down to the WAL continues working properly. - // - // Keep the following order below: - // - // 1. Local config - // 2. Remote Store - // 3. Scrape Manager - // 4. Discovery Manager - - originalConfig := i.cfg - defer func() { - if err != nil { - i.cfg = originalConfig - } - }() - i.cfg = c - - i.hostFilter.SetRelabels(c.HostFilterRelabelConfigs) - if c.HostFilter { - // N.B.: only call PatchSD if HostFilter is enabled since it - // mutates what targets will be discovered. - i.hostFilter.PatchSD(c.ScrapeConfigs) - } - - err = i.remoteStore.ApplyConfig(&config.Config{ - GlobalConfig: c.global.Prometheus, - RemoteWriteConfigs: c.RemoteWrite, - }) - if err != nil { - return fmt.Errorf("error applying new remote_write configs: %w", err) - } - - sm, err := i.readyScrapeManager.Get() - if err != nil { - return fmt.Errorf("couldn't get scrape manager to apply new scrape configs: %w", err) - } - err = sm.ApplyConfig(&config.Config{ - GlobalConfig: c.global.Prometheus, - ScrapeConfigs: c.ScrapeConfigs, - }) - if err != nil { - return fmt.Errorf("error applying updated configs to scrape manager: %w", err) - } - - sdConfigs := map[string]discovery.Configs{} - for _, v := range c.ScrapeConfigs { - sdConfigs[v.JobName] = v.ServiceDiscoveryConfigs - } - err = i.discovery.Manager.ApplyConfig(sdConfigs) - if err != nil { - return fmt.Errorf("failed applying configs to discovery manager: %w", err) - } - - return nil -} - -// TargetsActive returns the set of active targets from the scrape manager. Returns nil -// if the scrape manager is not ready yet. -func (i *Instance) TargetsActive() map[string][]*scrape.Target { - i.mut.Lock() - defer i.mut.Unlock() - - if i.readyScrapeManager == nil { - return nil - } - - mgr, err := i.readyScrapeManager.Get() - if err == ErrNotReady { - return nil - } else if err != nil { - level.Error(i.logger).Log("msg", "failed to get scrape manager when collecting active targets", "err", err) - return nil - } - return mgr.TargetsActive() -} - -// StorageDirectory returns the directory where this Instance is writing series -// and samples to for the WAL. -func (i *Instance) StorageDirectory() string { - return i.wal.Directory() -} - -// WriteHandler returns an HTTP handler for pushing metrics directly into the -// instance's WAL. -func (i *Instance) WriteHandler() http.Handler { - return i.writeHandler -} - -// Appender returns a storage.Appender from the instance's WAL -func (i *Instance) Appender(ctx context.Context) storage.Appender { - return i.wal.Appender(ctx) -} - -type discoveryService struct { - Manager *discovery.Manager - - RunFunc func() error - StopFunc func(err error) - SyncChFunc func() GroupChannel -} - -func (s *discoveryService) Run() error { return s.RunFunc() } -func (s *discoveryService) Stop(err error) { s.StopFunc(err) } -func (s *discoveryService) SyncCh() GroupChannel { return s.SyncChFunc() } - -// newDiscoveryManager returns an implementation of a runnable service -// that outputs discovered targets to a channel. The implementation -// uses the Prometheus Discovery Manager. Targets will be filtered -// if the instance is configured to perform host filtering. 
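
The removed Update method above restores the previous config through a deferred function if any later apply step fails. That rollback-on-error shape is reusable on its own; here is a small sketch under assumed types (Config, component, and validate are placeholders, not part of the removed package):

    package configutil

    import "fmt"

    type Config struct{ Name string }

    type component struct{ cfg Config }

    // apply mirrors the rollback pattern in the removed Instance.Update:
    // stash the old config, then restore it via defer if any step fails.
    func (c *component) apply(next Config) (err error) {
        original := c.cfg
        defer func() {
            if err != nil {
                c.cfg = original // roll back on any failure below
            }
        }()
        c.cfg = next

        // Stand-in for the remote-write / scrape / discovery apply steps.
        if err = validate(next); err != nil {
            return fmt.Errorf("applying config %q: %w", next.Name, err)
        }
        return nil
    }

    func validate(Config) error { return nil }
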
-func (i *Instance) newDiscoveryManager(ctx context.Context, cfg *Config) (*discoveryService, error) {
-	ctx, cancel := context.WithCancel(ctx)
-
-	logger := log.With(i.logger, "component", "discovery manager")
-	manager := discovery.NewManager(ctx, logger, discovery.Name("scrape"))
-
-	// TODO(rfratto): refactor this to a function?
-	// TODO(rfratto): ensure job name is unique
-	c := map[string]discovery.Configs{}
-	for _, v := range cfg.ScrapeConfigs {
-		c[v.JobName] = v.ServiceDiscoveryConfigs
-	}
-	err := manager.ApplyConfig(c)
-	if err != nil {
-		cancel()
-		level.Error(i.logger).Log("msg", "failed applying config to discovery manager", "err", err)
-		return nil, fmt.Errorf("failed applying config to discovery manager: %w", err)
-	}
-
-	rg := runGroupWithContext(ctx)
-
-	// Run the manager
-	rg.Add(func() error {
-		err := manager.Run()
-		level.Info(i.logger).Log("msg", "discovery manager stopped")
-		return err
-	}, func(err error) {
-		level.Info(i.logger).Log("msg", "stopping discovery manager...")
-		cancel()
-	})
-
-	syncChFunc := manager.SyncCh
-
-	// If host filtering is enabled, run it and use its channel for discovered
-	// targets.
-	if cfg.HostFilter {
-		rg.Add(func() error {
-			i.hostFilter.Run(manager.SyncCh())
-			level.Info(i.logger).Log("msg", "host filterer stopped")
-			return nil
-		}, func(_ error) {
-			level.Info(i.logger).Log("msg", "stopping host filterer...")
-			i.hostFilter.Stop()
-		})
-
-		syncChFunc = i.hostFilter.SyncCh
-	}
-
-	return &discoveryService{
-		Manager: manager,
-
-		RunFunc:    rg.Run,
-		StopFunc:   rg.Stop,
-		SyncChFunc: syncChFunc,
-	}, nil
-}
-
-func (i *Instance) truncateLoop(ctx context.Context, wal walStorage, cfg *Config) {
-	// Track the last timestamp we truncated for to prevent segments from getting
-	// deleted until at least some new data has been sent.
-	var lastTs int64 = math.MinInt64
-
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-time.After(cfg.WALTruncateFrequency):
-			// The timestamp ts is used to determine which series are not receiving
-			// samples and may be deleted from the WAL. Their most recent append
-			// timestamp is compared to ts, and if that timestamp is older than ts,
-			// they are considered inactive and may be deleted.
-			//
-			// Subtracting a duration from ts will delay when it will be considered
-			// inactive and scheduled for deletion.
-			ts := i.getRemoteWriteTimestamp() - i.cfg.MinWALTime.Milliseconds()
-			if ts < 0 {
-				ts = 0
-			}
-
-			// Network issues can prevent the result of getRemoteWriteTimestamp from
-			// changing. We don't want data in the WAL to grow forever, so we set a cap
-			// on the maximum age data can be. If our ts is older than this cutoff point,
-			// we'll shift it forward to start deleting very stale data.
-			if maxTS := timestamp.FromTime(time.Now().Add(-i.cfg.MaxWALTime)); ts < maxTS {
-				ts = maxTS
-			}
-
-			if ts == lastTs {
-				level.Debug(i.logger).Log("msg", "not truncating the WAL, remote_write timestamp is unchanged", "ts", ts)
-				continue
-			}
-			lastTs = ts
-
-			level.Debug(i.logger).Log("msg", "truncating the WAL", "ts", ts)
-			err := wal.Truncate(ts)
-			if err != nil {
-				// The only issue here is larger disk usage and a greater replay time,
-				// so we'll only log this as a warning.
-				level.Warn(i.logger).Log("msg", "could not truncate WAL", "err", err)
-			}
-		}
-	}
-}
-
-// getRemoteWriteTimestamp looks up the last successful remote write timestamp.
-// This is passed to wal.Storage for its truncation. If no remote write sections
-// are configured, getRemoteWriteTimestamp returns the current time. 
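
The timestamp arithmetic in the removed truncateLoop above is easy to miss: the truncation point trails the last remote-write timestamp by MinWALTime, but is clamped forward so that nothing older than MaxWALTime survives a stalled remote_write. A standalone sketch of that clamping, using only the standard library (the function name is illustrative):

    package wal

    import "time"

    // truncateTS returns the WAL truncation timestamp in milliseconds: lag
    // minWAL behind the last remote-write timestamp, but never keep data
    // older than maxWAL, even if remote_write has stopped making progress.
    func truncateTS(lastSentMs int64, now time.Time, minWAL, maxWAL time.Duration) int64 {
        ts := lastSentMs - minWAL.Milliseconds()
        if ts < 0 {
            ts = 0
        }
        if maxTS := now.Add(-maxWAL).UnixMilli(); ts < maxTS {
            ts = maxTS
        }
        return ts
    }
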
-func (i *Instance) getRemoteWriteTimestamp() int64 {
-	i.mut.Lock()
-	defer i.mut.Unlock()
-
-	if len(i.cfg.RemoteWrite) == 0 {
-		return timestamp.FromTime(time.Now())
-	}
-
-	if i.remoteStore == nil {
-		// Instance still being initialized; start at 0.
-		return 0
-	}
-	return i.remoteStore.LowestSentTimestamp()
-}
-
-// walStorage is an interface satisfied by wal.Storage, and created for testing.
-type walStorage interface {
-	// walStorage implements Queryable/ChunkQueryable for compatibility, but is unused.
-	storage.Queryable
-	storage.ChunkQueryable
-
-	Directory() string
-
-	StartTime() (int64, error)
-	WriteStalenessMarkers(remoteTsFunc func() int64) error
-	Appender(context.Context) storage.Appender
-	Truncate(mint int64) error
-
-	Close() error
-}
-
-// Hostname retrieves the hostname identifying the machine the process is
-// running on. It will return the value of $HOSTNAME, if defined, and fall
-// back to Go's os.Hostname.
-func Hostname() (string, error) {
-	hostname := os.Getenv("HOSTNAME")
-	if hostname != "" {
-		return hostname, nil
-	}
-
-	hostname, err := os.Hostname()
-	if err != nil {
-		return "", fmt.Errorf("failed to get hostname: %w", err)
-	}
-	return hostname, nil
-}
-
 func getHash(data interface{}) (string, error) {
 	bytes, err := json.Marshal(data)
 	if err != nil {
@@ -775,73 +184,3 @@ func getHash(data interface{}) (string, error) {
 	hash := md5.Sum(bytes)
 	return hex.EncodeToString(hash[:]), nil
 }
-
-var managerMtx sync.Mutex
-
-func newScrapeManager(o *scrape.Options, logger log.Logger, app storage.Appendable) *scrape.Manager {
-	// scrape.NewManager modifies a global variable in Prometheus. To avoid a
-	// data race of modifying that global, we lock a mutex here briefly.
-	managerMtx.Lock()
-	defer managerMtx.Unlock()
-	return scrape.NewManager(o, logger, app)
-}
-
-type runGroupContext struct {
-	cancel context.CancelFunc
-
-	g *run.Group
-}
-
-// runGroupWithContext creates a new run.Group that will be stopped if the
-// context gets canceled in addition to the normal behavior of stopping
-// when any of the actors stop.
-func runGroupWithContext(ctx context.Context) *runGroupContext {
-	ctx, cancel := context.WithCancel(ctx)
-
-	var g run.Group
-	g.Add(func() error {
-		<-ctx.Done()
-		return nil
-	}, func(_ error) {
-		cancel()
-	})
-
-	return &runGroupContext{cancel: cancel, g: &g}
-}
-
-func (rg *runGroupContext) Add(execute func() error, interrupt func(error)) {
-	rg.g.Add(execute, interrupt)
-}
-
-func (rg *runGroupContext) Run() error   { return rg.g.Run() }
-func (rg *runGroupContext) Stop(_ error) { rg.cancel() }
-
-// ErrNotReady is returned when the scrape manager is used but has not been
-// initialized yet.
-var ErrNotReady = errors.New("Scrape manager not ready")
-
-// readyScrapeManager allows a scrape manager to be retrieved, even if it's
-// set at a later point in time.
-type readyScrapeManager struct {
-	mtx sync.RWMutex
-	m   *scrape.Manager
-}
-
-// Set the scrape manager.
-func (rm *readyScrapeManager) Set(m *scrape.Manager) {
-	rm.mtx.Lock()
-	defer rm.mtx.Unlock()
-
-	rm.m = m
-}
-
-// Get the scrape manager. If it is not ready, return an error. 
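
readyScrapeManager, defined just above, is a small but recurring pattern: a value that only becomes available once initialization finishes, read through an RWMutex with a sentinel error until then. A generic sketch of the same idea (Go 1.18+; all names here are illustrative, not part of the removed package):

    package ready

    import (
        "errors"
        "sync"
    )

    var errNotReady = errors.New("not ready")

    // holder stores a value that is set once initialization finishes and
    // may be read, possibly earlier, by concurrent callers.
    type holder[T any] struct {
        mtx sync.RWMutex
        v   *T
    }

    func (h *holder[T]) Set(v *T) {
        h.mtx.Lock()
        defer h.mtx.Unlock()
        h.v = v
    }

    func (h *holder[T]) Get() (*T, error) {
        h.mtx.RLock()
        defer h.mtx.RUnlock()
        if h.v == nil {
            return nil, errNotReady
        }
        return h.v, nil
    }
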
-func (rm *readyScrapeManager) Get() (*scrape.Manager, error) { - rm.mtx.RLock() - defer rm.mtx.RUnlock() - - if rm.m != nil { - return rm.m, nil - } - - return nil, ErrNotReady -} diff --git a/internal/static/metrics/instance/instance_integration_test.go b/internal/static/metrics/instance/instance_integration_test.go deleted file mode 100644 index 71fc2ed8b5..0000000000 --- a/internal/static/metrics/instance/instance_integration_test.go +++ /dev/null @@ -1,281 +0,0 @@ -package instance - -import ( - "context" - "fmt" - "net" - "net/http" - "os" - "strings" - "sync" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/gorilla/mux" - "github.com/grafana/agent/internal/util" - "github.com/grafana/dskit/backoff" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/stretchr/testify/require" - "go.uber.org/atomic" -) - -var slowBackoff = backoff.Config{ - MinBackoff: 1 * time.Second, - MaxBackoff: 1 * time.Minute, - MaxRetries: 10, -} - -// TestInstance_Update performs a full integration test by doing the following: -// -// 1. Launching an HTTP server which can be scraped and also mocks the remote_write -// endpoint. -// 2. Creating an instance config with no scrape_configs or remote_write configs. -// 3. Updates the instance with a scrape_config and remote_write. -// 4. Validates that after 15 seconds, the scrape endpoint and remote_write -// endpoint has been called. -func TestInstance_Update(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - walDir := t.TempDir() - - var ( - scraped = atomic.NewBool(false) - pushed = atomic.NewBool(false) - ) - - r := mux.NewRouter() - r.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - scraped.Store(true) - promhttp.Handler().ServeHTTP(w, r) - }) - r.HandleFunc("/push", func(w http.ResponseWriter, r *http.Request) { - pushed.Store(true) - // We don't particularly care what was pushed to us, so we'll ignore - // everything here; we just want to make sure the endpoint was invoked. - }) - - // Start a server for exposing the router. - l, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - defer l.Close() - go func() { - _ = http.Serve(l, r) - }() - - // Create a new instance where it's not scraping or writing anything by default. - initialConfig := loadConfig(t, ` -name: integration_test -scrape_configs: [] -remote_write: [] -`) - inst, err := New(prometheus.NewRegistry(), initialConfig, walDir, logger) - require.NoError(t, err) - - instCtx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - - wg.Add(1) - go func() { - defer wg.Done() - err := inst.Run(instCtx) - require.NoError(t, err) - }() - - // Update the config with a single scrape_config and remote_write. - newConfig := loadConfig(t, fmt.Sprintf(` -name: integration_test -scrape_configs: - - job_name: test_scrape - scrape_interval: 5s - static_configs: - - targets: ['%[1]s'] -remote_write: - - url: http://%[1]s/push -`, l.Addr())) - - // Wait for the instance to be ready before updating. 
- util.EventuallyWithBackoff(t, func(t require.TestingT) { - require.True(t, inst.Ready()) - }, slowBackoff) - - // Wait for the instance to update (it might not be ready yet and would - // return an error until everything is initialized), and then wait again for - // the configs to apply and set the scraped and pushed atomic variables, - // indicating that the Prometheus components successfully updated. - util.EventuallyWithBackoff(t, func(t require.TestingT) { - err := inst.Update(newConfig) - if err != nil { - logger.Log("msg", "failed to update instance", "err", err) - } - require.NoError(t, err) - }, slowBackoff) - - util.EventuallyWithBackoff(t, func(t require.TestingT) { - require.True(t, scraped.Load() && pushed.Load()) - }, slowBackoff) -} - -func TestInstance_Update_Failed(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - walDir := t.TempDir() - - r := mux.NewRouter() - r.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - promhttp.Handler().ServeHTTP(w, r) - }) - r.HandleFunc("/push", func(w http.ResponseWriter, r *http.Request) {}) - - // Start a server for exposing the router. - l, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - defer l.Close() - go func() { - _ = http.Serve(l, r) - }() - - // Create a new instance where it's not scraping or writing anything by default. - initialConfig := loadConfig(t, ` -name: integration_test -scrape_configs: [] -remote_write: [] -`) - inst, err := New(prometheus.NewRegistry(), initialConfig, walDir, logger) - require.NoError(t, err) - - instCtx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - - wg.Add(1) - go func() { - defer wg.Done() - err := inst.Run(instCtx) - require.NoError(t, err) - }() - - // Create a new config to use for updating - newConfig := loadConfig(t, fmt.Sprintf(` -name: integration_test -scrape_configs: - - job_name: test_scrape - scrape_interval: 5s - static_configs: - - targets: ['%[1]s'] -remote_write: - - url: http://%[1]s/push -`, l.Addr())) - - // Make sure the instance can successfully update first - util.Eventually(t, func(t require.TestingT) { - err := inst.Update(newConfig) - if err != nil { - logger.Log("msg", "failed to update instance", "err", err) - } - require.NoError(t, err) - }) - - // Now force an update back to the original config to fail - inst.readyScrapeManager.Set(nil) - require.NotNil(t, inst.Update(initialConfig), "update should have failed") - require.Equal(t, newConfig, inst.cfg, "config did not roll back") -} - -// TestInstance_Update_InvalidChanges runs an instance with a blank initial -// config and performs various unacceptable updates that should return an -// error. -func TestInstance_Update_InvalidChanges(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - walDir := t.TempDir() - - // Create a new instance where it's not scraping or writing anything by default. - initialConfig := loadConfig(t, ` -name: integration_test -scrape_configs: [] -remote_write: [] -`) - inst, err := New(prometheus.NewRegistry(), initialConfig, walDir, logger) - require.NoError(t, err) - - instCtx, cancel := context.WithCancel(context.Background()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - - wg.Add(1) - go func() { - defer wg.Done() - err := inst.Run(instCtx) - require.NoError(t, err) - }() - - // Do a no-op update that succeeds to ensure that the instance is running. 
- util.Eventually(t, func(t require.TestingT) { - err := inst.Update(initialConfig) - if err != nil { - logger.Log("msg", "failed to update instance", "err", err) - } - require.NoError(t, err) - }) - - tt := []struct { - name string - mut func(c *Config) - expect string - }{ - { - name: "name changed", - mut: func(c *Config) { c.Name = "changed name" }, - expect: "name cannot be changed dynamically", - }, - { - name: "host_filter changed", - mut: func(c *Config) { c.HostFilter = true }, - expect: "host_filter cannot be changed dynamically", - }, - { - name: "wal_truncate_frequency changed", - mut: func(c *Config) { c.WALTruncateFrequency *= 2 }, - expect: "wal_truncate_frequency cannot be changed dynamically", - }, - { - name: "remote_flush_deadline changed", - mut: func(c *Config) { c.RemoteFlushDeadline *= 2 }, - expect: "remote_flush_deadline cannot be changed dynamically", - }, - { - name: "write_stale_on_shutdown changed", - mut: func(c *Config) { c.WriteStaleOnShutdown = true }, - expect: "write_stale_on_shutdown cannot be changed dynamically", - }, - } - - for _, tc := range tt { - t.Run(tc.name, func(t *testing.T) { - mutatedConfig := initialConfig - tc.mut(&mutatedConfig) - - err := inst.Update(mutatedConfig) - require.EqualError(t, err, tc.expect) - }) - } -} - -func loadConfig(t *testing.T, s string) Config { - cfg, err := UnmarshalConfig(strings.NewReader(s)) - require.NoError(t, err) - require.NoError(t, cfg.ApplyDefaults(DefaultGlobalConfig)) - return *cfg -} diff --git a/internal/static/metrics/instance/instance_test.go b/internal/static/metrics/instance/instance_test.go index 0f97aecac2..33e6551f03 100644 --- a/internal/static/metrics/instance/instance_test.go +++ b/internal/static/metrics/instance/instance_test.go @@ -1,28 +1,13 @@ package instance import ( - "context" "fmt" - "net/http/httptest" - "os" - "path" "strings" - "sync" "testing" - "time" - "github.com/go-kit/log" - "github.com/grafana/agent/internal/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/model/exemplar" - "github.com/prometheus/prometheus/model/histogram" - "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/model/metadata" - "github.com/prometheus/prometheus/storage" "github.com/stretchr/testify/require" ) @@ -184,238 +169,3 @@ remote_write: require.NoError(t, cfg.ApplyDefaults(DefaultGlobalConfig)) require.NotEmpty(t, cfg.RemoteWrite[0].Name) } - -func TestInstance_Path(t *testing.T) { - scrapeAddr, closeSrv := getTestServer(t) - defer closeSrv() - - walDir := t.TempDir() - - globalConfig := getTestGlobalConfig(t) - - cfg := getTestConfig(t, &globalConfig, scrapeAddr) - cfg.WALTruncateFrequency = time.Hour - cfg.RemoteFlushDeadline = time.Hour - - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - inst, err := New(prometheus.NewRegistry(), cfg, walDir, logger) - require.NoError(t, err) - runInstance(t, inst) - - // / path should exist for WAL - util.Eventually(t, func(t require.TestingT) { - _, err := os.Stat(path.Join(walDir, "test")) - require.NoError(t, err) - }) -} - -// TestInstance tests that discovery and scraping are working by using a mock -// instance of the WAL storage and testing that samples get written to it. -// This test touches most of Instance and is enough for a basic integration test. 
-func TestInstance(t *testing.T) { - scrapeAddr, closeSrv := getTestServer(t) - defer closeSrv() - - walDir := t.TempDir() - - globalConfig := getTestGlobalConfig(t) - cfg := getTestConfig(t, &globalConfig, scrapeAddr) - cfg.WALTruncateFrequency = time.Hour - cfg.RemoteFlushDeadline = time.Hour - - mockStorage := mockWalStorage{ - series: make(map[storage.SeriesRef]int), - directory: walDir, - } - newWal := func(_ prometheus.Registerer) (walStorage, error) { return &mockStorage, nil } - - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - inst, err := newInstance(cfg, nil, logger, newWal) - require.NoError(t, err) - runInstance(t, inst) - - // Wait until mockWalStorage has had a series added to it. - util.EventuallyWithBackoff(t, func(t require.TestingT) { - mockStorage.mut.Lock() - defer mockStorage.mut.Unlock() - require.True(t, len(mockStorage.series) > 0) - }, slowBackoff) -} - -// TestInstance_Recreate ensures that creating an instance with the same name twice -// does not cause any duplicate metrics registration that leads to a panic. -func TestInstance_Recreate(t *testing.T) { - scrapeAddr, closeSrv := getTestServer(t) - defer closeSrv() - - walDir := t.TempDir() - - globalConfig := getTestGlobalConfig(t) - - cfg := getTestConfig(t, &globalConfig, scrapeAddr) - cfg.Name = "recreate_test" - cfg.WALTruncateFrequency = time.Hour - cfg.RemoteFlushDeadline = time.Hour - - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - currentReg := prometheus.NewRegistry() - inst, err := New(currentReg, cfg, walDir, logger) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - exited := make(chan bool) - go func() { - err := inst.Run(ctx) - close(exited) - - if err != nil { - require.Equal(t, context.Canceled, err) - } - }() - - time.Sleep(1 * time.Second) - cancel() - <-exited - - // Recreate the instance, no panic should happen. 
- require.NotPanics(t, func() { - inst, err := New(currentReg, cfg, walDir, logger) - require.NoError(t, err) - runInstance(t, inst) - - time.Sleep(1 * time.Second) - }) -} - -func getTestServer(t *testing.T) (addr string, closeFunc func()) { - t.Helper() - - reg := prometheus.NewRegistry() - - testCounter := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "test_metric_total", - }) - testCounter.Inc() - reg.MustRegister(testCounter) - - handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{}) - httpSrv := httptest.NewServer(handler) - return httpSrv.Listener.Addr().String(), httpSrv.Close -} - -func getTestGlobalConfig(t *testing.T) GlobalConfig { - t.Helper() - - return GlobalConfig{ - Prometheus: config.GlobalConfig{ - ScrapeInterval: model.Duration(time.Millisecond * 50), - ScrapeTimeout: model.Duration(time.Millisecond * 25), - EvaluationInterval: model.Duration(time.Hour), - }, - } -} - -func getTestConfig(t *testing.T, global *GlobalConfig, scrapeAddr string) Config { - t.Helper() - - scrapeCfg := config.DefaultScrapeConfig - scrapeCfg.JobName = "test" - scrapeCfg.ScrapeInterval = global.Prometheus.ScrapeInterval - scrapeCfg.ScrapeTimeout = global.Prometheus.ScrapeTimeout - scrapeCfg.ServiceDiscoveryConfigs = discovery.Configs{ - discovery.StaticConfig{{ - Targets: []model.LabelSet{{ - model.AddressLabel: model.LabelValue(scrapeAddr), - }}, - Labels: model.LabelSet{}, - }}, - } - - cfg := DefaultConfig - cfg.Name = "test" - cfg.ScrapeConfigs = []*config.ScrapeConfig{&scrapeCfg} - cfg.global = *global - - return cfg -} - -type mockWalStorage struct { - storage.Queryable - storage.ChunkQueryable - - directory string - mut sync.Mutex - series map[storage.SeriesRef]int -} - -func (s *mockWalStorage) Directory() string { return s.directory } -func (s *mockWalStorage) StartTime() (int64, error) { return 0, nil } -func (s *mockWalStorage) WriteStalenessMarkers(f func() int64) error { return nil } -func (s *mockWalStorage) Close() error { return nil } -func (s *mockWalStorage) Truncate(mint int64) error { return nil } - -func (s *mockWalStorage) Appender(context.Context) storage.Appender { - return &mockAppender{s: s} -} - -type mockAppender struct { - s *mockWalStorage -} - -func (a *mockAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - if ref == 0 { - return a.Add(l, t, v) - } - return ref, a.AddFast(ref, t, v) -} - -// Add adds a new series and sets its written count to 1. -func (a *mockAppender) Add(l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - a.s.mut.Lock() - defer a.s.mut.Unlock() - - hash := l.Hash() - a.s.series[storage.SeriesRef(hash)] = 1 - return storage.SeriesRef(hash), nil -} - -// AddFast increments the number of writes to an existing series. 
-func (a *mockAppender) AddFast(ref storage.SeriesRef, t int64, v float64) error { - a.s.mut.Lock() - defer a.s.mut.Unlock() - _, ok := a.s.series[ref] - if !ok { - return storage.ErrNotFound - } - - a.s.series[ref]++ - return nil -} - -func (a *mockAppender) AppendExemplar(ref storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - return 0, nil -} - -func (a *mockAppender) Commit() error { - return nil -} - -func (a *mockAppender) Rollback() error { - return nil -} - -func runInstance(t *testing.T, i *Instance) { - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(func() { cancel() }) - go require.NotPanics(t, func() { - _ = i.Run(ctx) - }) -} diff --git a/internal/static/metrics/instance/manager.go b/internal/static/metrics/instance/manager.go deleted file mode 100644 index 6bb90324fd..0000000000 --- a/internal/static/metrics/instance/manager.go +++ /dev/null @@ -1,379 +0,0 @@ -package instance - -import ( - "context" - "errors" - "fmt" - "net/http" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" -) - -var ( - instanceAbnormalExits = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "agent_metrics_instance_abnormal_exits_total", - Help: "Total number of times a Prometheus instance exited unexpectedly, causing it to be restarted.", - }, []string{"instance_name"}) - - currentActiveInstances = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "agent_metrics_active_instances", - Help: "Current number of active instances being used by the agent.", - }) - - // DefaultBasicManagerConfig is the default config for the BasicManager. - DefaultBasicManagerConfig = BasicManagerConfig{ - InstanceRestartBackoff: 5 * time.Second, - } -) - -// Manager represents a set of methods for manipulating running instances at -// runtime. -type Manager interface { - // GetInstance retrieves a ManagedInstance by name. - GetInstance(name string) (ManagedInstance, error) - - // ListInstances returns all currently managed instances running - // within the Manager. The key will be the instance name from their config. - ListInstances() map[string]ManagedInstance - - // ListConfigs returns the config objects associated with a managed - // instance. The key will be the Name field from Config. - ListConfigs() map[string]Config - - // ApplyConfig creates a new Config or updates an existing Config if - // one with Config.Name already exists. - ApplyConfig(Config) error - - // DeleteConfig deletes a given managed instance based on its Config.Name. - DeleteConfig(name string) error - - // Stop stops the Manager and all managed instances. - Stop() -} - -// ManagedInstance is implemented by Instance. It is defined as an interface -// for the sake of testing from Manager implementations. 
-type ManagedInstance interface {
-	Run(ctx context.Context) error
-	Ready() bool
-	Update(c Config) error
-	TargetsActive() map[string][]*scrape.Target
-	StorageDirectory() string
-	Appender(ctx context.Context) storage.Appender
-	WriteHandler() http.Handler
-}
-
-// BasicManagerConfig controls the operations of a BasicManager.
-type BasicManagerConfig struct {
-	InstanceRestartBackoff time.Duration
-}
-
-// BasicManager implements the Manager interface. BasicManager will directly
-// launch instances and perform no extra processing.
-//
-// Other implementations of Manager usually wrap a BasicManager.
-type BasicManager struct {
-	cfgMut sync.Mutex
-	cfg    BasicManagerConfig
-	logger log.Logger
-
-	// Take care when locking mut: if you hold onto a lock of mut while calling
-	// Stop on a process, you will deadlock.
-	mut       sync.Mutex
-	processes map[string]*managedProcess
-
-	launch Factory
-}
-
-// managedProcess represents a goroutine running a ManagedInstance. cancel
-// requests that the goroutine should shut down. done will be closed after the
-// goroutine exits.
-type managedProcess struct {
-	cfg    Config
-	inst   ManagedInstance
-	cancel context.CancelFunc
-	done   chan bool
-}
-
-func (p managedProcess) Stop() {
-	p.cancel()
-	<-p.done
-}
-
-// Factory should return an unstarted instance given some config.
-type Factory func(c Config) (ManagedInstance, error)
-
-// NewBasicManager creates a new BasicManager. The launch function will be
-// invoked any time a new Config is applied.
-//
-// The lifecycle of any ManagedInstance returned by the launch function will
-// be handled by the BasicManager. Instances will be automatically restarted
-// if stopped, updated if the config changes, or removed when the Config is
-// deleted.
-func NewBasicManager(cfg BasicManagerConfig, logger log.Logger, launch Factory) *BasicManager {
-	return &BasicManager{
-		cfg:       cfg,
-		logger:    logger,
-		processes: make(map[string]*managedProcess),
-		launch:    launch,
-	}
-}
-
-// UpdateManagerConfig updates the BasicManagerConfig.
-func (m *BasicManager) UpdateManagerConfig(c BasicManagerConfig) {
-	m.cfgMut.Lock()
-	defer m.cfgMut.Unlock()
-	m.cfg = c
-}
-
-// GetInstance returns the given instance by name.
-func (m *BasicManager) GetInstance(name string) (ManagedInstance, error) {
-	m.mut.Lock()
-	defer m.mut.Unlock()
-
-	process, ok := m.processes[name]
-	if !ok {
-		return nil, fmt.Errorf("instance %s does not exist", name)
-	}
-	return process.inst, nil
-}
-
-// ListInstances returns the current active instances managed by BasicManager.
-func (m *BasicManager) ListInstances() map[string]ManagedInstance {
-	m.mut.Lock()
-	defer m.mut.Unlock()
-
-	res := make(map[string]ManagedInstance, len(m.processes))
-	for name, process := range m.processes {
-		if process == nil {
-			continue
-		}
-		res[name] = process.inst
-	}
-	return res
-}
-
-// ListConfigs lists the current active configs managed by BasicManager.
-func (m *BasicManager) ListConfigs() map[string]Config {
-	m.mut.Lock()
-	defer m.mut.Unlock()
-
-	res := make(map[string]Config, len(m.processes))
-	for name, process := range m.processes {
-		res[name] = process.cfg
-	}
-	return res
-}
-
-// ApplyConfig takes a Config and either starts a new managed instance or
-// updates an existing managed instance. The value for Name in c is used to
-// uniquely identify the Config and determine whether the Config has an
-// existing associated managed instance. 
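
For context on the API being deleted here: a Factory closure decided how each Config became a ManagedInstance, and BasicManager owned the lifecycle from there. A sketch of how this was typically wired up (it only compiles against the pre-removal package; NoOpInstance is the test stub deleted further below):

    package instance

    import "github.com/go-kit/log"

    func exampleBasicManagerUsage(logger log.Logger) error {
        // The Factory returns an unstarted ManagedInstance for each Config.
        factory := func(c Config) (ManagedInstance, error) {
            return NoOpInstance{}, nil
        }

        m := NewBasicManager(DefaultBasicManagerConfig, logger, factory)
        defer m.Stop()

        // A Config with a new Name spawns a process; re-applying the same
        // Name updates the running instance, or restarts it if the update
        // cannot be applied dynamically.
        return m.ApplyConfig(Config{Name: "example"})
    }
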
-func (m *BasicManager) ApplyConfig(c Config) error {
-	m.mut.Lock()
-	defer m.mut.Unlock()
-
-	// If the config already exists, we need to update it.
-	proc, ok := m.processes[c.Name]
-	if ok {
-		err := proc.inst.Update(c)
-
-		// If the instance could not be dynamically updated, we need to force the
-		// update by restarting it. If it failed for another reason, something
-		// serious went wrong and we'll completely give up without stopping the
-		// existing job.
-		if errors.Is(err, ErrInvalidUpdate{}) {
-			level.Info(m.logger).Log("msg", "could not dynamically update instance, will manually restart", "instance", c.Name, "reason", err)
-
-			// NOTE: we don't return here; we fall through to spawn the new instance.
-			proc.Stop()
-		} else if err != nil {
-			return fmt.Errorf("failed to update instance %s: %w", c.Name, err)
-		} else {
-			level.Info(m.logger).Log("msg", "dynamically updated instance", "instance", c.Name)
-
-			proc.cfg = c
-			return nil
-		}
-	}
-
-	// Spawn a new process for the new config.
-	err := m.spawnProcess(c)
-	if err != nil {
-		return err
-	}
-
-	currentActiveInstances.Inc()
-	return nil
-}
-
-func (m *BasicManager) spawnProcess(c Config) error {
-	inst, err := m.launch(c)
-	if err != nil {
-		return err
-	}
-
-	ctx, cancel := context.WithCancel(context.Background())
-	done := make(chan bool)
-
-	proc := &managedProcess{
-		cancel: cancel,
-		done:   done,
-		cfg:    c,
-		inst:   inst,
-	}
-	m.processes[c.Name] = proc
-
-	go func() {
-		m.runProcess(ctx, c.Name, inst)
-		close(done)
-
-		// Now that the process has stopped, we can remove it from our managed
-		// list.
-		//
-		// However, it's possible that a new Config may have been applied,
-		// overwriting the initial value in our map. We only want to delete the
-		// process from the map if it hasn't changed from what we initially
-		// set it to.
-		//
-		// We only use the instance for comparing (which will never change) because
-		// the instance may have dynamically been given a new config since this
-		// goroutine started.
-		m.mut.Lock()
-		if storedProc, exist := m.processes[c.Name]; exist && storedProc.inst == inst {
-			delete(m.processes, c.Name)
-		}
-		m.mut.Unlock()
-
-		currentActiveInstances.Dec()
-	}()
-
-	return nil
-}
-
-// runProcess runs an instance and keeps it alive until it is explicitly stopped
-// by cancelling the context.
-func (m *BasicManager) runProcess(ctx context.Context, name string, inst ManagedInstance) {
-	for {
-		err := inst.Run(ctx)
-		if err != nil && err != context.Canceled {
-			backoff := m.instanceRestartBackoff()
-
-			instanceAbnormalExits.WithLabelValues(name).Inc()
-			level.Error(m.logger).Log("msg", "instance stopped abnormally, restarting after backoff period", "err", err, "backoff", backoff, "instance", name)
-			time.Sleep(backoff)
-		} else {
-			level.Info(m.logger).Log("msg", "stopped instance", "instance", name)
-			break
-		}
-	}
-}
-
-func (m *BasicManager) instanceRestartBackoff() time.Duration {
-	m.cfgMut.Lock()
-	defer m.cfgMut.Unlock()
-	return m.cfg.InstanceRestartBackoff
-}
-
-// DeleteConfig removes a managed instance by its config name. Returns an error
-// if there is no such managed instance with the given name.
-func (m *BasicManager) DeleteConfig(name string) error {
-	m.mut.Lock()
-	proc, ok := m.processes[name]
-	if !ok {
-		m.mut.Unlock()
-		return errors.New("config does not exist")
-	}
-	m.mut.Unlock()
-
-	// spawnProcess is responsible for removing the process from the map after it
-	// stops so we don't need to delete anything from m.processes here. 
- proc.Stop() - return nil -} - -// Stop stops the BasicManager and stops all active processes for configs. -func (m *BasicManager) Stop() { - var wg sync.WaitGroup - - // We don't need to change m.processes here; processes remove themselves - // from the map (in spawnProcess). - m.mut.Lock() - wg.Add(len(m.processes)) - for _, proc := range m.processes { - go func(proc *managedProcess) { - proc.Stop() - wg.Done() - }(proc) - } - m.mut.Unlock() - - wg.Wait() -} - -// MockManager exposes methods of the Manager interface as struct fields. -// Useful for tests. -type MockManager struct { - GetInstanceFunc func(name string) (ManagedInstance, error) - ListInstancesFunc func() map[string]ManagedInstance - ListConfigsFunc func() map[string]Config - ApplyConfigFunc func(Config) error - DeleteConfigFunc func(name string) error - StopFunc func() -} - -// GetInstance implements Manager. -func (m MockManager) GetInstance(name string) (ManagedInstance, error) { - if m.GetInstanceFunc != nil { - return m.GetInstanceFunc(name) - } - panic("GetInstanceFunc not implemented") -} - -// ListInstances implements Manager. -func (m MockManager) ListInstances() map[string]ManagedInstance { - if m.ListInstancesFunc != nil { - return m.ListInstancesFunc() - } - panic("ListInstancesFunc not implemented") -} - -// ListConfigs implements Manager. -func (m MockManager) ListConfigs() map[string]Config { - if m.ListConfigsFunc != nil { - return m.ListConfigsFunc() - } - panic("ListConfigsFunc not implemented") -} - -// ApplyConfig implements Manager. -func (m MockManager) ApplyConfig(c Config) error { - if m.ApplyConfigFunc != nil { - return m.ApplyConfigFunc(c) - } - panic("ApplyConfigFunc not implemented") -} - -// DeleteConfig implements Manager. -func (m MockManager) DeleteConfig(name string) error { - if m.DeleteConfigFunc != nil { - return m.DeleteConfigFunc(name) - } - panic("DeleteConfigFunc not implemented") -} - -// Stop implements Manager. 
-func (m MockManager) Stop() { - if m.StopFunc != nil { - m.StopFunc() - return - } - panic("StopFunc not implemented") -} diff --git a/internal/static/metrics/instance/manager_test.go b/internal/static/metrics/instance/manager_test.go deleted file mode 100644 index 6afed26732..0000000000 --- a/internal/static/metrics/instance/manager_test.go +++ /dev/null @@ -1,158 +0,0 @@ -package instance - -import ( - "context" - "fmt" - "net/http" - "os" - "testing" - - "github.com/go-kit/log" - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" - "github.com/stretchr/testify/require" -) - -func TestBasicManager_ApplyConfig(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) - - baseMock := mockInstance{ - RunFunc: func(ctx context.Context) error { - logger.Log("msg", "starting an instance") - <-ctx.Done() - return nil - }, - UpdateFunc: func(c Config) error { - return nil - }, - TargetsActiveFunc: func() map[string][]*scrape.Target { - return nil - }, - } - - t.Run("dynamic update successful", func(t *testing.T) { - spawnedCount := 0 - spawner := func(c Config) (ManagedInstance, error) { - spawnedCount++ - - newMock := baseMock - return &newMock, nil - } - - cm := NewBasicManager(DefaultBasicManagerConfig, logger, spawner) - - for i := 0; i < 10; i++ { - err := cm.ApplyConfig(Config{Name: "test"}) - require.NoError(t, err) - } - - require.Equal(t, 1, spawnedCount) - }) - - t.Run("dynamic update unsuccessful", func(t *testing.T) { - spawnedCount := 0 - spawner := func(c Config) (ManagedInstance, error) { - spawnedCount++ - - newMock := baseMock - newMock.UpdateFunc = func(c Config) error { - return ErrInvalidUpdate{ - Inner: fmt.Errorf("cannot dynamically update for testing reasons"), - } - } - return &newMock, nil - } - - cm := NewBasicManager(DefaultBasicManagerConfig, logger, spawner) - - for i := 0; i < 10; i++ { - err := cm.ApplyConfig(Config{Name: "test"}) - require.NoError(t, err) - } - - require.Equal(t, 10, spawnedCount) - }) - - t.Run("dynamic update errored", func(t *testing.T) { - spawnedCount := 0 - spawner := func(c Config) (ManagedInstance, error) { - spawnedCount++ - - newMock := baseMock - newMock.UpdateFunc = func(c Config) error { - return fmt.Errorf("something really bad happened") - } - return &newMock, nil - } - - cm := NewBasicManager(DefaultBasicManagerConfig, logger, spawner) - - // Creation should succeed - err := cm.ApplyConfig(Config{Name: "test"}) - require.NoError(t, err) - - // ...but the update should fail - err = cm.ApplyConfig(Config{Name: "test"}) - require.Error(t, err, "something really bad happened") - require.Equal(t, 1, spawnedCount) - }) -} - -type mockInstance struct { - RunFunc func(ctx context.Context) error - ReadyFunc func() bool - UpdateFunc func(c Config) error - TargetsActiveFunc func() map[string][]*scrape.Target - StorageDirectoryFunc func() string - AppenderFunc func() storage.Appender - WriteHandlerFunc func() http.Handler -} - -func (m mockInstance) Run(ctx context.Context) error { - if m.RunFunc != nil { - return m.RunFunc(ctx) - } - panic("RunFunc not provided") -} - -func (m mockInstance) Ready() bool { - if m.ReadyFunc != nil { - return m.ReadyFunc() - } - panic("ReadyFunc not provided") -} - -func (m mockInstance) Update(c Config) error { - if m.UpdateFunc != nil { - return m.UpdateFunc(c) - } - panic("UpdateFunc not provided") -} - -func (m mockInstance) TargetsActive() map[string][]*scrape.Target { - if m.TargetsActiveFunc != nil { - return m.TargetsActiveFunc() - } - 
panic("TargetsActiveFunc not provided") -} - -func (m mockInstance) StorageDirectory() string { - if m.StorageDirectoryFunc != nil { - return m.StorageDirectoryFunc() - } - panic("StorageDirectoryFunc not provided") -} - -func (m mockInstance) WriteHandler() http.Handler { - if m.WriteHandlerFunc != nil { - return m.WriteHandlerFunc() - } - panic("GetWriteHandlerFunc not provided") -} - -func (m mockInstance) Appender(_ context.Context) storage.Appender { - if m.AppenderFunc != nil { - return m.AppenderFunc() - } - panic("AppenderFunc not provided") -} diff --git a/internal/static/metrics/instance/modal_manager.go b/internal/static/metrics/instance/modal_manager.go index 18abb4f1ed..7308b351aa 100644 --- a/internal/static/metrics/instance/modal_manager.go +++ b/internal/static/metrics/instance/modal_manager.go @@ -2,12 +2,6 @@ package instance import ( "fmt" - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" ) // Mode controls how instances are created. @@ -42,175 +36,3 @@ func (m *Mode) UnmarshalYAML(unmarshal func(interface{}) error) error { return fmt.Errorf("unsupported instance_mode '%s'. supported values 'shared', 'distinct'", plain) } } - -// ModalManager runs instances by either grouping them or running them fully -// separately. -type ModalManager struct { - mut sync.RWMutex - mode Mode - configs map[string]Config - - changedConfigs *prometheus.CounterVec - currentActiveConfigs prometheus.Gauge - - log log.Logger - - // The ModalManager wraps around a "final" Manager that is intended to - // launch and manage instances based on Configs. This is specified here by the - // "wrapped" Manager. - // - // However, there may be another manager performing formations on the configs - // before they are passed through to wrapped. This is specified by the "active" - // Manager. - // - // If no transformations on Configs are needed, active will be identical to - // wrapped. - wrapped, active Manager -} - -// NewModalManager creates a new ModalManager. -func NewModalManager(reg prometheus.Registerer, l log.Logger, next Manager, mode Mode) (*ModalManager, error) { - changedConfigs := promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ - Name: "agent_metrics_configs_changed_total", - Help: "Total number of dynamically updated configs", - }, []string{"event"}) - currentActiveConfigs := promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Name: "agent_metrics_active_configs", - Help: "Current number of active configs being used by the agent.", - }) - - mm := ModalManager{ - wrapped: next, - log: l, - changedConfigs: changedConfigs, - currentActiveConfigs: currentActiveConfigs, - configs: make(map[string]Config), - } - if err := mm.SetMode(mode); err != nil { - return nil, err - } - return &mm, nil -} - -// SetMode updates the mode ModalManager is running in. Changing the mode is -// an expensive operation; all underlying configs must be stopped and then -// reapplied. -func (m *ModalManager) SetMode(newMode Mode) error { - if newMode == "" { - newMode = DefaultMode - } - - m.mut.Lock() - defer m.mut.Unlock() - - var ( - prevMode = m.mode - prevActive = m.active - ) - - if prevMode == newMode { - return nil - } - - // Set the active Manager based on the new mode. "distinct" means no transformations - // need to be applied and we can use the wrapped Manager directly. Otherwise, we need - // to create a new Manager to apply transformations. 
- switch newMode { - case ModeDistinct: - m.active = m.wrapped - case ModeShared: - m.active = NewGroupManager(m.wrapped) - default: - panic("unknown mode " + m.mode) - } - m.mode = newMode - - // Remove all configs from the previous active Manager. - if prevActive != nil { - prevActive.Stop() - } - - // Re-apply configs to the new active Manager. - var firstError error - for name, cfg := range m.configs { - err := m.active.ApplyConfig(cfg) - if err != nil { - level.Error(m.log).Log("msg", "failed to apply config when changing modes", "name", name, "prev_mode", prevMode, "new_mode", newMode, "err", err) - } - if firstError == nil && err != nil { - firstError = err - } - } - - return firstError -} - -// GetInstance implements Manager. -func (m *ModalManager) GetInstance(name string) (ManagedInstance, error) { - m.mut.RLock() - defer m.mut.RUnlock() - return m.active.GetInstance(name) -} - -// ListInstances implements Manager. -func (m *ModalManager) ListInstances() map[string]ManagedInstance { - m.mut.RLock() - defer m.mut.RUnlock() - return m.active.ListInstances() -} - -// ListConfigs implements Manager. -func (m *ModalManager) ListConfigs() map[string]Config { - m.mut.RLock() - defer m.mut.RUnlock() - return m.active.ListConfigs() -} - -// ApplyConfig implements Manager. -func (m *ModalManager) ApplyConfig(c Config) error { - m.mut.Lock() - defer m.mut.Unlock() - - if err := m.active.ApplyConfig(c); err != nil { - return err - } - - if _, existingConfig := m.configs[c.Name]; !existingConfig { - m.currentActiveConfigs.Inc() - m.changedConfigs.WithLabelValues("created").Inc() - } else { - m.changedConfigs.WithLabelValues("updated").Inc() - } - - m.configs[c.Name] = c - - return nil -} - -// DeleteConfig implements Manager. -func (m *ModalManager) DeleteConfig(name string) error { - m.mut.Lock() - defer m.mut.Unlock() - - if err := m.active.DeleteConfig(name); err != nil { - return err - } - - if _, existingConfig := m.configs[name]; existingConfig { - m.currentActiveConfigs.Dec() - delete(m.configs, name) - } - - m.changedConfigs.WithLabelValues("deleted").Inc() - return nil -} - -// Stop implements Manager. -func (m *ModalManager) Stop() { - m.mut.Lock() - defer m.mut.Unlock() - - m.active.Stop() - m.currentActiveConfigs.Set(0) - m.configs = make(map[string]Config) -} diff --git a/internal/static/metrics/instance/noop.go b/internal/static/metrics/instance/noop.go deleted file mode 100644 index f9f86b8713..0000000000 --- a/internal/static/metrics/instance/noop.go +++ /dev/null @@ -1,49 +0,0 @@ -package instance - -import ( - "context" - "net/http" - - "github.com/prometheus/prometheus/scrape" - "github.com/prometheus/prometheus/storage" -) - -// NoOpInstance implements the Instance interface in pkg/prom -// but does not do anything. Useful for tests. -type NoOpInstance struct{} - -// Run implements Instance. -func (NoOpInstance) Run(ctx context.Context) error { - <-ctx.Done() - return nil -} - -// Ready implements Instance. -func (NoOpInstance) Ready() bool { - return true -} - -// Update implements Instance. -func (NoOpInstance) Update(_ Config) error { - return nil -} - -// TargetsActive implements Instance. -func (NoOpInstance) TargetsActive() map[string][]*scrape.Target { - return nil -} - -// StorageDirectory implements Instance. -func (NoOpInstance) StorageDirectory() string { - return "" -} - -// WriteHandler implements Instance. 
-func (NoOpInstance) WriteHandler() http.Handler {
-	return nil
-}
-
-// Appender implements Instance.
-func (NoOpInstance) Appender(_ context.Context) storage.Appender {
-	return nil
-}
diff --git a/internal/static/server/logger.go b/internal/static/server/logger.go
deleted file mode 100644
index 0068775ac2..0000000000
--- a/internal/static/server/logger.go
+++ /dev/null
@@ -1,118 +0,0 @@
-package server
-
-import (
-	"sync"
-
-	"github.com/go-kit/log"
-	util_log "github.com/grafana/agent/internal/util/log"
-	dskit "github.com/grafana/dskit/log"
-)
-
-// Logger implements Go Kit's log.Logger interface. It supports being
-// dynamically updated at runtime.
-type Logger struct {
-	// mut protects against race conditions accessing l, which can be modified
-	// and accessed concurrently if ApplyConfig and Log are called at the same
-	// time.
-	mut sync.RWMutex
-	l   log.Logger
-
-	// HookLogger is used to temporarily hijack logs for support bundles.
-	HookLogger HookLogger
-
-	// makeLogger will default to defaultLogger. It's a struct
-	// member to make testing work properly.
-	makeLogger func(*Config) (log.Logger, error)
-}
-
-// HookLogger is used to temporarily redirect log output to an extra sink,
-// such as the in-memory buffer used when collecting a support bundle.
-type HookLogger struct {
-	mut     sync.RWMutex
-	enabled bool
-	logger  log.Logger
-}
-
-// NewLogger creates a new Logger.
-func NewLogger(cfg *Config) *Logger {
-	return newLogger(cfg, defaultLogger)
-}
-
-// NewLoggerFromLevel creates a new logger from logging.Level and logging.Format.
-func NewLoggerFromLevel(lvl dskit.Level, fmt string) *Logger {
-	logger, err := makeDefaultLogger(lvl, fmt)
-	if err != nil {
-		panic(err)
-	}
-	return &Logger{
-		l: logger,
-	}
-}
-
-func newLogger(cfg *Config, ctor func(*Config) (log.Logger, error)) *Logger {
-	l := Logger{makeLogger: ctor}
-	if err := l.ApplyConfig(cfg); err != nil {
-		panic(err)
-	}
-	return &l
-}
-
-// ApplyConfig applies configuration changes to the logger.
-func (l *Logger) ApplyConfig(cfg *Config) error {
-	l.mut.Lock()
-	defer l.mut.Unlock()
-
-	newLogger, err := l.makeLogger(cfg)
-	if err != nil {
-		return err
-	}
-
-	l.l = newLogger
-	return nil
-}
-
-func defaultLogger(cfg *Config) (log.Logger, error) {
-	return makeDefaultLogger(cfg.LogLevel.Level, cfg.LogFormat)
-}
-
-func makeDefaultLogger(lvl dskit.Level, fmt string) (log.Logger, error) {
-	var l log.Logger
-
-	l, err := util_log.NewPrometheusLogger(lvl, fmt)
-	if err != nil {
-		return nil, err
-	}
-
-	// There are two wrappers on the log, so skip two extra stacks vs. the default.
-	return log.With(l, "caller", log.Caller(5)), nil
-}
-
-// Log logs a log line.
-func (l *Logger) Log(kvps ...interface{}) error {
-	l.mut.RLock()
-	defer l.mut.RUnlock()
-	err := l.HookLogger.Log(kvps...)
-	if err != nil {
-		return err
-	}
-	return l.l.Log(kvps...)
-}
-
-// Log implements log.Logger.
-func (hl *HookLogger) Log(kvps ...interface{}) error {
-	hl.mut.RLock()
-	defer hl.mut.RUnlock()
-	if hl.enabled {
-		return hl.logger.Log(kvps...)
-	}
-	return nil
-}
-
-// Set sets where the HookLogger should tee logs to.
-// If a nil logger is passed, the HookLogger is disabled.
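-//
-// A hypothetical support-bundle flow tees logs into a buffer like so:
-//
-//	var buf bytes.Buffer
-//	logger.HookLogger.Set(log.NewLogfmtLogger(log.NewSyncWriter(&buf)))
-//	defer logger.HookLogger.Set(nil) // disable the hook again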
-func (hl *HookLogger) Set(l log.Logger) { - hl.mut.Lock() - defer hl.mut.Unlock() - - hl.enabled = l != nil - hl.logger = l -} diff --git a/internal/static/server/logger_test.go b/internal/static/server/logger_test.go deleted file mode 100644 index 083933e495..0000000000 --- a/internal/static/server/logger_test.go +++ /dev/null @@ -1,58 +0,0 @@ -package server - -import ( - "bytes" - "testing" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" -) - -func TestLogger_DefaultParameters(t *testing.T) { - makeLogger := func(cfg *Config) (log.Logger, error) { - var l log.Logger - require.Equal(t, "info", cfg.LogLevel.String()) - require.Equal(t, "logfmt", cfg.LogFormat) - return l, nil - } - defaultCfg := DefaultConfig() - newLogger(&defaultCfg, makeLogger).makeLogger(&defaultCfg) -} - -func TestLogger_ApplyConfig(t *testing.T) { - var buf bytes.Buffer - makeLogger := func(cfg *Config) (log.Logger, error) { - l := log.NewLogfmtLogger(log.NewSyncWriter(&buf)) - if cfg.LogFormat == "json" { - l = log.NewJSONLogger(log.NewSyncWriter(&buf)) - } - l = level.NewFilter(l, cfg.LogLevel.Level.Option) - return l, nil - } - - var cfg Config - cfgText := `log_level: error` - - err := yaml.Unmarshal([]byte(cfgText), &cfg) - require.NoError(t, err) - - l := newLogger(&cfg, makeLogger) - level.Debug(l).Log("msg", "this should not appear") - - cfgText = ` -log_level: debug -log_format: json` - err = yaml.Unmarshal([]byte(cfgText), &cfg) - require.NoError(t, err) - - err = l.ApplyConfig(&cfg) - require.NoError(t, err) - - level.Debug(l).Log("msg", "this should appear") - require.JSONEq(t, `{ - "level":"debug", - "msg":"this should appear" - }`, buf.String()) -} diff --git a/internal/static/server/logger_windows.go b/internal/static/server/logger_windows.go deleted file mode 100644 index c84bd0888c..0000000000 --- a/internal/static/server/logger_windows.go +++ /dev/null @@ -1,110 +0,0 @@ -package server - -import ( - "runtime" - "strings" - - "github.com/go-kit/log/level" - - "github.com/go-kit/log" - el "golang.org/x/sys/windows/svc/eventlog" -) - -// Default name for the Grafana Agent under Windows -const ServiceName = "Grafana Agent" - -// NewWindowsEventLogger creates a new logger that writes to the event log -func NewWindowsEventLogger(cfg *Config) *Logger { - return newLogger(cfg, makeWindowsEventLogger) -} - -func makeWindowsEventLogger(cfg *Config) (log.Logger, error) { - // Set up the log in windows events - err := el.InstallAsEventCreate(ServiceName, el.Error|el.Info|el.Warning) - - // Agent should expect an error of 'already exists' if the Event Log sink has already previously been installed - if err != nil && !strings.Contains(err.Error(), "already exists") { - return nil, err - } - il, err := el.Open(ServiceName) - if err != nil { - return nil, err - } - - // Ensure the logger gets closed when the GC runs. It's valid to have more than one win logger open concurrently. 
- runtime.SetFinalizer(il, func(l *el.Log) { - l.Close() - }) - - // These are set up to be writers for each Windows log level - // Set up this way so we can utilize all the benefits of logformatter - infoLogger := newWinLogWrapper(cfg.LogFormat, func(p []byte) error { - return il.Info(1, string(p)) - }) - warningLogger := newWinLogWrapper(cfg.LogFormat, func(p []byte) error { - return il.Warning(1, string(p)) - }) - - errorLogger := newWinLogWrapper(cfg.LogFormat, func(p []byte) error { - return il.Error(1, string(p)) - }) - - wl := &winLogger{ - errorLogger: errorLogger, - infoLogger: infoLogger, - warningLogger: warningLogger, - } - return level.NewFilter(wl, cfg.LogLevel.Level.Option), nil -} - -// Looks through the key value pairs in the log for level and extract the value -func getLevel(keyvals ...interface{}) level.Value { - for i := 0; i < len(keyvals); i++ { - if vo, ok := keyvals[i].(level.Value); ok { - return vo - } - } - return nil -} - -func newWinLogWrapper(format string, write func(p []byte) error) log.Logger { - infoWriter := &winLogWriter{writer: write} - infoLogger := log.NewLogfmtLogger(infoWriter) - if format == "json" { - infoLogger = log.NewJSONLogger(infoWriter) - } - return infoLogger -} - -type winLogger struct { - errorLogger log.Logger - infoLogger log.Logger - warningLogger log.Logger -} - -func (w *winLogger) Log(keyvals ...interface{}) error { - lvl := getLevel(keyvals...) - // 3 different loggers are used so that agent can utilize the formatting features of go-kit logging - // if agent did not use this then the windows logger uses different function calls for different levels - // this is paired with the fact that the io.Writer interface only gives a byte array. - switch lvl { - case level.DebugValue(): - return w.infoLogger.Log(keyvals...) - case level.InfoValue(): - return w.infoLogger.Log(keyvals...) - case level.WarnValue(): - return w.warningLogger.Log(keyvals...) - case level.ErrorValue(): - return w.errorLogger.Log(keyvals...) - default: - return w.infoLogger.Log(keyvals...) - } -} - -type winLogWriter struct { - writer func(p []byte) error -} - -func (i *winLogWriter) Write(p []byte) (n int, err error) { - return len(p), i.writer(p) -} diff --git a/internal/static/server/server.go b/internal/static/server/server.go index 26f6210003..adae8c6a30 100644 --- a/internal/static/server/server.go +++ b/internal/static/server/server.go @@ -6,438 +6,9 @@ package server import ( "context" - "errors" - "fmt" "net" - "net/http" - _ "net/http/pprof" // anonymous import to get the pprof handler registered - "sync" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/gorilla/mux" - "github.com/grafana/ckit/memconn" - "github.com/grafana/dskit/middleware" - _ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof" // anonymous import to get the godeltaprof handler registered - grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" - "github.com/hashicorp/go-multierror" - "github.com/oklog/run" - otgrpc "github.com/opentracing-contrib/go-grpc" - "github.com/opentracing/opentracing-go" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "golang.org/x/net/netutil" - "google.golang.org/grpc" - "google.golang.org/grpc/keepalive" ) // DialContextFunc is a function matching the signature of // net.Dialer.DialContext. type DialContextFunc func(ctx context.Context, network string, addr string) (net.Conn, error) - -// Server wraps an HTTP and gRPC server with some common initialization. 
-// -// Unless instrumentation is disabled in the Servers config, Prometheus metrics -// will be automatically generated for the server. -type Server struct { - flagsMut sync.Mutex - flags Flags - - // Listeners for in-memory connections. These never use TLS. - httpMemListener *memconn.Listener - grpcMemListener *memconn.Listener - - // Listeners to use for connections. These will use TLS when TLS is enabled. - httpListener net.Listener - grpcListener net.Listener - - updateHTTPTLS func(TLSConfig) error - updateGRPCTLS func(TLSConfig) error - - HTTP *mux.Router - HTTPServer *http.Server - GRPC *grpc.Server - - // DialContext creates a connection to the given network/address. If address - // matches the Server's internal HTTP or gRPC address, an internal in-memory - // connection will be opened. - DialContext DialContextFunc -} - -type metrics struct { - tcpConnections *prometheus.GaugeVec - tcpConnectionsLimit *prometheus.GaugeVec - requestDuration *prometheus.HistogramVec - receivedMessageSize *prometheus.HistogramVec - sentMessageSize *prometheus.HistogramVec - inflightRequests *prometheus.GaugeVec -} - -func newMetrics(r prometheus.Registerer) (*metrics, error) { - var m metrics - - // Create metrics for the server - m.tcpConnections = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "agent_tcp_connections", - Help: "Current number of accepted TCP connections.", - }, []string{"protocol"}) - m.tcpConnectionsLimit = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "agent_tcp_connections_limit", - Help: "The maximum number of TCP connections that can be accepted (0 = unlimited)", - }, []string{"protocol"}) - m.requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_request_duration_seconds", - Help: "Time in seconds spent serving HTTP requests.", - }, []string{"method", "route", "status_code", "ws"}) - m.receivedMessageSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_request_message_bytes", - Help: "Size (in bytes) of messages received in the request.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - m.sentMessageSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Name: "agent_response_message_bytes", - Help: "Size (in bytes) of messages sent in response.", - Buckets: middleware.BodySizeBuckets, - }, []string{"method", "route"}) - m.inflightRequests = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "agent_inflight_requests", - Help: "Current number of inflight requests.", - }, []string{"method", "route"}) - - if r != nil { - // Register all of our metrics - cc := []prometheus.Collector{ - m.tcpConnections, m.tcpConnectionsLimit, m.requestDuration, m.receivedMessageSize, - m.sentMessageSize, m.inflightRequests, - } - for _, c := range cc { - if err := r.Register(c); err != nil { - return nil, fmt.Errorf("failed registering server metrics: %w", err) - } - } - } - return &m, nil -} - -// New creates a new Server with the given config. -// -// r is used to register Server-specific metrics. If r is nil, no metrics will -// be registered. -// -// g is used for collecting metrics from the instrumentation handlers, when -// enabled. If g is nil, a /metrics endpoint will not be registered. 
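-//
-// A minimal hypothetical construction (error handling elided; logger and ctx
-// are assumed to exist):
-//
-//	srv, _ := New(logger, prometheus.NewRegistry(), nil, DefaultConfig(), DefaultFlags)
-//	go func() { _ = srv.Run(ctx) }()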
-func New(l log.Logger, r prometheus.Registerer, g prometheus.Gatherer, cfg Config, flags Flags) (srv *Server, err error) {
-	if l == nil {
-		l = log.NewNopLogger()
-	}
-
-	switch {
-	case flags.HTTP.InMemoryAddr == "":
-		return nil, fmt.Errorf("in memory HTTP address must be configured")
-	case flags.GRPC.InMemoryAddr == "":
-		return nil, fmt.Errorf("in memory gRPC address must be configured")
-	case flags.HTTP.InMemoryAddr == flags.GRPC.InMemoryAddr:
-		return nil, fmt.Errorf("in memory HTTP and gRPC address must be different")
-	}
-
-	m, err := newMetrics(r)
-	if err != nil {
-		return nil, err
-	}
-
-	// Create listeners first so we can fail early if the port is in use.
-	httpListener, err := newHTTPListener(&flags.HTTP, m)
-	if err != nil {
-		return nil, err
-	}
-	defer func() {
-		if err != nil {
-			_ = httpListener.Close()
-		}
-	}()
-	grpcListener, err := newGRPCListener(&flags.GRPC, m)
-	if err != nil {
-		return nil, err
-	}
-	defer func() {
-		if err != nil {
-			_ = grpcListener.Close()
-		}
-	}()
-
-	// Configure TLS
-	var (
-		updateHTTPTLS func(TLSConfig) error
-		updateGRPCTLS func(TLSConfig) error
-	)
-	if flags.HTTP.UseTLS {
-		httpTLSListener, err := newTLSListener(httpListener, cfg.HTTP.TLSConfig, l)
-		if err != nil {
-			return nil, fmt.Errorf("generating HTTP TLS config: %w", err)
-		}
-		httpListener = httpTLSListener
-		updateHTTPTLS = httpTLSListener.ApplyConfig
-	}
-	if flags.GRPC.UseTLS {
-		grpcTLSListener, err := newTLSListener(grpcListener, cfg.GRPC.TLSConfig, l)
-		if err != nil {
-			return nil, fmt.Errorf("generating GRPC TLS config: %w", err)
-		}
-		grpcListener = grpcTLSListener
-		updateGRPCTLS = grpcTLSListener.ApplyConfig
-	}
-
-	level.Info(l).Log(
-		"msg", "server listening on addresses",
-		"http", httpListener.Addr(), "grpc", grpcListener.Addr(),
-		"http_tls_enabled", flags.HTTP.UseTLS, "grpc_tls_enabled", flags.GRPC.UseTLS,
-	)
-
-	// Build servers
-	grpcServer := newGRPCServer(l, &flags.GRPC, m)
-	httpServer, router, err := newHTTPServer(l, g, &flags, m)
-	if err != nil {
-		return nil, err
-	}
-
-	// Build in-memory listeners and dial function
-	var (
-		httpMemListener = memconn.NewListener(nil)
-		grpcMemListener = memconn.NewListener(nil)
-	)
-	dialFunc := func(ctx context.Context, network string, address string) (net.Conn, error) {
-		switch address {
-		case flags.HTTP.InMemoryAddr:
-			return httpMemListener.DialContext(ctx)
-		case flags.GRPC.InMemoryAddr:
-			return grpcMemListener.DialContext(ctx)
-		default:
-			return (&net.Dialer{}).DialContext(ctx, network, address)
-		}
-	}
-
-	return &Server{
-		flags:           flags,
-		httpListener:    httpListener,
-		grpcListener:    grpcListener,
-		httpMemListener: httpMemListener,
-		grpcMemListener: grpcMemListener,
-
-		updateHTTPTLS: updateHTTPTLS,
-		updateGRPCTLS: updateGRPCTLS,
-
-		HTTP:        router,
-		HTTPServer:  httpServer,
-		GRPC:        grpcServer,
-		DialContext: dialFunc,
-	}, nil
-}
-
-func newHTTPListener(opts *HTTPFlags, m *metrics) (net.Listener, error) {
-	httpAddress := opts.ListenAddress
-	if httpAddress == "" {
-		return nil, fmt.Errorf("http address not set")
-	}
-	httpListener, err := net.Listen(opts.ListenNetwork, httpAddress)
-	if err != nil {
-		return nil, fmt.Errorf("creating HTTP listener: %w", err)
-	}
-	httpListener = middleware.CountingListener(httpListener, m.tcpConnections.WithLabelValues("http"))
-
-	m.tcpConnectionsLimit.WithLabelValues("http").Set(float64(opts.ConnLimit))
-	if opts.ConnLimit > 0 {
-		httpListener = netutil.LimitListener(httpListener, opts.ConnLimit)
-	}
-	return httpListener, nil
-}
-
-func newGRPCListener(opts
*GRPCFlags, m *metrics) (net.Listener, error) { - grpcAddress := opts.ListenAddress - if grpcAddress == "" { - return nil, fmt.Errorf("gRPC address not set") - } - grpcListener, err := net.Listen(opts.ListenNetwork, grpcAddress) - if err != nil { - return nil, fmt.Errorf("creating gRPC listener: %w", err) - } - grpcListener = middleware.CountingListener(grpcListener, m.tcpConnections.WithLabelValues("grpc")) - - m.tcpConnectionsLimit.WithLabelValues("grpc").Set(float64(opts.ConnLimit)) - if opts.ConnLimit > 0 { - grpcListener = netutil.LimitListener(grpcListener, opts.ConnLimit) - } - return grpcListener, nil -} - -func newGRPCServer(l log.Logger, opts *GRPCFlags, m *metrics) *grpc.Server { - serverLog := middleware.GRPCServerLog{ - WithRequest: true, - Log: l, - } - grpcOptions := []grpc.ServerOption{ - grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer( - serverLog.UnaryServerInterceptor, - otgrpc.OpenTracingServerInterceptor(opentracing.GlobalTracer()), - middleware.UnaryServerInstrumentInterceptor(m.requestDuration), - )), - grpc.StreamInterceptor(grpc_middleware.ChainStreamServer( - serverLog.StreamServerInterceptor, - otgrpc.OpenTracingStreamServerInterceptor(opentracing.GlobalTracer()), - middleware.StreamServerInstrumentInterceptor(m.requestDuration), - )), - grpc.KeepaliveParams(keepalive.ServerParameters{ - MaxConnectionIdle: opts.MaxConnectionIdle, - MaxConnectionAge: opts.MaxConnectionAge, - MaxConnectionAgeGrace: opts.MaxConnectionAgeGrace, - Time: opts.KeepaliveTime, - Timeout: opts.KeepaliveTimeout, - }), - grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ - MinTime: opts.MinTimeBetweenPings, - PermitWithoutStream: opts.PingWithoutStreamAllowed, - }), - grpc.MaxRecvMsgSize(opts.MaxRecvMsgSize), - grpc.MaxSendMsgSize(opts.MaxSendMsgSize), - grpc.MaxConcurrentStreams(uint32(opts.MaxConcurrentStreams)), - grpc.StatsHandler(middleware.NewStatsHandler(m.receivedMessageSize, m.sentMessageSize, m.inflightRequests)), - } - - return grpc.NewServer(grpcOptions...) -} - -func newHTTPServer(l log.Logger, g prometheus.Gatherer, opts *Flags, m *metrics) (*http.Server, *mux.Router, error) { - router := mux.NewRouter() - if opts.RegisterInstrumentation && g != nil { - router.Handle("/metrics", promhttp.HandlerFor(g, promhttp.HandlerOpts{ - EnableOpenMetrics: true, - })) - router.PathPrefix("/debug/pprof").Handler(http.DefaultServeMux) - } - - var sourceIPs *middleware.SourceIPExtractor - if opts.LogSourceIPs { - var err error - sourceIPs, err = middleware.NewSourceIPs(opts.LogSourceIPsHeader, opts.LogSourceIPsRegex) - if err != nil { - return nil, nil, fmt.Errorf("error setting up source IP extraction: %v", err) - } - } - - httpMiddleware := []middleware.Interface{ - middleware.Tracer{ - RouteMatcher: router, - SourceIPs: sourceIPs, - }, - middleware.Log{ - Log: l, - SourceIPs: sourceIPs, - }, - middleware.Instrument{ - RouteMatcher: router, - Duration: m.requestDuration, - RequestBodySize: m.receivedMessageSize, - ResponseBodySize: m.sentMessageSize, - InflightRequests: m.inflightRequests, - }, - } - - httpServer := &http.Server{ - ReadTimeout: opts.HTTP.ReadTimeout, - WriteTimeout: opts.HTTP.WriteTimeout, - IdleTimeout: opts.HTTP.IdleTimeout, - Handler: middleware.Merge(httpMiddleware...).Wrap(router), - } - - return httpServer, router, nil -} - -// HTTPAddress returns the HTTP net.Addr of this Server. -func (s *Server) HTTPAddress() net.Addr { return s.httpListener.Addr() } - -// GRPCAddress returns the GRPC net.Addr of this Server. 
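-// Because New creates its listeners eagerly, the returned address is already
-// concrete even when the flags requested an ephemeral port such as
-// 127.0.0.1:0 (as the tests below do).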
-func (s *Server) GRPCAddress() net.Addr { return s.grpcListener.Addr() }
-
-// ApplyConfig applies changes to the Server block.
-func (s *Server) ApplyConfig(cfg Config) error {
-	s.flagsMut.Lock()
-	defer s.flagsMut.Unlock()
-
-	// N.B. LogLevel/LogFormat support dynamic updating but are never used in
-	// *Server, so they're ignored here.
-
-	if s.updateHTTPTLS != nil {
-		if err := s.updateHTTPTLS(cfg.HTTP.TLSConfig); err != nil {
-			return fmt.Errorf("updating HTTP TLS settings: %w", err)
-		}
-	}
-	if s.updateGRPCTLS != nil {
-		if err := s.updateGRPCTLS(cfg.GRPC.TLSConfig); err != nil {
-			return fmt.Errorf("updating gRPC TLS settings: %w", err)
-		}
-	}
-
-	return nil
-}
-
-// Run the server until an error is received or the given context is canceled.
-// Run may not be re-called after it exits.
-func (s *Server) Run(ctx context.Context) error {
-	ctx, cancel := context.WithCancel(ctx)
-	defer cancel()
-
-	var g run.Group
-
-	g.Add(func() error {
-		<-ctx.Done()
-		return nil
-	}, func(_ error) {
-		cancel()
-	})
-
-	httpListeners := []net.Listener{
-		s.httpListener,
-		s.httpMemListener,
-	}
-	for i := range httpListeners {
-		listener := httpListeners[i]
-		g.Add(func() error {
-			err := s.HTTPServer.Serve(listener)
-			if errors.Is(err, http.ErrServerClosed) {
-				err = nil
-			}
-			return err
-		}, func(_ error) {
-			ctx, cancel := context.WithTimeout(context.Background(), s.flags.GracefulShutdownTimeout)
-			defer cancel()
-			_ = s.HTTPServer.Shutdown(ctx)
-		})
-	}
-
-	grpcListeners := []net.Listener{
-		s.grpcListener,
-		s.grpcMemListener,
-	}
-	for i := range grpcListeners {
-		listener := grpcListeners[i]
-		g.Add(func() error {
-			err := s.GRPC.Serve(listener)
-			if errors.Is(err, grpc.ErrServerStopped) {
-				err = nil
-			}
-			return err
-		}, func(_ error) {
-			s.GRPC.GracefulStop()
-		})
-	}
-
-	return g.Run()
-}
-
-// Close forcibly closes the server's listeners.
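-//
-// Closing the listeners causes an in-flight Run to return a non-nil error,
-// which is essentially what TestRunReturnsError below exercises (srv, ctx,
-// and errCh are assumed):
-//
-//	go func() { errCh <- srv.Run(ctx) }()
-//	_ = srv.Close()
-//	err := <-errCh // non-nil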
-func (s *Server) Close() error { - errs := multierror.Append( - s.httpListener.Close(), - s.grpcListener.Close(), - ) - return errs.ErrorOrNil() -} diff --git a/internal/static/server/server_test.go b/internal/static/server/server_test.go deleted file mode 100644 index effa46b72e..0000000000 --- a/internal/static/server/server_test.go +++ /dev/null @@ -1,193 +0,0 @@ -package server - -import ( - "context" - "crypto/tls" - "fmt" - "net" - "net/http" - "testing" - - "github.com/go-kit/log" - "github.com/stretchr/testify/require" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/health" - "google.golang.org/grpc/health/grpc_health_v1" -) - -const anyLocalhost = "127.0.0.1:0" - -func TestServer(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - srv := runExampleServer(t, cfg, flags) - - // Validate HTTP - resp, err := http.Get(fmt.Sprintf("http://%s/testing", srv.HTTPAddress())) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - _ = resp.Body.Close() - - // Validate gRPC - creds := grpc.WithTransportCredentials(insecure.NewCredentials()) - cc, err := grpc.Dial(srv.GRPCAddress().String(), creds) - require.NoError(t, err) - _, err = grpc_health_v1.NewHealthClient(cc).Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) - require.NoError(t, err) -} - -func TestServer_InMemory(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - srv := runExampleServer(t, cfg, flags) - - // Validate HTTP - var httpClient http.Client - httpClient.Transport = &http.Transport{DialContext: srv.DialContext} - resp, err := httpClient.Get(fmt.Sprintf("http://%s/testing", flags.HTTP.InMemoryAddr)) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - _ = resp.Body.Close() - - // Validate gRPC - grpcDialer := grpc.WithContextDialer(func(ctx context.Context, s string) (net.Conn, error) { - return srv.DialContext(ctx, "", s) - }) - cc, err := grpc.Dial(flags.GRPC.InMemoryAddr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpcDialer) - require.NoError(t, err) - _, err = grpc_health_v1.NewHealthClient(cc).Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) - require.NoError(t, err) -} - -func newTestConfig() Config { - cfg := DefaultConfig() - return cfg -} - -func newTestFlags() Flags { - flags := DefaultFlags - flags.HTTP.ListenAddress = anyLocalhost - flags.GRPC.ListenAddress = anyLocalhost - return flags -} - -func runExampleServer(t *testing.T, cfg Config, flags Flags) *Server { - t.Helper() - - srv, err := New(log.NewNopLogger(), nil, nil, cfg, flags) - require.NoError(t, err) - - // Set up some expected services for us to test against. - srv.HTTP.HandleFunc("/testing", func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - }) - grpc_health_v1.RegisterHealthServer(srv.GRPC, health.NewServer()) - - // Run our server. 
- ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - go func() { - require.NoError(t, srv.Run(ctx)) - }() - - return srv -} - -func TestServer_TLS(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - flags.HTTP.UseTLS = true - flags.GRPC.UseTLS = true - - tlsConfig := TLSConfig{ - TLSCertPath: "testdata/example-cert.pem", - TLSKeyPath: "testdata/example-key.pem", - } - cfg.HTTP.TLSConfig = tlsConfig - cfg.GRPC.TLSConfig = tlsConfig - - srv := runExampleServer(t, cfg, flags) - - // Validate HTTPS - cli := http.Client{ - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - }, - } - resp, err := cli.Get(fmt.Sprintf("https://%s/testing", srv.HTTPAddress())) - require.NoError(t, err) - require.Equal(t, http.StatusOK, resp.StatusCode) - _ = resp.Body.Close() - - // Validate gRPC TLS - creds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}) - cc, err := grpc.Dial(srv.GRPCAddress().String(), grpc.WithTransportCredentials(creds)) - require.NoError(t, err) - _, err = grpc_health_v1.NewHealthClient(cc).Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) - require.NoError(t, err) -} - -// TestRunReturnsError validates that Run exits with an error when the -// HTTP/GRPC servers stop unexpectedly. -func TestRunReturnsError(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - t.Run("http", func(t *testing.T) { - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errChan := make(chan error, 1) - go func() { - errChan <- srv.Run(ctx) - }() - - require.NoError(t, srv.httpListener.Close()) - require.NotNil(t, <-errChan) - }) - - t.Run("grpc", func(t *testing.T) { - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errChan := make(chan error, 1) - go func() { - errChan <- srv.Run(ctx) - }() - - require.NoError(t, srv.grpcListener.Close()) - require.NotNil(t, <-errChan) - }) -} - -func TestServer_ApplyConfig(t *testing.T) { - t.Run("no changes", func(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - require.NoError(t, srv.ApplyConfig(cfg)) - }) - - t.Run("valid changes", func(t *testing.T) { - cfg := newTestConfig() - flags := newTestFlags() - - srv, err := New(nil, nil, nil, cfg, flags) - require.NoError(t, err) - - cfg.LogLevel.Set("debug") - require.NoError(t, srv.ApplyConfig(cfg)) - }) -} diff --git a/internal/static/server/signal_context.go b/internal/static/server/signal_context.go deleted file mode 100644 index 21ac6376ce..0000000000 --- a/internal/static/server/signal_context.go +++ /dev/null @@ -1,41 +0,0 @@ -package server - -import ( - "context" - - "github.com/go-kit/log" - "github.com/grafana/dskit/signals" - "go.uber.org/atomic" -) - -var signalContexts atomic.Int64 - -// SignalContext wraps a ctx which will be canceled if an interrupt is -// received. -// -// It is invalid to have two simultaneous SignalContexts per binary. 
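-//
-// A sketch of the intended use, assuming a logger is available:
-//
-//	ctx, cancel := SignalContext(context.Background(), logger)
-//	defer cancel()
-//	<-ctx.Done() // unblocks on an interrupt or when cancel is called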
-func SignalContext(ctx context.Context, l log.Logger) (context.Context, context.CancelFunc) { - if !signalContexts.CompareAndSwap(0, 1) { - panic("bug: multiple SignalContexts found") - } - - if l == nil { - l = log.NewNopLogger() - } - - ctx, cancel := context.WithCancel(ctx) - - handler := signals.NewHandler(l) - go func() { - handler.Loop() - signalContexts.Store(0) - cancel() - }() - go func() { - <-ctx.Done() - handler.Stop() - signalContexts.Store(0) - }() - - return ctx, cancel -} diff --git a/internal/static/server/tls.go b/internal/static/server/tls.go index 0e78edac58..5ab671f820 100644 --- a/internal/static/server/tls.go +++ b/internal/static/server/tls.go @@ -2,15 +2,9 @@ package server import ( "crypto/tls" - "crypto/x509" "errors" "fmt" - "net" - "os" - "sync" "time" - - "github.com/go-kit/log" ) // TLSConfig holds dynamic configuration options for TLS. @@ -142,152 +136,6 @@ func (tv *TLSVersion) MarshalYAML() (interface{}, error) { return fmt.Sprintf("%v", tv), nil } -// tlsListener is a net.Listener for establishing TLS connections. tlsListener -// supports dynamically updating the TLS settings used to establish -// connections. -type tlsListener struct { - mut sync.RWMutex - cfg TLSConfig - tlsConfig *tls.Config - log log.Logger - - innerListener net.Listener - - windowsCertHandler *WinCertStoreHandler -} - -// newTLSListener creates and configures a new tlsListener. -func newTLSListener(inner net.Listener, c TLSConfig, log log.Logger) (*tlsListener, error) { - tl := &tlsListener{ - innerListener: inner, - log: log, - } - return tl, tl.ApplyConfig(c) -} - -// Accept implements net.Listener and returns the next connection. Connections -func (l *tlsListener) Accept() (net.Conn, error) { - nc, err := l.innerListener.Accept() - if err != nil { - return nc, err - } - - l.mut.RLock() - defer l.mut.RUnlock() - return tls.Server(nc, l.tlsConfig), nil -} - -// Close implements net.Listener and closes the tlsListener, preventing any new -// connections from being formed. Existing connections will be kept alive. -func (l *tlsListener) Close() error { - if l.windowsCertHandler != nil { - l.windowsCertHandler.Stop() - } - return l.innerListener.Close() -} - -// Addr implements net.Listener and returns the listener's network address. -func (l *tlsListener) Addr() net.Addr { - return l.innerListener.Addr() -} - -// ApplyConfig updates the tlsListener with new settings for creating TLS -// connections. -// -// Existing TLS connections will be kept alive after updating the TLS settings. -// New connections cannot be established while ApplyConfig is running. -func (l *tlsListener) ApplyConfig(c TLSConfig) error { - l.mut.Lock() - defer l.mut.Unlock() - if c.WindowsCertificateFilter != nil { - return l.applyWindowsCertificateStore(c) - } - return l.applyNormalTLS(c) -} - -func (l *tlsListener) applyNormalTLS(c TLSConfig) error { - if l.windowsCertHandler != nil { - panic("windows certificate handler is set this should never happen") - } - // Convert our TLSConfig into a new *tls.Config. - // - // While *tls.Config supports callbacks and doesn't need to be fully - // replaced, some of our dynamic settings from TLSConfig can't be dynamically - // updated (e.g., ciphers, min/max version, etc.). - // - // To make life easier on ourselves we just replace the whole thing with a new TLS listener. 
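-	//
-	// For example, a hypothetical reload that rotates the certificate pair
-	// would go through ApplyConfig and land here (paths are assumptions):
-	//
-	//	cfg := l.cfg
-	//	cfg.TLSCertPath = "/etc/agent/tls/new-cert.pem"
-	//	cfg.TLSKeyPath = "/etc/agent/tls/new-key.pem"
-	//	_ = l.ApplyConfig(cfg)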
- - // Make sure that the certificates exist - if c.TLSCertPath == "" { - return fmt.Errorf("missing certificate file") - } - if c.TLSKeyPath == "" { - return fmt.Errorf("missing key file") - } - _, err := tls.LoadX509KeyPair(c.TLSCertPath, c.TLSKeyPath) - if err != nil { - return fmt.Errorf("failed to load key pair: %w", err) - } - - newConfig := &tls.Config{ - MinVersion: (uint16)(c.MinVersion), - MaxVersion: (uint16)(c.MaxVersion), - PreferServerCipherSuites: c.PreferServerCipherSuites, - - GetCertificate: l.getCertificate, - } - - var cf []uint16 - for _, c := range c.CipherSuites { - cf = append(cf, (uint16)(c)) - } - if len(cf) > 0 { - newConfig.CipherSuites = cf - } - - var cp []tls.CurveID - for _, c := range c.CurvePreferences { - cp = append(cp, (tls.CurveID)(c)) - } - if len(cp) > 0 { - newConfig.CurvePreferences = cp - } - - if c.ClientCAs != "" { - clientCAPool := x509.NewCertPool() - clientCAFile, err := os.ReadFile(c.ClientCAs) - if err != nil { - return err - } - clientCAPool.AppendCertsFromPEM(clientCAFile) - newConfig.ClientCAs = clientCAPool - } - - clientAuth, err := GetClientAuthFromString(c.ClientAuth) - if err != nil { - return err - } - newConfig.ClientAuth = clientAuth - if c.ClientCAs != "" && newConfig.ClientAuth == tls.NoClientCert { - return fmt.Errorf("Client CAs have been configured without a ClientAuth policy") - } - - l.tlsConfig = newConfig - l.cfg = c - return nil -} - -func (l *tlsListener) getCertificate(*tls.ClientHelloInfo) (*tls.Certificate, error) { - l.mut.RLock() - defer l.mut.RUnlock() - - cert, err := tls.LoadX509KeyPair(l.cfg.TLSCertPath, l.cfg.TLSKeyPath) - if err != nil { - return nil, fmt.Errorf("failed to load key pair: %w", err) - } - return &cert, nil -} - func GetClientAuthFromString(clientAuth string) (tls.ClientAuthType, error) { switch clientAuth { case "RequestClientCert": diff --git a/internal/static/server/tls_certstore_stub.go b/internal/static/server/tls_certstore_stub.go index 6b6dd8430b..ba1defe3d1 100644 --- a/internal/static/server/tls_certstore_stub.go +++ b/internal/static/server/tls_certstore_stub.go @@ -2,12 +2,6 @@ package server -import "fmt" - -func (l *tlsListener) applyWindowsCertificateStore(_ TLSConfig) error { - return fmt.Errorf("cannot use Windows certificate store on non-Windows platforms") -} - type WinCertStoreHandler struct { } diff --git a/internal/static/server/tls_certstore_windows.go b/internal/static/server/tls_certstore_windows.go index c80406114b..fca1965748 100644 --- a/internal/static/server/tls_certstore_windows.go +++ b/internal/static/server/tls_certstore_windows.go @@ -59,66 +59,6 @@ func NewWinCertStoreHandler(cfg WindowsCertificateFilter, clientAuth tls.ClientA return cn, nil } -func (l *tlsListener) applyWindowsCertificateStore(c TLSConfig) error { - - // Restrict normal TLS options when using windows certificate store - if c.TLSCertPath != "" { - return fmt.Errorf("at most one of cert_file and windows_certificate_filter can be configured") - } - if c.TLSKeyPath != "" { - return fmt.Errorf("at most one of cert_key and windows_certificate_filter can be configured") - } - if c.WindowsCertificateFilter.Server == nil { - return fmt.Errorf("windows certificate filter requires a server block defined") - } - - var subjectRegEx *regexp.Regexp - var err error - if c.WindowsCertificateFilter.Client != nil && c.WindowsCertificateFilter.Client.SubjectRegEx != "" { - subjectRegEx, err = regexp.Compile(c.WindowsCertificateFilter.Client.SubjectRegEx) - if err != nil { - return fmt.Errorf("error compiling 
subject common name regular expression: %w", err) - } - } - - // If there is an existing windows certhandler stop it. - if l.windowsCertHandler != nil { - l.windowsCertHandler.Stop() - } - - cn := &WinCertStoreHandler{ - cfg: *c.WindowsCertificateFilter, - subjectRegEx: subjectRegEx, - log: l.log, - shutdown: make(chan struct{}), - } - - err = cn.refreshCerts() - if err != nil { - return err - } - - config := &tls.Config{ - VerifyPeerCertificate: cn.VerifyPeer, - GetCertificate: cn.CertificateHandler, - MaxVersion: uint16(c.MaxVersion), - MinVersion: uint16(c.MinVersion), - } - - ca, err := GetClientAuthFromString(c.ClientAuth) - if err != nil { - return err - } - config.ClientAuth = ca - cn.clientAuth = ca - // Kick off the refresh handler - go cn.startUpdateTimer() - l.windowsCertHandler = cn - l.tlsConfig = config - l.cfg = c - return nil -} - // Run runs the filter refresh. Stop should be called when done. func (c *WinCertStoreHandler) Run() { go c.startUpdateTimer() diff --git a/internal/static/server/tls_test.go b/internal/static/server/tls_test.go deleted file mode 100644 index de9a2402c0..0000000000 --- a/internal/static/server/tls_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package server - -import ( - "crypto/tls" - "fmt" - "io" - "log" - "net" - "net/http" - "net/url" - "testing" - - kitlog "github.com/go-kit/log" - "github.com/stretchr/testify/require" -) - -func Test_tlsListener(t *testing.T) { - rawLis, err := net.Listen("tcp", "127.0.0.1:0") - require.NoError(t, err) - - tlsConfig := TLSConfig{ - TLSCertPath: "testdata/example-cert.pem", - TLSKeyPath: "testdata/example-key.pem", - ClientAuth: "NoClientCert", - } - tlsLis, err := newTLSListener(rawLis, tlsConfig, kitlog.NewNopLogger()) - require.NoError(t, err) - - httpSrv := &http.Server{ - ErrorLog: log.New(io.Discard, "", 0), - } - go func() { - _ = httpSrv.Serve(tlsLis) - }() - defer func() { - httpSrv.Close() - }() - - httpTransport := &http.Transport{ - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - } - cli := http.Client{Transport: httpTransport} - - resp, err := cli.Get(fmt.Sprintf("https://%s", tlsLis.Addr())) - if err == nil { - resp.Body.Close() - } - require.NoError(t, err) - - // Update our TLSConfig to require a client cert. - tlsConfig.ClientAuth = "RequireAndVerifyClientCert" - require.NoError(t, tlsLis.ApplyConfig(tlsConfig)) - - // Close our idle connections so our next request forces a new dial. - httpTransport.CloseIdleConnections() - - // Create a second connection which should now fail because we don't supply a - resp, err = cli.Get(fmt.Sprintf("https://%s", tlsLis.Addr())) - if err == nil { - resp.Body.Close() - } - - var urlError *url.Error - require.ErrorAs(t, err, &urlError) - require.Contains(t, urlError.Err.Error(), "tls:") -} diff --git a/internal/static/supportbundle/supportbundle.go b/internal/static/supportbundle/supportbundle.go deleted file mode 100644 index 3963c2a9cc..0000000000 --- a/internal/static/supportbundle/supportbundle.go +++ /dev/null @@ -1,235 +0,0 @@ -package supportbundle - -import ( - "archive/zip" - "bytes" - "context" - "fmt" - "io" - "net/http" - "path/filepath" - "runtime" - "runtime/pprof" - "strings" - "sync" - "time" - - "github.com/grafana/agent/internal/build" - "github.com/grafana/agent/internal/static/server" - "github.com/mackerelio/go-osstat/uptime" - "gopkg.in/yaml.v3" -) - -// Bundle collects all the data that is exposed as a support bundle. 
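-// Everything gathered here is written out by Serve as a single zip archive;
-// see the zipStructure map in Serve for the exact file layout.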
-type Bundle struct { - meta []byte - config []byte - agentMetrics []byte - agentMetricsInstances []byte - agentMetricsTargets []byte - agentLogsInstances []byte - agentLogsTargets []byte - heapBuf *bytes.Buffer - goroutineBuf *bytes.Buffer - blockBuf *bytes.Buffer - mutexBuf *bytes.Buffer - cpuBuf *bytes.Buffer -} - -// Metadata contains general runtime information about the current Agent. -type Metadata struct { - BuildVersion string `yaml:"build_version"` - OS string `yaml:"os"` - Architecture string `yaml:"architecture"` - Uptime float64 `yaml:"uptime"` - Payload map[string]interface{} `yaml:"payload"` -} - -// Used to enforce single-flight requests to Export -var mut sync.Mutex - -// Export gathers the information required for the support bundle. -func Export(ctx context.Context, enabledFeatures []string, cfg []byte, srvAddress string, dialContext server.DialContextFunc) (*Bundle, error) { - mut.Lock() - defer mut.Unlock() - // The block profiler is disabled by default. Temporarily enable recording - // of all blocking events. Also, temporarily record all mutex contentions, - // and defer restoring of earlier mutex profiling fraction. - runtime.SetBlockProfileRate(1) - old := runtime.SetMutexProfileFraction(1) - defer func() { - runtime.SetBlockProfileRate(0) - runtime.SetMutexProfileFraction(old) - }() - - // Gather runtime metadata. - ut, err := uptime.Get() - if err != nil { - return nil, err - } - m := Metadata{ - BuildVersion: build.Version, - OS: runtime.GOOS, - Architecture: runtime.GOARCH, - Uptime: ut.Seconds(), - Payload: map[string]interface{}{"enabled-features": enabledFeatures}, - } - meta, err := yaml.Marshal(m) - if err != nil { - return nil, fmt.Errorf("failed to marshal support bundle metadata: %s", err) - } - - var httpClient http.Client - httpClient.Transport = &http.Transport{DialContext: dialContext} - // Gather Agent's own metrics. - resp, err := httpClient.Get("http://" + srvAddress + "/metrics") - if err != nil { - return nil, fmt.Errorf("failed to get internal Agent metrics: %s", err) - } - agentMetrics, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read internal Agent metrics: %s", err) - } - - // Collect the Agent metrics instances and target statuses. - resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/metrics/instances") - if err != nil { - return nil, fmt.Errorf("failed to get internal Agent metrics: %s", err) - } - agentMetricsInstances, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read internal Agent metrics: %s", err) - } - resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/metrics/targets") - if err != nil { - return nil, fmt.Errorf("failed to get Agent metrics targets: %s", err) - } - agentMetricsTargets, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read Agent metrics targets: %s", err) - } - - // Collect the Agent's logs instances and target statuses. 
- resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/logs/instances") - if err != nil { - return nil, fmt.Errorf("failed to get Agent logs instances: %s", err) - } - agentLogsInstances, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read Agent logs instances: %s", err) - } - - resp, err = httpClient.Get("http://" + srvAddress + "/agent/api/v1/logs/targets") - if err != nil { - return nil, fmt.Errorf("failed to get Agent logs targets: %s", err) - } - agentLogsTargets, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read Agent logs targets: %s", err) - } - - // Export pprof data. - var ( - cpuBuf bytes.Buffer - heapBuf bytes.Buffer - goroutineBuf bytes.Buffer - blockBuf bytes.Buffer - mutexBuf bytes.Buffer - ) - err = pprof.StartCPUProfile(&cpuBuf) - if err != nil { - return nil, err - } - deadline, _ := ctx.Deadline() - // Sleep for the remaining of the context deadline, but leave some time for - // the rest of the bundle to be exported successfully. - time.Sleep(time.Until(deadline) - 200*time.Millisecond) - pprof.StopCPUProfile() - - p := pprof.Lookup("heap") - if err := p.WriteTo(&heapBuf, 0); err != nil { - return nil, err - } - p = pprof.Lookup("goroutine") - if err := p.WriteTo(&goroutineBuf, 0); err != nil { - return nil, err - } - p = pprof.Lookup("block") - if err := p.WriteTo(&blockBuf, 0); err != nil { - return nil, err - } - p = pprof.Lookup("mutex") - if err := p.WriteTo(&mutexBuf, 0); err != nil { - return nil, err - } - - // Finally, bundle everything up to be served, either as a zip from - // memory, or exported to a directory. - bundle := &Bundle{ - meta: meta, - config: cfg, - agentMetrics: agentMetrics, - agentMetricsInstances: agentMetricsInstances, - agentMetricsTargets: agentMetricsTargets, - agentLogsInstances: agentLogsInstances, - agentLogsTargets: agentLogsTargets, - heapBuf: &heapBuf, - goroutineBuf: &goroutineBuf, - blockBuf: &blockBuf, - mutexBuf: &mutexBuf, - cpuBuf: &cpuBuf, - } - - return bundle, nil -} - -// Serve the collected data and logs as a zip file over the given -// http.ResponseWriter. -func Serve(rw http.ResponseWriter, b *Bundle, logsBuf *bytes.Buffer) error { - zw := zip.NewWriter(rw) - rw.Header().Set("Content-Type", "application/zip") - rw.Header().Set("Content-Disposition", "attachment; filename=\"agent-support-bundle.zip\"") - - zipStructure := map[string][]byte{ - "agent-metadata.yaml": b.meta, - "agent-config.yaml": b.config, - "agent-metrics.txt": b.agentMetrics, - "agent-metrics-instances.json": b.agentMetricsInstances, - "agent-metrics-targets.json": b.agentMetricsTargets, - "agent-logs-instances.json": b.agentLogsInstances, - "agent-logs-targets.json": b.agentLogsTargets, - "agent-logs.txt": logsBuf.Bytes(), - "pprof/cpu.pprof": b.cpuBuf.Bytes(), - "pprof/heap.pprof": b.heapBuf.Bytes(), - "pprof/goroutine.pprof": b.goroutineBuf.Bytes(), - "pprof/mutex.pprof": b.mutexBuf.Bytes(), - "pprof/block.pprof": b.blockBuf.Bytes(), - } - - for fn, b := range zipStructure { - if b != nil { - path := append([]string{"agent-support-bundle"}, strings.Split(fn, "/")...) 
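-			// For example, the key "pprof/cpu.pprof" ends up as
-			// agent-support-bundle/pprof/cpu.pprof inside the archive.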
- if err := writeByteSlice(zw, b, path...); err != nil { - return err - } - } - } - - err := zw.Close() - if err != nil { - return fmt.Errorf("failed to flush the zip writer: %v", err) - } - return nil -} - -func writeByteSlice(zw *zip.Writer, b []byte, fn ...string) error { - f, err := zw.Create(filepath.Join(fn...)) - if err != nil { - return err - } - _, err = f.Write(b) - if err != nil { - return err - } - return nil -} diff --git a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go index c347e5cbb5..0624e1a54d 100644 --- a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go +++ b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor.go @@ -4,25 +4,14 @@ import ( "context" "errors" "fmt" - "strconv" "time" "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/go-logfmt/logfmt" - "github.com/grafana/agent/internal/static/logs" - "github.com/grafana/agent/internal/static/traces/contextkeys" - "github.com/grafana/agent/internal/util" util_log "github.com/grafana/agent/internal/util/log" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - "github.com/prometheus/common/model" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" - "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/ptrace" "go.opentelemetry.io/collector/processor" - semconv "go.opentelemetry.io/collector/semconv/v1.6.1" "go.uber.org/atomic" ) @@ -35,19 +24,14 @@ const ( defaultTraceIDKey = "tid" defaultTimeout = time.Millisecond - - typeSpan = "span" - typeRoot = "root" - typeProcess = "process" ) type automaticLoggingProcessor struct { nextConsumer consumer.Traces - cfg *AutomaticLoggingConfig - logToStdout bool - logsInstance *logs.Instance - done atomic.Bool + cfg *AutomaticLoggingConfig + logToStdout bool + done atomic.Bool labels map[string]struct{} @@ -105,77 +89,7 @@ func newTraceProcessor(nextConsumer consumer.Traces, cfg *AutomaticLoggingConfig } func (p *automaticLoggingProcessor) ConsumeTraces(ctx context.Context, td ptrace.Traces) error { - rsLen := td.ResourceSpans().Len() - for i := 0; i < rsLen; i++ { - rs := td.ResourceSpans().At(i) - ssLen := rs.ScopeSpans().Len() - - var svc string - svcAtt, ok := rs.Resource().Attributes().Get(semconv.AttributeServiceName) - if ok { - svc = svcAtt.Str() - } - - for j := 0; j < ssLen; j++ { - ss := rs.ScopeSpans().At(j) - spanLen := ss.Spans().Len() - - lastTraceID := "" - for k := 0; k < spanLen; k++ { - span := ss.Spans().At(k) - traceID := span.TraceID().String() - - if p.cfg.Spans { - keyValues := append(p.spanKeyVals(span), p.processKeyVals(rs.Resource(), svc)...) - p.exportToLogsInstance(typeSpan, traceID, p.spanLabels(keyValues), keyValues...) - } - - if p.cfg.Roots && span.ParentSpanID().IsEmpty() { - keyValues := append(p.spanKeyVals(span), p.processKeyVals(rs.Resource(), svc)...) - p.exportToLogsInstance(typeRoot, traceID, p.spanLabels(keyValues), keyValues...) - } - - if p.cfg.Processes && lastTraceID != traceID { - lastTraceID = traceID - keyValues := p.processKeyVals(rs.Resource(), svc) - p.exportToLogsInstance(typeProcess, traceID, p.spanLabels(keyValues), keyValues...) 
-			}
-		}
-	}
-
-	return p.nextConsumer.ConsumeTraces(ctx, td)
-}
-
-func (p *automaticLoggingProcessor) spanLabels(keyValues []interface{}) model.LabelSet {
-	if len(keyValues) == 0 {
-		return model.LabelSet{}
-	}
-	ls := make(map[model.LabelName]model.LabelValue, len(keyValues)/2)
-	var (
-		k, v string
-		ok   bool
-	)
-	for i := 0; i < len(keyValues); i += 2 {
-		if k, ok = keyValues[i].(string); !ok {
-			// Should never happen, all keys are strings
-			level.Error(p.logger).Log("msg", "error casting label key to string", "key", keyValues[i])
-			continue
-		}
-		// Try to cast value to string
-		if v, ok = keyValues[i+1].(string); !ok {
-			// If it's not a string, format it to its string representation
-			v = fmt.Sprintf("%v", keyValues[i+1])
-		}
-		if _, ok := p.labels[k]; ok {
-			// Loki does not accept "." as a valid character for labels
-			// Dots . are replaced by underscores _
-			k = util.SanitizeLabelName(k)
-
-			ls[model.LabelName(k)] = model.LabelValue(v)
-		}
-	}
-	return ls
+	return nil
 }
 
 func (p *automaticLoggingProcessor) Capabilities() consumer.Capabilities {
@@ -184,16 +98,8 @@
 // Start is invoked during service startup.
 func (p *automaticLoggingProcessor) Start(ctx context.Context, _ component.Host) error {
-	if !p.logToStdout {
-		logs, ok := ctx.Value(contextkeys.Logs).(*logs.Logs)
-		if !ok {
-			return fmt.Errorf("key does not contain a logs instance")
-		}
-		p.logsInstance = logs.Instance(p.cfg.LogsName)
-		if p.logsInstance == nil {
-			return fmt.Errorf("logs instance %s not found", p.cfg.LogsName)
-		}
-	}
+	// NOTE(rfratto): automaticloggingprocessor only exists for config conversions
+	// so we don't need any logic here.
 	return nil
 }
 
@@ -204,109 +110,6 @@
 	return nil
 }
 
-func (p *automaticLoggingProcessor) processKeyVals(resource pcommon.Resource, svc string) []interface{} {
-	atts := make([]interface{}, 0, 2) // 2 for service name
-	rsAtts := resource.Attributes()
-
-	// Add an attribute with the service name
-	atts = append(atts, p.cfg.Overrides.ServiceKey)
-	atts = append(atts, svc)
-
-	for _, name := range p.cfg.ProcessAttributes {
-		att, ok := rsAtts.Get(name)
-		if ok {
-			// name/key val pairs
-			atts = append(atts, name)
-			atts = append(atts, attributeValue(att))
-		}
-	}
-
-	return atts
-}
-
-func (p *automaticLoggingProcessor) spanKeyVals(span ptrace.Span) []interface{} {
-	atts := make([]interface{}, 0, 8) // 8 for name, duration, service name and status
-
-	atts = append(atts, p.cfg.Overrides.SpanNameKey)
-	atts = append(atts, span.Name())
-
-	atts = append(atts, p.cfg.Overrides.DurationKey)
-	atts = append(atts, spanDuration(span))
-
-	// Skip STATUS_CODE_UNSET to be less spammy
-	if span.Status().Code() != ptrace.StatusCodeUnset {
-		atts = append(atts, p.cfg.Overrides.StatusKey)
-		atts = append(atts, span.Status().Code())
-	}
-
-	for _, name := range p.cfg.SpanAttributes {
-		att, ok := span.Attributes().Get(name)
-		if ok {
-			atts = append(atts, name)
-			atts = append(atts, attributeValue(att))
-		}
-	}
-
-	return atts
-}
-
-func (p *automaticLoggingProcessor) exportToLogsInstance(kind string, traceID string, labels model.LabelSet, keyvals ...interface{}) {
-	if p.done.Load() {
-		return
-	}
-
-	keyvals = append(keyvals, []interface{}{p.cfg.Overrides.TraceIDKey, traceID}...)
-	line, err := logfmt.MarshalKeyvals(keyvals...)
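-	// MarshalKeyvals renders the alternating key/value pairs as a single
-	// logfmt line, e.g. ["span", "GET /", "dur", "5ms"] becomes:
-	//
-	//	span="GET /" dur=5ms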
- if err != nil { - level.Warn(p.logger).Log("msg", "unable to marshal keyvals", "err", err) - return - } - - // if we're logging to stdout, log and bail - if p.logToStdout { - level.Info(p.logger).Log(keyvals...) - return - } - - // Add logs instance label - labels[model.LabelName(p.cfg.Overrides.LogsTag)] = model.LabelValue(kind) - - sent := p.logsInstance.SendEntry(api.Entry{ - Labels: labels, - Entry: logproto.Entry{ - Timestamp: time.Now(), - Line: string(line), - }, - }, p.cfg.Timeout) - - if !sent { - level.Warn(p.logger).Log("msg", "failed to autolog to logs pipeline", "kind", kind, "traceid", traceID) - } -} - -func spanDuration(span ptrace.Span) string { - dur := int64(span.EndTimestamp() - span.StartTimestamp()) - return strconv.FormatInt(dur, 10) + "ns" -} - -func attributeValue(att pcommon.Value) interface{} { - switch att.Type() { - case pcommon.ValueTypeStr: - return att.Str() - case pcommon.ValueTypeInt: - return att.Int() - case pcommon.ValueTypeDouble: - return att.Double() - case pcommon.ValueTypeBool: - return att.Bool() - case pcommon.ValueTypeMap: - return att.Map() - case pcommon.ValueTypeSlice: - return att.Slice() - } - return nil -} - func override(cfgValue string, defaultValue string) string { if cfgValue == "" { return defaultValue diff --git a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go index 310a09c0ad..b02b7ba7f9 100644 --- a/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go +++ b/internal/static/traces/automaticloggingprocessor/automaticloggingprocessor_test.go @@ -3,174 +3,14 @@ package automaticloggingprocessor import ( "context" "testing" - "time" "github.com/grafana/agent/internal/static/logs" "github.com/grafana/agent/internal/util" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/component/componenttest" - "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/collector/pdata/ptrace" "gopkg.in/yaml.v3" ) -func TestSpanKeyVals(t *testing.T) { - tests := []struct { - spanName string - spanAttrs map[string]interface{} - spanStart time.Time - spanEnd time.Time - cfg AutomaticLoggingConfig - expected []interface{} - }{ - { - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanName: "test", - expected: []interface{}{ - "span", "test", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanStart: time.Unix(0, 0), - spanEnd: time.Unix(0, 10), - expected: []interface{}{ - "span", "", - "dur", "10ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanStart: time.Unix(0, 10), - spanEnd: time.Unix(0, 100), - expected: []interface{}{ - "span", "", - "dur", "90ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanAttrs: map[string]interface{}{ - "xstr": "test", - }, - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - }, - }, - { - spanAttrs: map[string]interface{}{ - "xstr": "test", - }, - cfg: AutomaticLoggingConfig{ - SpanAttributes: []string{"xstr"}, - }, - expected: []interface{}{ - "span", "", - "dur", "0ns", - "status", ptrace.StatusCode(1), - "xstr", "test", - }, - }, - { - cfg: AutomaticLoggingConfig{ - Overrides: OverrideConfig{ - SpanNameKey: "a", - DurationKey: "c", - 
-					StatusKey:   "d",
-				},
-			},
-			expected: []interface{}{
-				"a", "",
-				"c", "0ns",
-				"d", ptrace.StatusCode(1),
-			},
-		},
-	}
-
-	for _, tc := range tests {
-		tc.cfg.Backend = BackendStdout
-		tc.cfg.Spans = true
-		p, err := newTraceProcessor(&automaticLoggingProcessor{}, &tc.cfg)
-		require.NoError(t, err)
-
-		span := ptrace.NewSpan()
-		span.SetName(tc.spanName)
-		span.Attributes().FromRaw(tc.spanAttrs)
-		span.SetStartTimestamp(pcommon.NewTimestampFromTime(tc.spanStart))
-		span.SetEndTimestamp(pcommon.NewTimestampFromTime(tc.spanEnd))
-		span.Status().SetCode(ptrace.StatusCodeOk)
-
-		actual := p.(*automaticLoggingProcessor).spanKeyVals(span)
-		assert.Equal(t, tc.expected, actual)
-	}
-}
-
-func TestProcessKeyVals(t *testing.T) {
-	tests := []struct {
-		processAttrs map[string]interface{}
-		svc          string
-		cfg          AutomaticLoggingConfig
-		expected     []interface{}
-	}{
-		{
-			expected: []interface{}{
-				"svc", "",
-			},
-		},
-		{
-			processAttrs: map[string]interface{}{
-				"xstr": "test",
-			},
-			expected: []interface{}{
-				"svc", "",
-			},
-		},
-		{
-			processAttrs: map[string]interface{}{
-				"xstr": "test",
-			},
-			cfg: AutomaticLoggingConfig{
-				ProcessAttributes: []string{"xstr"},
-			},
-			expected: []interface{}{
-				"svc", "",
-				"xstr", "test",
-			},
-		},
-	}
-
-	for _, tc := range tests {
-		tc.cfg.Backend = BackendStdout
-		tc.cfg.Spans = true
-		p, err := newTraceProcessor(&automaticLoggingProcessor{}, &tc.cfg)
-		require.NoError(t, err)
-
-		process := pcommon.NewResource()
-		process.Attributes().FromRaw(tc.processAttrs)
-
-		actual := p.(*automaticLoggingProcessor).processKeyVals(process, tc.svc)
-		assert.Equal(t, tc.expected, actual)
-	}
-}
-
 func TestBadConfigs(t *testing.T) {
 	tests := []struct {
 		cfg *AutomaticLoggingConfig
@@ -276,81 +116,3 @@ func TestLokiNameMigration(t *testing.T) {
 	require.NoError(t, err)
 	require.YAMLEq(t, expect, string(bb))
 }
-
-func TestLabels(t *testing.T) {
-	tests := []struct {
-		name           string
-		labels         []string
-		keyValues      []interface{}
-		expectedLabels model.LabelSet
-	}{
-		{
-			name:      "happy case",
-			labels:    []string{"loki", "svc"},
-			keyValues: []interface{}{"loki", "loki", "svc", "gateway", "duration", "1s"},
-			expectedLabels: map[model.LabelName]model.LabelValue{
-				"loki": "loki",
-				"svc":  "gateway",
-			},
-		},
-		{
-			name:      "happy case with dots",
-			labels:    []string{"loki", "service.name"},
-			keyValues: []interface{}{"loki", "loki", "service.name", "gateway", "duration", "1s"},
-			expectedLabels: map[model.LabelName]model.LabelValue{
-				"loki":         "loki",
-				"service_name": "gateway",
-			},
-		},
-		{
-			name:           "no labels",
-			labels:         []string{},
-			keyValues:      []interface{}{"loki", "loki", "svc", "gateway", "duration", "1s"},
-			expectedLabels: map[model.LabelName]model.LabelValue{},
-		},
-		{
-			name:      "label not present in keyValues",
-			labels:    []string{"loki", "svc"},
-			keyValues: []interface{}{"loki", "loki", "duration", "1s"},
-			expectedLabels: map[model.LabelName]model.LabelValue{
-				"loki": "loki",
-			},
-		},
-		{
-			name:      "label value is not type string",
-			labels:    []string{"loki"},
-			keyValues: []interface{}{"loki", 42, "duration", "1s"},
-			expectedLabels: map[model.LabelName]model.LabelValue{
-				"loki": "42",
-			},
-		},
-		{
-			name:      "stringifies value if possible",
-			labels:    []string{"status"},
-			keyValues: []interface{}{"status", ptrace.StatusCode(1)},
-			expectedLabels: map[model.LabelName]model.LabelValue{
-				"status": model.LabelValue(ptrace.StatusCode(1).String()),
-			},
-		},
-		{
-			name:           "no keyValues",
-			labels:         []string{"status"},
-			keyValues:      []interface{}{},
-			expectedLabels: map[model.LabelName]model.LabelValue{},
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(tc.name, func(t *testing.T) {
-			cfg := &AutomaticLoggingConfig{
-				Spans:  true,
-				Labels: tc.labels,
-			}
-			p, err := newTraceProcessor(&automaticLoggingProcessor{}, cfg)
-			require.NoError(t, err)
-
-			ls := p.(*automaticLoggingProcessor).spanLabels(tc.keyValues)
-			assert.Equal(t, tc.expectedLabels, ls)
-		})
-	}
-}
diff --git a/internal/static/traces/instance.go b/internal/static/traces/instance.go
deleted file mode 100644
index 0c2e3fcb19..0000000000
--- a/internal/static/traces/instance.go
+++ /dev/null
@@ -1,194 +0,0 @@
-package traces
-
-import (
-	"context"
-	"fmt"
-	"sync"
-	"time"
-
-	"go.opentelemetry.io/collector/component"
-	"go.opentelemetry.io/collector/connector"
-	otelexporter "go.opentelemetry.io/collector/exporter"
-	"go.opentelemetry.io/collector/extension"
-	"go.opentelemetry.io/collector/otelcol"
-	"go.opentelemetry.io/collector/processor"
-	"go.opentelemetry.io/collector/receiver"
-	"go.opentelemetry.io/collector/service"
-	"go.uber.org/zap"
-	"go.uber.org/zap/zapcore"
-
-	"github.com/grafana/agent/internal/build"
-	"github.com/grafana/agent/internal/static/logs"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/static/traces/automaticloggingprocessor"
-	"github.com/grafana/agent/internal/static/traces/contextkeys"
-	"github.com/grafana/agent/internal/static/traces/servicegraphprocessor"
-	"github.com/grafana/agent/internal/static/traces/traceutils"
-	"github.com/grafana/agent/internal/util"
-	prom_client "github.com/prometheus/client_golang/prometheus"
-	"go.opentelemetry.io/otel/trace/noop"
-)
-
-// Instance wraps the OpenTelemetry collector to enable tracing pipelines
-type Instance struct {
-	mut    sync.Mutex
-	cfg    InstanceConfig
-	logger *zap.Logger
-
-	factories otelcol.Factories
-	service   *service.Service
-}
-
-// NewInstance creates and starts an instance of tracing pipelines.
-func NewInstance(logsSubsystem *logs.Logs, reg prom_client.Registerer, cfg InstanceConfig, logger *zap.Logger, promInstanceManager instance.Manager) (*Instance, error) {
-	instance := &Instance{}
-	instance.logger = logger
-
-	if err := instance.ApplyConfig(logsSubsystem, promInstanceManager, reg, cfg); err != nil {
-		return nil, err
-	}
-	return instance, nil
-}
-
-// ApplyConfig updates the configuration of the Instance.
-func (i *Instance) ApplyConfig(logsSubsystem *logs.Logs, promInstanceManager instance.Manager, reg prom_client.Registerer, cfg InstanceConfig) error {
-	i.mut.Lock()
-	defer i.mut.Unlock()
-
-	if util.CompareYAML(cfg, i.cfg) {
-		// No config change
-		return nil
-	}
-	i.cfg = cfg
-
-	// Shut down any existing pipeline
-	i.stop()
-
-	err := i.buildAndStartPipeline(context.Background(), cfg, logsSubsystem, promInstanceManager, reg)
-	if err != nil {
-		return fmt.Errorf("failed to create pipeline: %w", err)
-	}
-
-	return nil
-}
-
-// Stop stops the OpenTelemetry collector subsystem
-func (i *Instance) Stop() {
-	i.mut.Lock()
-	defer i.mut.Unlock()
-
-	i.stop()
-}
-
-func (i *Instance) stop() {
-	shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	if i.service != nil {
-		err := i.service.Shutdown(shutdownCtx)
-		if err != nil {
-			i.logger.Error("failed to stop Otel service", zap.Error(err))
-		}
-	}
-}
-
-func (i *Instance) buildAndStartPipeline(ctx context.Context, cfg InstanceConfig, logs *logs.Logs, instManager instance.Manager, reg prom_client.Registerer) error {
-	// create component factories
-	otelConfig, err := cfg.OtelConfig()
-	if err != nil {
-		return fmt.Errorf("failed to load otelConfig from agent traces config: %w", err)
-	}
-	for _, rw := range cfg.RemoteWrite {
-		if rw.InsecureSkipVerify {
-			i.logger.Warn("Configuring TLS with insecure_skip_verify. Use tls_config.insecure_skip_verify instead")
-		}
-		if rw.TLSConfig != nil && rw.TLSConfig.ServerName != "" {
-			i.logger.Warn("Configuring unsupported tls_config.server_name")
-		}
-	}
-
-	if cfg.SpanMetrics != nil && len(cfg.SpanMetrics.MetricsInstance) != 0 {
-		ctx = context.WithValue(ctx, contextkeys.Metrics, instManager)
-	}
-
-	if cfg.LoadBalancing == nil && (cfg.TailSampling != nil || cfg.ServiceGraphs != nil) {
-		i.logger.Warn("Configuring tail_sampling and/or service_graphs without load_balancing." +
-			"Load balancing via trace ID is required for those features to work properly in multi agent deployments")
-	}
-
-	if cfg.LoadBalancing == nil && cfg.SpanMetrics != nil {
-		i.logger.Warn("Configuring spanmetrics without load_balancing." +
-			"Load balancing via service name is required for spanmetrics to work properly in multi agent deployments")
-	}
-
-	if cfg.AutomaticLogging != nil && cfg.AutomaticLogging.Backend != automaticloggingprocessor.BackendStdout {
-		ctx = context.WithValue(ctx, contextkeys.Logs, logs)
-	}
-
-	factories, err := tracingFactories()
-	if err != nil {
-		return fmt.Errorf("failed to load tracing factories: %w", err)
-	}
-	i.factories = factories
-
-	appinfo := component.BuildInfo{
-		Command:     "agent",
-		Description: "agent",
-		Version:     build.Version,
-	}
-
-	err = util.SetupStaticModeOtelFeatureGates()
-	if err != nil {
-		return err
-	}
-
-	promExporter, err := traceutils.PrometheusExporter(reg)
-	if err != nil {
-		return fmt.Errorf("error creating otel prometheus exporter: %w", err)
-	}
-
-	i.service, err = service.New(ctx, service.Settings{
-		BuildInfo:                appinfo,
-		Receivers:                receiver.NewBuilder(otelConfig.Receivers, i.factories.Receivers),
-		Processors:               processor.NewBuilder(otelConfig.Processors, i.factories.Processors),
-		Exporters:                otelexporter.NewBuilder(otelConfig.Exporters, i.factories.Exporters),
-		Connectors:               connector.NewBuilder(otelConfig.Connectors, i.factories.Connectors),
-		Extensions:               extension.NewBuilder(otelConfig.Extensions, i.factories.Extensions),
-		OtelMetricViews:          servicegraphprocessor.OtelMetricViews(),
-		OtelMetricReader:         promExporter,
-		DisableProcessMetrics:    true,
-		UseExternalMetricsServer: true,
-		TracerProvider:           noop.NewTracerProvider(),
-		//TODO: Plug in an AsyncErrorChannel to shut down the Agent in case of a fatal event
-		LoggingOptions: []zap.Option{
-			zap.WrapCore(func(zapcore.Core) zapcore.Core {
-				return i.logger.Core()
-			}),
-		},
-	}, otelConfig.Service)
-	if err != nil {
-		return fmt.Errorf("failed to create Otel service: %w", err)
-	}
-
-	err = i.service.Start(ctx)
-	if err != nil {
-		return fmt.Errorf("failed to start Otel service: %w", err)
-	}
-
-	return err
-}
-
-// ReportFatalError implements component.Host
-func (i *Instance) ReportFatalError(err error) {
-	i.logger.Error("fatal error reported", zap.Error(err))
-}
-
-// GetFactory implements component.Host
-func (i *Instance) GetFactory(kind component.Kind, componentType component.Type) component.Factory {
-	switch kind {
-	case component.KindReceiver:
-		return i.factories.Receivers[componentType]
-	default:
-		return nil
-	}
-}
diff --git a/internal/static/traces/remotewriteexporter/exporter.go b/internal/static/traces/remotewriteexporter/exporter.go
index 5f99af577a..cec6e77d56 100644
--- a/internal/static/traces/remotewriteexporter/exporter.go
+++ b/internal/static/traces/remotewriteexporter/exporter.go
@@ -2,115 +2,28 @@ package remotewriteexporter
 
 import (
 	"context"
-	"fmt"
-	"strconv"
-	"strings"
-	"sync"
-	"time"
 
-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/static/traces/contextkeys"
-	util "github.com/grafana/agent/internal/util/log"
-	"github.com/prometheus/prometheus/model/labels"
 	"go.opentelemetry.io/collector/component"
 	"go.opentelemetry.io/collector/consumer"
 	"go.opentelemetry.io/collector/exporter"
-	"go.opentelemetry.io/collector/pdata/pcommon"
 	"go.opentelemetry.io/collector/pdata/pmetric"
 )
 
-const (
-	nameLabelKey = "__name__"
-	sumSuffix    = "sum"
-	countSuffix  = "count"
-	bucketSuffix = "bucket"
-	leStr        = "le"
-	infBucket    = "+Inf"
-	noSuffix     = ""
-)
-
-type datapoint struct {
-	ts int64
-	v  float64
-	l  labels.Labels
-}
-
-type remoteWriteExporter struct {
-	mtx sync.Mutex
-
-	close  chan struct{}
-	closed chan struct{}
-
-	manager      instance.Manager
-	promInstance string
-
-	constLabels labels.Labels
-	namespace   string
-
-	seriesMap    map[uint64]*datapoint
-	staleTime    int64
-	lastFlush    int64
-	loopInterval time.Duration
-
-	logger log.Logger
-}
+type remoteWriteExporter struct{}
 
 func newRemoteWriteExporter(cfg *Config) (exporter.Metrics, error) {
-	logger := log.With(util.Logger, "component", "traces remote write exporter")
-
-	ls := make(labels.Labels, 0, len(cfg.ConstLabels))
-
-	for name, value := range cfg.ConstLabels {
-		ls = append(ls, labels.Label{Name: name, Value: value})
-	}
-
-	staleTime := (15 * time.Minute).Milliseconds()
-	if cfg.StaleTime > 0 {
-		staleTime = cfg.StaleTime.Milliseconds()
-	}
-
-	loopInterval := time.Second
-	if cfg.LoopInterval > 0 {
-		loopInterval = cfg.LoopInterval
-	}
-
-	return &remoteWriteExporter{
-		mtx:          sync.Mutex{},
-		close:        make(chan struct{}),
-		closed:       make(chan struct{}),
-		constLabels:  ls,
-		namespace:    cfg.Namespace,
-		promInstance: cfg.PromInstance,
-		seriesMap:    make(map[uint64]*datapoint),
-		staleTime:    staleTime,
-		loopInterval: loopInterval,
-		logger:       logger,
-	}, nil
+	// NOTE(rfratto): remotewriteexporter has been kept for config conversions,
+	// but is never used, so the implementation of the component has been
+	// removed.
+	return &remoteWriteExporter{}, nil
 }
 
 func (e *remoteWriteExporter) Start(ctx context.Context, _ component.Host) error {
-	manager, ok := ctx.Value(contextkeys.Metrics).(instance.Manager)
-	if !ok || manager == nil {
-		return fmt.Errorf("key does not contain a InstanceManager instance")
-	}
-	e.manager = manager
-
-	go e.appenderLoop()
-
 	return nil
 }
 
 func (e *remoteWriteExporter) Shutdown(ctx context.Context) error {
-	close(e.close)
-
-	select {
-	case <-e.closed:
-		return nil
-	case <-ctx.Done():
-		return ctx.Err()
-	}
+	return nil
 }
 
 func (e *remoteWriteExporter) Capabilities() consumer.Capabilities {
@@ -118,202 +31,5 @@ func (e *remoteWriteExporter) Capabilities() consumer.Capabilities {
 }
 
 func (e *remoteWriteExporter) ConsumeMetrics(ctx context.Context, md pmetric.Metrics) error {
-	select {
-	case <-e.closed:
-		return nil
-	default:
-	}
-
-	resourceMetrics := md.ResourceMetrics()
-	for i := 0; i < resourceMetrics.Len(); i++ {
-		resourceMetric := resourceMetrics.At(i)
-		scopeMetricsSlice := resourceMetric.ScopeMetrics()
-		for j := 0; j < scopeMetricsSlice.Len(); j++ {
-			metricSlice := scopeMetricsSlice.At(j).Metrics()
-			for k := 0; k < metricSlice.Len(); k++ {
-				switch metric := metricSlice.At(k); metric.Type() {
-				case pmetric.MetricTypeGauge:
-					dataPoints := metric.Sum().DataPoints()
-					if err := e.handleNumberDataPoints(metric.Name(), dataPoints); err != nil {
-						return err
-					}
-				case pmetric.MetricTypeSum:
-					if metric.Sum().AggregationTemporality() != pmetric.AggregationTemporalityCumulative {
-						continue // Only cumulative metrics are supported
-					}
-					dataPoints := metric.Sum().DataPoints()
-					if err := e.handleNumberDataPoints(metric.Name(), dataPoints); err != nil {
-						return err
-					}
-				case pmetric.MetricTypeHistogram:
-					if metric.Histogram().AggregationTemporality() != pmetric.AggregationTemporalityCumulative {
-						continue // Only cumulative metrics are supported
-					}
-					dataPoints := metric.Histogram().DataPoints()
-					e.handleHistogramDataPoints(metric.Name(), dataPoints)
-				case pmetric.MetricTypeSummary:
-					return fmt.Errorf("unsupported metric data type %s", metric.Type())
-				default:
-					return fmt.Errorf("unsupported metric data type %s", metric.Type())
-				}
-			}
-		}
-	}
-
-	return nil
-}
-
-func (e *remoteWriteExporter) handleNumberDataPoints(name string, dataPoints pmetric.NumberDataPointSlice) error {
-	for ix := 0; ix < dataPoints.Len(); ix++ {
-		dataPoint := dataPoints.At(ix)
-		lbls := e.createLabelSet(name, noSuffix, dataPoint.Attributes(), labels.Labels{})
-		if err := e.appendNumberDataPoint(dataPoint, lbls); err != nil {
-			return fmt.Errorf("failed to process datapoints %s", err)
-		}
-	}
-	return nil
-}
-
-func (e *remoteWriteExporter) appendNumberDataPoint(dataPoint pmetric.NumberDataPoint, labels labels.Labels) error {
-	var val float64
-	switch dataPoint.ValueType() {
-	case pmetric.NumberDataPointValueTypeDouble:
-		val = dataPoint.DoubleValue()
-	case pmetric.NumberDataPointValueTypeInt:
-		val = float64(dataPoint.IntValue())
-	default:
-		return fmt.Errorf("unknown data point type: %s", dataPoint.ValueType())
-	}
-	ts := e.timestamp()
-
-	e.appendDatapointForSeries(labels, ts, val)
-	return nil
-}
-
-func (e *remoteWriteExporter) handleHistogramDataPoints(name string, dataPoints pmetric.HistogramDataPointSlice) {
-	for ix := 0; ix < dataPoints.Len(); ix++ {
-		dataPoint := dataPoints.At(ix)
-		ts := e.timestamp()
-
-		// Append sum value
-		sumLabels := e.createLabelSet(name, sumSuffix, dataPoint.Attributes(), labels.Labels{})
-		e.appendDatapointForSeries(sumLabels, ts, dataPoint.Sum())
-
-		// Append count value
-		countLabels := e.createLabelSet(name, countSuffix, dataPoint.Attributes(), labels.Labels{})
-		e.appendDatapointForSeries(countLabels, ts, float64(dataPoint.Count()))
-
-		var cumulativeCount uint64
-		for ix := 0; ix < dataPoint.ExplicitBounds().Len(); ix++ {
-			eb := dataPoint.ExplicitBounds().At(ix)
-
-			if ix >= dataPoint.BucketCounts().Len() {
-				break
-			}
-			cumulativeCount += dataPoint.BucketCounts().At(ix)
-			boundStr := strconv.FormatFloat(eb, 'f', -1, 64)
-			bucketLabels := e.createLabelSet(name, bucketSuffix, dataPoint.Attributes(), labels.Labels{{Name: leStr, Value: boundStr}})
-			e.appendDatapointForSeries(bucketLabels, ts, float64(cumulativeCount))
-		}
-
-		// add le=+Inf bucket
-		cumulativeCount += dataPoint.BucketCounts().At(dataPoint.BucketCounts().Len() - 1)
-		infBucketLabels := e.createLabelSet(name, bucketSuffix, dataPoint.Attributes(), labels.Labels{{Name: leStr, Value: infBucket}})
-		e.appendDatapointForSeries(infBucketLabels, ts, float64(cumulativeCount))
-	}
-}
-
-func (e *remoteWriteExporter) appendDatapointForSeries(l labels.Labels, ts int64, v float64) {
-	e.mtx.Lock()
-	defer e.mtx.Unlock()
-
-	series := l.Hash()
-	if lastDatapoint, ok := e.seriesMap[series]; ok {
-		if lastDatapoint.ts >= ts {
-			return
-		}
-		lastDatapoint.ts = ts
-		lastDatapoint.v = v
-		return
-	}
-
-	e.seriesMap[series] = &datapoint{l: l, ts: ts, v: v}
-}
-
-func (e *remoteWriteExporter) appenderLoop() {
-	t := time.NewTicker(e.loopInterval)
-
-	for {
-		select {
-		case <-t.C:
-			e.mtx.Lock()
-			inst, err := e.manager.GetInstance(e.promInstance)
-			if err != nil {
-				level.Error(e.logger).Log("msg", "failed to get prom instance", "err", err)
-				continue
-			}
-			appender := inst.Appender(context.Background())
-
-			now := time.Now().UnixMilli()
-			for _, dp := range e.seriesMap {
-				// If the datapoint hasn't been updated since the last loop, don't append it
-				if dp.ts < e.lastFlush {
-					// If the datapoint is older than now - staleTime, it is stale and gets removed.
-					if now-dp.ts > e.staleTime {
-						delete(e.seriesMap, dp.l.Hash())
-					}
-					continue
-				}
-
-				if _, err := appender.Append(0, dp.l, dp.ts, dp.v); err != nil {
-					level.Error(e.logger).Log("msg", "failed to append datapoint", "err", err)
-				}
-			}
-
-			if err := appender.Commit(); err != nil {
-				level.Error(e.logger).Log("msg", "failed to commit appender", "err", err)
-			}
-
-			e.lastFlush = now
-			e.mtx.Unlock()
-
-		case <-e.close:
-			close(e.closed)
-			return
-		}
-	}
-}
-
-func (e *remoteWriteExporter) createLabelSet(name, suffix string, labelMap pcommon.Map, customLabels labels.Labels) labels.Labels {
-	ls := make(labels.Labels, 0, labelMap.Len()+1+len(e.constLabels)+len(customLabels))
-	// Labels from spanmetrics processor
-	labelMap.Range(func(k string, v pcommon.Value) bool {
-		ls = append(ls, labels.Label{
-			Name:  strings.Replace(k, ".", "_", -1),
-			Value: v.Str(),
-		})
-		return true
-	})
-	// Metric name label
-	ls = append(ls, labels.Label{
-		Name:  nameLabelKey,
-		Value: metricName(e.namespace, name, suffix),
-	})
-	// Const labels
-	ls = append(ls, e.constLabels...)
-	// Custom labels
-	ls = append(ls, customLabels...)
-	return ls
-}
-
-func (e *remoteWriteExporter) timestamp() int64 {
-	return time.Now().UnixMilli()
-}
-
-func metricName(namespace, metric, suffix string) string {
-	if len(suffix) != 0 {
-		return fmt.Sprintf("%s_%s_%s", namespace, metric, suffix)
-	}
-	return fmt.Sprintf("%s_%s", namespace, metric)
-}
diff --git a/internal/static/traces/remotewriteexporter/exporter_test.go b/internal/static/traces/remotewriteexporter/exporter_test.go
deleted file mode 100644
index 63025ec120..0000000000
--- a/internal/static/traces/remotewriteexporter/exporter_test.go
+++ /dev/null
@@ -1,183 +0,0 @@
-package remotewriteexporter
-
-import (
-	"context"
-	"testing"
-	"time"
-
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/static/traces/contextkeys"
-	"github.com/prometheus/prometheus/model/exemplar"
-	"github.com/prometheus/prometheus/model/histogram"
-	"github.com/prometheus/prometheus/model/labels"
-	"github.com/prometheus/prometheus/model/metadata"
-	"github.com/prometheus/prometheus/storage"
-	"github.com/stretchr/testify/require"
-	"go.opentelemetry.io/collector/pdata/pcommon"
-	"go.opentelemetry.io/collector/pdata/pmetric"
-)
-
-const (
-	callsMetric  = "traces_spanmetrics_calls_total"
-	sumMetric    = "traces_spanmetrics_latency_sum"
-	countMetric  = "traces_spanmetrics_latency_count"
-	bucketMetric = "traces_spanmetrics_latency_bucket"
-)
-
-func TestRemoteWriteExporter_ConsumeMetrics(t *testing.T) {
-	var (
-		countValue     uint64  = 20
-		sumValue       float64 = 100
-		bucketCounts           = []uint64{1, 2, 3, 4, 5, 6}
-		explicitBounds         = []float64{1, 2.5, 5, 7.5, 10}
-		ts                     = time.Date(2020, 1, 2, 3, 4, 5, 6, time.UTC)
-	)
-
-	cfg := Config{
-		ConstLabels:  nil,
-		Namespace:    "traces",
-		PromInstance: "traces",
-	}
-	exp, err := newRemoteWriteExporter(&cfg)
-	require.NoError(t, err)
-
-	manager := &mockManager{}
-	ctx := context.WithValue(context.Background(), contextkeys.Metrics, manager)
-	require.NoError(t, exp.Start(ctx, nil))
-
-	metrics := pmetric.NewMetrics()
-	ilm := metrics.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty()
-	ilm.Scope().SetName("spanmetrics")
-
-	// Append sum metric
-	sm := ilm.Metrics().AppendEmpty()
-	sm.SetEmptySum()
-	sm.SetName("spanmetrics_calls_total")
-	sm.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
-
-	sdp := sm.Sum().DataPoints().AppendEmpty()
-	sdp.SetTimestamp(pcommon.NewTimestampFromTime(ts.UTC()))
-	sdp.SetDoubleValue(sumValue)
-
-	// Append histogram
-	hm := ilm.Metrics().AppendEmpty()
-	hm.SetEmptyHistogram()
-	hm.SetName("spanmetrics_latency")
-	hm.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
-
-	hdp := hm.Histogram().DataPoints().AppendEmpty()
-	hdp.SetTimestamp(pcommon.NewTimestampFromTime(ts.UTC()))
-	hdp.BucketCounts().FromRaw(bucketCounts)
-	hdp.ExplicitBounds().FromRaw(explicitBounds)
-	hdp.SetCount(countValue)
-	hdp.SetSum(sumValue)
-
-	err = exp.ConsumeMetrics(context.TODO(), metrics)
-	require.NoError(t, err)
-
-	time.Sleep(5 * time.Second)
-
-	require.NoError(t, exp.Shutdown(context.TODO()))
-
-	// Verify calls
-	calls := manager.instance.GetAppended(callsMetric)
-	require.Equal(t, len(calls), 1)
-	require.Equal(t, calls[0].v, sumValue)
-	require.Equal(t, calls[0].l, labels.Labels{{Name: nameLabelKey, Value: "traces_spanmetrics_calls_total"}})
-
-	// Verify _sum
-	sum := manager.instance.GetAppended(sumMetric)
-	require.Equal(t, len(sum), 1)
-	require.Equal(t, sum[0].v, sumValue)
-	require.Equal(t, sum[0].l, labels.Labels{{Name: nameLabelKey, Value: "traces_spanmetrics_latency_" + sumSuffix}})
-
-	// Check _count
-	count := manager.instance.GetAppended(countMetric)
-	require.Equal(t, len(count), 1)
-	require.Equal(t, count[0].v, float64(countValue))
-	require.Equal(t, count[0].l, labels.Labels{{Name: nameLabelKey, Value: "traces_spanmetrics_latency_" + countSuffix}})
-
-	// Check _bucket
-	buckets := manager.instance.GetAppended(bucketMetric)
-	require.Equal(t, len(buckets), len(bucketCounts))
-}
-
-type mockManager struct {
-	instance *mockInstance
-}
-
-func (m *mockManager) GetInstance(string) (instance.ManagedInstance, error) {
-	if m.instance == nil {
-		m.instance = &mockInstance{}
-	}
-	return m.instance, nil
-}
-
-func (m *mockManager) ListInstances() map[string]instance.ManagedInstance { return nil }
-
-func (m *mockManager) ListConfigs() map[string]instance.Config { return nil }
-
-func (m *mockManager) ApplyConfig(_ instance.Config) error { return nil }
-
-func (m *mockManager) DeleteConfig(_ string) error { return nil }
-
-func (m *mockManager) Stop() {}
-
-type mockInstance struct {
-	instance.NoOpInstance
-	appender *mockAppender
-}
-
-func (m *mockInstance) Appender(_ context.Context) storage.Appender {
-	if m.appender == nil {
-		m.appender = &mockAppender{}
-	}
-	return m.appender
-}
-
-func (m *mockInstance) GetAppended(n string) []metric {
-	return m.appender.GetAppended(n)
-}
-
-type metric struct {
-	l labels.Labels
-	t int64
-	v float64
-}
-
-type mockAppender struct {
-	appendedMetrics []metric
-}
-
-var _ storage.Appender = (*mockAppender)(nil)
-
-func (a *mockAppender) GetAppended(n string) []metric {
-	var ms []metric
-	for _, m := range a.appendedMetrics {
-		if n == m.l.Get(nameLabelKey) {
-			ms = append(ms, m)
-		}
-	}
-	return ms
-}
-
-func (a *mockAppender) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) {
-	a.appendedMetrics = append(a.appendedMetrics, metric{l: l, t: t, v: v})
-	return 0, nil
-}
-
-func (a *mockAppender) Commit() error { return nil }
-
-func (a *mockAppender) Rollback() error { return nil }
-
-func (a *mockAppender) AppendExemplar(_ storage.SeriesRef, _ labels.Labels, _ exemplar.Exemplar) (storage.SeriesRef, error) {
-	return 0, nil
-}
-
-func (a *mockAppender) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) {
-	return 0, nil
-}
-
-func (a *mockAppender) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, _ int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) {
-	return 0, nil
-}
diff --git a/internal/static/traces/traces.go b/internal/static/traces/traces.go
deleted file mode 100644
index 3226e8084d..0000000000
--- a/internal/static/traces/traces.go
+++ /dev/null
@@ -1,111 +0,0 @@
-package traces
-
-import (
-	"fmt"
-	"sync"
-
-	"github.com/go-kit/log"
-	"github.com/grafana/agent/internal/static/logs"
-	"github.com/grafana/agent/internal/static/metrics/instance"
-	"github.com/grafana/agent/internal/util/zapadapter"
-	prom_client "github.com/prometheus/client_golang/prometheus"
-	"go.uber.org/zap"
-)
-
-// Traces wraps the OpenTelemetry collector to enable tracing pipelines
-type Traces struct {
-	mut       sync.Mutex
-	instances map[string]*Instance
-
-	logger *zap.Logger
-	reg    prom_client.Registerer
-
-	promInstanceManager instance.Manager
-}
-
-// New creates and starts trace collection.
-func New(logsSubsystem *logs.Logs, promInstanceManager instance.Manager, reg prom_client.Registerer, cfg Config, l log.Logger) (*Traces, error) {
-	traces := &Traces{
-		instances:           make(map[string]*Instance),
-		logger:              newLogger(l),
-		reg:                 reg,
-		promInstanceManager: promInstanceManager,
-	}
-	if err := traces.ApplyConfig(logsSubsystem, promInstanceManager, cfg); err != nil {
-		return nil, err
-	}
-	return traces, nil
-}
-
-// Instance is used to retrieve a named Traces instance
-func (t *Traces) Instance(name string) *Instance {
-	t.mut.Lock()
-	defer t.mut.Unlock()
-
-	return t.instances[name]
-}
-
-// ApplyConfig updates Traces with a new Config.
-func (t *Traces) ApplyConfig(logsSubsystem *logs.Logs, promInstanceManager instance.Manager, cfg Config) error {
-	t.mut.Lock()
-	defer t.mut.Unlock()
-
-	newInstances := make(map[string]*Instance, len(cfg.Configs))
-
-	for _, c := range cfg.Configs {
-		var (
-			instReg = prom_client.WrapRegistererWith(prom_client.Labels{"traces_config": c.Name}, t.reg)
-		)
-
-		// If an old instance exists, update it and move it to the new map.
-		if old, ok := t.instances[c.Name]; ok {
-			err := old.ApplyConfig(logsSubsystem, promInstanceManager, instReg, c)
-			if err != nil {
-				return err
-			}
-
-			newInstances[c.Name] = old
-			continue
-		}
-
-		var (
-			instLogger = t.logger.With(zap.String("traces_config", c.Name))
-		)
-
-		inst, err := NewInstance(logsSubsystem, instReg, c, instLogger, t.promInstanceManager)
-		if err != nil {
-			return fmt.Errorf("failed to create tracing instance %s: %w", c.Name, err)
-		}
-		newInstances[c.Name] = inst
-	}
-
-	// Any instances in t.instances that aren't in newInstances have been removed
-	// from the config. Stop them before replacing the map.
-	for key, i := range t.instances {
-		if _, exist := newInstances[key]; exist {
-			continue
-		}
-		i.Stop()
-	}
-	t.instances = newInstances
-
-	return nil
-}
-
-// Stop stops the OpenTelemetry collector subsystem
-func (t *Traces) Stop() {
-	t.mut.Lock()
-	defer t.mut.Unlock()
-
-	for _, i := range t.instances {
-		i.Stop()
-	}
-}
-
-func newLogger(l log.Logger) *zap.Logger {
-	logger := zapadapter.New(l)
-	logger = logger.With(zap.String("component", "traces"))
-	logger.Info("Traces Logger Initialized")
-
-	return logger
-}
diff --git a/internal/static/traces/traces_test.go b/internal/static/traces/traces_test.go
deleted file mode 100644
index 5fc3fa4d88..0000000000
--- a/internal/static/traces/traces_test.go
+++ /dev/null
@@ -1,193 +0,0 @@
-package traces
-
-import (
-	"fmt"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/grafana/agent/internal/static/server"
-	"github.com/grafana/agent/internal/static/traces/traceutils"
-	"github.com/grafana/agent/internal/util"
-	"github.com/grafana/dskit/log"
-	"github.com/opentracing/opentracing-go"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/stretchr/testify/require"
-	jaegercfg "github.com/uber/jaeger-client-go/config"
-	"go.opentelemetry.io/collector/pdata/ptrace"
-	"gopkg.in/yaml.v2"
-)
-
-func TestTraces(t *testing.T) {
-	tracesCh := make(chan ptrace.Traces)
-	tracesAddr := traceutils.NewTestServer(t, func(t ptrace.Traces) {
-		tracesCh <- t
-	})
-
-	tracesCfgText := util.Untab(fmt.Sprintf(`
-configs:
-- name: default
-  receivers:
-    jaeger:
-      protocols:
-        thrift_compact:
-  remote_write:
-    - endpoint: %s
-      insecure: true
-  batch:
-    timeout: 100ms
-    send_batch_size: 1
-	`, tracesAddr))
-
-	var cfg Config
-	dec := yaml.NewDecoder(strings.NewReader(tracesCfgText))
-	dec.SetStrict(true)
-	err := dec.Decode(&cfg)
-	require.NoError(t, err)
-
-	var loggingLevel log.Level
-	require.NoError(t, loggingLevel.Set("debug"))
-
-	traces, err := New(nil, nil, prometheus.NewRegistry(), cfg, &server.HookLogger{})
-	require.NoError(t, err)
-	t.Cleanup(traces.Stop)
-
-	tr := testJaegerTracer(t)
-	span := tr.StartSpan("test-span")
-	span.Finish()
-
-	select {
-	case <-time.After(30 * time.Second):
-		require.Fail(t, "failed to receive a span after 30 seconds")
-	case tr := <-tracesCh:
-		require.Equal(t, 1, tr.SpanCount())
-		// Nothing to do, send succeeded.
-	}
-}
-
-func TestTraceWithSpanmetricsConfig(t *testing.T) {
-	tracesCfgText := util.Untab(`
-configs:
-- name: test
-  receivers:
-    zipkin:
-      endpoint: 0.0.0.0:9999
-  remote_write:
-    - endpoint: 0.0.0.0:5555
-      insecure: false
-      tls_config:
-        insecure_skip_verify: true
-  spanmetrics:
-    handler_endpoint: 0.0.0.0:9090
-    const_labels:
-      key1: "value1"
-      key2: "value2"
-	`)
-
-	var cfg Config
-	dec := yaml.NewDecoder(strings.NewReader(tracesCfgText))
-	dec.SetStrict(true)
-	err := dec.Decode(&cfg)
-	require.NoError(t, err)
-
-	var loggingLevel log.Level
-	require.NoError(t, loggingLevel.Set("debug"))
-
-	traces, err := New(nil, nil, prometheus.NewRegistry(), cfg, &server.HookLogger{})
-	require.NoError(t, err)
-	t.Cleanup(traces.Stop)
-}
-
-func TestTrace_ApplyConfig(t *testing.T) {
-	tracesCh := make(chan ptrace.Traces)
-	tracesAddr := traceutils.NewTestServer(t, func(t ptrace.Traces) {
-		tracesCh <- t
-	})
-
-	tracesCfgText := util.Untab(`
-configs:
-- name: default
-  receivers:
-    jaeger:
-      protocols:
-        thrift_compact:
-  remote_write:
-    - endpoint: 127.0.0.1:80 # deliberately the wrong endpoint
-      insecure: true
-  batch:
-    timeout: 100ms
-    send_batch_size: 1
-  service_graphs:
-    enabled: true
-`)
-
-	var cfg Config
-	dec := yaml.NewDecoder(strings.NewReader(tracesCfgText))
-	dec.SetStrict(true)
-	err := dec.Decode(&cfg)
-	require.NoError(t, err)
-
-	traces, err := New(nil, nil, prometheus.NewRegistry(), cfg, &server.HookLogger{})
-	require.NoError(t, err)
-	t.Cleanup(traces.Stop)
-
-	// Fix the config and apply it before sending spans.
-	tracesCfgText = util.Untab(fmt.Sprintf(`
-configs:
-- name: default
-  receivers:
-    jaeger:
-      protocols:
-        thrift_compact:
-  remote_write:
-    - endpoint: %s
-      insecure: true
-  batch:
-    timeout: 100ms
-    send_batch_size: 1
-	`, tracesAddr))
-
-	var fixedConfig Config
-	dec = yaml.NewDecoder(strings.NewReader(tracesCfgText))
-	dec.SetStrict(true)
-	err = dec.Decode(&fixedConfig)
-	require.NoError(t, err)
-
-	err = traces.ApplyConfig(nil, nil, fixedConfig)
-	require.NoError(t, err)
-
-	tr := testJaegerTracer(t)
-	span := tr.StartSpan("test-span")
-	span.Finish()
-
-	select {
-	case <-time.After(30 * time.Second):
-		require.Fail(t, "failed to receive a span after 30 seconds")
-	case tr := <-tracesCh:
-		require.Equal(t, 1, tr.SpanCount())
-		// Nothing to do, send succeeded.
-	}
-}
-
-func testJaegerTracer(t *testing.T) opentracing.Tracer {
-	t.Helper()
-
-	jaegerConfig := jaegercfg.Configuration{
-		ServiceName: "TestTraces",
-		Sampler: &jaegercfg.SamplerConfig{
-			Type:  "const",
-			Param: 1,
-		},
-		Reporter: &jaegercfg.ReporterConfig{
-			LocalAgentHostPort: "127.0.0.1:6831",
-			LogSpans:           true,
-		},
-	}
-	tr, closer, err := jaegerConfig.NewTracer()
-	require.NoError(t, err)
-	t.Cleanup(func() {
-		require.NoError(t, closer.Close())
-	})
-
-	return tr
-}
diff --git a/internal/util/log/log.go b/internal/util/log/log.go
index 9983946e61..8cd0948d57 100644
--- a/internal/util/log/log.go
+++ b/internal/util/log/log.go
@@ -7,123 +7,9 @@ package log
 import (
-	"fmt"
-	"os"
-
 	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
-	dskit "github.com/grafana/dskit/log"
-	"github.com/grafana/dskit/server"
-	"github.com/prometheus/client_golang/prometheus"
 )
 
 var (
 	Logger = log.NewNopLogger()
-
-	logMessages = prometheus.NewCounterVec(prometheus.CounterOpts{
-		Name: "log_messages_total",
-		Help: "Total number of log messages.",
-	}, []string{"level"})
-
-	supportedLevels = []level.Value{
-		level.DebugValue(),
-		level.InfoValue(),
-		level.WarnValue(),
-		level.ErrorValue(),
-	}
 )
-
-func init() {
-	prometheus.MustRegister(logMessages)
-}
-
-// InitLogger initialises the global gokit logger (util_log.Logger) and overrides the
-// default logger for the server.
-func InitLogger(cfg *server.Config) {
-	l, err := NewPrometheusLogger(cfg.LogLevel, cfg.LogFormat)
-	if err != nil {
-		panic(err)
-	}
-
-	// when using util_log.Logger, skip 3 stack frames.
-	Logger = log.With(l, "caller", log.Caller(3))
-
-	// cfg.Log wraps the log function, skipping 4 stack frames to get caller information.
-	// This works in go 1.12, but doesn't work in earlier versions: it would
-	// always show the wrapper function generated by the compiler, marked
-	// <autogenerated> in old versions.
-	cfg.Log = log.With(l, "caller", log.Caller(4))
-}
-
-// PrometheusLogger exposes Prometheus counters for each of go-kit's log levels.
-type PrometheusLogger struct {
-	logger log.Logger
-}
-
-// NewPrometheusLogger creates a new instance of PrometheusLogger which exposes
-// Prometheus counters for various log levels.
-func NewPrometheusLogger(l dskit.Level, format string) (log.Logger, error) {
-	logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
-	if format == "json" {
-		logger = log.NewJSONLogger(log.NewSyncWriter(os.Stderr))
-	}
-	logger = level.NewFilter(logger, LevelFilter(l.String()))
-
-	// Initialise counters for all supported levels:
-	for _, level := range supportedLevels {
-		logMessages.WithLabelValues(level.String())
-	}
-
-	logger = &PrometheusLogger{
-		logger: logger,
-	}
-
-	// return a Logger without caller information; it shouldn't be used directly
-	logger = log.With(logger, "ts", log.DefaultTimestampUTC)
-	return logger, nil
-}
-
-// Log increments the appropriate Prometheus counter depending on the log level.
-func (pl *PrometheusLogger) Log(kv ...interface{}) error {
-	pl.logger.Log(kv...)
-	l := "unknown"
-	for i := 1; i < len(kv); i += 2 {
-		if v, ok := kv[i].(level.Value); ok {
-			l = v.String()
-			break
-		}
-	}
-	logMessages.WithLabelValues(l).Inc()
-	return nil
-}
-
-// CheckFatal prints an error and exits with error code 1 if err is non-nil
-func CheckFatal(location string, err error) {
-	if err != nil {
-		logger := level.Error(Logger)
-		if location != "" {
-			logger = log.With(logger, "msg", "error "+location)
-		}
-		// %+v gets the stack trace from errors using github.com/pkg/errors
-		logger.Log("err", fmt.Sprintf("%+v", err))
-		os.Exit(1)
-	}
-}
-
-// TODO(dannyk): remove once weaveworks/common updates to go-kit/log
-//
-// -> we can then revert to using Level.Gokit
-func LevelFilter(l string) level.Option {
-	switch l {
-	case "debug":
-		return level.AllowDebug()
-	case "info":
-		return level.AllowInfo()
-	case "warn":
-		return level.AllowWarn()
-	case "error":
-		return level.AllowError()
-	default:
-		return level.AllowAll()
-	}
-}
diff --git a/internal/util/otel_feature_gate.go b/internal/util/otel_feature_gate.go
index d2f4797668..643f1e4773 100644
--- a/internal/util/otel_feature_gate.go
+++ b/internal/util/otel_feature_gate.go
@@ -7,43 +7,7 @@ import (
 	_ "go.opentelemetry.io/collector/obsreport"
 )
 
-// Enables a set of feature gates in Otel's Global Feature Gate Registry.
-func EnableOtelFeatureGates(fgNames ...string) error {
-	fgReg := featuregate.GlobalRegistry()
-
-	for _, fg := range fgNames {
-		err := fgReg.Set(fg, true)
-		if err != nil {
-			return fmt.Errorf("error setting Otel feature gate: %w", err)
-		}
-	}
-
-	return nil
-}
-
 var (
-	// useOtelForInternalMetrics is required so that the Collector service configures Collector components using the Otel SDK
-	// instead of OpenCensus. If this is not specified, then the OtelMetricViews and OtelMetricReader parameters which we
-	// pass to service.New() below will not be taken into account. This would mean that metrics from custom components such as
-	// the one in pkg/traces/servicegraphprocessor would not work.
-	//
-	// disableHighCardinalityMetrics is required so that we don't include labels containing ports and IP addresses in gRPC metrics.
-	// Example metric with high cardinality...
-	// rpc_server_duration_bucket{net_sock_peer_addr="127.0.0.1",net_sock_peer_port="59947",rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",traces_config="default",le="7500"} 294
-	// ... the same metric when disableHighCardinalityMetrics is switched on looks like this:
-	// rpc_server_duration_bucket{rpc_grpc_status_code="0",rpc_method="Export",rpc_service="opentelemetry.proto.collector.trace.v1.TraceService",rpc_system="grpc",traces_config="default",le="7500"} 32
-	// For more context:
-	// https://opentelemetry.io/docs/specs/otel/metrics/semantic_conventions/rpc-metrics/
-	// https://github.com/open-telemetry/opentelemetry-go-contrib/pull/2700
-	// https://github.com/open-telemetry/opentelemetry-collector/pull/6788/files
-	//
-	// TODO: Remove "telemetry.useOtelForInternalMetrics" when Collector components
-	// use OpenTelemetry metrics by default.
-	staticModeOtelFeatureGates = []string{
-		"telemetry.useOtelForInternalMetrics",
-		"telemetry.disableHighCardinalityMetrics",
-	}
-
 	// Enable the "telemetry.useOtelForInternalMetrics" Collector feature gate.
 	// Currently, Collector components use OpenCensus metrics by default.
 	// Those metrics cannot be integrated with Agent Flow,
@@ -56,12 +20,21 @@ var (
 	}
 )
 
-// Enables a set of feature gates which should always be enabled for Static mode.
-func SetupStaticModeOtelFeatureGates() error {
-	return EnableOtelFeatureGates(staticModeOtelFeatureGates...)
-}
-
 // Enables a set of feature gates which should always be enabled for Flow mode.
 func SetupFlowModeOtelFeatureGates() error {
 	return EnableOtelFeatureGates(flowModeOtelFeatureGates...)
 }
+
+// Enables a set of feature gates in Otel's Global Feature Gate Registry.
+func EnableOtelFeatureGates(fgNames ...string) error {
+	fgReg := featuregate.GlobalRegistry()
+
+	for _, fg := range fgNames {
+		err := fgReg.Set(fg, true)
+		if err != nil {
+			return fmt.Errorf("error setting Otel feature gate: %w", err)
+		}
+	}
+
+	return nil
+}
diff --git a/internal/util/otel_feature_gate_test.go b/internal/util/otel_feature_gate_test.go
index d4b49ea92c..e3809de8cb 100644
--- a/internal/util/otel_feature_gate_test.go
+++ b/internal/util/otel_feature_gate_test.go
@@ -15,9 +15,6 @@ func Test_FeatureGates(t *testing.T) {
 
 	fgSet := make(map[string]struct{})
 
-	for _, fg := range staticModeOtelFeatureGates {
-		fgSet[fg] = struct{}{}
-	}
 	for _, fg := range flowModeOtelFeatureGates {
 		fgSet[fg] = struct{}{}
 	}
@@ -34,7 +31,6 @@
 		require.Falsef(t, g.IsEnabled(), "feature gate %s is enabled - should it be removed from the Agent?", g.ID())
 	})
 
-	require.NoError(t, SetupStaticModeOtelFeatureGates())
 	require.NoError(t, SetupFlowModeOtelFeatureGates())
 
 	reg.VisitAll(func(g *featuregate.Gate) {
diff --git a/internal/util/sanitize.go b/internal/util/sanitize.go
deleted file mode 100644
index f47595b3aa..0000000000
--- a/internal/util/sanitize.go
+++ /dev/null
@@ -1,10 +0,0 @@
-package util
-
-import "regexp"
-
-var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
-
-// SanitizeLabelName sanitizes a label name for Prometheus.
-func SanitizeLabelName(name string) string {
-	return invalidLabelCharRE.ReplaceAllString(name, "_")
-}
diff --git a/internal/util/structwalk/structwalk.go b/internal/util/structwalk/structwalk.go
deleted file mode 100644
index a1cce56948..0000000000
--- a/internal/util/structwalk/structwalk.go
+++ /dev/null
@@ -1,77 +0,0 @@
-// Package structwalk allows you to "walk" the hierarchy of a struct. It is
-// very similar to github.com/mitchellh/reflectwalk but allows you to change
-// the visitor mid-walk.
-package structwalk
-
-import (
-	"reflect"
-
-	"github.com/mitchellh/reflectwalk"
-)
-
-// Walk traverses the hierarchy of o in depth-first order. It starts by calling
-// v.Visit(o). If the visitor w returned by v.Visit(o) is not nil, Walk is
-// invoked recursively with visitor w for each of the structs inside of o,
-// followed by a call to w.Visit(nil).
-//
-// o must be non-nil.
-func Walk(v Visitor, o interface{}) {
-	sw := structWalker{v: v}
-	_ = reflectwalk.Walk(o, &sw)
-}
-
-// Visitor will have its Visit method invoked for each struct value encountered
-// by Walk. If w returned from Visit is non-nil, Walk will then visit each child
-// of value with w. The final call after visiting all children will be to
-// w.Visit(nil).
-type Visitor interface {
-	Visit(value interface{}) (w Visitor)
-}
-
-type structWalker struct {
-	cur interface{}
-	v   Visitor
-}
-
-// Struct invokes the Visitor for v and its children.
-func (sw *structWalker) Struct(v reflect.Value) error {
-	// structWalker will walk absolutely all fields, even unexported fields or
-	// types. We can only interface exported fields, so we need to abort early
-	// for anything that's not supported.
-	if !v.CanInterface() {
-		return nil
-	}
-
-	// Get the interface to the value. reflectwalk will fully dereference all
-	// structs, so if it's possible for us to address it into a pointer, we
-	// will use that for visiting.
-	var (
-		rawValue = v.Interface()
-		ptrValue = rawValue
-	)
-	if v.Kind() != reflect.Ptr && v.CanAddr() {
-		ptrValue = v.Addr().Interface()
-	}
-
-	// Struct will recursively call reflectwalk.Walk with a new walker, which
-	// means that sw.Struct will be called twice for the same value. We want
-	// to ignore calls to Struct with the same value so we don't recurse
-	// infinitely.
-	if sw.cur != nil && reflect.DeepEqual(rawValue, sw.cur) {
-		return nil
-	}
-
-	// Visit our struct and create a new walker with the returned Visitor.
-	w := sw.v.Visit(ptrValue)
-	if w == nil {
-		return reflectwalk.SkipEntry
-	}
-	_ = reflectwalk.Walk(rawValue, &structWalker{cur: rawValue, v: w})
-	w.Visit(nil)
-
-	return reflectwalk.SkipEntry
-}
-
-func (sw *structWalker) StructField(reflect.StructField, reflect.Value) error {
-	return nil
-}
diff --git a/internal/util/structwalk/structwalk_test.go b/internal/util/structwalk/structwalk_test.go
deleted file mode 100644
index 44d1263f22..0000000000
--- a/internal/util/structwalk/structwalk_test.go
+++ /dev/null
@@ -1,63 +0,0 @@
-package structwalk
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/require"
-)
-
-type LevelA struct {
-	Field1 bool
-	Field2 string
-	Field3 int
-	Nested LevelB
-}
-
-type LevelB struct {
-	Level1 bool
-	Level2 string
-	Field3 int
-	Nested LevelC
-}
-
-type LevelC struct {
-	Level1 bool
-	Level2 string
-	Field3 int
-}
-
-func TestWalk(t *testing.T) {
-	var (
-		iteration int
-		fv        FuncVisitor
-	)
-	fv = func(val interface{}) Visitor {
-		iteration++
-
-		// After visiting all 3 structs, should receive a w.Visit(nil) for each level
-		if iteration >= 4 {
-			require.Nil(t, val)
-			return nil
-		}
-
-		switch iteration {
-		case 1:
-			require.IsType(t, LevelA{}, val)
-		case 2:
-			require.IsType(t, LevelB{}, val)
-		case 3:
-			require.IsType(t, LevelC{}, val)
-		default:
-			require.FailNow(t, "unexpected iteration")
-		}
-
-		return fv
-	}
-
-	var val LevelA
-	Walk(fv, val)
-}
-
-type FuncVisitor func(v interface{}) Visitor
-
-func (fv FuncVisitor) Visit(v interface{}) Visitor { return fv(v) }
diff --git a/internal/util/subset/subset.go b/internal/util/subset/subset.go
deleted file mode 100644
index 6f6561b2ed..0000000000
--- a/internal/util/subset/subset.go
+++ /dev/null
@@ -1,120 +0,0 @@
-// Package subset implements functions to check if one value is a subset of
-// another.
-package subset
-
-import (
-	"fmt"
-	"reflect"
-
-	"gopkg.in/yaml.v2"
-)
-
-// Assert checks whether target is a subset of source. source and target must
-// be the same type. target is a subset of source when:
-//
-// - If target and source are slices or arrays, then target must have the same
-// number of elements as source. Each element in target must be a subset of
-// the corresponding element from source.
-//
-// - If target and source are maps, each key in source must exist in target.
-// The value for each element in target must be a subset of the corresponding
-// element from source.
-//
-// - Otherwise, target and source must be deeply equal.
-//
-// An instance of Error will be returned when target is not a subset of source.
-//
-// Subset checking is primarily useful when doing things like YAML assertions,
-// where you only want to ensure that a subset of YAML is defined as expected.
-func Assert(source, target interface{}) error {
-	return assert(reflect.ValueOf(source), reflect.ValueOf(target))
-}
-
-func assert(source, target reflect.Value) error {
-	// Dereference interfaces/pointers for direct comparison
-	for canElem(source) {
-		source = source.Elem()
-	}
-	for canElem(target) {
-		target = target.Elem()
-	}
-
-	if source.Type() != target.Type() {
-		return &Error{Message: fmt.Sprintf("type mismatch: %T != %T", source.Interface(), target.Interface())}
-	}
-
-	switch source.Kind() {
-	case reflect.Slice, reflect.Array:
-		if source.Len() != target.Len() {
-			return &Error{Message: fmt.Sprintf("length mismatch: %d != %d", source.Len(), target.Len())}
-		}
-		for i := 0; i < source.Len(); i++ {
-			if err := assert(source.Index(i), target.Index(i)); err != nil {
-				return &Error{
-					Message: fmt.Sprintf("element %d", i),
-					Inner:   err,
-				}
-			}
-		}
-		return nil
-
-	case reflect.Map:
-		iter := source.MapRange()
-		for iter.Next() {
-			var (
-				sourceElement = iter.Value()
-				targetElement = target.MapIndex(iter.Key())
-			)
-			if !targetElement.IsValid() {
-				return &Error{Message: fmt.Sprintf("missing key %v", iter.Key().Interface())}
-			}
-			if err := assert(sourceElement, targetElement); err != nil {
-				return &Error{
-					Message: fmt.Sprintf("%v", iter.Key().Interface()),
-					Inner:   err,
-				}
-			}
-		}
-		return nil
-
-	default:
-		if !reflect.DeepEqual(source.Interface(), target.Interface()) {
-			return &Error{Message: fmt.Sprintf("%v != %v", source, target)}
-		}
-		return nil
-	}
-}
-
-func canElem(v reflect.Value) bool {
-	return v.Kind() == reflect.Interface || v.Kind() == reflect.Ptr
-}
-
-// Error is a subset assertion error.
-type Error struct {
-	Message string // Message of the error
-	Inner   error  // Optional inner error
-}
-
-// Error implements error.
-func (e *Error) Error() string {
-	if e.Inner == nil {
-		return e.Message
-	}
-	return fmt.Sprintf("%s: %s", e.Message, e.Inner)
-}
-
-// Unwrap returns the inner error, if set.
-func (e *Error) Unwrap() error { return e.Inner }
-
-// YAMLAssert is like Assert but accepts YAML bytes as input.
-func YAMLAssert(source, target []byte) error {
-	var sourceValue interface{}
-	if err := yaml.Unmarshal(source, &sourceValue); err != nil {
-		return err
-	}
-	var targetValue interface{}
-	if err := yaml.Unmarshal(target, &targetValue); err != nil {
-		return err
-	}
-	return Assert(sourceValue, targetValue)
-}
diff --git a/internal/util/subset/subset_test.go b/internal/util/subset/subset_test.go
deleted file mode 100644
index a44441dd26..0000000000
--- a/internal/util/subset/subset_test.go
+++ /dev/null
@@ -1,92 +0,0 @@
-package subset
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/require"
-)
-
-func TestAssert(t *testing.T) {
-	tt := []struct {
-		name           string
-		source, target string
-		expect         string
-	}{
-		// Plain values
-		{
-			name:   "values match",
-			source: `true`,
-			target: `true`,
-			expect: "",
-		},
-		{
-			name:   "values mismatch",
-			source: `true`,
-			target: `false`,
-			expect: "true != false",
-		},
-		{
-			name:   "type mismatch",
-			source: `true`,
-			target: `5`,
-			expect: "type mismatch: bool != int",
-		},
-
-		// Arrays
-		{
-			name:   "arrays match",
-			source: `[1, 2, 3]`,
-			target: `[1, 2, 3]`,
-			expect: "",
-		},
-		{
-			name:   "arrays mismatch",
-			source: `[1, 2, 3]`,
-			target: `[1, 2, 4]`,
-			expect: "element 2: 3 != 4",
-		},
-		{
-			name:   "array element type mismatch",
-			source: `[1, 2, 3]`,
-			target: `[1, 2, true]`,
-			expect: "element 2: type mismatch: int != bool",
-		},
-
-		// Maps
-		{
-			name:   "maps match",
-			source: `{"hello": "world"}`,
-			target: `{"hello": "world"}`,
-			expect: "",
-		},
-		{
-			name:   "maps mismatch",
-			source: `{"hello": "world", "year": 2000}`,
-			target: `{"hello": "world", "year": 2001}`,
-			expect: "year: 2000 != 2001",
-		},
-		{
-			name:   "maps subset",
-			source: `{"hello": "world"}`,
-			target: `{"hello": "world", "year": 2001}`,
-			expect: "",
-		},
-		{
-			name:   "maps type mismatch",
-			source: `{"hello": "world", "year": 2000}`,
-			target: `{"hello": "world", "year": "yes"}`,
-			expect: "year: type mismatch: int != string",
-		},
-	}
-
-	for _, tc := range tt {
-		t.Run(tc.name, func(t *testing.T) {
-			err := YAMLAssert([]byte(tc.source), []byte(tc.target))
-			if tc.expect == "" {
-				require.NoError(t, err)
-			} else {
-				require.EqualError(t, err, tc.expect)
-			}
-		})
-	}
-}
diff --git a/internal/util/unregisterer.go b/internal/util/unregisterer.go
deleted file mode 100644
index 822132b017..0000000000
--- a/internal/util/unregisterer.go
+++ /dev/null
@@ -1,63 +0,0 @@
-package util
-
-import "github.com/prometheus/client_golang/prometheus"
-
-// Unregisterer is a Prometheus Registerer that can unregister all collectors
-// passed to it.
-type Unregisterer struct {
-	wrap prometheus.Registerer
-	cs   map[prometheus.Collector]struct{}
-}
-
-// WrapWithUnregisterer wraps a prometheus Registerer with capabilities to
-// unregister all collectors.
-func WrapWithUnregisterer(reg prometheus.Registerer) *Unregisterer {
-	return &Unregisterer{
-		wrap: reg,
-		cs:   make(map[prometheus.Collector]struct{}),
-	}
-}
-
-// Register implements prometheus.Registerer.
-func (u *Unregisterer) Register(c prometheus.Collector) error {
-	if u.wrap == nil {
-		return nil
-	}
-
-	err := u.wrap.Register(c)
-	if err != nil {
-		return err
-	}
-	u.cs[c] = struct{}{}
-	return nil
-}
-
-// MustRegister implements prometheus.Registerer.
-func (u *Unregisterer) MustRegister(cs ...prometheus.Collector) {
-	for _, c := range cs {
-		if err := u.Register(c); err != nil {
-			panic(err)
-		}
-	}
-}
-
-// Unregister implements prometheus.Registerer.
-func (u *Unregisterer) Unregister(c prometheus.Collector) bool {
-	if u.wrap != nil && u.wrap.Unregister(c) {
-		delete(u.cs, c)
-		return true
-	}
-	return false
-}
-
-// UnregisterAll unregisters all collectors that were registered through the
-// Registerer.
-func (u *Unregisterer) UnregisterAll() bool {
-	success := true
-	for c := range u.cs {
-		if !u.Unregister(c) {
-			success = false
-		}
-	}
-	return success
-}