diff --git a/examples/self-diagnostics/Cargo.toml b/examples/self-diagnostics/Cargo.toml
index 8e8b1cd394..2caea54105 100644
--- a/examples/self-diagnostics/Cargo.toml
+++ b/examples/self-diagnostics/Cargo.toml
@@ -9,11 +9,7 @@ publish = false
 opentelemetry = { path = "../../opentelemetry" }
 opentelemetry_sdk = { path = "../../opentelemetry-sdk", features = ["rt-tokio"]}
 opentelemetry-stdout = { path = "../../opentelemetry-stdout"}
-opentelemetry-appender-tracing = { path = "../../opentelemetry-appender-tracing"}
 tokio = { workspace = true, features = ["full"] }
 tracing = { workspace = true, features = ["std"]}
 tracing-core = { workspace = true }
 tracing-subscriber = { version = "0.3.18", features = ["env-filter","registry", "std"]}
-opentelemetry-otlp = { path = "../../opentelemetry-otlp", features = ["http-proto", "reqwest-client", "logs"] }
-once_cell ={ version = "1.19.0"}
-ctrlc = "3.4"
diff --git a/examples/self-diagnostics/Dockerfile b/examples/self-diagnostics/Dockerfile
deleted file mode 100644
index f88c276a55..0000000000
--- a/examples/self-diagnostics/Dockerfile
+++ /dev/null
@@ -1,6 +0,0 @@
-FROM rust:1.51
-COPY . /usr/src/basic-otlp-http/
-WORKDIR /usr/src/basic-otlp-http/
-RUN cargo build --release
-RUN cargo install --path .
-CMD ["/usr/local/cargo/bin/basic-otlp-http"]
diff --git a/examples/self-diagnostics/README.md b/examples/self-diagnostics/README.md
index 393b2b846d..e40bc00702 100644
--- a/examples/self-diagnostics/README.md
+++ b/examples/self-diagnostics/README.md
@@ -1,93 +1,28 @@
 # Basic OpenTelemetry metrics example with custom error handler:
-This example shows how to setup the custom error handler for self-diagnostics.
-
-## Custom Error Handling:
-
-A custom error handler is set up to capture and record errors using the `tracing` crate's `error!` macro. These errors are then exported to a collector using the `opentelemetry-appender-tracing` crate, which utilizes the OTLP log exporter over `HTTP/protobuf`. As a result, any errors generated by the configured OTLP metrics pipeline are funneled through this custom error handler for proper recording and export.
+This example shows how to self-diagnose OpenTelemetry by enabling its internal
+logs. OpenTelemetry crates publish internal logs when the `internal-logs`
+feature is enabled; this feature is enabled by default. Internal logs are
+published as `tracing` events, so a `tracing` subscriber must be configured;
+without one, the logs are simply discarded.
 
 ## Filtering logs from external dependencies of OTLP Exporter:
 
-The example configures a tracing `filter` to restrict logs from external crates (`hyper`, `tonic`, and `reqwest`) used by the OTLP Exporter to the `error` level. This helps prevent an infinite loop of log generation when these crates emit logs that are picked up by the tracing subscriber.
-
-## Ensure that the internally generated errors are logged only once:
-
-By using a hashset to track seen errors, the custom error handler ensures that the same error is not logged multiple times. This is particularly useful for handling scenarios where continuous error logging might occur, such as when the OpenTelemetry collector is not running.
-
-
-## Usage
-
-### `docker-compose`
-
-By default runs against the `otel/opentelemetry-collector:latest` image, and uses `reqwest-client`
-as the http client, using http as the transport.
-
-```shell
-docker-compose up
-```
-
-In another terminal run the application `cargo run`
-
-The docker-compose terminal will display logs, traces, metrics.
-
-Press Ctrl+C to stop the collector, and then tear it down:
-
-```shell
-docker-compose down
-```
-
-### Manual
-
-If you don't want to use `docker-compose`, you can manually run the `otel/opentelemetry-collector` container
-and inspect the logs to see traces being transferred.
-
-On Unix based systems use:
-
-```shell
-# From the current directory, run `opentelemetry-collector`
-docker run --rm -it -p 4318:4318 -v $(pwd):/cfg otel/opentelemetry-collector:latest --config=/cfg/otel-collector-config.yaml
-```
-
-On Windows use:
-
-```shell
-# From the current directory, run `opentelemetry-collector`
-docker run --rm -it -p 4318:4318 -v "%cd%":/cfg otel/opentelemetry-collector:latest --config=/cfg/otel-collector-config.yaml
-```
-
-Run the app which exports logs, metrics and traces via OTLP to the collector
-
-```shell
-cargo run
-```
-
-### Output:
-
-- If the docker instance for collector is running, below error should be logged into the container. There won't be any logs from the `hyper`, `reqwest` and `tonic` crates.
-```
-otel-collector-1 | 2024-06-05T17:09:46.926Z info LogExporter {"kind": "exporter", "data_type": "logs", "name": "logging", "resource logs": 1, "log records": 1}
-otel-collector-1 | 2024-06-05T17:09:46.926Z info ResourceLog #0
-otel-collector-1 | Resource SchemaURL:
-otel-collector-1 | Resource attributes:
-otel-collector-1 |      -> telemetry.sdk.name: Str(opentelemetry)
-otel-collector-1 |      -> telemetry.sdk.version: Str(0.23.0)
-otel-collector-1 |      -> telemetry.sdk.language: Str(rust)
-otel-collector-1 |      -> service.name: Str(unknown_service)
-otel-collector-1 | ScopeLogs #0
-otel-collector-1 | ScopeLogs SchemaURL:
-otel-collector-1 | InstrumentationScope opentelemetry-appender-tracing 0.4.0
-otel-collector-1 | LogRecord #0
-otel-collector-1 | ObservedTimestamp: 2024-06-05 17:09:45.931951161 +0000 UTC
-otel-collector-1 | Timestamp: 1970-01-01 00:00:00 +0000 UTC
-otel-collector-1 | SeverityText: ERROR
-otel-collector-1 | SeverityNumber: Error(17)
-otel-collector-1 | Body: Str(OpenTelemetry metrics error occurred: Metrics error: Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged.)
-otel-collector-1 | Attributes:
-otel-collector-1 |      -> name: Str(event examples/self-diagnostics/src/main.rs:42)
-otel-collector-1 | Trace ID:
-otel-collector-1 | Span ID:
-otel-collector-1 | Flags: 0
-otel-collector-1 | {"kind": "exporter", "data_type": "logs", "name": "logging"}
-```
-
-- The SDK will keep trying to upload metrics at regular intervals if the collector's Docker instance is down. To avoid a logging loop, internal errors like 'Connection refused' will be attempted to be logged only once.
+The example configures a tracing `filter` to restrict logs from external crates
+(`hyper`, `tonic`, `reqwest`, etc.) used by the OTLP Exporter to the `error`
+level. This helps prevent an infinite loop of log generation when these crates
+emit logs that are picked up by the tracing subscriber. This is only a
+workaround until [the root
+issue](https://github.com/open-telemetry/opentelemetry-rust/issues/761) is
+resolved.
+
+## Filtering logs sent to OpenTelemetry itself
+
+If you use the [OpenTelemetry Tracing
+Appender](../../opentelemetry-appender-tracing/README.md) to send `tracing` logs
+to OpenTelemetry, then enabling OpenTelemetry internal logs can also cause
+infinite, recursive logging. You can keep OpenTelemetry's internal logs out of
+the [OpenTelemetry Tracing
+Appender](../../opentelemetry-appender-tracing/README.md) with a filter
+directive such as `add_directive("opentelemetry=off".parse().unwrap())`, just as
+directives are added to the filter used for tracing's `FmtSubscriber` in this
+example.
\ No newline at end of file
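For reference, the filtering described in the README above looks roughly like the sketch below. It is not part of this patch; it reuses the pattern from the example code removed later in this diff (an `EnvFilter` for the fmt layer plus a `filter_fn` that keeps targets starting with "opentelemetry" out of the appender), and assumes the `opentelemetry-appender-tracing` crate and the `env-filter` feature of `tracing-subscriber`.

```rust
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
use opentelemetry_sdk::logs::LoggerProvider;
use tracing_subscriber::{filter, fmt, prelude::*, EnvFilter};

fn init_diagnostics(logger_provider: &LoggerProvider) {
    // Print logs to stdout, but keep the OTLP exporter's HTTP/gRPC dependencies
    // at ERROR level so their logs are not fed back into the exporter.
    let fmt_filter = EnvFilter::new("info")
        .add_directive("hyper=error".parse().unwrap())
        .add_directive("tonic=error".parse().unwrap())
        .add_directive("reqwest=error".parse().unwrap());

    // Keep OpenTelemetry's own internal logs (targets starting with
    // "opentelemetry") out of the appender, so exporting a log record cannot
    // recursively generate more log records.
    let appender_filter =
        filter::filter_fn(|metadata| !metadata.target().starts_with("opentelemetry"));

    tracing_subscriber::registry()
        .with(fmt::layer().with_filter(fmt_filter))
        .with(OpenTelemetryTracingBridge::new(logger_provider).with_filter(appender_filter))
        .init();
}
```

When no appender is used, as in the updated example in this diff, only the fmt layer and its filter are needed.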
diff --git a/examples/self-diagnostics/docker-compose.yaml b/examples/self-diagnostics/docker-compose.yaml
deleted file mode 100644
index b363c459ea..0000000000
--- a/examples/self-diagnostics/docker-compose.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-version: "2"
-services:
-
-  # Collector
-  otel-collector:
-    image: otel/opentelemetry-collector:latest
-    command: ["--config=/etc/otel-collector-config.yaml", "${OTELCOL_ARGS}"]
-    volumes:
-      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
-    ports:
-      - "4318:4318" # OTLP HTTP receiver
diff --git a/examples/self-diagnostics/otel-collector-config.yaml b/examples/self-diagnostics/otel-collector-config.yaml
deleted file mode 100644
index 3b75f515c0..0000000000
--- a/examples/self-diagnostics/otel-collector-config.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-# This is a configuration file for the OpenTelemetry Collector intended to be
-# used in conjunction with the opentelemetry-otlp example.
-#
-# For more information about the OpenTelemetry Collector see:
-#   https://github.com/open-telemetry/opentelemetry-collector
-#
-receivers:
-  otlp:
-    protocols:
-      grpc:
-        endpoint: 0.0.0.0:4317
-      http:
-        endpoint: 0.0.0.0:4318
-
-exporters:
-  debug:
-    verbosity: detailed
-
-service:
-  pipelines:
-    traces:
-      receivers: [otlp]
-      exporters: [debug]
-    metrics:
-      receivers: [otlp]
-      exporters: [debug]
-    logs:
-      receivers: [otlp]
-      exporters: [debug]
\ No newline at end of file
diff --git a/examples/self-diagnostics/src/main.rs b/examples/self-diagnostics/src/main.rs
index a16204c3fe..e34ed5f67b 100644
--- a/examples/self-diagnostics/src/main.rs
+++ b/examples/self-diagnostics/src/main.rs
@@ -1,123 +1,16 @@
-use opentelemetry::global::{self, set_error_handler, Error as OtelError};
+use opentelemetry::global;
 use opentelemetry::KeyValue;
-use opentelemetry_appender_tracing::layer;
-use opentelemetry_otlp::{LogExporter, MetricExporter, WithExportConfig};
 use opentelemetry_sdk::metrics::PeriodicReader;
-use tracing_subscriber::filter::{EnvFilter, LevelFilter};
+use std::error::Error;
+use tracing::info;
 use tracing_subscriber::fmt;
 use tracing_subscriber::prelude::*;
-
-use std::error::Error;
-use tracing::error;
-
-use once_cell::sync::Lazy;
-use std::collections::HashSet;
-use std::sync::{Arc, Mutex};
-
-use std::sync::mpsc::channel;
-
-struct ErrorState {
-    seen_errors: Mutex<HashSet<String>>,
-}
-
-impl ErrorState {
-    fn new() -> Self {
-        ErrorState {
-            seen_errors: Mutex::new(HashSet::new()),
-        }
-    }
-
-    fn mark_as_seen(&self, err: &OtelError) -> bool {
-        let mut seen_errors = self.seen_errors.lock().unwrap();
-        seen_errors.insert(err.to_string())
-    }
-}
-
-static GLOBAL_ERROR_STATE: Lazy<Arc<ErrorState>> = Lazy::new(|| Arc::new(ErrorState::new()));
-
-fn custom_error_handler(err: OtelError) {
-    if GLOBAL_ERROR_STATE.mark_as_seen(&err) {
-        // log error not already seen
-        match err {
-            OtelError::Metric(err) => error!("OpenTelemetry metrics error occurred: {}", err),
-            OtelError::Trace(err) => error!("OpenTelemetry trace error occurred: {}", err),
-            OtelError::Log(err) => error!("OpenTelemetry log error occurred: {}", err),
-            OtelError::Propagation(err) => {
-                error!("OpenTelemetry propagation error occurred: {}", err)
-            }
-            OtelError::Other(err_msg) => error!("OpenTelemetry error occurred: {}", err_msg),
-            _ => error!("OpenTelemetry error occurred: {:?}", err),
-        }
-    }
-}
-
-fn init_logger_provider() -> opentelemetry_sdk::logs::LoggerProvider {
-    let exporter = LogExporter::builder()
-        .with_http()
-        .with_endpoint("http://localhost:4318/v1/logs")
-        .build()
-        .unwrap();
-
-    let provider = opentelemetry_sdk::logs::LoggerProvider::builder()
-        .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
-        .build();
-
-    let cloned_provider = provider.clone();
-
-    // Add a tracing filter to filter events from crates used by opentelemetry-otlp.
-    // The filter levels are set as follows:
-    // - Allow `info` level and above by default.
-    // - Restrict `hyper`, `tonic`, and `reqwest` to `error` level logs only.
-    // This ensures events generated from these crates within the OTLP Exporter are not looped back,
-    // thus preventing infinite event generation.
-    // Note: This will also drop events from these crates used outside the OTLP Exporter.
-    // For more details, see: https://github.com/open-telemetry/opentelemetry-rust/issues/761
-    let filter = EnvFilter::new("info")
-        .add_directive("hyper=error".parse().unwrap())
-        .add_directive("tonic=error".parse().unwrap())
-        .add_directive("reqwest=error".parse().unwrap());
-
-    // Configuring the formatting layer specifically for OpenTelemetry internal logs.
-    // These logs starts with "opentelemetry" prefix in target. This allows specific logs
-    // from the OpenTelemetry-related components to be filtered and handled separately
-    // from the application logs
-
-    let opentelemetry_filter = tracing_subscriber::filter::filter_fn(|metadata| {
-        metadata.target().starts_with("opentelemetry")
-    });
-
-    let fmt_opentelemetry_layer = fmt::layer()
-        .with_filter(LevelFilter::DEBUG)
-        .with_filter(opentelemetry_filter);
-
-    // Configures the appender tracing layer, filtering out OpenTelemetry internal logs
-    // to prevent infinite logging loops.
-
-    let non_opentelemetry_filter = tracing_subscriber::filter::filter_fn(|metadata| {
-        !metadata.target().starts_with("opentelemetry")
-    });
-
-    let otel_layer = layer::OpenTelemetryTracingBridge::new(&cloned_provider)
-        .with_filter(non_opentelemetry_filter.clone());
-
-    tracing_subscriber::registry()
-        .with(fmt_opentelemetry_layer)
-        .with(fmt::layer().with_filter(filter))
-        .with(otel_layer)
-        .init();
-    provider
-}
+use tracing_subscriber::EnvFilter;
 
 fn init_meter_provider() -> opentelemetry_sdk::metrics::SdkMeterProvider {
-    let exporter = MetricExporter::builder()
-        .with_http()
-        .with_endpoint("http://localhost:4318/v1/metrics")
-        .build()
-        .unwrap();
+    let exporter = opentelemetry_stdout::MetricExporterBuilder::default().build();
 
-    let reader = PeriodicReader::builder(exporter, opentelemetry_sdk::runtime::Tokio)
-        .with_interval(std::time::Duration::from_secs(1))
-        .build();
+    let reader = PeriodicReader::builder(exporter, opentelemetry_sdk::runtime::Tokio).build();
 
     let provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
         .with_reader(reader)
@@ -130,46 +23,43 @@ fn init_meter_provider() -> opentelemetry_sdk::metrics::SdkMeterProvider {
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn Error>> {
-    // Set the custom error handler
-    if let Err(err) = set_error_handler(custom_error_handler) {
-        eprintln!("Failed to set custom error handler: {}", err);
-    }
+    // OpenTelemetry uses the `tracing` crate for its internal logging. Unless a
+    // tracing subscriber is set, the logs will be discarded. In this example,
+    // we configure a `tracing` subscriber to:
+    //   1. Print logs of level INFO or higher to stdout.
+    //   2. Filter logs from OpenTelemetry's dependencies (like `tonic`, `hyper`,
+    //      and `reqwest`, which are commonly used by the OTLP exporter) so that
+    //      only ERROR-level logs are printed. This filtering helps reduce
+    //      repetitive log messages that could otherwise create an infinite loop
+    //      of log output. This is a workaround until
+    //      https://github.com/open-telemetry/opentelemetry-rust/issues/761 is
+    //      resolved.
+
+    // Target names used by OpenTelemetry always start with "opentelemetry".
+    // Hence, one may use `add_directive("opentelemetry=off".parse().unwrap())`
+    // to turn off all logs from OpenTelemetry.
-
-    let logger_provider = init_logger_provider();
+    let filter = EnvFilter::new("info")
+        .add_directive("hyper=error".parse().unwrap())
+        .add_directive("tonic=error".parse().unwrap())
+        .add_directive("h2=error".parse().unwrap())
+        .add_directive("tower=error".parse().unwrap())
+        .add_directive("reqwest=error".parse().unwrap());
+    tracing_subscriber::registry()
+        .with(fmt::layer().with_thread_names(true).with_filter(filter))
+        .init();
 
     // Initialize the MeterProvider with the stdout Exporter.
     let meter_provider = init_meter_provider();
+    info!("Starting self-diagnostics example");
 
-    // Create a meter from the above MeterProvider.
     let meter = global::meter("example");
-    // Create a Counter Instrument.
-    let counter = meter.u64_counter("my_counter").build();
-
-    // Record measurements with unique key-value pairs to exceed the cardinality limit
-    // of 2000 and trigger error message
-    for i in 0..3000 {
-        counter.add(
-            10,
-            &[KeyValue::new(
-                format!("mykey{}", i),
-                format!("myvalue{}", i),
-            )],
-        );
-    }
-
-    let (tx, rx) = channel();
-
-    ctrlc::set_handler(move || tx.send(()).expect("Could not send signal on channel."))
-        .expect("Error setting Ctrl-C handler");
-
-    println!("Press Ctrl-C to continue...");
-    rx.recv().expect("Could not receive from channel.");
-    println!("Got Ctrl-C, Doing shutdown and existing.");
+    // Create a counter using an invalid name to trigger an internal
+    // log about the invalid name.
+    let counter = meter.u64_counter("my_counter with_space").build();
+    counter.add(10, &[KeyValue::new("key", "value")]);
 
-    // MeterProvider is configured with an OTLP Exporter to export metrics every 1 second,
-    // however shutting down the MeterProvider here instantly flushes
-    // the metrics, instead of waiting for the 1 sec interval.
     meter_provider.shutdown()?;
-    let _ = logger_provider.shutdown();
+    info!("Shutdown complete. Bye!");
     Ok(())
 }
Bye!"); Ok(()) } diff --git a/opentelemetry-sdk/src/metrics/periodic_reader.rs b/opentelemetry-sdk/src/metrics/periodic_reader.rs index 03a10a54ee..1cab53edd1 100644 --- a/opentelemetry-sdk/src/metrics/periodic_reader.rs +++ b/opentelemetry-sdk/src/metrics/periodic_reader.rs @@ -131,6 +131,12 @@ where })); }; + otel_debug!( + name: "PeriodicReader.BuildCompleted", + message = "Periodic reader built.", + interval_in_secs = self.interval.as_secs(), + ); + PeriodicReader { exporter: Arc::new(self.exporter), inner: Arc::new(Mutex::new(PeriodicReaderInner { @@ -251,6 +257,10 @@ impl PeriodicReaderWorker { async fn process_message(&mut self, message: Message) -> bool { match message { Message::Export => { + otel_debug!( + name: "PeriodicReader.ExportTriggered", + message = "Export message received.", + ); if let Err(err) = self.collect_and_export().await { otel_error!( name: "PeriodicReader.ExportFailed", @@ -259,16 +269,24 @@ impl PeriodicReaderWorker { } } Message::Flush(ch) => { + otel_debug!( + name: "PeriodicReader.ForceFlushCalled", + message = "Flush message received.", + ); let res = self.collect_and_export().await; if let Err(send_error) = ch.send(res) { otel_debug!( name: "PeriodicReader.Flush.SendResultError", - message = "Failed to send flush result", + message = "Failed to send flush result.", reason = format!("{:?}", send_error), ); } } Message::Shutdown(ch) => { + otel_debug!( + name: "PeriodicReader.ShutdownCalled", + message = "Shutdown message received", + ); let res = self.collect_and_export().await; let _ = self.reader.exporter.shutdown(); if let Err(send_error) = ch.send(res) { diff --git a/opentelemetry/CHANGELOG.md b/opentelemetry/CHANGELOG.md index feb62933ae..2b181cc8ad 100644 --- a/opentelemetry/CHANGELOG.md +++ b/opentelemetry/CHANGELOG.md @@ -33,6 +33,12 @@ let counter = meter.u64_counter("my_counter").build(); - Replaced `global::meter_with_version` with `global::meter_with_scope` - Added `global::tracer_with_scope` +- **Breaking change**: [#2260](https://github.com/open-telemetry/opentelemetry-rust/pull/2260) + - Removed `global::set_error_handler` and `global::handle_error`. + - `global::handle_error` usage inside the opentelemetry crates has been replaced with `global::otel_info`, `otel_warn`, `otel_debug` and `otel_error` macros based on the severity of the internal logs. + - The default behavior of `global::handle_error` was to log the error using `eprintln!`. With otel macro, the internal logs get emitted via `tracing` macros of matching severity. Users now need to configure the `tracing` layer to capture these logs. + - Refer to this PR description for migration guide. Also refer to [self-diagnostics](https://github.com/open-telemetry/opentelemetry-rust/tree/main/examples/self-diagnostics) example on how to configure the tracing layer for internal logs. + ## v0.26.0 Released 2024-Sep-30 diff --git a/opentelemetry/src/global/error_handler.rs b/opentelemetry/src/global/error_handler.rs index 3a717154bf..391fa25f63 100644 --- a/opentelemetry/src/global/error_handler.rs +++ b/opentelemetry/src/global/error_handler.rs @@ -1,5 +1,4 @@ use std::sync::PoisonError; -use std::sync::RwLock; #[cfg(feature = "logs")] use crate::logs::LogError; @@ -8,9 +7,6 @@ use crate::metrics::MetricError; use crate::propagation::PropagationError; #[cfg(feature = "trace")] use crate::trace::TraceError; -use once_cell::sync::Lazy; - -static GLOBAL_ERROR_HANDLER: Lazy>> = Lazy::new(|| RwLock::new(None)); /// Wrapper for error from both tracing and metrics part of open telemetry. 
diff --git a/opentelemetry/src/global/error_handler.rs b/opentelemetry/src/global/error_handler.rs
index 3a717154bf..391fa25f63 100644
--- a/opentelemetry/src/global/error_handler.rs
+++ b/opentelemetry/src/global/error_handler.rs
@@ -1,5 +1,4 @@
 use std::sync::PoisonError;
-use std::sync::RwLock;
 
 #[cfg(feature = "logs")]
 use crate::logs::LogError;
@@ -8,9 +7,6 @@ use crate::metrics::MetricError;
 use crate::propagation::PropagationError;
 #[cfg(feature = "trace")]
 use crate::trace::TraceError;
-use once_cell::sync::Lazy;
-
-static GLOBAL_ERROR_HANDLER: Lazy<RwLock<Option<ErrorHandler>>> = Lazy::new(|| RwLock::new(None));
 
 /// Wrapper for error from both tracing and metrics part of open telemetry.
 #[derive(thiserror::Error, Debug)]
@@ -47,40 +43,3 @@ impl<T> From<PoisonError<T>> for Error {
         Error::Other(err.to_string())
     }
 }
-
-struct ErrorHandler(Box<dyn Fn(Error) + Send + Sync>);
-
-/// Handle error using the globally configured error handler.
-///
-/// Writes to stderr if unset.
-pub fn handle_error<T: Into<Error>>(err: T) {
-    match GLOBAL_ERROR_HANDLER.read() {
-        Ok(handler) if handler.is_some() => (handler.as_ref().unwrap().0)(err.into()),
-        _ => match err.into() {
-            #[cfg(feature = "metrics")]
-            #[cfg_attr(docsrs, doc(cfg(feature = "metrics")))]
-            Error::Metric(err) => eprintln!("OpenTelemetry metrics error occurred. {}", err),
-            #[cfg(feature = "trace")]
-            #[cfg_attr(docsrs, doc(cfg(feature = "trace")))]
-            Error::Trace(err) => eprintln!("OpenTelemetry trace error occurred. {}", err),
-            #[cfg(feature = "logs")]
-            #[cfg_attr(docsrs, doc(cfg(feature = "logs")))]
-            Error::Log(err) => eprintln!("OpenTelemetry log error occurred. {}", err),
-            Error::Propagation(err) => {
-                eprintln!("OpenTelemetry propagation error occurred. {}", err)
-            }
-            Error::Other(err_msg) => eprintln!("OpenTelemetry error occurred. {}", err_msg),
-        },
-    }
-}
-
-/// Set global error handler.
-pub fn set_error_handler<F>(f: F) -> std::result::Result<(), Error>
-where
-    F: Fn(Error) + Send + Sync + 'static,
-{
-    GLOBAL_ERROR_HANDLER
-        .write()
-        .map(|mut handler| *handler = Some(ErrorHandler(Box::new(f))))
-        .map_err(Into::into)
-}
diff --git a/opentelemetry/src/global/mod.rs b/opentelemetry/src/global/mod.rs
index 4fada1123a..5f27b0e08b 100644
--- a/opentelemetry/src/global/mod.rs
+++ b/opentelemetry/src/global/mod.rs
@@ -138,8 +138,8 @@ mod metrics;
 mod propagation;
 #[cfg(feature = "trace")]
 mod trace;
+pub use error_handler::Error;
 
-pub use error_handler::{handle_error, set_error_handler, Error};
 #[cfg(feature = "metrics")]
 #[cfg_attr(docsrs, doc(cfg(feature = "metrics")))]
 pub use metrics::*;
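Finally, a small sketch (not part of this patch) of how the DEBUG-level events added to `PeriodicReader` above can be surfaced without raising the verbosity of the rest of an application, reusing the target-prefix filter from the removed example code:

```rust
use tracing_subscriber::filter::{filter_fn, LevelFilter};
use tracing_subscriber::{fmt, prelude::*};

fn init_internal_debug_logs() {
    // A dedicated fmt layer that only sees OpenTelemetry's internal targets,
    // at DEBUG level, so events such as "PeriodicReader.BuildCompleted" and
    // "PeriodicReader.ExportTriggered" become visible on stdout.
    let otel_internal = fmt::layer()
        .with_filter(LevelFilter::DEBUG)
        .with_filter(filter_fn(|metadata| {
            metadata.target().starts_with("opentelemetry")
        }));

    tracing_subscriber::registry().with(otel_internal).init();
}
```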