Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions quickwit/quickwit-telemetry-exporters/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@ license.workspace = true
[dependencies]
anyhow = { workspace = true }
metrics = { workspace = true }
metrics-opentelemetry = { workspace = true }
metrics-exporter-prometheus = { workspace = true }
metrics-opentelemetry = { workspace = true }
metrics-util = { workspace = true }
opentelemetry = { workspace = true }
opentelemetry-appender-tracing = { workspace = true }
opentelemetry-otlp = { workspace = true, features = ["experimental-grpc-retry", "experimental-http-retry"] }
opentelemetry_sdk = { workspace = true }
opentelemetry_sdk = { workspace = true, features = [
"experimental_logs_batch_log_processor_with_async_runtime",
"experimental_metrics_periodicreader_with_async_runtime",
"experimental_trace_batch_span_processor_with_async_runtime",
] }
serde_json = { workspace = true }
time = { workspace = true, features = ["parsing"] }
tracing = { workspace = true }
Expand Down
7 changes: 5 additions & 2 deletions quickwit/quickwit-telemetry-exporters/src/otlp/logs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ use anyhow::Context;
use opentelemetry_otlp::{
LogExporter, Protocol as OtlpWireProtocol, WithExportConfig, WithHttpConfig, WithTonicConfig,
};
use opentelemetry_sdk::Resource;
use opentelemetry_sdk::logs::SdkLoggerProvider;
use opentelemetry_sdk::logs::log_processor_with_async_runtime::BatchLogProcessor;
use opentelemetry_sdk::{Resource, runtime};

use crate::otlp::{OtlpExporterConfig, OtlpProtocol};

Expand All @@ -43,14 +44,16 @@ impl OtlpProtocol {
}
}

/// Builds the OTLP logger provider.
pub(crate) fn init_logger_provider(
otlp_config: &OtlpExporterConfig,
resource: Resource,
) -> anyhow::Result<SdkLoggerProvider> {
let logs_protocol = otlp_config.logs_protocol()?;
let log_exporter = logs_protocol.log_exporter()?;
let log_processor = BatchLogProcessor::builder(log_exporter, runtime::Tokio).build();
Ok(SdkLoggerProvider::builder()
.with_resource(resource)
.with_batch_exporter(log_exporter)
.with_log_processor(log_processor)
.build())
}
6 changes: 5 additions & 1 deletion quickwit/quickwit-telemetry-exporters/src/otlp/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ use opentelemetry_otlp::{
MetricExporter, Protocol as OtlpWireProtocol, WithExportConfig, WithHttpConfig, WithTonicConfig,
};
use opentelemetry_sdk::metrics::SdkMeterProvider;
use opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader;
use opentelemetry_sdk::runtime;

use crate::otlp::{OtlpExporterConfig, OtlpProtocol, quickwit_resource};

Expand All @@ -44,15 +46,17 @@ impl OtlpProtocol {
}
}

/// Builds the OTLP metrics recorder and its meter provider.
pub(crate) fn build_recorder(
service_version: &str,
otlp_config: &OtlpExporterConfig,
) -> anyhow::Result<(OpenTelemetryRecorder, SdkMeterProvider)> {
let metrics_protocol = otlp_config.metrics_protocol()?;
let metric_exporter = metrics_protocol.metric_exporter()?;
let metric_reader = PeriodicReader::builder(metric_exporter, runtime::Tokio).build();

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep HTTP OTLP exports off the Tokio runtime

When OTEL_EXPORTER_OTLP_PROTOCOL or a signal-specific protocol is set to http/protobuf or http/json (both are accepted in config.rs), this crate still uses opentelemetry-otlp's default blocking reqwest HTTP client, because quickwit/Cargo.toml:192 does not enable reqwest-client or hyper-client. Moving this reader (and the analogous log/span processors) onto runtime::Tokio therefore runs blocking HTTP exports on Quickwit's main Tokio workers instead of on the SDK's dedicated thread; a slow or retrying collector can tie up runtime threads and degrade serving/indexing. Please restrict the async-runtime path to gRPC, or enable an async HTTP client for the HTTP protocols.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trinity-1686a thoughts on this one?

let metrics_provider = SdkMeterProvider::builder()
.with_resource(quickwit_resource(service_version))
.with_periodic_exporter(metric_exporter)
.with_reader(metric_reader)
.build();
let meter = metrics_provider.meter("quickwit");

Expand Down
6 changes: 4 additions & 2 deletions quickwit/quickwit-telemetry-exporters/src/otlp/traces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ use anyhow::Context;
use opentelemetry_otlp::{
Protocol as OtlpWireProtocol, SpanExporter, WithExportConfig, WithHttpConfig, WithTonicConfig,
};
use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor;
use opentelemetry_sdk::trace::{BatchConfigBuilder, SdkTracerProvider};
use opentelemetry_sdk::{Resource, trace};
use opentelemetry_sdk::{Resource, runtime};

use crate::otlp::{OtlpExporterConfig, OtlpProtocol};

Expand All @@ -43,13 +44,14 @@ impl OtlpProtocol {
}
}

/// Builds the OTLP tracer provider.
pub(crate) fn init_tracer_provider(
otlp_config: &OtlpExporterConfig,
resource: Resource,
) -> anyhow::Result<SdkTracerProvider> {
let traces_protocol = otlp_config.traces_protocol()?;
let span_exporter = traces_protocol.span_exporter()?;
let span_processor = trace::BatchSpanProcessor::builder(span_exporter)
let span_processor = BatchSpanProcessor::builder(span_exporter, runtime::Tokio)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid deadlocking shutdown on one-worker runtimes

With the processor spawned onto runtime::Tokio, provider shutdown now has to send a shutdown message to a Tokio task and synchronously wait for the reply. Quickwit calls telemetry_handle.shutdown() from inside main_impl, and QW_TOKIO_RUNTIME_NUM_THREADS (or a low CPU limit) can make the multi-thread runtime use a single worker; in that case the blocking shutdown call occupies the only worker, so the telemetry task cannot run to flush and acknowledge the shutdown. The previous dedicated-thread processor did not depend on the application runtime making progress during shutdown, so please either avoid the async processor on single-worker runtimes or run shutdown outside the Tokio worker.

Useful? React with 👍 / 👎.

.with_batch_config(
BatchConfigBuilder::default()
// Quickwit can generate a lot of spans, especially in debug mode, and the
Expand Down
Loading