gemini_genai_rs/telemetry/
mod.rs

1//! Observability layer — OpenTelemetry tracing, structured logging, Prometheus metrics.
2//!
3//! All components are feature-gated for zero overhead when disabled:
4//! - `tracing-subscriber`: console logging — installs a fmt/EnvFilter subscriber
5//!   via `TelemetryConfig::init` (the `tracing` facade itself is always available)
6//! - `metrics`: Prometheus metric definitions and export
7//! - `otel-otlp`: OTLP trace and metric export to any OTel collector
8//! - `otel-gcp`: Google Cloud-native trace and metric export (Cloud Trace + Cloud Monitoring)
9
10pub mod logging;
11pub mod metrics;
12pub mod spans;
13
/// Telemetry configuration.
///
/// A plain data struct with public fields; build it with a struct literal or
/// start from [`Default`] and override individual fields. It derives
/// `PartialEq`/`Eq` so configurations can be compared directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TelemetryConfig {
    /// Enable structured logging. Installing the subscriber requires the
    /// `tracing-subscriber` crate feature; without it this flag is inert.
    pub logging_enabled: bool,
    /// Log level filter (e.g., "info", "debug", "gemini_genai_rs=debug").
    /// Parsed as an `EnvFilter` directive string; a string that fails to parse
    /// falls back to "info".
    pub log_filter: String,
    /// Use JSON format for logs (production). If false, the default
    /// `tracing_subscriber::fmt` layer is used (development).
    pub json_logs: bool,
    /// Enable Prometheus metrics endpoint.
    // NOTE(review): not read anywhere in this module; presumably consumed by
    // the `metrics` submodule — confirm.
    pub metrics_enabled: bool,
    /// Prometheus listen address (e.g., "0.0.0.0:9090").
    // NOTE(review): not read anywhere in this module; presumably consumed by
    // the `metrics` submodule — confirm.
    pub metrics_addr: Option<String>,
    /// Enable OTel trace export (requires `otel-otlp` or `otel-gcp` feature).
    pub otel_traces: bool,
    /// Enable OTel metrics export (requires `otel-otlp` or `otel-gcp` feature).
    pub otel_metrics: bool,
    /// OTel service name for resource identification. Used as the
    /// `service.name` resource attribute, as the OTLP tracer name, and as the
    /// suffix of the `custom.googleapis.com/` metric prefix on the GCP path.
    pub otel_service_name: String,
    /// Google Cloud project ID for GCP-native OTel export.
    /// If None, the GCP exporters fall back to their own default project
    /// resolution (presumably ADC or the environment — confirm against the
    /// exporter crates).
    pub otel_gcp_project: Option<String>,
}
38
39impl Default for TelemetryConfig {
40    fn default() -> Self {
41        Self {
42            logging_enabled: true,
43            log_filter: "info".to_string(),
44            json_logs: false,
45            metrics_enabled: false,
46            metrics_addr: None,
47            otel_traces: false,
48            otel_metrics: false,
49            otel_service_name: "gemini-live".to_string(),
50            otel_gcp_project: None,
51        }
52    }
53}
54
/// Guard that keeps telemetry systems alive while held.
///
/// With the `otel-base` feature the guard stores the tracer and meter
/// providers created by the `TelemetryConfig` init methods; without it the
/// guard is an empty marker. Dropping the guard calls `shutdown()` on every
/// provider it holds, so bind it to a variable that lives as long as the
/// application.
///
/// The explicit `Drop` impl exists because the init methods also hand clones
/// of the providers to the OTel global registry: releasing the handles stored
/// here would not, by itself, stop the exporters.
#[must_use = "dropping the guard shuts down the OTel exporters; keep it alive for the lifetime of the application"]
#[derive(Default)]
pub struct TelemetryGuard {
    #[cfg(feature = "otel-base")]
    _tracer_provider: Option<opentelemetry_sdk::trace::SdkTracerProvider>,
    #[cfg(feature = "otel-base")]
    _meter_provider: Option<opentelemetry_sdk::metrics::SdkMeterProvider>,
    #[cfg(not(feature = "otel-base"))]
    _private: (),
}

#[cfg(feature = "otel-base")]
impl Drop for TelemetryGuard {
    /// Shut down whichever providers this guard owns.
    ///
    /// `Drop` cannot return an error, so failures are reported on stderr
    /// instead of being discarded silently.
    // NOTE(review): `shutdown()` is a blocking call. With the async-runtime
    // periodic reader used on the GCP path, dropping the guard from inside a
    // current-thread Tokio runtime may block — confirm against the SDK docs.
    fn drop(&mut self) {
        if let Some(provider) = self._tracer_provider.take() {
            if let Err(err) = provider.shutdown() {
                eprintln!("telemetry: tracer provider shutdown failed: {err}");
            }
        }
        if let Some(provider) = self._meter_provider.take() {
            if let Err(err) = provider.shutdown() {
                eprintln!("telemetry: meter provider shutdown failed: {err}");
            }
        }
    }
}
66
67impl TelemetryConfig {
68    /// Initialize telemetry subsystems based on configuration.
69    ///
70    /// When `otel-otlp` is enabled and `otel_traces`/`otel_metrics` are set,
71    /// this configures OTLP exporters that send data to whatever endpoint is set
72    /// via the standard `OTEL_EXPORTER_OTLP_ENDPOINT` env var (defaults to
73    /// `http://localhost:4317` for gRPC).
74    ///
75    /// When `otel-gcp` is enabled, use `init_gcp()` to set up Google Cloud-native
76    /// exporters, or configure providers manually and call `init_with_tracer()`.
77    ///
78    /// The returned `TelemetryGuard` must be held alive for the duration of the
79    /// application. Dropping it triggers a flush and shutdown of all exporters.
80    pub fn init(&self) -> Result<TelemetryGuard, Box<dyn std::error::Error>> {
81        #[allow(unused_mut)]
82        let mut guard = TelemetryGuard::default();
83
84        // --- OTel OTLP providers (must be created before tracing subscriber) ---
85        #[cfg(feature = "otel-otlp")]
86        let otel_tracer = if self.otel_traces {
87            let exporter = opentelemetry_otlp::SpanExporter::builder()
88                .with_tonic()
89                .build()?;
90            let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
91                .with_batch_exporter(exporter)
92                .with_resource(self.otel_resource())
93                .build();
94            let tracer = opentelemetry::trace::TracerProvider::tracer(
95                &provider,
96                self.otel_service_name.clone(),
97            );
98            guard._tracer_provider = Some(provider);
99            Some(tracer)
100        } else {
101            None
102        };
103
104        #[cfg(feature = "otel-otlp")]
105        if self.otel_metrics {
106            let exporter = opentelemetry_otlp::MetricExporter::builder()
107                .with_tonic()
108                .build()?;
109            let provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
110                .with_periodic_exporter(exporter)
111                .with_resource(self.otel_resource())
112                .build();
113            opentelemetry::global::set_meter_provider(provider.clone());
114            guard._meter_provider = Some(provider);
115        }
116
117        // --- Tracing subscriber ---
118        #[cfg(feature = "tracing-subscriber")]
119        if self.logging_enabled {
120            #[cfg(feature = "otel-otlp")]
121            {
122                self.init_tracing_subscriber_with_tracer(otel_tracer)
123                    .map_err(|e| -> Box<dyn std::error::Error> { e })?;
124            }
125            #[cfg(not(feature = "otel-otlp"))]
126            {
127                self.init_tracing_subscriber()
128                    .map_err(|e| -> Box<dyn std::error::Error> { e })?;
129            }
130        }
131
132        Ok(guard)
133    }
134
135    /// Initialize telemetry with Google Cloud-native exporters (Cloud Trace + Cloud Monitoring).
136    ///
137    /// This is the GCP counterpart to `init()`. It uses `opentelemetry-gcloud-trace` for
138    /// span export and `opentelemetry_gcloud_monitoring_exporter` for metrics.
139    ///
140    /// If `otel_gcp_project` is set, it is used as the GCP project ID. Otherwise the
141    /// project ID is auto-detected from ADC or the environment.
142    ///
143    /// The returned `TelemetryGuard` must be held alive for the duration of the
144    /// application. Dropping it triggers a flush and shutdown of all exporters.
145    #[cfg(feature = "otel-gcp")]
146    pub async fn init_gcp(
147        &self,
148    ) -> Result<TelemetryGuard, Box<dyn std::error::Error + Send + Sync>> {
149        use opentelemetry_gcloud_trace::GcpCloudTraceExporterBuilder;
150
151        let mut guard = TelemetryGuard::default();
152
153        // --- GCP Cloud Trace provider ---
154        let otel_tracer = if self.otel_traces {
155            let gcp_trace_builder = if let Some(ref project_id) = self.otel_gcp_project {
156                GcpCloudTraceExporterBuilder::new(project_id.clone())
157                    .with_resource(self.otel_resource())
158            } else {
159                GcpCloudTraceExporterBuilder::for_default_project_id()
160                    .await?
161                    .with_resource(self.otel_resource())
162            };
163
164            let tracer_provider = gcp_trace_builder.create_provider().await?;
165            let tracer = gcp_trace_builder.install(&tracer_provider).await?;
166            opentelemetry::global::set_tracer_provider(tracer_provider.clone());
167            guard._tracer_provider = Some(tracer_provider);
168            Some(tracer)
169        } else {
170            None
171        };
172
173        // --- GCP Cloud Monitoring metrics ---
174        if self.otel_metrics {
175            use opentelemetry_gcloud_monitoring_exporter::{
176                GCPMetricsExporter, GCPMetricsExporterConfig,
177            };
178
179            let mut metrics_cfg = GCPMetricsExporterConfig {
180                prefix: format!("custom.googleapis.com/{}", self.otel_service_name),
181                ..Default::default()
182            };
183            if let Some(ref project_id) = self.otel_gcp_project {
184                metrics_cfg.project_id = Some(project_id.clone());
185            }
186            let metrics_exporter = GCPMetricsExporter::init(metrics_cfg).await?;
187
188            use opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader;
189            let reader =
190                PeriodicReader::builder(metrics_exporter, opentelemetry_sdk::runtime::Tokio)
191                    .build();
192
193            let meter_provider = opentelemetry_sdk::metrics::SdkMeterProvider::builder()
194                .with_resource(self.otel_resource())
195                .with_reader(reader)
196                .build();
197            opentelemetry::global::set_meter_provider(meter_provider.clone());
198            guard._meter_provider = Some(meter_provider);
199        }
200
201        // --- Tracing subscriber ---
202        #[cfg(feature = "tracing-subscriber")]
203        if self.logging_enabled {
204            self.init_tracing_subscriber_with_tracer(otel_tracer)?;
205        }
206
207        Ok(guard)
208    }
209
210    /// Set up the tracing subscriber with no OTel tracer layer (plain logging mode).
211    ///
212    /// Used when `tracing-subscriber` is on but neither `otel-otlp` nor `otel-gcp`
213    /// provides a tracer, or when called from `init()` without OTLP.
214    #[cfg(feature = "tracing-subscriber")]
215    #[allow(dead_code)]
216    fn init_tracing_subscriber(&self) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
217        self.init_tracing_subscriber_with_tracer(None)
218    }
219
220    /// Set up the tracing subscriber, optionally wiring in an OTel tracer layer.
221    ///
222    /// Shared implementation used by both `init()` (OTLP path) and `init_gcp()`.
223    #[cfg(feature = "tracing-subscriber")]
224    fn init_tracing_subscriber_with_tracer(
225        &self,
226        #[cfg(feature = "otel-base")] otel_tracer: Option<opentelemetry_sdk::trace::Tracer>,
227        #[cfg(not(feature = "otel-base"))] _otel_tracer: Option<()>,
228    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
229        use tracing_subscriber::prelude::*;
230        use tracing_subscriber::EnvFilter;
231
232        let filter =
233            EnvFilter::try_new(&self.log_filter).unwrap_or_else(|_| EnvFilter::new("info"));
234
235        let fmt_layer = if self.json_logs {
236            tracing_subscriber::fmt::layer().json().boxed()
237        } else {
238            tracing_subscriber::fmt::layer().boxed()
239        };
240
241        let registry = tracing_subscriber::registry().with(filter).with(fmt_layer);
242
243        #[cfg(feature = "otel-base")]
244        {
245            if let Some(tracer) = otel_tracer {
246                let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);
247                let subscriber = registry.with(otel_layer);
248                tracing::subscriber::set_global_default(subscriber)
249                    .map_err(|e| format!("Failed to set tracing subscriber: {e}"))?;
250            } else {
251                tracing::subscriber::set_global_default(registry)
252                    .map_err(|e| format!("Failed to set tracing subscriber: {e}"))?;
253            }
254        }
255
256        #[cfg(not(feature = "otel-base"))]
257        {
258            tracing::subscriber::set_global_default(registry)
259                .map_err(|e| format!("Failed to set tracing subscriber: {e}"))?;
260        }
261
262        Ok(())
263    }
264
265    /// Build an OTel resource with the configured service name.
266    ///
267    /// Gated on the exporter features (not `otel-base`): the only callers are
268    /// the OTLP and GCP provider-construction paths, so under a bare
269    /// `otel-base` build this would be dead code.
270    #[cfg(any(feature = "otel-otlp", feature = "otel-gcp"))]
271    pub(crate) fn otel_resource(&self) -> opentelemetry_sdk::Resource {
272        use opentelemetry::KeyValue;
273        opentelemetry_sdk::Resource::builder_empty()
274            .with_attributes([KeyValue::new(
275                "service.name",
276                self.otel_service_name.clone(),
277            )])
278            .build()
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins every value produced by `TelemetryConfig::default()`.
    #[test]
    fn default_config_values() {
        let TelemetryConfig {
            logging_enabled,
            log_filter,
            json_logs,
            metrics_enabled,
            metrics_addr,
            otel_traces,
            otel_metrics,
            otel_service_name,
            otel_gcp_project,
        } = TelemetryConfig::default();

        assert!(logging_enabled);
        assert_eq!(log_filter, "info");
        assert!(!json_logs);
        assert!(!metrics_enabled);
        assert!(metrics_addr.is_none());
        assert!(!otel_traces);
        assert!(!otel_metrics);
        assert_eq!(otel_service_name, "gemini-live");
        assert!(otel_gcp_project.is_none());
    }

    /// A fully specified struct literal round-trips every field unchanged.
    #[test]
    fn config_builder_pattern() {
        let custom = TelemetryConfig {
            logging_enabled: false,
            log_filter: "debug".to_string(),
            json_logs: true,
            metrics_enabled: true,
            metrics_addr: Some("0.0.0.0:9090".to_string()),
            otel_traces: true,
            otel_metrics: true,
            otel_service_name: "my-service".to_string(),
            otel_gcp_project: Some("my-project".to_string()),
        };

        // Flags
        assert!(!custom.logging_enabled);
        assert!(custom.json_logs);
        assert!(custom.metrics_enabled);
        assert!(custom.otel_traces);
        assert!(custom.otel_metrics);

        // Strings and optional strings
        assert_eq!(custom.log_filter, "debug");
        assert_eq!(custom.otel_service_name, "my-service");
        assert_eq!(custom.metrics_addr.as_deref(), Some("0.0.0.0:9090"));
        assert_eq!(custom.otel_gcp_project.as_deref(), Some("my-project"));
    }

    /// Smoke test: the guard's `Default` impl compiles and does not panic.
    #[test]
    fn telemetry_guard_default() {
        let _guard = TelemetryGuard::default();
    }
}
329}