gemini_genai_rs/protocol/messages/
client.rs

1//! Client → Server message types for the Gemini Live wire protocol.
2
3use serde::{Deserialize, Serialize};
4
5use crate::protocol::types::*;
6
7/// Top-level setup message sent immediately after WebSocket connect.
8#[derive(Debug, Clone, Serialize)]
9pub struct SetupMessage {
10    /// The setup payload.
11    pub setup: SetupPayload,
12}
13
14/// Payload of the setup message.
15#[derive(Debug, Clone, Serialize)]
16#[serde(rename_all = "camelCase")]
17pub struct SetupPayload {
18    /// Model URI string (e.g. `"models/gemini-2.0-flash-live-001"`).
19    pub model: String,
20    /// Generation parameters (modalities, temperature, etc.).
21    #[serde(skip_serializing_if = "Option::is_none")]
22    pub generation_config: Option<GenerationConfig>,
23    /// System instruction content.
24    #[serde(skip_serializing_if = "Option::is_none")]
25    pub system_instruction: Option<Content>,
26    /// Tool declarations for function calling, search, etc.
27    #[serde(skip_serializing_if = "Vec::is_empty")]
28    pub tools: Vec<Tool>,
29    /// Tool usage configuration.
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub tool_config: Option<ToolConfig>,
32    /// Enable input audio transcription.
33    #[serde(skip_serializing_if = "Option::is_none")]
34    pub input_audio_transcription: Option<InputAudioTranscription>,
35    /// Enable output audio transcription.
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub output_audio_transcription: Option<OutputAudioTranscription>,
38    /// Realtime input configuration (VAD, activity handling).
39    #[serde(skip_serializing_if = "Option::is_none")]
40    pub realtime_input_config: Option<RealtimeInputConfig>,
41    /// Session resumption configuration.
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub session_resumption: Option<SessionResumptionConfig>,
44    /// Context window compression configuration.
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub context_window_compression: Option<ContextWindowCompressionConfig>,
47    /// Proactivity configuration.
48    #[serde(skip_serializing_if = "Option::is_none")]
49    pub proactivity: Option<ProactivityConfig>,
50}
51
52impl SessionConfig {
53    /// Build the setup message from this configuration.
54    ///
55    /// When targeting Vertex AI, `FunctionCallingBehavior` is stripped from
56    /// tool declarations since Vertex AI does not support async tool calling.
57    pub fn to_setup_message(&self) -> SetupMessage {
58        let tools = if self.supports_async_tools() {
59            self.tools.clone()
60        } else {
61            self.tools
62                .iter()
63                .map(|tool| {
64                    let mut t = tool.clone();
65                    if let Some(ref mut decls) = t.function_declarations {
66                        for d in decls.iter_mut() {
67                            d.behavior = None;
68                        }
69                    }
70                    t
71                })
72                .collect()
73        };
74
75        let generation_config = if self.supports_async_tools() {
76            self.generation_config.clone()
77        } else {
78            let mut gc = self.generation_config.clone();
79            gc.thinking_config = None;
80            gc
81        };
82
83        SetupMessage {
84            setup: SetupPayload {
85                model: self.model_uri(),
86                generation_config: Some(generation_config),
87                system_instruction: self.system_instruction.clone(),
88                tools,
89                tool_config: self.tool_config.clone(),
90                input_audio_transcription: self.input_audio_transcription.clone(),
91                output_audio_transcription: self.output_audio_transcription.clone(),
92                realtime_input_config: self.realtime_input_config.clone(),
93                session_resumption: self.session_resumption.clone(),
94                context_window_compression: self.context_window_compression.clone(),
95                proactivity: self.proactivity.clone(),
96            },
97        }
98    }
99
100    /// Pre-serialize the setup message to JSON. Called once at connection time.
101    pub fn to_setup_json(&self) -> String {
102        serde_json::to_string(&self.to_setup_message())
103            .expect("setup message serialization is infallible for valid config")
104    }
105}
106
107/// Realtime audio input sent as a stream of chunks.
108#[derive(Debug, Clone, Serialize)]
109#[serde(rename_all = "camelCase")]
110pub struct RealtimeInputMessage {
111    /// The realtime input payload.
112    pub realtime_input: RealtimeInputPayload,
113}
114
115/// Payload for realtime audio input.
116#[derive(Debug, Clone, Serialize)]
117#[serde(rename_all = "camelCase")]
118pub struct RealtimeInputPayload {
119    /// Deprecated: use `audio` instead. Kept for backward compatibility.
120    #[serde(skip_serializing_if = "Vec::is_empty")]
121    pub media_chunks: Vec<MediaChunk>,
122    /// Audio input blob (preferred over media_chunks).
123    #[serde(skip_serializing_if = "Option::is_none")]
124    pub audio: Option<Blob>,
125    /// Video input blob.
126    #[serde(skip_serializing_if = "Option::is_none")]
127    pub video: Option<Blob>,
128    /// Signal end of audio stream.
129    #[serde(skip_serializing_if = "Option::is_none")]
130    pub audio_stream_end: Option<bool>,
131    /// Realtime text input (streamed inline, distinct from clientContent).
132    #[serde(skip_serializing_if = "Option::is_none")]
133    pub text: Option<String>,
134}
135
136/// A single chunk of media data (audio). Deprecated — use Blob in `audio` field.
137#[derive(Debug, Clone, Serialize)]
138#[serde(rename_all = "camelCase")]
139pub struct MediaChunk {
140    /// MIME type of the media (e.g. `"audio/pcm"`).
141    pub mime_type: String,
142    /// Base64-encoded media data.
143    pub data: String, // base64-encoded
144}
145
146/// Client content message for sending text or conversation history.
147#[derive(Debug, Clone, Serialize)]
148#[serde(rename_all = "camelCase")]
149pub struct ClientContentMessage {
150    /// The client content payload.
151    pub client_content: ClientContentPayload,
152}
153
154/// Payload for client content.
155#[derive(Debug, Clone, Serialize)]
156#[serde(rename_all = "camelCase")]
157pub struct ClientContentPayload {
158    /// Conversation turns to send.
159    pub turns: Vec<Content>,
160    /// Whether this completes the client's turn.
161    #[serde(skip_serializing_if = "Option::is_none")]
162    pub turn_complete: Option<bool>,
163}
164
165/// Tool response message sent after executing function calls.
166#[derive(Debug, Clone, Serialize)]
167#[serde(rename_all = "camelCase")]
168pub struct ToolResponseMessage {
169    /// The tool response payload.
170    pub tool_response: ToolResponsePayload,
171}
172
173/// Payload for tool response.
174#[derive(Debug, Clone, Serialize)]
175#[serde(rename_all = "camelCase")]
176pub struct ToolResponsePayload {
177    /// Function call responses to return to the model.
178    pub function_responses: Vec<FunctionResponse>,
179}
180
181/// Activity signal for client-side VAD events.
182#[derive(Debug, Clone, Serialize)]
183#[serde(rename_all = "camelCase")]
184pub struct ActivitySignalMessage {
185    /// The activity signal payload.
186    pub realtime_input: ActivitySignalPayload,
187}
188
189/// Payload for activity signals.
190#[derive(Debug, Clone, Serialize)]
191#[serde(rename_all = "camelCase")]
192pub struct ActivitySignalPayload {
193    /// Present when signaling activity start.
194    #[serde(skip_serializing_if = "Option::is_none")]
195    pub activity_start: Option<ActivityStart>,
196    /// Present when signaling activity end.
197    #[serde(skip_serializing_if = "Option::is_none")]
198    pub activity_end: Option<ActivityEnd>,
199}
200
201/// Marker for speech activity start.
202#[derive(Debug, Clone, Serialize, Deserialize)]
203pub struct ActivityStart {}
204
205/// Marker for speech activity end.
206#[derive(Debug, Clone, Serialize, Deserialize)]
207pub struct ActivityEnd {}