gemini_genai_rs/protocol/messages/
server.rs

1//! Server → Client message types for the Gemini Live wire protocol.
2
3use serde::{Deserialize, Serialize};
4
5use crate::protocol::types::*;
6
/// Server setup complete acknowledgment.
///
/// Envelope for a frame whose top-level key is `setupComplete` (Rust field
/// names are mapped to camelCase on the wire via `rename_all`).
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SetupCompleteMessage {
    /// The setup complete payload.
    ///
    /// Required: the field has no default, so deserializing this struct fails
    /// when the `setupComplete` key is absent.
    pub setup_complete: SetupCompletePayload,
}
14
/// Payload for setup complete.
///
/// The only field is optional, so an empty JSON object deserializes
/// successfully with `session_resumption == None`.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SetupCompletePayload {
    /// Session resumption result, if resumption was requested.
    ///
    /// Wire key: `sessionResumption`. `None` when the key is missing.
    #[serde(default)]
    pub session_resumption: Option<SessionResumptionResult>,
}
23
/// Session resumption result from server.
///
/// Both fields are optional; a missing key deserializes to `None` rather
/// than producing an error.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionResumptionResult {
    /// Opaque handle for future session resumption (wire key: `handle`).
    #[serde(default)]
    pub handle: Option<String>,
    /// Whether the session was successfully resumed (wire key: `resumed`).
    ///
    /// `None` means the server did not send the key, which is distinct from
    /// an explicit `false`.
    #[serde(default)]
    pub resumed: Option<bool>,
}
35
/// Server content message containing model output.
///
/// Envelope for a frame whose top-level key is `serverContent`. It also
/// captures the sibling top-level `usageMetadata` key when present.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ServerContentMessage {
    /// The server content payload.
    ///
    /// Required: deserializing this struct fails when `serverContent` is
    /// absent.
    pub server_content: ServerContentPayload,
    /// Token usage metadata (present on most server messages).
    ///
    /// Wire key: `usageMetadata`. `None` when the key is missing.
    #[serde(default)]
    pub usage_metadata: Option<UsageMetadata>,
}
46
/// Payload for server content.
///
/// Every field is optional and defaults to `None` when its key is missing,
/// so any subset of keys (including none at all) deserializes successfully.
/// Wire keys are the camelCase form of the Rust field names.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ServerContentPayload {
    /// Model output content for this turn (wire key: `modelTurn`).
    #[serde(default)]
    pub model_turn: Option<Content>,
    /// Whether the model's turn is complete (wire key: `turnComplete`).
    #[serde(default)]
    pub turn_complete: Option<bool>,
    /// Whether all generation (including tool use) is complete
    /// (wire key: `generationComplete`).
    #[serde(default)]
    pub generation_complete: Option<bool>,
    /// Whether the model was interrupted by user barge-in
    /// (wire key: `interrupted`).
    #[serde(default)]
    pub interrupted: Option<bool>,
    /// Transcription of user audio input (wire key: `inputTranscription`).
    #[serde(default)]
    pub input_transcription: Option<TranscriptionPayload>,
    /// Transcription of model audio output (wire key: `outputTranscription`).
    #[serde(default)]
    pub output_transcription: Option<TranscriptionPayload>,
    /// Grounding metadata from search results
    /// (wire key: `groundingMetadata`).
    #[serde(default)]
    pub grounding_metadata: Option<GroundingMetadata>,
    /// URL context metadata for content sourced from URLs
    /// (wire key: `urlContextMetadata`).
    #[serde(default)]
    pub url_context_metadata: Option<UrlContextMetadata>,
    /// Reason why the model's turn completed (e.g. "STOP", "MAX_TOKENS").
    ///
    /// Kept as the raw string from the wire (key: `turnCompleteReason`); it
    /// is not mapped onto an enum here.
    #[serde(default)]
    pub turn_complete_reason: Option<String>,
    /// Whether the server is waiting for user input
    /// (wire key: `waitingForInput`).
    #[serde(default)]
    pub waiting_for_input: Option<bool>,
}
82
/// Transcription text from server.
///
/// This struct carries no `rename_all` attribute; its single field, `text`,
/// is spelled the same in snake_case and camelCase, so the wire key is
/// simply `text`.
#[derive(Debug, Clone, Deserialize)]
pub struct TranscriptionPayload {
    /// The transcribed text. `None` when the `text` key is missing.
    #[serde(default)]
    pub text: Option<String>,
}
90
/// Server tool call request message.
///
/// Envelope for a frame whose top-level key is `toolCall`.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolCallMessage {
    /// The tool call payload.
    ///
    /// Required: deserializing this struct fails when `toolCall` is absent.
    pub tool_call: ToolCallPayload,
}
98
/// Payload for tool call.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolCallPayload {
    /// Function calls requested by the model (wire key: `functionCalls`).
    ///
    /// Required: the field has no `#[serde(default)]`, so a payload without
    /// `functionCalls` is a deserialization error rather than an empty list.
    pub function_calls: Vec<FunctionCall>,
}
106
/// Server tool call cancellation message.
///
/// Envelope for a frame whose top-level key is `toolCallCancellation`.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolCallCancellationMessage {
    /// The tool call cancellation payload.
    ///
    /// Required: deserializing this struct fails when `toolCallCancellation`
    /// is absent.
    pub tool_call_cancellation: ToolCallCancellationPayload,
}
114
/// Payload for tool call cancellation.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolCallCancellationPayload {
    /// IDs of the cancelled tool calls (wire key: `ids`).
    ///
    /// Required: the field has no `#[serde(default)]`, so a payload without
    /// `ids` is a deserialization error rather than an empty list.
    pub ids: Vec<String>,
}
122
/// Server GoAway signal — requesting graceful disconnect.
///
/// Envelope for a frame whose top-level key is `goAway`.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GoAwayMessage {
    /// The GoAway payload.
    ///
    /// Required: deserializing this struct fails when `goAway` is absent.
    pub go_away: GoAwayPayload,
}
130
/// Payload for GoAway.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GoAwayPayload {
    /// Time remaining before forced disconnect (e.g. `"30s"`).
    ///
    /// Wire key: `timeLeft`. The value is kept as the raw string received;
    /// converting it to a duration is left to the consumer. `None` when the
    /// key is missing.
    #[serde(default)]
    pub time_left: Option<String>,
}
139
/// Session resumption update from server (sent during active session).
///
/// Envelope for a frame whose top-level key is `sessionResumptionUpdate`.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionResumptionUpdateMessage {
    /// The session resumption update payload.
    ///
    /// Required: deserializing this struct fails when
    /// `sessionResumptionUpdate` is absent.
    pub session_resumption_update: SessionResumptionUpdatePayload,
}
147
/// Payload for session resumption update.
///
/// All fields are optional and default to `None` when their key is missing.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SessionResumptionUpdatePayload {
    /// New opaque handle for session resumption (wire key: `newHandle`).
    #[serde(default)]
    pub new_handle: Option<String>,
    /// Whether the session is currently resumable (wire key: `resumable`).
    #[serde(default)]
    pub resumable: Option<bool>,
    /// Index of the last client message consumed by the server.
    ///
    /// Wire key: `lastConsumedClientMessageIndex`. Stored as the raw string
    /// received; no numeric parsing happens at this layer.
    #[serde(default)]
    pub last_consumed_client_message_index: Option<String>,
}
162
/// Server-side voice activity detection event.
///
/// Envelope for a frame whose top-level key is `voiceActivity`. This type
/// derives `Serialize` in addition to `Deserialize`, so it can be written
/// back out as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct VoiceActivityMessage {
    /// The voice activity payload.
    ///
    /// Required: deserializing this struct fails when `voiceActivity` is
    /// absent.
    pub voice_activity: VoiceActivityPayload,
}
170
/// Payload for voice activity detection.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct VoiceActivityPayload {
    /// The type of voice activity event (wire key: `voiceActivityType`).
    ///
    /// Omitted from serialized output when `None`. On input, serde treats a
    /// missing `Option` field as `None`, so no explicit default is needed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub voice_activity_type: Option<VoiceActivityType>,
}
179
/// Type of voice activity event from the server.
///
/// Each variant is renamed to its SCREAMING_SNAKE_CASE wire string. There is
/// no catch-all variant, so any other string fails deserialization of the
/// enclosing value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum VoiceActivityType {
    /// Voice activity started (user began speaking).
    ///
    /// Wire value: `"VOICE_ACTIVITY_START"`.
    #[serde(rename = "VOICE_ACTIVITY_START")]
    VoiceActivityStart,
    /// Voice activity ended (user stopped speaking).
    ///
    /// Wire value: `"VOICE_ACTIVITY_END"`.
    #[serde(rename = "VOICE_ACTIVITY_END")]
    VoiceActivityEnd,
}
190
/// Server message wrapper — captures the optional usage metadata that can
/// accompany a server message.
///
/// Only the top-level `usageMetadata` key is read; every other key in the
/// frame is ignored during deserialization, so this type does not carry the
/// message itself.
// NOTE(review): not referenced anywhere in the visible part of this file —
// confirm it still has callers elsewhere.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ServerMessageWrapper {
    /// Token usage metadata (present on most server messages).
    ///
    /// `None` when the `usageMetadata` key is missing.
    #[serde(default)]
    pub usage_metadata: Option<UsageMetadata>,
}
199
/// Unified server message enum — parsed from incoming WebSocket text frames.
///
/// This type deliberately does not derive `Deserialize`. Values are built by
/// [`ServerMessage::parse`], which routes on the frame's top-level key
/// instead of using `#[serde(untagged)]` (untagged tries every variant in
/// order).
#[derive(Debug, Clone)]
pub enum ServerMessage {
    /// Setup handshake completed successfully.
    SetupComplete(SetupCompleteMessage),
    /// Model output content (text, audio, transcription, etc.).
    ///
    /// Boxed, presumably to keep the enum itself small since this is the
    /// largest payload.
    ServerContent(Box<ServerContentMessage>),
    /// Model requested one or more tool/function calls.
    ToolCall(ToolCallMessage),
    /// Server cancelled previously requested tool calls.
    ToolCallCancellation(ToolCallCancellationMessage),
    /// Server requesting graceful disconnect.
    GoAway(GoAwayMessage),
    /// Updated session resumption handle.
    SessionResumptionUpdate(SessionResumptionUpdateMessage),
    /// Server-side voice activity detection event.
    VoiceActivity(VoiceActivityMessage),
    /// Unrecognized message type (forward compatibility).
    ///
    /// Holds the entire frame as generic JSON so callers can still inspect
    /// or log it.
    Unknown(serde_json::Value),
}
224
/// Single-pass deserialization target: every known server message is
/// discriminated by exactly one top-level key, so one serde pass over the
/// frame replaces the previous string-contains scan + targeted re-parse
/// (which cost an O(n) `memchr` sweep per candidate key before parsing).
///
/// Each field mirrors one top-level wire key (camelCase) and is `None` when
/// that key is absent. Top-level keys not listed here are ignored. A listed
/// key whose value does not match the payload type makes the whole parse
/// fail.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct RawServerMessage {
    // Wire key `setupComplete`.
    setup_complete: Option<SetupCompletePayload>,
    // Wire key `serverContent`.
    server_content: Option<ServerContentPayload>,
    // Wire key `usageMetadata`; sits beside the discriminating key rather
    // than selecting a message type on its own.
    usage_metadata: Option<UsageMetadata>,
    // Wire key `toolCall`.
    tool_call: Option<ToolCallPayload>,
    // Wire key `toolCallCancellation`.
    tool_call_cancellation: Option<ToolCallCancellationPayload>,
    // Wire key `goAway`.
    go_away: Option<GoAwayPayload>,
    // Wire key `sessionResumptionUpdate`.
    session_resumption_update: Option<SessionResumptionUpdatePayload>,
    // Wire key `voiceActivity`.
    voice_activity: Option<VoiceActivityPayload>,
}
241
242impl ServerMessage {
243    /// Parse a server message from a JSON text frame.
244    ///
245    /// One serde pass: known messages are discriminated by their single
246    /// top-level key; frames with no known key fall back to a raw-value parse
247    /// and surface as [`ServerMessage::Unknown`] (forward compatibility).
248    pub fn parse(text: &str) -> Result<Self, serde_json::Error> {
249        let raw: RawServerMessage = match serde_json::from_str(text) {
250            Ok(raw) => raw,
251            // A known key with an unexpected payload shape should surface as
252            // a parse error (matching the previous targeted-parse behavior),
253            // but a frame that isn't an object at all falls through to Unknown.
254            Err(e) => {
255                return if text.trim_start().starts_with('{') {
256                    Err(e)
257                } else {
258                    serde_json::from_str::<serde_json::Value>(text).map(ServerMessage::Unknown)
259                };
260            }
261        };
262
263        if let Some(setup_complete) = raw.setup_complete {
264            Ok(ServerMessage::SetupComplete(SetupCompleteMessage {
265                setup_complete,
266            }))
267        } else if let Some(tool_call_cancellation) = raw.tool_call_cancellation {
268            Ok(ServerMessage::ToolCallCancellation(
269                ToolCallCancellationMessage {
270                    tool_call_cancellation,
271                },
272            ))
273        } else if let Some(tool_call) = raw.tool_call {
274            Ok(ServerMessage::ToolCall(ToolCallMessage { tool_call }))
275        } else if let Some(server_content) = raw.server_content {
276            Ok(ServerMessage::ServerContent(Box::new(
277                ServerContentMessage {
278                    server_content,
279                    usage_metadata: raw.usage_metadata,
280                },
281            )))
282        } else if let Some(go_away) = raw.go_away {
283            Ok(ServerMessage::GoAway(GoAwayMessage { go_away }))
284        } else if let Some(session_resumption_update) = raw.session_resumption_update {
285            Ok(ServerMessage::SessionResumptionUpdate(
286                SessionResumptionUpdateMessage {
287                    session_resumption_update,
288                },
289            ))
290        } else if let Some(voice_activity) = raw.voice_activity {
291            Ok(ServerMessage::VoiceActivity(VoiceActivityMessage {
292                voice_activity,
293            }))
294        } else {
295            // No known key: unknown message type (forward compatibility).
296            serde_json::from_str::<serde_json::Value>(text).map(ServerMessage::Unknown)
297        }
298    }
299}