gemini_genai_rs/protocol/messages/
server.rs

//! Server → Client message types for the Gemini Live wire protocol.
2
3use serde::{Deserialize, Serialize};
4
5use crate::protocol::types::*;
6
7/// Server setup complete acknowledgment.
8#[derive(Debug, Clone, Deserialize)]
9#[serde(rename_all = "camelCase")]
10pub struct SetupCompleteMessage {
11    /// The setup complete payload.
12    pub setup_complete: SetupCompletePayload,
13}
14
15/// Payload for setup complete.
16#[derive(Debug, Clone, Deserialize)]
17#[serde(rename_all = "camelCase")]
18pub struct SetupCompletePayload {
19    /// Session resumption result, if resumption was requested.
20    #[serde(default)]
21    pub session_resumption: Option<SessionResumptionResult>,
22}
23
24/// Session resumption result from server.
25#[derive(Debug, Clone, Deserialize)]
26#[serde(rename_all = "camelCase")]
27pub struct SessionResumptionResult {
28    /// Opaque handle for future session resumption.
29    #[serde(default)]
30    pub handle: Option<String>,
31    /// Whether the session was successfully resumed.
32    #[serde(default)]
33    pub resumed: Option<bool>,
34}
35
36/// Server content message containing model output.
37#[derive(Debug, Clone, Deserialize)]
38#[serde(rename_all = "camelCase")]
39pub struct ServerContentMessage {
40    /// The server content payload.
41    pub server_content: ServerContentPayload,
42    /// Token usage metadata (present on most server messages).
43    #[serde(default)]
44    pub usage_metadata: Option<UsageMetadata>,
45}
46
47/// Payload for server content.
48#[derive(Debug, Clone, Deserialize)]
49#[serde(rename_all = "camelCase")]
50pub struct ServerContentPayload {
51    /// Model output content for this turn.
52    #[serde(default)]
53    pub model_turn: Option<Content>,
54    /// Whether the model's turn is complete.
55    #[serde(default)]
56    pub turn_complete: Option<bool>,
57    /// Whether all generation (including tool use) is complete.
58    #[serde(default)]
59    pub generation_complete: Option<bool>,
60    /// Whether the model was interrupted by user barge-in.
61    #[serde(default)]
62    pub interrupted: Option<bool>,
63    /// Transcription of user audio input.
64    #[serde(default)]
65    pub input_transcription: Option<TranscriptionPayload>,
66    /// Transcription of model audio output.
67    #[serde(default)]
68    pub output_transcription: Option<TranscriptionPayload>,
69    /// Grounding metadata from search results.
70    #[serde(default)]
71    pub grounding_metadata: Option<GroundingMetadata>,
72    /// URL context metadata for content sourced from URLs.
73    #[serde(default)]
74    pub url_context_metadata: Option<UrlContextMetadata>,
75    /// Reason why the model's turn completed (e.g. "STOP", "MAX_TOKENS").
76    #[serde(default)]
77    pub turn_complete_reason: Option<String>,
78    /// Whether the server is waiting for user input.
79    #[serde(default)]
80    pub waiting_for_input: Option<bool>,
81}
82
83/// Transcription text from server.
84#[derive(Debug, Clone, Deserialize)]
85pub struct TranscriptionPayload {
86    /// The transcribed text.
87    #[serde(default)]
88    pub text: Option<String>,
89}
90
91/// Server tool call request message.
92#[derive(Debug, Clone, Deserialize)]
93#[serde(rename_all = "camelCase")]
94pub struct ToolCallMessage {
95    /// The tool call payload.
96    pub tool_call: ToolCallPayload,
97}
98
99/// Payload for tool call.
100#[derive(Debug, Clone, Deserialize)]
101#[serde(rename_all = "camelCase")]
102pub struct ToolCallPayload {
103    /// Function calls requested by the model.
104    pub function_calls: Vec<FunctionCall>,
105}
106
107/// Server tool call cancellation message.
108#[derive(Debug, Clone, Deserialize)]
109#[serde(rename_all = "camelCase")]
110pub struct ToolCallCancellationMessage {
111    /// The tool call cancellation payload.
112    pub tool_call_cancellation: ToolCallCancellationPayload,
113}
114
115/// Payload for tool call cancellation.
116#[derive(Debug, Clone, Deserialize)]
117#[serde(rename_all = "camelCase")]
118pub struct ToolCallCancellationPayload {
119    /// IDs of the cancelled tool calls.
120    pub ids: Vec<String>,
121}
122
123/// Server GoAway signal — requesting graceful disconnect.
124#[derive(Debug, Clone, Deserialize)]
125#[serde(rename_all = "camelCase")]
126pub struct GoAwayMessage {
127    /// The GoAway payload.
128    pub go_away: GoAwayPayload,
129}
130
131/// Payload for GoAway.
132#[derive(Debug, Clone, Deserialize)]
133#[serde(rename_all = "camelCase")]
134pub struct GoAwayPayload {
135    /// Time remaining before forced disconnect (e.g. `"30s"`).
136    #[serde(default)]
137    pub time_left: Option<String>,
138}
139
140/// Session resumption update from server (sent during active session).
141#[derive(Debug, Clone, Deserialize)]
142#[serde(rename_all = "camelCase")]
143pub struct SessionResumptionUpdateMessage {
144    /// The session resumption update payload.
145    pub session_resumption_update: SessionResumptionUpdatePayload,
146}
147
148/// Payload for session resumption update.
149#[derive(Debug, Clone, Deserialize)]
150#[serde(rename_all = "camelCase")]
151pub struct SessionResumptionUpdatePayload {
152    /// New opaque handle for session resumption.
153    #[serde(default)]
154    pub new_handle: Option<String>,
155    /// Whether the session is currently resumable.
156    #[serde(default)]
157    pub resumable: Option<bool>,
158    /// Index of the last client message consumed by the server.
159    #[serde(default)]
160    pub last_consumed_client_message_index: Option<String>,
161}
162
163/// Server-side voice activity detection event.
164#[derive(Debug, Clone, Serialize, Deserialize)]
165#[serde(rename_all = "camelCase")]
166pub struct VoiceActivityMessage {
167    /// The voice activity payload.
168    pub voice_activity: VoiceActivityPayload,
169}
170
171/// Payload for voice activity detection.
172#[derive(Debug, Clone, Serialize, Deserialize)]
173#[serde(rename_all = "camelCase")]
174pub struct VoiceActivityPayload {
175    /// The type of voice activity event.
176    #[serde(skip_serializing_if = "Option::is_none")]
177    pub voice_activity_type: Option<VoiceActivityType>,
178}
179
180/// Type of voice activity event from the server.
181#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
182pub enum VoiceActivityType {
183    /// Voice activity started (user began speaking).
184    #[serde(rename = "VOICE_ACTIVITY_START")]
185    VoiceActivityStart,
186    /// Voice activity ended (user stopped speaking).
187    #[serde(rename = "VOICE_ACTIVITY_END")]
188    VoiceActivityEnd,
189}
190
191/// Server message wrapper — includes optional usage metadata alongside the message.
192#[derive(Debug, Clone, Deserialize)]
193#[serde(rename_all = "camelCase")]
194pub struct ServerMessageWrapper {
195    /// Token usage metadata (present on most server messages).
196    #[serde(default)]
197    pub usage_metadata: Option<UsageMetadata>,
198}
199
200/// Unified server message enum — parsed from incoming WebSocket text frames.
201///
202/// We use manual dispatch instead of `#[serde(untagged)]` for performance:
203/// untagged tries every variant in order. String-contains + targeted parse
204/// is O(1) routing.
205#[derive(Debug, Clone)]
206pub enum ServerMessage {
207    /// Setup handshake completed successfully.
208    SetupComplete(SetupCompleteMessage),
209    /// Model output content (text, audio, transcription, etc.).
210    ServerContent(Box<ServerContentMessage>),
211    /// Model requested one or more tool/function calls.
212    ToolCall(ToolCallMessage),
213    /// Server cancelled previously requested tool calls.
214    ToolCallCancellation(ToolCallCancellationMessage),
215    /// Server requesting graceful disconnect.
216    GoAway(GoAwayMessage),
217    /// Updated session resumption handle.
218    SessionResumptionUpdate(SessionResumptionUpdateMessage),
219    /// Server-side voice activity detection event.
220    VoiceActivity(VoiceActivityMessage),
221    /// Unrecognized message type (forward compatibility).
222    Unknown(serde_json::Value),
223}
224
225impl ServerMessage {
226    /// Parse a server message from a JSON text frame.
227    ///
228    /// Uses string-contains routing for O(1) dispatch instead of
229    /// serde(untagged)'s O(N) try-all-variants approach.
230    pub fn parse(text: &str) -> Result<Self, serde_json::Error> {
231        if text.contains("\"setupComplete\"") {
232            serde_json::from_str::<SetupCompleteMessage>(text).map(ServerMessage::SetupComplete)
233        } else if text.contains("\"toolCallCancellation\"") {
234            // Must check before "toolCall" since it contains "toolCall" as substring
235            serde_json::from_str::<ToolCallCancellationMessage>(text)
236                .map(ServerMessage::ToolCallCancellation)
237        } else if text.contains("\"toolCall\"") {
238            serde_json::from_str::<ToolCallMessage>(text).map(ServerMessage::ToolCall)
239        } else if text.contains("\"serverContent\"") {
240            serde_json::from_str::<ServerContentMessage>(text)
241                .map(|sc| ServerMessage::ServerContent(Box::new(sc)))
242        } else if text.contains("\"goAway\"") {
243            serde_json::from_str::<GoAwayMessage>(text).map(ServerMessage::GoAway)
244        } else if text.contains("\"sessionResumptionUpdate\"") {
245            serde_json::from_str::<SessionResumptionUpdateMessage>(text)
246                .map(ServerMessage::SessionResumptionUpdate)
247        } else if text.contains("\"voiceActivity\"") {
248            serde_json::from_str::<VoiceActivityMessage>(text).map(ServerMessage::VoiceActivity)
249        } else {
250            serde_json::from_str::<serde_json::Value>(text).map(ServerMessage::Unknown)
251        }
252    }
253}