1use crate::buffer::AudioJitterBuffer;
18use crate::session::{SessionCommand, SessionPhase};
19
/// Settings that control how [`BargeInDetector`] reacts to user speech while
/// the session is in the `ModelSpeaking` phase.
///
/// Derives `PartialEq` so configurations can be compared directly (for
/// example in tests or when detecting a configuration change).
#[derive(Debug, Clone, PartialEq)]
pub struct BargeInConfig {
    /// Master switch. When `false`, `BargeInDetector::check` always returns
    /// `BargeInAction::None`.
    pub enabled: bool,
    /// Energy threshold in decibels.
    ///
    /// NOTE(review): not read by the detector logic visible here; presumably
    /// consumed by the voice-activity / energy gate elsewhere. Confirm.
    pub energy_threshold_db: f64,
    /// Number of consecutive speaking frames required before the detector
    /// reports `BargeInAction::Interrupt`. Values of `0` and `1` both
    /// interrupt on the first speaking frame.
    pub min_speech_frames: u32,
    /// Selects the detection strategy: `true` ducks first and interrupts only
    /// if speech persists; `false` uses the plain frame counter with no duck.
    pub tentative: bool,
    /// Value carried by `BargeInAction::Duck` in tentative mode.
    ///
    /// NOTE(review): presumably a linear playback gain in `0.0..=1.0`. Confirm
    /// against the consumer of the action.
    pub duck_volume: f32,
}
34
35impl Default for BargeInConfig {
36 fn default() -> Self {
37 Self {
38 enabled: true,
39 energy_threshold_db: 15.0,
40 min_speech_frames: 2,
41 tentative: true,
42 duck_volume: 0.3,
43 }
44 }
45}
46
/// Outcome of a single [`BargeInDetector::check`] call.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BargeInAction {
    /// Nothing to do for this frame (also returned whenever barge-in is
    /// disabled).
    None,
    /// Tentative mode only: speech has just started. The payload is the
    /// configured `duck_volume`.
    Duck(f32),
    /// Speech lasted for the configured number of consecutive speaking
    /// frames; the barge-in is confirmed.
    Interrupt,
    /// A duck was issued, but speech stopped or the session left the
    /// `ModelSpeaking` phase before the interrupt threshold was reached.
    Restore,
}
60
/// Internal state of the tentative (duck-then-interrupt) detection path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DetectorState {
    /// No duck is outstanding.
    Idle,
    /// A `Duck` action has been issued; `frames` is the number of consecutive
    /// speaking frames seen so far, starting at 1 for the frame that
    /// triggered the duck.
    Ducked { frames: u32 },
}
69
/// Frame-by-frame barge-in detector: watches the caller's voice-activity flag
/// while the session is in the `ModelSpeaking` phase and reports when to
/// duck, interrupt, or restore.
pub struct BargeInDetector {
    /// Behaviour settings supplied to `new`.
    config: BargeInConfig,
    /// Consecutive speaking frames counted by the non-tentative (legacy) path.
    speech_frame_count: u32,
    /// Idle/ducked state used by the tentative path.
    state: DetectorState,
}
78
79impl BargeInDetector {
80 pub fn new(config: BargeInConfig) -> Self {
82 Self {
83 config,
84 speech_frame_count: 0,
85 state: DetectorState::Idle,
86 }
87 }
88
89 pub fn check(&mut self, current_phase: SessionPhase, vad_is_speaking: bool) -> BargeInAction {
101 if !self.config.enabled {
102 return BargeInAction::None;
103 }
104
105 if current_phase != SessionPhase::ModelSpeaking {
107 let action = self.restore_if_ducked();
108 self.speech_frame_count = 0;
109 self.state = DetectorState::Idle;
110 return action;
111 }
112
113 if self.config.tentative {
114 self.check_tentative(vad_is_speaking)
115 } else {
116 self.check_legacy(vad_is_speaking)
117 }
118 }
119
120 fn check_legacy(&mut self, vad_is_speaking: bool) -> BargeInAction {
122 if vad_is_speaking {
123 self.speech_frame_count += 1;
124 if self.speech_frame_count >= self.config.min_speech_frames {
125 self.speech_frame_count = 0;
126 return BargeInAction::Interrupt;
127 }
128 } else {
129 self.speech_frame_count = 0;
130 }
131
132 BargeInAction::None
133 }
134
135 fn check_tentative(&mut self, vad_is_speaking: bool) -> BargeInAction {
137 match self.state {
138 DetectorState::Idle => {
139 if vad_is_speaking {
140 self.state = DetectorState::Ducked { frames: 1 };
143 if self.config.min_speech_frames <= 1 {
145 self.state = DetectorState::Idle;
146 return BargeInAction::Interrupt;
147 }
148 BargeInAction::Duck(self.config.duck_volume)
149 } else {
150 BargeInAction::None
151 }
152 }
153 DetectorState::Ducked { frames } => {
154 if vad_is_speaking {
155 let new_frames = frames + 1;
156 if new_frames >= self.config.min_speech_frames {
157 self.state = DetectorState::Idle;
159 BargeInAction::Interrupt
160 } else {
161 self.state = DetectorState::Ducked { frames: new_frames };
162 BargeInAction::None
164 }
165 } else {
166 self.state = DetectorState::Idle;
168 BargeInAction::Restore
169 }
170 }
171 }
172 }
173
174 fn restore_if_ducked(&self) -> BargeInAction {
176 match self.state {
177 DetectorState::Ducked { .. } => BargeInAction::Restore,
178 DetectorState::Idle => BargeInAction::None,
179 }
180 }
181
182 pub fn reset(&mut self) {
184 self.speech_frame_count = 0;
185 self.state = DetectorState::Idle;
186 }
187
188 pub fn execute_barge_in(jitter_buffer: &mut AudioJitterBuffer) -> SessionCommand {
194 jitter_buffer.flush();
196
197 SessionCommand::ActivityStart
199 }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a detector whose config is the default apart from the given
    /// overrides; `duck_volume: None` keeps the default duck volume.
    fn make_detector(
        min_speech_frames: u32,
        tentative: bool,
        duck_volume: Option<f32>,
    ) -> BargeInDetector {
        let defaults = BargeInConfig::default();
        BargeInDetector::new(BargeInConfig {
            min_speech_frames,
            tentative,
            duck_volume: duck_volume.unwrap_or(defaults.duck_volume),
            ..defaults
        })
    }

    /// Feeds each voice-activity flag to `check` in the `ModelSpeaking` phase
    /// and asserts the action reported for that frame.
    fn expect_while_model_speaking(
        detector: &mut BargeInDetector,
        frames: &[(bool, BargeInAction)],
    ) {
        for (index, &(vad_is_speaking, expected)) in frames.iter().enumerate() {
            assert_eq!(
                detector.check(SessionPhase::ModelSpeaking, vad_is_speaking),
                expected,
                "unexpected action at frame {}",
                index
            );
        }
    }

    #[test]
    fn no_barge_in_when_disabled() {
        let config = BargeInConfig {
            enabled: false,
            ..Default::default()
        };
        let mut detector = BargeInDetector::new(config);

        expect_while_model_speaking(&mut detector, &[(true, BargeInAction::None)]);
    }

    #[test]
    fn no_barge_in_when_not_model_speaking() {
        let mut detector = BargeInDetector::new(BargeInConfig::default());

        assert_eq!(
            detector.check(SessionPhase::Active, true),
            BargeInAction::None
        );
    }

    #[test]
    fn barge_in_after_min_frames() {
        let mut detector = make_detector(3, false, None);

        expect_while_model_speaking(
            &mut detector,
            &[
                (true, BargeInAction::None),
                (true, BargeInAction::None),
                (true, BargeInAction::Interrupt),
            ],
        );
    }

    #[test]
    fn barge_in_resets_on_silence() {
        let mut detector = make_detector(3, false, None);

        // Two speaking frames followed by silence; the results are not
        // asserted, only the effect on the counter matters.
        for &vad_is_speaking in &[true, true, false] {
            let _ = detector.check(SessionPhase::ModelSpeaking, vad_is_speaking);
        }

        // The silent frame restarted the count, so one more speaking frame is
        // not enough to interrupt.
        expect_while_model_speaking(&mut detector, &[(true, BargeInAction::None)]);
    }

    #[test]
    fn tentative_barge_in_duck_then_interrupt() {
        let mut detector = make_detector(3, true, Some(0.3));

        expect_while_model_speaking(
            &mut detector,
            &[
                // First speaking frame ducks.
                (true, BargeInAction::Duck(0.3)),
                // Second frame: still below the threshold.
                (true, BargeInAction::None),
                // Third frame reaches the threshold.
                (true, BargeInAction::Interrupt),
            ],
        );
    }

    #[test]
    fn tentative_barge_in_duck_then_restore() {
        let mut detector = make_detector(3, true, Some(0.3));

        expect_while_model_speaking(
            &mut detector,
            &[
                // Speech starts: duck.
                (true, BargeInAction::Duck(0.3)),
                // Speech stops before the threshold: restore.
                (false, BargeInAction::Restore),
                // Already idle again: nothing further to do.
                (false, BargeInAction::None),
            ],
        );
    }

    #[test]
    fn tentative_disabled_skips_duck() {
        let mut detector = make_detector(3, false, Some(0.3));

        expect_while_model_speaking(
            &mut detector,
            &[
                (true, BargeInAction::None),
                (true, BargeInAction::None),
                (true, BargeInAction::Interrupt),
            ],
        );
    }

    #[test]
    fn duck_volume_in_action() {
        let mut detector = make_detector(5, true, Some(0.5));

        expect_while_model_speaking(&mut detector, &[(true, BargeInAction::Duck(0.5))]);
    }

    #[test]
    fn tentative_restores_on_phase_change() {
        let mut detector = make_detector(5, true, Some(0.3));

        expect_while_model_speaking(&mut detector, &[(true, BargeInAction::Duck(0.3))]);

        // Leaving the ModelSpeaking phase while ducked must undo the duck.
        assert_eq!(
            detector.check(SessionPhase::Active, true),
            BargeInAction::Restore
        );
    }

    #[test]
    fn tentative_immediate_interrupt_when_min_frames_one() {
        let mut detector = make_detector(1, true, Some(0.3));

        expect_while_model_speaking(&mut detector, &[(true, BargeInAction::Interrupt)]);
    }
}