gemini_adk_fluent_rs/compose/
eval.rs1use std::sync::Arc;
6
/// Signature shared by every scoring callback: `(output, expected) -> score`.
type CheckerFn = dyn Fn(&str, &str) -> f64 + Send + Sync;

/// A named scoring function that rates an output string against an expected string.
///
/// The callback is stored behind an [`Arc`], so clones of a criterion share it.
#[derive(Clone)]
pub struct ECriterion {
    /// Label returned by [`ECriterion::name`].
    name: &'static str,
    /// Callback invoked by [`ECriterion::score`] with `(output, expected)`.
    checker: Arc<CheckerFn>,
}

impl ECriterion {
    /// Builds a criterion called `name` whose score is computed by `f`.
    fn new<F>(name: &'static str, f: F) -> Self
    where
        F: Fn(&str, &str) -> f64 + Send + Sync + 'static,
    {
        let checker: Arc<CheckerFn> = Arc::new(f);
        Self { name, checker }
    }

    /// Returns the name this criterion was created with.
    pub fn name(&self) -> &str {
        self.name
    }

    /// Calls the stored callback with `output` and `expected` and returns its result unchanged.
    pub fn score(&self, output: &str, expected: &str) -> f64 {
        let check = &*self.checker;
        check(output, expected)
    }
}
33
34impl std::fmt::Debug for ECriterion {
35 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36 f.debug_struct("ECriterion")
37 .field("name", &self.name)
38 .finish()
39 }
40}
41
42impl std::ops::BitOr for ECriterion {
44 type Output = EComposite;
45
46 fn bitor(self, rhs: ECriterion) -> Self::Output {
47 EComposite {
48 criteria: vec![self, rhs],
49 }
50 }
51}
52
53#[derive(Clone)]
55pub struct EComposite {
56 pub criteria: Vec<ECriterion>,
58}
59
60impl EComposite {
61 pub fn score_all(&self, output: &str, expected: &str) -> Vec<(&str, f64)> {
63 self.criteria
64 .iter()
65 .map(|c| (c.name(), c.score(output, expected)))
66 .collect()
67 }
68
69 pub fn len(&self) -> usize {
71 self.criteria.len()
72 }
73
74 pub fn is_empty(&self) -> bool {
76 self.criteria.is_empty()
77 }
78}
79
80impl std::ops::BitOr<ECriterion> for EComposite {
81 type Output = EComposite;
82
83 fn bitor(mut self, rhs: ECriterion) -> Self::Output {
84 self.criteria.push(rhs);
85 self
86 }
87}
88
/// A single evaluation case: a prompt and the response expected for it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EvalCase {
    /// Input handed to the system under evaluation.
    pub prompt: String,
    /// Reference answer the output is compared against.
    pub expected: String,
}

/// A list of [`EvalCase`]s together with the names of the criteria selected for them.
///
/// The names are stored exactly as given; nothing in this type checks them against
/// actual criteria. `EvalSuite::default()` is the empty suite.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct EvalSuite {
    /// Cases in insertion order.
    pub cases: Vec<EvalCase>,
    /// Criterion names set by [`EvalSuite::criteria`].
    pub criteria_names: Vec<String>,
}

impl EvalSuite {
    /// Appends a case built from `prompt` and `expected` and returns the suite for chaining.
    pub fn case(mut self, prompt: impl Into<String>, expected: impl Into<String>) -> Self {
        self.cases.push(EvalCase {
            prompt: prompt.into(),
            expected: expected.into(),
        });
        self
    }

    /// Replaces (does not extend) the stored criterion names with `names`.
    pub fn criteria(mut self, names: &[&str]) -> Self {
        self.criteria_names = names.iter().map(|&name| name.to_owned()).collect();
        self
    }

    /// Number of cases in the suite.
    pub fn len(&self) -> usize {
        self.cases.len()
    }

    /// Returns `true` when the suite has no cases.
    pub fn is_empty(&self) -> bool {
        self.cases.is_empty()
    }
}
133
134pub struct E;
136
137impl E {
138 pub fn suite() -> EvalSuite {
140 EvalSuite {
141 cases: Vec::new(),
142 criteria_names: Vec::new(),
143 }
144 }
145
146 pub fn response_match() -> ECriterion {
148 ECriterion::new("response_match", |output, expected| {
149 if output.trim() == expected.trim() {
150 1.0
151 } else {
152 0.0
153 }
154 })
155 }
156
157 pub fn contains_match() -> ECriterion {
159 ECriterion::new("contains_match", |output, expected| {
160 if output.contains(expected) {
161 1.0
162 } else {
163 0.0
164 }
165 })
166 }
167
168 pub fn safety() -> ECriterion {
170 ECriterion::new("safety", |_output, _expected| 1.0)
171 }
172
173 pub fn semantic_match() -> ECriterion {
175 ECriterion::new("semantic_match", |_output, _expected| 0.5)
176 }
177
178 pub fn hallucination() -> ECriterion {
180 ECriterion::new("hallucination", |_output, _expected| 0.5)
181 }
182
183 pub fn trajectory() -> ECriterion {
185 ECriterion::new("trajectory", |_output, _expected| 0.5)
186 }
187
188 pub fn custom(
190 name: &'static str,
191 f: impl Fn(&str, &str) -> f64 + Send + Sync + 'static,
192 ) -> ECriterion {
193 ECriterion::new(name, f)
194 }
195
196 pub fn from_file(path: &str) -> EvalSuite {
202 let content = std::fs::read_to_string(path).unwrap_or_default();
203 let lines: Vec<&str> = content
204 .lines()
205 .map(|l| l.trim())
206 .filter(|l| !l.is_empty() && !l.starts_with('#'))
207 .collect();
208
209 let mut cases = Vec::new();
210 let mut i = 0;
211 while i + 1 < lines.len() {
212 cases.push(EvalCase {
213 prompt: lines[i].to_string(),
214 expected: lines[i + 1].to_string(),
215 });
216 i += 2;
217 }
218
219 EvalSuite {
220 cases,
221 criteria_names: Vec::new(),
222 }
223 }
224
225 pub fn persona(name: &'static str, description: &'static str) -> ECriterion {
230 ECriterion::new(name, move |output, _expected| {
231 let _ = description;
236 if output.is_empty() {
237 0.0
238 } else {
239 0.5
240 }
241 })
242 }
243}
244
#[cfg(test)]
mod tests {
    use super::*;

    /// Exact match scores 1.0, any difference scores 0.0.
    #[test]
    fn response_match_exact() {
        let c = E::response_match();
        assert_eq!(c.score("hello", "hello"), 1.0);
        assert_eq!(c.score("hello", "world"), 0.0);
    }

    /// Substring presence decides the score.
    #[test]
    fn contains_match_works() {
        let c = E::contains_match();
        assert_eq!(c.score("hello world", "world"), 1.0);
        assert_eq!(c.score("hello", "world"), 0.0);
    }

    /// Chaining `|` keeps every criterion.
    #[test]
    fn compose_with_bitor() {
        let composite = E::response_match() | E::safety() | E::semantic_match();
        assert_eq!(composite.len(), 3);
    }

    /// The builder records both cases and criterion names.
    #[test]
    fn suite_builder() {
        let suite = E::suite()
            .case("What is 2+2?", "4")
            .case("Hello", "Hi")
            .criteria(&["response_match", "safety"]);
        assert_eq!(suite.len(), 2);
        assert_eq!(suite.criteria_names.len(), 2);
    }

    /// `score_all` reports one entry per criterion, in composition order.
    #[test]
    fn score_all_returns_results() {
        let composite = E::response_match() | E::contains_match();
        let scores = composite.score_all("hello world", "hello");
        assert_eq!(scores.len(), 2);
        assert_eq!(scores[0].0, "response_match");
        assert_eq!(scores[1].0, "contains_match");
    }

    /// A missing file yields an empty suite instead of an error.
    #[test]
    fn from_file_missing() {
        let suite = E::from_file("/nonexistent/path.txt");
        assert!(suite.is_empty());
    }

    /// Comments and blank lines are skipped; remaining lines pair up as prompt/expected.
    #[test]
    fn from_file_parses_cases() {
        // Per-process file name so concurrent test runs sharing the temp dir cannot collide.
        let file_name = format!("eval_test_cases_{}.txt", std::process::id());
        let path = std::env::temp_dir().join(file_name);
        std::fs::write(&path, "# comment\nWhat is 2+2?\n4\n\nHello\nHi\n").unwrap();
        let suite = E::from_file(path.to_str().unwrap());
        // Clean up before asserting so a failing assertion cannot leak the fixture.
        let _ = std::fs::remove_file(&path);

        assert_eq!(suite.len(), 2);
        assert_eq!(suite.cases[0].prompt, "What is 2+2?");
        assert_eq!(suite.cases[0].expected, "4");
        assert_eq!(suite.cases[1].prompt, "Hello");
        assert_eq!(suite.cases[1].expected, "Hi");
    }

    /// Persona criteria keep their name and score 0.5 for non-empty output, 0.0 for empty.
    #[test]
    fn persona_criterion() {
        let c = E::persona(
            "impatient_user",
            "A user who is in a hurry and wants quick answers",
        );
        assert_eq!(c.name(), "impatient_user");
        assert_eq!(c.score("Here is your answer", ""), 0.5);
        assert_eq!(c.score("", ""), 0.0);
    }

    /// A caller-supplied closure drives the score.
    #[test]
    fn custom_criterion() {
        let c = E::custom("length", |output, _expected| {
            if output.len() > 10 {
                1.0
            } else {
                0.0
            }
        });
        assert_eq!(c.score("short", ""), 0.0);
        assert_eq!(c.score("a long enough output", ""), 1.0);
    }
}