22
33import 'dart:async' ;
44import 'dart:convert' ;
5- import 'dart:io' ;
65
76import 'package:firebase_ai/firebase_ai.dart' ;
87import 'package:flutter/foundation.dart' ;
98import 'package:http/http.dart' as http;
109import 'package:image_picker/image_picker.dart' ;
11- import 'package:path/path.dart' as path;
1210
1311import '../models/clue.dart' ;
1412import '../models/clue_answer.dart' ;
1513import '../models/crossword_data.dart' ;
1614import '../models/crossword_grid.dart' ;
1715import '../models/grid_cell.dart' ;
18- import '../platform/platform.dart' ;
1916
2017class GeminiService {
2118 GeminiService () {
@@ -28,58 +25,53 @@ class GeminiService {
2825 ),
2926 );
3027
31- final clueSolverSystemInstructionContent = Content .text (
32- clueSolverSystemInstruction,
33- );
34-
35- // The model for solving clues, including functions the model can call to
36- // get more information about potential answers.
37- _clueSolverModelWithFunctions = FirebaseAI .googleAI ().generativeModel (
28+ // The model for solving clues.
29+ _clueSolverModel = FirebaseAI .googleAI ().generativeModel (
3830 model: 'gemini-2.5-flash' ,
39- systemInstruction: clueSolverSystemInstructionContent ,
31+ systemInstruction: Content . text (clueSolverSystemInstruction) ,
4032 tools: [
41- Tool .functionDeclarations ([_getWordMetadataFunction]),
33+ Tool .functionDeclarations ([
34+ _getWordMetadataFunction,
35+ _returnResultFunction,
36+ ]),
4237 ],
4338 );
44-
45- // The model for solving clues, but without the tools and only for returning
46- // the final JSON response with the answer and confidence score.
47- _clueSolverModelWithSchema = FirebaseAI .googleAI ().generativeModel (
48- model: 'gemini-2.5-flash' ,
49- systemInstruction: clueSolverSystemInstructionContent,
50- generationConfig: GenerationConfig (
51- responseMimeType: 'application/json' ,
52- responseSchema: _clueSolverSchema,
53- ),
54- );
5539 }
5640
5741 late final GenerativeModel _crosswordModel;
58- late final GenerativeModel _clueSolverModelWithFunctions;
59- late final GenerativeModel _clueSolverModelWithSchema;
42+ late final GenerativeModel _clueSolverModel;
6043 StreamSubscription <GenerateContentResponse >? _clueSolverSubscription;
6144
/// Cancels the clue-solver stream subscription, if one is active.
///
/// Waits for the cancellation to finish and then drops the reference, so
/// calling this again with nothing in flight is harmless.
Future<void> cancelCurrentSolve() async {
  final activeSubscription = _clueSolverSubscription;
  await activeSubscription?.cancel();
  _clueSolverSubscription = null;
}
6649
67- static final _clueSolverSchema = Schema (
68- SchemaType .object,
69- properties: {
70- 'answer' : Schema (SchemaType .string),
71- 'confidence' : Schema (SchemaType .number),
72- },
73- );
74-
/// Declaration of the `getWordMetadata` tool offered to the clue-solver model.
///
/// The tool takes one string parameter, `word`. The JSON form of this
/// declaration is also interpolated into [clueSolverSystemInstruction].
static final _getWordMetadataFunction = FunctionDeclaration(
  'getWordMetadata',
  'Gets grammatical metadata for a word, like its part of speech. Best used '
  'to verify a candidate answer against a clue that implies a grammatical '
  'constraint.',
  parameters: {
    'word': Schema(SchemaType.string, description: 'The word to look up.'),
  },
);
8259
/// Declaration of the `returnResult` tool offered to the clue-solver model.
///
/// The model reports its final `answer` (string) and `confidence` (number)
/// by calling this tool. The JSON form of this declaration is also
/// interpolated into [clueSolverSystemInstruction].
static final _returnResultFunction = FunctionDeclaration(
  'returnResult',
  'Returns the final result of the clue solving process.',
  parameters: {
    'answer': Schema(SchemaType.string, description: 'The answer to the clue.'),
    'confidence': Schema(
      SchemaType.number,
      description: 'The confidence score in the answer from 0.0 to 1.0.',
    ),
  },
);
74+
8375 static String get clueSolverSystemInstruction =>
8476 '''
8577You are an expert crossword puzzle solver.
@@ -114,6 +106,19 @@ You have a tool to get grammatical information about a word.
114106```json
115107${jsonEncode (_getWordMetadataFunction .toJson ())}
116108```
109+
110+ ### Tool: `returnResult`
111+
112+ You have a tool to return the final result of the clue solving process.
113+
114+ **When to use:**
115+ - Use this tool when you have a final answer and confidence score to return. You
116+ must use this tool exactly once, and only once, to return the final result.
117+
118+ **Function signature:**
119+ ```json
120+ ${jsonEncode (_returnResultFunction .toJson ())}
121+ ```
117122''' ;
118123
119124 static final _crosswordSchema = Schema (
@@ -163,34 +168,6 @@ ${jsonEncode(_getWordMetadataFunction.toJson())}
163168 );
164169
165170 Future <CrosswordData > inferCrosswordData (List <XFile > images) async {
166- // Caching is supported in debug mode on desktop.
167- if (! kIsWeb && kDebugMode && isDesktop ()) {
168- try {
169- final paths = images.map ((image) => image.path).toList ()..sort ();
170- final key = paths.join (';' ).hashCode.toString ();
171- final jsonPath = '${path .join (path .dirname (paths .first ), key )}.json' ;
172- final jsonFile = File (jsonPath);
173-
174- if (jsonFile.existsSync ()) {
175- debugPrint ('Found cached crossword data at $jsonPath ' );
176- final jsonString = await jsonFile.readAsString ();
177- return CrosswordData .fromJson (jsonDecode (jsonString));
178- } else {
179- final crosswordData = await _inferCrosswordDataFromApi (images);
180- final jsonString = jsonEncode (crosswordData.toJson ());
181- await jsonFile.writeAsString (jsonString);
182- debugPrint ('Saved inferred crossword data to $jsonPath ' );
183- return crosswordData;
184- }
185- } on Exception catch (e) {
186- debugPrint ('Error with file-based caching: $e ' );
187- }
188- }
189-
190- return _inferCrosswordDataFromApi (images);
191- }
192-
193- Future <CrosswordData > _inferCrosswordDataFromApi (List <XFile > images) async {
194171 final imageParts = < Part > [];
195172 for (final image in images) {
196173 final imageBytes = await image.readAsBytes ();
@@ -258,30 +235,37 @@ The JSON schema is as follows: ${jsonEncode(_crosswordSchema.toJson())}
258235 );
259236 }
260237
// Buffer for the result of the clue solving process. It is filled by the
// model's `returnResult` tool call and read back at the end of [solveClue].
final _returnResult = <String, dynamic>{};

/// Asks the clue-solver model to solve [clue] and returns its answer.
///
/// [length] and [pattern] are passed to [getSolverPrompt] to build the
/// prompt. While the model runs it may call the `getWordMetadata` tool any
/// number of times; it delivers the final answer by calling `returnResult`.
///
/// Returns `null` when the model finishes without delivering a well-formed
/// result (no `returnResult` call, or `answer`/`confidence` missing or of the
/// wrong type).
///
/// Throws an [Exception] if the model calls a tool this service does not
/// know about.
///
/// NOTE(review): `_returnResult` is shared instance state, so overlapping
/// calls could overwrite each other's result — confirm callers never run two
/// solves concurrently.
Future<ClueAnswer?> solveClue(Clue clue, int length, String pattern) async {
  // Cancel any previous, in-flight request.
  await cancelCurrentSolve();

  // Start from an empty buffer; the `returnResult` tool call refills it.
  _returnResult.clear();

  // Run the model, servicing tool calls until it stops requesting them.
  await _clueSolverModel.generateContentWithFunctions(
    prompt: getSolverPrompt(clue, length, pattern),
    onFunctionCall: (functionCall) async => switch (functionCall.name) {
      // Tool arguments come from the model, so check them instead of casting.
      // An error map lets the model see the problem and retry.
      'getWordMetadata' => switch (functionCall.args['word']) {
        final String word => await _getWordMetadataFromApi(word),
        _ => <String, dynamic>{
          'error': 'The "word" argument is missing or is not a string.',
        },
      },
      'returnResult' => _cacheReturnResult(functionCall.args),
      _ => throw Exception('Unknown function call: ${functionCall.name}'),
    },
  );

  // `assert` is stripped from release builds, so validate the buffer
  // explicitly rather than letting a failed cast surface as a TypeError.
  if (_returnResult case {
    'answer': final String answer,
    'confidence': final num confidence,
  }) {
    return ClueAnswer(answer: answer, confidence: confidence.toDouble());
  }

  debugPrint('The model did not return a valid result for the clue.');
  return null;
}
283266
284- Future <Map <String , dynamic >> getWordMetadataFromApi (String word) async {
267+ // Look up the metadata for a word in the dictionary API.
268+ Future <Map <String , dynamic >> _getWordMetadataFromApi (String word) async {
285269 debugPrint ('Looking up metadata for word: "$word "' );
286270 final url = Uri .parse (
287271 'https://api.dictionaryapi.dev/api/v2/entries/en/${Uri .encodeComponent (word )}' ,
@@ -293,6 +277,16 @@ The JSON schema is as follows: ${jsonEncode(_crosswordSchema.toJson())}
293277 : {'error' : 'Could not find a definition for "$word ".' };
294278 }
295279
// Handler for the model's `returnResult` tool call: copies the call's
// arguments into the `_returnResult` buffer and acknowledges the call.
//
// This is how we get JSON responses from the model with functions, since the
// model cannot return JSON directly when tools are used.
Map<String, dynamic> _cacheReturnResult(Map<String, dynamic> returnResult) {
  final encodedResult = jsonEncode(returnResult);
  debugPrint('Caching return result: $encodedResult');

  // The buffer is expected to be empty here (debug-only check).
  assert(_returnResult.isEmpty, 'The return result cache is not empty.');

  for (final entry in returnResult.entries) {
    _returnResult[entry.key] = entry.value;
  }

  final acknowledgement = <String, dynamic>{'status': 'success'};
  return acknowledgement;
}
289+
/// Returns the prompt text used to ask the model to solve [clue].
///
/// Delegates to [buildSolverPrompt] with the same arguments.
String getSolverPrompt(Clue clue, int length, String pattern) {
  return buildSolverPrompt(clue, length, pattern);
}
298292
@@ -308,24 +302,29 @@ Your task is to solve the following crossword clue.
308302
309303Return your answer and confidence score in the required JSON format.
310304''' ;
305+ }
311306
312- Future <Map <String , dynamic >> _generateJsonWithFunctionsAndSchema ({
313- required GenerativeModel modelWithFunctions,
314- required GenerativeModel modelWithSchema,
307+ extension on GenerativeModel {
308+ Future <String > generateContentWithFunctions ({
315309 required String prompt,
316310 required Future <Map <String , dynamic >> Function (FunctionCall ) onFunctionCall,
317311 }) async {
318- // 1. Let the model generate a text response with as many function calls as
319- // it wants. Use a chat session to support multiple request/response
320- // pairs, which is needed to support function calls. Also, we'll need the
321- // history to generate the final JSON response with the schema.
322- final chat = modelWithFunctions.startChat ();
312+ // Use a chat session to support multiple request/response pairs, which is
313+ // needed to support function calls.
314+ final chat = startChat ();
315+ final buffer = StringBuffer ();
323316 var response = await chat.sendMessage (Content .text (prompt));
324317
325318 while (true ) {
319+ // Append the response text to the buffer.
320+ buffer.write (response.text ?? '' );
321+
326322 // If no function calls were collected, we're done
327323 if (response.functionCalls.isEmpty) break ;
328324
325+ // Append a newline to separate responses.
326+ buffer.write ('\n ' );
327+
329328 // Execute all function calls
330329 final functionResponses = < FunctionResponse > [];
331330 for (final functionCall in response.functionCalls) {
@@ -349,24 +348,6 @@ Return your answer and confidence score in the required JSON format.
349348 );
350349 }
351350
352- // 2. Generate the final JSON response with the schema. We do that by
353- // trimming the last two messages from the history (the last prompt/tool
354- // response and the last LLM response) and sending it to the model
355- // without the functions but with the schema. Essentially, we're asking
356- // the model to generate the response to the last prompt we gave it,
357- // including all of the function call results (if there are any), and
358- // then generate the same response again, but this time with the JSON
359- // schema.
360- final history = chat.history.toList ();
361- final lastModelMessage = history.removeLast ();
362- final lastUserMessage = history.removeLast ();
363- assert (
364- lastUserMessage.role == 'user' || lastUserMessage.role == 'function' ,
365- );
366- assert (lastModelMessage.role == 'model' );
367- final jsonResponse = await modelWithSchema
368- .startChat (history: history)
369- .sendMessage (lastUserMessage);
370- return jsonDecode (jsonResponse.text! ) as Map <String , dynamic >;
351+ return buffer.toString ();
371352 }
372353}
0 commit comments