Skip to content

Commit d593905

Browse files
committed
apply review feedback; refactor GeminiService to use a function call for the JSON result of the clue solving process.
1 parent faa5053 commit d593905

File tree

3 files changed

+81
-102
lines changed

3 files changed

+81
-102
lines changed

crossword_companion/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ incorrectly, creating a robust feedback loop.
3737

3838
### Prerequisites
3939

40-
- A Firebase project.
41-
- The Flutter SDK installed.
40+
- The [Flutter SDK](https://docs.flutter.dev/install) installed.
41+
42+
- A [Firebase project enabled for
43+
Generative AI](https://firebase.google.com/docs/ai-logic/get-started?api=dev).
4244

4345
### Installation
4446

crossword_companion/deploy.sh

Lines changed: 0 additions & 4 deletions
This file was deleted.

crossword_companion/lib/services/gemini_service.dart

Lines changed: 77 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,17 @@
22

33
import 'dart:async';
44
import 'dart:convert';
5-
import 'dart:io';
65

76
import 'package:firebase_ai/firebase_ai.dart';
87
import 'package:flutter/foundation.dart';
98
import 'package:http/http.dart' as http;
109
import 'package:image_picker/image_picker.dart';
11-
import 'package:path/path.dart' as path;
1210

1311
import '../models/clue.dart';
1412
import '../models/clue_answer.dart';
1513
import '../models/crossword_data.dart';
1614
import '../models/crossword_grid.dart';
1715
import '../models/grid_cell.dart';
18-
import '../platform/platform.dart';
1916

2017
class GeminiService {
2118
GeminiService() {
@@ -28,58 +25,53 @@ class GeminiService {
2825
),
2926
);
3027

31-
final clueSolverSystemInstructionContent = Content.text(
32-
clueSolverSystemInstruction,
33-
);
34-
35-
// The model for solving clues, including functions the model can call to
36-
// get more information about potential answers.
37-
_clueSolverModelWithFunctions = FirebaseAI.googleAI().generativeModel(
28+
// The model for solving clues.
29+
_clueSolverModel = FirebaseAI.googleAI().generativeModel(
3830
model: 'gemini-2.5-flash',
39-
systemInstruction: clueSolverSystemInstructionContent,
31+
systemInstruction: Content.text(clueSolverSystemInstruction),
4032
tools: [
41-
Tool.functionDeclarations([_getWordMetadataFunction]),
33+
Tool.functionDeclarations([
34+
_getWordMetadataFunction,
35+
_returnResultFunction,
36+
]),
4237
],
4338
);
44-
45-
// The model for solving clues, but without the tools and only for returning
46-
// the final JSON response with the answer and confidence score.
47-
_clueSolverModelWithSchema = FirebaseAI.googleAI().generativeModel(
48-
model: 'gemini-2.5-flash',
49-
systemInstruction: clueSolverSystemInstructionContent,
50-
generationConfig: GenerationConfig(
51-
responseMimeType: 'application/json',
52-
responseSchema: _clueSolverSchema,
53-
),
54-
);
5539
}
5640

5741
late final GenerativeModel _crosswordModel;
58-
late final GenerativeModel _clueSolverModelWithFunctions;
59-
late final GenerativeModel _clueSolverModelWithSchema;
42+
late final GenerativeModel _clueSolverModel;
6043
StreamSubscription<GenerateContentResponse>? _clueSolverSubscription;
6144

6245
/// Cancels the clue-solver stream subscription that is currently tracked,
/// if there is one, and then clears the reference to it.
Future<void> cancelCurrentSolve() async {
  final active = _clueSolverSubscription;
  await active?.cancel();
  _clueSolverSubscription = null;
}
6649

67-
static final _clueSolverSchema = Schema(
68-
SchemaType.object,
69-
properties: {
70-
'answer': Schema(SchemaType.string),
71-
'confidence': Schema(SchemaType.number),
72-
},
73-
);
74-
7550
static final _getWordMetadataFunction = FunctionDeclaration(
7651
'getWordMetadata',
77-
'Gets grammatical metadata for a word, like its part of speech. Best used to verify a candidate answer against a clue that implies a grammatical constraint.',
52+
'Gets grammatical metadata for a word, like its part of speech. '
53+
'Best used to verify a candidate answer against a clue that implies a '
54+
'grammatical constraint.',
7855
parameters: {
7956
'word': Schema(SchemaType.string, description: 'The word to look up.'),
8057
},
8158
);
8259

60+
/// Declaration of the `returnResult` tool: the function the model calls to
/// hand back its final answer. It takes two parameters, a string `answer`
/// and a numeric `confidence` described to the model as ranging from 0.0 to
/// 1.0.
static final _returnResultFunction = FunctionDeclaration(
61+
'returnResult',
62+
'Returns the final result of the clue solving process.',
63+
parameters: {
64+
'answer': Schema(
65+
SchemaType.string,
66+
description: 'The answer to the clue.',
67+
),
68+
'confidence': Schema(
69+
SchemaType.number,
70+
description: 'The confidence score in the answer from 0.0 to 1.0.',
71+
),
72+
},
73+
);
74+
8375
static String get clueSolverSystemInstruction =>
8476
'''
8577
You are an expert crossword puzzle solver.
@@ -114,6 +106,19 @@ You have a tool to get grammatical information about a word.
114106
```json
115107
${jsonEncode(_getWordMetadataFunction.toJson())}
116108
```
109+
110+
### Tool: `returnResult`
111+
112+
You have a tool to return the final result of the clue solving process.
113+
114+
**When to use:**
115+
- Use this tool when you have a final answer and confidence score to return. You
116+
must use this tool exactly once, and only once, to return the final result.
117+
118+
**Function signature:**
119+
```json
120+
${jsonEncode(_returnResultFunction.toJson())}
121+
```
117122
''';
118123

119124
static final _crosswordSchema = Schema(
@@ -163,34 +168,6 @@ ${jsonEncode(_getWordMetadataFunction.toJson())}
163168
);
164169

165170
Future<CrosswordData> inferCrosswordData(List<XFile> images) async {
166-
// Caching is supported in debug mode on desktop.
167-
if (!kIsWeb && kDebugMode && isDesktop()) {
168-
try {
169-
final paths = images.map((image) => image.path).toList()..sort();
170-
final key = paths.join(';').hashCode.toString();
171-
final jsonPath = '${path.join(path.dirname(paths.first), key)}.json';
172-
final jsonFile = File(jsonPath);
173-
174-
if (jsonFile.existsSync()) {
175-
debugPrint('Found cached crossword data at $jsonPath');
176-
final jsonString = await jsonFile.readAsString();
177-
return CrosswordData.fromJson(jsonDecode(jsonString));
178-
} else {
179-
final crosswordData = await _inferCrosswordDataFromApi(images);
180-
final jsonString = jsonEncode(crosswordData.toJson());
181-
await jsonFile.writeAsString(jsonString);
182-
debugPrint('Saved inferred crossword data to $jsonPath');
183-
return crosswordData;
184-
}
185-
} on Exception catch (e) {
186-
debugPrint('Error with file-based caching: $e');
187-
}
188-
}
189-
190-
return _inferCrosswordDataFromApi(images);
191-
}
192-
193-
Future<CrosswordData> _inferCrosswordDataFromApi(List<XFile> images) async {
194171
final imageParts = <Part>[];
195172
for (final image in images) {
196173
final imageBytes = await image.readAsBytes();
@@ -258,30 +235,37 @@ The JSON schema is as follows: ${jsonEncode(_crosswordSchema.toJson())}
258235
);
259236
}
260237

238+
// Buffer for the result of the clue solving process.
239+
final _returnResult = <String, dynamic>{};
240+
261241
/// Solves [clue] by running a tool-calling conversation with the clue-solver
/// model and reading the answer the model reports via `returnResult`.
///
/// [length] and [pattern] are passed to [getSolverPrompt] together with
/// [clue] to build the prompt. Any previous in-flight solve is cancelled
/// before the new one starts.
///
/// Returns `null` when the conversation ends without a usable result, i.e.
/// the model never called `returnResult`, or called it without a string
/// `answer` and a numeric `confidence`.
///
/// Throws an [Exception] if the model requests a function other than
/// `getWordMetadata` or `returnResult`.
Future<ClueAnswer?> solveClue(Clue clue, int length, String pattern) async {
  // Cancel any previous, in-flight request.
  await cancelCurrentSolve();

  // Clear the return result buffer; the `returnResult` handler below fills
  // it while the conversation runs.
  _returnResult.clear();

  // Run the conversation. The final answer is delivered through the
  // `returnResult` function call, so the generated text is not used here.
  await _clueSolverModel.generateContentWithFunctions(
    prompt: getSolverPrompt(clue, length, pattern),
    onFunctionCall: (functionCall) async => switch (functionCall.name) {
      'getWordMetadata' => await _getWordMetadataFromApi(
        functionCall.args['word'] as String,
      ),
      'returnResult' => _cacheReturnResult(functionCall.args),
      _ => throw Exception('Unknown function call: ${functionCall.name}'),
    },
  );

  // Asserts are stripped from release builds, so the buffered result is
  // validated explicitly: a missing or mistyped field yields `null` (which
  // the return type already allows) instead of a failed cast.
  if (_returnResult case {
    'answer': final String answer,
    'confidence': final num confidence,
  }) {
    return ClueAnswer(answer: answer, confidence: confidence.toDouble());
  }

  debugPrint('Clue solver finished without a usable result: $_returnResult');
  return null;
}
283266

284-
Future<Map<String, dynamic>> getWordMetadataFromApi(String word) async {
267+
// Look up the metadata for a word in the dictionary API.
268+
Future<Map<String, dynamic>> _getWordMetadataFromApi(String word) async {
285269
debugPrint('Looking up metadata for word: "$word"');
286270
final url = Uri.parse(
287271
'https://api.dictionaryapi.dev/api/v2/entries/en/${Uri.encodeComponent(word)}',
@@ -293,6 +277,16 @@ The JSON schema is as follows: ${jsonEncode(_crosswordSchema.toJson())}
293277
: {'error': 'Could not find a definition for "$word".'};
294278
}
295279

280+
/// Stores the arguments of the model's `returnResult` function call as the
/// result of the clue solving process.
///
/// This is how we get JSON responses from the model with functions, since
/// the model cannot return JSON directly when tools are used.
///
/// If the model calls `returnResult` more than once, the latest call
/// replaces the earlier one entirely, so no keys from a previous call are
/// left behind. Returns the payload that is sent back to the model as the
/// function response.
Map<String, dynamic> _cacheReturnResult(Map<String, dynamic> returnResult) {
  debugPrint('Caching return result: ${jsonEncode(returnResult)}');

  // The model is instructed to call this exactly once, but that is model
  // behaviour rather than a program invariant, and asserts are stripped from
  // release builds anyway. Handle a repeat call explicitly instead.
  if (_returnResult.isNotEmpty) {
    debugPrint('returnResult was called more than once; keeping the latest.');
  }

  _returnResult
    ..clear()
    ..addAll(returnResult);
  return {'status': 'success'};
}
289+
296290
/// Returns the solver prompt for [clue], [length] and [pattern] by
/// delegating to [buildSolverPrompt].
String getSolverPrompt(Clue clue, int length, String pattern) {
  return buildSolverPrompt(clue, length, pattern);
}
298292

@@ -308,24 +302,29 @@ Your task is to solve the following crossword clue.
308302
309303
Return your answer and confidence score in the required JSON format.
310304
''';
305+
}
311306

312-
Future<Map<String, dynamic>> _generateJsonWithFunctionsAndSchema({
313-
required GenerativeModel modelWithFunctions,
314-
required GenerativeModel modelWithSchema,
307+
extension on GenerativeModel {
308+
Future<String> generateContentWithFunctions({
315309
required String prompt,
316310
required Future<Map<String, dynamic>> Function(FunctionCall) onFunctionCall,
317311
}) async {
318-
// 1. Let the model generate a text response with as many function calls as
319-
// it wants. Use a chat session to support multiple request/response
320-
// pairs, which is needed to support function calls. Also, we'll need the
321-
// history to generate the final JSON response with the schema.
322-
final chat = modelWithFunctions.startChat();
312+
// Use a chat session to support multiple request/response pairs, which is
313+
// needed to support function calls.
314+
final chat = startChat();
315+
final buffer = StringBuffer();
323316
var response = await chat.sendMessage(Content.text(prompt));
324317

325318
while (true) {
319+
// Append the response text to the buffer.
320+
buffer.write(response.text ?? '');
321+
326322
// If no function calls were collected, we're done
327323
if (response.functionCalls.isEmpty) break;
328324

325+
// Append a newline to separate responses.
326+
buffer.write('\n');
327+
329328
// Execute all function calls
330329
final functionResponses = <FunctionResponse>[];
331330
for (final functionCall in response.functionCalls) {
@@ -349,24 +348,6 @@ Return your answer and confidence score in the required JSON format.
349348
);
350349
}
351350

352-
// 2. Generate the final JSON response with the schema. We do that by
353-
// trimming the last two messages from the history (the last prompt/tool
354-
// response and the last LLM response) and sending it to the model
355-
// without the functions but with the schema. Essentially, we're asking
356-
// the model to generate the response to the last prompt we gave it,
357-
// including all of the function call results (if there are any), and
358-
// then generate the same response again, but this time with the JSON
359-
// schema.
360-
final history = chat.history.toList();
361-
final lastModelMessage = history.removeLast();
362-
final lastUserMessage = history.removeLast();
363-
assert(
364-
lastUserMessage.role == 'user' || lastUserMessage.role == 'function',
365-
);
366-
assert(lastModelMessage.role == 'model');
367-
final jsonResponse = await modelWithSchema
368-
.startChat(history: history)
369-
.sendMessage(lastUserMessage);
370-
return jsonDecode(jsonResponse.text!) as Map<String, dynamic>;
351+
return buffer.toString();
371352
}
372353
}

0 commit comments

Comments
 (0)