Skip to content

Commit 8134dad

Browse files
committed
feat: support AI SDK runner and Claude 4.1 Opus & Sonnet 4.5
The Genkit runner and community libraries are seemingly falling behind here, and it's not possible to trivially enable Opus 4.1 or Sonnet 4.5. This commit introduces the AI SDK runner and adds support for the latest Claude models that way. Notably, the AI SDK has fairly complete documentation and more discoverable types (by avoiding Zod complexity).
1 parent 5484f1a commit 8134dad

File tree

6 files changed

+288
-2
lines changed

6 files changed

+288
-2
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ You can customize the `web-codegen-scorer eval` script with the following flags:
8888
- Example: `web-codegen-scorer eval --model=gemini-2.5-flash --autorater-model=gemini-2.5-flash --env=<config path>`
8989

9090
- `--runner=<name>`: Specifies the runner to use to execute the eval. Supported runners are
91-
`genkit` (default), `gemini-cli`, `claude-code` or `codex`.
91+
`genkit` (default), `ai-sdk`, `gemini-cli`, `claude-code` or `codex`.
9292

9393
- `--local`: Runs the script in local mode for the initial code generation request. Instead of
9494
calling the LLM, it will attempt to read the initial code from a corresponding file in the

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"wcs": "./runner/bin/cli.js"
5252
},
5353
"dependencies": {
54+
"@ai-sdk/anthropic": "^2.0.45",
5455
"@anthropic-ai/sdk": "^0.68.0",
5556
"@axe-core/puppeteer": "^4.10.2",
5657
"@genkit-ai/compat-oai": "1.23.0",
@@ -63,6 +64,7 @@
6364
"@types/cli-progress": "^3.11.6",
6465
"@types/node": "^24.2.0",
6566
"@types/yargs": "^17.0.33",
67+
"ai": "^5.0.95",
6668
"axe-core": "^4.10.3",
6769
"boxen": "^8.0.1",
6870
"chalk": "^5.4.1",

pnpm-lock.yaml

Lines changed: 72 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

runner/codegen/ai-sdk-runner.ts

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import {
2+
LlmRunner,
3+
LocalLlmConstrainedOutputGenerateRequestOptions,
4+
LocalLlmConstrainedOutputGenerateResponse,
5+
LocalLlmGenerateFilesRequestOptions,
6+
LocalLlmGenerateFilesResponse,
7+
LocalLlmGenerateTextRequestOptions,
8+
LocalLlmGenerateTextResponse,
9+
PromptDataMessage,
10+
} from './llm-runner.js';
11+
import {
12+
FilePart,
13+
generateObject,
14+
generateText,
15+
LanguageModel,
16+
ModelMessage,
17+
SystemModelMessage,
18+
TextPart,
19+
} from 'ai';
20+
import {anthropic, AnthropicProviderOptions} from '@ai-sdk/anthropic';
21+
import z from 'zod';
22+
import {callWithTimeout} from '../utils/timeout.js';
23+
import {combineAbortSignals} from '../utils/abort-signal.js';
24+
25+
// Model aliases accepted by this runner. Each alias maps to a concrete
// Anthropic model plus a thinking on/off flag in `_getAiSdkModelOptions`.
const SUPPORTED_MODELS = [
  'claude-opus-4.1-no-thinking',
  'claude-opus-4.1-with-thinking',
  'claude-sonnet-4.5-no-thinking',
  'claude-sonnet-4.5-with-thinking',
] as const;

// Set to a very high value because stuck LLM requests are aborted by a
// separate, explicit timeout instead. WCS prioritizes stability here, even
// if that means sitting through many exponential-backoff retry waits.
const DEFAULT_MAX_RETRIES = 100000;
36+
37+
export class AiSDKRunner implements LlmRunner {
38+
displayName = 'AI SDK';
39+
id = 'ai-sdk';
40+
hasBuiltInRepairLoop = true;
41+
42+
async generateText(
43+
options: LocalLlmGenerateTextRequestOptions,
44+
): Promise<LocalLlmGenerateTextResponse> {
45+
const response = await this._wrapRequestWithTimeoutAndRateLimiting(options, async abortSignal =>
46+
generateText({
47+
...(await this._getAiSdkModelOptions(options)),
48+
abortSignal: abortSignal,
49+
messages: this._convertRequestToMessagesList(options),
50+
maxRetries: DEFAULT_MAX_RETRIES,
51+
}),
52+
);
53+
54+
return {
55+
reasoning: response.reasoningText ?? '',
56+
text: response.text,
57+
usage: response.usage,
58+
// TODO: Consider supporting `toolLogs` and MCP here.
59+
};
60+
}
61+
62+
async generateConstrained<T extends z.ZodTypeAny = z.ZodTypeAny>(
63+
options: LocalLlmConstrainedOutputGenerateRequestOptions<T>,
64+
): Promise<LocalLlmConstrainedOutputGenerateResponse<T>> {
65+
const response = await this._wrapRequestWithTimeoutAndRateLimiting(options, async abortSignal =>
66+
generateObject({
67+
...(await this._getAiSdkModelOptions(options)),
68+
messages: this._convertRequestToMessagesList(options),
69+
schema: options.schema,
70+
abortSignal: abortSignal,
71+
maxRetries: DEFAULT_MAX_RETRIES,
72+
}),
73+
);
74+
75+
return {
76+
reasoning: response.reasoning ?? '',
77+
output: response.object,
78+
usage: response.usage,
79+
// TODO: Consider supporting `toolLogs` and MCP here.
80+
};
81+
}
82+
83+
async generateFiles(
84+
options: LocalLlmGenerateFilesRequestOptions,
85+
): Promise<LocalLlmGenerateFilesResponse> {
86+
const response = await this.generateConstrained({
87+
...options,
88+
prompt: options.context.executablePrompt,
89+
systemPrompt: options.context.systemInstructions,
90+
schema: z.object({
91+
outputFiles: z.array(
92+
z.object({
93+
filePath: z.string().describe('Name of the file that is being changed'),
94+
code: z.string().describe('New code of the file'),
95+
}),
96+
),
97+
}),
98+
});
99+
100+
return {
101+
files: response.output?.outputFiles ?? [],
102+
reasoning: response.reasoning,
103+
usage: response.usage,
104+
// TODO: Consider supporting `toolLogs` and MCP here.
105+
};
106+
}
107+
108+
getSupportedModels(): string[] {
109+
return [...SUPPORTED_MODELS];
110+
}
111+
112+
async dispose(): Promise<void> {}
113+
114+
private async _wrapRequestWithTimeoutAndRateLimiting<T>(
115+
request: LocalLlmGenerateTextRequestOptions | LocalLlmConstrainedOutputGenerateRequestOptions,
116+
fn: (abortSignal: AbortSignal) => Promise<T>,
117+
): Promise<T> {
118+
// TODO: Check if rate-limiting is actually necessary here. AI SDK
119+
// seems to do retrying on its own.
120+
121+
if (request.timeout === undefined) {
122+
return await fn(request.abortSignal);
123+
}
124+
return callWithTimeout(
125+
request.timeout.description,
126+
abortSignal => fn(combineAbortSignals(abortSignal, request.abortSignal)),
127+
request.timeout.durationInMins,
128+
);
129+
}
130+
131+
private async _getAiSdkModelOptions(
132+
request: LocalLlmGenerateTextRequestOptions,
133+
): Promise<{model: LanguageModel; providerOptions: {}}> {
134+
switch (request.model) {
135+
case 'claude-opus-4.1-no-thinking':
136+
case 'claude-opus-4.1-with-thinking': {
137+
const thinkingEnabled = request.model.endsWith('with-thinking');
138+
return {
139+
model: anthropic('claude-opus-4-1'),
140+
providerOptions: {
141+
sendReasoning: thinkingEnabled,
142+
thinking: {type: thinkingEnabled ? 'enabled' : 'disabled'},
143+
} satisfies AnthropicProviderOptions,
144+
};
145+
}
146+
case 'claude-sonnet-4.5-no-thinking':
147+
case 'claude-sonnet-4.5-with-thinking': {
148+
const thinkingEnabled = request.model.endsWith('with-thinking');
149+
return {
150+
model: anthropic('claude-sonnet-4-5'),
151+
providerOptions: {
152+
sendReasoning: true,
153+
thinking: {type: 'enabled'},
154+
} satisfies AnthropicProviderOptions,
155+
};
156+
}
157+
default:
158+
throw new Error(`Unexpected model in AI SDK runner: ${request.model}.`);
159+
}
160+
}
161+
162+
private _convertRequestToMessagesList(
163+
request: LocalLlmConstrainedOutputGenerateRequestOptions | LocalLlmGenerateTextRequestOptions,
164+
): ModelMessage[] {
165+
return [
166+
// System prompt message.
167+
...(request.systemPrompt !== undefined
168+
? [
169+
{
170+
role: 'system',
171+
content: request.systemPrompt,
172+
} satisfies SystemModelMessage,
173+
]
174+
: []),
175+
// Optional additional messages
176+
...this._toAiSDKMessage(request.messages ?? []),
177+
// The main message.
178+
{role: 'user', content: [{type: 'text', text: request.prompt}]},
179+
];
180+
}
181+
182+
private _toAiSDKMessage(messages: PromptDataMessage[]): ModelMessage[] {
183+
const result: ModelMessage[] = [];
184+
185+
for (const message of messages) {
186+
if (message.role === 'model') {
187+
result.push({
188+
role: 'assistant',
189+
content: message.content.map(c =>
190+
'media' in c
191+
? ({type: 'file', data: c.media.url, mediaType: 'image/png'} satisfies FilePart)
192+
: ({type: 'text', text: c.text} satisfies TextPart),
193+
),
194+
});
195+
} else if (message.role === 'user') {
196+
result.push({
197+
role: 'user',
198+
content: message.content.map(c =>
199+
'media' in c
200+
? ({type: 'file', data: c.media.url, mediaType: 'image/png'} satisfies FilePart)
201+
: ({type: 'text', text: c.text} satisfies TextPart),
202+
),
203+
});
204+
}
205+
}
206+
return result;
207+
}
208+
}

runner/codegen/runner-creation.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ import type {ClaudeCodeRunner} from './claude-code-runner.js';
44
import type {GenkitRunner} from './genkit/genkit-runner.js';
55
import type {CodexRunner} from './codex-runner.js';
66
import type {NoopUnimplementedRunner} from './noop-unimplemented-runner.js';
7+
import {AiSDKRunner} from './ai-sdk-runner.js';
78

89
interface AvailableRunners {
910
genkit: GenkitRunner;
11+
'ai-sdk': AiSDKRunner;
1012
'gemini-cli': GeminiCliRunner;
1113
'claude-code': ClaudeCodeRunner;
1214
'codex': CodexRunner;
@@ -27,6 +29,8 @@ export async function getRunnerByName<T extends RunnerName>(name: T): Promise<Av
2729
return import('./genkit/genkit-runner.js').then(
2830
m => new m.GenkitRunner() as AvailableRunners[T],
2931
);
32+
case 'ai-sdk':
33+
return import('./ai-sdk-runner.js').then(m => new m.AiSDKRunner() as AvailableRunners[T]);
3034
case 'gemini-cli':
3135
return import('./gemini-cli-runner.js').then(
3236
m => new m.GeminiCliRunner() as AvailableRunners[T],

runner/eval-cli.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ function builder(argv: Argv): Argv<Options> {
6161
.option('runner', {
6262
type: 'string',
6363
default: 'genkit' as const,
64-
choices: ['genkit', 'gemini-cli', 'claude-code', 'codex'] as RunnerName[],
64+
choices: ['genkit', 'ai-sdk', 'gemini-cli', 'claude-code', 'codex'] as RunnerName[],
6565
description: 'Runner to use to execute the eval',
6666
})
6767
.option('local', {

0 commit comments

Comments
 (0)