diff --git a/.changeset/fix-duplicate-tool-processing-all-providers.md b/.changeset/fix-duplicate-tool-processing-all-providers.md new file mode 100644 index 0000000000..a9c1deb368 --- /dev/null +++ b/.changeset/fix-duplicate-tool-processing-all-providers.md @@ -0,0 +1,16 @@ +--- +"kilo-code": patch +--- + +Fix duplicate tool processing in 7 additional providers (inception, lm-studio, deepinfra, xai, lite-llm, qwen-code, chutes) + +Following PR #4531, which fixed duplicate tool processing in OpenAI-compatible providers, this change applies the same fix to 7 additional providers that had the same issue. The `ToolCallAccumulator` was processing tool calls in the streaming loop while providers such as deepinfra, xai and lite-llm were also manually emitting `tool_call_partial` chunks, causing duplicates. This fix removes the `ToolCallAccumulator` usage from every affected provider and relies solely on emitting raw `tool_call_partial` chunks, which are then handled by `NativeToolCallParser`; providers that did not yet emit those chunks in their streaming loop (inception, lm-studio, qwen-code) now do so. + +Affected providers: +- inception +- lm-studio +- deepinfra +- xai +- lite-llm +- qwen-code +- chutes diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index 1efc03ec5f..d31843f37b 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -14,7 +14,6 @@ import { BaseProvider } from "./base-provider" import { verifyFinishReason } from "./kilocode/verifyFinishReason" import { handleOpenAIError } from "./utils/openai-error-handler" import { fetchWithTimeout } from "./kilocode/fetchWithTimeout" // kilocode_change -import { ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" // kilocode_change import { calculateApiCostOpenAI } from "../../shared/cost" import { getApiRequestTimeout } from "./utils/timeout-config" diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts index e024efb9ad..4d997a9113 100644 --- a/src/api/providers/chutes.ts +++ b/src/api/providers/chutes.ts @@ -11,7 
+11,7 @@ import { ApiStream } from "../transform/stream" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from "./kilocode/nativeToolCallHelpers" export class ChutesHandler extends RouterProvider implements SingleCompletionHandler { constructor(options: ApiHandlerOptions) { @@ -69,8 +69,6 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan ): ApiStream { const model = await this.fetchModel() - const toolCallAccumulator = new ToolCallAccumulator() // kilocode_change - if (model.id.includes("DeepSeek-R1")) { const stream = await this.client.chat.completions.create({ ...this.getCompletionParams(systemPrompt, messages, metadata), @@ -89,8 +87,6 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan for await (const chunk of stream) { const delta = chunk.choices[0]?.delta - yield* toolCallAccumulator.processChunk(chunk) // kilocode_change - if (delta?.content) { for (const processedChunk of matcher.update(delta.content)) { yield processedChunk @@ -132,8 +128,6 @@ export class ChutesHandler extends RouterProvider implements SingleCompletionHan for await (const chunk of stream) { const delta = chunk.choices[0]?.delta - yield* toolCallAccumulator.processChunk(chunk) // kilocode_change - if (delta?.content) { yield { type: "text", text: delta.content } } diff --git a/src/api/providers/deepinfra.ts b/src/api/providers/deepinfra.ts index 9a7f9c2bec..a46780e921 100644 --- a/src/api/providers/deepinfra.ts +++ b/src/api/providers/deepinfra.ts @@ -13,7 +13,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". 
import { RouterProvider } from "./router-provider" import { getModelParams } from "../transform/model-params" import { getModels } from "./fetchers/modelCache" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from "./kilocode/nativeToolCallHelpers" export class DeepInfraHandler extends RouterProvider implements SingleCompletionHandler { constructor(options: ApiHandlerOptions) { @@ -94,12 +94,9 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion const { data: stream } = await this.client.chat.completions.create(requestOptions).withResponse() let lastUsage: OpenAI.CompletionUsage | undefined - const toolCallAccumulator = new ToolCallAccumulator() // kilocode_change for await (const chunk of stream) { const delta = chunk.choices[0]?.delta - yield* toolCallAccumulator.processChunk(chunk) // kilocode_change - if (delta?.content) { yield { type: "text", text: delta.content } } @@ -108,7 +105,7 @@ export class DeepInfraHandler extends RouterProvider implements SingleCompletion yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } } - // Handle tool calls in stream - emit partial chunks for NativeToolCallParser + // Emit raw tool call chunks - NativeToolCallParser handles state management if (delta?.tool_calls) { for (const toolCall of delta.tool_calls) { yield { diff --git a/src/api/providers/inception.ts b/src/api/providers/inception.ts index 77be3fcfda..e8f0056de8 100644 --- a/src/api/providers/inception.ts +++ b/src/api/providers/inception.ts @@ -13,7 +13,7 @@ import Anthropic from "@anthropic-ai/sdk" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import OpenAI from "openai" import { convertToOpenAiMessages } from "../transform/openai-format" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from 
"./kilocode/nativeToolCallHelpers" export class InceptionLabsHandler extends RouterProvider implements SingleCompletionHandler { constructor(options: ApiHandlerOptions) { @@ -83,10 +83,8 @@ export class InceptionLabsHandler extends RouterProvider implements SingleComple .withResponse() let lastUsage: OpenAI.CompletionUsage | undefined - const toolCallAccumulator = new ToolCallAccumulator() for await (const chunk of stream) { const delta = chunk.choices[0]?.delta - yield* toolCallAccumulator.processChunk(chunk) if (delta?.content) { yield { type: "text", text: delta.content } @@ -95,6 +93,20 @@ export class InceptionLabsHandler extends RouterProvider implements SingleComple if (delta && "reasoning_content" in delta && delta.reasoning_content) { yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } } + + // Emit raw tool call chunks - NativeToolCallParser handles state management + if (delta?.tool_calls) { + for (const toolCall of delta.tool_calls) { + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } + } + } + if (chunk.usage) { lastUsage = chunk.usage } diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts index f1ba43340d..c7120f253d 100644 --- a/src/api/providers/lite-llm.ts +++ b/src/api/providers/lite-llm.ts @@ -12,7 +12,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from "./kilocode/nativeToolCallHelpers" /** * LiteLLM provider handler @@ -153,18 +153,15 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa let lastUsage - const toolCallAccumulator = new 
ToolCallAccumulator() // kilocode_change for await (const chunk of completion) { const delta = chunk.choices[0]?.delta const usage = chunk.usage as LiteLLMUsage - yield* toolCallAccumulator.processChunk(chunk) // kilocode_change - if (delta?.content) { yield { type: "text", text: delta.content } } - // Handle tool calls in stream - emit partial chunks for NativeToolCallParser + // Emit raw tool call chunks - NativeToolCallParser handles state management if (delta?.tool_calls) { for (const toolCall of delta.tool_calls) { yield { diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index bbf545ac16..09ff2c55ff 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -14,7 +14,7 @@ import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { fetchWithTimeout, HeadersTimeoutError } from "./kilocode/fetchWithTimeout" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from "./kilocode/nativeToolCallHelpers" import { getModels, getModelsFromCache } from "./fetchers/modelCache" import { handleOpenAIError } from "./utils/openai-error-handler" import { getApiRequestTimeout } from "./utils/timeout-config" // kilocode_change @@ -114,18 +114,28 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan }) as const, ) - const toolCallAccumulator = new ToolCallAccumulator() // kilocode_change for await (const chunk of results) { const delta = chunk.choices[0]?.delta - yield* toolCallAccumulator.processChunk(chunk) // kilocode_change - if (delta?.content) { assistantText += delta.content for (const processedChunk of matcher.update(delta.content)) { yield processedChunk } } + + // Emit raw tool call chunks - NativeToolCallParser handles state management + if (delta?.tool_calls) { + for 
(const toolCall of delta.tool_calls) { + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } + } + } } for (const processedChunk of matcher.final()) { diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index c44e49bba7..9d4aea8e3a 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -16,7 +16,7 @@ import type { ApiHandlerCreateMessageMetadata, // kilocode_change SingleCompletionHandler, } from "../index" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from "./kilocode/nativeToolCallHelpers" const QWEN_OAUTH_BASE_URL = "https://chat.qwen.ai" const QWEN_OAUTH_TOKEN_ENDPOINT = `${QWEN_OAUTH_BASE_URL}/api/v1/oauth2/token` @@ -235,7 +235,6 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan const stream = await this.callApiWithRetry(() => client.chat.completions.create(requestOptions)) let fullContent = "" - const toolCallAccumulator = new ToolCallAccumulator() // kilocode_change for await (const apiChunk of stream) { const delta = apiChunk.choices[0]?.delta ?? 
{} @@ -285,7 +284,18 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan } } - yield* toolCallAccumulator.processChunk(apiChunk) // kilocode_change + // Emit raw tool call chunks - NativeToolCallParser handles state management + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } + } + } if (apiChunk.usage) { yield { diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts index 7177b53c17..4a35539fb9 100644 --- a/src/api/providers/xai.ts +++ b/src/api/providers/xai.ts @@ -15,7 +15,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { verifyFinishReason } from "./kilocode/verifyFinishReason" // kilocode_change import { handleOpenAIError } from "./utils/openai-error-handler" -import { addNativeToolCallsToParams, ToolCallAccumulator } from "./kilocode/nativeToolCallHelpers" +import { addNativeToolCallsToParams } from "./kilocode/nativeToolCallHelpers" const XAI_DEFAULT_TEMPERATURE = 0 @@ -84,14 +84,11 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler throw handleOpenAIError(error, this.providerName) } - const toolCallAccumulator = new ToolCallAccumulator() // kilocode_change for await (const chunk of stream) { verifyFinishReason(chunk.choices[0]) // kilocode_change const delta = chunk.choices[0]?.delta const finishReason = chunk.choices[0]?.finish_reason - yield* toolCallAccumulator.processChunk(chunk) // kilocode_change - if (delta?.content) { yield { type: "text", @@ -106,7 +103,7 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler } } - // Handle tool calls in stream - emit partial chunks for NativeToolCallParser + // Emit raw tool call chunks - NativeToolCallParser handles state management 
if (delta?.tool_calls) { for (const toolCall of delta.tool_calls) { yield {