Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/clear-hoops-melt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"kilo-code": minor
---

Improve the initial setup experience for the speech-to-text feature by adding an inline setup tooltip
42 changes: 42 additions & 0 deletions apps/storybook/stories/STTSetupPopover.stories.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import type { Meta, StoryObj } from "@storybook/react-vite"
import { STTSetupPopoverContent } from "@/components/chat/STTSetupPopover"

/**
 * Storybook metadata shared by every STTSetupPopover story.
 *
 * Each story is rendered centered, inside a wrapper whose width is capped at
 * 400px, and receives a default `onFfmpegHelpClick` handler that logs to the
 * console unless the story supplies its own.
 */
const meta = {
	title: "Components/STTSetupPopover",
	component: STTSetupPopoverContent,
	tags: ["autodocs"],
	parameters: { layout: "centered" },
	args: {
		onFfmpegHelpClick: () => {
			console.log("FFmpeg help clicked")
		},
	},
	render: (args) => {
		return (
			<div className="w-[calc(100vw-32px)] max-w-[400px]">
				<STTSetupPopoverContent {...args} />
			</div>
		)
	},
} satisfies Meta<typeof STTSetupPopoverContent>

export default meta
type Story = StoryObj<typeof meta>

/**
 * Story that passes `reason: "ffmpegNotInstalled"` to the popover content.
 *
 * `onFfmpegHelpClick` is not repeated here: the identical logging handler is
 * already provided through `meta.args`, which story-level args are merged onto.
 */
export const FFmpegNotInstalled: Story = {
	name: "FFmpeg not installed",
	args: {
		reason: "ffmpegNotInstalled",
	},
}

/**
 * Story that passes `reason: "openaiKeyMissing"` to the popover content.
 * The click handler comes from the meta-level `args`.
 */
export const OpenAIKeyMissing: Story = {
args: {
reason: "openaiKeyMissing",
},
}

/**
 * Story with no story-level overrides: only the meta-level `args` apply, so
 * `reason` is left unset.
 * NOTE(review): presumably the component treats an unset `reason` as "both
 * prerequisites missing" — confirm against STTSetupPopoverContent.
 */
export const BothMissing: Story = {}
10 changes: 0 additions & 10 deletions src/core/webview/ClineProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ import { Task } from "../task/Task"
import { getSystemPromptFilePath } from "../prompts/sections/custom-system-prompt"

import { webviewMessageHandler } from "./webviewMessageHandler"
import { checkSpeechToTextAvailable } from "./speechToTextCheck" // kilocode_change
import type { ClineMessage, TodoItem } from "@roo-code/types"
import { readApiMessages, saveApiMessages, saveTaskMessages } from "../task-persistence"
import { readTaskMessages } from "../task-persistence/taskMessages"
Expand Down Expand Up @@ -2215,14 +2214,6 @@ ${prompt}
: undefined
// kilocode_change end

// kilocode_change start - checkSpeechToTextAvailable (only when experiment enabled)
let speechToTextStatus: { available: boolean; reason?: "openaiKeyMissing" | "ffmpegNotInstalled" } | undefined =
undefined
if (experiments?.speechToText) {
speechToTextStatus = await checkSpeechToTextAvailable(this.providerSettingsManager)
}
// kilocode_change end - checkSpeechToTextAvailable

let cloudOrganizations: CloudOrganizationMembership[] = []

try {
Expand Down Expand Up @@ -2447,7 +2438,6 @@ ${prompt}
featureRoomoteControlEnabled,
virtualQuotaActiveModel, // kilocode_change: Include virtual quota active model in state
debug: vscode.workspace.getConfiguration(Package.name).get<boolean>("debug", false),
speechToTextStatus, // kilocode_change: Speech-to-text availability status with failure reason
}
}

Expand Down
23 changes: 1 addition & 22 deletions src/core/webview/speechToTextCheck.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@ export type SpeechToTextAvailabilityResult = {
reason?: "openaiKeyMissing" | "ffmpegNotInstalled"
}

/**
* Cached availability result with timestamp
*/
let cachedResult: { available: boolean; reason?: "openaiKeyMissing" | "ffmpegNotInstalled"; timestamp: number } | null =
null
const CACHE_DURATION_MS = 30000 // 30 seconds

/**
* Check if speech-to-text prerequisites are available
*
Expand All @@ -26,43 +19,29 @@ const CACHE_DURATION_MS = 30000 // 30 seconds
* 2. FFmpeg is installed and available
*
* Note: The experiment flag is checked on the frontend, not here.
* Results are cached for 30 seconds to prevent redundant FFmpeg checks;
* pass `forceRecheck` to bypass the cache and perform a fresh check.
*
* @param providerSettingsManager - Provider settings manager for API configuration
* @param forceRecheck - Force a fresh check, ignoring cache (default: false)
* @returns Promise<SpeechToTextAvailabilityResult> - Result with availability status and failure reason if unavailable
*/
export async function checkSpeechToTextAvailable(
	providerSettingsManager: ProviderSettingsManager,
	forceRecheck = false,
): Promise<SpeechToTextAvailabilityResult> {
	// Serve the cached answer while it is still fresh, unless the caller asked for a recheck.
	if (cachedResult !== null && !forceRecheck) {
		const age = Date.now() - cachedResult.timestamp
		if (age < CACHE_DURATION_MS) {
			return { available: cachedResult.available, reason: cachedResult.reason }
		}
	}

	// Single place that stores an outcome in the cache and hands it back, so the
	// cached value and the returned value cannot drift apart.
	const remember = (result: SpeechToTextAvailabilityResult): SpeechToTextAvailabilityResult => {
		cachedResult = { ...result, timestamp: Date.now() }
		return result
	}

	try {
		// Check 1: OpenAI API key
		const apiKey = await getOpenAiApiKey(providerSettingsManager)
		if (!apiKey) {
			return remember({ available: false, reason: "openaiKeyMissing" })
		}

		// Check 2: FFmpeg installed
		const ffmpegResult = FFmpegCaptureService.findFFmpeg()
		if (!ffmpegResult.available) {
			return remember({ available: false, reason: "ffmpegNotInstalled" })
		}

		return remember({ available: true })
	} catch (error) {
		// Still report "unavailable" (no specific reason) to the caller, but leave a
		// trace: previously the underlying failure was discarded without any record.
		console.error("Failed to check speech-to-text availability:", error)
		return remember({ available: false })
	}
}
13 changes: 13 additions & 0 deletions src/core/webview/sttHandlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ export async function handleSTTStart(clineProvider: ClineProvider, language?: st
await service.start({ apiKey }, language)
} catch (error) {
console.error("Failed to start STT service:", error)

// The service.start() catch block should have already called onStopped,
// but as a defensive measure, ensure frontend is notified if sessionId is still available
const sessionId = service.getSessionId()
if (sessionId) {
const errorMessage = error instanceof Error ? error.message : "Failed to start STT service"
clineProvider.postMessageToWebview({
type: "stt:stopped",
sessionId,
reason: "error",
error: errorMessage,
})
}
}
}

Expand Down
11 changes: 10 additions & 1 deletion src/core/webview/webviewMessageHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3657,6 +3657,7 @@ export const webviewMessageHandler = async (
}
break
}
// kilocode_change end: Type-safe global state handler
// kilocode_change start: STT (Speech-to-Text) handlers
case "stt:start":
case "stt:stop":
Expand All @@ -3665,7 +3666,15 @@ export const webviewMessageHandler = async (
await handleSTTCommand(provider, message as any)
break
}
// kilocode_change end: Type-safe global state handler
case "stt:checkAvailability": {
const { checkSpeechToTextAvailable } = await import("./speechToTextCheck")
provider.postMessageToWebview({
type: "stt:statusResponse",
speechToTextStatus: await checkSpeechToTextAvailable(provider.providerSettingsManager),
})
break
}
// kilocode_change end: STT (Speech-to-Text) handlers
case "insertTextToChatArea":
provider.postMessageToWebview({ type: "insertTextToChatArea", text: message.text })
break
Expand Down
28 changes: 17 additions & 11 deletions src/services/stt/OpenAIWhisperClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,31 +193,39 @@ export class OpenAIWhisperClient extends EventEmitter {
await new Promise<void>((resolve, reject) => {
const timeout = setTimeout(() => {
reject(new Error("WebSocket connection timeout"))
}, 10000)
}, 5000)

const onOpen = () => {
clearTimeout(timeout)
this.ws!.off("open", onOpen)
this.ws!.off("error", onError)
this.ws?.off("open", onOpen)
this.ws?.off("error", onError)
resolve()
}

const onError = (error: Error) => {
clearTimeout(timeout)
this.ws!.off("open", onOpen)
this.ws!.off("error", onError)
this.ws?.off("open", onOpen)
this.ws?.off("error", onError)
reject(new Error(`WebSocket connection failed: ${error.message}`))
}

this.ws!.once("open", onOpen)
this.ws!.once("error", onError)
if (this.ws) {
this.ws.once("open", onOpen)
this.ws.once("error", onError)
} else {
reject(new Error("WebSocket not initialized"))
}
})

this.isConnecting = false
this.reconnectAttempts = 0
this.emit("connected")
} catch (error) {
this.isConnecting = false
try {
this.ws?.removeAllListeners()
this.ws?.close()
} catch (_cleanupError) {}
this.ws = null
throw error
}
Expand Down Expand Up @@ -494,10 +502,8 @@ export class OpenAIWhisperClient extends EventEmitter {
}

// Close WebSocket
if (this.ws) {
this.ws.close(1000, "Client disconnect")
this.ws = null
}
this.ws?.close(1000, "Client disconnect")
this.ws = null

this.sessionConfigured = false
this.pendingAudioChunks = []
Expand Down
36 changes: 15 additions & 21 deletions src/services/stt/STTService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ export class STTService {

this.emitter.onStarted(this.sessionId)
} catch (error) {
this.isActive = false
const errorMessage = error instanceof Error ? error.message : "Failed to start"
this.emitter.onStopped("error", undefined, errorMessage)
await this.cleanupOnError()
Expand Down Expand Up @@ -348,14 +349,17 @@ export class STTService {
* Handle recoverable errors by emitting to UI and cleaning up
*/
private async handleRecoverableError(error: Error): Promise<void> {
	// Flag the session as inactive before doing anything else.
	this.isActive = false

	// Notify the frontend immediately with the error message.
	this.emitter.onStopped("error", undefined, error.message)

	// Always release resources. This must not be gated on `isActive`: the flag
	// was cleared just above, so such a guard would never let cleanup run.
	try {
		await this.cleanupOnError()
	} catch (cleanupError) {
		console.error("🎙️ [STTService] Failed to cleanup after error:", cleanupError)
	}
}

Expand Down Expand Up @@ -394,21 +398,11 @@ export class STTService {
private async cleanupOnError(): Promise<void> {
	this.isActive = false

	// Runs one teardown step and logs (rather than propagates) its failure, so a
	// problem in one step can neither skip the other step nor go unnoticed.
	// A synchronous throw from `step` is caught here as well.
	const runStep = async (name: string, step: () => unknown): Promise<void> => {
		try {
			await step()
		} catch (stepError) {
			console.error(`🎙️ [STTService] Failed to cleanup ${name}:`, stepError)
		}
	}

	// Single teardown pass: both steps are started before either is awaited.
	// Optional chaining tolerates a collaborator that was never created.
	await Promise.allSettled([
		runStep("audioCapture", () => this.audioCapture?.stop()),
		runStep("transcriptionClient", () => this.transcriptionClient?.disconnect()),
	])

	this.resetSession()
}
Expand Down
2 changes: 2 additions & 0 deletions src/shared/ExtensionMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ export interface ExtensionMessage {
| "stt:transcript" // kilocode_change: STT transcript update
| "stt:volume" // kilocode_change: STT volume level
| "stt:stopped" // kilocode_change: STT session stopped
| "stt:statusResponse" // kilocode_change: Response to stt:checkAvailability request
| "setHistoryPreviewCollapsed"
| "commandExecutionStatus"
| "mcpExecutionStatus"
Expand Down Expand Up @@ -275,6 +276,7 @@ export interface ExtensionMessage {
isFinal?: boolean // kilocode_change: STT transcript is final
level?: number // kilocode_change: STT volume level (0-1)
reason?: "completed" | "cancelled" | "error" // kilocode_change: STT stop reason
speechToTextStatus?: { available: boolean; reason?: "openaiKeyMissing" | "ffmpegNotInstalled" } // kilocode_change: Speech-to-text availability status response
requestId?: string
promptText?: string
results?: { path: string; type: "file" | "folder"; label?: string }[]
Expand Down
1 change: 1 addition & 0 deletions src/shared/WebviewMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ export interface WebviewMessage {
| "stt:start" // kilocode_change: Start STT recording
| "stt:stop" // kilocode_change: Stop STT recording
| "stt:cancel" // kilocode_change: Cancel STT recording
| "stt:checkAvailability" // kilocode_change: Check STT availability on demand
| "includeTaskHistoryInEnhance" // kilocode_change
| "snoozeAutocomplete" // kilocode_change
| "autoApprovalEnabled"
Expand Down
Loading