diff --git a/.cursor/rules/docs.mdc b/.cursor/rules/docs.mdc index 7418568b4..c85311e79 100644 --- a/.cursor/rules/docs.mdc +++ b/.cursor/rules/docs.mdc @@ -180,6 +180,22 @@ export const products = [ - [ ] Includes clear next steps for users - [ ] Headers use sentence case formatting +## API specifications + +### TypeSpec definitions +API specs are defined in TypeSpec and output to OpenAPI format. All specs are located in the `specs/` directory: + +- **specs/signalwire-rest** - SignalWire REST API specifications +- **specs/compatibility-api** - Compatibility API specifications +- **specs/swml** - SWML schema definitions +- **specs/_shared** - Shared TypeSpec definitions + +### SWML JSON schema +The authoritative SWML schema is generated from TypeSpec and located at: +- **specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json** + +This JSON Schema defines the complete structure of valid SWML documents. + ## SWML and SWML AI source code references ### Source code repositories diff --git a/specs/swml/Methods/ai/ai_languages.tsp b/specs/swml/Methods/ai/ai_languages.tsp index e5eae5e4e..80a36c0b3 100644 --- a/specs/swml/Methods/ai/ai_languages.tsp +++ b/specs/swml/Methods/ai/ai_languages.tsp @@ -1,4 +1,5 @@ import "@typespec/json-schema"; +import "../../Shared/Types/main.tsp"; using TypeSpec.JsonSchema; @@ -48,6 +49,22 @@ model LanguagesBase { @doc("The engine to use for the language. For example, 'elevenlabs'.") @example("elevenlabs") engine?: string; + + @doc("TTS engine-specific parameters for this language.") + params?: LanguageParams; +} + +@summary("LanguageParams") +model LanguageParams { + @doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. 
IMPORTANT: Only works with ElevenLabs TTS engine.") + @minValue(0.0) + @maxValue(1.0) + stability?: float | SWMLVar = 0.50; + + @doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine.") + @minValue(0.0) + @maxValue(1.0) + similarity?: float | SWMLVar = 0.75; } @summary("LanguagesWithSoloFillers") diff --git a/specs/swml/Methods/ai/ai_params.tsp b/specs/swml/Methods/ai/ai_params.tsp index 677c46608..577d9b760 100644 --- a/specs/swml/Methods/ai/ai_params.tsp +++ b/specs/swml/Methods/ai/ai_params.tsp @@ -224,18 +224,6 @@ model AIParams { @example(700) end_of_speech_timeout?: integer | SWMLVar = 700; - @doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.") - @minValue(0.01) - @maxValue(1.0) - @example(0.5) - eleven_labs_stability?: float | SWMLVar; - - @doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.") - @minValue(0.01) - @maxValue(1.0) - @example(0.75) - eleven_labs_similarity?: float | SWMLVar; - @doc("If `true`, enables usage accounting. The default is `false`.") @example(true) enable_accounting?: boolean | SWMLVar; @@ -388,8 +376,8 @@ model AIParams { @example(1024) max_response_tokens?: integer | SWMLVar; - @doc("The ASR (Automatic Speech Recognition) engine to use. Common values include `deepgram:nova-2`, `deepgram:nova-3`, and other supported ASR engines.") - @example("deepgram:nova-3") + @doc("The ASR (Automatic Speech Recognition) engine to use. 
Common values include `nova-2` and `nova-3`.") + @example("nova-3") openai_asr_engine?: string = "gcloud_speech_v2_async"; @doc("Sets a time duration for the outbound call recipient to respond to the AI agent before timeout, in a range from `10000` to `600000`. **Default:** `120000` ms (2 minutes).") @@ -574,5 +562,19 @@ model AIParams { @example("hey") wake_prefix?: string; + #deprecated "The `eleven_labs_stability` property is deprecated. Please use `languages[].params.stability` instead." + @doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.") + @minValue(0.0) + @maxValue(1.0) + @example(0.5) + eleven_labs_stability?: float | SWMLVar = 0.50; + + #deprecated "The `eleven_labs_similarity` property is deprecated. Please use `languages[].params.similarity` instead." + @doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.") + @minValue(0.0) + @maxValue(1.0) + @example(0.75) + eleven_labs_similarity?: float | SWMLVar = 0.75; + ...TypeSpec.Record; } diff --git a/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json b/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json index 85add9fa0..7609760fb 100644 --- a/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json +++ b/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json @@ -4800,38 +4800,6 @@ "maximum": 10000, "description": "Amount of silence, in ms, at the end of an utterance to detect end of speech. Allowed values from `250` - `10,000`. **Default:** `700` ms (Note: Documentation incorrectly lists 2000ms)." 
}, - "eleven_labs_stability": { - "anyOf": [ - { - "type": "number" - }, - { - "$ref": "#/$defs/SWMLVar" - } - ], - "examples": [ - 0.5 - ], - "minimum": 0.01, - "maximum": 1, - "description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice." - }, - "eleven_labs_similarity": { - "anyOf": [ - { - "type": "number" - }, - { - "$ref": "#/$defs/SWMLVar" - } - ], - "examples": [ - 0.75 - ], - "minimum": 0.01, - "maximum": 1, - "description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice." - }, "enable_accounting": { "anyOf": [ { @@ -5165,7 +5133,7 @@ "examples": [ "deepgram:nova-3" ], - "description": "The ASR (Automatic Speech Recognition) engine to use. Common values include `deepgram:nova-2`, `deepgram:nova-3`, and other supported ASR engines." + "description": "The ASR (Automatic Speech Recognition) engine to use. Common values include `nova-2` and `nova-3`." }, "outbound_attention_timeout": { "anyOf": [ @@ -5611,6 +5579,42 @@ "hey" ], "description": "Specifies an additional prefix that must be spoken along with the agent's name (`ai_name`)\nto wake the agent from a paused state. For example, if `ai_name` is \"computer\" and\n`wake_prefix` is \"hey\", the user would need to say \"hey computer\" to activate the agent." + }, + "eleven_labs_stability": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 0.5, + "examples": [ + 0.5 + ], + "minimum": 0, + "maximum": 1, + "description": "The stability slider determines how stable the voice is and the randomness between each generation. 
Lowering this slider introduces a broader emotional range for the voice.", + "deprecated": true + }, + "eleven_labs_similarity": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 0.75, + "examples": [ + 0.75 + ], + "minimum": 0, + "maximum": 1, + "description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.", + "deprecated": true } }, "unevaluatedProperties": {}, @@ -6463,6 +6467,10 @@ "description": "The engine to use for the language. For example, 'elevenlabs'.", "deprecated": true }, + "params": { + "$ref": "#/$defs/LanguageParams", + "description": "TTS engine-specific parameters for this language." + }, "fillers": { "type": "array", "items": { @@ -6543,6 +6551,10 @@ "description": "The engine to use for the language. For example, 'elevenlabs'.", "deprecated": true }, + "params": { + "$ref": "#/$defs/LanguageParams", + "description": "TTS engine-specific parameters for this language." + }, "function_fillers": { "type": "array", "items": { @@ -7814,6 +7826,43 @@ "not": {} } }, + "LanguageParams": { + "type": "object", + "properties": { + "stability": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 0.5, + "minimum": 0, + "maximum": 1, + "description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. IMPORTANT: Only works with ElevenLabs TTS engine." + }, + "similarity": { + "anyOf": [ + { + "type": "number" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 0.75, + "minimum": 0, + "maximum": 1, + "description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. 
The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine." + } + }, + "unevaluatedProperties": { + "not": {} + }, + "title": "LanguageParams" + }, "ConversationRole": { "type": "string", "enum": [ diff --git a/website/docs/main/swml/guides/AI/holiday-special-santa-ai/index.mdx b/website/docs/main/swml/guides/AI/holiday-special-santa-ai/index.mdx index d7d72b96d..1deea29e1 100644 --- a/website/docs/main/swml/guides/AI/holiday-special-santa-ai/index.mdx +++ b/website/docs/main/swml/guides/AI/holiday-special-santa-ai/index.mdx @@ -38,8 +38,8 @@ idea of what their child wants for Christmas, and then purchase the gift directl To get our AI Santa set up, we will take the following steps: 1. Sign into your SignalWire Space and navigate to your Dashboard. -2. Create a new RELAY (SWML) Script using the sample script and provided instructions. -3. Assign a phone number to the RELAY (SWML) Script. +2. Create a new Relay (SWML) Script using the sample script and provided instructions. +3. Assign a phone number to the Relay (SWML) Script. We'll explain each of these steps in detail throughout the article. Follow along, and don't hesitate to reach out if you have questions or run into issues! @@ -143,20 +143,23 @@ languages: --- -### Params +### ElevenLabs Voice Parameters -The `params` parameter is used to define the AI's `eleven_labs_stability` and `eleven_labs_similarity` parameters. +We use ElevenLabs TTS engine-specific parameters to fine-tune Santa's voice. These parameters are configured per-language using `languages[].params`. -The `eleven_labs_stability` parameter is used to define the stability of the AI's voice, while the `eleven_labs_similarity` -parameter is used to define the similarity of the AI's voice to the voice that is defined in the `voice` parameter. 
+The `stability` parameter controls the stability of the AI's voice, while the `similarity` parameter defines how closely the voice adheres to the original voice characteristics. This allows us to control the AI's voice and make it more realistic and as close to Santa's voice as possible. You can learn more about these settings here: [Eleven Labs Documentation](https://elevenlabs.io/docs/speech-synthesis/voice-settings#stability). ```yaml andJson -params: - eleven_labs_stability: 0.1 - eleven_labs_similarity: 0.25 +languages: + - name: English + code: en-US + voice: elevenlabs.rachel + params: + stability: 0.1 + similarity: 0.25 ``` --- @@ -508,13 +511,13 @@ sections: ### Step 6 Continue the conversation, keeping it playful and entertaining. If another present is requested, gently remind them that only one gift can be chosen. post_prompt_url: Post Prompt Webhook Here - params: - eleven_labs_stability: 0.1 - eleven_labs_similarity: 0.25 languages: - name: English code: en-US voice: elevenlabs.gvU4yEv29ZpMc9IXoZcd + params: + stability: 0.1 + similarity: 0.25 speech_fillers: - one moment please, - uhh ha, @@ -617,7 +620,7 @@ has messaging services enabled, as the Santa AI will send you an SMS with a link **Hosting your own Santa AI** -To host your own Santa AI, simply copy and paste the `SWML` script [above](#final-swml-script) into a new RELAY script then assign it in your phone number settings, located +To host your own Santa AI, simply copy and paste the `SWML` script [above](#final-swml-script) into a new Relay (SWML) Script then assign it in your phone number settings, located on your [SignalWire Dashboard](https://my.signalwire.com). Don't forget to replace the sample values with your active `post prompt webhook URL`, `Rapid API token`, `webhook host`, and `From number`, as described above the sample script. 
diff --git a/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx b/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx index fbb93b22c..af9c0358e 100644 --- a/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx +++ b/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx @@ -16,7 +16,8 @@ import APIField from "@site/src/components/APIField"; [conscience]: ./conscience.mdx [hold-music]: ./hold_music.mdx [interrupt-prompt]: ./interrupt_prompt.mdx -[ai-languages]: ../ai_languages.mdx +[ai-languages]: /swml/methods/ai/languages +[ai-languages-params]: /swml/methods/ai/languages/params [ai-params]: ./index.mdx [post-prompt-url]: /swml/methods/ai/post_prompt_url [get-visual-input]: /swml/methods/ai/swaig/internal_fillers#internal_fillers-parameters @@ -329,20 +330,6 @@ Customize the AI agent's voice output, including volume control, voice character Adjust the volume of the AI. Allowed values from `-50`-`50`. - - The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. Valid values range from `0.01` to `1.0`.

**Important**: This will only works when `elevenlabs` is set in the [`ai.languages.voice`][ai-languages] as the engine id. -
- - - The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. Valid values range from `0.01` to `1.0`.

**Important**: This will only works when `elevenlabs` is set in the [`ai.languages.voice`][ai-languages] as the engine id. -
- + + The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. Valid values range from `0.0` to `1.0`. **Deprecated**: Use [`languages[].params.similarity`][ai-languages-params] instead. + + + + The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. Valid values range from `0.0` to `1.0`. **Deprecated**: Use [`languages[].params.stability`][ai-languages-params] instead. + + ### Interruption & Barge Control Manage how the AI agent handles interruptions when users speak over it, including when to stop speaking, acknowledge interruptions, or continue regardless. diff --git a/website/docs/main/swml/reference/methods/ai/index.mdx b/website/docs/main/swml/reference/methods/ai/index.mdx index dede6efb6..3d2fe2348 100644 --- a/website/docs/main/swml/reference/methods/ai/index.mdx +++ b/website/docs/main/swml/reference/methods/ai/index.mdx @@ -9,7 +9,7 @@ tags: ['swml'] --- [hints]: /swml/methods/ai/hints -[languages]: ./ai_languages.mdx +[languages]: /swml/methods/ai/languages [params]: ./ai_params/index.mdx [post_prompt]: /swml/methods/ai/post_prompt [post_prompt_url]: /swml/methods/ai/post_prompt_url diff --git a/website/docs/main/swml/reference/methods/ai/ai_languages.mdx b/website/docs/main/swml/reference/methods/ai/languages/index.mdx similarity index 86% rename from website/docs/main/swml/reference/methods/ai/ai_languages.mdx rename to website/docs/main/swml/reference/methods/ai/languages/index.mdx index 556df1b34..faaff03ab 100644 --- a/website/docs/main/swml/reference/methods/ai/ai_languages.mdx +++ b/website/docs/main/swml/reference/methods/ai/languages/index.mdx @@ -15,6 +15,7 @@ import APIField from "@site/src/components/APIField"; [voices-and-languages]: /voice/getting-started/voice-and-languages 
[swaig-functions]: /swml/methods/ai/swaig/functions [deepgram-codes]: https://developers.deepgram.com/docs/models-languages-overview#nova-3 +[ai-params]: /swml/methods/ai/params # ai.languages @@ -100,13 +101,22 @@ Use `ai.languages` to configure the spoken language of your AI Agent, as well as The speed to use for the specified TTS engine. This allows the AI to speak at a different speed at different points in the conversation. The speed behavior can be defined in the prompt of the AI.
**Valid values:** `auto`
**IMPORTANT:** Only works with [`Cartesia`](/voice/tts/cartesia) TTS engine.
+ + TTS engine-specific parameters for this language. + Accepts the [`languages.params` parameters](/swml/methods/ai/languages/params). + + - An array of strings to be used as fillers in the conversation and when the agent is calling a [`SWAIG function`][swaig-functions].**Deprecated**: Use `speech_fillers` and `function_fillers` instead. + An array of strings to be used as fillers in the conversation and when the agent is calling a [`SWAIG function`][swaig-functions]. **Deprecated**: Use `speech_fillers` and `function_fillers` instead. - The engine to use for the language. For example, `"elevenlabs"`.**Deprecated.** Set the engine with the [`voice`](#use-voice-strings) parameter. + The engine to use for the language. For example, `"elevenlabs"`. **Deprecated.** Set the engine with the [`voice`](#use-voice-strings) parameter. +--- + ### Use `voice` strings Compose the `voice` string using the `.` syntax. @@ -166,6 +178,27 @@ languages: voice: elevenlabs.rachel ``` +### Configure per-language ElevenLabs parameters + +Configure different stability and similarity values for each language using `languages[].params`: + +```yaml andJson +ai: + languages: + - name: English + code: en-US + voice: elevenlabs.josh + params: + stability: 0.6 + similarity: 0.8 + - name: Spanish + code: es-ES + voice: elevenlabs.maria + params: + stability: 0.4 + similarity: 0.9 +``` + {/* This example commented out as the language-switching behavior is a bit inconsistent. diff --git a/website/docs/main/swml/reference/methods/ai/languages/params.mdx b/website/docs/main/swml/reference/methods/ai/languages/params.mdx new file mode 100644 index 000000000..cc0418c79 --- /dev/null +++ b/website/docs/main/swml/reference/methods/ai/languages/params.mdx @@ -0,0 +1,48 @@ +--- +sidebar_label: languages.params +hide_title: false +slug: /swml/methods/ai/languages/params +title: languages.params +description: Engine-specific voice and language configuration. 
+tags: ['swml'] +--- + +[tts-providers]: /voice/getting-started/voice-and-languages#providers +[voices-and-languages]: /voice/getting-started/voice-and-languages +[swaig-functions]: /swml/methods/ai/swaig/functions +[deepgram-codes]: https://developers.deepgram.com/docs/models-languages-overview#nova-3 +[ai-params]: /swml/methods/ai/params + +# languages.params + +Use `languages[].params` to configure TTS engine-specific parameters for individual languages. + + + The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. + The higher the similarity, the closer the AI will sound to the original voice. + Valid values range from `0.0` to `1.0`. + :::info + + Only works with the ElevenLabs TTS engine. + + ::: + + + + The stability slider determines how stable the voice is and the randomness between each generation. + Lowering this slider introduces a broader emotional range for the voice. + Valid values range from `0.0` to `1.0`. + :::info + + Only works with the ElevenLabs TTS engine. + + ::: +