Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .cursor/rules/docs.mdc
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,22 @@ export const products = [
- [ ] Includes clear next steps for users
- [ ] Headers use sentence case formatting

## API specifications

### TypeSpec definitions
API specs are defined in TypeSpec and output to OpenAPI format. All specs are located in the `specs/` directory:

- **specs/signalwire-rest** - SignalWire REST API specifications
- **specs/compatibility-api** - Compatibility API specifications
- **specs/swml** - SWML schema definitions
- **specs/_shared** - Shared TypeSpec definitions

### SWML JSON schema
The authoritative SWML schema is generated from TypeSpec and located at:
- **specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json**

This JSON Schema defines the complete structure of valid SWML documents.

## SWML and SWML AI source code references

### Source code repositories
Expand Down
17 changes: 17 additions & 0 deletions specs/swml/Methods/ai/ai_languages.tsp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import "@typespec/json-schema";
import "../../Shared/Types/main.tsp";

using TypeSpec.JsonSchema;

Expand Down Expand Up @@ -48,6 +49,22 @@ model LanguagesBase {
@doc("The engine to use for the language. For example, 'elevenlabs'.")
@example("elevenlabs")
engine?: string;

@doc("TTS engine-specific parameters for this language.")
params?: LanguageParams;
}

// Per-language TTS tuning parameters. This model is the type of the optional
// `params` property declared on `LanguagesBase`.
// Both properties are optional, accept either a float or a `SWMLVar`, and are
// bounded to the inclusive range 0.0 to 1.0 by @minValue/@maxValue.
// The deprecation notices on `eleven_labs_stability` and
// `eleven_labs_similarity` in `AIParams` direct users to these properties.
@summary("LanguageParams")
model LanguageParams {
// Voice stability setting; defaults to 0.50 when omitted.
@doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. IMPORTANT: Only works with ElevenLabs TTS engine.")
@minValue(0.0)
@maxValue(1.0)
stability?: float | SWMLVar = 0.50;

// Voice similarity setting; defaults to 0.75 when omitted.
@doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine.")
@minValue(0.0)
@maxValue(1.0)
similarity?: float | SWMLVar = 0.75;
}

@summary("LanguagesWithSoloFillers")
Expand Down
30 changes: 16 additions & 14 deletions specs/swml/Methods/ai/ai_params.tsp
Original file line number Diff line number Diff line change
Expand Up @@ -224,18 +224,6 @@ model AIParams {
@example(700)
end_of_speech_timeout?: integer | SWMLVar = 700;

@doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.")
@minValue(0.01)
@maxValue(1.0)
@example(0.5)
eleven_labs_stability?: float | SWMLVar;

@doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.")
@minValue(0.01)
@maxValue(1.0)
@example(0.75)
eleven_labs_similarity?: float | SWMLVar;

@doc("If `true`, enables usage accounting. The default is `false`.")
@example(true)
enable_accounting?: boolean | SWMLVar;
Expand Down Expand Up @@ -388,8 +376,8 @@ model AIParams {
@example(1024)
max_response_tokens?: integer | SWMLVar;

@doc("The ASR (Automatic Speech Recognition) engine to use. Common values include `deepgram:nova-2`, `deepgram:nova-3`, and other supported ASR engines.")
@example("deepgram:nova-3")
@doc("The ASR (Automatic Speech Recognition) engine to use. Common values include `nova-2` and `nova-3`.")
@example("nova-3")
openai_asr_engine?: string = "gcloud_speech_v2_async";

@doc("Sets a time duration for the outbound call recipient to respond to the AI agent before timeout, in a range from `10000` to `600000`. **Default:** `120000` ms (2 minutes).")
Expand Down Expand Up @@ -574,5 +562,19 @@ model AIParams {
@example("hey")
wake_prefix?: string;

#deprecated "The `eleven_labs_stability` property is deprecated. Please use `languages[].params.stability` instead."
@doc("The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.")
@minValue(0.0)
@maxValue(1.0)
@example(0.5)
eleven_labs_stability?: float | SWMLVar = 0.50;

#deprecated "The `eleven_labs_similarity` property is deprecated. Please use `languages[].params.similarity` instead."
@doc("The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.")
@minValue(0.0)
@maxValue(1.0)
@example(0.75)
eleven_labs_similarity?: float | SWMLVar = 0.75;

...TypeSpec.Record<unknown>;
}
115 changes: 82 additions & 33 deletions specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json
Original file line number Diff line number Diff line change
Expand Up @@ -4800,38 +4800,6 @@
"maximum": 10000,
"description": "Amount of silence, in ms, at the end of an utterance to detect end of speech. Allowed values from `250` - `10,000`. **Default:** `700` ms (Note: Documentation incorrectly lists 2000ms)."
},
"eleven_labs_stability": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"examples": [
0.5
],
"minimum": 0.01,
"maximum": 1,
"description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice."
},
"eleven_labs_similarity": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"examples": [
0.75
],
"minimum": 0.01,
"maximum": 1,
"description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice."
},
"enable_accounting": {
"anyOf": [
{
Expand Down Expand Up @@ -5165,7 +5133,7 @@
"examples": [
"deepgram:nova-3"
],
"description": "The ASR (Automatic Speech Recognition) engine to use. Common values include `deepgram:nova-2`, `deepgram:nova-3`, and other supported ASR engines."
"description": "The ASR (Automatic Speech Recognition) engine to use. Common values include `nova-2` and `nova-3`."
},
"outbound_attention_timeout": {
"anyOf": [
Expand Down Expand Up @@ -5611,6 +5579,42 @@
"hey"
],
"description": "Specifies an additional prefix that must be spoken along with the agent's name (`ai_name`)\nto wake the agent from a paused state. For example, if `ai_name` is \"computer\" and\n`wake_prefix` is \"hey\", the user would need to say \"hey computer\" to activate the agent."
},
"eleven_labs_stability": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.5,
"examples": [
0.5
],
"minimum": 0,
"maximum": 1,
"description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice.",
"deprecated": true
},
"eleven_labs_similarity": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.75,
"examples": [
0.75
],
"minimum": 0,
"maximum": 1,
"description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice.",
"deprecated": true
}
},
"unevaluatedProperties": {},
Expand Down Expand Up @@ -6463,6 +6467,10 @@
"description": "The engine to use for the language. For example, 'elevenlabs'.",
"deprecated": true
},
"params": {
"$ref": "#/$defs/LanguageParams",
"description": "TTS engine-specific parameters for this language."
},
"fillers": {
"type": "array",
"items": {
Expand Down Expand Up @@ -6543,6 +6551,10 @@
"description": "The engine to use for the language. For example, 'elevenlabs'.",
"deprecated": true
},
"params": {
"$ref": "#/$defs/LanguageParams",
"description": "TTS engine-specific parameters for this language."
},
"function_fillers": {
"type": "array",
"items": {
Expand Down Expand Up @@ -7814,6 +7826,43 @@
"not": {}
}
},
"LanguageParams": {
"type": "object",
"properties": {
"stability": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.5,
"minimum": 0,
"maximum": 1,
"description": "The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. IMPORTANT: Only works with ElevenLabs TTS engine."
},
"similarity": {
"anyOf": [
{
"type": "number"
},
{
"$ref": "#/$defs/SWMLVar"
}
],
"default": 0.75,
"minimum": 0,
"maximum": 1,
"description": "The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. IMPORTANT: Only works with ElevenLabs TTS engine."
}
},
"unevaluatedProperties": {
"not": {}
},
"title": "LanguageParams"
},
"ConversationRole": {
"type": "string",
"enum": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ idea of what their child wants for Christmas, and then purchase the gift directl
To get our AI Santa set up, we will take the following steps:

1. Sign into your SignalWire Space and navigate to your Dashboard.
2. Create a new RELAY (SWML) Script using the sample script and provided instructions.
3. Assign a phone number to the RELAY (SWML) Script.
2. Create a new Relay (SWML) Script using the sample script and provided instructions.
3. Assign a phone number to the Relay (SWML) Script.

We'll explain each of these steps in detail throughout the article. Follow along, and don't
hesitate to reach out if you have questions or run into issues!
Expand Down Expand Up @@ -143,20 +143,23 @@ languages:

---

### Params
### ElevenLabs voice parameters

The `params` parameter is used to define the AI's `eleven_labs_stability` and `eleven_labs_similarity` parameters.
We use ElevenLabs TTS engine-specific parameters to fine-tune Santa's voice. These parameters are configured per-language using `languages[].params`.

The `eleven_labs_stability` parameter is used to define the stability of the AI's voice, while the `eleven_labs_similarity`
parameter is used to define the similarity of the AI's voice to the voice that is defined in the `voice` parameter.
The `stability` parameter controls the stability of the AI's voice, while the `similarity` parameter defines how closely the voice adheres to the original voice characteristics.
This allows us to control the AI's voice and make it more realistic and as close to Santa's voice as possible.

You can learn more about these settings here: [ElevenLabs documentation](https://elevenlabs.io/docs/speech-synthesis/voice-settings#stability).

```yaml andJson
params:
eleven_labs_stability: 0.1
eleven_labs_similarity: 0.25
languages:
- name: English
code: en-US
voice: elevenlabs.rachel
params:
stability: 0.1
similarity: 0.25
```

---
Expand Down Expand Up @@ -508,13 +511,13 @@ sections:
### Step 6 Continue the conversation, keeping it playful and entertaining.
If another present is requested, gently remind them that only one gift can be chosen.
post_prompt_url: Post Prompt Webhook Here
params:
eleven_labs_stability: 0.1
eleven_labs_similarity: 0.25
languages:
- name: English
code: en-US
voice: elevenlabs.gvU4yEv29ZpMc9IXoZcd
params:
stability: 0.1
similarity: 0.25
speech_fillers:
- one moment please,
- uhh ha,
Expand Down Expand Up @@ -617,7 +620,7 @@ has messaging services enabled, as the Santa AI will send you an SMS with a link

**Hosting your own Santa AI**

To host your own Santa AI, simply copy and paste the `SWML` script [above](#final-swml-script) into a new RELAY script then assign it in your phone number settings, located
To host your own Santa AI, simply copy and paste the `SWML` script [above](#final-swml-script) into a new Relay (SWML) Script, then assign it in your phone number settings, located
on your [SignalWire Dashboard](https://my.signalwire.com).
Don't forget to replace the sample values with your active `post prompt webhook URL`,
`Rapid API token`, `webhook host`, and `From number`, as described above the sample script.
Expand Down
35 changes: 20 additions & 15 deletions website/docs/main/swml/reference/methods/ai/ai_params/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import APIField from "@site/src/components/APIField";
[conscience]: ./conscience.mdx
[hold-music]: ./hold_music.mdx
[interrupt-prompt]: ./interrupt_prompt.mdx
[ai-languages]: ../ai_languages.mdx
[ai-languages]: /swml/methods/ai/languages
[ai-languages-params]: /swml/methods/ai/languages/params
[ai-params]: ./index.mdx
[post-prompt-url]: /swml/methods/ai/post_prompt_url
[get-visual-input]: /swml/methods/ai/swaig/internal_fillers#internal_fillers-parameters
Expand Down Expand Up @@ -329,20 +330,6 @@ Customize the AI agent's voice output, including volume control, voice character
Adjust the volume of the AI. Allowed values from `-50`-`50`.
</APIField>

<APIField
name="params.eleven_labs_similarity"
type="number"
>
The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. Valid values range from `0.01` to `1.0`.<br /><br />**Important**: This only works when `elevenlabs` is set in [`ai.languages.voice`][ai-languages] as the engine ID.
</APIField>

<APIField
name="params.eleven_labs_stability"
type="number"
>
The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. Valid values range from `0.01` to `1.0`.<br /><br />**Important**: This only works when `elevenlabs` is set in [`ai.languages.voice`][ai-languages] as the engine ID.
</APIField>

<APIField
name="params.max_emotion"
type="integer"
Expand Down Expand Up @@ -388,6 +375,24 @@ Customize the AI agent's voice output, including volume control, voice character
URL of a video file to play when AI is talking. Only works for calls that support video.
</APIField>

<APIField
name="params.eleven_labs_similarity"
type="number"
deprecated={true}
default="0.75"
>
The similarity slider dictates how closely the AI should adhere to the original voice when attempting to replicate it. The higher the similarity, the closer the AI will sound to the original voice. Valid values range from `0.0` to `1.0`. **Deprecated**: Use [`languages[].params.similarity`][ai-languages-params] instead.
</APIField>

<APIField
name="params.eleven_labs_stability"
type="number"
deprecated={true}
default="0.50"
>
The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. Valid values range from `0.0` to `1.0`. **Deprecated**: Use [`languages[].params.stability`][ai-languages-params] instead.
</APIField>

### Interruption & Barge Control

Manage how the AI agent handles interruptions when users speak over it, including when to stop speaking, acknowledge interruptions, or continue regardless.
Expand Down
2 changes: 1 addition & 1 deletion website/docs/main/swml/reference/methods/ai/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ tags: ['swml']
---

[hints]: /swml/methods/ai/hints
[languages]: ./ai_languages.mdx
[languages]: /swml/methods/ai/languages
[params]: ./ai_params/index.mdx
[post_prompt]: /swml/methods/ai/post_prompt
[post_prompt_url]: /swml/methods/ai/post_prompt_url
Expand Down
Loading