Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions __tests__/azure-ssml.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* Tests for Azure SSML generation correctness (issue #42)
*/

import * as SSMLUtils from "../src/core/ssml-utils";

// Swap the abstract base class for a lightweight stand-in so that
// AzureTTSClient can be imported without real credentials.
jest.mock("../src/core/abstract-tts", () => ({
  AbstractTTSClient: class {
    voiceId = "en-US-AriaNeural";
    lang = "en-US";
    properties: Record<string, unknown> = { rate: "medium", pitch: "medium", volume: 100 };
    timings: unknown[] = [];

    // on/emit are intentionally empty: the stub accepts the calls and does nothing.
    on(): void {}
    emit(): void {}
  },
}));

// No network calls are needed: the first suite exercises the SSML utilities directly,
// and the second only asks AzureTTSClient to build SSML via prepareSSML.

describe("createProsodyTag — volume format", () => {
  // Each row pairs a test title with the numeric volume passed to createProsodyTag.
  // volume="75" is the absolute format (0-100 scale).
  // volume="75%" would be a relative +75% change — wrong.
  const volumeCases: ReadonlyArray<readonly [string, number]> = [
    ["emits an absolute volume value without a % suffix", 75],
    ["emits volume=100 without % when at full volume", 100],
    ["emits volume=0 without % when muted", 0],
  ];

  for (const [title, volume] of volumeCases) {
    it(title, () => {
      const markup = SSMLUtils.createProsodyTag("hello", { volume });

      // The bare number must be present; the %-suffixed form must not be.
      const absoluteAttr = `volume="${volume}"`;
      const percentAttr = `volume="${volume}%"`;
      expect(markup).toContain(absoluteAttr);
      expect(markup).not.toContain(percentAttr);
    });
  }
});

describe("Azure prepareSSML — no spurious xmlns/version warnings", () => {
  let warnSpy: jest.SpyInstance;

  // Silence console.warn for each test while still recording every call.
  beforeEach(() => {
    warnSpy = jest.spyOn(console, "warn").mockImplementation(() => {});
  });

  afterEach(() => {
    warnSpy.mockRestore();
  });

  /** True when `value` is a string that contains `needle`. */
  const mentions = (value: unknown, needle: string): boolean =>
    typeof value === "string" && value.includes(needle);

  /**
   * Reports whether any recorded console.warn call mentioned `needle`.
   *
   * Every argument of every call is inspected. An argument matches when it is
   * a string containing `needle`, or an array with at least one such string.
   *
   * @param needle - Substring to look for in the recorded warnings.
   * @returns `true` if at least one recorded call matches.
   */
  const warnedAbout = (needle: string): boolean =>
    warnSpy.mock.calls.some((args: unknown[]) =>
      args.some(
        (arg) =>
          mentions(arg, needle) ||
          (Array.isArray(arg) && arg.some((item: unknown) => mentions(item, needle)))
      )
    );

  it("does not warn about missing xmlns or version when synthesising plain text", async () => {
    // Import lazily so mock is in place
    const { AzureTTSClient } = await import("../src/engines/azure");
    const client = new AzureTTSClient({ subscriptionKey: "key", region: "eastus" });

    // Access the private method via type cast
    const ssml = await (client as any).prepareSSML("Hello world");

    const xmlnsWarning = warnedAbout("xmlns");
    const versionWarning = warnedAbout("version");

    expect(xmlnsWarning).toBe(false);
    expect(versionWarning).toBe(false);

    // Sanity: output should actually have the attributes
    expect(ssml).toContain('xmlns="http://www.w3.org/2001/10/synthesis"');
    expect(ssml).toContain('version="1.0"');
  });
});
7 changes: 4 additions & 3 deletions src/__tests__/azure-mstts-namespace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,15 @@ describe("Azure MSTTS Namespace Handling", () => {

it("should normalise 0-1 volume fraction to 0-100 percentage", async () => {
// Regression test for: https://github.com/willwade/js-tts-wrapper/issues/40
// Callers commonly pass volume as a 0-1 float; 0.8 should become volume="80%", not "0.8%".
// Callers commonly pass volume as a 0-1 float; 0.8 should become volume="80" (absolute, 0-100 scale), not "0.8" or "80%".
const plainSSML = `<speak>Hello world</speak>`;
const options = { volume: 0.8 };

const result = (client as any).ensureAzureSSMLStructure(plainSSML, "en-US-JennyNeural", options);

expect(result).toContain('volume="80%"');
expect(result).not.toContain('volume="0.8%"');
expect(result).toContain('volume="80"');
expect(result).not.toContain('volume="0.8"');
expect(result).not.toContain('volume="80%"');
});
});
});
4 changes: 2 additions & 2 deletions src/__tests__/ssml-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ describe("SSMLUtils", () => {

it("should create prosody tag with volume", () => {
const result = SSMLUtils.createProsodyTag("Hello", { volume: 80 });
expect(result).toBe('<prosody volume="80%">Hello</prosody>');
expect(result).toBe('<prosody volume="80">Hello</prosody>');
});

it("should create prosody tag with multiple attributes", () => {
Expand All @@ -64,7 +64,7 @@ describe("SSMLUtils", () => {
});
expect(result).toContain('rate="slow"');
expect(result).toContain('pitch="high"');
expect(result).toContain('volume="80%"');
expect(result).toContain('volume="80"');
});

it("should return original text if no options provided", () => {
Expand Down
2 changes: 1 addition & 1 deletion src/core/abstract-tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ export abstract class AbstractTTSClient {
}

if (this.properties.volume) {
attrs.push(`volume="${this.properties.volume}%"`);
attrs.push(`volume="${this.properties.volume}"`);
}

if (attrs.length === 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/core/ssml-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export function createProsodyTag(text: string, options?: SpeakOptions): string {

if (options.rate) attrs.push(`rate="${options.rate}"`);
if (options.pitch) attrs.push(`pitch="${options.pitch}"`);
if (options.volume !== undefined) attrs.push(`volume="${options.volume}%"`);
if (options.volume !== undefined) attrs.push(`volume="${options.volume}"`);

if (attrs.length === 0) return text;

Expand Down
14 changes: 6 additions & 8 deletions src/engines/azure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,11 @@ export class AzureTTSClient extends AbstractTTSClient {
// Use voice from options or the default voice
const voiceId = options?.voice || this.voiceId;

// Validate and process SSML for Azure compatibility
// Process and structure SSML first so required attributes are present before validation
ssml = SSMLUtils.processSSMLForEngine(ssml, "azure", voiceId || undefined);
ssml = this.ensureAzureSSMLStructure(ssml, voiceId, options);

// Validate after processing so warnings reflect what Azure actually receives
const validation = SSMLUtils.validateSSMLForEngine(ssml, "azure", voiceId || undefined);
if (validation.warnings.length > 0) {
console.warn("Azure SSML warnings:", validation.warnings);
Expand All @@ -566,12 +570,6 @@ export class AzureTTSClient extends AbstractTTSClient {
throw new Error(`Invalid SSML for Azure: ${validation.errors.join(", ")}`);
}

// Process SSML for Azure compatibility
ssml = SSMLUtils.processSSMLForEngine(ssml, "azure", voiceId || undefined);

// Ensure proper SSML structure for Azure
ssml = this.ensureAzureSSMLStructure(ssml, voiceId, options);

return ssml;
}

Expand Down Expand Up @@ -648,7 +646,7 @@ export class AzureTTSClient extends AbstractTTSClient {
const attrs: string[] = [];
if (rate && rate !== DEFAULT_RATE) attrs.push(`rate="${rate}"`);
if (pitch && pitch !== DEFAULT_PITCH) attrs.push(`pitch="${pitch}"`);
if (volume !== DEFAULT_VOLUME) attrs.push(`volume="${volume}%"`);
if (volume !== DEFAULT_VOLUME) attrs.push(`volume="${volume}"`);

// <prosody> must be nested inside <voice>, not as a direct child of <speak>.
// Azure rejects: Node [speak] should not contain node [prosody] with type [Others].
Expand Down
Loading