diff --git a/__tests__/azure-ssml.test.ts b/__tests__/azure-ssml.test.ts new file mode 100644 index 0000000..19e5885 --- /dev/null +++ b/__tests__/azure-ssml.test.ts @@ -0,0 +1,86 @@ +/** + * Tests for Azure SSML generation correctness (issue #42) + */ + +import * as SSMLUtils from "../src/core/ssml-utils"; + +// Minimal stub so we can import AzureTTSClient without real credentials +jest.mock("../src/core/abstract-tts", () => { + return { + AbstractTTSClient: class { + voiceId = "en-US-AriaNeural"; + lang = "en-US"; + properties: Record = { rate: "medium", pitch: "medium", volume: 100 }; + timings: unknown[] = []; + on() {} + emit() {} + }, + }; +}); + +// We test the SSML utilities directly — no network calls needed. + +describe("createProsodyTag — volume format", () => { + it("emits an absolute volume value without a % suffix", () => { + const result = SSMLUtils.createProsodyTag("hello", { volume: 75 }); + // volume="75" is the absolute format (0-100 scale). + // volume="75%" would be a relative +75% change — wrong. 
+ expect(result).toContain('volume="75"'); + expect(result).not.toContain('volume="75%"'); + }); + + it("emits volume=100 without % when at full volume", () => { + const result = SSMLUtils.createProsodyTag("hello", { volume: 100 }); + expect(result).toContain('volume="100"'); + expect(result).not.toContain('volume="100%"'); + }); + + it("emits volume=0 without % when muted", () => { + const result = SSMLUtils.createProsodyTag("hello", { volume: 0 }); + expect(result).toContain('volume="0"'); + expect(result).not.toContain('volume="0%"'); + }); +}); + +describe("Azure prepareSSML — no spurious xmlns/version warnings", () => { + let warnSpy: jest.SpyInstance; + + beforeEach(() => { + warnSpy = jest.spyOn(console, "warn").mockImplementation(() => {}); + }); + + afterEach(() => { + warnSpy.mockRestore(); + }); + + it("does not warn about missing xmlns or version when synthesising plain text", async () => { + // Import lazily so mock is in place + const { AzureTTSClient } = await import("../src/engines/azure"); + const client = new AzureTTSClient({ subscriptionKey: "key", region: "eastus" }); + + // Access the private method via type cast + const ssml = await (client as any).prepareSSML("Hello world"); + + const xmnsWarning = warnSpy.mock.calls.some((args) => + args.some( + (a: unknown) => + typeof a === "string" && a.includes("xmlns") || + (Array.isArray(a) && a.some((s: unknown) => typeof s === "string" && s.includes("xmlns"))) + ) + ); + const versionWarning = warnSpy.mock.calls.some((args) => + args.some( + (a: unknown) => + typeof a === "string" && a.includes("version") || + (Array.isArray(a) && a.some((s: unknown) => typeof s === "string" && s.includes("version"))) + ) + ); + + expect(xmnsWarning).toBe(false); + expect(versionWarning).toBe(false); + + // Sanity: output should actually have the attributes + expect(ssml).toContain('xmlns="http://www.w3.org/2001/10/synthesis"'); + expect(ssml).toContain('version="1.0"'); + }); +}); diff --git 
a/src/__tests__/azure-mstts-namespace.test.ts b/src/__tests__/azure-mstts-namespace.test.ts index c6e0462..e474f73 100644 --- a/src/__tests__/azure-mstts-namespace.test.ts +++ b/src/__tests__/azure-mstts-namespace.test.ts @@ -175,14 +175,15 @@ describe("Azure MSTTS Namespace Handling", () => { it("should normalise 0-1 volume fraction to 0-100 percentage", async () => { // Regression test for: https://github.com/willwade/js-tts-wrapper/issues/40 - // Callers commonly pass volume as a 0-1 float; 0.8 should become volume="80%", not "0.8%". + // Callers commonly pass volume as a 0-1 float; 0.8 should become volume="80" (absolute, 0-100 scale), not "0.8" or "80%". const plainSSML = `Hello world`; const options = { volume: 0.8 }; const result = (client as any).ensureAzureSSMLStructure(plainSSML, "en-US-JennyNeural", options); - expect(result).toContain('volume="80%"'); - expect(result).not.toContain('volume="0.8%"'); + expect(result).toContain('volume="80"'); + expect(result).not.toContain('volume="0.8"'); + expect(result).not.toContain('volume="80%"'); }); }); }); diff --git a/src/__tests__/ssml-utils.test.ts b/src/__tests__/ssml-utils.test.ts index 7639564..f8aa3bf 100644 --- a/src/__tests__/ssml-utils.test.ts +++ b/src/__tests__/ssml-utils.test.ts @@ -53,7 +53,7 @@ describe("SSMLUtils", () => { it("should create prosody tag with volume", () => { const result = SSMLUtils.createProsodyTag("Hello", { volume: 80 }); - expect(result).toBe('Hello'); + expect(result).toBe('Hello'); }); it("should create prosody tag with multiple attributes", () => { @@ -64,7 +64,7 @@ describe("SSMLUtils", () => { }); expect(result).toContain('rate="slow"'); expect(result).toContain('pitch="high"'); - expect(result).toContain('volume="80%"'); + expect(result).toContain('volume="80"'); }); it("should return original text if no options provided", () => { diff --git a/src/core/abstract-tts.ts b/src/core/abstract-tts.ts index c3d6b59..85646b6 100644 --- a/src/core/abstract-tts.ts +++ 
b/src/core/abstract-tts.ts @@ -993,7 +993,7 @@ export abstract class AbstractTTSClient { } if (this.properties.volume) { - attrs.push(`volume="${this.properties.volume}%"`); + attrs.push(`volume="${this.properties.volume}"`); } if (attrs.length === 0) { diff --git a/src/core/ssml-utils.ts b/src/core/ssml-utils.ts index 070cbda..a675cd2 100644 --- a/src/core/ssml-utils.ts +++ b/src/core/ssml-utils.ts @@ -68,7 +68,7 @@ export function createProsodyTag(text: string, options?: SpeakOptions): string { if (options.rate) attrs.push(`rate="${options.rate}"`); if (options.pitch) attrs.push(`pitch="${options.pitch}"`); - if (options.volume !== undefined) attrs.push(`volume="${options.volume}%"`); + if (options.volume !== undefined) attrs.push(`volume="${options.volume}"`); if (attrs.length === 0) return text; diff --git a/src/engines/azure.ts b/src/engines/azure.ts index 8f66355..f1a02a7 100644 --- a/src/engines/azure.ts +++ b/src/engines/azure.ts @@ -556,7 +556,11 @@ export class AzureTTSClient extends AbstractTTSClient { // Use voice from options or the default voice const voiceId = options?.voice || this.voiceId; - // Validate and process SSML for Azure compatibility + // Process and structure SSML first so required attributes are present before validation + ssml = SSMLUtils.processSSMLForEngine(ssml, "azure", voiceId || undefined); + ssml = this.ensureAzureSSMLStructure(ssml, voiceId, options); + + // Validate after processing so warnings reflect what Azure actually receives const validation = SSMLUtils.validateSSMLForEngine(ssml, "azure", voiceId || undefined); if (validation.warnings.length > 0) { console.warn("Azure SSML warnings:", validation.warnings); @@ -566,12 +570,6 @@ export class AzureTTSClient extends AbstractTTSClient { throw new Error(`Invalid SSML for Azure: ${validation.errors.join(", ")}`); } - // Process SSML for Azure compatibility - ssml = SSMLUtils.processSSMLForEngine(ssml, "azure", voiceId || undefined); - - // Ensure proper SSML structure for 
Azure - ssml = this.ensureAzureSSMLStructure(ssml, voiceId, options); - return ssml; } @@ -648,7 +646,7 @@ export class AzureTTSClient extends AbstractTTSClient { const attrs: string[] = []; if (rate && rate !== DEFAULT_RATE) attrs.push(`rate="${rate}"`); if (pitch && pitch !== DEFAULT_PITCH) attrs.push(`pitch="${pitch}"`); - if (volume !== DEFAULT_VOLUME) attrs.push(`volume="${volume}%"`); + if (volume !== DEFAULT_VOLUME) attrs.push(`volume="${volume}"`); // <prosody> must be nested inside <voice>, not as a direct child of <speak>. // Azure rejects: Node [speak] should not contain node [prosody] with type [Others].