diff --git a/package-lock.json b/package-lock.json
index d9ebd95..7409985 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "speechmarkdown-js",
-  "version": "2.1.1",
+  "version": "2.3.1",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "speechmarkdown-js",
-      "version": "2.1.1",
+      "version": "2.3.1",
       "license": "MIT",
       "dependencies": {
         "myna-parser": "^2.5.1",
diff --git a/src/SpeechMarkdownGrammar.ts b/src/SpeechMarkdownGrammar.ts
index cb32d05..2bde628 100644
--- a/src/SpeechMarkdownGrammar.ts
+++ b/src/SpeechMarkdownGrammar.ts
@@ -87,6 +87,66 @@ export function speechMarkdownGrammar(myna: any): any {
     this.time = m.seq(this.number, this.timeUnit).ast;
     this.shortBreak = m.seq('[', this.time, ']').ast;
 
+    // Expressive audio tags: [laugh], [sigh], [cough], etc.
+    // m.keywords() is an ordered choice and '-' does not count as an
+    // identifier character, so a hyphenated tag must be listed before its
+    // bare prefix ('tsk-tsk' before 'tsk'); otherwise the prefix wins and
+    // the closing ']' fails to match.
+    this.expressiveValue = m.keywords(
+      'laugh',
+      'laughter',
+      'sigh',
+      'cough',
+      'cheer',
+      'cheering',
+      'cry',
+      'crying',
+      'gasp',
+      'groan',
+      'groaning',
+      'hum',
+      'hmm',
+      'mm-hmm',
+      'oh',
+      'sniff',
+      'whew',
+      'wow',
+      'yawn',
+      'yeah',
+      'huh',
+      'tsk-tsk',
+      'tsk',
+      'uh-huh',
+      'mmm',
+      'mhm',
+      'ahem',
+      'applause',
+      'boo',
+      'giggle',
+      'hiccup',
+      'hurray',
+      'moan',
+      'pant',
+      'scream',
+      'shush',
+      'sneeze',
+      'throat-clear',
+      'wheeze',
+      'whimper',
+      'yay',
+      'bleh',
+      'eek',
+      'meh',
+      'ooh',
+      'pfft',
+      'phew',
+      'psst',
+      'shh',
+      'uh-oh',
+      'umph',
+    ).ast;
+    this.expressive = m.seq('[', this.expressiveValue, ']').ast;
+
     // this.break = m.seq('[break:', this.time , ']').ast;
     // this.string = m.doubleQuoted(this.quoteChar.zeroOrMore).ast;
 
@@ -449,6 +509,7 @@ export function speechMarkdownGrammar(myna: any): any {
       this.shortSub,
       this.textModifier,
       this.emphasis,
+      this.expressive,
       this.shortBreak,
       this.break,
       this.audio,
diff --git a/src/formatters/ElevenLabsFormatter.ts b/src/formatters/ElevenLabsFormatter.ts
index 5b0181d..d9dd2fc 100644
--- a/src/formatters/ElevenLabsFormatter.ts
+++ b/src/formatters/ElevenLabsFormatter.ts
@@ -116,6 +116,11 @@ export class ElevenLabsFormatter extends SsmlFormatterBase {
         const time = ast.children[0].allText;
         return this.addTagWithAttrs(lines, null, 'break', { time: time });
       }
+      case 'expressive': {
+        const value = ast.children[0].allText;
+        lines.push(`[${value}]`);
+        return lines;
+      }
       case 'break': {
         const val = ast.children[0].allText;
         let time = val;
diff --git a/src/formatters/TextFormatter.ts b/src/formatters/TextFormatter.ts
index c41c5a8..11c2fe2 100644
--- a/src/formatters/TextFormatter.ts
+++ b/src/formatters/TextFormatter.ts
@@ -83,6 +83,12 @@ export class TextFormatter extends FormatterBase {
       case 'audio':
         return lines;
 
+      case 'expressive': {
+        const value = ast.children[0].allText;
+        lines.push(`[${value}]`);
+        return lines;
+      }
+
       default: {
         this.processAst(ast.children, lines);
         return lines;
diff --git a/src/formatters/W3cSsmlFormatter.ts b/src/formatters/W3cSsmlFormatter.ts
index d4f2e87..a270a78 100644
--- a/src/formatters/W3cSsmlFormatter.ts
+++ b/src/formatters/W3cSsmlFormatter.ts
@@ -198,6 +198,11 @@ export class W3cSsmlFormatter extends SsmlFormatterBase {
         const time = ast.children[0].allText;
         return this.addTagWithAttrs(lines, null, 'break', { time });
       }
+      case 'expressive': {
+        const value = ast.children[0].allText;
+        lines.push(`[${value}]`);
+        return lines;
+      }
       case 'break': {
         const val = ast.children[0].allText;
         let attrs = {};
diff --git a/tests/expressive.spec.ts b/tests/expressive.spec.ts
new file mode 100644
index 0000000..6902e6c
--- /dev/null
+++ b/tests/expressive.spec.ts
@@ -0,0 +1,121 @@
+import dedent from 'ts-dedent';
+import { SpeechMarkdown } from '../src/SpeechMarkdown';
+
+// NOTE(review): the SSML markup inside the expected strings, the spacing in
+// the "(strips)" expectations, the '<emphasis' substring and the title of the
+// unknown-bracket test were reconstructed after being stripped from this
+// patch — TODO confirm against the original.
+describe('expressive', () => {
+  const speech = new SpeechMarkdown();
+
+  const markdown = dedent`
+    Hello [laugh] world
+  `;
+
+  test('converts to SSML - W3C', () => {
+    const ssml = speech.toSSML(markdown, { platform: 'w3c' });
+    const expected = dedent`
+      <speak>
+      Hello [laugh] world
+      </speak>
+    `;
+    expect(ssml).toBe(expected);
+  });
+
+  test('converts to SSML - ElevenLabs', () => {
+    const ssml = speech.toSSML(markdown, { platform: 'elevenlabs' });
+    expect(ssml).toBe('Hello [laugh] world');
+  });
+
+  test('converts to Plain Text', () => {
+    const text = speech.toText(markdown);
+    expect(text).toBe('Hello [laugh] world');
+  });
+
+  test('converts to SSML - Amazon Polly (strips)', () => {
+    const ssml = speech.toSSML(markdown, { platform: 'amazon-polly' });
+    const expected = dedent`
+      <speak>
+      Hello  world
+      </speak>
+    `;
+    expect(ssml).toBe(expected);
+  });
+
+  test('converts to SSML - Google Assistant (strips)', () => {
+    const ssml = speech.toSSML(markdown, { platform: 'google-assistant' });
+    const expected = dedent`
+      <speak>
+      Hello  world
+      </speak>
+    `;
+    expect(ssml).toBe(expected);
+  });
+
+  test('converts to SSML - Microsoft Azure (strips)', () => {
+    const ssml = speech.toSSML(markdown, { platform: 'microsoft-azure' });
+    const expected = dedent`
+      <speak>
+      Hello  world
+      </speak>
+    `;
+    expect(ssml).toBe(expected);
+  });
+});
+
+describe('expressive multiple', () => {
+  const speech = new SpeechMarkdown();
+
+  const markdown = dedent`
+    Hello [laugh] how are you [sigh] I'm fine [cough]
+  `;
+
+  test('converts to SSML - W3C', () => {
+    const ssml = speech.toSSML(markdown, { platform: 'w3c' });
+    const expected = dedent`
+      <speak>
+      Hello [laugh] how are you [sigh] I'm fine [cough]
+      </speak>
+    `;
+    expect(ssml).toBe(expected);
+  });
+
+  test('converts to Plain Text', () => {
+    const text = speech.toText(markdown);
+    expect(text).toBe("Hello [laugh] how are you [sigh] I'm fine [cough]");
+  });
+});
+
+describe('expressive edge cases', () => {
+  const speech = new SpeechMarkdown();
+
+  test('expressive does not conflict with shortBreak [250ms]', () => {
+    const md = 'Hello [laugh] wait [250ms] world';
+    const ssml = speech.toSSML(md, { platform: 'w3c' });
+    const expected = dedent`
+      <speak>
+      Hello [laugh] wait <break time="250ms"/> world
+      </speak>
+    `;
+    expect(ssml).toBe(expected);
+  });
+
+  test('expressive does not conflict with textModifier', () => {
+    const md = '(hello)[emphasis:"strong"] [laugh]';
+    const ssml = speech.toSSML(md, { platform: 'w3c' });
+    expect(ssml).toContain('[laugh]');
+    expect(ssml).toContain('<emphasis');
+  });
+
+  test('leaves unknown bracket content as plain text', () => {
+    const md = 'Hello [unknownthing] world';
+    const text = speech.toText(md);
+    expect(text).toBe('Hello [unknownthing] world');
+  });
+
+  test('hyphenated tag is not shadowed by its bare prefix', () => {
+    const md = 'Hello [tsk-tsk] world';
+    const ssml = speech.toSSML(md, { platform: 'amazon-polly' });
+    expect(ssml).not.toContain('tsk');
+  });
+});