speechmarkdown · arjan · Nov 5, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
diff --git a/.gitignore b/.gitignore
@@ -27,3 +27,6 @@ dist.browser/
 
 # Sample
 .sample
+
+# Environment variables
+.env
diff --git a/README.md b/README.md
@@ -74,6 +74,32 @@ Sample <break time="3s"/> speech <break time="250ms"/> markdown
 </speak>
 ```
 
+### SSML - Microsoft Azure
+
+Convert Speech Markdown to SSML for Microsoft Azure with automatic MSTTS namespace injection
+
+```js
+const smd = require('speechmarkdown-js');
+
+const markdown = `(This is exciting news!)[excited:"1.5"] The new features are here.`;
+const options = {
+  platform: 'microsoft-azure',
+};
+
+const speech = new smd.SpeechMarkdown();
+const ssml = speech.toSSML(markdown, options);
+```
+
+The resulting SSML is:
+
+```xml
+<speak xmlns:mstts="https://www.w3.org/2001/mstts">
+<mstts:express-as style="excited" styledegree="1.5">This is exciting news!</mstts:express-as> The new features are here.
+</speak>
+```
+
+Azure supports 27 express-as styles, including emotional styles (excited, disappointed, friendly, cheerful, sad, angry, etc.) and scenario-specific styles (newscaster, customerservice, chat, etc.). See the [Azure platform documentation](./docs/platforms/azure.md) for complete details.
+
 ### Plain Text
 
 Convert Speech Markdown to Plain Text

diff --git a/azure-ssml.txt b/azure-ssml.txt
@@ -0,0 +1,74 @@
+Full
+
+<speak>
+  Here are <say-as interpret-as="characters">SSML</say-as> samples.
+  I can pause <break time="3s"/>.
+  I can play a sound
+  <audio src="https://www.example.com/MY_MP3_FILE.mp3">didn't get your MP3 audio file</audio>.
+  I can speak in cardinals. Your number is <say-as interpret-as="cardinal">10</say-as>.
+  Or I can speak in ordinals. You are <say-as interpret-as="ordinal">10</say-as> in line.
+  Or I can even speak in digits. The digits for ten are <say-as interpret-as="characters">10</say-as>.
+  I can also substitute phrases, like the <sub alias="World Wide Web Consortium">W3C</sub>.
+  Finally, I can speak a paragraph with two sentences.
+</speak>
+
+
+dates
+
+<speak>
+  <say-as interpret-as="date" format="yyyymmdd" detail="1">
+    1960-09-10
+  </say-as>
+</speak>
+
+expletive
+
+<speak>
+  <say-as interpret-as="expletive">censor this</say-as>
+</speak>
+
+Audio attachment
+
+<speak>
+  <audio src="cat_purr_close.ogg">
+    <desc>a cat purring</desc>
+    PURR (sound didn't load)
+  </audio>
+</speak>
+
+Marks
+
+<speak>
+Go from <mark name="here"/> here, to <mark name="there"/> there!
+</speak>
+
+
+Prosody
+
+<prosody rate="slow" pitch="-2st">Can you hear me now?</prosody>
+
+Emphasis
+
+<emphasis level="moderate">This is an important announcement</emphasis>
+
+
+IPA
+
+  <phoneme alphabet="ipa" ph="ˌmænɪˈtoʊbə">manitoba</phoneme>
+  <phoneme alphabet="x-sampa" ph='m@"hA:g@%ni:'>mahogany</phoneme>
+
+
+Voice tags
+
+<speak>And then she asked, <voice language="fr-FR" gender="female">qu'est-ce qui
+t'amène ici</voice><break time="250ms"/> in her sweet and gentle voice.</speak>
+
+Langs in a speak
+
+
+<speak>The French word for cat is <lang xml:lang="fr-FR">chat</lang></speak>
+
+
+Style
+
+<speak><google:style name="lively">Hello I'm so happy today!</google:style></speak>
diff --git a/docs/platforms/azure.md b/docs/platforms/azure.md
diff --git a/google-ssml-examples.txt b/google-ssml-examples.txt
@@ -0,0 +1,74 @@
+Full
+
+<speak>
+  Here are <say-as interpret-as="characters">SSML</say-as> samples.
+  I can pause <break time="3s"/>.
+  I can play a sound
+  <audio src="https://www.example.com/MY_MP3_FILE.mp3">didn't get your MP3 audio file</audio>.
+  I can speak in cardinals. Your number is <say-as interpret-as="cardinal">10</say-as>.
+  Or I can speak in ordinals. You are <say-as interpret-as="ordinal">10</say-as> in line.
+  Or I can even speak in digits. The digits for ten are <say-as interpret-as="characters">10</say-as>.
+  I can also substitute phrases, like the <sub alias="World Wide Web Consortium">W3C</sub>.
+  Finally, I can speak a paragraph with two sentences.
+</speak>
+
+
+dates
+
+<speak>
+  <say-as interpret-as="date" format="yyyymmdd" detail="1">
+    1960-09-10
+  </say-as>
+</speak>
+
+expletive
+
+<speak>
+  <say-as interpret-as="expletive">censor this</say-as>
+</speak>
+
+Audio attachment
+
+<speak>
+  <audio src="cat_purr_close.ogg">
+    <desc>a cat purring</desc>
+    PURR (sound didn't load)
+  </audio>
+</speak>
+
+Marks
+
+<speak>
+Go from <mark name="here"/> here, to <mark name="there"/> there!
+</speak>
+
+
+Prosody
+
+<prosody rate="slow" pitch="-2st">Can you hear me now?</prosody>
+
+Emphasis
+
+<emphasis level="moderate">This is an important announcement</emphasis>
+
+
+IPA
+
+  <phoneme alphabet="ipa" ph="ˌmænɪˈtoʊbə">manitoba</phoneme>
+  <phoneme alphabet="x-sampa" ph='m@"hA:g@%ni:'>mahogany</phoneme>
+
+
+Voice tags
+
+<speak>And then she asked, <voice language="fr-FR" gender="female">qu'est-ce qui
+t'amène ici</voice><break time="250ms"/> in her sweet and gentle voice.</speak>
+
+Langs in a speak
+
+
+<speak>The French word for cat is <lang xml:lang="fr-FR">chat</lang></speak>
+
+
+Style
+
+<speak><google:style name="lively">Hello I'm so happy today!</google:style></speak>
diff --git a/scripts/update-voice-data.js b/scripts/update-voice-data.js
@@ -183,19 +183,58 @@ async function updateAzureVoices() {
   }
 
   const voiceMap = {};
+  const displayNameCollisions = {};
 
   for (const voice of data) {
     const name = (voice.ShortName || voice.Name || '').trim();
+    const locale = (voice.Locale || '').trim();
+    const displayName = voice.DisplayName || voice.LocalName || name;
 
     if (!name) {
       continue;
     }
 
-    voiceMap[name.toLowerCase()] = {
+    const voiceEntry = {
       voice: {
         name,
       },
+      id: name,
+      displayName,
+      locale,
     };
+
+    // Add entry by voice ID (e.g., "en-us-jennyneural")
+    voiceMap[name.toLowerCase()] = voiceEntry;
+
+    // Also add entry by display name (e.g., "jenny") for easier lookup
+    // Only add if display name is different from the voice ID
+    const displayNameKey = displayName.toLowerCase();
+    if (displayNameKey !== name.toLowerCase()) {
+      if (!voiceMap[displayNameKey]) {
+        voiceMap[displayNameKey] = voiceEntry;
+      } else {
+        // Track collisions for debugging
+        if (!displayNameCollisions[displayNameKey]) {
+          displayNameCollisions[displayNameKey] = [];
+        }
+        displayNameCollisions[displayNameKey].push(name);
+      }
+    }
+  }
+
+  // Log collisions if any
+  const collisionKeys = Object.keys(displayNameCollisions);
+  if (collisionKeys.length > 0) {
+    console.log(
+      `[azure] ${collisionKeys.length} display name collisions (not added as aliases):`,
+    );
+    collisionKeys.slice(0, 5).forEach((key) => {
+      console.log(
+        `  "${key}": ${displayNameCollisions[key].slice(0, 3).join(', ')}${
+          displayNameCollisions[key].length > 3 ? '...' : ''
+        }`,
+      );
+    });
   }
 
   writeFormatterVoiceModule('microsoftAzureVoices.ts', [
@@ -229,6 +268,10 @@ async function updateGoogleVoices() {
 
   for (const voice of voices) {
     const name = (voice.name || '').trim();
+    const languageCodes =
+      voice.languageCodes && Array.isArray(voice.languageCodes)
+        ? voice.languageCodes
+        : [];
 
     if (!name) {
       continue;
@@ -238,6 +281,8 @@ async function updateGoogleVoices() {
       voice: {
         name,
       },
+      id: name,
+      languages: languageCodes,
     };
   }
 
@@ -282,6 +327,7 @@ async function updateWatsonVoices() {
 
   for (const voice of voices) {
     const name = (voice.name || '').trim();
+    const language = (voice.language || '').trim();
 
     if (!name) {
       continue;
@@ -291,6 +337,8 @@ async function updateWatsonVoices() {
       voice: {
         name,
       },
+      id: name,
+      language,
     };
   }
 
@@ -456,10 +504,26 @@ async function updatePollyVoices() {
     }
 
     const key = id.toLowerCase();
+    const languageCodes = [];
+
+    if (voice.LanguageCode) {
+      languageCodes.push(voice.LanguageCode);
+    }
+
+    if (
+      voice.AdditionalLanguageCodes &&
+      Array.isArray(voice.AdditionalLanguageCodes)
+    ) {
+      languageCodes.push(...voice.AdditionalLanguageCodes);
+    }
+
     const entry = {
       voice: {
         name: id,
       },
+      id,
+      displayName: voice.Name || id,
+      languages: languageCodes,
     };
 
     allVoices[key] = entry;
-Original file line number
+Diff line change
@@ Expand Up / @@ -27,3 +27,6 @@ dist.browser/ @@
     # Sample
     .sample
+    # Environment variables
+    .env