diff --git a/application/single_app/app.py b/application/single_app/app.py index 53f2ff5c..cd04ff67 100644 --- a/application/single_app/app.py +++ b/application/single_app/app.py @@ -4,6 +4,20 @@ import pickle import json import os +import sys + +# Fix Windows encoding issue with Unicode characters (emojis, IPA symbols, etc.) +# Must be done before any print statements that might contain Unicode +if sys.platform == 'win32': + try: + # Reconfigure stdout and stderr to use UTF-8 encoding + sys.stdout.reconfigure(encoding='utf-8') + sys.stderr.reconfigure(encoding='utf-8') + except AttributeError: + # Python < 3.7 doesn't have reconfigure, try alternative + import codecs + sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict') + sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, 'strict') import app_settings_cache from config import * diff --git a/application/single_app/config.py b/application/single_app/config.py index 402bc9fb..21eeb639 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.237.005" +VERSION = "0.237.006" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/static/js/chat/chat-sidebar-conversations.js b/application/single_app/static/js/chat/chat-sidebar-conversations.js index cb77ea50..d7908551 100644 --- a/application/single_app/static/js/chat/chat-sidebar-conversations.js +++ b/application/single_app/static/js/chat/chat-sidebar-conversations.js @@ -143,15 +143,23 @@ function createSidebarConversationItem(convo) { const originalTitleElement = headerRow ? 
headerRow.querySelector('.sidebar-conversation-title') : null; if (headerRow && dropdownElement && originalTitleElement) { + // Verify the dropdown is actually a child of headerRow before attempting manipulation + if (!headerRow.contains(dropdownElement)) { + console.error('Dropdown element is not a child of headerRow', { headerRow, dropdownElement }); + return convoItem; + } + const titleWrapper = document.createElement('div'); titleWrapper.classList.add('sidebar-conversation-header', 'd-flex', 'align-items-center', 'flex-grow-1', 'overflow-hidden', 'gap-2'); - // Insert the wrapper before the dropdown first - headerRow.insertBefore(titleWrapper, dropdownElement); + // Remove the original title from headerRow + originalTitleElement.remove(); - // Now move the title element into the wrapper + // Add styling to title originalTitleElement.classList.add('flex-grow-1', 'text-truncate'); originalTitleElement.style.minWidth = '0'; + + // Add title to wrapper titleWrapper.appendChild(originalTitleElement); const isGroupConversation = (convo.chat_type && convo.chat_type.startsWith('group')) || groupName; @@ -162,6 +170,9 @@ function createSidebarConversationItem(convo) { badge.title = groupName ? `Group conversation: ${groupName}` : 'Group conversation'; titleWrapper.appendChild(badge); } + + // Insert the wrapper before the dropdown + headerRow.insertBefore(titleWrapper, dropdownElement); } // Add double-click editing to title diff --git a/docs/explanation/fixes/v0.236.012/DISPLAY_WINDOWS_ENCODING_FIX.md b/docs/explanation/fixes/v0.236.012/DISPLAY_WINDOWS_ENCODING_FIX.md new file mode 100644 index 00000000..504e6813 --- /dev/null +++ b/docs/explanation/fixes/v0.236.012/DISPLAY_WINDOWS_ENCODING_FIX.md @@ -0,0 +1,364 @@ +# Windows Unicode Encoding Issue Report + +## Issue Summary + +**Purpose:** This document reports a critical Unicode encoding issue on Windows and provides recommended solutions. 
+ +This fix addresses a critical cross-platform compatibility issue where the application fails on Windows when processing or displaying Unicode characters beyond the Western European character set. The issue manifests in multiple areas including video transcript processing, chat history display, and any logging or output containing emojis, special symbols, or international characters. + +### Broader Context + +Python applications running on Windows face a fundamental encoding mismatch: +- **Windows Default:** Python uses `cp1252` (Windows-1252) encoding for stdout/stderr, which only supports Western European characters +- **Modern Web Applications:** Use UTF-8 encoding universally for international text, emojis, and special symbols +- **Azure Services:** Return data in UTF-8 format (Video Indexer transcripts, AI responses, user-generated content) + +This mismatch causes the application to crash whenever it attempts to log, print, or display Unicode characters that exist outside the limited `cp1252` character set. + +### Impact Scope + +This fix resolves Unicode encoding errors in: +- ✅ **Video transcripts** with IPA phonetic symbols (e.g., ˈ U+02C8) +- ✅ **Chat messages** containing emojis (e.g., ✅ U+2705, 🔍 U+1F50D) +- ✅ **User-generated content** with international characters (Chinese, Arabic, Hindi, etc.) 
+- ✅ **Agent responses** with formatting characters and symbols +- ✅ **Debug logging** across the entire application +- ✅ **Error messages** and stack traces containing Unicode + +## Common Error Messages + +### Video Processing +``` +Error: Processing failed: 'charmap' codec can't encode character '\u02c8' in position 228: character maps to <undefined> +``` + +### Chat History Display +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 156: character maps to <undefined> +``` + +### General Pattern +``` +UnicodeEncodeError: 'charmap' codec can't encode character '\uXXXX' +``` + +## Environment + +- **Platform:** Windows 10/11 (Issue does not occur on Linux/macOS) +- **Python Version:** 3.x +- **Default stdout encoding:** `cp1252` (charmap) on Windows +- **Required encoding:** `UTF-8` for modern web applications +- **Components Affected:** All areas of the application that output text to console/logs +- **Fixed in Version:** 0.236.013 (function-level), 0.236.014 (global fix) + +## Root Cause + +### The Windows Encoding Problem + +**Core Issue:** Python on Windows defaults to `cp1252` encoding for stdout/stderr, while modern web applications and cloud services universally use UTF-8. + +### Technical Details + +1. **Platform Encoding Defaults:** + - **Windows:** `cp1252` (Code Page 1252) - supports only 256 characters (Western European) + - **Linux/macOS:** `UTF-8` - supports 1,112,064 characters (all Unicode) + - **Web/Cloud Services:** UTF-8 standard for all modern APIs + +2. **Why This Causes Crashes:** + - Azure services (Video Indexer, OpenAI, etc.) return UTF-8 encoded data + - Application processes this data correctly in memory + - When Python attempts to `print()` or log this data on Windows: + - Python tries to encode Unicode → `cp1252` + - Characters outside `cp1252` range (emojis, IPA symbols, etc.) → encoding fails + - Python raises `UnicodeEncodeError` and crashes + +3. 
**Common Unicode Characters That Fail on Windows:** + - **IPA Phonetic Symbols:** ˈ (U+02C8), ə (U+0259), ɑ (U+0251) - common in Video Indexer transcripts + - **Emojis:** ✅ (U+2705), 🔍 (U+1F50D), 💬 (U+1F4AC) - used in chat and UI + - **Box Drawing:** ─ (U+2500), │ (U+2502), ┌ (U+250C) - used in tables and formatting + - **International Text:** Chinese, Arabic, Hindi, Emoji flags, etc. + +4. **Example Failure Points:** + - Video transcript logging: `print(insights_json, flush=True)` + - Chat history display: `print(f"Messages: {chat_data}")` + - Agent responses with emojis + - Debug logging throughout the application + +5. **Platform-specific behavior:** + - ✅ **Linux/macOS:** Default UTF-8 encoding → handles all Unicode → **works perfectly** + - ❌ **Windows:** Default cp1252 encoding → limited character set → **crashes on Unicode** + +## Steps to Reproduce + +### Video Processing Scenario +1. Deploy application on Windows +2. Upload a video file to group workspace that contains speech +3. Wait for Video Indexer to process the video +4. Transcript contains Unicode phonetic characters (common in pronunciation guides, non-English speech) +5. Application crashes with `UnicodeEncodeError` when logging transcript + +### Chat History Scenario +1. Deploy application on Windows +2. Use chat feature with messages containing emojis or special characters +3. Access chat history or conversation details +4. Application crashes when attempting to display messages with Unicode characters + +### General Pattern +Any operation that logs, prints, or displays Unicode characters outside the `cp1252` character set on Windows will trigger the error. 
+ +## Expected Behavior + +- Video should upload successfully +- Transcript data should be logged to console for debugging +- Unicode characters should be displayed or safely handled +- Processing should complete and save video chunks to search index + +## Actual Behavior + +- Video upload fails with encoding error +- Processing stops at the JSON logging stage +- Video is not indexed for chat/search +- Error appears in UI: `"Error: Processing failed: 'charmap' codec can't encode character..."` + +## Impact + +- **Severity:** High - Application crashes on Windows for common operations +- **Frequency:** Occurs whenever Unicode characters appear in logs/output on Windows +- **Affected Areas:** + - Video processing and transcript logging + - Chat history with emojis or international text + - Agent responses with Unicode formatting + - Debug logging across entire application + - Error messages and stack traces +- **Affected Users:** All Windows deployments (Linux/macOS unaffected) +- **Workaround:** None (requires code change) +- **Data Loss:** + - Videos not indexed for search + - Chat functionality breaks on Unicode content + - Application state inconsistent due to crashes + +## Recommended Fix Implementation + +**Note:** The following are recommended solutions to resolve this Unicode encoding issue on Windows. 
+ +### Global Fix (Recommended - Version 0.236.014) + +**File:** `app.py` +**Location:** Top of file (before any imports or print statements) +**Lines:** 7-21 + +Add these lines at the very beginning of `app.py` to fix encoding for the entire application: + +```python +# Fix Windows encoding issue - configure UTF-8 BEFORE any print statements or imports +import sys +if sys.platform == 'win32': + # For Python 3.7+ + try: + sys.stdout.reconfigure(encoding='utf-8') + sys.stderr.reconfigure(encoding='utf-8') + except AttributeError: + # For Python < 3.7, use codecs module + import codecs + sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict') + sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, 'strict') +``` + +**Benefits:** +- ✅ Fixes Unicode errors throughout the entire application +- ✅ Handles video transcripts with phonetic symbols (e.g., ˈ U+02C8) +- ✅ Handles emojis in chat history (e.g., ✅ U+2705) +- ✅ One-time fix at startup rather than per-function +- ✅ Compatible with Python 3.6+ through fallback mechanism + +### Function-Level Fix (Alternative) + +**File:** `functions_documents.py` +**Function:** `process_video_document()` +**Lines:** 334-341 + +If you prefer a more targeted fix, add these lines at the beginning of the function: + +```python +# Fix Windows encoding issue with Unicode characters in video transcripts +import sys +if sys.platform == 'win32': + try: + sys.stdout.reconfigure(encoding='utf-8') + except AttributeError: + # Python < 3.7 doesn't have reconfigure + pass +``` + +**Note:** The global fix is recommended as it prevents Unicode encoding errors across the entire application, not just video processing. 
+ +--- + +## Important Considerations and Best Practices + +### ⚠️ What This Fix Does (and Doesn't) Cover + +**✅ What the code fix handles:** +- Console output via `print()` statements +- Application logging to stdout/stderr +- Unhandled exception tracebacks +- Debug output during development + +**❌ What this fix does NOT cover:** +- File I/O operations - you must still explicitly specify encoding +- Database operations (already handled by database drivers) +- HTTP/API responses (handled by Flask/web frameworks) + +**Important:** When writing to files, always specify UTF-8 encoding explicitly: + +```python +# ❌ WRONG - still uses cp1252 on Windows +with open("log.txt", "w") as f: + f.write(data) + +# ✅ CORRECT - explicitly use UTF-8 +with open("log.txt", "w", encoding="utf-8") as f: + f.write(data) +``` + +--- + +### 🥇 Preferred Solution: Environment-Level UTF-8 (Python 3.7+) + +**Best approach if you control the deployment environment:** + +Set the `PYTHONUTF8` environment variable to enable UTF-8 mode globally: + +**Windows PowerShell:** +```powershell +$env:PYTHONUTF8 = "1" +``` + +**Windows CMD:** +```cmd +set PYTHONUTF8=1 +``` + +**Permanently (Windows):** +```powershell +setx PYTHONUTF8 1 +``` + +**Linux/macOS (.bashrc or .zshrc):** +```bash +export PYTHONUTF8=1 +``` + +**Docker/Container:** +```dockerfile +ENV PYTHONUTF8=1 +``` + +**Azure App Service (Application Settings):** +- Add application setting: `PYTHONUTF8` = `1` + +**Benefits:** +- ✅ Affects all Python encoding operations (console, files, etc.) 
+- ✅ No code changes required +- ✅ Officially recommended by Python +- ✅ Works for all Python scripts in the environment +- ✅ Cleaner and more maintainable than code-level fixes + +--- + +### 🔧 Alternative: More Robust Code Implementation + +For better error handling and broader compatibility, use this enhanced version: + +```python +# Fix Windows encoding issue - configure UTF-8 BEFORE any print statements or imports +import sys + +def force_utf8_encoding(): + """Force UTF-8 encoding for stdout/stderr on Windows.""" + if sys.platform == 'win32': + for stream in (sys.stdout, sys.stderr): + if stream is not None: + try: + stream.reconfigure(encoding='utf-8') + except (AttributeError, OSError): + # Fallback for Python < 3.7 or when stream doesn't support reconfigure + try: + import codecs + if stream == sys.stdout: + sys.stdout = codecs.getwriter('utf-8')(stream.buffer, 'strict') + elif stream == sys.stderr: + sys.stderr = codecs.getwriter('utf-8')(stream.buffer, 'strict') + except Exception: + # Silently fail if we can't reconfigure + pass + +force_utf8_encoding() +``` + +**Advantages of this version:** +- ✅ Handles None streams (redirected output) +- ✅ More defensive error handling +- ✅ Works when streams are redirected +- ✅ Gracefully degrades if reconfiguration fails + +--- + +### 📋 Recommended Approach (Ranked) + +1. **🥇 Best:** Set `PYTHONUTF8=1` environment variable + - Use when you control the deployment environment + - Cleanest and most comprehensive solution + +2. **🥈 Very Good:** System-wide UTF-8 (Windows 11+) + - Enable "Use Unicode UTF-8 for worldwide language support" in Windows settings + - System-wide fix but requires reboot + - May affect legacy applications + +3. 
**🥉 Good:** Code-level fix (current approach) + - Use when you can't control the environment + - Essential for distributed libraries/applications + - Works but only fixes console output + +--- + +### ✅ Validation and Testing + +After applying any fix, validate it works: + +```python +# Test script - save as test_encoding.py +import sys + +print(f"Platform: {sys.platform}") +print(f"stdout encoding: {sys.stdout.encoding}") +print(f"stderr encoding: {sys.stderr.encoding}") +print("\nTesting Unicode characters:") +print("IPA Phonetic: ˈ ə ɑ") +print("Emojis: ✅ 🔍 💬") +print("Box Drawing: ─ │ ┌") +print("International: 你好 مرحبا नमस्ते") +``` + +Expected output on Windows after fix: +``` +Platform: win32 +stdout encoding: utf-8 +stderr encoding: utf-8 + +Testing Unicode characters: +IPA Phonetic: ˈ ə ɑ +Emojis: ✅ 🔍 💬 +Box Drawing: ─ │ ┌ +International: 你好 مرحبا नमस्ते +``` + +--- + +### 🎯 Summary + +✅ **The code fix is valid and effective** for console output +✅ **It solves the immediate Unicode logging crashes** +⚠️ **It should be paired with `PYTHONUTF8=1` when possible** +⚠️ **Still specify `encoding="utf-8"` for all file operations** +⭐ **Best practice: Use environment variable for comprehensive solution** \ No newline at end of file diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md index 02aafcd1..541d0ae0 100644 --- a/docs/explanation/release_notes.md +++ b/docs/explanation/release_notes.md @@ -1,6 +1,47 @@ # Feature Release +### **(v0.237.006)** + +#### Bug Fixes + +* **Windows Unicode Encoding Issue Fix** + * Fixed critical cross-platform compatibility issue where the application crashes on Windows when processing or displaying Unicode characters beyond the Western European character set. 
+ * **Root Cause**: Python on Windows uses cp1252 encoding for stdout/stderr (limited to 256 Western European characters), while Azure services and web applications use UTF-8 encoding universally (1.1M+ characters). This mismatch caused `UnicodeEncodeError: 'charmap' codec can't encode character '\uXXXX'` when logging or displaying emojis, international characters, IPA symbols, or special formatting. + * **Impact**: Application crashes affecting: + * Video transcripts with phonetic symbols + * Chat messages containing emojis or international text + * Agent responses with Unicode formatting + * Debug logging across the entire application + * Error messages and stack traces + * **Solution**: Configured UTF-8 encoding globally at application startup for Windows platforms by reconfiguring `sys.stdout` and `sys.stderr` to UTF-8 near the top of `app.py`, immediately after the standard-library imports and before any application imports or print statements. Includes fallback for older Python versions (<3.7). Platform-specific fix only applies on Windows. + * **Testing**: Verified with video processing (IPA phonetic symbols), chat messages (emojis/international characters), debug logging (Unicode content), and confirmed no impact on Linux/macOS deployments. + * **Issue**: Fixes [#644](https://github.com/microsoft/simplechat/issues/644) + * (Ref: `app.py`, UTF-8 encoding configuration, cross-platform compatibility) + +* **Azure Speech Service Managed Identity Authentication Fix** + * Fixed Azure Speech Service managed identity authentication requiring resource-specific endpoints with custom subdomains instead of regional endpoints. + * **Root Cause**: Managed identity (AAD token) authentication fails with regional endpoints (e.g., `https://eastus2.api.cognitive.microsoft.com`) because the Bearer token doesn't specify which Speech resource to access. The regional gateway cannot determine resource authorization, resulting in 400 BadRequest errors. 
Key-based authentication works with regional endpoints because the subscription key identifies the specific resource. + * **Impact**: Users could not use managed identity authentication with Speech Service for audio transcription. Setup appeared successful but failed at runtime with authentication errors. + * **Solution**: Comprehensive setup guide for managed identity requiring: + * **Custom Subdomain**: Enable custom subdomain on Speech resource using `az cognitiveservices account update --custom-domain <custom-subdomain-name>` + * **Resource-Specific Endpoint**: Configure endpoint as `https://<resource-name>.cognitiveservices.azure.com` (not regional endpoint) + * **RBAC Roles**: Assign `Cognitive Services Speech User` and `Cognitive Services Speech Contributor` roles to App Service managed identity + * **Admin Settings**: Update Speech Service Endpoint to resource-specific URL, set Authentication Type to "Managed Identity", leave Speech Service Key empty + * **Key Differences**: + * Key auth ✅ works with both regional and resource-specific endpoints + * Managed Identity ❌ fails with regional endpoints (400 BadRequest) + * Managed Identity ✅ works with resource-specific endpoints (requires custom subdomain) + * **Troubleshooting Guide**: Added comprehensive troubleshooting for `NameResolutionError` (custom subdomain not enabled), 400 BadRequest (wrong endpoint type), 401 Authentication errors (missing RBAC roles). + * (Ref: Azure Speech Service, managed identity authentication, custom subdomain, RBAC configuration, endpoint types) + +* **Sidebar Conversations DOM Manipulation Fix** + * Fixed JavaScript error "Failed to execute 'insertBefore' on 'Node': The node before which the new node is to be inserted is not a child of this node" that prevented sidebar conversations from loading. + * **Root Cause**: In `createSidebarConversationItem()`, the code was attempting DOM manipulation in the wrong order. 
When `originalTitleElement` was appended to `titleWrapper`, it was removed from `headerRow`, making the subsequent `insertBefore(titleWrapper, dropdownElement)` fail because `dropdownElement` was no longer a valid child reference in the expected DOM position. + * **Impact**: Users experienced a complete failure loading the sidebar conversation list, with the error appearing in browser console and preventing any conversations from displaying in the sidebar. This affected all users attempting to view their conversation history. + * **Solution**: Reordered DOM manipulation to remove `originalTitleElement` from DOM first, style it, add it to `titleWrapper`, then insert the complete `titleWrapper` before `dropdownElement`. Added validation to check if `dropdownElement` is a valid child before attempting insertion. + * (Ref: `chat-sidebar-conversations.js`, `createSidebarConversationItem()`, DOM manipulation order, line 150) + ### **(v0.237.005)** #### Bug Fixes diff --git a/docs/how-to/azure_speech_managed_identity_manul_setup.md b/docs/how-to/azure_speech_managed_identity_manul_setup.md new file mode 100644 index 00000000..bf1b6e74 --- /dev/null +++ b/docs/how-to/azure_speech_managed_identity_manul_setup.md @@ -0,0 +1,261 @@ +# Azure Speech Service with Managed Identity Manual Setup + +## Overview + +This guide explains the critical difference between key-based and managed identity authentication when configuring Azure Speech Service, and the required steps to enable managed identity properly. + +## Authentication Methods: Regional vs. 
Resource-Specific Endpoints + +### Regional Endpoint (Shared Gateway) + +**Endpoint format**: `https://<region>.api.cognitive.microsoft.com` +- Example: `https://eastus2.api.cognitive.microsoft.com` +- This is a **shared endpoint** for all Speech resources in that Azure region +- Acts as a gateway that routes requests to individual Speech resources + +### Resource-Specific Endpoint (Custom Subdomain) + +**Endpoint format**: `https://<resource-name>.cognitiveservices.azure.com` +- Example: `https://simplechat6-dev-speech.cognitiveservices.azure.com` +- This is a **unique endpoint** dedicated to your specific Speech resource +- Requires custom subdomain to be enabled on the resource + +--- + +## Why Regional Endpoint Works with Key but NOT Managed Identity + +### Key-Based Authentication ✅ Works with Regional Endpoint + +When using subscription key authentication: + +```http +POST https://eastus2.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe +Headers: + Ocp-Apim-Subscription-Key: abc123def456... +``` + +**Why it works:** +1. The subscription key **directly identifies** your specific Speech resource +2. The regional gateway uses the key to look up which resource it belongs to +3. The request is automatically routed to your resource +4. Authorization succeeds because the key proves ownership + +### Managed Identity (AAD Token) ❌ Fails with Regional Endpoint + +When using managed identity authentication: + +```http +POST https://eastus2.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe +Headers: + Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGc... +``` + +**Why it fails (returns 400 BadRequest):** +1. The Bearer token proves your App Service identity to Azure AD +2. The token does NOT specify which Speech resource you want to access +3. The regional gateway cannot determine: + - Which specific Speech resource you're authorized for + - Whether your managed identity has RBAC roles on that resource +4. 
**Result**: The gateway rejects the request with 400 BadRequest + +### Managed Identity ✅ Works with Resource-Specific Endpoint + +When using managed identity with custom subdomain: + +```http +POST https://simplechat6-dev-speech.cognitiveservices.azure.com/speechtotext/transcriptions:transcribe +Headers: + Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGc... +``` + +**Why it works:** +1. The hostname **itself identifies** your specific Speech resource +2. Azure validates your managed identity Bearer token against that resource's RBAC +3. If your App Service MI has `Cognitive Services Speech User` role → authorized +4. The request proceeds to your dedicated Speech resource instance + +--- + +## Required Setup for Managed Identity + +### Prerequisites + +1. **Azure Speech Service resource** created in your subscription +2. **System-assigned or user-assigned managed identity** on your App Service +3. **RBAC role assignments** on the Speech resource + +### Step 1: Enable Custom Subdomain on Speech Resource + +**Why needed**: By default, Speech resources use the regional endpoint and do NOT have custom subdomains. Managed identity requires the resource-specific endpoint. 
+ +**How to enable**: + +```bash +az cognitiveservices account update \ + --name <speech-resource-name> \ + --resource-group <resource-group> \ + --custom-domain <custom-subdomain-name> +``` + +**Example**: + +```bash +az cognitiveservices account update \ + --name simplechat6-dev-speech \ + --resource-group sc-simplechat6-dev-rg \ + --custom-domain simplechat6-dev-speech +``` + +**Important notes**: +- Custom subdomain name must be **globally unique** across Azure +- Usually use the same name as your resource: `<speech-resource-name>` +- **One-way operation**: Cannot be disabled once enabled +- After enabling, the resource's endpoint property changes from regional to resource-specific + +**Verify custom subdomain is enabled**: + +```bash +az cognitiveservices account show \ + --name <speech-resource-name> \ + --resource-group <resource-group> \ + --query "{customSubDomainName:properties.customSubDomainName, endpoint:properties.endpoint}" +``` + +Expected output: +```json +{ + "customSubDomainName": "simplechat6-dev-speech", + "endpoint": "https://simplechat6-dev-speech.cognitiveservices.azure.com/" +} +``` + +### Step 2: Assign RBAC Roles to Managed Identity + +Grant your App Service managed identity the necessary roles on the Speech resource: + +```bash +# Get the Speech resource ID +SPEECH_RESOURCE_ID=$(az cognitiveservices account show \ + --name <speech-resource-name> \ + --resource-group <resource-group> \ + --query id -o tsv) + +# Get the App Service managed identity principal ID +MI_PRINCIPAL_ID=$(az webapp identity show \ + --name <app-service-name> \ + --resource-group <resource-group> \ + --query principalId -o tsv) + +# Assign Cognitive Services Speech User role (data-plane read access) +az role assignment create \ + --assignee $MI_PRINCIPAL_ID \ + --role "Cognitive Services Speech User" \ + --scope $SPEECH_RESOURCE_ID + +# Assign Cognitive Services Speech Contributor role (if needed for write operations) +az role assignment create \ + --assignee $MI_PRINCIPAL_ID \ + --role "Cognitive Services Speech Contributor" \ + --scope $SPEECH_RESOURCE_ID +``` + +**Verify role assignments**: + +```bash +az role assignment list \ + --assignee $MI_PRINCIPAL_ID \ + 
--scope $SPEECH_RESOURCE_ID \ + -o table +``` + +### Step 3: Configure Admin Settings + +In the Admin Settings → Search & Extract → Multimedia Support section: + +| Setting | Value | Example | +|---------|-------|---------| +| **Enable Audio File Support** | ✅ Checked | | +| **Speech Service Endpoint** | Resource-specific endpoint (with custom subdomain) | `https://simplechat6-dev-speech.cognitiveservices.azure.com` | +| **Speech Service Location** | Azure region | `eastus2` | +| **Speech Service Locale** | Language locale for transcription | `en-US` | +| **Authentication Type** | Managed Identity | | +| **Speech Service Key** | (Leave empty when using MI) | | + +**Critical**: +- Endpoint must be the resource-specific URL (custom subdomain) +- Do NOT use the regional endpoint for managed identity +- Remove trailing slash from endpoint: ✅ `https://<resource-name>.cognitiveservices.azure.com` ❌ `https://<resource-name>.cognitiveservices.azure.com/` + +### Step 4: Test Audio Upload + +1. Upload a short WAV or MP3 file +2. Monitor application logs for transcription progress +3. 
Expected log output: + ``` + File size: 1677804 bytes + Produced 1 WAV chunks: ['/tmp/tmp_chunk_000.wav'] + [Debug] Transcribing WAV chunk: /tmp/tmp_chunk_000.wav + [Debug] Speech config obtained successfully + [Debug] Received 5 phrases + Creating 3 transcript pages + ``` + +--- + +## Troubleshooting + +### Error: NameResolutionError - Failed to resolve hostname + +**Symptom**: `Failed to resolve 'simplechat6-dev-speech.cognitiveservices.azure.com'` + +**Cause**: Custom subdomain not enabled on Speech resource + +**Solution**: Enable custom subdomain using Step 1 above + +### Error: 400 BadRequest when using MI with regional endpoint + +**Symptom**: `400 Client Error: BadRequest for url: https://eastus2.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe` + +**Cause**: Managed identity requires resource-specific endpoint, not regional + +**Solution**: Update Admin Settings endpoint to use `https://<resource-name>.cognitiveservices.azure.com` + +### Error: 401 Authentication error with MI + +**Symptom**: `WebSocket upgrade failed: Authentication error (401)` + +**Cause**: Missing RBAC role assignments + +**Solution**: Assign required roles using Step 2 above + +### Key auth works but MI fails + +**Diagnosis checklist**: +- [ ] Custom subdomain enabled on Speech resource? +- [ ] Admin Settings endpoint is resource-specific (not regional)? +- [ ] Managed identity has RBAC roles on Speech resource? +- [ ] Authentication Type set to "Managed Identity" in Admin Settings? + +--- + +## Summary + +| Authentication Method | Endpoint Type | Example | Works? 
| +|----------------------|---------------|---------|--------| +| **Key** | Regional | `https://eastus2.api.cognitive.microsoft.com` | ✅ Yes | +| **Key** | Resource-specific | `https://simplechat6-dev-speech.cognitiveservices.azure.com` | ✅ Yes | +| **Managed Identity** | Regional | `https://eastus2.api.cognitive.microsoft.com` | ❌ No (400 BadRequest) | +| **Managed Identity** | Resource-specific | `https://simplechat6-dev-speech.cognitiveservices.azure.com` | ✅ Yes (with custom subdomain) | + +**Key takeaway**: Managed identity for Azure Cognitive Services data-plane operations requires: +1. Custom subdomain enabled on the resource +2. Resource-specific endpoint configured in your application +3. RBAC roles assigned to the managed identity at the resource scope + +--- + +## References + +- [Azure Cognitive Services custom subdomain documentation](https://learn.microsoft.com/azure/cognitive-services/cognitive-services-custom-subdomains) +- [Authenticate with Azure AD using managed identity](https://learn.microsoft.com/azure/cognitive-services/authentication?tabs=powershell#authenticate-with-azure-active-directory) +- [Azure Speech Service authentication](https://learn.microsoft.com/azure/ai-services/speech-service/rest-speech-to-text-short)