AssemblyAI · MAsuamah · Dec 24, 2025 · Dec 23, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/fern/pages/01-getting-started/transcribe-streaming-audio.mdx b/fern/pages/01-getting-started/transcribe-streaming-audio.mdx
@@ -59,13 +59,6 @@ def on_begin(self: Type[StreamingClient], event: BeginEvent):
 def on_turn(self: Type[StreamingClient], event: TurnEvent):
     print(f"{event.transcript} ({event.end_of_turn})")
 
-    if event.end_of_turn and not event.turn_is_formatted:
-        params = StreamingSessionParameters(
-            format_turns=True,
-        )
-
-        self.set_params(params)
-
 
 def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
     print(
@@ -1126,13 +1119,6 @@ def on_begin(self: Type[StreamingClient], event: BeginEvent):
 def on_turn(self: Type[StreamingClient], event: TurnEvent):
     print(f"{event.transcript} ({event.end_of_turn})")
 
-    if event.end_of_turn and not event.turn_is_formatted:
-        params = StreamingSessionParameters(
-            format_turns=True,
-        )
-
-        self.set_params(params)
-
 
 def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
     print(

diff --git a/fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx b/fern/pages/02-speech-to-text/universal-streaming/multilingual.mdx
@@ -19,7 +19,7 @@ Multilingual currently supports: English, Spanish, French, German, Italian, and
 
 ## Language detection
 
-The multilingual streaming model supports automatic language detection, allowing you to identify which language is being spoken in real-time. When enabled, the model returns the detected language code and confidence score with each complete utterance.
+The multilingual streaming model supports automatic language detection, allowing you to identify which language is being spoken in real time. When enabled, the model returns the detected language code and confidence score with each complete utterance and final turn.
 
 ### Configuration
 

diff --git a/fern/pages/02-speech-to-text/universal-streaming/universal-streaming.mdx b/fern/pages/02-speech-to-text/universal-streaming/universal-streaming.mdx
@@ -169,13 +169,6 @@ print(f"Session started: {event.id}")
 def on_turn(self: Type[StreamingClient], event: TurnEvent):
 print(f"{event.transcript} ({event.end_of_turn})")
 
-    if event.end_of_turn and not event.turn_is_formatted:
-        params = StreamingSessionParameters(
-            format_turns=True,
-        )
-
-        self.set_params(params)
-
 def on_terminated(self: Type[StreamingClient], event: TerminationEvent):
 print(
 f"Session terminated: {event.audio_duration_seconds} seconds of audio processed"

diff --git a/llm-gateway.yml b/llm-gateway.yml
@@ -81,7 +81,7 @@ paths:
                       translation:
                         target_languages: ["es", "de"]
                         formal: true
-                        # match_original_utterance: true
+                        match_original_utterance: true
               speaker_identification_example:
                 summary: Speaker identification request
                 value:
@@ -101,7 +101,6 @@ paths:
                         date: "mm/dd/yyyy"
                         phone_number: "(xxx)xxx-xxxx"
                         email: "username@domain.com"
-                        format_utterances: true
       responses:
         "200":
           description: Successful response containing the speech understanding results.
@@ -112,6 +111,66 @@ paths:
                   - $ref: "#/components/schemas/LLMGatewayTranslationResponse"
                   - $ref: "#/components/schemas/LLMGatewaySpeakerIdentificationResponse"
                   - $ref: "#/components/schemas/LLMGatewayCustomFormattingResponse"
+              examples:
+                complete_response:
+                  summary: Complete response example
+                  value:
+                    speech_understanding:
+                      request:
+                        translation:
+                          target_languages: ["es", "de"]
+                          formal: true
+                          match_original_utterance: true
+                        speaker_identification:
+                          speaker_type: "role"
+                          known_values: ["interviewer", "candidate"]
+                        custom_formatting:
+                          date: "mm/dd/yyyy"
+                          phone_number: "(xxx)xxx-xxxx"
+                          email: "username@domain.com"
+                      response:
+                        translation:
+                          status: "completed"
+                        speaker_identification:
+                          status: "completed"
+                        custom_formatting:
+                          status: "completed"
+                          mapping:
+                            "2024-12-25": "12/25/2024"
+                            "555-123-4567": "(555)123-4567"
+                          formatted_text: "Call me at (555)123-4567 on 12/25/2024"
+                          formatted_utterances:
+                            - confidence: 0.92
+                              start: 0
+                              end: 2500
+                              text: "Hi, I'm the interviewer. Call me at (555)123-4567 on 12/25/2024"
+                              speaker: "interviewer"
+                            - confidence: 0.95
+                              start: 2500
+                              end: 5000
+                              text: "Thanks! I'll reach out then."
+                              speaker: "candidate"
+                    translated_texts:
+                      es: "Hola, soy el entrevistador. Llámame al cinco cinco cinco uno dos tres cuatro cinco seis siete el veinticinco de diciembre de dos mil veinticuatro. ¡Gracias! Me pondré en contacto entonces."
+                      de: "Hallo, ich bin der Interviewer. Rufen Sie mich an unter fünf fünf fünf eins zwei drei vier fünf sechs sieben am fünfundzwanzigsten Dezember zweitausendvierundzwanzig. Danke! Ich werde mich dann melden."
+                    utterances:
+                      - confidence: 0.92
+                        start: 0
+                        end: 2500
+                        text: "Hi, I'm the interviewer. Call me at five five five one two three four five six seven on December twenty fifth twenty twenty four"
+                        speaker: "interviewer"
+                        translated_texts:
+                          es: "Hola, soy el entrevistador. Llámame al cinco cinco cinco uno dos tres cuatro cinco seis siete el veinticinco de diciembre de dos mil veinticuatro"
+                          de: "Hallo, ich bin der Interviewer. Rufen Sie mich an unter fünf fünf fünf eins zwei drei vier fünf sechs sieben am fünfundzwanzigsten Dezember zweitausendvierundzwanzig"
+                      - confidence: 0.95
+                        start: 2500
+                        end: 5000
+                        text: "Thanks! I'll reach out then."
+                        speaker: "candidate"
+                        translated_texts:
+                          es: "¡Gracias! Me pondré en contacto entonces."
+                          de: "Danke! Ich werde mich dann melden."
+                    words: []
         default:
           description: An unexpected error occurred.
           content:
@@ -414,10 +473,10 @@ components:
               type: boolean
               description: Use formal language style
               default: true
-            # match_original_utterance:
-            #   type: boolean
-            #   description: Get translated utterances (if speaker_labels was enabled)
-            #   default: false
+            match_original_utterance:
+              type: boolean
+              description: When enabled with Speaker Labels, returns translated text in the utterances array. Each utterance will include a `translated_texts` key containing translations for each target language.
+              default: false
           required:
             - target_languages
       required:
@@ -458,10 +517,6 @@ components:
             email:
               type: string
               description: Email format pattern (e.g., `"username@domain.com"`)
-            format_utterances:
-              type: boolean
-              description: Whether to format utterances
-              default: true
       required:
         - custom_formatting
 
@@ -484,12 +539,20 @@ components:
         translated_texts:
           type: object
           description: 'Translated text keyed by language code (e.g., `{"es": "Texto traducido"}`)'
-          # Unsure how to display dynamic keys.
+          additionalProperties:
+            type: string
 
         utterances:
           type: array
+          description: Array of utterances with translations (when `match_original_utterance` is true)
           items:
             type: object
+            properties:
+              translated_texts:
+                type: object
+                description: 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "Übersetzter Text"}`). Only present when `match_original_utterance` is enabled.'
+                additionalProperties:
+                  type: string
         words:
           type: array
           items:
@@ -544,14 +607,13 @@ components:
                       type: string
                     formatted_utterances:
                       type: array
+                      description: Array of formatted utterances. Only included when utterances exist and formatting was applied.
                       items:
                         type: object
                     status:
                       type: string
                   required:
                     - mapping
-                    - formatted_text
-                    - formatted_utterances
                     - status
         utterances:
           type: array

diff --git a/openapi.yml b/openapi.yml
@@ -1296,7 +1296,7 @@ components:
               x-label: Code switching confidence threshold
               description: |
                 The confidence threshold for code switching detection. If the code switching confidence is below this threshold, the transcript will be processed in the language with the highest `language_detection_confidence` score.
-              type: float
+              type: number
               minimum: 0
               maximum: 1
               default: 0.3
@@ -1634,7 +1634,7 @@ components:
             {
               request:
                 {
-                  translation: { target_languages: ["es", "de"], formal: true },
+                  translation: { target_languages: ["es", "de"], formal: true, match_original_utterance: true },
                 },
             },
         }
@@ -1698,7 +1698,7 @@ components:
             {
               request:
                 {
-                  translation: { target_languages: ["es", "de"], formal: true },
+                  translation: { target_languages: ["es", "de"], formal: true, match_original_utterance: true },
                 },
             },
         }
@@ -1822,6 +1822,12 @@ components:
           x-label: Speaker
           description: The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.
           type: string
+        translated_texts:
+          x-label: Translated texts
+          description: 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "Übersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+          type: object
+          additionalProperties:
+            type: string
       required:
         - confidence
         - start
@@ -2719,7 +2725,7 @@ components:
               x-label: Code switching confidence threshold
               description: |
                 The confidence threshold for code switching detection. If the code switching confidence is below this threshold, the transcript will be processed in the language with the highest `language_detection_confidence` score.
-              type: float
+              type: number
               minimum: 0
               maximum: 1
               default: 0.3
@@ -6077,10 +6083,10 @@ components:
               type: boolean
               description: Use formal language style
               default: true
-            # match_original_utterance:
-            #   type: boolean
-            #   description: Get translated utterances (if speaker_labels was enabled)
-            #   default: false
+            match_original_utterance:
+              type: boolean
+              description: When enabled with Speaker Labels, returns translated text in the utterances array. Each utterance will include a `translated_texts` key containing translations for each target language.
+              default: false
           required:
             - target_languages
       required:
@@ -6121,10 +6127,6 @@ components:
             email:
               type: string
               description: Email format pattern (e.g., `"username@domain.com"`)
-            format_utterances:
-              type: boolean
-              description: Whether to format utterances
-              default: true
       required:
         - custom_formatting
 

diff --git a/usm-streaming.yml b/usm-streaming.yml
@@ -80,7 +80,7 @@ channels:
             type: string
 
       language_detection:
-        description: Whether to detect the language of the audio stream
+        description: Whether to detect the language and return language metadata on utterances and final turns. Only available for the multilingual model.
         location: $message.payload#/language_detection
         enum: ["true", "false"]
         default: "false"
@@ -121,6 +121,11 @@ channels:
         description: API token for authentication
         location: $message.payload#/token
 
+      vad_threshold:
+        description: The confidence threshold (0.0 to 1.0) for classifying audio frames as silence. Frames with VAD confidence below this value are considered silent. Increase for noisy environments to reduce false speech detection.
+        location: $message.payload#/vad_threshold
+        default: "0.4"
+
       ApiKey:
         description: >-
           Use your API key for authentication, or alternatively generate a [temporary token](/docs/api-reference/streaming-api/generate-streaming-token) and pass it via the `token` query parameter.