Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [4.20.0]

- Add `on_low_language_confidence` property to `LanguageDetectionOptions`
> Controls behavior when language confidence is below threshold. Either "error" (default) or "fallback".
> When set to "fallback", the transcription will use the fallback language instead of erroring when confidence is low.

## [4.8.0]

- Add `multichannel` property to `TranscriptParams`
Expand Down
14 changes: 7 additions & 7 deletions docs/compat.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ If you do use an older version of Node.js like version 16, you'll need to polyfi
To make the SDK compatible with the browser, the SDK aims to use web standards as much as possible.
However, there are still incompatibilities between Node.js and the browser.

- `StreamingTranscriber` doesn't support the AssemblyAI API key in the browser.
Instead, you have to generate a temporary auth token using `client.streaming.createTemporaryToken`, and pass in the resulting token to the streaming transcriber.
- `RealtimeTranscriber` doesn't support the AssemblyAI API key in the browser.
Instead, you have to generate a temporary auth token using `client.realtime.createTemporaryToken`, and pass in the resulting token to the real-time transcriber.

Generate a temporary auth token on the server.

Expand All @@ -23,24 +23,24 @@ However, there are still incompatibilities between Node.js and the browser.
// Ideally, to avoid embedding your API key client side,
// you generate this token on the server, and pass it to the client via an API.
const client = new AssemblyAI({ apiKey: "YOUR_API_KEY" });
const token = await client.streaming.createTemporaryToken({ expires_in_seconds: 60 });
const token = await client.realtime.createTemporaryToken({ expires_in: 480 });
```

> [!NOTE]
> We recommend generating the token on the server, so you don't embed your AssemblyAI API key in your client app.
> If you embed the API key on the client, everyone can see it and use it for themselves.

Then pass the token via an API to the client.
On the client, create an instance of `StreamingTranscriber` using the token.
On the client, create an instance of `RealtimeTranscriber` using the token.

```js
import { StreamingTranscriber } from "assemblyai";
import { RealtimeTranscriber } from "assemblyai";
// or the following if you're using UMD
// const { StreamingTranscriber } = assemblyai;
// const { RealtimeTranscriber } = assemblyai;

const token = getToken(); // getToken is a function for you to implement

const rt = new StreamingTranscriber({
const rt = new RealtimeTranscriber({
token: token,
});
```
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "assemblyai",
"version": "4.19.0",
"version": "4.21.0",
"description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
"engines": {
"node": ">=18"
Expand Down
18 changes: 18 additions & 0 deletions src/services/streaming/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ export class StreamingTranscriber {
);
}

if (this.params.vadThreshold !== undefined) {
searchParams.set("vad_threshold", this.params.vadThreshold.toString());
}

if (this.params.formatTurns) {
searchParams.set("format_turns", this.params.formatTurns.toString());
}
Expand Down Expand Up @@ -128,6 +132,20 @@ export class StreamingTranscriber {
searchParams.set("speech_model", this.params.speechModel.toString());
}

if (this.params.languageDetection !== undefined) {
searchParams.set(
"language_detection",
this.params.languageDetection.toString(),
);
}

if (this.params.inactivityTimeout !== undefined) {
searchParams.set(
"inactivity_timeout",
this.params.inactivityTimeout.toString(),
);
}

url.search = searchParams.toString();

return url;
Expand Down
4 changes: 4 additions & 0 deletions src/types/openapi.generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,10 @@ export type LanguageDetectionOptions = {
* The confidence threshold for the automatically detected code switching language.
*/
code_switching_confidence_threshold?: number | null;
/**
* Controls behavior when language confidence is below threshold. Either "error" (default) or "fallback".
*/
on_low_language_confidence?: string | null;
};

/**
Expand Down
6 changes: 6 additions & 0 deletions src/types/streaming/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ export type StreamingTranscriberParams = {
endOfTurnConfidenceThreshold?: number;
minEndOfTurnSilenceWhenConfident?: number;
maxTurnSilence?: number;
vadThreshold?: number;
formatTurns?: boolean;
filterProfanity?: boolean;
keyterms?: string[];
keytermsPrompt?: string[];
speechModel?: StreamingSpeechModel;
languageDetection?: boolean;
inactivityTimeout?: number;
};

export type StreamingEvents = "open" | "close" | "turn" | "error";
Expand Down Expand Up @@ -54,6 +57,8 @@ export type TurnEvent = {
transcript: string;
end_of_turn_confidence: number;
words: StreamingWord[];
language_code?: string;
language_confidence?: number;
};

export type StreamingWord = {
Expand All @@ -79,6 +84,7 @@ export type StreamingUpdateConfiguration = {
end_of_turn_confidence_threshold?: number;
min_end_of_turn_silence_when_confident?: number;
max_turn_silence?: number;
vad_threshold?: number;
format_turns?: boolean;
};

Expand Down
52 changes: 52 additions & 0 deletions tests/unit/language-detection-options.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,56 @@ describe("language detection options", () => {
const requestBody = JSON.parse(fetchMock.mock.calls[0][1]?.body as string);
expect(requestBody.language_detection_options).toBe(null);
});

// Submitting with `on_low_language_confidence: "fallback"` must forward that
// value, the fallback language, and the confidence threshold in the request body.
it("should create transcript with on_low_language_confidence set to fallback", async () => {
  // Stub the single POST to /v2/transcript with a queued transcript.
  fetchMock.doMockOnceIf(
    requestMatches({ url: "/v2/transcript", method: "POST" }),
    JSON.stringify({ id: transcriptId, status: "queued" }),
  );

  const options: LanguageDetectionOptions = {
    fallback_language: "en",
    on_low_language_confidence: "fallback",
  };

  const { id } = await assembly.transcripts.submit({
    audio_url: remoteAudioURL,
    language_detection: true,
    language_confidence_threshold: 0.8,
    language_detection_options: options,
  });
  expect(id).toBe(transcriptId);

  // Decode the JSON body recorded for fetchMock.mock.calls[0] and check each field.
  const rawBody = fetchMock.mock.calls[0][1]?.body as string;
  const sent = JSON.parse(rawBody);
  const sentOptions = sent.language_detection_options;
  expect(sent.language_confidence_threshold).toBe(0.8);
  expect(sentOptions.fallback_language).toBe("en");
  expect(sentOptions.on_low_language_confidence).toBe("fallback");
});

// Submitting with `on_low_language_confidence: "error"` must forward that
// value, the fallback language, and the confidence threshold in the request body.
it("should create transcript with on_low_language_confidence set to error", async () => {
  // Stub the single POST to /v2/transcript with a queued transcript.
  fetchMock.doMockOnceIf(
    requestMatches({ url: "/v2/transcript", method: "POST" }),
    JSON.stringify({ id: transcriptId, status: "queued" }),
  );

  const options: LanguageDetectionOptions = {
    fallback_language: "en",
    on_low_language_confidence: "error",
  };

  const { id } = await assembly.transcripts.submit({
    audio_url: remoteAudioURL,
    language_detection: true,
    language_confidence_threshold: 0.7,
    language_detection_options: options,
  });
  expect(id).toBe(transcriptId);

  // Decode the JSON body recorded for fetchMock.mock.calls[0] and check each field.
  const rawBody = fetchMock.mock.calls[0][1]?.body as string;
  const sent = JSON.parse(rawBody);
  const sentOptions = sent.language_detection_options;
  expect(sent.language_confidence_threshold).toBe(0.7);
  expect(sentOptions.fallback_language).toBe("en");
  expect(sentOptions.on_low_language_confidence).toBe("error");
});
});
Loading