Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions examples/wakeword_agent_dispatch/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# xAI Grok Voice Agent API (see https://docs.livekit.io/agents/models/realtime/plugins/xai/)
XAI_API_KEY=

# Optional: voice (Ara, Eve, Leo, Rex, Sal) and model override
# LIVEKIT_AGENT_XAI_VOICE=Ara
# LIVEKIT_AGENT_XAI_MODEL=grok-voice-fast-1.0

# LiveKit connection settings
LIVEKIT_URL=wss://your-project.livekit.cloud
LIVEKIT_API_KEY=
LIVEKIT_API_SECRET=

# Example room and named agent dispatch settings
LIVEKIT_ROOM=wakeword-preconnect
LIVEKIT_AGENT_NAME=test-agent
LIVEKIT_AGENT_METADATA=

# Optional PortAudio device indices. Leave blank to use system defaults.
LIVEKIT_AUDIO_INPUT_DEVICE=
LIVEKIT_AUDIO_OUTPUT_DEVICE=

# Wake word model settings
LIVEKIT_WAKEWORD_MODEL=./models/hey_livekit.onnx
LIVEKIT_WAKEWORD_NAME=hey_livekit
LIVEKIT_WAKEWORD_THRESHOLD=0.5
LIVEKIT_WAKEWORD_PREROLL_SECONDS=2.0
LIVEKIT_PRECONNECT_BUFFER_SECONDS=10.0
LIVEKIT_AGENT_WAIT_TIMEOUT=30.0

# Debug directory to write preconnect buffer WAV files
LIVEKIT_PRECONNECT_DEBUG_WAV=./debug
87 changes: 87 additions & 0 deletions examples/wakeword_agent_dispatch/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Wake Word Agent Dispatch Example

This example keeps a local microphone track published from launch, listens for a wake word on
that same microphone stream, dispatches a named LiveKit agent, and sends the pre-connect audio
buffer to the first active agent participant. A companion `agent.py` provides a minimal voice
assistant that ends its session when the user says `bye livekit`.

The example also keeps a short local preroll buffer so the pre-connect payload includes the wake
word audio spoken before `LocalAudioTrack.start_preconnect_buffer()` could be called.

## Requirements

- Python 3.11 or newer
- A microphone
- A LiveKit project with an agent registered using `agent_name`
- A wake word ONNX model, such as `hey_livekit.onnx`

Install the example dependencies from this directory:

```bash
cd examples/wakeword_agent_dispatch
uv sync --python 3.11
```

On macOS, `sounddevice` may require PortAudio:

```bash
brew install portaudio
```

On Ubuntu or Debian:

```bash
sudo apt install portaudio19-dev
```

## Configuration

Set your LiveKit credentials and point the example at your wake word model:

```bash
export LIVEKIT_URL="wss://your-project.livekit.cloud"
export LIVEKIT_API_KEY="..."
export LIVEKIT_API_SECRET="..."
export LIVEKIT_ROOM="wakeword-preconnect"
export LIVEKIT_AGENT_NAME="test-agent"
export LIVEKIT_WAKEWORD_MODEL="./models/hey_livekit.onnx"
```

Optional settings:

```bash
export LIVEKIT_WAKEWORD_NAME="hey_livekit"
export LIVEKIT_WAKEWORD_THRESHOLD="0.5"
export LIVEKIT_WAKEWORD_PREROLL_SECONDS="2.0"
export LIVEKIT_PRECONNECT_BUFFER_SECONDS="10.0"
export LIVEKIT_AGENT_METADATA=""
export LIVEKIT_AGENT_WAIT_TIMEOUT="30.0"
export LIVEKIT_AGENT_JOIN_DELAY_SECONDS="2.0"
export LIVEKIT_AGENT_STT_MODEL="deepgram/nova-3"
export LIVEKIT_AGENT_LLM_MODEL="openai/gpt-4o-mini"
export LIVEKIT_AGENT_TTS_MODEL="cartesia/sonic-2"
```

## Run

Start the named agent in one process:

```bash
uv run python agent.py dev
```

Then start the wake word client in another process:

```bash
uv run python wakeword_agent_dispatch.py
```

The script connects to LiveKit, publishes the microphone track immediately, and waits for the
wake word. After detection, it dispatches `LIVEKIT_AGENT_NAME`, waits for an active agent
participant, sends the buffered wake word audio to that participant, and disables wake word
detection while the agent is active. When the agent ends its session, for example after the user
says `bye livekit`, the client clears the pre-connect buffer and local audio state before returning
to idle wake word detection.

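For testing the pre-connect buffer, `agent.py` starts the room session with `pre_connect_audio`
enabled and uses `LIVEKIT_AGENT_PRECONNECT_AUDIO_TIMEOUT_SECONDS` (default `10.0`) as the
pre-connect audio timeout. The `agent.py` in this example does not read
`LIVEKIT_AGENT_JOIN_DELAY_SECONDS`, so no artificial join delay is applied.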
104 changes: 104 additions & 0 deletions examples/wakeword_agent_dispatch/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from __future__ import annotations

import asyncio
import logging
import os

from dotenv import find_dotenv, load_dotenv
from livekit.agents import (
Agent,
AgentSession,
JobContext,
RunContext,
WorkerOptions,
cli,
function_tool,
room_io,
)
from livekit.plugins import xai

logger = logging.getLogger("wakeword-agent")


def _xai_realtime_model() -> xai.realtime.RealtimeModel:
    """Build the Grok Voice Agent API (OpenAI Realtime-compatible) voice model.

    Configuration is read from two optional environment variables:

    * ``LIVEKIT_AGENT_XAI_VOICE`` -- voice name; ``"Ara"`` is used when the
      variable is unset or blank.
    * ``LIVEKIT_AGENT_XAI_MODEL`` -- model override; when unset or blank the
      ``model`` argument is omitted so the plugin's own default applies.

    Returns:
        The configured ``xai.realtime.RealtimeModel`` instance.
    """
    # A ``.env`` line such as ``LIVEKIT_AGENT_XAI_VOICE=`` produces an empty
    # string rather than an unset variable, so blank values are treated the
    # same as missing ones (matching how the model override is handled).
    voice = os.getenv("LIVEKIT_AGENT_XAI_VOICE", "").strip() or "Ara"
    model = os.getenv("LIVEKIT_AGENT_XAI_MODEL", "").strip()

    # Only pass ``model`` when an override was actually supplied.
    options = {"voice": voice}
    if model:
        options["model"] = model
    return xai.realtime.RealtimeModel(**options)


class BasicAssistant(Agent):
    """Short-answer voice assistant that can end its own session.

    The agent is driven by the realtime model returned from
    ``_xai_realtime_model()`` and exposes a single ``end_session`` tool.
    """

    def __init__(self) -> None:
        # System prompt: keep replies short and call ``end_session`` on goodbye.
        prompt = (
            "You are a brief, friendly voice assistant. Answer in one or two "
            "sentences unless the user asks for more detail. When the user says "
            "'bye livekit' or otherwise clearly asks to end the conversation, call "
            "the end_session tool immediately."
        )
        super().__init__(instructions=prompt, llm=_xai_realtime_model())

    # NOTE(review): the docstring below is kept verbatim -- presumably
    # ``function_tool`` exposes it to the model as the tool description; confirm
    # before rewording it.
    @function_tool(name="end_session")
    async def end_session(self, ctx: RunContext) -> None:
        """
        End the current voice session when the user says "bye livekit".

        This is the final action for the conversation. Use it only when the user
        clearly says "bye livekit" or otherwise asks to end the session.
        """
        logger.info("ending agent session after user goodbye")
        session = ctx.session
        # The farewell is queued without awaiting it; shutdown is requested
        # right after.
        session.say("Bye, see you next time.", allow_interruptions=False)
        session.shutdown()


async def entrypoint(ctx: JobContext) -> None:
    """Run one agent job: start the assistant and stay alive until it closes.

    Starts an ``AgentSession`` with ``BasicAssistant`` in ``ctx.room`` with
    pre-connect audio enabled, logs user transcripts and conversation items,
    waits for the session's ``close`` event, and then shuts the job down.

    The pre-connect audio timeout is read from
    ``LIVEKIT_AGENT_PRECONNECT_AUDIO_TIMEOUT_SECONDS`` (default ``"10.0"``).

    Args:
        ctx: Job context providing the room and the job shutdown hook.
    """
    session = AgentSession()
    # Set once by the ``close`` handler; awaited below to keep the job alive.
    session_closed = asyncio.Event()
    preconnect_timeout = float(
        os.getenv("LIVEKIT_AGENT_PRECONNECT_AUDIO_TIMEOUT_SECONDS", "10.0")
    )

    @session.on("close")
    def _handle_close(_) -> None:
        # Setting an already-set event is a no-op, so repeated events are safe.
        session_closed.set()

    @session.on("user_input_transcribed")
    def _handle_transcript(ev) -> None:
        stage = " final" if ev.is_final else ""
        text = ev.transcript or "<empty>"
        logger.info("user transcript%s: %s", stage, text)

    @session.on("conversation_item_added")
    def _handle_item_added(ev) -> None:
        logger.info("conversation item added: %s", ev.item)

    assistant = BasicAssistant()
    audio_input = room_io.AudioInputOptions(
        pre_connect_audio=True,
        pre_connect_audio_timeout=preconnect_timeout,
    )
    room_options = room_io.RoomOptions(audio_input=audio_input)

    await session.start(agent=assistant, room=ctx.room, room_options=room_options)

    # Block until the session reports it has closed, then end the job.
    await session_closed.wait()
    ctx.shutdown("agent session ended")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # Load the nearest .env file first so the lookup below can see its values.
    load_dotenv(find_dotenv())

    # Name under which this worker registers; "test-agent" when unset.
    agent_name = os.getenv("LIVEKIT_AGENT_NAME", "test-agent")
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint, agent_name=agent_name)
    cli.run_app(worker_options)
Loading
Loading