livekit · theomonnom · Apr 29, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/examples/wakeword_agent_dispatch/.env.example b/examples/wakeword_agent_dispatch/.env.example
@@ -0,0 +1,31 @@
+# xAI Grok Voice Agent API (see https://docs.livekit.io/agents/models/realtime/plugins/xai/)
+XAI_API_KEY=
+
+# Optional: voice (Ara, Eve, Leo, Rex, Sal) and model override
+# LIVEKIT_AGENT_XAI_VOICE=Ara
+# LIVEKIT_AGENT_XAI_MODEL=grok-voice-fast-1.0
+
+# LiveKit connection settings
+LIVEKIT_URL=wss://your-project.livekit.cloud
+LIVEKIT_API_KEY=
+LIVEKIT_API_SECRET=
+
+# Example room and named agent dispatch settings
+LIVEKIT_ROOM=wakeword-preconnect
+LIVEKIT_AGENT_NAME=test-agent
+LIVEKIT_AGENT_METADATA=
+
+# Optional PortAudio device indices. Leave blank to use system defaults.
+LIVEKIT_AUDIO_INPUT_DEVICE=
+LIVEKIT_AUDIO_OUTPUT_DEVICE=
+
+# Wake word model settings
+LIVEKIT_WAKEWORD_MODEL=./models/hey_livekit.onnx
+LIVEKIT_WAKEWORD_NAME=hey_livekit
+LIVEKIT_WAKEWORD_THRESHOLD=0.5
+LIVEKIT_WAKEWORD_PREROLL_SECONDS=2.0
+LIVEKIT_PRECONNECT_BUFFER_SECONDS=10.0
+LIVEKIT_AGENT_WAIT_TIMEOUT=30.0
+
+# Debug directory to write preconnect buffer WAV files
+LIVEKIT_PRECONNECT_DEBUG_WAV=./debug
diff --git a/examples/wakeword_agent_dispatch/README.md b/examples/wakeword_agent_dispatch/README.md
@@ -0,0 +1,87 @@
+# Wake Word Agent Dispatch Example
+
+This example keeps a local microphone track published from launch, listens for a wake word on
+that same microphone stream, dispatches a named LiveKit agent, and sends the pre-connect audio
+buffer to the first active agent participant. A companion `agent.py` provides a minimal voice
+assistant that ends its session when the user says `bye livekit`.
+
+The example also keeps a short local preroll buffer so the pre-connect payload includes the wake
+word audio spoken before `LocalAudioTrack.start_preconnect_buffer()` could be called.
+
+## Requirements
+
+- Python 3.11 or newer
+- A microphone
+- A LiveKit project with an agent registered using `agent_name`
+- A wake word ONNX model, such as `hey_livekit.onnx`
+
+Install the example dependencies from this directory:
+
+```bash
+cd examples/wakeword_agent_dispatch
+uv sync --python 3.11
+```
+
+On macOS, `sounddevice` may require PortAudio:
+
+```bash
+brew install portaudio
+```
+
+On Ubuntu or Debian:
+
+```bash
+sudo apt install portaudio19-dev
+```
+
+## Configuration
+
+Set your LiveKit credentials and point the example at your wake word model:
+
+```bash
+export LIVEKIT_URL="wss://your-project.livekit.cloud"
+export LIVEKIT_API_KEY="..."
+export LIVEKIT_API_SECRET="..."
+export LIVEKIT_ROOM="wakeword-preconnect"
+export LIVEKIT_AGENT_NAME="test-agent"
+export LIVEKIT_WAKEWORD_MODEL="./models/hey_livekit.onnx"
+```
+
+Optional settings:
+
+```bash
+export LIVEKIT_WAKEWORD_NAME="hey_livekit"
+export LIVEKIT_WAKEWORD_THRESHOLD="0.5"
+export LIVEKIT_WAKEWORD_PREROLL_SECONDS="2.0"
+export LIVEKIT_PRECONNECT_BUFFER_SECONDS="10.0"
+export LIVEKIT_AGENT_METADATA=""
+export LIVEKIT_AGENT_WAIT_TIMEOUT="30.0"
+export LIVEKIT_AGENT_XAI_VOICE="Ara"
+export LIVEKIT_AGENT_XAI_MODEL="grok-voice-fast-1.0"
+export LIVEKIT_AGENT_PRECONNECT_AUDIO_TIMEOUT_SECONDS="10.0"
+```
+
+## Run
+
+Start the named agent in one process:
+
+```bash
+uv run python agent.py dev
+```
+
+Then start the wake word client in another process:
+
+```bash
+uv run python wakeword_agent_dispatch.py
+```
+
+The script connects to LiveKit, publishes the microphone track immediately, and waits for the
+wake word. After detection, it dispatches `LIVEKIT_AGENT_NAME`, waits for an active agent
+participant, sends the buffered wake word audio to that participant, and disables wake word
+detection while the agent is active. When the agent ends its session, for example after the user
+says `bye livekit`, the client clears the pre-connect buffer and local audio state before
+returning to idle wake word detection.
+
+For testing the pre-connect buffer, `agent.py` enables pre-connect audio on its room input and
+reads the pre-connect audio timeout from `LIVEKIT_AGENT_PRECONNECT_AUDIO_TIMEOUT_SECONDS`
+(default `10.0` seconds).
diff --git a/examples/wakeword_agent_dispatch/agent.py b/examples/wakeword_agent_dispatch/agent.py
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+
+from dotenv import find_dotenv, load_dotenv
+from livekit.agents import (
+    Agent,
+    AgentSession,
+    JobContext,
+    RunContext,
+    WorkerOptions,
+    cli,
+    function_tool,
+    room_io,
+)
+from livekit.plugins import xai
+
+logger = logging.getLogger("wakeword-agent")
+
+
+def _xai_realtime_model() -> xai.realtime.RealtimeModel:
+    """Grok Voice Agent API (OpenAI Realtime–compatible) end-to-end voice model."""
+    voice = os.getenv("LIVEKIT_AGENT_XAI_VOICE", "Ara")
+    model = os.getenv("LIVEKIT_AGENT_XAI_MODEL", "").strip()
+    if model:
+        return xai.realtime.RealtimeModel(model=model, voice=voice)
+    return xai.realtime.RealtimeModel(voice=voice)
+
+
+class BasicAssistant(Agent):
+    """Minimal voice assistant backed by the xAI realtime model.
+
+    The instructions keep replies short and direct the model to call the
+    ``end_session`` tool once the user says "bye livekit" or otherwise asks to
+    end the conversation.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(
+            instructions=(
+                "You are a brief, friendly voice assistant. Answer in one or two "
+                "sentences unless the user asks for more detail. When the user says "
+                "'bye livekit' or otherwise clearly asks to end the conversation, call "
+                "the end_session tool immediately."
+            ),
+            # The realtime model is the only model passed to this agent.
+            llm=_xai_realtime_model(),
+        )
+
+    # NOTE(review): the docstring below is presumably surfaced to the model as the
+    # tool description, so treat it as runtime text rather than a plain comment.
+    @function_tool(name="end_session")
+    async def end_session(self, ctx: RunContext) -> None:
+        """
+        End the current voice session when the user says "bye livekit".
+
+        This is the final action for the conversation. Use it only when the user
+        clearly says "bye livekit" or otherwise asks to end the session.
+        """
+        logger.info("ending agent session after user goodbye")
+        # Say goodbye with interruptions disabled (the call is not awaited here),
+        # then request session shutdown.
+        # NOTE(review): no TTS model is configured anywhere in this file; confirm
+        # that say() can speak fixed text with only the realtime model available.
+        ctx.session.say("Bye, see you next time.", allow_interruptions=False)
+        ctx.session.shutdown()
+
+
+async def entrypoint(ctx: JobContext) -> None:
+    session = AgentSession()
+    closed = asyncio.get_running_loop().create_future()
+    pre_connect_audio_timeout = float(
+        os.getenv("LIVEKIT_AGENT_PRECONNECT_AUDIO_TIMEOUT_SECONDS", "10.0")
+    )
+
+    @session.on("close")
+    def _on_close(_) -> None:
+        if not closed.done():
+            closed.set_result(None)
+
+    @session.on("user_input_transcribed")
+    def _on_user_input_transcribed(ev) -> None:
+        logger.info(
+            "user transcript%s: %s",
+            " final" if ev.is_final else "",
+            ev.transcript or "<empty>",
+        )
+
+    @session.on("conversation_item_added")
+    def _on_conversation_item_added(ev) -> None:
+        logger.info("conversation item added: %s", ev.item)
+
+    await session.start(
+        agent=BasicAssistant(),
+        room=ctx.room,
+        room_options=room_io.RoomOptions(
+            audio_input=room_io.AudioInputOptions(
+                pre_connect_audio=True,
+                pre_connect_audio_timeout=pre_connect_audio_timeout,
+            ),
+        ),
+    )
+    await closed
+    ctx.shutdown("agent session ended")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    load_dotenv(find_dotenv())
+
+    cli.run_app(
+        WorkerOptions(
+            entrypoint_fnc=entrypoint,
+            agent_name=os.getenv("LIVEKIT_AGENT_NAME", "test-agent"),
+        )
+    )