Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/voice_agents/email_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ async def register_for_event(self, context: RunContext):
"Start the registration process for the event."

email_result = await beta.workflows.GetEmailTask(
instructions=beta.workflows.InstructionParts(
instructions=beta.workflows.WorkflowInstructions(
persona=(
"You are capturing the email address of the user for the event registration. "
"You are only a single step in a broader system responsible solely for capturing an email address."
Expand Down
2 changes: 2 additions & 0 deletions livekit-agents/livekit/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
AgentSession,
AgentStateChangedEvent,
AgentTask,
AudioRecognition,
CloseEvent,
CloseReason,
ConversationItemAddedEvent,
Expand Down Expand Up @@ -183,6 +184,7 @@ def __getattr__(name: str) -> typing.Any:
"RunContext",
"Plugin",
"AgentSession",
"AudioRecognition",
"RecordingOptions",
"text_transforms",
"AgentEvent",
Expand Down
4 changes: 2 additions & 2 deletions livekit-agents/livekit/agents/beta/workflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .name import GetNameResult, GetNameTask
from .phone_number import GetPhoneNumberResult, GetPhoneNumberTask
from .task_group import TaskCompletedEvent, TaskGroup, TaskGroupResult
from .utils import InstructionParts
from .utils import WorkflowInstructions
from .warm_transfer import WarmTransferResult, WarmTransferTask

__all__ = [
Expand All @@ -18,7 +18,7 @@
"GetDOBResult",
"GetDOBTask",
"GetDtmfResult",
"InstructionParts",
"WorkflowInstructions",
"GetCreditCardResult",
"GetCreditCardTask",
"GetNameTask",
Expand Down
17 changes: 8 additions & 9 deletions livekit-agents/livekit/agents/beta/workflows/address.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ...utils import is_given
from ...voice.agent import AgentTask
from ...voice.events import RunContext
from .utils import InstructionParts
from .utils import WorkflowInstructions

if TYPE_CHECKING:
from ...voice.turn import TurnDetectionMode
Expand All @@ -26,7 +26,7 @@ class GetAddressTask(AgentTask[GetAddressResult]):
def __init__(
self,
*,
instructions: NotGivenOr[InstructionParts | Instructions | str] = NOT_GIVEN,
instructions: NotGivenOr[WorkflowInstructions | Instructions | str] = NOT_GIVEN,
chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
tools: NotGivenOr[list[llm.Tool | llm.Toolset]] = NOT_GIVEN,
Expand All @@ -40,23 +40,22 @@ def __init__(
extra_instructions: str = "",
) -> None:
if not is_given(instructions):
instructions = InstructionParts(persona=PERSONA, extra=extra_instructions)
instructions = WorkflowInstructions(persona=PERSONA, extra=extra_instructions)
elif extra_instructions:
logger.warning("`extra_instructions` will be ignored when `instructions` is provided")

if isinstance(instructions, InstructionParts):
instructions = Instructions(INSTRUCTIONS_TEMPLATE).format(
persona=instructions.persona if is_given(instructions.persona) else PERSONA,
extra=instructions.extra,
if isinstance(instructions, WorkflowInstructions):
instructions = instructions.resolve(
template=INSTRUCTIONS_TEMPLATE,
default_persona=PERSONA,
_modality_specific=Instructions(audio=AUDIO_SPECIFIC, text=TEXT_SPECIFIC),
_confirmation=Instructions(
# confirmation is enabled by default for audio, disabled by default for text
audio=CONFIRMATION_INSTRUCTION if require_confirmation is not False else "",
text=CONFIRMATION_INSTRUCTION if require_confirmation is True else "",
),
)

assert is_given(instructions) # for type checking
assert isinstance(instructions, (str, Instructions)) # for type checking
super().__init__(
instructions=instructions,
chat_ctx=chat_ctx,
Expand Down
17 changes: 8 additions & 9 deletions livekit-agents/livekit/agents/beta/workflows/email_address.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ...utils import is_given
from ...voice.agent import AgentTask
from ...voice.events import RunContext
from .utils import InstructionParts
from .utils import WorkflowInstructions

if TYPE_CHECKING:
from ...voice.turn import TurnDetectionMode
Expand All @@ -27,7 +27,7 @@ class GetEmailTask(AgentTask[GetEmailResult]):
def __init__(
self,
*,
instructions: NotGivenOr[InstructionParts | Instructions | str] = NOT_GIVEN,
instructions: NotGivenOr[WorkflowInstructions | Instructions | str] = NOT_GIVEN,
chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
tools: NotGivenOr[list[llm.Tool | llm.Toolset]] = NOT_GIVEN,
Expand All @@ -41,23 +41,22 @@ def __init__(
extra_instructions: str = "",
) -> None:
if not is_given(instructions):
instructions = InstructionParts(persona=PERSONA, extra=extra_instructions)
instructions = WorkflowInstructions(persona=PERSONA, extra=extra_instructions)
elif extra_instructions:
logger.warning("`extra_instructions` will be ignored when `instructions` is provided")

if isinstance(instructions, InstructionParts):
instructions = Instructions(INSTRUCTIONS_TEMPLATE).format(
persona=instructions.persona if is_given(instructions.persona) else PERSONA,
extra=instructions.extra,
if isinstance(instructions, WorkflowInstructions):
instructions = instructions.resolve(
template=INSTRUCTIONS_TEMPLATE,
default_persona=PERSONA,
_modality_specific=Instructions(audio=AUDIO_SPECIFIC, text=TEXT_SPECIFIC),
_confirmation=Instructions(
# confirmation is enabled by default for audio, disabled by default for text
audio=CONFIRMATION_INSTRUCTION if require_confirmation is not False else "",
text=CONFIRMATION_INSTRUCTION if require_confirmation is True else "",
),
)

assert is_given(instructions) # for type checking
assert isinstance(instructions, (str, Instructions)) # for type checking
super().__init__(
instructions=instructions,
chat_ctx=chat_ctx,
Expand Down
42 changes: 32 additions & 10 deletions livekit-agents/livekit/agents/beta/workflows/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
from typing import Any

from ...llm.chat_context import Instructions
from ...types import NOT_GIVEN, NotGivenOr
from ...utils import is_given


class DtmfEvent(str, Enum):
Expand Down Expand Up @@ -44,19 +45,40 @@ def format_dtmf(events: list[DtmfEvent]) -> str:
return " ".join(event.value for event in events)


@dataclass
class InstructionParts:
class WorkflowInstructions(Instructions):
"""Customizable instruction sections for built-in workflow tasks.

Extends :class:`Instructions` with ``persona`` and ``extra`` fields
that workflow tasks resolve against their own templates and defaults.

Each field overrides that section when set; leave as ``NOT_GIVEN`` to
preserve the workflow's built-in default. Set to ``""`` to remove a
section entirely.

Args:
persona: Agent persona/identity — who the agent is and how it behaves.
extra: Extra instructions appended to the prompt. The simplest hook for
adding domain context without touching defaults.
"""

persona: NotGivenOr[Instructions | str] = NOT_GIVEN
extra: Instructions | str = ""
# Build a WorkflowInstructions value.
#
# `audio` and `text` are forwarded unchanged to the base `Instructions`
# initializer. `persona` and `extra` are stored on the instance so that
# `resolve()` can later substitute them into a workflow template.
#
# Args:
#   audio: passed positionally to `Instructions.__init__` (defaults to "").
#   text: passed to `Instructions.__init__` as `text=` (defaults to None).
#   persona: persona override; `NOT_GIVEN` (the default) is kept as-is here so
#     the caller of `resolve()` can fall back to its own default persona.
#   extra: extra instructions, stored as-is (defaults to "").
def __init__(
self,
audio: str = "",
*,
text: str | None = None,
persona: NotGivenOr[Instructions | str] = NOT_GIVEN,
extra: Instructions | str = "",
) -> None:
# Initialize the base Instructions part first, then attach the workflow-specific fields.
super().__init__(audio, text=text)
self.persona: NotGivenOr[Instructions | str] = persona
self.extra: Instructions | str = extra

def resolve(
    self,
    *,
    template: str,
    default_persona: str,
    **format_kwargs: Any,
) -> Instructions:
    """Format ``template`` with this instance's sections and return the result.

    Args:
        template: Template handed to ``Instructions.resolve_template``.
        default_persona: Persona used when ``self.persona`` was left as
            ``NOT_GIVEN``.
        **format_kwargs: Additional keyword arguments forwarded unchanged
            to ``Instructions.resolve_template``.

    Returns:
        Whatever ``Instructions.resolve_template`` produces for the given
        template, persona, extra and keyword arguments.
    """
    # Only fall back to the workflow default when no persona was supplied;
    # an explicitly given value (including an empty string) is kept.
    if is_given(self.persona):
        persona = self.persona
    else:
        persona = default_persona

    return Instructions.resolve_template(
        template,
        persona=persona,
        extra=self.extra,
        **format_kwargs,
    )
16 changes: 8 additions & 8 deletions livekit-agents/livekit/agents/beta/workflows/warm_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
BuiltinAudioClip,
PlayHandle,
)
from .utils import InstructionParts
from .utils import WorkflowInstructions

if TYPE_CHECKING:
from ...voice.turn import TurnDetectionMode
Expand All @@ -46,7 +46,7 @@ def __init__(
sip_number: NotGivenOr[str] = NOT_GIVEN,
sip_headers: NotGivenOr[dict[str, str]] = NOT_GIVEN,
hold_audio: NotGivenOr[AudioSource | AudioConfig | list[AudioConfig] | None] = NOT_GIVEN,
instructions: NotGivenOr[InstructionParts | Instructions | str] = NOT_GIVEN,
instructions: NotGivenOr[WorkflowInstructions | Instructions | str] = NOT_GIVEN,
chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
tools: NotGivenOr[list[llm.Tool | llm.Toolset]] = NOT_GIVEN,
Expand Down Expand Up @@ -79,19 +79,19 @@ def __init__(
"""

if not is_given(instructions):
instructions = InstructionParts(persona=PERSONA, extra=extra_instructions)
instructions = WorkflowInstructions(persona=PERSONA, extra=extra_instructions)
elif extra_instructions:
logger.warning("`extra_instructions` will be ignored when `instructions` is provided")

if isinstance(instructions, InstructionParts):
if isinstance(instructions, WorkflowInstructions):
conversation_history = self._format_conversation_history(chat_ctx)
instructions = Instructions(INSTRUCTIONS_TEMPLATE).format(
persona=instructions.persona if is_given(instructions.persona) else PERSONA,
extra=instructions.extra,
instructions = instructions.resolve(
template=INSTRUCTIONS_TEMPLATE,
default_persona=PERSONA,
_conversation_history=conversation_history,
)

assert is_given(instructions) # for type checking
assert isinstance(instructions, (str, Instructions)) # for type checking
super().__init__(
instructions=instructions,
chat_ctx=NOT_GIVEN, # don't pass the chat_ctx
Expand Down
2 changes: 1 addition & 1 deletion livekit-agents/livekit/agents/evals/judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _get_latest_instructions(chat_ctx: ChatContext) -> str | None:
"""
for item in reversed(chat_ctx.items):
if item.type == "agent_config_update" and item.instructions:
return item.instructions
return str(item.instructions)
return None


Expand Down
23 changes: 23 additions & 0 deletions livekit-agents/livekit/agents/inference/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,29 @@ def __init__(
)
self._streams = weakref.WeakSet[SynthesizeStream]()

class Markup(tts.TTS.Markup):
    """Markup handling that defers to the ``tts._provider_format`` helpers.

    Every operation looks up the upstream provider from the gateway TTS's
    current model string and passes it to the matching helper function.
    """

    def __init__(self, gateway_tts: TTS) -> None:
        super().__init__(gateway_tts)
        # Keep a reference so the provider can be re-read from the options on each call.
        self._gateway_tts = gateway_tts

    def _upstream_provider(self) -> str:
        """Return the portion of the model string that precedes the first ``/``."""
        model_name = self._gateway_tts._opts.model
        provider, _, _ = model_name.partition("/")
        return provider

    def llm_instructions(self) -> str | None:
        """Return the result of the ``llm_instructions`` helper for the upstream provider."""
        # Imported at call time, as in the other methods of this class.
        from ..tts._provider_format import llm_instructions as provider_llm_instructions

        provider = self._upstream_provider()
        return provider_llm_instructions(provider)

    def to_text(self, text: str) -> str:
        """Return ``text`` passed through the ``strip_markup`` helper for the upstream provider."""
        from ..tts._provider_format import strip_markup as provider_strip_markup

        provider = self._upstream_provider()
        return provider_strip_markup(provider, text)

    def convert(self, text: str) -> str:
        """Return ``text`` passed through the ``convert_markup`` helper for the upstream provider."""
        from ..tts._provider_format import convert_markup as provider_convert_markup

        provider = self._upstream_provider()
        return provider_convert_markup(provider, text)

@classmethod
def from_model_string(cls, model: str) -> TTS:
"""Create a TTS instance from a model string
Expand Down
3 changes: 3 additions & 0 deletions livekit-agents/livekit/agents/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
RealtimeSessionReconnectedEvent,
RemoteItemAddedEvent,
)
from .response_field import Response, ResponseField
from .tool_context import (
FunctionTool,
ProviderTool,
Expand Down Expand Up @@ -108,6 +109,8 @@
"RealtimeSessionRestoredEvent",
"LLMError",
"RemoteItemAddedEvent",
"Response",
"ResponseField",
]

# Cleanup docs of unexported modules
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,13 @@ def to_chat_ctx(

if msg.type == "message":
for c in msg.content:
if c and isinstance(c, str):
content.append({"text": c, "type": "text"})
elif isinstance(c, llm.ImageContent):
if isinstance(c, llm.ImageContent):
content.append(_to_image_content(c))
elif isinstance(c, llm.AudioContent):
pass
elif c:
# str or Instructions
content.append({"text": str(c), "type": "text"})
elif msg.type == "function_call":
content.append(
{
Expand Down
9 changes: 6 additions & 3 deletions livekit-agents/livekit/agents/llm/_provider_format/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,13 @@ def to_chat_ctx(

if msg.type == "message":
for content in msg.content:
if content and isinstance(content, str):
current_content.append({"text": content})
elif isinstance(content, llm.ImageContent):
if isinstance(content, llm.ImageContent):
current_content.append(_build_image(content))
elif isinstance(content, llm.AudioContent):
pass
elif content:
# str or Instructions
current_content.append({"text": str(content)})
elif msg.type == "function_call":
current_content.append(
{
Expand Down
11 changes: 7 additions & 4 deletions livekit-agents/livekit/agents/llm/_provider_format/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,15 @@ def to_chat_ctx(

if msg.type == "message":
for content in msg.content:
if content and isinstance(content, str):
parts.append({"text": content})
if isinstance(content, llm.ImageContent):
parts.append(_to_image_part(content))
elif isinstance(content, llm.AudioContent):
pass
elif content and isinstance(content, dict):
parts.append({"text": json.dumps(content)})
elif isinstance(content, llm.ImageContent):
parts.append(_to_image_part(content))
elif content:
# str or Instructions
parts.append({"text": str(content)})
elif msg.type == "function_call":
fc_part: dict[str, Any] = {
"function_call": {
Expand Down
17 changes: 12 additions & 5 deletions livekit-agents/livekit/agents/llm/_provider_format/mistralai.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@ def to_conversations_ctx(
if group.message:
item = group.message
if isinstance(item, llm.ChatMessage) and item.role in ("system", "developer"):
text_parts = [c for c in item.content if isinstance(c, str)]
text_parts = [
str(c)
for c in item.content
if not isinstance(c, (llm.ImageContent, llm.AudioContent))
]
instructions = "\n".join(text_parts) if text_parts else None
continue

Expand Down Expand Up @@ -83,12 +87,15 @@ def _build_content(msg: llm.ChatMessage) -> str | list[dict[str, Any]]:
text_content = ""

for content in msg.content:
if isinstance(content, str):
if isinstance(content, llm.ImageContent):
list_content.append(_to_image_content(content))
elif isinstance(content, llm.AudioContent):
pass
else:
# str or Instructions
if text_content:
text_content += "\n"
text_content += content
elif isinstance(content, llm.ImageContent):
list_content.append(_to_image_content(content))
text_content += str(content)

if not list_content:
return text_content
Expand Down
Loading
Loading