Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions livekit-agents/livekit/agents/tts/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1192,11 +1192,17 @@ async def _decode_task() -> None:
audio_decoder.push(data)
elif decode_atask:
if isinstance(data, AudioEmitter._FlushSegment):
if audio_decoder:
audio_decoder.end_input()
await decode_atask
_flush_frame()
audio_decoder = None
# Don't tear the decoder down here. flush_if_delayed
# can fire mid-stream while a stateful container/codec
# (WAV/OGG/MP3) is in the middle of a file. Ending
# input would discard the parser state, and the next
# bytes (a continuation of raw PCM/Opus packets with
# no fresh container header) would fail to parse
# against a freshly created decoder. The only
# purpose of FlushSegment here is to release the
# held-back tail so that a slow upstream doesn't
# starve the consumer.
_flush_frame()

elif isinstance(data, AudioEmitter._EndSegment) and segment_ctx:
if audio_decoder:
Expand Down
5 changes: 5 additions & 0 deletions livekit-agents/livekit/agents/utils/codecs/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,11 @@ def _decode_loop(self) -> None:
for f in resampler.resample(None):
self._emit_av_frame(f)

except av.error.EOFError:
# Input ended before FFmpeg could parse enough data — typically caused
# by early cancellation, a very short utterance, or a premature upstream
# close. The container has nothing to emit; this is not an error.
logger.debug("audio decoder input ended before any audio could be decoded")
except Exception:
logger.exception("error decoding audio")
finally:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,16 @@
- `FISH_API_KEY` for authentication (required)
"""

from fish_audio_sdk.schemas import Backends

from livekit.agents import Plugin

from .log import logger
from .models import LatencyMode, OutputFormat
from .models import LatencyMode, OutputFormat, TTSModels
from .tts import TTS
from .version import __version__

__all__ = [
"TTS",
"Backends",
"TTSModels",
"OutputFormat",
"LatencyMode",
"__version__",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Literal

OutputFormat = Literal["wav", "pcm", "mp3"]
TTSModels = Literal["s1", "s2-pro"]

LatencyMode = Literal["normal", "balanced"]
OutputFormat = Literal["wav", "pcm", "mp3", "opus"]

LatencyMode = Literal["normal", "balanced", "low"]
Loading
Loading