From c502bac0e3671e74883d7dbb3ea14e8b66aa9bc8 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 21 Apr 2026 16:47:10 -0600 Subject: [PATCH 01/15] webrtc-sys: add EncodedVideoTrackSource + PassthroughVideoEncoder Introduce a video track source that accepts pre-encoded frames and a matching WebRTC encoder that forwards them unchanged, bypassing real encoding while preserving RTP, pacing, and congestion control. Per-track routing uses VideoFrame::id() as a side channel plus a global EncodedSourceRegistry. A LazyVideoEncoder picks between the passthrough and the real encoder on the first Encode() call. Single-layer only; callers manage simulcast with multiple sources. --- webrtc-sys/build.rs | 3 + .../include/livekit/encoded_video_source.h | 159 +++++++++ .../livekit/passthrough_video_encoder.h | 122 +++++++ webrtc-sys/src/encoded_video_source.cpp | 224 ++++++++++++ webrtc-sys/src/encoded_video_source.rs | 98 ++++++ webrtc-sys/src/lib.rs | 1 + webrtc-sys/src/passthrough_video_encoder.cpp | 320 ++++++++++++++++++ webrtc-sys/src/video_encoder_factory.cpp | 21 +- 8 files changed, 943 insertions(+), 5 deletions(-) create mode 100644 webrtc-sys/include/livekit/encoded_video_source.h create mode 100644 webrtc-sys/include/livekit/passthrough_video_encoder.h create mode 100644 webrtc-sys/src/encoded_video_source.cpp create mode 100644 webrtc-sys/src/encoded_video_source.rs create mode 100644 webrtc-sys/src/passthrough_video_encoder.cpp diff --git a/webrtc-sys/build.rs b/webrtc-sys/build.rs index 072794ecf..d0e8a40f2 100644 --- a/webrtc-sys/build.rs +++ b/webrtc-sys/build.rs @@ -55,6 +55,7 @@ fn main() { "src/apm.rs", "src/audio_mixer.rs", "src/packet_trailer.rs", + "src/encoded_video_source.rs", ]; if is_desktop { @@ -91,6 +92,8 @@ fn main() { "src/apm.cpp", "src/audio_mixer.cpp", "src/packet_trailer.cpp", + "src/encoded_video_source.cpp", + "src/passthrough_video_encoder.cpp", ]); if is_desktop { diff --git a/webrtc-sys/include/livekit/encoded_video_source.h 
b/webrtc-sys/include/livekit/encoded_video_source.h new file mode 100644 index 000000000..b74b10a50 --- /dev/null +++ b/webrtc-sys/include/livekit/encoded_video_source.h @@ -0,0 +1,159 @@ +/* + * Copyright 2026 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "api/media_stream_interface.h" +#include "api/scoped_refptr.h" +#include "api/video/video_frame.h" +#include "media/base/adapted_video_track_source.h" +#include "rtc_base/synchronization/mutex.h" +#include "rust/cxx.h" + +namespace livekit_ffi { + +class EncodedVideoTrackSource; +class EncodedVideoSourceWrapper; + +} // namespace livekit_ffi + +#include "webrtc-sys/src/encoded_video_source.rs.h" + +namespace livekit_ffi { + +// Process-global registry that maps a 16-bit source id (stamped on every +// dummy VideoFrame via VideoFrame::set_id) to the owning encoded source. +// +// This is the mechanism the LazyVideoEncoder uses to decide whether to +// instantiate a PassthroughVideoEncoder or a real encoder on the first +// Encode() call. Keying on VideoFrame::id() (rather than codec name) ensures +// per-track routing is correct even when multiple encoded sources share a +// codec. +class EncodedSourceRegistry { + public: + static EncodedSourceRegistry& instance(); + + // Returns a new non-zero u16 id, skipping any id currently in use. 
+ uint16_t allocate_id(); + + void register_source(uint16_t id, EncodedVideoTrackSource* src); + void unregister_source(uint16_t id); + EncodedVideoTrackSource* lookup(uint16_t id); + + private: + EncodedSourceRegistry() = default; + + std::mutex mu_; + std::unordered_map map_; + uint32_t next_id_ = 1; +}; + +// Owns a single encoded video feed. The paired PassthroughVideoEncoder pops +// frames from this source via the registry (looked up by VideoFrame::id()). +class EncodedVideoTrackSource { + public: + class InternalSource : public webrtc::AdaptedVideoTrackSource { + public: + InternalSource(uint16_t source_id, + EncodedVideoCodecType codec, + uint32_t width, + uint32_t height); + ~InternalSource() override; + + bool is_screencast() const override { return false; } + std::optional needs_denoising() const override { return std::nullopt; } + SourceState state() const override { return kLive; } + bool remote() const override { return false; } + + uint16_t source_id() const { return source_id_; } + EncodedVideoCodecType codec() const { return codec_; } + + // Enqueues the encoded bytes and pushes one dummy VideoFrame into the + // WebRTC pipeline so the encoder tick fires. Returns false if the frame + // was dropped because the queue was full and the frame was not a keyframe. + bool push_encoded_frame(std::vector data, + bool is_keyframe, + bool has_sps_pps, + uint32_t width, + uint32_t height, + int64_t capture_time_us); + + struct DequeuedFrame { + std::vector data; + bool is_keyframe = false; + bool has_sps_pps = false; + uint32_t width = 0; + uint32_t height = 0; + int64_t capture_time_us = 0; + }; + bool pop_encoded_frame(DequeuedFrame& out); + + // Wired into PassthroughVideoEncoder::Encode / SetRates so the Rust + // producer can react to PLI/FIR and congestion control. 
+ void notify_keyframe_requested(); + void notify_target_bitrate(uint32_t bitrate_bps, double framerate_fps); + + void set_observer(rust::Box observer); + + private: + const uint16_t source_id_; + const EncodedVideoCodecType codec_; + + mutable webrtc::Mutex mutex_; + std::deque queue_; + uint32_t width_; + uint32_t height_; + std::unique_ptr> observer_; + + static constexpr size_t kMaxQueueSize = 8; + }; + + EncodedVideoTrackSource(EncodedVideoCodecType codec, + uint32_t width, + uint32_t height); + ~EncodedVideoTrackSource(); + + uint16_t source_id() const { return source_->source_id(); } + EncodedVideoCodecType codec() const { return source_->codec(); } + + bool capture_frame(rust::Slice data, + bool is_keyframe, + bool has_sps_pps, + uint32_t width, + uint32_t height, + int64_t capture_time_us) const; + + void set_observer(rust::Box observer) const; + + webrtc::scoped_refptr get() const { return source_; } + + private: + webrtc::scoped_refptr source_; +}; + +std::shared_ptr new_encoded_video_track_source( + EncodedVideoCodecType codec, + uint32_t width, + uint32_t height); + +} // namespace livekit_ffi diff --git a/webrtc-sys/include/livekit/passthrough_video_encoder.h b/webrtc-sys/include/livekit/passthrough_video_encoder.h new file mode 100644 index 000000000..6162b0d56 --- /dev/null +++ b/webrtc-sys/include/livekit/passthrough_video_encoder.h @@ -0,0 +1,122 @@ +/* + * Copyright 2026 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include "api/environment/environment.h" +#include "api/video/video_frame.h" +#include "api/video_codecs/sdp_video_format.h" +#include "api/video_codecs/video_codec.h" +#include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/video_encoder_factory.h" +#include "livekit/encoded_video_source.h" + +namespace livekit_ffi { + +// Encoder that takes pre-encoded bitstream bytes from a paired +// EncodedVideoTrackSource and forwards them unmodified to the +// EncodedImageCallback. Used for applications that already produce H.264 / +// H.265 / VP8 / VP9 / AV1 bitstreams (e.g. from a hardware capturer or a +// remote camera feed) and want to pipe them through WebRTC without +// re-encoding. +class PassthroughVideoEncoder : public webrtc::VideoEncoder { + public: + explicit PassthroughVideoEncoder(EncodedVideoCodecType codec); + ~PassthroughVideoEncoder() override; + + // webrtc::VideoEncoder + int InitEncode(const webrtc::VideoCodec* codec_settings, + const Settings& settings) override; + int32_t RegisterEncodeCompleteCallback( + webrtc::EncodedImageCallback* callback) override; + int32_t Release() override; + int32_t Encode( + const webrtc::VideoFrame& frame, + const std::vector* frame_types) override; + void SetRates(const RateControlParameters& parameters) override; + EncoderInfo GetEncoderInfo() const override; + + private: + const EncodedVideoCodecType codec_; + webrtc::EncodedImageCallback* callback_ = nullptr; + webrtc::VideoCodec codec_settings_{}; + bool initialized_ = false; +}; + +// Wraps a webrtc::VideoEncoder built lazily on the first Encode() call. This +// lets us delay the decision of "passthrough vs. real encoder" until we can +// inspect the incoming VideoFrame::id() and check the EncodedSourceRegistry. +// +// Cost: one registry lookup + one encoder construction on the first frame. +// Subsequent frames are a single virtual call with no extra overhead. 
+class LazyVideoEncoder : public webrtc::VideoEncoder { + public: + // `real_encoder_builder` is called at most once, the first time Encode() + // receives a frame that does not correspond to an encoded source. + using RealEncoderBuilder = + std::function()>; + + LazyVideoEncoder(webrtc::SdpVideoFormat format, + RealEncoderBuilder real_encoder_builder); + ~LazyVideoEncoder() override; + + int InitEncode(const webrtc::VideoCodec* codec_settings, + const Settings& settings) override; + int32_t RegisterEncodeCompleteCallback( + webrtc::EncodedImageCallback* callback) override; + int32_t Release() override; + int32_t Encode( + const webrtc::VideoFrame& frame, + const std::vector* frame_types) override; + void SetRates(const RateControlParameters& parameters) override; + void OnPacketLossRateUpdate(float packet_loss_rate) override; + void OnRttUpdate(int64_t rtt_ms) override; + void OnLossNotification(const LossNotification& loss_notification) override; + EncoderInfo GetEncoderInfo() const override; + + private: + // Build the underlying encoder based on frame.id() lookup. Returns true on + // success. Safe to call exactly once. + bool BuildInner(uint16_t frame_id); + + const webrtc::SdpVideoFormat format_; + RealEncoderBuilder real_encoder_builder_; + + // Set on first Encode(). + std::unique_ptr inner_; + bool is_passthrough_ = false; + + // Deferred InitEncode() args. + webrtc::VideoCodec pending_codec_settings_{}; + webrtc::VideoEncoder::Settings pending_settings_{ + webrtc::VideoEncoder::Capabilities(/*loss_notification=*/false), + /*number_of_cores=*/1, + /*max_payload_size=*/1200}; + bool has_pending_init_ = false; + webrtc::EncodedImageCallback* callback_ = nullptr; + + // Cached rate / loss / rtt updates that arrived before Encode(). 
+ std::optional pending_rates_; + std::optional pending_loss_rate_; + std::optional pending_rtt_ms_; +}; + +} // namespace livekit_ffi diff --git a/webrtc-sys/src/encoded_video_source.cpp b/webrtc-sys/src/encoded_video_source.cpp new file mode 100644 index 000000000..dff600300 --- /dev/null +++ b/webrtc-sys/src/encoded_video_source.cpp @@ -0,0 +1,224 @@ +/* + * Copyright 2026 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "livekit/encoded_video_source.h" + +#include +#include + +#include "api/video/i420_buffer.h" +#include "api/video/video_frame.h" +#include "api/video/video_rotation.h" +#include "rtc_base/logging.h" +#include "rtc_base/ref_counted_object.h" +#include "rtc_base/time_utils.h" + +namespace livekit_ffi { + +// ---------- EncodedSourceRegistry ---------- + +EncodedSourceRegistry& EncodedSourceRegistry::instance() { + static EncodedSourceRegistry reg; + return reg; +} + +uint16_t EncodedSourceRegistry::allocate_id() { + std::lock_guard lock(mu_); + // Skip kNotSetId (0) and any id currently mapped. With 65535 usable slots + // and short-lived encoded tracks this loop is effectively O(1). 
+ for (uint32_t probe = 0; probe < 0x10000u; ++probe) { + uint16_t candidate = static_cast(next_id_); + next_id_ = next_id_ + 1; + if (next_id_ > 0xFFFFu) { + next_id_ = 1; + } + if (candidate == 0) continue; + if (map_.find(candidate) == map_.end()) { + return candidate; + } + } + RTC_LOG(LS_ERROR) + << "EncodedSourceRegistry exhausted all 65535 slots; reusing 1"; + return 1; +} + +void EncodedSourceRegistry::register_source(uint16_t id, + EncodedVideoTrackSource* src) { + std::lock_guard lock(mu_); + map_[id] = src; +} + +void EncodedSourceRegistry::unregister_source(uint16_t id) { + std::lock_guard lock(mu_); + map_.erase(id); +} + +EncodedVideoTrackSource* EncodedSourceRegistry::lookup(uint16_t id) { + if (id == 0) return nullptr; + std::lock_guard lock(mu_); + auto it = map_.find(id); + return it == map_.end() ? nullptr : it->second; +} + +// ---------- EncodedVideoTrackSource::InternalSource ---------- + +EncodedVideoTrackSource::InternalSource::InternalSource( + uint16_t source_id, + EncodedVideoCodecType codec, + uint32_t width, + uint32_t height) + : webrtc::AdaptedVideoTrackSource(/*required_alignment=*/1), + source_id_(source_id), + codec_(codec), + width_(width), + height_(height) {} + +EncodedVideoTrackSource::InternalSource::~InternalSource() = default; + +bool EncodedVideoTrackSource::InternalSource::push_encoded_frame( + std::vector data, + bool is_keyframe, + bool has_sps_pps, + uint32_t width, + uint32_t height, + int64_t capture_time_us) { + { + webrtc::MutexLock lock(&mutex_); + + if (width != 0 && height != 0) { + width_ = width; + height_ = height; + } + + // Bounded queue: drop-oldest, but never drop a keyframe. 
+ while (queue_.size() >= kMaxQueueSize) { + if (queue_.front().is_keyframe && !is_keyframe) { + RTC_LOG(LS_WARNING) + << "EncodedVideoTrackSource[" << source_id_ + << "] queue full; dropping incoming delta to preserve keyframe"; + return false; + } + queue_.pop_front(); + } + + DequeuedFrame f; + f.data = std::move(data); + f.is_keyframe = is_keyframe; + f.has_sps_pps = has_sps_pps; + f.width = width_; + f.height = height_; + f.capture_time_us = capture_time_us; + queue_.push_back(std::move(f)); + } + + // Emit a dummy VideoFrame so the WebRTC pipeline ticks. The actual bytes + // are pulled out by PassthroughVideoEncoder via the registry, keyed on + // source_id_ stamped into VideoFrame::id(). + // + // The dummy buffer is 2x2 I420 black; callers never see it. WebRTC needs + // *some* buffer here. The width/height on the VideoFrame carry the real + // resolution so downstream stats, pacing, and simulcast decisions work. + auto dummy_buffer = webrtc::I420Buffer::Create(2, 2); + webrtc::I420Buffer::SetBlack(dummy_buffer.get()); + + webrtc::VideoFrame frame = + webrtc::VideoFrame::Builder() + .set_video_frame_buffer(dummy_buffer) + .set_rotation(webrtc::kVideoRotation_0) + .set_timestamp_us(capture_time_us != 0 ? 
capture_time_us + : webrtc::TimeMicros()) + .set_id(source_id_) + .build(); + + OnFrame(frame); + return true; +} + +bool EncodedVideoTrackSource::InternalSource::pop_encoded_frame( + DequeuedFrame& out) { + webrtc::MutexLock lock(&mutex_); + if (queue_.empty()) return false; + out = std::move(queue_.front()); + queue_.pop_front(); + return true; +} + +void EncodedVideoTrackSource::InternalSource::notify_keyframe_requested() { + webrtc::MutexLock lock(&mutex_); + if (observer_) { + (*observer_)->on_keyframe_requested(); + } +} + +void EncodedVideoTrackSource::InternalSource::notify_target_bitrate( + uint32_t bitrate_bps, + double framerate_fps) { + webrtc::MutexLock lock(&mutex_); + if (observer_) { + (*observer_)->on_target_bitrate(bitrate_bps, framerate_fps); + } +} + +void EncodedVideoTrackSource::InternalSource::set_observer( + rust::Box observer) { + webrtc::MutexLock lock(&mutex_); + observer_ = std::make_unique>( + std::move(observer)); +} + +// ---------- EncodedVideoTrackSource ---------- + +EncodedVideoTrackSource::EncodedVideoTrackSource(EncodedVideoCodecType codec, + uint32_t width, + uint32_t height) { + uint16_t id = EncodedSourceRegistry::instance().allocate_id(); + source_ = webrtc::make_ref_counted(id, codec, width, height); + EncodedSourceRegistry::instance().register_source(id, this); + RTC_LOG(LS_INFO) << "EncodedVideoTrackSource created id=" << id + << " codec=" << static_cast(codec) << " " << width + << "x" << height; +} + +EncodedVideoTrackSource::~EncodedVideoTrackSource() { + EncodedSourceRegistry::instance().unregister_source(source_->source_id()); + RTC_LOG(LS_INFO) << "EncodedVideoTrackSource destroyed id=" + << source_->source_id(); +} + +bool EncodedVideoTrackSource::capture_frame(rust::Slice data, + bool is_keyframe, + bool has_sps_pps, + uint32_t width, + uint32_t height, + int64_t capture_time_us) const { + std::vector buf(data.begin(), data.end()); + return source_->push_encoded_frame(std::move(buf), is_keyframe, has_sps_pps, + 
width, height, capture_time_us); +} + +void EncodedVideoTrackSource::set_observer( + rust::Box observer) const { + source_->set_observer(std::move(observer)); +} + +std::shared_ptr new_encoded_video_track_source( + EncodedVideoCodecType codec, + uint32_t width, + uint32_t height) { + return std::make_shared(codec, width, height); +} + +} // namespace livekit_ffi diff --git a/webrtc-sys/src/encoded_video_source.rs b/webrtc-sys/src/encoded_video_source.rs new file mode 100644 index 000000000..ca449766d --- /dev/null +++ b/webrtc-sys/src/encoded_video_source.rs @@ -0,0 +1,98 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use crate::impl_thread_safety; + +#[cxx::bridge(namespace = "livekit_ffi")] +pub mod ffi { + #[repr(u8)] + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum EncodedVideoCodecType { + H264 = 0, + H265 = 1, + Vp8 = 2, + Vp9 = 3, + Av1 = 4, + } + + unsafe extern "C++" { + include!("livekit/encoded_video_source.h"); + + type EncodedVideoTrackSource; + + fn new_encoded_video_track_source( + codec: EncodedVideoCodecType, + width: u32, + height: u32, + ) -> SharedPtr; + + fn source_id(self: &EncodedVideoTrackSource) -> u16; + fn codec(self: &EncodedVideoTrackSource) -> EncodedVideoCodecType; + + fn capture_frame( + self: &EncodedVideoTrackSource, + data: &[u8], + is_keyframe: bool, + has_sps_pps: bool, + width: u32, + height: u32, + capture_time_us: i64, + ) -> bool; + + fn set_observer( + self: &EncodedVideoTrackSource, + observer: Box, + ); + } + + extern "Rust" { + type EncodedVideoSourceWrapper; + + fn on_keyframe_requested(self: &EncodedVideoSourceWrapper); + fn on_target_bitrate( + self: &EncodedVideoSourceWrapper, + bitrate_bps: u32, + framerate_fps: f64, + ); + } +} + +impl_thread_safety!(ffi::EncodedVideoTrackSource, Send + Sync); + +/// Trait implemented by Rust consumers to receive encoder feedback (keyframe +/// requests, target bitrate updates) from WebRTC. 
+pub trait EncodedVideoSourceObserver: Send + Sync { + fn on_keyframe_requested(&self); + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64); +} + +pub struct EncodedVideoSourceWrapper { + observer: Arc, +} + +impl EncodedVideoSourceWrapper { + pub fn new(observer: Arc) -> Self { + Self { observer } + } + + fn on_keyframe_requested(&self) { + self.observer.on_keyframe_requested(); + } + + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + self.observer.on_target_bitrate(bitrate_bps, framerate_fps); + } +} diff --git a/webrtc-sys/src/lib.rs b/webrtc-sys/src/lib.rs index 94f4eed0c..67b63ea70 100644 --- a/webrtc-sys/src/lib.rs +++ b/webrtc-sys/src/lib.rs @@ -22,6 +22,7 @@ pub mod candidate; pub mod data_channel; #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub mod desktop_capturer; +pub mod encoded_video_source; pub mod frame_cryptor; pub mod helper; pub mod jsep; diff --git a/webrtc-sys/src/passthrough_video_encoder.cpp b/webrtc-sys/src/passthrough_video_encoder.cpp new file mode 100644 index 000000000..5c087f779 --- /dev/null +++ b/webrtc-sys/src/passthrough_video_encoder.cpp @@ -0,0 +1,320 @@ +/* + * Copyright 2026 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "livekit/passthrough_video_encoder.h" + +#include +#include + +#include "api/video/encoded_image.h" +#include "api/video/video_codec_type.h" +#include "api/video/video_frame_type.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/logging.h" + +namespace livekit_ffi { + +namespace { + +webrtc::VideoCodecType ToWebrtcCodec(EncodedVideoCodecType codec) { + switch (codec) { + case EncodedVideoCodecType::H264: + return webrtc::kVideoCodecH264; + case EncodedVideoCodecType::H265: + return webrtc::kVideoCodecH265; + case EncodedVideoCodecType::Vp8: + return webrtc::kVideoCodecVP8; + case EncodedVideoCodecType::Vp9: + return webrtc::kVideoCodecVP9; + case EncodedVideoCodecType::Av1: + return webrtc::kVideoCodecAV1; + default: + return webrtc::kVideoCodecGeneric; + } +} + +bool FrameTypesRequestKeyframe( + const std::vector* frame_types) { + if (!frame_types) return false; + return std::any_of(frame_types->begin(), frame_types->end(), + [](webrtc::VideoFrameType t) { + return t == webrtc::VideoFrameType::kVideoFrameKey; + }); +} + +} // namespace + +// ---------- PassthroughVideoEncoder ---------- + +PassthroughVideoEncoder::PassthroughVideoEncoder(EncodedVideoCodecType codec) + : codec_(codec) {} + +PassthroughVideoEncoder::~PassthroughVideoEncoder() = default; + +int PassthroughVideoEncoder::InitEncode(const webrtc::VideoCodec* codec_settings, + const Settings& settings) { + if (codec_settings) { + codec_settings_ = *codec_settings; + } + initialized_ = true; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t PassthroughVideoEncoder::RegisterEncodeCompleteCallback( + webrtc::EncodedImageCallback* callback) { + callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t PassthroughVideoEncoder::Release() { + callback_ = nullptr; + initialized_ = false; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t PassthroughVideoEncoder::Encode( + const webrtc::VideoFrame& 
frame, + const std::vector* frame_types) { + if (!initialized_ || !callback_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + EncodedVideoTrackSource* src = + EncodedSourceRegistry::instance().lookup(frame.id()); + if (!src) { + // Should never happen: LazyVideoEncoder only constructs us when the + // registry lookup succeeded. If it does (e.g. source dropped mid-stream) + // skip the frame rather than error out so the pipeline stays healthy. + RTC_LOG(LS_WARNING) + << "PassthroughVideoEncoder received frame for unknown source id=" + << frame.id(); + return WEBRTC_VIDEO_CODEC_OK; + } + + if (FrameTypesRequestKeyframe(frame_types)) { + src->get()->notify_keyframe_requested(); + } + + EncodedVideoTrackSource::InternalSource::DequeuedFrame enc; + if (!src->get()->pop_encoded_frame(enc)) { + // No bytes queued for this tick; treat as a dropped frame so WebRTC's + // pacing accounting is correct. + callback_->OnDroppedFrame( + webrtc::EncodedImageCallback::DropReason::kDroppedByEncoder); + return WEBRTC_VIDEO_CODEC_OK; + } + + webrtc::EncodedImage image; + image.SetEncodedData(webrtc::EncodedImageBuffer::Create( + enc.data.data(), enc.data.size())); + image.SetFrameType(enc.is_keyframe ? webrtc::VideoFrameType::kVideoFrameKey + : webrtc::VideoFrameType::kVideoFrameDelta); + image.SetRtpTimestamp(frame.rtp_timestamp()); + image.capture_time_ms_ = enc.capture_time_us != 0 + ? 
enc.capture_time_us / 1000 + : frame.render_time_ms(); + image._encodedWidth = enc.width; + image._encodedHeight = enc.height; + image.rotation_ = frame.rotation(); + + webrtc::CodecSpecificInfo info{}; + info.codecType = ToWebrtcCodec(codec_); + info.end_of_picture = true; + + auto result = callback_->OnEncodedImage(image, &info); + if (result.error != webrtc::EncodedImageCallback::Result::OK) { + RTC_LOG(LS_WARNING) + << "PassthroughVideoEncoder OnEncodedImage failed; send_failed=" + << (result.error == + webrtc::EncodedImageCallback::Result::ERROR_SEND_FAILED); + return WEBRTC_VIDEO_CODEC_ERROR; + } + return WEBRTC_VIDEO_CODEC_OK; +} + +void PassthroughVideoEncoder::SetRates(const RateControlParameters& parameters) { + // The encoder instance doesn't know which source fed it (we only learn on + // Encode()). Propagate via the registry on the first Encode() if needed, + // but for now just log — rate control only matters to the producer for + // adaptive streams and we'll wire it in a follow-up. 
+ (void)parameters; +} + +webrtc::VideoEncoder::EncoderInfo PassthroughVideoEncoder::GetEncoderInfo() + const { + EncoderInfo info; + info.implementation_name = "LiveKitPassthrough"; + info.is_hardware_accelerated = false; + info.supports_native_handle = false; + info.has_trusted_rate_controller = true; + info.supports_simulcast = false; + info.requested_resolution_alignment = 1; + info.apply_alignment_to_all_simulcast_layers = false; + return info; +} + +// ---------- LazyVideoEncoder ---------- + +LazyVideoEncoder::LazyVideoEncoder(webrtc::SdpVideoFormat format, + RealEncoderBuilder real_encoder_builder) + : format_(std::move(format)), + real_encoder_builder_(std::move(real_encoder_builder)) {} + +LazyVideoEncoder::~LazyVideoEncoder() = default; + +int LazyVideoEncoder::InitEncode(const webrtc::VideoCodec* codec_settings, + const Settings& settings) { + if (codec_settings) { + pending_codec_settings_ = *codec_settings; + } + pending_settings_ = settings; + has_pending_init_ = true; + + // If we already built an inner (e.g. re-init), forward immediately. 
+ if (inner_) { + return inner_->InitEncode(codec_settings, settings); + } + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t LazyVideoEncoder::RegisterEncodeCompleteCallback( + webrtc::EncodedImageCallback* callback) { + callback_ = callback; + if (inner_) { + return inner_->RegisterEncodeCompleteCallback(callback); + } + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t LazyVideoEncoder::Release() { + int32_t rc = WEBRTC_VIDEO_CODEC_OK; + if (inner_) { + rc = inner_->Release(); + } + inner_.reset(); + has_pending_init_ = false; + pending_rates_.reset(); + pending_loss_rate_.reset(); + pending_rtt_ms_.reset(); + callback_ = nullptr; + return rc; +} + +bool LazyVideoEncoder::BuildInner(uint16_t frame_id) { + EncodedVideoTrackSource* src = + EncodedSourceRegistry::instance().lookup(frame_id); + + if (src != nullptr) { + inner_ = std::make_unique(src->codec()); + is_passthrough_ = true; + RTC_LOG(LS_INFO) + << "LazyVideoEncoder: using PassthroughVideoEncoder for source id=" + << frame_id << " codec=" << static_cast(src->codec()) + << " sdp=" << format_.name; + } else { + inner_ = real_encoder_builder_ ? 
real_encoder_builder_() : nullptr; + is_passthrough_ = false; + if (!inner_) { + RTC_LOG(LS_ERROR) + << "LazyVideoEncoder: real_encoder_builder returned null for " + << format_.name; + return false; + } + } + + if (callback_) { + inner_->RegisterEncodeCompleteCallback(callback_); + } + if (has_pending_init_) { + int rc = inner_->InitEncode(&pending_codec_settings_, pending_settings_); + if (rc != WEBRTC_VIDEO_CODEC_OK) { + RTC_LOG(LS_ERROR) << "LazyVideoEncoder: inner InitEncode failed rc=" + << rc; + return false; + } + } + if (pending_rates_) { + inner_->SetRates(*pending_rates_); + pending_rates_.reset(); + } + if (pending_loss_rate_) { + inner_->OnPacketLossRateUpdate(*pending_loss_rate_); + pending_loss_rate_.reset(); + } + if (pending_rtt_ms_) { + inner_->OnRttUpdate(*pending_rtt_ms_); + pending_rtt_ms_.reset(); + } + return true; +} + +int32_t LazyVideoEncoder::Encode( + const webrtc::VideoFrame& frame, + const std::vector* frame_types) { + if (!inner_) { + if (!BuildInner(frame.id())) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + } + return inner_->Encode(frame, frame_types); +} + +void LazyVideoEncoder::SetRates(const RateControlParameters& parameters) { + if (inner_) { + inner_->SetRates(parameters); + } else { + pending_rates_ = parameters; + } +} + +void LazyVideoEncoder::OnPacketLossRateUpdate(float packet_loss_rate) { + if (inner_) { + inner_->OnPacketLossRateUpdate(packet_loss_rate); + } else { + pending_loss_rate_ = packet_loss_rate; + } +} + +void LazyVideoEncoder::OnRttUpdate(int64_t rtt_ms) { + if (inner_) { + inner_->OnRttUpdate(rtt_ms); + } else { + pending_rtt_ms_ = rtt_ms; + } +} + +void LazyVideoEncoder::OnLossNotification( + const LossNotification& loss_notification) { + if (inner_) { + inner_->OnLossNotification(loss_notification); + } +} + +webrtc::VideoEncoder::EncoderInfo LazyVideoEncoder::GetEncoderInfo() const { + if (inner_) { + return inner_->GetEncoderInfo(); + } + EncoderInfo info; + info.implementation_name = "LiveKitLazy"; + 
info.is_hardware_accelerated = false; + info.supports_native_handle = false; + info.requested_resolution_alignment = 1; + info.apply_alignment_to_all_simulcast_layers = false; + return info; +} + +} // namespace livekit_ffi diff --git a/webrtc-sys/src/video_encoder_factory.cpp b/webrtc-sys/src/video_encoder_factory.cpp index 7435760b4..45dab4cf6 100644 --- a/webrtc-sys/src/video_encoder_factory.cpp +++ b/webrtc-sys/src/video_encoder_factory.cpp @@ -21,6 +21,7 @@ #include "api/video_codecs/video_encoder.h" #include "api/video_codecs/video_encoder_factory_template.h" #include "livekit/objc_video_factory.h" +#include "livekit/passthrough_video_encoder.h" #include "media/base/media_constants.h" #include "media/engine/simulcast_encoder_adapter.h" #include "rtc_base/logging.h" @@ -146,13 +147,23 @@ VideoEncoderFactory::CodecSupport VideoEncoderFactory::QueryCodecSupport( std::unique_ptr VideoEncoderFactory::Create( const webrtc::Environment& env, const webrtc::SdpVideoFormat& format) { - std::unique_ptr encoder; - if (format.IsCodecInList(internal_factory_->GetSupportedFormats())) { - encoder = std::make_unique( - env, internal_factory_.get(), nullptr, format); + if (!format.IsCodecInList(internal_factory_->GetSupportedFormats())) { + return nullptr; } - return encoder; + // Wrap the real encoder construction in a lazy shim so we can branch + // between passthrough and a real encoder based on the first VideoFrame's + // id. The builder is called at most once and only for non-passthrough + // tracks; passthrough tracks never instantiate the SimulcastEncoderAdapter. 
+ auto real_encoder_builder = [env, format, + internal_factory = internal_factory_.get()]() + -> std::unique_ptr { + return std::make_unique( + env, internal_factory, nullptr, format); + }; + + return std::make_unique(format, + std::move(real_encoder_builder)); } } // namespace livekit_ffi From cd427b0330b4dd4dd4662cc275906dc8daf354d5 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 21 Apr 2026 17:38:48 -0600 Subject: [PATCH 02/15] libwebrtc: expose NativeEncodedVideoSource Rust wrapper around webrtc-sys::EncodedVideoTrackSource. Adds the Encoded variant to RtcVideoSource, VideoCodec/EncodedFrameInfo types, and an EncodedVideoSourceObserver trait for keyframe-request callbacks from the C++ side. PeerConnectionFactory gains create_video_track_from_encoded_source. --- libwebrtc/src/lib.rs | 3 +- libwebrtc/src/native/encoded_video_source.rs | 168 ++++++++++++++++++ libwebrtc/src/native/mod.rs | 1 + .../src/native/peer_connection_factory.rs | 17 +- libwebrtc/src/peer_connection_factory.rs | 19 +- libwebrtc/src/video_source.rs | 45 ++++- .../include/livekit/peer_connection_factory.h | 5 + webrtc-sys/src/peer_connection_factory.cpp | 10 ++ webrtc-sys/src/peer_connection_factory.rs | 7 + 9 files changed, 270 insertions(+), 5 deletions(-) create mode 100644 libwebrtc/src/native/encoded_video_source.rs diff --git a/libwebrtc/src/lib.rs b/libwebrtc/src/lib.rs index bf4ad8294..0f060c219 100644 --- a/libwebrtc/src/lib.rs +++ b/libwebrtc/src/lib.rs @@ -69,7 +69,8 @@ pub mod native { pub use webrtc_sys::webrtc::ffi::create_random_uuid; pub use crate::imp::{ - apm, audio_mixer, audio_resampler, frame_cryptor, packet_trailer, yuv_helper, + apm, audio_mixer, audio_resampler, encoded_video_source, frame_cryptor, packet_trailer, + yuv_helper, }; } diff --git a/libwebrtc/src/native/encoded_video_source.rs b/libwebrtc/src/native/encoded_video_source.rs new file mode 100644 index 000000000..0677decf0 --- /dev/null +++ b/libwebrtc/src/native/encoded_video_source.rs @@ -0,0 +1,168 
@@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::{ + fmt::{Debug, Formatter}, + sync::Arc, +}; + +use cxx::SharedPtr; +use parking_lot::Mutex; +use webrtc_sys::encoded_video_source as sys_evs; + +use crate::video_source::{EncodedFrameInfo, VideoCodec, VideoResolution}; + +/// Observer that receives encoder-side feedback (keyframe requests, bitrate +/// updates) for a [`NativeEncodedVideoSource`]. +/// +/// Callbacks are invoked on internal WebRTC threads; implementers MUST be +/// cheap and non-blocking. +pub trait EncodedVideoSourceObserver: Send + Sync { + /// Called when the receiver requests a keyframe (PLI/FIR). + fn on_keyframe_requested(&self); + + /// Called when the WebRTC bandwidth estimator updates the target + /// bitrate / framerate for this source. 
+ fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64); +} + +impl From for sys_evs::ffi::EncodedVideoCodecType { + fn from(codec: VideoCodec) -> Self { + match codec { + VideoCodec::H264 => Self::H264, + VideoCodec::H265 => Self::H265, + VideoCodec::Vp8 => Self::Vp8, + VideoCodec::Vp9 => Self::Vp9, + VideoCodec::Av1 => Self::Av1, + } + } +} + +impl From for VideoCodec { + fn from(codec: sys_evs::ffi::EncodedVideoCodecType) -> Self { + match codec { + sys_evs::ffi::EncodedVideoCodecType::H264 => Self::H264, + sys_evs::ffi::EncodedVideoCodecType::H265 => Self::H265, + sys_evs::ffi::EncodedVideoCodecType::Vp8 => Self::Vp8, + sys_evs::ffi::EncodedVideoCodecType::Vp9 => Self::Vp9, + sys_evs::ffi::EncodedVideoCodecType::Av1 => Self::Av1, + _ => Self::H264, + } + } +} + +struct Inner { + resolution: Mutex, +} + +/// A video source that accepts pre-encoded compressed frames (H.264, H.265, +/// VP8, VP9, AV1) instead of raw pixels. WebRTC's encoder is bypassed for +/// tracks bound to this source — frames flow straight from `capture_frame` +/// into RTP packetization and congestion control. +/// +/// A source carries a single encoded stream (one resolution, one codec). For +/// simulcast, create several sources and publish them on separate tracks. +#[derive(Clone)] +pub struct NativeEncodedVideoSource { + sys_handle: SharedPtr, + inner: Arc, +} + +impl Debug for NativeEncodedVideoSource { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("NativeEncodedVideoSource") + .field("source_id", &self.source_id()) + .field("codec", &self.codec()) + .finish() + } +} + +impl NativeEncodedVideoSource { + pub fn new(codec: VideoCodec, resolution: VideoResolution) -> Self { + let sys_handle = sys_evs::ffi::new_encoded_video_track_source( + codec.into(), + resolution.width, + resolution.height, + ); + Self { + sys_handle, + inner: Arc::new(Inner { resolution: Mutex::new(resolution) }), + } + } + + /// Unique non-zero id assigned to this source. 
Exposed for debugging / + /// tracing; callers do not need to inspect it. + pub fn source_id(&self) -> u16 { + self.sys_handle.source_id() + } + + pub fn codec(&self) -> VideoCodec { + self.sys_handle.codec().into() + } + + pub fn video_resolution(&self) -> VideoResolution { + self.inner.resolution.lock().clone() + } + + /// Push a pre-encoded frame to the track. Returns `true` if the frame was + /// accepted, `false` if the internal queue was full and the frame had to + /// be dropped. + pub fn capture_frame(&self, data: &[u8], info: &EncodedFrameInfo) -> bool { + { + let mut res = self.inner.resolution.lock(); + if info.width != 0 && info.height != 0 { + res.width = info.width; + res.height = info.height; + } + } + + self.sys_handle.capture_frame( + data, + info.is_keyframe, + info.has_sps_pps, + info.width, + info.height, + info.capture_time_us, + ) + } + + /// Register an observer for encoder-side feedback. The previous observer + /// (if any) is dropped. + pub fn set_observer(&self, observer: Arc) { + let wrapper = Box::new(sys_evs::EncodedVideoSourceWrapper::new(Arc::new( + ObserverBridge { inner: observer }, + ))); + self.sys_handle.set_observer(wrapper); + } + + pub fn sys_handle(&self) -> SharedPtr { + self.sys_handle.clone() + } +} + +/// Adapts a `libwebrtc`-level observer trait object to the +/// `webrtc-sys`-level observer trait expected by the cxx bridge. 
+struct ObserverBridge { + inner: Arc, +} + +impl sys_evs::EncodedVideoSourceObserver for ObserverBridge { + fn on_keyframe_requested(&self) { + self.inner.on_keyframe_requested(); + } + + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + self.inner.on_target_bitrate(bitrate_bps, framerate_fps); + } +} diff --git a/libwebrtc/src/native/mod.rs b/libwebrtc/src/native/mod.rs index de56e3345..b253dae15 100644 --- a/libwebrtc/src/native/mod.rs +++ b/libwebrtc/src/native/mod.rs @@ -23,6 +23,7 @@ pub mod audio_track; pub mod data_channel; #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub mod desktop_capturer; +pub mod encoded_video_source; pub mod frame_cryptor; pub mod ice_candidate; pub mod media_stream; diff --git a/libwebrtc/src/native/peer_connection_factory.rs b/libwebrtc/src/native/peer_connection_factory.rs index ae082aecc..f87f4ef78 100644 --- a/libwebrtc/src/native/peer_connection_factory.rs +++ b/libwebrtc/src/native/peer_connection_factory.rs @@ -26,7 +26,7 @@ use crate::{ peer_connection::PeerConnection, peer_connection_factory::RtcConfiguration, rtp_parameters::RtpCapabilities, - video_source::native::NativeVideoSource, + video_source::native::{NativeEncodedVideoSource, NativeVideoSource}, video_track::RtcVideoTrack, MediaType, RtcError, }; @@ -81,6 +81,21 @@ impl PeerConnectionFactory { } } + pub fn create_video_track_from_encoded_source( + &self, + label: &str, + source: NativeEncodedVideoSource, + ) -> RtcVideoTrack { + RtcVideoTrack { + handle: imp_vt::RtcVideoTrack::new( + self.sys_handle.create_video_track_from_encoded_source( + label.to_string(), + source.sys_handle(), + ), + ), + } + } + pub fn create_audio_track(&self, label: &str, source: NativeAudioSource) -> RtcAudioTrack { RtcAudioTrack { handle: imp_at::RtcAudioTrack { diff --git a/libwebrtc/src/peer_connection_factory.rs b/libwebrtc/src/peer_connection_factory.rs index 12f6d24bc..ebd6f4af2 100644 --- a/libwebrtc/src/peer_connection_factory.rs 
+++ b/libwebrtc/src/peer_connection_factory.rs @@ -87,12 +87,19 @@ impl PeerConnectionFactory { pub mod native { use super::PeerConnectionFactory; use crate::{ - audio_source::native::NativeAudioSource, audio_track::RtcAudioTrack, - video_source::native::NativeVideoSource, video_track::RtcVideoTrack, + audio_source::native::NativeAudioSource, + audio_track::RtcAudioTrack, + video_source::native::{NativeEncodedVideoSource, NativeVideoSource}, + video_track::RtcVideoTrack, }; pub trait PeerConnectionFactoryExt { fn create_video_track(&self, label: &str, source: NativeVideoSource) -> RtcVideoTrack; + fn create_video_track_from_encoded_source( + &self, + label: &str, + source: NativeEncodedVideoSource, + ) -> RtcVideoTrack; fn create_audio_track(&self, label: &str, source: NativeAudioSource) -> RtcAudioTrack; } @@ -101,6 +108,14 @@ pub mod native { self.handle.create_video_track(label, source) } + fn create_video_track_from_encoded_source( + &self, + label: &str, + source: NativeEncodedVideoSource, + ) -> RtcVideoTrack { + self.handle.create_video_track_from_encoded_source(label, source) + } + fn create_audio_track(&self, label: &str, source: NativeAudioSource) -> RtcAudioTrack { self.handle.create_audio_track(label, source) } diff --git a/libwebrtc/src/video_source.rs b/libwebrtc/src/video_source.rs index f0404ea8b..4c31ee291 100644 --- a/libwebrtc/src/video_source.rs +++ b/libwebrtc/src/video_source.rs @@ -29,18 +29,57 @@ impl Default for VideoResolution { } } +/// Codec used by a pre-encoded video feed. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum VideoCodec { + H264, + H265, + Vp8, + Vp9, + Av1, +} + +/// Metadata describing a single pre-encoded video frame pushed to an +/// [`native::NativeEncodedVideoSource`]. +#[derive(Debug, Copy, Clone)] +pub struct EncodedFrameInfo { + /// True when this frame is an IDR / keyframe. + pub is_keyframe: bool, + /// True when the `data` buffer already has SPS/PPS (or equivalent) + /// prepended. 
H.264/H.265 only; ignored for other codecs. + pub has_sps_pps: bool, + pub width: u32, + pub height: u32, + /// Capture timestamp in microseconds. `0` lets the source stamp `now`. + pub capture_time_us: i64, +} + +impl Default for EncodedFrameInfo { + fn default() -> Self { + Self { + is_keyframe: false, + has_sps_pps: false, + width: 0, + height: 0, + capture_time_us: 0, + } + } +} + #[non_exhaustive] #[derive(Debug, Clone)] pub enum RtcVideoSource { // TODO(theomonnom): Web video sources (eq. to tracks on browsers?) #[cfg(not(target_arch = "wasm32"))] Native(native::NativeVideoSource), + #[cfg(not(target_arch = "wasm32"))] + Encoded(native::NativeEncodedVideoSource), } // TODO(theomonnom): Support enum dispatch with conditional compilation? impl RtcVideoSource { enum_dispatch!( - [Native]; + [Native, Encoded]; pub fn video_resolution(self: &Self) -> VideoResolution; ); } @@ -49,6 +88,10 @@ impl RtcVideoSource { pub mod native { use std::fmt::{Debug, Formatter}; + pub use crate::native::encoded_video_source::{ + EncodedVideoSourceObserver, NativeEncodedVideoSource, + }; + use super::*; use crate::native::packet_trailer::PacketTrailerHandler; use crate::video_frame::{VideoBuffer, VideoFrame}; diff --git a/webrtc-sys/include/livekit/peer_connection_factory.h b/webrtc-sys/include/livekit/peer_connection_factory.h index 0e77dbadb..62598e72e 100644 --- a/webrtc-sys/include/livekit/peer_connection_factory.h +++ b/webrtc-sys/include/livekit/peer_connection_factory.h @@ -29,6 +29,7 @@ namespace livekit_ffi { class PeerConnectionFactory; class PeerConnectionObserverWrapper; +class EncodedVideoTrackSource; } // namespace livekit_ffi #include "webrtc-sys/src/peer_connection_factory.rs.h" @@ -53,6 +54,10 @@ class PeerConnectionFactory { rust::String label, std::shared_ptr source) const; + std::shared_ptr create_video_track_from_encoded_source( + rust::String label, + std::shared_ptr source) const; + std::shared_ptr create_audio_track( rust::String label, std::shared_ptr 
source) const; diff --git a/webrtc-sys/src/peer_connection_factory.cpp b/webrtc-sys/src/peer_connection_factory.cpp index a0e27a0a2..68c94131b 100644 --- a/webrtc-sys/src/peer_connection_factory.cpp +++ b/webrtc-sys/src/peer_connection_factory.cpp @@ -36,6 +36,7 @@ #include "livekit/peer_connection.h" #include "livekit/rtc_error.h" #include "livekit/rtp_parameters.h" +#include "livekit/encoded_video_source.h" #include "livekit/video_decoder_factory.h" #include "livekit/video_encoder_factory.h" #include "livekit/webrtc.h" @@ -116,6 +117,15 @@ std::shared_ptr PeerConnectionFactory::create_video_track( peer_factory_->CreateVideoTrack(source->get(), label.c_str()))); } +std::shared_ptr +PeerConnectionFactory::create_video_track_from_encoded_source( + rust::String label, + std::shared_ptr source) const { + return std::static_pointer_cast( + rtc_runtime_->get_or_create_media_stream_track( + peer_factory_->CreateVideoTrack(source->get(), label.c_str()))); +} + std::shared_ptr PeerConnectionFactory::create_audio_track( rust::String label, std::shared_ptr source) const { diff --git a/webrtc-sys/src/peer_connection_factory.rs b/webrtc-sys/src/peer_connection_factory.rs index c18d8331c..c84494838 100644 --- a/webrtc-sys/src/peer_connection_factory.rs +++ b/webrtc-sys/src/peer_connection_factory.rs @@ -58,6 +58,7 @@ pub mod ffi { type IceGatheringState = crate::peer_connection::ffi::IceGatheringState; type AudioTrackSource = crate::audio_track::ffi::AudioTrackSource; type VideoTrackSource = crate::video_track::ffi::VideoTrackSource; + type EncodedVideoTrackSource = crate::encoded_video_source::ffi::EncodedVideoTrackSource; type RtpCapabilities = crate::rtp_parameters::ffi::RtpCapabilities; type AudioTrack = crate::audio_track::ffi::AudioTrack; type VideoTrack = crate::video_track::ffi::VideoTrack; @@ -101,6 +102,12 @@ pub mod ffi { source: SharedPtr, ) -> SharedPtr; + fn create_video_track_from_encoded_source( + self: &PeerConnectionFactory, + label: String, + source: 
SharedPtr, + ) -> SharedPtr; + fn create_audio_track( self: &PeerConnectionFactory, label: String, From 9fe93f61ab5813bddd60210db8256b05413964a9 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 21 Apr 2026 17:52:18 -0600 Subject: [PATCH 03/15] livekit: publish encoded video tracks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dispatch RtcVideoSource::Encoded through the new PCF path in LocalVideoTrack, and normalize TrackPublishOptions for encoded sources in LocalParticipant::publish_track — simulcast is forced off and the codec is pinned to the source's codec, with warnings on override. --- livekit-ffi/protocol/ffi.proto | 15 +- livekit-ffi/protocol/video_frame.proto | 58 ++++++++ livekit-ffi/src/conversion/video_frame.rs | 5 +- livekit-ffi/src/server/requests.rs | 10 ++ livekit-ffi/src/server/video_source.rs | 129 +++++++++++++++++- livekit/src/room/options.rs | 12 ++ .../src/room/participant/local_participant.rs | 28 ++++ livekit/src/room/track/local_video_track.rs | 8 ++ 8 files changed, 257 insertions(+), 8 deletions(-) diff --git a/livekit-ffi/protocol/ffi.proto b/livekit-ffi/protocol/ffi.proto index b27a7b865..4b1377b5e 100644 --- a/livekit-ffi/protocol/ffi.proto +++ b/livekit-ffi/protocol/ffi.proto @@ -164,7 +164,10 @@ message FfiRequest { RemoteDataTrackIsPublishedRequest remote_data_track_is_published = 74; DataTrackStreamReadRequest data_track_stream_read = 75; - // NEXT_ID: 76 + // Encoded video + CaptureEncodedVideoFrameRequest capture_encoded_video_frame = 76; + + // NEXT_ID: 77 } } @@ -274,7 +277,10 @@ message FfiResponse { RemoteDataTrackIsPublishedResponse remote_data_track_is_published = 73; DataTrackStreamReadResponse data_track_stream_read = 74; - // NEXT_ID: 75 + // Encoded video + CaptureEncodedVideoFrameResponse capture_encoded_video_frame = 75; + + // NEXT_ID: 76 } } @@ -337,7 +343,10 @@ message FfiEvent { // Data Track (remote) DataTrackStreamEvent data_track_stream_event = 43; - // 
NEXT_ID: 44 + // Encoded video + EncodedVideoSourceEvent encoded_video_source_event = 44; + + // NEXT_ID: 45 } } diff --git a/livekit-ffi/protocol/video_frame.proto b/livekit-ffi/protocol/video_frame.proto index ff91fa3c6..f18c23073 100644 --- a/livekit-ffi/protocol/video_frame.proto +++ b/livekit-ffi/protocol/video_frame.proto @@ -68,6 +68,9 @@ message NewVideoSourceRequest { // Most of the time it corresponds to the source resolution required VideoSourceResolution resolution = 2; optional bool is_screencast = 3; + // When type == VIDEO_SOURCE_ENCODED this field MUST be set. It configures + // the passthrough encoder for the source (codec + initial resolution). + optional EncodedVideoSourceOptions encoded_options = 4; } message NewVideoSourceResponse { required OwnedVideoSource source = 1; } @@ -82,6 +85,31 @@ message CaptureVideoFrameRequest { message CaptureVideoFrameResponse {} +// Push a pre-encoded (compressed) frame to an encoded VideoSource. +// The source must have been created with type == VIDEO_SOURCE_ENCODED. +message CaptureEncodedVideoFrameRequest { + required uint64 source_handle = 1; + // Raw encoded bitstream (e.g. NAL units for H.264/H.265, VP8/VP9/AV1 + // OBU payload). Must be a complete access unit / picture. + required bytes data = 2; + required bool is_keyframe = 3; + // H.264/H.265 only: set when SPS/PPS (or VPS/SPS/PPS) is already + // prepended to `data`. Ignored for other codecs. + optional bool has_sps_pps = 4; + // Frame resolution. 0/0 means "use the resolution from + // EncodedVideoSourceOptions". + optional uint32 width = 5; + optional uint32 height = 6; + // Capture timestamp in microseconds. 0 lets the source stamp `now`. + optional int64 capture_time_us = 7; +} + +message CaptureEncodedVideoFrameResponse { + // True if the frame was queued; false if it was dropped because the + // internal queue was full. 
+ required bool accepted = 1; +} + message VideoConvertRequest { optional bool flip_y = 1; required VideoBufferInfo buffer = 2; @@ -206,13 +234,43 @@ message VideoSourceResolution { enum VideoSourceType { VIDEO_SOURCE_NATIVE = 0; + // A source that accepts pre-encoded compressed frames. WebRTC's internal + // encoder is bypassed for tracks bound to this source. + VIDEO_SOURCE_ENCODED = 1; } message VideoSourceInfo { required VideoSourceType type = 1; + // Only populated for encoded sources. Exposed for debugging / tracing. + optional uint32 encoded_source_id = 2; } message OwnedVideoSource { required FfiOwnedHandle handle = 1; required VideoSourceInfo info = 2; } + +// Options for an encoded video source. One source carries a single encoded +// stream (one resolution, one codec). To simulcast, create multiple sources +// and publish them on separate tracks. +message EncodedVideoSourceOptions { + required VideoCodec codec = 1; +} + +// Encoder-side feedback for an encoded video source. Emitted as FfiEvents +// so client SDKs can react (request a fresh keyframe from their encoder, +// adjust target bitrate, etc.). 
+message EncodedVideoSourceEvent { + required uint64 source_handle = 1; + oneof message { + KeyframeRequested keyframe_requested = 2; + TargetBitrateChanged target_bitrate_changed = 3; + } + + message KeyframeRequested {} + + message TargetBitrateChanged { + required uint32 bitrate_bps = 1; + required double framerate_fps = 2; + } +} diff --git a/livekit-ffi/src/conversion/video_frame.rs b/livekit-ffi/src/conversion/video_frame.rs index 783950b61..cd836606d 100644 --- a/livekit-ffi/src/conversion/video_frame.rs +++ b/livekit-ffi/src/conversion/video_frame.rs @@ -30,7 +30,10 @@ impl From for VideoSourceResolution { impl From<&FfiVideoSource> for proto::VideoSourceInfo { fn from(source: &FfiVideoSource) -> Self { - Self { r#type: source.source_type as i32 } + Self { + r#type: source.source_type as i32, + encoded_source_id: source.encoded_source_id().map(|id| id as u32), + } } } diff --git a/livekit-ffi/src/server/requests.rs b/livekit-ffi/src/server/requests.rs index e27a54168..998482161 100644 --- a/livekit-ffi/src/server/requests.rs +++ b/livekit-ffi/src/server/requests.rs @@ -474,6 +474,15 @@ unsafe fn on_capture_video_frame( Ok(proto::CaptureVideoFrameResponse::default()) } +/// Push a pre-encoded frame to a VIDEO_SOURCE_ENCODED source. 
+fn on_capture_encoded_video_frame( + server: &'static FfiServer, + push: proto::CaptureEncodedVideoFrameRequest, +) -> FfiResult { + let source = server.retrieve_handle::(push.source_handle)?; + source.capture_encoded_frame(server, push) +} + /// Convert a video frame /// /// # Safety: The user must ensure that the pointers/len provided are valid @@ -1294,6 +1303,7 @@ pub fn handle_request( } Request::NewVideoSource(req) => on_new_video_source(server, req)?.into(), Request::CaptureVideoFrame(req) => unsafe { on_capture_video_frame(server, req)?.into() }, + Request::CaptureEncodedVideoFrame(req) => on_capture_encoded_video_frame(server, req)?.into(), Request::VideoConvert(req) => unsafe { on_video_convert(server, req)?.into() }, Request::NewAudioStream(req) => on_new_audio_stream(server, req)?.into(), Request::NewAudioSource(req) => on_new_audio_source(server, req)?.into(), diff --git a/livekit-ffi/src/server/video_source.rs b/livekit-ffi/src/server/video_source.rs index 047443728..57afd60c9 100644 --- a/livekit-ffi/src/server/video_source.rs +++ b/livekit-ffi/src/server/video_source.rs @@ -1,4 +1,4 @@ -// Copyright 2025 LiveKit, Inc. +// Copyright 2026 LiveKit, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,13 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use super::{colorcvt, FfiHandle}; -use crate::{proto, server, FfiError, FfiHandleId, FfiResult}; +use std::sync::Arc; + use livekit::webrtc::{ prelude::*, video_frame::{FrameMetadata, VideoFrame}, }; +use super::{colorcvt, FfiHandle}; +use crate::{proto, server, FfiError, FfiHandleId, FfiResult}; + pub struct FfiVideoSource { pub handle_id: FfiHandleId, pub source_type: proto::VideoSourceType, @@ -36,12 +39,61 @@ fn frame_metadata_from_proto(metadata: Option) -> Option livekit::webrtc::video_source::VideoCodec { + use livekit::webrtc::video_source::VideoCodec; + match codec { + proto::VideoCodec::H264 => VideoCodec::H264, + proto::VideoCodec::H265 => VideoCodec::H265, + proto::VideoCodec::Vp8 => VideoCodec::Vp8, + proto::VideoCodec::Vp9 => VideoCodec::Vp9, + proto::VideoCodec::Av1 => VideoCodec::Av1, + } +} + +/// Forwards encoder-side feedback from the native source out to the FFI +/// client as `EncodedVideoSourceEvent`s. +#[cfg(not(target_arch = "wasm32"))] +struct EncodedObserverBridge { + server: &'static server::FfiServer, + source_handle: u64, +} + +#[cfg(not(target_arch = "wasm32"))] +impl livekit::webrtc::video_source::native::EncodedVideoSourceObserver + for EncodedObserverBridge +{ + fn on_keyframe_requested(&self) { + let _ = self.server.send_event(proto::EncodedVideoSourceEvent { + source_handle: self.source_handle, + message: Some(proto::encoded_video_source_event::Message::KeyframeRequested( + proto::encoded_video_source_event::KeyframeRequested {}, + )), + }.into()); + } + + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + let _ = self.server.send_event(proto::EncodedVideoSourceEvent { + source_handle: self.source_handle, + message: Some(proto::encoded_video_source_event::Message::TargetBitrateChanged( + proto::encoded_video_source_event::TargetBitrateChanged { + bitrate_bps, + framerate_fps, + }, + )), + }.into()); + } +} + impl FfiVideoSource { pub fn setup( server: &'static server::FfiServer, new_source: 
proto::NewVideoSourceRequest, ) -> FfiResult { let source_type = new_source.r#type(); + let handle_id = server.next_id(); #[allow(unreachable_patterns)] let source_inner = match source_type { #[cfg(not(target_arch = "wasm32"))] @@ -53,10 +105,35 @@ impl FfiVideoSource { NativeVideoSource::new(new_source.resolution.into(), is_screencast); RtcVideoSource::Native(video_source) } + #[cfg(not(target_arch = "wasm32"))] + proto::VideoSourceType::VideoSourceEncoded => { + use livekit::webrtc::video_source::{ + native::NativeEncodedVideoSource, VideoResolution, + }; + + let options = new_source.encoded_options.as_ref().ok_or_else(|| { + FfiError::InvalidRequest( + "encoded_options is required for VIDEO_SOURCE_ENCODED".into(), + ) + })?; + + let codec = video_codec_from_proto(options.codec()); + let resolution = VideoResolution { + width: new_source.resolution.width, + height: new_source.resolution.height, + }; + let source = NativeEncodedVideoSource::new(codec, resolution); + + source.set_observer(Arc::new(EncodedObserverBridge { + server, + source_handle: handle_id, + })); + + RtcVideoSource::Encoded(source) + } _ => return Err(FfiError::InvalidRequest("unsupported video source type".into())), }; - let handle_id = server.next_id(); let video_source = Self { handle_id, source_type, source: source_inner }; let source_info = proto::VideoSourceInfo::from(&video_source); server.store_handle(handle_id, video_source); @@ -67,6 +144,16 @@ impl FfiVideoSource { }) } + /// Returns the unique 16-bit id assigned to an encoded source by the + /// WebRTC layer. `None` for non-encoded sources. 
+ pub fn encoded_source_id(&self) -> Option { + #[cfg(not(target_arch = "wasm32"))] + if let RtcVideoSource::Encoded(ref source) = self.source { + return Some(source.source_id()); + } + None + } + pub unsafe fn capture_frame( &self, _server: &'static server::FfiServer, @@ -85,10 +172,44 @@ impl FfiVideoSource { source.capture_frame(&frame); } + #[cfg(not(target_arch = "wasm32"))] + RtcVideoSource::Encoded(_) => { + return Err(FfiError::InvalidRequest( + "capture_video_frame is not supported for encoded sources; \ + use capture_encoded_video_frame instead" + .into(), + )); + } _ => {} } Ok(()) } + + pub fn capture_encoded_frame( + &self, + _server: &'static server::FfiServer, + capture: proto::CaptureEncodedVideoFrameRequest, + ) -> FfiResult { + match self.source { + #[cfg(not(target_arch = "wasm32"))] + RtcVideoSource::Encoded(ref source) => { + use livekit::webrtc::video_source::EncodedFrameInfo; + + let info = EncodedFrameInfo { + is_keyframe: capture.is_keyframe, + has_sps_pps: capture.has_sps_pps.unwrap_or(false), + width: capture.width.unwrap_or(0), + height: capture.height.unwrap_or(0), + capture_time_us: capture.capture_time_us.unwrap_or(0), + }; + let accepted = source.capture_frame(&capture.data, &info); + Ok(proto::CaptureEncodedVideoFrameResponse { accepted }) + } + _ => Err(FfiError::InvalidRequest( + "capture_encoded_video_frame requires a VIDEO_SOURCE_ENCODED source".into(), + )), + } + } } #[cfg(test)] diff --git a/livekit/src/room/options.rs b/livekit/src/room/options.rs index 2fbb79f19..5d9095c2d 100644 --- a/livekit/src/room/options.rs +++ b/livekit/src/room/options.rs @@ -38,6 +38,18 @@ impl VideoCodec { } } +impl From for VideoCodec { + fn from(codec: libwebrtc::video_source::VideoCodec) -> Self { + match codec { + libwebrtc::video_source::VideoCodec::H264 => VideoCodec::H264, + libwebrtc::video_source::VideoCodec::H265 => VideoCodec::H265, + libwebrtc::video_source::VideoCodec::Vp8 => VideoCodec::VP8, + libwebrtc::video_source::VideoCodec::Vp9 
=> VideoCodec::VP9, + libwebrtc::video_source::VideoCodec::Av1 => VideoCodec::AV1, + } + } +} + #[derive(Debug, Clone)] pub struct VideoResolution { pub width: u32, diff --git a/livekit/src/room/participant/local_participant.rs b/livekit/src/room/participant/local_participant.rs index 1053abde5..53a36a22b 100644 --- a/livekit/src/room/participant/local_participant.rs +++ b/livekit/src/room/participant/local_participant.rs @@ -302,6 +302,34 @@ impl LocalParticipant { track: LocalTrack, options: TrackPublishOptions, ) -> RoomResult { + // Encoded video sources deliver pre-encoded single-layer frames. + // Force-disable simulcast and pin the negotiated codec to the + // source's codec so WebRTC's encoder factory picks our passthrough + // encoder path. + let options = { + let mut options = options; + if let LocalTrack::Video(ref video_track) = track { + #[cfg(not(target_arch = "wasm32"))] + if let RtcVideoSource::Encoded(ref encoded_source) = video_track.rtc_source() { + let source_codec: options::VideoCodec = encoded_source.codec().into(); + if options.video_codec != source_codec { + log::warn!( + "publish_track: overriding video_codec {:?} -> {:?} to match encoded source", + options.video_codec, + source_codec + ); + options.video_codec = source_codec; + } + if options.simulcast { + log::warn!( + "publish_track: disabling simulcast for encoded video source (single-layer only)" + ); + options.simulcast = false; + } + } + } + options + }; let disable_red = self.local.encryption_type != EncryptionType::None || !options.red; let mut req = proto::AddTrackRequest { diff --git a/livekit/src/room/track/local_video_track.rs b/livekit/src/room/track/local_video_track.rs index 0da8f683c..f71ec139a 100644 --- a/livekit/src/room/track/local_video_track.rs +++ b/livekit/src/room/track/local_video_track.rs @@ -61,6 +61,14 @@ impl LocalVideoTrack { .pc_factory() .create_video_track(&libwebrtc::native::create_random_uuid(), native_source) } + #[cfg(not(target_arch = "wasm32"))] + 
RtcVideoSource::Encoded(encoded_source) => {
+                use libwebrtc::peer_connection_factory::native::PeerConnectionFactoryExt;
+                LkRuntime::instance().pc_factory().create_video_track_from_encoded_source(
+                    &libwebrtc::native::create_random_uuid(),
+                    encoded_source,
+                )
+            }
             _ => panic!("unsupported video source"),
         };

From ae681d8e134e439670de79b7a53445adbbcdfeb1 Mon Sep 17 00:00:00 2001
From: Stephen DeRosa
Date: Tue, 21 Apr 2026 17:58:53 -0600
Subject: [PATCH 04/15] webrtc-sys: bind PassthroughVideoEncoder to its source

Construct the passthrough encoder with a strong ref to its
EncodedVideoTrackSource instead of resolving the source through the
EncodedSourceRegistry on every Encode() call. With the 1:1 binding in
place, SetRates() can forward congestion-controller target bitrate /
framerate updates straight to the Rust producer.

---
 .../livekit/passthrough_video_encoder.h       |  9 +++++-
 webrtc-sys/src/passthrough_video_encoder.cpp  | 32 +++++++------------
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/webrtc-sys/include/livekit/passthrough_video_encoder.h b/webrtc-sys/include/livekit/passthrough_video_encoder.h
index 6162b0d56..882040cf0 100644
--- a/webrtc-sys/include/livekit/passthrough_video_encoder.h
+++ b/webrtc-sys/include/livekit/passthrough_video_encoder.h
@@ -39,7 +39,13 @@ namespace livekit_ffi {
 // re-encoding.
 class PassthroughVideoEncoder : public webrtc::VideoEncoder {
  public:
-  explicit PassthroughVideoEncoder(EncodedVideoCodecType codec);
+  // The encoder holds a strong ref to the source so that:
+  //  * Encode() can pop frames / notify keyframe requests without a registry
+  //    lookup (bound 1:1 at construction)
+  //  * SetRates() can forward congestion-controller target bitrate updates
+  //    to the Rust producer immediately.
+ explicit PassthroughVideoEncoder( + webrtc::scoped_refptr source); ~PassthroughVideoEncoder() override; // webrtc::VideoEncoder @@ -55,6 +61,7 @@ class PassthroughVideoEncoder : public webrtc::VideoEncoder { EncoderInfo GetEncoderInfo() const override; private: + const webrtc::scoped_refptr source_; const EncodedVideoCodecType codec_; webrtc::EncodedImageCallback* callback_ = nullptr; webrtc::VideoCodec codec_settings_{}; diff --git a/webrtc-sys/src/passthrough_video_encoder.cpp b/webrtc-sys/src/passthrough_video_encoder.cpp index 5c087f779..279f30d0a 100644 --- a/webrtc-sys/src/passthrough_video_encoder.cpp +++ b/webrtc-sys/src/passthrough_video_encoder.cpp @@ -60,8 +60,12 @@ bool FrameTypesRequestKeyframe( // ---------- PassthroughVideoEncoder ---------- -PassthroughVideoEncoder::PassthroughVideoEncoder(EncodedVideoCodecType codec) - : codec_(codec) {} +PassthroughVideoEncoder::PassthroughVideoEncoder( + webrtc::scoped_refptr source) + : source_(std::move(source)), + codec_(source_ ? source_->codec() : EncodedVideoCodecType::H264) { + RTC_DCHECK(source_); +} PassthroughVideoEncoder::~PassthroughVideoEncoder() = default; @@ -93,20 +97,8 @@ int32_t PassthroughVideoEncoder::Encode( return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } - EncodedVideoTrackSource* src = - EncodedSourceRegistry::instance().lookup(frame.id()); - if (!src) { - // Should never happen: LazyVideoEncoder only constructs us when the - // registry lookup succeeded. If it does (e.g. source dropped mid-stream) - // skip the frame rather than error out so the pipeline stays healthy. 
- RTC_LOG(LS_WARNING) - << "PassthroughVideoEncoder received frame for unknown source id=" - << frame.id(); - return WEBRTC_VIDEO_CODEC_OK; - } - if (FrameTypesRequestKeyframe(frame_types)) { - src->get()->notify_keyframe_requested(); + source_->notify_keyframe_requested(); } EncodedVideoTrackSource::InternalSource::DequeuedFrame enc; @@ -147,11 +139,9 @@ int32_t PassthroughVideoEncoder::Encode( } void PassthroughVideoEncoder::SetRates(const RateControlParameters& parameters) { - // The encoder instance doesn't know which source fed it (we only learn on - // Encode()). Propagate via the registry on the first Encode() if needed, - // but for now just log — rate control only matters to the producer for - // adaptive streams and we'll wire it in a follow-up. - (void)parameters; + const uint32_t target_bps = parameters.target_bitrate.get_sum_bps(); + const double framerate = parameters.framerate_fps; + source_->notify_target_bitrate(target_bps, framerate); } webrtc::VideoEncoder::EncoderInfo PassthroughVideoEncoder::GetEncoderInfo() @@ -219,7 +209,7 @@ bool LazyVideoEncoder::BuildInner(uint16_t frame_id) { EncodedSourceRegistry::instance().lookup(frame_id); if (src != nullptr) { - inner_ = std::make_unique(src->codec()); + inner_ = std::make_unique(src->get()); is_passthrough_ = true; RTC_LOG(LS_INFO) << "LazyVideoEncoder: using PassthroughVideoEncoder for source id=" From f3253a6a59cbe54bf40d698636d037ea3e56fc60 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 21 Apr 2026 19:55:30 -0600 Subject: [PATCH 05/15] webrtc-sys: cache and auto-prepend H.264/H.265 parameter sets Encoded track source now scans incoming frames for SPS/PPS (H.264) or VPS/SPS/PPS (H.265), caches the latest seen set, and prepends them to any keyframe that arrives without inline params. This makes hardware encoders and camera feeds that only emit parameter sets on stream start usable as-is, without requiring producers to replicate them on every IDR. 
Producers still get a clear warning if the very first keyframe has no parameter sets and the cache is empty. The caller-supplied has_sps_pps flag becomes a hint only; the scanner is the source of truth so double-prepending is impossible. Also fix a stale `src->get()` reference left over from the SetRates refactor in PassthroughVideoEncoder::Encode. --- .../include/livekit/encoded_video_source.h | 9 + webrtc-sys/src/encoded_video_source.cpp | 162 ++++++++++++++++++ webrtc-sys/src/passthrough_video_encoder.cpp | 2 +- 3 files changed, 172 insertions(+), 1 deletion(-) diff --git a/webrtc-sys/include/livekit/encoded_video_source.h b/webrtc-sys/include/livekit/encoded_video_source.h index b74b10a50..c1c8dc664 100644 --- a/webrtc-sys/include/livekit/encoded_video_source.h +++ b/webrtc-sys/include/livekit/encoded_video_source.h @@ -125,6 +125,15 @@ class EncodedVideoTrackSource { uint32_t height_; std::unique_ptr> observer_; + // Cached H.264/H.265 parameter sets, each with a leading 4-byte Annex-B + // start code. Populated by scanning incoming keyframes. Prepended to + // later keyframes that arrive without inline parameter sets. + // + // For H.264: vps is unused. For H.265: all three are typically present. + std::vector cached_vps_; + std::vector cached_sps_; + std::vector cached_pps_; + static constexpr size_t kMaxQueueSize = 8; }; diff --git a/webrtc-sys/src/encoded_video_source.cpp b/webrtc-sys/src/encoded_video_source.cpp index dff600300..e35fbf85d 100644 --- a/webrtc-sys/src/encoded_video_source.cpp +++ b/webrtc-sys/src/encoded_video_source.cpp @@ -17,6 +17,7 @@ #include "livekit/encoded_video_source.h" #include +#include #include #include "api/video/i420_buffer.h" @@ -28,6 +29,88 @@ namespace livekit_ffi { +namespace { + +// ---- Annex-B NAL unit parsing ---- +// +// Produces a list of NAL units in the bytestream. 
Each NalUnit records the +// offset to its leading start code (00 00 01 or 00 00 00 01) and the +// payload offset/length (the bytes after the start code, up to the next +// start code or end of buffer). + +struct NalUnit { + size_t start_code_offset; // index of the first 0x00 of the start code + size_t start_code_length; // 3 or 4 + size_t payload_offset; // index of the first byte after the start code + size_t payload_length; // length of the NAL unit payload (no start code) + uint8_t first_byte; // payload[0] — used for NAL type extraction +}; + +std::vector ScanNalUnits(const uint8_t* data, size_t size) { + std::vector units; + if (size < 3) return units; + + // Locate start code candidates: positions where data[i..i+2] == 00 00 01. + // Track them in order; then materialize units with proper payload lengths. + std::vector> starts; // (offset, length) + for (size_t i = 0; i + 2 < size;) { + if (data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1) { + size_t off = i; + size_t len = 3; + if (i > 0 && data[i - 1] == 0) { + off = i - 1; + len = 4; + } + starts.emplace_back(off, len); + i += 3; + } else { + ++i; + } + } + + for (size_t j = 0; j < starts.size(); ++j) { + NalUnit u; + u.start_code_offset = starts[j].first; + u.start_code_length = starts[j].second; + u.payload_offset = u.start_code_offset + u.start_code_length; + size_t payload_end = + (j + 1 < starts.size()) ? starts[j + 1].first : size; + if (payload_end < u.payload_offset) continue; + u.payload_length = payload_end - u.payload_offset; + u.first_byte = u.payload_length > 0 ? data[u.payload_offset] : 0; + units.push_back(u); + } + return units; +} + +// H.264 NAL unit types we care about. +enum : uint8_t { + kH264NalSps = 7, + kH264NalPps = 8, +}; + +// H.265 NAL unit types we care about. 
+enum : uint8_t { + kH265NalVps = 32, + kH265NalSps = 33, + kH265NalPps = 34, +}; + +uint8_t H264NalType(uint8_t byte) { return byte & 0x1Fu; } +uint8_t H265NalType(uint8_t byte) { return (byte >> 1) & 0x3Fu; } + +// Copies [start_code_offset, payload_end) into `out`, including the start +// code. `out` is overwritten. +void CopyNalWithStartCode(const uint8_t* data, + const NalUnit& u, + std::vector& out) { + const size_t total = u.start_code_length + u.payload_length; + out.assign(data + u.start_code_offset, + data + u.start_code_offset + total); +} + +} // namespace + // ---------- EncodedSourceRegistry ---------- EncodedSourceRegistry& EncodedSourceRegistry::instance() { @@ -103,6 +186,85 @@ bool EncodedVideoTrackSource::InternalSource::push_encoded_frame( height_ = height; } + // For H.264 / H.265, cache parameter sets we see in the bytestream and + // auto-prepend them to keyframes that arrive without inline params. + // Delta frames are passed through unchanged — receivers carry the last + // seen parameter sets across the stream. 
+ const bool param_sets_applicable = + (codec_ == EncodedVideoCodecType::H264 || + codec_ == EncodedVideoCodecType::H265); + + if (param_sets_applicable) { + const auto units = ScanNalUnits(data.data(), data.size()); + bool saw_sps = false; + bool saw_pps = false; + bool saw_vps = false; + for (const auto& u : units) { + if (codec_ == EncodedVideoCodecType::H264) { + const uint8_t t = H264NalType(u.first_byte); + if (t == kH264NalSps) { + CopyNalWithStartCode(data.data(), u, cached_sps_); + saw_sps = true; + } else if (t == kH264NalPps) { + CopyNalWithStartCode(data.data(), u, cached_pps_); + saw_pps = true; + } + } else { // H.265 + const uint8_t t = H265NalType(u.first_byte); + if (t == kH265NalVps) { + CopyNalWithStartCode(data.data(), u, cached_vps_); + saw_vps = true; + } else if (t == kH265NalSps) { + CopyNalWithStartCode(data.data(), u, cached_sps_); + saw_sps = true; + } else if (t == kH265NalPps) { + CopyNalWithStartCode(data.data(), u, cached_pps_); + saw_pps = true; + } + } + } + + if (is_keyframe) { + // Required params for this codec. + const bool h265 = codec_ == EncodedVideoCodecType::H265; + const bool have_required = + !cached_sps_.empty() && !cached_pps_.empty() && + (!h265 || !cached_vps_.empty()); + const bool frame_missing = + !(saw_sps && saw_pps && (!h265 || saw_vps)); + + if (frame_missing && have_required) { + // Prepend cached params. (void)has_sps_pps — we trust the + // scanner over the flag so callers can't accidentally double- + // prepend or lie about the contents. 
+ std::vector prefixed; + prefixed.reserve(cached_vps_.size() + cached_sps_.size() + + cached_pps_.size() + data.size()); + if (h265) { + prefixed.insert(prefixed.end(), cached_vps_.begin(), + cached_vps_.end()); + } + prefixed.insert(prefixed.end(), cached_sps_.begin(), + cached_sps_.end()); + prefixed.insert(prefixed.end(), cached_pps_.begin(), + cached_pps_.end()); + prefixed.insert(prefixed.end(), data.begin(), data.end()); + data = std::move(prefixed); + has_sps_pps = true; + } else if (frame_missing) { + RTC_LOG(LS_WARNING) + << "EncodedVideoTrackSource[" << source_id_ + << "] keyframe is missing parameter sets and none are cached; " + "receiver will fail to decode until the producer emits a " + "keyframe with inline SPS/PPS" + << (h265 ? "/VPS" : ""); + } else { + // Frame already carries required params (producer inlined them). + has_sps_pps = true; + } + } + } + // Bounded queue: drop-oldest, but never drop a keyframe. while (queue_.size() >= kMaxQueueSize) { if (queue_.front().is_keyframe && !is_keyframe) { diff --git a/webrtc-sys/src/passthrough_video_encoder.cpp b/webrtc-sys/src/passthrough_video_encoder.cpp index 279f30d0a..6287683c8 100644 --- a/webrtc-sys/src/passthrough_video_encoder.cpp +++ b/webrtc-sys/src/passthrough_video_encoder.cpp @@ -102,7 +102,7 @@ int32_t PassthroughVideoEncoder::Encode( } EncodedVideoTrackSource::InternalSource::DequeuedFrame enc; - if (!src->get()->pop_encoded_frame(enc)) { + if (!source_->pop_encoded_frame(enc)) { // No bytes queued for this tick; treat as a dropped frame so WebRTC's // pacing accounting is correct. callback_->OnDroppedFrame( From 5c9fe8d79580ce0f08cc6f52ffc98d449d8bec8c Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 21 Apr 2026 23:19:06 -0600 Subject: [PATCH 06/15] examples: H.264, H.265, VP8, AV1. 
VP9 not supported yet --- Cargo.lock | 15 + Cargo.toml | 1 + examples/pre_encoded_ingest/Cargo.toml | 24 + examples/pre_encoded_ingest/README.md | 601 ++++++++++++++ examples/pre_encoded_ingest/src/receiver.rs | 414 ++++++++++ examples/pre_encoded_ingest/src/sender.rs | 784 +++++++++++++++++++ webrtc-sys/src/passthrough_video_encoder.cpp | 25 +- 7 files changed, 1853 insertions(+), 11 deletions(-) create mode 100644 examples/pre_encoded_ingest/Cargo.toml create mode 100644 examples/pre_encoded_ingest/README.md create mode 100644 examples/pre_encoded_ingest/src/receiver.rs create mode 100644 examples/pre_encoded_ingest/src/sender.rs diff --git a/Cargo.lock b/Cargo.lock index b26b7b137..10770b9aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5836,6 +5836,21 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "pre_encoded_ingest" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "env_logger 0.11.10", + "futures", + "libwebrtc", + "livekit", + "livekit-api", + "log", + "tokio", +] + [[package]] name = "presser" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index 82c214183..6ce249acd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ members = [ "examples/local_video", "examples/mobile", "examples/play_from_disk", + "examples/pre_encoded_ingest", "examples/rpc", "examples/save_to_disk", "examples/screensharing", diff --git a/examples/pre_encoded_ingest/Cargo.toml b/examples/pre_encoded_ingest/Cargo.toml new file mode 100644 index 000000000..6bbba5633 --- /dev/null +++ b/examples/pre_encoded_ingest/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "pre_encoded_ingest" +version = "0.1.0" +edition.workspace = true +publish = false + +[[bin]] +name = "sender" +path = "src/sender.rs" + +[[bin]] +name = "receiver" +path = "src/receiver.rs" + +[dependencies] +anyhow = { workspace = true } +clap = { workspace = true, features = ["derive"] } +env_logger = { workspace = true } +futures = { workspace = true } +libwebrtc = { workspace = true } +livekit = 
{ workspace = true, features = ["rustls-tls-native-roots"] } +livekit-api = { workspace = true, features = ["rustls-tls-native-roots"] } +log = { workspace = true } +tokio = { workspace = true, features = ["full"] } diff --git a/examples/pre_encoded_ingest/README.md b/examples/pre_encoded_ingest/README.md new file mode 100644 index 000000000..1c05e292e --- /dev/null +++ b/examples/pre_encoded_ingest/README.md @@ -0,0 +1,601 @@ +# pre_encoded_ingest + +End-to-end demo of the **pre-encoded video ingest** feature of the Rust +SDK. Pre-encoded H.264, H.265, VP8, or AV1 frames flow from a gstreamer +camera pipeline directly into `NativeEncodedVideoSource::capture_frame`, +get packetized by WebRTC (no software re-encode), and arrive at a +remote peer which writes decoded frames to a TCP port for a second +gstreamer pipeline to render. + +```text +┌────────────┐ encoded (TCP) ┌─────────────┐ RTP (WebRTC) ┌────────────┐ I420 (TCP) ┌─────────────┐ +│ gstreamer │ ───────────► │ sender.rs │ ────────────────► │ receiver.rs│ ─────────────► │ gstreamer │ +│ (camera) │ :5005 │ (pre-encoded│ │ (decoded │ :5006 │ (display) │ +│ tcpserver │ │ publish, │ │ output) │ │ │ +│ │ │ tcp client)│ │ │ │ │ +└────────────┘ └─────────────┘ └────────────┘ └─────────────┘ +``` + +Gstreamer produces the encoded bytestream as a TCP server on :5005; the +Rust sender connects as a client and demuxes it into individual +frames. The sender supports two wire framings, picked by `--codec`: + +- **H.264 / H.265** — raw Annex-B; the sender splits on AUD NAL + boundaries. +- **VP8 / AV1** — IVF container (gstreamer's `ivfmux` or + `avmux_ivf`); the sender parses the 32-byte file header (when + present) and each 12-byte per-frame header. For AV1, each IVF + record is one Temporal Unit (TU) — a complete OBU sequence for + one frame. 
+ +## What this exercises + +- `libwebrtc::video_source::NativeEncodedVideoSource` — the + pre-encoded video track source, for `VideoCodec::H264`, + `VideoCodec::H265`, `VideoCodec::Vp8`, and `VideoCodec::Av1`. +- Annex-B bytestream ingest (H.264/H.265), with automatic + parameter-set caching and keyframe prepending done by the source + (SPS/PPS for H.264, VPS/SPS/PPS for H.265) so the producer does not + need to inline parameter sets on every IDR. +- IVF-framed ingest (VP8 / AV1) — no NAL parameter sets, one + compressed frame per IVF record. Keyframe flag comes from bit 0 of + the VP8 frame tag (RFC 6386) for VP8, or the presence of an + `OBU_SEQUENCE_HEADER` (type 1) in the Temporal Unit for AV1 (AV1 + spec §5.3.2). +- `EncodedVideoSourceObserver` — keyframe-request and target-bitrate + callbacks from the WebRTC pipeline. +- `LocalParticipant::publish_track` normalization for encoded sources + (forces `simulcast=false` and remaps `video_codec` to match the + source codec). + +## Prerequisites + +- gstreamer 1.22+ with the `good`, `bad`, `ugly`, and `libav` plugin + sets: + - macOS: `brew install gstreamer gst-plugins-base gst-plugins-good + gst-plugins-bad gst-plugins-ugly gst-libav` + - Debian/Ubuntu: `sudo apt install gstreamer1.0-tools + gstreamer1.0-plugins-{base,good,bad,ugly} gstreamer1.0-libav` +- A LiveKit server (use `livekit-server --dev` locally or point at a + cloud deployment). + +# Validating Camera + +**Before bringing LiveKit into the picture**, confirm your camera +encode path and a basic H.264 decode preview work in pure GStreamer. +The **send** and **receive** commands below use the **same UDP port +(5005)** on purpose: `udpsink` sends RTP to `127.0.0.1:5005` and `udpsrc` +binds `port=5005` for a quick local check. + +That is only for this camera-validation hop. 
In the [full LiveKit +demo](#running-the-livekit-demo) below, **port 5005** is reserved for +**TCP** from the camera pipeline into `sender` (Annex-B bytestream), +and **port 5006** is where `receiver` serves **decoded I420** to a +separate GStreamer visualizer — different protocol, different payload, +and no overlap with this UDP/RTP smoke test. + +### Send — camera → RTP/UDP 5005 + +macOS (`avfvideosrc`). Linux: replace the source with `v4l2src +device=/dev/video0`. Windows: `mfvideosrc device-index=0`. If the +camera cannot produce 640×480 natively, add `videoscale ! videorate !` +before `x264enc` and relax the first caps filter as needed. + +```bash +gst-launch-1.0 -v \ + avfvideosrc ! \ + video/x-raw,width=640,height=480,framerate=30/1 ! \ + videoconvert ! \ + x264enc tune=zerolatency bitrate=1000 speed-preset=ultrafast key-int-max=30 ! \ + video/x-h264,profile=baseline ! \ + rtph264pay pt=96 config-interval=1 ! \ + udpsink host=127.0.0.1 port=5005 +``` + +### Receive — RTP/UDP 5005 → display + +```bash +gst-launch-1.0 -v \ + udpsrc port=5005 caps="application/x-rtp,media=video,encoding-name=H264,payload=96" ! \ + rtph264depay ! \ + avdec_h264 ! \ + videoconvert ! \ + autovideosink +``` + +On macOS, if `autovideosink` hangs at `PREROLLING` (common with +`glimagesink` under `gst-launch`), replace it with `osxvideosink`. + +This path validates camera, encoder, and decoder. It is **not** the +same wire format as the Rust sender: the demo ingest uses **TCP** and +**Annex-B** with **AUD-delimited** access units (see the pipeline in +[Running the LiveKit demo](#running-the-livekit-demo)). For that path +you still want `x264enc … aud=true`, `h264parse`, and `tcpserversink` as +documented there. + +### Debugging a blank / green receive window + +Before blaming the network, collapse encode → decode into a single +local pipeline. 
A green square here means the encoder is being fed +buffers it cannot consume (wrong pixel format, GL memory, or no frames +at all): + +```bash +gst-launch-1.0 -v \ + avfvideosrc device-index=0 ! \ + video/x-raw,width=640,height=480,format=NV12,framerate=30/1 ! \ + videoconvert ! \ + x264enc tune=zerolatency speed-preset=ultrafast bitrate=1000 key-int-max=60 aud=true ! \ + h264parse config-interval=1 ! avdec_h264 ! videoconvert ! autovideosink sync=false +``` + +Common causes of a green (or all-black) preview: + +- **macOS camera permission.** Grant your terminal app Camera access + in *System Settings → Privacy & Security → Camera* and relaunch it. + Without permission, AVFoundation hands back solid green frames + rather than failing. +- **`memory:GLMemory` on the source pad.** `avfvideosrc` often + advertises GL-texture caps first; `x264enc` cannot consume them. + Pinning `format=NV12` (or any other plain `video/x-raw` format) on + the first caps filter forces a CPU buffer. +- **Caps pinned to a mode the camera cannot produce.** Run + `gst-device-monitor-1.0 Video/Source` and pick a + `width`/`height`/`format`/`framerate` combo listed under + `video/x-raw` (not `video/x-raw(memory:GLMemory)`). + +### Why TCP for the Rust ingest path (and not raw H.264 over UDP)? + +The camera validation above uses **RTP** over UDP on localhost, where +packets stay small enough to avoid typical OS UDP limits. + +For **raw Annex-B H.264** pushed with `udpsink`, macOS in particular has +a low default `net.inet.udp.maxdgram` (~9 KB), which large keyframes +can exceed. Symptoms look like: + +``` +Error sending message: Message too long +``` + +and broken or blocky video when the kernel drops datagrams. The demo +therefore uses **TCP** from GStreamer into `sender`, which has no such +per-write datagram cap. + +## Running the LiveKit demo + +### 0. 
Environment + +```bash +export LIVEKIT_URL=ws://localhost:7880 +export LIVEKIT_API_KEY=devkey +export LIVEKIT_API_SECRET=secret +``` + +Both `sender` and `receiver` use `env_logger`, so they are silent +unless `RUST_LOG` is set. The step 2/3 invocations below already +prefix `RUST_LOG=info`; lower it to `warn` once the demo is running +clean, or raise it to `RUST_LOG=info,libwebrtc=debug` to see the +underlying C++ WebRTC log sink. + +### 1. Start the gstreamer camera pipeline (Terminal 1) + +**Annex-B over TCP** into the Rust sender (not the UDP/RTP validation +pipelines). `tcpserversink` listens on **TCP** port **5005**; stop any +other **TCP** listener on that port if you have one. + +> **macOS — avoid TCP port 5000.** On macOS 12+ the *AirPlay Receiver* +> feature (managed by `ControlCenter`) binds `*:5000` by default. +> `tcpserversink host=0.0.0.0 port=5000` will log +> `Error binding to address 0.0.0.0:5000: Address already in use`, +> fall back to `current-port = 0`, and produce no data — while any +> client still "connects" to :5000 (it's talking to AirPlay, not to +> gstreamer). This demo uses **5005** to sidestep that. Either keep +> 5005, disable AirPlay Receiver in *System Settings → General → +> AirDrop & Handoff → AirPlay Receiver*, or pick another free port. +> Verify with `lsof -nP -iTCP:5005 -sTCP:LISTEN` — you should see +> `gst-launc`, not `ControlCe`. + +macOS: + +```bash +gst-launch-1.0 -v \ + avfvideosrc device-index=0 ! \ + video/x-raw,width=640,height=480,format=NV12,framerate=30/1 ! \ + videoconvert ! \ + x264enc tune=zerolatency speed-preset=ultrafast bitrate=1000 key-int-max=60 aud=true ! \ + h264parse config-interval=1 ! \ + video/x-h264,stream-format=byte-stream,alignment=au ! \ + tcpserversink host=0.0.0.0 port=5005 +``` + +Linux: replace `avfvideosrc device-index=0` with `v4l2src device=/dev/video0`. Windows: `mfvideosrc device-index=0`. 
+
+Knobs that matter for `sender`:
+
+- **`aud=true`** — NAL-type-9 AUD at the start of every access unit;
+  the Rust sender splits the TCP byte stream on those boundaries.
+- **`h264parse` … `stream-format=byte-stream,alignment=au`** — Annex-B
+  suitable for the ingest path.
+- **`tcpserversink`** accepts one TCP client at a time. Another
+  process cannot listen on **TCP** :5005 at the same time. The RTP
+  validation pipelines in [Validating Camera](#validating-camera) use
+  **UDP** :5005 — the same port number but a different protocol, and
+  UDP and TCP port bindings do not conflict — so the two setups do
+  not interfere.
+
+#### H.265 variant
+
+For H.265/HEVC, swap the encoder and parser. `x265enc`'s AUD output is
+controlled via `option-string`, which is forwarded to libx265:
+
+```bash
+gst-launch-1.0 -v \
+  avfvideosrc device-index=0 ! \
+  video/x-raw,width=640,height=480,format=NV12,framerate=30/1 ! \
+  videoconvert ! \
+  x265enc tune=zerolatency speed-preset=ultrafast bitrate=1000 key-int-max=60 \
+    option-string="aud=1:repeat-headers=1" ! \
+  h265parse config-interval=1 ! \
+  video/x-h265,stream-format=byte-stream,alignment=au ! \
+  tcpserversink host=0.0.0.0 port=5005
+```
+
+- `aud=1` emits the HEVC AUD (NAL type 35) at every AU boundary; the
+  sender's splitter keys on those.
+- `repeat-headers=1` makes libx265 inline VPS/SPS/PPS with every
+  keyframe — cheap insurance in case the parser doesn't. The SDK
+  source also caches and re-prepends parameter sets on its own, so
+  either producer behaviour works.
+
+You must pass `--codec h265` to `sender` as well (see step 2) so the
+AU splitter uses the HEVC NAL-type layout. Mixing an H.265 pipeline
+with a `--codec h264` sender will look like "no AUs ever flow" —
+the 5-bit H.264 NAL-type mask won't find AUD=9 in an HEVC stream.
+
+HEVC caveat: the **other peer** (receiver, SFU, JS client, etc.) must
+actually be able to decode H.265. If the SDP answer strips the `H265`
+payload type, nothing will be published even though `sender` logs look
Point-to-point between two instances of this demo on macOS +works because `RTCDefaultVideoDecoderFactory` exposes VideoToolbox +HEVC; your SFU's behaviour may differ. + +#### VP8 variant + +VP8 has no start codes, no NAL units, and no parameter sets, so we +need external framing. The sender consumes the **IVF** container +produced by gstreamer. Use `avmux_ivf` (from `gst-libav`) — it's the +most portable option and ships in Homebrew's consolidated `gstreamer` +formula: + +```bash +gst-launch-1.0 -v \ + avfvideosrc device-index=0 ! \ + video/x-raw,width=640,height=480,format=NV12,framerate=30/1 ! \ + videoconvert ! \ + vp8enc deadline=1 cpu-used=5 threads=4 \ + target-bitrate=1000000 keyframe-max-dist=60 end-usage=cbr ! \ + avmux_ivf ! \ + tcpserversink host=0.0.0.0 port=5005 +``` + +If your install has the native `ivfmux` element (gst-plugins-bad, +relatively recent versions), it's a drop-in replacement — the +Rust-side IVF parser only cares about the on-wire bytes, which are +identical. Check with `gst-inspect-1.0 ivfmux` / `gst-inspect-1.0 +avmux_ivf`; `WARNING: erroneous pipeline: no element "ivfmux"` means +you have to use `avmux_ivf` (or reinstall gstreamer to pick up the +native muxer). + +- The muxer emits a 32-byte file header once, followed by a 12-byte + per-frame header + payload. The sender parses exactly that shape. +- `target-bitrate` is in **bps** (unlike `x264enc`/`x265enc` which use + kbps). The example above is 1 Mbps. +- `keyframe-max-dist=60` matches the 60-frame IDR interval used by the + H.26x pipelines, so time-to-first-frame behaves the same. +- `deadline=1` is realtime mode; `cpu-used=5` is the fastest preset. + +Keep `--codec vp8` on the sender (step 2). VP8 is the baseline +WebRTC codec, so SFU/peer compatibility is not a concern. + +> The `DKIF` file header is optional on the wire. 
The native +> `ivfmux` element emits it; `avmux_ivf` (libav-backed) swallows it +> on a non-seekable sink like `tcpserversink` and emits only +> per-frame records. The sender handles both: it consumes `DKIF` if +> the first four bytes match, otherwise it starts parsing 12-byte +> per-frame records directly. Gstreamer's one-buffer-per-packet +> semantics keep every `tcpserversink` client frame-aligned, so +> start-order between sender and gstreamer does not matter for VP8. +> If the reader ever parses an absurd `frame_size`, it drops the +> TCP connection and reconnects to re-align on the next buffer. + +#### AV1 variant + +AV1 rides the same IVF wire format as VP8 (FOURCC `AV01`). The +sender treats each IVF record as a complete Temporal Unit (TU) — the +OBU sequence for one frame — and detects keyframes by scanning the +TU's OBUs for an `OBU_SEQUENCE_HEADER` (type 1), which libaom, +SVT-AV1, and rav1e only emit at keyframes. + +Use `av1enc` (libaom, in `gst-plugins-bad`). You also want `av1parse` +between the encoder and the muxer so OBUs land in the Low Overhead +Bitstream Format with size fields populated and one TU per buffer: + +```bash +gst-launch-1.0 -v \ + avfvideosrc device-index=0 ! \ + video/x-raw,width=640,height=480,format=NV12,framerate=30/1 ! \ + videoconvert ! \ + av1enc usage-profile=realtime end-usage=cbr cpu-used=9 \ + target-bitrate=1000 keyframe-max-dist=60 threads=4 ! \ + av1parse ! \ + video/x-av1,stream-format=obu-stream,alignment=tu ! \ + avmux_ivf ! \ + tcpserversink host=0.0.0.0 port=5005 +``` + +Pass `--codec av1` to the sender (step 2). Notes on the AV1 encoder: + +- **`av1enc target-bitrate` is in kbps** (libaom convention), unlike + `vp8enc` which uses bps. The example above is 1 Mbps. +- `usage-profile=realtime` + `end-usage=cbr` picks libaom's realtime + rate-control path; without it the default is high-latency good- + quality mode and frames arrive in bursts. 
+- `cpu-used` for libaom AV1 realtime is 0..=10 (higher = faster, + lower quality). 9 is a reasonable live-capture default on a + laptop-class CPU; drop to 7 if your CPU is idle and you want + better quality at the same bitrate. If `ingest: X fps accepted` + lags your capture framerate, bump `cpu-used` or raise `threads` + (libaom AV1 is CPU-hungry). +- `keyframe-max-dist=60` mirrors the other pipelines for identical + time-to-first-frame. +- `av1parse` normalises the bitstream to OBU-stream framing aligned + on Temporal Units, which is what `avmux_ivf` expects and what the + Rust sender's keyframe probe assumes. Leaving it out usually still + works but is encoder-dependent — keep it in the pipeline. + +Alternative encoders (same pipeline shape, only the encoder element +changes): + +- **SVT-AV1** (`svtav1enc`, `gst-plugins-bad`) — faster than libaom + at comparable quality; tuning knobs differ + (`preset=10 target-bitrate=1000 rate-control-mode=cbr`). +- **rav1e** (`rav1enc`, `gst-plugins-rs`) — pure-Rust AV1 encoder; + realtime-ish at low `speed-preset` values. + +Keep `--codec av1` on the sender regardless of which AV1 encoder you +pick — the Rust side only cares about the on-wire IVF/OBU bytes. + +> **AV1 peer compatibility.** Like H.265, the receiving peer must +> actually be able to decode AV1. All recent browsers ship a dav1d +> decoder and LiveKit's default C++ factory also enables dav1d via +> `RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY`, so macOS-to-macOS (two +> instances of this demo) and browser subscribers work out of the +> box. Older SFUs may strip the AV1 payload type from the SDP +> answer; `sender` will log happy ingest stats while the peer sees +> a black frame. + +The IVF-header-optional notes apply here too: native `ivfmux` emits a +`DKIF` header with FOURCC `AV01`; `avmux_ivf` on `tcpserversink` +does not. The sender handles both. + +### 2. 
Start the sender (Terminal 2) + +```bash +RUST_LOG=info cargo run -p pre_encoded_ingest --bin sender -- \ + --tcp-host 127.0.0.1 --tcp-port 5005 \ + --width 640 --height 480 \ + --codec h264 \ + --room pre-encoded-demo --identity encoded-sender +``` + +For the H.265 pipeline use `--codec h265`; for VP8 use `--codec vp8`; +for AV1 use `--codec av1`. + +Flags: + +- `--tcp-host/--tcp-port` where gstreamer's `tcpserversink` is + listening. +- `--width/--height` declared stream resolution; must match what + gstreamer is producing. +- `--codec {h264,h265,vp8,av1}` selects the wire framing and keyframe + probe: Annex-B (AUD-split) for H.264/H.265, or IVF for VP8/AV1. + **Must match the gstreamer pipeline.** `publish_track` will + additionally remap the track's `video_codec` to match the source, + so the LiveKit publish options follow automatically. + +The sender logs one line every ~2 s with ingest stats and will print +warnings when the receiver requests keyframes or when the congestion +controller updates the target bitrate. If the gstreamer pipeline is +restarted, the sender reconnects automatically. + +### 3. Start the receiver (Terminal 3) + +```bash +RUST_LOG=info cargo run -p pre_encoded_ingest --bin receiver -- \ + --tcp-port 5006 \ + --room pre-encoded-demo --identity encoded-receiver \ + --from encoded-sender +``` + +The receiver subscribes to the room and waits for a TCP client on the +given port. Each decoded I420 frame is written tightly packed +(Y ‖ U ‖ V, no row padding, no framing header) on the socket. + +### 4. Visualize (Terminal 4) + +```bash +gst-launch-1.0 -v \ + tcpclientsrc host=127.0.0.1 port=5006 ! \ + rawvideoparse width=640 height=480 format=i420 framerate=30/1 ! \ + videoconvert ! autovideosink sync=false +``` + +`rawvideoparse` needs the exact width/height the receiver is producing. +If the publisher is at 640x480, use `width=640 height=480` here. 
+Framerate just drives gstreamer's display pacing — the Rust side +writes frames as fast as WebRTC delivers them. + +> The receiver's TCP output is **raw I420**, not H.264. Do **not** +> pipe it through `h264parse` — you will see +> `h264parse: No valid frames found before end of stream` / +> `Broken bit stream` because the bytes are Y/U/V planes, not NAL +> units. Use `rawvideoparse` as shown above. + +If the publisher resolution changes mid-run, the receiver closes the +TCP connection; reconnect your gstreamer visualizer to pick up the +new caps. + +## Troubleshooting + +**Sender connects to the room but never logs ingest stats.** +Most often the Rust sender is connected to something that is not +gstreamer. Quick checks, in order: + +1. Confirm the gstreamer pipeline from step 1 is actually running and + logging `PLAYING`, not blocked on `Address already in use`. +2. Sniff the TCP stream directly — you should see NAL-unit bytes + flowing: + + ```bash + nc 127.0.0.1 5005 | pv -b > /dev/null + ``` + + If `pv` stays at `0 B`, the other end is not gstreamer (on macOS, + most commonly AirPlay Receiver on :5000; see the macOS callout in + step 1). +3. Confirm you picked the TCP Annex-B pipeline from step 1 and not the + UDP/RTP validation pipeline from [Validating Camera](#validating-camera) — + the latter won't feed `tcpserversink`. + +**gstreamer says `WARNING: erroneous pipeline: no element "ivfmux"`.** +Your gstreamer install doesn't bundle the native IVF muxer. Swap +`ivfmux` for `avmux_ivf` (from `gst-libav`), which produces an +identical IVF byte stream and is in Homebrew's consolidated +`gstreamer` formula. Confirm with `gst-inspect-1.0 avmux_ivf`. If +neither is present, `brew reinstall gstreamer` (or on Debian/Ubuntu, +`sudo apt install gstreamer1.0-libav gstreamer1.0-plugins-bad`) will +pull both in. + +**gstreamer reports `Error binding to address 0.0.0.0:5000`.** +Another process is listening on that port. 
On macOS that is usually +AirPlay Receiver; use port 5005 (as this README does) or disable +AirPlay Receiver. Check with: + +```bash +lsof -nP -iTCP:5000 -sTCP:LISTEN +``` + +**Visualizer shows `h264parse: No valid frames found` / `Broken bit +stream` / `No caps set`.** +The visualizer in step 4 is consuming the receiver's output +(port 5006), which is raw I420 — not H.264. Use `rawvideoparse` as +shown, not `h264parse`. `h264parse` belongs in step 1, on the +*sender* side. + +**Nothing logs at all from the Rust binaries.** +`sender`/`receiver` use `env_logger`; set `RUST_LOG=info` (as in the +commands above). Without it, both processes are silent even when they +are working correctly. + +**Sender connects to gstreamer, TCP bytes flow, but `ingest:` still +reads 0 fps accepted.** +Almost always a codec / framing mismatch between the gstreamer +pipeline and the sender: + +- **H.26x**: the demuxer looks for the AUD NAL type of whichever codec + you passed via `--codec` (9 for H.264, 35 for H.265), and the two + use different bit layouts for the NAL-type field. An H.265 stream + fed to `--codec h264` (or vice versa) will scan end-to-end without + ever recognising an AUD boundary, so no AU is ever pushed to + `capture_frame`. +- **VP8 / AV1**: the demuxer accepts IVF with or without the `DKIF` + file header (native `ivfmux` emits it; `avmux_ivf` on + `tcpserversink` doesn't). It assumes the first byte starts an IVF + per-frame record, which is what gstreamer's one-buffer-per-packet + delivery guarantees. If you see `IVF: implausible frame_size=N + bytes`, gstreamer produced a byte stream where the first byte of a + new client's delivery is mid-packet (very rare in practice). The + sender logs the warning, drops the TCP connection, and reconnects — + which usually re-anchors on the next buffer boundary. If it keeps + happening, your muxer is producing non-record-aligned buffers; swap + `avmux_ivf` for the native `ivfmux` if it's available. 
If you pass + a `--codec` that doesn't match the pipeline's FOURCC (e.g. + `--codec av1` on a VP8 stream), you'll get a one-shot warning from + the IVF reader but bytes will keep flowing — the FOURCC check is + advisory; what actually differs between the IVF-framed codecs is + the keyframe probe (RFC 6386 frame-tag bit for VP8, OBU sequence- + header scan for AV1). +- **AV1-specific**: if ingest accepts frames but the receiver never + decodes them, check that your pipeline includes + `av1parse ! video/x-av1,stream-format=obu-stream,alignment=tu` + before `avmux_ivf`. Some encoders emit OBUs without size fields + when fed directly to the muxer; the sender's keyframe probe can't + skip those reliably and will mark every frame as a delta, causing + the jitter buffer to wait forever for a keyframe. +- **Mixed**: `--codec vp8` pointed at an Annex-B H.264 pipeline (or + `--codec h264` at an IVF VP8 pipeline) will either trip the IVF + magic check or silently scan forever — re-check `--codec` matches + your pipeline. + +**H.265 track publishes, but the remote peer shows a black frame.** +The other peer cannot decode HEVC — check the SDP answer for an +`H265` payload type. LiveKit SFUs that support H.265 will forward; +ones that don't will either drop the subscription or fall through to +a fallback codec. Point two instances of this demo at the same room +on macOS to isolate whether the problem is the SDK or the SFU: +VideoToolbox HEVC is available in `RTCDefaultVideoDecoderFactory`, so +macOS-to-macOS should decode cleanly. + +## Known limitations + +### VP9 is not documented as a supported codec for this example + +`CodecArg::Vp9` still exists in `sender.rs` (and +`NativeEncodedVideoSource` accepts `VideoCodec::Vp9`), but VP9 ingest +is not exercised by this demo and has rough edges that make it a poor +fit for a "pre-encoded bytes straight to RTP" path: + +- libvpx-vp9 emits **superframes** in IVF (a per-frame record can + bundle several coded frames — e.g. 
a show_existing_frame reshow + plus a hidden alt-ref). WebRTC's VP9 RTP packetizer expects one + *coded* frame per input, so feeding a superframe as one + `capture_frame` call misreports keyframe-ness and confuses the + depacketizer on the peer. +- Keyframe detection from just the VP9 uncompressed header misses + show_existing_frame / alt-ref semantics that determine whether a + picture actually refreshes the reference buffers. +- SVC (spatial / temporal layering) — the main reason to pick VP9 + over VP8 — needs the VP9 RTP descriptor plumbed through the encoded + source, which this branch does not expose. + +For single-layer VP9 with patched-up superframe handling this could +be revisited, but today **use VP8 or AV1** for IVF-framed ingest. +`--codec vp9` is left in the CLI so existing scripts don't break; it +is intentionally undocumented here. + +### Receive-side encoded frames are not exposed + +The feature added in this branch covers the **send** side: the producer +hands encoded bytes in, WebRTC packetizes them out. On the **receive** +side the SDK currently only exposes decoded frames via +`NativeVideoStream`. That's why the receiver round-trips through +WebRTC's internal decoder and serves raw I420 to gstreamer, rather +than forwarding encoded H.264. + +Exposing encoded frames on receive would require a +`RemoteEncodedVideoStream` analogue (likely backed by a WebRTC +`FrameTransformer`) and is a natural follow-up. + +### AUD-delimited bytestreams only + +The sender relies on `x264enc aud=true` emitting a NAL-type-9 AUD at +the start of every AU so it can find frame boundaries over the TCP +byte stream. Producers that don't emit AUDs would need a richer +splitter (e.g. detecting "new primary coded picture" via the slice +header's `first_mb_in_slice`). + +### Keyframe intervals dominate startup latency + +WebRTC's jitter buffer drops delta frames until it sees a keyframe, so +time-to-first-frame on the receiver is bounded by the x264enc +`key-int-max`. 
Lower `key-int-max` for faster startup at the cost of +bitrate overhead. diff --git a/examples/pre_encoded_ingest/src/receiver.rs b/examples/pre_encoded_ingest/src/receiver.rs new file mode 100644 index 000000000..6b47e8299 --- /dev/null +++ b/examples/pre_encoded_ingest/src/receiver.rs @@ -0,0 +1,414 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Pre-encoded H.264 ingest receiver. +//! +//! Subscribes to a LiveKit room and forwards the first incoming video track +//! as tightly-packed I420 frames over a TCP connection. A gstreamer +//! pipeline on the other end renders them. +//! +//! NOTE: the current SDK only exposes *decoded* frames on the receive side +//! (via `NativeVideoStream`). WebRTC's internal decoder runs in-process +//! before we hand the frame to the application. Encoded-frame receive is +//! a future enhancement — see README.md. 
+ +use std::{ + env, + net::SocketAddr, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; + +use anyhow::{Context, Result}; +use clap::Parser; +use futures::StreamExt; +use livekit::{ + prelude::*, + webrtc::{prelude::VideoBuffer, video_stream::native::NativeVideoStream}, +}; +use livekit_api::access_token; +use log::{info, warn}; +use tokio::{ + io::AsyncWriteExt, + net::{TcpListener, TcpStream}, + sync::{mpsc, watch}, +}; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// LiveKit server URL (or set LIVEKIT_URL env var) + #[arg(long)] + url: Option, + + /// LiveKit API key (or set LIVEKIT_API_KEY env var) + #[arg(long)] + api_key: Option, + + /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) + #[arg(long)] + api_secret: Option, + + /// Room name to join + #[arg(long, default_value = "pre-encoded-demo")] + room: String, + + /// Participant identity + #[arg(long, default_value = "encoded-receiver")] + identity: String, + + /// TCP port to serve tightly-packed I420 frames on + #[arg(long, default_value_t = 5001)] + tcp_port: u16, + + /// Only subscribe to the track from this participant identity + #[arg(long)] + from: Option, +} + +#[tokio::main] +async fn main() -> Result<()> { + env_logger::init(); + let args = Args::parse(); + + let (shutdown_tx, shutdown_rx) = watch::channel(false); + tokio::spawn({ + let shutdown_tx = shutdown_tx.clone(); + async move { + let _ = tokio::signal::ctrl_c().await; + let _ = shutdown_tx.send(true); + info!("Ctrl-C received, shutting down..."); + } + }); + + let url = args + .url + .or_else(|| env::var("LIVEKIT_URL").ok()) + .context("--url or LIVEKIT_URL required")?; + let api_key = args + .api_key + .or_else(|| env::var("LIVEKIT_API_KEY").ok()) + .context("--api-key or LIVEKIT_API_KEY required")?; + let api_secret = args + .api_secret + .or_else(|| env::var("LIVEKIT_API_SECRET").ok()) + .context("--api-secret or LIVEKIT_API_SECRET 
required")?; + + let token = access_token::AccessToken::with_api_key(&api_key, &api_secret) + .with_identity(&args.identity) + .with_name(&args.identity) + .with_grants(access_token::VideoGrants { + room_join: true, + room: args.room.clone(), + can_subscribe: true, + ..Default::default() + }) + .to_jwt()?; + + info!("Connecting to LiveKit room '{}' as '{}'...", args.room, args.identity); + let mut room_options = RoomOptions::default(); + room_options.auto_subscribe = true; + room_options.adaptive_stream = false; + let (room, mut events) = Room::connect(&url, &token, room_options).await?; + let room = Arc::new(room); + info!("Connected: {} (sid {})", room.name(), room.sid().await); + + // Boot the frame server. Accepts one client at a time; subsequent + // clients supersede the previous. + let bind: SocketAddr = format!("0.0.0.0:{}", args.tcp_port).parse().unwrap(); + let listener = TcpListener::bind(bind).await.with_context(|| format!("bind tcp {bind}"))?; + info!( + "Serving tightly-packed I420 frames on tcp/{}:{} — waiting for a client", + bind.ip(), + bind.port() + ); + + // Channel feeding raw I420 frames to the TCP writer task. Kept small + // so the most recent frame wins when the client stalls. + let (frame_tx, frame_rx) = mpsc::channel::(2); + + tokio::spawn(frame_server_task(listener, frame_rx, shutdown_rx.clone())); + + let mut active_sid: Option = None; + let frame_tx = Arc::new(frame_tx); + let mut shutdown_rx_main = shutdown_rx.clone(); + + loop { + tokio::select! 
{ + biased; + r = shutdown_rx_main.changed() => { + r.ok(); + if *shutdown_rx_main.borrow() { + break; + } + } + event = events.recv() => { + let Some(event) = event else { break }; + match event { + RoomEvent::TrackSubscribed { track, publication, participant } => { + if let Some(ref from) = args.from { + if participant.identity().as_str() != from { + continue; + } + } + let RemoteTrack::Video(video) = track else { continue }; + if active_sid.is_some() { + info!( + "Ignoring extra video track {} (already have one active)", + publication.sid() + ); + continue; + } + let sid = publication.sid(); + active_sid = Some(sid.clone()); + info!( + "Subscribed to {} from '{}': codec={}, {}x{}", + sid, + participant.identity(), + publication.mime_type(), + publication.dimension().0, + publication.dimension().1, + ); + + let frame_tx = frame_tx.clone(); + let mut shutdown_rx_video = shutdown_rx.clone(); + tokio::spawn(async move { + let mut sink = NativeVideoStream::new(video.rtc_track()); + let mut frames: u64 = 0; + let mut last_log = Instant::now(); + loop { + tokio::select! { + biased; + r = shutdown_rx_video.changed() => { + r.ok(); + if *shutdown_rx_video.borrow() { + break; + } + } + frame = sink.next() => { + let Some(frame) = frame else { + break; + }; + let i420 = frame.buffer.to_i420(); + let w = i420.width(); + let h = i420.height(); + let (sy, su, sv) = i420.strides(); + let (dy, du, dv) = i420.data(); + let packet = pack_i420(w, h, sy, su, sv, dy, du, dv); + // Non-blocking try_send: drop if the writer is slow. + let _ = frame_tx.try_send(packet); + frames += 1; + if last_log.elapsed() >= Duration::from_secs(2) { + info!( + "recv: {}x{}, ~{:.1} fps", + w, + h, + frames as f64 / last_log.elapsed().as_secs_f64() + ); + frames = 0; + last_log = Instant::now(); + } + } + } + } + info!("frame sink ended"); + }); + } + RoomEvent::TrackUnsubscribed { publication, .. } + | RoomEvent::TrackUnpublished { publication, .. 
} => { + if active_sid.as_ref() == Some(&publication.sid()) { + info!("Track {} ended", publication.sid()); + active_sid = None; + } + } + _ => {} + } + } + } + } + + if let Err(e) = room.close().await { + warn!("room.close: {e}"); + } + drop(frame_tx); + + info!("Shutting down..."); + Ok(()) +} + +/// A tightly-packed I420 frame ready to be written on the wire. +struct I420Packet { + width: u32, + height: u32, + /// `width*height + 2*(width/2)*(height/2)` bytes (Y, U, V planes packed + /// contiguously with no row padding). + data: Vec, +} + +fn pack_i420( + width: u32, + height: u32, + stride_y: u32, + stride_u: u32, + stride_v: u32, + y: &[u8], + u: &[u8], + v: &[u8], +) -> I420Packet { + let uv_w = (width + 1) / 2; + let uv_h = (height + 1) / 2; + let y_size = (width * height) as usize; + let uv_size = (uv_w * uv_h) as usize; + let mut data = Vec::with_capacity(y_size + 2 * uv_size); + + for row in 0..height as usize { + let off = row * stride_y as usize; + data.extend_from_slice(&y[off..off + width as usize]); + } + for row in 0..uv_h as usize { + let off = row * stride_u as usize; + data.extend_from_slice(&u[off..off + uv_w as usize]); + } + for row in 0..uv_h as usize { + let off = row * stride_v as usize; + data.extend_from_slice(&v[off..off + uv_w as usize]); + } + + I420Packet { width, height, data } +} + +/// Accepts TCP clients and pumps frames from the channel into whichever +/// one is currently connected. Frames received while no client is +/// connected are dropped. +async fn frame_server_task( + listener: TcpListener, + mut frame_rx: mpsc::Receiver, + mut shutdown_rx: watch::Receiver, +) { + let frames_out = Arc::new(AtomicU64::new(0)); + let frames_dropped = Arc::new(AtomicU64::new(0)); + + { + let frames_out = frames_out.clone(); + let frames_dropped = frames_dropped.clone(); + let mut shutdown_rx_stats = shutdown_rx.clone(); + tokio::spawn(async move { + let mut last = Instant::now(); + loop { + tokio::select! 
{ + biased; + r = shutdown_rx_stats.changed() => { + r.ok(); + if *shutdown_rx_stats.borrow() { + break; + } + } + _ = tokio::time::sleep(Duration::from_secs(2)) => { + let ok = frames_out.swap(0, Ordering::Relaxed); + let dropped = frames_dropped.swap(0, Ordering::Relaxed); + if ok > 0 || dropped > 0 { + info!( + "serve: {:.1} fps written, {:.1} fps dropped", + ok as f64 / last.elapsed().as_secs_f64(), + dropped as f64 / last.elapsed().as_secs_f64() + ); + } + last = Instant::now(); + } + } + } + }); + } + + loop { + tokio::select! { + biased; + r = shutdown_rx.changed() => { + r.ok(); + if *shutdown_rx.borrow() { + return; + } + } + accept = listener.accept() => { + let (client, peer) = match accept { + Ok(c) => c, + Err(e) => { + warn!("accept failed: {e}"); + continue; + } + }; + info!("client connected from {peer}"); + if let Err(e) = pump_to_client( + client, + &mut frame_rx, + &frames_out, + &frames_dropped, + shutdown_rx.clone(), + ) + .await + { + warn!("client disconnected: {e}"); + } + info!("client {peer} closed, waiting for the next one"); + } + } + } +} + +async fn pump_to_client( + mut client: TcpStream, + frame_rx: &mut mpsc::Receiver, + frames_out: &AtomicU64, + frames_dropped: &AtomicU64, + mut shutdown_rx: watch::Receiver, +) -> Result<()> { + let _ = client.set_nodelay(true); + let mut announced_dims = None; + loop { + tokio::select! { + biased; + r = shutdown_rx.changed() => { + r.ok(); + if *shutdown_rx.borrow() { + return Ok(()); + } + } + maybe_frame = frame_rx.recv() => { + let Some(frame) = maybe_frame else { + return Ok(()); + }; + if announced_dims.is_none() { + announced_dims = Some((frame.width, frame.height)); + info!("first frame to client: {}x{}", frame.width, frame.height); + } + if announced_dims != Some((frame.width, frame.height)) { + // Resolution change: restart the client to let gstreamer + // reconfigure its pipeline. rawvideoparse has fixed caps. 
+ frames_dropped.fetch_add(1, Ordering::Relaxed); + return Err(anyhow::anyhow!( + "resolution changed from {:?} to {}x{}; dropping client", + announced_dims, + frame.width, + frame.height + )); + } + client.write_all(&frame.data).await?; + frames_out.fetch_add(1, Ordering::Relaxed); + } + } + } +} diff --git a/examples/pre_encoded_ingest/src/sender.rs b/examples/pre_encoded_ingest/src/sender.rs new file mode 100644 index 000000000..1087606ca --- /dev/null +++ b/examples/pre_encoded_ingest/src/sender.rs @@ -0,0 +1,784 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Pre-encoded H.264 / H.265 / VP8 / VP9 / AV1 ingest sender. +//! +//! Connects to a gstreamer pipeline as a TCP client and pushes each +//! decoded access unit / frame straight through +//! `NativeEncodedVideoSource::capture_frame`. No software encoding +//! happens on the Rust side — the bytes on the wire are the bytes that +//! get packetized into RTP. +//! +//! Two framings are supported, picked by `--codec`: +//! +//! * **H.264 / H.265**: raw Annex-B bytestream. The sender splits on +//! AUD NAL boundaries (NAL type 9 for H.264, type 35 for H.265) and +//! delivers each access unit. +//! * **VP8 / VP9 / AV1**: IVF container (gstreamer's `ivfmux` or +//! `avmux_ivf`). The sender parses the 32-byte IVF file header once +//! (when present), then each 12-byte frame header + payload, and +//! delivers each raw VPx frame (for AV1, each IVF record is one +//! 
Temporal Unit — a complete OBU sequence for one frame). +//! +//! TCP is used instead of UDP because macOS caps per-datagram UDP +//! payloads well below 64 KB by default, which is easy to exceed with +//! keyframes. The matching gstreamer pipelines are documented in +//! README.md. + +use std::{ + env, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, Mutex, + }, + time::{Duration, Instant}, +}; + +use anyhow::{Context, Result}; +use clap::Parser; +use libwebrtc::video_source::{EncodedFrameInfo, RtcVideoSource, VideoCodec, VideoResolution}; +use livekit::{ + options::{TrackPublishOptions, VideoCodec as LkVideoCodec}, + prelude::*, + webrtc::video_source::native::{EncodedVideoSourceObserver, NativeEncodedVideoSource}, +}; +use livekit_api::access_token; +use log::{info, warn}; +use tokio::{io::AsyncReadExt, net::TcpStream, time::sleep}; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// LiveKit server URL (or set LIVEKIT_URL env var) + #[arg(long)] + url: Option, + + /// LiveKit API key (or set LIVEKIT_API_KEY env var) + #[arg(long)] + api_key: Option, + + /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) + #[arg(long)] + api_secret: Option, + + /// Room name to join + #[arg(long, default_value = "pre-encoded-demo")] + room: String, + + /// Participant identity + #[arg(long, default_value = "encoded-sender")] + identity: String, + + /// Host of the gstreamer `tcpserversink` producing the Annex-B bytestream + #[arg(long, default_value = "127.0.0.1")] + tcp_host: String, + + /// Port of the gstreamer `tcpserversink` producing the Annex-B bytestream + #[arg(long, default_value_t = 5000)] + tcp_port: u16, + + /// Declared stream width (px) + #[arg(long, default_value_t = 640)] + width: u32, + + /// Declared stream height (px) + #[arg(long, default_value_t = 480)] + height: u32, + + /// Pre-encoded codec on the wire. Must match the gstreamer pipeline. 
+ #[arg(long, value_enum, default_value_t = CodecArg::H264)] + codec: CodecArg, +} + +/// Codec selector for the CLI. Drives both framing (Annex-B vs. IVF) +/// and keyframe detection. +#[derive(Debug, Copy, Clone, PartialEq, Eq, clap::ValueEnum)] +enum CodecArg { + H264, + H265, + Vp8, + Vp9, + Av1, +} + +impl CodecArg { + fn webrtc_codec(self) -> VideoCodec { + match self { + CodecArg::H264 => VideoCodec::H264, + CodecArg::H265 => VideoCodec::H265, + CodecArg::Vp8 => VideoCodec::Vp8, + CodecArg::Vp9 => VideoCodec::Vp9, + CodecArg::Av1 => VideoCodec::Av1, + } + } + + fn livekit_codec(self) -> LkVideoCodec { + match self { + CodecArg::H264 => LkVideoCodec::H264, + CodecArg::H265 => LkVideoCodec::H265, + CodecArg::Vp8 => LkVideoCodec::VP8, + CodecArg::Vp9 => LkVideoCodec::VP9, + CodecArg::Av1 => LkVideoCodec::AV1, + } + } + + /// NAL unit type from the first byte after a start code. + /// H.264: lower 5 bits. H.265: bits 1..7. + fn nal_type(self, first_byte: u8) -> u8 { + match self { + CodecArg::H264 => first_byte & 0x1F, + CodecArg::H265 => (first_byte >> 1) & 0x3F, + // VPx/AV1 have no NAL units; callers should not reach this. + CodecArg::Vp8 | CodecArg::Vp9 | CodecArg::Av1 => 0, + } + } + + /// Access-unit delimiter NAL type. 9 (AUD) for H.264, 35 (AUD_NUT) + /// for H.265. Undefined for IVF-framed codecs. + fn aud_nal_type(self) -> u8 { + match self { + CodecArg::H264 => 9, + CodecArg::H265 => 35, + CodecArg::Vp8 | CodecArg::Vp9 | CodecArg::Av1 => u8::MAX, + } + } + + /// Whether a given NAL type is a keyframe NAL. + /// H.264: IDR slice (5). H.265: any IRAP (BLA/IDR/CRA, 16..=23). + /// IVF-framed codecs use [`is_keyframe`] directly; this never runs. 
+ fn is_keyframe_nal(self, nal_type: u8) -> bool { + match self { + CodecArg::H264 => nal_type == 5, + CodecArg::H265 => (16..=23).contains(&nal_type), + CodecArg::Vp8 | CodecArg::Vp9 | CodecArg::Av1 => false, + } + } + + fn name(self) -> &'static str { + match self { + CodecArg::H264 => "H.264", + CodecArg::H265 => "H.265", + CodecArg::Vp8 => "VP8", + CodecArg::Vp9 => "VP9", + CodecArg::Av1 => "AV1", + } + } + + /// IVF FOURCC expected on the wire. Only meaningful for codecs + /// delivered via `ivfmux` / `avmux_ivf`. + fn ivf_fourcc(self) -> Option<&'static [u8; 4]> { + match self { + CodecArg::Vp8 => Some(b"VP80"), + CodecArg::Vp9 => Some(b"VP90"), + CodecArg::Av1 => Some(b"AV01"), + _ => None, + } + } +} + +/// Simple observer that logs feedback from the encoder pipeline. Real +/// producers should react here — e.g. nudge their hardware encoder to +/// emit an IDR on `on_keyframe_requested`, or clamp bitrate on +/// `on_target_bitrate`. +struct LoggingObserver { + last_bitrate_log: Mutex>, +} + +impl LoggingObserver { + fn new() -> Self { + Self { last_bitrate_log: Mutex::new(None) } + } +} + +impl EncodedVideoSourceObserver for LoggingObserver { + fn on_keyframe_requested(&self) { + warn!( + "keyframe requested by receiver — producer should emit a keyframe on the next frame \ + (in this demo the next keyframe comes when the gstreamer encoder hits its \ + keyframe-interval knob, e.g. x264enc/x265enc key-int-max or vp8enc keyframe-max-dist)" + ); + } + + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + // Rate-limit logging to 1 Hz. + let mut last = self.last_bitrate_log.lock().unwrap(); + let now = Instant::now(); + if last.is_none_or(|t| now.duration_since(t) >= Duration::from_secs(1)) { + *last = Some(now); + info!("target bitrate update: {} kbps @ {:.1} fps", bitrate_bps / 1000, framerate_fps); + } + } +} + +/// Higher-level demuxer: hides whether the wire is Annex-B or IVF. 
+enum Demuxer { + AnnexB(AuSplitter), + Ivf(IvfReader), +} + +impl Demuxer { + fn new(codec: CodecArg) -> Self { + match codec { + CodecArg::H264 | CodecArg::H265 => Demuxer::AnnexB(AuSplitter::new(codec)), + CodecArg::Vp8 | CodecArg::Vp9 | CodecArg::Av1 => Demuxer::Ivf(IvfReader::new(codec)), + } + } + + fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { + match self { + Demuxer::AnnexB(s) => s.feed(chunk, out), + Demuxer::Ivf(r) => r.feed(chunk, out), + } + } + + /// True if the demuxer has detected a byte misalignment it can't + /// recover from without a fresh TCP connection. Only meaningful + /// for IVF today. + fn desynced(&self) -> bool { + match self { + Demuxer::AnnexB(_) => false, + Demuxer::Ivf(r) => r.desynced, + } + } +} + +/// Reads IVF-framed video off the wire and emits one compressed video +/// frame per call to `feed` per available frame. Format per libvpx: +/// +/// File header (32 bytes, optional): "DKIF", u16 version, u16 +/// header_len, 4-byte FOURCC, u16 width, u16 height, u32 tb_num, +/// u32 tb_den, u32 frame_count, u32 unused. +/// +/// Frame header (12 bytes each): u32 frame_size, u64 pts. +/// +/// Frame payload: `frame_size` bytes. All integers little-endian. +/// +/// The file header is *optional* in our parser: gstreamer's +/// `avmux_ivf` on a non-seekable `tcpserversink` emits only per-frame +/// records (libavformat writes `DKIF` at `write_header` time, but the +/// ffmpeg AVIO wrapper in gst-libav appears to swallow it when the +/// output is non-seekable). We still accept `ivfmux` (native +/// gst-plugins-bad element), which does emit `DKIF`, by parsing the +/// file header if it's the first 4 bytes. Either way, gstreamer's +/// one-buffer-per-packet semantics mean new `tcpserversink` clients +/// land on an IVF record boundary. 
+/// +/// If we ever parse a `frame_size` that exceeds [`MAX_FRAME_BYTES`], +/// we're byte-misaligned (should be rare in practice); the reader +/// flips `desynced=true`, which the main loop reads to force a TCP +/// reconnect and a fresh alignment from the next gstreamer buffer. +const MAX_FRAME_BYTES: usize = 8 * 1024 * 1024; + +struct IvfReader { + codec: CodecArg, + buf: Vec, + /// Set once we've either consumed a 32-byte DKIF header or + /// decided there isn't one. After this, `buf` is interpreted as + /// back-to-back 12-byte-header + payload records. + header_phase_done: bool, + /// True if a frame_size field was absurd; main loop should + /// disconnect and reconnect to re-align. + desynced: bool, +} + +impl IvfReader { + fn new(codec: CodecArg) -> Self { + Self { + codec, + buf: Vec::with_capacity(256 * 1024), + header_phase_done: false, + desynced: false, + } + } + + fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { + self.buf.extend_from_slice(chunk); + + if !self.header_phase_done { + // Decide whether the stream starts with a DKIF file header. + // We need at least 4 bytes to check the magic, and 32 to + // consume the full header if present. + if self.buf.len() < 4 { + return; + } + if &self.buf[0..4] == b"DKIF" { + if self.buf.len() < 32 { + return; + } + let fourcc = &self.buf[8..12]; + if let Some(expected) = self.codec.ivf_fourcc() { + if fourcc != expected { + warn!( + "IVF: expected FOURCC {:?} for {}, got {:?}", + std::str::from_utf8(expected).unwrap_or("?"), + self.codec.name(), + std::str::from_utf8(fourcc).unwrap_or("?"), + ); + } + } + info!( + "IVF: file header OK (codec fourcc={})", + std::str::from_utf8(fourcc).unwrap_or("?") + ); + self.buf.drain(..32); + } else { + // No file header — typical for gstreamer's `avmux_ivf` + // on tcpserversink. Gstreamer buffer boundaries keep + // us frame-aligned, so treat byte 0 as the start of a + // per-frame record. 
+ info!( + "IVF: no DKIF file header on this stream (typical for gstreamer \ + avmux_ivf on tcpserversink); parsing per-frame records directly" + ); + } + self.header_phase_done = true; + } + + // Emit as many whole frames as we have. + loop { + if self.buf.len() < 12 { + return; + } + let size = u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]]) + as usize; + if size == 0 || size > MAX_FRAME_BYTES { + warn!( + "IVF: implausible frame_size={size} bytes — byte stream is misaligned. \ + Dropping connection so the main loop can reconnect and re-anchor on the \ + next gstreamer buffer boundary." + ); + self.desynced = true; + self.buf.clear(); + return; + } + if self.buf.len() < 12 + size { + return; + } + let frame = self.buf[12..12 + size].to_vec(); + self.buf.drain(..12 + size); + out.push(frame); + } + } +} + +/// Splits an incoming Annex-B bytestream into access units on AUD +/// boundaries. The AUD NAL type and NAL-type extraction are codec +/// specific — pass the right `CodecArg`. +/// +/// Relies on the upstream parser emitting an AUD at the start of every +/// AU (`x264enc aud=true` for H.264, `x265enc option-string="aud=1"` +/// plumbed through `h265parse` for H.265). Bytes before the first AUD +/// are discarded; each subsequent AU is emitted when the *next* AU's +/// AUD arrives (so there's always one AU of buffering lag, bounded by +/// the frame interval). +struct AuSplitter { + codec: CodecArg, + buf: Vec, + /// Offset (into `buf`) of the start code of the AU currently being + /// accumulated. `None` before the first AUD has been observed. + au_start: Option, + /// Position up to which `buf` has already been scanned for start codes. + scan_pos: usize, +} + +impl AuSplitter { + fn new(codec: CodecArg) -> Self { + Self { codec, buf: Vec::with_capacity(256 * 1024), au_start: None, scan_pos: 0 } + } + + fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { + self.buf.extend_from_slice(chunk); + + // Scan for start codes. 
We need 4 more bytes to decide (3-byte + // start code + 1 NAL header byte). A 4-byte start code is detected + // one byte earlier and handled naturally as "zero byte, then + // 3-byte start code" collapsing into a 4-byte pattern. + let aud = self.codec.aud_nal_type(); + while self.scan_pos + 3 < self.buf.len() { + let i = self.scan_pos; + let (sc_start, sc_len) = if i + 4 <= self.buf.len() + && self.buf[i] == 0 + && self.buf[i + 1] == 0 + && self.buf[i + 2] == 0 + && self.buf[i + 3] == 1 + { + // 4-byte start code at i. We still need the NAL header byte after it. + if i + 5 > self.buf.len() { + break; + } + (i, 4) + } else if self.buf[i] == 0 && self.buf[i + 1] == 0 && self.buf[i + 2] == 1 { + (i, 3) + } else { + self.scan_pos += 1; + continue; + }; + + let nal_off = sc_start + sc_len; + if self.codec.nal_type(self.buf[nal_off]) == aud { + // AUD — boundary between AUs. + if let Some(start) = self.au_start.take() { + out.push(self.buf[start..sc_start].to_vec()); + } + self.au_start = Some(sc_start); + } + self.scan_pos = nal_off + 1; + } + + // Compact: drop bytes before the current AU start (or before the + // last 3 bytes, in case a start code straddles the next feed). + let drain_before = self.au_start.unwrap_or_else(|| self.buf.len().saturating_sub(3)); + if drain_before > 0 { + self.buf.drain(..drain_before); + self.scan_pos = self.scan_pos.saturating_sub(drain_before); + if self.au_start.is_some() { + self.au_start = Some(0); + } + } + } +} + +/// Minimal keyframe probe. For H.264/H.265 it scans for a keyframe +/// NAL (IDR slice / IRAP); for VP8 it reads bit 0 of the frame tag +/// (RFC 6386 §9.1: 0 = keyframe, 1 = interframe); for VP9 it decodes +/// the leading bits of the uncompressed header (VP9 bitstream spec +/// §6.2); for AV1 it scans the OBUs in the Temporal Unit for an +/// OBU_SEQUENCE_HEADER (which libaom/SVT-AV1/rav1e only emit at +/// keyframes — this is the same heuristic WebRTC's own AV1 RTP +/// packetizer uses). 
+fn is_keyframe(codec: CodecArg, data: &[u8]) -> bool { + match codec { + CodecArg::H264 | CodecArg::H265 => is_keyframe_annex_b(codec, data), + CodecArg::Vp8 => !data.is_empty() && (data[0] & 0x01) == 0, + CodecArg::Vp9 => is_keyframe_vp9(data), + CodecArg::Av1 => is_keyframe_av1(data), + } +} + +/// AV1 keyframe probe. Walks the OBUs in a Temporal Unit and returns +/// true if any OBU has type `OBU_SEQUENCE_HEADER` (1). AV1 spec §5.3.2 +/// (OBU header) + §5.3.1 (leb128): +/// +/// * byte 0 bits 6..=3: `obu_type`. +/// * byte 0 bit 2: `obu_extension_flag`; if set, one extension byte +/// follows. +/// * byte 0 bit 1: `obu_has_size_field`; if set, a leb128-encoded +/// `obu_size` follows and gives the payload length. If clear, the +/// OBU runs to the end of the input (legacy AV1) — so we stop +/// scanning because we can't skip it. +/// +/// Assumes the Low Overhead Bitstream Format produced by gstreamer's +/// `av1parse stream-format=obu-stream,alignment=tu` + `avmux_ivf`: +/// one Temporal Unit per IVF record, each OBU carries its own size. +fn is_keyframe_av1(mut data: &[u8]) -> bool { + const OBU_SEQUENCE_HEADER: u8 = 1; + while !data.is_empty() { + let header = data[0]; + let obu_type = (header >> 3) & 0x0F; + let ext = (header & 0x04) != 0; + let has_size = (header & 0x02) != 0; + + let mut off = 1; + if ext { + if off >= data.len() { + return false; + } + off += 1; + } + if !has_size { + // No size field means we can't skip to the next OBU; treat + // this OBU as the last one and decide based on what we've + // seen so far. 
+ return obu_type == OBU_SEQUENCE_HEADER; + } + let (size, size_len) = match read_leb128(&data[off..]) { + Some(v) => v, + None => return false, + }; + off += size_len; + let payload_end = match off.checked_add(size as usize) { + Some(e) if e <= data.len() => e, + _ => return false, + }; + if obu_type == OBU_SEQUENCE_HEADER { + return true; + } + data = &data[payload_end..]; + } + false +} + +/// Decodes an AV1 leb128 (unsigned little-endian base-128) integer. +/// Returns `(value, bytes_consumed)` or `None` on truncated input. +/// AV1 spec §4.10.5 caps the encoding at 8 bytes and 32 significant +/// bits; we enforce the 8-byte limit and keep the value in a u32. +fn read_leb128(input: &[u8]) -> Option<(u32, usize)> { + let mut value: u64 = 0; + for (i, &byte) in input.iter().take(8).enumerate() { + value |= ((byte & 0x7F) as u64) << (i * 7); + if (byte & 0x80) == 0 { + return u32::try_from(value).ok().map(|v| (v, i + 1)); + } + } + None +} + +/// VP9 uncompressed-header keyframe probe. Reads first-byte bits (MSB +/// first) per VP9 bitstream spec §6.2: +/// +/// * bits 7..=6: `frame_marker` (must be `0b10`). +/// * bit 5: `profile_low_bit`, bit 4: `profile_high_bit` +/// (combined `profile` ∈ 0..=3). +/// * For `profile == 3`: bit 3 is reserved-zero, bit 2 is +/// `show_existing_frame`, bit 1 is `frame_type`. +/// * For `profile != 3`: bit 3 is `show_existing_frame`, bit 2 is +/// `frame_type`. +/// +/// A keyframe has `show_existing_frame == 0` and `frame_type == 0`. +/// `show_existing_frame == 1` records redisplay a previously decoded +/// buffer and carry no new coded data, so they are explicitly not +/// keyframes. 
+fn is_keyframe_vp9(data: &[u8]) -> bool {
+    let Some(&b0) = data.first() else {
+        return false;
+    };
+    // frame_marker must be 0b10 or this is not a VP9 frame header.
+    if (b0 >> 6) & 0b11 != 0b10 {
+        return false;
+    }
+    let profile_low = (b0 >> 5) & 0x1;
+    let profile_high = (b0 >> 4) & 0x1;
+    let profile = (profile_high << 1) | profile_low;
+    // Profile 3 carries an extra reserved-zero bit, shifting the
+    // show_existing_frame / frame_type bits down by one.
+    let (show_existing_bit, frame_type_bit) = if profile == 3 { (2, 1) } else { (3, 2) };
+    let show_existing = (b0 >> show_existing_bit) & 0x1;
+    if show_existing != 0 {
+        return false;
+    }
+    let frame_type = (b0 >> frame_type_bit) & 0x1;
+    frame_type == 0
+}
+
+/// Scans an Annex-B bytestream for 3- or 4-byte start codes and returns
+/// true if any NAL's type is classified as a keyframe NAL by
+/// `CodecArg::is_keyframe_nal` (helper defined elsewhere in this file —
+/// presumably IDR/IRAP types; confirm against its definition).
+fn is_keyframe_annex_b(codec: CodecArg, data: &[u8]) -> bool {
+    let mut i = 0usize;
+    while i + 3 < data.len() {
+        let is_four = i + 4 <= data.len()
+            && data[i] == 0
+            && data[i + 1] == 0
+            && data[i + 2] == 0
+            && data[i + 3] == 1;
+        let is_three = data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1;
+        if is_four || is_three {
+            let payload_idx = if is_four { i + 4 } else { i + 3 };
+            if payload_idx < data.len()
+                && codec.is_keyframe_nal(codec.nal_type(data[payload_idx]))
+            {
+                return true;
+            }
+            i = payload_idx + 1;
+        } else {
+            i += 1;
+        }
+    }
+    false
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    env_logger::init();
+    let args = Args::parse();
+
+    // Shared shutdown flag flipped on Ctrl-C; both loops below poll it.
+    let shutdown = Arc::new(AtomicBool::new(false));
+    tokio::spawn({
+        let shutdown = shutdown.clone();
+        async move {
+            let _ = tokio::signal::ctrl_c().await;
+            shutdown.store(true, Ordering::Release);
+            info!("Ctrl-C received, shutting down...");
+        }
+    });
+
+    // CLI flags win; fall back to the conventional LiveKit env vars.
+    let url = args
+        .url
+        .or_else(|| env::var("LIVEKIT_URL").ok())
+        .context("--url or LIVEKIT_URL required")?;
+    let api_key = args
+        .api_key
+        .or_else(|| env::var("LIVEKIT_API_KEY").ok())
+        .context("--api-key or LIVEKIT_API_KEY required")?;
+    let api_secret = args
+        .api_secret
+        .or_else(|| env::var("LIVEKIT_API_SECRET").ok())
+        .context("--api-secret or LIVEKIT_API_SECRET required")?;
+
+    let token = access_token::AccessToken::with_api_key(&api_key, &api_secret)
+        .with_identity(&args.identity)
+        .with_name(&args.identity)
+        .with_grants(access_token::VideoGrants {
+            room_join: true,
+            room: args.room.clone(),
+            can_publish: true,
+            ..Default::default()
+        })
+        .to_jwt()?;
+
+    info!("Connecting to LiveKit room '{}' as '{}'...", args.room, args.identity);
+    let mut room_options = RoomOptions::default();
+    room_options.auto_subscribe = false;
+    room_options.dynacast = false;
+    // NOTE(review): the room event stream is discarded here, so
+    // reconnect/track callbacks are never observed by this example.
+    let (room, _events) = Room::connect(&url, &token, room_options).await?;
+    let room = Arc::new(room);
+    info!("Connected: {} (sid {})", room.name(), room.sid().await);
+
+    let resolution = VideoResolution { width: args.width, height: args.height };
+    let source = NativeEncodedVideoSource::new(args.codec.webrtc_codec(), resolution);
+    source.set_observer(Arc::new(LoggingObserver::new()));
+    info!(
+        "Created encoded {} source: {}x{} (source_id={})",
+        args.codec.name(),
+        args.width,
+        args.height,
+        source.source_id()
+    );
+
+    let track_name = match args.codec {
+        CodecArg::H264 => "encoded-h264",
+        CodecArg::H265 => "encoded-h265",
+        CodecArg::Vp8 => "encoded-vp8",
+        CodecArg::Vp9 => "encoded-vp9",
+        CodecArg::Av1 => "encoded-av1",
+    };
+    let track =
+        LocalVideoTrack::create_video_track(track_name, RtcVideoSource::Encoded(source.clone()));
+
+    let publish_opts = TrackPublishOptions {
+        source: TrackSource::Camera,
+        simulcast: false,
+        video_codec: args.codec.livekit_codec(),
+        ..Default::default()
+    };
+    room.local_participant()
+        .publish_track(LocalTrack::Video(track), publish_opts)
+        .await
+        .context("publish_track failed")?;
+    info!("Published encoded {} track", args.codec.name());
+
+    // Cumulative-per-interval counters, swap-reset by the logger task.
+    let frames_accepted = Arc::new(AtomicU64::new(0));
+    let frames_dropped = Arc::new(AtomicU64::new(0));
+    let keyframes = Arc::new(AtomicU64::new(0));
+
+    {
+        let frames_accepted = frames_accepted.clone();
+        let frames_dropped = frames_dropped.clone();
+        let keyframes = keyframes.clone();
+        // Periodic ingest-rate logger: measures real elapsed time rather
+        // than assuming exactly 2 s per tick.
+        tokio::spawn(async move {
+            let mut last = Instant::now();
+            loop {
+                sleep(Duration::from_secs(2)).await;
+                let elapsed = last.elapsed().as_secs_f64();
+                last = Instant::now();
+                let ok = frames_accepted.swap(0, Ordering::Relaxed);
+                let dropped = frames_dropped.swap(0, Ordering::Relaxed);
+                let kf = keyframes.swap(0, Ordering::Relaxed);
+                if ok + dropped > 0 {
+                    info!(
+                        "ingest: {:.1} fps accepted, {:.1} fps dropped, {} keyframes",
+                        ok as f64 / elapsed,
+                        dropped as f64 / elapsed,
+                        kf
+                    );
+                }
+            }
+        });
+    }
+
+    // Reconnect loop: if gstreamer restarts, we come back up automatically.
+    while !shutdown.load(Ordering::Acquire) {
+        let addr = format!("{}:{}", args.tcp_host, args.tcp_port);
+        let framing = match args.codec {
+            CodecArg::H264 | CodecArg::H265 => "Annex-B",
+            CodecArg::Vp8 | CodecArg::Vp9 | CodecArg::Av1 => "IVF",
+        };
+        info!("Connecting to {addr} for {} {framing} bytestream...", args.codec.name());
+        let mut stream = match TcpStream::connect(&addr).await {
+            Ok(s) => s,
+            Err(e) => {
+                warn!("connect {addr} failed: {e}. Retrying in 1s...");
+                sleep(Duration::from_secs(1)).await;
+                continue;
+            }
+        };
+        // Best-effort latency tweak; failure is deliberately ignored.
+        let _ = stream.set_nodelay(true);
+        info!("Connected to {addr}");
+
+        let mut demuxer = Demuxer::new(args.codec);
+        let mut read_buf = vec![0u8; 64 * 1024];
+        let mut out = Vec::new();
+        loop {
+            if shutdown.load(Ordering::Acquire) {
+                break;
+            }
+            // The 250 ms tick lets this loop notice `shutdown` even when
+            // the socket is idle; the abandoned read is cancel-safe.
+            let n = tokio::select! {
+                r = stream.read(&mut read_buf) => r,
+                _ = sleep(Duration::from_millis(250)) => continue,
+            };
+            let n = match n {
+                Ok(0) => {
+                    warn!("gstreamer closed the connection");
+                    break;
+                }
+                Ok(n) => n,
+                Err(e) => {
+                    warn!("read error: {e}");
+                    break;
+                }
+            };
+
+            out.clear();
+            demuxer.feed(&read_buf[..n], &mut out);
+            if demuxer.desynced() {
+                warn!("demuxer reported desync — dropping TCP connection to re-align");
+                break;
+            }
+            for au in out.drain(..)
{ + let is_keyframe = is_keyframe(args.codec, &au); + if is_keyframe { + keyframes.fetch_add(1, Ordering::Relaxed); + } + let info = EncodedFrameInfo { + is_keyframe, + has_sps_pps: false, // the source scans+prepends SPS/PPS as needed + width: args.width, + height: args.height, + capture_time_us: 0, + }; + if source.capture_frame(&au, &info) { + frames_accepted.fetch_add(1, Ordering::Relaxed); + } else { + frames_dropped.fetch_add(1, Ordering::Relaxed); + warn!( + "capture_frame dropped AU ({} bytes, keyframe={})", + au.len(), + is_keyframe + ); + } + } + } + + if !shutdown.load(Ordering::Acquire) { + sleep(Duration::from_secs(1)).await; + } + } + + info!("Shutting down..."); + Ok(()) +} diff --git a/webrtc-sys/src/passthrough_video_encoder.cpp b/webrtc-sys/src/passthrough_video_encoder.cpp index 6287683c8..accc178ee 100644 --- a/webrtc-sys/src/passthrough_video_encoder.cpp +++ b/webrtc-sys/src/passthrough_video_encoder.cpp @@ -49,7 +49,8 @@ webrtc::VideoCodecType ToWebrtcCodec(EncodedVideoCodecType codec) { bool FrameTypesRequestKeyframe( const std::vector* frame_types) { - if (!frame_types) return false; + if (!frame_types) + return false; return std::any_of(frame_types->begin(), frame_types->end(), [](webrtc::VideoFrameType t) { return t == webrtc::VideoFrameType::kVideoFrameKey; @@ -69,8 +70,9 @@ PassthroughVideoEncoder::PassthroughVideoEncoder( PassthroughVideoEncoder::~PassthroughVideoEncoder() = default; -int PassthroughVideoEncoder::InitEncode(const webrtc::VideoCodec* codec_settings, - const Settings& settings) { +int PassthroughVideoEncoder::InitEncode( + const webrtc::VideoCodec* codec_settings, + const Settings& settings) { if (codec_settings) { codec_settings_ = *codec_settings; } @@ -111,14 +113,14 @@ int32_t PassthroughVideoEncoder::Encode( } webrtc::EncodedImage image; - image.SetEncodedData(webrtc::EncodedImageBuffer::Create( - enc.data.data(), enc.data.size())); - image.SetFrameType(enc.is_keyframe ? 
webrtc::VideoFrameType::kVideoFrameKey - : webrtc::VideoFrameType::kVideoFrameDelta); + image.SetEncodedData( + webrtc::EncodedImageBuffer::Create(enc.data.data(), enc.data.size())); + image.SetFrameType(enc.is_keyframe + ? webrtc::VideoFrameType::kVideoFrameKey + : webrtc::VideoFrameType::kVideoFrameDelta); image.SetRtpTimestamp(frame.rtp_timestamp()); - image.capture_time_ms_ = enc.capture_time_us != 0 - ? enc.capture_time_us / 1000 - : frame.render_time_ms(); + image.capture_time_ms_ = enc.capture_time_us != 0 ? enc.capture_time_us / 1000 + : frame.render_time_ms(); image._encodedWidth = enc.width; image._encodedHeight = enc.height; image.rotation_ = frame.rotation(); @@ -138,7 +140,8 @@ int32_t PassthroughVideoEncoder::Encode( return WEBRTC_VIDEO_CODEC_OK; } -void PassthroughVideoEncoder::SetRates(const RateControlParameters& parameters) { +void PassthroughVideoEncoder::SetRates( + const RateControlParameters& parameters) { const uint32_t target_bps = parameters.target_bitrate.get_sum_bps(); const double framerate = parameters.framerate_fps; source_->notify_target_bitrate(target_bps, framerate); From 6c3a7f6f2d6287250410fb4265405db8c32b56c6 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Thu, 23 Apr 2026 10:15:31 -0600 Subject: [PATCH 07/15] user abstracted: read from port and send on track --- livekit-ffi/protocol/encoded_tcp_ingest.proto | 135 +++++ livekit-ffi/protocol/ffi.proto | 22 +- livekit-ffi/src/server/encoded_tcp_ingest.rs | 266 ++++++++++ livekit-ffi/src/server/mod.rs | 1 + livekit-ffi/src/server/requests.rs | 8 +- livekit/Cargo.toml | 2 +- livekit/src/lib.rs | 1 + livekit/src/video_ingest/demux.rs | 313 ++++++++++++ livekit/src/video_ingest/encoded_tcp.rs | 483 ++++++++++++++++++ livekit/src/video_ingest/keyframe.rs | 261 ++++++++++ livekit/src/video_ingest/mod.rs | 34 ++ 11 files changed, 1521 insertions(+), 5 deletions(-) create mode 100644 livekit-ffi/protocol/encoded_tcp_ingest.proto create mode 100644 
livekit-ffi/src/server/encoded_tcp_ingest.rs create mode 100644 livekit/src/video_ingest/demux.rs create mode 100644 livekit/src/video_ingest/encoded_tcp.rs create mode 100644 livekit/src/video_ingest/keyframe.rs create mode 100644 livekit/src/video_ingest/mod.rs diff --git a/livekit-ffi/protocol/encoded_tcp_ingest.proto b/livekit-ffi/protocol/encoded_tcp_ingest.proto new file mode 100644 index 000000000..38f2b9536 --- /dev/null +++ b/livekit-ffi/protocol/encoded_tcp_ingest.proto @@ -0,0 +1,135 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package livekit.proto; +option csharp_namespace = "LiveKit.Proto"; + +import "handle.proto"; +import "track.proto"; +import "video_frame.proto"; + +// High-level helper that ingests a pre-encoded video bytestream over TCP +// and publishes it to a LiveKit room. Wraps the Rust +// `livekit::video_ingest::EncodedTcpIngest` type. + +message EncodedTcpIngestInfo { + // Sid of the published track. Always set once the creation callback + // fires with a success message. + required string track_sid = 1; + // Name of the published track. + required string track_name = 2; +} + +message OwnedEncodedTcpIngest { + required FfiOwnedHandle handle = 1; + required EncodedTcpIngestInfo info = 2; +} + +// Create an `EncodedTcpIngest` and publish its track on the given room. +// The ingest runs until `StopEncodedTcpIngestRequest` is called. 
+message NewEncodedTcpIngestRequest { + required uint64 room_handle = 1; + required string host = 2; + required uint32 port = 3; + required VideoCodec codec = 4; + required uint32 width = 5; + required uint32 height = 6; + optional string track_name = 7; + optional TrackSource track_source = 8; + // Forwarded to TrackPublishOptions.video_encoding.max_bitrate when set. + optional uint64 max_bitrate_bps = 9; + // Forwarded alongside `max_bitrate_bps`. Defaults to 30 fps. + optional double max_framerate_fps = 10; + // Backoff between reconnect attempts (ms). Default: 1000. + optional uint32 reconnect_backoff_ms = 11; + // When true, stopping the ingest unpublishes the track. Default: true. + optional bool unpublish_on_stop = 12; + optional uint64 request_async_id = 13; +} + +message NewEncodedTcpIngestResponse { + required uint64 async_id = 1; +} + +message NewEncodedTcpIngestCallback { + required uint64 async_id = 1; + oneof message { + string error = 2; + OwnedEncodedTcpIngest ingest = 3; + } +} + +// Stop the ingest loop. If `unpublish_on_stop` was set (the default on +// creation), the track is unpublished as part of this call. +message StopEncodedTcpIngestRequest { + required uint64 ingest_handle = 1; + optional uint64 request_async_id = 2; +} + +message StopEncodedTcpIngestResponse { + required uint64 async_id = 1; +} + +message StopEncodedTcpIngestCallback { + required uint64 async_id = 1; + optional string error = 2; +} + +// Snapshot of cumulative stats since the ingest was created. +message EncodedTcpIngestStats { + required uint64 frames_accepted = 1; + required uint64 frames_dropped = 2; + required uint64 keyframes = 3; + required uint64 tcp_reconnects = 4; +} + +message GetEncodedTcpIngestStatsRequest { + required uint64 ingest_handle = 1; +} + +message GetEncodedTcpIngestStatsResponse { + required EncodedTcpIngestStats stats = 1; +} + +// Ingest-level events dispatched as FfiEvents. 
+message EncodedTcpIngestEvent { + required uint64 ingest_handle = 1; + oneof message { + Connected connected = 2; + Disconnected disconnected = 3; + KeyframeRequested keyframe_requested = 4; + TargetBitrateChanged target_bitrate_changed = 5; + } + + message Connected { + // Peer socket address the TCP connection landed on, e.g. "127.0.0.1:5000". + required string peer = 1; + } + + message Disconnected { + // Human-readable reason for the disconnect. The ingest loop will + // reconnect automatically after the configured backoff unless the + // ingest is being stopped. + required string reason = 1; + } + + message KeyframeRequested {} + + message TargetBitrateChanged { + required uint32 bitrate_bps = 1; + required double framerate_fps = 2; + } +} diff --git a/livekit-ffi/protocol/ffi.proto b/livekit-ffi/protocol/ffi.proto index 4b1377b5e..99698da5c 100644 --- a/livekit-ffi/protocol/ffi.proto +++ b/livekit-ffi/protocol/ffi.proto @@ -27,6 +27,7 @@ import "audio_frame.proto"; import "rpc.proto"; import "data_stream.proto"; import "data_track.proto"; +import "encoded_tcp_ingest.proto"; // **How is the livekit-ffi working: // We refer as the ffi server the Rust server that is running the LiveKit client implementation, and we @@ -167,7 +168,12 @@ message FfiRequest { // Encoded video CaptureEncodedVideoFrameRequest capture_encoded_video_frame = 76; - // NEXT_ID: 77 + // Encoded TCP ingest (high-level helper) + NewEncodedTcpIngestRequest new_encoded_tcp_ingest = 77; + StopEncodedTcpIngestRequest stop_encoded_tcp_ingest = 78; + GetEncodedTcpIngestStatsRequest get_encoded_tcp_ingest_stats = 79; + + // NEXT_ID: 80 } } @@ -280,7 +286,12 @@ message FfiResponse { // Encoded video CaptureEncodedVideoFrameResponse capture_encoded_video_frame = 75; - // NEXT_ID: 76 + // Encoded TCP ingest (high-level helper) + NewEncodedTcpIngestResponse new_encoded_tcp_ingest = 76; + StopEncodedTcpIngestResponse stop_encoded_tcp_ingest = 77; + GetEncodedTcpIngestStatsResponse 
get_encoded_tcp_ingest_stats = 78; + + // NEXT_ID: 79 } } @@ -346,7 +357,12 @@ message FfiEvent { // Encoded video EncodedVideoSourceEvent encoded_video_source_event = 44; - // NEXT_ID: 45 + // Encoded TCP ingest (high-level helper) + EncodedTcpIngestEvent encoded_tcp_ingest_event = 45; + NewEncodedTcpIngestCallback new_encoded_tcp_ingest = 46; + StopEncodedTcpIngestCallback stop_encoded_tcp_ingest = 47; + + // NEXT_ID: 48 } } diff --git a/livekit-ffi/src/server/encoded_tcp_ingest.rs b/livekit-ffi/src/server/encoded_tcp_ingest.rs new file mode 100644 index 000000000..b84c1ced4 --- /dev/null +++ b/livekit-ffi/src/server/encoded_tcp_ingest.rs @@ -0,0 +1,266 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! FFI wrapper for the high-level `EncodedTcpIngest` helper. +//! +//! The Rust `EncodedTcpIngest` owns the publish + TCP loop; this layer +//! simply: +//! +//! 1. Converts protobuf options to the Rust options. +//! 2. Calls `EncodedTcpIngest::start` from the FFI async runtime. +//! 3. Stores the resulting handle and surfaces ingest-level callbacks +//! through an [`IngestObserverBridge`] so C++ / Python / Swift clients +//! see them as [`proto::EncodedTcpIngestEvent`]. 
+ +use std::{net::SocketAddr, sync::Arc, time::Duration}; + +use livekit::{ + prelude::*, + video_ingest::{EncodedIngestObserver, EncodedTcpIngest, EncodedTcpIngestOptions}, +}; + +use super::{room::FfiRoom, FfiHandle}; +use crate::{proto, server, FfiError, FfiHandleId, FfiResult}; + +/// Server-side owner of an [`EncodedTcpIngest`]. Stored behind an +/// [`FfiHandleId`] in the FFI handle table. +pub struct FfiEncodedTcpIngest { + pub handle_id: FfiHandleId, + pub ingest: Arc>>, +} + +impl FfiHandle for FfiEncodedTcpIngest {} + +/// Kicks off an async `EncodedTcpIngest::start` and returns the async id +/// immediately. The result (or error) is dispatched as +/// [`proto::NewEncodedTcpIngestCallback`]. +pub fn create( + server: &'static server::FfiServer, + req: proto::NewEncodedTcpIngestRequest, +) -> FfiResult { + let async_id = server.resolve_async_id(req.request_async_id); + let ffi_room = server.retrieve_handle::(req.room_handle)?.clone(); + + let options = match options_from_proto(&req) { + Ok(opts) => opts, + Err(e) => { + let _ = server.send_event( + proto::NewEncodedTcpIngestCallback { + async_id, + message: Some(proto::new_encoded_tcp_ingest_callback::Message::Error( + e.to_string(), + )), + } + .into(), + ); + return Ok(proto::NewEncodedTcpIngestResponse { async_id }); + } + }; + + let handle = server.async_runtime.spawn(async move { + let participant = ffi_room.inner.room.local_participant(); + match EncodedTcpIngest::start(participant, options).await { + Ok(ingest) => { + let handle_id = server.next_id(); + let track_sid = ingest.track_sid(); + let track_name = ingest.track().name(); + ingest.set_observer(Arc::new(IngestObserverBridge { + server, + ingest_handle: handle_id, + })); + + let info = proto::EncodedTcpIngestInfo { + track_sid: track_sid.to_string(), + track_name, + }; + + let ffi_ingest = FfiEncodedTcpIngest { + handle_id, + ingest: Arc::new(tokio::sync::Mutex::new(Some(ingest))), + }; + server.store_handle(handle_id, ffi_ingest); + + let _ = 
server.send_event( + proto::NewEncodedTcpIngestCallback { + async_id, + message: Some( + proto::new_encoded_tcp_ingest_callback::Message::Ingest( + proto::OwnedEncodedTcpIngest { + handle: proto::FfiOwnedHandle { id: handle_id }, + info, + }, + ), + ), + } + .into(), + ); + } + Err(err) => { + let _ = server.send_event( + proto::NewEncodedTcpIngestCallback { + async_id, + message: Some(proto::new_encoded_tcp_ingest_callback::Message::Error( + err.to_string(), + )), + } + .into(), + ); + } + } + }); + server.watch_panic(handle); + + Ok(proto::NewEncodedTcpIngestResponse { async_id }) +} + +/// Stops a running ingest. Async because `EncodedTcpIngest::stop` awaits +/// the background task and optionally unpublishes the track. +pub fn stop( + server: &'static server::FfiServer, + req: proto::StopEncodedTcpIngestRequest, +) -> FfiResult { + let async_id = server.resolve_async_id(req.request_async_id); + let ingest_handle = req.ingest_handle; + + let ingest_slot = { + let ffi_ingest = server.retrieve_handle::(ingest_handle)?; + ffi_ingest.ingest.clone() + }; + + let handle = server.async_runtime.spawn(async move { + let taken = { ingest_slot.lock().await.take() }; + let error = match taken { + Some(ingest) => { + ingest.stop().await; + None + } + None => Some("EncodedTcpIngest: already stopped".to_string()), + }; + let _ = server.send_event( + proto::StopEncodedTcpIngestCallback { async_id, error }.into(), + ); + }); + server.watch_panic(handle); + + Ok(proto::StopEncodedTcpIngestResponse { async_id }) +} + +/// Pulls a stats snapshot synchronously. 
+pub fn get_stats( + server: &'static server::FfiServer, + req: proto::GetEncodedTcpIngestStatsRequest, +) -> FfiResult { + let ffi_ingest = server.retrieve_handle::(req.ingest_handle)?; + let guard = ffi_ingest.ingest.try_lock().map_err(|_| { + FfiError::InvalidRequest("EncodedTcpIngest is busy (stop in progress?)".into()) + })?; + let Some(ingest) = guard.as_ref() else { + return Err(FfiError::InvalidRequest("EncodedTcpIngest is stopped".into())); + }; + let stats = ingest.stats(); + Ok(proto::GetEncodedTcpIngestStatsResponse { + stats: proto::EncodedTcpIngestStats { + frames_accepted: stats.frames_accepted, + frames_dropped: stats.frames_dropped, + keyframes: stats.keyframes, + tcp_reconnects: stats.tcp_reconnects, + }, + }) +} + +fn options_from_proto(req: &proto::NewEncodedTcpIngestRequest) -> FfiResult { + let port = u16::try_from(req.port) + .map_err(|_| FfiError::InvalidRequest("port must fit in u16".into()))?; + let codec = video_codec_from_proto(req.codec()); + let track_source = req + .track_source + .and_then(|s| proto::TrackSource::try_from(s).ok()) + .map(TrackSource::from) + .unwrap_or(TrackSource::Camera); + + let mut opts = EncodedTcpIngestOptions::new(port, codec, req.width, req.height); + opts.host = req.host.clone(); + opts.track_name = req.track_name.clone(); + opts.track_source = track_source; + opts.max_bitrate_bps = req.max_bitrate_bps; + if let Some(fps) = req.max_framerate_fps { + opts.max_framerate_fps = fps; + } + if let Some(ms) = req.reconnect_backoff_ms { + opts.reconnect_backoff = Duration::from_millis(ms as u64); + } + if let Some(unpublish) = req.unpublish_on_stop { + opts.unpublish_on_stop = unpublish; + } + Ok(opts) +} + +fn video_codec_from_proto(codec: proto::VideoCodec) -> livekit::webrtc::video_source::VideoCodec { + use livekit::webrtc::video_source::VideoCodec; + match codec { + proto::VideoCodec::H264 => VideoCodec::H264, + proto::VideoCodec::H265 => VideoCodec::H265, + proto::VideoCodec::Vp8 => VideoCodec::Vp8, + 
proto::VideoCodec::Vp9 => VideoCodec::Vp9, + proto::VideoCodec::Av1 => VideoCodec::Av1, + } +} + +/// Forwards ingest-level callbacks out to the FFI client as +/// [`proto::EncodedTcpIngestEvent`]s. +struct IngestObserverBridge { + server: &'static server::FfiServer, + ingest_handle: FfiHandleId, +} + +impl IngestObserverBridge { + fn emit(&self, message: proto::encoded_tcp_ingest_event::Message) { + let _ = self.server.send_event( + proto::EncodedTcpIngestEvent { + ingest_handle: self.ingest_handle, + message: Some(message), + } + .into(), + ); + } +} + +impl EncodedIngestObserver for IngestObserverBridge { + fn on_connected(&self, peer: SocketAddr) { + self.emit(proto::encoded_tcp_ingest_event::Message::Connected( + proto::encoded_tcp_ingest_event::Connected { peer: peer.to_string() }, + )); + } + + fn on_disconnected(&self, reason: &str) { + self.emit(proto::encoded_tcp_ingest_event::Message::Disconnected( + proto::encoded_tcp_ingest_event::Disconnected { reason: reason.to_string() }, + )); + } + + fn on_keyframe_requested(&self) { + self.emit(proto::encoded_tcp_ingest_event::Message::KeyframeRequested( + proto::encoded_tcp_ingest_event::KeyframeRequested {}, + )); + } + + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + self.emit(proto::encoded_tcp_ingest_event::Message::TargetBitrateChanged( + proto::encoded_tcp_ingest_event::TargetBitrateChanged { + bitrate_bps, + framerate_fps, + }, + )); + } +} diff --git a/livekit-ffi/src/server/mod.rs b/livekit-ffi/src/server/mod.rs index 0596b6939..9bce5d112 100644 --- a/livekit-ffi/src/server/mod.rs +++ b/livekit-ffi/src/server/mod.rs @@ -39,6 +39,7 @@ pub mod audio_stream; pub mod colorcvt; pub mod data_stream; pub mod data_track; +pub mod encoded_tcp_ingest; pub mod logger; pub mod participant; pub mod requests; diff --git a/livekit-ffi/src/server/requests.rs b/livekit-ffi/src/server/requests.rs index 998482161..98f51f1b8 100644 --- a/livekit-ffi/src/server/requests.rs +++ 
b/livekit-ffi/src/server/requests.rs @@ -24,7 +24,7 @@ use livekit::{ use parking_lot::Mutex; use super::{ - audio_source, audio_stream, colorcvt, data_stream, data_track, + audio_source, audio_stream, colorcvt, data_stream, data_track, encoded_tcp_ingest, participant::FfiParticipant, resampler, room::{self, FfiPublication, FfiTrack}, @@ -1377,6 +1377,12 @@ pub fn handle_request( on_remote_data_track_is_published(server, req)?.into() } Request::DataTrackStreamRead(req) => on_data_track_stream_read(server, req)?.into(), + + Request::NewEncodedTcpIngest(req) => encoded_tcp_ingest::create(server, req)?.into(), + Request::StopEncodedTcpIngest(req) => encoded_tcp_ingest::stop(server, req)?.into(), + Request::GetEncodedTcpIngestStats(req) => { + encoded_tcp_ingest::get_stats(server, req)?.into() + } }); Ok(res) diff --git a/livekit/Cargo.toml b/livekit/Cargo.toml index 8022f8d03..e1d34183e 100644 --- a/livekit/Cargo.toml +++ b/livekit/Cargo.toml @@ -34,7 +34,7 @@ livekit-datatrack = { workspace = true } prost = "0.12" serde = { version = "1", features = ["derive"] } serde_json = "1.0" -tokio = { version = "1", default-features = false, features = ["sync", "macros", "fs"] } +tokio = { version = "1", default-features = false, features = ["sync", "macros", "fs", "net", "io-util", "time"] } parking_lot = { version = "0.12" } futures-util = { version = "0.3", default-features = false, features = ["sink"] } thiserror = "1.0" diff --git a/livekit/src/lib.rs b/livekit/src/lib.rs index 00abe5de8..55cf101c1 100644 --- a/livekit/src/lib.rs +++ b/livekit/src/lib.rs @@ -16,6 +16,7 @@ mod plugin; pub mod proto; mod room; mod rtc_engine; +pub mod video_ingest; pub mod webrtc { pub use libwebrtc::*; diff --git a/livekit/src/video_ingest/demux.rs b/livekit/src/video_ingest/demux.rs new file mode 100644 index 000000000..52ee85c88 --- /dev/null +++ b/livekit/src/video_ingest/demux.rs @@ -0,0 +1,313 @@ +// Copyright 2026 LiveKit, Inc. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Stream demuxers that split a raw TCP bytestream into discrete encoded
+//! video frames.
+//!
+//! * H.264 / H.265: Annex-B bytestream, split on access-unit delimiters.
+//! * VP8 / VP9 / AV1: IVF container (gstreamer's `ivfmux` or `avmux_ivf`),
+//!   optionally prefixed with a 32-byte DKIF file header.
+
+use libwebrtc::video_source::VideoCodec;
+
+use super::keyframe;
+
+/// Upper bound on per-frame size we accept from the IVF reader before we
+/// conclude we are byte-misaligned.
+pub(super) const MAX_FRAME_BYTES: usize = 8 * 1024 * 1024;
+
+/// Wire-format selector. Hides whether the underlying wire is Annex-B or
+/// IVF.
+pub(super) enum Demuxer {
+    AnnexB(AuSplitter),
+    Ivf(IvfReader),
+}
+
+impl Demuxer {
+    /// Picks the demuxer matching the wire format: Annex-B for H.264 and
+    /// H.265, IVF for VP8/VP9/AV1.
+    pub(super) fn new(codec: VideoCodec) -> Self {
+        match codec {
+            VideoCodec::H264 | VideoCodec::H265 => Demuxer::AnnexB(AuSplitter::new(codec)),
+            VideoCodec::Vp8 | VideoCodec::Vp9 | VideoCodec::Av1 => {
+                Demuxer::Ivf(IvfReader::new(codec))
+            }
+        }
+    }
+
+    /// Feeds a raw byte chunk from the socket. Completed frames are
+    /// appended to `out`.
+    pub(super) fn feed(&mut self, chunk: &[u8], out: &mut Vec<Vec<u8>>) {
+        match self {
+            Demuxer::AnnexB(s) => s.feed(chunk, out),
+            Demuxer::Ivf(r) => r.feed(chunk, out),
+        }
+    }
+
+    /// True if the demuxer has detected a byte misalignment it cannot
+    /// recover from without a fresh TCP connection.
+    pub(super) fn desynced(&self) -> bool {
+        match self {
+            // The Annex-B splitter can always resynchronize on the next
+            // start code, so it never reports desync.
+            Demuxer::AnnexB(_) => false,
+            Demuxer::Ivf(r) => r.desynced,
+        }
+    }
+}
+
+/// Reads IVF-framed video off the wire. Format per libvpx:
+///
+/// * File header (32 bytes, optional): `"DKIF"`, u16 version, u16
+///   header_len, 4-byte FOURCC, u16 width, u16 height, u32 tb_num,
+///   u32 tb_den, u32 frame_count, u32 unused.
+/// * Frame header (12 bytes each): u32 frame_size, u64 pts.
+/// * Frame payload: `frame_size` bytes. All integers little-endian.
+///
+/// The file header is *optional* here: gstreamer's `avmux_ivf` on a
+/// non-seekable `tcpserversink` emits only per-frame records (libavformat
+/// writes `DKIF` at `write_header` time, but the ffmpeg AVIO wrapper in
+/// gst-libav swallows it when the output is non-seekable). `ivfmux` (the
+/// native gst-plugins-bad element) does emit `DKIF` and we parse it when
+/// present. gstreamer's one-buffer-per-packet semantics keep new
+/// `tcpserversink` clients on an IVF record boundary.
+pub(super) struct IvfReader {
+    codec: VideoCodec,
+    // Rolling buffer of not-yet-consumed socket bytes.
+    buf: Vec<u8>,
+    // Set once the optional DKIF header has been consumed or ruled out.
+    header_phase_done: bool,
+    pub(super) desynced: bool,
+}
+
+impl IvfReader {
+    fn new(codec: VideoCodec) -> Self {
+        Self {
+            codec,
+            buf: Vec::with_capacity(256 * 1024),
+            header_phase_done: false,
+            desynced: false,
+        }
+    }
+
+    fn feed(&mut self, chunk: &[u8], out: &mut Vec<Vec<u8>>) {
+        self.buf.extend_from_slice(chunk);
+
+        // Handle (or rule out) the optional DKIF file header exactly once
+        // per connection. Early `return`s wait for more bytes.
+        if !self.header_phase_done {
+            if self.buf.len() < 4 {
+                return;
+            }
+            if &self.buf[0..4] == b"DKIF" {
+                if self.buf.len() < 32 {
+                    return;
+                }
+                let fourcc = &self.buf[8..12];
+                // A FOURCC mismatch is logged but tolerated; framing is
+                // still parseable either way.
+                if let Some(expected) = ivf_fourcc(self.codec) {
+                    if fourcc != expected {
+                        log::warn!(
+                            "ivf: expected FOURCC {:?} for {:?}, got {:?}",
+                            std::str::from_utf8(expected).unwrap_or("?"),
+                            self.codec,
+                            std::str::from_utf8(fourcc).unwrap_or("?"),
+                        );
+                    }
+                }
+                log::info!(
+                    "ivf: file header OK (codec fourcc={})",
+                    std::str::from_utf8(fourcc).unwrap_or("?")
+                );
+                self.buf.drain(..32);
+            } else {
+                log::info!(
+                    "ivf: no DKIF file header on this stream (typical for gstreamer avmux_ivf \
+                     on tcpserversink); parsing per-frame records directly"
+                );
+            }
+            self.header_phase_done = true;
+        }
+
+        // Emit every complete 12-byte-header + payload record buffered so
+        // far; stop as soon as a record is incomplete.
+        loop {
+            if self.buf.len() < 12 {
+                return;
+            }
+            let size = u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]])
+                as usize;
+            if size == 0 || size > MAX_FRAME_BYTES {
+                log::warn!(
+                    "ivf: implausible frame_size={size} bytes — byte stream is misaligned. \
+                     Dropping connection so the ingest loop can reconnect and re-anchor on the \
+                     next gstreamer buffer boundary."
+                );
+                // No in-band way to find the next record boundary, so
+                // flag desync and discard everything buffered.
+                self.desynced = true;
+                self.buf.clear();
+                return;
+            }
+            if self.buf.len() < 12 + size {
+                return;
+            }
+            let frame = self.buf[12..12 + size].to_vec();
+            self.buf.drain(..12 + size);
+            out.push(frame);
+        }
+    }
+}
+
+/// IVF FOURCC expected on the wire. Only meaningful for codecs delivered
+/// via `ivfmux` / `avmux_ivf`.
+fn ivf_fourcc(codec: VideoCodec) -> Option<&'static [u8; 4]> { + match codec { + VideoCodec::Vp8 => Some(b"VP80"), + VideoCodec::Vp9 => Some(b"VP90"), + VideoCodec::Av1 => Some(b"AV01"), + _ => None, + } +} + +/// Splits an incoming Annex-B bytestream into access units on AUD +/// boundaries. The AUD NAL type and NAL-type extraction are codec +/// specific. +/// +/// Relies on the upstream parser emitting an AUD at the start of every AU +/// (`x264enc aud=true` for H.264, `x265enc option-string="aud=1"` plumbed +/// through `h265parse` for H.265). Bytes before the first AUD are +/// discarded; each subsequent AU is emitted when the *next* AU's AUD +/// arrives (one AU of buffering lag, bounded by the frame interval). +pub(super) struct AuSplitter { + codec: VideoCodec, + buf: Vec, + au_start: Option, + scan_pos: usize, +} + +impl AuSplitter { + fn new(codec: VideoCodec) -> Self { + Self { codec, buf: Vec::with_capacity(256 * 1024), au_start: None, scan_pos: 0 } + } + + fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { + self.buf.extend_from_slice(chunk); + + let Some(aud) = keyframe::aud_nal_type(self.codec) else { + return; + }; + + while self.scan_pos + 3 < self.buf.len() { + let i = self.scan_pos; + let (sc_start, sc_len) = if i + 4 <= self.buf.len() + && self.buf[i] == 0 + && self.buf[i + 1] == 0 + && self.buf[i + 2] == 0 + && self.buf[i + 3] == 1 + { + if i + 5 > self.buf.len() { + break; + } + (i, 4) + } else if self.buf[i] == 0 && self.buf[i + 1] == 0 && self.buf[i + 2] == 1 { + (i, 3) + } else { + self.scan_pos += 1; + continue; + }; + + let nal_off = sc_start + sc_len; + if keyframe::nal_type(self.codec, self.buf[nal_off]) == aud { + if let Some(start) = self.au_start.take() { + out.push(self.buf[start..sc_start].to_vec()); + } + self.au_start = Some(sc_start); + } + self.scan_pos = nal_off + 1; + } + + let drain_before = self.au_start.unwrap_or_else(|| self.buf.len().saturating_sub(3)); + if drain_before > 0 { + self.buf.drain(..drain_before); + 
self.scan_pos = self.scan_pos.saturating_sub(drain_before); + if self.au_start.is_some() { + self.au_start = Some(0); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_ivf_frame(size: u32, payload: &[u8]) -> Vec { + let mut rec = Vec::with_capacity(12 + payload.len()); + rec.extend_from_slice(&size.to_le_bytes()); + rec.extend_from_slice(&0u64.to_le_bytes()); + rec.extend_from_slice(payload); + rec + } + + #[test] + fn ivf_without_dkif_emits_frames() { + let mut r = IvfReader::new(VideoCodec::Vp8); + let mut bytes = Vec::new(); + bytes.extend_from_slice(&make_ivf_frame(4, &[1, 2, 3, 4])); + bytes.extend_from_slice(&make_ivf_frame(2, &[9, 9])); + let mut out = Vec::new(); + r.feed(&bytes, &mut out); + assert_eq!(out.len(), 2); + assert_eq!(out[0], vec![1, 2, 3, 4]); + assert_eq!(out[1], vec![9, 9]); + assert!(!r.desynced); + } + + #[test] + fn ivf_with_dkif_skips_header() { + let mut r = IvfReader::new(VideoCodec::Vp8); + let mut bytes = Vec::new(); + // 32-byte DKIF header + bytes.extend_from_slice(b"DKIF"); + bytes.extend_from_slice(&[0; 4]); + bytes.extend_from_slice(b"VP80"); + bytes.extend_from_slice(&[0; 20]); + bytes.extend_from_slice(&make_ivf_frame(3, &[7, 8, 9])); + let mut out = Vec::new(); + r.feed(&bytes, &mut out); + assert_eq!(out, vec![vec![7, 8, 9]]); + } + + #[test] + fn ivf_absurd_size_triggers_desync() { + let mut r = IvfReader::new(VideoCodec::Vp8); + // Size larger than MAX_FRAME_BYTES + let bogus = (MAX_FRAME_BYTES as u32 + 1).to_le_bytes(); + let mut bytes = bogus.to_vec(); + bytes.extend_from_slice(&[0u8; 8]); + let mut out = Vec::new(); + r.feed(&bytes, &mut out); + assert!(out.is_empty()); + assert!(r.desynced); + } + + #[test] + fn au_splitter_emits_completed_aus() { + let mut s = AuSplitter::new(VideoCodec::H264); + // AUD NAL header for H.264: type 9, first byte = 0x09. + // IDR slice header: type 5 => 0x65 (nal_ref_idc=3). 
+ let mut bytes = Vec::new(); + // AU 1: AUD + IDR + bytes.extend_from_slice(&[0, 0, 0, 1, 0x09, 0xF0]); + bytes.extend_from_slice(&[0, 0, 0, 1, 0x65, 0x88, 0x84]); + // AU 2 starts with a new AUD -> AU 1 should be emitted. + bytes.extend_from_slice(&[0, 0, 0, 1, 0x09, 0xF0]); + bytes.extend_from_slice(&[0, 0, 0, 1, 0x41, 0x9A]); + let mut out = Vec::new(); + s.feed(&bytes, &mut out); + assert_eq!(out.len(), 1); + // The first emitted AU should begin at the first AUD start code + // and end before the second AUD. + assert_eq!(&out[0][..5], &[0, 0, 0, 1, 0x09]); + assert!(out[0].windows(5).any(|w| w == [0, 0, 0, 1, 0x65])); + } +} diff --git a/livekit/src/video_ingest/encoded_tcp.rs b/livekit/src/video_ingest/encoded_tcp.rs new file mode 100644 index 000000000..ccc1bb19d --- /dev/null +++ b/livekit/src/video_ingest/encoded_tcp.rs @@ -0,0 +1,483 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! High-level helper that ingests a pre-encoded video bytestream over TCP +//! and publishes it to a LiveKit room as an encoded video track. +//! +//! The caller supplies the TCP endpoint, codec, and declared resolution. +//! The helper: +//! +//! 1. Creates a [`NativeEncodedVideoSource`] for the codec. +//! 2. Creates a [`LocalVideoTrack`] bound to that source. +//! 3. Publishes the track via `LocalParticipant::publish_track`. +//! 4. Connects to the TCP endpoint and reconnects on disconnect. +//! 5. 
Demuxes the stream (Annex-B for H.264/H.265, IVF for VP8/VP9/AV1). +//! 6. Pushes each demuxed frame through `capture_frame`. +//! +//! The matching gstreamer pipelines are documented in +//! `examples/pre_encoded_ingest/README.md`. + +use std::{ + net::SocketAddr, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, + }, + time::Duration, +}; + +use libwebrtc::{ + video_source::{ + native::{EncodedVideoSourceObserver, NativeEncodedVideoSource}, + EncodedFrameInfo, RtcVideoSource, VideoCodec, VideoResolution, + }, +}; +use livekit_runtime::JoinHandle; +use parking_lot::Mutex; +use tokio::{io::AsyncReadExt, net::TcpStream, time::sleep}; + +use super::{demux::Demuxer, keyframe::is_keyframe}; +use crate::{ + options::{TrackPublishOptions, VideoEncoding}, + participant::LocalParticipant, + prelude::*, + RoomError, RoomResult, +}; + +/// Configuration for [`EncodedTcpIngest::start`]. +/// +/// Only `port`, `codec`, `width`, and `height` are mandatory. Everything +/// else has a default that matches the reference gstreamer pipelines. +#[derive(Debug, Clone)] +pub struct EncodedTcpIngestOptions { + /// Host running the gstreamer `tcpserversink`. Default: `127.0.0.1`. + pub host: String, + + /// Port of the gstreamer `tcpserversink`. + pub port: u16, + + /// Pre-encoded codec on the wire. Must match the upstream encoder. + pub codec: VideoCodec, + + /// Declared stream width (px). + pub width: u32, + + /// Declared stream height (px). + pub height: u32, + + /// Optional track name. Default: `encoded-`. + pub track_name: Option, + + /// Track source classification. Default: [`TrackSource::Camera`]. + pub track_source: TrackSource, + + /// Optional target max bitrate (bps) forwarded to + /// `TrackPublishOptions.video_encoding.max_bitrate`. When `None`, the + /// SDK picks an appropriate default for the resolution. + pub max_bitrate_bps: Option, + + /// Target max framerate forwarded when `max_bitrate_bps` is set. + /// Ignored otherwise. Default: 30.0. 
+ pub max_framerate_fps: f64, + + /// Backoff between reconnection attempts. Default: 1 s. + pub reconnect_backoff: Duration, + + /// When `true`, [`EncodedTcpIngest::stop`] unpublishes the track + /// before returning. Default: `true`. + pub unpublish_on_stop: bool, +} + +impl EncodedTcpIngestOptions { + /// New options with sensible defaults. Mandatory fields only. + pub fn new(port: u16, codec: VideoCodec, width: u32, height: u32) -> Self { + Self { + host: "127.0.0.1".to_string(), + port, + codec, + width, + height, + track_name: None, + track_source: TrackSource::Camera, + max_bitrate_bps: None, + max_framerate_fps: 30.0, + reconnect_backoff: Duration::from_secs(1), + unpublish_on_stop: true, + } + } +} + +/// Callbacks dispatched by [`EncodedTcpIngest`] as the ingest loop runs. +/// +/// All methods are invoked on Tokio / WebRTC threads; implementers MUST be +/// cheap and non-blocking. Default impls are no-ops so consumers can +/// override only what they care about. +pub trait EncodedIngestObserver: Send + Sync { + /// The TCP connection to the upstream producer is established. + fn on_connected(&self, _peer: SocketAddr) {} + + /// The TCP connection was closed (by peer, timeout, or demux desync). + /// The ingest loop will reconnect after + /// [`EncodedTcpIngestOptions::reconnect_backoff`]. + fn on_disconnected(&self, _reason: &str) {} + + /// The receiver requested a keyframe (PLI/FIR). Producers should emit + /// a keyframe on the next frame. + fn on_keyframe_requested(&self) {} + + /// The bandwidth estimator produced a new target bitrate / framerate. + fn on_target_bitrate(&self, _bitrate_bps: u32, _framerate_fps: f64) {} +} + +/// Snapshot of cumulative ingest stats. Counters are monotonic since +/// [`EncodedTcpIngest::start`]. +#[derive(Debug, Clone, Copy, Default)] +pub struct EncodedIngestStats { + /// Frames pushed to the source and accepted by WebRTC. 
+ pub frames_accepted: u64, + /// Frames the source rejected because its internal queue was full. + pub frames_dropped: u64, + /// Keyframes observed on the wire (accepted + dropped). + pub keyframes: u64, + /// TCP reconnections attempted (including the first connect). + pub tcp_reconnects: u64, +} + +/// Ingests a pre-encoded video feed from a TCP socket and publishes it as +/// an encoded LiveKit track. +/// +/// Create one with [`EncodedTcpIngest::start`], inspect it via +/// [`EncodedTcpIngest::stats`] / [`EncodedTcpIngest::track_sid`], and +/// shut it down with [`EncodedTcpIngest::stop`]. Dropping the value +/// without calling `stop` still terminates the background task, but does +/// not unpublish the track. +pub struct EncodedTcpIngest { + inner: Arc, + join_handle: Mutex>>, +} + +struct Inner { + participant: LocalParticipant, + source: NativeEncodedVideoSource, + track: LocalVideoTrack, + stop: AtomicBool, + stats: Stats, + observer: Mutex>>, + options: EncodedTcpIngestOptions, +} + +#[derive(Default)] +struct Stats { + frames_accepted: AtomicU64, + frames_dropped: AtomicU64, + keyframes: AtomicU64, + tcp_reconnects: AtomicU64, +} + +impl EncodedTcpIngest { + /// Creates the encoded source, publishes the track, and spawns the + /// TCP ingest task. The returned value owns all of those. 
+ pub async fn start( + participant: LocalParticipant, + options: EncodedTcpIngestOptions, + ) -> RoomResult { + if options.width == 0 || options.height == 0 { + return Err(RoomError::Internal( + "EncodedTcpIngest: width and height must be non-zero".to_string(), + )); + } + if options.port == 0 { + return Err(RoomError::Internal( + "EncodedTcpIngest: port must be non-zero".to_string(), + )); + } + + let resolution = VideoResolution { width: options.width, height: options.height }; + let source = NativeEncodedVideoSource::new(options.codec, resolution); + log::info!( + "EncodedTcpIngest: created {:?} source {}x{} (source_id={})", + options.codec, + options.width, + options.height, + source.source_id() + ); + + let track_name = options + .track_name + .clone() + .unwrap_or_else(|| default_track_name(options.codec).to_string()); + let track = LocalVideoTrack::create_video_track( + &track_name, + RtcVideoSource::Encoded(source.clone()), + ); + + let mut publish_opts = TrackPublishOptions { + source: options.track_source, + simulcast: false, + ..Default::default() + }; + if let Some(max_bitrate) = options.max_bitrate_bps { + publish_opts.video_encoding = Some(VideoEncoding { + max_bitrate, + max_framerate: options.max_framerate_fps, + }); + } + // video_codec is force-pinned to match the encoded source by + // LocalParticipant::publish_track, so we leave the default. 
+ + participant + .publish_track(LocalTrack::Video(track.clone()), publish_opts) + .await?; + log::info!("EncodedTcpIngest: published track '{}' ({:?})", track_name, options.codec); + + let inner = Arc::new(Inner { + participant, + source: source.clone(), + track, + stop: AtomicBool::new(false), + stats: Stats::default(), + observer: Mutex::new(None), + options, + }); + + source.set_observer(Arc::new(SourceObserverBridge { inner: Arc::downgrade(&inner) })); + + let join_handle = livekit_runtime::spawn({ + let inner = inner.clone(); + async move { + run_ingest_loop(inner).await; + } + }); + + Ok(Self { inner, join_handle: Mutex::new(Some(join_handle)) }) + } + + /// Register (or replace) the ingest-level observer. + pub fn set_observer(&self, observer: Arc) { + *self.inner.observer.lock() = Some(observer); + } + + /// Returns a snapshot of ingest stats since `start`. + pub fn stats(&self) -> EncodedIngestStats { + EncodedIngestStats { + frames_accepted: self.inner.stats.frames_accepted.load(Ordering::Relaxed), + frames_dropped: self.inner.stats.frames_dropped.load(Ordering::Relaxed), + keyframes: self.inner.stats.keyframes.load(Ordering::Relaxed), + tcp_reconnects: self.inner.stats.tcp_reconnects.load(Ordering::Relaxed), + } + } + + /// Returns the sid of the published track. + pub fn track_sid(&self) -> TrackSid { + self.inner.track.sid() + } + + /// Returns a clone of the underlying track. Useful for hooking mute / + /// packet-trailer state from the caller. + pub fn track(&self) -> LocalVideoTrack { + self.inner.track.clone() + } + + /// Stops the ingest loop and, if configured, unpublishes the track. + /// + /// Safe to call at most once. After `stop` returns, the TCP task is + /// terminated. If [`EncodedTcpIngestOptions::unpublish_on_stop`] is + /// true (the default), the track is unpublished from the room. 
+ pub async fn stop(self) { + self.inner.stop.store(true, Ordering::Release); + + let join = self.join_handle.lock().take(); + if let Some(handle) = join { + // We don't care about join errors — the task can only panic + // on a broken invariant, and we're shutting down anyway. + let _ = handle.await; + } + + if self.inner.options.unpublish_on_stop { + let sid = self.inner.track.sid(); + match self.inner.participant.unpublish_track(&sid).await { + Ok(_) => log::info!("EncodedTcpIngest: unpublished track {sid:?}"), + Err(e) => log::warn!("EncodedTcpIngest: unpublish_track failed: {e}"), + } + } + } +} + +impl Drop for EncodedTcpIngest { + fn drop(&mut self) { + // Make sure the background task exits even if the caller forgot + // to call `stop`. We can't await here, so the track stays + // published until the room is dropped or explicitly unpublished. + self.inner.stop.store(true, Ordering::Release); + } +} + +fn default_track_name(codec: VideoCodec) -> &'static str { + match codec { + VideoCodec::H264 => "encoded-h264", + VideoCodec::H265 => "encoded-h265", + VideoCodec::Vp8 => "encoded-vp8", + VideoCodec::Vp9 => "encoded-vp9", + VideoCodec::Av1 => "encoded-av1", + } +} + +/// Forwards source-level callbacks (keyframe request, bitrate update) to +/// the ingest-level observer, if any. Held via a `Weak` so the source +/// does not keep `Inner` alive past `drop`. 
+struct SourceObserverBridge { + inner: std::sync::Weak, +} + +impl EncodedVideoSourceObserver for SourceObserverBridge { + fn on_keyframe_requested(&self) { + if let Some(inner) = self.inner.upgrade() { + if let Some(obs) = inner.observer.lock().clone() { + obs.on_keyframe_requested(); + } + } + } + + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + if let Some(inner) = self.inner.upgrade() { + if let Some(obs) = inner.observer.lock().clone() { + obs.on_target_bitrate(bitrate_bps, framerate_fps); + } + } + } +} + +/// Reconnect loop: connects, demuxes, captures, and reconnects on +/// disconnect / desync until `stop` is flipped. +async fn run_ingest_loop(inner: Arc) { + let opts = &inner.options; + let addr = format!("{}:{}", opts.host, opts.port); + + while !inner.stop.load(Ordering::Acquire) { + inner.stats.tcp_reconnects.fetch_add(1, Ordering::Relaxed); + log::info!("EncodedTcpIngest: connecting to {addr} ({:?})", opts.codec); + + let mut stream = match TcpStream::connect(&addr).await { + Ok(s) => s, + Err(e) => { + log::warn!("EncodedTcpIngest: connect {addr} failed: {e}"); + notify_disconnected(&inner, &format!("connect: {e}")); + if !sleep_interruptible(&inner.stop, opts.reconnect_backoff).await { + return; + } + continue; + } + }; + let _ = stream.set_nodelay(true); + + let peer = stream.peer_addr().ok(); + if let Some(addr) = peer { + log::info!("EncodedTcpIngest: connected to {addr}"); + if let Some(obs) = inner.observer.lock().clone() { + obs.on_connected(addr); + } + } else { + log::info!("EncodedTcpIngest: connected to {addr} (peer_addr unknown)"); + } + + let reason = pump_stream(&inner, &mut stream).await; + log::warn!("EncodedTcpIngest: disconnected: {reason}"); + notify_disconnected(&inner, &reason); + + if inner.stop.load(Ordering::Acquire) { + return; + } + if !sleep_interruptible(&inner.stop, opts.reconnect_backoff).await { + return; + } + } +} + +/// Reads from the socket, demuxes, and captures frames until EOF, error, +/// 
desync, or stop. Returns a human-readable disconnect reason. +async fn pump_stream(inner: &Arc, stream: &mut TcpStream) -> String { + let opts = &inner.options; + let mut demuxer = Demuxer::new(opts.codec); + let mut read_buf = vec![0u8; 64 * 1024]; + let mut out: Vec> = Vec::new(); + + loop { + if inner.stop.load(Ordering::Acquire) { + return "stopped".to_string(); + } + + let n = tokio::select! { + r = stream.read(&mut read_buf) => r, + _ = sleep(Duration::from_millis(250)) => continue, + }; + + let n = match n { + Ok(0) => return "peer closed connection".to_string(), + Ok(n) => n, + Err(e) => return format!("read error: {e}"), + }; + + out.clear(); + demuxer.feed(&read_buf[..n], &mut out); + if demuxer.desynced() { + return "demuxer desync (reconnecting to re-align)".to_string(); + } + for frame in out.drain(..) { + let is_keyframe = is_keyframe(opts.codec, &frame); + if is_keyframe { + inner.stats.keyframes.fetch_add(1, Ordering::Relaxed); + } + let info = EncodedFrameInfo { + is_keyframe, + // The source scans + prepends SPS/PPS as needed. + has_sps_pps: false, + width: opts.width, + height: opts.height, + capture_time_us: 0, + }; + if inner.source.capture_frame(&frame, &info) { + inner.stats.frames_accepted.fetch_add(1, Ordering::Relaxed); + } else { + inner.stats.frames_dropped.fetch_add(1, Ordering::Relaxed); + log::warn!( + "EncodedTcpIngest: capture_frame dropped frame ({} bytes, keyframe={})", + frame.len(), + is_keyframe + ); + } + } + } +} + +fn notify_disconnected(inner: &Arc, reason: &str) { + if let Some(obs) = inner.observer.lock().clone() { + obs.on_disconnected(reason); + } +} + +/// Sleeps up to `dur`, waking early when `stop` is set. Returns `false` +/// if the sleep was interrupted by a stop request. 
+async fn sleep_interruptible(stop: &AtomicBool, dur: Duration) -> bool { + let tick = Duration::from_millis(100); + let mut remaining = dur; + while remaining > Duration::ZERO { + if stop.load(Ordering::Acquire) { + return false; + } + let step = remaining.min(tick); + sleep(step).await; + remaining = remaining.saturating_sub(step); + } + true +} diff --git a/livekit/src/video_ingest/keyframe.rs b/livekit/src/video_ingest/keyframe.rs new file mode 100644 index 000000000..5fe1a375c --- /dev/null +++ b/livekit/src/video_ingest/keyframe.rs @@ -0,0 +1,261 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Minimal keyframe detection for the five pre-encoded codecs supported by +//! [`EncodedTcpIngest`](super::encoded_tcp::EncodedTcpIngest). +//! +//! These probes are intentionally conservative — they never scan deeper into +//! a frame than needed to answer yes/no. Incorrect answers only affect +//! ingest-side stats and the `is_keyframe` flag forwarded to the +//! `NativeEncodedVideoSource`; WebRTC's own RTP packetizer recomputes what +//! it needs for keyframe signalling. + +use libwebrtc::video_source::VideoCodec; + +/// Returns the access-unit delimiter NAL type for Annex-B codecs. `None` +/// for codecs that are not delivered as Annex-B. 
+pub(super) fn aud_nal_type(codec: VideoCodec) -> Option { + match codec { + VideoCodec::H264 => Some(9), + VideoCodec::H265 => Some(35), + _ => None, + } +} + +/// Extracts the NAL unit type from the first byte after an Annex-B start +/// code. Returns 0 for codecs without NAL units. +pub(super) fn nal_type(codec: VideoCodec, first_byte: u8) -> u8 { + match codec { + VideoCodec::H264 => first_byte & 0x1F, + VideoCodec::H265 => (first_byte >> 1) & 0x3F, + _ => 0, + } +} + +/// Whether a given NAL type is a keyframe NAL. +/// +/// * H.264: IDR slice (NAL type 5) +/// * H.265: any IRAP (BLA/IDR/CRA, NAL types 16..=23) +/// * VP8/VP9/AV1: never — they do not use NAL units. +pub(super) fn is_keyframe_nal(codec: VideoCodec, nal_type: u8) -> bool { + match codec { + VideoCodec::H264 => nal_type == 5, + VideoCodec::H265 => (16..=23).contains(&nal_type), + _ => false, + } +} + +/// Top-level keyframe probe. Delegates to codec-specific helpers. +/// +/// * H.264 / H.265: scans for an IDR / IRAP NAL in the access unit. +/// * VP8: bit 0 of the frame tag (RFC 6386 §9.1: 0 = keyframe). +/// * VP9: decodes the leading bits of the uncompressed header (VP9 spec §6.2). +/// * AV1: scans OBUs in the Temporal Unit for an `OBU_SEQUENCE_HEADER` +/// (the same heuristic WebRTC's own AV1 RTP packetizer uses). 
+pub(super) fn is_keyframe(codec: VideoCodec, data: &[u8]) -> bool { + match codec { + VideoCodec::H264 | VideoCodec::H265 => is_keyframe_annex_b(codec, data), + VideoCodec::Vp8 => !data.is_empty() && (data[0] & 0x01) == 0, + VideoCodec::Vp9 => is_keyframe_vp9(data), + VideoCodec::Av1 => is_keyframe_av1(data), + } +} + +fn is_keyframe_annex_b(codec: VideoCodec, data: &[u8]) -> bool { + let mut i = 0usize; + while i + 3 < data.len() { + let is_four = i + 4 <= data.len() + && data[i] == 0 + && data[i + 1] == 0 + && data[i + 2] == 0 + && data[i + 3] == 1; + let is_three = data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1; + if is_four || is_three { + let payload_idx = if is_four { i + 4 } else { i + 3 }; + if payload_idx < data.len() && is_keyframe_nal(codec, nal_type(codec, data[payload_idx])) + { + return true; + } + i = payload_idx + 1; + } else { + i += 1; + } + } + false +} + +/// AV1 keyframe probe. Walks OBUs in a Temporal Unit and returns true if +/// any OBU has type `OBU_SEQUENCE_HEADER` (1). AV1 spec §5.3.2 (OBU header) +/// + §5.3.1 (leb128): +/// +/// * byte 0 bits 6..=3: `obu_type`. +/// * byte 0 bit 2: `obu_extension_flag`; if set, one extension byte follows. +/// * byte 0 bit 1: `obu_has_size_field`; if set, a leb128-encoded `obu_size` +/// follows. If clear, the OBU runs to the end of the input and we cannot +/// skip it. 
+fn is_keyframe_av1(mut data: &[u8]) -> bool { + const OBU_SEQUENCE_HEADER: u8 = 1; + while !data.is_empty() { + let header = data[0]; + let obu_type = (header >> 3) & 0x0F; + let ext = (header & 0x04) != 0; + let has_size = (header & 0x02) != 0; + + let mut off = 1; + if ext { + if off >= data.len() { + return false; + } + off += 1; + } + if !has_size { + return obu_type == OBU_SEQUENCE_HEADER; + } + let (size, size_len) = match read_leb128(&data[off..]) { + Some(v) => v, + None => return false, + }; + off += size_len; + let payload_end = match off.checked_add(size as usize) { + Some(e) if e <= data.len() => e, + _ => return false, + }; + if obu_type == OBU_SEQUENCE_HEADER { + return true; + } + data = &data[payload_end..]; + } + false +} + +/// Decodes an AV1 leb128 (unsigned little-endian base-128) integer. +/// Returns `(value, bytes_consumed)` or `None` on truncated input. +/// AV1 spec §4.10.5 caps the encoding at 8 bytes and 32 significant bits. +fn read_leb128(input: &[u8]) -> Option<(u32, usize)> { + let mut value: u64 = 0; + for (i, &byte) in input.iter().take(8).enumerate() { + value |= ((byte & 0x7F) as u64) << (i * 7); + if (byte & 0x80) == 0 { + return u32::try_from(value).ok().map(|v| (v, i + 1)); + } + } + None +} + +/// VP9 uncompressed-header keyframe probe. Reads first-byte bits (MSB +/// first) per VP9 bitstream spec §6.2: +/// +/// * bits 7..=6: `frame_marker` (must be `0b10`). +/// * bit 5: `profile_low_bit`, bit 4: `profile_high_bit` +/// (combined `profile` ∈ 0..=3). +/// * For `profile == 3`: bit 3 is reserved-zero, bit 2 is +/// `show_existing_frame`, bit 1 is `frame_type`. +/// * For `profile != 3`: bit 3 is `show_existing_frame`, bit 2 is +/// `frame_type`. +/// +/// A keyframe has `show_existing_frame == 0` and `frame_type == 0`. 
+fn is_keyframe_vp9(data: &[u8]) -> bool { + let Some(&b0) = data.first() else { + return false; + }; + if (b0 >> 6) & 0b11 != 0b10 { + return false; + } + let profile_low = (b0 >> 5) & 0x1; + let profile_high = (b0 >> 4) & 0x1; + let profile = (profile_high << 1) | profile_low; + let (show_existing_bit, frame_type_bit) = if profile == 3 { (2, 1) } else { (3, 2) }; + let show_existing = (b0 >> show_existing_bit) & 0x1; + if show_existing != 0 { + return false; + } + let frame_type = (b0 >> frame_type_bit) & 0x1; + frame_type == 0 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn h264_idr_is_keyframe() { + // 4-byte start code + NAL header for IDR (type 5, nal_ref_idc=3): 0x65 + let data = [0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x84]; + assert!(is_keyframe(VideoCodec::H264, &data)); + } + + #[test] + fn h264_non_idr_not_keyframe() { + // NAL header for non-IDR slice (type 1): 0x41 + let data = [0x00, 0x00, 0x00, 0x01, 0x41, 0x9a]; + assert!(!is_keyframe(VideoCodec::H264, &data)); + } + + #[test] + fn h265_idr_w_radl_is_keyframe() { + // H.265 NAL type 19 (IDR_W_RADL). NAL header byte is (type << 1): 0x26 + let data = [0x00, 0x00, 0x00, 0x01, 0x26, 0x01]; + assert!(is_keyframe(VideoCodec::H265, &data)); + } + + #[test] + fn vp8_keyframe_bit_zero() { + let kf = [0x00_u8]; + let pf = [0x01_u8]; + assert!(is_keyframe(VideoCodec::Vp8, &kf)); + assert!(!is_keyframe(VideoCodec::Vp8, &pf)); + } + + #[test] + fn vp9_profile0_keyframe() { + // frame_marker=10, profile=0 (both bits 0), show_existing=0, frame_type=0 + // => top bits 10 00 0 0 .. 
= 0b1000_0000 = 0x80 + let data = [0x80_u8]; + assert!(is_keyframe(VideoCodec::Vp9, &data)); + } + + #[test] + fn vp9_profile0_interframe() { + // frame_type bit = bit 2 => 0b1000_0100 = 0x84 + let data = [0x84_u8]; + assert!(!is_keyframe(VideoCodec::Vp9, &data)); + } + + #[test] + fn av1_sequence_header_obu_is_keyframe() { + // obu_type=1 (SEQUENCE_HEADER) => byte 0 = (1 << 3) | 0b010 = 0x0A + // (obu_has_size_field=1, no extension). obu_size leb128 = 0 (one byte). + let data = [0x0A, 0x00]; + assert!(is_keyframe(VideoCodec::Av1, &data)); + } + + #[test] + fn av1_tile_group_obu_not_keyframe() { + // obu_type=4 (TILE_GROUP), has_size=1. size=0. + let data = [0x22, 0x00]; + assert!(!is_keyframe(VideoCodec::Av1, &data)); + } + + #[test] + fn av1_leb128_single_byte() { + assert_eq!(read_leb128(&[0x00]), Some((0, 1))); + assert_eq!(read_leb128(&[0x7F]), Some((0x7F, 1))); + } + + #[test] + fn av1_leb128_multi_byte() { + // 128 => 0x80, 0x01 + assert_eq!(read_leb128(&[0x80, 0x01]), Some((128, 2))); + } +} diff --git a/livekit/src/video_ingest/mod.rs b/livekit/src/video_ingest/mod.rs new file mode 100644 index 000000000..a879e4058 --- /dev/null +++ b/livekit/src/video_ingest/mod.rs @@ -0,0 +1,34 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! High-level helpers for ingesting pre-encoded video into a LiveKit room. +//! +//! This module hides the moving parts of pulling a pre-encoded bytestream +//! 
from a source (currently: TCP) and turning it into a published +//! LiveKit track. Callers configure a small options struct and hand off a +//! `Room`; the helper does the rest. +//! +//! See [`EncodedTcpIngest`] for the TCP-based helper. + +#[cfg(not(target_arch = "wasm32"))] +mod demux; +#[cfg(not(target_arch = "wasm32"))] +mod encoded_tcp; +#[cfg(not(target_arch = "wasm32"))] +mod keyframe; + +#[cfg(not(target_arch = "wasm32"))] +pub use encoded_tcp::{ + EncodedIngestObserver, EncodedIngestStats, EncodedTcpIngest, EncodedTcpIngestOptions, +}; From 286e052b561c4d308e70be58b1d7facfb1711efe Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Thu, 23 Apr 2026 15:02:12 -0600 Subject: [PATCH 08/15] simple_sender --- examples/pre_encoded_ingest/Cargo.toml | 4 + examples/pre_encoded_ingest/README.md | 48 +++- .../pre_encoded_ingest/src/simple_sender.rs | 216 ++++++++++++++++++ 3 files changed, 267 insertions(+), 1 deletion(-) create mode 100644 examples/pre_encoded_ingest/src/simple_sender.rs diff --git a/examples/pre_encoded_ingest/Cargo.toml b/examples/pre_encoded_ingest/Cargo.toml index 6bbba5633..f966e3b65 100644 --- a/examples/pre_encoded_ingest/Cargo.toml +++ b/examples/pre_encoded_ingest/Cargo.toml @@ -12,6 +12,10 @@ path = "src/sender.rs" name = "receiver" path = "src/receiver.rs" +[[bin]] +name = "simple_sender" +path = "src/simple_sender.rs" + [dependencies] anyhow = { workspace = true } clap = { workspace = true, features = ["derive"] } diff --git a/examples/pre_encoded_ingest/README.md b/examples/pre_encoded_ingest/README.md index 1c05e292e..8865e1d5a 100644 --- a/examples/pre_encoded_ingest/README.md +++ b/examples/pre_encoded_ingest/README.md @@ -28,6 +28,46 @@ frames. The sender supports two wire framings, picked by `--codec`: record is one Temporal Unit (TU) — a complete OBU sequence for one frame. 
+## The two sender binaries + +This example ships **two** senders that publish the same stream; pick +whichever one better matches your integration shape: + +- **`simple_sender`** — uses the built-in SDK helper + [`livekit::video_ingest::EncodedTcpIngest`]. The helper owns the TCP + socket, demux, keyframe probe, reconnect loop, and track + publish/unpublish. Applications only supply config — port, codec, + width, height — and an optional [`EncodedIngestObserver`] for + connection / keyframe / bitrate callbacks. Recommended starting + point. +- **`sender`** — the hand-rolled version kept as a reference + implementation. It open-codes exactly what `EncodedTcpIngest` does + internally and is useful if you need to deviate from the helper + (custom transport, alternate demuxer, different track topology). + +The CLI flags are the same for both binaries; `--bin simple_sender` +is the drop-in replacement used in all examples below. + +### Minimal usage + +```rust +use libwebrtc::video_source::VideoCodec; +use livekit::video_ingest::{EncodedTcpIngest, EncodedTcpIngestOptions}; + +let options = EncodedTcpIngestOptions::new( + /* port */ 5005, + /* codec */ VideoCodec::H264, + /* width */ 640, + /* height */ 480, +); +let ingest = EncodedTcpIngest::start(room.local_participant(), options).await?; +// ... run ... +ingest.stop().await; +``` + +See `src/simple_sender.rs` for a full driver (token minting, observer, +stats polling, Ctrl-C shutdown). + ## What this exercises - `libwebrtc::video_source::NativeEncodedVideoSource` — the @@ -379,14 +419,20 @@ does not. The sender handles both. ### 2. 
Start the sender (Terminal 2) +Use `simple_sender` (SDK helper, recommended): + ```bash -RUST_LOG=info cargo run -p pre_encoded_ingest --bin sender -- \ +RUST_LOG=info cargo run -p pre_encoded_ingest --bin simple_sender -- \ --tcp-host 127.0.0.1 --tcp-port 5005 \ --width 640 --height 480 \ --codec h264 \ --room pre-encoded-demo --identity encoded-sender ``` +Or the hand-rolled reference (`--bin sender`) with the same flags — +see [The two sender binaries](#the-two-sender-binaries) for when to +pick one over the other. + For the H.265 pipeline use `--codec h265`; for VP8 use `--codec vp8`; for AV1 use `--codec av1`. diff --git a/examples/pre_encoded_ingest/src/simple_sender.rs b/examples/pre_encoded_ingest/src/simple_sender.rs new file mode 100644 index 000000000..f4000109c --- /dev/null +++ b/examples/pre_encoded_ingest/src/simple_sender.rs @@ -0,0 +1,216 @@ +// Copyright 2026 LiveKit, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Minimal pre-encoded ingest driver using [`livekit::video_ingest::EncodedTcpIngest`]. +//! +//! Everything that was hand-rolled in `sender.rs` (demuxing, keyframe +//! detection, reconnect loop, observer plumbing) now lives inside the +//! SDK. This example is effectively: parse CLI args, connect to the +//! room, `EncodedTcpIngest::start`, log stats, wait for Ctrl-C. 
+ +use std::{env, net::SocketAddr, sync::Arc, time::Duration}; + +use anyhow::{Context, Result}; +use clap::Parser; +use libwebrtc::video_source::VideoCodec; +use livekit::{ + prelude::*, + video_ingest::{EncodedIngestObserver, EncodedTcpIngest, EncodedTcpIngestOptions}, +}; +use livekit_api::access_token; +use log::{info, warn}; +use tokio::time::sleep; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// LiveKit server URL (or set LIVEKIT_URL env var) + #[arg(long)] + url: Option, + + /// LiveKit API key (or set LIVEKIT_API_KEY env var) + #[arg(long)] + api_key: Option, + + /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) + #[arg(long)] + api_secret: Option, + + /// Room name to join + #[arg(long, default_value = "pre-encoded-demo")] + room: String, + + /// Participant identity + #[arg(long, default_value = "encoded-sender")] + identity: String, + + /// Host of the gstreamer `tcpserversink` + #[arg(long, default_value = "127.0.0.1")] + tcp_host: String, + + /// Port of the gstreamer `tcpserversink` + #[arg(long, default_value_t = 5000)] + tcp_port: u16, + + /// Declared stream width (px) + #[arg(long, default_value_t = 640)] + width: u32, + + /// Declared stream height (px) + #[arg(long, default_value_t = 480)] + height: u32, + + /// Pre-encoded codec on the wire. Must match the gstreamer pipeline. + #[arg(long, value_enum, default_value_t = CodecArg::H264)] + codec: CodecArg, + + /// Optional max bitrate forwarded to TrackPublishOptions.video_encoding. 
+ #[arg(long)] + max_bitrate_kbps: Option, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, clap::ValueEnum)] +enum CodecArg { + H264, + H265, + Vp8, + Vp9, + Av1, +} + +impl CodecArg { + fn webrtc_codec(self) -> VideoCodec { + match self { + CodecArg::H264 => VideoCodec::H264, + CodecArg::H265 => VideoCodec::H265, + CodecArg::Vp8 => VideoCodec::Vp8, + CodecArg::Vp9 => VideoCodec::Vp9, + CodecArg::Av1 => VideoCodec::Av1, + } + } +} + +/// Logs the feedback events the SDK surfaces. Real producers should +/// react here — e.g. nudge their hardware encoder to emit an IDR on +/// `on_keyframe_requested`, or clamp their encoder to the advertised +/// `on_target_bitrate`. +struct LoggingObserver; + +impl EncodedIngestObserver for LoggingObserver { + fn on_connected(&self, peer: SocketAddr) { + info!("ingest: connected to {peer}"); + } + fn on_disconnected(&self, reason: &str) { + warn!("ingest: disconnected: {reason}"); + } + fn on_keyframe_requested(&self) { + warn!( + "ingest: keyframe requested by receiver — producer should emit a keyframe on the \ + next frame" + ); + } + fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + info!( + "ingest: target bitrate update: {} kbps @ {:.1} fps", + bitrate_bps / 1000, + framerate_fps + ); + } +} + +#[tokio::main] +async fn main() -> Result<()> { + env_logger::init(); + let args = Args::parse(); + + let url = args + .url + .or_else(|| env::var("LIVEKIT_URL").ok()) + .context("--url or LIVEKIT_URL required")?; + let api_key = args + .api_key + .or_else(|| env::var("LIVEKIT_API_KEY").ok()) + .context("--api-key or LIVEKIT_API_KEY required")?; + let api_secret = args + .api_secret + .or_else(|| env::var("LIVEKIT_API_SECRET").ok()) + .context("--api-secret or LIVEKIT_API_SECRET required")?; + + let token = access_token::AccessToken::with_api_key(&api_key, &api_secret) + .with_identity(&args.identity) + .with_name(&args.identity) + .with_grants(access_token::VideoGrants { + room_join: true, + room: 
args.room.clone(), + can_publish: true, + ..Default::default() + }) + .to_jwt()?; + + info!("connecting to LiveKit room '{}' as '{}'...", args.room, args.identity); + let mut room_options = RoomOptions::default(); + room_options.auto_subscribe = false; + room_options.dynacast = false; + let (room, _events) = Room::connect(&url, &token, room_options).await?; + info!("connected: {} (sid {})", room.name(), room.sid().await); + + let mut opts = EncodedTcpIngestOptions::new( + args.tcp_port, + args.codec.webrtc_codec(), + args.width, + args.height, + ); + opts.host = args.tcp_host.clone(); + opts.max_bitrate_bps = args.max_bitrate_kbps.map(|k| k * 1000); + + let ingest = EncodedTcpIngest::start(room.local_participant(), opts).await?; + ingest.set_observer(Arc::new(LoggingObserver)); + info!("ingest: started track sid={}", ingest.track_sid()); + + // Poll stats every 2s while the ingest runs. + let ingest_for_stats = Arc::new(ingest); + let stats_task = { + let ingest = ingest_for_stats.clone(); + tokio::spawn(async move { + let mut prev = ingest.stats(); + loop { + sleep(Duration::from_secs(2)).await; + let cur = ingest.stats(); + let ok = cur.frames_accepted.saturating_sub(prev.frames_accepted); + let dropped = cur.frames_dropped.saturating_sub(prev.frames_dropped); + let kf = cur.keyframes.saturating_sub(prev.keyframes); + if ok + dropped > 0 { + info!( + "ingest: {:.1} fps accepted, {:.1} fps dropped, {kf} keyframes (total \ + reconnects={})", + ok as f64 / 2.0, + dropped as f64 / 2.0, + cur.tcp_reconnects + ); + } + prev = cur; + } + }) + }; + + tokio::signal::ctrl_c().await.ok(); + info!("ctrl-c received, shutting down..."); + stats_task.abort(); + + let ingest = Arc::try_unwrap(ingest_for_stats) + .map_err(|_| anyhow::anyhow!("ingest still referenced"))?; + ingest.stop().await; + info!("done"); + Ok(()) +} From a2387df5f4ba877611973cd2e847a7cf1d923aa7 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 
27 Apr 2026 14:52:48 +0000 Subject: [PATCH 09/15] generated protobuf --- livekit-ffi-node-bindings/proto/ffi_pb.d.ts | 87 ++++++- livekit-ffi-node-bindings/proto/ffi_pb.js | 15 +- .../proto/video_frame_pb.d.ts | 239 ++++++++++++++++++ .../proto/video_frame_pb.js | 89 +++++++ 4 files changed, 428 insertions(+), 2 deletions(-) diff --git a/livekit-ffi-node-bindings/proto/ffi_pb.d.ts b/livekit-ffi-node-bindings/proto/ffi_pb.d.ts index 05d98054d..388ba03d5 100644 --- a/livekit-ffi-node-bindings/proto/ffi_pb.d.ts +++ b/livekit-ffi-node-bindings/proto/ffi_pb.d.ts @@ -21,13 +21,14 @@ import type { BinaryReadOptions, FieldList, JsonReadOptions, JsonValue, PartialM import { Message, proto2 } from "@bufbuild/protobuf"; import type { ConnectCallback, ConnectRequest, ConnectResponse, DisconnectCallback, DisconnectRequest, DisconnectResponse, EditChatMessageRequest, GetSessionStatsCallback, GetSessionStatsRequest, GetSessionStatsResponse, PublishDataCallback, PublishDataRequest, PublishDataResponse, PublishSipDtmfCallback, PublishSipDtmfRequest, PublishSipDtmfResponse, PublishTrackCallback, PublishTrackRequest, PublishTrackResponse, PublishTranscriptionCallback, PublishTranscriptionRequest, PublishTranscriptionResponse, RoomEvent, SendChatMessageCallback, SendChatMessageRequest, SendChatMessageResponse, SendStreamChunkCallback, SendStreamChunkRequest, SendStreamChunkResponse, SendStreamHeaderCallback, SendStreamHeaderRequest, SendStreamHeaderResponse, SendStreamTrailerCallback, SendStreamTrailerRequest, SendStreamTrailerResponse, SetDataChannelBufferedAmountLowThresholdRequest, SetDataChannelBufferedAmountLowThresholdResponse, SetLocalAttributesCallback, SetLocalAttributesRequest, SetLocalAttributesResponse, SetLocalMetadataCallback, SetLocalMetadataRequest, SetLocalMetadataResponse, SetLocalNameCallback, SetLocalNameRequest, SetLocalNameResponse, SetSubscribedRequest, SetSubscribedResponse, UnpublishTrackCallback, UnpublishTrackRequest, UnpublishTrackResponse } from 
"./room_pb.js"; import type { CreateAudioTrackRequest, CreateAudioTrackResponse, CreateVideoTrackRequest, CreateVideoTrackResponse, EnableRemoteTrackRequest, EnableRemoteTrackResponse, GetStatsCallback, GetStatsRequest, GetStatsResponse, LocalTrackMuteRequest, LocalTrackMuteResponse, SetTrackSubscriptionPermissionsRequest, SetTrackSubscriptionPermissionsResponse, TrackEvent } from "./track_pb.js"; -import type { CaptureVideoFrameRequest, CaptureVideoFrameResponse, NewVideoSourceRequest, NewVideoSourceResponse, NewVideoStreamRequest, NewVideoStreamResponse, VideoConvertRequest, VideoConvertResponse, VideoStreamEvent, VideoStreamFromParticipantRequest, VideoStreamFromParticipantResponse } from "./video_frame_pb.js"; +import type { CaptureEncodedVideoFrameRequest, CaptureEncodedVideoFrameResponse, CaptureVideoFrameRequest, CaptureVideoFrameResponse, EncodedVideoSourceEvent, NewVideoSourceRequest, NewVideoSourceResponse, NewVideoStreamRequest, NewVideoStreamResponse, VideoConvertRequest, VideoConvertResponse, VideoStreamEvent, VideoStreamFromParticipantRequest, VideoStreamFromParticipantResponse } from "./video_frame_pb.js"; import type { ApmProcessReverseStreamRequest, ApmProcessReverseStreamResponse, ApmProcessStreamRequest, ApmProcessStreamResponse, ApmSetStreamDelayRequest, ApmSetStreamDelayResponse, AudioStreamEvent, AudioStreamFromParticipantRequest, AudioStreamFromParticipantResponse, CaptureAudioFrameCallback, CaptureAudioFrameRequest, CaptureAudioFrameResponse, ClearAudioBufferRequest, ClearAudioBufferResponse, FlushSoxResamplerRequest, FlushSoxResamplerResponse, LoadAudioFilterPluginRequest, LoadAudioFilterPluginResponse, NewApmRequest, NewApmResponse, NewAudioResamplerRequest, NewAudioResamplerResponse, NewAudioSourceRequest, NewAudioSourceResponse, NewAudioStreamRequest, NewAudioStreamResponse, NewSoxResamplerRequest, NewSoxResamplerResponse, PushSoxResamplerRequest, PushSoxResamplerResponse, RemixAndResampleRequest, RemixAndResampleResponse } from 
"./audio_frame_pb.js"; import type { E2eeRequest, E2eeResponse } from "./e2ee_pb.js"; import type { PerformRpcCallback, PerformRpcRequest, PerformRpcResponse, RegisterRpcMethodRequest, RegisterRpcMethodResponse, RpcMethodInvocationEvent, RpcMethodInvocationResponseRequest, RpcMethodInvocationResponseResponse, UnregisterRpcMethodRequest, UnregisterRpcMethodResponse } from "./rpc_pb.js"; import type { EnableRemoteTrackPublicationRequest, EnableRemoteTrackPublicationResponse, SetRemoteTrackPublicationQualityRequest, SetRemoteTrackPublicationQualityResponse, UpdateRemoteTrackPublicationDimensionRequest, UpdateRemoteTrackPublicationDimensionResponse } from "./track_publication_pb.js"; import type { ByteStreamOpenCallback, ByteStreamOpenRequest, ByteStreamOpenResponse, ByteStreamReaderEvent, ByteStreamReaderReadAllCallback, ByteStreamReaderReadAllRequest, ByteStreamReaderReadAllResponse, ByteStreamReaderReadIncrementalRequest, ByteStreamReaderReadIncrementalResponse, ByteStreamReaderWriteToFileCallback, ByteStreamReaderWriteToFileRequest, ByteStreamReaderWriteToFileResponse, ByteStreamWriterCloseCallback, ByteStreamWriterCloseRequest, ByteStreamWriterCloseResponse, ByteStreamWriterWriteCallback, ByteStreamWriterWriteRequest, ByteStreamWriterWriteResponse, StreamSendBytesCallback, StreamSendBytesRequest, StreamSendBytesResponse, StreamSendFileCallback, StreamSendFileRequest, StreamSendFileResponse, StreamSendTextCallback, StreamSendTextRequest, StreamSendTextResponse, TextStreamOpenCallback, TextStreamOpenRequest, TextStreamOpenResponse, TextStreamReaderEvent, TextStreamReaderReadAllCallback, TextStreamReaderReadAllRequest, TextStreamReaderReadAllResponse, TextStreamReaderReadIncrementalRequest, TextStreamReaderReadIncrementalResponse, TextStreamWriterCloseCallback, TextStreamWriterCloseRequest, TextStreamWriterCloseResponse, TextStreamWriterWriteCallback, TextStreamWriterWriteRequest, TextStreamWriterWriteResponse } from "./data_stream_pb.js"; import type { 
DataTrackStreamEvent, DataTrackStreamReadRequest, DataTrackStreamReadResponse, LocalDataTrackIsPublishedRequest, LocalDataTrackIsPublishedResponse, LocalDataTrackTryPushRequest, LocalDataTrackTryPushResponse, LocalDataTrackUnpublishRequest, LocalDataTrackUnpublishResponse, PublishDataTrackCallback, PublishDataTrackRequest, PublishDataTrackResponse, RemoteDataTrackIsPublishedRequest, RemoteDataTrackIsPublishedResponse, SubscribeDataTrackRequest, SubscribeDataTrackResponse } from "./data_track_pb.js"; +import type { EncodedTcpIngestEvent, GetEncodedTcpIngestStatsRequest, GetEncodedTcpIngestStatsResponse, NewEncodedTcpIngestCallback, NewEncodedTcpIngestRequest, NewEncodedTcpIngestResponse, StopEncodedTcpIngestCallback, StopEncodedTcpIngestRequest, StopEncodedTcpIngestResponse } from "./encoded_tcp_ingest_pb.js"; /** * @generated from enum livekit.proto.LogLevel @@ -537,6 +538,34 @@ export declare class FfiRequest extends Message { */ value: DataTrackStreamReadRequest; case: "dataTrackStreamRead"; + } | { + /** + * Encoded video + * + * @generated from field: livekit.proto.CaptureEncodedVideoFrameRequest capture_encoded_video_frame = 76; + */ + value: CaptureEncodedVideoFrameRequest; + case: "captureEncodedVideoFrame"; + } | { + /** + * Encoded TCP ingest (high-level helper) + * + * @generated from field: livekit.proto.NewEncodedTcpIngestRequest new_encoded_tcp_ingest = 77; + */ + value: NewEncodedTcpIngestRequest; + case: "newEncodedTcpIngest"; + } | { + /** + * @generated from field: livekit.proto.StopEncodedTcpIngestRequest stop_encoded_tcp_ingest = 78; + */ + value: StopEncodedTcpIngestRequest; + case: "stopEncodedTcpIngest"; + } | { + /** + * @generated from field: livekit.proto.GetEncodedTcpIngestStatsRequest get_encoded_tcp_ingest_stats = 79; + */ + value: GetEncodedTcpIngestStatsRequest; + case: "getEncodedTcpIngestStats"; } | { case: undefined; value?: undefined }; constructor(data?: PartialMessage); @@ -1025,6 +1054,34 @@ export declare class FfiResponse 
extends Message { */ value: DataTrackStreamReadResponse; case: "dataTrackStreamRead"; + } | { + /** + * Encoded video + * + * @generated from field: livekit.proto.CaptureEncodedVideoFrameResponse capture_encoded_video_frame = 75; + */ + value: CaptureEncodedVideoFrameResponse; + case: "captureEncodedVideoFrame"; + } | { + /** + * Encoded TCP ingest (high-level helper) + * + * @generated from field: livekit.proto.NewEncodedTcpIngestResponse new_encoded_tcp_ingest = 76; + */ + value: NewEncodedTcpIngestResponse; + case: "newEncodedTcpIngest"; + } | { + /** + * @generated from field: livekit.proto.StopEncodedTcpIngestResponse stop_encoded_tcp_ingest = 77; + */ + value: StopEncodedTcpIngestResponse; + case: "stopEncodedTcpIngest"; + } | { + /** + * @generated from field: livekit.proto.GetEncodedTcpIngestStatsResponse get_encoded_tcp_ingest_stats = 78; + */ + value: GetEncodedTcpIngestStatsResponse; + case: "getEncodedTcpIngestStats"; } | { case: undefined; value?: undefined }; constructor(data?: PartialMessage); @@ -1313,6 +1370,34 @@ export declare class FfiEvent extends Message { */ value: DataTrackStreamEvent; case: "dataTrackStreamEvent"; + } | { + /** + * Encoded video + * + * @generated from field: livekit.proto.EncodedVideoSourceEvent encoded_video_source_event = 44; + */ + value: EncodedVideoSourceEvent; + case: "encodedVideoSourceEvent"; + } | { + /** + * Encoded TCP ingest (high-level helper) + * + * @generated from field: livekit.proto.EncodedTcpIngestEvent encoded_tcp_ingest_event = 45; + */ + value: EncodedTcpIngestEvent; + case: "encodedTcpIngestEvent"; + } | { + /** + * @generated from field: livekit.proto.NewEncodedTcpIngestCallback new_encoded_tcp_ingest = 46; + */ + value: NewEncodedTcpIngestCallback; + case: "newEncodedTcpIngest"; + } | { + /** + * @generated from field: livekit.proto.StopEncodedTcpIngestCallback stop_encoded_tcp_ingest = 47; + */ + value: StopEncodedTcpIngestCallback; + case: "stopEncodedTcpIngest"; } | { case: undefined; value?: 
undefined }; constructor(data?: PartialMessage); diff --git a/livekit-ffi-node-bindings/proto/ffi_pb.js b/livekit-ffi-node-bindings/proto/ffi_pb.js index 22727f9b7..4d2071d91 100644 --- a/livekit-ffi-node-bindings/proto/ffi_pb.js +++ b/livekit-ffi-node-bindings/proto/ffi_pb.js @@ -23,13 +23,14 @@ Object.defineProperty(exports, "__esModule", { value: true }); const { proto2 } = require("@bufbuild/protobuf"); const { ConnectCallback, ConnectRequest, ConnectResponse, DisconnectCallback, DisconnectRequest, DisconnectResponse, EditChatMessageRequest, GetSessionStatsCallback, GetSessionStatsRequest, GetSessionStatsResponse, PublishDataCallback, PublishDataRequest, PublishDataResponse, PublishSipDtmfCallback, PublishSipDtmfRequest, PublishSipDtmfResponse, PublishTrackCallback, PublishTrackRequest, PublishTrackResponse, PublishTranscriptionCallback, PublishTranscriptionRequest, PublishTranscriptionResponse, RoomEvent, SendChatMessageCallback, SendChatMessageRequest, SendChatMessageResponse, SendStreamChunkCallback, SendStreamChunkRequest, SendStreamChunkResponse, SendStreamHeaderCallback, SendStreamHeaderRequest, SendStreamHeaderResponse, SendStreamTrailerCallback, SendStreamTrailerRequest, SendStreamTrailerResponse, SetDataChannelBufferedAmountLowThresholdRequest, SetDataChannelBufferedAmountLowThresholdResponse, SetLocalAttributesCallback, SetLocalAttributesRequest, SetLocalAttributesResponse, SetLocalMetadataCallback, SetLocalMetadataRequest, SetLocalMetadataResponse, SetLocalNameCallback, SetLocalNameRequest, SetLocalNameResponse, SetSubscribedRequest, SetSubscribedResponse, UnpublishTrackCallback, UnpublishTrackRequest, UnpublishTrackResponse } = require("./room_pb.js"); const { CreateAudioTrackRequest, CreateAudioTrackResponse, CreateVideoTrackRequest, CreateVideoTrackResponse, EnableRemoteTrackRequest, EnableRemoteTrackResponse, GetStatsCallback, GetStatsRequest, GetStatsResponse, LocalTrackMuteRequest, LocalTrackMuteResponse, SetTrackSubscriptionPermissionsRequest, 
SetTrackSubscriptionPermissionsResponse, TrackEvent } = require("./track_pb.js"); -const { CaptureVideoFrameRequest, CaptureVideoFrameResponse, NewVideoSourceRequest, NewVideoSourceResponse, NewVideoStreamRequest, NewVideoStreamResponse, VideoConvertRequest, VideoConvertResponse, VideoStreamEvent, VideoStreamFromParticipantRequest, VideoStreamFromParticipantResponse } = require("./video_frame_pb.js"); +const { CaptureEncodedVideoFrameRequest, CaptureEncodedVideoFrameResponse, CaptureVideoFrameRequest, CaptureVideoFrameResponse, EncodedVideoSourceEvent, NewVideoSourceRequest, NewVideoSourceResponse, NewVideoStreamRequest, NewVideoStreamResponse, VideoConvertRequest, VideoConvertResponse, VideoStreamEvent, VideoStreamFromParticipantRequest, VideoStreamFromParticipantResponse } = require("./video_frame_pb.js"); const { ApmProcessReverseStreamRequest, ApmProcessReverseStreamResponse, ApmProcessStreamRequest, ApmProcessStreamResponse, ApmSetStreamDelayRequest, ApmSetStreamDelayResponse, AudioStreamEvent, AudioStreamFromParticipantRequest, AudioStreamFromParticipantResponse, CaptureAudioFrameCallback, CaptureAudioFrameRequest, CaptureAudioFrameResponse, ClearAudioBufferRequest, ClearAudioBufferResponse, FlushSoxResamplerRequest, FlushSoxResamplerResponse, LoadAudioFilterPluginRequest, LoadAudioFilterPluginResponse, NewApmRequest, NewApmResponse, NewAudioResamplerRequest, NewAudioResamplerResponse, NewAudioSourceRequest, NewAudioSourceResponse, NewAudioStreamRequest, NewAudioStreamResponse, NewSoxResamplerRequest, NewSoxResamplerResponse, PushSoxResamplerRequest, PushSoxResamplerResponse, RemixAndResampleRequest, RemixAndResampleResponse } = require("./audio_frame_pb.js"); const { E2eeRequest, E2eeResponse } = require("./e2ee_pb.js"); const { PerformRpcCallback, PerformRpcRequest, PerformRpcResponse, RegisterRpcMethodRequest, RegisterRpcMethodResponse, RpcMethodInvocationEvent, RpcMethodInvocationResponseRequest, RpcMethodInvocationResponseResponse, 
UnregisterRpcMethodRequest, UnregisterRpcMethodResponse } = require("./rpc_pb.js"); const { EnableRemoteTrackPublicationRequest, EnableRemoteTrackPublicationResponse, SetRemoteTrackPublicationQualityRequest, SetRemoteTrackPublicationQualityResponse, UpdateRemoteTrackPublicationDimensionRequest, UpdateRemoteTrackPublicationDimensionResponse } = require("./track_publication_pb.js"); const { ByteStreamOpenCallback, ByteStreamOpenRequest, ByteStreamOpenResponse, ByteStreamReaderEvent, ByteStreamReaderReadAllCallback, ByteStreamReaderReadAllRequest, ByteStreamReaderReadAllResponse, ByteStreamReaderReadIncrementalRequest, ByteStreamReaderReadIncrementalResponse, ByteStreamReaderWriteToFileCallback, ByteStreamReaderWriteToFileRequest, ByteStreamReaderWriteToFileResponse, ByteStreamWriterCloseCallback, ByteStreamWriterCloseRequest, ByteStreamWriterCloseResponse, ByteStreamWriterWriteCallback, ByteStreamWriterWriteRequest, ByteStreamWriterWriteResponse, StreamSendBytesCallback, StreamSendBytesRequest, StreamSendBytesResponse, StreamSendFileCallback, StreamSendFileRequest, StreamSendFileResponse, StreamSendTextCallback, StreamSendTextRequest, StreamSendTextResponse, TextStreamOpenCallback, TextStreamOpenRequest, TextStreamOpenResponse, TextStreamReaderEvent, TextStreamReaderReadAllCallback, TextStreamReaderReadAllRequest, TextStreamReaderReadAllResponse, TextStreamReaderReadIncrementalRequest, TextStreamReaderReadIncrementalResponse, TextStreamWriterCloseCallback, TextStreamWriterCloseRequest, TextStreamWriterCloseResponse, TextStreamWriterWriteCallback, TextStreamWriterWriteRequest, TextStreamWriterWriteResponse } = require("./data_stream_pb.js"); const { DataTrackStreamEvent, DataTrackStreamReadRequest, DataTrackStreamReadResponse, LocalDataTrackIsPublishedRequest, LocalDataTrackIsPublishedResponse, LocalDataTrackTryPushRequest, LocalDataTrackTryPushResponse, LocalDataTrackUnpublishRequest, LocalDataTrackUnpublishResponse, PublishDataTrackCallback, PublishDataTrackRequest, 
PublishDataTrackResponse, RemoteDataTrackIsPublishedRequest, RemoteDataTrackIsPublishedResponse, SubscribeDataTrackRequest, SubscribeDataTrackResponse } = require("./data_track_pb.js"); +const { EncodedTcpIngestEvent, GetEncodedTcpIngestStatsRequest, GetEncodedTcpIngestStatsResponse, NewEncodedTcpIngestCallback, NewEncodedTcpIngestRequest, NewEncodedTcpIngestResponse, StopEncodedTcpIngestCallback, StopEncodedTcpIngestRequest, StopEncodedTcpIngestResponse } = require("./encoded_tcp_ingest_pb.js"); /** * @generated from enum livekit.proto.LogLevel @@ -128,6 +129,10 @@ const FfiRequest = /*@__PURE__*/ proto2.makeMessageType( { no: 73, name: "subscribe_data_track", kind: "message", T: SubscribeDataTrackRequest, oneof: "message" }, { no: 74, name: "remote_data_track_is_published", kind: "message", T: RemoteDataTrackIsPublishedRequest, oneof: "message" }, { no: 75, name: "data_track_stream_read", kind: "message", T: DataTrackStreamReadRequest, oneof: "message" }, + { no: 76, name: "capture_encoded_video_frame", kind: "message", T: CaptureEncodedVideoFrameRequest, oneof: "message" }, + { no: 77, name: "new_encoded_tcp_ingest", kind: "message", T: NewEncodedTcpIngestRequest, oneof: "message" }, + { no: 78, name: "stop_encoded_tcp_ingest", kind: "message", T: StopEncodedTcpIngestRequest, oneof: "message" }, + { no: 79, name: "get_encoded_tcp_ingest_stats", kind: "message", T: GetEncodedTcpIngestStatsRequest, oneof: "message" }, ], ); @@ -212,6 +217,10 @@ const FfiResponse = /*@__PURE__*/ proto2.makeMessageType( { no: 72, name: "subscribe_data_track", kind: "message", T: SubscribeDataTrackResponse, oneof: "message" }, { no: 73, name: "remote_data_track_is_published", kind: "message", T: RemoteDataTrackIsPublishedResponse, oneof: "message" }, { no: 74, name: "data_track_stream_read", kind: "message", T: DataTrackStreamReadResponse, oneof: "message" }, + { no: 75, name: "capture_encoded_video_frame", kind: "message", T: CaptureEncodedVideoFrameResponse, oneof: "message" }, + { 
no: 76, name: "new_encoded_tcp_ingest", kind: "message", T: NewEncodedTcpIngestResponse, oneof: "message" }, + { no: 77, name: "stop_encoded_tcp_ingest", kind: "message", T: StopEncodedTcpIngestResponse, oneof: "message" }, + { no: 78, name: "get_encoded_tcp_ingest_stats", kind: "message", T: GetEncodedTcpIngestStatsResponse, oneof: "message" }, ], ); @@ -267,6 +276,10 @@ const FfiEvent = /*@__PURE__*/ proto2.makeMessageType( { no: 41, name: "send_bytes", kind: "message", T: StreamSendBytesCallback, oneof: "message" }, { no: 42, name: "publish_data_track", kind: "message", T: PublishDataTrackCallback, oneof: "message" }, { no: 43, name: "data_track_stream_event", kind: "message", T: DataTrackStreamEvent, oneof: "message" }, + { no: 44, name: "encoded_video_source_event", kind: "message", T: EncodedVideoSourceEvent, oneof: "message" }, + { no: 45, name: "encoded_tcp_ingest_event", kind: "message", T: EncodedTcpIngestEvent, oneof: "message" }, + { no: 46, name: "new_encoded_tcp_ingest", kind: "message", T: NewEncodedTcpIngestCallback, oneof: "message" }, + { no: 47, name: "stop_encoded_tcp_ingest", kind: "message", T: StopEncodedTcpIngestCallback, oneof: "message" }, ], ); diff --git a/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts b/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts index e0ec12f19..d02f9b375 100644 --- a/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts +++ b/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts @@ -168,6 +168,14 @@ export declare enum VideoSourceType { * @generated from enum value: VIDEO_SOURCE_NATIVE = 0; */ VIDEO_SOURCE_NATIVE = 0, + + /** + * A source that accepts pre-encoded compressed frames. WebRTC's internal + * encoder is bypassed for tracks bound to this source. 
+ * + * @generated from enum value: VIDEO_SOURCE_ENCODED = 1; + */ + VIDEO_SOURCE_ENCODED = 1, } /** @@ -363,6 +371,14 @@ export declare class NewVideoSourceRequest extends Message); static readonly runtime: typeof proto2; @@ -469,6 +485,101 @@ export declare class CaptureVideoFrameResponse extends Message | undefined, b: CaptureVideoFrameResponse | PlainMessage | undefined): boolean; } +/** + * Push a pre-encoded (compressed) frame to an encoded VideoSource. + * The source must have been created with type == VIDEO_SOURCE_ENCODED. + * + * @generated from message livekit.proto.CaptureEncodedVideoFrameRequest + */ +export declare class CaptureEncodedVideoFrameRequest extends Message { + /** + * @generated from field: required uint64 source_handle = 1; + */ + sourceHandle?: bigint; + + /** + * Raw encoded bitstream (e.g. NAL units for H.264/H.265, VP8/VP9/AV1 + * OBU payload). Must be a complete access unit / picture. + * + * @generated from field: required bytes data = 2; + */ + data?: Uint8Array; + + /** + * @generated from field: required bool is_keyframe = 3; + */ + isKeyframe?: boolean; + + /** + * H.264/H.265 only: set when SPS/PPS (or VPS/SPS/PPS) is already + * prepended to `data`. Ignored for other codecs. + * + * @generated from field: optional bool has_sps_pps = 4; + */ + hasSpsPps?: boolean; + + /** + * Frame resolution. 0/0 means "use the resolution from + * EncodedVideoSourceOptions". + * + * @generated from field: optional uint32 width = 5; + */ + width?: number; + + /** + * @generated from field: optional uint32 height = 6; + */ + height?: number; + + /** + * Capture timestamp in microseconds. 0 lets the source stamp `now`. 
+ * + * @generated from field: optional int64 capture_time_us = 7; + */ + captureTimeUs?: bigint; + + constructor(data?: PartialMessage); + + static readonly runtime: typeof proto2; + static readonly typeName = "livekit.proto.CaptureEncodedVideoFrameRequest"; + static readonly fields: FieldList; + + static fromBinary(bytes: Uint8Array, options?: Partial): CaptureEncodedVideoFrameRequest; + + static fromJson(jsonValue: JsonValue, options?: Partial): CaptureEncodedVideoFrameRequest; + + static fromJsonString(jsonString: string, options?: Partial): CaptureEncodedVideoFrameRequest; + + static equals(a: CaptureEncodedVideoFrameRequest | PlainMessage | undefined, b: CaptureEncodedVideoFrameRequest | PlainMessage | undefined): boolean; +} + +/** + * @generated from message livekit.proto.CaptureEncodedVideoFrameResponse + */ +export declare class CaptureEncodedVideoFrameResponse extends Message { + /** + * True if the frame was queued; false if it was dropped because the + * internal queue was full. + * + * @generated from field: required bool accepted = 1; + */ + accepted?: boolean; + + constructor(data?: PartialMessage); + + static readonly runtime: typeof proto2; + static readonly typeName = "livekit.proto.CaptureEncodedVideoFrameResponse"; + static readonly fields: FieldList; + + static fromBinary(bytes: Uint8Array, options?: Partial): CaptureEncodedVideoFrameResponse; + + static fromJson(jsonValue: JsonValue, options?: Partial): CaptureEncodedVideoFrameResponse; + + static fromJsonString(jsonString: string, options?: Partial): CaptureEncodedVideoFrameResponse; + + static equals(a: CaptureEncodedVideoFrameResponse | PlainMessage | undefined, b: CaptureEncodedVideoFrameResponse | PlainMessage | undefined): boolean; +} + /** * @generated from message livekit.proto.VideoConvertRequest */ @@ -908,6 +1019,13 @@ export declare class VideoSourceInfo extends Message { */ type?: VideoSourceType; + /** + * Only populated for encoded sources. Exposed for debugging / tracing. 
+ * + * @generated from field: optional uint32 encoded_source_id = 2; + */ + encodedSourceId?: number; + constructor(data?: PartialMessage); static readonly runtime: typeof proto2; @@ -952,3 +1070,124 @@ export declare class OwnedVideoSource extends Message { static equals(a: OwnedVideoSource | PlainMessage | undefined, b: OwnedVideoSource | PlainMessage | undefined): boolean; } +/** + * Options for an encoded video source. One source carries a single encoded + * stream (one resolution, one codec). To simulcast, create multiple sources + * and publish them on separate tracks. + * + * @generated from message livekit.proto.EncodedVideoSourceOptions + */ +export declare class EncodedVideoSourceOptions extends Message { + /** + * @generated from field: required livekit.proto.VideoCodec codec = 1; + */ + codec?: VideoCodec; + + constructor(data?: PartialMessage); + + static readonly runtime: typeof proto2; + static readonly typeName = "livekit.proto.EncodedVideoSourceOptions"; + static readonly fields: FieldList; + + static fromBinary(bytes: Uint8Array, options?: Partial): EncodedVideoSourceOptions; + + static fromJson(jsonValue: JsonValue, options?: Partial): EncodedVideoSourceOptions; + + static fromJsonString(jsonString: string, options?: Partial): EncodedVideoSourceOptions; + + static equals(a: EncodedVideoSourceOptions | PlainMessage | undefined, b: EncodedVideoSourceOptions | PlainMessage | undefined): boolean; +} + +/** + * Encoder-side feedback for an encoded video source. Emitted as FfiEvents + * so client SDKs can react (request a fresh keyframe from their encoder, + * adjust target bitrate, etc.). 
+ * + * @generated from message livekit.proto.EncodedVideoSourceEvent + */ +export declare class EncodedVideoSourceEvent extends Message { + /** + * @generated from field: required uint64 source_handle = 1; + */ + sourceHandle?: bigint; + + /** + * @generated from oneof livekit.proto.EncodedVideoSourceEvent.message + */ + message: { + /** + * @generated from field: livekit.proto.EncodedVideoSourceEvent.KeyframeRequested keyframe_requested = 2; + */ + value: EncodedVideoSourceEvent_KeyframeRequested; + case: "keyframeRequested"; + } | { + /** + * @generated from field: livekit.proto.EncodedVideoSourceEvent.TargetBitrateChanged target_bitrate_changed = 3; + */ + value: EncodedVideoSourceEvent_TargetBitrateChanged; + case: "targetBitrateChanged"; + } | { case: undefined; value?: undefined }; + + constructor(data?: PartialMessage); + + static readonly runtime: typeof proto2; + static readonly typeName = "livekit.proto.EncodedVideoSourceEvent"; + static readonly fields: FieldList; + + static fromBinary(bytes: Uint8Array, options?: Partial): EncodedVideoSourceEvent; + + static fromJson(jsonValue: JsonValue, options?: Partial): EncodedVideoSourceEvent; + + static fromJsonString(jsonString: string, options?: Partial): EncodedVideoSourceEvent; + + static equals(a: EncodedVideoSourceEvent | PlainMessage | undefined, b: EncodedVideoSourceEvent | PlainMessage | undefined): boolean; +} + +/** + * @generated from message livekit.proto.EncodedVideoSourceEvent.KeyframeRequested + */ +export declare class EncodedVideoSourceEvent_KeyframeRequested extends Message { + constructor(data?: PartialMessage); + + static readonly runtime: typeof proto2; + static readonly typeName = "livekit.proto.EncodedVideoSourceEvent.KeyframeRequested"; + static readonly fields: FieldList; + + static fromBinary(bytes: Uint8Array, options?: Partial): EncodedVideoSourceEvent_KeyframeRequested; + + static fromJson(jsonValue: JsonValue, options?: Partial): EncodedVideoSourceEvent_KeyframeRequested; + + 
static fromJsonString(jsonString: string, options?: Partial): EncodedVideoSourceEvent_KeyframeRequested; + + static equals(a: EncodedVideoSourceEvent_KeyframeRequested | PlainMessage | undefined, b: EncodedVideoSourceEvent_KeyframeRequested | PlainMessage | undefined): boolean; +} + +/** + * @generated from message livekit.proto.EncodedVideoSourceEvent.TargetBitrateChanged + */ +export declare class EncodedVideoSourceEvent_TargetBitrateChanged extends Message { + /** + * @generated from field: required uint32 bitrate_bps = 1; + */ + bitrateBps?: number; + + /** + * @generated from field: required double framerate_fps = 2; + */ + framerateFps?: number; + + constructor(data?: PartialMessage); + + static readonly runtime: typeof proto2; + static readonly typeName = "livekit.proto.EncodedVideoSourceEvent.TargetBitrateChanged"; + static readonly fields: FieldList; + + static fromBinary(bytes: Uint8Array, options?: Partial): EncodedVideoSourceEvent_TargetBitrateChanged; + + static fromJson(jsonValue: JsonValue, options?: Partial): EncodedVideoSourceEvent_TargetBitrateChanged; + + static fromJsonString(jsonString: string, options?: Partial): EncodedVideoSourceEvent_TargetBitrateChanged; + + static equals(a: EncodedVideoSourceEvent_TargetBitrateChanged | PlainMessage | undefined, b: EncodedVideoSourceEvent_TargetBitrateChanged | PlainMessage | undefined): boolean; +} + diff --git a/livekit-ffi-node-bindings/proto/video_frame_pb.js b/livekit-ffi-node-bindings/proto/video_frame_pb.js index 331320682..4af2dc542 100644 --- a/livekit-ffi-node-bindings/proto/video_frame_pb.js +++ b/livekit-ffi-node-bindings/proto/video_frame_pb.js @@ -93,6 +93,7 @@ const VideoSourceType = /*@__PURE__*/ proto2.makeEnum( "livekit.proto.VideoSourceType", [ {no: 0, name: "VIDEO_SOURCE_NATIVE"}, + {no: 1, name: "VIDEO_SOURCE_ENCODED"}, ], ); @@ -162,6 +163,7 @@ const NewVideoSourceRequest = /*@__PURE__*/ proto2.makeMessageType( { no: 1, name: "type", kind: "enum", T: 
proto2.getEnumType(VideoSourceType), req: true }, { no: 2, name: "resolution", kind: "message", T: VideoSourceResolution, req: true }, { no: 3, name: "is_screencast", kind: "scalar", T: 8 /* ScalarType.BOOL */, opt: true }, + { no: 4, name: "encoded_options", kind: "message", T: EncodedVideoSourceOptions, opt: true }, ], ); @@ -199,6 +201,35 @@ const CaptureVideoFrameResponse = /*@__PURE__*/ proto2.makeMessageType( [], ); +/** + * Push a pre-encoded (compressed) frame to an encoded VideoSource. + * The source must have been created with type == VIDEO_SOURCE_ENCODED. + * + * @generated from message livekit.proto.CaptureEncodedVideoFrameRequest + */ +const CaptureEncodedVideoFrameRequest = /*@__PURE__*/ proto2.makeMessageType( + "livekit.proto.CaptureEncodedVideoFrameRequest", + () => [ + { no: 1, name: "source_handle", kind: "scalar", T: 4 /* ScalarType.UINT64 */, req: true }, + { no: 2, name: "data", kind: "scalar", T: 12 /* ScalarType.BYTES */, req: true }, + { no: 3, name: "is_keyframe", kind: "scalar", T: 8 /* ScalarType.BOOL */, req: true }, + { no: 4, name: "has_sps_pps", kind: "scalar", T: 8 /* ScalarType.BOOL */, opt: true }, + { no: 5, name: "width", kind: "scalar", T: 13 /* ScalarType.UINT32 */, opt: true }, + { no: 6, name: "height", kind: "scalar", T: 13 /* ScalarType.UINT32 */, opt: true }, + { no: 7, name: "capture_time_us", kind: "scalar", T: 3 /* ScalarType.INT64 */, opt: true }, + ], +); + +/** + * @generated from message livekit.proto.CaptureEncodedVideoFrameResponse + */ +const CaptureEncodedVideoFrameResponse = /*@__PURE__*/ proto2.makeMessageType( + "livekit.proto.CaptureEncodedVideoFrameResponse", + () => [ + { no: 1, name: "accepted", kind: "scalar", T: 8 /* ScalarType.BOOL */, req: true }, + ], +); + /** * @generated from message livekit.proto.VideoConvertRequest */ @@ -356,6 +387,7 @@ const VideoSourceInfo = /*@__PURE__*/ proto2.makeMessageType( "livekit.proto.VideoSourceInfo", () => [ { no: 1, name: "type", kind: "enum", T: 
proto2.getEnumType(VideoSourceType), req: true }, + { no: 2, name: "encoded_source_id", kind: "scalar", T: 13 /* ScalarType.UINT32 */, opt: true }, ], ); @@ -370,6 +402,57 @@ const OwnedVideoSource = /*@__PURE__*/ proto2.makeMessageType( ], ); +/** + * Options for an encoded video source. One source carries a single encoded + * stream (one resolution, one codec). To simulcast, create multiple sources + * and publish them on separate tracks. + * + * @generated from message livekit.proto.EncodedVideoSourceOptions + */ +const EncodedVideoSourceOptions = /*@__PURE__*/ proto2.makeMessageType( + "livekit.proto.EncodedVideoSourceOptions", + () => [ + { no: 1, name: "codec", kind: "enum", T: proto2.getEnumType(VideoCodec), req: true }, + ], +); + +/** + * Encoder-side feedback for an encoded video source. Emitted as FfiEvents + * so client SDKs can react (request a fresh keyframe from their encoder, + * adjust target bitrate, etc.). + * + * @generated from message livekit.proto.EncodedVideoSourceEvent + */ +const EncodedVideoSourceEvent = /*@__PURE__*/ proto2.makeMessageType( + "livekit.proto.EncodedVideoSourceEvent", + () => [ + { no: 1, name: "source_handle", kind: "scalar", T: 4 /* ScalarType.UINT64 */, req: true }, + { no: 2, name: "keyframe_requested", kind: "message", T: EncodedVideoSourceEvent_KeyframeRequested, oneof: "message" }, + { no: 3, name: "target_bitrate_changed", kind: "message", T: EncodedVideoSourceEvent_TargetBitrateChanged, oneof: "message" }, + ], +); + +/** + * @generated from message livekit.proto.EncodedVideoSourceEvent.KeyframeRequested + */ +const EncodedVideoSourceEvent_KeyframeRequested = /*@__PURE__*/ proto2.makeMessageType( + "livekit.proto.EncodedVideoSourceEvent.KeyframeRequested", + [], + {localName: "EncodedVideoSourceEvent_KeyframeRequested"}, +); + +/** + * @generated from message livekit.proto.EncodedVideoSourceEvent.TargetBitrateChanged + */ +const EncodedVideoSourceEvent_TargetBitrateChanged = /*@__PURE__*/ proto2.makeMessageType( + 
"livekit.proto.EncodedVideoSourceEvent.TargetBitrateChanged", + () => [ + { no: 1, name: "bitrate_bps", kind: "scalar", T: 13 /* ScalarType.UINT32 */, req: true }, + { no: 2, name: "framerate_fps", kind: "scalar", T: 1 /* ScalarType.DOUBLE */, req: true }, + ], + {localName: "EncodedVideoSourceEvent_TargetBitrateChanged"}, +); + exports.VideoCodec = VideoCodec; exports.VideoRotation = VideoRotation; @@ -384,6 +467,8 @@ exports.NewVideoSourceRequest = NewVideoSourceRequest; exports.NewVideoSourceResponse = NewVideoSourceResponse; exports.CaptureVideoFrameRequest = CaptureVideoFrameRequest; exports.CaptureVideoFrameResponse = CaptureVideoFrameResponse; +exports.CaptureEncodedVideoFrameRequest = CaptureEncodedVideoFrameRequest; +exports.CaptureEncodedVideoFrameResponse = CaptureEncodedVideoFrameResponse; exports.VideoConvertRequest = VideoConvertRequest; exports.VideoConvertResponse = VideoConvertResponse; exports.VideoResolution = VideoResolution; @@ -399,3 +484,7 @@ exports.VideoStreamEOS = VideoStreamEOS; exports.VideoSourceResolution = VideoSourceResolution; exports.VideoSourceInfo = VideoSourceInfo; exports.OwnedVideoSource = OwnedVideoSource; +exports.EncodedVideoSourceOptions = EncodedVideoSourceOptions; +exports.EncodedVideoSourceEvent = EncodedVideoSourceEvent; +exports.EncodedVideoSourceEvent_KeyframeRequested = EncodedVideoSourceEvent_KeyframeRequested; +exports.EncodedVideoSourceEvent_TargetBitrateChanged = EncodedVideoSourceEvent_TargetBitrateChanged; From 9619309206722ccd80eb7fc9d901c89f1cfe409c Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Mon, 27 Apr 2026 08:59:18 -0600 Subject: [PATCH 10/15] additional tests --- libwebrtc/src/native/encoded_video_source.rs | 59 +++++- livekit/src/video_ingest/demux.rs | 98 +++++++++- livekit/src/video_ingest/encoded_tcp.rs | 182 ++++++++++++++++--- livekit/src/video_ingest/keyframe.rs | 38 +++- 4 files changed, 333 insertions(+), 44 deletions(-) diff --git a/libwebrtc/src/native/encoded_video_source.rs 
b/libwebrtc/src/native/encoded_video_source.rs index 0677decf0..f1d5f897d 100644 --- a/libwebrtc/src/native/encoded_video_source.rs +++ b/libwebrtc/src/native/encoded_video_source.rs @@ -95,10 +95,7 @@ impl NativeEncodedVideoSource { resolution.width, resolution.height, ); - Self { - sys_handle, - inner: Arc::new(Inner { resolution: Mutex::new(resolution) }), - } + Self { sys_handle, inner: Arc::new(Inner { resolution: Mutex::new(resolution) }) } } /// Unique non-zero id assigned to this source. Exposed for debugging / @@ -140,9 +137,9 @@ impl NativeEncodedVideoSource { /// Register an observer for encoder-side feedback. The previous observer /// (if any) is dropped. pub fn set_observer(&self, observer: Arc) { - let wrapper = Box::new(sys_evs::EncodedVideoSourceWrapper::new(Arc::new( - ObserverBridge { inner: observer }, - ))); + let wrapper = Box::new(sys_evs::EncodedVideoSourceWrapper::new(Arc::new(ObserverBridge { + inner: observer, + }))); self.sys_handle.set_observer(wrapper); } @@ -166,3 +163,51 @@ impl sys_evs::EncodedVideoSourceObserver for ObserverBridge { self.inner.on_target_bitrate(bitrate_bps, framerate_fps); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encoded_source_reports_codec_and_updates_resolution_from_frames() { + let source = NativeEncodedVideoSource::new( + VideoCodec::Av1, + VideoResolution { width: 640, height: 360 }, + ); + + assert_ne!(source.source_id(), 0); + assert_eq!(source.codec(), VideoCodec::Av1); + assert_eq!(source.video_resolution().width, 640); + assert_eq!(source.video_resolution().height, 360); + + let info = EncodedFrameInfo { + is_keyframe: true, + width: 1280, + height: 720, + capture_time_us: 123_456, + ..Default::default() + }; + + assert!(source.capture_frame(&[0x0A, 0x00], &info)); + assert_eq!(source.video_resolution().width, 1280); + assert_eq!(source.video_resolution().height, 720); + } + + #[test] + fn encoded_source_prefers_buffered_keyframe_over_incoming_delta_when_full() { + let source = 
NativeEncodedVideoSource::new( + VideoCodec::H264, + VideoResolution { width: 640, height: 360 }, + ); + let keyframe = + EncodedFrameInfo { is_keyframe: true, width: 640, height: 360, ..Default::default() }; + let delta = EncodedFrameInfo { width: 640, height: 360, ..Default::default() }; + + assert!(source.capture_frame(&[0, 0, 0, 1, 0x65], &keyframe)); + for _ in 0..7 { + assert!(source.capture_frame(&[0, 0, 0, 1, 0x41], &delta)); + } + + assert!(!source.capture_frame(&[0, 0, 0, 1, 0x41], &delta)); + } +} diff --git a/livekit/src/video_ingest/demux.rs b/livekit/src/video_ingest/demux.rs index 52ee85c88..0eb2d1b0d 100644 --- a/livekit/src/video_ingest/demux.rs +++ b/livekit/src/video_ingest/demux.rs @@ -135,8 +135,8 @@ impl IvfReader { if self.buf.len() < 12 { return; } - let size = u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]]) - as usize; + let size = + u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]]) as usize; if size == 0 || size > MAX_FRAME_BYTES { log::warn!( "ivf: implausible frame_size={size} bytes — byte stream is misaligned. 
\ @@ -248,6 +248,15 @@ mod tests { rec } + fn make_dkif_header(fourcc: &[u8; 4]) -> Vec { + let mut bytes = Vec::new(); + bytes.extend_from_slice(b"DKIF"); + bytes.extend_from_slice(&[0; 4]); + bytes.extend_from_slice(fourcc); + bytes.extend_from_slice(&[0; 20]); + bytes + } + #[test] fn ivf_without_dkif_emits_frames() { let mut r = IvfReader::new(VideoCodec::Vp8); @@ -266,17 +275,29 @@ mod tests { fn ivf_with_dkif_skips_header() { let mut r = IvfReader::new(VideoCodec::Vp8); let mut bytes = Vec::new(); - // 32-byte DKIF header - bytes.extend_from_slice(b"DKIF"); - bytes.extend_from_slice(&[0; 4]); - bytes.extend_from_slice(b"VP80"); - bytes.extend_from_slice(&[0; 20]); + bytes.extend_from_slice(&make_dkif_header(b"VP80")); bytes.extend_from_slice(&make_ivf_frame(3, &[7, 8, 9])); let mut out = Vec::new(); r.feed(&bytes, &mut out); assert_eq!(out, vec![vec![7, 8, 9]]); } + #[test] + fn ivf_header_and_frame_can_arrive_across_reads() { + let mut r = IvfReader::new(VideoCodec::Vp8); + let mut bytes = make_dkif_header(b"VP80"); + bytes.extend_from_slice(&make_ivf_frame(4, &[1, 3, 5, 7])); + bytes.extend_from_slice(&make_ivf_frame(2, &[8, 13])); + + let mut out = Vec::new(); + for chunk in bytes.chunks(5) { + r.feed(chunk, &mut out); + } + + assert_eq!(out, vec![vec![1, 3, 5, 7], vec![8, 13]]); + assert!(!r.desynced); + } + #[test] fn ivf_absurd_size_triggers_desync() { let mut r = IvfReader::new(VideoCodec::Vp8); @@ -290,6 +311,33 @@ mod tests { assert!(r.desynced); } + #[test] + fn ivf_zero_size_triggers_desync_and_drops_buffered_bytes() { + let mut r = IvfReader::new(VideoCodec::Vp9); + let mut bytes = make_ivf_frame(0, &[]); + bytes.extend_from_slice(&make_ivf_frame(3, &[1, 2, 3])); + + let mut out = Vec::new(); + r.feed(&bytes, &mut out); + + assert!(out.is_empty()); + assert!(r.desynced); + } + + #[test] + fn ivf_frame_header_can_arrive_across_reads_without_dkif() { + let mut r = IvfReader::new(VideoCodec::Av1); + let bytes = make_ivf_frame(5, &[0x0A, 0x00, 0x22, 
0x00, 0x55]); + + let mut out = Vec::new(); + for chunk in bytes.chunks(2) { + r.feed(chunk, &mut out); + } + + assert_eq!(out, vec![vec![0x0A, 0x00, 0x22, 0x00, 0x55]]); + assert!(!r.desynced); + } + #[test] fn au_splitter_emits_completed_aus() { let mut s = AuSplitter::new(VideoCodec::H264); @@ -310,4 +358,40 @@ mod tests { assert_eq!(&out[0][..5], &[0, 0, 0, 1, 0x09]); assert!(out[0].windows(5).any(|w| w == [0, 0, 0, 1, 0x65])); } + + #[test] + fn au_splitter_discards_prefix_and_handles_split_start_codes() { + let mut s = AuSplitter::new(VideoCodec::H264); + let mut out = Vec::new(); + + s.feed(&[0xAA, 0xBB, 0x00, 0x00], &mut out); + s.feed(&[0x00], &mut out); + s.feed(&[0x01, 0x09, 0xF0, 0x00, 0x00, 0x01, 0x65, 0x88], &mut out); + assert!(out.is_empty()); + + s.feed(&[0x00, 0x00], &mut out); + s.feed(&[0x00, 0x01, 0x09, 0xF0, 0x00, 0x00, 0x01, 0x41, 0x9A], &mut out); + + assert_eq!(out.len(), 1); + assert_eq!(&out[0][..5], &[0, 0, 0, 1, 0x09]); + assert!(out[0].windows(5).any(|w| w == [0, 0, 1, 0x65, 0x88])); + assert!(!out[0].starts_with(&[0xAA, 0xBB])); + } + + #[test] + fn au_splitter_handles_h265_aud_boundaries() { + let mut s = AuSplitter::new(VideoCodec::H265); + let mut out = Vec::new(); + + // H.265 AUD NAL type 35 => first header byte is (35 << 1) = 0x46. + // IDR_W_RADL NAL type 19 => first header byte is (19 << 1) = 0x26. 
+ s.feed(&[0, 0, 1, 0x46, 0x01, 0x50, 0, 0, 1, 0x26, 0x01, 0x88], &mut out); + assert!(out.is_empty()); + + s.feed(&[0, 0, 0, 1, 0x46, 0x01, 0x50, 0, 0, 1, 0x02, 0x01], &mut out); + + assert_eq!(out.len(), 1); + assert_eq!(&out[0][..5], &[0, 0, 1, 0x46, 0x01]); + assert!(out[0].windows(5).any(|w| w == [0, 0, 1, 0x26, 0x01])); + } } diff --git a/livekit/src/video_ingest/encoded_tcp.rs b/livekit/src/video_ingest/encoded_tcp.rs index ccc1bb19d..5a82abced 100644 --- a/livekit/src/video_ingest/encoded_tcp.rs +++ b/livekit/src/video_ingest/encoded_tcp.rs @@ -37,11 +37,9 @@ use std::{ time::Duration, }; -use libwebrtc::{ - video_source::{ - native::{EncodedVideoSourceObserver, NativeEncodedVideoSource}, - EncodedFrameInfo, RtcVideoSource, VideoCodec, VideoResolution, - }, +use libwebrtc::video_source::{ + native::{EncodedVideoSourceObserver, NativeEncodedVideoSource}, + EncodedFrameInfo, RtcVideoSource, VideoCodec, VideoResolution, }; use livekit_runtime::JoinHandle; use parking_lot::Mutex; @@ -192,16 +190,7 @@ impl EncodedTcpIngest { participant: LocalParticipant, options: EncodedTcpIngestOptions, ) -> RoomResult { - if options.width == 0 || options.height == 0 { - return Err(RoomError::Internal( - "EncodedTcpIngest: width and height must be non-zero".to_string(), - )); - } - if options.port == 0 { - return Err(RoomError::Internal( - "EncodedTcpIngest: port must be non-zero".to_string(), - )); - } + validate_options(&options)?; let resolution = VideoResolution { width: options.width, height: options.height }; let source = NativeEncodedVideoSource::new(options.codec, resolution); @@ -222,23 +211,11 @@ impl EncodedTcpIngest { RtcVideoSource::Encoded(source.clone()), ); - let mut publish_opts = TrackPublishOptions { - source: options.track_source, - simulcast: false, - ..Default::default() - }; - if let Some(max_bitrate) = options.max_bitrate_bps { - publish_opts.video_encoding = Some(VideoEncoding { - max_bitrate, - max_framerate: options.max_framerate_fps, - }); - } + let 
publish_opts = build_publish_options(&options); // video_codec is force-pinned to match the encoded source by // LocalParticipant::publish_track, so we leave the default. - participant - .publish_track(LocalTrack::Video(track.clone()), publish_opts) - .await?; + participant.publish_track(LocalTrack::Video(track.clone()), publish_opts).await?; log::info!("EncodedTcpIngest: published track '{}' ({:?})", track_name, options.codec); let inner = Arc::new(Inner { @@ -323,6 +300,31 @@ impl Drop for EncodedTcpIngest { } } +fn validate_options(options: &EncodedTcpIngestOptions) -> RoomResult<()> { + if options.width == 0 || options.height == 0 { + return Err(RoomError::Internal( + "EncodedTcpIngest: width and height must be non-zero".to_string(), + )); + } + if options.port == 0 { + return Err(RoomError::Internal("EncodedTcpIngest: port must be non-zero".to_string())); + } + Ok(()) +} + +fn build_publish_options(options: &EncodedTcpIngestOptions) -> TrackPublishOptions { + let mut publish_opts = TrackPublishOptions { + source: options.track_source, + simulcast: false, + ..Default::default() + }; + if let Some(max_bitrate) = options.max_bitrate_bps { + publish_opts.video_encoding = + Some(VideoEncoding { max_bitrate, max_framerate: options.max_framerate_fps }); + } + publish_opts +} + fn default_track_name(codec: VideoCodec) -> &'static str { match codec { VideoCodec::H264 => "encoded-h264", @@ -481,3 +483,125 @@ async fn sleep_interruptible(stop: &AtomicBool, dur: Duration) -> bool { } true } + +#[cfg(test)] +mod tests { + use std::{ + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + time::{Duration, Instant}, + }; + + use libwebrtc::video_source::VideoCodec; + + use super::*; + use crate::{prelude::TrackSource, RoomError}; + + #[test] + fn options_new_sets_network_and_track_defaults() { + let options = EncodedTcpIngestOptions::new(5004, VideoCodec::H264, 1920, 1080); + + assert_eq!(options.host, "127.0.0.1"); + assert_eq!(options.port, 5004); + 
assert_eq!(options.codec, VideoCodec::H264); + assert_eq!(options.width, 1920); + assert_eq!(options.height, 1080); + assert_eq!(options.track_name, None); + assert_eq!(options.track_source, TrackSource::Camera); + assert_eq!(options.max_bitrate_bps, None); + assert_eq!(options.max_framerate_fps, 30.0); + assert_eq!(options.reconnect_backoff, Duration::from_secs(1)); + assert!(options.unpublish_on_stop); + } + + #[test] + fn validate_options_rejects_invalid_dimensions_before_publish() { + let mut options = EncodedTcpIngestOptions::new(5004, VideoCodec::Vp8, 0, 720); + + let err = validate_options(&options).expect_err("zero width should be rejected"); + assert!( + matches!(err, RoomError::Internal(message) if message.contains("width and height")) + ); + + options.width = 1280; + options.height = 0; + let err = validate_options(&options).expect_err("zero height should be rejected"); + assert!( + matches!(err, RoomError::Internal(message) if message.contains("width and height")) + ); + } + + #[test] + fn validate_options_rejects_zero_port_before_publish() { + let options = EncodedTcpIngestOptions::new(0, VideoCodec::Av1, 1280, 720); + + let err = validate_options(&options).expect_err("zero port should be rejected"); + assert!(matches!(err, RoomError::Internal(message) if message.contains("port"))); + } + + #[test] + fn build_publish_options_disables_simulcast_and_preserves_source() { + let mut options = EncodedTcpIngestOptions::new(5004, VideoCodec::H265, 1280, 720); + options.track_source = TrackSource::Screenshare; + + let publish_options = build_publish_options(&options); + + assert_eq!(publish_options.source, TrackSource::Screenshare); + assert!(!publish_options.simulcast); + assert!(publish_options.video_encoding.is_none()); + } + + #[test] + fn build_publish_options_uses_explicit_bitrate_pair() { + let mut options = EncodedTcpIngestOptions::new(5004, VideoCodec::Vp9, 1280, 720); + options.max_bitrate_bps = Some(2_500_000); + options.max_framerate_fps = 24.0; + + 
let publish_options = build_publish_options(&options); + let encoding = publish_options.video_encoding.expect("encoding should be set"); + + assert_eq!(encoding.max_bitrate, 2_500_000); + assert_eq!(encoding.max_framerate, 24.0); + assert!(!publish_options.simulcast); + } + + #[test] + fn default_track_names_cover_all_ingest_codecs() { + assert_eq!(default_track_name(VideoCodec::H264), "encoded-h264"); + assert_eq!(default_track_name(VideoCodec::H265), "encoded-h265"); + assert_eq!(default_track_name(VideoCodec::Vp8), "encoded-vp8"); + assert_eq!(default_track_name(VideoCodec::Vp9), "encoded-vp9"); + assert_eq!(default_track_name(VideoCodec::Av1), "encoded-av1"); + } + + #[tokio::test] + async fn sleep_interruptible_returns_false_when_stop_already_set() { + let stop = AtomicBool::new(true); + + assert!(!sleep_interruptible(&stop, Duration::from_secs(60)).await); + } + + #[tokio::test] + async fn sleep_interruptible_wakes_soon_after_stop_is_set() { + let stop = Arc::new(AtomicBool::new(false)); + let setter = { + let stop = stop.clone(); + tokio::spawn(async move { + tokio::time::sleep(Duration::from_millis(10)).await; + stop.store(true, Ordering::Release); + }) + }; + + let start = Instant::now(); + let slept = sleep_interruptible(&stop, Duration::from_secs(5)).await; + setter.await.expect("stop setter should complete"); + + assert!(!slept); + assert!( + start.elapsed() < Duration::from_secs(1), + "sleep should be interrupted instead of waiting for the full backoff" + ); + } +} diff --git a/livekit/src/video_ingest/keyframe.rs b/livekit/src/video_ingest/keyframe.rs index 5fe1a375c..57343f299 100644 --- a/livekit/src/video_ingest/keyframe.rs +++ b/livekit/src/video_ingest/keyframe.rs @@ -83,7 +83,8 @@ fn is_keyframe_annex_b(codec: VideoCodec, data: &[u8]) -> bool { let is_three = data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1; if is_four || is_three { let payload_idx = if is_four { i + 4 } else { i + 3 }; - if payload_idx < data.len() && is_keyframe_nal(codec, 
nal_type(codec, data[payload_idx])) + if payload_idx < data.len() + && is_keyframe_nal(codec, nal_type(codec, data[payload_idx])) { return true; } @@ -209,6 +210,13 @@ mod tests { assert!(is_keyframe(VideoCodec::H265, &data)); } + #[test] + fn h265_vps_is_not_keyframe() { + // H.265 VPS NAL type 32 announces stream metadata but is not an IRAP picture. + let data = [0x00, 0x00, 0x01, 0x40, 0x01]; + assert!(!is_keyframe(VideoCodec::H265, &data)); + } + #[test] fn vp8_keyframe_bit_zero() { let kf = [0x00_u8]; @@ -232,6 +240,20 @@ mod tests { assert!(!is_keyframe(VideoCodec::Vp9, &data)); } + #[test] + fn vp9_profile3_uses_shifted_frame_type_bits() { + // frame_marker=10, profile=3, reserved=0, show_existing=0, frame_type=0 + // => 0b1011_0000 = 0xB0. + let keyframe = [0xB0_u8]; + // In profile 3, bit 1 is frame_type; bit 2 is show_existing_frame. + let interframe = [0xB2_u8]; + let show_existing = [0xB4_u8]; + + assert!(is_keyframe(VideoCodec::Vp9, &keyframe)); + assert!(!is_keyframe(VideoCodec::Vp9, &interframe)); + assert!(!is_keyframe(VideoCodec::Vp9, &show_existing)); + } + #[test] fn av1_sequence_header_obu_is_keyframe() { // obu_type=1 (SEQUENCE_HEADER) => byte 0 = (1 << 3) | 0b010 = 0x0A @@ -247,6 +269,20 @@ mod tests { assert!(!is_keyframe(VideoCodec::Av1, &data)); } + #[test] + fn av1_sequence_header_after_non_key_obu_is_keyframe() { + // TILE_GROUP OBU with two bytes of payload, followed by a SEQUENCE_HEADER OBU. + let data = [0x22, 0x02, 0xAA, 0xBB, 0x0A, 0x00]; + assert!(is_keyframe(VideoCodec::Av1, &data)); + } + + #[test] + fn av1_truncated_sized_obu_is_not_keyframe() { + // TILE_GROUP with a continued leb128 size byte but no terminating byte. 
+ let data = [0x22, 0x80]; + assert!(!is_keyframe(VideoCodec::Av1, &data)); + } + #[test] fn av1_leb128_single_byte() { assert_eq!(read_leb128(&[0x00]), Some((0, 1))); From a41415ac6b98c8e9ff5920fed3445912e28b40da Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 28 Apr 2026 09:48:43 -0600 Subject: [PATCH 11/15] livekit-encoded-video-ingest: encoded video ingest as its own crate --- Cargo.lock | 14 +++ Cargo.toml | 2 + examples/pre_encoded_ingest/Cargo.toml | 5 +- examples/pre_encoded_ingest/README.md | 20 ++--- examples/pre_encoded_ingest/src/receiver.rs | 4 +- examples/pre_encoded_ingest/src/sender.rs | 13 ++- .../pre_encoded_ingest/src/simple_sender.rs | 15 ++-- libwebrtc/Cargo.toml | 1 + libwebrtc/src/lib.rs | 5 +- libwebrtc/src/native/encoded_video_source.rs | 4 +- libwebrtc/src/native/mod.rs | 1 + .../src/native/peer_connection_factory.rs | 11 +-- libwebrtc/src/peer_connection_factory.rs | 10 ++- libwebrtc/src/video_source.rs | 27 +++--- livekit-encoded-video-ingest/Cargo.toml | 16 ++++ .../src}/demux.rs | 0 .../src}/encoded_tcp.rs | 10 +-- .../src}/keyframe.rs | 2 +- .../src/lib.rs | 4 +- .../proto/video_frame_pb.d.ts | 4 +- .../proto/video_frame_pb.js | 2 +- livekit-ffi/Cargo.toml | 2 + livekit-ffi/protocol/encoded_tcp_ingest.proto | 4 +- livekit-ffi/protocol/video_frame.proto | 4 +- livekit-ffi/src/server/encoded_tcp_ingest.rs | 86 +++++++++++++------ livekit-ffi/src/server/requests.rs | 6 +- livekit-ffi/src/server/video_source.rs | 68 +++++++++------ livekit/Cargo.toml | 1 + livekit/src/lib.rs | 1 - livekit/src/room/options.rs | 1 + .../src/room/participant/local_participant.rs | 20 +++-- livekit/src/room/track/local_video_track.rs | 2 +- webrtc-sys/Cargo.toml | 1 + webrtc-sys/build.rs | 15 +++- .../livekit/passthrough_video_encoder.h | 2 +- .../include/livekit/peer_connection_factory.h | 4 + webrtc-sys/src/encoded_video_source.rs | 5 +- webrtc-sys/src/lib.rs | 1 + webrtc-sys/src/peer_connection_factory.cpp | 4 + 
webrtc-sys/src/peer_connection_factory.rs | 2 + webrtc-sys/src/video_encoder_factory.cpp | 7 ++ 41 files changed, 267 insertions(+), 139 deletions(-) create mode 100644 livekit-encoded-video-ingest/Cargo.toml rename {livekit/src/video_ingest => livekit-encoded-video-ingest/src}/demux.rs (100%) rename {livekit/src/video_ingest => livekit-encoded-video-ingest/src}/encoded_tcp.rs (98%) rename {livekit/src/video_ingest => livekit-encoded-video-ingest/src}/keyframe.rs (99%) rename livekit/src/video_ingest/mod.rs => livekit-encoded-video-ingest/src/lib.rs (87%) diff --git a/Cargo.lock b/Cargo.lock index 10770b9aa..f667f3fce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4077,6 +4077,18 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "livekit-encoded-video-ingest" +version = "0.1.0" +dependencies = [ + "libwebrtc", + "livekit", + "livekit-runtime", + "log", + "parking_lot", + "tokio", +] + [[package]] name = "livekit-ffi" version = "0.12.53" @@ -4095,6 +4107,7 @@ dependencies = [ "link-cplusplus", "livekit", "livekit-api", + "livekit-encoded-video-ingest", "livekit-protocol", "log", "parking_lot", @@ -5847,6 +5860,7 @@ dependencies = [ "libwebrtc", "livekit", "livekit-api", + "livekit-encoded-video-ingest", "log", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 6ce249acd..e92af633e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ resolver = "2" members = [ "livekit", + "livekit-encoded-video-ingest", "livekit-api", "livekit-protocol", "livekit-ffi", @@ -49,6 +50,7 @@ imgproc = { version = "0.3.19", path = "imgproc" } libwebrtc = { version = "0.3.30", path = "libwebrtc" } livekit = { version = "0.7.37", path = "livekit" } livekit-api = { version = "0.4.19", path = "livekit-api" } +livekit-encoded-video-ingest = { version = "0.1.0", path = "livekit-encoded-video-ingest" } livekit-ffi = { version = "0.12.53", path = "livekit-ffi" } livekit-datatrack = { version = "0.1.4", path = "livekit-datatrack" } livekit-protocol = { version = "0.7.5", path = 
"livekit-protocol" } diff --git a/examples/pre_encoded_ingest/Cargo.toml b/examples/pre_encoded_ingest/Cargo.toml index f966e3b65..8fd4dd123 100644 --- a/examples/pre_encoded_ingest/Cargo.toml +++ b/examples/pre_encoded_ingest/Cargo.toml @@ -21,8 +21,9 @@ anyhow = { workspace = true } clap = { workspace = true, features = ["derive"] } env_logger = { workspace = true } futures = { workspace = true } -libwebrtc = { workspace = true } -livekit = { workspace = true, features = ["rustls-tls-native-roots"] } +libwebrtc = { workspace = true, features = ["encoded-video"] } +livekit = { workspace = true, features = ["encoded-video", "rustls-tls-native-roots"] } livekit-api = { workspace = true, features = ["rustls-tls-native-roots"] } +livekit-encoded-video-ingest = { workspace = true } log = { workspace = true } tokio = { workspace = true, features = ["full"] } diff --git a/examples/pre_encoded_ingest/README.md b/examples/pre_encoded_ingest/README.md index 8865e1d5a..af57d2675 100644 --- a/examples/pre_encoded_ingest/README.md +++ b/examples/pre_encoded_ingest/README.md @@ -1,7 +1,7 @@ # pre_encoded_ingest -End-to-end demo of the **pre-encoded video ingest** feature of the Rust -SDK. Pre-encoded H.264, H.265, VP8, or AV1 frames flow from a gstreamer +End-to-end demo of the **encoded video ingest** feature of the Rust +SDK. Eencoded H.264, H.265, VP8, or AV1 frames flow from a gstreamer camera pipeline directly into `NativeEncodedVideoSource::capture_frame`, get packetized by WebRTC (no software re-encode), and arrive at a remote peer which writes decoded frames to a TCP port for a second @@ -10,7 +10,7 @@ gstreamer pipeline to render. 
```text ┌────────────┐ encoded (TCP) ┌─────────────┐ RTP (WebRTC) ┌────────────┐ I420 (TCP) ┌─────────────┐ │ gstreamer │ ───────────► │ sender.rs │ ────────────────► │ receiver.rs│ ─────────────► │ gstreamer │ -│ (camera) │ :5005 │ (pre-encoded│ │ (decoded │ :5006 │ (display) │ +│ (camera) │ :5005 │ (encoded│ │ (decoded │ :5006 │ (display) │ │ tcpserver │ │ publish, │ │ output) │ │ │ │ │ │ tcp client)│ │ │ │ │ └────────────┘ └─────────────┘ └────────────┘ └─────────────┘ @@ -33,8 +33,8 @@ frames. The sender supports two wire framings, picked by `--codec`: This example ships **two** senders that publish the same stream; pick whichever one better matches your integration shape: -- **`simple_sender`** — uses the built-in SDK helper - [`livekit::video_ingest::EncodedTcpIngest`]. The helper owns the TCP +- **`simple_sender`** — uses the encoded video ingest helper + [`livekit_encoded_video_ingest::EncodedTcpIngest`]. The helper owns the TCP socket, demux, keyframe probe, reconnect loop, and track publish/unpublish. Applications only supply config — port, codec, width, height — and an optional [`EncodedIngestObserver`] for @@ -52,7 +52,7 @@ is the drop-in replacement used in all examples below. ```rust use libwebrtc::video_source::VideoCodec; -use livekit::video_ingest::{EncodedTcpIngest, EncodedTcpIngestOptions}; +use livekit_encoded_video_ingest::{EncodedTcpIngest, EncodedTcpIngestOptions}; let options = EncodedTcpIngestOptions::new( /* port */ 5005, @@ -71,7 +71,7 @@ stats polling, Ctrl-C shutdown). ## What this exercises - `libwebrtc::video_source::NativeEncodedVideoSource` — the - pre-encoded video track source, for `VideoCodec::H264`, + Encoded video track source, for `VideoCodec::H264`, `VideoCodec::H265`, `VideoCodec::Vp8`, and `VideoCodec::Av1`. 
- Annex-B bytestream ingest (H.264/H.265), with automatic parameter-set caching and keyframe prepending done by the source @@ -426,7 +426,7 @@ RUST_LOG=info cargo run -p pre_encoded_ingest --bin simple_sender -- \ --tcp-host 127.0.0.1 --tcp-port 5005 \ --width 640 --height 480 \ --codec h264 \ - --room pre-encoded-demo --identity encoded-sender + --room encoded-video-demo --identity encoded-sender ``` Or the hand-rolled reference (`--bin sender`) with the same flags — @@ -458,7 +458,7 @@ restarted, the sender reconnects automatically. ```bash RUST_LOG=info cargo run -p pre_encoded_ingest --bin receiver -- \ --tcp-port 5006 \ - --room pre-encoded-demo --identity encoded-receiver \ + --room encoded-video-demo --identity encoded-receiver \ --from encoded-sender ``` @@ -598,7 +598,7 @@ macOS-to-macOS should decode cleanly. `CodecArg::Vp9` still exists in `sender.rs` (and `NativeEncodedVideoSource` accepts `VideoCodec::Vp9`), but VP9 ingest is not exercised by this demo and has rough edges that make it a poor -fit for a "pre-encoded bytes straight to RTP" path: +fit for an "encoded bytes straight to RTP" path: - libvpx-vp9 emits **superframes** in IVF (a per-frame record can bundle several coded frames — e.g. a show_existing_frame reshow diff --git a/examples/pre_encoded_ingest/src/receiver.rs b/examples/pre_encoded_ingest/src/receiver.rs index 6b47e8299..4c50463a7 100644 --- a/examples/pre_encoded_ingest/src/receiver.rs +++ b/examples/pre_encoded_ingest/src/receiver.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Pre-encoded H.264 ingest receiver. +//! Encoded (compressed) H.264 ingest receiver. //! //! Subscribes to a LiveKit room and forwards the first incoming video track //! as tightly-packed I420 frames over a TCP connection.
A gstreamer @@ -64,7 +64,7 @@ struct Args { api_secret: Option, /// Room name to join - #[arg(long, default_value = "pre-encoded-demo")] + #[arg(long, default_value = "encoded-video-demo")] room: String, /// Participant identity diff --git a/examples/pre_encoded_ingest/src/sender.rs b/examples/pre_encoded_ingest/src/sender.rs index 1087606ca..50b37f4f7 100644 --- a/examples/pre_encoded_ingest/src/sender.rs +++ b/examples/pre_encoded_ingest/src/sender.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Pre-encoded H.264 / H.265 / VP8 / VP9 / AV1 ingest sender. +//! Encoded H.264 / H.265 / VP8 / VP9 / AV1 ingest sender. //! //! Connects to a gstreamer pipeline as a TCP client and pushes each //! decoded access unit / frame straight through @@ -73,7 +73,7 @@ struct Args { api_secret: Option, /// Room name to join - #[arg(long, default_value = "pre-encoded-demo")] + #[arg(long, default_value = "encoded-video-demo")] room: String, /// Participant identity @@ -96,7 +96,7 @@ struct Args { #[arg(long, default_value_t = 480)] height: u32, - /// Pre-encoded codec on the wire. Must match the gstreamer pipeline. + /// Encoded codec on the wire. Must match the gstreamer pipeline. #[arg(long, value_enum, default_value_t = CodecArg::H264)] codec: CodecArg, } @@ -350,8 +350,8 @@ impl IvfReader { if self.buf.len() < 12 { return; } - let size = u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]]) - as usize; + let size = + u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]]) as usize; if size == 0 || size > MAX_FRAME_BYTES { warn!( "IVF: implausible frame_size={size} bytes — byte stream is misaligned. 
\ @@ -579,8 +579,7 @@ fn is_keyframe_annex_b(codec: CodecArg, data: &[u8]) -> bool { let is_three = data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1; if is_four || is_three { let payload_idx = if is_four { i + 4 } else { i + 3 }; - if payload_idx < data.len() - && codec.is_keyframe_nal(codec.nal_type(data[payload_idx])) + if payload_idx < data.len() && codec.is_keyframe_nal(codec.nal_type(data[payload_idx])) { return true; } diff --git a/examples/pre_encoded_ingest/src/simple_sender.rs b/examples/pre_encoded_ingest/src/simple_sender.rs index f4000109c..ba8ebc442 100644 --- a/examples/pre_encoded_ingest/src/simple_sender.rs +++ b/examples/pre_encoded_ingest/src/simple_sender.rs @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Minimal pre-encoded ingest driver using [`livekit::video_ingest::EncodedTcpIngest`]. +//! Minimal encoded (compressed) ingest driver using +//! [`livekit_encoded_video_ingest::EncodedTcpIngest`]. //! //! Everything that was hand-rolled in `sender.rs` (demuxing, keyframe //! detection, reconnect loop, observer plumbing) now lives inside the @@ -24,11 +25,11 @@ use std::{env, net::SocketAddr, sync::Arc, time::Duration}; use anyhow::{Context, Result}; use clap::Parser; use libwebrtc::video_source::VideoCodec; -use livekit::{ - prelude::*, - video_ingest::{EncodedIngestObserver, EncodedTcpIngest, EncodedTcpIngestOptions}, -}; +use livekit::prelude::*; use livekit_api::access_token; +use livekit_encoded_video_ingest::{ + EncodedIngestObserver, EncodedTcpIngest, EncodedTcpIngestOptions, +}; use log::{info, warn}; use tokio::time::sleep; @@ -48,7 +49,7 @@ struct Args { api_secret: Option, /// Room name to join - #[arg(long, default_value = "pre-encoded-demo")] + #[arg(long, default_value = "encoded-video-demo")] room: String, /// Participant identity @@ -71,7 +72,7 @@ struct Args { #[arg(long, default_value_t = 480)] height: u32, - /// Pre-encoded codec on the wire. 
Must match the gstreamer pipeline. + /// Encoded (compressed) codec on the wire. Must match the gstreamer pipeline. #[arg(long, value_enum, default_value_t = CodecArg::H264)] codec: CodecArg, diff --git a/libwebrtc/Cargo.toml b/libwebrtc/Cargo.toml index aab4ff181..5f2c689f7 100644 --- a/libwebrtc/Cargo.toml +++ b/libwebrtc/Cargo.toml @@ -14,6 +14,7 @@ default = [ "glib-main-loop" ] # event loop running in your application, for example if you are using the # GTK or GStreamer Rust bindings, disable this feature. glib-main-loop = [ "dep:glib" ] +encoded-video = [ "webrtc-sys/encoded-video" ] [dependencies] livekit-protocol = { workspace = true } diff --git a/libwebrtc/src/lib.rs b/libwebrtc/src/lib.rs index 0f060c219..d11d52c3b 100644 --- a/libwebrtc/src/lib.rs +++ b/libwebrtc/src/lib.rs @@ -68,9 +68,10 @@ pub mod video_track; pub mod native { pub use webrtc_sys::webrtc::ffi::create_random_uuid; + #[cfg(feature = "encoded-video")] + pub use crate::imp::encoded_video_source; pub use crate::imp::{ - apm, audio_mixer, audio_resampler, encoded_video_source, frame_cryptor, packet_trailer, - yuv_helper, + apm, audio_mixer, audio_resampler, frame_cryptor, packet_trailer, yuv_helper, }; } diff --git a/libwebrtc/src/native/encoded_video_source.rs b/libwebrtc/src/native/encoded_video_source.rs index f1d5f897d..a2367c27b 100644 --- a/libwebrtc/src/native/encoded_video_source.rs +++ b/libwebrtc/src/native/encoded_video_source.rs @@ -66,7 +66,7 @@ struct Inner { resolution: Mutex, } -/// A video source that accepts pre-encoded compressed frames (H.264, H.265, +/// A video source that accepts encoded compressed frames (H.264, H.265, /// VP8, VP9, AV1) instead of raw pixels. WebRTC's encoder is bypassed for /// tracks bound to this source — frames flow straight from `capture_frame` /// into RTP packetization and congestion control. @@ -112,7 +112,7 @@ impl NativeEncodedVideoSource { self.inner.resolution.lock().clone() } - /// Push a pre-encoded frame to the track. 
Returns `true` if the frame was + /// Push an encoded (compressed) frame to the track. Returns `true` if the frame was /// accepted, `false` if the internal queue was full and the frame had to /// be dropped. pub fn capture_frame(&self, data: &[u8], info: &EncodedFrameInfo) -> bool { diff --git a/libwebrtc/src/native/mod.rs b/libwebrtc/src/native/mod.rs index b253dae15..87a41e364 100644 --- a/libwebrtc/src/native/mod.rs +++ b/libwebrtc/src/native/mod.rs @@ -23,6 +23,7 @@ pub mod audio_track; pub mod data_channel; #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub mod desktop_capturer; +#[cfg(feature = "encoded-video")] pub mod encoded_video_source; pub mod frame_cryptor; pub mod ice_candidate; diff --git a/libwebrtc/src/native/peer_connection_factory.rs b/libwebrtc/src/native/peer_connection_factory.rs index f87f4ef78..60f3e38fc 100644 --- a/libwebrtc/src/native/peer_connection_factory.rs +++ b/libwebrtc/src/native/peer_connection_factory.rs @@ -19,6 +19,8 @@ use lazy_static::lazy_static; use parking_lot::Mutex; use webrtc_sys::{peer_connection_factory as sys_pcf, rtc_error as sys_err, webrtc as sys_rtc}; +#[cfg(feature = "encoded-video")] +use crate::video_source::native::NativeEncodedVideoSource; use crate::{ audio_source::native::NativeAudioSource, audio_track::RtcAudioTrack, @@ -26,7 +28,7 @@ use crate::{ peer_connection::PeerConnection, peer_connection_factory::RtcConfiguration, rtp_parameters::RtpCapabilities, - video_source::native::{NativeEncodedVideoSource, NativeVideoSource}, + video_source::native::NativeVideoSource, video_track::RtcVideoTrack, MediaType, RtcError, }; @@ -81,6 +83,7 @@ impl PeerConnectionFactory { } } + #[cfg(feature = "encoded-video")] pub fn create_video_track_from_encoded_source( &self, label: &str, @@ -88,10 +91,8 @@ impl PeerConnectionFactory { ) -> RtcVideoTrack { RtcVideoTrack { handle: imp_vt::RtcVideoTrack::new( - self.sys_handle.create_video_track_from_encoded_source( - label.to_string(), - 
source.sys_handle(), - ), + self.sys_handle + .create_video_track_from_encoded_source(label.to_string(), source.sys_handle()), ), } } diff --git a/libwebrtc/src/peer_connection_factory.rs b/libwebrtc/src/peer_connection_factory.rs index ebd6f4af2..bccbf7156 100644 --- a/libwebrtc/src/peer_connection_factory.rs +++ b/libwebrtc/src/peer_connection_factory.rs @@ -86,15 +86,16 @@ impl PeerConnectionFactory { pub mod native { use super::PeerConnectionFactory; + #[cfg(feature = "encoded-video")] + use crate::video_source::native::NativeEncodedVideoSource; use crate::{ - audio_source::native::NativeAudioSource, - audio_track::RtcAudioTrack, - video_source::native::{NativeEncodedVideoSource, NativeVideoSource}, - video_track::RtcVideoTrack, + audio_source::native::NativeAudioSource, audio_track::RtcAudioTrack, + video_source::native::NativeVideoSource, video_track::RtcVideoTrack, }; pub trait PeerConnectionFactoryExt { fn create_video_track(&self, label: &str, source: NativeVideoSource) -> RtcVideoTrack; + #[cfg(feature = "encoded-video")] fn create_video_track_from_encoded_source( &self, label: &str, @@ -108,6 +109,7 @@ pub mod native { self.handle.create_video_track(label, source) } + #[cfg(feature = "encoded-video")] fn create_video_track_from_encoded_source( &self, label: &str, diff --git a/libwebrtc/src/video_source.rs b/libwebrtc/src/video_source.rs index 4c31ee291..e4f99f957 100644 --- a/libwebrtc/src/video_source.rs +++ b/libwebrtc/src/video_source.rs @@ -29,7 +29,8 @@ impl Default for VideoResolution { } } -/// Codec used by a pre-encoded video feed. +/// Codec used by an encoded video feed. +#[cfg(feature = "encoded-video")] #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum VideoCodec { H264, @@ -39,8 +40,9 @@ pub enum VideoCodec { Av1, } -/// Metadata describing a single pre-encoded video frame pushed to an +/// Metadata describing a single encoded video frame pushed to an /// [`native::NativeEncodedVideoSource`]. 
+#[cfg(feature = "encoded-video")] #[derive(Debug, Copy, Clone)] pub struct EncodedFrameInfo { /// True when this frame is an IDR / keyframe. @@ -54,15 +56,10 @@ pub struct EncodedFrameInfo { pub capture_time_us: i64, } +#[cfg(feature = "encoded-video")] impl Default for EncodedFrameInfo { fn default() -> Self { - Self { - is_keyframe: false, - has_sps_pps: false, - width: 0, - height: 0, - capture_time_us: 0, - } + Self { is_keyframe: false, has_sps_pps: false, width: 0, height: 0, capture_time_us: 0 } } } @@ -72,11 +69,12 @@ pub enum RtcVideoSource { // TODO(theomonnom): Web video sources (eq. to tracks on browsers?) #[cfg(not(target_arch = "wasm32"))] Native(native::NativeVideoSource), - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] Encoded(native::NativeEncodedVideoSource), } // TODO(theomonnom): Support enum dispatch with conditional compilation? +#[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] impl RtcVideoSource { enum_dispatch!( [Native, Encoded]; @@ -84,10 +82,19 @@ impl RtcVideoSource { ); } +#[cfg(all(not(target_arch = "wasm32"), not(feature = "encoded-video")))] +impl RtcVideoSource { + enum_dispatch!( + [Native]; + pub fn video_resolution(self: &Self) -> VideoResolution; + ); +} + #[cfg(not(target_arch = "wasm32"))] pub mod native { use std::fmt::{Debug, Formatter}; + #[cfg(feature = "encoded-video")] pub use crate::native::encoded_video_source::{ EncodedVideoSourceObserver, NativeEncodedVideoSource, }; diff --git a/livekit-encoded-video-ingest/Cargo.toml b/livekit-encoded-video-ingest/Cargo.toml new file mode 100644 index 000000000..ff4be9969 --- /dev/null +++ b/livekit-encoded-video-ingest/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "livekit-encoded-video-ingest" +version = "0.1.0" +edition.workspace = true +license.workspace = true +description = "Encoded video ingest helpers for LiveKit" +repository.workspace = true +publish = false + +[dependencies] +libwebrtc = { 
workspace = true, features = ["encoded-video"] } +livekit = { workspace = true, features = ["encoded-video"] } +livekit-runtime = { workspace = true } +log = { workspace = true } +parking_lot = { workspace = true } +tokio = { workspace = true, features = ["io-util", "net", "time"] } diff --git a/livekit/src/video_ingest/demux.rs b/livekit-encoded-video-ingest/src/demux.rs similarity index 100% rename from livekit/src/video_ingest/demux.rs rename to livekit-encoded-video-ingest/src/demux.rs diff --git a/livekit/src/video_ingest/encoded_tcp.rs b/livekit-encoded-video-ingest/src/encoded_tcp.rs similarity index 98% rename from livekit/src/video_ingest/encoded_tcp.rs rename to livekit-encoded-video-ingest/src/encoded_tcp.rs index 5a82abced..41baab657 100644 --- a/livekit/src/video_ingest/encoded_tcp.rs +++ b/livekit-encoded-video-ingest/src/encoded_tcp.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! High-level helper that ingests a pre-encoded video bytestream over TCP +//! High-level helper that ingests an encoded (compressed) video bytestream over TCP //! and publishes it to a LiveKit room as an encoded video track. //! //! The caller supplies the TCP endpoint, codec, and declared resolution. @@ -46,7 +46,7 @@ use parking_lot::Mutex; use tokio::{io::AsyncReadExt, net::TcpStream, time::sleep}; use super::{demux::Demuxer, keyframe::is_keyframe}; -use crate::{ +use livekit::{ options::{TrackPublishOptions, VideoEncoding}, participant::LocalParticipant, prelude::*, @@ -65,7 +65,7 @@ pub struct EncodedTcpIngestOptions { /// Port of the gstreamer `tcpserversink`. pub port: u16, - /// Pre-encoded codec on the wire. Must match the upstream encoder. + /// Encoded (compressed) codec on the wire. Must match the upstream encoder. pub codec: VideoCodec, /// Declared stream width (px). 
@@ -152,7 +152,7 @@ pub struct EncodedIngestStats { pub tcp_reconnects: u64, } -/// Ingests a pre-encoded video feed from a TCP socket and publishes it as +/// Ingests an encoded (compressed) video feed from a TCP socket and publishes it as /// an encoded LiveKit track. /// /// Create one with [`EncodedTcpIngest::start`], inspect it via @@ -495,9 +495,9 @@ mod tests { }; use libwebrtc::video_source::VideoCodec; + use livekit::{prelude::TrackSource, RoomError}; use super::*; - use crate::{prelude::TrackSource, RoomError}; #[test] fn options_new_sets_network_and_track_defaults() { diff --git a/livekit/src/video_ingest/keyframe.rs b/livekit-encoded-video-ingest/src/keyframe.rs similarity index 99% rename from livekit/src/video_ingest/keyframe.rs rename to livekit-encoded-video-ingest/src/keyframe.rs index 57343f299..099e4b4f2 100644 --- a/livekit/src/video_ingest/keyframe.rs +++ b/livekit-encoded-video-ingest/src/keyframe.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Minimal keyframe detection for the five pre-encoded codecs supported by +//! Minimal keyframe detection for the five encoded (compressed) codecs supported by //! [`EncodedTcpIngest`](super::encoded_tcp::EncodedTcpIngest). //! //! These probes are intentionally conservative — they never scan deeper into diff --git a/livekit/src/video_ingest/mod.rs b/livekit-encoded-video-ingest/src/lib.rs similarity index 87% rename from livekit/src/video_ingest/mod.rs rename to livekit-encoded-video-ingest/src/lib.rs index a879e4058..54749c0c0 100644 --- a/livekit/src/video_ingest/mod.rs +++ b/livekit-encoded-video-ingest/src/lib.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! High-level helpers for ingesting pre-encoded video into a LiveKit room. +//! High-level helpers for ingesting encoded (compressed) video into a LiveKit room. //! -//! 
This module hides the moving parts of pulling a pre-encoded bytestream +//! This module hides the moving parts of pulling an encoded (compressed) bytestream //! from a source (currently: TCP) and turning it into a published //! LiveKit track. Callers configure a small options struct and hand off a //! `Room`; the helper does the rest. diff --git a/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts b/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts index d02f9b375..06bb93451 100644 --- a/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts +++ b/livekit-ffi-node-bindings/proto/video_frame_pb.d.ts @@ -170,7 +170,7 @@ export declare enum VideoSourceType { VIDEO_SOURCE_NATIVE = 0, /** - * A source that accepts pre-encoded compressed frames. WebRTC's internal + * A source that accepts encoded (compressed) frames. WebRTC's internal * encoder is bypassed for tracks bound to this source. * * @generated from enum value: VIDEO_SOURCE_ENCODED = 1; @@ -486,7 +486,7 @@ export declare class CaptureVideoFrameResponse extends Message>>, } +#[cfg(feature = "encoded-video")] impl FfiHandle for FfiEncodedTcpIngest {} /// Kicks off an async `EncodedTcpIngest::start` and returns the async id /// immediately. The result (or error) is dispatched as /// [`proto::NewEncodedTcpIngestCallback`]. 
+#[cfg(feature = "encoded-video")] pub fn create( server: &'static server::FfiServer, req: proto::NewEncodedTcpIngestRequest, @@ -80,10 +89,8 @@ pub fn create( ingest_handle: handle_id, })); - let info = proto::EncodedTcpIngestInfo { - track_sid: track_sid.to_string(), - track_name, - }; + let info = + proto::EncodedTcpIngestInfo { track_sid: track_sid.to_string(), track_name }; let ffi_ingest = FfiEncodedTcpIngest { handle_id, @@ -94,14 +101,12 @@ pub fn create( let _ = server.send_event( proto::NewEncodedTcpIngestCallback { async_id, - message: Some( - proto::new_encoded_tcp_ingest_callback::Message::Ingest( - proto::OwnedEncodedTcpIngest { - handle: proto::FfiOwnedHandle { id: handle_id }, - info, - }, - ), - ), + message: Some(proto::new_encoded_tcp_ingest_callback::Message::Ingest( + proto::OwnedEncodedTcpIngest { + handle: proto::FfiOwnedHandle { id: handle_id }, + info, + }, + )), } .into(), ); @@ -124,8 +129,17 @@ pub fn create( Ok(proto::NewEncodedTcpIngestResponse { async_id }) } +#[cfg(not(feature = "encoded-video"))] +pub fn create( + _server: &'static server::FfiServer, + _req: proto::NewEncodedTcpIngestRequest, +) -> FfiResult { + feature_disabled_error() +} + /// Stops a running ingest. Async because `EncodedTcpIngest::stop` awaits /// the background task and optionally unpublishes the track. 
+#[cfg(feature = "encoded-video")] pub fn stop( server: &'static server::FfiServer, req: proto::StopEncodedTcpIngestRequest, @@ -147,16 +161,23 @@ pub fn stop( } None => Some("EncodedTcpIngest: already stopped".to_string()), }; - let _ = server.send_event( - proto::StopEncodedTcpIngestCallback { async_id, error }.into(), - ); + let _ = server.send_event(proto::StopEncodedTcpIngestCallback { async_id, error }.into()); }); server.watch_panic(handle); Ok(proto::StopEncodedTcpIngestResponse { async_id }) } +#[cfg(not(feature = "encoded-video"))] +pub fn stop( + _server: &'static server::FfiServer, + _req: proto::StopEncodedTcpIngestRequest, +) -> FfiResult { + feature_disabled_error() +} + /// Pulls a stats snapshot synchronously. +#[cfg(feature = "encoded-video")] pub fn get_stats( server: &'static server::FfiServer, req: proto::GetEncodedTcpIngestStatsRequest, @@ -179,7 +200,23 @@ pub fn get_stats( }) } -fn options_from_proto(req: &proto::NewEncodedTcpIngestRequest) -> FfiResult { +#[cfg(not(feature = "encoded-video"))] +pub fn get_stats( + _server: &'static server::FfiServer, + _req: proto::GetEncodedTcpIngestStatsRequest, +) -> FfiResult { + feature_disabled_error() +} + +#[cfg(not(feature = "encoded-video"))] +fn feature_disabled_error() -> FfiResult { + Err(FfiError::InvalidRequest("Encoded video ingest support is not enabled".into())) +} + +#[cfg(feature = "encoded-video")] +fn options_from_proto( + req: &proto::NewEncodedTcpIngestRequest, +) -> FfiResult { let port = u16::try_from(req.port) .map_err(|_| FfiError::InvalidRequest("port must fit in u16".into()))?; let codec = video_codec_from_proto(req.codec()); @@ -206,6 +243,7 @@ fn options_from_proto(req: &proto::NewEncodedTcpIngestRequest) -> FfiResult livekit::webrtc::video_source::VideoCodec { use livekit::webrtc::video_source::VideoCodec; match codec { @@ -219,11 +257,13 @@ fn video_codec_from_proto(codec: proto::VideoCodec) -> livekit::webrtc::video_so /// Forwards ingest-level callbacks out to the FFI 
client as /// [`proto::EncodedTcpIngestEvent`]s. +#[cfg(feature = "encoded-video")] struct IngestObserverBridge { server: &'static server::FfiServer, ingest_handle: FfiHandleId, } +#[cfg(feature = "encoded-video")] impl IngestObserverBridge { fn emit(&self, message: proto::encoded_tcp_ingest_event::Message) { let _ = self.server.send_event( @@ -236,6 +276,7 @@ impl IngestObserverBridge { } } +#[cfg(feature = "encoded-video")] impl EncodedIngestObserver for IngestObserverBridge { fn on_connected(&self, peer: SocketAddr) { self.emit(proto::encoded_tcp_ingest_event::Message::Connected( @@ -257,10 +298,7 @@ impl EncodedIngestObserver for IngestObserverBridge { fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { self.emit(proto::encoded_tcp_ingest_event::Message::TargetBitrateChanged( - proto::encoded_tcp_ingest_event::TargetBitrateChanged { - bitrate_bps, - framerate_fps, - }, + proto::encoded_tcp_ingest_event::TargetBitrateChanged { bitrate_bps, framerate_fps }, )); } } diff --git a/livekit-ffi/src/server/requests.rs b/livekit-ffi/src/server/requests.rs index 98f51f1b8..019a59fea 100644 --- a/livekit-ffi/src/server/requests.rs +++ b/livekit-ffi/src/server/requests.rs @@ -474,7 +474,7 @@ unsafe fn on_capture_video_frame( Ok(proto::CaptureVideoFrameResponse::default()) } -/// Push a pre-encoded frame to a VIDEO_SOURCE_ENCODED source. +/// Push an encoded (compressed) frame to a VIDEO_SOURCE_ENCODED source. 
fn on_capture_encoded_video_frame( server: &'static FfiServer, push: proto::CaptureEncodedVideoFrameRequest, @@ -1303,7 +1303,9 @@ pub fn handle_request( } Request::NewVideoSource(req) => on_new_video_source(server, req)?.into(), Request::CaptureVideoFrame(req) => unsafe { on_capture_video_frame(server, req)?.into() }, - Request::CaptureEncodedVideoFrame(req) => on_capture_encoded_video_frame(server, req)?.into(), + Request::CaptureEncodedVideoFrame(req) => { + on_capture_encoded_video_frame(server, req)?.into() + } Request::VideoConvert(req) => unsafe { on_video_convert(server, req)?.into() }, Request::NewAudioStream(req) => on_new_audio_stream(server, req)?.into(), Request::NewAudioSource(req) => on_new_audio_source(server, req)?.into(), diff --git a/livekit-ffi/src/server/video_source.rs b/livekit-ffi/src/server/video_source.rs index 57afd60c9..27f0cd9a1 100644 --- a/livekit-ffi/src/server/video_source.rs +++ b/livekit-ffi/src/server/video_source.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] use std::sync::Arc; use livekit::webrtc::{ @@ -39,10 +40,8 @@ fn frame_metadata_from_proto(metadata: Option) -> Option livekit::webrtc::video_source::VideoCodec { +#[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] +fn video_codec_from_proto(codec: proto::VideoCodec) -> livekit::webrtc::video_source::VideoCodec { use livekit::webrtc::video_source::VideoCodec; match codec { proto::VideoCodec::H264 => VideoCodec::H264, @@ -55,35 +54,39 @@ fn video_codec_from_proto( /// Forwards encoder-side feedback from the native source out to the FFI /// client as `EncodedVideoSourceEvent`s. 
-#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] struct EncodedObserverBridge { server: &'static server::FfiServer, source_handle: u64, } -#[cfg(not(target_arch = "wasm32"))] -impl livekit::webrtc::video_source::native::EncodedVideoSourceObserver - for EncodedObserverBridge -{ +#[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] +impl livekit::webrtc::video_source::native::EncodedVideoSourceObserver for EncodedObserverBridge { fn on_keyframe_requested(&self) { - let _ = self.server.send_event(proto::EncodedVideoSourceEvent { - source_handle: self.source_handle, - message: Some(proto::encoded_video_source_event::Message::KeyframeRequested( - proto::encoded_video_source_event::KeyframeRequested {}, - )), - }.into()); + let _ = self.server.send_event( + proto::EncodedVideoSourceEvent { + source_handle: self.source_handle, + message: Some(proto::encoded_video_source_event::Message::KeyframeRequested( + proto::encoded_video_source_event::KeyframeRequested {}, + )), + } + .into(), + ); } fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { - let _ = self.server.send_event(proto::EncodedVideoSourceEvent { - source_handle: self.source_handle, - message: Some(proto::encoded_video_source_event::Message::TargetBitrateChanged( - proto::encoded_video_source_event::TargetBitrateChanged { - bitrate_bps, - framerate_fps, - }, - )), - }.into()); + let _ = self.server.send_event( + proto::EncodedVideoSourceEvent { + source_handle: self.source_handle, + message: Some(proto::encoded_video_source_event::Message::TargetBitrateChanged( + proto::encoded_video_source_event::TargetBitrateChanged { + bitrate_bps, + framerate_fps, + }, + )), + } + .into(), + ); } } @@ -105,7 +108,7 @@ impl FfiVideoSource { NativeVideoSource::new(new_source.resolution.into(), is_screencast); RtcVideoSource::Native(video_source) } - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(not(target_arch = "wasm32"), feature = 
"encoded-video"))] proto::VideoSourceType::VideoSourceEncoded => { use livekit::webrtc::video_source::{ native::NativeEncodedVideoSource, VideoResolution, @@ -131,6 +134,12 @@ impl FfiVideoSource { RtcVideoSource::Encoded(source) } + #[cfg(any(target_arch = "wasm32", not(feature = "encoded-video")))] + proto::VideoSourceType::VideoSourceEncoded => { + return Err(FfiError::InvalidRequest( + "Encoded video source support is not enabled".into(), + )); + } _ => return Err(FfiError::InvalidRequest("unsupported video source type".into())), }; @@ -147,7 +156,7 @@ impl FfiVideoSource { /// Returns the unique 16-bit id assigned to an encoded source by the /// WebRTC layer. `None` for non-encoded sources. pub fn encoded_source_id(&self) -> Option { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] if let RtcVideoSource::Encoded(ref source) = self.source { return Some(source.source_id()); } @@ -172,7 +181,7 @@ impl FfiVideoSource { source.capture_frame(&frame); } - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] RtcVideoSource::Encoded(_) => { return Err(FfiError::InvalidRequest( "capture_video_frame is not supported for encoded sources; \ @@ -190,8 +199,11 @@ impl FfiVideoSource { _server: &'static server::FfiServer, capture: proto::CaptureEncodedVideoFrameRequest, ) -> FfiResult { + #[cfg(any(target_arch = "wasm32", not(feature = "encoded-video")))] + let _ = &capture; + match self.source { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] RtcVideoSource::Encoded(ref source) => { use livekit::webrtc::video_source::EncodedFrameInfo; diff --git a/livekit/Cargo.toml b/livekit/Cargo.toml index e1d34183e..51962e822 100644 --- a/livekit/Cargo.toml +++ b/livekit/Cargo.toml @@ -13,6 +13,7 @@ default = ["tokio"] async = ["livekit-api/signal-client-async"] tokio = ["livekit-api/signal-client-tokio"] dispatcher = 
["livekit-api/signal-client-dispatcher"] +encoded-video = ["libwebrtc/encoded-video"] # Note that the following features only change the behavior of tokio-tungstenite. diff --git a/livekit/src/lib.rs b/livekit/src/lib.rs index 55cf101c1..00abe5de8 100644 --- a/livekit/src/lib.rs +++ b/livekit/src/lib.rs @@ -16,7 +16,6 @@ mod plugin; pub mod proto; mod room; mod rtc_engine; -pub mod video_ingest; pub mod webrtc { pub use libwebrtc::*; diff --git a/livekit/src/room/options.rs b/livekit/src/room/options.rs index 5d9095c2d..675f2832d 100644 --- a/livekit/src/room/options.rs +++ b/livekit/src/room/options.rs @@ -38,6 +38,7 @@ impl VideoCodec { } } +#[cfg(feature = "encoded-video")] impl From for VideoCodec { fn from(codec: libwebrtc::video_source::VideoCodec) -> Self { match codec { diff --git a/livekit/src/room/participant/local_participant.rs b/livekit/src/room/participant/local_participant.rs index 53a36a22b..eebdf8528 100644 --- a/livekit/src/room/participant/local_participant.rs +++ b/livekit/src/room/participant/local_participant.rs @@ -302,34 +302,36 @@ impl LocalParticipant { track: LocalTrack, options: TrackPublishOptions, ) -> RoomResult { - // Encoded video sources deliver pre-encoded single-layer frames. + // Encoded video sources deliver encoded single-layer frames. // Force-disable simulcast and pin the negotiated codec to the // source's codec so WebRTC's encoder factory picks our passthrough // encoder path. 
+ #[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] let options = { - let mut options = options; + let mut publish_options = options; if let LocalTrack::Video(ref video_track) = track { - #[cfg(not(target_arch = "wasm32"))] if let RtcVideoSource::Encoded(ref encoded_source) = video_track.rtc_source() { let source_codec: options::VideoCodec = encoded_source.codec().into(); - if options.video_codec != source_codec { + if publish_options.video_codec != source_codec { log::warn!( "publish_track: overriding video_codec {:?} -> {:?} to match encoded source", - options.video_codec, + publish_options.video_codec, source_codec ); - options.video_codec = source_codec; + publish_options.video_codec = source_codec; } - if options.simulcast { + if publish_options.simulcast { log::warn!( "publish_track: disabling simulcast for encoded video source (single-layer only)" ); - options.simulcast = false; + publish_options.simulcast = false; } } } - options + publish_options }; + #[cfg(any(target_arch = "wasm32", not(feature = "encoded-video")))] + let options = options; let disable_red = self.local.encryption_type != EncryptionType::None || !options.red; let mut req = proto::AddTrackRequest { diff --git a/livekit/src/room/track/local_video_track.rs b/livekit/src/room/track/local_video_track.rs index f71ec139a..a40a2a2b2 100644 --- a/livekit/src/room/track/local_video_track.rs +++ b/livekit/src/room/track/local_video_track.rs @@ -61,7 +61,7 @@ impl LocalVideoTrack { .pc_factory() .create_video_track(&libwebrtc::native::create_random_uuid(), native_source) } - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(not(target_arch = "wasm32"), feature = "encoded-video"))] RtcVideoSource::Encoded(encoded_source) => { use libwebrtc::peer_connection_factory::native::PeerConnectionFactoryExt; LkRuntime::instance().pc_factory().create_video_track_from_encoded_source( diff --git a/webrtc-sys/Cargo.toml b/webrtc-sys/Cargo.toml index d1bbd8bdc..e44681d7b 100644 --- 
a/webrtc-sys/Cargo.toml +++ b/webrtc-sys/Cargo.toml @@ -9,6 +9,7 @@ repository.workspace = true [features] default = [] +encoded-video = [] [dependencies] cxx = "1.0" diff --git a/webrtc-sys/build.rs b/webrtc-sys/build.rs index d0e8a40f2..feed895be 100644 --- a/webrtc-sys/build.rs +++ b/webrtc-sys/build.rs @@ -24,6 +24,7 @@ fn main() { let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); let is_desktop = target_os == "linux" || target_os == "windows" || target_os == "macos"; + let encoded_video = env::var("CARGO_FEATURE_ENCODED_VIDEO").is_ok(); println!("cargo:rerun-if-env-changed=LK_DEBUG_WEBRTC"); println!("cargo:rerun-if-env-changed=LK_CUSTOM_WEBRTC"); @@ -55,9 +56,12 @@ fn main() { "src/apm.rs", "src/audio_mixer.rs", "src/packet_trailer.rs", - "src/encoded_video_source.rs", ]; + if encoded_video { + rust_files.push("src/encoded_video_source.rs"); + } + if is_desktop { rust_files.push("src/desktop_capturer.rs"); } @@ -92,10 +96,15 @@ fn main() { "src/apm.cpp", "src/audio_mixer.cpp", "src/packet_trailer.cpp", - "src/encoded_video_source.cpp", - "src/passthrough_video_encoder.cpp", ]); + if encoded_video { + builder + .file("src/encoded_video_source.cpp") + .file("src/passthrough_video_encoder.cpp") + .define("LK_PRE_ENCODED_VIDEO", "1"); + } + if is_desktop { builder.file("src/desktop_capturer.cpp"); } diff --git a/webrtc-sys/include/livekit/passthrough_video_encoder.h b/webrtc-sys/include/livekit/passthrough_video_encoder.h index 882040cf0..179947209 100644 --- a/webrtc-sys/include/livekit/passthrough_video_encoder.h +++ b/webrtc-sys/include/livekit/passthrough_video_encoder.h @@ -31,7 +31,7 @@ namespace livekit_ffi { -// Encoder that takes pre-encoded bitstream bytes from a paired +// Encoder that takes encoded bitstream bytes from a paired // EncodedVideoTrackSource and forwards them unmodified to the // EncodedImageCallback. 
Used for applications that already produce H.264 / // H.265 / VP8 / VP9 / AV1 bitstreams (e.g. from a hardware capturer or a diff --git a/webrtc-sys/include/livekit/peer_connection_factory.h b/webrtc-sys/include/livekit/peer_connection_factory.h index 62598e72e..e0edf4ef4 100644 --- a/webrtc-sys/include/livekit/peer_connection_factory.h +++ b/webrtc-sys/include/livekit/peer_connection_factory.h @@ -29,7 +29,9 @@ namespace livekit_ffi { class PeerConnectionFactory; class PeerConnectionObserverWrapper; +#ifdef LK_PRE_ENCODED_VIDEO class EncodedVideoTrackSource; +#endif } // namespace livekit_ffi #include "webrtc-sys/src/peer_connection_factory.rs.h" @@ -54,9 +56,11 @@ class PeerConnectionFactory { rust::String label, std::shared_ptr source) const; +#ifdef LK_PRE_ENCODED_VIDEO std::shared_ptr create_video_track_from_encoded_source( rust::String label, std::shared_ptr source) const; +#endif std::shared_ptr create_audio_track( rust::String label, diff --git a/webrtc-sys/src/encoded_video_source.rs b/webrtc-sys/src/encoded_video_source.rs index ca449766d..82b6ecbd4 100644 --- a/webrtc-sys/src/encoded_video_source.rs +++ b/webrtc-sys/src/encoded_video_source.rs @@ -52,10 +52,7 @@ pub mod ffi { capture_time_us: i64, ) -> bool; - fn set_observer( - self: &EncodedVideoTrackSource, - observer: Box, - ); + fn set_observer(self: &EncodedVideoTrackSource, observer: Box); } extern "Rust" { diff --git a/webrtc-sys/src/lib.rs b/webrtc-sys/src/lib.rs index 67b63ea70..e23b63400 100644 --- a/webrtc-sys/src/lib.rs +++ b/webrtc-sys/src/lib.rs @@ -22,6 +22,7 @@ pub mod candidate; pub mod data_channel; #[cfg(any(target_os = "macos", target_os = "windows", target_os = "linux"))] pub mod desktop_capturer; +#[cfg(feature = "encoded-video")] pub mod encoded_video_source; pub mod frame_cryptor; pub mod helper; diff --git a/webrtc-sys/src/peer_connection_factory.cpp b/webrtc-sys/src/peer_connection_factory.cpp index 68c94131b..a63b5a90f 100644 --- a/webrtc-sys/src/peer_connection_factory.cpp 
+++ b/webrtc-sys/src/peer_connection_factory.cpp @@ -36,7 +36,9 @@ #include "livekit/peer_connection.h" #include "livekit/rtc_error.h" #include "livekit/rtp_parameters.h" +#ifdef LK_PRE_ENCODED_VIDEO #include "livekit/encoded_video_source.h" +#endif #include "livekit/video_decoder_factory.h" #include "livekit/video_encoder_factory.h" #include "livekit/webrtc.h" @@ -117,6 +119,7 @@ std::shared_ptr PeerConnectionFactory::create_video_track( peer_factory_->CreateVideoTrack(source->get(), label.c_str()))); } +#ifdef LK_PRE_ENCODED_VIDEO std::shared_ptr PeerConnectionFactory::create_video_track_from_encoded_source( rust::String label, @@ -125,6 +128,7 @@ PeerConnectionFactory::create_video_track_from_encoded_source( rtc_runtime_->get_or_create_media_stream_track( peer_factory_->CreateVideoTrack(source->get(), label.c_str()))); } +#endif std::shared_ptr PeerConnectionFactory::create_audio_track( rust::String label, diff --git a/webrtc-sys/src/peer_connection_factory.rs b/webrtc-sys/src/peer_connection_factory.rs index c84494838..00550dae6 100644 --- a/webrtc-sys/src/peer_connection_factory.rs +++ b/webrtc-sys/src/peer_connection_factory.rs @@ -58,6 +58,7 @@ pub mod ffi { type IceGatheringState = crate::peer_connection::ffi::IceGatheringState; type AudioTrackSource = crate::audio_track::ffi::AudioTrackSource; type VideoTrackSource = crate::video_track::ffi::VideoTrackSource; + #[cfg(feature = "encoded-video")] type EncodedVideoTrackSource = crate::encoded_video_source::ffi::EncodedVideoTrackSource; type RtpCapabilities = crate::rtp_parameters::ffi::RtpCapabilities; type AudioTrack = crate::audio_track::ffi::AudioTrack; @@ -102,6 +103,7 @@ pub mod ffi { source: SharedPtr, ) -> SharedPtr; + #[cfg(feature = "encoded-video")] fn create_video_track_from_encoded_source( self: &PeerConnectionFactory, label: String, diff --git a/webrtc-sys/src/video_encoder_factory.cpp b/webrtc-sys/src/video_encoder_factory.cpp index 45dab4cf6..8d9afa58b 100644 --- 
a/webrtc-sys/src/video_encoder_factory.cpp +++ b/webrtc-sys/src/video_encoder_factory.cpp @@ -21,7 +21,9 @@ #include "api/video_codecs/video_encoder.h" #include "api/video_codecs/video_encoder_factory_template.h" #include "livekit/objc_video_factory.h" +#ifdef LK_PRE_ENCODED_VIDEO #include "livekit/passthrough_video_encoder.h" +#endif #include "media/base/media_constants.h" #include "media/engine/simulcast_encoder_adapter.h" #include "rtc_base/logging.h" @@ -151,6 +153,7 @@ std::unique_ptr VideoEncoderFactory::Create( return nullptr; } +#ifdef LK_PRE_ENCODED_VIDEO // Wrap the real encoder construction in a lazy shim so we can branch // between passthrough and a real encoder based on the first VideoFrame's // id. The builder is called at most once and only for non-passthrough @@ -164,6 +167,10 @@ std::unique_ptr VideoEncoderFactory::Create( return std::make_unique(format, std::move(real_encoder_builder)); +#else + return std::make_unique( + env, internal_factory_.get(), nullptr, format); +#endif } } // namespace livekit_ffi From a594c526f888f208b116f3ab4c077eb2d994b929 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 28 Apr 2026 10:19:46 -0600 Subject: [PATCH 12/15] pre_encoded_ingest -> encoded_video_ingest --- examples/{pre_encoded_ingest => encoded_video_ingest}/Cargo.toml | 0 examples/{pre_encoded_ingest => encoded_video_ingest}/README.md | 0 .../{pre_encoded_ingest => encoded_video_ingest}/src/receiver.rs | 0 .../{pre_encoded_ingest => encoded_video_ingest}/src/sender.rs | 0 .../src/simple_sender.rs | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename examples/{pre_encoded_ingest => encoded_video_ingest}/Cargo.toml (100%) rename examples/{pre_encoded_ingest => encoded_video_ingest}/README.md (100%) rename examples/{pre_encoded_ingest => encoded_video_ingest}/src/receiver.rs (100%) rename examples/{pre_encoded_ingest => encoded_video_ingest}/src/sender.rs (100%) rename examples/{pre_encoded_ingest => encoded_video_ingest}/src/simple_sender.rs 
(100%) diff --git a/examples/pre_encoded_ingest/Cargo.toml b/examples/encoded_video_ingest/Cargo.toml similarity index 100% rename from examples/pre_encoded_ingest/Cargo.toml rename to examples/encoded_video_ingest/Cargo.toml diff --git a/examples/pre_encoded_ingest/README.md b/examples/encoded_video_ingest/README.md similarity index 100% rename from examples/pre_encoded_ingest/README.md rename to examples/encoded_video_ingest/README.md diff --git a/examples/pre_encoded_ingest/src/receiver.rs b/examples/encoded_video_ingest/src/receiver.rs similarity index 100% rename from examples/pre_encoded_ingest/src/receiver.rs rename to examples/encoded_video_ingest/src/receiver.rs diff --git a/examples/pre_encoded_ingest/src/sender.rs b/examples/encoded_video_ingest/src/sender.rs similarity index 100% rename from examples/pre_encoded_ingest/src/sender.rs rename to examples/encoded_video_ingest/src/sender.rs diff --git a/examples/pre_encoded_ingest/src/simple_sender.rs b/examples/encoded_video_ingest/src/simple_sender.rs similarity index 100% rename from examples/pre_encoded_ingest/src/simple_sender.rs rename to examples/encoded_video_ingest/src/simple_sender.rs From 043ad3c6bf4a8797f7cc197303100c8310540326 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Tue, 28 Apr 2026 12:26:49 -0600 Subject: [PATCH 13/15] consumer does viz --- Cargo.lock | 36 +- Cargo.toml | 2 +- examples/encoded_video_ingest/Cargo.toml | 8 +- examples/encoded_video_ingest/README.md | 79 +- examples/encoded_video_ingest/src/receiver.rs | 717 ++++++++++-------- examples/encoded_video_ingest/src/sender.rs | 30 +- .../encoded_video_ingest/src/simple_sender.rs | 33 +- .../src/encoded_tcp.rs | 8 +- webrtc-sys/build.rs | 9 + webrtc-sys/src/peer_connection_factory.rs | 4 +- 10 files changed, 487 insertions(+), 439 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f667f3fce..b75f8a6f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2151,6 +2151,26 @@ dependencies = [ "serde", ] +[[package]] +name = 
"encoded_video_ingest" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "eframe", + "egui", + "egui-wgpu", + "env_logger 0.11.10", + "futures", + "libwebrtc", + "livekit", + "livekit-api", + "livekit-encoded-video-ingest", + "log", + "parking_lot", + "tokio", +] + [[package]] name = "encoding_rs" version = "0.8.35" @@ -5849,22 +5869,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "pre_encoded_ingest" -version = "0.1.0" -dependencies = [ - "anyhow", - "clap", - "env_logger 0.11.10", - "futures", - "libwebrtc", - "livekit", - "livekit-api", - "livekit-encoded-video-ingest", - "log", - "tokio", -] - [[package]] name = "presser" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index e92af633e..ca7acb070 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ members = [ "examples/local_video", "examples/mobile", "examples/play_from_disk", - "examples/pre_encoded_ingest", + "examples/encoded_video_ingest", "examples/rpc", "examples/save_to_disk", "examples/screensharing", diff --git a/examples/encoded_video_ingest/Cargo.toml b/examples/encoded_video_ingest/Cargo.toml index 8fd4dd123..5d49b3ec9 100644 --- a/examples/encoded_video_ingest/Cargo.toml +++ b/examples/encoded_video_ingest/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "pre_encoded_ingest" +name = "encoded_video_ingest" version = "0.1.0" edition.workspace = true publish = false @@ -18,7 +18,10 @@ path = "src/simple_sender.rs" [dependencies] anyhow = { workspace = true } -clap = { workspace = true, features = ["derive"] } +clap = { workspace = true, features = ["derive", "env"] } +eframe = { workspace = true, features = ["default_fonts", "wgpu"] } +egui = { workspace = true } +egui-wgpu = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } libwebrtc = { workspace = true, features = ["encoded-video"] } @@ -26,4 +29,5 @@ livekit = { workspace = true, features = ["encoded-video", "rustls-tls-native-ro livekit-api = { workspace = true, features = 
["rustls-tls-native-roots"] } livekit-encoded-video-ingest = { workspace = true } log = { workspace = true } +parking_lot = { workspace = true } tokio = { workspace = true, features = ["full"] } diff --git a/examples/encoded_video_ingest/README.md b/examples/encoded_video_ingest/README.md index af57d2675..fd9d97288 100644 --- a/examples/encoded_video_ingest/README.md +++ b/examples/encoded_video_ingest/README.md @@ -1,19 +1,18 @@ -# pre_encoded_ingest +# encoded_video_ingest End-to-end demo of the **encoded video ingest** feature of the Rust -SDK. Eencoded H.264, H.265, VP8, or AV1 frames flow from a gstreamer +SDK. Encoded H.264, H.265, VP8, or AV1 frames flow from a gstreamer camera pipeline directly into `NativeEncodedVideoSource::capture_frame`, get packetized by WebRTC (no software re-encode), and arrive at a -remote peer which writes decoded frames to a TCP port for a second -gstreamer pipeline to render. +remote peer which decodes and renders them directly in a WGPU window. ```text -┌────────────┐ encoded (TCP) ┌─────────────┐ RTP (WebRTC) ┌────────────┐ I420 (TCP) ┌─────────────┐ -│ gstreamer │ ───────────► │ sender.rs │ ────────────────► │ receiver.rs│ ─────────────► │ gstreamer │ -│ (camera) │ :5005 │ (encoded│ │ (decoded │ :5006 │ (display) │ -│ tcpserver │ │ publish, │ │ output) │ │ │ -│ │ │ tcp client)│ │ │ │ │ -└────────────┘ └─────────────┘ └────────────┘ └─────────────┘ +┌────────────┐ encoded (TCP) ┌─────────────┐ RTP (WebRTC) ┌─────────────┐ +│ gstreamer │ ───────────► │ sender.rs │ ────────────────► │ receiver.rs │ +│ (camera) │ :5005 │ (encoded │ │ (decoded │ +│ tcpserver │ │ publish, │ │ WGPU │ +│ │ │ tcp client)│ │ display) │ +└────────────┘ └─────────────┘ └─────────────┘ ``` Gstreamer produces the encoded bytestream as a TCP server on :5005; the @@ -109,10 +108,9 @@ binds `port=5005` for a quick local check. That is only for this camera-validation hop. 
In the [full LiveKit demo](#running-the-livekit-demo) below, **port 5005** is reserved for -**TCP** from the camera pipeline into `sender` (Annex-B bytestream), -and **port 5006** is where `receiver` serves **decoded I420** to a -separate GStreamer visualizer — different protocol, different payload, -and no overlap with this UDP/RTP smoke test. +**TCP** from the camera pipeline into `sender` (Annex-B bytestream). +The `receiver` renders the subscribed LiveKit track directly and does +not need a second GStreamer pipeline. ### Send — camera → RTP/UDP 5005 @@ -422,7 +420,7 @@ does not. The sender handles both. Use `simple_sender` (SDK helper, recommended): ```bash -RUST_LOG=info cargo run -p pre_encoded_ingest --bin simple_sender -- \ +RUST_LOG=info cargo run -p encoded_video_ingest --bin simple_sender -- \ --tcp-host 127.0.0.1 --tcp-port 5005 \ --width 640 --height 480 \ --codec h264 \ @@ -456,39 +454,15 @@ restarted, the sender reconnects automatically. ### 3. Start the receiver (Terminal 3) ```bash -RUST_LOG=info cargo run -p pre_encoded_ingest --bin receiver -- \ - --tcp-port 5006 \ +RUST_LOG=info cargo run -p encoded_video_ingest --bin receiver -- \ --room encoded-video-demo --identity encoded-receiver \ --from encoded-sender ``` -The receiver subscribes to the room and waits for a TCP client on the -given port. Each decoded I420 frame is written tightly packed -(Y ‖ U ‖ V, no row padding, no framing header) on the socket. - -### 4. Visualize (Terminal 4) - -```bash -gst-launch-1.0 -v \ - tcpclientsrc host=127.0.0.1 port=5006 ! \ - rawvideoparse width=640 height=480 format=i420 framerate=30/1 ! \ - videoconvert ! autovideosink sync=false -``` - -`rawvideoparse` needs the exact width/height the receiver is producing. -If the publisher is at 640x480, use `width=640 height=480` here. -Framerate just drives gstreamer's display pacing — the Rust side -writes frames as fast as WebRTC delivers them. - -> The receiver's TCP output is **raw I420**, not H.264. 
Do **not** -> pipe it through `h264parse` — you will see -> `h264parse: No valid frames found before end of stream` / -> `Broken bit stream` because the bytes are Y/U/V planes, not NAL -> units. Use `rawvideoparse` as shown above. - -If the publisher resolution changes mid-run, the receiver closes the -TCP connection; reconnect your gstreamer visualizer to pick up the -new caps. +The receiver subscribes to the room and renders the first matching +remote video track directly in a native WGPU window. The receive side +uses `NativeVideoStream`, so the window displays decoded frames from +WebRTC's internal decoder rather than encoded packets. ## Troubleshooting @@ -530,12 +504,11 @@ AirPlay Receiver. Check with: lsof -nP -iTCP:5000 -sTCP:LISTEN ``` -**Visualizer shows `h264parse: No valid frames found` / `Broken bit -stream` / `No caps set`.** -The visualizer in step 4 is consuming the receiver's output -(port 5006), which is raw I420 — not H.264. Use `rawvideoparse` as -shown, not `h264parse`. `h264parse` belongs in step 1, on the -*sender* side. +**Receiver window opens but never shows video.** +Confirm `--from` matches the publisher identity exactly, or omit it to +render the first subscribed video track. The receiver logs +`Subscribed to ...` once it accepts a track, then `recv: ... fps` as +decoded frames arrive. **Nothing logs at all from the Rust binaries.** `sender`/`receiver` use `env_logger`; set `RUST_LOG=info` (as in the @@ -598,7 +571,7 @@ macOS-to-macOS should decode cleanly. `CodecArg::Vp9` still exists in `sender.rs` (and `NativeEncodedVideoSource` accepts `VideoCodec::Vp9`), but VP9 ingest is not exercised by this demo and has rough edges that make it a poor -fit for a "Eencoded bytes straight to RTP" path: +fit for a "Encoded bytes straight to RTP" path: - libvpx-vp9 emits **superframes** in IVF (a per-frame record can bundle several coded frames — e.g. 
a show_existing_frame reshow @@ -624,8 +597,8 @@ The feature added in this branch covers the **send** side: the producer hands encoded bytes in, WebRTC packetizes them out. On the **receive** side the SDK currently only exposes decoded frames via `NativeVideoStream`. That's why the receiver round-trips through -WebRTC's internal decoder and serves raw I420 to gstreamer, rather -than forwarding encoded H.264. +WebRTC's internal decoder and renders decoded frames, rather than +forwarding encoded H.264. Exposing encoded frames on receive would require a `RemoteEncodedVideoStream` analogue (likely backed by a WebRTC diff --git a/examples/encoded_video_ingest/src/receiver.rs b/examples/encoded_video_ingest/src/receiver.rs index 4c50463a7..2a67ff94b 100644 --- a/examples/encoded_video_ingest/src/receiver.rs +++ b/examples/encoded_video_ingest/src/receiver.rs @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Encoded (compressed) H.264 ingest receiver. +//! Encoded ingest receiver with an in-process WGPU visualizer. //! -//! Subscribes to a LiveKit room and forwards the first incoming video track -//! as tightly-packed I420 frames over a TCP connection. A gstreamer -//! pipeline on the other end renders them. +//! Subscribes to a LiveKit room and renders the first incoming video track +//! directly in an `egui`/`wgpu` window. //! //! NOTE: the current SDK only exposes *decoded* frames on the receive side //! (via `NativeVideoStream`). WebRTC's internal decoder runs in-process @@ -24,44 +23,42 @@ //! a future enhancement — see README.md. 
use std::{ - env, - net::SocketAddr, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, - }, + ops::DerefMut, + sync::Arc, time::{Duration, Instant}, }; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Result}; use clap::Parser; +use eframe::Renderer; use futures::StreamExt; use livekit::{ prelude::*, - webrtc::{prelude::VideoBuffer, video_stream::native::NativeVideoStream}, + webrtc::{ + native::yuv_helper, + prelude::{RtcVideoTrack, VideoBuffer}, + video_stream::native::NativeVideoStream, + }, }; use livekit_api::access_token; use log::{info, warn}; -use tokio::{ - io::AsyncWriteExt, - net::{TcpListener, TcpStream}, - sync::{mpsc, watch}, -}; +use parking_lot::Mutex; +use tokio::sync::mpsc; -#[derive(Parser, Debug)] +#[derive(Parser, Debug, Clone)] #[command(author, version, about, long_about = None)] struct Args { /// LiveKit server URL (or set LIVEKIT_URL env var) - #[arg(long)] - url: Option, + #[arg(long, env = "LIVEKIT_URL")] + url: String, /// LiveKit API key (or set LIVEKIT_API_KEY env var) - #[arg(long)] - api_key: Option, + #[arg(long, env = "LIVEKIT_API_KEY")] + api_key: String, /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) - #[arg(long)] - api_secret: Option, + #[arg(long, env = "LIVEKIT_API_SECRET")] + api_secret: String, /// Room name to join #[arg(long, default_value = "encoded-video-demo")] @@ -71,44 +68,214 @@ struct Args { #[arg(long, default_value = "encoded-receiver")] identity: String, - /// TCP port to serve tightly-packed I420 frames on - #[arg(long, default_value_t = 5001)] - tcp_port: u16, - /// Only subscribe to the track from this participant identity #[arg(long)] from: Option, } -#[tokio::main] -async fn main() -> Result<()> { +fn main() -> Result<()> { env_logger::init(); let args = Args::parse(); - let (shutdown_tx, shutdown_rx) = watch::channel(false); - tokio::spawn({ - let shutdown_tx = shutdown_tx.clone(); - async move { - let _ = tokio::signal::ctrl_c().await; - let _ = shutdown_tx.send(true); - info!("Ctrl-C 
received, shutting down..."); + eframe::run_native( + "LiveKit Encoded Video Receiver", + eframe::NativeOptions { centered: true, renderer: Renderer::Wgpu, ..Default::default() }, + Box::new(|cc| Ok(Box::new(ReceiverApp::new(cc, args)))), + ) + .map_err(|err| anyhow!("receiver UI failed: {err}"))?; + + Ok(()) +} + +enum UiEvent { + Connected { room: Arc, sid: RoomSid }, + ConnectFailed { error: String }, + RoomEvent { event: RoomEvent }, +} + +struct ReceiverApp { + async_runtime: tokio::runtime::Runtime, + ui_rx: mpsc::UnboundedReceiver, + room: Option>, + render_state: egui_wgpu::RenderState, + renderer: Option, + active_sid: Option, + active_label: Option, + from: Option, + status: String, +} + +impl ReceiverApp { + fn new(cc: &eframe::CreationContext<'_>, args: Args) -> Self { + let async_runtime = + tokio::runtime::Builder::new_multi_thread().enable_all().build().unwrap(); + let (ui_tx, ui_rx) = mpsc::unbounded_channel(); + async_runtime.spawn(connect_task(args.clone(), ui_tx)); + + Self { + async_runtime, + ui_rx, + room: None, + render_state: cc.wgpu_render_state.clone().unwrap(), + renderer: None, + active_sid: None, + active_label: None, + from: args.from, + status: format!("Connecting to room '{}' as '{}'...", args.room, args.identity), + } + } + + fn event(&mut self, event: UiEvent) { + match event { + UiEvent::Connected { room, sid } => { + self.status = format!("Connected to room '{}' (sid {})", room.name(), sid); + self.room = Some(room); + } + UiEvent::ConnectFailed { error } => { + self.status = format!("Connection failed: {error}"); + } + UiEvent::RoomEvent { event } => self.room_event(event), + } + } + + fn room_event(&mut self, event: RoomEvent) { + match event { + RoomEvent::TrackSubscribed { track, publication, participant } => { + if let Some(from) = &self.from { + if participant.identity().as_str() != from { + return; + } + } + + let RemoteTrack::Video(video) = track else { + return; + }; + + if self.active_sid.is_some() { + info!( + 
"Ignoring extra video track {} (already have one active)", + publication.sid() + ); + return; + } + + let sid = publication.sid(); + let label = format!( + "{} from '{}': codec={}, {}x{}", + sid, + participant.identity(), + publication.mime_type(), + publication.dimension().0, + publication.dimension().1, + ); + + info!("Subscribed to {label}"); + self.renderer = Some(VideoRenderer::new( + self.async_runtime.handle(), + self.render_state.clone(), + video.rtc_track(), + )); + self.active_sid = Some(sid); + self.active_label = Some(label.clone()); + self.status = format!("Rendering {label}"); + } + RoomEvent::TrackUnsubscribed { publication, .. } + | RoomEvent::TrackUnpublished { publication, .. } => { + if self.active_sid.as_ref() == Some(&publication.sid()) { + info!("Track {} ended", publication.sid()); + self.renderer = None; + self.active_sid = None; + self.active_label = None; + self.status = "Waiting for a video track...".to_string(); + } + } + RoomEvent::Disconnected { reason } => { + self.renderer = None; + self.active_sid = None; + self.active_label = None; + self.room = None; + self.status = format!("Disconnected: {reason:?}"); + } + _ => {} + } + } + + fn draw_video(&self, ui: &mut egui::Ui) { + let rect = ui.available_rect_before_wrap(); + let response = ui.allocate_rect(rect, egui::Sense::hover()); + let rect = response.rect; + + ui.painter().rect_filled(rect, egui::CornerRadius::default(), egui::Color32::BLACK); + + let Some(renderer) = &self.renderer else { + ui.painter().text( + rect.center(), + egui::Align2::CENTER_CENTER, + &self.status, + egui::FontId::proportional(18.0), + egui::Color32::WHITE, + ); + return; + }; + + let resolution = renderer.resolution(); + if let Some(texture_id) = renderer.texture_id() { + let image_rect = fit_rect(rect, resolution.0, resolution.1); + ui.painter().image( + texture_id, + image_rect, + egui::Rect::from_min_max(egui::pos2(0.0, 0.0), egui::pos2(1.0, 1.0)), + egui::Color32::WHITE, + ); + } + + ui.painter().text( + 
egui::pos2(rect.min.x + 8.0, rect.max.y - 8.0), + egui::Align2::LEFT_BOTTOM, + format!( + "{}x{} {}", + resolution.0, + resolution.1, + self.active_label.as_deref().unwrap_or("") + ), + egui::FontId::default(), + egui::Color32::WHITE, + ); + } +} + +impl eframe::App for ReceiverApp { + fn update(&mut self, ctx: &egui::Context, _frame: &mut eframe::Frame) { + while let Ok(event) = self.ui_rx.try_recv() { + self.event(event); + } + + egui::TopBottomPanel::top("status_panel").show(ctx, |ui| { + ui.horizontal(|ui| { + ui.label(&self.status); + }); + }); + + egui::CentralPanel::default().show(ctx, |ui| { + self.draw_video(ui); + }); + + ctx.request_repaint(); + } +} + +impl Drop for ReceiverApp { + fn drop(&mut self) { + if let Some(room) = self.room.take() { + if let Err(err) = self.async_runtime.block_on(room.close()) { + warn!("room.close: {err}"); + } } - }); - - let url = args - .url - .or_else(|| env::var("LIVEKIT_URL").ok()) - .context("--url or LIVEKIT_URL required")?; - let api_key = args - .api_key - .or_else(|| env::var("LIVEKIT_API_KEY").ok()) - .context("--api-key or LIVEKIT_API_KEY required")?; - let api_secret = args - .api_secret - .or_else(|| env::var("LIVEKIT_API_SECRET").ok()) - .context("--api-secret or LIVEKIT_API_SECRET required")?; - - let token = access_token::AccessToken::with_api_key(&api_key, &api_secret) + } +} + +async fn connect_task(args: Args, ui_tx: mpsc::UnboundedSender) { + let token = match access_token::AccessToken::with_api_key(&args.api_key, &args.api_secret) .with_identity(&args.identity) .with_name(&args.identity) .with_grants(access_token::VideoGrants { @@ -117,298 +284,216 @@ async fn main() -> Result<()> { can_subscribe: true, ..Default::default() }) - .to_jwt()?; + .to_jwt() + { + Ok(token) => token, + Err(err) => { + let _ = ui_tx.send(UiEvent::ConnectFailed { error: err.to_string() }); + return; + } + }; info!("Connecting to LiveKit room '{}' as '{}'...", args.room, args.identity); let mut room_options = 
RoomOptions::default(); room_options.auto_subscribe = true; room_options.adaptive_stream = false; - let (room, mut events) = Room::connect(&url, &token, room_options).await?; - let room = Arc::new(room); - info!("Connected: {} (sid {})", room.name(), room.sid().await); - - // Boot the frame server. Accepts one client at a time; subsequent - // clients supersede the previous. - let bind: SocketAddr = format!("0.0.0.0:{}", args.tcp_port).parse().unwrap(); - let listener = TcpListener::bind(bind).await.with_context(|| format!("bind tcp {bind}"))?; - info!( - "Serving tightly-packed I420 frames on tcp/{}:{} — waiting for a client", - bind.ip(), - bind.port() - ); - - // Channel feeding raw I420 frames to the TCP writer task. Kept small - // so the most recent frame wins when the client stalls. - let (frame_tx, frame_rx) = mpsc::channel::(2); - - tokio::spawn(frame_server_task(listener, frame_rx, shutdown_rx.clone())); - - let mut active_sid: Option = None; - let frame_tx = Arc::new(frame_tx); - let mut shutdown_rx_main = shutdown_rx.clone(); - - loop { - tokio::select! 
{ - biased; - r = shutdown_rx_main.changed() => { - r.ok(); - if *shutdown_rx_main.borrow() { - break; - } - } - event = events.recv() => { - let Some(event) = event else { break }; - match event { - RoomEvent::TrackSubscribed { track, publication, participant } => { - if let Some(ref from) = args.from { - if participant.identity().as_str() != from { - continue; - } - } - let RemoteTrack::Video(video) = track else { continue }; - if active_sid.is_some() { - info!( - "Ignoring extra video track {} (already have one active)", - publication.sid() - ); - continue; - } - let sid = publication.sid(); - active_sid = Some(sid.clone()); - info!( - "Subscribed to {} from '{}': codec={}, {}x{}", - sid, - participant.identity(), - publication.mime_type(), - publication.dimension().0, - publication.dimension().1, - ); - let frame_tx = frame_tx.clone(); - let mut shutdown_rx_video = shutdown_rx.clone(); - tokio::spawn(async move { - let mut sink = NativeVideoStream::new(video.rtc_track()); - let mut frames: u64 = 0; - let mut last_log = Instant::now(); - loop { - tokio::select! { - biased; - r = shutdown_rx_video.changed() => { - r.ok(); - if *shutdown_rx_video.borrow() { - break; - } - } - frame = sink.next() => { - let Some(frame) = frame else { - break; - }; - let i420 = frame.buffer.to_i420(); - let w = i420.width(); - let h = i420.height(); - let (sy, su, sv) = i420.strides(); - let (dy, du, dv) = i420.data(); - let packet = pack_i420(w, h, sy, su, sv, dy, du, dv); - // Non-blocking try_send: drop if the writer is slow. - let _ = frame_tx.try_send(packet); - frames += 1; - if last_log.elapsed() >= Duration::from_secs(2) { - info!( - "recv: {}x{}, ~{:.1} fps", - w, - h, - frames as f64 / last_log.elapsed().as_secs_f64() - ); - frames = 0; - last_log = Instant::now(); - } - } - } - } - info!("frame sink ended"); - }); - } - RoomEvent::TrackUnsubscribed { publication, .. } - | RoomEvent::TrackUnpublished { publication, .. 
} => { - if active_sid.as_ref() == Some(&publication.sid()) { - info!("Track {} ended", publication.sid()); - active_sid = None; - } - } - _ => {} - } - } + match Room::connect(&args.url, &token, room_options).await { + Ok((room, events)) => { + let sid = room.sid().await; + let room = Arc::new(room); + info!("Connected: {} (sid {})", room.name(), sid); + let _ = ui_tx.send(UiEvent::Connected { room, sid }); + tokio::spawn(room_event_task(events, ui_tx)); + } + Err(err) => { + let _ = ui_tx.send(UiEvent::ConnectFailed { error: err.to_string() }); } } +} + +async fn room_event_task( + mut events: mpsc::UnboundedReceiver, + ui_tx: mpsc::UnboundedSender, +) { + while let Some(event) = events.recv().await { + let _ = ui_tx.send(UiEvent::RoomEvent { event }); + } +} - if let Err(e) = room.close().await { - warn!("room.close: {e}"); +fn fit_rect(container: egui::Rect, width: u32, height: u32) -> egui::Rect { + if width == 0 || height == 0 { + return container; } - drop(frame_tx); - info!("Shutting down..."); - Ok(()) + let source_aspect = width as f32 / height as f32; + let container_aspect = container.width() / container.height(); + let size = if container_aspect > source_aspect { + egui::vec2(container.height() * source_aspect, container.height()) + } else { + egui::vec2(container.width(), container.width() / source_aspect) + }; + + egui::Rect::from_center_size(container.center(), size) } -/// A tightly-packed I420 frame ready to be written on the wire. -struct I420Packet { - width: u32, - height: u32, - /// `width*height + 2*(width/2)*(height/2)` bytes (Y, U, V planes packed - /// contiguously with no row padding). 
- data: Vec, +struct VideoRenderer { + internal: Arc>, + + #[allow(dead_code)] + rtc_track: RtcVideoTrack, } -fn pack_i420( +struct RendererInternal { + render_state: egui_wgpu::RenderState, width: u32, height: u32, - stride_y: u32, - stride_u: u32, - stride_v: u32, - y: &[u8], - u: &[u8], - v: &[u8], -) -> I420Packet { - let uv_w = (width + 1) / 2; - let uv_h = (height + 1) / 2; - let y_size = (width * height) as usize; - let uv_size = (uv_w * uv_h) as usize; - let mut data = Vec::with_capacity(y_size + 2 * uv_size); - - for row in 0..height as usize { - let off = row * stride_y as usize; - data.extend_from_slice(&y[off..off + width as usize]); - } - for row in 0..uv_h as usize { - let off = row * stride_u as usize; - data.extend_from_slice(&u[off..off + uv_w as usize]); - } - for row in 0..uv_h as usize { - let off = row * stride_v as usize; - data.extend_from_slice(&v[off..off + uv_w as usize]); - } - - I420Packet { width, height, data } + rgba_data: Vec, + texture: Option, + texture_view: Option, + egui_texture: Option, } -/// Accepts TCP clients and pumps frames from the channel into whichever -/// one is currently connected. Frames received while no client is -/// connected are dropped. 
-async fn frame_server_task( - listener: TcpListener, - mut frame_rx: mpsc::Receiver, - mut shutdown_rx: watch::Receiver, -) { - let frames_out = Arc::new(AtomicU64::new(0)); - let frames_dropped = Arc::new(AtomicU64::new(0)); +impl VideoRenderer { + fn new( + async_handle: &tokio::runtime::Handle, + render_state: egui_wgpu::RenderState, + rtc_track: RtcVideoTrack, + ) -> Self { + let internal = Arc::new(Mutex::new(RendererInternal { + render_state, + width: 0, + height: 0, + rgba_data: Vec::default(), + texture: None, + texture_view: None, + egui_texture: None, + })); - { - let frames_out = frames_out.clone(); - let frames_dropped = frames_dropped.clone(); - let mut shutdown_rx_stats = shutdown_rx.clone(); - tokio::spawn(async move { - let mut last = Instant::now(); - loop { - tokio::select! { - biased; - r = shutdown_rx_stats.changed() => { - r.ok(); - if *shutdown_rx_stats.borrow() { - break; - } - } - _ = tokio::time::sleep(Duration::from_secs(2)) => { - let ok = frames_out.swap(0, Ordering::Relaxed); - let dropped = frames_dropped.swap(0, Ordering::Relaxed); - if ok > 0 || dropped > 0 { - info!( - "serve: {:.1} fps written, {:.1} fps dropped", - ok as f64 / last.elapsed().as_secs_f64(), - dropped as f64 / last.elapsed().as_secs_f64() - ); - } - last = Instant::now(); + let mut video_sink = NativeVideoStream::new(rtc_track.clone()); + std::thread::spawn({ + let async_handle = async_handle.clone(); + let internal = internal.clone(); + move || { + let mut frames: u64 = 0; + let mut last_log = Instant::now(); + while let Some(frame) = async_handle.block_on(video_sink.next()) { + let mut internal = internal.lock(); + let buffer = frame.buffer.to_i420(); + let width = buffer.width(); + let height = buffer.height(); + + internal.ensure_texture_size(width, height); + + let rgba_ptr = internal.rgba_data.deref_mut(); + let rgba_stride = buffer.width() * 4; + let (stride_y, stride_u, stride_v) = buffer.strides(); + let (data_y, data_u, data_v) = buffer.data(); + + 
yuv_helper::i420_to_abgr( + data_y, + stride_y, + data_u, + stride_u, + data_v, + stride_v, + rgba_ptr, + rgba_stride, + buffer.width() as i32, + buffer.height() as i32, + ); + + internal.render_state.queue.write_texture( + eframe::wgpu::TexelCopyTextureInfo { + texture: internal.texture.as_ref().unwrap(), + mip_level: 0, + origin: eframe::wgpu::Origin3d::default(), + aspect: eframe::wgpu::TextureAspect::default(), + }, + &internal.rgba_data, + eframe::wgpu::TexelCopyBufferLayout { + bytes_per_row: Some(width * 4), + ..Default::default() + }, + eframe::wgpu::Extent3d { width, height, ..Default::default() }, + ); + + frames += 1; + if last_log.elapsed() >= Duration::from_secs(2) { + info!( + "recv: {}x{}, ~{:.1} fps", + width, + height, + frames as f64 / last_log.elapsed().as_secs_f64() + ); + frames = 0; + last_log = Instant::now(); } } + info!("frame renderer ended"); } }); + + Self { rtc_track, internal } } - loop { - tokio::select! { - biased; - r = shutdown_rx.changed() => { - r.ok(); - if *shutdown_rx.borrow() { - return; - } - } - accept = listener.accept() => { - let (client, peer) = match accept { - Ok(c) => c, - Err(e) => { - warn!("accept failed: {e}"); - continue; - } - }; - info!("client connected from {peer}"); - if let Err(e) = pump_to_client( - client, - &mut frame_rx, - &frames_out, - &frames_dropped, - shutdown_rx.clone(), - ) - .await - { - warn!("client disconnected: {e}"); - } - info!("client {peer} closed, waiting for the next one"); - } - } + fn resolution(&self) -> (u32, u32) { + let internal = self.internal.lock(); + (internal.width, internal.height) + } + + fn texture_id(&self) -> Option { + self.internal.lock().egui_texture } } -async fn pump_to_client( - mut client: TcpStream, - frame_rx: &mut mpsc::Receiver, - frames_out: &AtomicU64, - frames_dropped: &AtomicU64, - mut shutdown_rx: watch::Receiver, -) -> Result<()> { - let _ = client.set_nodelay(true); - let mut announced_dims = None; - loop { - tokio::select! 
{ - biased; - r = shutdown_rx.changed() => { - r.ok(); - if *shutdown_rx.borrow() { - return Ok(()); - } - } - maybe_frame = frame_rx.recv() => { - let Some(frame) = maybe_frame else { - return Ok(()); - }; - if announced_dims.is_none() { - announced_dims = Some((frame.width, frame.height)); - info!("first frame to client: {}x{}", frame.width, frame.height); - } - if announced_dims != Some((frame.width, frame.height)) { - // Resolution change: restart the client to let gstreamer - // reconfigure its pipeline. rawvideoparse has fixed caps. - frames_dropped.fetch_add(1, Ordering::Relaxed); - return Err(anyhow::anyhow!( - "resolution changed from {:?} to {}x{}; dropping client", - announced_dims, - frame.width, - frame.height - )); - } - client.write_all(&frame.data).await?; - frames_out.fetch_add(1, Ordering::Relaxed); - } +impl RendererInternal { + fn ensure_texture_size(&mut self, width: u32, height: u32) { + if self.width == width && self.height == height { + return; + } + + self.width = width; + self.height = height; + self.rgba_data.resize((width * height * 4) as usize, 0); + + self.texture = + Some(self.render_state.device.create_texture(&eframe::wgpu::TextureDescriptor { + label: Some("lk-receiver-texture"), + usage: eframe::wgpu::TextureUsages::TEXTURE_BINDING + | eframe::wgpu::TextureUsages::COPY_DST, + dimension: eframe::wgpu::TextureDimension::D2, + size: eframe::wgpu::Extent3d { width, height, ..Default::default() }, + sample_count: 1, + mip_level_count: 1, + format: eframe::wgpu::TextureFormat::Rgba8Unorm, + view_formats: &[eframe::wgpu::TextureFormat::Rgba8Unorm], + })); + + self.texture_view = Some(self.texture.as_mut().unwrap().create_view( + &eframe::wgpu::TextureViewDescriptor { + label: Some("lk-receiver-texture-view"), + format: Some(eframe::wgpu::TextureFormat::Rgba8Unorm), + dimension: Some(eframe::wgpu::TextureViewDimension::D2), + mip_level_count: Some(1), + array_layer_count: Some(1), + ..Default::default() + }, + )); + + if let 
Some(texture_id) = self.egui_texture { + self.render_state.renderer.write().update_egui_texture_from_wgpu_texture( + &self.render_state.device, + self.texture_view.as_ref().unwrap(), + eframe::wgpu::FilterMode::Linear, + texture_id, + ); + } else { + self.egui_texture = Some(self.render_state.renderer.write().register_native_texture( + &self.render_state.device, + self.texture_view.as_ref().unwrap(), + eframe::wgpu::FilterMode::Linear, + )); } } } diff --git a/examples/encoded_video_ingest/src/sender.rs b/examples/encoded_video_ingest/src/sender.rs index 50b37f4f7..ffa819234 100644 --- a/examples/encoded_video_ingest/src/sender.rs +++ b/examples/encoded_video_ingest/src/sender.rs @@ -37,7 +37,6 @@ //! README.md. use std::{ - env, sync::{ atomic::{AtomicBool, AtomicU64, Ordering}, Arc, Mutex, @@ -61,16 +60,16 @@ use tokio::{io::AsyncReadExt, net::TcpStream, time::sleep}; #[command(author, version, about, long_about = None)] struct Args { /// LiveKit server URL (or set LIVEKIT_URL env var) - #[arg(long)] - url: Option, + #[arg(long, env = "LIVEKIT_URL")] + url: String, /// LiveKit API key (or set LIVEKIT_API_KEY env var) - #[arg(long)] - api_key: Option, + #[arg(long, env = "LIVEKIT_API_KEY")] + api_key: String, /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) - #[arg(long)] - api_secret: Option, + #[arg(long, env = "LIVEKIT_API_SECRET")] + api_secret: String, /// Room name to join #[arg(long, default_value = "encoded-video-demo")] @@ -606,20 +605,7 @@ async fn main() -> Result<()> { } }); - let url = args - .url - .or_else(|| env::var("LIVEKIT_URL").ok()) - .context("--url or LIVEKIT_URL required")?; - let api_key = args - .api_key - .or_else(|| env::var("LIVEKIT_API_KEY").ok()) - .context("--api-key or LIVEKIT_API_KEY required")?; - let api_secret = args - .api_secret - .or_else(|| env::var("LIVEKIT_API_SECRET").ok()) - .context("--api-secret or LIVEKIT_API_SECRET required")?; - - let token = access_token::AccessToken::with_api_key(&api_key, &api_secret) 
+ let token = access_token::AccessToken::with_api_key(&args.api_key, &args.api_secret) .with_identity(&args.identity) .with_name(&args.identity) .with_grants(access_token::VideoGrants { @@ -634,7 +620,7 @@ async fn main() -> Result<()> { let mut room_options = RoomOptions::default(); room_options.auto_subscribe = false; room_options.dynacast = false; - let (room, _events) = Room::connect(&url, &token, room_options).await?; + let (room, _events) = Room::connect(&args.url, &token, room_options).await?; let room = Arc::new(room); info!("Connected: {} (sid {})", room.name(), room.sid().await); diff --git a/examples/encoded_video_ingest/src/simple_sender.rs b/examples/encoded_video_ingest/src/simple_sender.rs index ba8ebc442..073a58d87 100644 --- a/examples/encoded_video_ingest/src/simple_sender.rs +++ b/examples/encoded_video_ingest/src/simple_sender.rs @@ -20,9 +20,9 @@ //! SDK. This example is effectively: parse CLI args, connect to the //! room, `EncodedTcpIngest::start`, log stats, wait for Ctrl-C. 
-use std::{env, net::SocketAddr, sync::Arc, time::Duration}; +use std::{net::SocketAddr, sync::Arc, time::Duration}; -use anyhow::{Context, Result}; +use anyhow::Result; use clap::Parser; use libwebrtc::video_source::VideoCodec; use livekit::prelude::*; @@ -37,16 +37,16 @@ use tokio::time::sleep; #[command(author, version, about, long_about = None)] struct Args { /// LiveKit server URL (or set LIVEKIT_URL env var) - #[arg(long)] - url: Option, + #[arg(long, env = "LIVEKIT_URL")] + url: String, /// LiveKit API key (or set LIVEKIT_API_KEY env var) - #[arg(long)] - api_key: Option, + #[arg(long, env = "LIVEKIT_API_KEY")] + api_key: String, /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) - #[arg(long)] - api_secret: Option, + #[arg(long, env = "LIVEKIT_API_SECRET")] + api_secret: String, /// Room name to join #[arg(long, default_value = "encoded-video-demo")] @@ -135,20 +135,7 @@ async fn main() -> Result<()> { env_logger::init(); let args = Args::parse(); - let url = args - .url - .or_else(|| env::var("LIVEKIT_URL").ok()) - .context("--url or LIVEKIT_URL required")?; - let api_key = args - .api_key - .or_else(|| env::var("LIVEKIT_API_KEY").ok()) - .context("--api-key or LIVEKIT_API_KEY required")?; - let api_secret = args - .api_secret - .or_else(|| env::var("LIVEKIT_API_SECRET").ok()) - .context("--api-secret or LIVEKIT_API_SECRET required")?; - - let token = access_token::AccessToken::with_api_key(&api_key, &api_secret) + let token = access_token::AccessToken::with_api_key(&args.api_key, &args.api_secret) .with_identity(&args.identity) .with_name(&args.identity) .with_grants(access_token::VideoGrants { @@ -163,7 +150,7 @@ async fn main() -> Result<()> { let mut room_options = RoomOptions::default(); room_options.auto_subscribe = false; room_options.dynacast = false; - let (room, _events) = Room::connect(&url, &token, room_options).await?; + let (room, _events) = Room::connect(&args.url, &token, room_options).await?; info!("connected: {} (sid {})", 
room.name(), room.sid().await); let mut opts = EncodedTcpIngestOptions::new( diff --git a/livekit-encoded-video-ingest/src/encoded_tcp.rs b/livekit-encoded-video-ingest/src/encoded_tcp.rs index 41baab657..c2e4fe1ab 100644 --- a/livekit-encoded-video-ingest/src/encoded_tcp.rs +++ b/livekit-encoded-video-ingest/src/encoded_tcp.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! High-level helper that ingests an encoded (compressed) video bytestream over TCP +//! High-level helper that ingests a pre-encoded video bytestream over TCP //! and publishes it to a LiveKit room as an encoded video track. //! //! The caller supplies the TCP endpoint, codec, and declared resolution. @@ -65,7 +65,7 @@ pub struct EncodedTcpIngestOptions { /// Port of the gstreamer `tcpserversink`. pub port: u16, - /// Encoded (compressed) codec on the wire. Must match the upstream encoder. + /// Pre-encoded codec on the wire. Must match the upstream encoder. pub codec: VideoCodec, /// Declared stream width (px). @@ -152,7 +152,7 @@ pub struct EncodedIngestStats { pub tcp_reconnects: u64, } -/// Ingests an encoded (compressed) video feed from a TCP socket and publishes it as +/// Ingests a pre-encoded video feed from a TCP socket and publishes it as /// an encoded LiveKit track. 
/// /// Create one with [`EncodedTcpIngest::start`], inspect it via @@ -495,9 +495,9 @@ mod tests { }; use libwebrtc::video_source::VideoCodec; - use livekit::{prelude::TrackSource, RoomError}; use super::*; + use livekit::{prelude::TrackSource, RoomError}; #[test] fn options_new_sets_network_and_track_defaults() { diff --git a/webrtc-sys/build.rs b/webrtc-sys/build.rs index feed895be..3917a1f4a 100644 --- a/webrtc-sys/build.rs +++ b/webrtc-sys/build.rs @@ -28,6 +28,15 @@ fn main() { println!("cargo:rerun-if-env-changed=LK_DEBUG_WEBRTC"); println!("cargo:rerun-if-env-changed=LK_CUSTOM_WEBRTC"); + println!("cargo:rustc-check-cfg=cfg(encoded_video)"); + + if encoded_video { + // cxx_build evaluates cfgs from the build-script environment. Cargo + // exposes `encoded-video` as CARGO_FEATURE_ENCODED_VIDEO, which does + // not match `feature = "encoded-video"` in cxx's cfg evaluator. + println!("cargo:rustc-cfg=encoded_video"); + env::set_var("CARGO_CFG_ENCODED_VIDEO", "1"); + } let mut rust_files = vec![ "src/peer_connection.rs", diff --git a/webrtc-sys/src/peer_connection_factory.rs b/webrtc-sys/src/peer_connection_factory.rs index 00550dae6..0cd9ca8b7 100644 --- a/webrtc-sys/src/peer_connection_factory.rs +++ b/webrtc-sys/src/peer_connection_factory.rs @@ -58,7 +58,7 @@ pub mod ffi { type IceGatheringState = crate::peer_connection::ffi::IceGatheringState; type AudioTrackSource = crate::audio_track::ffi::AudioTrackSource; type VideoTrackSource = crate::video_track::ffi::VideoTrackSource; - #[cfg(feature = "encoded-video")] + #[cfg(encoded_video)] type EncodedVideoTrackSource = crate::encoded_video_source::ffi::EncodedVideoTrackSource; type RtpCapabilities = crate::rtp_parameters::ffi::RtpCapabilities; type AudioTrack = crate::audio_track::ffi::AudioTrack; @@ -103,7 +103,7 @@ pub mod ffi { source: SharedPtr, ) -> SharedPtr; - #[cfg(feature = "encoded-video")] + #[cfg(encoded_video)] fn create_video_track_from_encoded_source( self: &PeerConnectionFactory, label: String, 
From 5fcd430ba0f2b6af86503b8ab16a739599f8d644 Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Wed, 29 Apr 2026 12:52:05 -0600 Subject: [PATCH 14/15] ensure that encoded ingest in the SDK FFI layer participates in the same local publish synchronization --- livekit-ffi/src/server/encoded_tcp_ingest.rs | 2 ++ livekit-ffi/src/server/room.rs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/livekit-ffi/src/server/encoded_tcp_ingest.rs b/livekit-ffi/src/server/encoded_tcp_ingest.rs index 53a12619c..729a7997b 100644 --- a/livekit-ffi/src/server/encoded_tcp_ingest.rs +++ b/livekit-ffi/src/server/encoded_tcp_ingest.rs @@ -110,6 +110,8 @@ pub fn create( } .into(), ); + + ffi_room.inner.mark_local_publish_callback_sent(track_sid); } Err(err) => { let _ = server.send_event( diff --git a/livekit-ffi/src/server/room.rs b/livekit-ffi/src/server/room.rs index 4c120d3bc..0156698d5 100644 --- a/livekit-ffi/src/server/room.rs +++ b/livekit-ffi/src/server/room.rs @@ -325,6 +325,10 @@ impl FfiRoom { } impl RoomInner { + pub(crate) fn mark_local_publish_callback_sent(&self, sid: TrackSid) { + self.pending_published_tracks.lock().insert(sid); + } + pub fn publish_data( &self, server: &'static FfiServer, @@ -490,7 +494,7 @@ impl RoomInner { .into(), ); - inner.pending_published_tracks.lock().insert(publication.sid()); + inner.mark_local_publish_callback_sent(publication.sid()); } Err(err) => { // Failed to publish the track From 64a998a45d91df54d942ad993def6d3b60be041a Mon Sep 17 00:00:00 2001 From: Stephen DeRosa Date: Thu, 30 Apr 2026 12:17:38 -0600 Subject: [PATCH 15/15] rm tcp ingest --- Cargo.lock | 14 - Cargo.toml | 2 - examples/encoded_video_ingest/Cargo.toml | 5 - examples/encoded_video_ingest/README.md | 72 +-- examples/encoded_video_ingest/src/receiver.rs | 160 ++++- examples/encoded_video_ingest/src/sender.rs | 43 +- .../encoded_video_ingest/src/simple_sender.rs | 204 ------ livekit-encoded-video-ingest/Cargo.toml | 16 - 
livekit-encoded-video-ingest/src/demux.rs | 397 ------------ .../src/encoded_tcp.rs | 607 ------------------ livekit-encoded-video-ingest/src/keyframe.rs | 297 --------- livekit-encoded-video-ingest/src/lib.rs | 34 - livekit-ffi-node-bindings/proto/ffi_pb.d.ts | 61 -- livekit-ffi-node-bindings/proto/ffi_pb.js | 10 - livekit-ffi/Cargo.toml | 3 +- livekit-ffi/protocol/encoded_tcp_ingest.proto | 135 ---- livekit-ffi/protocol/ffi.proto | 22 +- livekit-ffi/src/server/encoded_tcp_ingest.rs | 306 --------- livekit-ffi/src/server/mod.rs | 1 - livekit-ffi/src/server/requests.rs | 9 +- 20 files changed, 202 insertions(+), 2196 deletions(-) delete mode 100644 examples/encoded_video_ingest/src/simple_sender.rs delete mode 100644 livekit-encoded-video-ingest/Cargo.toml delete mode 100644 livekit-encoded-video-ingest/src/demux.rs delete mode 100644 livekit-encoded-video-ingest/src/encoded_tcp.rs delete mode 100644 livekit-encoded-video-ingest/src/keyframe.rs delete mode 100644 livekit-encoded-video-ingest/src/lib.rs delete mode 100644 livekit-ffi/protocol/encoded_tcp_ingest.proto delete mode 100644 livekit-ffi/src/server/encoded_tcp_ingest.rs diff --git a/Cargo.lock b/Cargo.lock index b75f8a6f2..df4b657d5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2165,7 +2165,6 @@ dependencies = [ "libwebrtc", "livekit", "livekit-api", - "livekit-encoded-video-ingest", "log", "parking_lot", "tokio", @@ -4097,18 +4096,6 @@ dependencies = [ "tokio-stream", ] -[[package]] -name = "livekit-encoded-video-ingest" -version = "0.1.0" -dependencies = [ - "libwebrtc", - "livekit", - "livekit-runtime", - "log", - "parking_lot", - "tokio", -] - [[package]] name = "livekit-ffi" version = "0.12.53" @@ -4127,7 +4114,6 @@ dependencies = [ "link-cplusplus", "livekit", "livekit-api", - "livekit-encoded-video-ingest", "livekit-protocol", "log", "parking_lot", diff --git a/Cargo.toml b/Cargo.toml index ca7acb070..052028d59 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,6 @@ resolver = "2" members = [ 
"livekit", - "livekit-encoded-video-ingest", "livekit-api", "livekit-protocol", "livekit-ffi", @@ -50,7 +49,6 @@ imgproc = { version = "0.3.19", path = "imgproc" } libwebrtc = { version = "0.3.30", path = "libwebrtc" } livekit = { version = "0.7.37", path = "livekit" } livekit-api = { version = "0.4.19", path = "livekit-api" } -livekit-encoded-video-ingest = { version = "0.1.0", path = "livekit-encoded-video-ingest" } livekit-ffi = { version = "0.12.53", path = "livekit-ffi" } livekit-datatrack = { version = "0.1.4", path = "livekit-datatrack" } livekit-protocol = { version = "0.7.5", path = "livekit-protocol" } diff --git a/examples/encoded_video_ingest/Cargo.toml b/examples/encoded_video_ingest/Cargo.toml index 5d49b3ec9..414346499 100644 --- a/examples/encoded_video_ingest/Cargo.toml +++ b/examples/encoded_video_ingest/Cargo.toml @@ -12,10 +12,6 @@ path = "src/sender.rs" name = "receiver" path = "src/receiver.rs" -[[bin]] -name = "simple_sender" -path = "src/simple_sender.rs" - [dependencies] anyhow = { workspace = true } clap = { workspace = true, features = ["derive", "env"] } @@ -27,7 +23,6 @@ futures = { workspace = true } libwebrtc = { workspace = true, features = ["encoded-video"] } livekit = { workspace = true, features = ["encoded-video", "rustls-tls-native-roots"] } livekit-api = { workspace = true, features = ["rustls-tls-native-roots"] } -livekit-encoded-video-ingest = { workspace = true } log = { workspace = true } parking_lot = { workspace = true } tokio = { workspace = true, features = ["full"] } diff --git a/examples/encoded_video_ingest/README.md b/examples/encoded_video_ingest/README.md index fd9d97288..a027303b8 100644 --- a/examples/encoded_video_ingest/README.md +++ b/examples/encoded_video_ingest/README.md @@ -27,46 +27,6 @@ frames. The sender supports two wire framings, picked by `--codec`: record is one Temporal Unit (TU) — a complete OBU sequence for one frame. 
-## The two sender binaries - -This example ships **two** senders that publish the same stream; pick -whichever one better matches your integration shape: - -- **`simple_sender`** — uses the encoded video ingest helper - [`livekit_encoded_video_ingest::EncodedTcpIngest`]. The helper owns the TCP - socket, demux, keyframe probe, reconnect loop, and track - publish/unpublish. Applications only supply config — port, codec, - width, height — and an optional [`EncodedIngestObserver`] for - connection / keyframe / bitrate callbacks. Recommended starting - point. -- **`sender`** — the hand-rolled version kept as a reference - implementation. It open-codes exactly what `EncodedTcpIngest` does - internally and is useful if you need to deviate from the helper - (custom transport, alternate demuxer, different track topology). - -The CLI flags are the same for both binaries; `--bin simple_sender` -is the drop-in replacement used in all examples below. - -### Minimal usage - -```rust -use libwebrtc::video_source::VideoCodec; -use livekit_encoded_video_ingest::{EncodedTcpIngest, EncodedTcpIngestOptions}; - -let options = EncodedTcpIngestOptions::new( - /* port */ 5005, - /* codec */ VideoCodec::H264, - /* width */ 640, - /* height */ 480, -); -let ingest = EncodedTcpIngest::start(room.local_participant(), options).await?; -// ... run ... -ingest.stop().await; -``` - -See `src/simple_sender.rs` for a full driver (token minting, observer, -stats polling, Ctrl-C shutdown). - ## What this exercises - `libwebrtc::video_source::NativeEncodedVideoSource` — the @@ -240,8 +200,9 @@ gst-launch-1.0 -v \ avfvideosrc device-index=0 ! \ video/x-raw,width=640,height=480,format=NV12,framerate=30/1 ! \ videoconvert ! \ - x264enc tune=zerolatency speed-preset=ultrafast bitrate=1000 key-int-max=60 aud=true ! \ - h264parse config-interval=1 ! \ + x264enc tune=zerolatency speed-preset=veryfast bitrate=2500 key-int-max=30 \ + bframes=0 rc-lookahead=0 aud=true ! \ + h264parse config-interval=-1 ! 
\ video/x-h264,stream-format=byte-stream,alignment=au ! \ tcpserversink host=0.0.0.0 port=5005 ``` @@ -417,20 +378,15 @@ does not. The sender handles both. ### 2. Start the sender (Terminal 2) -Use `simple_sender` (SDK helper, recommended): - ```bash -RUST_LOG=info cargo run -p encoded_video_ingest --bin simple_sender -- \ +RUST_LOG=info cargo run -p encoded_video_ingest --bin sender -- \ --tcp-host 127.0.0.1 --tcp-port 5005 \ --width 640 --height 480 \ + --max-bitrate-kbps 2500 --max-framerate 30 \ --codec h264 \ --room encoded-video-demo --identity encoded-sender ``` -Or the hand-rolled reference (`--bin sender`) with the same flags — -see [The two sender binaries](#the-two-sender-binaries) for when to -pick one over the other. - For the H.265 pipeline use `--codec h265`; for VP8 use `--codec vp8`; for AV1 use `--codec av1`. @@ -440,6 +396,10 @@ Flags: listening. - `--width/--height` declared stream resolution; must match what gstreamer is producing. +- `--max-bitrate-kbps/--max-framerate` set the single RTP encoding + envelope advertised to WebRTC. Keep these at or above the upstream + encoder's realtime output; the SDK's generic 640x480 default is too + conservative for this low-latency ingest demo. - `--codec {h264,h265,vp8,av1}` selects the wire framing and keyframe probe: Annex-B (AUD-split) for H.264/H.265, or IVF for VP8/AV1. **Must match the gstreamer pipeline.** `publish_track` will @@ -464,6 +424,20 @@ remote video track directly in a native WGPU window. The receive side uses `NativeVideoStream`, so the window displays decoded frames from WebRTC's internal decoder rather than encoded packets. +The receiver defaults to a low-latency display path (`vsync=false`, +WGPU `AutoNoVsync`, and swapchain frame latency 1). Pass `--vsync` if +you prefer smoother presentation over the lowest possible glass-to-glass +latency. + +### Low-latency tuning notes + +The first place to look is the sender's `ingest:` line. 
If `dropped` +frames climb or the logged encoded bitrate is much higher than the +logged WebRTC target, either lower the upstream encoder output or raise +`--max-bitrate-kbps` to match it. For 640x480@30 H.264, the default +demo command uses 2.5 Mbps to avoid the SDK's conservative generic +640x480 preset becoming the bottleneck. + ## Troubleshooting **Sender connects to the room but never logs ingest stats.** diff --git a/examples/encoded_video_ingest/src/receiver.rs b/examples/encoded_video_ingest/src/receiver.rs index 2a67ff94b..99362df2a 100644 --- a/examples/encoded_video_ingest/src/receiver.rs +++ b/examples/encoded_video_ingest/src/receiver.rs @@ -23,7 +23,6 @@ //! a future enhancement — see README.md. use std::{ - ops::DerefMut, sync::Arc, time::{Duration, Instant}, }; @@ -37,7 +36,7 @@ use livekit::{ webrtc::{ native::yuv_helper, prelude::{RtcVideoTrack, VideoBuffer}, - video_stream::native::NativeVideoStream, + video_stream::native::{NativeVideoStream, NativeVideoStreamOptions}, }, }; use livekit_api::access_token; @@ -71,15 +70,34 @@ struct Args { /// Only subscribe to the track from this participant identity #[arg(long)] from: Option, + + /// Enable vsync for smoother display at the cost of extra render latency + #[arg(long, default_value_t = false)] + vsync: bool, } fn main() -> Result<()> { env_logger::init(); let args = Args::parse(); + let present_mode = if args.vsync { + eframe::wgpu::PresentMode::AutoVsync + } else { + eframe::wgpu::PresentMode::AutoNoVsync + }; eframe::run_native( "LiveKit Encoded Video Receiver", - eframe::NativeOptions { centered: true, renderer: Renderer::Wgpu, ..Default::default() }, + eframe::NativeOptions { + centered: true, + renderer: Renderer::Wgpu, + vsync: args.vsync, + wgpu_options: egui_wgpu::WgpuConfiguration { + present_mode, + desired_maximum_frame_latency: Some(1), + ..Default::default() + }, + ..Default::default() + }, Box::new(|cc| Ok(Box::new(ReceiverApp::new(cc, args)))), ) .map_err(|err| anyhow!("receiver UI 
failed: {err}"))?; @@ -370,7 +388,10 @@ impl VideoRenderer { egui_texture: None, })); - let mut video_sink = NativeVideoStream::new(rtc_track.clone()); + let mut video_sink = NativeVideoStream::with_options( + rtc_track.clone(), + NativeVideoStreamOptions { queue_size_frames: Some(1) }, + ); std::thread::spawn({ let async_handle = async_handle.clone(); let internal = internal.clone(); @@ -379,29 +400,12 @@ impl VideoRenderer { let mut last_log = Instant::now(); while let Some(frame) = async_handle.block_on(video_sink.next()) { let mut internal = internal.lock(); - let buffer = frame.buffer.to_i420(); + let buffer = frame.buffer.as_ref(); let width = buffer.width(); let height = buffer.height(); internal.ensure_texture_size(width, height); - - let rgba_ptr = internal.rgba_data.deref_mut(); - let rgba_stride = buffer.width() * 4; - let (stride_y, stride_u, stride_v) = buffer.strides(); - let (data_y, data_u, data_v) = buffer.data(); - - yuv_helper::i420_to_abgr( - data_y, - stride_y, - data_u, - stride_u, - data_v, - stride_v, - rgba_ptr, - rgba_stride, - buffer.width() as i32, - buffer.height() as i32, - ); + convert_to_abgr(buffer, &mut internal.rgba_data); internal.render_state.queue.write_texture( eframe::wgpu::TexelCopyTextureInfo { @@ -447,6 +451,116 @@ impl VideoRenderer { } } +fn convert_to_abgr(buffer: &dyn VideoBuffer, dst: &mut [u8]) { + let width = buffer.width(); + let height = buffer.height(); + let stride = width * 4; + + if let Some(buffer) = buffer.as_i420() { + let (stride_y, stride_u, stride_v) = buffer.strides(); + let (data_y, data_u, data_v) = buffer.data(); + yuv_helper::i420_to_abgr( + data_y, + stride_y, + data_u, + stride_u, + data_v, + stride_v, + dst, + stride, + width as i32, + height as i32, + ); + return; + } + + if let Some(buffer) = buffer.as_nv12() { + let (stride_y, stride_uv) = buffer.strides(); + let (data_y, data_uv) = buffer.data(); + yuv_helper::nv12_to_abgr( + data_y, + stride_y, + data_uv, + stride_uv, + dst, + stride, + 
width as i32, + height as i32, + ); + return; + } + + if let Some(buffer) = buffer.as_i422() { + let (stride_y, stride_u, stride_v) = buffer.strides(); + let (data_y, data_u, data_v) = buffer.data(); + yuv_helper::i422_to_abgr( + data_y, + stride_y, + data_u, + stride_u, + data_v, + stride_v, + dst, + stride, + width as i32, + height as i32, + ); + return; + } + + if let Some(buffer) = buffer.as_i444() { + let (stride_y, stride_u, stride_v) = buffer.strides(); + let (data_y, data_u, data_v) = buffer.data(); + yuv_helper::i444_to_abgr( + data_y, + stride_y, + data_u, + stride_u, + data_v, + stride_v, + dst, + stride, + width as i32, + height as i32, + ); + return; + } + + if let Some(buffer) = buffer.as_i010() { + let (stride_y, stride_u, stride_v) = buffer.strides(); + let (data_y, data_u, data_v) = buffer.data(); + yuv_helper::i010_to_abgr( + data_y, + stride_y, + data_u, + stride_u, + data_v, + stride_v, + dst, + stride, + width as i32, + height as i32, + ); + return; + } + + let buffer = buffer.to_i420(); + let (stride_y, stride_u, stride_v) = buffer.strides(); + let (data_y, data_u, data_v) = buffer.data(); + yuv_helper::i420_to_abgr( + data_y, + stride_y, + data_u, + stride_u, + data_v, + stride_v, + dst, + stride, + width as i32, + height as i32, + ); +} + impl RendererInternal { fn ensure_texture_size(&mut self, width: u32, height: u32) { if self.width == width && self.height == height { diff --git a/examples/encoded_video_ingest/src/sender.rs b/examples/encoded_video_ingest/src/sender.rs index ffa819234..5d784364d 100644 --- a/examples/encoded_video_ingest/src/sender.rs +++ b/examples/encoded_video_ingest/src/sender.rs @@ -48,7 +48,7 @@ use anyhow::{Context, Result}; use clap::Parser; use libwebrtc::video_source::{EncodedFrameInfo, RtcVideoSource, VideoCodec, VideoResolution}; use livekit::{ - options::{TrackPublishOptions, VideoCodec as LkVideoCodec}, + options::{TrackPublishOptions, VideoCodec as LkVideoCodec, VideoEncoding}, prelude::*, 
webrtc::video_source::native::{EncodedVideoSourceObserver, NativeEncodedVideoSource}, }; @@ -95,6 +95,14 @@ struct Args { #[arg(long, default_value_t = 480)] height: u32, + /// RTP sender max bitrate advertised to WebRTC, in kbps + #[arg(long, default_value_t = 2_500)] + max_bitrate_kbps: u64, + + /// RTP sender max framerate advertised to WebRTC + #[arg(long, default_value_t = 30.0)] + max_framerate: f64, + /// Encoded codec on the wire. Must match the gstreamer pipeline. #[arg(long, value_enum, default_value_t = CodecArg::H264)] codec: CodecArg, @@ -192,11 +200,12 @@ impl CodecArg { /// `on_target_bitrate`. struct LoggingObserver { last_bitrate_log: Mutex>, + target_bitrate_bps: Arc, } impl LoggingObserver { - fn new() -> Self { - Self { last_bitrate_log: Mutex::new(None) } + fn new(target_bitrate_bps: Arc) -> Self { + Self { last_bitrate_log: Mutex::new(None), target_bitrate_bps } } } @@ -210,6 +219,8 @@ impl EncodedVideoSourceObserver for LoggingObserver { } fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { + self.target_bitrate_bps.store(bitrate_bps as u64, Ordering::Relaxed); + // Rate-limit logging to 1 Hz. 
let mut last = self.last_bitrate_log.lock().unwrap(); let now = Instant::now(); @@ -626,7 +637,8 @@ async fn main() -> Result<()> { let resolution = VideoResolution { width: args.width, height: args.height }; let source = NativeEncodedVideoSource::new(args.codec.webrtc_codec(), resolution); - source.set_observer(Arc::new(LoggingObserver::new())); + let target_bitrate_bps = Arc::new(AtomicU64::new(0)); + source.set_observer(Arc::new(LoggingObserver::new(target_bitrate_bps.clone()))); info!( "Created encoded {} source: {}x{} (source_id={})", args.codec.name(), @@ -649,22 +661,34 @@ async fn main() -> Result<()> { source: TrackSource::Camera, simulcast: false, video_codec: args.codec.livekit_codec(), + video_encoding: Some(VideoEncoding { + max_bitrate: args.max_bitrate_kbps.saturating_mul(1000), + max_framerate: args.max_framerate, + }), ..Default::default() }; room.local_participant() .publish_track(LocalTrack::Video(track), publish_opts) .await .context("publish_track failed")?; - info!("Published encoded {} track", args.codec.name()); + info!( + "Published encoded {} track (max {} kbps @ {:.1} fps)", + args.codec.name(), + args.max_bitrate_kbps, + args.max_framerate + ); let frames_accepted = Arc::new(AtomicU64::new(0)); let frames_dropped = Arc::new(AtomicU64::new(0)); let keyframes = Arc::new(AtomicU64::new(0)); + let encoded_bytes = Arc::new(AtomicU64::new(0)); { let frames_accepted = frames_accepted.clone(); let frames_dropped = frames_dropped.clone(); let keyframes = keyframes.clone(); + let encoded_bytes = encoded_bytes.clone(); + let target_bitrate_bps = target_bitrate_bps.clone(); tokio::spawn(async move { let mut last = Instant::now(); loop { @@ -674,11 +698,17 @@ async fn main() -> Result<()> { let ok = frames_accepted.swap(0, Ordering::Relaxed); let dropped = frames_dropped.swap(0, Ordering::Relaxed); let kf = keyframes.swap(0, Ordering::Relaxed); + let bytes = encoded_bytes.swap(0, Ordering::Relaxed); if ok + dropped > 0 { + let encoded_kbps = bytes as 
f64 * 8.0 / elapsed / 1000.0; + let target_kbps = target_bitrate_bps.load(Ordering::Relaxed) / 1000; info!( - "ingest: {:.1} fps accepted, {:.1} fps dropped, {} keyframes", + "ingest: {:.1} fps accepted, {:.1} fps dropped, {:.0} kbps encoded \ + (target {} kbps), {} keyframes", ok as f64 / elapsed, dropped as f64 / elapsed, + encoded_kbps, + target_kbps, kf ); } @@ -735,6 +765,7 @@ async fn main() -> Result<()> { break; } for au in out.drain(..) { + encoded_bytes.fetch_add(au.len() as u64, Ordering::Relaxed); let is_keyframe = is_keyframe(args.codec, &au); if is_keyframe { keyframes.fetch_add(1, Ordering::Relaxed); diff --git a/examples/encoded_video_ingest/src/simple_sender.rs b/examples/encoded_video_ingest/src/simple_sender.rs deleted file mode 100644 index 073a58d87..000000000 --- a/examples/encoded_video_ingest/src/simple_sender.rs +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Minimal encoded (compressed) ingest driver using -//! [`livekit_encoded_video_ingest::EncodedTcpIngest`]. -//! -//! Everything that was hand-rolled in `sender.rs` (demuxing, keyframe -//! detection, reconnect loop, observer plumbing) now lives inside the -//! SDK. This example is effectively: parse CLI args, connect to the -//! room, `EncodedTcpIngest::start`, log stats, wait for Ctrl-C. 
- -use std::{net::SocketAddr, sync::Arc, time::Duration}; - -use anyhow::Result; -use clap::Parser; -use libwebrtc::video_source::VideoCodec; -use livekit::prelude::*; -use livekit_api::access_token; -use livekit_encoded_video_ingest::{ - EncodedIngestObserver, EncodedTcpIngest, EncodedTcpIngestOptions, -}; -use log::{info, warn}; -use tokio::time::sleep; - -#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -struct Args { - /// LiveKit server URL (or set LIVEKIT_URL env var) - #[arg(long, env = "LIVEKIT_URL")] - url: String, - - /// LiveKit API key (or set LIVEKIT_API_KEY env var) - #[arg(long, env = "LIVEKIT_API_KEY")] - api_key: String, - - /// LiveKit API secret (or set LIVEKIT_API_SECRET env var) - #[arg(long, env = "LIVEKIT_API_SECRET")] - api_secret: String, - - /// Room name to join - #[arg(long, default_value = "encoded-video-demo")] - room: String, - - /// Participant identity - #[arg(long, default_value = "encoded-sender")] - identity: String, - - /// Host of the gstreamer `tcpserversink` - #[arg(long, default_value = "127.0.0.1")] - tcp_host: String, - - /// Port of the gstreamer `tcpserversink` - #[arg(long, default_value_t = 5000)] - tcp_port: u16, - - /// Declared stream width (px) - #[arg(long, default_value_t = 640)] - width: u32, - - /// Declared stream height (px) - #[arg(long, default_value_t = 480)] - height: u32, - - /// Encoded (compressed) codec on the wire. Must match the gstreamer pipeline. - #[arg(long, value_enum, default_value_t = CodecArg::H264)] - codec: CodecArg, - - /// Optional max bitrate forwarded to TrackPublishOptions.video_encoding. 
- #[arg(long)] - max_bitrate_kbps: Option, -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, clap::ValueEnum)] -enum CodecArg { - H264, - H265, - Vp8, - Vp9, - Av1, -} - -impl CodecArg { - fn webrtc_codec(self) -> VideoCodec { - match self { - CodecArg::H264 => VideoCodec::H264, - CodecArg::H265 => VideoCodec::H265, - CodecArg::Vp8 => VideoCodec::Vp8, - CodecArg::Vp9 => VideoCodec::Vp9, - CodecArg::Av1 => VideoCodec::Av1, - } - } -} - -/// Logs the feedback events the SDK surfaces. Real producers should -/// react here — e.g. nudge their hardware encoder to emit an IDR on -/// `on_keyframe_requested`, or clamp their encoder to the advertised -/// `on_target_bitrate`. -struct LoggingObserver; - -impl EncodedIngestObserver for LoggingObserver { - fn on_connected(&self, peer: SocketAddr) { - info!("ingest: connected to {peer}"); - } - fn on_disconnected(&self, reason: &str) { - warn!("ingest: disconnected: {reason}"); - } - fn on_keyframe_requested(&self) { - warn!( - "ingest: keyframe requested by receiver — producer should emit a keyframe on the \ - next frame" - ); - } - fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { - info!( - "ingest: target bitrate update: {} kbps @ {:.1} fps", - bitrate_bps / 1000, - framerate_fps - ); - } -} - -#[tokio::main] -async fn main() -> Result<()> { - env_logger::init(); - let args = Args::parse(); - - let token = access_token::AccessToken::with_api_key(&args.api_key, &args.api_secret) - .with_identity(&args.identity) - .with_name(&args.identity) - .with_grants(access_token::VideoGrants { - room_join: true, - room: args.room.clone(), - can_publish: true, - ..Default::default() - }) - .to_jwt()?; - - info!("connecting to LiveKit room '{}' as '{}'...", args.room, args.identity); - let mut room_options = RoomOptions::default(); - room_options.auto_subscribe = false; - room_options.dynacast = false; - let (room, _events) = Room::connect(&args.url, &token, room_options).await?; - info!("connected: {} (sid {})", 
room.name(), room.sid().await); - - let mut opts = EncodedTcpIngestOptions::new( - args.tcp_port, - args.codec.webrtc_codec(), - args.width, - args.height, - ); - opts.host = args.tcp_host.clone(); - opts.max_bitrate_bps = args.max_bitrate_kbps.map(|k| k * 1000); - - let ingest = EncodedTcpIngest::start(room.local_participant(), opts).await?; - ingest.set_observer(Arc::new(LoggingObserver)); - info!("ingest: started track sid={}", ingest.track_sid()); - - // Poll stats every 2s while the ingest runs. - let ingest_for_stats = Arc::new(ingest); - let stats_task = { - let ingest = ingest_for_stats.clone(); - tokio::spawn(async move { - let mut prev = ingest.stats(); - loop { - sleep(Duration::from_secs(2)).await; - let cur = ingest.stats(); - let ok = cur.frames_accepted.saturating_sub(prev.frames_accepted); - let dropped = cur.frames_dropped.saturating_sub(prev.frames_dropped); - let kf = cur.keyframes.saturating_sub(prev.keyframes); - if ok + dropped > 0 { - info!( - "ingest: {:.1} fps accepted, {:.1} fps dropped, {kf} keyframes (total \ - reconnects={})", - ok as f64 / 2.0, - dropped as f64 / 2.0, - cur.tcp_reconnects - ); - } - prev = cur; - } - }) - }; - - tokio::signal::ctrl_c().await.ok(); - info!("ctrl-c received, shutting down..."); - stats_task.abort(); - - let ingest = Arc::try_unwrap(ingest_for_stats) - .map_err(|_| anyhow::anyhow!("ingest still referenced"))?; - ingest.stop().await; - info!("done"); - Ok(()) -} diff --git a/livekit-encoded-video-ingest/Cargo.toml b/livekit-encoded-video-ingest/Cargo.toml deleted file mode 100644 index ff4be9969..000000000 --- a/livekit-encoded-video-ingest/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "livekit-encoded-video-ingest" -version = "0.1.0" -edition.workspace = true -license.workspace = true -description = "Encoded video ingest helpers for LiveKit" -repository.workspace = true -publish = false - -[dependencies] -libwebrtc = { workspace = true, features = ["encoded-video"] } -livekit = { workspace 
= true, features = ["encoded-video"] } -livekit-runtime = { workspace = true } -log = { workspace = true } -parking_lot = { workspace = true } -tokio = { workspace = true, features = ["io-util", "net", "time"] } diff --git a/livekit-encoded-video-ingest/src/demux.rs b/livekit-encoded-video-ingest/src/demux.rs deleted file mode 100644 index 0eb2d1b0d..000000000 --- a/livekit-encoded-video-ingest/src/demux.rs +++ /dev/null @@ -1,397 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Stream demuxers that split a raw TCP bytestream into discrete encoded -//! video frames. -//! -//! * H.264 / H.265: Annex-B bytestream, split on access-unit delimiters. -//! * VP8 / VP9 / AV1: IVF container (gstreamer's `ivfmux` or `avmux_ivf`), -//! optionally prefixed with a 32-byte DKIF file header. - -use libwebrtc::video_source::VideoCodec; - -use super::keyframe; - -/// Upper bound on per-frame size we accept from the IVF reader before we -/// conclude we are byte-misaligned. -pub(super) const MAX_FRAME_BYTES: usize = 8 * 1024 * 1024; - -/// Wire-format selector. Hides whether the underlying wire is Annex-B or -/// IVF. 
-pub(super) enum Demuxer { - AnnexB(AuSplitter), - Ivf(IvfReader), -} - -impl Demuxer { - pub(super) fn new(codec: VideoCodec) -> Self { - match codec { - VideoCodec::H264 | VideoCodec::H265 => Demuxer::AnnexB(AuSplitter::new(codec)), - VideoCodec::Vp8 | VideoCodec::Vp9 | VideoCodec::Av1 => { - Demuxer::Ivf(IvfReader::new(codec)) - } - } - } - - /// Feeds a raw byte chunk from the socket. Completed frames are - /// appended to `out`. - pub(super) fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { - match self { - Demuxer::AnnexB(s) => s.feed(chunk, out), - Demuxer::Ivf(r) => r.feed(chunk, out), - } - } - - /// True if the demuxer has detected a byte misalignment it cannot - /// recover from without a fresh TCP connection. - pub(super) fn desynced(&self) -> bool { - match self { - Demuxer::AnnexB(_) => false, - Demuxer::Ivf(r) => r.desynced, - } - } -} - -/// Reads IVF-framed video off the wire. Format per libvpx: -/// -/// * File header (32 bytes, optional): `"DKIF"`, u16 version, u16 -/// header_len, 4-byte FOURCC, u16 width, u16 height, u32 tb_num, -/// u32 tb_den, u32 frame_count, u32 unused. -/// * Frame header (12 bytes each): u32 frame_size, u64 pts. -/// * Frame payload: `frame_size` bytes. All integers little-endian. -/// -/// The file header is *optional* here: gstreamer's `avmux_ivf` on a -/// non-seekable `tcpserversink` emits only per-frame records (libavformat -/// writes `DKIF` at `write_header` time, but the ffmpeg AVIO wrapper in -/// gst-libav swallows it when the output is non-seekable). `ivfmux` (the -/// native gst-plugins-bad element) does emit `DKIF` and we parse it when -/// present. gstreamer's one-buffer-per-packet semantics keep new -/// `tcpserversink` clients on an IVF record boundary. 
-pub(super) struct IvfReader { - codec: VideoCodec, - buf: Vec, - header_phase_done: bool, - pub(super) desynced: bool, -} - -impl IvfReader { - fn new(codec: VideoCodec) -> Self { - Self { - codec, - buf: Vec::with_capacity(256 * 1024), - header_phase_done: false, - desynced: false, - } - } - - fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { - self.buf.extend_from_slice(chunk); - - if !self.header_phase_done { - if self.buf.len() < 4 { - return; - } - if &self.buf[0..4] == b"DKIF" { - if self.buf.len() < 32 { - return; - } - let fourcc = &self.buf[8..12]; - if let Some(expected) = ivf_fourcc(self.codec) { - if fourcc != expected { - log::warn!( - "ivf: expected FOURCC {:?} for {:?}, got {:?}", - std::str::from_utf8(expected).unwrap_or("?"), - self.codec, - std::str::from_utf8(fourcc).unwrap_or("?"), - ); - } - } - log::info!( - "ivf: file header OK (codec fourcc={})", - std::str::from_utf8(fourcc).unwrap_or("?") - ); - self.buf.drain(..32); - } else { - log::info!( - "ivf: no DKIF file header on this stream (typical for gstreamer avmux_ivf \ - on tcpserversink); parsing per-frame records directly" - ); - } - self.header_phase_done = true; - } - - loop { - if self.buf.len() < 12 { - return; - } - let size = - u32::from_le_bytes([self.buf[0], self.buf[1], self.buf[2], self.buf[3]]) as usize; - if size == 0 || size > MAX_FRAME_BYTES { - log::warn!( - "ivf: implausible frame_size={size} bytes — byte stream is misaligned. \ - Dropping connection so the ingest loop can reconnect and re-anchor on the \ - next gstreamer buffer boundary." - ); - self.desynced = true; - self.buf.clear(); - return; - } - if self.buf.len() < 12 + size { - return; - } - let frame = self.buf[12..12 + size].to_vec(); - self.buf.drain(..12 + size); - out.push(frame); - } - } -} - -/// IVF FOURCC expected on the wire. Only meaningful for codecs delivered -/// via `ivfmux` / `avmux_ivf`. 
-fn ivf_fourcc(codec: VideoCodec) -> Option<&'static [u8; 4]> { - match codec { - VideoCodec::Vp8 => Some(b"VP80"), - VideoCodec::Vp9 => Some(b"VP90"), - VideoCodec::Av1 => Some(b"AV01"), - _ => None, - } -} - -/// Splits an incoming Annex-B bytestream into access units on AUD -/// boundaries. The AUD NAL type and NAL-type extraction are codec -/// specific. -/// -/// Relies on the upstream parser emitting an AUD at the start of every AU -/// (`x264enc aud=true` for H.264, `x265enc option-string="aud=1"` plumbed -/// through `h265parse` for H.265). Bytes before the first AUD are -/// discarded; each subsequent AU is emitted when the *next* AU's AUD -/// arrives (one AU of buffering lag, bounded by the frame interval). -pub(super) struct AuSplitter { - codec: VideoCodec, - buf: Vec, - au_start: Option, - scan_pos: usize, -} - -impl AuSplitter { - fn new(codec: VideoCodec) -> Self { - Self { codec, buf: Vec::with_capacity(256 * 1024), au_start: None, scan_pos: 0 } - } - - fn feed(&mut self, chunk: &[u8], out: &mut Vec>) { - self.buf.extend_from_slice(chunk); - - let Some(aud) = keyframe::aud_nal_type(self.codec) else { - return; - }; - - while self.scan_pos + 3 < self.buf.len() { - let i = self.scan_pos; - let (sc_start, sc_len) = if i + 4 <= self.buf.len() - && self.buf[i] == 0 - && self.buf[i + 1] == 0 - && self.buf[i + 2] == 0 - && self.buf[i + 3] == 1 - { - if i + 5 > self.buf.len() { - break; - } - (i, 4) - } else if self.buf[i] == 0 && self.buf[i + 1] == 0 && self.buf[i + 2] == 1 { - (i, 3) - } else { - self.scan_pos += 1; - continue; - }; - - let nal_off = sc_start + sc_len; - if keyframe::nal_type(self.codec, self.buf[nal_off]) == aud { - if let Some(start) = self.au_start.take() { - out.push(self.buf[start..sc_start].to_vec()); - } - self.au_start = Some(sc_start); - } - self.scan_pos = nal_off + 1; - } - - let drain_before = self.au_start.unwrap_or_else(|| self.buf.len().saturating_sub(3)); - if drain_before > 0 { - self.buf.drain(..drain_before); - 
self.scan_pos = self.scan_pos.saturating_sub(drain_before); - if self.au_start.is_some() { - self.au_start = Some(0); - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn make_ivf_frame(size: u32, payload: &[u8]) -> Vec { - let mut rec = Vec::with_capacity(12 + payload.len()); - rec.extend_from_slice(&size.to_le_bytes()); - rec.extend_from_slice(&0u64.to_le_bytes()); - rec.extend_from_slice(payload); - rec - } - - fn make_dkif_header(fourcc: &[u8; 4]) -> Vec { - let mut bytes = Vec::new(); - bytes.extend_from_slice(b"DKIF"); - bytes.extend_from_slice(&[0; 4]); - bytes.extend_from_slice(fourcc); - bytes.extend_from_slice(&[0; 20]); - bytes - } - - #[test] - fn ivf_without_dkif_emits_frames() { - let mut r = IvfReader::new(VideoCodec::Vp8); - let mut bytes = Vec::new(); - bytes.extend_from_slice(&make_ivf_frame(4, &[1, 2, 3, 4])); - bytes.extend_from_slice(&make_ivf_frame(2, &[9, 9])); - let mut out = Vec::new(); - r.feed(&bytes, &mut out); - assert_eq!(out.len(), 2); - assert_eq!(out[0], vec![1, 2, 3, 4]); - assert_eq!(out[1], vec![9, 9]); - assert!(!r.desynced); - } - - #[test] - fn ivf_with_dkif_skips_header() { - let mut r = IvfReader::new(VideoCodec::Vp8); - let mut bytes = Vec::new(); - bytes.extend_from_slice(&make_dkif_header(b"VP80")); - bytes.extend_from_slice(&make_ivf_frame(3, &[7, 8, 9])); - let mut out = Vec::new(); - r.feed(&bytes, &mut out); - assert_eq!(out, vec![vec![7, 8, 9]]); - } - - #[test] - fn ivf_header_and_frame_can_arrive_across_reads() { - let mut r = IvfReader::new(VideoCodec::Vp8); - let mut bytes = make_dkif_header(b"VP80"); - bytes.extend_from_slice(&make_ivf_frame(4, &[1, 3, 5, 7])); - bytes.extend_from_slice(&make_ivf_frame(2, &[8, 13])); - - let mut out = Vec::new(); - for chunk in bytes.chunks(5) { - r.feed(chunk, &mut out); - } - - assert_eq!(out, vec![vec![1, 3, 5, 7], vec![8, 13]]); - assert!(!r.desynced); - } - - #[test] - fn ivf_absurd_size_triggers_desync() { - let mut r = IvfReader::new(VideoCodec::Vp8); - // 
Size larger than MAX_FRAME_BYTES - let bogus = (MAX_FRAME_BYTES as u32 + 1).to_le_bytes(); - let mut bytes = bogus.to_vec(); - bytes.extend_from_slice(&[0u8; 8]); - let mut out = Vec::new(); - r.feed(&bytes, &mut out); - assert!(out.is_empty()); - assert!(r.desynced); - } - - #[test] - fn ivf_zero_size_triggers_desync_and_drops_buffered_bytes() { - let mut r = IvfReader::new(VideoCodec::Vp9); - let mut bytes = make_ivf_frame(0, &[]); - bytes.extend_from_slice(&make_ivf_frame(3, &[1, 2, 3])); - - let mut out = Vec::new(); - r.feed(&bytes, &mut out); - - assert!(out.is_empty()); - assert!(r.desynced); - } - - #[test] - fn ivf_frame_header_can_arrive_across_reads_without_dkif() { - let mut r = IvfReader::new(VideoCodec::Av1); - let bytes = make_ivf_frame(5, &[0x0A, 0x00, 0x22, 0x00, 0x55]); - - let mut out = Vec::new(); - for chunk in bytes.chunks(2) { - r.feed(chunk, &mut out); - } - - assert_eq!(out, vec![vec![0x0A, 0x00, 0x22, 0x00, 0x55]]); - assert!(!r.desynced); - } - - #[test] - fn au_splitter_emits_completed_aus() { - let mut s = AuSplitter::new(VideoCodec::H264); - // AUD NAL header for H.264: type 9, first byte = 0x09. - // IDR slice header: type 5 => 0x65 (nal_ref_idc=3). - let mut bytes = Vec::new(); - // AU 1: AUD + IDR - bytes.extend_from_slice(&[0, 0, 0, 1, 0x09, 0xF0]); - bytes.extend_from_slice(&[0, 0, 0, 1, 0x65, 0x88, 0x84]); - // AU 2 starts with a new AUD -> AU 1 should be emitted. - bytes.extend_from_slice(&[0, 0, 0, 1, 0x09, 0xF0]); - bytes.extend_from_slice(&[0, 0, 0, 1, 0x41, 0x9A]); - let mut out = Vec::new(); - s.feed(&bytes, &mut out); - assert_eq!(out.len(), 1); - // The first emitted AU should begin at the first AUD start code - // and end before the second AUD. 
- assert_eq!(&out[0][..5], &[0, 0, 0, 1, 0x09]); - assert!(out[0].windows(5).any(|w| w == [0, 0, 0, 1, 0x65])); - } - - #[test] - fn au_splitter_discards_prefix_and_handles_split_start_codes() { - let mut s = AuSplitter::new(VideoCodec::H264); - let mut out = Vec::new(); - - s.feed(&[0xAA, 0xBB, 0x00, 0x00], &mut out); - s.feed(&[0x00], &mut out); - s.feed(&[0x01, 0x09, 0xF0, 0x00, 0x00, 0x01, 0x65, 0x88], &mut out); - assert!(out.is_empty()); - - s.feed(&[0x00, 0x00], &mut out); - s.feed(&[0x00, 0x01, 0x09, 0xF0, 0x00, 0x00, 0x01, 0x41, 0x9A], &mut out); - - assert_eq!(out.len(), 1); - assert_eq!(&out[0][..5], &[0, 0, 0, 1, 0x09]); - assert!(out[0].windows(5).any(|w| w == [0, 0, 1, 0x65, 0x88])); - assert!(!out[0].starts_with(&[0xAA, 0xBB])); - } - - #[test] - fn au_splitter_handles_h265_aud_boundaries() { - let mut s = AuSplitter::new(VideoCodec::H265); - let mut out = Vec::new(); - - // H.265 AUD NAL type 35 => first header byte is (35 << 1) = 0x46. - // IDR_W_RADL NAL type 19 => first header byte is (19 << 1) = 0x26. - s.feed(&[0, 0, 1, 0x46, 0x01, 0x50, 0, 0, 1, 0x26, 0x01, 0x88], &mut out); - assert!(out.is_empty()); - - s.feed(&[0, 0, 0, 1, 0x46, 0x01, 0x50, 0, 0, 1, 0x02, 0x01], &mut out); - - assert_eq!(out.len(), 1); - assert_eq!(&out[0][..5], &[0, 0, 1, 0x46, 0x01]); - assert!(out[0].windows(5).any(|w| w == [0, 0, 1, 0x26, 0x01])); - } -} diff --git a/livekit-encoded-video-ingest/src/encoded_tcp.rs b/livekit-encoded-video-ingest/src/encoded_tcp.rs deleted file mode 100644 index c2e4fe1ab..000000000 --- a/livekit-encoded-video-ingest/src/encoded_tcp.rs +++ /dev/null @@ -1,607 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! High-level helper that ingests a pre-encoded video bytestream over TCP -//! and publishes it to a LiveKit room as an encoded video track. -//! -//! The caller supplies the TCP endpoint, codec, and declared resolution. -//! The helper: -//! -//! 1. Creates a [`NativeEncodedVideoSource`] for the codec. -//! 2. Creates a [`LocalVideoTrack`] bound to that source. -//! 3. Publishes the track via `LocalParticipant::publish_track`. -//! 4. Connects to the TCP endpoint and reconnects on disconnect. -//! 5. Demuxes the stream (Annex-B for H.264/H.265, IVF for VP8/VP9/AV1). -//! 6. Pushes each demuxed frame through `capture_frame`. -//! -//! The matching gstreamer pipelines are documented in -//! `examples/pre_encoded_ingest/README.md`. - -use std::{ - net::SocketAddr, - sync::{ - atomic::{AtomicBool, AtomicU64, Ordering}, - Arc, - }, - time::Duration, -}; - -use libwebrtc::video_source::{ - native::{EncodedVideoSourceObserver, NativeEncodedVideoSource}, - EncodedFrameInfo, RtcVideoSource, VideoCodec, VideoResolution, -}; -use livekit_runtime::JoinHandle; -use parking_lot::Mutex; -use tokio::{io::AsyncReadExt, net::TcpStream, time::sleep}; - -use super::{demux::Demuxer, keyframe::is_keyframe}; -use livekit::{ - options::{TrackPublishOptions, VideoEncoding}, - participant::LocalParticipant, - prelude::*, - RoomError, RoomResult, -}; - -/// Configuration for [`EncodedTcpIngest::start`]. -/// -/// Only `port`, `codec`, `width`, and `height` are mandatory. Everything -/// else has a default that matches the reference gstreamer pipelines. 
-#[derive(Debug, Clone)] -pub struct EncodedTcpIngestOptions { - /// Host running the gstreamer `tcpserversink`. Default: `127.0.0.1`. - pub host: String, - - /// Port of the gstreamer `tcpserversink`. - pub port: u16, - - /// Pre-encoded codec on the wire. Must match the upstream encoder. - pub codec: VideoCodec, - - /// Declared stream width (px). - pub width: u32, - - /// Declared stream height (px). - pub height: u32, - - /// Optional track name. Default: `encoded-`. - pub track_name: Option, - - /// Track source classification. Default: [`TrackSource::Camera`]. - pub track_source: TrackSource, - - /// Optional target max bitrate (bps) forwarded to - /// `TrackPublishOptions.video_encoding.max_bitrate`. When `None`, the - /// SDK picks an appropriate default for the resolution. - pub max_bitrate_bps: Option, - - /// Target max framerate forwarded when `max_bitrate_bps` is set. - /// Ignored otherwise. Default: 30.0. - pub max_framerate_fps: f64, - - /// Backoff between reconnection attempts. Default: 1 s. - pub reconnect_backoff: Duration, - - /// When `true`, [`EncodedTcpIngest::stop`] unpublishes the track - /// before returning. Default: `true`. - pub unpublish_on_stop: bool, -} - -impl EncodedTcpIngestOptions { - /// New options with sensible defaults. Mandatory fields only. - pub fn new(port: u16, codec: VideoCodec, width: u32, height: u32) -> Self { - Self { - host: "127.0.0.1".to_string(), - port, - codec, - width, - height, - track_name: None, - track_source: TrackSource::Camera, - max_bitrate_bps: None, - max_framerate_fps: 30.0, - reconnect_backoff: Duration::from_secs(1), - unpublish_on_stop: true, - } - } -} - -/// Callbacks dispatched by [`EncodedTcpIngest`] as the ingest loop runs. -/// -/// All methods are invoked on Tokio / WebRTC threads; implementers MUST be -/// cheap and non-blocking. Default impls are no-ops so consumers can -/// override only what they care about. 
-pub trait EncodedIngestObserver: Send + Sync { - /// The TCP connection to the upstream producer is established. - fn on_connected(&self, _peer: SocketAddr) {} - - /// The TCP connection was closed (by peer, timeout, or demux desync). - /// The ingest loop will reconnect after - /// [`EncodedTcpIngestOptions::reconnect_backoff`]. - fn on_disconnected(&self, _reason: &str) {} - - /// The receiver requested a keyframe (PLI/FIR). Producers should emit - /// a keyframe on the next frame. - fn on_keyframe_requested(&self) {} - - /// The bandwidth estimator produced a new target bitrate / framerate. - fn on_target_bitrate(&self, _bitrate_bps: u32, _framerate_fps: f64) {} -} - -/// Snapshot of cumulative ingest stats. Counters are monotonic since -/// [`EncodedTcpIngest::start`]. -#[derive(Debug, Clone, Copy, Default)] -pub struct EncodedIngestStats { - /// Frames pushed to the source and accepted by WebRTC. - pub frames_accepted: u64, - /// Frames the source rejected because its internal queue was full. - pub frames_dropped: u64, - /// Keyframes observed on the wire (accepted + dropped). - pub keyframes: u64, - /// TCP reconnections attempted (including the first connect). - pub tcp_reconnects: u64, -} - -/// Ingests a pre-encoded video feed from a TCP socket and publishes it as -/// an encoded LiveKit track. -/// -/// Create one with [`EncodedTcpIngest::start`], inspect it via -/// [`EncodedTcpIngest::stats`] / [`EncodedTcpIngest::track_sid`], and -/// shut it down with [`EncodedTcpIngest::stop`]. Dropping the value -/// without calling `stop` still terminates the background task, but does -/// not unpublish the track. 
-pub struct EncodedTcpIngest { - inner: Arc, - join_handle: Mutex>>, -} - -struct Inner { - participant: LocalParticipant, - source: NativeEncodedVideoSource, - track: LocalVideoTrack, - stop: AtomicBool, - stats: Stats, - observer: Mutex>>, - options: EncodedTcpIngestOptions, -} - -#[derive(Default)] -struct Stats { - frames_accepted: AtomicU64, - frames_dropped: AtomicU64, - keyframes: AtomicU64, - tcp_reconnects: AtomicU64, -} - -impl EncodedTcpIngest { - /// Creates the encoded source, publishes the track, and spawns the - /// TCP ingest task. The returned value owns all of those. - pub async fn start( - participant: LocalParticipant, - options: EncodedTcpIngestOptions, - ) -> RoomResult { - validate_options(&options)?; - - let resolution = VideoResolution { width: options.width, height: options.height }; - let source = NativeEncodedVideoSource::new(options.codec, resolution); - log::info!( - "EncodedTcpIngest: created {:?} source {}x{} (source_id={})", - options.codec, - options.width, - options.height, - source.source_id() - ); - - let track_name = options - .track_name - .clone() - .unwrap_or_else(|| default_track_name(options.codec).to_string()); - let track = LocalVideoTrack::create_video_track( - &track_name, - RtcVideoSource::Encoded(source.clone()), - ); - - let publish_opts = build_publish_options(&options); - // video_codec is force-pinned to match the encoded source by - // LocalParticipant::publish_track, so we leave the default. 
- - participant.publish_track(LocalTrack::Video(track.clone()), publish_opts).await?; - log::info!("EncodedTcpIngest: published track '{}' ({:?})", track_name, options.codec); - - let inner = Arc::new(Inner { - participant, - source: source.clone(), - track, - stop: AtomicBool::new(false), - stats: Stats::default(), - observer: Mutex::new(None), - options, - }); - - source.set_observer(Arc::new(SourceObserverBridge { inner: Arc::downgrade(&inner) })); - - let join_handle = livekit_runtime::spawn({ - let inner = inner.clone(); - async move { - run_ingest_loop(inner).await; - } - }); - - Ok(Self { inner, join_handle: Mutex::new(Some(join_handle)) }) - } - - /// Register (or replace) the ingest-level observer. - pub fn set_observer(&self, observer: Arc) { - *self.inner.observer.lock() = Some(observer); - } - - /// Returns a snapshot of ingest stats since `start`. - pub fn stats(&self) -> EncodedIngestStats { - EncodedIngestStats { - frames_accepted: self.inner.stats.frames_accepted.load(Ordering::Relaxed), - frames_dropped: self.inner.stats.frames_dropped.load(Ordering::Relaxed), - keyframes: self.inner.stats.keyframes.load(Ordering::Relaxed), - tcp_reconnects: self.inner.stats.tcp_reconnects.load(Ordering::Relaxed), - } - } - - /// Returns the sid of the published track. - pub fn track_sid(&self) -> TrackSid { - self.inner.track.sid() - } - - /// Returns a clone of the underlying track. Useful for hooking mute / - /// packet-trailer state from the caller. - pub fn track(&self) -> LocalVideoTrack { - self.inner.track.clone() - } - - /// Stops the ingest loop and, if configured, unpublishes the track. - /// - /// Safe to call at most once. After `stop` returns, the TCP task is - /// terminated. If [`EncodedTcpIngestOptions::unpublish_on_stop`] is - /// true (the default), the track is unpublished from the room. 
- pub async fn stop(self) { - self.inner.stop.store(true, Ordering::Release); - - let join = self.join_handle.lock().take(); - if let Some(handle) = join { - // We don't care about join errors — the task can only panic - // on a broken invariant, and we're shutting down anyway. - let _ = handle.await; - } - - if self.inner.options.unpublish_on_stop { - let sid = self.inner.track.sid(); - match self.inner.participant.unpublish_track(&sid).await { - Ok(_) => log::info!("EncodedTcpIngest: unpublished track {sid:?}"), - Err(e) => log::warn!("EncodedTcpIngest: unpublish_track failed: {e}"), - } - } - } -} - -impl Drop for EncodedTcpIngest { - fn drop(&mut self) { - // Make sure the background task exits even if the caller forgot - // to call `stop`. We can't await here, so the track stays - // published until the room is dropped or explicitly unpublished. - self.inner.stop.store(true, Ordering::Release); - } -} - -fn validate_options(options: &EncodedTcpIngestOptions) -> RoomResult<()> { - if options.width == 0 || options.height == 0 { - return Err(RoomError::Internal( - "EncodedTcpIngest: width and height must be non-zero".to_string(), - )); - } - if options.port == 0 { - return Err(RoomError::Internal("EncodedTcpIngest: port must be non-zero".to_string())); - } - Ok(()) -} - -fn build_publish_options(options: &EncodedTcpIngestOptions) -> TrackPublishOptions { - let mut publish_opts = TrackPublishOptions { - source: options.track_source, - simulcast: false, - ..Default::default() - }; - if let Some(max_bitrate) = options.max_bitrate_bps { - publish_opts.video_encoding = - Some(VideoEncoding { max_bitrate, max_framerate: options.max_framerate_fps }); - } - publish_opts -} - -fn default_track_name(codec: VideoCodec) -> &'static str { - match codec { - VideoCodec::H264 => "encoded-h264", - VideoCodec::H265 => "encoded-h265", - VideoCodec::Vp8 => "encoded-vp8", - VideoCodec::Vp9 => "encoded-vp9", - VideoCodec::Av1 => "encoded-av1", - } -} - -/// Forwards source-level 
callbacks (keyframe request, bitrate update) to -/// the ingest-level observer, if any. Held via a `Weak` so the source -/// does not keep `Inner` alive past `drop`. -struct SourceObserverBridge { - inner: std::sync::Weak, -} - -impl EncodedVideoSourceObserver for SourceObserverBridge { - fn on_keyframe_requested(&self) { - if let Some(inner) = self.inner.upgrade() { - if let Some(obs) = inner.observer.lock().clone() { - obs.on_keyframe_requested(); - } - } - } - - fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { - if let Some(inner) = self.inner.upgrade() { - if let Some(obs) = inner.observer.lock().clone() { - obs.on_target_bitrate(bitrate_bps, framerate_fps); - } - } - } -} - -/// Reconnect loop: connects, demuxes, captures, and reconnects on -/// disconnect / desync until `stop` is flipped. -async fn run_ingest_loop(inner: Arc) { - let opts = &inner.options; - let addr = format!("{}:{}", opts.host, opts.port); - - while !inner.stop.load(Ordering::Acquire) { - inner.stats.tcp_reconnects.fetch_add(1, Ordering::Relaxed); - log::info!("EncodedTcpIngest: connecting to {addr} ({:?})", opts.codec); - - let mut stream = match TcpStream::connect(&addr).await { - Ok(s) => s, - Err(e) => { - log::warn!("EncodedTcpIngest: connect {addr} failed: {e}"); - notify_disconnected(&inner, &format!("connect: {e}")); - if !sleep_interruptible(&inner.stop, opts.reconnect_backoff).await { - return; - } - continue; - } - }; - let _ = stream.set_nodelay(true); - - let peer = stream.peer_addr().ok(); - if let Some(addr) = peer { - log::info!("EncodedTcpIngest: connected to {addr}"); - if let Some(obs) = inner.observer.lock().clone() { - obs.on_connected(addr); - } - } else { - log::info!("EncodedTcpIngest: connected to {addr} (peer_addr unknown)"); - } - - let reason = pump_stream(&inner, &mut stream).await; - log::warn!("EncodedTcpIngest: disconnected: {reason}"); - notify_disconnected(&inner, &reason); - - if inner.stop.load(Ordering::Acquire) { - return; - } - if 
!sleep_interruptible(&inner.stop, opts.reconnect_backoff).await { - return; - } - } -} - -/// Reads from the socket, demuxes, and captures frames until EOF, error, -/// desync, or stop. Returns a human-readable disconnect reason. -async fn pump_stream(inner: &Arc, stream: &mut TcpStream) -> String { - let opts = &inner.options; - let mut demuxer = Demuxer::new(opts.codec); - let mut read_buf = vec![0u8; 64 * 1024]; - let mut out: Vec> = Vec::new(); - - loop { - if inner.stop.load(Ordering::Acquire) { - return "stopped".to_string(); - } - - let n = tokio::select! { - r = stream.read(&mut read_buf) => r, - _ = sleep(Duration::from_millis(250)) => continue, - }; - - let n = match n { - Ok(0) => return "peer closed connection".to_string(), - Ok(n) => n, - Err(e) => return format!("read error: {e}"), - }; - - out.clear(); - demuxer.feed(&read_buf[..n], &mut out); - if demuxer.desynced() { - return "demuxer desync (reconnecting to re-align)".to_string(); - } - for frame in out.drain(..) { - let is_keyframe = is_keyframe(opts.codec, &frame); - if is_keyframe { - inner.stats.keyframes.fetch_add(1, Ordering::Relaxed); - } - let info = EncodedFrameInfo { - is_keyframe, - // The source scans + prepends SPS/PPS as needed. - has_sps_pps: false, - width: opts.width, - height: opts.height, - capture_time_us: 0, - }; - if inner.source.capture_frame(&frame, &info) { - inner.stats.frames_accepted.fetch_add(1, Ordering::Relaxed); - } else { - inner.stats.frames_dropped.fetch_add(1, Ordering::Relaxed); - log::warn!( - "EncodedTcpIngest: capture_frame dropped frame ({} bytes, keyframe={})", - frame.len(), - is_keyframe - ); - } - } - } -} - -fn notify_disconnected(inner: &Arc, reason: &str) { - if let Some(obs) = inner.observer.lock().clone() { - obs.on_disconnected(reason); - } -} - -/// Sleeps up to `dur`, waking early when `stop` is set. Returns `false` -/// if the sleep was interrupted by a stop request. 
-async fn sleep_interruptible(stop: &AtomicBool, dur: Duration) -> bool { - let tick = Duration::from_millis(100); - let mut remaining = dur; - while remaining > Duration::ZERO { - if stop.load(Ordering::Acquire) { - return false; - } - let step = remaining.min(tick); - sleep(step).await; - remaining = remaining.saturating_sub(step); - } - true -} - -#[cfg(test)] -mod tests { - use std::{ - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::{Duration, Instant}, - }; - - use libwebrtc::video_source::VideoCodec; - - use super::*; - use livekit::{prelude::TrackSource, RoomError}; - - #[test] - fn options_new_sets_network_and_track_defaults() { - let options = EncodedTcpIngestOptions::new(5004, VideoCodec::H264, 1920, 1080); - - assert_eq!(options.host, "127.0.0.1"); - assert_eq!(options.port, 5004); - assert_eq!(options.codec, VideoCodec::H264); - assert_eq!(options.width, 1920); - assert_eq!(options.height, 1080); - assert_eq!(options.track_name, None); - assert_eq!(options.track_source, TrackSource::Camera); - assert_eq!(options.max_bitrate_bps, None); - assert_eq!(options.max_framerate_fps, 30.0); - assert_eq!(options.reconnect_backoff, Duration::from_secs(1)); - assert!(options.unpublish_on_stop); - } - - #[test] - fn validate_options_rejects_invalid_dimensions_before_publish() { - let mut options = EncodedTcpIngestOptions::new(5004, VideoCodec::Vp8, 0, 720); - - let err = validate_options(&options).expect_err("zero width should be rejected"); - assert!( - matches!(err, RoomError::Internal(message) if message.contains("width and height")) - ); - - options.width = 1280; - options.height = 0; - let err = validate_options(&options).expect_err("zero height should be rejected"); - assert!( - matches!(err, RoomError::Internal(message) if message.contains("width and height")) - ); - } - - #[test] - fn validate_options_rejects_zero_port_before_publish() { - let options = EncodedTcpIngestOptions::new(0, VideoCodec::Av1, 1280, 720); - - let err = 
validate_options(&options).expect_err("zero port should be rejected"); - assert!(matches!(err, RoomError::Internal(message) if message.contains("port"))); - } - - #[test] - fn build_publish_options_disables_simulcast_and_preserves_source() { - let mut options = EncodedTcpIngestOptions::new(5004, VideoCodec::H265, 1280, 720); - options.track_source = TrackSource::Screenshare; - - let publish_options = build_publish_options(&options); - - assert_eq!(publish_options.source, TrackSource::Screenshare); - assert!(!publish_options.simulcast); - assert!(publish_options.video_encoding.is_none()); - } - - #[test] - fn build_publish_options_uses_explicit_bitrate_pair() { - let mut options = EncodedTcpIngestOptions::new(5004, VideoCodec::Vp9, 1280, 720); - options.max_bitrate_bps = Some(2_500_000); - options.max_framerate_fps = 24.0; - - let publish_options = build_publish_options(&options); - let encoding = publish_options.video_encoding.expect("encoding should be set"); - - assert_eq!(encoding.max_bitrate, 2_500_000); - assert_eq!(encoding.max_framerate, 24.0); - assert!(!publish_options.simulcast); - } - - #[test] - fn default_track_names_cover_all_ingest_codecs() { - assert_eq!(default_track_name(VideoCodec::H264), "encoded-h264"); - assert_eq!(default_track_name(VideoCodec::H265), "encoded-h265"); - assert_eq!(default_track_name(VideoCodec::Vp8), "encoded-vp8"); - assert_eq!(default_track_name(VideoCodec::Vp9), "encoded-vp9"); - assert_eq!(default_track_name(VideoCodec::Av1), "encoded-av1"); - } - - #[tokio::test] - async fn sleep_interruptible_returns_false_when_stop_already_set() { - let stop = AtomicBool::new(true); - - assert!(!sleep_interruptible(&stop, Duration::from_secs(60)).await); - } - - #[tokio::test] - async fn sleep_interruptible_wakes_soon_after_stop_is_set() { - let stop = Arc::new(AtomicBool::new(false)); - let setter = { - let stop = stop.clone(); - tokio::spawn(async move { - tokio::time::sleep(Duration::from_millis(10)).await; - stop.store(true, 
Ordering::Release); - }) - }; - - let start = Instant::now(); - let slept = sleep_interruptible(&stop, Duration::from_secs(5)).await; - setter.await.expect("stop setter should complete"); - - assert!(!slept); - assert!( - start.elapsed() < Duration::from_secs(1), - "sleep should be interrupted instead of waiting for the full backoff" - ); - } -} diff --git a/livekit-encoded-video-ingest/src/keyframe.rs b/livekit-encoded-video-ingest/src/keyframe.rs deleted file mode 100644 index 099e4b4f2..000000000 --- a/livekit-encoded-video-ingest/src/keyframe.rs +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! Minimal keyframe detection for the five encoded (compressed) codecs supported by -//! [`EncodedTcpIngest`](super::encoded_tcp::EncodedTcpIngest). -//! -//! These probes are intentionally conservative — they never scan deeper into -//! a frame than needed to answer yes/no. Incorrect answers only affect -//! ingest-side stats and the `is_keyframe` flag forwarded to the -//! `NativeEncodedVideoSource`; WebRTC's own RTP packetizer recomputes what -//! it needs for keyframe signalling. - -use libwebrtc::video_source::VideoCodec; - -/// Returns the access-unit delimiter NAL type for Annex-B codecs. `None` -/// for codecs that are not delivered as Annex-B. 
-pub(super) fn aud_nal_type(codec: VideoCodec) -> Option { - match codec { - VideoCodec::H264 => Some(9), - VideoCodec::H265 => Some(35), - _ => None, - } -} - -/// Extracts the NAL unit type from the first byte after an Annex-B start -/// code. Returns 0 for codecs without NAL units. -pub(super) fn nal_type(codec: VideoCodec, first_byte: u8) -> u8 { - match codec { - VideoCodec::H264 => first_byte & 0x1F, - VideoCodec::H265 => (first_byte >> 1) & 0x3F, - _ => 0, - } -} - -/// Whether a given NAL type is a keyframe NAL. -/// -/// * H.264: IDR slice (NAL type 5) -/// * H.265: any IRAP (BLA/IDR/CRA, NAL types 16..=23) -/// * VP8/VP9/AV1: never — they do not use NAL units. -pub(super) fn is_keyframe_nal(codec: VideoCodec, nal_type: u8) -> bool { - match codec { - VideoCodec::H264 => nal_type == 5, - VideoCodec::H265 => (16..=23).contains(&nal_type), - _ => false, - } -} - -/// Top-level keyframe probe. Delegates to codec-specific helpers. -/// -/// * H.264 / H.265: scans for an IDR / IRAP NAL in the access unit. -/// * VP8: bit 0 of the frame tag (RFC 6386 §9.1: 0 = keyframe). -/// * VP9: decodes the leading bits of the uncompressed header (VP9 spec §6.2). -/// * AV1: scans OBUs in the Temporal Unit for an `OBU_SEQUENCE_HEADER` -/// (the same heuristic WebRTC's own AV1 RTP packetizer uses). 
-pub(super) fn is_keyframe(codec: VideoCodec, data: &[u8]) -> bool { - match codec { - VideoCodec::H264 | VideoCodec::H265 => is_keyframe_annex_b(codec, data), - VideoCodec::Vp8 => !data.is_empty() && (data[0] & 0x01) == 0, - VideoCodec::Vp9 => is_keyframe_vp9(data), - VideoCodec::Av1 => is_keyframe_av1(data), - } -} - -fn is_keyframe_annex_b(codec: VideoCodec, data: &[u8]) -> bool { - let mut i = 0usize; - while i + 3 < data.len() { - let is_four = i + 4 <= data.len() - && data[i] == 0 - && data[i + 1] == 0 - && data[i + 2] == 0 - && data[i + 3] == 1; - let is_three = data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1; - if is_four || is_three { - let payload_idx = if is_four { i + 4 } else { i + 3 }; - if payload_idx < data.len() - && is_keyframe_nal(codec, nal_type(codec, data[payload_idx])) - { - return true; - } - i = payload_idx + 1; - } else { - i += 1; - } - } - false -} - -/// AV1 keyframe probe. Walks OBUs in a Temporal Unit and returns true if -/// any OBU has type `OBU_SEQUENCE_HEADER` (1). AV1 spec §5.3.2 (OBU header) -/// + §5.3.1 (leb128): -/// -/// * byte 0 bits 6..=3: `obu_type`. -/// * byte 0 bit 2: `obu_extension_flag`; if set, one extension byte follows. -/// * byte 0 bit 1: `obu_has_size_field`; if set, a leb128-encoded `obu_size` -/// follows. If clear, the OBU runs to the end of the input and we cannot -/// skip it. 
-fn is_keyframe_av1(mut data: &[u8]) -> bool { - const OBU_SEQUENCE_HEADER: u8 = 1; - while !data.is_empty() { - let header = data[0]; - let obu_type = (header >> 3) & 0x0F; - let ext = (header & 0x04) != 0; - let has_size = (header & 0x02) != 0; - - let mut off = 1; - if ext { - if off >= data.len() { - return false; - } - off += 1; - } - if !has_size { - return obu_type == OBU_SEQUENCE_HEADER; - } - let (size, size_len) = match read_leb128(&data[off..]) { - Some(v) => v, - None => return false, - }; - off += size_len; - let payload_end = match off.checked_add(size as usize) { - Some(e) if e <= data.len() => e, - _ => return false, - }; - if obu_type == OBU_SEQUENCE_HEADER { - return true; - } - data = &data[payload_end..]; - } - false -} - -/// Decodes an AV1 leb128 (unsigned little-endian base-128) integer. -/// Returns `(value, bytes_consumed)` or `None` on truncated input. -/// AV1 spec §4.10.5 caps the encoding at 8 bytes and 32 significant bits. -fn read_leb128(input: &[u8]) -> Option<(u32, usize)> { - let mut value: u64 = 0; - for (i, &byte) in input.iter().take(8).enumerate() { - value |= ((byte & 0x7F) as u64) << (i * 7); - if (byte & 0x80) == 0 { - return u32::try_from(value).ok().map(|v| (v, i + 1)); - } - } - None -} - -/// VP9 uncompressed-header keyframe probe. Reads first-byte bits (MSB -/// first) per VP9 bitstream spec §6.2: -/// -/// * bits 7..=6: `frame_marker` (must be `0b10`). -/// * bit 5: `profile_low_bit`, bit 4: `profile_high_bit` -/// (combined `profile` ∈ 0..=3). -/// * For `profile == 3`: bit 3 is reserved-zero, bit 2 is -/// `show_existing_frame`, bit 1 is `frame_type`. -/// * For `profile != 3`: bit 3 is `show_existing_frame`, bit 2 is -/// `frame_type`. -/// -/// A keyframe has `show_existing_frame == 0` and `frame_type == 0`. 
-fn is_keyframe_vp9(data: &[u8]) -> bool { - let Some(&b0) = data.first() else { - return false; - }; - if (b0 >> 6) & 0b11 != 0b10 { - return false; - } - let profile_low = (b0 >> 5) & 0x1; - let profile_high = (b0 >> 4) & 0x1; - let profile = (profile_high << 1) | profile_low; - let (show_existing_bit, frame_type_bit) = if profile == 3 { (2, 1) } else { (3, 2) }; - let show_existing = (b0 >> show_existing_bit) & 0x1; - if show_existing != 0 { - return false; - } - let frame_type = (b0 >> frame_type_bit) & 0x1; - frame_type == 0 -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn h264_idr_is_keyframe() { - // 4-byte start code + NAL header for IDR (type 5, nal_ref_idc=3): 0x65 - let data = [0x00, 0x00, 0x00, 0x01, 0x65, 0x88, 0x84]; - assert!(is_keyframe(VideoCodec::H264, &data)); - } - - #[test] - fn h264_non_idr_not_keyframe() { - // NAL header for non-IDR slice (type 1): 0x41 - let data = [0x00, 0x00, 0x00, 0x01, 0x41, 0x9a]; - assert!(!is_keyframe(VideoCodec::H264, &data)); - } - - #[test] - fn h265_idr_w_radl_is_keyframe() { - // H.265 NAL type 19 (IDR_W_RADL). NAL header byte is (type << 1): 0x26 - let data = [0x00, 0x00, 0x00, 0x01, 0x26, 0x01]; - assert!(is_keyframe(VideoCodec::H265, &data)); - } - - #[test] - fn h265_vps_is_not_keyframe() { - // H.265 VPS NAL type 32 announces stream metadata but is not an IRAP picture. - let data = [0x00, 0x00, 0x01, 0x40, 0x01]; - assert!(!is_keyframe(VideoCodec::H265, &data)); - } - - #[test] - fn vp8_keyframe_bit_zero() { - let kf = [0x00_u8]; - let pf = [0x01_u8]; - assert!(is_keyframe(VideoCodec::Vp8, &kf)); - assert!(!is_keyframe(VideoCodec::Vp8, &pf)); - } - - #[test] - fn vp9_profile0_keyframe() { - // frame_marker=10, profile=0 (both bits 0), show_existing=0, frame_type=0 - // => top bits 10 00 0 0 .. 
= 0b1000_0000 = 0x80 - let data = [0x80_u8]; - assert!(is_keyframe(VideoCodec::Vp9, &data)); - } - - #[test] - fn vp9_profile0_interframe() { - // frame_type bit = bit 2 => 0b1000_0100 = 0x84 - let data = [0x84_u8]; - assert!(!is_keyframe(VideoCodec::Vp9, &data)); - } - - #[test] - fn vp9_profile3_uses_shifted_frame_type_bits() { - // frame_marker=10, profile=3, reserved=0, show_existing=0, frame_type=0 - // => 0b1011_0000 = 0xB0. - let keyframe = [0xB0_u8]; - // In profile 3, bit 1 is frame_type; bit 2 is show_existing_frame. - let interframe = [0xB2_u8]; - let show_existing = [0xB4_u8]; - - assert!(is_keyframe(VideoCodec::Vp9, &keyframe)); - assert!(!is_keyframe(VideoCodec::Vp9, &interframe)); - assert!(!is_keyframe(VideoCodec::Vp9, &show_existing)); - } - - #[test] - fn av1_sequence_header_obu_is_keyframe() { - // obu_type=1 (SEQUENCE_HEADER) => byte 0 = (1 << 3) | 0b010 = 0x0A - // (obu_has_size_field=1, no extension). obu_size leb128 = 0 (one byte). - let data = [0x0A, 0x00]; - assert!(is_keyframe(VideoCodec::Av1, &data)); - } - - #[test] - fn av1_tile_group_obu_not_keyframe() { - // obu_type=4 (TILE_GROUP), has_size=1. size=0. - let data = [0x22, 0x00]; - assert!(!is_keyframe(VideoCodec::Av1, &data)); - } - - #[test] - fn av1_sequence_header_after_non_key_obu_is_keyframe() { - // TILE_GROUP OBU with two bytes of payload, followed by a SEQUENCE_HEADER OBU. - let data = [0x22, 0x02, 0xAA, 0xBB, 0x0A, 0x00]; - assert!(is_keyframe(VideoCodec::Av1, &data)); - } - - #[test] - fn av1_truncated_sized_obu_is_not_keyframe() { - // TILE_GROUP with a continued leb128 size byte but no terminating byte. 
- let data = [0x22, 0x80]; - assert!(!is_keyframe(VideoCodec::Av1, &data)); - } - - #[test] - fn av1_leb128_single_byte() { - assert_eq!(read_leb128(&[0x00]), Some((0, 1))); - assert_eq!(read_leb128(&[0x7F]), Some((0x7F, 1))); - } - - #[test] - fn av1_leb128_multi_byte() { - // 128 => 0x80, 0x01 - assert_eq!(read_leb128(&[0x80, 0x01]), Some((128, 2))); - } -} diff --git a/livekit-encoded-video-ingest/src/lib.rs b/livekit-encoded-video-ingest/src/lib.rs deleted file mode 100644 index 54749c0c0..000000000 --- a/livekit-encoded-video-ingest/src/lib.rs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! High-level helpers for ingesting encoded (compressed) video into a LiveKit room. -//! -//! This module hides the moving parts of pulling an encoded (compressed) bytestream -//! from a source (currently: TCP) and turning it into a published -//! LiveKit track. Callers configure a small options struct and hand off a -//! `Room`; the helper does the rest. -//! -//! See [`EncodedTcpIngest`] for the TCP-based helper. 
- -#[cfg(not(target_arch = "wasm32"))] -mod demux; -#[cfg(not(target_arch = "wasm32"))] -mod encoded_tcp; -#[cfg(not(target_arch = "wasm32"))] -mod keyframe; - -#[cfg(not(target_arch = "wasm32"))] -pub use encoded_tcp::{ - EncodedIngestObserver, EncodedIngestStats, EncodedTcpIngest, EncodedTcpIngestOptions, -}; diff --git a/livekit-ffi-node-bindings/proto/ffi_pb.d.ts b/livekit-ffi-node-bindings/proto/ffi_pb.d.ts index 388ba03d5..54cc664ae 100644 --- a/livekit-ffi-node-bindings/proto/ffi_pb.d.ts +++ b/livekit-ffi-node-bindings/proto/ffi_pb.d.ts @@ -28,7 +28,6 @@ import type { PerformRpcCallback, PerformRpcRequest, PerformRpcResponse, Registe import type { EnableRemoteTrackPublicationRequest, EnableRemoteTrackPublicationResponse, SetRemoteTrackPublicationQualityRequest, SetRemoteTrackPublicationQualityResponse, UpdateRemoteTrackPublicationDimensionRequest, UpdateRemoteTrackPublicationDimensionResponse } from "./track_publication_pb.js"; import type { ByteStreamOpenCallback, ByteStreamOpenRequest, ByteStreamOpenResponse, ByteStreamReaderEvent, ByteStreamReaderReadAllCallback, ByteStreamReaderReadAllRequest, ByteStreamReaderReadAllResponse, ByteStreamReaderReadIncrementalRequest, ByteStreamReaderReadIncrementalResponse, ByteStreamReaderWriteToFileCallback, ByteStreamReaderWriteToFileRequest, ByteStreamReaderWriteToFileResponse, ByteStreamWriterCloseCallback, ByteStreamWriterCloseRequest, ByteStreamWriterCloseResponse, ByteStreamWriterWriteCallback, ByteStreamWriterWriteRequest, ByteStreamWriterWriteResponse, StreamSendBytesCallback, StreamSendBytesRequest, StreamSendBytesResponse, StreamSendFileCallback, StreamSendFileRequest, StreamSendFileResponse, StreamSendTextCallback, StreamSendTextRequest, StreamSendTextResponse, TextStreamOpenCallback, TextStreamOpenRequest, TextStreamOpenResponse, TextStreamReaderEvent, TextStreamReaderReadAllCallback, TextStreamReaderReadAllRequest, TextStreamReaderReadAllResponse, TextStreamReaderReadIncrementalRequest, 
TextStreamReaderReadIncrementalResponse, TextStreamWriterCloseCallback, TextStreamWriterCloseRequest, TextStreamWriterCloseResponse, TextStreamWriterWriteCallback, TextStreamWriterWriteRequest, TextStreamWriterWriteResponse } from "./data_stream_pb.js"; import type { DataTrackStreamEvent, DataTrackStreamReadRequest, DataTrackStreamReadResponse, LocalDataTrackIsPublishedRequest, LocalDataTrackIsPublishedResponse, LocalDataTrackTryPushRequest, LocalDataTrackTryPushResponse, LocalDataTrackUnpublishRequest, LocalDataTrackUnpublishResponse, PublishDataTrackCallback, PublishDataTrackRequest, PublishDataTrackResponse, RemoteDataTrackIsPublishedRequest, RemoteDataTrackIsPublishedResponse, SubscribeDataTrackRequest, SubscribeDataTrackResponse } from "./data_track_pb.js"; -import type { EncodedTcpIngestEvent, GetEncodedTcpIngestStatsRequest, GetEncodedTcpIngestStatsResponse, NewEncodedTcpIngestCallback, NewEncodedTcpIngestRequest, NewEncodedTcpIngestResponse, StopEncodedTcpIngestCallback, StopEncodedTcpIngestRequest, StopEncodedTcpIngestResponse } from "./encoded_tcp_ingest_pb.js"; /** * @generated from enum livekit.proto.LogLevel @@ -546,26 +545,6 @@ export declare class FfiRequest extends Message { */ value: CaptureEncodedVideoFrameRequest; case: "captureEncodedVideoFrame"; - } | { - /** - * Encoded TCP ingest (high-level helper) - * - * @generated from field: livekit.proto.NewEncodedTcpIngestRequest new_encoded_tcp_ingest = 77; - */ - value: NewEncodedTcpIngestRequest; - case: "newEncodedTcpIngest"; - } | { - /** - * @generated from field: livekit.proto.StopEncodedTcpIngestRequest stop_encoded_tcp_ingest = 78; - */ - value: StopEncodedTcpIngestRequest; - case: "stopEncodedTcpIngest"; - } | { - /** - * @generated from field: livekit.proto.GetEncodedTcpIngestStatsRequest get_encoded_tcp_ingest_stats = 79; - */ - value: GetEncodedTcpIngestStatsRequest; - case: "getEncodedTcpIngestStats"; } | { case: undefined; value?: undefined }; constructor(data?: PartialMessage); @@ 
-1062,26 +1041,6 @@ export declare class FfiResponse extends Message { */ value: CaptureEncodedVideoFrameResponse; case: "captureEncodedVideoFrame"; - } | { - /** - * Encoded TCP ingest (high-level helper) - * - * @generated from field: livekit.proto.NewEncodedTcpIngestResponse new_encoded_tcp_ingest = 76; - */ - value: NewEncodedTcpIngestResponse; - case: "newEncodedTcpIngest"; - } | { - /** - * @generated from field: livekit.proto.StopEncodedTcpIngestResponse stop_encoded_tcp_ingest = 77; - */ - value: StopEncodedTcpIngestResponse; - case: "stopEncodedTcpIngest"; - } | { - /** - * @generated from field: livekit.proto.GetEncodedTcpIngestStatsResponse get_encoded_tcp_ingest_stats = 78; - */ - value: GetEncodedTcpIngestStatsResponse; - case: "getEncodedTcpIngestStats"; } | { case: undefined; value?: undefined }; constructor(data?: PartialMessage); @@ -1378,26 +1337,6 @@ export declare class FfiEvent extends Message { */ value: EncodedVideoSourceEvent; case: "encodedVideoSourceEvent"; - } | { - /** - * Encoded TCP ingest (high-level helper) - * - * @generated from field: livekit.proto.EncodedTcpIngestEvent encoded_tcp_ingest_event = 45; - */ - value: EncodedTcpIngestEvent; - case: "encodedTcpIngestEvent"; - } | { - /** - * @generated from field: livekit.proto.NewEncodedTcpIngestCallback new_encoded_tcp_ingest = 46; - */ - value: NewEncodedTcpIngestCallback; - case: "newEncodedTcpIngest"; - } | { - /** - * @generated from field: livekit.proto.StopEncodedTcpIngestCallback stop_encoded_tcp_ingest = 47; - */ - value: StopEncodedTcpIngestCallback; - case: "stopEncodedTcpIngest"; } | { case: undefined; value?: undefined }; constructor(data?: PartialMessage); diff --git a/livekit-ffi-node-bindings/proto/ffi_pb.js b/livekit-ffi-node-bindings/proto/ffi_pb.js index 4d2071d91..90cd7ab89 100644 --- a/livekit-ffi-node-bindings/proto/ffi_pb.js +++ b/livekit-ffi-node-bindings/proto/ffi_pb.js @@ -30,7 +30,6 @@ const { PerformRpcCallback, PerformRpcRequest, PerformRpcResponse, 
RegisterRpcMe const { EnableRemoteTrackPublicationRequest, EnableRemoteTrackPublicationResponse, SetRemoteTrackPublicationQualityRequest, SetRemoteTrackPublicationQualityResponse, UpdateRemoteTrackPublicationDimensionRequest, UpdateRemoteTrackPublicationDimensionResponse } = require("./track_publication_pb.js"); const { ByteStreamOpenCallback, ByteStreamOpenRequest, ByteStreamOpenResponse, ByteStreamReaderEvent, ByteStreamReaderReadAllCallback, ByteStreamReaderReadAllRequest, ByteStreamReaderReadAllResponse, ByteStreamReaderReadIncrementalRequest, ByteStreamReaderReadIncrementalResponse, ByteStreamReaderWriteToFileCallback, ByteStreamReaderWriteToFileRequest, ByteStreamReaderWriteToFileResponse, ByteStreamWriterCloseCallback, ByteStreamWriterCloseRequest, ByteStreamWriterCloseResponse, ByteStreamWriterWriteCallback, ByteStreamWriterWriteRequest, ByteStreamWriterWriteResponse, StreamSendBytesCallback, StreamSendBytesRequest, StreamSendBytesResponse, StreamSendFileCallback, StreamSendFileRequest, StreamSendFileResponse, StreamSendTextCallback, StreamSendTextRequest, StreamSendTextResponse, TextStreamOpenCallback, TextStreamOpenRequest, TextStreamOpenResponse, TextStreamReaderEvent, TextStreamReaderReadAllCallback, TextStreamReaderReadAllRequest, TextStreamReaderReadAllResponse, TextStreamReaderReadIncrementalRequest, TextStreamReaderReadIncrementalResponse, TextStreamWriterCloseCallback, TextStreamWriterCloseRequest, TextStreamWriterCloseResponse, TextStreamWriterWriteCallback, TextStreamWriterWriteRequest, TextStreamWriterWriteResponse } = require("./data_stream_pb.js"); const { DataTrackStreamEvent, DataTrackStreamReadRequest, DataTrackStreamReadResponse, LocalDataTrackIsPublishedRequest, LocalDataTrackIsPublishedResponse, LocalDataTrackTryPushRequest, LocalDataTrackTryPushResponse, LocalDataTrackUnpublishRequest, LocalDataTrackUnpublishResponse, PublishDataTrackCallback, PublishDataTrackRequest, PublishDataTrackResponse, RemoteDataTrackIsPublishedRequest, 
RemoteDataTrackIsPublishedResponse, SubscribeDataTrackRequest, SubscribeDataTrackResponse } = require("./data_track_pb.js"); -const { EncodedTcpIngestEvent, GetEncodedTcpIngestStatsRequest, GetEncodedTcpIngestStatsResponse, NewEncodedTcpIngestCallback, NewEncodedTcpIngestRequest, NewEncodedTcpIngestResponse, StopEncodedTcpIngestCallback, StopEncodedTcpIngestRequest, StopEncodedTcpIngestResponse } = require("./encoded_tcp_ingest_pb.js"); /** * @generated from enum livekit.proto.LogLevel @@ -130,9 +129,6 @@ const FfiRequest = /*@__PURE__*/ proto2.makeMessageType( { no: 74, name: "remote_data_track_is_published", kind: "message", T: RemoteDataTrackIsPublishedRequest, oneof: "message" }, { no: 75, name: "data_track_stream_read", kind: "message", T: DataTrackStreamReadRequest, oneof: "message" }, { no: 76, name: "capture_encoded_video_frame", kind: "message", T: CaptureEncodedVideoFrameRequest, oneof: "message" }, - { no: 77, name: "new_encoded_tcp_ingest", kind: "message", T: NewEncodedTcpIngestRequest, oneof: "message" }, - { no: 78, name: "stop_encoded_tcp_ingest", kind: "message", T: StopEncodedTcpIngestRequest, oneof: "message" }, - { no: 79, name: "get_encoded_tcp_ingest_stats", kind: "message", T: GetEncodedTcpIngestStatsRequest, oneof: "message" }, ], ); @@ -218,9 +214,6 @@ const FfiResponse = /*@__PURE__*/ proto2.makeMessageType( { no: 73, name: "remote_data_track_is_published", kind: "message", T: RemoteDataTrackIsPublishedResponse, oneof: "message" }, { no: 74, name: "data_track_stream_read", kind: "message", T: DataTrackStreamReadResponse, oneof: "message" }, { no: 75, name: "capture_encoded_video_frame", kind: "message", T: CaptureEncodedVideoFrameResponse, oneof: "message" }, - { no: 76, name: "new_encoded_tcp_ingest", kind: "message", T: NewEncodedTcpIngestResponse, oneof: "message" }, - { no: 77, name: "stop_encoded_tcp_ingest", kind: "message", T: StopEncodedTcpIngestResponse, oneof: "message" }, - { no: 78, name: "get_encoded_tcp_ingest_stats", kind: 
"message", T: GetEncodedTcpIngestStatsResponse, oneof: "message" }, ], ); @@ -277,9 +270,6 @@ const FfiEvent = /*@__PURE__*/ proto2.makeMessageType( { no: 42, name: "publish_data_track", kind: "message", T: PublishDataTrackCallback, oneof: "message" }, { no: 43, name: "data_track_stream_event", kind: "message", T: DataTrackStreamEvent, oneof: "message" }, { no: 44, name: "encoded_video_source_event", kind: "message", T: EncodedVideoSourceEvent, oneof: "message" }, - { no: 45, name: "encoded_tcp_ingest_event", kind: "message", T: EncodedTcpIngestEvent, oneof: "message" }, - { no: 46, name: "new_encoded_tcp_ingest", kind: "message", T: NewEncodedTcpIngestCallback, oneof: "message" }, - { no: 47, name: "stop_encoded_tcp_ingest", kind: "message", T: StopEncodedTcpIngestCallback, oneof: "message" }, ], ); diff --git a/livekit-ffi/Cargo.toml b/livekit-ffi/Cargo.toml index c0552e06c..5d9851d8e 100644 --- a/livekit-ffi/Cargo.toml +++ b/livekit-ffi/Cargo.toml @@ -15,14 +15,13 @@ native-tls-vendored = ["livekit/native-tls-vendored"] rustls-tls-native-roots = ["livekit/rustls-tls-native-roots"] rustls-tls-webpki-roots = ["livekit/rustls-tls-webpki-roots"] __rustls-tls = ["livekit/__rustls-tls"] -encoded-video = ["livekit/encoded-video", "dep:livekit-encoded-video-ingest"] +encoded-video = ["livekit/encoded-video"] # Enable tokio-console to debug tasks tracing = ["tokio/tracing", "console-subscriber"] [dependencies] livekit = { workspace = true } -livekit-encoded-video-ingest = { workspace = true, optional = true } webrtc-sys = { workspace = true } soxr-sys = { workspace = true } imgproc = { workspace = true } diff --git a/livekit-ffi/protocol/encoded_tcp_ingest.proto b/livekit-ffi/protocol/encoded_tcp_ingest.proto deleted file mode 100644 index e2f461193..000000000 --- a/livekit-ffi/protocol/encoded_tcp_ingest.proto +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2026 LiveKit, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto2"; - -package livekit.proto; -option csharp_namespace = "LiveKit.Proto"; - -import "handle.proto"; -import "track.proto"; -import "video_frame.proto"; - -// High-level helper that ingests an encoded (compressed) video bytestream over TCP -// and publishes it to a LiveKit room. Wraps the Rust -// `livekit_encoded_video_ingest::EncodedTcpIngest` type. - -message EncodedTcpIngestInfo { - // Sid of the published track. Always set once the creation callback - // fires with a success message. - required string track_sid = 1; - // Name of the published track. - required string track_name = 2; -} - -message OwnedEncodedTcpIngest { - required FfiOwnedHandle handle = 1; - required EncodedTcpIngestInfo info = 2; -} - -// Create an `EncodedTcpIngest` and publish its track on the given room. -// The ingest runs until `StopEncodedTcpIngestRequest` is called. -message NewEncodedTcpIngestRequest { - required uint64 room_handle = 1; - required string host = 2; - required uint32 port = 3; - required VideoCodec codec = 4; - required uint32 width = 5; - required uint32 height = 6; - optional string track_name = 7; - optional TrackSource track_source = 8; - // Forwarded to TrackPublishOptions.video_encoding.max_bitrate when set. - optional uint64 max_bitrate_bps = 9; - // Forwarded alongside `max_bitrate_bps`. Defaults to 30 fps. 
- optional double max_framerate_fps = 10; - // Backoff between reconnect attempts (ms). Default: 1000. - optional uint32 reconnect_backoff_ms = 11; - // When true, stopping the ingest unpublishes the track. Default: true. - optional bool unpublish_on_stop = 12; - optional uint64 request_async_id = 13; -} - -message NewEncodedTcpIngestResponse { - required uint64 async_id = 1; -} - -message NewEncodedTcpIngestCallback { - required uint64 async_id = 1; - oneof message { - string error = 2; - OwnedEncodedTcpIngest ingest = 3; - } -} - -// Stop the ingest loop. If `unpublish_on_stop` was set (the default on -// creation), the track is unpublished as part of this call. -message StopEncodedTcpIngestRequest { - required uint64 ingest_handle = 1; - optional uint64 request_async_id = 2; -} - -message StopEncodedTcpIngestResponse { - required uint64 async_id = 1; -} - -message StopEncodedTcpIngestCallback { - required uint64 async_id = 1; - optional string error = 2; -} - -// Snapshot of cumulative stats since the ingest was created. -message EncodedTcpIngestStats { - required uint64 frames_accepted = 1; - required uint64 frames_dropped = 2; - required uint64 keyframes = 3; - required uint64 tcp_reconnects = 4; -} - -message GetEncodedTcpIngestStatsRequest { - required uint64 ingest_handle = 1; -} - -message GetEncodedTcpIngestStatsResponse { - required EncodedTcpIngestStats stats = 1; -} - -// Ingest-level events dispatched as FfiEvents. -message EncodedTcpIngestEvent { - required uint64 ingest_handle = 1; - oneof message { - Connected connected = 2; - Disconnected disconnected = 3; - KeyframeRequested keyframe_requested = 4; - TargetBitrateChanged target_bitrate_changed = 5; - } - - message Connected { - // Peer socket address the TCP connection landed on, e.g. "127.0.0.1:5000". - required string peer = 1; - } - - message Disconnected { - // Human-readable reason for the disconnect. 
The ingest loop will - // reconnect automatically after the configured backoff unless the - // ingest is being stopped. - required string reason = 1; - } - - message KeyframeRequested {} - - message TargetBitrateChanged { - required uint32 bitrate_bps = 1; - required double framerate_fps = 2; - } -} diff --git a/livekit-ffi/protocol/ffi.proto b/livekit-ffi/protocol/ffi.proto index 99698da5c..4b1377b5e 100644 --- a/livekit-ffi/protocol/ffi.proto +++ b/livekit-ffi/protocol/ffi.proto @@ -27,7 +27,6 @@ import "audio_frame.proto"; import "rpc.proto"; import "data_stream.proto"; import "data_track.proto"; -import "encoded_tcp_ingest.proto"; // **How is the livekit-ffi working: // We refer as the ffi server the Rust server that is running the LiveKit client implementation, and we @@ -168,12 +167,7 @@ message FfiRequest { // Encoded video CaptureEncodedVideoFrameRequest capture_encoded_video_frame = 76; - // Encoded TCP ingest (high-level helper) - NewEncodedTcpIngestRequest new_encoded_tcp_ingest = 77; - StopEncodedTcpIngestRequest stop_encoded_tcp_ingest = 78; - GetEncodedTcpIngestStatsRequest get_encoded_tcp_ingest_stats = 79; - - // NEXT_ID: 80 + // NEXT_ID: 77 } } @@ -286,12 +280,7 @@ message FfiResponse { // Encoded video CaptureEncodedVideoFrameResponse capture_encoded_video_frame = 75; - // Encoded TCP ingest (high-level helper) - NewEncodedTcpIngestResponse new_encoded_tcp_ingest = 76; - StopEncodedTcpIngestResponse stop_encoded_tcp_ingest = 77; - GetEncodedTcpIngestStatsResponse get_encoded_tcp_ingest_stats = 78; - - // NEXT_ID: 79 + // NEXT_ID: 76 } } @@ -357,12 +346,7 @@ message FfiEvent { // Encoded video EncodedVideoSourceEvent encoded_video_source_event = 44; - // Encoded TCP ingest (high-level helper) - EncodedTcpIngestEvent encoded_tcp_ingest_event = 45; - NewEncodedTcpIngestCallback new_encoded_tcp_ingest = 46; - StopEncodedTcpIngestCallback stop_encoded_tcp_ingest = 47; - - // NEXT_ID: 48 + // NEXT_ID: 45 } } diff --git 
a/livekit-ffi/src/server/encoded_tcp_ingest.rs b/livekit-ffi/src/server/encoded_tcp_ingest.rs deleted file mode 100644 index 729a7997b..000000000 --- a/livekit-ffi/src/server/encoded_tcp_ingest.rs +++ /dev/null @@ -1,306 +0,0 @@ -// Copyright 2026 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! FFI wrapper for the high-level `EncodedTcpIngest` helper. -//! -//! The Rust `EncodedTcpIngest` owns the publish + TCP loop; this layer -//! simply: -//! -//! 1. Converts protobuf options to the Rust options. -//! 2. Calls `EncodedTcpIngest::start` from the FFI async runtime. -//! 3. Stores the resulting handle and surfaces ingest-level callbacks -//! through an [`IngestObserverBridge`] so C++ / Python / Swift clients -//! see them as [`proto::EncodedTcpIngestEvent`]. - -#[cfg(feature = "encoded-video")] -use std::{net::SocketAddr, sync::Arc, time::Duration}; - -#[cfg(feature = "encoded-video")] -use livekit::prelude::*; -#[cfg(feature = "encoded-video")] -use livekit_encoded_video_ingest::{ - EncodedIngestObserver, EncodedTcpIngest, EncodedTcpIngestOptions, -}; - -#[cfg(feature = "encoded-video")] -use super::{room::FfiRoom, FfiHandle}; -#[cfg(feature = "encoded-video")] -use crate::FfiHandleId; -use crate::{proto, server, FfiError, FfiResult}; - -/// Server-side owner of an [`EncodedTcpIngest`]. Stored behind an -/// [`FfiHandleId`] in the FFI handle table. 
-#[cfg(feature = "encoded-video")] -pub struct FfiEncodedTcpIngest { - pub handle_id: FfiHandleId, - pub ingest: Arc>>, -} - -#[cfg(feature = "encoded-video")] -impl FfiHandle for FfiEncodedTcpIngest {} - -/// Kicks off an async `EncodedTcpIngest::start` and returns the async id -/// immediately. The result (or error) is dispatched as -/// [`proto::NewEncodedTcpIngestCallback`]. -#[cfg(feature = "encoded-video")] -pub fn create( - server: &'static server::FfiServer, - req: proto::NewEncodedTcpIngestRequest, -) -> FfiResult { - let async_id = server.resolve_async_id(req.request_async_id); - let ffi_room = server.retrieve_handle::(req.room_handle)?.clone(); - - let options = match options_from_proto(&req) { - Ok(opts) => opts, - Err(e) => { - let _ = server.send_event( - proto::NewEncodedTcpIngestCallback { - async_id, - message: Some(proto::new_encoded_tcp_ingest_callback::Message::Error( - e.to_string(), - )), - } - .into(), - ); - return Ok(proto::NewEncodedTcpIngestResponse { async_id }); - } - }; - - let handle = server.async_runtime.spawn(async move { - let participant = ffi_room.inner.room.local_participant(); - match EncodedTcpIngest::start(participant, options).await { - Ok(ingest) => { - let handle_id = server.next_id(); - let track_sid = ingest.track_sid(); - let track_name = ingest.track().name(); - ingest.set_observer(Arc::new(IngestObserverBridge { - server, - ingest_handle: handle_id, - })); - - let info = - proto::EncodedTcpIngestInfo { track_sid: track_sid.to_string(), track_name }; - - let ffi_ingest = FfiEncodedTcpIngest { - handle_id, - ingest: Arc::new(tokio::sync::Mutex::new(Some(ingest))), - }; - server.store_handle(handle_id, ffi_ingest); - - let _ = server.send_event( - proto::NewEncodedTcpIngestCallback { - async_id, - message: Some(proto::new_encoded_tcp_ingest_callback::Message::Ingest( - proto::OwnedEncodedTcpIngest { - handle: proto::FfiOwnedHandle { id: handle_id }, - info, - }, - )), - } - .into(), - ); - - 
ffi_room.inner.mark_local_publish_callback_sent(track_sid); - } - Err(err) => { - let _ = server.send_event( - proto::NewEncodedTcpIngestCallback { - async_id, - message: Some(proto::new_encoded_tcp_ingest_callback::Message::Error( - err.to_string(), - )), - } - .into(), - ); - } - } - }); - server.watch_panic(handle); - - Ok(proto::NewEncodedTcpIngestResponse { async_id }) -} - -#[cfg(not(feature = "encoded-video"))] -pub fn create( - _server: &'static server::FfiServer, - _req: proto::NewEncodedTcpIngestRequest, -) -> FfiResult { - feature_disabled_error() -} - -/// Stops a running ingest. Async because `EncodedTcpIngest::stop` awaits -/// the background task and optionally unpublishes the track. -#[cfg(feature = "encoded-video")] -pub fn stop( - server: &'static server::FfiServer, - req: proto::StopEncodedTcpIngestRequest, -) -> FfiResult { - let async_id = server.resolve_async_id(req.request_async_id); - let ingest_handle = req.ingest_handle; - - let ingest_slot = { - let ffi_ingest = server.retrieve_handle::(ingest_handle)?; - ffi_ingest.ingest.clone() - }; - - let handle = server.async_runtime.spawn(async move { - let taken = { ingest_slot.lock().await.take() }; - let error = match taken { - Some(ingest) => { - ingest.stop().await; - None - } - None => Some("EncodedTcpIngest: already stopped".to_string()), - }; - let _ = server.send_event(proto::StopEncodedTcpIngestCallback { async_id, error }.into()); - }); - server.watch_panic(handle); - - Ok(proto::StopEncodedTcpIngestResponse { async_id }) -} - -#[cfg(not(feature = "encoded-video"))] -pub fn stop( - _server: &'static server::FfiServer, - _req: proto::StopEncodedTcpIngestRequest, -) -> FfiResult { - feature_disabled_error() -} - -/// Pulls a stats snapshot synchronously. 
-#[cfg(feature = "encoded-video")] -pub fn get_stats( - server: &'static server::FfiServer, - req: proto::GetEncodedTcpIngestStatsRequest, -) -> FfiResult { - let ffi_ingest = server.retrieve_handle::(req.ingest_handle)?; - let guard = ffi_ingest.ingest.try_lock().map_err(|_| { - FfiError::InvalidRequest("EncodedTcpIngest is busy (stop in progress?)".into()) - })?; - let Some(ingest) = guard.as_ref() else { - return Err(FfiError::InvalidRequest("EncodedTcpIngest is stopped".into())); - }; - let stats = ingest.stats(); - Ok(proto::GetEncodedTcpIngestStatsResponse { - stats: proto::EncodedTcpIngestStats { - frames_accepted: stats.frames_accepted, - frames_dropped: stats.frames_dropped, - keyframes: stats.keyframes, - tcp_reconnects: stats.tcp_reconnects, - }, - }) -} - -#[cfg(not(feature = "encoded-video"))] -pub fn get_stats( - _server: &'static server::FfiServer, - _req: proto::GetEncodedTcpIngestStatsRequest, -) -> FfiResult { - feature_disabled_error() -} - -#[cfg(not(feature = "encoded-video"))] -fn feature_disabled_error() -> FfiResult { - Err(FfiError::InvalidRequest("Encoded video ingest support is not enabled".into())) -} - -#[cfg(feature = "encoded-video")] -fn options_from_proto( - req: &proto::NewEncodedTcpIngestRequest, -) -> FfiResult { - let port = u16::try_from(req.port) - .map_err(|_| FfiError::InvalidRequest("port must fit in u16".into()))?; - let codec = video_codec_from_proto(req.codec()); - let track_source = req - .track_source - .and_then(|s| proto::TrackSource::try_from(s).ok()) - .map(TrackSource::from) - .unwrap_or(TrackSource::Camera); - - let mut opts = EncodedTcpIngestOptions::new(port, codec, req.width, req.height); - opts.host = req.host.clone(); - opts.track_name = req.track_name.clone(); - opts.track_source = track_source; - opts.max_bitrate_bps = req.max_bitrate_bps; - if let Some(fps) = req.max_framerate_fps { - opts.max_framerate_fps = fps; - } - if let Some(ms) = req.reconnect_backoff_ms { - opts.reconnect_backoff = 
Duration::from_millis(ms as u64); - } - if let Some(unpublish) = req.unpublish_on_stop { - opts.unpublish_on_stop = unpublish; - } - Ok(opts) -} - -#[cfg(feature = "encoded-video")] -fn video_codec_from_proto(codec: proto::VideoCodec) -> livekit::webrtc::video_source::VideoCodec { - use livekit::webrtc::video_source::VideoCodec; - match codec { - proto::VideoCodec::H264 => VideoCodec::H264, - proto::VideoCodec::H265 => VideoCodec::H265, - proto::VideoCodec::Vp8 => VideoCodec::Vp8, - proto::VideoCodec::Vp9 => VideoCodec::Vp9, - proto::VideoCodec::Av1 => VideoCodec::Av1, - } -} - -/// Forwards ingest-level callbacks out to the FFI client as -/// [`proto::EncodedTcpIngestEvent`]s. -#[cfg(feature = "encoded-video")] -struct IngestObserverBridge { - server: &'static server::FfiServer, - ingest_handle: FfiHandleId, -} - -#[cfg(feature = "encoded-video")] -impl IngestObserverBridge { - fn emit(&self, message: proto::encoded_tcp_ingest_event::Message) { - let _ = self.server.send_event( - proto::EncodedTcpIngestEvent { - ingest_handle: self.ingest_handle, - message: Some(message), - } - .into(), - ); - } -} - -#[cfg(feature = "encoded-video")] -impl EncodedIngestObserver for IngestObserverBridge { - fn on_connected(&self, peer: SocketAddr) { - self.emit(proto::encoded_tcp_ingest_event::Message::Connected( - proto::encoded_tcp_ingest_event::Connected { peer: peer.to_string() }, - )); - } - - fn on_disconnected(&self, reason: &str) { - self.emit(proto::encoded_tcp_ingest_event::Message::Disconnected( - proto::encoded_tcp_ingest_event::Disconnected { reason: reason.to_string() }, - )); - } - - fn on_keyframe_requested(&self) { - self.emit(proto::encoded_tcp_ingest_event::Message::KeyframeRequested( - proto::encoded_tcp_ingest_event::KeyframeRequested {}, - )); - } - - fn on_target_bitrate(&self, bitrate_bps: u32, framerate_fps: f64) { - self.emit(proto::encoded_tcp_ingest_event::Message::TargetBitrateChanged( - proto::encoded_tcp_ingest_event::TargetBitrateChanged { 
bitrate_bps, framerate_fps }, - )); - } -} diff --git a/livekit-ffi/src/server/mod.rs b/livekit-ffi/src/server/mod.rs index 9bce5d112..0596b6939 100644 --- a/livekit-ffi/src/server/mod.rs +++ b/livekit-ffi/src/server/mod.rs @@ -39,7 +39,6 @@ pub mod audio_stream; pub mod colorcvt; pub mod data_stream; pub mod data_track; -pub mod encoded_tcp_ingest; pub mod logger; pub mod participant; pub mod requests; diff --git a/livekit-ffi/src/server/requests.rs b/livekit-ffi/src/server/requests.rs index 019a59fea..5566f2681 100644 --- a/livekit-ffi/src/server/requests.rs +++ b/livekit-ffi/src/server/requests.rs @@ -24,8 +24,7 @@ use livekit::{ use parking_lot::Mutex; use super::{ - audio_source, audio_stream, colorcvt, data_stream, data_track, encoded_tcp_ingest, - participant::FfiParticipant, + audio_source, audio_stream, colorcvt, data_stream, data_track, participant::FfiParticipant, resampler, room::{self, FfiPublication, FfiTrack}, video_source, video_stream, FfiError, FfiResult, FfiServer, @@ -1379,12 +1378,6 @@ pub fn handle_request( on_remote_data_track_is_published(server, req)?.into() } Request::DataTrackStreamRead(req) => on_data_track_stream_read(server, req)?.into(), - - Request::NewEncodedTcpIngest(req) => encoded_tcp_ingest::create(server, req)?.into(), - Request::StopEncodedTcpIngest(req) => encoded_tcp_ingest::stop(server, req)?.into(), - Request::GetEncodedTcpIngestStats(req) => { - encoded_tcp_ingest::get_stats(server, req)?.into() - } }); Ok(res)