Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,7 @@ config.backup.yaml
runtime/
dev/
installer_files/
logs/

# Streamlit runtime logs from OneKeyStart.bat
logs/
videolingo_*.log
6 changes: 5 additions & 1 deletion .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
[server]
maxUploadSize = 4096
maxUploadSize = 4096
fileWatcherType = "none"

[client]
toolbarMode = "viewer"
90 changes: 82 additions & 8 deletions OneKeyStart.bat
Original file line number Diff line number Diff line change
@@ -1,11 +1,85 @@
@echo off
chcp 65001 >nul 2>&1
call conda activate videolingo 2>nul
set PYTHONWARNINGS=ignore
python "%~dp0launch.py"
if %errorlevel% neq 0 (
echo.
echo Pre-flight checks or Streamlit failed. See logs\ for details.
echo.
rem ---------------------------------------------------------------------------
rem VideoLingo launcher.
rem Picks a Python environment (shared venv, then project .venv, then the Conda
rem env "videolingo"), runs installer.py --check (and --yes to repair when the
rem check fails), then starts Streamlit and tees its output into a log file.
rem Pass --check-only as the first argument to stop after the environment check.
rem ---------------------------------------------------------------------------
setlocal EnableExtensions
rem Run from the directory that contains this script so relative paths resolve.
cd /D "%~dp0"

rem Capture the ESC character (prompt code $E) so ANSI color sequences can be built.
for /F "tokens=1,2 delims=#" %%A in ('"prompt #$H#$E# & echo on & for %%B in (1) do rem"') do set "ESC=%%B"
set "C_RESET=%ESC%[0m"
set "C_GREEN=%ESC%[32m"
set "C_YELLOW=%ESC%[33m"
set "C_RED=%ESC%[31m"
set "C_CYAN=%ESC%[36m"
set "C_BOLD=%ESC%[1m"

rem Build a timestamped log file name under logs\ from the wmic localdatetime value.
rem NOTE(review): wmic is reportedly deprecated and may be missing on recent Windows
rem builds; if it is absent, dt stays undefined and the log name is malformed - confirm.
if not exist "logs" mkdir "logs"
for /f "tokens=2 delims==" %%I in ('wmic os get localdatetime /value') do set dt=%%I
set "LOGFILE=logs\videolingo_%dt:~0,8%_%dt:~8,6%.log"
rem CHECK_ONLY is defined only when the first argument is --check-only (case-insensitive).
set "CHECK_ONLY="
if /I "%~1"=="--check-only" set "CHECK_ONLY=1"

rem Start a fresh log file with a start marker and tell the user where it is.
echo [%date% %time%] VideoLingo starting... > "%LOGFILE%"
echo %C_CYAN%Log file:%C_RESET% %LOGFILE%

set "VENV_LABEL="
set "VENV_PY="

rem First choice: a shared venv under the user profile.
set "SHARED_VENV=%USERPROFILE%\.venvs\videolingo"
if exist "%SHARED_VENV%\Scripts\python.exe" (
set "VENV_LABEL=shared venv"
set "VENV_PY=%SHARED_VENV%\Scripts\python.exe"
goto venv_found
)

rem Second choice: a .venv inside the project directory.
if exist ".venv\Scripts\python.exe" (
set "VENV_LABEL=project .venv"
set "VENV_PY=.venv\Scripts\python.exe"
goto venv_found
)

rem Last resort: the Conda env "videolingo", used only when conda is on PATH.
rem The steps mirror the venv path below: check, repair if needed, then launch.
where conda >nul 2>nul
if %errorlevel%==0 (
echo %C_YELLOW%No uv venv found, falling back to Conda env "videolingo"...%C_RESET%
call conda activate videolingo
python installer.py --check --quiet
if errorlevel 1 (
echo %C_YELLOW%Conda env is incomplete or outdated. Repairing...%C_RESET%
python installer.py --yes
if errorlevel 1 goto install_failed
)
if defined CHECK_ONLY (
echo %C_GREEN%Environment check passed. --check-only set, not starting Streamlit.%C_RESET%
goto end
)
echo %C_GREEN%Starting VideoLingo with Conda...%C_RESET%
python -m streamlit run st.py 2>&1 | powershell -NoProfile -Command "$input | Tee-Object -FilePath '%LOGFILE%' -Append"
goto end
)

rem No venv and no conda: tell the user how to create an environment, then exit.
echo %C_RED%ERROR: No usable VideoLingo environment found.%C_RESET%
echo Run one of these first:
echo python setup_env.py --shared
echo python setup_env.py
goto end

rem A venv interpreter was found: verify it, repairing with installer.py if the check fails.
:venv_found
echo %C_GREEN%Detected %VENV_LABEL%:%C_RESET% %VENV_PY%
"%VENV_PY%" installer.py --check --quiet
if errorlevel 1 (
echo %C_YELLOW%Environment is incomplete or outdated. Repairing with installer.py...%C_RESET%
"%VENV_PY%" installer.py --yes
if errorlevel 1 goto install_failed
)

if defined CHECK_ONLY (
echo %C_GREEN%Environment check passed. --check-only set, not starting Streamlit.%C_RESET%
goto end
)

rem Launch Streamlit; stdout and stderr are merged and piped through PowerShell
rem Tee-Object, which appends them to the log file.
echo %C_GREEN%Starting VideoLingo with %VENV_LABEL%...%C_RESET%
"%VENV_PY%" -m streamlit run st.py 2>&1 | powershell -NoProfile -Command "$input | Tee-Object -FilePath '%LOGFILE%' -Append"
goto end

rem Reached when installer.py --yes fails; falls through to the end label.
:install_failed
echo %C_RED%Install/repair failed. Check the messages above and the log file.%C_RESET%

rem Keep the console window open so the user can read the output.
:end
pause
19 changes: 0 additions & 19 deletions OneKeyStart_uv.bat

This file was deleted.

10 changes: 5 additions & 5 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# * Settings marked with * are advanced settings that do not appear in the Streamlit page and can only be modified manually in config.yaml
# It is recommended to change the other settings in the Streamlit page
# -------------------
# version: "3.0.0"
# version: "3.0.3"
# author: "Huanshere"
# -------------------

Expand All @@ -11,9 +11,9 @@ display_language: "zh-CN"

# API settings
api:
key: 'your-api-key'
key: 'YOUR_API_KEY'
base_url: 'https://yunwu.ai'
model: ''
model: 'gpt-5.5'
llm_support_json: false
# *Number of LLM multi-threaded accesses, set to 1 if using local LLM
max_workers: 4
Expand All @@ -22,7 +22,7 @@ max_workers: 4
target_language: '简体中文'

# Whether to use Demucs for vocal separation before transcription
demucs: true
demucs: false

whisper:
# ["large-v3", "large-v3-turbo"]. Note: for zh, the model will be forced to Belle/large-v3
Expand All @@ -38,7 +38,7 @@ whisper:
elevenlabs_api_key: 'your_elevenlabs_api_key'

# Whether to burn subtitles into the video
burn_subtitles: true
burn_subtitles: false

## ======================== Advanced Settings ======================== ##
# *🔬 h264_nvenc GPU acceleration for ffmpeg, make sure your GPU supports it
Expand Down
24 changes: 16 additions & 8 deletions core/_10_gen_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def generate_tts_audio(tasks_df: pd.DataFrame) -> pd.DataFrame:
warmup_size = min(WARMUP_SIZE, len(tasks_df))
for _, row in tasks_df.head(warmup_size).iterrows():
try:
check_cancel()
number, real_dur = process_row(row, tasks_df)
tasks_df.loc[tasks_df['number'] == number, 'real_dur'] = real_dur
progress.advance(task)
Expand All @@ -103,14 +104,20 @@ def generate_tts_audio(tasks_df: pd.DataFrame) -> pd.DataFrame:
for _, row in remaining_tasks.iterrows()
]

for future in as_completed(futures):
try:
number, real_dur = future.result()
tasks_df.loc[tasks_df['number'] == number, 'real_dur'] = real_dur
progress.advance(task)
except Exception as e:
rprint(f"[red]❌ Error: {str(e)}[/red]")
raise e
try:
for future in as_completed(futures):
check_cancel()
try:
number, real_dur = future.result()
tasks_df.loc[tasks_df['number'] == number, 'real_dur'] = real_dur
progress.advance(task)
except Exception as e:
rprint(f"[red]❌ Error: {str(e)}[/red]")
raise e
except BaseException:
for f in futures:
f.cancel()
raise

rprint("[bold green]✨ TTS audio generation completed![/bold green]")
return tasks_df
Expand Down Expand Up @@ -149,6 +156,7 @@ def merge_chunks(tasks_df: pd.DataFrame) -> pd.DataFrame:

for index, row in tasks_df.iterrows():
if row['cut_off'] == 1:
check_cancel()
chunk_df = tasks_df.iloc[chunk_start:index+1].reset_index(drop=True)
speed_factor, keep_gaps = process_chunk(chunk_df, accept, min_speed)

Expand Down
1 change: 1 addition & 0 deletions core/_11_merge_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def merge_audio_segments(audios, new_sub_times, sample_rate):
merge_task = progress.add_task("🎵 Merging audio segments...", total=len(audios))

for i, (audio_file, time_range) in enumerate(zip(audios, new_sub_times)):
check_cancel()
if not os.path.exists(audio_file):
console.print(f"[bold yellow]⚠️ Warning: File {audio_file} does not exist, skipping...[/bold yellow]")
progress.advance(merge_task)
Expand Down
5 changes: 5 additions & 0 deletions core/_12_dub_to_vid.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@

def merge_video_audio():
"""Merge video and audio, and reduce video volume"""
from core._1_ytdlp import is_audio_only_input
if is_audio_only_input():
rprint("[bold green]🎵 Audio-only input: skipping dubbing video merge. Dubbed audio is in the `output` directory.[/bold green]")
return

VIDEO_FILE = find_video_files()
background_file = _BACKGROUND_AUDIO_FILE

Expand Down
72 changes: 72 additions & 0 deletions core/_1_ytdlp.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import os,sys
import glob
import json
import re
import subprocess
from core.utils import *

OUTPUT_DIR = "output"
INPUT_MANIFEST = "input_manifest.json"
GENERATED_AUDIO_NAMES = {"dub.mp3", "normalized_dub.wav"}

def sanitize_filename(filename):
# Remove or replace illegal characters
filename = re.sub(r'[<>:"/\\|?*]', '', filename)
Expand Down Expand Up @@ -51,6 +56,27 @@ def download_video_ytdlp(url, save_path='output', resolution='1080'):
new_filename = sanitize_filename(filename)
if new_filename != filename:
os.rename(os.path.join(save_path, file), os.path.join(save_path, new_filename + ext))
media_file = find_video_files(save_path)
write_input_manifest(media_file, "video", save_path)

def write_input_manifest(media_file: str, media_type: str, save_path='output'):
    """Record the current input media in a JSON manifest inside *save_path*.

    The manifest (named by ``INPUT_MANIFEST``) stores two keys: ``path`` and
    ``type``. On Windows the stored path uses forward slashes.

    Args:
        media_file: Path of the input media file.
        media_type: Kind of media, stored as-is under ``type``.
        save_path: Directory holding the manifest; created if missing.
    """
    os.makedirs(save_path, exist_ok=True)

    # Store forward slashes on Windows so the recorded path matches the
    # normalisation applied when the manifest is read back.
    if sys.platform.startswith('win'):
        recorded_path = media_file.replace("\\", "/")
    else:
        recorded_path = media_file

    payload = {"path": recorded_path, "type": media_type}
    target = os.path.join(save_path, INPUT_MANIFEST)
    with open(target, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)

def _read_input_manifest(save_path='output'):
    """Load the input manifest from *save_path*, if it is present and valid.

    The manifest is only a hint about which file is the current input, so
    every form of "unusable" (missing, unreadable, malformed JSON, wrong
    shape, unknown type, or a path that no longer exists) yields ``None``
    rather than an exception, letting the caller fall back to other means.

    Args:
        save_path: Directory expected to contain the manifest file.

    Returns:
        ``(media_file, media_type)`` where ``media_type`` is ``"video"`` or
        ``"audio"`` and, on Windows, ``media_file`` uses forward slashes;
        ``None`` when no usable manifest exists.
    """
    manifest_path = os.path.join(save_path, INPUT_MANIFEST)
    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (corrupt contents).
        return None

    # A valid manifest is a JSON object; lists, strings, numbers are not.
    if not isinstance(data, dict):
        return None

    media_file = data.get("path")
    media_type = data.get("type")

    # Type checks come first: a non-string "type" may be unhashable, and a
    # non-string "path" (e.g. an int) must not be handed to os.path.exists.
    if not isinstance(media_type, str) or media_type not in {"video", "audio"}:
        return None
    if not isinstance(media_file, str) or not media_file:
        return None
    if not os.path.exists(media_file):
        return None

    if sys.platform.startswith('win'):
        media_file = media_file.replace("\\", "/")
    return media_file, media_type

def find_video_files(save_path='output'):
video_files = [file for file in glob.glob(save_path + "/*") if os.path.splitext(file)[1][1:].lower() in load_key("allowed_video_formats")]
Expand All @@ -62,6 +88,52 @@ def find_video_files(save_path='output'):
raise ValueError(f"Number of videos found {len(video_files)} is not unique. Please check.")
return video_files[0]

def find_audio_files(save_path='output'):
    """Return the single input audio file found directly inside *save_path*.

    A file qualifies when its lower-cased extension is listed under the
    ``allowed_audio_formats`` config key and its base name is not one of
    ``GENERATED_AUDIO_NAMES``. On Windows, backslashes in the returned path
    are replaced with forward slashes.

    Raises:
        ValueError: If the number of qualifying files is not exactly one.
    """
    candidates = []
    for path in glob.glob(save_path + "/*"):
        extension = os.path.splitext(path)[1][1:].lower()
        if extension in load_key("allowed_audio_formats"):
            candidates.append(path)

    if sys.platform.startswith('win'):
        candidates = [path.replace("\\", "/") for path in candidates]

    # Drop audio files listed in GENERATED_AUDIO_NAMES so they are never
    # picked up as the input.
    inputs = []
    for path in candidates:
        if os.path.basename(path) in GENERATED_AUDIO_NAMES:
            continue
        inputs.append(path)

    if len(inputs) != 1:
        raise ValueError(f"Number of audio files found {len(inputs)} is not unique. Please check.")
    return inputs[0]

def _safe_find_video_file(save_path='output'):
    """Return the video file in *save_path*, or ``None`` when there is none.

    Wraps ``find_video_files``: a ``ValueError`` whose message contains
    ``"found 0"`` is turned into ``None``; any other ``ValueError`` is
    re-raised unchanged.
    """
    try:
        located = find_video_files(save_path)
    except ValueError as err:
        # Only the "nothing found" case is tolerated, identified by its message.
        if "found 0" not in str(err):
            raise
        located = None
    return located

def _safe_find_audio_file(save_path='output'):
    """Return the audio file in *save_path*, or ``None`` when there is none.

    Wraps ``find_audio_files``: a ``ValueError`` whose message contains
    ``"found 0"`` is turned into ``None``; any other ``ValueError`` is
    re-raised unchanged.
    """
    try:
        located = find_audio_files(save_path)
    except ValueError as err:
        # Only the "nothing found" case is tolerated, identified by its message.
        if "found 0" not in str(err):
            raise
        located = None
    return located

def find_media_file(save_path='output'):
    """Locate the input media in *save_path* and report its kind.

    Lookup order: the input manifest first, then a video file, then an
    audio file.

    Returns:
        A ``(path, media_type)`` tuple; ``media_type`` is ``"video"`` or
        ``"audio"``.

    Raises:
        ValueError: If no media is found, or propagated from the video/audio
            lookup helpers.
    """
    recorded = _read_input_manifest(save_path)
    if recorded:
        return recorded

    # Video takes precedence over audio when scanning the directory.
    lookups = (
        (_safe_find_video_file, "video"),
        (_safe_find_audio_file, "audio"),
    )
    for locate, media_type in lookups:
        path = locate(save_path)
        if path:
            return path, media_type

    raise ValueError("No media file found. Please download or upload a media file first.")

def is_audio_only_input(save_path='output'):
    """Tell whether the input media in *save_path* is an audio file.

    Returns ``True`` only when ``find_media_file`` reports the type
    ``"audio"``. Any exception raised during the lookup is swallowed and
    reported as ``False``.
    """
    try:
        _, kind = find_media_file(save_path)
    except Exception:
        # Best effort: an undeterminable input is treated as "not audio-only".
        return False
    else:
        return kind == "audio"

if __name__ == '__main__':
# Example usage
url = input('Please enter the URL of the video you want to download: ')
Expand Down
16 changes: 10 additions & 6 deletions core/_2_asr.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from core.utils import *
from core.asr_backend.demucs_vl import demucs_audio
from core.asr_backend.audio_preprocess import process_transcription, convert_video_to_audio, split_audio, save_results, normalize_audio_volume
from core._1_ytdlp import find_video_files
from core.asr_backend.audio_preprocess import process_transcription, convert_video_to_audio, prepare_audio_for_asr, split_audio, save_results, normalize_audio_volume
from core._1_ytdlp import find_media_file
from core.utils.models import *

@check_file_exists(_2_CLEANED_CHUNKS)
def transcribe():
# 1. video to audio
video_file = find_video_files()
convert_video_to_audio(video_file)
# 1. prepare audio
media_file, media_type = find_media_file()
if media_type == "video":
convert_video_to_audio(media_file)
else:
prepare_audio_for_asr(media_file)

# 2. Demucs vocal separation:
if load_key("demucs"):
Expand All @@ -34,6 +37,7 @@ def transcribe():
rprint("[cyan]🎤 Transcribing audio with ElevenLabs API...[/cyan]")

for start, end in segments:
check_cancel()
result = ts(_RAW_AUDIO_FILE, vocal_audio, start, end)
all_results.append(result)

Expand All @@ -47,4 +51,4 @@ def transcribe():
save_results(df)

if __name__ == "__main__":
transcribe()
transcribe()
12 changes: 9 additions & 3 deletions core/_4_2_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,15 @@ def translate_all():
future = executor.submit(translate_chunk, chunk, chunks, theme_prompt, i)
futures.append(future)
results = []
for future in concurrent.futures.as_completed(futures):
results.append(future.result())
progress.update(task, advance=1)
try:
for future in concurrent.futures.as_completed(futures):
check_cancel()
results.append(future.result())
progress.update(task, advance=1)
except BaseException:
for f in futures:
f.cancel()
raise

results.sort(key=lambda x: x[0]) # Sort results based on original order

Expand Down
Loading