From 69aa9f700d3b7639858ca223b42b34ea1453a89b Mon Sep 17 00:00:00 2001
From: Yash Desai <yashdesai@MacBookPro.bbrouter>
Date: Fri, 3 Apr 2026 01:03:38 +0530
Subject: [PATCH] perf: lazy-import timing module to avoid loading scipy/numba
 when unused

Move `from .timing import add_word_timestamps` from module level to
inside the `if word_timestamps:` guard in transcribe(), so that
timing.py is imported only when word timestamps are requested.

timing.py imports scipy and numba at module level for word-level
timestamp alignment (DTW + median filter). These are heavy dependencies
(~212 MB combined) that load ~620 Python modules on import. When
word_timestamps=False (the default), none of this code is ever called,
yet it all gets loaded eagerly.

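This change makes scipy and numba optional at runtime for the common
case of transcription without word timestamps: transcribe.py now
imports them (via timing.py) only when word_timestamps=True. As a
result, it:
- Reduces import time significantly, since transcribe.py no longer
  pulls in scipy and numba at import time
- Enables downstream packagers (PyInstaller, cx_Freeze) to exclude
  scipy/numba/llvmlite from bundles when word timestamps aren't needed
- Saves ~212 MB in frozen/packaged applications that exclude them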
---
 whisper/mlx_whisper/transcribe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/whisper/mlx_whisper/transcribe.py b/whisper/mlx_whisper/transcribe.py
index bced16a58..3e9a834cc 100644
--- a/whisper/mlx_whisper/transcribe.py
+++ b/whisper/mlx_whisper/transcribe.py
@@ -19,7 +19,6 @@
 )
 from .decoding import DecodingOptions, DecodingResult
 from .load_models import load_model
-from .timing import add_word_timestamps
 from .tokenizer import LANGUAGES, get_tokenizer
 
 
@@ -412,6 +411,8 @@ def next_words_segment(segments: List[dict]) -> Optional[dict]:
                     seek += segment_size
 
                 if word_timestamps:
+                    from .timing import add_word_timestamps
+
                     add_word_timestamps(
                         segments=current_segments,
                         model=model,