From 39aa9f50dbca20d729178e210885794fea94c1b6 Mon Sep 17 00:00:00 2001
From: Claude-Assistant <noreply@anthropic.com>
Date: Sun, 15 Mar 2026 13:07:16 +0100
Subject: [PATCH 1/6] fix: restore word-level timestamps for unalignable
 characters (#1372)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Characters not in the alignment model's vocabulary (digits, symbols,
foreign script) were excluded from CTC alignment and got no timestamps.

Fix: extend the emission matrix with a wildcard column (max non-blank
score per frame) so unknown chars participate in alignment via the
existing get_trellis/backtrack — no changes to the CTC internals.
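As a safety net, an interpolation fallback fills in start/end for any
word still missing them, provided at least one other word in the same
sentence received a start time (score is not interpolated).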

Unlike PR #986, this does not rewrite get_trellis or backtrack, so it
avoids the regression that caused #1220.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_word_timestamp_interpolation.py | 135 +++++++++++++++++++++
 whisperx/alignment.py                      |  36 +++++-
 2 files changed, 166 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_word_timestamp_interpolation.py
diff --git a/tests/test_word_timestamp_interpolation.py b/tests/test_word_timestamp_interpolation.py
new file mode 100644
index 000000000..08be577fd
--- /dev/null
+++ b/tests/test_word_timestamp_interpolation.py
@@ -0,0 +1,135 @@
+"""Test that align() produces word-level timestamps for unalignable characters."""
+
+import torch
+from unittest.mock import MagicMock
+
+from whisperx.alignment import align
+
+
+def _make_mock_model(emission, dictionary):
+    """Create a mock torchaudio-style model that returns a fixed emission matrix.
+
+    The emission should be pre-log-softmax logits of shape (num_frames, vocab_size).
+    align() will apply log_softmax itself.
+    """
+    model = MagicMock()
+    # torchaudio interface: model(waveform, lengths=lengths) -> (emissions, _)
+    # emissions shape: (batch=1, num_frames, vocab_size)
+    model.return_value = (emission.unsqueeze(0), None)
+    return model
+
+
+def _make_emission(num_frames, dictionary, transcript_chars, blank_id=0):
+    """Build a synthetic emission matrix where known chars peak at the right frames.
+
+    Distributes characters evenly across frames. Known chars get high logits
+    at their assigned frames. Unknown chars have no specific token but will
+    get wildcard treatment in align().
+    """
+    vocab_size = max(dictionary.values()) + 1
+    # Start with uniform low logits, blank slightly favored
+    emission = torch.full((num_frames, vocab_size), -5.0)
+    emission[:, blank_id] = -1.0
+
+    # Assign each transcript char a span of frames
+    chars_in_dict = [(i, c) for i, c in enumerate(transcript_chars)
+                     if c.lower() in dictionary]
+    if chars_in_dict:
+        frames_per_char = num_frames // (len(transcript_chars) + 1)
+        for seq_idx, (char_idx, char) in enumerate(chars_in_dict):
+            center = (char_idx + 1) * frames_per_char
+            start = max(0, center - frames_per_char // 2)
+            end = min(num_frames, center + frames_per_char // 2)
+            token_id = dictionary[char.lower()]
+            for t in range(start, end):
+                emission[t, token_id] = 2.0  # high logit for this token
+                emission[t, blank_id] = -3.0  # suppress blank
+
+    return emission
+
+
+class TestAlignWithWildcards:
+    """Test align() end-to-end with unknown characters."""
+
+    DICTIONARY = {
+        "<pad>": 0,  # blank
+        "a": 1, "b": 2, "c": 3, "d": 4, "e": 5,
+        "f": 6, "g": 7, "h": 8, "i": 9, "k": 10,
+        "l": 11, "m": 12, "n": 13, "o": 14, "p": 15,
+        "r": 16, "s": 17, "t": 18, "u": 19, "w": 20,
+        "|": 21,
+    }
+    METADATA = {"language": "en", "dictionary": DICTIONARY, "type": "torchaudio"}
+
+    def _run_align(self, text, duration=5.0, num_frames=100):
+        """Run align() with a mock model on a single segment."""
+        torch.manual_seed(0)
+        emission = _make_emission(num_frames, self.DICTIONARY, list(text), blank_id=0)
+        model = _make_mock_model(emission, self.DICTIONARY)
+
+        sample_rate = 16000
+        num_samples = int(duration * sample_rate)
+        audio = torch.randn(num_samples)
+
+        transcript = [{"text": text, "start": 0.0, "end": duration}]
+        result = align(
+            transcript=transcript,
+            model=model,
+            align_model_metadata=self.METADATA,
+            audio=audio,
+            device="cpu",
+        )
+        return result
+
+    def test_known_chars_get_timestamps(self):
+        """Baseline: words with all known chars get timestamps."""
+        result = self._run_align("the cat sat")
+        for word in result["word_segments"]:
+            assert "start" in word, f"'{word['word']}' missing start"
+            assert "end" in word, f"'{word['word']}' missing end"
+            assert "score" in word, f"'{word['word']}' missing score"
+
+    def test_unknown_word_gets_timestamps(self):
+        """A word made of unknown chars (digits) gets timestamps via wildcard."""
+        result = self._run_align("cost 43 dollars")
+        words = {w["word"]: w for w in result["word_segments"]}
+        assert "43" in words, f"'43' not in word_segments: {list(words.keys())}"
+        assert "start" in words["43"], "'43' missing start timestamp"
+        assert "end" in words["43"], "'43' missing end timestamp"
+        assert "score" in words["43"], "'43' missing score"
+
+    def test_mixed_word_gets_timestamps(self):
+        """A word with mixed known/unknown chars gets timestamps."""
+        result = self._run_align("has 43k users")
+        # "43k" has unknown '4','3' and known 'k'
+        words = {w["word"]: w for w in result["word_segments"]}
+        assert "43k" in words, f"'43k' not in word_segments: {list(words.keys())}"
+        assert "start" in words["43k"]
+        assert "end" in words["43k"]
+
+    def test_unknown_word_does_not_corrupt_neighbors(self):
+        """Known words adjacent to unknown words still get valid timestamps."""
+        result = self._run_align("cost 43 dollars")
+        words = {w["word"]: w for w in result["word_segments"]}
+        for known in ("cost", "dollars"):
+            assert known in words
+            assert "start" in words[known], f"'{known}' missing start"
+            assert "end" in words[known], f"'{known}' missing end"
+            assert "score" in words[known], f"'{known}' missing score"
+
+    def test_all_unknown_segment_gets_timestamps(self):
+        """A segment with only unknown chars gets wildcard-aligned timestamps."""
+        result = self._run_align("123 456")
+        assert len(result["word_segments"]) > 0, "expected word_segments for all-unknown text"
+        for word in result["word_segments"]:
+            assert "start" in word, f"'{word['word']}' missing start"
+            assert "end" in word, f"'{word['word']}' missing end"
+
+    def test_timestamps_are_ordered(self):
+        """Word timestamps are monotonically non-decreasing."""
+        result = self._run_align("the 99 cats")
+        starts = [w["start"] for w in result["word_segments"] if "start" in w]
+        for i in range(1, len(starts)):
+            assert starts[i] >= starts[i - 1], (
+                f"Timestamps not ordered: {starts}"
+            )
diff --git a/whisperx/alignment.py b/whisperx/alignment.py
index 0786d0eb9..35b1f8aac 100644
--- a/whisperx/alignment.py
+++ b/whisperx/alignment.py
@@ -178,11 +178,12 @@ def align(
             elif char_ in model_dictionary.keys():
                 clean_char.append(char_)
                 clean_cdx.append(cdx)
+            elif char_ not in (" ", "|"):
+                # unknown char (digit, symbol, foreign script) — use wildcard
+                clean_char.append(char_)
+                clean_cdx.append(cdx)
 
-        clean_wdx = []
-        for wdx, wrd in enumerate(per_word):
-            if any([c in model_dictionary.keys() for c in wrd.lower()]):
-                clean_wdx.append(wdx)
+        clean_wdx = list(range(len(per_word)))
 
         # Use language-specific Punkt model if available otherwise we fallback to English.
         punkt_lang = PUNKT_LANGUAGES.get(model_lang, 'english')
@@ -236,7 +237,6 @@ def align(
             continue
 
         text_clean = "".join(segment_data[sdx]["clean_char"])
-        tokens = [model_dictionary[c] for c in text_clean]
 
         f1 = int(t1 * SAMPLE_RATE)
         f2 = int(t2 * SAMPLE_RATE)
@@ -268,6 +268,19 @@ def align(
             if char == '[pad]' or char == '<pad>':
                 blank_id = code
 
+        # Build tokens, mapping unknown chars to a wildcard column
+        has_wildcard = any(c not in model_dictionary for c in text_clean)
+        if has_wildcard:
+            # Extend emission with a wildcard column: max non-blank score per frame
+            non_blank_mask = torch.ones(emission.size(1), dtype=torch.bool)
+            non_blank_mask[blank_id] = False
+            wildcard_col = emission[:, non_blank_mask].max(dim=1).values
+            emission = torch.cat([emission, wildcard_col.unsqueeze(1)], dim=1)
+            wildcard_id = emission.size(1) - 1
+            tokens = [model_dictionary.get(c, wildcard_id) for c in text_clean]
+        else:
+            tokens = [model_dictionary[c] for c in text_clean]
+
         trellis = get_trellis(emission, tokens, blank_id)
         path = backtrack(trellis, emission, tokens, blank_id)
 
@@ -348,6 +361,19 @@ def align(
 
                 sentence_words.append(word_segment)
 
+            # Safety net: interpolate start/end for words that still lack them
+            if sentence_words:
+                _starts = pd.Series([w.get("start", np.nan) for w in sentence_words])
+                _ends = pd.Series([w.get("end", np.nan) for w in sentence_words])
+                if _starts.isna().any() and _starts.notna().any():
+                    _starts = interpolate_nans(_starts, method=interpolate_method)
+                    _ends = interpolate_nans(_ends, method=interpolate_method)
+                    for i, w in enumerate(sentence_words):
+                        if "start" not in w and pd.notna(_starts.iloc[i]):
+                            w["start"] = _starts.iloc[i]
+                        if "end" not in w and pd.notna(_ends.iloc[i]):
+                            w["end"] = _ends.iloc[i]
+
             subsegment = {
                 "text": sentence_text,
                 "start": sentence_start,

From da072d6bcb593720566ff6c773f8caf86f1591e7 Mon Sep 17 00:00:00 2001
From: Claude-Assistant <noreply@anthropic.com>
Date: Sun, 15 Mar 2026 13:33:11 +0100
Subject: [PATCH 2/6] test: add regression test for #1372 (digits+comma get no
 timestamps)

Covers the reported case with a mock model: before the wildcard fix,
'4,9' embedded in text got no start/end/score because digits and
commas are not in the alignment model dictionary. The test asserts
that all three fields are now present.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_word_timestamp_interpolation.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_word_timestamp_interpolation.py b/tests/test_word_timestamp_interpolation.py
index 08be577fd..ed2602eff 100644
--- a/tests/test_word_timestamp_interpolation.py
+++ b/tests/test_word_timestamp_interpolation.py
@@ -133,3 +133,17 @@ def test_timestamps_are_ordered(self):
             assert starts[i] >= starts[i - 1], (
                 f"Timestamps not ordered: {starts}"
             )
+
+    def test_issue_1372_digits_comma_no_timestamps(self):
+        """Regression: '4,9' (digits+comma) must get timestamps.
+
+        https://github.com/m-bain/whisperX/issues/1372#issuecomment-4051234966
+        Reporter showed that align() returned {'word': '4,9'} with no
+        start/end/score for German text containing '4,9'.
+        """
+        result = self._run_align("halt mit 4,9 nicht ins parlament", num_frames=200)
+        words = {w["word"]: w for w in result["word_segments"]}
+        assert "4,9" in words, f"'4,9' not in word_segments: {list(words.keys())}"
+        assert "start" in words["4,9"], "'4,9' missing start"
+        assert "end" in words["4,9"], "'4,9' missing end"
+        assert "score" in words["4,9"], "'4,9' missing score"

From f9a3f8fadae2832ae5b39aee7f469926acccbe58 Mon Sep 17 00:00:00 2001
From: Claude-Assistant <noreply@anthropic.com>
Date: Sun, 15 Mar 2026 13:41:50 +0100
Subject: [PATCH 3/6] ci: add pytest dev dependency and test workflow

Add pytest to optional dev dependencies and create a dedicated
tests.yml workflow to run pytest across Python 3.10-3.13.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/workflows/tests.yml |  30 ++++++++++
 pyproject.toml              |   3 +
 uv.lock                     | 115 ++++++++++++++++++++++++++----------
 3 files changed, 118 insertions(+), 30 deletions(-)
 create mode 100644 .github/workflows/tests.yml

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 000000000..dbfd87509
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,30 @@
+name: Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: "0.5.14"
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install the project
+        run: uv sync --all-extras
+
+      - name: Run tests
+        run: uv run pytest tests/ -v
diff --git a/pyproject.toml b/pyproject.toml
index 94626d605..b5d69f625 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,9 @@ dependencies = [
 ]
 
 
+[project.optional-dependencies]
+dev = ["pytest"]
+
 [project.scripts]
 whisperx = "whisperx.__main__:cli"
 
diff --git a/uv.lock b/uv.lock
index 22d61222e..2aace58a6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -526,6 +526,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/87/62/9773de14fe6c45c23649e98b83231fffd7b9892b6cf863251dc2afa73643/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737", size = 64359, upload-time = "2025-02-09T03:17:01.998Z" },
 ]
 
+[[package]]
+name = "exceptiongroup"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
+]
+
 [[package]]
 name = "faster-whisper"
 version = "1.2.0"
@@ -888,6 +900,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" },
 ]
 
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1864,6 +1885,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598, upload-time = "2025-07-01T09:16:27.732Z" },
 ]
 
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+]
+
 [[package]]
 name = "primepy"
 version = "1.3"
@@ -2109,6 +2139,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" },
 ]
 
+[[package]]
+name = "pytest"
+version = "9.0.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+    { name = "tomli", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -2849,16 +2897,16 @@ dependencies = [
     { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2f44cf279f673cfcdd8f576c349eee8bedf8caab351a5dd78b32970cc34a212" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d3c1b85b26a09832d139f6d6da6b66caeb51d2e16e08f8587665c44a9e1aa8f9" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9276857d241c6de257af765c0f51fc011af38cb725401495121b280913007cf" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4573c6042950c20278e3608a9a38050ba0bc72e0049e1bbfd249caf859a8029b" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ddef94bf181e6447cbb05f38beaca8f6c5bb8d2b9ddced1aa3452025b9fc70d3" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:862e2e40bf09d865e5df080a84c1a39bbcef40e43140f4b1737eb3a389d3b38f" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f851d32e94ca05e470f0c60e25726ec1e0eb71cb2ca5a0206b7fd03272ccc3c8" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:09535a9b727c0793cd07c1ace99f3f353626281bcc3e30c2f2314e3ebc9d3f96" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:68df9c9068984edff8065c2b6656725e6114fe89281b0cf122c7505305fc98a4" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:1951f10ed092f2dda57634f6a3950ef21c9d9352551aa84a9fccd51bbda18095" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2f44cf279f673cfcdd8f576c349eee8bedf8caab351a5dd78b32970cc34a212" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d3c1b85b26a09832d139f6d6da6b66caeb51d2e16e08f8587665c44a9e1aa8f9" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c9276857d241c6de257af765c0f51fc011af38cb725401495121b280913007cf" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4573c6042950c20278e3608a9a38050ba0bc72e0049e1bbfd249caf859a8029b" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ddef94bf181e6447cbb05f38beaca8f6c5bb8d2b9ddced1aa3452025b9fc70d3" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:862e2e40bf09d865e5df080a84c1a39bbcef40e43140f4b1737eb3a389d3b38f" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f851d32e94ca05e470f0c60e25726ec1e0eb71cb2ca5a0206b7fd03272ccc3c8" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:09535a9b727c0793cd07c1ace99f3f353626281bcc3e30c2f2314e3ebc9d3f96" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:68df9c9068984edff8065c2b6656725e6114fe89281b0cf122c7505305fc98a4" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:1951f10ed092f2dda57634f6a3950ef21c9d9352551aa84a9fccd51bbda18095" },
 ]
 
 [[package]]
@@ -2875,16 +2923,16 @@ dependencies = [
     { name = "torch", version = "2.8.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux') or (platform_machine != 'x86_64' and sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c955835e470ebbde03d7d54ca5d8ba5722138bbfd66cfb86845234b3a5b9f9fa" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:e9e68f16f1afe108f0cb1c7d636d0242fdc43cbbcaab222a72a373b9d2799134" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e54bd7fc9472019308097d99102df9acee22aa2451ae808d27840bc874320292" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:db37df7eee906f8fe0a639fdc673f3541cb2e173169b16d4133447eb922d1938" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9377faee65a290578280ac7f4884c3586253dac2ca28c60f458ff6efe86a6b05" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:9b302192b570657c1cc787a4d487ae4bbb7f2aab1c01b1fcc46757e7f86f391e" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e1b1f530e8b71b1d079e23db45a0e621709061710ef8540aae8280aa039554ee" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:0c2d081e24204768e636cbf05e1377c8a6964b8ed6fa3aa5092ba9af9bbc19c5" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:89c2d04fe1cb7c31eb042f7b36e1ce8e2afacf769ecd5f216527e184e4857099" },
-    { url = "https://download.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:ab4653da31dc37f0a643f41f4da8bee647a8686bacf12d3929cac8aead186811" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c955835e470ebbde03d7d54ca5d8ba5722138bbfd66cfb86845234b3a5b9f9fa" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:e9e68f16f1afe108f0cb1c7d636d0242fdc43cbbcaab222a72a373b9d2799134" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e54bd7fc9472019308097d99102df9acee22aa2451ae808d27840bc874320292" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:db37df7eee906f8fe0a639fdc673f3541cb2e173169b16d4133447eb922d1938" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9377faee65a290578280ac7f4884c3586253dac2ca28c60f458ff6efe86a6b05" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:9b302192b570657c1cc787a4d487ae4bbb7f2aab1c01b1fcc46757e7f86f391e" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e1b1f530e8b71b1d079e23db45a0e621709061710ef8540aae8280aa039554ee" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:0c2d081e24204768e636cbf05e1377c8a6964b8ed6fa3aa5092ba9af9bbc19c5" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:89c2d04fe1cb7c31eb042f7b36e1ce8e2afacf769ecd5f216527e184e4857099" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchaudio-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:ab4653da31dc37f0a643f41f4da8bee647a8686bacf12d3929cac8aead186811" },
 ]
 
 [[package]]
@@ -2901,16 +2949,16 @@ dependencies = [
     { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a0161e95285a0b716de210fee0392151d601e7da3cc86595008d826abff48a8c" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:5d7a9d913e2744573ed3b7ec2f781ed39833c81c9c41859973ec10ac174c2366" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f4409df567d0723a7a3a89d32c7552a17e0ff6f137ea26a0d268c665259b2995" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:7a1eb6154e05b8056b34c7a41495e09d57f79eb0180eb4e7f3bb2a61845ca8ea" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:145b8a0c21cfcaa1705c67173c5d439087e0e120d5da9bc344746f937901d243" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:cce3a60cd9a97f7360c8f95504ac349311fb7d6b9b826135936764f4de5f782d" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:410bb8ea46225efe658e5d27a3802c181a2255913003621a5d25a51aca8018d9" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:3146bbd48992d215f6bb1aef9626d734c3180b377791ded2a4d4d2c0e63c0cc2" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:04b410f93337fc6c16576d0c88e2a31091aef9d1fd212ebb8cd26899dba175e0" },
-    { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:1054e0a7613cac54ed9b3784a5fcbe023748a70004d9cca74c5f9ae00a1fdfd1" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a0161e95285a0b716de210fee0392151d601e7da3cc86595008d826abff48a8c" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:5d7a9d913e2744573ed3b7ec2f781ed39833c81c9c41859973ec10ac174c2366" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f4409df567d0723a7a3a89d32c7552a17e0ff6f137ea26a0d268c665259b2995" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:7a1eb6154e05b8056b34c7a41495e09d57f79eb0180eb4e7f3bb2a61845ca8ea" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:145b8a0c21cfcaa1705c67173c5d439087e0e120d5da9bc344746f937901d243" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:cce3a60cd9a97f7360c8f95504ac349311fb7d6b9b826135936764f4de5f782d" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:410bb8ea46225efe658e5d27a3802c181a2255913003621a5d25a51aca8018d9" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:3146bbd48992d215f6bb1aef9626d734c3180b377791ded2a4d4d2c0e63c0cc2" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:04b410f93337fc6c16576d0c88e2a31091aef9d1fd212ebb8cd26899dba175e0" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchaudio-2.8.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:1054e0a7613cac54ed9b3784a5fcbe023748a70004d9cca74c5f9ae00a1fdfd1" },
 ]
 
 [[package]]
@@ -3047,6 +3095,11 @@ dependencies = [
     { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
 ]
 
+[package.optional-dependencies]
+dev = [
+    { name = "pytest" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "ctranslate2", specifier = ">=4.5.0" },
@@ -3057,6 +3110,7 @@ requires-dist = [
     { name = "omegaconf", specifier = ">=2.3.0" },
     { name = "pandas", specifier = ">=2.2.3" },
     { name = "pyannote-audio", specifier = ">=4.0.0" },
+    { name = "pytest", marker = "extra == 'dev'" },
     { name = "torch", marker = "sys_platform == 'darwin'", specifier = "~=2.8.0", index = "https://download.pytorch.org/whl/cpu" },
     { name = "torch", marker = "platform_machine != 'x86_64' and sys_platform != 'darwin'", specifier = "~=2.8.0", index = "https://download.pytorch.org/whl/cpu" },
     { name = "torch", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'", specifier = "~=2.8.0", index = "https://download.pytorch.org/whl/cu128" },
@@ -3066,6 +3120,7 @@ requires-dist = [
     { name = "transformers", specifier = ">=4.48.0" },
     { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'", specifier = ">=3.3.0", index = "https://download.pytorch.org/whl/cu128" },
 ]
+provides-extras = ["dev"]
 
 [[package]]
 name = "yarl"

From 94f60aab58564fdb5bfa7f34c77643daf6d22e00 Mon Sep 17 00:00:00 2001
From: Barabazs <31799121+Barabazs@users.noreply.github.com>
Date: Wed, 25 Mar 2026 08:36:27 +0100
Subject: [PATCH 4/6] chore: bump version to 3.8.3

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b5d69f625..0a9f9f9a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 urls = { repository = "https://github.com/m-bain/whisperx" }
 authors = [{ name = "Max Bain" }]
 name = "whisperx"
-version = "3.8.2"
+version = "3.8.3"
 description = "Time-Accurate Automatic Speech Recognition using Whisper."
 readme = "README.md"
 requires-python = ">=3.10, <3.14"
diff --git a/uv.lock b/uv.lock
index 2aace58a6..c7f0e3453 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3074,7 +3074,7 @@ wheels = [
 
 [[package]]
 name = "whisperx"
-version = "3.8.2"
+version = "3.8.3"
 source = { editable = "." }
 dependencies = [
     { name = "ctranslate2" },

From 8efddaa2506c546fc66856405b5876033c1ca72a Mon Sep 17 00:00:00 2001
From: Claude-Assistant <noreply@anthropic.com>
Date: Wed, 25 Mar 2026 08:54:26 +0100
Subject: [PATCH 5/6] fix: require faster-whisper>=1.2.0 for use_auth_token
 support (#1385)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0a9f9f9a6..1b87bd4fc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ license = { text = "BSD-2-Clause" }
 
 dependencies = [
     "ctranslate2>=4.5.0",
-    "faster-whisper>=1.1.1",
+    "faster-whisper>=1.2.0",
     "nltk>=3.9.1",
     "numpy>=2.1.0",
     "omegaconf>=2.3.0",
diff --git a/uv.lock b/uv.lock
index c7f0e3453..ade706d0f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3103,7 +3103,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "ctranslate2", specifier = ">=4.5.0" },
-    { name = "faster-whisper", specifier = ">=1.1.1" },
+    { name = "faster-whisper", specifier = ">=1.2.0" },
     { name = "huggingface-hub", specifier = "<1.0.0" },
     { name = "nltk", specifier = ">=3.9.1" },
     { name = "numpy", specifier = ">=2.1.0" },

From 095b36b5573ae2b6b7d0711d36c0e0c09df088d1 Mon Sep 17 00:00:00 2001
From: Barabazs <31799121+Barabazs@users.noreply.github.com>
Date: Wed, 25 Mar 2026 09:10:59 +0100
Subject: [PATCH 6/6] chore: bump version to 3.8.4

---
 pyproject.toml | 2 +-
 uv.lock        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1b87bd4fc..df0592eaf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 urls = { repository = "https://github.com/m-bain/whisperx" }
 authors = [{ name = "Max Bain" }]
 name = "whisperx"
-version = "3.8.3"
+version = "3.8.4"
 description = "Time-Accurate Automatic Speech Recognition using Whisper."
 readme = "README.md"
 requires-python = ">=3.10, <3.14"
diff --git a/uv.lock b/uv.lock
index ade706d0f..7b26699df 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3074,7 +3074,7 @@ wheels = [
 
 [[package]]
 name = "whisperx"
-version = "3.8.3"
+version = "3.8.4"
 source = { editable = "." }
 dependencies = [
     { name = "ctranslate2" },