From 2e1c2bbf1680e583f59f35f9a79fe92f9f2584b8 Mon Sep 17 00:00:00 2001 From: Aegis AI Assistant Date: Wed, 6 May 2026 23:09:47 -0700 Subject: [PATCH 1/2] feat: Expose MLXFast.fromFp8 in bindings --- .github/workflows/upstream-sync.yml | 7 ++++--- Source/MLX/MLXFast.swift | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/workflows/upstream-sync.yml b/.github/workflows/upstream-sync.yml index 29d4cd71..8d4ed463 100644 --- a/.github/workflows/upstream-sync.yml +++ b/.github/workflows/upstream-sync.yml @@ -32,13 +32,14 @@ jobs: - name: Create or Update Sync Branch run: | - git checkout -B sync/upstream-latest - git reset --hard upstream/main + git checkout -B sync/upstream-latest origin/main + git checkout upstream/main -- . + git checkout origin/main -- .github/workflows git push -f origin sync/upstream-latest - name: Create Pull Request to Main env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh pr create --base main --head sync/upstream-latest \ --title "🔄 Auto-Sync: Apple Upstream Repository" \ diff --git a/Source/MLX/MLXFast.swift b/Source/MLX/MLXFast.swift index 41dccd23..a803aaeb 100644 --- a/Source/MLX/MLXFast.swift +++ b/Source/MLX/MLXFast.swift @@ -420,6 +420,25 @@ public enum MLXFast { mlx_fast_set_prefetch_enabled(enabled) } + /// Convert an array of raw FP8 E4M3 bytes (stored as uint8 by safetensors loader) + /// to the specified floating point dtype using proper FP8 E4M3 semantics. + /// + /// MLX's safetensors loader maps `F8_E4M3` → `uint8` (raw bit patterns). + /// Use this before applying block-wise scale_inv to dequantize FP8 weights correctly. + /// + /// - Parameters: + /// - x: uint8 array containing raw FP8 E4M3 bit patterns + /// - dtype: target floating-point dtype (e.g. `.bfloat16`, `.float16`, `.float32`) + /// - stream: stream or device to evaluate on + /// - Returns: Array in the requested dtype with correctly converted FP8 values + public static func fromFp8( + _ x: MLXArray, dtype: DType = .bfloat16, stream: StreamOrDevice = .default + ) -> MLXArray { + var result = mlx_array_new() + mlx_from_fp8(&result, x.ctx, dtype.cmlxDtype, stream.ctx) + return MLXArray(result) + } + } /// Optimized implementation of `NN.RoPE`. From c1708ae59d89cef53a538df1aac31ddcba9706f9 Mon Sep 17 00:00:00 2001 From: Aegis AI Assistant Date: Wed, 6 May 2026 23:33:10 -0700 Subject: [PATCH 2/2] fix: Address Copilot review points --- .github/workflows/upstream-sync.yml | 3 ++- Source/MLX/MLXFast.swift | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/upstream-sync.yml b/.github/workflows/upstream-sync.yml index 8d4ed463..9fd5408f 100644 --- a/.github/workflows/upstream-sync.yml +++ b/.github/workflows/upstream-sync.yml @@ -35,11 +35,12 @@ jobs: git checkout -B sync/upstream-latest origin/main git checkout upstream/main -- . git checkout origin/main -- .github/workflows + git commit -m "chore: sync with upstream/main" || true git push -f origin sync/upstream-latest - name: Create Pull Request to Main env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.SWIFTLM_PR_TOKEN || secrets.GITHUB_TOKEN }} run: | gh pr create --base main --head sync/upstream-latest \ --title "🔄 Auto-Sync: Apple Upstream Repository" \ diff --git a/Source/MLX/MLXFast.swift b/Source/MLX/MLXFast.swift index a803aaeb..0eaf07ad 100644 --- a/Source/MLX/MLXFast.swift +++ b/Source/MLX/MLXFast.swift @@ -434,6 +434,8 @@ public enum MLXFast { public static func fromFp8( _ x: MLXArray, dtype: DType = .bfloat16, stream: StreamOrDevice = .default ) -> MLXArray { + precondition(x.dtype == .uint8, "FP8 input must be uint8 bit patterns") + precondition(dtype == .bfloat16 || dtype == .float16 || dtype == .float32, "Target dtype must be a floating point type") var result = mlx_array_new() mlx_from_fp8(&result, x.ctx, dtype.cmlxDtype, stream.ctx) return MLXArray(result)