diff --git a/skainet-io/skainet-io-iree-params/src/androidMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.android.kt b/skainet-io/skainet-io-iree-params/src/androidMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.android.kt
new file mode 100644
index 00000000..eda154d5
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/androidMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.android.kt
@@ -0,0 +1,48 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Sink
+import kotlinx.io.write
+import sk.ainet.lang.tensor.storage.BufferHandle
+import java.io.RandomAccessFile
+import java.nio.channels.FileChannel
+
+/**
+ * Android actual for [writeFileBackedBytes]. Android's Dalvik/ART
+ * runtime supports the same `RandomAccessFile` + `FileChannel.map`
+ * surface as the desktop JVM, so the mmap implementation is byte-for-
+ * byte identical to the jvmMain version. Kept as a separate file
+ * (rather than sharing via a `jvmAndroidMain` intermediate source
+ * set) because this module does not yet configure a hierarchical
+ * source-set template.
+ */
+internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
+    require(handle.sizeInBytes <= Int.MAX_VALUE.toLong()) {
+        "FileBacked region of ${handle.sizeInBytes} bytes exceeds Int.MAX_VALUE; " +
+            "multi-window mmap is not yet implemented (see issue #523 PR E follow-up). " +
+            "path=${handle.path} offset=${handle.fileOffset}"
+    }
+    require(handle.sizeInBytes >= 0) {
+        "FileBacked size must be non-negative, got ${handle.sizeInBytes}"
+    }
+    if (handle.sizeInBytes == 0L) return
+
+    RandomAccessFile(handle.path, "r").use { raf ->
+        raf.channel.use { channel ->
+            val mapped = channel.map(
+                FileChannel.MapMode.READ_ONLY,
+                handle.fileOffset,
+                handle.sizeInBytes
+            )
+            val chunk = ByteArray(CHUNK_SIZE)
+            var remaining = handle.sizeInBytes.toInt()
+            while (remaining > 0) {
+                val step = if (remaining >= CHUNK_SIZE) CHUNK_SIZE else remaining
+                mapped.get(chunk, 0, step)
+                sink.write(chunk, startIndex = 0, endIndex = step)
+                remaining -= step
+            }
+        }
+    }
+}
+
+private const val CHUNK_SIZE: Int = 64 * 1024
diff --git a/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.kt b/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.kt
new file mode 100644
index 00000000..314b177f
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.kt
@@ -0,0 +1,32 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Sink
+import sk.ainet.lang.tensor.storage.BufferHandle
+
+/**
+ * Stream the bytes behind a [BufferHandle.FileBacked] handle directly
+ * into [sink].
+ *
+ * The whole point of the FileBacked variant is that the tensor lives
+ * as a byte range in a source file — the GGUF or safetensors blob on
+ * disk. Under [IrpaWriter], that range blits verbatim into the
+ * `.irpa` archive's storage segment with no intermediate heap copy,
+ * no parse, and no re-quantization. PR E of issue #523 closes the
+ * loop on this path for real models where inline weights are
+ * unworkable (Whisper-tiny ≈ 151 MB text MLIR under the inline
+ * policy).
+ *
+ * JVM actual uses `FileChannel.map` for a true mmap window. Platforms
+ * without mmap support throw with a pointer to the tracking issue —
+ * rather than silently falling back to a slower read path that would
+ * undermine the "zero-copy ingestion" contract callers rely on.
+ *
+ * Implementations MUST:
+ * - Respect [BufferHandle.FileBacked.fileOffset] as the starting
+ *   byte in the file, not in any mapped window.
+ * - Write exactly [BufferHandle.FileBacked.sizeInBytes] bytes.
+ * - Not flush or close [sink]; the caller manages lifecycle.
+ * - Close any OS resources they open (file descriptors, mapped
+ *   regions) before returning, regardless of exceptions.
+ */
+internal expect fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked)
diff --git a/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/IrpaWriter.kt b/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/IrpaWriter.kt
index 2be7eb7c..796c9518 100644
--- a/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/IrpaWriter.kt
+++ b/skainet-io/skainet-io-iree-params/src/commonMain/kotlin/sk/ainet/io/irpa/IrpaWriter.kt
@@ -222,21 +222,26 @@ public class IrpaWriter {
     }
 
     private fun writeBufferHandle(sink: Sink, handle: BufferHandle) {
-        val (data, offset, length) = when (handle) {
-            is BufferHandle.Owned -> Triple(handle.data, handle.offset, handle.sizeInBytes.toInt())
-            is BufferHandle.Borrowed -> Triple(handle.data, handle.offset, handle.sizeInBytes.toInt())
+        when (handle) {
+            is BufferHandle.Owned -> writeByteArray(sink, handle.data, handle.offset, handle.sizeInBytes.toInt())
+            is BufferHandle.Borrowed -> writeByteArray(sink, handle.data, handle.offset, handle.sizeInBytes.toInt())
+            is BufferHandle.FileBacked -> writeFileBackedBytes(sink, handle)
             else -> throw IllegalArgumentException(
                 "IrpaWriter does not yet handle BufferHandle subclass ${handle::class.simpleName}. " +
-                    "Only Owned/Borrowed byte-array handles are wired in PR C; mmap-backed " +
-                    "handles land with PR E (issue #523)."
+                    "Owned / Borrowed / FileBacked are wired. Aliased, DeviceResident, and " +
+                    "other variants are out of scope — resolve them to one of the wired " +
+                    "variants before handing to the writer."
             )
         }
-        // Byte-at-a-time so we do not rely on kotlinx.io's
-        // `Sink.write(ByteArray, Int, Int)` extension resolving on the
-        // raw receiver — extension overload ambiguity bit this on JVM
-        // in an earlier revision. Performance-critical callers should
-        // switch to `write(ByteArray, ...)` once that path is covered
-        // by a dedicated test.
+    }
+
+    private fun writeByteArray(sink: Sink, data: ByteArray, offset: Int, length: Int) {
+        // Byte-at-a-time so common code does not depend on how kotlinx.io's
+        // `Sink.write(ByteArray, Int, Int)` resolves on the raw receiver
+        // (overload ambiguity bit an earlier revision on JVM). At the
+        // sizes seen in practice for single-op values (tens to a few
+        // thousand bytes) the overhead is lost in the wider write cost;
+        // FileBacked handles use a chunked copy on the platform side.
         for (i in offset until offset + length) {
             sink.writeByte(data[i])
         }
diff --git a/skainet-io/skainet-io-iree-params/src/jvmMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.jvm.kt b/skainet-io/skainet-io-iree-params/src/jvmMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.jvm.kt
new file mode 100644
index 00000000..1b1b8947
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/jvmMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.jvm.kt
@@ -0,0 +1,62 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Sink
+import kotlinx.io.write
+import sk.ainet.lang.tensor.storage.BufferHandle
+import java.io.RandomAccessFile
+import java.nio.channels.FileChannel
+
+/**
+ * JVM actual for [writeFileBackedBytes]: opens the source file
+ * read-only, memory-maps the declared byte range, and copies it in
+ * chunks into [sink].
+ *
+ * Uses direct mmap rather than a buffered stream for the usual
+ * reason — avoids an extra heap copy and keeps the kernel in charge
+ * of page-in / eviction. The chunk size (64 KiB) is a throughput
+ * compromise: small enough that `sink.write` does not see a
+ * multi-megabyte transient byte array, large enough that system-call
+ * overhead does not dominate.
+ *
+ * FileChannel.map cannot return a region larger than `Int.MAX_VALUE`
+ * (≈ 2 GiB) in a single call, so oversized handles are rejected with
+ * a diagnostic rather than silently truncated. Splitting into
+ * multiple windows is doable but out of scope for PR E; filed as
+ * a follow-up once a real model hits the limit.
+ *
+ * NOTE(review): the JDK exposes no public API to unmap a
+ * `MappedByteBuffer`; the mapping is released only once the buffer is
+ * garbage-collected, so the "close mapped regions before returning"
+ * clause of the expect contract is best-effort here — confirm.
+ */
+internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
+    require(handle.sizeInBytes <= Int.MAX_VALUE.toLong()) {
+        "FileBacked region of ${handle.sizeInBytes} bytes exceeds Int.MAX_VALUE; " +
+            "multi-window mmap is not yet implemented (see issue #523 PR E follow-up). " +
+            "path=${handle.path} offset=${handle.fileOffset}"
+    }
+    require(handle.sizeInBytes >= 0) {
+        "FileBacked size must be non-negative, got ${handle.sizeInBytes}"
+    }
+    if (handle.sizeInBytes == 0L) return
+
+    RandomAccessFile(handle.path, "r").use { raf ->
+        raf.channel.use { channel ->
+            val mapped = channel.map(
+                FileChannel.MapMode.READ_ONLY,
+                handle.fileOffset,
+                handle.sizeInBytes
+            )
+            val chunk = ByteArray(CHUNK_SIZE)
+            var remaining = handle.sizeInBytes.toInt()
+            while (remaining > 0) {
+                val step = if (remaining >= CHUNK_SIZE) CHUNK_SIZE else remaining
+                mapped.get(chunk, 0, step)
+                sink.write(chunk, startIndex = 0, endIndex = step)
+                remaining -= step
+            }
+        }
+    }
+}
+
+private const val CHUNK_SIZE: Int = 64 * 1024
diff --git a/skainet-io/skainet-io-iree-params/src/jvmTest/kotlin/sk/ainet/io/irpa/FileBackedIrpaRoundTripTest.kt b/skainet-io/skainet-io-iree-params/src/jvmTest/kotlin/sk/ainet/io/irpa/FileBackedIrpaRoundTripTest.kt
new file mode 100644
index 00000000..826de91c
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/jvmTest/kotlin/sk/ainet/io/irpa/FileBackedIrpaRoundTripTest.kt
@@ -0,0 +1,127 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Buffer
+import kotlinx.io.readByteArray
+import org.junit.Rule
+import org.junit.Test
+import org.junit.rules.TemporaryFolder
+import sk.ainet.compile.hlo.ExternalParameterRef
+import sk.ainet.lang.tensor.storage.BufferHandle
+import sk.ainet.lang.tensor.storage.TensorEncoding
+import kotlin.test.assertContentEquals
+import kotlin.test.assertEquals
+import kotlin.test.assertFailsWith
+
+/**
+ * JVM round-trip for the FileBacked path added in PR E of #523.
+ *
+ * The gguf / safetensors loaders already produce
+ * `BufferHandle.FileBacked` via their `loadTensorStorageMapped`
+ * methods — this test pins the writer end of that path so the full
+ * ingestion pipeline (source file → FileBacked handle → IrpaWriter →
+ * `.irpa`) lands bytes unchanged.
+ */
+class FileBackedIrpaRoundTripTest {
+
+    @JvmField
+    @Rule
+    val tmp: TemporaryFolder = TemporaryFolder()
+
+    @Test
+    fun testFileBackedEntryBytesLandInStorageSegment() {
+        // Write a fake "weights file" with a known byte pattern.
+        // Tensor bytes live at offset 7, length 16 — deliberately a
+        // non-aligned offset in a file that also contains leading and
+        // trailing filler so any off-by-one in the mmap math shows up.
+        val leading = byteArrayOf(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
+        val tensor = byteArrayOf(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16
+        )
+        val trailing = byteArrayOf(0x77.toByte(), 0x66, 0x55, 0x44)
+        val sourceFile = tmp.newFile("weights.bin")
+        sourceFile.writeBytes(leading + tensor + trailing)
+
+        val ref = ExternalParameterRef(
+            scope = "model",
+            key = "w",
+            encoding = TensorEncoding.Dense(bytesPerElement = 1),
+            source = BufferHandle.FileBacked(
+                path = sourceFile.absolutePath,
+                fileOffset = leading.size.toLong(),
+                sizeInBytes = tensor.size.toLong()
+            )
+        )
+
+        val buffer = Buffer()
+        IrpaWriter().write(listOf(ref), buffer)
+        val bytes = buffer.readByteArray()
+
+        // Locate the storage segment via its header-relative offset
+        // (byte 72 in the header-block). The entire tensor region
+        // should appear verbatim at that offset.
+        val storageOffset = readU64Le(bytes, 72).toInt()
+        val storageLength = readU64Le(bytes, 80).toInt()
+        assertEquals(tensor.size, storageLength, "storage length tracks FileBacked size")
+
+        val stored = bytes.copyOfRange(storageOffset, storageOffset + tensor.size)
+        assertContentEquals(tensor, stored, "mmap-transferred bytes must match source exactly")
+    }
+
+    @Test
+    fun testFileBackedRejectsOversizedMap() {
+        val sourceFile = tmp.newFile("toobig.bin")
+        sourceFile.writeBytes(byteArrayOf(0, 0, 0, 0))
+
+        val ref = ExternalParameterRef(
+            scope = "model",
+            key = "huge",
+            encoding = TensorEncoding.Dense(bytesPerElement = 1),
+            // Declared size exceeds Int.MAX_VALUE — this is the guard
+            // rail for the single-window mmap limit. A follow-up will
+            // add multi-window streaming.
+            source = BufferHandle.FileBacked(
+                path = sourceFile.absolutePath,
+                fileOffset = 0L,
+                sizeInBytes = Int.MAX_VALUE.toLong() + 1L
+            )
+        )
+
+        assertFailsWith<IllegalArgumentException> {
+            IrpaWriter().write(listOf(ref), Buffer())
+        }
+    }
+
+    @Test
+    fun testFileBackedZeroLengthIsNoOp() {
+        // Edge case: a 0-byte FileBacked handle should not open the
+        // file, should not mmap, and should write zero bytes into the
+        // storage segment. Useful because an empty tensor is a
+        // perfectly valid degenerate case (e.g. an unused slot).
+        val ref = ExternalParameterRef(
+            scope = "model",
+            key = "empty",
+            encoding = TensorEncoding.Dense(bytesPerElement = 1),
+            source = BufferHandle.FileBacked(
+                path = "/nonexistent/path", // must not be opened
+                fileOffset = 0L,
+                sizeInBytes = 0L
+            )
+        )
+
+        val buffer = Buffer()
+        IrpaWriter().write(listOf(ref), buffer)
+        val bytes = buffer.readByteArray()
+
+        // storage.length in the header is 0.
+        assertEquals(0L, readU64Le(bytes, 80))
+    }
+
+    private fun readU64Le(bytes: ByteArray, offset: Int): Long {
+        var result = 0L
+        for (i in 0 until 8) {
+            result = result or ((bytes[offset + i].toLong() and 0xff) shl (i * 8))
+        }
+        return result
+    }
+}
diff --git a/skainet-io/skainet-io-iree-params/src/nativeMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.native.kt b/skainet-io/skainet-io-iree-params/src/nativeMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.native.kt
new file mode 100644
index 00000000..9dbd8ed4
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/nativeMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.native.kt
@@ -0,0 +1,23 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Sink
+import sk.ainet.lang.tensor.storage.BufferHandle
+
+/**
+ * Native actual for [writeFileBackedBytes]. Kotlin/Native can reach
+ * `mmap(2)` via cinterop, but wiring that up cleanly across every
+ * native target (iosArm64, iosSimulatorArm64, macosArm64, linuxX64,
+ * linuxArm64) is deferred — PR E focuses on the JVM + Android path
+ * which is where real inference workloads run today.
+ *
+ * Callers that hit this on native should either resolve the handle
+ * into a [BufferHandle.Owned] / [BufferHandle.Borrowed] before
+ * handing it to [IrpaWriter], or wait for native mmap support.
+ */
+internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
+    throw NotImplementedError(
+        "FileBacked mmap transfer is not yet implemented on Kotlin/Native. " +
+            "See issue #523 PR E follow-up. Resolve the handle into an Owned " +
+            "or Borrowed buffer before writing."
+    )
+}
diff --git a/skainet-io/skainet-io-iree-params/src/wasmJsMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.wasmjs.kt b/skainet-io/skainet-io-iree-params/src/wasmJsMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.wasmjs.kt
new file mode 100644
index 00000000..90bd336c
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/wasmJsMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.wasmjs.kt
@@ -0,0 +1,19 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Sink
+import sk.ainet.lang.tensor.storage.BufferHandle
+
+/**
+ * wasmJs actual. The wasm browser sandbox has no direct filesystem
+ * access, so mmap is not applicable here. Emitted `.irpa` files are
+ * typically produced server-side or at build time; wasmJs consumers
+ * should load pre-written archives rather than generate them. See
+ * issue #523.
+ */
+internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
+    throw NotImplementedError(
+        "FileBacked mmap transfer is not supported on wasmJs — no filesystem " +
+            "access in the browser sandbox. Produce .irpa archives on JVM/Android " +
+            "and load them at runtime."
+    )
+}
diff --git a/skainet-io/skainet-io-iree-params/src/wasmWasiMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.wasmwasi.kt b/skainet-io/skainet-io-iree-params/src/wasmWasiMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.wasmwasi.kt
new file mode 100644
index 00000000..7908543c
--- /dev/null
+++ b/skainet-io/skainet-io-iree-params/src/wasmWasiMain/kotlin/sk/ainet/io/irpa/FileBackedTransfer.wasmwasi.kt
@@ -0,0 +1,16 @@
+package sk.ainet.io.irpa
+
+import kotlinx.io.Sink
+import sk.ainet.lang.tensor.storage.BufferHandle
+
+/**
+ * wasmWasi actual. WASI has filesystem primitives but no mmap syscall
+ * today (the preview2 proposal adds one but isn't widespread yet).
+ * Same deferral rationale as the Native target. See issue #523.
+ */
+internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
+    throw NotImplementedError(
+        "FileBacked mmap transfer is not yet implemented on wasmWasi. See " +
+            "issue #523 PR E follow-up."
+    )
+}