Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package sk.ainet.io.irpa

import kotlinx.io.Sink
import kotlinx.io.write
import sk.ainet.lang.tensor.storage.BufferHandle
import java.io.RandomAccessFile
import java.nio.channels.FileChannel

/**
 * Android actual for [writeFileBackedBytes].
 *
 * The Dalvik/ART runtime exposes the same `RandomAccessFile` +
 * `FileChannel.map` surface as the desktop JVM, so this body mirrors
 * the jvmMain implementation. It lives in its own file (instead of a
 * shared `jvmAndroidMain` intermediate source set) because this module
 * does not yet configure a hierarchical source-set template.
 *
 * Behavior:
 * - Sizes above `Int.MAX_VALUE` or below zero are rejected with
 *   [IllegalArgumentException].
 * - A zero-length handle returns immediately without opening the file.
 * - Otherwise the byte range is mapped read-only and copied into
 *   [sink] through a 64 KiB staging array.
 */
internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
    // Guard rails first: a single mmap window cannot exceed Int.MAX_VALUE bytes.
    require(handle.sizeInBytes <= Int.MAX_VALUE.toLong()) {
        "FileBacked region of ${handle.sizeInBytes} bytes exceeds Int.MAX_VALUE; " +
            "multi-window mmap is not yet implemented (see issue #523 PR E follow-up). " +
            "path=${handle.path} offset=${handle.fileOffset}"
    }
    require(handle.sizeInBytes >= 0) {
        "FileBacked size must be non-negative, got ${handle.sizeInBytes}"
    }
    // Nothing to transfer, so the file is never touched.
    if (handle.sizeInBytes == 0L) return

    RandomAccessFile(handle.path, "r").use { source ->
        source.channel.use { fileChannel ->
            val window = fileChannel.map(
                FileChannel.MapMode.READ_ONLY,
                handle.fileOffset,
                handle.sizeInBytes
            )
            val staging = ByteArray(64 * 1024)
            // The mapped window starts at position 0 with limit == sizeInBytes,
            // so draining it transfers exactly the declared byte count.
            while (window.hasRemaining()) {
                val count = minOf(staging.size, window.remaining())
                window.get(staging, 0, count)
                sink.write(staging, startIndex = 0, endIndex = count)
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package sk.ainet.io.irpa

import kotlinx.io.Sink
import sk.ainet.lang.tensor.storage.BufferHandle

/**
 * Stream the bytes behind a [BufferHandle.FileBacked] handle directly
 * into [sink].
 *
 * The whole point of the FileBacked variant is that the tensor lives
 * as a byte range in a source file — the GGUF or safetensors blob on
 * disk. Under [IrpaWriter], that range blits verbatim into the
 * `.irpa` archive's storage segment with no parse and no
 * re-quantization, and without first loading the whole tensor onto
 * the heap. PR E of issue #523 closes the loop on this path for real
 * models where inline weights are unworkable (Whisper-tiny ≈ 151 MB
 * text MLIR under the inline policy).
 *
 * Platform behavior:
 * - The JVM and Android actuals map the byte range with
 *   `FileChannel.map` and copy it into [sink] through a 64 KiB
 *   staging array. They reject sizes above `Int.MAX_VALUE` (the
 *   single-window mmap limit) and negative sizes with
 *   [IllegalArgumentException], and return without opening the file
 *   when the size is zero.
 * - The Native, wasmJs and wasmWasi actuals throw
 *   [NotImplementedError] — rather than silently falling back to a
 *   slower read path that would undermine the "zero-copy ingestion"
 *   contract callers rely on.
 *
 * Implementations MUST:
 *  - Respect [BufferHandle.FileBacked.fileOffset] as the starting
 *    byte in the file, not in any mapped window.
 *  - Write exactly [BufferHandle.FileBacked.sizeInBytes] bytes.
 *  - Not flush or close [sink]; the caller manages lifecycle.
 *  - Close any OS resources they open (file descriptors, mapped
 *    regions) before returning, regardless of exceptions.
 *
 * @param sink destination that receives the raw bytes; left open and
 *   unflushed.
 * @param handle describes the source file path, the starting offset
 *   within that file, and the number of bytes to transfer.
 */
internal expect fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked)
Original file line number Diff line number Diff line change
Expand Up @@ -222,21 +222,26 @@ public class IrpaWriter {
}

private fun writeBufferHandle(sink: Sink, handle: BufferHandle) {
val (data, offset, length) = when (handle) {
is BufferHandle.Owned -> Triple(handle.data, handle.offset, handle.sizeInBytes.toInt())
is BufferHandle.Borrowed -> Triple(handle.data, handle.offset, handle.sizeInBytes.toInt())
when (handle) {
is BufferHandle.Owned -> writeByteArray(sink, handle.data, handle.offset, handle.sizeInBytes.toInt())
is BufferHandle.Borrowed -> writeByteArray(sink, handle.data, handle.offset, handle.sizeInBytes.toInt())
is BufferHandle.FileBacked -> writeFileBackedBytes(sink, handle)
else -> throw IllegalArgumentException(
"IrpaWriter does not yet handle BufferHandle subclass ${handle::class.simpleName}. " +
"Only Owned/Borrowed byte-array handles are wired in PR C; mmap-backed " +
"handles land with PR E (issue #523)."
"Owned / Borrowed / FileBacked are wired. Aliased, DeviceResident, and " +
"other variants are out of scope — resolve them to one of the wired " +
"variants before handing to the writer."
)
}
// Byte-at-a-time so we do not rely on kotlinx.io's
// `Sink.write(ByteArray, Int, Int)` extension resolving on the
// raw receiver — extension overload ambiguity bit this on JVM
// in an earlier revision. Performance-critical callers should
// switch to `write(ByteArray, ...)` once that path is covered
// by a dedicated test.
}

private fun writeByteArray(sink: Sink, data: ByteArray, offset: Int, length: Int) {
// Byte-at-a-time for the same reason noted below — and because
// under the sizes we see in practice for single-op values
// (tens to a few thousand bytes) the overhead is lost in the
// wider write cost. FileBacked paths use a chunked copy on
// their platform-specific side, which is where the byte
// volume is meaningful.
for (i in offset until offset + length) {
sink.writeByte(data[i])
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package sk.ainet.io.irpa

import kotlinx.io.Sink
import kotlinx.io.write
import sk.ainet.lang.tensor.storage.BufferHandle
import java.io.RandomAccessFile
import java.nio.channels.FileChannel

/**
 * JVM actual for [writeFileBackedBytes].
 *
 * Opens the source file read-only, memory-maps the declared byte
 * range, and drains the mapping into [sink] one chunk at a time.
 *
 * Direct mmap is preferred over a buffered stream for the usual
 * reason: it avoids an extra heap copy and leaves page-in / eviction
 * to the kernel. The 64 KiB chunk size is a throughput compromise —
 * small enough that `sink.write` never sees a multi-megabyte
 * transient byte array, large enough that per-call overhead does not
 * dominate.
 *
 * `FileChannel.map` cannot return a region larger than
 * `Int.MAX_VALUE` (≈ 2 GiB) in a single call, so oversized handles
 * are rejected with a diagnostic rather than silently truncated.
 * Splitting into multiple windows is doable but out of scope for
 * PR E; it is left as a follow-up for when a real model hits the
 * limit.
 *
 * Negative sizes are rejected with [IllegalArgumentException] as
 * well, and a zero-length handle returns without opening the file.
 */
internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
    // Guard rails first: a single mmap window cannot exceed Int.MAX_VALUE bytes.
    require(handle.sizeInBytes <= Int.MAX_VALUE.toLong()) {
        "FileBacked region of ${handle.sizeInBytes} bytes exceeds Int.MAX_VALUE; " +
            "multi-window mmap is not yet implemented (see issue #523 PR E follow-up). " +
            "path=${handle.path} offset=${handle.fileOffset}"
    }
    require(handle.sizeInBytes >= 0) {
        "FileBacked size must be non-negative, got ${handle.sizeInBytes}"
    }
    // Nothing to transfer, so the file is never touched.
    if (handle.sizeInBytes == 0L) return

    RandomAccessFile(handle.path, "r").use { source ->
        source.channel.use { fileChannel ->
            val window = fileChannel.map(
                FileChannel.MapMode.READ_ONLY,
                handle.fileOffset,
                handle.sizeInBytes
            )
            val staging = ByteArray(CHUNK_SIZE)
            // The mapped window starts at position 0 with limit == sizeInBytes,
            // so draining it transfers exactly the declared byte count.
            while (window.hasRemaining()) {
                val count = minOf(CHUNK_SIZE, window.remaining())
                window.get(staging, 0, count)
                sink.write(staging, startIndex = 0, endIndex = count)
            }
        }
    }
}

/**
 * Size in bytes (64 KiB) of the heap staging array used to copy the
 * memory-mapped region into the sink, one slice at a time.
 */
private const val CHUNK_SIZE: Int = 64 * 1024
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package sk.ainet.io.irpa

import kotlinx.io.Buffer
import kotlinx.io.readByteArray
import org.junit.Rule
import org.junit.Test
import org.junit.rules.TemporaryFolder
import sk.ainet.compile.hlo.ExternalParameterRef
import sk.ainet.lang.tensor.storage.BufferHandle
import sk.ainet.lang.tensor.storage.TensorEncoding
import kotlin.test.assertContentEquals
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith

/**
 * JVM round-trip coverage for the FileBacked path added in PR E of #523.
 *
 * The gguf / safetensors loaders already hand out
 * `BufferHandle.FileBacked` from their `loadTensorStorageMapped`
 * methods. These tests pin the writer side, so that the full
 * ingestion pipeline (source file → FileBacked handle → IrpaWriter →
 * `.irpa`) lands bytes unchanged.
 */
class FileBackedIrpaRoundTripTest {

    @JvmField
    @Rule
    val tmp: TemporaryFolder = TemporaryFolder()

    @Test
    fun testFileBackedEntryBytesLandInStorageSegment() {
        // Fake "weights file": 7 bytes of leading filler, a 16-byte
        // tensor (values 1..16) at the deliberately unaligned offset 7,
        // then trailing filler — any off-by-one in the mmap math
        // would pull filler bytes into the result.
        val prefix = byteArrayOf(0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77)
        val payload = ByteArray(16) { index -> (index + 1).toByte() }
        val suffix = byteArrayOf(0x77, 0x66, 0x55, 0x44)
        val weights = tmp.newFile("weights.bin")
        weights.writeBytes(prefix + payload + suffix)

        val entry = fileBackedRef(
            key = "w",
            path = weights.absolutePath,
            fileOffset = prefix.size.toLong(),
            sizeInBytes = payload.size.toLong()
        )
        val archive = writeArchive(entry)

        // The storage segment is located through the header fields;
        // the tensor region must appear verbatim at its start.
        val segmentStart = readU64Le(archive, STORAGE_OFFSET_FIELD).toInt()
        val segmentLength = readU64Le(archive, STORAGE_LENGTH_FIELD).toInt()
        assertEquals(payload.size, segmentLength, "storage length tracks FileBacked size")

        val landed = archive.copyOfRange(segmentStart, segmentStart + payload.size)
        assertContentEquals(payload, landed, "mmap-transferred bytes must match source exactly")
    }

    @Test
    fun testFileBackedRejectsOversizedMap() {
        val tiny = tmp.newFile("toobig.bin")
        tiny.writeBytes(ByteArray(4))

        // The declared size is one past Int.MAX_VALUE — the guard rail
        // for the single-window mmap limit. A follow-up will add
        // multi-window streaming.
        val entry = fileBackedRef(
            key = "huge",
            path = tiny.absolutePath,
            fileOffset = 0L,
            sizeInBytes = Int.MAX_VALUE.toLong() + 1L
        )

        assertFailsWith<IllegalArgumentException> {
            IrpaWriter().write(listOf(entry), Buffer())
        }
    }

    @Test
    fun testFileBackedZeroLengthIsNoOp() {
        // A 0-byte FileBacked handle must not open the file, must not
        // mmap, and must contribute zero bytes to the storage segment.
        // An empty tensor is a valid degenerate case (e.g. an unused
        // slot), hence the path that does not exist.
        val entry = fileBackedRef(
            key = "empty",
            path = "/nonexistent/path", // must not be opened
            fileOffset = 0L,
            sizeInBytes = 0L
        )

        val archive = writeArchive(entry)

        // storage.length in the header is 0.
        assertEquals(0L, readU64Le(archive, STORAGE_LENGTH_FIELD))
    }

    /** Builds a one-byte-per-element dense entry in scope "model" backed by a file range. */
    private fun fileBackedRef(
        key: String,
        path: String,
        fileOffset: Long,
        sizeInBytes: Long
    ): ExternalParameterRef = ExternalParameterRef(
        scope = "model",
        key = key,
        encoding = TensorEncoding.Dense(bytesPerElement = 1),
        source = BufferHandle.FileBacked(
            path = path,
            fileOffset = fileOffset,
            sizeInBytes = sizeInBytes
        )
    )

    /** Writes a single-entry archive into an in-memory buffer and returns its bytes. */
    private fun writeArchive(entry: ExternalParameterRef): ByteArray {
        val out = Buffer()
        IrpaWriter().write(listOf(entry), out)
        return out.readByteArray()
    }

    /** Decodes the little-endian unsigned 64-bit value that starts at [offset]. */
    private fun readU64Le(bytes: ByteArray, offset: Int): Long =
        (0 until 8).fold(0L) { acc, i ->
            acc or ((bytes[offset + i].toLong() and 0xff) shl (i * 8))
        }

    private companion object {
        /** Header position of the storage segment's offset field. */
        const val STORAGE_OFFSET_FIELD = 72

        /** Header position of the storage segment's length field. */
        const val STORAGE_LENGTH_FIELD = 80
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package sk.ainet.io.irpa

import kotlinx.io.Sink
import sk.ainet.lang.tensor.storage.BufferHandle

/**
 * Native actual for [writeFileBackedBytes].
 *
 * Always throws [NotImplementedError]. Kotlin/Native can reach
 * `mmap(2)` through cinterop, but wiring that up cleanly across every
 * native target (iosArm64, iosSimulatorArm64, macosArm64, linuxX64,
 * linuxArm64) is deferred — PR E concentrates on the JVM + Android
 * path, which is where real inference workloads run today.
 *
 * Callers that land here on native should either resolve the handle
 * into a [BufferHandle.Owned] / [BufferHandle.Borrowed] before
 * passing it to [IrpaWriter], or wait for native mmap support.
 */
internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
    val reason = "FileBacked mmap transfer is not yet implemented on Kotlin/Native. " +
        "See issue #523 PR E follow-up. Resolve the handle into an Owned " +
        "or Borrowed buffer before writing."
    throw NotImplementedError(reason)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package sk.ainet.io.irpa

import kotlinx.io.Sink
import sk.ainet.lang.tensor.storage.BufferHandle

/**
 * wasmJs actual for [writeFileBackedBytes].
 *
 * Always throws [NotImplementedError]. The wasm browser sandbox has
 * no direct filesystem access, so mmap does not apply. `.irpa` files
 * are typically produced server-side or at build time; wasmJs
 * consumers should load pre-written archives rather than generate
 * them. See issue #523.
 */
internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
    val reason = "FileBacked mmap transfer is not supported on wasmJs — no filesystem " +
        "access in the browser sandbox. Produce .irpa archives on JVM/Android " +
        "and load them at runtime."
    throw NotImplementedError(reason)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package sk.ainet.io.irpa

import kotlinx.io.Sink
import sk.ainet.lang.tensor.storage.BufferHandle

/**
 * wasmWasi actual for [writeFileBackedBytes].
 *
 * Always throws [NotImplementedError]. WASI has filesystem primitives
 * but no mmap syscall today (the preview2 proposal adds one but isn't
 * widespread yet), so this target is deferred for the same reason as
 * the Native one. See issue #523.
 */
internal actual fun writeFileBackedBytes(sink: Sink, handle: BufferHandle.FileBacked) {
    val reason = "FileBacked mmap transfer is not yet implemented on wasmWasi. See " +
        "issue #523 PR E follow-up."
    throw NotImplementedError(reason)
}
Loading