diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2586106b69..054cf32921 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -384,6 +384,9 @@ install (EXPORT OIIO_EXPORTED_TARGETS
 
 
 if (PROJECT_IS_TOP_LEVEL AND BUILD_TESTING AND ${PROJ_NAME}_BUILD_TESTS)
+    if (NOT BUILD_OIIOUTIL_ONLY)
+        add_subdirectory (testsuite/texture-device)
+    endif ()
     oiio_setup_test_data()
     oiio_add_all_tests()
 endif ()
diff --git a/src/cmake/testing.cmake b/src/cmake/testing.cmake
index 93aa7345e6..4ef7e2f33e 100644
--- a/src/cmake/testing.cmake
+++ b/src/cmake/testing.cmake
@@ -170,6 +170,7 @@ macro (oiio_add_all_tests)
                     cmake-consumer
                     cryptomatte
                     docs-examples-cpp
+                    texture-device
                     iinfo igrep
                     nonwhole-tiles
                     oiiotool
diff --git a/testsuite/texture-device/CMakeLists.txt b/testsuite/texture-device/CMakeLists.txt
new file mode 100644
index 0000000000..dc33933f01
--- /dev/null
+++ b/testsuite/texture-device/CMakeLists.txt
@@ -0,0 +1,78 @@
+# Copyright Contributors to the OpenImageIO project.
+# SPDX-License-Identifier: Apache-2.0
+# https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+cmake_minimum_required (VERSION 3.15)
+
+if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
+    message (FATAL_ERROR
+             "texture-device must be configured from OpenImageIO top-level CMake")
+endif ()
+
+if (NOT CMAKE_BUILD_TYPE)
+    set (CMAKE_BUILD_TYPE "Release")
+endif ()
+
+set (CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to prefer (17, 20, etc.)")
+set (CMAKE_CXX_STANDARD_REQUIRED ON)
+set (CMAKE_CXX_EXTENSIONS OFF)
+
+if (TARGET OpenImageIO AND NOT TARGET OpenImageIO::OpenImageIO)
+    add_library (OpenImageIO::OpenImageIO ALIAS OpenImageIO)
+endif ()
+
+if (NOT TARGET OpenImageIO::OpenImageIO)
+    message (FATAL_ERROR
+             "Missing target OpenImageIO::OpenImageIO; configure from OIIO root")
+endif ()
+if (NOT TARGET Imath::Imath)
+    message (FATAL_ERROR
+             "Missing target Imath::Imath; configure from OIIO root")
+endif ()
+
+# Special for OIIO testsuite when running in sanitize mode
+if (DEFINED ENV{SANITIZE})
+    add_compile_options (-fsanitize=$ENV{SANITIZE})
+    add_link_options (-fsanitize=$ENV{SANITIZE})
+endif()
+
+add_executable (texture-device
+    arena.cpp
+    blend.cpp
+    host.cpp
+    texture_loader.cpp
+    device_tests.cpp
+)
+target_link_libraries (texture-device
+    PRIVATE OpenImageIO::OpenImageIO Imath::Imath)
+
+if (MSVC)
+    target_compile_options (texture-device PRIVATE /utf-8)
+endif ()
+
+# Compile+link gate for code that is intended to remain device-safe and
+# header-only. This target must build without linking against OpenImageIO
+# libraries, so unresolved symbols are detected at link time.
+add_executable (texture-device-devicecheck
+    arena.cpp
+    blend.cpp
+    device_tests.cpp
+    devicecheck_main.cpp
+)
+if (MSVC)
+    target_compile_options (texture-device-devicecheck PRIVATE /utf-8)
+    # devicecheck intentionally does not link OIIO libs; avoid dllimport on
+    # inline OIIO types (like ustringhash) to prevent unresolved imports.
+    target_compile_definitions (texture-device-devicecheck PRIVATE OIIO_STATIC_DEFINE)
+endif ()
+target_include_directories (texture-device-devicecheck
+    PRIVATE
+        $<TARGET_PROPERTY:OpenImageIO::OpenImageIO,INTERFACE_INCLUDE_DIRECTORIES>
+)
+
+if (TARGET Imath::Imath)
+    target_include_directories (texture-device-devicecheck
+        PRIVATE
+            $<TARGET_PROPERTY:Imath::Imath,INTERFACE_INCLUDE_DIRECTORIES>
+    )
+endif ()
diff --git a/testsuite/texture-device/README.md b/testsuite/texture-device/README.md
new file mode 100644
index 0000000000..270ff80a4d
--- /dev/null
+++ b/testsuite/texture-device/README.md
@@ -0,0 +1,410 @@
+# A GPU texture system for a mock-up device
+
+This directory contains a prototype texture system for code that must be safe in a
+GPU-like kernel environment. The "device" in this test is a mock implementation,
+but the code is structured as if host and device memory were separate, with
+explicit copies between them. The demo operation is a simple blend + blur of two
+input images:
+
+| Input A | Input B | Operation on device | Result |
+|---------|---------|---------------------|--------|
+| ![grid input](img/grid.jpg) | ![checker input](img/checker.jpg) | blur + blend | ![output result](img/result.jpg) |
+
+The goal is to validate a manager/managed architecture for texture lookup,
+missing-resource requests, and retry-based execution. The host launches a kernel,
+the kernel records missing textures or tiles, and the host resolves those requests
+and relaunches until all required data is resident.
+
+The test is intentionally scalar and pragmatic. It prioritizes architecture,
+correctness, and integration points over peak performance.
+
+# The Arena and tagged_ptr framework
+
+`Host` and `MockDevice` are the concrete arenas; both implement `alloc/free`,
+`copy_to/copy_from`, and `copy_in`. `Host` tracks allocations and validates frees;
+`MockDevice` does the same and also implements `run()` for kernel dispatch.
+
+`tagged_ptr<T>` carries a lightweight context tag with every pointer. Dereference
+checks the tag against the active execution context and aborts on mismatch —
+catching accidental cross-boundary access without runtime overhead.
+
+## Optional CUDA arena sketch
+
+`arena.h`/`arena.cpp` include a guidance-only `CudaArena`.
+
+- Active only when CUDA headers are present (`TEXTURE_DEVICE_HAS_CUDA_RUNTIME`).
+- Live CUDA calls require explicit opt-in (`TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL=1`).
+- `Atomic<T>` wraps CUDA atomics under `__CUDA_ARCH__`, and `std::atomic` on host.
+
+The runtime still uses `MockDevice`; `CudaArena` is a future integration sketch.
+
+# The main loop
+
+The launch model is fail-and-retry:
+
+```cpp
+// copy_to/copy_from take tagged_ptr<[const] void> to enforce context safety;
+// the snippet uses &op as shorthand for tagged_ptr<const void>(&op, "Host").
+for (int pass = 0; pass < max_passes; ++pass) {
+	textures.begin_launch();
+	device.copy_to(device_op, &op, sizeof(op));
+	device.run(width, height, &blend_kernel, device_op);
+	device.copy_from(&op, device_op, sizeof(op));
+
+	textures.sync_from_managed();
+
+	if (textures.needs_retry()) {
+		textures.sync_to_managed();
+		continue;  // queue overflow path: recollect requests
+	}
+
+	if (!textures.failures())
+		break;  // converged: all resources resident
+
+	textures.process_requests([&](const Request& req,
+	                             DTextureSystem<Host, MockDevice>* ts) {
+		return loader.process_request(req, ts);
+	});
+	textures.sync_to_managed();
+}
+```
+
+## Pass sequence at a glance
+
+```mermaid
+sequenceDiagram
+	participant H as Host
+	participant D as MockDevice
+	participant K as blend_kernel
+	participant T as DTextureSystem
+	participant L as TextureLoader
+
+	H->>D: copy_to(op)
+	H->>D: run(width, height, blend_kernel)
+	D->>K: execute pixels
+	K->>T: lookup(name_a/name_b)
+	T-->>K: sample or record request
+	H->>T: sync_from_managed()
+	alt queue overflow
+		H->>T: needs_retry() grows + clears queue
+		H->>T: sync_to_managed()
+		H->>D: relaunch pass
+	else normal request handling
+		H->>T: process_requests(...)
+		H->>L: process_request(req)
+		L-->>T: set_texture_ready / set_tile_payload
+		H->>T: sync_to_managed()
+	end
+```
+
+## Our example blend kernel
+
+```cpp
+void blend_kernel(int x, int y, tagged_ptr<void> data)
+{
+	tagged_ptr<BlendOp> op(data);
+	const float resx = static_cast<float>(op->width);
+	const float resy = static_cast<float>(op->height);
+	const float invx = 1.0f / resx;
+	const float invy = 1.0f / resy;
+	const float u    = (static_cast<float>(x) + 0.5f) * invx;
+	const float v    = (static_cast<float>(y) + 0.5f) * invy;
+
+	Vec2 duA { invx, 0.0f };
+	Vec2 dvA { 0.0f, invy };
+	Vec2 duB = 4.0f * duA;
+	Vec2 dvB = 4.0f * dvA;
+
+	RGBA A = op->texture_system.lookup(op->name_a, u, v, duA, dvA);
+	RGBA B = op->texture_system.lookup(op->name_b, u, v, duB, dvB);
+
+	if (op->texture_system.failures())
+		return;
+
+	op->output_buffer[y * op->width + x] = 0.5f * A + 0.5f * B;
+}
+```
+
+# The manager/managed data container pattern
+
+The core type is DTextureSystem<Arena, ManagedArena>.
+
+- Managed form (device view): DTextureSystem<MockDevice>
+- Manager form (host orchestration): DTextureSystem<Host, MockDevice>
+
+Below is a sketch of how the managed mirror is constructed, synced, and transferred to the device:
+```cpp
+    Host host;
+    MockDevice device;
+    tagged_ptr<void> dptr =
+	    device.alloc(sizeof(DTextureSystem<MockDevice>), "");
+    DTextureSystem<MockDevice> texsys_gpu(device);
+    DTextureSystem<Host, MockDevice> texsys_host(host, device, texsys_gpu);
+	...
+	texsys_host.sync_to_managed();
+	device.copy_to(dptr, tagged_ptr<const void>(&texsys_host, "Host"),
+                       sizeof(DTextureSystem<MockDevice>));
+```
+
+Both forms are instantiated from the same template, but they expose different
+APIs and fields through type-driven conditional declarations.
+
+- Method availability is gated with `std::enable_if` helpers (the `OPT_*`
+	macros), so host-only orchestration methods exist only when
+	`IsManager == true`.
+- Optional fields are expressed with `std::conditional_t`, allowing one
+	definition to carry manager-only state (for example, a reference to the
+	managed instance) without duplicating the whole class.
+
+This gives one shared data layout model and one lookup code path while still
+enforcing that device-side instances do not expose host-only operations.
+
+### Composition
+
+`DTextureSystem` is mostly composition of smaller manager/managed containers:
+
+- `RequestQueue` (`ClosedHashMap<Request, bool, ...>`)
+- `TextureMap` (`ClosedHashMap<uint64_t, uint32_t, ...>`)
+- `TileIndexMap` (`ClosedHashMap<TileCoords, uint32_t, ...>`)
+- `TilePool` (`Stream<TileRecord, ...>`)
+- POD arrays/counters (`m_textures`, counts, failure flags)
+
+`sync_to_managed()` calls each sub-component's own sync, then copies
+`DTextureSystem`'s plain fields (flags, records, counters). `sync_from_managed()`
+pulls request and failure state back in the reverse order.
+
+### Manager and managed ownership
+
+```mermaid
+flowchart LR
+	subgraph HostSide[Manager side on Host]
+		HM[DTextureSystem<Host, MockDevice>]
+		HMap[TextureMap]
+		HTile[TileIndexMap]
+		HPool[TilePool Stream]
+		HReq[RequestQueue]
+		HM --> HMap
+		HM --> HTile
+		HM --> HPool
+		HM --> HReq
+	end
+
+	subgraph DeviceSide[Managed side on MockDevice]
+		DM[DTextureSystem<MockDevice>]
+		DMap[TextureMap]
+		DTile[TileIndexMap]
+		DPool[TilePool Stream]
+		DReq[RequestQueue]
+		DM --> DMap
+		DM --> DTile
+		DM --> DPool
+		DM --> DReq
+	end
+
+	HM -- sync_to_managed --> DM
+	DM -- sync_from_managed --> HM
+```
+
+## ClosedHashMap
+
+ClosedHashMap is a pointerless open-addressed hash map with linear probing.
+
+Design intent:
+
+- Device-readable and device-writable in place.
+- Manager can grow and rehash as needed.
+- Atomic slot state transitions support concurrent insertion.
+
+## Stream
+
+Stream is a paged container used as a sidecar pool for large payloads (tile pixel
+data). Instead of storing large values directly in a hash map, the map stores an
+index and Stream stores the actual records.
+
+Key properties in this prototype:
+
+- Manager appends records with push_back.
+- Managed side reads by index.
+- Data is synchronized by sync_to_managed() from manager.
+- Each page holds as many elements as needed for a good page size (64 Mb).
+
+### Stream page ownership model
+
+The manager keeps a writable staging page for `push_back()` (effectively the
+current tail page). The managed side holds the full page table and reads all
+resident pages as immutable data during lookup.
+
+```mermaid
+flowchart LR
+	direction LR
+	subgraph Manager[Manager Stream on Host]
+		direction LR
+		MLast[page N]
+		MPush[push_back]
+		MPush --> MLast
+	end
+
+	subgraph Managed[Managed Stream on Device read only]
+		direction LR
+		DN[page N]
+		D1[page 1]
+		D0[page 0]
+	end
+		DRead[operator &#91i&#93]
+		DRead --> D0
+		DRead --> D1
+		DRead --> DN
+
+	MLast -- sync tail page --> DN
+```
+
+Current limitation: tile payloads are fixed to 64×64 records.
+
+This separation keeps indexing structures compact while allowing larger payload
+storage to grow independently.
+
+## DTextureSystem
+
+DTextureSystem composes the containers above into a minimal texture runtime and
+provides the device-side `lookup()` and the manager-side orchestration API.
+
+### Lookup pipeline (code-level)
+
+At a high level, `lookup(name, u, v, du, dv, rnd)` runs this sequence
+(`rnd` defaults to `-1` for deterministic behavior):
+
+1. Resolve `TextureRecord` from `TextureMap`.
+2. Build `MipAnisoFilter(texture, du, dv)`.
+3. For each selected mip level:
+	- Generate weighted taps (`generate_samples`) for that mip.
+	- Resolve taps to resident tiles via `load_tiles(...)`.
+	- If taps are resident, accumulate weighted texels from `TilePool`.
+4. If any mip had missing taps, return debug magenta for that pass.
+5. Otherwise return the accumulated filtered color.
+
+The important split is:
+- `generate_samples(...)` computes *what* to sample.
+- `load_tiles(...)` resolves *where* those samples live (or enqueues requests).
+- The accumulation loop computes the final color.
+
+### Anisotropic filtering in this prototype
+
+The filtering path is scalar and follows the OIIO-style helper flow, but with a
+pragmatic bilinear-tap implementation:
+
+1. `ellipse_axes(texture, du, dv)`
+	- Converts derivatives to texture-space footprint radii.
+	- Chooses major/minor axis and normalized axis direction.
+
+2. `compute_miplevels(texture, axes)`
+	- Uses the minor footprint radius to compute LOD.
+	- Selects `mip0`, `mip1`, and blend factor (`mip_blend`) for trilinear
+	  blending.
+
+3. `compute_ellipse_sampling(texture, axes, mip)`
+	- Computes anisotropy aspect ratio and clamps sample count to
+	  `kMaxSamples`.
+	- Places samples along the major axis span in UV space.
+
+4. `generate_samples(...)`
+	- For each anisotropic position, computes a Gaussian-like weight.
+	- Expands each position into 4 bilinear taps (`x0/x1`, `y0/y1`).
+	- Wraps coordinates, derives tile coords + local pixel coords, emits taps.
+
+5. Accumulation
+	- For each mip, accumulate `mip_weight * sample_weight * texel`.
+	- Blend across mips via the trilinear `mip_blend` weight.
+
+Experimental stochastic path (when `rnd >= 0`):
+
+- Stochastically selects one mip level instead of blending two.
+- Stochastically selects one tap from the generated sample set.
+- The default runtime path remains deterministic; the blend kernel
+  calls `lookup(...)` without supplying `rnd`.
+
+So the current implementation is "anisotropic sample placement + bilinear texel
+fetches + trilinear mip blend". It is intentionally simpler than full EWA area
+integration, but it preserves the key footprint-driven behavior for this test.
+
+## Request lifecycle (one miss to one retry)
+
+This is the most important runtime sequence to understand:
+
+1. Kernel calls `lookup(name, u, v, du, dv)`.
+2. If the texture record is unknown, `MissingTexture` is inserted in the request
+	queue and failure is marked.
+3. If the texture exists but one or more tiles are missing, each missing tile is
+	inserted as `MissingTile` and failure is marked.
+4. Kernel keeps running other pixels so the pass collects as many unique requests
+	as possible.
+5. Host calls `sync_from_managed()` and checks retry conditions.
+6. If queue overflow happened, `needs_retry()` grows the queue, clears it, and the
+	host relaunches immediately to recollect requests at larger capacity.
+7. Otherwise host walks deduplicated requests via `process_requests(...)`.
+8. `TextureLoader` resolves texture metadata and loads tile payloads into
+	`TextureMap`, `TileIndexMap`, and `TilePool`.
+9. Host calls `sync_to_managed()` and launches the next pass.
+10. When no failures remain, output is written.
+
+### Retry state machine
+
+```mermaid
+stateDiagram-v2
+		[*] --> Launch
+		Launch --> CollectRequests: kernel lookup misses
+		Launch --> Converged: no failures
+		CollectRequests --> GrowAndRelaunch: queue overflow
+		CollectRequests --> ProcessOnHost: queue has requests
+		GrowAndRelaunch --> Launch
+		ProcessOnHost --> SyncToManaged
+		SyncToManaged --> Launch
+		Converged --> WriteOutput
+		WriteOutput --> [*]
+```
+
+# Build and run
+
+The local test runner is `run.py`, which configures and builds this folder with
+CMake and then executes `texture-device`.
+
+Useful commands from repository root:
+
+1. `ctest --test-dir build -R texture-device --output-on-failure`
+2. `cd testsuite/texture-device/build && cmake --build . --target texture-device texture-device-devicecheck`
+3. `cd testsuite/texture-device/build && ./texture-device-devicecheck && ./texture-device`
+
+Expected outputs include:
+
+- out.exr
+- out.txt
+
+There is also a second executable, texture-device-devicecheck, used as a compile/link
+gate for code intended to remain device-safe and header-centric.
+
+# Current scope and limitations
+
+- Prototype-only implementation intended for design validation.
+- Scalar filtering path; not a full production texture cache.
+- Retry loop is explicit and host-driven.
+- Prioritizes clarity and portability over optimization.
+
+# Code map
+
+If you are reading the code for the first time, this order is a good path:
+
+1. host.cpp: sets up the manager side, launches the kernel, drives retry, writes
+	output.
+2. blend.h and blend.cpp: kernel entry point and blend operation structure.
+3. texture_device_decl.h and texture_device_impl.h: core DTextureSystem API and
+	lookup implementation.
+4. texture_loader.h and texture_loader.cpp: host-side request fulfillment.
+5. closed_hashmap.h and stream.h: core containers used by DTextureSystem.
+6. filtering_decl.h and filtering_impl.h: EWA footprint computation and sample
+	generation pipeline.
+7. vector_lite.h: fixed-capacity dynamic-size container used by the filtering
+	path.
+8. arena.h and tagged_ptr.h: memory and pointer-context safety infrastructure.
+9. device_tests.cpp: focused self-tests for map, request, and filtering behavior.
+
+This progression follows the runtime flow from orchestration to kernel lookup and
+then to low-level data structures.
\ No newline at end of file
diff --git a/testsuite/texture-device/SPEC.md b/testsuite/texture-device/SPEC.md
new file mode 100644
index 0000000000..ff747d8232
--- /dev/null
+++ b/testsuite/texture-device/SPEC.md
@@ -0,0 +1,334 @@
+# GPU Texture System
+
+Create a new test to prototype a new C++ texture system that can work on a GPU.
+We will call this GPU "the device", and the only requirement from it is that it can
+run a kernel written in C++. The test will create an executable following the
+pattern of the few existing tests that already do that.
+
+The purpose of the test is to simulate execution of a texture system on the device.
+For this we will write a class that models the device interface as seen from the 
+CPU. For now it will just be a fake GPU. Here's an example:
+```cpp
+class MockDevice {
+  using Kernel = void (*)(int x, int y, tagged_ptr<void> data);
+
+  tagged_ptr<void> alloc(size_t bytes, const char* purpose);
+  void free(tagged_ptr<void> p);
+  void copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+               size_t bytes);
+  void copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                 size_t bytes);
+  void run(int width, int height, Kernel kernel, tagged_ptr<void> data);
+};
+```
+
+The MockDevice class is minimal. We are going to assume that what you allocate in the
+device cannot be accessed by the host and vice versa. But truly it is just a mock-up,
+`copy_to` and `copy_from` will just be `memcpy`. But we write the rest of the code as
+if these pointers were real host, GPU.
+
+
+The test is split between host orchestration (`host.cpp`) and the kernel source
+(`blend.cpp`). Both are compiled together and linked to OIIO for the main test.
+There is also a sanity-check binary where the kernel/device-side code must
+compile itself alone to an executable, even if it is useless. This separate binary:
+  * Can include any OIIO header. BUT ...
+  * It must **not** link against OIIO.
+
+And that is our way of ensuring the kernel with the device texture system can 
+actually run on a GPU. Do this as a separate build target in the `CMakeLists.txt` so
+cmake itself enforces it.
+
+
+## Test Exercise
+
+The idea of this test is to do a simple operation. A kernel will access two on-disk
+textures via file name plus u, v coordinates (with derivatives). It will do a blend
+of the two where one uses pixel size derivatives and the other uses something 4
+times bigger so we see a blur. Then the output image will be written to disk by the
+host side using OIIO.
+
+## Purpose of the test
+
+Gather all the functionality we need from OIIO to prototype the texture system so
+we can then publish it in OIIO headers one thing at a time. In the first stage, if
+something is not yet exposed to be inlined, we will copy&paste it into the test
+code. And when we are happy with the result we can start moving code to the official 
+headers inside OIIO.
+
+## General Guidelines
+### OIIO Style
+
+The common OpenImageIO coding style should be followed and a few existing headers
+and sources should be explored. We are going to be borrowing code for texture
+filtering and mip mapping, so this should be located first. Implement the full 
+EWA/anisotropic footprint pipeline (ellipse_axes, anisotropic_aspect, 
+compute_miplevels, compute_ellipse_sampling) as in `texturesys.cpp`.
+
+### Start Simple but with Full Filtering
+
+This prototype is a proof of concept and we must use simple approaches except 
+for mip mapping. The footprint computation (ellipse_axes, anisotropic_aspect, 
+compute_miplevels, compute_ellipse_sampling) should be close to the original 
+but keep the inner sampling to bilinear-only (no bicubic, no stochastic).
+
+This is the hardest part of the effort. The logic needs to be decoupled from `ImageCache` 
+and extracted without SIMD to headers in the test for inlining. You may need
+several logical headers to organize the code.
+
+### Prepare for Inlining
+
+All tooling code that has to run on the device must be inlined. For later
+incorporation in OpenImageIO we will keep it in headers inside the test. If the
+implementation is more than a few lines we will split headers in blah_decl.h and
+blah_impl.h.
+
+### SIMD vs scalar on device
+
+OIIO's existing texture filtering uses `simd::vfloat4` internally (with scalar fallbacks
+when `OIIO_SIMD` is 0). Use only scalar logic and ignore SIMD.
+
+
+# Design
+
+The device texture system uses a launch, fail and retry cycle. This means we try
+to run the kernel and if something is missing execution is aborted so the host can
+load what the kernel needs. The following data structures are needed:
+  * A device-side map from ustringhash to texture descriptor. This descriptor holds
+    data and display window, as well as mip level ranges. It will also keep a
+    pointer to the following ...
+  * Another per-texture map from tile coordinates (mip level, x and y) to the
+    pixels of the tile, that is float RGBA colors.
+  * A request queue. Requests can be missing textures (file name ustringhash) or
+    missing tiles (ustringhash plus tile coordinates).
+
+All these maps are pointerless hash tables that work on the device. Defined as a
+template on the Key and Value, they are always created, filled and resized on the
+host. Then copied in just one block to the device. The device kernel can query
+them but not modify or grow.
+
+We will use a closed hash (open addressing) with linear probing. OIIO has `hash.h`
+with `farmhash` but no pointerless hash table template. Write the closed-hash table
+from scratch but using similar patterns. If possible reuse the same hash functions.
+
+In the current prototype, the request queue is implemented as a closed hash map
+(`Request -> bool`) with dedup semantics. This keeps request insertion idempotent
+across many pixels and avoids repeated host work. The queue can overflow when full;
+on host side this triggers a grow+retry path (`needs_retry()`), which clears the
+queue and re-runs the launch to recollect missing requests at the new capacity.
+
+Note all pixels in the kernel run are executed even if one fails. We want to collect
+as many requests as possible. Concurrent access to the queue will be based on atomic
+integers. It is ok to use the CPU ones for now, but we can use a 
+`DTextureSystem::atomic_inc()` method to abstract it.
+
+
+## The tile hash map
+
+Tiles are big, so it wouldn't be efficient to store them in a closed hash table.
+Instead we will store them in a sidecar vector ordered as they come. Then the hash
+map will just store integer offsets into this table.
+
+## Example execution
+
+This is the workflow that the test will follow, starting with completely empty
+data structures:
+  1. Launches the kernel
+  2. Kernel tries to read a texture by ustringhash, u, v, etc...
+  3. Texture is missing. File a request, give up on this pixel.
+  4. Host finds the kernel failed, handle requests and update tables on device.
+  5. Repeat
+
+The host fetches the request queue after a kernel run (step 4). A failure exists if
+it is not empty.
+
+On the second run textures may be found but tiles will be missing so more requests
+will be filed for the host to satisfy until the kernel runs to completion. When
+the kernel looks up a known texture, it computes the footprint of the filter, and from
+that it decides which tiles it needs from what mip levels. Whatever is missing it's
+requested and otherwise the lookup is computed and returned.
+
+## Main APIs
+
+The texture system lives both on the host and on device. It is a templated C++ class
+with a managed (device) specialization and manager (host) specialization. It keeps:
+  * The hash map of texture descriptors
+  * A hash map from tile coordinates to tile-pool indices
+  * A paged tile pool (sidecar stream) storing tile pixel payloads
+  * The request queue
+
+All of these have identical copies on the device side. It is structured like this:
+```cpp
+template<class Arena, class ManagedArena = NullArena>
+class DTextureSystem {
+  // Device-side lookup path
+  RGBA lookup(ustringhash, float u, float v, Vec2 du, Vec2 dv);
+
+  // Host-side orchestration path (when IsManager=true)
+  bool needs_retry();
+  template<class Func> bool process_requests(Func&&);
+  void sync_to_managed();
+  void sync_from_managed();
+
+  // Resident state
+  ClosedHashMap<ustringhash, TextureRecordIndex> textures;
+  ClosedHashMap<TileCoords, TilePoolIndex>       tile_index;
+  Stream<TileRecord>                              tile_pool;
+  ClosedHashMap<Request, bool>                    requests;
+};
+
+class TextureLoader {
+  bool process_request(const Request&, DTextureSystem<Host, MockDevice>*);
+};
+
+template<class T, size_t N>
+struct vector_lite : public std::array<T, N> {
+  // Fixed-capacity storage with dynamic active size.
+  unsigned size() const;
+  void push_back(const T&);
+};
+
+struct RequestHash {
+  size_t operator()(const Request&) const;
+  static uint64_t hash_mix_u64(uint64_t h, uint64_t v);
+};
+
+struct MipSelection {
+  unsigned mip_levels[2];
+  float mip_blend;
+};
+```
+
+The key idea is that `DTextureSystem` is device data and code with host-manager
+specialization support. The implementation is split in declaration/implementation
+headers for inlining (`texture_device_decl.h` + `texture_device_impl.h`), and host
+texture resolution/payload loading is delegated to `TextureLoader`.
+
+Filtering sample generation uses a return-by-value fixed-capacity container
+(`vector_lite<Sample, kMaxSamples * 4>`) so we can keep stack-local ownership while
+still tracking runtime sample count. Tile-loading and accumulation loops consume
+the dynamic active size instead of always iterating the full capacity.
+
+OIIO's tile size defaults to 64. Use 64x64 as the hardcoded tile size and assume 
+RGBA shading values (accept RGB or RGBA input textures; use alpha=1 for RGB). The buffers for the tiles are grown and copied again 
+when new data is added. This is inefficient but good enough for a first draft. 
+Also we are using `RGBA` and `Vec2`. These should be:
+  - `RGBA` → `Imath::C4f` (`.r`, `.g`, `.b`, `.a`) 
+  - `Vec2` → `Imath::V2f` (already available via OIIO's Imath dependency).
+
+
+## Example Kernel
+
+The device runs the kernel on a grid of pixels with a tagged data pointer.
+```cpp
+void blend_kernel(int x, int y, tagged_ptr<void> data){
+  tagged_ptr<BlendOp> op(data);
+  float resx = float(op->width), resy = float(op->height);
+  float u = x / resx, v = y / resy;
+  Vec2 duA = { 1 / resx, 0 }, dvA = { 0, 1 / resy };
+  Vec2 duB = 4 * duA, dvB = 4 * dvA; // Make it blurry
+  RGBA A = op->texture_system.lookup(op->name_a, u, v, duA, dvA);
+  RGBA B = op->texture_system.lookup(op->name_b, u, v, duB, dvB);
+  // We let both lookups run to file as many requests as possible, but now we can
+  // early exit.
+  if (op->texture_system.failures())
+    return;
+  op->output_buffer[y * op->width + x] = 0.5 * A + 0.5 * B;
+}
+```
+
+The 'MockDevice::run()' method just runs this kernel for all pixels using an OIIO
+parallel loop to check that we don't run into race conditions. This method
+implementation can live in host.cpp for the mock-up, only the kernel lives in
+blend.cpp.
+
+# Steps
+
+This is a suggested rough plan
+  1. Create basic test files, cmake and cpp.
+  2. Write an implementation of a pointerless hash table.
+  3. Write unit tests within the existing test that writes, uploads and queries
+     the hash. Make sure these unit tests pass.
+  4. Write and test the launch, fail, repeat cycle.
+  5. Design the request queue implementation and also unit test it.
+  6. Gather the filtering functionality that we need for the kernel.
+  7. Finally write the blending test.
+
+`texture-device` should be added to the `oiio_add_tests()` list in 
+`src/cmake/testing.cmake` so it's discovered by CTest. This is part of step 1 
+(creating basic test files). The mentioned unit tests should be assert-based self-tests
+within the main test executable that run before the blend kernel, exiting non-zero
+on failure.
+
+For the blend test use the existing `testsuite/common/textures/grid.tx` and
+`checker.tx` (pre-MIP-mapped, already in repo). And we read everything using OIIO
+`ImageInput`. Convert to 3 channel if needed and do not clean up any generated files
+(output images, build directories) automatically, the expectation is that `run.py` 
+leaves artifacts behind for debugging. Also use exr/tif output and image comparison
+via `diff_command` in `run.py` for the blend test validation.
+
+# Codebase Analysis
+
+## Existing test patterns
+
+Only 2 test suites produce their own executables:
+- `testsuite/cmake-consumer/` -- standalone `CMakeLists.txt`, builds `consumer` target 
+  linked to OIIO, run via `run.py`
+- `testsuite/docs-examples-cpp/` -- standalone `CMakeLists.txt`, builds 8 executables
+  (`docs-examples-texturesys`, etc.), all linked to both OIIO and Imath
+
+All other 142+ tests are `runtest.py`-based, using pre-built OIIO tools (`testtex`, 
+`oiiotool`, `maketx`).
+
+The `cmake-consumer` pattern is closest to what we need: a `CMakeLists.txt` that builds 
+executables and a `run.py` that invokes cmake + runs them. Tests are registered in 
+`src/cmake/testing.cmake` via `oiio_add_tests()`.
+
+No existing test uses a `host.cpp` / `blend.cpp` split or has one target linked to OIIO 
+and another not.
+
+## Key OIIO headers and types for device-side code
+
+**GPU-safe headers already in OIIO:**
+- `OpenImageIO/ustring.h` -- `ustringhash` is header-only, stores a single `uint64_t`, 
+  fully `constexpr`/device-safe
+- `OpenImageIO/hash.h` -- `farmhash::inlined::Hash64` is `OIIO_HOSTDEVICE constexpr`
+- `OpenImageIO/fmath.h` -- `fast_sincos`, `fast_atan2`, `safe_sqrt`, `madd`, `clamp`,
+  `fast_exp`, etc. all `OIIO_HOSTDEVICE`
+- `OpenImageIO/platform.h` -- `OIIO_HOSTDEVICE`, `OIIO_DEVICE_CONSTEXPR` macros
+- `OpenImageIO/simd.h` -- `vfloat4`, `vint4`, etc. have scalar fallbacks when `OIIO_SIMD` is 0
+- `OpenImageIO/texture.h` -- `TextureOpt`, `MipMode`, `InterpMode`, `Wrap` enums (no GPU 
+   annotations, but pure data)
+
+**NOT GPU-safe:**
+- `ustring` (not `ustringhash`) -- uses global hash table with mutex/malloc
+- SIMD types with `OIIO_SIMD > 0` -- use SSE/AVX intrinsics
+
+**Types:**
+- Vec2/Vec3: OIIO wraps Imath (`Imath::V2f`, `Imath::V3f`) via `OpenImageIO/Imath.h`
+- RGBA color: `Imath::C4f` (`.r`, `.g`, `.b`, `.a`) or raw `float[4]`
+- OIIO internally uses `simd::vfloat3` (padded to 4) for color accumulation
+
+## Texture filtering code location
+
+All in `src/libtexture/texturesys.cpp`:
+- `ellipse_axes()` -- EWA footprint computation (pure math, ~40 lines)
+- `anisotropic_aspect()` -- anisotropy clamping (in `texture_pvt.h`, ~35 lines)
+- `adjust_blur()` -- sblur/tblur application
+- `compute_miplevels()` -- mip level selection with blend weights (~80 lines)
+- `compute_ellipse_sampling()` -- sample placement along major axis (~55 lines)
+- `st_to_texel()` -- coordinate to texel mapping (in `texture_pvt.h`, ~15 lines)
+- `sample_bilinear()` -- bilinear interpolation within a mip level (depends heavily on 
+  tile cache, ~200+ lines)
+- `texture_lookup()` -- full EWA anisotropic lookup, orchestrates all of the above 
+  (~230 lines)
+
+The tile cache layer (`ImageCacheImpl`, `ImageCacheFile`, `ImageCacheTile`) is the biggest 
+piece that needs adaptation for GPU.
+
+## Source textures available in repo
+
+- `testsuite/common/textures/grid.tx` -- pre-MIP-mapped grid, used by 25+ existing tests
+- `testsuite/common/textures/checker.tx` -- pre-MIP-mapped checkerboard, used by `texture-blurtube`
+- `testsuite/common/grid.tif` -- 1000x1000 plain TIFF grid, can be `maketx`'d at runtime
+- `testsuite/common/checker_with_alpha.exr` -- checkerboard with alpha
diff --git a/testsuite/texture-device/arena.cpp b/testsuite/texture-device/arena.cpp
new file mode 100644
index 0000000000..91392b7425
--- /dev/null
+++ b/testsuite/texture-device/arena.cpp
@@ -0,0 +1,313 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#include "arena.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+namespace texture_device {
+
+uint64_t g_tagged_ptr_context = ptrtag("Host");
+
+namespace {
+
+    template<class AllocationMap>
+    void report_leaks(const char* owner, const AllocationMap& allocated)
+    {
+        if (allocated.empty())
+            return;
+
+        std::fprintf(stderr,
+                     "texture-device: %s leak check failed (%zu allocations)\n",
+                     owner, allocated.size());
+        for (const auto& it : allocated) {
+            const auto& rec = it.second;
+            std::fprintf(stderr, "  leak ptr=%p bytes=%zu purpose=%s\n",
+                         it.first, rec.bytes,
+                         rec.purpose ? rec.purpose : "(null purpose)");
+        }
+        std::abort();
+    }
+
+    template<class AllocationMap>
+    void tracked_free(AllocationMap& allocated, tagged_ptr<void> p,
+                      const char* owner)
+    {
+        if (!p)
+            return;
+
+        auto it = allocated.find(p.get());
+        if (it == allocated.end()) {
+            std::fprintf(
+                stderr,
+                "texture-device: invalid free ptr=%p (not allocated by %s::alloc)\n",
+                p.get(), owner);
+            std::abort();
+        }
+        allocated.erase(it);
+        std::free(p.get());
+    }
+
+}  // namespace
+
+Host::~Host() { report_leaks("Host", m_allocated); }
+
+tagged_ptr<void>
+Host::alloc(size_t bytes, const char* purpose)
+{
+    void* p = std::malloc(bytes);
+    if (!p)
+        return nullptr;
+
+    m_allocated[p] = AllocationRecord { bytes, purpose };
+    return { p, "Host" };
+}
+
+void
+Host::free(tagged_ptr<void> p)
+{
+    tracked_free(m_allocated, p, "Host");
+}
+
+void
+Host::copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+              size_t bytes)
+{
+    if (!device || !host)
+        return;
+
+    // copy_to expects a device destination and host source.
+    if (!device.is("MockDevice") || host.is("MockDevice"))
+        std::abort();
+
+    std::memcpy(device.get(), host.get(), bytes);
+}
+
+void
+Host::copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                size_t bytes)
+{
+    if (!host || !device)
+        return;
+
+    // copy_from expects a host destination and device source.
+    if (host.is("MockDevice") || !device.is("MockDevice"))
+        std::abort();
+
+    std::memcpy(host.get(), device.get(), bytes);
+}
+
+void
+Host::copy_in(tagged_ptr<void> to, tagged_ptr<const void> from, size_t bytes)
+{
+    if (!to || !from)
+        return;
+
+    // copy_in expects both pointers to belong to Host.
+    if (!to.is("Host") || !from.is("Host"))
+        std::abort();
+
+    std::memcpy(to.get(), from.get(), bytes);
+}
+
+MockDevice::~MockDevice() { report_leaks("MockDevice", m_allocated); }
+
+tagged_ptr<void>
+MockDevice::alloc(size_t bytes, const char* purpose)
+{
+    void* p = std::malloc(bytes);
+    if (!p)
+        return nullptr;
+
+    m_allocated[p] = AllocationRecord { bytes, purpose };
+    return { p, "MockDevice" };
+}
+
+void
+MockDevice::free(tagged_ptr<void> p)
+{
+    tracked_free(m_allocated, p, "MockDevice");
+}
+
+void
+MockDevice::copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+                    size_t bytes)
+{
+    if (!device || !host)
+        return;
+
+    // copy_to expects a device destination and host source.
+    if (!device.is("MockDevice") || host.is("MockDevice"))
+        std::abort();
+
+    std::memcpy(device.get(), host.get(), bytes);
+}
+
+void
+MockDevice::copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                      size_t bytes)
+{
+    if (!host || !device)
+        return;
+
+    // copy_from expects a host destination and device source.
+    if (host.is("MockDevice") || !device.is("MockDevice"))
+        std::abort();
+
+    std::memcpy(host.get(), device.get(), bytes);
+}
+
+void
+MockDevice::copy_in(tagged_ptr<void> to, tagged_ptr<const void> from,
+                    size_t bytes)
+{
+    if (!to || !from)
+        return;
+
+    // copy_in expects both pointers to belong to MockDevice.
+    if (!to.is("MockDevice") || !from.is("MockDevice"))
+        std::abort();
+
+    std::memcpy(to.get(), from.get(), bytes);
+}
+
+void
+MockDevice::run(int width, int height, Kernel kernel, tagged_ptr<void> data)
+{
+    struct MockDeviceExecutionGuard {
+        MockDeviceExecutionGuard()
+        {
+            g_tagged_ptr_context = ptrtag("MockDevice");
+        }
+        ~MockDeviceExecutionGuard() { g_tagged_ptr_context = ptrtag("Host"); }
+    } guard;
+
+    for (int y = 0; y < height; ++y) {
+        for (int x = 0; x < width; ++x)
+            kernel(x, y, data);
+    }
+}
+
+#if TEXTURE_DEVICE_HAS_CUDA_RUNTIME
+
+CudaArena::~CudaArena()
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    // FIXME: this frees tracked allocations but does not clear m_allocated,
+    // so report_leaks below will still abort if entries remain.
+    for (const auto& it : m_allocated) {
+        cudaFree(it.first);
+    }
+#    endif
+    report_leaks("CudaArena", m_allocated);
+}
+
+tagged_ptr<void>
+CudaArena::alloc(size_t bytes, const char* purpose)
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    void* p         = nullptr;
+    cudaError_t err = cudaMalloc(&p, bytes);
+    if (err != cudaSuccess)
+        return nullptr;
+    m_allocated[p] = AllocationRecord { bytes, purpose };
+    return { p, "CudaDevice" };
+#    else
+    (void)bytes;
+    (void)purpose;
+    return nullptr;
+#    endif
+}
+
+void
+CudaArena::free(tagged_ptr<void> p)
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    if (!p)
+        return;
+    auto it = m_allocated.find(p.get());
+    if (it == m_allocated.end()) {
+        std::fprintf(
+            stderr,
+            "texture-device: invalid free ptr=%p (not allocated by CudaArena::alloc)\n",
+            p.get());
+        std::abort();
+    }
+    // FIXME: check cudaFree return and surface failures.
+    cudaFree(p.get());
+    m_allocated.erase(it);
+#    else
+    (void)p;
+#    endif
+}
+
+void
+CudaArena::copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+                   size_t bytes)
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    if (!device || !host)
+        return;
+    // FIXME: check cudaMemcpyAsync return and decide synchronization policy.
+    cudaMemcpyAsync(device.get(), host.get(), bytes, cudaMemcpyHostToDevice,
+                    m_stream);
+#    else
+    (void)device;
+    (void)host;
+    (void)bytes;
+#    endif
+}
+
+void
+CudaArena::copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                     size_t bytes)
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    if (!host || !device)
+        return;
+    // FIXME: check cudaMemcpyAsync return and decide synchronization policy.
+    cudaMemcpyAsync(host.get(), device.get(), bytes, cudaMemcpyDeviceToHost,
+                    m_stream);
+#    else
+    (void)host;
+    (void)device;
+    (void)bytes;
+#    endif
+}
+
+void
+CudaArena::copy_in(tagged_ptr<void> to, tagged_ptr<const void> from,
+                   size_t bytes)
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    if (!to || !from)
+        return;
+    // FIXME: check cudaMemcpyAsync return and decide synchronization policy.
+    cudaMemcpyAsync(to.get(), from.get(), bytes, cudaMemcpyDeviceToDevice,
+                    m_stream);
+#    else
+    (void)to;
+    (void)from;
+    (void)bytes;
+#    endif
+}
+
+void
+CudaArena::run(int width, int height, Kernel kernel, tagged_ptr<void> data)
+{
+#    if TEXTURE_DEVICE_ENABLE_CUDA_SKETCH_IMPL
+    // The current Kernel signature is host-callable. A real CUDA path needs a
+    // separate __global__ entrypoint and launch configuration.
+#    endif
+    (void)width;
+    (void)height;
+    (void)kernel;
+    (void)data;
+}
+
+#endif
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/arena.h b/testsuite/texture-device/arena.h
new file mode 100644
index 0000000000..635edfbc86
--- /dev/null
+++ b/testsuite/texture-device/arena.h
@@ -0,0 +1,189 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdlib>
+#include <unordered_map>
+
+#include "tagged_ptr.h"
+
+#if defined(__has_include)
+#    if __has_include(<cuda_runtime_api.h>)
+#        include <cuda_runtime_api.h>
+#        define TEXTURE_DEVICE_HAS_CUDA_RUNTIME 1
+#    endif
+#    if __has_include(<cuda/atomic>)
+#        include <cuda/atomic>
+#    endif
+#endif
+
+#ifndef TEXTURE_DEVICE_HAS_CUDA_RUNTIME
+#    define TEXTURE_DEVICE_HAS_CUDA_RUNTIME 0
+#endif
+
+namespace texture_device {
+
+class NullArena {
+public:
+    using Kernel = void (*)(int x, int y, tagged_ptr<void> data);
+
+    // Methods an arena should implement
+    tagged_ptr<void> alloc(size_t bytes, const char* purpose);
+    void free(tagged_ptr<void> p);
+    void copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+                 size_t bytes);
+    void copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                   size_t bytes);
+    void copy_in(tagged_ptr<void> to, tagged_ptr<const void> from,
+                 size_t bytes);
+    // Optionally, if we intend to run things on it
+    void run(int width, int height, Kernel kernel, tagged_ptr<void> data);
+};
+
+class Host : public NullArena {
+public:
+    ~Host();
+
+    tagged_ptr<void> alloc(size_t bytes, const char* purpose);
+    void free(tagged_ptr<void> p);
+    void copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+                 size_t bytes);
+    void copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                   size_t bytes);
+    void copy_in(tagged_ptr<void> to, tagged_ptr<const void> from,
+                 size_t bytes);
+    template<typename T> struct Atomic : public std::atomic<T> {
+        using std::atomic<T>::atomic;
+        using std::atomic<T>::operator=;
+
+        T load() const
+        {
+            return std::atomic<T>::load(std::memory_order_acquire);
+        }
+        void store(const T& v)
+        {
+            std::atomic<T>::store(v, std::memory_order_release);
+        }
+        bool cas(T& expected, const T& desired)
+        {
+            return std::atomic<T>::compare_exchange_strong(
+                expected, desired, std::memory_order_acq_rel,
+                std::memory_order_acquire);
+        }
+        T fetch_add(const T& v)
+        {
+            return std::atomic<T>::fetch_add(v, std::memory_order_acq_rel);
+        }
+    };
+
+private:
+    struct AllocationRecord {
+        size_t bytes        = 0;
+        const char* purpose = nullptr;
+    };
+
+    // Record every alloc pointer with its purpose to validate frees and leaks.
+    std::unordered_map<void*, AllocationRecord> m_allocated;
+};
+
+class MockDevice : public NullArena {
+public:
+    template<typename T> using Atomic = Host::Atomic<T>;
+    using NullArena::Kernel;
+
+    ~MockDevice();
+
+    tagged_ptr<void> alloc(size_t bytes, const char* purpose);
+    void free(tagged_ptr<void> p);
+    void copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+                 size_t bytes);
+    void copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                   size_t bytes);
+    void copy_in(tagged_ptr<void> to, tagged_ptr<const void> from,
+                 size_t bytes);
+    void run(int width, int height, Kernel kernel, tagged_ptr<void> data);
+
+private:
+    struct AllocationRecord {
+        size_t bytes        = 0;
+        const char* purpose = nullptr;
+    };
+
+    // Record every alloc pointer with its purpose to validate frees and leaks.
+    std::unordered_map<void*, AllocationRecord> m_allocated;
+};
+
+#if TEXTURE_DEVICE_HAS_CUDA_RUNTIME
+
+// Guidance-only sketch for a real device arena backed by CUDA runtime APIs.
+// This is intentionally not wired into the current test flow.
+class CudaArena : public NullArena {
+public:
+    template<typename T>
+#    ifdef __CUDA_ARCH__
+    struct Atomic : public cuda::atomic<T> {
+        using cuda::atomic<T>::atomic;
+        using cuda::atomic<T>::operator=;
+
+        T load() const
+        {
+            return cuda::atomic<T>::load(cuda::memory_order_acquire);
+        }
+        void store(const T& v)
+        {
+            cuda::atomic<T>::store(v, cuda::memory_order_release);
+        }
+        bool cas(T& expected, const T& desired)
+        {
+            return cuda::atomic<T>::compare_exchange_strong(
+                expected, desired, cuda::memory_order_acq_rel,
+                cuda::memory_order_acquire);
+        }
+        T fetch_add(const T& v)
+        {
+            return cuda::atomic<T>::fetch_add(v, cuda::memory_order_acq_rel);
+        }
+    };
+#    else
+    using Atomic = Host::Atomic<T>;
+#    endif
+    using NullArena::Kernel;
+
+    CudaArena() = default;
+    explicit CudaArena(cudaStream_t stream)
+        : m_stream(stream)
+    {
+    }
+
+    ~CudaArena();
+
+    tagged_ptr<void> alloc(size_t bytes, const char* purpose);
+    void free(tagged_ptr<void> p);
+    void copy_to(tagged_ptr<void> device, tagged_ptr<const void> host,
+                 size_t bytes);
+    void copy_from(tagged_ptr<void> host, tagged_ptr<const void> device,
+                   size_t bytes);
+    void copy_in(tagged_ptr<void> to, tagged_ptr<const void> from,
+                 size_t bytes);
+    void run(int width, int height, Kernel kernel, tagged_ptr<void> data);
+
+    void set_stream(cudaStream_t stream) { m_stream = stream; }
+    cudaStream_t stream() const { return m_stream; }
+
+private:
+    struct AllocationRecord {
+        size_t bytes        = 0;
+        const char* purpose = nullptr;
+    };
+
+    cudaStream_t m_stream = nullptr;
+    std::unordered_map<void*, AllocationRecord> m_allocated;
+};
+
+#endif
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/blend.cpp b/testsuite/texture-device/blend.cpp
new file mode 100644
index 0000000000..4c55060371
--- /dev/null
+++ b/testsuite/texture-device/blend.cpp
@@ -0,0 +1,36 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#include "blend.h"
+#include "filtering_impl.h"
+#include "texture_device_impl.h"
+
+namespace texture_device {
+
+void
+blend_kernel(int x, int y, tagged_ptr<void> data)
+{
+    tagged_ptr<BlendOp> op(data);
+    const float resx = static_cast<float>(op->width);
+    const float resy = static_cast<float>(op->height);
+    const float invx = 1.0f / resx;
+    const float invy = 1.0f / resy;
+    const float u    = (static_cast<float>(x) + 0.5f) * invx;
+    const float v    = (static_cast<float>(y) + 0.5f) * invy;
+
+    Vec2 duA { invx, 0.0f };
+    Vec2 dvA { 0.0f, invy };
+    Vec2 duB = 4.0f * duA;
+    Vec2 dvB = 4.0f * dvA;
+
+    RGBA A = op->texture_system.lookup(op->name_a, u, v, duA, dvA);
+    RGBA B = op->texture_system.lookup(op->name_b, u, v, duB, dvB);
+
+    if (op->texture_system.failures())
+        return;
+
+    op->output_buffer[y * op->width + x] = 0.5f * A + 0.5f * B;
+}
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/blend.h b/testsuite/texture-device/blend.h
new file mode 100644
index 0000000000..4804d4886e
--- /dev/null
+++ b/testsuite/texture-device/blend.h
@@ -0,0 +1,23 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include "texture_device_decl.h"
+
+namespace texture_device {
+
+struct BlendOp {
+    int width  = 0;
+    int height = 0;
+    OIIO::ustringhash name_a;
+    OIIO::ustringhash name_b;
+    DTextureSystem<MockDevice> texture_system;
+    tagged_ptr<RGBA> output_buffer = nullptr;
+};
+
+void
+blend_kernel(int x, int y, tagged_ptr<void> data);
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/closed_hashmap.h b/testsuite/texture-device/closed_hashmap.h
new file mode 100644
index 0000000000..9fd21f5540
--- /dev/null
+++ b/testsuite/texture-device/closed_hashmap.h
@@ -0,0 +1,357 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <variant>
+
+#include "arena.h"
+
+namespace texture_device {
+
+#define OPT_FUNCT(condition, return_type) \
+    template<typename __Q = return_type> std::enable_if_t<condition, __Q>
+
+#define OPT_CONSTRUCT(condition) \
+    template<bool __C = condition, typename = std::enable_if_t<__C>>
+
+#define OPT_FIELD(condition, field_type) \
+    std::conditional_t<condition, field_type, std::monostate>
+
+template<class Key, class Value, class Hash, typename Arena,
+         typename ManagedArena = NullArena>
+class ClosedHashMap {
+    template<typename T> using Atomic = typename Arena::template Atomic<T>;
+    static constexpr bool IsManager
+        = !std::is_same<ManagedArena, NullArena>::value;
+    struct Slot;
+
+
+public:
+    template<class, class, class, typename, typename>
+    friend class ClosedHashMap;
+
+    using Managed = ClosedHashMap<Key, Value, Hash, ManagedArena>;
+
+    // Managed only functionality
+
+    OPT_CONSTRUCT(!IsManager)
+    ClosedHashMap(Arena& arena, uint32_t capacity)
+        : m_arena(&arena)
+        , m_capacity(capacity)
+        , m_size(0)
+        , m_overflowed(false)
+        , m_owner(true)
+    {
+        m_slots = m_arena->alloc(sizeof(Slot) * capacity,
+                                 "ClosedHashMap::ClosedHashMap");
+        clear();
+    }
+    OPT_CONSTRUCT(!IsManager)
+    ClosedHashMap(const ClosedHashMap& o)
+    {
+        m_arena      = o.m_arena;
+        m_slots      = o.m_slots;
+        m_capacity   = o.m_capacity;
+        m_size       = o.m_size;
+        m_overflowed = o.m_overflowed;
+        m_owner      = false;
+    }
+    OPT_CONSTRUCT(!IsManager)
+    ClosedHashMap()
+        : m_arena(nullptr)
+        , m_slots(nullptr)
+        , m_capacity(0)
+        , m_size(0)
+        , m_overflowed(false)
+        , m_owner(false)
+    {
+    }
+
+    OPT_FUNCT(!IsManager, const ClosedHashMap&)
+    operator=(const ClosedHashMap & o)
+    {
+        m_arena      = o.m_arena;
+        m_slots      = o.m_slots;
+        m_capacity   = o.m_capacity;
+        m_size       = o.m_size.load();
+        m_overflowed = o.m_overflowed.load();
+        m_owner      = false;
+        return *this;
+    }
+
+    // Manager only functionality
+
+    OPT_CONSTRUCT(IsManager)
+    ClosedHashMap(Arena& arena, ManagedArena& marena, size_t capacity,
+                  Managed& managed)
+        : m_arena(&arena)
+        , m_capacity(capacity)
+        , m_size(0)
+        , m_overflowed(false)
+        , m_owner(true)
+        , m_managed(managed)
+    {
+        // Do not assign from a temporary Managed(marena, capacity) here.
+        // Managed-side operator= intentionally sets m_owner=false (view
+        // semantics), and a temporary would free the allocated slots on
+        // destruction, leaving dangling pointers in m_managed.
+        m_managed.m_arena      = &marena;
+        m_managed.m_capacity   = m_capacity;
+        m_managed.m_size       = m_size.load();
+        m_managed.m_overflowed = m_overflowed.load();
+        m_managed.m_owner      = true;
+        m_managed.m_slots
+            = m_managed.m_arena->alloc(sizeof(Slot) * capacity,
+                                       "ClosedHashMap::ClosedHashMap");
+        m_slots = m_arena->alloc(sizeof(Slot) * capacity,
+                                 "ClosedHashMap::ClosedHashMap");
+
+        // Initialize manager-side slot states. Managed side is synchronized
+        // from manager before use.
+        clear();
+    }
+
+    OPT_FUNCT(IsManager, bool)
+    grow() { return resize(std::max(8u, m_capacity * 2u)); }
+
+    OPT_FUNCT(IsManager, void)
+    sync_to_managed()
+    {
+        m_managed.m_arena->copy_to(m_managed.m_slots, m_slots,
+                                   sizeof(Slot) * m_capacity);
+        m_managed.m_size       = m_size.load();
+        m_managed.m_overflowed = m_overflowed.load();
+    }
+    OPT_FUNCT(IsManager, void)
+    sync_from_managed()
+    {
+        m_managed.m_arena->copy_from(m_slots, m_managed.m_slots,
+                                     sizeof(Slot) * m_capacity);
+        m_size       = m_managed.m_size.load();
+        m_overflowed = m_managed.m_overflowed.load();
+    }
+
+    // Both manager and managed
+
+    ~ClosedHashMap()
+    {
+        if (m_owner)
+            m_arena->free(m_slots);
+    }
+
+    bool insert(const Key& key, const Value& value)
+    {
+        if (!m_slots || m_capacity == 0)
+            return false;
+
+        if constexpr (IsManager) {
+            const uint32_t used = m_size.load();
+            if ((uint64_t(used + 1u) * 8u) >= (uint64_t(m_capacity) * 7u)) {
+                if (!grow()) {
+                    m_overflowed.store(true);
+                    return false;
+                }
+            }
+        }
+
+        const uint64_t key_state = slot_state(key);
+        const uint32_t idx       = probe_start(key);
+        for (uint32_t i = 0; i < m_capacity; ++i) {
+            Slot& slot = m_slots[(idx + i) % m_capacity];
+
+            // A reserved slot is being written by another thread; spin until
+            // that writer publishes the final state.
+            uint64_t observed = slot.state.load();
+            while (observed == kReservedState)
+                observed = slot.state.load();
+
+            if (observed == key_state && slot.key == key) {
+                slot.value = value;
+                return true;
+            }
+
+            if (observed == kEmptyState) {
+                uint64_t expected = kEmptyState;
+                // Reserve this slot first so key/value writes are not raced by
+                // another inserter probing the same position.
+                if (!slot.state.cas(expected, kReservedState))
+                    continue;
+
+                slot.key   = key;
+                slot.value = value;
+                slot.state.store(key_state);
+                m_size.fetch_add(1u);
+                return true;
+            }
+        }
+
+        m_overflowed.store(true);
+        return false;
+    }
+
+    bool find(const Key& key, Value& value) const
+    {
+        if (!m_slots || m_capacity == 0)
+            return false;
+
+        const uint64_t key_state = slot_state(key);
+        const uint32_t idx       = probe_start(key);
+        for (uint32_t i = 0; i < m_capacity; ++i) {
+            const Slot& slot = m_slots[(idx + i) % m_capacity];
+
+            uint64_t observed = slot.state.load();
+            while (observed == kReservedState)
+                observed = slot.state.load();
+
+            if (observed == kEmptyState)
+                // Open addressing invariant: first empty slot means key is not
+                // present in this probe chain.
+                return false;
+
+            if (observed == key_state && slot.key == key) {
+                value = slot.value;
+                return true;
+            }
+        }
+        return false;
+    }
+
+    uint32_t capacity() const { return m_capacity; }
+    uint32_t size() const { return m_size.load(); }
+    bool overflowed() const { return m_overflowed.load(); }
+    bool failed() const { return size() != 0 || overflowed(); }
+
+    void clear()
+    {
+        if (!m_slots)
+            return;
+        for (uint32_t i = 0; i < m_capacity; ++i)
+            m_slots[i].state.store(kEmptyState);
+        m_size.store(0u);
+        m_overflowed.store(false);
+    }
+
+    class Iterator {
+    public:
+        Iterator(const Slot* slot, const Slot* end)
+            : m_slot(slot)
+            , m_end(end)
+        {
+            advance_to_occupied();
+        }
+
+        const Key& operator*() const { return m_slot->key; }
+
+        Iterator& operator++()
+        {
+            ++m_slot;
+            advance_to_occupied();
+            return *this;
+        }
+
+        bool operator!=(const Iterator& other) const
+        {
+            return m_slot != other.m_slot;
+        }
+
+    private:
+        void advance_to_occupied()
+        {
+            while (m_slot < m_end) {
+                const uint64_t state = m_slot->state.load();
+                if (state != kEmptyState && state != kReservedState)
+                    break;
+                ++m_slot;
+            }
+        }
+
+        const Slot* m_slot;
+        const Slot* m_end;
+    };
+
+    Iterator begin() const
+    {
+        if (!m_slots)
+            return Iterator(nullptr, nullptr);
+        return Iterator(m_slots.get(), m_slots.get() + m_capacity);
+    }
+
+    Iterator end() const
+    {
+        if (!m_slots)
+            return Iterator(nullptr, nullptr);
+        return Iterator(m_slots.get() + m_capacity, m_slots.get() + m_capacity);
+    }
+
+private:
+    struct Slot {
+        Key key {};
+        Value value {};
+        Atomic<uint64_t> state { kEmptyState };
+    };
+
+
+    static constexpr uint64_t kEmptyState    = 0;
+    static constexpr uint64_t kReservedState = 1;
+
+    static uint64_t slot_state(const Key& key)
+    {
+        uint64_t s = static_cast<uint64_t>(Hash {}(key));
+        // Reserve 0/1 for sentinel states so all occupied slots use >=2.
+        if (s == kEmptyState || s == kReservedState)
+            s += 2;
+        return s;
+    }
+
+    uint32_t probe_start(const Key& key) const
+    {
+        return static_cast<uint32_t>(slot_state(key) % m_capacity);
+    }
+
+    bool resize(uint32_t new_capacity)
+    {
+        tagged_ptr<Slot> old_slots = m_slots;
+        m_slots                    = m_arena->alloc(sizeof(Slot) * new_capacity,
+                                                    "ClosedHashMap::resize");
+        uint32_t old_capacity      = m_capacity;
+        m_capacity                 = new_capacity;
+        if constexpr (IsManager) {
+            clear();
+            // Reinsert so probe positions are rebuilt for the new capacity.
+            for (uint32_t i = 0; i < old_capacity; ++i) {
+                const Slot& slot     = old_slots[i];
+                const uint64_t state = slot.state.load();
+                if (state == kEmptyState || state == kReservedState)
+                    continue;
+                insert(slot.key, slot.value);
+            }
+            m_managed.resize(new_capacity);
+            m_managed.m_arena->copy_to(m_managed.m_slots, m_slots,
+                                       sizeof(Slot) * m_capacity);
+        }  // Otherwise reallocating slots is enough, manager will copy data
+
+        m_arena->free(old_slots);
+        return true;
+    }
+
+    Arena* m_arena;
+    tagged_ptr<Slot> m_slots;
+    uint32_t m_capacity = 0;
+    Atomic<uint32_t> m_size { 0 };
+    Atomic<bool> m_overflowed { false };
+    bool m_owner;
+    // Manager only
+    OPT_FIELD(IsManager, Managed&) m_managed;
+};
+
+}  // namespace texture_device
+
+#undef OPT_FIELD
+#undef OPT_CONSTRUCT
+#undef OPT_FUNCT
diff --git a/testsuite/texture-device/device_tests.cpp b/testsuite/texture-device/device_tests.cpp
new file mode 100644
index 0000000000..04fbc2215a
--- /dev/null
+++ b/testsuite/texture-device/device_tests.cpp
@@ -0,0 +1,186 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#include "filtering_impl.h"  // IWYU pragma: keep
+#include "texture_device_decl.h"
+#include "texture_device_impl.h"  // IWYU pragma: keep
+
+#include <array>
+
+namespace texture_device {
+
+template<typename Arena, typename ManagedArena>
+struct DTextureSystemTestAccess {
+    using System = DTextureSystem<Arena, ManagedArena>;
+
+    static auto ellipse_axes(System& system, const TextureRecord& texture,
+                             Vec2 du, Vec2 dv)
+    {
+        (void)system;
+        return FilteringUnitTestsAccess::ellipse_axes(texture, du, dv);
+    }
+
+    static auto compute_miplevels(System& system, const TextureRecord& texture,
+                                  const MipAnisoFilter::EllipseAxes& axes)
+    {
+        (void)system;
+        return FilteringUnitTestsAccess::compute_miplevels(texture, axes);
+    }
+
+    static auto
+    compute_ellipse_sampling(System& system, const TextureRecord& texture,
+                             const MipAnisoFilter::EllipseAxes& axes, int mip)
+    {
+        (void)system;
+        return FilteringUnitTestsAccess::compute_ellipse_sampling(texture, axes,
+                                                                  mip);
+    }
+};
+
+namespace {
+
+    struct ConstantHash {
+        size_t operator()(uint64_t) const { return 1; }
+    };
+
+}  // namespace
+
+using UnitRequestQueue = DTextureSystem<Host>::RequestQueue;
+bool
+run_device_unit_tests()
+{
+    constexpr uint64_t kGridHash    = 0x3f6b4e91u;
+    constexpr uint64_t kCheckerHash = 0x13a56d2bu;
+    constexpr uint64_t kBulkHash    = 0x67d9aa11u;
+
+    Host host;
+
+    using Map = ClosedHashMap<uint64_t, int, std::hash<uint64_t>, Host>;
+    using CollisionMap = ClosedHashMap<uint64_t, int, ConstantHash, Host>;
+
+    Map map(host, 16);
+    map.clear();
+
+    CollisionMap collision_map(host, 8);
+    collision_map.clear();
+
+    // Basic closed-hash insert/find path.
+    if (!map.insert(10, 100))
+        return false;
+    if (!map.insert(26, 260))
+        return false;
+
+    int value = 0;
+    if (!map.find(10, value) || value != 100)
+        return false;
+    if (!map.find(26, value) || value != 260)
+        return false;
+    // Missing key must not report a hit.
+    if (map.find(42, value))
+        return false;
+
+    // Force collisions into one probe chain and validate retrieval order
+    // independence.
+    if (!collision_map.insert(1, 11) || !collision_map.insert(9, 99)
+        || !collision_map.insert(17, 171))
+        return false;
+    if (!collision_map.find(17, value) || value != 171)
+        return false;
+    if (!collision_map.find(9, value) || value != 99)
+        return false;
+
+    UnitRequestQueue queue(host, DTextureSystem<Host>::kMaxRequests);
+    queue.clear();
+
+    Request req_a;
+    req_a.type              = RequestType::MissingTexture;
+    req_a.tile.texture_hash = kGridHash;
+
+    Request req_b;
+    req_b.type              = RequestType::MissingTexture;
+    req_b.tile.texture_hash = kCheckerHash;
+
+    // Request queue is deduplicating: repeated inserts of the same request
+    // should succeed but not grow queue size.
+    if (!queue.insert(req_a, true))
+        return false;
+    if (!queue.insert(req_a, true))
+        return false;
+    if (!queue.insert(req_b, true))
+        return false;
+
+    std::array<Request, DTextureSystem<Host>::kMaxRequests> bulk;
+    for (uint32_t i = 0; i < DTextureSystem<Host>::kMaxRequests; ++i) {
+        bulk[i].type = RequestType::MissingTile;
+        bulk[i].tile = TileCoords { kBulkHash, uint16_t(i), uint16_t(i / 64),
+                                    uint16_t(i % 4), 0 };
+    }
+
+    UnitRequestQueue full_queue(host, DTextureSystem<Host>::kMaxRequests);
+    full_queue.clear();
+    // Fill to capacity to exercise overflow signaling behavior.
+    for (uint32_t i = 0; i < DTextureSystem<Host>::kMaxRequests; ++i) {
+        if (!full_queue.insert(bulk[i], true))
+            return false;
+    }
+    // Existing key still succeeds when table is full.
+    if (full_queue.insert(bulk[0], true) != true)
+        return false;
+    // New key must fail once no slot remains.
+    if (full_queue.insert(Request { RequestType::MissingTile,
+                                    TileCoords { kBulkHash, 12345, 0, 0, 0 } },
+                          true))
+        return false;
+    if (!full_queue.overflowed())
+        return false;
+
+    {
+        DTextureSystem<Host> system(host);
+        using Access = DTextureSystemTestAccess<Host, NullArena>;
+        TextureRecord tex;
+        tex.width  = 1024;
+        tex.height = 1024;
+
+        // Mip transition: small derivatives should stay near base level.
+        auto axes_lo = Access::ellipse_axes(system, tex,
+                                            Vec2 { 1.0f / 1024.0f, 0.0f },
+                                            Vec2 { 0.0f, 1.0f / 1024.0f });
+        auto mips_lo = Access::compute_miplevels(system, tex, axes_lo);
+        if (mips_lo.mip_levels[0] != 0)
+            return false;
+
+        // Mip transition: larger derivatives should move to coarser levels.
+        auto axes_hi = Access::ellipse_axes(system, tex,
+                                            Vec2 { 8.0f / 1024.0f, 0.0f },
+                                            Vec2 { 0.0f, 8.0f / 1024.0f });
+        auto mips_hi = Access::compute_miplevels(system, tex, axes_hi);
+        if (mips_hi.mip_levels[0] <= mips_lo.mip_levels[0])
+            return false;
+
+        // Anisotropy extremes should increase sample count, capped by max.
+        auto axes_aniso = Access::ellipse_axes(system, tex,
+                                               Vec2 { 8.0f / 1024.0f, 0.0f },
+                                               Vec2 { 0.0f, 1.0f / 1024.0f });
+        auto ellipse = Access::compute_ellipse_sampling(system, tex, axes_aniso,
+                                                        0);
+        if (ellipse.nsamples < 2
+            || ellipse.nsamples > MipAnisoFilter::kMaxSamples)
+            return false;
+
+        // Wrap behavior regression checks.
+        bool in_range = true;
+        if (wrap_coord(-1, 8, WrapMode::Clamp, in_range) != 0 || !in_range)
+            return false;
+        if (wrap_coord(-1, 8, WrapMode::Periodic, in_range) != 7 || !in_range)
+            return false;
+        (void)wrap_coord(-1, 8, WrapMode::Black, in_range);
+        if (in_range)
+            return false;
+    }
+
+    // Dedup result for req_a + req_b should keep queue size at 2.
+    return queue.size() == 2;
+}
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/devicecheck_main.cpp b/testsuite/texture-device/devicecheck_main.cpp
new file mode 100644
index 0000000000..fc5fda25d8
--- /dev/null
+++ b/testsuite/texture-device/devicecheck_main.cpp
@@ -0,0 +1,23 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#include "blend.h"
+
+int
+main()
+{
+    // Keep a direct kernel call in this binary so link-time symbol resolution
+    // covers the full devicecheck path.
+    texture_device::BlendOp op;
+    op.width  = 1;
+    op.height = 1;
+    texture_device::blend_kernel(0, 0,
+                                 texture_device::tagged_ptr<void>(&op, "Host"));
+
+    // Force an observable read of kernel-mutated state to avoid DCE.
+    volatile bool kernel_recorded_failure = op.texture_system.failures();
+    (void)kernel_recorded_failure;
+
+    return texture_device::run_device_unit_tests() ? 0 : 1;
+}
diff --git a/testsuite/texture-device/filtering_decl.h b/testsuite/texture-device/filtering_decl.h
new file mode 100644
index 0000000000..7f3359c96f
--- /dev/null
+++ b/testsuite/texture-device/filtering_decl.h
@@ -0,0 +1,118 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <OpenImageIO/Imath.h>
+
+#include <cstdint>
+
+#include "vector_lite.h"
+
+namespace texture_device {
+
+using Vec2 = Imath::V2f;
+
+struct TileCoords {
+    uint64_t texture_hash = 0;
+    uint16_t x            = 0;
+    uint16_t y            = 0;
+    uint16_t mip          = 0;
+    uint16_t pad          = 0;  // zero padded for the hash to be deterministic
+
+    bool operator==(const TileCoords& other) const
+    {
+        return texture_hash == other.texture_hash && x == other.x
+               && y == other.y && mip == other.mip;
+    }
+};
+
+struct TileRecord;
+struct TextureRecord;
+struct FilteringUnitTestsAccess;
+
+struct Filter {
+    struct Sample {
+        float weight;
+        TileCoords tcoords;
+        int local_x;
+        int local_y;
+        const TileRecord* tile;
+    };
+};
+
+struct MipAnisoFilter : public Filter {
+    static constexpr uint32_t kMaxSamples  = 8;
+    static constexpr uint32_t kMaxMipLevel = 4;
+
+    struct MipSelection {
+        unsigned mip_levels[2] = { 0, 0 };
+        float mip_blend        = 0.0f;
+    };
+
+    struct EllipseSampling {
+        Vec2 axis_uv      = Vec2(1.0f, 0.0f);
+        unsigned nsamples = 1;
+        float span_uv     = 0.0f;
+    };
+
+    struct EllipseAxes {
+        Vec2 major_uv   = Vec2(1.0f, 0.0f);
+        float major_rho = 1.0f;
+        float minor_rho = 1.0f;
+    };
+
+    const TextureRecord& texture;
+    EllipseAxes axes;
+    MipSelection mips;
+    unsigned num_mips;
+
+    MipAnisoFilter(const TextureRecord& texture, Vec2 du, Vec2 dv);
+    vector_lite<Sample, kMaxSamples * 4>
+    generate_samples(unsigned mip_i, float u, float v) const;
+
+private:
+    friend struct FilteringUnitTestsAccess;
+
+    static EllipseAxes ellipse_axes(const TextureRecord& texture, Vec2 du,
+                                    Vec2 dv);
+    static MipSelection compute_miplevels(const TextureRecord& texture,
+                                          const EllipseAxes& axes);
+    static EllipseSampling
+    compute_ellipse_sampling(const TextureRecord& texture,
+                             const EllipseAxes& axes, int mip);
+
+
+    static int floor_div(int n, int d);
+    static float anisotropic_aspect(float major_rho, float minor_rho,
+                                    int max_aniso)
+    {
+        const float safe_minor = std::max(1.0f, minor_rho);
+        return std::clamp(major_rho / safe_minor, 1.0f, float(max_aniso));
+    }
+};
+
+struct FilteringUnitTestsAccess {
+    static MipAnisoFilter::EllipseAxes
+    ellipse_axes(const TextureRecord& texture, Vec2 du, Vec2 dv)
+    {
+        return MipAnisoFilter::ellipse_axes(texture, du, dv);
+    }
+
+    static MipAnisoFilter::MipSelection
+    compute_miplevels(const TextureRecord& texture,
+                      const MipAnisoFilter::EllipseAxes& axes)
+    {
+        return MipAnisoFilter::compute_miplevels(texture, axes);
+    }
+
+    static MipAnisoFilter::EllipseSampling
+    compute_ellipse_sampling(const TextureRecord& texture,
+                             const MipAnisoFilter::EllipseAxes& axes, int mip)
+    {
+        return MipAnisoFilter::compute_ellipse_sampling(texture, axes, mip);
+    }
+};
+
+}  // namespace texture_device
\ No newline at end of file
diff --git a/testsuite/texture-device/filtering_impl.h b/testsuite/texture-device/filtering_impl.h
new file mode 100644
index 0000000000..ff2544cd30
--- /dev/null
+++ b/testsuite/texture-device/filtering_impl.h
@@ -0,0 +1,178 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include "filtering_decl.h"
+#include "texture_device_decl.h"
+#include <cmath>
+
+namespace texture_device {
+
+inline MipAnisoFilter::MipAnisoFilter(const TextureRecord& texture, Vec2 du,
+                                      Vec2 dv)
+    : texture(texture)
+{
+    axes     = ellipse_axes(texture, du, dv);
+    mips     = compute_miplevels(texture, axes);
+    num_mips = mips.mip_levels[1] == mips.mip_levels[0] ? 1 : 2;
+}
+
+inline int
+MipAnisoFilter::floor_div(int n, int d)
+{
+    int q = n / d;
+    int r = n % d;
+    if (r != 0 && ((r < 0) != (d < 0)))
+        --q;
+    return q;
+}
+
+inline vector_lite<Filter::Sample, MipAnisoFilter::kMaxSamples * 4>
+MipAnisoFilter::generate_samples(unsigned mip_i, float u, float v) const
+{
+    EllipseSampling e = compute_ellipse_sampling(texture, axes,
+                                                 mips.mip_levels[mip_i]);
+
+
+    // Precompute normalized per-sample gaussian weights along the major axis.
+    vector_lite<float, kMaxSamples> gaussian;
+    float gaussian_weight_sum = 0;
+    for (unsigned s = 0; s < e.nsamples; ++s) {
+        const float t = (s + 0.5f) / e.nsamples - 0.5f;
+        const float w = e.nsamples > 1 ? std::exp(-2.0f * t * t) : 1;
+        gaussian.push_back(w);
+        gaussian_weight_sum += w;
+    }
+    vector_lite<Sample, kMaxSamples * 4> samples;
+
+    const uint32_t mip     = mips.mip_levels[mip_i];
+    const uint32_t base_w  = std::max(1u, texture.width);
+    const uint32_t base_h  = std::max(1u, texture.height);
+    const uint32_t level_w = std::max(1u, base_w >> mip);
+    const uint32_t level_h = std::max(1u, base_h >> mip);
+
+    if (gaussian_weight_sum <= 0.0f)
+        return samples;
+
+    unsigned s = 0;
+    for (const float gaussian_sample : gaussian) {
+        const float t = (e.nsamples > 1)
+                            ? ((float(s) + 0.5f) / float(e.nsamples) - 0.5f)
+                            : 0.0f;
+        const Vec2 uv = Vec2(u, v) + (2.0f * t * e.span_uv) * e.axis_uv;
+        const float gaussian_w = gaussian_sample / gaussian_weight_sum;
+
+        const float fu = uv.x * float(level_w);
+        const float fv = uv.y * float(level_h);
+        const int x0   = int(std::floor(fu));
+        const int y0   = int(std::floor(fv));
+        const int x1   = x0 + 1;
+        const int y1   = y0 + 1;
+        const float tx = fu - float(x0);
+        const float ty = fv - float(y0);
+
+        const int tap_x[4]        = { x0, x1, x0, x1 };
+        const int tap_y[4]        = { y0, y0, y1, y1 };
+        const float bilinear_w[4] = { (1.0f - tx) * (1.0f - ty),
+                                      tx * (1.0f - ty), (1.0f - tx) * ty,
+                                      tx * ty };
+
+        // Expand each anisotropic sample position into 4 bilinear taps.
+        for (int tap = 0; tap < 4; ++tap) {
+            const float w = gaussian_w * bilinear_w[tap];
+            if (w <= 0.0f)
+                continue;
+
+            bool x_ok           = true;
+            bool y_ok           = true;
+            const int wrapped_x = wrap_coord(tap_x[tap], int(level_w),
+                                             texture.swrap, x_ok);
+            const int wrapped_y = wrap_coord(tap_y[tap], int(level_h),
+                                             texture.twrap, y_ok);
+            if (!x_ok || !y_ok)
+                continue;
+
+            const uint16_t tile_x = floor_div(wrapped_x,
+                                              int(TileRecord::kTileWidth));
+            const uint16_t tile_y = floor_div(wrapped_y,
+                                              int(TileRecord::kTileHeight));
+            const int local_x     = wrapped_x
+                                - tile_x * int(TileRecord::kTileWidth);
+            const int local_y = wrapped_y
+                                - tile_y * int(TileRecord::kTileHeight);
+
+            if (samples.size() >= samples.capacity())
+                continue;
+
+            samples.push_back({
+                w,
+                TileCoords { texture.name.hash(), tile_x, tile_y, uint16_t(mip),
+                             0 },
+                local_x,
+                local_y,
+                nullptr,
+            });
+        }
+        ++s;
+    }
+    return samples;
+}
+
+inline MipAnisoFilter::EllipseSampling
+MipAnisoFilter::compute_ellipse_sampling(const TextureRecord& texture,
+                                         const EllipseAxes& axes, int mip)
+{
+    (void)texture;
+    const float mip_scale   = 1.0f / float(1u << uint32_t(std::max(0, mip)));
+    const float aspect      = anisotropic_aspect(axes.major_rho * mip_scale,
+                                                 axes.minor_rho * mip_scale,
+                                                 MipAnisoFilter::kMaxSamples);
+    const unsigned nsamples = std::clamp(unsigned(std::ceil(aspect)), 1u,
+                                         MipAnisoFilter::kMaxSamples);
+
+    const float span_uv = 0.5f * axes.major_uv.length();
+    return EllipseSampling { axes.major_uv, nsamples, span_uv };
+}
+
+inline MipAnisoFilter::EllipseAxes
+MipAnisoFilter::ellipse_axes(const TextureRecord& texture, Vec2 du, Vec2 dv)
+{
+    const float tex_w = float(std::max(1u, texture.width));
+    const float tex_h = float(std::max(1u, texture.height));
+    const float rho_u = std::sqrt((du.x * tex_w) * (du.x * tex_w)
+                                  + (du.y * tex_h) * (du.y * tex_h));
+    const float rho_v = std::sqrt((dv.x * tex_w) * (dv.x * tex_w)
+                                  + (dv.y * tex_h) * (dv.y * tex_h));
+
+    const bool major_is_u = rho_u >= rho_v;
+    const Vec2 major_uv   = major_is_u ? du : dv;
+    const float major_rho = std::max(rho_u, rho_v);
+    const float minor_rho = std::max(1.0f, std::min(rho_u, rho_v));
+
+    const float major_uv_len = major_uv.length();
+    Vec2 axis_uv             = Vec2(1.0f, 0.0f);
+    if (major_uv_len > 0.0f)
+        axis_uv = major_uv / major_uv_len;
+
+    return EllipseAxes { axis_uv, major_rho, minor_rho };
+}
+
+inline MipAnisoFilter::MipSelection
+MipAnisoFilter::compute_miplevels(const TextureRecord& texture,
+                                  const EllipseAxes& axes)
+{
+    (void)texture;
+    const float rho = std::max(1.0f, axes.minor_rho);
+
+    const float lod     = std::max(0.0f, std::log2(rho));
+    const uint32_t mip0 = std::clamp(uint32_t(std::floor(lod)), 0u,
+                                     kMaxMipLevel);
+    const uint32_t mip1 = std::min(kMaxMipLevel, mip0 + 1);
+    const float blend   = std::clamp(lod - float(mip0), 0.0f, 1.0f);
+    return MipSelection { { mip0, mip1 }, blend };
+}
+
+
+}  // namespace texture_device
\ No newline at end of file
diff --git a/testsuite/texture-device/host.cpp b/testsuite/texture-device/host.cpp
new file mode 100644
index 0000000000..dfbe489919
--- /dev/null
+++ b/testsuite/texture-device/host.cpp
@@ -0,0 +1,144 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#include "arena.h"
+#include "blend.h"
+#include "texture_device_impl.h"  // IWYU pragma: keep
+#include "texture_loader.h"
+
+#include <OpenImageIO/imageio.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+using OIIO::ImageOutput;
+using OIIO::ImageSpec;
+using OIIO::TypeDesc;
+using OIIO::ustringhash;
+using texture_device::blend_kernel;
+using texture_device::BlendOp;
+using texture_device::DTextureSystem;
+using texture_device::Host;
+using texture_device::MockDevice;
+using texture_device::Request;
+using texture_device::RGBA;
+using texture_device::run_device_unit_tests;
+using texture_device::tagged_ptr;
+using texture_device::TextureLoader;
+
+namespace {
+
+bool
+write_output(const std::string& filename, int width, int height,
+             const std::vector<RGBA>& pixels)
+{
+    auto out = ImageOutput::create(filename);
+    if (!out)
+        return false;
+
+    ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
+    if (!out->open(filename, spec))
+        return false;
+
+    bool ok = out->write_image(TypeDesc::FLOAT, pixels.data());
+    ok      = out->close() && ok;
+    return ok;
+}
+
+}  // namespace
+
+int
+main()
+{
+    if (!run_device_unit_tests()) {
+        std::cout << "texture-device: unit-tests-failed\n";
+        return 2;
+    }
+
+    const int width  = 256;
+    const int height = 256;
+
+    Host host;
+    MockDevice device;
+    TextureLoader loader;
+
+    BlendOp op;
+    DTextureSystem<Host, MockDevice> textures(host, device, op.texture_system);
+    static constexpr const char* kTextureSearchPaths[] = {
+        "../common/textures",
+        "../../../testsuite/common/textures",
+        "testsuite/common/textures",
+    };
+    for (const char* path : kTextureSearchPaths)
+        loader.add_texture_path(path);
+    textures.request_queue().clear();
+
+    std::vector<RGBA> output(width * height, RGBA(0.0f, 0.0f, 0.0f, 0.0f));
+
+    op.width         = width;
+    op.height        = height;
+    op.name_a        = ustringhash("grid.tx");
+    op.name_b        = ustringhash("checker.tx");
+    op.output_buffer = nullptr;
+    textures.sync_to_managed();
+
+    const size_t output_bytes      = output.size() * sizeof(RGBA);
+    tagged_ptr<void> device_output = device.alloc(output_bytes,
+                                                  "host::output_buffer");
+    tagged_ptr<void> device_op     = device.alloc(sizeof(BlendOp),
+                                                  "host::blend_op");
+    device.copy_to(device_output, tagged_ptr<const void>(output.data(), "Host"),
+                   output_bytes);
+
+    op.output_buffer = device_output;
+
+    constexpr int max_passes = 8;
+    int completed_passes     = 0;
+    bool converged           = false;
+    for (int pass = 0; pass < max_passes; ++pass) {
+        textures.begin_launch();
+        device.copy_to(device_op, tagged_ptr<const void>(&op, "Host"),
+                       sizeof(op));
+        device.run(width, height, &blend_kernel, device_op);
+        device.copy_from(tagged_ptr<void>(&op, "Host"), device_op, sizeof(op));
+        ++completed_passes;
+
+        textures.sync_from_managed();
+        if (textures.needs_retry()) {
+            textures.sync_to_managed();
+            continue;
+        }
+
+        if (!textures.failures()) {
+            converged = true;
+            break;
+        }
+
+        textures.process_requests(
+            [&](const Request& req, DTextureSystem<Host, MockDevice>* ts) {
+                return loader.process_request(req, ts);
+            });
+
+        textures.sync_to_managed();
+    }
+
+    if (!converged)
+        std::cout << "texture-device: retry-limit-hit\n";
+
+    device.copy_from(tagged_ptr<void>(output.data(), "Host"), device_output,
+                     output_bytes);
+
+    (void)write_output("out.exr", width, height, output);
+
+    device.free(device_op);
+    device.free(device_output);
+
+    std::cout << "texture-device: startup-ok\n";
+    std::cout << "texture-device: passes=" << completed_passes << "\n";
+    std::cout << "texture-device: requests=" << textures.request_queue().size()
+              << "\n";
+    std::cout << "texture-device: wrote out.exr\n";
+    return 0;
+}
diff --git a/testsuite/texture-device/img/checker.jpg b/testsuite/texture-device/img/checker.jpg
new file mode 100644
index 0000000000..cd245d41f3
Binary files /dev/null and b/testsuite/texture-device/img/checker.jpg differ
diff --git a/testsuite/texture-device/img/grid.jpg b/testsuite/texture-device/img/grid.jpg
new file mode 100644
index 0000000000..4f7427c1a2
Binary files /dev/null and b/testsuite/texture-device/img/grid.jpg differ
diff --git a/testsuite/texture-device/img/result.jpg b/testsuite/texture-device/img/result.jpg
new file mode 100644
index 0000000000..82091c2d82
Binary files /dev/null and b/testsuite/texture-device/img/result.jpg differ
diff --git a/testsuite/texture-device/ref/out.exr b/testsuite/texture-device/ref/out.exr
new file mode 100644
index 0000000000..539b820f8e
Binary files /dev/null and b/testsuite/texture-device/ref/out.exr differ
diff --git a/testsuite/texture-device/ref/out.txt b/testsuite/texture-device/ref/out.txt
new file mode 100644
index 0000000000..761d3bb207
--- /dev/null
+++ b/testsuite/texture-device/ref/out.txt
@@ -0,0 +1,4 @@
+texture-device: startup-ok
+texture-device: passes=3
+texture-device: requests=0
+texture-device: wrote out.exr
diff --git a/testsuite/texture-device/run.py b/testsuite/texture-device/run.py
new file mode 100644
index 0000000000..9a34b2a8c1
--- /dev/null
+++ b/testsuite/texture-device/run.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+# Copyright Contributors to the OpenImageIO project.
+# SPDX-License-Identifier: Apache-2.0
+# https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+# texture-device is built by top-level CMake via add_subdirectory, so
+# tests only need to run the already-built executable.
+command += run_app(oiio_app("texture-device").strip())
+
+outputs = [ "out.txt", "out.exr" ]
diff --git a/testsuite/texture-device/stream.h b/testsuite/texture-device/stream.h
new file mode 100644
index 0000000000..d1025c59f1
--- /dev/null
+++ b/testsuite/texture-device/stream.h
@@ -0,0 +1,264 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <type_traits>
+#include <variant>
+
+#include "arena.h"
+
+namespace texture_device {
+
+#define OPT_FUNCT(condition, return_type) \
+    template<typename __Q = return_type> std::enable_if_t<condition, __Q>
+
+#define OPT_CONSTRUCT(condition) \
+    template<bool __C = condition, typename = std::enable_if_t<__C>>
+
+#define OPT_FIELD(condition, field_type) \
+    std::conditional_t<condition, field_type, std::monostate>
+
+template<class T, typename Arena, typename ManagedArena = NullArena>
+class Stream {
+public:
+    template<class, typename, typename> friend class Stream;
+
+    static constexpr bool IsManager
+        = !std::is_same<ManagedArena, NullArena>::value;
+    using Managed = Stream<T, ManagedArena>;
+
+    // Managed side functionality
+
+    OPT_FUNCT(!IsManager, T&)
+    operator[](uint32_t i)
+    {
+        assert(i < m_size);
+        const uint32_t page_index = i / kPageSize;
+        const uint32_t off        = i % kPageSize;
+        assert(page_index < num_pages());
+        tagged_ptr<Page> page = m_pages[page_index];
+        return (*page)[off];
+    }
+
+    OPT_FUNCT(!IsManager, const T&)
+    operator[](uint32_t i) const
+    {
+        assert(i < m_size);
+        const uint32_t page_index = i / kPageSize;
+        const uint32_t off        = i % kPageSize;
+        assert(page_index < num_pages());
+        tagged_ptr<Page> page = m_pages[page_index];
+        return (*page)[off];
+    }
+
+    OPT_CONSTRUCT(!IsManager)
+    Stream()
+        : m_pages(nullptr)
+        , m_page_capacity(0)
+        , m_size(0)
+        , m_owner(false)
+        , m_arena(nullptr)
+    {
+    }
+
+    OPT_CONSTRUCT(!IsManager)
+    Stream(Arena& arena)
+        : m_pages(nullptr)
+        , m_page_capacity(0)
+        , m_size(0)
+        , m_owner(false)
+        , m_arena(&arena)
+    {
+    }
+
+    OPT_CONSTRUCT(!IsManager)
+    Stream(const Stream& o)
+        : m_pages(o.m_pages)
+        , m_page_capacity(o.m_page_capacity)
+        , m_size(o.m_size)
+        , m_owner(false)
+        , m_arena(o.m_arena)
+    {
+    }
+
+    OPT_FUNCT(!IsManager, const Stream&)
+    operator=(const Stream & o)
+    {
+        m_pages         = o.m_pages;
+        m_page_capacity = o.m_page_capacity;
+        m_size          = o.m_size;
+        m_owner         = false;
+        m_arena         = o.m_arena;
+        return *this;
+    }
+
+    // Manager side functionality
+
+    OPT_CONSTRUCT(IsManager)
+    Stream(Arena& arena, ManagedArena& marena, Managed& managed)
+        : m_pages(nullptr)
+        , m_page_capacity(0)
+        , m_size(0)
+        , m_owner(true)  // Manager always owns
+        , m_arena(&arena)
+        , m_managed(managed)
+        , m_staging_page(nullptr)
+        , m_staging_index(0)
+    {
+        // Initialize managed-side storage explicitly; assignment would force
+        // m_owner=false and break teardown invariants when no pages are grown.
+        m_managed.m_pages         = nullptr;
+        m_managed.m_page_capacity = 0;
+        m_managed.m_size          = 0;
+        m_managed.m_owner         = false;
+        m_managed.m_arena         = &marena;
+        m_staging_page = m_arena->alloc(sizeof(Page), "Stream::Stream()");
+    }
+
+    ~Stream()
+    {
+        if (m_owner) {
+            if constexpr (IsManager) {
+                for (uint32_t i = 0, end = num_pages(); i < end; ++i)
+                    m_managed.m_arena->free(m_pages[i]);
+                assert(!m_managed.m_owner);
+                m_managed.m_arena->free(m_managed.m_pages);
+                m_managed.m_pages = nullptr;
+                m_arena->free(m_staging_page);
+                m_staging_page = nullptr;
+            }
+            m_arena->free(m_pages);
+        }
+    }
+
+    OPT_FUNCT(IsManager, void)
+    clear()
+    {
+        if (m_pages) {
+            uint32_t npages = num_pages();
+            for (uint32_t i = 0; i < npages; ++i) {
+                m_managed.m_arena->free(m_pages[i]);
+                m_pages[i] = nullptr;
+            }
+            if (m_managed.m_pages && m_page_capacity) {
+                m_managed.m_arena->copy_to(m_managed.m_pages, m_pages,
+                                           sizeof(tagged_ptr<Page>)
+                                               * m_page_capacity);
+            }
+        }
+        m_staging_index = m_size = m_managed.m_size = 0;
+    }
+
+    OPT_FUNCT(IsManager, uint32_t)
+    push_back(const T& value)
+    {
+        // When a page fills, flush the staging page into managed-visible
+        // storage before starting the next one.
+        if (m_size && (m_size % kPageSize) == 0)
+            sync_stage();
+        uint32_t req_page_capacity = (m_size + kPageSize) / kPageSize;
+        if (req_page_capacity > m_page_capacity)
+            grow_page_capacity();
+        uint32_t page = req_page_capacity - 1;
+        if (!m_pages[page]) {
+            // Pages are allocated in managed arena memory so device-side reads
+            // can index them directly after sync.
+            m_pages[page] = m_managed.m_arena->alloc(sizeof(Page),
+                                                     "Stream::push_back");
+            m_managed.m_arena->copy_to(m_managed.m_pages, m_pages,
+                                       sizeof(tagged_ptr<Page>)
+                                           * m_page_capacity);
+            m_staging_index = 0;
+        }
+        (*m_staging_page)[m_staging_index] = value;
+        m_size++;
+        m_staging_index = m_size % kPageSize;
+        return m_size - 1;
+    }
+
+    OPT_FUNCT(IsManager, void)
+    sync_to_managed()
+    {
+        // Only the mutable tail page needs to be flushed; sealed pages are
+        // already synchronized when they are completed.
+        if (m_size != m_managed.m_size)
+            sync_stage();
+    }
+    OPT_FUNCT(IsManager, void)
+    sync_from_managed() {}
+
+private:
+    static constexpr uint32_t kPageSize = uint32_t((64ull * 1024ull * 1024ull)
+                                                   / sizeof(T));  // 64MB pages
+    static_assert(kPageSize > 0, "Stream page size must be non-zero");
+
+    using Page = std::array<T, kPageSize>;
+
+    uint32_t num_pages() const
+    {
+        return m_size / kPageSize + (m_size % kPageSize ? 1 : 0);
+    }
+
+    size_t grow_page_capacity()
+    {
+        // Both manager and managed
+        uint32_t newcap = m_page_capacity ? m_page_capacity * 2 : 2;
+        tagged_ptr<tagged_ptr<Page>> newp
+            = m_arena->alloc(sizeof(tagged_ptr<Page>) * newcap,
+                             "Stream::grow_page_capacity");
+        // Ensure newly allocated pointer slots start null.
+        std::fill_n(newp.get(), newcap, tagged_ptr<Page>(nullptr));
+        m_arena->copy_in(newp, m_pages,
+                         sizeof(tagged_ptr<Page>) * m_page_capacity);
+        m_page_capacity = newcap;
+        if (m_owner)
+            m_arena->free(m_pages);
+        m_pages = newp;
+        m_owner = true;
+        // Managed only
+        if constexpr (IsManager) {
+            m_managed.grow_page_capacity();
+            m_managed.m_owner = false;
+            assert(m_page_capacity == m_managed.m_page_capacity);
+        }
+        return m_page_capacity;
+    }
+
+    void sync_stage()
+    {
+        if (m_size == 0)
+            return;
+        const uint32_t last_page = (m_size - 1) / kPageSize;
+        assert(last_page < num_pages());
+        // The staging page mirrors the current tail page and is copied as a
+        // whole page for simplicity.
+        m_managed.m_arena->copy_to(m_pages[last_page], m_staging_page,
+                                   sizeof(Page));
+        m_managed.m_size = m_size;
+    }
+
+    // Both managed and manager
+    tagged_ptr<tagged_ptr<Page>> m_pages;
+    uint32_t m_page_capacity;
+    uint32_t m_size;
+    bool m_owner;
+    Arena* m_arena;
+
+    // Manager only
+    OPT_FIELD(IsManager, Managed&) m_managed;
+    OPT_FIELD(IsManager, tagged_ptr<Page>) m_staging_page;
+    OPT_FIELD(IsManager, uint32_t) m_staging_index;
+};
+
+}  // namespace texture_device
+
+#undef OPT_FIELD
+#undef OPT_CONSTRUCT
+#undef OPT_FUNCT
diff --git a/testsuite/texture-device/tagged_ptr.h b/testsuite/texture-device/tagged_ptr.h
new file mode 100644
index 0000000000..8fd4868a6d
--- /dev/null
+++ b/testsuite/texture-device/tagged_ptr.h
@@ -0,0 +1,171 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <type_traits>
+
+#include <OpenImageIO/strutil.h>
+
+namespace texture_device {
+
+#define OPT_FUNCT(condition, return_type) \
+    template<typename __Q = return_type> std::enable_if_t<condition, __Q>
+
+extern uint64_t g_tagged_ptr_context;
+
+inline uint64_t
+ptrtag(const char* s)
+{
+    if (!s || !s[0])
+        return 0;
+    return OIIO::Strutil::strhash64(std::strlen(s), s);
+}
+
+template<size_t N>
+inline constexpr uint64_t
+ptrtag(const char (&s)[N])
+{
+    static_assert(N > 1, "tag literal must be non-empty");
+    return OIIO::Strutil::strhash64(N - 1, s);
+}
+
+template<class T> class tagged_ptr {
+public:
+    using element_type            = T;
+    static constexpr bool IsVoid  = std::is_void<T>::value;
+    static constexpr bool IsConst = std::is_const<T>::value;
+    using ElementRef              = std::add_lvalue_reference_t<T>;
+    using ElementPtr              = std::add_pointer_t<T>;
+
+    tagged_ptr() = default;
+    tagged_ptr(std::nullptr_t)
+        : m_ptr(nullptr)
+        , m_tag(0)
+    {
+    }
+
+    template<class U,
+             class = std::enable_if_t<std::is_convertible<U*, T*>::value>>
+    tagged_ptr(U* p) = delete;
+
+    template<class U,
+             class = std::enable_if_t<std::is_convertible<U*, T*>::value>>
+    tagged_ptr(U* p, const char* context_tag)
+        : m_ptr(p)
+        , m_tag(ptrtag(context_tag))
+    {
+    }
+
+    template<class U,
+             class = std::enable_if_t<std::is_convertible<U*, T*>::value>>
+    tagged_ptr(const tagged_ptr<U>& other)
+        : m_ptr(other.get())
+        , m_tag(other.tag())
+    {
+    }
+
+    template<class U,
+             class = std::enable_if_t<std::is_convertible<U*, T*>::value>>
+    tagged_ptr& operator=(const tagged_ptr<U>& other)
+    {
+        m_ptr = other.get();
+        m_tag = other.tag();
+        return *this;
+    }
+
+    tagged_ptr(const tagged_ptr<void>& other)
+        : m_ptr(static_cast<T*>(other.get()))
+        , m_tag(other.tag())
+    {
+    }
+
+    OPT_FUNCT(!IsVoid, tagged_ptr&)
+    operator=(const tagged_ptr<void>& other)
+    {
+        m_ptr = static_cast<T*>(other.get());
+        m_tag = other.tag();
+        return *this;
+    }
+
+    tagged_ptr(const tagged_ptr<const void>& other)
+        : m_ptr(static_cast<T*>(other.get()))
+        , m_tag(other.tag())
+    {
+    }
+
+    OPT_FUNCT(!IsVoid && IsConst, tagged_ptr&)
+    operator=(const tagged_ptr<const void>& other)
+    {
+        m_ptr = static_cast<T*>(other.get());
+        m_tag = other.tag();
+        return *this;
+    }
+
+    T* get() const { return m_ptr; }
+
+    explicit operator bool() const { return m_ptr != nullptr; }
+
+    bool operator==(std::nullptr_t) const { return m_ptr == nullptr; }
+    bool operator!=(std::nullptr_t) const { return m_ptr != nullptr; }
+
+    bool operator==(const tagged_ptr<T>& other) const
+    {
+        return m_ptr == other.get() && m_tag == other.tag();
+    }
+
+    bool operator!=(const tagged_ptr<T>& other) const
+    {
+        return !(*this == other);
+    }
+
+    bool is(uint64_t tag) const { return m_tag == tag; }
+    bool is(const char* context_tag) const
+    {
+        return m_tag == ptrtag(context_tag);
+    }
+    uint64_t tag() const { return m_tag; }
+
+    OPT_FUNCT(!IsVoid, ElementRef)
+    operator*() const
+    {
+        check_deref_allowed();
+        return *m_ptr;
+    }
+
+    OPT_FUNCT(!IsVoid, ElementPtr)
+    operator->() const
+    {
+        check_deref_allowed();
+        return m_ptr;
+    }
+
+    OPT_FUNCT(!IsVoid, ElementRef)
+    operator[](size_t i) const
+    {
+        check_deref_allowed();
+        return m_ptr[i];
+    }
+
+private:
+    template<class U> friend class tagged_ptr;
+
+    void check_deref_allowed() const
+    {
+        // Enforce explicit host/device context boundaries at dereference time.
+        if (m_tag != g_tagged_ptr_context)
+            std::abort();
+    }
+
+    T* m_ptr       = nullptr;
+    uint64_t m_tag = 0;
+};
+
+}  // namespace texture_device
+
+#undef OPT_FUNCT
diff --git a/testsuite/texture-device/texture_device_decl.h b/testsuite/texture-device/texture_device_decl.h
new file mode 100644
index 0000000000..90abdeaf9e
--- /dev/null
+++ b/testsuite/texture-device/texture_device_decl.h
@@ -0,0 +1,271 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <OpenImageIO/Imath.h>
+#include <OpenImageIO/hash.h>
+#include <OpenImageIO/ustring.h>
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <type_traits>
+#include <vector>
+
+#include "arena.h"
+#include "closed_hashmap.h"
+#include "filtering_decl.h"
+#include "stream.h"
+
+namespace texture_device {
+
+#define OPT_FUNCT(condition, return_type) \
+    template<typename __Q = return_type> std::enable_if_t<condition, __Q>
+
+#define OPT_CONSTRUCT(condition) \
+    template<bool __C = condition, typename = std::enable_if_t<__C>>
+
+#define OPT_FIELD(condition, field_type) \
+    std::conditional_t<condition, field_type, std::monostate>
+
+#define OPT_FUNCT_DECL(condition) \
+    template<bool __C = condition, typename = std::enable_if_t<__C>>
+
+using RGBA = Imath::C4f;
+using Vec2 = Imath::V2f;
+
+struct TileCoordsHash {
+    size_t operator()(const TileCoords& c) const;
+};
+
+enum class RequestType : uint8_t { MissingTexture, MissingTile };
+
+struct Request {
+    RequestType type = RequestType::MissingTexture;
+    TileCoords tile;
+
+    bool operator==(const Request& other) const;
+};
+
+struct RequestHash {
+    size_t operator()(const Request& req) const;
+    static uint64_t hash_mix_u64(uint64_t h, uint64_t v);
+};
+
+enum class WrapMode : uint8_t { Clamp, Periodic, Black };
+
+int
+wrap_coord(int coord, int size, WrapMode mode, bool& in_range);
+
+struct TileRecord {
+    static constexpr uint32_t kTileWidth  = 64;
+    static constexpr uint32_t kTileHeight = 64;
+    TileCoords tile;
+    std::array<RGBA, kTileWidth * kTileHeight> pixels {};
+};
+
+struct TextureRecord {
+    OIIO::ustringhash name;
+    bool ready      = false;
+    uint32_t width  = 0;
+    uint32_t height = 0;
+    WrapMode swrap  = WrapMode::Clamp;
+    WrapMode twrap  = WrapMode::Clamp;
+
+    void reset(OIIO::ustringhash texture_name)
+    {
+        name   = texture_name;
+        ready  = false;
+        width  = 0;
+        height = 0;
+        swrap  = WrapMode::Periodic;
+        twrap  = WrapMode::Periodic;
+    }
+};
+
+template<typename Arena, typename ManagedArena> struct DTextureSystemTestAccess;
+
+template<typename Arena, typename ManagedArena = NullArena>
+class DTextureSystem {
+public:
+    template<typename, typename> friend class DTextureSystem;
+    template<typename, typename> friend struct DTextureSystemTestAccess;
+
+    template<typename T> using Atomic = typename Arena::template Atomic<T>;
+    using Managed                     = DTextureSystem<ManagedArena>;
+    static constexpr bool IsManager
+        = !std::is_same<ManagedArena, NullArena>::value;
+
+    static constexpr uint32_t kMaxResidentTextures = 32;
+    static constexpr uint32_t kMaxResidentTiles    = 2048;
+    static constexpr uint32_t kMaxTilesPerTexture  = 256;
+    static constexpr uint32_t kMaxRequests         = 1024;
+    static constexpr uint32_t kTileWidth           = TileRecord::kTileWidth;
+    static constexpr uint32_t kTileHeight          = TileRecord::kTileHeight;
+
+    using RequestQueue
+        = ClosedHashMap<Request, bool, RequestHash, Arena, ManagedArena>;
+    using TextureMap = ClosedHashMap<uint64_t, uint32_t, std::hash<uint64_t>,
+                                     Arena, ManagedArena>;
+
+    // Managed only functionality
+
+    OPT_CONSTRUCT(!IsManager)
+    DTextureSystem()
+        : m_failed(false)
+        , m_texture_count(0)
+        , m_tile_count(0)
+    {
+    }
+
+    OPT_CONSTRUCT(!IsManager)
+    DTextureSystem(Arena& arena)
+        : m_queue(arena, kMaxRequests)
+        , m_failed(false)
+        , m_texture_count(0)
+        , m_texture_lookup(arena, kMaxResidentTextures)
+        , m_tile_pool(arena)
+        , m_tile_index(arena, kMaxResidentTiles)
+        , m_tile_count(0)
+    {
+        std::fill(std::begin(m_textures), std::end(m_textures),
+                  TextureRecord {});
+    }
+
+    OPT_CONSTRUCT(!IsManager)
+    DTextureSystem(const DTextureSystem& o)
+    {
+        m_queue  = o.m_queue;
+        m_failed = o.m_failed.load();
+        memcpy(m_textures, o.m_textures, sizeof(m_textures));
+        m_texture_count  = o.m_texture_count;
+        m_texture_lookup = o.m_texture_lookup;
+        m_tile_pool      = o.m_tile_pool;
+        m_tile_index     = o.m_tile_index;
+        m_tile_count     = o.m_tile_count;
+    }
+
+
+    OPT_FUNCT(!IsManager, DTextureSystem&)
+    operator=(const DTextureSystem & o)
+    {
+        m_queue  = o.m_queue;
+        m_failed = o.m_failed.load();
+        memcpy(m_textures, o.m_textures, sizeof(m_textures));
+        m_texture_count  = o.m_texture_count;
+        m_texture_lookup = o.m_texture_lookup;
+        m_tile_pool      = o.m_tile_pool;
+        m_tile_index     = o.m_tile_index;
+        m_tile_count     = o.m_tile_count;
+        return *this;
+    }
+
+    // Manager only functionality
+
+    OPT_CONSTRUCT(IsManager)
+    DTextureSystem(Arena& arena, ManagedArena& marena, Managed& managed)
+        : m_managed(managed)
+        , m_queue(arena, marena, kMaxRequests, m_managed.m_queue)
+        , m_failed(false)
+        , m_texture_count(0)
+        , m_texture_lookup(arena, marena, kMaxResidentTextures,
+                           m_managed.m_texture_lookup)
+        , m_tile_pool(arena, marena, m_managed.m_tile_pool)
+        , m_tile_index(arena, marena, kMaxResidentTiles, m_managed.m_tile_index)
+        , m_tile_count(0)
+    {
+        std::fill(std::begin(m_textures), std::end(m_textures),
+                  TextureRecord {});
+    }
+
+    OPT_FUNCT(IsManager, void) begin_launch() { m_failed.store(false); }
+    OPT_FUNCT_DECL(IsManager)
+    bool set_texture_ready(OIIO::ustringhash name, uint32_t width,
+                           uint32_t height);
+    OPT_FUNCT_DECL(IsManager)
+    bool set_tile_payload(OIIO::ustringhash name, TileCoords tile, int width,
+                          int height, const std::vector<RGBA>& pixels);
+    OPT_FUNCT(IsManager, bool)
+    needs_retry()
+    {
+        if (!m_queue.overflowed())
+            return false;
+        if (!m_queue.grow())
+            return false;
+
+        // Overflow indicates dropped requests; clear and re-run to recollect.
+        m_queue.clear();
+        m_failed.store(false);
+        return true;
+    }
+    OPT_FUNCT_DECL(IsManager)
+    bool find_or_add_texture(OIIO::ustringhash name, uint32_t& index);
+    OPT_FUNCT_DECL(IsManager)
+    void sync_to_managed();
+    OPT_FUNCT_DECL(IsManager)
+    void sync_from_managed();
+
+    template<typename Func, bool __C = IsManager,
+             typename = std::enable_if_t<__C>>
+    bool process_requests(Func&& fn)
+    {
+        RequestQueue& queue = request_queue();
+        for (const Request& req : queue) {
+            if (!fn(req, this))
+                return false;
+        }
+        queue.clear();
+        return true;
+    }
+
+    // Both managed and manager
+
+    RequestQueue& request_queue() { return m_queue; }
+    const RequestQueue& request_queue() const { return m_queue; }
+
+    bool failures() const
+    {
+        const RequestQueue& queue = request_queue();
+        return m_failed.load() || queue.failed();
+    }
+    RGBA lookup(OIIO::ustringhash name, float u, float v, Vec2 du, Vec2 dv,
+                float rnd = -1);
+
+    using TilePool     = Stream<TileRecord, Arena, ManagedArena>;
+    using TileIndexMap = ClosedHashMap<TileCoords, uint32_t, TileCoordsHash,
+                                       Arena, ManagedArena>;
+
+private:
+    bool find_texture(OIIO::ustringhash name, uint32_t& index) const;
+
+    static void copy_tile_pixels(TileRecord& dst,
+                                 const std::vector<RGBA>& pixels);
+
+    template<class SampleArray> bool load_tiles(SampleArray& samples);
+
+    // Manager only
+    OPT_FIELD(IsManager, Managed&) m_managed;
+    // Both
+    RequestQueue m_queue;
+    Atomic<bool> m_failed;
+    TextureRecord m_textures[kMaxResidentTextures];
+    uint32_t m_texture_count;
+    TextureMap m_texture_lookup;
+    TilePool m_tile_pool;
+    TileIndexMap m_tile_index;
+    uint32_t m_tile_count;
+};
+
+bool
+run_device_unit_tests();
+
+}  // namespace texture_device
+
+#undef OPT_FIELD
+#undef OPT_CONSTRUCT
+#undef OPT_FUNCT_DECL
+#undef OPT_FUNCT
diff --git a/testsuite/texture-device/texture_device_impl.h b/testsuite/texture-device/texture_device_impl.h
new file mode 100644
index 0000000000..01b44d9dab
--- /dev/null
+++ b/testsuite/texture-device/texture_device_impl.h
@@ -0,0 +1,332 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include "texture_device_decl.h"
+
+#include <algorithm>
+#include <cstring>
+
+// Just to reduce verbosity
+#define DTS DTextureSystem<Arena, ManagedArena>
+#define OPT_FUNCT_IMPL template<bool __C, typename>
+
+namespace texture_device {
+
+inline size_t
+TileCoordsHash::operator()(const TileCoords& c) const
+{
+    uint64_t h
+        = OIIO::farmhash::inlined::Hash64(reinterpret_cast<const char*>(&c),
+                                          sizeof(TileCoords));
+    return static_cast<size_t>(h);
+}
+
+inline bool
+Request::operator==(const Request& other) const
+{
+    return type == other.type && tile == other.tile;
+}
+
+inline size_t
+RequestHash::operator()(const Request& req) const
+{
+    uint64_t h = 1469598103934665603ull;
+    h          = hash_mix_u64(h, static_cast<uint64_t>(req.type));
+    h          = hash_mix_u64(h, req.tile.texture_hash);
+    h          = hash_mix_u64(h,
+                              static_cast<uint64_t>(static_cast<uint32_t>(req.tile.x)));
+    h          = hash_mix_u64(h,
+                              static_cast<uint64_t>(static_cast<uint32_t>(req.tile.y)));
+    h          = hash_mix_u64(h, static_cast<uint64_t>(
+                            static_cast<uint32_t>(req.tile.mip)));
+    return static_cast<size_t>(h);
+}
+
+inline uint64_t
+RequestHash::hash_mix_u64(uint64_t h, uint64_t v)
+{
+    h ^= v + 0x9e3779b97f4a7c15ull + (h << 6) + (h >> 2);
+    return h;
+}
+
+template<typename Arena, typename ManagedArena>
+OPT_FUNCT_IMPL inline bool
+DTS::set_texture_ready(OIIO::ustringhash name, uint32_t width, uint32_t height)
+{
+    uint32_t index = 0;
+    if (!find_or_add_texture(name, index))
+        return false;
+    if (width == 0 || height == 0)
+        return false;
+    m_textures[index].width  = width;
+    m_textures[index].height = height;
+    m_textures[index].swrap  = WrapMode::Periodic;
+    m_textures[index].twrap  = WrapMode::Periodic;
+    m_textures[index].ready  = true;
+    return true;
+}
+
+template<typename Arena, typename ManagedArena>
+OPT_FUNCT_IMPL inline bool
+DTS::set_tile_payload(OIIO::ustringhash name, TileCoords tile, int width,
+                      int height, const std::vector<RGBA>& pixels)
+{
+    if (width != int(kTileWidth) || height != int(kTileHeight))
+        return false;
+    if (pixels.size() != size_t(width) * size_t(height))
+        return false;
+
+    uint32_t index = 0;
+    if (!find_texture(name, index))
+        return false;
+    TextureRecord& texture = m_textures[index];
+    if (!texture.ready)
+        return false;
+
+    uint32_t tile_pool_index = 0;
+    if (m_tile_index.find(tile, tile_pool_index)) {
+        if (tile_pool_index >= m_tile_count)
+            return false;
+        return true;
+    }
+
+    if (m_tile_count >= kMaxResidentTiles)
+        return false;
+
+    if (m_tile_index.size() >= kMaxTilesPerTexture)
+        return false;
+
+    tile_pool_index = static_cast<uint16_t>(m_tile_count);
+    if (!m_tile_index.insert(tile, tile_pool_index))
+        return false;
+
+    TileRecord record;
+    record.tile = tile;
+    copy_tile_pixels(record, pixels);
+
+    m_tile_pool.push_back(record);
+    ++m_tile_count;
+
+    return true;
+}
+
+template<typename Arena, typename ManagedArena>
+OPT_FUNCT_IMPL inline bool
+DTextureSystem<Arena, ManagedArena>::find_or_add_texture(OIIO::ustringhash name,
+                                                         uint32_t& index)
+{
+    if (find_texture(name, index))
+        return true;
+
+    if (m_texture_count >= kMaxResidentTextures)
+        return false;
+
+    const uint32_t new_index = m_texture_count++;
+    m_textures[new_index].reset(name);
+    if (!m_texture_lookup.insert(name.hash(), new_index))
+        return false;
+    index = new_index;
+    return true;
+}
+
+template<typename Arena, typename ManagedArena>
+OPT_FUNCT_IMPL inline void
+DTextureSystem<Arena, ManagedArena>::sync_to_managed()
+{
+    m_queue.sync_to_managed();
+    m_texture_lookup.sync_to_managed();
+    m_tile_pool.sync_to_managed();
+    m_tile_index.sync_to_managed();
+    m_managed.m_failed = m_failed.load();
+    memcpy(m_managed.m_textures, m_textures, sizeof(m_textures));
+    m_managed.m_texture_count = m_texture_count;
+    m_managed.m_tile_count    = m_tile_count;
+}
+
+template<typename Arena, typename ManagedArena>
+OPT_FUNCT_IMPL inline void
+DTextureSystem<Arena, ManagedArena>::sync_from_managed()
+{
+    m_queue.sync_from_managed();
+    m_texture_lookup.sync_from_managed();
+    m_tile_pool.sync_from_managed();
+    m_tile_index.sync_from_managed();
+    m_failed = m_managed.m_failed.load();
+    memcpy(m_textures, m_managed.m_textures, sizeof(m_textures));
+    m_texture_count = m_managed.m_texture_count;
+    m_tile_count    = m_managed.m_tile_count;
+}
+
+template<typename Arena, typename ManagedArena>
+inline bool
+DTS::find_texture(OIIO::ustringhash name, uint32_t& index) const
+{
+    uint32_t out = 0;
+    if (!m_texture_lookup.find(name.hash(), out))
+        return false;
+    if (out >= m_texture_count)
+        return false;
+    if (m_textures[out].name != name)
+        return false;
+    index = out;
+    return true;
+}
+
+template<typename Arena, typename ManagedArena>
+inline void
+DTS::copy_tile_pixels(TileRecord& dst, const std::vector<RGBA>& pixels)
+{
+    std::copy(pixels.begin(), pixels.end(), dst.pixels.begin());
+}
+
+inline int
+wrap_coord(int coord, int size, WrapMode mode, bool& in_range)
+{
+    in_range = true;
+    if (size <= 0) {
+        in_range = false;
+        return 0;
+    }
+    if (mode == WrapMode::Clamp)
+        return std::clamp(coord, 0, size - 1);
+    if (mode == WrapMode::Periodic) {
+        const int m = coord % size;
+        return (m < 0) ? (m + size) : m;
+    }
+    if (coord < 0 || coord >= size)
+        in_range = false;
+    return coord;
+}
+
+template<typename Arena, typename ManagedArena>
+template<class SampleArray>
+inline bool
+DTS::load_tiles(SampleArray& samples)
+{
+    bool missing_any    = false;
+    RequestQueue& queue = request_queue();
+    for (size_t i = 0; i < samples.size(); ++i) {
+        if (samples[i].tile || samples[i].weight == 0)
+            continue;
+
+        uint32_t tile_pool_index = 0;
+        const bool found         = m_tile_index.find(samples[i].tcoords,
+                                                     tile_pool_index)
+                           && tile_pool_index < m_tile_count;
+
+        // Apply the same tile-resolution result to all duplicate tap entries
+        // with identical tile coordinates in this sample batch.
+        for (size_t j = i; j < samples.size(); ++j) {
+            if (!(samples[j].tcoords == samples[i].tcoords))
+                continue;
+            if (found) {
+                samples[j].tile = &m_tile_pool[tile_pool_index];
+            } else {
+                samples[j].weight = 0;
+            }
+        }
+
+        if (!found) {
+            missing_any = true;
+            // RequestQueue deduplicates repeated misses across pixels/taps.
+            if (!queue.insert(Request { RequestType::MissingTile,
+                                        samples[i].tcoords },
+                              true)) {
+                m_failed.store(true);
+            }
+        }
+    }
+    return !missing_any;
+}
+
+template<typename Arena, typename ManagedArena>
+inline RGBA
+DTS::lookup(OIIO::ustringhash name, float u, float v, Vec2 du, Vec2 dv,
+            float rnd)
+{
+    uint32_t texture_index = 0;
+    if (find_texture(name, texture_index)) {
+        const TextureRecord& texture = m_textures[texture_index];
+        if (texture.ready) {
+            RGBA accum(0.0f, 0.0f, 0.0f, 0.0f);
+            bool failure = false;
+            MipAnisoFilter filter(texture, du, dv);
+            if (rnd >= 0.0f && filter.num_mips > 1) {
+                // Experimental path: stochastic mip choice collapses trilinear
+                // blending to one selected mip for this lookup.
+                const float blend = filter.mips.mip_blend;
+                assert(0 <= rnd && rnd < 1);
+                unsigned selected = rnd < blend ? 0 : 1;
+                rnd = selected ? (rnd - blend) / (1 - blend) : rnd / blend;
+                filter.mips.mip_levels[0] = filter.mips.mip_levels[selected];
+                filter.num_mips           = 1;  // This will make the weight 1.0
+            }
+            for (unsigned mip_i = 0; mip_i < filter.num_mips; ++mip_i) {
+                const float mip_weight
+                    = filter.num_mips == 1
+                          ? 1
+                          : (mip_i ? filter.mips.mip_blend
+                                   : 1 - filter.mips.mip_blend);
+                auto samples = filter.generate_samples(mip_i, u, v);
+
+                if (!load_tiles(samples)) {
+                    failure = true;
+                    continue;
+                }
+
+                if (rnd >= 0) {
+                    // Experimental path: select one tap by cumulative
+                    // distribution instead of deterministic weighted sum.
+                    float total_weight = 0;
+                    for (size_t i = 0; i < samples.size(); ++i)
+                        total_weight += samples[i].weight;
+                    float sum = 0;
+                    for (size_t i = 0; i < samples.size(); ++i) {
+                        sum += samples[i].weight;
+                        if (rnd < sum / total_weight
+                            || i == samples.size() - 1) {
+                            const size_t idx = size_t(samples[i].local_y)
+                                                   * size_t(kTileWidth)
+                                               + size_t(samples[i].local_x);
+                            accum = samples[i].tile->pixels[idx];
+                            break;
+                        }
+                    }
+                } else {
+                    for (size_t i = 0; i < samples.size(); ++i) {
+                        if (!samples[i].tile)
+                            continue;
+                        const size_t idx = size_t(samples[i].local_y)
+                                               * size_t(kTileWidth)
+                                           + size_t(samples[i].local_x);
+                        accum += mip_weight * samples[i].weight
+                                 * samples[i].tile->pixels[idx];
+                    }
+                }
+            }
+
+            return failure ? RGBA(1.0f, 0.0f, 1.0f, 1.0f) : accum;
+        }
+    }
+
+    Request req;
+    req.type = (find_texture(name, texture_index)
+                && m_textures[texture_index].ready)
+                   ? RequestType::MissingTile
+                   : RequestType::MissingTexture;
+    req.tile = TileCoords { name.hash() };
+
+    RequestQueue& queue = request_queue();
+    if (!queue.insert(req, true))
+        m_failed.store(true);
+
+    return RGBA(1.0f, 0.0f, 1.0f, 1.0f);
+}
+
+#undef DTS
+#undef OPT_FUNCT_IMPL
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/texture_loader.cpp b/testsuite/texture-device/texture_loader.cpp
new file mode 100644
index 0000000000..cd8b1256e7
--- /dev/null
+++ b/testsuite/texture-device/texture_loader.cpp
@@ -0,0 +1,165 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#include "texture_loader.h"
+
+#include <OpenImageIO/imageio.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <iostream>
+
+namespace texture_device {
+
+namespace {
+
+    bool loader_error(const std::string& msg)
+    {
+        std::cerr << "texture-device: " << msg << "\n";
+        return false;
+    }
+
+}  // namespace
+
+void
+TextureLoader::add_texture_path(std::string path)
+{
+    if (!path.empty())
+        m_texture_paths.emplace_back(std::move(path));
+}
+
+bool
+TextureLoader::resolve_texture(uint64_t texture_hash,
+                               OIIO::ustringhash& texture_name,
+                               std::string& filename) const
+{
+    texture_name         = OIIO::ustringhash::from_hash(texture_hash);
+    const char* basename = texture_name.c_str();
+    if (!basename || !basename[0])
+        return false;
+
+    for (const std::string& path : m_texture_paths) {
+        const std::filesystem::path fullpath = std::filesystem::path(path)
+                                               / basename;
+        if (!std::filesystem::exists(fullpath))
+            continue;
+        filename = fullpath.string();
+        return true;
+    }
+
+    return false;
+}
+
+bool
+TextureLoader::query_texture_info(const std::string& filename, int& width,
+                                  int& height)
+{
+    // Cache dimensions/validation for repeated requests to the same file.
+    auto cache_it = m_metadata_cache.find(filename);
+    if (cache_it != m_metadata_cache.end()) {
+        width  = cache_it->second.width;
+        height = cache_it->second.height;
+        return true;
+    }
+
+    auto in = OIIO::ImageInput::open(filename);
+    if (!in)
+        return false;
+
+    const OIIO::ImageSpec& spec = in->spec();
+    if (spec.width <= 0 || spec.height <= 0 || spec.nchannels <= 0) {
+        in->close();
+        return false;
+    }
+    if (spec.nchannels != 3 && spec.nchannels != 4) {
+        in->close();
+        return loader_error("unsupported channel count for " + filename
+                            + " got " + std::to_string(spec.nchannels)
+                            + " expected 3 or 4");
+    }
+
+    if (spec.tile_width != int(TileRecord::kTileWidth)
+        || spec.tile_height != int(TileRecord::kTileHeight)) {
+        in->close();
+        return loader_error("unsupported tile size for " + filename + " got "
+                            + std::to_string(spec.tile_width) + "x"
+                            + std::to_string(spec.tile_height) + " expected "
+                            + std::to_string(TileRecord::kTileWidth) + "x"
+                            + std::to_string(TileRecord::kTileHeight));
+    }
+
+    width  = spec.width;
+    height = spec.height;
+    in->close();
+    m_metadata_cache[filename] = TextureMetadata { width, height };
+    return true;
+}
+
+bool
+TextureLoader::load_tile_payload(const std::string& filename, TileCoords tile,
+                                 std::vector<RGBA>& tile_pixels)
+{
+    int width  = 0;
+    int height = 0;
+    if (!query_texture_info(filename, width, height))
+        return false;
+
+    auto in = OIIO::ImageInput::open(filename);
+    if (!in)
+        return false;
+
+    constexpr int tile_w = int(TileRecord::kTileWidth);
+    constexpr int tile_h = int(TileRecord::kTileHeight);
+    const int mip        = std::max(0u, unsigned(tile.mip));
+    if (!in->seek_subimage(0, mip)) {
+        in->close();
+        return loader_error("failed to seek mip " + std::to_string(mip)
+                            + " for " + filename);
+    }
+    const OIIO::ImageSpec mipspec = in->spec();
+
+    if (mipspec.tile_width != tile_w || mipspec.tile_height != tile_h) {
+        in->close();
+        return loader_error(
+            "unsupported mip tile size for " + filename + " mip "
+            + std::to_string(mip) + " got " + std::to_string(mipspec.tile_width)
+            + "x" + std::to_string(mipspec.tile_height) + " expected "
+            + std::to_string(tile_w) + "x" + std::to_string(tile_h));
+    }
+
+    const int tile_x    = mipspec.x + tile.x * tile_w;
+    const int tile_y    = mipspec.y + tile.y * tile_h;
+    const int nchannels = std::max(1, mipspec.nchannels);
+    if (nchannels != 3 && nchannels != 4) {
+        in->close();
+        return loader_error("unsupported channel count for " + filename
+                            + " mip " + std::to_string(mip) + " got "
+                            + std::to_string(nchannels) + " expected 3 or 4");
+    }
+
+    std::vector<float> raw(size_t(tile_w) * size_t(tile_h) * size_t(nchannels),
+                           0.0f);
+    // Read one tile in float and normalize payload to RGBA for device lookup.
+    const bool ok = in->read_tile(tile_x, tile_y, 0, OIIO::TypeDesc::FLOAT,
+                                  raw.data());
+    in->close();
+    if (!ok)
+        return false;
+
+    tile_pixels.assign(size_t(tile_w * tile_h), RGBA(0.0f, 0.0f, 0.0f, 0.0f));
+    for (int y = 0; y < tile_h; ++y) {
+        for (int x = 0; x < tile_w; ++x) {
+            const size_t p = size_t(y) * size_t(tile_w) + size_t(x);
+            const size_t o = p * size_t(nchannels);
+            const float r  = raw[o + 0];
+            const float g  = raw[o + 1];
+            const float b  = raw[o + 2];
+            const float a  = (nchannels == 4) ? raw[o + 3] : 1.0f;
+            tile_pixels[p] = RGBA(r, g, b, a);
+        }
+    }
+    return true;
+}
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/texture_loader.h b/testsuite/texture-device/texture_loader.h
new file mode 100644
index 0000000000..204b5ffa41
--- /dev/null
+++ b/testsuite/texture-device/texture_loader.h
@@ -0,0 +1,73 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include "texture_device_decl.h"
+
+#include <OpenImageIO/ustring.h>
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace texture_device {
+
+class TextureLoader {
+public:
+    using RGBA = texture_device::RGBA;
+
+    void add_texture_path(std::string path);
+
+    bool resolve_texture(uint64_t texture_hash, OIIO::ustringhash& texture_name,
+                         std::string& filename) const;
+
+    bool query_texture_info(const std::string& filename, int& width,
+                            int& height);
+
+    bool load_tile_payload(const std::string& filename, TileCoords tile,
+                           std::vector<RGBA>& tile_pixels);
+
+    template<typename TextureSystem>
+    bool process_request(const Request& req, TextureSystem* ts)
+    {
+        const uint64_t key = req.tile.texture_hash;
+        OIIO::ustringhash texture_name;
+        std::string filename;
+        if (!resolve_texture(key, texture_name, filename))
+            return false;
+
+        switch (req.type) {
+        case RequestType::MissingTexture: {
+            int tex_w = 0;
+            int tex_h = 0;
+            if (!query_texture_info(filename, tex_w, tex_h))
+                return false;
+            return ts->set_texture_ready(texture_name, uint32_t(tex_w),
+                                         uint32_t(tex_h));
+        }
+        case RequestType::MissingTile: {
+            std::vector<RGBA> tile_pixels;
+            if (!load_tile_payload(filename, req.tile, tile_pixels))
+                return false;
+            return ts->set_tile_payload(texture_name, req.tile,
+                                        int(TextureSystem::kTileWidth),
+                                        int(TextureSystem::kTileHeight),
+                                        tile_pixels);
+        }
+        default: return false;
+        }
+    }
+
+private:
+    struct TextureMetadata {
+        int width  = 0;
+        int height = 0;
+    };
+
+    std::vector<std::string> m_texture_paths;
+    std::unordered_map<std::string, TextureMetadata> m_metadata_cache;
+};
+
+}  // namespace texture_device
diff --git a/testsuite/texture-device/vector_lite.h b/testsuite/texture-device/vector_lite.h
new file mode 100644
index 0000000000..cabda436ef
--- /dev/null
+++ b/testsuite/texture-device/vector_lite.h
@@ -0,0 +1,52 @@
+// Copyright Contributors to the OpenImageIO project.
+// SPDX-License-Identifier: Apache-2.0
+// https://github.com/AcademySoftwareFoundation/OpenImageIO
+
+#pragma once
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+namespace texture_device {
+
+template<class T, size_t N> struct vector_lite : public std::array<T, N> {
+    using Base = std::array<T, N>;
+
+    vector_lite()
+        : Base {}
+        , m_size(0)
+    {
+    }
+
+    size_t size() const { return m_size; }
+    size_t capacity() const { return N; }
+    bool empty() const { return m_size == 0; }
+
+    void clear() { m_size = 0; }
+
+    void push_back(const T& value)
+    {
+        assert(m_size < N);
+        (*this)[m_size++] = value;
+    }
+
+    void push_back(T&& value)
+    {
+        assert(m_size < N);
+        (*this)[m_size++] = std::move(value);
+    }
+
+    T* begin() { return this->data(); }
+    T* end() { return this->data() + m_size; }
+    const T* begin() const { return this->data(); }
+    const T* end() const { return this->data() + m_size; }
+    const T* cbegin() const { return begin(); }
+    const T* cend() const { return end(); }
+
+private:
+    unsigned m_size;
+};
+
+}  // namespace texture_device