27 commits
05b9404
Update gitignore
Victor-Jung Mar 12, 2026
5615ed4
XDNA2 Platform Beta Support
Victor-Jung Mar 12, 2026
d039415
Add XDNA container
Victor-Jung Mar 17, 2026
e66864a
First attempt at generating MLIR code with Deeploy
Victor-Jung Mar 18, 2026
d854846
Generate tiled code but too much logic is in the Template
Victor-Jung Mar 18, 2026
9f7db26
Move data movement in passes. Template represent for loop and aquire/…
Victor-Jung Mar 18, 2026
14f3ced
Template is agnostic of tiling and data movement that are handled by …
Victor-Jung Mar 18, 2026
b850b23
Add CI on self hosted runner
Victor-Jung Mar 19, 2026
d79e36f
Remove unecessary install
Victor-Jung Mar 19, 2026
7c995bc
Add cleanup step before checkout to fix permission
Victor-Jung Mar 19, 2026
fc2b364
aie import is optional to not enforce mlir-aie and llvm-aie package i…
Victor-Jung Mar 24, 2026
1865530
Decouple xdna requirements from dev requirements
Victor-Jung Mar 24, 2026
de6f961
Format
Victor-Jung Mar 24, 2026
01d458b
Format
Victor-Jung Mar 24, 2026
4427f5a
Add general todos for future refactoring
Victor-Jung Mar 26, 2026
a82fd52
Format
Victor-Jung Mar 26, 2026
381b1e3
Free output tasks in RT sequence
Victor-Jung May 7, 2026
f8c1eaa
Add -v flag for XDNA platform only.
Victor-Jung May 7, 2026
a20ec7f
Pin llvm-aie version
Victor-Jung May 7, 2026
1add41c
Add remainder loop to Add kernel
Victor-Jung May 7, 2026
7e8736e
Use workdir to easily discard build-time files
Victor-Jung May 7, 2026
acd5680
Align CI runner interface
Victor-Jung May 8, 2026
888fe40
Remove redundent XDNA mapping
Victor-Jung May 8, 2026
6454cc8
Rename XDNA2AddChecker into XDNA2FloatAddChecker
Victor-Jung May 8, 2026
630648a
Update license of new files to 2026
Victor-Jung May 8, 2026
4fbd7e2
Hoist mlir-aie and llvm-aie detection in cmake folder to prevent dupl…
Victor-Jung May 8, 2026
cde0c5d
Improve XDNA README clarity
Victor-Jung May 8, 2026
53 changes: 53 additions & 0 deletions .github/workflows/_runner-xdna2.yml
@@ -0,0 +1,53 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

---
name: _runner-xdna2

"on":
  workflow_call:
    inputs:
      pytest-marker:
        required: true
        type: string
      docker-image:
        required: true
        type: string

jobs:
  test-runner-xdna2:
    runs-on: xdna2-npu
    # NOTE: We cannot use the `container:` directive here because
    # GitHub Actions does not support `--device` flags required for
    # NPU access (/dev/accel/accel0). Instead we use explicit
    # `docker run` commands.
    steps:
      - name: Fix workspace permissions
        shell: bash
        run: |
          docker run --rm \
            -v "${{ github.workspace }}":/workspace \
            ${{ inputs.docker-image }} \
            chown -R $(id -u):$(id -g) /workspace || true
Comment on lines +31 to +32
Contributor:
⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify all call sites and current constraints for reusable workflow inputs
set -euo pipefail

rg -n -C2 'uses:\s*\./\.github/workflows/_runner-xdna2\.yml|pytest-marker:|docker-image:' .github/workflows
rg -n -C2 'inputs:\s*$|pytest-marker:|docker-image:' .github/workflows/_runner-xdna2.yml

Repository: pulp-platform/Deeploy

Length of output: 20728


🏁 Script executed:

cat -n .github/workflows/_runner-xdna2.yml

Repository: pulp-platform/Deeploy

Length of output: 1914


Validate and quote workflow inputs before using in docker and bash commands.

inputs.docker-image (lines 31, 48) is unquoted and directly interpolated as a docker image argument, allowing injection of docker flags (e.g., -v /etc:/etc). inputs.pytest-marker (line 52) is passed unvalidated to pytest inside bash. On a self-hosted runner with --device /dev/accel/accel0 and volume mounts to /opt/xilinx, this enables command injection with privileged access.

Add input validation before use and quote all interpolations:

Suggested hardening
 jobs:
   test-runner-xdna2:
     runs-on: xdna2-npu
     steps:
+      - name: Validate workflow inputs
+        shell: bash
+        env:
+          DOCKER_IMAGE: ${{ inputs.docker-image }}
+          PYTEST_MARKER_EXPR: ${{ inputs.pytest-marker }}
+        run: |
+          set -euo pipefail
+          [[ "$DOCKER_IMAGE" =~ ^[a-zA-Z0-9._/:@-]+$ ]] || { echo "Invalid docker-image"; exit 1; }
+          [[ "$PYTEST_MARKER_EXPR" =~ ^[a-zA-Z0-9_[:space:]()!-]+$ ]] || { echo "Invalid pytest-marker"; exit 1; }
+
       - name: Fix workspace permissions
         shell: bash
         run: |
           docker run --rm \
             -v "${{ github.workspace }}":/workspace \
-            ${{ inputs.docker-image }} \
+            "${{ inputs.docker-image }}" \
             chown -R $(id -u):$(id -g) /workspace || true
       
       - name: Run Tests in Docker
         shell: bash
+        env:
+          PYTEST_MARKER_EXPR: ${{ inputs.pytest-marker }}
         run: |
           docker run --rm \
             --device /dev/accel/accel0 \
             --ulimit memlock=-1 \
             -v /opt/xilinx:/opt/xilinx \
             -v "${{ github.workspace }}":/app/Deeploy \
             -w /app/Deeploy \
-            ${{ inputs.docker-image }} \
+            "${{ inputs.docker-image }}" \
             bash -c "
               pip install -e . &&
               cd DeeployTest &&
-              pytest test_platforms.py -v -m 'xdna2 and ${{ inputs.pytest-marker }}'
+              pytest test_platforms.py -v -m \"xdna2 and ${PYTEST_MARKER_EXPR}\"
             "
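The whitelist idea from the suggestion above can also be sketched outside the workflow. The Python below is an illustrative stand-alone check; the regexes, function name, and the leading-dash guard are hypothetical, not part of the PR:

```python
import re

# Illustrative whitelists mirroring the suggested bash checks:
# an image reference (name[:tag]) and a pytest marker expression.
IMAGE_RE = re.compile(r"^[a-zA-Z0-9._/:@-]+$")
MARKER_RE = re.compile(r"^[a-zA-Z0-9_()!\s-]+$")


def validate_inputs(docker_image: str, pytest_marker: str) -> None:
    """Fail fast when either input falls outside the whitelist."""
    # A leading '-' would be parsed as a docker flag, so reject it explicitly.
    if docker_image.startswith("-") or not IMAGE_RE.fullmatch(docker_image):
        raise ValueError(f"invalid docker-image: {docker_image!r}")
    if not MARKER_RE.fullmatch(pytest_marker):
        raise ValueError(f"invalid pytest-marker: {pytest_marker!r}")


validate_inputs("deeploy-xdna:local", "kernels")    # accepted
try:
    validate_inputs("img -v /etc:/etc", "kernels")  # space smuggles a flag
except ValueError as err:
    print("rejected:", err)
```

The key point is the same as in the suggested diff: validate before interpolation, because quoting alone does not stop a crafted image string from becoming extra `docker run` arguments.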

      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Run Tests in Docker
        shell: bash
        run: |
          docker run --rm \
            --device /dev/accel/accel0 \
            --ulimit memlock=-1 \
            -v /opt/xilinx:/opt/xilinx \
            -v "${{ github.workspace }}":/app/Deeploy \
            -w /app/Deeploy \
            ${{ inputs.docker-image }} \
            bash -c "
              pip install -e . &&
              cd DeeployTest &&
              pytest test_platforms.py -v -m 'xdna2 and ${{ inputs.pytest-marker }}'
            "
31 changes: 31 additions & 0 deletions .github/workflows/ci-platform-xdna2.yml
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

---
name: CI • XDNA2

"on":
  push:
    branches:
      - "**"
    tags:
      - "v*.*.*"
  pull_request:
  workflow_dispatch:
    inputs:
      docker_image:
        description: "XDNA2 Docker image (must be pre-built on the runner)"
        required: false
        default: "deeploy-xdna:local"
Member:
Why use the local file and not ghcr.io/pulp-platform/deeploy-xdnba:devel?

Member Author:
Because right now the CI runs locally on one of my machines and I don't have collaborators. Once this platform becomes more mainstream, I think it will be a good idea to publish a Docker image, but until then it's overkill IMO, especially considering that the XDNA platform has few dependencies.


concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  xdna2-kernels:
    uses: ./.github/workflows/_runner-xdna2.yml
    with:
      pytest-marker: "kernels"
      docker-image: ${{ inputs.docker_image || 'deeploy-xdna:local' }}
4 changes: 4 additions & 0 deletions .gitignore
@@ -57,3 +57,7 @@ CHANGELOG_GEN.md
# Container Artifacts
.pyusbip/
.cache/

# Claude context file
CLAUDE.md
Member:
Are we sure we want this in the .gitignore? It could be useful if well-maintained.

Member Author:
For now it's reasonable IMO. In a later PR someone could develop an AGENT.md (to make it agnostic of Claude).

Container/xrt-debs/
17 changes: 17 additions & 0 deletions CMakeLists.txt
@@ -46,6 +46,8 @@ elseif(platform STREQUAL SoftHier)
  message(STATUS "Building for platform 'SoftHier'")
elseif(platform STREQUAL Chimera)
  message(STATUS "Building for platform 'Chimera'")
elseif(platform STREQUAL XDNA2)
  message(STATUS "Building for platform 'XDNA2'")
else()
  message(FATAL_ERROR "Invalid platform '${platform}' specified!")
endif()
@@ -299,5 +301,20 @@

endif()

if(platform STREQUAL XDNA2)

  project(${TESTNAME} LANGUAGES CXX)

  message(STATUS "============================= XDNA2 Configuration ============================")
  message(STATUS "[cMake ] GENERATED_SOURCE = " ${GENERATED_SOURCE})
  message(STATUS "[cMake ] TESTNAME = " ${TESTNAME})
  message(STATUS "==============================================================================")
  message(STATUS "")

  add_subdirectory(TargetLibraries/XDNA2)
  add_subdirectory(DeeployTest/Platforms/XDNA2)

endif()


print_simulation_config()
56 changes: 56 additions & 0 deletions Container/Dockerfile.deeploy-xdna
@@ -0,0 +1,56 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0

FROM ubuntu:24.04

ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV PIP_BREAK_SYSTEM_PACKAGES=1
ENV LLVM_INSTALL_DIR="nope"

WORKDIR /app/build

RUN apt-get update && apt-get install -y \
    software-properties-common \
    && add-apt-repository -y ppa:amd-team/xrt \
    && apt-get update && apt-get install -y \
    cmake \
    ninja-build \
    g++ \
    git \
    git-lfs \
    python3 \
    python3-pip \
    python-is-python3 \
    uuid-dev \
    wget \
    curl \
    ccache \
    libxrt2 \
    libxrt-npu2 \
    libxrt-dev \
    libxrt-utils \
    libxrt-utils-npu \
    && rm -rf /var/lib/apt/lists/*

ENV XILINX_XRT=/opt/xilinx/xrt
ENV PATH=${XILINX_XRT}/bin:${PATH}
ENV LD_LIBRARY_PATH=${XILINX_XRT}/lib

# Remove unused files and clean up to reduce image size
WORKDIR /app
RUN rm -rf /app/build

COPY pyproject.toml requirements-xdna.txt ./
RUN pip install toml-to-requirements && \
    toml-to-req --toml-file pyproject.toml && \
    pip install -r requirements.txt && \
    pip install -r requirements-xdna.txt && \
    rm -f requirements.txt pyproject.toml requirements-xdna.txt

ENV MLIR_AIE_PYTHON=/usr/bin/python3

WORKDIR /app/Deeploy
Member:
Nit, but can we align this to the other Dockerfile?

Start with

WORKDIR /app/build

and the end do

# Remove unused files and clean up to reduce image size
WORKDIR /app
RUN rm -rf /app/build

You don't have to remove individual files during the build steps, which could lead to forgetting some and polluting the work directory.

Member Author:
Done in 7e8736e

But also, the XDNA Dockerfile does not build anything locally for now, so there is nothing to clean up. It may be useful in the future, though.

201 changes: 201 additions & 0 deletions Deeploy/MLIRDataTypes.py
Member:
In general, I like separating MLIR features from the others, but some of these generic abstractions seem very specific to XDNA.

Member Author:
Makes sense. I think it would make sense to rename these things to MLIR-AIE instead of just MLIR, to prevent people from thinking this is generic MLIR.

Member Author:
One more general issue is that an MLIR compiler for an accelerator will always be quite tailored to the platform (unless one flow becomes the standard, but I doubt that will happen in the next 5 years). Hence I think very few objects and abstractions will end up being labeled as 'generic MLIR'.

@@ -0,0 +1,201 @@
# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
#
# SPDX-License-Identifier: Apache-2.0
"""Base classes for MLIR-emitting node templates and code transformations.

This module provides:

* :class:`MLIRNodeTemplate` — a :class:`NodeTemplate` subclass whose
  ``emit()`` method populates an ``mlir.ir.Module`` instead of rendering C.
* :class:`MLIRExecutionBlock` — MLIR-specific execution state replacing the
  C-oriented :class:`ExecutionBlock` (code-snippet deque) with MLIR builder
  state (tile references, ObjectFifo handles, tiling parameters).
* :class:`MLIRCodeTransformationPass` — base class for MLIR code
  transformation passes that operate on an :class:`MLIRExecutionBlock`.
* :class:`MLIRCodeTransformation` — two-phase pass container
  (``devicePasses`` + ``runtimeSequencePasses``) that the deployer
  orchestrates inside ``@aie_d.device`` and ``@aiex_d.runtime_sequence``
  regions respectively.

All classes are intentionally dialect-agnostic so that future MLIR-based
backends (NVGPU, Linalg, …) can reuse them.
"""

from __future__ import annotations

from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

from Deeploy.DeeployTypes import NodeTemplate

if TYPE_CHECKING:
    from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation

# ======================================================================
# MLIRExecutionBlock
# ======================================================================


class MLIRExecutionBlock:
    """MLIR-specific execution state for a single operator.

    Replaces the C-oriented :class:`ExecutionBlock` (which holds a deque of
    :class:`CodeSnippet` objects) with fields that carry MLIR builder state
    through the code-transformation pipeline.

    Passes populate fields progressively:

    1. The deployer sets ``computeTile``, ``shimTile``,
       ``operatorRepresentation``, and ``patternMemoryConstraint``.
    2. A device-phase pass (e.g. ``MLIRObjectFifoPass``) fills
       ``fifoMap``, ``fifoTypes``, ``tileSize``, ``numTiles``,
       ``kernelFuncName``, and ``kernelObjFile``.
    3. The deployer sets ``runtimeSequenceArgs`` before the runtime-
       sequence phase.
    4. A runtime-sequence pass (e.g. ``MLIRRuntimeSequencePass``) reads
       all of the above to emit DMA configuration.
    """

    def __init__(self, computeTile: Any = None, shimTile: Any = None) -> None:
        # MLIR tile references (set by deployer)
        self.computeTile: Any = computeTile
        self.shimTile: Any = shimTile
Comment on lines +60 to +62
Member:
Isn't this specific to XDNA?

Member Author:
Indeed


        # Operator metadata (set by deployer from parser)
        self.operatorRepresentation: OperatorRepresentation = {}

        # Tiling constraint from midend solver (may be None)
        self.patternMemoryConstraint: Any = None

        # Populated by device-phase passes (e.g. MLIRObjectFifoPass)
        self.fifoMap: Dict[str, str] = {}  # tensor name → FIFO name
        self.fifoTypes: Dict[str, Any] = {}  # tensor name → MemRefType
        self.tileSize: int = 0
        self.numTiles: int = 0
        self.numElements: int = 0
        self.kernelFuncName: Optional[str] = None
        self.kernelObjFile: Optional[str] = None

        # The MLIRNodeTemplate for this node (set by deployer, called by
        # MLIRComputeCorePass to emit the kernel call inside the core block)
        self.template: Optional[Any] = None

        # Set by deployer before runtime-sequence phase
        self.runtimeSequenceArgs: List[Any] = []

        # Input / output tensor name lists (set by deployer from parser)
        self.inputNames: List[str] = []
        self.outputNames: List[str] = []


# ======================================================================
# MLIRCodeTransformationPass / MLIRCodeTransformation
# ======================================================================


class MLIRCodeTransformationPass:
    """Base class for passes that transform an :class:`MLIRExecutionBlock`.

    Subclasses override :meth:`apply` to read / mutate the block's fields
    and optionally emit MLIR operations into the current insertion point.
    """

    def apply(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
              name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        return ctxt, mlirBlock


class MLIRCodeTransformation:
    """Two-phase pass container for MLIR code transformations.

    *devicePasses* run inside an ``@aie_d.device(...)`` region (ObjectFifo
    creation, external-kernel declarations, …).

    *runtimeSequencePasses* run inside an ``@aiex_d.runtime_sequence``
    block (DMA configuration, token await, …).

    The deployer calls :meth:`applyDevicePasses` and
    :meth:`applyRuntimeSequencePasses` at the appropriate points.
    """

    def __init__(self,
                 devicePasses: Optional[List[MLIRCodeTransformationPass]] = None,
                 runtimeSequencePasses: Optional[List[MLIRCodeTransformationPass]] = None) -> None:
        self.devicePasses: List[MLIRCodeTransformationPass] = devicePasses or []
        self.runtimeSequencePasses: List[MLIRCodeTransformationPass] = runtimeSequencePasses or []

    def applyDevicePasses(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
                          name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        for _pass in self.devicePasses:
            ctxt, mlirBlock = _pass.apply(ctxt, mlirBlock, name)
        return ctxt, mlirBlock

    def applyRuntimeSequencePasses(self, ctxt: NetworkContext, mlirBlock: MLIRExecutionBlock,
                                   name: str) -> Tuple[NetworkContext, MLIRExecutionBlock]:
        for _pass in self.runtimeSequencePasses:
            ctxt, mlirBlock = _pass.apply(ctxt, mlirBlock, name)
        return ctxt, mlirBlock
Comment on lines +108 to +137
Member:
The notion of device and runtime in this context seems specific to XDNA. Would it make sense to abstract it into a struct that encodes a list of passes for different domains?

And something like this:

def registerPasses(self, domain: str, passes: List[MLIRCodeTransformationPass])

def transform(self,
              ctxt: NetworkContext,
              domain: str,
              name: str,
              verbose: CodeGenVerbosity = _NoVerbosity):

XDNA2Transformer.registerPasses('device', devicePasses)
XDNA2Transformer.registerPasses('runtime', runtimeSequencePasses)

Also note that the "apply" function is called transform in the equivalent CodeTransformationPass class.

Member Author:
I like the idea of pass registration; it's cleaner, especially if we can register passes after object creation. The next PRs will touch MLIRDataTypes.py a lot, and I plan to polish that when I land the spatial mapping. Is that fine?
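The two-phase flow discussed in this thread can be exercised on its own. The sketch below uses minimal stand-ins for the classes in this file so it runs without Deeploy or mlir installed; the pass names (`FifoPass`, `DmaPass`) and their bodies are hypothetical:

```python
from typing import Any, Dict, List


class MLIRExecutionBlock:
    """Stand-in with just the fields the two phases touch."""

    def __init__(self) -> None:
        self.fifoMap: Dict[str, str] = {}
        self.runtimeSequenceArgs: List[Any] = []


class FifoPass:
    def apply(self, ctxt, block, name):
        block.fifoMap[name] = f"fifo_{name}"  # device phase: create FIFOs
        return ctxt, block


class DmaPass:
    def apply(self, ctxt, block, name):
        block.runtimeSequenceArgs.append(name)  # runtime phase: configure DMA
        return ctxt, block


class MLIRCodeTransformation:
    def __init__(self, devicePasses = None, runtimeSequencePasses = None):
        self.devicePasses = devicePasses or []
        self.runtimeSequencePasses = runtimeSequencePasses or []

    def applyDevicePasses(self, ctxt, block, name):
        for p in self.devicePasses:
            ctxt, block = p.apply(ctxt, block, name)
        return ctxt, block

    def applyRuntimeSequencePasses(self, ctxt, block, name):
        for p in self.runtimeSequencePasses:
            ctxt, block = p.apply(ctxt, block, name)
        return ctxt, block


transform = MLIRCodeTransformation([FifoPass()], [DmaPass()])
ctxt, block = transform.applyDevicePasses(None, MLIRExecutionBlock(), "Add_0")
ctxt, block = transform.applyRuntimeSequencePasses(ctxt, block, "Add_0")
print(block.fifoMap, block.runtimeSequenceArgs)
```

In the real deployer the device phase runs inside the `@aie_d.device` region and the runtime phase inside `@aiex_d.runtime_sequence`, but the threading of `(ctxt, block)` through each pass is the same.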



# ======================================================================
# MLIRNodeTemplate
# ======================================================================


class MLIRNodeTemplate(NodeTemplate):
    """NodeTemplate subclass that emits MLIR instead of C code.

    Subclasses must override :meth:`emit` to add dialect operations to an
    ``mlir.ir.Module`` (or region / insertion point provided via *kwargs*).

    ``generate()`` is overridden as a convenience that constructs a
    standalone module, calls :meth:`emit`, and returns the MLIR text.
    The base-class ``alignToContext`` / ``hoistTransientBuffers`` hooks are
    retained and work unchanged.
    """

    def __init__(self):
        # Empty Mako template — no C code is generated.
        super().__init__("")

    # ------------------------------------------------------------------
    # Subclass API
    # ------------------------------------------------------------------

    @abstractmethod
    def emit(self, operatorRepresentation: OperatorRepresentation, **kwargs) -> None:
        """Populate an MLIR module with the operations for this node.

        The caller (typically the deployer) sets up an ``mlir.ir.Module``
        with the appropriate device wrapper and passes dialect-specific
        context through *kwargs* (e.g. insertion point, tile references,
        ObjectFifo handles).

        Parameters
        ----------
        operatorRepresentation : OperatorRepresentation
            The parser's node representation (buffer names, sizes, types …).
        **kwargs
            Dialect-specific context provided by the deployer.
        """
        ...

    # ------------------------------------------------------------------
    # NodeTemplate overrides
    # ------------------------------------------------------------------

    def generate(self, operatorRepresentation = {}, **kwargs) -> str:
        """Generate an MLIR string for this node.

        This default implementation is a thin wrapper: it delegates to
        :meth:`emit`. Deployers that need to build a single module from
        multiple nodes should call :meth:`emit` directly with the shared
        module context and then stringify the complete module themselves.

        Returns
        -------
        str
            MLIR text (printable module or fragment).
        """
        self.emit(operatorRepresentation, **kwargs)
        return ""
Victor-Jung marked this conversation as resolved.
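As a usage sketch, a concrete template only needs to override `emit`. The stand-in base class, the `AddKernelTemplate` name, and the `emitted` kwarg below are hypothetical; a real subclass would insert `mlir.ir` dialect ops at the caller's insertion point instead of recording a marker:

```python
from abc import ABC, abstractmethod


class MLIRNodeTemplateSketch(ABC):
    """Stand-in for MLIRNodeTemplate, without the Deeploy/mlir dependencies."""

    @abstractmethod
    def emit(self, operatorRepresentation, **kwargs) -> None:
        ...

    def generate(self, operatorRepresentation = {}, **kwargs) -> str:
        # Mirrors the override above: delegate to emit(), return no C code.
        self.emit(operatorRepresentation, **kwargs)
        return ""


class AddKernelTemplate(MLIRNodeTemplateSketch):

    def emit(self, operatorRepresentation, emitted = None, **kwargs) -> None:
        # A real template would add dialect ops here; we record which
        # node was emitted so the flow is observable.
        if emitted is not None:
            emitted.append(operatorRepresentation.get("nodeName", "?"))


calls: list = []
out = AddKernelTemplate().generate({"nodeName": "Add_0"}, emitted = calls)
print(out == "", calls)  # True ['Add_0']
```

This also illustrates why `generate()` returns an empty string: the MLIR lives in the module the deployer owns, not in per-node C snippets.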