Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1eac81f
More progress.
vbharadwaj-bk Mar 6, 2026
54f2ee8
Merge branch 'main' into ir_mul
vbharadwaj-bk Mar 21, 2026
33ed045
More diffs.
vbharadwaj-bk Mar 21, 2026
c78d48f
Avoided transposing irreps once the shared memory load is complete.
vbharadwaj-bk Mar 22, 2026
85e988f
Made more progress.
vbharadwaj-bk Mar 22, 2026
379fd28
Convolution test is failing.
vbharadwaj-bk Mar 22, 2026
7dcd887
More bugfixes.
vbharadwaj-bk Mar 22, 2026
4806748
Fixed more stuff.
vbharadwaj-bk Mar 22, 2026
44ecf0e
Compacted everything.
vbharadwaj-bk Mar 22, 2026
4947781
compaction.
vbharadwaj-bk Mar 22, 2026
5968745
File renaming.
vbharadwaj-bk Mar 22, 2026
161a1b6
Revert "File renaming."
vbharadwaj-bk Mar 22, 2026
7fd7ab6
Fixed a regression.
vbharadwaj-bk Mar 22, 2026
3c9ed29
More compaction.
vbharadwaj-bk Mar 22, 2026
95783ef
More test cleaning.
vbharadwaj-bk Mar 22, 2026
9de117e
More refactoring.
vbharadwaj-bk Mar 22, 2026
aa9bbf6
Ruff.
vbharadwaj-bk Mar 23, 2026
5a5080a
Even more refactoring.
vbharadwaj-bk Mar 23, 2026
f5b7a26
Added include for automatic string conversion.
vbharadwaj-bk Mar 23, 2026
b80d15c
Completed benchmarking.
vbharadwaj-bk Mar 23, 2026
a5d0fe5
Modified changelog before release.
vbharadwaj-bk Mar 23, 2026
b5866a2
Began adding a pair of robust transpose functions.
vbharadwaj-bk Mar 23, 2026
9544e14
Wrote a compact test for the transpose functions.
vbharadwaj-bk Mar 24, 2026
5440d55
Compacted diff further.
vbharadwaj-bk Mar 24, 2026
45f236a
Compacted tests.
vbharadwaj-bk Mar 24, 2026
26b746b
Almost there.
vbharadwaj-bk Mar 24, 2026
aa0488d
Changes to get the documentation to build.
vbharadwaj-bk Mar 24, 2026
a0e3604
Minor change to test.
vbharadwaj-bk Mar 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
## Latest Changes

### v0.6.5 (2026-03-22)
This release brings `ir_mul` layout support for
OpenEquivariance. Pass the parameter
`layout='ir_mul'` to any `TPProblem` instance to use
a transposed layout for the input and output
irreps. To transpose input and output irreps use
`oeq.transpose_irreps` or `oeq.jax.transpose_irreps`;
see our API page for usage details.

### v0.6.4 (2026-03-05)
Bugfix: added missing MLIR lowerings for
a pair of JAX primitives (thanks @teddykoker!)
Expand Down
6 changes: 5 additions & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ PyTorch API
:undoc-members:
:exclude-members: name

.. autofunction:: openequivariance.transpose_irreps

.. autofunction:: openequivariance.torch_to_oeq_dtype

.. autofunction:: openequivariance.torch_ext_so_path
Expand All @@ -54,7 +56,9 @@ breaking the PyTorch version of OpenEquivariance.
.. autoclass:: openequivariance.jax.TensorProductConv
:members: forward, reorder_weights_from_e3nn, reorder_weights_to_e3nn
:undoc-members:
:exclude-members:
:exclude-members:

.. autofunction:: openequivariance.jax.transpose_irreps

Common API
---------------------
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
"openequivariance._torch.extlib",
"openequivariance.jax.extlib",
"openequivariance_extjax",
"openequivariance.jax.jvp.tp_prim",
"openequivariance.jax.jvp.conv_prim",
"jinja2",
"numpy",
]
Expand Down
2 changes: 2 additions & 0 deletions openequivariance/openequivariance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def _check_package_editable():

from openequivariance._torch.TensorProduct import TensorProduct
from openequivariance._torch.TensorProductConv import TensorProductConv
from openequivariance._torch.utils import transpose_irreps

from openequivariance._torch.extlib import (
torch_ext_so_path as torch_ext_so_path_internal,
Expand Down Expand Up @@ -111,4 +112,5 @@ def TensorProductConv(*args, **kwargs):
"_check_package_editable",
"torch_ext_so_path",
"jax",
"transpose_irreps",
]
56 changes: 2 additions & 54 deletions openequivariance/openequivariance/_torch/CUETensorProduct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@

from openequivariance.core.TensorProductBase import TensorProductBase
from openequivariance.core.e3nn_lite import TPProblem
from openequivariance.benchmark.logging_utils import getLogger
from openequivariance.benchmark.tpp_creation_utils import (
from openequivariance.core.logging import getLogger
from openequivariance.benchmark.problems import (
ChannelwiseTPP,
FullyConnectedTPProblem,
SingleInstruction,
)
from openequivariance.core.utils import count_cg_non_zero

os.environ["CUEQUIVARIANCE_OPS_USE_JIT"] = "1"

Expand Down Expand Up @@ -235,57 +234,6 @@ def benchmark_backward(
kernel_names=self.kernel_names,
)

# Copied over from loop unroller to match arithmetic intensity on roofline plots
def calculate_flops_forward(self, batch_size: int) -> dict:
if self.is_uvw:
return super().calculate_flops_forward(batch_size)
else:
tpp = self.config
flop_count = {
"CG_decomposition": 0,
"linear_combination": 0,
"outer_products": 0,
}
for ins in tpp.instructions:
l1, l2, l3 = (
tpp.irreps_in1[ins.i_in1].ir.l,
tpp.irreps_in2[ins.i_in2].ir.l,
tpp.irreps_out[ins.i_out].ir.l,
)
flop_count["CG_decomposition"] += count_cg_non_zero(l1, l2, l3) * (
ins.path_shape[0] * ins.path_shape[1]
)
flop_count["linear_combination"] += (
(2 * l3 + 1) * np.prod(ins.path_shape) if ins.has_weight else 0
)

flop_count["CG_decomposition"] *= 3 * batch_size
flop_count["linear_combination"] *= (
batch_size # Weights do not require FMA here
)
flop_count["total"] = sum(flop_count.values())
return flop_count

def calculate_flops_backward(self, batch_size: int) -> dict:
if self.is_uvw:
return super().calculate_flops_backward(batch_size)
else:
tpp = self.config
flop_count = {"backward": 0}
for ins in tpp.instructions:
l1, l2, l3 = (
tpp.irreps_in1[ins.i_in1].ir.l,
tpp.irreps_in2[ins.i_in2].ir.l,
tpp.irreps_out[ins.i_out].ir.l,
)
flop_count["backward"] += count_cg_non_zero(l1, l2, l3) * (
ins.path_shape[0] * ins.path_shape[1]
)

flop_count["backward"] *= 9 * batch_size
flop_count["total"] = sum(flop_count.values())
return flop_count

@staticmethod
def name():
    # Human-readable identifier for this implementation; matches the class name.
    return "CUETensorProduct"
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from openequivariance.core.TensorProductBase import TensorProductBase
from openequivariance.core.e3nn_lite import TPProblem
from openequivariance.benchmark.logging_utils import getLogger
from openequivariance.core.logging import getLogger
from openequivariance._torch.NPDoubleBackwardMixin import NumpyDoubleBackwardMixin

TORCH_COMPILE_AUTOTUNING_DIR = pathlib.Path("triton_autotuning")
Expand Down
2 changes: 1 addition & 1 deletion openequivariance/openequivariance/_torch/TensorProduct.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from openequivariance._torch import extlib
import torch
from openequivariance.core.utils import torch_to_oeq_dtype, dtype_to_enum
from openequivariance.benchmark.logging_utils import getLogger
from openequivariance.core.logging import getLogger
from openequivariance._torch.utils import (
reorder_torch,
string_to_tensor,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
enum_to_torch_dtype,
)

from openequivariance.benchmark.logging_utils import getLogger
from openequivariance.core.logging import getLogger
from openequivariance._torch.NPDoubleBackwardMixin import NumpyDoubleBackwardMixinConv

logger = getLogger()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import torch

from openequivariance.benchmark.logging_utils import getLogger
from openequivariance.core.logging import getLogger

oeq_root = str(Path(__file__).parent.parent.parent)

Expand Down
70 changes: 70 additions & 0 deletions openequivariance/openequivariance/_torch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
from types import MappingProxyType
from openequivariance.core.utils import DTypeEnum
from openequivariance.core.e3nn_lite import Irreps


def reorder_helper(schedule, weights_in, direction, has_batch_dim):
Expand Down Expand Up @@ -75,3 +76,72 @@ def string_to_tensor(text: str) -> torch.Tensor:
result = torch.tensor(np_bytes, device="cpu")
result.requires_grad = False
return result


def transpose_irreps(
    array: torch.Tensor,
    irreps: "Irreps",
    src_layout: str,
    dst_layout: str,
) -> torch.Tensor:
    r"""
    Transpose irrep-packed feature tensors between ``mul_ir`` and ``ir_mul`` layouts.

    The function operates on the trailing feature dimension and preserves all leading
    batch dimensions. It uses only differentiable, out-of-place PyTorch tensor
    operations, so gradients propagate through the transpose.

    :param array: Input feature tensor with shape ``[..., irreps.dim]``.
    :param irreps: Irreps specification describing how the trailing feature dimension
        is partitioned into irrep blocks.
    :param src_layout: Source layout. Must be either ``"mul_ir"`` or ``"ir_mul"``.
    :param dst_layout: Destination layout. Must be either ``"mul_ir"`` or ``"ir_mul"``.

    :returns: Tensor in ``dst_layout`` with the same shape, dtype, and device as
        ``array``. If ``src_layout == dst_layout``, returns a clone of ``array``.

    :raises TypeError: If ``array`` is not a ``torch.Tensor``.
    :raises ValueError: If ``src_layout`` or ``dst_layout`` is not one of
        ``"mul_ir"`` or ``"ir_mul"``.
    """
    if src_layout not in ("mul_ir", "ir_mul"):
        raise ValueError(f"Unsupported src_layout: {src_layout}")
    if dst_layout not in ("mul_ir", "ir_mul"):
        raise ValueError(f"Unsupported dst_layout: {dst_layout}")

    if not isinstance(array, torch.Tensor):
        raise TypeError(f"Expected torch.Tensor, got {type(array)}")

    if src_layout == dst_layout:
        # Nothing to permute; still return a fresh tensor so callers never
        # alias the input (matches the documented "clone" contract).
        return array.clone()

    lead = array.shape[:-1]
    blocks = []
    for mul_ir, seg in zip(irreps, irreps.slices()):
        mul = mul_ir.mul
        dim = mul_ir.ir.dim
        # A block holds mul * dim values. Viewing it as (dim, mul) when the
        # source is ir_mul — or (mul, dim) when it is mul_ir — and swapping
        # the last two axes converts between the two layouts in one step;
        # the previous per-direction branches were mirror images of this.
        src_shape = (dim, mul) if src_layout == "ir_mul" else (mul, dim)
        block = array[..., seg.start : seg.stop].reshape(*lead, *src_shape)
        blocks.append(block.transpose(-1, -2).reshape(*lead, mul * dim))

    if not blocks:
        # Empty irreps: the feature dimension is zero-width; return a clone.
        return array.clone()

    # Out-of-place concatenation avoids in-place writes into a freshly
    # allocated buffer and keeps the whole op differentiable.
    return torch.cat(blocks, dim=-1)
18 changes: 13 additions & 5 deletions openequivariance/openequivariance/benchmark/ConvBenchmarkSuite.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
import numpy as np

import openequivariance as oeq
from openequivariance.benchmark.logging_utils import getLogger
from openequivariance.benchmark.correctness import (
correctness_backward_conv,
correctness_double_backward_conv,
correctness_forward_conv,
)
from openequivariance.core.logging import getLogger
from openequivariance.core.ConvolutionBase import CoordGraph
from openequivariance.benchmark.benchmark_utils import NpEncoder

Expand Down Expand Up @@ -90,7 +95,8 @@ def run(

if direction == "forward":
if correctness:
correctness = conv.test_correctness_forward(
correctness = correctness_forward_conv(
conv,
graph,
thresh=self.correctness_threshold,
prng_seed=self.prng_seed,
Expand All @@ -105,7 +111,8 @@ def run(

if direction == "backward":
if correctness:
correctness = conv.test_correctness_backward(
correctness = correctness_backward_conv(
conv,
graph,
thresh=self.correctness_threshold,
prng_seed=self.prng_seed,
Expand All @@ -120,8 +127,9 @@ def run(

if direction == "double_backward":
if correctness:
correctness = conv.test_correctness_double_backward(
self.graph,
correctness = correctness_double_backward_conv(
conv,
graph,
thresh=self.correctness_threshold,
prng_seed=self.prng_seed,
reference_implementation=self.reference_impl,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from openequivariance._torch.extlib import DeviceProp
from openequivariance.core.TensorProductBase import TensorProductBase

from openequivariance.benchmark.logging_utils import getLogger, bcolors
from openequivariance.core.logging import getLogger, bcolors
from openequivariance.core.e3nn_lite import TPProblem
from openequivariance.benchmark.correctness_utils import (
from openequivariance.benchmark.correctness import (
correctness_forward,
correctness_backward,
correctness_double_backward,
Expand Down
Loading
Loading