diff --git a/python/tvm/relax/frontend/tflite/tflite_frontend.py b/python/tvm/relax/frontend/tflite/tflite_frontend.py index 334021b90300..c8c24f943417 100644 --- a/python/tvm/relax/frontend/tflite/tflite_frontend.py +++ b/python/tvm/relax/frontend/tflite/tflite_frontend.py @@ -331,13 +331,13 @@ def convert_op_to_relax(self): assert isinstance(op, Operator) ret = self.convert_map[op_code_str](op=op) - ret = self.bb.normalize(ret) - # print("Op Code:", op_code_str, " Shape:", ret.struct_info) # In case the Op can be prefetched, the output can be optimized out if ret is None: continue + ret = self.bb.normalize(ret) + if len(output_tensors) == 1: tensor_idx = output_tensors[0].tensor_idx self.exp_tab.set_expr(get_tensor_name(self.subgraph, tensor_idx), ret) @@ -1898,15 +1898,8 @@ def convert_fully_connected(self, op): TensorType.UINT8, TensorType.FLOAT32, ) - weight_tensor_type_str = self.get_tensor_type_str(weight_tensor_type) - if self.has_expr(weight_tensor.tensor_idx): - weight_expr = self.get_expr(weight_tensor.tensor_idx) - else: - weight_value = self.get_tensor_value(weight_tensor) - weight_expr = self.exp_tab.new_const( - weight_value, dtype=weight_tensor_type_str, source_name=weight_tensor.tensor.Name() - ) + weight_expr = self.get_tensor_expr(weight_tensor) weight_shape = weight_expr.struct_info.shape weight_expr = relax.op.permute_dims(weight_expr, [1, 0]) @@ -3142,7 +3135,7 @@ def convert_transpose_conv(self, op): weight_expr_iohw = self.get_expr(weights_tensor.tensor_idx) weight_expr_iohw = relax.op.permute_dims(weight_expr_iohw, axes=(3, 0, 1, 2)) else: - weight_value_ohwi = self.get_tensor_value(weights_tensor) + weight_value_ohwi = self.get_tensor_value_or_prefetched(weights_tensor) # Relax kernel_layout should be OIHW # Relax weights layout should be different from kernel_layout - it should be IOHW weight_value_iohw = np.transpose(weight_value_ohwi, (3, 0, 1, 2)) @@ -3878,18 +3871,21 @@ def set_prefetched_node(self, input_tensor_idx, value): def get_prefetched_node(self, input_tensor_idx): return self.prefetched_nodes[get_tensor_name(self.subgraph, input_tensor_idx)] + def get_tensor_value_or_prefetched(self, tensor, is_sparse=False): + if self.is_prefetched(tensor.tensor_idx): + return self.get_prefetched_node(tensor.tensor_idx) + return self.get_tensor_value(tensor, is_sparse) + def get_tensor_expr(self, tensor, is_sparse=False): """Return the Relax expr for tensor.""" if self.has_expr(tensor.tensor_idx): - expr = self.get_expr(tensor.tensor_idx) - else: - type_str = self.get_tensor_type_str(tensor.tensor.Type()) - expr = self.exp_tab.new_const( - self.get_tensor_value(tensor, is_sparse), - dtype=type_str, - source_name=tensor.tensor.Name(), - ) - return expr + return self.get_expr(tensor.tensor_idx) + + type_str = self.get_tensor_type_str(tensor.tensor.Type()) + value = self.get_tensor_value_or_prefetched(tensor, is_sparse) + return self.exp_tab.new_const( + value, dtype=type_str, source_name=tensor.tensor.Name() + ) def get_tensor_shape(self, tensor_wrapper): """Returns tensor shape. Infers shape if the shape is empty.""" diff --git a/tests/python/relax/test_frontend_tflite.py b/tests/python/relax/test_frontend_tflite.py index 92080634e2cc..89d7a7ff4184 100644 --- a/tests/python/relax/test_frontend_tflite.py +++ b/tests/python/relax/test_frontend_tflite.py @@ -20,9 +20,11 @@ import os +import flatbuffers import numpy as np import pytest import tensorflow as tf +import tflite as tfl import tflite.Model from tensorflow.keras import applications as keras_app @@ -2454,5 +2456,524 @@ def main( verify(SparseToDense, Expected) +# DENSIFY operator tests +# DENSIFY converts sparse weight tensors to dense at conversion time (not runtime). +# Since TensorFlow does not provide an API to create sparse TFLite models, +# we manually build them using the flatbuffers API. + +# Compatibility shim: schema-generated tflite packages (as used in CI) do not +# re-export builder helpers at the package top-level. Bind them from submodules +# so that the rest of the test file can use tfl.XXXStart / tfl.XXXEnd uniformly. +if not hasattr(tfl, "Int32VectorStart"): + _tflite_helper_modules = [ + "AddOptions", + "Buffer", + "Conv2DOptions", + "DimensionMetadata", + "FullyConnectedOptions", + "Int32Vector", + "Model", + "Operator", + "OperatorCode", + "SparsityParameters", + "SubGraph", + "Tensor", + ] + for _mod_name in _tflite_helper_modules: + _mod = __import__(f"tflite.{_mod_name}", fromlist=[_mod_name]) + for _attr_name in dir(_mod): + if not _attr_name.startswith("_"): + setattr(tfl, _attr_name, getattr(_mod, _attr_name)) + +_DENSIFY_TEST_VALUES = np.array([1.0, 2.0], dtype=np.float32) +_DENSIFY_TEST_DENSE = np.array([[1.0, 0.0], [0.0, 2.0]], dtype=np.float32) +_DENSIFY_ROW_PTRS = [0, 1, 2] +_DENSIFY_COL_INDICES = [0, 1] +_DENSIFY_CONV_KERNEL_DENSE_HWIO = _DENSIFY_TEST_DENSE.reshape(2, 2, 1, 1) +_DENSIFY_FC_WEIGHT_VALUES = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32) +_DENSIFY_FC_WEIGHT_DENSE_OI = np.diag(_DENSIFY_FC_WEIGHT_VALUES).astype(np.float32) +_DENSIFY_FC_ROW_PTRS = [0, 1, 2, 3, 4] +_DENSIFY_FC_COL_INDICES = [0, 1, 2, 3] + + +def _tflite_int32_vector(builder, start_vector_fn, values): + start_vector_fn(builder, len(values)) + for value in reversed(values): + builder.PrependInt32(value) + return builder.EndVector() + + +def _tflite_offset_vector(builder, start_vector_fn, offsets): + start_vector_fn(builder, len(offsets)) + for offset in reversed(offsets): + builder.PrependUOffsetTRelative(offset) + return builder.EndVector() + + +def _tflite_byte_vector(builder, data): + tfl.BufferStartDataVector(builder, len(data)) + for byte in reversed(data): + builder.PrependByte(byte) + return builder.EndVector() + + +def _tflite_int32_table(builder, values): + # Build the values vector directly without relying on version-specific + # helper tfl.Int32VectorStartValuesVector, which is absent in older + # tflite package versions used in CI. + builder.StartVector(4, len(values), 4) + for value in reversed(values): + builder.PrependInt32(value) + values_vec = builder.EndVector() + tfl.Int32VectorStart(builder) + tfl.Int32VectorAddValues(builder, values_vec) + return tfl.Int32VectorEnd(builder) + + +def _tflite_shape(builder, shape): + return _tflite_int32_vector(builder, tfl.TensorStartShapeVector, shape) + + +def _build_tensor(builder, buffer_idx, shape, sparsity=None): + """Helper to build a TFLite tensor.""" + shape_vec = _tflite_shape(builder, shape) + tfl.TensorStart(builder) + tfl.TensorAddBuffer(builder, buffer_idx) + tfl.TensorAddHasRank(builder, True) + tfl.TensorAddIsVariable(builder, False) + tfl.TensorAddShape(builder, shape_vec) + if sparsity is not None: + tfl.TensorAddSparsity(builder, sparsity) + tfl.TensorAddType(builder, tfl.TensorType.FLOAT32) + return tfl.TensorEnd(builder) + + +def _build_buffer(builder, data=None): + # Build the data vector before starting the Buffer table to avoid + # flatbuffers IsNestedError (vectors cannot be created inside tables). + data_offset = None + if data is not None: + data_offset = _tflite_byte_vector(builder, data) + tfl.BufferStart(builder) + if data_offset is not None: + tfl.BufferAddData(builder, data_offset) + return tfl.BufferEnd(builder) + + +def _build_operator( + builder, opcode_index, inputs, outputs, builtin_options_type, builtin_options=None +): + inputs_vec = _tflite_int32_vector(builder, tfl.OperatorStartInputsVector, inputs) + outputs_vec = _tflite_int32_vector(builder, tfl.OperatorStartOutputsVector, outputs) + tfl.OperatorStart(builder) + tfl.OperatorAddOpcodeIndex(builder, opcode_index) + tfl.OperatorAddInputs(builder, inputs_vec) + tfl.OperatorAddOutputs(builder, outputs_vec) + tfl.OperatorAddBuiltinOptionsType(builder, builtin_options_type) + if builtin_options is not None: + tfl.OperatorAddBuiltinOptions(builder, builtin_options) + return tfl.OperatorEnd(builder) + + +def _build_operator_code(builder, builtin_op): + tfl.OperatorCodeStart(builder) + tfl.OperatorCodeAddDeprecatedBuiltinCode(builder, builtin_op) + tfl.OperatorCodeAddBuiltinCode(builder, builtin_op) + tfl.OperatorCodeAddVersion(builder, 1) + return tfl.OperatorCodeEnd(builder) + + +def _build_subgraph(builder, *, tensors, operators, inputs, outputs): + tensors_vec = _tflite_offset_vector(builder, tfl.SubGraphStartTensorsVector, tensors) + operators_vec = _tflite_offset_vector(builder, tfl.SubGraphStartOperatorsVector, operators) + inputs_vec = _tflite_int32_vector(builder, tfl.SubGraphStartInputsVector, inputs) + outputs_vec = _tflite_int32_vector(builder, tfl.SubGraphStartOutputsVector, outputs) + + tfl.SubGraphStart(builder) + tfl.SubGraphAddTensors(builder, tensors_vec) + tfl.SubGraphAddOperators(builder, operators_vec) + tfl.SubGraphAddInputs(builder, inputs_vec) + tfl.SubGraphAddOutputs(builder, outputs_vec) + return tfl.SubGraphEnd(builder) + + +def _finish_tflite_model(builder, *, subgraph, operator_codes, buffers): + buffers_vec = _tflite_offset_vector(builder, tfl.ModelStartBuffersVector, buffers) + opcodes_vec = _tflite_offset_vector(builder, tfl.ModelStartOperatorCodesVector, operator_codes) + subgraphs_vec = _tflite_offset_vector(builder, tfl.ModelStartSubgraphsVector, [subgraph]) + + tfl.ModelStart(builder) + tfl.ModelAddBuffers(builder, buffers_vec) + tfl.ModelAddSubgraphs(builder, subgraphs_vec) + tfl.ModelAddOperatorCodes(builder, opcodes_vec) + tfl.ModelAddVersion(builder, 3) + model = tfl.ModelEnd(builder) + + builder.Finish(model) + return bytes(builder.Output()) + + +def _build_csr_sparsity( + builder, + *, + dense_sizes, + row_ptrs, + col_indices, + sparse_axis, + traversal_order=None, +): + row_ptrs_vec = _tflite_int32_table(builder, row_ptrs) + col_indices_vec = _tflite_int32_table(builder, col_indices) + dim_metadata = [] + + for axis, dense_size in enumerate(dense_sizes): + tfl.DimensionMetadataStart(builder) + if axis == sparse_axis: + tfl.DimensionMetadataAddFormat(builder, tfl.DimensionType.SPARSE_CSR) + tfl.DimensionMetadataAddArraySegmentsType( + builder, tfl.SparseIndexVector.Int32Vector + ) + tfl.DimensionMetadataAddArraySegments(builder, row_ptrs_vec) + tfl.DimensionMetadataAddArrayIndicesType(builder, tfl.SparseIndexVector.Int32Vector) + tfl.DimensionMetadataAddArrayIndices(builder, col_indices_vec) + else: + tfl.DimensionMetadataAddFormat(builder, tfl.DimensionType.DENSE) + tfl.DimensionMetadataAddDenseSize(builder, dense_size) + dim_metadata.append(tfl.DimensionMetadataEnd(builder)) + + if traversal_order is None: + traversal_order = list(range(len(dense_sizes))) + + traversal_order_vec = _tflite_int32_vector( + builder, tfl.SparsityParametersStartTraversalOrderVector, traversal_order + ) + dim_metadata_vec = _tflite_offset_vector( + builder, tfl.SparsityParametersStartDimMetadataVector, dim_metadata + ) + + tfl.SparsityParametersStart(builder) + tfl.SparsityParametersAddTraversalOrder(builder, traversal_order_vec) + tfl.SparsityParametersAddDimMetadata(builder, dim_metadata_vec) + return tfl.SparsityParametersEnd(builder) + + +def _build_densify_only_case(builder): + sparse_tensor_idx = 0 + dense_tensor_idx = 1 + shape = [2, 2] + sparsity = _build_csr_sparsity( + builder, + dense_sizes=shape, + row_ptrs=_DENSIFY_ROW_PTRS, + col_indices=_DENSIFY_COL_INDICES, + sparse_axis=1, + ) + + sparse_tensor = _build_tensor(builder, 0, shape, sparsity) + dense_tensor = _build_tensor(builder, 1, shape) + densify_op = _build_operator( + builder, + 0, + [sparse_tensor_idx], + [dense_tensor_idx], + tfl.BuiltinOptions.DensifyOptions, + ) + subgraph = _build_subgraph( + builder, + tensors=[sparse_tensor, dense_tensor], + operators=[densify_op], + inputs=[], + outputs=[dense_tensor_idx], + ) + operator_codes = [_build_operator_code(builder, tfl.BuiltinOperator.DENSIFY)] + return _DENSIFY_TEST_VALUES, subgraph, operator_codes + + +def _build_densify_add_case(builder): + input_tensor_idx = 0 + sparse_tensor_idx = 1 + dense_tensor_idx = 2 + output_tensor_idx = 3 + shape = [2, 2] + sparsity = _build_csr_sparsity( + builder, + dense_sizes=shape, + row_ptrs=_DENSIFY_ROW_PTRS, + col_indices=_DENSIFY_COL_INDICES, + sparse_axis=1, + ) + + input_tensor = _build_tensor(builder, 1, shape) + sparse_tensor = _build_tensor(builder, 0, shape, sparsity) + dense_tensor = _build_tensor(builder, 1, shape) + output_tensor = _build_tensor(builder, 1, shape) + + densify_op = _build_operator( + builder, + 1, + [sparse_tensor_idx], + [dense_tensor_idx], + tfl.BuiltinOptions.DensifyOptions, + ) + tfl.AddOptionsStart(builder) + add_options = tfl.AddOptionsEnd(builder) + add_op = _build_operator( + builder, + 0, + [input_tensor_idx, dense_tensor_idx], + [output_tensor_idx], + tfl.BuiltinOptions.AddOptions, + add_options, + ) + subgraph = _build_subgraph( + builder, + tensors=[input_tensor, sparse_tensor, dense_tensor, output_tensor], + operators=[densify_op, add_op], + inputs=[input_tensor_idx], + outputs=[output_tensor_idx], + ) + operator_codes = [ + _build_operator_code(builder, tfl.BuiltinOperator.ADD), + _build_operator_code(builder, tfl.BuiltinOperator.DENSIFY), + ] + return _DENSIFY_TEST_VALUES, subgraph, operator_codes + + +def _build_densify_conv2d_case(builder): + input_tensor_idx = 0 + sparse_kernel_idx = 1 + dense_kernel_idx = 2 + output_tensor_idx = 3 + + sparsity = _build_csr_sparsity( + builder, + dense_sizes=[1, 2, 2, 1], + row_ptrs=_DENSIFY_ROW_PTRS, + col_indices=_DENSIFY_COL_INDICES, + sparse_axis=2, + ) + + input_tensor = _build_tensor(builder, 1, [1, 4, 4, 1]) + sparse_kernel = _build_tensor(builder, 0, [1, 2, 2, 1], sparsity) + dense_kernel = _build_tensor(builder, 1, [1, 2, 2, 1]) + output_tensor = _build_tensor(builder, 1, [1, 4, 4, 1]) + + tfl.Conv2DOptionsStart(builder) + tfl.Conv2DOptionsAddStrideH(builder, 1) + tfl.Conv2DOptionsAddStrideW(builder, 1) + tfl.Conv2DOptionsAddPadding(builder, tfl.Padding.SAME) + tfl.Conv2DOptionsAddDilationHFactor(builder, 1) + tfl.Conv2DOptionsAddDilationWFactor(builder, 1) + conv2d_options = tfl.Conv2DOptionsEnd(builder) + + densify_op = _build_operator( + builder, + 1, + [sparse_kernel_idx], + [dense_kernel_idx], + tfl.BuiltinOptions.DensifyOptions, + ) + conv2d_op = _build_operator( + builder, + 0, + [input_tensor_idx, dense_kernel_idx], + [output_tensor_idx], + tfl.BuiltinOptions.Conv2DOptions, + conv2d_options, + ) + subgraph = _build_subgraph( + builder, + tensors=[input_tensor, sparse_kernel, dense_kernel, output_tensor], + operators=[densify_op, conv2d_op], + inputs=[input_tensor_idx], + outputs=[output_tensor_idx], + ) + operator_codes = [ + _build_operator_code(builder, tfl.BuiltinOperator.CONV_2D), + _build_operator_code(builder, tfl.BuiltinOperator.DENSIFY), + ] + return _DENSIFY_TEST_VALUES, subgraph, operator_codes + + +def _build_densify_fully_connected_case(builder): + input_tensor_idx = 0 + sparse_weight_idx = 1 + dense_weight_idx = 2 + output_tensor_idx = 3 + weight_shape = [4, 4] + + sparsity = _build_csr_sparsity( + builder, + dense_sizes=weight_shape, + row_ptrs=_DENSIFY_FC_ROW_PTRS, + col_indices=_DENSIFY_FC_COL_INDICES, + sparse_axis=1, + ) + + input_tensor = _build_tensor(builder, 1, [1, 4]) + sparse_weight = _build_tensor(builder, 0, weight_shape, sparsity) + dense_weight = _build_tensor(builder, 1, weight_shape) + output_tensor = _build_tensor(builder, 1, [1, 4]) + + tfl.FullyConnectedOptionsStart(builder) + tfl.FullyConnectedOptionsAddWeightsFormat( + builder, tfl.FullyConnectedOptionsWeightsFormat.DEFAULT + ) + fc_options = tfl.FullyConnectedOptionsEnd(builder) + + densify_op = _build_operator( + builder, + 1, + [sparse_weight_idx], + [dense_weight_idx], + tfl.BuiltinOptions.DensifyOptions, + ) + fc_op = _build_operator( + builder, + 0, + [input_tensor_idx, dense_weight_idx], + [output_tensor_idx], + tfl.BuiltinOptions.FullyConnectedOptions, + fc_options, + ) + subgraph = _build_subgraph( + builder, + tensors=[input_tensor, sparse_weight, dense_weight, output_tensor], + operators=[densify_op, fc_op], + inputs=[input_tensor_idx], + outputs=[output_tensor_idx], + ) + operator_codes = [ + _build_operator_code(builder, tfl.BuiltinOperator.FULLY_CONNECTED), + _build_operator_code(builder, tfl.BuiltinOperator.DENSIFY), + ] + return _DENSIFY_FC_WEIGHT_VALUES, subgraph, operator_codes + + +def _build_densify_model(*, downstream_op=None): + """Build a sparse TFLite model with DENSIFY operator for testing.""" + scenario_builders = { + None: _build_densify_only_case, + "add": _build_densify_add_case, + "conv2d": _build_densify_conv2d_case, + "fully_connected": _build_densify_fully_connected_case, + } + if downstream_op not in scenario_builders: + raise ValueError(f"Unsupported DENSIFY downstream op: {downstream_op}") + + builder = flatbuffers.Builder(4096) + sparse_values, subgraph, operator_codes = scenario_builders[downstream_op](builder) + sparse_buffer = _build_buffer(builder, sparse_values.tobytes()) + empty_buffer = _build_buffer(builder) + return _finish_tflite_model( + builder, + subgraph=subgraph, + operator_codes=operator_codes, + buffers=[sparse_buffer, empty_buffer], + ) + + +def _load_densify_module(downstream_op=None): + """Load a DENSIFY test model and return the converted Relax module.""" + model_bytes = _build_densify_model(downstream_op=downstream_op) + if hasattr(tflite.Model, "Model"): + tflite_model = tflite.Model.Model.GetRootAsModel(model_bytes, 0) + else: + tflite_model = tflite.Model.GetRootAsModel(model_bytes, 0) + mod = from_tflite(tflite_model) + mod["main"] = mod["main"].without_attr("params") + return mod + + +def test_densify(): + """Test TFLite DENSIFY operator conversion.""" + mod = _load_densify_module() + + @I.ir_module + class Expected: + @R.function + def main() -> R.Tensor((2, 2), dtype="float32"): + R.func_attr({"num_input": 0}) + with R.dataflow(): + gv: R.Tensor((2, 2), dtype="float32") = R.const(_DENSIFY_TEST_DENSE) + R.output(gv) + return gv + + tvm.ir.assert_structural_equal(mod, Expected) + + +def test_densify_with_add(): + """Test DENSIFY followed by a downstream ADD operator.""" + mod = _load_densify_module(downstream_op="add") + + @I.ir_module + class Expected: + @R.function + def main(x: R.Tensor((2, 2), dtype="float32")) -> R.Tensor((2, 2), dtype="float32"): + R.func_attr({"num_input": 1}) + with R.dataflow(): + gv: R.Tensor((2, 2), dtype="float32") = R.add(x, R.const(_DENSIFY_TEST_DENSE)) + R.output(gv) + return gv + + tvm.ir.assert_structural_equal(mod, Expected) + +def test_densify_with_conv2d(): + """Test DENSIFY followed by CONV2D - a real-world scenario. + + This simulates a sparse convolution where DENSIFY converts sparse weights + before CONV2D uses them for inference. + """ + mod = _load_densify_module(downstream_op="conv2d") + + @I.ir_module + class Expected: + @R.function + def main(x: R.Tensor((1, 4, 4, 1), dtype="float32")) -> R.Tensor( + (1, 4, 4, 1), dtype="float32" + ): + R.func_attr({"num_input": 1}) + with R.dataflow(): + gv: R.Tensor((1, 4, 4, 1), dtype="float32") = R.nn.conv2d( + x, + R.const(_DENSIFY_CONV_KERNEL_DENSE_HWIO), + strides=[1, 1], + padding=[0, 0, 1, 1], + dilation=[1, 1], + groups=1, + data_layout="NHWC", + kernel_layout="HWIO", + out_layout="NHWC", + out_dtype="void", + ) + R.output(gv) + return gv + + tvm.ir.assert_structural_equal(mod, Expected) + +def test_densify_with_fully_connected(): + """Test DENSIFY followed by FULLY_CONNECTED - a real-world scenario. + + This simulates a sparse fully connected layer where DENSIFY converts + sparse weights before matrix multiplication for inference. + """ + mod = _load_densify_module(downstream_op="fully_connected") + + @I.ir_module + class Expected: + @R.function + def main(x: R.Tensor((1, 4), dtype="float32")) -> R.Tensor((1, 4), dtype="float32"): + R.func_attr({"num_input": 1}) + with R.dataflow(): + weight_t: R.Tensor((4, 4), dtype="float32") = R.permute_dims( + R.const(_DENSIFY_FC_WEIGHT_DENSE_OI), axes=[1, 0] + ) + gv: R.Tensor((1, 4), dtype="float32") = R.matmul(x, weight_t, out_dtype="void") + R.output(gv) + return gv + + tvm.ir.assert_structural_equal(mod, Expected) + + if __name__ == "__main__": pytest.main(["-s", __file__])