30 changes: 24 additions & 6 deletions .vscode/launch.json
@@ -11,9 +11,18 @@
"program": "generateNetwork.py",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}/DeeployTest",
"env": {
"PYTHONPATH": "${workspaceFolder}"
},
"subProcess": true,
"justMyCode": false,
"args":
"-p${input:platformUntiled} -t${input:model} ${input:additionalArgsUntiled}"
"args": [
"-p",
"${input:platformUntiled}",
"-t",
"${input:model}",
"${input:additionalArgsUntiled}"
]
},
{
"name": "Deeploy Generate Tiled",
@@ -22,9 +31,18 @@
"program": "testMVP.py",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}/DeeployTest",
"env": {
"PYTHONPATH": "${workspaceFolder}"
},
"subProcess": true,
"justMyCode": false,
"args":
"-p${input:platformTiled} -t${input:model} ${input:additionalArgsTiled}"
"args": [
"-p",
"${input:platformTiled}",
"-t",
"${input:model}",
"${input:additionalArgsTiled}"
]
}
],
"inputs": [
@@ -85,7 +103,7 @@
"id": "additionalArgsTiled",
"type": "promptString",
"description": "Additional Arguments",
"default": "-v --doublebuffer"
"default": "--doublebuffer"
}
]
}
}
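Passing "args" as a list matters because each element reaches the debuggee as its own argv entry, while the old single string arrives as one argument that argparse cannot split back into options. A minimal sketch of the difference using subprocess, with made-up values ("Generic", "Tests/Adder") standing in for the input variables:

    import subprocess
    import sys

    # List form: each element is a separate argv entry, as the new launch config produces.
    subprocess.run([sys.executable, "generateNetwork.py", "-p", "Generic", "-t", "Tests/Adder"])

    # The old form collapses everything into a single argv entry:
    # sys.argv[1] == "-p Generic -t Tests/Adder", which argparse treats as one token.
    subprocess.run([sys.executable, "generateNetwork.py", "-p Generic -t Tests/Adder"])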
@@ -275,7 +275,8 @@ def apply(self,
ctxt._maxDynamicSize[levels] = max(ctxt._maxDynamicSize.get(levels, 0), ctxt._dynamicSize[levels])

for buffer in inputs + transients:
- assert buffer._live == True, f"Tried to deallocate already dead buffer {buffer.name}"
+ if buffer._live == False:
+ continue
buffer._live = False
# Don't deallocate if it's an alias of a live buffer
if not buffer.has_live_aliases(ctxt):
@@ -362,8 +363,8 @@ def apply(self,
ctxt._maxDynamicSize[levels] = max(ctxt._maxDynamicSize.get(levels, 0), ctxt._dynamicSize[levels])

for buffer in inputs + transients:
- assert buffer._live == True, f"Tried to deallocate already dead buffer {buffer.name}"
-
+ if buffer._live == False:
+ continue
memoryLevel = "None" if not hasattr(buffer, "_memoryLevel") else buffer._memoryLevel
if memoryLevel not in ctxt._dynamicSize:
ctxt._dynamicSize[memoryLevel] = 0
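Both deallocation loops above relax a hard assert into a skip, which makes deallocation idempotent: when several entries in inputs + transients alias the same storage, the first visit releases it and later visits fall through. A condensed sketch of the pattern, with illustrative names rather than the pass's real classes:

    class Buffer:
        def __init__(self, name: str):
            self.name = name
            self._live = True

    def deallocate(buffers):
        for buffer in buffers:
            if not buffer._live:   # previously an assert; aliases made it fire spuriously
                continue
            buffer._live = False   # release exactly once

    scratch = Buffer("scratch")
    deallocate([scratch, scratch])  # the second visit is now a harmless no-op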
84 changes: 60 additions & 24 deletions Deeploy/DeeployTypes.py
@@ -63,6 +63,37 @@ class CodeGenVerbosity:
_backendPostParsingFilename = 'backend_post_parsing'
_backendPostBindingFilename = 'backend_post_binding'


+ def _deeployTypeToNpType(ty: Type[BaseType]):
+
+ def _broadcastInteger(ty: Type[IntegerImmediate]):
+ if ty.signed:
+ return np.dtype(getattr(np, "int" + str(ty.typeWidth)))
+ else:
+ return np.dtype(getattr(np, "uint" + str(ty.typeWidth)))
+
+ def _broadcastFloat(ty: Type[FloatImmediate]):
+ if ty.typeWidth == 16:
+ return np.dtype(np.float16)
+ if ty.typeWidth == 32:
+ return np.dtype(np.float32)
+ if ty.typeWidth == 64:
+ return np.dtype(np.float64)
+ return np.dtype(np.float32)
+
+ if issubclass(ty, Pointer) and hasattr(ty, "referencedType"):
+ if issubclass(ty.referencedType, IntegerImmediate):
+ return _broadcastInteger(ty.referencedType)
+ if issubclass(ty.referencedType, FloatImmediate):
+ return _broadcastFloat(ty.referencedType)
+ elif issubclass(ty, IntegerImmediate):
+ return _broadcastInteger(ty)
+ elif issubclass(ty, FloatImmediate):
+ return _broadcastFloat(ty)
+
+ return None
+
+
_ctxtExtension = '.pkl'
_graphExtension = '.onnx'
_dataExtension = '.data'
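Hoisting this helper to module level lets parsing and ONNX export share a single Deeploy-to-numpy type mapping, and floats now resolve to their actual width, replacing the parser-local helper removed further down that mapped every float to float64. A usage sketch; the import paths and the concrete type classes are assumptions about Deeploy's layout, not confirmed by this diff:

    import numpy as np
    from Deeploy.DeeployTypes import _deeployTypeToNpType          # module-private helper
    from Deeploy.CommonExtensions.DataTypes import float16_t, int8_t, uint16_t  # assumed location

    assert _deeployTypeToNpType(int8_t) == np.dtype(np.int8)
    assert _deeployTypeToNpType(uint16_t) == np.dtype(np.uint16)
    # Previously every float width broadcast to float64; now the width is preserved.
    assert _deeployTypeToNpType(float16_t) == np.dtype(np.float16)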
@@ -415,7 +446,12 @@ def __eq__(self, other):
def _valueString(self) -> str:
values = list(self.values.reshape(-1))
if self._type.typeName == 'float32_t*':
- strValues = [f'{value}f' for value in values]
+ strValues = []
+ for value in values:
+ literal = f"{float(value):.9g}"
+ if "e" not in literal and "." not in literal:
+ literal += ".0"
+ strValues.append(literal + "f")
elif self._type.typeName == 'int8_t*':
strValues = [f'{int(value)}' for value in values]
else:
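The rewrite fixes the emitted C literals: %.9g keeps enough significant digits to round-trip a float32, and the ".0" guard ensures the result is still a floating-point literal when %g drops the decimal point. A few worked cases of the logic above:

    # %.9g keeps float32 round-trip precision but may drop the decimal point.
    for value, expected in [(2.0, "2.0f"), (0.1, "0.1f"), (1.5e-7, "1.5e-07f")]:
        literal = f"{float(value):.9g}"
        if "e" not in literal and "." not in literal:
            literal += ".0"          # "2" -> "2.0", keeping a valid C float literal
        assert literal + "f" == expected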
@@ -977,8 +1013,6 @@ def hoistConstant(self,
Returns the name of the newly registed ConstantBuffer

"""
- assert len(constant.outputs) <= 1, f"Constant {constant.name} has more than one output"
-
name = name if name is not None else constant.name

# LMACAN: The shape needs to be copied into a tuple for pickling to work. Don't ask me why..
@@ -2027,25 +2061,7 @@ def parse(self, ctxt: NetworkContext, default_channels_first: bool) -> Tuple[Net
return ctxt, False

def _broadcastToNpType(self, ty: Type[BaseType]):

- def _broadcastInteger(ty: Type[IntegerImmediate]):
- if ty.signed:
- return np.dtype(getattr(np, "int" + str(ty.typeWidth)))
- else:
- return np.dtype(getattr(np, "uint" + str(ty.typeWidth)))
-
- def _broadcastFloat(ty: Type[FloatImmediate]):
- return np.dtype(getattr(np, "double"))
-
- if issubclass(ty, Pointer) and hasattr(ty, "referencedType"):
- if issubclass(ty.referencedType, IntegerImmediate):
- return _broadcastInteger(ty.referencedType)
- elif issubclass(ty, IntegerImmediate):
- return _broadcastInteger(ty)
- elif issubclass(ty, FloatImmediate):
- return _broadcastFloat(ty)
-
- return None
+ return _deeployTypeToNpType(ty)

def typeCheck(self, ctxt: NetworkContext) -> Tuple[NetworkContext, bool]:
"""Invokes the mapper's typeCheck method
@@ -2106,8 +2122,9 @@ def bind(self, ctxt: NetworkContext) -> Tuple[NetworkContext, bool]:
elif ctxt.is_global(node.name):
npType = self._broadcastToNpType(ctxt.globalObjects[node.name]._type)
if isinstance(ctxt.globalObjects[node.name], ConstantBuffer):
- if isinstance(node, gs.Constant):
+ if isinstance(node, gs.Constant) and npType is not None:
node.values = node.values.astype(npType)
+ node.export_dtype = npType
else:
node.shape = ctxt.globalObjects[node.name].shape
if npType is not None:
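export_dtype is onnx-graphsurgeon's mechanism (in recent versions of the library) for choosing the dtype a Constant is serialized with, independently of the dtype of its in-memory values; setting it right after the astype keeps the two in sync, so the exported graph carries the type Deeploy inferred. A hedged sketch of the idiom, under the assumption of a graphsurgeon version whose Constant exposes export_dtype:

    import numpy as np
    import onnx_graphsurgeon as gs

    weight = gs.Constant("weight", values=np.ones((4,), dtype=np.float64))
    weight.values = weight.values.astype(np.float32)  # cast the in-memory values
    weight.export_dtype = np.dtype(np.float32)        # and keep the serialized dtype in sync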
@@ -2856,7 +2873,17 @@ def generateInferenceInitializationCode(self) -> str:

name = node.name
node.name = self.ctxt._mangle(node.name)
- callStack += node.init()
+
+ if ("TILING_CODEGEN" not in node.name and isinstance(node, VariableBuffer) and hasattr(node, "_type")
+ and issubclass(node._type, Pointer)):
+ # Local inference buffers are late-bound by the generated layer code. Initializing them to NULL keeps
+ # clang from flagging false-positive uninitialized reads on paths where the assignment is emitted in a
+ # separate closure, and marking them unused avoids noise for scratch buffers that are reserved
+ # generically but optimized away for a specific layer instance.
+ typeName = node._instance.typeName if hasattr(node, "_instance") else node._type.typeName
+ callStack += f"{typeName} {node.name} __attribute__((unused)) = NULL;\n"
+ else:
+ callStack += node.init()
node.name = name

return callStack
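For pointer-typed buffers the new branch thus emits a NULL-initialized declaration marked unused instead of the buffer's regular init code. A small sketch of the output, with an illustrative mangled name rather than Deeploy's real mangling:

    # Reproduces the f-string in the branch above for a hypothetical buffer.
    typeName = "float32_t*"
    name = "_DeeployNetwork_buf0"
    print(f"{typeName} {name} __attribute__((unused)) = NULL;")
    # -> float32_t* _DeeployNetwork_buf0 __attribute__((unused)) = NULL;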
@@ -3121,6 +3148,15 @@ def _exportGraph(self, folderPath, fileName):
# VJUNG: ONNX-Graphsurgeon needs tensors to be in their export types
constTensors = [tensor for tensor in self.graph.tensors().values() if isinstance(tensor, gs.Constant)]
for tensor in constTensors:
+ if tensor.name in self.ctxt.globalObjects:
+ ctxtTensor = self.ctxt.globalObjects[tensor.name]
+ if isinstance(ctxtTensor, ConstantBuffer) and hasattr(ctxtTensor, "_type"):
+ npType = _deeployTypeToNpType(ctxtTensor._type)
+ if npType is not None:
+ tensor.values = tensor.values.astype(npType)
+ tensor.export_dtype = npType
+ continue
+
if tensor.dtype != tensor.export_dtype:
tensor.values = tensor.values.astype(tensor.export_dtype)

@@ -36,6 +36,8 @@ def apply(self, graph: gs.Graph) -> Tuple[gs.Graph]:
engine = self.engineMapper.mapNodeToEngine(node, graph)
if engine is not None:
node.attrs["engine"] = engine.name
if hasattr(engine, "n_cores"):
node.attrs["n_cores"] = engine.n_cores
return graph


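The hasattr guard makes the core count an optional, per-engine annotation: engines that declare n_cores propagate it into the node attributes next to "engine", all others are left untouched. A sketch with a hypothetical engine descriptor:

    class ClusterEngine:        # hypothetical engine descriptor, not a Deeploy class
        name = "cluster"
        n_cores = 8

    attrs = {"engine": ClusterEngine.name}
    if hasattr(ClusterEngine, "n_cores"):
        attrs["n_cores"] = ClusterEngine.n_cores   # only set when the engine defines it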
2 changes: 1 addition & 1 deletion Deeploy/Targets/GAP9/DMA/L3Dma.py
@@ -29,7 +29,7 @@ class GAP9L3Dma(AsyncDma):
_transferTemplates = {
2:
NodeTemplate(
"pi_cl_ram_copy_2d(get_ram_ptr(), ${ext}, ${loc}, ${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});"
"pi_cl_ram_copy_2d(get_ram_ptr(), (uint32_t) ${ext}, ${loc}, ${transfer_size}, ${stride}, ${length}, ${ext2loc}, &${future});"
)
}
_waitingStrategy = PerTensorWaitingStrategy(GAP9L3DmaFuture)
42 changes: 33 additions & 9 deletions Deeploy/Targets/Generic/Bindings.py
@@ -15,16 +15,16 @@
ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \
FloatConvTemplate, FloatDivTemplate, FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \
- FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \
- GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \
- MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
+ FloatPowTemplate, FloatReduceLogSumExpTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \
+ FloatSqrtTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \
+ MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \
iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \
DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \
- LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \
- ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, \
- SoftmaxChecker, TransposeChecker
+ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceLogSumExpChecker, \
+ ReduceMeanChecker, ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, \
+ SliceChecker, SoftmaxChecker, TransposeChecker

BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])

@@ -227,6 +227,11 @@
BasicTransformer) for type in SignedIntegerDataTypes
]

+ BasicReduceLogSumExpBindings = [
+ NodeBinding(ReduceLogSumExpChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
+ FloatReduceLogSumExpTemplate.referenceTemplate, BasicTransformer)
+ ]
+
BasicReluBinding = NodeBinding(ReluChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
FloatReluTemplate.referenceTemplate, BasicTransformer)

@@ -286,6 +291,9 @@
BasicConcatBindings = [
NodeBinding(ConcatChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]),
ConcatTemplate.referenceTemplate, BasicTransformer) for type in IntegerDataTypes
+ ] + [
+ NodeBinding(ConcatChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]),
+ ConcatTemplate.referenceTemplate, BasicTransformer)
]

BasicQuantBindings = [
@@ -312,18 +320,34 @@
for type in FloatDataTypes
]

- BasicConvTransposeBindings = [
+ BasicConvTranspose1DBindings = [
NodeBinding(
ConvChecker(
[PointerClass(type), PointerClass(type), PointerClass(type)], # input, weight, bias
[PointerClass(type)]),
+ ConvTransposeTemplate.reference1DTemplate,
+ BasicTransformer) for type in FloatDataTypes
+ ] + [
+ NodeBinding(
+ ConvChecker(
+ [PointerClass(type), PointerClass(type)], # input, weight
+ [PointerClass(type)]),
+ ConvTransposeTemplate.reference1DTemplate,
+ BasicTransformer) for type in FloatDataTypes
+ ]
+
+ BasicConvTranspose2DBindings = [
+ NodeBinding(
+ ConvChecker(
+ [PointerClass(type), PointerClass(type), PointerClass(type)], # input, weight, bias
+ [PointerClass(type)]),
- ConvTransposeTemplate.referenceTemplate,
+ ConvTransposeTemplate.reference2DTemplate,
BasicTransformer) for type in FloatDataTypes
] + [
NodeBinding(
ConvChecker(
[PointerClass(type), PointerClass(type)], # input, weight
[PointerClass(type)]),
- ConvTransposeTemplate.referenceTemplate,
+ ConvTransposeTemplate.reference2DTemplate,
BasicTransformer) for type in FloatDataTypes
]
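Splitting the bindings by dimensionality (reference1DTemplate vs. reference2DTemplate) and by signature covers ONNX ConvTranspose nodes both with and without the optional bias input. A sketch of the two node shapes the checkers now match, built with onnx-graphsurgeon; tensor names and shapes are made up:

    import numpy as np
    import onnx_graphsurgeon as gs

    x = gs.Variable("x", dtype=np.float32, shape=(1, 8, 16))
    w = gs.Constant("w", values=np.zeros((8, 4, 3), dtype=np.float32))
    b = gs.Constant("b", values=np.zeros((4,), dtype=np.float32))

    # (input, weight, bias) matches the three-pointer ConvChecker variant ...
    with_bias = gs.Node(op="ConvTranspose", inputs=[x, w, b],
                        outputs=[gs.Variable("y1", dtype=np.float32)])
    # ... and (input, weight) matches the two-pointer variant.
    without_bias = gs.Node(op="ConvTranspose", inputs=[x, w],
                           outputs=[gs.Variable("y2", dtype=np.float32)])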
27 changes: 27 additions & 0 deletions Deeploy/Targets/Generic/Layers.py
@@ -340,6 +346,12 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
if inputShapes[1] == () or inputShapes[1] == []:
inputShapes[1] = (1,)

+ # Scalars and singletons should broadcast to the tensor operand,
+ # not shrink the tensor shape to (1,).
+ if tuple(inputShapes[1]) == (1,):
+ inputShapes[1] = inputShapes[0]
+ return (inputShapes, outputShapes)
+
if len(inputShapes[0]) > len(inputShapes[1]):
inputShapes[1] = inputShapes[0]
else:
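The early return fixes the equal-rank case: for an Add with shapes (4,) and (1,), the old rank comparison saw equal lengths, fell into the else branch, and copied the (1,) shape onto the tensor operand; now the singleton broadcasts up before that comparison runs. Traced on that example:

    # Hypothetical Add inputs: a length-4 tensor and a scalar promoted to (1,).
    inputShapes = [(4,), (1,)]
    # Old: len((4,)) == len((1,)) -> else branch -> tensor operand shrunk to (1,).
    # New: the singleton operand broadcasts up instead:
    if tuple(inputShapes[1]) == (1,):
        inputShapes[1] = inputShapes[0]   # -> [(4,), (4,)]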
@@ -438,6 +444,27 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese
return (inputShapes, outputShapes)


+ class ReduceLogSumExpLayer(ONNXLayer):
+
+ def __init__(self, maps: List[NodeMapper]):
+ super().__init__(maps)
+
+ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
+ channels_first) -> Tuple[Shape, Shape]:
+ axis = operatorRepresentation['axes'][0]
+ inputShape = list(copy.deepcopy(inputShapes[0]))
+
+ if operatorRepresentation['keepdims']:
+ outputShape = inputShape
+ outputShape[axis] = 1
+ else:
+ outputShape = inputShape[:axis] + inputShape[axis + 1:]
+ if len(outputShape) == 0:
+ outputShape = [1]
+
+ return (inputShapes, [outputShape])
+
+
class ReluLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
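The new layer's shape rule mirrors numpy's reduction semantics for log(sum(exp(x))) along one axis. A numpy reference for both the shapes computeShapes produces and the values a kernel implementation should match:

    import numpy as np

    x = np.random.rand(2, 3, 4).astype(np.float32)

    # keepdims=1, axes=[1]: (2, 3, 4) -> (2, 1, 4)
    y = np.log(np.sum(np.exp(x), axis=1, keepdims=True))

    # keepdims=0, axes=[1]: (2, 3, 4) -> (2, 4); reducing the only axis of a
    # 1-D input would collapse to the fallback shape (1,).
    z = np.log(np.sum(np.exp(x), axis=1, keepdims=False))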