From 300f21bac8ef1790e3e0f6046d7f13b60cdad01d Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 11 Apr 2025 14:22:14 +0200 Subject: [PATCH 01/43] Initial implementation for ALPAKA integration to SOFIE --- .vscode/settings.json | 6 + src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 1 + src/SOFIE_core/src/RModel.cxx | 333 +++++++++++++++++++++-- src/SOFIE_core/src/RModel_Base.cxx | 32 +++ 4 files changed, 344 insertions(+), 28 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..eb254be --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "files.associations": { + "*.icc": "cpp", + "limits": "cpp" + } +} \ No newline at end of file diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index f8a9d34..0a615c5 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -82,6 +82,7 @@ public: fCustomOpHeaders.insert(filename); } void GenerateHeaderInfo(std::string &hgname); + void GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname); void PrintGenerated() { std::cout << fGC; } std::string ReturnGenerated() { return fGC; } diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index e5495ed..e2bc530 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -594,6 +594,28 @@ void RModel::GenerateInitializedTensorInfo() } } +void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() +{ + if (!fInitializedTensors.empty()) + fGC += "// initialized tensors\n"; + + for (auto &i : fInitializedTensors) { + if (!fUseWeightFile || i.second.IsConstantTensor()) { + if (i.second.type() == ETensorType::FLOAT) + fGC += GenerateConstantTensorCode(i); + else if (i.second.type() == ETensorType::INT64) + fGC += GenerateConstantTensorCode(i); + + } else { + // case of tensors which are read from a file + size_t length = ConvertShapeToLength(i.second.shape()); 
+ if (i.second.type() == ETensorType::FLOAT) { + fGC += "auto deviceBuf_"+i.first+" = alpaka::allocBuf(devAcc, "+std::to_string(length)+");\n"; + } + } + } +} + void RModel::GenerateIntermediateMemoryPool() { if (fIntermediateMemoryInfo.total_stack.size() == 0) return; fGC += "\n//--- Allocating session memory pool to be used for allocating intermediate tensors\n"; @@ -612,7 +634,7 @@ void RModel::GenerateIntermediateTensorInfo() { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; // No pointer allocation needed for BOOL } - if (fIntermediateTensorFrequencyLookup.find(i.first) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { + if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { size_t length = ConvertShapeToLength(i.second.shape); if (i.second.type == ETensorType::FLOAT) { @@ -652,6 +674,55 @@ void RModel::GenerateIntermediateTensorInfo() { } } +void GenerateGPU_ALPAKA_Buffers(){ + if (!fIntermediateTensorInfos.empty()) { + std::string tensor_declaration_block = ""; + + for (auto &i : fIntermediateTensorInfos) { + if (i.second.type == ETensorType::BOOL) { + tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; + // No pointer allocation needed for BOOL + } + if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { + size_t length = ConvertShapeToLength(i.second.shape); + + if (i.second.type == ETensorType::FLOAT) { + tensor_declaration_block += "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; + } + else if (i.second.type == ETensorType::DOUBLE) { + tensor_declaration_block += "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + 
std::to_string(length) + ");\n"; + } + else if (i.second.type == ETensorType::INT64) { + tensor_declaration_block += "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; + + } + } + } + + if (tensor_declaration_block.length()) { + fGC += "\n//--- declare and allocate the intermediate tensors\n" + tensor_declaration_block; + } + } + // add also the dynamic tensors (only declarations, allocation will be done later) + if (!fDynamicTensorInfos.empty()) { + fGC += "//--- declare the dynamic tensors\n"; + fGC += "using bufDev_float = alpaka::Buf, size_t>;\n" + fGC += "using bufDev_double = alpaka::Buf, size_t>;\n" + fGC += "using bufDev_int64= alpaka::Buf, size_t>;\n" + for (auto &i : fDynamicTensorInfos) { + if (i.second.type == ETensorType::FLOAT) { + fGC += "bufDev_float bufDev_" + i.first + ";\n"; + } else if (i.second.type == ETensorType::DOUBLE) { + fGC += "bufDev_double bufDev_" + i.first + ";\n"; + } else if (i.second.type == ETensorType::INT64) { + fGC += "bufDev_int64 bufDev_" + i.first + ";\n"; + + } + } + } +} + + // generate code for specific operator declarations to be defined in the Session class void RModel::GenerateOperatorDeclarations() { std::string strcode; @@ -665,11 +736,26 @@ void RModel::GenerateOperatorDeclarations() { } void RModel::GenerateDynamicTensorInfo() { + fGC += "//---- allocate the intermediate dynamic tensors\n"; + std::stringstream out; + for (auto & i: fDynamicTensorInfos) { + auto length = ConvertDynamicShapeToLength(i.second.shape); + out << SP << "if (" << length << " > 0) {\n"; + out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + out << SP << "}\n"; + } + fGC += out.str(); +} + +void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { fGC += "//---- allocate the intermediate dynamic tensors\n"; std::stringstream out; for (auto & i: fDynamicTensorInfos) { auto length = 
ConvertDynamicShapeToLength(i.second.shape); out << SP << "if (" << length << " > 0) {\n"; + out << "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; + out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; out << SP << "}\n"; @@ -804,7 +890,7 @@ void RModel::GenerateSessionCode() { // define the Session struct (for GNN this is generated in RModel_GNN) - if (fUseSession && !fIsGNNComponent) { + if (fUseSession) { if (!fIsSubGraph) fGC += "struct Session {\n"; else @@ -814,32 +900,32 @@ void RModel::GenerateSessionCode() // generate code for declaring the initialized tensors GenerateInitializedTensorInfo(); - // evaluate total intermediate memory and position intermediate tensor addresses - std::string intermediate_memory_alloc_string = ""; - intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; - for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); - CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); - } - - // to check remaining unused fragments after memory allocation (lesser the better) - // for (const auto &it: fIntermediateMemoryInfo.available_stack){ - // std::cout<<"chunk_idx: "<GetOpOutputTensors()); + // CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); // } - // generate the memory pool to be used by intermediate tensors - GenerateIntermediateMemoryPool(); + // // to check remaining unused fragments after memory allocation (lesser the better) + // // for (const auto &it: fIntermediateMemoryInfo.available_stack){ + // // std::cout<<"chunk_idx: "<GenerateSessionMembersCode(opName); + // } + // fGC += "\n"; + // here add initialization and reading of weight tensors + if (fUseWeightFile) { + 
std::string fileName = fName; + if (fWeightFile == WeightFileType::Text) { + fileName += ".dat"; + } + if (fWeightFile == WeightFileType::RootBinary) { + fileName += ".root"; + } + fGC += sessionName + "(std::string filename =\"" + fileName + "\""; + } else { + // no need to pass weight file since it is not used + // keep passing a string for compatibility + fGC += sessionName + "(std::string = \"\""; + } + // add initialization of shape parameters + // assume all parameters are of type size_t + if (!fShapeParams.empty()) { + for (auto &p : fShapeParams) { + fGC += ",\n"; + fGC += " size_t " + p.first + " = " + p.second; + } + } + fGC += ") {\n"; + + if (fUseWeightFile) { + fGC += "\n//--- reading weights from file\n"; + ReadInitializedTensorsFromFile(fReadPos); + fGC += "\n"; + // fUseWeightFile = fUseWeightFile; + } + + // now we have passed the parameters we can allocate the dynamic tensors + GenerateDynamicTensorInfo(); + + // add here initialization code for operator for (size_t id = 0; id < fOperators.size(); id++) { - std::string opName = std::to_string(id); - fGC += fOperators[id]->GenerateSessionMembersCode(opName); + fGC += fOperators[id]->GenerateInitCode(); } + + fGC += "}\n\n"; + } + // generate the inference code + GenerateOutput(); + + // end of session + if (fUseSession && !fIsGNNComponent) { + fGC += "}; // end of Session\n"; + } +} + +void RModel::GenerateSessionCode_GPU_ALPAKA() +{ + + // define the Session struct (for GNN this is generated in RModel_GNN) + if (fUseSession) { + if (!fIsSubGraph) + fGC += "struct Session {\n"; + else + fGC += "struct Session_" + fName + " {\n"; + } + + // // generate code for declaring the initialized tensors + GenerateInitializedTensorInfo_GPU_ALPAKA(); + + // // evaluate total intermediate memory and position intermediate tensor addresses + // std::string intermediate_memory_alloc_string = ""; + // intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; + // for (size_t op_idx = 
0; op_idx < fOperators.size(); ++op_idx) { + // intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); + // CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); + // } + + // // to check remaining unused fragments after memory allocation (lesser the better) + // // for (const auto &it: fIntermediateMemoryInfo.available_stack){ + // // std::cout<<"chunk_idx: "<fName + " fSession_" + graph->fName + ";\n"; + } + + // Generate code for Session constructor + if (fUseSession) { + std::string sessionName = "Session"; + if (fIsSubGraph) + sessionName += "_" + fName; + // add here specific operator code that needs to define session data members + // fGC += "\n"; + // for (size_t id = 0; id < fOperators.size(); id++) { + // std::string opName = std::to_string(id); + // fGC += fOperators[id]->GenerateSessionMembersCode(opName); + // } fGC += "\n"; // here add initialization and reading of weight tensors if (fUseWeightFile) { @@ -885,13 +1080,15 @@ void RModel::GenerateSessionCode() if (fUseWeightFile) { fGC += "\n//--- reading weights from file\n"; - ReadInitializedTensorsFromFile(fReadPos); + ReadInitializedTensorsFromFile(0); fGC += "\n"; // fUseWeightFile = fUseWeightFile; } + MoveInitializedTensorsToBuffers_ALPAKA(); + // now we have passed the parameters we can allocate the dynamic tensors - GenerateDynamicTensorInfo(); + GenerateDynamicTensorInfo_GPU_ALPAKA(); // add here initialization code for operator for (size_t id = 0; id < fOperators.size(); id++) { @@ -967,6 +1164,62 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo } } +void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int batchSize, bool verbose) +{ + fVerbose = verbose; + fBatchSize = batchSize; + + // session flag is used in operator initialize + if (static_cast>(Options::kNoSession) & options) { + fUseSession = false; + fWeightFile = WeightFileType::None; + } + if 
(static_cast>(Options::kNoWeightFile) & options) { + fUseWeightFile = false; + fWeightFile = WeightFileType::None; + } + if (static_cast>(Options::kRootBinaryWeightFile) & options) { + fUseWeightFile = true; + fWeightFile = WeightFileType::RootBinary; + } + if (fUseWeightFile && !fUseSession) { + throw std::runtime_error( + "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); + } + + if (static_cast>(Options::kGNN) & options || static_cast>(Options::kGNNComponent) & options) + throw std::runtime_error("SOFIE GPU does not yet supports GNN Inference.") + + // initialize the model including all operators and sub-graphs + Initialize(batchSize, verbose); + + std::string hgname; + // if (!fIsSubGraph) { + // fGC.clear(); + // GenerateHeaderInfo_GPU_ALPAKA(hgname); + // } + + // generate first code for the subgraphs + // for (auto &graph : fSubGraphs) { + // if (fVerbose) + // std::cout << "generate session code for subgraph " << graph->fName << std::endl; + // graph->GenerateSessionCode(); + // fGC += graph->fGC; + // } + + if (fVerbose) + std::cout << "generate Main session code - model " << fName << std::endl; + + // generate main session code + GenerateSessionCode_GPU_ALPAKA(); + + if (!fIsSubGraph) { + fGC += ("} //SOFIE_" + fName + "\n"); + fGC += "\n#endif // " + hgname + "\n"; + } +} + + void RModel::ReadInitializedTensorsFromFile(long pos) { // generate the code to read initialized tensors from a text data file if (fWeightFile == WeightFileType::Text) { @@ -978,9 +1231,9 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " throw std::runtime_error(\"tmva-sofie failed to open file \" + filename + \" for input weights\");\n"; fGC += " }\n"; - if(fIsGNNComponent) { - fGC += " f.seekg(" + std::to_string(pos) + ");\n"; - } + // if(fIsGNNComponent) { + // fGC += " f.seekg(" + std::to_string(pos) + ");\n"; + // } fGC += " std::string tensor_name;\n"; fGC += " size_t length;\n"; @@ -1048,10 +1301,34 @@ void 
RModel::ReadInitializedTensorsFromFile(long pos) { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); } fGC += " }\n"; - } - fGC += " }\n"; + } } -} + } + + void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ + for (auto &i : fInitializedTensors) { + // skip Constant and shape tensors + if (!i.second.IsWeightTensor()) continue; + std::string tensor_name = "tensor_" + i.first; + length = ConvertShapeToLength(i.second.shape()); + std::string slength = std::to_string(length); + if (i.second.type() == ETensorType::FLOAT) { + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(float));\n"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength");\n"; + } else if (i.second.type() == ETensorType::DOUBLE) { + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(doub;e));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength");\n"; + } else if (i.second.type() == ETensorType::INT64) { + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(int64_t));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength");\n"; + } else { + std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); + } + } + } long RModel::WriteInitializedTensorsToFile(std::string filename) { // Determine the file extension based on the weight file type diff --git a/src/SOFIE_core/src/RModel_Base.cxx 
b/src/SOFIE_core/src/RModel_Base.cxx index d4d1f1c..a3392d8 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -58,6 +58,38 @@ void RModel_Base::GenerateHeaderInfo(std::string& hgname) { } } +void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { + fGC += ("//Code generated automatically by TMVA for ALPAKA Inference of Model file [" + fFileName + "] at [" + fParseTime.substr(0, fParseTime.length()-1) +"] \n"); + // add header guards + hgname = fName; + std::transform(hgname.begin(), hgname.end(), hgname.begin(), [](unsigned char c) { + return std::toupper(c); + } ); + hgname = "SOFIE_" + hgname; + fGC += "\n#ifndef " + hgname + "\n"; + fGC += "#define " + hgname + "\n\n"; + for (auto& i: fNeededStdLib) { + fGC += "#include <" + i + ">\n"; + } + for (auto& i: fCustomOpHeaders) { + fGC += "#include \"" + i + "\"\n"; + } + fGC += "#include \n"; + fGC += "#include \n"; + fGC += "#include \n"; + + // for the session we need to include SOFIE_Common functions + //needed for convolution operator (need to add a flag) + fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; + if (fUseWeightFile) + fGC += "#include \n"; + // Include TFile when saving the weights in a binary ROOT file + if (fWeightFile == WeightFileType::RootBinary) + fGC += "#include \"TFile.h\"\n"; + + fGC += "\nnamespace SOFIE_" + fName + "{\n"; +} + void RModel_Base::OutputGenerated(std::string filename, bool append) { // the model can be appended only if a file name is provided if (filename.empty()) { From fc9846cfbb5c94b30b35dc91fa03465ac12f79a6 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 11 Apr 2025 14:57:12 +0200 Subject: [PATCH 02/43] GPU ALPAKA Support in GEMM --- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 108 ++++++++++++++++++++ src/SOFIE_core/src/RModel.cxx | 8 +- 2 files changed, 112 insertions(+), 4 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 
046bf56..b6901f0 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -289,6 +289,28 @@ namespace SOFIE{ return out.str(); } + std::string GenerateInitCode_GPU_ALPAKA() override { + std::stringstream out; + // generate initialization code for broadcasting of bias tensor + if (fShapeC.size() != fShapeY.size() && fNC != fNC2) { + // we broadcast here always C in Y output, so target shape is the one of Y + // no need to call UTILITY::UnidirectionalBroadcastShape. + // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. + auto targetShape = fShapeY; + // include a separate scope to avoid defining unique operator temp variables + out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; + out << SP << "{\n"; + out << " float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertDynamicShapeToString(fShapeY) << ");\n"; + auto length = SOFIE::ConvertDynamicShapeToLength(fShapeY); // output size + out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::allocBuf(hostAcc,"+ length +");\n"; + out << SP << SP << "std::memcpy(alpaka::getPtrNative(hostBuf_"<< fNC2 <<"), data, "<< length << " * sizeof(float));\n"; + out << SP << SP << "alpaka::memcpy(queue, deviceBuf_"<< fNC2 << ", hostBuf_"<< fNC2 << " , "<< length << ");\n"; + out << SP << "}\n"; + } + return out.str(); + } + std::string Generate(std::string opName) override { opName = "op_" + opName; @@ -389,6 +411,92 @@ namespace SOFIE{ return out.str(); } + std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + + if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) { + throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first"); + } + std::stringstream out; + out << "\n//--------- Gemm_GPU_ALPAKA\n"; + out << SP << 
"char " << opName << "_transA = " << (fAttrTransA ? "\'t\'" : "\'n\'") << ";\n"; + out << SP << "char " << opName << "_transB = " << (fAttrTransB ? "\'t\'" : "\'n\'") << ";\n"; + // need to consider case A and B have dim > 2 (for MatMul) + int64_t dimA = fShapeA.size(); + int64_t dimB = fShapeB.size(); + int64_t dimY = fShapeY.size(); + if (dimA != dimB || dimA != dimY) { + throw std::runtime_error("TMVA SOFIE Gemm(MatMul) has invalid shape for inputs or output"); + } + auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); + auto n = (fAttrTransB ? fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); + auto k = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); + std::vector sY = {fShapeY[dimY-2], fShapeY[dimY-1]}; + // extra dimensions in case of stacked MatMul + std::vector sA; + for (int64_t i = 0; i < dimY-2; i++) { + sA.push_back(fShapeY[i]); + } + auto lengthGemm = ConvertDynamicShapeToLength(sY); // size of the Gemm operation + auto lengthExtra = ConvertDynamicShapeToLength(sA); // extra length in case input tensors are of dim>2 (MatMul) + + out << SP << "int " << opName << "_m = " << m << ";\n"; + out << SP << "int " << opName << "_n = " << n << ";\n"; + out << SP << "int " << opName << "_k = " << k << ";\n"; + out << SP << "float " << opName << "_alpha = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ";\n"; + out << SP << "float " << opName << "_beta = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ";\n"; + out << SP << "int " << opName << "_lda = " << (fAttrTransA ? m : k) << ";\n"; + out << SP << "int " << opName << "_ldb = " << (fAttrTransB ? 
k : n) << ";\n"; + + // case bias is present + if (!fNC.empty()){ + if (fNC2 == fNC) { + // add a check in case broadcasting was not needed or done outside of session + // C should have smaller dimension of Y + if (!fIsDynamic) { + if (std::stoi(lengthGemm) != static_cast(ConvertShapeToLength(fShapeC))) + throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor has not correct size " + + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); + } else { + // add a dynamic check (C should not be a dynamic tensor) + out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + } + } + } else { + //in this case fAttrBeta needs to be equal to zero otherwise second time we run we will use + // the previous result + if (fAttrBeta != 0) { + throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero"); + } + } + + // include MatMul case where we stack the Gemm operations + // exclude case where we have only 1's in the additional dims + bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra) > 1); + if (doStackMul) { + out << SP << "size_t " << opName << "_yoffset = 0;\n"; // needed if we stack the gemm operations + out << SP << "for (int i = 0; i < " << lengthExtra << "; i++){\n"; + out << SP; + } + // in the case of bias + if (!fNC.empty()){ + out << SP << "std::copy(" << "tensor_" << fNC2 << ", " << "tensor_" << fNC2 << " + " << lengthGemm << ", " + << "tensor_" << fNY; + if (doStackMul) out << " + " << opName << "_yoffset"; + out << ");\n"; + } + + + if (fType == "float"){ + out << SP << "Kokkos::View kokkos_dev_"< kokkos_dev_"< kokkos_dev_"< GetBlasRoutines() override { return { std::string("Gemm"), std::string("Gemv") }; } }; diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index e2bc530..5b6a793 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -827,7 +827,7 @@ std::string 
createOutputTensor(RModel const &rmodel, std::string const &name, bo } // namespace -void RModel::GenerateOutput() { +void RModel::GenerateOutput_GPU_ALPAKA() { if (fVerbose) std::cout << "Generating main inference code for " << fName << std::endl; @@ -871,7 +871,7 @@ void RModel::GenerateOutput() { for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); } fGC += SP + "return {"; @@ -1092,13 +1092,13 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() // add here initialization code for operator for (size_t id = 0; id < fOperators.size(); id++) { - fGC += fOperators[id]->GenerateInitCode(); + fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); } fGC += "}\n\n"; } // generate the inference code - GenerateOutput(); + GenerateOutput_GPU_ALPAKA(); // end of session if (fUseSession && !fIsGNNComponent) { From 419b3543b49a31939887ca72b44896115f34f99e Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 11 Apr 2025 16:17:54 +0200 Subject: [PATCH 03/43] fix: errors with the generation function --- .vscode/settings.json | 73 ++- Linear_16.dat | 40 ++ Linear_16.hxx | 658 ++++++++++++++++++++ src/SOFIE_core/inc/SOFIE/RModel.hxx | 15 + src/SOFIE_core/inc/SOFIE/ROperator.hxx | 2 + src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 7 +- src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 25 + src/SOFIE_core/src/RModel.cxx | 51 +- src/SOFIE_core/src/RModel_Base.cxx | 5 +- 9 files changed, 844 insertions(+), 32 deletions(-) create mode 100644 Linear_16.dat create mode 100644 Linear_16.hxx diff --git a/.vscode/settings.json b/.vscode/settings.json index eb254be..381ce8f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,77 @@ { "files.associations": { "*.icc": "cpp", - "limits": "cpp" + "limits": "cpp", + "cctype": "cpp", + 
"clocale": "cpp", + "cmath": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "compare": "cpp", + "complex": "cpp", + "concepts": "cpp", + "cstdint": "cpp", + "deque": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "regex": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "new": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "cinttypes": "cpp", + "typeinfo": "cpp", + "charconv": "cpp", + "chrono": "cpp", + "condition_variable": "cpp", + "list": "cpp", + "ratio": "cpp", + "future": "cpp", + "mutex": "cpp", + "semaphore": "cpp", + "shared_mutex": "cpp", + "span": "cpp", + "stop_token": "cpp", + "thread": "cpp", + "cfenv": "cpp", + "variant": "cpp", + "format": "cpp" } } \ No newline at end of file diff --git a/Linear_16.dat b/Linear_16.dat new file mode 100644 index 0000000..873ce7d --- /dev/null +++ b/Linear_16.dat @@ -0,0 +1,40 @@ +tensor_8weight 2500 +0.0268758684 0.139096066 0.0821818858 -0.127417535 -0.0831027254 0.109001353 -0.0448572189 0.0432091393 -0.100685023 -0.0782502964 -0.0569691472 -0.0834055692 -0.0914414823 -0.00128868222 0.114371844 0.157571077 -0.0249715224 -0.0275524613 -0.106611423 0.160815567 0.0850525424 -0.0246056858 0.0868391246 0.0197147224 0.0387364663 0.0334140956 -0.0329913124 0.110141195 0.105670758 
-0.0897664875 -0.0678865984 0.0182914361 0.146356225 0.0747506022 -0.0347048417 0.0646456406 -0.0683225691 -0.0967762694 0.144724965 0.0968451351 -0.049604129 -0.0246048607 0.0982864648 -0.00104637037 -0.0540190488 0.02299482 -0.0587500408 0.162345782 -0.0178857595 -0.114502899 -0.0277074426 0.0523337275 -0.0407291614 -0.125408962 -0.0477996059 -0.144638136 -0.141282856 0.107945614 -0.0642622635 0.106897406 0.141127169 0.00702024298 0.115400836 0.0949773341 -0.0841375515 -0.029037755 -0.12251503 -0.113417722 0.152951673 -0.052355133 0.125115007 0.11263705 -0.0993821546 -0.100654982 0.13138859 -0.121526435 0.0688993633 0.0602294281 0.0230368655 -0.119217426 -0.131345615 -0.0928916186 0.0589227341 -0.0877812058 -0.0575322062 -0.0479355939 0.119958326 0.0839382187 0.0134669729 -0.120720349 -0.0728492588 -0.0201216638 -0.0426205285 0.0580729693 -0.0317371115 -0.0582037121 -0.058949165 -0.0108661382 -0.0596015975 0.0923921913 0.106575489 -0.00681856275 0.0882440805 -0.0621290579 -0.0726372078 -0.00623785472 0.0285876859 0.0697654709 0.0963460952 -0.0578520186 -0.0386559479 0.0133705661 -0.0272551179 0.0195438117 0.0346884355 -0.00187640428 0.0641605407 0.176762238 0.159317046 -0.0952501073 0.0985514522 -0.0871972367 -0.0842028037 -0.0397452265 0.17574358 -0.0538034029 0.136749208 -0.0399385504 0.068287164 0.0217975918 -0.132272243 -0.0182651877 0.105293095 0.00358554721 0.00108983321 -0.153865114 0.0111923162 0.168782786 0.0969837084 0.0112454593 -0.0346569866 -0.0916731507 -0.00954194739 0.154270783 -0.0877914429 0.0242955964 -0.0126784407 0.121400051 -0.0289624184 0.0689913705 0.0434036702 0.0646613985 0.0640042126 -0.0474287085 0.073149845 0.156802103 -0.041823104 0.0810273662 0.179701 -0.0470410772 -0.0788431466 -0.147018611 0.053253185 -0.0240673199 -0.0210381355 -0.0958639532 -0.0170583278 0.0516901463 -0.111291543 0.00283904956 0.142500415 0.141777232 0.126547039 0.129214615 0.0729232654 -0.0321790762 -0.143716827 -0.00954447314 0.172390178 0.0871036574 
0.000518912973 0.103541978 0.00573523017 0.100544035 0.167853162 0.157549649 0.117853075 -0.0903918445 -0.00601014355 0.0462133735 -0.119286336 0.111245058 0.00672465004 -0.035454426 0.184626952 -0.0521864779 0.180116341 -0.0577540956 -0.0600765273 0.151180387 -0.0436708629 -0.119430825 0.163085073 0.0715407208 0.0878540799 0.0810162574 -0.0718293861 0.12325272 -0.0860322118 -0.122137249 0.00682034623 0.158898726 -0.109563902 -0.140805796 0.144035459 0.0911271796 -0.0533853769 0.158740863 -0.12681675 0.0929608271 -0.0734888241 -0.0542239025 -0.0844008029 -0.0349548869 0.0194364432 0.00317873154 -0.0542409308 -0.110601485 -0.0040136571 0.126150146 -0.0695957989 0.135092571 0.0206705686 0.0210149139 -0.175603613 -0.0048725917 0.0448048264 0.0753361583 -0.0960626155 -0.00809389353 0.00274648191 0.118843384 0.0924557075 -0.0390414186 0.104679853 -0.02499073 0.115688451 0.183578849 -0.0963831246 -0.0192914438 0.0245501548 0.132269129 0.0528996326 0.126745895 0.0650902316 0.142014235 -0.109039702 0.110978663 -0.168853745 -0.0998671725 -0.0272130556 -0.0755283609 -0.145343035 0.0856398046 -0.0465832911 0.156254068 -0.00759668648 0.0660862029 -0.128243685 0.123390384 -0.0730970129 0.0211296733 0.172928646 -0.0569610596 0.162485525 0.0430793129 0.148347437 0.0230896771 0.0979775786 0.0892469361 0.114915423 0.102117866 0.114598379 -0.0385860801 0.104682192 0.05711741 0.183385804 0.114157908 0.0803671777 0.121530138 0.0750841424 -0.0201519765 0.0892636031 -0.00840737578 -0.0380099379 0.0670845732 0.173552945 -0.0446153358 0.0231650621 -0.0720840693 0.104690835 0.113704339 0.0918174759 -0.025853835 0.0474199951 -0.0122872479 0.0429795273 -0.0552103594 0.00883762538 0.151448011 -0.0409595668 0.0949078426 0.0689887926 0.107225835 0.0818655714 0.0588729456 -0.0576343685 -0.0948910415 -0.0496109203 -0.10446807 0.187580436 0.165386483 -0.0410638899 0.109590538 -0.0093578482 -0.168643042 0.0453341343 0.0740079209 -0.0932418108 -0.136097178 0.0847565904 0.016970491 -0.196696535 
0.0213545114 0.146830887 0.188490406 -0.104602233 0.102813676 -0.020301817 0.0581303798 0.154687196 0.0931936353 0.0230271649 -0.0598398224 0.00981738791 0.00165832066 0.130958691 -0.141922146 -0.0931093395 -0.0452646948 0.0832985789 -0.0752738565 0.108650707 -0.0635550246 0.161715269 -0.048298005 -0.0519500524 0.111362822 -0.0297681484 0.0919308066 0.00461465074 -0.123445027 -0.0583725758 0.0877097845 -0.0828031972 -0.0494801551 -0.0178236663 0.110060342 -0.104191855 0.00495020067 0.190953419 0.175235912 0.0748231113 -0.0354038626 -0.0866233632 0.0114633273 -0.0709704086 -0.0408563316 -0.00744438358 -0.12112467 -0.00320398994 -0.109712012 -0.13769187 -0.0328272358 0.091612272 0.14105013 0.146673918 -0.0514545403 0.202921212 0.0834511071 0.160404101 0.0601333193 0.0581494831 0.083636649 0.0984802917 0.0609594397 0.128031611 -0.0596118644 0.112030312 0.175436005 -0.0210987478 0.100863054 0.141590253 -0.014728453 -0.0364963971 0.0035578683 -0.0124314548 0.0569810681 0.0548986979 0.130580813 0.103102274 -0.0677055791 -0.116339654 0.129263833 0.162418574 0.122197464 -0.0109819323 0.0938043669 -0.0469912067 -0.101457044 0.131629422 -0.0877847597 -0.0320621915 0.0457580797 0.0759001374 -0.0854525268 0.0624620654 0.088354066 0.0471264385 0.122950502 -0.0319194868 0.0971357599 0.144149795 0.0968611538 -0.0324465856 -0.13455525 0.0447516218 -0.0679218769 -0.0809827521 0.0494714826 -0.0949900225 0.0311798677 -0.00109984947 0.174830928 -0.0281612556 0.163149565 0.0736394823 -0.0375521332 0.00539422035 -0.0927275494 -0.0925532579 0.0742847919 0.0994291157 0.127749816 0.0300972443 -0.0191503167 -0.0972991213 0.0944213718 -0.0106646148 0.0151962861 0.00275415881 0.0332029015 -0.0985995755 -0.0955503657 0.0529588386 -0.0463228486 -0.139574915 -0.105905958 0.0530111678 -0.153271616 0.00135927019 -0.018976111 0.0405978933 0.0479904711 0.0545446351 -0.114191957 0.141731873 -0.132812411 -0.0630234033 0.0933084786 0.0396189578 -0.0473725162 -0.0290426835 -0.123914912 -0.0582598001 
0.152059436 0.0548362397 -0.0321423411 0.115950227 -0.120880082 0.184676751 -0.0445445627 -0.103703029 0.0245305933 -0.00212677591 -0.0626897737 -0.0121289967 0.0785561725 -0.0832984447 0.0998352543 0.117086425 -0.0671990365 -0.0363239795 0.0353550613 0.114468403 0.143954277 0.105897352 0.0256107412 0.156521618 0.0780752227 -0.0554250963 0.0736213177 -0.10541296 0.0503535867 0.00255402969 0.0666635558 0.129061893 -0.0195398014 0.0478001311 0.065228425 -0.0979058444 0.0814248547 0.108272545 -0.00994789507 -0.0218796581 -0.154623747 0.0106207961 -0.12293978 -0.00427472685 -0.126063108 -0.0116878618 0.100741506 -0.0546985939 -0.0451277271 0.0930468291 -0.0851750597 -0.0140047939 -0.123040549 0.132812724 0.0833404511 0.140870854 -0.120734856 -0.0805390403 -0.0502453148 -0.0170761105 0.00345065887 -0.0480272733 -0.0561171696 0.0876882076 0.0613627955 -0.0316582024 0.0100890994 0.0458408594 -0.135604486 -0.0192864686 -0.01957082 0.0726629794 -0.0564594492 -0.0693246424 0.0831580311 -0.123946451 0.0842915326 0.027340591 0.123881891 0.0634962171 0.0854125172 0.0679267496 0.0966168046 -0.029469654 0.0381903499 0.111267343 0.140807226 -0.13526763 0.0522036403 -0.115617715 0.0543578118 0.0461016595 0.103722617 0.0174795687 -0.136987507 -0.0600835234 0.0538240522 0.0903360397 0.0172370523 -0.0140279233 -0.114186123 0.0943125635 -0.0614755452 -0.0279850513 0.083064124 -0.12880753 -0.13455835 -0.0599042326 -0.0389251933 -0.0604324266 -0.0513332263 -0.0554876402 0.0233900547 -0.0640518144 0.111689016 0.0502607375 -0.00419057906 -0.0730830133 -0.0277305022 0.171559766 0.0534306914 0.00674414961 -0.107875511 -0.0510217324 -0.0838860199 -0.15236254 -0.138948157 -0.125519603 0.0523681492 -0.0187952798 0.114655808 0.0474532545 0.0917048305 -0.0550882407 0.0838057324 0.18853642 0.142427206 0.180868432 0.140680373 -0.0942524076 0.123159751 0.0897716284 -0.0308326464 0.0049529071 0.13588357 0.0297236629 -0.0363686383 -0.0588090122 0.12634854 0.0122025581 0.186096713 0.0920768976 
0.0481046252 0.0876177624 0.0250588302 -0.0850643218 -0.0529115237 0.029226495 -0.0699693412 0.150488198 -0.0428842455 0.179663286 -0.0198406726 0.0218468606 0.197559595 0.0729278922 0.0885386169 -0.133979425 0.0167944431 -0.0360915139 0.0497089326 -0.0268492103 -0.0587182194 0.0121284872 -0.00810500979 -0.0885604918 -0.0682897642 -0.109051131 0.101431355 -0.10556107 -0.0689118356 0.0273847431 0.123891041 -0.0328962579 -0.0183387175 0.0236377716 -0.126516774 -0.027949512 0.125757441 -0.137005895 0.0159674287 -0.0881164894 0.0896662176 -0.0548697859 -0.0910438597 -0.126777187 0.0143643618 -0.0796068907 -0.0773626193 -0.0353754535 0.0982186347 0.102850467 0.0936983526 0.0350374728 0.0642853081 -0.0353903249 0.0034533143 -0.0836362615 -0.0474314392 0.137183502 0.00499179959 -0.0352529734 -0.12372198 0.0710547566 -0.0847075656 0.108061433 0.0962944925 -0.0228818804 0.0236922354 0.0593082607 -0.0698251426 -0.0753812417 -0.0950560495 -0.0748883784 -0.139509365 -0.0391269475 0.117235079 -0.0770111158 0.0286441594 -0.0478565544 0.0810799748 -0.0450968295 -0.0848289505 -0.0374233201 -0.0248766541 -0.0257886276 -0.00540667772 -0.131286308 -0.125986263 0.0405903906 -0.0291525051 -0.0074609369 -0.0744228065 0.0589668602 -0.0275227204 0.134439722 -0.116755374 -0.0779221952 0.0212557018 0.126257434 0.105313227 0.120735362 -0.0692541525 -0.0584569424 -0.108607799 -0.0476316065 -0.0588775352 0.0463445932 -0.133415371 -0.128379583 0.121760055 -0.0548802391 -0.0722203329 0.0508697033 -0.079833433 0.119531378 -0.0217971876 -0.105434492 -0.0522313938 -0.0453321934 0.107274927 0.0276630223 0.131097168 0.079335019 -0.114221223 0.0391028263 0.128627002 -0.0898075253 0.0599811226 0.072371535 0.0517965741 -0.0948484987 -0.00232080673 -0.120201647 -0.1168992 -0.163116753 0.101379991 -0.0693345442 -0.0656319857 0.0136408824 -0.0277835261 0.0546165146 -0.00200848537 0.105287716 0.021810092 0.101102382 -0.0842717886 -0.020271264 -0.121380635 0.0648328215 0.0722329915 0.0304982048 -0.0108427657 
-0.0313236415 0.0242884308 0.0848189518 -0.00415426493 -0.129282877 -0.0663083941 -0.0568652116 -0.0136977984 0.0484237522 -0.136208966 -0.0747673362 -0.00170940161 0.00959950686 -0.0287488401 0.174732566 0.0778143853 -0.0412021503 0.138728648 -0.12335252 0.0248393398 -0.00735486019 -0.0928628147 -0.0812815279 0.125593081 0.0110786557 0.124990925 0.150338039 0.0616421662 -0.0968330279 0.11320933 0.116342612 -0.00344289024 0.141467705 -0.072798416 0.121146008 -0.0969213247 -0.0562434942 -0.0969665498 0.0179323703 -0.0307174679 0.0410963222 0.0908566862 -0.0271566976 0.181122735 0.176294565 -0.0137444139 -0.152425051 0.0303653441 -0.0654244274 0.103337444 0.161812425 -0.114469662 0.0337155983 0.0851140097 0.0473025665 -0.0455731675 0.0910733119 0.0064521106 0.187958792 0.167304024 -0.127127901 0.00861696992 -0.0400827006 -0.140295923 0.0512709506 0.0780323595 -0.0932431147 0.0957963392 -0.125637099 -0.0162038952 0.00915290881 0.0997759104 0.0987372771 0.16382876 -0.056870617 -0.139243662 -0.071242094 -0.0608208477 0.107261404 0.0251677446 -0.0958002061 -0.0900856256 0.0601827726 0.107842483 -0.0984033346 -0.00783828646 0.0255061835 0.00474396348 -0.0694380254 0.0950763747 -0.0441939719 0.0136436457 0.108352683 0.137562498 0.0213271081 0.0454172641 -0.0874122232 -0.089138791 0.0275239777 -0.0769107938 -0.0700656921 0.131777659 0.175489351 -0.0777074322 -0.00239577657 -0.00230550254 0.167611465 0.0103928242 -0.0727633685 -0.0352996625 0.00823523104 -0.0106461262 0.0824658424 -0.0121006668 -0.0598732941 -0.0662225783 0.0269689541 0.0804088712 0.138990924 0.149531111 -0.0406282917 -0.126480639 -0.025079472 0.0510983169 0.035402365 0.08281295 0.156712428 -0.026096575 -0.0651845187 -0.0323777311 -0.105685644 0.0783127025 0.0188494585 0.0856304839 -0.046786584 -0.0739144981 0.0625574216 0.127959684 -0.0416722037 0.114131734 -0.04018737 0.0335959457 -0.0786943138 -0.0593536906 0.000781891402 0.0818767101 0.07887806 0.0942715183 0.178498864 0.14772743 0.00345369685 
-0.0423939079 -0.0205054302 0.123664357 0.0551863275 -0.177257061 0.114078067 0.0455558784 -0.0323475748 -0.112341911 0.0721865445 -0.0341178104 -0.0914598405 0.0694510341 -0.0585612506 -0.0373541526 -0.205118358 -0.0179533362 0.0257616416 0.18985191 0.101283662 0.0620856099 0.163825974 0.150054261 0.0351246744 -0.0134136677 0.0362584144 0.0490719676 -0.0219044462 0.100722261 -0.0236032922 -0.0624775924 -0.18531242 -0.0643399507 0.0405745842 0.0175180174 0.123290591 0.074898921 0.0684316009 -0.0228197258 0.138146341 -0.0247859173 -0.136237904 0.0807761028 0.190366209 -0.0662142709 0.0290480666 -0.0762866884 0.106889285 0.00406613294 0.00212845136 -0.0551334918 0.176173732 -0.000862196088 0.0479077958 0.12893793 0.0908120275 0.0428063385 -0.105808966 0.0208340362 -0.0391079783 -0.17646575 -0.0161272287 -0.0779476464 -0.139349103 0.132013753 0.0993892252 0.064087227 0.131257027 -0.0114984009 -0.120081656 0.0867618024 0.0161269289 0.0568408556 -0.0086016655 -0.00697259605 -0.127590686 0.00164337456 -0.106980473 -0.0617386699 -0.093155548 -0.0321060345 0.0353029482 -0.144390106 -0.041361127 -0.102071285 -0.0588951148 0.0818923414 -0.127334356 0.0141031453 -0.111001149 -0.123913996 -0.0247361958 -0.0820739791 -0.030570088 0.127384081 0.0231190175 -0.10356193 -0.139310062 -0.0380821303 -0.0285825692 0.126087889 -0.066886954 0.0766842216 -0.135645509 -0.0953988656 0.131382018 -0.12620239 -0.0145515203 0.134063303 0.0396169424 0.0967397094 0.119111016 -0.0184818357 0.176523507 -0.0199789405 0.0826793611 -0.110192202 -0.0409205034 0.00472770026 -0.14348729 0.147804692 0.0450261496 0.0670832992 -0.0345766172 0.126415744 0.00601782696 0.104479343 0.0414096117 -0.0710287988 0.112614326 -0.147158608 -0.0370420963 -0.119263552 0.126887798 -0.105801471 -0.00610316033 -0.105143495 0.196164653 -0.0159037225 -0.0744655356 0.132361174 0.0196442343 -0.0159279685 0.126357719 -0.029065378 0.0336539075 -0.168783128 0.0148825208 0.0555515438 0.0461699739 0.102379352 0.155959725 
-0.137117967 0.0191216022 0.222972959 0.125019222 -0.0988391787 -0.0180195421 -0.00158139609 0.029359296 -0.142667904 0.103885561 -0.105973668 0.0307869632 0.00780402496 -0.0674499497 0.114393353 -0.127377525 0.00557687134 -0.061907284 -0.124339581 0.0993482098 -0.0195321329 -0.0585047677 0.0953318775 0.0480449647 -0.0227444768 0.0408569276 0.0562379509 0.0313135199 -0.132302389 0.125221208 -0.0873878524 -0.111026652 0.115592606 -0.0906311348 -0.00652401475 -0.127266601 -0.144731417 0.0330261067 -0.135340631 -0.1036596 0.0953472406 0.0560712516 -0.135941952 -0.040515393 -0.0903434008 -0.0961870179 0.119984761 -0.023610061 0.136370555 -0.0166805629 0.127074108 -0.0724001899 -0.0508536957 0.115113258 -0.0258387104 0.0489959568 -0.142882243 0.104940325 -0.109118342 -0.0262665749 -0.139981106 0.15954946 -0.00940728188 0.0964377075 0.0164540596 0.0535212867 0.189132586 -0.035037268 -0.0510806404 -0.0423220247 -0.0387359485 0.0559357852 -0.0354634076 0.174487337 -0.0849912167 -0.124549776 0.0769607276 -0.0557537489 -0.0945133492 0.152356565 -0.0749799982 0.124006495 -0.11373242 0.0692153648 -0.0678370595 -0.0376192741 -0.0456925295 0.0221248977 0.0522562191 -0.098749496 0.0200695693 0.122223869 -0.0449365303 0.0366582051 -0.135746583 -0.0222668201 -0.00500035612 0.0176082794 0.128107294 -0.0137729133 0.0214566886 0.110726222 0.150741264 -0.0353633799 0.159352034 0.0344046839 0.105027668 -0.0436317027 -0.140568197 -0.101473704 -0.032741949 -0.0106838001 -0.0316685364 -0.129210651 -0.0679190904 0.133524075 0.123635188 0.10515888 -0.0406672806 -0.0610394813 0.091179423 -0.110931419 0.079060778 0.116783403 -0.0516342819 -0.13402909 0.0207334459 0.0812019557 -0.0820832253 0.0703516304 -0.084283106 -0.143779725 0.0990532935 -0.0511374213 -0.0828005821 -0.0313504227 0.108964168 -0.0947234705 -0.129062966 -0.0215799771 0.0714171082 -0.013629063 0.100170761 0.121061251 -0.122967482 0.0534396805 -0.104779765 -0.0699278712 0.0862568319 0.0739753321 0.0636002868 0.113115802 
-0.0251864307 0.0644432828 0.00374182384 0.00278152619 0.0277899243 -0.0400727838 -0.0934138894 0.0662064999 0.0740315318 0.156935647 -0.125642329 -0.0181016717 0.0717086047 -0.0788133815 -0.127949879 -0.0690763518 0.0889543295 -0.172350034 -0.100477748 0.00906703342 -0.0588162839 0.0321615078 0.143424392 0.0124900788 -0.0917625949 -0.0731047541 -0.0883597806 0.193930492 -0.0296085142 0.153995425 0.056007009 0.0701433122 0.0177569669 -0.0888565779 -0.0714818016 -0.00891659409 0.14154695 0.186709836 -0.0978443697 -0.0666612759 0.00330674648 0.15638712 -0.0387458205 -0.0156392194 0.029518418 0.177299723 -0.00526926899 -0.03754526 0.142723694 0.0702423528 -0.0628082901 -0.0962519944 -0.0736426339 0.00633251155 -0.0707057118 -0.0702924654 0.0923877209 0.00983795524 0.0308573246 0.117168695 -0.112862423 0.142043695 0.0422373824 -0.0367415026 -0.106207736 -0.0756792426 0.117217235 -0.0436312594 0.0131786875 -0.0440221652 0.097473219 -0.0796951875 -0.125066265 0.124969348 0.0678982735 0.139240772 -0.076055415 -0.025149785 0.022765873 0.00105297181 -0.0667744279 -0.142704338 0.0969702899 -0.131543919 -0.0815857351 0.107110865 0.00967122614 -0.0012585416 0.122922324 -0.0521417297 0.0298166722 0.069311209 0.0601125322 0.0429519527 0.0771004632 0.138341069 0.0400493145 0.0989085436 -0.115988038 0.0790331438 0.0363655277 0.0255561695 -0.127918124 0.0311952345 -0.0788384601 0.0947113633 0.144414648 -0.0196468234 0.0516601503 0.0307283401 0.0900717825 -0.0476232618 0.0064008832 0.0990933776 -0.105707288 0.0693743229 -0.124773592 0.0183412433 0.0536187291 -0.0942984521 -0.108814135 0.0376636833 0.0153515637 0.00406998396 0.0265448689 -0.135168463 -0.120080709 -0.121890008 -0.0570892245 -0.124603435 -0.0809690952 -0.0672037601 0.13564612 0.0371975005 0.112579718 0.014319554 -0.108871549 -0.0871257633 0.0682478845 0.0747066289 -0.0636163577 0.0898959637 -0.090092048 0.129719719 -0.0293056145 0.026599288 0.0581899136 0.0231975913 -0.0209574401 -0.00654032826 0.0904182643 
-0.114409715 -0.0957838446 -0.044936955 0.0165019929 0.135205165 -0.117184259 0.0401535928 0.134790704 -0.0130638136 -0.0190193728 -0.0656322092 -0.113029599 0.00841842592 0.0140976086 -0.0268416889 0.0387401059 -0.00842970423 0.153014824 0.0431912951 0.0146862119 0.0648952872 0.159487918 -0.107046999 -0.0957565159 0.0974680409 0.0875119492 -0.106558517 -0.00956180599 -0.10792207 0.0139202345 0.0237691645 0.170654655 0.182866856 0.0337778889 -0.122317858 0.104945458 -0.00241611805 -0.0189588871 0.154625118 0.0883154273 0.0683931634 0.0424042568 -0.0296660978 0.0563843139 0.0966898203 -0.00994552113 0.18443881 -0.146624371 -0.0824042782 0.116611265 -0.108186543 0.0102908229 0.103478007 0.0179831069 0.131399289 -0.0788777545 -0.0424850732 -0.00943686068 0.083427988 -0.137433812 -0.0903602764 -0.105054028 0.124498554 0.088755466 -0.0493076742 -0.027949756 0.0751230642 0.115139447 -0.127784625 -0.0843564868 0.1399187 0.077144593 0.0511633307 -0.031733308 0.00182465685 0.149220034 -0.108540453 0.0384725034 0.0657235011 -0.150511175 0.0859548301 -0.031602826 0.153693542 0.100388199 0.153390184 -0.000678598415 -0.0295467041 0.107720926 0.0790676847 0.075719431 -0.0342444293 -0.105272986 -0.101255842 0.17369619 -0.0664026737 0.0892078429 -0.113553904 -0.0292268191 0.180186689 0.109818101 0.0164926779 0.0922102034 0.0830212384 -0.00248041586 -0.0470679849 -0.165345639 0.12901403 -0.0142368376 0.0695406124 -0.0135625293 -0.106628664 0.00303458911 -0.0594627149 0.0922242925 -0.0427582636 -0.0400496349 0.178693265 0.183653072 0.0575503781 -0.0235571191 0.048922874 -0.00795071851 0.0807336569 -0.0162454005 0.0234919712 -0.102429815 0.0907384083 0.126808628 0.0676393136 -0.167194471 0.0440359078 0.137444958 0.196354747 -0.0737531483 0.0279132333 0.123419479 0.058315713 0.0996660143 0.122060843 -0.0147102922 -0.121966586 -0.0944622681 0.142329201 -0.0832371339 -0.0505962893 0.133063897 0.134808093 -0.0476008505 0.0899724364 -0.00881881081 0.129822224 -0.0857772455 0.0220859721 
0.128572404 0.0105826855 0.102347367 -0.0834473595 0.0246756226 0.0283253919 0.0684853047 0.101052776 -0.12661618 -0.086164698 -0.0609710813 -0.0304403771 -0.0863657966 -0.155161664 -0.00632416084 -0.00467219949 -0.137039587 0.111212932 0.166964158 0.0581296235 -0.103098728 0.0281474199 0.0514760315 -0.0765168592 0.0823773816 0.112306684 -0.119962633 0.108718097 -0.0627609268 0.0295355972 0.061364796 0.0683022588 0.164325893 -0.112472534 0.164340407 0.0510179065 0.00963465869 -0.0768766776 -0.04377589 0.096517235 0.148181275 -0.0579664111 0.0587554127 -0.0445416085 -0.03864979 0.00753601873 0.100015543 -0.0362141766 0.133224964 -0.0191601235 0.152707025 -0.0183888227 -0.059442617 0.104400992 -0.0854767412 -0.00690022996 0.0886835605 -0.0273776986 0.130314052 0.105323426 0.161530361 -0.108458608 0.0788582712 -0.0838668495 -0.08755178 -0.102095522 -0.096060887 0.169443905 0.0577232093 0.120626166 -0.0495226867 0.096482262 0.0348549932 0.199682817 -0.0202110633 0.0846792087 -0.0826675221 0.168889627 0.0768956468 0.0698982626 0.0966169164 0.14701435 0.00816824846 0.0694516674 -0.0396548584 0.109372504 -0.0630989447 0.117110327 0.0741739869 0.0132170692 0.100324839 0.00397197716 0.00673523871 0.0452416129 0.0100933397 0.181526616 -0.131816193 -0.0729396716 0.0076587908 0.0789732337 -0.0381261818 -0.082727015 -0.112304315 0.0861935169 0.0106273741 -0.114372075 0.0639646724 -0.0495705158 -0.0882112607 -0.0384016633 0.110038161 -0.0208555609 0.0701313913 -0.0773748457 0.078994669 -0.0506972298 -0.126550719 0.0909916982 0.00305084884 -0.128831208 -0.0751241222 0.134015068 -0.0960550979 -0.0882394835 -0.0782357231 -0.0271630995 -0.0772069469 -0.118653722 -0.0367000699 0.121580288 -0.0561355688 -0.138331473 0.125544876 -0.0296058543 0.0746538565 -0.0162153672 0.062437132 -0.0231160969 0.0841860995 0.0602102727 0.124720961 -0.0469560064 -0.140982583 -0.136888638 -0.0804962814 -0.0440254994 -0.107610121 -0.0446921699 -0.0253842529 -0.0920281038 -0.102073133 0.0864460468 
-0.0521458536 -0.0281716138 -0.12248721 0.108246624 0.0153880091 -0.00498063862 -0.0892293677 -0.10190247 0.144978091 -0.0280745663 0.0683950707 -0.0395756029 0.0730759278 -0.00125575683 -0.0320034325 0.0139094684 0.127148211 0.00908912718 0.074735418 0.0604887865 -0.0297355298 0.06545984 -0.0896448418 0.131814942 0.0968025178 0.0828945488 0.0279728677 0.0131593319 -0.117338456 0.0965867341 -0.00240561157 0.10527093 0.0450791791 0.020316802 -0.00228017569 0.00896368176 0.0269619301 0.150818229 0.155888513 0.116388619 -0.137444928 -0.0795316845 0.119856015 0.0243596714 -0.113116957 0.0804922581 -0.0587314703 0.169540256 0.139007777 0.139136776 -0.0298109893 0.0328682661 0.0793280452 -0.132538036 0.10372372 -0.0968293995 -0.048270233 0.0598262101 -0.0930386782 0.00116990507 0.0488549769 0.034680441 0.0733197182 -0.0573430657 0.100507811 0.07086052 -0.101341262 -0.0982980579 0.124785289 0.075397715 -0.106355786 0.00812490284 0.118339553 -0.056385763 -0.073536776 -0.091773212 0.137454107 0.0820370913 -0.0450648963 -0.104219824 -0.0738624409 -0.0940235406 -0.0482450277 0.101593941 0.0674851388 -0.088429369 -0.0648159012 -0.0145470053 -0.138597056 0.086754784 -0.0615454912 0.0661340803 -0.0228997692 0.117914066 -0.0457687825 0.0386412889 -0.139924914 0.0699922591 -0.0633266196 -0.0395022333 -0.0829551816 0.123344138 -0.0269175917 -0.112848774 -0.127718255 0.0982895121 0.00638221437 -0.0981410667 0.0559622087 -0.0958316401 0.0377015024 -0.0980365872 -0.0954344869 -0.0140493829 0.0955453813 0.0694573075 0.132618234 -0.00649338961 0.162486911 -0.0644210577 0.0785710365 -0.0324906185 0.0616738945 0.131774053 -0.065818958 0.171095803 0.17064096 -0.0761323273 0.0750825778 0.0011169787 -0.0321993567 0.0106129069 0.0721838102 0.0231869202 0.0562860221 -0.00459621055 0.0595190637 -0.0840448812 -0.081757158 -0.0908710882 0.0917035788 -0.157435328 -0.0259377975 0.137452871 0.0223723184 0.0868041068 0.120598882 0.00937895011 0.155116081 -0.100786671 0.0592786036 -0.133716181 
-0.080596447 0.0356213599 0.00931102037 -0.0950432196 -0.0769308135 0.0528798848 -0.120534495 0.00211757421 0.0449208915 0.0501613319 -0.0820226222 -0.0251336843 -0.0144774914 0.0143801719 -0.0881446749 -0.123702742 -0.0779745206 -0.139141038 0.0531492084 -0.12054643 0.0143575966 0.10409309 0.0117436498 -0.0403948873 -0.110459745 -0.0662581548 -0.1114856 0.00284221768 -0.052574873 0.0127746612 0.0721953511 -0.0164361224 0.0638382882 -0.0129706711 -0.0614550114 -0.060835205 -0.0864404589 -0.132438704 -0.108277529 -0.105507694 -0.0632813722 0.0298161656 -0.0744545162 0.0407374054 0.0746406019 -0.106669813 -0.10373731 0.12445356 0.0397888571 0.0220787525 0.0202515423 0.12489415 0.132801518 0.0626152232 0.0729931891 0.0667951256 -0.0493182763 -0.0674306005 0.0432554148 -0.0183124356 -0.104636811 0.063214466 -0.128902912 0.105252452 0.082741566 0.0974095464 -0.0517911017 -0.022460917 0.0845701918 0.00974364486 0.0755192861 0.122211002 -0.0929605439 -0.0323449485 -0.00275745941 0.13703306 -0.126241356 -0.0444845371 -0.0595683604 0.0420802012 -0.121947996 -0.0962189585 0.034678936 0.133177251 0.0845321864 0.0163415857 0.0860773325 -0.0293091722 -0.0457063392 0.116976917 -0.112045035 -0.114811443 -0.0518591814 0.0787069798 0.00974517968 0.11453367 -0.115365967 -0.0442552418 0.0140983164 -0.0719776675 -0.0414564312 -0.00496765925 -0.0418873541 0.0135209961 0.150040343 0.018279193 0.0670056716 -0.0128589002 0.00129946775 -0.0175037291 -0.10695336 -0.0812502131 -0.00151915848 0.105532125 0.156881258 0.107406408 0.0299301185 0.0623822287 0.105002947 -0.00692248205 -0.0561903454 0.0528496578 0.0645767525 -0.0349360071 0.0887037516 0.0392689034 0.160266221 0.0597992055 0.0948610157 0.0702525973 0.0522093065 0.116412245 0.0978843421 -0.148455024 -0.0602231361 0.0339148492 0.0785273239 0.143222392 -0.1303702 -0.0237084106 0.00480483705 -0.00544637674 0.123185195 0.066207394 0.138298839 -0.0356794894 0.176579177 0.0118068606 -0.0892722458 0.110782906 -0.105181009 0.0947949737 
0.0951533318 -0.0448544845 -0.0301951393 -0.0468887351 -0.00123819872 0.101715624 0.0727012604 0.0642970651 -0.0423549041 0.0688230544 0.104760513 0.101078875 -0.0486233123 -0.0383191928 0.00411880249 0.110782482 -0.0667161196 -0.115577795 -0.107555278 -0.0455098785 0.0147148855 -0.0387868471 0.119055025 0.113614053 -0.0650238395 -0.116119511 0.0904611796 0.0928509012 0.036130257 0.063363038 0.088962093 -0.0774177462 0.00342554948 -0.0879331529 0.0105287833 -0.150841638 0.0330097973 -0.101222105 0.000547326345 0.0222531687 0.00177719572 -0.168144733 -0.102801181 0.127630353 -0.0044782632 -0.0718901828 -0.0688694715 -0.107435137 -0.00648547709 -0.137246579 0.116464987 0.0476300418 0.0752717406 0.0731578618 0.100494623 -0.0570759401 -0.0219588652 0.060869351 -0.0204062089 0.119302526 0.110057697 -0.0576427504 0.0296158791 -0.0681548789 -0.0114316642 -0.132044569 -0.0581466183 0.00203379989 -0.113137707 -0.0557623059 -0.0389640704 -0.129291847 0.121296927 -0.00863479078 0.109711155 -0.0720649809 -0.114019588 -0.0326023102 0.047520563 0.13592954 -0.136486098 -0.138339326 -0.130544424 -0.0851323009 0.0625912994 -0.0132746696 -0.0394350886 0.106071725 0.0412790775 -0.0212111101 -0.110249251 0.128628239 -0.00126694143 -0.0719275251 -0.0233325437 -0.0285679474 -0.119621359 0.0375062078 -0.0438100025 0.127980903 0.0915731937 0.0225412827 -0.0376331583 0.0345656835 0.135839269 -0.152139679 -0.00529241795 0.0757251382 -0.0507489964 0.0434143096 -0.109213173 -0.0232270882 0.110102899 -0.11542847 0.178933024 -0.146328598 0.080565691 -0.0281426851 -0.0798788965 -0.0825010538 0.102853604 0.176710308 0.105933264 0.142999679 0.0393511392 0.0469196737 0.155381039 -0.0202247016 0.170517668 0.00554223079 -0.067655623 0.128527895 0.00835985132 0.192383677 0.133679509 0.109696992 0.124087319 -0.0682987794 -0.0266768672 -0.0692853928 -0.15578717 0.111135691 0.152784228 0.182785735 0.115072496 -0.0234794691 -0.14098835 -0.0995724574 -0.0710255876 -0.0245003197 -0.121010661 0.214906275 
0.126054928 0.0240032822 -0.0867983475 0.0794893727 -0.0287744384 -0.0114687914 -0.0225537177 0.00640312536 0.0122232735 0.148882598 -0.0123748779 -0.0145422816 -0.0797223598 -0.0824621096 0.050172396 0.197323322 0.0408616215 0.165366396 0.141404614 0.135947406 -0.0240413714 -0.115962207 0.193585813 0.0844455436 -0.0809815899 0.17395325 0.0107643139 -0.0946478769 -0.0715151504 -0.0346882716 0.0626753345 0.181155458 0.146124348 0.050194148 -0.016578801 -0.0884145498 -0.119957604 -0.0384309553 0.0239939895 0.071738176 -0.0269928221 -0.0424483791 0.0305357967 0.129883602 0.143514901 0.133759692 0.0695038289 -0.000178681847 -0.0580186956 -0.0775882527 0.136189267 -0.0727865323 0.0678651482 -0.049817346 -0.0649325028 -0.0088552665 0.156783015 -0.048809994 -0.0406761616 0.158696339 0.0890753791 0.136065736 0.160149634 -0.0645535365 -0.111809649 -0.0370648354 0.194843888 0.0213514157 0.102395862 -0.0400028452 0.0761639178 0.0394547395 0.0327902511 0.162416309 -0.00130897725 0.020087108 -0.0961144641 0.0304949749 -0.045121409 -0.0313251726 0.085803628 0.0291031022 -0.0910456851 0.0660064593 -0.068344146 -0.0507363826 0.0779818743 0.14303115 -0.0030358301 0.0674030483 0.147154242 0.0136561031 0.054678835 0.0916109383 -0.108775541 -0.0925002992 0.0767795593 0.0127289426 -0.0589483082 -0.119000398 -0.121735357 -0.0326918289 0.137503535 0.119247034 0.0430034138 0.0618253574 -0.0975219831 0.0736228079 -0.0372737274 0.153161958 -0.0518422537 -0.0204763189 -0.0608311482 -0.0457191877 0.147904009 0.0655161589 0.000597919687 -0.0326539725 -0.155328959 0.167974561 -0.0343649164 0.130426079 0.00636828598 0.141477138 0.057434544 -0.0446176901 -0.0856851637 -0.0112518054 -0.0844905823 -0.0406574272 -0.153341204 -0.0638041422 0.0856886953 0.0646770895 0.130005434 -0.0040921187 -0.0604991764 0.0163501818 -0.0783527344 0.120934926 0.161637381 0.115128227 -0.014278437 0.0813159347 -0.0724511221 0.0282054543 -0.00229437649 0.0406513065 -0.0661629364 0.0403244048 0.0202239044 -0.0395012945 
-0.0349203013 -0.054835394 0.0276283957 0.0147262886 0.171100989 0.0777800605 0.113793746 0.029462589 -0.0530196279 -0.115720108 0.167530239 -0.0646177605 0.163341776 0.0519124195 -0.0436448865 -0.00622291025 -0.118472219 -0.11200767 -0.0293926019 -0.0851374194 -0.0335079357 -0.00235709315 -0.114089273 -0.125252411 0.0811661184 0.143217117 0.0657678992 -0.144908518 0.074894011 0.0680066049 0.0251119025 0.0237030108 0.130361617 -0.117028616 0.0458782166 -0.0999599174 0.0947599187 0.125303924 0.124342829 0.0346639715 0.0599663034 0.00829797983 -0.12561053 0.0641372502 0.076463908 0.0719346106 -0.0685527846 0.0804899856 0.133784577 0.0426442474 0.108722381 -0.120919384 0.0425808728 0.0937603563 -0.0493984073 -0.101851352 -0.0743994713 -0.0168575719 -0.0864764303 0.134012744 -0.0345991999 0.0765475258 -0.0502674989 0.148546934 0.154048041 0.112422884 -0.0310489275 0.0740677267 0.124277003 0.11085771 0.0675311983 0.0243612733 0.0620236471 0.0995759219 0.168544546 0.00101517653 -0.0610326529 0.0783934444 -0.0371061936 0.0923965722 -0.0212610923 0.133722678 0.0999392346 -0.089509137 0.0711376369 0.112333678 0.0204899628 0.17973493 0.0402919464 0.126110092 -0.00392504036 -0.000692084432 -0.0994881168 0.053651616 0.00273740292 -0.0712720156 -0.0218583867 0.00874059927 0.0359456241 0.0621751361 0.000342633168 0.0569748171 -0.0946905017 0.00123212801 0.142329782 -0.0667219386 -0.0533551276 -0.0563121587 0.071681805 0.101482138 0.161976591 -0.106341578 0.186979875 0.0426207557 0.140588462 0.0434038043 -0.0569239818 0.164891273 0.140453205 0.0955060944 0.0860626772 0.151230052 0.187594429 -0.18127653 0.131841645 0.15597482 -0.131366268 -0.165060341 0.128387749 -0.0202594791 0.0415013544 -0.0959378406 -0.0706115887 -0.121529371 0.0975343287 0.0213517249 0.183627069 0.0060459557 0.00643412722 0.113227792 0.169127882 -0.109208152 -0.151426241 -0.00370581448 0.0630536079 0.108941384 0.0786992684 0.0706410259 0.0392542407 -0.127687827 0.0440069884 0.0562533028 0.0949133858 
-0.0812414587 0.092598483 0.0175310317 0.0891861469 -0.045317024 0.136054009 0.0259930789 0.00634265412 0.0573615059 -0.114974082 0.0966024846 -0.112975411 -0.0831556097 -0.0627890527 -0.00156623824 -0.0914661735 0.0832677707 -0.0836677551 0.0405839272 -0.072140947 0.0206351802 0.0579435751 0.175754473 0.0569373667 -0.0288151708 -0.133179188 0.0659383461 -0.0622974038 0.111952148 -0.0266913269 0.0549159199 0.126251087 -0.0655740872 0.103494681 0.000715725822 0.092549786 0.0262457915 -0.0120499283 -0.134702772 0.0988872126 +tensor_8bias 50 +0.0448136181 -0.0294532757 0.00591958454 -0.0112828789 0.0547700003 0.102279283 0.00554918963 0.0933698788 0.138683245 0.153071642 -0.0246890131 -0.066205956 0.0102847284 -0.0217106864 -0.11153923 -0.0833024532 0.0690509453 0.0574259795 0.0326761454 0.048058711 0.0932174474 0.173286349 0.0437983349 0.0692929476 -0.1425194 0.0164392311 -0.0525733009 -0.0926198289 0.01558726 0.124148585 0.159763724 -0.112289928 0.122134581 -0.0329846852 0.123975173 0.00884330273 -0.125247195 -0.108203024 -0.0963885933 0.12722528 0.105277926 -0.0898397416 0.108396716 0.133004621 0.111592449 -0.0548007637 0.112471558 0.0952548459 -0.0418147035 0.0495906435 +tensor_4bias 50 +0.0420062914 -0.0531011894 -0.0405919701 0.147642136 -0.0448930375 -0.0946018249 0.0368757285 0.0895275325 -0.00135793048 -0.0465053245 0.104558863 0.0464918055 -0.0928135291 0.145776987 -0.0437397324 0.0744188651 -0.0975865945 0.0791935027 -0.0783651695 0.0380954593 -0.0641139522 0.0319918618 0.0519438572 0.00847010501 0.124498516 0.182475775 -0.0537090674 0.0583103821 -0.0401648097 0.0082509499 -0.0618926026 -0.122952975 0.0772916004 0.014789585 0.101875983 0.0958903432 0.064464353 0.0122809373 0.149964184 -0.141134128 -0.0849211961 -0.0111745978 -0.0645377114 -0.0344211683 0.0628582314 0.0434207059 -0.0433468781 -0.0299602263 0.15525946 -0.0448016711 +tensor_2weight 2500 +-0.0597149245 -0.0791020989 -0.00306093879 0.113323435 0.118636928 -0.0843338519 -0.109422937 0.0164578613 
0.168519169 -0.0703572854 0.0312314406 0.0899977908 0.0896739215 -0.0900451988 -0.057600379 0.0125688771 0.0722137764 -0.0290169287 -0.0694356412 -0.111381322 0.0917039365 0.00489026168 -0.0580901131 0.183314383 0.195475265 -0.12944217 -0.0534728765 0.074898228 0.104391731 0.123983808 -0.013343907 -0.112780578 0.012140803 -0.086059548 -0.0357166752 -0.0239756703 0.114319615 0.0447655618 -0.0479144566 0.0672920421 -0.039890483 -0.0342019647 0.170793653 -0.0611885674 0.128305733 0.0986138955 -0.0286394898 -0.0084637003 -0.141880184 0.0852712765 -0.0972362906 -0.00365298078 -0.108331524 -0.0803529769 0.179286033 0.0825248212 -0.0778654292 -0.0261579026 0.0222861301 0.199497893 -0.0576646812 0.142493397 0.018432891 -0.0569059029 0.0996442288 -0.0431534536 -0.0794040635 0.136226013 -0.0141376657 -0.0539442487 -0.133499324 -0.0887252018 -0.0284489784 -0.0330936722 -0.03493331 0.0510139801 0.192286044 -0.00151121407 -0.0730649382 0.136111543 0.162208974 -0.115568712 0.176949784 0.0509604737 -0.140759781 0.0942156538 0.15726684 0.0260999966 -0.0726049989 -0.0243513957 0.156701684 0.138213098 0.112526298 0.0941351131 0.104868479 0.105548747 -0.0304395221 0.0303013697 0.162006006 0.100969627 0.145671651 -0.0650625825 0.0855033845 0.0336373001 0.141778961 -0.0337854326 -0.00864057243 -0.0735450611 0.0464367941 -0.0596558116 0.0623771138 0.14349483 0.0591385625 -0.00258940901 -0.0122495294 0.14376843 -0.0750882924 -0.0664319023 0.0305001531 0.0184416007 0.02046955 0.0551448241 -0.0694528297 -0.0207397975 0.154329836 0.0494214594 0.0845211819 0.16324687 0.0757716969 0.0634511784 0.120605588 -0.113957532 -0.0832520127 -0.0171713699 -0.0601701811 0.148658082 0.0899651572 0.118677244 0.0283228904 -0.0590552986 0.0797857642 0.0911054611 -0.022215249 0.176669434 0.000942089071 0.112969555 0.105361097 -0.0645927563 0.103734575 -0.0436463058 -0.0349569395 0.115449831 0.0422306731 -0.0804883987 0.0807694271 -0.0505034067 0.00729625719 0.137707859 -0.0488397889 0.162600547 0.15114215 
0.0636213571 0.00903507788 0.128289327 0.163847417 0.000159272255 0.0834238008 -0.104029171 -0.0793354735 0.0541718863 0.00707805855 0.077409409 -0.00238326658 0.125607908 0.0396535546 -0.0790733248 0.0564618595 0.100612111 -0.0357064828 0.117824152 0.132536173 -0.0289113428 -0.014852941 -0.0426625349 0.0135453995 0.103636682 -0.0972069129 0.0516828299 -0.00995481107 0.0232977849 0.0937414765 -0.023261575 -0.0417088531 0.0130363097 -0.14154911 0.0702126473 0.00403433712 -0.0650982484 -0.0789552182 0.216502696 0.122806698 0.027723331 0.063748695 -0.0578081496 -0.0157720149 0.0400142148 0.133040145 0.0334649682 0.0875510424 0.110794596 0.0254984461 -0.0512416363 0.0211649723 -0.143576398 -0.0205686055 -0.111181781 0.0162975509 0.121590719 0.0656936541 0.155964255 0.0245984644 0.0352118239 0.133722454 -0.0262214299 -0.0336278044 0.156469122 -0.13011755 -0.027528204 -0.0602145456 -0.0930233747 0.0099506909 -0.0182043407 -0.118824221 -0.00373798492 0.178733543 0.00827211235 -0.0456761308 -0.0721783042 0.00670965109 -0.0409170277 0.00431948341 0.124081343 -0.0710947514 -0.104117736 0.093746461 0.171907842 0.110169716 -0.070081532 -0.0667723492 0.125274718 -0.0586081445 0.139502883 0.177527696 0.0687526166 -0.0820335746 -0.0490859933 -0.12959671 0.124665432 -0.0872184113 0.0991814062 0.0363627896 0.190564334 -0.0296370834 0.0762037039 0.0642659366 -0.0918578207 -0.054685194 -0.0458993316 0.146039933 0.0528010353 -0.0662797019 0.00561331725 -0.01142208 0.0815358981 0.0418767408 0.110681847 -0.00722674327 0.130719125 0.139407441 0.0292424969 -0.0270317923 0.0958031863 -0.0573824011 0.12932986 -0.043775145 0.059319146 -0.0913528278 0.115791552 0.078004472 0.115792975 0.107448012 -0.0748391598 0.0529222861 0.13462083 -0.141233921 0.166953042 0.168474525 -0.0700130537 -0.117624134 -0.00714296196 0.0268919822 0.163626537 0.0181761291 -0.0640345961 -0.0449223928 -0.141952619 -0.0284713078 0.147408575 0.139610574 -0.0779195204 0.106946297 0.117024481 -0.0941873938 0.09258876 
-0.00288540404 -0.0543360636 0.0990853012 -0.0131437555 -0.0769185126 0.0146610877 0.0856351554 -0.090552628 0.124525517 0.072334148 0.00881079119 0.0441620275 -0.0116904415 -0.108310528 -0.0406595394 0.0195690114 0.0474229716 0.08090958 0.0409525596 0.077940464 -0.121437781 -0.0896261111 -0.134390622 0.099559769 0.107502699 0.0738855898 -0.0311849546 0.12491411 0.0958716646 0.048406072 0.0154622868 -0.130314365 0.148058236 0.00762006547 -0.0898886994 0.144507095 -0.0986621678 0.0791233629 0.0717348233 0.137725651 0.0972002074 0.0856728703 0.0490715429 -0.0558436215 0.177653775 -0.0812159926 0.174190253 -0.0374299698 -0.0888636857 0.0568164624 0.0539831966 -0.046500802 -0.088104479 -0.0324098729 0.123006575 0.174390927 -0.0655597001 0.118238717 0.165678978 0.115315504 0.149962306 -0.0967894346 0.0218543001 -0.0471816473 0.136843204 0.0418579951 0.130341902 -0.10788656 -0.0118869822 0.0904047042 0.10771846 -0.0203160401 0.0716004148 0.121576704 0.114085183 0.0813911036 -0.0706418529 0.0724584237 0.0249532741 0.156553373 -0.00865705032 0.134671107 0.0270873979 0.0121872211 -0.000827496988 -0.103484429 0.12091063 0.0684384331 -0.112646192 -0.0716026947 0.0865510404 -0.0961387679 -0.0992462859 -0.014073588 0.0901760384 -0.0329191796 -0.00509604625 0.0300773419 -0.113896236 0.0637915656 0.176874548 -0.0267044064 0.12591213 0.0827189684 0.00802489929 -0.0155225964 0.139007181 -0.0314813517 -0.0244915821 0.0454487316 0.113499463 0.147255525 0.0290668719 0.0196187459 -0.0756559074 -0.0474474952 0.000423966238 -0.125565693 -0.142974168 0.0265704822 0.100150622 0.124454536 0.128189385 -0.125751778 -0.0660192817 -0.0496372506 -0.025079174 -0.0945867226 0.00687600998 -0.108164005 -0.0449875742 -0.0757939294 0.0345570296 -0.0277413465 -0.0288163945 -0.0649622455 0.00885617267 0.0745153949 -0.0630018637 -0.00193145883 0.0763816684 0.156405032 -0.0854697376 -0.0829446241 0.0749762207 -0.0894886181 0.00361103215 0.0892253667 -0.00260828738 -0.0638676211 0.00824388769 -0.0162695311 
0.0992859229 0.0285193995 -0.0495389216 0.0868888199 0.0549531169 -0.0304261018 -0.0182636939 -0.0249298904 0.159364238 -0.0837972984 0.11065764 0.0529022627 0.010110856 0.110683426 0.0919133052 0.0737009645 0.0965587646 0.0305129029 -0.0127110174 0.0697814003 0.103699945 -0.0261213128 0.170093238 -0.0687487945 -0.12052843 -0.104825832 -0.126111925 0.142499581 -0.128851101 0.0239339732 -0.0617658421 0.0295549762 0.119156219 -0.0673037395 -0.0500704497 -0.0940866619 0.0919373184 0.146928117 0.0300044753 0.0634653345 0.0144530665 0.0691985935 0.0211127512 -0.0590388924 0.0216479953 -0.0947615728 0.00890090037 -0.143075675 -0.00150912558 0.101439707 0.0146557204 -0.0631864071 0.0695210993 0.159808844 0.0115857897 -0.00928535312 -0.0489135161 -0.0782282799 0.125244364 -0.0499396287 0.140853539 -0.0960367844 0.0661479533 -0.0767967701 0.0877454206 -0.0602071472 -0.00595363509 0.115926109 0.178855419 -0.000521433423 0.0932693109 0.0502367616 0.152228653 0.104619421 0.0170960594 -0.103684276 0.0711491629 0.0488289595 -0.0617828257 0.0788236633 0.163875833 -0.0177440327 0.0156344157 0.109268099 -0.0375487134 0.0692994222 0.0731202066 0.0198084135 -0.0638355985 -0.0859975517 -0.0729697719 0.0573660471 -0.0556606203 -0.0930642337 0.145462662 -0.00594186038 -0.0928620845 0.139376998 -0.0553284064 0.0321234614 0.122701474 -0.123724081 -0.118198179 -0.0378811546 0.0980066508 -0.110459164 -0.0362307765 -0.0317853428 0.0111791994 0.0406676829 0.102611743 0.181697577 0.0510763824 0.123425812 -2.60259403e-05 -0.00361568225 0.153392524 -0.0397593305 0.0637998879 -0.145311564 -0.0472530723 0.0862638727 -0.0162773281 0.14536725 -0.0755254775 0.111187324 -0.0551111922 -0.000961930782 0.0612597242 0.163095102 0.0857004449 0.134386837 -0.0350845531 0.104531094 -0.0771434605 -0.067063503 0.171728879 0.167630181 -0.055467926 0.0409745835 0.177932739 -0.0550457239 0.107211053 0.0066946256 -0.00466190139 0.00958849117 0.156683907 0.111835107 0.0854923576 0.0730453655 0.121291943 0.0563716777 
-0.111218229 0.0502161607 0.00677639991 0.18682304 -0.0360405892 -0.0132346814 -0.0414354391 0.0244455282 0.0727593377 -0.0868931487 -0.102708675 0.0923786163 0.154591203 -0.0693407878 0.106593266 0.162805468 0.0318478011 -0.031251967 -0.126320124 0.0780377984 -0.0280229542 -0.0295661092 0.0982864872 0.101671919 0.120140024 0.0414738134 0.105208568 0.0855850428 -0.0743453577 -0.000411789661 -0.0912177339 0.0883763209 -0.0493486412 0.123505704 0.166449651 0.105023161 -0.0776017308 0.162414178 -0.117349826 0.168772966 -0.101528428 0.13711141 -0.0164699852 0.0836634934 0.0919587389 -0.0328455754 -0.0752447918 -0.009731967 0.0349985808 -0.0194250569 -0.0934877768 0.185227469 0.0435012877 0.154062793 -0.0773278996 0.0929438472 -0.00670079701 0.0923984647 0.102851599 0.134395629 -0.120910235 0.170304388 0.0816775039 -0.0626546219 -0.0595025942 0.0244693402 0.0510447063 -0.115126796 0.115374513 -0.0176392663 -0.0923264623 0.122397989 0.0872549042 0.125522628 -0.100656673 -0.00508889835 -0.123501971 0.0617450103 0.139201492 0.051387202 0.00884217676 0.0175981224 -0.0483928584 -0.0360136032 0.05417905 0.022909319 -0.0881463438 -0.0459814519 -0.0131944772 0.0480347835 0.1673228 0.137037218 0.14526543 0.0446226932 0.0413857326 0.0612014905 0.132490978 0.0794302076 -0.0342803597 0.0863904208 0.15610376 0.121424645 0.0110774338 -0.0368165858 0.104494691 -0.0254124962 0.154777497 -0.0138444677 0.118794315 0.0259997863 -0.00128288078 0.142353535 0.155503765 0.0894722044 0.0424166657 -0.0683410317 0.0425889567 -0.10710226 -0.0400536358 -0.000696110365 -0.0677292421 -0.0385467038 0.0813434571 -0.0811068788 0.0311896447 0.0156664345 -0.147901028 -0.000463384727 0.0149115929 -0.112064414 0.0082620522 -0.024438085 -0.0304117016 0.162811249 0.128227949 0.0702825859 0.0863868073 0.0475940667 -0.100322515 0.118678033 0.153219327 0.103472307 0.106240071 -0.00983386766 0.0908779651 0.0990438908 0.0359329022 -0.0689288601 -0.0298974775 -0.115996465 0.0365048237 0.0202663038 -0.133836135 
0.0477452688 0.0554565825 0.0893209428 -0.0239705388 -0.0640460923 0.13875863 0.105679706 0.0737722218 -0.0183230489 -0.0404619724 -0.0105633233 -0.0761946291 0.164347902 -0.0172834061 -0.094510898 0.0345971286 0.0106645143 0.194848433 -0.0547695532 -0.106579714 -0.0123255178 0.0403105766 -0.0313294157 -0.00499826716 0.100480273 -0.00637257611 -0.0778858364 -0.0411514193 -0.00478123594 0.0215911381 -0.0732492954 0.194053754 0.0539965741 0.113155119 -0.0752726197 -0.0769620165 0.193490297 0.0789649859 -0.0801189467 -0.0407260284 0.0242670309 0.10401839 -0.0375796929 0.0314083621 0.0724864528 -0.0512620732 -0.137483209 -0.0787761062 0.0968051478 -0.087627165 0.190840423 0.158406734 0.0864097029 0.133482426 -0.0359799229 -0.0242824815 0.0159911942 -0.0485018119 0.144926906 -0.058078561 0.111498684 0.065325208 -0.0478983261 0.0192427151 -0.0443237759 0.0666328892 0.0329897963 0.134647146 0.0964290947 -0.109050713 -0.000148722494 -0.00192280754 0.07619223 -0.203230783 0.0140038347 -0.0237551313 0.113345623 -0.0610194132 -0.123688005 0.00247963867 -0.0892862976 0.0488414988 -0.0904518217 0.174596399 -0.131306589 0.0763920173 0.150487289 -0.153154299 0.0224560183 0.0973761827 -0.0426088274 -0.0505751371 0.104424372 -0.133484393 0.0833508074 0.0194486398 -0.142139688 -0.0637170449 -0.101654164 0.0209246967 -0.140727118 0.00838450529 0.0946883485 0.148535386 0.103271469 -0.114976875 -0.0612382665 0.0309834275 -0.080561161 -0.0438423492 -0.0763120055 -0.0871841311 0.0487271659 0.192025125 0.0274662226 0.0726716295 0.126637235 0.11231558 0.00397039996 0.192436248 -0.0129653281 0.087810427 0.101742446 -0.0811328292 -0.0573779941 -0.00594198145 0.157350421 0.0783605501 0.201680467 0.0806498379 -0.0635789633 0.173802316 0.046798829 -0.111674123 -0.105566561 -0.110239312 0.137469321 0.0206604954 0.190349817 0.169501752 0.126503631 0.167581499 -0.0180790145 -0.0187429003 -0.0419336259 0.0993470997 -0.0918064341 0.110130824 -0.0955291986 -0.0254780296 -0.0506573617 0.0148899863 
0.104261681 -0.0428472869 -0.0548303574 -0.0506917909 -0.0156032071 0.0699625984 -0.15484792 -0.0459163263 -0.110004574 -0.0441328883 0.197784573 0.0838625804 -0.0522456057 0.0942399129 0.0829744935 0.057554815 0.109780334 0.10274224 0.154105842 0.10721004 -0.0166563932 -0.0466450788 0.0287705809 0.10081622 -0.0953564495 -0.0839984119 0.101254053 -0.0738965794 0.0948913991 0.167173281 0.0429453701 0.0383497626 0.091592297 0.106202237 0.156739905 0.0656498298 0.13198331 0.147935465 -0.0810967013 -0.018950887 0.00158079178 0.164120257 0.0798614174 0.0186908729 -0.132187113 0.124651186 0.112890542 -0.138516054 -0.0781108215 0.106892236 -0.0400021151 0.121923052 -0.00202938612 0.0550861284 0.115557112 -0.0589016899 0.103443392 0.138156414 -0.156100512 0.141828462 0.163233846 0.185174793 -0.134581283 -0.0946970135 0.0305168517 0.0545697697 0.122422308 0.0272117686 0.092828013 -0.0790654421 0.0789960772 -0.113887571 0.139471903 0.177440643 -0.0422447994 -0.0695037767 0.133454293 0.00472546089 0.0994608104 0.0261894893 -0.0549818948 -0.0645656288 0.0830694512 -0.122029178 0.110560618 0.021865055 0.0957053602 0.153808683 0.153240129 0.00273627671 0.107639149 0.0361639187 -0.0830527321 -0.0444489233 -0.00363346422 0.0640293211 -0.0754873753 0.0189995058 -0.1402542 0.162265539 0.138485089 -0.0446577705 -0.000309297611 0.169430003 -0.0834633186 0.00541008823 0.034290649 0.0778444111 -0.0421900116 -0.0198174808 0.0522686094 -0.0672751144 -0.0208641775 0.151453003 -0.0738410801 0.043029502 0.0127242813 0.0245345235 -0.0372197554 0.090350613 -0.0694454312 -0.045643907 0.0334979966 0.0695154294 -0.0134842489 0.141971424 -0.074017182 0.0237953663 0.106125079 -0.0695564449 -0.145977944 0.166921124 -0.0877014548 0.0716962293 0.0305217579 0.117084034 -0.0790342316 0.0964029655 0.13598761 -0.134147704 0.189042479 0.182969391 -0.107122943 0.0923936591 -0.0212771464 -0.0149747208 -0.0244534928 -0.0772951767 -0.0497068875 -0.0293945558 -0.00921653118 0.105580427 -0.0721728429 
-0.0770729706 -0.00264244643 0.00397060299 -0.139186502 0.0292970631 -0.0475326255 0.08476118 -0.0867509693 0.126799867 -0.0671816245 0.160967201 -0.0940391421 0.036259234 -0.0219887402 0.0285151005 -0.0580190904 0.13070558 -0.171060801 0.135117233 -0.0228546057 -0.107383102 0.0618890449 -0.0694213063 -0.0618949234 0.133807048 0.17643562 0.128621712 0.0101680793 0.176956698 0.0840079859 0.097374849 -0.100451432 0.0399295464 0.172356963 0.00101820775 0.145156473 0.104961276 0.0815578476 0.146059379 0.107903466 -0.121531352 0.0570647754 0.047216557 0.170416638 -0.0707143247 0.0478855185 0.0394140966 0.0102794804 0.125966758 0.135744303 0.0133625893 -0.0925729126 0.136732638 -0.0822476298 0.154679909 -0.13795127 -0.0215002652 -0.0249491148 0.0930954218 -0.106826156 0.10373725 -0.0187940467 -0.0534816161 0.134281337 -0.0336386599 0.114718519 0.0787281469 0.0239171404 0.0408289284 0.117535852 0.0759770721 -0.0240571704 0.0102049625 0.0229755491 -0.0571867488 -0.0825752616 -0.0630160421 0.0233204234 -0.0362254977 -0.0341095217 0.110644877 -0.0943035707 0.0922036394 -0.052436009 -0.0474082902 0.0808229521 -0.0361060351 0.0341569446 0.127944812 -0.0520493798 0.00435285084 0.0624745227 0.0890819654 0.120440952 -0.125715539 -0.0429935902 -0.100851558 0.115291968 0.103215486 -0.0138821993 0.114144072 0.144928649 -0.0672504827 0.0337884873 0.178193495 0.0654718578 -0.032493107 -0.0594031401 -0.0149731291 -0.108951643 0.148577735 -0.0590856262 0.123775907 0.129149333 0.120561078 0.0938586891 -0.0787900835 0.157910496 -0.0426145568 0.124586366 0.153994665 -0.0279286914 0.0712008774 0.132654876 -0.058968544 0.152131483 0.0144725023 -0.0846911147 -0.0830136165 -0.0503571592 -0.129175395 0.107455552 0.0278498847 -0.0376918465 0.125603542 0.0866251886 0.0744670108 -0.0176635683 0.081767872 -0.116470791 -0.076551564 0.107822165 0.0519237667 0.169635236 0.000728378771 0.195453733 0.0923824608 -0.00255433074 0.130946428 0.033110749 0.0234523341 0.159311384 0.0584074371 -0.0724054351 
-0.00702239107 0.0528859086 0.0255747363 0.123749338 -0.0502231903 -0.127378836 0.000618861057 0.168384925 0.0564158484 -0.0874255598 0.0325605795 0.110717267 -0.0185731165 0.0527723245 -0.0973552689 -0.0553385355 0.099351272 0.126928583 0.037081793 0.159003794 -0.0413037315 -0.0480074212 -0.0216640383 -0.109965399 -0.0768443644 -0.0550187156 -0.02983227 -0.041209314 -0.0762111172 0.00517032761 -0.021049602 -0.082603015 0.128907517 -0.0880745947 -0.101325043 0.0862258524 0.0882336497 -0.0198778603 -0.0331840217 -0.109220311 -0.107734382 0.0400603004 -0.132840812 -0.0447417721 0.00368672935 0.0691269711 -0.0319370776 -0.0310771763 -0.152229711 -0.126779377 -0.0754033923 -0.106641732 0.0897259042 0.0624115281 -0.084738642 -0.0616546944 -0.00815979205 0.0202450287 0.071183376 0.0515766665 -0.0504490845 0.0691114515 -0.121255346 -0.0616305098 -0.121116355 -0.0412869304 0.0541755706 -0.00845611095 -0.019792689 -0.0873068273 -0.1018041 0.00566182006 0.155241832 0.0709863603 -0.0654985607 0.0548714921 0.1288618 -0.0813171715 -0.0274352692 0.050162863 0.0174831059 0.115988865 -0.0983620062 0.00916780252 -0.111271255 -0.0194736812 0.122007161 -0.05491817 -0.155907109 0.0968826488 0.0766369477 0.120036878 0.118291102 0.144479945 -0.109428965 0.0191301908 -0.131886169 0.0119547276 -0.178280339 -0.074061133 0.0725457594 0.047452867 -0.0980938748 0.00940938015 0.17627655 0.0703046694 -0.0134887863 -0.0899318606 0.140372112 0.076489009 0.0844909772 -0.0435512364 -0.0578976758 0.0688769594 0.10411185 -0.114739448 0.11660511 -0.0925834179 0.0873279944 0.175916493 -0.0425273553 0.143908709 0.0721898228 -0.0761375278 -0.11759565 -0.020234637 0.0312824845 0.0598440468 0.110991903 -0.0383540764 0.105179779 0.0467798598 0.167937577 0.0386657864 0.0542986952 0.0948523358 0.0433264002 -0.148534298 -0.0129731102 -0.0280822664 0.0361635387 -0.00415288471 0.138104618 0.10342367 -0.0275076535 0.116774455 0.101908013 0.0884111896 0.0440905578 0.117984377 -0.013649038 -0.126955181 
-0.0553081445 0.00625609886 0.133344293 0.00572153553 -0.0223944504 0.177496225 0.0815475732 -0.00271677272 -0.0417993777 0.106240101 -0.0412545837 0.18434307 -0.0274373218 0.178807244 0.0294444654 0.0646818206 0.0889737979 -0.0807103813 -0.0720598325 0.151093379 -0.033041738 -0.117520221 0.126272097 -0.106381506 0.037757419 0.00232348521 -0.0547570363 0.076936692 0.107204638 -0.0481706001 -0.00307636359 0.132705554 0.0537186749 0.0473928303 0.0915753692 0.119206257 0.176839486 -0.155492246 -0.151921168 -0.101832837 0.0815496519 -0.0724380389 0.0371969007 0.0601178631 -0.058801692 -0.0965428352 -0.0115199285 -0.0381022878 0.105424263 0.0742912889 -0.0960293785 -0.0112020867 0.0849209279 0.0520411208 0.122885831 0.151498944 0.122882292 0.153430328 -0.0156365 -0.0119283618 -0.0820145831 0.0155551042 -0.149645686 -0.0800471455 0.094618395 -0.0650205612 -0.0104006175 0.131104678 0.0416966155 -0.0876214504 0.0637880862 0.0821948424 -0.0084727779 -0.0978877619 0.168948174 0.149989381 -0.0299459342 0.0972742289 0.000181726937 -0.0922966674 0.176449746 -0.0968618467 0.0333946943 -0.0902563259 0.16238676 -0.0905583873 -0.03544081 0.152139008 -0.119497493 0.0413080677 0.0570814125 -0.000148650375 0.112468541 0.058299277 0.0766911507 0.0998951718 -0.0232174434 0.167854264 0.168274015 -0.0583992265 0.154771283 0.13650085 0.117386065 -0.0722455084 0.0544443242 0.0949110314 0.143746346 0.108085796 -0.0419899784 0.143864065 -0.025696218 0.133225232 -0.111586809 0.0990424305 -0.0993287787 0.078866601 -0.0784433931 0.132836834 -0.106675968 -0.1069621 0.077828303 0.187477276 0.0277358871 0.0359606817 -0.0598008744 -0.00338539528 0.00320412288 -0.0859301984 -0.0230135676 0.163934514 0.0130302329 -0.0939015523 0.0754140466 0.107570499 0.00613959366 0.135453999 -0.0996567607 0.109139353 -0.100983992 0.0580916367 0.0119609917 0.0797068924 0.151979074 0.16187796 0.075011678 0.0931628644 0.0360457934 0.00347460015 0.0319518261 -0.0867329165 0.0962795615 0.0821009502 -0.0356594585 
-0.02422712 0.0152814919 0.0527246408 0.132090867 0.144757852 -0.0431858338 0.139542729 -0.0139607172 0.171679854 -0.0980732143 0.180641383 0.186638147 0.0636721104 0.0896847546 -0.0584419966 0.143411934 -0.0701248869 0.0328124799 -0.0829237774 0.145310253 -0.00750299264 0.118470781 -0.0582295991 -0.0695642605 0.0633131266 -0.0640299097 0.159781903 0.0272349548 0.109332368 0.168833092 0.0473833978 -0.0882677585 0.0282821339 -0.0536800846 0.0558247045 -0.13541919 -0.107939526 -0.0673646927 0.0988901109 0.144200847 -0.0628962666 0.0835133493 0.0278197322 0.112931602 -0.0297801625 -0.00817243289 -0.119054325 0.00599690992 0.16512607 -0.0284597538 0.133023679 0.108261056 0.175182506 0.148225054 0.0877180696 0.0722195581 -0.0575301908 0.0970565677 -0.0798201114 -0.0708039552 0.0288234167 0.155979618 -0.0531227216 -0.0605055392 -0.0713208541 -0.0868417323 -0.0402501673 0.0165239926 0.181012854 -0.160325661 0.0927709043 -0.0364443325 -0.0312899835 -0.109137982 0.111598797 0.11623574 -0.068648465 -0.0206921138 -0.13928856 0.0243328102 0.0555803142 0.132689178 -0.0608246513 -0.0354508683 0.172304466 0.0327581689 0.170413792 0.163448825 -0.0454982035 -0.0583826788 0.0481920801 -0.0540810302 0.182651609 -0.174388662 0.155782059 -0.0293228272 0.015085889 -0.108551085 -0.121923782 0.0719362497 -0.168311208 -0.120032616 0.0659890622 0.115256436 0.131331578 0.119614907 0.178487614 0.09089607 0.00386154489 -0.0554215722 -0.0119620096 -0.0467320494 0.0933647081 -0.0323850662 0.141180277 0.107638344 -0.0253946837 0.173948079 0.137527362 -0.0205912776 0.010461146 0.142752916 -0.0192131344 0.107507631 0.14598392 -0.0370280705 0.0341507122 0.138847277 0.102246776 0.067923449 -0.00280428468 -0.0519020297 0.0715199634 0.151722491 0.00090766669 -0.0237915833 -0.00762919895 0.0348006599 0.0952979177 0.11985556 0.143176049 -0.00832088478 0.0575121641 0.0766030177 0.0191355087 -0.0326572359 -0.104509436 0.0300794542 0.062483415 0.132271856 0.100359082 0.0538696684 0.0351427197 0.071405977 
-0.040407598 0.157145143 0.0427171327 0.031071905 0.085857898 -0.140569568 0.0797872916 0.160493046 0.0627081841 0.0700800642 0.0625736564 -0.0243169125 -0.0110870786 0.150555253 0.057589937 0.166157231 0.120003015 -0.0579976961 0.0612958968 -0.0452432111 0.054822579 0.0524013229 0.149773791 -0.0714917257 -0.023807399 0.0454889461 0.0992185473 0.0658304542 0.02966832 0.101745747 0.0872673169 -0.125905886 0.0570483804 0.135766774 0.113360628 0.0364370681 0.0223136339 0.0019436914 0.0164411664 -0.0682152584 0.0921245515 0.0128188692 -0.0173182599 -0.00139826769 0.0518905111 0.0928640962 -0.0397072323 0.103173278 0.00413324265 -0.0670715123 -0.0588557124 -0.0857635513 0.0062935818 -0.0887346044 0.138430178 -0.025497932 -0.0639876872 0.0670730025 -0.0446693785 0.0594656765 -0.0820678324 0.157999322 -0.182115525 0.00614317786 0.0362051241 0.0657482669 0.106698424 0.191083074 0.135481074 0.0106980857 0.00957398489 0.0367676988 -0.0136649683 0.0735901445 0.0689036474 -0.134390131 -0.0718721896 -0.00467563979 -0.00951108709 -0.0722589716 -0.00359070604 0.0947275981 0.126477376 -0.0131597025 -0.12138617 -0.0347730219 -0.00653850706 0.135745063 -0.0925999731 -0.0944521725 -0.0723555461 -0.153894082 0.188464135 0.0043896623 -0.00922763348 0.213275999 0.00525289867 0.0494968928 0.0589311495 -0.07227844 -0.0655579418 0.173283234 0.0376433432 0.156717747 -0.078233324 -0.0844314247 0.0813227743 -0.0925660729 0.124083593 0.153772607 0.068052493 0.0846082121 0.127492517 0.147963956 0.0722059608 0.154211655 0.0819868073 0.111736804 -0.0420656316 -0.156026006 -0.135763094 0.132754937 0.110232912 -0.119001575 0.119631797 0.100629732 -0.0977817997 -0.0254859347 0.0714960396 0.0816458389 0.0694845393 0.107111402 0.0238910895 -0.11218477 -0.117907912 -0.0337541923 -0.114325784 -0.114220396 -0.153953075 -0.0372015573 -0.0811879858 -0.0323405489 0.128496513 -0.0856468379 0.0182948634 0.0260079242 0.0171604026 0.0346086845 0.114011452 -0.0935687795 -0.011811249 0.131105796 0.0234864186 
0.0406528525 -0.114756532 0.122138247 0.0470963418 0.0791598186 -0.0303514749 -0.0687026605 0.194362417 -0.022815939 0.0876949206 0.0480690859 -0.0250811335 0.148164272 0.0488567054 -0.0881642401 0.198841885 -0.0379917733 -0.00708210841 0.041188851 0.0292753335 0.0645876899 0.00623326236 0.0477512181 -0.000584310852 -0.106165297 -0.015090609 0.0250555836 0.0323013403 0.041745469 -0.133533582 -0.0977768302 0.0384080522 -0.014046954 0.0909342691 -0.0820525289 0.132567018 -0.0924441591 0.155193165 0.110916458 -0.0571177192 -0.141427621 0.0474443957 0.0907076299 -0.064002499 -0.0244310405 0.0177996214 0.0721451417 -0.00413550809 -0.0516352393 0.0421805531 0.131461561 -0.0123250391 -0.0480676852 0.0910230353 -0.0799057558 0.0509942733 0.112865351 0.104875125 -0.085275501 0.0623678714 0.0686701387 -0.080322735 0.0964362845 -0.0460433923 -0.0657152012 -0.074650757 -0.0327339992 0.16137737 0.0697549805 -0.108303167 0.00211762171 -0.0693195313 0.00135752186 0.013155547 -0.0307769664 0.0750898421 0.0616175942 -0.0536347926 0.0857256502 0.0237529613 -0.021395212 0.00901291613 0.00728149712 -0.113782011 -0.0464270264 0.167675585 0.0525661036 -0.0210970417 0.156918585 0.061230965 0.0992827117 -0.0678927675 -0.151397571 0.075506404 -0.0497730784 -0.0540236272 -0.0624297559 0.0426682606 -0.0725995973 0.076271072 0.116657615 -0.0210639741 -0.0213112682 -0.0862966105 0.0802445114 -0.0198064968 0.176064715 -0.0988808721 0.101349302 0.119555868 0.128017023 0.0522831939 -0.0366001837 0.145147249 0.0257630255 0.0434764959 0.111463912 0.0327721126 0.123577833 0.0872525647 0.162450716 0.199674487 0.164900869 0.0993724763 -0.144233376 0.0194361061 -0.0317708701 -0.0597182736 0.0684302226 -0.0456766337 0.0549333505 0.101966113 0.0526848994 -0.118291982 0.0568728857 0.125453085 0.107232653 -0.0476998277 0.135429114 -0.130028173 0.0840768516 0.158564597 0.0256799646 -0.0523730144 0.031615708 0.0914076194 0.188867077 0.143099532 -0.0071679526 -0.0894442722 0.0995981768 -0.0183322281 
0.0598267131 -0.0731880441 0.0917812809 -0.140130281 0.00585151743 0.00310393353 0.152784497 0.158248886 -0.137339488 0.0995804071 -0.0764262974 0.171144649 -0.0672199726 -0.0027869083 -0.102201961 0.107543819 -0.0715040565 0.214907989 -0.0438209847 -0.119756781 -0.00894473586 0.137930363 0.126813769 0.0999665186 0.0293341558 -0.0930798054 0.0649531037 -0.101108015 -0.0516813099 -0.0952921212 -0.0980607495 0.0295815989 0.0134664373 0.0469121002 0.0314295888 0.116126269 0.144171268 -0.109329514 0.102265686 0.0232829526 -0.0721712261 0.0460076891 0.00450235466 -0.078920044 0.120493911 -0.0557029285 -0.0781340078 -0.113349713 -0.146189392 -0.0182035994 0.10833291 -0.0549698845 0.111201309 0.0124426633 0.0892337486 -0.10926117 -0.00912767928 -0.0338271856 -0.105445758 0.211141855 -0.119850591 -0.180531412 -0.00868519023 0.218875796 -0.17652452 0.0990117118 0.0245204382 0.149801061 -0.00586622301 0.0881711915 -0.0257251803 -0.0817466527 0.0768139511 -0.0895878077 -0.107276358 0.0430153459 -0.162487656 0.0267249998 0.130476043 0.0166731309 -0.127173543 0.0398012064 0.0680280626 0.0879124179 -0.0295924786 0.0133784497 0.000692039728 -0.0751812905 -0.0830598176 -0.131929606 0.135406211 -0.112499252 0.0126099214 0.00665883068 -0.00475356216 0.0249228943 0.102894537 -0.0225983374 0.061107967 -0.0330257192 -0.0597277209 0.179125711 0.050645031 -0.0669407696 0.158360988 0.205021739 0.00652983878 0.121627569 -0.0640201867 -0.0522308983 0.0900547057 -0.122924723 -0.114422083 0.0658481047 0.0173800383 -0.0786071345 -0.0717952251 0.0280309897 0.0219677705 0.0759255365 0.084643513 -0.0923913196 0.166452676 -0.0389173701 0.0821120963 -0.113245606 -0.0364514329 -0.0393794179 -0.0335422643 -0.0305757262 0.0866778567 0.127289161 0.0190164503 0.0835306719 0.152247652 0.138753071 0.130225837 0.00651189126 -0.148018956 0.0714074373 -0.10346128 0.143939614 -0.0185987595 0.0229391046 0.101105355 0.0875614211 0.168692231 0.0672137067 -0.116006561 -0.069646351 -0.0440914668 -0.0887304097 
0.0605254248 -0.0931111053 -0.0291967671 0.0453826375 -0.0647103488 -0.08282765 -0.0912294909 -0.0972726122 -0.000134341666 0.117167793 0.14967677 -0.103549697 0.070657745 -0.0581128635 -0.1150591 -0.0362361856 -0.00619550841 -0.0881526992 -0.0659523532 0.0312618017 -0.0897310749 -0.0618666895 -0.0287295692 0.176449448 0.159317598 0.0456931591 -0.0967225209 -0.0319313519 -0.0248310566 -0.021381963 -0.0773523748 0.0478850566 -0.0207856018 -0.0701669753 0.147760212 0.0727391243 0.0043316409 0.159602404 0.0948895067 0.0890363902 0.00142308639 -0.0310148094 0.0753311291 0.0981000364 0.0223886538 -0.0931696519 -0.0682313293 -0.0667512491 0.0491678901 0.0186522752 0.137692913 -0.0236726794 -0.00927374046 0.114333265 0.104560494 -0.0608054437 0.204097703 0.205062792 0.094779104 -0.0898220837 0.0206578374 0.142832264 -0.049721241 0.0423907191 -0.0763793141 -0.093291074 -0.0389654711 -0.15401794 -0.00734718097 -0.0189076923 0.170103803 -0.0205226652 0.0636992827 -0.0644718111 0.108284459 0.138413697 -0.0257913806 0.00794647168 -0.0307983607 -0.0936900675 -0.0606061853 -0.154241458 -0.00459569367 0.141765147 -0.117230393 -0.0629995316 0.125530377 -0.0406536944 0.200848967 -0.104435295 -0.123753838 0.0367464684 -0.141358763 -0.0206649359 0.202375904 0.022151649 0.0812491998 -0.131115228 -0.0367442705 0.172151536 -0.0442129597 0.0605035909 -0.082755737 0.0954060331 -0.0230737943 0.0334409699 0.13487561 -0.110517241 -0.0225322787 0.062106967 0.119216867 -0.0953765139 0.14884612 -0.0287193451 -0.0106208287 0.153283879 0.0467924252 0.177715346 0.068970874 -0.0450268574 -0.0325655416 0.0547090545 0.0860708728 0.0107891522 0.0944289416 -0.0758065358 -0.0732419118 0.183351845 0.0771918073 -0.140022755 0.0537439361 0.149029925 0.131033957 0.0027445287 0.107114777 0.0976283476 0.156666949 0.192800567 -0.0668931082 -0.048805628 -0.0531376973 0.00703070173 0.135483757 -0.0116650816 0.0274772495 0.0685842708 0.0897314772 0.168943107 -0.0672629923 -0.132059872 0.120118931 -0.0828481987 
-0.0856146142 0.0526181571 -0.0933162645 -0.00942354929 0.169790015 0.0899278149 -0.112179808 -0.0518537723 -0.0197913386 0.107157297 -0.0402358808 -0.0525892675 -0.0279793683 -0.0564811975 0.111123219 0.0285395123 0.103563443 0.0289735198 -0.119780019 -0.138365477 0.0683289915 0.0212225579 -0.136515081 0.178696275 0.0141859809 0.0798230842 -0.0112462975 0.00572248595 0.165446609 0.1772625 0.0877416283 0.0257206522 0.0255760346 0.109550558 0.0173372477 0.0941226184 0.156007677 -0.0609857477 -0.168111339 0.112304233 0.141951621 0.0942777917 0.0506730452 0.0117849326 0.110127218 0.11164064 -0.0365206338 0.17797333 0.0919450074 -0.0340361036 0.0371871293 0.137831658 -0.0145821422 0.171712354 0.0708972588 0.00538893556 0.138060763 0.0720485598 -0.102158196 -0.00157117634 0.161901429 0.164550751 0.0551112816 0.00221332046 0.163098738 0.0364854559 0.137255967 -0.0139900306 0.116951399 0.0309598278 -0.0843858272 0.149897203 -0.0105481371 0.129127055 0.0816876367 0.0559929311 0.109518707 -0.0339205861 0.102632456 0.00931944605 -0.00911251083 -0.0575628281 -0.0238492247 -0.13892445 -0.0204126779 0.140317202 -0.0053701899 0.0747784898 0.0447182879 0.0408651866 0.00653599948 0.0279005133 -0.0747578964 0.0836786777 0.115476467 0.0596954301 0.0199178606 -0.0829307586 -0.0326780863 -0.113550745 -0.0547465198 -0.0249119569 -0.113830656 -0.0401589163 0.093042478 0.0902988166 0.0128500452 0.0456254408 0.0772466585 0.159909874 -0.0500820577 -0.0781232342 -0.051164262 0.0975523815 -0.0487125441 -0.0570325479 0.0562490486 -0.0282622576 0.0981458947 -0.131002113 -0.157566205 0.0727057457 -0.146405846 0.146773353 0.101571307 0.130754113 -0.0140098277 0.119490281 -0.134308934 0.0664061159 0.100537203 0.152347729 -0.066916585 -0.0918895528 0.0884105414 0.150467262 0.190053374 0.0404316857 0.0131604439 0.0616949201 0.0204220749 -0.00819397438 -0.0989773422 0.135392219 0.034526363 -0.0443142466 0.097198464 -0.0308621768 -0.116274439 -0.0293579884 -0.0962141603 -0.0434402823 0.199946746 
0.112885021 -0.00315347826 -0.0176970661 0.0463292599 -0.0291460063 0.0120348148 -0.00303878007 -0.0146520687 0.155439973 0.0927805379 0.112616234 -0.10787008 0.199581042 -0.0556919798 0.0174912345 -0.147659183 0.00716985716 0.0517262444 -0.132578805 0.0649443641 0.0491473936 0.0128262425 0.0570766106 0.14404805 0.0125820432 -0.0378993787 -0.0890177265 0.0629585683 -0.0781803727 -0.0468946658 -0.0970080867 0.107453912 -0.0726181269 -0.0156788174 0.109249413 0.179553613 0.0696004778 0.0497379862 0.1262182 0.070380047 0.036693722 0.153491363 -0.0165516399 0.11358387 -0.0461250581 -0.0716388002 -0.063194491 0.182627037 -0.0979600772 +tensor_0bias 50 +0.0687436238 0.121526092 -0.0379632339 -0.0472203717 0.085131444 0.0979627594 0.12071842 0.0766481757 0.111980788 0.0231025815 0.0757955536 0.0592933744 -0.0364503562 0.118033081 -0.0119619071 -0.0852706879 -0.057033807 0.104488559 -0.0188827403 -0.00805457216 0.106946483 -0.0220598206 0.0901777968 0.154047132 0.127174735 0.0643620193 0.119487718 -0.0108337859 0.10897246 -0.0441434234 0.0685871169 -0.0381012894 0.0594230555 0.0113021079 0.163605392 -0.0388617851 0.0634208769 0.104776219 0.0779020116 0.0259756818 0.152426898 -0.0797943622 -0.0156979878 0.161263436 0.0584381446 -0.00747399358 0.0999026075 0.0664042234 -0.0277017541 0.0495129935 +tensor_12bias 50 +-0.0650987327 0.0561347082 -0.0524960756 -0.0606846772 0.0553311668 0.084040381 0.0655064732 0.132255286 0.0351522863 -0.0857200176 0.0463379882 -0.138509437 -0.0309930108 0.0726053342 -0.0611225963 0.040046718 0.0333271585 -0.139368355 -0.115387805 0.035529051 -0.0753710642 -0.108340122 -0.165888399 0.0588014238 0.0744016021 0.0737710446 -0.166636527 0.139449701 -0.107233316 0.166755453 0.111904733 0.142458484 -0.10559769 0.17358239 0.0248687863 -0.00832488388 -0.00902078301 0.0966997072 0.16634649 0.0510999039 -0.11830131 -0.137912169 -0.0549811572 -0.140467361 0.0248682722 -0.0492456779 0.132404506 -0.136645094 -0.00630686618 -0.0665986538 +tensor_18bias 10 
+-0.0286833197 0.0315113701 -0.0158580411 0.0455938913 0.0954583585 -0.105117157 -0.0738683939 -0.119185224 -0.0686925054 0.0992293954 +tensor_14bias 50 +0.0160144269 0.0659304708 -0.134516135 -0.125120386 -0.126968578 0.011852067 0.111996368 -0.0366256982 -0.0781780854 -0.00189105608 -0.0748870224 0.11818061 -0.0440538898 -0.0143895745 0.0724157915 -0.0405166261 -0.0633764267 -0.0380873531 0.0670853108 0.0602433793 0.0957999155 -0.0834713951 -0.0430925563 -0.0397071578 -0.0210147407 -0.00462661684 0.0973884314 -0.153825372 -0.147843331 0.0121727288 0.180789441 0.0183312204 -0.130684286 -0.10787309 -0.049283646 -0.0444232263 -0.059754774 -0.0348485857 -0.159336865 0.0452591404 -0.0289487373 0.0982468277 -0.0173284747 -0.102011278 0.0217117742 0.0264923107 0.137902901 0.00199478748 -0.0913077295 0.0711041912 +tensor_4weight 2500 +0.0806722939 -0.0843013674 0.0457266495 0.0729286149 0.077442795 0.0629948378 -0.0479649454 0.18801184 -0.13502112 0.139153555 -0.0434509926 0.146485865 0.113608092 0.0014678 -0.00269440887 -0.0458229147 0.10888987 0.0207153317 0.182204321 -0.0768271685 -0.0549781434 -0.142413139 -0.0730496719 0.179332584 0.0324325487 -0.133773685 -0.0963223055 0.0017872625 0.0347659923 0.125833228 0.0962186158 0.0585900992 0.135499209 0.1581707 -0.0280255843 0.0394914299 -0.136502922 -0.0513676554 0.0243021026 0.13252829 0.0698634982 0.016627552 -0.0407816991 0.0601785071 0.0856543556 -0.0628332347 0.117408261 0.100019909 -0.0541871078 -0.0604645647 -0.10079059 0.129933119 0.0221414883 0.0313292705 0.0785076022 0.0515565313 0.103933319 0.130206779 0.0966112837 0.0613389611 -0.0399818122 0.185782522 0.0632089376 0.203524143 -0.00186416006 0.0119111966 0.175457805 0.0410548635 0.132948205 0.145901531 0.0722996518 -0.0906703025 -0.0737293884 0.114364773 -0.0281930994 0.0720122755 -0.008361076 0.090595901 -0.0460113436 0.0419355966 -0.0804221854 0.0608950555 0.170809716 -0.0476390161 0.169650152 0.0800267011 -0.148617357 0.0836865678 -0.0557933412 
-0.0582912862 -0.0435073562 -0.0371096209 0.083936885 0.141528875 -0.0368124209 0.162937 0.182462409 0.0500146635 -0.0173346996 0.00344588282 0.077063866 0.0220477413 0.0806412846 -0.00341189606 -0.101663046 0.0451156981 0.0767283887 0.0141340708 0.039356716 -0.0705280155 0.0963134021 0.0619241111 0.0269989092 -0.021566296 0.145353943 0.0327979699 -0.0733648017 -0.0134483287 -0.0573412068 0.0651545897 0.0168404263 0.0243993159 0.0994131193 -0.0538567007 0.00572972698 0.0229900386 -0.0910499841 -0.105468161 0.0651908889 -0.0938435644 0.0910287648 0.0970317647 -0.0301273968 0.131670371 -0.035016574 0.0178018566 -0.117108293 -0.104218014 -0.0651276186 -0.0432341956 -0.0111995684 -0.113138527 -0.0553993024 -0.0827366859 0.111415848 0.0760915801 -0.036721129 0.0393031836 -0.107385024 0.0509901345 0.137131959 0.101355053 -0.0619110428 -0.0586683974 -0.125746161 0.146015525 0.0182545464 0.101042837 -0.103411034 0.141968071 -0.110162877 0.0819647014 0.151268393 0.181896359 -0.0776448846 0.158234477 -0.0236076955 0.162451014 -0.0745234191 -0.0891344696 -0.0165763423 0.0465359874 0.164738223 -0.0147009594 0.136529386 0.119493932 0.123288825 -0.0328544565 -0.0433127023 0.142125174 0.104210556 0.165809229 -0.0778093338 -0.0373858176 -0.0823435411 0.0155735873 0.103326514 -0.05250616 0.0069106333 -0.0459599681 0.0475454628 -0.0535901822 -0.0878656879 0.188019454 0.0936229303 0.00049323542 0.111949839 0.101932622 0.111278057 0.00650064787 -0.132303327 0.154887334 -0.0368106291 0.185657039 0.078358531 0.0830566436 0.000230199876 0.1663609 -0.110823177 0.0969691128 -0.0866530016 -0.0828108639 0.063297838 0.0365174599 -0.0799051747 -0.00249398779 0.0037801282 0.175893486 -0.0303821024 0.17547828 0.156276211 -0.0808659643 0.0175747536 0.0641925558 0.132568434 -0.107452586 0.0114268949 -0.0109794568 0.105085135 -0.00249309023 -0.105099067 0.11644727 0.00391933694 0.0913905948 0.195951268 0.0354405977 -0.00441903574 0.0888798311 0.0769788325 0.0180195644 0.0349424444 0.00618674606 
0.0450688228 0.0371989682 -0.0904219598 0.0357578248 -0.0248050801 -0.019140562 -0.0610508397 0.0400745049 -0.0465866067 -0.0127383219 0.136099428 -0.00988076627 -0.0224271286 0.138736099 -0.0945356563 -0.0887529403 0.00517961383 0.000571548939 0.0760833472 -0.0421531834 0.107855894 -0.12219803 -0.120920695 0.010690853 0.00134639442 -0.0446796417 -0.0908018351 0.0188111514 -0.117398165 -0.00559857022 -0.112447143 0.0241752416 0.051861912 -0.0751599744 0.0727101266 -0.0994263515 -0.116854861 -0.109606072 0.0153764635 0.0296985656 -0.094453536 -0.123156615 -0.0982722938 0.102426931 -0.038060952 -0.127354875 -0.0424764156 -0.141495243 -0.136656374 -0.0849142522 0.100465015 0.0261948798 -0.0149132377 -0.095423542 0.0603073835 0.0239272416 0.0944917873 0.0912092179 0.0132168755 -0.0580653921 0.0647564083 0.0321055204 0.0641190782 0.107408777 -0.0238600653 0.0974822938 -0.151330233 0.0900393799 0.185146719 -0.0562634654 0.19377704 0.0260389671 0.149799034 0.0271496754 -0.0140250009 0.0241740346 0.0609554648 0.0848416314 0.0207572728 -0.107217379 -0.0550570227 0.0541072674 -0.0474922284 -0.0888904482 0.0580282025 0.0328076519 -0.00170895853 0.0491873212 0.090218015 -0.0734803379 -0.0112014636 0.150287092 0.16178152 -0.0178813841 -0.030176945 0.175077632 -0.0382624194 -0.0120699406 0.0354120433 0.0163750257 0.116489731 0.138228595 -0.00104773929 0.02116061 0.195760205 -0.113974452 0.204999462 -0.00420999806 0.0197493862 0.140327349 0.145642623 0.113952592 0.0668037087 0.144647643 0.138338432 0.0301354099 -0.092116423 0.141155869 -0.0757502168 -0.0111120678 -0.019430887 -0.132206604 0.0304258037 -0.00902231503 0.023573963 -0.11457108 -0.0038465804 0.0600269213 0.185435995 0.103536278 0.108110771 0.0262743887 0.092287004 0.0180775113 0.0580065064 0.109430514 0.167516813 -0.0948597863 0.147749871 -0.0377445519 -0.16559723 0.103454545 -0.0619672574 -0.0660705566 -0.0222212803 -0.00183966081 -0.0677803308 0.0315424129 -0.00417117588 -0.144289106 -0.0828239396 -0.146710843 
0.0357453451 0.00331253489 0.0235776883 0.0018393771 -0.0240897052 0.112991959 -0.097301051 -0.0531368554 0.102575697 0.224759638 -0.100583948 0.027663447 0.0663552508 -0.0544704907 0.0913643613 0.10431046 0.14408429 0.160526797 -0.0272651091 0.128108725 0.137256622 0.0514451601 0.0290343836 0.0522942841 0.169628382 0.0517538302 0.039717 -0.112903044 -0.0319129899 0.142312348 0.16764465 0.00277794432 0.155595258 -0.016347399 0.0998492464 -0.0829867125 0.0122846849 0.10995502 0.176780567 0.166116044 -0.0651847348 0.0968866721 0.0796400309 0.156421289 0.00979311764 -0.111015052 0.100035012 0.198834509 0.104847461 0.0955422893 0.0701622218 -0.00574288098 0.0388571136 0.0862576142 0.0804817602 0.0700528994 0.0890722573 -0.00526280887 0.0880217403 0.108766705 0.0439562909 -0.136678606 -0.017526824 -0.101755708 0.150479943 -0.0441651195 0.0611818954 -0.010830123 -0.0615075193 -0.0789036453 -0.0960501432 -0.0448041894 -0.114129215 0.157983571 0.0660151616 0.00131378241 -0.0953527689 0.0812098756 0.0714970827 -0.0841728672 0.0815933347 -0.0241262466 -0.0606837049 0.117682979 0.170510948 0.0609742589 -0.0866294503 0.0337947756 0.0836874992 0.128505945 0.0142021542 0.00716301799 0.073032476 0.096828863 0.0873111039 -0.0305738319 -0.017774554 -0.0532108061 0.180189192 0.0185202211 -0.0507842451 0.100284688 0.0385177433 0.0343939774 -0.151525408 0.161509618 0.0328486934 0.0315718576 -0.0216901544 0.011822544 0.0440483205 0.123011395 -0.0785683393 -0.0336200632 -0.0159502272 0.148681283 -0.0669046566 0.185322538 -0.0733356997 0.0739779621 0.0648668483 -0.0301269554 0.156339601 0.0585463084 0.155070648 0.18907924 0.0647668913 0.013078318 0.167015359 -0.0203347579 0.134109989 0.110318691 0.0168762747 -0.00621265173 -0.0438371375 0.0293022711 0.0136860888 -0.0961167067 0.131283402 0.0199183244 0.0874097347 0.109514065 -0.0711590275 0.13801989 -0.100633904 -0.0766485333 -0.0797629207 0.120990887 0.117599219 -0.0800174996 -0.0554481633 -0.0130264247 -0.0096846018 0.0329468772 
0.200460345 -0.0798182935 -0.127444193 0.137921482 0.056331329 0.0758561566 -0.0259927046 -0.00583240716 -0.151809007 -0.0616948605 0.0165051967 -0.106223613 -0.107458085 0.0237796139 -0.133243531 -0.0831126943 -0.012560742 0.0367795378 0.00879683718 -0.121515289 0.0290033501 -0.0651801005 -0.0471335575 -0.00044152551 0.112791196 -0.156063318 0.0274669975 -0.00387126207 -0.142437324 -0.133588076 -0.0751931593 -0.0703300163 -0.0889332145 0.0192210358 -0.0770214796 -0.142161205 0.118518829 0.015532054 0.00297008874 0.00619109394 -0.0286132246 0.067848444 0.0446564294 -0.12528789 0.0658862889 0.142027885 -0.0245133974 -0.0243382379 0.0615522414 -0.131100833 0.0117956251 -0.144661099 0.153534442 -0.00748422509 -0.101551078 0.140787482 0.120413505 0.133537158 0.109931737 -0.076232776 -0.0067446162 -0.105740324 -0.0634061843 0.0939473137 0.119690232 -0.0357088707 0.0102475164 0.150871128 -0.000345803623 0.111536011 0.0299190637 0.191872507 -0.00425557932 0.0131858671 0.0683450401 0.011605869 0.0222013909 0.0556304455 -0.0517201163 0.161248505 0.0784498453 0.171983451 0.119539365 -0.0555509515 0.0169317685 0.00468148896 0.0350351445 -0.143832296 -0.123316839 -0.060894113 -0.00792651903 0.165182695 0.13920185 -0.0278753694 -0.0646031126 0.0390878469 0.103839591 -0.0264649615 0.159046769 0.161050528 0.056850709 0.0216505565 -0.019877946 0.0416690223 0.0680721179 -0.00811236072 0.0474281274 0.139705688 0.128775299 -0.134791732 -0.0200266857 -0.0365998596 -0.0124936523 -0.0767151639 -0.042266313 -0.0712475628 -0.0536471978 0.133768514 -0.0192902926 0.105865858 0.113762073 0.104997188 0.0901620463 0.066951476 -0.0920727104 0.150316 -0.115942262 -0.0646094009 0.051550284 0.106786288 0.0553277843 -0.135059014 -0.0844271183 -0.093783997 0.14749904 -0.0715771541 0.104918532 0.169773012 0.030166015 0.0254033525 0.0341539346 -0.0932782665 0.0505385213 -0.13566044 0.172710717 0.181072846 0.0247942675 -0.0522602275 -0.0928869545 -0.0629897714 -0.00787132327 0.160656855 0.119059108 
-0.0577676259 0.130579263 0.103787817 -0.0639968142 0.0237170234 -0.0796101764 0.0789383575 -0.11092788 0.0240584183 0.0880425051 0.0497003458 0.0207255501 0.0609250851 0.121555626 0.0423985943 0.169498548 -0.168339416 -0.0737465993 0.00344401528 -0.0481818803 -0.0240785405 0.138308004 -0.0498832725 -0.0870527998 0.135833338 0.0367706791 0.164695784 -0.0926531628 -0.0138947945 0.0515966341 -0.124790356 0.160091609 -0.08937978 0.0392833501 -0.0552154407 -0.0162713174 -0.0258723479 -0.065008454 0.0126740728 0.136108771 0.112826265 -0.0117993969 -0.0383974053 0.13958928 -0.111128941 0.0530200005 0.0264452137 -0.00290334155 -0.0446272232 0.061192058 0.175629675 0.0535970144 0.0791243389 -0.144284248 0.161759198 -0.0264586899 0.170231506 0.0360257179 0.0236983728 -0.0918620229 -0.124832675 -0.129897267 0.0827946812 0.167229131 -0.0483314805 0.0731398612 0.0330644958 -0.0419231206 0.147590339 0.120546743 0.0866150856 -0.0558574684 0.14509137 0.112841494 0.010361298 -0.0738257468 0.062864013 -0.000246174692 -0.0360012166 0.172877163 0.117604062 -0.143552348 0.169368088 0.0966829956 -0.0905596018 0.0989860147 0.143281475 0.0763563141 0.137610212 0.122151025 -0.139252588 0.0253664367 -0.0899616033 -0.0669621378 0.173054621 -0.0460386537 0.0831045434 0.136806592 0.134621754 -0.0229169969 -0.00980438758 0.0452408046 0.0591817014 0.186792865 0.00453559728 -0.0630519763 0.129970819 -0.0708865598 0.0169868432 -0.0856622308 -0.0697054416 -0.00249436265 -0.00108185853 -0.0849266797 -0.0896446258 0.205029503 0.0984538794 0.0920003206 0.0979186818 -0.0760004744 0.0389556028 0.154188663 0.0658197105 -0.0182720162 0.134750709 0.0715288147 0.17903395 -0.133035272 0.200460136 -0.0744331852 -0.0414500348 0.0895937532 0.0214252006 -0.022997003 -0.0672739893 -0.0548784323 0.166354895 -0.0431604087 0.165455922 -0.124262832 0.113898836 -0.166768521 0.00970084779 -0.0567515977 0.0607765876 0.183420077 -0.0727137551 -0.0270099547 0.0259942077 0.00637345994 0.0362093039 0.0634940416 0.22586067 
0.0804543719 0.177989498 0.168853745 -0.0725347623 -0.130849689 0.14897649 0.0990756676 0.105376959 0.0459880121 0.037299931 0.0371170193 0.124092944 0.026399713 0.214453608 0.165884897 -0.0445454419 -0.02944877 -0.0441051386 0.0706486255 -0.0643619671 0.107751079 0.0106587159 -0.0160649233 0.0853765532 0.0439129174 -0.0179767329 -0.0966302827 0.153880417 0.158972874 -0.0232971646 0.109733656 0.118792728 0.111737549 -0.0411141589 0.128475308 0.177355379 0.088045463 0.0918510482 -0.0182551499 0.0349350236 0.0172623489 0.0358161516 -0.0553316772 0.00500165345 0.0473173968 0.0152386809 -0.102430955 -0.0801292434 0.142120838 0.182653144 0.0498294421 -0.0422114469 0.0125372913 0.165790632 -0.0877576023 0.0534564219 -0.0601202659 0.04567682 0.0359176025 -0.0493109711 -0.0345178694 0.0486616641 0.179762542 -0.0616127439 0.142689958 0.173905298 0.13620089 0.0958447605 0.0267633125 0.0164805949 0.0387719236 -0.0923323482 -0.025394721 -0.0696693659 -0.0885034949 0.155368611 0.175244749 0.108128108 0.0818990022 0.146583825 0.0607022159 -0.0263073556 0.0531130992 0.0492566414 0.0426749587 0.128532976 0.165642813 -0.0271078423 -0.0249717701 -0.00465310086 0.0946793407 -0.0277413856 -0.0293258466 0.153490797 0.0542890653 -0.121490426 0.0403553173 -0.0457720421 0.0372848473 0.1336312 -0.0569381975 0.0157627482 0.183314934 -0.098882556 0.0972879678 0.133379266 -0.13416934 0.127192289 0.114743538 -0.145406723 -0.141376868 -0.00748612825 -0.0768275931 -0.00242518331 0.0610179976 0.11269661 0.0289160293 0.134316772 0.0604642555 -0.0706829354 0.0602234714 -0.00745525956 0.128726706 0.02637784 0.0765895173 -0.100985415 -0.103230231 0.0484438539 -0.0102437539 0.117040537 0.130066067 0.0934641883 0.035608504 0.080431819 -0.0183086582 -0.0162367485 -0.000762896263 0.091369085 0.0433430262 -0.0226776432 0.0331841335 -0.0515708551 -0.138115823 0.111762553 -0.00960157253 0.0194415804 0.0162233952 0.0687385723 0.00495963311 0.124809526 -0.0617128015 0.128910005 0.124350287 0.123989262 
-0.0698941946 -0.0828819647 -0.051271636 0.108241625 0.090747878 -0.0240470748 -0.0150890118 0.159876198 0.111201644 -0.107370481 0.041435346 0.0879196003 -0.073871471 -0.00736038294 -0.0421624519 -0.00446702167 0.0206496771 -0.0786093399 0.00770913251 0.0940739587 -0.143417105 0.161424622 -0.00790184364 -0.0727001727 0.042316515 0.141635984 0.168245554 -0.0294214915 -0.159648478 0.112373084 -0.0489424169 -0.0889650211 -0.136385739 0.0504631549 -0.0346960463 0.0639858767 0.0742279962 -0.0959718451 0.125432774 0.0435161628 0.0543604121 0.12924619 -0.067039676 0.0832744464 -0.119794376 -0.0892888829 0.144032732 -0.13564758 0.0327132791 0.0262426939 0.0729919598 -0.0233827997 0.0430958606 0.103070885 -0.00284027657 -0.131751791 0.0218737386 -0.0669141933 0.0876880065 0.108192131 -0.00722055649 0.0314042829 0.00201363396 0.0693058148 -0.127397463 -0.11308068 0.095366247 0.0613252074 0.0767963976 0.109912105 -0.023631271 9.64457431e-05 0.0741448328 0.0674567968 0.0592180379 0.120218024 -0.138540611 0.186128601 0.0634339973 -0.066954501 0.123150513 0.00262892642 -0.156880677 0.0271030273 0.0389812775 0.163095251 0.0937159061 -0.119892217 -0.10029912 -0.0113538243 0.117122836 0.0958641991 0.0554464087 -0.0611852631 0.0309960768 -0.148533225 0.0168368462 -0.0148341283 0.0199822951 0.0840069354 -0.0250799228 0.000891973905 -0.110700309 -0.0532766916 0.0793971419 0.0984170437 -0.128924787 0.0291142873 -0.0578225479 0.0482807197 0.0368235111 -0.0756311119 0.056336727 0.168977603 0.00820702594 -0.0148615483 0.128323391 -0.0488858111 0.0030394888 0.0392629169 0.0217599515 -0.109780788 -0.124327026 -0.110027082 0.079080537 -0.0519953929 -0.0504782349 -0.00701974798 0.17692171 0.143427163 0.152141586 0.0693683028 -0.115196042 -0.0221416522 0.169055194 -0.0540919825 0.182789385 0.139691234 -0.074015893 0.108184151 -0.0935382247 -0.0598740615 0.0335229784 0.150850862 0.111152209 0.163470238 -0.111606114 -0.0112746516 0.00895981397 0.146160573 0.137397975 0.165998906 -0.0262579694 
0.0310111959 0.0895001888 0.0290670171 0.148392752 0.10005831 0.0260470044 0.0666432157 0.14119634 0.145734191 0.140673295 0.0609008037 0.0760866255 -0.0274666939 -0.0857639909 -0.093561694 -0.0673863441 0.06305594 0.183098152 0.0362807289 0.179879576 0.187368259 0.0867011249 -0.00191641552 0.179400802 -0.178453162 0.0443644077 0.14893277 0.130691677 0.190664202 -0.00733991154 0.0375372507 0.139617547 0.0207137242 -0.0660620481 0.121428333 -0.027175935 0.0812105387 -0.0167020112 0.0997308716 0.0814295784 0.00100216595 0.0675137788 -0.0446306355 0.11655578 0.0790163651 -0.0364569351 0.043299146 0.0583344959 0.0861434639 0.0999846533 -0.101646118 0.0544962138 0.111912884 0.13398391 0.192645401 -0.0995487198 0.172990069 0.0454724953 -0.0182189811 -0.0639074966 -0.117950983 0.158961445 0.0991295949 0.00396099035 -0.120374672 0.168334991 -0.0206933524 0.0865943655 0.161322176 0.0555427149 -0.0746511817 -0.0254731867 0.148725659 0.114840917 -0.0996649787 0.0825758129 0.00449527614 0.162873149 0.14936614 0.156525835 0.0766895339 0.118420944 -0.0548048988 -0.167635486 0.0756825805 0.059926942 0.0492656752 0.0400654711 0.0896347836 0.0765077025 -0.0438671187 -0.146087736 0.117657624 -0.0255973134 -0.11745102 0.0930163413 -0.0821457729 -0.115750015 0.0327894762 0.120342232 -0.0219539329 0.190586492 0.0217166767 0.0146391429 0.0616531707 0.103957534 -0.0640848204 -0.0858041495 0.0310945753 -0.102986038 -0.0559266806 -0.000226511125 -0.140096694 -0.119943008 -0.111525618 -0.022964308 0.0660581961 -0.140670016 -0.0539666936 0.0656664073 -0.146068677 -0.0434579179 0.0442539938 0.049366042 0.138595164 0.215925127 0.127935782 0.00820590742 -0.0747593045 -0.0842378289 -0.0417899489 0.0216342304 0.0241500065 0.10414844 0.0635119677 0.109194174 -0.0664032325 -0.0841010138 0.0333074108 0.144626364 0.0835791081 0.0405562595 0.144938678 0.113412 -0.0413297117 -0.0116025591 0.116451755 -0.122508198 0.115656048 0.118746422 0.149316311 -0.0756765008 0.162036806 0.136063144 0.0960770398 
0.0914931074 0.00234524277 0.141637772 0.0776848495 -0.103856735 0.0223964415 0.0540647469 0.154280543 0.0363733396 0.020892188 0.0519513749 0.00717404438 -0.0717171952 0.0605637506 0.123303227 -0.0711054057 -0.0547375344 0.0238987729 0.122411825 0.0981374756 -0.0796336755 0.181836978 -0.0139241079 0.0208457373 0.0578660555 0.115007117 -0.152290791 -0.116023742 0.125889778 -0.0744427964 0.173140392 -0.0230522808 0.0991717577 0.0317968801 0.107756197 0.164815009 -0.0174482651 0.0639693215 0.0523474552 0.0462639593 0.140508741 0.0506025292 -0.0438129827 0.0276643373 0.0874049738 0.138593227 -0.0969195291 -0.0404044203 -0.0224188063 0.108781926 -0.100896388 -0.0309308395 0.125772789 0.028097406 0.0634060204 0.0683069155 0.145577833 0.183691531 -0.0496028848 -0.0776938125 -0.060156174 0.0218149051 0.197596177 -0.0154853165 -0.0412122235 0.0939057693 -0.118166968 0.1283319 0.111681804 -0.143919662 0.00493258471 0.126385331 0.151842475 0.186866894 0.0665669069 -0.0903968439 -0.0313272439 -0.0247976556 -0.169458717 0.054224968 0.0968870521 0.139724314 0.0172182582 0.103909202 0.155110002 -0.0126233418 0.190851286 -0.0473378785 -0.106794864 0.0950474441 0.108153269 -0.00215253839 0.0927259997 0.0746136159 0.000158840179 0.151475027 0.112737246 0.0532431304 0.054269813 0.129630253 0.164428711 0.0523424074 0.0814517438 0.016212143 -0.0117008882 0.0680367425 -0.0788285807 -0.0789092779 0.172827527 0.119092286 -0.0780554339 0.148280904 -0.0357619487 0.0404880531 0.139303714 0.152678803 -0.0313055441 -0.0433851704 0.145161822 -0.157154232 0.0209975056 -0.0296302848 0.020536093 0.0674732029 -0.0705216452 -0.0414924286 0.057908535 -0.169467628 -0.0330408588 0.182830229 0.0190448835 0.13370271 -0.0713856667 -0.0501033887 0.0232970063 -0.0963442922 0.0447021201 -0.118378267 0.189147756 -0.0498844683 0.0468240269 0.00958443806 -0.109032102 0.108971842 0.00711469864 0.00700109964 0.112477995 -0.138462275 -0.114364117 -0.0077861608 -0.143087372 -0.0425114706 -0.137003303 0.00309556606 
-0.056427639 -0.084226869 -0.0175813958 -0.120090812 -0.131012186 -0.136584803 -0.0965648219 -0.0900525004 0.0984451473 -0.0295922905 -0.0215709601 -0.157052785 -0.0698363632 0.128503025 0.0812449306 0.044423122 -0.0622849166 0.126199692 -0.0240686592 0.110058717 0.0588081293 -0.0629438162 0.0803009868 0.0551974401 0.00367958308 0.0953964517 0.096288465 0.0765576512 0.134714395 0.128183305 -0.0794131979 0.158218175 0.0334427804 0.103117377 0.118011713 -0.0353304408 0.0812821984 -0.0924249962 -0.0943661332 0.0499824919 0.172689974 -0.015012878 -0.00591862109 0.000791038619 0.0393676013 -0.087351352 -0.00550199067 -0.131391063 0.110083923 -0.0942325443 0.151406854 -0.0441909246 0.125668615 0.113547325 0.104156129 0.0539508313 0.04261766 0.0514821857 0.120158657 0.157996073 0.00543851778 0.145554841 -0.141709834 -0.0490901694 -0.0447609834 -0.0976633877 -0.0905513093 0.142786831 0.134974882 -0.0384936519 0.124396443 0.0711318851 0.0364370346 0.0655808076 -0.111105889 0.177323133 0.0175751373 -0.00530883716 0.0142328804 0.0944742784 -0.104841068 0.0137548354 0.072433494 0.0684480369 -0.0881074294 -0.0346771851 0.153584346 0.127561867 -0.036190562 0.0799537897 -0.0859517306 -0.0208766013 0.0334149264 0.0264191292 0.0374499112 0.142725974 0.0713385791 -0.0855041817 -0.0318115205 -0.070518069 0.0307250991 0.13150534 -0.072534509 -0.104207613 0.189150855 0.0493283272 -0.0275112167 0.166747719 -0.0543703809 0.0186180789 0.165644959 -0.0484347753 -0.101127744 0.1094006 0.185057849 -0.0443641394 -0.144589871 -0.0198352784 0.0630682558 0.152583927 0.0218491945 -0.122168265 0.0245404653 0.0799318552 0.0951262489 0.122852422 0.0709591582 0.147164628 0.0161824599 0.137200028 0.0640827939 -0.0755483732 0.0245481338 -0.0100152371 0.14329806 0.0801420659 0.0691645741 -0.0589840487 0.130164921 0.00311033521 0.0268984325 -0.0349549092 -0.00415431196 0.0867199227 -0.0820708722 0.164873272 -0.169460997 -0.0531368032 0.152747095 -0.165769219 -0.0328237601 0.208387405 0.047868643 
-0.137711033 0.091696687 0.0407248922 -0.0470836647 0.145012021 0.0215788931 -0.0970180035 -0.0877238438 0.172740042 -0.0380049273 0.0389408693 -0.0466512367 -0.0354783237 0.0639048144 -0.0723189265 0.104144089 0.0859282464 0.13152495 -0.128844962 -0.0214285571 0.115861677 -0.0982220173 -0.0596799552 -0.146179408 0.0965073407 -0.0615618378 0.0918795392 -0.0798900872 -0.160657704 -0.126372367 -0.0579259321 0.0660004467 0.161658168 0.104445606 -0.0138723087 -0.0934201628 -0.0294323321 0.00640385225 -0.0297847576 0.00359103805 0.0525301509 0.043393068 0.157615259 0.180452317 0.194085628 0.0691384748 0.134813935 -0.0515557639 -0.0708269849 0.088720344 0.0964737907 0.155151665 -0.0886232555 0.0660808086 -0.0829776451 0.101593263 -0.0553871915 -0.0182833746 -0.0800305083 0.178971991 -0.118555464 0.000954513147 0.00644796155 -0.0530595593 0.0575908013 -0.118949205 0.0164483711 -0.0739891082 0.157318026 0.0651573166 -0.0650493428 -0.0733206868 -0.0576313995 0.164077073 0.145226628 0.162652925 0.0094029773 -0.0282768738 0.00458042137 -0.0341153368 0.0680479035 0.00607198244 -0.00773193361 0.0329170078 0.0389629118 -0.162258938 0.0433447473 0.04725869 0.0874198675 0.0951827168 0.171968609 -0.0408742838 0.0696654394 0.0726477876 -0.0282292832 0.13275665 -0.0873321742 0.118847266 -0.094043538 -0.00783028547 -0.110805973 0.0371340886 0.011893562 0.0594444014 -0.0330062397 0.103927121 0.0566597134 0.070883058 -0.0379756056 -0.0413067117 -0.0425783545 0.111014336 0.0684353039 0.0692486465 0.0570905209 -0.0403401703 -0.0371561572 0.0209832303 0.115475081 -0.0771651715 0.1641756 0.132279456 0.11953865 0.152888119 0.161066189 0.0236473735 0.00623266771 0.0192979313 -0.0633362159 -0.0164176226 0.112811953 -0.0552775189 0.104069315 -0.00800814759 0.142864808 -0.119347326 0.0983854905 0.0419207662 0.172624946 0.13796024 0.14337796 -0.0860052034 0.0162426792 0.189442113 0.152368501 -0.0791355148 0.191711664 -0.045804102 0.0885845646 -0.0440708026 0.0840106755 0.142962575 -0.109646082 
0.00183130568 -0.0525661372 -0.063833341 0.135730505 -0.0724909231 -0.0305184722 -0.0249376651 0.121382438 -0.053534802 0.0984386578 -0.0753171369 0.0939152837 0.00636771461 -0.049575828 -0.0524887219 0.168064952 -0.160115361 0.164482102 0.0465373471 -0.0193462316 0.189078987 -0.0162147954 0.0448620357 -0.0965996385 0.029319942 -0.00714721577 0.113784157 0.0306320339 -0.162794113 0.036773555 0.132374078 0.127610669 0.050170999 -0.0961276665 0.119895853 0.139223352 0.0842405856 -0.0275717005 -0.048406817 0.0329983979 0.0270373188 0.0343525745 0.143185422 0.126393601 0.117207043 -0.111426808 0.00335491286 -0.0176657494 0.00746619329 0.0158684719 0.0560003184 0.162911817 0.169606015 -0.00238073198 0.110828638 0.170307085 -0.00346783875 0.0999374315 0.120749101 -0.0795692578 0.00236885715 -0.0515253469 0.150424793 0.105336741 0.0109604793 0.173142359 -0.0780003294 0.0945810005 -0.00646437472 0.180615485 0.165021613 0.0602739379 0.187165871 0.0765264705 0.127765179 -0.176980063 0.0931098312 -0.0693295747 0.00362776732 0.0865088329 -0.122369155 -0.0243129283 0.0619697198 -0.104171418 -0.0679299384 -0.0262254607 0.0717521831 0.177063763 -0.0804491788 0.0471047014 0.130668938 0.0129235433 -0.044484172 -0.110434927 -0.0484529473 0.056467887 0.161255106 0.0596067756 0.00202068407 -0.0361751877 -0.0691773742 0.13520807 -0.076868318 -0.00055724883 0.136181444 -0.144539505 0.0790223703 0.104204692 0.113801822 0.079020001 -0.00296835252 -0.0561508648 -0.0639912412 -0.096902214 0.063912116 0.0158059336 0.0301315952 -0.0454157777 0.0474643707 -0.0998102129 0.0738191977 0.104636416 -0.0670538545 -0.0157381035 0.0721107796 0.145137876 -0.0157069545 0.0764504448 -0.102792904 -0.0242816862 -0.0148483599 0.164760023 0.0949771851 0.0556184649 0.159742668 0.11568401 0.133465067 0.0253654663 0.0464388952 0.1051047 0.168289691 0.0721212029 0.0222112965 0.177081063 0.0332994349 -0.0857983083 0.168268591 0.0322159566 0.109546766 -0.0359725878 0.173063204 0.114196211 -0.0472102724 
-0.0865413472 0.0990323052 0.106711067 0.124012247 -0.0643537641 -0.0489022098 0.0309291538 -0.139787465 0.154954955 0.158387884 0.175662607 0.0474990308 -0.0359640867 0.0122991987 -0.0742847919 -0.116437078 -0.022644069 0.0906722546 0.109451734 -0.0519334488 0.0178270023 0.166177243 -0.0722740144 -0.00594325503 -0.054272633 -0.142582893 0.0954159126 -0.00533125736 0.0493725352 -0.146273687 -0.00553551223 0.126585066 0.078031756 0.0408783071 0.0403124169 0.0905888006 -0.0999932885 0.0324288867 -0.048167184 0.048798237 0.0910838693 0.010453077 0.0368673541 0.126243964 -0.10025917 0.119546175 0.0917273164 0.00480829086 0.150867537 0.0249657575 0.0332049243 0.0880667567 -0.0586667955 0.072079584 0.175612509 0.0517335869 0.0897404104 0.0503287949 -0.133238509 0.0293708127 -0.0389682427 -0.14583306 -0.0541342646 -0.145008922 -0.00405186322 -0.0991181433 0.149902388 -0.079027079 0.13217856 0.152729511 -0.00680424459 0.0569330305 0.0193487108 -0.0548162982 0.0513189137 -0.0619835034 0.0174638182 -0.102416456 -0.0279327556 -0.129593804 -0.0595460832 -0.0616615489 -0.0346394479 0.108837441 0.0645946115 -0.11461664 -0.00987040997 -0.0194515288 -0.0440613478 -0.0762307048 0.113881603 0.0807152838 -0.0215207562 0.053498432 0.00671930611 -0.0759949684 0.078516528 -0.129376277 0.00994156301 -0.0961488858 -0.0222117975 -0.0067448318 0.109046414 -0.118271597 -0.0475344136 -0.00401996076 -0.0324587896 0.111761943 0.0669000298 -0.140469015 0.0275926143 -0.0115185082 -0.111927435 -0.131411597 0.0218255222 0.0812726617 -0.0837273151 0.0129608214 0.133702025 0.0800562501 -0.0325852484 0.0149642564 -0.0189071596 -0.173735201 0.0099428352 -0.00971476547 0.0206104293 -0.113343984 -0.107871518 -0.120784573 -0.0340123661 0.12200997 0.0924243927 0.134061486 0.00931480248 0.00610988587 -0.143206105 -0.013435632 0.157742649 0.0986237824 0.031523902 -0.109645322 0.101808242 -0.0647404119 -0.0524963662 0.0176734496 0.0557880327 0.108553298 -0.0902532712 -0.130619377 0.0632717982 0.104041591 
0.0543672703 0.0434634201 0.147163749 -0.114743508 0.158163086 0.147472963 -0.0521723554 -0.100233488 -0.0301290527 0.022240812 -0.0719027072 -0.0280963797 0.0667710602 0.00854949374 -0.0772623569 0.0685823038 0.00593935698 -0.0318717696 0.0944193527 0.0372171178 0.0526549183 -0.101998597 -0.0764912069 -0.118265085 -0.0155763114 0.0363124497 0.0786181912 0.0214089006 0.00902846642 -0.0222423617 -0.0114359492 0.016868338 0.0275472291 -0.0955874622 0.068063274 0.0991849825 -0.129994661 -0.14134939 -0.0891924128 0.0620854646 -0.147031859 -0.0616799332 0.125663459 0.0466651432 0.049083516 0.0202748105 -0.0635501817 -0.0811214596 0.0342678167 -0.144643277 -0.0279157292 -0.0520310104 -0.0900295675 -0.0991411358 -0.00983341318 0.0775161907 -0.108855121 0.0795320719 0.0555958673 0.047831919 0.116747767 -0.0178907923 0.00232720398 0.0800028816 0.0948506668 0.109556422 -0.139458165 -0.0708387718 -0.0218770187 0.0945127904 -0.15837191 -0.0499448627 -0.148272514 -0.0720821992 0.0286393929 0.166063771 -0.0910914093 0.0242926553 -0.12178494 0.0714246258 0.0331623964 0.141669735 -0.0356313661 -0.113949567 -0.107093729 0.0499410294 0.178825215 0.0778585151 -0.0158302784 -0.104186572 0.0341806933 0.00881133415 -0.0617542751 -0.136974439 0.141358063 0.00945444964 -0.0606168583 -0.0930097848 0.191351295 -0.0328337252 0.0643470958 -0.0714427084 -0.0224459022 -0.0216223132 0.0666145608 0.158240885 0.197071999 -0.00105335366 -0.130007252 -0.105515987 0.138388366 -0.137579709 0.114597313 0.189100042 0.142671525 0.17301853 -0.134293392 -0.0512899421 0.0793258399 -0.0749241337 -0.0476800092 -0.0704626963 0.188314125 -0.0592299625 0.13017118 -0.101025827 -0.0467180312 0.0082515683 -0.112287328 -0.062346559 0.013593995 0.0616131909 -0.078616567 0.0776763111 0.0905192047 0.0462051481 0.161554873 0.105334468 0.13517189 -0.115146726 -0.143860593 0.0162272323 -0.0732620955 -0.0247567333 0.0928674936 0.115267269 0.0816683248 0.00596335484 0.0484995171 0.0975567997 0.055007495 0.194354102 
-0.00516810175 -0.00678860582 0.0775851458 -0.0466337353 0.106064767 0.143327415 0.0536226183 0.0567489788 0.205744937 0.0850102827 0.066885747 0.0559313521 -0.0664127171 0.157319784 0.0911302492 0.168385208 -0.0391344093 -0.0564815253 0.098924838 0.0328915305 -0.0534631759 0.0365355276 -0.0136043811 0.0482157357 -0.00801647455 -0.0435665064 -0.0428646132 -0.0644146577 -0.049035199 -0.0692589357 -0.113157302 0.0250543877 -0.0696002543 -0.0740747377 -0.0703184903 -0.00694498792 -0.0684268475 0.149164468 0.153054193 0.0761677772 -0.0204661116 0.12485972 -0.126783043 -0.00187381369 0.0541003644 0.107983328 -0.0837595835 -0.0870729461 0.0846110657 -0.0927637219 0.0266203284 -0.00495085446 0.0371818319 0.148703456 0.0929978117 0.00649248715 -0.052705083 -0.00607873127 -0.114258632 0.0115164211 -0.0972977728 0.084398061 -0.103781044 0.042367924 -0.0428892151 0.0323127471 -0.00776143046 -0.0386278778 -0.0703245178 -0.0709099472 0.16123727 -0.0346258432 0.0354511216 0.0405629389 -0.0782804564 -0.038936574 0.0836522579 0.176592737 -0.00653237011 0.0523184314 0.0602646545 0.0936931893 0.161708683 -0.0552729927 0.113780089 0.150944054 -0.0067132581 0.033431489 0.134626687 -0.0877803564 0.0627585277 0.159808293 0.0874273032 -0.0712592527 -0.0547058992 -0.0712833256 -0.0840452388 0.164110661 0.0463254526 0.11971055 -0.0787557662 0.0414926298 -0.0132505866 0.143975362 -0.0445205495 0.0115820579 0.0280404091 -0.0880124941 0.0269049294 -0.151099011 -0.0708077624 0.1746151 -0.0408792794 0.170332685 0.147516906 0.16913189 -0.0805162191 -0.0267642699 0.00414879527 -0.138525307 0.00516474945 -0.0246851854 -0.0470002219 -0.0861195773 -0.100950107 0.0405560918 -0.0432230942 0.104467802 -0.0955900922 -0.0287470184 0.13435027 -0.0651793703 -0.0681400895 -0.105308339 0.105755769 -0.130883738 0.0873673931 -0.0740140676 0.0563389994 0.0325795859 -0.0656638816 0.0513560064 -0.0224221945 -0.117003471 0.0642713904 0.0981090814 -0.0234821606 0.00352106593 0.0429167375 0.0492503606 -0.0412014462 
0.00425704801 -0.0755375251 0.0191665534 0.001770781 -0.110787489 -0.1348757 0.106690206 0.00657321559 -0.0908673182 -0.108731627 0.0566626191 0.0443638451 -0.0703660399 0.0542139225 -0.00493319333 0.12348906 0.00126835168 -0.00761680584 -0.0449741632 0.128203496 0.0697475076 0.156427085 0.111904904 0.0363090038 -0.069633007 -0.124383852 0.0528828725 -0.0260626376 0.146264195 0.202404305 0.123526029 0.113593549 0.063587226 0.115930393 -0.0480900295 0.0870323107 -0.072083123 -0.0411015637 0.1239696 -0.0247502401 0.101827934 0.0338473544 0.179488152 0.0185336322 -0.0024687883 0.193507329 0.0594470054 -0.122646861 0.0367143154 0.0131789902 -0.00824107043 -0.0438461341 0.0322048962 -0.0372958966 -0.0309159439 0.112236492 0.104765609 0.0508270562 -0.134375989 0.0461360626 0.161751613 -0.0832926556 0.0294436906 -0.0715111569 0.151263878 -0.165225923 0.0409720019 -0.0970856622 0.118698254 -0.0376353674 0.0590884909 0.172025725 -0.0388665274 0.0342746116 0.0503573269 -0.0705714077 0.143638507 0.0997425094 -0.0244571362 0.126339301 0.00611212337 -0.0767538771 0.135550186 -0.0817491114 0.0428956598 0.053690739 0.0867463723 -0.120110415 0.092980817 0.0624419227 0.171113074 0.071235843 0.0251063108 0.147427261 0.0600847751 0.147185415 0.0782428235 -0.0334974229 0.140428677 -0.0508178994 -0.0417781211 0.096801661 -0.0446150079 -0.0366388001 -0.0328694917 0.111436456 0.0238367319 0.0866102576 -0.151921302 -0.0687064454 -0.104527332 -0.0853670314 -0.000437619659 0.126783535 0.0569791384 -0.0517665707 +tensor_10weight 2500 +-0.0212358683 0.122197703 -0.0510823093 -0.0501324013 -0.0548678078 0.0356177911 -0.00784289744 0.133274257 -0.113695405 -0.0432012156 0.0939747244 0.0988038033 -0.0639443696 0.0290857553 0.0895108432 -0.0702648386 -0.103292271 -0.0736945942 -0.00376112177 0.0998725593 0.104668014 -0.105342008 0.0656864345 -0.114149243 -0.00571362674 0.0216598436 -0.0114243887 -0.0504532829 0.0596787184 0.00372021808 -0.15211682 -0.0136099635 -0.0886892602 0.0599286295 
0.111488119 -0.0207552537 0.0917330608 -0.0306493416 -0.0526035354 -0.0849622265 0.101938419 0.0732950419 -0.117307015 0.0721970722 -0.0825721473 0.0530262738 -0.0570005514 -0.14794296 -0.125646636 0.0978401229 -0.0572635084 -0.0405422673 -0.0372838974 -0.0117440075 -0.0372347534 0.05405204 -0.125367016 0.00642648339 0.0431452096 -0.0818922222 0.0333031267 0.0492147207 -0.108358391 0.0211769491 0.0296456665 -0.115399122 0.0998793691 0.0635934472 -0.0597816631 0.135748908 -0.0291152298 -0.0674216747 -0.0520641059 0.13197051 0.112524465 0.0249439776 0.0945808142 -0.104327582 0.131835803 -0.0558281392 -0.104002684 -0.0600294694 -0.0934771448 0.035828352 -0.00607830286 -0.0175107867 -0.126915321 0.10744977 0.0146200657 0.080092743 0.0436066091 -0.0399526656 0.00826710463 0.0102796853 -0.138014555 0.088743791 -0.0517612249 -0.103482887 -0.0803165734 -0.126166776 0.0944030806 0.117681094 0.109592296 -0.0782303661 -0.012482455 0.0995225459 -0.139412105 -0.085186258 0.148377135 0.150794506 0.110053003 0.0709926337 -0.000629723771 0.013662681 0.0823172182 -0.0580728464 0.0406894386 0.0878868401 -0.0696239024 0.180716202 -0.0156155387 0.102927946 -0.0455422476 -0.0267394036 -0.105474576 -0.0847397819 0.0854922086 0.0470506549 0.0965595171 0.127182499 0.137551412 -0.0368003063 -0.0720289052 0.0134783518 0.0268878676 0.0088609159 -0.0511660501 -0.0823307037 0.0519733205 -0.0623468719 0.0798326582 0.0710632354 -0.00685403682 -0.137981296 0.011066772 -0.0105396928 -0.0804577619 0.111978434 0.125334308 0.00390364812 0.0692017078 -0.0330482721 0.163387418 0.0201984197 0.151748836 -0.0597846881 0.0121450806 0.0635938272 0.143580437 0.120768994 -0.0914445743 -0.0753249824 -0.0758429915 -0.0557716116 -0.0532640293 0.0682220832 -0.0222889148 -0.0228032283 -0.0421909615 0.0669203699 0.0878867134 -0.10618075 0.0309686121 0.0602300242 -0.115028903 -0.104819998 0.0828765184 -0.0887905657 -0.128947496 0.0665918365 -0.0184224322 -0.0902371928 0.0826666802 -0.118614137 -0.0974627435 
-0.126331478 0.121555597 0.0860794485 0.087571308 0.10830126 0.000543156988 0.0120290369 0.119321413 -0.0516519211 0.0992750004 -0.0404139012 -6.53001553e-05 0.0228800084 0.142584652 -0.112774611 -0.0440897308 -0.101826452 -0.0968946069 0.110168286 0.0165652726 -0.0190366097 -0.00230144663 0.165133551 0.000178731847 0.163377017 0.0187537577 -0.111495733 0.00262274873 0.0370500833 0.111238286 0.0467720181 -0.112312317 0.0970760286 -0.00430823164 -0.18515943 0.0749811605 -0.108619854 -0.115121402 -0.127359807 -0.139737591 0.144899085 0.125063911 -0.0695543438 -0.0871604905 0.0424468778 -0.0412323475 -0.023522187 0.0755847916 -0.00548974751 0.12203481 -0.135008246 0.133453161 -0.0179671869 -0.0897851288 0.0138236731 0.0413174592 -0.130779296 0.0947689861 0.145704255 0.0761682168 0.0180642232 -0.0403898954 0.0231717415 0.0988920107 -0.116821639 -0.000677700795 -0.0828759521 -0.0409976803 -0.0652928352 -0.00307619479 -0.0512737185 0.0509406962 0.164199054 0.0935533643 0.0614940412 0.0182006471 -0.136083275 -0.036729455 -0.0655212551 -0.000610545278 0.110906526 -0.0215685032 0.0942183807 0.091754362 0.0299259573 -0.0927302539 -0.0870193392 0.041432485 -0.0959858447 -0.0521472655 0.133616418 0.00140008167 0.0231243372 -0.0265129507 -0.0465798788 -0.0466384888 -0.115962021 -0.0177416876 0.0583319366 0.0504825823 0.0262723565 -0.00666236039 0.0547430068 0.176730543 0.0766595826 0.0228095483 -0.0677164495 0.142202839 0.0357140489 0.152742878 -0.138932645 -0.0411144495 0.046292562 -0.0618947372 0.129624233 0.0132857962 0.13160333 0.0381516591 -0.00642365264 0.146867096 0.172721684 0.0822038203 -0.170568198 0.104037531 0.0469250493 0.149638385 0.0287801176 -0.105733179 -0.060603328 0.104576632 -0.109430753 -0.0709009469 0.142124668 -0.0206337441 -0.0685040206 -0.00922098476 0.154050708 0.140051425 0.0351035632 -0.049322959 -0.133136809 -0.00690346071 -0.126874417 0.0869078487 0.163344264 0.177283853 0.0478345305 0.0679927543 -0.122830227 -0.0329710469 -0.138243169 -0.12043523 
0.018810302 0.0234610289 0.085149698 0.122265451 0.0947110653 0.0231434219 -0.135353088 0.102692701 0.0495786704 0.0994817838 -0.00882655289 0.0960466415 0.139429167 -0.00701248366 -0.0530242138 0.13024801 -0.0270533189 0.0678792298 -0.0942333192 -0.0897237882 -0.116916768 0.128777713 -0.0864267498 0.000630012888 0.0271891207 0.0435388982 0.0202370584 -0.101047307 0.0206810199 0.17194964 0.148503706 0.0857690498 0.161830828 -0.135013863 0.118901089 -0.0623394176 0.144353598 -0.0805446953 -0.166448697 0.0953875184 -0.0034776032 -0.108664006 0.0685736537 -0.0221380815 -0.0657745823 0.00281999423 -0.107203327 -0.0475207977 -0.055595018 -0.110597998 -0.000231489539 -0.0365380459 -0.0790299848 -0.0336767174 0.00941203535 0.0301933419 -0.0706446469 0.0102126878 -0.0478211716 0.0370407067 0.106186956 -0.0224919319 -0.0260020383 0.0235891566 0.0859296694 0.108447783 0.00656368863 -0.0794644728 -0.075159736 -0.110706359 -0.0758301392 0.0576422177 -0.0484935977 0.0564662404 0.137293592 -0.1790566 -0.00773862004 -0.0347555578 0.0215124693 0.0171802938 0.176061988 0.0460711457 0.147571266 -0.102327831 0.195341617 0.058222834 0.13799569 0.118976817 0.0509692021 0.050737951 -0.0811304599 -0.0793619528 0.0404427722 0.166059211 0.0436591581 0.0677934214 -0.103122085 -0.0477355011 0.0969016552 0.0731616244 -0.0612649173 0.0483927317 0.101544008 -0.0105341347 0.0251087993 -0.0852457061 0.0189878102 -0.00559592852 -0.0792983919 0.0710483044 0.0725165606 0.0998317599 -0.00865345914 0.0574824326 0.00925513823 -0.124299236 -0.152690053 -0.0688084438 0.0785533562 0.0506814644 0.147026345 -0.020497581 0.0864086375 0.0827063099 0.0358608812 -0.0127497688 -0.051433742 -0.03434515 -0.128280848 0.0762891397 0.0711952001 0.0842626169 0.116974868 -0.13133204 -0.0335378908 0.0899138004 -0.133157939 -0.120754138 0.122247837 0.144659519 0.0242889076 0.0994777754 0.06880299 -0.0276972707 -0.113097489 -0.0623187982 0.0822641999 -0.119825244 -0.103717044 -0.0319737606 -0.135691062 0.152195513 
-0.0339170918 0.14714168 0.0540374666 0.133274242 -0.0455024727 0.113066867 -0.0257611051 0.0637556389 0.0359611772 -0.0776446313 0.0980009288 0.00792387128 -0.113957405 -0.0919724554 0.0144144921 0.138423935 0.073610343 -0.143383607 0.0898004025 -0.0354972184 0.0135471914 -0.157295063 0.0106644779 0.0293142907 -0.00285607576 -0.0387508944 0.0805381238 0.134696633 -0.0486889333 0.115237691 -0.114024878 0.0478819907 -0.204706341 0.0640740022 0.0566777252 0.127831176 -0.0553595684 0.111671567 -0.09231098 0.0911259577 0.000540402718 0.048529759 0.0324980132 0.131618008 -0.0291069895 -0.117312111 0.119812474 0.0617211722 -0.0828384385 0.103557624 -0.102094062 -0.165967241 -0.0316339955 0.114424214 0.0979985967 0.106774256 0.10326688 -0.0260687321 0.0577113181 0.107890628 -0.207330927 0.13924247 -0.108645178 -0.006839226 -0.0576377921 0.086189121 -0.0478715226 -0.0539538078 0.0736430809 -0.113258503 -0.144859955 0.00832846761 -0.139867589 -0.0278512705 0.111798391 -0.128404155 0.0160521064 0.0621018 -0.0222936384 0.067758739 0.151578188 0.0899427235 -0.0596541949 -0.107079484 0.0257597771 0.10411863 -0.0212498736 -0.106785044 0.102427348 -0.037666291 -0.0371498428 0.12570031 0.0917552412 -0.0337271765 -0.00665520132 0.0846108422 -0.137335181 -0.0166137852 0.0288310051 -0.0332151465 0.129317015 0.00994049478 0.0253879577 0.0699467286 0.0467383862 0.00212879106 0.0807025656 0.104752108 -0.0590361021 0.0355920568 0.144212484 -0.0798209384 -0.119622223 0.129336998 0.144326404 0.140256554 0.130315661 -0.0842513517 0.0202817731 0.0428673401 0.0224014688 0.0633894131 -0.121904492 0.0456793755 -0.119502507 0.0149252117 0.0991675705 -0.00801187102 0.0127708912 0.146813497 0.0297204014 0.0190939084 -0.0910222307 0.0188052319 0.158833399 0.0565757714 0.106845409 0.126346767 0.0609186888 -0.0289904978 0.176135257 -0.0152246384 0.00210902141 0.170274019 0.147236124 0.100339673 -0.0750769973 0.126910731 0.0639681518 -0.141766325 0.00575648621 0.00252318289 -0.132266387 0.0917782336 
0.165258139 0.159399614 0.110745199 -0.0729087219 0.00930848531 0.129214033 -0.0562379323 0.114632159 0.0686002523 -0.0798907951 -0.0475209691 0.135875911 0.0170198567 -0.116139926 0.00134182745 0.119676389 0.00874867849 0.044542592 0.0469349325 -0.171083689 0.137197331 0.0741593838 -0.0608005896 0.142061442 -0.012179587 0.0524498336 -0.0355517454 -0.093820259 -0.116283298 0.0172735397 0.112878129 -0.133789182 -0.0446203165 -0.056491144 0.0793790519 -0.0914917514 0.102299117 -0.0138002355 0.162907958 -0.0748615712 0.138051897 0.106378302 0.00609137118 0.122283094 0.139686123 0.0373347066 0.0692594871 -0.09532848 -0.0478848442 0.125734076 0.0966270939 0.079935506 -0.010005963 -0.062777698 0.0338496156 0.0195008758 0.0981275588 0.161850542 -0.0924032331 0.132830709 -0.061219655 -0.112690888 0.121970147 -0.0678780898 -0.0108335214 -0.026362868 -0.0930077806 0.0707007274 0.0775098354 0.0764014944 -0.0804891065 -0.113752075 0.00710404944 0.0588292368 -0.0711446628 0.0660018176 -0.0639827028 -0.0958132148 -0.123145066 -0.0116624041 -0.0329767279 0.0308814552 -0.138171315 -0.126469448 -0.0836871266 -0.0362357274 0.0118497657 0.0553193018 0.162871584 0.00862812717 -0.0775254369 -0.0664473996 -0.0720814988 0.0207482781 -0.000734820962 -0.0429652929 0.156263977 0.0950327292 -0.131239817 0.0131368376 0.0467994325 0.152589336 0.0339371823 0.0687561333 0.0528964065 0.165966034 -0.124363385 -0.0527783372 -0.0752571672 0.0272485688 0.19544439 -0.0526332743 0.11452125 0.06950555 0.0388930514 -0.105637603 -0.031256184 -0.143096924 -0.131879777 0.0679151788 0.0898881108 0.138390079 0.0468003303 0.00110050617 -0.166997537 0.148328051 -0.135715783 -0.0184072368 -0.0204313342 0.0777179599 0.0854007453 0.0669743344 0.0512876213 0.0736838058 -0.0702814385 -0.153760359 -0.0127334371 0.0702296048 0.0945134461 0.113579147 -0.045809608 -0.0650008023 0.0018505156 0.0389033966 -0.0164941698 0.0427322201 0.0889017582 0.0659029856 -0.0811767578 0.0873623267 0.18029575 0.16048792 -0.112073712 
-0.134867206 0.0882760212 -0.102488875 -0.175802514 -0.0120754875 0.0511610173 -0.0656074211 0.0839222074 0.132837757 0.19432193 0.0962905958 0.11927399 -0.0800096765 0.0369717441 0.172430858 -0.124649733 -0.0634947792 -0.0230292752 0.0867050886 -0.0332086422 0.0130523248 0.1857972 -0.0377311036 -0.0985669419 -0.0854148418 0.0455307364 0.11115057 0.0544578135 0.0360678583 0.0398137569 0.0514812209 0.0629364699 0.157751113 -0.0630130768 -0.0467872992 0.104552008 -0.00756954914 0.128961414 0.152928904 0.0538875759 -0.0584964044 0.000610977411 0.106909499 0.167631388 0.0725584775 0.129740968 -0.0139085511 0.102276772 0.0839342475 0.0176016726 0.0198159665 -0.0576478094 -0.0319991671 -0.0245914981 -0.0934077576 -0.00444747973 0.0162859596 -0.12718007 0.137412518 -0.0712438971 0.0353779495 -0.00824063458 -0.0929021388 -0.0211561657 0.00593618024 0.0581243485 -0.0115015805 -0.117641151 0.0745487809 0.100706778 0.101716518 0.180298716 0.0615622588 0.190501958 0.0251207165 0.161565259 0.0331474617 -0.049629122 -0.0350433774 -0.177207738 -0.0606218576 -0.135168135 -0.0552285984 0.0633888692 0.169191226 0.181376621 0.123600326 0.0766487271 0.0379917473 0.0728779733 -0.0138807297 -0.126598462 0.0824816525 -0.0179684516 0.0607266538 0.127557591 0.130710021 -0.0497126617 0.097561419 0.0444984324 0.00906473491 -0.107714869 0.109389283 0.0244740434 0.0977818221 0.0717759356 0.146900296 0.0456320979 0.0432121679 -0.068095766 0.035988193 0.16688697 0.015179188 0.193963483 0.030294802 0.0684091449 -0.154407337 -0.0253650546 -0.1035157 -0.123490132 0.160163686 0.000832796795 0.0173495747 -0.103751779 -0.114844963 -0.0266452879 -0.10764344 0.057379473 -0.0908356607 -0.0109218499 0.0242156517 0.107376277 0.00434230454 -0.0106718605 0.126825973 -0.076470606 0.082616128 0.043287307 -0.0409609638 -0.0944999158 -0.102769725 0.0562386662 0.110875674 0.075079143 -0.0874663591 -0.087224707 -0.0344407968 -0.032368429 0.139833078 0.138399825 0.0838530734 0.145841986 -0.0942437425 0.0514989197 
0.106409363 -0.105719045 -0.0327457897 0.0023922089 0.0723857582 0.115679517 0.0641390905 -0.0389708839 0.064969562 -0.11146944 -0.0840641856 -0.00882211328 -0.0985186249 -0.0100410283 -0.140838861 -0.0820496976 0.126737103 0.0637906492 0.185262144 0.0401138254 -0.0199363641 0.0733033046 0.0741309002 -0.0405171663 0.119358622 0.131731167 0.00470401347 -0.0874768347 -0.0030607495 0.154426917 0.142330453 0.0776753575 -0.0136618298 -0.0559839308 0.117518134 0.0162534118 0.105629325 0.0662130266 -0.0396728814 -0.0103532532 -0.0827013478 -0.0040447861 0.113319181 -0.0071705617 0.118163139 0.0378845818 0.0424246453 0.148900077 0.0901416466 -0.131045297 -0.0908931792 -0.00706362771 0.118041806 0.0408021808 0.0160984807 0.114142895 -0.0700615197 -0.0494136475 -0.0595068522 -0.0467700139 -0.169063121 0.0931548029 -0.0394937769 0.0162656307 0.10976477 -0.124994159 0.152687699 -0.161219954 0.0349471532 0.00292883557 0.0899900794 0.0686580092 -0.0421929248 -0.0581205003 0.015063826 -0.0568712726 -0.147363365 0.0534492135 -0.0798066407 -0.0166791826 0.0615924746 0.0440535769 0.157255575 0.0443781093 0.107450068 0.0306068957 0.111347824 -0.0176973268 0.0151626179 0.083994858 0.00934020989 -0.0776927471 0.142738372 -0.0590152331 -0.0490930192 -0.103396282 -0.113522559 0.0619038753 0.14320752 -0.162481412 0.0684234798 0.019900918 0.0254009217 0.104212388 -0.034442611 -0.0442203879 -0.0163948387 0.00209845603 0.00254264474 -0.0859125108 0.0658564866 0.0487911701 -0.10677994 -0.0656869113 -0.0264248922 -0.102019623 -0.102298513 -0.138458312 -0.119788498 0.0699746907 -0.127610922 -0.029055886 0.0234031677 -0.0221559554 -0.0785237625 -0.079463318 -0.00504159927 -0.0934635699 -0.128190622 0.116880015 -0.131823063 -0.0321453363 0.00674818456 -0.118852653 0.0121976882 -0.124695078 -0.0894826874 -0.0633899048 0.0750036389 -0.0270189941 0.0216782093 0.119181171 0.139441684 -0.0148586482 0.0138805658 0.0725070536 0.0154770464 -0.126432493 -0.0768562183 -0.0687850416 -0.0399501249 
-0.0991529524 -0.0160450432 -0.0420119353 -0.0326361097 -0.0698527694 0.0993200317 0.205627039 0.177876145 0.158109769 -0.0378685482 -0.0387157574 0.0318316072 0.0931719393 -0.0390335843 -0.093457289 0.158116326 -0.0205905512 -0.0280272551 0.0694964528 -0.00315693673 0.100278348 -0.115536571 0.134927243 -0.0945299864 0.157936096 0.0946900696 0.0962090343 -0.0132327564 0.0670364797 0.112801351 0.0329531841 0.00913766772 -0.157800838 -0.0242684614 -0.062707752 -0.111074157 -0.0969263613 -0.0453660265 0.0788833573 -0.10100504 0.0351482034 0.00255969632 0.146189213 0.176710948 0.076782994 0.0267225392 -0.0470640622 0.0270443261 -0.0651312843 -0.0475523248 -0.0839515477 -0.0822535306 0.0876104087 -0.0853568986 0.0850623995 -0.0537006631 -0.00454986189 -0.115827605 0.076602146 -0.0653875545 -0.0943169966 0.0738318786 0.00882227719 -0.105202228 0.0500551313 -0.0072239181 -0.095633924 0.0686312243 -0.155843362 -0.10586188 -0.0515479743 -0.0627304092 -0.119748496 -0.055315733 -0.034655381 -0.011634198 -0.0673866794 0.0497374982 -0.0230727922 -0.117348522 -0.0596318021 0.124789402 -0.100009322 0.125387162 0.125093237 0.11934261 0.122768745 0.00161180296 0.0404978395 -0.0511606485 0.025636822 0.0928659737 -0.119610175 -0.0845185071 -0.0342509151 0.0703661814 -0.0939814001 -0.0344806798 -0.0912777558 0.00682032388 0.121527597 0.152524486 -0.013022732 0.0765541866 -0.0932440087 -0.0872863084 -0.0211912636 0.0815037265 -0.0279196408 0.0894725025 0.0345439613 0.00409509987 -0.0809944794 -0.1393179 -0.125571921 -0.00232244516 -0.149977431 -0.000756583293 -0.107811421 -0.13097401 -0.124444515 -0.016273234 -0.0980345458 -0.0679222867 0.145107448 -0.0677620098 0.11634396 -0.150404945 0.0630804896 -0.0887408033 0.0502750538 0.0362975895 -0.000291265926 -0.00586632686 -0.0945255011 -0.106234178 0.140956268 -0.0830846429 -0.0355807208 -0.0545149483 0.0505581349 -0.00131463038 0.158409923 -0.0294266306 -0.075270161 -0.0552593507 0.110549971 0.0572320521 -0.139074802 0.079300411 
-0.13124457 -0.0520654507 0.0863010138 -0.0589499101 0.0994329005 -0.0107528744 0.133117393 0.135243297 -0.100229755 -0.064115867 0.117229715 -0.0909496993 -0.123401761 -0.118980557 0.0693805069 -0.0614001341 -0.103551611 0.0602181554 -0.0570658669 0.0473706871 -0.115162462 -0.00543242693 0.0720573142 -0.0638199747 0.163928419 0.15177232 -0.107249737 -0.150279045 -0.124889374 -0.0754014552 -0.117463201 0.0907788277 -0.137130409 -0.112758525 -0.0632348582 -0.085662134 0.074386403 -0.0294712894 -0.103990085 -0.0954368263 -0.116161741 0.139300272 0.109896317 0.0364004523 0.0969301388 -0.0921835527 0.0688580126 0.0143036414 0.135515511 -0.10856048 -0.128424734 0.00355436816 -0.0129383691 -0.0427853577 0.0744294003 0.120447546 0.0322268978 0.111017898 -0.148288384 -0.0120353373 -0.0698817894 0.123846047 0.0900507122 0.0200054049 -0.135218769 -0.0566676334 0.000346322719 0.138647377 0.133002952 0.0393482894 0.0430621244 0.123535052 -0.0048245755 0.0777058303 0.105987355 0.0330017395 -0.0667480379 -0.0552513823 0.119488135 -0.0444536582 -0.0245578699 0.10310775 0.135614321 -0.088050127 0.101776421 -0.106960148 -0.0866610706 0.0718072876 -0.126476645 -0.124185599 -0.0585252866 0.0773498043 0.0260625742 -0.0329737999 0.050648436 -0.0158751626 -0.11962828 0.119259298 0.074308984 0.138976827 -0.159304231 0.0172711946 0.164110437 0.100464553 0.10389293 -0.0240411293 -0.0973320231 -0.119676456 0.00236897776 -0.0644378364 0.0506922454 0.19321233 -0.000225052238 -0.0394313931 0.110896409 0.132451773 -0.0674725696 0.106891982 0.0839640722 0.00246000011 -0.128063992 -0.106095694 -0.0592876561 0.0409409404 0.0347136185 5.23093404e-05 -0.0714386553 -0.00371642876 -0.109261386 0.150267109 -0.0990683362 0.151943013 -0.0184463765 -0.0465980396 0.0613862872 0.136278436 -0.00558510423 0.100865357 0.101894312 -0.0244332775 -0.125516385 0.0207252149 0.101681627 0.145940647 0.0516951084 0.16129303 0.178174466 0.067602627 -0.0624158829 -0.0751179382 -0.0219047312 0.052100759 -0.0685305297 
0.0354404449 0.145376444 0.18646054 0.178686082 -0.0324649215 0.186916694 0.0407279916 -0.130371153 -0.0193989351 0.0320602059 0.0507811755 -0.113682859 0.0327428654 0.00616077147 -0.0258325189 -0.143563882 -0.00310279964 0.185173586 -0.0143889384 -0.0377997085 0.102268487 0.193651006 -0.0199139044 0.0859818161 -0.12391866 -0.0357881412 0.117275149 -0.00614784472 0.157511353 -0.0342883319 0.0726206675 -0.140140399 -0.0890319422 -0.084323287 0.181992263 0.0513049066 0.0995599255 -0.0906688347 -0.103707798 0.189509571 0.0850646123 -0.160007775 -0.125715733 0.119538076 -0.119608335 0.0870004892 -0.0289871693 0.0428543352 0.0881076306 -0.0739037469 0.150500178 -0.0330894664 0.120570533 0.0367856883 0.0812487155 0.0561989732 0.0205095038 -0.0794103295 -0.149736494 0.0125511289 -0.117065132 -0.107071228 0.0454606973 0.0796560869 -0.101957068 0.147831231 -0.0325231701 0.102706663 -0.00391825195 0.164131463 0.113403954 -0.121139087 -0.124457628 0.00445246696 0.040741276 0.135801449 -0.00480739959 -0.078587763 0.0388003998 0.0373185351 -0.0382047556 0.123435661 0.0475043617 0.00739693642 0.0977098569 -0.0144506181 0.113023907 -0.134157673 0.115016133 0.0749712959 0.122992218 0.128705814 -0.0182231162 -0.0774768889 -0.100664325 0.0511561967 -0.0247491226 -0.0616593063 0.0696846992 -0.131600708 0.128312185 -0.0420887545 -0.0552831143 -0.132332042 0.0888988078 -0.0928973109 -0.0273143947 -0.00685594976 -0.0243961141 -0.0604439601 0.127762869 0.0312011689 0.0182463527 0.0971829295 -0.0156243443 -0.110507861 0.118185356 -0.0953080505 0.0230623633 -0.139256597 -0.0295829773 -0.00552763045 0.02986148 -0.0686590225 -0.092862606 -0.1297746 0.0351940989 -0.0635817125 -0.00291644037 -0.0894218385 -0.081991896 -0.0788865611 0.123435721 -0.07980977 0.127385929 0.0179787278 0.14100419 0.0584084392 -0.11898651 0.0776305497 -0.0203871056 -0.0342618525 -0.0426038876 0.0608503371 -0.0924751759 -0.110731475 -0.0939228088 0.0693204403 -0.0963335559 -0.136444792 -0.0746414661 -0.0768960863 
-0.0328734815 0.121303841 0.0307305455 -0.0240531266 0.0461567193 -0.112294145 0.0924198776 0.0726142228 -0.0232265219 0.0855844915 -0.0904331505 0.00588195026 -0.0725407153 0.0315330774 0.0881674886 -0.0255857036 0.00970177352 -0.0484308302 0.0706667453 -0.0180258229 -0.0909893364 0.0912033021 -0.0541153103 0.118749335 -0.000519677997 -0.12067198 5.66840172e-05 -0.0196669474 -0.0159197301 -0.120875023 0.0988306701 -0.0475680456 -0.0561974943 0.0541714281 -0.138189748 -0.00213968754 0.0791497976 0.0153562725 0.102331981 -0.02512566 0.0891461223 -0.100474566 0.0814501047 -0.0774096027 -0.0491994061 -0.0873599797 0.125588104 0.194157138 0.021343857 0.153216049 0.153526738 0.0659974441 0.00557104684 0.00512425043 0.0593339801 -0.0341665149 0.1543639 -0.000350087881 -0.0057605654 -0.0569509007 0.0794611201 -0.0399132632 -0.053406354 -0.021354856 0.0188272204 0.161608189 -0.124593504 0.0364353582 0.0259574521 0.0545204356 0.123703361 0.0302464068 -0.180545017 -0.0880055279 0.0392708704 -0.0457140729 0.0489293262 -0.0629897118 0.0735282153 0.0903664082 -0.0868221596 0.0773085654 0.0447509512 -0.0666414499 0.10026215 0.0482167453 -0.0492483862 0.194690838 0.0770569816 -0.142567039 0.114170618 0.0906017646 -0.0648275763 -0.0698935315 0.0423483737 0.0252110269 0.0347312838 -0.0560424551 0.155089974 -0.0118696066 -0.154669881 0.122313514 -0.0593367517 -0.133725002 0.0684605017 -0.086332038 0.15027903 0.00808770955 0.0856792182 -0.103054002 0.144213781 0.0522497185 -0.012934139 -0.090749681 -0.111638978 -0.15779312 -0.120520085 0.00437208824 -0.0104053728 -0.0667165816 -0.126051918 -0.0548642427 -0.104130857 0.0505597442 -0.103951007 0.0535476543 0.126115173 -0.126277968 -0.0643455386 -0.0392601117 0.0434453227 0.0778148323 -0.0601691268 -0.0518789664 -0.104312316 0.0010818392 0.0459540337 0.0427468866 -0.053878624 0.139487013 0.0611597076 0.0600839928 0.11611075 -0.0964443162 0.0624526814 -0.120941721 0.132605001 0.0708762035 0.163541496 -0.00394374598 -0.166257143 
-0.203483716 0.0616421737 0.116930254 -0.0280899294 0.0485812873 -0.219869539 0.00187383592 0.0102478564 0.108631112 0.063601993 0.0346559994 0.0576099493 0.129458979 0.0913215131 -0.0457242802 -0.128046185 0.0772113949 0.0461370759 -0.16218935 0.031650953 -0.0577221215 0.060324017 0.11425402 -0.0406978093 0.181004599 0.122253641 -0.0693835318 0.092224963 -0.00997300446 0.137522176 -0.100897603 -0.0125190523 0.0379933044 -0.0922655612 0.0624521673 -0.0938842148 -0.0446401648 -0.0100792432 0.153803915 0.114698537 -0.0192592535 0.0803508684 -0.0886118263 -0.0518604182 0.0633063093 -0.051035013 -0.110467285 0.0447706729 -0.00330133177 -0.0912512243 0.00899628736 0.165726572 -0.0454223789 0.101888083 -0.0568365306 -0.076063931 -0.0752097219 -0.0845429227 0.0309042297 0.0270464383 0.124918491 -0.0662075952 -0.108895019 0.139057159 0.140161321 -0.119610265 0.136644498 -0.0796718523 0.00887751393 -0.178879619 0.0901582688 0.138248637 0.0798882693 -0.0129555576 0.0835241526 0.112452 -0.0344961025 0.00462428341 -0.129168198 0.120342299 0.0168783814 0.0924949348 0.104666054 0.0520613231 -0.048391819 0.0304543953 -0.0920903161 -0.0853118896 0.0968189314 0.144676819 0.0184051823 0.129063278 0.158408046 -0.0152725829 -0.0893933401 -0.0740989223 -0.0408062041 0.000471571781 -0.126842275 0.0290431026 -0.00776752643 0.0354271829 0.137151405 0.0211606342 -0.075007841 -0.0753669515 0.046267733 -0.09437415 -0.137393638 0.113058127 -0.026870288 -0.0587519705 0.117264286 0.00866104662 0.0201778039 -0.0951031819 0.018844882 0.0590609238 -0.0709743276 0.0873017982 -0.105027102 0.0530570112 -0.133408979 0.0909369588 0.0397072695 -0.126006484 -0.0984181166 0.130100951 -0.0197343528 -0.14802596 -0.0323335156 0.0627914593 -0.115520857 -0.0452975444 -0.012140125 0.0625165701 0.12852025 0.137867913 -0.0958045647 0.0133615816 0.107896157 -0.0483738929 0.129055023 0.116109088 -0.077403754 -0.0633423328 -0.0807941109 -0.0901692063 0.135869384 0.115573078 0.0980086252 -0.0180905815 0.00574294198 
0.0841204077 -0.114126891 -0.0070350226 0.12609975 -0.12341994 -0.0101188719 -0.0531954169 0.0926595107 -0.14045234 0.0385289043 -0.00951111317 -0.130858913 -0.0142143071 0.106903538 -0.108551912 -0.125400409 0.058905676 -0.117564946 -0.0387115702 -0.120323576 -0.00476152822 -0.117001377 0.0521154404 0.0897176862 -0.143047303 0.0822072178 -0.0936514139 -0.0156190991 0.0703094602 -0.109612264 0.10537225 0.10860981 0.0861182958 0.0131997541 0.0654514953 0.0550837517 0.0355030596 -0.127356902 0.0202751011 0.0479572453 0.0155448811 0.0376787409 -0.104008965 0.138826773 0.128562942 -0.0176690705 0.00616015308 0.0954742804 0.168244436 0.0313249305 -0.00562194875 -0.013728708 0.0743624717 -0.0685951263 -0.118008956 -0.0987153798 -0.0248185005 -0.140825942 0.0919594541 0.0747423917 -0.0905265957 0.129529849 -0.124772102 0.0051275813 0.0195230469 -0.146458313 -0.0511724278 -0.00252351165 -0.0710987002 0.164692074 0.0975965038 0.0976115763 -0.017378984 -0.0336411037 -0.00410315767 -0.111187756 0.0705890879 -0.0971891209 -0.170355156 -0.0228465442 -0.000340677885 -0.111434825 0.0483258702 0.0879553556 0.0596619062 0.0922146142 -0.0954236314 0.100458361 0.0672615394 -0.0278729852 0.0144202998 -0.0134725468 0.00500577223 -0.0834878609 0.0704662427 0.0280400775 -0.173883215 0.0107927518 0.145330369 -0.00825130939 0.0123181194 0.104186043 -0.00961343665 -0.0443730503 -0.12468195 -0.127869114 0.0712913722 0.0349406078 -0.0263537578 0.140321776 0.0573507696 0.0558672026 0.0422607958 0.0942431912 -0.134432197 -0.0805723518 -0.117681436 -0.0301251039 0.0112107592 0.0862491578 0.0813791007 0.15245752 -0.104849078 -0.1278539 -0.0234719608 -0.0375554711 -0.00391989155 0.0951362252 -0.124997646 0.101080559 -0.0823273435 0.114658192 0.0812243894 0.0979240239 0.0534090586 0.11992234 -0.0614060089 0.0227183215 -0.0045140041 -0.0936731175 0.164146602 0.109354012 0.0560285486 0.0972351655 0.0452851653 -0.0221107267 -0.143891752 -0.0367256775 -0.100730948 -0.0520177893 -0.0628575087 
-0.0123458058 -0.144427627 -0.0617014915 -0.0792786926 0.105423264 0.118998893 0.065015249 0.103419602 0.078821741 0.11923746 -0.0886161029 0.0995480195 -0.00256725191 0.00349370553 -0.0529144071 -0.0110389693 0.137066856 -0.177615538 -0.0219782609 0.0117756883 -0.0622062907 -0.031752415 0.109670192 0.00176507875 -0.128339231 0.0967004448 -0.0175355524 -0.0179675464 0.00220880075 0.0363271013 -0.0472624972 0.0390423499 0.0431711748 -0.0524883382 0.137009606 0.032474678 0.0447325036 -0.109758742 0.0141501743 -0.0879177302 0.0324584693 0.0984169021 0.0776358694 -0.0186196659 -0.0178685524 0.136504993 -0.130911753 0.120253332 -0.00131494773 0.0500290655 -0.0261307955 -0.115568407 0.0599436909 0.0793855786 0.013578156 0.164593741 -0.0608182624 0.00377323222 0.144239753 -0.0365212336 -0.00670494884 0.113193937 -0.0333096795 -0.0992462263 -0.0314201638 0.121462323 -0.00150117278 0.152934536 -0.0595164932 0.155643508 -0.0155162774 0.113217972 0.0924211666 -0.0591561124 0.119596377 -0.148361132 0.0774345249 -0.0543995574 -0.0430051051 -0.0587892085 -0.0859734565 0.0093586091 -0.139499143 0.144523352 0.0625231117 -0.0972201005 -0.0138707748 0.105667144 0.0957431421 -0.0930925608 0.114664152 0.0465527698 -0.0664715692 0.103240147 0.116609365 -0.192610845 -0.152009219 -0.0707181469 -0.123133332 -0.0165981147 -0.119874239 -0.123395704 -0.134802729 -0.0183263794 -0.00162356615 0.0698149651 -0.0478172004 -0.0292479843 0.0124004614 0.0228632949 0.125499591 -0.128627047 -0.0394836068 -0.0904139578 0.0717693791 -0.0241678786 -0.0282474756 0.066885747 0.172793537 -0.0325833708 0.134693041 -0.0837737098 0.028767975 0.149519458 0.10379082 0.134075478 -0.00300905108 -0.11726708 0.00835976377 0.0115354434 -0.104070403 0.0869700015 0.0493195616 0.173674643 0.136860088 0.133943602 0.0349466503 0.0715380386 0.0455492027 -0.0717399567 0.0613892823 -0.071349673 0.103834003 -0.0662872195 0.110759154 -0.0086634336 0.11282818 -0.0787081271 0.0525955185 -0.115244508 0.017306909 -0.148093 
0.11725767 -0.0918413401 -0.0415653959 0.0675327182 -0.142755657 0.122713141 -0.0754952356 0.119042411 0.120726988 0.0600856133 0.0850800574 0.042756021 -0.0042983531 -0.0966215879 -0.109561965 -0.121179365 -0.121256597 -0.0916649252 -0.0139565729 0.0183815174 0.010678432 -0.0070024007 -0.10860841 0.123465493 -0.0865222588 0.0993401259 0.0353338942 -0.122070476 -0.00103206933 -0.147107095 -0.0779001042 -0.0985540375 -0.0854975283 0.0685408339 -0.065476723 -0.0296396669 0.0578391589 0.0765029639 -0.0989598259 -0.0449470505 0.0685051531 0.0537158102 0.0139079243 0.0583296567 -0.0594620258 0.0189818889 -0.0988531634 -0.00361028314 -0.0409312546 -0.0480199158 -0.0213640556 -0.126316875 -0.118182555 -0.134643987 0.0825530589 -0.0812493861 -0.00928412378 0.0827149451 0.014478147 -0.124131575 -0.13848491 0.113321409 -0.0203420967 0.132658973 -0.0168279931 0.0254632235 -0.0577695444 -0.125243694 0.0185761452 0.125729159 -0.0470989868 0.137575284 0.00578674674 0.125307932 0.13194342 -0.110660031 -0.108901128 0.0635878146 0.139960542 -0.0849670395 0.0299605131 -0.0711956099 0.0486410856 -0.0859787986 -0.125518829 -0.0788395777 0.113387808 0.172473475 0.125997916 -0.03521844 0.00814832374 -0.0874923393 0.0611119755 -0.103455245 0.0358751714 0.0404794477 0.116629399 0.107773907 0.0883051604 0.155530751 0.0984854996 -0.0652066395 0.0862129629 0.0566241778 0.0940288976 -0.0396610685 -0.054708723 -0.0403634794 0.0204263702 -0.0830316693 0.175091997 0.0323943421 -0.0326925032 0.187265396 -0.0557819456 -0.149056599 0.161892369 -0.0292865653 -0.106854089 0.113595374 -0.0478481576 0.0871280357 -0.0546426699 0.109687231 -0.0883881673 0.152132541 0.0782102272 -0.0743416622 -0.0343025103 0.151285902 0.0897574127 0.0613243282 -0.0363124833 -0.0416660458 0.0365589708 0.00544850901 0.0712229908 -0.0174090713 -0.00367248501 0.100005753 -0.0259860251 0.0931316465 0.0641765073 -0.0520533472 -0.000773876556 0.113493264 -0.0614270456 -0.126095414 0.11269249 0.00773805752 0.0544681847 
-0.0156176239 0.0190839916 0.00293012918 0.0550577864 0.0748387203 -0.0842898712 -0.0772737563 -0.157246128 0.00333786267 0.0245691296 -0.0825911462 0.0128215477 -0.0858282223 -0.132421732 -0.0927961841 0.144973248 0.0598813556 -0.0359893106 0.000982378377 0.100832321 -0.106284365 0.0759487748 0.124556273 0.105182365 0.00680022268 0.0535307154 -0.0955361351 0.0177737772 0.0095509449 -0.00627064146 0.033846356 0.168117985 0.0789029747 0.152832642 0.175960913 -0.00220050896 -0.0862830505 0.106605045 -0.0212400369 -0.0751578733 -0.0164428316 0.0700538829 -0.0992731154 0.129148081 -0.0179386213 0.0452752709 -0.0637316629 0.0592104197 -0.128685504 -0.0348353833 -0.087284103 -0.0663602129 -0.0031849267 -0.000142063553 -0.0840348825 0.0924766883 0.0673238337 0.167500377 -0.0353617668 -0.0333009921 -0.112182476 -0.04946943 0.134662643 0.139826789 0.156980231 0.0751472116 0.0695004016 -0.0762207955 0.156616062 -0.0323073752 -0.074322626 -0.0840249732 -0.0568689369 0.0383799225 0.12136513 0.0337613001 0.150748312 -0.0912267268 -0.0950863957 -0.0684669167 0.0555381961 0.00922425464 0.0324769616 0.154439181 0.00271727936 0.0754308924 -0.137119815 -0.0415384322 0.107144803 -0.125383273 0.053086549 -0.0475495011 -0.00223423541 -0.128673628 -0.0492692962 -0.0113832206 -0.0116038918 0.104703367 0.0881138444 0.137240604 -0.005506441 0.00293928385 0.0956130475 -0.0784134567 -0.021586366 -0.0949726552 0.109912977 0.0546887219 0.0080575645 -0.0473530963 0.101009175 -0.0167142078 0.0431778133 -0.0919825733 0.0523650348 -0.136390731 -0.0124782622 -0.0131750405 0.0803216249 -0.135611996 -0.139048174 0.121725962 0.0170050114 -0.0948430598 0.126605704 -0.12657319 0.0280110091 0.0484481603 -0.101917908 -0.131059453 -0.00414025784 -0.000507161021 -0.0291253105 -0.0539416969 -0.132729664 0.118548885 -0.119953021 -0.106853649 -0.0724168047 0.0265188962 -0.0701930001 -0.0523263291 0.0513340086 0.0227334052 0.134236738 0.00679840147 -0.0369084999 0.101770133 -0.0643866509 -0.0391958281 
-0.11996039 0.0662431717 -0.0149399638 0.113427982 0.00900928676 -0.115386441 -0.0923655182 -0.125217244 -0.105257966 -0.127515703 -0.115915604 -0.117300279 -0.0824699104 -0.00401163101 -0.0243823603 -0.071768783 -0.0823556334 -0.116988376 -0.0874817073 -0.0767723396 0.0525470376 0.0697348416 0.0105717331 0.116433874 0.119623169 -0.0428111032 0.0553773344 -0.107416034 0.131372139 -0.0292251855 -0.064143002 -0.129900947 0.0492804348 -0.0171842128 -0.13053751 0.0456082523 +tensor_6bias 50 +-0.130149469 -0.0166715905 0.0930550545 -0.00245699566 -0.106651746 0.0490312241 -0.022929607 0.0280555151 -0.149067715 0.102508798 0.00938428845 0.150981218 -0.114400074 -0.0645027235 0.016750779 -0.0841871202 -0.140838712 0.0354661271 -0.127782494 -0.107863024 0.0691528246 0.0463019311 0.0961098671 0.0680775866 -0.118705533 -0.0768498629 0.17632094 0.119572431 -0.0184698328 0.0619278774 0.0973391309 0.0654409006 0.0840726122 -0.0982169956 -0.0271483194 0.119829573 -0.0558238514 0.0868603587 0.109693505 -0.126328036 0.169493452 -0.153565153 -0.0748259053 0.0136530614 0.00735191396 0.121958517 0.00247201324 -0.0304538812 -0.0688641742 0.0533529967 +tensor_18weight 500 +0.116833255 -0.0357756764 -0.131794453 -0.0954782292 -0.0199575797 -0.0554031618 0.0123106642 -0.0748193115 -0.138343185 -0.0409799181 0.00820469763 -0.000622143503 0.103817098 0.133209154 0.108685024 -0.0963258296 0.133699819 -0.0743798465 0.105768584 0.101406492 -0.0850842893 0.088313885 -0.0203158874 0.0809838101 -0.00829087198 -0.00621365244 0.12420509 -0.0834524781 0.013258785 0.000458776922 -0.0094107436 0.121913455 -0.112981685 -0.0701991916 -0.0657723844 -0.0241640378 -0.111835979 0.0305915046 0.0958627611 -0.123314679 -0.0531556047 0.0353454947 0.122457325 -0.188562363 -0.0805713162 -0.0883217677 0.137407482 -0.0494341888 0.08294186 0.02592903 -0.102925614 0.112401806 -0.0666541457 -0.0743903071 -0.124930732 0.00989535823 0.0654063374 0.0936208814 0.00587140396 -0.133220345 -0.0864736214 0.129711837 
0.00181314978 -0.11009489 -0.142534941 0.112804607 0.0828809589 -0.0675114542 -0.0837594494 0.080936946 -0.0578315705 0.000171717635 -0.115505785 -0.00781203434 -0.044651553 0.0236158818 -0.0261275116 0.0364638008 -0.0263226833 -0.0818104967 -0.0312857106 0.0161777474 0.0981943533 -0.142439932 0.130368665 -0.148819655 -0.0904635265 0.142308936 -0.0558655635 0.119650826 -0.123948567 0.071270369 -0.0475144461 0.0499968566 0.0238669682 0.0490803383 0.102086172 0.0440850668 -0.0912592411 -0.124338679 0.0205278974 0.0803509951 -0.127337903 0.0695622861 -0.0565674454 -0.0611764155 0.0246511605 0.147374704 0.0300100464 0.031006122 0.0128832478 -0.134186521 -0.0788531825 0.0231011659 -0.077104196 -0.0899467021 0.083257556 -0.016190676 -0.15599066 0.0265589394 0.0970405489 -0.101687469 -0.119606331 0.101642758 0.0926929563 0.0224016327 -0.118740149 -0.0145147676 -0.121801361 0.0961833745 -0.0375055596 -0.0604088642 -0.0904211402 0.0308890697 0.0637984425 0.0605207421 0.0125890784 0.101664178 0.0726759285 0.0591030382 0.0281341467 0.0179437492 -0.0888262913 0.0439237058 0.0959485695 0.0477892607 0.138858929 -0.0815726668 0.0842805654 -0.0488859788 -0.127596661 0.0637440085 0.0945658982 0.0379101187 -0.133902624 -0.0156785361 0.153879091 -0.0837965533 0.112203002 -0.109656185 -0.121323109 -0.0539827608 0.157840356 -0.140984669 0.103252746 0.0117826462 0.0724756718 0.1193185 -0.0168017652 0.105931647 -0.15716891 -0.0412402935 -0.182400733 0.175201252 0.0334252864 0.190847382 -0.0840521902 -0.102074817 0.0166378226 -0.070603177 -0.0926473141 0.0471240357 -0.0813179836 -0.0973169506 0.049886927 0.107353233 0.0245145429 -0.122061022 0.0877110511 0.0779518932 0.181554541 0.00650133053 0.148282856 -0.167027533 0.0817222595 0.166063353 -0.071306996 0.0401937515 -0.0894560814 0.0982646197 -0.0373243652 0.0289797336 0.0392166823 -0.108381942 0.119098619 -0.0920399055 -0.0729553699 -0.124035373 -0.0866058767 0.132396668 -0.0131686293 -0.0622706711 -0.115531176 -0.129241109 0.0608382747 
0.0826291889 -0.0870855898 -0.153687358 -0.00150228257 0.114700183 0.093991451 0.0560563877 -0.0242470428 -0.03554409 -0.0501358062 0.0884645432 -0.0462445691 -0.160832793 -0.0499034822 -0.0424251109 0.15960142 0.00967518892 0.013454861 0.0476650223 -0.0562032312 0.00298618712 0.113581337 0.0738084391 -0.137588665 -0.0807389989 -0.0702914745 0.0433799401 0.130559713 -0.0844176263 0.00879538152 0.0190126356 0.0929833725 -0.0415338278 -0.0416321158 0.0581194386 -0.106194869 0.0854856074 0.105097309 0.0867655277 -0.110841736 0.13861914 0.00394579815 0.0424983464 -0.0553477593 0.0576893315 -0.0487310477 0.00869362801 -0.0946266651 0.0686361194 0.0094735641 -0.0982639343 0.0865717679 -0.0489508957 -0.0480820388 0.0424392 -0.0348532163 -0.145720199 -0.0116074253 -0.0465409979 0.0409410596 0.137870952 -0.141640723 0.133919835 0.0830813125 -0.0514443479 -0.144528806 -0.0606587119 0.0772298053 -0.000756907742 0.0295330584 -0.00934765488 0.0772825181 -0.120189674 -0.0941238254 0.110182583 0.020738909 -0.110578194 -0.170464888 0.135421559 0.0186651032 0.157670006 0.106957033 0.113828443 0.032888636 -0.085790351 -0.102287576 0.10205999 0.11301367 -0.0841406286 0.0869232267 -0.0806331262 -0.0432705954 0.0882454589 -0.127744198 0.0729970783 0.183013499 0.0513928235 -0.160567686 -0.0651886687 0.0733133778 -0.140486658 -0.119877644 -0.0233747195 0.0682742521 -0.0181181505 0.0523737594 -0.114034481 0.178959042 -0.108594783 0.0531802028 0.00544614438 0.122413933 -0.107881032 0.00437956769 -0.0753047615 -0.0751520917 -0.0669195428 0.140085652 0.107123025 0.0215605646 -0.0108890682 0.126112461 -0.0248530898 0.0944449008 -0.11579188 0.0103179337 -0.147988439 0.0894878879 0.155256197 0.0593105108 0.0212335344 0.108353369 -0.0329438969 0.0173103362 0.113536589 -0.0336008444 -0.0386447273 0.0362053365 0.0621379763 -0.0990284234 0.00793749839 -0.0617283881 -0.0743452683 0.179765821 0.114959568 0.136922091 -0.1003832 -0.0692859069 -0.139016584 -0.13847138 -0.0461068004 -0.0357935503 
-0.175952822 -0.0971040502 0.0884984359 0.00901553407 0.173032984 -0.0787108466 0.0656532124 -0.0365875959 -0.0772555619 -0.101957574 0.177314684 -0.184264794 0.0541368276 -0.173181415 -0.0607216991 0.0584572963 -0.0959036872 -0.0192932636 -0.140759885 -0.0871745721 -0.0414703935 -0.128599197 -0.134148136 -0.0330905467 0.086126022 0.0754621923 -0.0512827821 0.0647300407 -0.0423469543 0.103672571 -0.10058222 -0.0269276202 -0.0845367238 0.148252413 -0.127518728 0.120773628 0.0699376613 0.085009709 -0.0772422925 -0.00348520023 -0.0487586632 0.0242007188 0.0718890578 0.0988076255 0.0587318242 0.0960896015 -0.0790796131 0.0568241999 0.0869796574 0.0882544219 -0.0609133728 -0.193863526 0.117342524 -0.0339369737 0.115510337 0.0176041406 -0.134604976 -0.0149109662 -0.0460692905 -0.0518316031 0.154208392 0.116131343 0.000284732843 -0.124516778 0.00545642432 -0.1934973 -0.0553306863 -0.114772283 0.0383958407 0.065391317 0.0921707079 -0.112537354 0.138822451 -0.112784393 0.163916111 0.141898572 0.144761667 0.043000266 -0.0156551208 -0.13070862 0.0155345816 0.0829150677 -0.15498811 0.0502647795 0.106921747 -0.0415367335 -0.101776689 -0.0270393789 -0.150163233 -0.00523975072 0.12342082 0.030272549 -0.0634030774 0.110089242 0.078154169 0.141164288 -0.0665735304 -0.133532166 0.0205077082 -0.0799736828 0.00293931179 -0.0775768757 -0.0609018579 0.104522519 -0.0473734476 -0.000608845323 -0.139011964 -0.12750718 -0.113618098 0.0852759406 0.0522349291 -0.011812062 0.103033014 0.116778359 -0.0851101875 0.0191278923 -0.138369411 -0.0144041777 -0.033769384 0.0952177495 -0.184691101 0.00995114446 0.0508734547 -0.161694378 0.103683837 0.106144048 -0.0914210454 -0.10774231 -0.0468717627 0.0950566381 0.0999391824 -0.0267179832 0.114936009 +tensor_0weight 5000 +-0.0508145355 0.0380447619 0.063999176 0.0305916369 0.0178726781 -0.0604492612 -0.0922251716 -0.0409420505 -0.0806181803 0.0253173634 0.0200065672 0.025728466 -0.0917467773 0.103931934 -0.0364619642 0.0943374634 -0.00828016549 
-0.00586269284 0.0867723376 -0.0814111456 0.031561438 0.0127995471 -0.0175799523 -0.0762633756 0.0174684227 0.0652227029 -0.0757252946 -0.0175922979 -0.0378516056 -0.0103352945 0.0671745986 -0.00129433826 -0.0402865373 -0.0769478306 0.136584729 -0.00320164161 0.0293592662 -0.131899893 -0.00832800474 -0.0754740536 0.0523712561 0.00832175463 0.0861478224 -0.0513150692 0.0660690591 0.0819229931 -0.061753273 -0.0584596395 0.00335742347 0.0352997482 -0.0355550982 -0.0571611226 0.0267518349 0.00366023136 0.0501797497 -0.0911384001 -0.0553132854 -0.0707477331 -0.00369775807 0.0324063897 0.0952493548 -0.0701338053 0.0869731754 -0.0122592403 0.0579397976 0.0811071023 0.0882389173 0.00392138492 -0.0282733813 -0.07796707 -0.0237656292 -0.0976018459 -0.0302748028 0.0959793851 -0.0774898157 0.0740917549 0.0638118461 0.078004308 0.0606729016 0.00807148404 0.122843958 -0.0412954316 -0.00570290349 0.048803661 -0.0925534815 0.0642913654 0.0318598822 0.0163798314 -0.128930375 0.10925965 0.0126452744 0.0192803536 0.0565545857 0.0464702807 0.0887314975 -0.0057315547 -0.0403685123 -0.0338817462 -0.048792094 -0.0464581065 -0.0035396677 0.00428326242 -0.00884059165 -0.0119973514 -0.000725717517 -0.00789349712 0.118645795 -0.0580181517 0.0644906759 0.00912526064 0.0169304255 0.124032162 -0.0209737495 -0.0644200072 -0.00610294472 0.0443046205 -0.111826301 0.0924093947 -0.0439966656 -0.0174338557 0.0350687169 0.00473201321 0.0256146453 -0.0102232145 -0.0740443543 -0.0562746376 -0.014960424 0.0814036652 -0.116801761 0.100059807 -0.103689127 -0.0274910927 0.0271945633 0.0108639002 0.10785304 0.119615123 -0.0933286771 -0.104783587 0.0308681801 -0.0486758314 -0.109117366 0.0569621176 -0.0481622331 0.00365207065 -0.0219343584 -0.0347234085 -0.0862182751 -0.0892119408 -0.0102323415 0.0456935875 -0.0520036221 0.0333958827 0.0777817443 -0.0517655611 -0.0659507513 -0.0258211251 0.0767518133 -0.0409224853 0.0908992663 -0.02203927 -0.0486994721 -0.0106065404 0.0736719891 -0.0421070047 0.00211916398 
-0.0954323933 -0.0184283517 0.0945980549 0.0464413576 0.0754498392 -0.037175186 -0.00513185887 -0.0806778446 0.0640999004 0.0459177829 -0.0637460798 0.050385993 0.0309800953 -0.0565693192 0.123325884 -0.0541292951 -0.103475936 0.131021678 0.0500673242 -0.0464580432 -0.0415508747 0.0403000787 0.0370427035 -0.0539194234 0.115986116 -0.0146925198 0.0295080431 -0.0609974898 -0.0897742435 0.0866930038 -0.0403378308 0.0923621878 0.0294523649 0.0361655056 0.0205234103 0.120485581 0.0700325444 -0.0497162156 0.043903362 -0.123235166 -0.00563619565 0.0633756816 0.0214825068 0.0333110169 0.0104085831 0.00706916722 0.0849041864 0.110511042 -0.0315861739 0.0258784778 0.0600173473 0.0242875703 -0.0698327497 -0.0123227434 0.014183823 0.0316864885 -0.0905837119 -0.166954413 0.0110382149 -0.0523379371 0.113217518 -0.0749479383 -0.0839105621 0.06123152 -0.123284116 0.00467087328 -0.0400639065 -0.139171645 -0.0208464172 0.0824865252 0.0881877691 -0.0531907566 -0.0462760702 -0.110711597 0.0575970635 0.0207520071 -0.0592331663 -0.0876662433 0.0696238056 0.0587312393 -0.146805972 0.0465192385 -0.0876265243 0.0274952278 -0.0920811594 0.0425466485 0.138803825 -0.11095649 0.035078045 0.0631543472 0.0814108402 -0.0799154192 0.0390180871 0.076176472 0.0414959937 -0.0411920138 -0.013391098 0.00715481211 0.0667355806 0.0209038183 -0.0535365157 -0.000487437297 0.0508622043 -0.0740626454 -0.0971553922 -0.00739993062 -0.11939621 0.0771144629 -0.0776564106 0.00858938135 -0.103641413 0.0731763914 0.00908944476 0.0822138935 -0.0353183523 0.0420758463 0.00362044154 0.107373729 0.0974787101 -0.101556815 0.0841847733 0.0912442133 0.125659168 0.0618594773 -0.0642373115 0.0193936136 0.101438187 0.0530573241 0.0676667318 -0.00218354817 0.0432167687 -0.0871621072 -0.0426511392 0.0288807489 0.0790897906 0.0490392447 -0.0205203649 -0.0993364006 0.00468417443 -0.0547306687 -0.0277090929 0.00697927317 -0.0244437791 -0.0157332867 0.106168769 -0.0359168798 0.114783011 0.152111039 -0.0253520384 -0.0015796381 
0.03375398 -0.104952097 0.0092763612 0.101215295 -0.00308345142 -0.0368209258 -0.0473044775 -0.00817228947 -0.109114319 0.105732635 0.0244474001 -0.0146100083 0.0529635809 -0.00840851665 -0.0632252246 -0.0520593971 -0.00865435693 0.0344991274 -5.80968299e-06 -0.133371904 -0.151106805 0.0796020627 -0.00727936905 0.0341606252 -0.0332682915 -0.121840335 -0.152285203 -0.0688880011 0.0230131447 0.000283442176 0.0609894954 -0.004379577 0.0477737971 0.044799611 -0.132041544 -0.0921159685 0.0794112161 0.0724173859 0.0694021881 -0.0325237289 -0.0596630126 -0.128212959 0.0867897272 0.0183652658 0.067165792 -0.0221667513 -0.0792030767 0.00673970953 0.0961405337 0.11915601 0.0162419658 -0.0958381593 -0.0221719481 0.066273272 0.0103854984 0.0839003772 -0.0880922079 0.0691054389 -0.0436538383 -0.0678017363 -0.0862348899 -0.0580505431 0.0340274572 -0.0189464837 -0.0844241232 0.077873528 0.07532157 0.0911468565 0.130459666 0.0642754659 0.103514485 -0.0523621738 0.0538226627 -0.00427193614 -0.0198291782 0.0464040674 -0.0794681758 -0.0358173288 -0.0710784718 0.108343065 -0.0409613326 0.0320960544 0.053875234 0.00935616158 0.0279227011 -0.0595730767 -0.0895934626 -0.054435689 0.0687097013 -0.0623276383 -0.0781896859 0.0710855275 -0.0379823111 0.0614629425 0.107129268 -0.0969881415 -0.116216652 0.104508013 -0.0730313659 -0.0942338929 -0.124592021 -0.0121723814 0.0757561401 0.00725453952 0.027494695 -0.0790883899 -0.0104121519 -0.0122909518 0.0885993391 0.00961995777 -0.0863305554 0.0516466871 -0.00846583862 -0.137650937 0.053744074 0.0191885531 0.099622637 0.119871758 -0.0234789476 -0.0225552637 -0.0628033355 -0.061706692 0.00870011281 0.0219527185 -0.113005184 0.0864791349 -0.0586110726 -0.0858683884 0.0617091358 -0.0387163647 0.0250992496 0.0188102666 -0.0987309664 0.0387692712 -0.0278170835 -0.0702976808 -0.036741849 0.0252645276 0.0743944049 0.0373597182 -0.0650147647 0.0886150151 -0.0378745385 -0.0721595287 -0.100263052 -0.024431048 -0.00138329086 -0.0156793948 -0.108034611 
0.00560034066 -0.10105747 0.10377124 0.108116172 0.106484957 -0.00357731106 -0.103540003 -0.162499279 -0.0292240772 -0.13454926 -0.0578239672 0.0473558456 -0.0877546594 0.0137864761 -0.0327536836 0.0507721342 0.0252886489 -0.092969656 -0.046330668 0.0129592251 0.0318424702 -0.0836347714 -0.00133580307 0.0577662215 -0.130686596 0.0925165117 -0.0197680425 -0.0594279207 0.081254214 -0.025833251 0.106694445 0.103731573 0.0476561114 -0.0746863931 0.0867198333 0.0718293041 -0.0795527846 0.0907836407 -0.0875569582 -0.0436345451 0.0336315818 -0.101902887 -0.112922281 0.0268265437 -0.0239662174 0.108922079 0.041044455 -0.00692772307 0.0482088998 -0.0167634431 -0.0713200569 -0.131396279 -0.0818632841 -0.0646765679 -0.00467614038 -0.068184495 0.0581986308 -0.12290591 0.0856338814 0.0330237187 -0.113662779 0.0768672228 -0.0640005991 -0.102779485 -0.0699071512 0.109070554 0.0373121388 0.00894282851 0.0210740287 0.102456108 0.00209105411 0.0643166751 -0.0748509914 0.0103702946 -0.00475171115 -0.124941736 0.0627204254 -0.110363327 -0.0701798648 -0.00204091449 -0.0581695102 0.0710774362 0.0637491271 -0.112383977 -0.0604935288 -0.0444468074 -0.0884831399 0.0787647441 -0.057589367 -0.0092884656 -0.0068281414 -6.82513783e-05 -0.00189695833 0.0291572809 0.0887888893 -0.10821224 -0.0333320834 -0.0257991888 0.0457047522 0.0474029109 -0.0698928088 -0.12633343 -0.0517579019 0.0700997636 -0.0429069959 0.0199789684 -0.0108658681 0.00652803527 -0.00151343702 -0.0620038249 0.0806239918 -0.0406728946 -0.0541682765 -0.0738097504 0.106633566 0.118564427 -0.0846382231 -0.0406942107 -0.0214116126 0.021105893 0.00434125355 -0.0575985499 -0.0204750691 -0.0223995987 -0.108478487 -0.0789667591 0.00276051858 -0.0364289954 0.0240258034 -0.00772039779 0.0677978322 0.0111008026 -0.0301737618 0.129933059 -0.0297325328 -0.121423602 -0.00256420486 -0.0767344758 -0.0345042236 0.0232742243 0.0518034101 0.0377323851 -0.0785427988 0.0944864005 0.0168189276 -0.0450433195 -0.0179200061 -0.0332794897 -0.115497865 
-0.079460144 0.0748219565 -0.0902453661 0.0155278947 -0.0175510496 0.095810093 0.081910409 0.0412754081 -0.124072641 -0.0311682243 0.0492392965 -0.0202937964 0.0275281016 0.0286301002 -0.0205335319 -0.06336198 -0.00144966797 -0.0174041037 -0.116765253 -0.0784229636 0.0826164782 0.0420248657 -0.0860279575 0.0445345417 0.0170288365 -0.0538485646 0.0218434893 -0.126645058 -0.081135571 0.100772187 0.074350059 0.0520832427 0.0342816785 -0.0332369693 -0.00494507421 0.0594232231 0.0195060819 -0.0653662607 -0.0566894747 -0.049552016 0.0946275666 0.0667197555 -0.0115687326 0.0609822571 -0.0733285174 -0.00757924188 0.128872409 0.131651253 0.0883550048 0.00453105802 -0.0755265802 -0.0273298975 -0.075551331 0.0423105136 0.103586905 -0.103956595 -0.0853345916 -0.0501238741 0.0979430974 0.0415611826 0.0830030888 0.026601227 0.0730280057 -0.0635615513 0.0683744699 -0.0418914109 -0.0580942109 -0.0672050193 0.000723240606 0.0774079859 -0.0247790851 0.0417027809 0.0464081317 0.0150757832 0.0990941375 -0.0262722876 0.0383368991 -0.0866433606 0.053820353 0.000491182785 0.0509168692 0.0996452123 -0.109139279 -0.0142310057 -0.0809439868 0.0759590417 -0.0160349142 0.0490121357 -0.096037291 0.0512820296 0.0240419395 -0.0778301433 -0.00461494876 0.0313165486 -0.0526363626 0.0636812896 0.0332127437 -0.0893361941 0.107702576 0.0753764287 -0.134968281 0.154497638 -0.0106210969 0.0807469338 0.0397658274 -0.0412878655 0.0725319758 -0.075096637 0.0352239423 -0.156253964 -0.105903931 0.00186598103 0.0204177406 0.0137510747 -0.0850754306 -0.0996614769 -0.137975514 -0.0964332893 -0.0970748141 0.0658250079 -0.0284603387 -0.0586091056 0.0547327399 -0.0909201056 0.0791378096 -0.135371462 0.0970040932 -0.0691698939 -0.0478290841 -0.091066964 -0.10993892 -0.0587934963 -0.149786964 -0.0152382096 0.104548037 -0.0258558169 -0.144351274 -0.0335272104 0.0226420816 -0.0596394576 -0.0499719083 -0.0401447415 -0.137909144 -0.0354104489 0.0517158546 -0.00912801269 0.100223139 0.0372407772 -0.0557585917 
-0.00336286239 0.0683526322 -0.040298298 -0.00263097975 -0.0374882258 0.0522439405 0.0507735275 0.105002061 0.0763192996 -0.0607291535 0.0252055712 -0.00846379343 -0.0764852315 -0.0580886006 0.0643623322 0.0687340647 -0.0175391026 0.0497902818 -0.0412122346 -0.0626358017 -0.0582311451 0.126354679 -0.118473426 0.151346073 0.0104045104 0.0904658511 0.0403522402 0.00989431608 -0.104035281 0.0668206066 -0.0350457989 0.0594084747 -0.0234730225 0.0567279682 0.0705103427 0.0155637255 0.00617892295 0.0591375902 0.10290321 0.0125923716 0.0783741623 0.00137256691 -0.00307283737 0.0503848121 0.10381522 0.118850879 0.128660917 -0.053519316 0.0977203473 -0.0134721575 -0.0389264151 -0.00525255827 0.0452782214 -0.0551993214 -0.10694126 0.027695125 0.0864779726 0.0454558991 -0.0506804623 -0.0287189651 -0.0546144284 0.120786496 -0.0527668484 -0.0774859414 0.102176331 0.0673900619 -0.0448943712 -0.0719371215 -0.0406077392 0.0517419763 -0.133232012 -0.0570902154 -0.09013246 -0.0748804808 0.00943455193 0.0882416517 0.000705939427 0.0691983029 -0.0305666197 0.0502307639 0.0774589181 0.0290872231 -0.103126198 0.0543247163 0.0888695046 -0.0432999581 -0.0238669831 -0.0651162937 0.0898748636 -0.0334561318 -0.0923917145 0.00535089429 0.0831253678 -0.032534346 -0.103109762 0.0489915684 -0.0154016791 -0.0483072698 0.0992657989 -0.0456443615 0.0638154149 -0.00041857746 0.0412595235 -0.0256175622 -0.0011343424 0.0302553996 -0.0492172204 0.0441855341 0.0358452648 -0.125684917 0.0641204044 -0.101313218 0.0406814888 0.0231520366 0.00894289184 -0.0159130525 -0.0403623842 -0.0126857739 -0.0646654069 -0.0864315107 0.0479207449 0.0227875356 0.0891341716 0.0144964764 -0.128592268 -0.0647967756 -0.0491824746 -0.122899771 0.0843127072 -0.0399818346 -0.0702486336 0.0469990969 0.0585947372 0.00991726387 -0.0545531549 0.121398546 0.0267390348 -0.0134512298 0.0329682231 -0.0672333017 -0.0224784035 -0.00435023708 -0.0271258652 0.0712630972 -0.0160659477 0.0995363668 -0.0256949402 0.103435107 0.109910071 
0.00650324021 -0.0404900536 -0.0908767134 0.0118982857 0.00520248339 0.0329482853 0.0144852586 -0.0797013938 -0.0785156786 -0.114622436 -0.0149816191 -0.0634922013 -0.0747183189 0.0377447829 0.00633793836 0.0832202658 -0.0870476142 -0.0112469308 0.051385209 0.00177763787 0.0805689245 0.0667984635 0.119763464 -0.0189604852 -0.0689202473 -0.00829955377 0.0841114894 -0.0166632887 -0.101568498 0.0870780572 0.0787321255 -0.101076506 -0.0728867874 -0.0815497339 0.059538722 0.0476107113 -0.0611895993 -0.055862911 -0.00502554746 0.0184646137 -0.0100589432 -0.141160175 0.0608552545 0.0207750183 -0.0828769058 -0.0782217011 -0.0249421597 0.0649304986 -0.0759224221 0.0226793531 0.0345480256 0.101637982 -0.0291147213 -0.020399509 -0.0961149037 0.0607593879 -0.0901033953 -0.00980376825 0.0093408674 0.0903950557 -0.0326510593 -0.0616331063 -0.0332476608 -0.0641225353 -0.0496507026 -0.058669664 0.117607869 -0.0409576073 -0.00356686814 -0.105142437 0.0766613707 0.0395114012 0.0188095663 0.0634850636 -0.0737257972 -0.0844153538 0.118897498 -0.000630921393 0.080352664 0.00662139896 -0.0893025771 -0.0714181289 0.081619963 0.0111359404 -0.0571513996 0.0548180155 -0.0636223927 0.125711203 0.0851431414 0.130260974 0.0836031362 0.061986275 -0.028846303 -0.0287329499 0.0502533987 0.115427487 -0.0506522879 0.127979293 0.119268231 0.0850080177 0.0331578441 0.0409094281 0.0090124933 -0.0136618558 0.0948067382 -0.0672471449 0.0505564883 0.032799989 0.0633241385 -0.0469509736 0.0506216548 -0.0372176617 0.0645158365 0.149505928 0.0132820019 0.0121845976 0.0295179803 0.0295598768 -0.130403206 0.0423673615 0.0379888043 -0.0185889266 0.0913859308 0.0504159145 -0.0536566004 -0.064247027 0.0357843451 -0.00891068671 0.0950773582 -0.116974562 -0.0360760242 -0.121625684 0.103534453 -0.110155627 -0.109326176 -0.0306907389 -0.124297231 0.0215684474 -0.107538059 0.115768477 -0.0631534979 -0.107648998 0.00836135633 -0.0481221005 -0.0226832405 0.00744933914 0.0239705071 0.00856848713 -0.0518919192 
-0.0672201142 -0.0423557498 0.0152753228 0.0322034582 -0.0436891429 -0.0355248898 -0.0221560691 -0.0227099117 -0.0872905031 -0.074751161 -0.0961238891 0.0214987211 -0.0765815899 -0.101568431 0.0197522994 0.0158146303 0.0358287059 -0.0310186576 0.054503344 -0.0471081249 0.00175969047 0.0102003291 0.0548275784 0.0608831719 0.00927542709 -0.00995576289 -0.00546212913 0.124199063 -0.0787529647 -0.107979171 0.0664112717 0.00175410474 0.0996535346 0.053399168 0.0650362223 0.0413330421 0.059269011 0.00307723135 0.0968322679 0.0304244794 0.0847681016 -0.0587718002 -0.0920936614 0.0963051766 0.0730310529 0.075302057 -0.101675689 0.00834253523 0.0334894434 8.25827228e-05 -0.0221394673 0.0630398169 0.0403992832 -0.0181807 0.0733471513 -0.0055750059 0.0181060694 0.1169772 -0.00306291087 -0.0245710369 0.0374747738 0.0355481431 0.127457261 0.06669911 0.033534728 -0.0313876085 0.058364775 -0.0700249672 -0.0348450616 -0.0787659734 -0.111428857 -0.0750032812 0.1067295 0.0295113139 0.022092022 0.0761882439 -0.0214715526 -0.0454636477 0.0440265127 -0.0405539833 0.0178954173 -0.0918944478 0.0349099524 0.0980099589 -0.0593721792 -0.0717693344 0.0749724507 -0.10822311 -0.10527648 -0.0456449613 0.00945392437 -0.113418877 -0.0248292517 -0.151771814 -0.0317451209 0.00303221145 0.0136932479 0.0757390037 -0.0645068213 0.110142581 0.0331983566 -0.0726855695 0.0410212204 -0.0837602541 0.00736812409 -0.0960764661 0.0659725666 -0.0506423712 0.108358607 0.0074415463 -0.0579753295 0.0222589932 0.0219781511 -0.0769435242 -0.00365759665 0.0658315271 -0.0195193309 0.0876873434 0.0829789042 0.030799007 0.0445269085 -0.087823227 0.0490200967 0.0495685935 -0.0617967919 -0.00453193625 0.103787176 -0.0256911721 -0.0746461451 0.149433792 -0.00853996538 0.0359981731 -0.0535804741 0.10725081 0.0878978521 0.0258817542 -0.0147519195 -0.0875247493 0.0177521463 0.0229451209 0.0438379906 -0.0674143359 0.0837276876 0.0518606342 0.0602514297 -0.0148247061 0.0175807085 0.0104981009 0.0398374051 0.016279107 
0.0897895545 -0.010169927 0.12621972 -0.152914077 0.102994591 -0.00934717152 -0.0707922205 0.088611111 0.106939681 0.112134047 -0.0540277697 -0.054023616 -0.0951209962 0.0558281131 -0.0773286074 -0.000430493499 -0.0220108796 0.00224742503 -0.042162884 0.0229496341 0.000386319705 0.0903915763 -0.0727334097 -0.0566792227 -0.0469854027 0.0666792765 -0.0901913494 0.0639531165 0.00190761709 0.0819069371 0.0437930077 0.073981382 -0.10753461 -0.0635947138 0.0397201367 0.0639339834 0.0106142825 -0.095933184 -0.0258502234 -0.151261196 0.0201133601 0.0523358956 -0.113775507 -0.0635734051 -0.0209280569 0.0180092286 -0.0952379927 -0.0805232748 0.0792436674 0.116160475 0.0405516624 -0.0603361167 0.0921702161 0.0638613254 -0.0797907561 0.0562291071 -0.0404303297 0.0192060955 0.0931882188 -0.0454974994 -0.063482672 0.0183900204 -0.0941224843 0.0311275516 -0.0276973266 -0.0228528716 -0.0103476569 -0.0013043856 0.0595675893 -0.0146932686 -0.0967626795 0.0205185581 0.0111512868 0.0304273423 0.0346512347 0.0115508316 -0.0314554684 0.0335132703 -0.0399859101 0.0783086121 0.0110251317 0.050887987 0.0386743098 -0.018033972 0.0640587211 0.0695254728 -0.0274955798 0.0315612257 -0.0987086147 0.0660334751 0.108960167 0.0362012573 -0.0556706525 0.0763316229 0.0343016721 0.0549547151 0.0566200167 -0.00617094245 0.104899995 -0.0148995249 -0.0461326651 -0.052078858 0.113826625 0.042423591 0.0696527734 0.0174295567 0.0255777556 0.0323791206 -0.085186027 -0.0352433883 0.0130573669 0.116727203 0.0527772866 0.0953754038 0.0984134078 0.0301339664 0.0283296034 0.0112838252 -0.041340284 -0.106966309 0.0208709706 0.0510318168 0.0411410108 0.0704910904 0.113985598 0.0480646491 -0.101439185 -0.101105615 -0.0554792546 -0.0963118672 -0.0833592713 0.0804136619 -0.0818424746 -0.0130467992 0.0993848965 -0.0514523238 0.0999550074 0.102077879 0.00966593996 -0.00935996324 -0.0179428924 -0.0360591672 0.00619822368 0.0243546553 -0.142853007 -0.0114681982 -0.0543433689 0.0218674429 -0.0637027845 0.0662505031 
0.11204917 -0.0893480182 0.0857268497 0.103264339 0.0781002343 -0.0893782303 -0.0274790041 0.0431495346 0.0856630653 -0.12378367 -0.0509530865 -0.0479679741 -0.0808392987 0.0511769354 -0.00993785262 -0.0495909974 0.00732931681 0.107190818 0.0212429408 0.0919175819 -0.0032403795 -0.0621873438 -0.0842421055 0.0878323093 0.00147393253 0.0229070615 -0.0386694148 -0.0345502682 -0.0645541772 0.14156653 -0.0889476463 0.0902122259 -0.0681383684 -0.0405545346 -0.0987435952 0.0225519631 -0.118827663 -0.111183643 -0.0112256492 -0.00946287438 0.0775573999 0.0200256836 -0.0373974107 -0.078532733 -0.108547017 0.0992447287 0.0162392482 -0.0711892024 0.0401137359 0.0209429767 0.000362629071 0.0647842437 -0.0358259976 0.00750721199 0.0491359942 0.0709332824 -0.105451792 -0.0134563902 0.0872533396 -0.0307084043 -0.118991949 0.0960125998 0.0121480636 -0.036713779 0.0374878086 0.0718258396 -0.0660520568 -0.00429979758 -0.055313319 0.018989075 0.0844045654 0.0639191419 0.0425145887 -0.0436811857 0.0248131063 0.0507366285 0.00984115712 0.0211421121 -0.0417334475 0.127702236 -0.142305464 0.038462583 -0.100248791 -0.0598390587 0.0798201784 0.0749086887 -0.0129145803 0.0493668057 0.0832006335 -0.00326930895 0.0621138252 0.116234139 -0.0619600303 -0.0258555952 0.00560154766 -0.00271366001 -0.0680233538 0.039063748 -0.114920385 -0.0542362481 0.0695442334 0.0281284824 0.0585357882 0.125471935 0.0688281953 0.0719351396 -0.0179130882 -0.0254238006 -0.00948760845 -0.0995621756 -0.0127528915 0.0291331895 -0.0169871729 -0.00137848861 0.126049355 0.0243894756 -0.00514754048 -0.0438758358 -0.070057936 0.00142127706 0.0820695385 -0.0231800079 -0.0708072856 -0.0734865814 -0.114026025 -0.0061859726 -0.0585030317 0.0943298936 -0.0582126155 0.064423196 0.0419933088 0.0116295256 0.0170936771 0.0498891808 0.0110197524 0.0411308594 -0.0257459451 0.0114618847 0.0878219977 -0.0317848064 0.0811458603 0.01887214 0.00988883246 -0.0506531522 0.0625907555 0.0145452367 -0.112982243 0.0802996382 -0.0328567512 
0.0700641721 -0.00277703465 -0.0246732663 -0.0414474681 -0.0930275917 0.0817583874 -0.0246985424 -0.0693705902 0.0860790238 0.0245301407 0.028220322 0.0357720293 0.0410393327 0.0705156475 -0.063267082 0.050686691 -0.0218410157 0.0550663397 0.0759022906 -0.0350831598 -0.0160008334 -0.115162447 -0.0647135377 0.0396890379 -0.0345642604 0.0103187198 -0.0589025803 0.0834977105 -0.0107147945 0.0380949751 0.0866653398 -0.0723311082 -0.0372112989 -0.000454910012 0.0213319007 -0.00432507833 -0.0310348179 0.0425887331 -0.0940774977 -0.0323967934 -0.0242477451 0.117995851 -0.0160061121 0.0213480443 -0.0668758824 0.114949614 0.0316681191 -0.0759480372 -0.0610279627 0.0633142143 0.0236565657 0.0845542625 0.00935758371 0.0250929277 -0.0281674396 -0.0359582417 0.0694757774 0.056437064 0.016289724 -0.043686077 0.0887322947 0.000600125699 0.0521455668 0.0419055298 -0.0610189848 -0.0224667937 0.0316987857 -0.0323978439 -0.0178262964 -0.0366154872 0.0907478258 -0.0856860802 -0.110066622 -0.0315983742 -0.0946494043 -0.0222084317 -0.0352201238 0.0455912501 0.0811657757 -0.0895951316 0.0279459916 -0.0952548608 0.113056384 0.00558312191 0.050939288 0.124181278 0.0341638587 0.00255426345 -0.033172816 0.0153816594 0.104887553 0.0244834907 0.0457413457 -0.0520596057 0.095031105 0.0351452902 -0.11665196 -0.0497119017 0.050630711 -0.0861758068 -0.0872503743 -0.0496218018 -0.00381143531 0.109498873 -0.0175776016 0.00528071402 0.0259748194 0.0909558833 0.0579428524 0.139967725 0.0764526948 0.00463831052 -0.0771861747 -0.144396409 -0.104723662 -0.0137682576 0.0223192684 0.0313319825 -0.019306751 0.0563000366 -0.00657232618 0.034467455 0.0391030945 -0.0310320668 -0.0617044605 -0.126123548 0.0184416007 -0.051189024 -0.0356684178 0.0274483245 -0.0450351276 -0.0650538877 -0.000781424344 -0.0433340222 -0.0677636564 -0.0434984639 0.0460208468 -0.0124574052 0.045673795 0.0250319857 -0.0097975824 -0.000288532581 -0.0693829432 0.0817056447 -0.0367149822 -0.0803469568 -0.0272673164 -0.0125355599 
0.108721487 0.0555210412 -0.0362726599 0.00188711134 -0.0225252602 -0.0467403233 0.0223246478 0.0924254134 -0.00607204111 0.0348412544 -0.0419691056 -0.0352974981 0.120286591 -0.0532627963 0.0599474981 0.117449939 0.0250896253 -0.0453546159 0.0333019607 0.0678343773 0.0618110187 0.0790082738 -0.013288267 0.0124899093 -0.0627008379 -0.0927575454 0.0872658491 -0.0858765841 0.0804509819 0.115199946 -0.116047971 0.114141606 -0.000370875583 -0.0547132045 0.0655369386 -0.105160132 -0.0587072149 0.00996344257 -0.0588719957 0.143944472 -0.0636086613 -0.0625388771 0.050297644 0.01688735 0.0503490344 -0.0187371671 0.0263831038 0.0351513959 -0.0622758158 -0.0289025009 -0.00445907749 -0.0819463283 0.0783530101 0.00172135397 -0.0108682076 0.053622894 0.0898650363 -0.125441834 -0.0469661765 -0.0740193054 0.0146140624 -0.074739024 -0.124391489 0.000910399249 0.100878544 -0.00938480534 0.10746365 -0.10205555 0.0822874457 0.117170572 0.065103583 -0.0468601808 0.0443411134 -0.0220601298 -0.0349924974 0.0565963052 0.024442032 -0.000596265076 -0.0503311418 0.0310966447 -0.0986445844 -0.0381193534 -0.107975848 0.041932255 0.0274684485 0.060344439 -0.0951578543 -0.00214851787 -0.0242667589 -0.00569425896 0.058796335 0.106092222 0.0197916087 -0.0124082975 0.0247668065 0.0231674556 0.0468560532 -0.000621114043 0.0964491889 0.0251123365 -0.0552343167 0.119750619 0.043985635 0.00931171793 -0.0136433262 0.091603227 -0.0712718666 0.0786479861 -0.0408394635 0.0966731384 -0.0660808533 0.0769225433 -0.0086235553 -0.105327964 -0.017321486 0.0972045138 -0.077172406 0.0514651127 -0.0781937093 -0.108713485 0.112203546 -0.0802456141 0.121202722 0.11901883 -0.0931790471 -0.0164292976 0.0312756896 0.105686158 -0.0783906654 0.0468474701 0.0110720228 -0.00267141312 0.0711446702 -0.0328070559 0.0175967477 -0.0356302932 -0.0124149965 0.0686402246 -0.0505678542 0.129400566 0.0214219112 0.0196880996 0.104359493 -0.0519865453 0.052242592 0.00997835957 -0.0990768448 -0.0456322841 0.0230734646 -0.0203887951 
-0.0376775041 -0.07378342 -0.0275467373 -0.0691813529 -0.0821307749 0.0593699105 -0.0246762205 -0.101402849 0.0678628758 -0.00588039402 0.106908754 0.00991031248 -0.115229808 0.0721573606 -0.0429049321 0.0701546818 -0.0851534382 0.0652838498 -0.0788848251 -0.0332299247 -0.0408851914 0.00320880138 0.0196518935 0.0832488984 -0.0366014726 0.01875652 0.053820096 0.0153092891 0.0467731841 -0.0368329771 0.0111917052 -0.0291276965 0.0631685331 0.0357577875 -0.0179604348 0.00486189499 0.0305900779 -0.0209680013 0.0740240738 -0.0749756619 -0.121885069 0.032916151 0.00262444629 0.00849013589 0.0662304983 0.0742840394 -0.125835225 -0.0522070974 -0.113991506 0.0744321495 0.0978048667 -0.00498304795 -0.0719037652 -0.0263758246 0.0775782466 0.0118285939 -0.0350849591 -0.0356183834 -0.0106398668 -0.0223848727 0.0236225128 -0.120488241 0.0512224138 -0.041531492 0.0656389818 0.0879166201 -0.0274794661 0.0920548141 0.0264546964 0.0468961522 0.0466408059 0.0399114974 0.0449604504 -0.0700372905 0.0427690521 0.100792646 -0.0325727239 0.0341325775 -0.114281707 -0.00742708845 0.0433078147 -0.10298638 -0.104879826 -0.0632601455 -0.0102007883 -0.0802601725 -0.00294449297 0.00281117624 0.104717933 0.0612074584 -0.0467934757 0.118006982 0.117171124 0.106652826 -0.0225161687 0.0956271738 0.0270829834 0.0200848412 0.0168362167 0.0149010466 -0.0440483093 0.117436014 0.0664134845 0.0302254353 0.0535751954 -0.00848081987 0.0130929723 -0.0325898565 0.128652498 -0.0109613249 -0.00605653459 0.0190438107 -0.0259028617 0.0401356928 -0.00171622215 -0.104202524 0.0845206603 0.12883538 -0.0919445157 -0.00195987965 0.0736826509 0.0480313748 0.0636631101 -0.0408567712 0.0155776199 0.0579566024 0.134289131 0.0326339938 0.0374059007 -0.0366481617 -0.0377640799 -0.0160350259 0.0126323858 -0.0398559012 -0.0692037791 0.00400359975 -0.106625289 -0.0896666497 0.119961634 0.129376546 -0.0542201884 -0.0679891706 -0.0174552612 0.0752889439 0.116622798 -0.122125328 -0.0475201905 0.0421101414 0.00309556024 
0.0322735868 -0.097082302 -0.0326796286 -0.0467596054 -0.0276475735 -0.088789694 0.0212633777 0.0486687906 0.108680114 -9.16656572e-05 -0.0739132911 -0.00859406963 -0.0290659312 -0.0827777684 0.151205987 0.0135993576 0.0095570432 -0.142162323 0.00178637984 -0.0175982956 -0.00341362623 -0.116277464 0.127863139 0.155240506 0.0902651772 -0.0665329844 -0.0343229175 0.00592056988 -0.0689622238 -0.0882099047 0.0423216335 -0.0418753251 0.0649065152 -0.148054436 -0.126529023 -0.107144743 0.048180446 0.0964411348 0.0283801127 -0.147997066 0.0730884373 0.0641160384 0.103855938 -0.0519286469 -0.0627045557 -0.123388886 0.106936358 -0.100495324 0.036348857 -0.0662566945 0.0681579113 0.0571975075 -0.0754548088 -0.0399843156 0.0368984528 -0.0353834778 -0.0517093278 -0.123962395 0.0263090748 -0.130231589 0.0996464565 0.0178089179 0.0458062775 0.0963696018 0.0762125254 0.0340860561 0.109043621 -0.0622775555 0.113345571 0.110095598 -0.100886367 0.0148935774 -0.123453058 0.0149683403 0.0882795379 -0.0308197234 -0.00579763902 -0.0442597829 -0.0558761358 0.00445035286 0.0967673883 -0.0510170944 0.1171581 0.0859833658 0.00351574784 -0.0282143541 0.0262071025 -0.0563719533 0.0486262627 0.0520373955 0.0674998388 -0.0777793005 -0.019211974 0.0729704723 0.0623332597 0.0411960185 -0.00379213877 -0.0160260908 0.101385273 -0.07236664 0.118454322 -0.0279059727 -0.00111757044 0.108530454 0.0313402973 -0.109885067 -0.00746698584 0.0517579988 -0.102587014 -0.062045224 -0.0723215193 -0.00753403036 -0.0194992591 -0.055590637 -0.110146999 0.0563573688 0.000793169835 -0.0437380224 0.037614204 -0.103893019 -0.11840263 -0.0892521739 0.0177615266 0.0299307667 -0.0603615977 0.0103125488 -0.0940437391 -0.0742155388 -0.00279134372 0.116551526 -0.0507049747 -0.0112416446 0.0206989124 -0.0475890413 0.00135824515 0.0360365659 -0.0638581216 -0.110917598 -0.0285419766 -0.0785639212 -0.00673839869 -0.072663039 -0.0943017006 0.0209225155 0.0458435677 0.06708619 -0.00773984846 0.130092591 -0.0302911103 -0.094658874 
-0.106029265 0.0598360002 0.0363203026 -0.0587395169 0.0218387116 -0.00121726026 0.0923015103 0.0268146414 0.00547261769 -0.0118542481 -0.0527351797 0.0381134599 0.0549164079 -0.0742723569 -0.00661152741 -0.0885568187 -0.146828458 -0.145066977 -0.0526844971 0.0974245518 0.0119285621 -0.148422763 0.138095111 -0.0548562445 0.0224515107 -0.033984974 -0.0918067098 -0.0412526764 -0.129727185 -0.091969721 -0.0195525698 -0.0304857362 -0.114038028 0.123106226 0.0100026429 0.0864370763 0.0507619679 0.00284729549 -0.00105298625 0.0372510068 0.0406655185 0.035562437 0.0691316351 0.0814873502 -0.0958798006 -0.0228097066 0.0195914619 -0.027246682 0.0997626036 0.0630631745 -0.125293136 0.0745200738 -0.0511293188 0.0464217141 -0.067329675 0.018248735 -0.10921976 -0.0231118333 0.0425507538 0.0270119589 -0.0716171041 0.0184950344 0.0490458496 0.0652568191 0.0191503335 0.0326661766 0.00589203555 -0.0994252041 0.0639910772 0.0935874581 -0.0105717117 0.0074147 -0.0258986168 0.0828858837 0.00915369298 -0.031141039 -0.0613915138 0.0385154858 -0.098638989 0.0779575929 -0.0105742011 -0.0758871809 -0.0109963436 -0.00425893022 -0.0998037308 0.0786853656 -0.00168336509 -0.0925417468 -0.125538707 -0.122153223 0.0821714997 0.124297135 0.0863585621 -0.0707112625 -0.0507845916 0.0522913001 0.0209270567 0.0663688928 -0.0528395213 0.120615751 0.0676541924 -0.024552837 -0.0572560132 -0.0213594604 0.10777957 0.101967193 0.13353315 -0.0903856754 0.000669586763 0.0565047972 -0.0825800672 -0.062248636 -0.014425775 -0.0738483593 -0.00286239828 0.0884365365 -0.105007574 -0.0629888326 0.0934715867 0.110788323 0.0860318914 0.00210579997 -0.0725004748 -0.127494186 0.0919124186 0.110653833 -0.0781571791 0.00416795025 -0.110430084 -0.0848361403 0.00444030436 0.116966464 -0.0922116861 -0.0228395946 -0.00113955385 0.00864992663 0.0542619862 0.0738494545 0.0541707687 0.0777184516 -0.0970832705 0.0126359928 -0.0184956864 -0.0622084662 0.0451156609 0.079605639 0.012876017 -0.0658479407 0.0148149095 -0.116397806 
-0.084576413 -0.100534178 0.0414143018 0.0586240441 0.0751088932 -0.0757061094 -0.0277423412 0.134908676 -0.033364512 -0.0541506857 -0.0223149844 0.0424581356 -0.0582377762 -0.0225137156 0.0737239867 0.0508049503 0.0808460936 0.0816969201 0.0865024626 0.0526327603 0.00781754963 0.0650917143 0.0286054742 0.0803678334 -0.0147822825 0.0908693671 0.107531264 0.0507709011 0.0359385237 0.0059293001 -0.00380560011 -0.00963426009 0.0474996306 0.0564068668 -0.0188577659 0.0513748489 0.0650842935 -0.0713231862 0.0369577892 -0.0133927027 -0.0248449575 0.0464835763 0.0194541477 0.0589518249 -0.0366000384 -0.00920657907 0.0802554563 0.107316345 -0.0738855079 -0.041999802 -0.061523933 -0.0305218492 0.156987384 0.0346348062 0.0126057826 -0.0152376043 0.0502441861 0.0664728656 -0.0426487103 -0.0210334025 -0.0939696729 -0.0341476351 0.0202106778 -0.0405531637 -0.0474039763 -0.122032635 -0.0849575177 0.0358551703 0.0997392237 0.0705704316 -0.0954606384 -0.0823383406 -0.0335653499 -0.121191315 0.013300227 -0.0529904217 0.0114150038 -0.0746903941 -0.03891664 -0.0819836557 -0.0729287639 -0.0179754216 -0.0102402242 -0.0607133955 -0.0891000032 0.00797273777 -0.022605991 -0.0809449404 0.0195738394 -0.134260491 0.0545250103 -0.0856008008 0.0678676516 -0.0582477748 0.0922091454 0.0154068666 0.0158708468 -0.12760596 0.0564289317 -0.0613937639 -0.0670805797 -2.70218661e-05 0.0686709732 -0.0105099222 -0.0782819167 -0.126448721 -0.0540316962 -0.00444437843 0.0426672585 0.0511715114 -0.0664667189 -0.117823824 -0.0813703611 0.0417935252 -0.075663574 -0.0227859747 -0.0927959681 0.00258619245 0.0769350082 -0.0262786634 0.15649274 0.00548388064 -0.0489241667 -0.028820714 -0.11300534 0.0169768985 0.0593335219 0.107664488 0.0454386249 0.127441257 0.0917166471 0.118455522 -0.089602381 -0.00173335895 0.039862778 0.030519424 0.0692663789 -0.0869349092 0.0913121849 0.0777036697 -0.103544652 -0.0235558059 -0.00459441263 0.0603882074 0.092305325 -0.0165223666 -0.0444068499 0.0616826303 -0.0684993789 
-0.0543850996 0.02927945 0.0985921547 0.0364675447 0.0572966635 0.130570352 -0.109130152 0.0495605171 -0.0749507546 0.0501365066 -0.119342029 0.0564106107 -0.0683375373 -0.10465467 -0.10835091 -0.0725807771 -0.061554186 0.0320482478 0.0128553994 0.0586562306 0.0384311117 0.0629496649 -0.0201265886 0.00698905718 0.0514870435 -0.100664191 -0.0316986516 0.00712753553 0.0836560577 -0.129708961 -0.0514545329 0.127032861 0.0874415487 0.0538004041 -0.12563847 0.0540097728 0.0164953042 0.0776124969 0.0573409572 0.0104391398 0.0330088995 0.0108658904 -0.0540808886 -0.0545005389 -0.0912899747 0.039707467 -0.0845631137 0.00952329952 -0.132783711 -0.00215431862 -0.00721834227 -0.0975917131 0.0458213203 0.0200680383 0.0877264142 -0.037273623 0.0161861442 0.110501729 0.0362346061 -0.0118159521 0.06564527 -0.0172634032 -0.0569092482 -0.0533066876 0.049190376 -0.0601144843 -0.0810378492 0.0477174371 0.0174570587 -0.0335904099 -0.133734539 0.0280133393 -0.0691580176 0.0767375976 0.138999552 -0.120073155 -0.0186303947 -0.0229670238 0.110347301 -0.0162759181 -0.125030503 -0.0610758066 -0.0921585709 0.0763650015 -0.0880154371 0.0487306491 -0.0280137099 0.013374065 -0.106856197 0.0210058708 -0.0297262985 -0.100724012 -0.0436595678 0.0109883668 -0.0853584632 0.0911873952 0.0432055667 0.00453142403 0.0127071068 -0.0517662428 0.0676092654 -0.0881505087 0.0098452922 -0.0823895931 0.026693739 -0.0401734523 -0.0659064725 0.0980170965 -0.0178305618 0.0141226156 -0.0129480297 -0.0210044179 -0.052819252 -0.00334462686 -0.0292001851 0.102371179 0.0348540843 0.0185807701 -0.0405740775 0.0680675134 -0.0530634858 -0.0756660923 0.000960345787 0.0799955055 -0.00204092567 0.106765963 -0.00071719382 -0.0700320154 0.0350265689 -0.0876352489 -0.0223301966 0.0634540915 0.046514608 0.00813020207 -0.0424726084 -0.0671557188 -0.0563678183 0.00290626287 -0.0465417765 0.0410110131 -0.0751074255 0.0387596823 0.0907988921 -0.0267684907 0.0381636135 -0.0729710087 0.111388907 -0.00196558004 -0.0716333836 
0.00468423311 0.0505745411 -0.0488672592 -0.0919072255 -0.0417954288 0.0445425287 -0.133938208 0.0828562379 -0.0701478645 -0.086428687 0.031541761 -0.0582731441 0.0219943449 0.0624012351 0.0130704865 -0.0401517116 0.0552026331 0.0567754321 0.0710184798 0.00758881867 -0.0124910641 0.0352796912 -0.0257061403 0.0308687277 -0.0213920511 9.48071975e-05 -0.0383036099 0.0287866332 -0.0110332398 0.0567016639 0.0603689998 0.00948372204 0.0733415633 -0.057091359 -0.0315212272 0.0307328366 -0.047477711 0.0429863371 -0.0265197102 -0.0657150894 0.0344378874 0.00179678167 0.010823926 0.0920330659 0.0393380597 -0.117785789 0.0551497154 0.0594726615 -0.0610899888 0.0493407547 0.0192096289 0.018806411 -0.0479772612 0.084946245 -0.0363575257 0.0180488937 0.0628424212 0.047983963 -0.00609665224 0.0490711816 -0.0524433553 0.0299023781 -0.134415284 0.0352817513 0.112402901 -0.00522935297 -0.108842514 -0.100159943 0.0536962375 -0.0482212268 0.033140216 0.0652505681 -0.0243575778 -0.00743648084 0.109651551 -0.0922743604 0.00548604131 -0.00986081176 0.0220172554 0.0101632392 -0.0740579367 -0.127274752 0.123685867 -0.03647862 -0.0687318668 0.00219690241 0.0450170375 -0.115679674 0.0136389351 0.103024855 0.103029318 0.110668465 0.00388179533 -0.0727895275 0.0803632215 0.0280900523 0.0280285254 -0.0467234924 -0.0731821135 -0.130512729 -0.0447000824 -0.107333377 -0.0769113675 -0.0171804633 0.0346588232 -0.0489323661 -0.0676056147 -0.105796985 0.0340438746 -0.0489065722 0.0679880902 -0.138374269 -0.10602235 0.0318014435 -0.116686605 0.0709820092 -0.0534674339 -0.00963871367 0.117157362 -0.0614339933 -0.0393194258 -0.0876544267 0.00427854154 0.0532199927 -0.044476755 0.000165691803 -0.0572972745 0.0502985567 -0.0602571145 0.12552923 0.0100918449 -0.0934075043 -0.0230401549 0.0295511037 0.0494234338 -0.0848071203 0.0259556789 0.079481706 -0.070821397 -0.0908804163 0.0531463176 0.0720867738 -0.0683894381 0.0868811682 -0.0569350533 0.0212230869 0.0892836973 0.0608907081 0.0264557358 0.117087588 
-0.0282123219 0.0711553991 0.10003607 0.0398537852 0.0925926194 0.0799961835 0.106195562 0.0835256651 0.0742167607 -0.1288362 -0.0119199455 0.0209458005 -0.0807825178 -0.090900667 0.0211038124 -0.0566857532 0.031491559 0.0864643902 -0.151924461 -0.0301744267 0.0409735925 0.111272551 -0.0702080205 0.0960132927 0.108262971 0.0476099811 -0.00588667253 0.0851650238 0.010426431 -0.0125091802 -0.101221651 -0.0594466254 0.0449221432 0.0909607708 -0.0161867402 -0.0196655095 0.0530647635 0.0182610322 -0.0709991604 0.00761050964 0.0762544423 -0.0591728203 -0.0941646695 0.024339376 -0.0626933351 -0.103930928 -0.0321634226 0.0614973754 -0.118808359 0.0537795126 -0.0431177206 -0.116265662 -0.0131834941 -0.00236911164 0.0999807268 0.110797547 0.0114548178 -0.0998885259 0.145602047 0.111171857 -0.0988338813 0.0828444064 -0.00907499064 0.00587745896 0.071559459 -0.13120684 0.060727559 0.0683118403 -0.0836969465 -0.0657875538 -0.0527593195 -0.115409821 0.0975560099 0.0509091392 -0.0899974853 0.0956521481 0.0244503263 0.0126064662 0.114975713 -0.0405928865 -0.0483787097 0.0214200635 0.00839174818 -0.046142336 0.0722193867 0.0103485761 0.107258148 -0.0378706008 -0.0450719856 -0.0208163224 -0.00753858453 0.0927842855 -0.0761416107 0.0222469252 0.0221501626 0.103600152 0.00779794529 -0.0233703442 0.012515164 0.0401047952 -0.0354013927 0.0374246277 0.0105700931 0.0782879516 0.0528782457 0.0609663725 0.121859848 0.0579176694 0.0506275222 0.089213632 -0.118543468 0.037998043 -0.0670093521 -0.13410008 0.0411561765 0.0282420814 0.0700198412 0.0285238512 0.00444778334 -0.0674335882 -0.0760352165 -0.0417404994 -0.00990704726 0.0137323095 0.0390490703 -0.05264882 -0.0782701373 0.123745263 0.0234257653 -0.0788318142 0.024272114 -0.0218193699 0.0922509953 -0.00793454051 -0.0463180654 -0.113730133 -0.0577475242 -0.00455238903 0.0886773691 0.0554309674 0.0941009894 0.129545763 -0.021742925 -0.081449911 -0.0626695529 -0.0395893045 -0.036148537 -0.0715967566 0.00607152935 0.0371897854 -0.0568351522 
-0.0322895311 -0.104940452 0.0889158696 0.0984952897 0.0380211174 0.109986477 0.00241002371 0.0807410851 0.0208322443 0.0305939745 -0.109555371 0.0584459454 0.0174565129 0.0446334742 -0.00203529699 -0.0710110068 -0.00075355859 0.00535989506 -0.00548237702 -0.0412316248 -0.10585098 -0.0147320451 -0.000358470366 -0.0302088298 0.0840577036 0.0291063283 -0.0138413198 0.0101552876 -0.0291738547 0.0731139556 -0.00815887749 -0.117785364 -0.0678437576 0.0784235671 0.0205320921 0.0843389407 0.0271183364 0.000885073736 0.0346361846 0.0954925418 0.130852431 -0.0751837641 -0.0253316611 -0.0976730809 -0.0676677674 0.0476101562 0.0662705749 -0.0301036816 -0.049003385 0.0106137209 -0.11252144 -0.0744634122 -0.0980421826 -0.0166393481 0.0481715277 0.0189415459 -0.0880238637 0.104637556 0.10711097 -0.0564402714 -0.0601721779 0.0159785729 0.0403848588 0.0139113516 -0.108862996 -0.00510752294 -0.0115773957 -0.0455890708 -0.0705545172 0.0851743072 -0.0565754622 0.0915891081 -0.0679899007 0.140255541 0.0228361152 -0.0286569875 0.0132950023 0.0172942225 -0.0447521694 0.0719969049 0.109762378 0.0197884869 -0.0228357762 -0.0507639088 0.0672996938 -0.0723399743 -0.00699901069 0.103675373 0.00931620412 -0.0457025245 0.0969348028 -0.0169527791 0.0237304047 -0.0895861909 -0.0194170661 0.0993531495 0.00229117088 -0.0562044792 0.0336305238 0.0490789376 0.0386500955 0.0127196591 0.00393643929 0.00232719886 -0.0829996243 -0.121718653 0.0753233731 0.0201976635 0.082195513 0.0600713976 0.0539501272 -0.0779756531 0.0625429153 0.00583441136 0.00761622144 0.115591303 -0.0334634334 -0.0245005973 -0.00851792749 0.0614887588 -0.0673773736 -0.0610792227 -0.0935594514 0.0109041268 0.0797310621 -0.054385256 -0.0951922908 0.0503217317 -0.105528668 -0.111157358 -0.0676904768 0.0699280798 -0.0395813137 0.0542365499 -0.0423914567 0.0401284434 -0.108586438 0.106151514 0.0741012841 0.0261538271 -0.0234557595 0.025597224 0.128329813 0.075639084 -0.0326176621 0.0483325124 0.0441246293 -0.00211445754 -0.0714289173 
-0.028952891 -0.0652696118 0.10838379 0.0806302279 0.0361339003 0.000588985044 -0.0501025841 -0.0767339468 0.0711772069 -0.109505966 0.0615409054 -0.00328358519 -0.0599714369 0.0810240135 0.0322509259 -0.105258301 0.00425739167 -0.0571172498 0.011786839 0.0552534238 0.0161224175 0.00839114189 -0.0516668111 -0.0663074031 0.0675223991 0.00408511516 -0.0934429094 -0.00482452614 -0.0146074528 0.116948992 -0.0514550433 -0.0092106685 0.0192382932 0.0676550567 -0.107134365 -0.0554183982 -0.0378831327 -0.00111221685 -0.111972146 0.0370764211 -0.025367327 0.0421501771 -0.034930028 -0.133550882 0.0460590795 -0.0408849232 0.0172165278 -0.0828626677 -0.142174855 0.0368294381 -0.0315607302 -0.0813754499 -0.0344028175 0.024872696 0.115811288 0.0284592416 -0.0183729436 0.0680122226 0.096686542 -0.0504275933 0.0555682927 -0.0117671303 -0.106590241 -0.0779706761 -0.0753313005 -0.01689367 -0.0120249027 -0.121162862 0.0904329047 -0.0305291414 0.115454301 -0.0964366719 -0.00301298662 -0.0242908541 0.0638555288 0.130841374 -0.10721194 -0.108135305 0.0741211176 0.0291729122 -0.113760702 -0.0604273416 -0.0709419549 0.139797956 -0.0342961662 -0.0201933645 -0.112033077 -0.0611329861 0.0254594646 0.0681489855 0.0438128486 0.0619565509 -0.11234884 0.0307241976 0.0981715992 0.00958520174 -0.0568824336 -0.0283987094 0.0636719465 0.0751391798 -0.114547461 -0.0748261958 -0.0586201847 0.000263712311 0.00999936834 -0.0185962841 -0.08149115 0.105373196 -0.00710947951 0.000694327406 0.0461735427 -0.0157357287 -0.0240037851 0.0580982715 -0.100919247 0.111699469 -0.0659800097 -0.00773917325 -0.0232742541 -0.0999335274 0.0753451958 -0.051228717 -0.0892171562 -0.0358730741 -0.0996832997 0.0939411744 0.00339663634 0.0281091705 -0.0894726738 -0.00704634562 -0.07571394 -0.0520362742 -0.0228929147 -0.0211675484 0.0902847946 -0.0659550056 0.11681138 -0.0221742485 0.0613481849 -0.0157842189 0.0737548843 0.00965575501 -0.0838649422 0.00177340093 0.0528827235 -0.11357832 0.100755192 -0.0534734391 -0.0358427912 
0.144596636 0.0647218004 -0.0423597619 0.0605341755 0.0732165053 0.0211333074 -0.132864833 -0.0364422947 0.0478251725 0.0119115161 -0.00161979138 0.0571164563 -0.0192272216 0.00423192605 0.0322174877 -0.0759943277 0.0137448525 0.00333114085 0.0397117473 -0.0238687452 -0.0832956731 -0.0279709753 0.0938207209 0.0343491249 -0.0871218666 0.0408064276 -0.116285831 0.0795068964 0.0848761573 -0.046004314 0.0709750503 -0.10165219 0.113097928 -0.0288509876 0.0324093103 0.120680496 -0.0231527574 0.0672659576 0.0315056667 -0.0114693092 0.0518258587 -0.0120203597 0.0803814754 0.034682408 -0.0337615535 0.0386820808 -0.0665814355 -0.0311845411 -0.0300064813 0.000839714077 0.0999553874 0.0476576835 -0.0808109865 -0.0593067668 -0.0203568004 -0.0215366837 -0.0306633245 0.0436415672 -0.0116597321 -0.0583168492 -0.0336035974 0.0782463998 0.019496616 -0.113910265 -0.100025997 0.0590599142 -0.0404247232 0.0250362512 -0.114405021 0.0491880253 0.0765769333 -0.0574881397 0.0377197377 0.0666196123 -0.0882478282 0.12657164 -0.072820656 0.0688454062 -0.0431665219 -0.0397588946 0.0174000375 -0.0294575971 -0.113417856 0.0438238829 0.0227670204 -0.0407737307 0.00265934458 0.0681627318 0.0419177152 -0.0144360522 0.0160792209 0.0458587408 0.0618349649 0.0176743343 -0.0137938112 -0.0633563772 0.0310720019 0.0681380481 0.0212434102 0.0676606894 0.041460555 0.110738382 0.0473630205 -0.0378823988 0.0510912016 0.00387570239 0.0408548079 -0.0602159686 -0.129621208 -0.00288634188 0.00880218763 0.091095008 -0.132332921 0.0251508802 -0.0712097511 -0.103759281 0.0195465796 0.0568811819 0.0538945012 0.140887022 -0.0393595546 -0.00443352573 0.0130884098 0.0407299697 -0.0129098492 0.120743103 0.02760542 -0.129898503 -0.00857560523 -0.0720292479 0.0491493791 0.102326475 0.0921848789 0.0479474291 0.0605254434 0.0442970507 -0.0229930989 -0.101877302 0.120247759 -0.0461608209 0.0228956696 -0.0468554012 0.0931479931 0.0344555005 0.01930671 -0.0351826884 0.0757239461 0.0770438984 -0.0225172918 0.0203145165 
0.0127433063 -0.0512304567 0.070727922 0.011212891 0.0631789118 0.061186403 -0.0620558858 -0.0242845789 -0.0357322469 0.0807544664 0.0694836825 0.0275604576 0.048231598 0.0312998705 0.0098650381 -0.0849438533 0.00338348607 -0.0562642589 0.062496379 0.00795244705 -0.00989409816 0.0154066579 0.0518637821 -0.103680417 0.00535006076 -0.104885489 0.0388466492 -0.0735442638 -0.0991858095 -0.0114390533 -0.024811225 0.0624354333 0.106003806 -0.0726236701 -0.106910177 -0.0611559413 0.0201094151 -0.052110929 0.0187292732 -0.0280273762 -0.101260066 -0.143803522 -0.1252902 0.0355423726 0.041179236 -0.126104265 -0.0216143429 0.0806514397 0.00608616043 0.0657909364 0.0178345367 -0.0923066512 0.0481731519 0.145438254 0.0159616042 -0.0456462018 0.0941475853 0.0632876828 0.0367626883 0.0247407742 0.0601012856 -0.0355465524 -0.0118422816 0.0488038473 -0.0545568913 0.0373688899 -0.0515505187 -0.0396510959 -0.0605122671 -0.085121952 0.0596127883 0.105902717 0.0220958665 -0.0255203731 -0.0148762362 -0.0768131837 -0.0578792021 -0.0949795991 0.0773940459 0.0814553499 0.137127966 -0.116018936 0.0563674271 0.0888326541 0.0284422096 -0.110917278 0.0353827216 0.0380769633 0.12019825 -0.00778515963 0.0705309212 0.00951496419 0.0804332197 0.0100569949 -0.0600129589 0.0635915622 0.0929165035 0.0890567824 -0.0334398523 0.0183780789 0.0171072353 -0.0848544464 0.0573717169 -0.0625854135 0.00517629972 -0.0316587314 -0.00222206302 -0.139464319 0.00396719109 0.119108282 -0.0407875292 -0.0145511776 0.0634373575 0.0286066066 0.0339107104 0.0838994458 0.156156093 0.0932729319 -0.067164138 -0.0970614329 -0.000510855229 0.108286127 0.151279747 -0.153703973 0.035030935 0.0742894635 -0.0494455397 -0.0341568068 -0.0577272587 -0.0769041032 -0.0278417245 -0.0176225342 0.0874658376 -0.0257863011 -0.0709038004 0.0103997458 0.105501436 -0.132797644 0.0622315481 -0.0974398479 -0.0254051387 0.0495131202 -0.111515976 0.103808023 -0.101846233 -0.0294793397 -0.131958127 -0.0533692092 0.109044902 -0.0826396644 
-0.0222143289 -0.0188204758 -0.0809235647 -0.0443305187 0.0731882006 -0.0607132837 0.108671054 -0.099729836 0.0198555607 -0.14591822 0.0282850396 -0.0493627414 -0.0674319044 -0.132230341 -0.0341024846 -0.0808820575 0.0899107382 -0.0263775121 -0.103454776 0.0222094338 0.00426623598 -0.084072873 -0.0383605286 -0.0547198616 0.0559252352 0.0470217839 0.0677336454 0.0497331806 0.0631156936 -0.00398747297 0.112397343 -0.166542813 0.137340739 0.0250983089 0.0890974551 0.119683884 -0.0544718653 0.0518688969 -0.0565655194 0.0104118874 0.0981469452 -0.0510016531 0.00272554019 0.0927296504 -0.0194716733 0.110565722 0.0546717308 0.0536676086 0.064474754 0.0118900863 -0.11696136 0.0142176412 0.0417189002 -0.0839173347 -0.0281918701 -0.0403215103 0.02901816 -0.0981693715 -0.0701962784 -0.00782805495 -0.0462877564 -0.0264666826 0.0648322478 0.0252208374 -0.0529760942 0.0255852453 -0.0645134747 -0.0154027175 -0.0532422848 0.0832066536 0.0396291837 -0.0148703549 -0.00148318615 -0.0615470037 0.0724665746 -0.0584392287 -0.026484957 -0.0585906915 0.0238307714 -0.0229600184 -0.0852913335 0.038450405 0.103452124 -0.033543352 -0.101024874 -0.0779693052 0.0964624882 -0.0119416546 -0.0178287029 -0.0222403612 0.0204786509 0.0163842663 -0.045447167 -0.0765725672 0.0971594155 0.041313909 0.0470899418 0.00305234478 -0.119385377 0.0464745872 0.0310937278 -0.131466374 0.0710817575 0.0100257266 0.068332687 0.0689313188 0.0125061376 -0.0347266309 0.0975375995 -0.00572358584 -0.0388996676 0.0531310216 0.137647584 0.0694899485 0.00679214392 -0.00121487537 -0.0537946038 0.0458747223 0.0680655316 -0.0407916345 0.0546781644 0.0273533594 -0.12560609 -0.0675602481 0.0385174453 -0.0973455235 -0.00201383908 0.0485716909 -0.0049632024 0.00582174305 0.0641960278 -0.0848527774 -0.0219417512 -0.15150407 -0.0783323124 -0.00965964142 -0.0608184524 -0.0130811296 0.0480553694 -0.0386784896 0.0999142677 0.0546924099 -0.1222317 0.0141203087 -0.0226628929 -0.0910275429 0.0631851926 -0.0745201185 0.0876418352 
0.00886597019 -0.047717195 -0.128835618 0.0425309427 -0.0195610169 0.0638543293 -0.129029542 -0.0131098628 0.0650595948 0.0454248711 0.0853424594 0.0140957199 -0.0202541035 0.0317879245 -0.143378869 -0.00423101289 -0.131989092 0.0190712065 0.0510258228 -0.0865119174 0.0910336599 0.0548940673 -0.105332822 -0.0292321127 0.00560635095 0.0586736389 0.00505677052 0.110722467 -0.107276112 0.110618874 -0.0691331774 0.118226469 -0.0460344777 0.0930353999 0.0951236412 -0.0392442942 -0.112528846 -0.0722931251 -0.0136691937 -0.0806170553 -0.0411446244 -0.041987583 -0.00134162803 0.0213002544 -0.0701876432 -0.0885930806 0.0525745451 0.0624400154 0.0732306167 -0.0200780723 0.0234855395 0.00717696035 -0.147869304 0.0787881762 -0.0716938302 0.0857125446 -0.0352597944 -0.0805057809 0.0154540623 -0.00925941207 0.0756853744 0.0621306412 0.00842974614 -0.0371917672 0.108534172 -0.035098426 0.0504793301 -0.020088356 -0.0706505328 0.0162423179 -0.0752581954 -0.134370595 -0.015700357 -0.0898832977 0.0254374146 -0.0247301795 0.130348459 -0.035687875 -0.0680520609 -0.0444837064 -0.0601570755 -0.0513698831 0.111533344 -0.113860339 -0.0939767584 -0.0477326587 0.0567156076 0.0808228627 -0.0376587808 -0.114645995 0.0915196016 -0.019314684 0.0117936404 0.0774793029 0.0794394761 0.0432011634 0.0209889244 0.0314350612 0.0340264812 -0.104788385 -0.00981875602 0.0270214248 -0.0868451148 -0.0589688234 -0.0642679632 0.063923806 0.0117191905 0.0869612917 -0.0395875722 -0.0766618028 0.0934183374 -0.0996760055 0.0946377516 0.0345660634 -0.0784306899 0.07895834 -0.0232259352 0.0685211644 -0.0316710509 -0.0698057264 -0.011367701 -0.0852755904 0.0756148174 0.112687804 -0.0221658461 -0.0638062581 -0.0995947495 -0.148020685 -0.130782247 -0.0682474449 -0.0904296935 -0.0312870853 0.00539993821 0.0133634834 -0.0529328249 0.11267703 -0.0565492623 0.00755324587 -0.13352786 0.0963837281 -0.0480984338 -0.0886128098 0.0665832683 -0.109329402 -0.0235391576 -0.057248909 0.0689797029 -0.06789276 0.055123087 
-0.0196565576 -0.0988758132 -0.0760087073 -0.0214166064 -0.0119032441 0.0697430596 0.00237821974 -0.0490270369 0.117983244 0.0743466169 -0.0153463781 0.101554446 0.103069983 0.0187342204 0.06316486 -0.117018297 0.0541630685 -0.100673176 0.0376137197 -0.0421548113 0.0645036548 -0.0168326087 0.0727319941 -0.0281656329 -0.117367707 0.116008684 0.0423757844 0.0988826826 0.0217613652 -0.0824515149 -0.0351628885 0.0812880173 0.147239089 0.0299862716 0.0364563912 0.145076081 0.105126604 -0.0210315958 0.0879319981 -0.0583813041 0.0593444556 0.106604464 -0.0204098541 0.0887133107 0.0528428815 -0.0444989093 -0.157084122 0.054473713 0.150896385 0.028199533 -0.0832022205 0.0864810869 -0.0489188135 -0.00317808706 -0.138337359 -0.0614818409 -0.0958974063 0.16130729 0.0542741828 -0.0278767291 -0.0963605344 0.132825524 -0.0308959335 0.0545662679 -0.0319377147 -0.0980552807 0.0630095378 -0.0429899767 -0.0260642897 -0.0517068692 -0.0561511219 0.11457511 -0.060042996 -0.0527282394 0.0207744949 0.023834521 0.0296360757 0.0525115617 -0.114589319 -0.147910029 -0.0456607491 0.0715667382 0.0993826538 -0.0107942242 0.117987439 -0.0284947716 -0.0709881261 -0.0903323144 -0.0224822853 -0.157054216 -0.0552059412 -0.0338664018 0.0750938728 -0.065085113 0.0211203843 0.118827477 -0.0167396851 -0.0932219103 0.0603475198 -0.0151796769 0.00819401443 -0.103917979 -0.0764359087 0.0163631905 0.0167195648 0.10436935 0.031059213 -0.010204182 0.0322529972 -0.0338583738 0.0547566526 0.0993093476 -0.0449988134 0.0820005462 0.0658240914 -0.0828819126 -0.0934411883 -0.00780287059 0.0153802652 -0.0455549546 -0.021676302 -0.00396145368 -0.121144004 0.0972423553 -0.0255077239 -0.0110973027 0.120320976 -0.0416977331 -0.0117513239 -0.105017632 0.0385619588 -0.0398330316 0.0233246256 -0.0271476638 -0.0675703511 0.0597647466 0.0618401542 -0.0964857414 0.060786169 0.00302257249 0.0607231446 0.0319902562 0.0811921582 -0.01984399 -0.00189587893 -0.139521733 0.0102705099 0.0954400972 0.0966984481 0.0283194389 
-0.045280274 0.0892768875 -0.052237168 0.0279194918 0.0737474114 -0.000261810783 0.112771511 -0.00738663413 -0.0538329072 0.0805022269 0.0928170681 0.0922827125 -0.12284258 -0.0546792485 0.0152943293 -0.0549117215 -0.0439201444 0.148618817 -0.0444157384 -0.014640267 -0.0463561974 -0.141159236 -0.0240516476 -0.0309760477 0.103102759 -0.0665320605 0.0416155756 0.0295819342 -0.072761029 0.106963806 -0.0282385554 -0.0605240837 0.0600927584 -0.0168974958 0.0207078587 -0.0489886738 0.0462408178 0.0563473664 0.0989545807 0.0286699794 0.00622357149 -0.0955138803 -0.089370057 0.0498380885 0.111254118 0.0486598499 0.0335665718 0.00651514437 -0.137515217 -0.00872137025 -0.0768070891 -0.103112787 -0.0138399014 0.0693487599 -0.000350349292 -0.132112339 -0.054441724 0.0118631627 0.0867957175 0.0417998731 -0.125937298 0.0554638319 0.0775117502 0.0927276611 0.0790062174 -0.107764006 -0.00206389814 -0.0824461728 0.0240072217 0.0872223303 0.0261813533 0.0419610031 -0.110041335 0.0405180678 -0.0859660432 0.0439100154 0.0232283361 -0.0124879908 0.0721851513 -0.078385748 -0.0744791776 0.0467836894 0.0291622393 -0.00720773824 0.0299567468 -0.0575182885 0.0597681552 0.0921448171 -0.0661888644 -0.00482166698 0.0675290599 0.0166998263 -0.0955132842 0.085087046 0.102876 0.0426338613 0.0119831273 0.0855897442 0.0225691516 0.0364397429 0.0160135124 0.00990157295 0.0420151539 0.0316322856 -0.0585764237 0.0824364498 -0.0413012579 0.135568514 0.034757603 -0.0277830604 -0.034982793 0.0369454138 0.0193050615 0.0799474046 -0.0846258327 0.0366695002 0.0598423779 0.0799307451 0.136157006 0.128120825 -0.0201004725 0.0452948473 -0.06387043 -0.0197872147 -0.0240808073 0.0412721485 -0.0553675219 -0.106740355 -0.035156589 -0.0776401386 -0.104280807 0.0636275262 0.143016845 0.0158824641 0.0104194768 -0.0614338666 0.0736046582 0.0777402669 -0.0251369067 0.08835724 0.0629755557 0.100526057 0.130498186 -0.0605841354 -0.107173443 0.0618615188 -0.0027110891 0.0448608994 0.0324240513 0.13815707 0.0885208547 
-0.0184885561 0.0110004703 0.0750818923 -0.123291738 -0.0332586765 -0.00227115862 0.0191539656 -0.00745699275 0.0960062817 -0.00826996565 -0.11104311 0.00985418726 -0.00825903285 0.0663968921 -0.022775976 0.120236516 -0.0306508504 -0.126447007 0.0266145803 0.0663856491 0.0159932058 -0.00567367487 0.0217537843 -0.129739061 0.0966290981 -0.120227985 -0.0869013295 -0.0486435518 0.145977855 0.0870844871 0.08520028 -0.0570347048 0.0191150215 0.0577304959 -0.0748146251 0.138433784 -0.00482775643 0.0784191266 -0.00595876481 -0.089486897 -0.0807763785 0.0747055635 0.0123625547 -0.0272405632 0.0675494596 0.0216415282 0.0251738597 -0.055193793 0.00323623535 0.104679525 0.00744761759 -0.0563779734 -0.00321181351 0.0755025595 0.0668580309 0.0714727044 0.0588193573 -0.0533336736 -0.027081253 0.0995806679 -0.00321418745 -0.0934964344 -0.0121698389 -0.0306112021 0.049315121 -0.0717256963 0.0284800846 -0.0465604663 0.0592573173 0.0975120962 -0.0522723123 0.0236058217 0.03004965 0.0192594938 -0.0153996143 0.0517514087 0.0202556662 -0.036583852 0.105843432 0.0923823789 -0.108679689 0.104115218 0.0757252499 -0.0786331147 -0.108693816 -0.0475629792 0.0984940901 -0.014999046 -0.0789110363 -0.0634896383 -0.124007449 0.0513020195 0.0337021165 0.00936586969 0.0974761024 -0.0039249598 -0.0539403148 0.0093635805 0.064329423 0.00841173995 -0.0369432382 -0.0830086768 0.0733837709 -0.0366025865 -0.0928544104 -0.0144924261 -0.0935278535 -0.0608592965 -0.102837108 -0.105442159 -0.0516982377 0.0344825126 -0.0649234951 0.0406593382 0.0177465007 -0.129067734 0.0989128351 -0.00798356999 0.0440664552 0.0535025857 0.0184675008 0.0339980274 0.0178082474 0.0744322464 -0.0364990495 -0.00785736833 -0.0367429368 0.110767066 0.043253459 0.0398505144 0.0362781025 -0.0534571707 -0.0138610825 0.0870700777 0.0741645619 0.0842578635 0.142304018 -0.0709979832 -0.0997136533 -0.0118433814 -0.0195060018 -0.0260943621 -0.0851690397 -0.0610457137 0.0346727297 0.038758263 -0.026763279 0.00496497378 -0.0248329956 
-0.0392976888 0.0639327541 -0.0116053829 0.0389414802 -0.0765374303 -0.00860751234 0.0580505244 0.058850836 0.0729101896 -0.0168091431 0.0493836068 0.0378085151 -0.0690903589 -0.0735144988 0.0763928369 -0.0370460264 0.0515179113 -0.0130856326 0.0213577785 -0.0343368538 -0.0334647931 -0.0814459473 0.0176412053 0.0479053147 0.00514184404 0.0467168912 -0.114557423 0.0326536633 -0.122538239 0.0797366053 -0.0220797621 -0.0939437151 -0.0356741399 -0.129558548 -0.0728810504 0.0334232114 0.0454140641 0.0641237572 0.0639395788 0.0806245655 -0.0568198524 0.113629669 -0.0269122235 0.0319497921 0.0329489671 -0.0914393291 -0.0630809143 0.0387656465 -0.0879159197 -0.0275259484 0.0394459814 -0.0404246971 0.0497982427 0.0524061657 0.00315544894 0.0564953573 0.126382247 -0.0345166884 0.0698444024 -0.0992106721 -0.0982451588 -0.107451998 -0.0527341142 0.123538248 0.024919359 -0.0131305484 -0.0894226953 -0.115056708 -0.0378506444 0.0621916279 -0.122512206 -0.0171847306 -0.0232702196 -0.103842773 -0.100498989 0.0790945068 0.0964731276 -0.0845638365 -0.0501652695 0.0643050224 -0.0128263319 0.0967387334 0.0712623745 -0.0615100749 0.0906366855 -0.0492125414 0.0326652825 0.126826495 -0.0253421534 -0.0376052111 -0.0545951948 0.0150278658 -0.120675765 0.0287395269 0.0592594892 0.0814532936 0.102257699 -0.00316688977 0.0232737195 -0.0206201728 -0.106220126 0.0464446917 0.0267721917 -0.0964898616 0.000449200714 0.058107879 -0.0119522484 0.119053274 -0.0157440305 -0.0229303446 -0.0538970679 0.0736326724 0.0061632446 0.084165886 0.0724296197 -0.144264609 0.0839208364 0.0139587959 -0.0285230391 0.0120399361 -0.11818894 -0.104979657 0.0247651879 0.0166639592 0.00610556966 -0.0571998879 -0.127792791 -0.0762260929 -0.0623565726 -0.0835529417 -0.0365828983 0.0787034184 0.0618906133 -0.0250335261 -0.0716648474 0.0972160697 0.0754901916 -0.0548195727 -0.0320330486 -0.0405605473 -0.0662167147 -0.0892478526 -0.0222243164 -0.0625544339 -0.0201768409 0.118638895 -0.0409025624 0.0845576376 -0.107005633 
0.0590842962 0.034769319 0.00361982756 -0.0245255139 0.0547625758 -0.115161017 -0.0553721972 0.0778890774 -0.041641593 -0.0323726982 0.0310737193 -0.0700510889 0.015053235 0.160222828 0.10772761 0.037959829 -0.0358525217 -0.111562051 0.0241074972 -0.12357261 -0.12483231 -0.0926482156 -0.0271823723 -0.113680638 0.108206771 -0.116128989 0.00913964305 -0.0308017898 0.0424275286 0.122627713 -0.0544019043 -0.132333323 -0.046340026 0.139717042 -0.0182990991 0.068831861 0.0823961869 -0.043938648 -0.038030766 -0.0475954749 -0.0332984775 -0.058541622 -0.0173970181 0.00698842388 -0.00505919755 -0.0264520328 -0.075296253 -0.0592983365 0.0703864917 0.0332557037 -0.083748579 0.0185417943 0.0330124721 0.081373781 -0.0521305203 -0.0828755274 -0.0191443413 -0.0275393687 -0.104940005 -0.0373899266 -0.129006848 -0.0350307822 -0.0594692267 -0.086783044 0.0255106967 -0.112269998 -0.0306032244 0.0352828279 -0.0759110004 0.0436218269 0.0371037386 -0.0105048856 -0.0273989253 0.0408369228 -0.0481947623 -0.0490162857 0.0253327843 -0.0900856555 -0.10654892 0.0296242032 -0.0569313392 0.00557792746 0.0511694998 0.0238375813 -0.0494668148 0.00665589096 -0.0544650368 -0.0221331436 0.122494169 -0.00346396537 -0.03750135 -0.0583185181 -0.0697296709 -0.00882709585 -0.0139009692 -0.0581789836 0.0511611365 -0.0665547177 0.0625669286 0.0951525047 0.0109070055 -0.0445890985 0.0510924347 0.0549049266 0.076408051 0.0454293936 0.00382474251 -0.0298265554 0.00967820082 -0.0501072742 0.0321529955 0.0609736592 0.0241174586 -0.0162564274 -0.0707081556 0.144424051 -0.0120626008 0.0290390942 -0.0303947851 -0.0576401316 -0.0760076717 0.0552736968 0.0407108702 -0.0295744073 -0.097608991 0.0594237335 -0.0941474885 0.0194068011 -0.0656058192 -0.0635675117 0.0444326811 0.0656519532 -0.0508928746 -0.0729632974 0.0443925709 0.0649101809 -0.0789977983 0.0154495686 0.0545210727 0.0447072089 0.126279771 -0.0358525813 0.0973265097 0.0260252561 0.0660820976 0.103842169 0.0981507078 0.0491054058 -0.0901521593 0.00591290556 
0.0812497959 0.00227644946 -0.0607588552 0.0970650539 -0.0110606086 0.0776812136 -0.0386007279 0.119623892 0.0970067903 0.0679384917 0.05710252 0.0563185252 0.120259158 -0.0155343693 0.11131572 0.0304788649 0.0158111248 -0.0598068163 0.0854219869 0.0570583344 -0.0570600703 0.0287855826 -0.0342741273 0.036824815 -0.0501024202 -0.0268743541 -0.0634012967 -0.0412885621 0.128790557 0.00406311126 0.042762816 0.00955149718 -0.0193585306 0.0519001707 -0.039887663 -0.0505587868 0.0825586244 0.159575224 -0.039045386 0.0544076897 -0.0779607669 0.0380125828 0.0408898331 -0.0760996863 0.0313064456 -0.0805607736 -0.0574796721 -0.0846826658 0.113500386 -0.0871631727 -0.117222093 0.0810274109 0.051653102 -0.0653802082 -0.00290928991 -0.0630526915 -0.119209491 0.0347142629 0.0174591336 -0.0608103834 -0.0927200988 -0.0334013142 -0.0835639536 0.11650601 0.0233004745 0.0682244599 -0.0430421382 0.028754117 -0.0900809765 0.012021089 0.0624547713 -0.105882496 0.0918491483 -0.0683888867 -0.0233582761 -0.0216962695 -0.0297207572 -0.0362710096 0.0270867273 0.0460449308 0.0642470419 0.0419084579 0.096854955 0.0901691094 0.0210975073 0.0876087993 -0.0793926194 -0.0800184235 0.0771968812 0.00599401817 0.0235112216 -0.124454454 -0.0479293279 0.0157109667 0.0773659572 0.00740274787 0.0359256268 0.0233147927 0.0827361718 0.0162823889 0.0475280918 0.0248643626 -0.0826426297 0.0601135641 0.119723722 -0.0729700252 -0.115519248 0.0721688643 0.105871052 -0.0132921757 0.08133322 -0.0257968605 -0.0480753519 0.0262065995 0.0138876187 0.0765962973 -0.0963439941 0.0358153284 -0.0679376945 -0.0568544529 -0.0514913723 -0.0568194948 -0.0500201248 0.040343143 -0.0777348354 0.0369281098 0.0772028044 0.00108942436 -0.00182866259 -0.0662001669 0.0198386908 0.045538079 0.0673875585 0.0710163489 0.0381334536 -0.0855338573 0.0286879074 0.151627332 -0.0448625647 0.0633926764 0.0938242897 0.0526457354 0.0111530349 0.097349681 0.0770600736 0.0608735308 0.0969019234 -0.0362519659 0.00380012137 -0.0779155269 
0.00992168486 0.0114733698 -0.000200923663 -0.0576153658 0.083698988 -0.147824839 0.0438186601 0.0710815191 0.028712105 0.04132507 -0.0103248488 -0.000444060774 -0.0523407757 -0.0500133485 -0.0338968448 0.0263319649 -0.04361872 -0.0277711656 -0.0168558471 0.0467232168 0.0177872274 0.0654023588 0.0379033573 -0.0607685857 0.00964797754 -0.120628364 0.0780472904 -0.0843662843 0.030455105 0.118488185 0.0485005565 0.0704616383 0.100928433 0.0408400409 -0.0665229484 -0.00970364176 -0.0212464705 0.0161203556 0.147231802 -0.0107072778 -0.0776233077 -0.0784361213 -0.0594038591 0.0418931209 -0.000451631407 0.112979718 -0.0695450008 -0.0122077055 0.00301642111 0.0691217184 0.0310368631 0.0258781705 -0.0280865338 0.0467126593 0.00575158047 0.00231607817 -0.0486097038 -0.119848073 0.0345518552 -0.043744415 0.0102821859 -0.0540690646 0.138281018 -0.00140106888 -0.0396243855 -0.0496274233 -0.0120704891 -0.0222738367 -0.0162870642 0.014834349 0.00513116037 -0.0692124441 -0.0776496753 -0.120830476 0.0161017329 -0.0370132066 -0.0627025366 -0.108544096 -0.0967517719 -0.0203821119 0.0064773499 -0.13926363 -0.0478015207 -0.00240246905 0.0692687705 -0.0158551876 -0.0689087138 0.104410864 0.0010006387 0.0992425233 0.0829349607 0.0481005087 -0.0365891643 -0.150370061 0.0162225049 -0.0743764937 0.0161576103 -0.0280919597 0.0834880769 0.0755537376 -0.0628848672 0.0377435535 0.0462861024 0.0946150273 0.037716087 0.0241654012 -0.0828214586 0.0492958799 -0.0326795466 -0.0770705715 -0.140277237 -0.0159451012 -0.109213002 0.0240502506 0.0428965315 0.0193452798 0.0742191896 0.0848924294 -0.0308462307 0.0752010345 -0.0391890407 -0.00412439182 0.0942446142 0.00469598826 -0.0910378024 0.000205826393 -0.0885702595 -0.10040123 -0.0408986025 -0.0709256157 0.0822851807 -0.0281461775 -0.0398267582 0.0393168256 -0.112278141 -0.0479413383 -0.0573235005 -0.0405642055 0.122419089 0.0270299502 0.00841662008 -0.0121508595 -0.0198617335 -0.000879280851 0.00153166568 0.0554407202 0.00480276532 -0.0682227761 
-0.0328659527 -0.0611312203 -0.142565057 -0.00181693793 0.0861305967 0.0628089532 -0.0740653574 -0.0160195958 0.0974677652 -0.101770975 0.0444948077 0.0841920972 -0.0720307156 -0.00300135929 -0.069788307 -0.0935182795 -0.0931349918 -0.10534066 0.00375205046 0.0325018056 0.0886859596 0.0366864353 -0.0217052139 0.0339041911 -0.0782768726 0.00892924238 -0.0385166742 0.0129920086 0.0826299712 -0.044218149 -0.0138300406 0.00623118551 0.0313761942 0.126124471 0.0703845546 0.0405096114 0.0779507384 0.056386482 0.0744233653 0.0530518629 0.0425919332 -0.0413660035 0.0031353673 0.0289649554 0.0164382402 -0.011526701 0.0496848971 0.119253859 -0.111141384 -0.036666058 -0.0288353041 -0.00853391178 -0.0851149112 -0.0335021652 -0.0633720756 -0.0867637545 0.0132676009 0.0459979139 0.0367925242 -0.0876119509 -0.0333126523 -0.0919199139 0.0108301183 0.117218599 0.107971512 0.0295869391 0.0267735161 -0.0267074816 0.0445350818 0.111544669 0.0392179638 0.0937596411 -0.00281381886 0.0201951191 -0.0501904786 0.0335953049 -0.0029760797 -0.0398072712 -0.0350824408 0.0377969109 -0.0179890636 0.0425337292 0.0354793109 0.0247561317 -0.0294101313 -0.0284981932 0.028481964 -0.0429043695 -0.053075958 -0.126678079 -0.085693188 0.0219913088 -0.103943169 -0.0727316067 -0.0124961985 -0.126258418 -0.0539888591 0.0462416373 0.0975957662 -0.00795640703 -0.021866478 -0.0801899433 0.0211574696 -0.00211753859 -0.0770760551 0.106369033 0.0529551283 0.00306034461 -0.153457433 0.0278290957 -0.0596790686 -0.04930925 0.072257936 -0.067070365 -0.0302931052 -0.115538754 0.138532385 0.0228340477 0.154491559 -0.0212814286 0.000630576746 -0.0118962303 0.0624279119 0.0366596803 -0.104068108 -0.0649591386 0.0323766321 0.0730391443 0.0661889538 -0.0391814969 0.0793058872 -0.0105679389 -0.0682836398 -0.0303012673 0.122320741 -0.00334193907 -0.107540131 0.0638230518 -0.119354151 0.108916059 0.0184885114 -0.0397466794 -0.074820742 0.0587450974 0.111003347 -0.0857884958 0.0496451035 -0.175095469 -0.0324501954 0.0114386939 
-0.083123289 -0.0846996307 -0.0342796296 -0.0571025424 0.126033574 0.0537063144 0.0963928178 -0.0292254034 0.0303790402 0.0882642195 0.0210448559 0.0844297558 0.0784011334 -0.067198731 0.0598029867 -0.115000091 -0.0233332999 0.11517673 0.0587579273 0.0726984292 -0.0598884225 -0.0596332885 0.10184852 -0.00164783257 0.0644008815 -0.0344729498 0.00754436292 -0.133005932 0.0759031922 0.00654394785 -0.00963001978 -0.172101706 0.0105558978 0.176982358 -0.0497983247 -0.0037973139 -0.0655243993 0.0364305004 0.0520126633 0.016448427 -0.00652270019 0.068141371 0.0190387368 -0.0741908997 -0.0276863649 -0.0545798913 0.0041190316 0.0235041492 0.0122208726 -0.0720304623 0.0136137297 -0.0269483216 +tensor_10bias 50 +0.12787357 0.017543152 0.122975975 0.0730041191 0.0510178655 -0.00993559696 0.139933825 0.15092434 0.0684130192 -0.0333705768 -0.184260622 -0.13440612 0.109378524 0.111376524 -0.10483826 -0.0250708181 0.120549299 0.0411001481 0.183845177 0.135748357 -0.00771392835 -0.12025056 0.085442692 -0.0513125733 0.136845529 -0.0145230526 -0.0895486251 -0.0252410602 -0.00896273553 0.0933182612 -0.108676046 -0.104239464 0.170086652 -0.0341263078 0.0728005916 -0.0453254506 -0.100045033 -0.110129185 -0.00771265198 -0.119152002 0.1214706 0.101130307 0.0332861841 0.0142126186 -0.010599345 0.109234303 -0.0182705577 0.177162722 0.0691059828 -0.0739419758 +tensor_2bias 50 +-0.0447338857 0.0537877791 0.0785957575 -0.0634338111 0.153481558 0.148676842 0.0265698414 -0.0261984505 -0.0751923025 -0.0352455713 0.0932889804 0.113871664 -0.0193461645 0.175267622 -0.0770687833 0.157511786 0.0196232703 -0.0737266392 0.0872744098 0.116388358 0.168398216 0.0425802097 -0.102230035 0.0693789497 -0.0855393335 0.126388997 0.0205914602 0.140580684 -0.00234525092 -0.0295791756 0.0197821874 0.0661892593 0.166472748 0.149337456 0.0513125136 0.00068877294 -0.0757507607 -0.0540507101 0.134943455 0.0256511811 -0.0943378955 -0.0261238459 0.0309584048 0.111188456 0.169084176 0.136096522 0.0985386074 0.0480017625 
-0.0471420884 0.122215845 +tensor_6weight 2500 +0.0649253875 0.129901871 -0.0820776671 -0.0164463595 -0.0272229239 0.0591965616 -0.118314907 -0.037768431 0.0372078121 -0.105141595 -0.140254259 0.0649844706 -0.112917937 0.141195908 0.140458569 0.0553426445 0.0367731303 -0.0505450144 0.0507215112 0.114758804 0.115806922 -0.0424669459 0.0370975286 -0.14095898 -0.104349688 -0.007835567 -0.0608764365 -0.0330444127 -0.12756449 -0.104601666 -0.0191679522 0.00627362728 -0.0662557259 0.0937368721 -0.101459384 -0.0692796931 -0.0512177646 -0.126805127 0.0393478721 0.0119376034 -0.0574386194 0.100259379 -0.10315454 0.109866068 -0.02667135 0.130284503 -0.127174616 -0.0201597661 0.0414076746 -0.122587755 0.126039341 -0.115497321 -0.126209974 -0.00932627916 0.0310982913 0.0501976013 -0.0105512738 -0.117707536 -0.116891071 0.117860749 0.0559653193 0.0531298667 -0.0543823317 0.106951609 0.0151336193 -0.0444077402 -0.112000868 0.0114103854 0.0838644654 -0.012747705 -0.0791340023 -0.0889710411 -0.0655299723 -0.0225159228 -0.00320497155 -0.0662335902 -0.0993035883 0.137778953 0.105412766 -0.116872713 0.0578503758 0.0725949556 0.0382958353 -0.0512723327 -0.00722907484 0.0786679238 -0.116880074 -0.0138037503 -0.0500161424 -0.133497417 0.0958063304 0.0558829457 0.0326671302 -0.0238390192 0.0845869035 -0.0934950113 -0.0433793738 0.0942181498 -0.045510605 0.0947668105 -0.106258683 0.0446187519 -0.0900780708 -0.0834366232 0.191142887 -0.100739747 0.171907842 -0.0254000407 0.138836846 -0.0700232163 0.114825904 -0.143776864 -0.0321323685 0.0355321914 -0.178224027 0.119957708 -0.0752720386 0.127894193 0.164032444 0.065395847 -0.063121289 -0.0970638469 0.102740057 0.0505844206 0.0253012329 0.0821145922 0.180317059 0.136325151 -0.103746325 0.126737922 -0.0877246112 -0.0697940513 0.0607301034 0.0686804578 -0.0175086763 0.0285665393 0.147603065 -0.159169093 -0.058806546 0.101134196 -0.0185775906 -0.113093227 0.0278050229 -0.0363715962 0.123531096 0.105049185 0.0325903893 0.101475507 0.175050184 
0.0439927392 -0.0129783954 -0.103368133 -0.094232142 -0.133218303 -0.10637027 -0.126878336 0.100644603 -0.0823836327 -0.0993345156 -0.0921484306 -0.00233977009 0.0756816864 -0.0497992188 0.044235874 -0.100462228 0.0119998753 -0.0844490379 -0.0331858918 -0.0446389243 0.042482052 -0.126429394 -0.105036467 0.0468023382 -0.0696351752 0.0628612116 0.0562251285 -0.0864542499 -0.0504873767 -0.057342425 0.107809477 0.103574097 0.0706402957 0.0782148615 -0.112125456 0.0768203884 0.0012682596 -0.124097727 0.114557318 -0.0111420928 0.0438492894 -0.0157870948 -0.129962921 0.115011618 0.0792783797 0.0613046065 -0.000343024731 -0.0795636103 -0.0708794519 -0.0101428293 0.0629758537 -0.0162976906 0.111654803 -0.134260848 0.00456416048 -0.129808471 -0.0437678993 -0.0731499866 -0.156290948 0.176469311 -0.134536281 -0.0936101675 0.094726339 0.129458129 -0.00281856535 -0.0142846275 0.00348282605 0.129408911 0.125073373 0.153636366 -0.0143775577 -0.013238579 -0.0172810107 0.0421338268 0.116808861 0.0514435619 0.13204819 0.0942413136 -0.012623366 -0.0874075145 -0.0010379689 -0.162753403 -0.0148045626 -0.0110199554 -0.0829107389 -0.0709493682 0.162264898 -0.0466960482 0.115680397 -0.0569904298 0.0977253392 -0.0407817513 0.163954467 -0.0335706919 0.145685494 0.122499764 -0.0530293435 0.160302415 0.00654218439 0.0903446525 -0.0116685461 0.0239315517 -0.0313074701 -0.102479123 0.0804489553 0.0174044427 0.0801673904 -0.0707507953 -0.0458744019 0.0368017294 -0.158817649 0.0533084273 0.0464035608 -0.0136327893 -0.026964413 -0.0722962692 -0.0277424678 0.193694353 -0.00919557363 -0.0336900316 -0.00418696925 -0.0529568717 -0.00187929883 -0.00698451232 -0.0436371192 0.0323710404 -0.019839149 -0.0511180982 -0.110972911 -0.0133787924 -0.00690555479 0.104938939 -0.038326323 0.0560517721 0.138403684 0.143514618 0.199766785 0.14532347 0.0941502005 0.0855569765 0.0256890338 0.0689958632 -0.0572427884 -0.00418164022 0.0580582805 0.150297597 -0.122072354 0.176015973 -0.120600596 0.119270205 0.106842689 
0.108840823 -0.0772350207 0.128743961 -0.0015650976 0.0175431371 0.053713128 -0.117410287 0.0328807086 0.0287136007 -0.104569376 0.0721085593 0.0677165911 -0.0558042675 -0.0673747733 0.115988277 -0.122426286 0.0186466724 0.101494573 -0.029576974 -0.115950264 -0.0865741 0.0563799553 0.107808612 -0.0450687222 0.0710128173 -0.0514423363 0.0430348404 -0.0574421734 0.0800841525 0.0757694393 0.10702318 -0.0222116411 -0.0559151433 0.0379136428 -0.0136397472 -0.125272736 -0.12881507 0.0900285095 0.0889691934 0.121225074 0.0771746784 -0.0660418868 -0.044440113 -0.122758932 -0.109487474 -0.0582289658 -0.104467168 -0.00918032415 -0.0209672842 -0.0869374499 0.168161795 0.108111799 -0.0880761966 -0.0135405827 0.178589284 0.003923479 0.0852129236 0.161241695 0.00760242762 0.06472487 0.0908324644 -0.109867044 0.13713856 -0.0345446207 -0.144439176 0.0468028821 0.136207759 0.122578613 0.0340208001 -0.105203725 0.0250524748 0.106788099 -0.121437281 0.181704462 0.11812605 0.0816245601 -0.101409554 0.158797711 -0.0405994244 -0.106579058 0.0417435579 -0.0245459247 -0.00784720015 0.0369141363 -0.070102796 -0.0140520735 0.180028707 -0.0340496227 0.0966045856 -0.0815079585 -0.0375775248 -0.173401833 -0.0957172289 -0.189357907 0.0151246237 0.0324664675 -0.0768369883 -0.106799647 -0.0305638388 0.0201060958 -0.053941071 -0.0226951279 -0.0301792286 -0.0753694102 0.106556229 0.00628629327 0.0264616497 0.115733989 0.0310344063 -0.0524785519 0.0871863812 0.1207719 -0.0298178941 0.152269572 -0.13239485 -0.0819777101 0.0469505712 -0.0912657976 -0.111869723 -0.0653776079 0.10464593 -0.0256920718 0.12280155 -0.143135741 -0.00778760947 0.03016074 0.0972794741 -0.0641395524 -0.0162782986 0.0504767261 0.0761293843 -0.0471233875 -0.0866800919 0.0426621437 0.0164198168 0.111198299 -0.150085554 0.0248084236 -0.0389914848 -0.0365719572 -0.138500616 -0.0784377009 -0.107607454 0.0207631979 0.0907824636 -0.0914271027 0.0534422696 -0.112685621 0.0665683895 -0.0469377451 0.0247338824 -0.0177221745 0.118170217 
0.113025144 0.0234410614 0.104385503 0.0654341355 -0.10872592 0.128927425 0.196715385 0.0276464783 -0.0738130882 -0.081564039 -0.00269559864 -0.126407489 0.015476441 -0.045586586 0.0332736522 -0.0798867643 0.135938272 -0.162508756 0.0983785167 -0.0764289424 -0.0560759567 0.0814144537 -0.031941954 -0.121607453 -0.0935366377 -0.0972638801 -0.0318852663 0.134761959 0.00468478957 0.0771510676 0.0787510574 0.164012611 -0.0312081948 -0.0129511952 0.0929201245 0.128727853 -0.00758869387 0.0151306689 0.0861001238 0.106875338 0.0643666014 0.153492779 0.0107787019 0.0601070002 -0.0477736481 -0.131303728 0.00165647722 -0.159763634 0.0611100607 -0.0269413907 0.0301383473 0.118319333 -0.114341162 -0.143750668 -0.106911905 -0.0885151848 0.160572648 -0.0470729731 0.0245884079 -0.0456172712 0.0757794902 0.0562509894 0.0297678653 0.0527246483 0.0166134071 -0.108542152 0.142919838 0.127158552 -0.0228688288 0.00676658237 -0.03869633 -0.0931294337 -0.00328914542 -0.0614178069 -0.0198070854 0.145518914 -0.0294807851 0.0692162439 0.15985842 0.0560066774 -0.0942831039 0.0402628109 -0.118215956 -0.116073422 0.0202833321 0.117826007 0.122413464 -0.0271829292 0.0389408059 0.0934228823 0.0398765206 -0.00495207263 0.0981794819 0.115069546 0.0594924539 0.0624140352 0.0753316805 0.0130726891 0.00351743586 -0.118038118 0.133946255 -0.0532785915 -0.111061007 -0.0136450082 0.0968498662 0.133393183 0.149615765 -0.126794592 -0.107227415 0.167891011 -0.0144322244 -0.181450546 0.0244579148 -0.0923274755 0.157411754 0.050326366 0.143469214 0.00917230081 -0.0694648325 -0.0583085977 0.100404061 -0.0703162327 -0.132603139 0.0277496353 0.182791844 0.0298265126 -0.14978756 -0.0095058633 0.177655354 -0.0389893278 -0.0960298106 0.055750493 -0.0944034085 0.175231501 -0.151938185 0.0563026294 -0.126313433 -0.137585253 -0.11282815 0.0335017443 -0.016390631 0.0258972906 0.149925053 -0.0161783621 0.132413134 -0.129700065 -0.0751069337 -0.0137014491 -0.126565307 -0.0802877396 -0.127848729 -0.0448123366 
0.00571359694 -0.0442490689 -0.0026283646 -0.0133119607 -0.117010497 -0.032991223 -0.0752329901 -0.0423538461 0.0337411128 -0.101852775 0.102702036 -0.113081135 0.128210023 0.0527718291 0.0711361766 0.046200335 0.112589262 -0.0602141693 -0.124360792 -0.049823273 -0.140881091 0.116494343 -0.137485832 0.0550901145 -0.0324928425 -0.101916127 -0.0462415516 0.0865442455 -0.119312339 0.0382132456 -0.0243112519 0.101194464 -0.10621307 -0.0587359108 0.107364364 -0.0826650411 0.112274796 0.0253867805 0.0701454431 -0.043696586 -0.0748712718 -0.0725907981 0.0644025356 0.0884814113 0.0663292259 -0.129587308 -0.0319217071 0.0338242948 0.115189984 0.0245237201 0.0201187134 -0.0739658847 -0.0454444066 -0.0267900527 0.0743228644 -0.134670675 0.0116872936 0.123525247 0.00718687475 0.139177337 -0.0978305936 0.0739517361 -0.0291812122 0.0807204247 -0.140549108 0.00840865076 -0.0133223087 -0.0685992762 -0.0170855597 0.060691461 0.0238291025 -0.141307816 0.0849160701 0.0482466817 -0.0244439244 0.0211740434 0.0507029444 -0.069623448 -0.0391115323 -0.045335494 0.105534464 -0.0210918859 0.0410889536 -0.119236276 -0.0102088749 0.0296808928 -0.111803085 0.0251688212 -0.0522222742 -0.134250998 0.112514332 -0.0292918608 0.114655808 -0.115933761 -0.0447240621 -0.0562940501 0.115107387 -0.0417959876 -0.0358452164 0.128562316 0.123079613 0.0867616385 0.0504442304 0.085063085 -0.0750186294 -0.0415927172 0.0159885045 0.0309951119 0.0242125411 0.0228883941 0.128811404 -0.0658345073 0.0893866047 -0.0262501985 -0.0197901707 0.0398271419 -0.00843849033 0.0776178464 0.0806626081 0.168270662 0.015441413 0.0647286773 -0.0716274977 0.0758225247 0.114696413 0.142221808 0.103615619 0.0212591253 0.140274763 0.00738972286 0.159614474 0.11493472 -0.0833858475 0.0133725926 0.0502345115 0.138931051 -0.0143997408 -0.135814145 -0.0122304466 0.157529533 -0.150415257 -0.0632499009 -0.0106943063 -0.0938702598 0.163158879 0.13341108 0.106037788 0.1496768 0.177437797 0.087329708 0.137258947 -0.0137388939 0.0762795284 
0.0370195433 -0.0747531578 -0.092746526 0.0398157351 0.0443542562 0.0983223766 -0.00542128552 0.0799729377 0.168658942 0.125600606 0.150951058 0.117274851 0.0738498569 -0.0982450694 -0.011585433 -0.00457595475 -0.0337975733 -0.0616223812 0.0883765817 0.146805629 0.0442404337 -0.101139419 -0.059554819 0.0444233194 0.0295815617 0.0203016624 0.0470338352 0.00290740328 0.0758937672 0.0288642086 -0.0832545534 0.0548138246 -0.00573976338 0.0907851085 0.0382896215 -0.137567922 -0.0848902464 -0.0355325341 0.0280306078 0.0849616677 -0.0109465634 -0.0933749229 -0.0489923954 0.131554142 -0.0105491728 -0.0911042765 0.0896382779 0.107579067 -0.029194802 -0.118035324 -0.0691957697 0.0260686129 -0.117240146 0.0314605832 0.10417594 0.0173794031 -0.10924159 0.00410650671 0.12347053 -0.021081768 -0.0583038926 -0.076368995 -0.0559989214 -0.12317574 0.126255885 0.124372408 -0.139102474 -0.127438575 -0.0832829475 -0.0507567972 -0.0409637913 0.0168262422 -0.109306589 0.0518526733 0.0749200583 0.00206166506 0.0649633855 -0.0586098135 -0.00433701277 -0.140350699 0.0938716233 -0.089609772 -0.0619740263 -0.0610454977 0.0776864439 -0.0440377593 -0.0523070544 0.136881992 0.111145541 0.0935858637 -0.130629882 0.0228392035 0.0660683215 0.0564527586 -0.0145275388 -0.056871783 0.140726104 0.0382112935 0.0346260034 -0.0959678069 0.145820111 -0.0788428187 0.130337492 0.106305443 0.186199993 -0.0118903993 0.114453636 0.0458821617 -0.0491925776 0.0321561061 0.0618102029 -0.16807498 0.146204278 -0.0881870687 -0.169820085 0.0581149757 -0.0209829025 0.000727858045 0.0668258667 0.0809662268 0.0593013167 -0.154004052 -0.0266895164 0.131010324 0.0933532268 0.136942223 0.0960304737 0.127566546 0.128763124 -0.129231334 0.0490520634 0.0179415178 0.035261184 -0.179191247 0.134654313 -0.191801935 -0.076531738 0.0557464883 -0.0514609776 0.030970484 -0.0304086115 -0.058471296 -0.107087307 -0.0737263411 0.0960866362 0.0616026595 0.00334342872 0.0160897672 0.115088649 -0.129959434 -0.0453715175 0.106996052 
0.0485980026 -0.0609982088 0.0606777444 0.0854022726 -0.0109910937 0.0280183572 -0.106572933 -0.00772281922 -0.0217049569 0.142191678 0.078674458 0.068385914 -0.0397756584 -0.0448649749 0.0790037736 -0.0683723092 -0.134903669 0.0462144762 -0.0944194347 0.14962922 0.0367264152 -0.075939849 0.151242435 -0.0653834939 0.0671074167 0.0147493538 0.13696453 -0.0275645163 -0.0429917164 -0.0180217978 0.0253212303 -0.0417146906 0.0207910389 -0.0281672105 0.130631521 -0.109785154 0.0733767524 -0.109265648 -0.0798736662 0.0224359911 0.208666578 -0.0645421147 0.0355885737 -0.073725976 -0.0510966443 -0.0937370732 0.173772439 0.0993817151 0.00306298863 -0.195579961 0.052579727 0.127555981 0.0955225378 0.0206778944 0.0144746751 0.130441144 0.0313935652 0.00892100483 0.080054298 -0.128953949 0.0751526803 -0.0949046835 -0.153239205 -0.0463347062 0.016422227 0.0674657375 -0.0140186697 0.064172186 0.202651188 -0.165430844 0.0656619221 -0.0430362485 -0.197136238 -0.0389609933 -0.12942259 0.0315187573 0.0998861641 0.0155031411 0.0358207226 0.168374822 0.0940297097 0.0293072574 -0.0722433701 -0.0128252115 -0.0433789827 0.059830334 0.167342469 0.05525738 0.00795800146 0.177529857 0.0210485943 0.047749389 -0.0363491178 0.168270051 -0.100355022 0.0292338673 0.175140589 -0.127292693 0.162490025 0.0100361016 0.154595226 0.0616088361 0.136025682 -0.00410753815 0.0369135141 -0.143811956 0.0958657786 0.144568652 -0.00905292854 0.130941108 0.0106995432 0.0483372957 -0.0231650397 0.036639642 -0.0617889985 0.0236214604 0.0238810871 -0.0795606971 -0.110024542 0.174338296 -0.0911057219 0.0656976923 0.0863363743 0.0683924854 0.134093165 0.145337448 0.116067648 -0.0847840905 -0.0767683238 -0.0150442421 -0.0229843333 0.0828322992 0.0535647161 0.0319587328 0.068530798 -0.0646711886 0.197244614 0.0427581631 0.0388010144 0.162918717 0.136511028 0.0195802618 -0.0968718901 0.167434052 -0.0834559351 0.0702522248 0.163521126 0.110413931 0.161692828 -0.0881290808 0.148896158 -0.128931329 0.0255813021 
-0.0889823139 0.157743439 -0.0732447058 -0.0442789234 0.0533142164 0.133719116 -0.116840921 0.0800347999 0.189877659 -0.135516554 -0.0575624406 -0.0097662257 0.119637571 -0.074548699 -0.0714714378 0.126038283 0.1195461 -0.09768942 0.0303867999 -0.123445861 -0.0530549176 0.107548378 0.106309928 -0.0313007124 0.183906198 0.0751518011 -0.0633003265 -0.0617225319 -0.0701497793 0.0320757441 -0.0290392973 -0.0253149793 -0.0470200963 -0.0478345975 -0.120073162 0.201239541 0.142304704 0.0925019607 0.148831651 -0.167674646 0.123002127 0.106455177 0.0328564122 0.18806994 -0.116831504 -0.00451909332 0.108785309 0.157465339 -0.00134878256 0.168126434 0.0580710471 0.0837541148 -0.0657100528 0.158608526 -0.0463683493 0.0946896747 -0.104266793 0.0244341511 -0.0714015439 -0.0990499556 -0.0860033333 0.145062909 -0.0333383344 0.142448917 -0.00225598761 -0.0131941633 -0.149845496 0.00207266607 0.0925255567 -0.182044104 0.00203921902 0.178830191 0.135419115 0.127062351 0.119150542 0.120787822 0.0427289233 -0.102054872 0.0916266441 -0.0503866151 -0.0314327143 0.113203667 -0.14366518 0.12766479 0.0501433946 -0.0380674638 0.132927895 0.147104084 0.129884318 0.0988519117 0.0387863517 0.0734434873 0.0411540642 -0.027659202 -0.13669847 0.083362028 -0.0450929962 0.145056829 0.0885054395 -0.0165824685 -0.0861969367 -0.0862592608 -0.160450995 0.0212117564 -0.104402281 0.143013418 -0.0506607853 0.121090904 0.00905802753 0.111442901 -0.143552661 0.0210310649 0.0612097643 0.00359729188 0.0227075666 -0.0815051943 0.155096367 -0.0119450046 -0.0233580228 -0.0038536794 -0.0880303755 0.164003551 0.1600402 -0.016360864 -0.0836358368 0.0851199031 0.0105815725 -0.121088877 0.161806434 -0.0379569791 0.0800513998 -0.0538180247 0.153429583 -0.0247538723 -0.00772412121 0.120341845 0.0548929647 0.114107296 0.0127800889 -0.0710391551 0.134522244 -0.0879234001 -0.0632987469 -0.0650375783 0.0809550807 0.137545347 0.0396288 0.186278701 0.110111617 0.143173963 -0.176478416 0.160997689 0.0144827925 0.0872319192 
-0.0407468043 0.114270978 0.0436847992 0.0258595552 -0.0514572188 -0.0362136886 0.130494818 0.126685143 -0.0894779786 0.117681846 0.173565581 0.174748227 -0.15385066 0.149053425 0.160555586 0.0397729799 0.156005859 0.110312633 0.104156397 0.161141351 -0.0919019654 0.015511048 0.0107473964 -0.0837544352 -0.0176889747 -0.10078945 -0.0619383864 0.160746276 -0.087044619 -0.0232165866 -0.0215495545 0.0582484603 0.0864141285 0.175924376 0.0442700647 -0.0247930624 0.0347629003 -0.161288068 -0.0290379301 0.170908287 -0.117735907 0.110525407 -0.115487754 -0.000686930609 0.130876914 -0.0291782003 -0.192795917 0.127867773 0.126315489 -0.07262256 -0.098871097 0.0209841039 -0.19527556 0.116880774 -0.02486692 -0.00237640645 0.143660888 -0.016016813 -0.0697216764 0.175688595 0.0232482143 0.0199046992 -0.103963897 -0.0378533229 0.0388961881 0.00533542689 0.0628525913 0.159435913 -0.0747304037 0.0978682712 0.164278746 0.077385895 0.109259471 -0.0799139515 -0.0421864092 -0.0443351157 -0.133975893 0.0834283531 0.093928501 0.00520775095 -0.0434011891 -0.0435828492 0.138147533 0.106794529 0.093232654 -0.077764377 0.16267027 0.051492583 -0.0966648981 -0.0458262265 -0.0408286341 0.0238162875 -0.00872587226 0.153415054 -0.0966666192 -0.0194769856 0.151141167 -0.132202849 0.17568706 0.0875745118 -0.00695692096 0.0846608803 0.0842222869 -0.00846964866 -0.133651823 0.0813971162 0.0544089861 0.101662867 -0.166373864 -0.112454981 0.137616843 0.140390456 0.0915882215 0.10989771 -0.0496877804 0.154562473 0.0789823458 0.0279520545 -0.0192710813 0.025512537 -0.00114545715 0.0528355576 -0.0804974213 0.130488142 -0.0450717099 0.00189470535 -0.126931518 0.00184863445 0.0691755414 0.0959887952 -0.00365662854 -0.0239975173 -0.000226317745 0.162838191 0.110088706 0.103135742 -0.0143095907 0.0685937479 0.039006602 0.181053951 0.0662889108 0.142534971 -0.0225376673 -0.0523421951 -0.0925690904 -0.00610838691 -0.0569295287 -0.0691444948 -0.0351942256 -0.0200236402 0.0384809263 -0.00329685421 -0.15174298 
0.1632265 -0.191212401 -0.169024199 -0.093971774 0.115878142 0.0936368257 -0.0726782456 -0.0567203537 0.127668455 0.0460995883 -0.0191945117 0.18582131 -0.171271384 0.0437021852 0.062035732 0.0159470849 0.0150196124 0.00918887649 -0.0672063157 0.0613921694 0.0558371395 -0.172685817 0.0529843457 -0.179647043 -0.00943551958 -0.0415023826 -0.0244376082 -0.0472054332 0.153094694 0.143580258 0.0942730904 0.156098858 -0.00754955551 0.0512687974 0.138893977 0.0646209419 0.00226254459 0.133554146 0.0259827524 0.110805348 0.0725759491 -0.131094366 0.12708883 0.0314303264 -0.0524304323 -0.032248389 0.163754046 0.0906126276 0.00314503536 0.103355683 -0.022527555 -0.1250837 -0.143783137 0.0596455783 0.0511251315 -0.0954806134 0.17346862 -0.00509193866 -0.0772540048 -0.0803210288 0.173364595 0.167615995 -0.129515707 0.0145245409 0.0466810837 0.0946052521 -0.0887519196 -0.0918630585 0.154023126 0.182059482 0.122924969 -0.0969166085 0.0428368933 -0.0473706648 0.0871873423 0.0173784196 -0.0468124636 0.130918413 0.115169801 0.106101029 0.0267140083 0.171541661 0.117503718 0.0674298778 0.0793930814 -0.0995452777 0.0986198336 -0.0477845483 -0.0891349018 -0.110497288 0.149275228 0.0541292913 -0.0509323142 0.00657417579 -0.00849667098 0.0782996938 -0.000425429258 0.0927700475 0.0596327335 -0.0792194828 0.048249729 -0.125496924 -0.119564533 0.0140337572 0.154170945 -0.175600752 0.0509903021 0.0491141193 0.151463166 0.0498116091 -0.0577821173 0.0124854716 0.0519152619 0.000966675114 -0.0199240129 -0.0589309931 0.000340196391 -0.0851683021 0.0118466569 0.109990321 -0.0261993259 -0.0374022834 -0.0214411858 -0.077557683 0.0687204972 0.0663195103 -0.0442392081 0.0338341743 0.13567619 0.180690661 0.19239752 0.107011527 -0.0798124969 0.0309492871 0.0260094907 -0.125474811 0.0975558758 -0.171736181 0.121255443 -0.0812420845 0.174648881 0.0337508172 -0.0655879006 0.168462068 -0.123068273 -0.14526248 0.1509289 0.149049625 0.0172713008 -0.0775876939 0.125850379 0.0576170236 0.0959700122 
-0.0350637622 -0.0413426161 0.198388338 0.06012512 -0.18112573 0.0456633084 0.123411685 -0.135381892 0.0592928678 0.0492700674 0.192084178 0.00668479549 0.00347893289 -0.0798124969 0.160300285 -0.0158643834 -0.097056821 -0.00595153868 0.193585619 0.129847378 -0.0445784479 0.154722676 0.0128285876 0.114035919 0.0366068296 0.0581881292 0.0526427999 -0.0453962088 -0.0249866024 0.147942722 0.048362948 0.0718180016 -0.129099786 0.0695572644 -0.040164955 0.151449472 -0.107218184 0.0813755468 0.0611639321 0.120362371 0.00170552568 0.0150107937 0.0923141465 0.179166928 -0.0595131889 0.0748501047 0.024664795 -0.072850123 0.0498956218 -0.118837982 0.11913538 0.1241147 -0.0298356991 -0.0732461885 0.137327462 0.150715679 -0.144629672 0.0296867546 0.0185879748 -0.158391654 -0.0696423948 -0.0815559775 0.120456815 0.174756512 -0.0714245588 0.100912079 0.109141059 -0.0181489885 -0.189933077 0.0589498132 -0.146864727 0.0246144049 -0.0326956324 0.0814622864 0.044614289 0.0344069004 -0.0722796917 0.0347998254 0.00988415256 0.074375473 0.0236355383 0.186613545 -0.0229948368 -0.0627373829 0.058446534 0.0801035017 -0.0250811949 0.0163063705 -0.0360587984 -0.0412077829 -0.131415576 -0.14121896 0.183651194 0.0538982339 -0.0825245678 0.0530949496 0.129799366 0.077988103 -0.163070917 0.131275401 0.115696557 0.0255096387 -0.0695977584 0.149488509 0.110933349 0.0595859699 -0.105136663 0.139630318 0.13104403 0.140138745 -0.101875864 0.0968326181 -0.0490331948 0.0320329145 0.0932519361 0.111740142 -0.0153519753 -0.0669102296 0.0104083447 -0.0649985299 -0.154267743 -0.0946161672 0.139226034 0.107407138 -0.0765753686 -0.0474209748 -0.111844584 -0.0410924852 0.00278180838 0.108596429 0.014437899 -0.120850071 0.101168439 0.0475970656 0.110533401 -0.0760123357 -0.0803952068 -0.0215338543 0.109282747 0.0477782488 0.0887209475 -0.0328624696 -0.0277395248 -0.131564692 0.0674616843 -0.0144642591 0.13782452 -0.0827166885 -0.0459428355 -0.0465939641 -0.0978194177 0.137472615 -0.0644845441 -0.093579635 
-0.079621926 0.0540327132 -0.0426073149 -0.0682768524 0.140229478 -0.0689926222 -0.116822943 -0.0883634388 -0.0420724526 0.0797011107 0.134867147 -0.0124301612 -0.0311987475 0.076223284 0.0785176903 -0.0510006249 -0.08932513 0.0967391878 -0.136143774 0.124550089 -0.119794972 -0.106707312 0.0434878916 -0.000768460974 0.083400093 0.123351663 0.154955849 0.0239652898 -0.00470558135 0.0155227007 -0.155885831 -0.0280565098 0.128090873 -0.0347218178 0.0469225496 0.066305764 -0.0798357874 0.0677081048 -0.153243482 0.0412665345 0.15851365 0.0430604853 -0.0530885011 0.131436363 -0.0623488314 0.0265644994 -0.130693406 -0.0925032496 0.167786196 -0.00228108512 -0.051090654 -0.129197508 0.125834614 -0.0826043189 0.0495859832 0.13765806 0.140279785 -0.100200407 0.078553237 0.102651939 0.0530582368 -0.105640791 -0.0712560862 0.0563652664 0.0500995256 0.110330448 0.0879531652 0.0794132054 0.0128588937 0.139566243 0.00905480981 0.0769669786 0.0616210736 0.0383987278 0.024789568 -0.0215452202 -0.0754719898 0.103158571 0.021371033 0.157727793 -0.168305516 0.041431915 -0.205217093 0.0685112029 0.11518427 0.0901029781 0.0836623907 -0.00306673371 -0.078299813 0.0937599093 0.0358634107 0.150480777 0.017379215 0.0400344506 -0.0467984006 -0.0435465574 -0.0746275187 -0.12713474 0.110726796 0.163420781 -0.100556083 -0.00550368195 -0.10327252 0.044071883 0.0337789692 -0.0129005229 -0.0913272351 0.132832885 0.147079349 0.100901216 0.134497017 -0.0322105363 0.133053601 -0.0325982273 0.141311869 0.0040314584 0.151371911 0.181470856 0.0484154783 0.164058596 -0.0128529146 0.0409421511 0.159602627 0.101342879 0.149882555 -0.0492368788 0.172365248 0.124329507 0.0683217645 0.0930551067 -0.0814763457 0.147788212 0.00853961147 0.0389146842 -0.000336691737 0.163021743 -0.084802106 0.0986582115 -0.0116979126 0.0385086611 -0.0496010855 0.0737678558 0.103331998 0.161403298 0.0173213035 -0.103028946 -0.0950937942 -0.0377868973 -0.0620894209 0.13404268 0.0146548431 -0.0653266087 -0.0033960822 -0.112761199 
0.0226024743 -0.177061707 0.109000698 0.045506943 0.101955965 0.0158496425 -0.0690437183 0.104365595 0.0359109081 -0.122470014 -0.0659879521 0.0467208475 -0.0753396451 0.0523877777 -0.0585377291 0.100402929 0.119433776 0.0242477674 0.0617414936 0.182905495 0.157282576 0.0866737887 0.107341088 0.114345349 0.00848616753 0.0763099417 0.0206906293 0.0617443733 0.0259690173 0.107850946 -0.111641936 0.133501753 -0.169169813 -0.0887352601 0.089083977 0.156513289 0.0230403095 0.000902002619 0.0383367911 -0.0300379787 -0.146975219 -0.00701804645 0.131880164 -0.0454387777 0.0733794197 0.173866943 0.0410080142 0.19769071 0.0897455812 0.0198194478 0.00869395584 -0.0264868997 0.0861539766 0.123009734 -0.0185853429 0.16686818 -0.0672833547 0.0305484533 0.132848471 -0.127947524 -0.1613774 0.0643686131 0.0070268726 0.0036489605 -0.189245149 -0.0304792393 0.113094799 0.130098417 0.118080013 0.127857327 0.0940245837 -0.165752977 -0.0374614373 0.109492496 0.0428666584 0.170740604 -0.115685873 0.0148922838 -0.116838083 0.111455843 -0.0632996783 0.0108929574 0.0726874396 0.0742699429 -0.0629896522 0.113437019 0.199758425 0.0475728512 0.137889087 0.19803226 0.0400452688 0.0794214979 -0.108014926 -0.00188282889 0.111494496 0.0771949738 -0.116306648 0.0865728483 0.0771485493 0.0915202647 -0.0908453912 0.0911061615 -0.0472535603 0.136873767 -0.037476372 0.129080757 0.173227653 0.176956236 -0.117876649 0.0886662453 -0.0194631983 0.140326738 0.0929994658 -0.0285486728 -0.123725995 0.0545314588 -0.132062644 0.196129248 -0.0776121169 -0.0292998273 -0.0817124322 -0.123064265 0.0644138977 -0.0409719124 0.0910102725 0.0774317682 0.0588561557 -0.0303226635 -0.114509314 0.00717926025 -0.0146975368 -0.0139649464 -0.0769111067 -0.0884687155 -0.0844886228 -0.0546910986 0.0992946401 -0.0306005422 -0.0368665494 0.0252984539 0.0552819744 -0.0180559643 -0.0461472273 -0.059688963 0.0529744141 0.105257906 0.135227516 -0.136654019 0.0433159433 0.0750075579 -0.143905401 0.0697793365 0.0171793997 -0.0880545825 
0.0440685079 -0.135759518 -0.0708841234 -0.137341917 0.0642284378 -0.0825591236 0.0998160243 0.104954824 0.0703029931 -0.00554473838 0.0652662367 -0.137622833 -0.0849017501 0.0079975808 0.0469577163 0.0332614519 -0.0239423085 0.0571367703 0.125478789 -0.0188843291 0.0104151899 0.0507268719 0.0427310057 0.182245687 -0.0464136638 -0.0774840489 0.0790423155 0.0158217624 0.174919963 -0.167722598 0.00393518014 -0.141249925 0.0320646316 -0.0712961331 0.18195422 -0.101946741 0.205521435 0.0143015096 0.190244779 0.0565855652 0.143080652 -0.0879745483 0.0268129539 0.0264821127 -0.0976307765 -0.0719135925 -0.0931720287 0.0752973855 -0.0940701365 0.0943753496 -0.00160595321 0.00526125729 -0.0494134016 -0.0277267974 0.114400044 0.0121099204 0.0469762683 -0.0178804994 0.175322458 -0.0936195925 -0.0206507854 0.0129827568 -0.127164483 -0.0533081368 0.0902868807 -0.089850314 0.0812181607 -0.0252427552 -0.0260248482 -0.0263420995 -0.128954813 -0.144561514 -0.0969642028 0.0840708092 0.105219595 -0.0315751806 -0.133927286 -0.0635263324 0.0815265328 -0.103957005 -0.0656396598 -0.0624658093 0.027983008 0.0192227215 -0.0915314779 -0.0996872336 0.0151820509 0.00491440995 -0.0790896341 -0.148336604 0.106279097 0.057419382 0.034870699 0.100479744 0.00237061502 0.0768525749 -0.12644136 -0.125458911 0.112800233 0.00162200222 0.0578222498 -0.056215629 -0.0922449976 -0.158906817 -0.0518889986 -0.100536995 -0.0453334972 0.00646515191 0.0148057342 0.0331344642 -0.00636346964 0.0370892994 0.00641168654 -0.0307880603 -0.0186160952 0.0293306652 0.0952301919 -0.12958698 0.117998272 -0.0704888254 0.00443183212 0.147841737 -0.0992462039 0.0764997005 -0.0257688798 -0.0460406169 0.0839670599 0.120056614 -0.0614700243 0.113699906 0.0346624181 0.180427715 -0.0145217599 0.168693572 -0.00197043363 0.191886678 -0.0972156301 0.0206416119 0.0345100351 0.0903015509 -0.100287922 0.0303347614 0.136919394 -0.0126191778 -0.115950003 -0.0293597691 0.0265962426 0.00261192676 0.0278086904 0.158691257 0.0234635379 
0.117342651 0.0816714615 0.0948666632 -0.0849409848 -0.114143133 0.0362917073 0.070062004 -0.0524370112 0.167162567 0.104840927 0.124661915 -0.138633773 0.19061929 0.0486695245 -0.00107917748 -0.0845123231 0.046763584 0.0243339688 0.0911204591 -0.113943458 0.00347187044 0.0777205974 0.095806241 0.0292435624 0.127136692 -0.0182037577 -0.0450141095 -0.0123331165 -0.0598197915 0.152919352 -0.131715685 0.164068297 -0.0793498456 0.00121658249 -0.0503176786 -0.0856561065 0.0431076214 0.0459455065 0.167714477 -0.00190150819 0.0480769761 0.0142310113 0.0774440318 0.0504581034 0.197599128 0.172974482 0.0960050672 -0.0724410191 -0.0655787885 0.0426691361 -0.0474077053 0.0671926141 -0.0111915339 -0.0694714338 -0.0728770122 -0.0334699675 0.0879241973 0.0191930141 -0.0492004342 0.170004874 -0.136069939 -0.0839288905 0.121699564 -0.0373032577 -0.0790554881 0.0212189052 -0.0723486841 -0.0706750974 0.164014727 -0.1265852 0.180671826 -0.0538335219 0.135076165 -0.082566984 0.00627529481 0.0355592817 0.146791458 0.0428247713 -0.0218269154 0.113299571 0.15928854 0.109753877 -0.0433866642 -0.0531712547 0.121344112 -0.0599708892 -0.140954524 -0.0652005821 0.107553594 -0.0420940556 0.0391891636 -0.0892334729 0.0449264199 0.038767308 -0.00515921181 0.00227128062 -0.0260546599 0.145359293 0.0675093085 0.165128261 0.107131146 0.145455942 0.150093794 0.182555065 0.0683342069 -0.0751166418 0.00099511235 0.136942998 -0.0637786239 -0.118969493 0.0861738697 0.121482879 -0.0593939386 -0.0681066886 0.137257427 0.142178074 0.110687025 0.104999736 0.00519723399 -0.0884702951 0.0194963887 0.146859735 -0.00592712127 0.0192816481 -0.0416031592 0.10512694 0.0102964779 -0.0616582707 0.13753584 -0.00928659178 -0.0823482201 0.114266947 0.0528262816 -0.0983823165 0.0188455041 0.0611194335 0.100222267 0.00196855632 0.0390211269 -0.139956653 -0.0327276476 0.156153634 0.0125370612 0.0344246514 -0.0589949451 0.0921387449 -0.144850284 0.0212448426 0.144581348 0.0431137607 -0.0885965675 0.0854236633 0.00550921075 
0.172450885 0.123434886 -0.185906976 0.132565111 -0.181601852 -0.10362874 -0.184760764 0.130670205 -0.0472870953 -0.10729944 0.132553771 0.0137786418 0.0876799598 -0.0260619633 -0.0263571106 0.123666577 0.123572513 0.00393577246 -0.0911321938 0.149641573 0.0127057144 0.040043395 0.0847804174 -0.0459438674 -0.177438155 0.00510600302 -0.00431553461 0.138086572 0.116237916 0.168006837 -0.0157650076 -0.00970370602 0.0802516192 -0.0514502637 0.132704586 -0.0417737029 -0.0193822831 0.166242853 0.0419458486 0.15848121 -0.00174845546 -0.0805547163 0.00697086425 -0.171336144 0.0593196638 0.0595933609 0.110469177 -0.0882590115 0.050680656 0.0118347788 -0.0319386683 -0.0632662848 -0.0177531485 0.0838051289 0.0470289141 0.178859159 -0.0346439704 0.0504389554 -0.0520837195 0.00219774805 -0.0491009764 0.0503517203 0.107992731 0.0384831354 0.0872439444 0.167825118 0.0623564459 0.0223074984 0.0821516067 -0.00865145214 -0.0457197949 0.0970128179 0.0120575717 -0.0556218661 -0.116809532 0.074401699 0.0880089849 -0.0123710101 0.007505944 0.135203391 -0.146449044 0.0852448419 -0.132378265 -0.109845184 0.149791658 0.015171879 -0.158416107 -0.0637820587 0.16275458 -0.0364229716 -0.143408865 0.127545208 -0.0622910671 -0.139478207 0.0405872539 0.0932571068 0.0956263393 -0.00292709633 -0.100080743 0.137279779 -0.0495060496 -0.0749291778 -0.0744291395 -0.0862122774 0.0235699266 0.109829761 0.0802345648 -0.123428002 -0.135655686 0.115854591 0.186653689 0.104481116 -0.0934653729 -0.107345767 -0.0480583683 -0.112480521 -0.0674405769 0.0481690913 0.0844945163 0.102531567 -0.132132486 0.137842521 0.00775253773 -0.0610849336 -0.032591112 -0.0524423793 -0.0668133944 -0.113737375 -0.000165238976 0.114906386 -0.1328713 -0.0835750252 -0.088781476 0.018294096 -0.0263542235 -0.0792298913 0.0685598254 0.0419423133 -0.0260287449 0.13109158 -0.138066247 -0.0322780311 -0.0882859379 0.0807678401 0.0512416959 -0.123070188 -0.00298701227 -0.0796232373 -0.104369447 -0.117494076 0.00122408569 0.0332989395 
0.0664115399 -0.0739870965 0.0106086135 0.109527692 -0.0934588537 -0.0895289928 0.0728636533 -0.0278315544 0.0639105886 0.0930453986 0.0494588055 0.017094126 0.112311125 -0.00386948418 0.0680094063 0.0254231635 0.0913507342 0.16692546 -0.0122418981 0.108312286 0.0950310752 0.101287387 -0.105884947 0.030036103 0.00558372587 0.109446019 -0.0987028182 -0.0895694122 0.118367992 0.0662995502 0.114169754 0.0966514125 -0.0286930036 -0.0851531997 0.128677562 0.124861382 -0.100621521 -0.128018498 0.0673300773 -0.0310823116 -0.0784357563 0.0379403606 -0.0306251384 0.0655758083 0.0960387737 -0.152080312 0.136492133 -0.101761207 -0.0275989529 0.0933943838 0.0766497627 -0.0804210976 0.143909901 0.143697292 -0.0849372372 -0.10959392 -0.0742666796 0.125293195 -0.0966164172 +tensor_14weight 2500 +-0.0543760806 0.0856281444 0.0533403084 0.0177523084 -0.0268334541 -0.0549559146 0.159062862 0.172800139 0.109722741 0.0875528008 0.0125674438 -0.0810011849 -0.068577148 0.170207128 -0.135173365 -0.0806247443 -0.0548967347 -0.0452914089 0.0365853943 0.129278928 -0.0377073251 -0.17943646 -0.00266921567 0.0811229944 0.0154373068 -0.0359650813 -0.0855926052 0.127574399 -0.1265679 0.04885903 0.0561187416 -0.112507693 -0.139889583 0.170207158 -0.097494632 -0.0187973343 -0.0904997438 -0.0484883524 0.031168703 -0.055549074 -0.0741278306 -0.002624318 -0.117438287 -0.0157258548 -0.0880523771 0.114648446 0.0272049736 0.103814438 -0.0217095967 -0.140518233 -0.0760676265 -0.110887714 -0.0115829725 -0.00750160404 0.0959720686 0.0384376198 0.061359182 0.0955482125 0.101260468 -0.0115174651 -0.013766964 0.0398462117 0.166129872 -0.0850986466 0.140506133 -0.103672192 -0.154903129 0.0968019962 0.066429466 0.0431276001 0.147400737 -0.00412948243 -0.0342022404 -0.0535201877 -5.71517012e-05 0.0244176984 0.0832642242 0.176724657 -0.0719986036 0.172275752 -0.114797458 0.0914949924 0.0334078744 0.0464251973 -0.00394226797 -0.0035392812 -0.0278604105 0.0514154881 -0.0311339442 0.021156881 -0.0213947129 
-0.0683914274 0.0251719803 0.0944593325 0.12849097 -0.049127765 -0.0469818637 -0.0983457267 0.13893728 0.0303975027 -0.0299507454 -0.0138533115 0.139151528 -0.135961041 -0.0921831578 0.0593009293 -0.0144180804 -0.0136186779 -0.0715967119 0.0790341347 0.00953520834 -0.0408776402 0.101040825 0.00309920311 0.0447804034 0.0982600003 -0.0721947402 -0.118167073 0.0333673507 -0.0950507745 0.10244967 0.08306925 0.0455361456 -0.122597888 -0.0647362471 0.00561864674 -0.136176527 0.0647586584 -0.122481212 -0.0205618665 -0.094566375 0.0131596476 -0.117649406 -0.110489279 -0.0717473105 0.103288978 0.0714375228 -0.0784455761 -0.105901703 0.0811899006 -0.131345108 -0.0233812556 0.102898851 -0.00886622071 0.0682659149 0.129993364 -0.0971994996 -0.0193270147 0.00360363722 0.121052161 0.00784411095 -0.123100765 -0.0609981082 -0.135147735 0.0461434908 0.117215686 -0.0296066701 -0.0148467962 0.054072503 -0.118035108 -0.13138777 0.0103239622 0.0106298085 0.00161406794 0.121524885 0.106862329 -0.0696737245 0.122207746 -0.129250824 0.0716361329 0.117990665 0.0917533413 0.0275282189 -0.124964394 0.123115174 0.0490060188 -0.0750153661 -0.0502910502 -0.0452317111 0.101086549 -0.101995051 -0.112885557 -0.0476158895 -0.0509889536 0.0219939649 0.00387603301 -0.0764786229 -0.0421580113 -0.0788122267 0.084515363 0.0346965827 -0.01090011 0.0382516384 -0.00645032525 0.129111394 -0.0737728179 -0.00789030734 0.11321111 0.00651154015 0.000951979193 -0.0776003599 0.0253983736 -0.0880478546 0.111969553 0.0747581348 -0.0281555094 0.0477269702 -0.0837645158 -0.00260412018 0.0995940417 -0.0253548026 0.0838286281 0.037731003 0.0643470585 0.0464969426 -0.102055438 0.00463358313 0.0325008184 -0.00370962941 0.0821173638 0.0869908333 0.032846041 0.0313670263 -0.148173332 -0.177075326 0.014351381 -0.0347749256 0.0631445199 0.0138477925 0.141796917 0.0031752775 -0.0240941141 0.115030944 0.0497418977 0.0109222829 0.0674659908 0.010190879 -0.0980509967 0.107191958 -0.0665694326 -0.0149048567 -0.135567963 
-0.0943998545 -0.0724455938 -0.108684249 -0.117758349 -0.0431607552 -0.0478789434 0.0548663996 -0.0874581188 -0.12479274 0.0178123116 0.070239827 -0.0386666693 0.134508371 -0.0741510987 -0.045267418 -0.104734987 0.0435491502 0.0155023336 0.062136706 0.103647709 0.00290234643 0.064395614 0.0177004337 -0.0480007231 -0.110428169 0.0850054473 -0.0885846689 -0.0500162207 -0.0616900064 0.148497447 0.0951149315 -0.0552124381 -0.14905256 -0.0889345855 0.0241270382 -0.0488678627 -0.00426269416 -0.0119903926 0.141092837 -0.0797038823 0.0120936269 -0.0693103597 0.0249975473 0.145910755 0.0371512882 0.117824383 -0.0573362373 0.0323375016 0.10749159 0.0636148006 0.0273176879 -0.0183407739 -0.115713961 0.00984863937 -0.042138014 -0.118756019 0.0761800632 -0.132937029 0.119471751 -0.0354485847 0.127721861 0.121748939 -0.0444656201 -0.116685092 0.0304207485 0.0655196533 0.0813344195 -0.107156277 -0.0844279304 -0.0894685909 -0.127893046 -0.0939210355 0.0843395889 0.0614806749 -0.0614846796 0.0341131836 0.0421788543 -0.105776869 -0.122536495 -0.129194289 0.12389411 0.0393402874 0.0495846197 -0.119962715 0.106891051 0.0449610613 -0.145153821 0.0629368573 -0.108572282 -0.0517346151 -0.0622508824 0.0323888771 0.0450324118 0.0440010354 -0.0938819498 0.0203166902 -0.122573078 0.0831483901 -0.0707751289 0.0610891283 -0.0958172753 -0.0424687862 -0.107412554 0.126818612 -0.0554413795 -0.122475646 0.0243445728 0.0513011068 -0.0796121135 -0.0414110497 0.0280717909 -0.0394670665 0.0598106235 0.0484230518 -0.126170114 -0.0951998904 -0.078391239 0.0402923077 0.0229541957 -0.0937744156 -0.0477972776 -0.0179067627 0.0602110922 0.0934107453 0.139090851 0.0272798836 -0.0181408152 0.0339401662 -0.0496698096 0.123755589 0.077883482 0.0388832986 -0.119654641 0.140062913 -0.02580522 -0.0365974084 -0.0948570818 0.0476925224 -0.106507264 -0.0877594203 0.0167225003 0.0365579128 0.0707214922 0.0943449885 -0.0219054744 0.0260573626 -0.0470213518 0.139553711 -0.0147360563 0.0706477165 -0.078761287 
-0.0111072361 0.0549765974 -0.0400532633 -0.0153049231 0.0315274298 -0.0705541149 0.0631048977 0.117633738 -0.0514981188 -0.134041414 0.0551473498 -0.0794123039 0.00670406362 -0.0401185192 0.00911470596 -0.0211872291 0.116496786 0.155161962 0.0629097223 0.138448417 0.0936189666 0.0627936721 0.0648671389 -0.129390776 0.0583335906 -0.0174725447 0.0610876642 0.163619712 0.0462206006 -0.0404846109 0.0467165858 -0.149703398 0.0884451717 0.0297990069 0.0904366821 -6.66035776e-05 0.089948453 -0.164717227 0.0440124683 0.0429885276 -0.0889559984 0.0580933429 -0.0497451164 0.0140721994 0.123201773 -0.0521491505 0.0792684183 -0.116658807 -0.0728405491 0.138154134 0.0858280063 -0.0885532424 -0.0259025618 0.143468827 0.11027436 0.130449191 0.0466446765 0.0738923401 -0.0459300056 0.135415688 -0.0519030988 0.0363911055 0.100617178 -0.0735667422 -0.056867335 -0.100770339 -0.0581379086 -0.0582638234 0.057107687 -0.0833413973 0.117787801 -0.121084802 -0.0609023273 0.0458093919 -0.0386206284 0.120702438 0.152693301 -0.0653539896 0.00243751518 -0.116231412 0.129159972 0.123322234 -0.00450206548 -0.0729444399 0.0853474438 -0.145202518 0.0369620174 0.0304967947 -0.0492551252 -0.130056858 0.0221843477 0.0469832569 -0.056170959 -0.146923915 0.0244862288 -0.0021409702 0.0949956179 0.134217575 -0.0556118563 -0.106579103 -0.0108840466 -0.147231668 -0.0594046339 -0.0605274215 0.00136603415 0.127606124 -0.115885407 -0.00178258657 0.0252946466 -0.0912591442 -0.0576305799 -0.0229029693 -0.085684374 0.165293708 -0.0522565134 -0.0692233294 -0.0375391915 -0.0102695916 -0.104804181 -0.0526487827 0.150056034 0.0551703274 -0.0257776212 -0.0228184611 0.073696211 -0.0467144549 0.0735779107 0.0948753133 0.144433752 0.166210935 0.150882557 0.0348055102 -0.0166522712 0.161574543 -0.0576791242 -0.0115333898 0.0468233787 0.0915934965 0.0572047532 0.00290581165 -0.0225567296 -0.0246865228 0.0868225098 -0.00762603246 -0.11816176 -0.12570864 -0.0243588239 0.0893646181 0.0507476032 0.0117150992 0.10665486 
0.121998012 0.0696426779 0.0685170516 0.025851354 0.142432615 0.146065772 -0.0138009675 -0.129955053 -0.000132796747 0.0203777198 0.181162477 0.0261075366 0.168559924 -0.0609995425 0.168947399 0.0293546468 0.0149049358 0.0582519248 -0.04978792 0.103416584 0.0590672493 -0.0700641274 -0.14282304 0.0278612077 -0.131275356 0.14719297 -0.0653766692 -0.0751730502 -0.0065545626 -0.0380778089 -0.157771811 -0.0827088878 0.0777130723 0.198460281 -0.0469098203 0.134435102 0.14349848 0.0417049713 -0.167793706 -0.0996251702 0.0797272176 0.122879468 0.179209173 -0.0471446738 0.168509901 -0.0920644701 0.000451093569 -0.129801482 -0.0594977811 0.00711449794 -0.102573976 -0.109216064 0.148805737 -0.0950382799 -0.00647751195 -0.109716304 -0.055815164 0.0108515322 0.0906130522 -0.0650295168 0.0893351659 0.114572234 -0.106268756 -0.0118306447 0.113306493 0.0420926064 -0.143582255 -0.11182075 -0.0272862986 0.0896898583 0.0240881741 -0.0932913795 0.0246650521 -0.138226554 0.0727393776 0.0228461325 -0.16916123 0.00246544858 0.0807503536 0.0973562822 0.0370443426 -0.135211006 -0.0732924193 -0.00324719655 0.00122735673 -0.0426253006 0.0464077778 0.0109117776 0.0653063208 -0.0427299105 0.0784161389 -0.0572868735 0.105860651 -0.097189337 0.000937802775 0.0585776716 0.141582102 0.0805247277 0.0409072042 0.00662690401 -0.0858124942 -0.0805532038 0.0404491127 0.124266788 -0.106811218 -0.0971105546 -0.0997086912 0.0074750483 -0.108586416 0.0607502013 -0.054395549 -0.0843265578 -0.0900614634 -0.0676774904 0.0206092894 0.099438563 -0.0570041686 0.0200342685 0.0236357749 0.0572907329 0.0950599462 -0.125204116 0.0311794877 -0.0876096636 -0.136037469 -0.0375309587 -0.0594457537 -0.113332778 -0.0978064537 0.0964330435 -0.0265298411 -0.00542576611 -0.0438456684 -0.0523090437 0.0630306751 0.124709442 0.0266276151 0.0232248306 -0.0171631426 0.106016204 -0.110088825 0.08274737 -0.110974953 0.0963929445 -0.0833926201 0.0303138644 -0.0306184739 -0.00307349861 -0.11504256 0.1075629 -0.131726444 -0.0421231985 
-0.138194129 -0.113239586 0.0452417433 -0.0149982423 -0.0436716527 0.0988965183 0.0192198902 0.0135216201 0.00940239057 -0.11294537 0.0857888535 -0.120034076 -0.108530715 -0.101037055 0.0667439774 0.0855601728 0.0677483305 -0.0134334378 0.120776698 -0.116636701 0.0875215456 -0.0949789584 -0.0514179617 -0.115705922 -0.0596454814 0.0422541201 -0.0999356657 0.0502830669 -0.0702968836 -0.123627275 0.106365606 -0.0744836628 0.0104168141 -0.0793894753 -0.114666551 -0.00283100014 0.140118852 0.0356186256 0.054495573 0.102265559 -0.0458586551 -0.0236852318 0.106435075 0.00892684981 0.11433281 -0.115178108 -0.131405771 0.117208794 0.0424666107 0.134797171 -0.143703952 0.0569373965 -0.0665611774 0.0933629125 -0.0201621354 0.111485049 0.00233875564 -0.0812246799 -0.0269328542 0.0232816096 0.0677310228 0.117872521 -0.141205952 -0.00658942759 -0.0309162736 -0.144758567 -0.057528194 -0.0684359372 -0.0633766428 -0.0364208929 0.152422816 0.0387307405 0.0868177786 0.0241200123 -0.0501802117 -0.03670137 0.0153368488 -0.0590804406 0.0290142465 0.100567661 -0.0803031549 0.037419185 -0.0459126569 0.0960116088 0.116186179 -0.0661039278 -0.0716232583 0.0593420751 0.0900740027 0.132992968 -0.025843842 -0.062321458 0.0100088529 0.00727820396 0.0946147069 0.111916468 -0.0648906529 -0.0192210414 -0.10582228 -0.052964583 -0.0713335574 -0.100110069 0.0656400323 -0.0618378446 0.0341230631 0.105089828 -0.0501025058 -0.138664886 -0.115556583 -0.105643809 -0.0142834401 -0.0338118225 0.0103544462 -0.0796577036 -0.00744031509 0.0366418958 -0.13744548 -0.12394321 -0.133015111 0.139994159 -0.096086286 -0.0779372826 0.0771979392 0.127238646 -0.0140574072 -0.0333673917 -0.0900884196 0.0653517544 -0.0381353125 0.124770477 -0.12113288 0.0970005691 0.107545584 0.114952408 0.0286091883 -0.109182179 -0.0630336329 0.0627928153 0.0373910069 0.110793836 0.0872234032 -0.110715158 0.0479132868 0.068171978 0.0977038071 -0.0969489664 0.0069321245 -0.138224244 -0.1087984 -0.0156357884 -0.0806711093 0.0635136664 
-0.088648513 0.0915248096 -0.0295681208 -0.0889791846 -0.0202619806 0.0473107845 0.0719934851 0.0844703317 -0.0181293488 -0.0325784534 -0.0223496631 0.0800980031 -0.0469706431 0.12284486 -0.0163284689 -0.0720243454 0.0100721121 0.012039721 0.0458173305 0.00532619655 -0.0372635648 -0.0938430429 -0.0808144957 -0.0140093267 -0.139559567 -0.0209952146 -0.0627007261 0.0597438067 0.105400652 -0.034525536 -0.0433830321 0.00658106804 -0.113124847 0.039323777 -0.0219132751 -0.0291076973 -0.0714975595 0.0354093611 -0.0999722928 -0.0223256275 -0.00125360489 0.131300226 0.0749686807 -0.103176132 -0.00177618861 0.100545034 -0.0482359231 -0.130312055 0.0585651398 0.0481558293 -0.050323084 -0.0714227259 0.114079475 0.0674445853 0.0338538028 -0.0207888857 -0.0817157254 0.0414048955 -0.00719799427 0.000449810963 -0.0368338116 -0.000197023153 -0.128918022 -0.120564923 0.00234631728 0.017666148 -0.0760105997 0.129530162 0.0278998706 0.0785642117 -0.0846611708 0.142189592 -0.0866099969 -0.034735851 -0.0374385677 -0.141838074 -0.0461979173 0.0792662352 0.0883275494 0.0821309313 0.0135414349 0.0762536079 0.162841812 0.11422585 0.0459163897 -0.101808973 0.124757119 0.10234201 0.121778518 0.124219798 0.15815866 0.120062478 -0.0342520848 -0.0277299657 0.00312125683 0.0146831786 0.0419388674 0.12670289 -0.0661896765 0.00485484302 -0.0807016641 -0.102789596 -0.10495542 -0.0415844247 0.117631674 0.0792787224 0.121481225 -0.121911712 0.0956676602 -0.13807556 -0.071042493 0.10285683 0.134201437 0.0446345471 -0.0243865289 -0.0965441614 -0.0743445978 0.0868661553 0.0650995299 0.0439964831 0.0520170368 0.122856326 -0.00557545433 0.0643403828 -0.130216479 0.075305514 -0.0704696178 0.0523242615 0.132898629 0.0556151196 0.0135608455 0.106630892 0.00448958855 0.0294760223 -0.130943984 -0.115232065 -0.0185228847 0.0750679225 0.0868396237 0.115859844 0.0128320716 -0.0767292604 -0.0757243782 -0.109670304 -0.022307232 -0.0409514084 -0.0354676992 -0.101452865 -0.0788824335 0.0799969286 0.139659941 
-0.080234088 0.0067355819 0.0488539226 0.134902641 -0.044175718 0.0824501589 -0.150373846 -0.134587288 0.0352238007 0.116991237 -0.0220136195 0.114933126 -0.114182681 -0.0119344881 0.0254181288 0.110809639 -0.139513344 0.0670420676 0.100754023 0.0473007746 -0.110368282 -0.0900191069 -0.0120764263 0.0714306533 0.122893341 -0.0579950325 -0.069827266 -0.0631239116 -0.0197088365 0.0283315647 -0.0101505062 -0.012342534 -0.136444777 -0.110550106 -0.0110609038 0.0534135252 0.0715058818 -0.135529175 0.0218331032 -0.0914941207 -0.117715605 0.0637661964 -0.119437411 -0.0767295882 0.13249214 -0.0307924412 -0.0124762207 -0.0491118282 0.115513906 -0.0599435866 0.0437990949 -0.0970950872 -0.126184925 0.00789543986 -0.0699488521 -0.0769708008 0.0143007189 -0.125161707 0.0508386642 -0.0768451542 0.0126496255 0.0037975586 0.0945261717 0.030873267 -0.0632951036 0.121244743 -0.124326058 -0.0519415066 -0.00867667794 -0.0128302025 0.0473873913 0.134872839 0.0456339866 -0.141257316 0.131795123 -0.129008144 0.0524923205 0.111486077 0.00192398916 0.0257397145 0.0104135079 0.0229955614 -0.0101489769 0.0377998948 0.0529350787 -0.0652860105 -0.0964240208 -0.119346842 0.102311134 -0.0513100103 -0.0956246778 0.0625582039 0.10400553 -0.0152444094 0.0996984094 -0.11846026 -0.0332592428 0.0517609864 0.124776825 0.0835027397 -0.0161721092 0.0544919521 0.0960061252 -0.0852253288 -0.115617849 0.132461503 0.0126986802 -0.0718445331 -0.0552118719 0.0423579067 0.00376538932 0.109214559 -0.060116075 0.00753490627 -0.11368005 0.00783166289 -0.0537703261 0.0794192106 -0.0919727385 0.00208424032 0.111719355 -0.0963476151 0.0950013399 -0.108282149 -0.12641567 0.12435104 -0.110969186 0.0644554049 -0.140637219 -0.0633735061 -0.0338808447 -0.123940453 -0.012826249 -0.0421397537 0.123598143 -0.038868092 -0.0173738599 -0.0128029287 -0.0981713384 0.100576788 0.075251177 0.0625472218 0.0764244497 -0.0654502288 -0.0070194602 -0.0901699513 -0.018791154 0.109865949 -0.0268438831 -0.0740915313 0.10593608 0.150855407 
-0.0649960041 0.05053664 0.131901428 0.0731908754 -0.109051332 -0.00465088245 -0.0684918538 0.146911919 0.00280831754 0.14333044 0.108945541 0.0746650323 0.112723231 0.0545722842 -0.120427899 0.0842576474 0.0266837925 0.0456104651 0.0698814988 0.047249984 0.190266967 0.163280755 -0.0185097642 0.0825716704 0.031122379 0.064069435 -0.164239749 0.0375948921 0.0769198686 -0.155088678 -0.104684114 0.075281471 -0.00940326042 -0.0429962575 -0.0142965838 0.0120388716 0.0730001554 0.00848747697 -0.107716456 -0.0337854624 -0.133317709 0.128731623 -0.108297765 -0.0142049389 0.133316174 -0.115573399 0.0836417973 -0.0845889002 0.0138655473 -0.0655629039 -0.0104014426 8.64409303e-05 0.1608392 0.0675673187 0.0179192871 -0.0236063488 0.046505671 -0.0392516479 0.0720221549 0.0741254017 0.0510466658 0.149464175 0.0988350585 -0.0124777406 -0.0114185531 -0.143563926 0.116806343 0.0688097924 0.145737663 0.0330820084 0.103331283 0.189101636 -0.0580817536 -0.152595311 0.10088592 -0.012149916 -0.153301135 -0.0369912386 0.0269313119 -0.100157224 -0.0723579377 0.0605176054 -0.16903989 -0.140898824 -0.124555223 0.149031043 -0.112365574 0.0937826708 0.0157145858 -0.100749768 0.0515762866 -0.0710803419 -0.0686863139 -0.0993681699 0.136043593 -0.0661427677 -0.00102904439 -0.0144443447 0.00527101662 0.0664612353 -0.125710681 -0.0143222958 0.0327278823 0.00587114692 0.0143621564 0.0739389807 0.0117307007 0.00317768753 0.131541565 0.0450980216 0.0562246889 -0.112021118 -0.121544585 -0.0703852251 -0.0653774664 -0.049726896 -0.0712407231 0.0384204239 -0.0756127983 -0.108454555 0.0761180222 -0.108996943 0.031490311 -0.110682026 0.0954553038 -0.0480172858 0.026041314 -0.0700769648 -0.0236957669 -0.105962321 -0.0145984888 -0.0221198499 0.080327794 -0.0255625173 0.0917616338 -0.0579084232 -0.115254268 0.0298689604 -0.033957921 -0.00849801302 0.101248682 0.0688192248 -0.033151634 0.0967501849 0.125775561 0.0594263077 -0.0315592438 -0.0751049966 -0.0152497739 0.0194816925 -0.025003599 0.110108972 
0.0337932631 0.0466330573 -0.108177759 0.0923204646 -0.127821535 0.132012337 -0.0206957385 0.160091415 -0.0121085728 0.103845544 -0.00710585574 0.106360584 -0.0573629886 -0.116646938 -0.0393697619 0.111357979 -0.00339666428 -0.0565832593 0.187762156 0.0738954321 0.00436372962 -0.0201180782 0.0962937772 -0.0409735553 0.126787856 -0.0257689413 0.142943844 0.0456875302 0.0157842282 -0.125038773 -0.127323419 -0.0811595023 0.127563968 0.0147216143 -0.0188120566 -0.037811175 -0.0973046944 0.155958325 0.107728779 -0.0499815643 -0.132879764 0.0703277811 -0.0151593685 -0.00903364085 -0.12080054 -0.0394929722 -0.05078182 -0.0835544169 -0.0767298788 0.111453474 0.0164176039 0.0738900974 -0.0237710364 0.108378887 0.00188849773 0.0275815967 0.0885725319 0.0609878637 0.0426038019 -0.0367399938 0.00715940725 0.0550841689 0.0243325494 -0.182480216 0.0272398591 0.0657472908 0.0879313201 0.0918957889 0.156981304 -0.0646059811 -0.119247735 -0.047599677 -0.116651364 0.147660568 -0.0381035991 0.0582092069 -0.147984505 0.19525826 0.122297406 0.0312584154 0.0444355682 0.0738523602 -0.00197054748 -0.0539330691 -0.101374164 -0.0567407869 -0.083611846 0.0657852963 0.0735184103 -0.0166746452 0.0735289529 0.0353927538 0.0238035768 0.0864389837 -0.0453045592 0.0350245051 0.0573615246 -0.117907874 -0.0528462976 0.0303292908 0.0802146792 0.0530198105 0.0752285719 0.0932139829 -0.126501501 -0.0627672225 0.0169230103 -0.0626025647 0.0349148065 0.109626763 -0.105911814 0.0788237303 0.0676720217 -0.153280228 0.0910756141 -0.0301282536 -0.000840488705 0.106123164 0.0890205577 -0.0100000529 -0.0720009357 -0.0945366025 0.10201738 0.139737338 -0.00469925907 -0.00698884297 0.108112089 0.0135092204 -0.127867475 0.0966705605 -0.00785964262 -0.138336435 0.0638243333 0.0331486128 0.0260607041 -0.0856452286 0.0471999496 0.0917534381 -0.0968203172 0.137134388 -0.108194001 -0.0242078379 -0.0838816911 0.0852543861 0.101658776 0.0793644413 0.0438847691 0.048443377 0.0175982714 -0.0791520029 -0.126521289 
0.120813206 -0.0748142153 0.0292773098 -0.0915241987 -0.0191392377 0.070805788 0.0297760516 0.0540979952 0.0455519557 -0.104483157 -0.127842084 0.00549553335 -0.00101320446 -0.0383768007 -0.0929412916 -0.0527148545 -0.121787742 0.116296932 0.127205387 0.0215808749 -0.00527906418 -0.0950953811 -0.102384314 -0.136119738 -0.135512173 0.0213526934 0.131827787 0.0118881762 -0.0137285888 0.0550298095 -0.101517551 -0.0358831212 -0.0636179894 -0.0305602103 0.000535279512 0.0445587337 -0.14124018 0.108244672 -0.0400930718 -0.081310682 -0.133354321 -0.00902658701 0.0237516761 0.136513993 0.0469265431 -0.12061704 -0.0962344632 -0.100439511 -0.0504943989 0.0647302195 -0.00468132785 0.0335817374 -0.0337964632 0.134859458 -0.0318156444 -0.0735609904 -0.087305516 -0.00295244693 0.143023223 0.0555585437 -0.0434618294 -0.0989003032 0.0798072517 0.0347367227 -0.0889567286 0.0454466157 -0.0994898081 0.0186923463 -0.173386604 0.0882037133 0.137024656 0.0983027816 -0.120416664 0.192227334 0.0853054151 0.0870762393 -0.0379575193 0.161280155 -0.0570422672 0.151813626 -0.106967077 0.166952252 0.101883747 0.054235056 0.189217716 -0.0840895325 0.070606254 0.0933529139 -0.0497438386 -0.0975164622 0.156465441 -0.061189115 -0.0255476627 -0.00462962687 -0.0117694922 -0.026365703 -0.0826264173 0.10701783 -0.0981103182 0.0933794603 0.112419106 -0.00185976818 -0.0279008057 -0.0761715546 0.160778821 -0.00510247052 -0.0529871248 -0.00726129953 -0.0403804705 0.106054351 0.0701956153 0.160074636 -0.126040637 -0.0446393117 -0.0562781654 -0.0765028149 0.118484885 -0.00363161834 0.0893984511 0.0428721681 -0.0862490907 0.161411509 0.110493377 -0.125039488 0.160013914 0.138771698 0.00862341002 0.120823577 0.124806479 0.126176298 0.0108156186 0.103124447 0.0837469697 0.150882855 0.0736863688 0.00851425901 0.0157211907 0.0169511139 0.0302761346 0.0810550079 0.0379871763 0.138916358 -0.0554511286 -0.134623349 -0.0141197927 0.102847748 -0.109416708 0.116542891 0.0628927052 -0.0400933027 0.104376331 
-0.119453713 0.0155786276 -0.0215776712 -0.00744922459 0.0798532814 0.0639959276 -0.0196548998 0.0446766913 -0.0535386354 -0.0242897347 -0.0326042622 -0.0356833786 0.091811657 0.119531497 -0.00566391647 -0.108836398 -0.0138843209 -0.0777539462 0.0244711339 0.0302925706 -0.0840600431 -0.135451585 -0.0993905663 0.0911848098 0.0254171342 -0.104574814 -0.0337190852 0.123146519 -0.00662016869 0.112501815 0.115908071 -0.0144283175 -0.0765947551 -0.070567295 -0.0816542134 -0.063042447 0.0821659714 -0.106859922 0.0445039272 0.0489273965 -0.054737024 0.0737254471 -0.0994004011 0.0907932669 -0.0525529906 0.0760397166 -0.0459865108 0.115764856 0.089792937 0.0262587946 0.0727124959 -0.0986016765 0.134460911 0.120390087 -0.106480896 -0.126752883 0.0600169674 0.0778978691 0.18399404 -0.116155103 -0.0270620678 0.0360745415 -0.0706986636 0.0219121743 -0.0485235155 0.0116395457 -0.0190455988 -0.031040974 0.138535559 0.0049945279 -0.0669544563 -0.135656506 0.111046769 0.0884723812 0.0641905293 0.0139932213 -0.0747538805 0.14979732 -0.0598235726 -0.0681837425 0.078919284 -0.0920129493 0.0927841365 0.0622474365 -0.0725972429 -0.113833509 -0.106715776 -0.127413034 -0.0326030068 0.0898154825 0.0540499836 0.120726503 0.000129148364 0.122900732 -0.000325784204 -0.152520627 -0.0064624548 0.073874481 0.0152352303 0.0961310565 0.0109456517 -0.0507575348 0.120654956 0.126830235 0.108187631 -0.163000211 -0.17243591 0.0882807449 -0.00232086889 -0.132580787 -0.132526517 0.101748489 0.118433878 -0.034390375 -0.0734529495 -0.163235143 0.0954085439 -0.164447442 0.0220291484 0.165916741 -0.0268109124 0.00910670217 0.0109529579 0.0204264484 0.0675835386 0.0192554276 -0.019607991 -0.139865518 -0.0490590185 0.116421953 0.133383304 -0.00608086493 0.0666911826 0.010473121 0.0839324743 -0.00665302482 -0.0341572762 0.0905544311 -0.0330039002 -0.121482521 -0.13752155 -0.00984864868 -0.00438012183 -0.0152192581 -0.0799304917 -0.144058108 -0.0689092726 0.0797125772 0.049202282 0.121467397 -0.059979789 
-0.0338185877 -0.0875877663 -0.0352213718 -0.0265744999 -0.000970848021 -0.0542924628 -0.0339916125 0.0772038847 0.072251454 0.0386321284 -0.123490326 0.115714893 0.00240401109 0.138920873 -0.0890034363 0.0679262504 -0.0547012426 -0.0577247515 -0.00683979178 0.0310920458 0.0301014595 0.09850014 0.0825214908 0.137729675 -0.101440713 0.0277171135 -0.0100952508 -0.106422052 0.0132092983 0.104450844 -0.0620612726 -0.0807258561 -0.0243801288 -0.0753299445 0.0706419945 0.133234069 -0.119514674 0.126184896 -0.0232823435 -0.107439861 0.0486271791 -0.0467080846 -0.0273494851 -0.0494388938 0.106704935 0.0911619067 -0.0630275458 -0.12553288 -0.00862511992 -0.028438285 0.0349335819 0.140132353 0.0860453546 -0.0957376212 -0.00533922017 -0.0604479536 0.105011344 -0.0739720687 0.0970580429 0.0160997361 -0.0723680109 -0.0773093924 -0.100641474 -0.000383406878 0.0533034801 -0.0397997424 -0.094927974 -0.0186026245 -0.128123358 0.0100442469 0.0157190859 -0.0655204803 -0.0211179629 -0.0938301831 -0.0268794596 -0.0193795785 -0.0539523363 0.0891814232 -0.0723926127 -0.0894492418 -0.103800982 -0.0962850004 -0.0336188897 0.0200818777 -0.0712372959 0.0486632138 -0.0899597034 0.0786281079 0.0739620626 0.000252395868 -0.123112433 0.0357136726 -0.123496763 -0.10789144 0.0340666659 0.0580845289 0.129886597 0.0481477603 -0.0664139464 -0.068032667 0.0435736328 0.120462291 0.119476132 -0.123401277 -0.0684902221 0.0760027915 -0.00998137705 0.0795442387 0.0158706605 0.0744519681 -0.0714682937 0.0881680399 0.0666808859 -0.0570629239 0.164650321 0.12301676 0.0870940611 0.0813848004 -0.026452858 -0.0121077476 0.129477188 -0.142631516 0.0968390107 -0.0411611088 0.0438095704 -0.0174164046 0.143360704 -0.0702998862 -0.00564636895 -0.129426509 0.129657581 0.00313778641 -0.0574785173 0.130360812 0.0483710952 -0.119206332 0.0171343237 0.0760408044 0.0280918181 -0.0732154027 0.110174745 0.0896861851 0.00575533276 0.101032197 -0.0848289058 0.0622318983 -0.0934585631 0.13217181 -0.145480588 -0.151452094 
-0.193723321 -0.0797042996 0.0565548502 -0.103730097 -0.0197361708 0.0303085633 -0.077303797 -0.0265100189 0.0391752571 -0.0126148164 0.0999564305 0.0248864293 0.120508894 -0.133522972 -0.0817446709 0.168698058 -0.0242357664 0.125314549 0.138810694 0.0583302379 0.117092818 -0.00391758466 0.132906526 0.00617161999 0.0204244088 0.0222095568 -0.0337266289 -0.120259523 -0.0814958364 0.0759975687 -0.127410248 0.107059687 0.0443736836 -0.111485079 -0.0593480803 0.0116199553 -0.0279748887 -0.114313811 0.138794228 0.0156804174 0.0343504995 -0.0126132518 -0.0769002363 0.0819693729 -0.0879027769 0.129266858 -0.089684993 -0.0613807291 -0.141623229 -0.0882831067 -0.00330040953 -0.0679773539 0.0915297493 -0.137266099 -0.130465984 -0.0945152789 0.00593703426 -0.116638407 -0.0219550729 0.156748876 0.180117995 -0.0130009502 0.0716223866 -0.144065097 -0.0823530853 -0.018947104 0.0158558208 0.0430184379 0.154572129 -0.0570289902 0.0835271254 0.117485747 -0.0653187782 -0.0945697576 0.0832378045 0.128256038 -0.074221611 -0.0555155501 -0.0284128729 -0.0774558261 0.0875912234 -0.000803266244 0.0153772263 0.105136067 -0.156031758 -0.0866245553 0.0865259767 0.013942048 0.0376075655 0.0092022717 0.137353256 0.0442829132 0.0658835545 0.12299668 0.0346786119 0.100729421 0.0517841168 -0.0898612216 0.0358949974 -0.0126487454 -0.150891528 0.118704185 0.165351018 -0.0962797925 0.14023003 0.103115313 -0.0602911599 0.110436209 0.0308854431 0.121604525 0.127166167 -0.0259905457 0.128405809 0.0925664902 0.120217182 -0.110264599 0.0578555204 0.0608986728 -0.0673596784 0.0434045941 0.0300820656 0.00743610319 -0.0426555723 -0.0631315857 -0.102573559 0.159559608 -0.0250314325 0.151789397 0.0174076445 0.0587394796 0.0256275982 0.0982463285 0.113202661 -0.0388134755 -0.175611705 0.00276806951 0.100146458 -0.0734548494 0.0292181689 0.0794109702 0.163403466 -0.0469043329 0.0134403957 0.010670647 -0.0128954323 -0.123306222 -0.105166018 0.0782779232 -0.0463915803 0.0925515667 -0.153734311 0.0399706028 
0.0324561 -0.136016384 0.121838123 -0.0339740776 0.0617321283 0.0716827065 0.0507700294 0.163305402 -0.0263362825 -0.0235168263 0.156056419 -0.0918754488 0.0799345896 -0.0889241397 0.132214025 -0.0339573547 -0.0231025834 -0.0893127769 -0.15716745 0.0421844684 0.164029196 -0.0144499643 -0.0524246357 0.0808350593 -0.0515551828 0.12877433 0.123672612 -0.00602019066 0.108041525 0.0685625225 -0.107800402 -0.106587365 -0.0706622899 0.079738766 0.00206816196 0.111752108 0.164152429 -0.140735298 0.0629734248 -0.139009103 0.147802591 -0.0249149017 0.0117471283 -0.0343583301 0.100054584 0.0977551788 0.0404288657 0.126847446 0.172036812 -0.0862832591 0.0989949033 -0.0108139813 -0.109132327 -0.065063715 -0.05474668 -0.0446046479 0.0215636939 0.0535516292 0.0117583256 -0.0911338031 0.08406578 -0.0239859279 -0.0490008146 -0.0228198916 -0.0266752448 -0.0871018842 -0.143308818 -0.000947127643 0.0332983695 0.111087874 -0.112454593 0.166085541 0.138097584 -0.11190179 0.0368833952 0.0595792085 -0.08562731 0.159958228 -0.0567510165 -0.139039397 0.142708972 0.0837232322 0.0328567959 -0.137172282 0.0749840587 0.0762316734 -0.121910147 -0.0499757975 -0.0399817154 0.0123820901 0.0685181022 -0.0794972554 0.130480066 0.0485983491 0.138348088 0.115582928 -0.139361694 -0.124933064 -0.116998814 0.0428138971 -0.119468078 0.0110614747 -0.0100474358 -0.0216016769 0.0101694763 -0.0820546895 -0.0777691826 -0.117070802 0.126368955 -0.134564951 -0.0302673057 0.110644177 -0.117660195 -0.00458803773 -0.134814233 -0.0293745026 -0.0682334229 -0.095479995 0.0648405999 -0.0339591131 -0.0932905525 -0.123183005 -0.0931407288 -0.134305537 -0.00308911502 -0.0195324719 0.131572172 0.0200927258 -0.0235931352 0.0316516161 -0.0269541889 -0.0461172685 0.0184912682 0.0468815118 -0.138456166 -0.0628102869 -0.116835825 -0.0204005614 -0.0558201149 -0.0682484955 -0.0157806873 0.0899439305 -0.100633815 0.0933804959 -0.0987777337 0.0174719673 -0.0371658802 -0.0836207345 -0.145877808 -0.0866134912 0.113749318 -0.0686557889 
-0.10567008 -0.0482730195 -0.0070918831 0.114946 -0.134217158 -0.118984662 0.158714101 0.108982846 0.0504443385 -0.141264856 0.0717000142 0.104913875 0.138390988 0.0620018132 0.116844647 -0.0917929709 0.00455025444 -0.0148221394 0.090433605 -0.140797302 -0.127093479 -0.139803812 -0.0659283474 0.0552696697 0.0945735574 -0.097406134 0.0754362494 0.068106018 0.124531001 -0.0909646004 -0.102133028 -0.115647264 0.0507812947 -0.0703440532 0.124720164 0.138866737 -0.100535631 0.0667068362 0.00467189308 0.116699241 0.00265486818 -0.0454894938 -0.0559370294 -0.13688907 -0.0905043259 0.0473077707 0.00356861111 0.119102031 -0.00279730256 -0.0290972441 -0.0549311638 0.0690060258 -0.108683005 0.038172666 -0.0376584455 0.0284418333 -0.0453239642 -0.0188913643 0.10330762 0.0158650316 0.0302619878 0.10599114 -0.0192918926 -0.00512768747 -0.067353636 -0.0088639101 -0.0262610465 -0.00164695387 -0.0329788141 -0.0782948136 0.0144974366 -0.102364071 -0.0231709629 -0.053929802 0.0872491896 -0.0339207873 -0.0452650562 -0.0555231161 -0.106613263 0.0545223877 -0.104694769 0.0733768344 0.117440388 0.0850411355 0.127571523 -0.0451369807 0.127713785 0.0604759753 -0.116016053 0.120935522 -0.0490187742 0.0145258456 0.12456093 0.0472011603 0.0200307388 0.213931099 -0.0325213335 0.100024901 0.150687665 -0.0330869481 -0.0743881986 -0.0423107482 0.0197958369 0.144206196 -0.075991191 0.00704598008 0.169469431 -0.0728433281 0.00913137291 0.00888578407 -0.0482634306 -0.0623912066 -0.0938192755 0.0339379199 0.119007394 -0.0949489996 -0.114306375 0.0466704629 0.01765888 0.12498638 0.0581458732 0.0168336164 0.109576389 -0.0280811246 -0.0985747352 0.0206970666 -0.0964695066 -0.0664414242 0.110378399 0.139610469 -0.12254858 0.005280599 0.00761935115 -0.128316179 -0.0374653786 0.0578353852 0.127879024 0.0134341568 -0.0500954539 -0.00337731675 -0.119695731 -0.11888539 -0.00329445861 0.062248569 0.123474449 -0.0654902309 -0.0125339935 -0.0127297472 0.106315874 -0.117941201 0.101428568 -0.0125857871 
-0.0383322537 0.0596774332 -0.13507621 -0.0973153785 -0.126249328 -0.0674846619 -0.0139419707 0.120396413 0.105357513 0.0358138867 0.154476896 0.111231543 -0.171673253 -0.0643624365 -0.0841967613 0.0120489495 -0.110221028 -0.0037632389 -0.0754774585 0.10185004 0.094084166 -0.123280331 -0.0452301428 -0.119065173 0.00748275174 0.0470008291 -0.0596443266 0.110938832 0.078253679 0.0948193073 -0.00295168161 -0.0287760254 0.158084825 0.0492092073 0.0768529922 0.0546108335 -0.0640769675 -0.0777091384 0.102904022 -0.0749624521 0.129996344 -0.050066106 0.140702903 0.135439858 0.0565205291 0.039585311 0.131971121 -0.0433706567 -0.00613029394 -0.0415827632 -0.0896254405 -0.0250180494 0.0855293274 0.0162106231 0.0830451697 0.0412258208 0.109798126 0.0821261331 0.119376883 0.123270549 -0.109921172 -0.069528237 -0.128147811 0.00981930271 -0.0908927023 0.162675932 -0.115118623 0.0430796407 -0.0360173024 -0.142209709 -0.0679464936 0.125360489 0.0772604644 0.159708619 0.136423364 -0.113330379 -0.0995218381 0.0548840612 -0.0670673028 -0.0882866234 0.0123610795 -0.0497058704 -0.0615072772 -0.0382896513 -0.114018604 0.139805213 0.139648527 -0.0281077586 0.0916863829 -0.00357940956 0.119387031 -0.038923528 0.099045448 -0.056032382 -0.132692307 0.0513692684 -0.0116330124 0.0760553926 -0.0958063975 0.133466452 0.0711750537 -0.0614313632 -0.0412404239 0.0541786365 0.063894175 0.0131827295 0.0888437405 0.0340017416 0.00108662061 0.102465764 0.172270909 -0.110579006 0.0245050453 -0.113299966 0.0307509303 -0.00821371656 -0.0823507458 0.12988846 0.0906711072 0.0899582058 -0.0237735175 0.0522135533 -0.0568510592 -0.0833672881 0.0160184987 0.0457749926 -0.157355502 -0.137885511 0.0371456817 0.140265986 -0.0514193922 -0.0789706931 -0.0262926575 0.031272471 0.0584937558 0.088243857 0.0618854538 0.105974808 +tensor_16weight 2500 +-0.00417222502 0.0333769061 -0.068073824 -0.00174406881 0.0395693518 0.163498551 0.085880965 -0.0441546589 0.0277523138 -0.0394380651 -0.108085141 0.0367854051 
-0.088741377 0.110496983 0.137496606 -0.0574882254 -0.0685930923 -0.118896537 -0.0459423698 0.128555194 -0.00706961751 -0.109601662 -0.0728417113 0.0407270938 0.116901517 0.037543118 -0.0457625464 0.105285026 -0.0170973707 0.0741245896 0.0285330229 0.0602779202 -0.0999675766 0.0917604342 0.00275715417 0.0221613981 0.0221239924 0.0220769197 0.0309871975 0.141381815 0.102749333 0.0140401116 -0.160013482 0.0494507849 -0.109416723 0.0506694168 -0.0603474639 0.122836456 0.102406837 0.102601565 0.0129987504 0.0918339491 -0.0100188255 0.0104814339 -0.0403004438 0.0710072964 0.139566272 -0.0848197117 -0.065366447 0.0172738302 0.0708059147 0.0956877321 -0.101862162 0.0278298706 0.119774833 0.109509952 -0.0190650206 -0.0558079928 -0.0829644129 -0.0505085252 0.0924009234 -0.0197391063 0.120741382 0.109388441 -0.155889794 0.0402384102 0.0444546603 -0.0963738933 -0.0165153071 0.0370764881 0.152952656 0.173049316 -0.104738578 0.153395116 0.0572723225 -0.0417025536 -0.0367592871 -0.113802627 0.127408341 -0.0777374357 0.0151805067 -0.0934357792 -0.0254224017 -0.0842629671 0.102764659 0.0851573944 0.119558379 -0.0993547663 -0.0726170391 -0.0864863023 0.0344553739 -0.0371179581 0.014597863 -0.101272777 0.0389231592 0.0477042645 -0.135611862 -0.0279283747 -0.0736898407 -0.00949966908 0.0327468514 0.011397168 0.0977702439 0.0774219632 -0.0666735023 -0.120168492 0.0698120147 0.0764941722 0.00528292358 0.111364022 0.0231147856 -0.0114662051 -0.0972312316 -0.046190843 -0.0422428921 0.0343527198 -0.0671815872 -0.0542576611 0.0252622664 -0.133999184 -0.036504671 -0.10330338 0.0781079531 -0.0724790767 -0.0193377137 -0.106740713 0.119743183 -0.131887868 -0.00206248462 -0.0915471017 -0.0368748158 0.00305576622 0.101493865 0.126230076 -0.0354850665 0.0502597541 -0.00101481378 0.066304937 -0.0897568762 -0.0490704626 -0.104472384 -0.0435468704 -0.083782576 0.00601604581 0.113205031 -0.0322454944 0.0402761698 -0.0695916861 -0.139957666 -0.0615144074 -0.0179994181 -0.0612879917 -0.0798030049 
0.105225846 -0.0033620894 0.0714047551 0.0813372284 -0.0406201556 0.0420547277 -0.0629295483 0.128933236 -0.0379166752 0.0688687265 -0.0884593129 -0.12569578 -0.0999955758 0.0851953328 -0.105392635 0.0814247131 -0.123030953 0.0994129926 -0.0500591323 -0.109111317 -0.13911283 -0.0404232666 0.0469972044 -0.135043383 0.0236423463 0.00703085959 -0.0505910367 -0.0494552478 -0.135939568 -0.125193208 0.0693392456 0.128731236 -0.0279947668 -0.00343213975 -0.0427332819 0.137931898 -0.120300733 0.11937128 -0.0187009424 -0.0802288204 -0.13997142 -0.0681145191 -0.123841763 0.12888445 0.0320565253 -0.0545828864 0.0154529363 -0.109894589 0.127666876 0.0496545732 -0.0170144811 0.117039517 0.111107305 -0.140118808 -0.00819459558 -0.0746041089 -0.104405858 0.0340925604 -0.0303836614 0.115028903 0.082659781 0.0525604337 -0.0737821385 -0.0581655875 0.0645884275 -0.121426933 0.0243149996 0.104953259 0.11160703 -0.134071976 -0.0780070424 -0.0556476638 0.0689132363 -0.0958281457 0.0529917628 -0.00589548051 -0.0485527515 -0.133214802 -0.0193034932 -0.0625650287 0.0658643395 -0.115375213 0.0658562183 -0.126765266 0.0663554519 0.105942756 0.0113557875 0.00450533349 -0.0318827182 -0.0382149518 -0.0586391389 0.16599071 -0.0995347276 0.0626212955 -0.0848238021 0.0312059093 -0.0671846345 0.114710093 0.111972772 -0.109937578 -0.033448413 -0.00338487327 -0.137714684 0.061901439 0.0263031721 0.0757300183 -0.0868174583 -0.115281321 -0.0186160952 0.022743687 -0.0695182681 -0.0389948115 -0.00921310484 0.146889284 0.121259861 0.123389371 0.0740807503 -0.0913022682 0.0926736519 -0.0748549104 0.0120129362 -0.0260141995 0.160688117 0.103861287 -0.0145337479 -0.0333565325 0.106497906 0.12470568 -0.0427022539 -0.0200853404 0.125489667 0.117662869 -0.00191673823 0.0511538982 -0.0245015733 -0.0231342129 -0.0423432663 -0.067036055 0.0543258078 0.0126409382 -0.13212578 0.0719170049 0.0198016949 0.103088938 0.0337647051 0.118756339 -0.106006429 0.108546667 -0.161683097 -0.0532578528 -0.0416240096 -0.0414204299 
0.137367234 0.113782153 -0.00119005144 -0.124352522 -0.0775565729 -0.0454200171 0.110256732 0.138212636 0.139873043 0.0665704682 -0.0985324904 -0.132870167 0.025891345 0.102456145 0.131423756 -0.108975701 -0.0261260234 0.0348203629 -0.0914004669 -0.130643874 0.064868167 -0.073974885 0.102802224 0.176195145 0.0179768056 0.0108742332 -0.0181707554 -0.0102240648 0.00463731587 0.0538709089 0.00627785875 -0.117067128 -0.087891832 0.004855379 -0.016739469 -0.0725544542 0.0821630657 -0.0417728201 0.113972411 -0.0352853052 0.076194793 0.106641911 0.101013884 -0.0971745029 0.0243909303 0.029293431 0.0236056633 -0.0190717317 -0.12938638 -0.101987876 -0.0398474075 -0.0397941768 -0.0599071085 -0.0816488713 0.131114334 -0.072889857 -0.128262654 -0.048984535 0.0297204573 -0.071578376 0.0270867199 -0.0633577183 0.13518168 0.0259044431 0.0611634552 -0.0133587159 -0.00845749862 0.0748591572 -0.0559645891 0.0772884116 -0.0892494842 -0.124270409 0.0640116185 0.0149187753 0.112770371 -0.0819433481 -0.158321261 0.146862835 0.05744645 -0.136843622 -0.0534451306 -0.057807114 0.112765148 -0.121185474 0.0865707248 0.0242433939 0.00829058886 0.00682345033 -0.0391817167 0.118281983 0.0936698243 -0.0126834186 -0.0224663615 0.0807867125 0.0614369959 -0.0208024122 -0.0155477682 0.0223713629 0.133279428 0.0341431238 -0.0801266879 -0.0473638549 -0.0159126073 0.0619758293 0.104336567 0.0777632669 0.0014840191 -0.0106922537 -0.089225471 0.139960572 0.060204789 0.0285251942 0.00940582156 0.124438897 0.0746723562 -0.0786366537 -0.140947089 -0.0317693353 0.0952398479 0.0504078493 -0.101314582 0.00230675936 0.028885033 0.0213491581 -0.138435528 -0.0281476919 0.108943664 0.00568072731 -0.021763064 0.115609825 -0.0683022514 0.0327104814 0.0758552849 0.11111246 -0.0849706307 -0.0847819373 -0.000146973485 0.0806944817 0.0748187006 0.00227210205 0.0243628193 -0.0767735019 -0.0275015086 0.0700375587 0.0457462482 0.0988681763 -0.033178322 -0.0504397713 0.0441949666 0.0874261707 -0.192612246 -0.0348819122 
0.0727340132 -0.0881135315 0.0686711743 -0.0988578126 0.0592095368 -0.0447203517 0.0680712909 0.132170856 -0.0182013065 -0.0532437004 -0.0237953365 -0.0845318213 0.0437362194 0.0721768141 -0.0437071882 -0.0137181133 0.117270313 0.0595132113 0.174431637 0.0367056727 0.16878359 -0.0222882591 0.0592969358 0.0360841192 -0.0704026446 -0.0345776901 -0.0942229927 -0.165675908 -0.0193019863 -0.000750561245 -0.0503743216 -0.0973385572 0.0120092537 -0.168930963 0.162473217 -0.114793286 -0.0891378894 -0.0980757028 0.100445837 -0.0628618151 0.046582222 0.0680130422 -0.110214941 0.127800643 -0.0861949921 -0.0599781014 0.0208107978 0.0531936698 -0.00385034014 -0.0491617396 -0.0419875681 -0.00234212819 -0.044506561 -0.0653151795 0.0169184674 0.0848835111 0.144753113 -0.0450268537 0.0625728816 -0.022600282 0.0092583932 -0.0980294049 0.0949492604 -0.108607598 0.0273391213 -0.0572451502 -0.0307708569 -0.0600267388 0.0144662457 -0.122474261 0.0097047314 0.00327231945 -0.0497302189 0.143207729 0.110742435 0.116505228 -0.0606399626 0.130554333 -0.0482001454 0.146989092 0.0189287849 0.074033089 0.0732528344 0.0350786448 0.0295372307 0.131620392 0.0493342653 0.0956929848 0.0703327283 0.0766540915 -0.0670498535 0.105273128 -0.05142162 0.0384206101 0.0971984193 -0.116058186 -0.120040804 -0.0705016181 -0.10306605 0.100996941 0.104779199 -0.0833229199 0.0132066812 -0.131049663 -0.0160818845 -0.118435718 -0.0491212681 0.0483348295 0.0081577599 -0.0334057733 0.0873719454 -0.0852230042 0.1210372 -0.0395233259 0.0784760267 -0.0274248198 0.099436149 -0.081705071 -0.015222121 0.110525087 -0.0580340363 -0.0512353852 0.0699884966 -0.134925202 -0.00844763592 -0.0294794999 0.0049456358 0.0870961398 0.112957731 -0.12726216 -0.0223451219 0.053539414 0.0455328077 -0.0087382691 -0.0701403543 0.0741796196 -0.122097038 0.0159640387 -0.0312917195 0.10192579 0.0948835909 0.0600303523 -0.116103448 0.0967111215 -0.186777875 0.0394800454 0.0645573735 -0.00990641117 0.0682907104 -0.0206053555 -0.0376345441 
-0.0776938945 -0.0846702084 -0.0824906975 0.020499425 -0.0137960762 0.180186674 0.0354575664 0.0700841099 0.0222972054 0.0324955657 0.130303159 -0.0262751058 -0.184336275 0.0281189717 -0.138265505 -0.0036136366 -0.0606828108 -0.013456936 0.112634584 0.105258964 0.176689893 -0.169644877 -0.0470958985 0.132185519 -0.175565004 0.0531695932 0.0104851555 0.0826452076 -0.145328104 -0.0922671333 -0.164871365 0.142606691 -0.0212450475 -0.0627536103 0.120814189 -0.0105575472 0.0765462518 -0.0688535571 0.0809823424 0.00905969739 -0.0526664332 0.0274341255 -0.065106079 0.106400639 0.0910326689 0.0318324715 0.00195610523 -0.0523022339 -0.0163692534 0.0768652707 -0.0259682089 -0.0942348465 -0.126349121 -0.122505806 0.0306945741 -0.113110162 0.0605172664 0.0169282854 -0.0441042334 -0.139085665 0.0421898365 0.0137649477 -0.0915340632 0.0716675818 -0.0818142742 0.0774355978 0.132591441 -0.0364979431 -0.112088569 0.025617823 0.0622905344 0.137828872 -0.128434256 -0.0762574747 0.039005056 0.0113735795 0.0853985548 -0.110001206 -0.122525297 -0.118933201 -0.0976378173 -0.00780165195 0.0592657812 0.127340347 -0.116591275 -0.00909214467 0.0285629407 0.137352273 0.0194081701 0.0885308981 0.157478809 0.00182641763 -0.111265883 -0.0360108428 0.0904895365 0.0231916904 -0.134599373 0.123875104 -0.0176251531 0.00123633444 0.118751198 0.0944036767 0.00726477336 -0.00553962262 0.0213748366 0.0605549626 -0.0671228841 -0.11890097 0.0446490161 -0.124758892 0.0125705721 -0.0418758988 0.0201296303 -0.0815314054 -0.0722059831 -0.0872977003 0.17252858 0.0650849119 0.105454117 -0.0630111396 0.0785340741 -0.0949152634 0.120920762 -0.00637194049 -0.122229263 -0.04851266 0.0649544969 -0.0367785469 0.0145572387 0.0296253487 -0.104165144 -0.0307542253 0.0980055779 -0.0429146662 0.0307714939 0.0965365469 0.0424363613 -0.0620633438 0.037256062 0.0731356591 0.0377854705 0.126446977 -0.0181031153 -0.10487172 -0.012096405 0.0115436465 0.00484970212 0.0365195423 -0.0648169369 0.0427874923 -0.0382243469 
-0.131412312 0.0129759014 0.014330104 0.104675427 -0.0191158354 0.110161588 -0.0690229386 -0.134754956 0.0209327489 -0.137074128 0.101832643 -0.0448981151 0.0525557846 -0.0926448479 -0.0648545772 -0.118553191 -0.0258715078 0.0647533536 0.0853616297 0.0266875774 0.0280805677 -0.133945882 -0.131097019 0.0535267889 -0.0883116797 0.0746518523 0.0716847479 0.0854772329 -0.0682440624 -0.0383327305 -0.0687909126 0.0661910623 -0.0651709512 0.0676450729 0.042929098 0.0129889995 -0.0627579913 0.026080206 -0.0745798126 -0.110127226 0.0564449728 -0.0521587133 -0.0703866705 0.0996105373 0.0995502174 -0.0150131434 -0.093635723 -0.0350378379 -0.0970593914 0.122739777 -0.0746073425 0.00775636733 -0.0259913579 -0.0691226423 -0.0844953358 0.0796677917 -0.0647141263 -0.126704842 0.0870190561 0.110861555 -0.0944047272 -0.00367192924 0.0355183631 0.118708417 -0.0181086287 0.10595347 -0.00600086153 -0.00448402762 0.0546585321 0.0393356681 0.140889272 0.0358275473 -0.0797418952 0.0966726542 0.102656618 -0.0845542178 0.000538542867 -0.138633057 -0.103164904 -0.0017221421 -0.0267990902 -0.0042184745 0.0618429407 0.0372477919 -0.0600601025 -0.120651938 0.018602442 0.0741632134 -0.0556408912 -0.0401281454 0.0216008872 0.0986668468 -0.0385047868 -0.00614350522 0.0835534334 0.0581347793 0.0706503093 0.0540118366 -0.0558184311 -0.163868651 0.00549533684 -0.0525678769 0.0940028876 -0.0198096931 0.0326584801 0.0405591354 -0.0935613215 -0.0739186257 -0.032372281 0.131545618 0.076330319 0.0784656629 0.096952945 -0.0555465668 0.127968788 0.0291817039 -0.161188528 -0.0909612328 0.0471343175 -0.050026428 0.0184959229 -0.139395848 0.0397638716 -0.18348141 0.112185314 -0.0655662641 -0.0470221415 -0.164569005 -0.121025704 -0.0150856273 -0.00368866767 -0.0741992891 0.059470281 -0.0445875078 0.0394076109 -0.129359409 0.0192312244 -0.0108757932 0.0112697631 -0.146348611 0.159908772 0.0195346791 0.00573998271 0.106009968 0.0103269666 0.0211634543 -0.0780764073 -0.115655147 0.102710932 -0.0541914441 
0.0466746576 -0.106848881 0.10031607 -0.134285003 -0.0872690454 -0.135360599 -0.0295483619 -0.105432019 -0.0230201259 0.0187110342 -0.10428597 0.184378833 -0.0646381974 0.0479477942 0.0756504536 0.101597495 0.10669633 0.0338435024 0.0337592065 -0.136492506 -0.0870145112 0.0574491024 0.0298079327 0.0783730522 -0.0157561749 0.0467197858 0.103480637 0.0719107315 -0.10105852 0.0388642065 -0.0440139845 -0.0164328683 -0.0481204912 0.14119412 -0.136799589 -0.0650587231 0.00970490556 0.0506416559 0.0616328567 -0.00483906409 0.00724408031 0.165032029 -0.124474898 -0.149660811 -0.0778745487 -0.127463415 -0.0629397258 0.0866350383 -0.135488585 -0.120086707 -0.0116316313 -0.0172481909 -0.033788152 -0.00575722754 0.0619941019 0.109511442 -0.1259799 -0.0296196118 0.0126680005 -0.00631114235 0.0279259682 -0.000546666677 0.046820391 -0.0707993954 0.0746022463 -0.0110969217 -0.106299125 0.0408700407 0.0607301854 0.0142738195 -0.117608964 0.125323534 -0.0797425956 -0.147771716 -0.0951133072 -0.0761706829 -0.0271511003 -0.0952035114 0.149354368 0.0850981027 0.12360011 -0.0736458525 0.0565205403 0.0375516564 -0.0330046788 0.0698090419 -0.0682012588 -0.108341932 -0.0916303098 0.0491649024 0.128733024 -0.0175282191 -0.0851026475 0.121217623 0.0644281209 -9.09119844e-05 -0.00583170354 0.0807056576 -0.0556110144 -0.030019151 -0.151016012 -0.0976619869 -0.0704679191 -0.140273213 -0.131401047 -0.0380841792 0.105461046 0.115149468 0.0569063798 -0.0500233844 -0.120028786 -0.0609620214 0.0207634512 0.121349677 0.0845320895 -0.00625681877 0.130006418 0.10405767 -0.0260085575 0.00623696856 -0.00945841614 0.100506075 0.0220730081 -0.125443459 0.0532133728 -0.120297104 -0.11440815 -0.0461563803 -0.0888359398 0.013649012 -0.00815679412 -0.070658952 -0.00767463259 -0.120047957 0.120935254 0.0590654165 -0.0607035644 0.0469796248 0.10308367 0.17886214 -0.0048859301 -0.0231490359 -0.127134889 -0.0811686739 -0.0198651105 0.0723841488 -0.00580265373 0.0251238793 0.0345673561 -0.150929868 0.0692640245 
-0.0050682174 -0.0870390087 0.0347174555 0.118515827 0.0921180844 0.0585382432 0.102678254 0.0418618806 -0.0638227612 0.0212153941 -0.048158478 0.067370899 0.134428993 -0.0895267203 -0.137082666 0.0383201912 0.0807136148 0.0119578699 -0.00565120764 0.051708404 -0.0704574063 -0.0806446597 -0.0455211401 0.128211096 -0.152886659 -0.126107663 -0.174904436 0.170342699 0.0492694043 -0.016915286 0.0414748713 -0.0318201743 0.103975341 -0.0692306831 -0.0701901168 -0.136825696 -0.104924172 0.0634303093 -0.103916064 -0.106038429 -0.0103737917 -0.102413662 -0.0131952306 0.0928339362 0.0422227457 0.0485036634 0.100036606 -0.0334572345 0.0251479615 -0.0170369614 0.105127081 0.0789836645 -0.125357226 -0.0810341984 -0.119952716 -0.087011233 -0.136233181 -0.145835862 -0.139367864 0.0879707336 0.118222609 0.0654330924 -0.00252592564 0.0332888886 0.0847103074 -0.147119001 0.13717629 -0.0333771855 -0.0392536968 0.140056893 0.161733225 0.170547694 -0.13669911 -0.126283079 0.136217371 0.0222201925 0.100318342 0.0159461573 -0.151088864 -0.0795981139 0.0589573346 -0.00727232778 -0.0690927505 -0.00238380092 0.00583820418 -0.0911569446 -0.131048679 -0.117706373 0.0648147985 0.00487408834 0.117360242 0.170240089 0.118482806 0.0110958666 -0.0510480367 -0.0351133123 0.059696164 0.0868533999 -0.02283502 0.157529563 0.0911459476 -0.130911916 -0.0573937744 -0.116225123 0.0283462927 -0.0848590583 -0.00989816617 -0.0507650711 0.00583241554 -0.00466799736 -0.0543093197 0.123912387 -0.0726056844 -0.0514792576 0.178007007 -0.0574177206 0.0999287218 0.08989916 -0.0277903583 -0.144622609 0.0931683257 0.140252993 0.0123335114 -0.119432166 0.001549048 -0.00325248647 0.096072562 0.162840962 0.0217829365 0.122066244 -0.0385645702 -0.026368469 0.0513184667 -0.0859575272 -0.0242716596 0.00956724584 0.0109226704 0.0818789154 0.0224322379 0.135026619 -0.04657121 -0.104234524 0.0428646505 0.0151414573 -0.0313612148 -0.00296355784 -0.0544822216 0.129524395 0.0263857096 0.0820539892 0.0624815822 -0.0385384262 
0.111337319 -0.0279020891 0.0234304219 -0.138835654 0.0737862438 0.0558309704 0.106782034 0.0583992153 -0.0247403383 -0.128191724 0.0889156908 -0.0910850763 0.0517035276 -0.0938775688 -0.0293845311 -0.120674253 -0.0451591834 0.0491400808 0.0255339593 -0.109320991 -0.0378039181 -0.0312750563 0.0803458393 -0.139691994 -0.00930851698 -0.0976287797 -0.114196926 -0.133310482 0.141642928 0.105140746 0.119025633 0.0663658231 0.0742790997 -0.0807274282 -0.049172353 0.0325890929 0.168383315 0.0958275571 0.0830662847 0.13090989 0.11515788 0.0930101275 -0.135337189 -0.01777054 -0.0463960879 0.121989891 -0.0663873479 0.0556551777 -0.0777027011 0.0780093744 0.0564488322 0.0328528732 0.0221739244 0.0690468401 0.165196538 -0.0979238898 0.0833290517 -0.0583337545 0.171756044 0.0485463925 -0.0788506195 -0.133651629 -0.0622514226 0.146948621 0.00967819989 -0.10787762 -0.0629694313 -0.0672841221 0.0454268195 0.153374791 0.0254830644 -0.107513458 -0.0417494588 -0.128024951 0.0972493291 0.0891800448 0.0753429011 0.0632758364 -0.0226834938 0.0985851139 -0.0661747381 -0.0974865481 -0.116021931 -0.0705863535 0.0088367667 0.11096172 0.00588925183 0.0319114141 -0.0778467134 -0.0426055863 0.0243966766 0.00911161304 -0.125340477 0.0958841443 0.0809076428 -0.0837767944 -0.0955800563 0.0739903226 -0.0197054464 0.0984792486 0.0248065591 0.013472463 0.0424549654 -0.115080677 0.0406080261 -0.0103811678 -0.00163237448 0.124697939 -0.0250684526 -0.109149016 -0.0914341062 0.0849946067 0.114163592 -0.114370003 -0.0137929916 0.0836040005 0.113473631 -0.0115816081 -0.00375672383 0.145472392 0.115759097 -0.0200298876 -0.0216319505 0.0506976917 0.146452606 0.00109191891 0.0250434522 0.125239 -0.070178017 0.112422191 0.128775164 0.0972625315 -0.0562150516 0.0652283952 0.0582313985 -0.0210614875 0.0146073569 -0.0898632482 0.00922326744 0.143344715 -0.154285237 0.120073751 -0.0218681768 -0.0313799977 -0.0713483468 -0.11174649 -0.0150706414 -0.0665470064 0.0984330028 -0.0607178248 0.00206361711 0.144247591 
0.114495434 -0.0467930511 0.0812650472 0.126196146 0.155254051 -0.117046624 -0.00681339065 0.110044703 -0.130152121 0.0844703615 -0.103106052 0.07677605 0.136585757 -0.00327930111 -0.0542289279 -0.0169889219 -0.00491619762 0.00191336707 0.123525828 -0.0764942095 0.062074706 -0.118892998 -0.152141303 0.128834948 -0.00888511073 -0.128381923 0.166257232 -0.118136637 0.104106829 -0.00407020096 -0.0890831947 0.0344172269 -0.0881365165 0.00957617164 -0.0420764349 -0.11916101 0.0259462018 -0.12878786 -0.11673443 0.0500368178 0.123366237 -0.118328013 -0.0965624005 -0.124413118 -0.0506703407 0.10639628 0.118844062 0.136438951 -0.096905753 0.0549481958 -0.0769049451 -0.0224119276 0.0397996418 0.0951755494 0.113283962 -0.0366398245 0.106758043 -0.153018385 0.116750017 0.0242051464 0.0555575825 -0.144076809 -0.123421922 -0.0913296789 -0.0159076471 0.0744441524 0.129337355 0.0706739873 0.0101508312 0.00735373795 0.127268285 -0.108703181 0.0155255729 -0.0134423403 -0.0967226699 0.0832142085 -0.174966842 0.130565166 0.0653796941 0.0878034979 -0.0906267166 0.0458128788 -0.00101685664 0.0771127343 0.0380266793 0.0676126033 0.119121142 -0.113756225 -0.044930473 -0.0590678304 -0.114111833 0.0717334226 0.121548906 0.141148821 0.145141944 -0.0386701077 0.0908232033 0.0557151176 0.0785376281 0.0665990561 -0.026476074 0.0890998095 -0.0765963942 0.00731649436 -0.0525127202 0.0552615821 0.0508936346 -0.0130886035 -0.136101693 0.0808141679 -0.0865692198 -0.0861371309 0.120797709 0.14255853 -0.0736281499 -0.110956788 -0.0413296558 0.132970884 -0.128850982 -0.0727135316 -0.0477897227 0.0840527937 -0.0905833915 -0.115051724 -0.0579792745 0.111140922 0.10985522 -0.0506750494 -0.0162282735 0.0716301352 0.0544709153 0.12016838 -0.0960830227 0.0585822612 0.0589075349 0.0988149717 0.0618340187 -0.0847306624 -0.0631039515 0.0553748012 -0.111075364 0.0433333553 -0.0188259855 -0.112079769 0.0795213431 -0.0475602113 0.13167055 0.022298038 0.134109125 0.0383149795 -0.0405841693 -0.120256543 -0.104634985 
-0.0977489278 -0.020790996 0.105706528 0.133288711 -0.00679467805 0.107927233 0.132649049 0.184538066 0.0214475244 -0.0248829424 0.0847060084 0.059696611 0.0894906074 -0.0552571602 0.110768363 -0.0777745917 0.0256617665 -0.0267863441 -0.011260841 0.167986751 -0.0874950364 0.191600099 0.145262077 -0.0928313509 0.0274879988 -0.0514709651 0.09432742 0.120864995 0.0269548818 0.00873297453 0.0555894822 0.0474308766 0.10841053 0.0247467272 -0.0354869291 0.0167415757 0.130794838 0.130202994 -0.105830118 -0.0619872436 0.118844941 0.0582736246 -0.0529267974 -0.122846067 -0.00995433331 -0.0649164692 0.0907461941 -0.0287418552 -0.0419690758 0.0771300942 -0.0686131641 -0.137708366 -0.0504630506 -0.0929714441 -0.105465375 0.128161833 0.0110401921 -0.102053583 -0.137259245 -0.106930703 0.116477638 0.0865236223 0.0225237608 -0.10337396 -0.141124219 0.0859540254 0.124468133 0.122546621 -0.13006793 0.0637582615 -0.111786462 0.0577507317 -0.104083821 -0.0128176091 0.165550634 -0.0433670357 -0.0197649412 0.0431121029 0.0775365457 0.0529541112 -0.094568193 0.121677496 -0.0553900301 -0.111113667 0.0546647757 -0.102778159 -0.11795371 -0.0317932889 -0.0803981498 0.0593941696 0.0496674031 0.115015298 -0.0528469123 -0.139373824 0.0843660235 0.139188275 -0.00414940715 -0.0868603438 0.13721244 -0.111397579 -0.159685537 -0.082697615 -0.104306176 0.126542285 0.105239764 0.107114151 0.177761674 0.0128696105 0.0826148763 0.058955878 -0.104517892 -0.0758889616 0.09818995 -0.0577240698 -0.0954538211 -0.00908805989 -0.0742976665 0.0133352363 -0.0486188494 0.110651046 0.142552137 -0.115153261 -0.0301514734 0.063916415 0.15691933 0.0455034524 -0.099480927 -0.0727156326 0.0517707281 -0.00254492369 -0.0865125954 -0.0609851852 -0.0927319676 0.0186524875 0.0648534745 -0.106417365 -0.12526378 -0.0807953104 -0.112901188 0.0167454872 0.0380413979 0.0550592877 0.121599279 -0.110051796 0.02049114 -0.113943994 0.0120341163 0.054577291 -0.0282275155 -0.0312832892 -0.14506188 0.0298683271 0.0238241255 
-0.109261803 0.156495273 -0.132307068 0.0377000831 -0.194822595 0.0180359278 0.0903957933 0.0792595521 -0.112396851 0.181946352 0.131865382 0.131673768 -0.0266070645 -0.0772341341 -0.152235314 0.00981780421 0.021913426 0.128900573 0.0193908215 -0.0176257994 -0.0472896919 -0.0430161357 0.00791153312 -0.00393258035 -0.0942965969 -0.121386364 0.0414898992 -0.108187407 -0.0423817933 0.0773015916 0.0430543721 -0.140809581 -0.0302241296 -0.0291004777 0.137758568 0.118799791 0.0682975501 -0.0604750663 0.0137153119 -0.132459 -0.127778888 -0.0843767822 -0.0346240923 0.109940037 0.0166125298 -0.0546926185 0.0908764154 0.021722734 -0.098706007 0.0907239914 0.0898113251 -0.0827805996 -0.0991119295 -0.140943363 0.0492345244 -0.0119397789 0.14030765 0.103607342 0.0925647169 0.0516325533 0.0379345268 -0.0236282125 -0.12176773 0.0705950111 -0.0243805572 0.0545013547 -0.0887916982 -0.114718959 -0.0397065133 -0.0364263579 0.0644538626 -0.0560083427 0.00641170144 -0.115581721 0.0916448981 0.165714934 -0.0800587684 -0.139642954 -0.147206143 0.14433305 0.121563576 0.124653377 0.00290819933 -0.00689320266 -0.134743109 -0.0199477952 0.025890775 -0.097605601 0.144976184 0.0240392517 0.106600702 -0.0212116838 0.141039237 0.0561088696 0.0311461743 -0.0972645953 -0.072422564 -0.00339891389 0.0796980709 0.104762718 -0.000878902618 -0.132795557 -0.0110478699 0.0181016214 0.033460509 0.16719529 0.0324967206 -0.0523595363 -0.0596223697 0.00182852231 -0.0620629825 0.0593552999 -0.0177798048 -0.134050861 0.091143012 0.0613186061 -0.0134655545 0.0728626028 0.11032062 -0.110704623 0.113462664 -0.125822559 -0.0217845738 -0.0629318058 -0.110271119 0.0102175586 0.0384269953 0.041528672 0.117570601 0.0818911791 -0.172721431 -0.0826341584 0.143604502 -0.0946950018 -0.0604351908 -0.145674467 -0.0840071887 0.124765471 -0.0483974069 0.0704860687 -0.0513613969 0.102030285 0.0974786878 -0.122555755 0.0484942682 0.120798931 0.0460326076 -0.158095554 -0.174045458 0.0302520841 0.00143720582 0.109413154 
-0.089490898 0.0403371304 0.0639648885 0.0195339117 0.0851712674 0.0805694312 0.14608863 0.0028625133 -0.0460382551 0.133578837 0.0474288128 -0.00900612772 0.0885348395 -0.0367066041 -0.0723806694 0.11516311 -0.118180014 0.173740089 -0.146156281 0.0717909113 0.0275212824 -0.00858630054 -0.0898427665 0.0496289805 -0.00977645814 -0.000550225377 0.107040554 0.00615952769 -0.0177549198 0.00348424283 0.0556790046 0.00946377218 0.0532293469 0.065482147 -0.00725790858 -0.0178769901 -0.0931927189 -0.00408215867 -0.0455512553 0.0916866064 -0.0685584769 0.0127655407 0.153180107 -0.111894742 0.0765284225 0.0260704271 0.166822523 0.0185323507 0.0651063919 -0.0187719949 0.124089167 0.060685847 -0.0775963366 -0.0125377784 -0.0656027719 0.0646102652 -0.168868825 0.0715800896 0.0879634544 -0.0288382769 0.106939703 0.0860616416 -0.147091925 0.0701762438 -0.154209405 0.0168217402 -0.0449583791 0.150799006 -0.0704786852 0.0240108818 -0.0355115384 0.0358552337 -0.00473457575 -0.0508537143 -0.141223907 0.0694443733 0.13417317 -0.0782108009 -0.101270229 0.0261989981 -0.0686776191 0.0585096031 -0.0421225578 0.112916127 0.086612314 -0.0666391104 -0.0785085559 0.0723554343 0.0139737725 0.0708736479 0.104420915 0.0537221283 0.0645926893 0.0457919091 -0.0992759913 -0.0648162961 0.0873944908 0.135357454 0.0170427561 -0.111893699 0.118674949 0.0242431909 0.131864205 -0.0160110891 -0.0652568489 -0.136016116 0.109248772 -0.0484858006 0.0772848427 -0.14131242 -0.0729296431 -0.0393306315 -0.136528164 -0.0325899497 -0.138895422 0.0920817852 -0.00506219268 -0.0218565464 0.178741023 0.000478784292 0.131854966 -0.0545493215 0.0215138886 0.00403119158 0.0818745121 -0.0696112365 0.0600213483 -0.0925471112 -0.0903580412 -0.0477291904 0.107568391 0.00575149059 -0.0782493427 -0.139606044 -0.104121134 0.000853225589 -0.0665216073 0.0340940468 0.089630641 0.105736643 0.0541669875 -0.0017753446 -0.0978966951 0.0146369338 0.148612946 0.0921797678 0.136579424 0.0460715145 0.0911454633 -0.128372431 0.0109238625 
-0.0441332199 -0.0767628923 0.0842499733 0.0932835937 0.0461514592 -0.0389378071 0.0933182985 -0.0622017495 -0.150734246 0.0955395252 0.0519403517 -0.0935698077 0.066866152 -0.0563694201 0.0773465633 0.130469427 -0.0873394459 -0.0453016274 0.108205438 0.113830492 0.0172546878 0.142020121 -0.154692054 -0.0341549404 -0.0286441967 0.109917641 0.0980234817 -0.0668107346 0.184589744 0.0403014049 0.0617943406 -0.0170874391 0.165723398 -0.129302576 -0.0549281389 -0.0933095962 0.0417284593 -0.0645934939 -0.110736929 0.0152139366 -0.0576914884 0.14489913 0.0873956382 0.122595996 -0.100744717 -0.0429090895 -0.0427098498 0.00572537677 0.0862406343 0.0775141418 -0.114131004 0.0538300015 -0.133984268 -0.0513852537 -0.10520786 0.0823375359 -0.154663965 0.140852332 -0.12760137 0.0852351859 0.0137166381 0.107333735 0.0452813096 -0.0948897004 0.151388273 0.0713874474 -0.128167778 0.0409513377 -0.0138035901 -0.124753937 0.0356289335 0.0602589771 -0.0651701242 -0.0210708473 0.103122458 0.0906714946 -0.00217249128 0.122800052 -0.181580707 0.116333932 -0.0186058432 -0.0243001878 -0.113380618 -0.118654899 -0.0473651513 -0.0539538004 0.0831884071 -0.146917343 0.087518312 -0.0194211677 -0.0846155882 0.113477409 -0.136310667 -0.0191066563 0.143494949 0.112203613 0.114737526 -0.111948609 -0.00443618372 -0.0773533285 0.136251882 0.0225571636 -0.0362712741 -0.0346994996 0.114932373 0.0432204269 -0.0263436884 0.107738495 0.076974012 0.0996974856 0.119173273 -0.000754479493 -0.120190717 -0.194255233 0.0754887313 -0.098057352 -0.0132691478 -0.179493576 0.0841401368 -0.0858590156 0.0584929027 -0.0300332811 0.123049341 -0.0501776785 0.0645771027 0.147005603 0.0247138739 0.0643707812 -0.163929448 0.106429175 0.0111500323 0.00196518004 -0.136184648 0.0189471096 -0.0814004242 -0.045225054 -0.0670835301 -0.0842032209 0.0329777375 0.0835169405 0.0539242215 0.0254002046 0.0783123672 -0.00774729624 0.0359950811 -0.0802180246 -0.107105985 0.00407534558 -0.00570665905 -0.0215339214 -0.0752450898 
0.0079908995 -0.051493451 0.0471975654 0.137056708 -0.0891998485 0.0102625685 0.074707374 -0.105349854 -0.013545217 0.0585588515 0.123852111 -0.0915067494 -0.0506767295 0.146273047 -0.0317402482 0.0230212267 0.0921593308 -0.147391126 -0.102736384 -0.100691356 -0.0281435605 0.0503647961 0.0425743312 0.0990781933 0.119182095 0.0459855348 0.0305577312 -0.123558313 -0.0513050854 0.0112928003 0.00887593627 0.126145825 0.00219893456 0.0915814042 -0.109302096 -0.15622583 -0.13981463 -0.00616870821 -0.0105179995 0.104944795 0.0233698469 0.0621656775 -0.134819224 -0.0790077522 -0.0676694512 -0.118123025 0.0649551898 -0.0268536378 -0.128346384 0.0998368114 0.0453064777 0.0186656322 0.082455948 0.0146208499 0.0852921382 -0.034911897 0.0727281496 -0.0483980887 -0.121127099 0.0403252244 0.063668929 0.0448285639 0.119639181 -0.047936447 0.0020842955 -0.093942605 -0.104747362 0.0033461235 0.034542352 0.0912363082 -0.0369631052 -0.0812179595 -0.0106220422 0.00214873254 -0.0435321555 0.0224560201 -0.136056453 0.0154713243 -0.0294029415 -0.030266948 0.0689932555 0.0570425093 0.098155126 -0.0518679842 0.119383372 -0.138322964 -0.00904059596 -0.106911823 0.103631556 -0.103188537 0.00235909224 -0.0282520652 -0.0132444557 0.0874274075 -0.0390356034 -0.0197433233 -0.0990931988 -0.0221592877 -0.0945370346 0.0292591304 0.0865644217 0.0864166915 0.100083038 -0.0889186412 -0.115894549 0.102965675 0.0463272929 -0.134802818 0.0737079829 -0.129007578 0.0247757733 0.0989635438 -0.11343433 0.134549081 -0.0447648764 -0.0838385448 0.0168119203 -0.0835036933 -0.0518646985 0.0785566717 0.0304447822 0.0878861994 -0.0602087826 0.0966850668 0.00694012549 0.150681928 -0.0779627338 -0.0904122442 -0.0970596001 -0.129838735 -0.050377138 -0.120469227 0.0891878232 0.0898670554 0.0890591964 0.0588300936 -0.00137163699 0.122232124 -0.0888908282 0.091505006 0.0884710699 -0.00118646026 -0.0513543747 0.120835468 0.10668838 -0.123527482 0.126270682 -0.0198479965 -0.136215568 -0.120502472 -0.11477147 -0.0835626945 
0.112296805 -0.10101068 -0.0431131609 -0.0186134148 -0.0433320589 -0.000781024981 0.104710743 -0.0381170735 0.0791320279 0.0748666972 0.121473044 -0.0812482983 0.12976031 -0.0290680751 -0.00869207084 0.134282753 -0.124510139 0.0872084498 0.0703281984 0.044497788 -0.0438723378 0.0582895428 0.0864354447 0.00991265662 0.0587073565 0.0693353862 0.0946473181 0.132393822 -0.0691181794 0.114411987 0.0981440097 0.0721710473 0.0940810665 0.0939522609 0.0250950288 -0.00488990825 -0.0450708121 0.00825079717 0.0511284098 -0.0437048152 -0.0188587848 -0.0834606662 0.0222276002 0.115497097 0.0385790952 -0.0981816575 -0.0865187049 -0.0519928411 0.0127416467 0.0403454639 -0.102931798 -0.0622832291 -0.176352426 0.0319424681 0.115145199 0.137701288 -0.160753131 0.0574897826 0.029381264 -0.106967442 0.135685384 0.0374634452 -0.0107224155 -0.0305843167 0.0789548606 0.0807328969 0.116793022 0.0109361755 -0.116716683 0.0137034692 0.0524121523 -0.0414179638 -0.0828967467 -0.131809413 -0.08836063 0.0339427441 0.00510858931 0.102733135 -0.0078022778 -0.0649038255 0.113678351 0.0853035152 -0.0652009472 -0.105958655 -0.0926674381 -0.124525182 0.0650760978 0.0134403473 -0.0214727018 0.0792901665 -0.0695419237 0.00138363184 -0.0841140822 0.00448186696 0.011020178 -0.0218410827 0.124228075 0.135860786 -0.0192789454 -0.10124626 -0.0194821134 0.0967154875 0.14184539 0.0926100463 0.0980347469 0.00184068806 -0.095286414 -0.123941429 0.025722323 0.11833895 -0.141884103 0.117436834 -0.0285835806 0.129173145 -0.0291574448 0.0875629336 -0.12026988 0.108618274 -0.0268362314 0.140304998 0.0205702633 -0.0184959918 -0.0276578963 -0.131651908 -0.0468064025 0.100602806 -0.0214187056 0.121467933 0.132986173 0.0596923828 -0.100520998 0.0469026566 -0.0225379243 -0.124888517 -0.0528000593 -0.119780593 0.103240386 0.0283819586 0.00888063014 -0.0159017146 0.0695591718 -0.0311708003 0.0383356661 0.0305758268 -0.0846125856 -0.133274123 0.0758237839 0.0325065106 0.0458696932 -0.101240918 -0.0143970549 -0.0982608497 
-0.111203283 0.0112188458 0.0285433233 0.0977355093 0.0379961878 -0.0932414755 -0.137481123 0.0168030262 -0.130169049 -0.125621766 -0.0568111241 0.0422300845 -0.025392022 -0.0273560993 -0.0488719195 0.00497778924 0.0536593273 0.0217000954 -0.0605572015 -0.0740404874 0.117026515 0.142124146 -0.102213182 0.112507164 0.077157028 0.0520981103 0.105322793 0.126890764 0.0796572641 0.029000476 0.0621110015 -0.112644024 -0.111841545 0.114291631 0.119438633 -0.146059752 -0.140381634 0.12208508 -0.0593487211 -0.123670183 -0.0897043496 0.0858117491 -0.00866622385 0.081977047 0.0776127875 0.0893446133 -0.0835789144 -0.0501656011 -0.00162425637 0.113464631 -0.0252724476 -0.103041545 0.113613687 0.101202324 0.100906543 -0.126668274 -0.0983274356 -0.0464979149 0.151394174 0.0900189281 -0.0734032542 -0.0592180528 +tensor_12weight 2500 +-0.0283639673 -0.0221012682 0.133199289 0.0563756078 0.0816197246 0.0497825928 0.0042012888 -0.0795029998 0.0960107818 -0.0432373807 0.121810496 -0.0352233462 -0.00614713971 -0.0204377715 0.0226566363 0.0332049951 0.0991061553 -0.101053268 0.0533379503 0.106147163 0.0264711361 -0.0589281917 0.0324204192 -0.105546504 -0.00826245081 -0.0233381912 -0.0656121224 -0.107647173 -0.0529786088 -0.0139337238 0.0566528141 -0.0541924387 -0.138034001 0.00951558538 0.0105655985 0.127043515 0.0221604444 0.0460083932 -0.053250242 -0.078631945 0.0428880826 0.0788966715 0.0724399239 -0.0252473392 0.105300531 0.00663374923 0.103679053 -0.0542423241 0.00125868618 0.0159070492 0.0878015533 -0.0953141749 -0.00558294635 0.121205859 -0.0582696423 -0.0979145169 0.129265517 -0.135002077 -0.0146291191 -0.122696914 -0.081385985 -0.0635085255 -0.0441100746 -0.0792573318 0.0985748172 -0.0419243202 -0.0581838712 -0.0535519533 -0.142513841 -0.0591027141 -0.137103438 -0.106035508 0.0540995859 -0.00673660636 0.0947093591 0.080907613 0.101540178 -0.0970778465 0.044728864 0.00199462846 -0.138367772 -0.0271480009 -0.141949549 0.133446857 -0.0284297504 0.11598064 -0.139251098 
-0.11250446 -0.0144105088 0.0116322637 -0.0658263117 0.00760472333 0.00838519912 0.0230650119 0.107505634 0.0946382433 -0.04299891 0.0719103888 -0.102656573 0.0685082525 -0.0726309568 0.0508401543 -0.164903283 -0.00989604276 0.0877222568 -0.0414222702 0.0155824088 -0.163643703 -0.127258003 -0.036953602 0.144812256 -0.0623725951 -0.0524286106 -0.0514523163 0.149055287 0.151022196 -0.0676416233 -0.00197385135 0.114244364 -0.0560362265 0.0799408183 -0.0222610235 -0.0891805142 -0.00829313789 0.074500069 0.118379205 -0.0137284929 -0.00101503101 0.153622374 -0.0226939153 0.00704781711 -0.0985070691 0.0757866055 -0.0784802958 -0.0075249183 -0.158581018 0.000263459544 -0.0682379603 0.152031437 -0.0480913743 -0.127841696 0.0475812815 -0.0938547626 -0.0747125596 0.108499467 0.138318345 0.149815157 -0.167665958 0.0575088561 0.0208468586 -0.0716508031 0.0452511907 -0.137918025 0.0906910673 0.0187588073 0.024338223 0.0748084113 0.102134176 0.0844082534 0.0736728013 -0.0625494495 0.158075228 0.167300254 0.124639682 0.105899356 0.0630086958 0.131153017 0.112854511 -0.00915049016 0.166930482 0.0416866131 -0.123647667 0.0432391763 0.10396602 0.110268258 0.113732249 -0.0752861872 0.174172521 -0.102077015 -0.0965104327 0.0681533068 -0.00983810425 -0.106261119 0.0500092097 0.0385275073 0.023127025 0.0940388143 -0.0235950071 0.0871616006 0.0998354703 0.0826614797 -0.100802064 -0.0113284597 -0.125418305 -0.0762608498 0.0396648198 -0.0408090092 -0.11062713 0.0179210901 0.030816406 0.0176397078 -0.0463601053 -0.0776805356 -0.0160311256 0.157598197 -0.0891788378 -0.0539689176 -0.095866017 -0.06246797 -0.0310623348 0.0908927321 0.0408000574 0.0765677392 -0.0167786833 -0.0754223838 0.0956952795 0.114675142 -0.108119257 -0.0605223328 -0.024680933 0.0278924722 0.0077753365 -0.0282166582 0.0750903338 -0.0393958427 0.124427982 0.0725398734 0.143909976 0.0277911164 -0.0190528724 0.00227800012 -0.0566477478 0.00833356939 -0.0737289861 0.0828023031 -0.0422451794 0.111898191 -0.134707883 0.19176881 
-0.0650343299 0.153366417 -0.132717431 0.0561521985 0.112367474 -0.107373334 0.0557826981 0.139610589 0.0258726813 0.102975905 -0.141169146 -0.0615244322 0.0394206047 -0.0578120835 0.00785397924 0.0674745068 0.0853137672 0.175648615 -0.0636080429 0.155097261 0.111320585 -0.172565565 0.0217328835 -0.0781033784 0.132392913 0.0711862743 0.0581634976 0.186962739 0.0883171558 0.0591275692 0.0622355938 -0.00711058732 -0.0328418538 0.0393970683 0.0599646643 -0.0935084671 0.11538434 0.00435523642 0.0496765226 0.120068103 0.150854304 0.125003979 0.10406445 -0.0222269446 -0.0942721814 -0.05046333 0.0410163514 -0.0161274076 -0.033829648 0.0972495377 -0.143943429 -0.0443067737 -0.0550568588 -0.0995942876 0.055709511 -0.0130466223 0.16307734 -0.0587146431 -0.0876527801 -0.0571654439 -0.0653208718 0.0995535851 0.110708371 -0.117773779 -0.0629195049 -0.0506410673 0.104000725 -0.0097075915 0.039908275 -0.169400364 0.116414152 0.0159012116 -0.115527548 0.0599115118 0.0376613848 0.0347734354 0.149321213 0.0256413259 -0.0118766362 -0.00494059315 0.154646352 -0.0279375352 0.0956456512 0.128097624 -0.0107601006 0.102062821 0.0818155333 -0.00417978317 0.169782162 0.075039342 -0.132441014 0.136600628 -0.074167937 0.182788104 0.0683773085 -0.158134758 -0.153111815 -0.131043464 0.0933924839 0.149934262 0.0977778062 -0.123312928 -0.0546211079 -0.0884571895 -0.123066284 0.03769508 -0.0408919007 -0.0261739939 -0.0946909636 0.0763799548 0.10528186 -0.0239403676 -0.00170964003 0.107443631 0.0624295175 -0.041471377 0.0583784059 0.101691283 0.172578365 0.147721156 -0.00853029452 -0.0682957247 -0.128606334 0.108250916 0.110467754 0.0698471293 -0.10638088 -0.134169206 0.0335389711 0.0807912797 0.10768722 -0.0748209134 -0.0619279668 -0.0597048439 0.0238682833 0.0770351812 0.133448511 -0.116575532 -0.00418985356 -0.014257974 0.186535001 0.0766483098 0.0578167289 -0.1063518 -0.0533722229 -0.0255306661 0.0375191718 -0.0521368645 -0.0643548667 -0.100525327 0.087843962 0.0360278301 0.186081558 
0.0779006332 0.124324918 -0.0178796574 0.0964507908 0.000115471157 0.121352643 -0.0145074725 0.136342749 -0.0524556786 -0.0543406755 -0.0231247786 0.0547336638 -0.110653035 0.00989100244 0.0201757103 0.168474302 0.0595019609 0.140253812 -0.0207650233 0.156596705 0.0423379913 0.0540750362 0.0326289497 -0.0725854859 0.120222606 0.0588764995 0.0232573915 0.156475991 -0.107084453 -0.109426022 0.120157138 -0.0353143811 0.109706894 0.0043564043 0.138297886 0.168600574 0.147256792 -0.00588848395 0.0737749934 0.0574953109 0.0112638986 -0.125408784 0.0741967931 -0.0695395544 -0.0118542425 0.0154474955 0.0319254026 -0.0753778815 -0.0449277535 -0.00750545319 0.170274869 -0.131167576 0.135590628 0.122695968 -0.0971804634 0.0734618902 -0.00931429863 0.0365690589 0.0510447621 -0.0520833731 -0.0832352266 -0.101591244 -0.0607022047 0.0549951196 0.127605066 0.0541120619 -0.0588162467 0.0998720974 0.0493049473 -0.0581545606 0.0780114084 0.0665595233 -0.0931976438 0.038903445 -0.0382999554 0.0369430333 -0.085522607 0.0509717613 -0.0687565655 -0.0897999555 -0.0971511528 0.138259694 0.116990075 0.00449900329 -0.0872562379 -0.138075918 0.096727103 -0.0454748869 -0.00835196674 0.0760231018 0.0369116217 0.0443069041 0.0586204678 0.134296641 -0.131746709 0.05054304 0.141017333 -0.0719901621 -0.0659282431 0.109382346 -0.129540086 0.118722782 0.0131379962 -0.10180755 -0.0842798278 -0.0826065615 -0.0181476772 -0.0728539303 0.0718309358 -0.0300796181 -0.120240092 0.133931667 0.0222150292 0.0798185244 -0.107362755 0.105332904 -0.135004058 -0.11729826 0.147456452 -0.0890139267 0.0949609131 0.13500765 0.126173526 0.11857067 -0.000773293898 0.0894765258 0.0475671589 -0.0154326251 0.15092206 -0.037826851 -0.049774427 0.103259467 -0.0269829631 -0.0261214655 0.0183449704 0.0224278755 -0.157012105 -0.0851792991 0.0859342813 0.112071827 -0.0393440984 0.137702033 -0.0219873022 0.115037508 0.0346440673 0.0699992254 -0.0236694999 0.164631814 0.0094958609 -0.153284445 0.040881291 -0.108448327 -0.110614225 
0.0260498095 0.0183986761 -0.116771467 -0.00523093343 -0.0689058378 0.0228717029 0.034788698 -0.081005007 0.0377451777 -0.0307365786 -0.0821703076 0.0325528234 -0.14126493 0.0695137903 0.0801631436 0.0886292234 0.183120564 -0.0694635212 0.161076441 0.0139303654 0.108437546 0.0401087664 -0.0091292914 0.00868863612 0.141585931 0.156644121 -0.0755528286 -0.0417527556 0.00487269135 -0.0350182503 -0.126364589 -0.024675725 -0.0234753042 -0.00169406319 0.0142500447 0.0215916336 0.0767152011 0.0652107447 0.159517407 0.103661276 -0.0556443408 -0.0293452106 -0.0731868073 0.002619609 -0.0566306897 0.00929513387 -0.197862059 0.116043128 -0.00975279883 0.105451569 -0.0839810297 0.0331301987 -0.181696653 -0.0319507346 -0.00473482907 0.0933903456 -0.0281781182 0.00192525925 0.0756578669 0.160195202 0.0697703436 0.146909162 0.0564890578 0.00285601406 -0.0287020877 -0.0564336739 -0.0819117948 0.0810552388 0.165045336 0.147315055 -0.0212504752 0.10172905 0.0923355818 -0.0141675817 0.150093243 0.179681987 -0.114264801 -0.0956551284 -0.00638729148 -0.128433809 0.0788437128 -0.119181894 -0.0507362969 -0.115742147 -0.0738439783 0.0803774595 -0.0840121433 0.138585642 -0.0423021615 0.137064368 -0.0672038868 0.117374197 -0.142620116 0.0667178184 0.140385941 0.104921885 -0.107778296 4.28000058e-05 0.124074519 -0.114285842 0.0608676858 0.176241919 -0.0267177299 0.0773821026 -0.010434255 -0.0313907899 -0.0211786404 -0.0771455616 -1.91580984e-05 0.0954339802 -0.0113247158 0.103564784 0.17941153 0.166724324 0.0164197646 0.0291595794 -0.0204665605 -0.0382782891 -0.0668093562 -0.123122104 -0.132946268 0.160618961 0.0747604817 0.0837294608 0.169379547 -0.0834754705 0.0670293868 0.125993848 0.0960132033 0.00812351983 -0.0131131131 0.076757364 0.160401285 0.107536592 0.0723732188 -0.00439980626 0.0320614874 0.147804722 -0.123346433 0.138982892 0.0878069773 0.143579617 0.0674099252 0.0150862103 0.107429564 -0.0661900267 0.169957653 0.0881028622 0.0399612524 -0.0500341803 0.153780296 0.166395634 
0.137741536 -0.106945679 0.0822954476 0.0795641989 0.135367706 -0.0886543989 0.00912791491 0.0881962925 0.00199659169 -0.0546213388 -0.0906977132 0.079190582 -0.0895646214 -0.135372177 -0.114347287 -0.0691716 -0.00326307118 0.0402559191 0.104741633 0.139453545 0.0585349798 -0.0482536629 0.0699717999 0.0140683651 -0.0911885202 -0.136688069 -0.0421462804 0.0418486297 -0.119360626 -0.132552981 0.139314875 -0.0619060844 -0.0730924681 0.0170933753 0.0174552351 -0.100706309 -0.0780835152 -0.0697649717 -0.0582113415 -0.0707645416 -0.104700163 0.0463543534 -0.00485464931 -0.0724216402 0.0457475185 -0.0357155427 -0.00419606268 -0.119270869 -0.0787308067 0.0156891048 -0.0961950272 -0.0582991764 0.0977203697 -0.0429823883 -0.0821526423 0.143807203 -0.0634690672 0.00810478069 0.0317232311 0.0819439888 -0.0359285586 0.145277902 0.0952888206 -8.69644427e-05 0.148746893 0.0504151061 0.130737811 -0.0782369152 -0.0521331243 -0.0100645348 -0.0266241431 -0.137800336 0.114405535 0.00846977346 -0.113788694 -0.0912239552 0.120060176 -0.0522767827 0.0220687613 0.069416903 0.0120533248 -0.050613381 -0.0949904099 -0.0282441527 -0.00488385558 -0.099041827 0.137989372 0.0805989653 -0.0629852638 0.0974306986 -0.0589808449 -0.0508747622 0.075892739 -0.112869091 -0.0507352911 0.153549612 -0.071098201 0.101641163 -0.0416162089 0.0734882876 0.0855877772 -0.0345149338 0.127689764 -0.0378296189 -0.0469297916 0.164389119 -0.0796598718 -0.118202388 -0.0146005005 0.168985084 0.160650715 -0.0404448994 0.155802146 0.025944557 0.139599308 0.0971967429 0.0117399581 -0.0171507876 -0.142275631 0.0839506909 0.0660035759 0.125143692 -0.0461359918 -0.116108619 0.098188132 0.119050659 0.0910999924 -0.117082357 0.0748248175 -0.0653233901 -0.0601715185 0.0412754826 0.105733104 0.0728395283 0.0162961781 0.131876916 -0.115422845 0.144570231 0.0887707174 0.097047694 -0.0720524788 0.0252056289 0.0271244925 0.130636543 0.0491141453 0.0332921445 0.113790326 -0.0568653941 -0.0698323101 -0.10968978 -6.08155533e-05 
0.0675321668 -0.137371317 0.0749645829 -0.0216469616 0.0328196287 -0.141031533 -0.122319311 0.131579712 0.0271315724 0.12323828 -0.0254372582 -0.0824730098 -0.0256547406 0.1272306 -0.0528855324 0.12240018 -0.0112800747 0.0355034769 -0.0888580233 0.0970521122 0.0183172673 -0.0275907442 -0.122296013 0.0405875295 -0.112721227 0.0605996549 -0.124520272 -0.0803802982 0.0134015977 0.0546580702 -0.13796401 -0.0714464486 0.0842663348 0.124747202 -0.0913373977 -0.0796824321 0.125632361 -0.0140991956 0.139319405 -0.103938892 0.0555772036 -0.100091144 0.0524645001 -0.0437719002 -0.0133467764 -0.129776016 0.133946911 -0.0986202359 0.0850319415 -0.110841952 -0.061344333 0.00772999227 0.0248650014 -0.0392874405 0.133872464 -0.0498681031 -0.0235028919 -0.0904225931 0.0328456573 0.0908727348 0.063584283 0.0274092723 0.116862483 -0.0850711316 -0.00758437719 0.0703060776 0.0831446201 -0.130965278 -0.0187913738 0.0873690993 0.0466382094 0.10954687 0.132400319 -0.0466739088 -0.0960940421 -0.1589448 -0.0499225333 -0.14365828 0.147253916 0.114190049 -0.0489069223 0.00201576366 -0.0865763724 -0.141105622 0.0361168049 0.156169817 0.160132021 -0.0354782976 0.0171889104 0.00112317502 0.112893924 0.078825742 0.100391164 0.0169844106 -0.031357035 0.103728026 -0.0463408977 -0.00959085487 0.131976262 0.0656702071 -0.0224215165 0.0772374868 -0.102550328 0.130415276 0.0524659678 0.0614057928 -0.0569102988 -0.00330987363 -0.123422012 -0.0507996455 -0.111704901 -0.0851848572 0.0836961493 -0.0112077333 0.13896206 -0.0878892019 -0.0824800953 0.0448645279 0.0262479223 0.0815070346 -0.0392298363 0.0658551753 0.0590564199 0.137629583 -0.0485812463 -0.0806304142 0.0890918598 0.0108571658 0.0944422483 -0.0846425742 0.160814211 -0.0429416671 0.0703043491 0.0728700608 0.194119066 0.0854097977 -0.109207563 -0.0898918658 -0.0273189656 -0.116638198 0.0686772019 -0.109993316 0.124169983 -0.197762132 -0.0250630938 -0.0492028296 0.0160446316 0.174763769 -0.0346757248 -0.0763854831 -0.0731105581 -0.0112034082 
-0.0417329669 0.0718449354 0.0531028807 0.0267944783 -0.125297725 0.0641857833 -0.0335034095 0.0876890123 -0.0756414309 0.023246035 0.0138799511 -0.00875826553 0.0961356089 0.0716646761 -0.0198832899 0.0437015556 0.0223694257 0.0413542055 -0.0606194325 0.0904049575 0.146203548 0.128786162 0.0143855959 -0.145219877 -0.0717869774 -0.0981839523 -0.0615584739 0.148408964 -0.102737933 -0.0149649279 0.109365121 0.0478291512 -0.10187453 0.00699617295 0.146613404 -0.033152815 -0.0324295312 -0.124276176 0.149597749 0.0922028646 0.157599032 -0.135646835 0.135939449 0.101161912 -0.0470223129 0.135992065 0.049443569 -0.173461407 -0.0964307934 -0.131441608 0.0878081992 -0.0803490132 -0.0302332956 -0.103354827 0.0431390628 -0.102742799 0.0908333436 -0.0526512675 0.0258093104 0.0369244777 0.0257188752 0.133202627 0.0297276527 0.149491176 0.151769906 0.0164624956 0.0616860129 -0.0109535027 -0.118965067 0.0411789566 0.1403061 -0.0148792714 0.0120514603 -0.0927679688 -0.0414527878 0.0476270206 0.0865987465 0.157360256 -0.00801904406 -0.0541693904 -0.0268263221 -0.109232292 0.0128868064 0.0780390874 -0.124661535 -0.0210585389 -0.14193961 0.111578748 0.036265783 0.0652774721 0.0353158712 -0.150081992 -0.0131793777 0.0812528357 -0.119403362 -0.0613792799 0.160303533 -0.191477299 0.0978740901 0.0488265157 0.0381616503 -0.057727918 0.0255572535 0.0918723196 0.0639780238 0.0967227072 0.128719524 -0.0644618943 -0.0716221407 -0.113643073 0.0597443692 0.133907422 0.132179052 -0.0287083741 0.161785662 0.0266503692 -0.0227328409 0.116025813 0.098081924 0.0945273861 0.121004365 -0.128996223 -0.16734314 -0.116783001 -0.125158042 -0.124759458 0.0583286844 0.0625823587 -0.14607048 0.0436232202 -0.0646699741 -0.0939132422 -0.0846700892 0.171912223 -0.00973419473 0.1652738 -0.0948991999 -0.061686486 0.168850407 0.0989598855 -0.111697182 -0.0287689064 -0.0542520583 0.0386746228 0.02370058 0.0738422796 0.0875272676 0.0808269605 0.13247247 0.017513141 -0.0532009676 0.0261921436 0.0148328589 0.12498486 
0.0354332887 0.035208758 0.136238024 0.0110940281 -0.0153260306 0.0743869767 0.0171744954 -0.0469010063 0.0871724337 -0.119166553 -0.0623017251 0.0695702434 -0.0897610858 0.0566469952 -0.0424713381 0.0625269711 -0.063430272 -0.138267055 -0.0862576067 0.0633616149 0.0599713437 -0.0952118188 0.120180786 -0.0877415165 -0.0423083492 0.142752334 0.0664141625 0.0153184086 -0.11723125 0.0551554859 0.0941181779 0.0712193325 -0.103319407 0.0191577971 0.122941971 0.10161072 0.0123700919 0.133175269 -0.0438123122 0.00521303201 -0.0965816975 -0.0765956715 0.157058403 0.0214360245 0.0272503048 0.0736062229 -0.127181187 0.0797498822 0.00283462019 -0.0299958158 -0.177072033 -0.10098806 0.0592009835 0.111474186 -0.0195805114 0.0541746095 0.0449817032 0.0997726023 -0.0977167487 0.119134799 -0.0316534825 0.000842235982 0.10191527 0.117535733 -0.0239665229 0.126424983 -0.123411469 -0.075183101 0.0330423154 -0.0681335926 -0.0103695542 -0.0856132656 0.0537110977 -0.102201037 -0.077232644 0.0817721263 0.0940563828 0.0121499747 -0.0706892982 0.0818995014 -0.0357823521 -0.0557577461 -0.0687677711 0.0249025971 0.115824074 0.0820207819 -0.0299545322 0.154968947 0.146601692 0.0333185792 -0.0947523862 0.11622557 -0.00900522526 0.164223433 -0.00459069014 -0.0108394325 -0.102431804 -0.000589489937 -0.149016678 0.094403781 0.00958791561 -0.139076263 -0.00828526635 -0.0485995077 0.0503224507 -0.106602147 -0.0803155676 0.0535347834 0.134317338 0.116844706 -0.0210482683 0.153106958 -0.0599929169 -0.0479789935 -0.00114973786 0.0614023507 0.00115412858 -0.0867474899 -0.0715186149 0.0718893707 0.0815631598 0.0764374584 0.120969631 -0.0238302015 -0.0586408377 -0.172944322 -0.032943733 0.0524964482 -0.116896465 0.0810932145 0.0334346704 -0.146538407 -0.0801583529 -0.101430289 -0.0612037107 -0.113821179 0.00876645837 0.0436918586 0.00689579546 -0.126817837 0.0449476391 0.00970517192 0.120329946 -0.130458891 0.0930926949 -0.0741954371 0.117795736 -0.0513594598 -0.0421553478 -0.0799744502 0.139152512 
-0.132767022 -0.148529813 0.0808773115 0.0915669501 -0.137642413 0.179301977 -0.0657531545 0.111276798 0.13818717 0.180973396 0.148477748 0.00805056468 -0.0999653786 -0.0432874598 0.14668797 0.00481138518 0.0158720016 -0.111080863 0.0309790596 0.0831020325 0.0419512913 -0.0200633295 0.0992910117 -0.0361859724 -0.0418536253 -0.0347556248 -0.055278711 -0.129016384 0.156187266 -0.0416366123 -0.0245922692 0.0343578979 0.0859458372 0.101438627 0.00994137488 -0.069522813 -0.0426072292 0.0350656547 -0.103549249 0.0265573412 0.0763911158 -0.102405414 0.0613944381 -0.122291684 -0.030453749 -0.0144146821 0.0219928026 0.128770724 -0.0348007977 0.176855937 0.114892505 0.0656236783 -0.00231774151 0.150578141 -0.0806376413 -0.0191067625 -0.112510107 0.0331190526 0.0922281742 -0.130172268 0.0489657484 0.0407428741 -0.112448193 0.131103024 -0.0824060962 -0.0440546162 -0.135340303 0.167595923 0.0455297045 0.146565259 0.106871687 0.0179066844 0.14257127 0.0931942984 -0.0602279194 0.114411339 0.0802091658 0.183811814 0.0961534381 -0.0782056525 0.0361536555 -0.0492672026 0.0595217533 0.107877143 -0.0258457195 -0.0297816172 0.11304047 -0.0520307608 -0.106525473 0.0179495215 0.050986968 -0.00489270268 0.0162587743 -0.137507111 0.0951974392 0.0486419611 0.117301539 0.0375222899 0.00458820676 0.00400207192 -0.0281724483 0.160230845 -0.0574774742 0.0331582278 0.0722234622 -0.0535472929 0.0631503314 0.0969888195 0.140119597 0.0477451496 -0.175573215 0.0213595796 0.0623874292 0.0219977442 0.123139918 0.0430315174 -0.0433251262 -0.107093088 -0.112723738 -0.000674394774 0.0406252891 0.0568238758 -0.10773351 -0.0489739999 0.0599360615 0.126524165 -0.0252056345 0.103718482 -0.00571722444 0.0458063446 0.163875476 -0.00559463073 -0.129122823 0.0351556465 -0.12400604 0.0866204128 -0.00551600335 -0.0378636234 -0.128071412 -0.0678838044 -0.0604350902 0.135361746 -0.0829056799 0.00692036748 0.00260563195 -0.0532157905 0.124754503 0.0440538712 0.0327834561 0.182058588 0.110708781 -0.0718415529 
0.0142884664 -0.0505664833 0.111684635 0.0205106754 -0.115548059 0.0633464828 -0.099902004 -0.144687116 0.102363184 -0.0373336896 0.0391648747 0.108540595 0.171795741 -0.0586603731 0.135946795 0.0958023518 0.0913395435 0.104659595 -0.104977675 -0.136726558 0.101383127 -0.00297953957 -0.0918395743 -0.0204569325 -0.130254254 0.0839903727 0.0175899137 0.113330193 0.0912543014 -0.0047330535 0.123540469 -0.0720442235 0.039481502 0.163159758 -0.122090541 0.161992833 -0.116110057 -0.0345729962 0.180961043 0.13387315 -0.0200760309 0.0601475984 -0.124806568 0.155271858 0.0845254213 0.0596974678 0.126717508 0.0584589094 0.104401402 -0.0844913498 -0.106263012 -0.0435930751 -0.0393355973 -0.043946974 0.0409472808 0.0387777314 -0.0833237693 0.0790580288 0.00874889921 0.122026242 0.106449321 0.00690521859 -0.0539527871 -0.0490253009 -0.116141111 -0.0438183956 0.0866069868 -0.0664470792 0.162248239 0.161943093 0.104601003 -0.0271019768 0.0825799704 0.00950320065 -0.0404817834 0.129946649 0.114401944 -0.0974406078 -0.144606605 0.0822113976 -0.133282393 -0.0866103172 -0.161681429 0.0105247656 0.0808934346 -0.0963236913 -0.130843952 -0.0294511542 0.14981015 0.0511926003 0.119163141 0.0719184428 0.014378503 -0.0375215076 0.100146711 0.00126785447 0.159019947 -0.0868258625 0.0822456554 0.00716631068 0.0106834266 -0.0192997064 0.0986597613 -0.0639843941 0.0297827087 0.0358716287 -0.0587924942 0.0603018999 0.0780824944 -0.123953499 -0.0754288808 -0.00476129353 -0.0413157828 0.0502767749 -0.0340033025 -0.111571126 -0.100440137 0.147668496 0.0673740879 -0.0353276059 0.0640556961 -0.0988995805 0.136604816 0.140064046 0.0980866402 -0.0750294924 -0.137589633 0.043476589 -0.114663213 -0.0333486758 -0.0707642436 -0.0460274704 0.114950918 -0.0877812207 -0.0162663646 0.150960758 -0.126444697 -0.042077966 0.0971751958 0.0925146341 -0.0311614387 0.0668840557 0.114941254 0.0032322011 0.110740408 0.0407327525 0.127984926 -0.0671080053 -0.0279953331 0.0831837729 0.0103864418 -0.0418696329 
-0.0798007697 0.00768780895 -0.0546894707 -0.0398143195 0.118723847 0.116604052 0.0362069607 -0.120131537 -0.0460622348 0.0910601467 0.161261335 0.132019728 0.0777907819 0.12057399 -0.0873341486 0.0300714727 -0.0391077697 0.139219776 0.034336701 0.0798435882 0.0177026708 -0.00687919557 -0.0386889726 -0.0851943567 0.120803818 -0.0706116483 0.0470289737 -0.00083668204 -0.0218703523 0.0288076419 -0.0316865966 0.111179002 0.088308543 0.0760286823 0.104716919 0.00950780511 -0.0447407067 0.098795779 -0.116593644 0.103690639 -0.0872254521 -0.0867426321 0.0982015431 0.170969442 0.0893125832 -0.052779343 -0.103205182 -0.0188849494 0.148354068 0.135574684 0.16014275 0.0423907042 -0.123855539 -0.0422023758 -0.0901324898 -0.155949607 -0.0723659694 -0.0916374624 0.0385438688 0.129645094 -0.0276183728 -0.0442992523 0.0822724849 0.12342529 -0.0557483733 -0.0643166676 -0.137550637 -0.0250330232 -0.111662775 0.175575733 0.015148961 0.0261206627 0.134495571 -0.0594918132 0.0758288354 0.060908068 -0.0519245639 0.0473323725 -0.127921104 0.0161799341 0.0626626536 0.0191916153 0.0295425896 0.00972762611 -0.0193439405 0.0721613467 0.0690132603 0.145383656 0.139039531 0.0707440674 0.0853963941 0.121891946 0.0830445886 -0.139111638 -0.046770677 -0.0729267821 -0.0370384678 -0.122914143 0.0188236907 -0.0119272592 0.131827608 0.0751277357 0.0265672356 0.102637075 0.000820508925 0.128561452 -0.113339409 -0.0267119724 -0.0484277457 0.0841864794 -0.0932453424 0.112891175 -0.0558898747 -0.118770018 0.107176155 -0.0351163447 0.0367854685 -0.0528843328 -0.0932976678 0.132041663 -0.0217739437 0.0115754455 0.0903767347 -0.0251083001 -0.0478500426 -0.122446015 -0.110440329 -0.0738081709 0.13881968 0.0556323677 0.113088712 -0.0600917004 0.0295646787 0.0281529948 -0.0955312327 -0.0372477174 -0.0413099937 -0.12244685 -0.0883221477 -0.0363030881 -0.0330142006 0.0267371461 -0.0137264878 -0.110913046 0.0730738714 0.186447442 0.190215573 0.109426335 0.144610614 0.092832081 0.0243124962 0.082650587 
-0.0580552481 -0.0164464172 0.132549599 -0.16663675 0.176283285 0.0554265827 -0.0576385558 0.0210192334 -0.0107073868 0.0469353348 0.0421484709 0.20231232 0.112845972 -0.0282869264 0.0477520488 -0.0603832416 0.0570014864 0.14945437 -0.123605035 0.00659620762 -0.0773272812 0.139960304 -0.0898624659 -0.0288678277 -0.073680222 0.0669173375 -0.125848636 -0.0638360605 0.00877119321 0.146627396 0.100049771 -0.174804077 -0.0694195628 0.114080206 -0.035090182 -0.0557439104 0.0807942003 0.0843401998 0.0177999288 0.0371674523 0.035731107 -0.111021757 -0.093117848 -0.0678854063 0.0266885534 -0.00953813922 0.089507781 -0.106526807 -0.0295186415 0.00943453331 0.142027363 0.108211689 0.106748775 0.0579309165 0.0385677479 -0.100708753 0.151549757 0.0247560851 0.0252912976 0.0531664081 -0.123913847 -0.14173685 -0.0242508408 0.159124956 -0.113823667 -0.0568166599 0.0248767957 0.0976887569 -0.0333233364 0.0985455513 0.0487232953 0.0828748867 0.0737239048 -0.00177626917 0.0483965725 0.0487547442 0.0349344462 0.0642146096 0.135932118 0.139095858 -0.161683232 0.147549808 -0.0745932832 -0.0598408207 -0.0111973844 0.052948419 -0.133784652 -0.0361436382 -0.0810343474 0.113400467 0.0375890285 0.0919118151 0.165626541 -0.0969523787 -0.116375238 0.156546161 0.160234615 0.152540177 -0.0588590465 -0.082201235 0.151506081 -0.0245612338 0.108619809 0.0955060497 0.0378499888 0.0223950893 -0.0736716762 -0.0403732657 -0.112797014 0.153787464 -0.00461600721 0.0319394395 0.0467294194 0.0903732851 -0.155461773 -0.0431794003 -0.0157981049 -0.0684597194 -0.0530542284 -0.0374011174 -0.0614936724 -0.00120179285 -0.105804361 0.0348305702 0.146401137 -0.103180595 -0.0832952708 0.0653216466 0.112658091 0.107945248 0.107463151 -0.0334763043 0.137352556 0.0405373275 0.0836709291 0.0841100141 0.000271841243 -0.0795930102 0.115181461 -0.0337604173 -0.128911823 0.123057336 0.0785609409 0.0277274642 0.143650293 0.0181832965 0.126039356 -0.0717210472 -0.0782386661 -0.207162619 -0.0587309264 0.0823272541 0.11590194 
-0.15862602 -0.110215195 0.119876325 -0.03551469 0.203044742 0.0244795568 0.10723605 0.0298106819 0.174269021 0.143131137 0.145505443 0.0387773216 0.0399972759 -0.0434709154 0.174700931 0.0863626674 -0.00607159734 -0.00721096992 0.181514055 -0.174008265 -0.106141508 0.0943884626 0.0748455301 -0.08260189 0.128811941 0.09916839 -0.0107264379 0.174222261 -0.116252743 0.108514942 0.0142451078 0.0903487951 0.0105539095 -0.00275873509 -0.0601309314 -0.103395417 0.0936504826 0.00442700088 0.0223080404 -0.0448008254 0.0983777866 0.0909713805 0.0360316932 0.00507651642 -0.0769937262 0.0967232734 -0.0641804114 0.122599706 -0.113460898 -0.107624725 0.117271051 -0.00697429758 0.0493416227 -0.127027497 -0.0720096231 -0.0119818123 0.0336597078 -0.0459525064 -0.0465637259 -0.0834386647 0.0638463795 0.0122022033 -0.0931649208 -0.00553575298 -0.0875594765 0.0826876462 -0.00695849955 0.00258775055 -0.128275678 0.0674737692 -0.0166282598 -0.101550914 -0.000357478857 0.136971578 -0.0781561136 0.0165070295 0.130231693 -0.0846520886 -0.0524368845 0.13351126 0.0329737216 -0.0871247053 -0.0810154378 0.127794713 0.0551171899 0.0123283863 0.0219221041 0.0886786133 -0.0899260417 0.113652535 0.092415683 0.148293018 -0.0763472393 -0.00241494132 0.0385320969 -0.0792235285 0.119529434 0.116140589 -0.00117489253 -0.0458484106 0.022996638 -0.0169208553 0.122886755 -0.123275951 0.00158014265 0.0272646137 0.0736831352 0.0474004149 0.114665695 -0.0364573114 0.021729935 0.107678957 0.0608051606 0.109061301 -0.0183101986 -0.143050343 0.011449188 0.0253396928 0.0116122691 0.169679016 -0.108423777 0.114302441 -0.0180101134 0.0376329049 0.000675914111 -0.0989059806 0.00174778071 0.0729235336 0.0997067988 0.0798274875 0.00286698341 0.0484239347 0.0732191578 -0.0997895151 0.0131101757 0.0049995631 0.0331563503 0.0908577889 0.0351340324 0.0972493589 -0.140762478 0.0144873261 0.0925962031 0.042752862 -0.0921942815 0.123944506 0.103000984 0.134866908 -0.0654911175 -0.131940141 -0.124962308 0.121972367 
-0.0777393058 -0.0134515613 -0.0593911484 0.0903083831 -0.0194513649 0.125025287 -0.0718345344 -0.028370142 -0.137873486 -0.138855338 0.102305382 -0.114035651 -0.00128389895 -0.0209189281 -0.0363460779 0.0348549187 0.0462828428 -0.135960251 0.046998933 -0.00560566783 -0.00562353432 0.0511512756 -0.097703442 -0.000405952334 -0.102428362 -0.13385509 0.0778306574 -0.126189426 0.00569581985 -0.0901096463 0.0396448672 -0.0905233249 0.0647575855 -0.0692101866 -0.118010342 0.140668467 0.0754639357 0.0287674516 -0.127176955 0.0508546382 -0.0297820792 -0.034435086 -0.0525137484 0.0414588898 0.125622496 -0.122770369 -0.103983626 0.0979629755 -0.134100616 0.0132061988 0.136052623 -0.101130173 -0.125888899 -0.0912302136 -0.000320419669 0.0951493084 0.0250687301 0.0693171024 0.0681221336 0.0920942575 -0.0242754743 -0.126860991 0.00760373473 0.0472807437 0.00129759312 -0.0417886227 -0.135421842 -0.0420329869 0.109683141 -0.0780478939 0.0869462043 -0.0336673781 0.068866685 -0.0332275853 0.0885529816 -0.0637842491 -0.0287733674 0.0399621874 0.00726474822 -0.0887221545 -0.026667513 -0.10050422 -0.0866817236 -0.0743763596 -0.0557404384 -0.0415508077 -0.0706221908 0.03670571 0.0769552216 -0.136574149 0.12560834 0.0611953586 0.0578964055 0.0321745314 -0.108904168 -0.0603623837 0.106482595 0.0418807454 0.0567226261 -0.0480391793 -0.0818945691 -0.0918800607 0.0692870989 -0.0831394047 -0.140842602 -0.0133125484 0.0388532393 -0.0127856918 -0.0663905591 -0.0192305241 -0.00343641825 0.0277077034 0.0341987535 -0.0332082435 0.0317000449 0.133374199 0.0840312392 -0.00916420575 -0.0913549662 -0.131094456 -0.114135049 0.0324433446 -0.0922504738 0.0968189761 0.0722978711 -0.0336492881 -0.135974228 -0.0138650686 0.0959312618 -0.0392885059 0.0773027018 0.0320036188 -0.136021271 -0.096613884 -0.0733416006 -0.0797348469 0.198495209 0.0582847074 -0.0810181722 -0.0431594327 -0.0618753731 0.0181986317 0.078358449 -0.0357146375 0.0401486307 -0.0250369273 0.0634938627 -0.0496993251 0.020140117 
-0.127322808 -0.0666764453 0.138859123 0.0398158766 0.110776573 0.142119944 -0.034179695 -0.0179098602 0.0632333979 -0.00735516846 -0.071974054 -0.118237272 0.097171858 -0.107004285 0.0833490044 -0.0937577263 0.0946442783 0.0788892806 0.0437900051 -0.0310971122 0.0778285116 -0.0993157849 0.0229949653 0.11270339 0.16689226 0.162221476 -0.0412555002 0.173148394 -0.0327887125 -0.121510416 -0.0140555762 0.0903469846 -0.029039843 0.068566218 -0.12506409 0.0382451862 0.0248380303 0.0277039558 -0.14029333 -0.126389086 -0.00850690901 0.0435213149 0.0944143683 0.13669233 0.010785535 0.131652728 0.08942011 -0.0438658521 -0.090351522 0.0258171856 -0.0345944241 0.0610096753 -0.0412340835 -0.0557757616 -0.114867412 -0.0179175213 0.00409558415 -0.0384710208 0.100451782 0.141252503 -0.0818129927 0.0703791678 -0.070404999 -0.110884957 -0.109055385 -0.0898616537 -0.0513886064 0.12470524 -0.13137272 -0.0607609376 -0.0753680915 0.0404375494 -0.0389348865 -0.0476250499 0.139109835 -0.0529488549 -0.123520866 0.11238347 -0.0333348513 -0.0331748128 -0.042967163 0.098916471 0.100189671 0.113453075 0.0968655795 -0.0717557892 0.00738480687 0.0515316948 -0.0553591289 -0.00930176955 -0.00901313685 0.175364062 -0.0196713433 -0.0502335988 0.0383971296 0.121085942 0.103061765 0.15212594 0.116526209 -0.0264002159 -0.094560124 -0.0376541167 0.147969663 -0.0978502855 -0.00200880459 0.0312320646 -0.0133588314 0.0813049003 -0.029433284 -0.0262242351 -0.106563412 0.0297239125 0.0302417222 -0.0301722232 -0.0122521063 -0.0121933669 0.0207778811 0.118457362 0.0412592813 0.133936256 -0.0869062468 -0.0865229592 -0.0799478814 -0.0991789475 -0.0158145819 0.120537013 0.0608306117 0.17652306 -0.0725468472 0.126066044 -0.0148094278 -0.0349133536 0.0138158342 -0.00439210236 0.0949555784 -0.0591577739 -0.0189778134 0.0861846507 0.024233669 -0.0585409105 -0.114641324 -0.0379219502 0.0835855305 0.0618880242 -0.00448146462 -0.0475826338 0.0992924273 -0.0171841681 0.130379245 -0.115067512 -0.0295631811 0.132417724 
-0.00352632999 -0.0539170504 -0.0408100188 0.0476579517 0.0435158908 -0.108436733 0.0848499984 -0.0134641975 0.103017956 0.113764629 -0.0229989663 -0.0402461812 0.10478355 0.105660841 -0.103532135 0.0136412531 -0.113981269 -0.130932122 0.020966202 0.103488669 0.0354949832 -0.0729100034 -0.0167546049 -0.0969255865 -0.0415144786 -0.0227924958 0.0969979763 -0.0482411981 -0.0222034156 0.0317669511 -0.0238891691 -0.0465090126 -0.049510397 -0.00603993051 -0.0495383143 0.0391993932 0.0514197163 0.0751582161 -0.0600364245 0.149168238 0.00608866569 0.145530403 -0.0471902825 -0.00813056901 0.0724131763 0.02852365 -0.0375874527 0.0505036674 0.162053078 0.061635565 -0.0772141367 -0.101029664 0.135133624 -0.026366543 0.0173085034 -0.196915612 -0.094210051 -0.0830252171 -0.0152556822 0.0760191381 0.11165338 0.015817089 0.0882711783 0.0494683385 -0.135736912 -0.0937291756 0.155131117 0.0703033358 -0.101866767 -0.136783585 -0.0569891371 0.0305453669 0.0333361402 0.0399238467 0.0434939228 0.0999660268 -0.106167085 -0.0346824601 0.1533719 -0.0761708841 0.0984655321 0.0181172937 -0.0619307309 -0.00992794242 0.070495829 0.154349118 0.0747213513 -0.191178009 -0.087587826 0.0466681495 -0.128632694 0.0855400562 -0.12334384 0.01812369 0.155531704 0.11304646 0.132001072 0.196818605 -0.0396067835 -0.0600948259 -0.0705011338 -0.113130942 -0.11137788 0.0562906638 0.0935193449 0.0702943429 -0.135742277 -0.0983269736 -0.164341018 0.0970739946 0.0331884474 0.0538573004 -0.0459236056 -0.0607440993 -0.057828702 -0.11368376 0.115170449 0.144811422 0.046080593 -0.116181828 0.0888293982 0.149672315 -0.085598737 -0.0972040147 0.101113833 0.145851657 0.0576108471 0.0922829062 0.0284254942 0.0193997808 -0.000928774185 -0.0930889547 -0.0205265954 +tensor_16bias 50 +-0.142527401 0.168874308 -0.0887828916 -0.0631441548 -0.0660232753 0.0544182472 0.0641550943 0.0606994219 -0.0223076269 0.102970138 0.0258652028 -0.0809393153 -0.0276761875 0.0546831638 0.128886178 -0.0795307532 -0.0189131647 -0.120750055 
0.17368494 -0.0492844023 -0.0578795224 0.0671775565 0.0123591702 0.132642269 -0.0525798798 0.0173827056 0.0659839064 -0.0958536118 0.0788409114 0.0107072359 0.0492983423 -0.02552481 0.0519438088 0.137576833 -0.115964495 -0.0723876506 0.111166283 -0.119082645 -0.0866416842 0.0962954909 0.110601142 -0.0136935199 -0.138656154 -0.0623757802 0.0855045691 -0.12340495 0.0683390722 -0.0176106151 -0.0413498878 0.0233600959 diff --git a/Linear_16.hxx b/Linear_16.hxx new file mode 100644 index 0000000..8d831c0 --- /dev/null +++ b/Linear_16.hxx @@ -0,0 +1,658 @@ +//Code generated automatically by TMVA for GPU Inference using ALPAKA of Model file [Linear_16.onnx] at [Fri Apr 11 14:16:45 2025] + +#ifndef SOFIE_LINEAR_16 +#define SOFIE_LINEAR_16 + +#include +#include +#include +#include +#include +#include "SOFIE/SOFIE_common.hxx" +#include + +using Dim1D = alpaka::DimInt<1>; +using Acc = alpaka::TagToAcc; +using Queue = alpaka::Queue; + +namespace SOFIE_Linear_16{ +struct Session { + +// initialized tensors +auto deviceBuf_8weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_8bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_4bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_2weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_0bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_12bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_18bias = alpaka::allocBuf(devAcc, 10); +auto deviceBuf_14bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_4weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_10weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_6bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_18weight = alpaka::allocBuf(devAcc, 500); +auto deviceBuf_0weight = alpaka::allocBuf(devAcc, 5000); +auto deviceBuf_10bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_2bias = alpaka::allocBuf(devAcc, 50); +auto deviceBuf_6weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_14weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_16weight = 
alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_12weight = alpaka::allocBuf(devAcc, 2500); +auto deviceBuf_16bias = alpaka::allocBuf(devAcc, 50); + +//--- declare and allocate the intermediate tensors +auto bufDev_18biasbcast = alpaka::allocBuf(devAcc,160); +auto bufDev_38 = alpaka::allocBuf(devAcc,800); +auto bufDev_14biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_34 = alpaka::allocBuf(devAcc,800); +auto bufDev_22 = alpaka::allocBuf(devAcc,800); +auto bufDev_2biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_24 = alpaka::allocBuf(devAcc,800); +auto bufDev_0biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_6biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_4biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_16biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_8biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_26 = alpaka::allocBuf(devAcc,800); +auto bufDev_28 = alpaka::allocBuf(devAcc,800); +auto bufDev_10biasbcast = alpaka::allocBuf(devAcc,800); +auto bufDev_30 = alpaka::allocBuf(devAcc,800); +auto bufDev_32 = alpaka::allocBuf(devAcc,800); +auto bufDev_36 = alpaka::allocBuf(devAcc,800); +auto bufDev_12biasbcast = alpaka::allocBuf(devAcc,800); + +Session(std::string filename ="Linear_16.dat") { + +//--- reading weights from file + std::ifstream f; + f.open(filename); + if (!f.is_open()) { + throw std::runtime_error("tmva-sofie failed to open file " + filename + " for input weights"); + } + std::string tensor_name; + size_t length; + f >> tensor_name >> length; + if (tensor_name != "tensor_8weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_8weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_8weight[i]; 
+ if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_8weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_8bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_8bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_8bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_8bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_4bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_4bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_4bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_4bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_2weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_2weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_2weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_2weight"); + } + f >> tensor_name >> length; + if 
(tensor_name != "tensor_0bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_0bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_0bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_0bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_12bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_12bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_12bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_12bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_18bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_18bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 10) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 10 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_18bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_18bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_14bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_14bias , read " + 
tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_14bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_14bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_4weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_4weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_4weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_4weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_10weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_10weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_10weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_10weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_6bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_6bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the 
correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_6bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_6bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_18weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_18weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_18weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_18weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_0weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_0weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 5000) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 5000 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_0weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_0weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_10bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_10bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 
0; i < length; ++i) + f >> tensor_10bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_10bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_2bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_2bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_2bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_2bias"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_6weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_6weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_6weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_6weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_14weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_14weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_14weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for 
tensor tensor_14weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_16weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_16weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_16weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_16weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_12weight" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_12weight , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 2500) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_12weight[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_12weight"); + } + f >> tensor_name >> length; + if (tensor_name != "tensor_16bias" ) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_16bias , read " + tensor_name; + throw std::runtime_error(err_msg); + } + if (length != 50) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) + f >> tensor_16bias[i]; + if (f.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_16bias"); + } + f.close(); + + auto hostBuf_8weight = alpaka::allocBuf(hostAcc,2500); + 
std::memcpy(alpaka::getPtrNative(hostBuf_8weight), tensor_8weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_8weight, hostBuf8weight, 2500); + auto hostBuf_8bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_8bias), tensor_8bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_8bias, hostBuf8bias, 50); + auto hostBuf_4bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_4bias), tensor_4bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_4bias, hostBuf4bias, 50); + auto hostBuf_2weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_2weight), tensor_2weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_2weight, hostBuf2weight, 2500); + auto hostBuf_0bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_0bias), tensor_0bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_0bias, hostBuf0bias, 50); + auto hostBuf_12bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_12bias), tensor_12bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_12bias, hostBuf12bias, 50); + auto hostBuf_18bias = alpaka::allocBuf(hostAcc,10); + std::memcpy(alpaka::getPtrNative(hostBuf_18bias), tensor_18bias, 10* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_18bias, hostBuf18bias, 10); + auto hostBuf_14bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_14bias), tensor_14bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_14bias, hostBuf14bias, 50); + auto hostBuf_4weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_4weight), tensor_4weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_4weight, hostBuf4weight, 2500); + auto hostBuf_10weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_10weight), tensor_10weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_10weight, hostBuf10weight, 
2500); + auto hostBuf_6bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_6bias), tensor_6bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_6bias, hostBuf6bias, 50); + auto hostBuf_18weight = alpaka::allocBuf(hostAcc,500); + std::memcpy(alpaka::getPtrNative(hostBuf_18weight), tensor_18weight, 500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_18weight, hostBuf18weight, 500); + auto hostBuf_0weight = alpaka::allocBuf(hostAcc,5000); + std::memcpy(alpaka::getPtrNative(hostBuf_0weight), tensor_0weight, 5000* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_0weight, hostBuf0weight, 5000); + auto hostBuf_10bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_10bias), tensor_10bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_10bias, hostBuf10bias, 50); + auto hostBuf_2bias = alpaka::allocBuf(hostAcc,50); + std::memcpy(alpaka::getPtrNative(hostBuf_2bias), tensor_2bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_2bias, hostBuf2bias, 50); + auto hostBuf_6weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_6weight), tensor_6weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_6weight, hostBuf6weight, 2500); + auto hostBuf_14weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_14weight), tensor_14weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_14weight, hostBuf14weight, 2500); + auto hostBuf_16weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_16weight), tensor_16weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_16weight, hostBuf16weight, 2500); + auto hostBuf_12weight = alpaka::allocBuf(hostAcc,2500); + std::memcpy(alpaka::getPtrNative(hostBuf_12weight), tensor_12weight, 2500* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_12weight, hostBuf12weight, 2500); + auto hostBuf_16bias = alpaka::allocBuf(hostAcc,50); + 
std::memcpy(alpaka::getPtrNative(hostBuf_16bias), tensor_16bias, 50* sizeof(float)); + alpaka::memcpy(queue, deviceBuf_16bias, hostBuf16bias, 50); + +//---- allocate the intermediate dynamic tensors +//--- broadcast bias tensor 0biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_0bias,{ 50 }, { 16 , 50 }); + auto hostBuf_0biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_0biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_0biasbcast, hostBuf_0biasbcast , 800); + } +//--- broadcast bias tensor 2biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_2bias,{ 50 }, { 16 , 50 }); + auto hostBuf_2biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_2biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_2biasbcast, hostBuf_2biasbcast , 800); + } +//--- broadcast bias tensor 4biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_4bias,{ 50 }, { 16 , 50 }); + auto hostBuf_4biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_4biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_4biasbcast, hostBuf_4biasbcast , 800); + } +//--- broadcast bias tensor 6biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_6bias,{ 50 }, { 16 , 50 }); + auto hostBuf_6biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_6biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_6biasbcast, hostBuf_6biasbcast , 800); + } +//--- broadcast bias tensor 8biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_8bias,{ 50 }, { 16 , 50 }); + auto hostBuf_8biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_8biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_8biasbcast, hostBuf_8biasbcast , 
800); + } +//--- broadcast bias tensor 10biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_10bias,{ 50 }, { 16 , 50 }); + auto hostBuf_10biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_10biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_10biasbcast, hostBuf_10biasbcast , 800); + } +//--- broadcast bias tensor 12biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_12bias,{ 50 }, { 16 , 50 }); + auto hostBuf_12biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_12biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_12biasbcast, hostBuf_12biasbcast , 800); + } +//--- broadcast bias tensor 14biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_14bias,{ 50 }, { 16 , 50 }); + auto hostBuf_14biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_14biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_14biasbcast, hostBuf_14biasbcast , 800); + } +//--- broadcast bias tensor 16biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_16bias,{ 50 }, { 16 , 50 }); + auto hostBuf_16biasbcast = alpaka::allocBuf(hostAcc,800); + std::memcpy(alpaka::getPtrNative(hostBuf_16biasbcast), data, 800 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_16biasbcast, hostBuf_16biasbcast , 800); + } +//--- broadcast bias tensor 18biasfor Gemm op + { + float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_18bias,{ 10 }, { 16 , 10 }); + auto hostBuf_18biasbcast = alpaka::allocBuf(hostAcc,160); + std::memcpy(alpaka::getPtrNative(hostBuf_18biasbcast), data, 160 * sizeof(float)); + alpaka::memcpy(queue, deviceBuf_18biasbcast, hostBuf_18biasbcast , 160); + } +} + + + +std::vector infer(float* tensor_input1){ + +//--------- Gemm_GPU_ALPAKA + char op_0_transA = 'n'; + char op_0_transB = 't'; + int op_0_m = 
16; + int op_0_n = 50; + int op_0_k = 100; + float op_0_alpha = 1; + float op_0_beta = 1; + int op_0_lda = 100; + int op_0_ldb = 100; + std::copy(tensor_0biasbcast, tensor_0biasbcast + 800, tensor_22); + Kokkos::View kokkos_dev_input1((float*)std::data(bufDev_input1), op_0_m, op_0_k); + Kokkos::View kokkos_dev_0weight((float*)std::data(bufDev_0weight), op_0_k, op_0_n); + Kokkos::View kokkos_dev_22((float*)std::data(bufDev_22), op_0_m, op_0_n); + KokkosBlas::gemm(&op_0_transB, &op_0_transA, op_0_alpha, kokkos_dev_input1, kokkos_dev_0weight, op_0_beta, kokkos_dev_22); + +//--------- Gemm_GPU_ALPAKA + char op_1_transA = 'n'; + char op_1_transB = 't'; + int op_1_m = 16; + int op_1_n = 50; + int op_1_k = 50; + float op_1_alpha = 1; + float op_1_beta = 1; + int op_1_lda = 50; + int op_1_ldb = 50; + std::copy(tensor_2biasbcast, tensor_2biasbcast + 800, tensor_24); + Kokkos::View kokkos_dev_22((float*)std::data(bufDev_22), op_1_m, op_1_k); + Kokkos::View kokkos_dev_2weight((float*)std::data(bufDev_2weight), op_1_k, op_1_n); + Kokkos::View kokkos_dev_24((float*)std::data(bufDev_24), op_1_m, op_1_n); + KokkosBlas::gemm(&op_1_transB, &op_1_transA, op_1_alpha, kokkos_dev_22, kokkos_dev_2weight, op_1_beta, kokkos_dev_24); + +//--------- Gemm_GPU_ALPAKA + char op_2_transA = 'n'; + char op_2_transB = 't'; + int op_2_m = 16; + int op_2_n = 50; + int op_2_k = 50; + float op_2_alpha = 1; + float op_2_beta = 1; + int op_2_lda = 50; + int op_2_ldb = 50; + std::copy(tensor_4biasbcast, tensor_4biasbcast + 800, tensor_26); + Kokkos::View kokkos_dev_24((float*)std::data(bufDev_24), op_2_m, op_2_k); + Kokkos::View kokkos_dev_4weight((float*)std::data(bufDev_4weight), op_2_k, op_2_n); + Kokkos::View kokkos_dev_26((float*)std::data(bufDev_26), op_2_m, op_2_n); + KokkosBlas::gemm(&op_2_transB, &op_2_transA, op_2_alpha, kokkos_dev_24, kokkos_dev_4weight, op_2_beta, kokkos_dev_26); + +//--------- Gemm_GPU_ALPAKA + char op_3_transA = 'n'; + char op_3_transB = 't'; + int op_3_m = 16; + int op_3_n 
= 50; + int op_3_k = 50; + float op_3_alpha = 1; + float op_3_beta = 1; + int op_3_lda = 50; + int op_3_ldb = 50; + std::copy(tensor_6biasbcast, tensor_6biasbcast + 800, tensor_28); + Kokkos::View kokkos_dev_26((float*)std::data(bufDev_26), op_3_m, op_3_k); + Kokkos::View kokkos_dev_6weight((float*)std::data(bufDev_6weight), op_3_k, op_3_n); + Kokkos::View kokkos_dev_28((float*)std::data(bufDev_28), op_3_m, op_3_n); + KokkosBlas::gemm(&op_3_transB, &op_3_transA, op_3_alpha, kokkos_dev_26, kokkos_dev_6weight, op_3_beta, kokkos_dev_28); + +//--------- Gemm_GPU_ALPAKA + char op_4_transA = 'n'; + char op_4_transB = 't'; + int op_4_m = 16; + int op_4_n = 50; + int op_4_k = 50; + float op_4_alpha = 1; + float op_4_beta = 1; + int op_4_lda = 50; + int op_4_ldb = 50; + std::copy(tensor_8biasbcast, tensor_8biasbcast + 800, tensor_30); + Kokkos::View kokkos_dev_28((float*)std::data(bufDev_28), op_4_m, op_4_k); + Kokkos::View kokkos_dev_8weight((float*)std::data(bufDev_8weight), op_4_k, op_4_n); + Kokkos::View kokkos_dev_30((float*)std::data(bufDev_30), op_4_m, op_4_n); + KokkosBlas::gemm(&op_4_transB, &op_4_transA, op_4_alpha, kokkos_dev_28, kokkos_dev_8weight, op_4_beta, kokkos_dev_30); + +//--------- Gemm_GPU_ALPAKA + char op_5_transA = 'n'; + char op_5_transB = 't'; + int op_5_m = 16; + int op_5_n = 50; + int op_5_k = 50; + float op_5_alpha = 1; + float op_5_beta = 1; + int op_5_lda = 50; + int op_5_ldb = 50; + std::copy(tensor_10biasbcast, tensor_10biasbcast + 800, tensor_32); + Kokkos::View kokkos_dev_30((float*)std::data(bufDev_30), op_5_m, op_5_k); + Kokkos::View kokkos_dev_10weight((float*)std::data(bufDev_10weight), op_5_k, op_5_n); + Kokkos::View kokkos_dev_32((float*)std::data(bufDev_32), op_5_m, op_5_n); + KokkosBlas::gemm(&op_5_transB, &op_5_transA, op_5_alpha, kokkos_dev_30, kokkos_dev_10weight, op_5_beta, kokkos_dev_32); + +//--------- Gemm_GPU_ALPAKA + char op_6_transA = 'n'; + char op_6_transB = 't'; + int op_6_m = 16; + int op_6_n = 50; + int op_6_k = 50; + 
float op_6_alpha = 1; + float op_6_beta = 1; + int op_6_lda = 50; + int op_6_ldb = 50; + std::copy(tensor_12biasbcast, tensor_12biasbcast + 800, tensor_34); + Kokkos::View kokkos_dev_32((float*)std::data(bufDev_32), op_6_m, op_6_k); + Kokkos::View kokkos_dev_12weight((float*)std::data(bufDev_12weight), op_6_k, op_6_n); + Kokkos::View kokkos_dev_34((float*)std::data(bufDev_34), op_6_m, op_6_n); + KokkosBlas::gemm(&op_6_transB, &op_6_transA, op_6_alpha, kokkos_dev_32, kokkos_dev_12weight, op_6_beta, kokkos_dev_34); + +//--------- Gemm_GPU_ALPAKA + char op_7_transA = 'n'; + char op_7_transB = 't'; + int op_7_m = 16; + int op_7_n = 50; + int op_7_k = 50; + float op_7_alpha = 1; + float op_7_beta = 1; + int op_7_lda = 50; + int op_7_ldb = 50; + std::copy(tensor_14biasbcast, tensor_14biasbcast + 800, tensor_36); + Kokkos::View kokkos_dev_34((float*)std::data(bufDev_34), op_7_m, op_7_k); + Kokkos::View kokkos_dev_14weight((float*)std::data(bufDev_14weight), op_7_k, op_7_n); + Kokkos::View kokkos_dev_36((float*)std::data(bufDev_36), op_7_m, op_7_n); + KokkosBlas::gemm(&op_7_transB, &op_7_transA, op_7_alpha, kokkos_dev_34, kokkos_dev_14weight, op_7_beta, kokkos_dev_36); + +//--------- Gemm_GPU_ALPAKA + char op_8_transA = 'n'; + char op_8_transB = 't'; + int op_8_m = 16; + int op_8_n = 50; + int op_8_k = 50; + float op_8_alpha = 1; + float op_8_beta = 1; + int op_8_lda = 50; + int op_8_ldb = 50; + std::copy(tensor_16biasbcast, tensor_16biasbcast + 800, tensor_38); + Kokkos::View kokkos_dev_36((float*)std::data(bufDev_36), op_8_m, op_8_k); + Kokkos::View kokkos_dev_16weight((float*)std::data(bufDev_16weight), op_8_k, op_8_n); + Kokkos::View kokkos_dev_38((float*)std::data(bufDev_38), op_8_m, op_8_n); + KokkosBlas::gemm(&op_8_transB, &op_8_transA, op_8_alpha, kokkos_dev_36, kokkos_dev_16weight, op_8_beta, kokkos_dev_38); + +//--------- Gemm_GPU_ALPAKA + char op_9_transA = 'n'; + char op_9_transB = 't'; + int op_9_m = 16; + int op_9_n = 10; + int op_9_k = 50; + float op_9_alpha 
= 1; + float op_9_beta = 1; + int op_9_lda = 50; + int op_9_ldb = 50; + std::copy(tensor_18biasbcast, tensor_18biasbcast + 160, tensor_39); + Kokkos::View kokkos_dev_38((float*)std::data(bufDev_38), op_9_m, op_9_k); + Kokkos::View kokkos_dev_18weight((float*)std::data(bufDev_18weight), op_9_k, op_9_n); + Kokkos::View kokkos_dev_39((float*)std::data(bufDev_39), op_9_m, op_9_n); + KokkosBlas::gemm(&op_9_transB, &op_9_transA, op_9_alpha, kokkos_dev_38, kokkos_dev_18weight, op_9_beta, kokkos_dev_39); + return {std::vector(tensor_39, tensor_39 + 160)}; +} +}; // end of Session +} //SOFIE_Linear_16 + +#endif // SOFIE_LINEAR_16 diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 79541af..83a47af 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -141,6 +141,11 @@ public: { Generate(static_cast>(options), batchSize, pos, verbose); } + void GenerateGPU_ALPAKA(std::underlying_type_t options, int batchSize = -1, bool verbose = false); + void GenerateGPU_ALPAKA(Options options = Options::kDefault, int batchSize = -1, bool verbose = false) + { + GenerateGPU_ALPAKA(static_cast>(options), batchSize, verbose); + } // generate the infer function signature. 
If isdecl= false generate the calling infer function // used to infer the sub-graphs std::string GenerateInferSignature(bool isdecl = true); @@ -153,18 +158,28 @@ protected: // internal functions // generate code for the initialized tensors void GenerateInitializedTensorInfo(); + + void GenerateInitializedTensorInfo_GPU_ALPAKA(); // generate code for the intermediate tensors void GenerateIntermediateTensorInfo(); // generate code for the dynamic tensors void GenerateDynamicTensorInfo(); + + void GenerateDynamicTensorInfo_GPU_ALPAKA(); // generate code for declarations needed by operators void GenerateOperatorDeclarations(); // generate code for inference void GenerateOutput(); + + void GenerateOutput_GPU_ALPAKA(); + + void MoveInitializedTensorsToBuffers_ALPAKA(); // generate code for initializing memory pool for intermediate tensors void GenerateIntermediateMemoryPool(); // Generate all session code void GenerateSessionCode(); + void GenerateSessionCode_GPU_ALPAKA(); + void GenerateGPU_ALPAKA_Buffers(); public: const std::vector &GetInputTensorNames() const { return fInputTensorNames; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index edbec58..f7db548 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -24,8 +24,10 @@ public: virtual std::vector TypeInference(std::vector) = 0; virtual void Initialize(RModel&) = 0; virtual std::string Generate(std::string OpName) = 0; //expect unique opName for each operator within the same RModel + virtual std::string Generate_GPU_ALPAKA(std::string OpName){ return "";} //expect unique opName for each operator within the same RModel // generate initialization code for session constructor virtual std::string GenerateInitCode() { return "";} + virtual std::string GenerateInitCode_GPU_ALPAKA() { return "";}; // generate some specific declaration code for Session virtual std::string GenerateDeclCode() { return "";} // generate session 
data members specific to operator diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index b6901f0..7410cf3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -488,10 +488,11 @@ namespace SOFIE{ if (fType == "float"){ - out << SP << "Kokkos::View kokkos_dev_"< kokkos_dev_"< kokkos_dev_"< kokkos_dev_"< kokkos_dev_"< kokkos_dev_"<{\n" + <<"alpaka::workdiv::getValidWorkDiv(devAcc, {totalElems}, true, alpaka::GridBlockExtent::All)\n" + <<"};\n"; + out<< SP << SP << "alpaka::exec(queue, workDiv,\n" + <<"[] ALPAKA_FN_ACC (auto const& acc, auto buf, Idx size) {\n" + <<"Idx const idx = alpaka::getIdx(acc)[0];\n" + <<" if (idx < size) {\n" + <<" auto& x = alpaka::getPtrNative(buf)[idx];\n" + <<" x = x < 0 ? 0 : x;\n" + <<" }\n" + <<"}, bufDev_"< 0) {\n"; - out << "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; - - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + out << "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," << length << ");\n"; out << SP << "}\n"; } fGC += out.str(); @@ -892,9 +889,9 @@ void RModel::GenerateSessionCode() // define the Session struct (for GNN this is generated in RModel_GNN) if (fUseSession) { if (!fIsSubGraph) - fGC += "struct Session {\n"; + fGC += "struct Session {\n\n"; else - fGC += "struct Session_" + fName + " {\n"; + fGC += "struct Session_" + fName + " {\n\n"; } // generate code for declaring the initialized tensors @@ -1001,9 +998,9 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() // define the Session struct (for GNN this is generated in RModel_GNN) if (fUseSession) { if (!fIsSubGraph) - fGC += "struct Session {\n"; + fGC += "struct Session {\n\n"; else - fGC += "struct Session_" + fName + " {\n"; + fGC += "struct Session_" + fName + " {\n\n"; } // // generate 
code for declaring the initialized tensors @@ -1188,16 +1185,16 @@ void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int bat } if (static_cast>(Options::kGNN) & options || static_cast>(Options::kGNNComponent) & options) - throw std::runtime_error("SOFIE GPU does not yet supports GNN Inference.") + throw std::runtime_error("SOFIE GPU does not yet supports GNN Inference."); // initialize the model including all operators and sub-graphs Initialize(batchSize, verbose); std::string hgname; - // if (!fIsSubGraph) { - // fGC.clear(); - // GenerateHeaderInfo_GPU_ALPAKA(hgname); - // } + if (!fIsSubGraph) { + fGC.clear(); + GenerateHeaderInfo_GPU_ALPAKA(hgname); + } // generate first code for the subgraphs // for (auto &graph : fSubGraphs) { @@ -1310,20 +1307,20 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { // skip Constant and shape tensors if (!i.second.IsWeightTensor()) continue; std::string tensor_name = "tensor_" + i.first; - length = ConvertShapeToLength(i.second.shape()); + auto length = ConvertShapeToLength(i.second.shape()); std::string slength = std::to_string(length); if (i.second.type() == ETensorType::FLOAT) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(float));\n"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength");\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(float));\n"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength+");\n"; } else if (i.second.type() == ETensorType::DOUBLE) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* 
sizeof(doub;e));"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength");\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(doub;e));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength+");\n"; } else if (i.second.type() == ETensorType::INT64) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(int64_t));"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength");\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(int64_t));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength+");\n"; } else { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); } diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/src/SOFIE_core/src/RModel_Base.cxx index a3392d8..b5524d1 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -59,7 +59,7 @@ void RModel_Base::GenerateHeaderInfo(std::string& hgname) { } void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { - fGC += ("//Code generated automatically by TMVA for ALPAKA Inference of Model file [" + fFileName + "] at [" + fParseTime.substr(0, fParseTime.length()-1) +"] \n"); + fGC += ("//Code generated automatically by TMVA for GPU Inference using ALPAKA of Model file [" + fFileName + "] at [" + fParseTime.substr(0, fParseTime.length()-1) +"] \n"); // add header guards hgname = fName; std::transform(hgname.begin(), hgname.end(), hgname.begin(), 
[](unsigned char c) { @@ -87,6 +87,9 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { if (fWeightFile == WeightFileType::RootBinary) fGC += "#include \"TFile.h\"\n"; + fGC += "\nusing Dim1D = alpaka::DimInt<1>;\n"; + fGC += "using Acc = alpaka::TagToAcc;\n"; + fGC += "using Queue = alpaka::Queue;\n"; fGC += "\nnamespace SOFIE_" + fName + "{\n"; } From 6481c052097eb1dd7fd550ddf8f24aab1d4a1d28 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 18 Aug 2025 15:43:27 +0200 Subject: [PATCH 04/43] fix: defining intermediate and initialized tensors --- Linear_16.dat | 40 -- Linear_16.hxx | 658 ---------------------- settings-dev.cmake | 7 + src/SOFIE_core/CMakeLists.txt | 1 + src/SOFIE_core/inc/SOFIE/RModel.hxx | 4 + src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 11 +- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 5 + src/SOFIE_core/src/RModel.cxx | 407 ------------- src/SOFIE_core/src/RModel_ALPAKA.cxx | 366 ++++++++++++ src/SOFIE_core/src/RModel_Base.cxx | 5 +- src/SOFIE_core/src/SOFIE_common.cxx | 35 ++ 11 files changed, 428 insertions(+), 1111 deletions(-) delete mode 100644 Linear_16.dat delete mode 100644 Linear_16.hxx create mode 100644 settings-dev.cmake create mode 100644 src/SOFIE_core/src/RModel_ALPAKA.cxx diff --git a/Linear_16.dat b/Linear_16.dat deleted file mode 100644 index 873ce7d..0000000 --- a/Linear_16.dat +++ /dev/null @@ -1,40 +0,0 @@ -tensor_8weight 2500 -0.0268758684 0.139096066 0.0821818858 -0.127417535 -0.0831027254 0.109001353 -0.0448572189 0.0432091393 -0.100685023 -0.0782502964 -0.0569691472 -0.0834055692 -0.0914414823 -0.00128868222 0.114371844 0.157571077 -0.0249715224 -0.0275524613 -0.106611423 0.160815567 0.0850525424 -0.0246056858 0.0868391246 0.0197147224 0.0387364663 0.0334140956 -0.0329913124 0.110141195 0.105670758 -0.0897664875 -0.0678865984 0.0182914361 0.146356225 0.0747506022 -0.0347048417 0.0646456406 -0.0683225691 -0.0967762694 0.144724965 0.0968451351 -0.049604129 -0.0246048607 0.0982864648 
-0.00104637037 -0.0540190488 0.02299482 -0.0587500408 0.162345782 -0.0178857595 -0.114502899 -0.0277074426 0.0523337275 -0.0407291614 -0.125408962 -0.0477996059 -0.144638136 -0.141282856 0.107945614 -0.0642622635 0.106897406 0.141127169 0.00702024298 0.115400836 0.0949773341 -0.0841375515 -0.029037755 -0.12251503 -0.113417722 0.152951673 -0.052355133 0.125115007 0.11263705 -0.0993821546 -0.100654982 0.13138859 -0.121526435 0.0688993633 0.0602294281 0.0230368655 -0.119217426 -0.131345615 -0.0928916186 0.0589227341 -0.0877812058 -0.0575322062 -0.0479355939 0.119958326 0.0839382187 0.0134669729 -0.120720349 -0.0728492588 -0.0201216638 -0.0426205285 0.0580729693 -0.0317371115 -0.0582037121 -0.058949165 -0.0108661382 -0.0596015975 0.0923921913 0.106575489 -0.00681856275 0.0882440805 -0.0621290579 -0.0726372078 -0.00623785472 0.0285876859 0.0697654709 0.0963460952 -0.0578520186 -0.0386559479 0.0133705661 -0.0272551179 0.0195438117 0.0346884355 -0.00187640428 0.0641605407 0.176762238 0.159317046 -0.0952501073 0.0985514522 -0.0871972367 -0.0842028037 -0.0397452265 0.17574358 -0.0538034029 0.136749208 -0.0399385504 0.068287164 0.0217975918 -0.132272243 -0.0182651877 0.105293095 0.00358554721 0.00108983321 -0.153865114 0.0111923162 0.168782786 0.0969837084 0.0112454593 -0.0346569866 -0.0916731507 -0.00954194739 0.154270783 -0.0877914429 0.0242955964 -0.0126784407 0.121400051 -0.0289624184 0.0689913705 0.0434036702 0.0646613985 0.0640042126 -0.0474287085 0.073149845 0.156802103 -0.041823104 0.0810273662 0.179701 -0.0470410772 -0.0788431466 -0.147018611 0.053253185 -0.0240673199 -0.0210381355 -0.0958639532 -0.0170583278 0.0516901463 -0.111291543 0.00283904956 0.142500415 0.141777232 0.126547039 0.129214615 0.0729232654 -0.0321790762 -0.143716827 -0.00954447314 0.172390178 0.0871036574 0.000518912973 0.103541978 0.00573523017 0.100544035 0.167853162 0.157549649 0.117853075 -0.0903918445 -0.00601014355 0.0462133735 -0.119286336 0.111245058 0.00672465004 -0.035454426 0.184626952 
-0.0521864779 0.180116341 -0.0577540956 -0.0600765273 0.151180387 -0.0436708629 -0.119430825 0.163085073 0.0715407208 0.0878540799 0.0810162574 -0.0718293861 0.12325272 -0.0860322118 -0.122137249 0.00682034623 0.158898726 -0.109563902 -0.140805796 0.144035459 0.0911271796 -0.0533853769 0.158740863 -0.12681675 0.0929608271 -0.0734888241 -0.0542239025 -0.0844008029 -0.0349548869 0.0194364432 0.00317873154 -0.0542409308 -0.110601485 -0.0040136571 0.126150146 -0.0695957989 0.135092571 0.0206705686 0.0210149139 -0.175603613 -0.0048725917 0.0448048264 0.0753361583 -0.0960626155 -0.00809389353 0.00274648191 0.118843384 0.0924557075 -0.0390414186 0.104679853 -0.02499073 0.115688451 0.183578849 -0.0963831246 -0.0192914438 0.0245501548 0.132269129 0.0528996326 0.126745895 0.0650902316 0.142014235 -0.109039702 0.110978663 -0.168853745 -0.0998671725 -0.0272130556 -0.0755283609 -0.145343035 0.0856398046 -0.0465832911 0.156254068 -0.00759668648 0.0660862029 -0.128243685 0.123390384 -0.0730970129 0.0211296733 0.172928646 -0.0569610596 0.162485525 0.0430793129 0.148347437 0.0230896771 0.0979775786 0.0892469361 0.114915423 0.102117866 0.114598379 -0.0385860801 0.104682192 0.05711741 0.183385804 0.114157908 0.0803671777 0.121530138 0.0750841424 -0.0201519765 0.0892636031 -0.00840737578 -0.0380099379 0.0670845732 0.173552945 -0.0446153358 0.0231650621 -0.0720840693 0.104690835 0.113704339 0.0918174759 -0.025853835 0.0474199951 -0.0122872479 0.0429795273 -0.0552103594 0.00883762538 0.151448011 -0.0409595668 0.0949078426 0.0689887926 0.107225835 0.0818655714 0.0588729456 -0.0576343685 -0.0948910415 -0.0496109203 -0.10446807 0.187580436 0.165386483 -0.0410638899 0.109590538 -0.0093578482 -0.168643042 0.0453341343 0.0740079209 -0.0932418108 -0.136097178 0.0847565904 0.016970491 -0.196696535 0.0213545114 0.146830887 0.188490406 -0.104602233 0.102813676 -0.020301817 0.0581303798 0.154687196 0.0931936353 0.0230271649 -0.0598398224 0.00981738791 0.00165832066 0.130958691 -0.141922146 
-0.0931093395 -0.0452646948 0.0832985789 -0.0752738565 0.108650707 -0.0635550246 0.161715269 -0.048298005 -0.0519500524 0.111362822 -0.0297681484 0.0919308066 0.00461465074 -0.123445027 -0.0583725758 0.0877097845 -0.0828031972 -0.0494801551 -0.0178236663 0.110060342 -0.104191855 0.00495020067 0.190953419 0.175235912 0.0748231113 -0.0354038626 -0.0866233632 0.0114633273 -0.0709704086 -0.0408563316 -0.00744438358 -0.12112467 -0.00320398994 -0.109712012 -0.13769187 -0.0328272358 0.091612272 0.14105013 0.146673918 -0.0514545403 0.202921212 0.0834511071 0.160404101 0.0601333193 0.0581494831 0.083636649 0.0984802917 0.0609594397 0.128031611 -0.0596118644 0.112030312 0.175436005 -0.0210987478 0.100863054 0.141590253 -0.014728453 -0.0364963971 0.0035578683 -0.0124314548 0.0569810681 0.0548986979 0.130580813 0.103102274 -0.0677055791 -0.116339654 0.129263833 0.162418574 0.122197464 -0.0109819323 0.0938043669 -0.0469912067 -0.101457044 0.131629422 -0.0877847597 -0.0320621915 0.0457580797 0.0759001374 -0.0854525268 0.0624620654 0.088354066 0.0471264385 0.122950502 -0.0319194868 0.0971357599 0.144149795 0.0968611538 -0.0324465856 -0.13455525 0.0447516218 -0.0679218769 -0.0809827521 0.0494714826 -0.0949900225 0.0311798677 -0.00109984947 0.174830928 -0.0281612556 0.163149565 0.0736394823 -0.0375521332 0.00539422035 -0.0927275494 -0.0925532579 0.0742847919 0.0994291157 0.127749816 0.0300972443 -0.0191503167 -0.0972991213 0.0944213718 -0.0106646148 0.0151962861 0.00275415881 0.0332029015 -0.0985995755 -0.0955503657 0.0529588386 -0.0463228486 -0.139574915 -0.105905958 0.0530111678 -0.153271616 0.00135927019 -0.018976111 0.0405978933 0.0479904711 0.0545446351 -0.114191957 0.141731873 -0.132812411 -0.0630234033 0.0933084786 0.0396189578 -0.0473725162 -0.0290426835 -0.123914912 -0.0582598001 0.152059436 0.0548362397 -0.0321423411 0.115950227 -0.120880082 0.184676751 -0.0445445627 -0.103703029 0.0245305933 -0.00212677591 -0.0626897737 -0.0121289967 0.0785561725 -0.0832984447 
0.0998352543 0.117086425 -0.0671990365 -0.0363239795 0.0353550613 0.114468403 0.143954277 0.105897352 0.0256107412 0.156521618 0.0780752227 -0.0554250963 0.0736213177 -0.10541296 0.0503535867 0.00255402969 0.0666635558 0.129061893 -0.0195398014 0.0478001311 0.065228425 -0.0979058444 0.0814248547 0.108272545 -0.00994789507 -0.0218796581 -0.154623747 0.0106207961 -0.12293978 -0.00427472685 -0.126063108 -0.0116878618 0.100741506 -0.0546985939 -0.0451277271 0.0930468291 -0.0851750597 -0.0140047939 -0.123040549 0.132812724 0.0833404511 0.140870854 -0.120734856 -0.0805390403 -0.0502453148 -0.0170761105 0.00345065887 -0.0480272733 -0.0561171696 0.0876882076 0.0613627955 -0.0316582024 0.0100890994 0.0458408594 -0.135604486 -0.0192864686 -0.01957082 0.0726629794 -0.0564594492 -0.0693246424 0.0831580311 -0.123946451 0.0842915326 0.027340591 0.123881891 0.0634962171 0.0854125172 0.0679267496 0.0966168046 -0.029469654 0.0381903499 0.111267343 0.140807226 -0.13526763 0.0522036403 -0.115617715 0.0543578118 0.0461016595 0.103722617 0.0174795687 -0.136987507 -0.0600835234 0.0538240522 0.0903360397 0.0172370523 -0.0140279233 -0.114186123 0.0943125635 -0.0614755452 -0.0279850513 0.083064124 -0.12880753 -0.13455835 -0.0599042326 -0.0389251933 -0.0604324266 -0.0513332263 -0.0554876402 0.0233900547 -0.0640518144 0.111689016 0.0502607375 -0.00419057906 -0.0730830133 -0.0277305022 0.171559766 0.0534306914 0.00674414961 -0.107875511 -0.0510217324 -0.0838860199 -0.15236254 -0.138948157 -0.125519603 0.0523681492 -0.0187952798 0.114655808 0.0474532545 0.0917048305 -0.0550882407 0.0838057324 0.18853642 0.142427206 0.180868432 0.140680373 -0.0942524076 0.123159751 0.0897716284 -0.0308326464 0.0049529071 0.13588357 0.0297236629 -0.0363686383 -0.0588090122 0.12634854 0.0122025581 0.186096713 0.0920768976 0.0481046252 0.0876177624 0.0250588302 -0.0850643218 -0.0529115237 0.029226495 -0.0699693412 0.150488198 -0.0428842455 0.179663286 -0.0198406726 0.0218468606 0.197559595 0.0729278922 
0.0885386169 -0.133979425 0.0167944431 -0.0360915139 0.0497089326 -0.0268492103 -0.0587182194 0.0121284872 -0.00810500979 -0.0885604918 -0.0682897642 -0.109051131 0.101431355 -0.10556107 -0.0689118356 0.0273847431 0.123891041 -0.0328962579 -0.0183387175 0.0236377716 -0.126516774 -0.027949512 0.125757441 -0.137005895 0.0159674287 -0.0881164894 0.0896662176 -0.0548697859 -0.0910438597 -0.126777187 0.0143643618 -0.0796068907 -0.0773626193 -0.0353754535 0.0982186347 0.102850467 0.0936983526 0.0350374728 0.0642853081 -0.0353903249 0.0034533143 -0.0836362615 -0.0474314392 0.137183502 0.00499179959 -0.0352529734 -0.12372198 0.0710547566 -0.0847075656 0.108061433 0.0962944925 -0.0228818804 0.0236922354 0.0593082607 -0.0698251426 -0.0753812417 -0.0950560495 -0.0748883784 -0.139509365 -0.0391269475 0.117235079 -0.0770111158 0.0286441594 -0.0478565544 0.0810799748 -0.0450968295 -0.0848289505 -0.0374233201 -0.0248766541 -0.0257886276 -0.00540667772 -0.131286308 -0.125986263 0.0405903906 -0.0291525051 -0.0074609369 -0.0744228065 0.0589668602 -0.0275227204 0.134439722 -0.116755374 -0.0779221952 0.0212557018 0.126257434 0.105313227 0.120735362 -0.0692541525 -0.0584569424 -0.108607799 -0.0476316065 -0.0588775352 0.0463445932 -0.133415371 -0.128379583 0.121760055 -0.0548802391 -0.0722203329 0.0508697033 -0.079833433 0.119531378 -0.0217971876 -0.105434492 -0.0522313938 -0.0453321934 0.107274927 0.0276630223 0.131097168 0.079335019 -0.114221223 0.0391028263 0.128627002 -0.0898075253 0.0599811226 0.072371535 0.0517965741 -0.0948484987 -0.00232080673 -0.120201647 -0.1168992 -0.163116753 0.101379991 -0.0693345442 -0.0656319857 0.0136408824 -0.0277835261 0.0546165146 -0.00200848537 0.105287716 0.021810092 0.101102382 -0.0842717886 -0.020271264 -0.121380635 0.0648328215 0.0722329915 0.0304982048 -0.0108427657 -0.0313236415 0.0242884308 0.0848189518 -0.00415426493 -0.129282877 -0.0663083941 -0.0568652116 -0.0136977984 0.0484237522 -0.136208966 -0.0747673362 -0.00170940161 0.00959950686 
-0.0287488401 0.174732566 0.0778143853 -0.0412021503 0.138728648 -0.12335252 0.0248393398 -0.00735486019 -0.0928628147 -0.0812815279 0.125593081 0.0110786557 0.124990925 0.150338039 0.0616421662 -0.0968330279 0.11320933 0.116342612 -0.00344289024 0.141467705 -0.072798416 0.121146008 -0.0969213247 -0.0562434942 -0.0969665498 0.0179323703 -0.0307174679 0.0410963222 0.0908566862 -0.0271566976 0.181122735 0.176294565 -0.0137444139 -0.152425051 0.0303653441 -0.0654244274 0.103337444 0.161812425 -0.114469662 0.0337155983 0.0851140097 0.0473025665 -0.0455731675 0.0910733119 0.0064521106 0.187958792 0.167304024 -0.127127901 0.00861696992 -0.0400827006 -0.140295923 0.0512709506 0.0780323595 -0.0932431147 0.0957963392 -0.125637099 -0.0162038952 0.00915290881 0.0997759104 0.0987372771 0.16382876 -0.056870617 -0.139243662 -0.071242094 -0.0608208477 0.107261404 0.0251677446 -0.0958002061 -0.0900856256 0.0601827726 0.107842483 -0.0984033346 -0.00783828646 0.0255061835 0.00474396348 -0.0694380254 0.0950763747 -0.0441939719 0.0136436457 0.108352683 0.137562498 0.0213271081 0.0454172641 -0.0874122232 -0.089138791 0.0275239777 -0.0769107938 -0.0700656921 0.131777659 0.175489351 -0.0777074322 -0.00239577657 -0.00230550254 0.167611465 0.0103928242 -0.0727633685 -0.0352996625 0.00823523104 -0.0106461262 0.0824658424 -0.0121006668 -0.0598732941 -0.0662225783 0.0269689541 0.0804088712 0.138990924 0.149531111 -0.0406282917 -0.126480639 -0.025079472 0.0510983169 0.035402365 0.08281295 0.156712428 -0.026096575 -0.0651845187 -0.0323777311 -0.105685644 0.0783127025 0.0188494585 0.0856304839 -0.046786584 -0.0739144981 0.0625574216 0.127959684 -0.0416722037 0.114131734 -0.04018737 0.0335959457 -0.0786943138 -0.0593536906 0.000781891402 0.0818767101 0.07887806 0.0942715183 0.178498864 0.14772743 0.00345369685 -0.0423939079 -0.0205054302 0.123664357 0.0551863275 -0.177257061 0.114078067 0.0455558784 -0.0323475748 -0.112341911 0.0721865445 -0.0341178104 -0.0914598405 0.0694510341 -0.0585612506 
-0.0373541526 -0.205118358 -0.0179533362 0.0257616416 0.18985191 0.101283662 0.0620856099 0.163825974 0.150054261 0.0351246744 -0.0134136677 0.0362584144 0.0490719676 -0.0219044462 0.100722261 -0.0236032922 -0.0624775924 -0.18531242 -0.0643399507 0.0405745842 0.0175180174 0.123290591 0.074898921 0.0684316009 -0.0228197258 0.138146341 -0.0247859173 -0.136237904 0.0807761028 0.190366209 -0.0662142709 0.0290480666 -0.0762866884 0.106889285 0.00406613294 0.00212845136 -0.0551334918 0.176173732 -0.000862196088 0.0479077958 0.12893793 0.0908120275 0.0428063385 -0.105808966 0.0208340362 -0.0391079783 -0.17646575 -0.0161272287 -0.0779476464 -0.139349103 0.132013753 0.0993892252 0.064087227 0.131257027 -0.0114984009 -0.120081656 0.0867618024 0.0161269289 0.0568408556 -0.0086016655 -0.00697259605 -0.127590686 0.00164337456 -0.106980473 -0.0617386699 -0.093155548 -0.0321060345 0.0353029482 -0.144390106 -0.041361127 -0.102071285 -0.0588951148 0.0818923414 -0.127334356 0.0141031453 -0.111001149 -0.123913996 -0.0247361958 -0.0820739791 -0.030570088 0.127384081 0.0231190175 -0.10356193 -0.139310062 -0.0380821303 -0.0285825692 0.126087889 -0.066886954 0.0766842216 -0.135645509 -0.0953988656 0.131382018 -0.12620239 -0.0145515203 0.134063303 0.0396169424 0.0967397094 0.119111016 -0.0184818357 0.176523507 -0.0199789405 0.0826793611 -0.110192202 -0.0409205034 0.00472770026 -0.14348729 0.147804692 0.0450261496 0.0670832992 -0.0345766172 0.126415744 0.00601782696 0.104479343 0.0414096117 -0.0710287988 0.112614326 -0.147158608 -0.0370420963 -0.119263552 0.126887798 -0.105801471 -0.00610316033 -0.105143495 0.196164653 -0.0159037225 -0.0744655356 0.132361174 0.0196442343 -0.0159279685 0.126357719 -0.029065378 0.0336539075 -0.168783128 0.0148825208 0.0555515438 0.0461699739 0.102379352 0.155959725 -0.137117967 0.0191216022 0.222972959 0.125019222 -0.0988391787 -0.0180195421 -0.00158139609 0.029359296 -0.142667904 0.103885561 -0.105973668 0.0307869632 0.00780402496 -0.0674499497 0.114393353 
-0.127377525 0.00557687134 -0.061907284 -0.124339581 0.0993482098 -0.0195321329 -0.0585047677 0.0953318775 0.0480449647 -0.0227444768 0.0408569276 0.0562379509 0.0313135199 -0.132302389 0.125221208 -0.0873878524 -0.111026652 0.115592606 -0.0906311348 -0.00652401475 -0.127266601 -0.144731417 0.0330261067 -0.135340631 -0.1036596 0.0953472406 0.0560712516 -0.135941952 -0.040515393 -0.0903434008 -0.0961870179 0.119984761 -0.023610061 0.136370555 -0.0166805629 0.127074108 -0.0724001899 -0.0508536957 0.115113258 -0.0258387104 0.0489959568 -0.142882243 0.104940325 -0.109118342 -0.0262665749 -0.139981106 0.15954946 -0.00940728188 0.0964377075 0.0164540596 0.0535212867 0.189132586 -0.035037268 -0.0510806404 -0.0423220247 -0.0387359485 0.0559357852 -0.0354634076 0.174487337 -0.0849912167 -0.124549776 0.0769607276 -0.0557537489 -0.0945133492 0.152356565 -0.0749799982 0.124006495 -0.11373242 0.0692153648 -0.0678370595 -0.0376192741 -0.0456925295 0.0221248977 0.0522562191 -0.098749496 0.0200695693 0.122223869 -0.0449365303 0.0366582051 -0.135746583 -0.0222668201 -0.00500035612 0.0176082794 0.128107294 -0.0137729133 0.0214566886 0.110726222 0.150741264 -0.0353633799 0.159352034 0.0344046839 0.105027668 -0.0436317027 -0.140568197 -0.101473704 -0.032741949 -0.0106838001 -0.0316685364 -0.129210651 -0.0679190904 0.133524075 0.123635188 0.10515888 -0.0406672806 -0.0610394813 0.091179423 -0.110931419 0.079060778 0.116783403 -0.0516342819 -0.13402909 0.0207334459 0.0812019557 -0.0820832253 0.0703516304 -0.084283106 -0.143779725 0.0990532935 -0.0511374213 -0.0828005821 -0.0313504227 0.108964168 -0.0947234705 -0.129062966 -0.0215799771 0.0714171082 -0.013629063 0.100170761 0.121061251 -0.122967482 0.0534396805 -0.104779765 -0.0699278712 0.0862568319 0.0739753321 0.0636002868 0.113115802 -0.0251864307 0.0644432828 0.00374182384 0.00278152619 0.0277899243 -0.0400727838 -0.0934138894 0.0662064999 0.0740315318 0.156935647 -0.125642329 -0.0181016717 0.0717086047 -0.0788133815 -0.127949879 
-0.0690763518 0.0889543295 -0.172350034 -0.100477748 0.00906703342 -0.0588162839 0.0321615078 0.143424392 0.0124900788 -0.0917625949 -0.0731047541 -0.0883597806 0.193930492 -0.0296085142 0.153995425 0.056007009 0.0701433122 0.0177569669 -0.0888565779 -0.0714818016 -0.00891659409 0.14154695 0.186709836 -0.0978443697 -0.0666612759 0.00330674648 0.15638712 -0.0387458205 -0.0156392194 0.029518418 0.177299723 -0.00526926899 -0.03754526 0.142723694 0.0702423528 -0.0628082901 -0.0962519944 -0.0736426339 0.00633251155 -0.0707057118 -0.0702924654 0.0923877209 0.00983795524 0.0308573246 0.117168695 -0.112862423 0.142043695 0.0422373824 -0.0367415026 -0.106207736 -0.0756792426 0.117217235 -0.0436312594 0.0131786875 -0.0440221652 0.097473219 -0.0796951875 -0.125066265 0.124969348 0.0678982735 0.139240772 -0.076055415 -0.025149785 0.022765873 0.00105297181 -0.0667744279 -0.142704338 0.0969702899 -0.131543919 -0.0815857351 0.107110865 0.00967122614 -0.0012585416 0.122922324 -0.0521417297 0.0298166722 0.069311209 0.0601125322 0.0429519527 0.0771004632 0.138341069 0.0400493145 0.0989085436 -0.115988038 0.0790331438 0.0363655277 0.0255561695 -0.127918124 0.0311952345 -0.0788384601 0.0947113633 0.144414648 -0.0196468234 0.0516601503 0.0307283401 0.0900717825 -0.0476232618 0.0064008832 0.0990933776 -0.105707288 0.0693743229 -0.124773592 0.0183412433 0.0536187291 -0.0942984521 -0.108814135 0.0376636833 0.0153515637 0.00406998396 0.0265448689 -0.135168463 -0.120080709 -0.121890008 -0.0570892245 -0.124603435 -0.0809690952 -0.0672037601 0.13564612 0.0371975005 0.112579718 0.014319554 -0.108871549 -0.0871257633 0.0682478845 0.0747066289 -0.0636163577 0.0898959637 -0.090092048 0.129719719 -0.0293056145 0.026599288 0.0581899136 0.0231975913 -0.0209574401 -0.00654032826 0.0904182643 -0.114409715 -0.0957838446 -0.044936955 0.0165019929 0.135205165 -0.117184259 0.0401535928 0.134790704 -0.0130638136 -0.0190193728 -0.0656322092 -0.113029599 0.00841842592 0.0140976086 -0.0268416889 0.0387401059 
-0.00842970423 0.153014824 0.0431912951 0.0146862119 0.0648952872 0.159487918 -0.107046999 -0.0957565159 0.0974680409 0.0875119492 -0.106558517 -0.00956180599 -0.10792207 0.0139202345 0.0237691645 0.170654655 0.182866856 0.0337778889 -0.122317858 0.104945458 -0.00241611805 -0.0189588871 0.154625118 0.0883154273 0.0683931634 0.0424042568 -0.0296660978 0.0563843139 0.0966898203 -0.00994552113 0.18443881 -0.146624371 -0.0824042782 0.116611265 -0.108186543 0.0102908229 0.103478007 0.0179831069 0.131399289 -0.0788777545 -0.0424850732 -0.00943686068 0.083427988 -0.137433812 -0.0903602764 -0.105054028 0.124498554 0.088755466 -0.0493076742 -0.027949756 0.0751230642 0.115139447 -0.127784625 -0.0843564868 0.1399187 0.077144593 0.0511633307 -0.031733308 0.00182465685 0.149220034 -0.108540453 0.0384725034 0.0657235011 -0.150511175 0.0859548301 -0.031602826 0.153693542 0.100388199 0.153390184 -0.000678598415 -0.0295467041 0.107720926 0.0790676847 0.075719431 -0.0342444293 -0.105272986 -0.101255842 0.17369619 -0.0664026737 0.0892078429 -0.113553904 -0.0292268191 0.180186689 0.109818101 0.0164926779 0.0922102034 0.0830212384 -0.00248041586 -0.0470679849 -0.165345639 0.12901403 -0.0142368376 0.0695406124 -0.0135625293 -0.106628664 0.00303458911 -0.0594627149 0.0922242925 -0.0427582636 -0.0400496349 0.178693265 0.183653072 0.0575503781 -0.0235571191 0.048922874 -0.00795071851 0.0807336569 -0.0162454005 0.0234919712 -0.102429815 0.0907384083 0.126808628 0.0676393136 -0.167194471 0.0440359078 0.137444958 0.196354747 -0.0737531483 0.0279132333 0.123419479 0.058315713 0.0996660143 0.122060843 -0.0147102922 -0.121966586 -0.0944622681 0.142329201 -0.0832371339 -0.0505962893 0.133063897 0.134808093 -0.0476008505 0.0899724364 -0.00881881081 0.129822224 -0.0857772455 0.0220859721 0.128572404 0.0105826855 0.102347367 -0.0834473595 0.0246756226 0.0283253919 0.0684853047 0.101052776 -0.12661618 -0.086164698 -0.0609710813 -0.0304403771 -0.0863657966 -0.155161664 -0.00632416084 -0.00467219949 
-0.137039587 0.111212932 0.166964158 0.0581296235 -0.103098728 0.0281474199 0.0514760315 -0.0765168592 0.0823773816 0.112306684 -0.119962633 0.108718097 -0.0627609268 0.0295355972 0.061364796 0.0683022588 0.164325893 -0.112472534 0.164340407 0.0510179065 0.00963465869 -0.0768766776 -0.04377589 0.096517235 0.148181275 -0.0579664111 0.0587554127 -0.0445416085 -0.03864979 0.00753601873 0.100015543 -0.0362141766 0.133224964 -0.0191601235 0.152707025 -0.0183888227 -0.059442617 0.104400992 -0.0854767412 -0.00690022996 0.0886835605 -0.0273776986 0.130314052 0.105323426 0.161530361 -0.108458608 0.0788582712 -0.0838668495 -0.08755178 -0.102095522 -0.096060887 0.169443905 0.0577232093 0.120626166 -0.0495226867 0.096482262 0.0348549932 0.199682817 -0.0202110633 0.0846792087 -0.0826675221 0.168889627 0.0768956468 0.0698982626 0.0966169164 0.14701435 0.00816824846 0.0694516674 -0.0396548584 0.109372504 -0.0630989447 0.117110327 0.0741739869 0.0132170692 0.100324839 0.00397197716 0.00673523871 0.0452416129 0.0100933397 0.181526616 -0.131816193 -0.0729396716 0.0076587908 0.0789732337 -0.0381261818 -0.082727015 -0.112304315 0.0861935169 0.0106273741 -0.114372075 0.0639646724 -0.0495705158 -0.0882112607 -0.0384016633 0.110038161 -0.0208555609 0.0701313913 -0.0773748457 0.078994669 -0.0506972298 -0.126550719 0.0909916982 0.00305084884 -0.128831208 -0.0751241222 0.134015068 -0.0960550979 -0.0882394835 -0.0782357231 -0.0271630995 -0.0772069469 -0.118653722 -0.0367000699 0.121580288 -0.0561355688 -0.138331473 0.125544876 -0.0296058543 0.0746538565 -0.0162153672 0.062437132 -0.0231160969 0.0841860995 0.0602102727 0.124720961 -0.0469560064 -0.140982583 -0.136888638 -0.0804962814 -0.0440254994 -0.107610121 -0.0446921699 -0.0253842529 -0.0920281038 -0.102073133 0.0864460468 -0.0521458536 -0.0281716138 -0.12248721 0.108246624 0.0153880091 -0.00498063862 -0.0892293677 -0.10190247 0.144978091 -0.0280745663 0.0683950707 -0.0395756029 0.0730759278 -0.00125575683 -0.0320034325 0.0139094684 
0.127148211 0.00908912718 0.074735418 0.0604887865 -0.0297355298 0.06545984 -0.0896448418 0.131814942 0.0968025178 0.0828945488 0.0279728677 0.0131593319 -0.117338456 0.0965867341 -0.00240561157 0.10527093 0.0450791791 0.020316802 -0.00228017569 0.00896368176 0.0269619301 0.150818229 0.155888513 0.116388619 -0.137444928 -0.0795316845 0.119856015 0.0243596714 -0.113116957 0.0804922581 -0.0587314703 0.169540256 0.139007777 0.139136776 -0.0298109893 0.0328682661 0.0793280452 -0.132538036 0.10372372 -0.0968293995 -0.048270233 0.0598262101 -0.0930386782 0.00116990507 0.0488549769 0.034680441 0.0733197182 -0.0573430657 0.100507811 0.07086052 -0.101341262 -0.0982980579 0.124785289 0.075397715 -0.106355786 0.00812490284 0.118339553 -0.056385763 -0.073536776 -0.091773212 0.137454107 0.0820370913 -0.0450648963 -0.104219824 -0.0738624409 -0.0940235406 -0.0482450277 0.101593941 0.0674851388 -0.088429369 -0.0648159012 -0.0145470053 -0.138597056 0.086754784 -0.0615454912 0.0661340803 -0.0228997692 0.117914066 -0.0457687825 0.0386412889 -0.139924914 0.0699922591 -0.0633266196 -0.0395022333 -0.0829551816 0.123344138 -0.0269175917 -0.112848774 -0.127718255 0.0982895121 0.00638221437 -0.0981410667 0.0559622087 -0.0958316401 0.0377015024 -0.0980365872 -0.0954344869 -0.0140493829 0.0955453813 0.0694573075 0.132618234 -0.00649338961 0.162486911 -0.0644210577 0.0785710365 -0.0324906185 0.0616738945 0.131774053 -0.065818958 0.171095803 0.17064096 -0.0761323273 0.0750825778 0.0011169787 -0.0321993567 0.0106129069 0.0721838102 0.0231869202 0.0562860221 -0.00459621055 0.0595190637 -0.0840448812 -0.081757158 -0.0908710882 0.0917035788 -0.157435328 -0.0259377975 0.137452871 0.0223723184 0.0868041068 0.120598882 0.00937895011 0.155116081 -0.100786671 0.0592786036 -0.133716181 -0.080596447 0.0356213599 0.00931102037 -0.0950432196 -0.0769308135 0.0528798848 -0.120534495 0.00211757421 0.0449208915 0.0501613319 -0.0820226222 -0.0251336843 -0.0144774914 0.0143801719 -0.0881446749 -0.123702742 
-0.0779745206 -0.139141038 0.0531492084 -0.12054643 0.0143575966 0.10409309 0.0117436498 -0.0403948873 -0.110459745 -0.0662581548 -0.1114856 0.00284221768 -0.052574873 0.0127746612 0.0721953511 -0.0164361224 0.0638382882 -0.0129706711 -0.0614550114 -0.060835205 -0.0864404589 -0.132438704 -0.108277529 -0.105507694 -0.0632813722 0.0298161656 -0.0744545162 0.0407374054 0.0746406019 -0.106669813 -0.10373731 0.12445356 0.0397888571 0.0220787525 0.0202515423 0.12489415 0.132801518 0.0626152232 0.0729931891 0.0667951256 -0.0493182763 -0.0674306005 0.0432554148 -0.0183124356 -0.104636811 0.063214466 -0.128902912 0.105252452 0.082741566 0.0974095464 -0.0517911017 -0.022460917 0.0845701918 0.00974364486 0.0755192861 0.122211002 -0.0929605439 -0.0323449485 -0.00275745941 0.13703306 -0.126241356 -0.0444845371 -0.0595683604 0.0420802012 -0.121947996 -0.0962189585 0.034678936 0.133177251 0.0845321864 0.0163415857 0.0860773325 -0.0293091722 -0.0457063392 0.116976917 -0.112045035 -0.114811443 -0.0518591814 0.0787069798 0.00974517968 0.11453367 -0.115365967 -0.0442552418 0.0140983164 -0.0719776675 -0.0414564312 -0.00496765925 -0.0418873541 0.0135209961 0.150040343 0.018279193 0.0670056716 -0.0128589002 0.00129946775 -0.0175037291 -0.10695336 -0.0812502131 -0.00151915848 0.105532125 0.156881258 0.107406408 0.0299301185 0.0623822287 0.105002947 -0.00692248205 -0.0561903454 0.0528496578 0.0645767525 -0.0349360071 0.0887037516 0.0392689034 0.160266221 0.0597992055 0.0948610157 0.0702525973 0.0522093065 0.116412245 0.0978843421 -0.148455024 -0.0602231361 0.0339148492 0.0785273239 0.143222392 -0.1303702 -0.0237084106 0.00480483705 -0.00544637674 0.123185195 0.066207394 0.138298839 -0.0356794894 0.176579177 0.0118068606 -0.0892722458 0.110782906 -0.105181009 0.0947949737 0.0951533318 -0.0448544845 -0.0301951393 -0.0468887351 -0.00123819872 0.101715624 0.0727012604 0.0642970651 -0.0423549041 0.0688230544 0.104760513 0.101078875 -0.0486233123 -0.0383191928 0.00411880249 0.110782482 
-0.0667161196 -0.115577795 -0.107555278 -0.0455098785 0.0147148855 -0.0387868471 0.119055025 0.113614053 -0.0650238395 -0.116119511 0.0904611796 0.0928509012 0.036130257 0.063363038 0.088962093 -0.0774177462 0.00342554948 -0.0879331529 0.0105287833 -0.150841638 0.0330097973 -0.101222105 0.000547326345 0.0222531687 0.00177719572 -0.168144733 -0.102801181 0.127630353 -0.0044782632 -0.0718901828 -0.0688694715 -0.107435137 -0.00648547709 -0.137246579 0.116464987 0.0476300418 0.0752717406 0.0731578618 0.100494623 -0.0570759401 -0.0219588652 0.060869351 -0.0204062089 0.119302526 0.110057697 -0.0576427504 0.0296158791 -0.0681548789 -0.0114316642 -0.132044569 -0.0581466183 0.00203379989 -0.113137707 -0.0557623059 -0.0389640704 -0.129291847 0.121296927 -0.00863479078 0.109711155 -0.0720649809 -0.114019588 -0.0326023102 0.047520563 0.13592954 -0.136486098 -0.138339326 -0.130544424 -0.0851323009 0.0625912994 -0.0132746696 -0.0394350886 0.106071725 0.0412790775 -0.0212111101 -0.110249251 0.128628239 -0.00126694143 -0.0719275251 -0.0233325437 -0.0285679474 -0.119621359 0.0375062078 -0.0438100025 0.127980903 0.0915731937 0.0225412827 -0.0376331583 0.0345656835 0.135839269 -0.152139679 -0.00529241795 0.0757251382 -0.0507489964 0.0434143096 -0.109213173 -0.0232270882 0.110102899 -0.11542847 0.178933024 -0.146328598 0.080565691 -0.0281426851 -0.0798788965 -0.0825010538 0.102853604 0.176710308 0.105933264 0.142999679 0.0393511392 0.0469196737 0.155381039 -0.0202247016 0.170517668 0.00554223079 -0.067655623 0.128527895 0.00835985132 0.192383677 0.133679509 0.109696992 0.124087319 -0.0682987794 -0.0266768672 -0.0692853928 -0.15578717 0.111135691 0.152784228 0.182785735 0.115072496 -0.0234794691 -0.14098835 -0.0995724574 -0.0710255876 -0.0245003197 -0.121010661 0.214906275 0.126054928 0.0240032822 -0.0867983475 0.0794893727 -0.0287744384 -0.0114687914 -0.0225537177 0.00640312536 0.0122232735 0.148882598 -0.0123748779 -0.0145422816 -0.0797223598 -0.0824621096 0.050172396 0.197323322 
0.0408616215 0.165366396 0.141404614 0.135947406 -0.0240413714 -0.115962207 0.193585813 0.0844455436 -0.0809815899 0.17395325 0.0107643139 -0.0946478769 -0.0715151504 -0.0346882716 0.0626753345 0.181155458 0.146124348 0.050194148 -0.016578801 -0.0884145498 -0.119957604 -0.0384309553 0.0239939895 0.071738176 -0.0269928221 -0.0424483791 0.0305357967 0.129883602 0.143514901 0.133759692 0.0695038289 -0.000178681847 -0.0580186956 -0.0775882527 0.136189267 -0.0727865323 0.0678651482 -0.049817346 -0.0649325028 -0.0088552665 0.156783015 -0.048809994 -0.0406761616 0.158696339 0.0890753791 0.136065736 0.160149634 -0.0645535365 -0.111809649 -0.0370648354 0.194843888 0.0213514157 0.102395862 -0.0400028452 0.0761639178 0.0394547395 0.0327902511 0.162416309 -0.00130897725 0.020087108 -0.0961144641 0.0304949749 -0.045121409 -0.0313251726 0.085803628 0.0291031022 -0.0910456851 0.0660064593 -0.068344146 -0.0507363826 0.0779818743 0.14303115 -0.0030358301 0.0674030483 0.147154242 0.0136561031 0.054678835 0.0916109383 -0.108775541 -0.0925002992 0.0767795593 0.0127289426 -0.0589483082 -0.119000398 -0.121735357 -0.0326918289 0.137503535 0.119247034 0.0430034138 0.0618253574 -0.0975219831 0.0736228079 -0.0372737274 0.153161958 -0.0518422537 -0.0204763189 -0.0608311482 -0.0457191877 0.147904009 0.0655161589 0.000597919687 -0.0326539725 -0.155328959 0.167974561 -0.0343649164 0.130426079 0.00636828598 0.141477138 0.057434544 -0.0446176901 -0.0856851637 -0.0112518054 -0.0844905823 -0.0406574272 -0.153341204 -0.0638041422 0.0856886953 0.0646770895 0.130005434 -0.0040921187 -0.0604991764 0.0163501818 -0.0783527344 0.120934926 0.161637381 0.115128227 -0.014278437 0.0813159347 -0.0724511221 0.0282054543 -0.00229437649 0.0406513065 -0.0661629364 0.0403244048 0.0202239044 -0.0395012945 -0.0349203013 -0.054835394 0.0276283957 0.0147262886 0.171100989 0.0777800605 0.113793746 0.029462589 -0.0530196279 -0.115720108 0.167530239 -0.0646177605 0.163341776 0.0519124195 -0.0436448865 -0.00622291025 
-0.118472219 -0.11200767 -0.0293926019 -0.0851374194 -0.0335079357 -0.00235709315 -0.114089273 -0.125252411 0.0811661184 0.143217117 0.0657678992 -0.144908518 0.074894011 0.0680066049 0.0251119025 0.0237030108 0.130361617 -0.117028616 0.0458782166 -0.0999599174 0.0947599187 0.125303924 0.124342829 0.0346639715 0.0599663034 0.00829797983 -0.12561053 0.0641372502 0.076463908 0.0719346106 -0.0685527846 0.0804899856 0.133784577 0.0426442474 0.108722381 -0.120919384 0.0425808728 0.0937603563 -0.0493984073 -0.101851352 -0.0743994713 -0.0168575719 -0.0864764303 0.134012744 -0.0345991999 0.0765475258 -0.0502674989 0.148546934 0.154048041 0.112422884 -0.0310489275 0.0740677267 0.124277003 0.11085771 0.0675311983 0.0243612733 0.0620236471 0.0995759219 0.168544546 0.00101517653 -0.0610326529 0.0783934444 -0.0371061936 0.0923965722 -0.0212610923 0.133722678 0.0999392346 -0.089509137 0.0711376369 0.112333678 0.0204899628 0.17973493 0.0402919464 0.126110092 -0.00392504036 -0.000692084432 -0.0994881168 0.053651616 0.00273740292 -0.0712720156 -0.0218583867 0.00874059927 0.0359456241 0.0621751361 0.000342633168 0.0569748171 -0.0946905017 0.00123212801 0.142329782 -0.0667219386 -0.0533551276 -0.0563121587 0.071681805 0.101482138 0.161976591 -0.106341578 0.186979875 0.0426207557 0.140588462 0.0434038043 -0.0569239818 0.164891273 0.140453205 0.0955060944 0.0860626772 0.151230052 0.187594429 -0.18127653 0.131841645 0.15597482 -0.131366268 -0.165060341 0.128387749 -0.0202594791 0.0415013544 -0.0959378406 -0.0706115887 -0.121529371 0.0975343287 0.0213517249 0.183627069 0.0060459557 0.00643412722 0.113227792 0.169127882 -0.109208152 -0.151426241 -0.00370581448 0.0630536079 0.108941384 0.0786992684 0.0706410259 0.0392542407 -0.127687827 0.0440069884 0.0562533028 0.0949133858 -0.0812414587 0.092598483 0.0175310317 0.0891861469 -0.045317024 0.136054009 0.0259930789 0.00634265412 0.0573615059 -0.114974082 0.0966024846 -0.112975411 -0.0831556097 -0.0627890527 -0.00156623824 -0.0914661735 
0.0832677707 -0.0836677551 0.0405839272 -0.072140947 0.0206351802 0.0579435751 0.175754473 0.0569373667 -0.0288151708 -0.133179188 0.0659383461 -0.0622974038 0.111952148 -0.0266913269 0.0549159199 0.126251087 -0.0655740872 0.103494681 0.000715725822 0.092549786 0.0262457915 -0.0120499283 -0.134702772 0.0988872126 -tensor_8bias 50 -0.0448136181 -0.0294532757 0.00591958454 -0.0112828789 0.0547700003 0.102279283 0.00554918963 0.0933698788 0.138683245 0.153071642 -0.0246890131 -0.066205956 0.0102847284 -0.0217106864 -0.11153923 -0.0833024532 0.0690509453 0.0574259795 0.0326761454 0.048058711 0.0932174474 0.173286349 0.0437983349 0.0692929476 -0.1425194 0.0164392311 -0.0525733009 -0.0926198289 0.01558726 0.124148585 0.159763724 -0.112289928 0.122134581 -0.0329846852 0.123975173 0.00884330273 -0.125247195 -0.108203024 -0.0963885933 0.12722528 0.105277926 -0.0898397416 0.108396716 0.133004621 0.111592449 -0.0548007637 0.112471558 0.0952548459 -0.0418147035 0.0495906435 -tensor_4bias 50 -0.0420062914 -0.0531011894 -0.0405919701 0.147642136 -0.0448930375 -0.0946018249 0.0368757285 0.0895275325 -0.00135793048 -0.0465053245 0.104558863 0.0464918055 -0.0928135291 0.145776987 -0.0437397324 0.0744188651 -0.0975865945 0.0791935027 -0.0783651695 0.0380954593 -0.0641139522 0.0319918618 0.0519438572 0.00847010501 0.124498516 0.182475775 -0.0537090674 0.0583103821 -0.0401648097 0.0082509499 -0.0618926026 -0.122952975 0.0772916004 0.014789585 0.101875983 0.0958903432 0.064464353 0.0122809373 0.149964184 -0.141134128 -0.0849211961 -0.0111745978 -0.0645377114 -0.0344211683 0.0628582314 0.0434207059 -0.0433468781 -0.0299602263 0.15525946 -0.0448016711 -tensor_2weight 2500 --0.0597149245 -0.0791020989 -0.00306093879 0.113323435 0.118636928 -0.0843338519 -0.109422937 0.0164578613 0.168519169 -0.0703572854 0.0312314406 0.0899977908 0.0896739215 -0.0900451988 -0.057600379 0.0125688771 0.0722137764 -0.0290169287 -0.0694356412 -0.111381322 0.0917039365 0.00489026168 -0.0580901131 0.183314383 
0.195475265 -0.12944217 -0.0534728765 0.074898228 0.104391731 0.123983808 -0.013343907 -0.112780578 0.012140803 -0.086059548 -0.0357166752 -0.0239756703 0.114319615 0.0447655618 -0.0479144566 0.0672920421 -0.039890483 -0.0342019647 0.170793653 -0.0611885674 0.128305733 0.0986138955 -0.0286394898 -0.0084637003 -0.141880184 0.0852712765 -0.0972362906 -0.00365298078 -0.108331524 -0.0803529769 0.179286033 0.0825248212 -0.0778654292 -0.0261579026 0.0222861301 0.199497893 -0.0576646812 0.142493397 0.018432891 -0.0569059029 0.0996442288 -0.0431534536 -0.0794040635 0.136226013 -0.0141376657 -0.0539442487 -0.133499324 -0.0887252018 -0.0284489784 -0.0330936722 -0.03493331 0.0510139801 0.192286044 -0.00151121407 -0.0730649382 0.136111543 0.162208974 -0.115568712 0.176949784 0.0509604737 -0.140759781 0.0942156538 0.15726684 0.0260999966 -0.0726049989 -0.0243513957 0.156701684 0.138213098 0.112526298 0.0941351131 0.104868479 0.105548747 -0.0304395221 0.0303013697 0.162006006 0.100969627 0.145671651 -0.0650625825 0.0855033845 0.0336373001 0.141778961 -0.0337854326 -0.00864057243 -0.0735450611 0.0464367941 -0.0596558116 0.0623771138 0.14349483 0.0591385625 -0.00258940901 -0.0122495294 0.14376843 -0.0750882924 -0.0664319023 0.0305001531 0.0184416007 0.02046955 0.0551448241 -0.0694528297 -0.0207397975 0.154329836 0.0494214594 0.0845211819 0.16324687 0.0757716969 0.0634511784 0.120605588 -0.113957532 -0.0832520127 -0.0171713699 -0.0601701811 0.148658082 0.0899651572 0.118677244 0.0283228904 -0.0590552986 0.0797857642 0.0911054611 -0.022215249 0.176669434 0.000942089071 0.112969555 0.105361097 -0.0645927563 0.103734575 -0.0436463058 -0.0349569395 0.115449831 0.0422306731 -0.0804883987 0.0807694271 -0.0505034067 0.00729625719 0.137707859 -0.0488397889 0.162600547 0.15114215 0.0636213571 0.00903507788 0.128289327 0.163847417 0.000159272255 0.0834238008 -0.104029171 -0.0793354735 0.0541718863 0.00707805855 0.077409409 -0.00238326658 0.125607908 0.0396535546 -0.0790733248 0.0564618595 
0.100612111 -0.0357064828 0.117824152 0.132536173 -0.0289113428 -0.014852941 -0.0426625349 0.0135453995 0.103636682 -0.0972069129 0.0516828299 -0.00995481107 0.0232977849 0.0937414765 -0.023261575 -0.0417088531 0.0130363097 -0.14154911 0.0702126473 0.00403433712 -0.0650982484 -0.0789552182 0.216502696 0.122806698 0.027723331 0.063748695 -0.0578081496 -0.0157720149 0.0400142148 0.133040145 0.0334649682 0.0875510424 0.110794596 0.0254984461 -0.0512416363 0.0211649723 -0.143576398 -0.0205686055 -0.111181781 0.0162975509 0.121590719 0.0656936541 0.155964255 0.0245984644 0.0352118239 0.133722454 -0.0262214299 -0.0336278044 0.156469122 -0.13011755 -0.027528204 -0.0602145456 -0.0930233747 0.0099506909 -0.0182043407 -0.118824221 -0.00373798492 0.178733543 0.00827211235 -0.0456761308 -0.0721783042 0.00670965109 -0.0409170277 0.00431948341 0.124081343 -0.0710947514 -0.104117736 0.093746461 0.171907842 0.110169716 -0.070081532 -0.0667723492 0.125274718 -0.0586081445 0.139502883 0.177527696 0.0687526166 -0.0820335746 -0.0490859933 -0.12959671 0.124665432 -0.0872184113 0.0991814062 0.0363627896 0.190564334 -0.0296370834 0.0762037039 0.0642659366 -0.0918578207 -0.054685194 -0.0458993316 0.146039933 0.0528010353 -0.0662797019 0.00561331725 -0.01142208 0.0815358981 0.0418767408 0.110681847 -0.00722674327 0.130719125 0.139407441 0.0292424969 -0.0270317923 0.0958031863 -0.0573824011 0.12932986 -0.043775145 0.059319146 -0.0913528278 0.115791552 0.078004472 0.115792975 0.107448012 -0.0748391598 0.0529222861 0.13462083 -0.141233921 0.166953042 0.168474525 -0.0700130537 -0.117624134 -0.00714296196 0.0268919822 0.163626537 0.0181761291 -0.0640345961 -0.0449223928 -0.141952619 -0.0284713078 0.147408575 0.139610574 -0.0779195204 0.106946297 0.117024481 -0.0941873938 0.09258876 -0.00288540404 -0.0543360636 0.0990853012 -0.0131437555 -0.0769185126 0.0146610877 0.0856351554 -0.090552628 0.124525517 0.072334148 0.00881079119 0.0441620275 -0.0116904415 -0.108310528 -0.0406595394 0.0195690114 
0.0474229716 0.08090958 0.0409525596 0.077940464 -0.121437781 -0.0896261111 -0.134390622 0.099559769 0.107502699 0.0738855898 -0.0311849546 0.12491411 0.0958716646 0.048406072 0.0154622868 -0.130314365 0.148058236 0.00762006547 -0.0898886994 0.144507095 -0.0986621678 0.0791233629 0.0717348233 0.137725651 0.0972002074 0.0856728703 0.0490715429 -0.0558436215 0.177653775 -0.0812159926 0.174190253 -0.0374299698 -0.0888636857 0.0568164624 0.0539831966 -0.046500802 -0.088104479 -0.0324098729 0.123006575 0.174390927 -0.0655597001 0.118238717 0.165678978 0.115315504 0.149962306 -0.0967894346 0.0218543001 -0.0471816473 0.136843204 0.0418579951 0.130341902 -0.10788656 -0.0118869822 0.0904047042 0.10771846 -0.0203160401 0.0716004148 0.121576704 0.114085183 0.0813911036 -0.0706418529 0.0724584237 0.0249532741 0.156553373 -0.00865705032 0.134671107 0.0270873979 0.0121872211 -0.000827496988 -0.103484429 0.12091063 0.0684384331 -0.112646192 -0.0716026947 0.0865510404 -0.0961387679 -0.0992462859 -0.014073588 0.0901760384 -0.0329191796 -0.00509604625 0.0300773419 -0.113896236 0.0637915656 0.176874548 -0.0267044064 0.12591213 0.0827189684 0.00802489929 -0.0155225964 0.139007181 -0.0314813517 -0.0244915821 0.0454487316 0.113499463 0.147255525 0.0290668719 0.0196187459 -0.0756559074 -0.0474474952 0.000423966238 -0.125565693 -0.142974168 0.0265704822 0.100150622 0.124454536 0.128189385 -0.125751778 -0.0660192817 -0.0496372506 -0.025079174 -0.0945867226 0.00687600998 -0.108164005 -0.0449875742 -0.0757939294 0.0345570296 -0.0277413465 -0.0288163945 -0.0649622455 0.00885617267 0.0745153949 -0.0630018637 -0.00193145883 0.0763816684 0.156405032 -0.0854697376 -0.0829446241 0.0749762207 -0.0894886181 0.00361103215 0.0892253667 -0.00260828738 -0.0638676211 0.00824388769 -0.0162695311 0.0992859229 0.0285193995 -0.0495389216 0.0868888199 0.0549531169 -0.0304261018 -0.0182636939 -0.0249298904 0.159364238 -0.0837972984 0.11065764 0.0529022627 0.010110856 0.110683426 0.0919133052 0.0737009645 
0.0965587646 0.0305129029 -0.0127110174 0.0697814003 0.103699945 -0.0261213128 0.170093238 -0.0687487945 -0.12052843 -0.104825832 -0.126111925 0.142499581 -0.128851101 0.0239339732 -0.0617658421 0.0295549762 0.119156219 -0.0673037395 -0.0500704497 -0.0940866619 0.0919373184 0.146928117 0.0300044753 0.0634653345 0.0144530665 0.0691985935 0.0211127512 -0.0590388924 0.0216479953 -0.0947615728 0.00890090037 -0.143075675 -0.00150912558 0.101439707 0.0146557204 -0.0631864071 0.0695210993 0.159808844 0.0115857897 -0.00928535312 -0.0489135161 -0.0782282799 0.125244364 -0.0499396287 0.140853539 -0.0960367844 0.0661479533 -0.0767967701 0.0877454206 -0.0602071472 -0.00595363509 0.115926109 0.178855419 -0.000521433423 0.0932693109 0.0502367616 0.152228653 0.104619421 0.0170960594 -0.103684276 0.0711491629 0.0488289595 -0.0617828257 0.0788236633 0.163875833 -0.0177440327 0.0156344157 0.109268099 -0.0375487134 0.0692994222 0.0731202066 0.0198084135 -0.0638355985 -0.0859975517 -0.0729697719 0.0573660471 -0.0556606203 -0.0930642337 0.145462662 -0.00594186038 -0.0928620845 0.139376998 -0.0553284064 0.0321234614 0.122701474 -0.123724081 -0.118198179 -0.0378811546 0.0980066508 -0.110459164 -0.0362307765 -0.0317853428 0.0111791994 0.0406676829 0.102611743 0.181697577 0.0510763824 0.123425812 -2.60259403e-05 -0.00361568225 0.153392524 -0.0397593305 0.0637998879 -0.145311564 -0.0472530723 0.0862638727 -0.0162773281 0.14536725 -0.0755254775 0.111187324 -0.0551111922 -0.000961930782 0.0612597242 0.163095102 0.0857004449 0.134386837 -0.0350845531 0.104531094 -0.0771434605 -0.067063503 0.171728879 0.167630181 -0.055467926 0.0409745835 0.177932739 -0.0550457239 0.107211053 0.0066946256 -0.00466190139 0.00958849117 0.156683907 0.111835107 0.0854923576 0.0730453655 0.121291943 0.0563716777 -0.111218229 0.0502161607 0.00677639991 0.18682304 -0.0360405892 -0.0132346814 -0.0414354391 0.0244455282 0.0727593377 -0.0868931487 -0.102708675 0.0923786163 0.154591203 -0.0693407878 0.106593266 
0.162805468 0.0318478011 -0.031251967 -0.126320124 0.0780377984 -0.0280229542 -0.0295661092 0.0982864872 0.101671919 0.120140024 0.0414738134 0.105208568 0.0855850428 -0.0743453577 -0.000411789661 -0.0912177339 0.0883763209 -0.0493486412 0.123505704 0.166449651 0.105023161 -0.0776017308 0.162414178 -0.117349826 0.168772966 -0.101528428 0.13711141 -0.0164699852 0.0836634934 0.0919587389 -0.0328455754 -0.0752447918 -0.009731967 0.0349985808 -0.0194250569 -0.0934877768 0.185227469 0.0435012877 0.154062793 -0.0773278996 0.0929438472 -0.00670079701 0.0923984647 0.102851599 0.134395629 -0.120910235 0.170304388 0.0816775039 -0.0626546219 -0.0595025942 0.0244693402 0.0510447063 -0.115126796 0.115374513 -0.0176392663 -0.0923264623 0.122397989 0.0872549042 0.125522628 -0.100656673 -0.00508889835 -0.123501971 0.0617450103 0.139201492 0.051387202 0.00884217676 0.0175981224 -0.0483928584 -0.0360136032 0.05417905 0.022909319 -0.0881463438 -0.0459814519 -0.0131944772 0.0480347835 0.1673228 0.137037218 0.14526543 0.0446226932 0.0413857326 0.0612014905 0.132490978 0.0794302076 -0.0342803597 0.0863904208 0.15610376 0.121424645 0.0110774338 -0.0368165858 0.104494691 -0.0254124962 0.154777497 -0.0138444677 0.118794315 0.0259997863 -0.00128288078 0.142353535 0.155503765 0.0894722044 0.0424166657 -0.0683410317 0.0425889567 -0.10710226 -0.0400536358 -0.000696110365 -0.0677292421 -0.0385467038 0.0813434571 -0.0811068788 0.0311896447 0.0156664345 -0.147901028 -0.000463384727 0.0149115929 -0.112064414 0.0082620522 -0.024438085 -0.0304117016 0.162811249 0.128227949 0.0702825859 0.0863868073 0.0475940667 -0.100322515 0.118678033 0.153219327 0.103472307 0.106240071 -0.00983386766 0.0908779651 0.0990438908 0.0359329022 -0.0689288601 -0.0298974775 -0.115996465 0.0365048237 0.0202663038 -0.133836135 0.0477452688 0.0554565825 0.0893209428 -0.0239705388 -0.0640460923 0.13875863 0.105679706 0.0737722218 -0.0183230489 -0.0404619724 -0.0105633233 -0.0761946291 0.164347902 -0.0172834061 -0.094510898 
0.0345971286 0.0106645143 0.194848433 -0.0547695532 -0.106579714 -0.0123255178 0.0403105766 -0.0313294157 -0.00499826716 0.100480273 -0.00637257611 -0.0778858364 -0.0411514193 -0.00478123594 0.0215911381 -0.0732492954 0.194053754 0.0539965741 0.113155119 -0.0752726197 -0.0769620165 0.193490297 0.0789649859 -0.0801189467 -0.0407260284 0.0242670309 0.10401839 -0.0375796929 0.0314083621 0.0724864528 -0.0512620732 -0.137483209 -0.0787761062 0.0968051478 -0.087627165 0.190840423 0.158406734 0.0864097029 0.133482426 -0.0359799229 -0.0242824815 0.0159911942 -0.0485018119 0.144926906 -0.058078561 0.111498684 0.065325208 -0.0478983261 0.0192427151 -0.0443237759 0.0666328892 0.0329897963 0.134647146 0.0964290947 -0.109050713 -0.000148722494 -0.00192280754 0.07619223 -0.203230783 0.0140038347 -0.0237551313 0.113345623 -0.0610194132 -0.123688005 0.00247963867 -0.0892862976 0.0488414988 -0.0904518217 0.174596399 -0.131306589 0.0763920173 0.150487289 -0.153154299 0.0224560183 0.0973761827 -0.0426088274 -0.0505751371 0.104424372 -0.133484393 0.0833508074 0.0194486398 -0.142139688 -0.0637170449 -0.101654164 0.0209246967 -0.140727118 0.00838450529 0.0946883485 0.148535386 0.103271469 -0.114976875 -0.0612382665 0.0309834275 -0.080561161 -0.0438423492 -0.0763120055 -0.0871841311 0.0487271659 0.192025125 0.0274662226 0.0726716295 0.126637235 0.11231558 0.00397039996 0.192436248 -0.0129653281 0.087810427 0.101742446 -0.0811328292 -0.0573779941 -0.00594198145 0.157350421 0.0783605501 0.201680467 0.0806498379 -0.0635789633 0.173802316 0.046798829 -0.111674123 -0.105566561 -0.110239312 0.137469321 0.0206604954 0.190349817 0.169501752 0.126503631 0.167581499 -0.0180790145 -0.0187429003 -0.0419336259 0.0993470997 -0.0918064341 0.110130824 -0.0955291986 -0.0254780296 -0.0506573617 0.0148899863 0.104261681 -0.0428472869 -0.0548303574 -0.0506917909 -0.0156032071 0.0699625984 -0.15484792 -0.0459163263 -0.110004574 -0.0441328883 0.197784573 0.0838625804 -0.0522456057 0.0942399129 0.0829744935 
0.057554815 0.109780334 0.10274224 0.154105842 0.10721004 -0.0166563932 -0.0466450788 0.0287705809 0.10081622 -0.0953564495 -0.0839984119 0.101254053 -0.0738965794 0.0948913991 0.167173281 0.0429453701 0.0383497626 0.091592297 0.106202237 0.156739905 0.0656498298 0.13198331 0.147935465 -0.0810967013 -0.018950887 0.00158079178 0.164120257 0.0798614174 0.0186908729 -0.132187113 0.124651186 0.112890542 -0.138516054 -0.0781108215 0.106892236 -0.0400021151 0.121923052 -0.00202938612 0.0550861284 0.115557112 -0.0589016899 0.103443392 0.138156414 -0.156100512 0.141828462 0.163233846 0.185174793 -0.134581283 -0.0946970135 0.0305168517 0.0545697697 0.122422308 0.0272117686 0.092828013 -0.0790654421 0.0789960772 -0.113887571 0.139471903 0.177440643 -0.0422447994 -0.0695037767 0.133454293 0.00472546089 0.0994608104 0.0261894893 -0.0549818948 -0.0645656288 0.0830694512 -0.122029178 0.110560618 0.021865055 0.0957053602 0.153808683 0.153240129 0.00273627671 0.107639149 0.0361639187 -0.0830527321 -0.0444489233 -0.00363346422 0.0640293211 -0.0754873753 0.0189995058 -0.1402542 0.162265539 0.138485089 -0.0446577705 -0.000309297611 0.169430003 -0.0834633186 0.00541008823 0.034290649 0.0778444111 -0.0421900116 -0.0198174808 0.0522686094 -0.0672751144 -0.0208641775 0.151453003 -0.0738410801 0.043029502 0.0127242813 0.0245345235 -0.0372197554 0.090350613 -0.0694454312 -0.045643907 0.0334979966 0.0695154294 -0.0134842489 0.141971424 -0.074017182 0.0237953663 0.106125079 -0.0695564449 -0.145977944 0.166921124 -0.0877014548 0.0716962293 0.0305217579 0.117084034 -0.0790342316 0.0964029655 0.13598761 -0.134147704 0.189042479 0.182969391 -0.107122943 0.0923936591 -0.0212771464 -0.0149747208 -0.0244534928 -0.0772951767 -0.0497068875 -0.0293945558 -0.00921653118 0.105580427 -0.0721728429 -0.0770729706 -0.00264244643 0.00397060299 -0.139186502 0.0292970631 -0.0475326255 0.08476118 -0.0867509693 0.126799867 -0.0671816245 0.160967201 -0.0940391421 0.036259234 -0.0219887402 0.0285151005 
-0.0580190904 0.13070558 -0.171060801 0.135117233 -0.0228546057 -0.107383102 0.0618890449 -0.0694213063 -0.0618949234 0.133807048 0.17643562 0.128621712 0.0101680793 0.176956698 0.0840079859 0.097374849 -0.100451432 0.0399295464 0.172356963 0.00101820775 0.145156473 0.104961276 0.0815578476 0.146059379 0.107903466 -0.121531352 0.0570647754 0.047216557 0.170416638 -0.0707143247 0.0478855185 0.0394140966 0.0102794804 0.125966758 0.135744303 0.0133625893 -0.0925729126 0.136732638 -0.0822476298 0.154679909 -0.13795127 -0.0215002652 -0.0249491148 0.0930954218 -0.106826156 0.10373725 -0.0187940467 -0.0534816161 0.134281337 -0.0336386599 0.114718519 0.0787281469 0.0239171404 0.0408289284 0.117535852 0.0759770721 -0.0240571704 0.0102049625 0.0229755491 -0.0571867488 -0.0825752616 -0.0630160421 0.0233204234 -0.0362254977 -0.0341095217 0.110644877 -0.0943035707 0.0922036394 -0.052436009 -0.0474082902 0.0808229521 -0.0361060351 0.0341569446 0.127944812 -0.0520493798 0.00435285084 0.0624745227 0.0890819654 0.120440952 -0.125715539 -0.0429935902 -0.100851558 0.115291968 0.103215486 -0.0138821993 0.114144072 0.144928649 -0.0672504827 0.0337884873 0.178193495 0.0654718578 -0.032493107 -0.0594031401 -0.0149731291 -0.108951643 0.148577735 -0.0590856262 0.123775907 0.129149333 0.120561078 0.0938586891 -0.0787900835 0.157910496 -0.0426145568 0.124586366 0.153994665 -0.0279286914 0.0712008774 0.132654876 -0.058968544 0.152131483 0.0144725023 -0.0846911147 -0.0830136165 -0.0503571592 -0.129175395 0.107455552 0.0278498847 -0.0376918465 0.125603542 0.0866251886 0.0744670108 -0.0176635683 0.081767872 -0.116470791 -0.076551564 0.107822165 0.0519237667 0.169635236 0.000728378771 0.195453733 0.0923824608 -0.00255433074 0.130946428 0.033110749 0.0234523341 0.159311384 0.0584074371 -0.0724054351 -0.00702239107 0.0528859086 0.0255747363 0.123749338 -0.0502231903 -0.127378836 0.000618861057 0.168384925 0.0564158484 -0.0874255598 0.0325605795 0.110717267 -0.0185731165 0.0527723245 -0.0973552689 
-0.0553385355 0.099351272 0.126928583 0.037081793 0.159003794 -0.0413037315 -0.0480074212 -0.0216640383 -0.109965399 -0.0768443644 -0.0550187156 -0.02983227 -0.041209314 -0.0762111172 0.00517032761 -0.021049602 -0.082603015 0.128907517 -0.0880745947 -0.101325043 0.0862258524 0.0882336497 -0.0198778603 -0.0331840217 -0.109220311 -0.107734382 0.0400603004 -0.132840812 -0.0447417721 0.00368672935 0.0691269711 -0.0319370776 -0.0310771763 -0.152229711 -0.126779377 -0.0754033923 -0.106641732 0.0897259042 0.0624115281 -0.084738642 -0.0616546944 -0.00815979205 0.0202450287 0.071183376 0.0515766665 -0.0504490845 0.0691114515 -0.121255346 -0.0616305098 -0.121116355 -0.0412869304 0.0541755706 -0.00845611095 -0.019792689 -0.0873068273 -0.1018041 0.00566182006 0.155241832 0.0709863603 -0.0654985607 0.0548714921 0.1288618 -0.0813171715 -0.0274352692 0.050162863 0.0174831059 0.115988865 -0.0983620062 0.00916780252 -0.111271255 -0.0194736812 0.122007161 -0.05491817 -0.155907109 0.0968826488 0.0766369477 0.120036878 0.118291102 0.144479945 -0.109428965 0.0191301908 -0.131886169 0.0119547276 -0.178280339 -0.074061133 0.0725457594 0.047452867 -0.0980938748 0.00940938015 0.17627655 0.0703046694 -0.0134887863 -0.0899318606 0.140372112 0.076489009 0.0844909772 -0.0435512364 -0.0578976758 0.0688769594 0.10411185 -0.114739448 0.11660511 -0.0925834179 0.0873279944 0.175916493 -0.0425273553 0.143908709 0.0721898228 -0.0761375278 -0.11759565 -0.020234637 0.0312824845 0.0598440468 0.110991903 -0.0383540764 0.105179779 0.0467798598 0.167937577 0.0386657864 0.0542986952 0.0948523358 0.0433264002 -0.148534298 -0.0129731102 -0.0280822664 0.0361635387 -0.00415288471 0.138104618 0.10342367 -0.0275076535 0.116774455 0.101908013 0.0884111896 0.0440905578 0.117984377 -0.013649038 -0.126955181 -0.0553081445 0.00625609886 0.133344293 0.00572153553 -0.0223944504 0.177496225 0.0815475732 -0.00271677272 -0.0417993777 0.106240101 -0.0412545837 0.18434307 -0.0274373218 0.178807244 0.0294444654 0.0646818206 
0.0889737979 -0.0807103813 -0.0720598325 0.151093379 -0.033041738 -0.117520221 0.126272097 -0.106381506 0.037757419 0.00232348521 -0.0547570363 0.076936692 0.107204638 -0.0481706001 -0.00307636359 0.132705554 0.0537186749 0.0473928303 0.0915753692 0.119206257 0.176839486 -0.155492246 -0.151921168 -0.101832837 0.0815496519 -0.0724380389 0.0371969007 0.0601178631 -0.058801692 -0.0965428352 -0.0115199285 -0.0381022878 0.105424263 0.0742912889 -0.0960293785 -0.0112020867 0.0849209279 0.0520411208 0.122885831 0.151498944 0.122882292 0.153430328 -0.0156365 -0.0119283618 -0.0820145831 0.0155551042 -0.149645686 -0.0800471455 0.094618395 -0.0650205612 -0.0104006175 0.131104678 0.0416966155 -0.0876214504 0.0637880862 0.0821948424 -0.0084727779 -0.0978877619 0.168948174 0.149989381 -0.0299459342 0.0972742289 0.000181726937 -0.0922966674 0.176449746 -0.0968618467 0.0333946943 -0.0902563259 0.16238676 -0.0905583873 -0.03544081 0.152139008 -0.119497493 0.0413080677 0.0570814125 -0.000148650375 0.112468541 0.058299277 0.0766911507 0.0998951718 -0.0232174434 0.167854264 0.168274015 -0.0583992265 0.154771283 0.13650085 0.117386065 -0.0722455084 0.0544443242 0.0949110314 0.143746346 0.108085796 -0.0419899784 0.143864065 -0.025696218 0.133225232 -0.111586809 0.0990424305 -0.0993287787 0.078866601 -0.0784433931 0.132836834 -0.106675968 -0.1069621 0.077828303 0.187477276 0.0277358871 0.0359606817 -0.0598008744 -0.00338539528 0.00320412288 -0.0859301984 -0.0230135676 0.163934514 0.0130302329 -0.0939015523 0.0754140466 0.107570499 0.00613959366 0.135453999 -0.0996567607 0.109139353 -0.100983992 0.0580916367 0.0119609917 0.0797068924 0.151979074 0.16187796 0.075011678 0.0931628644 0.0360457934 0.00347460015 0.0319518261 -0.0867329165 0.0962795615 0.0821009502 -0.0356594585 -0.02422712 0.0152814919 0.0527246408 0.132090867 0.144757852 -0.0431858338 0.139542729 -0.0139607172 0.171679854 -0.0980732143 0.180641383 0.186638147 0.0636721104 0.0896847546 -0.0584419966 0.143411934 -0.0701248869 
0.0328124799 -0.0829237774 0.145310253 -0.00750299264 0.118470781 -0.0582295991 -0.0695642605 0.0633131266 -0.0640299097 0.159781903 0.0272349548 0.109332368 0.168833092 0.0473833978 -0.0882677585 0.0282821339 -0.0536800846 0.0558247045 -0.13541919 -0.107939526 -0.0673646927 0.0988901109 0.144200847 -0.0628962666 0.0835133493 0.0278197322 0.112931602 -0.0297801625 -0.00817243289 -0.119054325 0.00599690992 0.16512607 -0.0284597538 0.133023679 0.108261056 0.175182506 0.148225054 0.0877180696 0.0722195581 -0.0575301908 0.0970565677 -0.0798201114 -0.0708039552 0.0288234167 0.155979618 -0.0531227216 -0.0605055392 -0.0713208541 -0.0868417323 -0.0402501673 0.0165239926 0.181012854 -0.160325661 0.0927709043 -0.0364443325 -0.0312899835 -0.109137982 0.111598797 0.11623574 -0.068648465 -0.0206921138 -0.13928856 0.0243328102 0.0555803142 0.132689178 -0.0608246513 -0.0354508683 0.172304466 0.0327581689 0.170413792 0.163448825 -0.0454982035 -0.0583826788 0.0481920801 -0.0540810302 0.182651609 -0.174388662 0.155782059 -0.0293228272 0.015085889 -0.108551085 -0.121923782 0.0719362497 -0.168311208 -0.120032616 0.0659890622 0.115256436 0.131331578 0.119614907 0.178487614 0.09089607 0.00386154489 -0.0554215722 -0.0119620096 -0.0467320494 0.0933647081 -0.0323850662 0.141180277 0.107638344 -0.0253946837 0.173948079 0.137527362 -0.0205912776 0.010461146 0.142752916 -0.0192131344 0.107507631 0.14598392 -0.0370280705 0.0341507122 0.138847277 0.102246776 0.067923449 -0.00280428468 -0.0519020297 0.0715199634 0.151722491 0.00090766669 -0.0237915833 -0.00762919895 0.0348006599 0.0952979177 0.11985556 0.143176049 -0.00832088478 0.0575121641 0.0766030177 0.0191355087 -0.0326572359 -0.104509436 0.0300794542 0.062483415 0.132271856 0.100359082 0.0538696684 0.0351427197 0.071405977 -0.040407598 0.157145143 0.0427171327 0.031071905 0.085857898 -0.140569568 0.0797872916 0.160493046 0.0627081841 0.0700800642 0.0625736564 -0.0243169125 -0.0110870786 0.150555253 0.057589937 0.166157231 0.120003015 
-0.0579976961 0.0612958968 -0.0452432111 0.054822579 0.0524013229 0.149773791 -0.0714917257 -0.023807399 0.0454889461 0.0992185473 0.0658304542 0.02966832 0.101745747 0.0872673169 -0.125905886 0.0570483804 0.135766774 0.113360628 0.0364370681 0.0223136339 0.0019436914 0.0164411664 -0.0682152584 0.0921245515 0.0128188692 -0.0173182599 -0.00139826769 0.0518905111 0.0928640962 -0.0397072323 0.103173278 0.00413324265 -0.0670715123 -0.0588557124 -0.0857635513 0.0062935818 -0.0887346044 0.138430178 -0.025497932 -0.0639876872 0.0670730025 -0.0446693785 0.0594656765 -0.0820678324 0.157999322 -0.182115525 0.00614317786 0.0362051241 0.0657482669 0.106698424 0.191083074 0.135481074 0.0106980857 0.00957398489 0.0367676988 -0.0136649683 0.0735901445 0.0689036474 -0.134390131 -0.0718721896 -0.00467563979 -0.00951108709 -0.0722589716 -0.00359070604 0.0947275981 0.126477376 -0.0131597025 -0.12138617 -0.0347730219 -0.00653850706 0.135745063 -0.0925999731 -0.0944521725 -0.0723555461 -0.153894082 0.188464135 0.0043896623 -0.00922763348 0.213275999 0.00525289867 0.0494968928 0.0589311495 -0.07227844 -0.0655579418 0.173283234 0.0376433432 0.156717747 -0.078233324 -0.0844314247 0.0813227743 -0.0925660729 0.124083593 0.153772607 0.068052493 0.0846082121 0.127492517 0.147963956 0.0722059608 0.154211655 0.0819868073 0.111736804 -0.0420656316 -0.156026006 -0.135763094 0.132754937 0.110232912 -0.119001575 0.119631797 0.100629732 -0.0977817997 -0.0254859347 0.0714960396 0.0816458389 0.0694845393 0.107111402 0.0238910895 -0.11218477 -0.117907912 -0.0337541923 -0.114325784 -0.114220396 -0.153953075 -0.0372015573 -0.0811879858 -0.0323405489 0.128496513 -0.0856468379 0.0182948634 0.0260079242 0.0171604026 0.0346086845 0.114011452 -0.0935687795 -0.011811249 0.131105796 0.0234864186 0.0406528525 -0.114756532 0.122138247 0.0470963418 0.0791598186 -0.0303514749 -0.0687026605 0.194362417 -0.022815939 0.0876949206 0.0480690859 -0.0250811335 0.148164272 0.0488567054 -0.0881642401 0.198841885 
-0.0379917733 -0.00708210841 0.041188851 0.0292753335 0.0645876899 0.00623326236 0.0477512181 -0.000584310852 -0.106165297 -0.015090609 0.0250555836 0.0323013403 0.041745469 -0.133533582 -0.0977768302 0.0384080522 -0.014046954 0.0909342691 -0.0820525289 0.132567018 -0.0924441591 0.155193165 0.110916458 -0.0571177192 -0.141427621 0.0474443957 0.0907076299 -0.064002499 -0.0244310405 0.0177996214 0.0721451417 -0.00413550809 -0.0516352393 0.0421805531 0.131461561 -0.0123250391 -0.0480676852 0.0910230353 -0.0799057558 0.0509942733 0.112865351 0.104875125 -0.085275501 0.0623678714 0.0686701387 -0.080322735 0.0964362845 -0.0460433923 -0.0657152012 -0.074650757 -0.0327339992 0.16137737 0.0697549805 -0.108303167 0.00211762171 -0.0693195313 0.00135752186 0.013155547 -0.0307769664 0.0750898421 0.0616175942 -0.0536347926 0.0857256502 0.0237529613 -0.021395212 0.00901291613 0.00728149712 -0.113782011 -0.0464270264 0.167675585 0.0525661036 -0.0210970417 0.156918585 0.061230965 0.0992827117 -0.0678927675 -0.151397571 0.075506404 -0.0497730784 -0.0540236272 -0.0624297559 0.0426682606 -0.0725995973 0.076271072 0.116657615 -0.0210639741 -0.0213112682 -0.0862966105 0.0802445114 -0.0198064968 0.176064715 -0.0988808721 0.101349302 0.119555868 0.128017023 0.0522831939 -0.0366001837 0.145147249 0.0257630255 0.0434764959 0.111463912 0.0327721126 0.123577833 0.0872525647 0.162450716 0.199674487 0.164900869 0.0993724763 -0.144233376 0.0194361061 -0.0317708701 -0.0597182736 0.0684302226 -0.0456766337 0.0549333505 0.101966113 0.0526848994 -0.118291982 0.0568728857 0.125453085 0.107232653 -0.0476998277 0.135429114 -0.130028173 0.0840768516 0.158564597 0.0256799646 -0.0523730144 0.031615708 0.0914076194 0.188867077 0.143099532 -0.0071679526 -0.0894442722 0.0995981768 -0.0183322281 0.0598267131 -0.0731880441 0.0917812809 -0.140130281 0.00585151743 0.00310393353 0.152784497 0.158248886 -0.137339488 0.0995804071 -0.0764262974 0.171144649 -0.0672199726 -0.0027869083 -0.102201961 0.107543819 
-0.0715040565 0.214907989 -0.0438209847 -0.119756781 -0.00894473586 0.137930363 0.126813769 0.0999665186 0.0293341558 -0.0930798054 0.0649531037 -0.101108015 -0.0516813099 -0.0952921212 -0.0980607495 0.0295815989 0.0134664373 0.0469121002 0.0314295888 0.116126269 0.144171268 -0.109329514 0.102265686 0.0232829526 -0.0721712261 0.0460076891 0.00450235466 -0.078920044 0.120493911 -0.0557029285 -0.0781340078 -0.113349713 -0.146189392 -0.0182035994 0.10833291 -0.0549698845 0.111201309 0.0124426633 0.0892337486 -0.10926117 -0.00912767928 -0.0338271856 -0.105445758 0.211141855 -0.119850591 -0.180531412 -0.00868519023 0.218875796 -0.17652452 0.0990117118 0.0245204382 0.149801061 -0.00586622301 0.0881711915 -0.0257251803 -0.0817466527 0.0768139511 -0.0895878077 -0.107276358 0.0430153459 -0.162487656 0.0267249998 0.130476043 0.0166731309 -0.127173543 0.0398012064 0.0680280626 0.0879124179 -0.0295924786 0.0133784497 0.000692039728 -0.0751812905 -0.0830598176 -0.131929606 0.135406211 -0.112499252 0.0126099214 0.00665883068 -0.00475356216 0.0249228943 0.102894537 -0.0225983374 0.061107967 -0.0330257192 -0.0597277209 0.179125711 0.050645031 -0.0669407696 0.158360988 0.205021739 0.00652983878 0.121627569 -0.0640201867 -0.0522308983 0.0900547057 -0.122924723 -0.114422083 0.0658481047 0.0173800383 -0.0786071345 -0.0717952251 0.0280309897 0.0219677705 0.0759255365 0.084643513 -0.0923913196 0.166452676 -0.0389173701 0.0821120963 -0.113245606 -0.0364514329 -0.0393794179 -0.0335422643 -0.0305757262 0.0866778567 0.127289161 0.0190164503 0.0835306719 0.152247652 0.138753071 0.130225837 0.00651189126 -0.148018956 0.0714074373 -0.10346128 0.143939614 -0.0185987595 0.0229391046 0.101105355 0.0875614211 0.168692231 0.0672137067 -0.116006561 -0.069646351 -0.0440914668 -0.0887304097 0.0605254248 -0.0931111053 -0.0291967671 0.0453826375 -0.0647103488 -0.08282765 -0.0912294909 -0.0972726122 -0.000134341666 0.117167793 0.14967677 -0.103549697 0.070657745 -0.0581128635 -0.1150591 -0.0362361856 
-0.00619550841 -0.0881526992 -0.0659523532 0.0312618017 -0.0897310749 -0.0618666895 -0.0287295692 0.176449448 0.159317598 0.0456931591 -0.0967225209 -0.0319313519 -0.0248310566 -0.021381963 -0.0773523748 0.0478850566 -0.0207856018 -0.0701669753 0.147760212 0.0727391243 0.0043316409 0.159602404 0.0948895067 0.0890363902 0.00142308639 -0.0310148094 0.0753311291 0.0981000364 0.0223886538 -0.0931696519 -0.0682313293 -0.0667512491 0.0491678901 0.0186522752 0.137692913 -0.0236726794 -0.00927374046 0.114333265 0.104560494 -0.0608054437 0.204097703 0.205062792 0.094779104 -0.0898220837 0.0206578374 0.142832264 -0.049721241 0.0423907191 -0.0763793141 -0.093291074 -0.0389654711 -0.15401794 -0.00734718097 -0.0189076923 0.170103803 -0.0205226652 0.0636992827 -0.0644718111 0.108284459 0.138413697 -0.0257913806 0.00794647168 -0.0307983607 -0.0936900675 -0.0606061853 -0.154241458 -0.00459569367 0.141765147 -0.117230393 -0.0629995316 0.125530377 -0.0406536944 0.200848967 -0.104435295 -0.123753838 0.0367464684 -0.141358763 -0.0206649359 0.202375904 0.022151649 0.0812491998 -0.131115228 -0.0367442705 0.172151536 -0.0442129597 0.0605035909 -0.082755737 0.0954060331 -0.0230737943 0.0334409699 0.13487561 -0.110517241 -0.0225322787 0.062106967 0.119216867 -0.0953765139 0.14884612 -0.0287193451 -0.0106208287 0.153283879 0.0467924252 0.177715346 0.068970874 -0.0450268574 -0.0325655416 0.0547090545 0.0860708728 0.0107891522 0.0944289416 -0.0758065358 -0.0732419118 0.183351845 0.0771918073 -0.140022755 0.0537439361 0.149029925 0.131033957 0.0027445287 0.107114777 0.0976283476 0.156666949 0.192800567 -0.0668931082 -0.048805628 -0.0531376973 0.00703070173 0.135483757 -0.0116650816 0.0274772495 0.0685842708 0.0897314772 0.168943107 -0.0672629923 -0.132059872 0.120118931 -0.0828481987 -0.0856146142 0.0526181571 -0.0933162645 -0.00942354929 0.169790015 0.0899278149 -0.112179808 -0.0518537723 -0.0197913386 0.107157297 -0.0402358808 -0.0525892675 -0.0279793683 -0.0564811975 0.111123219 
0.0285395123 0.103563443 0.0289735198 -0.119780019 -0.138365477 0.0683289915 0.0212225579 -0.136515081 0.178696275 0.0141859809 0.0798230842 -0.0112462975 0.00572248595 0.165446609 0.1772625 0.0877416283 0.0257206522 0.0255760346 0.109550558 0.0173372477 0.0941226184 0.156007677 -0.0609857477 -0.168111339 0.112304233 0.141951621 0.0942777917 0.0506730452 0.0117849326 0.110127218 0.11164064 -0.0365206338 0.17797333 0.0919450074 -0.0340361036 0.0371871293 0.137831658 -0.0145821422 0.171712354 0.0708972588 0.00538893556 0.138060763 0.0720485598 -0.102158196 -0.00157117634 0.161901429 0.164550751 0.0551112816 0.00221332046 0.163098738 0.0364854559 0.137255967 -0.0139900306 0.116951399 0.0309598278 -0.0843858272 0.149897203 -0.0105481371 0.129127055 0.0816876367 0.0559929311 0.109518707 -0.0339205861 0.102632456 0.00931944605 -0.00911251083 -0.0575628281 -0.0238492247 -0.13892445 -0.0204126779 0.140317202 -0.0053701899 0.0747784898 0.0447182879 0.0408651866 0.00653599948 0.0279005133 -0.0747578964 0.0836786777 0.115476467 0.0596954301 0.0199178606 -0.0829307586 -0.0326780863 -0.113550745 -0.0547465198 -0.0249119569 -0.113830656 -0.0401589163 0.093042478 0.0902988166 0.0128500452 0.0456254408 0.0772466585 0.159909874 -0.0500820577 -0.0781232342 -0.051164262 0.0975523815 -0.0487125441 -0.0570325479 0.0562490486 -0.0282622576 0.0981458947 -0.131002113 -0.157566205 0.0727057457 -0.146405846 0.146773353 0.101571307 0.130754113 -0.0140098277 0.119490281 -0.134308934 0.0664061159 0.100537203 0.152347729 -0.066916585 -0.0918895528 0.0884105414 0.150467262 0.190053374 0.0404316857 0.0131604439 0.0616949201 0.0204220749 -0.00819397438 -0.0989773422 0.135392219 0.034526363 -0.0443142466 0.097198464 -0.0308621768 -0.116274439 -0.0293579884 -0.0962141603 -0.0434402823 0.199946746 0.112885021 -0.00315347826 -0.0176970661 0.0463292599 -0.0291460063 0.0120348148 -0.00303878007 -0.0146520687 0.155439973 0.0927805379 0.112616234 -0.10787008 0.199581042 -0.0556919798 0.0174912345 
-0.147659183 0.00716985716 0.0517262444 -0.132578805 0.0649443641 0.0491473936 0.0128262425 0.0570766106 0.14404805 0.0125820432 -0.0378993787 -0.0890177265 0.0629585683 -0.0781803727 -0.0468946658 -0.0970080867 0.107453912 -0.0726181269 -0.0156788174 0.109249413 0.179553613 0.0696004778 0.0497379862 0.1262182 0.070380047 0.036693722 0.153491363 -0.0165516399 0.11358387 -0.0461250581 -0.0716388002 -0.063194491 0.182627037 -0.0979600772 -tensor_0bias 50 -0.0687436238 0.121526092 -0.0379632339 -0.0472203717 0.085131444 0.0979627594 0.12071842 0.0766481757 0.111980788 0.0231025815 0.0757955536 0.0592933744 -0.0364503562 0.118033081 -0.0119619071 -0.0852706879 -0.057033807 0.104488559 -0.0188827403 -0.00805457216 0.106946483 -0.0220598206 0.0901777968 0.154047132 0.127174735 0.0643620193 0.119487718 -0.0108337859 0.10897246 -0.0441434234 0.0685871169 -0.0381012894 0.0594230555 0.0113021079 0.163605392 -0.0388617851 0.0634208769 0.104776219 0.0779020116 0.0259756818 0.152426898 -0.0797943622 -0.0156979878 0.161263436 0.0584381446 -0.00747399358 0.0999026075 0.0664042234 -0.0277017541 0.0495129935 -tensor_12bias 50 --0.0650987327 0.0561347082 -0.0524960756 -0.0606846772 0.0553311668 0.084040381 0.0655064732 0.132255286 0.0351522863 -0.0857200176 0.0463379882 -0.138509437 -0.0309930108 0.0726053342 -0.0611225963 0.040046718 0.0333271585 -0.139368355 -0.115387805 0.035529051 -0.0753710642 -0.108340122 -0.165888399 0.0588014238 0.0744016021 0.0737710446 -0.166636527 0.139449701 -0.107233316 0.166755453 0.111904733 0.142458484 -0.10559769 0.17358239 0.0248687863 -0.00832488388 -0.00902078301 0.0966997072 0.16634649 0.0510999039 -0.11830131 -0.137912169 -0.0549811572 -0.140467361 0.0248682722 -0.0492456779 0.132404506 -0.136645094 -0.00630686618 -0.0665986538 -tensor_18bias 10 --0.0286833197 0.0315113701 -0.0158580411 0.0455938913 0.0954583585 -0.105117157 -0.0738683939 -0.119185224 -0.0686925054 0.0992293954 -tensor_14bias 50 -0.0160144269 0.0659304708 -0.134516135 
-0.125120386 -0.126968578 0.011852067 0.111996368 -0.0366256982 -0.0781780854 -0.00189105608 -0.0748870224 0.11818061 -0.0440538898 -0.0143895745 0.0724157915 -0.0405166261 -0.0633764267 -0.0380873531 0.0670853108 0.0602433793 0.0957999155 -0.0834713951 -0.0430925563 -0.0397071578 -0.0210147407 -0.00462661684 0.0973884314 -0.153825372 -0.147843331 0.0121727288 0.180789441 0.0183312204 -0.130684286 -0.10787309 -0.049283646 -0.0444232263 -0.059754774 -0.0348485857 -0.159336865 0.0452591404 -0.0289487373 0.0982468277 -0.0173284747 -0.102011278 0.0217117742 0.0264923107 0.137902901 0.00199478748 -0.0913077295 0.0711041912 -tensor_4weight 2500 -0.0806722939 -0.0843013674 0.0457266495 0.0729286149 0.077442795 0.0629948378 -0.0479649454 0.18801184 -0.13502112 0.139153555 -0.0434509926 0.146485865 0.113608092 0.0014678 -0.00269440887 -0.0458229147 0.10888987 0.0207153317 0.182204321 -0.0768271685 -0.0549781434 -0.142413139 -0.0730496719 0.179332584 0.0324325487 -0.133773685 -0.0963223055 0.0017872625 0.0347659923 0.125833228 0.0962186158 0.0585900992 0.135499209 0.1581707 -0.0280255843 0.0394914299 -0.136502922 -0.0513676554 0.0243021026 0.13252829 0.0698634982 0.016627552 -0.0407816991 0.0601785071 0.0856543556 -0.0628332347 0.117408261 0.100019909 -0.0541871078 -0.0604645647 -0.10079059 0.129933119 0.0221414883 0.0313292705 0.0785076022 0.0515565313 0.103933319 0.130206779 0.0966112837 0.0613389611 -0.0399818122 0.185782522 0.0632089376 0.203524143 -0.00186416006 0.0119111966 0.175457805 0.0410548635 0.132948205 0.145901531 0.0722996518 -0.0906703025 -0.0737293884 0.114364773 -0.0281930994 0.0720122755 -0.008361076 0.090595901 -0.0460113436 0.0419355966 -0.0804221854 0.0608950555 0.170809716 -0.0476390161 0.169650152 0.0800267011 -0.148617357 0.0836865678 -0.0557933412 -0.0582912862 -0.0435073562 -0.0371096209 0.083936885 0.141528875 -0.0368124209 0.162937 0.182462409 0.0500146635 -0.0173346996 0.00344588282 0.077063866 0.0220477413 0.0806412846 -0.00341189606 
-0.101663046 0.0451156981 0.0767283887 0.0141340708 0.039356716 -0.0705280155 0.0963134021 0.0619241111 0.0269989092 -0.021566296 0.145353943 0.0327979699 -0.0733648017 -0.0134483287 -0.0573412068 0.0651545897 0.0168404263 0.0243993159 0.0994131193 -0.0538567007 0.00572972698 0.0229900386 -0.0910499841 -0.105468161 0.0651908889 -0.0938435644 0.0910287648 0.0970317647 -0.0301273968 0.131670371 -0.035016574 0.0178018566 -0.117108293 -0.104218014 -0.0651276186 -0.0432341956 -0.0111995684 -0.113138527 -0.0553993024 -0.0827366859 0.111415848 0.0760915801 -0.036721129 0.0393031836 -0.107385024 0.0509901345 0.137131959 0.101355053 -0.0619110428 -0.0586683974 -0.125746161 0.146015525 0.0182545464 0.101042837 -0.103411034 0.141968071 -0.110162877 0.0819647014 0.151268393 0.181896359 -0.0776448846 0.158234477 -0.0236076955 0.162451014 -0.0745234191 -0.0891344696 -0.0165763423 0.0465359874 0.164738223 -0.0147009594 0.136529386 0.119493932 0.123288825 -0.0328544565 -0.0433127023 0.142125174 0.104210556 0.165809229 -0.0778093338 -0.0373858176 -0.0823435411 0.0155735873 0.103326514 -0.05250616 0.0069106333 -0.0459599681 0.0475454628 -0.0535901822 -0.0878656879 0.188019454 0.0936229303 0.00049323542 0.111949839 0.101932622 0.111278057 0.00650064787 -0.132303327 0.154887334 -0.0368106291 0.185657039 0.078358531 0.0830566436 0.000230199876 0.1663609 -0.110823177 0.0969691128 -0.0866530016 -0.0828108639 0.063297838 0.0365174599 -0.0799051747 -0.00249398779 0.0037801282 0.175893486 -0.0303821024 0.17547828 0.156276211 -0.0808659643 0.0175747536 0.0641925558 0.132568434 -0.107452586 0.0114268949 -0.0109794568 0.105085135 -0.00249309023 -0.105099067 0.11644727 0.00391933694 0.0913905948 0.195951268 0.0354405977 -0.00441903574 0.0888798311 0.0769788325 0.0180195644 0.0349424444 0.00618674606 0.0450688228 0.0371989682 -0.0904219598 0.0357578248 -0.0248050801 -0.019140562 -0.0610508397 0.0400745049 -0.0465866067 -0.0127383219 0.136099428 -0.00988076627 -0.0224271286 0.138736099 
-0.0945356563 -0.0887529403 0.00517961383 0.000571548939 0.0760833472 -0.0421531834 0.107855894 -0.12219803 -0.120920695 0.010690853 0.00134639442 -0.0446796417 -0.0908018351 0.0188111514 -0.117398165 -0.00559857022 -0.112447143 0.0241752416 0.051861912 -0.0751599744 0.0727101266 -0.0994263515 -0.116854861 -0.109606072 0.0153764635 0.0296985656 -0.094453536 -0.123156615 -0.0982722938 0.102426931 -0.038060952 -0.127354875 -0.0424764156 -0.141495243 -0.136656374 -0.0849142522 0.100465015 0.0261948798 -0.0149132377 -0.095423542 0.0603073835 0.0239272416 0.0944917873 0.0912092179 0.0132168755 -0.0580653921 0.0647564083 0.0321055204 0.0641190782 0.107408777 -0.0238600653 0.0974822938 -0.151330233 0.0900393799 0.185146719 -0.0562634654 0.19377704 0.0260389671 0.149799034 0.0271496754 -0.0140250009 0.0241740346 0.0609554648 0.0848416314 0.0207572728 -0.107217379 -0.0550570227 0.0541072674 -0.0474922284 -0.0888904482 0.0580282025 0.0328076519 -0.00170895853 0.0491873212 0.090218015 -0.0734803379 -0.0112014636 0.150287092 0.16178152 -0.0178813841 -0.030176945 0.175077632 -0.0382624194 -0.0120699406 0.0354120433 0.0163750257 0.116489731 0.138228595 -0.00104773929 0.02116061 0.195760205 -0.113974452 0.204999462 -0.00420999806 0.0197493862 0.140327349 0.145642623 0.113952592 0.0668037087 0.144647643 0.138338432 0.0301354099 -0.092116423 0.141155869 -0.0757502168 -0.0111120678 -0.019430887 -0.132206604 0.0304258037 -0.00902231503 0.023573963 -0.11457108 -0.0038465804 0.0600269213 0.185435995 0.103536278 0.108110771 0.0262743887 0.092287004 0.0180775113 0.0580065064 0.109430514 0.167516813 -0.0948597863 0.147749871 -0.0377445519 -0.16559723 0.103454545 -0.0619672574 -0.0660705566 -0.0222212803 -0.00183966081 -0.0677803308 0.0315424129 -0.00417117588 -0.144289106 -0.0828239396 -0.146710843 0.0357453451 0.00331253489 0.0235776883 0.0018393771 -0.0240897052 0.112991959 -0.097301051 -0.0531368554 0.102575697 0.224759638 -0.100583948 0.027663447 0.0663552508 -0.0544704907 
0.0913643613 0.10431046 0.14408429 0.160526797 -0.0272651091 0.128108725 0.137256622 0.0514451601 0.0290343836 0.0522942841 0.169628382 0.0517538302 0.039717 -0.112903044 -0.0319129899 0.142312348 0.16764465 0.00277794432 0.155595258 -0.016347399 0.0998492464 -0.0829867125 0.0122846849 0.10995502 0.176780567 0.166116044 -0.0651847348 0.0968866721 0.0796400309 0.156421289 0.00979311764 -0.111015052 0.100035012 0.198834509 0.104847461 0.0955422893 0.0701622218 -0.00574288098 0.0388571136 0.0862576142 0.0804817602 0.0700528994 0.0890722573 -0.00526280887 0.0880217403 0.108766705 0.0439562909 -0.136678606 -0.017526824 -0.101755708 0.150479943 -0.0441651195 0.0611818954 -0.010830123 -0.0615075193 -0.0789036453 -0.0960501432 -0.0448041894 -0.114129215 0.157983571 0.0660151616 0.00131378241 -0.0953527689 0.0812098756 0.0714970827 -0.0841728672 0.0815933347 -0.0241262466 -0.0606837049 0.117682979 0.170510948 0.0609742589 -0.0866294503 0.0337947756 0.0836874992 0.128505945 0.0142021542 0.00716301799 0.073032476 0.096828863 0.0873111039 -0.0305738319 -0.017774554 -0.0532108061 0.180189192 0.0185202211 -0.0507842451 0.100284688 0.0385177433 0.0343939774 -0.151525408 0.161509618 0.0328486934 0.0315718576 -0.0216901544 0.011822544 0.0440483205 0.123011395 -0.0785683393 -0.0336200632 -0.0159502272 0.148681283 -0.0669046566 0.185322538 -0.0733356997 0.0739779621 0.0648668483 -0.0301269554 0.156339601 0.0585463084 0.155070648 0.18907924 0.0647668913 0.013078318 0.167015359 -0.0203347579 0.134109989 0.110318691 0.0168762747 -0.00621265173 -0.0438371375 0.0293022711 0.0136860888 -0.0961167067 0.131283402 0.0199183244 0.0874097347 0.109514065 -0.0711590275 0.13801989 -0.100633904 -0.0766485333 -0.0797629207 0.120990887 0.117599219 -0.0800174996 -0.0554481633 -0.0130264247 -0.0096846018 0.0329468772 0.200460345 -0.0798182935 -0.127444193 0.137921482 0.056331329 0.0758561566 -0.0259927046 -0.00583240716 -0.151809007 -0.0616948605 0.0165051967 -0.106223613 -0.107458085 0.0237796139 
-0.133243531 -0.0831126943 -0.012560742 0.0367795378 0.00879683718 -0.121515289 0.0290033501 -0.0651801005 -0.0471335575 -0.00044152551 0.112791196 -0.156063318 0.0274669975 -0.00387126207 -0.142437324 -0.133588076 -0.0751931593 -0.0703300163 -0.0889332145 0.0192210358 -0.0770214796 -0.142161205 0.118518829 0.015532054 0.00297008874 0.00619109394 -0.0286132246 0.067848444 0.0446564294 -0.12528789 0.0658862889 0.142027885 -0.0245133974 -0.0243382379 0.0615522414 -0.131100833 0.0117956251 -0.144661099 0.153534442 -0.00748422509 -0.101551078 0.140787482 0.120413505 0.133537158 0.109931737 -0.076232776 -0.0067446162 -0.105740324 -0.0634061843 0.0939473137 0.119690232 -0.0357088707 0.0102475164 0.150871128 -0.000345803623 0.111536011 0.0299190637 0.191872507 -0.00425557932 0.0131858671 0.0683450401 0.011605869 0.0222013909 0.0556304455 -0.0517201163 0.161248505 0.0784498453 0.171983451 0.119539365 -0.0555509515 0.0169317685 0.00468148896 0.0350351445 -0.143832296 -0.123316839 -0.060894113 -0.00792651903 0.165182695 0.13920185 -0.0278753694 -0.0646031126 0.0390878469 0.103839591 -0.0264649615 0.159046769 0.161050528 0.056850709 0.0216505565 -0.019877946 0.0416690223 0.0680721179 -0.00811236072 0.0474281274 0.139705688 0.128775299 -0.134791732 -0.0200266857 -0.0365998596 -0.0124936523 -0.0767151639 -0.042266313 -0.0712475628 -0.0536471978 0.133768514 -0.0192902926 0.105865858 0.113762073 0.104997188 0.0901620463 0.066951476 -0.0920727104 0.150316 -0.115942262 -0.0646094009 0.051550284 0.106786288 0.0553277843 -0.135059014 -0.0844271183 -0.093783997 0.14749904 -0.0715771541 0.104918532 0.169773012 0.030166015 0.0254033525 0.0341539346 -0.0932782665 0.0505385213 -0.13566044 0.172710717 0.181072846 0.0247942675 -0.0522602275 -0.0928869545 -0.0629897714 -0.00787132327 0.160656855 0.119059108 -0.0577676259 0.130579263 0.103787817 -0.0639968142 0.0237170234 -0.0796101764 0.0789383575 -0.11092788 0.0240584183 0.0880425051 0.0497003458 0.0207255501 0.0609250851 0.121555626 
0.0423985943 0.169498548 -0.168339416 -0.0737465993 0.00344401528 -0.0481818803 -0.0240785405 0.138308004 -0.0498832725 -0.0870527998 0.135833338 0.0367706791 0.164695784 -0.0926531628 -0.0138947945 0.0515966341 -0.124790356 0.160091609 -0.08937978 0.0392833501 -0.0552154407 -0.0162713174 -0.0258723479 -0.065008454 0.0126740728 0.136108771 0.112826265 -0.0117993969 -0.0383974053 0.13958928 -0.111128941 0.0530200005 0.0264452137 -0.00290334155 -0.0446272232 0.061192058 0.175629675 0.0535970144 0.0791243389 -0.144284248 0.161759198 -0.0264586899 0.170231506 0.0360257179 0.0236983728 -0.0918620229 -0.124832675 -0.129897267 0.0827946812 0.167229131 -0.0483314805 0.0731398612 0.0330644958 -0.0419231206 0.147590339 0.120546743 0.0866150856 -0.0558574684 0.14509137 0.112841494 0.010361298 -0.0738257468 0.062864013 -0.000246174692 -0.0360012166 0.172877163 0.117604062 -0.143552348 0.169368088 0.0966829956 -0.0905596018 0.0989860147 0.143281475 0.0763563141 0.137610212 0.122151025 -0.139252588 0.0253664367 -0.0899616033 -0.0669621378 0.173054621 -0.0460386537 0.0831045434 0.136806592 0.134621754 -0.0229169969 -0.00980438758 0.0452408046 0.0591817014 0.186792865 0.00453559728 -0.0630519763 0.129970819 -0.0708865598 0.0169868432 -0.0856622308 -0.0697054416 -0.00249436265 -0.00108185853 -0.0849266797 -0.0896446258 0.205029503 0.0984538794 0.0920003206 0.0979186818 -0.0760004744 0.0389556028 0.154188663 0.0658197105 -0.0182720162 0.134750709 0.0715288147 0.17903395 -0.133035272 0.200460136 -0.0744331852 -0.0414500348 0.0895937532 0.0214252006 -0.022997003 -0.0672739893 -0.0548784323 0.166354895 -0.0431604087 0.165455922 -0.124262832 0.113898836 -0.166768521 0.00970084779 -0.0567515977 0.0607765876 0.183420077 -0.0727137551 -0.0270099547 0.0259942077 0.00637345994 0.0362093039 0.0634940416 0.22586067 0.0804543719 0.177989498 0.168853745 -0.0725347623 -0.130849689 0.14897649 0.0990756676 0.105376959 0.0459880121 0.037299931 0.0371170193 0.124092944 0.026399713 0.214453608 
0.165884897 -0.0445454419 -0.02944877 -0.0441051386 0.0706486255 -0.0643619671 0.107751079 0.0106587159 -0.0160649233 0.0853765532 0.0439129174 -0.0179767329 -0.0966302827 0.153880417 0.158972874 -0.0232971646 0.109733656 0.118792728 0.111737549 -0.0411141589 0.128475308 0.177355379 0.088045463 0.0918510482 -0.0182551499 0.0349350236 0.0172623489 0.0358161516 -0.0553316772 0.00500165345 0.0473173968 0.0152386809 -0.102430955 -0.0801292434 0.142120838 0.182653144 0.0498294421 -0.0422114469 0.0125372913 0.165790632 -0.0877576023 0.0534564219 -0.0601202659 0.04567682 0.0359176025 -0.0493109711 -0.0345178694 0.0486616641 0.179762542 -0.0616127439 0.142689958 0.173905298 0.13620089 0.0958447605 0.0267633125 0.0164805949 0.0387719236 -0.0923323482 -0.025394721 -0.0696693659 -0.0885034949 0.155368611 0.175244749 0.108128108 0.0818990022 0.146583825 0.0607022159 -0.0263073556 0.0531130992 0.0492566414 0.0426749587 0.128532976 0.165642813 -0.0271078423 -0.0249717701 -0.00465310086 0.0946793407 -0.0277413856 -0.0293258466 0.153490797 0.0542890653 -0.121490426 0.0403553173 -0.0457720421 0.0372848473 0.1336312 -0.0569381975 0.0157627482 0.183314934 -0.098882556 0.0972879678 0.133379266 -0.13416934 0.127192289 0.114743538 -0.145406723 -0.141376868 -0.00748612825 -0.0768275931 -0.00242518331 0.0610179976 0.11269661 0.0289160293 0.134316772 0.0604642555 -0.0706829354 0.0602234714 -0.00745525956 0.128726706 0.02637784 0.0765895173 -0.100985415 -0.103230231 0.0484438539 -0.0102437539 0.117040537 0.130066067 0.0934641883 0.035608504 0.080431819 -0.0183086582 -0.0162367485 -0.000762896263 0.091369085 0.0433430262 -0.0226776432 0.0331841335 -0.0515708551 -0.138115823 0.111762553 -0.00960157253 0.0194415804 0.0162233952 0.0687385723 0.00495963311 0.124809526 -0.0617128015 0.128910005 0.124350287 0.123989262 -0.0698941946 -0.0828819647 -0.051271636 0.108241625 0.090747878 -0.0240470748 -0.0150890118 0.159876198 0.111201644 -0.107370481 0.041435346 0.0879196003 -0.073871471 
-0.00736038294 -0.0421624519 -0.00446702167 0.0206496771 -0.0786093399 0.00770913251 0.0940739587 -0.143417105 0.161424622 -0.00790184364 -0.0727001727 0.042316515 0.141635984 0.168245554 -0.0294214915 -0.159648478 0.112373084 -0.0489424169 -0.0889650211 -0.136385739 0.0504631549 -0.0346960463 0.0639858767 0.0742279962 -0.0959718451 0.125432774 0.0435161628 0.0543604121 0.12924619 -0.067039676 0.0832744464 -0.119794376 -0.0892888829 0.144032732 -0.13564758 0.0327132791 0.0262426939 0.0729919598 -0.0233827997 0.0430958606 0.103070885 -0.00284027657 -0.131751791 0.0218737386 -0.0669141933 0.0876880065 0.108192131 -0.00722055649 0.0314042829 0.00201363396 0.0693058148 -0.127397463 -0.11308068 0.095366247 0.0613252074 0.0767963976 0.109912105 -0.023631271 9.64457431e-05 0.0741448328 0.0674567968 0.0592180379 0.120218024 -0.138540611 0.186128601 0.0634339973 -0.066954501 0.123150513 0.00262892642 -0.156880677 0.0271030273 0.0389812775 0.163095251 0.0937159061 -0.119892217 -0.10029912 -0.0113538243 0.117122836 0.0958641991 0.0554464087 -0.0611852631 0.0309960768 -0.148533225 0.0168368462 -0.0148341283 0.0199822951 0.0840069354 -0.0250799228 0.000891973905 -0.110700309 -0.0532766916 0.0793971419 0.0984170437 -0.128924787 0.0291142873 -0.0578225479 0.0482807197 0.0368235111 -0.0756311119 0.056336727 0.168977603 0.00820702594 -0.0148615483 0.128323391 -0.0488858111 0.0030394888 0.0392629169 0.0217599515 -0.109780788 -0.124327026 -0.110027082 0.079080537 -0.0519953929 -0.0504782349 -0.00701974798 0.17692171 0.143427163 0.152141586 0.0693683028 -0.115196042 -0.0221416522 0.169055194 -0.0540919825 0.182789385 0.139691234 -0.074015893 0.108184151 -0.0935382247 -0.0598740615 0.0335229784 0.150850862 0.111152209 0.163470238 -0.111606114 -0.0112746516 0.00895981397 0.146160573 0.137397975 0.165998906 -0.0262579694 0.0310111959 0.0895001888 0.0290670171 0.148392752 0.10005831 0.0260470044 0.0666432157 0.14119634 0.145734191 0.140673295 0.0609008037 0.0760866255 -0.0274666939 
-0.0857639909 -0.093561694 -0.0673863441 0.06305594 0.183098152 0.0362807289 0.179879576 0.187368259 0.0867011249 -0.00191641552 0.179400802 -0.178453162 0.0443644077 0.14893277 0.130691677 0.190664202 -0.00733991154 0.0375372507 0.139617547 0.0207137242 -0.0660620481 0.121428333 -0.027175935 0.0812105387 -0.0167020112 0.0997308716 0.0814295784 0.00100216595 0.0675137788 -0.0446306355 0.11655578 0.0790163651 -0.0364569351 0.043299146 0.0583344959 0.0861434639 0.0999846533 -0.101646118 0.0544962138 0.111912884 0.13398391 0.192645401 -0.0995487198 0.172990069 0.0454724953 -0.0182189811 -0.0639074966 -0.117950983 0.158961445 0.0991295949 0.00396099035 -0.120374672 0.168334991 -0.0206933524 0.0865943655 0.161322176 0.0555427149 -0.0746511817 -0.0254731867 0.148725659 0.114840917 -0.0996649787 0.0825758129 0.00449527614 0.162873149 0.14936614 0.156525835 0.0766895339 0.118420944 -0.0548048988 -0.167635486 0.0756825805 0.059926942 0.0492656752 0.0400654711 0.0896347836 0.0765077025 -0.0438671187 -0.146087736 0.117657624 -0.0255973134 -0.11745102 0.0930163413 -0.0821457729 -0.115750015 0.0327894762 0.120342232 -0.0219539329 0.190586492 0.0217166767 0.0146391429 0.0616531707 0.103957534 -0.0640848204 -0.0858041495 0.0310945753 -0.102986038 -0.0559266806 -0.000226511125 -0.140096694 -0.119943008 -0.111525618 -0.022964308 0.0660581961 -0.140670016 -0.0539666936 0.0656664073 -0.146068677 -0.0434579179 0.0442539938 0.049366042 0.138595164 0.215925127 0.127935782 0.00820590742 -0.0747593045 -0.0842378289 -0.0417899489 0.0216342304 0.0241500065 0.10414844 0.0635119677 0.109194174 -0.0664032325 -0.0841010138 0.0333074108 0.144626364 0.0835791081 0.0405562595 0.144938678 0.113412 -0.0413297117 -0.0116025591 0.116451755 -0.122508198 0.115656048 0.118746422 0.149316311 -0.0756765008 0.162036806 0.136063144 0.0960770398 0.0914931074 0.00234524277 0.141637772 0.0776848495 -0.103856735 0.0223964415 0.0540647469 0.154280543 0.0363733396 0.020892188 0.0519513749 0.00717404438 
-0.0717171952 0.0605637506 0.123303227 -0.0711054057 -0.0547375344 0.0238987729 0.122411825 0.0981374756 -0.0796336755 0.181836978 -0.0139241079 0.0208457373 0.0578660555 0.115007117 -0.152290791 -0.116023742 0.125889778 -0.0744427964 0.173140392 -0.0230522808 0.0991717577 0.0317968801 0.107756197 0.164815009 -0.0174482651 0.0639693215 0.0523474552 0.0462639593 0.140508741 0.0506025292 -0.0438129827 0.0276643373 0.0874049738 0.138593227 -0.0969195291 -0.0404044203 -0.0224188063 0.108781926 -0.100896388 -0.0309308395 0.125772789 0.028097406 0.0634060204 0.0683069155 0.145577833 0.183691531 -0.0496028848 -0.0776938125 -0.060156174 0.0218149051 0.197596177 -0.0154853165 -0.0412122235 0.0939057693 -0.118166968 0.1283319 0.111681804 -0.143919662 0.00493258471 0.126385331 0.151842475 0.186866894 0.0665669069 -0.0903968439 -0.0313272439 -0.0247976556 -0.169458717 0.054224968 0.0968870521 0.139724314 0.0172182582 0.103909202 0.155110002 -0.0126233418 0.190851286 -0.0473378785 -0.106794864 0.0950474441 0.108153269 -0.00215253839 0.0927259997 0.0746136159 0.000158840179 0.151475027 0.112737246 0.0532431304 0.054269813 0.129630253 0.164428711 0.0523424074 0.0814517438 0.016212143 -0.0117008882 0.0680367425 -0.0788285807 -0.0789092779 0.172827527 0.119092286 -0.0780554339 0.148280904 -0.0357619487 0.0404880531 0.139303714 0.152678803 -0.0313055441 -0.0433851704 0.145161822 -0.157154232 0.0209975056 -0.0296302848 0.020536093 0.0674732029 -0.0705216452 -0.0414924286 0.057908535 -0.169467628 -0.0330408588 0.182830229 0.0190448835 0.13370271 -0.0713856667 -0.0501033887 0.0232970063 -0.0963442922 0.0447021201 -0.118378267 0.189147756 -0.0498844683 0.0468240269 0.00958443806 -0.109032102 0.108971842 0.00711469864 0.00700109964 0.112477995 -0.138462275 -0.114364117 -0.0077861608 -0.143087372 -0.0425114706 -0.137003303 0.00309556606 -0.056427639 -0.084226869 -0.0175813958 -0.120090812 -0.131012186 -0.136584803 -0.0965648219 -0.0900525004 0.0984451473 -0.0295922905 -0.0215709601 
-0.157052785 -0.0698363632 0.128503025 0.0812449306 0.044423122 -0.0622849166 0.126199692 -0.0240686592 0.110058717 0.0588081293 -0.0629438162 0.0803009868 0.0551974401 0.00367958308 0.0953964517 0.096288465 0.0765576512 0.134714395 0.128183305 -0.0794131979 0.158218175 0.0334427804 0.103117377 0.118011713 -0.0353304408 0.0812821984 -0.0924249962 -0.0943661332 0.0499824919 0.172689974 -0.015012878 -0.00591862109 0.000791038619 0.0393676013 -0.087351352 -0.00550199067 -0.131391063 0.110083923 -0.0942325443 0.151406854 -0.0441909246 0.125668615 0.113547325 0.104156129 0.0539508313 0.04261766 0.0514821857 0.120158657 0.157996073 0.00543851778 0.145554841 -0.141709834 -0.0490901694 -0.0447609834 -0.0976633877 -0.0905513093 0.142786831 0.134974882 -0.0384936519 0.124396443 0.0711318851 0.0364370346 0.0655808076 -0.111105889 0.177323133 0.0175751373 -0.00530883716 0.0142328804 0.0944742784 -0.104841068 0.0137548354 0.072433494 0.0684480369 -0.0881074294 -0.0346771851 0.153584346 0.127561867 -0.036190562 0.0799537897 -0.0859517306 -0.0208766013 0.0334149264 0.0264191292 0.0374499112 0.142725974 0.0713385791 -0.0855041817 -0.0318115205 -0.070518069 0.0307250991 0.13150534 -0.072534509 -0.104207613 0.189150855 0.0493283272 -0.0275112167 0.166747719 -0.0543703809 0.0186180789 0.165644959 -0.0484347753 -0.101127744 0.1094006 0.185057849 -0.0443641394 -0.144589871 -0.0198352784 0.0630682558 0.152583927 0.0218491945 -0.122168265 0.0245404653 0.0799318552 0.0951262489 0.122852422 0.0709591582 0.147164628 0.0161824599 0.137200028 0.0640827939 -0.0755483732 0.0245481338 -0.0100152371 0.14329806 0.0801420659 0.0691645741 -0.0589840487 0.130164921 0.00311033521 0.0268984325 -0.0349549092 -0.00415431196 0.0867199227 -0.0820708722 0.164873272 -0.169460997 -0.0531368032 0.152747095 -0.165769219 -0.0328237601 0.208387405 0.047868643 -0.137711033 0.091696687 0.0407248922 -0.0470836647 0.145012021 0.0215788931 -0.0970180035 -0.0877238438 0.172740042 -0.0380049273 0.0389408693 
-0.0466512367 -0.0354783237 0.0639048144 -0.0723189265 0.104144089 0.0859282464 0.13152495 -0.128844962 -0.0214285571 0.115861677 -0.0982220173 -0.0596799552 -0.146179408 0.0965073407 -0.0615618378 0.0918795392 -0.0798900872 -0.160657704 -0.126372367 -0.0579259321 0.0660004467 0.161658168 0.104445606 -0.0138723087 -0.0934201628 -0.0294323321 0.00640385225 -0.0297847576 0.00359103805 0.0525301509 0.043393068 0.157615259 0.180452317 0.194085628 0.0691384748 0.134813935 -0.0515557639 -0.0708269849 0.088720344 0.0964737907 0.155151665 -0.0886232555 0.0660808086 -0.0829776451 0.101593263 -0.0553871915 -0.0182833746 -0.0800305083 0.178971991 -0.118555464 0.000954513147 0.00644796155 -0.0530595593 0.0575908013 -0.118949205 0.0164483711 -0.0739891082 0.157318026 0.0651573166 -0.0650493428 -0.0733206868 -0.0576313995 0.164077073 0.145226628 0.162652925 0.0094029773 -0.0282768738 0.00458042137 -0.0341153368 0.0680479035 0.00607198244 -0.00773193361 0.0329170078 0.0389629118 -0.162258938 0.0433447473 0.04725869 0.0874198675 0.0951827168 0.171968609 -0.0408742838 0.0696654394 0.0726477876 -0.0282292832 0.13275665 -0.0873321742 0.118847266 -0.094043538 -0.00783028547 -0.110805973 0.0371340886 0.011893562 0.0594444014 -0.0330062397 0.103927121 0.0566597134 0.070883058 -0.0379756056 -0.0413067117 -0.0425783545 0.111014336 0.0684353039 0.0692486465 0.0570905209 -0.0403401703 -0.0371561572 0.0209832303 0.115475081 -0.0771651715 0.1641756 0.132279456 0.11953865 0.152888119 0.161066189 0.0236473735 0.00623266771 0.0192979313 -0.0633362159 -0.0164176226 0.112811953 -0.0552775189 0.104069315 -0.00800814759 0.142864808 -0.119347326 0.0983854905 0.0419207662 0.172624946 0.13796024 0.14337796 -0.0860052034 0.0162426792 0.189442113 0.152368501 -0.0791355148 0.191711664 -0.045804102 0.0885845646 -0.0440708026 0.0840106755 0.142962575 -0.109646082 0.00183130568 -0.0525661372 -0.063833341 0.135730505 -0.0724909231 -0.0305184722 -0.0249376651 0.121382438 -0.053534802 0.0984386578 -0.0753171369 
0.0939152837 0.00636771461 -0.049575828 -0.0524887219 0.168064952 -0.160115361 0.164482102 0.0465373471 -0.0193462316 0.189078987 -0.0162147954 0.0448620357 -0.0965996385 0.029319942 -0.00714721577 0.113784157 0.0306320339 -0.162794113 0.036773555 0.132374078 0.127610669 0.050170999 -0.0961276665 0.119895853 0.139223352 0.0842405856 -0.0275717005 -0.048406817 0.0329983979 0.0270373188 0.0343525745 0.143185422 0.126393601 0.117207043 -0.111426808 0.00335491286 -0.0176657494 0.00746619329 0.0158684719 0.0560003184 0.162911817 0.169606015 -0.00238073198 0.110828638 0.170307085 -0.00346783875 0.0999374315 0.120749101 -0.0795692578 0.00236885715 -0.0515253469 0.150424793 0.105336741 0.0109604793 0.173142359 -0.0780003294 0.0945810005 -0.00646437472 0.180615485 0.165021613 0.0602739379 0.187165871 0.0765264705 0.127765179 -0.176980063 0.0931098312 -0.0693295747 0.00362776732 0.0865088329 -0.122369155 -0.0243129283 0.0619697198 -0.104171418 -0.0679299384 -0.0262254607 0.0717521831 0.177063763 -0.0804491788 0.0471047014 0.130668938 0.0129235433 -0.044484172 -0.110434927 -0.0484529473 0.056467887 0.161255106 0.0596067756 0.00202068407 -0.0361751877 -0.0691773742 0.13520807 -0.076868318 -0.00055724883 0.136181444 -0.144539505 0.0790223703 0.104204692 0.113801822 0.079020001 -0.00296835252 -0.0561508648 -0.0639912412 -0.096902214 0.063912116 0.0158059336 0.0301315952 -0.0454157777 0.0474643707 -0.0998102129 0.0738191977 0.104636416 -0.0670538545 -0.0157381035 0.0721107796 0.145137876 -0.0157069545 0.0764504448 -0.102792904 -0.0242816862 -0.0148483599 0.164760023 0.0949771851 0.0556184649 0.159742668 0.11568401 0.133465067 0.0253654663 0.0464388952 0.1051047 0.168289691 0.0721212029 0.0222112965 0.177081063 0.0332994349 -0.0857983083 0.168268591 0.0322159566 0.109546766 -0.0359725878 0.173063204 0.114196211 -0.0472102724 -0.0865413472 0.0990323052 0.106711067 0.124012247 -0.0643537641 -0.0489022098 0.0309291538 -0.139787465 0.154954955 0.158387884 0.175662607 0.0474990308 
-0.0359640867 0.0122991987 -0.0742847919 -0.116437078 -0.022644069 0.0906722546 0.109451734 -0.0519334488 0.0178270023 0.166177243 -0.0722740144 -0.00594325503 -0.054272633 -0.142582893 0.0954159126 -0.00533125736 0.0493725352 -0.146273687 -0.00553551223 0.126585066 0.078031756 0.0408783071 0.0403124169 0.0905888006 -0.0999932885 0.0324288867 -0.048167184 0.048798237 0.0910838693 0.010453077 0.0368673541 0.126243964 -0.10025917 0.119546175 0.0917273164 0.00480829086 0.150867537 0.0249657575 0.0332049243 0.0880667567 -0.0586667955 0.072079584 0.175612509 0.0517335869 0.0897404104 0.0503287949 -0.133238509 0.0293708127 -0.0389682427 -0.14583306 -0.0541342646 -0.145008922 -0.00405186322 -0.0991181433 0.149902388 -0.079027079 0.13217856 0.152729511 -0.00680424459 0.0569330305 0.0193487108 -0.0548162982 0.0513189137 -0.0619835034 0.0174638182 -0.102416456 -0.0279327556 -0.129593804 -0.0595460832 -0.0616615489 -0.0346394479 0.108837441 0.0645946115 -0.11461664 -0.00987040997 -0.0194515288 -0.0440613478 -0.0762307048 0.113881603 0.0807152838 -0.0215207562 0.053498432 0.00671930611 -0.0759949684 0.078516528 -0.129376277 0.00994156301 -0.0961488858 -0.0222117975 -0.0067448318 0.109046414 -0.118271597 -0.0475344136 -0.00401996076 -0.0324587896 0.111761943 0.0669000298 -0.140469015 0.0275926143 -0.0115185082 -0.111927435 -0.131411597 0.0218255222 0.0812726617 -0.0837273151 0.0129608214 0.133702025 0.0800562501 -0.0325852484 0.0149642564 -0.0189071596 -0.173735201 0.0099428352 -0.00971476547 0.0206104293 -0.113343984 -0.107871518 -0.120784573 -0.0340123661 0.12200997 0.0924243927 0.134061486 0.00931480248 0.00610988587 -0.143206105 -0.013435632 0.157742649 0.0986237824 0.031523902 -0.109645322 0.101808242 -0.0647404119 -0.0524963662 0.0176734496 0.0557880327 0.108553298 -0.0902532712 -0.130619377 0.0632717982 0.104041591 0.0543672703 0.0434634201 0.147163749 -0.114743508 0.158163086 0.147472963 -0.0521723554 -0.100233488 -0.0301290527 0.022240812 -0.0719027072 -0.0280963797 
0.0667710602 0.00854949374 -0.0772623569 0.0685823038 0.00593935698 -0.0318717696 0.0944193527 0.0372171178 0.0526549183 -0.101998597 -0.0764912069 -0.118265085 -0.0155763114 0.0363124497 0.0786181912 0.0214089006 0.00902846642 -0.0222423617 -0.0114359492 0.016868338 0.0275472291 -0.0955874622 0.068063274 0.0991849825 -0.129994661 -0.14134939 -0.0891924128 0.0620854646 -0.147031859 -0.0616799332 0.125663459 0.0466651432 0.049083516 0.0202748105 -0.0635501817 -0.0811214596 0.0342678167 -0.144643277 -0.0279157292 -0.0520310104 -0.0900295675 -0.0991411358 -0.00983341318 0.0775161907 -0.108855121 0.0795320719 0.0555958673 0.047831919 0.116747767 -0.0178907923 0.00232720398 0.0800028816 0.0948506668 0.109556422 -0.139458165 -0.0708387718 -0.0218770187 0.0945127904 -0.15837191 -0.0499448627 -0.148272514 -0.0720821992 0.0286393929 0.166063771 -0.0910914093 0.0242926553 -0.12178494 0.0714246258 0.0331623964 0.141669735 -0.0356313661 -0.113949567 -0.107093729 0.0499410294 0.178825215 0.0778585151 -0.0158302784 -0.104186572 0.0341806933 0.00881133415 -0.0617542751 -0.136974439 0.141358063 0.00945444964 -0.0606168583 -0.0930097848 0.191351295 -0.0328337252 0.0643470958 -0.0714427084 -0.0224459022 -0.0216223132 0.0666145608 0.158240885 0.197071999 -0.00105335366 -0.130007252 -0.105515987 0.138388366 -0.137579709 0.114597313 0.189100042 0.142671525 0.17301853 -0.134293392 -0.0512899421 0.0793258399 -0.0749241337 -0.0476800092 -0.0704626963 0.188314125 -0.0592299625 0.13017118 -0.101025827 -0.0467180312 0.0082515683 -0.112287328 -0.062346559 0.013593995 0.0616131909 -0.078616567 0.0776763111 0.0905192047 0.0462051481 0.161554873 0.105334468 0.13517189 -0.115146726 -0.143860593 0.0162272323 -0.0732620955 -0.0247567333 0.0928674936 0.115267269 0.0816683248 0.00596335484 0.0484995171 0.0975567997 0.055007495 0.194354102 -0.00516810175 -0.00678860582 0.0775851458 -0.0466337353 0.106064767 0.143327415 0.0536226183 0.0567489788 0.205744937 0.0850102827 0.066885747 0.0559313521 
-0.0664127171 0.157319784 0.0911302492 0.168385208 -0.0391344093 -0.0564815253 0.098924838 0.0328915305 -0.0534631759 0.0365355276 -0.0136043811 0.0482157357 -0.00801647455 -0.0435665064 -0.0428646132 -0.0644146577 -0.049035199 -0.0692589357 -0.113157302 0.0250543877 -0.0696002543 -0.0740747377 -0.0703184903 -0.00694498792 -0.0684268475 0.149164468 0.153054193 0.0761677772 -0.0204661116 0.12485972 -0.126783043 -0.00187381369 0.0541003644 0.107983328 -0.0837595835 -0.0870729461 0.0846110657 -0.0927637219 0.0266203284 -0.00495085446 0.0371818319 0.148703456 0.0929978117 0.00649248715 -0.052705083 -0.00607873127 -0.114258632 0.0115164211 -0.0972977728 0.084398061 -0.103781044 0.042367924 -0.0428892151 0.0323127471 -0.00776143046 -0.0386278778 -0.0703245178 -0.0709099472 0.16123727 -0.0346258432 0.0354511216 0.0405629389 -0.0782804564 -0.038936574 0.0836522579 0.176592737 -0.00653237011 0.0523184314 0.0602646545 0.0936931893 0.161708683 -0.0552729927 0.113780089 0.150944054 -0.0067132581 0.033431489 0.134626687 -0.0877803564 0.0627585277 0.159808293 0.0874273032 -0.0712592527 -0.0547058992 -0.0712833256 -0.0840452388 0.164110661 0.0463254526 0.11971055 -0.0787557662 0.0414926298 -0.0132505866 0.143975362 -0.0445205495 0.0115820579 0.0280404091 -0.0880124941 0.0269049294 -0.151099011 -0.0708077624 0.1746151 -0.0408792794 0.170332685 0.147516906 0.16913189 -0.0805162191 -0.0267642699 0.00414879527 -0.138525307 0.00516474945 -0.0246851854 -0.0470002219 -0.0861195773 -0.100950107 0.0405560918 -0.0432230942 0.104467802 -0.0955900922 -0.0287470184 0.13435027 -0.0651793703 -0.0681400895 -0.105308339 0.105755769 -0.130883738 0.0873673931 -0.0740140676 0.0563389994 0.0325795859 -0.0656638816 0.0513560064 -0.0224221945 -0.117003471 0.0642713904 0.0981090814 -0.0234821606 0.00352106593 0.0429167375 0.0492503606 -0.0412014462 0.00425704801 -0.0755375251 0.0191665534 0.001770781 -0.110787489 -0.1348757 0.106690206 0.00657321559 -0.0908673182 -0.108731627 0.0566626191 0.0443638451 
-0.0703660399 0.0542139225 -0.00493319333 0.12348906 0.00126835168 -0.00761680584 -0.0449741632 0.128203496 0.0697475076 0.156427085 0.111904904 0.0363090038 -0.069633007 -0.124383852 0.0528828725 -0.0260626376 0.146264195 0.202404305 0.123526029 0.113593549 0.063587226 0.115930393 -0.0480900295 0.0870323107 -0.072083123 -0.0411015637 0.1239696 -0.0247502401 0.101827934 0.0338473544 0.179488152 0.0185336322 -0.0024687883 0.193507329 0.0594470054 -0.122646861 0.0367143154 0.0131789902 -0.00824107043 -0.0438461341 0.0322048962 -0.0372958966 -0.0309159439 0.112236492 0.104765609 0.0508270562 -0.134375989 0.0461360626 0.161751613 -0.0832926556 0.0294436906 -0.0715111569 0.151263878 -0.165225923 0.0409720019 -0.0970856622 0.118698254 -0.0376353674 0.0590884909 0.172025725 -0.0388665274 0.0342746116 0.0503573269 -0.0705714077 0.143638507 0.0997425094 -0.0244571362 0.126339301 0.00611212337 -0.0767538771 0.135550186 -0.0817491114 0.0428956598 0.053690739 0.0867463723 -0.120110415 0.092980817 0.0624419227 0.171113074 0.071235843 0.0251063108 0.147427261 0.0600847751 0.147185415 0.0782428235 -0.0334974229 0.140428677 -0.0508178994 -0.0417781211 0.096801661 -0.0446150079 -0.0366388001 -0.0328694917 0.111436456 0.0238367319 0.0866102576 -0.151921302 -0.0687064454 -0.104527332 -0.0853670314 -0.000437619659 0.126783535 0.0569791384 -0.0517665707 -tensor_10weight 2500 --0.0212358683 0.122197703 -0.0510823093 -0.0501324013 -0.0548678078 0.0356177911 -0.00784289744 0.133274257 -0.113695405 -0.0432012156 0.0939747244 0.0988038033 -0.0639443696 0.0290857553 0.0895108432 -0.0702648386 -0.103292271 -0.0736945942 -0.00376112177 0.0998725593 0.104668014 -0.105342008 0.0656864345 -0.114149243 -0.00571362674 0.0216598436 -0.0114243887 -0.0504532829 0.0596787184 0.00372021808 -0.15211682 -0.0136099635 -0.0886892602 0.0599286295 0.111488119 -0.0207552537 0.0917330608 -0.0306493416 -0.0526035354 -0.0849622265 0.101938419 0.0732950419 -0.117307015 0.0721970722 -0.0825721473 0.0530262738 
-0.0570005514 -0.14794296 -0.125646636 0.0978401229 -0.0572635084 -0.0405422673 -0.0372838974 -0.0117440075 -0.0372347534 0.05405204 -0.125367016 0.00642648339 0.0431452096 -0.0818922222 0.0333031267 0.0492147207 -0.108358391 0.0211769491 0.0296456665 -0.115399122 0.0998793691 0.0635934472 -0.0597816631 0.135748908 -0.0291152298 -0.0674216747 -0.0520641059 0.13197051 0.112524465 0.0249439776 0.0945808142 -0.104327582 0.131835803 -0.0558281392 -0.104002684 -0.0600294694 -0.0934771448 0.035828352 -0.00607830286 -0.0175107867 -0.126915321 0.10744977 0.0146200657 0.080092743 0.0436066091 -0.0399526656 0.00826710463 0.0102796853 -0.138014555 0.088743791 -0.0517612249 -0.103482887 -0.0803165734 -0.126166776 0.0944030806 0.117681094 0.109592296 -0.0782303661 -0.012482455 0.0995225459 -0.139412105 -0.085186258 0.148377135 0.150794506 0.110053003 0.0709926337 -0.000629723771 0.013662681 0.0823172182 -0.0580728464 0.0406894386 0.0878868401 -0.0696239024 0.180716202 -0.0156155387 0.102927946 -0.0455422476 -0.0267394036 -0.105474576 -0.0847397819 0.0854922086 0.0470506549 0.0965595171 0.127182499 0.137551412 -0.0368003063 -0.0720289052 0.0134783518 0.0268878676 0.0088609159 -0.0511660501 -0.0823307037 0.0519733205 -0.0623468719 0.0798326582 0.0710632354 -0.00685403682 -0.137981296 0.011066772 -0.0105396928 -0.0804577619 0.111978434 0.125334308 0.00390364812 0.0692017078 -0.0330482721 0.163387418 0.0201984197 0.151748836 -0.0597846881 0.0121450806 0.0635938272 0.143580437 0.120768994 -0.0914445743 -0.0753249824 -0.0758429915 -0.0557716116 -0.0532640293 0.0682220832 -0.0222889148 -0.0228032283 -0.0421909615 0.0669203699 0.0878867134 -0.10618075 0.0309686121 0.0602300242 -0.115028903 -0.104819998 0.0828765184 -0.0887905657 -0.128947496 0.0665918365 -0.0184224322 -0.0902371928 0.0826666802 -0.118614137 -0.0974627435 -0.126331478 0.121555597 0.0860794485 0.087571308 0.10830126 0.000543156988 0.0120290369 0.119321413 -0.0516519211 0.0992750004 -0.0404139012 -6.53001553e-05 
0.0228800084 0.142584652 -0.112774611 -0.0440897308 -0.101826452 -0.0968946069 0.110168286 0.0165652726 -0.0190366097 -0.00230144663 0.165133551 0.000178731847 0.163377017 0.0187537577 -0.111495733 0.00262274873 0.0370500833 0.111238286 0.0467720181 -0.112312317 0.0970760286 -0.00430823164 -0.18515943 0.0749811605 -0.108619854 -0.115121402 -0.127359807 -0.139737591 0.144899085 0.125063911 -0.0695543438 -0.0871604905 0.0424468778 -0.0412323475 -0.023522187 0.0755847916 -0.00548974751 0.12203481 -0.135008246 0.133453161 -0.0179671869 -0.0897851288 0.0138236731 0.0413174592 -0.130779296 0.0947689861 0.145704255 0.0761682168 0.0180642232 -0.0403898954 0.0231717415 0.0988920107 -0.116821639 -0.000677700795 -0.0828759521 -0.0409976803 -0.0652928352 -0.00307619479 -0.0512737185 0.0509406962 0.164199054 0.0935533643 0.0614940412 0.0182006471 -0.136083275 -0.036729455 -0.0655212551 -0.000610545278 0.110906526 -0.0215685032 0.0942183807 0.091754362 0.0299259573 -0.0927302539 -0.0870193392 0.041432485 -0.0959858447 -0.0521472655 0.133616418 0.00140008167 0.0231243372 -0.0265129507 -0.0465798788 -0.0466384888 -0.115962021 -0.0177416876 0.0583319366 0.0504825823 0.0262723565 -0.00666236039 0.0547430068 0.176730543 0.0766595826 0.0228095483 -0.0677164495 0.142202839 0.0357140489 0.152742878 -0.138932645 -0.0411144495 0.046292562 -0.0618947372 0.129624233 0.0132857962 0.13160333 0.0381516591 -0.00642365264 0.146867096 0.172721684 0.0822038203 -0.170568198 0.104037531 0.0469250493 0.149638385 0.0287801176 -0.105733179 -0.060603328 0.104576632 -0.109430753 -0.0709009469 0.142124668 -0.0206337441 -0.0685040206 -0.00922098476 0.154050708 0.140051425 0.0351035632 -0.049322959 -0.133136809 -0.00690346071 -0.126874417 0.0869078487 0.163344264 0.177283853 0.0478345305 0.0679927543 -0.122830227 -0.0329710469 -0.138243169 -0.12043523 0.018810302 0.0234610289 0.085149698 0.122265451 0.0947110653 0.0231434219 -0.135353088 0.102692701 0.0495786704 0.0994817838 -0.00882655289 0.0960466415 
0.139429167 -0.00701248366 -0.0530242138 0.13024801 -0.0270533189 0.0678792298 -0.0942333192 -0.0897237882 -0.116916768 0.128777713 -0.0864267498 0.000630012888 0.0271891207 0.0435388982 0.0202370584 -0.101047307 0.0206810199 0.17194964 0.148503706 0.0857690498 0.161830828 -0.135013863 0.118901089 -0.0623394176 0.144353598 -0.0805446953 -0.166448697 0.0953875184 -0.0034776032 -0.108664006 0.0685736537 -0.0221380815 -0.0657745823 0.00281999423 -0.107203327 -0.0475207977 -0.055595018 -0.110597998 -0.000231489539 -0.0365380459 -0.0790299848 -0.0336767174 0.00941203535 0.0301933419 -0.0706446469 0.0102126878 -0.0478211716 0.0370407067 0.106186956 -0.0224919319 -0.0260020383 0.0235891566 0.0859296694 0.108447783 0.00656368863 -0.0794644728 -0.075159736 -0.110706359 -0.0758301392 0.0576422177 -0.0484935977 0.0564662404 0.137293592 -0.1790566 -0.00773862004 -0.0347555578 0.0215124693 0.0171802938 0.176061988 0.0460711457 0.147571266 -0.102327831 0.195341617 0.058222834 0.13799569 0.118976817 0.0509692021 0.050737951 -0.0811304599 -0.0793619528 0.0404427722 0.166059211 0.0436591581 0.0677934214 -0.103122085 -0.0477355011 0.0969016552 0.0731616244 -0.0612649173 0.0483927317 0.101544008 -0.0105341347 0.0251087993 -0.0852457061 0.0189878102 -0.00559592852 -0.0792983919 0.0710483044 0.0725165606 0.0998317599 -0.00865345914 0.0574824326 0.00925513823 -0.124299236 -0.152690053 -0.0688084438 0.0785533562 0.0506814644 0.147026345 -0.020497581 0.0864086375 0.0827063099 0.0358608812 -0.0127497688 -0.051433742 -0.03434515 -0.128280848 0.0762891397 0.0711952001 0.0842626169 0.116974868 -0.13133204 -0.0335378908 0.0899138004 -0.133157939 -0.120754138 0.122247837 0.144659519 0.0242889076 0.0994777754 0.06880299 -0.0276972707 -0.113097489 -0.0623187982 0.0822641999 -0.119825244 -0.103717044 -0.0319737606 -0.135691062 0.152195513 -0.0339170918 0.14714168 0.0540374666 0.133274242 -0.0455024727 0.113066867 -0.0257611051 0.0637556389 0.0359611772 -0.0776446313 0.0980009288 0.00792387128 
-0.113957405 -0.0919724554 0.0144144921 0.138423935 0.073610343 -0.143383607 0.0898004025 -0.0354972184 0.0135471914 -0.157295063 0.0106644779 0.0293142907 -0.00285607576 -0.0387508944 0.0805381238 0.134696633 -0.0486889333 0.115237691 -0.114024878 0.0478819907 -0.204706341 0.0640740022 0.0566777252 0.127831176 -0.0553595684 0.111671567 -0.09231098 0.0911259577 0.000540402718 0.048529759 0.0324980132 0.131618008 -0.0291069895 -0.117312111 0.119812474 0.0617211722 -0.0828384385 0.103557624 -0.102094062 -0.165967241 -0.0316339955 0.114424214 0.0979985967 0.106774256 0.10326688 -0.0260687321 0.0577113181 0.107890628 -0.207330927 0.13924247 -0.108645178 -0.006839226 -0.0576377921 0.086189121 -0.0478715226 -0.0539538078 0.0736430809 -0.113258503 -0.144859955 0.00832846761 -0.139867589 -0.0278512705 0.111798391 -0.128404155 0.0160521064 0.0621018 -0.0222936384 0.067758739 0.151578188 0.0899427235 -0.0596541949 -0.107079484 0.0257597771 0.10411863 -0.0212498736 -0.106785044 0.102427348 -0.037666291 -0.0371498428 0.12570031 0.0917552412 -0.0337271765 -0.00665520132 0.0846108422 -0.137335181 -0.0166137852 0.0288310051 -0.0332151465 0.129317015 0.00994049478 0.0253879577 0.0699467286 0.0467383862 0.00212879106 0.0807025656 0.104752108 -0.0590361021 0.0355920568 0.144212484 -0.0798209384 -0.119622223 0.129336998 0.144326404 0.140256554 0.130315661 -0.0842513517 0.0202817731 0.0428673401 0.0224014688 0.0633894131 -0.121904492 0.0456793755 -0.119502507 0.0149252117 0.0991675705 -0.00801187102 0.0127708912 0.146813497 0.0297204014 0.0190939084 -0.0910222307 0.0188052319 0.158833399 0.0565757714 0.106845409 0.126346767 0.0609186888 -0.0289904978 0.176135257 -0.0152246384 0.00210902141 0.170274019 0.147236124 0.100339673 -0.0750769973 0.126910731 0.0639681518 -0.141766325 0.00575648621 0.00252318289 -0.132266387 0.0917782336 0.165258139 0.159399614 0.110745199 -0.0729087219 0.00930848531 0.129214033 -0.0562379323 0.114632159 0.0686002523 -0.0798907951 -0.0475209691 0.135875911 
0.0170198567 -0.116139926 0.00134182745 0.119676389 0.00874867849 0.044542592 0.0469349325 -0.171083689 0.137197331 0.0741593838 -0.0608005896 0.142061442 -0.012179587 0.0524498336 -0.0355517454 -0.093820259 -0.116283298 0.0172735397 0.112878129 -0.133789182 -0.0446203165 -0.056491144 0.0793790519 -0.0914917514 0.102299117 -0.0138002355 0.162907958 -0.0748615712 0.138051897 0.106378302 0.00609137118 0.122283094 0.139686123 0.0373347066 0.0692594871 -0.09532848 -0.0478848442 0.125734076 0.0966270939 0.079935506 -0.010005963 -0.062777698 0.0338496156 0.0195008758 0.0981275588 0.161850542 -0.0924032331 0.132830709 -0.061219655 -0.112690888 0.121970147 -0.0678780898 -0.0108335214 -0.026362868 -0.0930077806 0.0707007274 0.0775098354 0.0764014944 -0.0804891065 -0.113752075 0.00710404944 0.0588292368 -0.0711446628 0.0660018176 -0.0639827028 -0.0958132148 -0.123145066 -0.0116624041 -0.0329767279 0.0308814552 -0.138171315 -0.126469448 -0.0836871266 -0.0362357274 0.0118497657 0.0553193018 0.162871584 0.00862812717 -0.0775254369 -0.0664473996 -0.0720814988 0.0207482781 -0.000734820962 -0.0429652929 0.156263977 0.0950327292 -0.131239817 0.0131368376 0.0467994325 0.152589336 0.0339371823 0.0687561333 0.0528964065 0.165966034 -0.124363385 -0.0527783372 -0.0752571672 0.0272485688 0.19544439 -0.0526332743 0.11452125 0.06950555 0.0388930514 -0.105637603 -0.031256184 -0.143096924 -0.131879777 0.0679151788 0.0898881108 0.138390079 0.0468003303 0.00110050617 -0.166997537 0.148328051 -0.135715783 -0.0184072368 -0.0204313342 0.0777179599 0.0854007453 0.0669743344 0.0512876213 0.0736838058 -0.0702814385 -0.153760359 -0.0127334371 0.0702296048 0.0945134461 0.113579147 -0.045809608 -0.0650008023 0.0018505156 0.0389033966 -0.0164941698 0.0427322201 0.0889017582 0.0659029856 -0.0811767578 0.0873623267 0.18029575 0.16048792 -0.112073712 -0.134867206 0.0882760212 -0.102488875 -0.175802514 -0.0120754875 0.0511610173 -0.0656074211 0.0839222074 0.132837757 0.19432193 0.0962905958 0.11927399 
-0.0800096765 0.0369717441 0.172430858 -0.124649733 -0.0634947792 -0.0230292752 0.0867050886 -0.0332086422 0.0130523248 0.1857972 -0.0377311036 -0.0985669419 -0.0854148418 0.0455307364 0.11115057 0.0544578135 0.0360678583 0.0398137569 0.0514812209 0.0629364699 0.157751113 -0.0630130768 -0.0467872992 0.104552008 -0.00756954914 0.128961414 0.152928904 0.0538875759 -0.0584964044 0.000610977411 0.106909499 0.167631388 0.0725584775 0.129740968 -0.0139085511 0.102276772 0.0839342475 0.0176016726 0.0198159665 -0.0576478094 -0.0319991671 -0.0245914981 -0.0934077576 -0.00444747973 0.0162859596 -0.12718007 0.137412518 -0.0712438971 0.0353779495 -0.00824063458 -0.0929021388 -0.0211561657 0.00593618024 0.0581243485 -0.0115015805 -0.117641151 0.0745487809 0.100706778 0.101716518 0.180298716 0.0615622588 0.190501958 0.0251207165 0.161565259 0.0331474617 -0.049629122 -0.0350433774 -0.177207738 -0.0606218576 -0.135168135 -0.0552285984 0.0633888692 0.169191226 0.181376621 0.123600326 0.0766487271 0.0379917473 0.0728779733 -0.0138807297 -0.126598462 0.0824816525 -0.0179684516 0.0607266538 0.127557591 0.130710021 -0.0497126617 0.097561419 0.0444984324 0.00906473491 -0.107714869 0.109389283 0.0244740434 0.0977818221 0.0717759356 0.146900296 0.0456320979 0.0432121679 -0.068095766 0.035988193 0.16688697 0.015179188 0.193963483 0.030294802 0.0684091449 -0.154407337 -0.0253650546 -0.1035157 -0.123490132 0.160163686 0.000832796795 0.0173495747 -0.103751779 -0.114844963 -0.0266452879 -0.10764344 0.057379473 -0.0908356607 -0.0109218499 0.0242156517 0.107376277 0.00434230454 -0.0106718605 0.126825973 -0.076470606 0.082616128 0.043287307 -0.0409609638 -0.0944999158 -0.102769725 0.0562386662 0.110875674 0.075079143 -0.0874663591 -0.087224707 -0.0344407968 -0.032368429 0.139833078 0.138399825 0.0838530734 0.145841986 -0.0942437425 0.0514989197 0.106409363 -0.105719045 -0.0327457897 0.0023922089 0.0723857582 0.115679517 0.0641390905 -0.0389708839 0.064969562 -0.11146944 -0.0840641856 
-0.00882211328 -0.0985186249 -0.0100410283 -0.140838861 -0.0820496976 0.126737103 0.0637906492 0.185262144 0.0401138254 -0.0199363641 0.0733033046 0.0741309002 -0.0405171663 0.119358622 0.131731167 0.00470401347 -0.0874768347 -0.0030607495 0.154426917 0.142330453 0.0776753575 -0.0136618298 -0.0559839308 0.117518134 0.0162534118 0.105629325 0.0662130266 -0.0396728814 -0.0103532532 -0.0827013478 -0.0040447861 0.113319181 -0.0071705617 0.118163139 0.0378845818 0.0424246453 0.148900077 0.0901416466 -0.131045297 -0.0908931792 -0.00706362771 0.118041806 0.0408021808 0.0160984807 0.114142895 -0.0700615197 -0.0494136475 -0.0595068522 -0.0467700139 -0.169063121 0.0931548029 -0.0394937769 0.0162656307 0.10976477 -0.124994159 0.152687699 -0.161219954 0.0349471532 0.00292883557 0.0899900794 0.0686580092 -0.0421929248 -0.0581205003 0.015063826 -0.0568712726 -0.147363365 0.0534492135 -0.0798066407 -0.0166791826 0.0615924746 0.0440535769 0.157255575 0.0443781093 0.107450068 0.0306068957 0.111347824 -0.0176973268 0.0151626179 0.083994858 0.00934020989 -0.0776927471 0.142738372 -0.0590152331 -0.0490930192 -0.103396282 -0.113522559 0.0619038753 0.14320752 -0.162481412 0.0684234798 0.019900918 0.0254009217 0.104212388 -0.034442611 -0.0442203879 -0.0163948387 0.00209845603 0.00254264474 -0.0859125108 0.0658564866 0.0487911701 -0.10677994 -0.0656869113 -0.0264248922 -0.102019623 -0.102298513 -0.138458312 -0.119788498 0.0699746907 -0.127610922 -0.029055886 0.0234031677 -0.0221559554 -0.0785237625 -0.079463318 -0.00504159927 -0.0934635699 -0.128190622 0.116880015 -0.131823063 -0.0321453363 0.00674818456 -0.118852653 0.0121976882 -0.124695078 -0.0894826874 -0.0633899048 0.0750036389 -0.0270189941 0.0216782093 0.119181171 0.139441684 -0.0148586482 0.0138805658 0.0725070536 0.0154770464 -0.126432493 -0.0768562183 -0.0687850416 -0.0399501249 -0.0991529524 -0.0160450432 -0.0420119353 -0.0326361097 -0.0698527694 0.0993200317 0.205627039 0.177876145 0.158109769 -0.0378685482 -0.0387157574 
0.0318316072 0.0931719393 -0.0390335843 -0.093457289 0.158116326 -0.0205905512 -0.0280272551 0.0694964528 -0.00315693673 0.100278348 -0.115536571 0.134927243 -0.0945299864 0.157936096 0.0946900696 0.0962090343 -0.0132327564 0.0670364797 0.112801351 0.0329531841 0.00913766772 -0.157800838 -0.0242684614 -0.062707752 -0.111074157 -0.0969263613 -0.0453660265 0.0788833573 -0.10100504 0.0351482034 0.00255969632 0.146189213 0.176710948 0.076782994 0.0267225392 -0.0470640622 0.0270443261 -0.0651312843 -0.0475523248 -0.0839515477 -0.0822535306 0.0876104087 -0.0853568986 0.0850623995 -0.0537006631 -0.00454986189 -0.115827605 0.076602146 -0.0653875545 -0.0943169966 0.0738318786 0.00882227719 -0.105202228 0.0500551313 -0.0072239181 -0.095633924 0.0686312243 -0.155843362 -0.10586188 -0.0515479743 -0.0627304092 -0.119748496 -0.055315733 -0.034655381 -0.011634198 -0.0673866794 0.0497374982 -0.0230727922 -0.117348522 -0.0596318021 0.124789402 -0.100009322 0.125387162 0.125093237 0.11934261 0.122768745 0.00161180296 0.0404978395 -0.0511606485 0.025636822 0.0928659737 -0.119610175 -0.0845185071 -0.0342509151 0.0703661814 -0.0939814001 -0.0344806798 -0.0912777558 0.00682032388 0.121527597 0.152524486 -0.013022732 0.0765541866 -0.0932440087 -0.0872863084 -0.0211912636 0.0815037265 -0.0279196408 0.0894725025 0.0345439613 0.00409509987 -0.0809944794 -0.1393179 -0.125571921 -0.00232244516 -0.149977431 -0.000756583293 -0.107811421 -0.13097401 -0.124444515 -0.016273234 -0.0980345458 -0.0679222867 0.145107448 -0.0677620098 0.11634396 -0.150404945 0.0630804896 -0.0887408033 0.0502750538 0.0362975895 -0.000291265926 -0.00586632686 -0.0945255011 -0.106234178 0.140956268 -0.0830846429 -0.0355807208 -0.0545149483 0.0505581349 -0.00131463038 0.158409923 -0.0294266306 -0.075270161 -0.0552593507 0.110549971 0.0572320521 -0.139074802 0.079300411 -0.13124457 -0.0520654507 0.0863010138 -0.0589499101 0.0994329005 -0.0107528744 0.133117393 0.135243297 -0.100229755 -0.064115867 0.117229715 -0.0909496993 
-0.123401761 -0.118980557 0.0693805069 -0.0614001341 -0.103551611 0.0602181554 -0.0570658669 0.0473706871 -0.115162462 -0.00543242693 0.0720573142 -0.0638199747 0.163928419 0.15177232 -0.107249737 -0.150279045 -0.124889374 -0.0754014552 -0.117463201 0.0907788277 -0.137130409 -0.112758525 -0.0632348582 -0.085662134 0.074386403 -0.0294712894 -0.103990085 -0.0954368263 -0.116161741 0.139300272 0.109896317 0.0364004523 0.0969301388 -0.0921835527 0.0688580126 0.0143036414 0.135515511 -0.10856048 -0.128424734 0.00355436816 -0.0129383691 -0.0427853577 0.0744294003 0.120447546 0.0322268978 0.111017898 -0.148288384 -0.0120353373 -0.0698817894 0.123846047 0.0900507122 0.0200054049 -0.135218769 -0.0566676334 0.000346322719 0.138647377 0.133002952 0.0393482894 0.0430621244 0.123535052 -0.0048245755 0.0777058303 0.105987355 0.0330017395 -0.0667480379 -0.0552513823 0.119488135 -0.0444536582 -0.0245578699 0.10310775 0.135614321 -0.088050127 0.101776421 -0.106960148 -0.0866610706 0.0718072876 -0.126476645 -0.124185599 -0.0585252866 0.0773498043 0.0260625742 -0.0329737999 0.050648436 -0.0158751626 -0.11962828 0.119259298 0.074308984 0.138976827 -0.159304231 0.0172711946 0.164110437 0.100464553 0.10389293 -0.0240411293 -0.0973320231 -0.119676456 0.00236897776 -0.0644378364 0.0506922454 0.19321233 -0.000225052238 -0.0394313931 0.110896409 0.132451773 -0.0674725696 0.106891982 0.0839640722 0.00246000011 -0.128063992 -0.106095694 -0.0592876561 0.0409409404 0.0347136185 5.23093404e-05 -0.0714386553 -0.00371642876 -0.109261386 0.150267109 -0.0990683362 0.151943013 -0.0184463765 -0.0465980396 0.0613862872 0.136278436 -0.00558510423 0.100865357 0.101894312 -0.0244332775 -0.125516385 0.0207252149 0.101681627 0.145940647 0.0516951084 0.16129303 0.178174466 0.067602627 -0.0624158829 -0.0751179382 -0.0219047312 0.052100759 -0.0685305297 0.0354404449 0.145376444 0.18646054 0.178686082 -0.0324649215 0.186916694 0.0407279916 -0.130371153 -0.0193989351 0.0320602059 0.0507811755 -0.113682859 
0.0327428654 0.00616077147 -0.0258325189 -0.143563882 -0.00310279964 0.185173586 -0.0143889384 -0.0377997085 0.102268487 0.193651006 -0.0199139044 0.0859818161 -0.12391866 -0.0357881412 0.117275149 -0.00614784472 0.157511353 -0.0342883319 0.0726206675 -0.140140399 -0.0890319422 -0.084323287 0.181992263 0.0513049066 0.0995599255 -0.0906688347 -0.103707798 0.189509571 0.0850646123 -0.160007775 -0.125715733 0.119538076 -0.119608335 0.0870004892 -0.0289871693 0.0428543352 0.0881076306 -0.0739037469 0.150500178 -0.0330894664 0.120570533 0.0367856883 0.0812487155 0.0561989732 0.0205095038 -0.0794103295 -0.149736494 0.0125511289 -0.117065132 -0.107071228 0.0454606973 0.0796560869 -0.101957068 0.147831231 -0.0325231701 0.102706663 -0.00391825195 0.164131463 0.113403954 -0.121139087 -0.124457628 0.00445246696 0.040741276 0.135801449 -0.00480739959 -0.078587763 0.0388003998 0.0373185351 -0.0382047556 0.123435661 0.0475043617 0.00739693642 0.0977098569 -0.0144506181 0.113023907 -0.134157673 0.115016133 0.0749712959 0.122992218 0.128705814 -0.0182231162 -0.0774768889 -0.100664325 0.0511561967 -0.0247491226 -0.0616593063 0.0696846992 -0.131600708 0.128312185 -0.0420887545 -0.0552831143 -0.132332042 0.0888988078 -0.0928973109 -0.0273143947 -0.00685594976 -0.0243961141 -0.0604439601 0.127762869 0.0312011689 0.0182463527 0.0971829295 -0.0156243443 -0.110507861 0.118185356 -0.0953080505 0.0230623633 -0.139256597 -0.0295829773 -0.00552763045 0.02986148 -0.0686590225 -0.092862606 -0.1297746 0.0351940989 -0.0635817125 -0.00291644037 -0.0894218385 -0.081991896 -0.0788865611 0.123435721 -0.07980977 0.127385929 0.0179787278 0.14100419 0.0584084392 -0.11898651 0.0776305497 -0.0203871056 -0.0342618525 -0.0426038876 0.0608503371 -0.0924751759 -0.110731475 -0.0939228088 0.0693204403 -0.0963335559 -0.136444792 -0.0746414661 -0.0768960863 -0.0328734815 0.121303841 0.0307305455 -0.0240531266 0.0461567193 -0.112294145 0.0924198776 0.0726142228 -0.0232265219 0.0855844915 -0.0904331505 
0.00588195026 -0.0725407153 0.0315330774 0.0881674886 -0.0255857036 0.00970177352 -0.0484308302 0.0706667453 -0.0180258229 -0.0909893364 0.0912033021 -0.0541153103 0.118749335 -0.000519677997 -0.12067198 5.66840172e-05 -0.0196669474 -0.0159197301 -0.120875023 0.0988306701 -0.0475680456 -0.0561974943 0.0541714281 -0.138189748 -0.00213968754 0.0791497976 0.0153562725 0.102331981 -0.02512566 0.0891461223 -0.100474566 0.0814501047 -0.0774096027 -0.0491994061 -0.0873599797 0.125588104 0.194157138 0.021343857 0.153216049 0.153526738 0.0659974441 0.00557104684 0.00512425043 0.0593339801 -0.0341665149 0.1543639 -0.000350087881 -0.0057605654 -0.0569509007 0.0794611201 -0.0399132632 -0.053406354 -0.021354856 0.0188272204 0.161608189 -0.124593504 0.0364353582 0.0259574521 0.0545204356 0.123703361 0.0302464068 -0.180545017 -0.0880055279 0.0392708704 -0.0457140729 0.0489293262 -0.0629897118 0.0735282153 0.0903664082 -0.0868221596 0.0773085654 0.0447509512 -0.0666414499 0.10026215 0.0482167453 -0.0492483862 0.194690838 0.0770569816 -0.142567039 0.114170618 0.0906017646 -0.0648275763 -0.0698935315 0.0423483737 0.0252110269 0.0347312838 -0.0560424551 0.155089974 -0.0118696066 -0.154669881 0.122313514 -0.0593367517 -0.133725002 0.0684605017 -0.086332038 0.15027903 0.00808770955 0.0856792182 -0.103054002 0.144213781 0.0522497185 -0.012934139 -0.090749681 -0.111638978 -0.15779312 -0.120520085 0.00437208824 -0.0104053728 -0.0667165816 -0.126051918 -0.0548642427 -0.104130857 0.0505597442 -0.103951007 0.0535476543 0.126115173 -0.126277968 -0.0643455386 -0.0392601117 0.0434453227 0.0778148323 -0.0601691268 -0.0518789664 -0.104312316 0.0010818392 0.0459540337 0.0427468866 -0.053878624 0.139487013 0.0611597076 0.0600839928 0.11611075 -0.0964443162 0.0624526814 -0.120941721 0.132605001 0.0708762035 0.163541496 -0.00394374598 -0.166257143 -0.203483716 0.0616421737 0.116930254 -0.0280899294 0.0485812873 -0.219869539 0.00187383592 0.0102478564 0.108631112 0.063601993 0.0346559994 0.0576099493 
0.129458979 0.0913215131 -0.0457242802 -0.128046185 0.0772113949 0.0461370759 -0.16218935 0.031650953 -0.0577221215 0.060324017 0.11425402 -0.0406978093 0.181004599 0.122253641 -0.0693835318 0.092224963 -0.00997300446 0.137522176 -0.100897603 -0.0125190523 0.0379933044 -0.0922655612 0.0624521673 -0.0938842148 -0.0446401648 -0.0100792432 0.153803915 0.114698537 -0.0192592535 0.0803508684 -0.0886118263 -0.0518604182 0.0633063093 -0.051035013 -0.110467285 0.0447706729 -0.00330133177 -0.0912512243 0.00899628736 0.165726572 -0.0454223789 0.101888083 -0.0568365306 -0.076063931 -0.0752097219 -0.0845429227 0.0309042297 0.0270464383 0.124918491 -0.0662075952 -0.108895019 0.139057159 0.140161321 -0.119610265 0.136644498 -0.0796718523 0.00887751393 -0.178879619 0.0901582688 0.138248637 0.0798882693 -0.0129555576 0.0835241526 0.112452 -0.0344961025 0.00462428341 -0.129168198 0.120342299 0.0168783814 0.0924949348 0.104666054 0.0520613231 -0.048391819 0.0304543953 -0.0920903161 -0.0853118896 0.0968189314 0.144676819 0.0184051823 0.129063278 0.158408046 -0.0152725829 -0.0893933401 -0.0740989223 -0.0408062041 0.000471571781 -0.126842275 0.0290431026 -0.00776752643 0.0354271829 0.137151405 0.0211606342 -0.075007841 -0.0753669515 0.046267733 -0.09437415 -0.137393638 0.113058127 -0.026870288 -0.0587519705 0.117264286 0.00866104662 0.0201778039 -0.0951031819 0.018844882 0.0590609238 -0.0709743276 0.0873017982 -0.105027102 0.0530570112 -0.133408979 0.0909369588 0.0397072695 -0.126006484 -0.0984181166 0.130100951 -0.0197343528 -0.14802596 -0.0323335156 0.0627914593 -0.115520857 -0.0452975444 -0.012140125 0.0625165701 0.12852025 0.137867913 -0.0958045647 0.0133615816 0.107896157 -0.0483738929 0.129055023 0.116109088 -0.077403754 -0.0633423328 -0.0807941109 -0.0901692063 0.135869384 0.115573078 0.0980086252 -0.0180905815 0.00574294198 0.0841204077 -0.114126891 -0.0070350226 0.12609975 -0.12341994 -0.0101188719 -0.0531954169 0.0926595107 -0.14045234 0.0385289043 -0.00951111317 -0.130858913 
-0.0142143071 0.106903538 -0.108551912 -0.125400409 0.058905676 -0.117564946 -0.0387115702 -0.120323576 -0.00476152822 -0.117001377 0.0521154404 0.0897176862 -0.143047303 0.0822072178 -0.0936514139 -0.0156190991 0.0703094602 -0.109612264 0.10537225 0.10860981 0.0861182958 0.0131997541 0.0654514953 0.0550837517 0.0355030596 -0.127356902 0.0202751011 0.0479572453 0.0155448811 0.0376787409 -0.104008965 0.138826773 0.128562942 -0.0176690705 0.00616015308 0.0954742804 0.168244436 0.0313249305 -0.00562194875 -0.013728708 0.0743624717 -0.0685951263 -0.118008956 -0.0987153798 -0.0248185005 -0.140825942 0.0919594541 0.0747423917 -0.0905265957 0.129529849 -0.124772102 0.0051275813 0.0195230469 -0.146458313 -0.0511724278 -0.00252351165 -0.0710987002 0.164692074 0.0975965038 0.0976115763 -0.017378984 -0.0336411037 -0.00410315767 -0.111187756 0.0705890879 -0.0971891209 -0.170355156 -0.0228465442 -0.000340677885 -0.111434825 0.0483258702 0.0879553556 0.0596619062 0.0922146142 -0.0954236314 0.100458361 0.0672615394 -0.0278729852 0.0144202998 -0.0134725468 0.00500577223 -0.0834878609 0.0704662427 0.0280400775 -0.173883215 0.0107927518 0.145330369 -0.00825130939 0.0123181194 0.104186043 -0.00961343665 -0.0443730503 -0.12468195 -0.127869114 0.0712913722 0.0349406078 -0.0263537578 0.140321776 0.0573507696 0.0558672026 0.0422607958 0.0942431912 -0.134432197 -0.0805723518 -0.117681436 -0.0301251039 0.0112107592 0.0862491578 0.0813791007 0.15245752 -0.104849078 -0.1278539 -0.0234719608 -0.0375554711 -0.00391989155 0.0951362252 -0.124997646 0.101080559 -0.0823273435 0.114658192 0.0812243894 0.0979240239 0.0534090586 0.11992234 -0.0614060089 0.0227183215 -0.0045140041 -0.0936731175 0.164146602 0.109354012 0.0560285486 0.0972351655 0.0452851653 -0.0221107267 -0.143891752 -0.0367256775 -0.100730948 -0.0520177893 -0.0628575087 -0.0123458058 -0.144427627 -0.0617014915 -0.0792786926 0.105423264 0.118998893 0.065015249 0.103419602 0.078821741 0.11923746 -0.0886161029 0.0995480195 -0.00256725191 
0.00349370553 -0.0529144071 -0.0110389693 0.137066856 -0.177615538 -0.0219782609 0.0117756883 -0.0622062907 -0.031752415 0.109670192 0.00176507875 -0.128339231 0.0967004448 -0.0175355524 -0.0179675464 0.00220880075 0.0363271013 -0.0472624972 0.0390423499 0.0431711748 -0.0524883382 0.137009606 0.032474678 0.0447325036 -0.109758742 0.0141501743 -0.0879177302 0.0324584693 0.0984169021 0.0776358694 -0.0186196659 -0.0178685524 0.136504993 -0.130911753 0.120253332 -0.00131494773 0.0500290655 -0.0261307955 -0.115568407 0.0599436909 0.0793855786 0.013578156 0.164593741 -0.0608182624 0.00377323222 0.144239753 -0.0365212336 -0.00670494884 0.113193937 -0.0333096795 -0.0992462263 -0.0314201638 0.121462323 -0.00150117278 0.152934536 -0.0595164932 0.155643508 -0.0155162774 0.113217972 0.0924211666 -0.0591561124 0.119596377 -0.148361132 0.0774345249 -0.0543995574 -0.0430051051 -0.0587892085 -0.0859734565 0.0093586091 -0.139499143 0.144523352 0.0625231117 -0.0972201005 -0.0138707748 0.105667144 0.0957431421 -0.0930925608 0.114664152 0.0465527698 -0.0664715692 0.103240147 0.116609365 -0.192610845 -0.152009219 -0.0707181469 -0.123133332 -0.0165981147 -0.119874239 -0.123395704 -0.134802729 -0.0183263794 -0.00162356615 0.0698149651 -0.0478172004 -0.0292479843 0.0124004614 0.0228632949 0.125499591 -0.128627047 -0.0394836068 -0.0904139578 0.0717693791 -0.0241678786 -0.0282474756 0.066885747 0.172793537 -0.0325833708 0.134693041 -0.0837737098 0.028767975 0.149519458 0.10379082 0.134075478 -0.00300905108 -0.11726708 0.00835976377 0.0115354434 -0.104070403 0.0869700015 0.0493195616 0.173674643 0.136860088 0.133943602 0.0349466503 0.0715380386 0.0455492027 -0.0717399567 0.0613892823 -0.071349673 0.103834003 -0.0662872195 0.110759154 -0.0086634336 0.11282818 -0.0787081271 0.0525955185 -0.115244508 0.017306909 -0.148093 0.11725767 -0.0918413401 -0.0415653959 0.0675327182 -0.142755657 0.122713141 -0.0754952356 0.119042411 0.120726988 0.0600856133 0.0850800574 0.042756021 -0.0042983531 
-0.0966215879 -0.109561965 -0.121179365 -0.121256597 -0.0916649252 -0.0139565729 0.0183815174 0.010678432 -0.0070024007 -0.10860841 0.123465493 -0.0865222588 0.0993401259 0.0353338942 -0.122070476 -0.00103206933 -0.147107095 -0.0779001042 -0.0985540375 -0.0854975283 0.0685408339 -0.065476723 -0.0296396669 0.0578391589 0.0765029639 -0.0989598259 -0.0449470505 0.0685051531 0.0537158102 0.0139079243 0.0583296567 -0.0594620258 0.0189818889 -0.0988531634 -0.00361028314 -0.0409312546 -0.0480199158 -0.0213640556 -0.126316875 -0.118182555 -0.134643987 0.0825530589 -0.0812493861 -0.00928412378 0.0827149451 0.014478147 -0.124131575 -0.13848491 0.113321409 -0.0203420967 0.132658973 -0.0168279931 0.0254632235 -0.0577695444 -0.125243694 0.0185761452 0.125729159 -0.0470989868 0.137575284 0.00578674674 0.125307932 0.13194342 -0.110660031 -0.108901128 0.0635878146 0.139960542 -0.0849670395 0.0299605131 -0.0711956099 0.0486410856 -0.0859787986 -0.125518829 -0.0788395777 0.113387808 0.172473475 0.125997916 -0.03521844 0.00814832374 -0.0874923393 0.0611119755 -0.103455245 0.0358751714 0.0404794477 0.116629399 0.107773907 0.0883051604 0.155530751 0.0984854996 -0.0652066395 0.0862129629 0.0566241778 0.0940288976 -0.0396610685 -0.054708723 -0.0403634794 0.0204263702 -0.0830316693 0.175091997 0.0323943421 -0.0326925032 0.187265396 -0.0557819456 -0.149056599 0.161892369 -0.0292865653 -0.106854089 0.113595374 -0.0478481576 0.0871280357 -0.0546426699 0.109687231 -0.0883881673 0.152132541 0.0782102272 -0.0743416622 -0.0343025103 0.151285902 0.0897574127 0.0613243282 -0.0363124833 -0.0416660458 0.0365589708 0.00544850901 0.0712229908 -0.0174090713 -0.00367248501 0.100005753 -0.0259860251 0.0931316465 0.0641765073 -0.0520533472 -0.000773876556 0.113493264 -0.0614270456 -0.126095414 0.11269249 0.00773805752 0.0544681847 -0.0156176239 0.0190839916 0.00293012918 0.0550577864 0.0748387203 -0.0842898712 -0.0772737563 -0.157246128 0.00333786267 0.0245691296 -0.0825911462 0.0128215477 -0.0858282223 
-0.132421732 -0.0927961841 0.144973248 0.0598813556 -0.0359893106 0.000982378377 0.100832321 -0.106284365 0.0759487748 0.124556273 0.105182365 0.00680022268 0.0535307154 -0.0955361351 0.0177737772 0.0095509449 -0.00627064146 0.033846356 0.168117985 0.0789029747 0.152832642 0.175960913 -0.00220050896 -0.0862830505 0.106605045 -0.0212400369 -0.0751578733 -0.0164428316 0.0700538829 -0.0992731154 0.129148081 -0.0179386213 0.0452752709 -0.0637316629 0.0592104197 -0.128685504 -0.0348353833 -0.087284103 -0.0663602129 -0.0031849267 -0.000142063553 -0.0840348825 0.0924766883 0.0673238337 0.167500377 -0.0353617668 -0.0333009921 -0.112182476 -0.04946943 0.134662643 0.139826789 0.156980231 0.0751472116 0.0695004016 -0.0762207955 0.156616062 -0.0323073752 -0.074322626 -0.0840249732 -0.0568689369 0.0383799225 0.12136513 0.0337613001 0.150748312 -0.0912267268 -0.0950863957 -0.0684669167 0.0555381961 0.00922425464 0.0324769616 0.154439181 0.00271727936 0.0754308924 -0.137119815 -0.0415384322 0.107144803 -0.125383273 0.053086549 -0.0475495011 -0.00223423541 -0.128673628 -0.0492692962 -0.0113832206 -0.0116038918 0.104703367 0.0881138444 0.137240604 -0.005506441 0.00293928385 0.0956130475 -0.0784134567 -0.021586366 -0.0949726552 0.109912977 0.0546887219 0.0080575645 -0.0473530963 0.101009175 -0.0167142078 0.0431778133 -0.0919825733 0.0523650348 -0.136390731 -0.0124782622 -0.0131750405 0.0803216249 -0.135611996 -0.139048174 0.121725962 0.0170050114 -0.0948430598 0.126605704 -0.12657319 0.0280110091 0.0484481603 -0.101917908 -0.131059453 -0.00414025784 -0.000507161021 -0.0291253105 -0.0539416969 -0.132729664 0.118548885 -0.119953021 -0.106853649 -0.0724168047 0.0265188962 -0.0701930001 -0.0523263291 0.0513340086 0.0227334052 0.134236738 0.00679840147 -0.0369084999 0.101770133 -0.0643866509 -0.0391958281 -0.11996039 0.0662431717 -0.0149399638 0.113427982 0.00900928676 -0.115386441 -0.0923655182 -0.125217244 -0.105257966 -0.127515703 -0.115915604 -0.117300279 -0.0824699104 -0.00401163101 
-0.0243823603 -0.071768783 -0.0823556334 -0.116988376 -0.0874817073 -0.0767723396 0.0525470376 0.0697348416 0.0105717331 0.116433874 0.119623169 -0.0428111032 0.0553773344 -0.107416034 0.131372139 -0.0292251855 -0.064143002 -0.129900947 0.0492804348 -0.0171842128 -0.13053751 0.0456082523 -tensor_6bias 50 --0.130149469 -0.0166715905 0.0930550545 -0.00245699566 -0.106651746 0.0490312241 -0.022929607 0.0280555151 -0.149067715 0.102508798 0.00938428845 0.150981218 -0.114400074 -0.0645027235 0.016750779 -0.0841871202 -0.140838712 0.0354661271 -0.127782494 -0.107863024 0.0691528246 0.0463019311 0.0961098671 0.0680775866 -0.118705533 -0.0768498629 0.17632094 0.119572431 -0.0184698328 0.0619278774 0.0973391309 0.0654409006 0.0840726122 -0.0982169956 -0.0271483194 0.119829573 -0.0558238514 0.0868603587 0.109693505 -0.126328036 0.169493452 -0.153565153 -0.0748259053 0.0136530614 0.00735191396 0.121958517 0.00247201324 -0.0304538812 -0.0688641742 0.0533529967 -tensor_18weight 500 -0.116833255 -0.0357756764 -0.131794453 -0.0954782292 -0.0199575797 -0.0554031618 0.0123106642 -0.0748193115 -0.138343185 -0.0409799181 0.00820469763 -0.000622143503 0.103817098 0.133209154 0.108685024 -0.0963258296 0.133699819 -0.0743798465 0.105768584 0.101406492 -0.0850842893 0.088313885 -0.0203158874 0.0809838101 -0.00829087198 -0.00621365244 0.12420509 -0.0834524781 0.013258785 0.000458776922 -0.0094107436 0.121913455 -0.112981685 -0.0701991916 -0.0657723844 -0.0241640378 -0.111835979 0.0305915046 0.0958627611 -0.123314679 -0.0531556047 0.0353454947 0.122457325 -0.188562363 -0.0805713162 -0.0883217677 0.137407482 -0.0494341888 0.08294186 0.02592903 -0.102925614 0.112401806 -0.0666541457 -0.0743903071 -0.124930732 0.00989535823 0.0654063374 0.0936208814 0.00587140396 -0.133220345 -0.0864736214 0.129711837 0.00181314978 -0.11009489 -0.142534941 0.112804607 0.0828809589 -0.0675114542 -0.0837594494 0.080936946 -0.0578315705 0.000171717635 -0.115505785 -0.00781203434 -0.044651553 0.0236158818 
-0.0261275116 0.0364638008 -0.0263226833 -0.0818104967 -0.0312857106 0.0161777474 0.0981943533 -0.142439932 0.130368665 -0.148819655 -0.0904635265 0.142308936 -0.0558655635 0.119650826 -0.123948567 0.071270369 -0.0475144461 0.0499968566 0.0238669682 0.0490803383 0.102086172 0.0440850668 -0.0912592411 -0.124338679 0.0205278974 0.0803509951 -0.127337903 0.0695622861 -0.0565674454 -0.0611764155 0.0246511605 0.147374704 0.0300100464 0.031006122 0.0128832478 -0.134186521 -0.0788531825 0.0231011659 -0.077104196 -0.0899467021 0.083257556 -0.016190676 -0.15599066 0.0265589394 0.0970405489 -0.101687469 -0.119606331 0.101642758 0.0926929563 0.0224016327 -0.118740149 -0.0145147676 -0.121801361 0.0961833745 -0.0375055596 -0.0604088642 -0.0904211402 0.0308890697 0.0637984425 0.0605207421 0.0125890784 0.101664178 0.0726759285 0.0591030382 0.0281341467 0.0179437492 -0.0888262913 0.0439237058 0.0959485695 0.0477892607 0.138858929 -0.0815726668 0.0842805654 -0.0488859788 -0.127596661 0.0637440085 0.0945658982 0.0379101187 -0.133902624 -0.0156785361 0.153879091 -0.0837965533 0.112203002 -0.109656185 -0.121323109 -0.0539827608 0.157840356 -0.140984669 0.103252746 0.0117826462 0.0724756718 0.1193185 -0.0168017652 0.105931647 -0.15716891 -0.0412402935 -0.182400733 0.175201252 0.0334252864 0.190847382 -0.0840521902 -0.102074817 0.0166378226 -0.070603177 -0.0926473141 0.0471240357 -0.0813179836 -0.0973169506 0.049886927 0.107353233 0.0245145429 -0.122061022 0.0877110511 0.0779518932 0.181554541 0.00650133053 0.148282856 -0.167027533 0.0817222595 0.166063353 -0.071306996 0.0401937515 -0.0894560814 0.0982646197 -0.0373243652 0.0289797336 0.0392166823 -0.108381942 0.119098619 -0.0920399055 -0.0729553699 -0.124035373 -0.0866058767 0.132396668 -0.0131686293 -0.0622706711 -0.115531176 -0.129241109 0.0608382747 0.0826291889 -0.0870855898 -0.153687358 -0.00150228257 0.114700183 0.093991451 0.0560563877 -0.0242470428 -0.03554409 -0.0501358062 0.0884645432 -0.0462445691 -0.160832793 -0.0499034822 
-0.0424251109 0.15960142 0.00967518892 0.013454861 0.0476650223 -0.0562032312 0.00298618712 0.113581337 0.0738084391 -0.137588665 -0.0807389989 -0.0702914745 0.0433799401 0.130559713 -0.0844176263 0.00879538152 0.0190126356 0.0929833725 -0.0415338278 -0.0416321158 0.0581194386 -0.106194869 0.0854856074 0.105097309 0.0867655277 -0.110841736 0.13861914 0.00394579815 0.0424983464 -0.0553477593 0.0576893315 -0.0487310477 0.00869362801 -0.0946266651 0.0686361194 0.0094735641 -0.0982639343 0.0865717679 -0.0489508957 -0.0480820388 0.0424392 -0.0348532163 -0.145720199 -0.0116074253 -0.0465409979 0.0409410596 0.137870952 -0.141640723 0.133919835 0.0830813125 -0.0514443479 -0.144528806 -0.0606587119 0.0772298053 -0.000756907742 0.0295330584 -0.00934765488 0.0772825181 -0.120189674 -0.0941238254 0.110182583 0.020738909 -0.110578194 -0.170464888 0.135421559 0.0186651032 0.157670006 0.106957033 0.113828443 0.032888636 -0.085790351 -0.102287576 0.10205999 0.11301367 -0.0841406286 0.0869232267 -0.0806331262 -0.0432705954 0.0882454589 -0.127744198 0.0729970783 0.183013499 0.0513928235 -0.160567686 -0.0651886687 0.0733133778 -0.140486658 -0.119877644 -0.0233747195 0.0682742521 -0.0181181505 0.0523737594 -0.114034481 0.178959042 -0.108594783 0.0531802028 0.00544614438 0.122413933 -0.107881032 0.00437956769 -0.0753047615 -0.0751520917 -0.0669195428 0.140085652 0.107123025 0.0215605646 -0.0108890682 0.126112461 -0.0248530898 0.0944449008 -0.11579188 0.0103179337 -0.147988439 0.0894878879 0.155256197 0.0593105108 0.0212335344 0.108353369 -0.0329438969 0.0173103362 0.113536589 -0.0336008444 -0.0386447273 0.0362053365 0.0621379763 -0.0990284234 0.00793749839 -0.0617283881 -0.0743452683 0.179765821 0.114959568 0.136922091 -0.1003832 -0.0692859069 -0.139016584 -0.13847138 -0.0461068004 -0.0357935503 -0.175952822 -0.0971040502 0.0884984359 0.00901553407 0.173032984 -0.0787108466 0.0656532124 -0.0365875959 -0.0772555619 -0.101957574 0.177314684 -0.184264794 0.0541368276 -0.173181415 
-0.0607216991 0.0584572963 -0.0959036872 -0.0192932636 -0.140759885 -0.0871745721 -0.0414703935 -0.128599197 -0.134148136 -0.0330905467 0.086126022 0.0754621923 -0.0512827821 0.0647300407 -0.0423469543 0.103672571 -0.10058222 -0.0269276202 -0.0845367238 0.148252413 -0.127518728 0.120773628 0.0699376613 0.085009709 -0.0772422925 -0.00348520023 -0.0487586632 0.0242007188 0.0718890578 0.0988076255 0.0587318242 0.0960896015 -0.0790796131 0.0568241999 0.0869796574 0.0882544219 -0.0609133728 -0.193863526 0.117342524 -0.0339369737 0.115510337 0.0176041406 -0.134604976 -0.0149109662 -0.0460692905 -0.0518316031 0.154208392 0.116131343 0.000284732843 -0.124516778 0.00545642432 -0.1934973 -0.0553306863 -0.114772283 0.0383958407 0.065391317 0.0921707079 -0.112537354 0.138822451 -0.112784393 0.163916111 0.141898572 0.144761667 0.043000266 -0.0156551208 -0.13070862 0.0155345816 0.0829150677 -0.15498811 0.0502647795 0.106921747 -0.0415367335 -0.101776689 -0.0270393789 -0.150163233 -0.00523975072 0.12342082 0.030272549 -0.0634030774 0.110089242 0.078154169 0.141164288 -0.0665735304 -0.133532166 0.0205077082 -0.0799736828 0.00293931179 -0.0775768757 -0.0609018579 0.104522519 -0.0473734476 -0.000608845323 -0.139011964 -0.12750718 -0.113618098 0.0852759406 0.0522349291 -0.011812062 0.103033014 0.116778359 -0.0851101875 0.0191278923 -0.138369411 -0.0144041777 -0.033769384 0.0952177495 -0.184691101 0.00995114446 0.0508734547 -0.161694378 0.103683837 0.106144048 -0.0914210454 -0.10774231 -0.0468717627 0.0950566381 0.0999391824 -0.0267179832 0.114936009 -tensor_0weight 5000 --0.0508145355 0.0380447619 0.063999176 0.0305916369 0.0178726781 -0.0604492612 -0.0922251716 -0.0409420505 -0.0806181803 0.0253173634 0.0200065672 0.025728466 -0.0917467773 0.103931934 -0.0364619642 0.0943374634 -0.00828016549 -0.00586269284 0.0867723376 -0.0814111456 0.031561438 0.0127995471 -0.0175799523 -0.0762633756 0.0174684227 0.0652227029 -0.0757252946 -0.0175922979 -0.0378516056 -0.0103352945 0.0671745986 
-0.00129433826 -0.0402865373 -0.0769478306 0.136584729 -0.00320164161 0.0293592662 -0.131899893 -0.00832800474 -0.0754740536 0.0523712561 0.00832175463 0.0861478224 -0.0513150692 0.0660690591 0.0819229931 -0.061753273 -0.0584596395 0.00335742347 0.0352997482 -0.0355550982 -0.0571611226 0.0267518349 0.00366023136 0.0501797497 -0.0911384001 -0.0553132854 -0.0707477331 -0.00369775807 0.0324063897 0.0952493548 -0.0701338053 0.0869731754 -0.0122592403 0.0579397976 0.0811071023 0.0882389173 0.00392138492 -0.0282733813 -0.07796707 -0.0237656292 -0.0976018459 -0.0302748028 0.0959793851 -0.0774898157 0.0740917549 0.0638118461 0.078004308 0.0606729016 0.00807148404 0.122843958 -0.0412954316 -0.00570290349 0.048803661 -0.0925534815 0.0642913654 0.0318598822 0.0163798314 -0.128930375 0.10925965 0.0126452744 0.0192803536 0.0565545857 0.0464702807 0.0887314975 -0.0057315547 -0.0403685123 -0.0338817462 -0.048792094 -0.0464581065 -0.0035396677 0.00428326242 -0.00884059165 -0.0119973514 -0.000725717517 -0.00789349712 0.118645795 -0.0580181517 0.0644906759 0.00912526064 0.0169304255 0.124032162 -0.0209737495 -0.0644200072 -0.00610294472 0.0443046205 -0.111826301 0.0924093947 -0.0439966656 -0.0174338557 0.0350687169 0.00473201321 0.0256146453 -0.0102232145 -0.0740443543 -0.0562746376 -0.014960424 0.0814036652 -0.116801761 0.100059807 -0.103689127 -0.0274910927 0.0271945633 0.0108639002 0.10785304 0.119615123 -0.0933286771 -0.104783587 0.0308681801 -0.0486758314 -0.109117366 0.0569621176 -0.0481622331 0.00365207065 -0.0219343584 -0.0347234085 -0.0862182751 -0.0892119408 -0.0102323415 0.0456935875 -0.0520036221 0.0333958827 0.0777817443 -0.0517655611 -0.0659507513 -0.0258211251 0.0767518133 -0.0409224853 0.0908992663 -0.02203927 -0.0486994721 -0.0106065404 0.0736719891 -0.0421070047 0.00211916398 -0.0954323933 -0.0184283517 0.0945980549 0.0464413576 0.0754498392 -0.037175186 -0.00513185887 -0.0806778446 0.0640999004 0.0459177829 -0.0637460798 0.050385993 0.0309800953 -0.0565693192 
0.123325884 -0.0541292951 -0.103475936 0.131021678 0.0500673242 -0.0464580432 -0.0415508747 0.0403000787 0.0370427035 -0.0539194234 0.115986116 -0.0146925198 0.0295080431 -0.0609974898 -0.0897742435 0.0866930038 -0.0403378308 0.0923621878 0.0294523649 0.0361655056 0.0205234103 0.120485581 0.0700325444 -0.0497162156 0.043903362 -0.123235166 -0.00563619565 0.0633756816 0.0214825068 0.0333110169 0.0104085831 0.00706916722 0.0849041864 0.110511042 -0.0315861739 0.0258784778 0.0600173473 0.0242875703 -0.0698327497 -0.0123227434 0.014183823 0.0316864885 -0.0905837119 -0.166954413 0.0110382149 -0.0523379371 0.113217518 -0.0749479383 -0.0839105621 0.06123152 -0.123284116 0.00467087328 -0.0400639065 -0.139171645 -0.0208464172 0.0824865252 0.0881877691 -0.0531907566 -0.0462760702 -0.110711597 0.0575970635 0.0207520071 -0.0592331663 -0.0876662433 0.0696238056 0.0587312393 -0.146805972 0.0465192385 -0.0876265243 0.0274952278 -0.0920811594 0.0425466485 0.138803825 -0.11095649 0.035078045 0.0631543472 0.0814108402 -0.0799154192 0.0390180871 0.076176472 0.0414959937 -0.0411920138 -0.013391098 0.00715481211 0.0667355806 0.0209038183 -0.0535365157 -0.000487437297 0.0508622043 -0.0740626454 -0.0971553922 -0.00739993062 -0.11939621 0.0771144629 -0.0776564106 0.00858938135 -0.103641413 0.0731763914 0.00908944476 0.0822138935 -0.0353183523 0.0420758463 0.00362044154 0.107373729 0.0974787101 -0.101556815 0.0841847733 0.0912442133 0.125659168 0.0618594773 -0.0642373115 0.0193936136 0.101438187 0.0530573241 0.0676667318 -0.00218354817 0.0432167687 -0.0871621072 -0.0426511392 0.0288807489 0.0790897906 0.0490392447 -0.0205203649 -0.0993364006 0.00468417443 -0.0547306687 -0.0277090929 0.00697927317 -0.0244437791 -0.0157332867 0.106168769 -0.0359168798 0.114783011 0.152111039 -0.0253520384 -0.0015796381 0.03375398 -0.104952097 0.0092763612 0.101215295 -0.00308345142 -0.0368209258 -0.0473044775 -0.00817228947 -0.109114319 0.105732635 0.0244474001 -0.0146100083 0.0529635809 -0.00840851665 
-0.0632252246 -0.0520593971 -0.00865435693 0.0344991274 -5.80968299e-06 -0.133371904 -0.151106805 0.0796020627 -0.00727936905 0.0341606252 -0.0332682915 -0.121840335 -0.152285203 -0.0688880011 0.0230131447 0.000283442176 0.0609894954 -0.004379577 0.0477737971 0.044799611 -0.132041544 -0.0921159685 0.0794112161 0.0724173859 0.0694021881 -0.0325237289 -0.0596630126 -0.128212959 0.0867897272 0.0183652658 0.067165792 -0.0221667513 -0.0792030767 0.00673970953 0.0961405337 0.11915601 0.0162419658 -0.0958381593 -0.0221719481 0.066273272 0.0103854984 0.0839003772 -0.0880922079 0.0691054389 -0.0436538383 -0.0678017363 -0.0862348899 -0.0580505431 0.0340274572 -0.0189464837 -0.0844241232 0.077873528 0.07532157 0.0911468565 0.130459666 0.0642754659 0.103514485 -0.0523621738 0.0538226627 -0.00427193614 -0.0198291782 0.0464040674 -0.0794681758 -0.0358173288 -0.0710784718 0.108343065 -0.0409613326 0.0320960544 0.053875234 0.00935616158 0.0279227011 -0.0595730767 -0.0895934626 -0.054435689 0.0687097013 -0.0623276383 -0.0781896859 0.0710855275 -0.0379823111 0.0614629425 0.107129268 -0.0969881415 -0.116216652 0.104508013 -0.0730313659 -0.0942338929 -0.124592021 -0.0121723814 0.0757561401 0.00725453952 0.027494695 -0.0790883899 -0.0104121519 -0.0122909518 0.0885993391 0.00961995777 -0.0863305554 0.0516466871 -0.00846583862 -0.137650937 0.053744074 0.0191885531 0.099622637 0.119871758 -0.0234789476 -0.0225552637 -0.0628033355 -0.061706692 0.00870011281 0.0219527185 -0.113005184 0.0864791349 -0.0586110726 -0.0858683884 0.0617091358 -0.0387163647 0.0250992496 0.0188102666 -0.0987309664 0.0387692712 -0.0278170835 -0.0702976808 -0.036741849 0.0252645276 0.0743944049 0.0373597182 -0.0650147647 0.0886150151 -0.0378745385 -0.0721595287 -0.100263052 -0.024431048 -0.00138329086 -0.0156793948 -0.108034611 0.00560034066 -0.10105747 0.10377124 0.108116172 0.106484957 -0.00357731106 -0.103540003 -0.162499279 -0.0292240772 -0.13454926 -0.0578239672 0.0473558456 -0.0877546594 0.0137864761 
-0.0327536836 0.0507721342 0.0252886489 -0.092969656 -0.046330668 0.0129592251 0.0318424702 -0.0836347714 -0.00133580307 0.0577662215 -0.130686596 0.0925165117 -0.0197680425 -0.0594279207 0.081254214 -0.025833251 0.106694445 0.103731573 0.0476561114 -0.0746863931 0.0867198333 0.0718293041 -0.0795527846 0.0907836407 -0.0875569582 -0.0436345451 0.0336315818 -0.101902887 -0.112922281 0.0268265437 -0.0239662174 0.108922079 0.041044455 -0.00692772307 0.0482088998 -0.0167634431 -0.0713200569 -0.131396279 -0.0818632841 -0.0646765679 -0.00467614038 -0.068184495 0.0581986308 -0.12290591 0.0856338814 0.0330237187 -0.113662779 0.0768672228 -0.0640005991 -0.102779485 -0.0699071512 0.109070554 0.0373121388 0.00894282851 0.0210740287 0.102456108 0.00209105411 0.0643166751 -0.0748509914 0.0103702946 -0.00475171115 -0.124941736 0.0627204254 -0.110363327 -0.0701798648 -0.00204091449 -0.0581695102 0.0710774362 0.0637491271 -0.112383977 -0.0604935288 -0.0444468074 -0.0884831399 0.0787647441 -0.057589367 -0.0092884656 -0.0068281414 -6.82513783e-05 -0.00189695833 0.0291572809 0.0887888893 -0.10821224 -0.0333320834 -0.0257991888 0.0457047522 0.0474029109 -0.0698928088 -0.12633343 -0.0517579019 0.0700997636 -0.0429069959 0.0199789684 -0.0108658681 0.00652803527 -0.00151343702 -0.0620038249 0.0806239918 -0.0406728946 -0.0541682765 -0.0738097504 0.106633566 0.118564427 -0.0846382231 -0.0406942107 -0.0214116126 0.021105893 0.00434125355 -0.0575985499 -0.0204750691 -0.0223995987 -0.108478487 -0.0789667591 0.00276051858 -0.0364289954 0.0240258034 -0.00772039779 0.0677978322 0.0111008026 -0.0301737618 0.129933059 -0.0297325328 -0.121423602 -0.00256420486 -0.0767344758 -0.0345042236 0.0232742243 0.0518034101 0.0377323851 -0.0785427988 0.0944864005 0.0168189276 -0.0450433195 -0.0179200061 -0.0332794897 -0.115497865 -0.079460144 0.0748219565 -0.0902453661 0.0155278947 -0.0175510496 0.095810093 0.081910409 0.0412754081 -0.124072641 -0.0311682243 0.0492392965 -0.0202937964 0.0275281016 0.0286301002 
-0.0205335319 -0.06336198 -0.00144966797 -0.0174041037 -0.116765253 -0.0784229636 0.0826164782 0.0420248657 -0.0860279575 0.0445345417 0.0170288365 -0.0538485646 0.0218434893 -0.126645058 -0.081135571 0.100772187 0.074350059 0.0520832427 0.0342816785 -0.0332369693 -0.00494507421 0.0594232231 0.0195060819 -0.0653662607 -0.0566894747 -0.049552016 0.0946275666 0.0667197555 -0.0115687326 0.0609822571 -0.0733285174 -0.00757924188 0.128872409 0.131651253 0.0883550048 0.00453105802 -0.0755265802 -0.0273298975 -0.075551331 0.0423105136 0.103586905 -0.103956595 -0.0853345916 -0.0501238741 0.0979430974 0.0415611826 0.0830030888 0.026601227 0.0730280057 -0.0635615513 0.0683744699 -0.0418914109 -0.0580942109 -0.0672050193 0.000723240606 0.0774079859 -0.0247790851 0.0417027809 0.0464081317 0.0150757832 0.0990941375 -0.0262722876 0.0383368991 -0.0866433606 0.053820353 0.000491182785 0.0509168692 0.0996452123 -0.109139279 -0.0142310057 -0.0809439868 0.0759590417 -0.0160349142 0.0490121357 -0.096037291 0.0512820296 0.0240419395 -0.0778301433 -0.00461494876 0.0313165486 -0.0526363626 0.0636812896 0.0332127437 -0.0893361941 0.107702576 0.0753764287 -0.134968281 0.154497638 -0.0106210969 0.0807469338 0.0397658274 -0.0412878655 0.0725319758 -0.075096637 0.0352239423 -0.156253964 -0.105903931 0.00186598103 0.0204177406 0.0137510747 -0.0850754306 -0.0996614769 -0.137975514 -0.0964332893 -0.0970748141 0.0658250079 -0.0284603387 -0.0586091056 0.0547327399 -0.0909201056 0.0791378096 -0.135371462 0.0970040932 -0.0691698939 -0.0478290841 -0.091066964 -0.10993892 -0.0587934963 -0.149786964 -0.0152382096 0.104548037 -0.0258558169 -0.144351274 -0.0335272104 0.0226420816 -0.0596394576 -0.0499719083 -0.0401447415 -0.137909144 -0.0354104489 0.0517158546 -0.00912801269 0.100223139 0.0372407772 -0.0557585917 -0.00336286239 0.0683526322 -0.040298298 -0.00263097975 -0.0374882258 0.0522439405 0.0507735275 0.105002061 0.0763192996 -0.0607291535 0.0252055712 -0.00846379343 -0.0764852315 -0.0580886006 
0.0643623322 0.0687340647 -0.0175391026 0.0497902818 -0.0412122346 -0.0626358017 -0.0582311451 0.126354679 -0.118473426 0.151346073 0.0104045104 0.0904658511 0.0403522402 0.00989431608 -0.104035281 0.0668206066 -0.0350457989 0.0594084747 -0.0234730225 0.0567279682 0.0705103427 0.0155637255 0.00617892295 0.0591375902 0.10290321 0.0125923716 0.0783741623 0.00137256691 -0.00307283737 0.0503848121 0.10381522 0.118850879 0.128660917 -0.053519316 0.0977203473 -0.0134721575 -0.0389264151 -0.00525255827 0.0452782214 -0.0551993214 -0.10694126 0.027695125 0.0864779726 0.0454558991 -0.0506804623 -0.0287189651 -0.0546144284 0.120786496 -0.0527668484 -0.0774859414 0.102176331 0.0673900619 -0.0448943712 -0.0719371215 -0.0406077392 0.0517419763 -0.133232012 -0.0570902154 -0.09013246 -0.0748804808 0.00943455193 0.0882416517 0.000705939427 0.0691983029 -0.0305666197 0.0502307639 0.0774589181 0.0290872231 -0.103126198 0.0543247163 0.0888695046 -0.0432999581 -0.0238669831 -0.0651162937 0.0898748636 -0.0334561318 -0.0923917145 0.00535089429 0.0831253678 -0.032534346 -0.103109762 0.0489915684 -0.0154016791 -0.0483072698 0.0992657989 -0.0456443615 0.0638154149 -0.00041857746 0.0412595235 -0.0256175622 -0.0011343424 0.0302553996 -0.0492172204 0.0441855341 0.0358452648 -0.125684917 0.0641204044 -0.101313218 0.0406814888 0.0231520366 0.00894289184 -0.0159130525 -0.0403623842 -0.0126857739 -0.0646654069 -0.0864315107 0.0479207449 0.0227875356 0.0891341716 0.0144964764 -0.128592268 -0.0647967756 -0.0491824746 -0.122899771 0.0843127072 -0.0399818346 -0.0702486336 0.0469990969 0.0585947372 0.00991726387 -0.0545531549 0.121398546 0.0267390348 -0.0134512298 0.0329682231 -0.0672333017 -0.0224784035 -0.00435023708 -0.0271258652 0.0712630972 -0.0160659477 0.0995363668 -0.0256949402 0.103435107 0.109910071 0.00650324021 -0.0404900536 -0.0908767134 0.0118982857 0.00520248339 0.0329482853 0.0144852586 -0.0797013938 -0.0785156786 -0.114622436 -0.0149816191 -0.0634922013 -0.0747183189 0.0377447829 
0.00633793836 0.0832202658 -0.0870476142 -0.0112469308 0.051385209 0.00177763787 0.0805689245 0.0667984635 0.119763464 -0.0189604852 -0.0689202473 -0.00829955377 0.0841114894 -0.0166632887 -0.101568498 0.0870780572 0.0787321255 -0.101076506 -0.0728867874 -0.0815497339 0.059538722 0.0476107113 -0.0611895993 -0.055862911 -0.00502554746 0.0184646137 -0.0100589432 -0.141160175 0.0608552545 0.0207750183 -0.0828769058 -0.0782217011 -0.0249421597 0.0649304986 -0.0759224221 0.0226793531 0.0345480256 0.101637982 -0.0291147213 -0.020399509 -0.0961149037 0.0607593879 -0.0901033953 -0.00980376825 0.0093408674 0.0903950557 -0.0326510593 -0.0616331063 -0.0332476608 -0.0641225353 -0.0496507026 -0.058669664 0.117607869 -0.0409576073 -0.00356686814 -0.105142437 0.0766613707 0.0395114012 0.0188095663 0.0634850636 -0.0737257972 -0.0844153538 0.118897498 -0.000630921393 0.080352664 0.00662139896 -0.0893025771 -0.0714181289 0.081619963 0.0111359404 -0.0571513996 0.0548180155 -0.0636223927 0.125711203 0.0851431414 0.130260974 0.0836031362 0.061986275 -0.028846303 -0.0287329499 0.0502533987 0.115427487 -0.0506522879 0.127979293 0.119268231 0.0850080177 0.0331578441 0.0409094281 0.0090124933 -0.0136618558 0.0948067382 -0.0672471449 0.0505564883 0.032799989 0.0633241385 -0.0469509736 0.0506216548 -0.0372176617 0.0645158365 0.149505928 0.0132820019 0.0121845976 0.0295179803 0.0295598768 -0.130403206 0.0423673615 0.0379888043 -0.0185889266 0.0913859308 0.0504159145 -0.0536566004 -0.064247027 0.0357843451 -0.00891068671 0.0950773582 -0.116974562 -0.0360760242 -0.121625684 0.103534453 -0.110155627 -0.109326176 -0.0306907389 -0.124297231 0.0215684474 -0.107538059 0.115768477 -0.0631534979 -0.107648998 0.00836135633 -0.0481221005 -0.0226832405 0.00744933914 0.0239705071 0.00856848713 -0.0518919192 -0.0672201142 -0.0423557498 0.0152753228 0.0322034582 -0.0436891429 -0.0355248898 -0.0221560691 -0.0227099117 -0.0872905031 -0.074751161 -0.0961238891 0.0214987211 -0.0765815899 -0.101568431 
0.0197522994 0.0158146303 0.0358287059 -0.0310186576 0.054503344 -0.0471081249 0.00175969047 0.0102003291 0.0548275784 0.0608831719 0.00927542709 -0.00995576289 -0.00546212913 0.124199063 -0.0787529647 -0.107979171 0.0664112717 0.00175410474 0.0996535346 0.053399168 0.0650362223 0.0413330421 0.059269011 0.00307723135 0.0968322679 0.0304244794 0.0847681016 -0.0587718002 -0.0920936614 0.0963051766 0.0730310529 0.075302057 -0.101675689 0.00834253523 0.0334894434 8.25827228e-05 -0.0221394673 0.0630398169 0.0403992832 -0.0181807 0.0733471513 -0.0055750059 0.0181060694 0.1169772 -0.00306291087 -0.0245710369 0.0374747738 0.0355481431 0.127457261 0.06669911 0.033534728 -0.0313876085 0.058364775 -0.0700249672 -0.0348450616 -0.0787659734 -0.111428857 -0.0750032812 0.1067295 0.0295113139 0.022092022 0.0761882439 -0.0214715526 -0.0454636477 0.0440265127 -0.0405539833 0.0178954173 -0.0918944478 0.0349099524 0.0980099589 -0.0593721792 -0.0717693344 0.0749724507 -0.10822311 -0.10527648 -0.0456449613 0.00945392437 -0.113418877 -0.0248292517 -0.151771814 -0.0317451209 0.00303221145 0.0136932479 0.0757390037 -0.0645068213 0.110142581 0.0331983566 -0.0726855695 0.0410212204 -0.0837602541 0.00736812409 -0.0960764661 0.0659725666 -0.0506423712 0.108358607 0.0074415463 -0.0579753295 0.0222589932 0.0219781511 -0.0769435242 -0.00365759665 0.0658315271 -0.0195193309 0.0876873434 0.0829789042 0.030799007 0.0445269085 -0.087823227 0.0490200967 0.0495685935 -0.0617967919 -0.00453193625 0.103787176 -0.0256911721 -0.0746461451 0.149433792 -0.00853996538 0.0359981731 -0.0535804741 0.10725081 0.0878978521 0.0258817542 -0.0147519195 -0.0875247493 0.0177521463 0.0229451209 0.0438379906 -0.0674143359 0.0837276876 0.0518606342 0.0602514297 -0.0148247061 0.0175807085 0.0104981009 0.0398374051 0.016279107 0.0897895545 -0.010169927 0.12621972 -0.152914077 0.102994591 -0.00934717152 -0.0707922205 0.088611111 0.106939681 0.112134047 -0.0540277697 -0.054023616 -0.0951209962 0.0558281131 -0.0773286074 
-0.000430493499 -0.0220108796 0.00224742503 -0.042162884 0.0229496341 0.000386319705 0.0903915763 -0.0727334097 -0.0566792227 -0.0469854027 0.0666792765 -0.0901913494 0.0639531165 0.00190761709 0.0819069371 0.0437930077 0.073981382 -0.10753461 -0.0635947138 0.0397201367 0.0639339834 0.0106142825 -0.095933184 -0.0258502234 -0.151261196 0.0201133601 0.0523358956 -0.113775507 -0.0635734051 -0.0209280569 0.0180092286 -0.0952379927 -0.0805232748 0.0792436674 0.116160475 0.0405516624 -0.0603361167 0.0921702161 0.0638613254 -0.0797907561 0.0562291071 -0.0404303297 0.0192060955 0.0931882188 -0.0454974994 -0.063482672 0.0183900204 -0.0941224843 0.0311275516 -0.0276973266 -0.0228528716 -0.0103476569 -0.0013043856 0.0595675893 -0.0146932686 -0.0967626795 0.0205185581 0.0111512868 0.0304273423 0.0346512347 0.0115508316 -0.0314554684 0.0335132703 -0.0399859101 0.0783086121 0.0110251317 0.050887987 0.0386743098 -0.018033972 0.0640587211 0.0695254728 -0.0274955798 0.0315612257 -0.0987086147 0.0660334751 0.108960167 0.0362012573 -0.0556706525 0.0763316229 0.0343016721 0.0549547151 0.0566200167 -0.00617094245 0.104899995 -0.0148995249 -0.0461326651 -0.052078858 0.113826625 0.042423591 0.0696527734 0.0174295567 0.0255777556 0.0323791206 -0.085186027 -0.0352433883 0.0130573669 0.116727203 0.0527772866 0.0953754038 0.0984134078 0.0301339664 0.0283296034 0.0112838252 -0.041340284 -0.106966309 0.0208709706 0.0510318168 0.0411410108 0.0704910904 0.113985598 0.0480646491 -0.101439185 -0.101105615 -0.0554792546 -0.0963118672 -0.0833592713 0.0804136619 -0.0818424746 -0.0130467992 0.0993848965 -0.0514523238 0.0999550074 0.102077879 0.00966593996 -0.00935996324 -0.0179428924 -0.0360591672 0.00619822368 0.0243546553 -0.142853007 -0.0114681982 -0.0543433689 0.0218674429 -0.0637027845 0.0662505031 0.11204917 -0.0893480182 0.0857268497 0.103264339 0.0781002343 -0.0893782303 -0.0274790041 0.0431495346 0.0856630653 -0.12378367 -0.0509530865 -0.0479679741 -0.0808392987 0.0511769354 -0.00993785262 
-0.0495909974 0.00732931681 0.107190818 0.0212429408 0.0919175819 -0.0032403795 -0.0621873438 -0.0842421055 0.0878323093 0.00147393253 0.0229070615 -0.0386694148 -0.0345502682 -0.0645541772 0.14156653 -0.0889476463 0.0902122259 -0.0681383684 -0.0405545346 -0.0987435952 0.0225519631 -0.118827663 -0.111183643 -0.0112256492 -0.00946287438 0.0775573999 0.0200256836 -0.0373974107 -0.078532733 -0.108547017 0.0992447287 0.0162392482 -0.0711892024 0.0401137359 0.0209429767 0.000362629071 0.0647842437 -0.0358259976 0.00750721199 0.0491359942 0.0709332824 -0.105451792 -0.0134563902 0.0872533396 -0.0307084043 -0.118991949 0.0960125998 0.0121480636 -0.036713779 0.0374878086 0.0718258396 -0.0660520568 -0.00429979758 -0.055313319 0.018989075 0.0844045654 0.0639191419 0.0425145887 -0.0436811857 0.0248131063 0.0507366285 0.00984115712 0.0211421121 -0.0417334475 0.127702236 -0.142305464 0.038462583 -0.100248791 -0.0598390587 0.0798201784 0.0749086887 -0.0129145803 0.0493668057 0.0832006335 -0.00326930895 0.0621138252 0.116234139 -0.0619600303 -0.0258555952 0.00560154766 -0.00271366001 -0.0680233538 0.039063748 -0.114920385 -0.0542362481 0.0695442334 0.0281284824 0.0585357882 0.125471935 0.0688281953 0.0719351396 -0.0179130882 -0.0254238006 -0.00948760845 -0.0995621756 -0.0127528915 0.0291331895 -0.0169871729 -0.00137848861 0.126049355 0.0243894756 -0.00514754048 -0.0438758358 -0.070057936 0.00142127706 0.0820695385 -0.0231800079 -0.0708072856 -0.0734865814 -0.114026025 -0.0061859726 -0.0585030317 0.0943298936 -0.0582126155 0.064423196 0.0419933088 0.0116295256 0.0170936771 0.0498891808 0.0110197524 0.0411308594 -0.0257459451 0.0114618847 0.0878219977 -0.0317848064 0.0811458603 0.01887214 0.00988883246 -0.0506531522 0.0625907555 0.0145452367 -0.112982243 0.0802996382 -0.0328567512 0.0700641721 -0.00277703465 -0.0246732663 -0.0414474681 -0.0930275917 0.0817583874 -0.0246985424 -0.0693705902 0.0860790238 0.0245301407 0.028220322 0.0357720293 0.0410393327 0.0705156475 -0.063267082 
0.050686691 -0.0218410157 0.0550663397 0.0759022906 -0.0350831598 -0.0160008334 -0.115162447 -0.0647135377 0.0396890379 -0.0345642604 0.0103187198 -0.0589025803 0.0834977105 -0.0107147945 0.0380949751 0.0866653398 -0.0723311082 -0.0372112989 -0.000454910012 0.0213319007 -0.00432507833 -0.0310348179 0.0425887331 -0.0940774977 -0.0323967934 -0.0242477451 0.117995851 -0.0160061121 0.0213480443 -0.0668758824 0.114949614 0.0316681191 -0.0759480372 -0.0610279627 0.0633142143 0.0236565657 0.0845542625 0.00935758371 0.0250929277 -0.0281674396 -0.0359582417 0.0694757774 0.056437064 0.016289724 -0.043686077 0.0887322947 0.000600125699 0.0521455668 0.0419055298 -0.0610189848 -0.0224667937 0.0316987857 -0.0323978439 -0.0178262964 -0.0366154872 0.0907478258 -0.0856860802 -0.110066622 -0.0315983742 -0.0946494043 -0.0222084317 -0.0352201238 0.0455912501 0.0811657757 -0.0895951316 0.0279459916 -0.0952548608 0.113056384 0.00558312191 0.050939288 0.124181278 0.0341638587 0.00255426345 -0.033172816 0.0153816594 0.104887553 0.0244834907 0.0457413457 -0.0520596057 0.095031105 0.0351452902 -0.11665196 -0.0497119017 0.050630711 -0.0861758068 -0.0872503743 -0.0496218018 -0.00381143531 0.109498873 -0.0175776016 0.00528071402 0.0259748194 0.0909558833 0.0579428524 0.139967725 0.0764526948 0.00463831052 -0.0771861747 -0.144396409 -0.104723662 -0.0137682576 0.0223192684 0.0313319825 -0.019306751 0.0563000366 -0.00657232618 0.034467455 0.0391030945 -0.0310320668 -0.0617044605 -0.126123548 0.0184416007 -0.051189024 -0.0356684178 0.0274483245 -0.0450351276 -0.0650538877 -0.000781424344 -0.0433340222 -0.0677636564 -0.0434984639 0.0460208468 -0.0124574052 0.045673795 0.0250319857 -0.0097975824 -0.000288532581 -0.0693829432 0.0817056447 -0.0367149822 -0.0803469568 -0.0272673164 -0.0125355599 0.108721487 0.0555210412 -0.0362726599 0.00188711134 -0.0225252602 -0.0467403233 0.0223246478 0.0924254134 -0.00607204111 0.0348412544 -0.0419691056 -0.0352974981 0.120286591 -0.0532627963 0.0599474981 
0.117449939 0.0250896253 -0.0453546159 0.0333019607 0.0678343773 0.0618110187 0.0790082738 -0.013288267 0.0124899093 -0.0627008379 -0.0927575454 0.0872658491 -0.0858765841 0.0804509819 0.115199946 -0.116047971 0.114141606 -0.000370875583 -0.0547132045 0.0655369386 -0.105160132 -0.0587072149 0.00996344257 -0.0588719957 0.143944472 -0.0636086613 -0.0625388771 0.050297644 0.01688735 0.0503490344 -0.0187371671 0.0263831038 0.0351513959 -0.0622758158 -0.0289025009 -0.00445907749 -0.0819463283 0.0783530101 0.00172135397 -0.0108682076 0.053622894 0.0898650363 -0.125441834 -0.0469661765 -0.0740193054 0.0146140624 -0.074739024 -0.124391489 0.000910399249 0.100878544 -0.00938480534 0.10746365 -0.10205555 0.0822874457 0.117170572 0.065103583 -0.0468601808 0.0443411134 -0.0220601298 -0.0349924974 0.0565963052 0.024442032 -0.000596265076 -0.0503311418 0.0310966447 -0.0986445844 -0.0381193534 -0.107975848 0.041932255 0.0274684485 0.060344439 -0.0951578543 -0.00214851787 -0.0242667589 -0.00569425896 0.058796335 0.106092222 0.0197916087 -0.0124082975 0.0247668065 0.0231674556 0.0468560532 -0.000621114043 0.0964491889 0.0251123365 -0.0552343167 0.119750619 0.043985635 0.00931171793 -0.0136433262 0.091603227 -0.0712718666 0.0786479861 -0.0408394635 0.0966731384 -0.0660808533 0.0769225433 -0.0086235553 -0.105327964 -0.017321486 0.0972045138 -0.077172406 0.0514651127 -0.0781937093 -0.108713485 0.112203546 -0.0802456141 0.121202722 0.11901883 -0.0931790471 -0.0164292976 0.0312756896 0.105686158 -0.0783906654 0.0468474701 0.0110720228 -0.00267141312 0.0711446702 -0.0328070559 0.0175967477 -0.0356302932 -0.0124149965 0.0686402246 -0.0505678542 0.129400566 0.0214219112 0.0196880996 0.104359493 -0.0519865453 0.052242592 0.00997835957 -0.0990768448 -0.0456322841 0.0230734646 -0.0203887951 -0.0376775041 -0.07378342 -0.0275467373 -0.0691813529 -0.0821307749 0.0593699105 -0.0246762205 -0.101402849 0.0678628758 -0.00588039402 0.106908754 0.00991031248 -0.115229808 0.0721573606 -0.0429049321 
0.0701546818 -0.0851534382 0.0652838498 -0.0788848251 -0.0332299247 -0.0408851914 0.00320880138 0.0196518935 0.0832488984 -0.0366014726 0.01875652 0.053820096 0.0153092891 0.0467731841 -0.0368329771 0.0111917052 -0.0291276965 0.0631685331 0.0357577875 -0.0179604348 0.00486189499 0.0305900779 -0.0209680013 0.0740240738 -0.0749756619 -0.121885069 0.032916151 0.00262444629 0.00849013589 0.0662304983 0.0742840394 -0.125835225 -0.0522070974 -0.113991506 0.0744321495 0.0978048667 -0.00498304795 -0.0719037652 -0.0263758246 0.0775782466 0.0118285939 -0.0350849591 -0.0356183834 -0.0106398668 -0.0223848727 0.0236225128 -0.120488241 0.0512224138 -0.041531492 0.0656389818 0.0879166201 -0.0274794661 0.0920548141 0.0264546964 0.0468961522 0.0466408059 0.0399114974 0.0449604504 -0.0700372905 0.0427690521 0.100792646 -0.0325727239 0.0341325775 -0.114281707 -0.00742708845 0.0433078147 -0.10298638 -0.104879826 -0.0632601455 -0.0102007883 -0.0802601725 -0.00294449297 0.00281117624 0.104717933 0.0612074584 -0.0467934757 0.118006982 0.117171124 0.106652826 -0.0225161687 0.0956271738 0.0270829834 0.0200848412 0.0168362167 0.0149010466 -0.0440483093 0.117436014 0.0664134845 0.0302254353 0.0535751954 -0.00848081987 0.0130929723 -0.0325898565 0.128652498 -0.0109613249 -0.00605653459 0.0190438107 -0.0259028617 0.0401356928 -0.00171622215 -0.104202524 0.0845206603 0.12883538 -0.0919445157 -0.00195987965 0.0736826509 0.0480313748 0.0636631101 -0.0408567712 0.0155776199 0.0579566024 0.134289131 0.0326339938 0.0374059007 -0.0366481617 -0.0377640799 -0.0160350259 0.0126323858 -0.0398559012 -0.0692037791 0.00400359975 -0.106625289 -0.0896666497 0.119961634 0.129376546 -0.0542201884 -0.0679891706 -0.0174552612 0.0752889439 0.116622798 -0.122125328 -0.0475201905 0.0421101414 0.00309556024 0.0322735868 -0.097082302 -0.0326796286 -0.0467596054 -0.0276475735 -0.088789694 0.0212633777 0.0486687906 0.108680114 -9.16656572e-05 -0.0739132911 -0.00859406963 -0.0290659312 -0.0827777684 0.151205987 
0.0135993576 0.0095570432 -0.142162323 0.00178637984 -0.0175982956 -0.00341362623 -0.116277464 0.127863139 0.155240506 0.0902651772 -0.0665329844 -0.0343229175 0.00592056988 -0.0689622238 -0.0882099047 0.0423216335 -0.0418753251 0.0649065152 -0.148054436 -0.126529023 -0.107144743 0.048180446 0.0964411348 0.0283801127 -0.147997066 0.0730884373 0.0641160384 0.103855938 -0.0519286469 -0.0627045557 -0.123388886 0.106936358 -0.100495324 0.036348857 -0.0662566945 0.0681579113 0.0571975075 -0.0754548088 -0.0399843156 0.0368984528 -0.0353834778 -0.0517093278 -0.123962395 0.0263090748 -0.130231589 0.0996464565 0.0178089179 0.0458062775 0.0963696018 0.0762125254 0.0340860561 0.109043621 -0.0622775555 0.113345571 0.110095598 -0.100886367 0.0148935774 -0.123453058 0.0149683403 0.0882795379 -0.0308197234 -0.00579763902 -0.0442597829 -0.0558761358 0.00445035286 0.0967673883 -0.0510170944 0.1171581 0.0859833658 0.00351574784 -0.0282143541 0.0262071025 -0.0563719533 0.0486262627 0.0520373955 0.0674998388 -0.0777793005 -0.019211974 0.0729704723 0.0623332597 0.0411960185 -0.00379213877 -0.0160260908 0.101385273 -0.07236664 0.118454322 -0.0279059727 -0.00111757044 0.108530454 0.0313402973 -0.109885067 -0.00746698584 0.0517579988 -0.102587014 -0.062045224 -0.0723215193 -0.00753403036 -0.0194992591 -0.055590637 -0.110146999 0.0563573688 0.000793169835 -0.0437380224 0.037614204 -0.103893019 -0.11840263 -0.0892521739 0.0177615266 0.0299307667 -0.0603615977 0.0103125488 -0.0940437391 -0.0742155388 -0.00279134372 0.116551526 -0.0507049747 -0.0112416446 0.0206989124 -0.0475890413 0.00135824515 0.0360365659 -0.0638581216 -0.110917598 -0.0285419766 -0.0785639212 -0.00673839869 -0.072663039 -0.0943017006 0.0209225155 0.0458435677 0.06708619 -0.00773984846 0.130092591 -0.0302911103 -0.094658874 -0.106029265 0.0598360002 0.0363203026 -0.0587395169 0.0218387116 -0.00121726026 0.0923015103 0.0268146414 0.00547261769 -0.0118542481 -0.0527351797 0.0381134599 0.0549164079 -0.0742723569 -0.00661152741 
-0.0885568187 -0.146828458 -0.145066977 -0.0526844971 0.0974245518 0.0119285621 -0.148422763 0.138095111 -0.0548562445 0.0224515107 -0.033984974 -0.0918067098 -0.0412526764 -0.129727185 -0.091969721 -0.0195525698 -0.0304857362 -0.114038028 0.123106226 0.0100026429 0.0864370763 0.0507619679 0.00284729549 -0.00105298625 0.0372510068 0.0406655185 0.035562437 0.0691316351 0.0814873502 -0.0958798006 -0.0228097066 0.0195914619 -0.027246682 0.0997626036 0.0630631745 -0.125293136 0.0745200738 -0.0511293188 0.0464217141 -0.067329675 0.018248735 -0.10921976 -0.0231118333 0.0425507538 0.0270119589 -0.0716171041 0.0184950344 0.0490458496 0.0652568191 0.0191503335 0.0326661766 0.00589203555 -0.0994252041 0.0639910772 0.0935874581 -0.0105717117 0.0074147 -0.0258986168 0.0828858837 0.00915369298 -0.031141039 -0.0613915138 0.0385154858 -0.098638989 0.0779575929 -0.0105742011 -0.0758871809 -0.0109963436 -0.00425893022 -0.0998037308 0.0786853656 -0.00168336509 -0.0925417468 -0.125538707 -0.122153223 0.0821714997 0.124297135 0.0863585621 -0.0707112625 -0.0507845916 0.0522913001 0.0209270567 0.0663688928 -0.0528395213 0.120615751 0.0676541924 -0.024552837 -0.0572560132 -0.0213594604 0.10777957 0.101967193 0.13353315 -0.0903856754 0.000669586763 0.0565047972 -0.0825800672 -0.062248636 -0.014425775 -0.0738483593 -0.00286239828 0.0884365365 -0.105007574 -0.0629888326 0.0934715867 0.110788323 0.0860318914 0.00210579997 -0.0725004748 -0.127494186 0.0919124186 0.110653833 -0.0781571791 0.00416795025 -0.110430084 -0.0848361403 0.00444030436 0.116966464 -0.0922116861 -0.0228395946 -0.00113955385 0.00864992663 0.0542619862 0.0738494545 0.0541707687 0.0777184516 -0.0970832705 0.0126359928 -0.0184956864 -0.0622084662 0.0451156609 0.079605639 0.012876017 -0.0658479407 0.0148149095 -0.116397806 -0.084576413 -0.100534178 0.0414143018 0.0586240441 0.0751088932 -0.0757061094 -0.0277423412 0.134908676 -0.033364512 -0.0541506857 -0.0223149844 0.0424581356 -0.0582377762 -0.0225137156 0.0737239867 
0.0508049503 0.0808460936 0.0816969201 0.0865024626 0.0526327603 0.00781754963 0.0650917143 0.0286054742 0.0803678334 -0.0147822825 0.0908693671 0.107531264 0.0507709011 0.0359385237 0.0059293001 -0.00380560011 -0.00963426009 0.0474996306 0.0564068668 -0.0188577659 0.0513748489 0.0650842935 -0.0713231862 0.0369577892 -0.0133927027 -0.0248449575 0.0464835763 0.0194541477 0.0589518249 -0.0366000384 -0.00920657907 0.0802554563 0.107316345 -0.0738855079 -0.041999802 -0.061523933 -0.0305218492 0.156987384 0.0346348062 0.0126057826 -0.0152376043 0.0502441861 0.0664728656 -0.0426487103 -0.0210334025 -0.0939696729 -0.0341476351 0.0202106778 -0.0405531637 -0.0474039763 -0.122032635 -0.0849575177 0.0358551703 0.0997392237 0.0705704316 -0.0954606384 -0.0823383406 -0.0335653499 -0.121191315 0.013300227 -0.0529904217 0.0114150038 -0.0746903941 -0.03891664 -0.0819836557 -0.0729287639 -0.0179754216 -0.0102402242 -0.0607133955 -0.0891000032 0.00797273777 -0.022605991 -0.0809449404 0.0195738394 -0.134260491 0.0545250103 -0.0856008008 0.0678676516 -0.0582477748 0.0922091454 0.0154068666 0.0158708468 -0.12760596 0.0564289317 -0.0613937639 -0.0670805797 -2.70218661e-05 0.0686709732 -0.0105099222 -0.0782819167 -0.126448721 -0.0540316962 -0.00444437843 0.0426672585 0.0511715114 -0.0664667189 -0.117823824 -0.0813703611 0.0417935252 -0.075663574 -0.0227859747 -0.0927959681 0.00258619245 0.0769350082 -0.0262786634 0.15649274 0.00548388064 -0.0489241667 -0.028820714 -0.11300534 0.0169768985 0.0593335219 0.107664488 0.0454386249 0.127441257 0.0917166471 0.118455522 -0.089602381 -0.00173335895 0.039862778 0.030519424 0.0692663789 -0.0869349092 0.0913121849 0.0777036697 -0.103544652 -0.0235558059 -0.00459441263 0.0603882074 0.092305325 -0.0165223666 -0.0444068499 0.0616826303 -0.0684993789 -0.0543850996 0.02927945 0.0985921547 0.0364675447 0.0572966635 0.130570352 -0.109130152 0.0495605171 -0.0749507546 0.0501365066 -0.119342029 0.0564106107 -0.0683375373 -0.10465467 -0.10835091 -0.0725807771 
-0.061554186 0.0320482478 0.0128553994 0.0586562306 0.0384311117 0.0629496649 -0.0201265886 0.00698905718 0.0514870435 -0.100664191 -0.0316986516 0.00712753553 0.0836560577 -0.129708961 -0.0514545329 0.127032861 0.0874415487 0.0538004041 -0.12563847 0.0540097728 0.0164953042 0.0776124969 0.0573409572 0.0104391398 0.0330088995 0.0108658904 -0.0540808886 -0.0545005389 -0.0912899747 0.039707467 -0.0845631137 0.00952329952 -0.132783711 -0.00215431862 -0.00721834227 -0.0975917131 0.0458213203 0.0200680383 0.0877264142 -0.037273623 0.0161861442 0.110501729 0.0362346061 -0.0118159521 0.06564527 -0.0172634032 -0.0569092482 -0.0533066876 0.049190376 -0.0601144843 -0.0810378492 0.0477174371 0.0174570587 -0.0335904099 -0.133734539 0.0280133393 -0.0691580176 0.0767375976 0.138999552 -0.120073155 -0.0186303947 -0.0229670238 0.110347301 -0.0162759181 -0.125030503 -0.0610758066 -0.0921585709 0.0763650015 -0.0880154371 0.0487306491 -0.0280137099 0.013374065 -0.106856197 0.0210058708 -0.0297262985 -0.100724012 -0.0436595678 0.0109883668 -0.0853584632 0.0911873952 0.0432055667 0.00453142403 0.0127071068 -0.0517662428 0.0676092654 -0.0881505087 0.0098452922 -0.0823895931 0.026693739 -0.0401734523 -0.0659064725 0.0980170965 -0.0178305618 0.0141226156 -0.0129480297 -0.0210044179 -0.052819252 -0.00334462686 -0.0292001851 0.102371179 0.0348540843 0.0185807701 -0.0405740775 0.0680675134 -0.0530634858 -0.0756660923 0.000960345787 0.0799955055 -0.00204092567 0.106765963 -0.00071719382 -0.0700320154 0.0350265689 -0.0876352489 -0.0223301966 0.0634540915 0.046514608 0.00813020207 -0.0424726084 -0.0671557188 -0.0563678183 0.00290626287 -0.0465417765 0.0410110131 -0.0751074255 0.0387596823 0.0907988921 -0.0267684907 0.0381636135 -0.0729710087 0.111388907 -0.00196558004 -0.0716333836 0.00468423311 0.0505745411 -0.0488672592 -0.0919072255 -0.0417954288 0.0445425287 -0.133938208 0.0828562379 -0.0701478645 -0.086428687 0.031541761 -0.0582731441 0.0219943449 0.0624012351 0.0130704865 -0.0401517116 
0.0552026331 0.0567754321 0.0710184798 0.00758881867 -0.0124910641 0.0352796912 -0.0257061403 0.0308687277 -0.0213920511 9.48071975e-05 -0.0383036099 0.0287866332 -0.0110332398 0.0567016639 0.0603689998 0.00948372204 0.0733415633 -0.057091359 -0.0315212272 0.0307328366 -0.047477711 0.0429863371 -0.0265197102 -0.0657150894 0.0344378874 0.00179678167 0.010823926 0.0920330659 0.0393380597 -0.117785789 0.0551497154 0.0594726615 -0.0610899888 0.0493407547 0.0192096289 0.018806411 -0.0479772612 0.084946245 -0.0363575257 0.0180488937 0.0628424212 0.047983963 -0.00609665224 0.0490711816 -0.0524433553 0.0299023781 -0.134415284 0.0352817513 0.112402901 -0.00522935297 -0.108842514 -0.100159943 0.0536962375 -0.0482212268 0.033140216 0.0652505681 -0.0243575778 -0.00743648084 0.109651551 -0.0922743604 0.00548604131 -0.00986081176 0.0220172554 0.0101632392 -0.0740579367 -0.127274752 0.123685867 -0.03647862 -0.0687318668 0.00219690241 0.0450170375 -0.115679674 0.0136389351 0.103024855 0.103029318 0.110668465 0.00388179533 -0.0727895275 0.0803632215 0.0280900523 0.0280285254 -0.0467234924 -0.0731821135 -0.130512729 -0.0447000824 -0.107333377 -0.0769113675 -0.0171804633 0.0346588232 -0.0489323661 -0.0676056147 -0.105796985 0.0340438746 -0.0489065722 0.0679880902 -0.138374269 -0.10602235 0.0318014435 -0.116686605 0.0709820092 -0.0534674339 -0.00963871367 0.117157362 -0.0614339933 -0.0393194258 -0.0876544267 0.00427854154 0.0532199927 -0.044476755 0.000165691803 -0.0572972745 0.0502985567 -0.0602571145 0.12552923 0.0100918449 -0.0934075043 -0.0230401549 0.0295511037 0.0494234338 -0.0848071203 0.0259556789 0.079481706 -0.070821397 -0.0908804163 0.0531463176 0.0720867738 -0.0683894381 0.0868811682 -0.0569350533 0.0212230869 0.0892836973 0.0608907081 0.0264557358 0.117087588 -0.0282123219 0.0711553991 0.10003607 0.0398537852 0.0925926194 0.0799961835 0.106195562 0.0835256651 0.0742167607 -0.1288362 -0.0119199455 0.0209458005 -0.0807825178 -0.090900667 0.0211038124 -0.0566857532 
0.031491559 0.0864643902 -0.151924461 -0.0301744267 0.0409735925 0.111272551 -0.0702080205 0.0960132927 0.108262971 0.0476099811 -0.00588667253 0.0851650238 0.010426431 -0.0125091802 -0.101221651 -0.0594466254 0.0449221432 0.0909607708 -0.0161867402 -0.0196655095 0.0530647635 0.0182610322 -0.0709991604 0.00761050964 0.0762544423 -0.0591728203 -0.0941646695 0.024339376 -0.0626933351 -0.103930928 -0.0321634226 0.0614973754 -0.118808359 0.0537795126 -0.0431177206 -0.116265662 -0.0131834941 -0.00236911164 0.0999807268 0.110797547 0.0114548178 -0.0998885259 0.145602047 0.111171857 -0.0988338813 0.0828444064 -0.00907499064 0.00587745896 0.071559459 -0.13120684 0.060727559 0.0683118403 -0.0836969465 -0.0657875538 -0.0527593195 -0.115409821 0.0975560099 0.0509091392 -0.0899974853 0.0956521481 0.0244503263 0.0126064662 0.114975713 -0.0405928865 -0.0483787097 0.0214200635 0.00839174818 -0.046142336 0.0722193867 0.0103485761 0.107258148 -0.0378706008 -0.0450719856 -0.0208163224 -0.00753858453 0.0927842855 -0.0761416107 0.0222469252 0.0221501626 0.103600152 0.00779794529 -0.0233703442 0.012515164 0.0401047952 -0.0354013927 0.0374246277 0.0105700931 0.0782879516 0.0528782457 0.0609663725 0.121859848 0.0579176694 0.0506275222 0.089213632 -0.118543468 0.037998043 -0.0670093521 -0.13410008 0.0411561765 0.0282420814 0.0700198412 0.0285238512 0.00444778334 -0.0674335882 -0.0760352165 -0.0417404994 -0.00990704726 0.0137323095 0.0390490703 -0.05264882 -0.0782701373 0.123745263 0.0234257653 -0.0788318142 0.024272114 -0.0218193699 0.0922509953 -0.00793454051 -0.0463180654 -0.113730133 -0.0577475242 -0.00455238903 0.0886773691 0.0554309674 0.0941009894 0.129545763 -0.021742925 -0.081449911 -0.0626695529 -0.0395893045 -0.036148537 -0.0715967566 0.00607152935 0.0371897854 -0.0568351522 -0.0322895311 -0.104940452 0.0889158696 0.0984952897 0.0380211174 0.109986477 0.00241002371 0.0807410851 0.0208322443 0.0305939745 -0.109555371 0.0584459454 0.0174565129 0.0446334742 -0.00203529699 
-0.0710110068 -0.00075355859 0.00535989506 -0.00548237702 -0.0412316248 -0.10585098 -0.0147320451 -0.000358470366 -0.0302088298 0.0840577036 0.0291063283 -0.0138413198 0.0101552876 -0.0291738547 0.0731139556 -0.00815887749 -0.117785364 -0.0678437576 0.0784235671 0.0205320921 0.0843389407 0.0271183364 0.000885073736 0.0346361846 0.0954925418 0.130852431 -0.0751837641 -0.0253316611 -0.0976730809 -0.0676677674 0.0476101562 0.0662705749 -0.0301036816 -0.049003385 0.0106137209 -0.11252144 -0.0744634122 -0.0980421826 -0.0166393481 0.0481715277 0.0189415459 -0.0880238637 0.104637556 0.10711097 -0.0564402714 -0.0601721779 0.0159785729 0.0403848588 0.0139113516 -0.108862996 -0.00510752294 -0.0115773957 -0.0455890708 -0.0705545172 0.0851743072 -0.0565754622 0.0915891081 -0.0679899007 0.140255541 0.0228361152 -0.0286569875 0.0132950023 0.0172942225 -0.0447521694 0.0719969049 0.109762378 0.0197884869 -0.0228357762 -0.0507639088 0.0672996938 -0.0723399743 -0.00699901069 0.103675373 0.00931620412 -0.0457025245 0.0969348028 -0.0169527791 0.0237304047 -0.0895861909 -0.0194170661 0.0993531495 0.00229117088 -0.0562044792 0.0336305238 0.0490789376 0.0386500955 0.0127196591 0.00393643929 0.00232719886 -0.0829996243 -0.121718653 0.0753233731 0.0201976635 0.082195513 0.0600713976 0.0539501272 -0.0779756531 0.0625429153 0.00583441136 0.00761622144 0.115591303 -0.0334634334 -0.0245005973 -0.00851792749 0.0614887588 -0.0673773736 -0.0610792227 -0.0935594514 0.0109041268 0.0797310621 -0.054385256 -0.0951922908 0.0503217317 -0.105528668 -0.111157358 -0.0676904768 0.0699280798 -0.0395813137 0.0542365499 -0.0423914567 0.0401284434 -0.108586438 0.106151514 0.0741012841 0.0261538271 -0.0234557595 0.025597224 0.128329813 0.075639084 -0.0326176621 0.0483325124 0.0441246293 -0.00211445754 -0.0714289173 -0.028952891 -0.0652696118 0.10838379 0.0806302279 0.0361339003 0.000588985044 -0.0501025841 -0.0767339468 0.0711772069 -0.109505966 0.0615409054 -0.00328358519 -0.0599714369 0.0810240135 
0.0322509259 -0.105258301 0.00425739167 -0.0571172498 0.011786839 0.0552534238 0.0161224175 0.00839114189 -0.0516668111 -0.0663074031 0.0675223991 0.00408511516 -0.0934429094 -0.00482452614 -0.0146074528 0.116948992 -0.0514550433 -0.0092106685 0.0192382932 0.0676550567 -0.107134365 -0.0554183982 -0.0378831327 -0.00111221685 -0.111972146 0.0370764211 -0.025367327 0.0421501771 -0.034930028 -0.133550882 0.0460590795 -0.0408849232 0.0172165278 -0.0828626677 -0.142174855 0.0368294381 -0.0315607302 -0.0813754499 -0.0344028175 0.024872696 0.115811288 0.0284592416 -0.0183729436 0.0680122226 0.096686542 -0.0504275933 0.0555682927 -0.0117671303 -0.106590241 -0.0779706761 -0.0753313005 -0.01689367 -0.0120249027 -0.121162862 0.0904329047 -0.0305291414 0.115454301 -0.0964366719 -0.00301298662 -0.0242908541 0.0638555288 0.130841374 -0.10721194 -0.108135305 0.0741211176 0.0291729122 -0.113760702 -0.0604273416 -0.0709419549 0.139797956 -0.0342961662 -0.0201933645 -0.112033077 -0.0611329861 0.0254594646 0.0681489855 0.0438128486 0.0619565509 -0.11234884 0.0307241976 0.0981715992 0.00958520174 -0.0568824336 -0.0283987094 0.0636719465 0.0751391798 -0.114547461 -0.0748261958 -0.0586201847 0.000263712311 0.00999936834 -0.0185962841 -0.08149115 0.105373196 -0.00710947951 0.000694327406 0.0461735427 -0.0157357287 -0.0240037851 0.0580982715 -0.100919247 0.111699469 -0.0659800097 -0.00773917325 -0.0232742541 -0.0999335274 0.0753451958 -0.051228717 -0.0892171562 -0.0358730741 -0.0996832997 0.0939411744 0.00339663634 0.0281091705 -0.0894726738 -0.00704634562 -0.07571394 -0.0520362742 -0.0228929147 -0.0211675484 0.0902847946 -0.0659550056 0.11681138 -0.0221742485 0.0613481849 -0.0157842189 0.0737548843 0.00965575501 -0.0838649422 0.00177340093 0.0528827235 -0.11357832 0.100755192 -0.0534734391 -0.0358427912 0.144596636 0.0647218004 -0.0423597619 0.0605341755 0.0732165053 0.0211333074 -0.132864833 -0.0364422947 0.0478251725 0.0119115161 -0.00161979138 0.0571164563 -0.0192272216 0.00423192605 
0.0322174877 -0.0759943277 0.0137448525 0.00333114085 0.0397117473 -0.0238687452 -0.0832956731 -0.0279709753 0.0938207209 0.0343491249 -0.0871218666 0.0408064276 -0.116285831 0.0795068964 0.0848761573 -0.046004314 0.0709750503 -0.10165219 0.113097928 -0.0288509876 0.0324093103 0.120680496 -0.0231527574 0.0672659576 0.0315056667 -0.0114693092 0.0518258587 -0.0120203597 0.0803814754 0.034682408 -0.0337615535 0.0386820808 -0.0665814355 -0.0311845411 -0.0300064813 0.000839714077 0.0999553874 0.0476576835 -0.0808109865 -0.0593067668 -0.0203568004 -0.0215366837 -0.0306633245 0.0436415672 -0.0116597321 -0.0583168492 -0.0336035974 0.0782463998 0.019496616 -0.113910265 -0.100025997 0.0590599142 -0.0404247232 0.0250362512 -0.114405021 0.0491880253 0.0765769333 -0.0574881397 0.0377197377 0.0666196123 -0.0882478282 0.12657164 -0.072820656 0.0688454062 -0.0431665219 -0.0397588946 0.0174000375 -0.0294575971 -0.113417856 0.0438238829 0.0227670204 -0.0407737307 0.00265934458 0.0681627318 0.0419177152 -0.0144360522 0.0160792209 0.0458587408 0.0618349649 0.0176743343 -0.0137938112 -0.0633563772 0.0310720019 0.0681380481 0.0212434102 0.0676606894 0.041460555 0.110738382 0.0473630205 -0.0378823988 0.0510912016 0.00387570239 0.0408548079 -0.0602159686 -0.129621208 -0.00288634188 0.00880218763 0.091095008 -0.132332921 0.0251508802 -0.0712097511 -0.103759281 0.0195465796 0.0568811819 0.0538945012 0.140887022 -0.0393595546 -0.00443352573 0.0130884098 0.0407299697 -0.0129098492 0.120743103 0.02760542 -0.129898503 -0.00857560523 -0.0720292479 0.0491493791 0.102326475 0.0921848789 0.0479474291 0.0605254434 0.0442970507 -0.0229930989 -0.101877302 0.120247759 -0.0461608209 0.0228956696 -0.0468554012 0.0931479931 0.0344555005 0.01930671 -0.0351826884 0.0757239461 0.0770438984 -0.0225172918 0.0203145165 0.0127433063 -0.0512304567 0.070727922 0.011212891 0.0631789118 0.061186403 -0.0620558858 -0.0242845789 -0.0357322469 0.0807544664 0.0694836825 0.0275604576 0.048231598 0.0312998705 0.0098650381 
-0.0849438533 0.00338348607 -0.0562642589 0.062496379 0.00795244705 -0.00989409816 0.0154066579 0.0518637821 -0.103680417 0.00535006076 -0.104885489 0.0388466492 -0.0735442638 -0.0991858095 -0.0114390533 -0.024811225 0.0624354333 0.106003806 -0.0726236701 -0.106910177 -0.0611559413 0.0201094151 -0.052110929 0.0187292732 -0.0280273762 -0.101260066 -0.143803522 -0.1252902 0.0355423726 0.041179236 -0.126104265 -0.0216143429 0.0806514397 0.00608616043 0.0657909364 0.0178345367 -0.0923066512 0.0481731519 0.145438254 0.0159616042 -0.0456462018 0.0941475853 0.0632876828 0.0367626883 0.0247407742 0.0601012856 -0.0355465524 -0.0118422816 0.0488038473 -0.0545568913 0.0373688899 -0.0515505187 -0.0396510959 -0.0605122671 -0.085121952 0.0596127883 0.105902717 0.0220958665 -0.0255203731 -0.0148762362 -0.0768131837 -0.0578792021 -0.0949795991 0.0773940459 0.0814553499 0.137127966 -0.116018936 0.0563674271 0.0888326541 0.0284422096 -0.110917278 0.0353827216 0.0380769633 0.12019825 -0.00778515963 0.0705309212 0.00951496419 0.0804332197 0.0100569949 -0.0600129589 0.0635915622 0.0929165035 0.0890567824 -0.0334398523 0.0183780789 0.0171072353 -0.0848544464 0.0573717169 -0.0625854135 0.00517629972 -0.0316587314 -0.00222206302 -0.139464319 0.00396719109 0.119108282 -0.0407875292 -0.0145511776 0.0634373575 0.0286066066 0.0339107104 0.0838994458 0.156156093 0.0932729319 -0.067164138 -0.0970614329 -0.000510855229 0.108286127 0.151279747 -0.153703973 0.035030935 0.0742894635 -0.0494455397 -0.0341568068 -0.0577272587 -0.0769041032 -0.0278417245 -0.0176225342 0.0874658376 -0.0257863011 -0.0709038004 0.0103997458 0.105501436 -0.132797644 0.0622315481 -0.0974398479 -0.0254051387 0.0495131202 -0.111515976 0.103808023 -0.101846233 -0.0294793397 -0.131958127 -0.0533692092 0.109044902 -0.0826396644 -0.0222143289 -0.0188204758 -0.0809235647 -0.0443305187 0.0731882006 -0.0607132837 0.108671054 -0.099729836 0.0198555607 -0.14591822 0.0282850396 -0.0493627414 -0.0674319044 -0.132230341 -0.0341024846 
-0.0808820575 0.0899107382 -0.0263775121 -0.103454776 0.0222094338 0.00426623598 -0.084072873 -0.0383605286 -0.0547198616 0.0559252352 0.0470217839 0.0677336454 0.0497331806 0.0631156936 -0.00398747297 0.112397343 -0.166542813 0.137340739 0.0250983089 0.0890974551 0.119683884 -0.0544718653 0.0518688969 -0.0565655194 0.0104118874 0.0981469452 -0.0510016531 0.00272554019 0.0927296504 -0.0194716733 0.110565722 0.0546717308 0.0536676086 0.064474754 0.0118900863 -0.11696136 0.0142176412 0.0417189002 -0.0839173347 -0.0281918701 -0.0403215103 0.02901816 -0.0981693715 -0.0701962784 -0.00782805495 -0.0462877564 -0.0264666826 0.0648322478 0.0252208374 -0.0529760942 0.0255852453 -0.0645134747 -0.0154027175 -0.0532422848 0.0832066536 0.0396291837 -0.0148703549 -0.00148318615 -0.0615470037 0.0724665746 -0.0584392287 -0.026484957 -0.0585906915 0.0238307714 -0.0229600184 -0.0852913335 0.038450405 0.103452124 -0.033543352 -0.101024874 -0.0779693052 0.0964624882 -0.0119416546 -0.0178287029 -0.0222403612 0.0204786509 0.0163842663 -0.045447167 -0.0765725672 0.0971594155 0.041313909 0.0470899418 0.00305234478 -0.119385377 0.0464745872 0.0310937278 -0.131466374 0.0710817575 0.0100257266 0.068332687 0.0689313188 0.0125061376 -0.0347266309 0.0975375995 -0.00572358584 -0.0388996676 0.0531310216 0.137647584 0.0694899485 0.00679214392 -0.00121487537 -0.0537946038 0.0458747223 0.0680655316 -0.0407916345 0.0546781644 0.0273533594 -0.12560609 -0.0675602481 0.0385174453 -0.0973455235 -0.00201383908 0.0485716909 -0.0049632024 0.00582174305 0.0641960278 -0.0848527774 -0.0219417512 -0.15150407 -0.0783323124 -0.00965964142 -0.0608184524 -0.0130811296 0.0480553694 -0.0386784896 0.0999142677 0.0546924099 -0.1222317 0.0141203087 -0.0226628929 -0.0910275429 0.0631851926 -0.0745201185 0.0876418352 0.00886597019 -0.047717195 -0.128835618 0.0425309427 -0.0195610169 0.0638543293 -0.129029542 -0.0131098628 0.0650595948 0.0454248711 0.0853424594 0.0140957199 -0.0202541035 0.0317879245 -0.143378869 
-0.00423101289 -0.131989092 0.0190712065 0.0510258228 -0.0865119174 0.0910336599 0.0548940673 -0.105332822 -0.0292321127 0.00560635095 0.0586736389 0.00505677052 0.110722467 -0.107276112 0.110618874 -0.0691331774 0.118226469 -0.0460344777 0.0930353999 0.0951236412 -0.0392442942 -0.112528846 -0.0722931251 -0.0136691937 -0.0806170553 -0.0411446244 -0.041987583 -0.00134162803 0.0213002544 -0.0701876432 -0.0885930806 0.0525745451 0.0624400154 0.0732306167 -0.0200780723 0.0234855395 0.00717696035 -0.147869304 0.0787881762 -0.0716938302 0.0857125446 -0.0352597944 -0.0805057809 0.0154540623 -0.00925941207 0.0756853744 0.0621306412 0.00842974614 -0.0371917672 0.108534172 -0.035098426 0.0504793301 -0.020088356 -0.0706505328 0.0162423179 -0.0752581954 -0.134370595 -0.015700357 -0.0898832977 0.0254374146 -0.0247301795 0.130348459 -0.035687875 -0.0680520609 -0.0444837064 -0.0601570755 -0.0513698831 0.111533344 -0.113860339 -0.0939767584 -0.0477326587 0.0567156076 0.0808228627 -0.0376587808 -0.114645995 0.0915196016 -0.019314684 0.0117936404 0.0774793029 0.0794394761 0.0432011634 0.0209889244 0.0314350612 0.0340264812 -0.104788385 -0.00981875602 0.0270214248 -0.0868451148 -0.0589688234 -0.0642679632 0.063923806 0.0117191905 0.0869612917 -0.0395875722 -0.0766618028 0.0934183374 -0.0996760055 0.0946377516 0.0345660634 -0.0784306899 0.07895834 -0.0232259352 0.0685211644 -0.0316710509 -0.0698057264 -0.011367701 -0.0852755904 0.0756148174 0.112687804 -0.0221658461 -0.0638062581 -0.0995947495 -0.148020685 -0.130782247 -0.0682474449 -0.0904296935 -0.0312870853 0.00539993821 0.0133634834 -0.0529328249 0.11267703 -0.0565492623 0.00755324587 -0.13352786 0.0963837281 -0.0480984338 -0.0886128098 0.0665832683 -0.109329402 -0.0235391576 -0.057248909 0.0689797029 -0.06789276 0.055123087 -0.0196565576 -0.0988758132 -0.0760087073 -0.0214166064 -0.0119032441 0.0697430596 0.00237821974 -0.0490270369 0.117983244 0.0743466169 -0.0153463781 0.101554446 0.103069983 0.0187342204 0.06316486 
-0.117018297 0.0541630685 -0.100673176 0.0376137197 -0.0421548113 0.0645036548 -0.0168326087 0.0727319941 -0.0281656329 -0.117367707 0.116008684 0.0423757844 0.0988826826 0.0217613652 -0.0824515149 -0.0351628885 0.0812880173 0.147239089 0.0299862716 0.0364563912 0.145076081 0.105126604 -0.0210315958 0.0879319981 -0.0583813041 0.0593444556 0.106604464 -0.0204098541 0.0887133107 0.0528428815 -0.0444989093 -0.157084122 0.054473713 0.150896385 0.028199533 -0.0832022205 0.0864810869 -0.0489188135 -0.00317808706 -0.138337359 -0.0614818409 -0.0958974063 0.16130729 0.0542741828 -0.0278767291 -0.0963605344 0.132825524 -0.0308959335 0.0545662679 -0.0319377147 -0.0980552807 0.0630095378 -0.0429899767 -0.0260642897 -0.0517068692 -0.0561511219 0.11457511 -0.060042996 -0.0527282394 0.0207744949 0.023834521 0.0296360757 0.0525115617 -0.114589319 -0.147910029 -0.0456607491 0.0715667382 0.0993826538 -0.0107942242 0.117987439 -0.0284947716 -0.0709881261 -0.0903323144 -0.0224822853 -0.157054216 -0.0552059412 -0.0338664018 0.0750938728 -0.065085113 0.0211203843 0.118827477 -0.0167396851 -0.0932219103 0.0603475198 -0.0151796769 0.00819401443 -0.103917979 -0.0764359087 0.0163631905 0.0167195648 0.10436935 0.031059213 -0.010204182 0.0322529972 -0.0338583738 0.0547566526 0.0993093476 -0.0449988134 0.0820005462 0.0658240914 -0.0828819126 -0.0934411883 -0.00780287059 0.0153802652 -0.0455549546 -0.021676302 -0.00396145368 -0.121144004 0.0972423553 -0.0255077239 -0.0110973027 0.120320976 -0.0416977331 -0.0117513239 -0.105017632 0.0385619588 -0.0398330316 0.0233246256 -0.0271476638 -0.0675703511 0.0597647466 0.0618401542 -0.0964857414 0.060786169 0.00302257249 0.0607231446 0.0319902562 0.0811921582 -0.01984399 -0.00189587893 -0.139521733 0.0102705099 0.0954400972 0.0966984481 0.0283194389 -0.045280274 0.0892768875 -0.052237168 0.0279194918 0.0737474114 -0.000261810783 0.112771511 -0.00738663413 -0.0538329072 0.0805022269 0.0928170681 0.0922827125 -0.12284258 -0.0546792485 0.0152943293 
-0.0549117215 -0.0439201444 0.148618817 -0.0444157384 -0.014640267 -0.0463561974 -0.141159236 -0.0240516476 -0.0309760477 0.103102759 -0.0665320605 0.0416155756 0.0295819342 -0.072761029 0.106963806 -0.0282385554 -0.0605240837 0.0600927584 -0.0168974958 0.0207078587 -0.0489886738 0.0462408178 0.0563473664 0.0989545807 0.0286699794 0.00622357149 -0.0955138803 -0.089370057 0.0498380885 0.111254118 0.0486598499 0.0335665718 0.00651514437 -0.137515217 -0.00872137025 -0.0768070891 -0.103112787 -0.0138399014 0.0693487599 -0.000350349292 -0.132112339 -0.054441724 0.0118631627 0.0867957175 0.0417998731 -0.125937298 0.0554638319 0.0775117502 0.0927276611 0.0790062174 -0.107764006 -0.00206389814 -0.0824461728 0.0240072217 0.0872223303 0.0261813533 0.0419610031 -0.110041335 0.0405180678 -0.0859660432 0.0439100154 0.0232283361 -0.0124879908 0.0721851513 -0.078385748 -0.0744791776 0.0467836894 0.0291622393 -0.00720773824 0.0299567468 -0.0575182885 0.0597681552 0.0921448171 -0.0661888644 -0.00482166698 0.0675290599 0.0166998263 -0.0955132842 0.085087046 0.102876 0.0426338613 0.0119831273 0.0855897442 0.0225691516 0.0364397429 0.0160135124 0.00990157295 0.0420151539 0.0316322856 -0.0585764237 0.0824364498 -0.0413012579 0.135568514 0.034757603 -0.0277830604 -0.034982793 0.0369454138 0.0193050615 0.0799474046 -0.0846258327 0.0366695002 0.0598423779 0.0799307451 0.136157006 0.128120825 -0.0201004725 0.0452948473 -0.06387043 -0.0197872147 -0.0240808073 0.0412721485 -0.0553675219 -0.106740355 -0.035156589 -0.0776401386 -0.104280807 0.0636275262 0.143016845 0.0158824641 0.0104194768 -0.0614338666 0.0736046582 0.0777402669 -0.0251369067 0.08835724 0.0629755557 0.100526057 0.130498186 -0.0605841354 -0.107173443 0.0618615188 -0.0027110891 0.0448608994 0.0324240513 0.13815707 0.0885208547 -0.0184885561 0.0110004703 0.0750818923 -0.123291738 -0.0332586765 -0.00227115862 0.0191539656 -0.00745699275 0.0960062817 -0.00826996565 -0.11104311 0.00985418726 -0.00825903285 0.0663968921 -0.022775976 
0.120236516 -0.0306508504 -0.126447007 0.0266145803 0.0663856491 0.0159932058 -0.00567367487 0.0217537843 -0.129739061 0.0966290981 -0.120227985 -0.0869013295 -0.0486435518 0.145977855 0.0870844871 0.08520028 -0.0570347048 0.0191150215 0.0577304959 -0.0748146251 0.138433784 -0.00482775643 0.0784191266 -0.00595876481 -0.089486897 -0.0807763785 0.0747055635 0.0123625547 -0.0272405632 0.0675494596 0.0216415282 0.0251738597 -0.055193793 0.00323623535 0.104679525 0.00744761759 -0.0563779734 -0.00321181351 0.0755025595 0.0668580309 0.0714727044 0.0588193573 -0.0533336736 -0.027081253 0.0995806679 -0.00321418745 -0.0934964344 -0.0121698389 -0.0306112021 0.049315121 -0.0717256963 0.0284800846 -0.0465604663 0.0592573173 0.0975120962 -0.0522723123 0.0236058217 0.03004965 0.0192594938 -0.0153996143 0.0517514087 0.0202556662 -0.036583852 0.105843432 0.0923823789 -0.108679689 0.104115218 0.0757252499 -0.0786331147 -0.108693816 -0.0475629792 0.0984940901 -0.014999046 -0.0789110363 -0.0634896383 -0.124007449 0.0513020195 0.0337021165 0.00936586969 0.0974761024 -0.0039249598 -0.0539403148 0.0093635805 0.064329423 0.00841173995 -0.0369432382 -0.0830086768 0.0733837709 -0.0366025865 -0.0928544104 -0.0144924261 -0.0935278535 -0.0608592965 -0.102837108 -0.105442159 -0.0516982377 0.0344825126 -0.0649234951 0.0406593382 0.0177465007 -0.129067734 0.0989128351 -0.00798356999 0.0440664552 0.0535025857 0.0184675008 0.0339980274 0.0178082474 0.0744322464 -0.0364990495 -0.00785736833 -0.0367429368 0.110767066 0.043253459 0.0398505144 0.0362781025 -0.0534571707 -0.0138610825 0.0870700777 0.0741645619 0.0842578635 0.142304018 -0.0709979832 -0.0997136533 -0.0118433814 -0.0195060018 -0.0260943621 -0.0851690397 -0.0610457137 0.0346727297 0.038758263 -0.026763279 0.00496497378 -0.0248329956 -0.0392976888 0.0639327541 -0.0116053829 0.0389414802 -0.0765374303 -0.00860751234 0.0580505244 0.058850836 0.0729101896 -0.0168091431 0.0493836068 0.0378085151 -0.0690903589 -0.0735144988 0.0763928369 
-0.0370460264 0.0515179113 -0.0130856326 0.0213577785 -0.0343368538 -0.0334647931 -0.0814459473 0.0176412053 0.0479053147 0.00514184404 0.0467168912 -0.114557423 0.0326536633 -0.122538239 0.0797366053 -0.0220797621 -0.0939437151 -0.0356741399 -0.129558548 -0.0728810504 0.0334232114 0.0454140641 0.0641237572 0.0639395788 0.0806245655 -0.0568198524 0.113629669 -0.0269122235 0.0319497921 0.0329489671 -0.0914393291 -0.0630809143 0.0387656465 -0.0879159197 -0.0275259484 0.0394459814 -0.0404246971 0.0497982427 0.0524061657 0.00315544894 0.0564953573 0.126382247 -0.0345166884 0.0698444024 -0.0992106721 -0.0982451588 -0.107451998 -0.0527341142 0.123538248 0.024919359 -0.0131305484 -0.0894226953 -0.115056708 -0.0378506444 0.0621916279 -0.122512206 -0.0171847306 -0.0232702196 -0.103842773 -0.100498989 0.0790945068 0.0964731276 -0.0845638365 -0.0501652695 0.0643050224 -0.0128263319 0.0967387334 0.0712623745 -0.0615100749 0.0906366855 -0.0492125414 0.0326652825 0.126826495 -0.0253421534 -0.0376052111 -0.0545951948 0.0150278658 -0.120675765 0.0287395269 0.0592594892 0.0814532936 0.102257699 -0.00316688977 0.0232737195 -0.0206201728 -0.106220126 0.0464446917 0.0267721917 -0.0964898616 0.000449200714 0.058107879 -0.0119522484 0.119053274 -0.0157440305 -0.0229303446 -0.0538970679 0.0736326724 0.0061632446 0.084165886 0.0724296197 -0.144264609 0.0839208364 0.0139587959 -0.0285230391 0.0120399361 -0.11818894 -0.104979657 0.0247651879 0.0166639592 0.00610556966 -0.0571998879 -0.127792791 -0.0762260929 -0.0623565726 -0.0835529417 -0.0365828983 0.0787034184 0.0618906133 -0.0250335261 -0.0716648474 0.0972160697 0.0754901916 -0.0548195727 -0.0320330486 -0.0405605473 -0.0662167147 -0.0892478526 -0.0222243164 -0.0625544339 -0.0201768409 0.118638895 -0.0409025624 0.0845576376 -0.107005633 0.0590842962 0.034769319 0.00361982756 -0.0245255139 0.0547625758 -0.115161017 -0.0553721972 0.0778890774 -0.041641593 -0.0323726982 0.0310737193 -0.0700510889 0.015053235 0.160222828 0.10772761 
0.037959829 -0.0358525217 -0.111562051 0.0241074972 -0.12357261 -0.12483231 -0.0926482156 -0.0271823723 -0.113680638 0.108206771 -0.116128989 0.00913964305 -0.0308017898 0.0424275286 0.122627713 -0.0544019043 -0.132333323 -0.046340026 0.139717042 -0.0182990991 0.068831861 0.0823961869 -0.043938648 -0.038030766 -0.0475954749 -0.0332984775 -0.058541622 -0.0173970181 0.00698842388 -0.00505919755 -0.0264520328 -0.075296253 -0.0592983365 0.0703864917 0.0332557037 -0.083748579 0.0185417943 0.0330124721 0.081373781 -0.0521305203 -0.0828755274 -0.0191443413 -0.0275393687 -0.104940005 -0.0373899266 -0.129006848 -0.0350307822 -0.0594692267 -0.086783044 0.0255106967 -0.112269998 -0.0306032244 0.0352828279 -0.0759110004 0.0436218269 0.0371037386 -0.0105048856 -0.0273989253 0.0408369228 -0.0481947623 -0.0490162857 0.0253327843 -0.0900856555 -0.10654892 0.0296242032 -0.0569313392 0.00557792746 0.0511694998 0.0238375813 -0.0494668148 0.00665589096 -0.0544650368 -0.0221331436 0.122494169 -0.00346396537 -0.03750135 -0.0583185181 -0.0697296709 -0.00882709585 -0.0139009692 -0.0581789836 0.0511611365 -0.0665547177 0.0625669286 0.0951525047 0.0109070055 -0.0445890985 0.0510924347 0.0549049266 0.076408051 0.0454293936 0.00382474251 -0.0298265554 0.00967820082 -0.0501072742 0.0321529955 0.0609736592 0.0241174586 -0.0162564274 -0.0707081556 0.144424051 -0.0120626008 0.0290390942 -0.0303947851 -0.0576401316 -0.0760076717 0.0552736968 0.0407108702 -0.0295744073 -0.097608991 0.0594237335 -0.0941474885 0.0194068011 -0.0656058192 -0.0635675117 0.0444326811 0.0656519532 -0.0508928746 -0.0729632974 0.0443925709 0.0649101809 -0.0789977983 0.0154495686 0.0545210727 0.0447072089 0.126279771 -0.0358525813 0.0973265097 0.0260252561 0.0660820976 0.103842169 0.0981507078 0.0491054058 -0.0901521593 0.00591290556 0.0812497959 0.00227644946 -0.0607588552 0.0970650539 -0.0110606086 0.0776812136 -0.0386007279 0.119623892 0.0970067903 0.0679384917 0.05710252 0.0563185252 0.120259158 -0.0155343693 0.11131572 
0.0304788649 0.0158111248 -0.0598068163 0.0854219869 0.0570583344 -0.0570600703 0.0287855826 -0.0342741273 0.036824815 -0.0501024202 -0.0268743541 -0.0634012967 -0.0412885621 0.128790557 0.00406311126 0.042762816 0.00955149718 -0.0193585306 0.0519001707 -0.039887663 -0.0505587868 0.0825586244 0.159575224 -0.039045386 0.0544076897 -0.0779607669 0.0380125828 0.0408898331 -0.0760996863 0.0313064456 -0.0805607736 -0.0574796721 -0.0846826658 0.113500386 -0.0871631727 -0.117222093 0.0810274109 0.051653102 -0.0653802082 -0.00290928991 -0.0630526915 -0.119209491 0.0347142629 0.0174591336 -0.0608103834 -0.0927200988 -0.0334013142 -0.0835639536 0.11650601 0.0233004745 0.0682244599 -0.0430421382 0.028754117 -0.0900809765 0.012021089 0.0624547713 -0.105882496 0.0918491483 -0.0683888867 -0.0233582761 -0.0216962695 -0.0297207572 -0.0362710096 0.0270867273 0.0460449308 0.0642470419 0.0419084579 0.096854955 0.0901691094 0.0210975073 0.0876087993 -0.0793926194 -0.0800184235 0.0771968812 0.00599401817 0.0235112216 -0.124454454 -0.0479293279 0.0157109667 0.0773659572 0.00740274787 0.0359256268 0.0233147927 0.0827361718 0.0162823889 0.0475280918 0.0248643626 -0.0826426297 0.0601135641 0.119723722 -0.0729700252 -0.115519248 0.0721688643 0.105871052 -0.0132921757 0.08133322 -0.0257968605 -0.0480753519 0.0262065995 0.0138876187 0.0765962973 -0.0963439941 0.0358153284 -0.0679376945 -0.0568544529 -0.0514913723 -0.0568194948 -0.0500201248 0.040343143 -0.0777348354 0.0369281098 0.0772028044 0.00108942436 -0.00182866259 -0.0662001669 0.0198386908 0.045538079 0.0673875585 0.0710163489 0.0381334536 -0.0855338573 0.0286879074 0.151627332 -0.0448625647 0.0633926764 0.0938242897 0.0526457354 0.0111530349 0.097349681 0.0770600736 0.0608735308 0.0969019234 -0.0362519659 0.00380012137 -0.0779155269 0.00992168486 0.0114733698 -0.000200923663 -0.0576153658 0.083698988 -0.147824839 0.0438186601 0.0710815191 0.028712105 0.04132507 -0.0103248488 -0.000444060774 -0.0523407757 -0.0500133485 -0.0338968448 
0.0263319649 -0.04361872 -0.0277711656 -0.0168558471 0.0467232168 0.0177872274 0.0654023588 0.0379033573 -0.0607685857 0.00964797754 -0.120628364 0.0780472904 -0.0843662843 0.030455105 0.118488185 0.0485005565 0.0704616383 0.100928433 0.0408400409 -0.0665229484 -0.00970364176 -0.0212464705 0.0161203556 0.147231802 -0.0107072778 -0.0776233077 -0.0784361213 -0.0594038591 0.0418931209 -0.000451631407 0.112979718 -0.0695450008 -0.0122077055 0.00301642111 0.0691217184 0.0310368631 0.0258781705 -0.0280865338 0.0467126593 0.00575158047 0.00231607817 -0.0486097038 -0.119848073 0.0345518552 -0.043744415 0.0102821859 -0.0540690646 0.138281018 -0.00140106888 -0.0396243855 -0.0496274233 -0.0120704891 -0.0222738367 -0.0162870642 0.014834349 0.00513116037 -0.0692124441 -0.0776496753 -0.120830476 0.0161017329 -0.0370132066 -0.0627025366 -0.108544096 -0.0967517719 -0.0203821119 0.0064773499 -0.13926363 -0.0478015207 -0.00240246905 0.0692687705 -0.0158551876 -0.0689087138 0.104410864 0.0010006387 0.0992425233 0.0829349607 0.0481005087 -0.0365891643 -0.150370061 0.0162225049 -0.0743764937 0.0161576103 -0.0280919597 0.0834880769 0.0755537376 -0.0628848672 0.0377435535 0.0462861024 0.0946150273 0.037716087 0.0241654012 -0.0828214586 0.0492958799 -0.0326795466 -0.0770705715 -0.140277237 -0.0159451012 -0.109213002 0.0240502506 0.0428965315 0.0193452798 0.0742191896 0.0848924294 -0.0308462307 0.0752010345 -0.0391890407 -0.00412439182 0.0942446142 0.00469598826 -0.0910378024 0.000205826393 -0.0885702595 -0.10040123 -0.0408986025 -0.0709256157 0.0822851807 -0.0281461775 -0.0398267582 0.0393168256 -0.112278141 -0.0479413383 -0.0573235005 -0.0405642055 0.122419089 0.0270299502 0.00841662008 -0.0121508595 -0.0198617335 -0.000879280851 0.00153166568 0.0554407202 0.00480276532 -0.0682227761 -0.0328659527 -0.0611312203 -0.142565057 -0.00181693793 0.0861305967 0.0628089532 -0.0740653574 -0.0160195958 0.0974677652 -0.101770975 0.0444948077 0.0841920972 -0.0720307156 -0.00300135929 -0.069788307 
-0.0935182795 -0.0931349918 -0.10534066 0.00375205046 0.0325018056 0.0886859596 0.0366864353 -0.0217052139 0.0339041911 -0.0782768726 0.00892924238 -0.0385166742 0.0129920086 0.0826299712 -0.044218149 -0.0138300406 0.00623118551 0.0313761942 0.126124471 0.0703845546 0.0405096114 0.0779507384 0.056386482 0.0744233653 0.0530518629 0.0425919332 -0.0413660035 0.0031353673 0.0289649554 0.0164382402 -0.011526701 0.0496848971 0.119253859 -0.111141384 -0.036666058 -0.0288353041 -0.00853391178 -0.0851149112 -0.0335021652 -0.0633720756 -0.0867637545 0.0132676009 0.0459979139 0.0367925242 -0.0876119509 -0.0333126523 -0.0919199139 0.0108301183 0.117218599 0.107971512 0.0295869391 0.0267735161 -0.0267074816 0.0445350818 0.111544669 0.0392179638 0.0937596411 -0.00281381886 0.0201951191 -0.0501904786 0.0335953049 -0.0029760797 -0.0398072712 -0.0350824408 0.0377969109 -0.0179890636 0.0425337292 0.0354793109 0.0247561317 -0.0294101313 -0.0284981932 0.028481964 -0.0429043695 -0.053075958 -0.126678079 -0.085693188 0.0219913088 -0.103943169 -0.0727316067 -0.0124961985 -0.126258418 -0.0539888591 0.0462416373 0.0975957662 -0.00795640703 -0.021866478 -0.0801899433 0.0211574696 -0.00211753859 -0.0770760551 0.106369033 0.0529551283 0.00306034461 -0.153457433 0.0278290957 -0.0596790686 -0.04930925 0.072257936 -0.067070365 -0.0302931052 -0.115538754 0.138532385 0.0228340477 0.154491559 -0.0212814286 0.000630576746 -0.0118962303 0.0624279119 0.0366596803 -0.104068108 -0.0649591386 0.0323766321 0.0730391443 0.0661889538 -0.0391814969 0.0793058872 -0.0105679389 -0.0682836398 -0.0303012673 0.122320741 -0.00334193907 -0.107540131 0.0638230518 -0.119354151 0.108916059 0.0184885114 -0.0397466794 -0.074820742 0.0587450974 0.111003347 -0.0857884958 0.0496451035 -0.175095469 -0.0324501954 0.0114386939 -0.083123289 -0.0846996307 -0.0342796296 -0.0571025424 0.126033574 0.0537063144 0.0963928178 -0.0292254034 0.0303790402 0.0882642195 0.0210448559 0.0844297558 0.0784011334 -0.067198731 0.0598029867 
-0.115000091 -0.0233332999 0.11517673 0.0587579273 0.0726984292 -0.0598884225 -0.0596332885 0.10184852 -0.00164783257 0.0644008815 -0.0344729498 0.00754436292 -0.133005932 0.0759031922 0.00654394785 -0.00963001978 -0.172101706 0.0105558978 0.176982358 -0.0497983247 -0.0037973139 -0.0655243993 0.0364305004 0.0520126633 0.016448427 -0.00652270019 0.068141371 0.0190387368 -0.0741908997 -0.0276863649 -0.0545798913 0.0041190316 0.0235041492 0.0122208726 -0.0720304623 0.0136137297 -0.0269483216 -tensor_10bias 50 -0.12787357 0.017543152 0.122975975 0.0730041191 0.0510178655 -0.00993559696 0.139933825 0.15092434 0.0684130192 -0.0333705768 -0.184260622 -0.13440612 0.109378524 0.111376524 -0.10483826 -0.0250708181 0.120549299 0.0411001481 0.183845177 0.135748357 -0.00771392835 -0.12025056 0.085442692 -0.0513125733 0.136845529 -0.0145230526 -0.0895486251 -0.0252410602 -0.00896273553 0.0933182612 -0.108676046 -0.104239464 0.170086652 -0.0341263078 0.0728005916 -0.0453254506 -0.100045033 -0.110129185 -0.00771265198 -0.119152002 0.1214706 0.101130307 0.0332861841 0.0142126186 -0.010599345 0.109234303 -0.0182705577 0.177162722 0.0691059828 -0.0739419758 -tensor_2bias 50 --0.0447338857 0.0537877791 0.0785957575 -0.0634338111 0.153481558 0.148676842 0.0265698414 -0.0261984505 -0.0751923025 -0.0352455713 0.0932889804 0.113871664 -0.0193461645 0.175267622 -0.0770687833 0.157511786 0.0196232703 -0.0737266392 0.0872744098 0.116388358 0.168398216 0.0425802097 -0.102230035 0.0693789497 -0.0855393335 0.126388997 0.0205914602 0.140580684 -0.00234525092 -0.0295791756 0.0197821874 0.0661892593 0.166472748 0.149337456 0.0513125136 0.00068877294 -0.0757507607 -0.0540507101 0.134943455 0.0256511811 -0.0943378955 -0.0261238459 0.0309584048 0.111188456 0.169084176 0.136096522 0.0985386074 0.0480017625 -0.0471420884 0.122215845 -tensor_6weight 2500 -0.0649253875 0.129901871 -0.0820776671 -0.0164463595 -0.0272229239 0.0591965616 -0.118314907 -0.037768431 0.0372078121 -0.105141595 -0.140254259 
0.0649844706 -0.112917937 0.141195908 0.140458569 0.0553426445 0.0367731303 -0.0505450144 0.0507215112 0.114758804 0.115806922 -0.0424669459 0.0370975286 -0.14095898 -0.104349688 -0.007835567 -0.0608764365 -0.0330444127 -0.12756449 -0.104601666 -0.0191679522 0.00627362728 -0.0662557259 0.0937368721 -0.101459384 -0.0692796931 -0.0512177646 -0.126805127 0.0393478721 0.0119376034 -0.0574386194 0.100259379 -0.10315454 0.109866068 -0.02667135 0.130284503 -0.127174616 -0.0201597661 0.0414076746 -0.122587755 0.126039341 -0.115497321 -0.126209974 -0.00932627916 0.0310982913 0.0501976013 -0.0105512738 -0.117707536 -0.116891071 0.117860749 0.0559653193 0.0531298667 -0.0543823317 0.106951609 0.0151336193 -0.0444077402 -0.112000868 0.0114103854 0.0838644654 -0.012747705 -0.0791340023 -0.0889710411 -0.0655299723 -0.0225159228 -0.00320497155 -0.0662335902 -0.0993035883 0.137778953 0.105412766 -0.116872713 0.0578503758 0.0725949556 0.0382958353 -0.0512723327 -0.00722907484 0.0786679238 -0.116880074 -0.0138037503 -0.0500161424 -0.133497417 0.0958063304 0.0558829457 0.0326671302 -0.0238390192 0.0845869035 -0.0934950113 -0.0433793738 0.0942181498 -0.045510605 0.0947668105 -0.106258683 0.0446187519 -0.0900780708 -0.0834366232 0.191142887 -0.100739747 0.171907842 -0.0254000407 0.138836846 -0.0700232163 0.114825904 -0.143776864 -0.0321323685 0.0355321914 -0.178224027 0.119957708 -0.0752720386 0.127894193 0.164032444 0.065395847 -0.063121289 -0.0970638469 0.102740057 0.0505844206 0.0253012329 0.0821145922 0.180317059 0.136325151 -0.103746325 0.126737922 -0.0877246112 -0.0697940513 0.0607301034 0.0686804578 -0.0175086763 0.0285665393 0.147603065 -0.159169093 -0.058806546 0.101134196 -0.0185775906 -0.113093227 0.0278050229 -0.0363715962 0.123531096 0.105049185 0.0325903893 0.101475507 0.175050184 0.0439927392 -0.0129783954 -0.103368133 -0.094232142 -0.133218303 -0.10637027 -0.126878336 0.100644603 -0.0823836327 -0.0993345156 -0.0921484306 -0.00233977009 0.0756816864 -0.0497992188 
0.044235874 -0.100462228 0.0119998753 -0.0844490379 -0.0331858918 -0.0446389243 0.042482052 -0.126429394 -0.105036467 0.0468023382 -0.0696351752 0.0628612116 0.0562251285 -0.0864542499 -0.0504873767 -0.057342425 0.107809477 0.103574097 0.0706402957 0.0782148615 -0.112125456 0.0768203884 0.0012682596 -0.124097727 0.114557318 -0.0111420928 0.0438492894 -0.0157870948 -0.129962921 0.115011618 0.0792783797 0.0613046065 -0.000343024731 -0.0795636103 -0.0708794519 -0.0101428293 0.0629758537 -0.0162976906 0.111654803 -0.134260848 0.00456416048 -0.129808471 -0.0437678993 -0.0731499866 -0.156290948 0.176469311 -0.134536281 -0.0936101675 0.094726339 0.129458129 -0.00281856535 -0.0142846275 0.00348282605 0.129408911 0.125073373 0.153636366 -0.0143775577 -0.013238579 -0.0172810107 0.0421338268 0.116808861 0.0514435619 0.13204819 0.0942413136 -0.012623366 -0.0874075145 -0.0010379689 -0.162753403 -0.0148045626 -0.0110199554 -0.0829107389 -0.0709493682 0.162264898 -0.0466960482 0.115680397 -0.0569904298 0.0977253392 -0.0407817513 0.163954467 -0.0335706919 0.145685494 0.122499764 -0.0530293435 0.160302415 0.00654218439 0.0903446525 -0.0116685461 0.0239315517 -0.0313074701 -0.102479123 0.0804489553 0.0174044427 0.0801673904 -0.0707507953 -0.0458744019 0.0368017294 -0.158817649 0.0533084273 0.0464035608 -0.0136327893 -0.026964413 -0.0722962692 -0.0277424678 0.193694353 -0.00919557363 -0.0336900316 -0.00418696925 -0.0529568717 -0.00187929883 -0.00698451232 -0.0436371192 0.0323710404 -0.019839149 -0.0511180982 -0.110972911 -0.0133787924 -0.00690555479 0.104938939 -0.038326323 0.0560517721 0.138403684 0.143514618 0.199766785 0.14532347 0.0941502005 0.0855569765 0.0256890338 0.0689958632 -0.0572427884 -0.00418164022 0.0580582805 0.150297597 -0.122072354 0.176015973 -0.120600596 0.119270205 0.106842689 0.108840823 -0.0772350207 0.128743961 -0.0015650976 0.0175431371 0.053713128 -0.117410287 0.0328807086 0.0287136007 -0.104569376 0.0721085593 0.0677165911 -0.0558042675 -0.0673747733 
0.115988277 -0.122426286 0.0186466724 0.101494573 -0.029576974 -0.115950264 -0.0865741 0.0563799553 0.107808612 -0.0450687222 0.0710128173 -0.0514423363 0.0430348404 -0.0574421734 0.0800841525 0.0757694393 0.10702318 -0.0222116411 -0.0559151433 0.0379136428 -0.0136397472 -0.125272736 -0.12881507 0.0900285095 0.0889691934 0.121225074 0.0771746784 -0.0660418868 -0.044440113 -0.122758932 -0.109487474 -0.0582289658 -0.104467168 -0.00918032415 -0.0209672842 -0.0869374499 0.168161795 0.108111799 -0.0880761966 -0.0135405827 0.178589284 0.003923479 0.0852129236 0.161241695 0.00760242762 0.06472487 0.0908324644 -0.109867044 0.13713856 -0.0345446207 -0.144439176 0.0468028821 0.136207759 0.122578613 0.0340208001 -0.105203725 0.0250524748 0.106788099 -0.121437281 0.181704462 0.11812605 0.0816245601 -0.101409554 0.158797711 -0.0405994244 -0.106579058 0.0417435579 -0.0245459247 -0.00784720015 0.0369141363 -0.070102796 -0.0140520735 0.180028707 -0.0340496227 0.0966045856 -0.0815079585 -0.0375775248 -0.173401833 -0.0957172289 -0.189357907 0.0151246237 0.0324664675 -0.0768369883 -0.106799647 -0.0305638388 0.0201060958 -0.053941071 -0.0226951279 -0.0301792286 -0.0753694102 0.106556229 0.00628629327 0.0264616497 0.115733989 0.0310344063 -0.0524785519 0.0871863812 0.1207719 -0.0298178941 0.152269572 -0.13239485 -0.0819777101 0.0469505712 -0.0912657976 -0.111869723 -0.0653776079 0.10464593 -0.0256920718 0.12280155 -0.143135741 -0.00778760947 0.03016074 0.0972794741 -0.0641395524 -0.0162782986 0.0504767261 0.0761293843 -0.0471233875 -0.0866800919 0.0426621437 0.0164198168 0.111198299 -0.150085554 0.0248084236 -0.0389914848 -0.0365719572 -0.138500616 -0.0784377009 -0.107607454 0.0207631979 0.0907824636 -0.0914271027 0.0534422696 -0.112685621 0.0665683895 -0.0469377451 0.0247338824 -0.0177221745 0.118170217 0.113025144 0.0234410614 0.104385503 0.0654341355 -0.10872592 0.128927425 0.196715385 0.0276464783 -0.0738130882 -0.081564039 -0.00269559864 -0.126407489 0.015476441 -0.045586586 
0.0332736522 -0.0798867643 0.135938272 -0.162508756 0.0983785167 -0.0764289424 -0.0560759567 0.0814144537 -0.031941954 -0.121607453 -0.0935366377 -0.0972638801 -0.0318852663 0.134761959 0.00468478957 0.0771510676 0.0787510574 0.164012611 -0.0312081948 -0.0129511952 0.0929201245 0.128727853 -0.00758869387 0.0151306689 0.0861001238 0.106875338 0.0643666014 0.153492779 0.0107787019 0.0601070002 -0.0477736481 -0.131303728 0.00165647722 -0.159763634 0.0611100607 -0.0269413907 0.0301383473 0.118319333 -0.114341162 -0.143750668 -0.106911905 -0.0885151848 0.160572648 -0.0470729731 0.0245884079 -0.0456172712 0.0757794902 0.0562509894 0.0297678653 0.0527246483 0.0166134071 -0.108542152 0.142919838 0.127158552 -0.0228688288 0.00676658237 -0.03869633 -0.0931294337 -0.00328914542 -0.0614178069 -0.0198070854 0.145518914 -0.0294807851 0.0692162439 0.15985842 0.0560066774 -0.0942831039 0.0402628109 -0.118215956 -0.116073422 0.0202833321 0.117826007 0.122413464 -0.0271829292 0.0389408059 0.0934228823 0.0398765206 -0.00495207263 0.0981794819 0.115069546 0.0594924539 0.0624140352 0.0753316805 0.0130726891 0.00351743586 -0.118038118 0.133946255 -0.0532785915 -0.111061007 -0.0136450082 0.0968498662 0.133393183 0.149615765 -0.126794592 -0.107227415 0.167891011 -0.0144322244 -0.181450546 0.0244579148 -0.0923274755 0.157411754 0.050326366 0.143469214 0.00917230081 -0.0694648325 -0.0583085977 0.100404061 -0.0703162327 -0.132603139 0.0277496353 0.182791844 0.0298265126 -0.14978756 -0.0095058633 0.177655354 -0.0389893278 -0.0960298106 0.055750493 -0.0944034085 0.175231501 -0.151938185 0.0563026294 -0.126313433 -0.137585253 -0.11282815 0.0335017443 -0.016390631 0.0258972906 0.149925053 -0.0161783621 0.132413134 -0.129700065 -0.0751069337 -0.0137014491 -0.126565307 -0.0802877396 -0.127848729 -0.0448123366 0.00571359694 -0.0442490689 -0.0026283646 -0.0133119607 -0.117010497 -0.032991223 -0.0752329901 -0.0423538461 0.0337411128 -0.101852775 0.102702036 -0.113081135 0.128210023 0.0527718291 
0.0711361766 0.046200335 0.112589262 -0.0602141693 -0.124360792 -0.049823273 -0.140881091 0.116494343 -0.137485832 0.0550901145 -0.0324928425 -0.101916127 -0.0462415516 0.0865442455 -0.119312339 0.0382132456 -0.0243112519 0.101194464 -0.10621307 -0.0587359108 0.107364364 -0.0826650411 0.112274796 0.0253867805 0.0701454431 -0.043696586 -0.0748712718 -0.0725907981 0.0644025356 0.0884814113 0.0663292259 -0.129587308 -0.0319217071 0.0338242948 0.115189984 0.0245237201 0.0201187134 -0.0739658847 -0.0454444066 -0.0267900527 0.0743228644 -0.134670675 0.0116872936 0.123525247 0.00718687475 0.139177337 -0.0978305936 0.0739517361 -0.0291812122 0.0807204247 -0.140549108 0.00840865076 -0.0133223087 -0.0685992762 -0.0170855597 0.060691461 0.0238291025 -0.141307816 0.0849160701 0.0482466817 -0.0244439244 0.0211740434 0.0507029444 -0.069623448 -0.0391115323 -0.045335494 0.105534464 -0.0210918859 0.0410889536 -0.119236276 -0.0102088749 0.0296808928 -0.111803085 0.0251688212 -0.0522222742 -0.134250998 0.112514332 -0.0292918608 0.114655808 -0.115933761 -0.0447240621 -0.0562940501 0.115107387 -0.0417959876 -0.0358452164 0.128562316 0.123079613 0.0867616385 0.0504442304 0.085063085 -0.0750186294 -0.0415927172 0.0159885045 0.0309951119 0.0242125411 0.0228883941 0.128811404 -0.0658345073 0.0893866047 -0.0262501985 -0.0197901707 0.0398271419 -0.00843849033 0.0776178464 0.0806626081 0.168270662 0.015441413 0.0647286773 -0.0716274977 0.0758225247 0.114696413 0.142221808 0.103615619 0.0212591253 0.140274763 0.00738972286 0.159614474 0.11493472 -0.0833858475 0.0133725926 0.0502345115 0.138931051 -0.0143997408 -0.135814145 -0.0122304466 0.157529533 -0.150415257 -0.0632499009 -0.0106943063 -0.0938702598 0.163158879 0.13341108 0.106037788 0.1496768 0.177437797 0.087329708 0.137258947 -0.0137388939 0.0762795284 0.0370195433 -0.0747531578 -0.092746526 0.0398157351 0.0443542562 0.0983223766 -0.00542128552 0.0799729377 0.168658942 0.125600606 0.150951058 0.117274851 0.0738498569 -0.0982450694 
-0.011585433 -0.00457595475 -0.0337975733 -0.0616223812 0.0883765817 0.146805629 0.0442404337 -0.101139419 -0.059554819 0.0444233194 0.0295815617 0.0203016624 0.0470338352 0.00290740328 0.0758937672 0.0288642086 -0.0832545534 0.0548138246 -0.00573976338 0.0907851085 0.0382896215 -0.137567922 -0.0848902464 -0.0355325341 0.0280306078 0.0849616677 -0.0109465634 -0.0933749229 -0.0489923954 0.131554142 -0.0105491728 -0.0911042765 0.0896382779 0.107579067 -0.029194802 -0.118035324 -0.0691957697 0.0260686129 -0.117240146 0.0314605832 0.10417594 0.0173794031 -0.10924159 0.00410650671 0.12347053 -0.021081768 -0.0583038926 -0.076368995 -0.0559989214 -0.12317574 0.126255885 0.124372408 -0.139102474 -0.127438575 -0.0832829475 -0.0507567972 -0.0409637913 0.0168262422 -0.109306589 0.0518526733 0.0749200583 0.00206166506 0.0649633855 -0.0586098135 -0.00433701277 -0.140350699 0.0938716233 -0.089609772 -0.0619740263 -0.0610454977 0.0776864439 -0.0440377593 -0.0523070544 0.136881992 0.111145541 0.0935858637 -0.130629882 0.0228392035 0.0660683215 0.0564527586 -0.0145275388 -0.056871783 0.140726104 0.0382112935 0.0346260034 -0.0959678069 0.145820111 -0.0788428187 0.130337492 0.106305443 0.186199993 -0.0118903993 0.114453636 0.0458821617 -0.0491925776 0.0321561061 0.0618102029 -0.16807498 0.146204278 -0.0881870687 -0.169820085 0.0581149757 -0.0209829025 0.000727858045 0.0668258667 0.0809662268 0.0593013167 -0.154004052 -0.0266895164 0.131010324 0.0933532268 0.136942223 0.0960304737 0.127566546 0.128763124 -0.129231334 0.0490520634 0.0179415178 0.035261184 -0.179191247 0.134654313 -0.191801935 -0.076531738 0.0557464883 -0.0514609776 0.030970484 -0.0304086115 -0.058471296 -0.107087307 -0.0737263411 0.0960866362 0.0616026595 0.00334342872 0.0160897672 0.115088649 -0.129959434 -0.0453715175 0.106996052 0.0485980026 -0.0609982088 0.0606777444 0.0854022726 -0.0109910937 0.0280183572 -0.106572933 -0.00772281922 -0.0217049569 0.142191678 0.078674458 0.068385914 -0.0397756584 -0.0448649749 
0.0790037736 -0.0683723092 -0.134903669 0.0462144762 -0.0944194347 0.14962922 0.0367264152 -0.075939849 0.151242435 -0.0653834939 0.0671074167 0.0147493538 0.13696453 -0.0275645163 -0.0429917164 -0.0180217978 0.0253212303 -0.0417146906 0.0207910389 -0.0281672105 0.130631521 -0.109785154 0.0733767524 -0.109265648 -0.0798736662 0.0224359911 0.208666578 -0.0645421147 0.0355885737 -0.073725976 -0.0510966443 -0.0937370732 0.173772439 0.0993817151 0.00306298863 -0.195579961 0.052579727 0.127555981 0.0955225378 0.0206778944 0.0144746751 0.130441144 0.0313935652 0.00892100483 0.080054298 -0.128953949 0.0751526803 -0.0949046835 -0.153239205 -0.0463347062 0.016422227 0.0674657375 -0.0140186697 0.064172186 0.202651188 -0.165430844 0.0656619221 -0.0430362485 -0.197136238 -0.0389609933 -0.12942259 0.0315187573 0.0998861641 0.0155031411 0.0358207226 0.168374822 0.0940297097 0.0293072574 -0.0722433701 -0.0128252115 -0.0433789827 0.059830334 0.167342469 0.05525738 0.00795800146 0.177529857 0.0210485943 0.047749389 -0.0363491178 0.168270051 -0.100355022 0.0292338673 0.175140589 -0.127292693 0.162490025 0.0100361016 0.154595226 0.0616088361 0.136025682 -0.00410753815 0.0369135141 -0.143811956 0.0958657786 0.144568652 -0.00905292854 0.130941108 0.0106995432 0.0483372957 -0.0231650397 0.036639642 -0.0617889985 0.0236214604 0.0238810871 -0.0795606971 -0.110024542 0.174338296 -0.0911057219 0.0656976923 0.0863363743 0.0683924854 0.134093165 0.145337448 0.116067648 -0.0847840905 -0.0767683238 -0.0150442421 -0.0229843333 0.0828322992 0.0535647161 0.0319587328 0.068530798 -0.0646711886 0.197244614 0.0427581631 0.0388010144 0.162918717 0.136511028 0.0195802618 -0.0968718901 0.167434052 -0.0834559351 0.0702522248 0.163521126 0.110413931 0.161692828 -0.0881290808 0.148896158 -0.128931329 0.0255813021 -0.0889823139 0.157743439 -0.0732447058 -0.0442789234 0.0533142164 0.133719116 -0.116840921 0.0800347999 0.189877659 -0.135516554 -0.0575624406 -0.0097662257 0.119637571 -0.074548699 -0.0714714378 
0.126038283 0.1195461 -0.09768942 0.0303867999 -0.123445861 -0.0530549176 0.107548378 0.106309928 -0.0313007124 0.183906198 0.0751518011 -0.0633003265 -0.0617225319 -0.0701497793 0.0320757441 -0.0290392973 -0.0253149793 -0.0470200963 -0.0478345975 -0.120073162 0.201239541 0.142304704 0.0925019607 0.148831651 -0.167674646 0.123002127 0.106455177 0.0328564122 0.18806994 -0.116831504 -0.00451909332 0.108785309 0.157465339 -0.00134878256 0.168126434 0.0580710471 0.0837541148 -0.0657100528 0.158608526 -0.0463683493 0.0946896747 -0.104266793 0.0244341511 -0.0714015439 -0.0990499556 -0.0860033333 0.145062909 -0.0333383344 0.142448917 -0.00225598761 -0.0131941633 -0.149845496 0.00207266607 0.0925255567 -0.182044104 0.00203921902 0.178830191 0.135419115 0.127062351 0.119150542 0.120787822 0.0427289233 -0.102054872 0.0916266441 -0.0503866151 -0.0314327143 0.113203667 -0.14366518 0.12766479 0.0501433946 -0.0380674638 0.132927895 0.147104084 0.129884318 0.0988519117 0.0387863517 0.0734434873 0.0411540642 -0.027659202 -0.13669847 0.083362028 -0.0450929962 0.145056829 0.0885054395 -0.0165824685 -0.0861969367 -0.0862592608 -0.160450995 0.0212117564 -0.104402281 0.143013418 -0.0506607853 0.121090904 0.00905802753 0.111442901 -0.143552661 0.0210310649 0.0612097643 0.00359729188 0.0227075666 -0.0815051943 0.155096367 -0.0119450046 -0.0233580228 -0.0038536794 -0.0880303755 0.164003551 0.1600402 -0.016360864 -0.0836358368 0.0851199031 0.0105815725 -0.121088877 0.161806434 -0.0379569791 0.0800513998 -0.0538180247 0.153429583 -0.0247538723 -0.00772412121 0.120341845 0.0548929647 0.114107296 0.0127800889 -0.0710391551 0.134522244 -0.0879234001 -0.0632987469 -0.0650375783 0.0809550807 0.137545347 0.0396288 0.186278701 0.110111617 0.143173963 -0.176478416 0.160997689 0.0144827925 0.0872319192 -0.0407468043 0.114270978 0.0436847992 0.0258595552 -0.0514572188 -0.0362136886 0.130494818 0.126685143 -0.0894779786 0.117681846 0.173565581 0.174748227 -0.15385066 0.149053425 0.160555586 
0.0397729799 0.156005859 0.110312633 0.104156397 0.161141351 -0.0919019654 0.015511048 0.0107473964 -0.0837544352 -0.0176889747 -0.10078945 -0.0619383864 0.160746276 -0.087044619 -0.0232165866 -0.0215495545 0.0582484603 0.0864141285 0.175924376 0.0442700647 -0.0247930624 0.0347629003 -0.161288068 -0.0290379301 0.170908287 -0.117735907 0.110525407 -0.115487754 -0.000686930609 0.130876914 -0.0291782003 -0.192795917 0.127867773 0.126315489 -0.07262256 -0.098871097 0.0209841039 -0.19527556 0.116880774 -0.02486692 -0.00237640645 0.143660888 -0.016016813 -0.0697216764 0.175688595 0.0232482143 0.0199046992 -0.103963897 -0.0378533229 0.0388961881 0.00533542689 0.0628525913 0.159435913 -0.0747304037 0.0978682712 0.164278746 0.077385895 0.109259471 -0.0799139515 -0.0421864092 -0.0443351157 -0.133975893 0.0834283531 0.093928501 0.00520775095 -0.0434011891 -0.0435828492 0.138147533 0.106794529 0.093232654 -0.077764377 0.16267027 0.051492583 -0.0966648981 -0.0458262265 -0.0408286341 0.0238162875 -0.00872587226 0.153415054 -0.0966666192 -0.0194769856 0.151141167 -0.132202849 0.17568706 0.0875745118 -0.00695692096 0.0846608803 0.0842222869 -0.00846964866 -0.133651823 0.0813971162 0.0544089861 0.101662867 -0.166373864 -0.112454981 0.137616843 0.140390456 0.0915882215 0.10989771 -0.0496877804 0.154562473 0.0789823458 0.0279520545 -0.0192710813 0.025512537 -0.00114545715 0.0528355576 -0.0804974213 0.130488142 -0.0450717099 0.00189470535 -0.126931518 0.00184863445 0.0691755414 0.0959887952 -0.00365662854 -0.0239975173 -0.000226317745 0.162838191 0.110088706 0.103135742 -0.0143095907 0.0685937479 0.039006602 0.181053951 0.0662889108 0.142534971 -0.0225376673 -0.0523421951 -0.0925690904 -0.00610838691 -0.0569295287 -0.0691444948 -0.0351942256 -0.0200236402 0.0384809263 -0.00329685421 -0.15174298 0.1632265 -0.191212401 -0.169024199 -0.093971774 0.115878142 0.0936368257 -0.0726782456 -0.0567203537 0.127668455 0.0460995883 -0.0191945117 0.18582131 -0.171271384 0.0437021852 0.062035732 
0.0159470849 0.0150196124 0.00918887649 -0.0672063157 0.0613921694 0.0558371395 -0.172685817 0.0529843457 -0.179647043 -0.00943551958 -0.0415023826 -0.0244376082 -0.0472054332 0.153094694 0.143580258 0.0942730904 0.156098858 -0.00754955551 0.0512687974 0.138893977 0.0646209419 0.00226254459 0.133554146 0.0259827524 0.110805348 0.0725759491 -0.131094366 0.12708883 0.0314303264 -0.0524304323 -0.032248389 0.163754046 0.0906126276 0.00314503536 0.103355683 -0.022527555 -0.1250837 -0.143783137 0.0596455783 0.0511251315 -0.0954806134 0.17346862 -0.00509193866 -0.0772540048 -0.0803210288 0.173364595 0.167615995 -0.129515707 0.0145245409 0.0466810837 0.0946052521 -0.0887519196 -0.0918630585 0.154023126 0.182059482 0.122924969 -0.0969166085 0.0428368933 -0.0473706648 0.0871873423 0.0173784196 -0.0468124636 0.130918413 0.115169801 0.106101029 0.0267140083 0.171541661 0.117503718 0.0674298778 0.0793930814 -0.0995452777 0.0986198336 -0.0477845483 -0.0891349018 -0.110497288 0.149275228 0.0541292913 -0.0509323142 0.00657417579 -0.00849667098 0.0782996938 -0.000425429258 0.0927700475 0.0596327335 -0.0792194828 0.048249729 -0.125496924 -0.119564533 0.0140337572 0.154170945 -0.175600752 0.0509903021 0.0491141193 0.151463166 0.0498116091 -0.0577821173 0.0124854716 0.0519152619 0.000966675114 -0.0199240129 -0.0589309931 0.000340196391 -0.0851683021 0.0118466569 0.109990321 -0.0261993259 -0.0374022834 -0.0214411858 -0.077557683 0.0687204972 0.0663195103 -0.0442392081 0.0338341743 0.13567619 0.180690661 0.19239752 0.107011527 -0.0798124969 0.0309492871 0.0260094907 -0.125474811 0.0975558758 -0.171736181 0.121255443 -0.0812420845 0.174648881 0.0337508172 -0.0655879006 0.168462068 -0.123068273 -0.14526248 0.1509289 0.149049625 0.0172713008 -0.0775876939 0.125850379 0.0576170236 0.0959700122 -0.0350637622 -0.0413426161 0.198388338 0.06012512 -0.18112573 0.0456633084 0.123411685 -0.135381892 0.0592928678 0.0492700674 0.192084178 0.00668479549 0.00347893289 -0.0798124969 0.160300285 
-0.0158643834 -0.097056821 -0.00595153868 0.193585619 0.129847378 -0.0445784479 0.154722676 0.0128285876 0.114035919 0.0366068296 0.0581881292 0.0526427999 -0.0453962088 -0.0249866024 0.147942722 0.048362948 0.0718180016 -0.129099786 0.0695572644 -0.040164955 0.151449472 -0.107218184 0.0813755468 0.0611639321 0.120362371 0.00170552568 0.0150107937 0.0923141465 0.179166928 -0.0595131889 0.0748501047 0.024664795 -0.072850123 0.0498956218 -0.118837982 0.11913538 0.1241147 -0.0298356991 -0.0732461885 0.137327462 0.150715679 -0.144629672 0.0296867546 0.0185879748 -0.158391654 -0.0696423948 -0.0815559775 0.120456815 0.174756512 -0.0714245588 0.100912079 0.109141059 -0.0181489885 -0.189933077 0.0589498132 -0.146864727 0.0246144049 -0.0326956324 0.0814622864 0.044614289 0.0344069004 -0.0722796917 0.0347998254 0.00988415256 0.074375473 0.0236355383 0.186613545 -0.0229948368 -0.0627373829 0.058446534 0.0801035017 -0.0250811949 0.0163063705 -0.0360587984 -0.0412077829 -0.131415576 -0.14121896 0.183651194 0.0538982339 -0.0825245678 0.0530949496 0.129799366 0.077988103 -0.163070917 0.131275401 0.115696557 0.0255096387 -0.0695977584 0.149488509 0.110933349 0.0595859699 -0.105136663 0.139630318 0.13104403 0.140138745 -0.101875864 0.0968326181 -0.0490331948 0.0320329145 0.0932519361 0.111740142 -0.0153519753 -0.0669102296 0.0104083447 -0.0649985299 -0.154267743 -0.0946161672 0.139226034 0.107407138 -0.0765753686 -0.0474209748 -0.111844584 -0.0410924852 0.00278180838 0.108596429 0.014437899 -0.120850071 0.101168439 0.0475970656 0.110533401 -0.0760123357 -0.0803952068 -0.0215338543 0.109282747 0.0477782488 0.0887209475 -0.0328624696 -0.0277395248 -0.131564692 0.0674616843 -0.0144642591 0.13782452 -0.0827166885 -0.0459428355 -0.0465939641 -0.0978194177 0.137472615 -0.0644845441 -0.093579635 -0.079621926 0.0540327132 -0.0426073149 -0.0682768524 0.140229478 -0.0689926222 -0.116822943 -0.0883634388 -0.0420724526 0.0797011107 0.134867147 -0.0124301612 -0.0311987475 0.076223284 
0.0785176903 -0.0510006249 -0.08932513 0.0967391878 -0.136143774 0.124550089 -0.119794972 -0.106707312 0.0434878916 -0.000768460974 0.083400093 0.123351663 0.154955849 0.0239652898 -0.00470558135 0.0155227007 -0.155885831 -0.0280565098 0.128090873 -0.0347218178 0.0469225496 0.066305764 -0.0798357874 0.0677081048 -0.153243482 0.0412665345 0.15851365 0.0430604853 -0.0530885011 0.131436363 -0.0623488314 0.0265644994 -0.130693406 -0.0925032496 0.167786196 -0.00228108512 -0.051090654 -0.129197508 0.125834614 -0.0826043189 0.0495859832 0.13765806 0.140279785 -0.100200407 0.078553237 0.102651939 0.0530582368 -0.105640791 -0.0712560862 0.0563652664 0.0500995256 0.110330448 0.0879531652 0.0794132054 0.0128588937 0.139566243 0.00905480981 0.0769669786 0.0616210736 0.0383987278 0.024789568 -0.0215452202 -0.0754719898 0.103158571 0.021371033 0.157727793 -0.168305516 0.041431915 -0.205217093 0.0685112029 0.11518427 0.0901029781 0.0836623907 -0.00306673371 -0.078299813 0.0937599093 0.0358634107 0.150480777 0.017379215 0.0400344506 -0.0467984006 -0.0435465574 -0.0746275187 -0.12713474 0.110726796 0.163420781 -0.100556083 -0.00550368195 -0.10327252 0.044071883 0.0337789692 -0.0129005229 -0.0913272351 0.132832885 0.147079349 0.100901216 0.134497017 -0.0322105363 0.133053601 -0.0325982273 0.141311869 0.0040314584 0.151371911 0.181470856 0.0484154783 0.164058596 -0.0128529146 0.0409421511 0.159602627 0.101342879 0.149882555 -0.0492368788 0.172365248 0.124329507 0.0683217645 0.0930551067 -0.0814763457 0.147788212 0.00853961147 0.0389146842 -0.000336691737 0.163021743 -0.084802106 0.0986582115 -0.0116979126 0.0385086611 -0.0496010855 0.0737678558 0.103331998 0.161403298 0.0173213035 -0.103028946 -0.0950937942 -0.0377868973 -0.0620894209 0.13404268 0.0146548431 -0.0653266087 -0.0033960822 -0.112761199 0.0226024743 -0.177061707 0.109000698 0.045506943 0.101955965 0.0158496425 -0.0690437183 0.104365595 0.0359109081 -0.122470014 -0.0659879521 0.0467208475 -0.0753396451 0.0523877777 
-0.0585377291 0.100402929 0.119433776 0.0242477674 0.0617414936 0.182905495 0.157282576 0.0866737887 0.107341088 0.114345349 0.00848616753 0.0763099417 0.0206906293 0.0617443733 0.0259690173 0.107850946 -0.111641936 0.133501753 -0.169169813 -0.0887352601 0.089083977 0.156513289 0.0230403095 0.000902002619 0.0383367911 -0.0300379787 -0.146975219 -0.00701804645 0.131880164 -0.0454387777 0.0733794197 0.173866943 0.0410080142 0.19769071 0.0897455812 0.0198194478 0.00869395584 -0.0264868997 0.0861539766 0.123009734 -0.0185853429 0.16686818 -0.0672833547 0.0305484533 0.132848471 -0.127947524 -0.1613774 0.0643686131 0.0070268726 0.0036489605 -0.189245149 -0.0304792393 0.113094799 0.130098417 0.118080013 0.127857327 0.0940245837 -0.165752977 -0.0374614373 0.109492496 0.0428666584 0.170740604 -0.115685873 0.0148922838 -0.116838083 0.111455843 -0.0632996783 0.0108929574 0.0726874396 0.0742699429 -0.0629896522 0.113437019 0.199758425 0.0475728512 0.137889087 0.19803226 0.0400452688 0.0794214979 -0.108014926 -0.00188282889 0.111494496 0.0771949738 -0.116306648 0.0865728483 0.0771485493 0.0915202647 -0.0908453912 0.0911061615 -0.0472535603 0.136873767 -0.037476372 0.129080757 0.173227653 0.176956236 -0.117876649 0.0886662453 -0.0194631983 0.140326738 0.0929994658 -0.0285486728 -0.123725995 0.0545314588 -0.132062644 0.196129248 -0.0776121169 -0.0292998273 -0.0817124322 -0.123064265 0.0644138977 -0.0409719124 0.0910102725 0.0774317682 0.0588561557 -0.0303226635 -0.114509314 0.00717926025 -0.0146975368 -0.0139649464 -0.0769111067 -0.0884687155 -0.0844886228 -0.0546910986 0.0992946401 -0.0306005422 -0.0368665494 0.0252984539 0.0552819744 -0.0180559643 -0.0461472273 -0.059688963 0.0529744141 0.105257906 0.135227516 -0.136654019 0.0433159433 0.0750075579 -0.143905401 0.0697793365 0.0171793997 -0.0880545825 0.0440685079 -0.135759518 -0.0708841234 -0.137341917 0.0642284378 -0.0825591236 0.0998160243 0.104954824 0.0703029931 -0.00554473838 0.0652662367 -0.137622833 -0.0849017501 
0.0079975808 0.0469577163 0.0332614519 -0.0239423085 0.0571367703 0.125478789 -0.0188843291 0.0104151899 0.0507268719 0.0427310057 0.182245687 -0.0464136638 -0.0774840489 0.0790423155 0.0158217624 0.174919963 -0.167722598 0.00393518014 -0.141249925 0.0320646316 -0.0712961331 0.18195422 -0.101946741 0.205521435 0.0143015096 0.190244779 0.0565855652 0.143080652 -0.0879745483 0.0268129539 0.0264821127 -0.0976307765 -0.0719135925 -0.0931720287 0.0752973855 -0.0940701365 0.0943753496 -0.00160595321 0.00526125729 -0.0494134016 -0.0277267974 0.114400044 0.0121099204 0.0469762683 -0.0178804994 0.175322458 -0.0936195925 -0.0206507854 0.0129827568 -0.127164483 -0.0533081368 0.0902868807 -0.089850314 0.0812181607 -0.0252427552 -0.0260248482 -0.0263420995 -0.128954813 -0.144561514 -0.0969642028 0.0840708092 0.105219595 -0.0315751806 -0.133927286 -0.0635263324 0.0815265328 -0.103957005 -0.0656396598 -0.0624658093 0.027983008 0.0192227215 -0.0915314779 -0.0996872336 0.0151820509 0.00491440995 -0.0790896341 -0.148336604 0.106279097 0.057419382 0.034870699 0.100479744 0.00237061502 0.0768525749 -0.12644136 -0.125458911 0.112800233 0.00162200222 0.0578222498 -0.056215629 -0.0922449976 -0.158906817 -0.0518889986 -0.100536995 -0.0453334972 0.00646515191 0.0148057342 0.0331344642 -0.00636346964 0.0370892994 0.00641168654 -0.0307880603 -0.0186160952 0.0293306652 0.0952301919 -0.12958698 0.117998272 -0.0704888254 0.00443183212 0.147841737 -0.0992462039 0.0764997005 -0.0257688798 -0.0460406169 0.0839670599 0.120056614 -0.0614700243 0.113699906 0.0346624181 0.180427715 -0.0145217599 0.168693572 -0.00197043363 0.191886678 -0.0972156301 0.0206416119 0.0345100351 0.0903015509 -0.100287922 0.0303347614 0.136919394 -0.0126191778 -0.115950003 -0.0293597691 0.0265962426 0.00261192676 0.0278086904 0.158691257 0.0234635379 0.117342651 0.0816714615 0.0948666632 -0.0849409848 -0.114143133 0.0362917073 0.070062004 -0.0524370112 0.167162567 0.104840927 0.124661915 -0.138633773 0.19061929 0.0486695245 
-0.00107917748 -0.0845123231 0.046763584 0.0243339688 0.0911204591 -0.113943458 0.00347187044 0.0777205974 0.095806241 0.0292435624 0.127136692 -0.0182037577 -0.0450141095 -0.0123331165 -0.0598197915 0.152919352 -0.131715685 0.164068297 -0.0793498456 0.00121658249 -0.0503176786 -0.0856561065 0.0431076214 0.0459455065 0.167714477 -0.00190150819 0.0480769761 0.0142310113 0.0774440318 0.0504581034 0.197599128 0.172974482 0.0960050672 -0.0724410191 -0.0655787885 0.0426691361 -0.0474077053 0.0671926141 -0.0111915339 -0.0694714338 -0.0728770122 -0.0334699675 0.0879241973 0.0191930141 -0.0492004342 0.170004874 -0.136069939 -0.0839288905 0.121699564 -0.0373032577 -0.0790554881 0.0212189052 -0.0723486841 -0.0706750974 0.164014727 -0.1265852 0.180671826 -0.0538335219 0.135076165 -0.082566984 0.00627529481 0.0355592817 0.146791458 0.0428247713 -0.0218269154 0.113299571 0.15928854 0.109753877 -0.0433866642 -0.0531712547 0.121344112 -0.0599708892 -0.140954524 -0.0652005821 0.107553594 -0.0420940556 0.0391891636 -0.0892334729 0.0449264199 0.038767308 -0.00515921181 0.00227128062 -0.0260546599 0.145359293 0.0675093085 0.165128261 0.107131146 0.145455942 0.150093794 0.182555065 0.0683342069 -0.0751166418 0.00099511235 0.136942998 -0.0637786239 -0.118969493 0.0861738697 0.121482879 -0.0593939386 -0.0681066886 0.137257427 0.142178074 0.110687025 0.104999736 0.00519723399 -0.0884702951 0.0194963887 0.146859735 -0.00592712127 0.0192816481 -0.0416031592 0.10512694 0.0102964779 -0.0616582707 0.13753584 -0.00928659178 -0.0823482201 0.114266947 0.0528262816 -0.0983823165 0.0188455041 0.0611194335 0.100222267 0.00196855632 0.0390211269 -0.139956653 -0.0327276476 0.156153634 0.0125370612 0.0344246514 -0.0589949451 0.0921387449 -0.144850284 0.0212448426 0.144581348 0.0431137607 -0.0885965675 0.0854236633 0.00550921075 0.172450885 0.123434886 -0.185906976 0.132565111 -0.181601852 -0.10362874 -0.184760764 0.130670205 -0.0472870953 -0.10729944 0.132553771 0.0137786418 0.0876799598 -0.0260619633 
-0.0263571106 0.123666577 0.123572513 0.00393577246 -0.0911321938 0.149641573 0.0127057144 0.040043395 0.0847804174 -0.0459438674 -0.177438155 0.00510600302 -0.00431553461 0.138086572 0.116237916 0.168006837 -0.0157650076 -0.00970370602 0.0802516192 -0.0514502637 0.132704586 -0.0417737029 -0.0193822831 0.166242853 0.0419458486 0.15848121 -0.00174845546 -0.0805547163 0.00697086425 -0.171336144 0.0593196638 0.0595933609 0.110469177 -0.0882590115 0.050680656 0.0118347788 -0.0319386683 -0.0632662848 -0.0177531485 0.0838051289 0.0470289141 0.178859159 -0.0346439704 0.0504389554 -0.0520837195 0.00219774805 -0.0491009764 0.0503517203 0.107992731 0.0384831354 0.0872439444 0.167825118 0.0623564459 0.0223074984 0.0821516067 -0.00865145214 -0.0457197949 0.0970128179 0.0120575717 -0.0556218661 -0.116809532 0.074401699 0.0880089849 -0.0123710101 0.007505944 0.135203391 -0.146449044 0.0852448419 -0.132378265 -0.109845184 0.149791658 0.015171879 -0.158416107 -0.0637820587 0.16275458 -0.0364229716 -0.143408865 0.127545208 -0.0622910671 -0.139478207 0.0405872539 0.0932571068 0.0956263393 -0.00292709633 -0.100080743 0.137279779 -0.0495060496 -0.0749291778 -0.0744291395 -0.0862122774 0.0235699266 0.109829761 0.0802345648 -0.123428002 -0.135655686 0.115854591 0.186653689 0.104481116 -0.0934653729 -0.107345767 -0.0480583683 -0.112480521 -0.0674405769 0.0481690913 0.0844945163 0.102531567 -0.132132486 0.137842521 0.00775253773 -0.0610849336 -0.032591112 -0.0524423793 -0.0668133944 -0.113737375 -0.000165238976 0.114906386 -0.1328713 -0.0835750252 -0.088781476 0.018294096 -0.0263542235 -0.0792298913 0.0685598254 0.0419423133 -0.0260287449 0.13109158 -0.138066247 -0.0322780311 -0.0882859379 0.0807678401 0.0512416959 -0.123070188 -0.00298701227 -0.0796232373 -0.104369447 -0.117494076 0.00122408569 0.0332989395 0.0664115399 -0.0739870965 0.0106086135 0.109527692 -0.0934588537 -0.0895289928 0.0728636533 -0.0278315544 0.0639105886 0.0930453986 0.0494588055 0.017094126 0.112311125 
-0.00386948418 0.0680094063 0.0254231635 0.0913507342 0.16692546 -0.0122418981 0.108312286 0.0950310752 0.101287387 -0.105884947 0.030036103 0.00558372587 0.109446019 -0.0987028182 -0.0895694122 0.118367992 0.0662995502 0.114169754 0.0966514125 -0.0286930036 -0.0851531997 0.128677562 0.124861382 -0.100621521 -0.128018498 0.0673300773 -0.0310823116 -0.0784357563 0.0379403606 -0.0306251384 0.0655758083 0.0960387737 -0.152080312 0.136492133 -0.101761207 -0.0275989529 0.0933943838 0.0766497627 -0.0804210976 0.143909901 0.143697292 -0.0849372372 -0.10959392 -0.0742666796 0.125293195 -0.0966164172 -tensor_14weight 2500 --0.0543760806 0.0856281444 0.0533403084 0.0177523084 -0.0268334541 -0.0549559146 0.159062862 0.172800139 0.109722741 0.0875528008 0.0125674438 -0.0810011849 -0.068577148 0.170207128 -0.135173365 -0.0806247443 -0.0548967347 -0.0452914089 0.0365853943 0.129278928 -0.0377073251 -0.17943646 -0.00266921567 0.0811229944 0.0154373068 -0.0359650813 -0.0855926052 0.127574399 -0.1265679 0.04885903 0.0561187416 -0.112507693 -0.139889583 0.170207158 -0.097494632 -0.0187973343 -0.0904997438 -0.0484883524 0.031168703 -0.055549074 -0.0741278306 -0.002624318 -0.117438287 -0.0157258548 -0.0880523771 0.114648446 0.0272049736 0.103814438 -0.0217095967 -0.140518233 -0.0760676265 -0.110887714 -0.0115829725 -0.00750160404 0.0959720686 0.0384376198 0.061359182 0.0955482125 0.101260468 -0.0115174651 -0.013766964 0.0398462117 0.166129872 -0.0850986466 0.140506133 -0.103672192 -0.154903129 0.0968019962 0.066429466 0.0431276001 0.147400737 -0.00412948243 -0.0342022404 -0.0535201877 -5.71517012e-05 0.0244176984 0.0832642242 0.176724657 -0.0719986036 0.172275752 -0.114797458 0.0914949924 0.0334078744 0.0464251973 -0.00394226797 -0.0035392812 -0.0278604105 0.0514154881 -0.0311339442 0.021156881 -0.0213947129 -0.0683914274 0.0251719803 0.0944593325 0.12849097 -0.049127765 -0.0469818637 -0.0983457267 0.13893728 0.0303975027 -0.0299507454 -0.0138533115 0.139151528 -0.135961041 
-0.0921831578 0.0593009293 -0.0144180804 -0.0136186779 -0.0715967119 0.0790341347 0.00953520834 -0.0408776402 0.101040825 0.00309920311 0.0447804034 0.0982600003 -0.0721947402 -0.118167073 0.0333673507 -0.0950507745 0.10244967 0.08306925 0.0455361456 -0.122597888 -0.0647362471 0.00561864674 -0.136176527 0.0647586584 -0.122481212 -0.0205618665 -0.094566375 0.0131596476 -0.117649406 -0.110489279 -0.0717473105 0.103288978 0.0714375228 -0.0784455761 -0.105901703 0.0811899006 -0.131345108 -0.0233812556 0.102898851 -0.00886622071 0.0682659149 0.129993364 -0.0971994996 -0.0193270147 0.00360363722 0.121052161 0.00784411095 -0.123100765 -0.0609981082 -0.135147735 0.0461434908 0.117215686 -0.0296066701 -0.0148467962 0.054072503 -0.118035108 -0.13138777 0.0103239622 0.0106298085 0.00161406794 0.121524885 0.106862329 -0.0696737245 0.122207746 -0.129250824 0.0716361329 0.117990665 0.0917533413 0.0275282189 -0.124964394 0.123115174 0.0490060188 -0.0750153661 -0.0502910502 -0.0452317111 0.101086549 -0.101995051 -0.112885557 -0.0476158895 -0.0509889536 0.0219939649 0.00387603301 -0.0764786229 -0.0421580113 -0.0788122267 0.084515363 0.0346965827 -0.01090011 0.0382516384 -0.00645032525 0.129111394 -0.0737728179 -0.00789030734 0.11321111 0.00651154015 0.000951979193 -0.0776003599 0.0253983736 -0.0880478546 0.111969553 0.0747581348 -0.0281555094 0.0477269702 -0.0837645158 -0.00260412018 0.0995940417 -0.0253548026 0.0838286281 0.037731003 0.0643470585 0.0464969426 -0.102055438 0.00463358313 0.0325008184 -0.00370962941 0.0821173638 0.0869908333 0.032846041 0.0313670263 -0.148173332 -0.177075326 0.014351381 -0.0347749256 0.0631445199 0.0138477925 0.141796917 0.0031752775 -0.0240941141 0.115030944 0.0497418977 0.0109222829 0.0674659908 0.010190879 -0.0980509967 0.107191958 -0.0665694326 -0.0149048567 -0.135567963 -0.0943998545 -0.0724455938 -0.108684249 -0.117758349 -0.0431607552 -0.0478789434 0.0548663996 -0.0874581188 -0.12479274 0.0178123116 0.070239827 -0.0386666693 0.134508371 
-0.0741510987 -0.045267418 -0.104734987 0.0435491502 0.0155023336 0.062136706 0.103647709 0.00290234643 0.064395614 0.0177004337 -0.0480007231 -0.110428169 0.0850054473 -0.0885846689 -0.0500162207 -0.0616900064 0.148497447 0.0951149315 -0.0552124381 -0.14905256 -0.0889345855 0.0241270382 -0.0488678627 -0.00426269416 -0.0119903926 0.141092837 -0.0797038823 0.0120936269 -0.0693103597 0.0249975473 0.145910755 0.0371512882 0.117824383 -0.0573362373 0.0323375016 0.10749159 0.0636148006 0.0273176879 -0.0183407739 -0.115713961 0.00984863937 -0.042138014 -0.118756019 0.0761800632 -0.132937029 0.119471751 -0.0354485847 0.127721861 0.121748939 -0.0444656201 -0.116685092 0.0304207485 0.0655196533 0.0813344195 -0.107156277 -0.0844279304 -0.0894685909 -0.127893046 -0.0939210355 0.0843395889 0.0614806749 -0.0614846796 0.0341131836 0.0421788543 -0.105776869 -0.122536495 -0.129194289 0.12389411 0.0393402874 0.0495846197 -0.119962715 0.106891051 0.0449610613 -0.145153821 0.0629368573 -0.108572282 -0.0517346151 -0.0622508824 0.0323888771 0.0450324118 0.0440010354 -0.0938819498 0.0203166902 -0.122573078 0.0831483901 -0.0707751289 0.0610891283 -0.0958172753 -0.0424687862 -0.107412554 0.126818612 -0.0554413795 -0.122475646 0.0243445728 0.0513011068 -0.0796121135 -0.0414110497 0.0280717909 -0.0394670665 0.0598106235 0.0484230518 -0.126170114 -0.0951998904 -0.078391239 0.0402923077 0.0229541957 -0.0937744156 -0.0477972776 -0.0179067627 0.0602110922 0.0934107453 0.139090851 0.0272798836 -0.0181408152 0.0339401662 -0.0496698096 0.123755589 0.077883482 0.0388832986 -0.119654641 0.140062913 -0.02580522 -0.0365974084 -0.0948570818 0.0476925224 -0.106507264 -0.0877594203 0.0167225003 0.0365579128 0.0707214922 0.0943449885 -0.0219054744 0.0260573626 -0.0470213518 0.139553711 -0.0147360563 0.0706477165 -0.078761287 -0.0111072361 0.0549765974 -0.0400532633 -0.0153049231 0.0315274298 -0.0705541149 0.0631048977 0.117633738 -0.0514981188 -0.134041414 0.0551473498 -0.0794123039 0.00670406362 
-0.0401185192 0.00911470596 -0.0211872291 0.116496786 0.155161962 0.0629097223 0.138448417 0.0936189666 0.0627936721 0.0648671389 -0.129390776 0.0583335906 -0.0174725447 0.0610876642 0.163619712 0.0462206006 -0.0404846109 0.0467165858 -0.149703398 0.0884451717 0.0297990069 0.0904366821 -6.66035776e-05 0.089948453 -0.164717227 0.0440124683 0.0429885276 -0.0889559984 0.0580933429 -0.0497451164 0.0140721994 0.123201773 -0.0521491505 0.0792684183 -0.116658807 -0.0728405491 0.138154134 0.0858280063 -0.0885532424 -0.0259025618 0.143468827 0.11027436 0.130449191 0.0466446765 0.0738923401 -0.0459300056 0.135415688 -0.0519030988 0.0363911055 0.100617178 -0.0735667422 -0.056867335 -0.100770339 -0.0581379086 -0.0582638234 0.057107687 -0.0833413973 0.117787801 -0.121084802 -0.0609023273 0.0458093919 -0.0386206284 0.120702438 0.152693301 -0.0653539896 0.00243751518 -0.116231412 0.129159972 0.123322234 -0.00450206548 -0.0729444399 0.0853474438 -0.145202518 0.0369620174 0.0304967947 -0.0492551252 -0.130056858 0.0221843477 0.0469832569 -0.056170959 -0.146923915 0.0244862288 -0.0021409702 0.0949956179 0.134217575 -0.0556118563 -0.106579103 -0.0108840466 -0.147231668 -0.0594046339 -0.0605274215 0.00136603415 0.127606124 -0.115885407 -0.00178258657 0.0252946466 -0.0912591442 -0.0576305799 -0.0229029693 -0.085684374 0.165293708 -0.0522565134 -0.0692233294 -0.0375391915 -0.0102695916 -0.104804181 -0.0526487827 0.150056034 0.0551703274 -0.0257776212 -0.0228184611 0.073696211 -0.0467144549 0.0735779107 0.0948753133 0.144433752 0.166210935 0.150882557 0.0348055102 -0.0166522712 0.161574543 -0.0576791242 -0.0115333898 0.0468233787 0.0915934965 0.0572047532 0.00290581165 -0.0225567296 -0.0246865228 0.0868225098 -0.00762603246 -0.11816176 -0.12570864 -0.0243588239 0.0893646181 0.0507476032 0.0117150992 0.10665486 0.121998012 0.0696426779 0.0685170516 0.025851354 0.142432615 0.146065772 -0.0138009675 -0.129955053 -0.000132796747 0.0203777198 0.181162477 0.0261075366 0.168559924 -0.0609995425 
0.168947399 0.0293546468 0.0149049358 0.0582519248 -0.04978792 0.103416584 0.0590672493 -0.0700641274 -0.14282304 0.0278612077 -0.131275356 0.14719297 -0.0653766692 -0.0751730502 -0.0065545626 -0.0380778089 -0.157771811 -0.0827088878 0.0777130723 0.198460281 -0.0469098203 0.134435102 0.14349848 0.0417049713 -0.167793706 -0.0996251702 0.0797272176 0.122879468 0.179209173 -0.0471446738 0.168509901 -0.0920644701 0.000451093569 -0.129801482 -0.0594977811 0.00711449794 -0.102573976 -0.109216064 0.148805737 -0.0950382799 -0.00647751195 -0.109716304 -0.055815164 0.0108515322 0.0906130522 -0.0650295168 0.0893351659 0.114572234 -0.106268756 -0.0118306447 0.113306493 0.0420926064 -0.143582255 -0.11182075 -0.0272862986 0.0896898583 0.0240881741 -0.0932913795 0.0246650521 -0.138226554 0.0727393776 0.0228461325 -0.16916123 0.00246544858 0.0807503536 0.0973562822 0.0370443426 -0.135211006 -0.0732924193 -0.00324719655 0.00122735673 -0.0426253006 0.0464077778 0.0109117776 0.0653063208 -0.0427299105 0.0784161389 -0.0572868735 0.105860651 -0.097189337 0.000937802775 0.0585776716 0.141582102 0.0805247277 0.0409072042 0.00662690401 -0.0858124942 -0.0805532038 0.0404491127 0.124266788 -0.106811218 -0.0971105546 -0.0997086912 0.0074750483 -0.108586416 0.0607502013 -0.054395549 -0.0843265578 -0.0900614634 -0.0676774904 0.0206092894 0.099438563 -0.0570041686 0.0200342685 0.0236357749 0.0572907329 0.0950599462 -0.125204116 0.0311794877 -0.0876096636 -0.136037469 -0.0375309587 -0.0594457537 -0.113332778 -0.0978064537 0.0964330435 -0.0265298411 -0.00542576611 -0.0438456684 -0.0523090437 0.0630306751 0.124709442 0.0266276151 0.0232248306 -0.0171631426 0.106016204 -0.110088825 0.08274737 -0.110974953 0.0963929445 -0.0833926201 0.0303138644 -0.0306184739 -0.00307349861 -0.11504256 0.1075629 -0.131726444 -0.0421231985 -0.138194129 -0.113239586 0.0452417433 -0.0149982423 -0.0436716527 0.0988965183 0.0192198902 0.0135216201 0.00940239057 -0.11294537 0.0857888535 -0.120034076 -0.108530715 
-0.101037055 0.0667439774 0.0855601728 0.0677483305 -0.0134334378 0.120776698 -0.116636701 0.0875215456 -0.0949789584 -0.0514179617 -0.115705922 -0.0596454814 0.0422541201 -0.0999356657 0.0502830669 -0.0702968836 -0.123627275 0.106365606 -0.0744836628 0.0104168141 -0.0793894753 -0.114666551 -0.00283100014 0.140118852 0.0356186256 0.054495573 0.102265559 -0.0458586551 -0.0236852318 0.106435075 0.00892684981 0.11433281 -0.115178108 -0.131405771 0.117208794 0.0424666107 0.134797171 -0.143703952 0.0569373965 -0.0665611774 0.0933629125 -0.0201621354 0.111485049 0.00233875564 -0.0812246799 -0.0269328542 0.0232816096 0.0677310228 0.117872521 -0.141205952 -0.00658942759 -0.0309162736 -0.144758567 -0.057528194 -0.0684359372 -0.0633766428 -0.0364208929 0.152422816 0.0387307405 0.0868177786 0.0241200123 -0.0501802117 -0.03670137 0.0153368488 -0.0590804406 0.0290142465 0.100567661 -0.0803031549 0.037419185 -0.0459126569 0.0960116088 0.116186179 -0.0661039278 -0.0716232583 0.0593420751 0.0900740027 0.132992968 -0.025843842 -0.062321458 0.0100088529 0.00727820396 0.0946147069 0.111916468 -0.0648906529 -0.0192210414 -0.10582228 -0.052964583 -0.0713335574 -0.100110069 0.0656400323 -0.0618378446 0.0341230631 0.105089828 -0.0501025058 -0.138664886 -0.115556583 -0.105643809 -0.0142834401 -0.0338118225 0.0103544462 -0.0796577036 -0.00744031509 0.0366418958 -0.13744548 -0.12394321 -0.133015111 0.139994159 -0.096086286 -0.0779372826 0.0771979392 0.127238646 -0.0140574072 -0.0333673917 -0.0900884196 0.0653517544 -0.0381353125 0.124770477 -0.12113288 0.0970005691 0.107545584 0.114952408 0.0286091883 -0.109182179 -0.0630336329 0.0627928153 0.0373910069 0.110793836 0.0872234032 -0.110715158 0.0479132868 0.068171978 0.0977038071 -0.0969489664 0.0069321245 -0.138224244 -0.1087984 -0.0156357884 -0.0806711093 0.0635136664 -0.088648513 0.0915248096 -0.0295681208 -0.0889791846 -0.0202619806 0.0473107845 0.0719934851 0.0844703317 -0.0181293488 -0.0325784534 -0.0223496631 0.0800980031 -0.0469706431 
0.12284486 -0.0163284689 -0.0720243454 0.0100721121 0.012039721 0.0458173305 0.00532619655 -0.0372635648 -0.0938430429 -0.0808144957 -0.0140093267 -0.139559567 -0.0209952146 -0.0627007261 0.0597438067 0.105400652 -0.034525536 -0.0433830321 0.00658106804 -0.113124847 0.039323777 -0.0219132751 -0.0291076973 -0.0714975595 0.0354093611 -0.0999722928 -0.0223256275 -0.00125360489 0.131300226 0.0749686807 -0.103176132 -0.00177618861 0.100545034 -0.0482359231 -0.130312055 0.0585651398 0.0481558293 -0.050323084 -0.0714227259 0.114079475 0.0674445853 0.0338538028 -0.0207888857 -0.0817157254 0.0414048955 -0.00719799427 0.000449810963 -0.0368338116 -0.000197023153 -0.128918022 -0.120564923 0.00234631728 0.017666148 -0.0760105997 0.129530162 0.0278998706 0.0785642117 -0.0846611708 0.142189592 -0.0866099969 -0.034735851 -0.0374385677 -0.141838074 -0.0461979173 0.0792662352 0.0883275494 0.0821309313 0.0135414349 0.0762536079 0.162841812 0.11422585 0.0459163897 -0.101808973 0.124757119 0.10234201 0.121778518 0.124219798 0.15815866 0.120062478 -0.0342520848 -0.0277299657 0.00312125683 0.0146831786 0.0419388674 0.12670289 -0.0661896765 0.00485484302 -0.0807016641 -0.102789596 -0.10495542 -0.0415844247 0.117631674 0.0792787224 0.121481225 -0.121911712 0.0956676602 -0.13807556 -0.071042493 0.10285683 0.134201437 0.0446345471 -0.0243865289 -0.0965441614 -0.0743445978 0.0868661553 0.0650995299 0.0439964831 0.0520170368 0.122856326 -0.00557545433 0.0643403828 -0.130216479 0.075305514 -0.0704696178 0.0523242615 0.132898629 0.0556151196 0.0135608455 0.106630892 0.00448958855 0.0294760223 -0.130943984 -0.115232065 -0.0185228847 0.0750679225 0.0868396237 0.115859844 0.0128320716 -0.0767292604 -0.0757243782 -0.109670304 -0.022307232 -0.0409514084 -0.0354676992 -0.101452865 -0.0788824335 0.0799969286 0.139659941 -0.080234088 0.0067355819 0.0488539226 0.134902641 -0.044175718 0.0824501589 -0.150373846 -0.134587288 0.0352238007 0.116991237 -0.0220136195 0.114933126 -0.114182681 -0.0119344881 
0.0254181288 0.110809639 -0.139513344 0.0670420676 0.100754023 0.0473007746 -0.110368282 -0.0900191069 -0.0120764263 0.0714306533 0.122893341 -0.0579950325 -0.069827266 -0.0631239116 -0.0197088365 0.0283315647 -0.0101505062 -0.012342534 -0.136444777 -0.110550106 -0.0110609038 0.0534135252 0.0715058818 -0.135529175 0.0218331032 -0.0914941207 -0.117715605 0.0637661964 -0.119437411 -0.0767295882 0.13249214 -0.0307924412 -0.0124762207 -0.0491118282 0.115513906 -0.0599435866 0.0437990949 -0.0970950872 -0.126184925 0.00789543986 -0.0699488521 -0.0769708008 0.0143007189 -0.125161707 0.0508386642 -0.0768451542 0.0126496255 0.0037975586 0.0945261717 0.030873267 -0.0632951036 0.121244743 -0.124326058 -0.0519415066 -0.00867667794 -0.0128302025 0.0473873913 0.134872839 0.0456339866 -0.141257316 0.131795123 -0.129008144 0.0524923205 0.111486077 0.00192398916 0.0257397145 0.0104135079 0.0229955614 -0.0101489769 0.0377998948 0.0529350787 -0.0652860105 -0.0964240208 -0.119346842 0.102311134 -0.0513100103 -0.0956246778 0.0625582039 0.10400553 -0.0152444094 0.0996984094 -0.11846026 -0.0332592428 0.0517609864 0.124776825 0.0835027397 -0.0161721092 0.0544919521 0.0960061252 -0.0852253288 -0.115617849 0.132461503 0.0126986802 -0.0718445331 -0.0552118719 0.0423579067 0.00376538932 0.109214559 -0.060116075 0.00753490627 -0.11368005 0.00783166289 -0.0537703261 0.0794192106 -0.0919727385 0.00208424032 0.111719355 -0.0963476151 0.0950013399 -0.108282149 -0.12641567 0.12435104 -0.110969186 0.0644554049 -0.140637219 -0.0633735061 -0.0338808447 -0.123940453 -0.012826249 -0.0421397537 0.123598143 -0.038868092 -0.0173738599 -0.0128029287 -0.0981713384 0.100576788 0.075251177 0.0625472218 0.0764244497 -0.0654502288 -0.0070194602 -0.0901699513 -0.018791154 0.109865949 -0.0268438831 -0.0740915313 0.10593608 0.150855407 -0.0649960041 0.05053664 0.131901428 0.0731908754 -0.109051332 -0.00465088245 -0.0684918538 0.146911919 0.00280831754 0.14333044 0.108945541 0.0746650323 0.112723231 0.0545722842 
-0.120427899 0.0842576474 0.0266837925 0.0456104651 0.0698814988 0.047249984 0.190266967 0.163280755 -0.0185097642 0.0825716704 0.031122379 0.064069435 -0.164239749 0.0375948921 0.0769198686 -0.155088678 -0.104684114 0.075281471 -0.00940326042 -0.0429962575 -0.0142965838 0.0120388716 0.0730001554 0.00848747697 -0.107716456 -0.0337854624 -0.133317709 0.128731623 -0.108297765 -0.0142049389 0.133316174 -0.115573399 0.0836417973 -0.0845889002 0.0138655473 -0.0655629039 -0.0104014426 8.64409303e-05 0.1608392 0.0675673187 0.0179192871 -0.0236063488 0.046505671 -0.0392516479 0.0720221549 0.0741254017 0.0510466658 0.149464175 0.0988350585 -0.0124777406 -0.0114185531 -0.143563926 0.116806343 0.0688097924 0.145737663 0.0330820084 0.103331283 0.189101636 -0.0580817536 -0.152595311 0.10088592 -0.012149916 -0.153301135 -0.0369912386 0.0269313119 -0.100157224 -0.0723579377 0.0605176054 -0.16903989 -0.140898824 -0.124555223 0.149031043 -0.112365574 0.0937826708 0.0157145858 -0.100749768 0.0515762866 -0.0710803419 -0.0686863139 -0.0993681699 0.136043593 -0.0661427677 -0.00102904439 -0.0144443447 0.00527101662 0.0664612353 -0.125710681 -0.0143222958 0.0327278823 0.00587114692 0.0143621564 0.0739389807 0.0117307007 0.00317768753 0.131541565 0.0450980216 0.0562246889 -0.112021118 -0.121544585 -0.0703852251 -0.0653774664 -0.049726896 -0.0712407231 0.0384204239 -0.0756127983 -0.108454555 0.0761180222 -0.108996943 0.031490311 -0.110682026 0.0954553038 -0.0480172858 0.026041314 -0.0700769648 -0.0236957669 -0.105962321 -0.0145984888 -0.0221198499 0.080327794 -0.0255625173 0.0917616338 -0.0579084232 -0.115254268 0.0298689604 -0.033957921 -0.00849801302 0.101248682 0.0688192248 -0.033151634 0.0967501849 0.125775561 0.0594263077 -0.0315592438 -0.0751049966 -0.0152497739 0.0194816925 -0.025003599 0.110108972 0.0337932631 0.0466330573 -0.108177759 0.0923204646 -0.127821535 0.132012337 -0.0206957385 0.160091415 -0.0121085728 0.103845544 -0.00710585574 0.106360584 -0.0573629886 -0.116646938 
-0.0393697619 0.111357979 -0.00339666428 -0.0565832593 0.187762156 0.0738954321 0.00436372962 -0.0201180782 0.0962937772 -0.0409735553 0.126787856 -0.0257689413 0.142943844 0.0456875302 0.0157842282 -0.125038773 -0.127323419 -0.0811595023 0.127563968 0.0147216143 -0.0188120566 -0.037811175 -0.0973046944 0.155958325 0.107728779 -0.0499815643 -0.132879764 0.0703277811 -0.0151593685 -0.00903364085 -0.12080054 -0.0394929722 -0.05078182 -0.0835544169 -0.0767298788 0.111453474 0.0164176039 0.0738900974 -0.0237710364 0.108378887 0.00188849773 0.0275815967 0.0885725319 0.0609878637 0.0426038019 -0.0367399938 0.00715940725 0.0550841689 0.0243325494 -0.182480216 0.0272398591 0.0657472908 0.0879313201 0.0918957889 0.156981304 -0.0646059811 -0.119247735 -0.047599677 -0.116651364 0.147660568 -0.0381035991 0.0582092069 -0.147984505 0.19525826 0.122297406 0.0312584154 0.0444355682 0.0738523602 -0.00197054748 -0.0539330691 -0.101374164 -0.0567407869 -0.083611846 0.0657852963 0.0735184103 -0.0166746452 0.0735289529 0.0353927538 0.0238035768 0.0864389837 -0.0453045592 0.0350245051 0.0573615246 -0.117907874 -0.0528462976 0.0303292908 0.0802146792 0.0530198105 0.0752285719 0.0932139829 -0.126501501 -0.0627672225 0.0169230103 -0.0626025647 0.0349148065 0.109626763 -0.105911814 0.0788237303 0.0676720217 -0.153280228 0.0910756141 -0.0301282536 -0.000840488705 0.106123164 0.0890205577 -0.0100000529 -0.0720009357 -0.0945366025 0.10201738 0.139737338 -0.00469925907 -0.00698884297 0.108112089 0.0135092204 -0.127867475 0.0966705605 -0.00785964262 -0.138336435 0.0638243333 0.0331486128 0.0260607041 -0.0856452286 0.0471999496 0.0917534381 -0.0968203172 0.137134388 -0.108194001 -0.0242078379 -0.0838816911 0.0852543861 0.101658776 0.0793644413 0.0438847691 0.048443377 0.0175982714 -0.0791520029 -0.126521289 0.120813206 -0.0748142153 0.0292773098 -0.0915241987 -0.0191392377 0.070805788 0.0297760516 0.0540979952 0.0455519557 -0.104483157 -0.127842084 0.00549553335 -0.00101320446 -0.0383768007 
-0.0929412916 -0.0527148545 -0.121787742 0.116296932 0.127205387 0.0215808749 -0.00527906418 -0.0950953811 -0.102384314 -0.136119738 -0.135512173 0.0213526934 0.131827787 0.0118881762 -0.0137285888 0.0550298095 -0.101517551 -0.0358831212 -0.0636179894 -0.0305602103 0.000535279512 0.0445587337 -0.14124018 0.108244672 -0.0400930718 -0.081310682 -0.133354321 -0.00902658701 0.0237516761 0.136513993 0.0469265431 -0.12061704 -0.0962344632 -0.100439511 -0.0504943989 0.0647302195 -0.00468132785 0.0335817374 -0.0337964632 0.134859458 -0.0318156444 -0.0735609904 -0.087305516 -0.00295244693 0.143023223 0.0555585437 -0.0434618294 -0.0989003032 0.0798072517 0.0347367227 -0.0889567286 0.0454466157 -0.0994898081 0.0186923463 -0.173386604 0.0882037133 0.137024656 0.0983027816 -0.120416664 0.192227334 0.0853054151 0.0870762393 -0.0379575193 0.161280155 -0.0570422672 0.151813626 -0.106967077 0.166952252 0.101883747 0.054235056 0.189217716 -0.0840895325 0.070606254 0.0933529139 -0.0497438386 -0.0975164622 0.156465441 -0.061189115 -0.0255476627 -0.00462962687 -0.0117694922 -0.026365703 -0.0826264173 0.10701783 -0.0981103182 0.0933794603 0.112419106 -0.00185976818 -0.0279008057 -0.0761715546 0.160778821 -0.00510247052 -0.0529871248 -0.00726129953 -0.0403804705 0.106054351 0.0701956153 0.160074636 -0.126040637 -0.0446393117 -0.0562781654 -0.0765028149 0.118484885 -0.00363161834 0.0893984511 0.0428721681 -0.0862490907 0.161411509 0.110493377 -0.125039488 0.160013914 0.138771698 0.00862341002 0.120823577 0.124806479 0.126176298 0.0108156186 0.103124447 0.0837469697 0.150882855 0.0736863688 0.00851425901 0.0157211907 0.0169511139 0.0302761346 0.0810550079 0.0379871763 0.138916358 -0.0554511286 -0.134623349 -0.0141197927 0.102847748 -0.109416708 0.116542891 0.0628927052 -0.0400933027 0.104376331 -0.119453713 0.0155786276 -0.0215776712 -0.00744922459 0.0798532814 0.0639959276 -0.0196548998 0.0446766913 -0.0535386354 -0.0242897347 -0.0326042622 -0.0356833786 0.091811657 0.119531497 
-0.00566391647 -0.108836398 -0.0138843209 -0.0777539462 0.0244711339 0.0302925706 -0.0840600431 -0.135451585 -0.0993905663 0.0911848098 0.0254171342 -0.104574814 -0.0337190852 0.123146519 -0.00662016869 0.112501815 0.115908071 -0.0144283175 -0.0765947551 -0.070567295 -0.0816542134 -0.063042447 0.0821659714 -0.106859922 0.0445039272 0.0489273965 -0.054737024 0.0737254471 -0.0994004011 0.0907932669 -0.0525529906 0.0760397166 -0.0459865108 0.115764856 0.089792937 0.0262587946 0.0727124959 -0.0986016765 0.134460911 0.120390087 -0.106480896 -0.126752883 0.0600169674 0.0778978691 0.18399404 -0.116155103 -0.0270620678 0.0360745415 -0.0706986636 0.0219121743 -0.0485235155 0.0116395457 -0.0190455988 -0.031040974 0.138535559 0.0049945279 -0.0669544563 -0.135656506 0.111046769 0.0884723812 0.0641905293 0.0139932213 -0.0747538805 0.14979732 -0.0598235726 -0.0681837425 0.078919284 -0.0920129493 0.0927841365 0.0622474365 -0.0725972429 -0.113833509 -0.106715776 -0.127413034 -0.0326030068 0.0898154825 0.0540499836 0.120726503 0.000129148364 0.122900732 -0.000325784204 -0.152520627 -0.0064624548 0.073874481 0.0152352303 0.0961310565 0.0109456517 -0.0507575348 0.120654956 0.126830235 0.108187631 -0.163000211 -0.17243591 0.0882807449 -0.00232086889 -0.132580787 -0.132526517 0.101748489 0.118433878 -0.034390375 -0.0734529495 -0.163235143 0.0954085439 -0.164447442 0.0220291484 0.165916741 -0.0268109124 0.00910670217 0.0109529579 0.0204264484 0.0675835386 0.0192554276 -0.019607991 -0.139865518 -0.0490590185 0.116421953 0.133383304 -0.00608086493 0.0666911826 0.010473121 0.0839324743 -0.00665302482 -0.0341572762 0.0905544311 -0.0330039002 -0.121482521 -0.13752155 -0.00984864868 -0.00438012183 -0.0152192581 -0.0799304917 -0.144058108 -0.0689092726 0.0797125772 0.049202282 0.121467397 -0.059979789 -0.0338185877 -0.0875877663 -0.0352213718 -0.0265744999 -0.000970848021 -0.0542924628 -0.0339916125 0.0772038847 0.072251454 0.0386321284 -0.123490326 0.115714893 0.00240401109 0.138920873 
-0.0890034363 0.0679262504 -0.0547012426 -0.0577247515 -0.00683979178 0.0310920458 0.0301014595 0.09850014 0.0825214908 0.137729675 -0.101440713 0.0277171135 -0.0100952508 -0.106422052 0.0132092983 0.104450844 -0.0620612726 -0.0807258561 -0.0243801288 -0.0753299445 0.0706419945 0.133234069 -0.119514674 0.126184896 -0.0232823435 -0.107439861 0.0486271791 -0.0467080846 -0.0273494851 -0.0494388938 0.106704935 0.0911619067 -0.0630275458 -0.12553288 -0.00862511992 -0.028438285 0.0349335819 0.140132353 0.0860453546 -0.0957376212 -0.00533922017 -0.0604479536 0.105011344 -0.0739720687 0.0970580429 0.0160997361 -0.0723680109 -0.0773093924 -0.100641474 -0.000383406878 0.0533034801 -0.0397997424 -0.094927974 -0.0186026245 -0.128123358 0.0100442469 0.0157190859 -0.0655204803 -0.0211179629 -0.0938301831 -0.0268794596 -0.0193795785 -0.0539523363 0.0891814232 -0.0723926127 -0.0894492418 -0.103800982 -0.0962850004 -0.0336188897 0.0200818777 -0.0712372959 0.0486632138 -0.0899597034 0.0786281079 0.0739620626 0.000252395868 -0.123112433 0.0357136726 -0.123496763 -0.10789144 0.0340666659 0.0580845289 0.129886597 0.0481477603 -0.0664139464 -0.068032667 0.0435736328 0.120462291 0.119476132 -0.123401277 -0.0684902221 0.0760027915 -0.00998137705 0.0795442387 0.0158706605 0.0744519681 -0.0714682937 0.0881680399 0.0666808859 -0.0570629239 0.164650321 0.12301676 0.0870940611 0.0813848004 -0.026452858 -0.0121077476 0.129477188 -0.142631516 0.0968390107 -0.0411611088 0.0438095704 -0.0174164046 0.143360704 -0.0702998862 -0.00564636895 -0.129426509 0.129657581 0.00313778641 -0.0574785173 0.130360812 0.0483710952 -0.119206332 0.0171343237 0.0760408044 0.0280918181 -0.0732154027 0.110174745 0.0896861851 0.00575533276 0.101032197 -0.0848289058 0.0622318983 -0.0934585631 0.13217181 -0.145480588 -0.151452094 -0.193723321 -0.0797042996 0.0565548502 -0.103730097 -0.0197361708 0.0303085633 -0.077303797 -0.0265100189 0.0391752571 -0.0126148164 0.0999564305 0.0248864293 0.120508894 -0.133522972 
-0.0817446709 0.168698058 -0.0242357664 0.125314549 0.138810694 0.0583302379 0.117092818 -0.00391758466 0.132906526 0.00617161999 0.0204244088 0.0222095568 -0.0337266289 -0.120259523 -0.0814958364 0.0759975687 -0.127410248 0.107059687 0.0443736836 -0.111485079 -0.0593480803 0.0116199553 -0.0279748887 -0.114313811 0.138794228 0.0156804174 0.0343504995 -0.0126132518 -0.0769002363 0.0819693729 -0.0879027769 0.129266858 -0.089684993 -0.0613807291 -0.141623229 -0.0882831067 -0.00330040953 -0.0679773539 0.0915297493 -0.137266099 -0.130465984 -0.0945152789 0.00593703426 -0.116638407 -0.0219550729 0.156748876 0.180117995 -0.0130009502 0.0716223866 -0.144065097 -0.0823530853 -0.018947104 0.0158558208 0.0430184379 0.154572129 -0.0570289902 0.0835271254 0.117485747 -0.0653187782 -0.0945697576 0.0832378045 0.128256038 -0.074221611 -0.0555155501 -0.0284128729 -0.0774558261 0.0875912234 -0.000803266244 0.0153772263 0.105136067 -0.156031758 -0.0866245553 0.0865259767 0.013942048 0.0376075655 0.0092022717 0.137353256 0.0442829132 0.0658835545 0.12299668 0.0346786119 0.100729421 0.0517841168 -0.0898612216 0.0358949974 -0.0126487454 -0.150891528 0.118704185 0.165351018 -0.0962797925 0.14023003 0.103115313 -0.0602911599 0.110436209 0.0308854431 0.121604525 0.127166167 -0.0259905457 0.128405809 0.0925664902 0.120217182 -0.110264599 0.0578555204 0.0608986728 -0.0673596784 0.0434045941 0.0300820656 0.00743610319 -0.0426555723 -0.0631315857 -0.102573559 0.159559608 -0.0250314325 0.151789397 0.0174076445 0.0587394796 0.0256275982 0.0982463285 0.113202661 -0.0388134755 -0.175611705 0.00276806951 0.100146458 -0.0734548494 0.0292181689 0.0794109702 0.163403466 -0.0469043329 0.0134403957 0.010670647 -0.0128954323 -0.123306222 -0.105166018 0.0782779232 -0.0463915803 0.0925515667 -0.153734311 0.0399706028 0.0324561 -0.136016384 0.121838123 -0.0339740776 0.0617321283 0.0716827065 0.0507700294 0.163305402 -0.0263362825 -0.0235168263 0.156056419 -0.0918754488 0.0799345896 -0.0889241397 0.132214025 
-0.0339573547 -0.0231025834 -0.0893127769 -0.15716745 0.0421844684 0.164029196 -0.0144499643 -0.0524246357 0.0808350593 -0.0515551828 0.12877433 0.123672612 -0.00602019066 0.108041525 0.0685625225 -0.107800402 -0.106587365 -0.0706622899 0.079738766 0.00206816196 0.111752108 0.164152429 -0.140735298 0.0629734248 -0.139009103 0.147802591 -0.0249149017 0.0117471283 -0.0343583301 0.100054584 0.0977551788 0.0404288657 0.126847446 0.172036812 -0.0862832591 0.0989949033 -0.0108139813 -0.109132327 -0.065063715 -0.05474668 -0.0446046479 0.0215636939 0.0535516292 0.0117583256 -0.0911338031 0.08406578 -0.0239859279 -0.0490008146 -0.0228198916 -0.0266752448 -0.0871018842 -0.143308818 -0.000947127643 0.0332983695 0.111087874 -0.112454593 0.166085541 0.138097584 -0.11190179 0.0368833952 0.0595792085 -0.08562731 0.159958228 -0.0567510165 -0.139039397 0.142708972 0.0837232322 0.0328567959 -0.137172282 0.0749840587 0.0762316734 -0.121910147 -0.0499757975 -0.0399817154 0.0123820901 0.0685181022 -0.0794972554 0.130480066 0.0485983491 0.138348088 0.115582928 -0.139361694 -0.124933064 -0.116998814 0.0428138971 -0.119468078 0.0110614747 -0.0100474358 -0.0216016769 0.0101694763 -0.0820546895 -0.0777691826 -0.117070802 0.126368955 -0.134564951 -0.0302673057 0.110644177 -0.117660195 -0.00458803773 -0.134814233 -0.0293745026 -0.0682334229 -0.095479995 0.0648405999 -0.0339591131 -0.0932905525 -0.123183005 -0.0931407288 -0.134305537 -0.00308911502 -0.0195324719 0.131572172 0.0200927258 -0.0235931352 0.0316516161 -0.0269541889 -0.0461172685 0.0184912682 0.0468815118 -0.138456166 -0.0628102869 -0.116835825 -0.0204005614 -0.0558201149 -0.0682484955 -0.0157806873 0.0899439305 -0.100633815 0.0933804959 -0.0987777337 0.0174719673 -0.0371658802 -0.0836207345 -0.145877808 -0.0866134912 0.113749318 -0.0686557889 -0.10567008 -0.0482730195 -0.0070918831 0.114946 -0.134217158 -0.118984662 0.158714101 0.108982846 0.0504443385 -0.141264856 0.0717000142 0.104913875 0.138390988 0.0620018132 0.116844647 
-0.0917929709 0.00455025444 -0.0148221394 0.090433605 -0.140797302 -0.127093479 -0.139803812 -0.0659283474 0.0552696697 0.0945735574 -0.097406134 0.0754362494 0.068106018 0.124531001 -0.0909646004 -0.102133028 -0.115647264 0.0507812947 -0.0703440532 0.124720164 0.138866737 -0.100535631 0.0667068362 0.00467189308 0.116699241 0.00265486818 -0.0454894938 -0.0559370294 -0.13688907 -0.0905043259 0.0473077707 0.00356861111 0.119102031 -0.00279730256 -0.0290972441 -0.0549311638 0.0690060258 -0.108683005 0.038172666 -0.0376584455 0.0284418333 -0.0453239642 -0.0188913643 0.10330762 0.0158650316 0.0302619878 0.10599114 -0.0192918926 -0.00512768747 -0.067353636 -0.0088639101 -0.0262610465 -0.00164695387 -0.0329788141 -0.0782948136 0.0144974366 -0.102364071 -0.0231709629 -0.053929802 0.0872491896 -0.0339207873 -0.0452650562 -0.0555231161 -0.106613263 0.0545223877 -0.104694769 0.0733768344 0.117440388 0.0850411355 0.127571523 -0.0451369807 0.127713785 0.0604759753 -0.116016053 0.120935522 -0.0490187742 0.0145258456 0.12456093 0.0472011603 0.0200307388 0.213931099 -0.0325213335 0.100024901 0.150687665 -0.0330869481 -0.0743881986 -0.0423107482 0.0197958369 0.144206196 -0.075991191 0.00704598008 0.169469431 -0.0728433281 0.00913137291 0.00888578407 -0.0482634306 -0.0623912066 -0.0938192755 0.0339379199 0.119007394 -0.0949489996 -0.114306375 0.0466704629 0.01765888 0.12498638 0.0581458732 0.0168336164 0.109576389 -0.0280811246 -0.0985747352 0.0206970666 -0.0964695066 -0.0664414242 0.110378399 0.139610469 -0.12254858 0.005280599 0.00761935115 -0.128316179 -0.0374653786 0.0578353852 0.127879024 0.0134341568 -0.0500954539 -0.00337731675 -0.119695731 -0.11888539 -0.00329445861 0.062248569 0.123474449 -0.0654902309 -0.0125339935 -0.0127297472 0.106315874 -0.117941201 0.101428568 -0.0125857871 -0.0383322537 0.0596774332 -0.13507621 -0.0973153785 -0.126249328 -0.0674846619 -0.0139419707 0.120396413 0.105357513 0.0358138867 0.154476896 0.111231543 -0.171673253 -0.0643624365 -0.0841967613 
0.0120489495 -0.110221028 -0.0037632389 -0.0754774585 0.10185004 0.094084166 -0.123280331 -0.0452301428 -0.119065173 0.00748275174 0.0470008291 -0.0596443266 0.110938832 0.078253679 0.0948193073 -0.00295168161 -0.0287760254 0.158084825 0.0492092073 0.0768529922 0.0546108335 -0.0640769675 -0.0777091384 0.102904022 -0.0749624521 0.129996344 -0.050066106 0.140702903 0.135439858 0.0565205291 0.039585311 0.131971121 -0.0433706567 -0.00613029394 -0.0415827632 -0.0896254405 -0.0250180494 0.0855293274 0.0162106231 0.0830451697 0.0412258208 0.109798126 0.0821261331 0.119376883 0.123270549 -0.109921172 -0.069528237 -0.128147811 0.00981930271 -0.0908927023 0.162675932 -0.115118623 0.0430796407 -0.0360173024 -0.142209709 -0.0679464936 0.125360489 0.0772604644 0.159708619 0.136423364 -0.113330379 -0.0995218381 0.0548840612 -0.0670673028 -0.0882866234 0.0123610795 -0.0497058704 -0.0615072772 -0.0382896513 -0.114018604 0.139805213 0.139648527 -0.0281077586 0.0916863829 -0.00357940956 0.119387031 -0.038923528 0.099045448 -0.056032382 -0.132692307 0.0513692684 -0.0116330124 0.0760553926 -0.0958063975 0.133466452 0.0711750537 -0.0614313632 -0.0412404239 0.0541786365 0.063894175 0.0131827295 0.0888437405 0.0340017416 0.00108662061 0.102465764 0.172270909 -0.110579006 0.0245050453 -0.113299966 0.0307509303 -0.00821371656 -0.0823507458 0.12988846 0.0906711072 0.0899582058 -0.0237735175 0.0522135533 -0.0568510592 -0.0833672881 0.0160184987 0.0457749926 -0.157355502 -0.137885511 0.0371456817 0.140265986 -0.0514193922 -0.0789706931 -0.0262926575 0.031272471 0.0584937558 0.088243857 0.0618854538 0.105974808 -tensor_16weight 2500 --0.00417222502 0.0333769061 -0.068073824 -0.00174406881 0.0395693518 0.163498551 0.085880965 -0.0441546589 0.0277523138 -0.0394380651 -0.108085141 0.0367854051 -0.088741377 0.110496983 0.137496606 -0.0574882254 -0.0685930923 -0.118896537 -0.0459423698 0.128555194 -0.00706961751 -0.109601662 -0.0728417113 0.0407270938 0.116901517 0.037543118 -0.0457625464 
0.105285026 -0.0170973707 0.0741245896 0.0285330229 0.0602779202 -0.0999675766 0.0917604342 0.00275715417 0.0221613981 0.0221239924 0.0220769197 0.0309871975 0.141381815 0.102749333 0.0140401116 -0.160013482 0.0494507849 -0.109416723 0.0506694168 -0.0603474639 0.122836456 0.102406837 0.102601565 0.0129987504 0.0918339491 -0.0100188255 0.0104814339 -0.0403004438 0.0710072964 0.139566272 -0.0848197117 -0.065366447 0.0172738302 0.0708059147 0.0956877321 -0.101862162 0.0278298706 0.119774833 0.109509952 -0.0190650206 -0.0558079928 -0.0829644129 -0.0505085252 0.0924009234 -0.0197391063 0.120741382 0.109388441 -0.155889794 0.0402384102 0.0444546603 -0.0963738933 -0.0165153071 0.0370764881 0.152952656 0.173049316 -0.104738578 0.153395116 0.0572723225 -0.0417025536 -0.0367592871 -0.113802627 0.127408341 -0.0777374357 0.0151805067 -0.0934357792 -0.0254224017 -0.0842629671 0.102764659 0.0851573944 0.119558379 -0.0993547663 -0.0726170391 -0.0864863023 0.0344553739 -0.0371179581 0.014597863 -0.101272777 0.0389231592 0.0477042645 -0.135611862 -0.0279283747 -0.0736898407 -0.00949966908 0.0327468514 0.011397168 0.0977702439 0.0774219632 -0.0666735023 -0.120168492 0.0698120147 0.0764941722 0.00528292358 0.111364022 0.0231147856 -0.0114662051 -0.0972312316 -0.046190843 -0.0422428921 0.0343527198 -0.0671815872 -0.0542576611 0.0252622664 -0.133999184 -0.036504671 -0.10330338 0.0781079531 -0.0724790767 -0.0193377137 -0.106740713 0.119743183 -0.131887868 -0.00206248462 -0.0915471017 -0.0368748158 0.00305576622 0.101493865 0.126230076 -0.0354850665 0.0502597541 -0.00101481378 0.066304937 -0.0897568762 -0.0490704626 -0.104472384 -0.0435468704 -0.083782576 0.00601604581 0.113205031 -0.0322454944 0.0402761698 -0.0695916861 -0.139957666 -0.0615144074 -0.0179994181 -0.0612879917 -0.0798030049 0.105225846 -0.0033620894 0.0714047551 0.0813372284 -0.0406201556 0.0420547277 -0.0629295483 0.128933236 -0.0379166752 0.0688687265 -0.0884593129 -0.12569578 -0.0999955758 0.0851953328 -0.105392635 
0.0814247131 -0.123030953 0.0994129926 -0.0500591323 -0.109111317 -0.13911283 -0.0404232666 0.0469972044 -0.135043383 0.0236423463 0.00703085959 -0.0505910367 -0.0494552478 -0.135939568 -0.125193208 0.0693392456 0.128731236 -0.0279947668 -0.00343213975 -0.0427332819 0.137931898 -0.120300733 0.11937128 -0.0187009424 -0.0802288204 -0.13997142 -0.0681145191 -0.123841763 0.12888445 0.0320565253 -0.0545828864 0.0154529363 -0.109894589 0.127666876 0.0496545732 -0.0170144811 0.117039517 0.111107305 -0.140118808 -0.00819459558 -0.0746041089 -0.104405858 0.0340925604 -0.0303836614 0.115028903 0.082659781 0.0525604337 -0.0737821385 -0.0581655875 0.0645884275 -0.121426933 0.0243149996 0.104953259 0.11160703 -0.134071976 -0.0780070424 -0.0556476638 0.0689132363 -0.0958281457 0.0529917628 -0.00589548051 -0.0485527515 -0.133214802 -0.0193034932 -0.0625650287 0.0658643395 -0.115375213 0.0658562183 -0.126765266 0.0663554519 0.105942756 0.0113557875 0.00450533349 -0.0318827182 -0.0382149518 -0.0586391389 0.16599071 -0.0995347276 0.0626212955 -0.0848238021 0.0312059093 -0.0671846345 0.114710093 0.111972772 -0.109937578 -0.033448413 -0.00338487327 -0.137714684 0.061901439 0.0263031721 0.0757300183 -0.0868174583 -0.115281321 -0.0186160952 0.022743687 -0.0695182681 -0.0389948115 -0.00921310484 0.146889284 0.121259861 0.123389371 0.0740807503 -0.0913022682 0.0926736519 -0.0748549104 0.0120129362 -0.0260141995 0.160688117 0.103861287 -0.0145337479 -0.0333565325 0.106497906 0.12470568 -0.0427022539 -0.0200853404 0.125489667 0.117662869 -0.00191673823 0.0511538982 -0.0245015733 -0.0231342129 -0.0423432663 -0.067036055 0.0543258078 0.0126409382 -0.13212578 0.0719170049 0.0198016949 0.103088938 0.0337647051 0.118756339 -0.106006429 0.108546667 -0.161683097 -0.0532578528 -0.0416240096 -0.0414204299 0.137367234 0.113782153 -0.00119005144 -0.124352522 -0.0775565729 -0.0454200171 0.110256732 0.138212636 0.139873043 0.0665704682 -0.0985324904 -0.132870167 0.025891345 0.102456145 0.131423756 
-0.108975701 -0.0261260234 0.0348203629 -0.0914004669 -0.130643874 0.064868167 -0.073974885 0.102802224 0.176195145 0.0179768056 0.0108742332 -0.0181707554 -0.0102240648 0.00463731587 0.0538709089 0.00627785875 -0.117067128 -0.087891832 0.004855379 -0.016739469 -0.0725544542 0.0821630657 -0.0417728201 0.113972411 -0.0352853052 0.076194793 0.106641911 0.101013884 -0.0971745029 0.0243909303 0.029293431 0.0236056633 -0.0190717317 -0.12938638 -0.101987876 -0.0398474075 -0.0397941768 -0.0599071085 -0.0816488713 0.131114334 -0.072889857 -0.128262654 -0.048984535 0.0297204573 -0.071578376 0.0270867199 -0.0633577183 0.13518168 0.0259044431 0.0611634552 -0.0133587159 -0.00845749862 0.0748591572 -0.0559645891 0.0772884116 -0.0892494842 -0.124270409 0.0640116185 0.0149187753 0.112770371 -0.0819433481 -0.158321261 0.146862835 0.05744645 -0.136843622 -0.0534451306 -0.057807114 0.112765148 -0.121185474 0.0865707248 0.0242433939 0.00829058886 0.00682345033 -0.0391817167 0.118281983 0.0936698243 -0.0126834186 -0.0224663615 0.0807867125 0.0614369959 -0.0208024122 -0.0155477682 0.0223713629 0.133279428 0.0341431238 -0.0801266879 -0.0473638549 -0.0159126073 0.0619758293 0.104336567 0.0777632669 0.0014840191 -0.0106922537 -0.089225471 0.139960572 0.060204789 0.0285251942 0.00940582156 0.124438897 0.0746723562 -0.0786366537 -0.140947089 -0.0317693353 0.0952398479 0.0504078493 -0.101314582 0.00230675936 0.028885033 0.0213491581 -0.138435528 -0.0281476919 0.108943664 0.00568072731 -0.021763064 0.115609825 -0.0683022514 0.0327104814 0.0758552849 0.11111246 -0.0849706307 -0.0847819373 -0.000146973485 0.0806944817 0.0748187006 0.00227210205 0.0243628193 -0.0767735019 -0.0275015086 0.0700375587 0.0457462482 0.0988681763 -0.033178322 -0.0504397713 0.0441949666 0.0874261707 -0.192612246 -0.0348819122 0.0727340132 -0.0881135315 0.0686711743 -0.0988578126 0.0592095368 -0.0447203517 0.0680712909 0.132170856 -0.0182013065 -0.0532437004 -0.0237953365 -0.0845318213 0.0437362194 0.0721768141 
-0.0437071882 -0.0137181133 0.117270313 0.0595132113 0.174431637 0.0367056727 0.16878359 -0.0222882591 0.0592969358 0.0360841192 -0.0704026446 -0.0345776901 -0.0942229927 -0.165675908 -0.0193019863 -0.000750561245 -0.0503743216 -0.0973385572 0.0120092537 -0.168930963 0.162473217 -0.114793286 -0.0891378894 -0.0980757028 0.100445837 -0.0628618151 0.046582222 0.0680130422 -0.110214941 0.127800643 -0.0861949921 -0.0599781014 0.0208107978 0.0531936698 -0.00385034014 -0.0491617396 -0.0419875681 -0.00234212819 -0.044506561 -0.0653151795 0.0169184674 0.0848835111 0.144753113 -0.0450268537 0.0625728816 -0.022600282 0.0092583932 -0.0980294049 0.0949492604 -0.108607598 0.0273391213 -0.0572451502 -0.0307708569 -0.0600267388 0.0144662457 -0.122474261 0.0097047314 0.00327231945 -0.0497302189 0.143207729 0.110742435 0.116505228 -0.0606399626 0.130554333 -0.0482001454 0.146989092 0.0189287849 0.074033089 0.0732528344 0.0350786448 0.0295372307 0.131620392 0.0493342653 0.0956929848 0.0703327283 0.0766540915 -0.0670498535 0.105273128 -0.05142162 0.0384206101 0.0971984193 -0.116058186 -0.120040804 -0.0705016181 -0.10306605 0.100996941 0.104779199 -0.0833229199 0.0132066812 -0.131049663 -0.0160818845 -0.118435718 -0.0491212681 0.0483348295 0.0081577599 -0.0334057733 0.0873719454 -0.0852230042 0.1210372 -0.0395233259 0.0784760267 -0.0274248198 0.099436149 -0.081705071 -0.015222121 0.110525087 -0.0580340363 -0.0512353852 0.0699884966 -0.134925202 -0.00844763592 -0.0294794999 0.0049456358 0.0870961398 0.112957731 -0.12726216 -0.0223451219 0.053539414 0.0455328077 -0.0087382691 -0.0701403543 0.0741796196 -0.122097038 0.0159640387 -0.0312917195 0.10192579 0.0948835909 0.0600303523 -0.116103448 0.0967111215 -0.186777875 0.0394800454 0.0645573735 -0.00990641117 0.0682907104 -0.0206053555 -0.0376345441 -0.0776938945 -0.0846702084 -0.0824906975 0.020499425 -0.0137960762 0.180186674 0.0354575664 0.0700841099 0.0222972054 0.0324955657 0.130303159 -0.0262751058 -0.184336275 0.0281189717 
-0.138265505 -0.0036136366 -0.0606828108 -0.013456936 0.112634584 0.105258964 0.176689893 -0.169644877 -0.0470958985 0.132185519 -0.175565004 0.0531695932 0.0104851555 0.0826452076 -0.145328104 -0.0922671333 -0.164871365 0.142606691 -0.0212450475 -0.0627536103 0.120814189 -0.0105575472 0.0765462518 -0.0688535571 0.0809823424 0.00905969739 -0.0526664332 0.0274341255 -0.065106079 0.106400639 0.0910326689 0.0318324715 0.00195610523 -0.0523022339 -0.0163692534 0.0768652707 -0.0259682089 -0.0942348465 -0.126349121 -0.122505806 0.0306945741 -0.113110162 0.0605172664 0.0169282854 -0.0441042334 -0.139085665 0.0421898365 0.0137649477 -0.0915340632 0.0716675818 -0.0818142742 0.0774355978 0.132591441 -0.0364979431 -0.112088569 0.025617823 0.0622905344 0.137828872 -0.128434256 -0.0762574747 0.039005056 0.0113735795 0.0853985548 -0.110001206 -0.122525297 -0.118933201 -0.0976378173 -0.00780165195 0.0592657812 0.127340347 -0.116591275 -0.00909214467 0.0285629407 0.137352273 0.0194081701 0.0885308981 0.157478809 0.00182641763 -0.111265883 -0.0360108428 0.0904895365 0.0231916904 -0.134599373 0.123875104 -0.0176251531 0.00123633444 0.118751198 0.0944036767 0.00726477336 -0.00553962262 0.0213748366 0.0605549626 -0.0671228841 -0.11890097 0.0446490161 -0.124758892 0.0125705721 -0.0418758988 0.0201296303 -0.0815314054 -0.0722059831 -0.0872977003 0.17252858 0.0650849119 0.105454117 -0.0630111396 0.0785340741 -0.0949152634 0.120920762 -0.00637194049 -0.122229263 -0.04851266 0.0649544969 -0.0367785469 0.0145572387 0.0296253487 -0.104165144 -0.0307542253 0.0980055779 -0.0429146662 0.0307714939 0.0965365469 0.0424363613 -0.0620633438 0.037256062 0.0731356591 0.0377854705 0.126446977 -0.0181031153 -0.10487172 -0.012096405 0.0115436465 0.00484970212 0.0365195423 -0.0648169369 0.0427874923 -0.0382243469 -0.131412312 0.0129759014 0.014330104 0.104675427 -0.0191158354 0.110161588 -0.0690229386 -0.134754956 0.0209327489 -0.137074128 0.101832643 -0.0448981151 0.0525557846 -0.0926448479 
-0.0648545772 -0.118553191 -0.0258715078 0.0647533536 0.0853616297 0.0266875774 0.0280805677 -0.133945882 -0.131097019 0.0535267889 -0.0883116797 0.0746518523 0.0716847479 0.0854772329 -0.0682440624 -0.0383327305 -0.0687909126 0.0661910623 -0.0651709512 0.0676450729 0.042929098 0.0129889995 -0.0627579913 0.026080206 -0.0745798126 -0.110127226 0.0564449728 -0.0521587133 -0.0703866705 0.0996105373 0.0995502174 -0.0150131434 -0.093635723 -0.0350378379 -0.0970593914 0.122739777 -0.0746073425 0.00775636733 -0.0259913579 -0.0691226423 -0.0844953358 0.0796677917 -0.0647141263 -0.126704842 0.0870190561 0.110861555 -0.0944047272 -0.00367192924 0.0355183631 0.118708417 -0.0181086287 0.10595347 -0.00600086153 -0.00448402762 0.0546585321 0.0393356681 0.140889272 0.0358275473 -0.0797418952 0.0966726542 0.102656618 -0.0845542178 0.000538542867 -0.138633057 -0.103164904 -0.0017221421 -0.0267990902 -0.0042184745 0.0618429407 0.0372477919 -0.0600601025 -0.120651938 0.018602442 0.0741632134 -0.0556408912 -0.0401281454 0.0216008872 0.0986668468 -0.0385047868 -0.00614350522 0.0835534334 0.0581347793 0.0706503093 0.0540118366 -0.0558184311 -0.163868651 0.00549533684 -0.0525678769 0.0940028876 -0.0198096931 0.0326584801 0.0405591354 -0.0935613215 -0.0739186257 -0.032372281 0.131545618 0.076330319 0.0784656629 0.096952945 -0.0555465668 0.127968788 0.0291817039 -0.161188528 -0.0909612328 0.0471343175 -0.050026428 0.0184959229 -0.139395848 0.0397638716 -0.18348141 0.112185314 -0.0655662641 -0.0470221415 -0.164569005 -0.121025704 -0.0150856273 -0.00368866767 -0.0741992891 0.059470281 -0.0445875078 0.0394076109 -0.129359409 0.0192312244 -0.0108757932 0.0112697631 -0.146348611 0.159908772 0.0195346791 0.00573998271 0.106009968 0.0103269666 0.0211634543 -0.0780764073 -0.115655147 0.102710932 -0.0541914441 0.0466746576 -0.106848881 0.10031607 -0.134285003 -0.0872690454 -0.135360599 -0.0295483619 -0.105432019 -0.0230201259 0.0187110342 -0.10428597 0.184378833 -0.0646381974 0.0479477942 
0.0756504536 0.101597495 0.10669633 0.0338435024 0.0337592065 -0.136492506 -0.0870145112 0.0574491024 0.0298079327 0.0783730522 -0.0157561749 0.0467197858 0.103480637 0.0719107315 -0.10105852 0.0388642065 -0.0440139845 -0.0164328683 -0.0481204912 0.14119412 -0.136799589 -0.0650587231 0.00970490556 0.0506416559 0.0616328567 -0.00483906409 0.00724408031 0.165032029 -0.124474898 -0.149660811 -0.0778745487 -0.127463415 -0.0629397258 0.0866350383 -0.135488585 -0.120086707 -0.0116316313 -0.0172481909 -0.033788152 -0.00575722754 0.0619941019 0.109511442 -0.1259799 -0.0296196118 0.0126680005 -0.00631114235 0.0279259682 -0.000546666677 0.046820391 -0.0707993954 0.0746022463 -0.0110969217 -0.106299125 0.0408700407 0.0607301854 0.0142738195 -0.117608964 0.125323534 -0.0797425956 -0.147771716 -0.0951133072 -0.0761706829 -0.0271511003 -0.0952035114 0.149354368 0.0850981027 0.12360011 -0.0736458525 0.0565205403 0.0375516564 -0.0330046788 0.0698090419 -0.0682012588 -0.108341932 -0.0916303098 0.0491649024 0.128733024 -0.0175282191 -0.0851026475 0.121217623 0.0644281209 -9.09119844e-05 -0.00583170354 0.0807056576 -0.0556110144 -0.030019151 -0.151016012 -0.0976619869 -0.0704679191 -0.140273213 -0.131401047 -0.0380841792 0.105461046 0.115149468 0.0569063798 -0.0500233844 -0.120028786 -0.0609620214 0.0207634512 0.121349677 0.0845320895 -0.00625681877 0.130006418 0.10405767 -0.0260085575 0.00623696856 -0.00945841614 0.100506075 0.0220730081 -0.125443459 0.0532133728 -0.120297104 -0.11440815 -0.0461563803 -0.0888359398 0.013649012 -0.00815679412 -0.070658952 -0.00767463259 -0.120047957 0.120935254 0.0590654165 -0.0607035644 0.0469796248 0.10308367 0.17886214 -0.0048859301 -0.0231490359 -0.127134889 -0.0811686739 -0.0198651105 0.0723841488 -0.00580265373 0.0251238793 0.0345673561 -0.150929868 0.0692640245 -0.0050682174 -0.0870390087 0.0347174555 0.118515827 0.0921180844 0.0585382432 0.102678254 0.0418618806 -0.0638227612 0.0212153941 -0.048158478 0.067370899 0.134428993 -0.0895267203 
-0.137082666 0.0383201912 0.0807136148 0.0119578699 -0.00565120764 0.051708404 -0.0704574063 -0.0806446597 -0.0455211401 0.128211096 -0.152886659 -0.126107663 -0.174904436 0.170342699 0.0492694043 -0.016915286 0.0414748713 -0.0318201743 0.103975341 -0.0692306831 -0.0701901168 -0.136825696 -0.104924172 0.0634303093 -0.103916064 -0.106038429 -0.0103737917 -0.102413662 -0.0131952306 0.0928339362 0.0422227457 0.0485036634 0.100036606 -0.0334572345 0.0251479615 -0.0170369614 0.105127081 0.0789836645 -0.125357226 -0.0810341984 -0.119952716 -0.087011233 -0.136233181 -0.145835862 -0.139367864 0.0879707336 0.118222609 0.0654330924 -0.00252592564 0.0332888886 0.0847103074 -0.147119001 0.13717629 -0.0333771855 -0.0392536968 0.140056893 0.161733225 0.170547694 -0.13669911 -0.126283079 0.136217371 0.0222201925 0.100318342 0.0159461573 -0.151088864 -0.0795981139 0.0589573346 -0.00727232778 -0.0690927505 -0.00238380092 0.00583820418 -0.0911569446 -0.131048679 -0.117706373 0.0648147985 0.00487408834 0.117360242 0.170240089 0.118482806 0.0110958666 -0.0510480367 -0.0351133123 0.059696164 0.0868533999 -0.02283502 0.157529563 0.0911459476 -0.130911916 -0.0573937744 -0.116225123 0.0283462927 -0.0848590583 -0.00989816617 -0.0507650711 0.00583241554 -0.00466799736 -0.0543093197 0.123912387 -0.0726056844 -0.0514792576 0.178007007 -0.0574177206 0.0999287218 0.08989916 -0.0277903583 -0.144622609 0.0931683257 0.140252993 0.0123335114 -0.119432166 0.001549048 -0.00325248647 0.096072562 0.162840962 0.0217829365 0.122066244 -0.0385645702 -0.026368469 0.0513184667 -0.0859575272 -0.0242716596 0.00956724584 0.0109226704 0.0818789154 0.0224322379 0.135026619 -0.04657121 -0.104234524 0.0428646505 0.0151414573 -0.0313612148 -0.00296355784 -0.0544822216 0.129524395 0.0263857096 0.0820539892 0.0624815822 -0.0385384262 0.111337319 -0.0279020891 0.0234304219 -0.138835654 0.0737862438 0.0558309704 0.106782034 0.0583992153 -0.0247403383 -0.128191724 0.0889156908 -0.0910850763 0.0517035276 -0.0938775688 
-0.0293845311 -0.120674253 -0.0451591834 0.0491400808 0.0255339593 -0.109320991 -0.0378039181 -0.0312750563 0.0803458393 -0.139691994 -0.00930851698 -0.0976287797 -0.114196926 -0.133310482 0.141642928 0.105140746 0.119025633 0.0663658231 0.0742790997 -0.0807274282 -0.049172353 0.0325890929 0.168383315 0.0958275571 0.0830662847 0.13090989 0.11515788 0.0930101275 -0.135337189 -0.01777054 -0.0463960879 0.121989891 -0.0663873479 0.0556551777 -0.0777027011 0.0780093744 0.0564488322 0.0328528732 0.0221739244 0.0690468401 0.165196538 -0.0979238898 0.0833290517 -0.0583337545 0.171756044 0.0485463925 -0.0788506195 -0.133651629 -0.0622514226 0.146948621 0.00967819989 -0.10787762 -0.0629694313 -0.0672841221 0.0454268195 0.153374791 0.0254830644 -0.107513458 -0.0417494588 -0.128024951 0.0972493291 0.0891800448 0.0753429011 0.0632758364 -0.0226834938 0.0985851139 -0.0661747381 -0.0974865481 -0.116021931 -0.0705863535 0.0088367667 0.11096172 0.00588925183 0.0319114141 -0.0778467134 -0.0426055863 0.0243966766 0.00911161304 -0.125340477 0.0958841443 0.0809076428 -0.0837767944 -0.0955800563 0.0739903226 -0.0197054464 0.0984792486 0.0248065591 0.013472463 0.0424549654 -0.115080677 0.0406080261 -0.0103811678 -0.00163237448 0.124697939 -0.0250684526 -0.109149016 -0.0914341062 0.0849946067 0.114163592 -0.114370003 -0.0137929916 0.0836040005 0.113473631 -0.0115816081 -0.00375672383 0.145472392 0.115759097 -0.0200298876 -0.0216319505 0.0506976917 0.146452606 0.00109191891 0.0250434522 0.125239 -0.070178017 0.112422191 0.128775164 0.0972625315 -0.0562150516 0.0652283952 0.0582313985 -0.0210614875 0.0146073569 -0.0898632482 0.00922326744 0.143344715 -0.154285237 0.120073751 -0.0218681768 -0.0313799977 -0.0713483468 -0.11174649 -0.0150706414 -0.0665470064 0.0984330028 -0.0607178248 0.00206361711 0.144247591 0.114495434 -0.0467930511 0.0812650472 0.126196146 0.155254051 -0.117046624 -0.00681339065 0.110044703 -0.130152121 0.0844703615 -0.103106052 0.07677605 0.136585757 -0.00327930111 
-0.0542289279 -0.0169889219 -0.00491619762 0.00191336707 0.123525828 -0.0764942095 0.062074706 -0.118892998 -0.152141303 0.128834948 -0.00888511073 -0.128381923 0.166257232 -0.118136637 0.104106829 -0.00407020096 -0.0890831947 0.0344172269 -0.0881365165 0.00957617164 -0.0420764349 -0.11916101 0.0259462018 -0.12878786 -0.11673443 0.0500368178 0.123366237 -0.118328013 -0.0965624005 -0.124413118 -0.0506703407 0.10639628 0.118844062 0.136438951 -0.096905753 0.0549481958 -0.0769049451 -0.0224119276 0.0397996418 0.0951755494 0.113283962 -0.0366398245 0.106758043 -0.153018385 0.116750017 0.0242051464 0.0555575825 -0.144076809 -0.123421922 -0.0913296789 -0.0159076471 0.0744441524 0.129337355 0.0706739873 0.0101508312 0.00735373795 0.127268285 -0.108703181 0.0155255729 -0.0134423403 -0.0967226699 0.0832142085 -0.174966842 0.130565166 0.0653796941 0.0878034979 -0.0906267166 0.0458128788 -0.00101685664 0.0771127343 0.0380266793 0.0676126033 0.119121142 -0.113756225 -0.044930473 -0.0590678304 -0.114111833 0.0717334226 0.121548906 0.141148821 0.145141944 -0.0386701077 0.0908232033 0.0557151176 0.0785376281 0.0665990561 -0.026476074 0.0890998095 -0.0765963942 0.00731649436 -0.0525127202 0.0552615821 0.0508936346 -0.0130886035 -0.136101693 0.0808141679 -0.0865692198 -0.0861371309 0.120797709 0.14255853 -0.0736281499 -0.110956788 -0.0413296558 0.132970884 -0.128850982 -0.0727135316 -0.0477897227 0.0840527937 -0.0905833915 -0.115051724 -0.0579792745 0.111140922 0.10985522 -0.0506750494 -0.0162282735 0.0716301352 0.0544709153 0.12016838 -0.0960830227 0.0585822612 0.0589075349 0.0988149717 0.0618340187 -0.0847306624 -0.0631039515 0.0553748012 -0.111075364 0.0433333553 -0.0188259855 -0.112079769 0.0795213431 -0.0475602113 0.13167055 0.022298038 0.134109125 0.0383149795 -0.0405841693 -0.120256543 -0.104634985 -0.0977489278 -0.020790996 0.105706528 0.133288711 -0.00679467805 0.107927233 0.132649049 0.184538066 0.0214475244 -0.0248829424 0.0847060084 0.059696611 0.0894906074 
-0.0552571602 0.110768363 -0.0777745917 0.0256617665 -0.0267863441 -0.011260841 0.167986751 -0.0874950364 0.191600099 0.145262077 -0.0928313509 0.0274879988 -0.0514709651 0.09432742 0.120864995 0.0269548818 0.00873297453 0.0555894822 0.0474308766 0.10841053 0.0247467272 -0.0354869291 0.0167415757 0.130794838 0.130202994 -0.105830118 -0.0619872436 0.118844941 0.0582736246 -0.0529267974 -0.122846067 -0.00995433331 -0.0649164692 0.0907461941 -0.0287418552 -0.0419690758 0.0771300942 -0.0686131641 -0.137708366 -0.0504630506 -0.0929714441 -0.105465375 0.128161833 0.0110401921 -0.102053583 -0.137259245 -0.106930703 0.116477638 0.0865236223 0.0225237608 -0.10337396 -0.141124219 0.0859540254 0.124468133 0.122546621 -0.13006793 0.0637582615 -0.111786462 0.0577507317 -0.104083821 -0.0128176091 0.165550634 -0.0433670357 -0.0197649412 0.0431121029 0.0775365457 0.0529541112 -0.094568193 0.121677496 -0.0553900301 -0.111113667 0.0546647757 -0.102778159 -0.11795371 -0.0317932889 -0.0803981498 0.0593941696 0.0496674031 0.115015298 -0.0528469123 -0.139373824 0.0843660235 0.139188275 -0.00414940715 -0.0868603438 0.13721244 -0.111397579 -0.159685537 -0.082697615 -0.104306176 0.126542285 0.105239764 0.107114151 0.177761674 0.0128696105 0.0826148763 0.058955878 -0.104517892 -0.0758889616 0.09818995 -0.0577240698 -0.0954538211 -0.00908805989 -0.0742976665 0.0133352363 -0.0486188494 0.110651046 0.142552137 -0.115153261 -0.0301514734 0.063916415 0.15691933 0.0455034524 -0.099480927 -0.0727156326 0.0517707281 -0.00254492369 -0.0865125954 -0.0609851852 -0.0927319676 0.0186524875 0.0648534745 -0.106417365 -0.12526378 -0.0807953104 -0.112901188 0.0167454872 0.0380413979 0.0550592877 0.121599279 -0.110051796 0.02049114 -0.113943994 0.0120341163 0.054577291 -0.0282275155 -0.0312832892 -0.14506188 0.0298683271 0.0238241255 -0.109261803 0.156495273 -0.132307068 0.0377000831 -0.194822595 0.0180359278 0.0903957933 0.0792595521 -0.112396851 0.181946352 0.131865382 0.131673768 -0.0266070645 
-0.0772341341 -0.152235314 0.00981780421 0.021913426 0.128900573 0.0193908215 -0.0176257994 -0.0472896919 -0.0430161357 0.00791153312 -0.00393258035 -0.0942965969 -0.121386364 0.0414898992 -0.108187407 -0.0423817933 0.0773015916 0.0430543721 -0.140809581 -0.0302241296 -0.0291004777 0.137758568 0.118799791 0.0682975501 -0.0604750663 0.0137153119 -0.132459 -0.127778888 -0.0843767822 -0.0346240923 0.109940037 0.0166125298 -0.0546926185 0.0908764154 0.021722734 -0.098706007 0.0907239914 0.0898113251 -0.0827805996 -0.0991119295 -0.140943363 0.0492345244 -0.0119397789 0.14030765 0.103607342 0.0925647169 0.0516325533 0.0379345268 -0.0236282125 -0.12176773 0.0705950111 -0.0243805572 0.0545013547 -0.0887916982 -0.114718959 -0.0397065133 -0.0364263579 0.0644538626 -0.0560083427 0.00641170144 -0.115581721 0.0916448981 0.165714934 -0.0800587684 -0.139642954 -0.147206143 0.14433305 0.121563576 0.124653377 0.00290819933 -0.00689320266 -0.134743109 -0.0199477952 0.025890775 -0.097605601 0.144976184 0.0240392517 0.106600702 -0.0212116838 0.141039237 0.0561088696 0.0311461743 -0.0972645953 -0.072422564 -0.00339891389 0.0796980709 0.104762718 -0.000878902618 -0.132795557 -0.0110478699 0.0181016214 0.033460509 0.16719529 0.0324967206 -0.0523595363 -0.0596223697 0.00182852231 -0.0620629825 0.0593552999 -0.0177798048 -0.134050861 0.091143012 0.0613186061 -0.0134655545 0.0728626028 0.11032062 -0.110704623 0.113462664 -0.125822559 -0.0217845738 -0.0629318058 -0.110271119 0.0102175586 0.0384269953 0.041528672 0.117570601 0.0818911791 -0.172721431 -0.0826341584 0.143604502 -0.0946950018 -0.0604351908 -0.145674467 -0.0840071887 0.124765471 -0.0483974069 0.0704860687 -0.0513613969 0.102030285 0.0974786878 -0.122555755 0.0484942682 0.120798931 0.0460326076 -0.158095554 -0.174045458 0.0302520841 0.00143720582 0.109413154 -0.089490898 0.0403371304 0.0639648885 0.0195339117 0.0851712674 0.0805694312 0.14608863 0.0028625133 -0.0460382551 0.133578837 0.0474288128 -0.00900612772 0.0885348395 
-0.0367066041 -0.0723806694 0.11516311 -0.118180014 0.173740089 -0.146156281 0.0717909113 0.0275212824 -0.00858630054 -0.0898427665 0.0496289805 -0.00977645814 -0.000550225377 0.107040554 0.00615952769 -0.0177549198 0.00348424283 0.0556790046 0.00946377218 0.0532293469 0.065482147 -0.00725790858 -0.0178769901 -0.0931927189 -0.00408215867 -0.0455512553 0.0916866064 -0.0685584769 0.0127655407 0.153180107 -0.111894742 0.0765284225 0.0260704271 0.166822523 0.0185323507 0.0651063919 -0.0187719949 0.124089167 0.060685847 -0.0775963366 -0.0125377784 -0.0656027719 0.0646102652 -0.168868825 0.0715800896 0.0879634544 -0.0288382769 0.106939703 0.0860616416 -0.147091925 0.0701762438 -0.154209405 0.0168217402 -0.0449583791 0.150799006 -0.0704786852 0.0240108818 -0.0355115384 0.0358552337 -0.00473457575 -0.0508537143 -0.141223907 0.0694443733 0.13417317 -0.0782108009 -0.101270229 0.0261989981 -0.0686776191 0.0585096031 -0.0421225578 0.112916127 0.086612314 -0.0666391104 -0.0785085559 0.0723554343 0.0139737725 0.0708736479 0.104420915 0.0537221283 0.0645926893 0.0457919091 -0.0992759913 -0.0648162961 0.0873944908 0.135357454 0.0170427561 -0.111893699 0.118674949 0.0242431909 0.131864205 -0.0160110891 -0.0652568489 -0.136016116 0.109248772 -0.0484858006 0.0772848427 -0.14131242 -0.0729296431 -0.0393306315 -0.136528164 -0.0325899497 -0.138895422 0.0920817852 -0.00506219268 -0.0218565464 0.178741023 0.000478784292 0.131854966 -0.0545493215 0.0215138886 0.00403119158 0.0818745121 -0.0696112365 0.0600213483 -0.0925471112 -0.0903580412 -0.0477291904 0.107568391 0.00575149059 -0.0782493427 -0.139606044 -0.104121134 0.000853225589 -0.0665216073 0.0340940468 0.089630641 0.105736643 0.0541669875 -0.0017753446 -0.0978966951 0.0146369338 0.148612946 0.0921797678 0.136579424 0.0460715145 0.0911454633 -0.128372431 0.0109238625 -0.0441332199 -0.0767628923 0.0842499733 0.0932835937 0.0461514592 -0.0389378071 0.0933182985 -0.0622017495 -0.150734246 0.0955395252 0.0519403517 -0.0935698077 
0.066866152 -0.0563694201 0.0773465633 0.130469427 -0.0873394459 -0.0453016274 0.108205438 0.113830492 0.0172546878 0.142020121 -0.154692054 -0.0341549404 -0.0286441967 0.109917641 0.0980234817 -0.0668107346 0.184589744 0.0403014049 0.0617943406 -0.0170874391 0.165723398 -0.129302576 -0.0549281389 -0.0933095962 0.0417284593 -0.0645934939 -0.110736929 0.0152139366 -0.0576914884 0.14489913 0.0873956382 0.122595996 -0.100744717 -0.0429090895 -0.0427098498 0.00572537677 0.0862406343 0.0775141418 -0.114131004 0.0538300015 -0.133984268 -0.0513852537 -0.10520786 0.0823375359 -0.154663965 0.140852332 -0.12760137 0.0852351859 0.0137166381 0.107333735 0.0452813096 -0.0948897004 0.151388273 0.0713874474 -0.128167778 0.0409513377 -0.0138035901 -0.124753937 0.0356289335 0.0602589771 -0.0651701242 -0.0210708473 0.103122458 0.0906714946 -0.00217249128 0.122800052 -0.181580707 0.116333932 -0.0186058432 -0.0243001878 -0.113380618 -0.118654899 -0.0473651513 -0.0539538004 0.0831884071 -0.146917343 0.087518312 -0.0194211677 -0.0846155882 0.113477409 -0.136310667 -0.0191066563 0.143494949 0.112203613 0.114737526 -0.111948609 -0.00443618372 -0.0773533285 0.136251882 0.0225571636 -0.0362712741 -0.0346994996 0.114932373 0.0432204269 -0.0263436884 0.107738495 0.076974012 0.0996974856 0.119173273 -0.000754479493 -0.120190717 -0.194255233 0.0754887313 -0.098057352 -0.0132691478 -0.179493576 0.0841401368 -0.0858590156 0.0584929027 -0.0300332811 0.123049341 -0.0501776785 0.0645771027 0.147005603 0.0247138739 0.0643707812 -0.163929448 0.106429175 0.0111500323 0.00196518004 -0.136184648 0.0189471096 -0.0814004242 -0.045225054 -0.0670835301 -0.0842032209 0.0329777375 0.0835169405 0.0539242215 0.0254002046 0.0783123672 -0.00774729624 0.0359950811 -0.0802180246 -0.107105985 0.00407534558 -0.00570665905 -0.0215339214 -0.0752450898 0.0079908995 -0.051493451 0.0471975654 0.137056708 -0.0891998485 0.0102625685 0.074707374 -0.105349854 -0.013545217 0.0585588515 0.123852111 -0.0915067494 -0.0506767295 
0.146273047 -0.0317402482 0.0230212267 0.0921593308 -0.147391126 -0.102736384 -0.100691356 -0.0281435605 0.0503647961 0.0425743312 0.0990781933 0.119182095 0.0459855348 0.0305577312 -0.123558313 -0.0513050854 0.0112928003 0.00887593627 0.126145825 0.00219893456 0.0915814042 -0.109302096 -0.15622583 -0.13981463 -0.00616870821 -0.0105179995 0.104944795 0.0233698469 0.0621656775 -0.134819224 -0.0790077522 -0.0676694512 -0.118123025 0.0649551898 -0.0268536378 -0.128346384 0.0998368114 0.0453064777 0.0186656322 0.082455948 0.0146208499 0.0852921382 -0.034911897 0.0727281496 -0.0483980887 -0.121127099 0.0403252244 0.063668929 0.0448285639 0.119639181 -0.047936447 0.0020842955 -0.093942605 -0.104747362 0.0033461235 0.034542352 0.0912363082 -0.0369631052 -0.0812179595 -0.0106220422 0.00214873254 -0.0435321555 0.0224560201 -0.136056453 0.0154713243 -0.0294029415 -0.030266948 0.0689932555 0.0570425093 0.098155126 -0.0518679842 0.119383372 -0.138322964 -0.00904059596 -0.106911823 0.103631556 -0.103188537 0.00235909224 -0.0282520652 -0.0132444557 0.0874274075 -0.0390356034 -0.0197433233 -0.0990931988 -0.0221592877 -0.0945370346 0.0292591304 0.0865644217 0.0864166915 0.100083038 -0.0889186412 -0.115894549 0.102965675 0.0463272929 -0.134802818 0.0737079829 -0.129007578 0.0247757733 0.0989635438 -0.11343433 0.134549081 -0.0447648764 -0.0838385448 0.0168119203 -0.0835036933 -0.0518646985 0.0785566717 0.0304447822 0.0878861994 -0.0602087826 0.0966850668 0.00694012549 0.150681928 -0.0779627338 -0.0904122442 -0.0970596001 -0.129838735 -0.050377138 -0.120469227 0.0891878232 0.0898670554 0.0890591964 0.0588300936 -0.00137163699 0.122232124 -0.0888908282 0.091505006 0.0884710699 -0.00118646026 -0.0513543747 0.120835468 0.10668838 -0.123527482 0.126270682 -0.0198479965 -0.136215568 -0.120502472 -0.11477147 -0.0835626945 0.112296805 -0.10101068 -0.0431131609 -0.0186134148 -0.0433320589 -0.000781024981 0.104710743 -0.0381170735 0.0791320279 0.0748666972 0.121473044 -0.0812482983 0.12976031 
-0.0290680751 -0.00869207084 0.134282753 -0.124510139 0.0872084498 0.0703281984 0.044497788 -0.0438723378 0.0582895428 0.0864354447 0.00991265662 0.0587073565 0.0693353862 0.0946473181 0.132393822 -0.0691181794 0.114411987 0.0981440097 0.0721710473 0.0940810665 0.0939522609 0.0250950288 -0.00488990825 -0.0450708121 0.00825079717 0.0511284098 -0.0437048152 -0.0188587848 -0.0834606662 0.0222276002 0.115497097 0.0385790952 -0.0981816575 -0.0865187049 -0.0519928411 0.0127416467 0.0403454639 -0.102931798 -0.0622832291 -0.176352426 0.0319424681 0.115145199 0.137701288 -0.160753131 0.0574897826 0.029381264 -0.106967442 0.135685384 0.0374634452 -0.0107224155 -0.0305843167 0.0789548606 0.0807328969 0.116793022 0.0109361755 -0.116716683 0.0137034692 0.0524121523 -0.0414179638 -0.0828967467 -0.131809413 -0.08836063 0.0339427441 0.00510858931 0.102733135 -0.0078022778 -0.0649038255 0.113678351 0.0853035152 -0.0652009472 -0.105958655 -0.0926674381 -0.124525182 0.0650760978 0.0134403473 -0.0214727018 0.0792901665 -0.0695419237 0.00138363184 -0.0841140822 0.00448186696 0.011020178 -0.0218410827 0.124228075 0.135860786 -0.0192789454 -0.10124626 -0.0194821134 0.0967154875 0.14184539 0.0926100463 0.0980347469 0.00184068806 -0.095286414 -0.123941429 0.025722323 0.11833895 -0.141884103 0.117436834 -0.0285835806 0.129173145 -0.0291574448 0.0875629336 -0.12026988 0.108618274 -0.0268362314 0.140304998 0.0205702633 -0.0184959918 -0.0276578963 -0.131651908 -0.0468064025 0.100602806 -0.0214187056 0.121467933 0.132986173 0.0596923828 -0.100520998 0.0469026566 -0.0225379243 -0.124888517 -0.0528000593 -0.119780593 0.103240386 0.0283819586 0.00888063014 -0.0159017146 0.0695591718 -0.0311708003 0.0383356661 0.0305758268 -0.0846125856 -0.133274123 0.0758237839 0.0325065106 0.0458696932 -0.101240918 -0.0143970549 -0.0982608497 -0.111203283 0.0112188458 0.0285433233 0.0977355093 0.0379961878 -0.0932414755 -0.137481123 0.0168030262 -0.130169049 -0.125621766 -0.0568111241 0.0422300845 -0.025392022 
-0.0273560993 -0.0488719195 0.00497778924 0.0536593273 0.0217000954 -0.0605572015 -0.0740404874 0.117026515 0.142124146 -0.102213182 0.112507164 0.077157028 0.0520981103 0.105322793 0.126890764 0.0796572641 0.029000476 0.0621110015 -0.112644024 -0.111841545 0.114291631 0.119438633 -0.146059752 -0.140381634 0.12208508 -0.0593487211 -0.123670183 -0.0897043496 0.0858117491 -0.00866622385 0.081977047 0.0776127875 0.0893446133 -0.0835789144 -0.0501656011 -0.00162425637 0.113464631 -0.0252724476 -0.103041545 0.113613687 0.101202324 0.100906543 -0.126668274 -0.0983274356 -0.0464979149 0.151394174 0.0900189281 -0.0734032542 -0.0592180528 -tensor_12weight 2500 --0.0283639673 -0.0221012682 0.133199289 0.0563756078 0.0816197246 0.0497825928 0.0042012888 -0.0795029998 0.0960107818 -0.0432373807 0.121810496 -0.0352233462 -0.00614713971 -0.0204377715 0.0226566363 0.0332049951 0.0991061553 -0.101053268 0.0533379503 0.106147163 0.0264711361 -0.0589281917 0.0324204192 -0.105546504 -0.00826245081 -0.0233381912 -0.0656121224 -0.107647173 -0.0529786088 -0.0139337238 0.0566528141 -0.0541924387 -0.138034001 0.00951558538 0.0105655985 0.127043515 0.0221604444 0.0460083932 -0.053250242 -0.078631945 0.0428880826 0.0788966715 0.0724399239 -0.0252473392 0.105300531 0.00663374923 0.103679053 -0.0542423241 0.00125868618 0.0159070492 0.0878015533 -0.0953141749 -0.00558294635 0.121205859 -0.0582696423 -0.0979145169 0.129265517 -0.135002077 -0.0146291191 -0.122696914 -0.081385985 -0.0635085255 -0.0441100746 -0.0792573318 0.0985748172 -0.0419243202 -0.0581838712 -0.0535519533 -0.142513841 -0.0591027141 -0.137103438 -0.106035508 0.0540995859 -0.00673660636 0.0947093591 0.080907613 0.101540178 -0.0970778465 0.044728864 0.00199462846 -0.138367772 -0.0271480009 -0.141949549 0.133446857 -0.0284297504 0.11598064 -0.139251098 -0.11250446 -0.0144105088 0.0116322637 -0.0658263117 0.00760472333 0.00838519912 0.0230650119 0.107505634 0.0946382433 -0.04299891 0.0719103888 -0.102656573 0.0685082525 
-0.0726309568 0.0508401543 -0.164903283 -0.00989604276 0.0877222568 -0.0414222702 0.0155824088 -0.163643703 -0.127258003 -0.036953602 0.144812256 -0.0623725951 -0.0524286106 -0.0514523163 0.149055287 0.151022196 -0.0676416233 -0.00197385135 0.114244364 -0.0560362265 0.0799408183 -0.0222610235 -0.0891805142 -0.00829313789 0.074500069 0.118379205 -0.0137284929 -0.00101503101 0.153622374 -0.0226939153 0.00704781711 -0.0985070691 0.0757866055 -0.0784802958 -0.0075249183 -0.158581018 0.000263459544 -0.0682379603 0.152031437 -0.0480913743 -0.127841696 0.0475812815 -0.0938547626 -0.0747125596 0.108499467 0.138318345 0.149815157 -0.167665958 0.0575088561 0.0208468586 -0.0716508031 0.0452511907 -0.137918025 0.0906910673 0.0187588073 0.024338223 0.0748084113 0.102134176 0.0844082534 0.0736728013 -0.0625494495 0.158075228 0.167300254 0.124639682 0.105899356 0.0630086958 0.131153017 0.112854511 -0.00915049016 0.166930482 0.0416866131 -0.123647667 0.0432391763 0.10396602 0.110268258 0.113732249 -0.0752861872 0.174172521 -0.102077015 -0.0965104327 0.0681533068 -0.00983810425 -0.106261119 0.0500092097 0.0385275073 0.023127025 0.0940388143 -0.0235950071 0.0871616006 0.0998354703 0.0826614797 -0.100802064 -0.0113284597 -0.125418305 -0.0762608498 0.0396648198 -0.0408090092 -0.11062713 0.0179210901 0.030816406 0.0176397078 -0.0463601053 -0.0776805356 -0.0160311256 0.157598197 -0.0891788378 -0.0539689176 -0.095866017 -0.06246797 -0.0310623348 0.0908927321 0.0408000574 0.0765677392 -0.0167786833 -0.0754223838 0.0956952795 0.114675142 -0.108119257 -0.0605223328 -0.024680933 0.0278924722 0.0077753365 -0.0282166582 0.0750903338 -0.0393958427 0.124427982 0.0725398734 0.143909976 0.0277911164 -0.0190528724 0.00227800012 -0.0566477478 0.00833356939 -0.0737289861 0.0828023031 -0.0422451794 0.111898191 -0.134707883 0.19176881 -0.0650343299 0.153366417 -0.132717431 0.0561521985 0.112367474 -0.107373334 0.0557826981 0.139610589 0.0258726813 0.102975905 -0.141169146 -0.0615244322 0.0394206047 
-0.0578120835 0.00785397924 0.0674745068 0.0853137672 0.175648615 -0.0636080429 0.155097261 0.111320585 -0.172565565 0.0217328835 -0.0781033784 0.132392913 0.0711862743 0.0581634976 0.186962739 0.0883171558 0.0591275692 0.0622355938 -0.00711058732 -0.0328418538 0.0393970683 0.0599646643 -0.0935084671 0.11538434 0.00435523642 0.0496765226 0.120068103 0.150854304 0.125003979 0.10406445 -0.0222269446 -0.0942721814 -0.05046333 0.0410163514 -0.0161274076 -0.033829648 0.0972495377 -0.143943429 -0.0443067737 -0.0550568588 -0.0995942876 0.055709511 -0.0130466223 0.16307734 -0.0587146431 -0.0876527801 -0.0571654439 -0.0653208718 0.0995535851 0.110708371 -0.117773779 -0.0629195049 -0.0506410673 0.104000725 -0.0097075915 0.039908275 -0.169400364 0.116414152 0.0159012116 -0.115527548 0.0599115118 0.0376613848 0.0347734354 0.149321213 0.0256413259 -0.0118766362 -0.00494059315 0.154646352 -0.0279375352 0.0956456512 0.128097624 -0.0107601006 0.102062821 0.0818155333 -0.00417978317 0.169782162 0.075039342 -0.132441014 0.136600628 -0.074167937 0.182788104 0.0683773085 -0.158134758 -0.153111815 -0.131043464 0.0933924839 0.149934262 0.0977778062 -0.123312928 -0.0546211079 -0.0884571895 -0.123066284 0.03769508 -0.0408919007 -0.0261739939 -0.0946909636 0.0763799548 0.10528186 -0.0239403676 -0.00170964003 0.107443631 0.0624295175 -0.041471377 0.0583784059 0.101691283 0.172578365 0.147721156 -0.00853029452 -0.0682957247 -0.128606334 0.108250916 0.110467754 0.0698471293 -0.10638088 -0.134169206 0.0335389711 0.0807912797 0.10768722 -0.0748209134 -0.0619279668 -0.0597048439 0.0238682833 0.0770351812 0.133448511 -0.116575532 -0.00418985356 -0.014257974 0.186535001 0.0766483098 0.0578167289 -0.1063518 -0.0533722229 -0.0255306661 0.0375191718 -0.0521368645 -0.0643548667 -0.100525327 0.087843962 0.0360278301 0.186081558 0.0779006332 0.124324918 -0.0178796574 0.0964507908 0.000115471157 0.121352643 -0.0145074725 0.136342749 -0.0524556786 -0.0543406755 -0.0231247786 0.0547336638 -0.110653035 
0.00989100244 0.0201757103 0.168474302 0.0595019609 0.140253812 -0.0207650233 0.156596705 0.0423379913 0.0540750362 0.0326289497 -0.0725854859 0.120222606 0.0588764995 0.0232573915 0.156475991 -0.107084453 -0.109426022 0.120157138 -0.0353143811 0.109706894 0.0043564043 0.138297886 0.168600574 0.147256792 -0.00588848395 0.0737749934 0.0574953109 0.0112638986 -0.125408784 0.0741967931 -0.0695395544 -0.0118542425 0.0154474955 0.0319254026 -0.0753778815 -0.0449277535 -0.00750545319 0.170274869 -0.131167576 0.135590628 0.122695968 -0.0971804634 0.0734618902 -0.00931429863 0.0365690589 0.0510447621 -0.0520833731 -0.0832352266 -0.101591244 -0.0607022047 0.0549951196 0.127605066 0.0541120619 -0.0588162467 0.0998720974 0.0493049473 -0.0581545606 0.0780114084 0.0665595233 -0.0931976438 0.038903445 -0.0382999554 0.0369430333 -0.085522607 0.0509717613 -0.0687565655 -0.0897999555 -0.0971511528 0.138259694 0.116990075 0.00449900329 -0.0872562379 -0.138075918 0.096727103 -0.0454748869 -0.00835196674 0.0760231018 0.0369116217 0.0443069041 0.0586204678 0.134296641 -0.131746709 0.05054304 0.141017333 -0.0719901621 -0.0659282431 0.109382346 -0.129540086 0.118722782 0.0131379962 -0.10180755 -0.0842798278 -0.0826065615 -0.0181476772 -0.0728539303 0.0718309358 -0.0300796181 -0.120240092 0.133931667 0.0222150292 0.0798185244 -0.107362755 0.105332904 -0.135004058 -0.11729826 0.147456452 -0.0890139267 0.0949609131 0.13500765 0.126173526 0.11857067 -0.000773293898 0.0894765258 0.0475671589 -0.0154326251 0.15092206 -0.037826851 -0.049774427 0.103259467 -0.0269829631 -0.0261214655 0.0183449704 0.0224278755 -0.157012105 -0.0851792991 0.0859342813 0.112071827 -0.0393440984 0.137702033 -0.0219873022 0.115037508 0.0346440673 0.0699992254 -0.0236694999 0.164631814 0.0094958609 -0.153284445 0.040881291 -0.108448327 -0.110614225 0.0260498095 0.0183986761 -0.116771467 -0.00523093343 -0.0689058378 0.0228717029 0.034788698 -0.081005007 0.0377451777 -0.0307365786 -0.0821703076 0.0325528234 -0.14126493 
0.0695137903 0.0801631436 0.0886292234 0.183120564 -0.0694635212 0.161076441 0.0139303654 0.108437546 0.0401087664 -0.0091292914 0.00868863612 0.141585931 0.156644121 -0.0755528286 -0.0417527556 0.00487269135 -0.0350182503 -0.126364589 -0.024675725 -0.0234753042 -0.00169406319 0.0142500447 0.0215916336 0.0767152011 0.0652107447 0.159517407 0.103661276 -0.0556443408 -0.0293452106 -0.0731868073 0.002619609 -0.0566306897 0.00929513387 -0.197862059 0.116043128 -0.00975279883 0.105451569 -0.0839810297 0.0331301987 -0.181696653 -0.0319507346 -0.00473482907 0.0933903456 -0.0281781182 0.00192525925 0.0756578669 0.160195202 0.0697703436 0.146909162 0.0564890578 0.00285601406 -0.0287020877 -0.0564336739 -0.0819117948 0.0810552388 0.165045336 0.147315055 -0.0212504752 0.10172905 0.0923355818 -0.0141675817 0.150093243 0.179681987 -0.114264801 -0.0956551284 -0.00638729148 -0.128433809 0.0788437128 -0.119181894 -0.0507362969 -0.115742147 -0.0738439783 0.0803774595 -0.0840121433 0.138585642 -0.0423021615 0.137064368 -0.0672038868 0.117374197 -0.142620116 0.0667178184 0.140385941 0.104921885 -0.107778296 4.28000058e-05 0.124074519 -0.114285842 0.0608676858 0.176241919 -0.0267177299 0.0773821026 -0.010434255 -0.0313907899 -0.0211786404 -0.0771455616 -1.91580984e-05 0.0954339802 -0.0113247158 0.103564784 0.17941153 0.166724324 0.0164197646 0.0291595794 -0.0204665605 -0.0382782891 -0.0668093562 -0.123122104 -0.132946268 0.160618961 0.0747604817 0.0837294608 0.169379547 -0.0834754705 0.0670293868 0.125993848 0.0960132033 0.00812351983 -0.0131131131 0.076757364 0.160401285 0.107536592 0.0723732188 -0.00439980626 0.0320614874 0.147804722 -0.123346433 0.138982892 0.0878069773 0.143579617 0.0674099252 0.0150862103 0.107429564 -0.0661900267 0.169957653 0.0881028622 0.0399612524 -0.0500341803 0.153780296 0.166395634 0.137741536 -0.106945679 0.0822954476 0.0795641989 0.135367706 -0.0886543989 0.00912791491 0.0881962925 0.00199659169 -0.0546213388 -0.0906977132 0.079190582 -0.0895646214 
-0.135372177 -0.114347287 -0.0691716 -0.00326307118 0.0402559191 0.104741633 0.139453545 0.0585349798 -0.0482536629 0.0699717999 0.0140683651 -0.0911885202 -0.136688069 -0.0421462804 0.0418486297 -0.119360626 -0.132552981 0.139314875 -0.0619060844 -0.0730924681 0.0170933753 0.0174552351 -0.100706309 -0.0780835152 -0.0697649717 -0.0582113415 -0.0707645416 -0.104700163 0.0463543534 -0.00485464931 -0.0724216402 0.0457475185 -0.0357155427 -0.00419606268 -0.119270869 -0.0787308067 0.0156891048 -0.0961950272 -0.0582991764 0.0977203697 -0.0429823883 -0.0821526423 0.143807203 -0.0634690672 0.00810478069 0.0317232311 0.0819439888 -0.0359285586 0.145277902 0.0952888206 -8.69644427e-05 0.148746893 0.0504151061 0.130737811 -0.0782369152 -0.0521331243 -0.0100645348 -0.0266241431 -0.137800336 0.114405535 0.00846977346 -0.113788694 -0.0912239552 0.120060176 -0.0522767827 0.0220687613 0.069416903 0.0120533248 -0.050613381 -0.0949904099 -0.0282441527 -0.00488385558 -0.099041827 0.137989372 0.0805989653 -0.0629852638 0.0974306986 -0.0589808449 -0.0508747622 0.075892739 -0.112869091 -0.0507352911 0.153549612 -0.071098201 0.101641163 -0.0416162089 0.0734882876 0.0855877772 -0.0345149338 0.127689764 -0.0378296189 -0.0469297916 0.164389119 -0.0796598718 -0.118202388 -0.0146005005 0.168985084 0.160650715 -0.0404448994 0.155802146 0.025944557 0.139599308 0.0971967429 0.0117399581 -0.0171507876 -0.142275631 0.0839506909 0.0660035759 0.125143692 -0.0461359918 -0.116108619 0.098188132 0.119050659 0.0910999924 -0.117082357 0.0748248175 -0.0653233901 -0.0601715185 0.0412754826 0.105733104 0.0728395283 0.0162961781 0.131876916 -0.115422845 0.144570231 0.0887707174 0.097047694 -0.0720524788 0.0252056289 0.0271244925 0.130636543 0.0491141453 0.0332921445 0.113790326 -0.0568653941 -0.0698323101 -0.10968978 -6.08155533e-05 0.0675321668 -0.137371317 0.0749645829 -0.0216469616 0.0328196287 -0.141031533 -0.122319311 0.131579712 0.0271315724 0.12323828 -0.0254372582 -0.0824730098 -0.0256547406 
0.1272306 -0.0528855324 0.12240018 -0.0112800747 0.0355034769 -0.0888580233 0.0970521122 0.0183172673 -0.0275907442 -0.122296013 0.0405875295 -0.112721227 0.0605996549 -0.124520272 -0.0803802982 0.0134015977 0.0546580702 -0.13796401 -0.0714464486 0.0842663348 0.124747202 -0.0913373977 -0.0796824321 0.125632361 -0.0140991956 0.139319405 -0.103938892 0.0555772036 -0.100091144 0.0524645001 -0.0437719002 -0.0133467764 -0.129776016 0.133946911 -0.0986202359 0.0850319415 -0.110841952 -0.061344333 0.00772999227 0.0248650014 -0.0392874405 0.133872464 -0.0498681031 -0.0235028919 -0.0904225931 0.0328456573 0.0908727348 0.063584283 0.0274092723 0.116862483 -0.0850711316 -0.00758437719 0.0703060776 0.0831446201 -0.130965278 -0.0187913738 0.0873690993 0.0466382094 0.10954687 0.132400319 -0.0466739088 -0.0960940421 -0.1589448 -0.0499225333 -0.14365828 0.147253916 0.114190049 -0.0489069223 0.00201576366 -0.0865763724 -0.141105622 0.0361168049 0.156169817 0.160132021 -0.0354782976 0.0171889104 0.00112317502 0.112893924 0.078825742 0.100391164 0.0169844106 -0.031357035 0.103728026 -0.0463408977 -0.00959085487 0.131976262 0.0656702071 -0.0224215165 0.0772374868 -0.102550328 0.130415276 0.0524659678 0.0614057928 -0.0569102988 -0.00330987363 -0.123422012 -0.0507996455 -0.111704901 -0.0851848572 0.0836961493 -0.0112077333 0.13896206 -0.0878892019 -0.0824800953 0.0448645279 0.0262479223 0.0815070346 -0.0392298363 0.0658551753 0.0590564199 0.137629583 -0.0485812463 -0.0806304142 0.0890918598 0.0108571658 0.0944422483 -0.0846425742 0.160814211 -0.0429416671 0.0703043491 0.0728700608 0.194119066 0.0854097977 -0.109207563 -0.0898918658 -0.0273189656 -0.116638198 0.0686772019 -0.109993316 0.124169983 -0.197762132 -0.0250630938 -0.0492028296 0.0160446316 0.174763769 -0.0346757248 -0.0763854831 -0.0731105581 -0.0112034082 -0.0417329669 0.0718449354 0.0531028807 0.0267944783 -0.125297725 0.0641857833 -0.0335034095 0.0876890123 -0.0756414309 0.023246035 0.0138799511 -0.00875826553 0.0961356089 
0.0716646761 -0.0198832899 0.0437015556 0.0223694257 0.0413542055 -0.0606194325 0.0904049575 0.146203548 0.128786162 0.0143855959 -0.145219877 -0.0717869774 -0.0981839523 -0.0615584739 0.148408964 -0.102737933 -0.0149649279 0.109365121 0.0478291512 -0.10187453 0.00699617295 0.146613404 -0.033152815 -0.0324295312 -0.124276176 0.149597749 0.0922028646 0.157599032 -0.135646835 0.135939449 0.101161912 -0.0470223129 0.135992065 0.049443569 -0.173461407 -0.0964307934 -0.131441608 0.0878081992 -0.0803490132 -0.0302332956 -0.103354827 0.0431390628 -0.102742799 0.0908333436 -0.0526512675 0.0258093104 0.0369244777 0.0257188752 0.133202627 0.0297276527 0.149491176 0.151769906 0.0164624956 0.0616860129 -0.0109535027 -0.118965067 0.0411789566 0.1403061 -0.0148792714 0.0120514603 -0.0927679688 -0.0414527878 0.0476270206 0.0865987465 0.157360256 -0.00801904406 -0.0541693904 -0.0268263221 -0.109232292 0.0128868064 0.0780390874 -0.124661535 -0.0210585389 -0.14193961 0.111578748 0.036265783 0.0652774721 0.0353158712 -0.150081992 -0.0131793777 0.0812528357 -0.119403362 -0.0613792799 0.160303533 -0.191477299 0.0978740901 0.0488265157 0.0381616503 -0.057727918 0.0255572535 0.0918723196 0.0639780238 0.0967227072 0.128719524 -0.0644618943 -0.0716221407 -0.113643073 0.0597443692 0.133907422 0.132179052 -0.0287083741 0.161785662 0.0266503692 -0.0227328409 0.116025813 0.098081924 0.0945273861 0.121004365 -0.128996223 -0.16734314 -0.116783001 -0.125158042 -0.124759458 0.0583286844 0.0625823587 -0.14607048 0.0436232202 -0.0646699741 -0.0939132422 -0.0846700892 0.171912223 -0.00973419473 0.1652738 -0.0948991999 -0.061686486 0.168850407 0.0989598855 -0.111697182 -0.0287689064 -0.0542520583 0.0386746228 0.02370058 0.0738422796 0.0875272676 0.0808269605 0.13247247 0.017513141 -0.0532009676 0.0261921436 0.0148328589 0.12498486 0.0354332887 0.035208758 0.136238024 0.0110940281 -0.0153260306 0.0743869767 0.0171744954 -0.0469010063 0.0871724337 -0.119166553 -0.0623017251 0.0695702434 -0.0897610858 
0.0566469952 -0.0424713381 0.0625269711 -0.063430272 -0.138267055 -0.0862576067 0.0633616149 0.0599713437 -0.0952118188 0.120180786 -0.0877415165 -0.0423083492 0.142752334 0.0664141625 0.0153184086 -0.11723125 0.0551554859 0.0941181779 0.0712193325 -0.103319407 0.0191577971 0.122941971 0.10161072 0.0123700919 0.133175269 -0.0438123122 0.00521303201 -0.0965816975 -0.0765956715 0.157058403 0.0214360245 0.0272503048 0.0736062229 -0.127181187 0.0797498822 0.00283462019 -0.0299958158 -0.177072033 -0.10098806 0.0592009835 0.111474186 -0.0195805114 0.0541746095 0.0449817032 0.0997726023 -0.0977167487 0.119134799 -0.0316534825 0.000842235982 0.10191527 0.117535733 -0.0239665229 0.126424983 -0.123411469 -0.075183101 0.0330423154 -0.0681335926 -0.0103695542 -0.0856132656 0.0537110977 -0.102201037 -0.077232644 0.0817721263 0.0940563828 0.0121499747 -0.0706892982 0.0818995014 -0.0357823521 -0.0557577461 -0.0687677711 0.0249025971 0.115824074 0.0820207819 -0.0299545322 0.154968947 0.146601692 0.0333185792 -0.0947523862 0.11622557 -0.00900522526 0.164223433 -0.00459069014 -0.0108394325 -0.102431804 -0.000589489937 -0.149016678 0.094403781 0.00958791561 -0.139076263 -0.00828526635 -0.0485995077 0.0503224507 -0.106602147 -0.0803155676 0.0535347834 0.134317338 0.116844706 -0.0210482683 0.153106958 -0.0599929169 -0.0479789935 -0.00114973786 0.0614023507 0.00115412858 -0.0867474899 -0.0715186149 0.0718893707 0.0815631598 0.0764374584 0.120969631 -0.0238302015 -0.0586408377 -0.172944322 -0.032943733 0.0524964482 -0.116896465 0.0810932145 0.0334346704 -0.146538407 -0.0801583529 -0.101430289 -0.0612037107 -0.113821179 0.00876645837 0.0436918586 0.00689579546 -0.126817837 0.0449476391 0.00970517192 0.120329946 -0.130458891 0.0930926949 -0.0741954371 0.117795736 -0.0513594598 -0.0421553478 -0.0799744502 0.139152512 -0.132767022 -0.148529813 0.0808773115 0.0915669501 -0.137642413 0.179301977 -0.0657531545 0.111276798 0.13818717 0.180973396 0.148477748 0.00805056468 -0.0999653786 
-0.0432874598 0.14668797 0.00481138518 0.0158720016 -0.111080863 0.0309790596 0.0831020325 0.0419512913 -0.0200633295 0.0992910117 -0.0361859724 -0.0418536253 -0.0347556248 -0.055278711 -0.129016384 0.156187266 -0.0416366123 -0.0245922692 0.0343578979 0.0859458372 0.101438627 0.00994137488 -0.069522813 -0.0426072292 0.0350656547 -0.103549249 0.0265573412 0.0763911158 -0.102405414 0.0613944381 -0.122291684 -0.030453749 -0.0144146821 0.0219928026 0.128770724 -0.0348007977 0.176855937 0.114892505 0.0656236783 -0.00231774151 0.150578141 -0.0806376413 -0.0191067625 -0.112510107 0.0331190526 0.0922281742 -0.130172268 0.0489657484 0.0407428741 -0.112448193 0.131103024 -0.0824060962 -0.0440546162 -0.135340303 0.167595923 0.0455297045 0.146565259 0.106871687 0.0179066844 0.14257127 0.0931942984 -0.0602279194 0.114411339 0.0802091658 0.183811814 0.0961534381 -0.0782056525 0.0361536555 -0.0492672026 0.0595217533 0.107877143 -0.0258457195 -0.0297816172 0.11304047 -0.0520307608 -0.106525473 0.0179495215 0.050986968 -0.00489270268 0.0162587743 -0.137507111 0.0951974392 0.0486419611 0.117301539 0.0375222899 0.00458820676 0.00400207192 -0.0281724483 0.160230845 -0.0574774742 0.0331582278 0.0722234622 -0.0535472929 0.0631503314 0.0969888195 0.140119597 0.0477451496 -0.175573215 0.0213595796 0.0623874292 0.0219977442 0.123139918 0.0430315174 -0.0433251262 -0.107093088 -0.112723738 -0.000674394774 0.0406252891 0.0568238758 -0.10773351 -0.0489739999 0.0599360615 0.126524165 -0.0252056345 0.103718482 -0.00571722444 0.0458063446 0.163875476 -0.00559463073 -0.129122823 0.0351556465 -0.12400604 0.0866204128 -0.00551600335 -0.0378636234 -0.128071412 -0.0678838044 -0.0604350902 0.135361746 -0.0829056799 0.00692036748 0.00260563195 -0.0532157905 0.124754503 0.0440538712 0.0327834561 0.182058588 0.110708781 -0.0718415529 0.0142884664 -0.0505664833 0.111684635 0.0205106754 -0.115548059 0.0633464828 -0.099902004 -0.144687116 0.102363184 -0.0373336896 0.0391648747 0.108540595 0.171795741 
-0.0586603731 0.135946795 0.0958023518 0.0913395435 0.104659595 -0.104977675 -0.136726558 0.101383127 -0.00297953957 -0.0918395743 -0.0204569325 -0.130254254 0.0839903727 0.0175899137 0.113330193 0.0912543014 -0.0047330535 0.123540469 -0.0720442235 0.039481502 0.163159758 -0.122090541 0.161992833 -0.116110057 -0.0345729962 0.180961043 0.13387315 -0.0200760309 0.0601475984 -0.124806568 0.155271858 0.0845254213 0.0596974678 0.126717508 0.0584589094 0.104401402 -0.0844913498 -0.106263012 -0.0435930751 -0.0393355973 -0.043946974 0.0409472808 0.0387777314 -0.0833237693 0.0790580288 0.00874889921 0.122026242 0.106449321 0.00690521859 -0.0539527871 -0.0490253009 -0.116141111 -0.0438183956 0.0866069868 -0.0664470792 0.162248239 0.161943093 0.104601003 -0.0271019768 0.0825799704 0.00950320065 -0.0404817834 0.129946649 0.114401944 -0.0974406078 -0.144606605 0.0822113976 -0.133282393 -0.0866103172 -0.161681429 0.0105247656 0.0808934346 -0.0963236913 -0.130843952 -0.0294511542 0.14981015 0.0511926003 0.119163141 0.0719184428 0.014378503 -0.0375215076 0.100146711 0.00126785447 0.159019947 -0.0868258625 0.0822456554 0.00716631068 0.0106834266 -0.0192997064 0.0986597613 -0.0639843941 0.0297827087 0.0358716287 -0.0587924942 0.0603018999 0.0780824944 -0.123953499 -0.0754288808 -0.00476129353 -0.0413157828 0.0502767749 -0.0340033025 -0.111571126 -0.100440137 0.147668496 0.0673740879 -0.0353276059 0.0640556961 -0.0988995805 0.136604816 0.140064046 0.0980866402 -0.0750294924 -0.137589633 0.043476589 -0.114663213 -0.0333486758 -0.0707642436 -0.0460274704 0.114950918 -0.0877812207 -0.0162663646 0.150960758 -0.126444697 -0.042077966 0.0971751958 0.0925146341 -0.0311614387 0.0668840557 0.114941254 0.0032322011 0.110740408 0.0407327525 0.127984926 -0.0671080053 -0.0279953331 0.0831837729 0.0103864418 -0.0418696329 -0.0798007697 0.00768780895 -0.0546894707 -0.0398143195 0.118723847 0.116604052 0.0362069607 -0.120131537 -0.0460622348 0.0910601467 0.161261335 0.132019728 0.0777907819 
0.12057399 -0.0873341486 0.0300714727 -0.0391077697 0.139219776 0.034336701 0.0798435882 0.0177026708 -0.00687919557 -0.0386889726 -0.0851943567 0.120803818 -0.0706116483 0.0470289737 -0.00083668204 -0.0218703523 0.0288076419 -0.0316865966 0.111179002 0.088308543 0.0760286823 0.104716919 0.00950780511 -0.0447407067 0.098795779 -0.116593644 0.103690639 -0.0872254521 -0.0867426321 0.0982015431 0.170969442 0.0893125832 -0.052779343 -0.103205182 -0.0188849494 0.148354068 0.135574684 0.16014275 0.0423907042 -0.123855539 -0.0422023758 -0.0901324898 -0.155949607 -0.0723659694 -0.0916374624 0.0385438688 0.129645094 -0.0276183728 -0.0442992523 0.0822724849 0.12342529 -0.0557483733 -0.0643166676 -0.137550637 -0.0250330232 -0.111662775 0.175575733 0.015148961 0.0261206627 0.134495571 -0.0594918132 0.0758288354 0.060908068 -0.0519245639 0.0473323725 -0.127921104 0.0161799341 0.0626626536 0.0191916153 0.0295425896 0.00972762611 -0.0193439405 0.0721613467 0.0690132603 0.145383656 0.139039531 0.0707440674 0.0853963941 0.121891946 0.0830445886 -0.139111638 -0.046770677 -0.0729267821 -0.0370384678 -0.122914143 0.0188236907 -0.0119272592 0.131827608 0.0751277357 0.0265672356 0.102637075 0.000820508925 0.128561452 -0.113339409 -0.0267119724 -0.0484277457 0.0841864794 -0.0932453424 0.112891175 -0.0558898747 -0.118770018 0.107176155 -0.0351163447 0.0367854685 -0.0528843328 -0.0932976678 0.132041663 -0.0217739437 0.0115754455 0.0903767347 -0.0251083001 -0.0478500426 -0.122446015 -0.110440329 -0.0738081709 0.13881968 0.0556323677 0.113088712 -0.0600917004 0.0295646787 0.0281529948 -0.0955312327 -0.0372477174 -0.0413099937 -0.12244685 -0.0883221477 -0.0363030881 -0.0330142006 0.0267371461 -0.0137264878 -0.110913046 0.0730738714 0.186447442 0.190215573 0.109426335 0.144610614 0.092832081 0.0243124962 0.082650587 -0.0580552481 -0.0164464172 0.132549599 -0.16663675 0.176283285 0.0554265827 -0.0576385558 0.0210192334 -0.0107073868 0.0469353348 0.0421484709 0.20231232 0.112845972 -0.0282869264 
0.0477520488 -0.0603832416 0.0570014864 0.14945437 -0.123605035 0.00659620762 -0.0773272812 0.139960304 -0.0898624659 -0.0288678277 -0.073680222 0.0669173375 -0.125848636 -0.0638360605 0.00877119321 0.146627396 0.100049771 -0.174804077 -0.0694195628 0.114080206 -0.035090182 -0.0557439104 0.0807942003 0.0843401998 0.0177999288 0.0371674523 0.035731107 -0.111021757 -0.093117848 -0.0678854063 0.0266885534 -0.00953813922 0.089507781 -0.106526807 -0.0295186415 0.00943453331 0.142027363 0.108211689 0.106748775 0.0579309165 0.0385677479 -0.100708753 0.151549757 0.0247560851 0.0252912976 0.0531664081 -0.123913847 -0.14173685 -0.0242508408 0.159124956 -0.113823667 -0.0568166599 0.0248767957 0.0976887569 -0.0333233364 0.0985455513 0.0487232953 0.0828748867 0.0737239048 -0.00177626917 0.0483965725 0.0487547442 0.0349344462 0.0642146096 0.135932118 0.139095858 -0.161683232 0.147549808 -0.0745932832 -0.0598408207 -0.0111973844 0.052948419 -0.133784652 -0.0361436382 -0.0810343474 0.113400467 0.0375890285 0.0919118151 0.165626541 -0.0969523787 -0.116375238 0.156546161 0.160234615 0.152540177 -0.0588590465 -0.082201235 0.151506081 -0.0245612338 0.108619809 0.0955060497 0.0378499888 0.0223950893 -0.0736716762 -0.0403732657 -0.112797014 0.153787464 -0.00461600721 0.0319394395 0.0467294194 0.0903732851 -0.155461773 -0.0431794003 -0.0157981049 -0.0684597194 -0.0530542284 -0.0374011174 -0.0614936724 -0.00120179285 -0.105804361 0.0348305702 0.146401137 -0.103180595 -0.0832952708 0.0653216466 0.112658091 0.107945248 0.107463151 -0.0334763043 0.137352556 0.0405373275 0.0836709291 0.0841100141 0.000271841243 -0.0795930102 0.115181461 -0.0337604173 -0.128911823 0.123057336 0.0785609409 0.0277274642 0.143650293 0.0181832965 0.126039356 -0.0717210472 -0.0782386661 -0.207162619 -0.0587309264 0.0823272541 0.11590194 -0.15862602 -0.110215195 0.119876325 -0.03551469 0.203044742 0.0244795568 0.10723605 0.0298106819 0.174269021 0.143131137 0.145505443 0.0387773216 0.0399972759 -0.0434709154 
0.174700931 0.0863626674 -0.00607159734 -0.00721096992 0.181514055 -0.174008265 -0.106141508 0.0943884626 0.0748455301 -0.08260189 0.128811941 0.09916839 -0.0107264379 0.174222261 -0.116252743 0.108514942 0.0142451078 0.0903487951 0.0105539095 -0.00275873509 -0.0601309314 -0.103395417 0.0936504826 0.00442700088 0.0223080404 -0.0448008254 0.0983777866 0.0909713805 0.0360316932 0.00507651642 -0.0769937262 0.0967232734 -0.0641804114 0.122599706 -0.113460898 -0.107624725 0.117271051 -0.00697429758 0.0493416227 -0.127027497 -0.0720096231 -0.0119818123 0.0336597078 -0.0459525064 -0.0465637259 -0.0834386647 0.0638463795 0.0122022033 -0.0931649208 -0.00553575298 -0.0875594765 0.0826876462 -0.00695849955 0.00258775055 -0.128275678 0.0674737692 -0.0166282598 -0.101550914 -0.000357478857 0.136971578 -0.0781561136 0.0165070295 0.130231693 -0.0846520886 -0.0524368845 0.13351126 0.0329737216 -0.0871247053 -0.0810154378 0.127794713 0.0551171899 0.0123283863 0.0219221041 0.0886786133 -0.0899260417 0.113652535 0.092415683 0.148293018 -0.0763472393 -0.00241494132 0.0385320969 -0.0792235285 0.119529434 0.116140589 -0.00117489253 -0.0458484106 0.022996638 -0.0169208553 0.122886755 -0.123275951 0.00158014265 0.0272646137 0.0736831352 0.0474004149 0.114665695 -0.0364573114 0.021729935 0.107678957 0.0608051606 0.109061301 -0.0183101986 -0.143050343 0.011449188 0.0253396928 0.0116122691 0.169679016 -0.108423777 0.114302441 -0.0180101134 0.0376329049 0.000675914111 -0.0989059806 0.00174778071 0.0729235336 0.0997067988 0.0798274875 0.00286698341 0.0484239347 0.0732191578 -0.0997895151 0.0131101757 0.0049995631 0.0331563503 0.0908577889 0.0351340324 0.0972493589 -0.140762478 0.0144873261 0.0925962031 0.042752862 -0.0921942815 0.123944506 0.103000984 0.134866908 -0.0654911175 -0.131940141 -0.124962308 0.121972367 -0.0777393058 -0.0134515613 -0.0593911484 0.0903083831 -0.0194513649 0.125025287 -0.0718345344 -0.028370142 -0.137873486 -0.138855338 0.102305382 -0.114035651 -0.00128389895 
-0.0209189281 -0.0363460779 0.0348549187 0.0462828428 -0.135960251 0.046998933 -0.00560566783 -0.00562353432 0.0511512756 -0.097703442 -0.000405952334 -0.102428362 -0.13385509 0.0778306574 -0.126189426 0.00569581985 -0.0901096463 0.0396448672 -0.0905233249 0.0647575855 -0.0692101866 -0.118010342 0.140668467 0.0754639357 0.0287674516 -0.127176955 0.0508546382 -0.0297820792 -0.034435086 -0.0525137484 0.0414588898 0.125622496 -0.122770369 -0.103983626 0.0979629755 -0.134100616 0.0132061988 0.136052623 -0.101130173 -0.125888899 -0.0912302136 -0.000320419669 0.0951493084 0.0250687301 0.0693171024 0.0681221336 0.0920942575 -0.0242754743 -0.126860991 0.00760373473 0.0472807437 0.00129759312 -0.0417886227 -0.135421842 -0.0420329869 0.109683141 -0.0780478939 0.0869462043 -0.0336673781 0.068866685 -0.0332275853 0.0885529816 -0.0637842491 -0.0287733674 0.0399621874 0.00726474822 -0.0887221545 -0.026667513 -0.10050422 -0.0866817236 -0.0743763596 -0.0557404384 -0.0415508077 -0.0706221908 0.03670571 0.0769552216 -0.136574149 0.12560834 0.0611953586 0.0578964055 0.0321745314 -0.108904168 -0.0603623837 0.106482595 0.0418807454 0.0567226261 -0.0480391793 -0.0818945691 -0.0918800607 0.0692870989 -0.0831394047 -0.140842602 -0.0133125484 0.0388532393 -0.0127856918 -0.0663905591 -0.0192305241 -0.00343641825 0.0277077034 0.0341987535 -0.0332082435 0.0317000449 0.133374199 0.0840312392 -0.00916420575 -0.0913549662 -0.131094456 -0.114135049 0.0324433446 -0.0922504738 0.0968189761 0.0722978711 -0.0336492881 -0.135974228 -0.0138650686 0.0959312618 -0.0392885059 0.0773027018 0.0320036188 -0.136021271 -0.096613884 -0.0733416006 -0.0797348469 0.198495209 0.0582847074 -0.0810181722 -0.0431594327 -0.0618753731 0.0181986317 0.078358449 -0.0357146375 0.0401486307 -0.0250369273 0.0634938627 -0.0496993251 0.020140117 -0.127322808 -0.0666764453 0.138859123 0.0398158766 0.110776573 0.142119944 -0.034179695 -0.0179098602 0.0632333979 -0.00735516846 -0.071974054 -0.118237272 0.097171858 -0.107004285 
0.0833490044 -0.0937577263 0.0946442783 0.0788892806 0.0437900051 -0.0310971122 0.0778285116 -0.0993157849 0.0229949653 0.11270339 0.16689226 0.162221476 -0.0412555002 0.173148394 -0.0327887125 -0.121510416 -0.0140555762 0.0903469846 -0.029039843 0.068566218 -0.12506409 0.0382451862 0.0248380303 0.0277039558 -0.14029333 -0.126389086 -0.00850690901 0.0435213149 0.0944143683 0.13669233 0.010785535 0.131652728 0.08942011 -0.0438658521 -0.090351522 0.0258171856 -0.0345944241 0.0610096753 -0.0412340835 -0.0557757616 -0.114867412 -0.0179175213 0.00409558415 -0.0384710208 0.100451782 0.141252503 -0.0818129927 0.0703791678 -0.070404999 -0.110884957 -0.109055385 -0.0898616537 -0.0513886064 0.12470524 -0.13137272 -0.0607609376 -0.0753680915 0.0404375494 -0.0389348865 -0.0476250499 0.139109835 -0.0529488549 -0.123520866 0.11238347 -0.0333348513 -0.0331748128 -0.042967163 0.098916471 0.100189671 0.113453075 0.0968655795 -0.0717557892 0.00738480687 0.0515316948 -0.0553591289 -0.00930176955 -0.00901313685 0.175364062 -0.0196713433 -0.0502335988 0.0383971296 0.121085942 0.103061765 0.15212594 0.116526209 -0.0264002159 -0.094560124 -0.0376541167 0.147969663 -0.0978502855 -0.00200880459 0.0312320646 -0.0133588314 0.0813049003 -0.029433284 -0.0262242351 -0.106563412 0.0297239125 0.0302417222 -0.0301722232 -0.0122521063 -0.0121933669 0.0207778811 0.118457362 0.0412592813 0.133936256 -0.0869062468 -0.0865229592 -0.0799478814 -0.0991789475 -0.0158145819 0.120537013 0.0608306117 0.17652306 -0.0725468472 0.126066044 -0.0148094278 -0.0349133536 0.0138158342 -0.00439210236 0.0949555784 -0.0591577739 -0.0189778134 0.0861846507 0.024233669 -0.0585409105 -0.114641324 -0.0379219502 0.0835855305 0.0618880242 -0.00448146462 -0.0475826338 0.0992924273 -0.0171841681 0.130379245 -0.115067512 -0.0295631811 0.132417724 -0.00352632999 -0.0539170504 -0.0408100188 0.0476579517 0.0435158908 -0.108436733 0.0848499984 -0.0134641975 0.103017956 0.113764629 -0.0229989663 -0.0402461812 0.10478355 0.105660841 
-0.103532135 0.0136412531 -0.113981269 -0.130932122 0.020966202 0.103488669 0.0354949832 -0.0729100034 -0.0167546049 -0.0969255865 -0.0415144786 -0.0227924958 0.0969979763 -0.0482411981 -0.0222034156 0.0317669511 -0.0238891691 -0.0465090126 -0.049510397 -0.00603993051 -0.0495383143 0.0391993932 0.0514197163 0.0751582161 -0.0600364245 0.149168238 0.00608866569 0.145530403 -0.0471902825 -0.00813056901 0.0724131763 0.02852365 -0.0375874527 0.0505036674 0.162053078 0.061635565 -0.0772141367 -0.101029664 0.135133624 -0.026366543 0.0173085034 -0.196915612 -0.094210051 -0.0830252171 -0.0152556822 0.0760191381 0.11165338 0.015817089 0.0882711783 0.0494683385 -0.135736912 -0.0937291756 0.155131117 0.0703033358 -0.101866767 -0.136783585 -0.0569891371 0.0305453669 0.0333361402 0.0399238467 0.0434939228 0.0999660268 -0.106167085 -0.0346824601 0.1533719 -0.0761708841 0.0984655321 0.0181172937 -0.0619307309 -0.00992794242 0.070495829 0.154349118 0.0747213513 -0.191178009 -0.087587826 0.0466681495 -0.128632694 0.0855400562 -0.12334384 0.01812369 0.155531704 0.11304646 0.132001072 0.196818605 -0.0396067835 -0.0600948259 -0.0705011338 -0.113130942 -0.11137788 0.0562906638 0.0935193449 0.0702943429 -0.135742277 -0.0983269736 -0.164341018 0.0970739946 0.0331884474 0.0538573004 -0.0459236056 -0.0607440993 -0.057828702 -0.11368376 0.115170449 0.144811422 0.046080593 -0.116181828 0.0888293982 0.149672315 -0.085598737 -0.0972040147 0.101113833 0.145851657 0.0576108471 0.0922829062 0.0284254942 0.0193997808 -0.000928774185 -0.0930889547 -0.0205265954 -tensor_16bias 50 --0.142527401 0.168874308 -0.0887828916 -0.0631441548 -0.0660232753 0.0544182472 0.0641550943 0.0606994219 -0.0223076269 0.102970138 0.0258652028 -0.0809393153 -0.0276761875 0.0546831638 0.128886178 -0.0795307532 -0.0189131647 -0.120750055 0.17368494 -0.0492844023 -0.0578795224 0.0671775565 0.0123591702 0.132642269 -0.0525798798 0.0173827056 0.0659839064 -0.0958536118 0.0788409114 0.0107072359 0.0492983423 -0.02552481 
0.0519438088 0.137576833 -0.115964495 -0.0723876506 0.111166283 -0.119082645 -0.0866416842 0.0962954909 0.110601142 -0.0136935199 -0.138656154 -0.0623757802 0.0855045691 -0.12340495 0.0683390722 -0.0176106151 -0.0413498878 0.0233600959 diff --git a/Linear_16.hxx b/Linear_16.hxx deleted file mode 100644 index 8d831c0..0000000 --- a/Linear_16.hxx +++ /dev/null @@ -1,658 +0,0 @@ -//Code generated automatically by TMVA for GPU Inference using ALPAKA of Model file [Linear_16.onnx] at [Fri Apr 11 14:16:45 2025] - -#ifndef SOFIE_LINEAR_16 -#define SOFIE_LINEAR_16 - -#include -#include -#include -#include -#include -#include "SOFIE/SOFIE_common.hxx" -#include - -using Dim1D = alpaka::DimInt<1>; -using Acc = alpaka::TagToAcc; -using Queue = alpaka::Queue; - -namespace SOFIE_Linear_16{ -struct Session { - -// initialized tensors -auto deviceBuf_8weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_8bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_4bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_2weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_0bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_12bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_18bias = alpaka::allocBuf(devAcc, 10); -auto deviceBuf_14bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_4weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_10weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_6bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_18weight = alpaka::allocBuf(devAcc, 500); -auto deviceBuf_0weight = alpaka::allocBuf(devAcc, 5000); -auto deviceBuf_10bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_2bias = alpaka::allocBuf(devAcc, 50); -auto deviceBuf_6weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_14weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_16weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_12weight = alpaka::allocBuf(devAcc, 2500); -auto deviceBuf_16bias = alpaka::allocBuf(devAcc, 50); - -//--- declare and allocate the 
intermediate tensors -auto bufDev_18biasbcast = alpaka::allocBuf(devAcc,160); -auto bufDev_38 = alpaka::allocBuf(devAcc,800); -auto bufDev_14biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_34 = alpaka::allocBuf(devAcc,800); -auto bufDev_22 = alpaka::allocBuf(devAcc,800); -auto bufDev_2biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_24 = alpaka::allocBuf(devAcc,800); -auto bufDev_0biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_6biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_4biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_16biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_8biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_26 = alpaka::allocBuf(devAcc,800); -auto bufDev_28 = alpaka::allocBuf(devAcc,800); -auto bufDev_10biasbcast = alpaka::allocBuf(devAcc,800); -auto bufDev_30 = alpaka::allocBuf(devAcc,800); -auto bufDev_32 = alpaka::allocBuf(devAcc,800); -auto bufDev_36 = alpaka::allocBuf(devAcc,800); -auto bufDev_12biasbcast = alpaka::allocBuf(devAcc,800); - -Session(std::string filename ="Linear_16.dat") { - -//--- reading weights from file - std::ifstream f; - f.open(filename); - if (!f.is_open()) { - throw std::runtime_error("tmva-sofie failed to open file " + filename + " for input weights"); - } - std::string tensor_name; - size_t length; - f >> tensor_name >> length; - if (tensor_name != "tensor_8weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_8weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_8weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_8weight"); - } - f >> tensor_name >> length; - if (tensor_name != 
"tensor_8bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_8bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_8bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_8bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_4bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_4bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_4bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_4bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_2weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_2weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_2weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_2weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_0bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_0bias , read " + tensor_name; - 
throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_0bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_0bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_12bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_12bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_12bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_12bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_18bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_18bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 10) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 10 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_18bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_18bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_14bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_14bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 
50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_14bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_14bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_4weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_4weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_4weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_4weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_10weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_10weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_10weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_10weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_6bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_6bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> 
tensor_6bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_6bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_18weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_18weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_18weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_18weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_0weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_0weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 5000) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 5000 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_0weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_0weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_10bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_10bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_10bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_10bias"); - } - 
f >> tensor_name >> length; - if (tensor_name != "tensor_2bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_2bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_2bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_2bias"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_6weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_6weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_6weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_6weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_14weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_14weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_14weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_14weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_16weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct 
tensor name; expected name is tensor_16weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_16weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_16weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_12weight" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_12weight , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 2500) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 2500 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_12weight[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_12weight"); - } - f >> tensor_name >> length; - if (tensor_name != "tensor_16bias" ) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor name; expected name is tensor_16bias , read " + tensor_name; - throw std::runtime_error(err_msg); - } - if (length != 50) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is 50 , read " + std::to_string(length) ; - throw std::runtime_error(err_msg); - } - for (size_t i = 0; i < length; ++i) - f >> tensor_16bias[i]; - if (f.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor tensor_16bias"); - } - f.close(); - - auto hostBuf_8weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_8weight), tensor_8weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_8weight, hostBuf8weight, 2500); - auto hostBuf_8bias = 
alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_8bias), tensor_8bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_8bias, hostBuf8bias, 50); - auto hostBuf_4bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_4bias), tensor_4bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_4bias, hostBuf4bias, 50); - auto hostBuf_2weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_2weight), tensor_2weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_2weight, hostBuf2weight, 2500); - auto hostBuf_0bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_0bias), tensor_0bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_0bias, hostBuf0bias, 50); - auto hostBuf_12bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_12bias), tensor_12bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_12bias, hostBuf12bias, 50); - auto hostBuf_18bias = alpaka::allocBuf(hostAcc,10); - std::memcpy(alpaka::getPtrNative(hostBuf_18bias), tensor_18bias, 10* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_18bias, hostBuf18bias, 10); - auto hostBuf_14bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_14bias), tensor_14bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_14bias, hostBuf14bias, 50); - auto hostBuf_4weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_4weight), tensor_4weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_4weight, hostBuf4weight, 2500); - auto hostBuf_10weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_10weight), tensor_10weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_10weight, hostBuf10weight, 2500); - auto hostBuf_6bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_6bias), tensor_6bias, 50* sizeof(float)); - alpaka::memcpy(queue, 
deviceBuf_6bias, hostBuf6bias, 50); - auto hostBuf_18weight = alpaka::allocBuf(hostAcc,500); - std::memcpy(alpaka::getPtrNative(hostBuf_18weight), tensor_18weight, 500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_18weight, hostBuf18weight, 500); - auto hostBuf_0weight = alpaka::allocBuf(hostAcc,5000); - std::memcpy(alpaka::getPtrNative(hostBuf_0weight), tensor_0weight, 5000* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_0weight, hostBuf0weight, 5000); - auto hostBuf_10bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_10bias), tensor_10bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_10bias, hostBuf10bias, 50); - auto hostBuf_2bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_2bias), tensor_2bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_2bias, hostBuf2bias, 50); - auto hostBuf_6weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_6weight), tensor_6weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_6weight, hostBuf6weight, 2500); - auto hostBuf_14weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_14weight), tensor_14weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_14weight, hostBuf14weight, 2500); - auto hostBuf_16weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_16weight), tensor_16weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_16weight, hostBuf16weight, 2500); - auto hostBuf_12weight = alpaka::allocBuf(hostAcc,2500); - std::memcpy(alpaka::getPtrNative(hostBuf_12weight), tensor_12weight, 2500* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_12weight, hostBuf12weight, 2500); - auto hostBuf_16bias = alpaka::allocBuf(hostAcc,50); - std::memcpy(alpaka::getPtrNative(hostBuf_16bias), tensor_16bias, 50* sizeof(float)); - alpaka::memcpy(queue, deviceBuf_16bias, hostBuf16bias, 50); - -//---- allocate the intermediate dynamic tensors -//--- 
broadcast bias tensor 0biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_0bias,{ 50 }, { 16 , 50 }); - auto hostBuf_0biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_0biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_0biasbcast, hostBuf_0biasbcast , 800); - } -//--- broadcast bias tensor 2biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_2bias,{ 50 }, { 16 , 50 }); - auto hostBuf_2biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_2biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_2biasbcast, hostBuf_2biasbcast , 800); - } -//--- broadcast bias tensor 4biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_4bias,{ 50 }, { 16 , 50 }); - auto hostBuf_4biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_4biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_4biasbcast, hostBuf_4biasbcast , 800); - } -//--- broadcast bias tensor 6biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_6bias,{ 50 }, { 16 , 50 }); - auto hostBuf_6biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_6biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_6biasbcast, hostBuf_6biasbcast , 800); - } -//--- broadcast bias tensor 8biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_8bias,{ 50 }, { 16 , 50 }); - auto hostBuf_8biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_8biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_8biasbcast, hostBuf_8biasbcast , 800); - } -//--- broadcast bias tensor 10biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_10bias,{ 50 }, { 16 , 50 }); - auto hostBuf_10biasbcast = 
alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_10biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_10biasbcast, hostBuf_10biasbcast , 800); - } -//--- broadcast bias tensor 12biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_12bias,{ 50 }, { 16 , 50 }); - auto hostBuf_12biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_12biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_12biasbcast, hostBuf_12biasbcast , 800); - } -//--- broadcast bias tensor 14biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_14bias,{ 50 }, { 16 , 50 }); - auto hostBuf_14biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_14biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_14biasbcast, hostBuf_14biasbcast , 800); - } -//--- broadcast bias tensor 16biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_16bias,{ 50 }, { 16 , 50 }); - auto hostBuf_16biasbcast = alpaka::allocBuf(hostAcc,800); - std::memcpy(alpaka::getPtrNative(hostBuf_16biasbcast), data, 800 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_16biasbcast, hostBuf_16biasbcast , 800); - } -//--- broadcast bias tensor 18biasfor Gemm op - { - float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_18bias,{ 10 }, { 16 , 10 }); - auto hostBuf_18biasbcast = alpaka::allocBuf(hostAcc,160); - std::memcpy(alpaka::getPtrNative(hostBuf_18biasbcast), data, 160 * sizeof(float)); - alpaka::memcpy(queue, deviceBuf_18biasbcast, hostBuf_18biasbcast , 160); - } -} - - - -std::vector infer(float* tensor_input1){ - -//--------- Gemm_GPU_ALPAKA - char op_0_transA = 'n'; - char op_0_transB = 't'; - int op_0_m = 16; - int op_0_n = 50; - int op_0_k = 100; - float op_0_alpha = 1; - float op_0_beta = 1; - int op_0_lda = 100; - int op_0_ldb = 100; - std::copy(tensor_0biasbcast, tensor_0biasbcast 
+ 800, tensor_22); - Kokkos::View kokkos_dev_input1((float*)std::data(bufDev_input1), op_0_m, op_0_k); - Kokkos::View kokkos_dev_0weight((float*)std::data(bufDev_0weight), op_0_k, op_0_n); - Kokkos::View kokkos_dev_22((float*)std::data(bufDev_22), op_0_m, op_0_n); - KokkosBlas::gemm(&op_0_transB, &op_0_transA, op_0_alpha, kokkos_dev_input1, kokkos_dev_0weight, op_0_beta, kokkos_dev_22); - -//--------- Gemm_GPU_ALPAKA - char op_1_transA = 'n'; - char op_1_transB = 't'; - int op_1_m = 16; - int op_1_n = 50; - int op_1_k = 50; - float op_1_alpha = 1; - float op_1_beta = 1; - int op_1_lda = 50; - int op_1_ldb = 50; - std::copy(tensor_2biasbcast, tensor_2biasbcast + 800, tensor_24); - Kokkos::View kokkos_dev_22((float*)std::data(bufDev_22), op_1_m, op_1_k); - Kokkos::View kokkos_dev_2weight((float*)std::data(bufDev_2weight), op_1_k, op_1_n); - Kokkos::View kokkos_dev_24((float*)std::data(bufDev_24), op_1_m, op_1_n); - KokkosBlas::gemm(&op_1_transB, &op_1_transA, op_1_alpha, kokkos_dev_22, kokkos_dev_2weight, op_1_beta, kokkos_dev_24); - -//--------- Gemm_GPU_ALPAKA - char op_2_transA = 'n'; - char op_2_transB = 't'; - int op_2_m = 16; - int op_2_n = 50; - int op_2_k = 50; - float op_2_alpha = 1; - float op_2_beta = 1; - int op_2_lda = 50; - int op_2_ldb = 50; - std::copy(tensor_4biasbcast, tensor_4biasbcast + 800, tensor_26); - Kokkos::View kokkos_dev_24((float*)std::data(bufDev_24), op_2_m, op_2_k); - Kokkos::View kokkos_dev_4weight((float*)std::data(bufDev_4weight), op_2_k, op_2_n); - Kokkos::View kokkos_dev_26((float*)std::data(bufDev_26), op_2_m, op_2_n); - KokkosBlas::gemm(&op_2_transB, &op_2_transA, op_2_alpha, kokkos_dev_24, kokkos_dev_4weight, op_2_beta, kokkos_dev_26); - -//--------- Gemm_GPU_ALPAKA - char op_3_transA = 'n'; - char op_3_transB = 't'; - int op_3_m = 16; - int op_3_n = 50; - int op_3_k = 50; - float op_3_alpha = 1; - float op_3_beta = 1; - int op_3_lda = 50; - int op_3_ldb = 50; - std::copy(tensor_6biasbcast, tensor_6biasbcast + 800, tensor_28); 
- Kokkos::View kokkos_dev_26((float*)std::data(bufDev_26), op_3_m, op_3_k); - Kokkos::View kokkos_dev_6weight((float*)std::data(bufDev_6weight), op_3_k, op_3_n); - Kokkos::View kokkos_dev_28((float*)std::data(bufDev_28), op_3_m, op_3_n); - KokkosBlas::gemm(&op_3_transB, &op_3_transA, op_3_alpha, kokkos_dev_26, kokkos_dev_6weight, op_3_beta, kokkos_dev_28); - -//--------- Gemm_GPU_ALPAKA - char op_4_transA = 'n'; - char op_4_transB = 't'; - int op_4_m = 16; - int op_4_n = 50; - int op_4_k = 50; - float op_4_alpha = 1; - float op_4_beta = 1; - int op_4_lda = 50; - int op_4_ldb = 50; - std::copy(tensor_8biasbcast, tensor_8biasbcast + 800, tensor_30); - Kokkos::View kokkos_dev_28((float*)std::data(bufDev_28), op_4_m, op_4_k); - Kokkos::View kokkos_dev_8weight((float*)std::data(bufDev_8weight), op_4_k, op_4_n); - Kokkos::View kokkos_dev_30((float*)std::data(bufDev_30), op_4_m, op_4_n); - KokkosBlas::gemm(&op_4_transB, &op_4_transA, op_4_alpha, kokkos_dev_28, kokkos_dev_8weight, op_4_beta, kokkos_dev_30); - -//--------- Gemm_GPU_ALPAKA - char op_5_transA = 'n'; - char op_5_transB = 't'; - int op_5_m = 16; - int op_5_n = 50; - int op_5_k = 50; - float op_5_alpha = 1; - float op_5_beta = 1; - int op_5_lda = 50; - int op_5_ldb = 50; - std::copy(tensor_10biasbcast, tensor_10biasbcast + 800, tensor_32); - Kokkos::View kokkos_dev_30((float*)std::data(bufDev_30), op_5_m, op_5_k); - Kokkos::View kokkos_dev_10weight((float*)std::data(bufDev_10weight), op_5_k, op_5_n); - Kokkos::View kokkos_dev_32((float*)std::data(bufDev_32), op_5_m, op_5_n); - KokkosBlas::gemm(&op_5_transB, &op_5_transA, op_5_alpha, kokkos_dev_30, kokkos_dev_10weight, op_5_beta, kokkos_dev_32); - -//--------- Gemm_GPU_ALPAKA - char op_6_transA = 'n'; - char op_6_transB = 't'; - int op_6_m = 16; - int op_6_n = 50; - int op_6_k = 50; - float op_6_alpha = 1; - float op_6_beta = 1; - int op_6_lda = 50; - int op_6_ldb = 50; - std::copy(tensor_12biasbcast, tensor_12biasbcast + 800, tensor_34); - Kokkos::View 
kokkos_dev_32((float*)std::data(bufDev_32), op_6_m, op_6_k); - Kokkos::View kokkos_dev_12weight((float*)std::data(bufDev_12weight), op_6_k, op_6_n); - Kokkos::View kokkos_dev_34((float*)std::data(bufDev_34), op_6_m, op_6_n); - KokkosBlas::gemm(&op_6_transB, &op_6_transA, op_6_alpha, kokkos_dev_32, kokkos_dev_12weight, op_6_beta, kokkos_dev_34); - -//--------- Gemm_GPU_ALPAKA - char op_7_transA = 'n'; - char op_7_transB = 't'; - int op_7_m = 16; - int op_7_n = 50; - int op_7_k = 50; - float op_7_alpha = 1; - float op_7_beta = 1; - int op_7_lda = 50; - int op_7_ldb = 50; - std::copy(tensor_14biasbcast, tensor_14biasbcast + 800, tensor_36); - Kokkos::View kokkos_dev_34((float*)std::data(bufDev_34), op_7_m, op_7_k); - Kokkos::View kokkos_dev_14weight((float*)std::data(bufDev_14weight), op_7_k, op_7_n); - Kokkos::View kokkos_dev_36((float*)std::data(bufDev_36), op_7_m, op_7_n); - KokkosBlas::gemm(&op_7_transB, &op_7_transA, op_7_alpha, kokkos_dev_34, kokkos_dev_14weight, op_7_beta, kokkos_dev_36); - -//--------- Gemm_GPU_ALPAKA - char op_8_transA = 'n'; - char op_8_transB = 't'; - int op_8_m = 16; - int op_8_n = 50; - int op_8_k = 50; - float op_8_alpha = 1; - float op_8_beta = 1; - int op_8_lda = 50; - int op_8_ldb = 50; - std::copy(tensor_16biasbcast, tensor_16biasbcast + 800, tensor_38); - Kokkos::View kokkos_dev_36((float*)std::data(bufDev_36), op_8_m, op_8_k); - Kokkos::View kokkos_dev_16weight((float*)std::data(bufDev_16weight), op_8_k, op_8_n); - Kokkos::View kokkos_dev_38((float*)std::data(bufDev_38), op_8_m, op_8_n); - KokkosBlas::gemm(&op_8_transB, &op_8_transA, op_8_alpha, kokkos_dev_36, kokkos_dev_16weight, op_8_beta, kokkos_dev_38); - -//--------- Gemm_GPU_ALPAKA - char op_9_transA = 'n'; - char op_9_transB = 't'; - int op_9_m = 16; - int op_9_n = 10; - int op_9_k = 50; - float op_9_alpha = 1; - float op_9_beta = 1; - int op_9_lda = 50; - int op_9_ldb = 50; - std::copy(tensor_18biasbcast, tensor_18biasbcast + 160, tensor_39); - Kokkos::View 
kokkos_dev_38((float*)std::data(bufDev_38), op_9_m, op_9_k); - Kokkos::View kokkos_dev_18weight((float*)std::data(bufDev_18weight), op_9_k, op_9_n); - Kokkos::View kokkos_dev_39((float*)std::data(bufDev_39), op_9_m, op_9_n); - KokkosBlas::gemm(&op_9_transB, &op_9_transA, op_9_alpha, kokkos_dev_38, kokkos_dev_18weight, op_9_beta, kokkos_dev_39); - return {std::vector(tensor_39, tensor_39 + 160)}; -} -}; // end of Session -} //SOFIE_Linear_16 - -#endif // SOFIE_LINEAR_16 diff --git a/settings-dev.cmake b/settings-dev.cmake new file mode 100644 index 0000000..6a8496f --- /dev/null +++ b/settings-dev.cmake @@ -0,0 +1,7 @@ +set (CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "" FORCE) +set (CMAKE_INSTALL_PREFIX ../install CACHE STRING "" FORCE) +set (CMAKE_INSTALL_BINDIR bin CACHE STRING "" FORCE) +set (CMAKE_INSTALL_INCLUDEDIR include CACHE STRING "" FORCE) +set (CMAKE_INSTALL_LIBDIR lib CACHE STRING "" FORCE) +set (testing ON CACHE BOOL "" FORCE) +set (mathmore ON CACHE BOOL "" FORCE) diff --git a/src/SOFIE_core/CMakeLists.txt b/src/SOFIE_core/CMakeLists.txt index 7297957..de13b58 100644 --- a/src/SOFIE_core/CMakeLists.txt +++ b/src/SOFIE_core/CMakeLists.txt @@ -76,6 +76,7 @@ list(TRANSFORM sources_headers PREPEND "inc/") set(sources_cxx src/RModel_Base.cxx src/RModel.cxx + src/RModel_ALPAKA.cxx src/RModel_GNN.cxx src/RModel_GraphIndependent.cxx src/RFunction.cxx diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 83a47af..dbee25b 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -162,6 +162,10 @@ protected: void GenerateInitializedTensorInfo_GPU_ALPAKA(); // generate code for the intermediate tensors void GenerateIntermediateTensorInfo(); + + // generate code for the temporary initialized tensors containers + void GenerateTemporaryInitializedTensorContainers_GPU_ALPAKA(); + // generate code for the dynamic tensors void GenerateDynamicTensorInfo(); diff --git 
a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index 0a615c5..073c6bc 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -29,8 +29,15 @@ enum class Options { enum class WeightFileType { None, RootBinary, Text }; -std::underlying_type_t operator|(Options opA, Options opB); -std::underlying_type_t operator|(std::underlying_type_t opA, Options opB); + +inline std::underlying_type_t operator|(Options opA, Options opB) { + return static_cast>(opA) | + static_cast>(opB); +} + +inline std::underlying_type_t operator|(std::underlying_type_t opA, Options opB) { + return opA | static_cast>(opB); +} class RModel_Base { diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index d183052..f25b66b 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -704,6 +704,11 @@ inline GNN_Data Copy(const GNN_Data & data) { return out; } +// Function to generate the code for declaring and initializing constant tensors +// This is for tensors which are not part of weight files and can be created from the Constant operator +template +std::string GenerateConstantTensorCode(const std::pair &t); + }//SOFIE #endif //TMVA_SOFIE_RMODEL diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index f877b00..b78ad43 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -12,13 +12,6 @@ namespace SOFIE { -std::underlying_type_t operator|(Options opA, Options opB) { - return static_cast>(opA) | static_cast>(opB); -} -std::underlying_type_t operator|(std::underlying_type_t opA, Options opB) { - return opA | static_cast>(opB); -} - RModel::RModel(RModel&& other) { fInputTensorInfos = std::move(other.fInputTensorInfos); fReadyInputTensorInfos = std::move(other.fReadyInputTensorInfos); @@ -534,43 +527,6 @@ void RModel::InitializeSubGraph(std::shared_ptr graph) { } -// 
Function to generate the code for declaring and initializing constant tensors -// This is for tensors which are not part of weight files and can be created from the Constant operator -template -std::string GenerateConstantTensorCode(const std::pair &t) -{ - std::stringstream strs; - std::string type = ConvertTypeToString(t.second.type()); - size_t length = ConvertShapeToLength(t.second.shape()); - // avoid using stack sizes for constant tensors to reduce compilation time - bool allocateOnStack = (length > 100) ? false : true; - - const T *data = t.second.data(); - - // and check if all values are the same - bool sameData = false; - // for non stack allocation check if data are the same - if (!allocateOnStack && length > 1) { - size_t idx = 1; - do { - sameData = (data[idx] == data[idx - 1]); - idx++; - } while (sameData && idx < length); - } - if (allocateOnStack) { - strs << type << " tensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; - } else { - strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; - if (sameData) - strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; - else { - strs << ConvertValuesToString(length, data) << ";\n"; - } - strs << "const " << type << " * tensor_" + t.first + " = fTensor_" + t.first + ".data();\n"; - } - return strs.str(); -} - void RModel::GenerateInitializedTensorInfo() { if (!fInitializedTensors.empty()) @@ -594,28 +550,6 @@ void RModel::GenerateInitializedTensorInfo() } } -void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() -{ - if (!fInitializedTensors.empty()) - fGC += "// initialized tensors\n"; - - for (auto &i : fInitializedTensors) { - if (!fUseWeightFile || i.second.IsConstantTensor()) { - if (i.second.type() == ETensorType::FLOAT) - fGC += GenerateConstantTensorCode(i); - else if (i.second.type() == ETensorType::INT64) - fGC += GenerateConstantTensorCode(i); - - } else { - // case of tensors which are read from a 
file - size_t length = ConvertShapeToLength(i.second.shape()); - if (i.second.type() == ETensorType::FLOAT) { - fGC += "auto deviceBuf_"+i.first+" = alpaka::allocBuf(devAcc, "+std::to_string(length)+");\n"; - } - } - } -} - void RModel::GenerateIntermediateMemoryPool() { if (fIntermediateMemoryInfo.total_stack.size() == 0) return; fGC += "\n//--- Allocating session memory pool to be used for allocating intermediate tensors\n"; @@ -674,55 +608,6 @@ void RModel::GenerateIntermediateTensorInfo() { } } -void RModel::GenerateGPU_ALPAKA_Buffers(){ - if (!fIntermediateTensorInfos.empty()) { - std::string tensor_declaration_block = ""; - - for (auto &i : fIntermediateTensorInfos) { - if (i.second.type == ETensorType::BOOL) { - tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; - // No pointer allocation needed for BOOL - } - if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { - size_t length = ConvertShapeToLength(i.second.shape); - - if (i.second.type == ETensorType::FLOAT) { - tensor_declaration_block += "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; - } - else if (i.second.type == ETensorType::DOUBLE) { - tensor_declaration_block += "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; - } - else if (i.second.type == ETensorType::INT64) { - tensor_declaration_block += "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," + std::to_string(length) + ");\n"; - - } - } - } - - if (tensor_declaration_block.length()) { - fGC += "\n//--- declare and allocate the intermediate tensors\n" + tensor_declaration_block; - } - } - // add also the dynamic tensors (only declarations, allocation will be done later) - if (!fDynamicTensorInfos.empty()) { - fGC += "//--- declare the dynamic tensors\n"; - fGC += "using bufDev_float = alpaka::Buf, size_t>;\n"; 
- fGC += "using bufDev_double = alpaka::Buf, size_t>;\n"; - fGC += "using bufDev_int64= alpaka::Buf, size_t>;\n"; - for (auto &i : fDynamicTensorInfos) { - if (i.second.type == ETensorType::FLOAT) { - fGC += "bufDev_float bufDev_" + i.first + ";\n"; - } else if (i.second.type == ETensorType::DOUBLE) { - fGC += "bufDev_double bufDev_" + i.first + ";\n"; - } else if (i.second.type == ETensorType::INT64) { - fGC += "bufDev_int64 bufDev_" + i.first + ";\n"; - - } - } - } -} - - // generate code for specific operator declarations to be defined in the Session class void RModel::GenerateOperatorDeclarations() { std::string strcode; @@ -748,18 +633,6 @@ void RModel::GenerateDynamicTensorInfo() { fGC += out.str(); } -void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { - fGC += "//---- allocate the intermediate dynamic tensors\n"; - std::stringstream out; - for (auto & i: fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc," << length << ");\n"; - out << SP << "}\n"; - } - fGC += out.str(); -} - std::string RModel::GenerateInferSignature(bool isdecl) { // generate the infer signature given the inputs: eg. "float * tensor1, float * tensor2" // if (decl = false) generate only calling signature (tensor1,tensor2,....) 
@@ -795,94 +668,6 @@ std::string RModel::GenerateInferSignature(bool isdecl) { return rGC; } -namespace { - -std::string createOutputTensor(RModel const &rmodel, std::string const &name, bool isIntermediateTensor) -{ - if(name.empty()) return "{}"; - ETensorType eOutputType = rmodel.GetTensorType(name); - std::string outputType = ConvertTypeToString(eOutputType); - if (isIntermediateTensor) { - - if (eOutputType == ETensorType::BOOL) { - return "fTensor_" + name; - } else { - // need to check is size is the same(don't want to return a vector with larger size) - // in that case better to copy - return "std::vector<" + ConvertTypeToString(eOutputType) + ">(tensor_" + name + ", tensor_" + name + " + " + - std::to_string(ConvertShapeToLength(rmodel.GetTensorShape(name))) + ")"; - } - } - // include also dynamic tensors since the vectors can be allocated with a size larger than their output - // we need a special handling for bool type allocated as vector - auto outputLength = ConvertDynamicShapeToLength(rmodel.GetDynamicTensorShape(name)); - if (rmodel.IsDynamicTensor(name) && eOutputType == ETensorType::BOOL) { - return "std::vector(fTensor_" + name + ".begin(), fTensor_" + name + ".begin() + " + outputLength + ")"; - } - return "std::vector<" + outputType + ">(tensor_" + name + ", tensor_" + name + " + " + outputLength + ")"; -} - -} // namespace - -void RModel::GenerateOutput_GPU_ALPAKA() { - - if (fVerbose) - std::cout << "Generating main inference code for " << fName << std::endl; - - size_t outputSize = fOutputTensorNames.size(); - // assume output types are all the same - if (outputSize == 0) - throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); - - bool sameOutputTypes = true; - std::string inferReturnType; // type return by infer function - ETensorType eOutputType = GetTensorType(*fOutputTensorNames.begin()); - std::string outputType = ConvertTypeToString(eOutputType); - fGC += "\n\n"; - if (outputSize == 1) { - fGC += "std::vector<" + 
outputType + ">"; - } else { - // if all output types are the same we return an std::vector - otherwise a tuple - for (size_t i = 1; i < outputSize; i++) { - if (GetTensorType(fOutputTensorNames[i]) != eOutputType) - sameOutputTypes = false; - } - if (sameOutputTypes) - fGC += "std::vector>"; - else { - inferReturnType = "std::tuple<"; - for (size_t i = 0; i < outputSize; i++) { - inferReturnType += "std::vector<" + ConvertTypeToString(GetTensorType(fOutputTensorNames[i])) + ">"; - if (i < outputSize-1) inferReturnType += ","; - } - inferReturnType += ">"; - fGC += inferReturnType; - } - } - - fGC += " infer("; - - fGC += GenerateInferSignature(); - - fGC += "){\n"; - - for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); - } - - fGC += SP + "return {"; - for (size_t i = 0; i < outputSize; i++) { - std::string tensorName = *(fOutputTensorNames.begin() + i); - bool isIntermediate = fIntermediateTensorInfos.count(tensorName) > 0; - fGC += createOutputTensor(*this, tensorName, isIntermediate); - if (i < outputSize - 1) - fGC += ","; - } - fGC += "};\n"; - fGC += "}\n"; // end of infer function scope -} - void RModel::GenerateSessionCode() { @@ -992,117 +777,6 @@ void RModel::GenerateSessionCode() } } -void RModel::GenerateSessionCode_GPU_ALPAKA() -{ - - // define the Session struct (for GNN this is generated in RModel_GNN) - if (fUseSession) { - if (!fIsSubGraph) - fGC += "struct Session {\n\n"; - else - fGC += "struct Session_" + fName + " {\n\n"; - } - - // // generate code for declaring the initialized tensors - GenerateInitializedTensorInfo_GPU_ALPAKA(); - - // // evaluate total intermediate memory and position intermediate tensor addresses - // std::string intermediate_memory_alloc_string = ""; - // intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; - 
// for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - // intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); - // CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); - // } - - // // to check remaining unused fragments after memory allocation (lesser the better) - // // for (const auto &it: fIntermediateMemoryInfo.available_stack){ - // // std::cout<<"chunk_idx: "<fName + " fSession_" + graph->fName + ";\n"; - } - - // Generate code for Session constructor - if (fUseSession) { - std::string sessionName = "Session"; - if (fIsSubGraph) - sessionName += "_" + fName; - // add here specific operator code that needs to define session data members - // fGC += "\n"; - // for (size_t id = 0; id < fOperators.size(); id++) { - // std::string opName = std::to_string(id); - // fGC += fOperators[id]->GenerateSessionMembersCode(opName); - // } - fGC += "\n"; - // here add initialization and reading of weight tensors - if (fUseWeightFile) { - std::string fileName = fName; - if (fWeightFile == WeightFileType::Text) { - fileName += ".dat"; - } - if (fWeightFile == WeightFileType::RootBinary) { - fileName += ".root"; - } - fGC += sessionName + "(std::string filename =\"" + fileName + "\""; - } else { - // no need to pass weight file since it is not used - // keep passing a string for compatibility - fGC += sessionName + "(std::string = \"\""; - } - // add initialization of shape parameters - // assume all parameters are of type size_t - if (!fShapeParams.empty()) { - for (auto &p : fShapeParams) { - fGC += ",\n"; - fGC += " size_t " + p.first + " = " + p.second; - } - } - fGC += ") {\n"; - - if (fUseWeightFile) { - fGC += "\n//--- reading weights from file\n"; - ReadInitializedTensorsFromFile(0); - fGC += "\n"; - // fUseWeightFile = fUseWeightFile; - } - - MoveInitializedTensorsToBuffers_ALPAKA(); - - // now we have passed the parameters we can allocate the dynamic tensors - 
GenerateDynamicTensorInfo_GPU_ALPAKA(); - - // add here initialization code for operator - for (size_t id = 0; id < fOperators.size(); id++) { - fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); - } - - fGC += "}\n\n"; - } - // generate the inference code - GenerateOutput_GPU_ALPAKA(); - - // end of session - if (fUseSession && !fIsGNNComponent) { - fGC += "}; // end of Session\n"; - } -} - void RModel::Generate(std::underlying_type_t options, int batchSize, long pos, bool verbose) { fVerbose = verbose; @@ -1161,62 +835,6 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo } } -void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int batchSize, bool verbose) -{ - fVerbose = verbose; - fBatchSize = batchSize; - - // session flag is used in operator initialize - if (static_cast>(Options::kNoSession) & options) { - fUseSession = false; - fWeightFile = WeightFileType::None; - } - if (static_cast>(Options::kNoWeightFile) & options) { - fUseWeightFile = false; - fWeightFile = WeightFileType::None; - } - if (static_cast>(Options::kRootBinaryWeightFile) & options) { - fUseWeightFile = true; - fWeightFile = WeightFileType::RootBinary; - } - if (fUseWeightFile && !fUseSession) { - throw std::runtime_error( - "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); - } - - if (static_cast>(Options::kGNN) & options || static_cast>(Options::kGNNComponent) & options) - throw std::runtime_error("SOFIE GPU does not yet supports GNN Inference."); - - // initialize the model including all operators and sub-graphs - Initialize(batchSize, verbose); - - std::string hgname; - if (!fIsSubGraph) { - fGC.clear(); - GenerateHeaderInfo_GPU_ALPAKA(hgname); - } - - // generate first code for the subgraphs - // for (auto &graph : fSubGraphs) { - // if (fVerbose) - // std::cout << "generate session code for subgraph " << graph->fName << std::endl; - // graph->GenerateSessionCode(); - // fGC += graph->fGC; - // } 
- - if (fVerbose) - std::cout << "generate Main session code - model " << fName << std::endl; - - // generate main session code - GenerateSessionCode_GPU_ALPAKA(); - - if (!fIsSubGraph) { - fGC += ("} //SOFIE_" + fName + "\n"); - fGC += "\n#endif // " + hgname + "\n"; - } -} - - void RModel::ReadInitializedTensorsFromFile(long pos) { // generate the code to read initialized tensors from a text data file if (fWeightFile == WeightFileType::Text) { @@ -1302,31 +920,6 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { } } - void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ - for (auto &i : fInitializedTensors) { - // skip Constant and shape tensors - if (!i.second.IsWeightTensor()) continue; - std::string tensor_name = "tensor_" + i.first; - auto length = ConvertShapeToLength(i.second.shape()); - std::string slength = std::to_string(length); - if (i.second.type() == ETensorType::FLOAT) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(float));\n"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength+");\n"; - } else if (i.second.type() == ETensorType::DOUBLE) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(doub;e));"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength+");\n"; - } else if (i.second.type() == ETensorType::INT64) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(int64_t));"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf"+i.first+", "+slength+");\n"; - } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + 
ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); - } - } - } - long RModel::WriteInitializedTensorsToFile(std::string filename) { // Determine the file extension based on the weight file type std::string fileExtension; diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx new file mode 100644 index 0000000..d0047c0 --- /dev/null +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -0,0 +1,366 @@ +#include +#include +#include +#include +#include + +#include "TFile.h" +#include "SOFIE/RModel.hxx" +#include "SOFIE/SOFIE_common.hxx" + +namespace SOFIE { + +//==================================================================== +// RModel - GPU Alpaka Codegen +//==================================================================== + +void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { + if (!fInitializedTensors.empty()) + fGC += "\n// temporary initialized tensors for loading weights\n"; + + for (auto &i : fInitializedTensors) { + if (!fUseWeightFile || i.second.IsConstantTensor()) { + if (i.second.type() == ETensorType::FLOAT) + fGC += GenerateConstantTensorCode(i); + else if (i.second.type() == ETensorType::INT64) + fGC += GenerateConstantTensorCode(i); + + } else { + // case of tensors which are read from a file + size_t length = ConvertShapeToLength(i.second.shape()); + if (i.second.type() == ETensorType::FLOAT) { + fGC += "auto deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, " + + std::to_string(length) + ");\n"; + } + } + } +} + +void RModel::GenerateTemporaryInitializedTensorContainers_GPU_ALPAKA() +{ + if (!fInitializedTensors.empty()) + fGC += "// initialized tensors\n"; + + for (auto &i : fInitializedTensors) { + if (!fUseWeightFile || i.second.IsConstantTensor()) { + if (i.second.type() == ETensorType::FLOAT) + fGC += GenerateConstantTensorCode(i); + else if (i.second.type() == ETensorType::INT64) + fGC += GenerateConstantTensorCode(i); + + } else { + // case of tensors which are read from a 
file + size_t length = ConvertShapeToLength(i.second.shape()); + if (i.second.type() == ETensorType::FLOAT) { + fGC += "float tensor_" + i.first + "[" + std::to_string(length) + "];\n"; + } + } + } +} + +void RModel::GenerateGPU_ALPAKA_Buffers() { + if (!fIntermediateTensorInfos.empty()) { + std::string tensor_declaration_block = ""; + + for (auto &i : fIntermediateTensorInfos) { + if (i.second.type == ETensorType::BOOL) { + tensor_declaration_block += "std::vector fTensor_" + i.first + + " = std::vector(" + + std::to_string(ConvertShapeToLength(i.second.shape)) + + ");\n"; + // No pointer allocation needed for BOOL + } + if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == + fOutputTensorNames.end()) { + size_t length = ConvertShapeToLength(i.second.shape); + + if (i.second.type == ETensorType::FLOAT) { + tensor_declaration_block += "auto bufDev_" + i.first + + " = alpaka::allocBuf(devAcc," + + std::to_string(length) + ");\n"; + } else if (i.second.type == ETensorType::DOUBLE) { + tensor_declaration_block += "auto bufDev_" + i.first + + " = alpaka::allocBuf(devAcc," + + std::to_string(length) + ");\n"; + } else if (i.second.type == ETensorType::INT64) { + tensor_declaration_block += "auto bufDev_" + i.first + + " = alpaka::allocBuf(devAcc," + + std::to_string(length) + ");\n"; + } + } + } + + if (tensor_declaration_block.length()) { + fGC += "\n//--- declare and allocate the intermediate tensors\n" + tensor_declaration_block; + } + } + + // add also the dynamic tensors (only declarations, allocation will be done later) + if (!fDynamicTensorInfos.empty()) { + fGC += "//--- declare the dynamic tensors\n"; + fGC += "using bufDev_float = alpaka::Buf, size_t>;\n"; + fGC += "using bufDev_double = alpaka::Buf, size_t>;\n"; + fGC += "using bufDev_int64 = alpaka::Buf, size_t>;\n"; + + for (auto &i : fDynamicTensorInfos) { + if (i.second.type == ETensorType::FLOAT) { + fGC += "bufDev_float bufDev_" + i.first + ";\n"; + } else if (i.second.type == 
ETensorType::DOUBLE) { + fGC += "bufDev_double bufDev_" + i.first + ";\n"; + } else if (i.second.type == ETensorType::INT64) { + fGC += "bufDev_int64 bufDev_" + i.first + ";\n"; + } + } + } +} + +void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { + fGC += "//---- allocate the intermediate dynamic tensors\n"; + std::stringstream out; + + for (auto &i : fDynamicTensorInfos) { + auto length = ConvertDynamicShapeToLength(i.second.shape); + out << SP << "if (" << length << " > 0) {\n"; + out << "auto bufDev_" + i.first + + " = alpaka::allocBuf(devAcc," << length << ");\n"; + out << SP << "}\n"; + } + fGC += out.str(); +} + +namespace { + +std::string createOutputTensor(RModel const &rmodel, std::string const &name, bool isIntermediateTensor) +{ + if(name.empty()) return "{}"; + ETensorType eOutputType = rmodel.GetTensorType(name); + std::string outputType = ConvertTypeToString(eOutputType); + if (isIntermediateTensor) { + + if (eOutputType == ETensorType::BOOL) { + return "fTensor_" + name; + } else { + // need to check is size is the same(don't want to return a vector with larger size) + // in that case better to copy + return "std::vector<" + ConvertTypeToString(eOutputType) + ">(tensor_" + name + ", tensor_" + name + " + " + + std::to_string(ConvertShapeToLength(rmodel.GetTensorShape(name))) + ")"; + } + } + // include also dynamic tensors since the vectors can be allocated with a size larger than their output + // we need a special handling for bool type allocated as vector + auto outputLength = ConvertDynamicShapeToLength(rmodel.GetDynamicTensorShape(name)); + if (rmodel.IsDynamicTensor(name) && eOutputType == ETensorType::BOOL) { + return "std::vector(fTensor_" + name + ".begin(), fTensor_" + name + ".begin() + " + outputLength + ")"; + } + return "std::vector<" + outputType + ">(tensor_" + name + ", tensor_" + name + " + " + outputLength + ")"; +} + +} // namespace + +void RModel::GenerateOutput_GPU_ALPAKA() { + if (fVerbose) + std::cout << "Generating main 
inference code for " << fName << std::endl; + + size_t outputSize = fOutputTensorNames.size(); + if (outputSize == 0) + throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + + bool sameOutputTypes = true; + std::string inferReturnType; + ETensorType eOutputType = GetTensorType(*fOutputTensorNames.begin()); + std::string outputType = ConvertTypeToString(eOutputType); + + fGC += "\n\n"; + if (outputSize == 1) { + fGC += "std::vector<" + outputType + ">"; + } else { + for (size_t i = 1; i < outputSize; i++) { + if (GetTensorType(fOutputTensorNames[i]) != eOutputType) + sameOutputTypes = false; + } + if (sameOutputTypes) { + fGC += "std::vector>"; + } else { + inferReturnType = "std::tuple<"; + for (size_t i = 0; i < outputSize; i++) { + inferReturnType += "std::vector<" + + ConvertTypeToString(GetTensorType(fOutputTensorNames[i])) + + ">"; + if (i < outputSize - 1) + inferReturnType += ","; + } + inferReturnType += ">"; + fGC += inferReturnType; + } + } + + fGC += " infer("; + fGC += GenerateInferSignature(); + fGC += "){\n"; + + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { + if (fVerbose) + std::cout << "Generating code for operator .... 
" << op_idx << std::endl; + fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); + } + + fGC += SP + "return {"; + for (size_t i = 0; i < outputSize; i++) { + std::string tensorName = *(fOutputTensorNames.begin() + i); + bool isIntermediate = fIntermediateTensorInfos.count(tensorName) > 0; + fGC += createOutputTensor(*this, tensorName, isIntermediate); + if (i < outputSize - 1) + fGC += ","; + } + fGC += "};\n"; + fGC += "}\n"; // end of infer function scope +} + +void RModel::GenerateSessionCode_GPU_ALPAKA() { + // define the Session struct (for GNN this is generated in RModel_GNN) + fGC += "template \n;"; + if (fUseSession) { + if (!fIsSubGraph) + fGC += "struct Session {\n\n"; + else + fGC += "struct Session_" + fName + " {\n\n"; + } + + // define host and device accelerators + fGC += "using Idx = alpaka::Idx;\n"; + fGC += "using devAcc = alpaka::AccGpuCudaRt, Idx, tagAcc>;\n"; + fGC += "using hostAcc = alpaka::AccCpuSerial, Idx>;\n\n"; + + + GenerateInitializedTensorInfo_GPU_ALPAKA(); + GenerateGPU_ALPAKA_Buffers(); + GenerateOperatorDeclarations(); + + // add subgraph session + if (!fSubGraphs.empty()) + fGC += "// subgraph sessions\n"; + for (auto &graph : fSubGraphs) { + fGC += "Session_" + graph->fName + " fSession_" + graph->fName + ";\n"; + } + + // Session constructor + if (fUseSession) { + std::string sessionName = "\n\nSession"; + if (fIsSubGraph) + sessionName += "_" + fName; + + if (fUseWeightFile) { + std::string fileName = fName; + if (fWeightFile == WeightFileType::Text) + fileName += ".dat"; + if (fWeightFile == WeightFileType::RootBinary) + fileName += ".root"; + + fGC += sessionName + "(std::string filename =\"" + fileName + "\""; + } else { + fGC += sessionName + "(std::string = \"\""; + } + + if (!fShapeParams.empty()) { + for (auto &p : fShapeParams) { + fGC += ",\n"; + fGC += " size_t " + p.first + " = " + p.second; + } + } + fGC += ") {\n"; + + GenerateTemporaryInitializedTensorContainers_GPU_ALPAKA(); + if 
(fUseWeightFile) { + fGC += "\n//--- reading weights from file\n"; + ReadInitializedTensorsFromFile(0); + fGC += "\n"; + } + + MoveInitializedTensorsToBuffers_ALPAKA(); + GenerateDynamicTensorInfo_GPU_ALPAKA(); + + for (size_t id = 0; id < fOperators.size(); id++) { + fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); + } + + fGC += "}\n\n"; + } + + GenerateOutput_GPU_ALPAKA(); + + if (fUseSession && !fIsGNNComponent) { + fGC += "}; // end of Session\n"; + } +} + +void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int batchSize, bool verbose) { + fVerbose = verbose; + fBatchSize = batchSize; + + if (static_cast>(Options::kNoSession) & options) { + fUseSession = false; + fWeightFile = WeightFileType::None; + } + if (static_cast>(Options::kNoWeightFile) & options) { + fUseWeightFile = false; + fWeightFile = WeightFileType::None; + } + if (static_cast>(Options::kRootBinaryWeightFile) & options) { + fUseWeightFile = true; + fWeightFile = WeightFileType::RootBinary; + } + if (fUseWeightFile && !fUseSession) { + throw std::runtime_error( + "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); + } + + if (static_cast>(Options::kGNN) & options || + static_cast>(Options::kGNNComponent) & options) + throw std::runtime_error("SOFIE GPU does not yet supports GNN Inference."); + + Initialize(batchSize, verbose); + + std::string hgname; + if (!fIsSubGraph) { + fGC.clear(); + GenerateHeaderInfo_GPU_ALPAKA(hgname); + } + + if (fVerbose) + std::cout << "generate Main session code - model " << fName << std::endl; + + GenerateSessionCode_GPU_ALPAKA(); + + if (!fIsSubGraph) { + fGC += ("} //SOFIE_" + fName + "\n"); + fGC += "\n#endif // " + hgname + "\n"; + } +} + +void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ + for (auto &i : fInitializedTensors) { + // skip Constant and shape tensors + if (!i.second.IsWeightTensor()) continue; + std::string tensor_name = "tensor_" + i.first; + auto length = 
ConvertShapeToLength(i.second.shape()); + std::string slength = std::to_string(length); + if (i.second.type() == ETensorType::FLOAT) { + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(float));\n"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+", "+slength+");\n"; + } else if (i.second.type() == ETensorType::DOUBLE) { + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(doub;e));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+", "+slength+");\n"; + } else if (i.second.type() == ETensorType::INT64) { + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(int64_t));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+", "+slength+");\n"; + } else { + std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); + } + } + } + +} // namespace SOFIE diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/src/SOFIE_core/src/RModel_Base.cxx index b5524d1..f212c53 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -75,8 +75,7 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { fGC += "#include \"" + i + "\"\n"; } fGC += "#include \n"; - fGC += "#include \n"; - fGC += "#include \n"; + fGC += "#include \n"; // for the session we need to include SOFIE_Common functions //needed for convolution operator (need to add a flag) @@ -88,8 +87,6 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { fGC += "#include \"TFile.h\"\n"; fGC += "\nusing 
Dim1D = alpaka::DimInt<1>;\n"; - fGC += "using Acc = alpaka::TagToAcc;\n"; - fGC += "using Queue = alpaka::Queue;\n"; fGC += "\nnamespace SOFIE_" + fName + "{\n"; } diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx index ad74313..bb288cf 100644 --- a/src/SOFIE_core/src/SOFIE_common.cxx +++ b/src/SOFIE_core/src/SOFIE_common.cxx @@ -423,5 +423,40 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } +template +std::string GenerateConstantTensorCode(const std::pair &t) +{ + std::stringstream strs; + std::string type = ConvertTypeToString(t.second.type()); + size_t length = ConvertShapeToLength(t.second.shape()); + // avoid using stack sizes for constant tensors to reduce compilation time + bool allocateOnStack = (length > 100) ? false : true; + + const T *data = t.second.data(); + + // and check if all values are the same + bool sameData = false; + // for non stack allocation check if data are the same + if (!allocateOnStack && length > 1) { + size_t idx = 1; + do { + sameData = (data[idx] == data[idx - 1]); + idx++; + } while (sameData && idx < length); + } + if (allocateOnStack) { + strs << type << " tensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; + } else { + strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; + if (sameData) + strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; + else { + strs << ConvertValuesToString(length, data) << ";\n"; + } + strs << "const " << type << " * tensor_" + t.first + " = fTensor_" + t.first + ".data();\n"; + } + return strs.str(); +} + }//SOFIE From e31303fe9a9f91ad4b904a4e21ddd755361c1ddd Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 14 Oct 2025 09:26:53 +0200 Subject: [PATCH 05/43] feat: use sofieblas efficiently and add leaky relu, sigmoid support --- .vscode/settings.json | 7 +- src/.vscode/settings.json | 61 +++ 
src/SOFIE_core/inc/SOFIE/RModel.hxx | 6 + src/SOFIE_core/inc/SOFIE/ROperator.hxx | 54 +- .../inc/SOFIE/ROperator_BasicBinary.hxx | 462 +++++++++++++----- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 59 ++- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 52 ++ src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 52 +- .../inc/SOFIE/ROperator_Sigmoid.hxx | 52 ++ src/SOFIE_core/src/RModel.cxx | 56 ++- src/SOFIE_core/src/RModel_ALPAKA.cxx | 185 ++++--- src/SOFIE_core/src/RModel_Base.cxx | 4 +- 12 files changed, 784 insertions(+), 266 deletions(-) create mode 100644 src/.vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json index 381ce8f..fd03126 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -72,6 +72,9 @@ "thread": "cpp", "cfenv": "cpp", "variant": "cpp", - "format": "cpp" + "format": "cpp", + "any": "cpp", + "source_location": "cpp", + "run_inference_particle_net.C": "cpp" } -} \ No newline at end of file +} diff --git a/src/.vscode/settings.json b/src/.vscode/settings.json new file mode 100644 index 0000000..8bc121a --- /dev/null +++ b/src/.vscode/settings.json @@ -0,0 +1,61 @@ +{ + "files.associations": { + "*.icc": "cpp", + "iostream": "cpp", + "ostream": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "bitset": "cpp", + "compare": "cpp", + "complex": "cpp", + "concepts": "cpp", + "cstdint": "cpp", + "deque": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "regex": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", 
+ "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "istream": "cpp", + "limits": "cpp", + "new": "cpp", + "numbers": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "cinttypes": "cpp", + "typeinfo": "cpp" + } +} \ No newline at end of file diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index dbee25b..6083e47 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -150,6 +150,10 @@ public: // used to infer the sub-graphs std::string GenerateInferSignature(bool isdecl = true); + void RemoveIntermediateTensor(const std::string& tensor_name){ + fIntermediateTensorInfos.erase(tensor_name); + } + // calculate total intermediate memory and position intermediate tensor addresses std::string AllocateIntermediateMemory(std::span op_output_tensors); void CheckAndFlushIntermediateMemory(std::span op_output_tensors, const size_t& op_idx); @@ -185,6 +189,8 @@ protected: void GenerateSessionCode_GPU_ALPAKA(); void GenerateGPU_ALPAKA_Buffers(); + void CheckAndFuseOperators(); + public: const std::vector &GetInputTensorNames() const { return fInputTensorNames; } const std::vector &GetOutputTensorNames() const { return fOutputTensorNames; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index f7db548..0ad57b3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -2,6 +2,7 @@ #define SOFIE_ROPERATOR #include +#include #include #include "SOFIE/SOFIE_common.hxx" @@ -14,6 +15,33 @@ namespace SOFIE{ class RModel; +enum class OperatorKind { + GEMM = 0, + LAYERNORM = 1, + RELU = 2, + CONSTANT = 3, + CONSTANTOFSHAPE = 4, + UNDEFINED = 5, + CONV=6, + BATCHNORM=7 +}; + +inline const char* toString(OperatorKind kind) { + switch (kind) { + case OperatorKind::GEMM: return "GEMM"; + case OperatorKind::LAYERNORM: return 
"LAYERNORM"; + case OperatorKind::RELU: return "RELU"; + case OperatorKind::CONSTANT: return "CONSTANT"; + case OperatorKind::CONSTANTOFSHAPE: return "CONSTANTOFSHAPE"; + case OperatorKind::BATCHNORM: return "batchnorm"; + case OperatorKind::CONV: return "conv"; + case OperatorKind::UNDEFINED: return "UNDEFINED"; + default: return "UNKNOWN"; + } +} + +inline std::set FusableKinds = { OperatorKind::RELU, OperatorKind::LAYERNORM, OperatorKind::BATCHNORM}; + class ROperator{ @@ -32,29 +60,45 @@ public: virtual std::string GenerateDeclCode() { return "";} // generate session data members specific to operator virtual std::string GenerateSessionMembersCode(std::string /*opName*/) { return ""; } + virtual std::string Generate_GPU_Kernel_ALPAKA() { return ""; } + virtual std::string Generate_GPU_Kernel_Definitions_ALPAKA() { return ""; } virtual std::string Header() { return "";} + virtual std::string GetFusableOutputTensorName() { return "";} + virtual std::string GetBlasConfig() { return ""; } + virtual void UpdateFusableTensorName(std::string, const std::function& removal_func){ return;}; + //virtual void Forward_reference() = 0; //virtual void Forward_blas() = 0; virtual ~ROperator(){} protected: - + OperatorKind fKind = OperatorKind::UNDEFINED; + size_t fOpOrder = 0; const std::string SP = " "; ///< space used to correctly indent the generated C++ code bool fUseSession = false; ///< flag to identify if using the session class bool fIsOutputConstant = false; ///< flag to identify if operator has a constant output (no need to generate code) - mutable std::vector fInputTensorNames; - mutable std::vector fOutputTensorNames; + mutable std::vector fInputTensorNames; + mutable std::vector fOutputTensorNames; public: - std::span GetOpInputTensors() const { + std::span GetOpInputTensors() const { return fInputTensorNames; } - std::span GetOpOutputTensors() const { + std::span GetOpOutputTensors() const { return fOutputTensorNames; } + + OperatorKind GetKind() const { return 
fKind; } + + void RegisterOperatorOrder(const size_t ord){ + fOpOrder = ord; + } + size_t GetOpOrder(){ + return fOpOrder; + } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 127eaff..756c3b9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -1,15 +1,23 @@ -#ifndef SOFIE_ROperator_BasicBinary -#define SOFIE_ROperator_BasicBinary +#ifndef TMVA_SOFIE_ROperator_BasicBinary +#define TMVA_SOFIE_ROperator_BasicBinary -#include "SOFIE/SOFIE_common.hxx" -#include "SOFIE/ROperator.hxx" -#include "SOFIE/RModel.hxx" +#include "TMVA/SOFIE_common.hxx" +#include "TMVA/ROperator.hxx" +#include "TMVA/RModel.hxx" #include -namespace SOFIE{ +namespace TMVA { +namespace Experimental { +namespace SOFIE { -enum EBasicBinaryOperator { Add, Sub, Mul, Div, Pow }; +enum EBasicBinaryOperator { + Add, + Sub, + Mul, + Div, + Pow +}; template struct BinaryOperatorTrait {}; @@ -17,42 +25,42 @@ struct BinaryOperatorTrait {}; template struct BinaryOperatorTrait { static const std::string Name() { return "Add"; } - static std::string Op(const std::string & t1, const std::string t2) { return t1 + " + " + t2; } - static T Func(T t1, T t2) {return t1 + t2;} + static std::string Op(const std::string &t1, const std::string t2) { return t1 + " + " + t2; } + static T Func(T t1, T t2) { return t1 + t2; } }; template struct BinaryOperatorTrait { static const std::string Name() { return "Sub"; } - static std::string Op(const std::string & t1, const std::string t2) { return t1 + " - " + t2; } - static T Func (T t1, T t2) { return t1 - t2;} + static std::string Op(const std::string &t1, const std::string t2) { return t1 + " - " + t2; } + static T Func(T t1, T t2) { return t1 - t2; } }; template struct BinaryOperatorTrait { static const std::string Name() { return "Mul"; } - static std::string Op(const std::string & t1, const std::string t2) { return t1 + " * " 
+ t2; } - static T Func (T t1, T t2) { return t1 * t2;} + static std::string Op(const std::string &t1, const std::string t2) { return t1 + " * " + t2; } + static T Func(T t1, T t2) { return t1 * t2; } }; template struct BinaryOperatorTrait { static const std::string Name() { return "Div"; } - static std::string Op(const std::string & t1, const std::string t2) { return t1 + " / " + t2; } - static T Func (T t1, T t2) { return t1/t2;} + static std::string Op(const std::string &t1, const std::string t2) { return t1 + " / " + t2; } + static T Func(T t1, T t2) { return t1 / t2; } }; template struct BinaryOperatorTrait { static const std::string Name() { return "Pow"; } - static std::string Op(const std::string & t1, const std::string t2) { return "std::pow(" + t1 + "," + t2 + ")"; } - static T Func (T t1, T t2) { return std::pow(t1,t2);} + static std::string Op(const std::string &t1, const std::string t2) { return "std::pow(" + t1 + "," + t2 + ")"; } + static T Func(T t1, T t2) { return std::pow(t1, t2); } }; -template -class ROperator_BasicBinary final : public ROperator{ +template +class ROperator_BasicBinary final : public ROperator { private: - + int fBroadcastFlag = 0; std::string fNA; std::string fNB; std::string fNBroadcastedA; @@ -63,154 +71,364 @@ private: std::vector fShapeB; std::vector fShapeY; + std::vector fDimShapeA; + std::vector fDimShapeB; + std::vector fDimShapeY; + public: - ROperator_BasicBinary(){} - ROperator_BasicBinary(std::string nameA, std::string nameB, std::string nameY): - fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)){ - fInputTensorNames = { fNA, fNB }; - fOutputTensorNames = { fNY }; - } + ROperator_BasicBinary() {} + ROperator_BasicBinary(std::string nameA, std::string nameB, std::string nameY) + : fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)) + { + fInputTensorNames = {fNA, fNB}; + fOutputTensorNames = {fNY}; + } // type of output 
given input - std::vector TypeInference(std::vector input) override { - return input; - } + std::vector TypeInference(std::vector input) override { return input; } // shape of output tensors given input tensors - std::vector> ShapeInference(std::vector> input) override { + std::vector> ShapeInference(std::vector> input) override + { // assume now inputs have same shape (no broadcasting) auto ret = std::vector>(1, input[0]); // return vector size 1 with first input return ret; } - void Initialize(RModel& model) override { + void Initialize(RModel &model) override + { // input must be a graph input, or already initialized intermediate tensor - if (!model.CheckIfTensorAlreadyExist(fNA)){ + if (!model.CheckIfTensorAlreadyExist(fNA)) { throw std::runtime_error(std::string("TMVA SOFIE Binary Op Input Tensor ") + fNA + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNB)) { throw std::runtime_error(std::string("TMVA SOFIE Binary Op Input Tensor ") + fNB + "is not found in model"); } - fShapeA = model.GetTensorShape(fNA); - fShapeB = model.GetTensorShape(fNB); - bool broadcast = !UTILITY::AreSameShape(fShapeA, fShapeB); - if (broadcast) { - // Y is the common shape of A and B - fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeA, fShapeB); - bool broadcastA = !UTILITY::AreSameShape(fShapeA, fShapeY); - bool broadcastB = !UTILITY::AreSameShape(fShapeB, fShapeY); - // Broadcast A to Y - if (broadcastA) { - fNBroadcastedA = "Broadcasted" + fNA + "to" + fNY; - if (model.IsInitializedTensor(fNA)) { - auto data = model.GetInitializedTensorData(fNA); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), - std::default_delete()); - // Update the data and the shape of A - model.AddConstantTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY, broadcastedData); - fShapeA = fShapeY; + int dynamicInputs = 0; + if (model.IsDynamicTensor(fNA)) { + fDimShapeA = model.GetDynamicTensorShape(fNA); + 
dynamicInputs |= 1; + } else { + fShapeA = model.GetTensorShape(fNA); + fDimShapeA = ConvertShapeToDim(fShapeA); + } + if (model.IsDynamicTensor(fNB)) { + dynamicInputs |= 2; + fDimShapeB = model.GetDynamicTensorShape(fNB); + } else { + fShapeB = model.GetTensorShape(fNB); + fDimShapeB = ConvertShapeToDim(fShapeB); + } + if (dynamicInputs & 1 && model.Verbose()) + std::cout << BinaryOperatorTrait::Name() << " : input " << fNA << " is dynamic " + << ConvertShapeToString(fDimShapeA) << " "; + if (dynamicInputs & 2 && model.Verbose()) + std::cout << BinaryOperatorTrait::Name() << " : input " << fNB << " is dynamic " + << ConvertShapeToString(fDimShapeB) << " "; + std::cout << std::endl; + // check if need to broadcast at initialization time if shapes are known and different + // (we could broadcast the tensor tensor to maximum values of dynamic shapes - to be done) + // case of known shapes + // if shapes are known find the output shape from broadcasting + if (dynamicInputs == 0) { + auto ret = UTILITY::MultidirectionalBroadcastShape(fShapeA, fShapeB); + fBroadcastFlag = ret.first; + fShapeY = ret.second; + if (model.IsConstantTensor(fNA) && model.IsConstantTensor(fNB)) { + bool broadcast = fBroadcastFlag > 0; + if (broadcast) { + // Y is the common shape of A and B + bool broadcastA = fBroadcastFlag & 2; + bool broadcastB = fBroadcastFlag & 1; + // Broadcast A to Y + if (broadcastA) { + fNBroadcastedA = "Broadcasted" + fNA + "to" + fNY; + auto data = model.GetInitializedTensorData(fNA); + std::shared_ptr broadcastedData( + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), + std::default_delete()); + if (model.Verbose()) + std::cout << "broadcasted data A " << ConvertShapeToString(fShapeY) << " : " + << ConvertValuesToString(ConvertShapeToLength(fShapeY), + static_cast(broadcastedData.get())) + << std::endl; + // Update the data and the shape of A + model.AddConstantTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY, broadcastedData); 
+ fShapeA = fShapeY; + fDimShapeA = ConvertShapeToDim(fShapeA); + } + // Broadcast B to Y + if (broadcastB) { + fNBroadcastedB = "Broadcasted" + fNB + "to" + fNY; + auto data = model.GetInitializedTensorData(fNB); + if (model.Verbose()) + std::cout << "data B " << ConvertShapeToString(fShapeB) << " : " + << ConvertValuesToString(ConvertShapeToLength(fShapeB), static_cast(data.get())) + << std::endl; + std::shared_ptr broadcastedData( + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), + std::default_delete()); + // do not update tensor B but add broadcasted one (since it can be input to some other operators) + if (model.Verbose()) + std::cout << "broadcasted data B " << ConvertShapeToString(fShapeY) << " : " + << ConvertValuesToString(ConvertShapeToLength(fShapeY), + static_cast(broadcastedData.get())) + << std::endl; + model.AddConstantTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY, broadcastedData); + fShapeB = fShapeY; + fDimShapeB = ConvertShapeToDim(fShapeB); + } } else { - // Add an intermediate tensor for broadcasting A - model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY); + fShapeY = fShapeA; } - } - // Broadcast B to Y - if (broadcastB) { - fNBroadcastedB = "Broadcasted" + fNB + "to" + fNY; - if (model.IsInitializedTensor(fNB)) { - auto data = model.GetInitializedTensorData(fNB); - std::cout << "data B " << ConvertShapeToString(fShapeB) << " : " << - ConvertValuesToString(ConvertShapeToLength(fShapeB), static_cast(data.get())) << std::endl; - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), - std::default_delete()); - // do not update tensor B but add broadcasted one (since it can be input to some other operators) - std::cout << "broadcasted data B " << ConvertShapeToString(fShapeY) << " : " << - ConvertValuesToString(ConvertShapeToLength(fShapeY), static_cast(broadcastedData.get())) << std::endl; - 
model.AddConstantTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY, broadcastedData); - fShapeB = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY); + // tensors are constant: perform here the binary operation + + const std::string &nameA = fNBroadcastedA.empty() ? fNA : fNBroadcastedA; + const std::string &nameB = fNBroadcastedB.empty() ? fNB : fNBroadcastedB; + auto dataA = static_cast(model.GetInitializedTensorData(nameA).get()); + auto dataB = static_cast(model.GetInitializedTensorData(nameB).get()); + std::vector dataY(ConvertShapeToLength(fShapeY)); + for (size_t i = 0; i < dataY.size(); i++) { + dataY[i] = BinaryOperatorTrait::Func(dataA[i], dataB[i]); + } + model.AddConstantTensor(fNY, fShapeY, dataY.data()); + // flag tensors to not be written in the weight file + model.SetNotWritableInitializedTensor(nameA); + model.SetNotWritableInitializedTensor(nameB); + fIsOutputConstant = true; + if (model.Verbose()) { + std::cout << BinaryOperatorTrait::Name() << " : " << fNA << " " << ConvertShapeToString(fShapeA) + << " , " << fNB << " " << ConvertShapeToString(fShapeB) << " ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(dataY) << std::endl; } + } else { + // case of defined and non-constant tensors + model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), fShapeY); + if (model.Verbose()) { + std::cout << BinaryOperatorTrait::Name() << " : " << fNA << " " << ConvertShapeToString(fShapeA) + << " , " << fNB << " " << ConvertShapeToString(fShapeB) << " ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << std::endl; + } + // we convert non-dim shapes to Dim shapes + fDimShapeY = ConvertShapeToDim(fShapeY); } } else { - fShapeY = fShapeA; - } - // check case of constant output (if all inputs are defined) - if (model.IsInitializedTensor(fNA) && model.IsInitializedTensor(fNB)) { - const std::string& nameA = 
fNBroadcastedA.empty()? fNA : fNBroadcastedA; - const std::string& nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; - auto dataA = static_cast(model.GetInitializedTensorData(nameA).get()); - auto dataB = static_cast(model.GetInitializedTensorData(nameB).get()); - std::vector dataY(ConvertShapeToLength(fShapeY)); - for (size_t i = 0; i < dataY.size(); i++) { - dataY[i] = BinaryOperatorTrait::Func(dataA[i], dataB[i]); + // case A or B have dynamic shapes. We need to broadcast if shape are not same + auto ret = UTILITY::MultidirectionalBroadcastShape(fDimShapeA, fDimShapeB); + fBroadcastFlag = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; + } + } + return false; + }; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(fDimShapeA[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (fDimShapeA[i].dim != 1) + s = fDimShapeA[i]; + else + s = fDimShapeB[i]; + } else if (IsInputDimParam(fDimShapeB[i].param)) { + if (fDimShapeB[i].dim != 1) + s = fDimShapeB[i]; + else + s = fDimShapeA[i]; + } + } + } + } + + model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), fDimShapeY); + if (model.Verbose()) { + std::cout << BinaryOperatorTrait::Name() << " : " << ConvertShapeToString(fDimShapeA) << " , " + << ConvertShapeToString(fDimShapeB) << " --> " << ConvertShapeToString(fDimShapeY) << std::endl; } - 
model.AddConstantTensor(fNY, fShapeY, dataY.data()); - // flag tensors to not be written in a fil - model.SetNotWritableInitializedTensor(nameA); - model.SetNotWritableInitializedTensor(nameB); - fIsOutputConstant = true; - if (model.Verbose()) - std::cout << "Binary op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(dataY) << std::endl; - } - else { - model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), fShapeY); } } - std::string GenerateInitCode() override { + std::string GenerateInitCode() override + { std::stringstream out; return out.str(); } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override + { - if (fIsOutputConstant) return ""; + if (fIsOutputConstant) + return ""; - OpName = "op_" + OpName; + opName = "op_" + opName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Binary Op called to Generate without being initialized first"); } std::stringstream out; - out << SP << "\n//------ " << BinaryOperatorTrait::Name() << "\n"; - size_t length = ConvertShapeToLength(fShapeY); + out << SP << "\n//------ " << opName << " " << BinaryOperatorTrait::Name() << " --> " + << ConvertDimShapeToString(fDimShapeY) << "\n"; + auto length = ConvertDimShapeToLength(fDimShapeY); std::string typeName = TensorType::Name(); - // Broadcast A if it's uninitialized - // use broadcasting function where we pass an already allocated tensor to minimize memory allocations - if (fShapeA != fShapeY) { - out << SP << "// Broadcasting uninitialized tensor " << fNA << "\n"; - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY) - << ", fTensor_" << fNBroadcastedA << ");\n"; + + // we need to check if we can broadcast (case flag has bit 4 set) + + if (fBroadcastFlag & 4) { + // need to check if shapes are the same + auto lengthA = 
ConvertDimShapeToLength(fDimShapeA); + auto lengthB = ConvertDimShapeToLength(fDimShapeB); + out << SP << "if (" << lengthA << "!=" << lengthB << ") {\n"; + // check if A->B or B->A + // bool broadcastable = true; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + if (fBroadcastFlag & 5 && fDimShapeY[i] == fDimShapeA[i] && fDimShapeA[i].dim > 1 && + fDimShapeB[i].isParam) { + // B->A B[i] needs to be 1 + out << SP << SP << "if (" << fDimShapeB[i] << "!= 1)\n"; + out << SP << SP << SP << "throw std::runtime_error(\"SOFIE - Cannot broadcast B->A in operator " + << opName << "\");\n"; + } + if (fBroadcastFlag & 6 && fDimShapeY[i] == fDimShapeB[i] && fDimShapeB[i].dim > 1 && + fDimShapeA[i].isParam) { + // A-> B A[i] needs to be 1 + out << SP << SP << "if (" << fDimShapeA[i] << "!= 1)\n"; + out << SP << SP << SP << "throw std::runtime_error(\"SOFIE - Cannot broadcast A->B in operator " + << opName << "\");\n"; + } else if (fDimShapeA[i].isParam && fDimShapeB[i].isParam) { + // both shapes are parametric and we broadcast to maximum + // we allocate here output vector + out << SP << SP << "if (" << fDimShapeA[i] << " != " << fDimShapeB[i] << " && (" << fDimShapeA[i] + << " != 1 || " << fDimShapeB[i] << " != 1))\n"; + out << SP << SP << SP << "throw std::runtime_error(\"SOFIE - Cannot broadcast shapes in operator " << opName + << "\");\n"; + } + } + out << SP << "}\n"; + } + + auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeA); + auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeB); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + std::string compute_idx_A, compute_idx_B, compute_idx_Y; + if (fDimShapeA.empty() || + std::all_of(fDimShapeA.begin(), fDimShapeA.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_A = "0"; + } else { + for (size_t i = 0; i < fDimShapeA.size(); ++i) { + if (fDimShapeA[i].dim == 1 || fDimShapeA[i].GetVal() == "1") + continue; + compute_idx_A += "idx_" + std::to_string(i + 
(fDimShapeY.size() - fDimShapeA.size())); + if (stridesA[i].GetVal() != "1") + compute_idx_A += " * " + stridesA[i].GetVal(); + compute_idx_A += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_A.pop_back(); + } + if (fDimShapeB.empty() || + std::all_of(fDimShapeB.begin(), fDimShapeB.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_B = "0"; + } else { + for (size_t i = 0; i < fDimShapeB.size(); ++i) { + if (fDimShapeB[i].dim == 1 || fDimShapeB[i].GetVal() == "1") + continue; + compute_idx_B += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeB.size())); + if (stridesB[i].GetVal() != "1") + compute_idx_B += " * " + stridesB[i].GetVal(); + compute_idx_B += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_B.pop_back(); } - // Broadcast B if it's uninitialized - if (fShapeB != fShapeY) { - out << SP << "// Broadcasting uninitialized tensor " << fNB << "\n"; - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY) - << ", fTensor_" << fNBroadcastedB << ");\n"; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + for (int j = 0; j < nloop + 1; j++) out << SP; + out 
<< "tensor_" << fNY << "[" << compute_idx_Y << "] = " + << BinaryOperatorTrait::Op("tensor_" + fNA + "[" + compute_idx_A + "]", + "tensor_" + fNB + "[" + compute_idx_B + "]") + << " ;\n"; + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; } - const std::string& nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA; - const std::string& nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - out << SP << SP << "tensor_" << fNY << "[id] = " << BinaryOperatorTrait::Op( "tensor_" + nameA + "[id]" , "tensor_" + nameB + "[id]") << " ;\n"; - out << SP << "}\n"; return out.str(); } - std::vector GetStdLibs() override { + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ BINARY_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; + op += SP + "struct Binary"+BinaryOperatorTrait::Name()+"Kernel {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * A, T const * B, T * C, const size_t * shape_A, const size_t * shape_B,\n"; + op += SP + SP + SP + "const size_t * shape_C, const size_t * strides_A, const size_t * strides_B, const size_t * strides_C, size_t ndim) const{\n"; + op += SP + SP + SP + SP + "size_t threadIdx1 = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + SP + "size_t blockIdx1 = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + SP + "size_t blockDim1 = alpaka::getWorkDiv(acc)[0];\n\n"; + op += SP + SP + SP + SP + "size_t outer_dim = blockIdx1;\n"; + op += SP + SP + SP + SP + "size_t inner_start = threadIdx1;\n"; + op += SP + SP + SP + SP + "size_t inner_stride = blockDim1;\n"; + op += SP + SP + SP + SP + "if (outer_dim >= shape_C[0]) return;\n\n"; + op += SP + SP + SP + SP + "size_t idx_A[ndim], idx_B[ndim];\n\n"; + op += SP + SP + SP + SP + "size_t flat_idx_A = 0, flat_idx_B = 0, flat_idx_C = 0;\n\n"; + op += SP + SP + SP + SP + "for(size_t inner = 
inner_start; inner < shape_C[1]; inner += inner_stride){\n"; + op += SP + SP + SP + SP + "for(size_t tensor_idx=0; tensor_idx GetStdLibs() override + { if (Op == EBasicBinaryOperator::Pow) { - return { std::string("cmath") }; + return {std::string("cmath")}; } else { return {}; } } -}; -}//SOFIE + +}; +} // namespace SOFIE +} // namespace Experimental +} // namespace TMVA -#endif //SOFIE_ROperator_BasicBinary +#endif // TMVA_SOFIE_ROperator_BasicBinary \ No newline at end of file diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 7410cf3..e3d0595 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -48,6 +48,8 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)) { + + fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; static_assert(std::is_same_v, @@ -60,9 +62,11 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNC(UTILITY::Clean_name(nameC)), fNY(UTILITY::Clean_name(nameY)), fActivation(activation) { + fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; + fInputTensorNames = { fNA, fNB, fNC }; fOutputTensorNames = { fNY }; } @@ -252,8 +256,10 @@ namespace SOFIE{ shapeY.erase(shapeY.end()-1); } - if (!fIsDynamic) + if (!fIsDynamic){ model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), shapeY); + std::cout<<"currently adding: "<(tensor_" + out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertDynamicShapeToString(fShapeY) << ");\n"; auto length = SOFIE::ConvertDynamicShapeToLength(fShapeY); // output size out << SP << SP << "std::copy(data, data + " << 
length << ", tensor_" << fNC2 << ");\n"; @@ -300,12 +306,13 @@ namespace SOFIE{ // include a separate scope to avoid defining unique operator temp variables out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; out << SP << "{\n"; - out << " float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertDynamicShapeToString(fShapeY) << ");\n"; + out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << ".data()," << ConvertShapeToString(fShapeC) << ", " << ConvertDynamicShapeToString(fShapeY) << ");\n"; auto length = SOFIE::ConvertDynamicShapeToLength(fShapeY); // output size - out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::allocBuf(hostAcc,"+ length +");\n"; + out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{" << length << "}));\n"; out << SP << SP << "std::memcpy(alpaka::getPtrNative(hostBuf_"<< fNC2 <<"), data, "<< length << " * sizeof(float));\n"; - out << SP << SP << "alpaka::memcpy(queue, deviceBuf_"<< fNC2 << ", hostBuf_"<< fNC2 << " , "<< length << ");\n"; + out << SP << SP << "alpaka::memcpy(queue, deviceBuf_"<< fNC2 << ", hostBuf_"<< fNC2 << ");\n"; + out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } return out.str(); @@ -429,8 +436,8 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm(MatMul) has invalid shape for inputs or output"); } auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); - auto n = (fAttrTransB ? fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); - auto k = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); + auto n = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); + auto k = (fAttrTransB ? 
fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); std::vector sY = {fShapeY[dimY-2], fShapeY[dimY-1]}; // extra dimensions in case of stacked MatMul std::vector sA; @@ -445,8 +452,6 @@ namespace SOFIE{ out << SP << "int " << opName << "_k = " << k << ";\n"; out << SP << "float " << opName << "_alpha = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ";\n"; out << SP << "float " << opName << "_beta = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ";\n"; - out << SP << "int " << opName << "_lda = " << (fAttrTransA ? m : k) << ";\n"; - out << SP << "int " << opName << "_ldb = " << (fAttrTransB ? k : n) << ";\n"; // case bias is present if (!fNC.empty()){ @@ -479,27 +484,35 @@ namespace SOFIE{ out << SP; } // in the case of bias - if (!fNC.empty()){ - out << SP << "std::copy(" << "tensor_" << fNC2 << ", " << "tensor_" << fNC2 << " + " << lengthGemm << ", " - << "tensor_" << fNY; - if (doStackMul) out << " + " << opName << "_yoffset"; - out << ");\n"; + if (!fNC.empty() && fActivation == EActivationType::RELU){ + out << SP << "blas.gemmrelu("< kokkos_dev_"< kokkos_dev_"< kokkos_dev_"< GetBlasRoutines() override { return { std::string("Gemm"), std::string("Gemv") }; } + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNY); + fNY = fusable_tensor_name; + fOutputTensorNames[0] = fNY; + } + + std::string GetBlasConfig(){ + + int64_t dimA = fShapeA.size(); + int64_t dimB = fShapeB.size(); + std::string m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); + std::string n = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); + std::string k = (fAttrTransB ? 
fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); + return m+", "+n+", "+k; + } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 8fefa6d..66b31e5 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -75,6 +75,58 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ LEAKY_RELU_KERNEL_ALPAKA\n"; + op += SP + "struct LeakyReluKernel {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements, T alpha = static_cast(0.01)) const {\n"; + op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; + op += SP + SP + SP + SP + "data[i] = (data[i] < static_cast(0)) ? alpha * data[i] : data[i];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "LeakyReluKernel leakyReluKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator LeakyRelu called to Generate without being initialized first"); + } + + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ LEAKY_RELU_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_" << fNX + << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " + << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + + out << SP << "alpaka::exec(queue, workDiv_" << fNX + << ", leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), static_cast(" << length << "), static_cast(0.01));\n"; + + return out.str(); + } + + + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string 
fusable_tensor_name, const std::function& removal_func){ + removal_func(fNX); + removal_func(fNY); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + fOutputTensorNames[0] = fNY; + } + }; }//SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx index 12ec337..66c6f2d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx @@ -24,6 +24,7 @@ public: ROperator_Relu(){} ROperator_Relu(std::string nameX, std::string nameY): fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)){ + fKind = OperatorKind::RELU; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } @@ -65,6 +66,23 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ RELU_KERNEL_ALPAKA\n"; + op += SP + "struct ReluKernel{\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements) const {\n"; + op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; + op += SP + SP + SP + "data[i] = (data[i] < 0) ? 
0 : data[i];\n"; + op += SP + SP + "}\n"; + op += SP + "}\n};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "ReluKernel reluKernel;\n"; + } + std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { @@ -73,23 +91,27 @@ public: std::stringstream out; auto length = ConvertDynamicShapeToLength(fShape); out << "\n//------ RELU_GPU_ALPAKA\n"; - out << SP << "{\n"; - out << SP << SP <<"Idx totalElems = "<{\n" - <<"alpaka::workdiv::getValidWorkDiv(devAcc, {totalElems}, true, alpaka::GridBlockExtent::All)\n" - <<"};\n"; - out<< SP << SP << "alpaka::exec(queue, workDiv,\n" - <<"[] ALPAKA_FN_ACC (auto const& acc, auto buf, Idx size) {\n" - <<"Idx const idx = alpaka::getIdx(acc)[0];\n" - <<" if (idx < size) {\n" - <<" auto& x = alpaka::getPtrNative(buf)[idx];\n" - <<" x = x < 0 ? 0 : x;\n" - <<" }\n" - <<"}, bufDev_"<(1);\n"; + // out << SP << "Vec elementsPerGrid_" << fNX << " = static_cast(" << length << ");\n"; + // out << SP << "alpaka::KernelCfg kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; + // out << SP << "auto workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << "));\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; + return out.str(); + } + + std::string GetFusableOutputTensorName() override { + return fNY; } + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNX); + removal_func(fNY); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + 
fOutputTensorNames[0] = fNY; + } }; }//SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 68edd01..783e391 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -61,6 +61,58 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ SIGMOID_KERNEL_ALPAKA\n"; + op += SP + "struct SigmoidKernel {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements) const {\n"; + op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; + op += SP + SP + SP + SP + "data[i] = static_cast(1) / (static_cast(1) + exp(-data[i]));\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "SigmoidKernel sigmoidKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Sigmoid called to Generate without being initialized first"); + } + + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ SIGMOID_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_" << fNX + << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " + << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + + out << SP << "alpaka::exec(queue, workDiv_" << fNX + << ", sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), static_cast(" << length << "));\n"; + + return out.str(); + } + + std::string GetFusableOutputTensorName() override { + return fNY; + } + + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ + removal_func(fNX); + removal_func(fNY); + fNX = 
fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + fOutputTensorNames[0] = fNY; + } + std::vector GetStdLibs() override { return { std::string("cmath") };} }; diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index b78ad43..61357e8 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -386,6 +386,55 @@ void RModel::CheckAndFlushIntermediateMemory(std::span o } } +void RModel::CheckAndFuseOperators() { + size_t idx = 0; + std::vector fusable_indices; + std::string fusable_propagate_tensor_name; + while (idx < fOperators.size()) { + if (fOperators[idx]->GetKind() != OperatorKind::GEMM && fOperators[idx]->GetKind() != OperatorKind::CONV) { + ++idx; + continue; + } + + fusable_indices.clear(); + fusable_propagate_tensor_name.clear(); + + fusable_indices.push_back(idx); + size_t j = idx + 1; + for (; j < fOperators.size()-1; ++j) { + auto opKind = fOperators[j]->GetKind(); + // Only consider operators with fusable kinds + if (!FusableKinds.count(opKind)) { + break; + } + + const auto& tensorName = fOperators[j]->GetFusableOutputTensorName(); + auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName); + + // Propagate tensor name only if it's not used multiple times + fusable_indices.push_back(j); + if (freqIt != fIntermediateTensorFrequencyLookup.end() && + (freqIt->second != fOperators[j + 1]->GetOpOrder() || + FusableKinds.count(fOperators[j + 1]->GetKind()) == 0)) { + fusable_propagate_tensor_name = tensorName; + break; + } + } + if (!fusable_propagate_tensor_name.empty()) { + auto fusable_tensor_type = GetTensorType(fusable_propagate_tensor_name); + auto fusable_tensor_shape = GetDynamicTensorShape(fusable_propagate_tensor_name); + for (auto& index : fusable_indices) { + fOperators[index]->UpdateFusableTensorName(fusable_propagate_tensor_name, [this](const std::string& name) { + this->RemoveIntermediateTensor(name); + }); + } + 
AddIntermediateTensor(fusable_propagate_tensor_name, fusable_tensor_type, fusable_tensor_shape); + } + + idx = std::max(idx + 1, j); + } +} + void RModel::Initialize(int batchSize, bool verbose) { @@ -494,7 +543,7 @@ void RModel::Initialize(const std::map & inputParams, bool } i++; } - + CheckAndFuseOperators(); fIsInitialized = true; } @@ -653,14 +702,15 @@ std::string RModel::GenerateInferSignature(bool isdecl) { } } } + rGC += "alpaka::Buf "; } - rGC += "tensor_" + name + ","; + rGC += "deviceBuf_" + name + ","; i_input++; } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index d0047c0..549a3dd 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -10,13 +10,14 @@ namespace SOFIE { -//==================================================================== -// RModel - GPU Alpaka Codegen -//==================================================================== - void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { - if (!fInitializedTensors.empty()) - fGC += "\n// temporary initialized tensors for loading weights\n"; + if (!fInitializedTensors.empty()){ + fGC += "\n// initialized tensors for weights\n"; + fGC += "using BufF1D = alpaka::Buf;\n"; + fGC += "using BufD1D = alpaka::Buf;\n"; + fGC += "using BufI641D = alpaka::Buf;\n"; + + } for (auto &i : fInitializedTensors) { if (!fUseWeightFile || i.second.IsConstantTensor()) { @@ -29,9 +30,9 @@ void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { // case of tensors which are read from a file size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { - fGC += "auto deviceBuf_" + i.first + - " = alpaka::allocBuf(devAcc, " + - std::to_string(length) + ");\n"; + fGC += "BufF1D deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + + std::to_string(length) + "}));\n"; } } } @@ -40,7 +41,7 @@ void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { void 
RModel::GenerateTemporaryInitializedTensorContainers_GPU_ALPAKA() { if (!fInitializedTensors.empty()) - fGC += "// initialized tensors\n"; + fGC += "// temporary initialized tensors for loading weights\n"; for (auto &i : fInitializedTensors) { if (!fUseWeightFile || i.second.IsConstantTensor()) { @@ -53,7 +54,7 @@ void RModel::GenerateTemporaryInitializedTensorContainers_GPU_ALPAKA() // case of tensors which are read from a file size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { - fGC += "float tensor_" + i.first + "[" + std::to_string(length) + "];\n"; + fGC += "std::vector tensor_" + i.first + "(" + std::to_string(length) + ");\n"; } } } @@ -71,23 +72,21 @@ void RModel::GenerateGPU_ALPAKA_Buffers() { ");\n"; // No pointer allocation needed for BOOL } - if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == - fOutputTensorNames.end()) { - size_t length = ConvertShapeToLength(i.second.shape); - - if (i.second.type == ETensorType::FLOAT) { - tensor_declaration_block += "auto bufDev_" + i.first + - " = alpaka::allocBuf(devAcc," + - std::to_string(length) + ");\n"; - } else if (i.second.type == ETensorType::DOUBLE) { - tensor_declaration_block += "auto bufDev_" + i.first + - " = alpaka::allocBuf(devAcc," + - std::to_string(length) + ");\n"; - } else if (i.second.type == ETensorType::INT64) { - tensor_declaration_block += "auto bufDev_" + i.first + - " = alpaka::allocBuf(devAcc," + - std::to_string(length) + ");\n"; - } + + size_t length = ConvertShapeToLength(i.second.shape); + + if (i.second.type == ETensorType::FLOAT) { + tensor_declaration_block += "BufF1D deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + + std::to_string(length) + "}));\n"; + } else if (i.second.type == ETensorType::DOUBLE) { + tensor_declaration_block += "BufD1D deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + + std::to_string(length) + "}));\n"; + } else if (i.second.type 
== ETensorType::INT64) { + tensor_declaration_block += "BufI641D deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + + std::to_string(length) + "}));\n"; } } @@ -123,41 +122,12 @@ void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { auto length = ConvertDynamicShapeToLength(i.second.shape); out << SP << "if (" << length << " > 0) {\n"; out << "auto bufDev_" + i.first + - " = alpaka::allocBuf(devAcc," << length << ");\n"; + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" << length << "}));\n"; out << SP << "}\n"; } fGC += out.str(); } -namespace { - -std::string createOutputTensor(RModel const &rmodel, std::string const &name, bool isIntermediateTensor) -{ - if(name.empty()) return "{}"; - ETensorType eOutputType = rmodel.GetTensorType(name); - std::string outputType = ConvertTypeToString(eOutputType); - if (isIntermediateTensor) { - - if (eOutputType == ETensorType::BOOL) { - return "fTensor_" + name; - } else { - // need to check is size is the same(don't want to return a vector with larger size) - // in that case better to copy - return "std::vector<" + ConvertTypeToString(eOutputType) + ">(tensor_" + name + ", tensor_" + name + " + " + - std::to_string(ConvertShapeToLength(rmodel.GetTensorShape(name))) + ")"; - } - } - // include also dynamic tensors since the vectors can be allocated with a size larger than their output - // we need a special handling for bool type allocated as vector - auto outputLength = ConvertDynamicShapeToLength(rmodel.GetDynamicTensorShape(name)); - if (rmodel.IsDynamicTensor(name) && eOutputType == ETensorType::BOOL) { - return "std::vector(fTensor_" + name + ".begin(), fTensor_" + name + ".begin() + " + outputLength + ")"; - } - return "std::vector<" + outputType + ">(tensor_" + name + ", tensor_" + name + " + " + outputLength + ")"; -} - -} // namespace - void RModel::GenerateOutput_GPU_ALPAKA() { if (fVerbose) std::cout << "Generating main inference code for " << fName << std::endl; @@ -173,26 +143,9 @@ void 
RModel::GenerateOutput_GPU_ALPAKA() { fGC += "\n\n"; if (outputSize == 1) { - fGC += "std::vector<" + outputType + ">"; + fGC += "alpaka::Buf"; } else { - for (size_t i = 1; i < outputSize; i++) { - if (GetTensorType(fOutputTensorNames[i]) != eOutputType) - sameOutputTypes = false; - } - if (sameOutputTypes) { - fGC += "std::vector>"; - } else { - inferReturnType = "std::tuple<"; - for (size_t i = 0; i < outputSize; i++) { - inferReturnType += "std::vector<" + - ConvertTypeToString(GetTensorType(fOutputTensorNames[i])) + - ">"; - if (i < outputSize - 1) - inferReturnType += ","; - } - inferReturnType += ">"; - fGC += inferReturnType; - } + throw std::runtime_error("TMVA-SOFIE: multiple output tensors are not supported in ALPAKA code generation"); } fGC += " infer("; @@ -205,21 +158,37 @@ void RModel::GenerateOutput_GPU_ALPAKA() { fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); } - fGC += SP + "return {"; + fGC += "\n\n alpaka::wait(queue);\n"; + fGC += SP + "return "; + if (outputSize>1) fGC += " {"; for (size_t i = 0; i < outputSize; i++) { std::string tensorName = *(fOutputTensorNames.begin() + i); bool isIntermediate = fIntermediateTensorInfos.count(tensorName) > 0; - fGC += createOutputTensor(*this, tensorName, isIntermediate); + fGC += "deviceBuf_"+tensorName; if (i < outputSize - 1) fGC += ","; } - fGC += "};\n"; + if (outputSize>1) fGC += " };\n"; + else fGC += ";\n"; fGC += "}\n"; // end of infer function scope } void RModel::GenerateSessionCode_GPU_ALPAKA() { + + std::set registered_operators; + + fGC += "\n//--- ALPAKA Kernels\n"; + for (size_t id = 0; id < fOperators.size(); id++) { + std::cout<GetKind())<GetKind()) == registered_operators.end()) { + std::cout<<"Generating ALPAKA kernel for operator"<< std::endl; + fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(); + registered_operators.insert(fOperators[id]->GetKind()); + } + } + // define the Session struct (for GNN this is generated in RModel_GNN) - fGC += "template \n;"; 
+ fGC += "\n\ntemplate \n"; if (fUseSession) { if (!fIsSubGraph) fGC += "struct Session {\n\n"; @@ -228,11 +197,25 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { } // define host and device accelerators - fGC += "using Idx = alpaka::Idx;\n"; - fGC += "using devAcc = alpaka::AccGpuCudaRt, Idx, tagAcc>;\n"; - fGC += "using hostAcc = alpaka::AccCpuSerial, Idx>;\n\n"; + fGC += "using Idx = std::size_t;\n"; + fGC += "using Dim = alpaka::DimInt<1>;\n"; + fGC += "using Acc = alpaka::TagToAcc;\n"; + fGC += "using DevAcc = alpaka::Dev;\n"; + fGC += "using QueueProperty = alpaka::NonBlocking;\n"; + fGC += "using QueueAcc = alpaka::Queue;\n"; + fGC += "\nalpaka::Platform const platform{};\n"; + fGC += "DevAcc devAcc = alpaka::getDevByIdx(platform, 0);\n"; + fGC += "alpaka::PlatformCpu platformHost{};\n"; + fGC += "alpaka::DevCpu hostAcc = alpaka::getDevByIdx(platformHost, 0);\n"; + fGC += "QueueAcc queue{devAcc};\n"; + fGC += "Idx threadsPerBlock = 256;\n"; + fGC += "\nusing Ext1D = alpaka::Vec;\n"; + fGC += "using Vec = alpaka::Vec;\n"; + if (registered_operators.find(SOFIE::OperatorKind::GEMM) != registered_operators.end()) { + fGC += "\n\n// BLAS declarations\n"; + fGC += "sofieBLAS blas{queue};\n"; + } - GenerateInitializedTensorInfo_GPU_ALPAKA(); GenerateGPU_ALPAKA_Buffers(); GenerateOperatorDeclarations(); @@ -282,11 +265,25 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { for (size_t id = 0; id < fOperators.size(); id++) { fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); + if (fOperators[id]->GetKind() == OperatorKind::GEMM){ + fGC += "\nblas.AddLayoutConfig("+fOperators[id]->GetBlasConfig()+");"; + } } + fGC += "alpaka::wait(queue);\n"; fGC += "}\n\n"; } + registered_operators.clear(); + for (size_t id = 0; id < fOperators.size(); id++) { + std::cout<GetKind())<GetKind()) == registered_operators.end()) { + std::cout<<"Declaring ALPAKA kernel for operator"<< std::endl; + fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(); + 
registered_operators.insert(fOperators[id]->GetKind()); + } + } + GenerateOutput_GPU_ALPAKA(); if (fUseSession && !fIsGNNComponent) { @@ -346,17 +343,17 @@ void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ auto length = ConvertShapeToLength(i.second.shape()); std::string slength = std::to_string(length); if (i.second.type() == ETensorType::FLOAT) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(float));\n"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+", "+slength+");\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{"+ slength+"}));\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(float));\n"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else if (i.second.type() == ETensorType::DOUBLE) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(doub;e));"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+", "+slength+");\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{"+ slength+"}));\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(double));\n"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else if (i.second.type() == ETensorType::INT64) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc,"+ slength+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+", "+slength+"* sizeof(int64_t));"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+", "+slength+");\n"; + fGC += " auto 
hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{" + slength + "}));\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(int64_t));"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); } diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/src/SOFIE_core/src/RModel_Base.cxx index f212c53..3b1885c 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -29,7 +29,7 @@ void RModel_Base::GenerateHeaderInfo(std::string& hgname) { } // for the session we need to include SOFIE_Common functions //needed for convolution operator (need to add a flag) - fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; + fGC += "#include \"TMVA/SOFIE_common.hxx\"\n"; if (fUseWeightFile) fGC += "#include \n"; // Include TFile when saving the weights in a binary ROOT file @@ -79,7 +79,7 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { // for the session we need to include SOFIE_Common functions //needed for convolution operator (need to add a flag) - fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; + fGC += "#include \"TMVA/SOFIE_common.hxx\"\n"; if (fUseWeightFile) fGC += "#include \n"; // Include TFile when saving the weights in a binary ROOT file From afae7c38e9c5b35b5aa04ec1e494df4fd5090bfc Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 17 Oct 2025 12:53:38 +0200 Subject: [PATCH 06/43] feat: add basic binary kernel --- .../inc/SOFIE/ROperator_BasicBinary.hxx | 116 +++++++++++++++--- 1 file changed, 96 insertions(+), 20 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 756c3b9..09f690c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ 
b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -390,29 +390,105 @@ public: std::string Generate_GPU_Kernel_ALPAKA() override { std::string op; - op = "\n//------ BINARY_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; + op = "\n//------ "+opName+"_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; op += SP + "struct Binary"+BinaryOperatorTrait::Name()+"Kernel {\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * A, T const * B, T * C, const size_t * shape_A, const size_t * shape_B,\n"; - op += SP + SP + SP + "const size_t * shape_C, const size_t * strides_A, const size_t * strides_B, const size_t * strides_C, size_t ndim) const{\n"; - op += SP + SP + SP + SP + "size_t threadIdx1 = alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + SP + "size_t blockIdx1 = alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + SP + "size_t blockDim1 = alpaka::getWorkDiv(acc)[0];\n\n"; - op += SP + SP + SP + SP + "size_t outer_dim = blockIdx1;\n"; - op += SP + SP + SP + SP + "size_t inner_start = threadIdx1;\n"; - op += SP + SP + SP + SP + "size_t inner_stride = blockDim1;\n"; - op += SP + SP + SP + SP + "if (outer_dim >= shape_C[0]) return;\n\n"; - op += SP + SP + SP + SP + "size_t idx_A[ndim], idx_B[ndim];\n\n"; - op += SP + SP + SP + SP + "size_t flat_idx_A = 0, flat_idx_B = 0, flat_idx_C = 0;\n\n"; - op += SP + SP + SP + SP + "for(size_t inner = inner_start; inner < shape_C[1]; inner += inner_stride){\n"; - op += SP + SP + SP + SP + "for(size_t tensor_idx=0; tensor_idx("; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + op += "size_" + std::to_string(i); + } + op.pop_back(); + op += "));\n"; + op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; + auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeA); + auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeB); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + std::string compute_idx_A, compute_idx_B, compute_idx_Y; + if 
(fDimShapeA.empty() || + std::all_of(fDimShapeA.begin(), fDimShapeA.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_A = "0"; + } else { + for (size_t i = 0; i < fDimShapeA.size(); ++i) { + if (fDimShapeA[i].dim == 1 || fDimShapeA[i].GetVal() == "1") + continue; + compute_idx_A += "elem[" + std::to_string(i + (fDimShapeY.size() - fDimShapeA.size())) + "]"; + if (stridesA[i].GetVal() != "1") + compute_idx_A += " * " + stridesA[i].GetVal(); + compute_idx_A += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_A.pop_back(); + } + if (fDimShapeB.empty() || + std::all_of(fDimShapeB.begin(), fDimShapeB.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_B = "0"; + } else { + for (size_t i = 0; i < fDimShapeB.size(); ++i) { + if (fDimShapeB[i].dim == 1 || fDimShapeB[i].GetVal() == "1") + continue; + compute_idx_B += "elem[" + std::to_string(i + (fDimShapeY.size() - fDimShapeB.size())) + "]"; + if (stridesB[i].GetVal() != "1") + compute_idx_B += " * " + stridesB[i].GetVal(); + compute_idx_B += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_B.pop_back(); + } + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + compute_idx_Y += "elem[" + std::to_string(i) + "]"; + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + for (int j = 0; j < nloop + 1; j++) out << SP; + out << "C[" << compute_idx_Y << "] = " + << BinaryOperatorTrait::Op("A[" + compute_idx_A + "]", + "B[" + 
compute_idx_B + "]") + << " ;\n"; + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; + } + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "Binary"+BinaryOperatorTrait::Name()+"Kernel " + OpName + "Kernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Basic Binary called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ "+OpName+"_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", " << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_"< GetStdLibs() override @@ -431,4 +507,4 @@ public: } // namespace Experimental } // namespace TMVA -#endif // TMVA_SOFIE_ROperator_BasicBinary \ No newline at end of file +#endif // TMVA_SOFIE_ROperator_BasicBinary From c845fe7116e32063c9f8e9da018c9b1787a7df86 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 17 Oct 2025 13:01:07 +0200 Subject: [PATCH 07/43] feat: add cast kernel --- src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx | 30 +++++++++++++++++++++ src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 4 --- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx index 47c3d66..7532fa1 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx @@ -90,6 +90,36 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ CAST_KERNEL_ALPAKA\n"; + op += SP + "struct CastKernel{\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void 
operator()(TAcc const & acc, SrcT const * src, DstT * dst, std::size_t numElements) const {\n"; + op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; + op += SP + SP + SP + "dst[i] = static_cast(src[i]);\n"; + op += SP + SP + "}\n"; + op += SP + "}\n};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "CastKernel castKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Cast called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ CAST_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; + return out.str(); + } + }; }//SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx index 66c6f2d..351f7c4 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx @@ -91,10 +91,6 @@ public: std::stringstream out; auto length = ConvertDynamicShapeToLength(fShape); out << "\n//------ RELU_GPU_ALPAKA\n"; - // out << SP << "Vec elementsPerThread_" << fNX << " = static_cast(1);\n"; - // out << SP << "Vec elementsPerGrid_" << fNX << " = static_cast(" << length << ");\n"; - // out << SP << "alpaka::KernelCfg kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; - // out << SP << "auto workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << 
length << "));\n"; out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; return out.str(); From 3d9f8129ab12c1862831aa200f3c61f722093718 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Wed, 22 Oct 2025 00:36:26 +0200 Subject: [PATCH 08/43] feat: add squeeze, unsqueeze, flatten and reshape --- .../inc/SOFIE/ROperator_Reshape.hxx | 29 +++++++++++++++++++ .../inc/SOFIE/ROperator_Transpose.hxx | 12 ++++++++ 2 files changed, 41 insertions(+) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx index 66a7e09..ddb373e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx @@ -244,6 +244,35 @@ public: << ");\n"; return out.str(); } + + + std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; //no op for constant tensors + + OpName = "op_" + OpName; + + // output of reshape is same as input + size_t length = ConvertShapeToLength(fShapeOutput); + if (length != ConvertShapeToLength(fShapeInput)) { + throw std::runtime_error("TMVA SOFIE Reshape Op : wrong output shape - is " + + ConvertShapeToString(fShapeOutput) + " and input is " + + ConvertShapeToString(fShapeInput)); + } + std::stringstream out; + std::string opName = "Reshape"; + if (fOpMode == Flatten) + opName = "Flatten"; + else if (fOpMode == Squeeze) + opName = "Squeeze"; + else if (fOpMode == Unsqueeze) + opName = "Unsquueze"; + + + out << SP << "///-------" << opName << " operator\n" << std::endl; + out << SP << "alpaka::memcpy(queue, deviceBuf_" << fNOutput << ", deviceBuf_" << fNData << ");\n"; + return out.str(); + } + }; }//SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx 
b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 11c40bb..5c0f70e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -165,6 +165,18 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ TRANSPOSE_KERNEL_ALPAKA\n"; + op += SP + "struct TransposeKernel{\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T const * output, std::size_t * shape, std::size_t * strides) const {\n"; + op += SP + SP + SP + "for (auto i : alpaka::uniformElementsND(acc, shape)) {\n"; + op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; + + return op; + } + }; From 284405e329afef54c114c64a28701b771ba33ca5 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Wed, 22 Oct 2025 01:19:21 +0200 Subject: [PATCH 09/43] feat: add support for basic unary --- .../inc/SOFIE/ROperator_BasicUnary.hxx | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx index c18c17e..48d699e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx @@ -107,6 +107,36 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ " + UnaryOpTraits::Name() + "_KERNEL_ALPAKA\n"; + op += SP + "struct Unary" + UnaryOpTraits::Name() + "Kernel{\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements) const {\n"; + op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; + op += SP + SP + SP + "data[i] = " << UnaryOpTraits::Op("data[i]") << ";\n"; + op += SP + SP + "}\n"; + op += SP + "}\n};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { 
+ return SP + "Unary" + UnaryOpTraits::Name() + "Kernel " + UnaryOpTraits::Name() + "Kernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Relu called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ "+OpName+"_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; + return out.str(); + } + std::vector GetStdLibs() override { if (Op == EBasicUnaryOperator::kSqrt || Op == EBasicUnaryOperator::kExp || Op == EBasicUnaryOperator::kLog) { return { std::string("cmath") }; From d64a40ff6261bdbd352e10ffb9da7f82b2dd6b9f Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Wed, 22 Oct 2025 01:21:16 +0200 Subject: [PATCH 10/43] feat: add support for Constant operator --- src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx index 0d08432..6590909 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx @@ -101,6 +101,11 @@ public: // no code to generate here. Tensor are defined in Session constructor return "//---------------------------------------\n"; } + + std::string Generate_GPU_ALPAKA(std::string /* OpName */) override { + // no code to generate here. 
Tensor are defined in Session constructor + return "//---------------------------------------\n"; + } }; }//SOFIE From ac8d6628204b19fd691017bef0308633f86670ee Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Wed, 22 Oct 2025 17:44:08 +0200 Subject: [PATCH 11/43] feat: add support for shape operator --- src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx index 52bdeae..34e69eb 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx @@ -101,6 +101,26 @@ public: return out.str(); } + std::string Generate_GPU_ALPAKA(std::string OpName) override { + // no need to generate code if the output is constant + if (fIsOutputConstant) return ""; + + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Shape op called to Generate without being initialized first"); + } + std::stringstream out; + + out << "\n//------ Shape\n"; + // add a dummy statement to avoid warning for unused input + out << SP << "(void) deviceBuf_" << fNX << ";\n"; + size_t length = ConvertShapeToLength(fOutput_shape); + for (size_t id = 0; id < length; id++) { + out << SP << "deviceBuf_" << fNY << "["<< id << "] = " << fShape[fStart+id] << ";\n"; + } + return out.str(); + } + }; }//SOFIE From d75eac3375f1f372036d48f3e828473614a538fe Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 20 Nov 2025 14:03:58 +0100 Subject: [PATCH 12/43] feat: add support for Basic Binary operations --- .../inc/SOFIE/ROperator_BasicBinary.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx | 54 ++++++++++++++ src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 71 +++++++++++++++++++ src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx | 66 +++++++++++++++++ src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx | 50 +++++++++++++ .../test/EmitFromONNX_GPU_ALPAKA.cxx.in | 24 +++++++ 6 files 
changed, 268 insertions(+), 3 deletions(-) create mode 100644 src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 09f690c..8af8e3a 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -484,10 +484,10 @@ public: throw std::runtime_error("TMVA SOFIE Operator Basic Binary called to Generate without being initialized first"); } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertDynamicShapeToLength(fShapeY); out << "\n//------ "+OpName+"_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", " << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_"< workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", " << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_"<\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T * output, const size_t * input_shape, const size_t * output_shape, const size_t * input_strides, const size_t * output_strides, const size_t ndim){\n"; + op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + "size_t output_idx = 0;\n"; + op += SP + SP + SP + SP + "size_t coord_out;\n"; + op += SP + SP + SP + SP + "size_t coord_in;\n"; + op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; + op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; + op += SP + SP + SP + SP + "input_idx = 0;\n"; + op += SP + SP + SP + SP + "output_idx = 0;\n"; + op += SP + SP + 
SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; + op += SP + SP + SP + SP + SP + "coord_out = elem[i];\n"; + op += SP + SP + SP + SP + SP + "coord_in = (input_shape[i] == 1) ? 0 : coord_out;\n"; + op += SP + SP + SP + SP + SP + "input_idx += coord_in * input_strides[i];\n}\n"; + op += SP + SP + SP + SP + SP + "output_idx += coord_out * output_strides[i];\n}\n"; + op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "ExpandKernel expandKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Expand called to Generate without being initialized first"); + } + + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShape); + out << "\n//------ EXPAND_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_" << fNX + << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " + << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + + out << SP << "alpaka::exec(queue, workDiv_" << fNX + << ", expandKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY + << "), "<< UTILITY::ConvertShapeToString(fShapeX) <<", "<\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T const * indices, T * output, std::size_t const * output_shape, std::size_t const axis, std::size_t const axisDim, std::size_t const indicesNumElements, std::size_t const * output_strides, std::size_t const * input_strides, std::size_t const ndim) const {\n"; + op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; + op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; + + // find flattened index for indices tensor + op += SP + SP 
+ SP + SP + "int64_t idxLinear = 0;\n{\n"; + op += SP + SP + SP + SP + SP + "int64_t stride = 1;\n"; + op += SP + SP + SP + SP + SP + "for (int i = ndim - 1; i >= axis; --i) {;\n"; + op += SP + SP + SP + SP + SP + "stride *= (i > axis ? output_shape[i] : 1);\n}\n"; + op += SP + SP + SP + SP + SP + "idxLinear = elem[axis];\n"; + op += SP + SP + SP + SP + SP + "if (idxLinear >= indicesNumElements) idxLinear %= indicesNumElements;\n}\n"; + + // load gather index and wrap negative if any + op += SP + SP + SP + SP + "int64_t k = indices[idxLinear];\n"; + op += SP + SP + SP + SP + "if (k < 0) k += axisDim;\n"; + op += SP + SP + SP + SP + "if (k < 0) k = 0;\n"; + op += SP + SP + SP + SP + "if (k >= axisDim) k = axisDim - 1;\n"; + + // compute input flattened index + op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + "size_t output_idx = 0;\n"; + op += SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; + op += SP + SP + SP + SP + SP + "size_t coord = elem[i];\n"; + op += SP + SP + SP + SP + SP + "output_idx += coord * output_strides[i];\n}\n"; + op += SP + SP + SP + SP + SP + "if (i == axis) coord = k;\n"; + op += SP + SP + SP + SP + SP + "input_idx += coord * input_strides[i];\n}\n"; + + // write to output tensor + op += SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "GatherKernel gatherKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Gather called to Generate without being initialized first"); + } + + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShapeY); + out << "\n//------ GATHER_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_" << fNY + << "(alpaka::Vec::all((" << 
length << " + 256 - 1) / 256), " + << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + + out << SP << "alpaka::exec(queue, workDiv_" << fNY + << ", gatherKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNIndices + << "), alpaka::getPtrNative(deviceBuf_" << fNY + << "), "<< UTILITY::ConvertShapeToString(fShapeY) <<", "<< fAttrAxis <<", "<< fShapeX[fAttrAxis] <<", " + << fShapeIndices.size() <<", " + << UTILITY::ConvertShapeToString(ComputeStrideFromShape(fShapeY)) <<", " + << UTILITY::ConvertShapeToString(ComputeStrideFromShape(fShapeX)) <<", "<< fShapeY.size() + << ",static_cast(" << length << "));\n"; + + return out.str(); + } + }; }//SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx index 63fbcb3..0fd8f5a 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx @@ -153,6 +153,72 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA() override { + std::string op; + op = "\n//------ SPLIT_KERNEL_ALPAKA\n"; + op += SP + "struct SplitKernel {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T * output,"; + op += "std::size_t const * input_strides, std::size_t const * output_strides, std::size_t const split_axis, "; + op += "std::size_t const axis_offset, std::size_t const ndim) const {\n"; + op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; + op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; + op += SP + SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "size_t output_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; + op += SP + SP + SP + SP + SP + SP + "size_t output_coord = elem[i];\n"; + op += SP + SP + SP + SP + SP + SP + "size_t input_coord = (i == split_axis) ? 
(output_coord + axis_offset) : output_coord;\n"; + op += SP + SP + SP + SP + SP + SP + "input_idx += input_coord * input_strides[i];\n"; + op += SP + SP + SP + SP + SP + SP + "output_idx += output_coord * output_strides[i];\n}\n"; + op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "SplitKernel splitKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first"); + } + + std::stringstream out; + out << "\n//------ SPLIT_GPU_ALPAKA\n"; + + bool axis_is_innermost = (axis == static_cast(fInputShape.size()) - 1) + && (UTILITY::ComputeStridesFromShape(fInputShape)[fInputShape.size()-1] == 1); + out << SP <<"size_t "<(" << length << ") * sizeof(float);\n"; + out << SP << SP << SP << "alpaka::memcpy(queue, "< workDiv_" << fNYs[i] + << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " + << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + + out << SP << "alpaka::exec(queue, workDiv_" << fNYs[i] + << ", splitKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY + << "), "<< UTILITY::ConvertShapeToString(UTILITY::ComputeStrideFromShape(fInputShape)) <<", "<\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * __restrict__ tensor_X,"; + op += SP + SP + SP + "T * __restrict__ tensor_Y, const int64_t * __restrict__ shape_X,"; + op += SP + SP + SP + "const int64_t * __restrict__ stride_X, const int64_t * __restrict__ shape_Y,"; + op += SP + SP + SP + "const int64_t * __restrict__ stride_Y, std::size_t const ndim) const {\n"; + op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, 
alpaka::Vec(shape_Y));\n"; + op += SP + SP + SP + SP + "for (auto const& elem: elements) {\n"; + op += SP + SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "size_t output_idx = 0;\n"; + op += SP + SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; + op += SP + SP + SP + SP + SP + SP + "size_t input_coord = elem[i] % shape_X[i];\n"; + op += SP + SP + SP + SP + SP + SP + "input_idx += input_coord * stride_X[i];\n"; + op += SP + SP + SP + SP + SP + "output_idx += elem[i] * stride_Y[i];\n}\n"; + op += SP + SP + SP + SP + SP + "tensor_Y[output_idx] = tensor_X[input_idx];\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + return SP + "TileKernel tileKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Tile called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fShapeY); + out << "\n//------ TILE_GPU_ALPAKA\n"; + out << SP << "alpaka::WorkDivMembers workDiv_" << fNY + << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " + << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + + out << SP << "alpaka::exec(queue, workDiv_" << fNY + << ", tileKernel, alpaka::getPtrNative(deviceBuf_" << fNInput + << "), alpaka::getPtrNative(deviceBuf_" << fNY + << "), "<< UTILITY::ConvertShapeToString(fShapeInput)<<", "<< UTILITY::ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeInput)) <<", " + < Date: Sun, 23 Nov 2025 14:41:09 +0100 Subject: [PATCH 13/43] fix: compilation issues due to faulty rebase --- src/SOFIE_core/inc/SOFIE/RModel.hxx | 40 ++- src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 46 +++ src/SOFIE_core/inc/SOFIE/ROperator.hxx | 34 +- .../inc/SOFIE/ROperator_BasicBinary.hxx | 44 ++- 
.../inc/SOFIE/ROperator_BasicUnary.hxx | 11 +- .../SOFIE/ROperator_BatchNormalization.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx | 8 +- src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx | 120 +++++-- .../inc/SOFIE/ROperator_ConvTranspose.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 18 +- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 20 +- .../SOFIE/ROperator_LayerNormalization.hxx | 15 +- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx | 15 +- src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 8 +- .../inc/SOFIE/ROperator_Reshape.hxx | 333 +++++++++++------ .../inc/SOFIE/ROperator_Sigmoid.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx | 18 +- src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx | 12 +- .../inc/SOFIE/ROperator_Transpose.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx | 110 ++++-- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 161 +++++++-- src/SOFIE_core/src/RModel.cxx | 338 ++++++++++++------ src/SOFIE_core/src/RModel_ALPAKA.cxx | 4 +- src/SOFIE_core/src/RModel_Base.cxx | 4 +- src/SOFIE_core/src/RModel_GNN.cxx | 4 +- .../src/RModel_GraphIndependent.cxx | 6 +- src/SOFIE_core/src/SOFIE_common.cxx | 230 ++++++++---- src/SOFIE_core/test/CMakeLists.txt | 15 + .../test/TestCustomModelsFromONNX.cxx | 10 +- 31 files changed, 1134 insertions(+), 526 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 6083e47..09feb17 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -16,14 +16,21 @@ private: int fVerbose = 0; int fBatchSize = -1; long fReadPos = 0; // reading file position + size_t fConstantTensorSize = 0; // size (in Bytes) of the allocated constant tensors + size_t fWeightsTensorSize = 0; // size (in Bytes) of the allocated weight tensors + size_t fOtherTensorSize = 0; // size (in Bytes) of intermediate tensors which are not managed by the 
memory pool + + OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended; std::unordered_map fInputTensorInfos; // input tensors where shape may not fully defined or other graph inputs? std::unordered_map fReadyInputTensorInfos; // input tensors where shape is full defined std::unordered_map fInitializedTensors; std::unordered_map fIntermediateTensorInfos; std::unordered_map fDynamicTensorInfos; + std::unordered_map, bool>> fShapeTensors; // constant tensors describing a shape std::unordered_map fShapeParams; // parameters defining the dynamic shape (e.g. batch size), store also its default value + std::vector fDimShapeNames; // parameter names used to define the shapes std::vector fOutputTensorNames; std::vector fInputTensorNames; // input tensor names using ONNX order @@ -59,8 +66,13 @@ public: int Verbose() const { return fVerbose;} const std::vector &GetTensorShape(std::string name) const; - std::vector GetDynamicTensorShape(std::string name) const; + std::vector GetDimTensorShape(const std::string & name) const; const ETensorType &GetTensorType(std::string name) const; + std::vector GetDynamicTensorShape(const std::string & name) const ; + + // get the values for the tensor representing a shape + const std::vector & GetShapeTensorValues(const std::string & tensor_name) const; + bool CheckIfTensorAlreadyExist(std::string tensor_name); void AddInputTensorInfo(std::string input_name, ETensorType type, std::vector shape); @@ -102,6 +114,8 @@ public: AddInitializedTensor(tensor_name, GetTemplatedType(T()), shape, data); } + void AddShapeTensor(const std::string & name, const std::vector & shapeValues, bool scalar = false); + // add and initialize subgraph to the model void InitializeSubGraph(std::shared_ptr graph); @@ -118,6 +132,8 @@ public: bool IsDimInputTensor(const std::string &name) const; // check if tensor is a fully specified input tensor bool IsReadyInputTensor(const std::string &name) const; + /// check if a tensor is a shape tensor + bool 
IsShapeTensor(const std::string & name) const; // Add intermediate tensor void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector dim_shape); @@ -132,6 +148,8 @@ public: void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector shape, std::shared_ptr data); std::shared_ptr GetInitializedTensorData(std::string tensor_name); + template + std::vector GetTensorData(const std::string & name); void Initialize(int batchSize = -1, bool verbose = false); void Initialize(const std::map & inputParams, bool verbose = false); @@ -155,8 +173,8 @@ public: } // calculate total intermediate memory and position intermediate tensor addresses - std::string AllocateIntermediateMemory(std::span op_output_tensors); - void CheckAndFlushIntermediateMemory(std::span op_output_tensors, const size_t& op_idx); + std::string AllocateIntermediateMemory(std::span op_output_tensors); + void CheckAndFlushIntermediateMemory(std::span op_output_tensors, const size_t& op_idx); protected: // internal functions @@ -194,6 +212,7 @@ protected: public: const std::vector &GetInputTensorNames() const { return fInputTensorNames; } const std::vector &GetOutputTensorNames() const { return fOutputTensorNames; } + const std::vector & GetDimShapeNames() const { return fDimShapeNames; } void ReadInitializedTensorsFromFile(long); long WriteInitializedTensorsToFile(std::string filename = ""); @@ -228,6 +247,21 @@ public: ClassDefNV(RModel, 3); }; +template +inline std::vector RModel::GetTensorData(const std::string & name) { + if (!IsInitializedTensor(name)) return std::vector{}; + T * data = static_cast(GetInitializedTensorData(name).get()); + size_t size = ConvertShapeToLength(GetTensorShape(name)); + return std::vector(data, data+size); +} + +template<> +inline std::vector RModel::GetTensorData(const std::string & name) { + if (!IsShapeTensor(name)) return std::vector{}; + return GetShapeTensorValues(name); +} + + } // namespace SOFIE #endif // SOFIE_RMODEL diff 
--git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index 073c6bc..460372a 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -27,6 +27,15 @@ enum class Options { kGNNComponent = 0x10, }; +// Optimization levels inspired by ONNXRuntime. +// We only get Operator Fusion with the Basic, and +// memory reuse with Extended. kExtended is enabled +// by default +enum class OptimizationLevel { + kBasic = 0x0, + kExtended = 0x1, +}; + enum class WeightFileType { None, RootBinary, Text }; @@ -60,6 +69,43 @@ protected: bool fIsGNN = false; bool fIsGNNComponent = false; + // Function to generate the code for declaring and initializing constant tensors + // This is for tensors which are not part of weight files and can be created from the Constant operator + template + std::string GenerateConstantTensorCode(const std::pair &t) + { + std::stringstream strs; + std::string type = ConvertTypeToString(t.second.type()); + size_t length = ConvertShapeToLength(t.second.shape()); + // avoid using stack sizes for constant tensors to reduce compilation time + bool allocateOnStack = (length > 100) ? 
false : true; + + const T *data = t.second.data(); + + // and check if all values are the same + bool sameData = false; + // for non stack allocation check if data are the same + if (!allocateOnStack && length > 1) { + size_t idx = 1; + do { + sameData = (data[idx] == data[idx - 1]); + idx++; + } while (sameData && idx < length); + } + if (allocateOnStack) { + strs << type << " tensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; + } else { + strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; + if (sameData) + strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; + else { + strs << ConvertValuesToString(length, data) << ";\n"; + } + strs << "const " << type << " * tensor_" + t.first + " = fTensor_" + t.first + ".data();\n"; + } + return strs.str(); + } + public: /** Default constructor. Needed to allow serialization of ROOT objects. See diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index 0ad57b3..9bccc5b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -1,14 +1,11 @@ -#ifndef SOFIE_ROPERATOR -#define SOFIE_ROPERATOR +#ifndef TMVA_SOFIE_ROPERATOR +#define TMVA_SOFIE_ROPERATOR #include #include #include #include "SOFIE/SOFIE_common.hxx" -//#include "RModel.hxx" - - namespace SOFIE{ @@ -31,10 +28,10 @@ inline const char* toString(OperatorKind kind) { case OperatorKind::GEMM: return "GEMM"; case OperatorKind::LAYERNORM: return "LAYERNORM"; case OperatorKind::RELU: return "RELU"; - case OperatorKind::CONSTANT: return "CONSTANT"; - case OperatorKind::CONSTANTOFSHAPE: return "CONSTANTOFSHAPE"; - case OperatorKind::BATCHNORM: return "batchnorm"; - case OperatorKind::CONV: return "conv"; + case OperatorKind::CONSTANT: return "CONSTANT"; + case OperatorKind::CONSTANTOFSHAPE: return "CONSTANTOFSHAPE"; + case OperatorKind::BATCHNORM: return "BATCHNORM"; + case 
OperatorKind::CONV: return "CONV"; case OperatorKind::UNDEFINED: return "UNDEFINED"; default: return "UNKNOWN"; } @@ -48,11 +45,11 @@ class ROperator{ public: virtual std::vector GetBlasRoutines() { return {}; } virtual std::vector GetStdLibs() { return {}; } - virtual std::vector> ShapeInference(std::vector>) = 0; - virtual std::vector TypeInference(std::vector) = 0; + virtual std::vector> ShapeInference(std::vector>) { return {}; }; + virtual std::vector TypeInference(std::vector) { return {}; }; virtual void Initialize(RModel&) = 0; virtual std::string Generate(std::string OpName) = 0; //expect unique opName for each operator within the same RModel - virtual std::string Generate_GPU_ALPAKA(std::string OpName){ return "";} //expect unique opName for each operator within the same RModel + virtual std::string Generate_GPU_ALPAKA(std::string OpName){ return "";} //expect unique opName for each operator within the same RModel // generate initialization code for session constructor virtual std::string GenerateInitCode() { return "";} virtual std::string GenerateInitCode_GPU_ALPAKA() { return "";}; @@ -60,10 +57,9 @@ public: virtual std::string GenerateDeclCode() { return "";} // generate session data members specific to operator virtual std::string GenerateSessionMembersCode(std::string /*opName*/) { return ""; } - virtual std::string Generate_GPU_Kernel_ALPAKA() { return ""; } - virtual std::string Generate_GPU_Kernel_Definitions_ALPAKA() { return ""; } + virtual std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) { return ""; } + virtual std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) { return ""; } virtual std::string Header() { return "";} - virtual std::string GetFusableOutputTensorName() { return "";} virtual std::string GetBlasConfig() { return ""; } virtual void UpdateFusableTensorName(std::string, const std::function& removal_func){ return;}; @@ -78,7 +74,8 @@ protected: const std::string SP = " "; ///< space used to 
correctly indent the generated C++ code bool fUseSession = false; ///< flag to identify if using the session class bool fIsOutputConstant = false; ///< flag to identify if operator has a constant output (no need to generate code) - + bool fIsOutputParamShape = false; ///< flag to identify of the output represents a parametric shape (can be knwon at compile time) + mutable std::vector fInputTensorNames; mutable std::vector fOutputTensorNames; @@ -99,12 +96,11 @@ public: size_t GetOpOrder(){ return fOpOrder; } - + }; }//SOFIE - -#endif //SOFIE_OPERATOR +#endif //TMVA_SOFIE_OPERATOR diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 8af8e3a..da7cf63 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -1,14 +1,12 @@ #ifndef TMVA_SOFIE_ROperator_BasicBinary #define TMVA_SOFIE_ROperator_BasicBinary -#include "TMVA/SOFIE_common.hxx" -#include "TMVA/ROperator.hxx" -#include "TMVA/RModel.hxx" +#include "SOFIE/SOFIE_common.hxx" +#include "SOFIE/ROperator.hxx" +#include "SOFIE/RModel.hxx" #include -namespace TMVA { -namespace Experimental { namespace SOFIE { enum EBasicBinaryOperator { @@ -106,7 +104,7 @@ public: } int dynamicInputs = 0; if (model.IsDynamicTensor(fNA)) { - fDimShapeA = model.GetDynamicTensorShape(fNA); + fDimShapeA = model.GetDimTensorShape(fNA); dynamicInputs |= 1; } else { fShapeA = model.GetTensorShape(fNA); @@ -114,17 +112,17 @@ public: } if (model.IsDynamicTensor(fNB)) { dynamicInputs |= 2; - fDimShapeB = model.GetDynamicTensorShape(fNB); + fDimShapeB = model.GetDimTensorShape(fNB); } else { fShapeB = model.GetTensorShape(fNB); fDimShapeB = ConvertShapeToDim(fShapeB); } if (dynamicInputs & 1 && model.Verbose()) std::cout << BinaryOperatorTrait::Name() << " : input " << fNA << " is dynamic " - << ConvertShapeToString(fDimShapeA) << " "; + << ConvertDimShapeToString(fDimShapeA) << " "; if (dynamicInputs & 2 && 
model.Verbose()) std::cout << BinaryOperatorTrait::Name() << " : input " << fNB << " is dynamic " - << ConvertShapeToString(fDimShapeB) << " "; + << ConvertDimShapeToString(fDimShapeB) << " "; std::cout << std::endl; // check if need to broadcast at initialization time if shapes are known and different // (we could broadcast the tensor tensor to maximum values of dynamic shapes - to be done) @@ -388,7 +386,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) { std::string op; op = "\n//------ "+opName+"_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; op += SP + "struct Binary"+BinaryOperatorTrait::Name()+"Kernel {\n"; @@ -452,7 +450,7 @@ public: for (size_t i = 0; i < fDimShapeY.size(); ++i) { if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { nloop++; - for (int j = 0; j < nloop; j++) out << SP; + for (int j = 0; j < nloop; j++) op += SP; compute_idx_Y += "elem[" + std::to_string(i) + "]"; if (stridesY[i].GetVal() != "1") compute_idx_Y += " * " + stridesY[i].GetVal(); @@ -463,31 +461,31 @@ public: for (int j = 0; j < 3; j++) compute_idx_Y.pop_back(); } - for (int j = 0; j < nloop + 1; j++) out << SP; - out << "C[" << compute_idx_Y << "] = " - << BinaryOperatorTrait::Op("A[" + compute_idx_A + "]", + for (int j = 0; j < nloop + 1; j++) op += SP; + op += "C[" + compute_idx_Y + "] = " + + BinaryOperatorTrait::Op("A[" + compute_idx_A + "]", "B[" + compute_idx_B + "]") - << " ;\n"; + + " ;\n"; for (int i = nloop; i > 0; i--) { - for (int j = 0; j < i; j++) out << SP; - out << "}\n"; + for (int j = 0; j < i; j++) op += SP; + op += "}\n"; } } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string OpName) { return SP + "Binary"+BinaryOperatorTrait::Name()+"Kernel " + OpName + "Kernel;\n"; } - std::string Generate_GPU_ALPAKA(std::string OpName) override { - if (fShape.empty()) { + std::string 
Generate_GPU_ALPAKA(std::string OpName) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Operator Basic Binary called to Generate without being initialized first"); } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShapeY); + auto length = ConvertDimShapeToLength(fDimShapeY); out << "\n//------ "+OpName+"_ALPAKA\n"; out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", " << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_"<(queue, workDiv_" << fNY << ", " << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_"<::Name() + "_KERNEL_ALPAKA\n"; op += SP + "struct Unary" + UnaryOpTraits::Name() + "Kernel{\n"; @@ -120,19 +120,16 @@ public: return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { return SP + "Unary" + UnaryOpTraits::Name() + "Kernel " + UnaryOpTraits::Name() + "Kernel;\n"; } std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Relu called to Generate without being initialized first"); - } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertShapeToLength(fShapeX); out << "\n//------ "+OpName+"_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(length+255)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length 
<< ")); \n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx index a27cea4..1a6098d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx @@ -1,9 +1,9 @@ #ifndef SOFIE_ROPERATOR_BatchNormalization #define SOFIE_ROPERATOR_BatchNormalization -#include "SOFIE_common.hxx" -#include "ROperator.hxx" -#include "RModel.hxx" +#include "SOFIE/SOFIE_common.hxx" +#include "SOFIE/ROperator.hxx" +#include "SOFIE/RModel.hxx" #include diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx index 7532fa1..c813f7c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx @@ -90,7 +90,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA() { std::string op; op = "\n//------ CAST_KERNEL_ALPAKA\n"; op += SP + "struct CastKernel{\n"; @@ -103,7 +103,7 @@ public: return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) { return SP + "CastKernel castKernel;\n"; } @@ -113,9 +113,9 @@ public: throw std::runtime_error("TMVA SOFIE Operator Cast called to Generate without being initialized first"); } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertShapeToLength(fShape); out << "\n//------ CAST_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; + out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<< (length+255)/256 <<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), 
alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx index 0d5e574..3ef0ee4 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx @@ -1,5 +1,5 @@ -#ifndef SOFIE_ROPERATOR_Concat - #define SOFIE_ROPERATOR_Concat +#ifndef TMVA_SOFIE_ROPERATOR_Concat + #define TMVA_SOFIE_ROPERATOR_Concat #include "SOFIE/SOFIE_common.hxx" @@ -25,6 +25,7 @@ std::vector> fInputShapes; public: + ROperator_Concat(){} ROperator_Concat(std::vector inputs, int axis, int newAxis, std::string output): fAxis(axis), fnewAxis(newAxis), fOutput(UTILITY::Clean_name(output)) { @@ -53,6 +54,7 @@ throw std::runtime_error("TMVA SOFIE Concat Op - invalid axis value "); int concat_dim=0; + // case of Concat (fNewAxis = 0) and not ConcatFromSequence if(fnewAxis == 0){ for (size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i - 1].size()) @@ -73,6 +75,7 @@ ret[0][fAxis] = concat_dim; } std::vector stack; + // case ConCatFromSequence if(fnewAxis == 1){ for(size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i-1].size() ) @@ -96,8 +99,8 @@ } // get shape of output given inputs. 
It is going to be called after initialized - std::vector> ShapeInference(const std::vector> & inputs) { - std::vector> ret(1); + std::vector ShapeInference(const std::vector> & inputs, const RModel & model) { + std::vector ret(inputs[0].size()); // treat negative axis case if (fAxis<0) { fAxis = inputs[0].size()+fAxis; @@ -105,31 +108,54 @@ if (fAxis < 0 || fAxis >= (int) inputs[0].size()) throw std::runtime_error("TMVA SOFIE Concat Op - invalid axis value "); - int concat_dim=0; + Dim concat_dim; if(fnewAxis == 0){ for (size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i - 1].size()) throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have different shapes " + fInputs[i] + " : " + - ConvertDynamicShapeToString(inputs[i]) + " and " + fInputs[i-1] + " : " + ConvertDynamicShapeToString(inputs[i - 1])); + ConvertShapeToString(inputs[i]) + " and " + fInputs[i-1] + " : " + ConvertShapeToString(inputs[i - 1])); for (size_t iaxis = 0; iaxis < inputs[i].size(); iaxis++) { if ((int)iaxis == fAxis) { - // support only non-params shape for the concatenation axis - if (inputs[i][iaxis].isParam) - throw std::runtime_error("TMVA SOFIE Concat Op - not supporting input param dimensions for concatenation axis. 
Input shape is " + - ConvertDynamicShapeToString(inputs[i])); - concat_dim += inputs[i][iaxis].dim; + // support both integer and params shape for the concatenation axis + if (concat_dim.param.empty() && concat_dim.dim == 0) + concat_dim = inputs[i][iaxis]; + else if (inputs[i][iaxis].isParam || concat_dim.isParam) { + concat_dim = + Dim{ concat_dim.GetVal() + std::string("+ ") + inputs[i][iaxis].GetVal(), + static_cast(-1)}; + } else { + concat_dim = Dim { concat_dim.dim + inputs[i][iaxis].dim }; + } + } + else if (i == 0) { + ret[iaxis] = inputs[i][iaxis]; } - // other dimensions must be the same - else if (i > 0 && inputs[i][iaxis].GetVal() != inputs[i - 1][iaxis].GetVal()) + else if ((!inputs[i][iaxis].isParam && !ret[iaxis].isParam) && (inputs[i][iaxis].dim != ret[iaxis].dim)) { throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have wrong shapes " + - ConvertDynamicShapeToString(inputs[i]) + " and " + - ConvertDynamicShapeToString(inputs[i - 1])); + ConvertShapeToString(inputs[i]) + " and " + + ConvertShapeToString(inputs[i - 1])); + } + else if (!inputs[i][iaxis].isParam && ret[iaxis].isParam){ + // if shape is not parametric use it + ret[iaxis] = inputs[i][iaxis]; + } + else if (inputs[i][iaxis].isParam && ret[iaxis].isParam) { + // check which parameter is first in RModel list + auto & dimNames = model.GetDimShapeNames(); + auto p1 = std::find(dimNames.begin(), dimNames.end(), inputs[i][iaxis].param); + auto p2 = std::find(dimNames.begin(), dimNames.end(), ret[iaxis].param); + if (p1 < p2) ret[iaxis] = inputs[i][iaxis]; + } + } + // add parenthesis in case is an expression + if (concat_dim.isParam && concat_dim.dim == static_cast(-1)) + concat_dim = Dim{ std::string("(") + concat_dim.GetVal() + std::string(")"), concat_dim.dim }; } - // output shape - ret[0] = inputs[0]; - ret[0][fAxis].dim = concat_dim; + // output shape for concatenated axis + ret[fAxis] = Dim{concat_dim}; + } // case of stacking (not supported yet) // here we need to check 
that input shapes are the same @@ -141,24 +167,30 @@ return ret; } - void Initialize(RModel& model) override { + void Initialize(RModel& model) override { for (auto &it : fInputs) { if (model.CheckIfTensorAlreadyExist(it) == false) { throw std::runtime_error("TMVA SOFIE Concat Op Input Tensor " + it + " is not found in model"); } - fInputShapes.push_back(model.GetDynamicTensorShape(it)); + fInputShapes.push_back(model.GetDimTensorShape(it)); } - fOutputShape = ShapeInference(fInputShapes)[0]; + fOutputShape = ShapeInference(fInputShapes, model); if (model.Verbose()) - std::cout << "Output of concat operator has shape " << ConvertDynamicShapeToString(fOutputShape) << std::endl; + std::cout << "Output of concat operator has shape " << ConvertDimShapeToString(fOutputShape) << std::endl; // check if concat has constant inputs , axis 0(concat contigous memory and type is integer) + bool isOutputShape = false; if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) { fIsOutputConstant = true; + isOutputShape = true; + for ( auto & input : fInputs) { if (!model.IsInitializedTensor(input)) { fIsOutputConstant = false; - break; + if (!model.IsShapeTensor(input)) { + isOutputShape = false; + break; + } } } if (fIsOutputConstant) { @@ -177,26 +209,53 @@ model.AddConstantTensor(fOutput, outputShape, outputData.data()); if (model.Verbose()) { std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : " - << ConvertValuesToString(outputData) << std::endl; + << ConvertValuesToString(outputData) << " (constant)" << std::endl; } + } else if (isOutputShape) { + auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible + std::vector outputData(ConvertShapeToLength(outputShape)); + size_t offset = 0; + for ( auto & input : fInputs) { + std::vector inputData; + auto inputShape = model.GetTensorShape(input); // shape is not dynamic + size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a 
scalar + if (model.IsShapeTensor(input)) + inputData = model.GetShapeTensorValues(input); + else if (model.IsConstantTensor(input)) { + inputData.resize(inputLength); + auto intData = static_cast(model.GetInitializedTensorData(input).get()); + for (size_t i = 0; i < inputData.size(); i++) + inputData[i] = Dim{ static_cast(intData[i])}; + } + std::cout << "concatenating input data " << inputLength << " " << inputData[0] << std::endl; + std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); + offset += inputLength; + } + // add output tensor + model.AddShapeTensor(fOutput,outputData, false); // cannot be a scalar + if (model.Verbose()) { + std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : " + << ConvertShapeToString(outputData) << " (shape)" << std::endl; + } + fIsOutputConstant = true; } } if (!fIsOutputConstant) { model.AddIntermediateTensor(fOutput, model.GetTensorType(fInputs[0]), fOutputShape); if (model.Verbose()) { - std::cout << "Concat ---> " << fOutput << " " << ConvertDynamicShapeToString(fOutputShape) << std::endl; + std::cout << "Concat ---> " << fOutput << " " << ConvertDimShapeToString(fOutputShape) << std::endl; } } } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { if (fIsOutputConstant) return ""; - OpName = "op_"+OpName; + opName = "op_" + opName; if(fOutputShape.empty()){ throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); } std::stringstream out; - out<<"\n//--------- Concat\n"; + out<<"\n//--------- Concat " << opName << " --> " << ConvertShapeToString(fOutputShape) << "\n"; // special case when memory is contiguous bool hasShapeOnes = true; for(int i = 0; i 0) out << offset; offset += " + " + length; @@ -260,4 +319,5 @@ }; }//SOFIE - #endif //SOFIE_ROPERATOR_CONCAT + + #endif //TMVA_SOFIE_ROPERATOR_CONCAT \ No newline at end of file diff --git 
a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx index 0467385..b9d917b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx @@ -1,9 +1,9 @@ #ifndef SOFIE_SOFIE_ROPERATOR_CONVTRANSPOSE_HXX #define SOFIE_SOFIE_ROPERATOR_CONVTRANSPOSE_HXX -#include -#include -#include +#include "SOFIE/SOFIE_common.hxx" +#include "SOFIE/ROperator.hxx" +#include "SOFIE/RModel.hxx" #include #include diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx index 9da8f76..bf163b7 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx @@ -122,7 +122,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA() { std::string op; op = "\n//------ Expand_KERNEL_ALPAKA\n"; op += SP + "struct ExpandKernel {\n"; @@ -148,7 +148,7 @@ public: return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { return SP + "ExpandKernel expandKernel;\n"; } @@ -159,7 +159,7 @@ public: } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertShapeToLength(fShape); out << "\n//------ EXPAND_GPU_ALPAKA\n"; out << SP << "alpaka::WorkDivMembers workDiv_" << fNX << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " @@ -168,8 +168,8 @@ public: out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", expandKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< UTILITY::ConvertShapeToString(fShapeX) <<", "< workDiv_" << fNY << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; @@ -274,10 +270,10 @@ public: << ", gatherKernel, 
alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNIndices << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< UTILITY::ConvertShapeToString(fShapeY) <<", "<< fAttrAxis <<", "<< fShapeX[fAttrAxis] <<", " + << "), "<< ConvertShapeToString(fShapeY) <<", "<< fAttrAxis <<", "<< fShapeX[fAttrAxis] <<", " << fShapeIndices.size() <<", " - << UTILITY::ConvertShapeToString(ComputeStrideFromShape(fShapeY)) <<", " - << UTILITY::ConvertShapeToString(ComputeStrideFromShape(fShapeX)) <<", "<< fShapeY.size() + << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeY)) <<", " + << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeX)) <<", "<< fShapeY.size() << ",static_cast(" << length << "));\n"; return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index e3d0595..7756f9a 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -152,7 +152,7 @@ namespace SOFIE{ } } if (model.IsDynamicTensor(fNA) || model.IsDimInputTensor(fNA) ) { - fShapeA = model.GetDynamicTensorShape(fNA); + fShapeA = model.GetDimTensorShape(fNA); fIsDynamic = true; } else { auto shapeA_int = model.GetTensorShape(fNA); @@ -166,7 +166,7 @@ namespace SOFIE{ } if (model.IsDynamicTensor(fNB) || model.IsDimInputTensor(fNB)) { - fShapeB = model.GetDynamicTensorShape(fNB); + fShapeB = model.GetDimTensorShape(fNB); fIsDynamic = true; } else { @@ -195,7 +195,7 @@ namespace SOFIE{ if (!fIsDynamic) { shapeY = ConvertShapeToInt(fShapeY); if (shapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertDynamicShapeToString(fShapeY)); + throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertDimShapeToString(fShapeY)); } } @@ -266,7 +266,7 @@ namespace SOFIE{ if (model.Verbose()){ std::cout << "Gemm (or MatMul) " << " ---> " << fNY << " shape "; if (fIsDynamic) - std::cout << 
ConvertDynamicShapeToString(fShapeY) << std::endl; + std::cout << ConvertDimShapeToString(fShapeY) << std::endl; else std::cout << ConvertShapeToString(shapeY) << std::endl; } @@ -285,9 +285,9 @@ namespace SOFIE{ // include a separate scope to avoid defining unique operator temp variables out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; out << SP << "{\n"; - out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertDynamicShapeToString(fShapeY) << ");\n"; - auto length = SOFIE::ConvertDynamicShapeToLength(fShapeY); // output size + out << " float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertDimShapeToString(fShapeY) << ");\n"; + auto length = SOFIE::ConvertDimShapeToLength(fShapeY); // output size out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC2 << ");\n"; out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; @@ -306,9 +306,9 @@ namespace SOFIE{ // include a separate scope to avoid defining unique operator temp variables out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; out << SP << "{\n"; - out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << ".data()," << ConvertShapeToString(fShapeC) << ", " << ConvertDynamicShapeToString(fShapeY) << ");\n"; - auto length = SOFIE::ConvertDynamicShapeToLength(fShapeY); // output size + out << " float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << ".data()," << ConvertShapeToString(fShapeC) << ", " << ConvertDimShapeToString(fShapeY) << ");\n"; + auto length = SOFIE::ConvertDimShapeToLength(fShapeY); // output size out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{" << length << "}));\n"; out << SP << SP << "std::memcpy(alpaka::getPtrNative(hostBuf_"<< fNC2 <<"), data, "<< 
length << " * sizeof(float));\n"; out << SP << SP << "alpaka::memcpy(queue, deviceBuf_"<< fNC2 << ", hostBuf_"<< fNC2 << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx index 17b77b3..e6c4c99 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx @@ -1,5 +1,5 @@ -#ifndef SOFIE_ROPERATOR_LAYERNORMALIZATION -#define SOFIE_ROPERATOR_LAYERNORMALIZATION +#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION +#define TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION #include "SOFIE/RModel.hxx" #include "SOFIE/SOFIE_common.hxx" @@ -81,7 +81,7 @@ public: throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found."); } bool isDynamic = model.IsDynamicTensor(fNX); - fShapeX = model.GetDynamicTensorShape(fNX); + fShapeX = model.GetDimTensorShape(fNX); fShapeY = fShapeX; model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); // Type of the output @@ -93,13 +93,13 @@ public: // Shape of fShapeX[0, ..., fAxis) fAxesShape = std::vector(fShapeX.begin(), fShapeX.begin() + fAxis); // Length of the axes - fAxesLength = ConvertDynamicShapeToLength(fAxesShape); + fAxesLength = ConvertDimShapeToLength(fAxesShape); // Shape of fShapeX[fAxis, ..., fSize) fNormalizedShape = std::vector(fShapeX.begin() + fAxis, fShapeX.end()); // Length of the normalized axis - fNormalizedLength = ConvertDynamicShapeToLength(fNormalizedShape); + fNormalizedLength = ConvertDimShapeToLength(fNormalizedShape); // length of the input - fLength = ConvertDynamicShapeToLength(fShapeX); + fLength = ConvertDimShapeToLength(fShapeX); // Type of mean and std ETensorType type = (fAttrStashType == 1) ? 
ETensorType::FLOAT : model.GetTensorType(fNX); // Mean @@ -146,7 +146,7 @@ public: out << SP << "// Broadcasting the bias of LayerNormalization op\n"; out << SP << "{\n"; out << SP << SP << "float* data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_"; - out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertDynamicShapeToString(fShapeX) << ");\n"; + out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeX) << ");\n"; out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n"; out << SP << "delete[] data;\n"; out << SP << "}\n"; @@ -339,5 +339,4 @@ public: } // namespace SOFIE - #endif diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 66b31e5..02eca17 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -75,7 +75,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA() { std::string op; op = "\n//------ LEAKY_RELU_KERNEL_ALPAKA\n"; op += SP + "struct LeakyReluKernel {\n"; @@ -89,7 +89,7 @@ public: return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { return SP + "LeakyReluKernel leakyReluKernel;\n"; } @@ -100,7 +100,7 @@ public: } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertShapeToLength(fShape); out << "\n//------ LEAKY_RELU_GPU_ALPAKA\n"; out << SP << "alpaka::WorkDivMembers workDiv_" << fNX << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx index 8af272d..0930a0b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx @@ -1,5 +1,5 @@ -#ifndef 
SOFIE_ROPERATOR_RANGE -#define SOFIE_ROPERATOR_RANGE +#ifndef TMVA_SOFIE_ROPERATOR_RANGE +#define TMVA_SOFIE_ROPERATOR_RANGE #include "SOFIE/SOFIE_common.hxx" #include "SOFIE/ROperator.hxx" @@ -8,7 +8,6 @@ #include #include - namespace SOFIE{ template @@ -89,9 +88,9 @@ public: model.AddDynamicTensor(fNOutput, type, fShape); } if (model.Verbose()) { - std::cout << "Range -> output is " << fNOutput << " "; - if (fIsOutputConstant) std::cout << ConvertDynamicShapeToString(fShape) << std::endl; - else std::cout << ConvertDynamicShapeToString(model.GetDynamicTensorShape(fNOutput)) << std::endl; + std::cout << "Range -> output is " << fNOutput << " : " << ConvertShapeToString(fShape); + if (fIsOutputConstant) std::cout << " : " << ConvertValuesToString(model.GetTensorData(fNOutput)); + std::cout << std::endl; } } @@ -121,5 +120,5 @@ public: }; }//SOFIE - -#endif //SOFIE_ROPERATOR_RANGE + +#endif //TMVA_SOFIE_ROPERATOR_RANGE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx index 351f7c4..5fb2f04 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx @@ -43,11 +43,11 @@ public: throw std::runtime_error("TMVA SOFIE Relu Op Input Tensor " + fNX + " is not found in model"); } - fShape = model.GetDynamicTensorShape(fNX); + fShape = model.GetDimTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); if (model.Verbose()) { - std::cout << "Relu : " << fNX << " -> " << fNY << " " << ConvertDynamicShapeToString(fShape) << std::endl; + std::cout << "Relu : " << fNX << " -> " << fNY << " " << ConvertDimShapeToString(fShape) << std::endl; } } @@ -66,7 +66,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA() { std::string op; op = "\n//------ RELU_KERNEL_ALPAKA\n"; op += SP + "struct ReluKernel{\n"; @@ -79,7 +79,7 @@ public: return op; } - std::string 
Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { return SP + "ReluKernel reluKernel;\n"; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx index ddb373e..a014547 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx @@ -1,12 +1,15 @@ -#ifndef SOFIE_ROPERATOR_RESHAPE -#define SOFIE_ROPERATOR_RESHAPE +#ifndef TMVA_SOFIE_ROPERATOR_RESHAPE +#define TMVA_SOFIE_ROPERATOR_RESHAPE #include "SOFIE/SOFIE_common.hxx" #include "SOFIE/ROperator.hxx" #include "SOFIE/RModel.hxx" #include +#include #include +#include + namespace SOFIE{ @@ -19,17 +22,20 @@ class ROperator_Reshape final : public ROperator private: bool fVerbose = false; + bool fDimInput = false; + bool fDynamicShape = false; ReshapeOpMode fOpMode = Reshape; // type of Reshape operator int fAllowZero = 0; // (for Reshape) zero in tensor shape makes output shape equal to input tensor shape int fAxis = 1; // (for Flatten) std::string fNData; // input data tensor name - std::string fNShape; // reshape tensor name + std::string fNInput2; // reshape or axes tensor name depending on operator std::string fNOutput; // output tensor name - std::vector fShapeInput; // input shape data - std::vector fShapeOutput; // output shape data + std::vector fShapeInput; // input shape data + std::vector fShapeOutput; // output shape data std::vector fAttrAxes; // axes attributes (provided for all version of Squeeze/Unsqueeze) + std::vector fShape; // shape tensor values provided for Reshape public: @@ -42,16 +48,16 @@ public: } ROperator_Reshape(){} - ROperator_Reshape(ReshapeOpMode opMode, int attr_value, std::string nameData, std::string nameShape, std::string nameOutput) - : fOpMode(opMode), fNData(UTILITY::Clean_name(nameData)), fNShape(UTILITY::Clean_name(nameShape)), - fNOutput(UTILITY::Clean_name(nameOutput)) + 
ROperator_Reshape(ReshapeOpMode opMode, int attr_value, std::string nameData, std::string nameInput2, std::string nameOutput) + : fOpMode(opMode), fNData(UTILITY::Clean_name(nameData)), fNInput2(UTILITY::Clean_name(nameInput2)), + fNOutput(UTILITY::Clean_name(nameOutput)) { if (opMode == Reshape) fAllowZero = attr_value; if (opMode == Flatten) fAxis = attr_value; fInputTensorNames = { fNData }; - if(!fNShape.empty()){ - fInputTensorNames.emplace_back(fNShape); + if(!fNInput2.empty()){ + fInputTensorNames.emplace_back(fNInput2); } fOutputTensorNames = { fNOutput }; } @@ -70,94 +76,153 @@ public: auto ret = std::vector(1, input[0]); return ret; } + std::vector> ShapeInference(std::vector> input) override { + return input; + } // output shape - std::vector> ShapeInference(std::vector> input) override { - std::vector> ret; + std::vector> ShapeInference(const std::vector> & input) { + std::vector> ret; auto & input_shape = input[0]; - if (fOpMode == Reshape) { - if (input.size() != 2) throw std::runtime_error("TMVA SOFIE Reshape Op needs 2 input tensors"); - auto output_shape = input[1]; // the provided shape - size_t input_length = ConvertShapeToLength(input_shape); - size_t output_length = ConvertShapeToLength(output_shape); - // (input_length == output_length) is the easy case : (2,3,4) -> (2,12) - if (input_length != output_length) { - if ((output_length == 0 && fAllowZero == 0) || static_cast(output_length) < 0) { - // in this case value 0 or -1 in shape are automatically corrected - bool replacementDone = false; - for (size_t i = 0; i < output_shape.size(); i++) { - if (output_shape[i] == 0 || output_shape[i] == static_cast(-1)) { - if (replacementDone) { - throw std::runtime_error("TMVA Reshape Op : output shape has multiple negative or zero values"); + // correct the provided shape (here we have the value) for 0 or -1 + std::vector output_shape(fShape.size()); + assert(!fShape.empty() && !fDynamicShape); + for (size_t i = 0; i < output_shape.size(); i++) { + if 
(fShape[i] > 0 || (fAllowZero && fShape[i] >= 0)) + output_shape[i] = Dim{ static_cast(fShape[i]) }; + else if (!fAllowZero && fShape[i] == 0) + output_shape[i] = input_shape[i]; + } + // now case of -1 in shape + for (size_t i = 0; i < output_shape.size(); i++) { + if (fShape[i] == -1) { + auto tmp = output_shape; + tmp.erase(tmp.begin() + i); + auto tmp_length = ConvertDimShapeToLength(tmp); + auto input_length = ConvertDimShapeToLength(input_shape); + if (fVerbose) + std::cout << "reshape- try simplifying " << ConvertDimShapeToString(input_shape) << " with length " + << input_length << " to " << tmp_length << std::endl; + + if (IsInteger(tmp_length) && IsInteger(input_length)) + output_shape[i] = Dim{static_cast(std::stoi(input_length) / std::stoi(tmp_length))}; + else { + //we can try simplifying expression if tmp_length is integer and part of input_length + // contains tmp_length + bool canSimplify = false; + std::vector reduced_input; + if (IsInteger(tmp_length)) { + + // try to tokenize with * the input length + + std::stringstream ss(input_length); + + std::string token; + + // Tokenizing w.r.t. 
space '*' + while(getline(ss, token, '*')) + { + // remove any whitespace + token.erase(std::remove_if(token.begin(), token.end(), + [](unsigned char x) { return std::isspace(x); }), token.end()); + if (token != tmp_length) { + if (IsInteger(token)) { + size_t il = static_cast(std::stoi(input_length)); + size_t tl = static_cast(std::stoi(tmp_length)); + if ((il % tl) == 0) { + canSimplify = true; + reduced_input.push_back(Dim{il / tl}); + } + } else { + reduced_input.push_back(Dim{token}); + } + } else { + // token is equal to tmp_length, can be not considered and is simplified + canSimplify = true; + } } - auto tmp = output_shape; - tmp.erase(tmp.begin() + i); - auto tmp_length = ConvertShapeToLength(tmp); - output_shape[i] = input_length / tmp_length; - replacementDone = true; } + if (canSimplify) { + // if length contains * we need to add some brackets + std::string res_shape = ConvertDimShapeToLength(reduced_input); + if (res_shape.find('*') != std::string::npos) + output_shape[i] = Dim{std::string("(") + res_shape + ")", static_cast(-1)}; + else + output_shape[i] = Dim{res_shape}; + } + if (!canSimplify) + output_shape[i] = Dim{std::string("(") + input_length + " / (" + tmp_length + "))", static_cast(-1)}; } - if (fVerbose) - std::cout << "Reshape: correct output shape from " << ConvertShapeToString(input[1]) - << " to " << ConvertShapeToString(output_shape) << std::endl; - } - if (ConvertShapeToLength(output_shape) != input_length) { - throw std::runtime_error("TMVA Reshape Op : Invalid shapes : " + ConvertShapeToString(input_shape) + - ConvertShapeToString(output_shape)); + + break; // cannot have more than -1 } + // throw std::runtime_error( + // "TMVA Reshape Op : output shape has multiple negative or zero values"); + } + + if (fVerbose) + std::cout << "Reshape: correct output shape to " << ConvertShapeToString(output_shape) << std::endl; + + if (!fDimInput && ConvertDimShapeToLength(output_shape) != ConvertDimShapeToLength(input_shape)) { + throw 
std::runtime_error("TMVA Reshape Op : Invalid shapes : " + ConvertShapeToString(input_shape) + + ConvertShapeToString(output_shape)); } ret.push_back(output_shape); } else if (fOpMode == Flatten) { - // flattenig case - size_t inputSize = ConvertShapeToLength(input_shape); - size_t b = input[0][0]; - std::vector newShape = {b, inputSize / b}; + // flatten case + if (fAxis < 0) + fAxis += input_shape.size(); + auto s1 = std::vector(input_shape.begin(), input_shape.begin() + fAxis); + auto s2 = std::vector(input_shape.begin() + fAxis, input_shape.end()); + auto l1 = ConvertDimShapeToLength(s1); + auto l2 = ConvertDimShapeToLength(s2); + std::vector newShape = {Dim{l1}, Dim{l2}}; ret.push_back(newShape); - } else if (fOpMode == Squeeze) { // squeeze // assume no axis is provided - remove all axes with value equal to 1 - auto output_shape = input[0]; - if (input.size() == 1) { + auto output_shape = input_shape; + if (fAttrAxes.empty()) { size_t i = 0; while (i < output_shape.size()) { - if (output_shape[i] == 1 ) { + if (output_shape[i] == Dim{1}) { output_shape.erase(output_shape.begin() + i); - } - else { + } else { i++; } } - } else if (input.size() == 2) { - auto & axes = input[1]; - for (size_t i = 0; i < axes.size(); i++){ - if (output_shape[axes[i]] != 1) - throw std::runtime_error("TMVA Squeeze Op : Invalid axes : " + ConvertShapeToString(axes) + - ConvertShapeToString(output_shape)); + } else { + auto &axes = fAttrAxes; + for (size_t i = 0; i < axes.size(); i++) { + if (axes[i] < 0) + axes[i] += input_shape.size(); + if (!(output_shape[axes[i]] == Dim{1})) + throw std::runtime_error("TMVA Squeeze Op : Invalid axis value " + std::to_string(axes[i]) + + " for " + ConvertShapeToString(output_shape)); output_shape.erase(output_shape.begin() + axes[i]); } } ret.push_back(output_shape); } - else if (fOpMode == Unsqueeze) { // unsqueeze - assert(input.size() == 2); - auto output_shape = input[0]; - auto &axes = input[1]; + std::cout << "doing unsqueeze....\n"; + 
assert(!fAttrAxes.empty()); + auto output_shape = input_shape; + auto &axes = fAttrAxes; // output rank int64_t r = input[0].size() + axes.size(); - for (auto & a : axes) { + for (auto &a : axes) { int64_t i = static_cast(a); - if ( i < -r || i > r - 1 ) + if (i < -r || i > r - 1) throw std::runtime_error("TMVA Unsqueeze Op - axes input is not in correct range"); if (i >= 0) - output_shape.insert(output_shape.begin() + i, 1); + output_shape.insert(output_shape.begin() + i, Dim{1}); else - //negative axes - output_shape.insert(output_shape.end() + i + 1, 1); + // negative axes + output_shape.insert(output_shape.end() + i + 1, Dim{1}); } ret.push_back(output_shape); } @@ -166,34 +231,51 @@ public: void Initialize(RModel& model) override { + std::cout << "initialize reshape op type " << fOpMode << " - " << fNInput2 << " " << fNData << std::endl; fVerbose = model.Verbose(); if (model.CheckIfTensorAlreadyExist(fNData) == false) { // input must be a graph input, or already initialized intermediate tensor throw std::runtime_error("TMVA Reshape Op Input Tensor " + fNData + " is not found in model"); } - fShapeInput = model.GetTensorShape(fNData); - // check if optional shape tensor exist - if (!fNShape.empty()) { - if (model.CheckIfTensorAlreadyExist(fNShape)) { - auto dptr = model.GetInitializedTensorData(fNShape); - auto input_shape = static_cast(dptr.get()); - auto vec = model.GetTensorShape(fNShape); - assert(vec.size() == 1); - size_t n = vec[0]; // size of shape input tensor - - std::vector descShape(n); - std::copy(input_shape, input_shape + n, descShape.begin()); - fShapeOutput = ShapeInference({fShapeInput, descShape})[0]; - // set flag to not write tensor in weight file. 
Its data will be hard-coded in way model is constructed - model.SetNotWritableInitializedTensor(fNShape); + fShapeInput = model.GetDimTensorShape(fNData); + fDimInput = model.IsDynamicTensor(fNData); + // check if optional tensor exists defining shape or axes + if (!fNInput2.empty()) { + if (model.CheckIfTensorAlreadyExist(fNInput2)) { + if (model.IsConstantTensor(fNInput2) || model.IsInitializedTensor(fNInput2)) { + // assume input shape is an initialized tensor + auto dptr = model.GetInitializedTensorData(fNInput2); + auto values = static_cast(dptr.get()); + auto vec = model.GetTensorShape(fNInput2); + size_t n = 1; + if (vec.size() > 0) + n = vec[0]; // size of shape input tensor + // copy values in fShape vector or fAttrAxes + if (fOpMode == Reshape) + fShape = std::vector(values, values + n); + else + fAttrAxes = std::vector(values, values + n); + + fShapeOutput = ShapeInference({fShapeInput})[0]; + // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed + model.SetNotWritableInitializedTensor(fNInput2); + } else { + // we cannot get shape at initialization time but at run-time + fDynamicShape = true; + // size of shape output us given by size of shape input tensor + auto shapeInput2 = model.GetTensorShape(fNInput2); + fShapeOutput.resize(shapeInput2[0]); + for (size_t i = 0; i < fShapeOutput.size(); i++) { + fShapeOutput[i] = Dim{ std::string("s_") + fNOutput + "_" + std::to_string(i)}; + } + } } else { - throw std::runtime_error("TMVA Reshape Op Shape Tensor " + fNShape + " is not found in model"); + throw std::runtime_error("TMVA Reshape Op 2nd input Tensor " + fNInput2 + " is not found in model"); } } else if (!fAttrAxes.empty()) { - // case fNShape is empty and axes are provided as attributes - std::vector descShape(fAttrAxes.size()); - std::copy(fAttrAxes.begin(), fAttrAxes.end(), descShape.begin()); - fShapeOutput = ShapeInference({fShapeInput, descShape})[0]; + // case fNShape is empty and axes are 
provided as attributes (e.g. for Unsqueeze) + std::cout << "attribute axes exists\n"; + fShapeOutput = ShapeInference({fShapeInput})[0]; } else if (fOpMode == Flatten || fOpMode == Squeeze) { fShapeOutput = ShapeInference({fShapeInput})[0]; } else { @@ -203,14 +285,26 @@ public: if (model.IsInitializedTensor(fNData) && model.GetTensorType(fNData) == ETensorType::INT64) { fIsOutputConstant = true; auto inputData = static_cast(model.GetInitializedTensorData(fNData).get()); - if (ConvertShapeToLength(fShapeInput) != ConvertShapeToLength(fShapeOutput)) + auto o_shape = ConvertShapeToInt(fShapeOutput); + if (ConvertShapeToLength(ConvertShapeToInt(fShapeInput)) != ConvertShapeToLength(o_shape) ) throw std::runtime_error("TMVA Reshape Op : Invalid Input/Output lengths"); - model.AddConstantTensor(fNOutput, fShapeOutput, inputData); + model.AddConstantTensor(fNOutput, o_shape, inputData); if (model.Verbose()) { std::cout << Name() << " : " << fNData << " " << ConvertShapeToString(fShapeInput) << " --> " << fNOutput << " (constant) " << ConvertShapeToString(fShapeOutput) << " : " << - ConvertValuesToString(ConvertShapeToLength(fShapeOutput), inputData) << std::endl; + ConvertValuesToString(ConvertShapeToLength(o_shape), inputData) << std::endl; } - } else { + } + // for shape tensors we can have it if output shape is size==1 or a scalar + else if (model.IsShapeTensor(fNData) && fShapeOutput.size() <=1) { + fIsOutputConstant = true; + auto inputData = model.GetShapeTensorValues(fNData); + model.AddShapeTensor(fNOutput, inputData); + if (model.Verbose()) { + std::cout << Name() << " : " << fNData << " " << ConvertShapeToString(fShapeInput) << " --> " << fNOutput << " (shape) " << ConvertShapeToString(fShapeOutput) << " : " << + ConvertShapeToString(inputData) << std::endl; + } + } + else { // non-constant case model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); if (model.Verbose()) @@ -218,54 +312,67 @@ public: } } - std::string 
Generate(std::string OpName) override { + std::string Generate(std::string opName) override { if (fIsOutputConstant) return ""; //no op for constant tensors - OpName = "op_" + OpName; - - // output of reshape is same as input - size_t length = ConvertShapeToLength(fShapeOutput); - if (length != ConvertShapeToLength(fShapeInput)) { - throw std::runtime_error("TMVA SOFIE Reshape Op : wrong output shape - is " + - ConvertShapeToString(fShapeOutput) + " and input is " + - ConvertShapeToString(fShapeInput)); - } std::stringstream out; - std::string opName = "Reshape"; + std::string opType = "Reshape"; if (fOpMode == Flatten) - opName = "Flatten"; + opType = "Flatten"; else if (fOpMode == Squeeze) - opName = "Squeeze"; + opType = "Squeeze"; else if (fOpMode == Unsqueeze) - opName = "Unsquueze"; + opType = "Unsquueze"; + + out << SP << "///--------" << opType << " operator " << opName << " --> " << ConvertShapeToString(fShapeOutput) << "\n"; + + // in case of dynamic output shape we need to set the shape value from input shape tensor + // and take case of the zero values + if (fDynamicShape) { + for (size_t i = 0; i < fShapeOutput.size(); i++) { + // since fNInput2 values are int64_t, should we check if they are negative? 
+ out << SP << "size_t " << fShapeOutput[i].param << " = " << "tensor_" << fNInput2 << "[" << i << "];\n"; + if (!fAllowZero) + out << SP << "if (tensor_" << fNInput2 << "[" << i << "] <= 0 ) " + << fShapeOutput[i].param << " = " << fShapeInput[i] << ";\n"; + } + } - out << SP << "///--------" << opName << " operator\n" << std::endl; - out << SP << "std::copy( tensor_" << fNData << ", tensor_" << fNData << " + " << length << ", " << "tensor_" << fNOutput + // output of reshape is same as input + auto lengthOut = ConvertDimShapeToLength(fShapeOutput); + auto lengthIn = ConvertDimShapeToLength(fShapeInput); + if (lengthOut != lengthIn) { + // check needs to be done at run-time + out << SP << "if (" << lengthOut << "!=" << lengthIn << ")\n"; + out << "throw std::runtime_error(\"TMVA SOFIE Reshape Op : output lengths is different than input one\");\n"; + } + + + out << SP << "std::copy( tensor_" << fNData << ", tensor_" << fNData << " + " << lengthIn << ", " << "tensor_" << fNOutput << ");\n"; return out.str(); } - std::string Generate_GPU_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; //no op for constant tensors - OpName = "op_" + OpName; + opName = "op_" + opName; // output of reshape is same as input - size_t length = ConvertShapeToLength(fShapeOutput); - if (length != ConvertShapeToLength(fShapeInput)) { + auto length = ConvertDimShapeToLength(fShapeOutput); + if (length != ConvertDimShapeToLength(fShapeInput)) { throw std::runtime_error("TMVA SOFIE Reshape Op : wrong output shape - is " + - ConvertShapeToString(fShapeOutput) + " and input is " + - ConvertShapeToString(fShapeInput)); + ConvertDimShapeToString(fShapeOutput) + " and input is " + + ConvertDimShapeToString(fShapeInput)); } std::stringstream out; - std::string opName = "Reshape"; + opName += "_Reshape"; if (fOpMode == Flatten) - opName = "Flatten"; + opName += "_Flatten"; else if (fOpMode == Squeeze) - opName = "Squeeze"; + opName += "_Squeeze"; else if (fOpMode == Unsqueeze) - 
opName = "Unsquueze"; + opName += "_Unsquueze"; out << SP << "///-------" << opName << " operator\n" << std::endl; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 783e391..f2e2e25 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -61,7 +61,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA() { std::string op; op = "\n//------ SIGMOID_KERNEL_ALPAKA\n"; op += SP + "struct SigmoidKernel {\n"; @@ -76,7 +76,7 @@ public: } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { return SP + "SigmoidKernel sigmoidKernel;\n"; } @@ -87,7 +87,7 @@ public: } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertShapeToLength(fShape); out << "\n//------ SIGMOID_GPU_ALPAKA\n"; out << SP << "alpaka::WorkDivMembers workDiv_" << fNX << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx index 0fd8f5a..6335db3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx @@ -153,7 +153,7 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() override { + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) { std::string op; op = "\n//------ SPLIT_KERNEL_ALPAKA\n"; op += SP + "struct SplitKernel {\n"; @@ -178,24 +178,24 @@ public: return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA() override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { return SP + "SplitKernel splitKernel;\n"; } std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; - if (fShape.empty()) { + if 
(fOutputShapes.empty()){ throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first"); } std::stringstream out; out << "\n//------ SPLIT_GPU_ALPAKA\n"; - bool axis_is_innermost = (axis == static_cast(fInputShape.size()) - 1) - && (UTILITY::ComputeStridesFromShape(fInputShape)[fInputShape.size()-1] == 1); + bool axis_is_innermost = (fAxis == static_cast(fInputShape.size()) - 1) + && (UTILITY::ComputeStrideFromShape(fInputShape)[fInputShape.size()-1] == 1); out << SP <<"size_t "<(queue, workDiv_" << fNYs[i] << ", splitKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< UTILITY::ConvertShapeToString(UTILITY::ComputeStrideFromShape(fInputShape)) <<", "< workDiv_" << fNY << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " @@ -185,8 +185,8 @@ public: out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", tileKernel, alpaka::getPtrNative(deviceBuf_" << fNInput << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< UTILITY::ConvertShapeToString(fShapeInput)<<", "<< UTILITY::ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeInput)) <<", " - < - namespace SOFIE{ @@ -150,32 +149,86 @@ public: fShapeY = fShapeA; } // check case of constant output (if all inputs are defined) - if (model.IsInitializedTensor(fNA) && model.IsInitializedTensor(fNB) && model.IsInitializedTensor(fNC)) { - std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA; - std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; + if (model.IsInitializedTensor(fNC)) { + std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC; - auto dataA = static_cast(model.GetInitializedTensorData(nameA).get()); - auto dataB = static_cast(model.GetInitializedTensorData(nameB).get()); auto dataC = static_cast(model.GetInitializedTensorData(nameC).get()); - std::vector dataY(ConvertShapeToLength(fShapeY)); - for (size_t i = 0; i < dataY.size(); i++) - dataY[i] = (dataC[i]) ? 
dataA[i] : dataB[i]; - model.AddConstantTensor(fNY, fShapeY, dataY.data()); - // flag tensors to not be written in a file - model.SetNotWritableInitializedTensor(nameA); - model.SetNotWritableInitializedTensor(nameB); model.SetNotWritableInitializedTensor(nameC); + T * dataA = nullptr; + T * dataB = nullptr; + std::vector shapeDataA; + std::vector shapeDataB; + if (model.IsInitializedTensor(fNA)) { + std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA; + dataA = static_cast(model.GetInitializedTensorData(nameA).get()); + // flag tensors to not be written in a file + model.SetNotWritableInitializedTensor(nameA); + } else if (model.IsShapeTensor(fNA)) + shapeDataA = model.GetShapeTensorValues(fNA); + if (model.IsInitializedTensor(fNB)) { + std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; + dataB = static_cast(model.GetInitializedTensorData(nameB).get()); + model.SetNotWritableInitializedTensor(nameB); + } else if (model.IsShapeTensor(fNB)) + shapeDataB = model.GetShapeTensorValues(fNB); - fIsOutputConstant = true; - if (model.Verbose()) + std::vector dataY; + std::vector shapeDataY; + + bool isOutputConstantTensor = true; + if (dataA && dataB) { + dataY.resize(ConvertShapeToLength(fShapeY)); + for (size_t i = 0; i < dataY.size(); i++) + dataY[i] = (dataC[i]) ? dataA[i] : dataB[i]; + } + else if (dataA && shapeDataB.size()>0 ) { + shapeDataY.resize(ConvertShapeToLength(fShapeY)); + for (size_t i = 0; i < shapeDataY.size(); i++) { + shapeDataY[i] = (dataC[i]) ? Dim{size_t(dataA[i])} : shapeDataB[i]; + isOutputConstantTensor &= !shapeDataY[i].isParam; + } + } + else if (dataB && shapeDataA.size()>0 ) { + shapeDataY.resize(ConvertShapeToLength(fShapeY)); + for (size_t i = 0; i < shapeDataY.size(); i++) { + shapeDataY[i] = (dataC[i]) ? 
shapeDataB[i] : Dim{size_t(dataB[i])}; + isOutputConstantTensor &= !shapeDataY[i].isParam; + } + } + else if (shapeDataB.size() > 0 && shapeDataA.size()>0 ) { + shapeDataY.resize(ConvertShapeToLength(fShapeY)); + for (size_t i = 0; i < shapeDataY.size(); i++) { + shapeDataY[i] = (dataC[i]) ? shapeDataA[i] : shapeDataB[i]; + isOutputConstantTensor &= !shapeDataY[i].isParam; + } + } + fIsOutputConstant = true; // this contains both case constant tensor output ans shape tensor output + if (isOutputConstantTensor && dataY.empty()) { + dataY.resize(shapeDataY.size()); + for (size_t i = 0; i < shapeDataY.size(); i++) + dataY[i] = static_cast(shapeDataY[i].dim); + } + if (dataY.size() > 0) + model.AddConstantTensor(fNY, fShapeY, dataY.data()); + else if (shapeDataY.size() > 0 ) + model.AddShapeTensor(fNY, shapeDataY, fShapeY.size() == 0); + else { + fIsOutputConstant = false; + } + if (fIsOutputConstant && model.Verbose()) std::cout << "Where op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(dataY) << std::endl; - + << ((dataY.size() > 0) ? ConvertValuesToString(dataY) : ConvertShapeToString(shapeDataY) ) + << ((dataY.size() > 0) ? 
" (constant)" : " (shape)") << std::endl; + // output is a constant tensor - fOutputTensorNames.pop_back(); + if (fIsOutputConstant) fOutputTensorNames.pop_back(); } - else { + if (!fIsOutputConstant) { model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), fShapeY); + if (model.Verbose()) + std::cout << "Where op " << " condition : " << fNC << " " << ConvertShapeToString(fShapeC) << + " X " << fNA << " " << ConvertShapeToString(fShapeA) << " Y " << fNB << " " << ConvertShapeToString(fShapeB) + << " ---> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; } } @@ -184,17 +237,17 @@ public: return out.str(); } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { if (fIsOutputConstant) return ""; - OpName = "op_" + OpName; + opName = "op_" + opName; if (fShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first"); } std::stringstream out; - out << SP << "\n//-------- Where \n"; + out << SP << "\n//-------- Where " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; size_t length = ConvertShapeToLength(fShapeY); std::string typeName = TensorType::Name(); // Broadcast A if it's uninitialized @@ -216,19 +269,18 @@ public: // special case if C is an input tensor if (fIsInputBoolTensor) { size_t inputLength = ConvertShapeToLength(fShapeC); - out << SP << "std::vector fTensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n"; + out << SP << "std::vector fTensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n"; } out << SP << "// Broadcasting uninitialized tensor " << fNC << "\n"; //out << SP << "{\n"; - // for boolean we need to pass vector and use the non-template version of the function - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast(fTensor_" << fNC << ", " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) + out << SP << 
"SOFIE::UTILITY::UnidirectionalBroadcast(fTensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ", fTensor_" << fNBroadcastedC << ");\n"; } std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA; std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC; out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - // get output tensor applying condition (note we need to use directly the vector since v.data(), i.e the data pointer, does not exist) + // get output tensor applying condition out << SP << SP << "tensor_" << fNY << "[id] = " << "(fTensor_" << nameC << "[id]) ? tensor_" << nameA << "[id] : tensor_" + nameB + "[id];\n"; out << SP << "}\n"; @@ -240,4 +292,4 @@ public: }//SOFIE -#endif //SOFIE_ROperator_Where +#endif //TMVA_SOFIE_ROperator_Where diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index f25b66b..17ac714 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -1,5 +1,5 @@ -#ifndef SOFIE_SOFIE_COMMON -#define SOFIE_SOFIE_COMMON +#ifndef TMVA_SOFIE_SOFIE_COMMON +#define TMVA_SOFIE_SOFIE_COMMON #include "TMVA/RTensor.hxx" @@ -21,13 +21,10 @@ #include #include - -namespace SOFIE{ - -//typedef RTensor tensor_t; +namespace SOFIE { enum class ETensorType{ - UNDEFINED = 0, FLOAT = 1, UNINT8 = 2, INT8 = 3, UINT16 = 4, INT16 = 5, INT32 = 6, INT64 = 7, STRING = 8, BOOL = 9, //order sensitive + UNDEFINED = 0, FLOAT = 1, UINT8 = 2, INT8 = 3, UINT16 = 4, INT16 = 5, INT32 = 6, INT64 = 7, STRING = 8, BOOL = 9, //order sensitive FLOAT16 = 10, DOUBLE = 11, UINT32 = 12, UINT64 = 13, COMPLEX64 = 14, COMPLEX28 = 15, BFLOAT16 = 16 }; @@ -39,7 +36,7 @@ constexpr size_t GetTypeSize(ETensorType type) { switch (type) { case ETensorType::FLOAT: return sizeof(float); case ETensorType::DOUBLE: return sizeof(double); - case 
ETensorType::UNINT8: return sizeof(uint8_t); + case ETensorType::UINT8: return sizeof(uint8_t); case ETensorType::INT8: return sizeof(int8_t); case ETensorType::UINT16: return sizeof(uint16_t); case ETensorType::INT16: return sizeof(int16_t); @@ -58,6 +55,9 @@ typedef std::int64_t int_t; std::string ConvertTypeToString(ETensorType type); ETensorType ConvertStringToType(std::string type); +// find if a string represents a number +bool IsInteger(const std::string & s); + struct Dim{ bool isParam = false; size_t dim = 0; @@ -67,16 +67,42 @@ struct Dim{ Dim() {} // constructor for a parametric dimension with the option to pass a default dim value - Dim(const std::string & p, size_t d = 0) : isParam(true), dim(d), param(p) {} + // We use -1 for dim to indicate that the param dimension is an expression (e.g. "d1+d2") + // in case the string represents a number make Dim not parametric + Dim(const std::string & p, size_t d = 0) : isParam(true), dim(d), param(p) + { + if (IsInteger(p)) { + isParam = false; + dim = std::stoi(p); + } + } // constructor for a non-parametric dimension Dim(size_t d) : dim(d) {} std::string GetVal() const { - return (isParam) ? param : std::to_string(dim); + // cast to int64_t for negative shape values + return (isParam) ? param : std::to_string(static_cast(dim)); + } + + std::ostream& operator<< (std::ostream& os) const { + os << GetVal(); + return os; + } + + bool operator==(const Dim& rhs) const { + return (isParam && rhs.isParam) ? 
param == rhs.param : dim == rhs.dim; + } + bool operator!=(const Dim& rhs) const { + return !(*this == rhs); } }; +//bool operator==(const Dim& lhs, const Dim& rhs); +inline std::ostream & operator<< (std::ostream &os, const Dim &d) { + os << d.GetVal(); + return os; +} struct InputTensorInfo{ ETensorType type; @@ -93,6 +119,18 @@ struct DynamicTensorInfo{ std::vector shape; }; +// template traits for Tensor Shape +template +struct TensorShape {}; +template<> +struct TensorShape { + static bool IsDim() { return true; } +}; +template<> +struct TensorShape { + static bool IsDim() { return false; } +}; + // template traits for Tensor type template struct TensorType {}; @@ -120,6 +158,10 @@ template<> struct TensorType { static const std::string Name() { return "uint64_t"; } }; +template<> +struct TensorType { + static const std::string Name() { return "bool"; } +}; struct TensorMemoryInfo { std::string_view tensor_name; @@ -148,19 +190,21 @@ struct MemoryPoolInfo { std::map available_stack; }; -std::vector ConvertShapeToDim(std::vector shape); +std::vector ConvertShapeToDim(const std::vector & shape); + +std::vector ConvertShapeToInt(const std::vector & shape); -std::vector ConvertShapeToInt(std::vector shape); +std::size_t ConvertShapeToLength(const std::vector & shape); -std::size_t ConvertShapeToLength(std::vector shape); +std::string ConvertShapeToString(const std::vector & shape); +std::string ConvertDimShapeToString(const std::vector & shape); +std::string ConvertShapeToString(const std::vector & shape); -std::string ConvertShapeToString(std::vector shape); -std::string ConvertDynamicShapeToString(std::vector shape); -// std::string ConvertShapeToString(std::vector shape) { -// return ConvertDynamicShapeToString(shape); -// } -std::string ConvertDynamicShapeToLength(std::vector shape); + +std::string ConvertDimShapeToLength(const std::vector & shape); +std::string ConvertDynamicShapeToLength(const std::vector & shape); + template std::string ConvertValToString(T 
value) { @@ -271,7 +315,7 @@ private: template ETensorType GetTemplatedType(T /*obj*/ ){ if (std::is_same::value) return ETensorType::FLOAT; - if (std::is_same::value) return ETensorType::UNINT8; + if (std::is_same::value) return ETensorType::UINT8; if (std::is_same::value) return ETensorType::INT8; if (std::is_same::value) return ETensorType::UINT16; if (std::is_same::value) return ETensorType::INT16; @@ -287,6 +331,12 @@ ETensorType GetTemplatedType(T /*obj*/ ){ } namespace UTILITY{ + + + +// clean operator and tensor names +std::string Clean_name(std::string input_tensor_name); + // Check if two shapes are equal bool AreSameShape(const std::vector&, const std::vector&); bool AreSameShape(const std::vector&, const std::vector&); @@ -296,10 +346,14 @@ bool AreSameShape(const std::vector&, const std::vector&); // Multidirectional broadcast a list of tensors to the same shape std::vector MultidirectionalBroadcastShape(std::vector>); -// Unidirectional broadcast two shapes to the same shape -std::vector UnidirectionalBroadcastShape(std::vector, std::vector); +// Multidirectional broadcast two shapes to the same shape + +std::pair> MultidirectionalBroadcastShape(std::vector &, std::vector &); +std::vector UnidirectionalBroadcastShape(std::vector &, std::vector &); + +std::pair> MultidirectionalBroadcastShape(std::vector &, std::vector &); + -std::string Clean_name(std::string input_tensor_name); template T* BroadcastConvBias(const T* data, const size_t channel, const std::vector& targetShape) { @@ -352,7 +406,7 @@ void BroadcastTensor(ConstContT data, const std::vector& shape, const st size_t targetLength = broadcastedData.size(); assert(ConvertShapeToLength(targetShape) == targetLength); // special case when broadcasting last dimensions (initial shapes must be the same) - if (shape.front() == targetShape.front() && shape.back() == 1 && size > 1) { + if (size > 1 && shape.front() == targetShape.front() && shape.back() == 1) { size_t bsize = targetShape.back(); // 
compute the size of the data to broadcast for (int k = int(size)-2; k >=0; k--) { @@ -419,6 +473,7 @@ T* CreateBroadcastTensor(const T* data, const std::vector& shape, const BroadcastTensor, std::span>(inData, shape, targetShape, bData); return broadcastedData; } + // Unidirectional broadcasting shape to targetShape// In unidirectional broadcast - only tensor B can have the shape changed not // tensor A - otherwise is a multidirectional broadcast template @@ -449,8 +504,6 @@ void UnidirectionalBroadcast(const T* data, const std::vector& shape, co } BroadcastTensor>(inData, shape, targetShape, broadcastedData); } -// specialization for vector of boolean -void UnidirectionalBroadcast(const std::vector & data, const std::vector& shape, const std::vector& targetShape, std::vector & broadcastedData); /// compute stride of a tensor given its shape (assume layout is row-major) std::vector ComputeStrideFromShape(const std::vector & shape); @@ -619,7 +672,15 @@ void col2im(const Dtype* data_col, const int channels, //std::cout << "finishing col2imp" << std::endl; } - +// Used at the end of infer() to fill the return object. +template +void FillOutput(T const *arr, std::vector &out, std::size_t n) +{ + out.resize(n); + for (std::size_t i = 0; i < n; ++i) { + out[i] = arr[i]; + } +} } // end namespace UTILITY @@ -704,11 +765,45 @@ inline GNN_Data Copy(const GNN_Data & data) { return out; } -// Function to generate the code for declaring and initializing constant tensors -// This is for tensors which are not part of weight files and can be created from the Constant operator -template -std::string GenerateConstantTensorCode(const std::pair &t); +inline void Gemm_Call(float *output, bool transa, bool transb, int m, int n, int k, float alpha, const float *A, + const float *B, float beta, const float *C) +{ + char ct = 't'; + char cn = 'n'; + const int *lda = transa ? &k : &m; + const int *ldb = transb ? 
&n : &k; + const int *ldc = &m; + if (C != nullptr) { + std::copy(C, C + m * n, output); + } + SOFIE::BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &m, &n, &k, &alpha, A, lda, B, ldb, + &beta, output, ldc); +} + +template +void ReadTensorFromStream(std::istream &is, T &target, std::string const &expectedName, std::size_t expectedLength) +{ + std::string name; + std::size_t length; + is >> name >> length; + if (name != expectedName) { + std::string err_msg = + "TMVA-SOFIE failed to read the correct tensor name; expected name is " + expectedName + " , read " + name; + throw std::runtime_error(err_msg); + } + if (length != expectedLength) { + std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is " + + std::to_string(expectedLength) + " , read " + std::to_string(length); + throw std::runtime_error(err_msg); + } + for (size_t i = 0; i < length; ++i) { + is >> target[i]; + } + if (is.fail()) { + throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor " + expectedName); + } +} -}//SOFIE +} // namespace SOFIE -#endif //TMVA_SOFIE_RMODEL +#endif //TMVA_SOFIE_COMMON diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index 61357e8..8b87749 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -71,7 +71,19 @@ const std::vector& RModel::GetTensorShape(std::string name) const { throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not found"); } -std::vector RModel::GetDynamicTensorShape(std::string name) const { +std::vector RModel::GetDimTensorShape(const std::string & name) const { + if (auto f = fDynamicTensorInfos.find(name); f != fDynamicTensorInfos.end()) { + return f->second.shape; + } + if (auto f = fInputTensorInfos.find(name); f != fInputTensorInfos.end()) { + return f->second.shape; + } + // in case is not a dynamic tensor convert normal shape to Dim one + // for this we need to return the vector by value + return 
ConvertShapeToDim(GetTensorShape(name)); +} + +std::vector RModel::GetDynamicTensorShape(const std::string & name) const { if (auto f = fDynamicTensorInfos.find(name); f != fDynamicTensorInfos.end()) { return f->second.shape; } @@ -191,6 +203,23 @@ void RModel::AddConstantTensor(std::string tensor_name, ETensorType type, std::v fInitializedTensors[tensor_name] = new_tensor; } +void RModel::AddShapeTensor(const std::string & name, const std::vector & shape_values, bool scalar){ + auto tensor_name = UTILITY::Clean_name(name); + if (fShapeTensors.count(tensor_name) != 0) { + throw std::runtime_error("TMVA-SOFIE: shape tensor with name " + tensor_name + " already exists \n"); + } + fShapeTensors[tensor_name] = std::make_pair(shape_values, scalar); +} + +bool RModel::IsShapeTensor(const std::string & tensor_name) const { + return fShapeTensors.count(tensor_name) != 0; +} + +const std::vector & RModel::GetShapeTensorValues(const std::string & tensor_name) const { + //if (!IsShapeTensor(tensor_name) ) return std::vector{}; + return fShapeTensors.at(tensor_name).first; +} + bool RModel::IsInitializedTensor(const std::string& tensorName) const { std::string name = UTILITY::Clean_name(tensorName); return fInitializedTensors.find(name) != fInitializedTensors.end(); @@ -294,7 +323,7 @@ void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { t->second.SetNotWritable(); } -std::string RModel:: AllocateIntermediateMemory(std::span op_output_tensors) { +std::string RModel:: AllocateIntermediateMemory(std::span op_output_tensors) { std::string memory_allocation_string = ""; bool allocated; @@ -349,7 +378,7 @@ std::string RModel:: AllocateIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ +void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ for (auto &it : op_input_tensors){ // last occurence of the tensor is reached => flush it from memory if (fIntermediateTensorFrequencyLookup[it] == op_idx) { 
@@ -386,55 +415,6 @@ void RModel::CheckAndFlushIntermediateMemory(std::span o } } -void RModel::CheckAndFuseOperators() { - size_t idx = 0; - std::vector fusable_indices; - std::string fusable_propagate_tensor_name; - while (idx < fOperators.size()) { - if (fOperators[idx]->GetKind() != OperatorKind::GEMM && fOperators[idx]->GetKind() != OperatorKind::CONV) { - ++idx; - continue; - } - - fusable_indices.clear(); - fusable_propagate_tensor_name.clear(); - - fusable_indices.push_back(idx); - size_t j = idx + 1; - for (; j < fOperators.size()-1; ++j) { - auto opKind = fOperators[j]->GetKind(); - // Only consider operators with fusable kinds - if (!FusableKinds.count(opKind)) { - break; - } - - const auto& tensorName = fOperators[j]->GetFusableOutputTensorName(); - auto freqIt = fIntermediateTensorFrequencyLookup.find(tensorName); - - // Propagate tensor name only if it's not used multiple times - fusable_indices.push_back(j); - if (freqIt != fIntermediateTensorFrequencyLookup.end() && - (freqIt->second != fOperators[j + 1]->GetOpOrder() || - FusableKinds.count(fOperators[j + 1]->GetKind()) == 0)) { - fusable_propagate_tensor_name = tensorName; - break; - } - } - if (!fusable_propagate_tensor_name.empty()) { - auto fusable_tensor_type = GetTensorType(fusable_propagate_tensor_name); - auto fusable_tensor_shape = GetDynamicTensorShape(fusable_propagate_tensor_name); - for (auto& index : fusable_indices) { - fOperators[index]->UpdateFusableTensorName(fusable_propagate_tensor_name, [this](const std::string& name) { - this->RemoveIntermediateTensor(name); - }); - } - AddIntermediateTensor(fusable_propagate_tensor_name, fusable_tensor_type, fusable_tensor_shape); - } - - idx = std::max(idx + 1, j); - } -} - void RModel::Initialize(int batchSize, bool verbose) { @@ -484,7 +464,7 @@ void RModel::Initialize(const std::map & inputParams, bool auto shape = ConvertShapeToInt(input.second.shape); if (verbose) std::cout << "converting input shape for " << input.first << " " << 
ConvertShapeToString(shape) << " from " - << ConvertDynamicShapeToString(input.second.shape) << std::endl; + << ConvertDimShapeToString(input.second.shape) << std::endl; if (!shape.empty()) { // case shape is defined (not parametric) we add the tensor in the fReadyInputTensorInfos map and // we remove the tensor from the fInputTensorInfo where th eold parametric shape was stored @@ -543,7 +523,7 @@ void RModel::Initialize(const std::map & inputParams, bool } i++; } - CheckAndFuseOperators(); + fIsInitialized = true; } @@ -576,6 +556,43 @@ void RModel::InitializeSubGraph(std::shared_ptr graph) { } +// Function to generate the code for declaring and initializing constant tensors +// This is for tensors which are not part of weight files and can be created from the Constant operator +template +std::string GenerateConstantTensorCode(const std::pair &t) +{ + std::stringstream strs; + std::string type = ConvertTypeToString(t.second.type()); + size_t length = ConvertShapeToLength(t.second.shape()); + // avoid using stack sizes for constant tensors to reduce compilation time + bool allocateOnStack = (length > 100) ? 
false : true; + + const T *data = t.second.data(); + + // and check if all values are the same + bool sameData = false; + // for non stack allocation check if data are the same + if (!allocateOnStack && length > 1) { + size_t idx = 1; + do { + sameData = (data[idx] == data[idx - 1]); + idx++; + } while (sameData && idx < length); + } + if (allocateOnStack) { + strs << type << " tensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; + } else { + strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; + if (sameData) + strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; + else { + strs << ConvertValuesToString(length, data) << ";\n"; + } + strs << "const " << type << " * tensor_" + t.first + " = fTensor_" + t.first + ".data();\n"; + } + return strs.str(); +} + void RModel::GenerateInitializedTensorInfo() { if (!fInitializedTensors.empty()) @@ -614,10 +631,10 @@ void RModel::GenerateIntermediateTensorInfo() { for (auto &i : fIntermediateTensorInfos) { if (i.second.type == ETensorType::BOOL) { - tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; - // No pointer allocation needed for BOOL + tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; + tensor_declaration_block += "std::uint8_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; } - if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { + if (fIntermediateTensorFrequencyLookup.find(i.first) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { size_t length = ConvertShapeToLength(i.second.shape); if (i.second.type == ETensorType::FLOAT) { @@ -670,16 +687,16 @@ void 
RModel::GenerateOperatorDeclarations() { } void RModel::GenerateDynamicTensorInfo() { - fGC += "//---- allocate the intermediate dynamic tensors\n"; - std::stringstream out; - for (auto & i: fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; - } - fGC += out.str(); + fGC += "//---- allocate the intermediate dynamic tensors\n"; + std::stringstream out; + for (auto & i: fDynamicTensorInfos) { + auto length = ConvertDynamicShapeToLength(i.second.shape); + out << SP << "if (" << length << " > 0) {\n"; + out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + out << SP << "}\n"; + } + fGC += out.str(); } std::string RModel::GenerateInferSignature(bool isdecl) { @@ -702,15 +719,14 @@ std::string RModel::GenerateInferSignature(bool isdecl) { } } } - rGC += "alpaka::Buf "; + rGC += type + "* "; } - rGC += "deviceBuf_" + name + ","; + rGC += "tensor_" + name + ","; i_input++; } @@ -718,46 +734,129 @@ std::string RModel::GenerateInferSignature(bool isdecl) { return rGC; } +namespace { + +std::string typeForOutput(ETensorType t) { + // The std::vector is a special type that is not wrapping continuous memory. + // We don't want to use it as a return type. 
+ if (t == ETensorType::BOOL) t = ETensorType::UINT8; + return ConvertTypeToString(t); +} + +} + +void RModel::GenerateOutput() +{ + size_t outputSize = fOutputTensorNames.size(); + // assume output types are all the same + + bool sameOutputTypes = true; + std::string inferReturnType; // type return by infer function + ETensorType eFirstOutputType = GetTensorType(*fOutputTensorNames.begin()); + fGC += "\n\n"; + if (outputSize == 1) { + fGC += "std::vector<" + typeForOutput(eFirstOutputType) + ">"; + } else { + // if all output types are the same we return an std::vector - otherwise a tuple + for (std::string const &name : fOutputTensorNames) { + if (GetTensorType(name) != eFirstOutputType) + sameOutputTypes = false; + } + if (sameOutputTypes) + fGC += "std::vector>"; + else { + inferReturnType = "std::tuple<"; + for (size_t i = 0; i < outputSize; i++) { + inferReturnType += "std::vector<" + typeForOutput(GetTensorType(fOutputTensorNames[i])) + ">"; + if (i < outputSize - 1) + inferReturnType += ","; + } + inferReturnType += ">"; + fGC += inferReturnType; + } + } + + fGC += " infer(" + GenerateInferSignature() + "){\n"; + + std::string doInferArgs = GenerateInferSignature(false); + if (!doInferArgs.empty()) + doInferArgs += ","; + for (std::string const &name : fOutputTensorNames) { + fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > output_tensor_" + name + ";\n"; + doInferArgs += " output_tensor_" + name + ","; + } + if (!doInferArgs.empty()) + doInferArgs.back() = ' '; + + fGC += SP + "doInfer(" + doInferArgs + ");\n"; + + fGC += SP + "return {"; + for (size_t i = 0; i < fOutputTensorNames.size(); i++) { + fGC += "output_tensor_" + fOutputTensorNames[i]; + if (i < fOutputTensorNames.size() - 1) + fGC += ","; + } + fGC += "};\n"; + fGC += "}\n"; // end of infer function scope +} + void RModel::GenerateSessionCode() { + // Determine the signature of the actual inference function + std::string doInferSignature = GenerateInferSignature(); + if 
(!doInferSignature.empty()) + doInferSignature += ", "; + for (auto const &name : fOutputTensorNames) { + doInferSignature += " std::vector<" + typeForOutput(GetTensorType(name)) + "> &output_tensor_" + name + ","; + } + doInferSignature.back() = ' '; + + doInferSignature = "void doInfer(" + doInferSignature + ")"; // define the Session struct (for GNN this is generated in RModel_GNN) - if (fUseSession) { + if (fUseSession && !fIsGNNComponent) { if (!fIsSubGraph) - fGC += "struct Session {\n\n"; + fGC += "struct Session {\n"; else - fGC += "struct Session_" + fName + " {\n\n"; + fGC += "struct Session_" + fName + " {\n"; } // generate code for declaring the initialized tensors GenerateInitializedTensorInfo(); - // // evaluate total intermediate memory and position intermediate tensor addresses - // std::string intermediate_memory_alloc_string = ""; - // intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; - // for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - // intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); - // CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); - // } + if (fOptimizationLevel == OptimizationLevel::kExtended) { + // evaluate total intermediate memory and position intermediate tensor addresses + std::string intermediate_memory_alloc_string = ""; + intermediate_memory_alloc_string += "\n// --- Positioning intermediate tensor memory --"; + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { + if (fVerbose) { + auto op = fOperators[op_idx].get(); + std::cout << "\n******************\n analyzing input/output operator " << op_idx << " " + << typeid(*op).name() << std::endl; + } + intermediate_memory_alloc_string += AllocateIntermediateMemory(fOperators[op_idx]->GetOpOutputTensors()); + CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); + } - // // to check remaining 
unused fragments after memory allocation (lesser the better) - // // for (const auto &it: fIntermediateMemoryInfo.available_stack){ - // // std::cout<<"chunk_idx: "<GenerateSessionMembersCode(opName); - // } - // fGC += "\n"; + for (size_t id = 0; id < fOperators.size(); id++) { + std::string opName = std::to_string(id); + fGC += fOperators[id]->GenerateSessionMembersCode(opName); + } + fGC += "\n"; // here add initialization and reading of weight tensors if (fUseWeightFile) { std::string fileName = fName; @@ -793,10 +892,10 @@ void RModel::GenerateSessionCode() } // add initialization of shape parameters // assume all parameters are of type size_t - if (!fShapeParams.empty()) { - for (auto &p : fShapeParams) { + if (!fDimShapeNames.empty()) { + for (auto &p : fDimShapeNames) { fGC += ",\n"; - fGC += " size_t " + p.first + " = " + p.second; + fGC += " size_t " + p + " = " + fShapeParams[p]; } } fGC += ") {\n"; @@ -818,12 +917,42 @@ void RModel::GenerateSessionCode() fGC += "}\n\n"; } + + fGC += doInferSignature + "{\n"; + fGC += "\n"; + // generate the inference code + if (fVerbose) + std::cout << "Generating main inference code for " << fName << std::endl; + + if (fOutputTensorNames.size() == 0) + throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { + if (fVerbose) + std::cout << "Generating code for operator .... " << op_idx << std::endl; + fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + } + + fGC += SP + "using SOFIE::UTILITY::FillOutput;\n\n"; + + for (std::string const &name : fOutputTensorNames) { + // need to check is size is the same (don't want to return a vector with + // larger size) in that case better to copy + bool isIntermediate = fIntermediateTensorInfos.count(name) > 0; + std::string n = isIntermediate ? 
std::to_string(ConvertShapeToLength(GetTensorShape(name))) + : ConvertDimShapeToLength(GetDimTensorShape(name)); + fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; + } + + fGC += "}\n\n"; + + // generate the inference overload that returns an output struct GenerateOutput(); // end of session if (fUseSession && !fIsGNNComponent) { - fGC += "}; // end of Session\n"; + fGC += "}; // end of Session\n\n"; } } @@ -896,9 +1025,9 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " throw std::runtime_error(\"tmva-sofie failed to open file \" + filename + \" for input weights\");\n"; fGC += " }\n"; - // if(fIsGNNComponent) { - // fGC += " f.seekg(" + std::to_string(pos) + ");\n"; - // } + if(fIsGNNComponent) { + fGC += " f.seekg(" + std::to_string(pos) + ");\n"; + } fGC += " std::string tensor_name;\n"; fGC += " size_t length;\n"; @@ -966,9 +1095,10 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); } fGC += " }\n"; - } + } + fGC += " }\n"; } - } +} long RModel::WriteInitializedTensorsToFile(std::string filename) { // Determine the file extension based on the weight file type @@ -1163,7 +1293,7 @@ void RModel::PrintOutputTensors() { if (!IsDynamicTensor(it)) std::cout << "shape: " << ConvertShapeToString(GetTensorShape(it)) << std::endl; else - std::cout << "shape: " << ConvertDynamicShapeToString(GetDynamicTensorShape(it)) << std::endl; + std::cout << "shape: " << ConvertDimShapeToString(GetDynamicTensorShape(it)) << std::endl; } std::cout << "\n"; } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 549a3dd..066d4e8 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -182,7 +182,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { std::cout<GetKind())<GetKind()) == 
registered_operators.end()) { std::cout<<"Generating ALPAKA kernel for operator"<< std::endl; - fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(); + fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); registered_operators.insert(fOperators[id]->GetKind()); } } @@ -279,7 +279,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { std::cout<GetKind())<GetKind()) == registered_operators.end()) { std::cout<<"Declaring ALPAKA kernel for operator"<< std::endl; - fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(); + fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); registered_operators.insert(fOperators[id]->GetKind()); } } diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/src/SOFIE_core/src/RModel_Base.cxx index 3b1885c..f212c53 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -29,7 +29,7 @@ void RModel_Base::GenerateHeaderInfo(std::string& hgname) { } // for the session we need to include SOFIE_Common functions //needed for convolution operator (need to add a flag) - fGC += "#include \"TMVA/SOFIE_common.hxx\"\n"; + fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; if (fUseWeightFile) fGC += "#include \n"; // Include TFile when saving the weights in a binary ROOT file @@ -79,7 +79,7 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { // for the session we need to include SOFIE_Common functions //needed for convolution operator (need to add a flag) - fGC += "#include \"TMVA/SOFIE_common.hxx\"\n"; + fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; if (fUseWeightFile) fGC += "#include \n"; // Include TFile when saving the weights in a binary ROOT file diff --git a/src/SOFIE_core/src/RModel_GNN.cxx b/src/SOFIE_core/src/RModel_GNN.cxx index a1dfe06..3dae254 100644 --- a/src/SOFIE_core/src/RModel_GNN.cxx +++ b/src/SOFIE_core/src/RModel_GNN.cxx @@ -94,7 +94,7 @@ void RModel_GNN::Generate() { // the number of output edges features can be smaller, so we 
need to correct here auto num_edge_features_input = num_edge_features; - auto edges_update_output_shape = edges_update_block->GetFunctionBlock()->GetDynamicTensorShape(edges_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); + auto edges_update_output_shape = edges_update_block->GetFunctionBlock()->GetDimTensorShape(edges_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); if(!edges_update_output_shape[1].isParam && edges_update_output_shape[1].dim != num_edge_features_input) { num_edge_features = edges_update_output_shape[1].dim; } @@ -117,7 +117,7 @@ void RModel_GNN::Generate() { // we need to correct the output number of node features auto num_node_features_input = num_node_features; - auto nodes_update_output_shape = nodes_update_block->GetFunctionBlock()->GetDynamicTensorShape(nodes_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); + auto nodes_update_output_shape = nodes_update_block->GetFunctionBlock()->GetDimTensorShape(nodes_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); if(!nodes_update_output_shape[1].isParam && nodes_update_output_shape[1].dim != num_node_features_input) { num_node_features = nodes_update_output_shape[1].dim; } diff --git a/src/SOFIE_core/src/RModel_GraphIndependent.cxx b/src/SOFIE_core/src/RModel_GraphIndependent.cxx index bab06b3..cd62d0c 100644 --- a/src/SOFIE_core/src/RModel_GraphIndependent.cxx +++ b/src/SOFIE_core/src/RModel_GraphIndependent.cxx @@ -81,7 +81,7 @@ void RModel_GraphIndependent::Generate() { // the number of output edges features can be smaller, so we need to correct here // assume num_edge_features is not a parametric shape - auto edges_update_output_shape = edges_update_block->GetFunctionBlock()->GetDynamicTensorShape(edges_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); + auto edges_update_output_shape = edges_update_block->GetFunctionBlock()->GetDimTensorShape(edges_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); 
if(!edges_update_output_shape[1].isParam && edges_update_output_shape[1].dim != num_edge_features_input) { num_edge_features = edges_update_output_shape[1].dim; } @@ -100,7 +100,7 @@ void RModel_GraphIndependent::Generate() { fGC+="};\n}\n"; // we need to correct the output number of node features - auto nodes_update_output_shape = nodes_update_block->GetFunctionBlock()->GetDynamicTensorShape(nodes_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); + auto nodes_update_output_shape = nodes_update_block->GetFunctionBlock()->GetDimTensorShape(nodes_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); if(!nodes_update_output_shape[1].isParam && nodes_update_output_shape[1].dim != num_node_features_input) { num_node_features = nodes_update_output_shape[1].dim; } @@ -119,7 +119,7 @@ void RModel_GraphIndependent::Generate() { // we need to correct the output number of global features // global features are in shape[1] #if 0 - auto globals_update_output_shape = globals_update_block->GetFunctionBlock()->GetDynamicTensorShape(globals_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); + auto globals_update_output_shape = globals_update_block->GetFunctionBlock()->GetDimTensorShape(globals_update_block->GetFunctionBlock()->GetOutputTensorNames()[0]); if(!globals_update_output_shape[1].isParam && globals_update_output_shape[1].dim != num_global_features_input) { num_global_features = globals_update_output_shape[1].dim; } diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx index bb288cf..6136f72 100644 --- a/src/SOFIE_core/src/SOFIE_common.cxx +++ b/src/SOFIE_core/src/SOFIE_common.cxx @@ -1,15 +1,16 @@ #include "SOFIE/SOFIE_common.hxx" -#include + +#include #include #include +#include - -namespace SOFIE{ +namespace SOFIE { /// @brief Convert shape from integer format to dynamic one (based on Dim) /// @param shape /// @return shape based on Dim -std::vector ConvertShapeToDim(std::vector shape){ +std::vector 
ConvertShapeToDim(const std::vector & shape){ std::vector ret_shape(shape.size()); for (size_t i =0; i < shape.size(); i++){ ret_shape[i].dim = shape[i]; @@ -20,7 +21,7 @@ std::vector ConvertShapeToDim(std::vector shape){ /// @brief Convert shape based on Dim to integer format /// @param shape /// @return shape based on integer. Return an empty shape in case shape is dynamic (has a parameter) -std::vector ConvertShapeToInt(std::vector shape){ +std::vector ConvertShapeToInt(const std::vector & shape){ std::vector ret_shape(shape.size()); for (size_t i =0; i < shape.size(); i++){ if (shape[i].isParam) { @@ -46,7 +47,7 @@ std::vector ConvertShapeToInt(std::vector shape){ } -std::size_t ConvertShapeToLength(std::vector shape){ +std::size_t ConvertShapeToLength(const std::vector & shape){ // Empty shape represent scalar values, so we return a length=1 std::size_t fLength = 1; for (auto& dim: shape) fLength *= dim; @@ -58,6 +59,9 @@ std::string ConvertTypeToString(ETensorType type){ case ETensorType::FLOAT : { return "float"; } + case ETensorType::INT8 : { + return "int8_t"; + } case ETensorType::INT16 : { return "int16_t"; } @@ -67,6 +71,9 @@ std::string ConvertTypeToString(ETensorType type){ case ETensorType::INT64 : { return "int64_t"; } + case ETensorType::UINT8 : { + return "uint8_t"; + } case ETensorType::UINT16 : { return "uint16_t"; } @@ -106,7 +113,7 @@ ETensorType ConvertStringToType(std::string type){ } } -std::string ConvertShapeToString(std::vector shape) { +std::string ConvertShapeToString(const std::vector & shape) { std::stringstream out; out << "{ "; for (size_t i = 0; i < shape.size(); i++) { @@ -117,7 +124,7 @@ std::string ConvertShapeToString(std::vector shape) { return out.str(); } -std::string ConvertDynamicShapeToString(std::vector shape) { +std::string ConvertDimShapeToString(const std::vector & shape) { std::stringstream out; out << "{ "; for (size_t i = 0; i < shape.size(); i++) { @@ -128,10 +135,12 @@ std::string 
ConvertDynamicShapeToString(std::vector shape) { return out.str(); } -std::string ConvertDynamicShapeToLength(std::vector shape) { +std::string ConvertDimShapeToLength(const std::vector & shape) { // convert generic shape to a string // multiply all the integer specified dimensions of the shape std::string length; + // case of empty vectors return 1 + if (shape.empty()) return "1"; size_t int_length = 0; for (size_t i = 0; i < shape.size(); i++) { if (shape[i].isParam) { @@ -145,12 +154,24 @@ std::string ConvertDynamicShapeToLength(std::vector shape) { } } // multiply the integer components to the parametric one + // if larger than 1 if (int_length > 0) { - if (!length.empty()) length += " * "; - length += std::to_string(int_length); + if (!length.empty() && int_length > 1) { + length += " * "; + length += std::to_string(int_length); + } else if (length.empty()) { // case is full known shape + length = std::to_string(int_length); + } } return length; } +std::string ConvertShapeToString(const std::vector & shape) { + return ConvertDimShapeToString(shape); +} +std::string ConvertDynamicShapeToLength(const std::vector & shape) { + return ConvertDimShapeToLength(shape); +} + namespace{ template @@ -169,6 +190,12 @@ static inline void copy_vector_data(int_t no_of_copies, int_t input_size, T* inp } } +bool IsInteger(const std::string & s) { + int value; + auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value); + return ec == std::errc() && ptr == s.data() + s.size(); +} + bool UTILITY::AreSameShape(const std::vector& shapeA, const std::vector& shapeB) { if (shapeA.size() != shapeB.size()) { return false; @@ -330,17 +357,24 @@ std::vector UTILITY::MultidirectionalBroadcastShape(std::vector UTILITY::UnidirectionalBroadcastShape(std::vector shapeA, std::vector shapeB) +// check multi-directional broadcasting of two shapes (need to pass inputs by non const ref. 
since we might prepends with one's +// return a pair of integer flag and new broadcasted shape +// if flag = 0: shape are identical +// flag = 1: return shape is equal to A, we broadcast B +// flag = 2: return shape is equal to B we broadcast A +// flag = 3: return shape is common of two we broadcast A and B to output +std::pair> UTILITY::MultidirectionalBroadcastShape(std::vector & shapeA, std::vector & shapeB) { size_t sizeA = shapeA.size(); size_t sizeB = shapeB.size(); // Check if A and B have the same shape if (UTILITY::AreSameShape(shapeA, shapeB)){ - return shapeA; + return std::make_pair(0, shapeA); } // Find the common shape of A and B size_t size = std::max(sizeA, sizeB); if (sizeA < size) { + // prepend 1's in A to make of same shape as B std::vector newShapeA(size, 1); size_t offset = size - sizeA; std::copy(shapeA.begin(), shapeA.end(), newShapeA.begin() + offset); @@ -359,36 +393,116 @@ std::vector UTILITY::UnidirectionalBroadcastShape(std::vector s break; } } + int broadcastFlag = 0; if (broadcastable) { // The output shape is max(outShape, targetShape) std::vector targetShape(size, 1); for (size_t i = 0; i < size; i++) { targetShape[i] = std::max(shapeA[i], shapeB[i]); + if (shapeB[i] < targetShape[i]) broadcastFlag |= 1; + if (shapeA[i] < targetShape[i]) broadcastFlag |= 2; } - return targetShape; + return std::make_pair(broadcastFlag, targetShape); } else { throw - std::runtime_error("TMVA::SOFIE - Error unidirectional broadcasting tensors of shape " + std::runtime_error("TMVA::SOFIE - Error multidirectional broadcasting tensors of shape " + ConvertShapeToString(shapeA) + " and " + ConvertShapeToString(shapeB) + " to a common shape."); } } +// unidirectional broadcast- only B changes +std::vector UTILITY::UnidirectionalBroadcastShape(std::vector & shapeA, std::vector & shapeB) +{ + auto ret = UTILITY::MultidirectionalBroadcastShape(shapeA, shapeB); + if (ret.first > 1) { + std::runtime_error("TMVA::SOFIE - Error unidirectional broadcasting tensors 
of shape " + + ConvertShapeToString(shapeA) + " and " + ConvertShapeToString(shapeB) + + " to a common shape."); + } + return ret.second; +} -// UNidirectional boradcast specializaiton for vector +// for broadcasting Dim shapes +// flag indicates also which vector needs to be broadcasted +// flag & 1 == 1 : broadcast B -> A +// flag & 2 == 2 : broadcast A -> B +// flag & 4 == 4 a run time check is needed on shapes with values +std::pair> UTILITY::MultidirectionalBroadcastShape(std::vector & shapeA, std::vector & shapeB) { + size_t sizeA = shapeA.size(); + size_t sizeB = shapeB.size(); + // Check if A and B have the same shape + if (UTILITY::AreSameShape(shapeA, shapeB)){ + return std::make_pair(0, shapeA); + } + // Find the common shape of A and B + size_t size = std::max(sizeA, sizeB); + if (sizeA < size) { + // prepend 1's in A to make of same shape as B + std::vector newShapeA(size, Dim{1}); + size_t offset = size - sizeA; + std::copy(shapeA.begin(), shapeA.end(), newShapeA.begin() + offset); + shapeA = std::move(newShapeA); + } + if (sizeB < size) { + std::vector newShapeB(size, Dim{1}); + size_t offset = size - sizeB; + std::copy(shapeB.begin(), shapeB.end(), newShapeB.begin() + offset); + shapeB = std::move(newShapeB); + } + + int broadcastFlag = 0; + // The output shape is targetShape + std::vector targetShape(size); + for (size_t i = 0; i < size; i++) { + // assume we broadcast to the parametric value + if (shapeA[i] == shapeB[i]) { + targetShape[i] = shapeA[i]; + } else if (shapeA[i].isParam && shapeB[i].GetVal() == "1" ) { + // broadcast B to A (case A is parametric with ) + targetShape[i] = shapeA[i]; + broadcastFlag |= 1; + } else if (shapeA[i].GetVal() == "1" && shapeB[i].isParam) { + // broadcast A to B + targetShape[i] = shapeB[i]; + broadcastFlag |= 2; + } else if (!shapeA[i].isParam && !shapeB[i].isParam) { + if (shapeB[i].dim == 1) { + targetShape[i] = shapeA[i]; + broadcastFlag |= 1; + } else if (shapeA[i].dim == 1) { + targetShape[i] = 
shapeB[i]; + broadcastFlag |= 2; + } else { + // non broadcastable case cannot have A and B two different defined shapes different than one + broadcastFlag = -1; + } + } else if (shapeA[i].isParam && shapeB[i].isParam) { + // full dynamic case - we will decided at run time + std::stringstream s; + s << "std::max(" << shapeA[i] << "," << shapeB[i] << ")"; + // use -1 for dim to indicate is an expression + targetShape[i] = Dim { s.str() , static_cast(-1)}; + broadcastFlag |= 4; + } else if (shapeA[i].isParam && !shapeB[i].isParam) { + // A -> B need to check at run time if consistent + targetShape[i] = shapeB[i]; + broadcastFlag |= 6; + } else if (!shapeA[i].isParam && shapeB[i].isParam) { + // B -> A need to check at run time if consistent + targetShape[i] = shapeA[i]; + broadcastFlag |= 5; + } else { + // all cases should be covered + throw std::runtime_error("TMVA::SOFIE - Fatal error in MultiDirectionalBroadCastDimShape"); + } + } + if (broadcastFlag == -1) { + throw std::runtime_error("TMVA::SOFIE - Error multidirectional broadcasting tensors of shape " + + ConvertDimShapeToString(shapeA) + " and " + ConvertDimShapeToString(shapeB) + + " to a common shape."); + } -// specialization for vector of boolean -void UTILITY::UnidirectionalBroadcast(const std::vector & data, const std::vector& shape, const std::vector& targetShape, std::vector & broadcastedData) - { - // Prepend shape with ones - auto ncdata = const_cast &>(data); - if (shape.size() < targetShape.size()) { - size_t targetSize = targetShape.size(); - std::vector newShape(targetSize, 1); - size_t offset = targetSize - shape.size(); - std::copy(shape.begin(), shape.end(), newShape.begin() + offset); - UTILITY::BroadcastTensor &, std::vector &>(ncdata, newShape, targetShape, broadcastedData); - } - UTILITY::BroadcastTensor &, std::vector &>(ncdata, shape, targetShape, broadcastedData); + return std::make_pair(broadcastFlag, targetShape); } std::string UTILITY::Clean_name(std::string input_tensor_name){ @@ 
-413,50 +527,22 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) // assume row major layout const auto size = shape.size(); std::vector strides(size); - strides[size-1] = Dim{1}; - for (std::size_t i = 1; i < size; i++) { - if (!shape[size-i].isParam && !strides[size-i].isParam) - strides[size - 1 - i] = Dim{strides[size-i].dim * shape[size-i].dim}; - else - strides[size - 1 - i] = Dim{std::string(strides[size-i].GetVal() + "*" + shape[size-i].GetVal())}; - } - return strides; -} - -template -std::string GenerateConstantTensorCode(const std::pair &t) -{ - std::stringstream strs; - std::string type = ConvertTypeToString(t.second.type()); - size_t length = ConvertShapeToLength(t.second.shape()); - // avoid using stack sizes for constant tensors to reduce compilation time - bool allocateOnStack = (length > 100) ? false : true; - - const T *data = t.second.data(); - - // and check if all values are the same - bool sameData = false; - // for non stack allocation check if data are the same - if (!allocateOnStack && length > 1) { - size_t idx = 1; - do { - sameData = (data[idx] == data[idx - 1]); - idx++; - } while (sameData && idx < length); - } - if (allocateOnStack) { - strs << type << " tensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; - } else { - strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; - if (sameData) - strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; - else { - strs << ConvertValuesToString(length, data) << ";\n"; + if (size > 0) { + strides[size-1] = Dim{1}; + for (std::size_t i = 1; i < size; i++) { + if (!shape[size-i].isParam && !strides[size-i].isParam) + strides[size - 1 - i] = Dim{strides[size-i].dim * shape[size-i].dim}; + else { + if (strides[size-i].GetVal() == "1") + strides[size - 1 - i] = shape[size-i]; + else if (shape[size-i].GetVal() == "1") + strides[size - 1 - i] = strides[size-i]; + else + 
strides[size - 1 - i] = Dim{std::string(strides[size-i].GetVal() + "*" + shape[size-i].GetVal())}; + } } - strs << "const " << type << " * tensor_" + t.first + " = fTensor_" + t.first + ".data();\n"; } - return strs.str(); + return strides; } - -}//SOFIE +} // namespace SOFIE \ No newline at end of file diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index 34bb49f..e4713b6 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -33,6 +33,7 @@ endforeach() string(REPLACE ";" ";\n" EMIT_CAPTURES "${ALL_CAPTURES}") configure_file(EmitFromONNX.cxx.in EmitFromONNX_all.cxx @ONLY) configure_file(EmitFromRoot.cxx.in EmitFromRoot_all.cxx @ONLY) +configure_file(EmitFromONNX_GPU_ALPAKA.cxx.in EmitFromONNX_GPU_ALPAKA_all.cxx @ONLY) ROOTTEST_GENERATE_EXECUTABLE(emitFromONNX EmitFromONNX_all.cxx LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers @@ -129,3 +130,17 @@ ROOT_ADD_TEST(tmva-sofie-EmitGNN COMMAND emitGNN) ROOT_EXECUTABLE(EmitGraphIndependent GNN/EmitGraphIndependent.cxx LIBRARIES SOFIE_core) ROOT_ADD_TEST(tmva-sofie-EmitGraphIndependent COMMAND EmitGraphIndependent) + +# Generating inference code for heterogeneous testing using ALPAKA +# ROOTTEST_GENERATE_EXECUTABLE(emitFromONNXAlpaka EmitFromONNX_GPU_ALPAKA_all.cxx +# LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers +# FIXTURES_SETUP sofie-compile-models-onnx-alpaka-build) + +# # silence protobuf warnings seen in version 3.0 and 3.6. 
Not needed from protobuf version 3.17 +# target_compile_options(emitFromONNXAlpaka PRIVATE -Wno-unused-parameter -Wno-array-bounds) + +# ROOTTEST_ADD_TEST(SofieCompileModels_ONNX_Alpaka +# COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNXAlpaka ${onnx_file} ${CMAKE_CURRENT_BINARY_DIR}/${fname} +# FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka-build +# FIXTURES_SETUP sofie-compile-models-onnx-alpaka +# ) diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx index d02dc5e..ba9a42a 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx @@ -2515,7 +2515,7 @@ TEST(ONNX, Equal){ }); SOFIE_Equal::Session s("Equal_FromONNX.dat"); - std::vector output = s.infer(input1.data(),input2.data()); + std::vector output = s.infer(input1.data(),input2.data()); // Checking output size EXPECT_EQ(output.size(), sizeof(Equal_ExpectedOutput::outputs) / sizeof(bool)); @@ -2540,7 +2540,7 @@ TEST(ONNX, LessOrEqual){ }); SOFIE_LessOrEqual::Session s("LessOrEqual_FromONNX.dat"); - std::vector output = s.infer(input1.data(),input2.data()); + std::vector output = s.infer(input1.data(),input2.data()); // Checking output size EXPECT_EQ(output.size(), sizeof(LessOrEqual_ExpectedOutput::outputs) / sizeof(bool)); @@ -2565,7 +2565,7 @@ TEST(ONNX, GreaterOrEqual){ }); SOFIE_GreaterOrEqual::Session s("GreaterOrEqual_FromONNX.dat"); - std::vector output = s.infer(input1.data(),input2.data()); + std::vector output = s.infer(input1.data(),input2.data()); // Checking output size EXPECT_EQ(output.size(), sizeof(GreaterOrEqual_ExpectedOutput::outputs) / sizeof(bool)); @@ -2590,7 +2590,7 @@ TEST(ONNX, Greater){ }); SOFIE_Greater::Session s("Greater_FromONNX.dat"); - std::vector output = s.infer(input1.data(),input2.data()); + std::vector output = s.infer(input1.data(),input2.data()); // Checking output size EXPECT_EQ(output.size(), sizeof(Greater_ExpectedOutput::outputs) 
/ sizeof(bool)); @@ -2615,7 +2615,7 @@ TEST(ONNX, Less){ }); SOFIE_Less::Session s("Less_FromONNX.dat"); - std::vector output = s.infer(input1.data(),input2.data()); + std::vector output = s.infer(input1.data(),input2.data()); // Checking output size EXPECT_EQ(output.size(), sizeof(Less_ExpectedOutput::outputs) / sizeof(bool)); From b4cd917b56b150b989070420b80fb0efce0aac52 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sun, 23 Nov 2025 20:44:02 +0100 Subject: [PATCH 14/43] fix: parameteric inputs for range operator --- src/SOFIE_core/inc/SOFIE/RModel.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx | 24 +- src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc | 75 ++-- src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc | 7 +- src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc | 7 +- src/SOFIE_core/src/RModel.cxx | 417 ++++++++++++-------- 6 files changed, 304 insertions(+), 232 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 09feb17..4ecdaec 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -65,9 +65,9 @@ public: int Verbose() const { return fVerbose;} - const std::vector &GetTensorShape(std::string name) const; + const std::vector &GetTensorShape(const std::string & name) const; std::vector GetDimTensorShape(const std::string & name) const; - const ETensorType &GetTensorType(std::string name) const; + const ETensorType &GetTensorType(const std::string & name) const; std::vector GetDynamicTensorShape(const std::string & name) const ; // get the values for the tensor representing a shape @@ -140,7 +140,7 @@ public: void AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector shape); // Add an intermediate dynamic tensor void AddDynamicTensor(std::string tensor_name, ETensorType type, std::vector shape); - + void AddShapeParam(const std::string & name, size_t def_value = 0); void AddInputTensorName(std::string name); void AddOutputTensorNameList(std::vector 
output_tensor_names); void diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx index bb1a74e..f9998e1 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx @@ -1,5 +1,5 @@ -#ifndef SOFIE_ROPERATOR_GRU -#define SOFIE_ROPERATOR_GRU +#ifndef TMVA_SOFIE_ROPERATOR_GRU +#define TMVA_SOFIE_ROPERATOR_GRU #include "SOFIE/RModel.hxx" #include "SOFIE/ROperator.hxx" @@ -11,7 +11,6 @@ #include #include - namespace SOFIE { /*! \brief Gated Recurrent Unit operator @@ -91,7 +90,7 @@ template class ROperator_GRU final : public ROperator { fNSequence_lens(UTILITY::Clean_name(nameSequence_lens)), fNInitial_h(UTILITY::Clean_name(nameInitial_h)), fNY(UTILITY::Clean_name(nameY)), fNY_h(UTILITY::Clean_name(nameY_h)) { - + fInputTensorNames = { fNX, fNW, fNR }; if (!fNB.empty()){ fInputTensorNames.emplace_back(fNB); @@ -123,39 +122,34 @@ template class ROperator_GRU final : public ROperator { * * \param input type of the input tensors */ - std::vector TypeInference(std::vector /*input*/); + std::vector TypeInference(std::vector /*input*/) override; /*! \brief Infers the shape of the output tensors * * \param input shape of the input tensors */ - std::vector> ShapeInference(std::vector> /*input*/); + std::vector> ShapeInference(std::vector> /*input*/) override; /*! \brief Initialize the model * * \param model Model */ - void Initialize(RModel &); + void Initialize(RModel &) override; /*! \brief Generate the inference code * * \param OpName name of the operator */ - std::string Generate(std::string /*OpName*/); - - /*! \brief Generate the code for the Session internal data vectors - * - * \param opName name of the operator - */ - std::string GenerateSessionMembersCode(std::string opName); + std::string Generate(std::string /*OpName*/) override; /*! 
\brief Returns the blas routines needed to compile the generated code */ - std::vector GetBlasRoutines() { return { std::string("Gemm"), std::string("Axpy") }; } + std::vector GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; } }; } // namespace SOFIE + // Implementation of the ROperator_GRU class #include "SOFIE/ROperator_GRU.icc" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc index f3813c2..d011617 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc @@ -1,5 +1,5 @@ -#ifndef SOFIE_ROPERATOR_GRU_I -#define SOFIE_ROPERATOR_GRU_I +#ifndef TMVA_SOFIE_ROPERATOR_GRU_I +#define TMVA_SOFIE_ROPERATOR_GRU_I namespace SOFIE { @@ -175,51 +175,45 @@ void ROperator_GRU::Initialize(RModel& model){ fAttrActivations = {"Sigmoid", "Tanh"}; } } -} -// generate code for Session data members (e.g. internal vectors) -template -std::string ROperator_GRU::GenerateSessionMembersCode(std::string opName) -{ - opName = "op_" + opName; - std::stringstream out; + // To get unique intermediate tensor names, we add the name of the input + // tensor. One might also consider using the index of the operator in the + // RMode, but this information is not available in the current scope. + std::string opName = "op_gru_" + fNX; size_t num_directions = fShapeW[0]; size_t seq_length = (fAttrLayout == 0) ? fShapeX[0] : fShapeX[1]; size_t batch_size = (fAttrLayout == 0) ? 
fShapeX[1] : fShapeX[0]; size_t input_size = fShapeX[2]; + auto declareVector = [&](std::string const &name, std::size_t n){ + std::string fullName = opName + "_" + name; + model.AddIntermediateTensor(fullName, ConvertStringToType(fType), std::vector{n}); + }; + if (fAttrLayout != 0) { - out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">(" - << seq_length * batch_size * input_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_cell_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; + declareVector("input", seq_length * batch_size * input_size); + declareVector("initial_hidden_state", num_directions * batch_size * fAttrHiddenSize); + declareVector("initial_cell_state", num_directions * batch_size * fAttrHiddenSize); } // Set the feedforward size_t ff_size = seq_length * batch_size * fAttrHiddenSize; - out << "std::vector<" << fType << "> fVec_" << opName << "_f_update_gate = std::vector<" << fType << ">(" << ff_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_f_reset_gate = std::vector<" << fType << ">(" << ff_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_f_hidden_gate = std::vector<" << fType << ">(" << ff_size << ");\n"; + declareVector("f_update_gate", ff_size); + declareVector("f_reset_gate", ff_size); + declareVector("f_hidden_gate", ff_size); // gate results size_t hs_size = seq_length * num_directions * batch_size * fAttrHiddenSize; - out << "std::vector<" << fType << "> fVec_" << opName << "_update_gate = std::vector<" << fType << ">(" << hs_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_reset_gate = std::vector<" << fType << ">(" << hs_size << ");\n"; - out << 
"std::vector<" << fType << "> fVec_" << opName << "_hidden_gate = std::vector<" << fType << ">(" << hs_size << ");\n"; + declareVector("update_gate", hs_size); + declareVector("reset_gate", hs_size); + declareVector("hidden_gate", hs_size); // feedback - out << "std::vector<" << fType << "> fVec_" << opName << "_feedback = std::vector<" << fType << ">(" - << batch_size * fAttrHiddenSize << ");\n"; + declareVector("feedback", batch_size * fAttrHiddenSize); // hiddden state if (fAttrLayout != 0 || fNY.empty()) { - out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">(" << hs_size << ");\n"; + declareVector("hidden_state", hs_size); } - - out << "\n"; - - return out.str(); } @@ -234,12 +228,14 @@ auto ROperator_GRU::Generate(std::string OpName) size_t input_size = fShapeX[2]; size_t num_directions = fShapeW[0]; + auto getVec = [&](std::string const &name) { return "tensor_op_gru_" + fNX + "_" + name; }; + // set the input if (fAttrLayout == 0) { - out << SP << fType << " *" << OpName << "_input = tensor_" << fNX << ";\n"; + out << SP << fType << " const* " << OpName << "_input = tensor_" << fNX << ";\n"; } else { if (fUseSession) { - out << SP << fType << " * " << OpName << "_input = fVec_" << OpName << "_input.data();\n"; + out << SP << fType << " * " << OpName << "_input = " << getVec("input") << ";\n"; } else { out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "];\n"; } @@ -261,8 +257,7 @@ auto ROperator_GRU::Generate(std::string OpName) << fNInitial_h << ";\n"; } else { if (fUseSession) { - out << SP << fType << " * " << OpName << "_initial_hidden_state = fVec_" << OpName - << "_initial_hidden_state.data();\n"; + out << SP << fType << " * " << OpName << "_initial_hidden_state = " << getVec("initial_hidden_state") << ";\n"; } else { out << SP << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size * fAttrHiddenSize << "];\n"; @@ -283,9 +278,9 @@ 
auto ROperator_GRU::Generate(std::string OpName) // Set the feedforward size_t feedforward_size = seq_length * batch_size * fAttrHiddenSize; if (fUseSession) { - out << SP << fType << " * " << OpName << "_f_update_gate = fVec_" << OpName << "_f_update_gate.data();\n"; - out << SP << fType << " * " << OpName << "_f_reset_gate = fVec_" << OpName << "_f_reset_gate.data();\n"; - out << SP << fType << " * " << OpName << "_f_hidden_gate = fVec_" << OpName << "_f_hidden_gate.data();\n"; + out << SP << fType << " * " << OpName << "_f_update_gate = " << getVec("f_update_gate") << ";\n"; + out << SP << fType << " * " << OpName << "_f_reset_gate = " << getVec("f_reset_gate") << ";\n"; + out << SP << fType << " * " << OpName << "_f_hidden_gate = " << getVec("f_hidden_gate") << ";\n"; } else { out << SP << fType << " " << OpName << "_f_update_gate[" << feedforward_size << "] = {0};\n"; out << SP << fType << " " << OpName << "_f_reset_gate[" << feedforward_size << "] = {0};\n"; @@ -294,9 +289,9 @@ auto ROperator_GRU::Generate(std::string OpName) // Set the gates size_t hidden_state_size = seq_length * num_directions * batch_size * fAttrHiddenSize; if (fUseSession) { - out << SP << fType << " * " << OpName << "_update_gate = fVec_" << OpName << "_update_gate.data();\n"; - out << SP << fType << " * " << OpName << "_reset_gate = fVec_" << OpName << "_reset_gate.data();\n"; - out << SP << fType << " * " << OpName << "_hidden_gate = fVec_" << OpName << "_hidden_gate.data();\n"; + out << SP << fType << " * " << OpName << "_update_gate = " << getVec("update_gate") << ";\n"; + out << SP << fType << " * " << OpName << "_reset_gate = " << getVec("reset_gate") << ";\n"; + out << SP << fType << " * " << OpName << "_hidden_gate = " << getVec("hidden_gate") << ";\n"; } else { out << SP << fType << " " << OpName << "_update_gate[" << hidden_state_size << "] = {0};\n"; out << SP << fType << " " << OpName << "_reset_gate[" << hidden_state_size << "] = {0};\n"; @@ -307,14 +302,14 @@ auto 
ROperator_GRU::Generate(std::string OpName) out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n"; } else { if (fUseSession) { - out << SP << fType << " * " << OpName << "_hidden_state = fVec_" << OpName << "_hidden_state.data();\n"; + out << SP << fType << " * " << OpName << "_hidden_state = " << getVec("hidden_state") << ";\n"; } else { out << SP << fType << " " << OpName << "_hidden_state[" << hidden_state_size << "] = {0};\n"; } } if (fUseSession) { - out << SP << fType << " * " << OpName << "_feedback = fVec_" << OpName << "_feedback.data();\n"; + out << SP << fType << " * " << OpName << "_feedback = " << getVec("feedback") << ";\n"; } else { out << SP << fType << " " << OpName << "_feedback[" << batch_size * fAttrHiddenSize << "] = {0};\n"; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc index bec7760..9d31b7f 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc @@ -1,6 +1,5 @@ -#ifndef SOFIE_ROPERATOR_LSTM_I -#define SOFIE_ROPERATOR_LSTM_I - +#ifndef TMVA_SOFIE_ROPERATOR_LSTM_I +#define TMVA_SOFIE_ROPERATOR_LSTM_I namespace SOFIE { @@ -291,7 +290,7 @@ auto ROperator_LSTM::Generate(std::string OpName) // set the input if (fAttrLayout == 0) { - out << SP << fType << " *" << OpName << "_input = tensor_" << fNX << ";\n"; + out << SP << fType << " const *" << OpName << "_input = tensor_" << fNX << ";\n"; } else { if (fUseSession) out << SP << fType << " * " << OpName << "_input = fVec_" << OpName << "_input.data();\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc index c03c1c2..08dc3dc 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc @@ -1,6 +1,5 @@ -#ifndef SOFIE_ROPERATOR_RNN_I -#define SOFIE_ROPERATOR_RNN_I - +#ifndef TMVA_SOFIE_ROPERATOR_RNN_I +#define TMVA_SOFIE_ROPERATOR_RNN_I namespace SOFIE { @@ -230,7 +229,7 
@@ auto ROperator_RNN::Generate(std::string OpName) // set the input if (fAttrLayout == 0) { if (fType == "float") { - out << SP << "float *" << OpName << "_input = tensor_" << fNX << ";\n"; + out << SP << "float const*" << OpName << "_input = tensor_" << fNX << ";\n"; } } else { if (fUseSession) diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index 8b87749..b2d8625 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -4,48 +4,21 @@ #include #include +#ifdef SOFIE_SUPPORT_ROOT_BINARY #include "TFile.h" +#endif #include "SOFIE/RModel.hxx" #include "SOFIE/SOFIE_common.hxx" - namespace SOFIE { -RModel::RModel(RModel&& other) { - fInputTensorInfos = std::move(other.fInputTensorInfos); - fReadyInputTensorInfos = std::move(other.fReadyInputTensorInfos); - fOutputTensorNames = other.fOutputTensorNames; - fInputTensorNames = other.fInputTensorNames; - fOperators = std::move(other.fOperators); - fInitializedTensors = std::move(other.fInitializedTensors); - fIntermediateTensorInfos = std::move(other.fIntermediateTensorInfos); - fName = other.fName; - fFileName = other.fFileName; - fParseTime = other.fParseTime; - fGC = other.fGC; - fNeededBlasRoutines = other.fNeededBlasRoutines; - fNeededStdLib = other.fNeededStdLib; +namespace { +const std::string SP = " "; } -RModel& RModel::operator=(RModel&& other) { - fInputTensorInfos = std::move(other.fInputTensorInfos); - fReadyInputTensorInfos = std::move(other.fReadyInputTensorInfos); - fOutputTensorNames = other.fOutputTensorNames; - fInputTensorNames = other.fInputTensorNames; - fOperators = std::move(other.fOperators); - fInitializedTensors = std::move(other.fInitializedTensors); - fIntermediateTensorInfos = std::move(other.fIntermediateTensorInfos); - fName = other.fName; - fFileName = other.fFileName; - fParseTime = other.fParseTime; - fGC = other.fGC; - fNeededBlasRoutines = other.fNeededBlasRoutines; - fNeededStdLib = other.fNeededStdLib; - return *this; -} -const 
std::vector& RModel::GetTensorShape(std::string name) const { +const std::vector& RModel::GetTensorShape(const std::string & name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { return f->second.shape; @@ -62,6 +35,16 @@ const std::vector& RModel::GetTensorShape(std::string name) const { if (f4 != fIntermediateTensorInfos.end()) { return f4->second.shape; } + // case of shape tensors + auto f5 = fShapeTensors.find(name); + if (f5 != fShapeTensors.end()) { + // shape is vector of size 1 with size of shape values or just a scalar + if (f5->second.second) // check scalar flag + return std::vector{}; + else + return std::vector{f5->second.first.size()}; + } + if (fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end()) throw std::runtime_error("TMVA SOFIE tensor [" + name + "] is a dynamic tensor. Use GetDynamicTensorShape instead of GetTensorShape"); @@ -82,7 +65,6 @@ std::vector RModel::GetDimTensorShape(const std::string & name) const { // for this we need to return the vector by value return ConvertShapeToDim(GetTensorShape(name)); } - std::vector RModel::GetDynamicTensorShape(const std::string & name) const { if (auto f = fDynamicTensorInfos.find(name); f != fDynamicTensorInfos.end()) { return f->second.shape; @@ -90,12 +72,14 @@ std::vector RModel::GetDynamicTensorShape(const std::string & name) const { if (auto f = fInputTensorInfos.find(name); f != fInputTensorInfos.end()) { return f->second.shape; } - // in case is not a dynamic tensor convert normal shape to Dim one - // for this we need to return the vector by value - return ConvertShapeToDim(GetTensorShape(name)); + // throw error if shape is not dynamic + if (!IsDynamicTensor(name)) + throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not dynamic"); + + throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not found"); } -const ETensorType& RModel::GetTensorType(std::string 
name) const { +const ETensorType& RModel::GetTensorType(const std::string & name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { return f->second.type; @@ -116,6 +100,10 @@ const ETensorType& RModel::GetTensorType(std::string name) const { if (f5 != fDynamicTensorInfos.end()){ return f5->second.type; } + // case of shape tensor type is INT64 (returned through a static object because this function returns a reference) + if (fShapeTensors.find(name) != fShapeTensors.end()){ + static const ETensorType shapeTensorType = ETensorType::INT64; return shapeTensorType; + } if (fIsSubGraph && fParentGraph) return fParentGraph->GetTensorType(name); @@ -129,6 +117,7 @@ bool RModel::CheckIfTensorAlreadyExist(std::string tensor_name) { if (fInitializedTensors.find(tensor_name) != fInitializedTensors.end()) return true; if (fIntermediateTensorInfos.find(tensor_name) != fIntermediateTensorInfos.end()) return true; if (fDynamicTensorInfos.find(tensor_name) != fDynamicTensorInfos.end()) return true; + if (fShapeTensors.find(tensor_name) != fShapeTensors.end()) return true; if (fIsSubGraph && fParentGraph) return fParentGraph->CheckIfTensorAlreadyExist(tensor_name); return false; } @@ -197,7 +186,7 @@ void RModel::AddConstantTensor(std::string tensor_name, ETensorType type, std::v tensor_name = UTILITY::Clean_name(tensor_name); //NB: own data if (CheckIfTensorAlreadyExist(tensor_name)) { - throw std::runtime_error("TMVA-SOFIE: initialized tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("TMVA-SOFIE: constant tensor with name " + tensor_name + " already exists \n"); } InitializedTensor new_tensor {type, shape, data, true}; // add here flag to specify is a constant tensor fInitializedTensors[tensor_name] = new_tensor; @@ -231,9 +220,11 @@ bool RModel::IsConstantTensor(const std::string& tensorName) const { return itr->second.IsConstantTensor(); } +// dynamic tensors include also Dim input tensors bool RModel::IsDynamicTensor(const std::string& tensorName) const { std::string name = UTILITY::Clean_name(tensorName); - return 
fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end(); + bool ret = fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end(); + return (ret) ? true : IsDimInputTensor(tensorName); } bool RModel::IsDimInputTensor(const std::string& tensorName) const { std::string name = UTILITY::Clean_name(tensorName); @@ -272,17 +263,21 @@ void RModel::AddDynamicTensor(std::string tensor_name, ETensorType type, std::ve // store shape parameter if not existing for (auto &d : shape) { if (d.isParam) { - if (fShapeParams.count(d.param) == 0) { - // case parameter is an expression of some other existing parameter, no need to - // register it - if (d.dim != size_t(-1)) { - fShapeParams[d.param] = std::to_string(d.dim); - } + if (d.dim != size_t(-1)) { + AddShapeParam(d.param, d.dim); } } } } +void RModel::AddShapeParam(const std::string & param, size_t default_value) { + if (fShapeParams.count(param) == 0) { + fShapeParams[param] = std::to_string(default_value); + // add also in the vector list (used to keep the order) + fDimShapeNames.push_back(param); + } +} + void RModel::AddOutputTensorNameList(std::vector outputtensornames) { fOutputTensorNames.clear(); for(auto& it : outputtensornames) { @@ -323,100 +318,180 @@ void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { t->second.SetNotWritable(); } -std::string RModel:: AllocateIntermediateMemory(std::span op_output_tensors) { +std::string RModel::AllocateIntermediateMemory(std::span op_output_tensors) +{ + std::stringstream code; - std::string memory_allocation_string = ""; - bool allocated; + if (fVerbose) { + std::cout << "Total chunks allocated\n"; + for (auto chunk = fIntermediateMemoryInfo.total_stack.begin(); chunk != fIntermediateMemoryInfo.total_stack.end(); ++chunk) { + std::cout << "..... 
chunk " << chunk->first << " size " << chunk->second.tensor_size << " " << chunk->second.tensor_name << std::endl; + } + } - for (auto& it : op_output_tensors) { - allocated = false; - if (GetTensorType(std::string(it)) == ETensorType::BOOL || - fInitializedTensors.find(std::string(it)) != fInitializedTensors.end() || - fDynamicTensorInfos.find(std::string(it)) != fDynamicTensorInfos.end()) continue; + auto declareIntermediateTensor = [this, &code](std::string const &name, size_t size, size_t location) { + std::string typeName = ConvertTypeToString(GetTensorType(name)); + code << "\n // Allocating memory for intermediate tensor " << name << " with size " << size << " bytes"; + code << "\n" + << typeName << "* tensor_" << name << " = reinterpret_cast<" << typeName + << "*>(fIntermediateMemoryPool.data() + " << location << ");\n"; + }; + + if (fVerbose) std::cout << "*** AllocateIntermediateMemory: Loop on op output tensors\n"; + // order output tensors by size + std::vector ordered_output_tensors; + + for (auto &it : op_output_tensors) { + auto name = std::string(it); + if (GetTensorType(name) == ETensorType::BOOL || fInitializedTensors.find(name) != fInitializedTensors.end() || + fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end()) + continue; + + auto tensor_size = GetTypeSize(GetTensorType(name)) * ConvertShapeToLength(GetTensorShape(name)); + // important fill the pair in the ordered output tensors with the string view and not the string + TensorMemoryInfo tmi = {it, tensor_size}; + ordered_output_tensors.push_back(tmi); + } + std::sort(ordered_output_tensors.begin(), ordered_output_tensors.end(), + [](const TensorMemoryInfo &a, const TensorMemoryInfo &b) { return a.tensor_size > b.tensor_size; }); - auto tensor_size = GetTypeSize(GetTensorType(std::string(it))) * ConvertShapeToLength(GetTensorShape(std::string(it))); - memory_allocation_string += "\n // Allocating memory for intermediate tensor " + std::string(it) + " with size " + 
std::to_string(tensor_size) + " bytes"; + for (auto &it : ordered_output_tensors) { + bool allocated = false; + std::string name = std::string{it.tensor_name}; + size_t tensor_size = it.tensor_size; + if (fVerbose) + std::cout << "output tensor " << name << " size " << tensor_size << std::endl; - for (auto chunk = fIntermediateMemoryInfo.available_stack.begin(); chunk != fIntermediateMemoryInfo.available_stack.end(); ) { + for (auto chunk = fIntermediateMemoryInfo.available_stack.begin(); + chunk != fIntermediateMemoryInfo.available_stack.end();) { - // check if available memory chunks can accommodate the tensor - if (chunk->second >= tensor_size) { - auto new_chunk = fIntermediateMemoryInfo.total_stack[chunk->first].split(it, tensor_size); - auto new_chunk_location = chunk->first+chunk->second-tensor_size; - fIntermediateMemoryInfo.total_stack[new_chunk_location] = new_chunk; + if (fVerbose) std::cout << ".. available chunk " << chunk->first << " with size = " << chunk->second; + // check if available memory chunks can accommodate the tensor + if (chunk->second >= tensor_size) { + // need to use here string_view (i.e it.tensor_name) + // split returns the new chunk with size of new tensor. 
The free chunk is before the used one + auto new_chunk = fIntermediateMemoryInfo.total_stack[chunk->first].split(it.tensor_name, tensor_size); + auto new_chunk_location = chunk->first + chunk->second - tensor_size; + fIntermediateMemoryInfo.total_stack[new_chunk_location] = new_chunk; - memory_allocation_string += "\n" + ConvertTypeToString(GetTensorType(std::string(it))) + - "* tensor_" + std::string(it) + - " = reinterpret_cast<"+ConvertTypeToString(GetTensorType(std::string(it)))+"*>(fIntermediateMemoryPool + " + std::to_string(new_chunk_location) + ");\n"; - chunk->second -= tensor_size; + declareIntermediateTensor(name, tensor_size, new_chunk_location); + chunk->second -= tensor_size; - allocated = true; + allocated = true; - if (chunk->second == 0) { - chunk = fIntermediateMemoryInfo.available_stack.erase(chunk); - } + if (fVerbose) std::cout << " is re-used and split in a new of size " << new_chunk.tensor_size << " at " << new_chunk_location; - break; - } - ++chunk; + if (chunk->second == 0) { + if (fVerbose) std::cout << " and deleted since size matches"; + fIntermediateMemoryInfo.available_stack.erase(chunk); } + if (fVerbose) std::cout << std::endl; + break; + } else if (chunk->first == fIntermediateMemoryInfo.available_stack.rbegin()->first && + fIntermediateMemoryInfo.total_stack.rbegin()->first == chunk->first) { + // case last available chunk is the last in the memory, we can increase that one + fIntermediateMemoryInfo.total_stack[chunk->first] = {it.tensor_name, tensor_size}; + declareIntermediateTensor(name, tensor_size, chunk->first); + fIntermediateMemoryInfo.available_stack.erase(chunk); + allocated = true; + if (fVerbose) std::cout << " is extended with a bigger one of size " << tensor_size << std::endl; + break; + } + ++chunk; + if (fVerbose) std::cout << std::endl; + } - if (!allocated) { - size_t chunk_idx = fIntermediateMemoryInfo.total_stack.empty() - ? 
0 - : fIntermediateMemoryInfo.total_stack.rbegin()->first + fIntermediateMemoryInfo.total_stack.rbegin()->second.tensor_size; + if (!allocated) { + size_t chunk_idx = fIntermediateMemoryInfo.total_stack.empty() + ? 0 + : fIntermediateMemoryInfo.total_stack.rbegin()->first + + fIntermediateMemoryInfo.total_stack.rbegin()->second.tensor_size; - fIntermediateMemoryInfo.total_stack[chunk_idx] = - { - it, - tensor_size - }; + fIntermediateMemoryInfo.total_stack[chunk_idx] = it; - memory_allocation_string += "\n"+ConvertTypeToString(GetTensorType(std::string(it)))+"* tensor_"+ std::string(it) + "= reinterpret_cast<"+ConvertTypeToString(GetTensorType(std::string(it)))+"*>(fIntermediateMemoryPool + " + std::to_string(chunk_idx) + ");\n"; - } + declareIntermediateTensor(name, tensor_size, chunk_idx); + + if (fVerbose) std::cout << "no chunk available - add in total stack a new chunk with size of tensor and idx : " << chunk_idx + << std::endl; + } } - return memory_allocation_string; + return code.str(); } void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ - for (auto &it : op_input_tensors){ + if (fVerbose) std::cout << "*** CheckAndFlushIntermediateMemory: Loop on input tensors for op " << op_idx << "\n"; + //print available chunks + if (fVerbose) std::cout << "available chunks before freeing them : \n"; + for (auto chunk = fIntermediateMemoryInfo.available_stack.begin(); + chunk != fIntermediateMemoryInfo.available_stack.end(); chunk++) { + if (fVerbose) std::cout << "-- free chunk " << chunk->first << " size = " << chunk->second << std::endl; + } + for (auto &it : op_input_tensors) { // last occurence of the tensor is reached => flush it from memory + if (fVerbose) std::cout << ".. 
input tensors : " << it; if (fIntermediateTensorFrequencyLookup[it] == op_idx) { + if (fVerbose) std::cout << " flash condition is met - looping on chunks to find matching one \n"; for (auto chunk = fIntermediateMemoryInfo.total_stack.begin(); - chunk != fIntermediateMemoryInfo.total_stack.end(); ++chunk ) { - if (chunk->second.tensor_name == it) { - - // check if nearby chunks in available memory can coalesce - auto first_greater = fIntermediateMemoryInfo.available_stack.upper_bound(chunk->first); // smallest element greater than the flushed chunk idx - auto last_smaller = (first_greater == fIntermediateMemoryInfo.available_stack.begin()) ? fIntermediateMemoryInfo.available_stack.end() : std::prev(first_greater); // largest element smaller than the flushed chunk idx - - // check if the next stack entry is actually adjacent in memory - if (last_smaller->first+last_smaller->second + 1 == chunk->first){ - last_smaller->second += chunk->second.tensor_size; - fIntermediateMemoryInfo.total_stack[last_smaller->first].merge(chunk->second); - - if (last_smaller->first + last_smaller->second + 1 == first_greater->first){ - fIntermediateMemoryInfo.total_stack[last_smaller->first].merge(fIntermediateMemoryInfo.total_stack[first_greater->first]); - first_greater = fIntermediateMemoryInfo.available_stack.erase(first_greater); - } - } else{ - if (chunk->first + chunk->second.tensor_size + 1 == first_greater->first){ - fIntermediateMemoryInfo.total_stack[chunk->first].merge(fIntermediateMemoryInfo.total_stack[first_greater->first]); - first_greater = fIntermediateMemoryInfo.available_stack.erase(first_greater); - } - fIntermediateMemoryInfo.available_stack.insert({ - chunk->first, - chunk->second.tensor_size - }); - } + chunk != fIntermediateMemoryInfo.total_stack.end(); ++chunk) { + if (fVerbose) std::cout << "--- chunk " << chunk->first << " , " << chunk->second.tensor_name << " size " << chunk->second.tensor_size; + if (chunk->second.tensor_name == it) { + if (fVerbose) 
std::cout << " -- Found chunk corresponding to input tensor: " << chunk->first; + // check if nearby chunks in available memory can coalesce + auto first_greater = fIntermediateMemoryInfo.available_stack.upper_bound( + chunk->first); // smallest element greater than the flushed chunk idx + auto last_smaller = (first_greater == fIntermediateMemoryInfo.available_stack.begin()) + ? fIntermediateMemoryInfo.available_stack.end() + : std::prev(first_greater); // largest element smaller than the flushed chunk idx + + // check if the next stack entry is actually adjacent in memory + + if (last_smaller != fIntermediateMemoryInfo.available_stack.end() && + last_smaller->first + last_smaller->second == chunk->first) { + // merge chunk with previous one + last_smaller->second += chunk->second.tensor_size; + fIntermediateMemoryInfo.total_stack[last_smaller->first].merge(chunk->second); + if (fVerbose) std::cout << " is adjacent in memory with previous one - merge "; + if (first_greater != fIntermediateMemoryInfo.available_stack.end() && + last_smaller->first + last_smaller->second == first_greater->first) { + // merge also with following one + last_smaller->second += first_greater->second; + fIntermediateMemoryInfo.total_stack[last_smaller->first].merge( + fIntermediateMemoryInfo.total_stack[first_greater->first]); + // delete merged one in available stack and in total stack + fIntermediateMemoryInfo.total_stack.erase(first_greater->first); + fIntermediateMemoryInfo.available_stack.erase(first_greater); + if (fVerbose) std::cout << " merge also with following that is free "; + } + fIntermediateMemoryInfo.total_stack.erase(chunk->first); + if (fVerbose) std::cout << std::endl; + break; + } else if (first_greater != fIntermediateMemoryInfo.available_stack.end() && + chunk->first + chunk->second.tensor_size == first_greater->first) { + // merge with first greater + if (fVerbose) std::cout << " is adjacent in memory with following one - merge \n"; + // cannot modify idx of 
first_greter. Insert a new one and delete previous one + size_t new_size = chunk->second.tensor_size + first_greater->second; + size_t first_greater_idx = first_greater->first; + fIntermediateMemoryInfo.available_stack.erase(first_greater); + // cannot use anymore first_greater + fIntermediateMemoryInfo.available_stack.insert({chunk->first, new_size}); + fIntermediateMemoryInfo.total_stack[chunk->first].merge( + fIntermediateMemoryInfo.total_stack[first_greater_idx]); + fIntermediateMemoryInfo.total_stack.erase(first_greater_idx); + } else { + fIntermediateMemoryInfo.available_stack.insert({chunk->first, chunk->second.tensor_size}); + if (fVerbose) std::cout << " insert in the available stack the chunk with size " << chunk->second.tensor_size << std::endl; } + chunk->second.tensor_name = "free"; + break; + } } + } else { + if (fVerbose) std::cout << std::endl; } } } - - void RModel::Initialize(int batchSize, bool verbose) { std::map inputParams; if (batchSize > 0) { @@ -464,7 +539,7 @@ void RModel::Initialize(const std::map & inputParams, bool auto shape = ConvertShapeToInt(input.second.shape); if (verbose) std::cout << "converting input shape for " << input.first << " " << ConvertShapeToString(shape) << " from " - << ConvertDimShapeToString(input.second.shape) << std::endl; + << ConvertShapeToString(input.second.shape) << std::endl; if (!shape.empty()) { // case shape is defined (not parametric) we add the tensor in the fReadyInputTensorInfos map and // we remove the tensor from the fInputTensorInfo where th eold parametric shape was stored @@ -478,8 +553,12 @@ void RModel::Initialize(const std::map & inputParams, bool else { // store the found parametric shape parameters for (auto &d : input.second.shape) { - if (d.isParam) - fShapeParams[d.param] = std::to_string(d.dim); + if (d.isParam) { + if (fShapeParams.count(d.param) == 0) { + fDimShapeNames.push_back(d.param); + fShapeParams[d.param] = std::to_string(d.dim); + } + } } } } @@ -514,10 +593,11 @@ void 
RModel::Initialize(const std::map & inputParams, bool } fOperators[op_idx]->Initialize(*this); for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ + std::string name = std::string{it}; if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && - std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), std::string(it)) == fOutputTensorNames.end() && - fInitializedTensors.find(std::string(it)) == fInitializedTensors.end() && - fDynamicTensorInfos.find(std::string(it)) == fDynamicTensorInfos.end()){ + std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && + fInitializedTensors.find(name) == fInitializedTensors.end() && + fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ fIntermediateTensorFrequencyLookup[it] = op_idx; } } @@ -600,10 +680,13 @@ void RModel::GenerateInitializedTensorInfo() for (auto &i : fInitializedTensors) { if (!fUseWeightFile || i.second.IsConstantTensor()) { - if (i.second.type() == ETensorType::FLOAT) + if (i.second.type() == ETensorType::FLOAT) { fGC += GenerateConstantTensorCode(i); - else if (i.second.type() == ETensorType::INT64) + fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + } else if (i.second.type() == ETensorType::INT64) { fGC += GenerateConstantTensorCode(i); + fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 8; + } } else { // case of tensors which are read from a file @@ -611,43 +694,55 @@ void RModel::GenerateInitializedTensorInfo() if (i.second.type() == ETensorType::FLOAT) { fGC += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; fGC += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + fWeightsTensorSize += ConvertShapeToLength(i.second.shape()) * 4; } } } } void RModel::GenerateIntermediateMemoryPool() { - if (fIntermediateMemoryInfo.total_stack.size() == 0) return; + if (fIntermediateMemoryInfo.total_stack.empty()) return; fGC += 
"\n//--- Allocating session memory pool to be used for allocating intermediate tensors\n"; // char memory block is allocated since char takes 1 byte, thus easier to allocate tensors // of other data types - fGC += "char* fIntermediateMemoryPool = new char[" + std::to_string(fIntermediateMemoryInfo.total_stack.rbegin()->first + fIntermediateMemoryInfo.total_stack.rbegin()->second.tensor_size)+ "];\n\n"; + auto const &totalStack = fIntermediateMemoryInfo.total_stack; + const size_t memPoolSize = totalStack.rbegin()->first + totalStack.rbegin()->second.tensor_size; + fGC += "std::vector fIntermediateMemoryPool = std::vector(" + std::to_string(memPoolSize) + ");\n\n"; } void RModel::GenerateIntermediateTensorInfo() { if (!fIntermediateTensorInfos.empty()) { std::string tensor_declaration_block = ""; - for (auto &i : fIntermediateTensorInfos) { if (i.second.type == ETensorType::BOOL) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; tensor_declaration_block += "std::uint8_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + continue; } - if (fIntermediateTensorFrequencyLookup.find(i.first) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()) { + bool is_extended = (fOptimizationLevel == OptimizationLevel::kExtended); + bool not_in_freq_map = + (fIntermediateTensorFrequencyLookup.find(i.first) == fIntermediateTensorFrequencyLookup.end()); + bool not_in_output_names = + (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()); + + if ((not_in_freq_map && not_in_output_names) || (!not_in_freq_map && !is_extended && not_in_output_names)) { size_t length = ConvertShapeToLength(i.second.shape); if (i.second.type == ETensorType::FLOAT) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + 
std::to_string(length) + ");\n"; tensor_declaration_block += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + fOtherTensorSize += 4 * length; } else if (i.second.type == ETensorType::DOUBLE) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; tensor_declaration_block += "double * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + fOtherTensorSize += 8 * length; } else if (i.second.type == ETensorType::INT64) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; tensor_declaration_block += "int64_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + fOtherTensorSize += 8 * length; } } } @@ -686,17 +781,17 @@ void RModel::GenerateOperatorDeclarations() { fGC += "\n"; } -void RModel::GenerateDynamicTensorInfo() { - fGC += "//---- allocate the intermediate dynamic tensors\n"; - std::stringstream out; - for (auto & i: fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; - } - fGC += out.str(); +void RModel::GenerateDynamicTensorInfo() +{ + std::stringstream out; + for (auto &i : fDynamicTensorInfos) { + auto length = ConvertDynamicShapeToLength(i.second.shape); + out << SP << "if (" << length << " > 0) {\n"; + out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + out << SP << "}\n"; + } + fGC += out.str(); } std::string RModel::GenerateInferSignature(bool isdecl) { @@ -724,7 +819,7 @@ std::string RModel::GenerateInferSignature(bool isdecl) { if (type == "other") throw std::runtime_error("TMVA-SOFIE: input tensor " + name + " 
is of a data type which is not yet supported."); - rGC += type + "* "; + rGC += type + " const* "; } rGC += "tensor_" + name + ","; i_input++; @@ -935,7 +1030,7 @@ void RModel::GenerateSessionCode() } fGC += SP + "using SOFIE::UTILITY::FillOutput;\n\n"; - + for (std::string const &name : fOutputTensorNames) { // need to check is size is the same (don't want to return a vector with // larger size) in that case better to copy @@ -1029,8 +1124,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " f.seekg(" + std::to_string(pos) + ");\n"; } - fGC += " std::string tensor_name;\n"; - fGC += " size_t length;\n"; + fGC += " using SOFIE::ReadTensorFromStream;\n"; // loop on tensors and parse the file for (auto& i: fInitializedTensors) { @@ -1038,25 +1132,8 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { if (!i.second.IsWeightTensor()) continue; std::string tensor_name = "tensor_" + i.first; if (i.second.type() == ETensorType::FLOAT) { - size_t length = 1; - length = ConvertShapeToLength(i.second.shape()); - std::string slength = std::to_string(length); - fGC += " f >> tensor_name >> length;\n"; - fGC += " if (tensor_name != \"" + tensor_name + "\" ) {\n"; - fGC += " std::string err_msg = \"TMVA-SOFIE failed to read the correct tensor name; expected name is " + - tensor_name + " , read \" + tensor_name;\n"; - fGC += " throw std::runtime_error(err_msg);\n"; - fGC += " }\n"; - fGC += " if (length != " + slength + ") {\n"; - fGC += " std::string err_msg = \"TMVA-SOFIE failed to read the correct tensor size; expected size is " + - slength + " , read \" + std::to_string(length) ;\n"; - fGC += " throw std::runtime_error(err_msg);\n"; - fGC += " }\n"; - fGC += " for (size_t i = 0; i < length; ++i)\n"; - fGC += " f >> " + tensor_name + "[i];\n"; - fGC += " if (f.fail()) {\n"; - fGC += " throw std::runtime_error(\"TMVA-SOFIE failed to read the values for tensor " + tensor_name + "\");\n"; - fGC += " }\n"; + std::string length = 
std::to_string(ConvertShapeToLength(i.second.shape())); + fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } @@ -1066,6 +1143,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { // generate the code to read initialized tensors from a ROOT data file if(fWeightFile == WeightFileType::RootBinary) { +#ifdef SOFIE_SUPPORT_ROOT_BINARY fGC += " {\n"; fGC += " std::unique_ptr rootFile(TFile::Open(filename.c_str(), \"READ\"));\n"; fGC += " if (!rootFile->IsOpen()) {\n"; @@ -1097,6 +1175,9 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " }\n"; } fGC += " }\n"; +#else + throw std::runtime_error("SOFIE was not built with ROOT file support."); +#endif // SOFIE_SUPPORT_ROOT_BINARY } } @@ -1122,6 +1203,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { // Write the initialized tensors to the file if (fWeightFile == WeightFileType::RootBinary) { +#ifdef SOFIE_SUPPORT_ROOT_BINARY if(fIsGNNComponent || fIsGNN) { throw std::runtime_error("SOFIE-GNN yet not supports writing to a ROOT file."); } @@ -1165,6 +1247,9 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { // this needs to be changed, similar to the text file return -1; +#else + throw std::runtime_error("SOFIE was not built with ROOT file support."); +#endif // SOFIE_SUPPORT_ROOT_BINARY } else if (fWeightFile == WeightFileType::Text) { std::ofstream f; if(fIsGNNComponent) { @@ -1291,9 +1376,9 @@ void RModel::PrintOutputTensors() { for (auto& it: fOutputTensorNames) { std::cout << "Tensor name: \"" << it << "\"\t"; if (!IsDynamicTensor(it)) - std::cout << "shape: " << ConvertShapeToString(GetTensorShape(it)) << std::endl; - else - std::cout << "shape: " << ConvertDimShapeToString(GetDynamicTensorShape(it)) << std::endl; + std::cout << "shape: " << 
ConvertShapeToString(GetTensorShape(it)) << std::endl; + else + std::cout << "shape: " << ConvertShapeToString(GetDynamicTensorShape(it)) << std::endl; } std::cout << "\n"; } @@ -1359,13 +1444,13 @@ void RModel::OutputGenerated(std::string filename, bool append) { void RModel::Streamer(TBuffer &R__b) { if (R__b.IsReading()) { RModel::Class()->ReadBuffer(R__b, this); - for(auto i=RModel::fInitializedTensors.begin(); i!=RModel::fInitializedTensors.end(); ++i) { - i->second.CastPersistentToShared(); + for (auto & i : fInitializedTensors) { + i.second.CastPersistentToShared(); } } else { - for(auto i=RModel::fInitializedTensors.begin(); i!=RModel::fInitializedTensors.end(); ++i) { - i->second.CastSharedToPersistent(); + for (auto & i : fInitializedTensors) { + i.second.CastSharedToPersistent(); } RModel::Class()->WriteBuffer(R__b, this); } From 20167943d59a3bf7b138fb8391c8eb65b704133e Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 24 Nov 2025 16:26:31 +0100 Subject: [PATCH 15/43] fix: linking issue because of incorrect symbols --- src/SOFIE_core/CMakeLists.txt | 7 +- src/SOFIE_core/inc/SOFIE/RFunction.hxx | 1 + src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 1 - src/SOFIE_core/inc/SOFIE/ROperator.hxx | 6 +- .../inc/SOFIE/ROperator_BasicBinary.hxx | 14 +-- src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc | 4 +- .../SOFIE/ROperator_LayerNormalization.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx | 6 +- .../inc/SOFIE/ROperator_Reshape.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx | 6 +- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 6 +- src/SOFIE_core/src/RModel.cxx | 37 -------- src/SOFIE_core/test/CMakeLists.txt | 88 ++++++++++++++----- src/SOFIE_core/test/EmitFromONNX.cxx.in | 2 +- .../test/EmitFromONNX_GPU_ALPAKA.cxx.in | 11 ++- 
src/SOFIE_parsers/CMakeLists.txt | 9 ++ 20 files changed, 126 insertions(+), 98 deletions(-) diff --git a/src/SOFIE_core/CMakeLists.txt b/src/SOFIE_core/CMakeLists.txt index de13b58..ac7499d 100644 --- a/src/SOFIE_core/CMakeLists.txt +++ b/src/SOFIE_core/CMakeLists.txt @@ -94,12 +94,17 @@ target_link_libraries(SOFIE_core PUBLIC RIO ) -ROOT_GENERATE_DICTIONARY(G__SOFIE ${sources_headers} +ROOT_GENERATE_DICTIONARY(G__SOFIE_core ${sources_headers} LINKDEF inc/LinkDef.h MODULE SOFIE_core OPTIONS --deep ) +# Install the dictionaries. +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_core_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_core.rootmap + DESTINATION lib) + install(TARGETS SOFIE_core LIBRARY DESTINATION lib ) diff --git a/src/SOFIE_core/inc/SOFIE/RFunction.hxx b/src/SOFIE_core/inc/SOFIE/RFunction.hxx index 53c30e3..f79691a 100644 --- a/src/SOFIE_core/inc/SOFIE/RFunction.hxx +++ b/src/SOFIE_core/inc/SOFIE/RFunction.hxx @@ -3,6 +3,7 @@ #include "SOFIE/RModel_Base.hxx" #include "SOFIE/SOFIE_common.hxx" +#include "SOFIE/ROperator.hxx" #include #include diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index 460372a..deac58b 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -12,7 +12,6 @@ #include #include #include "SOFIE/SOFIE_common.hxx" -#include "SOFIE/ROperator.hxx" #include "TBuffer.h" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index 9bccc5b..17b62f6 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR -#define TMVA_SOFIE_ROPERATOR +#ifndef SOFIE_ROPERATOR +#define SOFIE_ROPERATOR #include #include @@ -103,4 +103,4 @@ public: }//SOFIE -#endif //TMVA_SOFIE_OPERATOR +#endif //SOFIE_OPERATOR diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 
da7cf63..80f35be 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROperator_BasicBinary -#define TMVA_SOFIE_ROperator_BasicBinary +#ifndef SOFIE_SOFIE_ROperator_BasicBinary +#define SOFIE_SOFIE_ROperator_BasicBinary #include "SOFIE/SOFIE_common.hxx" #include "SOFIE/ROperator.hxx" @@ -402,7 +402,7 @@ public: } op.pop_back(); op += "));\n"; - + std::cout<<"okay till here 1\n"; op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeA); @@ -426,6 +426,7 @@ public: for (int j = 0; j < 3; j++) compute_idx_A.pop_back(); } + std::cout<<"okay till here 2\n"; if (fDimShapeB.empty() || std::all_of(fDimShapeB.begin(), fDimShapeB.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { compute_idx_B = "0"; @@ -442,6 +443,7 @@ public: for (int j = 0; j < 3; j++) compute_idx_B.pop_back(); } + std::cout<<"okay till here 3\n"; int nloop = 0; if (fDimShapeY.empty() || std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { @@ -461,16 +463,18 @@ public: for (int j = 0; j < 3; j++) compute_idx_Y.pop_back(); } + std::cout<<"okay till here 4\n"; for (int j = 0; j < nloop + 1; j++) op += SP; op += "C[" + compute_idx_Y + "] = " + BinaryOperatorTrait::Op("A[" + compute_idx_A + "]", "B[" + compute_idx_B + "]") + " ;\n"; - + std::cout<<"okay till here 5\n"; for (int i = nloop; i > 0; i--) { for (int j = 0; j < i; j++) op += SP; op += "}\n"; } + return op; } std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string OpName) { @@ -503,4 +507,4 @@ public: } // namespace SOFIE -#endif // TMVA_SOFIE_ROperator_BasicBinary +#endif // SOFIE_ROperator_BasicBinary diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx index 3ef0ee4..c828668 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ 
b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_Concat - #define TMVA_SOFIE_ROPERATOR_Concat +#ifndef SOFIE_ROPERATOR_Concat +#define SOFIE_ROPERATOR_Concat #include "SOFIE/SOFIE_common.hxx" @@ -320,4 +320,4 @@ }//SOFIE - #endif //TMVA_SOFIE_ROPERATOR_CONCAT \ No newline at end of file + #endif //SOFIE_ROPERATOR_CONCAT \ No newline at end of file diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx index f9998e1..5b553ff 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_GRU -#define TMVA_SOFIE_ROPERATOR_GRU +#ifndef SOFIE_ROPERATOR_GRU +#define SOFIE_ROPERATOR_GRU #include "SOFIE/RModel.hxx" #include "SOFIE/ROperator.hxx" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc index d011617..38030d1 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_GRU_I -#define TMVA_SOFIE_ROPERATOR_GRU_I +#ifndef SOFIE_ROPERATOR_GRU_I +#define SOFIE_ROPERATOR_GRU_I namespace SOFIE { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc index 9d31b7f..ebf4daf 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_LSTM_I -#define TMVA_SOFIE_ROPERATOR_LSTM_I +#ifndef SOFIE_ROPERATOR_LSTM_I +#define SOFIE_ROPERATOR_LSTM_I namespace SOFIE { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx index e6c4c99..4a328de 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION -#define 
TMVA_SOFIE_ROPERATOR_LAYERNORMALIZATION +#ifndef SOFIE_ROPERATOR_LAYERNORMALIZATION +#define SOFIE_ROPERATOR_LAYERNORMALIZATION #include "SOFIE/RModel.hxx" #include "SOFIE/SOFIE_common.hxx" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc index 08dc3dc..c10c2a5 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_RNN_I -#define TMVA_SOFIE_ROPERATOR_RNN_I +#ifndef SOFIE_ROPERATOR_RNN_I +#define SOFIE_ROPERATOR_RNN_I namespace SOFIE { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx index 0930a0b..3e8605e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_RANGE -#define TMVA_SOFIE_ROPERATOR_RANGE +#ifndef SOFIE_ROPERATOR_RANGE +#define SOFIE_ROPERATOR_RANGE #include "SOFIE/SOFIE_common.hxx" #include "SOFIE/ROperator.hxx" @@ -121,4 +121,4 @@ public: }//SOFIE -#endif //TMVA_SOFIE_ROPERATOR_RANGE +#endif //SOFIE_ROPERATOR_RANGE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx index a014547..0a21709 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROPERATOR_RESHAPE -#define TMVA_SOFIE_ROPERATOR_RESHAPE +#ifndef SOFIE_ROPERATOR_RESHAPE +#define SOFIE_ROPERATOR_RESHAPE #include "SOFIE/SOFIE_common.hxx" #include "SOFIE/ROperator.hxx" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx index 15906e5..19d217d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_ROperator_Where -#define TMVA_SOFIE_ROperator_Where +#ifndef SOFIE_ROperator_Where +#define SOFIE_ROperator_Where 
#include "SOFIE/SOFIE_common.hxx" #include "SOFIE/ROperator.hxx" @@ -292,4 +292,4 @@ public: }//SOFIE -#endif //TMVA_SOFIE_ROperator_Where +#endif // SOFIE_ROperator_Where diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index 17ac714..c120570 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -1,5 +1,5 @@ -#ifndef TMVA_SOFIE_SOFIE_COMMON -#define TMVA_SOFIE_SOFIE_COMMON +#ifndef SOFIE_SOFIE_COMMON +#define SOFIE_SOFIE_COMMON #include "TMVA/RTensor.hxx" @@ -806,4 +806,4 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } // namespace SOFIE -#endif //TMVA_SOFIE_COMMON +#endif //SOFIE_COMMON diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index b2d8625..fb23c11 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -636,43 +636,6 @@ void RModel::InitializeSubGraph(std::shared_ptr graph) { } -// Function to generate the code for declaring and initializing constant tensors -// This is for tensors which are not part of weight files and can be created from the Constant operator -template -std::string GenerateConstantTensorCode(const std::pair &t) -{ - std::stringstream strs; - std::string type = ConvertTypeToString(t.second.type()); - size_t length = ConvertShapeToLength(t.second.shape()); - // avoid using stack sizes for constant tensors to reduce compilation time - bool allocateOnStack = (length > 100) ? 
false : true; - - const T *data = t.second.data(); - - // and check if all values are the same - bool sameData = false; - // for non stack allocation check if data are the same - if (!allocateOnStack && length > 1) { - size_t idx = 1; - do { - sameData = (data[idx] == data[idx - 1]); - idx++; - } while (sameData && idx < length); - } - if (allocateOnStack) { - strs << type << " tensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; - } else { - strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; - if (sameData) - strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; - else { - strs << ConvertValuesToString(length, data) << ";\n"; - } - strs << "const " << type << " * tensor_" + t.first + " = fTensor_" + t.first + ".data();\n"; - } - return strs.str(); -} - void RModel::GenerateInitializedTensorInfo() { if (!fInitializedTensors.empty()) diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index e4713b6..1ba5dfd 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -16,32 +16,54 @@ if (NOT ONNX_MODELS_DIR) set(ONNX_MODELS_DIR input_models) endif() -# Finding .onnx files to be parsed and creating the appropriate code to -# parse all file. It is much faster to combine all parsing in a single executable -# which will avoid initialization time (especially when using ROOT) -set(CAPTURE_STR "EmitModel( \"@1\", \"@2\");") +# String template used to produce calls to EmitModel(...) per file. +set(CAPTURE_STR +"try {\n\ + EmitModel(\"@1\", \"@2\");\n\ +} catch (const std::exception& e) {\n\ + std::cerr << \"[ERROR] Failed processing @1: \" << e.what() << std::endl;\n\ + failures++;\n\ +} catch (...) 
{\n\ + std::cerr << \"[ERROR] Unknown failure processing @1\" << std::endl;\n\ + failures++;\n\ +}\n\ +") +# --- Collect ONNX files and build ALL_CAPTURES BEFORE any configure_file() --- set(ALL_CAPTURES "") -# Finding .onnx files to be parsed and creating the appropriate command file(GLOB ONNX_FILES "${ONNX_MODELS_DIR}/*.onnx") + +# If there are no models, ONNX_FILES will be empty and ALL_CAPTURES stays empty. foreach(onnx_file ${ONNX_FILES}) get_filename_component(fname ${onnx_file} NAME_WE) get_filename_component(fdir ${onnx_file} DIRECTORY) - string(REPLACE "@1" ${onnx_file} cap ${CAPTURE_STR}) - string(REPLACE "@2" ${fname} cap ${cap}) + + string(REPLACE "@1" "${onnx_file}" cap "${CAPTURE_STR}") + string(REPLACE "@2" "${fname}" cap "${cap}") list(APPEND ALL_CAPTURES ${cap}) endforeach() -string(REPLACE ";" ";\n" EMIT_CAPTURES "${ALL_CAPTURES}") + +# Now generate the combined source files for CPU, ROOT and ALPAKA +# They will have @EMIT_CAPTURES@ substituted with the contents of ALL_CAPTURES +# (CMake configure_file uses variables via @VAR@ when @ONLY is provided). +# To make ALL_CAPTURES visible to configure_file we set a temporary variable +# that configure_file can reference directly. +set(EMIT_CAPTURES "${ALL_CAPTURES}") + +# Note: the .in templates must use @EMIT_CAPTURES@ placeholder. configure_file(EmitFromONNX.cxx.in EmitFromONNX_all.cxx @ONLY) configure_file(EmitFromRoot.cxx.in EmitFromRoot_all.cxx @ONLY) configure_file(EmitFromONNX_GPU_ALPAKA.cxx.in EmitFromONNX_GPU_ALPAKA_all.cxx @ONLY) +# --- CPU emitter executable and test (unchanged) --- ROOTTEST_GENERATE_EXECUTABLE(emitFromONNX EmitFromONNX_all.cxx LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers - FIXTURES_SETUP sofie-compile-models-onnx-build) + FIXTURES_SETUP sofie-compile-models-onnx-build) # silence protobuf warnings seen in version 3.0 and 3.6. 
Not needed from protobuf version 3.17 target_compile_options(emitFromONNX PRIVATE -Wno-unused-parameter -Wno-array-bounds) +# IMPORTANT: ROOTTEST_ADD_TEST below used to expand ${onnx_file}/${fname} which are loop vars. +# We keep it as a single-test wrapper; per-file test invocation is added later for ALPAKA. ROOTTEST_ADD_TEST(SofieCompileModels_ONNX COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNX ${onnx_file} ${CMAKE_CURRENT_BINARY_DIR}/${fname} FIXTURES_REQUIRED sofie-compile-models-onnx-build @@ -69,7 +91,6 @@ if (BLAS_FOUND) # we need BLAS for compiling the models endif() # For testing serialisation of RModel object - ROOTTEST_GENERATE_EXECUTABLE(emitFromROOT EmitFromRoot_all.cxx LIBRARIES protobuf::libprotobuf RIO SOFIE_core SOFIE_parsers FIXTURES_SETUP sofie-compile-models-onnx-root @@ -132,15 +153,38 @@ ROOT_EXECUTABLE(EmitGraphIndependent GNN/EmitGraphIndependent.cxx LIBRARIES SOFI ROOT_ADD_TEST(tmva-sofie-EmitGraphIndependent COMMAND EmitGraphIndependent) # Generating inference code for heterogeneous testing using ALPAKA -# ROOTTEST_GENERATE_EXECUTABLE(emitFromONNXAlpaka EmitFromONNX_GPU_ALPAKA_all.cxx -# LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers -# FIXTURES_SETUP sofie-compile-models-onnx-alpaka-build) - -# # silence protobuf warnings seen in version 3.0 and 3.6. 
Not needed from protobuf version 3.17 -# target_compile_options(emitFromONNXAlpaka PRIVATE -Wno-unused-parameter -Wno-array-bounds) - -# ROOTTEST_ADD_TEST(SofieCompileModels_ONNX_Alpaka -# COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNXAlpaka ${onnx_file} ${CMAKE_CURRENT_BINARY_DIR}/${fname} -# FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka-build -# FIXTURES_SETUP sofie-compile-models-onnx-alpaka -# ) +ROOTTEST_GENERATE_EXECUTABLE(emitFromONNXAlpaka EmitFromONNX_GPU_ALPAKA_all.cxx + LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers + FIXTURES_SETUP sofie-compile-models-onnx-alpaka-build) + +# silence protobuf warnings seen in version 3.0 and 3.6. Not needed from protobuf version 3.17 +target_compile_options(emitFromONNXAlpaka PRIVATE -Wno-unused-parameter -Wno-array-bounds) + +# Add explicit per-file post-build runs of the alpaka emitter executable so that +# EmitFromONNXAlpaka is invoked for each detected .onnx file during the build. +# This avoids relying on a single ${onnx_file}/${fname} value that would +# otherwise expand only to the last entry when used outside the loop. + +if (ONNX_FILES) + foreach(onnx_file ${ONNX_FILES}) + get_filename_component(fname ${onnx_file} NAME_WE) + + # Create a post-build command attached to the emitFromONNXAlpaka target that + # will run the built binary with the current onnx file and the chosen outname. + add_custom_command(TARGET emitFromONNXAlpaka + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 $ "${onnx_file}" "${CMAKE_CURRENT_BINARY_DIR}/${fname}" + COMMENT "Running ALPAKA emitter on ${onnx_file}" + ) + endforeach() +endif() + +# Also add a ROOTTEST wrapper so that ctest can run one of the invocations if desired. +# (This remains mostly for CI / test harness compatibility.) 
+ROOTTEST_ADD_TEST(SofieCompileModels_ONNX_Alpaka + COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNXAlpaka ${ONNX_FILES} + FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka-build + FIXTURES_SETUP sofie-compile-models-onnx-alpaka +) + +# End of CMakeLists.txt diff --git a/src/SOFIE_core/test/EmitFromONNX.cxx.in b/src/SOFIE_core/test/EmitFromONNX.cxx.in index f7a56e2..a45af49 100644 --- a/src/SOFIE_core/test/EmitFromONNX.cxx.in +++ b/src/SOFIE_core/test/EmitFromONNX.cxx.in @@ -23,7 +23,7 @@ int EmitModel(std::string filename, std::string outname) { int main(int argc, char *argv[]){ -@EMIT_CAPTURES@ ; +@EMIT_CAPTURES@ } diff --git a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in b/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in index e2250e6..10619a5 100644 --- a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in +++ b/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in @@ -11,14 +11,17 @@ int EmitModel(std::string filename, std::string outname) { RModelParser_ONNX parser; RModel model = parser.Parse(filename); model.GenerateGPU_ALPAKA(); - model.OutputGenerated(outname+"_FromONNX.hxx"); + model.OutputGenerated(outname+"_FromONNX_GPU_ALPAKA.hxx"); return 0; } -int main(int argc, char *argv[]){ +int main(int argc, char *argv[]) { -@EMIT_CAPTURES@ ; + int failures = 0; -} +@EMIT_CAPTURES@ + std::cout << "[SUMMARY] Completed with " << failures << " failures" << std::endl; + return failures == 0 ? 
0 : 1; +} diff --git a/src/SOFIE_parsers/CMakeLists.txt b/src/SOFIE_parsers/CMakeLists.txt index 379b7d7..d77d1e6 100644 --- a/src/SOFIE_parsers/CMakeLists.txt +++ b/src/SOFIE_parsers/CMakeLists.txt @@ -102,6 +102,15 @@ target_include_directories(SOFIE_parsers PUBLIC set_target_properties(SOFIE_parsers PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + ROOT_GENERATE_DICTIONARY(G__SOFIE_parsers ${sources_headers} + LINKDEF inc/LinkDef.h + MODULE SOFIE_parsers + OPTIONS --deep +) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_parsers_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_parsers.rootmap + DESTINATION lib) + install(TARGETS SOFIE_parsers LIBRARY DESTINATION lib ) From f35d9d94cc14872488ec6f40625c1f0e35ea70ab Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 27 Nov 2025 11:48:25 +0100 Subject: [PATCH 16/43] fix: cmake script for tests --- .vscode/settings.json | 3 ++- src/SOFIE_core/src/RModel_ALPAKA.cxx | 4 ++-- src/SOFIE_core/test/CMakeLists.txt | 2 +- src/SOFIE_core/test/EmitFromONNX.cxx.in | 8 +++++++- src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index fd03126..182ccd4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -75,6 +75,7 @@ "format": "cpp", "any": "cpp", "source_location": "cpp", - "run_inference_particle_net.C": "cpp" + "run_inference_particle_net.C": "cpp", + "test.C": "cpp" } } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 066d4e8..2da0e1f 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -266,11 +266,11 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { for (size_t id = 0; id < fOperators.size(); id++) { fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); if (fOperators[id]->GetKind() == OperatorKind::GEMM){ - fGC += "\nblas.AddLayoutConfig("+fOperators[id]->GetBlasConfig()+");"; + fGC += 
"\nblas.AddLayoutConfig("+fOperators[id]->GetBlasConfig()+");\n"; } } - fGC += "alpaka::wait(queue);\n"; + fGC += "\nalpaka::wait(queue);\n"; fGC += "}\n\n"; } diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index 1ba5dfd..c5e0d8f 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -39,7 +39,7 @@ foreach(onnx_file ${ONNX_FILES}) string(REPLACE "@1" "${onnx_file}" cap "${CAPTURE_STR}") string(REPLACE "@2" "${fname}" cap "${cap}") - list(APPEND ALL_CAPTURES ${cap}) + string(APPEND ALL_CAPTURES "${cap}") endforeach() # Now generate the combined source files for CPU, ROOT and ALPAKA diff --git a/src/SOFIE_core/test/EmitFromONNX.cxx.in b/src/SOFIE_core/test/EmitFromONNX.cxx.in index a45af49..1433ba6 100644 --- a/src/SOFIE_core/test/EmitFromONNX.cxx.in +++ b/src/SOFIE_core/test/EmitFromONNX.cxx.in @@ -23,7 +23,13 @@ int EmitModel(std::string filename, std::string outname) { int main(int argc, char *argv[]){ -@EMIT_CAPTURES@ + + int failures = 0; + + @EMIT_CAPTURES@ + + std::cout << "[SUMMARY] Completed with " << failures << " failures" << std::endl; + return failures == 0 ? 0 : 1; } diff --git a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in b/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in index 10619a5..0d51e92 100644 --- a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in +++ b/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in @@ -20,7 +20,7 @@ int main(int argc, char *argv[]) { int failures = 0; -@EMIT_CAPTURES@ + @EMIT_CAPTURES@ std::cout << "[SUMMARY] Completed with " << failures << " failures" << std::endl; return failures == 0 ? 
0 : 1; From cdc6a9f8865b4eb7d43bff80dda8277391de33ad Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Tue, 2 Dec 2025 16:11:53 +0100 Subject: [PATCH 17/43] fix: define failures in EmitFromRoot.cxx.in (#6) --- src/SOFIE_core/test/EmitFromRoot.cxx.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/SOFIE_core/test/EmitFromRoot.cxx.in b/src/SOFIE_core/test/EmitFromRoot.cxx.in index 4a630c7..b08d17b 100644 --- a/src/SOFIE_core/test/EmitFromRoot.cxx.in +++ b/src/SOFIE_core/test/EmitFromRoot.cxx.in @@ -43,6 +43,10 @@ int EmitModel(std::string inputfile, std::string outname){ int main(int argc, char *argv[]){ -@EMIT_CAPTURES@ ; + int failures = 0; + @EMIT_CAPTURES@; + + std::cout << "[SUMMARY] Completed with " << failures << " failures" << std::endl; + return failures == 0 ? 0 : 1; } From 3ffbe4605c46397e353505f1b3027e53908ee09f Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sun, 14 Dec 2025 23:41:56 +0100 Subject: [PATCH 18/43] fix: layout inconsistencies in alpaka code generation --- src/CMakeLists.txt | 1 + src/SOFIE_core/CMakeLists.txt | 1 + src/SOFIE_core/README.md | 3 - src/SOFIE_core/inc/SOFIE/RModel.hxx | 4 + src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 28 +- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 33 +- src/SOFIE_core/src/RModel_ALPAKA.cxx | 38 +- src/SOFIE_core/src/SOFIE_common.cxx | 10 +- src/SOFIE_core/test/EmitFromONNX.cxx.in | 2 +- .../test/EmitFromONNX_GPU_ALPAKA.cxx.in | 2 +- src/SOFIE_core/test/EmitFromRoot.cxx.in | 7 +- src/utils/CMakeLists.txt | 11 + src/utils/SOFIE/RTensor.hxx | 628 ++++++++++++++++++ 13 files changed, 726 insertions(+), 42 deletions(-) create mode 100644 src/utils/CMakeLists.txt create mode 100644 src/utils/SOFIE/RTensor.hxx diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c48e8d1..102ca3b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,3 +8,4 @@ set(sofie_legacy_eval_backend ON CACHE BOOL "" FORCE) add_subdirectory(SOFIE_core) add_subdirectory(SOFIE_parsers) 
+add_subdirectory(utils) diff --git a/src/SOFIE_core/CMakeLists.txt b/src/SOFIE_core/CMakeLists.txt index ac7499d..4cab8e0 100644 --- a/src/SOFIE_core/CMakeLists.txt +++ b/src/SOFIE_core/CMakeLists.txt @@ -88,6 +88,7 @@ set(sources_cxx target_sources(SOFIE_core PRIVATE ${sources_headers} ${sources_cxx}) target_include_directories(SOFIE_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/inc) +target_link_libraries(SOFIE_core PUBLIC utils) target_link_libraries(SOFIE_core PUBLIC Tree Core diff --git a/src/SOFIE_core/README.md b/src/SOFIE_core/README.md index 033cad4..2259d7a 100644 --- a/src/SOFIE_core/README.md +++ b/src/SOFIE_core/README.md @@ -25,7 +25,6 @@ SOFIE works in a parser-generator working architecture. With SOFIE, the user get From ROOT command line, or in a ROOT macro, we can proceed with an ONNX model: ```c++ -using namespace TMVA::Experimental; SOFIE::RModelParser_ONNX parser; SOFIE::RModel model = parser.Parse(“./example_model.onnx”); model.Generate(); @@ -73,7 +72,6 @@ SOFIE also supports generating inference code with RDataFrame as inputs, refer t Here is the updated list of supported ONNX operators. You can obtain this list by doing ```cpp -using namespace TMVA::Experimental; SOFIE::RModelParser_ONNX parser; std::vector supportedOperators = parser.GetRegisteredOperators(); ``` @@ -164,7 +162,6 @@ The above operators are supported for tensors of the following types: You can also check your model whether all operators are implemented by doing the following: ```c++ -using namespace TMVA::Experimental; SOFIE::RModelParser_ONNX parser; parser.CheckModel("example_model.ONNX"); ``` diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 4ecdaec..ed28b48 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -168,6 +168,10 @@ public: // used to infer the sub-graphs std::string GenerateInferSignature(bool isdecl = true); + // generate the infer function signature for inference on ALPAKA. 
If isdecl= false generate the calling infer function + // used to infer the sub-graphs + std::string GenerateInferSignature_GPU_ALPAKA(bool isdecl = true); + void RemoveIntermediateTensor(const std::string& tensor_name){ fIntermediateTensorInfos.erase(tensor_name); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 7756f9a..f417acf 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -436,8 +436,8 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm(MatMul) has invalid shape for inputs or output"); } auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); - auto n = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); - auto k = (fAttrTransB ? fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); + auto n = (fAttrTransB ? fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); + auto k = (fAttrTransA ? fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); std::vector sY = {fShapeY[dimY-2], fShapeY[dimY-1]}; // extra dimensions in case of stacked MatMul std::vector sA; @@ -451,7 +451,9 @@ namespace SOFIE{ out << SP << "int " << opName << "_n = " << n << ";\n"; out << SP << "int " << opName << "_k = " << k << ";\n"; out << SP << "float " << opName << "_alpha = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ";\n"; - out << SP << "float " << opName << "_beta = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ";\n"; + + // restricting to a 0 beta since BIAS is configured separately through sofieBLAS interface + out << SP << "float " << opName << "_beta = 0;\n"; // case bias is present if (!fNC.empty()){ @@ -484,10 +486,14 @@ namespace SOFIE{ out << SP; } // in the case of bias - if (!fNC.empty() && fActivation == EActivationType::RELU){ - out << SP << "blas.gemmrelu("< ConvertShapeToDim(const std::vector & shape); std::vector ConvertShapeToInt(const 
std::vector & shape); -std::size_t ConvertShapeToLength(const std::vector & shape); +inline std::size_t ConvertShapeToLength(const std::vector & shape){ + // Empty shape represent scalar values, so we return a length=1 + std::size_t fLength = 1; + for (auto& dim: shape) fLength *= dim; + return fLength; +} std::string ConvertShapeToString(const std::vector & shape); std::string ConvertDimShapeToString(const std::vector & shape); @@ -692,20 +697,20 @@ extern "C" void sgemm_(const char * transa, const char * transb, const int * m, struct GNN_Data { - TMVA::Experimental::RTensor node_data; // the node feature data, tensor with shape (num_nodes, num_node_features) - TMVA::Experimental::RTensor edge_data; // the edge feature data, tensor with shape (num_edges, num_edge_features) - TMVA::Experimental::RTensor global_data; // the global features, tensor with shape (1, num_global_features) - TMVA::Experimental::RTensor edge_index; // the edge index (receivers and senders for each edge), tensor with shape (2, num_edges) + SOFIE::RTensor node_data; // the node feature data, tensor with shape (num_nodes, num_node_features) + SOFIE::RTensor edge_data; // the edge feature data, tensor with shape (num_edges, num_edge_features) + SOFIE::RTensor global_data; // the global features, tensor with shape (1, num_global_features) + SOFIE::RTensor edge_index; // the edge index (receivers and senders for each edge), tensor with shape (2, num_edges) // edge_index[0,:] are the receivers and edge_index[1,:] are the senders // need to have default constructor since RTensor has not one - GNN_Data(): node_data(TMVA::Experimental::RTensor({})), edge_data(TMVA::Experimental::RTensor({})), global_data(TMVA::Experimental::RTensor({})), edge_index(TMVA::Experimental::RTensor({})) {} + GNN_Data(): node_data(SOFIE::RTensor({})), edge_data(SOFIE::RTensor({})), global_data(SOFIE::RTensor({})), edge_index(SOFIE::RTensor({})) {} }; template -TMVA::Experimental::RTensor Concatenate( 
TMVA::Experimental::RTensor & t1, TMVA::Experimental::RTensor & t2, int axis = 0) +SOFIE::RTensor Concatenate( SOFIE::RTensor & t1, SOFIE::RTensor & t2, int axis = 0) { // concatenate tensor along axis. Shape must be the same except in the dimension of the concatenated axis if (t1.GetMemoryLayout() != t2.GetMemoryLayout()) @@ -720,8 +725,8 @@ TMVA::Experimental::RTensor Concatenate( TMVA::Experimental::RTensor & t1, } std::vector outShape = shape1; outShape[axis] = shape1[axis] + shape2[axis]; - TMVA::Experimental::RTensor tout(outShape, t1.GetMemoryLayout()); - if (t1.GetMemoryLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) { + SOFIE::RTensor tout(outShape, t1.GetMemoryLayout()); + if (t1.GetMemoryLayout() == SOFIE::MemoryLayout::ColumnMajor) { throw std::runtime_error("TMVA RTensor Concatenate is not yet supported for column major tensors"); } @@ -754,10 +759,10 @@ inline GNN_Data Concatenate(GNN_Data & data1, GNN_Data & data2, int axis = 0) { inline GNN_Data Copy(const GNN_Data & data) { GNN_Data out; - out.node_data = TMVA::Experimental::RTensor(data.node_data.GetShape()); - out.edge_data = TMVA::Experimental::RTensor(data.edge_data.GetShape()); - out.global_data = TMVA::Experimental::RTensor(data.global_data.GetShape()); - out.edge_index = TMVA::Experimental::RTensor(data.edge_index.GetShape()); + out.node_data = SOFIE::RTensor(data.node_data.GetShape()); + out.edge_data = SOFIE::RTensor(data.edge_data.GetShape()); + out.global_data = SOFIE::RTensor(data.global_data.GetShape()); + out.edge_index = SOFIE::RTensor(data.edge_index.GetShape()); std::copy(data.node_data.GetData(), data.node_data.GetData()+ data.node_data.GetSize(), out.node_data.GetData()); std::copy(data.edge_data.GetData(), data.edge_data.GetData()+ data.edge_data.GetSize(), out.edge_data.GetData()); std::copy(data.global_data.GetData(), data.global_data.GetData()+ data.global_data.GetSize(), out.global_data.GetData()); diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx 
b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 2da0e1f..03eb5e7 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -128,6 +128,42 @@ void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { fGC += out.str(); } +// only supports BufF1D buffer data types for now +std::string RModel::GenerateInferSignature_GPU_ALPAKA(bool isdecl) { + // generate the infer signature given the inputs: eg. "BufF1D const deviceBuf_A, BufF1D const deviceBuf_B" + // if (decl = false) generate only calling signature (deviceBuf_A, deviceBuf_B, ....) + std::string rGC; + std::unordered_map inputParams; + int i_input = 0; + for (auto &name : fInputTensorNames) { + // if is a dynamic tensor pass initial parameters + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + std::string pName = d.param; + // need to check if the input parameters is already existing in another input tensor + if (d.isParam && inputParams.count(pName) == 0) { + if (isdecl) rGC += "size_t "; + rGC += d.param + ","; + inputParams[pName] = i_input; + } + } + } + if (isdecl) { + std::string type = "BufF1D"; + if (type == "other") + throw std::runtime_error("TMVA-SOFIE: input tensor " + name + + " is of a data type which is not yet supported."); + rGC += type + " const "; + } + rGC += "deviceBuf_" + name + ","; + i_input++; + } + + if (fInputTensorNames.size() > 0) rGC.pop_back();// remove last "," + return rGC; +} + void RModel::GenerateOutput_GPU_ALPAKA() { if (fVerbose) std::cout << "Generating main inference code for " << fName << std::endl; @@ -149,7 +185,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { } fGC += " infer("; - fGC += GenerateInferSignature(); + fGC += GenerateInferSignature_GPU_ALPAKA(); fGC += "){\n"; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx index 6136f72..05f873b 100644 --- a/src/SOFIE_core/src/SOFIE_common.cxx 
+++ b/src/SOFIE_core/src/SOFIE_common.cxx @@ -46,14 +46,6 @@ std::vector ConvertShapeToInt(const std::vector & shape){ return ret_shape; } - -std::size_t ConvertShapeToLength(const std::vector & shape){ - // Empty shape represent scalar values, so we return a length=1 - std::size_t fLength = 1; - for (auto& dim: shape) fLength *= dim; - return fLength; -} - std::string ConvertTypeToString(ETensorType type){ switch(type){ case ETensorType::FLOAT : { @@ -545,4 +537,4 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } -} // namespace SOFIE \ No newline at end of file +} // namespace SOFIE diff --git a/src/SOFIE_core/test/EmitFromONNX.cxx.in b/src/SOFIE_core/test/EmitFromONNX.cxx.in index 1433ba6..c464f4d 100644 --- a/src/SOFIE_core/test/EmitFromONNX.cxx.in +++ b/src/SOFIE_core/test/EmitFromONNX.cxx.in @@ -28,7 +28,7 @@ int main(int argc, char *argv[]){ @EMIT_CAPTURES@ - std::cout << "[SUMMARY] Completed with " << failures << " failures" << std::endl; + std::cout << "[SUMMARY for generation from ONNX] Completed with " << failures << " failures" << std::endl; return failures == 0 ? 0 : 1; } diff --git a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in b/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in index 0d51e92..58198c1 100644 --- a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in +++ b/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in @@ -22,6 +22,6 @@ int main(int argc, char *argv[]) { @EMIT_CAPTURES@ - std::cout << "[SUMMARY] Completed with " << failures << " failures" << std::endl; + std::cout << "[SUMMARY for generation from ONNX with ALPAKA] Completed with " << failures << " failures" << std::endl; return failures == 0 ? 
0 : 1; } diff --git a/src/SOFIE_core/test/EmitFromRoot.cxx.in b/src/SOFIE_core/test/EmitFromRoot.cxx.in index b08d17b..88c0789 100644 --- a/src/SOFIE_core/test/EmitFromRoot.cxx.in +++ b/src/SOFIE_core/test/EmitFromRoot.cxx.in @@ -43,7 +43,12 @@ int EmitModel(std::string inputfile, std::string outname){ int main(int argc, char *argv[]){ - int failures = 0; + int failures = 0; + + @EMIT_CAPTURES@ + + std::cout << "[SUMMARY for generation from ROOT] Completed with " << failures << " failures" << std::endl; + return failures == 0 ? 0 : 1; @EMIT_CAPTURES@; diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt new file mode 100644 index 0000000..2ede060 --- /dev/null +++ b/src/utils/CMakeLists.txt @@ -0,0 +1,11 @@ +add_library(utils INTERFACE) + +target_include_directories(utils INTERFACE + $ + $ +) + +install( + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/SOFIE + DESTINATION include +) diff --git a/src/utils/SOFIE/RTensor.hxx b/src/utils/SOFIE/RTensor.hxx new file mode 100644 index 0000000..db82dc9 --- /dev/null +++ b/src/utils/SOFIE/RTensor.hxx @@ -0,0 +1,628 @@ +#ifndef SOFIE_RTENSOR +#define SOFIE_RTENSOR + +#include +#include // std::size_t +#include +#include // std::runtime_error +#include // std::stringstream +#include // std::shared_ptr +#include // std::is_convertible +#include // std::reverse +#include // std::random_access_iterator_tag + +namespace SOFIE { + +/// Memory layout type +enum class MemoryLayout : uint8_t { + RowMajor = 0x01, + ColumnMajor = 0x02 +}; + +namespace Internal { + +/// \brief Get size of tensor from shape vector +/// \param[in] shape Shape vector +/// \return Size of contiguous memory +template +inline std::size_t GetSizeFromShape(const T &shape) +{ + if (shape.size() == 0) + return 0; + std::size_t size = 1; + for (auto &s : shape) + size *= s; + return size; +} + +/// \brief Compute strides from shape vector. 
+/// \param[in] shape Shape vector +/// \param[in] layout Memory layout +/// \return Size of contiguous memory +/// +/// This information is needed for the multi-dimensional indexing. See here: +/// https://en.wikipedia.org/wiki/Row-_and_column-major_order +/// https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.strides.html +template +inline std::vector ComputeStridesFromShape(const T &shape, MemoryLayout layout) +{ + const auto size = shape.size(); + T strides(size); + if (layout == MemoryLayout::RowMajor) { + for (std::size_t i = 0; i < size; i++) { + if (i == 0) { + strides[size - 1 - i] = 1; + } else { + strides[size - 1 - i] = strides[size - 1 - i + 1] * shape[size - 1 - i + 1]; + } + } + } else if (layout == MemoryLayout::ColumnMajor) { + for (std::size_t i = 0; i < size; i++) { + if (i == 0) { + strides[i] = 1; + } else { + strides[i] = strides[i - 1] * shape[i - 1]; + } + } + } else { + std::stringstream ss; + ss << "Memory layout type is not valid for calculating strides."; + throw std::runtime_error(ss.str()); + } + return strides; +} + +/// \brief Compute indices from global index +/// \param[in] shape Shape vector +/// \param[in] idx Global index +/// \param[in] layout Memory layout +/// \return Indice vector +template +inline T ComputeIndicesFromGlobalIndex(const T& shape, MemoryLayout layout, const typename T::value_type idx) +{ + const auto size = shape.size(); + auto strides = ComputeStridesFromShape(shape, layout); + T indices(size); + auto r = idx; + for (std::size_t i = 0; i < size; i++) { + indices[i] = int(r / strides[i]); + r = r % strides[i]; + } + return indices; +} + +/// \brief Compute global index from indices +/// \param[in] strides Strides vector +/// \param[in] idx Indice vector +/// \return Global index +template +inline std::size_t ComputeGlobalIndex(const U& strides, const V& idx) +{ + std::size_t globalIndex = 0; + const auto size = idx.size(); + for (std::size_t i = 0; i < size; i++) { + globalIndex += strides[size 
- 1 - i] * idx[size - 1 - i]; + } + return globalIndex; +} + +/// \brief Type checking for all types of a parameter pack, e.g., used in combination with std::is_convertible +template +struct and_types : std::true_type { +}; + +template +struct and_types : std::integral_constant()> { +}; + +/// \brief Copy slice of a tensor recursively from here to there +/// \param[in] here Source tensor +/// \param[in] there Target tensor (slice of source tensor) +/// \param[in] mins Minimum of indices for each dimension +/// \param[in] maxs Maximum of indices for each dimension +/// \param[in] idx Current indices +/// \param[in] active Active index needed to stop the recursion +/// +/// Copy the content of a slice of a tensor from source to target. This is done +/// by recursively iterating over the ranges of the slice for each dimension. +template +void RecursiveCopy(const T &here, T &there, + const std::vector &mins, const std::vector &maxs, + std::vector idx, std::size_t active) +{ + const auto size = idx.size(); + for (std::size_t i = mins[active]; i < maxs[active]; i++) { + idx[active] = i; + if (active == size - 1) { + auto idxThere = idx; + for (std::size_t j = 0; j < size; j++) { + idxThere[j] -= mins[j]; + } + there(idxThere) = here(idx); + } else { + Internal::RecursiveCopy(here, there, mins, maxs, idx, active + 1); + } + } +} + +} // namespace SOFIE::Internal + +/// \class SOFIE::RTensor +/// \brief RTensor is a container with contiguous memory and shape information. +/// \tparam T Data-type of the tensor +/// +/// An RTensor is a vector-like container, which has additional shape information. +/// The elements of the multi-dimensional container can be accessed by their +/// indices in a coherent way without taking care about the one-dimensional memory +/// layout of the contiguous storage. This also allows to manipulate the shape +/// of the container without moving the actual elements in memory. 
Another feature +/// is that an RTensor can own the underlying contiguous memory but can also represent +/// only a view on existing data without owning it. +template > +class RTensor { +public: + // Typedefs + using Value_t = V; + using Shape_t = std::vector; + using Index_t = Shape_t; + using Slice_t = std::vector; + using Container_t = C; + +private: + Shape_t fShape; + Shape_t fStrides; + std::size_t fSize; + MemoryLayout fLayout; + Value_t *fData; + std::shared_ptr fContainer; + +protected: + void ReshapeInplace(const Shape_t &shape); + +public: + // Constructors + + /// \brief Construct a tensor as view on data + /// \param[in] data Pointer to data contiguous in memory + /// \param[in] shape Shape vector + /// \param[in] layout Memory layout + RTensor(Value_t *data, Shape_t shape, MemoryLayout layout = MemoryLayout::RowMajor) + : fShape(shape), fLayout(layout), fData(data), fContainer(nullptr) + { + fSize = Internal::GetSizeFromShape(shape); + fStrides = Internal::ComputeStridesFromShape(shape, layout); + } + + /// \brief Construct a tensor as view on data + /// \param[in] data Pointer to data contiguous in memory + /// \param[in] shape Shape vector + /// \param[in] strides Strides vector + /// \param[in] layout Memory layout + RTensor(Value_t *data, Shape_t shape, Shape_t strides, MemoryLayout layout = MemoryLayout::RowMajor) + : fShape(shape), fStrides(strides), fLayout(layout), fData(data), fContainer(nullptr) + { + fSize = Internal::GetSizeFromShape(shape); + } + + /// \brief Construct a tensor owning externally provided data + /// \param[in] container Shared pointer to data container + /// \param[in] shape Shape vector + /// \param[in] layout Memory layout + RTensor(std::shared_ptr container, Shape_t shape, + MemoryLayout layout = MemoryLayout::RowMajor) + : fShape(shape), fLayout(layout), fContainer(container) + { + fSize = Internal::GetSizeFromShape(shape); + fStrides = Internal::ComputeStridesFromShape(shape, layout); + fData = std::data(*fContainer); 
+ } + + /// \brief Construct a tensor owning data initialized with new container + /// \param[in] shape Shape vector + /// \param[in] layout Memory layout + RTensor(Shape_t shape, MemoryLayout layout = MemoryLayout::RowMajor) + : fShape(shape), fLayout(layout) + { + // TODO: Document how data pointer is determined using STL iterator interface. + // TODO: Sanitize given container type with type traits + fSize = Internal::GetSizeFromShape(shape); + fStrides = Internal::ComputeStridesFromShape(shape, layout); + fContainer = std::make_shared(fSize); + fData = std::data(*fContainer); + } + + // Access elements + Value_t &operator()(const Index_t &idx); + const Value_t &operator() (const Index_t &idx) const; + template Value_t &operator()(Idx... idx); + template const Value_t &operator() (Idx... idx) const; + + // Access properties + std::size_t GetSize() const { return fSize; } + const Shape_t &GetShape() const { return fShape; } + const Shape_t &GetStrides() const { return fStrides; } + Value_t *GetData() { return fData; } + const Value_t *GetData() const { return fData; } + std::shared_ptr GetContainer() { return fContainer; } + const std::shared_ptr GetContainer() const { return fContainer; } + MemoryLayout GetMemoryLayout() const { return fLayout; } + bool IsView() const { return fContainer == nullptr; } + bool IsOwner() const { return !IsView(); } + + // Copy + RTensor Copy(MemoryLayout layout = MemoryLayout::RowMajor) const; + + // Transformations + RTensor Transpose() const; + RTensor Squeeze() const; + RTensor ExpandDims(int idx) const; + RTensor Reshape(const Shape_t &shape) const; + RTensor Resize(const Shape_t &shape); + RTensor Slice(const Slice_t &slice); + + // Iterator class + class Iterator { + private: + RTensor& fTensor; + Index_t::value_type fGlobalIndex; + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = Value_t; + using difference_type = std::ptrdiff_t; + using pointer = Value_t *; + using reference = Value_t 
&; + + Iterator(RTensor& x, typename Index_t::value_type idx) : fTensor(x), fGlobalIndex(idx) {} + Iterator& operator++() { fGlobalIndex++; return *this; } + Iterator operator++(int) { auto tmp = *this; operator++(); return tmp; } + Iterator& operator--() { fGlobalIndex--; return *this; } + Iterator operator--(int) { auto tmp = *this; operator--(); return tmp; } + Iterator operator+(difference_type rhs) const { return Iterator(fTensor, fGlobalIndex + rhs); } + Iterator operator-(difference_type rhs) const { return Iterator(fTensor, fGlobalIndex - rhs); } + difference_type operator-(const Iterator& rhs) { return fGlobalIndex - rhs.GetGlobalIndex(); } + Iterator& operator+=(difference_type rhs) { fGlobalIndex += rhs; return *this; } + Iterator& operator-=(difference_type rhs) { fGlobalIndex -= rhs; return *this; } + Value_t& operator*() + { + auto idx = Internal::ComputeIndicesFromGlobalIndex(fTensor.GetShape(), fTensor.GetMemoryLayout(), fGlobalIndex); + return fTensor(idx); + } + bool operator==(const Iterator& rhs) const + { + if (fGlobalIndex == rhs.GetGlobalIndex()) return true; + return false; + } + bool operator!=(const Iterator& rhs) const { return !operator==(rhs); }; + bool operator>(const Iterator& rhs) const { return fGlobalIndex > rhs.GetGlobalIndex(); } + bool operator<(const Iterator& rhs) const { return fGlobalIndex < rhs.GetGlobalIndex(); } + bool operator>=(const Iterator& rhs) const { return fGlobalIndex >= rhs.GetGlobalIndex(); } + bool operator<=(const Iterator& rhs) const { return fGlobalIndex <= rhs.GetGlobalIndex(); } + typename Index_t::value_type GetGlobalIndex() const { return fGlobalIndex; }; + }; + + // Iterator interface + // TODO: Document that the iterator always iterates following the physical memory layout. 
+ Iterator begin() noexcept { + return Iterator(*this, 0); + } + Iterator end() noexcept { + return Iterator(*this, fSize); + } +}; + +/// \brief Reshape tensor in place +/// \param[in] shape Shape vector +/// Reshape tensor without changing the overall size +template +inline void RTensor::ReshapeInplace(const Shape_t &shape) +{ + const auto size = Internal::GetSizeFromShape(shape); + if (size != fSize) { + std::stringstream ss; + ss << "Cannot reshape tensor with size " << fSize << " into shape { "; + for (std::size_t i = 0; i < shape.size(); i++) { + if (i != shape.size() - 1) { + ss << shape[i] << ", "; + } else { + ss << shape[i] << " }."; + } + } + throw std::runtime_error(ss.str()); + } + + // Compute new strides from shape + auto strides = Internal::ComputeStridesFromShape(shape, fLayout); + fShape = shape; + fStrides = strides; +} + + +/// \brief Access elements +/// \param[in] idx Index vector +/// \return Reference to element +template +inline Value_t &RTensor::operator()(const Index_t &idx) +{ + const auto globalIndex = Internal::ComputeGlobalIndex(fStrides, idx); + return fData[globalIndex]; +} + +/// \brief Access elements +/// \param[in] idx Index vector +/// \return Reference to element +template +inline const Value_t &RTensor::operator() (const Index_t &idx) const +{ + const auto globalIndex = Internal::ComputeGlobalIndex(fStrides, idx); + return fData[globalIndex]; +} + +/// \brief Access elements +/// \param[in] idx Indices +/// \return Reference to element +template +template +Value_t &RTensor::operator()(Idx... idx) +{ + static_assert(Internal::and_types...>{}, + "Indices are not convertible to std::size_t."); + return operator()({static_cast(idx)...}); +} + +/// \brief Access elements +/// \param[in] idx Indices +/// \return Reference to element +template +template +const Value_t &RTensor::operator() (Idx... 
idx) const +{ + static_assert(Internal::and_types...>{}, + "Indices are not convertible to std::size_t."); + return operator()({static_cast(idx)...}); +} + +/// \brief Transpose +/// \returns New RTensor +/// The tensor is transposed by inverting the associated memory layout from row- +/// major to column-major and vice versa. Therefore, the underlying data is not +/// touched. +template +inline RTensor RTensor::Transpose() const +{ + MemoryLayout layout; + // Transpose by inverting memory layout + if (fLayout == MemoryLayout::RowMajor) { + layout = MemoryLayout::ColumnMajor; + } else if (fLayout == MemoryLayout::ColumnMajor) { + layout = MemoryLayout::RowMajor; + } else { + throw std::runtime_error("Memory layout is not known."); + } + + // Create copy of container + RTensor x(fData, fShape, fStrides, layout); + + // Reverse shape + std::reverse(x.fShape.begin(), x.fShape.end()); + + // Reverse strides + std::reverse(x.fStrides.begin(), x.fStrides.end()); + + return x; +} + +/// \brief Squeeze dimensions +/// \returns New RTensor +/// Squeeze removes the dimensions of size one from the shape. +template +inline RTensor RTensor::Squeeze() const +{ + // Remove dimensions of one and associated strides + Shape_t shape; + Shape_t strides; + for (std::size_t i = 0; i < fShape.size(); i++) { + if (fShape[i] != 1) { + shape.emplace_back(fShape[i]); + strides.emplace_back(fStrides[i]); + } + } + + // If all dimensions are 1, we need to keep one. + // This does not apply if the inital shape is already empty. Then, return + // the empty shape. + if (shape.size() == 0 && fShape.size() != 0) { + shape.emplace_back(1); + strides.emplace_back(1); + } + + // Create copy, attach new shape and strides and return + RTensor x(*this); + x.fShape = shape; + x.fStrides = strides; + return x; +} + +/// \brief Expand dimensions +/// \param[in] idx Index in shape vector where dimension is added +/// \returns New RTensor +/// Inserts a dimension of one into the shape. 
+template +inline RTensor RTensor::ExpandDims(int idx) const +{ + // Compose shape vector with additional dimensions and adjust strides + const int len = fShape.size(); + auto shape = fShape; + auto strides = fStrides; + if (idx < 0) { + idx = len + 1 + idx; + } + if (idx < 0) { + throw std::runtime_error("Given negative index is invalid."); + } + else if (idx > len) { + throw std::runtime_error("Given index is invalid."); + } + shape.insert(shape.begin() + idx, 1); + strides = Internal::ComputeStridesFromShape(shape, fLayout); + + // Create view copy, attach new shape and strides and return + RTensor x(*this); + x.fShape = shape; + x.fStrides = strides; + return x; +} + +/// \brief Reshape tensor +/// \param[in] shape Shape vector +/// \returns New RTensor +/// Reshape tensor without changing the overall size +template +inline RTensor RTensor::Reshape(const Shape_t &shape) const +{ + // Create copy, replace and return + RTensor x(*this); + x.ReshapeInplace(shape); + return x; +} + +/// \brief Resize tensor +/// \param[in] shape Shape vector +/// \returns New RTensor +/// Resize tensor into new shape +template +inline RTensor RTensor::Resize(const Shape_t &shape) +{ + // Create new tensor with the specified shape + RTensor x(shape, fLayout); + + // Copying contents from previous tensor + size_t n = (x.GetSize()>fSize) ? fSize : x.GetSize(); + std::copy(this->GetData(), this->GetData() + n, x.GetData() ); + + return x; +} + +/// \brief Create a slice of the tensor +/// \param[in] slice Slice vector +/// \returns New RTensor +/// A slice is a subset of the tensor defined by a vector of pairs of indices. 
+template +inline RTensor RTensor::Slice(const Slice_t &slice) +{ + // Sanitize size of slice + const auto sliceSize = slice.size(); + const auto shapeSize = fShape.size(); + if (sliceSize != shapeSize) { + std::stringstream ss; + ss << "Size of slice (" << sliceSize << ") is unequal number of dimensions (" << shapeSize << ")."; + throw std::runtime_error(ss.str()); + } + + // Sanitize slice indices + // TODO: Sanitize slice indices + /* + for (std::size_t i = 0; i < sliceSize; i++) { + } + */ + + // Convert -1 in slice to proper pair of indices + // TODO + + // Recompute shape and size + Shape_t shape(sliceSize); + for (std::size_t i = 0; i < sliceSize; i++) { + shape[i] = slice[i][1] - slice[i][0]; + } + auto size = Internal::GetSizeFromShape(shape); + + // Determine first element contributing to the slice and get the data pointer + Value_t *data; + Shape_t idx(sliceSize); + for (std::size_t i = 0; i < sliceSize; i++) { + idx[i] = slice[i][0]; + } + data = &operator()(idx); + + // Create copy and modify properties + RTensor x(*this); + x.fData = data; + x.fShape = shape; + x.fSize = size; + + // Squeeze tensor and return + return x.Squeeze(); +} + +/// Copy RTensor to new object +/// \param[in] layout Memory layout of the new RTensor +/// \returns New RTensor +/// The operation copies all elements of the current RTensor to a new RTensor +/// with the given layout contiguous in memory. Note that this copies by default +/// to a row major memory layout. 
+template +inline RTensor RTensor::Copy(MemoryLayout layout) const +{ + // Create new tensor with zeros owning the memory + RTensor r(fShape, layout); + + // Copy over the elements from this tensor + const auto mins = Shape_t(fShape.size()); + const auto maxs = fShape; + auto idx = mins; + Internal::RecursiveCopy(*this, r, mins, maxs, idx, 0); + + return r; +} + +/// \brief Pretty printing +/// \param[in] os Output stream +/// \param[in] x RTensor +/// \return Modified output stream +template +std::ostream &operator<<(std::ostream &os, RTensor &x) +{ + const auto shapeSize = x.GetShape().size(); + if (shapeSize == 1) { + os << "{ "; + const auto size = x.GetSize(); + for (std::size_t i = 0; i < size; i++) { + os << x({i}); + if (i != size - 1) + os << ", "; + } + os << " }"; + } else if (shapeSize == 2) { + os << "{"; + const auto shape = x.GetShape(); + for (std::size_t i = 0; i < shape[0]; i++) { + os << " { "; + for (std::size_t j = 0; j < shape[1]; j++) { + os << x({i, j}); + if (j < shape[1] - 1) { + os << ", "; + } else { + os << " "; + } + } + os << "}"; + } + os << " }"; + } else { + os << "{ printing not yet implemented for this rank }"; + } + return os; +} + +} // namespace SOFIE + +namespace cling { +template +std::string printValue(SOFIE::RTensor *x) +{ + std::stringstream ss; + ss << *x; + return ss.str(); +} +} // namespace cling + +#endif // SOFIE_RTENSOR From 1979a11e0642f00174e211b36c0495244f645d45 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 15 Dec 2025 01:36:14 +0100 Subject: [PATCH 19/43] feat: turn off emitting from ROOT files and skip tests with multiple output errors for now --- .../inc/SOFIE/ROperator_BasicBinary.hxx | 14 ++-- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 1 - src/SOFIE_core/src/RModel.cxx | 1 + src/SOFIE_core/src/RModel_ALPAKA.cxx | 12 ++-- src/SOFIE_core/test/CMakeLists.txt | 72 ++++++++++--------- 5 files changed, 58 insertions(+), 42 deletions(-) diff --git 
a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 80f35be..2d0e6cb 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -387,6 +387,9 @@ public: } std::string Generate_GPU_Kernel_ALPAKA(std::string opName) { + if (fIsOutputConstant) + return ""; + std::string op; op = "\n//------ "+opName+"_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; op += SP + "struct Binary"+BinaryOperatorTrait::Name()+"Kernel {\n"; @@ -402,7 +405,6 @@ public: } op.pop_back(); op += "));\n"; - std::cout<<"okay till here 1\n"; op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeA); @@ -426,7 +428,6 @@ public: for (int j = 0; j < 3; j++) compute_idx_A.pop_back(); } - std::cout<<"okay till here 2\n"; if (fDimShapeB.empty() || std::all_of(fDimShapeB.begin(), fDimShapeB.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { compute_idx_B = "0"; @@ -443,7 +444,6 @@ public: for (int j = 0; j < 3; j++) compute_idx_B.pop_back(); } - std::cout<<"okay till here 3\n"; int nloop = 0; if (fDimShapeY.empty() || std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { @@ -463,13 +463,11 @@ public: for (int j = 0; j < 3; j++) compute_idx_Y.pop_back(); } - std::cout<<"okay till here 4\n"; for (int j = 0; j < nloop + 1; j++) op += SP; op += "C[" + compute_idx_Y + "] = " + BinaryOperatorTrait::Op("A[" + compute_idx_A + "]", "B[" + compute_idx_B + "]") + " ;\n"; - std::cout<<"okay till here 5\n"; for (int i = nloop; i > 0; i--) { for (int j = 0; j < i; j++) op += SP; op += "}\n"; @@ -478,10 +476,16 @@ public: } std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string OpName) { + if (fIsOutputConstant) + return ""; + return SP + "Binary"+BinaryOperatorTrait::Name()+"Kernel " + OpName + "Kernel;\n"; } std::string Generate_GPU_ALPAKA(std::string 
OpName) { + if (fIsOutputConstant) + return ""; + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Operator Basic Binary called to Generate without being initialized first"); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index f417acf..1c43724 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -258,7 +258,6 @@ namespace SOFIE{ if (!fIsDynamic){ model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), shapeY); - std::cout<<"currently adding: "<GetKind())<GetKind()) == registered_operators.end()) { - std::cout<<"Generating ALPAKA kernel for operator"<< std::endl; + + if (fVerbose) + std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; + fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); registered_operators.insert(fOperators[id]->GetKind()); } @@ -312,9 +314,11 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { registered_operators.clear(); for (size_t id = 0; id < fOperators.size(); id++) { - std::cout<GetKind())<GetKind()) == registered_operators.end()) { - std::cout<<"Declaring ALPAKA kernel for operator"<< std::endl; + + if (fVerbose) + std::cout<<"Declaring ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind())<Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); registered_operators.insert(fOperators[id]->GetKind()); } diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index c5e0d8f..c792e3d 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -21,13 +21,19 @@ set(CAPTURE_STR "try {\n\ EmitModel(\"@1\", \"@2\");\n\ } catch (const std::exception& e) {\n\ - std::cerr << \"[ERROR] Failed processing @1: \" << e.what() << std::endl;\n\ - failures++;\n\ + std::string msg = e.what();\n\ + if (msg.find(\"multiple output tensors are not supported\") != std::string::npos) {\n\ + 
std::cerr << \"[SKIP] Multiple outputs are not supported for @1\" << std::endl;\n\ + } else {\n\ + std::cerr << \"[ERROR] Failed processing @1: \" << msg << std::endl;\n\ + failures++;\n\ + }\n\ } catch (...) {\n\ std::cerr << \"[ERROR] Unknown failure processing @1\" << std::endl;\n\ failures++;\n\ }\n\ ") + # --- Collect ONNX files and build ALL_CAPTURES BEFORE any configure_file() --- set(ALL_CAPTURES "") file(GLOB ONNX_FILES "${ONNX_MODELS_DIR}/*.onnx") @@ -51,7 +57,7 @@ set(EMIT_CAPTURES "${ALL_CAPTURES}") # Note: the .in templates must use @EMIT_CAPTURES@ placeholder. configure_file(EmitFromONNX.cxx.in EmitFromONNX_all.cxx @ONLY) -configure_file(EmitFromRoot.cxx.in EmitFromRoot_all.cxx @ONLY) +# configure_file(EmitFromRoot.cxx.in EmitFromRoot_all.cxx @ONLY) configure_file(EmitFromONNX_GPU_ALPAKA.cxx.in EmitFromONNX_GPU_ALPAKA_all.cxx @ONLY) # --- CPU emitter executable and test (unchanged) --- @@ -90,39 +96,41 @@ if (BLAS_FOUND) # we need BLAS for compiling the models FIXTURES_REQUIRED sofie-test-models-onnx-build) endif() +# Skipping emitting from ROOT and further tests for now. # For testing serialisation of RModel object -ROOTTEST_GENERATE_EXECUTABLE(emitFromROOT EmitFromRoot_all.cxx - LIBRARIES protobuf::libprotobuf RIO SOFIE_core SOFIE_parsers - FIXTURES_SETUP sofie-compile-models-onnx-root -) +# ROOTTEST_GENERATE_EXECUTABLE(emitFromROOT EmitFromRoot_all.cxx +# LIBRARIES protobuf::libprotobuf RIO SOFIE_core SOFIE_parsers +# FIXTURES_SETUP sofie-compile-models-onnx-root +# ) + # silence protobuf warnings seen in version 3.0 and 3.6. 
Not needed from protobuf version 3.17 -target_compile_options(emitFromROOT PRIVATE -Wno-unused-parameter -Wno-array-bounds) +# target_compile_options(emitFromROOT PRIVATE -Wno-unused-parameter -Wno-array-bounds) # Automatic compilation of headers from root files -ROOTTEST_ADD_TEST(SofieCompileModels_ROOT - COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromROOT - FIXTURES_REQUIRED sofie-compile-models-onnx-root - FIXTURES_SETUP sofie-compile-models-root -) - -if (BLAS_FOUND) - # Creating a Google Test for Serialisation of RModel - ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromROOT TestCustomModelsFromROOT.cxx - LIBRARIES - SOFIE_core - BLAS::BLAS - GTest::gtest - GTest::gtest_main - FIXTURES_REQUIRED - sofie-compile-models-root - FIXTURES_SETUP - sofie-test-models-root-build - ) - target_include_directories(TestCustomModelsFromROOT PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - ROOTTEST_ADD_TEST(TestCustomModelsFromROOT - EXEC ./TestCustomModelsFromROOT - FIXTURES_REQUIRED sofie-test-models-root-build) -endif() +# ROOTTEST_ADD_TEST(SofieCompileModels_ROOT +# COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromROOT +# FIXTURES_REQUIRED sofie-compile-models-onnx-root +# FIXTURES_SETUP sofie-compile-models-root +# ) + +# if (BLAS_FOUND) +# # Creating a Google Test for Serialisation of RModel +# ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromROOT TestCustomModelsFromROOT.cxx +# LIBRARIES +# SOFIE_core +# BLAS::BLAS +# GTest::gtest +# GTest::gtest_main +# FIXTURES_REQUIRED +# sofie-compile-models-root +# FIXTURES_SETUP +# sofie-test-models-root-build +# ) +# target_include_directories(TestCustomModelsFromROOT PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +# ROOTTEST_ADD_TEST(TestCustomModelsFromROOT +# EXEC ./TestCustomModelsFromROOT +# FIXTURES_REQUIRED sofie-test-models-root-build) +# endif() # Look for needed Python modules ROOT_FIND_PYTHON_MODULE(torch) From 59aeac458ee2559b8162bf092ae97538b7ecfeb0 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 
15 Dec 2025 17:40:51 +0100 Subject: [PATCH 20/43] feat: support for google tests for inference code with alpaka implementations --- README.md | 6 +- src/SOFIE_core/test/CMakeLists.txt | 259 +++++++++++------- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 156 +++++++++++ 3 files changed, 316 insertions(+), 105 deletions(-) create mode 100644 src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx diff --git a/README.md b/README.md index 97902f8..597cb56 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,11 @@ source setup.sh ``` Now ROOT should also access the SOFIE libraries while it runs. This helps to accelerate development. Submit your developments here and we will proceed with the developments in ROOT carefull. - +3. To enable testing generated code with alpaka implementations, build using the following command: +```bash +cmake -Dtesting=ON -DENABLE_ALPAKA_TESTS=ON -DCMAKE_INSTALL_PREFIX=../install -DCMAKE_BUILD_TYPE=RelWithDebInfo .. +``` +The default backend is CUDA, but it can be changed using an additional `-DALPAKA_BACKEND=hip` CMake option. ## Inspiration The standalone version of SOFIE is developed with inspiration from the standalone version of RooFit developed by Jonas Rembser that can be found [here](https://github.com/guitargeek/roofit). 
diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index c792e3d..5d5667a 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -9,13 +9,24 @@ # @author Federico Sossai, Sanjiban Sengupta ############################################################################ +cmake_minimum_required(VERSION 3.14) +include(FetchContent) + include_directories(${CMAKE_SOURCE_DIR}/src/SOFIE_core/inc) include_directories(${CMAKE_SOURCE_DIR}/src/SOFIE_parsers/inc) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + if (NOT ONNX_MODELS_DIR) set(ONNX_MODELS_DIR input_models) endif() +option(ENABLE_ALPAKA_TESTS "Enable Alpaka-based SOFIE tests" OFF) +set(ALPAKA_BACKEND "cuda" + CACHE STRING "Alpaka backend to test (cuda, cpu, hip, sycl)") +set_property(CACHE ALPAKA_BACKEND PROPERTY STRINGS cuda cpu hip sycl) + # String template used to produce calls to EmitModel(...) per file. set(CAPTURE_STR "try {\n\ @@ -34,50 +45,37 @@ set(CAPTURE_STR }\n\ ") -# --- Collect ONNX files and build ALL_CAPTURES BEFORE any configure_file() --- +# --- Collect ONNX files --- set(ALL_CAPTURES "") file(GLOB ONNX_FILES "${ONNX_MODELS_DIR}/*.onnx") -# If there are no models, ONNX_FILES will be empty and ALL_CAPTURES stays empty. foreach(onnx_file ${ONNX_FILES}) get_filename_component(fname ${onnx_file} NAME_WE) - get_filename_component(fdir ${onnx_file} DIRECTORY) - string(REPLACE "@1" "${onnx_file}" cap "${CAPTURE_STR}") string(REPLACE "@2" "${fname}" cap "${cap}") string(APPEND ALL_CAPTURES "${cap}") endforeach() -# Now generate the combined source files for CPU, ROOT and ALPAKA -# They will have @EMIT_CAPTURES@ substituted with the contents of ALL_CAPTURES -# (CMake configure_file uses variables via @VAR@ when @ONLY is provided). -# To make ALL_CAPTURES visible to configure_file we set a temporary variable -# that configure_file can reference directly. 
set(EMIT_CAPTURES "${ALL_CAPTURES}") -# Note: the .in templates must use @EMIT_CAPTURES@ placeholder. configure_file(EmitFromONNX.cxx.in EmitFromONNX_all.cxx @ONLY) -# configure_file(EmitFromRoot.cxx.in EmitFromRoot_all.cxx @ONLY) configure_file(EmitFromONNX_GPU_ALPAKA.cxx.in EmitFromONNX_GPU_ALPAKA_all.cxx @ONLY) -# --- CPU emitter executable and test (unchanged) --- +# --- CPU emitter --- ROOTTEST_GENERATE_EXECUTABLE(emitFromONNX EmitFromONNX_all.cxx - LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers - FIXTURES_SETUP sofie-compile-models-onnx-build) + LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers + FIXTURES_SETUP sofie-compile-models-onnx-build) -# silence protobuf warnings seen in version 3.0 and 3.6. Not needed from protobuf version 3.17 target_compile_options(emitFromONNX PRIVATE -Wno-unused-parameter -Wno-array-bounds) -# IMPORTANT: ROOTTEST_ADD_TEST below used to expand ${onnx_file}/${fname} which are loop vars. -# We keep it as a single-test wrapper; per-file test invocation is added later for ALPAKA. 
ROOTTEST_ADD_TEST(SofieCompileModels_ONNX - COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNX ${onnx_file} ${CMAKE_CURRENT_BINARY_DIR}/${fname} + COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNX FIXTURES_REQUIRED sofie-compile-models-onnx-build FIXTURES_SETUP sofie-compile-models-onnx ) -# Creating a Google Test -if (BLAS_FOUND) # we need BLAS for compiling the models +# --- Custom model tests --- +if (BLAS_FOUND) ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromONNX TestCustomModelsFromONNX.cxx LIBRARIES MathCore @@ -85,71 +83,32 @@ if (BLAS_FOUND) # we need BLAS for compiling the models BLAS::BLAS GTest::gtest GTest::gtest_main - FIXTURES_REQUIRED - sofie-compile-models-onnx - FIXTURES_SETUP - sofie-test-models-onnx-build + FIXTURES_REQUIRED sofie-compile-models-onnx + FIXTURES_SETUP sofie-test-models-onnx-build ) + target_include_directories(TestCustomModelsFromONNX PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + ROOTTEST_ADD_TEST(TestCustomModelsFromONNX - EXEC ./TestCustomModelsFromONNX - FIXTURES_REQUIRED sofie-test-models-onnx-build) + EXEC ./TestCustomModelsFromONNX + FIXTURES_REQUIRED sofie-test-models-onnx-build + ) endif() -# Skipping emitting from ROOT and further tests for now. -# For testing serialisation of RModel object -# ROOTTEST_GENERATE_EXECUTABLE(emitFromROOT EmitFromRoot_all.cxx -# LIBRARIES protobuf::libprotobuf RIO SOFIE_core SOFIE_parsers -# FIXTURES_SETUP sofie-compile-models-onnx-root -# ) - -# silence protobuf warnings seen in version 3.0 and 3.6. 
Not needed from protobuf version 3.17 -# target_compile_options(emitFromROOT PRIVATE -Wno-unused-parameter -Wno-array-bounds) - -# Automatic compilation of headers from root files -# ROOTTEST_ADD_TEST(SofieCompileModels_ROOT -# COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromROOT -# FIXTURES_REQUIRED sofie-compile-models-onnx-root -# FIXTURES_SETUP sofie-compile-models-root -# ) - -# if (BLAS_FOUND) -# # Creating a Google Test for Serialisation of RModel -# ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromROOT TestCustomModelsFromROOT.cxx -# LIBRARIES -# SOFIE_core -# BLAS::BLAS -# GTest::gtest -# GTest::gtest_main -# FIXTURES_REQUIRED -# sofie-compile-models-root -# FIXTURES_SETUP -# sofie-test-models-root-build -# ) -# target_include_directories(TestCustomModelsFromROOT PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -# ROOTTEST_ADD_TEST(TestCustomModelsFromROOT -# EXEC ./TestCustomModelsFromROOT -# FIXTURES_REQUIRED sofie-test-models-root-build) -# endif() - -# Look for needed Python modules +# --- Python-based generators --- ROOT_FIND_PYTHON_MODULE(torch) if (ROOT_TORCH_FOUND) configure_file(Conv1dModelGenerator.py Conv1dModelGenerator.py COPYONLY) configure_file(Conv2dModelGenerator.py Conv2dModelGenerator.py COPYONLY) configure_file(Conv3dModelGenerator.py Conv3dModelGenerator.py COPYONLY) - configure_file(ConvTrans2dModelGenerator.py ConvTrans2dModelGenerator.py COPYONLY) - configure_file(LinearModelGenerator.py LinearModelGenerator.py COPYONLY) - configure_file(RecurrentModelGenerator.py RecurrentModelGenerator.py COPYONLY) + configure_file(ConvTrans2dModelGenerator.py ConvTrans2dModelGenerator.py COPYONLY) + configure_file(LinearModelGenerator.py LinearModelGenerator.py COPYONLY) + configure_file(RecurrentModelGenerator.py RecurrentModelGenerator.py COPYONLY) if (BLAS_FOUND) ROOT_ADD_GTEST(TestSofieModels TestSofieModels.cxx - LIBRARIES - SOFIE_core - SOFIE_parsers - BLAS::BLAS - INCLUDE_DIRS - ${CMAKE_CURRENT_BINARY_DIR} + LIBRARIES SOFIE_core 
SOFIE_parsers BLAS::BLAS + INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} ) endif() endif() @@ -160,39 +119,131 @@ ROOT_ADD_TEST(tmva-sofie-EmitGNN COMMAND emitGNN) ROOT_EXECUTABLE(EmitGraphIndependent GNN/EmitGraphIndependent.cxx LIBRARIES SOFIE_core) ROOT_ADD_TEST(tmva-sofie-EmitGraphIndependent COMMAND EmitGraphIndependent) -# Generating inference code for heterogeneous testing using ALPAKA -ROOTTEST_GENERATE_EXECUTABLE(emitFromONNXAlpaka EmitFromONNX_GPU_ALPAKA_all.cxx +# ========================= +# ALPAKA TESTS +# ========================= +if (ENABLE_ALPAKA_TESTS) + + string(TOLOWER "${ALPAKA_BACKEND}" _alpaka_backend) + + if (NOT _alpaka_backend IN_LIST ALPAKA_BACKEND) + message(FATAL_ERROR + "Unsupported ALPAKA_BACKEND='${ALPAKA_BACKEND}'. " + "Valid values: cuda, cpu, hip, sycl") + endif() + + FetchContent_Declare( + sofieBLAS + GIT_REPOSITORY https://github.com/ML4EP/sofieBLAS + GIT_TAG edf2259876e9f4fb5a8f72db20b2dfb5dc26b517 + ) + FetchContent_MakeAvailable(sofieBLAS) + + FetchContent_Declare( + alpaka + GIT_REPOSITORY https://github.com/alpaka-group/alpaka + GIT_TAG 2fa91a34ed11b2076e474c5507d920e85cf9b79d + ) + FetchContent_MakeAvailable(alpaka) + + # --- ALPAKA emitter --- + ROOTTEST_GENERATE_EXECUTABLE(emitFromONNXAlpaka EmitFromONNX_GPU_ALPAKA_all.cxx LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers - FIXTURES_SETUP sofie-compile-models-onnx-alpaka-build) - -# silence protobuf warnings seen in version 3.0 and 3.6. Not needed from protobuf version 3.17 -target_compile_options(emitFromONNXAlpaka PRIVATE -Wno-unused-parameter -Wno-array-bounds) - -# Add explicit per-file post-build runs of the alpaka emitter executable so that -# EmitFromONNXAlpaka is invoked for each detected .onnx file during the build. -# This avoids relying on a single ${onnx_file}/${fname} value that would -# otherwise expand only to the last entry when used outside the loop. 
- -if (ONNX_FILES) - foreach(onnx_file ${ONNX_FILES}) - get_filename_component(fname ${onnx_file} NAME_WE) - - # Create a post-build command attached to the emitFromONNXAlpaka target that - # will run the built binary with the current onnx file and the chosen outname. - add_custom_command(TARGET emitFromONNXAlpaka - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 $ "${onnx_file}" "${CMAKE_CURRENT_BINARY_DIR}/${fname}" - COMMENT "Running ALPAKA emitter on ${onnx_file}" + FIXTURES_SETUP sofie-compile-models-onnx-alpaka-build + ) + + target_compile_options(emitFromONNXAlpaka PRIVATE -Wno-unused-parameter -Wno-array-bounds) + + if (ONNX_FILES) + foreach(onnx_file ${ONNX_FILES}) + get_filename_component(fname ${onnx_file} NAME_WE) + add_custom_command(TARGET emitFromONNXAlpaka POST_BUILD + COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 + $ + "${onnx_file}" "${CMAKE_CURRENT_BINARY_DIR}/${fname}" + COMMENT "Running ALPAKA emitter on ${onnx_file}") + endforeach() + endif() + + ROOTTEST_ADD_TEST(SofieCompileModels_ONNX_Alpaka + COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNXAlpaka + FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka-build + FIXTURES_SETUP sofie-compile-models-onnx-alpaka + ) + + set(CXXFLAGS -O2 -g -DALPAKA_HAS_STD_ATOMIC_REF) + set(CXX_HOST_FLAGS -fPIC -pthread) + + # ---- Backend selection ---- + if (_alpaka_backend STREQUAL "cuda") + message(STATUS "Enabling Alpaka CUDA tests") + enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) + + set(CUDA_ARCH "sm_86") + set(CXX_CUDA_FLAGS + -arch=${CUDA_ARCH} + -Wno-deprecated-gpu-targets + --extended-lambda + --expt-relaxed-constexpr) + + set_source_files_properties( + TestCustomModelsFromONNXForAlpakaCuda.cxx + PROPERTIES LANGUAGE CUDA ) - endforeach() -endif() -# Also add a ROOTTEST wrapper so that ctest can run one of the invocations if desired. -# (This remains mostly for CI / test harness compatibility.) 
-ROOTTEST_ADD_TEST(SofieCompileModels_ONNX_Alpaka - COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNXAlpaka ${ONNX_FILES} - FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka-build - FIXTURES_SETUP sofie-compile-models-onnx-alpaka -) + ROOTTEST_GENERATE_EXECUTABLE( + TestCustomModelsFromONNXForAlpakaCuda + TestCustomModelsFromONNXForAlpakaCuda.cxx + LIBRARIES MathCore SOFIE_core GTest::gtest GTest::gtest_main + FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka + FIXTURES_SETUP sofie-test-models-onnx-alpaka-build + ) + + target_include_directories( + TestCustomModelsFromONNXForAlpakaCuda PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} + ${alpaka_SOURCE_DIR}/include + ${SOFIE_INCLUDE} + ${sofieblas_SOURCE_DIR}/include + ${ROOT_INCLUDE_DIRS} + ${CUDA_BASE}/include + ${CMAKE_CURRENT_SOURCE_DIR} + ) + + set_target_properties( + TestCustomModelsFromONNXForAlpakaCuda + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + ) + + target_compile_definitions( + TestCustomModelsFromONNXForAlpakaCuda PRIVATE + ALPAKA_ACC_GPU_CUDA_ENABLED + ) + + target_link_directories( + TestCustomModelsFromONNXForAlpakaCuda PRIVATE + ${CUDA_BASE}/lib64 + ) + + target_link_libraries(TestCustomModelsFromONNXForAlpakaCuda + CUDA::cublas + CUDA::cublasLt + CUDA::cudart + ${ROOT_LIBRARIES} + ) + + ROOTTEST_ADD_TEST(TestCustomModelsFromONNXForAlpakaCuda + EXEC ./TestCustomModelsFromONNXForAlpakaCuda + FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka + ) + + elseif (_alpaka_backend STREQUAL "cpu") + message(STATUS "Alpaka CPU backend selected (not yet implemented)") + elseif (_alpaka_backend STREQUAL "hip") + message(STATUS "Alpaka HIP backend selected (not yet implemented)") + elseif (_alpaka_backend STREQUAL "sycl") + message(STATUS "Alpaka SYCL backend selected (not yet implemented)") + endif() # backend -# End of CMakeLists.txt +endif() # ENABLE_ALPAKA_TESTS diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx 
new file mode 100644 index 0000000..017a19f --- /dev/null +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -0,0 +1,156 @@ +#include +#include + +#include "Linear_16_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Linear_16.ref.hxx" + +#include "Linear_32_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Linear_32.ref.hxx" + +#include "Linear_64_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Linear_64.ref.hxx" + +#include +#include +#include +#include "gtest/gtest.h" + +constexpr float DEFAULT_TOLERANCE = 1e-3f; + +using Idx = std::size_t; +using Dim = alpaka::DimInt<1>; +using Ext1D = alpaka::Vec; + +class SofieAlpakaTest : public ::testing::Test { +protected: + // Shared devices and platforms + alpaka::PlatformCpu hostPlatform; + alpaka::DevCpu host; + alpaka::PlatformCudaRt platform; + alpaka::DevCudaRt device; + alpaka::Queue queue; + + SofieAlpakaTest() + : hostPlatform{} + , host(alpaka::getDevByIdx(hostPlatform, 0u)) + , platform{} + , device(alpaka::getDevByIdx(platform, 0u)) + , queue(device) + { + } + + void SetUp() override { + cudaDeviceSynchronize(); + } + + void TearDown() override { + alpaka::wait(queue); + cudaDeviceSynchronize(); + } + + ~SofieAlpakaTest() override { + cudaDeviceSynchronize(); + } +}; + +TEST_F(SofieAlpakaTest, Linear16) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + + for (Idx i = 0; i < 1600; ++i) { + A_ptr[i] = 1.0; + } + + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); + alpaka::memcpy(queue, A_d, A); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); + + { + SOFIE_Linear_16::Session session("Linear_16_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(A_d); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + cudaDeviceSynchronize(); + } + + float* res_ptr = 
reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = Linear_16_ExpectedOutput::all_ones; + + for (size_t i = 0; i < 160; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); + } +} + +TEST_F(SofieAlpakaTest, Linear32) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + + for (Idx i = 0; i < 1600; ++i) { + A_ptr[i] = 1.0; + } + + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); + alpaka::memcpy(queue, A_d, A); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); + + { + SOFIE_Linear_32::Session session("Linear_32_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(A_d); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + cudaDeviceSynchronize(); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = Linear_32_ExpectedOutput::all_ones; + + for (size_t i = 0; i < 160; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); + } +} + +TEST_F(SofieAlpakaTest, Linear64) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + + for (Idx i = 0; i < 1600; ++i) { + A_ptr[i] = 1.0; + } + + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); + alpaka::memcpy(queue, A_d, A); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); + + { + SOFIE_Linear_64::Session session("Linear_64_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(A_d); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + cudaDeviceSynchronize(); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = Linear_64_ExpectedOutput::all_ones; + + for (size_t i = 0; i < 160; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), 
TOLERANCE); + } +} From 815a80c4e96f72d86fa348232bb4b4ee3eae6f2c Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 26 Jan 2026 12:52:36 +0100 Subject: [PATCH 21/43] feat: test cases for leaky relu operator --- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 34 +++-- src/SOFIE_core/src/RModel_ALPAKA.cxx | 12 +- src/SOFIE_core/test/CMakeLists.txt | 128 +++++++++--------- .../test/TestCustomModelsFromONNX.cxx | 2 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 51 +++++++ 5 files changed, 142 insertions(+), 85 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 02eca17..7d12228 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -75,22 +75,23 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override { std::string op; op = "\n//------ LEAKY_RELU_KERNEL_ALPAKA\n"; - op += SP + "struct LeakyReluKernel {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements, T alpha = static_cast(0.01)) const {\n"; - op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; - op += SP + SP + SP + SP + "data[i] = (data[i] < static_cast(0)) ? alpha * data[i] : data[i];\n"; - op += SP + SP + SP + "}\n"; + op += "struct LeakyReluKernel {\n"; + op += SP + "template\n"; + op += SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const* __restrict__ data, T* __restrict__ out, std::size_t numElements, T alpha) const {\n"; + op += SP + SP + "const auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + "if(idx < numElements) {\n"; + op += SP + SP + "out[idx] = data[idx] >= 0 ? 
data[idx] : alpha * data[idx];\n"; op += SP + SP + "}\n"; - op += SP + "};\n"; + op += SP + "}\n"; + op += "};\n"; return op; } std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { - return SP + "LeakyReluKernel leakyReluKernel;\n"; + return "LeakyReluKernel leakyReluKernel;\n"; } std::string Generate_GPU_ALPAKA(std::string OpName) override { @@ -99,17 +100,20 @@ public: throw std::runtime_error("TMVA SOFIE Operator LeakyRelu called to Generate without being initialized first"); } + + std::stringstream out; auto length = ConvertShapeToLength(fShape); out << "\n//------ LEAKY_RELU_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_" << fNX - << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " - << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - + out << SP << "constexpr float " << OpName << "_alpha = " << std::setprecision(std::numeric_limits::max_digits10) << falpha << ";\n"; + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; + out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), static_cast(" << length << "), static_cast(0.01));\n"; - + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; return out.str(); } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 3e0be79..9ff300a 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -13,10 +13,6 @@ namespace SOFIE { void 
RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { if (!fInitializedTensors.empty()){ fGC += "\n// initialized tensors for weights\n"; - fGC += "using BufF1D = alpaka::Buf;\n"; - fGC += "using BufD1D = alpaka::Buf;\n"; - fGC += "using BufI641D = alpaka::Buf;\n"; - } for (auto &i : fInitializedTensors) { @@ -238,9 +234,13 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { fGC += "using Idx = std::size_t;\n"; fGC += "using Dim = alpaka::DimInt<1>;\n"; fGC += "using Acc = alpaka::TagToAcc;\n"; - fGC += "using DevAcc = alpaka::Dev;\n"; + fGC += "using DevAcc = alpaka::Dev;\n\n"; fGC += "using QueueProperty = alpaka::NonBlocking;\n"; - fGC += "using QueueAcc = alpaka::Queue;\n"; + fGC += "using QueueAcc = alpaka::Queue;\n\n"; + fGC += "using BufF1D = alpaka::Buf;\n"; + fGC += "using BufD1D = alpaka::Buf;\n"; + fGC += "using BufI641D = alpaka::Buf;\n\n"; + fGC += "\nalpaka::Platform const platform{};\n"; fGC += "DevAcc devAcc = alpaka::getDevByIdx(platform, 0);\n"; fGC += "alpaka::PlatformCpu platformHost{};\n"; diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index 5d5667a..76e5e29 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -62,62 +62,62 @@ configure_file(EmitFromONNX.cxx.in EmitFromONNX_all.cxx @ONLY) configure_file(EmitFromONNX_GPU_ALPAKA.cxx.in EmitFromONNX_GPU_ALPAKA_all.cxx @ONLY) # --- CPU emitter --- -ROOTTEST_GENERATE_EXECUTABLE(emitFromONNX EmitFromONNX_all.cxx - LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers - FIXTURES_SETUP sofie-compile-models-onnx-build) - -target_compile_options(emitFromONNX PRIVATE -Wno-unused-parameter -Wno-array-bounds) - -ROOTTEST_ADD_TEST(SofieCompileModels_ONNX - COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNX - FIXTURES_REQUIRED sofie-compile-models-onnx-build - FIXTURES_SETUP sofie-compile-models-onnx -) - -# --- Custom model tests --- -if (BLAS_FOUND) - ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromONNX 
TestCustomModelsFromONNX.cxx - LIBRARIES - MathCore - SOFIE_core - BLAS::BLAS - GTest::gtest - GTest::gtest_main - FIXTURES_REQUIRED sofie-compile-models-onnx - FIXTURES_SETUP sofie-test-models-onnx-build - ) - - target_include_directories(TestCustomModelsFromONNX PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - - ROOTTEST_ADD_TEST(TestCustomModelsFromONNX - EXEC ./TestCustomModelsFromONNX - FIXTURES_REQUIRED sofie-test-models-onnx-build - ) -endif() - -# --- Python-based generators --- -ROOT_FIND_PYTHON_MODULE(torch) -if (ROOT_TORCH_FOUND) - configure_file(Conv1dModelGenerator.py Conv1dModelGenerator.py COPYONLY) - configure_file(Conv2dModelGenerator.py Conv2dModelGenerator.py COPYONLY) - configure_file(Conv3dModelGenerator.py Conv3dModelGenerator.py COPYONLY) - configure_file(ConvTrans2dModelGenerator.py ConvTrans2dModelGenerator.py COPYONLY) - configure_file(LinearModelGenerator.py LinearModelGenerator.py COPYONLY) - configure_file(RecurrentModelGenerator.py RecurrentModelGenerator.py COPYONLY) - - if (BLAS_FOUND) - ROOT_ADD_GTEST(TestSofieModels TestSofieModels.cxx - LIBRARIES SOFIE_core SOFIE_parsers BLAS::BLAS - INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} - ) - endif() -endif() - -ROOT_EXECUTABLE(emitGNN GNN/EmitGNN.cxx LIBRARIES SOFIE_core) -ROOT_ADD_TEST(tmva-sofie-EmitGNN COMMAND emitGNN) - -ROOT_EXECUTABLE(EmitGraphIndependent GNN/EmitGraphIndependent.cxx LIBRARIES SOFIE_core) -ROOT_ADD_TEST(tmva-sofie-EmitGraphIndependent COMMAND EmitGraphIndependent) +# ROOTTEST_GENERATE_EXECUTABLE(emitFromONNX EmitFromONNX_all.cxx +# LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers +# FIXTURES_SETUP sofie-compile-models-onnx-build) + +# target_compile_options(emitFromONNX PRIVATE -Wno-unused-parameter -Wno-array-bounds) + +# ROOTTEST_ADD_TEST(SofieCompileModels_ONNX +# COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNX +# FIXTURES_REQUIRED sofie-compile-models-onnx-build +# FIXTURES_SETUP sofie-compile-models-onnx +# ) + +# # --- Custom model tests --- +# if 
(BLAS_FOUND) +# ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromONNX TestCustomModelsFromONNX.cxx +# LIBRARIES +# MathCore +# SOFIE_core +# BLAS::BLAS +# GTest::gtest +# GTest::gtest_main +# FIXTURES_REQUIRED sofie-compile-models-onnx +# FIXTURES_SETUP sofie-test-models-onnx-build +# ) + +# target_include_directories(TestCustomModelsFromONNX PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + +# ROOTTEST_ADD_TEST(TestCustomModelsFromONNX +# EXEC ./TestCustomModelsFromONNX +# FIXTURES_REQUIRED sofie-test-models-onnx-build +# ) +# endif() + +# # --- Python-based generators --- +# ROOT_FIND_PYTHON_MODULE(torch) +# if (ROOT_TORCH_FOUND) +# configure_file(Conv1dModelGenerator.py Conv1dModelGenerator.py COPYONLY) +# configure_file(Conv2dModelGenerator.py Conv2dModelGenerator.py COPYONLY) +# configure_file(Conv3dModelGenerator.py Conv3dModelGenerator.py COPYONLY) +# configure_file(ConvTrans2dModelGenerator.py ConvTrans2dModelGenerator.py COPYONLY) +# configure_file(LinearModelGenerator.py LinearModelGenerator.py COPYONLY) +# configure_file(RecurrentModelGenerator.py RecurrentModelGenerator.py COPYONLY) + +# if (BLAS_FOUND) +# ROOT_ADD_GTEST(TestSofieModels TestSofieModels.cxx +# LIBRARIES SOFIE_core SOFIE_parsers BLAS::BLAS +# INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} +# ) +# endif() +# endif() + +# ROOT_EXECUTABLE(emitGNN GNN/EmitGNN.cxx LIBRARIES SOFIE_core) +# ROOT_ADD_TEST(tmva-sofie-EmitGNN COMMAND emitGNN) + +# ROOT_EXECUTABLE(EmitGraphIndependent GNN/EmitGraphIndependent.cxx LIBRARIES SOFIE_core) +# ROOT_ADD_TEST(tmva-sofie-EmitGraphIndependent COMMAND EmitGraphIndependent) # ========================= # ALPAKA TESTS @@ -192,13 +192,15 @@ if (ENABLE_ALPAKA_TESTS) PROPERTIES LANGUAGE CUDA ) - ROOTTEST_GENERATE_EXECUTABLE( - TestCustomModelsFromONNXForAlpakaCuda - TestCustomModelsFromONNXForAlpakaCuda.cxx - LIBRARIES MathCore SOFIE_core GTest::gtest GTest::gtest_main - FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka - FIXTURES_SETUP sofie-test-models-onnx-alpaka-build - ) + 
+ROOTTEST_GENERATE_EXECUTABLE( + TestCustomModelsFromONNXForAlpakaCuda + TestCustomModelsFromONNXForAlpakaCuda.cxx + LIBRARIES MathCore SOFIE_core GTest::gtest GTest::gtest_main + FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka + FIXTURES_SETUP sofie-test-models-onnx-alpaka-build +) + target_include_directories( TestCustomModelsFromONNXForAlpakaCuda PRIVATE diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx index ba9a42a..14eb6a3 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx @@ -812,7 +812,7 @@ TEST(ONNX, LinearWithLeakyRelu) { constexpr float TOLERANCE = 1; - // Preparing the standard all-ones input + // Preparing input std::vector input({ 0.4369, -0.6882, 1.0309, -1.0263, -0.1519, 1.2237, -0.7054, -0.1762, -0.6811, -2.2597, 1.0388, -0.7993, 0.1468, 1.3257, -0.4714, -0.0958, diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 017a19f..fc2f154 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -10,6 +10,9 @@ #include "Linear_64_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/Linear_64.ref.hxx" +#include "LinearWithLeakyRelu_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/LinearWithLeakyRelu.ref.hxx" + #include #include #include @@ -154,3 +157,51 @@ TEST_F(SofieAlpakaTest, Linear64) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } } + +TEST_F(SofieAlpakaTest, LinearWithLeakyRelu) +{ + alpaka::PlatformCpu hostPlatform{}; + auto host = alpaka::getDevByIdx(hostPlatform, 0u); + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + alpaka::PlatformCudaRt platform{}; + alpaka::DevCudaRt device = alpaka::getDevByIdx(platform, 0u); + alpaka::Queue queue{device}; + + std::vector input({ + 0.4369, -0.6882, 1.0309, -1.0263, 
-0.1519, 1.2237, -0.7054, -0.1762, + -0.6811, -2.2597, 1.0388, -0.7993, 0.1468, 1.3257, -0.4714, -0.0958, + 0.7057, -0.3749, -0.3310, 0.0986, -0.1370, 0.0832, -1.6465, -0.2793 + }); + + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + + for (Idx i = 0; i < input.size(); ++i) { + A_ptr[i] = input[i]; + } + + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, A_d, A); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{24})); + + { + SOFIE_LinearWithLeakyRelu::Session session; + auto result = session.infer(A_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = LinearWithLeakyRelu_ExpectedOutput::outputs; + + for (size_t i = 0; i < 24; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); + } +} From 671b4b035566b6e8b1243c62e2f3beecc41b648a Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 26 Jan 2026 13:26:23 +0100 Subject: [PATCH 22/43] fix: sigmoid operator gpu implementation and test --- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 2 - .../inc/SOFIE/ROperator_Sigmoid.hxx | 25 ++++++------ .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 39 ++++++++++++++++++- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 7d12228..0f3b699 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -100,8 +100,6 @@ public: throw std::runtime_error("TMVA SOFIE Operator LeakyRelu called to Generate without being initialized first"); } - - std::stringstream out; auto length = ConvertShapeToLength(fShape); out << "\n//------ LEAKY_RELU_GPU_ALPAKA\n"; diff 
--git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index f2e2e25..5edbcf9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -61,14 +61,15 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override { std::string op; op = "\n//------ SIGMOID_KERNEL_ALPAKA\n"; - op += SP + "struct SigmoidKernel {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements) const {\n"; - op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; - op += SP + SP + SP + SP + "data[i] = static_cast(1) / (static_cast(1) + exp(-data[i]));\n"; + op += "struct SigmoidKernel {\n"; + op += SP + "template\n"; + op += SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const* __restrict__ data, T* __restrict__ out, std::size_t numElements) const {\n"; + op += SP + SP + "const auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + "if(idx < numElements) {\n"; + op += SP + SP + SP + SP + "out[idx] = static_cast(1) / (static_cast(1) + exp(-data[idx]));\n"; op += SP + SP + SP + "}\n"; op += SP + SP + "}\n"; op += SP + "};\n"; @@ -89,14 +90,14 @@ public: std::stringstream out; auto length = ConvertShapeToLength(fShape); out << "\n//------ SIGMOID_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_" << fNX - << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " - << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; + out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, sigmoidKernel, alpaka::getPtrNative(deviceBuf_" 
<< fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), static_cast(" << length << "));\n"; - + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; return out.str(); } diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index fc2f154..1537ea4 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -13,6 +13,9 @@ #include "LinearWithLeakyRelu_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/LinearWithLeakyRelu.ref.hxx" +#include "LinearWithSigmoid_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/LinearWithSigmoid.ref.hxx" + #include #include #include @@ -184,7 +187,6 @@ TEST_F(SofieAlpakaTest, LinearWithLeakyRelu) auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); alpaka::memcpy(queue, A_d, A); alpaka::wait(queue); - cudaDeviceSynchronize(); auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{24})); @@ -205,3 +207,38 @@ TEST_F(SofieAlpakaTest, LinearWithLeakyRelu) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } } + +TEST_F(SofieAlpakaTest, LinearWithSigmoid) +{ + + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{48})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + + for (Idx i = 0; i < 48; ++i) { + A_ptr[i] = 1.0; + } + + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{48})); + alpaka::memcpy(queue, A_d, A); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{48})); + + { + SOFIE_LinearWithSigmoid::Session session("LinearWithSigmoid_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(A_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + 
alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = LinearWithSigmoid_ExpectedOutput::all_ones; + for (size_t i = 0; i < 24; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); + } +} From 2fe07bbc543acf278eca790c485e3ea3eab25d01 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 3 Mar 2026 15:30:17 +0100 Subject: [PATCH 23/43] feat: Support for heterogeneous inference of transpose operator Co-authored-by: Saransh Chopra Co-authored-by: Francesco Derme Co-authored-by: PietroFumagalli --- .../inc/SOFIE/ROperator_BasicBinary.hxx | 147 +++++----- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 17 +- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 6 +- .../inc/SOFIE/ROperator_Transpose.hxx | 48 +++- src/SOFIE_core/src/RModel_ALPAKA.cxx | 9 +- src/SOFIE_core/test/CMakeLists.txt | 204 +++++--------- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 253 +++++++++++------- .../test/input_models/Transpose.onnx | Bin 0 -> 156 bytes 8 files changed, 352 insertions(+), 332 deletions(-) create mode 100644 src/SOFIE_core/test/input_models/Transpose.onnx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index 2d0e6cb..012905d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -394,84 +394,75 @@ public: op = "\n//------ "+opName+"_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; op += SP + "struct Binary"+BinaryOperatorTrait::Name()+"Kernel {\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * A, T const * B, T * C,\n"; - for( size_t i=0; i(acc)[0];\n"; + op += SP + SP + SP + "if (idx < " + ConvertShapeToLength(fShapeY) + ") {\n"; + auto stridesA = UTILITY::ComputeStrideFromShape(fShapeA); + auto stridesB = UTILITY::ComputeStrideFromShape(fShapeB); + + for(size_t id_s = 
0; id_s < stridesA.size(); ++id_s){ + if(fShapeA[id_s] == 1) + stridesA[id_s] = 0; } - op += SP + SP + SP + ") const{\n"; - op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec<" + std::to_string(fDimShapeY.size()) + ", std::size_t>("; - for (size_t i = 0; i < fDimShapeY.size(); i++) { - op += "size_" + std::to_string(i); + + for(size_t id_s = 0; id_s < stridesB.size(); ++id_s){ + if(fShapeB[id_s] == 1) + stridesB[id_s] = 0; } - op.pop_back(); - op += "));\n"; - op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; - auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeA); - auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeB); - auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); - std::string compute_idx_A, compute_idx_B, compute_idx_Y; - if (fDimShapeA.empty() || - std::all_of(fDimShapeA.begin(), fDimShapeA.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { - compute_idx_A = "0"; - } else { - for (size_t i = 0; i < fDimShapeA.size(); ++i) { - if (fDimShapeA[i].dim == 1 || fDimShapeA[i].GetVal() == "1") - continue; - compute_idx_A += "elem[" + std::to_string(i + (fDimShapeY.size() - fDimShapeA.size())) + "]"; - if (stridesA[i].GetVal() != "1") - compute_idx_A += " * " + stridesA[i].GetVal(); - compute_idx_A += " + "; - } - // remove last 3 character " + " - for (int j = 0; j < 3; j++) - compute_idx_A.pop_back(); - } - if (fDimShapeB.empty() || - std::all_of(fDimShapeB.begin(), fDimShapeB.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { - compute_idx_B = "0"; - } else { - for (size_t i = 0; i < fDimShapeB.size(); ++i) { - if (fDimShapeB[i].dim == 1 || fDimShapeB[i].GetVal() == "1") - continue; - compute_idx_B += "elem[" + std::to_string(i + (fDimShapeY.size() - fDimShapeB.size())) + "]"; - if (stridesB[i].GetVal() != "1") - compute_idx_B += " * " + stridesB[i].GetVal(); - compute_idx_B += " + "; - } - // remove 
last 3 character " + " - for (int j = 0; j < 3; j++) - compute_idx_B.pop_back(); - } - int nloop = 0; - if (fDimShapeY.empty() || - std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { - compute_idx_Y = "0"; - } else { - for (size_t i = 0; i < fDimShapeY.size(); ++i) { - if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { - nloop++; - for (int j = 0; j < nloop; j++) op += SP; - compute_idx_Y += "elem[" + std::to_string(i) + "]"; - if (stridesY[i].GetVal() != "1") - compute_idx_Y += " * " + stridesY[i].GetVal(); - compute_idx_Y += " + "; - } - } - // remove last 3 characters " + " - for (int j = 0; j < 3; j++) - compute_idx_Y.pop_back(); + std::string flattened_index_A = ""; + std::string flattened_index_B = ""; + std::string temp = "idx"; + + op += "// stridesY " + ConvertShapeToString(stridesY) + "\n"; + op += "// stridesA " + ConvertShapeToString(stridesA) + "\n"; + op += "// stridesB " + ConvertShapeToString(stridesB) + "\n"; + + for (size_t id_s = 0; id_s < fShapeA.size(); ++id_s) { + + auto strideY = stridesY[id_s]; + auto strideA = stridesA[id_s]; + + // coord expression + std::string coord = "(int)(" + temp + " / " + std::to_string(strideY) + ")"; + + // accumulate into final index + flattened_index_A += coord + " * " + std::to_string(strideA) + " + "; + + // update temp correctly + temp = temp + " - (" + coord + " * " + std::to_string(strideY) + ")"; } - for (int j = 0; j < nloop + 1; j++) op += SP; - op += "C[" + compute_idx_Y + "] = " - + BinaryOperatorTrait::Op("A[" + compute_idx_A + "]", - "B[" + compute_idx_B + "]") - + " ;\n"; - for (int i = nloop; i > 0; i--) { - for (int j = 0; j < i; j++) op += SP; - op += "}\n"; + + // remove trailing " + " + if (!flattened_index_A.empty()) + flattened_index_A.erase(flattened_index_A.size() - 3); + + temp = "idx"; + + for (size_t id_s = 0; id_s < fShapeB.size(); ++id_s) { + + auto strideY = stridesY[id_s]; + auto strideB = stridesB[id_s]; + + // 
coord expression + std::string coord = "(int)(" + temp + " / " + std::to_string(strideY) + ")"; + + // accumulate into final index + flattened_index_B += coord + " * " + std::to_string(strideB) + " + "; + + // update temp correctly + temp = temp + " - (" + coord + " * " + std::to_string(strideY) + ")"; } + + // remove trailing " + " + if (!flattened_index_B.empty()) + flattened_index_B.erase(flattened_index_B.size() - 3); + + + op += "C[idx] = " + BinaryOperatorTrait::Op("A["+flattened_index_A+"]", "B["+flattened_index_B+"]") + ";\n"; + op += "}\n}\n};\n"; return op; } @@ -479,7 +470,7 @@ public: if (fIsOutputConstant) return ""; - return SP + "Binary"+BinaryOperatorTrait::Name()+"Kernel " + OpName + "Kernel;\n"; + return SP + "Binary"+BinaryOperatorTrait::Name()+"Kernel binary" + OpName + "Kernel;\n"; } std::string Generate_GPU_ALPAKA(std::string OpName) { @@ -492,8 +483,14 @@ public: std::stringstream out; auto length = ConvertDimShapeToLength(fDimShapeY); out << "\n//------ "+OpName+"_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", " << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA + << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNY + << ", binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA + << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; 
return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 1c43724..978685b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -303,15 +303,13 @@ namespace SOFIE{ // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. auto targetShape = fShapeY; // include a separate scope to avoid defining unique operator temp variables + auto length = SOFIE::ConvertDimShapeToLength(fShapeY); // output size out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; out << SP << "{\n"; - out << " float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << ".data()," << ConvertShapeToString(fShapeC) << ", " << ConvertDimShapeToString(fShapeY) << ");\n"; - auto length = SOFIE::ConvertDimShapeToLength(fShapeY); // output size - out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{" << length << "}));\n"; - out << SP << SP << "std::memcpy(alpaka::getPtrNative(hostBuf_"<< fNC2 <<"), data, "<< length << " * sizeof(float));\n"; + out << " std::vector data(" << length << ");\n"; + out << " SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNC << ".data()," << ConvertShapeToString(fShapeC) << ", " << ConvertDimShapeToString(fShapeY) << ", " << "std::span(data));\n"; + out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::createView(hostAcc, data);\n"; out << SP << SP << "alpaka::memcpy(queue, deviceBuf_"<< fNC2 << ", hostBuf_"<< fNC2 << ");\n"; - out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } return out.str(); @@ -489,7 +487,7 @@ namespace SOFIE{ if (fActivation == EActivationType::RELU){ out << SP << "blas.gemmrelu("<\n"; op += SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const* __restrict__ data, T* __restrict__ out, std::size_t numElements, T alpha) const {\n"; - op += SP + SP + "const auto idx = 
alpaka::getIdx(acc)[0];\n"; - op += SP + "if(idx < numElements) {\n"; - op += SP + SP + "out[idx] = data[idx] >= 0 ? data[idx] : alpha * data[idx];\n"; + op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (idx < numElements) {\n"; + op += SP + SP + SP + "out[idx] = data[idx] >= T(0) ? data[idx] : alpha * data[idx];\n"; op += SP + SP + "}\n"; op += SP + "}\n"; op += "};\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 090d2d8..92a2eee 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -165,18 +165,56 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string OpName) { std::string op; + OpName = "op_" + OpName; op = "\n//------ TRANSPOSE_KERNEL_ALPAKA\n"; - op += SP + "struct TransposeKernel{\n"; + op += SP + "struct TransposeKernel_" + OpName + " {\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T const * output, std::size_t * shape, std::size_t * strides) const {\n"; - op += SP + SP + SP + "for (auto i : alpaka::uniformElementsND(acc, shape)) {\n"; - op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* input, T* output,"; + op += "const std::size_t totalElements) const {\n"; + op += SP + SP + SP + SP + "auto const idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + SP + "if(idx >= totalElements) return;\n"; + op += SP + SP + SP + SP + "std::size_t input_idx = 0;\n"; + op += SP + SP + SP + SP + "std::size_t remaining = idx;\n"; + op += SP + SP + SP + SP + "std::size_t coord;\n"; + auto inputStrides = UTILITY::ComputeStrideFromShape(fShapeData); + auto outputStrides = UTILITY::ComputeStrideFromShape(fShapeOutput); + for (size_t k = 0; k < fShapeData.size(); k++) { + op += SP 
+ SP + SP + SP + "coord = remaining * " + std::to_string(1/outputStrides[k]) + ";\n"; + op += SP + SP + SP + SP + "remaining = remaining - coord * " + std::to_string(outputStrides[k]) + ";\n"; + op += SP + SP + SP + SP + "input_idx += coord * " + std::to_string(inputStrides[fAttrPerm[k]]) + ";\n"; + } + op += SP + SP + SP + SP + SP + "output[idx] = input[input_idx];\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + SP + "};\n"; return op; } + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string OpName) override { + return SP + "TransposeKernel_op_" + OpName + " transposeKernel_" + OpName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + if (fShapeOutput.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Transpose called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertShapeToLength(fShapeOutput); + + out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNData << " = {elementsPerGrid_" << fNData << ", elementsPerThread_" << fNData << "};\n"; + out << SP << "auto const workDiv_" << fNData << " = alpaka::getValidWorkDiv(kernelCfg_" << fNData << ", devAcc, transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData + << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNData + << ", transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData + << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; + + return out.str(); + } }; diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 9ff300a..f09b754 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -383,16 +383,15 @@ void 
RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ auto length = ConvertShapeToLength(i.second.shape()); std::string slength = std::to_string(length); if (i.second.type() == ETensorType::FLOAT) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{"+ slength+"}));\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(float));\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else if (i.second.type() == ETensorType::DOUBLE) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{"+ slength+"}));\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(double));\n"; fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else if (i.second.type() == ETensorType::INT64) { - fGC += " auto hostBuf_"+i.first+" = alpaka::allocBuf(hostAcc, Ext1D::all(Idx{" + slength + "}));\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(int64_t));"; + fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; + fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(int64_t));\n"; fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index 76e5e29..c669aa5 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -1,17 +1,9 @@ -# 
Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. -# All rights reserved. -# -# For the licensing terms see $ROOTSYS/LICENSE. -# For the list of contributors see $ROOTSYS/README/CREDITS. - -############################################################################ -# CMakeLists.txt file for building TMVA SOFIE tests. -# @author Federico Sossai, Sanjiban Sengupta -############################################################################ - cmake_minimum_required(VERSION 3.14) include(FetchContent) +############################################################################ +# Basic setup +############################################################################ include_directories(${CMAKE_SOURCE_DIR}/src/SOFIE_core/inc) include_directories(${CMAKE_SOURCE_DIR}/src/SOFIE_parsers/inc) @@ -23,11 +15,14 @@ if (NOT ONNX_MODELS_DIR) endif() option(ENABLE_ALPAKA_TESTS "Enable Alpaka-based SOFIE tests" OFF) + set(ALPAKA_BACKEND "cuda" CACHE STRING "Alpaka backend to test (cuda, cpu, hip, sycl)") set_property(CACHE ALPAKA_BACKEND PROPERTY STRINGS cuda cpu hip sycl) -# String template used to produce calls to EmitModel(...) per file. 
+############################################################################ +# Generate emitter sources +############################################################################ set(CAPTURE_STR "try {\n\ EmitModel(\"@1\", \"@2\");\n\ @@ -45,10 +40,9 @@ set(CAPTURE_STR }\n\ ") -# --- Collect ONNX files --- -set(ALL_CAPTURES "") file(GLOB ONNX_FILES "${ONNX_MODELS_DIR}/*.onnx") +set(ALL_CAPTURES "") foreach(onnx_file ${ONNX_FILES}) get_filename_component(fname ${onnx_file} NAME_WE) string(REPLACE "@1" "${onnx_file}" cap "${CAPTURE_STR}") @@ -61,81 +55,20 @@ set(EMIT_CAPTURES "${ALL_CAPTURES}") configure_file(EmitFromONNX.cxx.in EmitFromONNX_all.cxx @ONLY) configure_file(EmitFromONNX_GPU_ALPAKA.cxx.in EmitFromONNX_GPU_ALPAKA_all.cxx @ONLY) -# --- CPU emitter --- -# ROOTTEST_GENERATE_EXECUTABLE(emitFromONNX EmitFromONNX_all.cxx -# LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers -# FIXTURES_SETUP sofie-compile-models-onnx-build) - -# target_compile_options(emitFromONNX PRIVATE -Wno-unused-parameter -Wno-array-bounds) - -# ROOTTEST_ADD_TEST(SofieCompileModels_ONNX -# COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNX -# FIXTURES_REQUIRED sofie-compile-models-onnx-build -# FIXTURES_SETUP sofie-compile-models-onnx -# ) - -# # --- Custom model tests --- -# if (BLAS_FOUND) -# ROOTTEST_GENERATE_EXECUTABLE(TestCustomModelsFromONNX TestCustomModelsFromONNX.cxx -# LIBRARIES -# MathCore -# SOFIE_core -# BLAS::BLAS -# GTest::gtest -# GTest::gtest_main -# FIXTURES_REQUIRED sofie-compile-models-onnx -# FIXTURES_SETUP sofie-test-models-onnx-build -# ) - -# target_include_directories(TestCustomModelsFromONNX PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - -# ROOTTEST_ADD_TEST(TestCustomModelsFromONNX -# EXEC ./TestCustomModelsFromONNX -# FIXTURES_REQUIRED sofie-test-models-onnx-build -# ) -# endif() - -# # --- Python-based generators --- -# ROOT_FIND_PYTHON_MODULE(torch) -# if (ROOT_TORCH_FOUND) -# configure_file(Conv1dModelGenerator.py Conv1dModelGenerator.py 
COPYONLY) -# configure_file(Conv2dModelGenerator.py Conv2dModelGenerator.py COPYONLY) -# configure_file(Conv3dModelGenerator.py Conv3dModelGenerator.py COPYONLY) -# configure_file(ConvTrans2dModelGenerator.py ConvTrans2dModelGenerator.py COPYONLY) -# configure_file(LinearModelGenerator.py LinearModelGenerator.py COPYONLY) -# configure_file(RecurrentModelGenerator.py RecurrentModelGenerator.py COPYONLY) - -# if (BLAS_FOUND) -# ROOT_ADD_GTEST(TestSofieModels TestSofieModels.cxx -# LIBRARIES SOFIE_core SOFIE_parsers BLAS::BLAS -# INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} -# ) -# endif() -# endif() - -# ROOT_EXECUTABLE(emitGNN GNN/EmitGNN.cxx LIBRARIES SOFIE_core) -# ROOT_ADD_TEST(tmva-sofie-EmitGNN COMMAND emitGNN) - -# ROOT_EXECUTABLE(EmitGraphIndependent GNN/EmitGraphIndependent.cxx LIBRARIES SOFIE_core) -# ROOT_ADD_TEST(tmva-sofie-EmitGraphIndependent COMMAND EmitGraphIndependent) - -# ========================= -# ALPAKA TESTS -# ========================= +############################################################################ +# Alpaka tests +############################################################################ if (ENABLE_ALPAKA_TESTS) string(TOLOWER "${ALPAKA_BACKEND}" _alpaka_backend) - if (NOT _alpaka_backend IN_LIST ALPAKA_BACKEND) - message(FATAL_ERROR - "Unsupported ALPAKA_BACKEND='${ALPAKA_BACKEND}'. 
" - "Valid values: cuda, cpu, hip, sycl") + message(FATAL_ERROR "Unsupported ALPAKA_BACKEND=${ALPAKA_BACKEND}") endif() FetchContent_Declare( sofieBLAS GIT_REPOSITORY https://github.com/ML4EP/sofieBLAS - GIT_TAG edf2259876e9f4fb5a8f72db20b2dfb5dc26b517 + GIT_TAG dev ) FetchContent_MakeAvailable(sofieBLAS) @@ -146,106 +79,111 @@ if (ENABLE_ALPAKA_TESTS) ) FetchContent_MakeAvailable(alpaka) - # --- ALPAKA emitter --- - ROOTTEST_GENERATE_EXECUTABLE(emitFromONNXAlpaka EmitFromONNX_GPU_ALPAKA_all.cxx + ########################################################################## + # Alpaka emitter + ########################################################################## + ROOTTEST_GENERATE_EXECUTABLE( + emitFromONNXAlpaka + EmitFromONNX_GPU_ALPAKA_all.cxx LIBRARIES protobuf::libprotobuf SOFIE_core SOFIE_parsers FIXTURES_SETUP sofie-compile-models-onnx-alpaka-build ) - target_compile_options(emitFromONNXAlpaka PRIVATE -Wno-unused-parameter -Wno-array-bounds) - - if (ONNX_FILES) - foreach(onnx_file ${ONNX_FILES}) - get_filename_component(fname ${onnx_file} NAME_WE) - add_custom_command(TARGET emitFromONNXAlpaka POST_BUILD - COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 - $ - "${onnx_file}" "${CMAKE_CURRENT_BINARY_DIR}/${fname}" - COMMENT "Running ALPAKA emitter on ${onnx_file}") - endforeach() - endif() + target_compile_options(emitFromONNXAlpaka PRIVATE + -Wno-unused-parameter + -Wno-array-bounds + ) - ROOTTEST_ADD_TEST(SofieCompileModels_ONNX_Alpaka + ROOTTEST_ADD_TEST( + SofieCompileModels_ONNX_Alpaka COMMAND ${CMAKE_COMMAND} -E env ROOTIGNOREPREFIX=1 ./emitFromONNXAlpaka FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka-build FIXTURES_SETUP sofie-compile-models-onnx-alpaka ) - set(CXXFLAGS -O2 -g -DALPAKA_HAS_STD_ATOMIC_REF) - set(CXX_HOST_FLAGS -fPIC -pthread) - - # ---- Backend selection ---- + ########################################################################## + # CUDA backend + ########################################################################## 
if (_alpaka_backend STREQUAL "cuda") + message(STATUS "Enabling Alpaka CUDA tests") + enable_language(CUDA) find_package(CUDAToolkit REQUIRED) - set(CUDA_ARCH "sm_86") - set(CXX_CUDA_FLAGS - -arch=${CUDA_ARCH} - -Wno-deprecated-gpu-targets - --extended-lambda - --expt-relaxed-constexpr) - set_source_files_properties( TestCustomModelsFromONNXForAlpakaCuda.cxx PROPERTIES LANGUAGE CUDA ) - -ROOTTEST_GENERATE_EXECUTABLE( - TestCustomModelsFromONNXForAlpakaCuda - TestCustomModelsFromONNXForAlpakaCuda.cxx - LIBRARIES MathCore SOFIE_core GTest::gtest GTest::gtest_main - FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka - FIXTURES_SETUP sofie-test-models-onnx-alpaka-build -) - + ROOTTEST_GENERATE_EXECUTABLE( + TestCustomModelsFromONNXForAlpakaCuda + TestCustomModelsFromONNXForAlpakaCuda.cxx + LIBRARIES MathCore SOFIE_core GTest::gtest GTest::gtest_main + FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka + FIXTURES_SETUP sofie-test-models-onnx-alpaka-build + ) target_include_directories( TestCustomModelsFromONNXForAlpakaCuda PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${alpaka_SOURCE_DIR}/include - ${SOFIE_INCLUDE} - ${sofieblas_SOURCE_DIR}/include + ${sofieblas_SOURCE_DIR}/include ${ROOT_INCLUDE_DIRS} - ${CUDA_BASE}/include + ${CUDAToolkit_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) set_target_properties( TestCustomModelsFromONNXForAlpakaCuda - PROPERTIES CUDA_SEPARABLE_COMPILATION ON + PROPERTIES + CUDA_SEPARABLE_COMPILATION OFF + CUDA_ARCHITECTURES 70 80 86 ) target_compile_definitions( TestCustomModelsFromONNXForAlpakaCuda PRIVATE ALPAKA_ACC_GPU_CUDA_ENABLED + ALPAKA_HAS_STD_ATOMIC_REF ) - target_link_directories( + target_compile_options( TestCustomModelsFromONNXForAlpakaCuda PRIVATE - ${CUDA_BASE}/lib64 + $<$: + --extended-lambda + --expt-relaxed-constexpr + --generate-line-info + --use_fast_math + -g + -G + # -fsanitize=address + -O1 + -Wno-deprecated-gpu-targets + > + $<$: + -O2 + -g + -G + -fPIC + -pthread + > ) + # set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} 
-fsanitize=address") - target_link_libraries(TestCustomModelsFromONNXForAlpakaCuda + # ROOT-compatible: plain signature only + target_link_libraries( + TestCustomModelsFromONNXForAlpakaCuda + CUDA::cudart CUDA::cublas CUDA::cublasLt - CUDA::cudart ${ROOT_LIBRARIES} ) - ROOTTEST_ADD_TEST(TestCustomModelsFromONNXForAlpakaCuda + ROOTTEST_ADD_TEST( + TestCustomModelsFromONNXForAlpakaCuda EXEC ./TestCustomModelsFromONNXForAlpakaCuda FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka ) - elseif (_alpaka_backend STREQUAL "cpu") - message(STATUS "Alpaka CPU backend selected (not yet implemented)") - elseif (_alpaka_backend STREQUAL "hip") - message(STATUS "Alpaka HIP backend selected (not yet implemented)") - elseif (_alpaka_backend STREQUAL "sycl") - message(STATUS "Alpaka SYCL backend selected (not yet implemented)") - endif() # backend - -endif() # ENABLE_ALPAKA_TESTS + endif() # cuda backend +endif() # ENABLE_ALPAKA_TESTS diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 1537ea4..abe6163 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -1,21 +1,20 @@ #include #include -#include "Linear_16_FromONNX_GPU_ALPAKA.hxx" -#include "input_models/references/Linear_16.ref.hxx" - -#include "Linear_32_FromONNX_GPU_ALPAKA.hxx" -#include "input_models/references/Linear_32.ref.hxx" - #include "Linear_64_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/Linear_64.ref.hxx" +#include "AddBroadcast1_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/AddBroadcast1.ref.hxx" + #include "LinearWithLeakyRelu_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/LinearWithLeakyRelu.ref.hxx" #include "LinearWithSigmoid_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/LinearWithSigmoid.ref.hxx" +#include "Transpose_FromONNX_GPU_ALPAKA.hxx" + #include #include #include @@ 
-59,186 +58,234 @@ class SofieAlpakaTest : public ::testing::Test { } }; -TEST_F(SofieAlpakaTest, Linear16) -{ - constexpr float TOLERANCE = DEFAULT_TOLERANCE; - auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); - float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); +// TEST_F(SofieAlpakaTest, Linear64) +// { +// constexpr float TOLERANCE = DEFAULT_TOLERANCE; - for (Idx i = 0; i < 1600; ++i) { - A_ptr[i] = 1.0; - } +// auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); +// float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); - auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); - alpaka::memcpy(queue, A_d, A); - alpaka::wait(queue); +// for (Idx i = 0; i < 1600; ++i) { +// A_ptr[i] = 1.0; +// } - auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); - - { - SOFIE_Linear_16::Session session("Linear_16_FromONNX_GPU_ALPAKA.dat"); - auto result = session.infer(A_d); +// auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); +// alpaka::memcpy(queue, A_d, A); +// alpaka::wait(queue); - alpaka::memcpy(queue, result_h, result); - alpaka::wait(queue); - cudaDeviceSynchronize(); - } +// auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); - float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); - float *correct = Linear_16_ExpectedOutput::all_ones; +// { +// SOFIE_Linear_64::Session session("Linear_64_FromONNX_GPU_ALPAKA.dat"); +// auto result = session.infer(A_d); +// alpaka::wait(queue); +// cudaDeviceSynchronize(); + +// alpaka::memcpy(queue, result_h, result); +// alpaka::wait(queue); +// } + +// float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); +// float *correct = Linear_64_ExpectedOutput::all_ones; - for (size_t i = 0; i < 160; ++i) { - EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); - } -} +// for (size_t i = 0; i < 160; ++i) { +// EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +// } +// } -TEST_F(SofieAlpakaTest, Linear32) +TEST_F(SofieAlpakaTest, LinearWithLeakyRelu) 
{ constexpr float TOLERANCE = DEFAULT_TOLERANCE; - auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); + std::vector input({ + 0.4369, -0.6882, 1.0309, -1.0263, -0.1519, 1.2237, -0.7054, -0.1762, + -0.6811, -2.2597, 1.0388, -0.7993, 0.1468, 1.3257, -0.4714, -0.0958, + 0.7057, -0.3749, -0.3310, 0.0986, -0.1370, 0.0832, -1.6465, -0.2793 + }); + + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); - for (Idx i = 0; i < 1600; ++i) { - A_ptr[i] = 1.0; + for (Idx i = 0; i < input.size(); ++i) { + A_ptr[i] = input[i]; } - auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); alpaka::memcpy(queue, A_d, A); alpaka::wait(queue); - auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{24})); { - SOFIE_Linear_32::Session session("Linear_32_FromONNX_GPU_ALPAKA.dat"); + SOFIE_LinearWithLeakyRelu::Session session; auto result = session.infer(A_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); alpaka::memcpy(queue, result_h, result); alpaka::wait(queue); - cudaDeviceSynchronize(); } float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); - float *correct = Linear_32_ExpectedOutput::all_ones; + float *correct = LinearWithLeakyRelu_ExpectedOutput::outputs; - for (size_t i = 0; i < 160; ++i) { + for (size_t i = 0; i < 24; ++i) { EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } } -TEST_F(SofieAlpakaTest, Linear64) +TEST_F(SofieAlpakaTest, LinearWithSigmoid) { + constexpr float TOLERANCE = DEFAULT_TOLERANCE; - auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{48})); float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); - for (Idx i = 0; i < 1600; ++i) { + for (Idx i = 0; i < 48; ++i) { A_ptr[i] = 1.0; } - auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); + auto A_d = 
alpaka::allocBuf(device, Ext1D::all(Idx{48})); alpaka::memcpy(queue, A_d, A); alpaka::wait(queue); - auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{24})); { - SOFIE_Linear_64::Session session("Linear_64_FromONNX_GPU_ALPAKA.dat"); + SOFIE_LinearWithSigmoid::Session session("LinearWithSigmoid_FromONNX_GPU_ALPAKA.dat"); auto result = session.infer(A_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); alpaka::memcpy(queue, result_h, result); alpaka::wait(queue); - cudaDeviceSynchronize(); } - - float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); - float *correct = Linear_64_ExpectedOutput::all_ones; - for (size_t i = 0; i < 160; ++i) { + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = LinearWithSigmoid_ExpectedOutput::all_ones; + for (size_t i = 0; i < 24; ++i) { EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } } -TEST_F(SofieAlpakaTest, LinearWithLeakyRelu) +TEST_F(SofieAlpakaTest, AddBroadcast1) { - alpaka::PlatformCpu hostPlatform{}; - auto host = alpaka::getDevByIdx(hostPlatform, 0u); + constexpr float TOLERANCE = DEFAULT_TOLERANCE; - alpaka::PlatformCudaRt platform{}; - alpaka::DevCudaRt device = alpaka::getDevByIdx(platform, 0u); - alpaka::Queue queue{device}; + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{5})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); - std::vector input({ - 0.4369, -0.6882, 1.0309, -1.0263, -0.1519, 1.2237, -0.7054, -0.1762, - -0.6811, -2.2597, 1.0388, -0.7993, 0.1468, 1.3257, -0.4714, -0.0958, - 0.7057, -0.3749, -0.3310, 0.0986, -0.1370, 0.0832, -1.6465, -0.2793 - }); + auto B = alpaka::allocBuf(host, Ext1D::all(Idx{20})); + float *B_ptr = reinterpret_cast(alpaka::getPtrNative(B)); - auto A = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); - float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + std::vector A_vec({-0.78023305, -1.34029483, -3.01482951, 0.53641361, + 
-1.22594789}); + std::vector B_vec({1.0626695, 0.43842875, 1.22476468, 0.79763274, 0.98688211, + 0.25267614, 0.44874883, 0.31516773, -0.78771195, 0.64565664, + 0.50450593, -0.41265227, -0.22474539, -0.22362374, 0.00509674, + 0.16927211, 1.06756969, -0.81634773, 0.88467744, 0.78902059}); - for (Idx i = 0; i < input.size(); ++i) { - A_ptr[i] = input[i]; + for (Idx i = 0; i < A_vec.size(); ++i) { + A_ptr[i] = A_vec[i]; } - auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + for (Idx i = 0; i < B_vec.size(); ++i) { + B_ptr[i] = B_vec[i]; + } + + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{5})); alpaka::memcpy(queue, A_d, A); alpaka::wait(queue); - auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{24})); + auto B_d = alpaka::allocBuf(device, Ext1D::all(Idx{20})); + alpaka::memcpy(queue, B_d, B); + alpaka::wait(queue); + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{20})); { - SOFIE_LinearWithLeakyRelu::Session session; - auto result = session.infer(A_d); + SOFIE_AddBroadcast1::Session session; + auto result = session.infer(A_d, B_d); alpaka::wait(queue); cudaDeviceSynchronize(); alpaka::memcpy(queue, result_h, result); alpaka::wait(queue); - } - - float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); - float *correct = LinearWithLeakyRelu_ExpectedOutput::outputs; + } - for (size_t i = 0; i < 24; ++i) { + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = AddBroadcast1_ExpectedOutput::output; + for (size_t i = 0; i < 20; ++i) { EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } } -TEST_F(SofieAlpakaTest, LinearWithSigmoid) +TEST_F(SofieAlpakaTest, Transpose) { + constexpr float TOLERANCE = DEFAULT_TOLERANCE; - constexpr float TOLERANCE = DEFAULT_TOLERANCE; + // Input shape: (2, 1, 3, 4) -> 24 elements + constexpr Idx inputSize = 24; + // Output shape: (2, 3, 4, 1) -> 24 elements + constexpr Idx outputSize = 24; - auto A = alpaka::allocBuf(host, Ext1D::all(Idx{48})); - float 
*A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); - for (Idx i = 0; i < 48; ++i) { - A_ptr[i] = 1.0; - } + std::vector input_vec({ + // shape (2, 1, 3, 4) + 0.f, 1.f, 2.f, 3.f, + 4.f, 5.f, 6.f, 7.f, + 8.f, 9.f, 10.f, 11.f, - auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{48})); - alpaka::memcpy(queue, A_d, A); - alpaka::wait(queue); + 12.f, 13.f, 14.f, 15.f, + 16.f, 17.f, 18.f, 19.f, + 20.f, 21.f, 22.f, 23.f + }); - auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{48})); - - { - SOFIE_LinearWithSigmoid::Session session("LinearWithSigmoid_FromONNX_GPU_ALPAKA.dat"); - auto result = session.infer(A_d); - alpaka::wait(queue); - cudaDeviceSynchronize(); + for (Idx i = 0; i < inputSize; ++i) + input_ptr[i] = input_vec[i]; - alpaka::memcpy(queue, result_h, result); - alpaka::wait(queue); - } + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); - float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); - float *correct = LinearWithSigmoid_ExpectedOutput::all_ones; - for (size_t i = 0; i < 24; ++i) { - EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); - } -} + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Transpose::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + std::vector expected(outputSize); + std::vector inputShape = {2, 1, 3, 4}; + std::vector perm = {0, 2, 3, 1}; + std::vector outputShape = {2, 3, 4, 1}; + + std::vector inputStrides = {12, 12, 4, 1}; + std::vector outputStrides = {12, 4, 1, 1}; + + for (size_t i = 0; i < outputSize; ++i) + { + size_t remaining = i; + size_t inputIdx = 0; + for (size_t d = 0; d < 4; ++d) + { + size_t const coord = 
remaining / outputStrides[d]; + remaining = remaining - coord * outputStrides[d]; + inputIdx += coord * inputStrides[perm[d]]; + } + expected[i] = input_vec[inputIdx]; + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - expected[i]), TOLERANCE); +} \ No newline at end of file diff --git a/src/SOFIE_core/test/input_models/Transpose.onnx b/src/SOFIE_core/test/input_models/Transpose.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0e08157fb44f2c39f08643eb57f36b8e68631a64 GIT binary patch literal 156 zcmd Date: Thu, 5 Mar 2026 20:23:17 +0100 Subject: [PATCH 24/43] feat: Support for heterogeneous inference on concat operator Co-authored-by: Saransh Chopra Co-authored-by: Francesco Derme Co-authored-by: PietroFumagalli --- src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx | 119 +++++++++++++++++- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 46 +++++++ 2 files changed, 163 insertions(+), 2 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx index c828668..a44a807 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx @@ -316,8 +316,123 @@ return out.str(); } - }; + + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fOutputShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); + } + + const std::size_t D = fOutputShape.size(); + const std::size_t Nin = fInputs.size(); + + auto outStrides = UTILITY::ComputeStrideFromShape(fOutputShape); + + std::vector prefix(Nin); + prefix[0] = 0; + for (std::size_t k = 1; k < Nin; ++k) + prefix[k] = prefix[k - 1] + std::stoul(fInputShapes[k - 1][fAxis].GetVal()); + + std::vector> inStrides(Nin); + for (std::size_t k = 0; k < Nin; ++k) + inStrides[k] = 
UTILITY::ComputeStrideFromShape(fInputShapes[k]); + + std::string op; + op = "\n//------ CONCAT_KERNEL_ALPAKA\n"; + op += SP + "struct ConcatKernel_" + opName + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "std::array inputs,\n"; + op += SP + SP + SP + "T* output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "std::size_t remaining;\n"; + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + op += SP + SP + SP + SP + "remaining = elem_idx;\n"; + for (std::size_t d = 0; d < D; ++d) { + std::string stride_val = outStrides[d].GetVal(); + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = remaining * " + 1/std::stoul(stride_val) + ";\n"; + op += SP + SP + SP + SP + "remaining -= out_" + std::to_string(d) + + " * " + stride_val + ";\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t chosen = 0;\n"; + for (std::size_t k = 0; k < Nin; ++k) { + std::size_t end_k = prefix[k] + std::stoul(fInputShapes[k][fAxis].GetVal()); + op += SP + SP + SP + SP + "chosen += static_cast(" + + std::to_string(end_k) + " <= out_" + std::to_string(fAxis) + ");\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t const output_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + SP + "out_" + std::to_string(d) + + " * " + outStrides[d].GetVal(); + op += (d + 1 < D) ? 
" +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + for (std::size_t k = 0; k < Nin; ++k) { + op += SP + SP + SP + SP + SP + "(chosen == " + std::to_string(k) + ") * (\n"; + for (std::size_t d = 0; d < D; ++d) { + std::string coord = (d == fAxis) + ? ("(out_" + std::to_string(d) + " - " + std::to_string(prefix[k]) + ")") + : ("out_" + std::to_string(d)); + op += SP + SP + SP + SP + SP + SP + coord + + " * " + inStrides[k][d].GetVal(); + op += (d + 1 < D) ? " +\n" : "\n"; + } + op += SP + SP + SP + SP + SP + ")"; + op += (k + 1 < Nin) ? " +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[output_idx] = inputs[chosen][input_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + return SP + "ConcatKernel_" + opName + " concatKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fOutputShape.empty()) { + throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); + } + std::stringstream out; + auto length = ConvertDynamicShapeToLength(fOutputShape); + out << "\n//------ CONCAT_GPU_ALPAKA\n"; + out << SP << "std::array input_ptrs_" << OpName << " = {"; + for(size_t i=0; i0) out << ", "; + out << "alpaka::getPtrNative(deviceBuf_" << fInputs[i] << ")"; + } + out << "};\n"; + + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << OpName << " = {elementsPerGrid_" << OpName << ", elementsPerThread_" << OpName << "};\n"; + out << SP << "auto const workDiv_" << OpName << " = alpaka::getValidWorkDiv(kernelCfg_" << OpName << ", devAcc, concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; + 
out << SP << "alpaka::exec(queue, workDiv_" << OpName + << ", concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; + return out.str(); + } + + }; }//SOFIE - #endif //SOFIE_ROPERATOR_CONCAT \ No newline at end of file + #endif //SOFIE_ROPERATOR_CONCAT diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index abe6163..ddfbe37 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -15,6 +15,9 @@ #include "Transpose_FromONNX_GPU_ALPAKA.hxx" +#include "Concat_0D_FromONNX_GPU_ALPAKA.hxx" + + #include #include #include @@ -288,4 +291,47 @@ TEST_F(SofieAlpakaTest, Transpose) float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); for (size_t i = 0; i < outputSize; ++i) EXPECT_LE(std::abs(res_ptr[i] - expected[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, Concat0D) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({1.40519865e+00, -2.87660856e-01}); + std::vector expected_output({ + 1.40519865e+00, -2.87660856e-01, + 1.40519865e+00, -2.87660856e-01 + }); + + // Host input buffer + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + + for (Idx i = 0; i < input.size(); ++i) + input_ptr[i] = input[i]; + + // Device input buffer + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + // Host output buffer + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected_output.size()})); + + { + SOFIE_Concat_0D::Session session("Concat_0D_FromONNX_GPU_ALPAKA.dat"); + + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + 
alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + + for (size_t i = 0; i < expected_output.size(); ++i) { + EXPECT_LE(std::abs(res_ptr[i] - expected_output[i]), TOLERANCE); + } } \ No newline at end of file From 4b179eacfacfbca3f770e0b826b4ab041b62a46c Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 5 Mar 2026 21:02:39 +0100 Subject: [PATCH 25/43] feat: Support for heterogeneous inference on scatter elements operator --- .../inc/SOFIE/ROperator_ScatterElements.hxx | 108 ++++++++++++++++++ src/SOFIE_core/src/RModel_ALPAKA.cxx | 22 ++-- src/SOFIE_core/test/CMakeLists.txt | 2 + .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 53 +++++++++ 4 files changed, 176 insertions(+), 9 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx index 6951017..e436b74 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx @@ -168,6 +168,114 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeY.empty()) { + throw std::runtime_error("TMVA SOFIE ScatterElements Op called to Generate without being initialized first"); + } + + const std::size_t D = fShapeI.size(); + + auto strideY = UTILITY::ComputeStrideFromShape(fShapeY); + auto strideI = UTILITY::ComputeStrideFromShape(fShapeI); + + std::size_t totalElements = 1; + for (std::size_t d = 0; d < D; ++d) + totalElements *= fShapeI[d]; + + std::string op; + op = "\n//------ SCATTERELEMENTS_KERNEL_ALPAKA\n"; + op += SP + "struct ScatterElementsKernel_" + opName + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T* Y,\n"; + op += SP + SP + SP + "int64_t const* I,\n"; + op += SP + SP + SP + "T const* U,\n"; + op += SP + SP + SP + 
"std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + op += SP + SP + SP + SP + "std::size_t remaining = elem_idx;\n"; + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const idx_" + std::to_string(d) + + " = remaining / " + std::to_string(strideI[d]) + ";\n"; + op += SP + SP + SP + SP + "remaining -= idx_" + std::to_string(d) + + " * " + std::to_string(strideI[d]) + ";\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "int64_t iAxis = I[elem_idx];\n"; + op += SP + SP + SP + SP + "if (iAxis < 0) iAxis += " + std::to_string(fShapeY[fAxis]) + ";\n\n"; + + op += SP + SP + SP + SP + "std::size_t const out_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + std::string coord = (d == (std::size_t)fAxis) + ? "static_cast(iAxis)" + : "idx_" + std::to_string(d); + op += SP + SP + SP + SP + SP + coord + " * " + std::to_string(strideY[d]); + op += (d + 1 < D) ? 
" +\n" : ";\n\n"; + } + + if (fReduction.empty() || fReduction == "none") { + op += SP + SP + SP + SP + "Y[out_idx] = U[elem_idx];\n"; + } else if (fReduction == "add") { + op += SP + SP + SP + SP + "alpaka::atomicAdd(acc, &Y[out_idx], U[elem_idx]);\n"; + } else if (fReduction == "mul") { + op += SP + SP + SP + SP + "alpaka::atomicMul(acc, &Y[out_idx], U[elem_idx]);\n"; + } else if (fReduction == "max") { + op += SP + SP + SP + SP + "alpaka::atomicMax(acc, &Y[out_idx], U[elem_idx]);\n"; + } else if (fReduction == "min") { + op += SP + SP + SP + SP + "alpaka::atomicMin(acc, &Y[out_idx], U[elem_idx]);\n"; + } + + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + +std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + return SP + "ScatterElementsKernel_" + opName + " scatterElementsKernel_" + opName + ";\n"; +} + +std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeY.empty()) { + throw std::runtime_error("TMVA SOFIE ScatterElements Op called to Generate without being initialized first"); + } + + std::size_t totalElements = ConvertShapeToLength(fShapeI); + + std::stringstream out; + out << "\n//------ SCATTERELEMENTS_GPU_ALPAKA\n"; + + out << SP << "alpaka::memcpy(queue, deviceBuf_" << fNY << ", deviceBuf_" << fNX << ");\n"; + out << SP << "alpaka::wait(queue);\n\n"; + + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName << ", devAcc, scatterElementsKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", 
alpaka::getPtrNative(deviceBuf_" << fNI << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNU << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", scatterElementsKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNI << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNU << ")" + << ", static_cast(" << totalElements << "));\n"; + + return out.str(); +} }; }//SOFIE diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index f09b754..b7d9bae 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -124,10 +124,19 @@ void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { fGC += out.str(); } -// only supports BufF1D buffer data types for now std::string RModel::GenerateInferSignature_GPU_ALPAKA(bool isdecl) { // generate the infer signature given the inputs: eg. "BufF1D const deviceBuf_A, BufF1D const deviceBuf_B" - // if (decl = false) generate only calling signature (deviceBuf_A, deviceBuf_B, ....) + // if (isdecl = false) generate only calling signature (deviceBuf_A, deviceBuf_B, ....) 
+ + auto GetBufType = [this](const std::string& name) -> std::string { + ETensorType type = GetTensorType(name); + if (type == ETensorType::FLOAT) return "BufF1D"; + if (type == ETensorType::DOUBLE) return "BufD1D"; + if (type == ETensorType::INT64) return "BufI641D"; + throw std::runtime_error("TMVA-SOFIE: input tensor " + name + + " is of a data type which is not yet supported."); + }; + std::string rGC; std::unordered_map inputParams; int i_input = 0; @@ -137,7 +146,6 @@ std::string RModel::GenerateInferSignature_GPU_ALPAKA(bool isdecl) { auto shape = GetDynamicTensorShape(name); for (auto &d : shape) { std::string pName = d.param; - // need to check if the input parameters is already existing in another input tensor if (d.isParam && inputParams.count(pName) == 0) { if (isdecl) rGC += "size_t "; rGC += d.param + ","; @@ -146,17 +154,13 @@ std::string RModel::GenerateInferSignature_GPU_ALPAKA(bool isdecl) { } } if (isdecl) { - std::string type = "BufF1D"; - if (type == "other") - throw std::runtime_error("TMVA-SOFIE: input tensor " + name + - " is of a data type which is not yet supported."); - rGC += type + " const "; + rGC += GetBufType(name) + " const "; } rGC += "deviceBuf_" + name + ","; i_input++; } - if (fInputTensorNames.size() > 0) rGC.pop_back();// remove last "," + if (fInputTensorNames.size() > 0) rGC.pop_back(); // remove last "," return rGC; } diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index c669aa5..fd848df 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -30,6 +30,8 @@ set(CAPTURE_STR std::string msg = e.what();\n\ if (msg.find(\"multiple output tensors are not supported\") != std::string::npos) {\n\ std::cerr << \"[SKIP] Multiple outputs are not supported for @1\" << std::endl;\n\ + } else if (msg.find(\"is of a data type which is not yet supported\") != std::string::npos) {\n\ + std::cerr << \"[SKIP] Operator with unsupported data type in @1: \" << msg << 
std::endl;\n\ } else {\n\ std::cerr << \"[ERROR] Failed processing @1: \" << msg << std::endl;\n\ failures++;\n\ diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index ddfbe37..d82005b 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -16,6 +16,7 @@ #include "Transpose_FromONNX_GPU_ALPAKA.hxx" #include "Concat_0D_FromONNX_GPU_ALPAKA.hxx" +#include "ScatterElements_FromONNX_GPU_ALPAKA.hxx" #include @@ -334,4 +335,56 @@ TEST_F(SofieAlpakaTest, Concat0D) for (size_t i = 0; i < expected_output.size(); ++i) { EXPECT_LE(std::abs(res_ptr[i] - expected_output[i]), TOLERANCE); } +} + +TEST_F(SofieAlpakaTest, ScatterElements) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input (9, 0.f); + std::vector indices = { 1, 0, 2, 0, 2, 1 }; + std::vector updates = { 1.f, 1.1f, 1.2f, 2.f, 2.1f, 2.2f }; + std::vector correct = { 2.f, 1.1f, 0.f, 1.f, 0.f, 2.2f, 0.f, 2.1f, 1.2f }; + + // Allocate and fill host buffers + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + auto indices_h = alpaka::allocBuf(host, Ext1D::all(Idx{indices.size()})); + auto updates_h = alpaka::allocBuf(host, Ext1D::all(Idx{updates.size()})); + + float* input_ptr = reinterpret_cast (alpaka::getPtrNative(input_h)); + int64_t* indices_ptr = reinterpret_cast(alpaka::getPtrNative(indices_h)); + float* updates_ptr = reinterpret_cast (alpaka::getPtrNative(updates_h)); + + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + for (Idx i = 0; i < indices.size(); ++i) indices_ptr[i] = indices[i]; + for (Idx i = 0; i < updates.size(); ++i) updates_ptr[i] = updates[i]; + + // Allocate device buffers and copy + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + auto indices_d = alpaka::allocBuf(device, Ext1D::all(Idx{indices.size()})); + auto updates_d = 
alpaka::allocBuf(device, Ext1D::all(Idx{updates.size()})); + + alpaka::memcpy(queue, input_d, input_h); + alpaka::memcpy(queue, indices_d, indices_h); + alpaka::memcpy(queue, updates_d, updates_h); + alpaka::wait(queue); + + // Host result buffer + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct.size()})); + + { + SOFIE_ScatterElements::Session session; + auto result = session.infer(input_d, indices_d, updates_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(correct.size(), 9u); + for (size_t i = 0; i < correct.size(); ++i){ + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); + } } \ No newline at end of file From 50d478e1e59ade30a127eaec3f17bcac3d8e0de7 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 9 Mar 2026 16:40:08 +0100 Subject: [PATCH 26/43] fix: split operator implementation and multiple buffer return --- src/SOFIE_core/inc/SOFIE/ROperator.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx | 1 + .../inc/SOFIE/ROperator_Comparision.hxx | 1 + src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx | 1 + src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx | 1 + .../inc/SOFIE/ROperator_Sigmoid.hxx | 1 + src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx | 168 +++++++++++------- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 7 + src/SOFIE_core/src/RModel.cxx | 21 +-- src/SOFIE_core/src/RModel_ALPAKA.cxx | 72 ++++++-- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 121 +++++++++++++ 11 files changed, 309 insertions(+), 95 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index 17b62f6..6c9a812 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -20,7 +20,15 @@ enum class OperatorKind { CONSTANTOFSHAPE = 4, UNDEFINED = 5, CONV=6, - BATCHNORM=7 + BATCHNORM=7, + CAST=8, + COMPARISON=9, + 
EINSUM=10, + ELU=11, + SIGMOID=12, + TANH=13, + SOFTMAX=14, + LEAKYRELU=15, }; inline const char* toString(OperatorKind kind) { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx index c813f7c..8c04302 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx @@ -26,6 +26,7 @@ public: ROperator_Cast(std::string attr_type,std::string nameX, std::string nameY): fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)), fAttrType(attr_type) { + fKind = OperatorKind::CAST; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx index 7648a9a..a00ed28 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx @@ -73,6 +73,7 @@ public: ROperator_Comparision(){} ROperator_Comparision(const std::string & nameX1, const std::string & nameX2, const std::string & nameY): fNX1(UTILITY::Clean_name(nameX1)), fNX2(UTILITY::Clean_name(nameX2)), fNY(UTILITY::Clean_name(nameY)){ + fKind = OperatorKind::COMPARISON; fInputTensorNames = { fNX1, fNX2 }; // output will be a boolean vector so should not be considered for memory optimized pool diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx index e9b555b..901bff8 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx @@ -41,6 +41,7 @@ public: ROperator_Einsum(const std::string & equation, const std::vector & namesX, const std::string & nameY): fNInputs(namesX.size()), fNY(UTILITY::Clean_name(nameY)) { + fKind = OperatorKind::EINSUM; for (size_t i = 0; i < namesX.size(); i++) fNInputs[i] = UTILITY::Clean_name(namesX[i]); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx index 34e18a6..dcbfd68 
100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx @@ -27,6 +27,7 @@ public: ROperator_Elu(float alpha,std::string nameX, std::string nameY): falpha(alpha),fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) { + fKind = OperatorKind::ELU; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 5edbcf9..73b32a3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -23,6 +23,7 @@ public: ROperator_Sigmoid(){} ROperator_Sigmoid(std::string nameX, std::string nameY): fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)){ + fKind = OperatorKind::SIGMOID; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx index 6335db3..0c5762b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx @@ -153,71 +153,109 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) { - std::string op; - op = "\n//------ SPLIT_KERNEL_ALPAKA\n"; - op += SP + "struct SplitKernel {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T * output,"; - op += "std::size_t const * input_strides, std::size_t const * output_strides, std::size_t const split_axis, "; - op += "std::size_t const axis_offset, std::size_t const ndim) const {\n"; - op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; - op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; - op += SP + SP + SP + SP + SP + "size_t input_idx = 0;\n"; - op += SP + SP + SP + SP + SP + "size_t output_idx = 0;\n"; - op += SP + SP + SP + SP + SP + "for (int i = 0; i 
< ndim; ++i) {\n"; - op += SP + SP + SP + SP + SP + SP + "size_t output_coord = elem[i];\n"; - op += SP + SP + SP + SP + SP + SP + "size_t input_coord = (i == split_axis) ? (output_coord + axis_offset) : output_coord;\n"; - op += SP + SP + SP + SP + SP + SP + "input_idx += input_coord * input_strides[i];\n"; - op += SP + SP + SP + SP + SP + SP + "output_idx += output_coord * output_strides[i];\n}\n"; - op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; - op += SP + SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n"; - - return op; - } - - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { - return SP + "SplitKernel splitKernel;\n"; - } - - std::string Generate_GPU_ALPAKA(std::string OpName) override { - OpName = "op_" + OpName; - if (fOutputShapes.empty()){ - throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first"); - } - - std::stringstream out; - out << "\n//------ SPLIT_GPU_ALPAKA\n"; - - bool axis_is_innermost = (fAxis == static_cast(fInputShape.size()) - 1) - && (UTILITY::ComputeStrideFromShape(fInputShape)[fInputShape.size()-1] == 1); - out << SP <<"size_t "<(" << length << ") * sizeof(float);\n"; - out << SP << SP << SP << "alpaka::memcpy(queue, "< workDiv_" << fNYs[i] - << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " - << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - - out << SP << "alpaka::exec(queue, workDiv_" << fNYs[i] - << ", splitKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNYs[i] - << "), "<< ConvertShapeToString(UTILITY::ComputeStrideFromShape(fInputShape)) <<", "<\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* input,\n"; + op += SP + SP + SP + "T* output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const 
global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(outputStrides[d]) + "u) % " + + std::to_string(fOutputShapes[i][d]) + "u;\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + std::string coord = (d == static_cast(fAxis)) + ? ("(out_" + std::to_string(d) + " + " + std::to_string(axis_offset) + "u)") + : ("out_" + std::to_string(d)); + op += SP + SP + SP + SP + SP + coord + " * " + std::to_string(inputStrides[d]) + "u"; + op += (d + 1 < D) ? " +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[elem_idx] = input[input_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; + } + std::cout<<"Finished generating GPU kernel for Split operator "<(1));\n"; + out << SP << SP << "auto const elementsPerGrid_" << i << " = Vec::all(Idx{" << length << "});\n"; + out << SP << SP << "alpaka::KernelCfg const kernelCfg_" << i + << " = {elementsPerGrid_" << i << ", elementsPerThread_" << i << "};\n"; + out << SP << SP << "auto const workDiv_" << i << " = alpaka::getValidWorkDiv(kernelCfg_" << i + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNYs[i] << ")" + << ", static_cast(" << length << "));\n"; + out << SP << SP << "alpaka::exec(queue, workDiv_" << i + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNYs[i] << ")" + << ", static_cast(" << length << "));\n"; + out << SP << 
"}\n"; + } + return out.str(); +} }; diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index 8b9727b..e7e5ef2 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -809,6 +809,13 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } } +inline std::string ConvertOutputTypeToString(ETensorType t) { + // The std::vector is a special type that is not wrapping continuous memory. + // We don't want to use it as a return type. + if (t == ETensorType::BOOL) t = ETensorType::UINT8; + return ConvertTypeToString(t); +} + } // namespace SOFIE #endif //SOFIE_COMMON diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index 0eab8d1..456cf23 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -793,17 +793,6 @@ std::string RModel::GenerateInferSignature(bool isdecl) { return rGC; } -namespace { - -std::string typeForOutput(ETensorType t) { - // The std::vector is a special type that is not wrapping continuous memory. - // We don't want to use it as a return type. 
- if (t == ETensorType::BOOL) t = ETensorType::UINT8; - return ConvertTypeToString(t); -} - -} - void RModel::GenerateOutput() { size_t outputSize = fOutputTensorNames.size(); @@ -814,7 +803,7 @@ void RModel::GenerateOutput() ETensorType eFirstOutputType = GetTensorType(*fOutputTensorNames.begin()); fGC += "\n\n"; if (outputSize == 1) { - fGC += "std::vector<" + typeForOutput(eFirstOutputType) + ">"; + fGC += "std::vector<" + ConvertOutputTypeToString(eFirstOutputType) + ">"; } else { // if all output types are the same we return an std::vector - otherwise a tuple for (std::string const &name : fOutputTensorNames) { @@ -822,11 +811,11 @@ void RModel::GenerateOutput() sameOutputTypes = false; } if (sameOutputTypes) - fGC += "std::vector>"; + fGC += "std::vector>"; else { inferReturnType = "std::tuple<"; for (size_t i = 0; i < outputSize; i++) { - inferReturnType += "std::vector<" + typeForOutput(GetTensorType(fOutputTensorNames[i])) + ">"; + inferReturnType += "std::vector<" + ConvertOutputTypeToString(GetTensorType(fOutputTensorNames[i])) + ">"; if (i < outputSize - 1) inferReturnType += ","; } @@ -841,7 +830,7 @@ void RModel::GenerateOutput() if (!doInferArgs.empty()) doInferArgs += ","; for (std::string const &name : fOutputTensorNames) { - fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > output_tensor_" + name + ";\n"; + fGC += SP + "std::vector<" + ConvertOutputTypeToString(GetTensorType(name)) + " > output_tensor_" + name + ";\n"; doInferArgs += " output_tensor_" + name + ","; } if (!doInferArgs.empty()) @@ -866,7 +855,7 @@ void RModel::GenerateSessionCode() if (!doInferSignature.empty()) doInferSignature += ", "; for (auto const &name : fOutputTensorNames) { - doInferSignature += " std::vector<" + typeForOutput(GetTensorType(name)) + "> &output_tensor_" + name + ","; + doInferSignature += " std::vector<" + ConvertOutputTypeToString(GetTensorType(name)) + "> &output_tensor_" + name + ","; } doInferSignature.back() = ' '; diff --git 
a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index b7d9bae..ab64a2e 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -174,14 +174,29 @@ void RModel::GenerateOutput_GPU_ALPAKA() { bool sameOutputTypes = true; std::string inferReturnType; - ETensorType eOutputType = GetTensorType(*fOutputTensorNames.begin()); - std::string outputType = ConvertTypeToString(eOutputType); + ETensorType eFirstOutputType = GetTensorType(*fOutputTensorNames.begin()); fGC += "\n\n"; if (outputSize == 1) { - fGC += "alpaka::Buf"; + fGC += "alpaka::Buf"; } else { - throw std::runtime_error("TMVA-SOFIE: multiple output tensors are not supported in ALPAKA code generation"); + // if all output types are the same we return an std::array of buffers - otherwise a std::tuple + for (std::string const &name : fOutputTensorNames) { + if (GetTensorType(name) != eFirstOutputType) + sameOutputTypes = false; + } + if (sameOutputTypes) + fGC += "std::array, " + std::to_string(outputSize) + ">"; + else { + inferReturnType = "std::tuple<"; + for (size_t i = 0; i < outputSize; i++) { + inferReturnType += "alpaka::Buf"; + if (i < outputSize - 1) + inferReturnType += ","; + } + inferReturnType += ">"; + fGC += inferReturnType; + } } fGC += " infer("; @@ -212,16 +227,38 @@ void RModel::GenerateOutput_GPU_ALPAKA() { void RModel::GenerateSessionCode_GPU_ALPAKA() { std::set registered_operators; + std::set single_initialized_operators = { + SOFIE::OperatorKind::RELU, + SOFIE::OperatorKind::SIGMOID, + SOFIE::OperatorKind::TANH, + SOFIE::OperatorKind::SOFTMAX, + SOFIE::OperatorKind::LEAKYRELU, + SOFIE::OperatorKind::EINSUM, + SOFIE::OperatorKind::COMPARISON, + SOFIE::OperatorKind::ELU, + }; + bool OpNeedsBlas = false; + // operator kinds listed in single_initialized_operators get their kernel generated only once; kinds already handled are recorded in the registered_operators set to avoid generating multiple kernels for the same operator kind fGC += "\n//--- ALPAKA Kernels\n"; for 
(size_t id = 0; id < fOperators.size(); id++) { - if(registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { + if(fOperators[id]->GetKind() == OperatorKind::GEMM){ + OpNeedsBlas = true; + } + if(single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { - if (fVerbose) + if(registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { + + if (fVerbose) std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; + fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); + registered_operators.insert(fOperators[id]->GetKind()); + } + } else { + if (fVerbose) + std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); - registered_operators.insert(fOperators[id]->GetKind()); } } @@ -253,7 +290,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { fGC += "Idx threadsPerBlock = 256;\n"; fGC += "\nusing Ext1D = alpaka::Vec;\n"; fGC += "using Vec = alpaka::Vec;\n"; - if (registered_operators.find(SOFIE::OperatorKind::GEMM) != registered_operators.end()) { + if (OpNeedsBlas) { fGC += "\n\n// BLAS declarations\n"; fGC += "sofieBLAS blas{queue};\n"; } @@ -317,14 +354,23 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { } registered_operators.clear(); - for (size_t id = 0; id < fOperators.size(); id++) { - if(registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { + + for (size_t id = 0; id < fOperators.size(); id++) { + + if(single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { - if (fVerbose) - std::cout<<"Declaring ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind())<GetKind()) == registered_operators.end()) { + + if (fVerbose) + std::cout<<"Declaring ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << 
std::endl; + fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); + registered_operators.insert(fOperators[id]->GetKind()); + } + } else { + if (fVerbose) + std::cout<<"Declaring ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); - registered_operators.insert(fOperators[id]->GetKind()); } } diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index d82005b..724640e 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -18,6 +18,9 @@ #include "Concat_0D_FromONNX_GPU_ALPAKA.hxx" #include "ScatterElements_FromONNX_GPU_ALPAKA.hxx" +#include "Split_0_FromONNX_GPU_ALPAKA.hxx" +#include "Split_1_FromONNX_GPU_ALPAKA.hxx" +#include "Split_2_FromONNX_GPU_ALPAKA.hxx" #include #include @@ -387,4 +390,122 @@ TEST_F(SofieAlpakaTest, ScatterElements) for (size_t i = 0; i < correct.size(); ++i){ EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } +} + +TEST_F(SofieAlpakaTest, Split_0) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // split in axis 0 in 2 tensors {2,2,3} -> {1,2,3} each + std::vector input {1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.}; + std::vector> correct_output = { {1.,2.,3.,4.,5.,6.}, {7.,8.,9.,10.,11.,12.} }; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result0_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct_output[0].size()})); + auto result1_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct_output[1].size()})); + + { + 
SOFIE_Split_0::Session session; + auto [result0, result1] = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result0_h, result0); + alpaka::memcpy(queue, result1_h, result1); + alpaka::wait(queue); + } + + float* res0_ptr = reinterpret_cast(alpaka::getPtrNative(result0_h)); + float* res1_ptr = reinterpret_cast(alpaka::getPtrNative(result1_h)); + + for (size_t j = 0; j < correct_output[0].size(); ++j) + EXPECT_LE(std::abs(res0_ptr[j] - correct_output[0][j]), TOLERANCE); + for (size_t j = 0; j < correct_output[1].size(); ++j) + EXPECT_LE(std::abs(res1_ptr[j] - correct_output[1][j]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, Split_1) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // split in axis 1 in 2 tensors {2,2,3} -> {2,1,3} each + std::vector input {1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.}; + std::vector> correct_output = { {1.,2.,3.,7.,8.,9.}, {4.,5.,6.,10.,11.,12.} }; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result0_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct_output[0].size()})); + auto result1_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct_output[1].size()})); + + { + SOFIE_Split_1::Session session; + auto [result0, result1] = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result0_h, result0); + alpaka::memcpy(queue, result1_h, result1); + alpaka::wait(queue); + } + + float* res0_ptr = reinterpret_cast(alpaka::getPtrNative(result0_h)); + float* res1_ptr = reinterpret_cast(alpaka::getPtrNative(result1_h)); + + for (size_t j = 0; j < correct_output[0].size(); ++j) + EXPECT_LE(std::abs(res0_ptr[j] - correct_output[0][j]), 
TOLERANCE); + for (size_t j = 0; j < correct_output[1].size(); ++j) + EXPECT_LE(std::abs(res1_ptr[j] - correct_output[1][j]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, Split_2) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // split in axis 2 in 2 tensors {2,2,3} -> {2,2,2} and {2,2,1} + std::vector input {1.,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.}; + std::vector> correct_output = { {1.,2.,4.,5.,7.,8.,10.,11.}, {3.,6.,9.,12.} }; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + // outputs have different sizes: {2,2,2}=8 and {2,2,1}=4 + auto result0_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct_output[0].size()})); + auto result1_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct_output[1].size()})); + + { + SOFIE_Split_2::Session session; + auto [result0, result1] = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result0_h, result0); + alpaka::memcpy(queue, result1_h, result1); + alpaka::wait(queue); + } + + float* res0_ptr = reinterpret_cast(alpaka::getPtrNative(result0_h)); + float* res1_ptr = reinterpret_cast(alpaka::getPtrNative(result1_h)); + + for (size_t j = 0; j < correct_output[0].size(); ++j) + EXPECT_LE(std::abs(res0_ptr[j] - correct_output[0][j]), TOLERANCE); + for (size_t j = 0; j < correct_output[1].size(); ++j) + EXPECT_LE(std::abs(res1_ptr[j] - correct_output[1][j]), TOLERANCE); } \ No newline at end of file From 5af24cf3ad9f9ee27a3f7e8da42b151bb200c30f Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 12 Mar 2026 15:21:04 +0100 Subject: [PATCH 27/43] feat: Support for heterogeneous inference on tile operator --- src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx | 287 
+++++++++++------- src/SOFIE_core/src/RModel_ALPAKA.cxx | 14 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 60 ++++ 3 files changed, 232 insertions(+), 129 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx index 1b31faa..36a93c5 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx @@ -19,14 +19,17 @@ private: std::string fNRepeats; std::string fNInput; std::string fNY; - std::vectorfShapeInput; + std::vector fShapeInput; std::vector fShapeY; + std::vector fRepeats; // populated in Initialize() if repeats are known at generation time public: ROperator_Tile(){} ROperator_Tile(std::string nameRepeat, std::string nameInput, std::string nameY): - fNRepeats(UTILITY::Clean_name(nameRepeat)),fNInput(UTILITY::Clean_name(nameInput)), fNY(UTILITY::Clean_name(nameY)){ - fInputTensorNames = { fNRepeats, fNInput }; + fNRepeats(UTILITY::Clean_name(nameRepeat)), + fNInput(UTILITY::Clean_name(nameInput)), + fNY(UTILITY::Clean_name(nameY)) { + fInputTensorNames = { fNRepeats, fNInput }; fOutputTensorNames = { fNY }; } @@ -36,157 +39,206 @@ public: std::vector> ShapeInference(std::vector> input) override { std::vector ret = input[0]; - - for(size_t i=0; i < input[1].size(); i++) { - ret[i]=ret[i]*input[1][i]; - } + for (size_t i = 0; i < input[1].size(); i++) + ret[i] = ret[i] * input[1][i]; return {ret}; } void Initialize(RModel& model) override { - //input must be a graph input, or already initialized intermediate tensor - if (model.CheckIfTensorAlreadyExist(fNInput) == false){ - throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); - } - if (model.CheckIfTensorAlreadyExist(fNRepeats) == false){ - throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); - } - fShapeInput=model.GetTensorShape(fNInput); + if (model.CheckIfTensorAlreadyExist(fNInput) == false) + throw std::runtime_error("TMVA SOFIE Tile Op 
Input Tensor is not found in model"); + if (model.CheckIfTensorAlreadyExist(fNRepeats) == false) + throw std::runtime_error("TMVA SOFIE Tile Op Repeats Tensor is not found in model"); + + fShapeInput = model.GetTensorShape(fNInput); - // if repeats vector is not initialized we cannot deduce shape of output - // not support for time being this case - if (!model.IsInitializedTensor(fNRepeats)) { + if (!model.IsInitializedTensor(fNRepeats)) throw std::runtime_error("TMVA SOFIE Tile Op: non-initialized repeats input is not supported"); - } - // Retrieve the data pointer for the repeats tensor - auto repptr = model.GetInitializedTensorData(fNRepeats); - // Cast the raw pointer to the appropriate type (size_t*) + auto repptr = model.GetInitializedTensorData(fNRepeats); auto repeats_data = static_cast(repptr.get()); - if (repeats_data == nullptr) { - throw std::runtime_error("Failed to retrieve the data for the repeats tensor."); - } - // Get the shape of the repeats tensor to determine the number of elements + if (repeats_data == nullptr) + throw std::runtime_error("TMVA SOFIE Tile Op: failed to retrieve repeats tensor data"); + auto repeats_shape = model.GetTensorShape(fNRepeats); - // Ensure the repeats tensor is 1D and get the number of elements - if (repeats_shape.size() != 1) { - throw std::runtime_error("Repeats tensor is not 1D."); - } + if (repeats_shape.size() != 1) + throw std::runtime_error("TMVA SOFIE Tile Op: repeats tensor must be 1D"); + size_t num_elements = repeats_shape[0]; - // Convert the data to a vector of size_t - std::vector repeats_vector(num_elements); - std::copy(repeats_data, repeats_data + num_elements, repeats_vector.begin()); + // Save repeats if known at generation time so the GPU kernel can bake + // fShapeInput[d] directly without needing a runtime repeats pointer. + // fRepeats is left empty if repeats are not initialized (future case), + // which will cause the kernel to use the runtime repeats pointer path. 
+ fRepeats.resize(num_elements); + std::copy(repeats_data, repeats_data + num_elements, fRepeats.begin()); - fShapeY = ShapeInference({fShapeInput,repeats_vector})[0]; + fShapeY = ShapeInference({fShapeInput, fRepeats})[0]; model.AddIntermediateTensor(fNY, model.GetTensorType(fNInput), fShapeY); if (model.Verbose()) - std::cout << "Tile: " << fNInput << " " << ConvertShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) - << " given repeats " << ConvertShapeToString(repeats_vector) << std::endl; + std::cout << "Tile: " << fNInput << " " << ConvertShapeToString(fShapeInput) + << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) + << " given repeats " << ConvertShapeToString(fRepeats) << std::endl; } std::string Generate(std::string OpName) override { OpName = "op_" + OpName; - if (fShapeInput.empty() || fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Tile Op called to Generate without being initialized first"); - } - - //size_t input_length = ConvertShapeToLength(fShapeInput); - //size_t output_length = ConvertShapeToLength(fShapeY); - + if (fShapeInput.empty() || fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Tile Op called to Generate without being initialized first"); std::stringstream out; - std::string input = "tensor_" + fNInput; - std::string output = "tensor_" + fNY; + std::string input = "tensor_" + fNInput; + std::string output = "tensor_" + fNY; + std::string repeats = "tensor_" + fNRepeats; + out << "///-------- Tile operator\n"; - out << "{\n"; // add scope to re-use same names - out << "const int input_shape[" << fShapeInput.size() << "] = " << ConvertShapeToString(fShapeInput) << ";\n"; - - out << "int inputLength = " << ConvertShapeToLength(fShapeInput) << ";\n"; - out << "int s = 1;\n"; - // loop from inverse dim order - out << "for (int i = " << fShapeInput.size()-1 << "; i >=0; i--) {\n"; - out << SP << "int r = tensor_" << fNRepeats << "[i];\n"; - // we cannot exclude case 
where repeats=1 since we need offset - //out << SP << "if (r == 1 && i < " << fShapeInput.size()-1 << ") continue;\n"; - out << SP << "int i_offset = 0, o_offset = 0;\n"; - out << SP << "s = s * input_shape[i];\n"; - // case we have first copy - out << SP << "if (i == " << fShapeInput.size()-1 << ") {\n"; - out << SP << SP << "for (int j = 0; j < inputLength/s ; j++) {\n"; - out << SP << SP << SP << "for (int k = 0; k < r ; k++) {\n"; - out << SP << SP << SP << SP << "std::copy(" << input << "+ i_offset, " - << input << "+ i_offset + s, " << output << "+ o_offset);\n"; - out << SP << SP << SP << SP << "o_offset += s;\n"; - out << SP << SP << SP << "}\n"; // end k loop - out << SP << SP << SP << "i_offset += s;\n"; - out << SP << SP << "}\n"; // end j loop - out << SP << "} else {\n"; // second copy we do from output to output - // and we need to loop on j from reverse order to avoir re-writing in output tensor - out << SP << SP << "for (int j = inputLength/s - 1 ; j>=0; j--) {\n"; - out << SP << SP << SP << "o_offset = j*s*r;\n"; - out << SP << SP << SP << "i_offset = j*s;\n"; - out << SP << SP << SP << "for (int k = 0; k < r ; k++) {\n"; - out << SP << SP << SP << SP << "std::copy(" << output << "+ i_offset, " - << output << "+ i_offset + s, " << output << "+ o_offset);\n"; - out << SP << SP << SP << SP << "o_offset += s;\n"; - out << SP << SP << SP << "}\n"; // end k loop - out << SP << SP << "}\n"; // end j loop - out << SP << "}\n"; // end if - out << SP << "s *= r;\n"; - out << SP << "inputLength *= r;\n"; - out << "}\n"; // end i loop - out << "}\n"; // end of scope + out << "{\n"; + + out << SP << "const int input_shape[" << fShapeInput.size() << "] = {"; + for (size_t i = 0; i < fShapeInput.size(); ++i) { + if (i > 0) out << ", "; + out << fShapeInput[i]; + } + out << "};\n"; + + out << SP << "int inputLength = " << ConvertShapeToLength(fShapeInput) << ";\n"; + out << SP << "int s = 1;\n"; + + // Read repeats from the tensor at runtime so the generated code 
remains + // correct even if repeats become a runtime input/intermediate in the future + out << SP << "for (int i = " << fShapeInput.size() - 1 << "; i >= 0; i--) {\n"; + out << SP << SP << "int r = " << repeats << "[i];\n"; + out << SP << SP << "int i_offset = 0, o_offset = 0;\n"; + out << SP << SP << "s = s * input_shape[i];\n"; + out << SP << SP << "if (i == " << fShapeInput.size() - 1 << ") {\n"; + out << SP << SP << SP << "for (int j = 0; j < inputLength / s; j++) {\n"; + out << SP << SP << SP << SP << "for (int k = 0; k < r; k++) {\n"; + out << SP << SP << SP << SP << SP << "std::copy(" << input << " + i_offset, " + << input << " + i_offset + s, " + << output << " + o_offset);\n"; + out << SP << SP << SP << SP << SP << "o_offset += s;\n"; + out << SP << SP << SP << SP << "}\n"; + out << SP << SP << SP << SP << "i_offset += s;\n"; + out << SP << SP << SP << "}\n"; + out << SP << SP << "} else {\n"; + out << SP << SP << SP << "for (int j = inputLength / s - 1; j >= 0; j--) {\n"; + out << SP << SP << SP << SP << "o_offset = j * s * r;\n"; + out << SP << SP << SP << SP << "i_offset = j * s;\n"; + out << SP << SP << SP << SP << "for (int k = 0; k < r; k++) {\n"; + out << SP << SP << SP << SP << SP << "std::copy(" << output << " + i_offset, " + << output << " + i_offset + s, " + << output << " + o_offset);\n"; + out << SP << SP << SP << SP << SP << "o_offset += s;\n"; + out << SP << SP << SP << SP << "}\n"; + out << SP << SP << SP << "}\n"; + out << SP << SP << "}\n"; + out << SP << SP << "s *= r;\n"; + out << SP << SP << "inputLength *= r;\n"; + out << SP << "}\n"; + out << "}\n"; return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeInput.empty() || fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Operator Tile called to Generate without being initialized first"); + + const std::size_t D = fShapeInput.size(); + + auto inputStrides = 
UTILITY::ComputeStrideFromShape(fShapeInput); + auto outputStrides = UTILITY::ComputeStrideFromShape(fShapeY); + std::size_t totalElements = ConvertShapeToLength(fShapeY); + + // If fRepeats is populated, repeats were known at generation time and + // we can bake fShapeInput[d] as literals — no runtime repeats pointer needed. + // If fRepeats is empty (future: runtime repeats), pass repeats as a kernel arg. + bool repeatsKnown = !fRepeats.empty(); + + std::string kname = "TileKernel_" + opName; + std::string op; - op = "\n//------ TILE_KERNEL_ALPAKA\n"; - op += SP + "struct TileKernel {\n"; + op = "\n//------ TILE_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * __restrict__ tensor_X,"; - op += SP + SP + SP + "T * __restrict__ tensor_Y, const int64_t * __restrict__ shape_X,"; - op += SP + SP + SP + "const int64_t * __restrict__ stride_X, const int64_t * __restrict__ shape_Y,"; - op += SP + SP + SP + "const int64_t * __restrict__ stride_Y, std::size_t const ndim) const {\n"; - op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(shape_Y));\n"; - op += SP + SP + SP + SP + "for (auto const& elem: elements) {\n"; - op += SP + SP + SP + SP + SP + "size_t input_idx = 0;\n"; - op += SP + SP + SP + SP + SP + "size_t output_idx = 0;\n"; - op += SP + SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; - op += SP + SP + SP + SP + SP + SP + "size_t input_coord = elem[i] % shape_X[i];\n"; - op += SP + SP + SP + SP + SP + SP + "input_idx += input_coord * stride_X[i];\n"; - op += SP + SP + SP + SP + SP + "output_idx += elem[i] * stride_Y[i];\n}\n"; - op += SP + SP + SP + SP + SP + "tensor_Y[output_idx] = tensor_X[input_idx];\n"; - op += SP + SP + SP + SP + "}\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP 
+ SP + SP + "T* __restrict__ output,\n"; + if (!repeatsKnown) + op += SP + SP + SP + "int64_t const* __restrict__ repeats,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + // Decompose output linear index — output strides always compile-time + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(outputStrides[d]) + "u) % " + + std::to_string(fShapeY[d]) + "u;\n"; + } + op += "\n"; + + // Input index: fShapeInput[d] is always a compile-time constant since + // it is the input tensor shape, never runtime-variable. + // When repeatsKnown, we bake it directly as a literal. + // When not repeatsKnown (future), we still use fShapeInput[d] as a + // literal for the % — repeats pointer is only needed if fShapeY is dynamic. + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + SP + + "(out_" + std::to_string(d) + " % " + std::to_string(fShapeInput[d]) + "u)" + + " * " + std::to_string(inputStrides[d]) + "u"; + op += (d + 1 < D) ? 
" +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[elem_idx] = input[input_idx];\n"; + op += SP + SP + SP + "}\n"; op += SP + SP + "}\n"; op += SP + "};\n"; + return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { - return SP + "TileKernel tileKernel;\n"; + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + std::string kname = "TileKernel_" + opName; + return SP + kname + " tileKernel_" + opName + ";\n"; } - std::string Generate_GPU_ALPAKA(std::string OpName) override { - OpName = "op_" + OpName; - if (fShapeInput.empty() || fShapeY.empty()) { + std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeInput.empty() || fShapeY.empty()) throw std::runtime_error("TMVA SOFIE Operator Tile called to Generate without being initialized first"); - } + + bool repeatsKnown = !fRepeats.empty(); + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::string kname = "tileKernel_" + opName; + + // Build argument list once, reused for both getValidWorkDiv and exec + std::string args = + "alpaka::getPtrNative(deviceBuf_" + fNInput + "), " + + "alpaka::getPtrNative(deviceBuf_" + fNY + ")"; + if (!repeatsKnown) + args += ", alpaka::getPtrNative(deviceBuf_" + fNRepeats + ")"; + args += ", static_cast(" + std::to_string(totalElements) + ")"; + std::stringstream out; - auto length = ConvertShapeToLength(fShapeY); out << "\n//------ TILE_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_" << fNY - << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " - << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - - out << SP << "alpaka::exec(queue, workDiv_" << fNY - << ", tileKernel, alpaka::getPtrNative(deviceBuf_" << fNInput - << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< ConvertShapeToString(fShapeInput)<<", "<< ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeInput)) <<", " - 
<(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname << ", " << args << ");\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname << ", " << args << ");\n"; return out.str(); } @@ -195,5 +247,4 @@ public: }//SOFIE - -#endif //SOFIE_ROPERATOR_Tile +#endif //SOFIE_ROPERATOR_Tile \ No newline at end of file diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index ab64a2e..f35277a 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -40,13 +40,7 @@ void RModel::GenerateTemporaryInitializedTensorContainers_GPU_ALPAKA() fGC += "// temporary initialized tensors for loading weights\n"; for (auto &i : fInitializedTensors) { - if (!fUseWeightFile || i.second.IsConstantTensor()) { - if (i.second.type() == ETensorType::FLOAT) - fGC += GenerateConstantTensorCode(i); - else if (i.second.type() == ETensorType::INT64) - fGC += GenerateConstantTensorCode(i); - - } else { + if (fUseWeightFile && !i.second.IsConstantTensor()) { // case of tensors which are read from a file size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { @@ -428,7 +422,7 @@ void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int bat void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ for (auto &i : fInitializedTensors) { // skip Constant and shape tensors - if (!i.second.IsWeightTensor()) continue; + if (!i.second.IsWeightTensor() || i.second.IsConstantTensor() || !fUseWeightFile) continue; std::string tensor_name = "tensor_" + i.first; auto length = ConvertShapeToLength(i.second.shape()); std::string slength = 
std::to_string(length); @@ -437,11 +431,9 @@ void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else if (i.second.type() == ETensorType::DOUBLE) { fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(double));\n"; fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else if (i.second.type() == ETensorType::INT64) { - fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; - fGC += " std::memcpy(alpaka::getPtrNative(hostBuf_"+i.first+"), tensor_"+i.first+".data(), "+slength+"* sizeof(int64_t));\n"; + fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+", " + slength + ");\n"; fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } else { std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 724640e..9700b36 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -22,6 +22,9 @@ #include "Split_1_FromONNX_GPU_ALPAKA.hxx" #include "Split_2_FromONNX_GPU_ALPAKA.hxx" +#include "Tile5D_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Tile5D.ref.hxx" + #include #include #include @@ -508,4 +511,61 @@ TEST_F(SofieAlpakaTest, Split_2) EXPECT_LE(std::abs(res0_ptr[j] - correct_output[0][j]), TOLERANCE); for (size_t j = 0; j < correct_output[1].size(); ++j) EXPECT_LE(std::abs(res1_ptr[j] - correct_output[1][j]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, Tile5D) +{ + constexpr float TOLERANCE = 
DEFAULT_TOLERANCE; + + std::vector input_data({ + 0.2386120855808258, 0.5549510717391968, -1.8190287351608276, 0.5724563598632812, -0.6596977710723877, + 0.17560836672782898, 0.7608169317245483, 0.08603227883577347, -0.049375515431165695, 0.2705111503601074, + 1.42119562625885, 0.032626643776893616, -1.212586522102356, -0.5129594802856445, -0.43296414613723755, + -0.1606937050819397, 1.1884371042251587, -0.662174642086029, -2.291109323501587, -0.6852569580078125, + 2.325223922729492, -0.19389064610004425, -0.5784135460853577, -0.39328137040138245, 0.2831517457962036, + 0.4496127665042877, -0.2029038816690445, 0.35477763414382935, 0.4266718924045563, 0.24683749675750732, + 1.90426504611969, -0.4861580729484558, 0.9139055013656616, -0.5031066536903381, 0.9583520293235779, + -0.23210509121418, 1.3183971643447876, 1.7042455673217773, -0.3201166093349457, -0.14444805681705475, + -0.8829464912414551, 1.725736141204834, 0.45657631754875183, 0.4920198321342468, -1.088847041130066, + 0.49437597393989563, -0.006085286382585764, 2.475630760192871, 0.12170185893774033, -0.8953945636749268, + 1.1430096626281738, 1.3278610706329346, 0.3076854348182678, 0.036237504333257675, 0.05180325731635094, + 0.2802475392818451, 0.5289335250854492, 0.9356630444526672, 0.7863689064979553, 0.4239695370197296, + 0.8723016977310181, -0.2248474359512329, 0.3891502320766449, 0.5463842153549194, -0.7782878875732422, + -0.8570080399513245, -2.593783378601074, -0.11392943561077118, 0.5637082457542419, 2.075004816055298, + -1.0598397254943848, 1.0823975801467896 + }); + + const std::size_t inputSize = input_data.size(); + const std::size_t outputSize = sizeof(Tile5D_ExpectedOutput::output) / sizeof(float); + + // Allocate and fill host input buffer + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < inputSize; ++i) + input_ptr[i] = input_data[i]; + + // Copy to device + auto input_d = 
alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + // Host result buffer + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Tile5D::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Tile5D_ExpectedOutput::output; + + EXPECT_EQ(outputSize, sizeof(Tile5D_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } \ No newline at end of file From 6dac663afc6da4dbc51b1d7f14598b7608debd9c Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 13 Mar 2026 16:30:22 +0100 Subject: [PATCH 28/43] feat: tests for heterogeneous inference of Gather operator --- src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 195 ++++++++++------ src/SOFIE_core/src/RModel_ALPAKA.cxx | 10 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 218 +++++++++++++++++- 3 files changed, 352 insertions(+), 71 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx index 4f685d7..7f10f8f 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx @@ -212,72 +212,135 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { - std::string op; - op = "\n//------ GATHER_KERNEL_ALPAKA\n"; - op += SP + "struct GatherKernel {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T const * indices, T * output, std::size_t const * output_shape, std::size_t const axis, std::size_t const axisDim, std::size_t const indicesNumElements, std::size_t const * output_strides, std::size_t const * input_strides, std::size_t 
const ndim) const {\n"; - op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; - op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; - - // find flattened index for indices tensor - op += SP + SP + SP + SP + "int64_t idxLinear = 0;\n{\n"; - op += SP + SP + SP + SP + SP + "int64_t stride = 1;\n"; - op += SP + SP + SP + SP + SP + "for (int i = ndim - 1; i >= axis; --i) {;\n"; - op += SP + SP + SP + SP + SP + "stride *= (i > axis ? output_shape[i] : 1);\n}\n"; - op += SP + SP + SP + SP + SP + "idxLinear = elem[axis];\n"; - op += SP + SP + SP + SP + SP + "if (idxLinear >= indicesNumElements) idxLinear %= indicesNumElements;\n}\n"; - - // load gather index and wrap negative if any - op += SP + SP + SP + SP + "int64_t k = indices[idxLinear];\n"; - op += SP + SP + SP + SP + "if (k < 0) k += axisDim;\n"; - op += SP + SP + SP + SP + "if (k < 0) k = 0;\n"; - op += SP + SP + SP + SP + "if (k >= axisDim) k = axisDim - 1;\n"; - - // compute input flattened index - op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; - op += SP + SP + SP + SP + "size_t output_idx = 0;\n"; - op += SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; - op += SP + SP + SP + SP + SP + "size_t coord = elem[i];\n"; - op += SP + SP + SP + SP + SP + "output_idx += coord * output_strides[i];\n}\n"; - op += SP + SP + SP + SP + SP + "if (i == axis) coord = k;\n"; - op += SP + SP + SP + SP + SP + "input_idx += coord * input_strides[i];\n}\n"; - - // write to output tensor - op += SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; - op += SP + SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n"; - - return op; - } - - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { - return SP + "GatherKernel gatherKernel;\n"; - } - - std::string Generate_GPU_ALPAKA(std::string OpName) override { - OpName = "op_" + OpName; - std::stringstream out; - auto length = 
ConvertShapeToLength(fShapeY); - out << "\n//------ "< workDiv_" << fNY - << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " - << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - - out << SP << "alpaka::exec(queue, workDiv_" << fNY - << ", gatherKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNIndices - << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< ConvertShapeToString(fShapeY) <<", "<< fAttrAxis <<", "<< fShapeX[fAttrAxis] <<", " - << fShapeIndices.size() <<", " - << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeY)) <<", " - << ConvertShapeToString(UTILITY::ComputeStrideFromShape(fShapeX)) <<", "<< fShapeY.size() - << ",static_cast(" << length << "));\n"; - - return out.str(); - } +std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Gather Op called to Generate without being initialized first"); + + const std::size_t D = fShapeY.size(); // output rank = q + r - 1 + const std::size_t r = fShapeX.size(); + const std::size_t q = fShapeIndices.size(); + + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + auto stridesX = UTILITY::ComputeStrideFromShape(fShapeX); + auto stridesIndices = UTILITY::ComputeStrideFromShape(fShapeIndices); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::size_t indicesNumElements = ConvertShapeToLength(fShapeIndices); + + std::string kname = "GatherKernel_" + opName; + + std::string op; + op = "\n//------ GATHER_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP + SP + SP + "int64_t const* __restrict__ indices,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + 
SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + // Decompose output linear index into per-dim coords using compile-time strides + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx * " + std::to_string(1/stridesY[d]) + "u) % " + + std::to_string(fShapeY[d]) + "u;\n"; + } + op += "\n"; + + // Compute index into the indices tensor. + // Output dims [axis ... axis+q) correspond to the indices tensor dims [0 ... q) + // so i_index = sum over i in [0,q): out_{axis+i} * stridesIndices[i] + if (q == 0) { + op += SP + SP + SP + SP + "std::size_t const i_index = 0u;\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const i_index =\n"; + for (std::size_t i = 0; i < q; ++i) { + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(fAttrAxis + i) + + " * " + std::to_string(stridesIndices[i]) + "u"; + op += (i + 1 < q) ? 
" +\n" : ";\n"; + } + } + op += "\n"; + + op += SP + SP + SP + SP + "int64_t k = indices[i_index];\n"; + op += SP + SP + SP + SP + "if (k < 0) k += " + std::to_string(fShapeX[fAttrAxis]) + ";\n"; + op += SP + SP + SP + SP + "if (k < 0) k = 0;\n"; + op += SP + SP + SP + SP + "if (k >= static_cast(" + std::to_string(fShapeX[fAttrAxis]) + ")) " + + "k = static_cast(" + std::to_string(fShapeX[fAttrAxis]) + ") - 1;\n\n"; + + // Compute input index: + // x_index = k * stridesX[axis] + // + sum over j in [0, axis): out_j * stridesX[j] + // + sum over j in [axis+1, r): out_{j-1+q} * stridesX[j] + // (the dims after axis in Y are shifted by q-1 relative to X) + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + op += SP + SP + SP + SP + SP + "static_cast(k) * " + std::to_string(stridesX[fAttrAxis]) + "u"; + for (std::size_t j = 0; j < static_cast(fAttrAxis); ++j) { + op += " +\n" + SP + SP + SP + SP + SP + + "out_" + std::to_string(j) + " * " + std::to_string(stridesX[j]) + "u"; + } + for (std::size_t j = fAttrAxis + 1; j < r; ++j) { + // in Y, the coord for X's dim j lives at output dim q + j - 1 + op += " +\n" + SP + SP + SP + SP + SP + + "out_" + std::to_string(q + j - 1) + " * " + std::to_string(stridesX[j]) + "u"; + } + op += ";\n\n"; + + op += SP + SP + SP + SP + "output[elem_idx] = input[input_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; +} + +std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + std::string kname = "GatherKernel_" + opName; + return SP + kname + " gatherKernel_" + opName + ";\n"; +} + +std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Gather Op called to Generate without being initialized first"); + + std::size_t totalElements = 
ConvertShapeToLength(fShapeY); + std::string kname = "gatherKernel_" + opName; + + std::stringstream out; + out << "\n//------ GATHER_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + + return out.str(); +} }; diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index f35277a..6c2d293 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -22,15 +22,19 @@ void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { else if (i.second.type() == ETensorType::INT64) fGC += GenerateConstantTensorCode(i); - } else { + } // case of tensors which are read from a file size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { fGC += "BufF1D deviceBuf_" + i.first + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + std::to_string(length) + "}));\n"; + } else if (i.second.type() == ETensorType::INT64) { + fGC += "BufI641D deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + + std::to_string(length) + 
"}));\n"; } - } + } } @@ -421,8 +425,6 @@ void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int bat void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ for (auto &i : fInitializedTensors) { - // skip Constant and shape tensors - if (!i.second.IsWeightTensor() || i.second.IsConstantTensor() || !fUseWeightFile) continue; std::string tensor_name = "tensor_" + i.first; auto length = ConvertShapeToLength(i.second.shape()); std::string slength = std::to_string(length); diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 9700b36..d10f9ff 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -25,6 +25,19 @@ #include "Tile5D_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/Tile5D.ref.hxx" +#include "GatherAxis0_FromONNX_GPU_ALPAKA.hxx" +#include "GatherAxis1_FromONNX_GPU_ALPAKA.hxx" +#include "GatherAxis2_FromONNX_GPU_ALPAKA.hxx" +#include "GatherAxis3_FromONNX_GPU_ALPAKA.hxx" +#include "Gather2d_FromONNX_GPU_ALPAKA.hxx" +#include "GatherNegativeIndices_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/GatherAxis0.ref.hxx" +#include "input_models/references/GatherAxis1.ref.hxx" +#include "input_models/references/GatherAxis2.ref.hxx" +#include "input_models/references/GatherAxis3.ref.hxx" +#include "input_models/references/Gather2d.ref.hxx" +#include "input_models/references/GatherNegativeIndices.ref.hxx" + #include #include #include @@ -555,7 +568,6 @@ TEST_F(SofieAlpakaTest, Tile5D) { SOFIE_Tile5D::Session session; auto result = session.infer(input_d); - alpaka::wait(queue); cudaDeviceSynchronize(); alpaka::memcpy(queue, result_h, result); @@ -568,4 +580,208 @@ TEST_F(SofieAlpakaTest, Tile5D) EXPECT_EQ(outputSize, sizeof(Tile5D_ExpectedOutput::output) / sizeof(float)); for (size_t i = 0; i < outputSize; ++i) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), 
TOLERANCE); +} + +TEST_F(SofieAlpakaTest, GatherAxis0) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + constexpr Idx inputSize = 120; + const std::size_t outputSize = sizeof(GatherAxis0_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + std::iota(input_ptr, input_ptr + inputSize, 0.f); + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_GatherAxis0::Session session("GatherAxis0_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = GatherAxis0_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(GatherAxis0_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, GatherAxis1) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + constexpr Idx inputSize = 120; + const std::size_t outputSize = sizeof(GatherAxis1_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + std::iota(input_ptr, input_ptr + inputSize, 0.f); + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_GatherAxis1::Session session("GatherAxis1_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + 
+ alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = GatherAxis1_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(GatherAxis1_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, GatherAxis2) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + constexpr Idx inputSize = 120; + const std::size_t outputSize = sizeof(GatherAxis2_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + std::iota(input_ptr, input_ptr + inputSize, 0.f); + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_GatherAxis2::Session session("GatherAxis2_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = GatherAxis2_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(GatherAxis2_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, GatherAxis3) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + constexpr Idx inputSize = 120; + const std::size_t outputSize = sizeof(GatherAxis3_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + std::iota(input_ptr, input_ptr + inputSize, 0.f); + + auto input_d 
= alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_GatherAxis3::Session session("GatherAxis3_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = GatherAxis3_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(GatherAxis3_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, Gather2d) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + constexpr Idx inputSize = 9; + const std::size_t outputSize = sizeof(Gather2d_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + std::iota(input_ptr, input_ptr + inputSize, 0.f); + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Gather2d::Session session("Gather2d_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Gather2d_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(Gather2d_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, GatherNegativeIndices) +{ + constexpr float 
TOLERANCE = DEFAULT_TOLERANCE; + + constexpr Idx inputSize = 10; + const std::size_t outputSize = sizeof(GatherNegativeIndices_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{inputSize})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + std::iota(input_ptr, input_ptr + inputSize, 0.f); + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{inputSize})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_GatherNegativeIndices::Session session("GatherNegativeIndices_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = GatherNegativeIndices_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(GatherNegativeIndices_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } \ No newline at end of file From 0d205e93088eadbf83e55579e7fff9b75b3e6c56 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 16 Mar 2026 10:26:15 +0100 Subject: [PATCH 29/43] feat: tests for heterogeneous inference of Expand operator --- src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx | 156 ++++++++++++------ src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 5 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 78 ++++++++- 3 files changed, 179 insertions(+), 60 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx index bf163b7..dbb75c1 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx @@ -122,62 +122,108 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { - 
std::string op; - op = "\n//------ Expand_KERNEL_ALPAKA\n"; - op += SP + "struct ExpandKernel {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * input, T * output, const size_t * input_shape, const size_t * output_shape, const size_t * input_strides, const size_t * output_strides, const size_t ndim){\n"; - op += SP + SP + SP + SP + "size_t input_idx = 0;\n"; - op += SP + SP + SP + SP + "size_t output_idx = 0;\n"; - op += SP + SP + SP + SP + "size_t coord_out;\n"; - op += SP + SP + SP + SP + "size_t coord_in;\n"; - op += SP + SP + SP + SP + "auto elements = alpaka::uniformElementsND(acc, alpaka::Vec(output_shape));\n"; - op += SP + SP + SP + SP + "for (auto const& elem : elements) {\n"; - op += SP + SP + SP + SP + "input_idx = 0;\n"; - op += SP + SP + SP + SP + "output_idx = 0;\n"; - op += SP + SP + SP + SP + "for (int i = 0; i < ndim; ++i) {\n"; - op += SP + SP + SP + SP + SP + "coord_out = elem[i];\n"; - op += SP + SP + SP + SP + SP + "coord_in = (input_shape[i] == 1) ? 
0 : coord_out;\n"; - op += SP + SP + SP + SP + SP + "input_idx += coord_in * input_strides[i];\n}\n"; - op += SP + SP + SP + SP + SP + "output_idx += coord_out * output_strides[i];\n}\n"; - op += SP + SP + SP + SP + SP + "output[output_idx] = input[input_idx];\n"; - op += SP + SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n"; - return op; - } - - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { - return SP + "ExpandKernel expandKernel;\n"; - } - - std::string Generate_GPU_ALPAKA(std::string OpName) override { - OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Expand called to Generate without being initialized first"); - } - - std::stringstream out; - auto length = ConvertShapeToLength(fShape); - out << "\n//------ EXPAND_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_" << fNX - << "(alpaka::Vec::all((" << length << " + 256 - 1) / 256), " - << "alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - - out << SP << "alpaka::exec(queue, workDiv_" << fNX - << ", expandKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY - << "), "<< ConvertShapeToString(fShapeX) <<", "< shapeX_padded(D, 1); + size_t offset = D - fShapeX.size(); + for (size_t i = 0; i < fShapeX.size(); ++i) + shapeX_padded[offset + i] = fShapeX[i]; + + auto stridesX = UTILITY::ComputeStrideFromShape(shapeX_padded); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + + std::string kname = "ExpandKernel_" + opName; + + std::string op; + op = "\n//------ EXPAND_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + 
"std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " + + std::to_string(fShapeY[d]) + "u;\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + if (shapeX_padded[d] == 1) { + op += SP + SP + SP + SP + SP + "0u"; + } else { + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(d) + + " * " + std::to_string(stridesX[d]) + "u"; + } + op += (d + 1 < D) ? " +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[elem_idx] = input[input_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; +} + +std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + std::string kname = "ExpandKernel_" + opName; + return SP + kname + " expandKernel_" + opName + ";\n"; +} + +std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Operator Expand called to Generate without being initialized first"); + + if (fInitialized || fShapeX == fShapeY) + return ""; + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::string kname = "expandKernel_" + opName; + + std::stringstream out; + out << "\n//------ EXPAND_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " 
= Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + + return out.str(); +} }; - }//SOFIE #endif //SOFIE_ROperator_Expand diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx index 7f10f8f..06fe5a9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx @@ -248,15 +248,13 @@ std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; - // Decompose output linear index into per-dim coords using compile-time strides for (std::size_t d = 0; d < D; ++d) { op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) - + " = (elem_idx * " + std::to_string(1/stridesY[d]) + "u) % " + + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " + std::to_string(fShapeY[d]) + "u;\n"; } op += "\n"; - // Compute index into the indices tensor. // Output dims [axis ... axis+q) correspond to the indices tensor dims [0 ... 
q) // so i_index = sum over i in [0,q): out_{axis+i} * stridesIndices[i] if (q == 0) { @@ -278,7 +276,6 @@ std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { op += SP + SP + SP + SP + "if (k >= static_cast(" + std::to_string(fShapeX[fAttrAxis]) + ")) " + "k = static_cast(" + std::to_string(fShapeX[fAttrAxis]) + ") - 1;\n\n"; - // Compute input index: // x_index = k * stridesX[axis] // + sum over j in [0, axis): out_j * stridesX[j] // + sum over j in [axis+1, r): out_{j-1+q} * stridesX[j] diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index d10f9ff..ae7f745 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -38,6 +38,12 @@ #include "input_models/references/Gather2d.ref.hxx" #include "input_models/references/GatherNegativeIndices.ref.hxx" +#include "ExpandSameSize_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ExpandSameSize.ref.hxx" + +#include "ExpandDiffSize_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ExpandDiffSize.ref.hxx" + #include #include #include @@ -784,4 +790,74 @@ TEST_F(SofieAlpakaTest, GatherNegativeIndices) EXPECT_EQ(outputSize, sizeof(GatherNegativeIndices_ExpectedOutput::output) / sizeof(float)); for (size_t i = 0; i < outputSize; ++i) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); -} \ No newline at end of file +} + +TEST_F(SofieAlpakaTest, ExpandSameSize) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({0.f, 1.f, 2.f}); + const std::size_t outputSize = sizeof(ExpandSameSize_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) + input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, 
Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_ExpandSameSize::Session session("ExpandSameSize_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ExpandSameSize_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(ExpandSameSize_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} + +TEST_F(SofieAlpakaTest, ExpandDiffSize) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({0.f, 1.f, 2.f}); + const std::size_t outputSize = sizeof(ExpandDiffSize_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) + input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_ExpandDiffSize::Session session("ExpandDiffSize_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ExpandDiffSize_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(ExpandDiffSize_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); +} From 
1f0ebc6b2f7b449d97ff85e3e24e79ac695b514d Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 16 Mar 2026 10:51:15 +0100 Subject: [PATCH 30/43] feat: Support for heterogeneous inference on gathernd operator --- .../inc/SOFIE/ROperator_GatherND.hxx | 304 ++++++++++++++++++ .../inc/SOFIE/ROperator_Reshape.hxx | 55 ++-- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 233 ++++++++++++++ .../test/input_models/GatherND_Batch.onnx | Bin 0 -> 186 bytes .../test/input_models/GatherND_Ex1.onnx | Bin 0 -> 176 bytes .../test/input_models/GatherND_Ex2.onnx | Bin 0 -> 164 bytes .../test/input_models/GatherND_Ex3.onnx | Bin 0 -> 184 bytes .../test/input_models/GatherND_Ex4.onnx | Bin 0 -> 190 bytes .../test/input_models/GatherND_Ex5.onnx | Bin 0 -> 168 bytes .../GatherND_NegativeIndices.onnx | Bin 0 -> 204 bytes src/SOFIE_parsers/CMakeLists.txt | 1 + src/SOFIE_parsers/src/ParseGatherND.cxx | 49 +++ src/SOFIE_parsers/src/RModelParser_ONNX.cxx | 2 + 13 files changed, 618 insertions(+), 26 deletions(-) create mode 100644 src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx create mode 100644 src/SOFIE_core/test/input_models/GatherND_Batch.onnx create mode 100644 src/SOFIE_core/test/input_models/GatherND_Ex1.onnx create mode 100644 src/SOFIE_core/test/input_models/GatherND_Ex2.onnx create mode 100644 src/SOFIE_core/test/input_models/GatherND_Ex3.onnx create mode 100644 src/SOFIE_core/test/input_models/GatherND_Ex4.onnx create mode 100644 src/SOFIE_core/test/input_models/GatherND_Ex5.onnx create mode 100644 src/SOFIE_core/test/input_models/GatherND_NegativeIndices.onnx create mode 100644 src/SOFIE_parsers/src/ParseGatherND.cxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx new file mode 100644 index 0000000..e147e30 --- /dev/null +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx @@ -0,0 +1,304 @@ +#ifndef SOFIE_ROPERATOR_GATHERND +#define SOFIE_ROPERATOR_GATHERND + +#include "SOFIE/SOFIE_common.hxx" +#include 
"SOFIE/ROperator.hxx" +#include "SOFIE/RModel.hxx" + +#include +#include +#include +#include + +namespace SOFIE { + +class ROperator_GatherND final : public ROperator +{ +private: + + int64_t fBatchDims = 0; + + std::string fNData; + std::string fNIndices; + std::string fNY; + + std::vector fShapeData; + std::vector fShapeIndices; + std::vector fShapeY; + + std::string fType; + +public: + ROperator_GatherND() {} + ROperator_GatherND(int64_t batchDims, + std::string nameData, + std::string nameIndices, + std::string nameY) + : fBatchDims(batchDims), + fNData(UTILITY::Clean_name(nameData)), + fNIndices(UTILITY::Clean_name(nameIndices)), + fNY(UTILITY::Clean_name(nameY)) + { + fInputTensorNames = { fNData, fNIndices }; + fOutputTensorNames = { fNY }; + } + + std::vector TypeInference(std::vector input) override { + return { input[0] }; + } + + std::vector> ShapeInference(std::vector> input) override { + return { input[0] }; + } + + void Initialize(RModel& model) override { + if (!model.CheckIfTensorAlreadyExist(fNData)) + throw std::runtime_error("TMVA SOFIE GatherND: data tensor " + fNData + " not found in model"); + if (!model.CheckIfTensorAlreadyExist(fNIndices)) + throw std::runtime_error("TMVA SOFIE GatherND: indices tensor " + fNIndices + " not found in model"); + + fShapeData = model.GetTensorShape(fNData); + fShapeIndices = model.GetTensorShape(fNIndices); + + size_t r = fShapeData.size(); + size_t q = fShapeIndices.size(); + size_t b = static_cast(fBatchDims); + size_t last_idx_dim = fShapeIndices.back(); + + if (r < 1) + throw std::runtime_error("TMVA SOFIE GatherND: data rank must be >= 1"); + if (q < 1) + throw std::runtime_error("TMVA SOFIE GatherND: indices rank must be >= 1"); + if (b >= std::min(q, r)) + throw std::runtime_error("TMVA SOFIE GatherND: batch_dims must be < min(q, r)"); + if (last_idx_dim > r - b) + throw std::runtime_error("TMVA SOFIE GatherND: indices_shape[-1] must be <= r - batch_dims"); + + for (size_t i = 0; i < b; ++i) { + if 
(fShapeData[i] != fShapeIndices[i]) + throw std::runtime_error("TMVA SOFIE GatherND: first batch_dims dimensions of data and indices must match"); + } + + // Output shape: batch_dims + indices[0..q-2] + data[b + last_idx_dim .. r-1] + // rank = b + (q - b - 1) + (r - b - last_idx_dim) + // = q + r - last_idx_dim - 1 - b + fShapeY.clear(); + for (size_t i = 0; i < b; ++i) + fShapeY.push_back(fShapeData[i]); + for (size_t i = b; i + 1 < q; ++i) + fShapeY.push_back(fShapeIndices[i]); + for (size_t i = b + last_idx_dim; i < r; ++i) + fShapeY.push_back(fShapeData[i]); + + model.AddIntermediateTensor(fNY, model.GetTensorType(fNData), fShapeY); + fType = ConvertTypeToString(model.GetTensorType(fNData)); + + if (model.Verbose()) + std::cout << "GatherND: data " << ConvertShapeToString(fShapeData) + << " indices " << ConvertShapeToString(fShapeIndices) + << " batch_dims=" << fBatchDims + << " -> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; + } + + std::string Generate(std::string opName) override { + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE GatherND called to Generate without being initialized first"); + + size_t r = fShapeData.size(); + size_t q = fShapeIndices.size(); + size_t b = static_cast(fBatchDims); + size_t last_idx_dim = fShapeIndices.back(); + + auto stridesData = UTILITY::ComputeStrideFromShape(fShapeData); + auto stridesIndices = UTILITY::ComputeStrideFromShape(fShapeIndices); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + + size_t totalOutput = ConvertShapeToLength(fShapeY); + + std::stringstream out; + out << SP << "//--------- GatherND operator " << opName << "\n"; + + out << SP << "for (size_t out_idx = 0; out_idx < " << totalOutput << "; out_idx++) {\n"; + + out << SP << SP << "size_t rem = out_idx;\n"; + size_t Dy = fShapeY.size(); + for (size_t d = 0; d < Dy; ++d) { + out << SP << SP << "size_t oy_" << d << " = rem / " << stridesY[d] << ";\n"; + out << SP << SP << "rem %= " << 
stridesY[d] << ";\n"; + } + + out << SP << SP << "size_t idx_base = 0;\n"; + for (size_t i = 0; i < b; ++i) + out << SP << SP << "idx_base += oy_" << i << " * " << stridesIndices[i] << ";\n"; + for (size_t i = b; i + 1 < q; ++i) + out << SP << SP << "idx_base += oy_" << i << " * " << stridesIndices[i] << ";\n"; + + out << SP << SP << "size_t data_idx = 0;\n"; + for (size_t i = 0; i < b; ++i) + out << SP << SP << "data_idx += oy_" << i << " * " << stridesData[i] << ";\n"; + + out << SP << SP << "for (size_t k = 0; k < " << last_idx_dim << "; k++) {\n"; + out << SP << SP << SP << "int64_t idx_val = tensor_" << fNIndices + << "[idx_base + k * " << stridesIndices[q - 1] << "];\n"; + out << SP << SP << SP << "if (idx_val < 0) idx_val += " << "static_cast(tensor_" + << fNData << "_shape[" << b << " + k]);\n"; + out << SP << SP << SP << "data_idx += static_cast(idx_val) * " << "data_stride_b_plus_k_" << opName << "[k];\n"; + out << SP << SP << "}\n"; + + // Accumulate trailing data dims from output coords + // Y dims [b + (q-b-1) .. ] correspond to data dims [b + last_idx_dim .. 
r-1] + size_t y_trailing_start = b + (q - b - 1); + for (size_t i = b + last_idx_dim; i < r; ++i) { + size_t oy_dim = y_trailing_start + (i - (b + last_idx_dim)); + out << SP << SP << "data_idx += oy_" << oy_dim << " * " << stridesData[i] << ";\n"; + } + + out << SP << SP << "tensor_" << fNY << "[out_idx] = tensor_" << fNData << "[data_idx];\n"; + out << SP << "}\n"; + + return out.str(); + } + + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE GatherND called to Generate without being initialized first"); + + size_t r = fShapeData.size(); + size_t q = fShapeIndices.size(); + size_t b = static_cast(fBatchDims); + size_t last_idx_dim = fShapeIndices.back(); + + auto stridesData = UTILITY::ComputeStrideFromShape(fShapeData); + auto stridesIndices = UTILITY::ComputeStrideFromShape(fShapeIndices); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + + size_t Dy = fShapeY.size(); + size_t totalOutput = ConvertShapeToLength(fShapeY); + + std::string kname = "GatherNDKernel_" + opName; + + std::string op; + op = "\n//------ GATHERND_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ data,\n"; + op += SP + SP + SP + "int64_t const* __restrict__ indices,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + for 
(size_t d = 0; d < Dy; ++d) { + op += SP + SP + SP + SP + "std::size_t const oy_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " + + std::to_string(fShapeY[d]) + "u;\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t const idx_base =\n"; + // batch dims: oy_0..oy_{b-1} * stridesIndices[0..b-1] + // outer idx dims: oy_b..oy_{b+(q-b-2)} * stridesIndices[b..q-2] + bool first = true; + for (size_t i = 0; i < q - 1; ++i) { + op += SP + SP + SP + SP + SP + + (first ? "" : "+ ") + + "oy_" + std::to_string(i) + " * " + std::to_string(stridesIndices[i]) + "u\n"; + first = false; + } + if (first) op += SP + SP + SP + SP + SP + "0u\n"; // q==1: scalar index tuple + op += SP + SP + SP + SP + SP + ";\n\n"; + + op += SP + SP + SP + SP + "std::size_t data_idx =\n"; + first = true; + for (size_t i = 0; i < b; ++i) { + op += SP + SP + SP + SP + SP + + (first ? "" : "+ ") + + "oy_" + std::to_string(i) + " * " + std::to_string(stridesData[i]) + "u\n"; + first = false; + } + if (first) op += SP + SP + SP + SP + SP + "0u\n"; + op += SP + SP + SP + SP + SP + ";\n\n"; + + op += SP + SP + SP + SP + "// Read " + std::to_string(last_idx_dim) + "-element index tuple\n"; + for (size_t k = 0; k < last_idx_dim; ++k) { + size_t idx_offset = k; + size_t data_axis = b + k; + op += SP + SP + SP + SP + "{\n"; + op += SP + SP + SP + SP + SP + + "int64_t idx_val = indices[idx_base + " + + std::to_string(idx_offset) + "u];\n"; + op += SP + SP + SP + SP + SP + + "if (idx_val < 0) idx_val += " + + std::to_string(fShapeData[data_axis]) + ";\n"; + op += SP + SP + SP + SP + SP + + "data_idx += static_cast(idx_val) * " + + std::to_string(stridesData[data_axis]) + "u;\n"; + op += SP + SP + SP + SP + "}\n"; + } + op += "\n"; + + size_t y_trailing_start = b + (q - b - 1); + for (size_t i = b + last_idx_dim; i < r; ++i) { + size_t oy_dim = y_trailing_start + (i - (b + last_idx_dim)); + op += SP + SP + SP + SP + + "data_idx += oy_" + std::to_string(oy_dim) + + " * 
" + std::to_string(stridesData[i]) + "u;\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "output[elem_idx] = data[data_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + std::string kname = "GatherNDKernel_" + opName; + return SP + kname + " gatherNDKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE GatherND called to Generate without being initialized first"); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::string kname = "gatherNDKernel_" + opName; + + std::stringstream out; + out << "\n//------ GATHERND_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + + return out.str(); + } +}; + +} // SOFIE + +#endif // SOFIE_ROPERATOR_GATHERND diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx 
b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx index 0a21709..2fa72b5 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx @@ -353,32 +353,35 @@ public: return out.str(); } - std::string Generate_GPU_ALPAKA(std::string opName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors - - opName = "op_" + opName; - - // output of reshape is same as input - auto length = ConvertDimShapeToLength(fShapeOutput); - if (length != ConvertDimShapeToLength(fShapeInput)) { - throw std::runtime_error("TMVA SOFIE Reshape Op : wrong output shape - is " + - ConvertDimShapeToString(fShapeOutput) + " and input is " + - ConvertDimShapeToString(fShapeInput)); - } - std::stringstream out; - opName += "_Reshape"; - if (fOpMode == Flatten) - opName += "_Flatten"; - else if (fOpMode == Squeeze) - opName += "_Squeeze"; - else if (fOpMode == Unsqueeze) - opName += "_Unsquueze"; - - - out << SP << "///-------" << opName << " operator\n" << std::endl; - out << SP << "alpaka::memcpy(queue, deviceBuf_" << fNOutput << ", deviceBuf_" << fNData << ");\n"; - return out.str(); - } +std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + + opName = "op_" + opName; + + std::string opType = "Reshape"; + if (fOpMode == Flatten) opType = "Flatten"; + else if (fOpMode == Squeeze) opType = "Squeeze"; + else if (fOpMode == Unsqueeze) opType = "Unsqueeze"; + + std::stringstream out; + out << SP << "///------- " << opType << " operator " << opName << "\n"; + + if (fDynamicShape) { + auto lengthOut = ConvertDimShapeToLength(fShapeOutput); + auto lengthIn = ConvertDimShapeToLength(fShapeInput); + if (lengthOut != lengthIn) { + out << SP << "if (" << lengthOut << " != " << lengthIn << ")\n"; + out << SP << SP << "throw std::runtime_error(\"TMVA SOFIE " << opType + << " Op : output length is different from input length\");\n"; + } + } + + out << SP << "alpaka::memcpy(queue, deviceBuf_" << 
fNOutput + << ", deviceBuf_" << fNData << ");\n"; + out << SP << "alpaka::wait(queue);\n"; + + return out.str(); +} }; diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index ae7f745..1303251 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -44,6 +44,14 @@ #include "ExpandDiffSize_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/ExpandDiffSize.ref.hxx" +#include "GatherND_Ex1_FromONNX_GPU_ALPAKA.hxx" +#include "GatherND_Ex2_FromONNX_GPU_ALPAKA.hxx" +#include "GatherND_Ex3_FromONNX_GPU_ALPAKA.hxx" +#include "GatherND_Ex4_FromONNX_GPU_ALPAKA.hxx" +#include "GatherND_Ex5_FromONNX_GPU_ALPAKA.hxx" +#include "GatherND_NegativeIndices_FromONNX_GPU_ALPAKA.hxx" +#include "GatherND_Batch_FromONNX_GPU_ALPAKA.hxx" + #include #include #include @@ -861,3 +869,228 @@ TEST_F(SofieAlpakaTest, ExpandDiffSize) for (size_t i = 0; i < outputSize; ++i) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); } + +TEST_F(SofieAlpakaTest, GatherND_Ex1) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data = {0.f, 1.f, 2.f, 3.f}; + std::vector expected = {0.f, 3.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_Ex1::Session session("GatherND_Ex1_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = 
reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 2u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GatherND_Ex2) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data = {0.f, 1.f, 2.f, 3.f}; + std::vector expected = {2.f, 3.f, 0.f, 1.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_Ex2::Session session("GatherND_Ex2_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 4u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GatherND_Ex3) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data = {0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f}; + std::vector expected = {2.f, 3.f, 4.f, 5.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_Ex3::Session session("GatherND_Ex3_FromONNX_GPU_ALPAKA.dat"); + auto 
result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 4u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GatherND_Ex4) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data = {0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f}; + std::vector expected = {2.f, 3.f, 4.f, 5.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_Ex4::Session session("GatherND_Ex4_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 4u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GatherND_Ex5) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data = {0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f}; + std::vector expected = {2.f, 3.f, 4.f, 5.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + 
alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_Ex5::Session session("GatherND_Ex5_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 4u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GatherND_NegativeIndices) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data = {0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f}; + std::vector expected = {6.f, 2.f, 4.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_NegativeIndices::Session session("GatherND_NegativeIndices_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 3u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GatherND_Batch) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector data(24); + std::iota(data.begin(), data.end(), 0.f); + std::vector expected = {4.f,5.f,6.f,7.f, 20.f,21.f,22.f,23.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{data.size()})); + float* 
input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < data.size(); ++i) input_ptr[i] = data[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{data.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{expected.size()})); + + { + SOFIE_GatherND_Batch::Session session("GatherND_Batch_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(expected.size(), 8u); + for (size_t i = 0; i < expected.size(); ++i) + EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; +} diff --git a/src/SOFIE_core/test/input_models/GatherND_Batch.onnx b/src/SOFIE_core/test/input_models/GatherND_Batch.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4d146c6fd1bb47486baedf47e72793433a250a78 GIT binary patch literal 186 zcmd;Jw_4B0Wx~ahl30?+#h#g$l9`-ZEX0;yT2fG2qQv2zSdx)iMplF>5cVB%mBV0VI<;-$a<1&nAk6ONqVW?%u!NkEN!0rSy!%Kky4KPCaFgi*MVyzG#7Y_%c5C<2~ b01!?R;ey&P#Kpw{lwkphI`q-h`2 literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/GatherND_Ex2.onnx b/src/SOFIE_core/test/input_models/GatherND_Ex2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4cd511ca4796f87117c9760051de8b9f170f803d GIT binary patch literal 164 zcmd;Jx0=bwWx~ahl30?+#h#g$l9`-ZEX0;yT2fG2qQv2zSdx)iNqVWg$V!NkER!0rSy!%KjX0To1vL97wtNqVXS4y!NkEN!0rSy!%Kky3K-F7n0S;F#9ARCE&&ckAs#NE b0Wh2-!3A}L5FZy0P?iHGNqVWMTs!NkGH!6d-$1T)1;fdL8_(P)@>loZ5bAt5dS4n`p! 
aE+!5p7*3Mmg1Q1tmJy`FiG_NqVXCFb!NkER!0rSy!%KjX0To0^L97uH;u7Fs6yo7x;$VW| YBnd94okDzEJV03vn2Zw(7lQyV0QeCf`v3p{ literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/GatherND_NegativeIndices.onnx b/src/SOFIE_core/test/input_models/GatherND_NegativeIndices.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5fa05aa6caea1206209894ef2c61365e86564702 GIT binary patch literal 204 zcmd;Jw>rqkWx~ahl30?+#h#g$l9`-ZEX0;yT2fG2qQv2zSdx)iOSnx0saS(fSv(XQpl!OX!V!0rSy&&%LH6fmFxnD{>^gApnp qB?fVl5FZy02cr-N7ZV3F2q%efLER?A#l-=XVF8Icv2Za6@B#oD{XGf* literal 0 HcmV?d00001 diff --git a/src/SOFIE_parsers/CMakeLists.txt b/src/SOFIE_parsers/CMakeLists.txt index d77d1e6..0e7e03d 100644 --- a/src/SOFIE_parsers/CMakeLists.txt +++ b/src/SOFIE_parsers/CMakeLists.txt @@ -61,6 +61,7 @@ set(sources_cxx src/ParseLayerNormalization.cxx src/ParseExpand.cxx src/ParseGather.cxx + src/ParseGatherND.cxx src/ParseElu.cxx src/ParseFuseConvAdd.cxx src/ParseFuseConvTransposeAdd.cxx diff --git a/src/SOFIE_parsers/src/ParseGatherND.cxx b/src/SOFIE_parsers/src/ParseGatherND.cxx new file mode 100644 index 0000000..57beb01 --- /dev/null +++ b/src/SOFIE_parsers/src/ParseGatherND.cxx @@ -0,0 +1,49 @@ +#include "SOFIE/RModelParser_ONNX.hxx" +#include "SOFIE/ROperator_GatherND.hxx" +#include "onnx_proto3.pb.h" +#include + + +namespace SOFIE { + +ParserFuncSignature ParseGatherND = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + ETensorType input_type = ETensorType::UNDEFINED; + auto input_name = nodeproto.input(0); + if (parser.IsRegisteredTensorType(input_name)) { + input_type = parser.GetTensorType(input_name); + } else { + throw std::runtime_error("TMVA::SOFIE ONNX Parser GatherND op has input tensor " + input_name + + " but its type is not yet registered"); + } + + auto indices_name = nodeproto.input(1); + if (parser.IsRegisteredTensorType(indices_name)) { + ETensorType indices_type = parser.GetTensorType(indices_name); + if (indices_type != ETensorType::INT64) { 
+ throw std::runtime_error("TMVA::SOFIE ONNX Parser GatherND op indices tensor must be INT64, got " + + indices_name); + } + } + + int64_t batch_dims = 0; + for (int i = 0; i < nodeproto.attribute_size(); ++i) { + const auto& attr = nodeproto.attribute(i); + if (attr.name() == "batch_dims") { + batch_dims = attr.i(); + break; + } + } + + std::string output_name = nodeproto.output(0); + + std::unique_ptr op( + new ROperator_GatherND(batch_dims, input_name, indices_name, output_name)); + + if (!parser.IsRegisteredTensorType(output_name)) { + parser.RegisterTensorType(output_name, input_type); + } + + return op; +}; + +} // namespace SOFIE diff --git a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx index 68662ae..27c2c1a 100644 --- a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx +++ b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx @@ -73,6 +73,7 @@ extern ParserFuncSignature ParseShape; extern ParserFuncSignature ParseMatMul; extern ParserFuncSignature ParseLayerNormalization; extern ParserFuncSignature ParseGather; +extern ParserFuncSignature ParseGatherND; extern ParserFuncSignature ParseErf; extern ParserFuncSignature ParseElu; extern ParserFuncSignature ParseEyeLike; @@ -217,6 +218,7 @@ RModelParser_ONNX::RModelParser_ONNX() noexcept : fOperatorsMapImpl(std::make_un RegisterOperator("LayerNormalization", ParseLayerNormalization); RegisterOperator("Expand", ParseExpand); RegisterOperator("Gather", ParseGather); + RegisterOperator("GatherND", ParseGatherND); RegisterOperator("Erf", ParseErf); RegisterOperator("Elu", ParseElu); RegisterOperator("EyeLike", ParseEyeLike); From 19cdc110eb57ffd1266979acc3ff4611f3d0ef85 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 19 Mar 2026 18:48:45 +0100 Subject: [PATCH 31/43] fix: tensor management for expand and cast operators --- settings-dev.cmake | 7 - src/SOFIE_core/inc/SOFIE/RModel.hxx | 2 + src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 3 + .../inc/SOFIE/ROperator_BasicBinary.hxx 
| 9 +- src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx | 87 +++++- src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx | 203 +++++++------ src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx | 234 +++++++++++---- src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 210 ++++++++----- .../inc/SOFIE/ROperator_GatherND.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 279 +++++++++--------- .../SOFIE/ROperator_LayerNormalization.hxx | 248 ++++++++-------- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 33 ++- .../inc/SOFIE/ROperator_Reshape.hxx | 46 ++- .../inc/SOFIE/ROperator_ScatterElements.hxx | 2 +- .../inc/SOFIE/ROperator_Sigmoid.hxx | 1 + src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx | 1 + src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx | 10 +- .../inc/SOFIE/ROperator_Transpose.hxx | 34 ++- src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx | 29 +- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 114 ++++--- src/SOFIE_core/src/RModel.cxx | 16 +- src/SOFIE_core/src/RModel_ALPAKA.cxx | 9 +- src/SOFIE_core/src/SOFIE_common.cxx | 165 +++++++++-- .../test/input_models/GNN_model.onnx | Bin 0 -> 591051 bytes src/SOFIE_parsers/src/ParseTile.cxx | 1 + src/SOFIE_parsers/src/RModelParser_ONNX.cxx | 8 + 28 files changed, 1117 insertions(+), 642 deletions(-) delete mode 100644 settings-dev.cmake create mode 100644 src/SOFIE_core/test/input_models/GNN_model.onnx diff --git a/settings-dev.cmake b/settings-dev.cmake deleted file mode 100644 index 6a8496f..0000000 --- a/settings-dev.cmake +++ /dev/null @@ -1,7 +0,0 @@ -set (CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "" FORCE) -set (CMAKE_INSTALL_PREFIX ../install CACHE STRING "" FORCE) -set (CMAKE_INSTALL_BINDIR bin CACHE STRING "" FORCE) -set (CMAKE_INSTALL_INCLUDEDIR include CACHE STRING "" FORCE) -set (CMAKE_INSTALL_LIBDIR lib CACHE STRING "" FORCE) -set (testing ON CACHE BOOL "" FORCE) -set (mathmore ON CACHE BOOL "" FORCE) diff --git 
a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index ed28b48..50fc231 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -93,6 +93,7 @@ public: size_t length = ConvertShapeToLength(shape); std::shared_ptr data_ptr(malloc(length * sizeof(T)), free); std::memcpy(data_ptr.get(), (void*) data, length * sizeof(T)); + std::cout<<"Length of constant tensor "<(T()), shape, data_ptr); } // for boolean can be more convenient passing an std::vector @@ -148,6 +149,7 @@ public: void UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector shape, std::shared_ptr data); std::shared_ptr GetInitializedTensorData(std::string tensor_name); + void RemoveInitializedTensor(std::string tensor_name); template std::vector GetTensorData(const std::string & name); diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index deac58b..601e3a9 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -76,6 +76,7 @@ protected: std::stringstream strs; std::string type = ConvertTypeToString(t.second.type()); size_t length = ConvertShapeToLength(t.second.shape()); + std::cout<<"Constant tensor name: "< 100) ? 
false : true; @@ -86,7 +87,9 @@ protected: // for non stack allocation check if data are the same if (!allocateOnStack && length > 1) { size_t idx = 1; + std::cout<<"insider allocate on stack and length\n"; do { + std::cout<<"Printing idx: "< " << ConvertShapeToString(fDimShapeY) << std::endl; + std::cout << BinaryOperatorTrait::Name() << " : " << ConvertDimShapeToString(fDimShapeA) << " , " + << ConvertDimShapeToString(fDimShapeB) << " --> " << ConvertDimShapeToString(fDimShapeY) << std::endl; } } } @@ -392,7 +392,7 @@ public: std::string op; op = "\n//------ "+opName+"_"+BinaryOperatorTrait::Name()+"_KERNEL_ALPAKA\n"; - op += SP + "struct Binary"+BinaryOperatorTrait::Name()+"Kernel {\n"; + op += SP + "struct Binary"+opName+BinaryOperatorTrait::Name()+"Kernel {\n"; op += SP + SP + "template\n"; op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * A, T const * B, T * C) const {\n"; op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; @@ -470,7 +470,7 @@ public: if (fIsOutputConstant) return ""; - return SP + "Binary"+BinaryOperatorTrait::Name()+"Kernel binary" + OpName + "Kernel;\n"; + return SP + "Binary"+OpName+BinaryOperatorTrait::Name()+"Kernel binary" + OpName + "Kernel;\n"; } std::string Generate_GPU_ALPAKA(std::string OpName) { @@ -491,6 +491,7 @@ public: out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx index 8c04302..2cb797b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx @@ -10,6 +10,14 @@ namespace SOFIE{ +template +std::vector convertToInt64(const In* src, size_t n) { + std::vector dst(n); + std::transform(src, src + n, 
dst.begin(), + [](In v) { return static_cast(v); }); + return dst; +} + class ROperator_Cast final : public ROperator { @@ -48,11 +56,67 @@ public: fShape = model.GetTensorShape(fNX); // shoud we add a check if the same type auto inputType = model.GetTensorType(fNX); + const size_t n = ConvertShapeToLength(fShape); if (model.IsInitializedTensor(fNX)) { fIsOutputConstant = true; auto inputData = model.GetInitializedTensorData(fNX); if (ConvertStringToType(fAttrType) == ETensorType::INT64) { - model.AddConstantTensor(fNY, fShape, static_cast(inputData.get())); + auto inputTypeStr = ConvertTypeToString(inputType); + if (inputTypeStr == "int32_t") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "float") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "double") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "int8_t") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "int16_t") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "uint8_t") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "uint16_t") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "uint32_t") { + auto* src = static_cast(inputData.get()); + auto converted = 
convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "uint64_t") { + auto* src = static_cast(inputData.get()); + auto converted = convertToInt64(src, n); + model.AddConstantTensor(fNY, fShape, converted); + } + else if (inputTypeStr == "int64_t") { + model.AddConstantTensor( + fNY, fShape, + static_cast(inputData.get()) + ); + } + else { + throw std::runtime_error("Unsupported input type for INT64 conversion"); + } + model.SetNotWritableInitializedTensor(fNX); } else @@ -91,10 +155,11 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; std::string op; op = "\n//------ CAST_KERNEL_ALPAKA\n"; - op += SP + "struct CastKernel{\n"; + op += SP + "struct CastKernel"+opName+"{\n"; op += SP + SP + "template\n"; op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, SrcT const * src, DstT * dst, std::size_t numElements) const {\n"; op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; @@ -104,20 +169,28 @@ public: return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) { - return SP + "CastKernel castKernel;\n"; + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + return SP + "CastKernel"+opName+" castKernel;\n"; } std::string Generate_GPU_ALPAKA(std::string OpName) override { + if (fIsOutputConstant) return ""; OpName = "op_" + OpName; if (fShape.empty()) { throw std::runtime_error("TMVA SOFIE Operator Cast called to Generate without being initialized first"); } + std::stringstream out; auto length = ConvertShapeToLength(fShape); out << "\n//------ CAST_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<< (length+255)/256 <<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX << 
", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, castKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx index a44a807..10d6d0d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx @@ -23,6 +23,7 @@ std::string fOutput; std::vectorfOutputShape; std::vector> fInputShapes; + ETensorType fInputType; public: @@ -113,7 +114,7 @@ for (size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i - 1].size()) throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have different shapes " + fInputs[i] + " : " + - ConvertShapeToString(inputs[i]) + " and " + fInputs[i-1] + " : " + ConvertShapeToString(inputs[i - 1])); + ConvertDimShapeToString(inputs[i]) + " and " + fInputs[i-1] + " : " + ConvertDimShapeToString(inputs[i - 1])); for (size_t iaxis = 0; iaxis < inputs[i].size(); iaxis++) { if ((int)iaxis == fAxis) { // support both integer and params shape for the concatenation axis @@ -121,7 +122,7 @@ concat_dim = inputs[i][iaxis]; else if (inputs[i][iaxis].isParam || concat_dim.isParam) { concat_dim = - Dim{ concat_dim.GetVal() + std::string("+ ") + 
inputs[i][iaxis].GetVal(), + Dim{ concat_dim.GetVal() + std::string(" + ") + inputs[i][iaxis].GetVal(), static_cast(-1)}; } else { concat_dim = Dim { concat_dim.dim + inputs[i][iaxis].dim }; @@ -132,8 +133,8 @@ } else if ((!inputs[i][iaxis].isParam && !ret[iaxis].isParam) && (inputs[i][iaxis].dim != ret[iaxis].dim)) { throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have wrong shapes " + - ConvertShapeToString(inputs[i]) + " and " + - ConvertShapeToString(inputs[i - 1])); + ConvertDimShapeToString(inputs[i]) + " and " + + ConvertDimShapeToString(inputs[i - 1])); } else if (!inputs[i][iaxis].isParam && ret[iaxis].isParam){ // if shape is not parametric use it @@ -154,7 +155,7 @@ } // output shape for concatenated axis - ret[fAxis] = Dim{concat_dim}; + ret[fAxis] = concat_dim; } // case of stacking (not supported yet) @@ -180,6 +181,7 @@ // check if concat has constant inputs , axis 0(concat contigous memory and type is integer) bool isOutputShape = false; + fInputType = model.GetTensorType(fInputs[0]); if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) { fIsOutputConstant = true; isOutputShape = true; @@ -203,7 +205,7 @@ size_t inputLength = ConvertShapeToLength(inputShape); std::copy(inputData, inputData + inputLength, outputData.begin() + offset ); offset += inputLength; - // data do not need to be written as a weight + // the data of the input tensor don't need to be written in the generated code and data file model.SetNotWritableInitializedTensor(input); } model.AddConstantTensor(fOutput, outputShape, outputData.data()); @@ -219,15 +221,18 @@ std::vector inputData; auto inputShape = model.GetTensorShape(input); // shape is not dynamic size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar - if (model.IsShapeTensor(input)) + if (model.IsShapeTensor(input)) { inputData = model.GetShapeTensorValues(input); - else if (model.IsConstantTensor(input)) { + } else if (model.IsInitializedTensor(input)) { 
inputData.resize(inputLength); auto intData = static_cast(model.GetInitializedTensorData(input).get()); for (size_t i = 0; i < inputData.size(); i++) inputData[i] = Dim{ static_cast(intData[i])}; } - std::cout << "concatenating input data " << inputLength << " " << inputData[0] << std::endl; + else { + // this should not happen + throw std::runtime_error("TMVA SOFIE Concat Operator- invalid input type for shape output type"); + } std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); offset += inputLength; } @@ -235,7 +240,7 @@ model.AddShapeTensor(fOutput,outputData, false); // cannot be a scalar if (model.Verbose()) { std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : " - << ConvertShapeToString(outputData) << " (shape)" << std::endl; + << ConvertDimShapeToString(outputData) << " (shape)" << std::endl; } fIsOutputConstant = true; } @@ -249,13 +254,15 @@ } std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; opName = "op_" + opName; + std::stringstream out; + out<<"\n//--------- Concat " << opName << " --> " << fOutput << " " << ConvertDimShapeToString(fOutputShape) << "\n"; + + if (fIsOutputConstant) return out.str(); + if(fOutputShape.empty()){ throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); } - std::stringstream out; - out<<"\n//--------- Concat " << opName << " --> " << ConvertShapeToString(fOutputShape) << "\n"; // special case when memory is contiguous bool hasShapeOnes = true; for(int i = 0; i0) - out << SP << SP << SP << "idxOut += " << fInputShapes[j-1][fAxis].GetVal() << ";\n"; + out << SP << SP << SP << "idxOut += " << inStrides[j-1][fAxis-1].GetVal() << ";\n"; out << SP << SP << SP << "int idxIn" << j <<" = "; for (int k = 0; k < fAxis; k++) { if (k > 0) out << " + "; out << inStrides[j][k].GetVal() << "*i" << k; } out << ";\n"; - out << SP << SP << SP << "for (size_t iC = 0; iC < " << 
fInputShapes[j][fAxis].GetVal() << "; ++iC) {\n"; + out << SP << SP << SP << "for (size_t iC = 0; iC < " << inStrides[j][fAxis-1].GetVal() << "; ++iC) {\n"; out << SP << SP << SP << SP << "tensor_" << fOutput << "[idxOut+iC] = tensor_" << fInputs[j] << "[idxIn" << j << "+iC];\n"; out << SP << SP << SP << "}\n"; // concatenate the axis values @@ -317,90 +324,90 @@ return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { - opName = "op_" + opName; - if (fOutputShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); - } + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fOutputShape.empty()) + throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); - const std::size_t D = fOutputShape.size(); - const std::size_t Nin = fInputs.size(); - - auto outStrides = UTILITY::ComputeStrideFromShape(fOutputShape); - - std::vector prefix(Nin); - prefix[0] = 0; - for (std::size_t k = 1; k < Nin; ++k) - prefix[k] = prefix[k - 1] + std::stoul(fInputShapes[k - 1][fAxis].GetVal()); - - std::vector> inStrides(Nin); - for (std::size_t k = 0; k < Nin; ++k) - inStrides[k] = UTILITY::ComputeStrideFromShape(fInputShapes[k]); - - std::string op; - op = "\n//------ CONCAT_KERNEL_ALPAKA\n"; - op += SP + "struct ConcatKernel_" + opName + " {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; - op += SP + SP + SP + "TAcc const& acc,\n"; - op += SP + SP + SP + "std::array inputs,\n"; - op += SP + SP + SP + "T* output,\n"; - op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; - - op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; - op += SP + SP + SP + "auto const grid_thread_extent = 
alpaka::getWorkDiv(acc)[0];\n\n"; - - op += SP + SP + SP + "std::size_t remaining;\n"; - op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; - - op += SP + SP + SP + SP + "remaining = elem_idx;\n"; - for (std::size_t d = 0; d < D; ++d) { - std::string stride_val = outStrides[d].GetVal(); - op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) - + " = remaining * " + 1/std::stoul(stride_val) + ";\n"; - op += SP + SP + SP + SP + "remaining -= out_" + std::to_string(d) - + " * " + stride_val + ";\n"; - } - op += "\n"; + const std::size_t D = fOutputShape.size(); + const std::size_t Nin = fInputs.size(); + + auto outStrides = UTILITY::ComputeStrideFromShape(fOutputShape); + + std::vector prefix(Nin); + prefix[0] = 0; + for (std::size_t k = 1; k < Nin; ++k) + prefix[k] = prefix[k - 1] + std::stoul(fInputShapes[k - 1][fAxis].GetVal()); + + std::vector> inStrides(Nin); + for (std::size_t k = 0; k < Nin; ++k) + inStrides[k] = UTILITY::ComputeStrideFromShape(fInputShapes[k]); + + std::string op; + op = "\n//------ CONCAT_KERNEL_ALPAKA\n"; + op += SP + "struct ConcatKernel_" + opName + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "std::array inputs,\n"; + op += SP + SP + SP + "T* output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "std::size_t remaining;\n"; + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + op += SP + SP + SP + SP + "remaining = elem_idx;\n"; + for 
(std::size_t d = 0; d < D; ++d) { + std::string stride_val = outStrides[d].GetVal(); + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = remaining / " + stride_val + "u;\n"; + op += SP + SP + SP + SP + "remaining -= out_" + std::to_string(d) + + " * " + stride_val + "u;\n"; + } + op += "\n"; - op += SP + SP + SP + SP + "std::size_t chosen = 0;\n"; - for (std::size_t k = 0; k < Nin; ++k) { - std::size_t end_k = prefix[k] + std::stoul(fInputShapes[k][fAxis].GetVal()); - op += SP + SP + SP + SP + "chosen += static_cast(" - + std::to_string(end_k) + " <= out_" + std::to_string(fAxis) + ");\n"; - } - op += "\n"; + op += SP + SP + SP + SP + "std::size_t chosen = 0;\n"; + for (std::size_t k = 0; k < Nin; ++k) { + std::size_t end_k = prefix[k] + std::stoul(fInputShapes[k][fAxis].GetVal()); + op += SP + SP + SP + SP + "chosen += static_cast(" + + std::to_string(end_k) + "u <= out_" + std::to_string(fAxis) + ");\n"; + } + op += "\n"; - op += SP + SP + SP + SP + "std::size_t const output_idx =\n"; - for (std::size_t d = 0; d < D; ++d) { - op += SP + SP + SP + SP + SP + "out_" + std::to_string(d) - + " * " + outStrides[d].GetVal(); - op += (d + 1 < D) ? " +\n" : ";\n\n"; - } + op += SP + SP + SP + SP + "std::size_t const output_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + SP + "out_" + std::to_string(d) + + " * " + outStrides[d].GetVal() + "u"; + op += (d + 1 < D) ? " +\n" : ";\n\n"; + } - op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; - for (std::size_t k = 0; k < Nin; ++k) { - op += SP + SP + SP + SP + SP + "(chosen == " + std::to_string(k) + ") * (\n"; - for (std::size_t d = 0; d < D; ++d) { - std::string coord = (d == fAxis) - ? ("(out_" + std::to_string(d) + " - " + std::to_string(prefix[k]) + ")") - : ("out_" + std::to_string(d)); - op += SP + SP + SP + SP + SP + SP + coord - + " * " + inStrides[k][d].GetVal(); - op += (d + 1 < D) ? 
" +\n" : "\n"; - } - op += SP + SP + SP + SP + SP + ")"; - op += (k + 1 < Nin) ? " +\n" : ";\n\n"; + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + for (std::size_t k = 0; k < Nin; ++k) { + op += SP + SP + SP + SP + SP + "(chosen == " + std::to_string(k) + "u) * (\n"; + for (std::size_t d = 0; d < D; ++d) { + std::string coord = (d == static_cast(fAxis)) + ? ("(out_" + std::to_string(d) + " - " + std::to_string(prefix[k]) + "u)") + : ("out_" + std::to_string(d)); + op += SP + SP + SP + SP + SP + SP + coord + + " * " + inStrides[k][d].GetVal() + "u"; + op += (d + 1 < D) ? " +\n" : "\n"; } + op += SP + SP + SP + SP + SP + ")"; + op += (k + 1 < Nin) ? " +\n" : ";\n\n"; + } - op += SP + SP + SP + SP + "output[output_idx] = inputs[chosen][input_idx];\n"; - op += SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n"; + op += SP + SP + SP + SP + "output[output_idx] = inputs[chosen][input_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; - return op; - } + return op; + } std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { opName = "op_" + opName; @@ -413,9 +420,16 @@ throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fOutputShape); + auto length = ConvertDimShapeToLength(fOutputShape); out << "\n//------ CONCAT_GPU_ALPAKA\n"; - out << SP << "std::array input_ptrs_" << OpName << " = {"; + switch (fInputType){ + case ETensorType::FLOAT: + out << SP << "std::array input_ptrs_" << OpName << " = {"; break; + case ETensorType::INT64: + out << SP << "std::array input_ptrs_" << OpName << " = {"; break; + default: + throw std::runtime_error("Data type for Concat operator is not yet supported."); + } for(size_t i=0; i0) out << ", "; out << "alpaka::getPtrNative(deviceBuf_" << fInputs[i] << ")"; @@ -428,6 +442,7 @@ out << SP << "auto const workDiv_" << OpName << " = 
alpaka::getValidWorkDiv(kernelCfg_" << OpName << ", devAcc, concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; out << SP << "alpaka::exec(queue, workDiv_" << OpName << ", concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx index dbb75c1..786556d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx @@ -14,9 +14,10 @@ template class ROperator_Expand final : public ROperator{ private: - std::vector fShapeX; + std::vector fShapeX; std::vector fShape; - std::vector fShapeY; + std::vector fShapeY; + std::vector fShapeDim; std::string fNX; std::string fNShape; @@ -24,6 +25,8 @@ private: std::string fType; bool fInitialized = false; + bool fInitializedShape = false; + bool fInitBroadcast = false; public: ROperator_Expand(){} @@ -33,112 +36,175 @@ public: fOutputTensorNames = { fNY }; } - // type of output given input - std::vector TypeInference(std::vector input) override { - return input; - } - - std::vector> ShapeInference(std::vector> input) override { - return input; - } void Initialize(RModel& model) override { // input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNX)) { throw std::runtime_error("TMVA SOFIE Expand Op Input Tensor " + fNX + " is not found in model"); } - fShapeX = model.GetTensorShape(fNX); - if (!model.IsInitializedTensor(fNShape)) { - throw std::runtime_error("TMVA::SOFIE - Tensor " + fNShape + " is not initialized."); - } - int64_t *shapeData = + fShapeX = model.GetDimTensorShape(fNX); + if (model.IsInitializedTensor(fNShape)) { + fInitializedShape = true; + int64_t *shapeData = 
static_cast(model.GetInitializedTensorData(fNShape).get()); - fShape = model.GetTensorShape(fNShape); - if (fShape.size() != 1) { - throw std::runtime_error("TMVA::SOFIE - Expand operator shape must be a 1d tensor."); + fShape = model.GetTensorShape(fNShape); + if (fShape.size() != 1) { + throw std::runtime_error("TMVA::SOFIE - Expand operator shape must be a 1d tensor."); + } + size_t N = fShape[0]; + // what do we do if shapeData contains negative values? + for (size_t i = 0; i < N; i++) { + if ( shapeData[i] < 0) + throw std::runtime_error("TMVA::SOFIE - Expand: invalid shape value " + std::to_string(shapeData[i])); + } + std::vector shape(shapeData, shapeData + N); + fShapeDim = ConvertShapeToDim(shape); + } else if (model.IsShapeTensor(fNShape)) { + // case input shape is a shape tensor + fShapeDim = model.GetShapeTensorValues(fNShape); + fInitializedShape = true; + } else { + // assume shape of input shape is known (size is 1) + auto shapeOfInputShape = model.GetTensorShape(fNShape); + fShapeDim.resize(shapeOfInputShape[0]); + for (size_t i = 0; i < fShapeDim.size(); i++) { + fShapeDim[i] = Dim{std::string("v_") + fNShape + "_" + std::to_string(i)}; + model.AddShapeParam(fShapeDim[i].param); + } } - size_t N = fShape[0]; - std::vector shape(shapeData, shapeData + N); // Y is the common shape of fShapeX and shape - fShapeY = SOFIE::UTILITY::UnidirectionalBroadcastShape( - fShapeX, shape); - fInitialized = model.IsInitializedTensor(fNX); - // Broadcast X to the common shape fShapeY - bool broadcast = !UTILITY::AreSameShape(fShapeX, fShapeY); - if (model.IsInitializedTensor(fNX)) { + auto ret = SOFIE::UTILITY::MultidirectionalBroadcastShape(fShapeX, fShapeDim); + fShapeY = ret.second; + fInitialized = model.IsInitializedTensor(fNX) && fInitializedShape; + std::vector shapeX; + std::vector shapeY; + // case shape tensor and input shape are known + if (!model.IsDynamicTensor(fNX) && !model.IsDimInputTensor(fNX) && fInitializedShape) { + shapeX = 
ConvertShapeToInt(fShapeX); + shapeY = ConvertShapeToInt(fShapeY); + if (!UTILITY::AreSameShape(shapeX, shapeY)) + fInitBroadcast = true; + } + if (fInitialized) { + // cannot have Dim initialized tensors + assert(!shapeX.empty() && !shapeY.empty()); + // Broadcast X to the common shape shapeY // If X is an initialized tensor (constant) auto data = model.GetInitializedTensorData(fNX); - if (broadcast) { + if (fInitBroadcast) { std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX, fShapeY), + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), shapeX, shapeY), std::default_delete()); // Update the data and the shape of X - model.UpdateInitializedTensor(fNX, model.GetTensorType(fNX), fShapeY, broadcastedData); + model.UpdateInitializedTensor(fNX, model.GetTensorType(fNX), shapeY, broadcastedData); fShapeX = fShapeY; // need to set as a not writable tensor model.SetNotWritableInitializedTensor(fNX); data = broadcastedData; } - if (broadcast || model.IsConstantTensor(fNX)) { + if (fInitBroadcast || model.IsConstantTensor(fNX)) { fIsOutputConstant = true; // constant output in this case - model.AddConstantTensor(fNY, model.GetTensorType(fNX), fShapeY, data); + model.AddConstantTensor(fNY, model.GetTensorType(fNX), shapeY, data); fOutputTensorNames.pop_back(); } else { - model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), shapeY); } } else { - // case input is not initialized - model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); + // // case input is not initialized + // if (shapeX.empty() && shapeDim.empty()) { + + // } + // if (fInitializedShape) + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); } fType = ConvertTypeToString(model.GetTensorType(fNX)); - if (model.Verbose()) - std::cout << "Expand - output is with shape " << ConvertShapeToString(fShapeY) << std::endl; + if (model.Verbose()) { + 
std::cout << "Expand - input " << fNX << " shape " << ConvertDimShapeToString(fShapeX) << " --> " << fNY << " shape " + << ConvertDimShapeToString(fShapeY) << (fIsOutputConstant ? ConvertValuesToString(model.GetTensorData(fNY)) + " (constant)" : "") << std::endl; + } + + if (fInitializedShape && model.IsInitializedTensor(fNShape)) { + // Shape values are fully consumed into fShapeY/fShapeDim at generation time — + // no device buffer needed for fNShape for Heterogeneous inference + model.SetNotWritableInitializedTensor(fNShape); + } } std::string GenerateInitCode() override { std::stringstream out; - if (!fIsOutputConstant && (fInitialized || fShapeX == fShapeY ) ) { - size_t length = ConvertShapeToLength(fShapeY); + if (!fIsOutputConstant && fInitialized && !fInitBroadcast) { + // shapeX and shapeY are the same in this case + auto length = ConvertDimShapeToLength(fShapeY); out << "// Copying initialized tensor " << fNX << " to " << fNY << "\n"; out << SP << "std::copy(tensor_" << fNX << ", " << "tensor_" << fNX << " + " << length << ", tensor_" << fNY << ");\n"; } return out.str(); } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { if (fIsOutputConstant) return ""; - OpName = "op_" + OpName; + opName = "op_" + opName; if (fShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Expand Op called to Generate without being initialized first"); } std::stringstream out; - out << SP << "\n//------ Expand Op" << "\n"; + out << SP << "\n//------ Expand " << opName << " --> " << ConvertDimShapeToString(fShapeY) << "\n"; + // need to declare shape parameters for non initialized shapes + if (!fInitializedShape) { + for (size_t i = 0; i < fShapeDim.size(); i++) { + out << SP << "size_t " << fShapeDim[i] << " = " << "tensor_" << fNShape << "[" << i << "];\n"; + } + } // No need to broadcast A if it's an initialized tensor or shapes are the same if (!fInitialized && fShapeX != fShapeY) { out << SP << "// Broadcasting 
uninitialized tensor " << fNX << "\n"; - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast<" << fType << ">(tensor_" << fNX << ", " << ConvertShapeToString(fShapeX) << ", " << ConvertShapeToString(fShapeY) - << ", std::span<"<(tensor_"<& shape) { + return std::all_of(shape.begin(), shape.end(), + [](const Dim& d){ return !d.isParam; }); + }; + if (!isStatic(fShapeX) || !isStatic(fShapeY)) return ""; + + // Check if broadcast is actually needed + bool needsBroadcast = (fShapeX.size() != fShapeY.size()); + if (!needsBroadcast) { + needsBroadcast = std::any_of(fShapeX.begin(), fShapeX.end(), + [&](const Dim& d) { + size_t i = &d - fShapeX.data(); + return fShapeX[i].dim != fShapeY[i].dim; + }); + } + if (!needsBroadcast) return ""; // same static shape — just a memcpy + const std::size_t D = fShapeY.size(); + // Left-pad fShapeX with dim=1 entries to match rank of fShapeY std::vector shapeX_padded(D, 1); size_t offset = D - fShapeX.size(); for (size_t i = 0; i < fShapeX.size(); ++i) - shapeX_padded[offset + i] = fShapeX[i]; + shapeX_padded[offset + i] = fShapeX[i].dim; - auto stridesX = UTILITY::ComputeStrideFromShape(shapeX_padded); - auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + std::vector shapeY_int(D); + for (size_t i = 0; i < D; ++i) + shapeY_int[i] = fShapeY[i].dim; - std::size_t totalElements = ConvertShapeToLength(fShapeY); + auto stridesX = UTILITY::ComputeStrideFromShape(shapeX_padded); + auto stridesY = UTILITY::ComputeStrideFromShape(shapeY_int); + std::size_t totalElements = ConvertShapeToLength(shapeY_int); std::string kname = "ExpandKernel_" + opName; @@ -158,13 +224,16 @@ std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + // Decompose output linear index using compile-time output strides for (std::size_t d = 0; d < D; ++d) { op += SP + SP + SP + SP + "std::size_t const out_" + 
std::to_string(d) + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " - + std::to_string(fShapeY[d]) + "u;\n"; + + std::to_string(shapeY_int[d]) + "u;\n"; } op += "\n"; + // Input index: broadcast dims (shapeX_padded[d]==1) contribute 0 — + // compiler eliminates zero terms entirely, no runtime branch op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; for (std::size_t d = 0; d < D; ++d) { if (shapeX_padded[d] == 1) { @@ -178,15 +247,31 @@ std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { } op += SP + SP + SP + SP + "output[elem_idx] = input[input_idx];\n"; - op += SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n"; + op += SP + SP + SP + "}\n"; // end grid-stride loop + op += SP + SP + "}\n"; // end operator() + op += SP + "};\n"; // end struct return op; } std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; + if (fInitialized) return ""; + + auto isStatic = [](const std::vector& shape) { + return std::all_of(shape.begin(), shape.end(), + [](const Dim& d){ return !d.isParam; }); + }; + if (!isStatic(fShapeX) || !isStatic(fShapeY)) return ""; + + // Check if broadcast is actually needed + bool needsBroadcast = (fShapeX.size() != fShapeY.size()); + if (!needsBroadcast) { + for (size_t i = 0; i < fShapeX.size(); ++i) + if (fShapeX[i].dim != fShapeY[i].dim) { needsBroadcast = true; break; } + } + if (!needsBroadcast) return ""; + opName = "op_" + opName; std::string kname = "ExpandKernel_" + opName; return SP + kname + " expandKernel_" + opName + ";\n"; @@ -198,14 +283,52 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { if (fShapeY.empty()) throw std::runtime_error("TMVA SOFIE Operator Expand called to Generate without being initialized first"); - if (fInitialized || fShapeX == fShapeY) + std::stringstream out; + out << "\n//------ EXPAND_GPU_ALPAKA\n"; + + if (fInitialized && !fInitBroadcast) { + // GenerateInitCode already handled the 
copy — nothing to do at inference time return ""; + } - std::size_t totalElements = ConvertShapeToLength(fShapeY); + auto isStatic = [](const std::vector& shape) { + return std::all_of(shape.begin(), shape.end(), + [](const Dim& d){ return !d.isParam; }); + }; + bool staticShapes = isStatic(fShapeX) && isStatic(fShapeY); + + // Check if broadcast is actually needed for static shapes + bool needsBroadcast = !staticShapes; // dynamic always needs runtime broadcast + if (staticShapes) { + needsBroadcast = (fShapeX.size() != fShapeY.size()); + if (!needsBroadcast) { + for (size_t i = 0; i < fShapeX.size(); ++i) + if (fShapeX[i].dim != fShapeY[i].dim) { needsBroadcast = true; break; } + } + } + + if (!needsBroadcast) { + // Same static shape — device-to-device copy + out << SP << "alpaka::memcpy(queue, deviceBuf_" << fNY + << ", deviceBuf_" << fNX << ");\n"; + out << SP << "alpaka::wait(queue);\n"; + return out.str(); + } + + if (!staticShapes) { + // Dynamic shapes — not yet supported on GPU, throw a clear error + throw std::runtime_error( + "TMVA SOFIE Expand GPU: dynamic shapes are not yet supported for GPU inference. 
" + "Tensor " + fNX + " has a dynamic shape."); + } + + // Static broadcast — launch the expand kernel + std::vector shapeY_int(fShapeY.size()); + for (size_t i = 0; i < fShapeY.size(); ++i) + shapeY_int[i] = fShapeY[i].dim; + std::size_t totalElements = ConvertShapeToLength(shapeY_int); std::string kname = "expandKernel_" + opName; - std::stringstream out; - out << "\n//------ EXPAND_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; out << SP << "alpaka::KernelCfg const kernelCfg_" << opName @@ -220,6 +343,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx index 06fe5a9..a56b012 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx @@ -22,9 +22,9 @@ private: std::string fNIndices; std::string fNY; - std::vector fShapeX; - std::vector fShapeIndices; - std::vector fShapeY; + std::vector fShapeX; + std::vector fShapeIndices; + std::vector fShapeY; std::vector fIndices; // indices vector in case they are known at initialization @@ -51,8 +51,12 @@ public: if (!model.CheckIfTensorAlreadyExist(fNX)) { throw std::runtime_error("TMVA SOFIE Gather Op Input Tensor " + fNX + " is not found in model"); } - fShapeX = model.GetTensorShape(fNX); - fShapeIndices = model.GetTensorShape(fNIndices); + fShapeX = model.GetDimTensorShape(fNX); + if (model.Verbose()) + std::cout << "Gather - initial shape " << ConvertDimShapeToString(fShapeX) << " shape of indices " + << ConvertDimShapeToString(model.GetDimTensorShape(fNIndices)) << 
std::endl; + // fShapeIndices can be dynamic + fShapeIndices = model.GetDimTensorShape(fNIndices); size_t q = fShapeIndices.size(); // Axis in range [0, r) where r=rank(X) size_t r = fShapeX.size(); @@ -60,18 +64,20 @@ public: if (fAttrAxis < 0) { fAttrAxis = fAttrAxis + int64_t(r); } - // empty fShapeIndices is a scalar value for the indices - size_t indicesLength = ConvertShapeToLength(fShapeIndices); + // case indices tensor is initialized if (model.IsInitializedTensor(fNIndices)) { + // empty shape Indices is a scalar value for the indices + size_t indicesLength = ConvertShapeToLength(model.GetTensorShape(fNIndices)); int64_t* indicesData = static_cast(model.GetInitializedTensorData(fNIndices).get()); - //flag index tensor as not writable (not sure this is needed since index tensor might be used in generated code) - model.SetNotWritableInitializedTensor(fNIndices); // update indices data in case of negative dim values for (size_t i = 0; i < indicesLength; i++) { - if (indicesData[i] < 0) { - indicesData[i] += fShapeX[fAttrAxis]; + // move this at generation time? 
+ if (!fShapeX[fAttrAxis].isParam) { + if (indicesData[i] < 0) { + indicesData[i] += fShapeX[fAttrAxis].dim; + } } } // Save in a vector gather Indices of size q @@ -79,65 +85,91 @@ public: } // Output shape if (model.Verbose()) - std::cout << "Gather: q and r " << q << " " << r << " shape indices " << ConvertShapeToString(fShapeIndices) << std::endl; + std::cout << "Gather: q and r " << q << " " << r << " shape indices " << ConvertDimShapeToString(fShapeIndices) << std::endl; if (fShapeY.empty()) { fShapeY.resize(q + r - 1); if (fAttrAxis > 0) { - // Copy shape of X[0, ..., axis) to Shape of Y[0, ..., axis) + // Copy shape of X[0, ..., axis-1) to Shape of Y[0, ..., axis-1) std::copy(fShapeX.begin(), fShapeX.begin() + fAttrAxis, fShapeY.begin()); } // Set shape of Y[axis, ..., axis + q) for (size_t i = 0; i < q; i++) { - fShapeY[fAttrAxis + i] = fShapeIndices[i]; + fShapeY[fAttrAxis + i] = Dim{ fShapeIndices[i]}; } - // Copy shape of X[axis + 1, ..., axis + r) to shape of Y[axis + q, ... q + r - 1) + // Copy shape of X[axis + 1, ..., r) to shape of Y[axis + q, ... 
q + r - 1) std::copy(fShapeX.begin() + fAttrAxis + 1, fShapeX.end(), fShapeY.begin() + fAttrAxis + q); } // case input is known (type is an integer) and input indices is a scalar (or vector of size 1) if (model.IsInitializedTensor(fNX) && q <= 1 && r == 1 && fIndices.size() > 0) { + auto shapeX = ConvertShapeToInt(fShapeX); // we assume model is not dynamic + auto shapeY = ConvertShapeToInt(fShapeY); if (model.GetTensorType(fNX) == ETensorType::INT64) { auto inputData = static_cast(model.GetInitializedTensorData(fNX).get()); // if q <=1 and r = 1 output length = 1 (it is a scalar) - std::vector outputData(ConvertShapeToLength(fShapeY)); + std::vector outputData(1); //ConvertShapeToLength(shapeY)); outputData[0] = inputData[fIndices[0]]; - model.AddConstantTensor(fNY, fShapeY, outputData.data()); + model.AddConstantTensor(fNY, shapeY, outputData.data()); if (model.Verbose()) - std::cout << "Gather: " << fNX << " " << ConvertShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) + std::cout << "Gather: " << fNX << " " << ConvertShapeToString(shapeX) << " -> " << fNY << " with shape " << ConvertShapeToString(shapeY) << " and values " << ConvertValuesToString(outputData) << " (constant) " << std::endl; fIsOutputConstant = true; } } + // case input is a shape tensor (r is == 1 by definition) and indices are known + else if (model.IsShapeTensor(fNX) && q <=1 && fIndices.size() > 0) { + auto inputData = model.GetShapeTensorValues(fNX); + // if r == 1 and q<=1 then output length is 1 (is a scalar or tensor of size1) + std::vector outputData(1); + outputData[0] = inputData[fIndices[0]]; + if (outputData[0].isParam) { + fIsOutputConstant = true; + // shapeY can be scalar or vector of size1 + model.AddShapeTensor(fNY, outputData, fShapeY.size() == 0); + if (model.Verbose()) + std::cout << "Gather: " << fNX << " " << ConvertDimShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) + << " and values " << 
ConvertDimShapeToString(outputData) << " (shape) " << std::endl; + } else { + int64_t value = static_cast(outputData[0].dim); + auto shapeY = ConvertShapeToInt(fShapeY); + model.AddConstantTensor(fNY, shapeY, &value); + fIsOutputConstant = true; + if (model.Verbose()) + std::cout << "Gather: " << fNX << " " << ConvertDimShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) + << " and values {" << value << "} (constant) " << std::endl; + } + } if (!fIsOutputConstant) { // Add output tensor model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); fType = ConvertTypeToString(model.GetTensorType(fNX)); if (model.Verbose()) - std::cout << "Gather: " << fNX << " " << ConvertShapeToString(fShapeX) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) - << std::endl; + std::cout << "Gather: input " << fNX << " " << ConvertDimShapeToString(fShapeX) << " indices " << fNIndices << ConvertDimShapeToString(fShapeIndices) + << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) << std::endl; } } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { + opName = "op_" + opName; + std::stringstream out; + out << "//--------- Gather " << opName << " --> " << fNY << " " << ConvertDimShapeToString(fShapeY) << "\n"; if (fIsOutputConstant) { // no code to generate here for constant output. 
Tensor output is defined in Session constructor - return "//---------------------------------------\n"; + out << "//--------------------(constant)----------\n"; + return out.str(); } - OpName = "op_" + OpName; - std::stringstream out; - out << "//--------- Gather operator \n"; // The shape of the output is q + r - 1 size_t r = fShapeX.size(); // Indices of shape q size_t q = fShapeIndices.size(); // Strides - std::vector stridesX = UTILITY::ComputeStrideFromShape(fShapeX); - std::vector stridesY = UTILITY::ComputeStrideFromShape(fShapeY); - std::vector stridesIndices = UTILITY::ComputeStrideFromShape(fShapeIndices); + auto stridesX = UTILITY::ComputeStrideFromShape(fShapeX); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + auto stridesIndices = UTILITY::ComputeStrideFromShape(fShapeIndices); // case fIndices is not known we need to correct for negative axis indices at run-time if (fIndices.empty()) { - size_t indicesLength = ConvertShapeToLength(fShapeIndices); + auto indicesLength = ConvertDimShapeToLength(fShapeIndices); out << SP << "// correct in case of negative gather indices\n"; out << SP << "for (size_t i = 0; i < " << indicesLength << "; i++){\n"; out << SP << SP << "if (tensor_" << fNIndices << "[i] < 0)\n"; @@ -145,69 +177,101 @@ public: out << SP << "}\n"; } - // Fill the output Y[j_0, j_1, ..., j_{axis - 1}, i_0, i_1, ..., i_{q - 1}, j_{axis + 1}, ..., j_{r - 1}] // [0 ... axis) [axis ... axis + q) [axis + q ... q + r - 1) // iterate in [0 ... axis) [0 ... q) [axis ... 
r - 1) // for j_0, j_1, ..., j_{axis-1} + for (size_t j = 0; j < size_t(fAttrAxis); j++) { std::string index = "j_" + std::to_string(j); - out << SP << "for (size_t " << index << " = 0; " << index << " < " << fShapeY[j] << "; " << index << "++) {\n"; + for (size_t k = 0; k <= j; k++) out << SP; + out << "for (size_t " << index << " = 0; " << index << " < " << fShapeY[j] << "; " << index << "++) {\n"; } // for i_0, i_1, ..., i_{q - 1} - if (q == 0) - out << SP << SP << "{\n"; // add a scope for local variables for (size_t i = 0; i < q; i++) { std::string index = "i_" + std::to_string(i); - out << SP << SP << "for (size_t " << index << " = " << 0 << "; " << index << " < " << fShapeIndices[i] << "; " << index << "++) {\n"; + for (size_t k = 0; k <= i + fAttrAxis; k++) out << SP; + out << "for (size_t " << index << " = " << 0 << "; " << index << " < " << fShapeIndices[i] << "; " << index << "++) {\n"; } // for j_axis, j_{axis + 1}, ..., j_{r - 1} for (size_t j = fAttrAxis; j + 1 < r; j++) { - std::string index = "j_" + std::to_string(j); - out << SP << SP << SP << "for (size_t " << index << " = 0; " << index << " < " << fShapeY[q + j] << "; " << index << "++) {\n"; + std::string index = "j_" + std::to_string(q+j); // annotate index using output axis + for (size_t k = 0; k <= q + j; k++) out << SP; + out << "for (size_t " << index << " = 0; " << index << " < " << fShapeY[q + j] << "; " << index << "++) {\n"; } - out << SP << SP << SP << "size_t y_index = 0;\n"; + // add a scope for local variables in case above loop are not done + if (fAttrAxis == 0 && q == 0 && r <= 1) + out << SP << "{ // scalar case \n"; + + // output index + for (size_t k = 0; k < q + r; k++) out << SP; + out << "size_t y_index = "; for (size_t j = 0; j < size_t(fAttrAxis); j++) { - out << SP << SP << SP << "y_index += j_" + std::to_string(j) + " * " << stridesY[j] << ";\n"; + if (j > 0) out << " + "; + out << "j_" << j; + if (stridesY[j].dim != 1) out << " * " << stridesY[j]; } for (size_t i = 0; i 
< q; i++) { - out << SP << SP << SP << "y_index += i_" + std::to_string(i) + " * " << stridesY[fAttrAxis + i] << ";\n"; + if (fAttrAxis + i > 0) out << " + "; + out << "i_" << i; + if (stridesY[fAttrAxis + i].dim != 1) out << " * " << stridesY[fAttrAxis + i]; } for (size_t j = fAttrAxis; j + 1 < r; j++) { - out << SP << SP << SP << "y_index += j_" + std::to_string(j) + " * " << stridesY[q + j] << ";\n"; + if (j + q > 0) out << " + "; + out << "j_" << q+j; + if (stridesY[q+j].dim != 1) out << " * " << stridesY[q+j]; } - // Indices - out << SP << SP << SP << "size_t i_index = 0;\n"; + // empty case + if (fAttrAxis == 0 && q == 0 && r <= 1) + out << "0"; + out << ";\n"; + + // input Indices + for (size_t k = 0; k < q + r; k++) out << SP; + out << "size_t i_index = "; for (size_t i = 0; i < q; i++) { - out << SP << SP << SP << "i_index += i_" + std::to_string(i) + " * " << stridesIndices[i] << ";\n"; + if (i > 0) out << " + "; + out << "i_" << i; + if (stridesIndices[i].dim != 1) out << " * " << stridesIndices[i]; } + // empty case + if (q == 0) + out << "0"; + out << ";\n"; + // K - out << SP << SP << SP << "size_t k = static_cast(" << "tensor_" << fNIndices << "[i_index]" << ");\n"; + for (size_t k = 0; k < q + r; k++) out << SP; + out << "size_t k = static_cast(" << "tensor_" << fNIndices << "[i_index]" << ");\n"; // Input - out << SP << SP << SP << "size_t x_index = k * " << stridesX[fAttrAxis] << ";\n"; + for (size_t k = 0; k < q + r; k++) out << SP; + out << "size_t x_index = k"; + if (stridesX[fAttrAxis].dim != 1) out << " * " << stridesX[fAttrAxis]; for (size_t j = 0; j < size_t(fAttrAxis); j++) { - out << SP << SP << SP << "x_index += j_" + std::to_string(j) + " * " << stridesX[j] << ";\n"; + out << " + "; + out << " j_" << j; + if (stridesX[j].dim != 1) out << " * " << stridesX[j]; } - for (size_t j = fAttrAxis + 1; j < r; j++) { - out << SP << SP << SP << "x_index += j_" + std::to_string(j - 1) + " * " << stridesX[j] << ";\n"; + // for input corresponding 
stride is axis+1,.... r + // loop is on j from fAttrAxis, so consider stridesX[j+1] + for (size_t j = fAttrAxis; j+1 < r; j++) { + out << " + "; + out << " j_" << q+j; + if (stridesX[j+1].dim != 1) out << " * " << stridesX[j+1]; } - out << SP << SP << SP << "tensor_" << fNY << "[y_index] = tensor_" << fNX << "[x_index];\n"; + out << ";\n"; + for (size_t k = 0; k < q + r; k++) out << SP; + out << "tensor_" << fNY << "[y_index] = tensor_" << fNX << "[x_index];\n"; // end loops j_k, j_{k + 1}, ..., j_{r - 2} - for (size_t j = fAttrAxis; j + 1 < r; j++) { - out << SP << SP << SP << "}\n"; - } - // end loops i_0, i_1, ..., i_{q - 1} - if (q == 0) - out << SP << SP << "}\n"; // end of scope for q = 0 - for (size_t i = 0; i < q; i++) { - out << SP << SP << "}\n"; - } - // end loops j_0, j_1, ..., j_{axis - 1} - for (size_t j = 0; j < size_t(fAttrAxis); j++) { - out << SP << "}\n"; + for (size_t j = q+r-1; j > 0; j--) { + for (size_t k = 0; k = static_cast(" + std::to_string(fShapeX[fAttrAxis]) + ")) " - + "k = static_cast(" + std::to_string(fShapeX[fAttrAxis]) + ") - 1;\n\n"; + op += SP + SP + SP + SP + "if (k >= static_cast(" + fShapeX[fAttrAxis].GetVal() + ")) " + + "k = static_cast(" + fShapeX[fAttrAxis].GetVal() + ") - 1;\n\n"; // x_index = k * stridesX[axis] // + sum over j in [0, axis): out_j * stridesX[j] // + sum over j in [axis+1, r): out_{j-1+q} * stridesX[j] // (the dims after axis in Y are shifted by q-1 relative to X) op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; - op += SP + SP + SP + SP + SP + "static_cast(k) * " + std::to_string(stridesX[fAttrAxis]) + "u"; + op += SP + SP + SP + SP + SP + "static_cast(k) * " + stridesX[fAttrAxis].GetVal() + "u"; for (std::size_t j = 0; j < static_cast(fAttrAxis); ++j) { op += " +\n" + SP + SP + SP + SP + SP - + "out_" + std::to_string(j) + " * " + std::to_string(stridesX[j]) + "u"; + + "out_" + std::to_string(j) + " * " + stridesX[j].GetVal() + "u"; } for (std::size_t j = fAttrAxis + 1; j < r; ++j) { // in 
Y, the coord for X's dim j lives at output dim q + j - 1 op += " +\n" + SP + SP + SP + SP + SP - + "out_" + std::to_string(q + j - 1) + " * " + std::to_string(stridesX[j]) + "u"; + + "out_" + std::to_string(q + j - 1) + " * " + stridesX[j].GetVal() + "u"; } op += ";\n\n"; @@ -314,7 +375,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { if (fShapeY.empty()) throw std::runtime_error("TMVA SOFIE Gather Op called to Generate without being initialized first"); - std::size_t totalElements = ConvertShapeToLength(fShapeY); + auto totalElements = ConvertDimShapeToLength(fShapeY); std::string kname = "gatherKernel_" + opName; std::stringstream out; @@ -335,6 +396,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx index e147e30..3fa45fa 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx @@ -294,7 +294,7 @@ public: << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; - + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 978685b..47efe01 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -23,6 +23,7 @@ namespace SOFIE{ private: bool fIsDynamic = false; + bool fBroadcastBias = false; float fAttrAlpha = 1.0; float fAttrBeta = 1.0; @@ -32,7 +33,6 @@ namespace SOFIE{ std::string fNA; std::string fNB; std::string fNC = ""; - std::string fNC2; // bias tensor name after 
broadcasting std::string fNY; std::string fType; EActivationType fActivation; @@ -40,6 +40,7 @@ namespace SOFIE{ std::vector fShapeB; std::vector fShapeC; std::vector fShapeY; + RModel * fModel = nullptr; public: @@ -48,7 +49,6 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)) { - fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; @@ -66,7 +66,7 @@ namespace SOFIE{ fActivation = activation; fType = "float"; - fInputTensorNames = { fNA, fNB, fNC }; + fInputTensorNames = {fNA, fNB, fNC}; fOutputTensorNames = { fNY }; } @@ -76,7 +76,7 @@ namespace SOFIE{ } template - std::vector> DoShapeInference(const std::vector> & input){ + std::vector DoShapeInference(const std::vector> & input){ if (input.size() > 3) throw std::runtime_error("TMVA SOFIE Gemm Op Shape Inference only need 2 or 3 input tensor"); // accept tensor with input dimensions > 2 // example: A = (d1,d2,...,N1,N2) B = (d1,d2,...,N2,N3) --> Y = (d1,d2,..,N1,N3) @@ -86,11 +86,10 @@ namespace SOFIE{ } } - std::vector> ret; // when there are 3 inputs shape of Y is the one of C if (input.size() == 3){ - ret.push_back(input[2]); //shape of C is shape of Y - return ret; + //shape of C is shape of Y + return input[2]; } // ioffset cannot be less than 2 int ioffset = input[0].size()-2; // in case of tensors with dim > 2 @@ -109,6 +108,7 @@ namespace SOFIE{ if (input[0].size() > 2 && input[1].size() == input[0].size()) { // in case of dim > 2 first dimensions are equal to the input ones not // equal to 1 (e.g. 
(1,2,3) * (2,3,4) -> (2,2,4)) + // here could probably use the Broadcasting function UTILITY::MultidirectionalBroadcastShape for (size_t i = 0; i < input[0].size()-2; i++) { Dim valueA = input[0][i]; Dim valueB = input[1][i]; @@ -117,24 +117,41 @@ namespace SOFIE{ s_y.push_back(input[0][i]); else if (valueA.GetVal() == "1") s_y.push_back(input[1][i]); + else if (!valueA.isParam && !valueB.isParam) + throw std::runtime_error("TMVA SOFIE Gemm Op - invalid input shapes " + valueA.GetVal() + " and " + + valueB.GetVal()); + else if (valueA.isParam && valueB.isParam){ + // check which parameter is first in RModel list + auto & dimNames = fModel->GetDimShapeNames(); + auto p1 = std::find(dimNames.begin(), dimNames.end(), valueA.param); + auto p2 = std::find(dimNames.begin(), dimNames.end(), valueB.param); + if (p1 < p2) s_y.push_back(input[0][i]); + else s_y.push_back(input[1][i]); + } + else if (!valueA.isParam) + s_y.push_back(input[0][i]); + else if (!valueB.isParam) + s_y.push_back(input[1][i]); else throw std::runtime_error("TMVA SOFIE Gemm Op - invalid input shapes " + valueA.GetVal() + " and " + valueB.GetVal()); } - s_y.push_back(input[0][i]); + else + s_y.push_back(input[0][i]); } } s_y.push_back(s_a[0]); s_y.push_back(s_b[1]); - ret.push_back(s_y); - return ret; + return s_y; } std::vector> ShapeInference(std::vector> input) override { - return DoShapeInference(input); + std::vector> ret; + ret.push_back(DoShapeInference(input)); + return ret; } - std::vector> DynamicShapeInference(const std::vector> & input){ + std::vector DynamicShapeInference(const std::vector> & input){ return DoShapeInference(input); } @@ -142,6 +159,7 @@ namespace SOFIE{ void Initialize(RModel& model) override { //TODO: propagate A or B as specified by ONNX standard + fModel = &model; if ((model.CheckIfTensorAlreadyExist(fNA) == false) || (model.CheckIfTensorAlreadyExist(fNB) == false) ){ //input must be a graph input, or already initialized intermediate tensor throw 
std::runtime_error("TMVA SOFIE Gemm Op Input Tensor " + fNA + " or " + fNB + " is not found in model"); @@ -152,7 +170,7 @@ namespace SOFIE{ } } if (model.IsDynamicTensor(fNA) || model.IsDimInputTensor(fNA) ) { - fShapeA = model.GetDimTensorShape(fNA); + fShapeA = model.GetDynamicTensorShape(fNA); fIsDynamic = true; } else { auto shapeA_int = model.GetTensorShape(fNA); @@ -166,7 +184,7 @@ namespace SOFIE{ } if (model.IsDynamicTensor(fNB) || model.IsDimInputTensor(fNB)) { - fShapeB = model.GetDimTensorShape(fNB); + fShapeB = model.GetDynamicTensorShape(fNB); fIsDynamic = true; } else { @@ -190,14 +208,8 @@ namespace SOFIE{ } } - fShapeY = DynamicShapeInference({fShapeA, fShapeB})[0]; - std::vector shapeY; - if (!fIsDynamic) { - shapeY = ConvertShapeToInt(fShapeY); - if (shapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertDimShapeToString(fShapeY)); - } - } + fShapeY = DynamicShapeInference({fShapeA, fShapeB}); + std::vector shapeY = ConvertShapeToInt(fShapeY); // bias is normally not dynamic (not support it for time being) if (fNC != ""){ @@ -206,38 +218,27 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is dynamic and is not supported"); } fShapeC = model.GetTensorShape(fNC); - fNC2 = fNC; - size_t lengthC = ConvertShapeToLength(fShapeC); - size_t lengthY = ConvertShapeToLength(shapeY); - // for dynamic outputs broadcasting is always done - bool broadcast_needed = lengthC != lengthY; + // for dynamic outputs broadcasting is always needed + bool broadcast_needed = false; + if (fIsDynamic && shapeY.empty()) + broadcast_needed = true; + else + // consider broadcasting also if same length + broadcast_needed = (fShapeC != shapeY); if (broadcast_needed) { - if (!model.UseSession()) { - // without session dynamic tensors not supported in Gemm - if (fIsDynamic) { - throw std::runtime_error("TMVA SOFIE Gemm Op: dynamic tensors not supported without a session"); - } - auto 
original_data = model.GetInitializedTensorData(fNC); - auto targetShape = UTILITY::UnidirectionalBroadcastShape(fShapeC, shapeY); - if (fType == "float") { - std::shared_ptr new_data_ptr(UTILITY::UnidirectionalBroadcast( - static_cast(original_data.get()), fShapeC, targetShape), - std::default_delete()); - - model.UpdateInitializedTensor(fNC, model.GetTensorType(fNC), shapeY, new_data_ptr); - fShapeC = shapeY; - } - } else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNC2 = fNC + "bcast"; - if (!fIsDynamic) { - model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); - } - else - model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + fBroadcastBias = true; + // check if broadcasting is compatible and note that prepend 1 to shapeC + auto shapeDimC = ConvertShapeToDim(fShapeC); + auto r = UTILITY::MultidirectionalBroadcastShape(fShapeY, shapeDimC); + // return flag must be equal to 1 since this is a unidirectional broadcast of C->Y + if (r.first > 1) { + throw std::runtime_error("TMVA SOFIE Gemm Op - bias tensor of shape " + ConvertShapeToString(fShapeC) + " cannot be uni-directional broadcasted to " + ConvertDimShapeToString(fShapeY)); + } + fShapeC = ConvertShapeToInt(shapeDimC); + if (fShapeC.empty()) { + throw std::runtime_error("TMVA SOFIE Gemm Op - Error in bias tensor " + ConvertDimShapeToString(shapeDimC) ); } } } @@ -256,9 +257,8 @@ namespace SOFIE{ shapeY.erase(shapeY.end()-1); } - if (!fIsDynamic){ + if (!fIsDynamic) model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), shapeY); - } else model.AddDynamicTensor(fNY, model.GetTensorType(fNA), fShapeY); @@ -273,48 +273,6 @@ namespace SOFIE{ model.AddNeededStdLib("algorithm"); } - std::string GenerateInitCode() override { - std::stringstream out; - // generate initialization code for broadcasting of bias tensor - if (fShapeC.size() != fShapeY.size() && fNC != 
fNC2) { - // we broadcast here always C in Y output, so target shape is the one of Y - // no need to call UTILITY::UnidirectionalBroadcastShape. - // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. - auto targetShape = fShapeY; - // include a separate scope to avoid defining unique operator temp variables - out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; - out << SP << "{\n"; - out << " float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertDimShapeToString(fShapeY) << ");\n"; - auto length = SOFIE::ConvertDimShapeToLength(fShapeY); // output size - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC2 << ");\n"; - out << SP << SP << "delete [] data;\n"; - out << SP << "}\n"; - } - return out.str(); - } - - std::string GenerateInitCode_GPU_ALPAKA() override { - std::stringstream out; - // generate initialization code for broadcasting of bias tensor - if (fShapeC.size() != fShapeY.size() && fNC != fNC2) { - // we broadcast here always C in Y output, so target shape is the one of Y - // no need to call UTILITY::UnidirectionalBroadcastShape. - // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. 
- auto targetShape = fShapeY; - // include a separate scope to avoid defining unique operator temp variables - auto length = SOFIE::ConvertDimShapeToLength(fShapeY); // output size - out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; - out << SP << "{\n"; - out << " std::vector data(" << length << ");\n"; - out << " SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNC << ".data()," << ConvertShapeToString(fShapeC) << ", " << ConvertDimShapeToString(fShapeY) << ", " << "std::span(data));\n"; - out << SP << SP << "auto hostBuf_"<< fNC2 << " = alpaka::createView(hostAcc, data);\n"; - out << SP << SP << "alpaka::memcpy(queue, deviceBuf_"<< fNC2 << ", hostBuf_"<< fNC2 << ");\n"; - out << SP << "}\n"; - } - return out.str(); - } - std::string Generate(std::string opName) override { opName = "op_" + opName; @@ -322,9 +280,8 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first"); } std::stringstream out; - out << "\n//--------- Gemm\n"; - out << SP << "char " << opName << "_transA = " << (fAttrTransA ? "\'t\'" : "\'n\'") << ";\n"; - out << SP << "char " << opName << "_transB = " << (fAttrTransB ? "\'t\'" : "\'n\'") << ";\n"; + out << "\n//--------- Gemm " << opName << " " << ConvertDimShapeToString(fShapeA) << " * " << ConvertDimShapeToString(fShapeB) + << " -> " << ConvertDimShapeToString(fShapeY) << "\n"; // need to consider case A and B have dim > 2 (for MatMul) int64_t dimA = fShapeA.size(); int64_t dimB = fShapeB.size(); @@ -335,26 +292,20 @@ namespace SOFIE{ auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); auto n = (fAttrTransB ? fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); auto k = (fAttrTransA ? 
fShapeA[dimA-2].GetVal() : fShapeA[dimA-1].GetVal()); + // size of A: if (transposeA) is m*k else k*m + // size of B n*k std::vector sY = {fShapeY[dimY-2], fShapeY[dimY-1]}; // extra dimensions in case of stacked MatMul - std::vector sA; + std::vector sExtraY; for (int64_t i = 0; i < dimY-2; i++) { - sA.push_back(fShapeY[i]); + sExtraY.push_back(fShapeY[i]); } - auto lengthGemm = ConvertDynamicShapeToLength(sY); // size of the Gemm operation - auto lengthExtra = ConvertDynamicShapeToLength(sA); // extra length in case input tensors are of dim>2 (MatMul) - - out << SP << "int " << opName << "_m = " << m << ";\n"; - out << SP << "int " << opName << "_n = " << n << ";\n"; - out << SP << "int " << opName << "_k = " << k << ";\n"; - out << SP << "float " << opName << "_alpha = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ";\n"; - out << SP << "float " << opName << "_beta = " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ";\n"; - out << SP << "int " << opName << "_lda = " << (fAttrTransA ? m : k) << ";\n"; - out << SP << "int " << opName << "_ldb = " << (fAttrTransB ? 
k : n) << ";\n"; + auto lengthGemm = ConvertDimShapeToLength(sY); // size of the Gemm operation + auto lengthExtra_Y = ConvertDimShapeToLength(sExtraY); // extra length in case input tensors are of dim>2 (MatMul) // case bias is present if (!fNC.empty()){ - if (fNC2 == fNC) { + if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { @@ -363,7 +314,7 @@ namespace SOFIE{ + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + out << SP << "assert(" << lengthGemm << " == " << ConvertShapeToLength(fShapeC) << ");\n"; } } } else { @@ -376,39 +327,99 @@ namespace SOFIE{ // include MatMul case where we stack the Gemm operations // exclude case where we have only 1's in the additional dims - bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra) > 1); + bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra_Y) > 1); + // compute input offset for stack multiplications + std::string lengthExtra_A; + std::string lengthExtra_B; + std::string increment_A; + std::string increment_B; + if (doStackMul) { - out << SP << "size_t " << opName << "_yoffset = 0;\n"; // needed if we stack the gemm operations - out << SP << "for (int i = 0; i < " << lengthExtra << "; i++){\n"; + std::vector sA(fShapeA.begin(), fShapeA.begin()+dimA-2); + std::vector sB(fShapeB.begin(), fShapeB.begin()+dimB-2); + std::vector mA = {fShapeA[dimA-2], fShapeA[dimA-1]}; + std::vector mB = {fShapeA[dimB-2], fShapeB[dimB-1]}; + lengthExtra_A = ConvertDimShapeToLength(sA); + lengthExtra_B = ConvertDimShapeToLength(sB); + // size of A performing matmul is m*k and n*k for B + increment_A = ConvertDimShapeToLength(mA); + increment_B = ConvertDimShapeToLength(mB); + } + bool extraA = (doStackMul && lengthExtra_A != "1"); 
+ bool extraB = (doStackMul && lengthExtra_B != "1"); + if (doStackMul) { + out << SP << "size_t " << opName << "_y_offset = 0;\n"; // needed if we stack the gemm operations + if (extraA) + out << SP << "size_t " << opName << "_A_offset = 0;\n"; + if (extraB) + out << SP << "size_t " << opName << "_B_offset = 0;\n"; + out << SP << "for (size_t i = 0; i < " << lengthExtra_Y << "; i++){\n"; out << SP; } - // in the case of bias - if (!fNC.empty()){ - out << SP << "std::copy(" << "tensor_" << fNC2 << ", " << "tensor_" << fNC2 << " + " << lengthGemm << ", " - << "tensor_" << fNY; - if (doStackMul) out << " + " << opName << "_yoffset"; - out << ");\n"; - } + // do the bias broadcasting + if (fBroadcastBias) { + fAttrBeta = 1.; + out << SP << "for (size_t j = 0; j < " << sY[0] << "; j++) { \n"; + out << SP << SP << "size_t y_index = "; + if (doStackMul) // add offset in caseof stack multiplications (not sure if bias is present in these cases) + out << opName << "_y_offset + "; + if (sY[1].GetVal() != "1") + out << sY[1] << " * j;\n"; + else + out << "j;\n"; + + out << SP << SP << "for (size_t k = 0; k < " << sY[1] << "; k++) { \n"; + std::string bias_index; + if (fShapeC[0] == 1 && fShapeC[1] == sY[1].dim) + bias_index = "k"; + else if (fShapeC[1] == 1 && fShapeC[0] == sY[0].dim) + bias_index = "j"; + else if (fShapeC[0] == 1 && fShapeC[1] == 1) // scalar case + bias_index = "0"; + else { + throw std::runtime_error("TMVA SOFIE Gemm Op - invalid shape for bias tensor " + ConvertShapeToString(fShapeC)); + } + out << SP << SP << SP << "tensor_" << fNY << "[y_index + k] = " << "tensor_" << fNC << "[" << bias_index << "];\n"; + out << SP << SP << "}\n"; + out << SP << "}\n"; + } if (fType == "float"){ - out << SP << "BLAS::sgemm_(&" << opName << "_transB, &" << opName << "_transA, &" << opName - << "_n, &" << opName << "_m, &" << opName << "_k, &" << opName << "_alpha, " << "tensor_" << fNB - << ", &" << opName << "_ldb, " << "tensor_" << fNA << ", &" << opName << "_lda, &" 
<< opName << "_beta, " + out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" << "tensor_" << fNY; - if (doStackMul) out << " + " << opName << "_yoffset"; - out << ", &" << opName << "_n);\n"; + if (doStackMul) out << " + " << opName << "_y_offset"; + out << ", " + << (fAttrTransB ? "true, " : "false, ") + << (fAttrTransA ? "true, " : "false, ") + << n << ", " << m << ", " << k << ", "; + out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ", tensor_" << fNB; + if (extraB) out << " + " << opName << "_B_offset"; + out << ", tensor_" << fNA; + if (extraA) out << " + " << opName << "_A_offset"; + out << ", " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; + // in the case of bias and no broadcasting needed + if (!fNC.empty() && !fBroadcastBias) + out << "tensor_" << fNC; + else + out << "nullptr"; + out << ");\n"; if(fActivation == EActivationType::RELU){ - out << SP << "for (int id = 0; id < " << SOFIE::ConvertDynamicShapeToLength(fShapeY) << " ; id++){\n"; + out << SP << "for (int id = 0; id < " << ConvertDimShapeToLength(fShapeY) << " ; id++){\n"; out << SP << SP << "tensor_" << fNY << "[id] = ((tensor_" << fNY << "[id] > 0 )? 
tensor_" << fNY << "[id] : 0);\n"; out << SP << "}\n"; } } if (doStackMul) { - out << SP << SP << opName << "_yoffset += " << lengthGemm << ";\n"; + out << SP << SP << opName << "_y_offset += " << lengthGemm << ";\n"; + if (lengthExtra_A != "1") + out << SP << SP << opName << "_A_offset += " << increment_A << ";\n"; + if (lengthExtra_B != "1") + out << SP << SP << opName << "_B_offset += " << increment_B << ";\n"; + out << "}\n"; // end of loop on the stacked multiplications } @@ -441,8 +452,8 @@ namespace SOFIE{ for (int64_t i = 0; i < dimY-2; i++) { sA.push_back(fShapeY[i]); } - auto lengthGemm = ConvertDynamicShapeToLength(sY); // size of the Gemm operation - auto lengthExtra = ConvertDynamicShapeToLength(sA); // extra length in case input tensors are of dim>2 (MatMul) + auto lengthGemm = ConvertDimShapeToLength(sY); // size of the Gemm operation + auto lengthExtra = ConvertDimShapeToLength(sA); // extra length in case input tensors are of dim>2 (MatMul) out << SP << "int " << opName << "_m = " << m << ";\n"; out << SP << "int " << opName << "_n = " << n << ";\n"; @@ -454,7 +465,7 @@ namespace SOFIE{ // case bias is present if (!fNC.empty()){ - if (fNC2 == fNC) { + if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { @@ -485,9 +496,9 @@ namespace SOFIE{ // in the case of bias if (!fNC.empty()){ if (fActivation == EActivationType::RELU){ - out << SP << "blas.gemmrelu("< #include - namespace SOFIE { template class ROperator_LayerNormalization : public ROperator { private: + bool fCastToFloat = false; // flag to indicate if operation 1 are in floats (to be impl) int fAttrAxis; float fAttrEpsilon; size_t fAttrStashType; @@ -30,7 +29,7 @@ private: std::vector fShapeX; std::vector fShapeScale; - std::vector fShapeB; // shape of input Bias (B) is assumed to be fully defined + std::vector fShapeB; std::vector fShapeY; std::vector fShapeMean; std::vector 
fShapeInvStdDev; @@ -39,8 +38,8 @@ private: size_t fSize; // Size of the input // size_t fAxisDim; - std::vector fNormalizedShape; - std::vector fAxesShape; + std::vector fNormalizedShape; // shape from X[ axis,...,N-1] + std::vector fAxesShape; // shape from X[0,..,axis-1] // lengths in string format std::string fLength; // Length of the input std::string fNormalizedLength; @@ -78,7 +77,7 @@ public: void Initialize(RModel& model) override { if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found."); + throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Tensor " + fNX + " not found."); } bool isDynamic = model.IsDynamicTensor(fNX); fShapeX = model.GetDimTensorShape(fNX); @@ -103,8 +102,7 @@ public: // Type of mean and std ETensorType type = (fAttrStashType == 1) ? ETensorType::FLOAT : model.GetTensorType(fNX); // Mean - if (fNMean.empty()) { - fNMean = "Mean" + fNX; + if (!fNMean.empty()) { // cannot use initializer list with one element since it is ambiguous if (isDynamic) // add size_t(-1) to indicate that shape is an expression @@ -113,29 +111,60 @@ public: model.AddIntermediateTensor(fNMean, type, std::vector(1,std::stoi(fAxesLength))); } // Inverse Standard Deviation - if (fNInvStdDev.empty()) { - fNInvStdDev = "InvStdDev" + fNX; + if (!fNInvStdDev.empty()) { if (isDynamic) model.AddIntermediateTensor(fNInvStdDev, type, std::vector(1,Dim{fAxesLength,std::size_t(-1)})); else model.AddIntermediateTensor(fNInvStdDev, type, std::vector(1,std::stoi(fAxesLength))); } + // if mean and stdev are not empty they are not defined in the output list // Cast X to float if (fAttrStashType == 1 && model.GetTensorType(fNX) != ETensorType::FLOAT) { - fNCastedX = "Casted" + fNX; - model.AddIntermediateTensor(fNCastedX, ETensorType::FLOAT, fShapeX); - fNNormalizedX = "Normalized" + fNX; - model.AddIntermediateTensor(fNNormalizedX, ETensorType::FLOAT, fShapeX); + fCastToFloat = true; + fType = "float"; + // 
fNCastedX = "Casted" + fNX; + // model.AddIntermediateTensor(fNCastedX, ETensorType::FLOAT, fShapeX); + // fNNormalizedX = "Normalized" + fNX; + // model.AddIntermediateTensor(fNNormalizedX, ETensorType::FLOAT, fShapeX); + } + // scale shape + fShapeScale = model.GetDimTensorShape(fNScale); + // appends 1 to scale shapes if missing + size_t dimScale = fShapeScale.size(); + if (dimScale < fSize) { + for (size_t i = 0; i < fSize-dimScale; i++) + fShapeScale.insert(fShapeScale.begin(), Dim{1}); + } + // check also shape if consistent now + for (size_t i = 0; i < fSize; i++) { + if (fShapeScale[i].dim != 1 && fShapeScale[i] != fShapeX[i]) + throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Scale Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale)); } - // Broadcast the bias if (!fNB.empty()) { - fShapeB = model.GetTensorShape(fNB); - size_t lengthB = ConvertShapeToLength(fShapeB); - if (isDynamic || lengthB < static_cast(std::stoi(fLength))) { - fNBroadcastedB = "Broadcasted" + fNB; - model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX); + fShapeB = model.GetDimTensorShape(fNB); + // appends 1 to bias shapes if missing + size_t dimB = fShapeB.size(); + if (dimB < fShapeX.size()) { + for (size_t i = 0; i < fSize-dimB; i++) + fShapeB.insert(fShapeB.begin(), Dim{1}); + } + for (size_t i = 0; i < fSize; i++) { + if (fShapeB[i].dim != 1 && fShapeB[i] != fShapeX[i]) + throw std::runtime_error("TMVA::SOFIE - LayerNormalization - Bias Tensor has invalid shape " + ConvertDimShapeToString(fShapeScale)); } } + + std::cout << "bias + scale " << ConvertDimShapeToString(fShapeB) << " " << ConvertDimShapeToString(fShapeScale) << std::endl; + + // // Broadcast the bias + // if (!fNB.empty()) { + // fShapeB = model.GetTensorShape(fNB); + // size_t lengthB = ConvertShapeToLength(fShapeB); + // if (isDynamic || lengthB < static_cast(std::stoi(fLength))) { + // fNBroadcastedB = "Broadcasted" + fNB; + // 
model.AddIntermediateTensor(fNBroadcastedB, ConvertStringToType(fType), fShapeX); + // } + // } model.AddNeededStdLib("cmath"); } @@ -145,8 +174,8 @@ public: if (!fNBroadcastedB.empty()) { out << SP << "// Broadcasting the bias of LayerNormalization op\n"; out << SP << "{\n"; - out << SP << SP << "float* data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_"; - out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeX) << ");\n"; + out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_"; + out << fNB << ", " << ConvertDimShapeToString(fShapeB) << ", " << ConvertDimShapeToString(fShapeX) << ");\n"; out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n"; out << SP << "delete[] data;\n"; out << SP << "}\n"; @@ -161,10 +190,6 @@ public: throw std::runtime_error("TMVA::SOFIE LayerNormalization operator " + opName + " called to generate without being initialized first."); } - if (fShapeX.size() > 5) { - throw std::runtime_error("TMVA::SOFIE LayerNormalization operator not " - "implemented for input tensor of size > 5."); - } std::stringstream out; @@ -178,10 +203,32 @@ public: } auto strides = UTILITY::ComputeStrideFromShape(fShapeX); - std::string InputIndex = "axis_0 * " + strides[0].GetVal(); + std::string inputIndex = "axis_0 * " + strides[0].GetVal(); for (size_t i = 1; i < fSize; i++) { - InputIndex += " + axis_" + std::to_string(i) + " * " + strides[i].GetVal(); + inputIndex += " + axis_" + std::to_string(i); + if (i < fSize-1) inputIndex += " * " + strides[i].GetVal(); + } + auto scaleStrides = UTILITY::ComputeStrideFromShape(fShapeScale); + std::string scaleIndex; + for (size_t i = 0; i < fSize; i++) { + if (fShapeScale[i].dim != 1) { + if (!scaleIndex.empty()) scaleIndex += " + "; + scaleIndex += "axis_" + std::to_string(i); + if ( scaleStrides[i].dim != 1) scaleIndex += " * " + scaleStrides[i].GetVal(); + } + } + if (scaleIndex.empty()) 
scaleIndex = "0"; + + auto biasStrides = UTILITY::ComputeStrideFromShape(fShapeB); + std::string biasIndex; + for (size_t i = 0; i < fSize; i++) { + if (fShapeB[i].dim != 1) { + if (!biasIndex.empty()) biasIndex += " + "; + biasIndex += "axis_" + std::to_string(i); + if ( biasStrides[i].dim != 1) biasIndex += " * " + biasStrides[i].GetVal(); + } } + if (biasIndex.empty()) biasIndex = "0"; auto axesStrides = UTILITY::ComputeStrideFromShape(fAxesShape); std::string axesIndex = "axis_" + std::to_string(0) + " * " + axesStrides[0].GetVal(); @@ -189,51 +236,33 @@ public: axesIndex += " + axis_" + std::to_string(i) + " * " + axesStrides[i].GetVal(); } - auto normalizedStrides = UTILITY::ComputeStrideFromShape(fNormalizedShape); - std::string normalizedIndex = "axis_" + std::to_string(fAxis) + " * " + normalizedStrides[0].GetVal(); - for (size_t i = fAxis + 1; i < fSize; i++) { - normalizedIndex += " + axis_" + std::to_string(i) + " * " + normalizedStrides[i - fAxis].GetVal(); - } - if (!fNCastedX.empty()) { - // Cast X to float - out << SP << "for (size_t i = 0; i < " << fLength << "; i++) {\n"; - out << SP << SP << "tensor_" << fNCastedX << "[i] = " << "static_cast(tensor_" << fNX; - out << "[i]);\n"; - out << SP << "}\n"; - } + // compute mean and std-dev. 
Save in tensors if requested out << SP << "// Compute the mean\n"; - // Loop over the normalized dimensions + + // Loop over all the outer dims in [0, fAxis) for (size_t i = 0; i < fAxis; i++) { std::string iIdx = "axis_" + std::to_string(i); out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] << "; " << iIdx << "++) {\n"; } - out << SP << SP << fType << " sum = 0.;\n"; - // loop over all the dims in [0, fAxis) + out << SP << SP << fType << " mean = 0.;\n"; + // loop over the normalized dimensions (fAxis,....,N-1) for (size_t j = fAxis; j < fSize; j++) { std::string jIdx = "axis_" + std::to_string(j); out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx << "++) {\n"; } - out << SP << SP << SP << "sum += tensor_" << fNX << "[" << InputIndex << "];\n"; + out << SP << SP << SP << "mean += tensor_" << fNX << "[" << inputIndex << "];\n"; for (size_t j = fAxis; j < fSize; j++) { out << SP << SP << "}\n"; } - out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = sum / " << fType << "("; - out << fNormalizedLength << ");\n"; - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } + out << SP << SP << "mean /= " << fType << "(" << fNormalizedLength << ");\n"; + out << SP << "// Compute the inverse Standard Deviation\n"; - // Loop over the normalized dimensions - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } + // Set sum = 0 out << SP << SP << fType << " sum = 0.;\n"; // loop over all the dims in [0, fAxis) @@ -242,91 +271,46 @@ public: out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx << "++){\n"; } - out << SP << SP << SP << "float tmp = tensor_" << fNX << "[" << InputIndex << "] - tensor_" - << fNMean << "[" << axesIndex << "];\n"; + out << SP << SP << SP 
<< "float tmp = tensor_" << fNX << "[" << inputIndex << "] - mean;\n"; out << SP << SP << SP << "sum += tmp*tmp;\n"; for (size_t j = fAxis; j < fSize; j++) { out << SP << SP << "}\n"; } - out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = 1 / std::sqrt("; + out << SP << SP << fType << " invStdDev = 1 / std::sqrt("; out << "sum / " << fType << "(" << fNormalizedLength << ") + " << fAttrEpsilon << ");\n"; - for (size_t i = 0; i < fAxis; i++) { - out << SP << "}\n"; - } - if (!fNCastedX.empty()) { - out << "// NormalizedX = InvStdDev * (CastedX - Mean)\n"; - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } - for (size_t j = fAxis; j < fSize; j++) { - std::string jIdx = "axis_" + std::to_string(j); - out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] - << "; " << jIdx << "++){\n"; - } - out << SP << SP << SP << "tensor_" << fNNormalizedX << "[" << InputIndex << "] = tensor_"; - out << fNInvStdDev << "[" << axesIndex << "] * (tensor_" << fNCastedX << "[" << InputIndex; - out << "] - tensor_" << fNMean << "[" << axesIndex << "])\n"; - for (size_t j = fAxis; j < fSize; j++) { - out << SP << SP << "}\n"; - } - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } - out << "// Y = Scale o NormalizedX"; - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } - for (size_t j = fAxis; j < fSize; j++) { - std::string jIdx = "axis_" + std::to_string(j); - out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] - << "; " << jIdx << "++){\n"; - } - out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale; - out << "[" << axesIndex << "] * 
static_cast<" << fType << ">(tensor_" << fNCastedX << "[" << InputIndex; - out << "]);\n"; - for (size_t j = fAxis; j < fSize; j++) { - out << SP << SP << "}\n"; - } - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } - } else { - out << SP << "// Y = Scale o InvStdDev (X - Mean)\n"; - for (size_t i = 0; i < fAxis; i++) { - std::string iIdx = "axis_" + std::to_string(i); - out << SP << "for (size_t " << iIdx << " = 0; " << iIdx << " < " << inputShape[i] - << "; " << iIdx << "++){\n"; - } - for (size_t j = fAxis; j < fSize; j++) { - std::string jIdx = "axis_" + std::to_string(j); - out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] - << "; " << jIdx << "++){\n"; - } - out << SP << SP << SP << "tensor_" << fNY << "[" << InputIndex << "] = tensor_" << fNScale; - out << "[" << normalizedIndex << "] * tensor_" << fNInvStdDev << "[" << axesIndex; - out << "] * (tensor_" << fNX << "[" << InputIndex << "] - tensor_" << fNMean << "["; - out << axesIndex << "]);\n"; - for (size_t j = fAxis; j < fSize; j++) { - out << SP << SP << "}\n"; - } - for (size_t i = fAxis; i < fSize; i++) { - out << SP << "}\n"; - } + + // set output mean and invStdDev if requested + if (!fNMean.empty()) + out << SP << SP << "tensor_" << fNMean << "[" << axesIndex << "] = mean;\n"; + if (!fNInvStdDev.empty()) + out << SP << SP << "tensor_" << fNInvStdDev << "[" << axesIndex << "] = invStdDev;\n"; + + // scale and add bias + + out << SP << "// Y = Scale o InvStdDev (X - Mean)\n"; + + for (size_t j = fAxis; j < fSize; j++) { + std::string jIdx = "axis_" + std::to_string(j); + out << SP << SP << "for (size_t " << jIdx << " = 0; " << jIdx << " < " << inputShape[j] << "; " << jIdx + << "++){\n"; } + out << SP << SP << SP << "tensor_" << fNY << "[" << inputIndex << "] = tensor_" << fNScale; + out << "[" << scaleIndex << "] * invStdDev * (tensor_" << fNX << "[" << inputIndex << "] - mean)"; - if (!fNB.empty()) { - std::string bias = "tensor_" + 
(fNBroadcastedB.empty() ? fNB : fNBroadcastedB); - out << SP << "// Add the bias to Y\n"; - out << SP << "int " << opName << "_n = " << fLength << ";\n"; - out << SP << "float " << opName << "_alpha = 1.;\n"; - out << SP << "int " << opName << "_inc = 1;\n"; - out << SP << "BLAS::saxpy_(&" << opName << "_n, &" << opName << "_alpha, " << bias << ", &"; - out << opName << "_inc, " << "tensor_" << fNY << ", &" << opName << "_inc);\n"; + // add bias if needed + if (!fNB.empty()) + // assume bias has index as scale + out << " + tensor_" << fNB << "[" << biasIndex << "]"; + out << ";\n"; + + // close loops on normalizing dim [..,fAxis,...fSize-1] + for (size_t j = fAxis; j < fSize; j++) { + out << SP << SP << "}\n"; + } + // close loops on the other dimensions [0,...,fAxis] + for (size_t i = 0; i < fAxis; i++) { + out << SP << "}\n"; } return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 2f7ebe7..1218b56 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -27,7 +27,8 @@ public: ROperator_LeakyRelu(){} ROperator_LeakyRelu(float alpha,std::string nameX, std::string nameY): falpha(alpha),fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) - { + { + fKind = OperatorKind::LEAKYRELU; if(std::is_same::value){ fType = "float"; } @@ -112,6 +113,7 @@ public: out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx index 3e8605e..fea9814 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx @@ -88,7 +88,7 @@ public: 
model.AddDynamicTensor(fNOutput, type, fShape); } if (model.Verbose()) { - std::cout << "Range -> output is " << fNOutput << " : " << ConvertShapeToString(fShape); + std::cout << "Range -> output is " << fNOutput << " : " << ConvertDimShapeToString(fShape); if (fIsOutputConstant) std::cout << " : " << ConvertValuesToString(model.GetTensorData(fNOutput)); std::cout << std::endl; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx index 5fb2f04..fcc3cd6 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx @@ -58,7 +58,7 @@ public: throw std::runtime_error("TMVA SOFIE Operator Relu called to Generate without being initialized first"); } std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertDimShapeToLength(fShape); out << "\n//------ RELU\n"; out << SP << "for (int id = 0; id < " << length << " ; id++){\n"; out << SP << SP << "tensor_" << fNY << "[id] = ((tensor_" << fNX << "[id] > 0 )? tensor_" << fNX << "[id] : 0);\n"; @@ -66,16 +66,20 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) { std::string op; op = "\n//------ RELU_KERNEL_ALPAKA\n"; - op += SP + "struct ReluKernel{\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements) const {\n"; - op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; - op += SP + SP + SP + "data[i] = (data[i] < 0) ? 
0 : data[i];\n"; + + op = "\n//------ RELU_KERNEL_ALPAKA\n"; + op += "struct ReluKernel {\n"; + op += SP + "template\n"; + op += SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const* __restrict__ data, T* __restrict__ out, std::size_t numElements) const {\n"; + op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (idx < numElements) {\n"; + op += SP + SP + SP + "out[idx] = data[idx] >= T(0) ? data[idx] : 0;\n"; op += SP + SP + "}\n"; - op += SP + "}\n};\n"; + op += SP + "}\n"; + op += "};\n"; return op; } @@ -88,11 +92,18 @@ public: if (fShape.empty()) { throw std::runtime_error("TMVA SOFIE Operator Relu called to Generate without being initialized first"); } + std::stringstream out; - auto length = ConvertDynamicShapeToLength(fShape); + auto length = ConvertDimShapeToLength(fShape); out << "\n//------ RELU_GPU_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(stoi(length)+256-1)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNY + << ", reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx index 2fa72b5..2b3391c 100644 --- 
a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx @@ -69,6 +69,8 @@ public: fAttrAxes(attrAxes) { assert(fOpMode == Squeeze || fOpMode == Unsqueeze); + fInputTensorNames = { fNData }; + fOutputTensorNames = { fNOutput }; } // output type is same as input @@ -107,6 +109,9 @@ public: if (IsInteger(tmp_length) && IsInteger(input_length)) output_shape[i] = Dim{static_cast(std::stoi(input_length) / std::stoi(tmp_length))}; + else if (IsInteger(tmp_length) && std::stoi(tmp_length) == 1) { + output_shape[i] = Dim{input_length, static_cast(-1)}; + } else { //we can try simplifying expression if tmp_length is integer and part of input_length // contains tmp_length @@ -162,11 +167,11 @@ public: } if (fVerbose) - std::cout << "Reshape: correct output shape to " << ConvertShapeToString(output_shape) << std::endl; + std::cout << "Reshape: correct output shape to " << ConvertDimShapeToString(output_shape) << std::endl; if (!fDimInput && ConvertDimShapeToLength(output_shape) != ConvertDimShapeToLength(input_shape)) { - throw std::runtime_error("TMVA Reshape Op : Invalid shapes : " + ConvertShapeToString(input_shape) + - ConvertShapeToString(output_shape)); + throw std::runtime_error("TMVA Reshape Op : Invalid shapes : " + ConvertDimShapeToString(input_shape) + + ConvertDimShapeToString(output_shape)); } ret.push_back(output_shape); @@ -194,14 +199,23 @@ public: } } } else { - auto &axes = fAttrAxes; + std::cout << "getting shape for Squeeze...from attribute\n"; + auto axes = fAttrAxes; for (size_t i = 0; i < axes.size(); i++) { + std::cout << i << " " << axes[i] << std::endl; if (axes[i] < 0) axes[i] += input_shape.size(); if (!(output_shape[axes[i]] == Dim{1})) throw std::runtime_error("TMVA Squeeze Op : Invalid axis value " + std::to_string(axes[i]) + - " for " + ConvertShapeToString(output_shape)); - output_shape.erase(output_shape.begin() + axes[i]); + " for " + ConvertDimShapeToString(output_shape)); + } + // for calling 
vector::erase we must sort axes in decreasing order to avoid + std::sort(axes.begin(), axes.end(), std::greater()); + for (auto & axis : axes) { + std::cout << "erase give axis " << axis << " -> "; + for (auto & o : output_shape) std::cout << o << " , "; + std::cout << std::endl; + output_shape.erase(output_shape.begin() + axis); } } ret.push_back(output_shape); @@ -231,8 +245,10 @@ public: void Initialize(RModel& model) override { - std::cout << "initialize reshape op type " << fOpMode << " - " << fNInput2 << " " << fNData << std::endl; fVerbose = model.Verbose(); + if (fVerbose) + std::cout << "initialize reshape op type " << fOpMode << " - " << fNInput2 << " " << fNData << std::endl; + if (model.CheckIfTensorAlreadyExist(fNData) == false) { // input must be a graph input, or already initialized intermediate tensor throw std::runtime_error("TMVA Reshape Op Input Tensor " + fNData + " is not found in model"); @@ -242,7 +258,7 @@ public: // check if optional tensor exists defining shape or axes if (!fNInput2.empty()) { if (model.CheckIfTensorAlreadyExist(fNInput2)) { - if (model.IsConstantTensor(fNInput2) || model.IsInitializedTensor(fNInput2)) { + if (model.IsInitializedTensor(fNInput2)) { // assume input shape is an initialized tensor auto dptr = model.GetInitializedTensorData(fNInput2); auto values = static_cast(dptr.get()); @@ -259,6 +275,9 @@ public: fShapeOutput = ShapeInference({fShapeInput})[0]; // set flag to not write tensor in weight file. 
Its data will be hard-coded in way model is constructed model.SetNotWritableInitializedTensor(fNInput2); + } else if (model.IsShapeTensor(fNInput2)) { + auto shapeData = model.GetShapeTensorValues(fNInput2); + fShapeOutput = shapeData; } else { // we cannot get shape at initialization time but at run-time fDynamicShape = true; @@ -290,7 +309,7 @@ public: throw std::runtime_error("TMVA Reshape Op : Invalid Input/Output lengths"); model.AddConstantTensor(fNOutput, o_shape, inputData); if (model.Verbose()) { - std::cout << Name() << " : " << fNData << " " << ConvertShapeToString(fShapeInput) << " --> " << fNOutput << " (constant) " << ConvertShapeToString(fShapeOutput) << " : " << + std::cout << Name() << " : " << fNData << " " << ConvertDimShapeToString(fShapeInput) << " --> " << fNOutput << " (constant) " << ConvertDimShapeToString(fShapeOutput) << " : " << ConvertValuesToString(ConvertShapeToLength(o_shape), inputData) << std::endl; } } @@ -300,15 +319,15 @@ public: auto inputData = model.GetShapeTensorValues(fNData); model.AddShapeTensor(fNOutput, inputData); if (model.Verbose()) { - std::cout << Name() << " : " << fNData << " " << ConvertShapeToString(fShapeInput) << " --> " << fNOutput << " (shape) " << ConvertShapeToString(fShapeOutput) << " : " << - ConvertShapeToString(inputData) << std::endl; + std::cout << Name() << " : " << fNData << " " << ConvertDimShapeToString(fShapeInput) << " --> " << fNOutput << " (shape) " << ConvertDimShapeToString(fShapeOutput) << " : " << + ConvertDimShapeToString(inputData) << std::endl; } } else { // non-constant case model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); if (model.Verbose()) - std::cout << Name() << " : " << fNData << " " << ConvertShapeToString(fShapeInput) << " --> "<< fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + std::cout << Name() << " : " << fNData << " " << ConvertDimShapeToString(fShapeInput) << " --> "<< fNOutput << " " << 
ConvertDimShapeToString(fShapeOutput) << std::endl; } } @@ -324,7 +343,7 @@ public: else if (fOpMode == Unsqueeze) opType = "Unsquueze"; - out << SP << "///--------" << opType << " operator " << opName << " --> " << ConvertShapeToString(fShapeOutput) << "\n"; + out << SP << "///--------" << opType << " operator " << opName << " --> " << ConvertDimShapeToString(fShapeOutput) << "\n"; // in case of dynamic output shape we need to set the shape value from input shape tensor // and take case of the zero values @@ -352,7 +371,6 @@ public: << ");\n"; return out.str(); } - std::string Generate_GPU_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx index e436b74..5b17a79 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx @@ -273,7 +273,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNI << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNU << ")" << ", static_cast(" << totalElements << "));\n"; - + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 73b32a3..77f989c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -99,6 +99,7 @@ public: out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx index 0c5762b..c9af13e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ 
b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx @@ -252,6 +252,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNYs[i] << ")" << ", static_cast(" << length << "));\n"; + out << SP <<"alpaka::wait(queue);\n"; out << SP << "}\n"; } return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx index 36a93c5..608308c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx @@ -21,7 +21,7 @@ private: std::string fNY; std::vector fShapeInput; std::vector fShapeY; - std::vector fRepeats; // populated in Initialize() if repeats are known at generation time + std::vector fRepeats; public: ROperator_Tile(){} @@ -72,7 +72,9 @@ public: // which will cause the kernel to use the runtime repeats pointer path. fRepeats.resize(num_elements); std::copy(repeats_data, repeats_data + num_elements, fRepeats.begin()); - + if (fRepeats.size()){ + model.RemoveInitializedTensor(fNRepeats); + } fShapeY = ShapeInference({fShapeInput, fRepeats})[0]; model.AddIntermediateTensor(fNY, model.GetTensorType(fNInput), fShapeY); @@ -239,7 +241,7 @@ public: << ", devAcc, " << kname << ", " << args << ");\n"; out << SP << "alpaka::exec(queue, workDiv_" << opName << ", " << kname << ", " << args << ");\n"; - + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } @@ -247,4 +249,4 @@ public: }//SOFIE -#endif //SOFIE_ROPERATOR_Tile \ No newline at end of file +#endif //SOFIE_ROPERATOR_Tile diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index 92a2eee..de33544 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -172,21 +172,27 @@ public: op += SP + "struct TransposeKernel_" + OpName + " {\n"; op += SP + SP + "template\n"; op += SP + SP + "ALPAKA_FN_ACC 
void operator()(TAcc const& acc, T const* input, T* output,"; - op += "const std::size_t totalElements) const {\n"; + op += "const std::size_t totalElements) const {\n"; op += SP + SP + SP + SP + "auto const idx = alpaka::getIdx(acc)[0];\n"; op += SP + SP + SP + SP + "if(idx >= totalElements) return;\n"; op += SP + SP + SP + SP + "std::size_t input_idx = 0;\n"; op += SP + SP + SP + SP + "std::size_t remaining = idx;\n"; op += SP + SP + SP + SP + "std::size_t coord;\n"; - auto inputStrides = UTILITY::ComputeStrideFromShape(fShapeData); + + auto inputStrides = UTILITY::ComputeStrideFromShape(fShapeData); auto outputStrides = UTILITY::ComputeStrideFromShape(fShapeOutput); - for (size_t k = 0; k < fShapeData.size(); k++) { - op += SP + SP + SP + SP + "coord = remaining * " + std::to_string(1/outputStrides[k]) + ";\n"; - op += SP + SP + SP + SP + "remaining = remaining - coord * " + std::to_string(outputStrides[k]) + ";\n"; - op += SP + SP + SP + SP + "input_idx += coord * " + std::to_string(inputStrides[fAttrPerm[k]]) + ";\n"; + + for (size_t k = 0; k < fShapeData.size(); k++) { + op += SP + SP + SP + SP + "coord = remaining / " + + std::to_string(outputStrides[k]) + "u;\n"; + op += SP + SP + SP + SP + "remaining = remaining - coord * " + + std::to_string(outputStrides[k]) + "u;\n"; + op += SP + SP + SP + SP + "input_idx += coord * " + + std::to_string(inputStrides[fAttrPerm[k]]) + "u;\n"; } - op += SP + SP + SP + SP + SP + "output[idx] = input[input_idx];\n"; - op += SP + SP + SP + SP + "}\n"; + + op += SP + SP + SP + SP + "output[idx] = input[input_idx];\n"; + op += SP + SP + SP + "}\n"; op += SP + SP + SP + "};\n"; return op; @@ -204,15 +210,15 @@ public: auto length = ConvertShapeToLength(fShapeOutput); out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; - out << SP << "auto const elementsPerThread_"<(1));\n"; - out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNData << " = {elementsPerGrid_" << fNData << ", elementsPerThread_" << fNData << "};\n"; - out << 
SP << "auto const workDiv_" << fNData << " = alpaka::getValidWorkDiv(kernelCfg_" << fNData << ", devAcc, transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNOutput << " = {elementsPerGrid_" << fNOutput << ", elementsPerThread_" << fNOutput << "};\n"; + out << SP << "auto const workDiv_" << fNOutput << " = alpaka::getValidWorkDiv(kernelCfg_" << fNOutput << ", devAcc, transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNData + out << SP << "alpaka::exec(queue, workDiv_" << fNOutput << ", transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; - + out << SP <<"alpaka::wait(queue);\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx index 19d217d..2a55700 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx @@ -9,8 +9,6 @@ namespace SOFIE{ - - template class ROperator_Where final : public ROperator{ private: @@ -103,7 +101,7 @@ public: if (model.IsInitializedTensor(fNA)) { auto data = model.GetInitializedTensorData(fNA); std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), std::default_delete()); // Update the data and the shape of A model.AddConstantTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY, broadcastedData); @@ -119,7 +117,7 @@ public: if (model.IsInitializedTensor(fNB)) { auto data = model.GetInitializedTensorData(fNB); std::shared_ptr broadcastedData( - 
UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), std::default_delete()); // do not update tensor B but add broadcasted one (since it can be input to some other operators) model.AddConstantTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY, broadcastedData); @@ -135,7 +133,7 @@ public: if (model.IsInitializedTensor(fNC)) { auto data = model.GetInitializedTensorData(fNC); std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeC, fShapeY), + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeC, fShapeY), std::default_delete()); // do not update tensor C but add broadcasted one (since it can be input to some other operators) model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeY, broadcastedData); @@ -217,7 +215,7 @@ public: } if (fIsOutputConstant && model.Verbose()) std::cout << "Where op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ((dataY.size() > 0) ? ConvertValuesToString(dataY) : ConvertShapeToString(shapeDataY) ) + << ((dataY.size() > 0) ? ConvertValuesToString(dataY) : ConvertDimShapeToString(shapeDataY) ) << ((dataY.size() > 0) ? 
" (constant)" : " (shape)") << std::endl; // output is a constant tensor @@ -254,34 +252,34 @@ public: if (fShapeA != fShapeY) { out << SP << "// Broadcasting uninitialized tensor " << fNA << "\n"; //out << SP << "{\n"; - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY) - << ", fTensor_" << fNBroadcastedA << ");\n"; + out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY) + << ", tensor_" << fNBroadcastedA << ");\n"; } // Broadcast B if it's uninitialized if (fShapeB != fShapeY) { out << SP << "// Broadcasting uninitialized tensor " << fNB << "\n"; //out << SP << "{\n"; - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY) - << ", fTensor_" << fNBroadcastedB << ");\n"; + out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY) + << ", tensor_" << fNBroadcastedB << ");\n"; } // Broadcast C if it's uninitialized if (fShapeC != fShapeY) { // special case if C is an input tensor if (fIsInputBoolTensor) { size_t inputLength = ConvertShapeToLength(fShapeC); - out << SP << "std::vector fTensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n"; + out << SP << "std::vector tmp_tensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n"; } out << SP << "// Broadcasting uninitialized tensor " << fNC << "\n"; //out << SP << "{\n"; - out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast(fTensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) - << ", fTensor_" << fNBroadcastedC << ");\n"; + out 
<< SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tmp_tensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) + << ", tensor_" << fNBroadcastedC << ");\n"; } std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA; std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC; out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; // get output tensor applying condition - out << SP << SP << "tensor_" << fNY << "[id] = " << "(fTensor_" << nameC << "[id]) ? tensor_" + out << SP << SP << "tensor_" << fNY << "[id] = " << "tensor_" << nameC << "[id] ? tensor_" << nameA << "[id] : tensor_" + nameB + "[id];\n"; out << SP << "}\n"; return out.str(); @@ -291,5 +289,4 @@ public: }//SOFIE - -#endif // SOFIE_ROperator_Where +#endif //TMVA_SOFIE_ROperator_Where diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index e7e5ef2..d59eee8 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -162,6 +162,14 @@ template<> struct TensorType { static const std::string Name() { return "bool"; } }; +template<> +struct TensorType { + static const std::string Name() { return "int8_t"; } +}; +template<> +struct TensorType { + static const std::string Name() { return "uint8_t"; } +}; struct TensorMemoryInfo { std::string_view tensor_name; @@ -194,21 +202,12 @@ std::vector ConvertShapeToDim(const std::vector & shape); std::vector ConvertShapeToInt(const std::vector & shape); -inline std::size_t ConvertShapeToLength(const std::vector & shape){ - // Empty shape represent scalar values, so we return a length=1 - std::size_t fLength = 1; - for (auto& dim: shape) fLength *= dim; - return fLength; -} +std::size_t ConvertShapeToLength(const std::vector & shape); std::string ConvertShapeToString(const std::vector & shape); std::string 
ConvertDimShapeToString(const std::vector & shape); -std::string ConvertShapeToString(const std::vector & shape); - - std::string ConvertDimShapeToLength(const std::vector & shape); -std::string ConvertDynamicShapeToLength(const std::vector & shape); template @@ -228,8 +227,11 @@ std::string ConvertValuesToString(size_t n, const T * data) { ret << "{ "; for (size_t i = 0; i < n; i++) { if (std::is_floating_point_v) - ret << std::setprecision(std::numeric_limits::max_digits10); - ret << data[i]; + ret << std::setprecision(std::numeric_limits::max_digits10) << data[i]; + else + // cast in case of boolean (int8) + ret << (int64_t) data[i]; + if (i < n-1) ret << ", "; } ret << "}"; @@ -255,8 +257,14 @@ public: bool IsConstantTensor() const { return fConstant;} // query if tensor needs to be written in a weight file. Constant tensors are not written in a file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} + // check if a Tensor is Writable (need to be written in the file or in the generated code (e.g. as a constant tensor) + // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in + // the generated code + bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. 
tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} + // set as constant (needed for non-float initialized tensors) + void SetConstant() { fConstant = true;} template T const *data() const @@ -279,7 +287,7 @@ public: case ETensorType::INT64: fSize *= sizeof(int64_t); break; case ETensorType::BOOL: fSize *= sizeof(bool); break; default: - throw std::runtime_error("TMVA::SOFIE doesn't yet supports serialising data-type " + + throw std::runtime_error("SOFIE doesn't yet supports serialising data-type " + ConvertTypeToString(fType)); } fPersistentData = static_cast(fData.get()); @@ -365,7 +373,7 @@ T* BroadcastConvBias(const T* data, const size_t channel, const std::vector, class ContT = std::span > -void BroadcastTensor(ConstContT data, const std::vector& shape, const std::vector& targetShape, ContT broadcastedData) { +template> +void BroadcastTensor(ConstContT data, const std::vector& shape, const std::vector& targetShape, T *broadcastedData) { // Size of the shapes (tensor input here have shapes with same sizes, we have already added the needed ones ) size_t size = shape.size(); // Current length of the broadcasted tensor size_t curLength = data.size(); - size_t targetLength = broadcastedData.size(); - assert(ConvertShapeToLength(targetShape) == targetLength); // special case when broadcasting last dimensions (initial shapes must be the same) if (size > 1 && shape.front() == targetShape.front() && shape.back() == 1) { size_t bsize = targetShape.back(); @@ -419,16 +425,16 @@ void BroadcastTensor(ConstContT data, const std::vector& shape, const st bsize *= targetShape[k]; } for (size_t i = 0; i < curLength; i++) { - std::fill(broadcastedData.begin() + i*bsize, broadcastedData.begin() + (i+1)*bsize , data[i]); + std::fill(broadcastedData + i*bsize, broadcastedData + (i+1)*bsize , data[i]); } return; } - std::copy(data.begin(), data.end(), broadcastedData.begin()); + std::copy(data.begin(), data.end(), broadcastedData); // Product of 
the previous dimensions of targetShape size_t arrayNum = 1; // New broadcasted data: is this needed? - std::vector newData(targetLength); + std::vector newData(ConvertShapeToLength(targetShape)); for (size_t idx = 0; idx < size; idx++) { size_t dim = shape[idx]; @@ -444,8 +450,8 @@ void BroadcastTensor(ConstContT data, const std::vector& shape, const st for (size_t arrayIdx = 0; arrayIdx < arrayNum; arrayIdx++) { for (size_t targetIdx = 0; targetIdx < targetDim; targetIdx++) { size_t offset = arrayIdx * arrayLength * targetDim + targetIdx * arrayLength; - std::copy(broadcastedData.begin() + arrayIdx * arrayLength, - broadcastedData.begin() + (arrayIdx + 1) * arrayLength, + std::copy(broadcastedData + arrayIdx * arrayLength, + broadcastedData + (arrayIdx + 1) * arrayLength, newData.begin() + offset); } } @@ -459,12 +465,11 @@ void BroadcastTensor(ConstContT data, const std::vector& shape, const st // Update current length curLength = newLength; // Update broadcasted data - std::copy(newData.begin(), newData.begin() + newLength, broadcastedData.begin()); + std::copy(newData.begin(), newData.begin() + newLength, broadcastedData); } // Update the number of arrays arrayNum *= targetDim; } - //return broadcastedData; } // interface where we allocate a new array for broadcasted data @@ -472,13 +477,10 @@ template T* CreateBroadcastTensor(const T* data, const std::vector& shape, const std::vector& targetShape, size_t targetLength) { // newShape is an array of size equal to dimension along which we are broadcasting the tensor T* broadcastedData = new T[targetLength]; - std::span bData(broadcastedData, broadcastedData+targetLength); size_t curLength = ConvertShapeToLength(shape); - std::span inData(data, curLength); - BroadcastTensor, std::span>(inData, shape, targetShape, bData); + BroadcastTensor({data, curLength}, shape, targetShape, broadcastedData); return broadcastedData; } - // Unidirectional broadcasting shape to targetShape// In unidirectional broadcast - only 
tensor B can have the shape changed not // tensor A - otherwise is a multidirectional broadcast template @@ -489,14 +491,14 @@ T* UnidirectionalBroadcast(const T* data, const std::vector& shape, cons std::vector newShape(targetSize, 1); size_t offset = targetSize - shape.size(); std::copy(shape.begin(), shape.end(), newShape.begin() + offset); - return CreateBroadcastTensor(data, newShape, targetShape, ConvertShapeToLength(targetShape)); + return CreateBroadcastTensor(data, newShape, targetShape, ConvertShapeToLength(targetShape)); } - return CreateBroadcastTensor(data, shape, targetShape, ConvertShapeToLength(targetShape)); + return CreateBroadcastTensor(data, shape, targetShape, ConvertShapeToLength(targetShape)); } // Unidirectional broadcasting shape to targetShape using a passed vector to avoid allocations template -void UnidirectionalBroadcast(const T* data, const std::vector& shape, const std::vector& targetShape, std::span broadcastedData) { +void UnidirectionalBroadcast(const T* data, const std::vector& shape, const std::vector& targetShape, T *broadcastedData) { size_t curLength = ConvertShapeToLength(shape); std::span inData(const_cast(data), curLength); // Prepend shape with ones @@ -505,9 +507,9 @@ void UnidirectionalBroadcast(const T* data, const std::vector& shape, co std::vector newShape(targetSize, 1); size_t offset = targetSize - shape.size(); std::copy(shape.begin(), shape.end(), newShape.begin() + offset); - BroadcastTensor(inData, newShape, targetShape, broadcastedData); + BroadcastTensor(inData, newShape, targetShape, broadcastedData); } - BroadcastTensor>(inData, shape, targetShape, broadcastedData); + BroadcastTensor(inData, shape, targetShape, broadcastedData); } /// compute stride of a tensor given its shape (assume layout is row-major) @@ -697,20 +699,20 @@ extern "C" void sgemm_(const char * transa, const char * transb, const int * m, struct GNN_Data { - SOFIE::RTensor node_data; // the node feature data, tensor with shape (num_nodes, 
num_node_features) - SOFIE::RTensor edge_data; // the edge feature data, tensor with shape (num_edges, num_edge_features) - SOFIE::RTensor global_data; // the global features, tensor with shape (1, num_global_features) - SOFIE::RTensor edge_index; // the edge index (receivers and senders for each edge), tensor with shape (2, num_edges) + RTensor node_data; // the node feature data, tensor with shape (num_nodes, num_node_features) + RTensor edge_data; // the edge feature data, tensor with shape (num_edges, num_edge_features) + RTensor global_data; // the global features, tensor with shape (1, num_global_features) + RTensor edge_index; // the edge index (receivers and senders for each edge), tensor with shape (2, num_edges) // edge_index[0,:] are the receivers and edge_index[1,:] are the senders // need to have default constructor since RTensor has not one - GNN_Data(): node_data(SOFIE::RTensor({})), edge_data(SOFIE::RTensor({})), global_data(SOFIE::RTensor({})), edge_index(SOFIE::RTensor({})) {} + GNN_Data(): node_data(RTensor({})), edge_data(RTensor({})), global_data(RTensor({})), edge_index(RTensor({})) {} }; template -SOFIE::RTensor Concatenate( SOFIE::RTensor & t1, SOFIE::RTensor & t2, int axis = 0) +RTensor Concatenate( RTensor & t1, RTensor & t2, int axis = 0) { // concatenate tensor along axis. 
Shape must be the same except in the dimension of the concatenated axis if (t1.GetMemoryLayout() != t2.GetMemoryLayout()) @@ -725,8 +727,8 @@ SOFIE::RTensor Concatenate( SOFIE::RTensor & t1, SOFIE::RTensor & t2, } std::vector outShape = shape1; outShape[axis] = shape1[axis] + shape2[axis]; - SOFIE::RTensor tout(outShape, t1.GetMemoryLayout()); - if (t1.GetMemoryLayout() == SOFIE::MemoryLayout::ColumnMajor) { + RTensor tout(outShape, t1.GetMemoryLayout()); + if (t1.GetMemoryLayout() == MemoryLayout::ColumnMajor) { throw std::runtime_error("TMVA RTensor Concatenate is not yet supported for column major tensors"); } @@ -759,10 +761,10 @@ inline GNN_Data Concatenate(GNN_Data & data1, GNN_Data & data2, int axis = 0) { inline GNN_Data Copy(const GNN_Data & data) { GNN_Data out; - out.node_data = SOFIE::RTensor(data.node_data.GetShape()); - out.edge_data = SOFIE::RTensor(data.edge_data.GetShape()); - out.global_data = SOFIE::RTensor(data.global_data.GetShape()); - out.edge_index = SOFIE::RTensor(data.edge_index.GetShape()); + out.node_data = RTensor(data.node_data.GetShape()); + out.edge_data = RTensor(data.edge_data.GetShape()); + out.global_data = RTensor(data.global_data.GetShape()); + out.edge_index = RTensor(data.edge_index.GetShape()); std::copy(data.node_data.GetData(), data.node_data.GetData()+ data.node_data.GetSize(), out.node_data.GetData()); std::copy(data.edge_data.GetData(), data.edge_data.GetData()+ data.edge_data.GetSize(), out.edge_data.GetData()); std::copy(data.global_data.GetData(), data.global_data.GetData()+ data.global_data.GetSize(), out.global_data.GetData()); @@ -809,6 +811,23 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } } + +// code for the memory greeding allocations +struct TensorLifeInfo { + int begin; // start time (op index) lifetime + int end; // end time lifetime + size_t size; // size of tensors in bytes +}; + +struct MemoryResult { + std::size_t total_bytes = 0; // total memory needed + 
std::vector offsets; // resulted offsets for each tensor +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ); + + inline std::string ConvertOutputTypeToString(ETensorType t) { // The std::vector is a special type that is not wrapping continuous memory. // We don't want to use it as a return type. @@ -816,6 +835,7 @@ inline std::string ConvertOutputTypeToString(ETensorType t) { return ConvertTypeToString(t); } + } // namespace SOFIE -#endif //SOFIE_COMMON +#endif //TMVA_SOFIE_COMMON diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index 456cf23..3dd1d23 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -311,6 +311,15 @@ std::shared_ptr RModel::GetInitializedTensorData(std::string tensor_name) } } +void RModel::RemoveInitializedTensor(std::string tensor_name) { + auto f = fInitializedTensors.find(tensor_name); + if (f == fInitializedTensors.end()) { + throw std::runtime_error("TMVA-SOFIE: tensor " + tensor_name + " not found when trying to remove it"); + } else { + fInitializedTensors.erase(f); + } +} + void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { auto t = fInitializedTensors.find(tensor_name); if (t == fInitializedTensors.end()) { @@ -540,7 +549,7 @@ void RModel::Initialize(const std::map & inputParams, bool auto shape = ConvertShapeToInt(input.second.shape); if (verbose) std::cout << "converting input shape for " << input.first << " " << ConvertShapeToString(shape) << " from " - << ConvertShapeToString(input.second.shape) << std::endl; + << ConvertDimShapeToString(input.second.shape) << std::endl; if (!shape.empty()) { // case shape is defined (not parametric) we add the tensor in the fReadyInputTensorInfos map and // we remove the tensor from the fInputTensorInfo where th eold parametric shape was stored @@ -643,6 +652,7 @@ void RModel::GenerateInitializedTensorInfo() fGC += "// initialized 
tensors\n"; for (auto &i : fInitializedTensors) { + if (i.second.IsNotWritable()) continue; if (!fUseWeightFile || i.second.IsConstantTensor()) { if (i.second.type() == ETensorType::FLOAT) { fGC += GenerateConstantTensorCode(i); @@ -749,7 +759,7 @@ void RModel::GenerateDynamicTensorInfo() { std::stringstream out; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); + auto length = ConvertDimShapeToLength(i.second.shape); out << SP << "if (" << length << " > 0) {\n"; out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; @@ -1331,7 +1341,7 @@ void RModel::PrintOutputTensors() { if (!IsDynamicTensor(it)) std::cout << "shape: " << ConvertShapeToString(GetTensorShape(it)) << std::endl; else - std::cout << "shape: " << ConvertShapeToString(GetDynamicTensorShape(it)) << std::endl; + std::cout << "shape: " << ConvertDimShapeToString(GetDimTensorShape(it)) << std::endl; } std::cout << "\n"; } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 6c2d293..f1945b7 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -113,7 +113,7 @@ void RModel::GenerateDynamicTensorInfo_GPU_ALPAKA() { std::stringstream out; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); + auto length = ConvertDimShapeToLength(i.second.shape); out << SP << "if (" << length << " > 0) {\n"; out << "auto bufDev_" + i.first + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" << length << "}));\n"; @@ -207,7 +207,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); } - fGC += "\n\n alpaka::wait(queue);\n"; + // fGC += "\n\n alpaka::wait(queue);\n"; fGC += SP + "return "; if (outputSize>1) fGC += " {"; for (size_t i = 0; i < outputSize; i++) { @@ -336,7 +336,7 @@ void 
RModel::GenerateSessionCode_GPU_ALPAKA() { ReadInitializedTensorsFromFile(0); fGC += "\n"; } - + MoveInitializedTensorsToBuffers_ALPAKA(); GenerateDynamicTensorInfo_GPU_ALPAKA(); @@ -380,7 +380,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { } void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int batchSize, bool verbose) { - fVerbose = verbose; + fVerbose = true; fBatchSize = batchSize; if (static_cast>(Options::kNoSession) & options) { @@ -425,6 +425,7 @@ void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int bat void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ for (auto &i : fInitializedTensors) { + if (i.second.IsNotWritable()) continue; std::string tensor_name = "tensor_" + i.first; auto length = ConvertShapeToLength(i.second.shape()); std::string slength = std::to_string(length); diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/src/SOFIE_core/src/SOFIE_common.cxx index 05f873b..cd1b60a 100644 --- a/src/SOFIE_core/src/SOFIE_common.cxx +++ b/src/SOFIE_core/src/SOFIE_common.cxx @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace SOFIE { @@ -46,6 +48,14 @@ std::vector ConvertShapeToInt(const std::vector & shape){ return ret_shape; } + +std::size_t ConvertShapeToLength(const std::vector & shape){ + // Empty shape represent scalar values, so we return a length=1 + std::size_t fLength = 1; + for (auto& dim: shape) fLength *= dim; + return fLength; +} + std::string ConvertTypeToString(ETensorType type){ switch(type){ case ETensorType::FLOAT : { @@ -79,7 +89,7 @@ std::string ConvertTypeToString(ETensorType type){ return "double"; } case ETensorType::BOOL : { - return "bool"; + return "uint8_t"; } default:{ return "other_" + std::to_string( (int) type); @@ -120,7 +130,7 @@ std::string ConvertDimShapeToString(const std::vector & shape) { std::stringstream out; out << "{ "; for (size_t i = 0; i < shape.size(); i++) { - out << shape[i].GetVal(); + out << shape[i]; if (i < shape.size()-1) out << " , "; } out << " 
}"; @@ -133,21 +143,21 @@ std::string ConvertDimShapeToLength(const std::vector & shape) { std::string length; // case of empty vectors return 1 if (shape.empty()) return "1"; - size_t int_length = 0; + int64_t int_length = -1; for (size_t i = 0; i < shape.size(); i++) { if (shape[i].isParam) { if (!length.empty()) length += " * "; length += shape[i].param; } else { - if (int_length == 0) + if (int_length == -1) int_length = shape[i].dim; else int_length *= shape[i].dim; } } // multiply the integer components to the parametric one - // if larger than 1 - if (int_length > 0) { + // if larger than 1 - otherwise returns -1 + if (int_length >= 0) { if (!length.empty() && int_length > 1) { length += " * "; length += std::to_string(int_length); @@ -157,12 +167,6 @@ std::string ConvertDimShapeToLength(const std::vector & shape) { } return length; } -std::string ConvertShapeToString(const std::vector & shape) { - return ConvertDimShapeToString(shape); -} -std::string ConvertDynamicShapeToLength(const std::vector & shape) { - return ConvertDimShapeToLength(shape); -} namespace{ @@ -402,14 +406,15 @@ std::pair> UTILITY::MultidirectionalBroadcastShape(std + " to a common shape."); } } -// unidirectional broadcast- only B changes +// unidirectional broadcast- of shape A to target B std::vector UTILITY::UnidirectionalBroadcastShape(std::vector & shapeA, std::vector & shapeB) { - auto ret = UTILITY::MultidirectionalBroadcastShape(shapeA, shapeB); + auto ret = UTILITY::MultidirectionalBroadcastShape(shapeB, shapeA); if (ret.first > 1) { - std::runtime_error("TMVA::SOFIE - Error unidirectional broadcasting tensors of shape " - + ConvertShapeToString(shapeA) + " and " + ConvertShapeToString(shapeB) - + " to a common shape."); + throw + std::runtime_error("TMVA::SOFIE - Error unidirectional broadcasting tensors of shape " + + ConvertShapeToString(shapeA) + " to " + ConvertShapeToString(shapeB) + + " in a common shape."); } return ret.second; } @@ -537,4 +542,128 @@ std::vector 
UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } -} // namespace SOFIE +struct FreeBlock { + std::size_t offset; + std::size_t size; + bool operator<(const FreeBlock& other) const { + // order by offset for deterministic coalescing + return offset < other.offset; + } +}; + +struct MemoryEvent { + int t; // time (i.e. operator index) + int type; // 0 = END first, 1 = START + int idx; // tensor index + bool operator<(const MemoryEvent& o) const { + if (t != o.t) return t < o.t; + return type < o.type; // END before START at the same time + } +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ) +{ + // Basic validation + for (const auto &t : tensorsInfo) { + if (!(t.end > t.begin)) { + throw std::runtime_error("Each tensor must have end > begin."); + } + } + + // Build events: free before allocate at equal times. + std::vector events; + events.reserve(tensorsInfo.size() * 2); + for (int i = 0; i < (int)tensorsInfo.size(); ++i) { + events.push_back({tensorsInfo[i].end, 0, i}); // END + events.push_back({tensorsInfo[i].begin, 1, i}); // START + } + std::sort(events.begin(), events.end()); + + std::vector tensorsOffset(tensorsInfo.size()); + + // Free list ordered by offset (for O(log n) coalescing) + // and faster insert/erase with respect to a vector + std::set free_list; + + // Bookkeeping: size/offset map for frees. + std::unordered_map live_size; + std::unordered_map live_offset; + + std::size_t total_bytes = 0; + + auto allocate_best_fit = [&](std::size_t need) -> std::size_t { + // Find the *smallest* block whose size >= need (best-fit). + // Since free_list is ordered by offset, we scan to find best by size. + // (For very large sets you could maintain a multimap by size as well.) 
+ auto best = free_list.end(); + for (auto it = free_list.begin(); it != free_list.end(); ++it) { + if (it->size >= need) { + if (best == free_list.end() || it->size < best->size) + best = it; + } + } + if (best != free_list.end()) { + std::size_t off = best->offset; + if (best->size == need) { + free_list.erase(best); + } else { + FreeBlock updated{best->offset + need, best->size - need}; + free_list.erase(best); + free_list.insert(updated); + } + return off; + } + // No free block large enough; grow the heap. + std::size_t off = total_bytes; + total_bytes += need; + return off; + }; + + auto try_coalesce = [&](std::set::iterator it) { + // Coalesce with previous + if (it != free_list.begin()) { + auto prev = std::prev(it); + if (prev->offset + prev->size == it->offset) { + FreeBlock merged{prev->offset, prev->size + it->size}; + free_list.erase(prev); + it = free_list.erase(it); + it = free_list.insert(merged).first; + } + } + // Coalesce with next + auto next = std::next(it); + if (next != free_list.end() && it->offset + it->size == next->offset) { + FreeBlock merged{it->offset, it->size + next->size}; + free_list.erase(next); + it = free_list.erase(it); + free_list.insert(merged); + } + }; + + // Sweep through time. 
+ for (const auto &e : events) { + if (e.type == 0) { // END: free + auto it_sz = live_size.find(e.idx); + auto it_off = live_offset.find(e.idx); + if (it_sz != live_size.end() && it_off != live_offset.end()) { + FreeBlock fb{it_off->second, it_sz->second}; + // Insert and coalesce with neighbors + auto it = free_list.insert(fb).first; + try_coalesce(it); + live_size.erase(it_sz); + live_offset.erase(it_off); + } + } else { // START: allocate + auto &t = tensorsInfo[e.idx]; + std::size_t off = allocate_best_fit(t.size); + tensorsOffset[e.idx] = off; + live_size[e.idx] = t.size; + live_offset[e.idx] = off; + } + } + + return MemoryResult{total_bytes, std::move(tensorsOffset)}; +} + +} // namespace SOFIE \ No newline at end of file diff --git a/src/SOFIE_core/test/input_models/GNN_model.onnx b/src/SOFIE_core/test/input_models/GNN_model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..833e34d543db48123d848f2ae8bf3bdf51d0bc09 GIT binary patch literal 591051 zcmeEP1z1!|8>V9gL=*u77eomWmaa1+7IG006+0G$6;MhR5V0^XcE?pwY;5e{%!ppQ zy8{~=72E6mXZvh`ig=Bm`#;a`p50~7%$)h&dFP9F^jIdkNdr?7lOtnlRdIFla&l=D znd;)|>AP~Q4QrG(%a9ip&ExjrMWm!A^Y|?7w1_w^$3!zCDykMMAu)=_Z5kEDb+OhY z{{(es=_f?Q^Y}>-kvx+SUv^4zL}WjH3NJZ0HlZIc*{M}Xh*Oy0J;8NsLUaf(MerV1 z;XP;aO6kdW8dF2$wOW-Fa2PLsAllnzF30vxvn2?a}6d4!6=W}CXdC3u^^#e^p zy4#lXj<$|$TZQ}C_6(Y&&Z?fbsJ%Zup@*@+Iczly8-ED>ah>9X@Kdi3wU49A~d9GlDcqXgW17hQNY-{J3 z#CV=_WCkzBxm9vRQVdu8WSkWSQt6sJHfM#$=9Dzh+L0X>o4{kcxcNA;V-whY5|c^$ zqgZV6pIi?GeU^Wt5!;2sYwTgqc4qT=aebVGpYq9g@{(Dfv{-@($Ew13(?l~WHcdPQ zTg0YuU9C0AKNU6w$&ZVtAbItlPC?oI$YY#P`V>rVK)xwI1zo>%3d-)mn1W=49oY?f za~XqNt_etf{CAsxg<352bXFB1-ihXz~R?NQsSzb5{C8 zCyt6g6^(!sS-{9ZIE${t_9Ol%iq|J1H7+HnK>f2+B3u2NxWpIrOR!igeNnC#Qd%P4 z0Wz^k7fPch%hVf)zFtnVl)o2aeIT=w7-y1bePB*dE{7|cprw8+n4^k@+@(xI_7xej z(xVp)iPD=+9KqY7hLgb=rKR-KOzv}6^ZRlicNZnA7q(c!p}8R;1O#LxhdGq;q{yBS zCvjUN`tY42dC3V*kulB!zUdq(Vr@=wiG(AO>E7F!u=tq71UDxiXMSu7uQ4Iv{UV}y 
zd}rYtb52Nb78{wf%*bSn%~b_f2^&V5D*8J+;pKN~l6ca|+T-Htp)`ugJhBH#Uzk;4gw~Inp8o+YTpjcY-@Q%epN=Lg_d-Nzm}f=g~0tN1>sr zPRT$~sV~I#CzlS4sSRYqQC@6Rx+Mc2MYGaT5$Ad+ff19iz^}qN7a`6`8$tRzyKJm` zTZwfNLLtMJAB=U3NiA!Wx2tyj`%|f zn*Ijda&PGnx>CX|%f0E7A1(%ZM#d#3kU6Z9U%HjeFWtDR_+_^;ahLik+;x-jORh^L z!>nYOB8Hj8Vo3qg#6ZR_2wW<%Oz$#T=1n;$Jo9rvp}PtwWUO7PSf&gWr7PBN$}^{z z4w3^IJW~-T-M-XAE1OJ`0c2pM2t>IqN?fy7nV9GQRhZ{4&oza(SIIVixt%q`V!tQb zyvkz90U*mFB83F-1p85LRBSRYGDYUbG*j`B9NTndvQ3|gVVf1_$5d>+3Ye66SQu_h z#lQPvFDBRZx8<6LN{7*PQm*M*-rc&8u&csP>FPuX=^GB83+tp;>lF(m%JmBxIDj;<4|U4GIaaF z`+M;gN7kMS@MUJon5L^7({!y>O~Ie(!YPw#R-8jq@sXDs2g~8kltH4@lqGf^^m{op zzm#Z}8zhwmP?>1DDiO^ua%w705S3V`;L_v@@NQb!PR*B1LwKkByE`jE$;QRfv!sL|0rFH)~z->!5RT?#ct- z;I34h{N?75pK(@-TTU_~gnbi(o%l-19QS`d6u#sq2!)F~Bcf9zNm`*B#H7cUzOC-x zK>YP`BA&v{l*yu-5?So5w0mVnz6wC>~339sm;-r5kJ|Iw*cNrFk1= zAFFaVeo=f|X#&Z^s&sM)-WK(M>rQ|*v+ND{&F>AY?n;;-ayXLqDmxs-ov8FU$~*!U z;Bg!+^EkT8c^uu8J&t9kP*z-UTWL0{z%-ddSY_x%GULnRbCm9pr4}t^j{l!N$8ztQ zl?ITBm%`PE>#js2zbLq^G(}Vco}$-Lun(SIHt3a9f}Wr*Lk2ScA8yB+@`Te(72%Zn z9aRuc1;?X@9O3-wGAWhJr~C-}#jZz>-#w6uBN<6$0Dhg^zd_ zC8ZK_ac|kj-gMA$Wuc2=dh1`I&o)qsW3al{m>=u1*xd7#oSE1XB zxV%KZaMDV0i1`GymqxGj2V9+daw7(k*nMbWh?bZ>L|pD3|I47YcF+cO=Z` zA@BHB_(at{Bvi>hnUO?S(F)G|9l_$wyk;r|a6O61KPr3KKKi|vZRM&@;kv<#?=5k3 zGn;SMr813HPE_hEB>}F=HX5FCPHlH3r?#t0nQ62ayxU!`|J|$1=|7rSc zxx;=+11KXXVjUK)rxGU#D-*{&zW~P+9oT}X-KVm#>_#P)$zg{OSP9Dh2l6KWQHtzR zd0(}=0tb}3spUDKf`{5mjsv>=b{z1F{nB2)EeE7a=YWf(9MG$xIiScdO=^jVfgZ{%(EV3ofuC_ctE|?SM&6W2%>OA1REWh@ zXtloAcVK~vHTr^hS|tk=ys#4qkaVtS$rrC)R@>la;0{Y6jNFHY5AX=m_~6Zs}}NLm<4 z4o>0n;&|~ymVlcwFp0+%Cu@ocZ}SL#3fIlUT1)W4L7%0Ol4!0gkU;%YZm*y54Q~1} z8sW=MN{)>eDxfMgRO-3@l4x`GD5w}yAi zLSSJng?O$rUw^|~7bp+>XjCUzdkV*92vOZtC8`rZ7eYHhby}At!eN$<;D8oaR};~{ zl4Sjk31R}q_3*UTmtF-ekfSzJz9EbeEjT_z@WlASD65J@hL+?YndamR&4^@mKPR~> zXVObvx){)fuO&xrEdM;?8TpMRnP1mbQ92N*x@0z!%$zb0ixGZTei`^;<6z!LwhlDl z$HhkS1T&wk2!chXr=sq#UzsGQu88C?pPDFEN}dDh&0`Ivzh-7a9Mkx|66zkFN_M~q zi%J`9CwWuwZ2|ca|4Xb$m>L0u|4*0?FO3Wq5dFi5pnl8YH*)irkrnT*Y{D&Dv}(z1 
z*`}F@ok?WiyGxqv+mm%xS57_6Lu|!vVk`C#Z<|_0q{Q%&Lt1b>ysh;l7Y@}}tlkkR zkugM$DPEjcRB6^KPlA$vW^z}RW=&3osx)h7xk>S@W_?n;D!avEYHiNY=^ZqU9s+le_KN-Hy221F~ftJ2C0l-5>e z7E`@4R%T(Q@%ysZ<|~zJbE!tGEMzEKn*|(Oer+a?(6=YMF7gmwVmD-&nTB+S# z6<2C6ud-L_N>e8pdaBZ%<)5M4RcUK=rKwl&DUQD~4gYpiU#BZtw%sMmw}*))Kee}5 zo47-2Z?2b*wI=yDXs+CPJ>iF~*Q9mgJv4bG*+DP;{0@5fkJ&-X8cBAXS*3fI5*mSxpEpZ<2Eu&?k+N5V#wrqlOhz&w-}A$E1GI zQC8mNM5I!SorJTNU{VrS_IOCClW5{Hp{kYU<1REz&tGV6; z-^P?Jcys?d*9-j0y9?#x?FIP87aUwKddzm|j5tF?1s>iio^>)Ao(j`PML{YBVwD^*U~H^Bln|$Z(kf2D!vS*$k9UYilK!U<#v&Wf7mWkRrkM?7yA4k z@IoR=q4yVZLt;A& za_B<<_XnRj^$U|j4`uaQ0pJQC>bpVh_h5yqWySJ7COQIWCTYG*UPw;nYfs*-z~ca9 zv%Wk@WX!)75C~BwT4Hi!j1yxs%vhDA#84XJ7WDG>q=ux3p)|;g7gED=hryX+rc4ff zl*nO+G7-z;3lK~6Y$fba7!?1(RZqV%T&ecekum;mXvHwW3L}Pxby))4EpmK`xY9^0 z9w@Ff^5IC!f`XRFv9y^#jHQWNDhb(%S}H}Km7jOSjrq?VaYD&2al0ig6?l?L2bZ`8 z;;)yJO)1=r0WjogD`88yQftJw5bQF`R?xUt>G3ahm#HjdBvL0!<3-AQ{s{->JBkI( z#3OGf{Hnd!_fJ&JF6y$h1m7d_dLpY3#q!+cV|hLRbnqeze+5x zO#M!Qle@g4IgG)O*>m>>qM0GV3*}0VatHGS;y9J2UEavNM}?a9BNC(+?Ild~N#0ii zKPi#V6X(N*Nv^p*uGad}D+fcCRuV5c-e29H*thOavwlmPFJ8f=*Q7QiC1}Ox*Y8n3 zX8o3HCozpOOIM^{F6a6!{d>Wn$gEww<#1#Z@1#=HzDb!o1S{1$1nG?ge5CAk{C?#+ zUg|ooIiI!?ES=Alc=}yuyw-Y`_d&M_cSeyax&XO6si6;3SaTZ;h)Hp5? z9nd8I{xn&1q;+D0Ca-?}EIL^u$;2O5IuTE5K)zW{BbMERVZlNMK%AMoR#cqnqV4cIhuDtk`54s9vs{mbqNl|IBhNbl+7j*yU#D6>IO~^dyDk za&kWUsgeTnS|-AxTWPTx*M&ora;297+RHHazkZmxOB*aUcImZr8@~GU1U>7=5Ok%s zevYI||Dc>EFa3MT;3zEfq_DbTNV@b!q(=+OCh3`#DIKMebWhb0MiEKp6JD0WOXfyI zMcMIFGU8)tODEjRH|X6%3Sj;Rj>J?`&F9$ zH_-e4%JiR$YiT^h#cv)&i$e7HY8eU%Kv--XksslZe0ssFN}Ep+grf;{j${xyr&b{$ z@>mb(VfIFDf-MB{ng^T9Xiy9Cb~p!l*E0N4Pj+ z*O2|gCU$Lto`T{I9^~zWiAg9;}*j>3jT0NiKH{#rK72-1?%7 zM}jwEJ|d{hgwNMeU&x^9GBD52-f!btm;RbEKMt(jE>zX(rJ=#P}CFuOubW>o3BufC!*C=z*`c_yK z#yLrLr$jSNEMe>=kvx^C7IR%(e}pok;H*^n&GF}av?6J8t^}r-nxDUw2&w`GRg642 zDfKQ*p8RVoHG~JAiiFz)z*8wu&WJO7m4l~T0_C?A>{=pt>g^>?29w#fyXOy3JT13f zOSIO0Tf25E=MIB8APjCHI(C5MA5@gMF~hFih+S6-D3;rrOtguxp43CoLGaq0cnHXg@8cmThaC$WC<)g5aLm)hR!;jRe=ByDFsxU! 
zev*kI(oN(F(=CNx82iX+x6nGL^dlb zne-5+X*Eww5ajE-xcey8OnyK8({fm|pplYeX~}CzIPsUMnJiO-T-dpC+B0J?WCktP zKjjjodVvC};HWp{M9p&9w6LkfrYkj2%e@|})>RNCX_Dc!vy&~)B*#v6!O%c`BGy3l zkkdeQDMJHQ`k8!`)I}~z>f!zq%UzWNG#Jq^N&i*2AH@;9U^x|37jnDbK2o}`bPwbz zsoa%|!a0>+EU_*gSMg!GGH?=}OQsYebtzj1Rr;~2Q0dodT?yHVipjwYm%0#;s}w>l zH^`~3KH>kOLa3YcWua1%=q_|~yUB%KJV?T8dx7ZHcgYa>R_r4#UGA5Pp-N@unNft_ zEAmq2nG&LhTTV8{95V*uNy6+dO3|0E&GJx&F5$D4244iel1fd~^1{~drHT5CC6?51 zlT$?%QK3>)QDq;>6Gcg&S(HxaQ95a^M(tc?Vx5mtVx2PNif0IuE(&5# zmC~r?#$DBgi@%&S>N*BbBu8xvsf4>cPb7-c?FBsXoE-EW_&K1DaEY%FN0n%pF-gxa z0(*=c2SLh_z>>uUS2CSfV$q)`ish9$V~&{$qUfqb6u&l2M;Wz54_C?*1>$Fw>Zs*L zEY)5-GRD7@I_eD)9T3Rtk;*Q4NkvbE_<*PK`DouyQ?VQjPT-gJ6uaygF7aO`J}|X( zf;B+sF!LywS;HANSRre;u%Vnebhf za6$B_(&&E@qc4%lu8^V`W!V)s6GKKZCt1`>5nl0fDUa|9kuqTjuPppw?n${WO0s#$ z9#gDjC(Q%=)udPQN{5b_a?&g1K7NTYa*|PEV2DV3h2#ecih@OxOXv=eVh-0$kxDc# zGj7sAb=-u}3ZbBDrMP2x(dEAqV7VfR8oSC#uedm=mR=FQFCQzGWX6bM#a^xzlT`Rq zCf4vHzc@@RAv?cwm>8FiYD;8c;_~t#`H^FQnb4^r$aR)GS>1(Os_2UFBUPD%-=2=c zk7S001%#?nbfw&o_xp*iY+;D5xUu9JoJe-XReY9aQ-SP?7dZoiy!dH~sss`Ol6)8P z`seq~$Qns3aYpIzl+i%gwQ_oAWOrepk_fK2E`qWuIU1x$tjbl0A#5(`Q>E}qxzVLO z!Yh)j4iCAk4lhzT(f+5+>i9b06-kv9GehC`%If&M@QQ~#4t%lb0dve4NFcOTrSR&v zXLKkCuXqqRs1#oLPvGE}5neg1&8nH2(3=$aa`{Pdv7`)uD?U0?RLbp204*(o9Jb|I z%UCGf+n&ddiAW+6-d>*8y5b85W0rnITwLM+f!DxXnD>#ubPq+SR!<2NRKAkT(tfcC z(ILDPfuagm;XSd63ZECp6G%e|bFf6k$fCC-o{OLimSs}EXeUuPEHNP=-6=9Ig3ss1 z#PX6O$VuJGH?B>1Q7-o~oIZT|%0+`|@t1Qk}L=c83mWouJFP+1e6@i)fBt&N# zD*&^PA~1WCBg;z%<|HOC3%pbE@&iIyE5#mt4w$(lj{kE)^U~6xd8|A%m)nod1QyA1 zR5my(E0?<~A+6x#JK~>8OycoPYAVCC&{zJW0a`gDC5oT&Ie;c#v1c7o1ZW=RWQVad@8mcm?S88EcJU09k?v1uZa z6)%M(YHxBDmPl~Lp$1DUB0ZLG{;%Xeq1s9>CV00iKX?n0aJ|Hs%}4+a>i=05-S*pA zG~RmK`9#DR31OCp9r6bAS&>Wfu!uNHW2M6;8kGsz zh9bxojx*`x&9bTVp6^VhJxL;WYy!JaVlvTeiejj83cgWC0#l{0NJgm85%@HqF4Z)8m@KOdOCiipVNX}z0k$+b{Bq%9@5`_f4 zD@bSLfLxNoj&Dd(V6kx}D&)04ToG z!3DBeCfhc-x|5B(Li1u1l7yn?qSF*rdt6+Vi@B0{DXGZ`>;Naxw?z#i z?gxQ@iDYUCFND*Hb&XN@>2AX01D@h6&0xXtAngVE*e>24*80**hpI%;JuWFG!jyHP 
zK9!@cUV)XNjOSb)R1;)xE20LYa8lfb;Ev9WB~?;`C3{P@Z|UUWoCY#MPW^280iQM#}Qw!J!^^lsa&4& z9MDZA2Xu3i=YW!ssh9&+kc7xj!vRTtml9fvmzrRC4k(GRia-60IADJ1956@50TrUF zUmPZpA1?+RMhay@JeA0vTWKuNP34JO-^v2rm@H5bjICsWqP>Rm{|O76$Y6mUCi?u? z==j7~k}xIq;CB#TyLfwB>xwUej>+v|cm23MEZLbV;TL0d5x3V_+9~lyR(vfXDM9-` z&+?Z11T0V79Vy3C=ms&~GE4cSqfk!YmBQ^9SSA8Fu8W5fwfk0hau;+~+Hlgt>t%0J zcUAJbpN!YJxhf~big}&Lbj43synQxd#S^a;S15GqP-vuQtXJ1qy_u{LQ3o0;r zao{!@_n|KIery3ha-DIz(QUwoM;7?9&r|S8-x@_UHihdCsloh_qkw6ZMPU0qLo{JA zfc;t+uc?Dp&0K=^ z*;%0(%`St3FT7yOpt{)4BMWI?H-$%x>tG)ZZQN?!VyeIOHPjsR1jENy$Ir(XqQwR7 zX#Tam!0PWpaB*BBTBlJT-f3SI*BcJ-U3NR*ajyZqzxp)F&k6(wvi?Mk8|0x0S+{_9 zc5R#$ZH9M_SOj*g@Pr@peWB4}efaPG{ix4I0Nd!!0Do_ZLJ#)6M1OQB0CfjBgYfx_ zK&$mEbfW%Iw6e21jtC3@ZH&f&&7dAEO8grIC)}W&B7Lansb^7mR|{OfMJzZ={q48> zQZCiteTg6I)CVwjCR8|$HfCE9qTfgxOy zZH)WBeTc&J8p6flJ;3pO8nE}uy}-QRJoLdsm)@v773nQKOUK!41$U~gL<@?~qUyTp zFg|qwb@Q|Xsy%l&io29Xam_uUySEE^gT{h|-FG9?8F|1nZUJh#$Pk*WVB__gJE)#V zrlG&1Jn`JbEnu@vJbL2b00&v@1s^*!0!Fosae=Qb9=x^&KJ|Ju9XMe=cpkL_+%@(E zo~xYc+0`de@lh1)-=hwCANU-ZbhJR(Y0JRm*t#Gl&ky>KwSkS+xuW1f4Pe}bX<+p1 zy;R-ayU>m!UBSvbE%P6&zljbs4n`gLACSYQ-5{+OfKD5|F$7kyuaOh>)$pYjyj_B3 zcASx~6@HL%vSMSsgKn@<_v3yW-rB-bM`ogaEo(vcIRMd{`gp_o+2Cm45@eH62X9*R zgo@pij`|E~1@?xy!Np$YFxu)PTHnGKt-W@Hs(tM}r90mq&mLj|*~6BAQN`8q@C7yD z8&Ct*+;N#s_Nj^EL+(=SG2S>lOCRpbsEI$-e?|w!RDn|h&!f=hHF4YZYe8s0BdEV> zGnnYbhOf1rfJL>vP|DOrl!b5q{Ao^>(B9Gu*NY!Q&(64p4t4PdCB7fPqXyp8gniAy zX8-HRy^1b29%YNdCmcZ!9-4w>Z?Y&}_&GFU+C*?Aw<}tu?*=)^&Cs^O*@!po99VL; z4%QCAVCA2MXk6n^YI)0CD(2<~`fx)VxN#eWH$Hy>&bJ>8(pygfiuX&C zcNgfOd&}d&wKIF@Hx|BFcaA+ynUR7{PkaiR{dEUjJ+%;w`S&vTqc9QVn9iX+_tt}n zSDu0H)lUMK0c+5DZ5s5SFbnOSS{)dS{(!F6cuqO10l4(gNpLsG4sG%_2N_cf!9D)J zVB=ppAc?CBn+47Q{bH}9IE!__bqo!%S9J!7D-%%XUfS^Hv}<5oA8-1G&wXH*G#x!o zyi5->+l21%7S0lb%D{;Ia2MAk*{u_@3bj)FEOYZFs02v{>N)({~>R zJ32aIzxD0tHGOMB*Q+teG(a0J*W84(Upzp=hR;Ighd6j}@kO*c4ubru25|L^spWEbMs29^^aa}Ov69{un(Iqd8HKj}GaV%syhK%# zs{{4Ky09H|g$d)P`iRaCMX(nzZ&dh?`pvYTT}Zo3u27!_+dV z5LS0&*wh-;KDY}6N0p!!{u;n4sSaK@%O2L4I}&U+9#0S1(gggo={`UYicqZ+Y>-lP 
zjBdec53X&UfF61J!ewo5ppnbiuvP21VA%F57*)~7r(8VogbmKPziC5kW8;i>R-1`> z8>j;t;x)xhnuv%x?0UeN1CTjCCxwjk)NKIWdVfGg{c23AL(fW}%= zL3<4g@Ch6M8V#SK(f5;)#iZrH_k|}MSM8N=wsj-arnV^>y>=jY_rwm`zB9s;Qm3M; z2VKy}Q7^%QryD`dNsVEaTOOKrWhb~a@iA3!do-H8-wba*%|SUOxD-x3hS%D2@>R=>l=Z_Z#Y4Pb$W z#X_f}z3`leMZnv%F~~I9<>#H&5sb1_hm&-5@%t2Wym*2)_V>Pyimy~hC-1fe1-12Y z!Kz7M>5Hy7O=}P?h-*o+pPr=sA9ez)4qiAtk&R%(9(Yct$KdHj7pUggZ)dIhRdGS% z)|5Nn2Is6yfqJoIo$3$--6P*1e~-bi*j^iRE}lkHX75GjK7W7-NA82I z^#{Yy{XId>{q3|`Yu)^lOGZ+G!=ljAUMs=9Q!Jc0sTmz&RSoVRX$EJR_J-qXM8nJp zC_jwguife=pwP>Ter*2^B;B%xcRae_%NyF^yeV$zUYDI24QfO z8^Fi=`r)hH-+(^4T%0*?7btG`hBi1i8YQ)h1YP*fh_$i@o^R0+X2$nN?w12FzMg^a zQ7z%?5-WIc^ea%fV+>Vv;SJh2uLdmANW}Omxv#kyE?hJR9NgOsUK|{N=B#@Q)Iwiw z&zM;a`m0UO|MY?f`#uhWEsjk`Q*7>2tGIf2XnR|%Hv3sVwZ#e+HEN6kF4;lv+9815 zI~Q>-IOC%;ZSmEJ`Je^oAu5>cNF99U4vMS21MI(8)X`PfQKuLO%nsn-#gSahUS~$% z8Rh}CI<>`|hk5i;?tOIh?*rgq<}tu|&=Gg)-W@+Z&<)%8HpZj6UPdR^hU1dW(@-%R zVzrgeP{G1joV+j<=Xu_v{pa(Mn)gGbRvl1poip&Ny^heo^$Jvc=p%@mm=6k<{Y@?3 z*$t{aUb`cB=mucCHy!6~{YY;d?GJMrd!nM@iP(Fv9*A1yi;Z9RLTZ_Meq@@!30G(^ zXF*5QFb~40H~)fBa}S{TE=SRnwj+>+2@4lbze*MDz5&$q;<4890WjzEMe1IB9c-Cs z2Gu5hqB6Htp-a?n0GsMl(bL_-u;cR);4OC@xVUrx%-hrrIYsxv#!H7_{|!xOP8SH; zXS>4vQ@X>YQEoVF0T;e)+8h)$>;!ygQn-%pVqo{@0x;{GAq-z;fO`**h8&Nbbo#X( zphfGes4ys!8a44KGS)D_cXC&Q77yH@Q&DvoTBC%{qQdb(vvw%T7{FXy0%~{;!0ZJW z>3wL10}rQRy1*aiT4mrFI)OMZvIa7^VFxeP4FTyPJ@B4vTNt|D9UITmgavQ9!H0HM zIAmf?%&9$&IvE#@uP)WYTZi%CQr(+KE4?f9J{<@BA3o2gnj-+uHpMFON92SJ^3NK6 z6xC{}3AGN}z=Kwc(EFIS`0dc0Ab8Csr1dTbOz1TX=b8?Jfo}ddMVLJZ}W!-HF9uQ^* zSJzXA)z}em(ZiGYaD~l4;1DhZd2>~RELusX*W3sKVrIjTWApG9v-;S(WH8pB zv;bOFJq|4z?!vx_)A2Nvj?J1o;yQ&E@JzvBcx!(T7@vI(H(eG99hON}5*1I9v7Au4 z1TG3JfbsULl=&t$zT90K7BB9KO73kz-E0CN$2^B}k19kuRl7sAqBF>R-cc}kDnIJwXO=@ZjOg7{whJi?fh}(h}9rDcPMW7qz`rnda!-R zj#$mziZ)3MzzaU{ac)u=nv)*@Pu8@=MM*c%_ATu}%_)vBckxkh((E2m>vRRFJ={T^ zUHKSYHcrLcW0DZNXCKffMFaQwlmOMT4(tryf0}BzTnleEs}0qz@26{6y+BL(weiw7 zmbmy-6EH5+0EfQ0LuGA!gmR*vp-d_(zqtKndd$5*SZiA-R=m9CBRj9sD5@$NXda}hY1)F1jU 
z@TVW1YlsW3@AKp2Ed^ctO>k(x0&0G>nIPwYH(lJYF+BTbI@q4z3kq>hWM1TlyN&FO zIY*vQYMb8dEO=>-do`$sM_g&PP-@=tdpYSqv^- z%mQk)uK5O5Z-6YFDI79C9;z8xP^Uk*V!Nh)gFfSXU~m2a;J`MA1)En;Y?C;^*7w8@ zo9W=UhBc9zf5#nXdsWAU{g0x6qaLuZpDC;}u`@0%m=1zlo(GwJLqJ&6Nsy~i3+IG% zfZmbv756enOY7eUnfs6WIqE$@>4*Jc=FqmZ z@$@B>My?*@gv>>1R#oXcYSQ z*FQ|XfK%O;^<{(P{#X@B@|ZVg-U%h z-vDb)sNqrDpP{8Xsd)bJeh zqTt82>3IK^mT+#%hB$c82-GKY5Iz|e0W#5L+IYk>+RiNjbs5|lf7tvc67Y=RfjCp`7F|3u8BU13 zfYjd4M5n{uv47D80Q+0wTO%y-!Ek4Eb>T%2xUMb^ocs<9PQ8wdgU*0^M?#?S(-N?G zV{06vwFMQ}96)(zw}R!s1O{$yj0$Hqqu*}cg(g@W0L7g{z$%V8WEbuO>`xpjr?nd% zx7`Z1u|OTWzy2aoT!gnG*i@z%dH zU~%qoFy-NJN^4Fba@=}0SB&-5}B#M2oWQ)dvFcB_I1a(Z46P4T|3%;R0O)Z=@e?_+8;YD z_ru12wIwU&T42294NcY9g3Lz_LsyHAA#bC#=wmx`>=3D^F1&>@3`zNr|NA}whh4UT z;Q?x}N8~tgF#i<_=+1&QZt`HktNK*o)G!c;n<4W{wn%N@L*L@mIbc?OJ}k(wM>GB$ zgBWKw z73Poac!S}^ZR)_zoUC`Y-C(BXe!53aBbd~krp6J-vs-i;yg1t%=Pe1tQ?|Llp-(gL z;2ks?j5DymU2_=DTY!4(nu#)#tI^lPn&9F+*D3SHW@z!qelW-I1HGq(8B90%gs7+g zf{$M7Q6Sq97N#9XQ#7xEg4yI=J^n(W<}0YY8zIR2lDR_PvzBe0BNVWI6P_r@c*X`-m1|F_dc46$MhkvF|!u*9BGU*ZO2n; z`X_fjbTEYP>zH91GzF*)X+x>)UWeEZ^65GQ52K+`5Su?(0=Cnp7|u<>MNfLcrw0Py z^4e`+W>q~u->pvA#KH$3+-3mP{9EsIU^`<2=P0~*vKeIeI*Uv)df}vHr|A?+e|*xw z0~I#!f;ol}VC!NECU>5!pcc?99b6GI-B^ONbF-8R&y3^g#TyXg4`INcEJrFm6 ztk>-WQQqoaAelhbi;Pa#QC%Oa?dlAsoHz^)E^HoBCz=G zFsL!u5$8>Oi>935fN-tTXmIfd#Evqg)!SRs}$u zHvqbKU4iDLHiU1(&Lg!$>iPb4qv*UH{ZMF9BTV=BfX>d1!?_C`0rYo-YG;OlW=qt` zGU!iFd2|gJySl=S-SqLv!OqbA{dVx$)e3H&ZG?Mm8wSnS1flJ_!(m?2GvMNy<{;Ch zir<7*yV3UjyHO9Y3M6Scpj8Wac>8)ke6a5TP;{&nw(FaL&pVjnxXCqPUUMy6*v$rq zhaI3@A|` zi|(O#umRcXS6Ks+789?X`4&(deuqBW&je>pGr)zj{z64hC!(Ung&_FGC&X^`4`O@K zbnpEw@Qc+JIIG1wbUIoeejGmrsZAcaGr%zd7iOEHITPQ3P}Ax_P4|!dUX56=n}zDC$if4=p0mSwIZfgG19wpHhsWTfPZK=BiP*5$o~Ys2gad|9Qo5ugEHA{^BQQ|(;eu+$$8YB8HUhb=N}L?xDjx9 zbQEM(t%HlaqQSuZYOpt|3m=>crhC<`3Qk+)fZ@Y;gXh#9pb@hdwV6QE>&{O>(a8(Z zt4Rpa?X$rM^*bQ0UnBf@L3h-<{~F&rTUeMsgNq*2U4tz5yz*PJw;_yvZjU=8yhQ5# zE}@gxd_bS-1?Xzb7O>|a3-5`$LmfJliVEV~@#8JEakEqw)a#Ij8b;~EjIEY%MkhNw 
zZ?F#vKI{aKP|v{I;TOUF94qiH)&#qh?%cr?tXsSW87+xMh4mW1UETTv6m5vx z#XmwT`oBU!=e1#~^Gy1c*DUn@qa`}Om_;>CumSIqOVFpVb5yIF#fay1#&3((Ow@Sq zYLvn0PtSI)i7&n}#k>_(FgtTKojo;=jL|&M{)RJ}WpM_a=>HFzd*LpEJ%-aKLZ_pv zR3Ui2Z3$XX%?jVy!NC!Aj{?0#7f@`$CF*p>KFa#-7%;G)5ja10IncCngimKz!+S>@ zLPZx0FsEu`e4bq$)!tQ%s8x%>;-2g2goBsB2=E%{_iuzZuge35DWk!t6Oq8oDax<& z@hW(TTPN`0%?zNkz!Cltp#e?Z>tWZxETH4}H`q4L0FE821Ny(MhkZuZLBj_bKzdXh z8a2uX*t|Ew%OVYMQ*L7z@&Qw(UCpp#_h#VC1RsQSTu_IZ7m?wsap=UVy+~UvCV#O; zCR%xMINGbG2eS9ng*lFT@PpcF&}MCOI`41N8ur=^52`aB%=a__TXZt1 zu<-53{Mb>t&8M}X)c|wg{f`uSKuRaK;ym zj-$gHN6_70-UdGAd9-i*1GI1ZPQRXdH_@$SM^TLzOTg9mnz(WOf#`gcKX`uWAD@JO z>cFS_HiAFY)!>VOli+B-TKLhyw;;ZOBd&h*A$m3E8cLgBhWGCM3zekY0-9sBk#_Hf z(A(M&%-5*{12l@MWj4!@-?S#^agW)k%D6@_;tyk3@>gwSQ1?AONc}BkwV)|liY}wP zt4}~~2N$&A(-n}Rn*emIPf#J+)o`0FHRxcDIbM2xIoh~$ElO|l4vnsL8P()y;e95_84_GvH_!ZA(Uy~ z2~g>JaL{XibRg#%*iF1Q=C{^^Mdr;xA#jFO))9)Ueg_TqdrX}PNC)FDSR!zG0A0H{ z2iVb7sm?83;H)7Obxf}iS@fWR;hz?u?kHmvxHMzu`bIhE$O{Kl{mg09E~`G|KR2Ma zych=p{QINf`?Eodq1EZUBvblWHz&C5GCMzQp*HN8H42S?dxA2kmPXChV#B9dCOC8O zGIVV$7i`b!henRh@mq6(lYi-!HKwdrqT_atz=q*^*nFcFT=wP>bvXVlSl4nIs8{U- z^7J%-&f%80huuVAK1vIF_LvMhU-$$DpRR+a`3*$J2F?Ri6bJFmFMy1|kwAA&Z5aL5 z9)|}N0Oy?s!1$kk0f^oNMtpn;lJ1(}`u+34G`k$6NycViW;$r|QVSXf=h1Hp?$Vd2 zR_MsP59s6iI&e?jJWw(r9Gvo*3r6pF3GO`X3~IcefvnHprB;qt!)EOkfs+ok=rw6Q zDW{3I>D`OZfEu06@vW6((X_$Wz_hp))KU^g%KM`MO!LhJycaywG(a1i%reJKy4M7+ ztIb9B2T!2Yv;G9EK_tN6Ff%{fD-}%I_6``@9`a2wd5dC)>4K4K^zn#BF7Q%_9bEO; z3J%JR0H5B^1fIiPG3Squz(!L8faQ8{vqxXQ=OsA5L-8y0#H9$m%w2-+x-O($ZtkG> z_tb&BP7vHpvBNs*&M0W3GhADf19#1t4*Ul^M@5q@@WYFn(4r3K!IdoyP@3ZyO1H{e zI??U~^_26*FS>tw6kFtmy_&c~^VL%j@XrQkKY7!q-xU)D9)O|e-*^J)`7R3CEUU^r8Nt501{_F{F%nMb)q`F z-Xa(IPb0tc&>odEe*{d%?Lle$jpzjb9_m(4123Aq2j#D+hu!vGLtC|8QbuR}K$n4A zz?$OM$Y9oJv@Bvj{pScfymnJ<@aEYFw1wV_N*ZTVhn82v#e<5d_Pgp++g5Y{zN59_ zwqOp7>Ng%4WWJ(1t=oc5T%V2nu66}~#j3;6ebiy^1~tHjS#_aRO+yf^wjK3oTLlI^ zTaT(gct?4}e?)d6Ht6;OeUO*Bmb$xV4wy=ZQQeo-fQxO_{9LC5qhn3i0?h{XVCSa~ z&;;&Na=?Fh9%0X-g3}UD9 
z{5I6lgO98S0BpYv)zMvnN=oL^aToVdMt+u{PKh~88&nl{(E0@KfZA}q`vde*zZyPa zb^}bX;(+YHU7(?d4Q#0!gg7zjC})1|j#*95gW3}wBOT}2XjYm9>U-=OihIU_Y6*t; z-L2i=&4%m9vd2o)3y@@)z4Z|6SQlnC8%^uabHRU9YXjP`*>H>5KlGkYZnzuQ3|uy< ziJQF81XtJA!u{IT$NSS~BBxg$LFz^$SSyPK=N_#Mo_p#;YyJ`{YH>4AKle7RodCg_ zP7W}W)eRhR4nTd|+QB9_^f1?98$Gq&4WParqa&Z{;J9JV;6RnbXlGzQP8^Cb6h*X z0(GOO(9a^Qu&zfnJjHFBZ?}h|kVUE`HtWMh(bNr;<=7IuKd260_FqJH>K##x?KIl& zxGp?xdm4DEHGl;KGmm;MqtZq;#Elvl;2AH5fWvic@xQO?5zLqYx)z;73mY|rDW2od z<#X?-s?~zf>LOb}``RJB&Zp7LhdJN`-wZTEfWdeRzj*I-OvVrzlGeAVuE1+Pr zCbnH=fjf>ff*n?H@IQxa=m%yu=&Am0I3G>}uksFoPVG^C$>Op3W{3R2wpQ90b{>o( zecJ%tk$&jW_38Au(?O_dAZc6ADlj!544K&NMa7H81EaMy;NzNaz=d_1@YMwloR~5Z z9KS%43Dfm));Jwl$WwzG-n0Q5p0YuH$_O-cR3p5@`V#0+QiDSO#Gwx+JMtYHuBSdN z2?S;ze1P`Si`3Mm&hTvOCiL#S$#k5(E$$w)1st;90;st|z(vm&;A!8p$li7lnmp(# zde*E4ZgA8J8tN^fM@`D4oKhWe*H`VqsR0v!b+|sh(*Hg(s<9m1UtJgX{OE*Ms4pU3 zISTZo#(=JayHaERI0l9oCIFN9N#N|l4d7U+HlE-0DneWQ0MBO{l}OrM++neA0RYNpww3ctDCbOu$U1#|{`8XT&&Ax*Q zNwQp{(P`-1gM6eB^arqvukQQovN0I4$qIdXanZMr=LxjAe>^x_WgOa6XbWc~Pop`< z8$gror|FW%ZZPa@6=?5S9qxGi0lhjLj#_hGQ%#NUBlo+nfx##UK1AICO?6E`eb2{q z?8bxW#3>W(Gxj7p5O^Of^#1_t&ZxnmZK~trz3lO>*SA48`)O!{RW%%S>>se^?sjl< ztOXqLaxri_(VChRJA=-cSQk&W=fHL@b5NpIEU06Ugx+vx&-K23K~321Up8cIt^wy{>tlXeV|eS3GyD{-0Sfm0jk@#C(esX-M;dvbKxT|Ne^XVs#Jz;R;8qOkbZ&|+UD1JmG_Xb80^Fg4*IIO?1C4xdH-^ph_oMq;e2{b8 zUi3Kq4VvMj4d>Oojb?SJg=;PwkAg>pg56duJT2}O=+e#4FL<*y&U>|->d&=+yIl66 zH)k$^!H06uoU0Sj&R4@xac5oFXWamFGNUnCQSbtF25@k0g!_b;!`51kaG>T(Kt={_HCxQ0}oc%{1|B?LIZz_q$*QmXo z+7zCL&b@C8jltP;r;0M zfz!d|A?C2&;VQUmSU)NNJ)q4rR)LRurqf$Gap`(9Y+(9zbvS37Be*%>B&zdxGTq|C zCD0qVz*hgiP_Z#xg5%b$!pla28hWzPj_+sB` zu;m}OfX4E5v^Lug40Oyy4vW-X=3pY@-OX1+9pk3*^fol6T)TWc-ku$*LZ z30qlPsg<%Lxq~u|Pd9mnBExT@I}ev4P2Nky{pT5)ky)5Opua8}y>}a}r9K*TI>1 zng}}i#Djmcs$mD6v7j>!M}w!>!duTxQ1cTJVASa9aHCcr7`fR9XWG32b3eWYOcQSyrUI*J`DpFhdUy%11kJy4 z1=YLYj%Lj_!lP^~L8on+*q1vR#ctgXMz0@%{Qs$jK}SP8WVjA~T(<_C{I@Q!YuE}5 zbhiLI53WEDcQ(Lp`KRf>CULNF;{p`nsY@Gfb%7Jimw|VCjIpOz7+Ss06;8eq3A)xD 
z1FT~^vhbS4JQP%(6YLY@KS^ZI)BXpFKx6K+`F^~EFG$WJDRQqy|!Hk_wQL@ z?n)gPKa~xq__(00{(FJH=}>xb9+{XejZtLOWZIzHajMZ+2YlA;X1>L=zd$da%XFOy zck`#G%?8t5*ihrzJg}~#A$2NyJUzR04U|JQLAh>2Y0ZHcU1@5Cm&DoO?2H(CeUElv zYP+i-E8{Av;g$j3^}pk1dQu-c+iwIX=ez(DF5f~AxcAVNQLCvSOJi_0z!sgEcL&@r zi3T_PvuM{6gltEfK>y$)s6(f}{F>A?!0X@4*fH|VT(ocfmjAJLA3#}T+y1Zt1xW%1 zkYGTJAQA)>d3$X!E9RU6Zh z=X_N&_ttN!tf`{ke(C+JwRd;#1U))nEN?b_#J>1bkx?g>oe$!?H&kP+)^)0t@xsL^? zQK8FxbZt6a%sXAFI=h^>ZRH~KEw!eOOS^EX@mnZcoAKuvH(b5Nmv}^;SJZ9y6K>*R zE!S--$xRX?D5jyU+?E~7o7@xVL@9UC;7lo*S-2x*UHeE2qF!<4#xC@D-FmfLi4|=4 z)K=O=l@PTXC2;c0QR;*=BM~F?<@jRnxXRHSerKJLZamCFM$Yd?ODiJhv2+arKLmTdP*QtnybrmbgA(ma&tl(p@y1UXFZkx^6!gaEKNZy2QzDWyRv?$2>UY0OeR+ zJ?TcDJ8V4bK3$ntPV6c9jz=`P zkZ!nt1QjwJN+u1DsC%+HQ@NEUVsvRgewI^C+P&>Xood;N!4;VH-eB3kFl>O6Azpvz?NBtIq39gU;Xy6YHqceRtMLuUux z5tB#6Cz_CA7ek5d@A0{ZJzH+LLtL(mfC5>eLc}UAuy_W=_R8keNhXwG(vA&eHv4IvE?16}qcr{9 zbYRmk+MvA{WH@)@)SFgvqt=l<-Cs|B^e-Wr&U(YXPtR$cg?o6fj9`bx0aUZfZBFTP zRy947L*{9f>7{n#Ou18-`W&&8{sU~}8T&6U2^;A3!?)aU!6dFnUl#9I{D6LCAry2QN zq=};Cas7qt-oa76*q^1AZ*4>lgVVTIv>lt?tRNGV!TiRlB~>&$P6y4Na(t+n=y~3e z^iPlWomIL6j~{=NbVrNJGJ6Npi#h@7igTLd^Qp{FUQxS^R$@lpS2XN|6-QW`iiHu0 zJoJ58b->k9GPSgW^8BNo$nm$86_2~gYMGx&Y}ONbobGVV^~Gq#>*BKWzyR7+&rxw& zJVNb%;1bnoVkIkPdC0QUwkj=CokUhjO&$@LrA%+oLdjlNn`_%dvil)xIUx2f&1@b| zMKWx~+(iXtuk&t+d4 zY9pafsPLNrc{%?r?$GQY#k&rWL3MmY0ouv-y z3GVE+Lrt>lCH1^&>pDjlCq?Yw2E$4T54V*{(W64FXj4LF1=Odpg!wYpUXV{lQU%g`nvx#b zBAlL^+KSrehe^F>^V3#EG#4o)JMyZfB}8OXRegDW5}z+rjbq(=>AW2dasTyN>R42w z^M?zGTV7p7i*_}H-o^vEl$RB#b1Pf*xXTk|Tf=jd;*`u8E%n);YIoUra&w_Kjg`_~ z2GVhOL(cTvLZNd5M1|O{;&ko~a?^!ik!^E}77S@663Up%Jp)hB?D)YVHem_9UYJV^ zEmlb$-_V$EO!1asTZ;*w!pq5Te|y=#@-r?yo8+o@^VliFODxM*g^p^o2$CB$6fN>p zm!mEGMb4Qa^7OhB{L-?o99zv>-alGNcqDceR((z=;pe)`Ti1N#@jfx~W%dX9bhM-B zyC_szgf|s&yo)?)l@jX~X_KYGE68>BJ-M!V0yi-otL6^6&Bi_JkZbJ#k=^^<-=;n3jjdOucJ4Z&!PLUSc0?XAc;a);T7OxMEK*V!-=0K1!ya&a zsx~pjrHJU;N+@Y;W6=T?AZ6C3$ zQVY?^zoE2w-c1UNfnwXWPBOOZK;6WGFZklc(z3;<4r0OXYVwv}2a$Q!S;qG3nXXr- 
zTe@+W2kTj_-ljLW9{2Fq9s^%D7B^yx$>b^qGR{X&_?Wij%zGy6^uD&J6x>+Emc64! zE*is`r91P`;BL~T!`Ei0;GY|aZ!J*v{?5{4#1-{?F?V5Q*i^yzvk&F`Kz_EjQZx zGOx_rCy~0($fSbBCve9V^K^Gdp5;*vb7}v_d(>)DdHJzy4;p7ZRW(f7NqT##P*tZS zWwg;^HtZV8yNfiYT&;?7?!I1fYt>@X%6czt-_cSrtmGiNY)B$@x0j$r8DNqqEitF$eNJC*!<$E7#Ax{oIe+^-&69;X|%Ze@DFl#BdeSP#ye zy^{=!nv26JH~7f@?L7RTwQOXNUo^^9m79$yB#K5h=l%<#w54QHv732{D-Cp!Bh{Nq zv3=UL>|olOe3QGICh+0XY3hMbHnNpRHrHL+kDPb!;trBe8xH5Bm3gFD z>SpRQu&CUaT%A{h%-~(2wqi;LZ+U)*qwE#2hF3Iyq9z5Oq=zo!(rdMI6~~WGr+rx` zeOFF&>73y-?d>DFaKlwxsb(fOJ?^NkEM_Ek^s<%~n?9!} z71VBk%ks!pnT6%6iR;;B;xx{GuD#lAU>pxQ(U6WEt)gxknMKaC@>9>rIn+*D4wUpM zQ1$7UUtE||NCa(c?Yk}5f@fKk6_s}eP_u`t*snx>HK+YNW{=}M^`nREa4C^LK5f7o|`+J`BLMD-6)_T6tr5N>!drYgsCaC2DOk`x#TI!nDOip$P<0}hC(2`e&)Q7v) z@ZbWss7JpIO7Iz1nLgE2M)kPLO)^IKF7!y@njehByQM(kQwNOYNbn2((vPtUgd@>Wx4es^*b z*Hk)jtDt&Ra%&1#D`6yk-X!TE9)y|m1gbb||j{!G`Go+F#Qi)qGoH{RIs z82eSWr{zi8xkT(MrCeg z`C+-G)k+(hmso_aKWms~xT?79vBOICn3ji6uI!*o?p2B3?paLFiWU$FmDiGu-yEJ3 zo=t_Wom2cx3JQy&+DOpF4{E0n6ZW1HukJIx&9?PjMVqau)Y8?1KQ=F@WG>7_hdRAe zQ*sZZvSBkxiFT3q#t*098P4+LB!8N@q_FfD=tk|wnW-6Foz!M|KdNGVS$bsof*wCU zlx|r+)cc=JCf=eR*)m@Kl1!_Pn8XArt`d@#e6^WhH`Lq}@Dv z^f?-m;73P{gJ{2PUg3MF3-@nhthWE$lZrPkE`53#%J&xVwLoq;HhT*_a4SXwW8>-EVtsKjt1KDUk76gdQb_W0W&6jA>Pf%i;#7ARv9!ums?+Kz9XtDk6INa#MVoW= zOvX^%BKhUq$piT1`NDF|; zONM2V(r#pVnZ3?N&UaOm^92v6%SN)2{m@X3HF4*j&h}!&>Pci-&zrA$Zc%4y%hUXF zpGjBxEKrVKZp-hR9#bY(jii;;FCDDj;(AFhg? zDA7`DSC%Kx{!@9yn#GNIW_l7|Tz*{XaA~LNcW}PCFN~1V4Tww|? 
zBqvNX5laggi>6IF)3ZJLVqA?=G-7iB<&Jjm*j)Mn?UXJYntg$ac^8(skGpYDUKfR< zjm6<=RcVPXzr2;6Uy{iZuHNrDznhjvv{Fn&P3`S|>Tx^u?2`oEI?RpB53`o94&@Wk z<1Ex2oqRcZ{ysjP=*I~K9qEC4rZRU+Zs}4ppR74{jauopsjOAFn^JaSFPiV(N*Nkz zE5qW=#q4qI_*|>+>2LG5rBzb|&m2CQ_vhVA9=EK-J-tF=g*LJkT`WPF9d99S z$9oByZXq6bJ+0=`mcHMcIm*^X&QiDk6V;mkicTz?#tU{>h`?&I)Fs2WP=Uo8 zY1q5+e9$(7ug5*)ck%P7?T2SHVC!KT^v;|0^X5z6x%(=e7{7s9@72~0o%l$X$HsEM zG9|>>r=>;N&Wq?quB+MzsIgc)%}^W*8cI*EnTq~K&*-Ao(Nk)xfjE}5oOW*%2(Buz1nLdAH(OU=O1q2v2EAzq41SF>!h0;6XYzD`pl%pN0+G0#7f<# zjQh0i;v;&eZz0Dw%Pa4AyyLhx^Ob4aCUE%X(QJ0Ql95^*-d2 zS!0eW=l7ma`t@bD37JSc+by7~BAG62(4LiEnTfXSNgLhri1zbu@<)pql&gu243KGD zyZLrH@%#x#<}_2?FU;akj_+x}yi7VX{*-Q+o`ulMtm2#UX@oN5^CG_BVkc`k7%AOS zS8>1mv#4>Yjg%PfAa;#>lpYd0k0K7NRvzZctJ*If&%OsXa{Fbqm6!sa^3#MPR3^G7 zKU~|A^R;x7Ca01p+|oc6D)p8Yy9rJo>cA21Yt)CGwfoV_J#;|7w2bfQDSBt=3+EY5 z^3EGOHLg>F^5L+X=*3T2CU}cwEsBVwX3x}V^*)i!hZ$TUXd!i;=qgu@*iMnIBdCSR zL~g5eQS^OA(Wn)9XtTeGd>e3(x>er6{mZwZO|~g~!Ng5e-kgVKl={rc^V`zLQ$<9c zI0tDmYbVtyU5xtKj3(==L0q-(7BUN+lD;kFfU@an30ZRbTTWgYOJhdPC$p%V%EQgJ zthEcib*xtK$)#g?Orv#Vky=)kdRajZ+*w4LA9j|}IdwU7w4RuFsFVm9;33ys8%x*j z=9Z3@P8=NZh@9tVQCQ4C&X}1?ICQy5%i@dDR*Q`sYwjr48*b$W9oOj=jC@HwZWk9% z11|EV#NK3;U7L$XCR67c1-QjDAKJEW9FNS}!b3*C;2O=G#H;RZ;(UgS+}JULhOKmw zGcLEGrzW0qOdk_rKU|xkWs<{VY?shA{W{ucyS0pK{*;r;cBe*93sb(5<|5C zSGnHfFpu2PgS;zyuwRd|(j@OL<@~%=+EzG&*l*=?ntn5uht6tB%N$Sfq)@ZftP>0$YqND5_W2QGrn^zi}T+ z%OWF4S?VrEo3*8~?X6`#b9=dawKW$WWhIji4(Gm|n{tA`F;DZELHpNuasBSGG^y}4 z<+N`JS$)@8-HKj4xZAzrBIm(lrA_DJa@`Cc{&2XI(A5=OCqbK`I5CVP@7<*uH`{O* zmm9QUgR@v|-E+Rf$Dk5Jz%Alt&Y{ZsIYkBlo8!BM7igR5y5~KaAa>4>vS@UXs zd49qJo>VM}wEZUJroOlMS;Hc7UB|}UKB|IPQ`b$H`SfI6jG27-QeV{1<1GuUJE9sr zKd<_I(q=XmE+A_^%VOPzcrM^Khy43);e!>AqdkVAkP|m(-q16Y zvCUFmxv-eR>*wQ*+d6VwlE3bzwuHT9&Jk+f_J;DI_YRu3b_~DxG?NlX>`@J`7mzmw zpX6c#J}4W<>}1DF;e2h+Qa-jzi|;uDvC&Gq-ww9pdBbK>#uaB_*7sogrC~Rz^z#B@ zS>_>i|Cm`yb=x{R3x6xQMsK<@B&0gqR4OQIR++`+TX@RK7c%MC&R5(s?^0!r=5Kkp zpeUN_8lP_9skUi5hy9DB@CWM>+B0oIv1V8tJsCZQLWhnmO 
z+U;Dw9NGP&m*BUN-9}5>roU?y#?5zRaP4?|QM2wj8rsN24(O^cR<^IJtnBVZQ;Ov% zd#xX+=2Oavo7=R`@!N~k-cMb`Id@B1anwrWtI&ta-%Qm#&n(4f8Wj>_)oT8?%`?`FD!xfbFW|O@vad-W(JYy39{NC++b!lrp_!C%!AaPA zxr;9K-YS_(M(|35XB2VVQ>M(#E2Ar)qlkruV%PvZxu=h@Z1?sqMV^RJU)ML*hR!D_ z8v_ibS9}~@cwb0%ua`kxhL)C%EjrT2rnlInLw~wH%~+POFDVkb=}Cj)#g~z70xaZ{i7ARvXML$qH2EbIm5yCWo7VJ>hx9BX);d}9 zKoeuJAjCwpSviYRT0NqhPV>2R$xQCuZ326pJ;_zx6_{B2mjz7OtxO?V0Hmos4 z?Xk;J_$SY%o`nl2V*hC_b7zujQE4>kYSm`_yH>J!$;KSj_<(xrLOyYBmbM?*@kwgX zp1dM3d$PLzV{!4gTS<|W;w~?adCtjWEW{~0Pg!hSOEz)MrkyA3g==2}+3(;$-O7G} zlw9)_m(04rCADe%d4o&I8+GlZahd!gY~&(dZe2juExt@$*!T>0GjfvJ{3_ny>PE{7 zf2PbKH$+U2JIdLh+k_JRZ(v3O8_|6?Y(Q)D`I(dCHJ&7$Mrh1R$ta{Px zCk&2PeoTs$~>2retZO*w>`j5 zmk&&9XEL9Xo0S&fcZhU%%gbJiPE(AwFUOYYwq$X07`HvLfKN>J=CoY*NN5}ZXR;|(3Q77GnKn#GVhJ?7W3Lq;f6VP_+x4*x#r_5 zn$+!-(qr%xT3&Gm*=qX}ZMl0=8Nafycy--cblAL$mcA`4tF5({UAq(!!LOb8NR6^` zZ`NuxaLonIxpbf9$5pzkCmqF{<}+#0UDt#m6oocB-BQTP27d1>``Y&T>wO$vC=A;EVjXl^10WVNOf(+kOQw}w&8{&u4C z%0-k=$d0pSJJY=9+QZ1z^0MJ&TY08#BeLli&wg=UqR6_b^v3rJk6L6d&VGKU%+x(n z2OZ3xW)yi(ZP>|;Y_?m7`_Z3tZ6a>)I2V0cY0zdh%ilz#r+#7^{d?TQE5E!ya~Dna z&L`it%;X_i8~A$rjl8;V5pkhcZdo~}It`wkSJd?_DsES<%p0Q~ai3j!(`f z3tTZ3U6vN&P30Yh>k?KLo|sbQq6N`{*a~8;s=PRCH6``;(n*NPuRu1nvRu$MZ9TZwROyAntBFr7U6h%b8|r*(!k`GHFrs$ylNCTolI zuiVr&!)T)?2DDqK8|-N$rsSAV#I!^@_)=S5lC_!-UnnJa*LM>~Z!MxjcTGg)_&nnE zCv94lmm9t4G?EV8y3TcX&C!_;%_lbNdy`S&Im#mQ?QEju_x`5&#D${;*;f}&2V=Bp z<^3+J2BAx-jP^i3BzQPgem0&~?@!|7i`w*Pi)@-*;xernmq(skX)l|<+fSRCUn0-G zN2t2$B03I_r`J37a?rk&f1%+#;W z1BqIr3z})OMoQ1+ zKIWU~-ohg4acz-lUK1CN2zt*Y>|e3Z*2+BX<9)u^>55Y0WNumI_G&UaK1J<)@VZjl zA{Q0fnZR3;a?xFzkJMMYSG~!uq;}X+K|GIJOKrmPOINLv^rqh^GL0xeSx0&(_j_HX zXAMU2z#C6^d&oh4QouvnIhPS(+6mI5J(@;tAw}i;1%bTF z>lN=>X(%^zysKV|F%eU=1-nh1w5@c09@+i=l<;j_CgO&O! 
z{dyb9%exE7E(PPb?8>S%!tC3w#w;xA# z!-toW>&)(}HSdq0J&7a8wb5gWGdMuEY>S9}?j=Rah$JQKc1OAIK#UaiTZ(Nr9XM{m zOzymK6HgzKNH1T-lVha_+27?OZ!tF);qKa=600n@*~uPqAe+fbZAsR(OQ9xD>x-9( z<;0e4!D7kQNa1(LQdo6esXAUcM9ZF+5_)-M`oUiABH@9VNSbEJlSed@%i5;%%STx> zxm{`bbhe*}-!)M5FsjZuo0m{or@}JUUh4;*Rax$tGE18!Ifhq_y-xit+luU#qc}OE zveX-ULmhaim)H~`>AYPWTTSt$x1S0MsqMeg=CPgJW6(y#wsKa6KB^|eW^N+K8>yss z*~E9%-BKg36VZ++KZ$-i>bqSQ(3oQG0t(QD+ZRal$|Y1 zIeTCdWpy4T+s$Yz*Ika}tiiLCnjLp(vwJ&;^V)WeNnL%|AS+NzjwmmDo*43~koF?$ z-WrZ~)%J?XXfKS*g^61pWrSXl#^PaLA+i^3;;^Y@#aO$$95X=c(@*auvcpPp%t1$? zS9?X8PxcI!*Gr3}VS_2N?_o~bGnkA|6K6>WUM351;Tun=!-E6d#=HuhZ#RjBvv!*p zWJfXmPVqARE7alTT7FyFTYlOVApAP{$m|PE`RZ;(=Ja+EL4yZNzk|J0y`1Q@oCsU- zwv4TKI@Fc2wXMM}58BR|ZJKkl%)T-!dx4sqHbE>!EvdhjFL%C5We@G={)SB{NEazuG&)1AE)Eq|r6#elm4jS% zr>@ATvqQ;_3*yMm`;`1X4dvL(4;<9GgE($+oSz$36tmwBlVwXB=a=KX&7)EF+*GLt#;#_XYQi2wTqfE!h}-Tm~Txuz@g)9McJLktnNEOu_Y%` z%*6_#>wXhqFw{++d^kW_Ju4&2CM43QH|3=5?xNbbSbo`ca*VcCe4^sRu5=@&nABU5 zoqqY%V}6=lpKBg}N82h_6OOkGl~vuUiO$K-sK+(~_FL0aJ)d!tmz}kun02ezV|y;K zr%FEIQ7@YGI#r{$vyPG8s`}|;yWi(cb)S)se+<1%x0Fd`SMZ8{y@g}9#WYqM=kZHi zsV+OF6Qj-h3C9gxq?K)BJ{Vg@$~tDEL!7%z`LJBM+|NRe-rqsgE?i3P>F|-Q`nd7& z9#fQ}d3uVh$M^Y8oU81d)`Ksa3z5?Hu@YotAuld#D~0CpUM#$JAco7aw!f`*SnXR`*pZ6O>hj+c>uJ^Us z-5>YS2=~pD-?I|W^iEJK?e9uQnmVzE?mRF3_?`PlVdu!6f9&@;R$uzRtWGviH_aM8oDz!d%ExqZ&NJOR`L0gcsOp!Gqe~;0b3RR` z`*m^&{i8;Fs%dWu9nhWQYpz!hTy*13VI$Po=GVw6_c6BLcbk(;uTZrSGr0NKR@|w6 zH4aU^lOD2b4+RanL(hWmszwPDctOb<>hhK=sH)W?suTW_vhVMv8V@B0&v&Dl$x}J% zWh#Z78O`Ts1gf9QxT?lycW}u244vnQgA{6)kskW)JQ+W5Pam*&9p`)Uif<2C#B=<2 za*2hDS*7WUoAYNbZT?s>E4h{5+}x!E*kvoO{)c$k_*1kc-cH+|`L?!dUOr3K}wc5moKki7!3N zt!-0$lMjaGkq189QctvaO+z0W$ePjKe5}j`wfpRbw5GvGYE@3*Q4ibji>;@r_T)9` zOP5@v&sO)iqHAtpn(q!jzq5ztoDxwXzyQGiFxgRYmqG(OKiw zyC(P9>UBG+ws{J1uctI*vVnZ>{gBJnxTG7CU7AWcWU8MHM{}i`U!kzaU7>koy7Zd z9#WH4hT>zJT3WrSN|vpsQ&N{j+`;U)5<5>%_W7jJp3?)V(S)r$X@|b3+H0qB@|dI*kma7N*mA>77N?M0BD#lb5IyFD+yJ3#XJ7PF1M(#B3_pC_u^QVkT8#jJH z#TUe|Rm?RK11qvuwQ)=%I%#u7_E7sinLI2si$_n-&?XM2GHKl>W9sLpv2#DF>j!)C 
z%lO=Mzvf9ExpJs_c%!}?E%tDudZ&3q;AwWaKY=$byTe;%9Mo1w>vPYxD)-C!NL7+j zsNCQ;{Jf2s@D1FrcwYOYv{urTS&N^k+f3$hEw!6^z($`MPA*QJmpG`cMq1G&#}Ues z*oD-sxF0?5I!%`%1PtO=Z#Rn&{x+00Kq7G8Id<$vJ zSZ8t$&Y@KQJKFfia`p6-!=zi>jP^9B%SA>kqWzY6#HIKEwa|n|H0NU)8As+8W0vfu zb;nzhl75$5Z=F^uFO8=SOU!7}y5+ihS%wroHdTGUcDTC!g0mWEP)FNSFHX4>DWmMeX{Kb&n~!coz?~fYA;*>e-tk8)fL$_k2e6PK42zJ*i6OQ3F}~ z%@wjZlCF~BaCO^=Hk{rjm%O~?G}mw1fgM9mDJDg)^0C4Se_j%y%bRB|jZd3MCo}IU zfi*WPxksGja;FUC{c^4p_GCVdER$C$+kP8Yf3kH5HCD5=j+v$zxkSsROD7 zYP~?WweFYMRAJ=}9++()0|RH$!J)$_vdV6HbjyLG2M$yo*59t!?w`DUMXyuz;)yLq z?pmXE(Yo!P2HDV@QL&V@Hk}`NEaBqX%KNns-zb}WcPU2~AD|_lE~>*XkL1*CnQC>f zmWs=UQ`E9@bG|y{BsCqlg&*gs$}hC>)!T9Bc+Kn=JmaW|Z}`~@YTTop+#~uZHQ2bC z@01)x!Sn8NfgqJ0(rNbJZ7Q}Wl+xYzszCGnW>LK&lGb0CO~ZAqXn373+o~1e<%T`k zIoEMoXm_3F&rG545^rgJwPZ5T+KKKrkMRs|?Nzc^HnlrHRZa4k%TKIl^NmK)v`v4F z8ujuzUr6<&Ouw;wSQ}fZKGBkU7dxPoOpD^@*N&>j+U^var?23;LH+q%RyZk_-tz86 zu~asBzVc{67k>Wa0v{}MlpgALRLe?z>GFO~`u)kP$a7*I`KrW>^uY7l^SJLCiYoYs z%MYH$`Sv(*#DbgDvs^B@bJ{sdt~yO=+AdA$Qf)UMsCk*LCv_%$>m6jiF}FC zN}(t748(+{qz*NB&(%nKMy-{_1&q!qZTp{53RUW>1|As2Jw0cUh%=H+>Xo7Jr!AEF zTawckzFk56FLvY=7iaP6>hsmqahKS{a0Q=xzFzSdvyoSwc|~g2Xl0vEBCjp8QVA)i z_07B9p@Gfyi2aSIQh9ylfsqTf7Uy~W)=b)7`(gU#8YB6gv!rVqV$^o=)6yTioK)Q- z{8eEzfrpk&rHZlUO6Qt0(q9cetJvu-a=&St)d`hMMdz+v6qn4Ay4Krj@VUd|)sSd& zIe+0;&OhzAy5He7*WBuw9=33t!p=t2@4#fbvbm`ebx)hWkdaD0rZ2hU3w@ERP&`{T z{;1xlvekFUzRQ`oBKOt0O4Z6g<0C^%M4j?` zct+VI9LB`AeHRiILD z`f#WGN!%sKOjPf2gt`Q2^I+y3pyk2GNEW)!yT`m$Dh@58+t!K zi>t5FPQRtRR#H7mamC4Z`P#Bsif=V-vVH0V4k{X>-k)SB zC#j>T;<%}-zjO%&wl)^;Csn6zJwv#D@*4`ilgK8qX4*>kEo#3t4|&_O*2l2i+kLIfDUz-S@b={RQbF{kUFhkkivA3?GZ3}8U_9gWyvR8e3 zI9rM9TSPfp`7UkjJ4u%~=eln1kxhK6&3T?Ry%oQkaD*>>TumouK4a6kO+2Z@1htRZ zM4qaiq2=p7`8JPO#`Bjo;Vxry%Ok6e<>bQiDPfAK+`G#{SvB4~rkXW7vTbIHn+T4?I7hHM#vF^02 z&PZB5udi-gTW!^){s{`m*~r)L^kxt5Lo}hzYTj8kTP?SHFZm}Gp;pdQx$;)+xvSmR znYq9Cv&;T|pBz(oqE9N992Q3V@{i>61}_yg>Xzb>ouu^G)`g1C^z!X=Ad5rWb)#Ur zC-nN%Ev|n)m;7*Ss5+@v0=snjZTF{td3pQ3@6-7v)n#XMZO_@#$%^@`a&oY(w*BVD 
zYU1JKF7jNYSkLarXMZzwDVZl-UF_*Rvt8vRYwnkhNy_%2L*7dt&SMmuzZhCF@bwB3c zE~MZJGn<+*A4QGe9?xsc&?mo`{CR4)SZeROhJ6FW0zCY47utHl$7O4X@d|q@qw0nO zgQFs%{g;L4;XepK00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=|Dk}3gSI*=Pj$0C;XxtcT>}F9heZ1u z>;LwpE;e5e8hu&mRH1^u@#o+EbJ_BBxs!hN+!5iy-&Q-tR{gTF3jbh$96$~r2ap5E z0ptL306BmhKn@@WkORm81R1IPj70CE61fE+*$AP0~G$N}U4asWAi z96$~r2ap5E0ptL306BmhKn@@WkORm81R1IPj70CE61fE+*$AP0~G$N}U4asWAi96$~r2ap5E0ptL306BmhKn@@WkORm8 z1R1IU5@BnMokC+5}GE*V>CtC#W&433J34yfQCo0#{1@oFwyIs_m9 z0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U< z00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa z0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U< 
z00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bcL z9}2Wf&^OiZpWo6nud#l96VqJAE9^}TjEgvtse5G4?rt#=QC)ht1^17Ph>CIR9u*kb zBOp4Ydw5_>pQvEBdf}0MVwy(=cX8_(65chxY0Kudng!Hr)TCt#?evQOgVQ5}qoPBi zV}iqD^5lbp>Jv(@+JJ?!7=?JqI$Y{xb+JT>E0tIzlEu}7ARvaL{?uDN%@~Al1YAW^+Hd$D*OUMMis$=Rif8>(@xBz|=LmZJI)Z<55&x|S zdi|FX^!mRLL9c+X5qzj`YX4gk^xs!R!@oCEtNoQsEgTu}Q+dAE##{QPwm%*I$5qLy zf2E-Izb@$iBdY(*%K7g{^^Y3*>!|+Is_DNU)!*vseSK5Az?d&jI3W??0bN6)g1f~0 zSNF`nxyAMSD;ew%8Sv-ge6Ok6!$F}xAN|KwRPk4eY4^)weyyj!jOZ__>6eGq|E&@I zqn7?MqCZtq{lE7f^KVD=hx_UGw2cbxtG$&3|3fS3Uu}7RD$n;?`laRl>F__Un*P<6 z_otF*1^wfG`qM$Te`ZDf_oMnpP5pIL|7lhA-;e4Kb@gj^M3;#0zR_CNe))gEFJ3|& zwdU9H?`@a*(KH(7zhh?fc;a`YS=T{Zmk*yZj!{%lf7T|6I&}Rw#9UrGPd+7w~Ho zzw|i#P(087W38^||GU-wqqu*H=-=E9{Tr?B9|ipy)jzem--`KvsMY;e+W)%M{a)Zd z1@>=li~j11`1cwa@V%&CBm1XT*Yp2StLqu?U$(kmi}}wA<-gJDel6hFC}wFll`kEi z|IJWK#lQC{_pcW5hmw4M4mz)In*WCb+VjCbD}Gh}%3ZVIw_e@f!lV`Mm(lxYzIpyT z(fi|_^Uu-yr@d|dJJI{?J#)XlsoB?&!ssr6VZl*9ylWM4s`5V{`ZKEDvUctNXjrpF zRG(m{3V$UX^9Fqz@o71B3IMYttk0e65?>Or6j_e@=U>`Q_)znHK)iI`w~#Ld9>=KSeJpI8vJ;_VxGvc%_3)aa|}FBvngXpWgMXy#`? 
z)R9kicyc@Wdy-HEzB1C!&6jt(E!7#mYYpi1&xxJ!nzfmBFG6x?cyR&n=9 z3%@m9h21BPwL`45`#=WJV)unc_JL#&+V$0(;WLYLW#fGZVGYE_&_8N@V&e&io z`AQDs_1~GmSmH$-M*r^}+_qg;-FdFaH0qZkS)lo zm1~N2(repeat_name, input_name, output_name)); break; + case ETensorType::INT64: op.reset(new ROperator_Tile(repeat_name, input_name, output_name)); break; default: throw std::runtime_error("TMVA::SOFIE - Unsupported - Operator Tile does not yet support input type " + std::to_string(static_cast(input_type))); diff --git a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx index 27c2c1a..5924836 100644 --- a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx +++ b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx @@ -135,6 +135,7 @@ struct ExtractDataFromTP { }; template std::shared_ptr GetInitializedTensorData(onnx::TensorProto * tensorproto, size_t length) { + std::cout<<"Getting Initialized Tensor data for tensor " << tensorproto->name() << " of type " << tensorproto->data_type() << " and length " << length << std::endl; std::shared_ptr data(malloc(length * sizeof(T)), free); if (!tensorproto->raw_data().empty()) { @@ -586,6 +587,13 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & if (verbose) std::cout << "add INT64 initialized tensor " << input_name << " shape " << ConvertShapeToString(shape) << std::endl; rmodel.AddInitializedTensor(input_name, ETensorType::INT64, shape, data); allInitializedTensors[input_name] = i; + std::cout<<"Printing initialized values for tensor: "<(data.get()); + + for (size_t i = 0; i < fLength; ++i) { + std::cout << rawData[i] << " "; + } + std::cout << std::endl; break; } default: From c3b892a9b6d76ff8883be764627eead3eff9ac63 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 31 Mar 2026 01:23:46 +0200 Subject: [PATCH 32/43] feat: alpaka wait only before blas calls --- src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 1 - .../inc/SOFIE/ROperator_BasicBinary.hxx | 8 +-- src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx 
| 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 5 +- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 1 + .../inc/SOFIE/ROperator_LeakyRelu.hxx | 4 +- .../inc/SOFIE/ROperator_ScatterElements.hxx | 4 +- .../inc/SOFIE/ROperator_Sigmoid.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx | 4 +- .../inc/SOFIE/ROperator_Transpose.hxx | 4 +- src/SOFIE_core/src/RModel_ALPAKA.cxx | 2 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 54 +++++++++---------- 15 files changed, 51 insertions(+), 56 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index 601e3a9..6c272d3 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -89,7 +89,6 @@ protected: size_t idx = 1; std::cout<<"insider allocate on stack and length\n"; do { - std::cout<<"Printing idx: "< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNY + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx index 2cb797b..39c8554 100644 --- 
a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx @@ -189,8 +189,8 @@ public: out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx index 10d6d0d..d621381 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx @@ -440,9 +440,9 @@ out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << OpName << " = {elementsPerGrid_" << OpName << ", elementsPerThread_" << OpName << "};\n"; out << SP << "auto const workDiv_" << OpName << " = alpaka::getValidWorkDiv(kernelCfg_" << OpName << ", devAcc, concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << OpName + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << OpName << ", concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << 
length << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx index 786556d..cdcadae 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx @@ -338,12 +338,12 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << opName + out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP <<"alpaka::enqueue(queue, task_" << opName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx index a56b012..b6083ef 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx @@ -390,14 +390,13 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << opName + out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << totalElements << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; - + out << SP << 
"alpaka::enqueue(queue, task_" << opName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 47efe01..89ed7f1 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -434,6 +434,7 @@ namespace SOFIE{ } std::stringstream out; out << "\n//--------- Gemm_GPU_ALPAKA\n"; + out << SP << "alpaka::wait(queue);\n"; out << SP << "char " << opName << "_transA = " << (fAttrTransA ? "\'t\'" : "\'n\'") << ";\n"; out << SP << "char " << opName << "_transB = " << (fAttrTransB ? "\'t\'" : "\'n\'") << ";\n"; // need to consider case A and B have dim > 2 (for MatMul) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 1218b56..6f0c0d4 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -110,10 +110,10 @@ public: out << SP << "alpaka::KernelCfg const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNX << ", leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP <<"alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx index 5b17a79..1732d6e 100644 --- 
a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx @@ -267,13 +267,13 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNI << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNU << ")" << ", static_cast(" << totalElements << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << opName + out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", scatterElementsKernel_" << opName << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNI << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNU << ")" << ", static_cast(" << totalElements << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP <<"alpaka::enqueue(queue, task_" << opName << ");\n"; return out.str(); } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 77f989c..755e788 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -96,10 +96,10 @@ public: out << SP << "alpaka::KernelCfg const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNX << ", sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git 
a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx index c9af13e..5fdbc47 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx @@ -247,12 +247,12 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNYs[i] << ")" << ", static_cast(" << length << "));\n"; - out << SP << SP << "alpaka::exec(queue, workDiv_" << i + out << SP << SP << "auto task_" << opName << "_" << i << " = alpaka::createTaskKernel(workDiv_" << i << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNYs[i] << ")" << ", static_cast(" << length << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP << "alpaka::enqueue(queue, task_" << opName << "_" << i << ");\n"; out << SP << "}\n"; } return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx index 608308c..fdaf05b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx @@ -239,9 +239,9 @@ public: << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName << ", devAcc, " << kname << ", " << args << ");\n"; - out << SP << "alpaka::exec(queue, workDiv_" << opName + out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", " << args << ");\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP <<"alpaka::enqueue(queue, task_" << opName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index de33544..ed94166 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ 
b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -215,10 +215,10 @@ public: out << SP << "alpaka::KernelCfg const kernelCfg_" << fNOutput << " = {elementsPerGrid_" << fNOutput << ", elementsPerThread_" << fNOutput << "};\n"; out << SP << "auto const workDiv_" << fNOutput << " = alpaka::getValidWorkDiv(kernelCfg_" << fNOutput << ", devAcc, transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNOutput + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNOutput << ", transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; - out << SP <<"alpaka::wait(queue);\n"; + out << SP <<"alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index f1945b7..9690471 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -207,7 +207,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); } - // fGC += "\n\n alpaka::wait(queue);\n"; + fGC += "\n\n alpaka::wait(queue);\n"; fGC += SP + "return "; if (outputSize>1) fGC += " {"; for (size_t i = 0; i < outputSize; i++) { diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 1303251..3d66c96 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -96,40 +96,40 @@ class SofieAlpakaTest : public ::testing::Test { }; -// TEST_F(SofieAlpakaTest, Linear64) -// { -// constexpr float TOLERANCE = DEFAULT_TOLERANCE; +TEST_F(SofieAlpakaTest, 
Linear64) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; -// auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); -// float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); + auto A = alpaka::allocBuf(host, Ext1D::all(Idx{1600})); + float *A_ptr = reinterpret_cast(alpaka::getPtrNative(A)); -// for (Idx i = 0; i < 1600; ++i) { -// A_ptr[i] = 1.0; -// } + for (Idx i = 0; i < 1600; ++i) { + A_ptr[i] = 1.0; + } -// auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); -// alpaka::memcpy(queue, A_d, A); -// alpaka::wait(queue); + auto A_d = alpaka::allocBuf(device, Ext1D::all(Idx{1600})); + alpaka::memcpy(queue, A_d, A); + alpaka::wait(queue); -// auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{160})); -// { -// SOFIE_Linear_64::Session session("Linear_64_FromONNX_GPU_ALPAKA.dat"); -// auto result = session.infer(A_d); -// alpaka::wait(queue); -// cudaDeviceSynchronize(); - -// alpaka::memcpy(queue, result_h, result); -// alpaka::wait(queue); -// } + { + SOFIE_Linear_64::Session session("Linear_64_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(A_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } -// float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); -// float *correct = Linear_64_ExpectedOutput::all_ones; + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = Linear_64_ExpectedOutput::all_ones; -// for (size_t i = 0; i < 160; ++i) { -// EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); -// } -// } + for (size_t i = 0; i < 160; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE); + } +} TEST_F(SofieAlpakaTest, LinearWithLeakyRelu) { From ba9643ea7af9bcfd414d0f4da192e265efe839cd Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 13 Apr 2026 09:37:17 +0200 Subject: [PATCH 33/43] feat: Support for heterogeneous inference on 
comparison operators --- .../inc/SOFIE/ROperator_Comparision.hxx | 149 ++++++++++--- src/SOFIE_core/src/RModel_ALPAKA.cxx | 12 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 197 ++++++++++++++++++ 3 files changed, 327 insertions(+), 31 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx index a00ed28..81b5ac6 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx @@ -1,4 +1,3 @@ - #ifndef SOFIE_ROperator_Comparision #define SOFIE_ROperator_Comparision @@ -75,24 +74,19 @@ public: fNX1(UTILITY::Clean_name(nameX1)), fNX2(UTILITY::Clean_name(nameX2)), fNY(UTILITY::Clean_name(nameY)){ fKind = OperatorKind::COMPARISON; fInputTensorNames = { fNX1, fNX2 }; - - // output will be a boolean vector so should not be considered for memory optimized pool fOutputTensorNames = { fNY }; } - // type of output given input std::vector TypeInference(std::vector input) override { return input; } - // shape of output tensors given input tensors std::vector> ShapeInference(std::vector> input) override { - auto ret = input; // return vector size 1 with first input + auto ret = input; return ret; } void Initialize(RModel& model) override { - // input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNX1)){ throw std::runtime_error(std::string("TMVA SOFIE Comparision Op Input Tensor ") + fNX1 + "is not found in model"); } @@ -105,38 +99,31 @@ public: fTensorType2 = model.GetTensorType(fNX2); bool broadcast = !UTILITY::AreSameShape(fShapeX1, fShapeX2); if (broadcast) { - // Y is the common shape of A and B fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); bool broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); bool broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); - // Broadcast A to Y if (broadcastX1) { if (model.IsInitializedTensor(fNX1)) { auto data = 
model.GetInitializedTensorData(fNX1); std::shared_ptr broadcastedData( UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX1, fShapeY), std::default_delete()); - // Update the data and the shape of A model.UpdateInitializedTensor(fNX1, model.GetTensorType(fNX1), fShapeY, broadcastedData); fShapeX1 = fShapeY; } else { - // Add an intermediate tensor for broadcasting A fNBroadcastedX1 = "Broadcasted" + fNX1; model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); } } - // Broadcast B to Y if (broadcastX2) { if (model.IsInitializedTensor(fNX2)) { auto data = model.GetInitializedTensorData(fNX2); std::shared_ptr broadcastedData( UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX2, fShapeY), std::default_delete()); - // Update the data and the shape of B model.UpdateInitializedTensor(fNX2, model.GetTensorType(fNX2), fShapeY, broadcastedData); fShapeX2 = fShapeY; } else { - // Add an intermediate tensor for broadcasting B fNBroadcastedX2 = "Broadcasted" + fNX2; model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); } @@ -144,8 +131,7 @@ public: } else { fShapeY = fShapeX1; } - // case of constant tensors - if (model.IsInitializedTensor(fNX1) && model.IsInitializedTensor(fNX2) ) { + if (model.IsInitializedTensor(fNX1) && model.IsInitializedTensor(fNX2)) { fIsOutputConstant = true; auto data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); auto data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); @@ -159,9 +145,8 @@ public: << ConvertValuesToString(length,outData) << std::endl; delete [] outData; } else { - model.AddIntermediateTensor(fNY, ETensorType::BOOL , fShapeY); + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fShapeY); } - // check if this is not output operators to add a specific line for definining the tensor_xxx variable const auto & outputTensorNames = model.GetOutputTensorNames(); fIsModelOutput = false; if (std::find(outputTensorNames.begin(), 
outputTensorNames.end(), fNY) != outputTensorNames.end()) @@ -171,14 +156,12 @@ public: std::string Generate(std::string OpName) override { if (fIsOutputConstant) return ""; OpName = "op_" + OpName; - - if (fShapeY.empty()) { + if (fShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); } std::stringstream out; out << SP << "\n//------ " << ComparisionTrait::Name() << "\n"; size_t length = ConvertShapeToLength(fShapeY); - // Broadcast A if it's uninitialized if (!fNBroadcastedX1.empty()) { std::string type1 = ConvertTypeToString(fTensorType1); out << SP << "// Broadcasting uninitialized tensor " << fNX1 << "\n"; @@ -188,7 +171,6 @@ public: out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } - // Broadcast B if it's uninitialized if (!fNBroadcastedX2.empty()) { std::string type2 = ConvertTypeToString(fTensorType2); out << SP << "// Broadcasting uninitialized tensor " << fNX2 << "\n"; @@ -200,14 +182,133 @@ public: } const std::string& nameX1 = fNBroadcastedX1.empty()? fNX1 : fNBroadcastedX1; const std::string& nameX2 = fNBroadcastedX2.empty()? 
fNX2 : fNBroadcastedX2; - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; out << SP << SP << "fTensor_" << fNY << "[id] = " << ComparisionTrait::Op( "tensor_" + nameX1 + "[id]" , "tensor_" + nameX2 + "[id]") << " ;\n"; out << SP << "}\n"; - // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector if (!fIsModelOutput) out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; + return out.str(); + } + + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); + + const std::size_t D = fShapeY.size(); + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::vector shapeX1_padded(D, 1); + std::vector shapeX2_padded(D, 1); + { + size_t off1 = D - fShapeX1.size(); + for (size_t i = 0; i < fShapeX1.size(); ++i) + shapeX1_padded[off1 + i] = fShapeX1[i]; + size_t off2 = D - fShapeX2.size(); + for (size_t i = 0; i < fShapeX2.size(); ++i) + shapeX2_padded[off2 + i] = fShapeX2[i]; + } + + auto stridesX1 = UTILITY::ComputeStrideFromShape(shapeX1_padded); + auto stridesX2 = UTILITY::ComputeStrideFromShape(shapeX2_padded); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + + std::string type1 = ConvertTypeToString(fTensorType1); + std::string type2 = ConvertTypeToString(fTensorType2); + std::string kname = "ComparisonKernel_" + opName; + std::string opname = ComparisionTrait::Name(); + + std::string op; + op = "\n//------ " + opname + "_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + type1 + " const* __restrict__ x1,\n"; + op += SP + SP + SP + type2 + " const* __restrict__ 
x2,\n"; + op += SP + SP + SP + "uint8_t* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " + + std::to_string(fShapeY[d]) + "u;\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t const x1_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + if (shapeX1_padded[d] == 1) + op += SP + SP + SP + SP + SP + "0u"; + else + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(d) + + " * " + std::to_string(stridesX1[d]) + "u"; + op += (d + 1 < D) ? " +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "std::size_t const x2_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + if (shapeX2_padded[d] == 1) + op += SP + SP + SP + SP + SP + "0u"; + else + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(d) + + " * " + std::to_string(stridesX2[d]) + "u"; + op += (d + 1 < D) ? 
" +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[elem_idx] = "+ ComparisionTrait::Op("x1[x1_idx]" , "x2[x2_idx]") + " ;\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + std::string kname = "ComparisonKernel_" + opName; + return SP + kname + " comparisonKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::string kname = "comparisonKernel_" + opName; + + std::stringstream out; + out << "\n//------ " << ComparisionTrait::Name() << "_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX1 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNX2 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX1 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNX2 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << 
SP << "alpaka::enqueue(queue, task_" << opName << ");\n"; + return out.str(); } diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 9690471..4eb6a3d 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -59,13 +59,6 @@ void RModel::GenerateGPU_ALPAKA_Buffers() { std::string tensor_declaration_block = ""; for (auto &i : fIntermediateTensorInfos) { - if (i.second.type == ETensorType::BOOL) { - tensor_declaration_block += "std::vector fTensor_" + i.first + - " = std::vector(" + - std::to_string(ConvertShapeToLength(i.second.shape)) + - ");\n"; - // No pointer allocation needed for BOOL - } size_t length = ConvertShapeToLength(i.second.shape); @@ -81,6 +74,10 @@ void RModel::GenerateGPU_ALPAKA_Buffers() { tensor_declaration_block += "BufI641D deviceBuf_" + i.first + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + std::to_string(length) + "}));\n"; + } else if (i.second.type == ETensorType::BOOL) { + tensor_declaration_block += "BufUI81D deviceBuf_" + i.first + + " = alpaka::allocBuf(devAcc, Ext1D::all(Idx{" + + std::to_string(length) + "}));\n"; } } @@ -279,6 +276,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { fGC += "using BufF1D = alpaka::Buf;\n"; fGC += "using BufD1D = alpaka::Buf;\n"; fGC += "using BufI641D = alpaka::Buf;\n\n"; + fGC += "using BufUI81D = alpaka::Buf;\n\n"; fGC += "\nalpaka::Platform const platform{};\n"; fGC += "DevAcc devAcc = alpaka::getDevByIdx(platform, 0);\n"; diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 3d66c96..6238643 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -52,6 +52,17 @@ #include "GatherND_NegativeIndices_FromONNX_GPU_ALPAKA.hxx" #include "GatherND_Batch_FromONNX_GPU_ALPAKA.hxx" +#include "Equal_FromONNX_GPU_ALPAKA.hxx" +#include 
"LessOrEqual_FromONNX_GPU_ALPAKA.hxx" +#include "GreaterOrEqual_FromONNX_GPU_ALPAKA.hxx" +#include "Greater_FromONNX_GPU_ALPAKA.hxx" +#include "Less_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Equal.ref.hxx" +#include "input_models/references/LessOrEqual.ref.hxx" +#include "input_models/references/GreaterOrEqual.ref.hxx" +#include "input_models/references/Greater.ref.hxx" +#include "input_models/references/Less.ref.hxx" + #include #include #include @@ -1094,3 +1105,189 @@ TEST_F(SofieAlpakaTest, GatherND_Batch) for (size_t i = 0; i < expected.size(); ++i) EXPECT_LE(std::abs(res[i] - expected[i]), TOLERANCE) << "i=" << i; } + +TEST_F(SofieAlpakaTest, Equal) +{ + std::vector input1 = {1.0f, 2.0f, 3.0f}; + std::vector input2 = {4.0f, 2.0f, 6.0f}; + const std::size_t outputSize = sizeof(Equal_ExpectedOutput::outputs) / sizeof(bool); + + auto input1_h = alpaka::allocBuf(host, Ext1D::all(Idx{input1.size()})); + auto input2_h = alpaka::allocBuf(host, Ext1D::all(Idx{input2.size()})); + float* in1_ptr = reinterpret_cast(alpaka::getPtrNative(input1_h)); + float* in2_ptr = reinterpret_cast(alpaka::getPtrNative(input2_h)); + for (Idx i = 0; i < input1.size(); ++i) in1_ptr[i] = input1[i]; + for (Idx i = 0; i < input2.size(); ++i) in2_ptr[i] = input2[i]; + + auto input1_d = alpaka::allocBuf(device, Ext1D::all(Idx{input1.size()})); + auto input2_d = alpaka::allocBuf(device, Ext1D::all(Idx{input2.size()})); + alpaka::memcpy(queue, input1_d, input1_h); + alpaka::memcpy(queue, input2_d, input2_h); + alpaka::wait(queue); + + // Output is bool — allocate as bool buffer + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Equal::Session session("Equal_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input1_d, input2_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + bool* correct = 
Equal_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(Equal_ExpectedOutput::outputs) / sizeof(bool)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, LessOrEqual) +{ + std::vector input1 = {1.0f, 2.0f, 3.0f}; + std::vector input2 = {4.0f, 2.0f, 6.0f}; + const std::size_t outputSize = sizeof(LessOrEqual_ExpectedOutput::outputs) / sizeof(bool); + + auto input1_h = alpaka::allocBuf(host, Ext1D::all(Idx{input1.size()})); + auto input2_h = alpaka::allocBuf(host, Ext1D::all(Idx{input2.size()})); + float* in1_ptr = reinterpret_cast(alpaka::getPtrNative(input1_h)); + float* in2_ptr = reinterpret_cast(alpaka::getPtrNative(input2_h)); + for (Idx i = 0; i < input1.size(); ++i) in1_ptr[i] = input1[i]; + for (Idx i = 0; i < input2.size(); ++i) in2_ptr[i] = input2[i]; + + auto input1_d = alpaka::allocBuf(device, Ext1D::all(Idx{input1.size()})); + auto input2_d = alpaka::allocBuf(device, Ext1D::all(Idx{input2.size()})); + alpaka::memcpy(queue, input1_d, input1_h); + alpaka::memcpy(queue, input2_d, input2_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_LessOrEqual::Session session("LessOrEqual_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input1_d, input2_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + bool* correct = LessOrEqual_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(LessOrEqual_ExpectedOutput::outputs) / sizeof(bool)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, GreaterOrEqual) +{ + std::vector input1 = {1.0f, 2.0f, 3.0f}; + std::vector input2 = {4.0f, 2.0f, 6.0f}; + const std::size_t outputSize = sizeof(GreaterOrEqual_ExpectedOutput::outputs) / sizeof(bool); + + auto input1_h = 
alpaka::allocBuf(host, Ext1D::all(Idx{input1.size()})); + auto input2_h = alpaka::allocBuf(host, Ext1D::all(Idx{input2.size()})); + float* in1_ptr = reinterpret_cast(alpaka::getPtrNative(input1_h)); + float* in2_ptr = reinterpret_cast(alpaka::getPtrNative(input2_h)); + for (Idx i = 0; i < input1.size(); ++i) in1_ptr[i] = input1[i]; + for (Idx i = 0; i < input2.size(); ++i) in2_ptr[i] = input2[i]; + + auto input1_d = alpaka::allocBuf(device, Ext1D::all(Idx{input1.size()})); + auto input2_d = alpaka::allocBuf(device, Ext1D::all(Idx{input2.size()})); + alpaka::memcpy(queue, input1_d, input1_h); + alpaka::memcpy(queue, input2_d, input2_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_GreaterOrEqual::Session session("GreaterOrEqual_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input1_d, input2_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + bool* correct = GreaterOrEqual_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(GreaterOrEqual_ExpectedOutput::outputs) / sizeof(bool)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Greater) +{ + std::vector input1 = {1.0f, 2.0f, 3.0f}; + std::vector input2 = {4.0f, 2.0f, 6.0f}; + const std::size_t outputSize = sizeof(Greater_ExpectedOutput::outputs) / sizeof(bool); + + auto input1_h = alpaka::allocBuf(host, Ext1D::all(Idx{input1.size()})); + auto input2_h = alpaka::allocBuf(host, Ext1D::all(Idx{input2.size()})); + float* in1_ptr = reinterpret_cast(alpaka::getPtrNative(input1_h)); + float* in2_ptr = reinterpret_cast(alpaka::getPtrNative(input2_h)); + for (Idx i = 0; i < input1.size(); ++i) in1_ptr[i] = input1[i]; + for (Idx i = 0; i < input2.size(); ++i) in2_ptr[i] = input2[i]; + + auto input1_d = alpaka::allocBuf(device, 
Ext1D::all(Idx{input1.size()})); + auto input2_d = alpaka::allocBuf(device, Ext1D::all(Idx{input2.size()})); + alpaka::memcpy(queue, input1_d, input1_h); + alpaka::memcpy(queue, input2_d, input2_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Greater::Session session("Greater_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input1_d, input2_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + bool* correct = Greater_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(Greater_ExpectedOutput::outputs) / sizeof(bool)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Less) +{ + std::vector input1 = {1.0f, 2.0f, 3.0f}; + std::vector input2 = {4.0f, 2.0f, 6.0f}; + const std::size_t outputSize = sizeof(Less_ExpectedOutput::outputs) / sizeof(bool); + + auto input1_h = alpaka::allocBuf(host, Ext1D::all(Idx{input1.size()})); + auto input2_h = alpaka::allocBuf(host, Ext1D::all(Idx{input2.size()})); + float* in1_ptr = reinterpret_cast(alpaka::getPtrNative(input1_h)); + float* in2_ptr = reinterpret_cast(alpaka::getPtrNative(input2_h)); + for (Idx i = 0; i < input1.size(); ++i) in1_ptr[i] = input1[i]; + for (Idx i = 0; i < input2.size(); ++i) in2_ptr[i] = input2[i]; + + auto input1_d = alpaka::allocBuf(device, Ext1D::all(Idx{input1.size()})); + auto input2_d = alpaka::allocBuf(device, Ext1D::all(Idx{input2.size()})); + alpaka::memcpy(queue, input1_d, input1_h); + alpaka::memcpy(queue, input2_d, input2_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Less::Session session("Less_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input1_d, input2_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + 
alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + bool* correct = Less_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(Less_ExpectedOutput::outputs) / sizeof(bool)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; +} From 6ba3f29440b38e727b3d48475b13aa10362c4ac7 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 13 Apr 2026 10:13:38 +0200 Subject: [PATCH 34/43] feat: Support for heterogeneous inference for slice and unary operators --- src/SOFIE_core/inc/SOFIE/ROperator.hxx | 8 + .../inc/SOFIE/ROperator_BasicUnary.hxx | 52 ++- src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx | 96 ++++ src/SOFIE_core/src/RModel_ALPAKA.cxx | 21 +- .../test/TestCustomModelsFromONNX.cxx | 1 + .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 429 ++++++++++++++++++ 6 files changed, 593 insertions(+), 14 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index 6c9a812..242dda6 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -29,6 +29,14 @@ enum class OperatorKind { TANH=13, SOFTMAX=14, LEAKYRELU=15, + UNARY_RECIPROCAL=16, + UNARY_SQRT=17, + UNARY_NEG=18, + UNARY_EXP=19, + UNARY_LOG=20, + UNARY_SIN=21, + UNARY_COS=22, + UNARY_ABS=23 }; inline const char* toString(OperatorKind kind) { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx index b98ded5..529a190 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx @@ -77,6 +77,33 @@ public: ROperator_BasicUnary(std::string nameX, std::string nameY) : fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) { + + switch(Op) { + case EBasicUnaryOperator::kReciprocal: + fKind = OperatorKind::UNARY_RECIPROCAL; + break; + case EBasicUnaryOperator::kSqrt: + fKind = 
OperatorKind::UNARY_SQRT; + break; + case EBasicUnaryOperator::kNeg: + fKind = OperatorKind::UNARY_NEG; + break; + case EBasicUnaryOperator::kExp: + fKind = OperatorKind::UNARY_EXP; + break; + case EBasicUnaryOperator::kLog: + fKind = OperatorKind::UNARY_LOG; + break; + case EBasicUnaryOperator::kSin: + fKind = OperatorKind::UNARY_SIN; + break; + case EBasicUnaryOperator::kCos: + fKind = OperatorKind::UNARY_COS; + break; + case EBasicUnaryOperator::kAbs: + fKind = OperatorKind::UNARY_ABS; + break; + } fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } @@ -107,20 +134,24 @@ public: return out.str(); } - std::string Generate_GPU_Kernel_ALPAKA() { + std::string Generate_GPU_Kernel_ALPAKA(std::string /*OpName*/) override { + if (fIsOutputConstant) + return ""; + std::string op; op = "\n//------ " + UnaryOpTraits::Name() + "_KERNEL_ALPAKA\n"; op += SP + "struct Unary" + UnaryOpTraits::Name() + "Kernel{\n"; op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T* data, std::size_t numElements) const {\n"; - op += SP + SP + SP + "for (auto i : alpaka::uniformElements(acc, numElements)) {\n"; - op += SP + SP + SP + "data[i] = " << UnaryOpTraits::Op("data[i]") << ";\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * data, T * output, std::size_t const length) const {\n"; + op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (idx < length) {\n"; + op += SP + SP + SP + "output[idx] = " +UnaryOpTraits::Op("data[idx]") + ";\n"; op += SP + SP + "}\n"; op += SP + "}\n};\n"; return op; } - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*OpName*/) override { return SP + "Unary" + UnaryOpTraits::Name() + "Kernel " + UnaryOpTraits::Name() + "Kernel;\n"; } @@ -129,8 +160,15 @@ public: std::stringstream out; auto length = ConvertShapeToLength(fShapeX); out << 
"\n//------ "+OpName+"_ALPAKA\n"; - out << SP << "alpaka::WorkDivMembers workDiv_"<::all("<<(length+255)/256<<"), alpaka::Vec::all(256), alpaka::Vec::all(1));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNX << ", " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), static_cast(" << length << ")); \n"; + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), " << length << ");\n"; + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY + << ", " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), " << length << ");\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx index 6d40003..127adaa 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx @@ -255,6 +255,102 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeInput.empty() || fShapeOutput.empty()) + throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); + + const std::size_t D = fShapeInput.size(); + + auto inputStrides = UTILITY::ComputeStrideFromShape(fShapeInput); + auto outputStrides = UTILITY::ComputeStrideFromShape(fShapeOutput); + + std::size_t totalElements = ConvertShapeToLength(fShapeOutput); + std::string 
kname = "SliceKernel_" + opName; + + std::string op; + op = "\n//------ SLICE_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(outputStrides[d]) + "u) % " + + std::to_string(fShapeOutput[d]) + "u;\n"; + } + op += "\n"; + + // Map each output coord back to input coord: + // input_coord[d] = fStart[d] + out_d * fSteps[d] + // Negative steps are supported naturally since fStart/fEnd/fSteps are + // already corrected for negative/default values during Initialize(). + op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + // input coordinate for this dim: start + out_d * step + std::string input_coord = "(" + std::to_string(fStart[d]) + + " + out_" + std::to_string(d) + + " * " + std::to_string(fSteps[d]) + ")"; + op += SP + SP + SP + SP + SP + + "static_cast(" + input_coord + ")" + + " * " + std::to_string(inputStrides[d]) + "u"; + op += (d + 1 < D) ? 
" +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[elem_idx] = input[input_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + std::string kname = "SliceKernel_" + opName; + return SP + kname + " sliceKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeInput.empty() || fShapeOutput.empty()) + throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); + + std::size_t totalElements = ConvertShapeToLength(fShapeOutput); + std::string kname = "sliceKernel_" + opName; + + std::stringstream out; + out << "\n//------ SLICE_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNOutput << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNOutput << ")" + << ", static_cast(" << totalElements << "));\n"; + + return out.str(); + } + }; }//SOFIE diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 4eb6a3d..6989983 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx 
+++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -231,7 +231,16 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { SOFIE::OperatorKind::EINSUM, SOFIE::OperatorKind::COMPARISON, SOFIE::OperatorKind::ELU, + SOFIE::OperatorKind::UNARY_RECIPROCAL, + SOFIE::OperatorKind::UNARY_SQRT, + SOFIE::OperatorKind::UNARY_NEG, + SOFIE::OperatorKind::UNARY_EXP, + SOFIE::OperatorKind::UNARY_LOG, + SOFIE::OperatorKind::UNARY_SIN, + SOFIE::OperatorKind::UNARY_COS, + SOFIE::OperatorKind::UNARY_ABS }; + bool OpNeedsBlas = false; // single initiation operators must only be initialized only once and their count should be stored in the registered_operators set to avoid generating multiple kernels for the same operator kind @@ -241,14 +250,12 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { OpNeedsBlas = true; } if(single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { - if(registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { + if (fVerbose) + std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; - if (fVerbose) - std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; - - fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); - registered_operators.insert(fOperators[id]->GetKind()); + fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); + registered_operators.insert(fOperators[id]->GetKind()); } } else { if (fVerbose) @@ -275,7 +282,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { fGC += "using QueueAcc = alpaka::Queue;\n\n"; fGC += "using BufF1D = alpaka::Buf;\n"; fGC += "using BufD1D = alpaka::Buf;\n"; - fGC += "using BufI641D = alpaka::Buf;\n\n"; + fGC += "using BufI641D = alpaka::Buf;\n"; fGC += "using BufUI81D = alpaka::Buf;\n\n"; fGC += "\nalpaka::Platform const platform{};\n"; diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx 
b/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx index 14eb6a3..902cbcc 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx @@ -2849,6 +2849,7 @@ TEST(ONNX, Slice_Neg) { } } + TEST(ONNX, RangeFloat) { constexpr float TOLERANCE = DEFAULT_TOLERANCE; diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 6238643..bf4123c 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -63,6 +63,29 @@ #include "input_models/references/Greater.ref.hxx" #include "input_models/references/Less.ref.hxx" +#include "Slice_FromONNX_GPU_ALPAKA.hxx" +#include "Slice_Default_Axis_FromONNX_GPU_ALPAKA.hxx" +#include "Slice_Default_Steps_FromONNX_GPU_ALPAKA.hxx" +#include "Slice_Neg_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Slice.ref.hxx" +#include "input_models/references/Slice_Default_Axis.ref.hxx" +#include "input_models/references/Slice_Default_Steps.ref.hxx" +#include "input_models/references/Slice_Neg.ref.hxx" + +#include "Sin_FromONNX_GPU_ALPAKA.hxx" +#include "Cos_FromONNX_GPU_ALPAKA.hxx" +#include "Abs_FromONNX_GPU_ALPAKA.hxx" +#include "Sqrt_FromONNX_GPU_ALPAKA.hxx" +#include "Reciprocal_FromONNX_GPU_ALPAKA.hxx" +#include "Exp_FromONNX_GPU_ALPAKA.hxx" +#include "Log_FromONNX_GPU_ALPAKA.hxx" +#include "Neg_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Sqrt.ref.hxx" +#include "input_models/references/Reciprocal.ref.hxx" +#include "input_models/references/Exp.ref.hxx" +#include "input_models/references/Log.ref.hxx" +#include "input_models/references/Neg.ref.hxx" + #include #include #include @@ -1291,3 +1314,409 @@ TEST_F(SofieAlpakaTest, Less) for (size_t i = 0; i < outputSize; ++i) EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; } + +TEST_F(SofieAlpakaTest, Slice) +{ + constexpr float TOLERANCE = 
DEFAULT_TOLERANCE; + + std::vector input = Slice::input; + const std::size_t outputSize = sizeof(Slice::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Slice::Session session("Slice_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Slice::output; + EXPECT_EQ(outputSize, sizeof(Slice::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Slice_Default_Axis) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = Slice_Default_Axis::input; + const std::size_t outputSize = sizeof(Slice_Default_Axis::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Slice_Default_Axis::Session session("Slice_Default_Axis_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* 
res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Slice_Default_Axis::output; + EXPECT_EQ(outputSize, sizeof(Slice_Default_Axis::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Slice_Default_Steps) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = Slice_Default_Steps::input; + const std::size_t outputSize = sizeof(Slice_Default_Steps::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Slice_Default_Steps::Session session("Slice_Default_Steps_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Slice_Default_Steps::output; + EXPECT_EQ(outputSize, sizeof(Slice_Default_Steps::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Slice_Neg) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = Slice_Neg::input; + const std::size_t outputSize = sizeof(Slice_Neg::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, 
Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Slice_Neg::Session session("Slice_Neg_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Slice_Neg::output; + EXPECT_EQ(outputSize, sizeof(Slice_Neg::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Sin) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({ + -0.786738f, -0.197796f, -0.187787f, 0.142758f, + 0.876096f, -0.653239f, 0.145444f, -1.107658f, + 2.259171f, -0.947054f, -0.506689f, 1.801250f + }); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + + { + SOFIE_Sin::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(input.size(), 12u); + for (size_t i = 0; i < input.size(); ++i) + EXPECT_LE(std::abs(res_ptr[i] - std::sin(input[i])), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Cos) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({ + 1.152504f, -1.459324f, 0.691594f, 0.347690f, + -1.307323f, 1.832516f, 
-1.261772f, 0.014224f, + 1.311477f, 1.147405f, -0.567206f, -0.530606f + }); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + + { + SOFIE_Cos::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(input.size(), 12u); + for (size_t i = 0; i < input.size(); ++i) + EXPECT_LE(std::abs(res_ptr[i] - std::cos(input[i])), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Abs) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({1.f, -2.f, -3.f, 4.f, -5.f, 6.f}); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + + { + SOFIE_Abs::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(input.size(), 6u); + for (size_t i = 0; i < input.size(); ++i) + EXPECT_LE(std::abs(res_ptr[i] - std::abs(input[i])), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Sqrt) +{ + constexpr float TOLERANCE = 
DEFAULT_TOLERANCE; + + std::vector input({0.8344f, 0.4716f, 0.6226f, 0.8448f, 0.2483f, 0.9467f}); + const std::size_t outputSize = sizeof(Sqrt_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Sqrt::Session session("Sqrt_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Sqrt_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(Sqrt_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Reciprocal) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({1.2691f, -1.2160f, 0.6393f, -0.4438f, 0.8065f, 0.2011f}); + const std::size_t outputSize = sizeof(Reciprocal_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Reciprocal::Session session("Reciprocal_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + 
cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Reciprocal_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(Reciprocal_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Exp) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({ + 1.46566453f, 0.63334515f, 2.4048165f, 0.54468453f, + -1.41271672f, -0.18609187f, 0.2754482f, 1.10615209f, + 0.88474389f, 0.47531232f + }); + const std::size_t outputSize = sizeof(Exp_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Exp::Session session("Exp_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Exp_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(Exp_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Log) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({1.f, 2.f, 3.f, 4.f}); + const std::size_t outputSize = sizeof(Log_ExpectedOutput::outputs) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, 
Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Log::Session session("Log_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = Log_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(Log_ExpectedOutput::outputs) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Neg) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({ + -1.9100f, 1.8811f, -1.7269f, -0.1094f, + -0.0145f, 0.2509f, 0.5893f, -2.2733f, + -0.7077f, 1.0645f, -0.8607f, 0.2085f + }); + const std::size_t outputSize = sizeof(Neg_ExpectedOutput::outputs) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_Neg::Session session("Neg_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = 
Neg_ExpectedOutput::outputs; + EXPECT_EQ(outputSize, sizeof(Neg_ExpectedOutput::outputs) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} \ No newline at end of file From 7f27cd4a1483b031bcdd3a8a2732a5448c919091 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 13 Apr 2026 11:29:13 +0200 Subject: [PATCH 35/43] feat: Support for heterogeneous inference for where operator --- src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx | 136 ++++++++++++++++++ src/SOFIE_core/src/RModel_ALPAKA.cxx | 1 + src/SOFIE_core/test/CMakeLists.txt | 2 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 49 +++++++ 4 files changed, 187 insertions(+), 1 deletion(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx index 2a55700..13a3ce3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx @@ -285,6 +285,142 @@ public: return out.str(); } + + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first"); + + const std::size_t D = fShapeY.size(); + std::size_t totalElements = ConvertShapeToLength(fShapeY); + + std::vector shapeA_padded(D, 1); + std::vector shapeB_padded(D, 1); + std::vector shapeC_padded(D, 1); + { + size_t offA = D - fShapeA.size(); + for (size_t i = 0; i < fShapeA.size(); ++i) shapeA_padded[offA + i] = fShapeA[i]; + size_t offB = D - fShapeB.size(); + for (size_t i = 0; i < fShapeB.size(); ++i) shapeB_padded[offB + i] = fShapeB[i]; + size_t offC = D - fShapeC.size(); + for (size_t i = 0; i < fShapeC.size(); ++i) shapeC_padded[offC + i] = fShapeC[i]; + } + + auto stridesA = UTILITY::ComputeStrideFromShape(shapeA_padded); + auto stridesB = UTILITY::ComputeStrideFromShape(shapeB_padded); + 
auto stridesC = UTILITY::ComputeStrideFromShape(shapeC_padded); + auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + + std::string typeName = TensorType::Name(); + std::string kname = "WhereKernel_" + opName; + + std::string op; + op = "\n//------ WHERE_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "uint8_t const* __restrict__ cond,\n"; + op += SP + SP + SP + "T const* __restrict__ x,\n"; + op += SP + SP + SP + "T const* __restrict__ y,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + for (std::size_t d = 0; d < D; ++d) { + op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) + + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " + + std::to_string(fShapeY[d]) + "u;\n"; + } + op += "\n"; + + op += SP + SP + SP + SP + "std::size_t const c_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + if (shapeC_padded[d] == 1) + op += SP + SP + SP + SP + SP + "0u"; + else + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(d) + + " * " + std::to_string(stridesC[d]) + "u"; + op += (d + 1 < D) ? 
" +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "std::size_t const x_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + if (shapeA_padded[d] == 1) + op += SP + SP + SP + SP + SP + "0u"; + else + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(d) + + " * " + std::to_string(stridesA[d]) + "u"; + op += (d + 1 < D) ? " +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "std::size_t const y_idx =\n"; + for (std::size_t d = 0; d < D; ++d) { + if (shapeB_padded[d] == 1) + op += SP + SP + SP + SP + SP + "0u"; + else + op += SP + SP + SP + SP + SP + + "out_" + std::to_string(d) + + " * " + std::to_string(stridesB[d]) + "u"; + op += (d + 1 < D) ? " +\n" : ";\n\n"; + } + + op += SP + SP + SP + SP + "output[elem_idx] = cond[c_idx] ? x[x_idx] : y[y_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + std::string kname = "WhereKernel_" + opName; + return SP + kname + " whereKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + if (fIsOutputConstant) return ""; + opName = "op_" + opName; + if (fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first"); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::string kname = "whereKernel_" + opName; + + std::stringstream out; + out << "\n//------ WHERE_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << 
opName + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNC << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNA << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNC << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNA << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + + return out.str(); + } + }; }//SOFIE diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 6989983..5720c37 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -128,6 +128,7 @@ std::string RModel::GenerateInferSignature_GPU_ALPAKA(bool isdecl) { if (type == ETensorType::FLOAT) return "BufF1D"; if (type == ETensorType::DOUBLE) return "BufD1D"; if (type == ETensorType::INT64) return "BufI641D"; + if (type == ETensorType::BOOL) return "BufUI81D"; throw std::runtime_error("TMVA-SOFIE: input tensor " + name + " is of a data type which is not yet supported."); }; diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index fd848df..d7611e0 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -31,7 +31,7 @@ set(CAPTURE_STR if (msg.find(\"multiple output tensors are not supported\") != std::string::npos) {\n\ std::cerr << \"[SKIP] Multiple outputs are not supported for @1\" << std::endl;\n\ } else if (msg.find(\"is of a data type which is not yet supported\") != std::string::npos) {\n\ - std::cerr << \"[SKIP] Operatorr with nsupported data type in @1: \" << msg << std::endl;\n\ + std::cerr << \"[SKIP] Operator with unsupported data type in @1: \" << msg << std::endl;\n\ } else 
{\n\ std::cerr << \"[ERROR] Failed processing @1: \" << msg << std::endl;\n\ failures++;\n\ diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index bf4123c..57da2f6 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -86,6 +86,8 @@ #include "input_models/references/Log.ref.hxx" #include "input_models/references/Neg.ref.hxx" +#include "Where_FromONNX_GPU_ALPAKA.hxx" + #include #include #include @@ -1719,4 +1721,51 @@ TEST_F(SofieAlpakaTest, Neg) EXPECT_EQ(outputSize, sizeof(Neg_ExpectedOutput::outputs) / sizeof(float)); for (size_t i = 0; i < outputSize; ++i) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Where) +{ + std::vector input1 = {1.f, 2.f}; + std::vector input2 = {3.f, 4.f, 5.f, 6.f}; + std::vector cond_vec = {true, false, true}; + std::vector correct = {1.f, 2.f, 5.f, 6.f, 1.f, 2.f}; + + auto input1_h = alpaka::allocBuf(host, Ext1D::all(Idx{input1.size()})); + float* in1_ptr = reinterpret_cast(alpaka::getPtrNative(input1_h)); + for (Idx i = 0; i < input1.size(); ++i) in1_ptr[i] = input1[i]; + + auto input1_d = alpaka::allocBuf(device, Ext1D::all(Idx{input1.size()})); + alpaka::memcpy(queue, input1_d, input1_h); + + auto input2_h = alpaka::allocBuf(host, Ext1D::all(Idx{input2.size()})); + float* in2_ptr = reinterpret_cast(alpaka::getPtrNative(input2_h)); + for (Idx i = 0; i < input2.size(); ++i) in2_ptr[i] = input2[i]; + + auto input2_d = alpaka::allocBuf(device, Ext1D::all(Idx{input2.size()})); + alpaka::memcpy(queue, input2_d, input2_h); + + auto cond_h = alpaka::allocBuf(host, Ext1D::all(Idx{cond_vec.size()})); + uint8_t* cond_ptr = reinterpret_cast(alpaka::getPtrNative(cond_h)); + for (Idx i = 0; i < cond_vec.size(); ++i) cond_ptr[i] = cond_vec[i]; + + auto cond_d = alpaka::allocBuf(device, 
Ext1D::all(Idx{cond_vec.size()})); + alpaka::memcpy(queue, cond_d, cond_h); + + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct.size()})); + + { + SOFIE_Where::Session session("Where_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input1_d, input2_d, cond_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(correct.size(), 6u); + for (size_t i = 0; i < correct.size(); ++i) + EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; } \ No newline at end of file From 860d34f4e1f52fea3a947a60729acb1f864c94d9 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 13 Apr 2026 15:53:08 +0200 Subject: [PATCH 36/43] feat: Support for heterogeneous inference for reduce and softplus operators --- .../inc/SOFIE/ROperator_BasicUnary.hxx | 20 +- src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx | 123 +++++++++++++ .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 172 ++++++++++++++++++ .../test/input_models/Softplus.onnx | 11 ++ src/SOFIE_parsers/src/ParseBasicUnary.cxx | 15 ++ src/SOFIE_parsers/src/RModelParser_ONNX.cxx | 8 + 6 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 src/SOFIE_core/test/input_models/Softplus.onnx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx index 529a190..4e9d988 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx @@ -8,7 +8,7 @@ namespace SOFIE { -enum class EBasicUnaryOperator { kReciprocal, kSqrt , kNeg, kExp, kLog, kSin, kCos, kAbs }; +enum class EBasicUnaryOperator { kReciprocal, kSqrt , kNeg, kExp, kLog, kSin, kCos, kAbs, kSoftplus, kAtan, kFloor }; template struct UnaryOpTraits { @@ -62,6 +62,24 @@ struct UnaryOpTraits { static std::string Op(const std::string &X) { return "std::abs(" + X + ")"; } }; 
+template +struct UnaryOpTraits { + static std::string Name() { return "Softplus"; } + static std::string Op(const std::string &X) { return "std::log(std::exp(" + X + ") + 1)"; } +}; + +template +struct UnaryOpTraits { + static std::string Name() { return "Atan"; } + static std::string Op(const std::string &X) { return "std::atan(" + X + ")"; } +}; + +template +struct UnaryOpTraits { + static std::string Name() { return "Floor"; } + static std::string Op(const std::string &X) { return "std::floor(" + X + ")"; } +}; + template class ROperator_BasicUnary final : public ROperator { private: diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx index 886aef1..0b51e2c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx @@ -261,6 +261,129 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override { + if (fShapeX.empty() || fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Reduce Op called to Generate without being initialized first"); + + const std::size_t Dx = fShapeX.size(); + + auto inputStrides = UTILITY::ComputeStrideFromShape(fShapeX); + auto outputStrides = UTILITY::ComputeStrideFromShape(fShapeYNotPruned); + + std::size_t inputLength = ConvertShapeToLength(fShapeX); + std::size_t outputLength = ConvertShapeToLength(fShapeY); + std::size_t reducedLength = inputLength / outputLength; + + std::string kname = "ReduceKernel_" + Name(); + + std::string op; + op = "\n//------ " + Name() + "_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const outputLength) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = 
alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= outputLength) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t out_idx = global_thread_idx; out_idx < outputLength; out_idx += grid_thread_extent) {\n\n"; + + for (std::size_t d = 0; d < Dx; ++d) { + op += SP + SP + SP + SP + "std::size_t const oy_" + std::to_string(d) + + " = (out_idx / " + std::to_string(outputStrides[d]) + "u) % " + + std::to_string(fShapeYNotPruned[d]) + "u;\n"; + } + op += "\n"; + + std::string startVal = (Op == ReduceProd) ? "static_cast(1)" : "static_cast(0)"; + op += SP + SP + SP + SP + "T acc_val = " + startVal + ";\n\n"; + + std::vector redAxes; + std::vector keepAxes; + for (std::size_t d = 0; d < Dx; ++d) { + if (std::find(fAttrAxes.begin(), fAttrAxes.end(), (int64_t)d) != fAttrAxes.end()) + redAxes.push_back(d); + else + keepAxes.push_back(d); + } + + std::string indent = SP + SP + SP + SP; + for (std::size_t rd : redAxes) { + op += indent + "for (std::size_t r_" + std::to_string(rd) + + " = 0; r_" + std::to_string(rd) + + " < " + std::to_string(fShapeX[rd]) + "u; r_" + + std::to_string(rd) + "++) {\n"; + indent += SP; + } + + op += indent + "std::size_t const in_idx =\n"; + for (std::size_t d = 0; d < Dx; ++d) { + std::string coord = (std::find(redAxes.begin(), redAxes.end(), d) != redAxes.end()) + ? "r_" + std::to_string(d) + : "oy_" + std::to_string(d); + op += indent + SP + coord + " * " + std::to_string(inputStrides[d]) + "u"; + op += (d + 1 < Dx) ? 
" +\n" : ";\n"; + } + + if (Op == ReduceProd) + op += indent + "acc_val *= input[in_idx];\n"; + else if (Op == ReduceSum || Op == ReduceMean) + op += indent + "acc_val += input[in_idx];\n"; + else if (Op == ReduceSumSquare) + op += indent + "acc_val += input[in_idx] * input[in_idx];\n"; + + for (std::size_t i = 0; i < redAxes.size(); ++i) { + indent = indent.substr(SP.length()); + op += indent + "}\n"; + } + + if (Op == ReduceMean) + op += SP + SP + SP + SP + "acc_val /= static_cast(" + std::to_string(reducedLength) + "u);\n"; + + op += SP + SP + SP + SP + "output[out_idx] = acc_val;\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { + std::string kname = "ReduceKernel_" + Name(); + return SP + kname + " reduceKernel_" + Name() + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string /*opName*/) override { + if (fShapeX.empty() || fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Reduce Op called to Generate without being initialized first"); + + std::size_t outputLength = ConvertShapeToLength(fShapeY); + std::string kname = "reduceKernel_" + Name(); + + std::stringstream out; + out << "\n//------ " << Name() << "_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << outputLength << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY + << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << outputLength << "));\n"; + out << SP << "alpaka::exec(queue, workDiv_" << fNY + << ", " << kname + << ", 
alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << outputLength << "));\n"; + + return out.str(); + } + }; }//SOFIE diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 57da2f6..197d05a 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -88,6 +88,16 @@ #include "Where_FromONNX_GPU_ALPAKA.hxx" +#include "Softplus_FromONNX_GPU_ALPAKA.hxx" + +#include "ReduceMean_FromONNX_GPU_ALPAKA.hxx" +#include "ReduceProd_FromONNX_GPU_ALPAKA.hxx" +#include "ReduceSum_FromONNX_GPU_ALPAKA.hxx" +#include "ReduceSumSquare_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ReduceMean.ref.hxx" +#include "input_models/references/ReduceProd.ref.hxx" + + #include #include #include @@ -1723,6 +1733,38 @@ TEST_F(SofieAlpakaTest, Neg) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; } +TEST_F(SofieAlpakaTest, Softplus) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input({0.1,-0.2,0.3,-0.4,0.5,1.}); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + + { + SOFIE_Softplus::Session session("Softplus_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + for (size_t i = 0; i < input.size(); ++i){ + double exp_value = 
std::log(std::exp(input[i])+1); + EXPECT_LE(std::abs(res_ptr[i] - exp_value), TOLERANCE); + } +} + TEST_F(SofieAlpakaTest, Where) { std::vector input1 = {1.f, 2.f}; @@ -1768,4 +1810,134 @@ TEST_F(SofieAlpakaTest, Where) EXPECT_EQ(correct.size(), 6u); for (size_t i = 0; i < correct.size(); ++i) EXPECT_EQ(res_ptr[i], correct[i]) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, ReduceMean) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = {5.f, 2.f, 3.f, 5.f, 5.f, 4.f}; + const std::size_t outputSize = sizeof(ReduceMean_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_ReduceMean::Session session("ReduceMean_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ReduceMean_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(ReduceMean_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, ReduceProd) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = {5.f, 2.f, 3.f, 5.f, 5.f, 4.f}; + const std::size_t outputSize = sizeof(ReduceProd_ExpectedOutput::output) / sizeof(float); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); 
++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_ReduceProd::Session session("ReduceProd_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ReduceProd_ExpectedOutput::output; + EXPECT_EQ(outputSize, sizeof(ReduceProd_ExpectedOutput::output) / sizeof(float)); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, ReduceSum) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = {5.f, 2.f, 3.f, 5.f, 5.f, 4.f}; + std::vector correct = {24.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct.size()})); + + { + SOFIE_ReduceSum::Session session("ReduceSum_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(correct.size(), 1u); + for (size_t i = 0; i < correct.size(); ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, ReduceSumSquare) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector 
input = {5.f, 2.f, 3.f, 5.f, 5.f, 4.f}; + std::vector correct = {38.f, 66.f}; + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{correct.size()})); + + { + SOFIE_ReduceSumSquare::Session session("ReduceSumSquare_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + EXPECT_EQ(correct.size(), 2u); + for (size_t i = 0; i < correct.size(); ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; } \ No newline at end of file diff --git a/src/SOFIE_core/test/input_models/Softplus.onnx b/src/SOFIE_core/test/input_models/Softplus.onnx new file mode 100644 index 0000000..2f6a69f --- /dev/null +++ b/src/SOFIE_core/test/input_models/Softplus.onnx @@ -0,0 +1,11 @@ +  onnx-example:S + +inputoutput"SoftplusAbsZ +input +  + +b +output +  + +B \ No newline at end of file diff --git a/src/SOFIE_parsers/src/ParseBasicUnary.cxx b/src/SOFIE_parsers/src/ParseBasicUnary.cxx index 1470f26..40d0225 100644 --- a/src/SOFIE_parsers/src/ParseBasicUnary.cxx +++ b/src/SOFIE_parsers/src/ParseBasicUnary.cxx @@ -79,5 +79,20 @@ ParserFuncSignature ParseAbs = [](RModelParser_ONNX &parser, const onnx::NodePro return ParseBasicUnary(parser, nodeproto); }; +//Parse Softplus +ParserFuncSignature ParseSoftplus = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + return ParseBasicUnary(parser, nodeproto); +}; + +//Parse Atan +ParserFuncSignature ParseAtan = [](RModelParser_ONNX &parser, const onnx::NodeProto 
&nodeproto) { + return ParseBasicUnary(parser, nodeproto); +}; + +//Parse Floor +ParserFuncSignature ParseFloor = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + return ParseBasicUnary(parser, nodeproto); +}; + } // namespace SOFIE diff --git a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx index 5924836..23a19a6 100644 --- a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx +++ b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx @@ -24,6 +24,10 @@ extern ParserFuncSignature ParseLog; extern ParserFuncSignature ParseSin; extern ParserFuncSignature ParseCos; extern ParserFuncSignature ParseAbs; +extern ParserFuncSignature ParseSoftplus; +extern ParserFuncSignature ParseAtan; +extern ParserFuncSignature ParseFloor; + // Binary operators extern ParserFuncSignature ParseAdd; extern ParserFuncSignature ParseSub; @@ -164,6 +168,10 @@ RModelParser_ONNX::RModelParser_ONNX() noexcept : fOperatorsMapImpl(std::make_un RegisterOperator("Sin", ParseSin); RegisterOperator("Cos", ParseCos); RegisterOperator("Abs", ParseAbs); + RegisterOperator("Softplus", ParseSoftplus); + RegisterOperator("Atan", ParseAtan); + RegisterOperator("Floor", ParseFloor); + // Binary operators RegisterOperator("Add", ParseAdd); RegisterOperator("Sub", ParseSub); From 4433ca28222deeacbac33d01a21f1072bc8438fe Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 28 Apr 2026 16:19:49 +0200 Subject: [PATCH 37/43] feat: Make ROOT usage optional --- CMakeLists.txt | 31 +- README.md | 11 +- cmake/modules/SofieTestMacros.cmake | 73 +++ src/SOFIE_core/CMakeLists.txt | 28 +- src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx | 4 +- src/SOFIE_core/inc/SOFIE/RModel.hxx | 3 + src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 3 + .../inc/SOFIE/ROperator_BasicBinary.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx | 504 ++++++++++++++++-- .../inc/SOFIE/ROperator_ScatterElements.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx | 2 +- 
src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 2 +- src/SOFIE_core/src/RFunction_MLP.cxx | 6 +- src/SOFIE_core/src/RModel.cxx | 2 + src/SOFIE_core/src/RModel_ALPAKA.cxx | 7 +- src/SOFIE_core/src/RModel_Base.cxx | 25 +- src/SOFIE_core/test/CMakeLists.txt | 8 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 168 +++++- src/SOFIE_parsers/CMakeLists.txt | 10 +- src/SOFIE_parsers/src/RModelParser_ONNX.cxx | 29 +- 20 files changed, 810 insertions(+), 112 deletions(-) create mode 100644 cmake/modules/SofieTestMacros.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c9bd226..7ec316c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,10 @@ project(Sofie DESCRIPTION "SOFIE" LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + find_package(BLAS) if(NOT BLAS_FOUND) message(WARNING "BLAS not found: TMVA-SOFIE will not be fully tested") @@ -42,8 +46,19 @@ else() endif() endif() -find_package(ROOT REQUIRED COMPONENTS Core TMVA Tree) -include(${ROOT_USE_FILE}) +option(SOFIE_WITH_ROOT "Enable ROOT support (required for .root weight files and ROOT serialization)" OFF) + +if(SOFIE_WITH_ROOT) + find_package(ROOT REQUIRED COMPONENTS Core TMVA Tree) + if(ROOT_FOUND) + include(${ROOT_USE_FILE}) + message(STATUS "ROOT found: enabling ROOT support in SOFIE") + else() + message(FATAL_ERROR "SOFIE_WITH_ROOT is ON but ROOT was not found") + endif() +else() + message(STATUS "Building SOFIE without ROOT support (SOFIE_WITH_ROOT=OFF)") +endif() set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_INSTALL_BINDIR "bin" CACHE PATH "user executables (bin)") @@ -60,6 +75,16 @@ if(testing) enable_testing() endif() -include(cmake/modules/RoottestMacros.cmake) +if(SOFIE_WITH_ROOT AND ROOT_FOUND) + include(cmake/modules/RoottestMacros.cmake) +else() + include(cmake/modules/SofieTestMacros.cmake) +endif() add_subdirectory(src) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) 
+set(CMAKE_CXX_EXTENSIONS OFF) + +add_compile_options(-std=c++20) diff --git a/README.md b/README.md index 597cb56..23ab074 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,13 @@ Since SOFIE is a part of ROOT and therefore needs to be built altogether, it tak ## Installation -1. Getting a ROOT binary. -Download a pre-built binary of ROOT based on your architecture from [here](https://root.cern/install/). +1. ROOT support is optional and is controlled by the `SOFIE_WITH_ROOT` cmake option. To explore SOFIE APIs within ROOT (build with `-DSOFIE_WITH_ROOT=ON`), follow these steps: + 1. Getting a ROOT binary. Download a pre-built binary of ROOT based on your architecture from [here](https://root.cern/install/). + 2. Source it so that SOFIE is able to include its code. For example: + ``` + source root_v6.36.02.Linux-ubuntu24.04-x86_64-gcc13.3/root/bin/thisroot.sh + ``` + To build without ROOT, pass `-DSOFIE_WITH_ROOT=OFF` to cmake in the build command. 2. Build standalone SOFIE ```bash @@ -23,7 +28,7 @@ The commands above should build the SOFIE standalone. To include it within the R source setup.sh ``` -Now ROOT should also access the SOFIE libraries while it runs. This helps to accelerate development. Submit your developments here and we will proceed with the developments in ROOT carefull. +Now ROOT should also access the SOFIE libraries while it runs. This helps to accelerate development. Submit your developments here and we will proceed with the developments in ROOT carefully. This step is not required if SOFIE is built for usage without ROOT (`-DSOFIE_WITH_ROOT=OFF`). 3. To enable testing generated code with alpaka implementations, build using the following command: ```bash diff --git a/cmake/modules/SofieTestMacros.cmake b/cmake/modules/SofieTestMacros.cmake new file mode 100644 index 0000000..1f4d235 --- /dev/null +++ b/cmake/modules/SofieTestMacros.cmake @@ -0,0 +1,73 @@ +# Fallback test macros used when ROOT is not available. 
+# These provide the same interface as ROOTTEST_GENERATE_EXECUTABLE and +# ROOTTEST_ADD_TEST from RoottestMacros.cmake but without requiring ROOT. + +macro(ROOTTEST_GENERATE_EXECUTABLE executable) + cmake_parse_arguments(ARG "" "RESOURCE_LOCK" + "LIBRARIES;COMPILE_FLAGS;DEPENDS;FIXTURES_SETUP;FIXTURES_CLEANUP;FIXTURES_REQUIRED" + ${ARGN}) + + add_executable(${executable} EXCLUDE_FROM_ALL ${ARG_UNPARSED_ARGUMENTS}) + set_target_properties(${executable} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + if(ARG_DEPENDS) + add_dependencies(${executable} ${ARG_DEPENDS}) + endif() + + if(ARG_LIBRARIES) + target_link_libraries(${executable} ${ARG_LIBRARIES}) + endif() + + if(ARG_COMPILE_FLAGS) + set_target_properties(${executable} PROPERTIES COMPILE_FLAGS ${ARG_COMPILE_FLAGS}) + endif() + + set(_sofie_build_test ${executable}-build) + add_test(NAME ${_sofie_build_test} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target ${executable}) + + if(ARG_FIXTURES_SETUP) + set_property(TEST ${_sofie_build_test} PROPERTY FIXTURES_SETUP ${ARG_FIXTURES_SETUP}) + endif() + if(ARG_FIXTURES_CLEANUP) + set_property(TEST ${_sofie_build_test} PROPERTY FIXTURES_CLEANUP ${ARG_FIXTURES_CLEANUP}) + endif() + if(ARG_FIXTURES_REQUIRED) + set_property(TEST ${_sofie_build_test} PROPERTY FIXTURES_REQUIRED ${ARG_FIXTURES_REQUIRED}) + endif() +endmacro() + +function(ROOTTEST_ADD_TEST testname) + cmake_parse_arguments(ARG "" + "WORKING_DIR;TIMEOUT;RESOURCE_LOCK" + "EXEC;COMMAND;DEPENDS;FIXTURES_SETUP;FIXTURES_CLEANUP;FIXTURES_REQUIRED;ENVIRONMENT;PROPERTIES" + ${ARGN}) + + if(ARG_EXEC) + set(_cmd ${ARG_EXEC}) + elseif(ARG_COMMAND) + set(_cmd ${ARG_COMMAND}) + else() + message(FATAL_ERROR "ROOTTEST_ADD_TEST: must specify EXEC or COMMAND") + endif() + + add_test(NAME ${testname} COMMAND ${_cmd} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + if(ARG_FIXTURES_SETUP) + set_property(TEST ${testname} PROPERTY FIXTURES_SETUP ${ARG_FIXTURES_SETUP}) + endif() + 
if(ARG_FIXTURES_CLEANUP) + set_property(TEST ${testname} PROPERTY FIXTURES_CLEANUP ${ARG_FIXTURES_CLEANUP}) + endif() + if(ARG_FIXTURES_REQUIRED) + set_property(TEST ${testname} PROPERTY FIXTURES_REQUIRED ${ARG_FIXTURES_REQUIRED}) + endif() + if(ARG_ENVIRONMENT) + set_property(TEST ${testname} PROPERTY ENVIRONMENT ${ARG_ENVIRONMENT}) + endif() + if(ARG_TIMEOUT) + set_property(TEST ${testname} PROPERTY TIMEOUT ${ARG_TIMEOUT}) + endif() +endfunction() diff --git a/src/SOFIE_core/CMakeLists.txt b/src/SOFIE_core/CMakeLists.txt index 4cab8e0..a803b7f 100644 --- a/src/SOFIE_core/CMakeLists.txt +++ b/src/SOFIE_core/CMakeLists.txt @@ -89,22 +89,22 @@ set(sources_cxx target_sources(SOFIE_core PRIVATE ${sources_headers} ${sources_cxx}) target_include_directories(SOFIE_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/inc) target_link_libraries(SOFIE_core PUBLIC utils) -target_link_libraries(SOFIE_core PUBLIC - Tree - Core - RIO -) -ROOT_GENERATE_DICTIONARY(G__SOFIE_core ${sources_headers} - LINKDEF inc/LinkDef.h - MODULE SOFIE_core - OPTIONS --deep -) +if(SOFIE_WITH_ROOT AND ROOT_FOUND) + target_compile_definitions(SOFIE_core PUBLIC SOFIE_SUPPORT_ROOT_BINARY) + target_link_libraries(SOFIE_core PUBLIC Tree Core RIO) -# Install the dictionaries. -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_core_rdict.pcm - ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_core.rootmap - DESTINATION lib) + ROOT_GENERATE_DICTIONARY(G__SOFIE_core ${sources_headers} + LINKDEF inc/LinkDef.h + MODULE SOFIE_core + OPTIONS --deep + ) + + # Install the dictionaries. 
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_core_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_core.rootmap + DESTINATION lib) +endif() install(TARGETS SOFIE_core LIBRARY DESTINATION lib diff --git a/src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx b/src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx index 8dfc0e1..d9f8626 100644 --- a/src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx +++ b/src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx @@ -15,7 +15,7 @@ enum class Activation { class RFunction_MLP: public RFunction_Update { private: - Int_t fNumLayers; // Number of Layers in MLP + int_t fNumLayers; // Number of Layers in MLP Activation fActivationFunction; bool fActivateFinal; // if True, fActivationFunction is applied as the activation for the last layer std::vector fKernelTensors; @@ -23,7 +23,7 @@ private: public: virtual ~RFunction_MLP() {} - RFunction_MLP(FunctionTarget target, Int_t numLayers, Activation activation_function=Activation::RELU, bool activate_final=false, GraphType gType=GraphType::GNN); + RFunction_MLP(FunctionTarget target, int_t numLayers, Activation activation_function=Activation::RELU, bool activate_final=false, GraphType gType=GraphType::GNN); void Initialize(); diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 50fc231..ea3f4dd 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -249,8 +249,11 @@ public: bool UseSession() const { return fUseSession; } +#ifdef SOFIE_SUPPORT_ROOT_BINARY // Use the ClassDef macro to allow definition of custom streaming ClassDefNV(RModel, 3); +#endif + }; template diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index 6c272d3..c7f73eb 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -12,7 +12,10 @@ #include #include #include "SOFIE/SOFIE_common.hxx" + +#ifdef SOFIE_SUPPORT_ROOT_BINARY #include "TBuffer.h" +#endif namespace SOFIE { diff 
--git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index ad3f51a..e7a04ed 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -396,7 +396,7 @@ public: op += SP + SP + "template\n"; op += SP + SP + "ALPAKA_FN_ACC void operator()(TAcc const & acc, T const * A, T const * B, T * C) const {\n"; op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + "if (idx < " + ConvertShapeToLength(fShapeY) + ") {\n"; + op += SP + SP + SP + "if (idx < " + std::to_string(ConvertShapeToLength(fShapeY)) + ") {\n"; auto stridesA = UTILITY::ComputeStrideFromShape(fShapeA); auto stridesB = UTILITY::ComputeStrideFromShape(fShapeB); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx index 15ca91e..a1c8576 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx @@ -43,6 +43,7 @@ private: std::string fType; size_t fDim; // dimension of the convolution + size_t gemm_n, gemm_m, gemm_k; // dimensions of the equivalent gemm operation after im2col transformation public: @@ -66,6 +67,7 @@ public: } fInputTensorNames = { fNX, fNB }; fOutputTensorNames = { fNY }; + fKind = OperatorKind::CONV; } ROperator_Conv(std::string autopad, std::vector dilations, @@ -202,83 +204,86 @@ public: void Initialize(RModel& model) override { fUseSession = model.UseSession(); - if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw - std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model"); - } + if (!model.CheckIfTensorAlreadyExist(fNX)) + throw std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model"); + fShapeX = model.GetTensorShape(fNX); - if (fShapeX.size() < 3 || fShapeX.size() > 5) { - std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl; - throw - std::runtime_error("TMVA 
SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); - } + if (fShapeX.size() < 3 || fShapeX.size() > 5) + throw std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); + fDim = fShapeX.size() - 2; - if (!model.CheckIfTensorAlreadyExist(fNW)) { - throw - std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model"); - } + + if (!model.CheckIfTensorAlreadyExist(fNW)) + throw std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model"); + fShapeW = model.GetTensorShape(fNW); - if (fShapeW.size() < 3 || fShapeW.size() > 5) { - std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl; + if (fShapeW.size() < 3 || fShapeW.size() > 5) throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions"); - } + fShapeY = ShapeInference({fShapeX, fShapeW})[0]; model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); + if (fNB != "") { - if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw - std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); - } + if (!model.CheckIfTensorAlreadyExist(fNB)) + throw std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); + fShapeB = model.GetTensorShape(fNB); std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); bool broadcast_needed = !UTILITY::AreSameShape(fShapeB, targetShape); if (broadcast_needed) { - auto original_data = model.GetInitializedTensorData(fNB); - // make bias shape equal to Y shape by adding 1 - if (fShapeB.size() < 1) - throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape"); - // we assume bias tensor dimension is equal to number of filters that is the second dimension in - // the output tensor - if (fShapeB[0] != fShapeY[1]) - throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " + - 
ConvertShapeToString(fShapeB)); - if (fType != "float") - throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); - // here is the actual broadcasting - if (!fUseSession) { - std::vector shape(fDim + 1, 1); - shape[0] = fShapeB[0]; - std::shared_ptr new_data_ptr( - UTILITY::UnidirectionalBroadcast(static_cast(original_data.get()), shape, targetShape), - std::default_delete()); - model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), targetShape, new_data_ptr); - fShapeB = model.GetTensorShape(fNB); - fNB2 = fNB; // use same name - } - else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNB2 = fNB + "bcast"; - model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); - } + auto original_data = model.GetInitializedTensorData(fNB); + if (fShapeB.size() < 1) + throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape"); + if (fShapeB[0] != fShapeY[1]) + throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " + + ConvertShapeToString(fShapeB)); + if (fType != "float") + throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); + if (!fUseSession) { + std::vector shape(fDim + 1, 1); + shape[0] = fShapeB[0]; + std::shared_ptr new_data_ptr( + UTILITY::UnidirectionalBroadcast(static_cast(original_data.get()), shape, targetShape), + std::default_delete()); + model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), targetShape, new_data_ptr); + fShapeB = model.GetTensorShape(fNB); + fNB2 = fNB; + } else { + fNB2 = fNB + "bcast"; + model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); + } } } - size_t outputChannelSize = fShapeY[2]; // size/channel = D * H * W + // Compute kernel size once — product of all spatial kernel dimensions size_t kernelSize = fAttrKernelShape[0]; - for 
(size_t i = 1; i < fDim; i++) { - outputChannelSize *= fShapeY[2 + i]; + for (size_t i = 1; i < fDim; i++) kernelSize *= fAttrKernelShape[i]; - } + + // Spatial output size: OH * OW (* OD for 3D) + size_t spatialSize = fShapeY[2]; + for (size_t i = 1; i < fDim; i++) + spatialSize *= fShapeY[2 + i]; std::vector shape1 = {fShapeW[0], fShapeW[1], kernelSize}; - std::vector shape2 = {fShapeW[1], kernelSize, outputChannelSize}; - model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 ); - model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 ); - convK = fNX +"_f"; - imcol = fNX +"_xcol"; + std::vector shape2 = {fShapeW[1], kernelSize, spatialSize}; + model.AddIntermediateTensor(fNX + "_f", ConvertStringToType(fType), shape1); + model.AddIntermediateTensor(fNX + "_xcol", ConvertStringToType(fType), shape2); + convK = fNX + "_f"; + imcol = fNX + "_xcol"; fOutputTensorNames.emplace_back(convK); fOutputTensorNames.emplace_back(imcol); + + // GEMM dimensions — set once here, reused in Generate() and Generate_GPU_ALPAKA() + // gemm_n = output channels (total, not per group — group case divides at launch time) + // gemm_m = spatial output size (OH * OW * OD) + // gemm_k = input channels per group * kernel spatial size + gemm_n = fShapeW[0]; // total output channels + gemm_m = spatialSize; // OH * OW (* OD) + gemm_k = fShapeW[1] * kernelSize; // IC_per_group * KH * KW (* KD) + if(fAttrGroup > 1) { + gemm_n /= fAttrGroup; + } } std::string GenerateInitCode() override { @@ -519,11 +524,396 @@ public: out << SP << "}\n"; // end of batch size loop return out.str(); + } + + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty() || fShapeW.empty() || fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first"); + + size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; + size_t oHeight = (fDim > 1) ? 
fShapeY[fDim] : 1; + size_t oWidth = fShapeY[fDim + 1]; + size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; + size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; + size_t iWidth = fShapeX[fDim + 1]; + size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; + size_t kWidth = fShapeW[fDim + 1]; + size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; + + size_t kernelSize = fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2]; + size_t colRows = fShapeW[1] * kernelSize; + size_t colCols = oDepth * oHeight * oWidth; + size_t colElements = colRows * colCols; + size_t outChannels = fShapeW[0]; + size_t spatialSize = oDepth * oHeight * oWidth; + + // Strides for weight vectorisation + size_t id = (fDim > 2) ? fDim - 3 : 2; + size_t ih = (fDim > 1) ? fDim - 2 : 1; + size_t iw = fDim - 1; + size_t wstrideDil = fAttrDilations[iw]; + size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; + size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; + size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; + size_t ocstrideDil = fShapeW[1] * icstrideDil; + size_t hstride = kWidth; + size_t dstride = kHeight * kWidth; + size_t icstride = kHeight * kWidth * kDepth; + size_t ocstride = fShapeW[1] * icstride; + size_t wTotalElements = ConvertShapeToLength(fShapeW); + + std::string op; + + // ----------------------------------------------------------------------- + // Kernel 1: Weight vectorisation — reorder W into _f with dilation layout + // Each thread handles one output element of _f + // ----------------------------------------------------------------------- + std::string wKname = "WeightVecKernel_" + opName; + op = "\n//------ WEIGHT_VEC_KERNEL_ALPAKA (Conv " + opName + ")\n"; + op += SP + "struct " + wKname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ W,\n"; + op += SP + SP + SP + 
"T* __restrict__ f,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + // Decompose elem_idx into (oc, ic, kd, kh, kw) using compile-time strides + op += SP + SP + SP + SP + "std::size_t const oc = elem_idx / " + std::to_string(ocstride) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const oc_rem = elem_idx % " + std::to_string(ocstride) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ic = oc_rem / " + std::to_string(icstride) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ic_rem = oc_rem % " + std::to_string(icstride) + "u;\n"; + if (fDim > 2) { + op += SP + SP + SP + SP + "std::size_t const kd = ic_rem / " + std::to_string(kHeight * kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = (ic_rem / " + std::to_string(kWidth) + "u) % " + std::to_string(kHeight) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = ic_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else if (fDim > 1) { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = ic_rem / " + std::to_string(kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = ic_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = ic_rem;\n\n"; + } + + // Compute destination index in _f (dilated layout) + op += SP + SP + SP + SP + "std::size_t const f_idx =\n"; + op += SP + SP + SP + SP + SP + "oc * " + 
std::to_string(ocstrideDil) + "u +\n"; + op += SP + SP + SP + SP + SP + "ic * " + std::to_string(icstrideDil) + "u"; + if (fDim > 2) op += " +\n" + SP + SP + SP + SP + SP + "kd * " + std::to_string(dstrideDil) + "u"; + if (fDim > 1) op += " +\n" + SP + SP + SP + SP + SP + "kh * " + std::to_string(hstrideDil) + "u"; + op += " +\n" + SP + SP + SP + SP + SP + "kw * " + std::to_string(wstrideDil) + "u;\n\n"; + + op += SP + SP + SP + SP + "f[f_idx] = W[elem_idx];\n"; + op += SP + SP + SP + "}\n"; // end grid-stride loop + op += SP + SP + "}\n"; // end operator() + op += SP + "};\n\n"; // end struct + + // ----------------------------------------------------------------------- + // Kernel 2: Im2Col + // ----------------------------------------------------------------------- + std::string im2colKname = "Im2ColKernel_" + opName; + op += SP + "//------ IM2COL_KERNEL_ALPAKA (Conv " + opName + ")\n"; + op += SP + "struct " + im2colKname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP + SP + SP + "T* __restrict__ col,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + op += SP + SP + SP + SP + "std::size_t const col_row = elem_idx / " + std::to_string(colCols) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const col_col = elem_idx % " + std::to_string(colCols) + "u;\n\n"; + + op += SP + SP + SP + SP + "std::size_t const ic = col_row / " + std::to_string(kernelSize) + "u;\n"; + op += SP + SP + SP + SP + 
"std::size_t const k_rem = col_row % " + std::to_string(kernelSize) + "u;\n"; + if (fDim > 2) { + op += SP + SP + SP + SP + "std::size_t const kd = k_rem / " + std::to_string(kHeight * kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = (k_rem / " + std::to_string(kWidth) + "u) % " + std::to_string(kHeight) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = k_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else if (fDim > 1) { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = k_rem / " + std::to_string(kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = k_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = k_rem;\n\n"; + } + + if (fDim > 2) { + op += SP + SP + SP + SP + "std::size_t const od = col_col / " + std::to_string(oHeight * oWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const oh = (col_col / " + std::to_string(oWidth) + "u) % " + std::to_string(oHeight) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ow = col_col % " + std::to_string(oWidth) + "u;\n\n"; + } else if (fDim > 1) { + op += SP + SP + SP + SP + "std::size_t const od = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const oh = col_col / " + std::to_string(oWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ow = col_col % " + std::to_string(oWidth) + "u;\n\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const od = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const oh = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const ow = col_col;\n\n"; + } + + op += SP + SP + SP + SP + "int64_t const id_in = static_cast(od * " + std::to_string(fAttrStrides[0]) + + "u + kd * " + std::to_string(fAttrDilations[0]) + "u) - " + std::to_string(fAttrPads[0]) + ";\n"; + op += SP + SP + SP 
+ SP + "int64_t const ih_in = static_cast(oh * " + std::to_string(fAttrStrides[1]) + + "u + kh * " + std::to_string(fAttrDilations[1]) + "u) - " + std::to_string(fAttrPads[1]) + ";\n"; + op += SP + SP + SP + SP + "int64_t const iw_in = static_cast(ow * " + std::to_string(fAttrStrides[2]) + + "u + kw * " + std::to_string(fAttrDilations[2]) + "u) - " + std::to_string(fAttrPads[2]) + ";\n\n"; + + op += SP + SP + SP + SP + "bool const in_bounds =\n"; + op += SP + SP + SP + SP + SP + "id_in >= 0 && id_in < " + std::to_string(iDepth) + " &&\n"; + op += SP + SP + SP + SP + SP + "ih_in >= 0 && ih_in < " + std::to_string(iHeight) + " &&\n"; + op += SP + SP + SP + SP + SP + "iw_in >= 0 && iw_in < " + std::to_string(iWidth) + ";\n\n"; + + op += SP + SP + SP + SP + "if (in_bounds) {\n"; + op += SP + SP + SP + SP + SP + "std::size_t const in_idx =\n"; + op += SP + SP + SP + SP + SP + SP + "ic * " + std::to_string(iDepth * iHeight * iWidth) + "u +\n"; + op += SP + SP + SP + SP + SP + SP + "static_cast(id_in) * " + std::to_string(iHeight * iWidth) + "u +\n"; + op += SP + SP + SP + SP + SP + SP + "static_cast(ih_in) * " + std::to_string(iWidth) + "u +\n"; + op += SP + SP + SP + SP + SP + SP + "static_cast(iw_in);\n"; + op += SP + SP + SP + SP + SP + "col[elem_idx] = input[in_idx];\n"; + op += SP + SP + SP + SP + "} else {\n"; + op += SP + SP + SP + SP + SP + "col[elem_idx] = static_cast(0);\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; + + // ----------------------------------------------------------------------- + // Kernel 3: Bias broadcast (only if bias present) + // ----------------------------------------------------------------------- + if (!fNB2.empty()) { + std::string biasKname = "BiasBroadcastKernel_" + opName; + op += SP + "//------ BIAS_BROADCAST_KERNEL_ALPAKA (Conv " + opName + ")\n"; + op += SP + "struct " + biasKname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void 
operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ bias,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n"; + op += SP + SP + SP + SP + "std::size_t const channel = elem_idx / " + std::to_string(spatialSize) + "u;\n"; + op += SP + SP + SP + SP + "output[elem_idx] = bias[channel];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; } + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + std::string op; + op = SP + "WeightVecKernel_" + opName + " weightVecKernel_" + opName + ";\n"; + op += SP + "Im2ColKernel_" + opName + " im2colKernel_" + opName + ";\n"; + if (!fNB2.empty()) + op += SP + "BiasBroadcastKernel_" + opName + " biasBroadcastKernel_" + opName + ";\n"; + return op; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty() || fShapeW.empty() || fShapeY.empty()) + throw std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first"); + + size_t bsize = fShapeX[0]; + size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; + size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; + size_t oWidth = fShapeY[fDim + 1]; + size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; + size_t iHeight = (fDim > 1) ? 
fShapeX[fDim] : 1; + size_t iWidth = fShapeX[fDim + 1]; + size_t outChannels = fShapeW[0]; + size_t kernelSize = fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2]; + size_t colElements = gemm_k * gemm_m; // colRows * colCols + size_t wTotal = ConvertShapeToLength(fShapeW); + + // For group conv: per-group output channels and _f offset + // gemm_n stays as total output channels — we divide per group at launch + size_t groupFOffset = gemm_n * gemm_k; // elements of _f per group + + std::stringstream out; + out << "\n//------ CONV_GPU_ALPAKA\n"; + + // ----------------------------------------------------------------------- + // Step 1: Weight vectorisation kernel — runs once, fully on GPU + // ----------------------------------------------------------------------- + out << SP << "// Step 1: vectorise W -> _f on GPU (once per infer call)\n"; + out << SP << "{\n"; + out << SP << SP << "auto const elementsPerThread_wv = Vec::all(static_cast(1));\n"; + out << SP << SP << "auto const elementsPerGrid_wv = Vec::all(Idx{" << wTotal << "});\n"; + out << SP << SP << "alpaka::KernelCfg const cfg_wv = {elementsPerGrid_wv, elementsPerThread_wv};\n"; + out << SP << SP << "auto const workDiv_wv = alpaka::getValidWorkDiv(cfg_wv, devAcc, weightVecKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNW << ")" + << ", alpaka::getPtrNative(deviceBuf_" << convK << ")" + << ", static_cast(" << wTotal << "));\n"; + out << SP << SP << "alpaka::exec(queue, workDiv_wv, weightVecKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNW << ")" + << ", alpaka::getPtrNative(deviceBuf_" << convK << ")" + << ", static_cast(" << wTotal << "));\n"; + out << SP << SP << "alpaka::wait(queue);\n"; + out << SP << "}\n\n"; + + // ----------------------------------------------------------------------- + // Step 2: Batch loop + // ----------------------------------------------------------------------- + out << SP << "for (std::size_t n = 0; n < " << bsize << "; n++) {\n\n"; 
+ out << SP << SP << "std::size_t const x_offset = n * " + << fShapeX[1] * iDepth * iHeight * iWidth << "u;\n"; + out << SP << SP << "std::size_t const out_offset = n * " + << fShapeY[1] * gemm_m << "u;\n\n"; + + // ----------------------------------------------------------------------- + // Step 3: Im2Col + // ----------------------------------------------------------------------- + out << SP << SP << "// Step 3: im2col\n"; + out << SP << SP << "{\n"; + out << SP << SP << SP << "auto const elementsPerThread_im2col = Vec::all(static_cast(1));\n"; + out << SP << SP << SP << "auto const elementsPerGrid_im2col = Vec::all(Idx{" << colElements << "});\n"; + out << SP << SP << SP << "alpaka::KernelCfg const cfg_im2col = {elementsPerGrid_im2col, elementsPerThread_im2col};\n"; + out << SP << SP << SP << "auto const workDiv_im2col = alpaka::getValidWorkDiv(cfg_im2col, devAcc, im2colKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << "alpaka::exec(queue, workDiv_im2col, im2colKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << "alpaka::wait(queue);\n"; + out << SP << SP << "}\n\n"; + + // ----------------------------------------------------------------------- + // Step 4: GEMM (+ optional bias) — group or non-group + // gemm_n/gemm_m/gemm_k are member variables set in Initialize() + // For grouped conv we use gemm_n/fAttrGroup per group, keeping gemm_n as total + // ----------------------------------------------------------------------- + if (fAttrGroup == 1) { + if (!fNB2.empty()) { + size_t biasElements = gemm_n * gemm_m; + out << SP << SP << "// Step 4a: broadcast bias into output slice\n"; + out << SP << SP << "{\n"; + out << SP << SP << SP << 
"auto const elementsPerThread_bias = Vec::all(static_cast(1));\n"; + out << SP << SP << SP << "auto const elementsPerGrid_bias = Vec::all(Idx{" << biasElements << "});\n"; + out << SP << SP << SP << "alpaka::KernelCfg const cfg_bias = {elementsPerGrid_bias, elementsPerThread_bias};\n"; + out << SP << SP << SP << "auto const workDiv_bias = alpaka::getValidWorkDiv(cfg_bias, devAcc, biasBroadcastKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset" + << ", static_cast(" << biasElements << "));\n"; + out << SP << SP << SP << "alpaka::exec(queue, workDiv_bias, biasBroadcastKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset" + << ", static_cast(" << biasElements << "));\n"; + out << SP << SP << SP << "alpaka::wait(queue);\n"; + out << SP << SP << "}\n\n"; + out << SP << SP << "// Step 4b: GEMM beta=1 fuses bias\n"; + out << SP << SP << "blas.matmul('n', 'n', " + << gemm_n << ", " << gemm_m << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ")" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset);\n\n"; + } else { + out << SP << SP << "// Step 4: GEMM beta=0 (no bias)\n"; + out << SP << SP << "blas.matmul('n', 'n', " + << gemm_n << ", " << gemm_m << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ")" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", 0.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset);\n\n"; + } + } else { + // Group convolution — gemm_n stays as total, divide per group at launch + out << SP << SP << "for (std::size_t g = 0; g < " << fAttrGroup << "; g++) {\n\n"; + out << SP << SP << SP << "std::size_t const g_out_offset = out_offset + g * " + << gemm_n * gemm_m << "u;\n"; + out << SP << SP << SP << "std::size_t const 
f_offset = g * " << groupFOffset << "u;\n\n"; + + if (!fNB2.empty()) { + size_t groupBiasElements = gemm_n * gemm_m; + out << SP << SP << SP << "// Broadcast group bias\n"; + out << SP << SP << SP << "{\n"; + out << SP << SP << SP << SP << "auto const elementsPerThread_bias = Vec::all(static_cast(1));\n"; + out << SP << SP << SP << SP << "auto const elementsPerGrid_bias = Vec::all(Idx{" << groupBiasElements << "});\n"; + out << SP << SP << SP << SP << "alpaka::KernelCfg const cfg_bias = {elementsPerGrid_bias, elementsPerThread_bias};\n"; + out << SP << SP << SP << SP << "auto const workDiv_bias = alpaka::getValidWorkDiv(cfg_bias, devAcc, biasBroadcastKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ") + g * " << gemm_n + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset" + << ", static_cast(" << groupBiasElements << "));\n"; + out << SP << SP << SP << SP << "alpaka::exec(queue, workDiv_bias, biasBroadcastKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ") + g * " << gemm_n + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset" + << ", static_cast(" << groupBiasElements << "));\n"; + out << SP << SP << SP << SP << "alpaka::wait(queue);\n"; + out << SP << SP << SP << "}\n\n"; + out << SP << SP << SP << "blas.matmul('n', 'n', " + << gemm_n << ", " << gemm_m << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ") + f_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset);\n\n"; + } else { + out << SP << SP << SP << "blas.matmul('n', 'n', " + << gemm_n << ", " << gemm_m << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ") + f_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", 0.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset);\n\n"; + } + out << SP << SP << "}\n"; // end group loop + } + + // print the contents of 
deviceBuf_W, deviceBuf_x_f and deviceBuf_x_xcol for debugging + out << SP << SP << "}\n"; + out << SP << SP << "// Debug: print _W, _f and _xcol\n"; + out << SP << SP << "std::cout << \"_W: \";\n"; + out << SP << SP << "for (std::size_t i = 0; i < " << wTotal << "; i++) {\n"; + out << SP << SP << SP << "printf(\"%f \", alpaka::getPtrNative(deviceBuf_" << fNW << ") + i);\n"; + out << SP << SP << "std::cout << std::endl;}\n"; + out << SP << SP << "for (std::size_t i = 0; i < " << wTotal << "; i++) {\n"; + out << SP << SP << SP << "printf(\"%f \", alpaka::getPtrNative(deviceBuf_" << convK << ") + i);\n"; + out << SP << SP << "}\n"; + out << SP << SP << "std::cout << std::endl;\n"; + out << SP << SP << "std::cout << \"_xcol: \";\n"; + out << SP << SP << "for (std::size_t i = 0; i < " << colElements << "; i++) {\n"; + out << SP << SP << SP << "printf(\"%f \", alpaka::getPtrNative(deviceBuf_" << imcol << ") + i);\n"; + out << SP << SP << "std::cout << std::endl;\n"; + + + out << SP << "}\n"; // end batch loop + return out.str(); + } + /*! 
\brief Returns the blas routines needed to compile the generated code */ std::vector GetBlasRoutines() override { return { std::string("Gemm"), std::string("Axpy") }; } + + + std::string GetBlasConfig(){ + auto lda = std::to_string(gemm_k); + auto ldb = std::to_string(gemm_n); + auto ldc = std::to_string(gemm_n); + return std::to_string(gemm_n) + ", " + std::to_string(gemm_m) + ", " + std::to_string(gemm_k) + ", " + ldb + ", " + lda + ", " + ldc; + } + }; } // namespace SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx index 1732d6e..9fd3adf 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx @@ -203,9 +203,9 @@ public: op += SP + SP + SP + SP + "std::size_t remaining = elem_idx;\n"; for (std::size_t d = 0; d < D; ++d) { op += SP + SP + SP + SP + "std::size_t const idx_" + std::to_string(d) - + " = remaining / " + strideI[d] + ";\n"; + + " = remaining / " + std::to_string(strideI[d]) + ";\n"; op += SP + SP + SP + SP + "remaining -= idx_" + std::to_string(d) - + " * " + strideI[d] + ";\n"; + + " * " + std::to_string(strideI[d]) + ";\n"; } op += "\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx index 06d8179..ce51778 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx @@ -77,7 +77,7 @@ public: fAttrAxis = fAttrAxis < 0 ? 
fShapeX.size() + fAttrAxis : fAttrAxis; if(static_cast(fAttrAxis) >= fShapeX.size()){ throw - std::runtime_error("TMVA::SOFIE ONNX TopK op axis = "+ std::to_string(fAttrAxis) +" value exeeds size of tensor " +fNX+" of size "+fShapeX.size()+" ."); + std::runtime_error("TMVA::SOFIE ONNX TopK op axis = "+ std::to_string(fAttrAxis) +" value exeeds size of tensor " +fNX+" of size "+std::to_string(fShapeX.size())+" ."); } // fK cannot be larger that axis dimension fK = std::min(fK, fShapeX[fAttrAxis]); diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index d59eee8..b75ee3e 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -3,7 +3,7 @@ #include "SOFIE/RTensor.hxx" -#include "ROOT/RSpan.hxx" +#include #include #include diff --git a/src/SOFIE_core/src/RFunction_MLP.cxx b/src/SOFIE_core/src/RFunction_MLP.cxx index eff76f6..34abef1 100644 --- a/src/SOFIE_core/src/RFunction_MLP.cxx +++ b/src/SOFIE_core/src/RFunction_MLP.cxx @@ -10,7 +10,7 @@ namespace SOFIE { -RFunction_MLP::RFunction_MLP(FunctionTarget target, Int_t numLayers, Activation activation_function, bool activate_final, GraphType gType): +RFunction_MLP::RFunction_MLP(FunctionTarget target, int_t numLayers, Activation activation_function, bool activate_final, GraphType gType): RFunction_Update(target, gType), fNumLayers(numLayers), fActivationFunction(activation_function), fActivateFinal(activate_final) { // assuming all the linear layers has a kernel and a bias initialized tensors @@ -43,12 +43,12 @@ void RFunction_MLP::Initialize() { double beta = (fBiasTensors[i].empty()) ? 0. 
: 1.; op_gemm.reset(new ROperator_Gemm(1.0,beta,0,0,fGemmInput,UTILITY::Clean_name(fKernelTensors[i]),UTILITY::Clean_name(fBiasTensors[i]),fFuncName+"Gemm"+std::to_string(i))); function_block->AddOperator(std::move(op_gemm)); - fGemmInput = fFuncName+"Gemm"+i; + fGemmInput = fFuncName+"Gemm"+std::to_string(i); if (fActivationFunction == Activation::RELU) { std::unique_ptr op_relu; op_relu.reset(new ROperator_Relu(fFuncName+"Gemm"+std::to_string(i), fFuncName+"Relu"+std::to_string(i))); function_block->AddOperator(std::move(op_relu)); - fGemmInput = fFuncName+"Relu"+i; + fGemmInput = fFuncName+"Relu"+std::to_string(i); } } diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index 3dd1d23..c50921f 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -1404,6 +1404,7 @@ void RModel::OutputGenerated(std::string filename, bool append) { } } +#ifdef SOFIE_SUPPORT_ROOT_BINARY void RModel::Streamer(TBuffer &R__b) { if (R__b.IsReading()) { RModel::Class()->ReadBuffer(R__b, this); @@ -1418,5 +1419,6 @@ void RModel::Streamer(TBuffer &R__b) { RModel::Class()->WriteBuffer(R__b, this); } } +#endif }//SOFIE diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 5720c37..5f1f369 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -4,7 +4,10 @@ #include #include +#ifdef SOFIE_SUPPORT_ROOT_BINARY #include "TFile.h" +#endif + #include "SOFIE/RModel.hxx" #include "SOFIE/SOFIE_common.hxx" @@ -247,7 +250,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { // single initiation operators must only be initialized only once and their count should be stored in the registered_operators set to avoid generating multiple kernels for the same operator kind fGC += "\n//--- ALPAKA Kernels\n"; for (size_t id = 0; id < fOperators.size(); id++) { - if(fOperators[id]->GetKind() == OperatorKind::GEMM){ + if(fOperators[id]->GetKind() == OperatorKind::GEMM || 
fOperators[id]->GetKind() == OperatorKind::CONV) { OpNeedsBlas = true; } if(single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { @@ -348,7 +351,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { for (size_t id = 0; id < fOperators.size(); id++) { fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); - if (fOperators[id]->GetKind() == OperatorKind::GEMM){ + if (fOperators[id]->GetKind() == OperatorKind::GEMM || fOperators[id]->GetKind() == OperatorKind::CONV) { fGC += "\nblas.AddLayoutConfig("+fOperators[id]->GetBlasConfig()+");\n"; } } diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/src/SOFIE_core/src/RModel_Base.cxx index f212c53..74e310f 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -32,9 +32,16 @@ void RModel_Base::GenerateHeaderInfo(std::string& hgname) { fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; if (fUseWeightFile) fGC += "#include \n"; - // Include TFile when saving the weights in a binary ROOT file - if (fWeightFile == WeightFileType::RootBinary) - fGC += "#include \"TFile.h\"\n"; + + if (fWeightFile == WeightFileType::RootBinary){ + #ifdef SOFIE_SUPPORT_ROOT_BINARY + // Include TFile when saving the weights in a binary ROOT file + fGC += "#include \"TFile.h\"\n"; + #else + throw std::runtime_error("TMVA-SOFIE: ROOT binary weight file option is enabled but the code is not compiled with ROOT support"); + #endif + + } fGC += "\nnamespace SOFIE_" + fName + "{\n"; if (!fNeededBlasRoutines.empty()) { @@ -82,9 +89,15 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { fGC += "#include \"SOFIE/SOFIE_common.hxx\"\n"; if (fUseWeightFile) fGC += "#include \n"; - // Include TFile when saving the weights in a binary ROOT file - if (fWeightFile == WeightFileType::RootBinary) - fGC += "#include \"TFile.h\"\n"; + + if (fWeightFile == WeightFileType::RootBinary){ + #ifdef SOFIE_SUPPORT_ROOT_BINARY + // Include TFile when saving the weights in a 
binary ROOT file + fGC += "#include \"TFile.h\"\n"; + #else + throw std::runtime_error("TMVA-SOFIE: ROOT binary weight file option is enabled but the code is not compiled with ROOT support"); + #endif + } fGC += "\nusing Dim1D = alpaka::DimInt<1>;\n"; fGC += "\nnamespace SOFIE_" + fName + "{\n"; diff --git a/src/SOFIE_core/test/CMakeLists.txt b/src/SOFIE_core/test/CMakeLists.txt index d7611e0..33f1046 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/src/SOFIE_core/test/CMakeLists.txt @@ -121,7 +121,7 @@ if (ENABLE_ALPAKA_TESTS) ROOTTEST_GENERATE_EXECUTABLE( TestCustomModelsFromONNXForAlpakaCuda TestCustomModelsFromONNXForAlpakaCuda.cxx - LIBRARIES MathCore SOFIE_core GTest::gtest GTest::gtest_main + LIBRARIES SOFIE_core GTest::gtest GTest::gtest_main FIXTURES_REQUIRED sofie-compile-models-onnx-alpaka FIXTURES_SETUP sofie-test-models-onnx-alpaka-build ) @@ -131,7 +131,6 @@ if (ENABLE_ALPAKA_TESTS) ${CMAKE_CURRENT_BINARY_DIR} ${alpaka_SOURCE_DIR}/include ${sofieblas_SOURCE_DIR}/include - ${ROOT_INCLUDE_DIRS} ${CUDAToolkit_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ) @@ -140,7 +139,9 @@ if (ENABLE_ALPAKA_TESTS) TestCustomModelsFromONNXForAlpakaCuda PROPERTIES CUDA_SEPARABLE_COMPILATION OFF - CUDA_ARCHITECTURES 70 80 86 + CUDA_ARCHITECTURES 70 80 86 + CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON ) target_compile_definitions( @@ -178,7 +179,6 @@ if (ENABLE_ALPAKA_TESTS) CUDA::cudart CUDA::cublas CUDA::cublasLt - ${ROOT_LIBRARIES} ) ROOTTEST_ADD_TEST( diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 197d05a..a842e65 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -97,6 +97,23 @@ #include "input_models/references/ReduceMean.ref.hxx" #include "input_models/references/ReduceProd.ref.hxx" +#include "ConvWithPadding_FromONNX_GPU_ALPAKA.hxx" +#include 
"input_models/references/ConvWithPadding.ref.hxx" + +#include "ConvWithoutPadding_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvWithoutPadding.ref.hxx" + +#include "ConvWithAutopadSameLower_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvWithAutopadSameLower.ref.hxx" + +#include "ConvWithStridesPadding_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvWithStridesPadding.ref.hxx" + +#include "ConvWithStridesNoPadding_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvWithStridesNoPadding.ref.hxx" + +#include "ConvWithAsymmetricPadding_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvWithAsymmetricPadding.ref.hxx" #include #include @@ -1937,7 +1954,154 @@ TEST_F(SofieAlpakaTest, ReduceSumSquare) } float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); - EXPECT_EQ(correct.size(), 2u); for (size_t i = 0; i < correct.size(); ++i) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; -} \ No newline at end of file +} + +// TEST_F(SofieAlpakaTest, ConvWithPadding) +// { +// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + +// // Preparing the standard all-ones input +// std::vector input(25); +// std::iota(input.begin(), input.end(), 0.0f); + +// auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); +// float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); +// for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + +// auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); +// alpaka::memcpy(queue, input_d, input_h); +// alpaka::wait(queue); + +// auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithPadding_ExpectedOutput::all_ones) / sizeof(float)})); + +// { +// SOFIE_ConvWithPadding::Session session("ConvWithPadding_FromONNX_GPU_ALPAKA.dat"); +// auto result = session.infer(input_d); +// alpaka::wait(queue); +// cudaDeviceSynchronize(); +// alpaka::memcpy(queue, result_h, result); +// 
alpaka::wait(queue); + +// } + +// float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); +// float *correct = ConvWithPadding_ExpectedOutput::all_ones; + +// for (size_t i = 0; i < 25; ++i) { +// std::cout<<"res: "< output = s.infer(input.data()); + +// // Checking output size +// EXPECT_EQ(output.size(), sizeof(ConvWithoutPadding_ExpectedOutput::all_ones) / sizeof(float)); + +// float *correct = ConvWithoutPadding_ExpectedOutput::all_ones; + +// // Checking every output value, one by one +// for (size_t i = 0; i < output.size(); ++i) { +// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); +// } +// } + + +// TEST(ONNX, ConvWithAutopadSameLower) +// { +// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + +// // Preparing the standard all-ones input +// std::vector input(25); +// std::iota(input.begin(), input.end(), 0.0f); +// SOFIE_ConvWithAutopadSameLower::Session s("ConvWithAutopadSameLower_FromONNX.dat"); +// std::vector output = s.infer(input.data()); + +// // Checking output size +// EXPECT_EQ(output.size(), sizeof(ConvWithAutopadSameLower_ExpectedOutput::all_ones) / sizeof(float)); + +// float *correct = ConvWithAutopadSameLower_ExpectedOutput::all_ones; + +// // Checking every output value, one by one +// for (size_t i = 0; i < output.size(); ++i) { +// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); +// } +// } + + +// TEST(ONNX, ConvWithStridesPadding) +// { +// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + +// // Preparing the standard all-ones input +// std::vector input(35); +// std::iota(input.begin(), input.end(), 0.0f); +// SOFIE_ConvWithStridesPadding::Session s("ConvWithStridesPadding_FromONNX.dat"); +// std::vector output = s.infer(input.data()); + +// // Checking output size +// EXPECT_EQ(output.size(), sizeof(ConvWithStridesPadding_ExpectedOutput::all_ones) / sizeof(float)); + +// float *correct = ConvWithStridesPadding_ExpectedOutput::all_ones; + +// // Checking every output value, one by one +// for (size_t i = 0; 
i < output.size(); ++i) { +// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); +// } +// } + + +// TEST(ONNX, ConvWithStridesNoPadding) +// { +// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + +// // Preparing the standard all-ones input +// std::vector input(35); +// std::iota(input.begin(), input.end(), 0.0f); +// SOFIE_ConvWithStridesNoPadding::Session s("ConvWithStridesNoPadding_FromONNX.dat"); +// std::vector output = s.infer(input.data()); + +// // Checking output size +// EXPECT_EQ(output.size(), sizeof(ConvWithStridesNoPadding_ExpectedOutput::all_ones) / sizeof(float)); + +// float *correct = ConvWithStridesNoPadding_ExpectedOutput::all_ones; + +// // Checking every output value, one by one +// for (size_t i = 0; i < output.size(); ++i) { +// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); +// } +// } + + +// // Disables test (asymmetric padding not supported) +// TEST(DISABLED_ONNX, ConvWithAsymmetricPadding) +// { +// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + +// // Preparing the standard all-ones input +// std::vector input(35); +// std::iota(input.begin(), input.end(), 0.0f); +// SOFIE_ConvWithAsymmetricPadding::Session s("ConvWithAsymmetricPadding_FromONNX.dat"); +// std::vector output = s.infer(input.data()); + +// // Checking output size +// EXPECT_EQ(output.size(), sizeof(ConvWithAsymmetricPadding_ExpectedOutput::all_ones) / sizeof(float)); + +// float *correct = ConvWithAsymmetricPadding_ExpectedOutput::all_ones; + +// // Checking every output value, one by one +// for (size_t i = 0; i < output.size(); ++i) { +// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); +// } +// } diff --git a/src/SOFIE_parsers/CMakeLists.txt b/src/SOFIE_parsers/CMakeLists.txt index 0e7e03d..c583b56 100644 --- a/src/SOFIE_parsers/CMakeLists.txt +++ b/src/SOFIE_parsers/CMakeLists.txt @@ -103,14 +103,16 @@ target_include_directories(SOFIE_parsers PUBLIC set_target_properties(SOFIE_parsers PROPERTIES POSITION_INDEPENDENT_CODE TRUE) 
+if(SOFIE_WITH_ROOT AND ROOT_FOUND) ROOT_GENERATE_DICTIONARY(G__SOFIE_parsers ${sources_headers} LINKDEF inc/LinkDef.h MODULE SOFIE_parsers OPTIONS --deep -) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_parsers_rdict.pcm - ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_parsers.rootmap - DESTINATION lib) + ) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_parsers_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/libSOFIE_parsers.rootmap + DESTINATION lib) +endif() install(TARGETS SOFIE_parsers LIBRARY DESTINATION lib diff --git a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx index 23a19a6..0e6b6cd 100644 --- a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx +++ b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx @@ -1,4 +1,3 @@ -#include "Byteswap.h" #include "SOFIE/RModelParser_ONNX.hxx" #include "onnx_proto3.pb.h" @@ -9,6 +8,10 @@ #include #include #include +#include +#include +#include +#include #include "SOFIE/SOFIE_common.hxx" @@ -137,19 +140,31 @@ struct ExtractDataFromTP { static_cast(data)); } }; +// Reverse the bytes of a trivially-copyable value (used on big-endian hosts). +// ONNX raw_data is always stored in little-endian order. 
+template +static T bswap_value(T value) noexcept { + static_assert(std::is_trivially_copyable_v); + std::array bytes; + std::memcpy(bytes.data(), &value, sizeof(T)); + std::reverse(bytes.begin(), bytes.end()); + T result; + std::memcpy(&result, bytes.data(), sizeof(T)); + return result; +} + template std::shared_ptr GetInitializedTensorData(onnx::TensorProto * tensorproto, size_t length) { std::cout<<"Getting Initialized Tensor data for tensor " << tensorproto->name() << " of type " << tensorproto->data_type() << " and length " << length << std::endl; std::shared_ptr data(malloc(length * sizeof(T)), free); if (!tensorproto->raw_data().empty()) { -#ifdef R__BYTESWAP std::memcpy(data.get(), tensorproto->raw_data().c_str(), length * sizeof(T)); -#else - for (std::size_t k = 0; k < length; ++k) - (reinterpret_cast::value_type *>(data.get()))[k] = - RByteSwap::bswap((reinterpret_cast::value_type *>(tensorproto->raw_data().c_str()))[k]); -#endif + if constexpr (std::endian::native != std::endian::little) { + T *ptr = static_cast(data.get()); + for (std::size_t k = 0; k < length; ++k) + ptr[k] = bswap_value(ptr[k]); + } } else { ExtractDataFromTP::Copy(tensorproto, data.get()); } From dae834ebf81d3d0e26d689115b37b29ff951a540 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 7 May 2026 11:25:43 +0200 Subject: [PATCH 38/43] feat: Support for heterogeneous inference for Conv operator --- src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx | 163 +++++----- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 2 +- src/SOFIE_core/src/RModel_ALPAKA.cxx | 2 +- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 298 +++++++++++------- 4 files changed, 281 insertions(+), 184 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx index a1c8576..7ef7a6d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx @@ -86,6 +86,7 @@ public: } fInputTensorNames = { fNX }; 
fOutputTensorNames = { fNY }; + fKind = OperatorKind::CONV; } std::vector TypeInference(std::vector input) override { @@ -565,10 +566,8 @@ public: std::string op; - // ----------------------------------------------------------------------- // Kernel 1: Weight vectorisation — reorder W into _f with dilation layout // Each thread handles one output element of _f - // ----------------------------------------------------------------------- std::string wKname = "WeightVecKernel_" + opName; op = "\n//------ WEIGHT_VEC_KERNEL_ALPAKA (Conv " + opName + ")\n"; op += SP + "struct " + wKname + " {\n"; @@ -613,13 +612,11 @@ public: op += " +\n" + SP + SP + SP + SP + SP + "kw * " + std::to_string(wstrideDil) + "u;\n\n"; op += SP + SP + SP + SP + "f[f_idx] = W[elem_idx];\n"; - op += SP + SP + SP + "}\n"; // end grid-stride loop - op += SP + SP + "}\n"; // end operator() - op += SP + "};\n\n"; // end struct + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; - // ----------------------------------------------------------------------- // Kernel 2: Im2Col - // ----------------------------------------------------------------------- std::string im2colKname = "Im2ColKernel_" + opName; op += SP + "//------ IM2COL_KERNEL_ALPAKA (Conv " + opName + ")\n"; op += SP + "struct " + im2colKname + " {\n"; @@ -669,12 +666,31 @@ public: op += SP + SP + SP + SP + "std::size_t const ow = col_col;\n\n"; } - op += SP + SP + SP + SP + "int64_t const id_in = static_cast(od * " + std::to_string(fAttrStrides[0]) - + "u + kd * " + std::to_string(fAttrDilations[0]) + "u) - " + std::to_string(fAttrPads[0]) + ";\n"; - op += SP + SP + SP + SP + "int64_t const ih_in = static_cast(oh * " + std::to_string(fAttrStrides[1]) - + "u + kh * " + std::to_string(fAttrDilations[1]) + "u) - " + std::to_string(fAttrPads[1]) + ";\n"; - op += SP + SP + SP + SP + "int64_t const iw_in = static_cast(ow * " + std::to_string(fAttrStrides[2]) - + "u + kw * " + std::to_string(fAttrDilations[2]) + "u) - " + 
std::to_string(fAttrPads[2]) + ";\n\n"; + // Depth: trivially 0 for fDim < 3 (od=kd=0 always); pads[0] is height-begin for 2D, so + // applying it here would make id_in negative and zero the whole output. + if (fDim >= 3) { + op += SP + SP + SP + SP + "int64_t const id_in = static_cast(od * " + std::to_string(fAttrStrides[0]) + + "u + kd * " + std::to_string(fAttrDilations[0]) + "u) - " + std::to_string(fAttrPads[0]) + ";\n"; + } else { + op += SP + SP + SP + SP + "int64_t const id_in = 0;\n"; + } + // Height: for fDim==3 the height dim is at strides/pads index 1; for fDim==2 it is at index 0. + // For fDim==1 oh=kh=0 so ih_in=0. + { + size_t const hIdx = (fDim > 2) ? 1 : 0; + if (fDim >= 2) { + op += SP + SP + SP + SP + "int64_t const ih_in = static_cast(oh * " + std::to_string(fAttrStrides[hIdx]) + + "u + kh * " + std::to_string(fAttrDilations[hIdx]) + "u) - " + std::to_string(fAttrPads[hIdx]) + ";\n"; + } else { + op += SP + SP + SP + SP + "int64_t const ih_in = 0;\n"; + } + } + // Width: fAttrStrides/Dilations/Pads are ordered [d,h,w] so width is at index fDim-1. 
+ { + size_t const wIdx = fDim - 1; + op += SP + SP + SP + SP + "int64_t const iw_in = static_cast(ow * " + std::to_string(fAttrStrides[wIdx]) + + "u + kw * " + std::to_string(fAttrDilations[wIdx]) + "u) - " + std::to_string(fAttrPads[wIdx]) + ";\n\n"; + } op += SP + SP + SP + SP + "bool const in_bounds =\n"; op += SP + SP + SP + SP + SP + "id_in >= 0 && id_in < " + std::to_string(iDepth) + " &&\n"; @@ -695,9 +711,7 @@ public: op += SP + SP + "}\n"; op += SP + "};\n\n"; - // ----------------------------------------------------------------------- // Kernel 3: Bias broadcast (only if bias present) - // ----------------------------------------------------------------------- if (!fNB2.empty()) { std::string biasKname = "BiasBroadcastKernel_" + opName; op += SP + "//------ BIAS_BROADCAST_KERNEL_ALPAKA (Conv " + opName + ")\n"; @@ -787,30 +801,26 @@ public: << fShapeY[1] * gemm_m << "u;\n\n"; // ----------------------------------------------------------------------- - // Step 3: Im2Col - // ----------------------------------------------------------------------- - out << SP << SP << "// Step 3: im2col\n"; - out << SP << SP << "{\n"; - out << SP << SP << SP << "auto const elementsPerThread_im2col = Vec::all(static_cast(1));\n"; - out << SP << SP << SP << "auto const elementsPerGrid_im2col = Vec::all(Idx{" << colElements << "});\n"; - out << SP << SP << SP << "alpaka::KernelCfg const cfg_im2col = {elementsPerGrid_im2col, elementsPerThread_im2col};\n"; - out << SP << SP << SP << "auto const workDiv_im2col = alpaka::getValidWorkDiv(cfg_im2col, devAcc, im2colKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" - << ", static_cast(" << colElements << "));\n"; - out << SP << SP << SP << "alpaka::exec(queue, workDiv_im2col, im2colKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" - << ", static_cast(" 
<< colElements << "));\n"; - out << SP << SP << SP << "alpaka::wait(queue);\n"; - out << SP << SP << "}\n\n"; - - // ----------------------------------------------------------------------- - // Step 4: GEMM (+ optional bias) — group or non-group - // gemm_n/gemm_m/gemm_k are member variables set in Initialize() - // For grouped conv we use gemm_n/fAttrGroup per group, keeping gemm_n as total + // Step 3 + 4: Im2Col then GEMM — structure differs for grouped vs non-grouped // ----------------------------------------------------------------------- if (fAttrGroup == 1) { + // Non-grouped: single im2col per batch, then GEMM + out << SP << SP << "// Step 3: im2col\n"; + out << SP << SP << "{\n"; + out << SP << SP << SP << "auto const elementsPerThread_im2col = Vec::all(static_cast(1));\n"; + out << SP << SP << SP << "auto const elementsPerGrid_im2col = Vec::all(Idx{" << colElements << "});\n"; + out << SP << SP << SP << "alpaka::KernelCfg const cfg_im2col = {elementsPerGrid_im2col, elementsPerThread_im2col};\n"; + out << SP << SP << SP << "auto const workDiv_im2col = alpaka::getValidWorkDiv(cfg_im2col, devAcc, im2colKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << "alpaka::exec(queue, workDiv_im2col, im2colKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << "alpaka::wait(queue);\n"; + out << SP << SP << "}\n\n"; + if (!fNB2.empty()) { size_t biasElements = gemm_n * gemm_m; out << SP << SP << "// Step 4a: broadcast bias into output slice\n"; @@ -828,26 +838,48 @@ public: << ", static_cast(" << biasElements << "));\n"; out << SP << SP << SP << "alpaka::wait(queue);\n"; out << SP << SP << "}\n\n"; - out << SP << SP << "// Step 4b: GEMM beta=1 
fuses bias\n"; + out << SP << SP << "// Step 4b: GEMM beta=1 accumulates onto bias-initialised output\n"; out << SP << SP << "blas.matmul('n', 'n', " - << gemm_n << ", " << gemm_m << ", " << gemm_k - << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ")" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << gemm_m << ", " << gemm_n << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", alpaka::getPtrNative(deviceBuf_" << convK << ")" << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset);\n\n"; } else { out << SP << SP << "// Step 4: GEMM beta=0 (no bias)\n"; out << SP << SP << "blas.matmul('n', 'n', " - << gemm_n << ", " << gemm_m << ", " << gemm_k - << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ")" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << gemm_m << ", " << gemm_n << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", alpaka::getPtrNative(deviceBuf_" << convK << ")" << ", 0.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset);\n\n"; } + // Wait for GEMM to finish before next batch overwrites the shared _xcol buffer. + out << SP << SP << "alpaka::wait(queue);\n\n"; + } else { - // Group convolution — gemm_n stays as total, divide per group at launch + // Grouped convolution: im2col and GEMM per group with group-adjusted input pointer. + // Each group processes fShapeW[1] input channels starting at g * fShapeW[1]. 
out << SP << SP << "for (std::size_t g = 0; g < " << fAttrGroup << "; g++) {\n\n"; + out << SP << SP << SP << "std::size_t const g_in_offset = x_offset + g * " + << fShapeW[1] * iDepth * iHeight * iWidth << "u;\n"; out << SP << SP << SP << "std::size_t const g_out_offset = out_offset + g * " << gemm_n * gemm_m << "u;\n"; - out << SP << SP << SP << "std::size_t const f_offset = g * " << groupFOffset << "u;\n\n"; + out << SP << SP << SP << "std::size_t const f_offset = g * " << groupFOffset << "u;\n\n"; + + out << SP << SP << SP << "// im2col for group g (reads only this group's input channels)\n"; + out << SP << SP << SP << "{\n"; + out << SP << SP << SP << SP << "auto const elementsPerThread_im2col = Vec::all(static_cast(1));\n"; + out << SP << SP << SP << SP << "auto const elementsPerGrid_im2col = Vec::all(Idx{" << colElements << "});\n"; + out << SP << SP << SP << SP << "alpaka::KernelCfg const cfg_im2col = {elementsPerGrid_im2col, elementsPerThread_im2col};\n"; + out << SP << SP << SP << SP << "auto const workDiv_im2col = alpaka::getValidWorkDiv(cfg_im2col, devAcc, im2colKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + g_in_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << SP << "alpaka::exec(queue, workDiv_im2col, im2colKernel_" << opName + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + g_in_offset" + << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << SP << "alpaka::wait(queue);\n"; + out << SP << SP << SP << "}\n\n"; if (!fNB2.empty()) { size_t groupBiasElements = gemm_n * gemm_m; @@ -867,36 +899,21 @@ public: out << SP << SP << SP << SP << "alpaka::wait(queue);\n"; out << SP << SP << SP << "}\n\n"; out << SP << SP << SP << "blas.matmul('n', 'n', " - << gemm_n << ", " << gemm_m << ", " << gemm_k - << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ") + 
f_offset" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << gemm_m << ", " << gemm_n << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", alpaka::getPtrNative(deviceBuf_" << convK << ") + f_offset" << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset);\n\n"; } else { out << SP << SP << SP << "blas.matmul('n', 'n', " - << gemm_n << ", " << gemm_m << ", " << gemm_k - << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << convK << ") + f_offset" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << gemm_m << ", " << gemm_n << ", " << gemm_k + << ", 1.0f, alpaka::getPtrNative(deviceBuf_" << imcol << ")" + << ", alpaka::getPtrNative(deviceBuf_" << convK << ") + f_offset" << ", 0.0f, alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset);\n\n"; } + // Wait for GEMM to finish before next group's im2col overwrites the shared _xcol buffer. + out << SP << SP << SP << "alpaka::wait(queue);\n\n"; out << SP << SP << "}\n"; // end group loop } - - // print the contents of deviceBuf_W, deviceBuf_x_f and deviceBuf_x_xcol for debugging - out << SP << SP << "}\n"; - out << SP << SP << "// Debug: print _W, _f and _xcol\n"; - out << SP << SP << "std::cout << \"_W: \";\n"; - out << SP << SP << "for (std::size_t i = 0; i < " << wTotal << "; i++) {\n"; - out << SP << SP << SP << "printf(\"%f \", alpaka::getPtrNative(deviceBuf_" << fNW << ") + i);\n"; - out << SP << SP << "std::cout << std::endl;}\n"; - out << SP << SP << "for (std::size_t i = 0; i < " << wTotal << "; i++) {\n"; - out << SP << SP << SP << "printf(\"%f \", alpaka::getPtrNative(deviceBuf_" << convK << ") + i);\n"; - out << SP << SP << "}\n"; - out << SP << SP << "std::cout << std::endl;\n"; - out << SP << SP << "std::cout << \"_xcol: \";\n"; - out << SP << SP << "for (std::size_t i = 0; i < " << colElements << "; i++) {\n"; - out << SP << SP << SP << "printf(\"%f \", alpaka::getPtrNative(deviceBuf_" << imcol << ") + i);\n"; - out << SP << SP << 
"std::cout << std::endl;\n"; - out << SP << "}\n"; // end batch loop return out.str(); @@ -908,10 +925,10 @@ public: std::string GetBlasConfig(){ - auto lda = std::to_string(gemm_k); - auto ldb = std::to_string(gemm_n); - auto ldc = std::to_string(gemm_n); - return std::to_string(gemm_n) + ", " + std::to_string(gemm_m) + ", " + std::to_string(gemm_k) + ", " + ldb + ", " + lda + ", " + ldc; + auto lda = std::to_string(gemm_m); // ld for xcol^T (gemm_m×gemm_k col-major) + auto ldb = std::to_string(gemm_k); // ld for xf^T (gemm_k×gemm_n col-major) + auto ldc = std::to_string(gemm_m); // ld for y^T (gemm_m×gemm_n col-major) + return std::to_string(gemm_m) + ", " + std::to_string(gemm_n) + ", " + std::to_string(gemm_k) + ", " + lda + ", " + ldb + ", " + ldc + ", 'n', 'n'"; } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 89ed7f1..a82e111 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -527,7 +527,7 @@ namespace SOFIE{ auto lda = (fAttrTransA ? m : k); auto ldb = (fAttrTransB ? k : n); auto ldc = n; - return n+", "+m+", "+k+", "+ldb+", "+lda+", "+ldc; + return n+", "+m+", "+k+", "+ldb+", "+lda+", "+ldc+", "+(fAttrTransB ? "'t'" : "'n'")+", "+(fAttrTransA ? 
"'t'" : "'n'"); } }; diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 5f1f369..f105aea 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -352,7 +352,7 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { for (size_t id = 0; id < fOperators.size(); id++) { fGC += fOperators[id]->GenerateInitCode_GPU_ALPAKA(); if (fOperators[id]->GetKind() == OperatorKind::GEMM || fOperators[id]->GetKind() == OperatorKind::CONV) { - fGC += "\nblas.AddLayoutConfig("+fOperators[id]->GetBlasConfig()+");\n"; + fGC += "\nblas.addLayoutConfig("+fOperators[id]->GetBlasConfig()+");\n"; } } diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index a842e65..e4bfc2e 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -1958,150 +1958,230 @@ TEST_F(SofieAlpakaTest, ReduceSumSquare) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; } -// TEST_F(SofieAlpakaTest, ConvWithPadding) -// { -// constexpr float TOLERANCE = DEFAULT_TOLERANCE; +TEST_F(SofieAlpakaTest, ConvWithPadding) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; -// // Preparing the standard all-ones input -// std::vector input(25); -// std::iota(input.begin(), input.end(), 0.0f); + // Preparing the standard all-ones input + std::vector input(25); + std::iota(input.begin(), input.end(), 0.0f); -// auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); -// float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); -// for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; -// auto input_d = 
alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); -// alpaka::memcpy(queue, input_d, input_h); -// alpaka::wait(queue); + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); -// auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithPadding_ExpectedOutput::all_ones) / sizeof(float)})); + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithPadding_ExpectedOutput::all_ones) / sizeof(float)})); -// { -// SOFIE_ConvWithPadding::Session session("ConvWithPadding_FromONNX_GPU_ALPAKA.dat"); -// auto result = session.infer(input_d); -// alpaka::wait(queue); -// cudaDeviceSynchronize(); -// alpaka::memcpy(queue, result_h, result); -// alpaka::wait(queue); + { + SOFIE_ConvWithPadding::Session session("ConvWithPadding_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); -// } + } -// float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); -// float *correct = ConvWithPadding_ExpectedOutput::all_ones; + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithPadding_ExpectedOutput::all_ones; -// for (size_t i = 0; i < 25; ++i) { -// std::cout<<"res: "< output = s.infer(input.data()); + // Preparing the standard all-ones input + std::vector input(25); + std::iota(input.begin(), input.end(), 0.0f); -// // Checking output size -// EXPECT_EQ(output.size(), sizeof(ConvWithoutPadding_ExpectedOutput::all_ones) / sizeof(float)); + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; -// float *correct = ConvWithoutPadding_ExpectedOutput::all_ones; + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + 
alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithoutPadding_ExpectedOutput::all_ones) / sizeof(float)})); + + { + SOFIE_ConvWithoutPadding::Session session("ConvWithoutPadding_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + + } + + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithoutPadding_ExpectedOutput::all_ones; + constexpr size_t nOut_convNoPad = sizeof(ConvWithoutPadding_ExpectedOutput::all_ones) / sizeof(float); + + for (size_t i = 0; i < nOut_convNoPad; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } + +} + + +TEST_F(SofieAlpakaTest, ConvWithAutopadSameLower) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // Preparing the standard all-ones input + std::vector input(25); + std::iota(input.begin(), input.end(), 0.0f); + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithAutopadSameLower_ExpectedOutput::all_ones) / sizeof(float)})); + + { + SOFIE_ConvWithAutopadSameLower::Session session("ConvWithAutopadSameLower_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithAutopadSameLower_ExpectedOutput::all_ones; -// // Checking every output value, one by one -// 
for (size_t i = 0; i < output.size(); ++i) { -// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); -// } -// } + for (size_t i = 0; i < 9; ++i) { + std::cout << "res: " << res_ptr[i] << ", correct: " << correct[i] << std::endl; + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} -// TEST(ONNX, ConvWithAutopadSameLower) -// { -// constexpr float TOLERANCE = DEFAULT_TOLERANCE; +TEST_F(SofieAlpakaTest, ConvWithStridesPadding) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; -// // Preparing the standard all-ones input -// std::vector input(25); -// std::iota(input.begin(), input.end(), 0.0f); -// SOFIE_ConvWithAutopadSameLower::Session s("ConvWithAutopadSameLower_FromONNX.dat"); -// std::vector output = s.infer(input.data()); + // Preparing the standard all-ones input + std::vector input(35); + std::iota(input.begin(), input.end(), 0.0f); -// // Checking output size -// EXPECT_EQ(output.size(), sizeof(ConvWithAutopadSameLower_ExpectedOutput::all_ones) / sizeof(float)); + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; -// float *correct = ConvWithAutopadSameLower_ExpectedOutput::all_ones; + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithStridesPadding_ExpectedOutput::all_ones) / sizeof(float)})); -// // Checking every output value, one by one -// for (size_t i = 0; i < output.size(); ++i) { -// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); -// } -// } + { + SOFIE_ConvWithStridesPadding::Session session("ConvWithStridesPadding_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + 
} + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithStridesPadding_ExpectedOutput::all_ones; + constexpr size_t nOut_stridesPad = sizeof(ConvWithStridesPadding_ExpectedOutput::all_ones) / sizeof(float); -// TEST(ONNX, ConvWithStridesPadding) -// { -// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + for (size_t i = 0; i < nOut_stridesPad; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} -// // Preparing the standard all-ones input -// std::vector input(35); -// std::iota(input.begin(), input.end(), 0.0f); -// SOFIE_ConvWithStridesPadding::Session s("ConvWithStridesPadding_FromONNX.dat"); -// std::vector output = s.infer(input.data()); -// // Checking output size -// EXPECT_EQ(output.size(), sizeof(ConvWithStridesPadding_ExpectedOutput::all_ones) / sizeof(float)); +TEST_F(SofieAlpakaTest, ConvWithStridesNoPadding) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; -// float *correct = ConvWithStridesPadding_ExpectedOutput::all_ones; + // Preparing the standard all-ones input + std::vector input(35); + std::iota(input.begin(), input.end(), 0.0f); -// // Checking every output value, one by one -// for (size_t i = 0; i < output.size(); ++i) { -// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); -// } -// } + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithStridesNoPadding_ExpectedOutput::all_ones) / sizeof(float)})); -// TEST(ONNX, ConvWithStridesNoPadding) -// { -// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + { + SOFIE_ConvWithStridesNoPadding::Session 
session("ConvWithStridesNoPadding_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); -// // Preparing the standard all-ones input -// std::vector input(35); -// std::iota(input.begin(), input.end(), 0.0f); -// SOFIE_ConvWithStridesNoPadding::Session s("ConvWithStridesNoPadding_FromONNX.dat"); -// std::vector output = s.infer(input.data()); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithStridesNoPadding_ExpectedOutput::all_ones; + constexpr size_t nOut_stridesNoPad = sizeof(ConvWithStridesNoPadding_ExpectedOutput::all_ones) / sizeof(float); -// // Checking output size -// EXPECT_EQ(output.size(), sizeof(ConvWithStridesNoPadding_ExpectedOutput::all_ones) / sizeof(float)); + for (size_t i = 0; i < nOut_stridesNoPad; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} -// float *correct = ConvWithStridesNoPadding_ExpectedOutput::all_ones; -// // Checking every output value, one by one -// for (size_t i = 0; i < output.size(); ++i) { -// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); -// } -// } +// Disables test (asymmetric padding not supported) +TEST_F(SofieAlpakaTest, ConvWithAsymmetricPadding) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + // Preparing the standard all-ones input + std::vector input(35); + std::iota(input.begin(), input.end(), 0.0f); -// // Disables test (asymmetric padding not supported) -// TEST(DISABLED_ONNX, ConvWithAsymmetricPadding) -// { -// constexpr float TOLERANCE = DEFAULT_TOLERANCE; + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; -// // Preparing the standard all-ones input -// std::vector input(35); -// std::iota(input.begin(), input.end(), 
0.0f); -// SOFIE_ConvWithAsymmetricPadding::Session s("ConvWithAsymmetricPadding_FromONNX.dat"); -// std::vector output = s.infer(input.data()); + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithAsymmetricPadding_ExpectedOutput::all_ones) / sizeof(float)})); -// // Checking output size -// EXPECT_EQ(output.size(), sizeof(ConvWithAsymmetricPadding_ExpectedOutput::all_ones) / sizeof(float)); + { + SOFIE_ConvWithAsymmetricPadding::Session session("ConvWithAsymmetricPadding_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); -// float *correct = ConvWithAsymmetricPadding_ExpectedOutput::all_ones; + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithAsymmetricPadding_ExpectedOutput::all_ones; + constexpr size_t nOut_asymPad = sizeof(ConvWithAsymmetricPadding_ExpectedOutput::all_ones) / sizeof(float); -// // Checking every output value, one by one -// for (size_t i = 0; i < output.size(); ++i) { -// EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); -// } -// } + for (size_t i = 0; i < nOut_asymPad; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} From 2b6ede39728e1893a85b6a434b85f5c30809f087 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 11 May 2026 10:47:26 +0200 Subject: [PATCH 39/43] feat: infer methods using alpaka views --- src/SOFIE_core/inc/SOFIE/RModel.hxx | 3 + src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 6 +- src/SOFIE_core/src/RModel_ALPAKA.cxx | 198 ++++++++++++++++---- src/SOFIE_core/src/RModel_Base.cxx | 1 + 4 files changed, 173 insertions(+), 35 deletions(-) diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 
ea3f4dd..b10c02f 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -174,6 +174,9 @@ public: // used to infer the sub-graphs std::string GenerateInferSignature_GPU_ALPAKA(bool isdecl = true); + // generate the _infer_impl signature using ViewPlainPtr types instead of Buf types + std::string GenerateImplSignature_GPU_ALPAKA(bool isdecl = true); + void RemoveIntermediateTensor(const std::string& tensor_name){ fIntermediateTensorInfos.erase(tensor_name); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index a82e111..218e11e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -496,10 +496,12 @@ namespace SOFIE{ } // in the case of bias if (!fNC.empty()){ + // Use getPtrNative() for all args so the raw-pointer overload is selected regardless + // of whether each buffer is a BufXxx (member weight/bias/output) or ViewPlainPtr (input view). 
if (fActivation == EActivationType::RELU){ - out << SP << "blas.gemmrelu("< std::string { + ETensorType type = GetTensorType(name); + if (type == ETensorType::FLOAT) return "ViewConstF1D"; + if (type == ETensorType::DOUBLE) return "ViewConstD1D"; + if (type == ETensorType::INT64) return "ViewConstI641D"; + if (type == ETensorType::BOOL) return "ViewConstUI81D"; + throw std::runtime_error("TMVA-SOFIE: input tensor " + name + + " is of a data type which is not yet supported."); + }; + + std::string rGC; + std::unordered_map inputParams; + int i_input = 0; + for (auto &name : fInputTensorNames) { + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + std::string pName = d.param; + if (d.isParam && inputParams.count(pName) == 0) { + if (isdecl) rGC += "size_t "; + rGC += d.param + ","; + inputParams[pName] = i_input; + } + } + } + if (isdecl) { + rGC += GetViewConstType(name) + " const& "; + } + rGC += "deviceBuf_" + name + ","; + i_input++; + } + + if (fInputTensorNames.size() > 0) rGC.pop_back(); + return rGC; +} + void RModel::GenerateOutput_GPU_ALPAKA() { if (fVerbose) std::cout << "Generating main inference code for " << fName << std::endl; @@ -171,56 +212,138 @@ void RModel::GenerateOutput_GPU_ALPAKA() { if (outputSize == 0) throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); - bool sameOutputTypes = true; - std::string inferReturnType; ETensorType eFirstOutputType = GetTensorType(*fOutputTensorNames.begin()); + bool sameOutputTypes = true; + for (std::string const &name : fOutputTensorNames) { + if (GetTensorType(name) != eFirstOutputType) + sameOutputTypes = false; + } - fGC += "\n\n"; - if (outputSize == 1) { - fGC += "alpaka::Buf"; - } else { - // if all output types are the same we return an std::vector - otherwise a tuple - for (std::string const &name : fOutputTensorNames) { - if (GetTensorType(name) != eFirstOutputType) - sameOutputTypes = false; - } - if (sameOutputTypes) - fGC += 
"std::array, " + std::to_string(outputSize) + ">"; - else { - inferReturnType = "std::tuple<"; - for (size_t i = 0; i < outputSize; i++) { - inferReturnType += "alpaka::Buf"; - if (i < outputSize - 1) - inferReturnType += ","; + auto GetViewConstType = [this](const std::string &name) -> std::string { + ETensorType type = GetTensorType(name); + if (type == ETensorType::FLOAT) return "ViewConstF1D"; + if (type == ETensorType::DOUBLE) return "ViewConstD1D"; + if (type == ETensorType::INT64) return "ViewConstI641D"; + if (type == ETensorType::BOOL) return "ViewConstUI81D"; + throw std::runtime_error("TMVA-SOFIE: input tensor " + name + " is of an unsupported data type."); + }; + + // Collect deduplicated dynamic dimension parameter names in declaration order + std::vector dynParamNames; + { + std::unordered_map seen; + for (auto &name : fInputTensorNames) { + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + if (d.isParam && seen.count(d.param) == 0) { + dynParamNames.push_back(d.param); + seen[d.param] = 1; + } + } } - inferReturnType += ">"; - fGC += inferReturnType; } } - fGC += " infer("; - fGC += GenerateInferSignature_GPU_ALPAKA(); + fGC += "\n\n"; + + // === 1. _infer_impl: all operator code, takes ViewPlainPtr const& for inputs === + fGC += "void _infer_impl("; + fGC += GenerateImplSignature_GPU_ALPAKA(); fGC += "){\n"; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx))); + fGC += fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx)); } - fGC += "\n\n alpaka::wait(queue);\n"; + fGC += "}\n\n"; + + // === 2. Span-based infer: generic entry point === + // Dynamic params are forwarded explicitly; non-float inputs not yet supported here. 
+ std::string spanDynDecl; + for (auto &p : dynParamNames) + spanDynDecl += ", size_t " + p; + + fGC += "void infer(std::span inputs, std::span outputs" + spanDynDecl + "){\n"; + + // Build _infer_impl call: dyn params first, then inputs[i] + { + fGC += SP + "_infer_impl("; + bool first = true; + for (auto &p : dynParamNames) { + if (!first) fGC += ", "; + fGC += p; + first = false; + } + for (size_t i = 0; i < fInputTensorNames.size(); i++) { + if (!first) fGC += ", "; + fGC += "inputs[" + std::to_string(i) + "]"; + first = false; + } + fGC += ");\n"; + } + + // Copy member output buffers into caller-provided output views + for (size_t i = 0; i < outputSize; i++) { + std::string tensorName = *(fOutputTensorNames.begin() + i); + fGC += SP + "alpaka::memcpy(queue, outputs[" + std::to_string(i) + "], deviceBuf_" + tensorName + ");\n"; + } + fGC += SP + "alpaka::wait(queue);\n"; + fGC += "}\n\n"; + + // === 3. Typed infer: backward-compatible wrapper that delegates to _infer_impl === + // Build return type + std::string returnType; + if (outputSize == 1) { + returnType = "alpaka::Buf"; + } else if (sameOutputTypes) { + returnType = "std::array, " + std::to_string(outputSize) + ">"; + } else { + returnType = "std::tuple<"; + for (size_t i = 0; i < outputSize; i++) { + std::string tname = *(fOutputTensorNames.begin() + i); + returnType += "alpaka::Buf"; + if (i < outputSize - 1) returnType += ","; + } + returnType += ">"; + } + + fGC += returnType + " infer("; + fGC += GenerateInferSignature_GPU_ALPAKA(); + fGC += "){\n"; + + // Wrap each typed input buffer in a ViewConstXX, then call _infer_impl + std::vector typedImplArgs; + for (auto &p : dynParamNames) + typedImplArgs.push_back(p); + for (auto &name : fInputTensorNames) { + std::string viewType = GetViewConstType(name); + fGC += SP + viewType + " const view_" + name + + "{alpaka::getPtrNative(deviceBuf_" + name + "), devAcc, alpaka::getExtents(deviceBuf_" + name + ")};\n"; + typedImplArgs.push_back("view_" + name); 
+ } + + fGC += SP + "_infer_impl("; + for (size_t i = 0; i < typedImplArgs.size(); i++) { + if (i > 0) fGC += ", "; + fGC += typedImplArgs[i]; + } + fGC += ");\n"; + + // Return the member output buffer(s) fGC += SP + "return "; - if (outputSize>1) fGC += " {"; + if (outputSize > 1) fGC += "{"; for (size_t i = 0; i < outputSize; i++) { std::string tensorName = *(fOutputTensorNames.begin() + i); - bool isIntermediate = fIntermediateTensorInfos.count(tensorName) > 0; - fGC += "deviceBuf_"+tensorName; - if (i < outputSize - 1) - fGC += ","; + fGC += "deviceBuf_" + tensorName; + if (i < outputSize - 1) fGC += ","; } - if (outputSize>1) fGC += " };\n"; - else fGC += ";\n"; - fGC += "}\n"; // end of infer function scope + if (outputSize > 1) fGC += "}"; + fGC += ";\n"; + fGC += "}\n"; } void RModel::GenerateSessionCode_GPU_ALPAKA() { @@ -288,6 +411,15 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { fGC += "using BufD1D = alpaka::Buf;\n"; fGC += "using BufI641D = alpaka::Buf;\n"; fGC += "using BufUI81D = alpaka::Buf;\n\n"; + fGC += "// Non-owning device view types (ViewPlainPtr) for the span-based infer interface\n"; + fGC += "using ViewF1D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewConstF1D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewD1D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewConstD1D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewI641D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewConstI641D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewUI81D = alpaka::ViewPlainPtr;\n"; + fGC += "using ViewConstUI81D = alpaka::ViewPlainPtr;\n\n"; fGC += "\nalpaka::Platform const platform{};\n"; fGC += "DevAcc devAcc = alpaka::getDevByIdx(platform, 0);\n"; diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/src/SOFIE_core/src/RModel_Base.cxx index 74e310f..7139529 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/src/SOFIE_core/src/RModel_Base.cxx @@ -83,6 +83,7 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { } fGC += "#include \n"; 
fGC += "#include \n"; + fGC += "#include \n"; // for the session we need to include SOFIE_Common functions //needed for convolution operator (need to add a flag) From ff9d6cf8ea0e34f37942340cba3ddb58ee4c39ca Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 11 May 2026 16:28:35 +0200 Subject: [PATCH 40/43] feat: Support for inference on batchnorm operator --- .../SOFIE/ROperator_BatchNormalization.hxx | 83 ++++++++++++++++++ .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 79 +++++++++++++++++ .../test/input_models/BatchNorm.onnx | Bin 0 -> 282 bytes .../test/input_models/BatchNormRelu.onnx | Bin 0 -> 350 bytes 4 files changed, 162 insertions(+) create mode 100644 src/SOFIE_core/test/input_models/BatchNorm.onnx create mode 100644 src/SOFIE_core/test/input_models/BatchNormRelu.onnx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx index 1a6098d..0e5b98d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx @@ -227,6 +227,89 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty()) + throw std::runtime_error("TMVA SOFIE BatchNormalization called to Generate without being initialized first"); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + + std::string kname = "BatchNormKernel_" + opName; + std::string op; + op = "\n//------ BATCHNORM_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ X,\n"; + op += SP + SP + SP + "T const* __restrict__ scale,\n"; + op += SP + SP + SP + "T const* __restrict__ bias,\n"; + op += SP + SP + SP + "T const* __restrict__ mean,\n"; + op += SP + SP + SP + "T* __restrict__ Y,\n"; + op 
+= SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t i = global_thread_idx; i < totalElements; i += grid_thread_extent) {\n"; + + op += SP + SP + SP + SP + "T val = (X[i] - mean[i]) * scale[i] + bias[i];\n"; + + if (fActivation == EActivationType::RELU) + op += SP + SP + SP + SP + "Y[i] = val > static_cast(0) ? val : static_cast(0);\n"; + else + op += SP + SP + SP + SP + "Y[i] = val;\n"; + + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + std::string kname = "BatchNormKernel_" + opName; + return SP + kname + " batchNormKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty()) + throw std::runtime_error("TMVA SOFIE BatchNormalization called to Generate without being initialized first"); + + std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::string kname = "batchNormKernel_" + opName; + + std::stringstream out; + out << "\n//------ BATCHNORM_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << totalElements << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY + << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY + << ", devAcc, " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNScale 
<< ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNMean << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + + out << SP << "auto task_" << fNY << " = alpaka::createTaskKernel(workDiv_" << fNY + << ", " << kname + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNScale << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNMean << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << totalElements << "));\n"; + out << SP <<"alpaka::enqueue(queue, task_" << fNY << ");\n"; + + return out.str(); + } + std::vector GetBlasRoutines() override { return { std::string("Copy"), std::string("Axpy") }; } }; diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index e4bfc2e..9e696c6 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -115,6 +115,9 @@ #include "ConvWithAsymmetricPadding_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/ConvWithAsymmetricPadding.ref.hxx" +#include "BatchNorm_FromONNX_GPU_ALPAKA.hxx" +#include "BatchNormRelu_FromONNX_GPU_ALPAKA.hxx" + #include #include #include @@ -2185,3 +2188,79 @@ TEST_F(SofieAlpakaTest, ConvWithAsymmetricPadding) EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; } } + +TEST_F(SofieAlpakaTest, BatchNormalization) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = { + 1.f, 2.f, 3.f, 4.f, // channel 0 + 5.f, 6.f, 7.f, 8.f // channel 1 + }; + const std::size_t outputSize = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < 
input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_BatchNorm::Session session("BatchNorm_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + + float inv_std = 1.f / std::sqrt(1.f + 1e-5f); + ASSERT_EQ(outputSize, 8u); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - input[i] * inv_std), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, BatchNormalizationRelu) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input = { + -1.f, 2.f, -3.f, 4.f, + 5.f, -6.f, 7.f, -8.f + }; + const std::size_t outputSize = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_BatchNormRelu::Session session("BatchNormRelu_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + + float inv_std = 1.f / std::sqrt(1.f + 1e-5f); + ASSERT_EQ(outputSize, 8u); + for (size_t i = 0; i < outputSize; ++i) { + float expected = std::max(0.f, input[i] * inv_std); + EXPECT_LE(std::abs(res_ptr[i] - expected), TOLERANCE) << "i=" << i; + } +} 
diff --git a/src/SOFIE_core/test/input_models/BatchNorm.onnx b/src/SOFIE_core/test/input_models/BatchNorm.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f03cd9ade9e1a678b19e3db9f6494d36a0c4185f GIT binary patch literal 282 zcmdczElDjdv1(@GO5tLR;9@OKPRvQ=VoAzOEaqa#O-;<>VlGQ85@L*$ z;&e(ZNzU-gFUnOCf-@6yGOH3xGV}Aa1i9E#3yL#y^7BO3991`8z{sd2#Kn=DpPQOj zQkpB8mX>A@6ca+2q9w|~B*5qdvC)fzfuX@3gtbJ#!eAGGL?Hks3UU#Ws4!S9$b|?4 lqNITy5fbAP;b0UJ;9}xn1Y#x_P6A3s;*xY?;bIWr2LMEAIB5U? literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/BatchNormRelu.onnx b/src/SOFIE_core/test/input_models/BatchNormRelu.onnx new file mode 100644 index 0000000000000000000000000000000000000000..badf2c2418ec98184141ff5b1dcdc338d614f765 GIT binary patch literal 350 zcmdczElDjdu{zGgmC3~z!NppfoS2i!#gdenSj@$eo0^!%#axzHB*d1K z7oT5RBE{*HSdyIKmtT~tBm`$B=44hSmSpDVX$f+%rxp}v=H%yzt~si1zJQTYONfgj zH$OKuucS0rG%YR79w^2o&IL73h%r)%B`7tgR0&K9@xsjn32KRQFbOa^LEPlU!NAa9 z55ig^U}3NuL81@<69u^$NmLlD7UTzn0a4OGuL_BAiEuCq32-rSFaj|X3?~64BXLP) QDZpKhL%S0T7lQyl04Rk=0ssI2 literal 0 HcmV?d00001 From 16d8fab48c485f7792d0a09fc889e31ccbea9e95 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 19 May 2026 06:39:36 +0200 Subject: [PATCH 41/43] feat: fusion --- src/SOFIE_core/inc/SOFIE/RModel.hxx | 18 ++ src/SOFIE_core/inc/SOFIE/ROperator.hxx | 5 + .../inc/SOFIE/ROperator_BasicUnary.hxx | 5 + .../inc/SOFIE/ROperator_Basic_Is.hxx | 91 +++++++ src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx | 249 ++++++++++++++++++ .../SOFIE/ROperator_LayerNormalization.hxx | 243 +++++++++++++++++ src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 8 +- .../inc/SOFIE/ROperator_Sigmoid.hxx | 5 + src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx | 43 +++ src/SOFIE_core/src/RModel_ALPAKA.cxx | 203 ++++++++++++-- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 146 ++++++++++ .../test/input_models/LayerNorm.onnx | Bin 0 -> 249 bytes .../test/input_models/LayerNorm3D.onnx | Bin 0 -> 318 bytes 
.../test/input_models/LayerNormScaleBias.onnx | Bin 0 -> 258 bytes .../test/input_models/simple_transformer.onnx | Bin 0 -> 10932 bytes 15 files changed, 992 insertions(+), 24 deletions(-) create mode 100644 src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx create mode 100644 src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx create mode 100644 src/SOFIE_core/test/input_models/LayerNorm.onnx create mode 100644 src/SOFIE_core/test/input_models/LayerNorm3D.onnx create mode 100644 src/SOFIE_core/test/input_models/LayerNormScaleBias.onnx create mode 100644 src/SOFIE_core/test/input_models/simple_transformer.onnx diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index b10c02f..766bf8b 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -45,6 +45,24 @@ private: MemoryPoolInfo fIntermediateMemoryInfo; /// fIntermediateTensorFrequencyLookup; /// opIndices; ///< consecutive op indices forming this group + std::string inputTensor; ///< input tensor name of the first op + std::string outputTensor; ///< output tensor name of the last op + size_t numElements = 0; + bool isFused() const { return opIndices.size() > 1; } + std::string suffix() const { + std::string s; + for (auto i : opIndices) s += "_" + std::to_string(i); + return s; + } + }; + std::vector fEltwiseFusionGroups; /// fOpToFusionGroupIdx; /// fusion group index + std::set fFusionIntermediateTensors; ///& removal_func){ return;}; + // Elementwise kernel fusion interface + virtual bool IsElementwise() const { return false; } + // Returns the C++ expression applying this op to inputVar (a local T variable) for fused kernel generation + virtual std::string GetElementwiseExpr(const std::string& /*inputVar*/) const { return ""; } + //virtual void Forward_reference() = 0; //virtual void Forward_blas() = 0; virtual ~ROperator(){} diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx index 
4e9d988..eb3150c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx @@ -197,6 +197,11 @@ public: return {}; } } + + bool IsElementwise() const override { return !fIsOutputConstant; } + std::string GetElementwiseExpr(const std::string& v) const override { + return UnaryOpTraits::Op(v); + } }; } // namespace SOFIE diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx new file mode 100644 index 0000000..da2b5fb --- /dev/null +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx @@ -0,0 +1,91 @@ +#ifndef SOFIE_ROPERATOR_BASIC_IS +#define SOFIE_ROPERATOR_BASIC_IS + +#include +#include +#include +#include + +namespace SOFIE { + +enum class EBasicIsOperator { kIsInf, kIsInfPos, kIsInfNeg, kIsNaN }; + +template +struct IsOpTraits { +}; +template<> +struct IsOpTraits { + static std::string Name() { return "IsInf"; } + static std::string Op(const std::string &x) { return "std::isinf(" + x + ")"; } + static bool Impl(float x) { return std::isinf(x);} +}; +template<> +struct IsOpTraits { + static std::string Name() { return "IsInfPos"; } + static std::string Op(const std::string &x) { return "(std::isinf(" + x + ") && " + x + "> 0)"; } + static bool Impl(float x) { return std::isinf(x) && x > 0;} +}; +template<> +struct IsOpTraits { + static std::string Name() { return "IsInfNeg"; } + static std::string Op(const std::string &x) { return "(std::isinf(" + x + ") && " + x + "< 0)"; } + static bool Impl(float x) { return std::isinf(x) && x < 0;} +}; +template<> +struct IsOpTraits { + static std::string Name() { return "IsNaN"; } + static std::string Op(const std::string &x) { return "std::isnan(" + x + ")"; } + static bool Impl(float x) { return std::isnan(x);} +}; + + + +template +class ROperator_Basic_Is final : public ROperator { +private: + std::string fNX; + std::string fNY; + + std::vector fShapeX; + std::vector fShapeY; + +public: + 
ROperator_Basic_Is() {} + + ROperator_Basic_Is(std::string nameX, std::string nameY) + : fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) + { + fInputTensorNames = { fNX }; + fOutputTensorNames = { fNY }; + } + + void Initialize(RModel& model) override { + if (!model.CheckIfTensorAlreadyExist(fNX)) { + throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found."); + } + fShapeX = model.GetDimTensorShape(fNX); + fShapeY = fShapeX; + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fShapeY); + } + + std::string Generate(std::string opName) override + { + opName = "op_" + opName; + std::stringstream out; + + out << SP << "\n//---- Operator" << IsOpTraits::Name() << " " << opName << "\n"; + auto length = ConvertDimShapeToLength(fShapeX); + out << SP << "for (size_t i = 0; i < " << length << "; i++) {\n"; + out << SP << SP << "tensor_" << fNY << "[i] = " << IsOpTraits::Op("tensor_" + fNX + "[i]") << ";\n"; + out << SP << "}\n"; + return out.str(); + } + + std::vector GetStdLibs() override { + return { std::string("cmath") }; + } +}; + +} // namespace SOFIE + +#endif \ No newline at end of file diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx new file mode 100644 index 0000000..333b114 --- /dev/null +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx @@ -0,0 +1,249 @@ +#ifndef SOFIE_ROPERATOR_CLIP +#define SOFIE_ROPERATOR_CLIP + +#include "SOFIE_common.hxx" +#include "ROperator.hxx" +#include "RModel.hxx" + +#include +#include +#include +#include + +namespace SOFIE { + +// --------------------------------------------------------------------------- +// ROperator_Clip +// +// ONNX spec: Y = max(min_val, min(max_val, X)) element-wise +// +// The min and max bounds are optional in the ONNX spec: +// - if fNMin is empty → no lower clipping (effectively -inf) +// - if fNMax is empty → no upper clipping (effectively +inf) +// +// Bounds can be provided either as: +// (a) initializer / constant 
tensors (scalar, shape []), +// (b) runtime input tensors (resolved at Generate time), +// (c) compile-time float literals (via the fMin / fMax attributes). +// +// The implementation follows the Selu operator style exactly: +// - static shape stored in fShape +// - dynamic shape stored in fDimShape +// - a flat loop over all elements in Generate() +// --------------------------------------------------------------------------- + +template +class ROperator_Clip final : public ROperator { +private: + + // Tensor names + std::string fNX; // input data + std::string fNY; // output + std::string fNMin; // optional: tensor name for min bound + std::string fNMax; // optional: tensor name for max bound + + + // Static shape (non-dynamic path, mirrors Selu) + std::vector fShape; + + // Dynamic shape (Dim-aware, for dynamic input tensors) + std::vector fDimShape; + bool fIsDynamic = false; + + // Compile-time bound values — used when bounds are constant tensors + // Initialised to the ONNX defaults (no clipping) + T fMin = std::numeric_limits::lowest(); // -inf equivalent + T fMax = std::numeric_limits::max(); // +inf equivalent + + // Flags indicating whether each bound is: + // - absent (no input provided) + // - a constant resolved at Initialize time + // - a runtime tensor that must be read in the generated code + bool fHasMin = false; + bool fHasMax = false; + bool fMinIsConstant = false; + bool fMaxIsConstant = false; + +public: + + ROperator_Clip() {} + + // Constructor for the common case where bounds are tensor inputs + // (follows ONNX node input order: X, min, max) + ROperator_Clip(std::string nameX, + std::string nameY, + std::string nameMin = "", + std::string nameMax = "") + : fNX (UTILITY::Clean_name(nameX)), + fNY (UTILITY::Clean_name(nameY)), + fNMin(nameMin.empty() ? "" : UTILITY::Clean_name(nameMin)), + fNMax(nameMax.empty() ? 
"" : UTILITY::Clean_name(nameMax)) + { + fInputTensorNames = { fNX }; + if (!fNMin.empty()) fInputTensorNames.push_back(fNMin); + if (!fNMax.empty()) fInputTensorNames.push_back(fNMax); + fOutputTensorNames = { fNY }; + } + + // Convenience constructor when bounds are known scalars at model-build time + ROperator_Clip(std::string nameX, + std::string nameY, + T minVal, + T maxVal) + : fNX (UTILITY::Clean_name(nameX)), + fNY (UTILITY::Clean_name(nameY)), + fMin(minVal), fMax(maxVal), + fHasMin(true), fHasMax(true), + fMinIsConstant(true), fMaxIsConstant(true) + { + fInputTensorNames = { fNX }; + fOutputTensorNames = { fNY }; + } + + + // ----------------------------------------------------------------------- + void Initialize(RModel& model) override + { + // ---- validate main input ------------------------------------------ + if (!model.CheckIfTensorAlreadyExist(fNX)) + throw std::runtime_error( + "SOFIE Clip Op Input Tensor " + fNX + " is not found in model"); + + // ---- collect shape (static or dynamic, mirrors BasicBinary) ------- + if (model.IsDynamicTensor(fNX)) { + fIsDynamic = true; + fDimShape = model.GetDynamicTensorShape(fNX); + } else { + fShape = model.GetTensorShape(fNX); + fDimShape = ConvertShapeToDim(fShape); + } + + // ---- resolve min bound -------------------------------------------- + if (!fNMin.empty() && model.CheckIfTensorAlreadyExist(fNMin)) { + fHasMin = true; + if (model.IsInitializedTensor(fNMin)) { + // constant scalar tensor — read value now + auto data = static_cast(model.GetInitializedTensorData(fNMin).get()); + fMin = data[0]; + fMinIsConstant = true; + model.SetNotWritableInitializedTensor(fNMin); + } + // else: runtime input — will be dereferenced in generated code + } + + // ---- resolve max bound -------------------------------------------- + if (!fNMax.empty() && model.CheckIfTensorAlreadyExist(fNMax)) { + fHasMax = true; + if (model.IsInitializedTensor(fNMax)) { + auto data = 
static_cast(model.GetInitializedTensorData(fNMax).get()); + fMax = data[0]; + fMaxIsConstant = true; + model.SetNotWritableInitializedTensor(fNMax); + } + } + + // ---- register output tensor --------------------------------------- + if (fIsDynamic) + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fDimShape); + else + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); + + if (model.Verbose()) { + std::cout << "Clip : " << fNX << " " + << ConvertShapeToString(fShape); + if (fHasMin) + std::cout << " min=" << (fMinIsConstant + ? std::to_string(fMin) : fNMin + "(runtime)"); + if (fHasMax) + std::cout << " max=" << (fMaxIsConstant + ? std::to_string(fMax) : fNMax + "(runtime)"); + std::cout << " --> " << fNY << "\n"; + } + + // only needs <algorithm> and <limits> — no cmath + model.AddNeededStdLib("algorithm"); + model.AddNeededStdLib("limits"); + } + + + // ----------------------------------------------------------------------- + // Generate + // ----------------------------------------------------------------------- + std::string Generate(std::string OpName) override + { + OpName = "op_" + OpName; + + if (fShape.empty() && fDimShape.empty()) + throw std::runtime_error( + "SOFIE Operator Clip called to Generate without being initialized first"); + + std::stringstream out; + out << SP << "\n//------ CLIP " << OpName << "\n"; + + // ---- build the length expression (static or dynamic) ------------- + std::string length = ConvertDimShapeToLength(fDimShape); + + // ---- build min/max expressions for the generated code ------------ + // + // Priority: + // 1. compile-time constant value → emit literal + // 2. runtime input tensor → emit tensor_<name>[0] (scalar) + // 3. 
not provided → emit numeric_limits extreme + // + std::string minExpr, maxExpr; + + if (fMinIsConstant) { + minExpr = ToStringHighPrec(fMin); + } else if (fHasMin) { + minExpr = "tensor_" + fNMin + "[0]"; // scalar input tensor + } else { + // No lower bound — use lowest representable value + minExpr = "std::numeric_limits<" + TensorType::Name() + + ">::lowest()"; + } + + if (fMaxIsConstant) { + maxExpr = ToStringHighPrec(fMax); + } else if (fHasMax) { + maxExpr = "tensor_" + fNMax + "[0]"; + } else { + // No upper bound — use max representable value + maxExpr = "std::numeric_limits<" + TensorType::Name() + + ">::max()"; + } + + auto tensorValue = [](const std::string & name, const std::string & index) { + std::stringstream s; + s << "tensor_" << name << "[" << index << "]"; + return s.str(); + }; + + // ---- flat element loop; unsigned index so it matches the type of the length expression ---- + out << SP << "for (size_t id = 0; id < " << length << " ; id++) {\n"; + std::string firstExpr = fHasMax ? "std::min(" + maxExpr + ", " + tensorValue(fNX, "id") + ")" : tensorValue(fNX, "id"); + std::string secondExpr = fHasMin ? 
"std::max(" + minExpr + ", " + firstExpr + ")" : firstExpr; + out << SP << SP << tensorValue(fNY, "id") << " = " << secondExpr << ";\n"; + out << SP << "}\n"; + + return out.str(); + } + + +private: + + // Helper: convert a T value to string with enough precision + std::string ToStringHighPrec(T val) const { + std::ostringstream ss; + ss << std::setprecision(std::numeric_limits::max_digits10) << val; + // add dot if missing, but not after an exponent: "1e-05." is not a valid literal + if (ss.str().find_first_of(".eE") == std::string::npos) ss << "."; + // append 'f' suffix for float literals so generated code compiles + // cleanly without implicit double→float conversion warnings + if (std::is_same::value) ss << "f"; + return ss.str(); + } +}; + +} // namespace SOFIE + +#endif // SOFIE_ROPERATOR_CLIP \ No newline at end of file diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx index 12ea5b7..ca3ae74 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LayerNormalization.hxx @@ -316,6 +316,249 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty()) + throw std::runtime_error("TMVA::SOFIE LayerNormalization called to Generate without being initialized first"); + + // Each thread handles one "row" — one element of the axes dims [0..axis) + // and iterates over all normalized dims [axis..size) + // axesLength = product of fShapeX[0..axis) + // normalizedLength = product of fShapeX[axis..size) + // totalElements = axesLength (one thread per row) + + std::vector inputShape(fSize); + for (size_t i = 0; i < fSize; i++) + inputShape[i] = fShapeX[i].GetVal(); + + auto strides = UTILITY::ComputeStrideFromShape(fShapeX); + auto scaleStrides = UTILITY::ComputeStrideFromShape(fShapeScale); + auto biasStrides = (!fNB.empty()) ? 
UTILITY::ComputeStrideFromShape(fShapeB) + : std::vector{}; + auto axesStrides = UTILITY::ComputeStrideFromShape(fAxesShape); + + // Build index expressions reusing the same logic as Generate() + // input index: axis_0*stride0 + axis_1*stride1 + ... + norm_0*stride_axis + ... + // For the kernel we decompose the linear thread index into axis coords, + // then loop over normalized dims inside the kernel. + + std::string kname = "LayerNormKernel_" + opName; + std::string op; + op = "\n//------ LAYERNORM_KERNEL_ALPAKA\n"; + op += SP + "struct " + kname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ X,\n"; + op += SP + SP + SP + "T const* __restrict__ scale,\n"; + if (!fNB.empty()) + op += SP + SP + SP + "T const* __restrict__ bias,\n"; + if (!fNMean.empty()) + op += SP + SP + SP + "T* __restrict__ out_mean,\n"; + if (!fNInvStdDev.empty()) + op += SP + SP + SP + "T* __restrict__ out_invstd,\n"; + op += SP + SP + SP + "T* __restrict__ Y,\n"; + op += SP + SP + SP + "std::size_t const axesLength) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= axesLength) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t row = global_thread_idx; row < axesLength; row += grid_thread_extent) {\n\n"; + + // Decompose row into per-axes-dim coords using compile-time strides + if (fAxis > 0) { + for (size_t i = 0; i < fAxis; ++i) { + op += SP + SP + SP + SP + "std::size_t const axis_" + std::to_string(i) + + " = (row / " + axesStrides[i].GetVal() + "u) % " + + inputShape[i] + "u;\n"; + } + op += "\n"; + } + + // Base input offset for this row (contribution from axes dims only) + op += SP + SP + SP + SP + "std::size_t const row_base =\n"; + if (fAxis == 0) { + op += 
SP + SP + SP + SP + SP + "0u;\n\n"; + } else { + for (size_t i = 0; i < fAxis; ++i) { + op += SP + SP + SP + SP + SP + "axis_" + std::to_string(i) + + " * " + strides[i].GetVal() + "u"; + op += (i + 1 < fAxis) ? " +\n" : ";\n\n"; + } + } + + // Scale index base (from axes dims) + op += SP + SP + SP + SP + "std::size_t const scale_base =\n"; + { + bool any = false; + for (size_t i = 0; i < fAxis; ++i) { + if (fShapeScale[i].dim != 1) { + op += SP + SP + SP + SP + SP + "axis_" + std::to_string(i) + + " * " + scaleStrides[i].GetVal() + "u"; + if (any) op = " +\n" + op; + any = true; + } + } + if (!any) op += SP + SP + SP + SP + SP + "0u"; + op += ";\n\n"; + } + + if (!fNB.empty()) { + op += SP + SP + SP + SP + "std::size_t const bias_base =\n"; + bool any = false; + for (size_t i = 0; i < fAxis; ++i) { + if (fShapeB[i].dim != 1) { + op += SP + SP + SP + SP + SP + "axis_" + std::to_string(i) + + " * " + biasStrides[i].GetVal() + "u"; + if (any) op = " +\n" + op; + any = true; + } + } + if (!any) op += SP + SP + SP + SP + SP + "0u"; + op += ";\n\n"; + } + + // ---- Pass 1: compute mean ---- + op += SP + SP + SP + SP + "T mean = static_cast(0);\n"; + + // Unroll normalized dims loop for mean + for (size_t j = fAxis; j < fSize; ++j) + op += SP + SP + SP + SP + "for (std::size_t n_" + std::to_string(j) + + " = 0; n_" + std::to_string(j) + " < " + inputShape[j] + + "u; n_" + std::to_string(j) + "++) {\n"; + + // Normalized dim index + op += SP + SP + SP + SP + SP + "std::size_t const norm_idx = row_base"; + for (size_t j = fAxis; j < fSize; ++j) + op += " + n_" + std::to_string(j) + " * " + strides[j].GetVal() + "u"; + op += ";\n"; + op += SP + SP + SP + SP + SP + "mean += X[norm_idx];\n"; + + for (size_t j = fAxis; j < fSize; ++j) + op += SP + SP + SP + SP + "}\n"; + + op += SP + SP + SP + SP + "mean /= static_cast(" + fNormalizedLength + ");\n\n"; + + // ---- Pass 2: compute variance ---- + op += SP + SP + SP + SP + "T sum = static_cast(0);\n"; + + for (size_t j = fAxis; 
j < fSize; ++j) + op += SP + SP + SP + SP + "for (std::size_t n_" + std::to_string(j) + + " = 0; n_" + std::to_string(j) + " < " + inputShape[j] + + "u; n_" + std::to_string(j) + "++) {\n"; + + op += SP + SP + SP + SP + SP + "std::size_t const norm_idx = row_base"; + for (size_t j = fAxis; j < fSize; ++j) + op += " + n_" + std::to_string(j) + " * " + strides[j].GetVal() + "u"; + op += ";\n"; + op += SP + SP + SP + SP + SP + "T tmp = X[norm_idx] - mean;\n"; + op += SP + SP + SP + SP + SP + "sum += tmp * tmp;\n"; + + for (size_t j = fAxis; j < fSize; ++j) + op += SP + SP + SP + SP + "}\n"; + + op += SP + SP + SP + SP + "T const invStdDev = static_cast(1) / " + "alpaka::math::sqrt(acc, sum / static_cast(" + + fNormalizedLength + ") + static_cast(" + std::to_string(fAttrEpsilon) + "));\n\n"; + + // Save mean and invstd if requested + if (!fNMean.empty()) + op += SP + SP + SP + SP + "out_mean[row] = mean;\n"; + if (!fNInvStdDev.empty()) + op += SP + SP + SP + SP + "out_invstd[row] = invStdDev;\n"; + op += "\n"; + + // ---- Pass 3: normalize, scale, bias ---- + for (size_t j = fAxis; j < fSize; ++j) + op += SP + SP + SP + SP + "for (std::size_t n_" + std::to_string(j) + + " = 0; n_" + std::to_string(j) + " < " + inputShape[j] + + "u; n_" + std::to_string(j) + "++) {\n"; + + op += SP + SP + SP + SP + SP + "std::size_t const norm_idx = row_base"; + for (size_t j = fAxis; j < fSize; ++j) + op += " + n_" + std::to_string(j) + " * " + strides[j].GetVal() + "u"; + op += ";\n"; + + // Scale index (normalized dims contribution) + op += SP + SP + SP + SP + SP + "std::size_t const s_idx = scale_base"; + for (size_t j = fAxis; j < fSize; ++j) { + if (fShapeScale[j].dim != 1) + op += " + n_" + std::to_string(j) + " * " + scaleStrides[j].GetVal() + "u"; + } + op += ";\n"; + + op += SP + SP + SP + SP + SP + "T val = scale[s_idx] * invStdDev * (X[norm_idx] - mean);\n"; + + if (!fNB.empty()) { + op += SP + SP + SP + SP + SP + "std::size_t const b_idx = bias_base"; + for (size_t j = 
fAxis; j < fSize; ++j) { + if (fShapeB[j].dim != 1) + op += " + n_" + std::to_string(j) + " * " + biasStrides[j].GetVal() + "u"; + } + op += ";\n"; + op += SP + SP + SP + SP + SP + "val += bias[b_idx];\n"; + } + + op += SP + SP + SP + SP + SP + "Y[norm_idx] = val;\n"; + + for (size_t j = fAxis; j < fSize; ++j) + op += SP + SP + SP + SP + "}\n"; + + op += SP + SP + SP + "}\n"; // end row loop + op += SP + SP + "}\n"; // end operator() + op += SP + "};\n"; // end struct + + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { + opName = "op_" + opName; + std::string kname = "LayerNormKernel_" + opName; + return SP + kname + " layerNormKernel_" + opName + ";\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty()) + throw std::runtime_error("TMVA::SOFIE LayerNormalization called to Generate without being initialized first"); + + // One thread per row (per axes element) + // axesLength is known at generation time for static shapes + std::string axesLengthStr = fAxesLength; + // For static models fAxesLength is a number string; use it directly + // For dynamic models it may be a param expression — still valid in generated code + + std::string kname = "layerNormKernel_" + opName; + + std::stringstream out; + out << "\n//------ LAYERNORM_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << axesLengthStr << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << opName + << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; + + // Build argument list + std::string args = + "alpaka::getPtrNative(deviceBuf_" + fNX + "), " + + "alpaka::getPtrNative(deviceBuf_" + fNScale + ")"; + if (!fNB.empty()) + args += ", alpaka::getPtrNative(deviceBuf_" + fNB + ")"; + if (!fNMean.empty()) + args += ", 
alpaka::getPtrNative(deviceBuf_" + fNMean + ")"; + if (!fNInvStdDev.empty()) + args += ", alpaka::getPtrNative(deviceBuf_" + fNInvStdDev + ")"; + args += ", alpaka::getPtrNative(deviceBuf_" + fNY + ")"; + args += ", static_cast(" + axesLengthStr + ")"; + + out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName + << ", devAcc, " << kname << ", " << args << ");\n"; + out << SP << "alpaka::exec(queue, workDiv_" << opName + << ", " << kname << ", " << args << ");\n"; + + return out.str(); + } + std::vector GetBlasRoutines() override { return { std::string("Axpy") }; } std::vector GetStdLibs() override { return { std::string("cmath") }; } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx index fcc3cd6..cea6a5b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx @@ -101,9 +101,10 @@ public: out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; - out << SP << "alpaka::exec(queue, workDiv_" << fNY + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } @@ -111,6 +112,11 @@ public: return fNY; } + bool IsElementwise() const override { return true; } + std::string GetElementwiseExpr(const std::string& v) const override { + return "(" + v + ") >= T(0) ? 
(" + v + ") : T(0)"; + } + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ removal_func(fNX); removal_func(fNY); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 755e788..8f7f001 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -107,6 +107,11 @@ public: return fNY; } + bool IsElementwise() const override { return true; } + std::string GetElementwiseExpr(const std::string& v) const override { + return "static_cast(1) / (static_cast(1) + exp(-(" + v + ")))"; + } + void UpdateFusableTensorName(std::string fusable_tensor_name, const std::function& removal_func){ removal_func(fNX); removal_func(fNY); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx index 37c92ee..0edce9b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx @@ -24,6 +24,7 @@ public: ROperator_Tanh(){} ROperator_Tanh(std::string nameX, std::string nameY): fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)){ + fKind = OperatorKind::TANH; fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } @@ -63,6 +64,48 @@ public: } std::vector GetStdLibs() override { return { std::string("cmath") };} + + bool IsElementwise() const override { return true; } + std::string GetElementwiseExpr(const std::string& v) const override { + return "tanh(" + v + ")"; + } + + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override { + std::string op; + op = "\n//------ TANH_KERNEL_ALPAKA\n"; + op += "struct TanhKernel {\n"; + op += SP + "template\n"; + op += SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* __restrict__ data, T* __restrict__ out, std::size_t numElements) const {\n"; + op += SP + SP + "const auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + "if (idx < numElements) { out[idx] = 
tanh(data[idx]); }\n"; + op += SP + "}\n"; + op += "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { + return SP + "TanhKernel tanhKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()) { + throw std::runtime_error("TMVA SOFIE Tanh called to Generate_GPU_ALPAKA without being initialized"); + } + std::stringstream out; + size_t length = ConvertShapeToLength(fShape); + out << "\n//------ TANH_GPU_ALPAKA\n"; + out << SP << "auto const elementsPerThread_"<(1));\n"; + out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; + out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, tanhKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNX + << ", tanhKernel, alpaka::getPtrNative(deviceBuf_" << fNX + << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; + return out.str(); + } }; }//SOFIE diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/src/SOFIE_core/src/RModel_ALPAKA.cxx index 9db8cae..cc6306e 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/src/SOFIE_core/src/RModel_ALPAKA.cxx @@ -1,8 +1,10 @@ #include #include +#include #include #include #include +#include #ifdef SOFIE_SUPPORT_ROOT_BINARY #include "TFile.h" @@ -13,6 +15,88 @@ namespace SOFIE { +void RModel::ComputeEltwiseFusionGroups() { + fEltwiseFusionGroups.clear(); + fOpToFusionGroupIdx.clear(); + fFusionIntermediateTensors.clear(); + + // Build tensor -> consumer op indices map + std::unordered_map> tensorConsumers; + for (size_t i = 0; i < 
fOperators.size(); i++) { + for (const auto& name : fOperators[i]->GetOpInputTensors()) + tensorConsumers[std::string(name)].push_back(i); + } + + // Returns true if tensorName is safe to treat as a fusion intermediate: + // consumed by exactly one op AND not a model output. + auto isFuseSafe = [&](const std::string& tensorName) -> bool { + for (const auto& outName : fOutputTensorNames) + if (outName == tensorName) return false; + auto it = tensorConsumers.find(tensorName); + return it != tensorConsumers.end() && it->second.size() == 1; + }; + + std::vector opAssigned(fOperators.size(), false); + + for (size_t i = 0; i < fOperators.size(); i++) { + if (opAssigned[i]) continue; + opAssigned[i] = true; + + EltwiseFusionGroup group; + group.opIndices.push_back(i); + + auto firstInputs = fOperators[i]->GetOpInputTensors(); + group.inputTensor = firstInputs.empty() ? "" : std::string(firstInputs[0]); + + // Extend chain: only if CURRENT op is elementwise and its single output can be fused + size_t current = i; + while (fOperators[current]->IsElementwise()) { + auto curOutputs = fOperators[current]->GetOpOutputTensors(); + if (curOutputs.size() != 1) break; + std::string curOut = std::string(curOutputs[0]); + if (!isFuseSafe(curOut)) break; + + size_t nextIdx = tensorConsumers.find(curOut)->second[0]; + // Must be strictly the next op in sequence and itself elementwise with single input + if (nextIdx != current + 1) break; + if (opAssigned[nextIdx]) break; + if (!fOperators[nextIdx]->IsElementwise()) break; + auto nextInputs = fOperators[nextIdx]->GetOpInputTensors(); + if (nextInputs.size() != 1) break; + + opAssigned[nextIdx] = true; + group.opIndices.push_back(nextIdx); + current = nextIdx; + } + + // Output tensor is the last op's output + auto lastOutputs = fOperators[current]->GetOpOutputTensors(); + group.outputTensor = lastOutputs.empty() ? 
"" : std::string(lastOutputs[0]); + + // Element count from intermediate tensor info (all op outputs are intermediates) + if (!group.outputTensor.empty()) { + auto it = fIntermediateTensorInfos.find(group.outputTensor); + if (it != fIntermediateTensorInfos.end()) + group.numElements = ConvertShapeToLength(it->second.shape); + } + + size_t gIdx = fEltwiseFusionGroups.size(); + for (auto opIdx : group.opIndices) + fOpToFusionGroupIdx[opIdx] = gIdx; + + // Mark all-but-last outputs as fusion intermediates (skip allocation) + if (group.isFused()) { + for (size_t k = 0; k + 1 < group.opIndices.size(); k++) { + auto midOuts = fOperators[group.opIndices[k]]->GetOpOutputTensors(); + if (!midOuts.empty()) + fFusionIntermediateTensors.insert(std::string(midOuts[0])); + } + } + + fEltwiseFusionGroups.push_back(std::move(group)); + } +} + void RModel::GenerateInitializedTensorInfo_GPU_ALPAKA() { if (!fInitializedTensors.empty()){ fGC += "\n// initialized tensors for weights\n"; @@ -62,6 +146,8 @@ void RModel::GenerateGPU_ALPAKA_Buffers() { std::string tensor_declaration_block = ""; for (auto &i : fIntermediateTensorInfos) { + // Skip tensors that are purely intermediate within a fused kernel chain + if (fFusionIntermediateTensors.count(i.first)) continue; size_t length = ConvertShapeToLength(i.second.shape); @@ -252,10 +338,40 @@ void RModel::GenerateOutput_GPU_ALPAKA() { fGC += GenerateImplSignature_GPU_ALPAKA(); fGC += "){\n"; + std::set fusedGroupsLaunched; for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx)); + + auto gIt = fOpToFusionGroupIdx.find(op_idx); + size_t gIdx = (gIt != fOpToFusionGroupIdx.end()) ? 
gIt->second : SIZE_MAX; + bool inFusedGroup = (gIdx != SIZE_MAX) && fEltwiseFusionGroups[gIdx].isFused(); + + if (inFusedGroup) { + // Only emit the fused kernel launch once, at the chain leader + if (fEltwiseFusionGroups[gIdx].opIndices[0] == op_idx && !fusedGroupsLaunched.count(gIdx)) { + const auto& grp = fEltwiseFusionGroups[gIdx]; + std::string sfx = grp.suffix(); + std::string kname = "fusedEltwiseKernel" + sfx; + fGC += "\n//------ FUSED_ELTWISE_GPU_ALPAKA" + sfx + "\n"; + fGC += SP + "{\n"; + fGC += SP + SP + "auto const elementsPerThread_fused" + sfx + " = Vec::all(static_cast(1));\n"; + fGC += SP + SP + "auto const elementsPerGrid_fused" + sfx + " = Vec::all(Idx{" + std::to_string(grp.numElements) + "});\n"; + fGC += SP + SP + "alpaka::KernelCfg const cfg_fused" + sfx + " = {elementsPerGrid_fused" + sfx + ", elementsPerThread_fused" + sfx + "};\n"; + fGC += SP + SP + "auto const workDiv_fused" + sfx + " = alpaka::getValidWorkDiv(cfg_fused" + sfx + ", devAcc, " + kname + + ", alpaka::getPtrNative(deviceBuf_" + grp.inputTensor + "), alpaka::getPtrNative(deviceBuf_" + grp.outputTensor + + "), static_cast(" + std::to_string(grp.numElements) + "));\n"; + fGC += SP + SP + "auto task_fused" + sfx + " = alpaka::createTaskKernel(workDiv_fused" + sfx + ", " + kname + + ", alpaka::getPtrNative(deviceBuf_" + grp.inputTensor + "), alpaka::getPtrNative(deviceBuf_" + grp.outputTensor + + "), static_cast(" + std::to_string(grp.numElements) + "));\n"; + fGC += SP + SP + "alpaka::enqueue(queue, task_fused" + sfx + ");\n"; + fGC += SP + "}\n"; + fusedGroupsLaunched.insert(gIdx); + } + // Chain followers: skip — their logic is inside the fused kernel + } else { + fGC += fOperators[op_idx]->Generate_GPU_ALPAKA(std::to_string(op_idx)); + } } fGC += "\n\n alpaka::wait(queue);\n"; fGC += "}\n\n"; @@ -347,8 +463,9 @@ void RModel::GenerateOutput_GPU_ALPAKA() { } void RModel::GenerateSessionCode_GPU_ALPAKA() { - + std::set registered_operators; + std::set fusedGroupsEmitted; // 
tracks which fusion groups have had their struct/decl emitted std::set single_initialized_operators = { SOFIE::OperatorKind::RELU, SOFIE::OperatorKind::SIGMOID, @@ -370,24 +487,54 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { bool OpNeedsBlas = false; - // single initiation operators must only be initialized only once and their count should be stored in the registered_operators set to avoid generating multiple kernels for the same operator kind + // Generate kernel struct declarations, accounting for elementwise fusion groups. + // For fused chains (≥2 elementwise ops), a single FusedEltwiseKernel is generated + // instead of individual kernel structs for the participating ops. fGC += "\n//--- ALPAKA Kernels\n"; for (size_t id = 0; id < fOperators.size(); id++) { if(fOperators[id]->GetKind() == OperatorKind::GEMM || fOperators[id]->GetKind() == OperatorKind::CONV) { OpNeedsBlas = true; } - if(single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { - if(registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { + + auto gIt = fOpToFusionGroupIdx.find(id); + size_t gIdx = (gIt != fOpToFusionGroupIdx.end()) ? 
gIt->second : SIZE_MAX; + bool inFusedGroup = (gIdx != SIZE_MAX) && fEltwiseFusionGroups[gIdx].isFused(); + + if (inFusedGroup) { + // Only emit the fused kernel struct once, at the chain leader + if (fEltwiseFusionGroups[gIdx].opIndices[0] == id && !fusedGroupsEmitted.count(gIdx)) { + const auto& grp = fEltwiseFusionGroups[gIdx]; + std::string sfx = grp.suffix(); + fGC += "\n//------ FUSED_ELTWISE_KERNEL" + sfx + "\n"; + fGC += "struct FusedEltwiseKernel" + sfx + " {\n"; + fGC += SP + "template\n"; + fGC += SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* __restrict__ data, T* __restrict__ out, std::size_t n) const {\n"; + fGC += SP + SP + "const auto idx = alpaka::getIdx(acc)[0];\n"; + fGC += SP + SP + "if (idx < n) {\n"; + fGC += SP + SP + SP + "T v = data[idx];\n"; + for (size_t opIdx : grp.opIndices) + fGC += SP + SP + SP + "v = " + fOperators[opIdx]->GetElementwiseExpr("v") + ";\n"; + fGC += SP + SP + SP + "out[idx] = v;\n"; + fGC += SP + SP + "}\n"; + fGC += SP + "}\n"; + fGC += "};\n"; + fusedGroupsEmitted.insert(gIdx); + } + // Chain followers: skip (their logic is inside the fused kernel) + } else { + // Unfused op: generate individual kernel struct (with dedup for single_initialized_operators) + if (single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { + if (registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { if (fVerbose) - std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; - + std::cout << "Generating ALPAKA kernel for operator " << toString(fOperators[id]->GetKind()) << std::endl; fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); registered_operators.insert(fOperators[id]->GetKind()); + } + } else { + if (fVerbose) + std::cout << "Generating ALPAKA kernel for operator " << toString(fOperators[id]->GetKind()) << std::endl; + fGC += 
fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); } - } else { - if (fVerbose) - std::cout<<"Generating ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; - fGC += fOperators[id]->Generate_GPU_Kernel_ALPAKA(std::to_string(id)); } } @@ -493,23 +640,32 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { } registered_operators.clear(); + fusedGroupsEmitted.clear(); for (size_t id = 0; id < fOperators.size(); id++) { - - if(single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { - - if(registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { - + auto gIt = fOpToFusionGroupIdx.find(id); + size_t gIdx = (gIt != fOpToFusionGroupIdx.end()) ? gIt->second : SIZE_MAX; + bool inFusedGroup = (gIdx != SIZE_MAX) && fEltwiseFusionGroups[gIdx].isFused(); + + if (inFusedGroup) { + if (fEltwiseFusionGroups[gIdx].opIndices[0] == id && !fusedGroupsEmitted.count(gIdx)) { + std::string sfx = fEltwiseFusionGroups[gIdx].suffix(); + fGC += SP + "FusedEltwiseKernel" + sfx + " fusedEltwiseKernel" + sfx + ";\n"; + fusedGroupsEmitted.insert(gIdx); + } + } else { + if (single_initialized_operators.find(fOperators[id]->GetKind()) != single_initialized_operators.end()) { + if (registered_operators.find(fOperators[id]->GetKind()) == registered_operators.end()) { + if (fVerbose) + std::cout << "Declaring ALPAKA kernel for operator " << toString(fOperators[id]->GetKind()) << std::endl; + fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); + registered_operators.insert(fOperators[id]->GetKind()); + } + } else { if (fVerbose) - std::cout<<"Declaring ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; - + std::cout << "Declaring ALPAKA kernel for operator " << toString(fOperators[id]->GetKind()) << std::endl; fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); - 
registered_operators.insert(fOperators[id]->GetKind()); } - } else { - if (fVerbose) - std::cout<<"Declaring ALPAKA kernel for operator"<< toString(fOperators[id]->GetKind()) << std::endl; - fGC += fOperators[id]->Generate_GPU_Kernel_Definitions_ALPAKA(std::to_string(id)); } } @@ -546,6 +702,7 @@ void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int bat throw std::runtime_error("SOFIE GPU does not yet supports GNN Inference."); Initialize(batchSize, verbose); + ComputeEltwiseFusionGroups(); std::string hgname; if (!fIsSubGraph) { diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 9e696c6..532470f 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -118,6 +118,11 @@ #include "BatchNorm_FromONNX_GPU_ALPAKA.hxx" #include "BatchNormRelu_FromONNX_GPU_ALPAKA.hxx" +#include "LayerNorm_FromONNX_GPU_ALPAKA.hxx" +#include "LayerNormScaleBias_FromONNX_GPU_ALPAKA.hxx" +#include "LayerNorm3D_FromONNX_GPU_ALPAKA.hxx" + + #include #include #include @@ -2264,3 +2269,144 @@ TEST_F(SofieAlpakaTest, BatchNormalizationRelu) EXPECT_LE(std::abs(res_ptr[i] - expected), TOLERANCE) << "i=" << i; } } + +TEST_F(SofieAlpakaTest, LayerNorm) +{ + constexpr float TOLERANCE = 1e-4f; + std::vector input = {1.f, 2.f, 3.f, 4.f, + 5.f, 6.f, 7.f, 8.f}; + const std::size_t outputSize = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_LayerNorm::Session session("LayerNorm_FromONNX_GPU_ALPAKA.dat"); + auto 
result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + + // Row 0: mean=2.5, std=sqrt(1.25+1e-5) ≈ 1.118034 + // Row 1: mean=6.5, std=sqrt(1.25+1e-5) ≈ 1.118034 + // Y[0] = (1-2.5)/1.118034 ≈ -1.3416 + // Y[1] = (2-2.5)/1.118034 ≈ -0.4472 + // Y[2] = (3-2.5)/1.118034 ≈ 0.4472 + // Y[3] = (4-2.5)/1.118034 ≈ 1.3416 + float inv_std = 1.f / std::sqrt(1.25f + 1e-5f); + std::vector expected = { + (1.f - 2.5f) * inv_std, (2.f - 2.5f) * inv_std, + (3.f - 2.5f) * inv_std, (4.f - 2.5f) * inv_std, + (5.f - 6.5f) * inv_std, (6.f - 6.5f) * inv_std, + (7.f - 6.5f) * inv_std, (8.f - 6.5f) * inv_std + }; + ASSERT_EQ(outputSize, 8u); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, LayerNormScaleBias) +{ + constexpr float TOLERANCE = 1e-4f; + + std::vector input = {1.f, 2.f, 3.f, 4.f, + 5.f, 6.f, 7.f, 8.f}; + const std::size_t outputSize = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_LayerNormScaleBias::Session session("LayerNormScaleBias_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + + float inv_std = 1.f / std::sqrt(1.25f + 1e-5f); + std::vector expected = { + 2.f * (1.f - 2.5f) * inv_std + 1.f, 2.f * (2.f - 
2.5f) * inv_std + 1.f, + 2.f * (3.f - 2.5f) * inv_std + 1.f, 2.f * (4.f - 2.5f) * inv_std + 1.f, + 2.f * (5.f - 6.5f) * inv_std + 1.f, 2.f * (6.f - 6.5f) * inv_std + 1.f, + 2.f * (7.f - 6.5f) * inv_std + 1.f, 2.f * (8.f - 6.5f) * inv_std + 1.f + }; + ASSERT_EQ(outputSize, 8u); + for (size_t i = 0; i < outputSize; ++i) + EXPECT_LE(std::abs(res_ptr[i] - expected[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, LayerNorm3D) +{ + constexpr float TOLERANCE = 1e-4f; + + std::vector input(24); + std::iota(input.begin(), input.end(), 0.f); // 0..23 + const std::size_t outputSize = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{outputSize})); + + { + SOFIE_LayerNorm3D::Session session("LayerNorm3D_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + + auto compute_expected = [](std::vector row) { + float mean = 0.f; + for (float v : row) mean += v; + mean /= row.size(); + float var = 0.f; + for (float v : row) var += (v - mean) * (v - mean); + var /= row.size(); + float inv_std = 1.f / std::sqrt(var + 1e-5f); + std::vector out; + for (float v : row) out.push_back((v - mean) * inv_std); + return out; + }; + + std::vector row0(input.begin(), input.begin() + 12); + std::vector row1(input.begin() + 12, input.end()); + auto exp0 = compute_expected(row0); + auto exp1 = compute_expected(row1); + + ASSERT_EQ(outputSize, 24u); + for (size_t i = 0; i < 12; ++i) + EXPECT_LE(std::abs(res_ptr[i] - 
exp0[i]), TOLERANCE) << "row0 i=" << i; + for (size_t i = 0; i < 12; ++i) + EXPECT_LE(std::abs(res_ptr[12 + i] - exp1[i]), TOLERANCE) << "row1 i=" << i; +} diff --git a/src/SOFIE_core/test/input_models/LayerNorm.onnx b/src/SOFIE_core/test/input_models/LayerNorm.onnx new file mode 100644 index 0000000000000000000000000000000000000000..97142e7df1feeaa8166c22b88b244db9207086cd GIT binary patch literal 249 zcmdczElDjdv3kVFmB+;x!NppfoS2i!#gdenSS-XCDaGlNSeaVnmtT~t zBm`$B=44hSmSpDVX^C*LBvxb=OZczElDjdv0Bc=mCD5!!NppfoS2i!#gdenSS-XCDaGlNSeaVnmtT~t zBm`$B=44hSmSpDVX>oJ0Bvxb=OE4~AWYQAkVoxn7&dkZr6J2vu-FyKfBT$H|xFoSS zBfg}vAQh}ih#PKx>fF*6XeBmpHNu}L_wa4`r70sx%EIv4-| literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/LayerNormScaleBias.onnx b/src/SOFIE_core/test/input_models/LayerNormScaleBias.onnx new file mode 100644 index 0000000000000000000000000000000000000000..99ea54060e8fe573450624e1289587d64868da8a GIT binary patch literal 258 zcmdczElDjdv3kwOmB+;x!NppfoS2i!#gdenSS-XCDaGlNSeaVnmtT~t zBm`$B=44hSmSpDVX^C*LBvxb=OZ;s`GR1_lNPWUM6vmIgZxB->z*jH85s g9uVT=;^ANv;^1Q9U;<*6B%oj+qnLdP$ z7`aS7nv5SX)OO6v7>ka_lS#%!dg5rleZt5Y=xGaLilH7OJqk<8MU%0^My@}d$)&XZ zLq>f6L;-cz8zaj?gc?-MrFs_9rQzx1fkV9oUvG!8STcSjR)}ZP&Q65#g;;(fS~#3F zx-F+IR(RBEWb^T4CcXIVOMU*+7K^hB*<(}jeD`PkA0|YbkR(oRE97G7d^VFedKM7L z;@;7=iw`*$B74@zrNWkQQ`mai(h3$TPo%&F)fQ}8%~fgCfH3MUh!LRGl|bZ;1Z~WNkj=K3N>$(dkMd#_8vrHobmOk_JeWbP){3K2EKH(?lAHbv8>_jKy9&P zGII!#J!_!Hy=U~RHjqg1yrCTbusCQ4BsLKpA7)C z0)%!9p%=*vm?8nSm8&t2tb^S-T6a?I&<%)FP~3I31pqWg3v5%+8%N95SOJ{V+0J~R z37dXnc`mg@P*o_sK$+h*#z1!o20F7lW1!b(5JpT}x?bH2UIUf_@pMbmo7oJv31*TR zGX8oE{<@5Rjr$HEvF<8GbXgI-5k+)a5qFIu?gkans}ym|intq5#4Ri0sZqo;t0Fha z-pF+EXv{i(Sr`9ox>T_7&?N~rL77c(Ha1(49C;}lZ%}QemUs2gy}|BgW27TKhcfYb zRmGUf#?oWLvB zCVQCa#cv(-#1z$*noKgMgew}thY^0t5Y%BM`AOL7Y$fE{ z(YC{Y&}i(OOgWdLu5>1ojUI{@CZa|vTR0qzO&NKzv7y3O4j4vuES{ndP_!(&N`Nj2 zfHLc91;8N>tH{HE9#q>#ld)7bnu@2{Sw!3eLdkd*t)Wxk@F7N2+gSV{!v(G!w$1T_ z2ya%nsdkbEPh;}V(Q7xW{)KPW?^-H$tBw}I^78rkcpSp2I6ih(JY>h=%>H;GU+L!F 
zFC@~dC>4X_*#2BBJK_9+RS9=khN@2Ek!kC$_s*J6zG)2pd}D9vVM2T-f)C2KK*FOUbL3Fho22y`gDEq@e96^^1FYT-LCB=$4eun&2OIzef;sh z(y~+8!MERkE#x}(P^e?kx6PG*y03KU-4D&p+m4pjZaz5J^hsv$0h?{`(i^9WO&88= z*tg=Lf!A6d4RvjMBXshfErUmoy=*>i`Joy4{*J*%OS?+9{-oV>PAV^20GWnPw28G4 z6f>jb1P|p)Je7rnNmA7~9PYu?IIH%L#R{-X03w-+!;!_4}`cE=_yP)%X9m zIQ`3k;)N%Un0Ku23|)M9wRsu+wRq`?Ul&jRV`=di+86rmxz~#ej7jt8ktLxQkM0?G z@fYWBII?k9sQAur20A_+F`GV!2TyfQg&tfUG2eamtqu0rH;dD*&J9zpHD>ni!oYJc zJyd*h<6nvwSDqMnDs-Rem})UkzW>p{z2E6K*B;Z%WzqTO*z+$JUB1ZyNBdUur%GR_ z<5v$41m3zcbaCAa8|<6ERXqAwpSjO@Zo}pC%ZnE_eIvB%(y)2*)n4;>@yx*Ak3UwN z0VO%;07|&-`ZE7He?)z0#+P9PqoyzQKj)}IL8_Mr=CHBb#m2U{sxdS-r+^KXB>GYNH)$!Kd zE{`10Jg%S*SU+G#H+ zW?lCOJTkDJpytJv?^o;{twZz;pf9qrm#y2ver@bG%zm9~6dR}4y^(HCuds3cEA#be zk(HbuAFrM#(|dzudOnZ;@_917&s(PF{5d`UmFa{2T6#WD@K+U_%js*IV8s5ezk8!| z1?-a6A@-&u80p}|0-Qbq4nNA4(cxMM!n?tyBx^HT|J7O5uLa#c*{VLT*NuHT3f#$l zVf^RfH66lhS~R!cpGU>Dx+-H`8OqDgMp_Ge~3d%LJb*ZGaU03&vA^~J25UfD8`9+dvO#~Z% zuZ7J*!4DJOyJ`Q!B?;}o1C9Z-y6PQO2Hr%a;?Vr*AI~Jma6b=TN2y(R^_AO;v`5#s zH<7Jq0G5Rke{t++khk-$KB?W9rdb{FC5 z1s+M>gj6&v_MYKiAkD5LZEd<#Cne zK>y-%$nknPPp?m03EAF2Sq`)~K8GBi#<}|hZa#sV599U-w7ao42mKX-j#(PC%P_OQ zz$+kD6c8&4lt%@cK^NC8D6|YxE%EzfjKLYM8wHz&VvFN}Ms*vby7`43exZk7=n)Wl z1cV*|fmT4E6%gJWpw=Uu9MEz%d!y!(UBc=ZY!JX}7=SLJrRKtm-^bsjdL+Zx9A9gY z?JCAr6N}QMJqnw2Z(@u~@T--9X4>kwxj`{2A?#LsB*8PB2roiPP)M;saYKUp=aize zE{TK|yf-cH7t5T{(#vR-BaH^3W^ZB(e{l-mMzGKhH9Kzi2^f|>qmdFgNwSHR6>J#+R&qLH}P#4U`bp_{bBmDd14?%iRHxPBf052->FD1NQZYFDLnG_Fmo>b>m z;_D1atV~8w&y$Zm>Mpvs{8hsh#`U`6U{l(9s~W|nu#$jEy4+Wj8Q@{O5(PU2VIYr^ zky-|BNk%OLd2|K_p1KTHG7bC^gc#kj2u=~4kpk>MVQ^KEU`F__H{p<)-(?r!f+!6J z17}HJm7+uy@NqgCsFHz4DFs#o_eWjTYLwR^Nc1anan~Wi(Ds61nIO-VNxU$(QmDWr;Y5$v$a>A#az0IV70x+a>CaxtNW^@E`?aK1 zSa--#(Xy!@BYPRJT0jailsnF65XE>C(p{fmDQ%I~AH>VU{sD2kUWkNXz*@5M^2=+3 z=uTRUbtl7Kms*;Hq^;uAY)Jrr-oLEF!U9Iay~#R5;lvWw#*-|D^%4WJDe-^>Qjyg0 dt2~og5{pT*uZ9^M>!lWZHP7&n9W`}y{tvCJMDhRt literal 0 HcmV?d00001 From 21740c6f92cd2468531691ee27f040f86846596c Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: 
Thu, 21 May 2026 08:39:24 +0200 Subject: [PATCH 42/43] fix: several fixes with operator initialization and cpu generation --- src/SOFIE_core/inc/SOFIE/RModel.hxx | 4 + src/SOFIE_core/inc/SOFIE/RModel_Base.hxx | 5 +- src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx | 4 +- .../inc/SOFIE/RModel_GraphIndependent.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator.hxx | 2 +- .../inc/SOFIE/ROperator_BasicBinary.hxx | 93 +- .../inc/SOFIE/ROperator_BasicNary.hxx | 4 +- .../inc/SOFIE/ROperator_Basic_Is.hxx | 85 +- .../SOFIE/ROperator_BatchNormalization.hxx | 24 +- src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx | 115 +-- src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx | 121 +++ .../inc/SOFIE/ROperator_Comparision.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx | 207 +++-- .../inc/SOFIE/ROperator_Constant.hxx | 8 +- src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx | 607 +++++-------- .../inc/SOFIE/ROperator_ConvTranspose.hxx | 2 +- .../inc/SOFIE/ROperator_ConvTranspose.icc | 30 +- src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx | 10 +- .../inc/SOFIE/ROperator_EyeLike.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc | 34 +- src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx | 6 +- .../inc/SOFIE/ROperator_GatherND.hxx | 20 +- src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx | 231 +++-- .../inc/SOFIE/ROperator_Identity.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_LSTM.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc | 42 +- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 13 +- src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx | 118 +++ src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx | 16 +- src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx | 18 +- src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx | 2 +- src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc | 32 +- src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx | 4 +- 
src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx | 6 +- .../inc/SOFIE/ROperator_Reshape.hxx | 128 +-- .../inc/SOFIE/ROperator_ScatterElements.hxx | 20 +- src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx | 6 +- .../inc/SOFIE/ROperator_Sigmoid.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx | 431 +++++++-- .../inc/SOFIE/ROperator_Softmax.hxx | 255 +++--- src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx | 16 +- .../inc/SOFIE/ROperator_SubGraph.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx | 4 +- src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx | 6 +- src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx | 16 +- src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx | 8 +- .../inc/SOFIE/ROperator_Transpose.hxx | 10 +- src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx | 609 ++++++++----- src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx | 166 +++- src/SOFIE_core/src/RFunction_MLP.cxx | 2 +- src/SOFIE_core/src/RModel.cxx | 847 +++++++++++++++--- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 142 +++ src/SOFIE_core/test/input_models/Clip.onnx | Bin 0 -> 148 bytes src/SOFIE_core/test/input_models/IsInf.onnx | Bin 0 -> 89 bytes src/SOFIE_core/test/input_models/IsNaN.onnx | Bin 0 -> 89 bytes src/SOFIE_core/test/input_models/Not.onnx | Bin 0 -> 85 bytes .../input_models/simple_transformer.onnx.data | Bin 0 -> 203008 bytes src/SOFIE_parsers/CMakeLists.txt | 5 +- src/SOFIE_parsers/src/ParseBasicIs.cxx | 66 ++ src/SOFIE_parsers/src/ParseClip.cxx | 46 + src/SOFIE_parsers/src/ParseGRU.cxx | 2 +- src/SOFIE_parsers/src/ParseLSTM.cxx | 2 +- src/SOFIE_parsers/src/ParseNot.cxx | 38 + src/SOFIE_parsers/src/ParseRNN.cxx | 2 +- src/SOFIE_parsers/src/ParseWhere.cxx | 4 + src/SOFIE_parsers/src/RModelParser_ONNX.cxx | 12 +- 73 files changed, 3267 insertions(+), 1519 deletions(-) create mode 100644 src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx create mode 100644 
src/SOFIE_core/test/input_models/Clip.onnx create mode 100644 src/SOFIE_core/test/input_models/IsInf.onnx create mode 100644 src/SOFIE_core/test/input_models/IsNaN.onnx create mode 100644 src/SOFIE_core/test/input_models/Not.onnx create mode 100644 src/SOFIE_core/test/input_models/simple_transformer.onnx.data create mode 100644 src/SOFIE_parsers/src/ParseBasicIs.cxx create mode 100644 src/SOFIE_parsers/src/ParseClip.cxx create mode 100644 src/SOFIE_parsers/src/ParseNot.cxx diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/src/SOFIE_core/inc/SOFIE/RModel.hxx index 766bf8b..96b1a8a 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel.hxx @@ -45,6 +45,8 @@ private: MemoryPoolInfo fIntermediateMemoryInfo; /// fIntermediateTensorFrequencyLookup; /// opIndices; ///< consecutive op indices forming this group @@ -135,6 +137,8 @@ public: void AddShapeTensor(const std::string & name, const std::vector & shapeValues, bool scalar = false); + void AddExtraCodeForDimShapes(const std::string & code) { fExtraCodeForDimShapes += code; } + // add and initialize subgraph to the model void InitializeSubGraph(std::shared_ptr graph); diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx index c7f73eb..d4f9afa 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx @@ -130,9 +130,8 @@ public: } void AddNeededStdLib(std::string libname) { - if (fAllowedStdLib.find(libname) != fAllowedStdLib.end()) { - fNeededStdLib.insert(libname); - } + // if the library is already in the set, insert does nothing + fNeededStdLib.insert(libname); } void AddNeededCustomHeader(std::string filename) { diff --git a/src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx b/src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx index 558f82c..93bb092 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx @@ -66,7 +66,7 @@ struct GNN_Init { break; } default: { - throw 
std::runtime_error("TMVA SOFIE: Invalid Update function supplied for creating GNN function block."); + throw std::runtime_error("SOFIE: Invalid Update function supplied for creating GNN function block."); } } } @@ -88,7 +88,7 @@ struct GNN_Init { break; } default: { - throw std::runtime_error("TMVA SOFIE: Invalid Aggregate function supplied for creating GNN function block."); + throw std::runtime_error("SOFIE: Invalid Aggregate function supplied for creating GNN function block."); } } } diff --git a/src/SOFIE_core/inc/SOFIE/RModel_GraphIndependent.hxx b/src/SOFIE_core/inc/SOFIE/RModel_GraphIndependent.hxx index 407c645..dfade7f 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel_GraphIndependent.hxx +++ b/src/SOFIE_core/inc/SOFIE/RModel_GraphIndependent.hxx @@ -49,7 +49,7 @@ struct GraphIndependent_Init { } default: { throw std::runtime_error( - "TMVA SOFIE: Invalid Update function supplied for creating GraphIndependent function block."); + "SOFIE: Invalid Update function supplied for creating GraphIndependent function block."); } } } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/src/SOFIE_core/inc/SOFIE/ROperator.hxx index eab393a..4b7741c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator.hxx @@ -95,7 +95,7 @@ protected: const std::string SP = " "; ///< space used to correctly indent the generated C++ code bool fUseSession = false; ///< flag to identify if using the session class bool fIsOutputConstant = false; ///< flag to identify if operator has a constant output (no need to generate code) - bool fIsOutputParamShape = false; ///< flag to identify of the output represents a parametric shape (can be knwon at compile time) + bool fIsOutputParamShape = false; ///< flag to identify of the output represents a parametric shape (can be known at compile time) mutable std::vector fInputTensorNames; mutable std::vector fOutputTensorNames; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx 
b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx index e7a04ed..5d15078 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -54,6 +54,18 @@ struct BinaryOperatorTrait { static std::string Op(const std::string &t1, const std::string t2) { return "std::pow(" + t1 + "," + t2 + ")"; } static T Func(T t1, T t2) { return std::pow(t1, t2); } }; +template +struct BinaryOperatorTrait { + static const std::string Name() { return "Mod"; } + static std::string Op(const std::string & t1, const std::string t2) { return "(" + t1 + " % " + t2 + ")"; } + static T Func(T t1, T t2) { return t1 % t2; } +}; +template +struct BinaryOperatorTrait { + static const std::string Name() { return "FMod"; } + static std::string Op(const std::string & t1, const std::string t2) { return "std::fmod(" + t1 + "," + t2 + ")"; } + static T Func(T t1, T t2) { return std::fmod(t1, t2); } +}; template class ROperator_BasicBinary final : public ROperator { @@ -97,14 +109,14 @@ public: { // input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNA)) { - throw std::runtime_error(std::string("TMVA SOFIE Binary Op Input Tensor ") + fNA + "is not found in model"); + throw std::runtime_error(std::string("SOFIE Binary Op Input Tensor ") + fNA + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw std::runtime_error(std::string("TMVA SOFIE Binary Op Input Tensor ") + fNB + "is not found in model"); + throw std::runtime_error(std::string("SOFIE Binary Op Input Tensor ") + fNB + "is not found in model"); } int dynamicInputs = 0; if (model.IsDynamicTensor(fNA)) { - fDimShapeA = model.GetDimTensorShape(fNA); + fDimShapeA = model.GetDynamicTensorShape(fNA); dynamicInputs |= 1; } else { fShapeA = model.GetTensorShape(fNA); @@ -112,18 +124,18 @@ public: } if (model.IsDynamicTensor(fNB)) { dynamicInputs |= 2; - fDimShapeB = model.GetDimTensorShape(fNB); + 
fDimShapeB = model.GetDynamicTensorShape(fNB); } else { fShapeB = model.GetTensorShape(fNB); fDimShapeB = ConvertShapeToDim(fShapeB); } if (dynamicInputs & 1 && model.Verbose()) std::cout << BinaryOperatorTrait::Name() << " : input " << fNA << " is dynamic " - << ConvertDimShapeToString(fDimShapeA) << " "; + << ConvertDimShapeToString(fDimShapeA) << std::endl; if (dynamicInputs & 2 && model.Verbose()) std::cout << BinaryOperatorTrait::Name() << " : input " << fNB << " is dynamic " - << ConvertDimShapeToString(fDimShapeB) << " "; - std::cout << std::endl; + << ConvertDimShapeToString(fDimShapeB) << std::endl; + // check if need to broadcast at initialization time if shapes are known and different // (we could broadcast the tensor tensor to maximum values of dynamic shapes - to be done) // case of known shapes @@ -132,6 +144,7 @@ public: auto ret = UTILITY::MultidirectionalBroadcastShape(fShapeA, fShapeB); fBroadcastFlag = ret.first; fShapeY = ret.second; + auto lengthY = ConvertShapeToLength(fShapeY); if (model.IsConstantTensor(fNA) && model.IsConstantTensor(fNB)) { bool broadcast = fBroadcastFlag > 0; if (broadcast) { @@ -143,7 +156,7 @@ public: fNBroadcastedA = "Broadcasted" + fNA + "to" + fNY; auto data = model.GetInitializedTensorData(fNA); std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), std::default_delete()); if (model.Verbose()) std::cout << "broadcasted data A " << ConvertShapeToString(fShapeY) << " : " @@ -164,7 +177,7 @@ public: << ConvertValuesToString(ConvertShapeToLength(fShapeB), static_cast(data.get())) << std::endl; std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), std::default_delete()); // do not update tensor B but add broadcasted one (since it can be input to some 
other operators) if (model.Verbose()) @@ -185,12 +198,12 @@ public: const std::string &nameB = fNBroadcastedB.empty() ? fNB : fNBroadcastedB; auto dataA = static_cast(model.GetInitializedTensorData(nameA).get()); auto dataB = static_cast(model.GetInitializedTensorData(nameB).get()); - std::vector dataY(ConvertShapeToLength(fShapeY)); + std::vector dataY(lengthY); for (size_t i = 0; i < dataY.size(); i++) { dataY[i] = BinaryOperatorTrait::Func(dataA[i], dataB[i]); } model.AddConstantTensor(fNY, fShapeY, dataY.data()); - // flag tensors to not be written in the weight file + // flag tensors to not be written in the generated code or weight file model.SetNotWritableInitializedTensor(nameA); model.SetNotWritableInitializedTensor(nameB); fIsOutputConstant = true; @@ -199,6 +212,59 @@ public: << " , " << fNB << " " << ConvertShapeToString(fShapeB) << " ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(dataY) << std::endl; } + } else if (((model.IsShapeTensor(fNA) && model.IsShapeTensor(fNB)) || + (model.IsShapeTensor(fNA) && model.IsInitializedTensor(fNB)) || + (model.IsShapeTensor(fNB) && model.IsInitializedTensor(fNA))) + && (fShapeA.size() <=1 && fShapeB.size() <=1 && model.GetTensorType(fNA) == ETensorType::INT64)) { + // case of shape tensors ( tensors are of rank 0 or 1 ) + std::vector dimValA; + std::vector dimValB; + if (model.IsShapeTensor(fNA)) + dimValA = model.GetShapeTensorValues(fNA); + if (model.IsShapeTensor(fNB)) + dimValB = model.GetShapeTensorValues(fNB); + // adjust for broadcasting - repet values until it reaches shapes of Y + if (!fShapeY.empty() && fShapeY[0] > 1) { + if (dimValA.size() == 1) dimValA = std::vector( fShapeY[0], dimValA[0]); + if (dimValB.size() == 1) dimValB = std::vector( fShapeY[0], dimValB[0]); + } + + auto convertDataToDim = [&](const std::string & name, const std::vector & shape, std::vector & dimValues) { + auto data = static_cast(model.GetInitializedTensorData(name).get()); + 
dimValues.resize(lengthY); + for (size_t i = 0; i < lengthY; i++) { + if (!shape.empty() && lengthY == shape[0]) + dimValues[i] = Dim{ static_cast(data[i])}; + else // case dataA is a scalar + dimValues[i] = Dim{ static_cast(data[0])}; + } + }; + if (model.IsInitializedTensor(fNA)) { + convertDataToDim(fNA,fShapeA,dimValA); + } else if (model.IsInitializedTensor(fNB)) { + convertDataToDim(fNB,fShapeB,dimValB); + } + + //perform binary operations on shape tensors + std::vector dimValY(lengthY); + for (size_t i = 0; i < lengthY; i++) { + if (!dimValA[i].isParam && !dimValB[i].isParam) { + size_t d = BinaryOperatorTrait::Func(dimValA[i].dim, dimValB[i].dim); + dimValY[i] = Dim{d}; + } else { + auto res = BinaryOperatorTrait::Op(dimValA[i].GetVal(), dimValB[i].GetVal()); + dimValY[i] = Dim{res, static_cast(-1)}; + } + } + model.AddShapeTensor(fNY,dimValY, fShapeY.empty()); // cannot be a scalar + if (model.Verbose()) { + std::cout << BinaryOperatorTrait::Name() << " : " << fNA << " " << ConvertShapeToString(fShapeA) + << " , " << fNB << " " << ConvertShapeToString(fShapeB) << " ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertDimShapeToString(dimValY) << " (shape)" << std::endl; + } + // no code needs to be generated (flag this as a constant output tensor) + fIsOutputConstant = true; + } else { // case of defined and non-constant tensors model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), fShapeY); @@ -271,9 +337,6 @@ public: opName = "op_" + opName; - if (fDimShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Binary Op called to Generate without being initialized first"); - } std::stringstream out; out << SP << "\n//------ " << opName << " " << BinaryOperatorTrait::Name() << " --> " << ConvertDimShapeToString(fDimShapeY) << "\n"; @@ -474,7 +537,7 @@ public: return ""; if (fDimShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Basic Binary called to Generate without being initialized first"); + throw 
std::runtime_error("SOFIE Operator Basic Binary called to Generate without being initialized first"); } std::stringstream out; auto length = ConvertDimShapeToLength(fDimShapeY); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicNary.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BasicNary.hxx index cbe0497..dad27da 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicNary.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BasicNary.hxx @@ -119,7 +119,7 @@ public: void Initialize(RModel& model) override { for (auto &it : fNInputs) { if (!model.CheckIfTensorAlreadyExist(it)) { - throw std::runtime_error("TMVA SOFIE BasicNary Op Input Tensor " + it + " is not found in model"); + throw std::runtime_error("SOFIE BasicNary Op Input Tensor " + it + " is not found in model"); } fShapeInputs.push_back(model.GetTensorShape(it)); } @@ -145,7 +145,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE BasicNary called to Generate without being initialized first"); + throw std::runtime_error("SOFIE BasicNary called to Generate without being initialized first"); } std::stringstream out; size_t length = ConvertShapeToLength(fShapeY); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx index da2b5fb..1a224ac 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx @@ -13,33 +13,32 @@ enum class EBasicIsOperator { kIsInf, kIsInfPos, kIsInfNeg, kIsNaN }; template struct IsOpTraits { }; + template<> struct IsOpTraits { static std::string Name() { return "IsInf"; } static std::string Op(const std::string &x) { return "std::isinf(" + x + ")"; } - static bool Impl(float x) { return std::isinf(x);} }; + template<> struct IsOpTraits { static std::string Name() { return "IsInfPos"; } - static std::string Op(const std::string &x) { return "(std::isinf(" + x + ") && " + x + "> 0)"; } - 
static bool Impl(float x) { return std::isinf(x) && x > 0;} + static std::string Op(const std::string &x) { return "(std::isinf(" + x + ") && " + x + " > 0)"; } }; + template<> struct IsOpTraits { static std::string Name() { return "IsInfNeg"; } - static std::string Op(const std::string &x) { return "(std::isinf(" + x + ") && " + x + "< 0)"; } - static bool Impl(float x) { return std::isinf(x) && x < 0;} + static std::string Op(const std::string &x) { return "(std::isinf(" + x + ") && " + x + " < 0)"; } }; + template<> struct IsOpTraits { - static std::string Name() { return "IsInf"; } + static std::string Name() { return "IsNaN"; } static std::string Op(const std::string &x) { return "std::isnan(" + x + ")"; } - static bool Impl(float x) { return std::isnan(x);} }; - template class ROperator_Basic_Is final : public ROperator { private: @@ -55,8 +54,8 @@ public: ROperator_Basic_Is(std::string nameX, std::string nameY) : fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) { - fInputTensorNames = { fNX }; - fOutputTensorNames = { fNY }; + fInputTensorNames = { fNX }; + fOutputTensorNames = { fNY }; } void Initialize(RModel& model) override { @@ -73,7 +72,7 @@ public: opName = "op_" + opName; std::stringstream out; - out << SP << "\n//---- Operator" << IsOpTraits::Name() << " " << opName << "\n"; + out << SP << "\n//---- Operator " << IsOpTraits::Name() << " " << opName << "\n"; auto length = ConvertDimShapeToLength(fShapeX); out << SP << "for (size_t i = 0; i < " << length << "; i++) {\n"; out << SP << SP << "tensor_" << fNY << "[i] = " << IsOpTraits::Op("tensor_" + fNX + "[i]") << ";\n"; @@ -81,6 +80,68 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override + { + if (fIsOutputConstant) + return ""; + + std::string op; + op = "\n//------ " + IsOpTraits::Name() + "_KERNEL_ALPAKA\n"; + op += SP + "struct Is" + IsOpTraits::Name() + "Kernel {\n"; + op += SP + SP + "template\n"; + // Output is uint8_t (bool 
storage), input is T (float/double). + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const & acc,\n"; + op += SP + SP + SP + "T const * data,\n"; + op += SP + SP + SP + "uint8_t * output,\n"; + op += SP + SP + SP + "std::size_t const length) const\n"; + op += SP + SP + "{\n"; + op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (idx < length) {\n"; + op += SP + SP + SP + SP + "output[idx] = static_cast(" + IsOpTraits::Op("data[idx]") + ");\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override + { + return SP + "Is" + IsOpTraits::Name() + "Kernel " + IsOpTraits::Name() + "Kernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override + { + opName = "op_" + opName; + std::stringstream out; + auto length = ConvertDimShapeToLength(fShapeX); + + out << "\n//------ " << opName << "_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << length << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY + << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY + << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, " + << IsOpTraits::Name() << "Kernel" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", " << length << ");\n"; + out << SP << "auto task_" << opName + << " = alpaka::createTaskKernel(workDiv_" << fNY + << ", " << IsOpTraits::Name() << "Kernel" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", " << length << ");\n"; + out << SP << "alpaka::enqueue(queue, task_" << opName << ");\n"; + return out.str(); + } + 
+ bool IsElementwise() const override { return !fIsOutputConstant; } + std::string GetElementwiseExpr(const std::string& v) const override { + return IsOpTraits::Op(v); + } + std::vector GetStdLibs() override { return { std::string("cmath") }; } @@ -88,4 +149,4 @@ public: } // namespace SOFIE -#endif \ No newline at end of file +#endif // SOFIE_ROPERATOR_BASIC_IS diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx index 0e5b98d..def1870 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx @@ -59,7 +59,7 @@ public: } else{ throw - std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a BatchNormalization operator"); + std::runtime_error("SOFIE Encountered unsupported type parsing a BatchNormalization operator"); } } @@ -72,12 +72,12 @@ public: std::vector> ShapeInference(std::vector> input) override { if (input.size() != 5 ) { throw - std::runtime_error("TMVA SOFIE BatchNormalization Op Shape inference need 5 input tensors"); + std::runtime_error("SOFIE BatchNormalization Op Shape inference need 5 input tensors"); } for(size_t i = 0; i < input.size(); i++) { if (input[i].size() != 4) { throw - std::runtime_error("TMVA SOFIE BatchNormalization Op Shape inference only accept tensor with 4 dimensions"); + std::runtime_error("SOFIE BatchNormalization Op Shape inference only accept tensor with 4 dimensions"); } } @@ -88,30 +88,30 @@ public: void Initialize(RModel& model) override { if (!model.CheckIfTensorAlreadyExist(fNX)) { throw - std::runtime_error("TMVA SOFIE BatchNormalization op Input Tensor " + fNX + " fnx is not found in model"); + std::runtime_error("SOFIE BatchNormalization op Input Tensor " + fNX + " fnx is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNScale)) { throw - std::runtime_error("TMVA SOFIE BatchNormalization op Input Tensor " + fNScale + " fns is not found in 
model"); + std::runtime_error("SOFIE BatchNormalization op Input Tensor " + fNScale + " fns is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNB)) { throw - std::runtime_error("TMVA SOFIE BatchNormalization op Input Tensor " + fNB + " fnb is not found in model"); + std::runtime_error("SOFIE BatchNormalization op Input Tensor " + fNB + " fnb is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNMean)) { throw - std::runtime_error("TMVA SOFIE BatchNormalization op Input Tensor " + fNMean + " fnm is not found in model"); + std::runtime_error("SOFIE BatchNormalization op Input Tensor " + fNMean + " fnm is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNVar)) { throw - std::runtime_error("TMVA SOFIE BatchNormalization op Input Tensor " + fNVar + " fnv is not found in model"); + std::runtime_error("SOFIE BatchNormalization op Input Tensor " + fNVar + " fnv is not found in model"); } fShapeX = model.GetTensorShape(fNX); if (fShapeX.size() < 2 || fShapeX.size() > 4) { throw - std::runtime_error("TMVA SOFIE BatchNormalization Op input tensor " + fNX + " fnx has wrong shape : " + ConvertShapeToString(fShapeX)); + std::runtime_error("SOFIE BatchNormalization Op input tensor " + fNX + " fnx has wrong shape : " + ConvertShapeToString(fShapeX)); } fShapeScale = model.GetTensorShape(fNScale); @@ -185,7 +185,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShapeX.empty()){ - throw std::runtime_error("TMVA SOFIE Batch Normalization called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Batch Normalization called to Generate without being initialized first"); } std::stringstream out; @@ -230,7 +230,7 @@ public: std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeX.empty()) - throw std::runtime_error("TMVA SOFIE BatchNormalization called to Generate without being initialized first"); + throw 
std::runtime_error("SOFIE BatchNormalization called to Generate without being initialized first"); std::size_t totalElements = ConvertShapeToLength(fShapeY); @@ -277,7 +277,7 @@ public: std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeX.empty()) - throw std::runtime_error("TMVA SOFIE BatchNormalization called to Generate without being initialized first"); + throw std::runtime_error("SOFIE BatchNormalization called to Generate without being initialized first"); std::size_t totalElements = ConvertShapeToLength(fShapeY); std::string kname = "batchNormKernel_" + opName; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx index 39c8554..9b44e9f 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx @@ -26,15 +26,15 @@ private: std::string fNX; std::string fNY; - std::vector fShape; - std::string fAttrType = "float"; + std::vector fShape; + ETensorType fType; public: ROperator_Cast(){} - ROperator_Cast(std::string attr_type,std::string nameX, std::string nameY): - fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)), - fAttrType(attr_type) { - fKind = OperatorKind::CAST; + ROperator_Cast(ETensorType type,std::string nameX, std::string nameY): + fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)), + fType(type) + { fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; } @@ -51,105 +51,55 @@ public: void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor if (model.CheckIfTensorAlreadyExist(fNX) == false){ - throw std::runtime_error("TMVA SOFIE Cast Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Cast Op Input Tensor is not found in model"); } - fShape = model.GetTensorShape(fNX); - // shoud we add a check if the same type + fShape = model.GetDimTensorShape(fNX); + // should we add a check if the same type 
auto inputType = model.GetTensorType(fNX); - const size_t n = ConvertShapeToLength(fShape); if (model.IsInitializedTensor(fNX)) { fIsOutputConstant = true; auto inputData = model.GetInitializedTensorData(fNX); - if (ConvertStringToType(fAttrType) == ETensorType::INT64) { - auto inputTypeStr = ConvertTypeToString(inputType); - if (inputTypeStr == "int32_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "float") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "double") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "int8_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "int16_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "uint8_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "uint16_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "uint32_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "uint64_t") { - auto* src = static_cast(inputData.get()); - auto converted = convertToInt64(src, n); - model.AddConstantTensor(fNY, fShape, converted); - } - else if (inputTypeStr == "int64_t") { - 
model.AddConstantTensor( - fNY, fShape, - static_cast(inputData.get()) - ); - } - else { - throw std::runtime_error("Unsupported input type for INT64 conversion"); - } - + if (fType == ETensorType::INT64) { + model.AddConstantTensor(fNY, ConvertShapeToInt(fShape), static_cast(inputData.get())); model.SetNotWritableInitializedTensor(fNX); } else fIsOutputConstant = false; + } else if (model.IsShapeTensor(fNX) && fType == ETensorType::INT64) { + auto shapeData = model.GetShapeTensorValues(fNX); + model.AddShapeTensor(fNY, shapeData, fShape.size() == 0); + fIsOutputConstant = true; } if (!fIsOutputConstant) - model.AddIntermediateTensor(fNY, ConvertStringToType(fAttrType), fShape); + model.AddIntermediateTensor(fNY, fType, fShape); if (model.Verbose()) { - std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY; + std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << ConvertTypeToString(fType); + if (fType == ETensorType::BOOL) std::cout << " (converted from BOOL) "; + std::cout << " for " << fNY << " shape " << ConvertDimShapeToString(fShape); if (fIsOutputConstant) std::cout << " (constant) "; std::cout << std::endl; } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; + std::string Generate(std::string opName) override { + + // output shape can be empty if is a scalar - OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Cast called to Generate without being initialized first"); - } std::stringstream out; - size_t length = ConvertShapeToLength(fShape); + auto length = ConvertDimShapeToLength(fShape); - // out << SP << ETensorType << " " << OpName << "_attr = " << fattr << ";\n"; - out << "\n//------ CAST\n"; + out << "\n//------ CAST " << opName << " ---> " << fNY << " " << ConvertDimShapeToString(fShape) << "\n"; // no generated code for constant outputs if (fIsOutputConstant) return out.str(); out << SP 
<< "for (int id = 0; id < " << length << " ; id++){\n"; - out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< fAttrType << ">(tensor_" << fNX << "[id]);\n"; + // need to handle bool case separately since casting to uint8 will not give right result + if (fType == ETensorType::BOOL) + out << SP << SP << "tensor_" << fNY << "[id] = (tensor_" << fNX << "[id] != 0) ? 1 : 0;\n"; + else + out << SP << SP << "tensor_" << fNY << "[id] = static_cast<"<< ConvertTypeToString(fType) << ">(tensor_" << fNX << "[id]);\n"; out << SP << "}\n"; return out.str(); @@ -178,7 +128,7 @@ public: if (fIsOutputConstant) return ""; OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Cast called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Cast called to Generate without being initialized first"); } std::stringstream out; @@ -193,6 +143,13 @@ public: out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); } + + bool IsElementwise() const override { return true; } + // returns the C++ expression that casts the scalar expression v to the target type fType; BOOL is mapped to 0/1 exactly as in Generate + std::string GetElementwiseExpr(const std::string& v) const override { + if (fType == ETensorType::BOOL) return "(((" + v + ") != 0) ? 1 : 0)"; + return "static_cast<" + ConvertTypeToString(fType) + ">(" + v + ")"; + } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx index 333b114..dce78da 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx @@ -5,6 +5,7 @@ #include "ROperator.hxx" #include "RModel.hxx" +#include #include #include #include @@ -166,6 +167,126 @@ public: } + // ----------------------------------------------------------------------- + // GPU ALPAKA + // ----------------------------------------------------------------------- + + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override + { + std::string op; + op = "\n//------ CLIP_KERNEL_ALPAKA\n"; + op += "struct ClipKernel {\n"; + op += SP + "template\n"; + op += SP + "ALPAKA_FN_ACC void operator()(\n"; + op += 
SP + SP + "TAcc const & acc,\n"; + op += SP + SP + "T const * __restrict__ data,\n"; + op += SP + SP + "T * __restrict__ out,\n"; + op += SP + SP + "std::size_t numElements,\n"; + op += SP + SP + "T minVal,\n"; + op += SP + SP + "T maxVal) const\n"; + op += SP + "{\n"; + op += SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + "if (idx < numElements) {\n"; + op += SP + SP + SP + "T val = data[idx];\n"; + op += SP + SP + SP + "val = val < minVal ? minVal : val;\n"; + op += SP + SP + SP + "out[idx] = val > maxVal ? maxVal : val;\n"; + op += SP + SP + "}\n"; + op += SP + "}\n"; + op += "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override + { + return "ClipKernel clipKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string OpName) override + { + OpName = "op_" + OpName; + if (fShape.empty() && fDimShape.empty()) + throw std::runtime_error( + "SOFIE Operator Clip called to Generate_GPU_ALPAKA without being initialized first"); + + std::stringstream out; + out << "\n//------ CLIP_GPU_ALPAKA " << OpName << "\n"; + + std::string length = ConvertDimShapeToLength(fDimShape); + + std::string minExpr, maxExpr; + if (fMinIsConstant) { + minExpr = ToStringHighPrec(fMin); + } else if (fHasMin) { + throw std::runtime_error( + "SOFIE Clip GPU ALPAKA: runtime (non-constant) min bound is not supported in GPU path"); + } else { + minExpr = "std::numeric_limits<" + TensorType::Name() + ">::lowest()"; + } + + if (fMaxIsConstant) { + maxExpr = ToStringHighPrec(fMax); + } else if (fHasMax) { + throw std::runtime_error( + "SOFIE Clip GPU ALPAKA: runtime (non-constant) max bound is not supported in GPU path"); + } else { + maxExpr = "std::numeric_limits<" + TensorType::Name() + ">::max()"; + } + + std::string castMin = "static_cast<" + TensorType::Name() + ">(" + minExpr + ")"; + std::string castMax = "static_cast<" + TensorType::Name() + ">(" + maxExpr + ")"; + + out << SP << "auto const elementsPerThread_" 
<< fNX << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << fNX << " = Vec::all(Idx{" << length << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << fNX + << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; + out << SP << "auto const workDiv_" << fNX + << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, clipKernel" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << length << ")" + << ", " << castMin << ", " << castMax << ");\n"; + out << SP << "auto task_" << OpName + << " = alpaka::createTaskKernel(workDiv_" << fNX << ", clipKernel" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", static_cast(" << length << ")" + << ", " << castMin << ", " << castMax << ");\n"; + out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; + return out.str(); + } + + bool IsElementwise() const override { return true; } + + std::string GetElementwiseExpr(const std::string& v) const override + { + std::string minExpr, maxExpr; + if (fMinIsConstant) minExpr = ToStringHighPrec(fMin); + else if (fHasMin) minExpr = "tensor_" + fNMin + "[0]"; + else minExpr = "std::numeric_limits<" + TensorType::Name() + ">::lowest()"; + + if (fMaxIsConstant) maxExpr = ToStringHighPrec(fMax); + else if (fHasMax) maxExpr = "tensor_" + fNMax + "[0]"; + else maxExpr = "std::numeric_limits<" + TensorType::Name() + ">::max()"; + + std::string expr = fHasMax || fMaxIsConstant ? 
"std::min(" + maxExpr + ", " + v + ")" : v; + if (fHasMin || fMinIsConstant) + expr = "std::max(" + minExpr + ", " + expr + ")"; + return expr; + } + + std::string GetFusableOutputTensorName() override { return fNY; } + + void UpdateFusableTensorName(std::string fusable_tensor_name, + const std::function& removal_func) override + { + removal_func(fNX); + removal_func(fNY); + fNX = fusable_tensor_name; + fNY = fusable_tensor_name; + fInputTensorNames[0] = fNX; + fOutputTensorNames[0] = fNY; + } + // ----------------------------------------------------------------------- // Generate // ----------------------------------------------------------------------- diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx index 81b5ac6..e158499 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx @@ -88,10 +88,10 @@ public: void Initialize(RModel& model) override { if (!model.CheckIfTensorAlreadyExist(fNX1)){ - throw std::runtime_error(std::string("TMVA SOFIE Comparision Op Input Tensor ") + fNX1 + "is not found in model"); + throw std::runtime_error(std::string("SOFIE Comparision Op Input Tensor ") + fNX1 + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNX2)) { - throw std::runtime_error(std::string("TMVA SOFIE Comparision Op Input Tensor ") + fNX2 + "is not found in model"); + throw std::runtime_error(std::string("SOFIE Comparision Op Input Tensor ") + fNX2 + "is not found in model"); } fShapeX1 = model.GetTensorShape(fNX1); fShapeX2 = model.GetTensorShape(fNX2); @@ -157,7 +157,7 @@ public: if (fIsOutputConstant) return ""; OpName = "op_" + OpName; if (fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Comparision Op called to Generate without being initialized first"); } std::stringstream out; out << SP << "\n//------ " 
<< ComparisionTrait::Name() << "\n"; @@ -194,7 +194,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Comparision Op called to Generate without being initialized first"); const std::size_t D = fShapeY.size(); std::size_t totalElements = ConvertShapeToLength(fShapeY); @@ -284,7 +284,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Comparision Op called to Generate without being initialized first"); std::size_t totalElements = ConvertShapeToLength(fShapeY); std::string kname = "comparisonKernel_" + opName; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx index d621381..eeefe80 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx @@ -22,8 +22,8 @@ std::vector fInputs; std::string fOutput; std::vectorfOutputShape; + std::vector fOutputShapeData; // in case output is a shape tensor we store here the output shape value data (can be parametric) std::vector> fInputShapes; - ETensorType fInputType; public: @@ -52,20 +52,20 @@ fAxis = inputs[0].size()+fAxis; } if (fAxis < 0 || fAxis >= (int) inputs[0].size()) - throw std::runtime_error("TMVA SOFIE Concat Op - invalid axis value "); + throw std::runtime_error("SOFIE Concat Op - invalid axis value "); int concat_dim=0; // case of Concat (fNewAxis = 0) and not ConcatFromSequence if(fnewAxis == 0){ for (size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i - 1].size()) - throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have different shapes " + + throw std::runtime_error("SOFIE Concat Op - input tensors have 
different shapes " + ConvertShapeToString(inputs[i]) + " and " + ConvertShapeToString(inputs[i - 1])); for (size_t iaxis = 0; iaxis < inputs[i].size(); iaxis++) { if ((int)iaxis == fAxis) concat_dim += inputs[i][iaxis]; else if (i > 0 && inputs[i][iaxis] != inputs[i - 1][iaxis]) - throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have wrong shapes " + + throw std::runtime_error("SOFIE Concat Op - input tensors have wrong shapes " + ConvertShapeToString(inputs[i]) + " and " + ConvertShapeToString(inputs[i - 1])); } @@ -80,14 +80,14 @@ if(fnewAxis == 1){ for(size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i-1].size() ) - throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have different shapes " + fInputs[i] + " : " + + throw std::runtime_error("SOFIE Concat Op - input tensors have different shapes " + fInputs[i] + " : " + ConvertShapeToString(inputs[i]) + " and " + fInputs[i-1] + " : " + ConvertShapeToString(inputs[i-1])); for (size_t iaxis = 0; iaxis < inputs[i].size(); iaxis++) { if ((int) iaxis == fAxis) stack.push_back(inputs[i][iaxis]); else if (i> 0 && inputs[i][iaxis] != inputs[i-1][iaxis]) - throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have wrong shapes " + + throw std::runtime_error("SOFIE Concat Op - input tensors have wrong shapes " + ConvertShapeToString(inputs[i]) + " and " + ConvertShapeToString(inputs[i-1])); } @@ -107,13 +107,13 @@ fAxis = inputs[0].size()+fAxis; } if (fAxis < 0 || fAxis >= (int) inputs[0].size()) - throw std::runtime_error("TMVA SOFIE Concat Op - invalid axis value "); + throw std::runtime_error("SOFIE Concat Op - invalid axis value "); Dim concat_dim; if(fnewAxis == 0){ for (size_t i = 0; i < inputs.size(); i++) { if (i > 0 && inputs[i].size() != inputs[i - 1].size()) - throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have different shapes " + fInputs[i] + " : " + + throw std::runtime_error("SOFIE Concat Op - input tensors have different shapes 
" + fInputs[i] + " : " + ConvertDimShapeToString(inputs[i]) + " and " + fInputs[i-1] + " : " + ConvertDimShapeToString(inputs[i - 1])); for (size_t iaxis = 0; iaxis < inputs[i].size(); iaxis++) { if ((int)iaxis == fAxis) { @@ -132,7 +132,7 @@ ret[iaxis] = inputs[i][iaxis]; } else if ((!inputs[i][iaxis].isParam && !ret[iaxis].isParam) && (inputs[i][iaxis].dim != ret[iaxis].dim)) { - throw std::runtime_error("TMVA SOFIE Concat Op - input tensors have wrong shapes " + + throw std::runtime_error("SOFIE Concat Op - input tensors have wrong shapes " + ConvertDimShapeToString(inputs[i]) + " and " + ConvertDimShapeToString(inputs[i - 1])); } @@ -163,89 +163,131 @@ // for example for fAxis == 0 // output shapes: [inputs.size(), inputs[0][0], inputs[0][1],....] if(fnewAxis == 1){ - throw std::runtime_error("TMVA SOFIE Concat Op - stacking (i.e. COncatFromSequence with new_axis=1) is not supported "); + throw std::runtime_error("SOFIE Concat Op - stacking (i.e. COncatFromSequence with new_axis=1) is not supported "); } return ret; } void Initialize(RModel& model) override { + std::vector> inputIntShapes; for (auto &it : fInputs) { if (model.CheckIfTensorAlreadyExist(it) == false) { - throw std::runtime_error("TMVA SOFIE Concat Op Input Tensor " + it + " is not found in model"); + throw std::runtime_error("SOFIE Concat Op Input Tensor " + it + " is not found in model"); } fInputShapes.push_back(model.GetDimTensorShape(it)); + if (!model.IsDynamicTensor(it)) { + inputIntShapes.push_back(ConvertShapeToInt(fInputShapes.back())); + } + } + if (inputIntShapes.size() == fInputs.size()) { + // if all input shapes are static we can compute output shape at initialization time + auto outputIntShape = ShapeInference(inputIntShapes)[0]; + fOutputShape = ConvertShapeToDim(outputIntShape); + if (model.Verbose()) + std::cout << "Initialize Concat operator with defined inputs shapes, " + << "output has shape " << ConvertShapeToString(outputIntShape) << std::endl; + + } else { + // if at least 
one input shape is dynamic we need to compute output shape using the symbolic expression for the dimensions + fOutputShape = ShapeInference(fInputShapes, model); + if (model.Verbose()) + std::cout << "Initialize Concat operator with dynamic inputs shapes, " + << "output has shape " << ConvertDimShapeToString(fOutputShape) << std::endl; } - fOutputShape = ShapeInference(fInputShapes, model); - if (model.Verbose()) - std::cout << "Output of concat operator has shape " << ConvertDimShapeToString(fOutputShape) << std::endl; // check if concat has constant inputs , axis 0(concat contigous memory and type is integer) bool isOutputShape = false; - fInputType = model.GetTensorType(fInputs[0]); - if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) { - fIsOutputConstant = true; - isOutputShape = true; - for ( auto & input : fInputs) { - if (!model.IsInitializedTensor(input)) { - fIsOutputConstant = false; - if (!model.IsShapeTensor(input)) { - isOutputShape = false; - break; - } - } + // if (model.GetTensorType(fInputs[0]) == ETensorType::INT64 && fAxis == 0) { + fIsOutputConstant = true; + isOutputShape = true; + + for (auto &input : fInputs) { + if (model.IsDynamicTensor(input)) { + fIsOutputConstant = false; + isOutputShape = false; + break; } - if (fIsOutputConstant) { - auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible - std::vector outputData(ConvertShapeToLength(outputShape)); - size_t offset = 0; - for ( auto & input : fInputs) { - auto inputData = static_cast(model.GetInitializedTensorData(input).get()); - auto inputShape = model.GetTensorShape(input); // shape is not dynamic if it is constant - size_t inputLength = ConvertShapeToLength(inputShape); - std::copy(inputData, inputData + inputLength, outputData.begin() + offset ); - offset += inputLength; - // the data of the input tensor don't need to be written in the generated code and data file - model.SetNotWritableInitializedTensor(input); - } - 
model.AddConstantTensor(fOutput, outputShape, outputData.data()); - if (model.Verbose()) { - std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : " - << ConvertValuesToString(outputData) << " (constant)" << std::endl; - } - } else if (isOutputShape) { - auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible - std::vector outputData(ConvertShapeToLength(outputShape)); - size_t offset = 0; - for ( auto & input : fInputs) { - std::vector inputData; - auto inputShape = model.GetTensorShape(input); // shape is not dynamic - size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar - if (model.IsShapeTensor(input)) { - inputData = model.GetShapeTensorValues(input); - } else if (model.IsInitializedTensor(input)) { - inputData.resize(inputLength); - auto intData = static_cast(model.GetInitializedTensorData(input).get()); - for (size_t i = 0; i < inputData.size(); i++) - inputData[i] = Dim{ static_cast(intData[i])}; - } - else { - // this should not happen - throw std::runtime_error("TMVA SOFIE Concat Operator- invalid input type for shape output type"); + if (!model.IsInitializedTensor(input)) { + if (model.IsShapeTensor(input)) { + // if it is a shape tensor we can have constant output if the shapes are defined) + auto shapeData = model.GetShapeTensorValues(input); + bool isShapeFullyDefined = ConvertShapeToInt(shapeData).size() == shapeData.size(); + if (!isShapeFullyDefined) { + fIsOutputConstant = false; + } else { + // if shape is fully defined we can consider output as constant and we can compute the output + // shape at initialization time + fIsOutputConstant = fIsOutputConstant && true; } - std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); - offset += inputLength; + // inputs are then shape tensors and output is a shape tensor + isOutputShape = true; + } else { + // case of standard intermediate tensor + fIsOutputConstant = false; + 
isOutputShape = false; + break; } - // add output tensor - model.AddShapeTensor(fOutput,outputData, false); // cannot be a scalar - if (model.Verbose()) { - std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : " - << ConvertDimShapeToString(outputData) << " (shape)" << std::endl; + } else { + fIsOutputConstant = fIsOutputConstant && true; + } + } + //} + + if (fIsOutputConstant) { + auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible + std::vector outputData(ConvertShapeToLength(outputShape)); + size_t offset = 0; + for (auto &input : fInputs) { + auto inputData = static_cast(model.GetInitializedTensorData(input).get()); + auto inputShape = model.GetTensorShape(input); // shape is not dynamic if it is constant + size_t inputLength = ConvertShapeToLength(inputShape); + std::copy(inputData, inputData + inputLength, outputData.begin() + offset); + offset += inputLength; + // the data of the input tensor don't need to be written in the generated code and data file + model.SetNotWritableInitializedTensor(input); + } + model.AddConstantTensor(fOutput, outputShape, outputData.data()); + if (model.Verbose()) { + std::cout << "output of Concat is a constant tensor " << ConvertShapeToString(outputShape) << " : " + << ConvertValuesToString(outputData) << " (constant)" << std::endl; + } + } else if (isOutputShape) { + auto outputShape = ConvertShapeToInt(fOutputShape); // conversion must be possible + if (outputShape.size() != 1) + throw std::runtime_error("SOFIE Concat Op - output shape for shape tensor must have rank 1"); + // output shape is a rank 1 tensor with size equal to the output rank + std::vector outputData(outputShape[0]); + size_t offset = 0; + for (auto &input : fInputs) { + std::vector inputData; + auto inputShape = model.GetTensorShape(input); // shape is not dynamic + size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar + if (model.IsShapeTensor(input)) { + 
inputData = model.GetShapeTensorValues(input); + } else if (model.IsInitializedTensor(input)) { + inputData.resize(inputLength); + auto intData = static_cast(model.GetInitializedTensorData(input).get()); + for (size_t i = 0; i < inputData.size(); i++) + inputData[i] = Dim{static_cast(intData[i])}; + } else { + // this should not happen + throw std::runtime_error("SOFIE Concat Operator- invalid tensor input " + input + + " for shape output type"); } - fIsOutputConstant = true; + std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset); + offset += inputLength; + } + // add output tensor + model.AddShapeTensor(fOutput, outputData, false); // cannot be a scalar + fOutputShapeData = outputData; + if (model.Verbose()) { + std::cout << "output of Concat is a shape tensor " << ConvertShapeToString(outputShape) << " : " + << ConvertDimShapeToString(outputData) << " (shape)" << std::endl; } + fIsOutputParamShape = true; } - if (!fIsOutputConstant) { + if (!fIsOutputConstant && !fIsOutputParamShape) { model.AddIntermediateTensor(fOutput, model.GetTensorType(fInputs[0]), fOutputShape); if (model.Verbose()) { std::cout << "Concat ---> " << fOutput << " " << ConvertDimShapeToString(fOutputShape) << std::endl; @@ -260,8 +302,14 @@ if (fIsOutputConstant) return out.str(); - if(fOutputShape.empty()){ - throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); + if (fIsOutputParamShape) { + // output is a shape tensor defined by the concatenation of the input shapes + out << "// output is a shape tensor defined by the concatenation of the input shapes\n"; + for (int i = 0; i < static_cast(fOutputShape + [0].dim); i++) { + out << SP << "tensor_" << fOutput << "[" << i << "] = " << fOutputShapeData[i] << ";\n"; + } + return out.str(); } // special case when memory is contiguous bool hasShapeOnes = true; @@ -275,10 +323,11 @@ std::string offset; for(size_t i=0; i 0) out << offset; + out << SP << 
"TMVA::Experimental::SOFIE::Copy(tensor_" << fOutput; + if (i > 0) + out << offset; offset += " + " + length; - out << ");\n"; + out << ", " << "tensor_" << fInputs[i] << ", " + length << ");\n"; } } else { @@ -328,7 +377,7 @@ if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fOutputShape.empty()) - throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Concat called to Generate without being initialized first"); const std::size_t D = fOutputShape.size(); const std::size_t Nin = fInputs.size(); @@ -417,7 +466,7 @@ std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; if (fOutputShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Concat called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Concat called to Generate without being initialized first"); } std::stringstream out; auto length = ConvertDimShapeToLength(fOutputShape); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx index 6590909..8640e96 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx @@ -52,7 +52,7 @@ public: // case of ConstantOfShape (since no inputs in case of Constant operator) fIsConstantOfShape = true; if (model.CheckIfTensorAlreadyExist(fNX) == false){ - throw std::runtime_error("TMVA SOFIE ConstantOfShape Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE ConstantOfShape Op Input Tensor is not found in model"); } // get output shape from input values: // can work only if input is a constant or initialized tensor (or dynamic one) @@ -60,7 +60,7 @@ public: auto input_tensor = static_cast(dptr.get()); auto input_shape = model.GetTensorShape(fNX); if (input_shape.size() > 1 ) - throw std::runtime_error("TMVA SOFIE ConstantOfShape Op Input Tensor has invalid 
shape"); + throw std::runtime_error("SOFIE ConstantOfShape Op Input Tensor has invalid shape"); if (input_tensor != nullptr && !input_shape.empty()) { fShape = std::vector (input_shape[0]); for (size_t i = 0; i < fShape.size(); i++) @@ -70,7 +70,7 @@ public: length = ConvertShapeToLength(fShape); if (fValues.size() != 1) - throw std::runtime_error("TMVA SOFIE ConstantOfShape Op value Tensor has invalid size " + std::to_string(fValues.size())); + throw std::runtime_error("SOFIE ConstantOfShape Op value Tensor has invalid size " + std::to_string(fValues.size())); T value = fValues[0]; fValues = std::vector(length, value); @@ -80,7 +80,7 @@ public: // in case of standard constant the shape is provided as input length = ConvertShapeToLength(fShape); if (length != fValues.size()) - throw std::runtime_error("TMVA SOFIE Constant Op has invalid shape : " + ConvertShapeToString(fShape) + + throw std::runtime_error("SOFIE Constant Op has invalid shape : " + ConvertShapeToString(fShape) + " with " + std::to_string(fValues.size()) + " values"); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx index 7ef7a6d..c87ecd9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx @@ -19,6 +19,8 @@ template class ROperator_Conv final : public ROperator { private: + bool fBroadcastBias = false; + std::string fAttrAutopad; std::vector fAttrDilations; size_t fAttrGroup; @@ -29,21 +31,19 @@ private: std::string fNX; std::string fNW; std::string fNB; - std::string fNB2; // bias tensor name after broadcasting std::string fNY; std::string convK; std::string imcol; - std::vector fShapeX; + std::vector fShapeX; std::vector fShapeW; std::vector fShapeB; - std::vector fShapeY; + std::vector fShapeY; std::string fType; size_t fDim; // dimension of the convolution - size_t gemm_n, gemm_m, gemm_k; // dimensions of the equivalent gemm operation after im2col transformation public: @@ -67,7 +67,6 @@ 
public: } fInputTensorNames = { fNX, fNB }; fOutputTensorNames = { fNY }; - fKind = OperatorKind::CONV; } ROperator_Conv(std::string autopad, std::vector dilations, @@ -86,7 +85,6 @@ public: } fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; - fKind = OperatorKind::CONV; } std::vector TypeInference(std::vector input) override { @@ -95,29 +93,31 @@ public: } // function returning output shape given input - std::vector> ShapeInference(std::vector> input) override { + std::vector DoShapeInference(const std::vector & input, const std::vector & weight) { // shape of convolution input has to be (according to ONNX): N x C x H x W // Where N : batch size, C : input channels, H : input height, W : input width - if (input.size() > 3 ) { - throw - std::runtime_error("TMVA SOFIE Conv Op Shape inference need 2 or 3 input tensors"); + if (input.size() -2 != fDim) { + throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid input "); } - for(size_t i = 0; i < input.size(); i++) { - if (input[i].size() -2 != fDim) { - throw - std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid inputs "); - } + if (weight.size() -2 != fDim) { + throw std::runtime_error("TMVA SOFIE Conv Op Shape inference - invalid weights "); + } + if (fAttrGroup == 0 && input[1].isParam) + throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without group attr"); + if (fAttrKernelShape.empty()) { + if (input[2].isParam || (fDim > 1 && input[3].isParam) || (fDim > 2 && input[4].isParam)) + throw std::runtime_error("TMVA SOFIE Conv - param shapes not supported without kernel attr"); } if (fAttrGroup == 0) { - fAttrGroup = input[0][1] / input[1][1]; + fAttrGroup = input[1].dim / weight[1]; } // kernel shape - size_t k1 = ((fAttrKernelShape.empty())? input[1][2] : fAttrKernelShape[0]); - size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? input[1][3] : fAttrKernelShape[1]) : 1; - size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? 
input[1][4] : fAttrKernelShape[2]) : 1; + size_t k1 = ((fAttrKernelShape.empty())? weight[2] : fAttrKernelShape[0]); + size_t k2 = (fDim > 1) ? ((fAttrKernelShape.empty()) ? weight[3] : fAttrKernelShape[1]) : 1; + size_t k3 = (fDim > 2) ? ((fAttrKernelShape.empty()) ? weight[4] : fAttrKernelShape[2]) : 1; size_t i1 = (fDim > 1) ? ((fDim > 2) ? 3 : 2) : 1; @@ -136,28 +136,37 @@ public: k2 + (fAttrDilations[1] - 1) * (k2 - 1), k3 + (fAttrDilations[2] - 1) * (k3 - 1)}; + if (fAttrStrides.empty()) { + fAttrStrides = {1, 1, 1}; + } + if (fDim < 3) + fAttrStrides.resize(3, 1); + if (fAttrAutopad == "NOTSET") { if (fAttrPads.empty()) { fAttrPads = {1, 1, 1, 1, 1, 1}; } } else if (fAttrAutopad == "SAME_UPPER" || fAttrAutopad == "SAME_LOWER") { - if (fDim == 1) - fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[0] / 2}; - else if (fDim == 2) - fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2}; - else if (fDim == 3) - fAttrPads = {fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2, - fAttrKernelShape[0] / 2, fAttrKernelShape[1] / 2, fAttrKernelShape[2] / 2}; - // add extra padding at beginning or end (depending if SAME_UPPER or SAME_LOWER) - // need to check this! - if (fAttrKernelShape[0] % 2 == 1) { - (fAttrAutopad == "SAME_UPPER") ? fAttrPads[0]++ : fAttrPads[i1]++; + for (size_t d = 0; d < fDim; ++d) { + if (input[d + 2].isParam) + throw std::runtime_error( + "TMVA SOFIE Conv Op: SAME padding with parametric input shape is not supported"); } - if (fDim > 1 && fAttrKernelShape[1] % 2 == 1) { - (fAttrAutopad == "SAME_UPPER") ? fAttrPads[1]++ : fAttrPads[i2]++; - } - if (fDim > 2 && fAttrKernelShape[2] % 2 == 1) { - (fAttrAutopad == "SAME_UPPER") ? 
fAttrPads[2]++ : fAttrPads[i3]++; + // ONNX SAME padding: total_pad = max(0, (ceil(in/stride)-1)*stride + kernel - in) + // SAME_UPPER places extra padding at end, SAME_LOWER at beginning + fAttrPads.assign(6, 0); + for (size_t d = 0; d < fDim; ++d) { + size_t inSize = input[d + 2].dim; + size_t stride_d = fAttrStrides[d]; + size_t outSize = (inSize + stride_d - 1) / stride_d; + int totalPad = std::max(0, (int)((outSize - 1) * stride_d + fAttrKernelShape[d]) - (int)inSize); + if (fAttrAutopad == "SAME_UPPER") { + fAttrPads[d] = (size_t)(totalPad / 2); + fAttrPads[d + fDim] = (size_t)(totalPad - totalPad / 2); + } else { + fAttrPads[d] = (size_t)(totalPad - totalPad / 2); + fAttrPads[d + fDim] = (size_t)(totalPad / 2); + } } } else if (fAttrAutopad != "VALID") { throw @@ -166,139 +175,184 @@ public: // to be sure pad is vector of size 6 if (fDim < 3) fAttrPads.resize(6, 0); - if (fAttrStrides.empty()) { - fAttrStrides = {1, 1, 1}; - } - if (fDim < 3) - fAttrStrides.resize(3, 1); + Dim input1 = input[2]; + Dim input2 = (fDim > 1) ? input[3] : Dim{1}; + Dim input3 = (fDim > 2) ? input[4] : Dim{1}; + size_t pad1 = fAttrPads[0] + fAttrPads[i1]; - size_t input1 = input[0][2]; - size_t input2 = (fDim > 1) ? input[0][3] : 1; - size_t input3 = (fDim > 2) ? 
input[0][4] : 1; + // function to get output dimension of convolution given input - size_t pad1 = fAttrPads[0] + fAttrPads[i1]; - size_t output1 = (input1 + pad1 - fAttrKernelShape[0]) / fAttrStrides[0] + 1; + auto computeOutput = [&](Dim inputDim, size_t kernel, size_t pad, size_t stride) { + if (!inputDim.isParam) { + size_t outSize = (inputDim.dim + pad - kernel) / stride + 1; + return Dim{outSize}; + } else { + if (stride == 1){ + if ((pad - kernel + 1) == 0 ) + // output is same as input + return inputDim; + else { + int64_t v = pad - kernel + 1; + std::string outStr = "(" + inputDim.param + "+" + std::to_string(v) + ")"; + return Dim{ outStr, static_cast(-1)}; + } + } else { // general case (stride not 1) + int64_t v = pad - kernel; + std::string outStr = "((" + inputDim.param + "+" + std::to_string(v) + ")/" + + std::to_string(stride) + "1)"; + return Dim{ outStr, static_cast(-1)}; + } + } + throw std::runtime_error("TMVA SOFIE Conv Op - invalid values"); + return Dim{}; + }; + + Dim output1 = computeOutput(input1, fAttrKernelShape[0], pad1, fAttrStrides[0]); - size_t batch_size = input[0][0]; // first element in input tensor - size_t output_channels = input[1][0]; // first element in weight tensor + Dim batch_size = input[0]; // first element in input tensor + Dim output_channels = Dim{weight[0]}; // first element in weight tensor - std::vector> ret({{ batch_size, output_channels, output1 }}); + std::vector ret({ batch_size, output_channels, output1 }); if (fDim == 1) return ret; size_t pad2 = fAttrPads[1] + fAttrPads[i2]; - size_t output2 = (input2 + pad2 - fAttrKernelShape[1]) / fAttrStrides[1] + 1; + Dim output2 = computeOutput(input2, fAttrKernelShape[1], pad2, fAttrStrides[1]); + // output is N x M x OH x OW - ret[0].push_back(output2); + ret.push_back(output2); if (fDim == 2) return ret; size_t pad3 = fAttrPads[2] + fAttrPads[i3]; - size_t output3 = (input3 + pad3 - fAttrKernelShape[2] ) / fAttrStrides[2] + 1; + Dim output3 = computeOutput(input3, 
fAttrKernelShape[2], pad3, fAttrStrides[2]); // output is N x M x OH x OW x OD - ret[0].push_back(output3); + ret.push_back(output3); return ret; } void Initialize(RModel& model) override { fUseSession = model.UseSession(); - if (!model.CheckIfTensorAlreadyExist(fNX)) - throw std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model"); - - fShapeX = model.GetTensorShape(fNX); - if (fShapeX.size() < 3 || fShapeX.size() > 5) - throw std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); - + if (!model.CheckIfTensorAlreadyExist(fNX)) { + throw + std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNX + " is not found in model"); + } + fShapeX = model.GetDimTensorShape(fNX); + if (fShapeX.size() < 3 || fShapeX.size() > 5) { + std::cout << fNX << " : " << ConvertDimShapeToString(fShapeX) << std::endl; + throw + std::runtime_error("TMVA SOFIE Conv Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); + } fDim = fShapeX.size() - 2; - - if (!model.CheckIfTensorAlreadyExist(fNW)) - throw std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model"); - + if (!model.CheckIfTensorAlreadyExist(fNW)) { + throw + std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model"); + } fShapeW = model.GetTensorShape(fNW); - if (fShapeW.size() < 3 || fShapeW.size() > 5) + if (fShapeW.size() < 3 || fShapeW.size() > 5) { + std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl; throw std::runtime_error("TMVA SOFIE Conv Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions"); - - fShapeY = ShapeInference({fShapeX, fShapeW})[0]; + } + fShapeY = DoShapeInference(fShapeX, fShapeW); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); - if (fNB != "") { - if (!model.CheckIfTensorAlreadyExist(fNB)) - throw std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); - + if 
(!model.CheckIfTensorAlreadyExist(fNB)) { + throw + std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); + } fShapeB = model.GetTensorShape(fNB); - std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); - bool broadcast_needed = !UTILITY::AreSameShape(fShapeB, targetShape); + if (fShapeB.size() != 1) + throw + std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)"); + std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); + auto shapeDimB = model.GetDimTensorShape(fNB); + bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape); if (broadcast_needed) { - auto original_data = model.GetInitializedTensorData(fNB); - if (fShapeB.size() < 1) - throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape"); - if (fShapeB[0] != fShapeY[1]) - throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has wrong shape: " + - ConvertShapeToString(fShapeB)); - if (fType != "float") - throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); - if (!fUseSession) { - std::vector shape(fDim + 1, 1); - shape[0] = fShapeB[0]; - std::shared_ptr new_data_ptr( - UTILITY::UnidirectionalBroadcast(static_cast(original_data.get()), shape, targetShape), - std::default_delete()); - model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), targetShape, new_data_ptr); - fShapeB = model.GetTensorShape(fNB); - fNB2 = fNB; - } else { - fNB2 = fNB + "bcast"; - model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); - } + auto original_data = model.GetInitializedTensorData(fNB); + // make bias shape equal to Y shape by adding 1 + if (fShapeB.size() < 1) + throw std::runtime_error("TMVA SOFIE Conv op: Bias Tensor has empty shape"); + // we assume bias tensor dimension is equal to number of filters that is the second dimension in + // the output tensor + if (!(shapeDimB[0] == fShapeY[1])) + throw std::runtime_error("TMVA SOFIE Conv 
op: Bias Tensor has wrong shape: " + + ConvertShapeToString(fShapeB)); + if (fType != "float") + throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); + // here is the actual broadcasting + fBroadcastBias = true; + if (!fUseSession) { + // do here broadcasting + std::vector shape(fDim + 1, 1); + shape[0] = fShapeB[0]; + auto intTargetShape = ConvertShapeToInt(targetShape); + std::shared_ptr new_data_ptr( + UTILITY::UnidirectionalBroadcast(static_cast(original_data.get()), shape, intTargetShape), + std::default_delete()); + model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr); + fShapeB = model.GetTensorShape(fNB); + } } } - - // Compute kernel size once — product of all spatial kernel dimensions + // output channel size can be parametric and is an expression + std::vector outputDims = std::vector(fShapeY.begin()+2, fShapeY.end()); + //check if shape is not parametric + std::vector outputInts = ConvertShapeToInt(outputDims); + Dim channelDim; + if (outputInts.empty()) { + auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + channelDim = Dim{ outputChannelSize, static_cast(-1)}; + } else { + size_t outputChannelSize = ConvertShapeToLength(outputInts); + channelDim = Dim{ outputChannelSize }; + } size_t kernelSize = fAttrKernelShape[0]; - for (size_t i = 1; i < fDim; i++) + for (size_t i = 1; i < fDim; i++) { kernelSize *= fAttrKernelShape[i]; - - // Spatial output size: OH * OW (* OD for 3D) - size_t spatialSize = fShapeY[2]; - for (size_t i = 1; i < fDim; i++) - spatialSize *= fShapeY[2 + i]; + } std::vector shape1 = {fShapeW[0], fShapeW[1], kernelSize}; - std::vector shape2 = {fShapeW[1], kernelSize, spatialSize}; - model.AddIntermediateTensor(fNX + "_f", ConvertStringToType(fType), shape1); - model.AddIntermediateTensor(fNX + "_xcol", ConvertStringToType(fType), shape2); - convK = fNX + "_f"; - imcol = fNX + "_xcol"; + std::vector shape2 
= {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim }; + model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 ); + model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 ); + convK = fNX +"_f"; + imcol = fNX +"_xcol"; fOutputTensorNames.emplace_back(convK); fOutputTensorNames.emplace_back(imcol); + fInputTensorNames.emplace_back(convK); + fInputTensorNames.emplace_back(imcol); - // GEMM dimensions — set once here, reused in Generate() and Generate_GPU_ALPAKA() - // gemm_n = output channels (total, not per group — group case divides at launch time) - // gemm_m = spatial output size (OH * OW * OD) - // gemm_k = input channels per group * kernel spatial size - gemm_n = fShapeW[0]; // total output channels - gemm_m = spatialSize; // OH * OW (* OD) - gemm_k = fShapeW[1] * kernelSize; // IC_per_group * KH * KW (* KD) - if(fAttrGroup > 1) { - gemm_n /= fAttrGroup; + if (model.Verbose()) { + std::cout << "Conv - " << fDim << " " << fNX << " : " << ConvertDimShapeToString(fShapeX) + << " --> " << fNY << " : " << ConvertDimShapeToString(fShapeY) << std::endl; } } std::string GenerateInitCode() override { std::stringstream out; // Generate initialization code for broadcasting of bias tensor - if (!fNB2.empty()) { + if (fBroadcastBias) { // include a separate scope to avoid defining unique operator temp variables std::vector shape(fDim + 1, 1); + // bias (is a 1D tensor) shape[0] = fShapeB[0]; - std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); - out << SP << "{\n"; - out << SP << SP << "float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << ConvertShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n"; + std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); + out << "//--- broadcast bias tensor " << fNB << "for Conv op if needed \n"; + // in case of dynamic 
tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(targetShape).empty(); + auto length = ConvertDimShapeToLength(targetShape); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n"; + else + out << SP << "{\n"; + out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertDimShapeToString(fShapeY) << ");\n"; + out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; + out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNB << ".begin());\n"; + out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n"; out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } @@ -314,16 +368,22 @@ public: } std::stringstream out; - size_t bsize = fShapeX[0]; + auto bsize = fShapeX[0]; size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; // kernel depth size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height size_t kWidth = fShapeW[fDim+1]; // kernel width - size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; // input depth - size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; // input height - size_t iWidth = fShapeX[fDim+1]; // input width - size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; // output depth - size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // ouput height - size_t oWidth = fShapeY[fDim+1]; // output width + auto iDepth = (fDim > 2) ? fShapeX[2] : Dim{1}; // input depth + auto iHeight = (fDim > 1) ? fShapeX[fDim] : Dim{1}; // input height + auto iWidth = fShapeX[fDim+1]; // input width + auto oDepth = (fDim > 2) ? fShapeY[2] : Dim{1}; // output depth + auto oHeight = (fDim > 1) ? 
fShapeY[fDim] : Dim{1}; // ouput height + auto oWidth = fShapeY[fDim+1]; // output width + // total output size for a channel + auto outputChannelStride = ConvertDimShapeToLength(std::vector{oDepth, oHeight, oWidth}); // size of channel = D * H * W + auto outputBatchStride = ConvertDimShapeToLength(std::vector{fShapeY[1] , oDepth, oHeight, oWidth}); // size of C * D * H * W + // input size + auto inputChannelStride = ConvertDimShapeToLength(std::vector{iDepth, iHeight, iWidth}); + auto inputBatchStride = ConvertDimShapeToLength(std::vector{fShapeX[1] , iDepth, iHeight, iWidth}); // size of C * D * H * W out << "\n//---- operator Conv " << OpName << "\n"; @@ -371,13 +431,16 @@ public: //out << SP << "char " << OpName << "_transA = 'T';\n"; out << SP << "char " << OpName << "_transA = 'N';\n"; out << SP << "char " << OpName << "_transB = 'N';\n"; - out << SP << "int " << OpName << "_m = " << oHeight * oWidth * oDepth << ";\n"; // output h*w + out << SP << "int " << OpName << "_m = " << outputChannelStride << ";\n"; // output h*w assert(fShapeY[1] == fShapeW[0]); - assert(fShapeW[1] == fShapeX[1] / fAttrGroup); + //assert(fShapeW[1] == fShapeX[1] / fAttrGroup); out << SP << "int " << OpName << "_n = " << fShapeW[0] << ";\n"; // output channels out << SP << "int " << OpName << "_k = " << fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] << ";\n"; out << SP << "float " << OpName << "_alpha = 1.0;\n"; - out << SP << "float " << OpName << "_beta = 0.0;\n"; + if (fNB != "") + out << SP << "float " << OpName << "_beta = 1.0;\n"; + else // when bias is not present beta needs to be equal to zero to avoid re-using previous results in output tensor + out << SP << "float " << OpName << "_beta = 0.0;\n"; // Loop on batch size @@ -414,14 +477,14 @@ public: fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2; } } - out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n"; + out << SP << SP << "size_t out_offset = n * " 
<< outputBatchStride << ";\n"; if (fAttrGroup == 1) { - out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << ";\n"; + out << SP << SP << "size_t x_offset = n * " << inputBatchStride << ";\n"; // when using im2col - resulting matrix is transposed, the dimension is (input_c * filter_h * filter_y, output_h * // output_w) if (fDim < 3) { - out << SP << SP << "SOFIE::UTILITY::Im2col(tensor_" << fNX + out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col(tensor_" << fNX << " + x_offset," // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, // dilation_w, @@ -437,7 +500,7 @@ public: out << "," << "tensor_" <(tensor_" << fNX + out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d(tensor_" << fNX << " + x_offset," // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, // dilation_d, dilation_h, dilation_w, @@ -450,24 +513,36 @@ public: << "tensor_" << fNX << "_xcol);\n\n "; } // BLAS - out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &" - << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName - << "_m,\n"; // use m if op_xcol is not transpose , otherwise k - out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY - << " + out_offset, &" << OpName << "_m);\n"; + out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" + << "tensor_" << fNY << " + out_offset, false, false, " + << OpName << "_m, " << OpName << "_n, " << OpName << "_k, " + << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f, " + << OpName << "_beta, "; + if (fNB != "") + out << "tensor_" << fNB; + else + out << "nullptr"; + out << ");\n"; + + + // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &" + // << OpName << "_n, &" << OpName << "_k, &" << 
OpName << "_alpha, " << "tensor_" << fNX << "_xcol, &" << OpName + // << "_m,\n"; // use m if op_xcol is not transpose , otherwise k + // out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY + // << " + out_offset, &" << OpName << "_m);\n"; } else { // case of group convolution // Unroll (IM2COL) the input tensor- make loop on groups and repeat operations (IM2COL + GEMM for each // group) // out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n"; out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n"; - out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << " + g * " - << fShapeW[1] * iDepth * iHeight * iWidth << ";\n "; - out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << " + g * " - << fShapeW[0] * oDepth * oHeight * oWidth / fAttrGroup << ";\n "; + out << SP << SP << "size_t x_offset = n * " << inputBatchStride << " + g * " + << fShapeW[1] << " * " << inputChannelStride << ";\n "; + out << SP << SP << "size_t g_offset = g * " << fShapeW[0] << " * (" << outputChannelStride << ") / " << fAttrGroup << ";\n "; + out << SP << SP << "size_t out_offset = n * " << outputBatchStride << " + g_offset;\n"; if (fDim < 3) { - out << SP << SP << "SOFIE::UTILITY::Im2col(tensor_" << fNX + out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col(tensor_" << fNX << " + x_offset," // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, // dilation_w, @@ -483,7 +558,7 @@ public: out << ", tensor_" << fNX << "_xcol);\n\n "; } else { // 3d im2col - out << SP << SP << "SOFIE::UTILITY::Im2col_3d(tensor_" << fNX + out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d(tensor_" << fNX << " + x_offset," // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, // dilation_d, dilation_h, dilation_w, @@ -502,242 
+577,24 @@ public: out << SP << SP << SP << "size_t offset_f = g * " << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] / fAttrGroup << ";\n"; - out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &" - << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName - << "_m,\n"; // use m if op_xcol is not transpose , otherwise k - out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY - << " + out_offset" - << ", &" << OpName << "_m);\n"; + out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" + << "tensor_" << fNY << " + out_offset, false, false, " + << OpName << "_m, " << OpName << "_n, " << OpName << "_k, " + << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f + offset_f, " + << OpName << "_beta, "; + if (fNB != "") + out << "tensor_" << fNB << " + g_offset"; + else + out << "nullptr"; + out << ");\n"; out << SP << SP << "}\n"; // end of group loop } - - if (fNB2 != "") { - out << SP << "int " << OpName << "_size = " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n"; - out << SP << "float " << OpName << "_gamma = 1.0;\n"; - out << SP << "int " << OpName << "_incx = 1;\n"; - out << SP << "int " << OpName << "_incy = 1;\n"; - - out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &" - << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n"; - - } out << SP << "}\n"; // end of batch size loop return out.str(); - } - - std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { - opName = "op_" + opName; - if (fShapeX.empty() || fShapeW.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first"); - - size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; - size_t oHeight = (fDim > 1) ? 
fShapeY[fDim] : 1; - size_t oWidth = fShapeY[fDim + 1]; - size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; - size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; - size_t iWidth = fShapeX[fDim + 1]; - size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; - size_t kWidth = fShapeW[fDim + 1]; - size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; - - size_t kernelSize = fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2]; - size_t colRows = fShapeW[1] * kernelSize; - size_t colCols = oDepth * oHeight * oWidth; - size_t colElements = colRows * colCols; - size_t outChannels = fShapeW[0]; - size_t spatialSize = oDepth * oHeight * oWidth; - - // Strides for weight vectorisation - size_t id = (fDim > 2) ? fDim - 3 : 2; - size_t ih = (fDim > 1) ? fDim - 2 : 1; - size_t iw = fDim - 1; - size_t wstrideDil = fAttrDilations[iw]; - size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; - size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; - size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; - size_t ocstrideDil = fShapeW[1] * icstrideDil; - size_t hstride = kWidth; - size_t dstride = kHeight * kWidth; - size_t icstride = kHeight * kWidth * kDepth; - size_t ocstride = fShapeW[1] * icstride; - size_t wTotalElements = ConvertShapeToLength(fShapeW); - - std::string op; - - // Kernel 1: Weight vectorisation — reorder W into _f with dilation layout - // Each thread handles one output element of _f - std::string wKname = "WeightVecKernel_" + opName; - op = "\n//------ WEIGHT_VEC_KERNEL_ALPAKA (Conv " + opName + ")\n"; - op += SP + "struct " + wKname + " {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; - op += SP + SP + SP + "TAcc const& acc,\n"; - op += SP + SP + SP + "T const* __restrict__ W,\n"; - op += SP + SP + SP + "T* __restrict__ f,\n"; - op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; - - op += SP + SP + SP + "auto const global_thread_idx = 
alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; - op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; - - op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; - - // Decompose elem_idx into (oc, ic, kd, kh, kw) using compile-time strides - op += SP + SP + SP + SP + "std::size_t const oc = elem_idx / " + std::to_string(ocstride) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const oc_rem = elem_idx % " + std::to_string(ocstride) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const ic = oc_rem / " + std::to_string(icstride) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const ic_rem = oc_rem % " + std::to_string(icstride) + "u;\n"; - if (fDim > 2) { - op += SP + SP + SP + SP + "std::size_t const kd = ic_rem / " + std::to_string(kHeight * kWidth) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const kh = (ic_rem / " + std::to_string(kWidth) + "u) % " + std::to_string(kHeight) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const kw = ic_rem % " + std::to_string(kWidth) + "u;\n\n"; - } else if (fDim > 1) { - op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const kh = ic_rem / " + std::to_string(kWidth) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const kw = ic_rem % " + std::to_string(kWidth) + "u;\n\n"; - } else { - op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const kh = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const kw = ic_rem;\n\n"; - } - - // Compute destination index in _f (dilated layout) - op += SP + SP + SP + SP + "std::size_t const f_idx =\n"; - op += SP + SP + SP + SP + SP + "oc * " + std::to_string(ocstrideDil) + "u +\n"; - op += SP + SP + SP + SP + SP + "ic * " + std::to_string(icstrideDil) + "u"; - if (fDim > 2) op += " +\n" + SP + SP + SP + SP + SP + 
"kd * " + std::to_string(dstrideDil) + "u"; - if (fDim > 1) op += " +\n" + SP + SP + SP + SP + SP + "kh * " + std::to_string(hstrideDil) + "u"; - op += " +\n" + SP + SP + SP + SP + SP + "kw * " + std::to_string(wstrideDil) + "u;\n\n"; - - op += SP + SP + SP + SP + "f[f_idx] = W[elem_idx];\n"; - op += SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n\n"; - - // Kernel 2: Im2Col - std::string im2colKname = "Im2ColKernel_" + opName; - op += SP + "//------ IM2COL_KERNEL_ALPAKA (Conv " + opName + ")\n"; - op += SP + "struct " + im2colKname + " {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; - op += SP + SP + SP + "TAcc const& acc,\n"; - op += SP + SP + SP + "T const* __restrict__ input,\n"; - op += SP + SP + SP + "T* __restrict__ col,\n"; - op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; - - op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; - op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; - - op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; - - op += SP + SP + SP + SP + "std::size_t const col_row = elem_idx / " + std::to_string(colCols) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const col_col = elem_idx % " + std::to_string(colCols) + "u;\n\n"; - - op += SP + SP + SP + SP + "std::size_t const ic = col_row / " + std::to_string(kernelSize) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const k_rem = col_row % " + std::to_string(kernelSize) + "u;\n"; - if (fDim > 2) { - op += SP + SP + SP + SP + "std::size_t const kd = k_rem / " + std::to_string(kHeight * kWidth) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const kh = (k_rem / " + std::to_string(kWidth) + "u) % " + std::to_string(kHeight) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const 
kw = k_rem % " + std::to_string(kWidth) + "u;\n\n"; - } else if (fDim > 1) { - op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const kh = k_rem / " + std::to_string(kWidth) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const kw = k_rem % " + std::to_string(kWidth) + "u;\n\n"; - } else { - op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const kh = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const kw = k_rem;\n\n"; - } - - if (fDim > 2) { - op += SP + SP + SP + SP + "std::size_t const od = col_col / " + std::to_string(oHeight * oWidth) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const oh = (col_col / " + std::to_string(oWidth) + "u) % " + std::to_string(oHeight) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const ow = col_col % " + std::to_string(oWidth) + "u;\n\n"; - } else if (fDim > 1) { - op += SP + SP + SP + SP + "std::size_t const od = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const oh = col_col / " + std::to_string(oWidth) + "u;\n"; - op += SP + SP + SP + SP + "std::size_t const ow = col_col % " + std::to_string(oWidth) + "u;\n\n"; - } else { - op += SP + SP + SP + SP + "std::size_t const od = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const oh = 0u;\n"; - op += SP + SP + SP + SP + "std::size_t const ow = col_col;\n\n"; - } - - // Depth: trivially 0 for fDim < 3 (od=kd=0 always); pads[0] is height-begin for 2D, so - // applying it here would make id_in negative and zero the whole output. - if (fDim >= 3) { - op += SP + SP + SP + SP + "int64_t const id_in = static_cast(od * " + std::to_string(fAttrStrides[0]) - + "u + kd * " + std::to_string(fAttrDilations[0]) + "u) - " + std::to_string(fAttrPads[0]) + ";\n"; - } else { - op += SP + SP + SP + SP + "int64_t const id_in = 0;\n"; - } - // Height: for fDim==3 the height dim is at strides/pads index 1; for fDim==2 it is at index 0. - // For fDim==1 oh=kh=0 so ih_in=0. 
- { - size_t const hIdx = (fDim > 2) ? 1 : 0; - if (fDim >= 2) { - op += SP + SP + SP + SP + "int64_t const ih_in = static_cast(oh * " + std::to_string(fAttrStrides[hIdx]) - + "u + kh * " + std::to_string(fAttrDilations[hIdx]) + "u) - " + std::to_string(fAttrPads[hIdx]) + ";\n"; - } else { - op += SP + SP + SP + SP + "int64_t const ih_in = 0;\n"; - } - } - // Width: fAttrStrides/Dilations/Pads are ordered [d,h,w] so width is at index fDim-1. - { - size_t const wIdx = fDim - 1; - op += SP + SP + SP + SP + "int64_t const iw_in = static_cast(ow * " + std::to_string(fAttrStrides[wIdx]) - + "u + kw * " + std::to_string(fAttrDilations[wIdx]) + "u) - " + std::to_string(fAttrPads[wIdx]) + ";\n\n"; } - op += SP + SP + SP + SP + "bool const in_bounds =\n"; - op += SP + SP + SP + SP + SP + "id_in >= 0 && id_in < " + std::to_string(iDepth) + " &&\n"; - op += SP + SP + SP + SP + SP + "ih_in >= 0 && ih_in < " + std::to_string(iHeight) + " &&\n"; - op += SP + SP + SP + SP + SP + "iw_in >= 0 && iw_in < " + std::to_string(iWidth) + ";\n\n"; - - op += SP + SP + SP + SP + "if (in_bounds) {\n"; - op += SP + SP + SP + SP + SP + "std::size_t const in_idx =\n"; - op += SP + SP + SP + SP + SP + SP + "ic * " + std::to_string(iDepth * iHeight * iWidth) + "u +\n"; - op += SP + SP + SP + SP + SP + SP + "static_cast(id_in) * " + std::to_string(iHeight * iWidth) + "u +\n"; - op += SP + SP + SP + SP + SP + SP + "static_cast(ih_in) * " + std::to_string(iWidth) + "u +\n"; - op += SP + SP + SP + SP + SP + SP + "static_cast(iw_in);\n"; - op += SP + SP + SP + SP + SP + "col[elem_idx] = input[in_idx];\n"; - op += SP + SP + SP + SP + "} else {\n"; - op += SP + SP + SP + SP + SP + "col[elem_idx] = static_cast(0);\n"; - op += SP + SP + SP + SP + "}\n"; - op += SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n\n"; - - // Kernel 3: Bias broadcast (only if bias present) - if (!fNB2.empty()) { - std::string biasKname = "BiasBroadcastKernel_" + opName; - op += SP + "//------ 
BIAS_BROADCAST_KERNEL_ALPAKA (Conv " + opName + ")\n"; - op += SP + "struct " + biasKname + " {\n"; - op += SP + SP + "template\n"; - op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; - op += SP + SP + SP + "TAcc const& acc,\n"; - op += SP + SP + SP + "T const* __restrict__ bias,\n"; - op += SP + SP + SP + "T* __restrict__ output,\n"; - op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; - - op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; - op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; - op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; - - op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n"; - op += SP + SP + SP + SP + "std::size_t const channel = elem_idx / " + std::to_string(spatialSize) + "u;\n"; - op += SP + SP + SP + SP + "output[elem_idx] = bias[channel];\n"; - op += SP + SP + SP + "}\n"; - op += SP + SP + "}\n"; - op += SP + "};\n\n"; - } - - return op; - } - std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { opName = "op_" + opName; std::string op; @@ -751,7 +608,7 @@ public: std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeX.empty() || fShapeW.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Conv Op called to Generate without being initialized first"); size_t bsize = fShapeX[0]; size_t oDepth = (fDim > 2) ? 
fShapeY[2] : 1; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx index b9d917b..5a4acf3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx @@ -88,7 +88,7 @@ public: if (std::is_same::value) { fType = "float"; } else { - throw std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Conv operator"); + throw std::runtime_error("SOFIE Encountered unsupported type parsing a Conv operator"); } } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.icc b/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.icc index 3a52796..52b6b3e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.icc @@ -105,22 +105,22 @@ void ROperator_ConvTranspose::Initialize(RModel& model){ fUseSession = model.UseSession(); if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA SOFIE Conv Transpose op Input Tensor " + fNX + " is not found in model"); + throw std::runtime_error("SOFIE Conv Transpose op Input Tensor " + fNX + " is not found in model"); } fShapeX = model.GetTensorShape(fNX); if (fShapeX.size() < 3 || fShapeX.size() > 5) { std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl; - throw std::runtime_error("TMVA SOFIE Conv Transpose Op input data tensor" + fNX + + throw std::runtime_error("SOFIE Conv Transpose Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); } fDim = fShapeX.size() - 2; if (!model.CheckIfTensorAlreadyExist(fNW)) { - throw std::runtime_error("TMVA SOFIE Conv op Input weight Tensor " + fNW + " is not found in model"); + throw std::runtime_error("SOFIE Conv op Input weight Tensor " + fNW + " is not found in model"); } fShapeW = model.GetTensorShape(fNW); if (fShapeW.size() < 3 || fShapeW.size() > 5) { std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl; - throw 
std::runtime_error("TMVA SOFIE Conv Transpose Op input weight tensor" + fNW + + throw std::runtime_error("SOFIE Conv Transpose Op input weight tensor" + fNW + " is not of 3,4 or 5 dimensions"); } fShapeY = ShapeInference({fShapeX, fShapeW})[0]; @@ -128,11 +128,11 @@ void ROperator_ConvTranspose::Initialize(RModel& model){ model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); if (fNB != "") { if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw std::runtime_error("TMVA SOFIE ConvTrans op Input Tensor " + fNB + " is not found in model"); + throw std::runtime_error("SOFIE ConvTrans op Input Tensor " + fNB + " is not found in model"); } fShapeB = model.GetTensorShape(fNB); if (fShapeB.size() < 1) - throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has empty shape"); + throw std::runtime_error("SOFIE ConvTrans op: Bias Tensor has empty shape"); size_t bsize = ConvertShapeToLength(fShapeB); size_t ysize = ConvertShapeToLength(fShapeY); @@ -143,13 +143,13 @@ void ROperator_ConvTranspose::Initialize(RModel& model){ // we assume bias tensor size is equal to number of filters that is the second dimension in // the output tensor if (bsize != fShapeY[1] ) - throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has wrong shape: " + + throw std::runtime_error("SOFIE ConvTrans op: Bias Tensor has wrong shape: " + ConvertShapeToString(fShapeB)); auto original_data = model.GetInitializedTensorData(fNB); if (fType != "float") - throw std::runtime_error("TMVA SOFIE ConvTrans op: Broadcasting for non-float type tensors is not supported"); + throw std::runtime_error("SOFIE ConvTrans op: Broadcasting for non-float type tensors is not supported"); // here the acual broadcasting if (!fUseSession) { // Broadcast B from M to N x M x Od x Oh x Ow @@ -170,7 +170,7 @@ void ROperator_ConvTranspose::Initialize(RModel& model){ else { // bias tensor is already correct shape, no need to broadcast if (fShapeY != fShapeB) - throw std::runtime_error("TMVA SOFIE 
ConvTrans op: Broadcasting is not needed but bias has wrong shape" + + throw std::runtime_error("SOFIE ConvTrans op: Broadcasting is not needed but bias has wrong shape" + ConvertShapeToString(fShapeB)); fNBroadcastedB = fNB; } @@ -218,7 +218,7 @@ std::string ROperator_ConvTranspose::Generate(std::string OpName) OpName = "op_" + OpName; if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Conv Op called to Generate without being initialized first"); } std::stringstream out; @@ -331,7 +331,7 @@ std::string ROperator_ConvTranspose::Generate(std::string OpName) // Resulting matrix op_xcol is (output channels * filter_h * filter_w , output_h * output_w) if (fDim == 1) { if (fAttrPads[0] != fAttrPads[1]) { - std::cout << "TMVA SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " + std::cout << "SOFIE Operator Conv: asymmetric padding not supported. Assume an average padding " << std::endl; fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2; } @@ -339,7 +339,7 @@ std::string ROperator_ConvTranspose::Generate(std::string OpName) } if (fDim == 2) { if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) { - std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assume an average padding " + std::cout << "SOFIE Operator ConvTranspose: asymmetric padding not supported. Assume an average padding " << std::endl; fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2; fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2; @@ -347,7 +347,7 @@ std::string ROperator_ConvTranspose::Generate(std::string OpName) } if (fDim == 3) { if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) { - std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. 
Assume an average padding " + std::cout << "SOFIE Operator ConvTranspose: asymmetric padding not supported. Assume an average padding " << std::endl; fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2; fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2; @@ -385,7 +385,7 @@ std::string ROperator_ConvTranspose::Generate(std::string OpName) out << ", tensor_" << fNY << " + out_offset);\n\n "; } else { // 3d : needs a col2im for 3d - throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported"); + throw std::runtime_error("SOFIE 3D Conv Transpose not yet supported"); out << SP << SP << "SOFIE::UTILITY::Im2col_3d(tensor_" << fNX << " + x_offset," // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, @@ -436,7 +436,7 @@ std::string ROperator_ConvTranspose::Generate(std::string OpName) out << ", tensor_" << fNY << " + out_offset);\n\n "; } else { // 3d im2col - throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported"); + throw std::runtime_error("SOFIE 3D Conv Transpose not yet supported"); out << SP << SP << "SOFIE::UTILITY::Im2col_3d(tensor_" << fNX << " + x_offset," diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx index c24d329..fb618d4 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx @@ -48,13 +48,13 @@ public: for(auto& it:fInputNames){ if (model.CheckIfTensorAlreadyExist(it) == false){ - throw std::runtime_error("TMVA SOFIE Custom " + fOpName + " Op Input Tensor " + it + " is not found in model"); + throw std::runtime_error("SOFIE Custom " + fOpName + " Op Input Tensor " + it + " is not found in model"); } fInputSizes.push_back(ConvertShapeToLength(model.GetTensorShape(it))); } if(fOutputNames.size() != fOutputShapes.size()){ - throw std::runtime_error("TMVA SOFIE Custom "+ fOpName + " Op was not intialized with the names/shapes of all the output tensors"); + throw 
std::runtime_error("SOFIE Custom "+ fOpName + " Op was not intialized with the names/shapes of all the output tensors"); } for(long unsigned int i=0; i labelsMap; for ( auto & name : fNInputs) { if (!model.CheckIfTensorAlreadyExist(name)) - throw std::runtime_error(std::string("TMVA SOFIE Einsum Op Input Tensor ") + name + "is not found in model"); + throw std::runtime_error(std::string("SOFIE Einsum Op Input Tensor ") + name + "is not found in model"); // if (model.IsDynamicTensor(name) || model.IsDimInputTensor(name) ) { // // not yet supported @@ -141,7 +141,7 @@ public: std::string labels = fInputLabels[i]; for (size_t j = 0; j < shape.size(); j++) { if (j >= labels.length()) { - throw std::runtime_error(std::string("TMVA SOFIE Einsum Op Input Tensor has invalid label or shape ") + labels + " " + ConvertShapeToString(shape)); + throw std::runtime_error(std::string("SOFIE Einsum Op Input Tensor has invalid label or shape ") + labels + " " + ConvertShapeToString(shape)); } labelsMap[labels[j]] = shape[j]; } @@ -150,7 +150,7 @@ public: // get output shape from label maps for (char l : fOutputLabels) { if (labelsMap.count(l) == 0) - throw std::runtime_error(std::string("TMVA SOFIE Einsum Op : output label ") + std::string(&l) + " is not present in inputs"); + throw std::runtime_error(std::string("SOFIE Einsum Op : output label ") + std::string(&l) + " is not present in inputs"); fShapeY.push_back(labelsMap[l]); } // we need to get the labels we are going to sum @@ -210,7 +210,7 @@ public: opName = "op_" + opName; if (fShapeY.size() != fOutputLabels.length()) { - throw std::runtime_error("TMVA SOFIE Einsum Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Einsum Op called to Generate without being initialized first"); } // function to write compute expression index from strides diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx index dcbfd68..43b1886 100644 --- 
a/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx @@ -35,7 +35,7 @@ public: fType = "float"; } else{ - throw std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Elu operator"); + throw std::runtime_error("SOFIE Encountered unsupported type parsing a Elu operator"); } } @@ -50,7 +50,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Elu Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Elu Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); @@ -60,7 +60,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Elu called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Elu called to Generate without being initialized first"); } std::stringstream out; size_t length = ConvertShapeToLength(fShape); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx index cdcadae..bbcb916 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx @@ -40,7 +40,7 @@ public: void Initialize(RModel& model) override { // input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA SOFIE Expand Op Input Tensor " + fNX + " is not found in model"); + throw std::runtime_error("SOFIE Expand Op Input Tensor " + fNX + " is not found in model"); } fShapeX = model.GetDimTensorShape(fNX); if (model.IsInitializedTensor(fNShape)) { @@ -145,7 +145,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if 
(fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Expand Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Expand Op called to Generate without being initialized first"); } std::stringstream out; out << SP << "\n//------ Expand " << opName << " --> " << ConvertDimShapeToString(fShapeY) << "\n"; @@ -170,7 +170,7 @@ std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Expand Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Expand Op called to Generate without being initialized first"); // Can only generate a static kernel if all dimensions are concrete values auto isStatic = [](const std::vector& shape) { @@ -281,7 +281,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Operator Expand called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Expand called to Generate without being initialized first"); std::stringstream out; out << "\n//------ EXPAND_GPU_ALPAKA\n"; @@ -318,7 +318,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { if (!staticShapes) { // Dynamic shapes — not yet supported on GPU, throw a clear error throw std::runtime_error( - "TMVA SOFIE Expand GPU: dynamic shapes are not yet supported for GPU inference. " + "SOFIE Expand GPU: dynamic shapes are not yet supported for GPU inference. 
" "Tensor " + fNX + " has a dynamic shape."); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_EyeLike.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_EyeLike.hxx index 8e94e1c..91103ef 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_EyeLike.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_EyeLike.hxx @@ -40,11 +40,11 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE EyeLike Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE EyeLike Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); if (fShape.size() != 2) - throw std::runtime_error("TMVA SOFIE EyeLike Op Input Tensor is not of rank 2"); + throw std::runtime_error("SOFIE EyeLike Op Input Tensor is not of rank 2"); if(fdtype){ ETensorType extractedType = static_cast(fdtype); @@ -59,7 +59,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()){ - throw std::runtime_error("TMVA SOFIE Operator EyeLike called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator EyeLike called to Generate without being initialized first"); } auto length = ConvertShapeToLength(fShape); auto stride = SOFIE::UTILITY::ComputeStrideFromShape(fShape); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx index 5b553ff..037e016 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx @@ -114,7 +114,7 @@ template class ROperator_GRU final : public ROperator { fType = "float"; } else { throw std::runtime_error( - "TMVA SOFIE Encountered unsupported type parsing a GRU operator"); + "SOFIE Encountered unsupported type parsing a GRU operator"); } } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc index 
38030d1..f24460c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc @@ -38,33 +38,33 @@ void ROperator_GRU::Initialize(RModel& model){ fUseSession = model.UseSession(); // Check the input and output tensors if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + fNX + " is not found in model."); + throw std::runtime_error("SOFIE GRU Op input tensor " + fNX + " is not found in model."); } fShapeX = model.GetTensorShape(fNX); if (fShapeX.size() != 3) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + fNX + " is not of 3 dimensions."); + throw std::runtime_error("SOFIE GRU Op input tensor " + fNX + " is not of 3 dimensions."); } if (!model.CheckIfTensorAlreadyExist(fNW)) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + fNW + " is not found in model."); + throw std::runtime_error("SOFIE GRU Op input tensor " + fNW + " is not found in model."); } fShapeW = model.GetTensorShape(fNW); if (fShapeW.size() != 3) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + fNW + " is not of 3 dimensions."); + throw std::runtime_error("SOFIE GRU Op input tensor " + fNW + " is not of 3 dimensions."); } if (!model.CheckIfTensorAlreadyExist(fNR)) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + fNR + " is not found in model."); + throw std::runtime_error("SOFIE GRU Op input tensor " + fNR + " is not found in model."); } fShapeR = model.GetTensorShape(fNR); if (fShapeR.size() != 3) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + fNR + " is not of 3 dimensions."); + throw std::runtime_error("SOFIE GRU Op input tensor " + fNR + " is not of 3 dimensions."); } if (!fNB.empty()) { if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw std::runtime_error("TMVA SOFIE GRU op input tensor " + fNB + " is not found in model."); + throw std::runtime_error("SOFIE GRU op input tensor " + fNB + " is not found in model."); } 
fShapeB = model.GetTensorShape(fNB); if (fShapeB.size() != 2 && fShapeB.size() != 4) { - throw std::runtime_error("TMVA SOFIE GRU op input tensor " + fNB + " is not of 2 or 4 dimensions."); + throw std::runtime_error("SOFIE GRU op input tensor " + fNB + " is not of 2 or 4 dimensions."); } if (fShapeB.size() == 2) { // Broadcasting the bias @@ -99,25 +99,25 @@ void ROperator_GRU::Initialize(RModel& model){ } if (!fNSequence_lens.empty()) { if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + + throw std::runtime_error("SOFIE GRU Op input tensor " + fNSequence_lens + "is not found in model."); } fShapeSequence_lens = model.GetTensorShape(fNSequence_lens); if (fShapeSequence_lens.size() != 1) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + + throw std::runtime_error("SOFIE GRU Op input tensor " + fNSequence_lens + " is not of 1 dimension."); } } if (!fNInitial_h.empty()) { if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + + throw std::runtime_error("SOFIE GRU Op input tensor " + fNInitial_h + " is not found in model."); } fShapeInitial_h = model.GetTensorShape(fNInitial_h); if (fShapeInitial_h.size() != 3) { - throw std::runtime_error("TMVA SOFIE GRU Op input tensor " + + throw std::runtime_error("SOFIE GRU Op input tensor " + fNInitial_h + " is not of 3 dimensions."); } } @@ -141,7 +141,7 @@ void ROperator_GRU::Initialize(RModel& model){ activation != "ScaledTanh" && activation != "HardSigmoid" && activation != "Elu" && activation != "Softsign" && activation != "Softplus") { - throw std::runtime_error("TMVA SOFIE - Activation function " + + throw std::runtime_error("SOFIE - Activation function " + activation + " not implemented"); } } @@ -150,22 +150,22 @@ void ROperator_GRU::Initialize(RModel& model){ fAttrDirection != "reverse" && fAttrDirection != "bidirectional") { throw std::runtime_error( - "TMVA SOFIE - Invalid 
GRU direction fAttrDirection = " + + "SOFIE - Invalid GRU direction fAttrDirection = " + fAttrDirection); } if (3 * fAttrHiddenSize != fShapeW[1]) { throw std::runtime_error( - "TMVA SOFIE - fAttrHiddenSize must be equal to " + + "SOFIE - fAttrHiddenSize must be equal to " + std::to_string(fShapeW[1] / 3)); } if (fAttrLayout > 1) { - throw std::runtime_error("TMVA SOFIE - Layout fAttrLayout = " + + throw std::runtime_error("SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) + " must be 0 (timewise) or 1 (batchwise)"); } if (fAttrLinearBeforeReset > 1) { throw std::runtime_error( - "TMVA SOFIE - fAttrInputForget = " + std::to_string(fAttrLinearBeforeReset) + "SOFIE - fAttrInputForget = " + std::to_string(fAttrLinearBeforeReset) + " must be 0 or 1."); } if (fAttrActivations.empty()) { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx index b6083ef..e3cd58d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx @@ -49,7 +49,7 @@ public: void Initialize(RModel& model) override { if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA SOFIE Gather Op Input Tensor " + fNX + " is not found in model"); + throw std::runtime_error("SOFIE Gather Op Input Tensor " + fNX + " is not found in model"); } fShapeX = model.GetDimTensorShape(fNX); if (model.Verbose()) @@ -280,7 +280,7 @@ std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Gather Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Gather Op called to Generate without being initialized first"); const std::size_t D = fShapeY.size(); // output rank = q + r - 1 const std::size_t r = fShapeX.size(); @@ -373,7 +373,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { if (fIsOutputConstant) 
return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Gather Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Gather Op called to Generate without being initialized first"); auto totalElements = ConvertDimShapeToLength(fShapeY); std::string kname = "gatherKernel_" + opName; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx index 3fa45fa..8612368 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx @@ -53,9 +53,9 @@ public: void Initialize(RModel& model) override { if (!model.CheckIfTensorAlreadyExist(fNData)) - throw std::runtime_error("TMVA SOFIE GatherND: data tensor " + fNData + " not found in model"); + throw std::runtime_error("SOFIE GatherND: data tensor " + fNData + " not found in model"); if (!model.CheckIfTensorAlreadyExist(fNIndices)) - throw std::runtime_error("TMVA SOFIE GatherND: indices tensor " + fNIndices + " not found in model"); + throw std::runtime_error("SOFIE GatherND: indices tensor " + fNIndices + " not found in model"); fShapeData = model.GetTensorShape(fNData); fShapeIndices = model.GetTensorShape(fNIndices); @@ -66,17 +66,17 @@ public: size_t last_idx_dim = fShapeIndices.back(); if (r < 1) - throw std::runtime_error("TMVA SOFIE GatherND: data rank must be >= 1"); + throw std::runtime_error("SOFIE GatherND: data rank must be >= 1"); if (q < 1) - throw std::runtime_error("TMVA SOFIE GatherND: indices rank must be >= 1"); + throw std::runtime_error("SOFIE GatherND: indices rank must be >= 1"); if (b >= std::min(q, r)) - throw std::runtime_error("TMVA SOFIE GatherND: batch_dims must be < min(q, r)"); + throw std::runtime_error("SOFIE GatherND: batch_dims must be < min(q, r)"); if (last_idx_dim > r - b) - throw std::runtime_error("TMVA SOFIE GatherND: indices_shape[-1] must be <= r - batch_dims"); + throw std::runtime_error("SOFIE GatherND: 
indices_shape[-1] must be <= r - batch_dims"); for (size_t i = 0; i < b; ++i) { if (fShapeData[i] != fShapeIndices[i]) - throw std::runtime_error("TMVA SOFIE GatherND: first batch_dims dimensions of data and indices must match"); + throw std::runtime_error("SOFIE GatherND: first batch_dims dimensions of data and indices must match"); } // Output shape: batch_dims + indices[0..q-2] + data[b + last_idx_dim .. r-1] @@ -103,7 +103,7 @@ public: std::string Generate(std::string opName) override { opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE GatherND called to Generate without being initialized first"); + throw std::runtime_error("SOFIE GatherND called to Generate without being initialized first"); size_t r = fShapeData.size(); size_t q = fShapeIndices.size(); @@ -163,7 +163,7 @@ public: std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE GatherND called to Generate without being initialized first"); + throw std::runtime_error("SOFIE GatherND called to Generate without being initialized first"); size_t r = fShapeData.size(); size_t q = fShapeIndices.size(); @@ -271,7 +271,7 @@ public: std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE GatherND called to Generate without being initialized first"); + throw std::runtime_error("SOFIE GatherND called to Generate without being initialized first"); std::size_t totalElements = ConvertShapeToLength(fShapeY); std::string kname = "gatherNDKernel_" + opName; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx index 218e11e..d340776 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx @@ -24,6 +24,7 @@ namespace SOFIE{ private: bool fIsDynamic = false; bool fBroadcastBias = false; + bool 
fCheckBiasShapeAtRuntime = false; // flag to identify the need to do a run time check of bias shape compatibility in case of dynamic shapes and uni-directional broadcasting float fAttrAlpha = 1.0; float fAttrBeta = 1.0; @@ -39,6 +40,7 @@ namespace SOFIE{ std::vector fShapeA; std::vector fShapeB; std::vector fShapeC; + std::vector fDimShapeC; std::vector fShapeY; RModel * fModel = nullptr; @@ -49,7 +51,6 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNY(UTILITY::Clean_name(nameY)) { - fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; static_assert(std::is_same_v, @@ -62,7 +63,6 @@ namespace SOFIE{ fAttrAlpha(alpha), fAttrBeta(beta), fAttrTransA(transA), fAttrTransB(transB), fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNC(UTILITY::Clean_name(nameC)), fNY(UTILITY::Clean_name(nameY)), fActivation(activation) { - fKind = OperatorKind::GEMM; fActivation = activation; fType = "float"; @@ -77,12 +77,12 @@ namespace SOFIE{ template std::vector DoShapeInference(const std::vector> & input){ - if (input.size() > 3) throw std::runtime_error("TMVA SOFIE Gemm Op Shape Inference only need 2 or 3 input tensor"); + if (input.size() > 3) throw std::runtime_error("SOFIE Gemm Op Shape Inference only need 2 or 3 input tensor"); // accept tensor with input dimensions > 2 // example: A = (d1,d2,...,N1,N2) B = (d1,d2,...,N2,N3) --> Y = (d1,d2,..,N1,N3) for (auto& i: input){ if (i.size() < 2){ - throw std::runtime_error("TMVA SOFIE Gemm Op Shape Inference only accept input tensor with >=2 dimensions"); + throw std::runtime_error("SOFIE Gemm Op Shape Inference only accept input tensor with >=2 dimensions"); } } @@ -118,7 +118,7 @@ namespace SOFIE{ else if (valueA.GetVal() == "1") s_y.push_back(input[1][i]); else if (!valueA.isParam && !valueB.isParam) - throw std::runtime_error("TMVA SOFIE Gemm Op - invalid input shapes " + 
valueA.GetVal() + " and " + throw std::runtime_error("SOFIE Gemm Op - invalid input shapes " + valueA.GetVal() + " and " + valueB.GetVal()); else if (valueA.isParam && valueB.isParam){ // check which parameter is first in RModel list @@ -133,7 +133,7 @@ namespace SOFIE{ else if (!valueB.isParam) s_y.push_back(input[1][i]); else - throw std::runtime_error("TMVA SOFIE Gemm Op - invalid input shapes " + valueA.GetVal() + " and " + throw std::runtime_error("SOFIE Gemm Op - invalid input shapes " + valueA.GetVal() + " and " + valueB.GetVal()); } else @@ -162,11 +162,11 @@ namespace SOFIE{ fModel = &model; if ((model.CheckIfTensorAlreadyExist(fNA) == false) || (model.CheckIfTensorAlreadyExist(fNB) == false) ){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor " + fNA + " or " + fNB + " is not found in model"); + throw std::runtime_error("SOFIE Gemm Op Input Tensor " + fNA + " or " + fNB + " is not found in model"); } if (fNC != ""){ if (model.CheckIfTensorAlreadyExist(fNC) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is not found in model"); + throw std::runtime_error("SOFIE Gemm Op Input Tensor " + fNC + " is not found in model"); } } if (model.IsDynamicTensor(fNA) || model.IsDimInputTensor(fNA) ) { @@ -213,37 +213,38 @@ namespace SOFIE{ // bias is normally not dynamic (not support it for time being) if (fNC != ""){ - // normally bias is fixed and not dynamic - if (model.IsDynamicTensor(fNC)) { - throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is dynamic and is not supported"); + if (model.IsDynamicTensor(fNC)) + fDimShapeC = model.GetDynamicTensorShape(fNC); + else { + fShapeC = model.GetTensorShape(fNC); + fDimShapeC = ConvertShapeToDim(fShapeC); } - fShapeC = model.GetTensorShape(fNC); // for dynamic outputs broadcasting is always needed bool 
broadcast_needed = false; if (fIsDynamic && shapeY.empty()) broadcast_needed = true; else - // consider broadcasting also if same length + // consider broadcasting also if they have different length broadcast_needed = (fShapeC != shapeY); if (broadcast_needed) { fBroadcastBias = true; // check if broadcasting is compatible and note that prepend 1 to shapeC - auto shapeDimC = ConvertShapeToDim(fShapeC); - auto r = UTILITY::MultidirectionalBroadcastShape(fShapeY, shapeDimC); - // return flag must be equal to 1 since this is a unidirectional broadcast of C->Y - if (r.first > 1) { - throw std::runtime_error("TMVA SOFIE Gemm Op - bias tensor of shape " + ConvertShapeToString(fShapeC) + " cannot be uni-directional broadcasted to " + ConvertDimShapeToString(fShapeY)); - } - fShapeC = ConvertShapeToInt(shapeDimC); - if (fShapeC.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op - Error in bias tensor " + ConvertDimShapeToString(shapeDimC) ); + auto r = UTILITY::MultidirectionalBroadcastShape(fShapeY, fDimShapeC); + // return flag must not have bit equal to 2 since this is a unidirectional broadcast of C->Y + // + if ((r.first & 2) == 2) { + throw std::runtime_error("SOFIE Gemm Op - bias tensor of shape " + ConvertDimShapeToString(fDimShapeC) + " cannot be uni-directional broadcasted to " + ConvertDimShapeToString(fShapeY)); + } else if (r.first == 4) { + // we need to do a run time check of bias shape if it is compatible + fCheckBiasShapeAtRuntime = true; } + fShapeC = ConvertShapeToInt(fDimShapeC); } } - // remove appended or prepended value of 1 + // remove appended or prepended value of 1 in Y if (prependOne) { if (fIsDynamic) fShapeY.erase(fShapeY.begin()); @@ -276,9 +277,9 @@ namespace SOFIE{ std::string Generate(std::string opName) override { opName = "op_" + opName; - if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) { - throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized 
first"); - } + // if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) { + // throw std::runtime_error("SOFIE Gemm Op called to Generate without being initialized first"); + // } std::stringstream out; out << "\n//--------- Gemm " << opName << " " << ConvertDimShapeToString(fShapeA) << " * " << ConvertDimShapeToString(fShapeB) << " -> " << ConvertDimShapeToString(fShapeY) << "\n"; @@ -286,8 +287,13 @@ namespace SOFIE{ int64_t dimA = fShapeA.size(); int64_t dimB = fShapeB.size(); int64_t dimY = fShapeY.size(); - if (dimA != dimB || dimA != dimY) { - throw std::runtime_error("TMVA SOFIE Gemm(MatMul) has invalid shape for inputs or output"); + int64_t dimC = fDimShapeC.size(); + if (dimA != dimB || dimA != dimY || (fBroadcastBias && dimC != dimY)) { + std::cout << " shape A " << ConvertDimShapeToString(fShapeA) + << " shape B " << ConvertDimShapeToString(fShapeB) + << " shape C " << ConvertDimShapeToString(fDimShapeC) + << " shape Y " << ConvertDimShapeToString(fShapeY) << std::endl; + throw std::runtime_error("SOFIE Gemm(MatMul) has invalid shape for inputs or output"); } auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); auto n = (fAttrTransB ? 
fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); @@ -302,26 +308,49 @@ namespace SOFIE{ } auto lengthGemm = ConvertDimShapeToLength(sY); // size of the Gemm operation auto lengthExtra_Y = ConvertDimShapeToLength(sExtraY); // extra length in case input tensors are of dim>2 (MatMul) + std::string lengthExtra_C; + std::vector sExtraC; + std::vector sC; + bool haveExtraC = false; + if (dimC > 2) { + sC = {fDimShapeC[dimC-2], fDimShapeC[dimC-1]}; + for (int64_t i = 0; i < dimC-2; i++) { + sExtraC.push_back(fDimShapeC[i]); + } + lengthExtra_C = ConvertDimShapeToLength(sExtraC); + if (lengthExtra_C != "1") haveExtraC = true; + } else if (dimC > 0) { + for (int64_t i = 0; i < dimC; i++) { + sC.push_back(fDimShapeC[i]); + } + } // case bias is present if (!fNC.empty()){ + // when the 2 last dims of bias and Y are not compatible we need to perform a run time broadcast + if (sC != sY) fBroadcastBias = true; if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { - if (std::stoi(lengthGemm) != static_cast(ConvertShapeToLength(fShapeC))) - throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor has not correct size " + if ((std::stoi(lengthGemm) != std::stoi(ConvertDimShapeToLength(sC))) || + ( haveExtraC && std::stoi(lengthExtra_Y) != std::stoi(lengthExtra_C))) + throw std::runtime_error("SOFIE Gemm Op " + opName + " Bias tensor " + fNC + " has not correct size " + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " == " << ConvertShapeToLength(fShapeC) << ");\n"; + out << SP << "assert(" << lengthGemm << " == " << ConvertDimShapeToLength(sC) << ");\n"; + if (haveExtraC) out << SP << "assert(" << lengthExtra_Y << " == " << lengthExtra_C << ");\n"; } } } else { + fBroadcastBias = false; //in this case fAttrBeta needs to be equal to 
zero otherwise second time we run we will use // the previous result if (fAttrBeta != 0) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero"); + // some model don't have bias but Beta is not zero - force it to zero + fAttrBeta = 0; + std::cout << "WARNING: SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero - force it to zero\n"; } } @@ -338,57 +367,116 @@ namespace SOFIE{ std::vector sA(fShapeA.begin(), fShapeA.begin()+dimA-2); std::vector sB(fShapeB.begin(), fShapeB.begin()+dimB-2); std::vector mA = {fShapeA[dimA-2], fShapeA[dimA-1]}; - std::vector mB = {fShapeA[dimB-2], fShapeB[dimB-1]}; + std::vector mB = {fShapeB[dimB-2], fShapeB[dimB-1]}; lengthExtra_A = ConvertDimShapeToLength(sA); lengthExtra_B = ConvertDimShapeToLength(sB); - // size of A performing matmul is m*k and n*k for B + // if A ( b, m, k) and B (b, k, n) these are the strides of A and B ( m*k for A and n*k for B ) increment_A = ConvertDimShapeToLength(mA); increment_B = ConvertDimShapeToLength(mB); } bool extraA = (doStackMul && lengthExtra_A != "1"); bool extraB = (doStackMul && lengthExtra_B != "1"); + bool extraC = (doStackMul && haveExtraC && !fBroadcastBias); + // run time check for bias broadcasting + std::string biasShapeType = opName + "_biasShapeType"; + if (fBroadcastBias && fCheckBiasShapeAtRuntime) { + // create a flag according to bias shape: + // = 1 for (1,Y2) + // = 2 for (Y1,1) + // = 3 for a scalar + out << SP << "int " << biasShapeType << " = 0;\n"; + // case vector of columns + if (sC[0].GetVal() != "1" && sC[1].GetVal() != sY[1].GetVal()) + out << SP << "if (" << sC[0] << " == 1 && " << sC[1] << " == " << sY[1] << ")\n"; + else if (sC[0].GetVal() == "1") + out << SP << "if (" << sC[1] << " == " << sY[1] << ")\n"; + else if (sC[1].GetVal() == sY[1].GetVal()) + out << SP << "if (" << sC[0] << " == 1)\n"; + + out << SP << SP << biasShapeType << " = 1;\n"; + + // case 
vector of rows + if (sC[1].GetVal() != "1" && sC[0].GetVal() != sY[0].GetVal()) + out << SP << "else if (" << sC[1] << " == 1 && " << sC[0] << " == " << sY[0] << ")\n"; + else if (sC[1].GetVal() == "1") + out << SP << "else if (" << sC[0] << " == " << sY[0] << ")\n"; + else if (sC[0].GetVal() == sY[0].GetVal()) + out << SP << "else if (" << sC[1] << " == 1)\n"; + + out << SP << SP << biasShapeType << " = 2;\n"; + + // case scalar + if (sC[0].GetVal() != "1" && sC[1].GetVal() != "1") + out << SP << "else if (" << sC[0] << " == 1 && " << sC[1] << " == 1 )\n"; + else if (sC[0].GetVal() == "1") + out << SP << "else if (" << sC[1] << " == 1)\n"; + else if (sC[1].GetVal() == "1") + out << SP << "else if (" << sC[0] << " == 1)\n"; + out << SP << SP << biasShapeType << " = 3;\n"; + out << SP << "else\n"; + out << SP << SP << "throw std::runtime_error(\"SOFIE Gemm Op - bias tensor " + << ConvertDimShapeToString(fDimShapeC) << " cannot be broadcasted to " + << ConvertDimShapeToString(fShapeY) << "\");\n"; + } + auto SP2 = SP; if (doStackMul) { out << SP << "size_t " << opName << "_y_offset = 0;\n"; // needed if we stack the gemm operations if (extraA) out << SP << "size_t " << opName << "_A_offset = 0;\n"; if (extraB) out << SP << "size_t " << opName << "_B_offset = 0;\n"; + if (extraC) + out << SP << "size_t " << opName << "_C_offset = 0;\n"; out << SP << "for (size_t i = 0; i < " << lengthExtra_Y << "; i++){\n"; - out << SP; + SP2 += SP; } - // do the bias broadcasting + // do the bias broadcasting at run time by + // initializing output Y vector with bias values if (fBroadcastBias) { + fAttrBeta = 1.; - out << SP << "for (size_t j = 0; j < " << sY[0] << "; j++) { \n"; - out << SP << SP << "size_t y_index = "; - if (doStackMul) // add offset in caseof stack multiplications (not sure if bias is present in these cases) + + // loop on first output dimension + out << SP2 << "for (size_t j = 0; j < " << sY[0] << "; j++) { \n"; + out << SP2 << SP << "size_t y_index = "; + if 
(doStackMul) // add offset in case of stack multiplications (not sure if bias is present in these cases) out << opName << "_y_offset + "; if (sY[1].GetVal() != "1") out << sY[1] << " * j;\n"; else out << "j;\n"; - out << SP << SP << "for (size_t k = 0; k < " << sY[1] << "; k++) { \n"; - std::string bias_index; - if (fShapeC[0] == 1 && fShapeC[1] == sY[1].dim) - bias_index = "k"; - else if (fShapeC[1] == 1 && fShapeC[0] == sY[0].dim) - bias_index = "j"; - else if (fShapeC[0] == 1 && fShapeC[1] == 1) // scalar case - bias_index = "0"; - else { - throw std::runtime_error("TMVA SOFIE Gemm Op - invalid shape for bias tensor " + ConvertShapeToString(fShapeC)); + std::string prefix = SP2 + SP + "TMVA::Experimental::SOFIE::"; + std::string target = "tensor_" + fNY; + if (sC.size() != 2) { + throw std::runtime_error("SOFIE Gemm Op - invalid rank for bias tensor " + ConvertDimShapeToString(fDimShapeC) + ConvertDimShapeToString(sC)); + } if (sC[0].GetVal() == "1" && sC[1].GetVal() == sY[1].GetVal()) { + out << prefix << "Copy(" << target << " + y_index, tensor_" << fNC << ", " << sY[1] << ");\n"; + } else if (sC[1].GetVal() == "1" && sC[0].GetVal() == sY[0].GetVal()) { + out << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[j], " << sY[1] << ");\n"; + } else if (sC[0].GetVal() == "1" && sC[1].GetVal() == "1") { + // scalar case + out << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[0], " << sY[1] << ");\n"; + } else if (fCheckBiasShapeAtRuntime) { + // in the generic dynamic case we check at run time that bias is compatible + // we check that bias[0] = 1 or equal to SY[0] and that bias[1] = 1 or equal to SY[1] + // tbd: this run-time check coul;d be moved outside the loop for better run time efficiency + out << SP2 << SP << "if (" << biasShapeType << " == 1)\n"; // case vector of columns + out << SP << prefix << "Copy(" << target << " + y_index, tensor_" << fNC << ", " << sY[1] << ");\n"; + out << SP2 << SP << "else if (" << biasShapeType 
<< " == 2)\n"; // case vector of rows + out << SP << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[j], " << sY[1] << ");\n"; + out << SP2 << SP << "else \n"; // scalar case + out << SP << prefix << "Fill(" << target << " + y_index, tensor_" << fNC << "[0], " << sY[1] << ");\n"; + } else { + throw std::runtime_error("SOFIE Gemm Op - invalid shape for bias tensor " + ConvertDimShapeToString(fDimShapeC)); } - out << SP << SP << SP << "tensor_" << fNY << "[y_index + k] = " << "tensor_" << fNC << "[" << bias_index << "];\n"; - out << SP << SP << "}\n"; - out << SP << "}\n"; + out << SP2 << "}\n"; } if (fType == "float"){ - out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" - << "tensor_" << fNY; + out << SP2 << "TMVA::Experimental::SOFIE::Gemm_Call(" << "tensor_" << fNY; if (doStackMul) out << " + " << opName << "_y_offset"; out << ", " << (fAttrTransB ? "true, " : "false, ") @@ -399,18 +487,17 @@ namespace SOFIE{ out << ", tensor_" << fNA; if (extraA) out << " + " << opName << "_A_offset"; out << ", " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; - // in the case of bias and no broadcasting needed - if (!fNC.empty() && !fBroadcastBias) + // in the case of bias and no broadcasting needed - I need to add bias as an extra tensor in Gemm call + if (!fNC.empty() && !fBroadcastBias) { out << "tensor_" << fNC; - else + if (extraC) { + out << " + " << opName << "_C_offset"; + } + } else { out << "nullptr"; + } out << ");\n"; - if(fActivation == EActivationType::RELU){ - out << SP << "for (int id = 0; id < " << ConvertDimShapeToLength(fShapeY) << " ; id++){\n"; - out << SP << SP << "tensor_" << fNY << "[id] = ((tensor_" << fNY << "[id] > 0 )? 
tensor_" << fNY << "[id] : 0);\n"; - out << SP << "}\n"; - } } if (doStackMul) { @@ -419,8 +506,18 @@ namespace SOFIE{ out << SP << SP << opName << "_A_offset += " << increment_A << ";\n"; if (lengthExtra_B != "1") out << SP << SP << opName << "_B_offset += " << increment_B << ";\n"; + if (extraC) + // increment_C is lengthGEmm + out << SP << SP << opName << "_C_offset += " << lengthGemm << ";\n"; + out << SP << "}\n"; // end of loop on the stacked multiplication + } - out << "}\n"; // end of loop on the stacked multiplications + // fuse with Relu + if(fActivation == EActivationType::RELU){ + out << SP << "//--- applying RELU to output\n"; + std::string tnsr = "tensor_" + fNY; + std::string reluSize = ConvertDimShapeToLength(fShapeY); + out << SP << "TMVA::Experimental::SOFIE::Relu(" << tnsr << ", " << tnsr << ", " << reluSize << ");\n"; } return out.str(); @@ -430,7 +527,7 @@ namespace SOFIE{ opName = "op_" + opName; if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) { - throw std::runtime_error("TMVA SOFIE Gemm Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Gemm Op called to Generate without being initialized first"); } std::stringstream out; out << "\n//--------- Gemm_GPU_ALPAKA\n"; @@ -442,7 +539,7 @@ namespace SOFIE{ int64_t dimB = fShapeB.size(); int64_t dimY = fShapeY.size(); if (dimA != dimB || dimA != dimY) { - throw std::runtime_error("TMVA SOFIE Gemm(MatMul) has invalid shape for inputs or output"); + throw std::runtime_error("SOFIE Gemm(MatMul) has invalid shape for inputs or output"); } auto m = (fAttrTransA ? fShapeA[dimA-1].GetVal() : fShapeA[dimA-2].GetVal()); auto n = (fAttrTransB ? 
fShapeB[dimB-2].GetVal() : fShapeB[dimB-1].GetVal()); @@ -471,7 +568,7 @@ namespace SOFIE{ // C should have smaller dimension of Y if (!fIsDynamic) { if (std::stoi(lengthGemm) != static_cast(ConvertShapeToLength(fShapeC))) - throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor has not correct size " + throw std::runtime_error("SOFIE Gemm Op " + opName + " Bias tensor has not correct size " + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) @@ -482,7 +579,7 @@ namespace SOFIE{ //in this case fAttrBeta needs to be equal to zero otherwise second time we run we will use // the previous result if (fAttrBeta != 0) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero"); + throw std::runtime_error("SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero"); } } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Identity.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Identity.hxx index efb6b14..d68b00c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Identity.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Identity.hxx @@ -41,7 +41,7 @@ public: void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor if (model.CheckIfTensorAlreadyExist(fNX) == false){ - throw std::runtime_error("TMVA SOFIE Identity Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Identity Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); if (model.IsInitializedTensor(fNX)) { @@ -77,7 +77,7 @@ public: if (fIsOutputConstant || fIsInputInitialized) return ""; OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Identity called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Identity called to Generate without being 
initialized first"); } std::stringstream out; out << "\n//------ IDENTITY\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.hxx index 5bfd4e3..69fb7a2 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.hxx @@ -106,7 +106,7 @@ template class ROperator_LSTM final : public ROperator { fType = "float"; } else { throw std::runtime_error( - "TMVA SOFIE Encountered unsupported type parsing a LSTM operator"); + "SOFIE Encountered unsupported type parsing a LSTM operator"); } fInputTensorNames = { fNX, fNW, fNR }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc index ebf4daf..2fb390d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LSTM.icc @@ -40,33 +40,33 @@ auto ROperator_LSTM::Initialize(RModel& model) fUseSession = model.UseSession(); // Check the input and output tensors if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNX + " is not found in model."); + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNX + " is not found in model."); } fShapeX = model.GetTensorShape(fNX); if (fShapeX.size() != 3) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNX + " is not of 3 dimensions."); + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNX + " is not of 3 dimensions."); } if (!model.CheckIfTensorAlreadyExist(fNW)) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNW + " is not found in model."); + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNW + " is not found in model."); } fShapeW = model.GetTensorShape(fNW); if (fShapeW.size() != 3) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNW + " is not of 3 dimensions."); + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNW + " is not of 3 dimensions."); } if 
(!model.CheckIfTensorAlreadyExist(fNR)) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNR + " is not found in model."); + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNR + " is not found in model."); } fShapeR = model.GetTensorShape(fNR); if (fShapeR.size() != 3) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + fNR + " is not of 3 dimensions."); + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNR + " is not of 3 dimensions."); } if (!fNB.empty()) { if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNB + " is not found in model."); + throw std::runtime_error("SOFIE LSTM op input tensor " + fNB + " is not found in model."); } fShapeB = model.GetTensorShape(fNB); if (fShapeB.size() != 2 && fShapeB.size() != 5) { - throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNB + " is not of 2 or 5 dimensions."); + throw std::runtime_error("SOFIE LSTM op input tensor " + fNB + " is not of 2 or 5 dimensions."); } if (fShapeB.size() == 2) { // Broadcasting the bias @@ -103,46 +103,46 @@ auto ROperator_LSTM::Initialize(RModel& model) } if (!fNSequence_lens.empty()) { if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNSequence_lens + "is not found in model."); } fShapeSequence_lens = model.GetTensorShape(fNSequence_lens); if (fShapeSequence_lens.size() != 1) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNSequence_lens + " is not of 1 dimension."); } } if (!fNInitial_h.empty()) { if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNInitial_h + " is not found in model."); } fShapeInitial_h = model.GetTensorShape(fNInitial_h); 
if (fShapeInitial_h.size() != 3) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNInitial_h + " is not of 3 dimensions."); } } if (!fNInitial_c.empty()) { if (!model.CheckIfTensorAlreadyExist(fNInitial_c)) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNInitial_c + " is not found in model."); } fShapeInitial_c = model.GetTensorShape(fNInitial_c); if (fShapeInitial_c.size() != 3) { - throw std::runtime_error("TMVA SOFIE LSTM Op input tensor " + + throw std::runtime_error("SOFIE LSTM Op input tensor " + fNInitial_c + " is not of 3 dimensions."); } } if (!fNP.empty()) { if (!model.CheckIfTensorAlreadyExist(fNP)) { - throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNP + " is not found in model."); + throw std::runtime_error("SOFIE LSTM op input tensor " + fNP + " is not found in model."); } fShapeP = model.GetTensorShape(fNP); if (fShapeP.size() != 2 && fShapeP.size() != 4) { - throw std::runtime_error("TMVA SOFIE LSTM op input tensor " + fNP + " is not of 2 or 4 dimensions."); + throw std::runtime_error("SOFIE LSTM op input tensor " + fNP + " is not of 2 or 4 dimensions."); } if (fShapeP.size() == 2) { // Broadcasting the weight for peepholes @@ -196,28 +196,28 @@ auto ROperator_LSTM::Initialize(RModel& model) activation != "ScaledTanh" && activation != "HardSigmoid" && activation != "Elu" && activation != "Softsign" && activation != "Softplus") { - throw std::runtime_error("TMVA SOFIE - Activation function " + + throw std::runtime_error("SOFIE - Activation function " + activation + " not implemented"); } } if (fAttrDirection != "forward" && fAttrDirection != "backward" && fAttrDirection != "bidirectional") { throw std::runtime_error( - "TMVA SOFIE - Invalid LSTM direction fAttrDirection = " + + "SOFIE - Invalid LSTM direction fAttrDirection = " + fAttrDirection); } if (4 * fAttrHiddenSize != 
fShapeW[1]) { throw std::runtime_error( - "TMVA SOFIE - fAttrHiddenSize must be equal to " + + "SOFIE - fAttrHiddenSize must be equal to " + std::to_string(fShapeW[1] / 4)); } if (fAttrInputForget > 1) { throw std::runtime_error( - "TMVA SOFIE - fAttrInputForget = " + std::to_string(fAttrInputForget) + "SOFIE - fAttrInputForget = " + std::to_string(fAttrInputForget) + " must be 0 or 1."); } if (fAttrLayout > 1) { - throw std::runtime_error("TMVA SOFIE - Layout fAttrLayout = " + + throw std::runtime_error("SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) + " must be 0 (timewise) or 1 (batchwise)"); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx index 6f0c0d4..c0e80aa 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -34,7 +34,7 @@ public: } else{ throw - std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Leaky Relu operator"); + std::runtime_error("SOFIE Encountered unsupported type parsing a Leaky Relu operator"); } fInputTensorNames = { fNX }; @@ -52,7 +52,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Leaky Relu Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Leaky Relu Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); @@ -62,7 +62,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Leaky Relu called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Leaky Relu called to Generate without being initialized first"); } std::stringstream out; size_t length = 
ConvertShapeToLength(fShape); @@ -98,7 +98,7 @@ public: std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator LeakyRelu called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator LeakyRelu called to Generate without being initialized first"); } std::stringstream out; @@ -117,6 +117,11 @@ public: return out.str(); } + bool IsElementwise() const override { return true; } + std::string GetElementwiseExpr(const std::string& v) const override { + return "((" + v + " >= 0) ? " + v + " : " + std::to_string(falpha) + " * " + v + ")"; + } + std::string GetFusableOutputTensorName() override { return fNY; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx new file mode 100644 index 0000000..02b5cb4 --- /dev/null +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx @@ -0,0 +1,118 @@ +#ifndef TMVA_EXPERIMENTAL_SOFIE_ROPERATOR_NOT +#define TMVA_EXPERIMENTAL_SOFIE_ROPERATOR_NOT + +#include +#include +#include + + +namespace SOFIE { + + +class ROperator_Not final : public ROperator { +private: + std::string fNX; + std::string fNY; + + std::vector fShapeX; + std::vector fShapeY; + +public: + ROperator_Not() {} + + ROperator_Not(std::string nameX, std::string nameY) + : fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) + { + fInputTensorNames = { fNX }; + fOutputTensorNames = { fNY }; + } + + + void Initialize(RModel& model) override { + if (!model.CheckIfTensorAlreadyExist(fNX)) { + throw std::runtime_error("TMVA::SOFIE - Tensor " + fNX + " not found."); + } + fShapeX = model.GetDimTensorShape(fNX); + fShapeY = fShapeX; + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY); + } + + std::string Generate(std::string opName) override + { + opName = "op_" + opName; + std::stringstream out; + + out << SP << "\n//---- Operator Not " << opName << "\n"; + auto length = 
ConvertDimShapeToLength(fShapeX); + out << SP << "for (size_t i = 0; i < " << length << "; i++) {\n"; + out << SP << SP << "tensor_" << fNY << "[i] = !tensor_" + fNX + "[i];\n"; + out << SP << "}\n"; + return out.str(); + } + + std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override + { + if (fIsOutputConstant) + return ""; + + std::string op; + op = "\n//------ NOT_KERNEL_ALPAKA\n"; + op += SP + "struct NotKernel {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const & acc,\n"; + op += SP + SP + SP + "T const * data,\n"; + op += SP + SP + SP + "T * output,\n"; + op += SP + SP + SP + "std::size_t const length) const\n"; + op += SP + SP + "{\n"; + op += SP + SP + SP + "auto idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (idx < length) {\n"; + op += SP + SP + SP + SP + "output[idx] = !data[idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n"; + return op; + } + + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override + { + return SP + "NotKernel notKernel;\n"; + } + + std::string Generate_GPU_ALPAKA(std::string opName) override + { + opName = "op_" + opName; + std::stringstream out; + auto length = ConvertDimShapeToLength(fShapeX); + + out << "\n//------ " << opName << "_ALPAKA\n"; + out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << length << "});\n"; + out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY + << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; + out << SP << "auto const workDiv_" << fNY + << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, " + << "NotKernel" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", " << length << ");\n"; + out << SP << "auto task_" << opName + << 
" = alpaka::createTaskKernel(workDiv_" << fNY + << ", " << "NotKernel" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", " << length << ");\n"; + out << SP << "alpaka::enqueue(queue, task_" << opName << ");\n"; + return out.str(); + } + + bool IsElementwise() const override { return !fIsOutputConstant; } + std::string GetElementwiseExpr(const std::string& v) const override { + return "!" + v; + } + +}; + +} // namespace SOFIE + +#endif diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx index dae3a5b..04365d8 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx @@ -61,13 +61,13 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Pad Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Pad Op Input Tensor is not found in model"); } fInputShape = model.GetTensorShape(fNX); if (fMode != EMode::kConstant) { - throw std::runtime_error("TMVA SOFIE Pad Op supports now only Constant mode"); + throw std::runtime_error("SOFIE Pad Op supports now only Constant mode"); } // get pads data @@ -75,7 +75,7 @@ public: if (model.IsInitializedTensor(fNP)) { padsData = static_cast(model.GetInitializedTensorData(fNP).get()); } else { - throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Pads data"); + throw std::runtime_error("SOFIE Pad Op supports now only initialized Pads data"); } // get constant value fConstantValue = 0; @@ -84,7 +84,7 @@ public: T * cData = static_cast(model.GetInitializedTensorData(fNCV).get()); fConstantValue = cData[0]; } else { - throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Constant Value data"); + throw std::runtime_error("SOFIE Pad Op supports now only 
initialized Constant Value data"); } } std::vector axes; @@ -103,10 +103,10 @@ public: for (size_t i = 0; i < nax; i++) axes[i] = data[i]; } else { - throw std::runtime_error("TMVA SOFIE Pad Op invalid input Axes type"); + throw std::runtime_error("SOFIE Pad Op invalid input Axes type"); } } else { - throw std::runtime_error("TMVA SOFIE Pad Op supports now only initialized Axes data"); + throw std::runtime_error("SOFIE Pad Op supports now only initialized Axes data"); } } @@ -127,7 +127,7 @@ public: fPads[i].second = padsData[axesSize + i]; int64_t outDim = static_cast(fOutputShape[i]) + fPads[i].first + fPads[i].second; if (outDim < 0) - throw std::runtime_error("TMVA SOFIE Pad Op : invalid Pads values"); + throw std::runtime_error("SOFIE Pad Op : invalid Pads values"); fOutputShape[i] = outDim; } } @@ -149,7 +149,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fOutputShape.empty()){ - throw std::runtime_error("TMVA SOFIE Operator Pad called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Pad called to Generate without being initialized first"); } std::stringstream out; auto inputStride = UTILITY::ComputeStrideFromShape(fInputShape); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx index e6fbc25..8e11271 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx @@ -76,7 +76,7 @@ public: fType = "float"; } else { throw - std::runtime_error("TMVA SOFIE Encountered unsupported type parsing a Pool operator"); + std::runtime_error("SOFIE Encountered unsupported type parsing a Pool operator"); } fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; @@ -94,19 +94,19 @@ public: // Where N is batch size, C : input channels, H : input height, W = input width // or it can be [N, C, F1,F2,....FN] . 
Minimum dimension is 3 if (input.size() != 1 ) { - throw std::runtime_error("TMVA SOFIE" + Name() + "Op Shape inference need 1 input tensor"); + throw std::runtime_error("SOFIE" + Name() + "Op Shape inference need 1 input tensor"); } if (input[0].size() < 3) { - throw std::runtime_error("TMVA SOFIE" + Name() + "Op Shape inference only accept tensor with at least 3 dimensions"); + throw std::runtime_error("SOFIE" + Name() + "Op Shape inference only accept tensor with at least 3 dimensions"); } // support only input tensors with dim = 3,4,5 if (input[0].size() < 3 || input[0].size() > 5) { - throw std::runtime_error("TMVA SOFIE" + Name() + "Op : tensors with dimension " + std::to_string(input[0].size()) + " are not yet supported"); + throw std::runtime_error("SOFIE" + Name() + "Op : tensors with dimension " + std::to_string(input[0].size()) + " are not yet supported"); } if (input[0].size() -2 != fDim) { throw - std::runtime_error("TMVA SOFIE Pool Op Shape inference - invalid inputs "); + std::runtime_error("SOFIE Pool Op Shape inference - invalid inputs "); } // kernel shape size_t k1 = ((fAttrKernelShape.empty())? 
input[0][2] : fAttrKernelShape[0]); @@ -156,7 +156,7 @@ public: } } else if (fAttrAutopad != "VALID") { throw - std::runtime_error("TMVA SOFIE" + Name() + "Op invalid Autopad value : " + fAttrAutopad); + std::runtime_error("SOFIE" + Name() + "Op invalid Autopad value : " + fAttrAutopad); } // to be sure pad is vector of size 6 if (fDim < 3) fAttrPads.resize(6, 0); @@ -204,13 +204,13 @@ public: if (!model.CheckIfTensorAlreadyExist(fNX)) { throw - std::runtime_error("TMVA SOFIE Pool op Input Tensor " + fNX + " is not found in model"); + std::runtime_error("SOFIE Pool op Input Tensor " + fNX + " is not found in model"); } fShapeX = model.GetTensorShape(fNX); if (fShapeX.size() < 3 || fShapeX.size() > 5) { std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl; throw - std::runtime_error("TMVA SOFIE Pool Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); + std::runtime_error("SOFIE Pool Op input data tensor" + fNX + " is not of 3,4 or 5 dimensions"); } fDim = fShapeX.size() - 2; // case of GlobalAveragePool. 
It is a pool case with kernel shape == image shape @@ -267,7 +267,7 @@ public: OpName = "op_" + OpName; if (fShapeX.empty() || fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Pool Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Pool Op called to Generate without being initialized first"); } std::stringstream out; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx index aed7bc1..3a0f58f 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx @@ -91,7 +91,7 @@ template class ROperator_RNN final : public ROperator { fType = "float"; } else { throw std::runtime_error( - "TMVA SOFIE Encountered unsupported type parsing a RNN operator"); + "SOFIE Encountered unsupported type parsing a RNN operator"); } fInputTensorNames = { fNX, fNW, fNR }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc index c10c2a5..467fda8 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc +++ b/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc @@ -38,40 +38,40 @@ auto ROperator_RNN::Initialize(RModel& model) fUseSession = model.UseSession(); // Check the input and output tensors if (!model.CheckIfTensorAlreadyExist(fNX)) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNX + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNX + " is not found in model."); } fShapeX = model.GetTensorShape(fNX); if (fShapeX.size() != 3) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNX + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNX + " is not of 3 dimensions."); } if (!model.CheckIfTensorAlreadyExist(fNW)) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNW + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNW + " is not found in model."); } fShapeW = model.GetTensorShape(fNW); if (fShapeW.size() != 3) { - throw 
std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNW + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNW + " is not of 3 dimensions."); } if (!model.CheckIfTensorAlreadyExist(fNR)) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNR + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNR + " is not found in model."); } fShapeR = model.GetTensorShape(fNR); if (fShapeR.size() != 3) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + fNR + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNR + " is not of 3 dimensions."); } if (!fNB.empty()) { if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw std::runtime_error("TMVA SOFIE RNN op input tensor " + fNB + + throw std::runtime_error("SOFIE RNN op input tensor " + fNB + " is not found in model."); } fShapeB = model.GetTensorShape(fNB); if (fShapeB.size() != 2 && fShapeB.size() != 4) { - throw std::runtime_error("TMVA SOFIE RNN op input tensor " + fNB + + throw std::runtime_error("SOFIE RNN op input tensor " + fNB + " is not of 2 or 4 dimensions."); } if (fShapeB.size() == 2) { @@ -111,23 +111,23 @@ auto ROperator_RNN::Initialize(RModel& model) } if (!fNSequence_lens.empty()) { if (!model.CheckIfTensorAlreadyExist(fNSequence_lens)) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNSequence_lens + "is not found in model."); } fShapeSequence_lens = model.GetTensorShape(fNSequence_lens); if (fShapeSequence_lens.size() != 1) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNSequence_lens + " is not of 1 dimension."); } } if (!fNInitial_h.empty()) { if (!model.CheckIfTensorAlreadyExist(fNInitial_h)) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNInitial_h + " is not found in model."); } fShapeInitial_h = model.GetTensorShape(fNInitial_h); 
if (fShapeInitial_h.size() != 3) { - throw std::runtime_error("TMVA SOFIE RNN Op input tensor " + + throw std::runtime_error("SOFIE RNN Op input tensor " + fNInitial_h + " is not of 3 dimensions."); } } @@ -152,24 +152,24 @@ auto ROperator_RNN::Initialize(RModel& model) activation != "ScaledTanh" && activation != "HardSigmoid" && activation != "Elu" && activation != "Softsign" && activation != "Softplus") { - throw std::runtime_error("TMVA SOFIE - Activation function " + + throw std::runtime_error("SOFIE - Activation function " + activation + " not implemented"); } } if (fAttrDirection != "forward" && fAttrDirection != "backward" && fAttrDirection != "bidirectional") { throw std::runtime_error( - "TMVA SOFIE - Invalid RNN direction fAttrDirection = " + + "SOFIE - Invalid RNN direction fAttrDirection = " + fAttrDirection); } if (fAttrHiddenSize != fShapeW[1]) { throw std::runtime_error( - "TMVA SOFIE - fAttrHiddenSize must be equal to " + + "SOFIE - fAttrHiddenSize must be equal to " + std::to_string(fShapeW[1])); } if (fAttrLayout > 1) { throw std::runtime_error( - "TMVA SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) + + "SOFIE - Layout fAttrLayout = " + std::to_string(fAttrLayout) + " must be 0 (timewise) or 1 (batchwise)"); } if (fAttrActivations.empty()) { diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx index cde08b5..0de1cd9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx @@ -121,13 +121,13 @@ public: if (fUseROOT) { if (fMode == kNormal) { if (fParams.count("mean") == 0 || fParams.count("scale") == 0) - throw std::runtime_error("TMVA SOFIE RandomNormal op : no mean or scale are defined"); + throw std::runtime_error("SOFIE RandomNormal op : no mean or scale are defined"); float mean = fParams["mean"]; float scale = fParams["scale"]; out << SP << SP << "tensor_" << fNY << "[i] = fRndmEngine->Gaus(" << mean << "," << scale << 
");\n"; } else if (fMode == kUniform) { if (fParams.count("high") == 0 || fParams.count("low") == 0) - throw std::runtime_error("TMVA SOFIE RandomUniform op : no low or high are defined"); + throw std::runtime_error("SOFIE RandomUniform op : no low or high are defined"); float high = fParams["high"]; float low = fParams["low"]; out << SP << SP << "tensor_" << fNY << "[i] = fRndmEngine->Uniform(" << low << "," << high << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx index fea9814..8ea17d9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx @@ -50,15 +50,15 @@ public: //input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNStart)) { throw - std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNStart + "is not found in model"); + std::runtime_error("SOFIE Range Op Input Tensor " + fNStart + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNLimit)) { throw - std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNLimit + "is not found in model"); + std::runtime_error("SOFIE Range Op Input Tensor " + fNLimit + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNDelta)) { throw - std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNDelta + "is not found in model"); + std::runtime_error("SOFIE Range Op Input Tensor " + fNDelta + "is not found in model"); } ETensorType type = ConvertStringToType(fType); if (model.IsInitializedTensor(fNStart) && model.IsInitializedTensor(fNDelta) && model.IsInitializedTensor(fNLimit)) { @@ -66,7 +66,7 @@ public: T * limit = static_cast(model.GetInitializedTensorData(fNLimit).get()); T * delta = static_cast(model.GetInitializedTensorData(fNDelta).get()); if (!start || !delta || !limit) - std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); + std::runtime_error("SOFIE Range Op Input Tensor has 
invalid input data"); T a = *start; T b = *limit; T d = *delta; @@ -102,7 +102,7 @@ public: OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Range operator called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Range operator called to Generate without being initialized first"); } std::string sizeName = fShape[0].param; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx index 0b51e2c..34e9819 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx @@ -68,7 +68,7 @@ public: for (size_t j = 0; j < fAttrAxes.size(); j++) { if (fAttrAxes[j] < 0) fAttrAxes[j] += outputShape.size(); if (fAttrAxes[j] < 0 || (size_t) fAttrAxes[j] >= outputShape.size() ) - throw std::runtime_error("TMVA SOFIE Reduce Op - invalid axes values " + std::to_string(fAttrAxes[j])); + throw std::runtime_error("SOFIE Reduce Op - invalid axes values " + std::to_string(fAttrAxes[j])); // set to 1 the reduced dims outputShape[fAttrAxes[j]] = 1; } @@ -94,7 +94,7 @@ public: if (!model.CheckIfTensorAlreadyExist(fNX)) { // input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Reduce Op Input Tensor " + fNX + " is not found in model"); + throw std::runtime_error("SOFIE Reduce Op Input Tensor " + fNX + " is not found in model"); } fShapeX = model.GetTensorShape(fNX); // check if tensor with axes is provided @@ -122,7 +122,7 @@ public: std::string Generate(std::string opName) override { opName = "op_" + opName; if (fShapeX.empty() || fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Reduce Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Reduce Op called to Generate without being initialized first"); } size_t inputLength = SOFIE::ConvertShapeToLength(fShapeX); @@ -263,7 +263,7 @@ public: std::string 
Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override { if (fShapeX.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Reduce Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Reduce Op called to Generate without being initialized first"); const std::size_t Dx = fShapeX.size(); @@ -359,7 +359,7 @@ public: std::string Generate_GPU_ALPAKA(std::string /*opName*/) override { if (fShapeX.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Reduce Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Reduce Op called to Generate without being initialized first"); std::size_t outputLength = ConvertShapeToLength(fShapeY); std::string kname = "reduceKernel_" + Name(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx index cea6a5b..0ced730 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx @@ -40,7 +40,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Relu Op Input Tensor " + fNX + " is not found in model"); + throw std::runtime_error("SOFIE Relu Op Input Tensor " + fNX + " is not found in model"); } fShape = model.GetDimTensorShape(fNX); @@ -55,7 +55,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Relu called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Relu called to Generate without being initialized first"); } std::stringstream out; auto length = ConvertDimShapeToLength(fShape); @@ -90,7 +90,7 @@ public: std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw 
std::runtime_error("TMVA SOFIE Operator Relu called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Relu called to Generate without being initialized first"); } std::stringstream out; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx index 2b3391c..9362151 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx @@ -31,11 +31,12 @@ private: std::string fNData; // input data tensor name std::string fNInput2; // reshape or axes tensor name depending on operator - std::string fNOutput; // output tensor name - std::vector fShapeInput; // input shape data + std::string fNOutput; // output tensor name + std::vector fShapeInput; // input shape data std::vector fShapeOutput; // output shape data - std::vector fAttrAxes; // axes attributes (provided for all version of Squeeze/Unsqueeze) - std::vector fShape; // shape tensor values provided for Reshape + std::vector fOutputShapeData; // in case output is a shape tensor we store here the shape value data (can be parametric) + std::vector fAttrAxes; // axes attributes (provided for all version of Squeeze/Unsqueeze) + std::vector fShape; // shape tensor values provided for Reshape for int shapes4 public: @@ -73,34 +74,44 @@ public: fOutputTensorNames = { fNOutput }; } - // output type is same as input - std::vector TypeInference(std::vector input) override { - auto ret = std::vector(1, input[0]); - return ret; - } - std::vector> ShapeInference(std::vector> input) override { - return input; - } // output shape - std::vector> ShapeInference(const std::vector> & input) { - std::vector> ret; - auto & input_shape = input[0]; + std::vector DoShapeInference(const std::vector & input_shape, const std::vector & target_shape) { if (fOpMode == Reshape) { // correct the provided shape (here we have the value) for 0 or -1 - std::vector output_shape(fShape.size()); - 
assert(!fShape.empty() && !fDynamicShape); + // the target_shape can be a scalar in case of not present shape input tensor + std::vector output_shape = target_shape; + bool hasMinusOne = false; + bool hasZero = false; for (size_t i = 0; i < output_shape.size(); i++) { - if (fShape[i] > 0 || (fAllowZero && fShape[i] >= 0)) - output_shape[i] = Dim{ static_cast(fShape[i]) }; - else if (!fAllowZero && fShape[i] == 0) - output_shape[i] = input_shape[i]; + // case for zero values in given shape: in this case we take the corresponding value from input shape + if (!output_shape[i].isParam) { + if (output_shape[i].dim == 0) { + hasZero = true; + if (fAllowZero) + output_shape[i] = Dim{0}; + else { + if (i > 0 && output_shape.size() != input_shape.size()) + std::cout << "WARNING: TMVA Reshape Op : output shape has zero value at index " << i << + " but input shape has a different rank than output shape" << std::endl; + if (i >= input_shape.size()) + throw std::runtime_error("TMVA Reshape Op : output shape has zero value at index " + std::to_string(i) + + " but input shape does not have corresponding index"); + } + output_shape[i] = input_shape[i]; + } else if (output_shape[i].dim == static_cast(-1)) { + hasMinusOne = true; + } + } + } + if (hasZero && hasMinusOne) { + throw std::runtime_error("TMVA Reshape Op : zero value in shape is not allowed when there is also a -1 in shape"); } - // now case of -1 in shape + // now case of -1 in shape - we can infer the value of -1 from all other values for (size_t i = 0; i < output_shape.size(); i++) { - if (fShape[i] == -1) { + if (output_shape[i] == static_cast(-1) && !output_shape[i].isParam) { auto tmp = output_shape; - tmp.erase(tmp.begin() + i); + tmp.erase(tmp.begin() + i); // erase -1 value to compute the length of the other dimensions auto tmp_length = ConvertDimShapeToLength(tmp); auto input_length = ConvertDimShapeToLength(input_shape); if (fVerbose) @@ -173,7 +184,7 @@ public: throw std::runtime_error("TMVA Reshape Op : 
Invalid shapes : " + ConvertDimShapeToString(input_shape) + ConvertDimShapeToString(output_shape)); } - ret.push_back(output_shape); + return output_shape; } else if (fOpMode == Flatten) { // flatten case @@ -184,7 +195,7 @@ public: auto l1 = ConvertDimShapeToLength(s1); auto l2 = ConvertDimShapeToLength(s2); std::vector newShape = {Dim{l1}, Dim{l2}}; - ret.push_back(newShape); + return newShape; } else if (fOpMode == Squeeze) { // squeeze // assume no axis is provided - remove all axes with value equal to 1 @@ -199,10 +210,8 @@ public: } } } else { - std::cout << "getting shape for Squeeze...from attribute\n"; auto axes = fAttrAxes; for (size_t i = 0; i < axes.size(); i++) { - std::cout << i << " " << axes[i] << std::endl; if (axes[i] < 0) axes[i] += input_shape.size(); if (!(output_shape[axes[i]] == Dim{1})) @@ -212,22 +221,18 @@ public: // for calling vector::erase we must sort axes in decreasing order to avoid std::sort(axes.begin(), axes.end(), std::greater()); for (auto & axis : axes) { - std::cout << "erase give axis " << axis << " -> "; - for (auto & o : output_shape) std::cout << o << " , "; - std::cout << std::endl; output_shape.erase(output_shape.begin() + axis); } } - ret.push_back(output_shape); + return output_shape; } else if (fOpMode == Unsqueeze) { // unsqueeze - std::cout << "doing unsqueeze....\n"; assert(!fAttrAxes.empty()); auto output_shape = input_shape; auto &axes = fAttrAxes; // output rank - int64_t r = input[0].size() + axes.size(); + int64_t r = input_shape.size() + axes.size(); for (auto &a : axes) { int64_t i = static_cast(a); if (i < -r || i > r - 1) @@ -238,16 +243,18 @@ public: // negative axes output_shape.insert(output_shape.end() + i + 1, Dim{1}); } - ret.push_back(output_shape); + return output_shape; } - return ret; + throw std::runtime_error("TMVA Reshape Op : Invalid ReshapeOpMode"); + return {Dim{}}; } void Initialize(RModel& model) override { fVerbose = model.Verbose(); if (fVerbose) - std::cout << "initialize reshape op 
type " << fOpMode << " - " << fNInput2 << " " << fNData << std::endl; + std::cout << "initialize reshape op type " << fOpMode << " - for input " << fNData + << " to shape given by " << fNInput2 << std::endl; if (model.CheckIfTensorAlreadyExist(fNData) == false) { // input must be a graph input, or already initialized intermediate tensor @@ -272,16 +279,22 @@ public: else fAttrAxes = std::vector(values, values + n); - fShapeOutput = ShapeInference({fShapeInput})[0]; + std::vector targetShape(fShape.begin(),fShape.end()); + fShapeOutput = DoShapeInference(fShapeInput, targetShape); // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed model.SetNotWritableInitializedTensor(fNInput2); } else if (model.IsShapeTensor(fNInput2)) { auto shapeData = model.GetShapeTensorValues(fNInput2); - fShapeOutput = shapeData; + fShapeOutput = DoShapeInference(fShapeInput, shapeData); + if (model.Verbose()) + std::cout << "Reshape op - get output shape from shape tensor " << fNInput2 << " with value " << ConvertDimShapeToString(shapeData) << std::endl; } else { // we cannot get shape at initialization time but at run-time fDynamicShape = true; // size of shape output us given by size of shape input tensor + if (model.IsDynamicTensor(fNInput2)) { + throw std::runtime_error("TMVA Reshape Op 2nd input Tensor " + fNInput2 + " cannot have dynamic shape"); + } auto shapeInput2 = model.GetTensorShape(fNInput2); fShapeOutput.resize(shapeInput2[0]); for (size_t i = 0; i < fShapeOutput.size(); i++) { @@ -293,10 +306,9 @@ public: } } else if (!fAttrAxes.empty()) { // case fNShape is empty and axes are provided as attributes (e.g. 
for Unsqueeze) - std::cout << "attribute axes exists\n"; - fShapeOutput = ShapeInference({fShapeInput})[0]; + fShapeOutput = DoShapeInference(fShapeInput, std::vector{}); } else if (fOpMode == Flatten || fOpMode == Squeeze) { - fShapeOutput = ShapeInference({fShapeInput})[0]; + fShapeOutput = DoShapeInference(fShapeInput, std::vector{}); } else { throw std::runtime_error("TMVA Reshape Op : Invalid Input/Attribute data"); } @@ -313,14 +325,15 @@ public: ConvertValuesToString(ConvertShapeToLength(o_shape), inputData) << std::endl; } } - // for shape tensors we can have it if output shape is size==1 or a scalar + // for input shape tensors we can have it if output shape is size==1 or a scalar else if (model.IsShapeTensor(fNData) && fShapeOutput.size() <=1) { - fIsOutputConstant = true; - auto inputData = model.GetShapeTensorValues(fNData); - model.AddShapeTensor(fNOutput, inputData); + // not sure if we ever end-up here - maybe reshaping from scalar to vector or viceversa + fIsOutputParamShape = true; + fOutputShapeData = model.GetShapeTensorValues(fNData); + model.AddShapeTensor(fNOutput, fOutputShapeData); if (model.Verbose()) { std::cout << Name() << " : " << fNData << " " << ConvertDimShapeToString(fShapeInput) << " --> " << fNOutput << " (shape) " << ConvertDimShapeToString(fShapeOutput) << " : " << - ConvertDimShapeToString(inputData) << std::endl; + ConvertDimShapeToString(fOutputShapeData) << std::endl; } } else { @@ -332,7 +345,7 @@ public: } std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors + std::stringstream out; std::string opType = "Reshape"; @@ -341,10 +354,21 @@ public: else if (fOpMode == Squeeze) opType = "Squeeze"; else if (fOpMode == Unsqueeze) - opType = "Unsquueze"; + opType = "Unsqueeze"; out << SP << "///--------" << opType << " operator " << opName << " --> " << ConvertDimShapeToString(fShapeOutput) << "\n"; + if (fIsOutputConstant) return out.str(); //no op for constant 
tensors + + if (fIsOutputParamShape) { + // no code to generate here for param shape output. Tensor output is defined in Session constructor + out << "//----------------output is a shape tensor----------\n"; + for (int i = 0; i < static_cast(fShapeOutput[0].dim); i++) { + out << SP << "tensor_" << fNOutput << "[" << i << " ] = " << fOutputShapeData[i].GetVal() << ";\n"; + } + return out.str(); + } + // in case of dynamic output shape we need to set the shape value from input shape tensor // and take case of the zero values if (fDynamicShape) { @@ -363,7 +387,8 @@ public: if (lengthOut != lengthIn) { // check needs to be done at run-time out << SP << "if (" << lengthOut << "!=" << lengthIn << ")\n"; - out << "throw std::runtime_error(\"TMVA SOFIE Reshape Op : output lengths is different than input one\");\n"; + out << SP << SP << "throw std::runtime_error(\"SOFIE Reshape " << opName << " output length " + << lengthOut << " is different than input one " << lengthIn << "\");\n"; } @@ -371,6 +396,7 @@ public: << ");\n"; return out.str(); } + std::string Generate_GPU_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; @@ -389,7 +415,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { auto lengthIn = ConvertDimShapeToLength(fShapeInput); if (lengthOut != lengthIn) { out << SP << "if (" << lengthOut << " != " << lengthIn << ")\n"; - out << SP << SP << "throw std::runtime_error(\"TMVA SOFIE " << opType + out << SP << SP << "throw std::runtime_error(\"SOFIE " << opType << " Op : output length is different from input length\");\n"; } } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx index 9fd3adf..6bb2ac3 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx @@ -41,7 +41,7 @@ private: else if (name == "min") return "std::min(" + t1 + "," + t2 + ")"; else - throw std::runtime_error("TMVA 
SOFIE ScatterElements : invalid reduction attribute"); + throw std::runtime_error("SOFIE ScatterElements : invalid reduction attribute"); return std::string(); } @@ -73,24 +73,24 @@ public: void Initialize(RModel& model) override { // input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNX)){ - throw std::runtime_error(std::string("TMVA SOFIE ScatterElements Op Input Tensor ") + fNX + "is not found in model"); + throw std::runtime_error(std::string("SOFIE ScatterElements Op Input Tensor ") + fNX + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNI)) { - throw std::runtime_error(std::string("TMVA SOFIE ScatterElements Op Input Tensor ") + fNI + "is not found in model"); + throw std::runtime_error(std::string("SOFIE ScatterElements Op Input Tensor ") + fNI + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNU)) { - throw std::runtime_error(std::string("TMVA SOFIE ScatterElements Op Input Tensor ") + fNU + "is not found in model"); + throw std::runtime_error(std::string("SOFIE ScatterElements Op Input Tensor ") + fNU + "is not found in model"); } //tbd check for constant tensors fShapeX = model.GetTensorShape(fNX); fShapeI = model.GetTensorShape(fNI); if (model.GetTensorShape(fNU) != fShapeI) - throw std::runtime_error(std::string("TMVA SOFIE ScatterElements - update tensor has invalid shape ")) ; + throw std::runtime_error(std::string("SOFIE ScatterElements - update tensor has invalid shape ")) ; if (fShapeX.size() == 0) - throw std::runtime_error(std::string("TMVA SOFIE ScatterElements - input tensor has zero rank ")) ; + throw std::runtime_error(std::string("SOFIE ScatterElements - input tensor has zero rank ")) ; if (fShapeX.size() != fShapeI.size()) - throw std::runtime_error(std::string("TMVA SOFIE ScatterElements - index tensor has invalid rank ")) ; + throw std::runtime_error(std::string("SOFIE ScatterElements - index tensor has invalid rank ")) ; if (fAxis < 0) fAxis 
+= fShapeX.size(); @@ -109,7 +109,7 @@ public: if (fIsOutputConstant) return ""; if (fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE ScatterElements Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE ScatterElements Op called to Generate without being initialized first"); } std::stringstream out; out << SP << "\n//-------- ScatterElements --- " << opName << "\n"; @@ -171,7 +171,7 @@ public: std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE ScatterElements Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE ScatterElements Op called to Generate without being initialized first"); } const std::size_t D = fShapeI.size(); @@ -248,7 +248,7 @@ std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE ScatterElements Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE ScatterElements Op called to Generate without being initialized first"); } std::size_t totalElements = ConvertShapeToLength(fShapeI); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx index 96f4445..68ef253 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx @@ -38,7 +38,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Selu Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Selu Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); model.AddIntermediateTensor(fNY, 
model.GetTensorType(fNX), fShape); @@ -48,7 +48,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()){ - throw std::runtime_error("TMVA SOFIE Operator Selu called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Selu called to Generate without being initialized first"); } std::stringstream out; int length = 1; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx index 34e69eb..299de7c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx @@ -47,7 +47,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Shape Op Input Tensor " + fNX + " is not found in model"); + throw std::runtime_error("SOFIE Shape Op Input Tensor " + fNX + " is not found in model"); } fShape = model.GetTensorShape(fNX); size_t length = fShape.size(); // this the size of shape not length of tensor @@ -87,7 +87,7 @@ public: OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Shape op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Shape op called to Generate without being initialized first"); } std::stringstream out; @@ -107,7 +107,7 @@ public: OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Shape op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Shape op called to Generate without being initialized first"); } std::stringstream out; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx index 8f7f001..58355bc 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -39,7 
+39,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Sigmoid Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Sigmoid Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); @@ -48,7 +48,7 @@ public: std::string Generate(std::string opName) override { if (fShape.empty()){ - throw std::runtime_error("TMVA SOFIE Operator Sigmoid called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Sigmoid called to Generate without being initialized first"); } std::stringstream out; int length = 1; @@ -85,7 +85,7 @@ public: std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Sigmoid called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Sigmoid called to Generate without being initialized first"); } std::stringstream out; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx index 127adaa..ba3bbf9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx @@ -20,16 +20,27 @@ class ROperator_Slice final : public ROperator private: + // flags to indicate if start/end and steps are not defined at compiled time + bool fIsStartUndef = false; + bool fIsEndUndef = false; + bool fIsStepUndef = false; + bool fIdentitySlice = false; std::string fNData; // input data tensor name std::string fNOutput; // output data name std::vector fNames; // tensor names for meta(axis) information - std::vector fShapeInput; // input shape data - std::vector fShapeOutput; // output shape data + std::vector fShapeInput; // input shape + 
std::vector fShapeOutput; // output shape + std::vector fOutputShapeData; // output shape data in case output is a shape param tensor + // saved Start/End.Steps are corrected from initial ONNX for negative/default values // and are available for each axis - std::vector fStart; // starting values of slices - std::vector fEnd; // End values of slices - std::vector fSteps; // step values of slices + std::vector fStart; // starting values of slices for all axes + std::vector fEnd; // End values of slices for all axes + std::vector fSteps; // step values of slices for all axes + std::vector fStartDims; // input starting values of slices + std::vector fEndDims; // input End values of slices + std::vector fStepDims; // input step values of slices + std::vector fAxes; // axes for input start/emd/step values std::vector> fAttributes; // attributes for the version <=10 case @@ -62,23 +73,6 @@ public: fAttributes.push_back(axes); } - // output type is same as input - std::vector TypeInference(std::vector input) override { - auto ret = std::vector(1, input[0]); - return ret; - } - - // output shape - std::vector> ShapeInference(std::vector> input) override { - auto & input_shape = input[0]; - // assume dimension of output shape is SAME AS INPUT ! 
- std::vector> ret(1, input_shape); - auto & output_shape = ret[0]; - for (size_t i = 0; i < input_shape.size(); i++) { - output_shape[i] = (fEnd[i]-fStart[i])/ fSteps[i]; - } - return ret; - } void Initialize(RModel& model) override { @@ -86,34 +80,53 @@ public: throw std::runtime_error("TMVA Slice Op Input Tensor is not found in model"); } - std::vector> shapes; - fShapeInput = model.GetTensorShape(fNData); + std::vector> shapes; + fShapeInput = model.GetDimTensorShape(fNData); shapes.push_back(fShapeInput); std::vector> itensors(4); - if (fNames.size() > 0) { + + if (fNames.size() > 0) { // size has to be equal to 4 // loop on the extra 2 or 3 or 4 inputs - for (size_t i = 0; i < fNames.size(); ++i) { + for (size_t i = 0; i < 4; ++i) { if (!fNames[i].empty()) { - // std::cout << " i " << i << " getting data for tensor " << fNames[i] << std::endl; - auto dptr = model.GetInitializedTensorData(fNames[i]); - auto tensor = static_cast(dptr.get()); - auto vec = model.GetTensorShape(fNames[i]); - assert(vec.size() == 1); - itensors[i] = std::vector(tensor, tensor + vec[0]); - } else { - switch (i) { - case 2: // missing axes - itensors[2] = std::vector(fShapeInput.size()); - std::iota(itensors[2].begin(), itensors[2].end(), 0); - break; - case 3: // missing steps - itensors[3] = std::vector(itensors[0].size(), 1); - default: break; + if (model.IsInitializedTensor(fNames[i])) { + auto dptr = model.GetInitializedTensorData(fNames[i]); + auto tensor = static_cast(dptr.get()); + auto vec = model.GetTensorShape(fNames[i]); + assert(vec.size() == 1); + itensors[i] = std::vector(tensor, tensor + vec[0]); + + } else if (model.IsShapeTensor(fNames[i])) { + // case is a shape tensor + if (i == 0) { + fStartDims = model.GetShapeTensorValues(fNames[i]); + } else if (i == 1) { + fEndDims = model.GetShapeTensorValues(fNames[i]); + } else if (i == 3) { + fStepDims = model.GetShapeTensorValues(fNames[i]); + } + } else { + // case is an intermediate tensor + auto shape = 
model.GetTensorShape(fNames[i]); + size_t s = shape[0]; + for (size_t k = 0; k < s; k++) { + if (i == 0) { + fStartDims.push_back( Dim{std::string("start_") + fNOutput + "_" + std::to_string(k)}); + fIsStartUndef = true; + } else if (i == 1) { + fEndDims.push_back(Dim{std::string("end_") + fNOutput + "_" + std::to_string(k)}); + fIsEndUndef = true; + } else if (i == 3) { + fStepDims.push_back(Dim{std::string("step_") + fNOutput + "_" + std::to_string(k)}); + fIsStepUndef = true; + } + } } } } } else { + // old slice versions assert(fAttributes.size() > 1); for (size_t i = 0; i < fAttributes.size(); i++) { itensors[i] = fAttributes[i]; @@ -121,64 +134,173 @@ public: } size_t dim = fShapeInput.size(); - fSteps = std::vector(dim, 1); - fStart = std::vector(dim, 0); - fEnd = std::vector(dim, 0); - std::copy(fShapeInput.begin(), fShapeInput.end(), fEnd.begin()); - - auto istart = itensors[0]; - auto iend = itensors[1]; - auto iaxes = itensors[2]; - auto isteps = itensors[3]; + // default values + fSteps = std::vector(dim, Dim{1}); + fStart = std::vector(dim, Dim{0}); + fEnd = fShapeInput; - // make tensor axis - // if iaxes.size is =0 tensor axis is missing and use defaults - if (iaxes.size() > 0) { - for (size_t i = 0; i < iaxes.size(); i++) { + // default axes + if (itensors[2].empty()) { + fAxes.resize(dim); + std::iota(fAxes.begin(), fAxes.end(), 0); + } else { + fAxes = itensors[2]; + for (size_t i = 0; i < fAxes.size(); i++) { // negative axes - they count from the back - if (iaxes[i] < 0) iaxes[i] = dim + iaxes[i]; - if (iaxes[i] < 0 || iaxes[i] >= static_cast(dim)) - throw std::runtime_error("TMVA Slice Op : invalid axis value " + std::to_string(iaxes[i]) + + if (fAxes[i] < 0) fAxes[i] = dim + fAxes[i]; + if (fAxes[i] < 0 || fAxes[i] >= static_cast(dim)) + throw std::runtime_error("TMVA Slice Op : invalid axis value " + std::to_string(fAxes[i]) + " for " + std::to_string(i)); - - size_t iAxisDim = fShapeInput[iaxes[i]]; - // find start/end/step for given axis - 
// check step size for clamping starting/end value - if (istart[i] < 0) istart[i] = iAxisDim + istart[i]; - if (iend[i] < 0) iend[i] = iAxisDim + iend[i]; - if (istart[i] < 0) istart[i] = 0; - if (isteps[i] > 0) { - if (istart[i] > static_cast(iAxisDim)) istart[i] = static_cast(iAxisDim); - if (iend[i] < 0) iend[i] = 0; - if (iend[i] > static_cast(iAxisDim)) iend[i] = static_cast(iAxisDim); - } else if (isteps[i] < 0) { - if (istart[i] > static_cast(iAxisDim)-1) istart[i] = static_cast(iAxisDim) -1; - if (iend[i] < -1) iend[i] = -1; - if (iend[i] > static_cast(iAxisDim)-1) iend[i] = static_cast(iAxisDim) -1; + } + } + // Loop on axis to get start/end/step values + for (size_t i = 0; i < fAxes.size(); i++) { + if (!itensors[0].empty() ) + fStartDims.push_back(Dim{ static_cast(itensors[0][i])}); + if (fStartDims.empty()) + throw std::runtime_error("TMVA Slice Op : Missing start input tensor"); + + if (!itensors[1].empty()) + fEndDims.push_back(Dim{ static_cast(itensors[1][i])}); + else if (fEndDims.empty()) + throw std::runtime_error("TMVA Slice Op : Missing end input tensor"); + + if (!itensors[3].empty()) { + fStepDims.push_back(Dim{ static_cast(itensors[3][i])}); + } + else if (fStepDims.size() < fAxes.size()) // this can happen since it is optional + fStepDims.push_back(Dim{size_t(1)}); + + if (!fShapeInput[fAxes[i]].isParam) { + size_t iAxisDim = fShapeInput[fAxes[i]].dim; + //correct values if too large or too small + IType istart = 0; + if (!fStartDims[i].isParam) { + istart = static_cast(fStartDims[i].dim); + if (istart < 0) istart = iAxisDim + istart; + } + IType iend = static_cast(iAxisDim); + if (!fEndDims[i].isParam) { + iend = static_cast(fEndDims[i].dim); + if (iend < 0) iend = iAxisDim + iend; + } + //steps + IType istep = 1; + if (!fStepDims[i].isParam) { + istep = static_cast(fStepDims[i].dim); + } else { + throw std::runtime_error("TMVA Slice Op : parametric step inputs are not supported"); + } + // clamp start end values depending on steps + // 
start must be [0,N] for positive steps or [0,N-1] for negative + // end must be [0,N] for positive steps or [-1, N-1] for negative + if (istart < 0) istart = 0; + if (istep > 0) { + if (istart > static_cast(iAxisDim)) istart = static_cast(iAxisDim); + if (iend < 0) iend = 0; + if (iend > static_cast(iAxisDim)) iend = static_cast(iAxisDim); + } else if (istep < 0) { + if (istart > static_cast(iAxisDim)-1) istart = static_cast(iAxisDim) -1; + if (iend < -1) iend = -1; + if (iend > static_cast(iAxisDim)-1) iend = static_cast(iAxisDim) -1; } else { - throw std::runtime_error("TMVA Slice Op : invalid step value " + std::to_string(isteps[i]) + + throw std::runtime_error("TMVA Slice Op : invalid step value " + std::to_string(istep) + " for " + std::to_string(i)); } - fStart[iaxes[i]] = istart[i]; - fEnd[iaxes[i]] = iend[i]; - fSteps[iaxes[i]] = isteps[i]; + // for parametric values clamping we will done at run time + if (fStartDims[i].isParam) + fStart[fAxes[i]] = fStartDims[i]; + else + fStart[fAxes[i]] = Dim{size_t(istart)}; + if (fStartDims[i].isParam) + fEnd[fAxes[i]] = fEndDims[i]; + else + fEnd[fAxes[i]] = Dim{size_t(iend)}; + + fSteps[fAxes[i]] = Dim{size_t(istep)}; + } else { + //std::cout << i << " Param dim for " << fAxes[i] << " " << fShapeInput[fAxes[i]] << std::endl; + // correct only negative values + if (!fStartDims[i].isParam) { + IType istart = static_cast(fStartDims[i].dim); + if (istart < 0) { + std::string sstart = std::string("(") + fShapeInput[fAxes[i]].param + "-" + std::to_string(-istart) +")"; + fStart[fAxes[i]] = Dim{sstart,size_t(-1)}; + } else { + fStart[fAxes[i]] = Dim{size_t(istart)}; + } + } else { + fStart[fAxes[i]] = fStartDims[i]; + } + if (!fEndDims[i].isParam) { + IType iend = static_cast(fEndDims[i].dim); + if (iend < 0) { + std::string send = std::string("(") + fShapeInput[fAxes[i]].param + "-" + std::to_string(-iend) +")"; + fEnd[fAxes[i]] = Dim{send,size_t(-1)}; + } else if (iend == std::numeric_limits::max()){ + fEnd[fAxes[i]] = 
fShapeInput[fAxes[i]]; + } else { + fEnd[fAxes[i]] = Dim{size_t(iend)}; + } + } else { + fEnd[fAxes[i]] = fEndDims[i]; + } + + fSteps[fAxes[i]] = fStepDims[i]; } + } + // find output shape + fShapeOutput.resize(dim); + for (size_t i = 0; i < dim; i++) { + if (!fEnd[i].isParam && !fStart[i].isParam && !fSteps[i].isParam) { + int64_t istart = static_cast(fStart[i].dim); + int64_t iend = static_cast(fEnd[i].dim); + int64_t istep= static_cast(fSteps[i].dim); + int64_t s = (iend-istart)/istep; + fShapeOutput[i] = Dim{static_cast(s)}; + } else { + std::string s; + if (fStart[i].GetVal() != "0") + s = "(" + fEnd[i].GetVal() + "-" + fStart[i].GetVal() + ")"; + else + s = fEnd[i].GetVal(); + if (fSteps[i].GetVal() != "1") { + s.insert(0,"("); + s += ")/" + fSteps[i].GetVal() + ")"; + } + fShapeOutput[i] = Dim{s,size_t(-1)}; + // add also the shape parameters to RModel to declare them when + // allocating output tensor + if (fEnd[i].isParam && fEnd[i].dim != size_t(-1)) + model.AddShapeParam(fEnd[i].param,fEnd[i].dim ); + if (fStart[i].isParam && fStart[i].dim != size_t(-1)) + model.AddShapeParam(fStart[i].param,fStart[i].dim ); + if (fSteps[i].isParam && fSteps[i].dim != size_t(-1)) + model.AddShapeParam(fSteps[i].param,fSteps[i].dim ); - fShapeOutput = ShapeInference({fShapeInput})[0]; + } + } // case input is a constant tensor and of int64 type if (model.IsInitializedTensor(fNData) && model.GetTensorType(fNData) == ETensorType::INT64) { fIsOutputConstant = true; auto inputData = static_cast(model.GetInitializedTensorData(fNData).get()); - size_t outputSize = ConvertShapeToLength(fShapeOutput); + size_t outputSize = ConvertShapeToLength(ConvertShapeToInt(fShapeOutput)); std::vector outputData(outputSize); - std::vector inputStride = UTILITY::ComputeStrideFromShape(fShapeInput); - // perform slice using a recursive function- need to use two lambda functions for this + std::vector inputStride = UTILITY::ComputeStrideFromShape(ConvertShapeToInt(fShapeInput)); + if 
(model.Verbose()) { + std::cout << "Do slice for initialized input ..(start, end, step)\n"; + for (size_t ii = 0; ii< fStart.size(); ii++) + std::cout << fStart [ii] << " " << fEnd[ii] << " " << fSteps[ii] << std::endl; + } + // perform slice using a recursive function- need to use two lambda functions for this auto sliceRecursive = [&](size_t iaxis, size_t & outIdx, size_t & inOffset) { auto slice_impl = [&](size_t iax, size_t & outputIdx, size_t & inputOffset, auto & sliceRecImpl) { + if (fStart[iax].isParam || fEnd[iax].isParam || fSteps[iax].isParam) + throw std::runtime_error("TMVA Slice Op : cannot have parametric values when input is constant"); // compute indices std::vector indices; - for (IType i = fStart[iax]; (fSteps[iax] > 0) ? i < fEnd[iax] : i > fEnd[iax]; i += fSteps[iax] ) + for (IType i = (IType) fStart[iax].dim; (IType(fSteps[iax].dim) > 0) ? i < IType(fEnd[iax].dim) : i > IType(fEnd[iax].dim); i += IType(fSteps[iax].dim) ) indices.push_back(i); if (iax == dim-1) { // last axis for (size_t i = 0; i < indices.size(); i++) { @@ -199,40 +321,157 @@ public: size_t offset = 0; sliceRecursive(0, idx, offset); - model.AddConstantTensor(fNOutput, fShapeOutput, outputData.data()); + model.AddConstantTensor(fNOutput, ConvertShapeToInt(fShapeOutput), outputData.data()); if (model.Verbose()) { - std::cout << "Slice: output is a constant tensor " << ConvertShapeToString(fShapeOutput) << " : " + std::cout << "Slice: output is a constant tensor " << ConvertDimShapeToString(fShapeOutput) << " : " << ConvertValuesToString(outputData) << std::endl; } } + else if (model.IsShapeTensor(fNData) && !fStart[0].isParam && !fEnd[0].isParam) { + // case of input is a shape tensor. 
In this case rank=1 always, axis =0 and Slice is trivial + auto inputData = model.GetShapeTensorValues(fNData); + fOutputShapeData = std::vector(inputData.begin() + fStart[0].dim, inputData.begin() + fEnd[0].dim); + // try to convert to integer values if possible + auto outputData = ConvertShapeToInt(fOutputShapeData); + fShapeOutput = { Dim{fOutputShapeData.size()}}; + if (outputData.empty()) { + // is a param shape tensor + model.AddShapeTensor(fNOutput, fOutputShapeData); + fIsOutputParamShape = true; + if (model.Verbose()) { + std::cout << "Slice: output is a shape tensor -> " << fNOutput << " " << ConvertDimShapeToString(fShapeOutput) << " with values " + << ConvertDimShapeToString(fOutputShapeData) << " (shape)" << std::endl; + } + } else { + fIsOutputConstant = true; + std::vector data(outputData.size()); + std::copy(outputData.begin(), outputData.end(), data.begin()); + model.AddConstantTensor(fNOutput, {data.size()}, data.data()); + if (model.Verbose()) { + std::cout << "Slice: output is a constant tensor -> " << fNOutput << " " << ConvertDimShapeToString(fShapeOutput) << " with values " + << ConvertDimShapeToString(fOutputShapeData) << " constant " << std::endl; + } + } + } else { + // check if Slice is just an Identity operator in case start = 0, end = input_shape and step=1 + size_t ndim = fShapeInput.size(); + fIdentitySlice = fShapeOutput.size() == ndim; + // check also if input data is not input to the model. 
In that case we copy the data since we cannot just copy from the input pointer + fIdentitySlice &= (!model.IsReadyInputTensor(fNData) && !model.IsDimInputTensor(fNData)); + for (size_t idim = 0; idim < ndim; idim++) { + if (!fIdentitySlice) break; + fIdentitySlice &= (fStart[idim].GetVal() == "0"); + fIdentitySlice &= (fSteps[idim].GetVal() == "1"); + fIdentitySlice &= (fEnd[idim].GetVal() == fShapeInput[idim].GetVal()); + } + model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); + //if (fIdentitySlice) model.AddAliasTensor(fNOutput, fNData); + if (model.Verbose()) { - std::cout << "Slice ---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + std::cout << "Slice " << fNData << " " << ConvertDimShapeToString(fShapeInput) + << "---> " << fNOutput << " " << ConvertDimShapeToString(fShapeOutput); + if (fIdentitySlice) std::cout << " (using alias tensor since slice is an identity) "; + std::cout << std::endl; + } } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors + std::string Generate(std::string opName) override { - OpName = "op_" + OpName; if (fShapeInput.empty() || fShapeOutput.empty()){ - throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Slice Op called to Generate without being initialized first"); } std::stringstream out; - //std::string opName = "Slice"; - out << SP << "///------- Slice operator\n" << std::endl; - // loop on the dimensions depending no the orders + out << "///------- Slice operator " << opName << "---> " << fNOutput << " " + << ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl; + if (fIsOutputConstant) return out.str(); //no op for constant tensors + if (fIsOutputParamShape) { + out << "/// Slice output is a shape tensor with values : " << ConvertDimShapeToString(fShapeOutput) << "\n"; + // need to generate code assigning values to 
shape tensors + for (int i = 0; i < static_cast(fShapeOutput[0].dim); i++) { + out << SP << "tensor_" << fNOutput << "[" << i << "] = " << fOutputShapeData[i] << ";\n"; + } + return out.str(); + } + size_t ndim = fShapeInput.size(); - std::vector strides(ndim,1); - for (int i = int(ndim-2); i >=0 ; i--) { - strides[i] = strides[i+1]*fShapeInput[i+1]; + + if (fIdentitySlice) { + out << "/// Slice is just an identity (copy) \n"; + //out << SP << "tensor_" << fNOutput << " = const_cast<" << ConvertTypeToString(fOutputType) << " *>(tensor_" << fNData << ");\n"; + out << SP << "std::copy(tensor_" << fNData << ", tensor_" << fNData << " + " << ConvertDimShapeToLength(fShapeInput) << ", tensor_" << fNOutput << ");\n"; + return out.str(); } + // loop on the dimensions depending no the orders + auto strides = UTILITY::ComputeStrideFromShape(fShapeInput); + + out << SP << "{\n"; // define operator scope + for (size_t i = 0; i < fStepDims.size(); i++) { + if (fStepDims[i].isParam) { + if (fIsStepUndef) + out << SP << "size_t " << fStepDims[i] << " = tensor_" << fNames[3] << "[" << i << "];\n"; + } + } + // special case for parametric values for start/end. 
Need to do clipping + for (size_t i = 0; i < fStartDims.size(); i++) { + if (fStartDims[i].isParam && fStartDims[i].param != fShapeInput[fAxes[i]].param) { + std::string s_start = "start_" + std::to_string(i); + if (fIsStartUndef) { + s_start = fStartDims[i].param; + out << SP << "size_t " << s_start << " = tensor_" << fNames[0] << "[" << i << "];\n"; + } else { + out << SP << "size_t " << s_start << " = " << fStartDims[i] << ";\n"; + fStart[fAxes[i]] = s_start; // need to use this value later when slicing + } + out << SP << "if (" << s_start << " < 0) " << s_start << " += " << fShapeInput[fAxes[i]] <<";\n"; + out << SP << "if (" << s_start << " < 0) " << s_start << " = 0;\n"; + if (!fStepDims[i].isParam) { + if (static_cast(fStepDims[i].dim) > 0 ) + out << SP << "if (" << s_start << " > " << fShapeInput[fAxes[i]] << " ) " << s_start << " = " << fShapeInput[fAxes[i]] <<";\n"; + else + out << SP << "if (" << s_start << " > " << fShapeInput[fAxes[i]] << " - 1" << " ) " << s_start << " = " << fShapeInput[fAxes[i]] << " - 1;\n"; + } + } + // special case if step is negative and shape are equal and step is negative + else if (fStartDims[i].isParam && fStartDims[i].param == fShapeInput[fAxes[i]].param && !fStepDims[i].isParam && static_cast(fStepDims[i].dim) < 0 ) { + fStart[fAxes[i]] = Dim{ fStartDims[i].param + "-1" }; + } + } + // now to for end + for (size_t i = 0; i < fEndDims.size(); i++) { + if (fEndDims[i].isParam && fEndDims[i].param != fShapeInput[fAxes[i]].param) { + std::string s_end = "end_" + std::to_string(i); + if (fIsEndUndef) { + s_end = fEndDims[i].param; + out << SP << "size_t " << s_end << " = tensor_" << fNames[1] << "[" << i << "];\n"; + } else { + out << SP << "size_t " << s_end << " = " << fEndDims[i] << ";\n"; + fEnd[fAxes[i]] = s_end; // need to use this value later when slicing + } + out << SP << "if (" << s_end << " < 0) " << s_end << " += " << fShapeInput[fAxes[i]] <<";\n"; + if (!fStepDims[i].isParam) { + if (static_cast(fStepDims[i].dim) > 
0 ) { + out << SP << "if (" << s_end << " < 0) " << s_end << " = 0;\n"; + out << SP << "if (" << s_end << " > " << fShapeInput[fAxes[i]] << " ) " << s_end << " = " << fShapeInput[fAxes[i]] <<";\n"; + } else { + out << SP << "if (" << s_end << " < -1) " << s_end << " = -1;\n"; + out << SP << "if (" << s_end << " > " << fShapeInput[fAxes[i]] << " - 1" << " ) " << s_end << " = " << fShapeInput[fAxes[i]] << " - 1;\n"; + } + } + } + // special case if step is negative and shape are equal and step is negative + else if (fEndDims[i].isParam && fEndDims[i].param == fShapeInput[fAxes[i]].param && !fStepDims[i].isParam && static_cast(fStepDims[i].dim) < 0 ) { + fEnd[fAxes[i]] = Dim{ fEndDims[i].param + "-1" }; + } + } + out << SP << "size_t iOut = 0;\n"; std::string MSP = SP; for (size_t idim = 0; idim < ndim; idim++) { @@ -259,7 +498,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeInput.empty() || fShapeOutput.empty()) - throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Slice Op called to Generate without being initialized first"); const std::size_t D = fShapeInput.size(); @@ -326,7 +565,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeInput.empty() || fShapeOutput.empty()) - throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Slice Op called to Generate without being initialized first"); std::size_t totalElements = ConvertShapeToLength(fShapeOutput); std::string kname = "sliceKernel_" + opName; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Softmax.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Softmax.hxx index 8a78d84..5626c0f 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Softmax.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Softmax.hxx @@ -9,22 +9,25 @@ namespace SOFIE { -template class ROperator_Softmax final : public ROperator { private: + 
bool fLogSoftmax; // for the logsoftmax case + bool fUseVDT = false; int64_t fAttrAxis; std::string fNX; std::string fNY; - std::vector fShape; + std::vector fShape; std::string fType; public: ROperator_Softmax() {} - ROperator_Softmax(int64_t attr_axis, std::string nameX, std::string nameY) - : fAttrAxis(attr_axis), fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) + ROperator_Softmax(int64_t attr_axis, std::string nameX, std::string nameY, bool logSoftmax = false) + : fLogSoftmax(logSoftmax), + fAttrAxis(attr_axis), fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)) + { fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; @@ -40,144 +43,144 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false) { // input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Softmax Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Softmax Op Input Tensor is not found in model"); } - fShape = model.GetTensorShape(fNX); + fShape = model.GetDimTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); fType = ConvertTypeToString(model.GetTensorType(fNX)); if (model.Verbose()) { - std::cout << "Softmax -> " << fNY << " " << ConvertShapeToString(fShape) << std::endl; + std::cout << "Softmax -> " << fNY << " " << ConvertDimShapeToString(fShape) << std::endl; + } + fUseVDT = model.UseVDT(); + if (fUseVDT) { + model.AddNeededCustomHeader("vdt/exp.h"); + if (fLogSoftmax) + model.AddNeededCustomHeader("vdt/log.h"); } } - std::string Generate(std::string OpName) override { - OpName = "op_" + OpName; + std::string Generate(std::string opName) override { + opName = "op_" + opName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Operator Softmax called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Softmax called to Generate without being initialized 
first"); } std::stringstream out; + out << "///------- Softmax " << opName << " ---> " // << fNY << " " + << ConvertDimShapeToString(fShape) << "\n" << std::endl; size_t size = fShape.size(); - size_t length = ConvertShapeToLength(fShape); + auto length_str = ConvertDimShapeToLength(fShape); size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis; - out << "\n" << SP << "//------ SOFTMAX - " << size << " " << length << " " << axis << "\n"; - // use safe numerically implementation by subtracting max of tensor - if (size == 1) { - out << SP << fType << " vmax = tensor_" << fNX << "[0];\n"; - out << SP << "for (size_t i = 1; i < " << length << " ; i++){\n"; - out << SP << SP << "if (tensor_" << fNX << "[i] > vmax) vmax = tensor_" << fNX << "[i];\n"; - out << SP << "}\n"; - out << SP << fType << " sum = 0.0;\n"; - out << SP << "for (size_t i = 0; i < " << length << " ; i++){\n"; - out << SP << SP << "tensor_" << fNY << "[i] = std::exp(tensor_" << fNX << "[i] - vmax);\n"; - out << SP << SP << "sum += tensor_" << fNY << "[i];\n"; - out << SP << "}\n"; - out << SP << "for (size_t i = 0; i < " << length << " ; i++){\n"; - out << SP << SP << "tensor_" << fNY << "[i] /= sum;\n"; - out << SP << "}\n"; - } else { - size_t batch = fShape[0]; - size_t channel = fShape[1]; - size_t width = (size > 2) ? fShape[size - 1] : 1; - size_t height = (size > 3) ? fShape[size - 2] : 1; - size_t depth = (size > 4) ? 
fShape[size - 3] : 1; - size_t hStride = width; - size_t dStride = height * width; - size_t cStride = depth * dStride; - size_t bStride = channel * cStride; - - size_t N = 0; // Size of the axis - size_t iStride = 0; - if (axis == 0) { - N = batch; - iStride = bStride; - } else if (axis == 1) { - N = channel; - iStride = cStride; - } else if (axis == size - 1) { - N = width; - iStride = 1; - } else if (size > 3 && axis == size - 2) { - N = height; - iStride = hStride; - } else if (size == 5 && axis == size - 3) { - N = depth; - iStride = dStride; - } else { - throw - std::runtime_error("TMVA::SOFIE - Softmax operator along the axis " - + std::to_string(fAttrAxis) + " with " + std::to_string(size) - + "d input tensor not supported."); - } - bool notBatch = axis != 0; - bool notChannel = axis != 1; - bool notDepth = (size == 5 && axis != 2); - bool notHeight = (size == 5 && axis != 3) || (size == 4 && axis != 2); - bool notWidth = (size == 5 && axis != 4) || (size == 4 && axis != 3) || (size == 3 && axis != 2); + std::string expFunction = (fUseVDT) ? "vdt::fast_expf" : "std::exp"; + std::string logFunction = (fUseVDT) ? 
"vdt::fast_logf" : "std::log"; - if (notBatch) { - out << SP << "for (size_t n = 0; n < " << batch << " ; n++){\n"; - } - if (notChannel) { - out << SP << SP << "for (size_t c = 0; c < " << channel << " ; c++){\n"; - } - if (notDepth) { - out << SP << SP << "for (size_t d = 0; d < " << depth << " ; d++){\n"; - } - if (notHeight) { - out << SP << SP << "for (size_t h = 0; h < " << height << " ; h++){\n"; - } - if (notWidth) { - out << SP << SP << "for (size_t w = 0; w < " << width << " ; w++){\n"; - } - out << SP << SP << SP << fType << " sum = 0.;\n"; - out << SP << SP << SP << "size_t index = 0"; - if (notBatch) { - out << " + n * " << bStride; - } - if (notChannel) { - out << "+ c * " << cStride; - } - if (notDepth) { - out << " + d * " << dStride; - } - if (notHeight) { - out << " + h * " << hStride; - } - if (notWidth) { - out << " + w"; + // Check if this is the special case where memory is contiguous. + if (axis == size - 1) { + std::string axis_size = fShape[axis].GetVal(); + std::string num_rows; + if (IsInteger(length_str) && IsInteger(axis_size)) { + num_rows = std::to_string(std::stoul(length_str) / std::stoul(axis_size)); + } else { + num_rows = "(" + length_str + ") / (" + axis_size + ")"; + } + + out << SP << "//----- softmax axis is last one - " << axis << "\n"; + out << SP << "for (int i = 0; i < " << num_rows << "; ++i) {\n"; + out << SP << SP << "size_t offset = i * " << axis_size << ";\n"; + out << SP << SP << fType << " const * x_ptr = &tensor_" << fNX << "[offset];\n"; + out << SP << SP << fType << " * y_ptr = &tensor_" << fNY << "[offset];\n"; + + out << SP << SP << fType << " vmax = x_ptr[0];\n"; + out << SP << SP << "for (int j = 1; j < " << axis_size << "; ++j) {\n"; + out << SP << SP << SP << "if (x_ptr[j] > vmax) vmax = x_ptr[j];\n"; + out << SP << SP << "}\n"; + + out << SP << SP << fType << " sum = 0.0;\n"; + out << SP << SP << "for (int j = 0; j < " << axis_size << "; ++j) {\n"; + out << SP << SP << SP << "y_ptr[j] = " << expFunction 
<< "(x_ptr[j] - vmax);\n"; + out << SP << SP << SP << "sum += y_ptr[j];\n"; + out << SP << SP << "}\n"; + + out << SP << SP << fType << " inv_sum = 1.0f / sum;\n"; + out << SP << SP << "for (int j = 0; j < " << axis_size << "; ++j) {\n"; + out << SP << SP << SP << "y_ptr[j] *= inv_sum;\n"; + if (fLogSoftmax) + out << SP << SP << SP << "y_ptr[j] = " << logFunction << "(y_ptr[j]);\n"; + out << SP << SP << "}\n"; + out << SP << "}\n"; + + } else { + // generic case for any axis + auto stride = UTILITY::ComputeStrideFromShape(fShape); + size_t k = 0; + std::vector l(size); + for (size_t i = 0; i < size; i++) { + if (i != axis) { + for (size_t j = 0; j < k; j++) out << SP; + l[i] = std::string("i") + std::to_string(i); + out << SP << "for (int " << l[i] << " = 0; " << l[i] << " < " << fShape[i] << "; " << l[i] << "++) {\n"; + k++; + } + } + for (size_t j = 0; j < size-1; j++) out << SP; + out << fType << " sum = 0.;\n"; + for (size_t j = 0; j < size-1; j++) out << SP; + out << "size_t index = "; + bool first = true; + for (size_t i = 0; i < size; i++) { + if (i == axis) continue; + if (!first) out << " + "; + if (stride[i].GetVal() != "1") + out << stride[i] << "*"; + out << l[i]; + first = false; } out << ";\n"; - // apply softmax along the axis - find first maximum value for numerical stability - if (N == 0) - throw std::runtime_error("TMVA::SOFIE - Softmax operator is along axis with zero elements"); - out << SP << SP << SP << fType << " vmax = tensor_" << fNX << "[index];\n"; - out << SP << SP << SP << "for (size_t i = 1; i < " << N << "; i++) {\n"; - out << SP << SP << SP << SP << "if (tensor_" << fNX << "[index + i*" << iStride << "] > vmax)\n"; - out << SP << SP << SP << SP << SP << "vmax = tensor_" << fNX << "[index + i*" << iStride << "];\n"; - out << SP << SP << SP << "}\n"; - out << SP << SP << SP << "for (size_t i = 0; i < " << N << "; i++) {\n"; - out << SP << SP << SP << SP << "tensor_" << fNY << "[index + i*" << iStride << "] = std::exp(tensor_" << fNX - 
<< "[index + i*" << iStride << "] - vmax);\n"; - out << SP << SP << SP << SP << "sum += tensor_" << fNY << "[index + i*" << iStride << "];\n"; - out << SP << SP << SP << "}\n"; - out << SP << SP << SP << "for (size_t i = 0; i < " << N << "; i++) {\n"; - out << SP << SP << SP << SP << "tensor_" << fNY << "[index + i*" << iStride << "] /= sum;\n"; - out << SP << SP << SP << "}\n"; - if (notWidth) { - out << SP << SP << "}\n"; // end w - } - if (notHeight) { - out << SP << SP << "}\n"; // end h - } - if (notDepth) { - out << SP << SP << "}\n"; // end d - } - if (notChannel) { - out << SP << SP << "}\n"; // end c - } - if (notBatch) { - out << SP << "}\n"; // end n + // find maximum looping along reduced axis + for (size_t j = 0; j < size-1; j++) out << SP; + out << fType << " vmax = tensor_" << fNX << "[index];\n"; + for (size_t j = 0; j < size-1; j++) out << SP; + out << "for (int i = 1; i < " << fShape[axis] << "; i++) {\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << fType << " x = tensor_" << fNX << "[index + i"; + if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ")"; + out << "];\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << "if (x > vmax) vmax = x;\n"; + for (size_t j = 0; j < size-1; j++) out << SP; + out << "}\n"; + // compute softmax + for (size_t j = 0; j < size-1; j++) out << SP; + out << "for (int i = 0; i < " << fShape[axis] << "; i++) {\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << "size_t id = index + i"; + if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ")"; + out << ";\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << "tensor_" << fNY << "[id] = " << expFunction << "(tensor_" << fNX << "[id] - vmax);\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << "sum += tensor_" << fNY << "[id];\n"; + for (size_t j = 0; j < size-1; j++) out << SP; + out << "}\n"; + // normalize + for (size_t j = 0; j < size-1; j++) out << SP; + out << "for (int i = 0; i < " << fShape[axis] 
<< "; i++) {\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << "size_t id = index + i"; + if (stride[axis].GetVal() != "1") out << "*(" << stride[axis] << ")"; + out << ";\n"; + for (size_t j = 0; j < size; j++) out << SP; + out << "tensor_" << fNY << "[id] /= sum;\n"; + if (fLogSoftmax) { + for (size_t j = 0; j < size; j++) out << SP; + out << "tensor_" << fNY << "[id] = " << logFunction << "(tensor_" << fNY << "[id]);\n"; + } + for (size_t j = 0; j < size-1; j++) out << SP; + out << "}\n"; + //end loops + for (int i = static_cast(k) - 1; i >= 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; } } return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx index 5fdbc47..8e128e4 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx @@ -51,14 +51,14 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Split Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Split Op Input Tensor is not found in model"); } fInputShape = model.GetTensorShape(fNX); // correct for negative axis if (fAxis < 0) fAxis += fInputShape.size(); if (fAxis < 0 || fAxis >= static_cast(fInputShape.size()) ) - throw std::runtime_error("TMVA SOFIE Split - invalid axis " + std::to_string(fAxis)); + throw std::runtime_error("SOFIE Split - invalid axis " + std::to_string(fAxis)); // compute output shapes size_t nsplit = fNYs.size(); @@ -77,10 +77,10 @@ public: } else { // get split tensor values if (!model.IsInitializedTensor(fNSplit)) - throw std::runtime_error("TMVA SOFIE Split - non-initialized split tensors are not supported"); + throw std::runtime_error("SOFIE Split - non-initialized split tensors are not supported"); auto splitShape = model.GetTensorShape(fNSplit); if 
(splitShape.size() != 1 || splitShape[0] != nsplit) - throw std::runtime_error("TMVA SOFIE Split - split input tensor has invalid shape"); + throw std::runtime_error("SOFIE Split - split input tensor has invalid shape"); auto split_data = static_cast(model.GetInitializedTensorData(fNSplit).get()); fSplit = std::vector(split_data, split_data + nsplit); } @@ -94,7 +94,7 @@ public: fOutputShapes.push_back(outputShape); } if (tot_split != fInputShape[fAxis]) - throw std::runtime_error("TMVA SOFIE Split - Sum of split sizes must match the input dimension along the axis"); + throw std::runtime_error("SOFIE Split - Sum of split sizes must match the input dimension along the axis"); if (model.Verbose()) { @@ -109,7 +109,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fOutputShapes.empty()){ - throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Split called to Generate without being initialized first"); } auto input_strides = UTILITY::ComputeStrideFromShape(fInputShape); @@ -156,7 +156,7 @@ public: std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fOutputShapes.empty()) - throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Split called to Generate without being initialized first"); const std::size_t D = fInputShape.size(); const std::size_t Nin = fNYs.size(); @@ -228,7 +228,7 @@ std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fOutputShapes.empty()) - throw std::runtime_error("TMVA SOFIE Operator Split called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Split called to Generate without being initialized first"); 
std::stringstream out; out << "\n//------ SPLIT_GPU_ALPAKA\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_SubGraph.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_SubGraph.hxx index cb17671..572dd43 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_SubGraph.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_SubGraph.hxx @@ -50,7 +50,7 @@ public: void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor if (model.CheckIfTensorAlreadyExist(fNX) == false){ - throw std::runtime_error("TMVA SOFIE If Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE If Op Input Tensor is not found in model"); } //add the subgraph model to parent RModel and initialize them model.InitializeSubGraph(fModel_then); @@ -71,7 +71,7 @@ public: fType = type; else { if (type != fType) - throw std::runtime_error("TMVA SOFIE If Op supports only all outputs of the same type"); + throw std::runtime_error("SOFIE If Op supports only all outputs of the same type"); } model.AddIntermediateTensor(fNYs[i], fType, shape ); } diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx index a2552f1..e1dc974 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx @@ -38,7 +38,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false){ //input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE Swish Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Swish Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); @@ -48,7 +48,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()){ - throw std::runtime_error("TMVA SOFIE Operator Swish called to Generate without being initialized 
first"); + throw std::runtime_error("SOFIE Operator Swish called to Generate without being initialized first"); } std::stringstream out; int length = 1; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx index 0edce9b..9fcb60a 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx @@ -41,7 +41,7 @@ public: void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor if (model.CheckIfTensorAlreadyExist(fNX) == false){ - throw std::runtime_error("TMVA SOFIE Tanh Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Tanh Op Input Tensor is not found in model"); } fShape = model.GetTensorShape(fNX); model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); @@ -52,7 +52,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Tanh operator called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Tanh operator called to Generate without being initialized first"); } std::stringstream out; size_t length = ConvertShapeToLength(fShape); @@ -90,7 +90,7 @@ public: std::string Generate_GPU_ALPAKA(std::string OpName) override { OpName = "op_" + OpName; if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Tanh called to Generate_GPU_ALPAKA without being initialized"); + throw std::runtime_error("SOFIE Tanh called to Generate_GPU_ALPAKA without being initialized"); } std::stringstream out; size_t length = ConvertShapeToLength(fShape); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx index fdaf05b..f060047 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx @@ -46,23 +46,23 @@ public: void Initialize(RModel& model) override { if 
(model.CheckIfTensorAlreadyExist(fNInput) == false) - throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE Tile Op Input Tensor is not found in model"); if (model.CheckIfTensorAlreadyExist(fNRepeats) == false) - throw std::runtime_error("TMVA SOFIE Tile Op Repeats Tensor is not found in model"); + throw std::runtime_error("SOFIE Tile Op Repeats Tensor is not found in model"); fShapeInput = model.GetTensorShape(fNInput); if (!model.IsInitializedTensor(fNRepeats)) - throw std::runtime_error("TMVA SOFIE Tile Op: non-initialized repeats input is not supported"); + throw std::runtime_error("SOFIE Tile Op: non-initialized repeats input is not supported"); auto repptr = model.GetInitializedTensorData(fNRepeats); auto repeats_data = static_cast(repptr.get()); if (repeats_data == nullptr) - throw std::runtime_error("TMVA SOFIE Tile Op: failed to retrieve repeats tensor data"); + throw std::runtime_error("SOFIE Tile Op: failed to retrieve repeats tensor data"); auto repeats_shape = model.GetTensorShape(fNRepeats); if (repeats_shape.size() != 1) - throw std::runtime_error("TMVA SOFIE Tile Op: repeats tensor must be 1D"); + throw std::runtime_error("SOFIE Tile Op: repeats tensor must be 1D"); size_t num_elements = repeats_shape[0]; @@ -88,7 +88,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShapeInput.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Tile Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Tile Op called to Generate without being initialized first"); std::stringstream out; std::string input = "tensor_" + fNInput; @@ -146,7 +146,7 @@ public: std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeInput.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Operator Tile called to Generate without being initialized first"); + throw 
std::runtime_error("SOFIE Operator Tile called to Generate without being initialized first"); const std::size_t D = fShapeInput.size(); @@ -217,7 +217,7 @@ public: std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; if (fShapeInput.empty() || fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Operator Tile called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator Tile called to Generate without being initialized first"); bool repeatsKnown = !fRepeats.empty(); std::size_t totalElements = ConvertShapeToLength(fShapeY); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx index ce51778..7db1768 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx @@ -48,7 +48,7 @@ public: std::vector> ShapeInference(std::vector> input) override { if (input.size() != 2) { - throw std::runtime_error("TMVA SOFIE TopK Op Shape Inference needs exactly 2 input tensors"); + throw std::runtime_error("SOFIE TopK Op Shape Inference needs exactly 2 input tensors"); } auto shape = input[0]; // Shape format: [ m x n x o x p ... ] @@ -62,11 +62,11 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false) { // input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE TopK Op Input Tensor is not found in model"); + throw std::runtime_error("SOFIE TopK Op Input Tensor is not found in model"); } if (model.CheckIfTensorAlreadyExist(fNK) == false) { // input must be a graph input, or already initialized intermediate tensor - throw std::runtime_error("TMVA SOFIE TopK Op Input Tensor i.e. K is not found in model"); + throw std::runtime_error("SOFIE TopK Op Input Tensor i.e. 
K is not found in model"); } fShapeX = model.GetTensorShape(fNX); @@ -111,7 +111,7 @@ public: std::string Generate(std::string OpName) override { OpName = "op_" + OpName; if (fShapeX.empty()) { - throw std::runtime_error("TMVA SOFIE Operator TopK called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Operator TopK called to Generate without being initialized first"); } std::stringstream out; size_t size = fShapeX.size(); diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx index ed94166..a7f3e46 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx @@ -46,10 +46,10 @@ public: } std::vector> ShapeInference(std::vector> input) override { - if (input.size() > 1) throw std::runtime_error("TMVA SOFIE Tranpose Op Shape Inference only need 1 input tensor"); + if (input.size() > 1) throw std::runtime_error("SOFIE Tranpose Op Shape Inference only need 1 input tensor"); auto& data = input[0]; if (fAttrPerm.size() != data.size() ) - throw std::runtime_error("TMVA SOFIE Tranpose Op - Invalid axes attributes"); + throw std::runtime_error("SOFIE Tranpose Op - Invalid axes attributes"); std::vector output_shape(fAttrPerm.size()); for (size_t i = 0; i < fAttrPerm.size(); i++){ @@ -64,7 +64,7 @@ public: void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNData) == false){ //input must be a graph input, or already initialized intermediate tensor std::cout<<"Input tensor for transpose: "< fShapeA; - std::vector fShapeB; - std::vector fShapeC; + + // static shapes (used when tensors are not dynamic) ) + std::vector fShapeX; std::vector fShapeY; + std::vector fShapeC; + std::vector fShapeZ; + + // Dynamic generic shapes + std::vector fDimShapeC; + std::vector fDimShapeX; + std::vector fDimShapeY; + std::vector fDimShapeZ; + // Broadcast flag: mirrors convention of BasicBinary + // bit 0: broadcast Y->X (Y 
needs expanding) + // bit 1: broadcast X->Y (X needs expanding) + // bit 2: broadcast C->Z (C needs expanding) + // bit 4: shapes may differ at runtime (dynamic) + int fBroadcastFlag = 0; public: ROperator_Where(){} - ROperator_Where(const std::string & nameA, const std::string & nameB, const std::string & nameC, const std::string & nameY): - fNA(UTILITY::Clean_name(nameA)), fNB(UTILITY::Clean_name(nameB)), fNC(UTILITY::Clean_name(nameC)), fNY(UTILITY::Clean_name(nameY)){ - fInputTensorNames = { fNA, fNB, fNC }; - fOutputTensorNames = { fNY }; + ROperator_Where(const std::string & nameC, const std::string & nameX, const std::string & nameY, const std::string & nameZ): + fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)), fNC(UTILITY::Clean_name(nameC)), fNZ(UTILITY::Clean_name(nameZ)){ + fInputTensorNames = { fNX, fNY, fNC }; + fOutputTensorNames = { fNZ }; } // type of output given input @@ -53,180 +67,292 @@ public: void Initialize(RModel& model) override { // input must be a graph input, or already initialized intermediate tensor - if (!model.CheckIfTensorAlreadyExist(fNA)){ - throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNA + "is not found in model"); + if (!model.CheckIfTensorAlreadyExist(fNX)){ + throw std::runtime_error(std::string("SOFIE Where Op Input Tensor ") + fNX + "is not found in model"); } - if (!model.CheckIfTensorAlreadyExist(fNB)) { - throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNB + "is not found in model"); + if (!model.CheckIfTensorAlreadyExist(fNY)) { + throw std::runtime_error(std::string("SOFIE Where Op Input Tensor ") + fNY + "is not found in model"); } if (!model.CheckIfTensorAlreadyExist(fNC)) { - throw std::runtime_error(std::string("TMVA SOFIE Where Op Input Tensor ") + fNC + "is not found in model"); + throw std::runtime_error(std::string("SOFIE Where Op Input Tensor ") + fNC + "is not found in model"); } // check if fNC input tensor is boolean if 
(model.IsReadyInputTensor(fNC)) fIsInputBoolTensor = true; - // check broadcast for A, B and C - fShapeA = model.GetTensorShape(fNA); - fShapeB = model.GetTensorShape(fNB); - fShapeC = model.GetTensorShape(fNC); - bool broadcast = !UTILITY::AreSameShape(fShapeA, fShapeB) || !UTILITY::AreSameShape(fShapeA, fShapeC); - if (broadcast) { - // find shape to broadcast between A,B,C looking for max length - size_t lengthA = ConvertShapeToLength(fShapeA); - size_t lengthB = ConvertShapeToLength(fShapeB); - size_t lengthC = ConvertShapeToLength(fShapeC); - bool broadcastA = false, broadcastB = false, broadcastC = false; - if (lengthA >= lengthB && lengthA >= lengthC) { - fShapeY = fShapeA; - //broadcast B and C if different than A - broadcastB = (lengthB != lengthA); - broadcastC = (lengthC != lengthA); - } - else if (lengthB >= lengthA && lengthB >= lengthC) { - fShapeY = fShapeB; - //broadcast A and C if different than B - broadcastA = (lengthA != lengthB); - broadcastC = (lengthC != lengthB); - } - else if (lengthC >= lengthA && lengthC >= lengthB) { - fShapeY = fShapeC; - //broadcast A and B if different than C - broadcastA = (lengthA != lengthC); - broadcastB = (lengthB != lengthC); - } - // Broadcast A to Y - if (broadcastA) { - fNBroadcastedA = "BC_" + fNA + "_to_" + fNY; - if (model.IsInitializedTensor(fNA)) { - auto data = model.GetInitializedTensorData(fNA); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeA, fShapeY), - std::default_delete()); - // Update the data and the shape of A - model.AddConstantTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY, broadcastedData); - fShapeA = fShapeY; - } else { - // Add an intermediate tensor for broadcasting A - model.AddIntermediateTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY); + // ---------------------------------------------------------------- // + // Collect shapes – dynamic or static + // 
---------------------------------------------------------------- // + int dynamicInputs = 0; // bitmask: bit0=C, bit1=X, bit2=Y + + if (model.IsDynamicTensor(fNC)) { + fDimShapeC = model.GetDynamicTensorShape(fNC); + dynamicInputs |= 1; + } else { + fShapeC = model.GetTensorShape(fNC); + fDimShapeC = ConvertShapeToDim(fShapeC); + } + if (model.IsDynamicTensor(fNX)) { + fDimShapeX = model.GetDynamicTensorShape(fNX); + dynamicInputs |= 2; + } else { + fShapeX = model.GetTensorShape(fNX); + fDimShapeX = ConvertShapeToDim(fShapeX); + } + if (model.IsDynamicTensor(fNY)) { + fDimShapeY = model.GetDynamicTensorShape(fNY); + dynamicInputs |= 4; + } else { + fShapeY = model.GetTensorShape(fNY); + fDimShapeY = ConvertShapeToDim(fShapeY); + } + + + if (model.Verbose()) { + if (dynamicInputs & 1) + std::cout << "Where : condition " << fNC << " is dynamic " << ConvertDimShapeToString(fDimShapeC) << "\n"; + if (dynamicInputs & 2) + std::cout << "Where : " << fNX << " is dynamic " << ConvertDimShapeToString(fDimShapeX) << "\n"; + if (dynamicInputs & 4) + std::cout << "Where : Y " << fNZ << " is dynamic " << ConvertDimShapeToString(fDimShapeZ) << "\n"; + } + + // ---------------------------------------------------------------- // + // Static path: all shapes known at code-gen time + // ---------------------------------------------------------------- // + if (dynamicInputs == 0) { + + bool broadcast = !UTILITY::AreSameShape(fShapeX, fShapeY) || !UTILITY::AreSameShape(fShapeX, fShapeC); + if (broadcast) { + // find shape to broadcast between X,Y,C looking for max length + size_t lengthX = ConvertShapeToLength(fShapeX); + size_t lengthY = ConvertShapeToLength(fShapeY); + size_t lengthC = ConvertShapeToLength(fShapeC); + bool broadcastX = false, broadcastY = false, broadcastC = false; + if (lengthX >= lengthY && lengthX >= lengthC) { + fShapeZ = fShapeX; + // broadcast Y and C if different than X + broadcastY = (lengthY != lengthX); + broadcastC = (lengthC != lengthX); + } else if 
(lengthY >= lengthX && lengthY >= lengthC) { + fShapeZ = fShapeY; + // broadcast X and C if different than Y + broadcastX = (lengthX != lengthY); + broadcastC = (lengthC != lengthY); + } else if (lengthC >= lengthX && lengthC >= lengthY) { + fShapeZ = fShapeC; + // broadcast X and Y if different than C + broadcastX = (lengthX != lengthC); + broadcastY = (lengthY != lengthC); } - } - // Broadcast B to Y - if (broadcastB) { - fNBroadcastedB = "BC_" + fNB + "_to_" + fNY; - if (model.IsInitializedTensor(fNB)) { - auto data = model.GetInitializedTensorData(fNB); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeB, fShapeY), - std::default_delete()); - // do not update tensor B but add broadcasted one (since it can be input to some other operators) - model.AddConstantTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY, broadcastedData); - fShapeB = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY); + + // Broadcast X to Z + if (broadcastX) { + fNBroadcastedX = "BC_" + fNX + "_to_" + fNZ; + if (model.IsInitializedTensor(fNX)) { + auto data = model.GetInitializedTensorData(fNX); + std::shared_ptr broadcastedData( + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX, fShapeZ), + std::default_delete()); + // Update the data and the shape of X + model.AddConstantTensor(fNBroadcastedX, model.GetTensorType(fNX), fShapeZ, broadcastedData); + fShapeX = fShapeZ; + } else { + // I need to prepend to shape of X the extra dimensions added for broadcasting to Z + if (fShapeX.size() < fShapeZ.size()) { + size_t nPrepend = fShapeZ.size() - fShapeX.size(); + fShapeX.insert(fShapeX.begin(), nPrepend, 1); + } + } } - } - // Broadcast C to Y - if (broadcastC) { - fNBroadcastedC = "BC_" + fNC + "_to_" + fNY; - if (model.IsInitializedTensor(fNC)) { - auto data = model.GetInitializedTensorData(fNC); - std::shared_ptr 
broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeC, fShapeY), - std::default_delete()); - // do not update tensor C but add broadcasted one (since it can be input to some other operators) - model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeY, broadcastedData); - fShapeC = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - model.AddIntermediateTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeY); + // Broadcast Y to Z + if (broadcastY) { + fNBroadcastedY = "BC_" + fNY + "_to_" + fNZ; + if (model.IsInitializedTensor(fNY)) { + auto data = model.GetInitializedTensorData(fNY); + std::shared_ptr broadcastedData( + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeY, fShapeZ), + std::default_delete()); + // do not update tensor B but add broadcasted one (since it can be input to some other operators) + model.AddConstantTensor(fNBroadcastedY, model.GetTensorType(fNY), fShapeZ, broadcastedData); + fShapeY = fShapeZ; + } else { + // I need to prepend to shape of Y the extra dimensions added for broadcasting to Z + if (fShapeY.size() < fShapeZ.size()) { + size_t nPrepend = fShapeZ.size() - fShapeY.size(); + fShapeY.insert(fShapeY.begin(), nPrepend, 1); + } + + } } - } - } else { - fShapeY = fShapeA; - } - // check case of constant output (if all inputs are defined) - if (model.IsInitializedTensor(fNC)) { - - std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC; - auto dataC = static_cast(model.GetInitializedTensorData(nameC).get()); - model.SetNotWritableInitializedTensor(nameC); - T * dataA = nullptr; - T * dataB = nullptr; - std::vector shapeDataA; - std::vector shapeDataB; - if (model.IsInitializedTensor(fNA)) { - std::string nameA = fNBroadcastedA.empty()? 
fNA : fNBroadcastedA; - dataA = static_cast(model.GetInitializedTensorData(nameA).get()); - // flag tensors to not be written in a file - model.SetNotWritableInitializedTensor(nameA); - } else if (model.IsShapeTensor(fNA)) - shapeDataA = model.GetShapeTensorValues(fNA); - if (model.IsInitializedTensor(fNB)) { - std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; - dataB = static_cast(model.GetInitializedTensorData(nameB).get()); - model.SetNotWritableInitializedTensor(nameB); - } else if (model.IsShapeTensor(fNB)) - shapeDataB = model.GetShapeTensorValues(fNB); - - std::vector dataY; - std::vector shapeDataY; - - bool isOutputConstantTensor = true; - if (dataA && dataB) { - dataY.resize(ConvertShapeToLength(fShapeY)); - for (size_t i = 0; i < dataY.size(); i++) - dataY[i] = (dataC[i]) ? dataA[i] : dataB[i]; - } - else if (dataA && shapeDataB.size()>0 ) { - shapeDataY.resize(ConvertShapeToLength(fShapeY)); - for (size_t i = 0; i < shapeDataY.size(); i++) { - shapeDataY[i] = (dataC[i]) ? 
Dim{size_t(dataA[i])} : shapeDataB[i]; - isOutputConstantTensor &= !shapeDataY[i].isParam; + // Broadcast C to Z + if (broadcastC) { + fNBroadcastedC = "BC_" + fNC + "_to_" + fNZ; + if (model.IsInitializedTensor(fNC)) { + auto data = model.GetInitializedTensorData(fNC); + std::shared_ptr broadcastedData( + UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeC, fShapeZ), + std::default_delete()); + // do not update tensor C but add broadcasted one (since it can be input to some other operators) + model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeZ, broadcastedData); + fShapeC = fShapeZ; + } else { + // I need to prepend to shape of C the extra dimensions added for broadcasting to Z + if (fShapeC.size() < fShapeZ.size()) { + size_t nPrepend = fShapeZ.size() - fShapeC.size(); + fShapeC.insert(fShapeC.begin(), nPrepend, 1); + } + } } + } else { + fShapeZ = fShapeX; } - else if (dataB && shapeDataA.size()>0 ) { - shapeDataY.resize(ConvertShapeToLength(fShapeY)); - for (size_t i = 0; i < shapeDataY.size(); i++) { - shapeDataY[i] = (dataC[i]) ? shapeDataB[i] : Dim{size_t(dataB[i])}; - isOutputConstantTensor &= !shapeDataY[i].isParam; + // check case of constant output (if all inputs are defined) + if (model.IsInitializedTensor(fNC)) { + std::string nameC = fNBroadcastedC.empty() ? fNC : fNBroadcastedC; + auto dataC = static_cast(model.GetInitializedTensorData(nameC).get()); + model.SetNotWritableInitializedTensor(nameC); + T *dataX = nullptr; + T *dataY = nullptr; + std::vector shapeDataX; + std::vector shapeDataY; + if (model.IsInitializedTensor(fNX)) { + std::string nameX = fNBroadcastedX.empty() ? 
fNX : fNBroadcastedX; + dataX = static_cast(model.GetInitializedTensorData(nameX).get()); + // flag tensors to not be written in a file + model.SetNotWritableInitializedTensor(nameX); + } else if (model.IsShapeTensor(fNX)) { + shapeDataX = model.GetShapeTensorValues(fNX); + } + if (model.IsInitializedTensor(fNY)) { + std::string nameY = fNBroadcastedY.empty() ? fNY : fNBroadcastedY; + dataY = static_cast(model.GetInitializedTensorData(nameY).get()); + model.SetNotWritableInitializedTensor(nameY); + } else if (model.IsShapeTensor(fNY)) { + shapeDataY = model.GetShapeTensorValues(fNY); + } + std::vector dataZ; // used in case output is constant tensor + std::vector shapeDataZ; // used in case output is a shape tensor (can be also constant if all + // dimensions are not parametric) + // if fNC (condition) is initialized we know the output is a shape or a constant tensor, + // so we can compute it at initialization and add it as a constant tensor to the model + // (and not add the operator output as intermediate tensor to the model) + bool isOutputConstantTensor = true; + if (dataX && dataY) { + dataZ.resize(ConvertShapeToLength(fShapeZ)); + for (size_t i = 0; i < dataZ.size(); i++) + dataZ[i] = (dataC[i]) ? dataX[i] : dataY[i]; + if (model.Verbose()) + std::cout << "data A and B : dataZ constant: " << ConvertValuesToString(dataZ) << std::endl; + } else if (dataX && shapeDataY.size() > 0) { + shapeDataZ.resize(ConvertShapeToLength(fShapeZ)); + for (size_t i = 0; i < shapeDataZ.size(); i++) { + shapeDataZ[i] = (dataC[i]) ? Dim{size_t(dataX[i])} : shapeDataY[i]; + isOutputConstantTensor &= !shapeDataZ[i].isParam; + } + if (model.Verbose()) + std::cout << "data A but shapeB " << ConvertDimShapeToString(shapeDataY) << " " + << isOutputConstantTensor << std::endl; + } else if (dataY && shapeDataX.size() > 0) { + shapeDataZ.resize(ConvertShapeToLength(fShapeZ)); + for (size_t i = 0; i < shapeDataZ.size(); i++) { + shapeDataZ[i] = (dataC[i]) ? 
shapeDataY[i] : Dim{size_t(dataY[i])}; + isOutputConstantTensor &= !shapeDataZ[i].isParam; + } + if (model.Verbose()) + std::cout << "data B but shapeA " << ConvertDimShapeToString(shapeDataX) << " " + << isOutputConstantTensor << std::endl; + } else if (shapeDataY.size() > 0 && shapeDataX.size() > 0) { + shapeDataZ.resize(ConvertShapeToLength(fShapeZ)); + for (size_t i = 0; i < shapeDataZ.size(); i++) { + shapeDataZ[i] = (dataC[i]) ? shapeDataX[i] : shapeDataY[i]; + isOutputConstantTensor &= !shapeDataZ[i].isParam; + } + if (model.Verbose()) + std::cout << " shapeA and B " << ConvertDimShapeToString(shapeDataX) << " shapeB " + << ConvertDimShapeToString(shapeDataY) << " " << isOutputConstantTensor << std::endl; + } + fIsOutputConstant = true; + // add as constant or shape tensor depending on the case + if (dataZ.size() > 0) + model.AddConstantTensor(fNZ, fShapeZ, dataZ.data()); + else if (shapeDataZ.size() > 0) + model.AddShapeTensor(fNZ, shapeDataZ, fShapeZ.size() == 0); + else { + fIsOutputConstant = false; } + if (fIsOutputConstant && model.Verbose()) + std::cout << "Where op ---> " << fNZ << " " << ConvertShapeToString(fShapeZ) << " : " + << ((dataZ.size() > 0) ? ConvertValuesToString(dataZ) : ConvertDimShapeToString(shapeDataZ)) + << ((dataZ.size() > 0) ? " (constant)" : " (shape)") << std::endl; + + // output is a constant tensor + if (fIsOutputConstant) + fOutputTensorNames.pop_back(); } - else if (shapeDataB.size() > 0 && shapeDataA.size()>0 ) { - shapeDataY.resize(ConvertShapeToLength(fShapeY)); - for (size_t i = 0; i < shapeDataY.size(); i++) { - shapeDataY[i] = (dataC[i]) ? 
shapeDataA[i] : shapeDataB[i]; - isOutputConstantTensor &= !shapeDataY[i].isParam; + if (!fIsOutputConstant) { + + fDimShapeZ = ConvertShapeToDim(fShapeZ); + model.AddIntermediateTensor(fNZ, model.GetTensorType(fNX), fShapeZ); + if (model.Verbose()) + std::cout << "Where : condition : " << fNC << " " << ConvertShapeToString(fShapeC) << " X " + << fNX << " " << ConvertShapeToString(fShapeX) << " Y " << fNY << " " + << ConvertShapeToString(fShapeY) << " ---> " << fNZ << " " << ConvertShapeToString(fShapeZ) + << std::endl; + } + } else { + // ---------------------------------------------------------------- // + // Dynamic path: at least one input has a parametric shape + // Need to use BroadcastShape to find output shape + // ---------------------------------------------------------------- // + auto retXY = UTILITY::MultidirectionalBroadcastShape(fDimShapeX, fDimShapeY); + fBroadcastFlag = retXY.first; + fDimShapeZ = retXY.second; + auto retCZ = UTILITY::MultidirectionalBroadcastShape(fDimShapeC, fDimShapeZ); + fBroadcastFlag |= retCZ.first; + fDimShapeZ = retCZ.second; + + // Resolve std::max params to actual input dim params (same logic as BasicBinary) + if (fBroadcastFlag & 4) { + auto IsInputDimParam = [&](const std::string &p) { + for (auto &input : model.GetInputTensorNames()) + for (auto &s : model.GetDimTensorShape(input)) + if (s.isParam && s.param == p) return true; + return false; + }; + for (size_t i = 0; i < fDimShapeZ.size(); i++) { + auto &s = fDimShapeZ[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + // prefer A dim over B dim + if (i < fDimShapeX.size() && IsInputDimParam(fDimShapeX[i].param)) { + s = (fDimShapeX[i].dim != 1) ? fDimShapeX[i] : fDimShapeY[i]; + } else if (i < fDimShapeY.size() && IsInputDimParam(fDimShapeY[i].param)) { + s = (fDimShapeY[i].dim != 1) ? 
fDimShapeY[i] : fDimShapeX[i]; + } + } } } - fIsOutputConstant = true; // this contains both case constant tensor output ans shape tensor output - if (isOutputConstantTensor && dataY.empty()) { - dataY.resize(shapeDataY.size()); - for (size_t i = 0; i < shapeDataY.size(); i++) - dataY[i] = static_cast(shapeDataY[i].dim); + // I need to prepend to shape of X,Y,C the extra dimensions added for broadcasting to Z + if (fDimShapeX.size() < fDimShapeZ.size()) { + size_t nPrepend = fDimShapeZ.size() - fDimShapeX.size(); + fDimShapeX.insert(fDimShapeX.begin(), nPrepend, Dim{1}); + } + if (fDimShapeY.size() < fDimShapeZ.size()) { + size_t nPrepend = fDimShapeZ.size() - fDimShapeY.size(); + fDimShapeY.insert(fDimShapeY.begin(), nPrepend, Dim{1}); } - if (dataY.size() > 0) - model.AddConstantTensor(fNY, fShapeY, dataY.data()); - else if (shapeDataY.size() > 0 ) - model.AddShapeTensor(fNY, shapeDataY, fShapeY.size() == 0); - else { - fIsOutputConstant = false; + if (fDimShapeC.size() < fDimShapeZ.size()) { + size_t nPrepend = fDimShapeZ.size() - fDimShapeC.size(); + fDimShapeC.insert(fDimShapeC.begin(), nPrepend, Dim{1}); } - if (fIsOutputConstant && model.Verbose()) - std::cout << "Where op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ((dataY.size() > 0) ? ConvertValuesToString(dataY) : ConvertDimShapeToString(shapeDataY) ) - << ((dataY.size() > 0) ? 
" (constant)" : " (shape)") << std::endl; - // output is a constant tensor - if (fIsOutputConstant) fOutputTensorNames.pop_back(); - } - if (!fIsOutputConstant) { - model.AddIntermediateTensor(fNY, model.GetTensorType(fNA), fShapeY); - if (model.Verbose()) - std::cout << "Where op " << " condition : " << fNC << " " << ConvertShapeToString(fShapeC) << - " X " << fNA << " " << ConvertShapeToString(fShapeA) << " Y " << fNB << " " << ConvertShapeToString(fShapeB) - << " ---> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; + model.AddIntermediateTensor(fNZ, model.GetTensorType(fNX), fDimShapeZ); + + if (model.Verbose()) + std::cout << "Where (dynamic) : C=" << ConvertDimShapeToString(fDimShapeC) + << " A=" << ConvertDimShapeToString(fDimShapeX) + << " B=" << ConvertDimShapeToString(fDimShapeY) + << " --> Y=" << ConvertDimShapeToString(fDimShapeZ) << "\n"; } } @@ -237,51 +363,118 @@ public: std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; - opName = "op_" + opName; - - if (fShapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first"); - } std::stringstream out; - out << SP << "\n//-------- Where " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; - size_t length = ConvertShapeToLength(fShapeY); - std::string typeName = TensorType::Name(); - // Broadcast A if it's uninitialized - if (fShapeA != fShapeY) { - out << SP << "// Broadcasting uninitialized tensor " << fNA << "\n"; - //out << SP << "{\n"; - out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY) - << ", tensor_" << fNBroadcastedA << ");\n"; - } - // Broadcast B if it's uninitialized - if (fShapeB != fShapeY) { - out << SP << "// Broadcasting uninitialized tensor " << fNB << "\n"; - //out << SP << "{\n"; - out << SP << 
"TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY) - << ", tensor_" << fNBroadcastedB << ");\n"; + out << SP << "\n//------ WHERE " << opName << " --> " << ConvertDimShapeToString(fDimShapeZ) << "\n"; + if (fIsOutputConstant) return out.str(); + + + // ---------------------------------------------------------------- // + // Runtime broadcast validation (dynamic shapes, flag bit 4) + // ---------------------------------------------------------------- // + if (fBroadcastFlag & 4) { + auto lengthX = ConvertDimShapeToLength(fDimShapeX); + auto lengthY = ConvertDimShapeToLength(fDimShapeY); + auto lengthC = ConvertDimShapeToLength(fDimShapeC); + out << SP << "if (" << lengthX << " != " << lengthY << " || " + << lengthX << " != " << lengthC << ") {\n"; + for (size_t i = 0; i < fDimShapeZ.size(); i++) { + // validate X vs Z + if (i < fDimShapeX.size() && fDimShapeX[i].isParam) { + out << SP << SP << "if (" << fDimShapeX[i] << " != 1 && " + << fDimShapeX[i] << " != " << fDimShapeZ[i] << ")\n"; + out << SP << SP << SP + << "throw std::runtime_error(\"SOFIE Where: cannot broadcast A dim " << i << " in " << opName << "\");\n"; + } + // validate Y vs Z + if (i < fDimShapeY.size() && fDimShapeY[i].isParam) { + out << SP << SP << "if (" << fDimShapeY[i] << " != 1 && " + << fDimShapeY[i] << " != " << fDimShapeZ[i] << ")\n"; + out << SP << SP << SP + << "throw std::runtime_error(\"SOFIE Where: cannot broadcast B dim " << i << " in " << opName << "\");\n"; + } + // validate C vs Z + if (i < fDimShapeC.size() && fDimShapeC[i].isParam) { + out << SP << SP << "if (" << fDimShapeC[i] << " != 1 && " + << fDimShapeC[i] << " != " << fDimShapeZ[i] << ")\n"; + out << SP << SP << SP + << "throw std::runtime_error(\"SOFIE Where: cannot broadcast C dim " << i << " in " << opName << "\");\n"; + } + } + out << SP << "}\n"; } - // Broadcast C if it's uninitialized - if (fShapeC != fShapeY) 
{ - // special case if C is an input tensor - if (fIsInputBoolTensor) { - size_t inputLength = ConvertShapeToLength(fShapeC); - out << SP << "std::vector tmp_tensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n"; + // implement now where using teh strides and looping on the different dimensions + // ---------------------------------------------------------------- // + // Generate loop(s) with per-dimension stride-based index arithmetic + // ---------------------------------------------------------------- // + auto stridesX = UTILITY::ComputeStrideFromShape(fDimShapeX); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + auto stridesC = UTILITY::ComputeStrideFromShape(fDimShapeC); + auto stridesZ = UTILITY::ComputeStrideFromShape(fDimShapeZ); + + auto buildIdxExpr = [&](const std::vector &dimShape, + const std::vector &strides, + size_t rankZ) -> std::string { + if (dimShape.empty() || + std::all_of(dimShape.begin(), dimShape.end(), + [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) + return "0"; + std::string expr; + size_t offset = rankZ - dimShape.size(); + for (size_t i = 0; i < dimShape.size(); ++i) { + if (dimShape[i].dim == 1 || dimShape[i].GetVal() == "1") continue; + expr += "idx_" + std::to_string(i + offset); + if (strides[i].GetVal() != "1") + expr += " * " + strides[i].GetVal(); + expr += " + "; } - out << SP << "// Broadcasting uninitialized tensor " << fNC << "\n"; - //out << SP << "{\n"; - out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tmp_tensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) - << ", tensor_" << fNBroadcastedC << ");\n"; + if (expr.size() >= 3) + for (int j = 0; j < 3; j++) expr.pop_back(); // remove trailing " + " + return expr.empty() ? 
"0" : expr; + }; + + std::string idxX = buildIdxExpr(fDimShapeX, stridesX, fDimShapeZ.size()); + std::string idxY = buildIdxExpr(fDimShapeY, stridesY, fDimShapeZ.size()); + std::string idxC = buildIdxExpr(fDimShapeC, stridesC, fDimShapeZ.size()); + + // Emit nested loops over output shape + int nloop = 0; + std::string idxZ; + // case Z is a scalar (all dimensions are 1) or Z has no dimension + if (fDimShapeZ.empty() || + std::all_of(fDimShapeZ.begin(), fDimShapeZ.end(), + [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + idxZ = "0"; + } else { + for (size_t i = 0; i < fDimShapeZ.size(); ++i) { + if (fDimShapeZ[i].dim != 1 && fDimShapeZ[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i + << " < " << fDimShapeZ[i] << "; ++idx_" << i << ") {\n"; + idxZ += "idx_" + std::to_string(i); + if (stridesZ[i].GetVal() != "1") + idxZ += " * " + stridesZ[i].GetVal(); + idxZ += " + "; + } + } + if (idxZ.size() >= 3) + for (int j = 0; j < 3; j++) idxZ.pop_back(); } - std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA; - std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB; - std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC; - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - // get output tensor applying condition - out << SP << SP << "tensor_" << fNY << "[id] = " << "tensor_" << nameC << "[id] ? tensor_" - << nameA << "[id] : tensor_" + nameB + "[id];\n"; - out << SP << "}\n"; + + // Inner assignment + for (int j = 0; j < nloop + 1; j++) out << SP; + out << "tensor_" << fNZ << "[" << idxZ << "] = " + << "tensor_" << fNC << "[" << idxC << "] ? 
" + << "tensor_" << fNX << "[" << idxX << "] : " + << "tensor_" << fNY << "[" << idxY << "];\n"; + + // Close loops + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; + } + return out.str(); } @@ -290,7 +483,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Where Op called to Generate without being initialized first"); const std::size_t D = fShapeY.size(); std::size_t totalElements = ConvertShapeToLength(fShapeY); @@ -392,7 +585,7 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; if (fShapeY.empty()) - throw std::runtime_error("TMVA SOFIE Where Op called to Generate without being initialized first"); + throw std::runtime_error("SOFIE Where Op called to Generate without being initialized first"); std::size_t totalElements = ConvertShapeToLength(fShapeY); std::string kname = "whereKernel_" + opName; diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx index b75ee3e..5ace31b 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx @@ -213,33 +213,69 @@ std::string ConvertDimShapeToLength(const std::vector & shape); template std::string ConvertValToString(T value) { std::stringstream ret; - if (std::is_floating_point_v) - ret << std::setprecision(std::numeric_limits::max_digits10); - ret << value; + ret << std::to_string(value); + return ret.str(); +} +// float specialization +template<> +inline std::string ConvertValToString(float value) { + std::stringstream ret; + // special case for infinity and Nan + if (std::isinf(value)) + ret << (value > 0 ? 
"std::numeric_limits::infinity()" : + "-std::numeric_limits::infinity()"); + else if (std::isnan(value)) + ret << "std::numeric_limits::quiet_NaN()"; + else { + ret << std::setprecision(std::numeric_limits::max_digits10); + ret << value; + } + return ret.str(); +} +// double specialization +template<> +inline std::string ConvertValToString(double value) { + std::stringstream ret; + // special case for infinity and Nan + if (std::isinf(value)) + ret << (value > 0 ? "std::numeric_limits::infinity()" : + "-std::numeric_limits::infinity()"); + else if (std::isnan(value)) + ret << "std::numeric_limits::quiet_NaN()"; + else { + ret << std::setprecision(std::numeric_limits::max_digits10); + ret << value; + } + return ret.str(); +} +// int64_t specialization for INT64_MIN +template<> +inline std::string ConvertValToString(int64_t value) { + std::stringstream ret; + if (value == INT64_MIN) + ret << "INT64_MIN"; + else + ret << std::to_string(value); return ret.str(); } // convert list of values in a string taking into account the precision template -std::string ConvertValuesToString(size_t n, const T * data) { +std::string ConvertValuesToString(size_t n, const T * data, size_t maxprint = -1) { std::stringstream ret; ret << "{ "; - for (size_t i = 0; i < n; i++) { - if (std::is_floating_point_v) - ret << std::setprecision(std::numeric_limits::max_digits10) << data[i]; - else - // cast in case of boolean (int8) - ret << (int64_t) data[i]; - + for (size_t i = 0; i < std::min(n,maxprint); i++) { + ret << ConvertValToString(data[i]); if (i < n-1) ret << ", "; + if (i < n-1 && i == maxprint-1) ret << "..... 
"; } ret << "}"; return ret.str(); } template -std::string ConvertValuesToString(const std::vector & data) { - return ConvertValuesToString(data.size(), data.data()); +std::string ConvertValuesToString(const std::vector & data, size_t maxprint = 5) { + return ConvertValuesToString(data.size(), data.data(), maxprint); } class InitializedTensor { @@ -255,7 +291,7 @@ public: std::shared_ptr const &sharedptr() const { return fData; } // query if tensor comes from a Constant operator bool IsConstantTensor() const { return fConstant;} - // query if tensor needs to be written in a weight file. Constant tensors are not written in a file + // query if tensor needs to be written in a weight file. Constant tensors are not written in a separate file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} // check if a Tensor is Writable (need to be written in the file or in the generated code (e.g. as a constant tensor) // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in @@ -263,6 +299,8 @@ public: bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} + // set writable initialized tensors - i.e. 
tensor that must be written in a file + void SetWritable() { fIsNotWritable = false;} // set as constant (needed for non-float initialized tensors) void SetConstant() { fConstant = true;} @@ -280,16 +318,8 @@ public: for (std::size_t item : fShape) { fSize *= static_cast(item); } - switch (fType) { - case ETensorType::FLOAT: fSize *= sizeof(float); break; - case ETensorType::DOUBLE: fSize *= sizeof(double); break; - case ETensorType::INT32: fSize *= sizeof(int32_t); break; - case ETensorType::INT64: fSize *= sizeof(int64_t); break; - case ETensorType::BOOL: fSize *= sizeof(bool); break; - default: - throw std::runtime_error("SOFIE doesn't yet supports serialising data-type " + - ConvertTypeToString(fType)); - } + // get size in bytes + fSize *= GetTypeSize(fType); fPersistentData = static_cast(fData.get()); } void CastPersistentToShared() @@ -373,7 +403,7 @@ T* BroadcastConvBias(const T* data, const size_t channel, const std::vector -void FillOutput(T const *arr, std::vector &out, std::size_t n) -{ - out.resize(n); - for (std::size_t i = 0; i < n; ++i) { - out[i] = arr[i]; - } -} - } // end namespace UTILITY namespace BLAS{ @@ -712,7 +732,7 @@ struct GNN_Data { }; template -RTensor Concatenate( RTensor & t1, RTensor & t2, int axis = 0) +TMVA::Experimental::RTensor Concatenate( TMVA::Experimental::RTensor & t1, TMVA::Experimental::RTensor & t2, int axis = 0) { // concatenate tensor along axis. 
Shape must be the same except in the dimension of the concatenated axis if (t1.GetMemoryLayout() != t2.GetMemoryLayout()) @@ -727,8 +747,8 @@ RTensor Concatenate( RTensor & t1, RTensor & t2, int axis = 0) } std::vector outShape = shape1; outShape[axis] = shape1[axis] + shape2[axis]; - RTensor tout(outShape, t1.GetMemoryLayout()); - if (t1.GetMemoryLayout() == MemoryLayout::ColumnMajor) { + TMVA::Experimental::RTensor tout(outShape, t1.GetMemoryLayout()); + if (t1.GetMemoryLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) { throw std::runtime_error("TMVA RTensor Concatenate is not yet supported for column major tensors"); } @@ -783,10 +803,35 @@ inline void Gemm_Call(float *output, bool transa, bool transb, int m, int n, int if (C != nullptr) { std::copy(C, C + m * n, output); } - SOFIE::BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &m, &n, &k, &alpha, A, lda, B, ldb, + TMVA::Experimental::SOFIE::BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &m, &n, &k, &alpha, A, lda, B, ldb, &beta, output, ldc); } +inline void Fill(float *output, float value, int size) +{ + std::fill(output, output + size, value); +} + +template +inline void Copy(T *output, T const *input, int size) +{ + std::copy(input, input + size, output); +} + +inline void Relu(float *output, float const *input, int size) +{ + for (int i = 0; i < size; i++) { + output[i] = (input[i] > 0.0f) ? 
input[i] : 0.0f; + } +} +// function to read float from the file dealing with inf and nan values +inline float ParseFloatToken (const std::string & s) { + if (s == "inf") return std::numeric_limits::infinity(); + if (s == "-inf") return -std::numeric_limits::infinity(); + if (s == "nan") return std::numeric_limits::quiet_NaN(); + return std::stof(s); +} + template void ReadTensorFromStream(std::istream &is, T &target, std::string const &expectedName, std::size_t expectedLength) { @@ -803,14 +848,20 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect std::to_string(expectedLength) + " , read " + std::to_string(length); throw std::runtime_error(err_msg); } + std::string token; for (size_t i = 0; i < length; ++i) { - is >> target[i]; + is >> token; + target[i] = ParseFloatToken(token); } if (is.fail()) { throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor " + expectedName); } } +//Utility functions to generate code +void EmitNestedLoops(std::stringstream &out, size_t loopRank, const std::vector shape); +void CloseNestedLoops(std::stringstream &out, size_t loopRank); + // code for the memory greeding allocations struct TensorLifeInfo { @@ -827,6 +878,41 @@ struct MemoryResult { /// Greedy best-fit planner with coalescing free list. MemoryResult OrganizeMemory(const std::vector & tensorsInfo ); +// Simple Dimension classes and helpers to add constexpr meta info on input +// tensors to the emitted code. 
+struct SingleDim { + enum class Kind { + Static, + Symbolic + }; + + Kind kind; + std::size_t dim; + std::string_view name; + + constexpr SingleDim(std::size_t v) : kind(Kind::Static), dim(v), name() {} + constexpr SingleDim(const char *v) : kind(Kind::Symbolic), dim(0), name(v) {} +}; + +struct TensorDims { + const SingleDim *data; + std::size_t size; + + constexpr std::size_t total_size() const + { + std::size_t result = 1; + for (std::size_t i = 0; i < size; ++i) { + result *= data[i].dim; + } + return result; + } +}; + +template +constexpr TensorDims makeDims(Arr const &arr) +{ + return TensorDims{arr.data(), arr.size()}; +} inline std::string ConvertOutputTypeToString(ETensorType t) { // The std::vector is a special type that is not wrapping continuous memory. diff --git a/src/SOFIE_core/src/RFunction_MLP.cxx b/src/SOFIE_core/src/RFunction_MLP.cxx index 34abef1..5666f3e 100644 --- a/src/SOFIE_core/src/RFunction_MLP.cxx +++ b/src/SOFIE_core/src/RFunction_MLP.cxx @@ -16,7 +16,7 @@ RFunction_MLP::RFunction_MLP(FunctionTarget target, int_t numLayers, Activation // assuming all the linear layers has a kernel and a bias initialized tensors if (fActivateFinal) { if (fActivationFunction == Activation::Invalid) { - throw std::runtime_error("TMVA SOFIE GNN doesn't currently supports the provided activation function for " + + throw std::runtime_error("SOFIE GNN doesn't currently supports the provided activation function for " + fFuncName + " update."); } function_block->AddOutputTensorNameList({fFuncName + "Relu" + std::to_string(fNumLayers)}); diff --git a/src/SOFIE_core/src/RModel.cxx b/src/SOFIE_core/src/RModel.cxx index c50921f..fcb8e6d 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/src/SOFIE_core/src/RModel.cxx @@ -17,8 +17,50 @@ namespace { const std::string SP = " "; } +void ReplaceAll(std::string &str, const std::string &from, const std::string &to) +{ + size_t pos = 0; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, 
from.length(), to); + pos += to.length(); + } +} + +bool IsIdentifierChar(char c) +{ + return std::isalnum(static_cast(c)) || c == '_'; +} + +// Returns true if s is a valid C++ identifier (can be used as a variable name). +// Dim::param can be either a plain name (e.g. "W") or a computed expression +// (e.g. "((W+-3)/2+1)"); only the former can be used as a C++ variable name. +bool IsIdentifier(const std::string &s) +{ + if (s.empty() || std::isdigit(static_cast(s[0]))) + return false; + for (char c : s) + if (!IsIdentifierChar(c)) + return false; + return true; +} + +// Get the data member name corresponding to a tensor with a given name. +std::string TensorMember(std::string const &name) +{ + return "tensor_" + name; +} + +} // namespace + +std::underlying_type_t operator|(Options opA, Options opB) { + return static_cast>(opA) | static_cast>(opB); +} +std::underlying_type_t operator|(std::underlying_type_t opA, Options opB) { + return opA | static_cast>(opB); +} + -const std::vector& RModel::GetTensorShape(const std::string & name) const { +std::vector RModel::GetTensorShape(const std::string & name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { return f->second.shape; @@ -29,7 +71,7 @@ const std::vector& RModel::GetTensorShape(const std::string & name) cons } auto f3 = fInputTensorInfos.find(name); if (f3 != fInputTensorInfos.end()) { - throw std::runtime_error("TMVA SOFIE tensor [" + name + "] is an input tensor with unspecified dimension parameter"); + throw std::runtime_error("SOFIE tensor [" + name + "] is an input tensor with unspecified dimension parameter"); } auto f4 = fIntermediateTensorInfos.find(name); if (f4 != fIntermediateTensorInfos.end()) { @@ -46,12 +88,12 @@ const std::vector& RModel::GetTensorShape(const std::string & name) cons } if (fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end()) - throw std::runtime_error("TMVA SOFIE tensor [" + name + "] is a dynamic tensor. 
Use GetDynamicTensorShape instead of GetTensorShape"); + throw std::runtime_error("SOFIE tensor [" + name + "] is a dynamic tensor. Use GetDynamicTensorShape instead of GetTensorShape"); if (fIsSubGraph && fParentGraph) return fParentGraph->GetTensorShape(name); - throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not found"); + throw std::runtime_error("SOFIE tensor [" + name + "] for which the shape is requested is not found"); } std::vector RModel::GetDimTensorShape(const std::string & name) const { @@ -74,12 +116,12 @@ std::vector RModel::GetDynamicTensorShape(const std::string & name) const { } // throw error if shape is not dynamic if (!IsDynamicTensor(name)) - throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not dynamic"); + throw std::runtime_error("SOFIE tensor [" + name + "] for which the shape is requested is not dynamic"); - throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the shape is requested is not found"); + throw std::runtime_error("SOFIE tensor [" + name + "] for which the shape is requested is not found"); } -const ETensorType& RModel::GetTensorType(const std::string & name) const { +ETensorType RModel::GetTensorType(std::string name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { return f->second.type; @@ -108,7 +150,7 @@ const ETensorType& RModel::GetTensorType(const std::string & name) const { if (fIsSubGraph && fParentGraph) return fParentGraph->GetTensorType(name); - throw std::runtime_error("TMVA SOFIE tensor [" + name + "] for which the type is requested is not found, model name: " + fName); + throw std::runtime_error("SOFIE tensor [" + name + "] for which the type is requested is not found, model name: " + fName); } bool RModel::CheckIfTensorAlreadyExist(std::string tensor_name) { @@ -145,30 +187,34 @@ void RModel::AddInputTensorName(std::string input_name) { 
fInputTensorNames.emplace_back(UTILITY::Clean_name(input_name)); } -void RModel::AddOperator(std::unique_ptr op, int order_execution) { - AddBlasRoutines(op->GetBlasRoutines()); - auto libs = op->GetStdLibs(); - auto op_input_tensors = op->GetOpInputTensors(); - for (auto& stdlib : libs) { - AddNeededStdLib(stdlib); - } - if (order_execution >= 0) { - fOperators.insert(fOperators.begin() + order_execution, std::move(op)); - } else { - fOperators.push_back(std::move(op)); - } +void RModel::AddOperator(std::unique_ptr op, int order_execution) +{ + AddBlasRoutines(op->GetBlasRoutines()); + auto libs = op->GetStdLibs(); + auto op_input_tensors = op->GetOpInputTensors(); + for (auto &stdlib : libs) { + AddNeededStdLib(stdlib); + } + if (order_execution >= 0) { + fOperators.insert(fOperators.begin() + order_execution, std::move(op)); + } else { + fOperators.push_back(std::move(op)); + order_execution = fOperators.size() - 1; + } - // storing the last usage of tensors which are input to - // operators (but are not inputs to the model, i.e. they are intermediate - // tensors). This information is needed to keep a check on when a - // particular intermediate tensor can be flushed to free up memory for reuse. 
- for(size_t index = 0; index & s fShapeTensors[tensor_name] = std::make_pair(shape_values, scalar); } +void RModel::AddAliasTensor(const std::string & name, const std::string & origin){ + // add an alias tensor to origin + auto tensor_name = UTILITY::Clean_name(name); + auto origin_name = UTILITY::Clean_name(origin); + if (fAliasTensors.count(tensor_name) != 0) { + throw std::runtime_error("TMVA-SOFIE: alias tensor with name " + tensor_name + " already exists \n"); + } + fAliasTensors[tensor_name] = origin_name; +} + bool RModel::IsShapeTensor(const std::string & tensor_name) const { return fShapeTensors.count(tensor_name) != 0; } +bool RModel::IsAliasTensor(const std::string & tensor_name) const { + return fAliasTensors.count(tensor_name) != 0; +} + const std::vector & RModel::GetShapeTensorValues(const std::string & tensor_name) const { //if (!IsShapeTensor(tensor_name) ) return std::vector{}; return fShapeTensors.at(tensor_name).first; @@ -213,8 +273,8 @@ bool RModel::IsInitializedTensor(const std::string& tensorName) const { std::string name = UTILITY::Clean_name(tensorName); return fInitializedTensors.find(name) != fInitializedTensors.end(); } - bool RModel::IsConstantTensor(const std::string& tensorName) const { + // a constant tensor is an initialized tensor but has the constant flag set std::string name = UTILITY::Clean_name(tensorName); auto itr = fInitializedTensors.find(name); if (itr == fInitializedTensors.end()) return false; @@ -311,15 +371,6 @@ std::shared_ptr RModel::GetInitializedTensorData(std::string tensor_name) } } -void RModel::RemoveInitializedTensor(std::string tensor_name) { - auto f = fInitializedTensors.find(tensor_name); - if (f == fInitializedTensors.end()) { - throw std::runtime_error("TMVA-SOFIE: tensor " + tensor_name + " not found when trying to remove it"); - } else { - fInitializedTensors.erase(f); - } -} - void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { auto t = 
fInitializedTensors.find(tensor_name); if (t == fInitializedTensors.end()) { @@ -328,7 +379,7 @@ void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { t->second.SetNotWritable(); } -std::string RModel::AllocateIntermediateMemory(std::span op_output_tensors) +std::string RModel::AllocateIntermediateMemory(std::span op_output_tensors) { std::stringstream code; @@ -343,7 +394,7 @@ std::string RModel::AllocateIntermediateMemory(std::span op_o std::string typeName = ConvertTypeToString(GetTensorType(name)); code << "\n // Allocating memory for intermediate tensor " << name << " with size " << size << " bytes"; code << "\n" - << typeName << "* tensor_" << name << " = reinterpret_cast<" << typeName + << typeName << "* " << TensorMember(name) << " = reinterpret_cast<" << typeName << "*>(fIntermediateMemoryPool.data() + " << location << ");\n"; }; @@ -357,6 +408,11 @@ std::string RModel::AllocateIntermediateMemory(std::span op_o fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end()) continue; + // case of alias tensor + if (IsAliasTensor(name)) { + continue; + } + auto tensor_size = GetTypeSize(GetTensorType(name)) * ConvertShapeToLength(GetTensorShape(name)); // important fill the pair in the ordered output tensors with the string view and not the string TensorMemoryInfo tmi = {it, tensor_size}; @@ -428,7 +484,7 @@ std::string RModel::AllocateIntermediateMemory(std::span op_o return code.str(); } -void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ +void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ if (fVerbose) std::cout << "*** CheckAndFlushIntermediateMemory: Loop on input tensors for op " << op_idx << "\n"; //print available chunks if (fVerbose) std::cout << "available chunks before freeing them : \n"; @@ -436,9 +492,14 @@ void RModel::CheckAndFlushIntermediateMemory(std::span op_inp chunk != fIntermediateMemoryInfo.available_stack.end(); chunk++) { 
if (fVerbose) std::cout << "-- free chunk " << chunk->first << " size = " << chunk->second << std::endl; } - for (auto &it : op_input_tensors) { - // last occurence of the tensor is reached => flush it from memory - if (fVerbose) std::cout << ".. input tensors : " << it; + for (auto &iv : op_input_tensors) { + // last occurrence of the tensor is reached => flush it from memory + if (fVerbose) std::cout << ".. input tensors : " << iv; + + // for alias tensors replace name with its alias + std::string it{iv}; // convert view to string + if (IsAliasTensor(it)) + it = fAliasTensors[it]; if (fIntermediateTensorFrequencyLookup[it] == op_idx) { if (fVerbose) std::cout << " flash condition is met - looping on chunks to find matching one \n"; for (auto chunk = fIntermediateMemoryInfo.total_stack.begin(); @@ -524,6 +585,7 @@ void RModel::Initialize(const std::map & inputParams, bool fIntermediateTensorInfos.clear(); fDynamicTensorInfos.clear(); + // loop on inputs and see if shape can be full specified // if the batch size is provided it can be used to specify the full shape // Add the full specified tensors in fReadyInputTensors collection @@ -578,24 +640,13 @@ void RModel::Initialize(const std::map & inputParams, bool PrintDynamicTensors(); } - // check if there are initialized tensors to write in a weight file - // support for the time being only weight of FLOAT type - if (fUseWeightFile) { - bool modelHasWeights = false; - for (auto &i : fInitializedTensors) { - if (i.second.type() == ETensorType::FLOAT) { - modelHasWeights = true; - break; - } - } - if (!modelHasWeights) - fUseWeightFile = false; - } // Go through model and initialize each operator int i = 0; std::vector temp_available_stack; // vector stores individual chunks of available memory that maybe reused + // Build set of initialized tensors consumed by at least one runtime operator (need for later) + std::unordered_set runtimeInitializedInputs; for(size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx){ if 
(verbose) { auto& r = *fOperators[op_idx].get(); @@ -604,16 +655,69 @@ void RModel::Initialize(const std::map & inputParams, bool fOperators[op_idx]->Initialize(*this); for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ std::string name = std::string{it}; + // check if tensor is not an initialized or output tensor and it is not already in the list if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && - fInitializedTensors.find(name) == fInitializedTensors.end() && - fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ + fInitializedTensors.find(name) == fInitializedTensors.end()) + { fIntermediateTensorFrequencyLookup[it] = op_idx; } } + // loop for non-constant operators and flag the inputs which are initialized tensors to make sure they are writable + if (!fOperators[op_idx]->IsOutputConstant()) { + for (auto &it : fOperators[op_idx]->GetOpInputTensors()) { + std::string name = std::string{it}; + if (fInitializedTensors.find(name) != fInitializedTensors.end()) { + runtimeInitializedInputs.insert(name); + } + } + } + i++; } + // loop on initialized tensors and make the integers as constant to be + // not written in a weight file and check if the tensors flagged as not writable are really not writable, + // i.e. are not used by non constant operators + for (auto &it : fInitializedTensors) { + // check if not-writable tensors are really not writable, i.e. 
are not used by non constant operators + if (it.second.IsNotWritable() && runtimeInitializedInputs.find(it.first) != runtimeInitializedInputs.end()) { + it.second.SetWritable(); + if (verbose) { + std::cout << "Initialized tensor " << it.first << " is flagged as not writable but is used by non constant operators, set it as writable \n"; + } + } + // if the tensor is an integer we can flag it as constant since it will not be written in a weight file and it is considered equivalent as being created from a Constant operator + // only FLOAT tensors are written in a weight file + if (it.second.type() != ETensorType::FLOAT) { + it.second.SetConstant(); + } + } + + // check if there are initialized tensors to write in a weight file + if (fUseWeightFile) { + bool modelHasWeights = false; + for (auto &it : fInitializedTensors) { + if (it.second.IsWeightTensor()) { + modelHasWeights = true; + break; + } + } + if (!modelHasWeights) + fUseWeightFile = false; + } + + // update fIntermediateTensorFrequencyLookup for alias tensors + for (auto & it : fAliasTensors) { + if (fIntermediateTensorFrequencyLookup.find(it.first) == fIntermediateTensorFrequencyLookup.end()) continue; + if (fIntermediateTensorFrequencyLookup.find(it.second) == fIntermediateTensorFrequencyLookup.end() ) + fIntermediateTensorFrequencyLookup[it.second] = fIntermediateTensorFrequencyLookup[it.first]; + else { + // take the largest one + fIntermediateTensorFrequencyLookup[it.second] = std::max(fIntermediateTensorFrequencyLookup[it.second],fIntermediateTensorFrequencyLookup[it.first] ); + } + } + fIsInitialized = true; } @@ -646,29 +750,90 @@ void RModel::InitializeSubGraph(std::shared_ptr graph) { } +// Function to generate the code for declaring and initializing constant tensors +// This is for tensors which are not part of weight files and can be created from the Constant operator +template +std::string GenerateConstantTensorCode(const std::pair &t) +{ + std::stringstream strs; + std::string type = 
ConvertTypeToString(t.second.type()); + size_t length = ConvertShapeToLength(t.second.shape()); + // avoid using stack sizes for constant tensors to reduce compilation time + // also for weights which can be broadcasted do not use stack but allocate as a std::vector + bool allocateOnStack = (length > 100 || t.second.IsWeightTensor()) ? false : true; + + const T *data = t.second.data(); + + // and check if all values are the same + bool sameData = false; + + // for non stack allocation check if data are the same + if (!allocateOnStack && length > 1) { + size_t idx = 1; + do { + sameData = (data[idx] == data[idx - 1]); + idx++; + } while (sameData && idx < length); + } + if (allocateOnStack) { + strs << type << " fTensor_" << t.first << "[" << length << "] = " << ConvertValuesToString(length, data) << ";\n"; + strs << type << " * " << TensorMember(t.first) << " = fTensor_" + t.first + ";\n"; + } else { + strs << "std::vector<" << type << "> fTensor_" << t.first << " = "; + if (sameData) + strs << "std::vector<" << type << ">(" << length << ", " << ConvertValToString(data[0]) << ");\n"; + else { + strs << ConvertValuesToString(length, data) << ";\n"; + } + strs << type << " * " << TensorMember(t.first) << " = fTensor_" + t.first + ".data();\n"; + } + return strs.str(); +} + void RModel::GenerateInitializedTensorInfo() { if (!fInitializedTensors.empty()) - fGC += "// initialized tensors\n"; + fGC += "// initialized (weights and constant) tensors\n"; + // here are constant tensor or initialized ones which are not weights (e.g. 
int64_t tensors ) for (auto &i : fInitializedTensors) { if (i.second.IsNotWritable()) continue; - if (!fUseWeightFile || i.second.IsConstantTensor()) { + size_t length = ConvertShapeToLength(i.second.shape()); + if (!fUseWeightFile || i.second.IsConstantTensor() || !i.second.IsWeightTensor() || i.second.type() != ETensorType::FLOAT ) { if (i.second.type() == ETensorType::FLOAT) { + // check if NaN of Inf are inside tensor data + bool hasInfOrNaN = false; + const float *data = i.second.data(); + for (size_t idx = 0; idx < length; idx++) { + if (std::is_floating_point::value) { + if (std::isinf(data[idx]) || std::isnan(data[idx])) { + hasInfOrNaN = true; + break; + } + } + } + if (hasInfOrNaN) + AddNeededStdLib("limits"); fGC += GenerateConstantTensorCode(i); - fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + fConstantTensorSize += length * sizeof(float); } else if (i.second.type() == ETensorType::INT64) { fGC += GenerateConstantTensorCode(i); - fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 8; + fConstantTensorSize += length * sizeof(int64_t); + } else if (i.second.type() == ETensorType::INT32) { + fGC += GenerateConstantTensorCode(i); + fConstantTensorSize += length * sizeof(int32_t); + } else if (i.second.type() == ETensorType::BOOL || i.second.type() == ETensorType::UINT8 ) { + fGC += GenerateConstantTensorCode(i); + fConstantTensorSize += length * sizeof(uint8_t); } + } else { // case of tensors which are read from a file - size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { fGC += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - fGC += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; - fWeightsTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + fGC += "float * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; + fWeightsTensorSize += length * sizeof(float); } } } @@ -689,9 +854,10 @@ void 
RModel::GenerateIntermediateTensorInfo() { if (!fIntermediateTensorInfos.empty()) { std::string tensor_declaration_block = ""; for (auto &i : fIntermediateTensorInfos) { - if (i.second.type == ETensorType::BOOL) { + bool is_alias = (IsAliasTensor(i.first)); + if (i.second.type == ETensorType::BOOL && !is_alias) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; - tensor_declaration_block += "std::uint8_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "std::uint8_t * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; continue; } bool is_extended = (fOptimizationLevel == OptimizationLevel::kExtended); @@ -700,25 +866,29 @@ void RModel::GenerateIntermediateTensorInfo() { bool not_in_output_names = (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) == fOutputTensorNames.end()); - if ((not_in_freq_map && not_in_output_names) || (!not_in_freq_map && !is_extended && not_in_output_names)) { + if (((not_in_freq_map && not_in_output_names) || (!not_in_freq_map && !is_extended && not_in_output_names) ) && !is_alias) { size_t length = ConvertShapeToLength(i.second.shape); if (i.second.type == ETensorType::FLOAT) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "float * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 4 * length; } else if (i.second.type == ETensorType::DOUBLE) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "double * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "double * " + TensorMember(i.first) + " = 
fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 8 * length; } else if (i.second.type == ETensorType::INT64) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "int64_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "int64_t * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 8 * length; } } + if (is_alias) { + tensor_declaration_block += ConvertTypeToString(i.second.type) + " * " + TensorMember(i.first) + " = nullptr;\n"; + } + } if (tensor_declaration_block.length()) { @@ -729,17 +899,10 @@ void RModel::GenerateIntermediateTensorInfo() { if (!fDynamicTensorInfos.empty()) { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { - if (i.second.type == ETensorType::FLOAT) { - fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "float * tensor_" + i.first + " = nullptr;\n"; - } else if (i.second.type == ETensorType::DOUBLE) { - fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "double * tensor_" + i.first + " = nullptr;\n"; - } else if (i.second.type == ETensorType::INT64) { - fGC += "std::vector fTensor_" + i.first + ";\n"; - fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; - } + fGC += ConvertTypeToString(i.second.type) + " * " + TensorMember(i.first) + " = nullptr;\n"; } + fGC += "//--- dynamic tensors pool\n"; + fGC += "std::vector fDynamicMemoryPool;\n"; } } @@ -757,17 +920,160 @@ void RModel::GenerateOperatorDeclarations() { void RModel::GenerateDynamicTensorInfo() { + // generate code for allocating dynamic tensors using the greedy memory allocations + if (fDynamicTensorInfos.empty()) + return; + + if (fVerbose) { + std::cout << "generating code for dynamic tensor management" << std::endl; + PrintDynamicTensors(); + } + std::stringstream out; + out << "// dynamic tensor memory management\n"; + out << SP << "std::vector 
dynamicTensorInfos;\n"; + out << SP << "dynamicTensorInfos.reserve(" << fDynamicTensorInfos.size() << ");\n"; + + // loop on all the operators to find begin/end life of the tensors + int op_index = 0; + std::vector> tensors; + tensors.reserve(fDynamicTensorInfos.size()); + for (auto & op : fOperators) { + // loop on output tensors - + for (auto &it : op->GetOpOutputTensors()) { + if (fVerbose) { + auto op_ptr = op.get(); + std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; + } + // check if is a dynamic tensor and not an alias tensor or output tensor + std::string name = std::string(it); + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() && !IsAliasTensor(name) + && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end()) { + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + auto type = GetTensorType(name); + size_t type_size = GetTypeSize(type); + int begin = op_index; + int end = fOperators.size(); + // look for end + auto it_lookup = fIntermediateTensorFrequencyLookup.find(name); + if (it_lookup != fIntermediateTensorFrequencyLookup.end()) + end = it_lookup->second + 1; // end is last time used + 1 + // // some tensors (like xcol in convolutions) are just used within the operators + // if (end == 0 && begin > 0) end = begin+1; + + if (begin> end) { + std::cout << "op " << op_index << "tensor_" << name << " begin " << begin << " " << " end " << end << std::endl; + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); + } + + // write in code + out << SP << "dynamicTensorInfos.push_back( {" << begin << ", " << end << ", " << type_size << "* (" << tensor_size << ") });" + << " // tensor_" << name << std::endl; + tensors.push_back({name,type}); + } + } + op_index++; // increment operator index + } + out << "\n" << SP << "auto memory_result = OrganizeMemory(dynamicTensorInfos);\n\n"; 
+ out << "// allocating now the memory\n"; + out << SP << "fDynamicMemoryPool = std::vector(memory_result.total_bytes);\n"; + out << SP << "int idx = 0;\n"; + for (auto & it : tensors) { + out << SP << "tensor_" << it.first << " = reinterpret_cast<" << ConvertTypeToString(it.second) << " *>(fDynamicMemoryPool.data() + memory_result.offsets[idx++]);\n"; + } + // check that all dynamic tensors are covered + bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDimShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; + if (IsAliasTensor(i.first)) continue; + if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) != fOutputTensorNames.end()) continue; + if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { + std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; + missingTensor = true; + } } + if (missingTensor) + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); + fGC += out.str(); } +/// Check if a given parameter is used for the shape of an input tensor. +bool RModel::IsInputTensorShapeParam(std::string const ¶mName) const +{ + for (auto &name : fInputTensorNames) { + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + if (d.param == paramName) + return true; + } + } + } + return false; +} + +/// Collects all identifiers starting with "tensor_" in the input code, +/// provided that the occurrence is not immediately preceded by a +/// character that is valid in a C++ identifier. Excludes input and output tensor names. +/// Returns a deduplicated std::vector. 
+std::vector RModel::CollectTensorMemberNames(const std::string &input) +{ + const std::string target = "tensor_"; + + std::vector result; + + for (size_t i = 0; i < input.size();) { + + bool doCollect = false; + + if (i + target.size() <= input.size() && input.compare(i, target.size(), target) == 0 && + (i == 0 || !IsIdentifierChar(input[i - 1]))) { + + doCollect = true; + + std::size_t j = i + target.size(); + + // Extend to full identifier + while (j < input.size() && IsIdentifierChar(input[j])) + ++j; + + std::string fullName = input.substr(i, j - i); + + // Exclude input tensor names + for (std::string const &name : fInputTensorNames) { + if (fullName == target + name) { + doCollect = false; + break; + } + } + + // Exclude output tensor names + if (doCollect) { + for (std::string const &name : fOutputTensorNames) { + if (fullName == target + name) { + doCollect = false; + break; + } + } + } + + if (doCollect) { + result.push_back(fullName); + } + + i = j; // advance past the identifier + } else { + ++i; + } + } + + // Deduplicate (order not preserved) + std::sort(result.begin(), result.end()); + result.erase(std::unique(result.begin(), result.end()), result.end()); + + return result; +} + std::string RModel::GenerateInferSignature(bool isdecl) { // generate the infer signature given the inputs: eg. "float * tensor1, float * tensor2" // if (decl = false) generate only calling signature (tensor1,tensor2,....) @@ -803,6 +1109,26 @@ std::string RModel::GenerateInferSignature(bool isdecl) { return rGC; } +namespace { + +std::string typeForOutput(ETensorType t) { + // The std::vector is a special type that is not wrapping continuous memory. + // We don't want to use it as a return type. 
+ if (t == ETensorType::BOOL) t = ETensorType::UINT8; + return ConvertTypeToString(t); +} + +std::string memberNameForDimShape(std::string name) +{ + if (!name.empty()) { + name[0] = std::toupper(static_cast(name[0])); + } + name = "f" + name; + return name; +} + +} + void RModel::GenerateOutput() { size_t outputSize = fOutputTensorNames.size(); @@ -813,7 +1139,7 @@ void RModel::GenerateOutput() ETensorType eFirstOutputType = GetTensorType(*fOutputTensorNames.begin()); fGC += "\n\n"; if (outputSize == 1) { - fGC += "std::vector<" + ConvertOutputTypeToString(eFirstOutputType) + ">"; + fGC += "std::vector<" + typeForOutput(eFirstOutputType) + ">"; } else { // if all output types are the same we return an std::vector - otherwise a tuple for (std::string const &name : fOutputTensorNames) { @@ -821,11 +1147,11 @@ void RModel::GenerateOutput() sameOutputTypes = false; } if (sameOutputTypes) - fGC += "std::vector>"; + fGC += "std::vector>"; else { inferReturnType = "std::tuple<"; for (size_t i = 0; i < outputSize; i++) { - inferReturnType += "std::vector<" + ConvertOutputTypeToString(GetTensorType(fOutputTensorNames[i])) + ">"; + inferReturnType += "std::vector<" + typeForOutput(GetTensorType(fOutputTensorNames[i])) + ">"; if (i < outputSize - 1) inferReturnType += ","; } @@ -840,13 +1166,74 @@ void RModel::GenerateOutput() if (!doInferArgs.empty()) doInferArgs += ","; for (std::string const &name : fOutputTensorNames) { - fGC += SP + "std::vector<" + ConvertOutputTypeToString(GetTensorType(name)) + " > output_tensor_" + name + ";\n"; - doInferArgs += " output_tensor_" + name + ","; + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + std::string n; + if(!isDynamic) { + n = std::to_string(ConvertShapeToLength(GetTensorShape(name))); + } else { + std::string dimLen = ConvertDimShapeToLength(GetDynamicTensorShape(name)); + // Use the session member (fXxx) when any dim is a runtime-computed identifier + // (e.g. NonZero count). 
For expression-type dims derived from input shapes + // (e.g. "((W+-3)/2+1)"), use the expression directly. + // for input shape parameters we don't need to use the session member since it is passed as argument to the infer function and it is not a runtime computed value + bool hasRuntimeParam = false; + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && IsIdentifier(dim.param) && !IsInputTensorShapeParam(dim.param)) + hasRuntimeParam = true; + } + n = hasRuntimeParam ? memberNameForDimShape(dimLen) : dimLen; + } + std::string outputName = "output_tensor_" + name; + fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > " + outputName + "(" + n + ");\n"; + doInferArgs += " " + outputName + ".data(),"; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param) && IsIdentifier(dim.param)) { + fGC += SP + "size_t " + dim.param + " = 0;\n"; + doInferArgs += " " + dim.param + ","; + } + } + } } if (!doInferArgs.empty()) doInferArgs.back() = ' '; - fGC += SP + "doInfer(" + doInferArgs + ");\n"; + // verifying if the dynamic parameters are within allowed range + std::unordered_set input_params_checked; + std::string dynamic_parameters_check = ""; + for (auto &name : fInputTensorNames) { + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + std::string pName = d.param; + if (d.isParam && input_params_checked.count(pName) == 0) { + std::string memberName = memberNameForDimShape(d.param); + dynamic_parameters_check += d.param + " > " + memberName + " || "; + input_params_checked.insert(pName); + fGC += SP + "if (" + d.param + " > " + memberName + ") {\n"; + fGC += SP + SP + "throw std::runtime_error(\"TMVA-SOFIE: dynamic input tensor shape parameter " + + d.param + " exceeds the initialized maximum allowed shape.\");\n"; + fGC += SP + "}\n"; + } + } + } + } + + if (fUseSession) { + fGC += SP + "doInfer(*this, " + 
doInferArgs + ");\n"; + } else { + fGC += SP + "doInfer(" + doInferArgs + ");\n"; + } + + // If the output tensors have dynamic sizes, now is the time to set them + for (std::string const &name : fOutputTensorNames) { + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + if (isDynamic) { + std::string outputName = "output_tensor_" + name; + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + fGC += SP + outputName + ".resize(" + tensor_size + ");\n"; + } + } fGC += SP + "return {"; for (size_t i = 0; i < fOutputTensorNames.size(); i++) { @@ -860,23 +1247,43 @@ void RModel::GenerateOutput() void RModel::GenerateSessionCode() { + std::string sessionName = !fIsSubGraph ? "Session" : "Session_" + fName; + + if (fUseSession && !fIsGNNComponent) { + // forward declare session struct + fGC += "struct " + sessionName + ";\n"; + } + // Determine the signature of the actual inference function std::string doInferSignature = GenerateInferSignature(); if (!doInferSignature.empty()) doInferSignature += ", "; for (auto const &name : fOutputTensorNames) { - doInferSignature += " std::vector<" + ConvertOutputTypeToString(GetTensorType(name)) + "> &output_tensor_" + name + ","; + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + doInferSignature += typeForOutput(GetTensorType(name)) + " *tensor_" + name + ","; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param) && IsIdentifier(dim.param)) + doInferSignature += " size_t &" + dim.param + "_output,"; + } + } } doInferSignature.back() = ' '; - doInferSignature = "void doInfer(" + doInferSignature + ")"; + if (fUseSession) { + doInferSignature = sessionName + " const &session, " + doInferSignature; + } + + doInferSignature = "inline void doInfer(" + doInferSignature + ")"; + + if (!fIsGNNComponent) { + // forward declare inference implementation + fGC += doInferSignature + ";\n"; + } // define the Session struct (for GNN this is 
generated in RModel_GNN) if (fUseSession && !fIsGNNComponent) { - if (!fIsSubGraph) - fGC += "struct Session {\n"; - else - fGC += "struct Session_" + fName + " {\n"; + fGC += "struct " + sessionName + " {\n"; } // generate code for declaring the initialized tensors @@ -913,7 +1320,15 @@ void RModel::GenerateSessionCode() // generate code for declarations of some specific operators GenerateOperatorDeclarations(); - + // storing the parameters for future checking to avoid mismatches + if (!fDimShapeNames.empty()) { + fGC += "\n// dynamic shape parameters\n"; + auto dimShapeNames = fDimShapeNames; + std::sort(dimShapeNames.begin(), dimShapeNames.end()); + for (const auto &p : dimShapeNames) { + fGC += "size_t " + memberNameForDimShape(p) + ";\n"; + } + } // add subgraph session if (!fSubGraphs.empty()) fGC += "// subgraph sessions\n"; @@ -923,9 +1338,6 @@ void RModel::GenerateSessionCode() // Generate code for Session constructor if (fUseSession) { - std::string sessionName = "Session"; - if (fIsSubGraph) - sessionName += "_" + fName; // add here specific operator code that needs to define session data members fGC += "\n"; for (size_t id = 0; id < fOperators.size(); id++) { @@ -951,6 +1363,7 @@ void RModel::GenerateSessionCode() // add initialization of shape parameters // assume all parameters are of type size_t if (!fDimShapeNames.empty()) { + // need to use same order as in infer function not alphabetical one for (auto &p : fDimShapeNames) { fGC += ",\n"; fGC += " size_t " + p + " = " + fShapeParams[p]; @@ -958,6 +1371,17 @@ void RModel::GenerateSessionCode() } fGC += ") {\n"; + // initializing dynamic parameters + if (!fDimShapeNames.empty()) { + fGC += "\n\n"; + std::sort(fDimShapeNames.begin(), fDimShapeNames.end()); + for (const auto &p : fDimShapeNames) { + fGC += " " + memberNameForDimShape(p) + " = " + p + ";\n"; + } + } + // add some extra code needed for initialization of dynamic parameters + fGC += fExtraCodeForDimShapes; + if (fUseWeightFile) { fGC += 
"\n//--- reading weights from file\n"; ReadInitializedTensorsFromFile(fReadPos); @@ -976,7 +1400,17 @@ void RModel::GenerateSessionCode() fGC += "}\n\n"; } - fGC += doInferSignature + "{\n"; + // generate the inference overload that returns an output struct + GenerateOutput(); + + // end of session + if (fUseSession && !fIsGNNComponent) { + fGC += "}; // end of Session\n\n"; + + GenerateRequiredInputTensorInfo(); + } + + fGC += doInferSignature + " {\n"; fGC += "\n"; // generate the inference code @@ -986,32 +1420,47 @@ void RModel::GenerateSessionCode() if (fOutputTensorNames.size() == 0) throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + std::string allOperatorCode; + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + std::string operatorCode = fOperators[op_idx]->Generate(std::to_string(op_idx)); + allOperatorCode += operatorCode; } - fGC += SP + "using SOFIE::UTILITY::FillOutput;\n\n"; + // If the generated code users members of the session struct, use the + // local variable name that we're using for the session: + ReplaceAll(allOperatorCode, "this->", "session."); - for (std::string const &name : fOutputTensorNames) { - // need to check is size is the same (don't want to return a vector with - // larger size) in that case better to copy - bool isIntermediate = fIntermediateTensorInfos.count(name) > 0; - std::string n = isIntermediate ? 
std::to_string(ConvertShapeToLength(GetTensorShape(name))) - : ConvertDimShapeToLength(GetDimTensorShape(name)); - fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; + if (fUseSession && !fIsGNNComponent) { + // Collect all "tensor_*" data members that are not input or output tensors + std::vector tensorMemberNames = CollectTensorMemberNames(allOperatorCode); + for (auto const& name: tensorMemberNames) { + fGC += " auto &" + name + " = session." + name + ";\n"; + } + fGC += "\n"; } - fGC += "}\n\n"; - - // generate the inference overload that returns an output struct - GenerateOutput(); + fGC += allOperatorCode; - // end of session - if (fUseSession && !fIsGNNComponent) { - fGC += "}; // end of Session\n\n"; + for (auto const& name: fOutputTensorNames) { + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param) && IsIdentifier(dim.param)) + fGC += " " + dim.param + "_output = " + dim.param + ";\n"; + } + } + if(IsConstantTensor(name)) { + std::string t = "session.tensor_" + name; + size_t length = ConvertShapeToLength(fInitializedTensors[name].shape()); + fGC += " std::copy(" + t + ", " + t + " + " + std::to_string(length) + ", tensor_" + name + ");\n"; + } } + fGC += "\n"; + + fGC += "}\n"; } void RModel::Generate(std::underlying_type_t options, int batchSize, long pos, bool verbose) @@ -1046,6 +1495,13 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo // initialize the model including all operators and sub-graphs Initialize(batchSize, verbose); + // if having dynamic tensor we need to have a Session + if (!fDynamicTensorInfos.empty()) { + fUseSession = true; + if (verbose) + std::cout << "Warning: Force having a Session since model has dynamic tensors " << std::endl; + } + std::string hgname; if (!fIsGNNComponent && !fIsSubGraph) { fGC.clear(); @@ -1067,7 +1523,7 @@ void 
RModel::Generate(std::underlying_type_t options, int batchSize, lo GenerateSessionCode(); if (!fIsGNNComponent && !fIsSubGraph) { - fGC += ("} //SOFIE_" + fName + "\n"); + fGC += ("} //TMVA_SOFIE_" + fName + "\n"); fGC += "\n#endif // " + hgname + "\n"; } } @@ -1075,7 +1531,9 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo void RModel::ReadInitializedTensorsFromFile(long pos) { // generate the code to read initialized tensors from a text data file if (fWeightFile == WeightFileType::Text) { - if (fInitializedTensors.empty()) return; + // check if there are tensors to write + + if (!fUseWeightFile) return; fGC += " std::ifstream f;\n"; fGC += " f.open(filename);\n"; @@ -1087,7 +1545,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " f.seekg(" + std::to_string(pos) + ");\n"; } - fGC += " using SOFIE::ReadTensorFromStream;\n"; + fGC += " using TMVA::Experimental::SOFIE::ReadTensorFromStream;\n"; // loop on tensors and parse the file for (auto& i: fInitializedTensors) { @@ -1098,7 +1556,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::string length = std::to_string(ConvertShapeToLength(i.second.shape())); fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } } fGC += " f.close();\n"; @@ -1133,7 +1591,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " fTensor_" + i.first + " = *reinterpret_cast*>(rootFile->Get(\""; fGC += dirName + "/" + tensor_name + "\"));\n"; } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); + throw 
std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); } fGC += " }\n"; } @@ -1201,7 +1659,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { outputDir->WriteObjectAny(&tensorDataVector, "std::vector", tensorName.c_str()); } else { - std::runtime_error("tmva-sofie tensor " + tensorName + " with type " + ConvertTypeToString(item.second.type()) + + throw std::runtime_error("tmva-sofie tensor " + tensorName + " with type " + ConvertTypeToString(item.second.type()) + " cannot be written to a ROOT file"); } } @@ -1238,15 +1696,21 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { // round to zero sub-normal values float value = data[idx]; if (value != 0. && std::abs(value) < std::numeric_limits::min() ) value = 0; - f << std::setprecision(std::numeric_limits::max_digits10) << value; + // handle non-finite values explicitly + if (std::isinf(value)) + f << (value > 0 ? "inf" : "-inf"); + else if (std::isnan(value)) + f << "nan"; + else + f << std::setprecision(std::numeric_limits::max_digits10) << value; f << ( (idx < length-1) ? 
" " : "\n" ); } } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); } if (f.fail()) - std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); + throw std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); } long curr_pos = f.tellp(); f.close(); @@ -1256,7 +1720,104 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } -void RModel::PrintRequiredInputTensors() { +void RModel::PrintSummary() const { + std::cout << "Summary of model " << GetName() << std::endl; + for(size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx){ + auto& r = *fOperators[op_idx].get(); + std::string raw_name = typeid(r).name(); + // look for ROperator_NAME + std::string name = raw_name.substr(raw_name.find("ROperator_")+10, raw_name.size()); + std::cout << op_idx << " " << name << " : "; + for (auto & t_in : r.GetOpInputTensors()) std::cout << t_in << " "; + std::cout << " ----> "; + for (auto & t_out : r.GetOpOutputTensors()) std::cout << t_out << " "; + std::cout << std::endl; + } +} + +/// To emit the dimensions of the input tensors as a data member of a session, +/// which is helpful when validating the inference inputs. 
+void RModel::GenerateRequiredInputTensorInfo() +{ + fGC += "\n// Input tensor dimensions\n"; + fGC += "using TMVA::Experimental::SOFIE::SingleDim;\n"; + fGC += "using TMVA::Experimental::SOFIE::TensorDims;\n"; + fGC += "using TMVA::Experimental::SOFIE::makeDims;\n\n"; + bool hasDynamicInputTensors = false; + + for (std::size_t iInput = 0; iInput < fInputTensorNames.size(); ++iInput) { + auto const &name = fInputTensorNames[iInput]; + if (IsDimInputTensor(name)) { + hasDynamicInputTensors = true; + } + std::vector shape = GetDimTensorShape(name); + fGC += "constexpr std::array dim_" + name + "{"; + for (std::size_t iDim = 0; iDim < shape.size(); ++iDim) { + auto const &dim = shape[iDim]; + if (dim.isParam) { + fGC += "SingleDim{\"" + dim.GetVal() + "\"}"; + } else { + fGC += "SingleDim{" + dim.GetVal() + "}"; + } + if (iDim != shape.size() - 1) { + fGC += ", "; + } + } + fGC += "};\n"; + } + fGC += "\nconstexpr std::array inputTensorDims{\n"; + for (std::size_t iInput = 0; iInput < fInputTensorNames.size(); ++iInput) { + auto const &name = fInputTensorNames[iInput]; + fGC += SP + "makeDims(dim_" + name + ")"; + if (iInput == fInputTensorNames.size() - 1) { + fGC += "\n"; + } else { + fGC += ",\n"; + } + } + fGC += "};\n"; + + fGC += + "\nconstexpr bool hasDynamicInputTensors{" + std::string{hasDynamicInputTensors ? 
"true" : "false"} + "};\n\n"; + + fGC += "\n// Output tensor dimensions\n"; + bool hasDynamicOutputTensors = false; + for (std::size_t iOutput = 0; iOutput < fOutputTensorNames.size(); ++iOutput) { + auto const &name = fOutputTensorNames[iOutput]; + if (IsDynamicTensor(name)) { + hasDynamicOutputTensors = true; + } + std::vector shape = GetDimTensorShape(name); + fGC += "constexpr std::array dim_" + name + "{"; + for (std::size_t iDim = 0; iDim < shape.size(); ++iDim) { + auto const &dim = shape[iDim]; + if (dim.isParam) { + fGC += "SingleDim{\"" + dim.GetVal() + "\"}"; + } else { + fGC += "SingleDim{" + dim.GetVal() + "}"; + } + if (iDim != shape.size() - 1) { + fGC += ", "; + } + } + fGC += "};\n"; + } + fGC += "\nconstexpr std::array outputTensorDims{\n"; + for (std::size_t iOutput = 0; iOutput < fOutputTensorNames.size(); ++iOutput) { + auto const &name = fOutputTensorNames[iOutput]; + fGC += SP + "makeDims(dim_" + name + ")"; + if (iOutput == fOutputTensorNames.size() - 1) { + fGC += "\n"; + } else { + fGC += ",\n"; + } + } + fGC += "};\n"; + fGC += + "\nconstexpr bool hasDynamicOutputTensors{" + std::string{hasDynamicOutputTensors ? 
"true" : "false"} + "};\n\n"; +} + +void RModel::PrintRequiredInputTensors() const { std::cout << "Model requires following inputs:\n"; for (auto& inputInfo: fInputTensorInfos) { std::cout << "Parametrised Tensor name: " << inputInfo.first << "\t"; @@ -1286,7 +1847,7 @@ void RModel::PrintRequiredInputTensors() { std::cout << "\n"; } -void RModel::PrintInitializedTensors() { +void RModel::PrintInitializedTensors() const { std::cout << "Model initialized the following tensors:\n"; for (auto& it: fInitializedTensors) { std::cout << "Tensor name: \"" << it.first << "\"\t"; @@ -1298,13 +1859,13 @@ void RModel::PrintInitializedTensors() { } std::cout << "]"; if (it.second.IsConstantTensor()) std::cout << " (Constant)"; - else if (!it.second.IsWeightTensor()) std::cout << " (Not Writable)"; + if (it.second.IsNotWritable()) std::cout << " (Not Writable)"; std::cout << std::endl; } std::cout << "\n"; } -void RModel::PrintIntermediateTensors() { +void RModel::PrintIntermediateTensors() const { std::cout << "Model specify the following intermediate tensors:\n"; for (auto& it: fIntermediateTensorInfos) { std::cout << "Tensor name: \"" << it.first << "\"\t"; @@ -1319,7 +1880,7 @@ void RModel::PrintIntermediateTensors() { std::cout << "\n"; } -void RModel::PrintDynamicTensors() { +void RModel::PrintDynamicTensors() const { std::cout << "Model specify the following dynamic tensors:\n"; for (auto& it: fDynamicTensorInfos) { std::cout << "Tensor name: \"" << it.first << "\"\t"; @@ -1334,14 +1895,16 @@ void RModel::PrintDynamicTensors() { std::cout << "\n"; } -void RModel::PrintOutputTensors() { +void RModel::PrintOutputTensors() const { std::cout << "Model specify the following output tensors:\n"; for (auto& it: fOutputTensorNames) { std::cout << "Tensor name: \"" << it << "\"\t"; - if (!IsDynamicTensor(it)) - std::cout << "shape: " << ConvertShapeToString(GetTensorShape(it)) << std::endl; - else - std::cout << "shape: " << ConvertDimShapeToString(GetDimTensorShape(it)) << 
std::endl; + try { + auto shape = GetDimTensorShape(it); + std::cout << "with shape: " << ConvertDimShapeToString(shape) << std::endl; + } catch (...) { + std::cout << "with shape not yet defined" << std::endl; + } } std::cout << "\n"; } diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 532470f..3a47d75 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -122,6 +122,10 @@ #include "LayerNormScaleBias_FromONNX_GPU_ALPAKA.hxx" #include "LayerNorm3D_FromONNX_GPU_ALPAKA.hxx" +#include "IsInf_FromONNX_GPU_ALPAKA.hxx" +#include "IsNaN_FromONNX_GPU_ALPAKA.hxx" +#include "Clip_FromONNX_GPU_ALPAKA.hxx" +#include "Not_FromONNX_GPU_ALPAKA.hxx" #include #include @@ -2410,3 +2414,141 @@ TEST_F(SofieAlpakaTest, LayerNorm3D) for (size_t i = 0; i < 12; ++i) EXPECT_LE(std::abs(res_ptr[12 + i] - exp1[i]), TOLERANCE) << "row1 i=" << i; } + +TEST_F(SofieAlpakaTest, IsInf) +{ + // Input contains finite values, +inf, -inf; output is bool (uint8_t). 
+ float pos_inf = std::numeric_limits::infinity(); + float neg_inf = -std::numeric_limits::infinity(); + std::vector input = {1.0f, pos_inf, neg_inf, 0.0f, -1.0f, 2.0f, neg_inf, pos_inf}; + const std::size_t N = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < N; ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{N})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + + { + SOFIE_IsInf::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(N, 8u); + for (size_t i = 0; i < N; ++i) + EXPECT_EQ(static_cast(res_ptr[i]), std::isinf(input[i])) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, IsNaN) +{ + // Input contains finite values, +inf, and NaN; output is bool (uint8_t). 
+ float nan_val = std::numeric_limits::quiet_NaN(); + float pos_inf = std::numeric_limits::infinity(); + std::vector input = {1.0f, nan_val, 0.0f, pos_inf, nan_val, 2.0f, -1.0f, nan_val}; + const std::size_t N = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < N; ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{N})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + + { + SOFIE_IsNaN::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(N, 8u); + for (size_t i = 0; i < N; ++i) + EXPECT_EQ(static_cast(res_ptr[i]), std::isnan(input[i])) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, Clip) +{ + // Model clips to [-1.0, 1.0] (initializer constants). 
+ constexpr float TOLERANCE = DEFAULT_TOLERANCE; + constexpr float clip_min = -1.0f; + constexpr float clip_max = 1.0f; + + std::vector input = { + -2.0f, -1.5f, -1.0f, -0.5f, + 0.0f, 0.5f, 1.0f, 1.5f, + 2.0f, -0.3f, 0.7f, 1.2f + }; + const std::size_t N = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < N; ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{N})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + + { + SOFIE_Clip::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(N, 12u); + for (size_t i = 0; i < N; ++i) { + float expected = std::max(clip_min, std::min(clip_max, input[i])); + EXPECT_LE(std::abs(res_ptr[i] - expected), TOLERANCE) << "i=" << i; + } +} + +TEST_F(SofieAlpakaTest, Not) +{ + // Input and output are bool tensors (uint8_t on device). 
+ std::vector input = {1, 0, 1, 1, 0, 0, 1, 0}; + const std::size_t N = input.size(); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + uint8_t* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < N; ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{N})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); + + { + SOFIE_Not::Session session; + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + uint8_t* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(N, 8u); + for (size_t i = 0; i < N; ++i) + EXPECT_EQ(static_cast(res_ptr[i]), !static_cast(input[i])) << "i=" << i; +} diff --git a/src/SOFIE_core/test/input_models/Clip.onnx b/src/SOFIE_core/test/input_models/Clip.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a91d748dc043b609a085efbf7e35fca0f56e46df GIT binary patch literal 148 zcmdz%qu7@;o?Zn$t;M^&CG+b5-Wt*@=HsAa!M@DKsg~!u!@q@;u0+( z0Y)Vj28M?HPH=5-F?*O;VnviN)N~;(E)EVxAr>ws4xS_tE~vT4qE0Ma3cPKc-z3m1a`F97(g6F>j} literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/IsNaN.onnx b/src/SOFIE_core/test/input_models/IsNaN.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d1a6e0569f1510a1c2c0330587060833b84589ef GIT binary patch literal 89 zcmdcPKc-z3m1a`F97i76CeNp literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/Not.onnx b/src/SOFIE_core/test/input_models/Not.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b29ca991d1ea1a97837bde89ae185d7d3c182715 GIT binary patch literal 85 zcmderC*CLbNQz5vNZ2KDEhL8*MbP@Vl1B=<%2>Ry)y@6 zJe;xC^*B1^TtL~hd^)n`03>g@iJ@nnlh&eWnj_f@@@r3^VC8MB&wdA4HBXUUy$_93 zSnw{=q0i5Bh>W-ivCr=zKca>5XOAJg)dx{L-vw2T^D%!#J~lY}fhhW_vMK01SZL0m 
z>ZO42#;g$L7Y{*?A?GkV<}&227>2}Y42CW0MERoLXcjXLQYXhjen%Wt)2nFsZ!88W zs-Ptz8Fe97i6x{^(U~}ESuhp^f4Zan-XWlMSb!6_t}Cb}zY_Xd-=l$(O@VhK#KCx8 zZx93oLcfl4Aa+;?yv#&Y$oJ63xFm4S`a@$aJyDQj43e;`!pyROBL!T=%Ye2j<)BV0g>Thk~>WXH1P*WqI#0*^G|Wu zP8nK6HK0*aCrToUg}!_f(4N_at&w3QHqjWY);6P6`V|xhtR+8B?<)cNw21lWFQxVqg{sOJ#CrPDKE3U111hsb`W5eD&w2YF1Xi700y!RO! zj{k=|huuQ;u0EtQsS#w?r(x=~?`YZwsc7m-DsC+%BA>s?GN&W>ecN5kjo@m}m`J)| z!F4Dx+k>I=i>ZjI5!!4VN~WoYqk;TfdR_Q^u8FI5r^NS zk71}sPfT-_p{wzAjJA4-4eeH_F{-EXSG!5QE*aM5D$%IRQ52W-fbc%+G2+rA$l7%r z+AiNjN$EEXz55EZQ(RD{`%dIzW0cy#_aT1MYpB|sg=Fp)aH>8F4N?1{en}KcGxk7| zwi{?h#4Gih&x54Z8_k@iV)W?WSotO&JnsDm`OdQ-@7gO|Xmtb~j(0+_ojaNglw;b1 zYSfs01^(F)*goMf+FyJD8PjEuJo6UXb3MUlb^$7eSP?})Bej3VKP)F4`Ar4!@9vgAVvBEnuIEB zy}FZrmw$(-`C(8o{V=4bE`yejC(zuw0PI%}0`LB*nEB{F1lo-NW1o}QzT6Yb_ebM| z5s$#3R)8egxDQ+XPwJeqxvEXnwq)baZAEREwEsC%SJ)@uq+&G|1Tr+H9_5w!K~?s;jD3G z4YHN<*_=CmY~9nfZ12~8%#ym@jPC0eMpJZ%8sv^;O5(<_*)fDs%x(i8RW?X{HZVb9 zR*a@l3UaS)D3PvUb+;v~nX4fq@|%K9Jzhg(ygMtseoxqLWX5V13ee19FI<|nk;w`v z1eq$48ehH$^8AqyQMim1=V>r-_8x32Ud(95y`~Lii`lB1My$HXm{IrM!Fro-Wy(G8 zVDz`)OhMyXHtEMIh~2q>Io^95OTGlL;(z{Qd3h<+dc+R4^=}P^JPBaE+*uTb#VgmA zo3j1`JQ)KkCkUJr!Th>Cj?LuN!`l5@SN%&W)6tb|lIM7k9BZeNB0s|0 zu}qlRQ=iez^#QfnHJFvYijDkb#~d{6!AfpV#M&t;kZ&JFeC75S*w=@3aoNPOC+9Lw z6PB{H`)?F{)?!=NE-cUO2+g_S%4)VYU~I=n;CT#G^2hmMOIu&oZKocS+dB(KZkWWT zg$`qTXN_bV;+}wJua2r#z7#qR*C0D&JW~q#zz+;!6Z;F;$iK6hj9Kms@AH8Ik1sA5 z-Cdt8{ihEj9ykVgp(mBn^a7GJ`8;G=&SOlMMS`~Z0hXPa&y?nD27zHX3FuX+G|=yr>i%{OH&gW5n`nMLe1uQ0=NH5>f*DQf0cP`{kjjA_Ss=2!Jn#(aei z9Ek@TleCVpJiZ1V1==yZ*RHI1^E5VP<`#xu#}e)4cnsb>iZT9p0`gXeGXB|nA^J`? 
zrd-~W@ok#STsqmC;kT!N!)_5{b8|Q=dD28OMfDi?oX6<)hCE*yohl)YQhM51d)j52sZCe2xC1{iIEO=Ou0A?R9#;YxzlIxOzXj>*1E9N zWC81aD1xb+_6tgJuSogtfb?gFGC%y_1 z{Ul8803#;mh%**cuVyUI2Vtw-H`1W%K*3%OE&Mc^jl6Hj*q{A}k?#3TReKwh;V&+t zdUrgnc)f^?IkkzY%3jWv@7m33)xID*_?@cMmzBOno>*PK4`MnWqW)YXCRf#swa#*6 z(lffSrrOnv)tP~e{LF4f>fu1;bHi!q&|zSs?1mNR`7Bv-2)xIxW3wY17}X#FnDXYK zv~eiQ8+eZB_c(%@?on{ua40KTT|~)aD<+ejCR~PZp!O-Bi}5B%kJ_y0WhCo?-r?>*%=W3&afX4XSUa zQ2yl~h)J8ks&C{$VJ~Z@+Q%CdqU~&XdJFQTrbBp%ZTK1w3|uVG!LA|34lL-E>kN|g6GU#Y%7}!6~m`9f^}agukl)e_V;u& zs!j#-enVMlRygXL+yU9#-)R4-1QfC?>a|6QmK%(~Y-=U(EpCwf@ZAhOM_8MQXYo?P zK-Nd6$L36wFeTx@YEt=3hSZkLeL0zRG^hmkW5!JW$7NWqYlUJ>D@633$mH7pgn&Q0 zS@H6(z-$@Mrg)vhdjENBplm#|WNsHW=7R&nOjyRs%LJ@QZv;_1?TeDtHmEtUnbJ`` z8E4b;5IVz_G1xMbb&Dy(gXb17{Q45=`FcHDakv}n^)Lnx`UJAhCp2*U@nA-g-<$dV z(vEF63uMAu{Fq?BNQ@Wq*=)le4DU@|0e@Z^Ro(W&nuB6S->EOFCebMP5lKDj7O>9u zzJgZ&0P@lUh^jB2QP*82Hho?a?axsRFLjxN@7xe<&rCmO#ZWQMqqA8H7BNm@xG%VUpo+Tj{7mvd4p-z*Gnk(*hS(N8Zx{Q z9}CzITUjPBh~eokDlm7S&3LZp$JneH$L2rW&6ehFWTmUlVEX1DmZvHbRy&IsY2PA{ zrd=hr!zZxTse_r3o`YFFZxIt5ZOcfnCktb%may65`?8kKd{$a_9u2mQL+z)L*mOq* zyjA}wbME(NQr5Y!`KHfM+UF<@lt-i1){iwauY&M{XHZh(2koXC*^oLfMk8*fzdpDy zT4Mn#jW0xN?84*^Sq>iiRRBf}^ez}|2CB9>r zijFW=;+>4KhO-#OjXNkwbO7Gx;ZT}B59L)unBvA$=x6W1_3II|-&lLbdd^AI_)G>x zA=fj+LmB!0GlUm@2raC4G5X71VZQTE2z)Yw;r;H<)W4Fj3zzR=vwu3Up(D4jmc>7y z(=mX#bbka}+~~u)?fV3dn*NOOUwh_})R#@)39Nw?!`NUREp=JN8U^m)>e6u#XFtc- zRaQ)C>SZLIrI;xJrr4?mdE*)#9&OpogzoCewwGu@Rvb?R*8dRAzm=qQYd@xP|8Tar z-+I>H&zKGD&donoE1AUU!&zJPCT8RtD@I;LQ6+dG)bTb!%yKFAUTDf%@2*4N>xs~G z*N&CFu>{raVQBb!220n@U}Jax1%W1zSj?NpDiS?F@}Ze{>4F*0FRK{dw;ZA4k==~` zoRO^4&ZR7G(?V8M9>l)y;>Z|$ZGyZ(zZa+XIWzkVAVl9FbdQQikTD(SbH$?|%KIYtxMuruS>+<{_d;osG44_2&jfa*u&~XKk!(0Y zTEAowenKEtY&T)uJ@|}#rWn$;i2+ z*z7InU0=jX*A~;ZhdxYDTQ5d>lOdOyS1}n@gspNK!+1TI$Et<3C^e|3mCp`>wAGXH zHxedzR2ncv+nMyoqnMiZscdgz#hNR&u;J(Z821pusCa7#PtnD}S#5(3r^m3;TR&-7 zm=SCBX)43tKa(|HIgBj>@@=vx6R^ zQU{~VMdA_H&4$-!0Pga|u)O zVH{&{cPG&OA5pOX5|vEZ0v_qR(F~R{QlkgNGfTi2EW3=7`_qxPVkH`6cVg?aAEf?| 
zfDMgIKwWtj1U3gUZqr6HS&hHYa!3&CG|GtWeW3>vm31HOPHNHCZYg6lV?FD3XdgA%yWxaAxhvXg0|5U$#UroVDJk1UHNB ztYFeQHpjW~ zGJ1rbfR+kN{V3J{9@3k){S*L_!UEz^A^eA+D%Z?>6{9y+QlKXnHKwb!xt z8*X;Zn8a33U(S?Fn7}F)HDYyt?w#-PMCqO2!l-MXQqhwiGdLCQVzK~H}orZqIg&9UAjD7x3Y&h!+mU}}mcufrW{2Pk0tCb}8+Bpo0 zd4SOgk3hH63OZENAj9JkwDySso^Z8Nl(1gt)ldoj_Fn_5U8itE{%I^-(oA!`Qqc8` z7K`mJV@#zU3Jz2%Z8Gl=`Lv-RpQ{4Z{C&!bg~zbfdl!u;ypQ6Fcoa<-iIGKFsP8id z#Z~&4SmBPDC4msQ_Yz6$DnYk3Gf;h01p%6uAhtP)Aq`US%a>!TVl4@~dI23HbWq?I z3*mRmux9*glp1y_E!SLwU+woGeCaGuoi3%C$oVL$nM!%QUAN-ieMeEBWkTtm0YZ&u z0XRRqLhHL;0;9H`AYIp@ls%nEO2gkm<-=60o0*9Mi!x>A-yhWSa0Qr8`;O)&wov)^ zE(Yq!Xh6#!3^cok6}O&XrAq)#mE8mFu^H&XZw4FJ6;v{B7=(VD1jWYtA+ur;N^{22 z+D}u^=$#?*M%55o2Mw67+kw(QEX05L1f?5u2=Dba2!=NB_p!oc-aZIQO~6^?1_TaY z05+@jXnHn6$B1vxz%xTllbqTdIZ0F{Cy13*8H#HsQj45q)a?360{`Bl_3vvjYgjgj z3g*+A9Z@j#?OzE0avC*#FJb2Qv!qyF4-VuT6s8|W?c+GKbDxc3&v;Ppn}rojGO8}U zRA#RIM2&62xcT@G6pu^*@gE=L&j?3(IV+U*Ya_dx9HP)jHVf& z9(x>zP5B75Za&yLx(N#lqmlTWMUfdxdGrM}vuy<9xndmI{T=e1ElH+RC$#P@L(Tdu zCHrX)Naj2x0`WdVSDnY8Hx(c<8Z9i5#(>=Fic(P&O{$%9X#QKyL%DDt(o+^gg?t~B zhH!op@xtnx=b+H(D~Nj)Qr_lq4x+BCP}Pu5jk6a+QXe_>5BnEYO8>_EC_IJqOXc&s)q_Tt?Z(t=REy2?#p%L3H>R6dZ2> zpAl_fewsnAib2S8{7TiwK2y_k>p`;R0uhbXBZ*rKvD!0{1pn4Tzu?0V^~M@RDjPJb zosZ?MerUk;WJzEdmOX1m*%0m-7(GVc<5Et~M}kiAmNHSPV5EBms!xf^us2uG=HWAt z`VCSh9^MH8vmaQV>4^F90?5oTLw(&|km)R_O-vm6^ta%?^L7vzcL+O1>M-|^1(;Q) zV6o3QRIEOZP5N)3&IB>Kt^tB=zhI_e8ftVU!Zsy$);-wrhDBZ1`3 zkErFJBf6fGAlEn>#HP+54LlE;8IP$%okO)3ZX=Jq=b#EN5=z5g5OGBr&5VjCHu)JS z%{(Ct-D3m`EuQ0q!?Bog`3M%SDgpz~P0%vy21r9$Qo2z_csKTv@4L#-Vq7As&+#Gm z#zauHZbnI+Tv)&8G$i`?Lg+p>$T9kea-U}EoM!+UkL$`%gKCm~D+3EZ%OEhY7X%(n zLbYimW~Nj?X23^kza<%MQfE`Q2~`mD?9IW6;CBM0bGnh}{VK@p zf1C;$HOfxc6JW9}6G}5r64$v};Jdbh;m*ep;}?Lw#$}|qRR|s#b=a|>2?PcSz|Of1 z<}(k1WEE1=GNgge4$`K>PWY(zLC{XDAq(9;f`s>%T;j7}?OBASUF$J;*LQ4P|BP5w zXJT14;-#=NP(3LPq;xNlubqtgGdPd@bGFdRMTvYSfzUoGl--7;rq}=F%6SKGtlho5Y%@S zp_ccK<|?YtMI!~ioj0{~?TP+(!!YW(K6?DECuKdJgWS*@qHrfv$U0DZY>&`5?Frg2 zrbvIkL$}pEL1ZyTXxdqgl1T!pC;9>9WCL1eouT{>MOb`#6?h~MK<&5gp#3iwU12Nc 
z?Ys}7(H|iyHXK8f|AE^2KVT3Q1fsb|h3a`fsOz{7Xmz^)%)9RgpC%*FA3Ox3$6i9s zSu@P<|BZCG_5$A6Bf?839->EcESM-Sb9wg)j2U~F1{w|^(r4X-P8Tas6?}twH!6W& zQ%n6P{RDp57l``v22|6Vl!FgFz;K88VA}MImT*3S{<1vW;C}$DZ#@R}>JOBcr*cUC zehFLqzXrP*d<;l=2eI8xqF`(n42%&G@sCpOu0Mg}3$nq3$pgb!9ZCfAiNTvE#O+=_ zP7uX_#p64`|L_|P*Hu7c7MDY@Noc-rF1Ej!4WZ&05S4QV1G`A6$KfiFpKhlXGd^LZ z$6#phQ;n9l_k-i@0?a>f5kq5Ml1A-QOmhB7QgRwlwctJ)oL`EDWtC_!8PTI|50>QY zgONtAaa#~6>2hB51a=-aLuvgDv%${G=RuKtFIvAk68Lf`(&LGsO=b{?vMwK?z5b4(iaQ>AmhVmwimULeQY zE}%+JMA}tVKx1Vnm_CUXC#WDLe<&v1Jqf%w{gvU~W{@*GKs2(^HEUn zjx-q7foNqGT2D!WkVmOlJ^vlYk~D+FOC}WMBx3QzyAXWV0}4XVAm28NhA&-@2W|Jk zu$XGB_I*mcd)|P?kqCmob0ojl7cdlkfspQ(A?P)9s5}x(N6v#K&(zRT^a&(gm(a|yC%G)D4J0OA zq1`GJWHXDACwT?BrIo}v`8wJh974;qKf&f%8`=llBQbT{?^@7}N|fVhu}u@`vLB&F z@=mGh(T}7I*bky!)47cAB_;)Vaamj*J@`X`;w24G8QzJq@Ha#@>y&?66JOb(1ZegmbCe-xvQDcd8+A zMYKrWs_D=xp|vVF1n%A3~?mE zQ!it&U^evL_6lP!K7q_*XNXA>V)+07@w35DIDlhz#!FFF--|e&pN|Tq*P-L>e?a|(%jBGlAm)GXlKpDbTGo*b)y*jJ z9)ylz#!!8`2MS(hDw&!h&|lgcX`K^Hy`F>C8TY{PzXYh(1B{g9qT7)VkhX*KF_L7$ z;~8&2z0iiX+iU{!s4pNIKV0d2%oo&et5L9J4>&KngZ}UyE7CsW+P5u``7H;fE6NJ^ zkyWUIIm&9s-&B2oW001fCA=#aF_A~npz9fw-n@n_gPy_2h6K=e^TenjHE0&R7ONZj zp>taYX}bI#WVfn7e;5N59r0k%h&>4u%~l0`}#zb7F zor%Y=`uAVZcRmLy?Fgm(Eswe%ZG*yR7tuI$Fv{=rrji@?mC=f$z-#YOAP=#mffs6M z@x?A6{h|+y%}MZ}Q@Kp>2H~%6hnTTjP$iy5tqXQS+OGm^47r8HbL=2`tPU~{C(~Tc zYxTds2aRT(#%#Y!P;BLb-(B~D|H3(FylWg}{K~;46Ayt~|6h>NUk3{xBlt@<;M9K) zbLYmO&7m-2!!IFIKfeIG`R^flSUOs+sRh;X5|TLE2!~C$kBa?5{N>KgYWI#QB{4tg zCG{m7xpzM{>>C7Y2XNW->VpMQQ*mFX*xr%Y3(eLaC0S4y~g90`ihGGgS#X_w71)L4Bc9Uo4C<%N5Yv@VaR{hvTn z!cz$8nS^d%9grzG01XW%(RtfBnr?pq!kXH#dVCxn9M9!^6FKeOri7@2^RVN52}r8l zfY;*&l`QQ?9ZeSC`$sh>E$EK5G1E|Vb{1$)<)QDVRKn}H1ryGkg}nNQ5ad;kwf#S1 z>4@{RwCo86=5{5S`{!fM{U&tu2nFfvMx{3LDpuRS!H6f%K($=1qnn|1 za{&Uk_JoeJ=OBH0JJ|ekLec9|>Jr<;Y2X1Q)VvpX%)bxPVIL`vF;%+V3&%>`50sBe z6qfx_VY!6_j*A|E&haA_zG%htXJshSY{JCJWypV)NwNaOXqM3mnYB%ziHk*Q77h*1 zo@0R^>obYzgOt~uz6{cdx zwm~S1ili!&bnEJL=r5glLWU~(FZ^j;)7j!EEpv4|K*ep8ur64f~E 
z5e9FV4AGNwAaq$b8$Nm8E6OI%0kQ38=BT=wP4F|PVkWMYY=;dfgkDra<3+I5mwOXkW7edpF z2q0T$03B}wZRt6n_!@}J#TMX=sxRoLJBR*5dt&E%E-NrQgFLd=K@j+irj5OTb~Sb& zJHH-?-CP{({su&g#lo0v0a&;H5b#%6Vbjeu@M}It@_3CW>UASPva5o44XJ|(qrPMN z^UYAa`UB+I)`HC^e{@)J9YkyXLl-?QNQKjh_v33&UHXOCeAZylCr%&F>xqi(J3zG7 zM`*Z7fraZY0M9qs-sKg?B!xXd#hRm-@#z7UcOh8n*+$Lt6Hrt-28(ahKiLtenbEhjMY=uhCyn@SXoEvPYHPMnwUuyf3DE_bj4!DAg2{5?uaPb@+2 zoi&j8KtaTHoUc^;hxoVr1A1TD(cb1A*l-zbOkDxBNhv{XtRo~o_CU+NO0W@IKwRfL zjEOIY5UfYzXNQ2d_lvMW8iEb|pJJ1X2^NQDLY7lsFqfW!7%?AZlNS*2+c})S$wysL zS2W%m4Quc113!K_=#H<&$}ffxd-oy+_x}KbiS?+HGdg z*e3h{^4E7raoJ5Yl8nKK9s5yqu?N&YeGhiROXw4{9I`e~0%|u1wEteCau07>=OzW$ z5w0lhxt#jly$9@rs~Gw)g;Y1Bf~#B!#xo1h>#7|(zeuGGi!XuT)p444G!X-PM3IQk z=h5=cFXEU;xEgksa=0AwmdAqJV*-#{1k{{gtNI>F94&*m+001DG^bPyR)`?8c((_jZgL(16{Ru_$f%T2Q`e z8mgP7VePvAK+ac^4o){@4&pSA<9nj2)hRWXwt!oXH>5wE0&X04=zB>6VV@79&jlV9 zJNiNL@N`s2^)N$u2u%V?Kozu~Jo@kpMRxa;9p10dcj`;3E8v0M&zE5AI}Bn!j)K6kQCn(nhwn&lA*hZYUpcfATJL4LKF5SqP7#7zW>54ms)FEM)~LO@5n|>zLDT7Cw4b4) zwD@1NF&P2%*Ak$2*A~!Nwt{ZkC^SSa|Ml!3{E5qmUWz$l$Xk6yz;bbFDoI_x^?t`5V-t-mpJ z!CWZUH-t2m68dF4MGK?*;B~qW@b932hpROJycDX|5oS|U4f~Bg#sU%uf4fR1>-TnO8q15FVCN{=_{1yW#W@|vw z=YKh$1GMDIXUs2Hi!Kh)*f9A9Bt5L4rgqWjI$|J%s!LHbVg*$Pf5YNY6EUl`8KR%3 za5cu12)^c1SIIR@ejLSR zjowtcW1uo{q(6-rS4MfdZ5Z(9Cc3P60byYWp{%i<%cOTJc>~T0Ra2Kh<2DTr8~X^Q z3AxJZh+ZIAx0hNT8-N~zTWKk?kE*&>D(&pHfNkAp&L<_P`gMocP4_|7gpbM)j=dJu z&lg(HF5u>@L~MI>2fQRhpmuH+m^ay?$5SV8Zc3yZ?r_|3c28Uq-GD9ct{^)l2fsJU z(Edvmc6?oo`ca{%sHdPi_>h*)5W&HTS~MRw59~}h4*_&EDeDj|uQNhHQ6=T?jiI5Y zCMfM&;b3;f9khdc;=+_Spm_ZO)ji4}Z)q&#Cwv0=5&@OHE`pe^_ej+56P))q3)*rE zp{zg&T9Zxa|MV69x_SZHosV*S$Qg2cU>3%13r5knVpQ>xg$74Ui18T_Xf6{@caEl3 z+x}wi&52m)^$A$c@1N>k0|jP>q4(@ZP%6nG{(GuHY_$~3v(iBtRzqTMO#!jVWH31- z1M>y8@=pG08)$2q(!5;)Fds^~79Yt$v zlsukSzNqz`@{-;GaH+is71cUyKrPlD=Y#e^KQtQ9gqe#&LC@q7@{TPda{Cy{i2j1) zA;)71JSgvVHFmrA3(_phI3Hsrar`(H+kPHKS&zpgHR~tGU(Z8}_r>VDE*aB?972cB zZ8*VJ%kh@)sno2e(s1ZK%zrf%L}C6I`dx|k_byZCplKL-Z6VgJD1fB!91w?{rHV0n zDEfOqXrrBhxwAY$H2nybo$E=x;RRmjtQ%a>~Hl<^PFBb#{&0;kU* 
zs)q4c%}XcpzGFeud4hJV8HCc_sZi=N3&=S~u(0|KVV93XgVP4cY8i!=N8eE6ai*X# zup;*6<1zeeGn%ca1Kx>8q(aI0`tkdPjSD%x%w`z&)}(MO-koVaHsn1I30 zZGo@+jM}6xC^(u7g~cy0#(`4baJS<)y1Y@}m@S^+Biv9(l5NAAPcJ2GzvXNO+p8-Rl(xMv4;YpzAvKMM4OHjJyDe4)?AbsOEERPGsl*NBx z*r9JAI)5B3j>KU;mto0IjHON2#-Tjn17!8B0WI^C^7iF|>O}`N9eNQBYHdNgrI03l z;h3*P{et|SS=7^?WBjZ&$a|~rKpu1aQ#V5;!@Xls;+tUhWjp+edjt9MYb1H=U9fB9 za*486;1;Qm4i_3Q?o%{c2TkW{Kms^D{)6piXCV00QHWWnq~1>>xt)Y|@@wG<5WiSP z1$hsZb-V;fvx-IWtKF!Xt)dl6_o4gb7;N!9!TH}()LpnmHKTu_=AI=r=;j3SP%-U& z_AD9%v*gj$5-j~3L#um#KvwYs>ePq8ae6X%thRv|r=ujvY7dI3-;b>>mkIPXGfvGR&f7h?!eC%SJ zy6qRLBCLgiv`D2$!p&jRZcs(qdgQrIaquqr3C8QkK&FKW+RjabN3+ymx1$kVZ?#~3 z=u7Cgwg`+XJuzwKWEciDm~`|kM6HSg>DQ@B$;e;mxbr(YN4=sW)kTLAJlGK zCOrFuQgFQ(ZK6(s-QS0x`7-((jr1dY`K;CFbYY%0B&D%oin=k=ehXD%Q=aA&HnW(z40$Y0^h_>t} zzOld2HtrlY#UJH7j!L3AwH->vC4u|mQs_4S76z=-U{a5hU^3$r@Y=2lCH*ErR{kr{ z+(@Q&@0McbvwhSezYNvuxV#|kFxDhjg58kUATHd5PIArz{<)V%KbK?jUlweRZbq+` z_uxKl55AwB4e8%FCT_+{w2$5m1zW4opll6_x4b6$`xq$i>(BXN$El_%kA`_X$EX$V zz#jXHfvYmX%Qyuke{z)hb1cwf1*d@zI-qn6qs(7p4XQDrAdtk-*h>$9410;*v3Y36-;8eOkAXDg8PR^cLxap}(6V?ZTB{9F8uXd4 zv$W{fP>gNYwAd;dL`sM51&`Mb(A29I+kZQu%HXt6wR1g0=s$(@`xNb8)^Uu+BAODC z1!{o{RJ=P0(y}6<>8Da?6(2{l@oMm0dly9S`q3oACvl$HALDGSz!F% zOANhwk<{KZhK`iOa6GOA6=|=K_oqggxONX1^m;)XaywB^^b{mVWFJ1+kE>A$e-2{ThcOpM}I4btcrh&dzBy*!FsweFx6TAS%mUG|aceLRM>12;hH>-|8!o`djzXM)zZ0eiP9!S=-uC=@@07>h#E z``mY!%H7Mr>z%NqA`y)l14yz}P$_YwmBo7Sz2rMKlnuw+)Tg+iH646;*Fj?ap5~4{ z0*RdOCUHV#u+K&4*fkzvM>m7Kix1YGTZ@V9nGoNw55=RKsK|VXP%wwfa`X#9tu!E^ zb8L`jbDhTjQwuUyM5Vp^(sG;E(7Jpz#@eNU+j=)tGhI*>BY@l^9>DL2MNQ3plzN&{ z{iF}rekPdX3m3t{6OS?eA2kHJep2iuX@EEEw}WB+ zHB>CWhiP)1@r$o>9;yz-wzKHqW*M5zS_ftoQK04Soa*0IXtZNAHeE`?)}FD@ zDSLyOuBeRq_#YZw?7?vZ`=EHkAQW_ISGs;Uj2-*9`uONONP1ouX0w+;GA$7U8WgCS z6HIF_ZU@Ch?%ugi0BO>b0&PJ9$;vlJ{=Hw6iqpCM$2nlyevTG%c{ua%67tGrN>Td; zD)UT$#4(G&XkRynuONKW8_5_2= z{vcWJkB(nW(d&pem}M(4r{Vy(b9;{-w|EfPrG|8T`HT_G+^nB_U0J)j06Lh75Z?74 
zRNo~;o>ZvJyckC-#$N}sE5{*sm@^pIe#O9j&rtPy5eapWQiBwlQ?eb{tL9R><*gHUrMoM9P&1SMMIqK}us)CaBTB8W#x!v~^`Ujn}E#YEDT$K?|rQGAli`p@2jm@FyL zByn1QZa!5_yD6;md5QI3>Oe7gJhlq$h`(zNJx!7@w>xe3wd zBjNqG8_BsJ<8rF?gdfkWA#7tC-y-EAsPy+FPg92HeFy;Cy)SniE z>aRo}s| zdZ+nWW3aVq0;7I?vH0$A&cjP1DZ3t^ zRWta%@_~rj`_S*mBh;Pz$?a;GfoRfiC|rL9N!V@7t}6mDY{$S?UbMry6=N>OkwUvW zKt5l_(Aev=?$Jf`8g~;SqA!A@>pDz*bqlH^09u2}IOfEj+U%5Ldba`KICwukqU}(t zi3chBs=%yUS4@HbuK@FdM%_9>o>}v+$Qj-4@0wp#W-te0pyPU0;NIaB+2_N1UMZ)&G$l~aq}E3ou$L# zpG_PSod`Zy4>f-{w&n17aJK$H;-Zp3r@cW%53f?|zDdv!v=#-|?ZEce16-iEDUb@d6#Dl2JmzsnF<|ASViID_>^bM)`80N?shZZ9ws4HIsn zXvZ|F(>Wt=izN{qUZkwb=d#g{ABb7vY;iz)V2gaa$!3&OsE+du;UX%3dXc%003X%ry z2LIFE(5T>g0&ft=w)aN9YaujpJ=gARCpOR`@YyjCDuU$+Zgg>sO-7)d=8?SuVm_31$AP z#vNWt5E)v7)w4WkRCT{dVyhDj(jTz4<`6EaXs6kuKcIg2Qs}yT5+96ao^L`hIv#3- zcKK9vTA2?ig#no69u0|UT44Jh>vT?M+Jz*7&#N9h8zuq=Qwy4&5kai%tuZ_5HeD3) z5j8}6LEv+P9`A~Sz6%#&ww(ri8AHhN5!e&0>3>3~G$iVTl+;m99*@c=H{mToi+lM+eaTv>xqp5y6Dj z*I;f`DzwY^LAs(hF+6>T7z@9l&u*4yoRmW!W-z~Vs1_WqbkW9J>Zsjgic$Pq;oRHz zA;>_D_4&R+Y}#IEd_D;ZoL9r1{x*8y`BR)KtBP(92-=q3AwM0H@j^@-+M1ljQ!~D! zTkZ_#+ya2oW%0aS|KQmK1>;2EtAg?<%SVHrx6bn9S1RTTIr z_YiBL6l&C~;-WR3?0qdF3KyHfqWlnzDrWnF1==V#B^q|*jDe6FzIZV>9o(DULn}qpPYuSSEkguaUadxTaJFShBK|{JdJD!#pmo=+ZNuS z61UERR6#cuKkui`P9D@W@HHLWF%~Kow^N@<6|m&2FNs?3h}pU8K(`(DWp zZC6iZ8f(eqaK^d0_#U(a*F)l=LoBujWdgPa;)sP;b0 z-tS?!Kg1mj9=lP4P3MuCE`ZRNC)qn>7Fz#Tjy^k>J~QH}Q07Q6WM27>0+l4Qm*uY@ zTNF!+Z5lx7)JwEd>_=8qfXMhkoUQg51*Q|IGj0G9Z3B&A{le<;|G=fr3Y4;IAa?ND`3Pyen*u&bH$kxRh?&9tN}4%81>BR1!8J_<$mp{eTh|B1 z+v};BY%>Uq9l?s_cZw4^2R~A4DZBg7geK4wkp? 
zLf0q`mbE=)Yi%!yF37??n)m5S%MP@$e*{yTl(6(+04&&h8{CSH;1Z_{`bOY{0ry3) zq*(@vJqs~t^)Hrrtb?wi*U&yV8}_s)V79$4b^o#%W4~;Lj!(~_WA6vx(sO_}xq>oY z?Ifz92YMQ-v9BhE2JK;eUqLfyH^i|Vk^@#+`~<`&c;OLn$zx3ogKFtUx+t7vv2tqkRuI zz!Iw-a33}U1s3mfv>U!rhpwr_Ey5Y2>b-?|#m{m7R1V!;6R1m5G&;z4kq3nrP_W5a zXzE=+s&1VJ<0)=%UFH=w?#aL-+P)C`o^_499|xp@r?&*q|%+ZAvfA4o&n4&%l2SakkmPD_uhMC)BF-*mKu=1CtVRSW*WrJbrSo1E2!*yKtgI+t|hZY6vg`Q|EMo$%=U+3-#ciKwGR6{ zvcUN(K-8yNVUJ)HR+MkVs`3OJtkuQDo)k1V_7mLa%oW83&V%-y(%9AEf$lpP8*AP+ zT<_b1r4I0NZwXXZrQx0% zCA6aQCG{~qPaJ;L5E;dNpkg3R9e%QGMP4MNG{0kgoeS^@valvU43;###h|QQmbIRX z&WX3sXlFhgv>Kk-grsTgU1;o|g7_25((vxa$V^)e7K z-2|*V10krq4ph>5s9^Jd)H-o9T>sh3c6BAV=M(E}toTk-+C^wPkOyx4N$AkuA&N+` zz(pnZFnwAh5lqQ9i&`GbatZNZFzXZ=D_2naI0vkzU&#Kh-57guDY*3wKySD|xSlQ` zmG%Lcm~a}RSf9>ee==Ci*0{t4v^Ww-j_iJf zf^e3_ojnS&S+>pgH0woFdJq={HvdXhfkmUc$hpFeHZpC~{qSqL-!ll;U(JHd!+t2i zl|qH-RamgS6*KMJaqXre$YX1%>p(6s6@NvdHGeZLbSiCKst3hWK0x7{QuySOg|Q~n zp>NS^(0aiPESXR&RZhECZX`tYiC%Sb_!w#*faRO4{2OI4}zxr zq`JGFVB@wr8vncmraH_;jmK=p>B*&4pPL|}Xc5lNp9H!y?nBq~YtTMaPvjWyv7-;w$7GtQ5Mt$dGCkJ?PVkqJ7!< zP?>)l{AS;P8mmsMet!t8PE2AxAt!8lSAp94QMA>n4fE#IqQ(}0Vz;kEVCV~Z-+hHa zR=a8N6$-lU-e@231k!5Xq1aRrbiI=X(-r;<$t8U?Pe%<}wmXws-c zG^~7#Id0J)(9|LNW>ruT7>RD*kM0R*2dNb1g;~VHK`t7Wx&Hx&FTPN*-GMs%8Be5G)@aPBQPl2`UUN!EgOpFjlL=#_)U8AYd8kX^w}e1+m!2 z)eu8HFXnFt6Vv|Bn3vf|gC^F&9?6NI-&g~NrOKG;$GQz`B&ohE>q8aYr!`X7K`^*# zUbwy~GM;KK^?AR-P03<<44A34!w&1kXg!d` z*d_Ml)Wt$LYg7k@>pJQBJ51jZo`&DR^UhxoO+B#z>W)TZR3_rV z`{5Avlf92yeqpiMFEHM>78}3RK+nNd%u7I;cx(t%)^0+LKeo8GVK)f=uBUr)|3Xdc zU)WLN2|;g*skKB2x&>^7n%h~>`rkvs8?}LT7uyG2p9|H09@DzHkKlpwbCB47AFO>{ zQKJ7C1}ttxji9wyteZ-TZCBu~3Fjd?jAcqs*iwf(n&5J@4+MLIsp)$^aNRr=(?>c( z_1qS4esh=h?QLhz$Z$0Md>zvl+E9n}?xKKm?{KOV<0CcsfndQTVbkb5aPL$R4jL{- zi>}?GH;OmWLjID-eOwZ0tlI#)K0jgi(-@K8?2ou-!w=}@8Dk3{b3(}kJW%5c|% z76`iLf<|M{Bkyzx1d-j;Ve37S#SOs)5L2nCR2|9-?+s2)n{&5~q$(aCkL90&e+%^V|do zU15%XsnfvCF$X>O1Y-KI1K{^437svM(hAQ=)He4epI+8Low6$Hdak7wi7SZ1M*#`; zjlifvmUko7=x*Er#wYtp&g6JdxpW)#2j_!n<`UMctQ1C_86;7?DI~>J1_obcV(F7w 
z6lhiDl%}bmV96>lN0G&uaFL}N^Ja>Ffr{`CROB8YrX~R_pXz}! z!30LimO#GbU38nm_O)6s@Y}K>NZfRd(8wt4$zfeVl_NRXF*m5I>r=)SVcne%y>w6Z zCbatY5bBs$sjtqS+0KpN=5-L(I;eu6de39O8)_&>t%9V)GPwS+7#EyO1%n+w>BHw+ z(C@+$wAKzGgI|w+s)OmK7~aqa775kj|fkP0QRN z=)onHwP9TjPeoeo9!ZQXj!-)KALOa`h+5CJqcO|qWPk0W78Bo#R%!)f<5hI11C5{qT00(IK}b{ zhzMP?58fSN!1UV1SRRF5XJ|5Sk8-Ew%aIR-0g zKhnC-_OK-NB~1^wPh4`xLhvD$E5dfzvyu4;eT#$!B_X)$TP@p*gb}0nY4Ap41n7^O z$JjG_P(dmYa$?-kF+K)mj77v)@*Qzk{XtwZexQ3pE=F9vij#j+l$gIBv+Z;s#XAZd z%xAJ$t6KPgc7w#U|1kghS11@^%6JK0U@+#DsImGuvG}+Uzp*ZMUIGxS%?IFR)>Vv1 z-GIHZY_6Kt14ieY&~x1p7I0L?iMzF+D{CAuR!B*|HyisuUH{p3KBL!&=@@i^K{RNic>Gqv@g#= zCdv`=S$4Q2BLJJ0VALLQpGq0^qLk!IaKGA;qoz`Ty}SQHsoQi&8Cj0TEu~c9?G|irX77TG zv5-Hi35#DB5bH7{%xQUvDgvfu?|DZB6%L|32Y-?s=f`57b|f|ypF^#LTcGkgj+|2Z z20oA1qS*0Zs9X>X*Hv;rU}v8*cWxp|ShHSJ5x-eF7Q|>~j>`4g1r3ac{pK z_i_FS>Sit{btd%x6`$B=AQVps<5s!nJ62rfirWvaG#nwetP_k z=n)y-;ObT=t)D>@8Ruxh*iJgErvc3-e#gVsPtnKJ3de@6gi~eTF|Gay?3>yEGxvQ0 zt%3;9P$`A_0fhUk%VU-vfd4#lq1ms6rkz}mTc1Be-O;UhrN|Dte|e$Lfn6xx>j*dd zzQdl?ab%c;iO9G+4}V@w0|=4eEf(a_++WX855@VROAk@IM3$5HJT7bwv9mj{ou;mBG9u|<%RA7&Rz2mIGXfh+sZD8_x}MIx7<-`%`>9h z`3gE+7ZR(KMCw)|!07{-pdUDrWL!HzPRd?`CFp`{=P{n?#Ro8yYE5k`N08Ty?cLyJ zgTs%d!p0k|IA-S;JT(3@HaNQDxNW6y3QpjzKM|PPeg+priSYwY?;v&l6^wfN9**7h z#B8a15ZG`T(kjpAT(ot9O&iRpKv@ZYU#THw;+l9r`UBpS(dT;q)?h{l4~zD#0-Fjk zzB1|(iWJqkQ`xHgl+4TEIa3CH)ZfMw{X4MSE)u6a{){1B9>nX<63jm_1vKWU@+WLm z_`XAh^IZ0;ay#GN!+Qb!P&ZAQx3gB~M20$?;_a8Tq<;_5kyQp`OGSR>&Fi3fRh+l) zP~p|`zQWq^+Pr~>3z&GH!5JPk5VCC}4xeO&4%Z9ljBUd>mzTbDXXrEZPaMu)KRAe9 zAy2VewSed@mf@zBUIZ(DG4LKAil)WA7_n#%R_Euy8oT2(-AM)GedYO-yt5>h@jFKd zHK>EI1rKhz4cp?TLr`Wi)L)Fp-T#ijw8h%I-P(s(Atg>3DZO(m6`d5Mn4tT*6O&M>kH;88?oJYB^VeZw zr4%(QG{O^jnyi2J9_NggC(fn;FN>#OLIN_ z<77q8O;5lz>^i0QCXlvG;=B{Pmkjcyc%N?>Xj@PM}W6ZEpPj&@Y)eW*-XL5$rOh4HO@{+5m%xGLO6GX%g&y;buxMNY zj_~n;jw3Djv`K&#yT6m!+r+uggVLN+c?f9QwxiOm9{O{K9_PHW3T}5k!x0-^u>6Y* zU#zaey>4j*xwWi=^E#AVmEVuH3oe0a;}BL?o8#y%cjz3U%*$q!fy|XQoE?1^GBd-V zW|kdJj*~#Y)gn}Ey92Y9%JEtIl#!Vn! 
z4D|Rp*JpsF!dA2%`2=^a_QA^dSkvvhyy=_=LhLYC;v-}QoJMvk{IplV@FF9#9YV&d zXfeQ|5ApcCzZ;ILJj4?tcEG9C61aDy3OBKBIHz79&0pMo5+a7{pt(W}^m%Nj>5CM3 z$DRZHG+`I`GkzRiAK+p85i&&j7ar_^`n21f@mF1 z@^L)b5Ih|u^d329+uyFpe>p^m64f@JT4+g}MCAmF>Pk1P>XqZ&rm>l9M?Xo}IuCOamc!qvF;KEI z24iM*VxwF;e$~#xhI-b8H>&|>f2M_ZkAVwX&1e}a%kLCdrRn1$p!m;Q67=6wymdMc zwy}9{Xq5+DBy|}!OXY#9u?jcV`Y)(0UjwJ~b+~eo3*+^8V12n3B_my6qkWSo#d;=U zr6hl7KH?jn&3JI=I)ux2LEO~+Sa3fRzHE(w&F>cBZ?I&U3vqt6ojS}wE$*aO7<`?f z%e}Z)jvp6qWo!aZY@IR>P1;^#vG@sGQ!tGG#+a-x&lHGsw%343$^~4J02pAP$VD@6 zRdlU|c2_9ych1F%vMtzcvcLwM%_6{mxFX+jTa5pkqR-7b#Wb3W`(ej&mh)CC!nh~* z=#C$AHpg!#XLx)#*3rYr0{h^+yPFyaoToGj_a~9Uk)xhd7;67-Kw~D{u1^g_-E^ z6XF(td2b{h@R8(-%=eIlq^C5?G+oruCBuKX+C%yuXTk?vb$)3Plg|^u!giSPfP#7aL%$g37k_cp-uD%7QR*G zTMU))*_4auKD|XKT|9_mQ@@i-LFeGwJmxhKM_9@7r~2zK zwkQ!M-5VzKA}So!&j(9WNenxy%^x1uf*HL*(5z$$6{r4^x&@Ia9vC4~&D@4hhyLJ~ z91f`XeAlTvM|YK5PK4i3YVlj$G?}(qukx^!m(8wAVPfwE`Kf0S!+ziov->~ z+1YrsJVyOOcX6+7E&ZMIj_eE1hpFAV@awP^pIdki-_2Yh z8rLe$ZD;LXCd82ZzEikp!v~fj`2{mx^SIBU78KqkLwA@YcS2o~^PUD)AdAJfOC?SkJ5=|B_oq-)+y>MvOPT|v{QP`fF25UE77H#s_F1&Y5NqBTSyH*#H2|b+! 
z$sbn0lv58OUS;k7}>C)pCG|8j>+YFdJD92a3XmX3rvibArDfsR*6_vHc zVI7s?oBvwE=)=r=`;!a5Umt=>lR&7NsDrmJ4>2CfRkC1~26x^^nyaoE1%hL&FL!wo z9?Ae%qOHiM9=b(t=;-hg9eUhi+Z%XOUzXc#_?6CM*X{oC0KB&_2h8dx;D<^Tkv!7^ z&l~;1i`N-*NVOSq)YSORtOTYDm*e)!2c5v2ro&8A3v|bd1-BK z$E_xan@|mpTbrPRZuLhW)RzWw;_NPrfA8<^C2Xw@!^BdTn`?^gAjy1~1 z*aUCU!q|`S<%l}B#mf!H@6+c#Jd)>Som22x?>2J0NuE0;?Fk(MM~L>D1jnt5!MEE1 zg)Hkg=Zp+LiRFe)Ev^SKkr0vsRB*?nvE;qwE>H&CPv^X#yZ;cQk$E9=LhJ*57jrY3|jBf77@3$5Chn+FF_}XtmUOWJupiSV!_2IkVa$I*)mtXeQ1Bg^9+?()} zR<^5f4?iSh*<3k3|J8Y6y^a=dKT4Tb_x7c4BqX`OTvs&HTxPOwX&{!m#p31pW;FS% zfFD`45#ZVhoV8IB-}SbWUVcAgR4^U~WB-TeI?z6ea(t0mh9lRAaZ2N^L2;`DBU=nX z>hDg{HT$vfA-mS@MjO#{vJ^MFO@qJmNCYLc9$y_3-6m}zE-B16WPa(*TpmKTrRzRPIy?nZcOrofxCo^|;3 z5AbV-5fwkLhrjkTU~^hFJgHxRQQM`sj2q$5AW)@iWRmcf;$C>r+zy&$m3YR9?f#@Et`#de#K8<+&1E{GrC}KPJWhj??Cz7Up8$;so5Ew+K%E$iySp<+&QQ&(yt9 zmw!)Y!XM@R;H8vF7CK&n164=R$uJ0(t2YaOFU!TzC7xKGF#{()@}_GHHp9tz^ROf3 z8QJXP3%_Pba19IR;>Og4Xm-v7=bP36E6H*LTN6dip9p&tyVTBuECe!W5MRSjM`czIA7C?yuW8dxQ&SFaHY5yq`mGPX!TIlZ8(j zMWmtdGp@g+$vORqAd_Bks1bP#@7)xK#)VOMv8YtIVbxt?Izya4e$xnQb&PU!( zrdf<-lTy}=%8X~?+v^GXdZBozxCOlY5>Wdb>mAk4YNLCn+}#3s`pHOi78c%+@##z_qr>?Fy%E2;AuO7~Dg zKAp;39L9@%+efTl?}iJNrQ{~ZGz#u zp%k~?d@&iZZ=8sproud*chvY+0f|U>05hC)xlH7-@1hp};g~MoR*>fUjWa|QGoHfO z2cZD%BjDTRUpUb#8Ad4>Lf6J@@M`R3&)zbm{|?~x`3dy*xi?@XxdtoDyW!iEA$o4I zJa3<8A=>Q{xb(PdkrqbHU&ODuE6Vl_R!jN6I^6Up>)G{+*3cB z82a#FbDTr-?^PtO`Z9)$i7_)5BCbCCgI>xwLoKU4P+j^3R6D;wzcLLf+gpyEk<$Ew zlnU~;bp-lHJ)|-VcMzu|<1y%HJ8`$l#|}k(?vz~;n4e0eW5&#erM;`6d8Za`u5WDqE$7G2NX|_QV_AK6jzf9aXr# zhOMOtS8UrY!+i^^g8j=DfMP)vN}Jfh(p3uFP(u&xtkB|TtdoN$$=cimmgwIm^G)bH zPKuj1t{PtHc7jY}5S70y#a;GqBd@x)p^xN7tScAik9ErNy)1)nF*O$^-f+NoI1-LM z9YXn|#yMXiZLu{tlU`sQvB){?_{de4a~pdbeyFvfxbO~HR3{)syDq^Bt3rJ6UBJ#g zDnnt=8r&LdjYnseLi(9CI5uDl-ISxrNxjtL-Jd0*rq?0z+UOFfUlhj(`$~BEZW1mz zf0B%kNM_7a0uif2AUmZA4Na4Xzx^d#F~s(*YK-H@)>QB19#9h$!+ASx{=keO;YoWP zPG|4~*{T@@PtyVz-)0egbTS8RO4YdVyYJ0r^(yeKk}WW5YbuPtn?q(isigOQZ>0NP 
zRbqH}JNUNbLGd1SUTf=HYSjOcn3moLov_`wVq-V*pOVRvx8iuYT!s6);Tvs9-GX!VI_}b_e_8>ZSuQ2RhACf)Ak#sTUqvr2t3A-6o?(O7Ytd zD&mg6TQOCGJt9U$;uZ^4&LU8eH};uK=Feljch49SsG`Qtng0g0R@{IeuOp!U;uLU8 zRp4eFpfIg93D3Q_ji29a!n3_IamSi3IJzwkW6D+mOf-Zu(-+gH+l;aJkS_m3QVPyJ zL)^Q;3$6`maW*ahoTMbzXLA7uwqGSPYW_l8d@&lC>hel74mNpa;m-(lF81GixNu1f zSI<@AqyGFyGNwuJSGsSRU9^z|)!qcGSj@O-H9StxW}Hp!W5BPO2n8nEnAY$>xC>SJ z&i!_fut}4@+w~sCGhaMs{15v3c@u2R5rM}dZSFYR$8TQNLDk+IAs5!|1OvmbaK9ys zo|bYab)DDHcJz1r`BjE@eDn=7@6H4BntwQL`~VottfCzQw}rAvZTP-jo*Vn32*=*3 z0`Z?mnWuRk59MV;;KmIQx@tSr`PX2>ns9QkRKT?^kmJRd+<;nxgHY>s6rTxqgI8J; z=|~TSW9~D}wqB6nb&e*HXHNf53atiwH@W-$Fa-ntuj}HIR z(fIm(`24IM3{L!lS4o)=)%yUxdCo`wN4i}5Ks-^({{?Giwm@TTB%BwF!1#SN02jdt3oYc?Enh<0rIwP<&bc5i8RJNS|6Xym;LKZ5p!t z7}+sk*>edVBz2I830pvD)Corp{)8EO?_l(K(&e`@>~vpb+?AWr{VmXk;AwK6GdJ|M~$;C`Uh8BcB77) zE^k#|OP3cM5PhsD#Mipai+Hh|8j1(tVTn@OQ*Z>84=QkT@4o^6?<)M8Fg0HJitgj< z_S*1*<;5?qO~7}$D!js+0wP>4%_&IcU~a02wyV~o)SDl){W;>o_Yoq6$asj`mW)Rm zrFkQ7HEyJ7HQfB^PYou?a+2#-!RWRa(&nVYb#=~ybM=Oxc1;Qfo`#ans)a(6qMOh@ zUV>|z(2qx#<)KMKG!eeFr#nrC^F`M_!<*CJFypod-r{`0!-DDV2I2T~+*G`(Wj512 z`%aUFpFc#pftR7eNP`P9$4l|2xUul=u{fISR^x^upNYn*U&Y7Pkt89d4)-u@Abpk`KNute}**G+ABo3s=qRCJvUHN7J4riE=&z*Pa z?TSLMv@FMxRB`+vr_Ra!J%e-pR6?%0JU<|($fchC3%9pR;E_2ALe*!Z;qZ%OeCF{0 z8|;i2pH-7TY1&17pH<@c5?x;QOE^^uQ05;@v!JKZ9Fs2j;IuhWc>0_p<&IS0+;dWV z`cBpxDv;+UT28bBVWJMr*)S6Jd%3`B=8vTr8Ngwk6ymkh^FtTcHaM^(jsB_20mB0Yn65tsJ~JN8)KO2+NBlP#nEMCJm-d5_ z)?pmuql42lgeb8P@lj<4{yc`Tapg2@>FfZf>F4m7eh`{3{E2V#ufZjW1nObp3t4Yh zgT^Nt=vkG938U`fJorj4naFdO8(E0gW&(CA5-ixT7N&ZiB;hP06carg6{XIDrLQh$ zG&&nz&d?J!<*&)9(R_>xg5P3V_$;CI76qu*iI zDth|7l9)EoyeGkJp1TX%^}-?bD*>0H&b<1{;l@qvdNAlB2s ztS>=~PxLi~`<*#J$B79urftUL^HSW-Nj-FQcM_~)d8{)hHbd*pQaJlzGA?!#7fPSX zr1=wdxcdvP!plQS+%Kgtk(0bF2@aRxRg(vEeqY^+bL@q1zHF|jYe6;E=PH4hrUrlP zN<1uVortSPcYuwQI7~P8LGO<5pelBpXm)9HEB>tlO@&zKe#Oo!AASwumL~|E{;Q_Ms}72(_x3-f&pO`UOIglCpjG^Tc=gQ-5)w!Z{R9$?z;{D#j*Rug?V~9 z;t0}T;B(Pm%6>p|p6@_?4-KwwojxZmcMlThiE*jMMQ|(i3+&i_i!Q6^z(Vzhm_X(E 
zo0A{m7EN6~;OY+0ONb^mP6~Xr|1C^;Y6&u$matkL;hAS1b>E`P)!y9zavRk+4be|H z=BAGSG8Op_iH9^Y&lk?0ya-p)dZ|Ej9EpFJ0_8s@VeLgFDEhPjji--=(P4S`yf7bM zDYO4g*&dEY|Afc4;&FqM6Q)&3@ll-*>C5zC5NkG^=dO!$GYy`=OI2UuORLcIkqYOX z@eUOX&%>1{BX;(j3#5%Tf-&NrnDDI%##|8RlPsEHBY1OZga_9(!K}6`P_tc* zZ*fn+(>U{jnOx!Nqhi!sQ)G6%&9)1uEp98Dd z*|v=Nvpo~5PoKvA)cbJD<|Ak>@_>Q}IeJ}d7+>@E6U=`2n~dqSM6yqXAN{5tEA4;a z@1!G4zY2!rH|gl8asz~ASD-4;m;Re-g>oz8;OO?@+&PJEtdxF%w%hzLByt4i%+i4U z&z*3mN(HQMio#Zd8!+WSIGJqGg(L1aVxsymFdXRvEiTS@!?y~rWW7aie`9*1`UUep z6rew?1@*kPU}C5jIGVeo-Q-gI;;sV*3l^d?>#yuuy% zc=63?%nlnaj2O{|LDD8z+^@`+j(!Fm@`=Ky)hc9NkrcPiLx*eblI6$6zCqWaN|2?h z{P8c&IMyT*{l-MW-pMO*RN+?C$UBQAqpsnEQxoxD(@p02m7>OA68-%DeqzpUrkAjD zRZnTaBZE-#sPid!42bdjY_xctyWZsa+rKcD`SaK22MN_jWKq-OeYo0Jku&qXL|>FY zrhgMZvtD?ikUw34RWBBU=2%^Ra!eU`%=RQlW8f<;+)*#W}^H@ovU0o z2v+;#dF#&a!uz4DkGU!xxUh2aqGdR>G}q)!tWAlqK#Oh`s_`l#H{;MN4Su>)2@GzS zi4L3ng)5@u zA;XP!%ocU9j6h;`A~9|ZMvtE7#644)FRv+tgjKR|*4YnK7x6gJMv=E0#m@Xt55c}W z(JVW>7uTH<@JZK`S&`uv3=^rK-mDv>=x~bAaBmNh|J@Fowg}+$l5C_XlB{_vxKj7faJ5)NU4zfCKQb%O7`sO+`|8k2gW3$Diht=d+ zQ4yNNXW)2=nZgsYA93gUKTw{15f5?FoM>k-T>TveAu4`&cvmT~b{T9hF@lfo(!7oP zPkeOwF}6((#b0*!NO5H-RyZhgZxpKG?w<4X^FJBxn8kK-WXv-xw=rS8iX*V8FpD0x z*X1wj*AYke&sg}+20915P`%I&u7u{pXPZjgX+I0DN##PlD~J2z+JsTwCFtO-z!$DM zO^m;*@;BnN@aS+qP}jRn?QNszj+P?SUwnreAI-*qC-Ox551Y;B7UCvfS9ljQh^Y(Q z5sFjr_4?;vElkFty;Gpsd?7xW+Ju2`|A1cNLXf%pmQ>%%!u-pf5LcnZ#a4XAw~6XB zbhaYwUMI(oom)?nTA6-p(gfayU+A}tB3Lo|F1QcK@gL@NQCmf8c-xB<3INzen&o|scL(7A4P~!oaWy$il z4{{(s?h#Dql(>dvbvQNqIArtxa9;UoqIU2ixs{^Bhb|V!ldL;4;(|L4&z~opBYlGw z&e(w8?8=~bx*k8$++%SGUbc4?V+ae; zzAg!yq+VcZ(>?s$9F6PVcEW-zrep5!!~@OyNa@AbSQTOidIJ5I(KYIBX@LdFiXgeA8|!N2}zs5w@aUsODmI5%zuv+4&pJVuc-UMj^IviUo8 zL=#ANe;_Ztj$?L6JC1bqA!B!h(|zMY!N$7+LYIa^!X7X9xI&lvtoj@@(h2@}LQnZp{qCMUtq zbvX~8wj9G~0b>p_e#V%u>cn>OaWd(@RC+pX0LDrtksvoW$eel!P8kj0rmB}%P(Kw4 z2FmbU=sEbl{~d|U>jLv|2k`fOOkAq{S+;6B+}zNL(RHz;)7(thcQY5;l4beD(kCx)!X{~6^>wW1US1BI71d(x zvM<#6Wh$mliNz^r`)I0JDomdt&$T~S;2e^)IJGl!91FFhrT1Idck>dCc(M~e+D&8G 
z4?|qO>l-f2I0{W!n}w>19k}6t6rFcKPVXDW+k5XNBSmEI-g6&2GpmfOtb{VkNMF08 zsUa;&gCvRyt@qqdlaw^jP?DymXb;uz`TgHN@5_76^W696`dpj5aku*^l#Ky+Xp#-X zTU~e;SJyG)e-xQ_SD!rniUD_yC~&KF68 zoDDCT-(7BaL`{vITNeXIZ_1H%Uz5;2vJC6BjQR2mGRs8N*e^obAh`cFhPKPGXC$4_ zZAl9zM5Oay4YgyqQ4(l~Z%1dlMEojKirQN$LF(ZNG|aPxu^f8iTy!44>qWr#d#;%N z?;BQktE0n+7#mX@%&8~}u=`HELZ4eDT)WFEh^ThL)AyXgy)?tv_`D>1UsH-bM^3=^ zYZc&~REY%3YN}YjO6}p{)M4#oV3NCyLdhg20X-YLO=rE?*X)m z3W7SyH<{h?W={1g)4QD*JK-C3<%MW~!P0xo!0{ez_<0=XOTTCG<@`CLgi5?5bPUvI zCgPO1KU|o+4tqx=8g|!}GY96kqw(}AIQ3nQYYntO>r(p8^jF2cHGq>XD1YFEjN{xh z=y+3*{J!v&Q3(=&4bpT*I1~esm8#?pQif1|h+~${$3<6PVE4N=X2#lOoLzx75Cvc6 z#kWcn9B}6Z3aJz7eh{vyaEJ8WP9XV#^32QRAbGI@2=^YwEbm8{$-mEOZW+dduVhJK z#9T!EY?y4{iK*RBaGI7Tt|&OcwMVLv9qKosqm53_4+@;vO_Ammy}E11S8H_BvRLGL;8)@F?E`=2WZW20ba> z%{O5v^IVfO&sSvyCv`DWwhrJtKLOg8Hi6th1unk48;(mq#4Arn@KCiZDRsWi&+PJq z&m&3re2W76ce>rOm&vQ}x9JqlQSB^LPr1viyn29%)D?gO=e0@Crngw=(#)K{os9G7 zXUNZ31v}h-b2{4^@eVTq>js+W-gO-PCdM)SGM}*ih!{~lVZrPbtOc)uFX<}|4l>1u zZoo97M6`~`f*~I|KQNGB^IobjLcz^g+t&zJz3%hg(@ci>mn5th6Cl3Fr*n!zE4T~3 z^5pn=XYLZctH$|^!Ko8d_@9n`f(1?8IHAZCuF&6NX0RISY?EcrZ4x1mQlD@K&5Nm* zuoIi*mDz{qG}sK@)H2h&z^onz5Z41%mp&KN`~41>j? zRxm0`z#Zq@z}7Mv)2>NDVoQ!Oqje5FEM;*-U=lL+0_2gE8Z)Fe13s7~qQ8JT(Z7+4 zCgdETmn4+Mj-x{5OUyY({ofmOA?&O&b#yI<_ku}GS8@uIHs1lZeo`iv9!B8dF*967 z=R#$2v`YbXxpTg+!7PQ3E;c>ftFLRg_-UnJ_)LJcUf&JXS8Bn1^Z^#HY)4VrO;i7` z06dzz;AKKSr}^(E25Hb)<#Ib%+`@;^00r{GLda?vrAF)}~|Zp9)xGlFAG)v_rkn7Zew2vmz-U;i&mTSTwZ;b7s7P zg^51!qc5I!_-#CA-DZrfwJ(`T7jv;mk>-0sy>R)yUvPDcA}cL;kKPe4;*ckGkbaS2 zACwQLk4>_t-XcTpS#2s_QuP9}+e<;O_XzsA$-}}Im0Z!1HE^_6iLJR7L|v49c(gKr zcg*_%99E<~K4lT`Y>gvM*ImKXuXxTWOPBKW?y8a#k}@ps*jf;d)+EPQePmV|1@WC- zC&B%f;fyt}kTE^RXWSd?8UKB6U`MtfnZB6d(*M-hD)k=5)43G0OC`xkg_l?nB?=27 zY%u5HT1c`r#Qm?=;t^3RNSBJkjRUS+t?MvM=#eB7?uawWhnrA8v>&5wc{r&vpE4S! 
zk!M)~sWyHvcN!o5ilsBo4MiYyZ8<)er$}s5JaOMHf5vOD8|hJ)`{$5Bz0B%dsJ$HM z8LP503SBWsvJh)_QU<|PiVR+j#%o&^qU@TkL8ktfE~I{yVEaadSm$_ccJ1pk%)x8HjIc!&F4`W9MR%!xY^6FmANGUO zd#J*Wh%{q$stBmh9e|lWuJGKehx2_m4vxhKvDr+NO_WMvOoyf-E%qk04~y_ZrXG47 z4~ACCh>r(qkY#rFVCX|UN_>(ev0t7-eR4kcXa71}QyqoZ=T>5_b_`^ z`_b7RIP-fACsrW9iiF9rPiamx$%JNpI#kIuA1QWo++27d@t0Bg^%6((CD?2O74~-P zb37tXFl&Z0M>+)A##4i^EVYr5e$0SNs~UTQb^tBx8pY6s1&oAZ94gT4R%g&33=4OI z2H{(falL?hI4Vruh26#Tb82DEzk290FT^))>f{`~OHwuo4)0WjZ>MC~|0+1f`b;RZ z`n)A%nZ;w;867s?FP`b5ImNA6O6<~$3hbHV0_0xv1o&oi8>~ANh<@EC{5Yyce%6{n z*JCvjCfS1ZEf-Pic0PVeDFlnYVm!0;CF4)M47msQF*77)<5?G3qPDvcM>kOYl3gmb zUt4k4y)|I&dv&7aeGDJEPvPg)OZCQhcL{_- zrD>PkQtDcxT&9>m#3;y-{GfVn#S>pnS0xFeWj|1N>j$h{G6u2&j<_tYnJdI@=G~|- z6Fhnx!xvA53zgrPGwma|K3$chbliacCJ(TDD^KQ1lz`Uf81VJEi%Kg5;m1c6_K(wM zY@fZ9xnEoW^OqYi#!r{PxWyOv`=Afo)UkwZmAh7I|~fi?QCM%+8ut zz|5Yi&De0=VEU~Y+x|Sl;AL^?pEilHff;L|+%OOVK5LRGzIVW&vkgs3O;L8DI9a=^ z7AIb>0{e$w!LxG?p1+ohde`rwb)+&WoJKpcD_%l*3=gj#QfE#7%fycb;_T(u%P`?| zB$IG)JMO0p?~Cb3C-P5r_`>{M5IwtMKlG4G}bZ=~%$$4?a`gS%U?)Fucfb|mv; zt(H?BUXayu7{Vqie-Lhq25+l#Fkq^|k_Tgq`=w##rm-)Sqz>WDUM2R7eG+_l_ei3NAOnU z0L(ib&1mZc!Tb~MFy-qUe!sX1?Va!Embk3~^WkAUaYT_Vop1@aAFGG4IcLy(1IMsw zg5)U8P+2ce#(%X5Y{++$F3_U zsc)nk&t96yO<56zQ&-5d9$BjFj$7$C)v6fAb8kala0QCrI{|U0yKy|`25j7P8ZE0m z;E$0!Ne#RQ(R45RPKEBvxqpnii56-08-T$5516wWJ0WzEA$|-IBR8#{&^|nC9I?`1 z57l$frti)?wS2_6D!qgyv1h?JDgyV|JcSDRkKCAED#Qy8@zah=F@M~SL%B;bKJBHO z|Kfe%l1l%7POL@MRiPY_o{a^jeyC8s5B%N2AW#1}yf4w@_tLY+p7Ps_wowWuQeN?4 zg#d5-ry%XJ^_x5TX9XF<6$}VPXyn+fN4wRSV>D+w((*Y z#3xC>k{@DZj@?VhrOb53@(}zlc$B%AzM3h}(;==_dvIGj-5+XXfJ3|=)z~62- zZ_olmZdye3uQCxA`VK)2*?3-4j`0jdG&Icx3;MpgJ?lHaxJi_R3`|Fu)z6RkvI$%M zE1>%UEuu*M&i!roxZJNitfU^}IpZAMC>CHt&u;-eVL@{5lN>2I*bj%7%i#_;aq?h? 
z4l81L6)sFvBk_#|=s9uuIz)jo zdSB&ru-kVt^(E*WIXJvai*E)HQZ!j zd8lrp4*%Xv2w2(6qz}x4bBi=su|h4jY*HiEc$GtTI?b%1IJ^ix1-g~e_$W4q{}s&O z&0z^P!R41O#U>?iIqHY; z$DV`bxpy!+`WWO5)X7jS&0drp0t@+ zizd6_;W`j`c81eRor1^b3zMQ8Gq~+k4DUBSK|@{YVEHu#q)xr(ynofh@C*+o>(F=H znUDd&H&jT(t4>VndCtwz8-c%7=eghJTu(I2>_T%~6(xFyRv3hR)Cen*gh;wyfebpjP;E#_~EYUDc1 z#kkqtp^#Xu3&A#Dpz=Tmgw8`ong14dtKUQ+!#^DFsR6U4+4zQ{PhQ-s$%$J)a6+Jatx;}RU##*LjDVS+^su+XrOfeJg$;$#kv7cGUA7whP`S`zas-s6$FVKn{P2XR}yXE;Q^^CAN}2TwX>FXgloV;;iNI zgv|(V!ty?7HSEQPu3YedV=&E9oh&L4WE0+~up#f|(ENuVMjhS_4trhENhu7rjVqBG zS)FM8MS+?A?*VQ;Jr#!O{BCy98j!1bimUCCU<+ReJ>!#bj(i5}$)GHBi$8WRn2P;& zL&*OX1(mj9C>mDB0=gW^_MXnYb5m$&NLTwNps=)qUoJRL8Ux4z^>C-;8F`orQ*a`DB zSv7VWXZN5Q`n%j2gNd{!%_k0Y-YjLlpF0n7d%N&bwHD*o@5AJtuco;WKRmhg0fq)T zr=Qp{#+<#Bj_b~Ak;u$=FgW^(^T|;n1}7a*w&yli?GcEZ4#|=rwYB`C$(^umFXi)k zBO#7vT0}KAqV7NnC%pYDtSbJf#n*@JZq^L>o(x*=0iP3rt}w=c=)LG(E9viXWq~T#}u9<|~+XO~Z#%C9ptw zBhE5(g-u^1+5C>Dc&Mrqt{BO&XX`{slhte7yg~}UzZD{XDeur%TgQDZ9pNpstAgi) zqp+bvo3i<~(3CZVhpY0CoVy4mP1%g#-Rs<$xlZ^f-5AySrSVQ}5ByN8K-2q**k+{2 z?pV2;8C@gHo;y)PJ@7I_C`1*ET|J@A1+n0CA`Ivj@>WlrjV3N_$V7U=VSzJv!l4Mq z!5OD75GTsEr-VN7$XVYaStz}Gbm&`R@y_4@Ns%Uc9j9xrFY?S(MAsSc;9s{AVP%M2ehf9Vf?%qn5?Qx?6@Fo!a;ICv@jxWK zzrSAqPkV;o@H8?0lPYmCX|)i;oh6jLw&7g)OqnMZ8{j5&N9wZ7a=q5%CkY#iMv&Q0DmsN51-!~ zLC#Ji5MzUIf7LtK9IM9K-j^j;dlXUsUoQ8??hSXg>M1w-iyhqHX5r8bP41fCJM`=B z24m5UpgBc~{5_qIOPBxPOT{h$$FyLGc&^T>FWCekr7f^=a5i(#-xJeX)d(v$#xooS zF6Bggy7CMW%x2|Cg|s}I+xmk$_#+PfjX1$}aW^<|P!c98L_^Y7DROANH|O*qmce)q zPAWHn(xr4b9jwEwKa&AEl!fT|*^YC5mciGE^-O}FDk-*?$6KPx1)OQ_2Imj=DDLhuDakgW~M|AN7pNamt(q(!2R<4YF|uWr%hKL+@U~w5gBsw+{%B zMUj*TIyug@m#5(heka%q$g^kopHSaXf%I6)ajG**K-NZ$VRo;E?Uy!iFCv4%L^gz1 zvsIK9B5C8<_kyfTmo?5cs^c6|KH&Fwc_RBuh;9Gu#I1GtPCY8}Y=+5WZiQ1h{JeAy ztoHo|h?6IEaRO|@6Eo}-@Wc_*4&IuI4lGg`#IrW{!1zWQXB#a~n%jRs+Sx~VaYi$k zT>A|1JTY>|mpZ(9n&JNUZm4zZ;yN=vGxD|czC&k#QLiddJ6VfazrYR5p7>(G)Fx1A zP2e4MafgD{bUtmMiRK;zx6I1K{WaFuuwsO}{%;IEYr3Ica|V7KdWG{|jB?@Ygh*P( 
zO>Xhx$ryXF9HW1UVaA|8>g%3_HyS>u?j*?mCpZX(592w}EN#{TbXY>qyXzu!Na)C)7#lVJ-Hv`B$#tg$eZM%CV7eM_@BAp9!1j4Sx0tWO$?iT71`oIYUsWgmz`m zrq0~U>SVow9`3zikuE$l6g|YBLD~;ZcFOUem>L+16ITebOFw0T0QJX3G#q2HV*;I5}nJB7Z0cHN0Q{rv>M*wAz4o2#$J@0(9W4iiILfK zXXO%3IazxNcD(fyq@^6B9JMx4UaU-Xl1~Cx`2*z~!{N%Dd1$6Pz>M2uLszyZ&t-uV z^p?LzS(6XAWQII@ELDx2zcQWn>doVcj3#jBO@ztS>CZrW4GYmli2F=l!qqY{cEw=} zWLuTl%851j0&?(b@@|aK9ztT_$DeRZj(sCOh%HmZ*?K2gG$>Oc#o~!v$`?Mje9{Z< z%#$hUZkeg{Z%3eYfW8YG#EFBlGwi-6P6Cd|v)Pwqh)ZZKOnm+ogU?OC;0ODmpN+%% zP{1v@w<^`)^Y(=iCzR@w(Npc-{aEbVqT2 z<{aP*X%5Cmp7K98!{{u31f9n-nH4u5gGSmIx8R!|{wn^!cVGDs3QR8H;Y%vyHLK1V zKY9(i*KMGka)ncyH3*Zf!fJ0>&W%duz!zyvcH!>7`1NWwQ>^p{?#_J=ng@2nr7AJ} z`6m>`cgV7*dX}cYzxol(JY%t)I^Y^?4Y~Th$2jEW02{sUVw|lo$=j<*;)LztV|h5f zt(7C`FT%l8OcW>mUC#+en_*(37Q1GWGHWWw=e>-+#!RcfhQgDpA>~{gv`A{PN}E*J zocdbOn=TDTLqfz_QJPiWDb8xY(k6u?wy?9`1D5QY&2zlU;r(x$V7qD#auYqd=>lW0 zy{ir$xZlUx<9RS)^($^)FWr$W$^xsi3dpM$B&WLEpg#HuthT!X>&w5vYt!4{u;?5Z znLL4u7D{BvS_@GAC`FEWW-{w;RH4I7MfPQ97>3{4MSGP!z*NZ_EixY=NgTxdv*sWg z;>?_N6=uD}Xpd^*GSp5VqCGPsC|LFnaO!b9S#k}EZ;fL5=R3@!i?`sbc_{>R6w%*; zAnP=~2|pdy1od%IcIP(*QuA7m^V>wdN4W@(4NEY3hbnoevja7Lhyd@^IE?k}$KE1k zru_3~W2ZO~cIBM`Oqe|cc#8IR77)by@N!=sxvMyF%XT4$zd$K;EKP_%r?oPMl10z6E`t84-)a zsxu%mR2%CKtz>Svh4UX(s<8i`9jyB-NKOVvfsmgcS3NHZ?if>F%C;tGTS0fv{>A8X zWiB*XUk2@!g`C5bnNWEll8H}$3IPk>V1te_2&|MRQ33S(3U~^V;&H&OkYhtkLV0fA zir{0D8cdVQg~pO%X0iW6aJ?l$YTSb{>c0}~$rNC{{+o>M0$wdL>w1ob@h~BG*Ou~7RbPghHhRNcaU4l1u~JgI;_4&IMbJ#2rjN-%<}0q z=rU@Hvf+2R4XUnq=!gJoI4Kx!&5ot_d=Zf86wWJmF)D>6(H!*h6str(2=he}T;;d}kYuIkO141t7p^FP;8r^hJ>$@`C zxxJBzk?i5j^+NIet_(<^{+VFesW_B09gi3lakuYkv8J#3(R(~9V=aKHF&?11oM`h~@QE_1$Ltk}fz4&w zN1#sjdrJ`M4}U?^^fR2zOu@akB2X!kn$pK+#jU`g zDF)Q}zHnlZ9+OEK!1JyOY-+0$#9rwJ9qNs(xS`2P>BpcJVttIHSX4Xi$;pN5E-UUO!+h` zyekTQn%dDPEyy_Fx*+Mkt<8$X|K&~}Qev0b2$JBW89eX(hnT8!8Yr}%V20@jd@bsR z56={VV;`Syadtm9Jw%ChnEsRby`II4ou(jQGzAU^lwpd9412n@60|4X#@mWbFo$~m z6w2-}T`{WYsPF`QAIZRzt+&yjcstyA@exjnsS+%xDfu7(T=eO#XA$> zy|Fc(s4avL8F4fTZH5S;RQT$!(0DAp6Bp`_pq5Z4)LGl`Z%r`9o!<4(zB&)uO71g# 
z7s4Pis1^_3i^tTB29z;Z06U_MQ_oR1W1tyoFCRkQmRPR7UY~hamW)c1wU{__BRDu; zgd9@t;(W&Lpzmfetazo)T*=r8%+HlLFtnN5aC@BjIynjWzZHqzxGW|fJ_lW=KN(*= z7Y$!03*p<9id@Oc2e{_EJ99GjI9Ld0!ZYn=-?b!?DuoA9v8c$!XnOlbQ|Y9*u!31rM3{{7>A0e{!sG z>{b4q4?$=*9Dv7u)!-ueJ80(;%+2u7W~VrI^M>d3;4QUG=DV^hb}BrBJrfQ2{@Dw0 zmWLpF%ibhIV8St9*is|QhW6a!iWm`6 zraJ@;JaY^f%Yt@!cl?l6gr-)CIMSX0(Y%NFEJ~5ARqVneS7@in5@FVveS<~Qrh!dG z2EOu1!<3FYOzwgKRG41Hzx_;}ZCf9LBZ^8SpjdkIcO@P?zja+m2V!B(Qyo+%Wgm`{I(PIzs&oX7&pU7gOUnr-rCYf6|yM&G3?JcChAYXBNDH1OzT zMKYs9fP4toB3kZeQK+(*;a5>_PT~cOl@}l{#x=>}J>M|AN|Q|fJQW;b453QW6JAs2 z3#1<8PV^6hdp;jmRLBDL7d z@3Oh0;uj$1XbxXb%M&9v|3+=GVBV5TPoUi3EbgE)i>hxGQ0$P*z3i>RV{c2j$?~eK zslF(FyHJkjHd*2mZ;nY{I0cSSmaF?wDp>SHVbsTJZ0V^%1;1!G>n6u~m3SiGc^vw5 zU*pS_*ZBkbez4B{2zNcZlUpAC7a#qmOz&7ESYBy^z|HmW-8~b`ZSt^4PJ_s(DC4~S ziMYgUInGQ9$E|BFVsuI-wC#`JHe9n{LQU5}!owP{wx@p6*(vb%Ko;65SYldOCbNCn zTU;9<&z?^+MX{M$TxD_)BrWfU_2W-*WMnn3H~T4ESrf!Hf0iP?Qysxp^fOmZ*`30j z<*@R47oKG@A*MbJuciLv(&*fxMOB1rswjXv;rmcW$`dXI_~N@h7CR%tp?*pQ?wLY& zvDCMH`oUb#_!ZAN1YSX>S!&3!ePJa^BJ^d+ z&dckW!2dYV$yFyUQTlKvNrrQ~t44ZzuY(g(x2D@5Y`o{sN&F{97B-Edp4lW2JvNH& zVt*jSVh*;4n(+=(R$w}P53mLz#AwOPv@N%V*u)SWbP*VU3lr$RL0gI3O^bu8XL{j* z(lfYEox~Q+J}`RJ3zgK*V{0=Xwo!ga!Z8OmdYml|G z5!^4-(%s-@ZX?Y+PpDf7Ih0BN;TVZ&mj8i#i8*yfDWK;MH5mWF!o%5qxa#--Tx$Bs znQuvi^9{Wa6)M1jSqQgu!CFqaC)apRVkD#(KE^dM>%h-lji6Bm9FF5SrN=^S#!WM( z?*9pf2mP zmf-O7BlF01H}C`1IUkxuPnKwdYylzI+wqXGj!`8M)h&!bY%3V*zQBzq35W_xlT12? 
zof?Nn&FXD=C*~F5 zp+y1Eey|b@W}d*yBNKVk>*hn&K`~M=s6`I6U*!ybbVH3agU2l%po4!Tx1UG%oi~yp z`)&gIWP4zZ-y?pfiw#;A3X#eEG$GDE%KuZ;%tcT}C*_+Af5mIcac>xf(bobbu)!HG z+7949%R7gX(e0`3no5vDIjoAUX%X>I^WY|Vh9KP+LkS{BwF zS0%s7)!7yDUSKUud+vtAxRO#n#D`0==x_38fV|-PRzfA6aI{Wik2oj$PJ^VNeZX>Yyl@*n9LX%B;!oqP2e*#7(Cw1 z2C>NJ+|Mglq0C){b*)urQbH8ipsln6{DUsklScQ6I5BfGim4(p+LOfP!4*A zA}OW*E7xh+P+<_l_?%gS|Nnhx_Y1SOf(qnd&Mm%{bpn6BVG=)t&bR%ej6mcv<^O;oSBi{==I(M9R*Bxols6ZTI_lrS{@P;xP}^GPOv|;VgJ{u^INrJc9|cO}uMD z-OQjv0n>i#K2BL+58w4=vGZX*>a3^tzP*xcmRuZM-o6B0ewvAU2Lkc@t9E>H+7qQs z7T}hQRH)D#huwSSiE30a4*gz=KYkaX^8VA<^hG=}$7$6&*DQw*7}%%&9N zpg+Awmq`Ud?#?7M4+(?l6TcXKG!H~uN0_ZSmnpA!9|MFHSecWH@TFllcfiny;f{te zU#au@u=*wtKi&sEjeqc?a2m6n;MOr_od;Vww45++$GkyzKNj{ag2>|D7W+|mKMum7&fuHG@5IiY?X zZfEv^ot!G(Wu(cEQf)GtCd#IzM?%!AR($YiCxom&!gXf*gZJNC==g4s2~@ZP!lkw7 zFnA3mahQou-i~*IF2RrKMyO9Y`Vo6`?A@kL{vCJ=A1;fthMzaWy0o`o_A?!rI2*89 z%)!c}Q?N;47tdsxBKc@pfZL}ph0vTgAXTowW;^%O?uuP7ETY7HuX&8qE;yjah7Qj8 z%S8NkatQN#bGSpfZy|P1HpC}PLJtv994@+oYJ=0z?2S0PcVh|KPI85xd-Z5fl_tyW zRb;;XR}FXnQ)Jth4Z$u$+P@Xpf#+@1h)Z4_Xy1#1#%1b+)7NHWot4?c?-j{bS?Vp6 z{>R-?(I(X*iloS>6VAL4q&|}rZYSN5RtSlZLC-N<9vH!mCDZTof+P`)6#M4amlHQqQO>hTRS1CT?uQ@N?AVMC6z+d)0}X zT>*}%eS>%H0JD@*nOA$Jpvkm0EI3XxsOb-&skRst=18+aj3&G3f+!JiI*PiIXEE)< zbUG81X7o&|al2v}lXrLq-V6ByNfSG92vflCVGAf*j)IQaJ$zIxiBr!Dp|$=DI1(@i zmWfa0REA8UAwZD?m@2TjYj5GSXNIuxxE8A33I}(2Wnx%)1q!e-ZR)i*-S?r zjw?4=M;%gsFej`6?&$V`e6crv@#gS=!7W%lI|ko-%Am;Ki>OU~eV-_wc9+zRq)Ox}DMme`PZwxp;1?mfR1IgEcbPw|h)An2OXWh^trg|l?%5WzH#nj=I z?^3KxMHzJYKWD0Q1i)#hGz^M+poz|3sIrWpSut&Lmu3$;X77fC$Fk(h*nN0k^d7Qk zhv}b8A$H`m0(&|5E|-y!hGUhqL-$Z7s%51zm1X|?WM@sB-^sw0mQXtfrGlA_QcS&P)HPQe@Eif|IAj9EKl*L#Zt2?yU)f1z8 zgR#+MI=Gsz;+q5@KY5IL6IoTZ@>UJgl6xMs{$2vvL;==!zc`!bW6mA=_7lY@9~PxH zA8Bk1YBKB@&$o>rv*;PzYLJJ_T1oc5X`Y~4F%0F)I`DSGAMTvvZ!Y%UQ+%Ow4~l08 zV%xH>5bNZ@TP$@2JvQdSGl%({>jfbayuSut+H2zMg<+IY3OxWIA23~`;c*S5H)kOGlIK+U#*9Q8udm4NRHc42*Iv&QuhIQ(5P*uThPBq^xbtqF>;7 z^(dD(-3z>DOaPO@UGVt4G`lNr4{xd1ePij|cC6a^7UHfx#<$B3;nX}Sb~Hzly-)q^ 
zt|v2b+pAh|NZbU{;}@Y-vK5y`3zFPzM%ZiEj88ZJ0u{sgI6m8q_ie^Jh)$6s_OvTf z!8#AJQ_^JimkXbEnSF@&JiznODugE+;sa!k`4 zfOV;KAC`R^uUrY@w0ibHbfXg6H1rK$2u;Vmcg@jz-)5M5h~O!jpG}~7OXF3>aPPpn zw97l5!N6Gw__{ftpI_$4y`12S_dd&`#;5|jCG!LB*6Cn$>FiYNM>Qv8rOf83@8`;Z@_!`0oAl~;F-b>R29!hira-zfguzg}SIA4qH3!yeje$R( z&I~u+!AV%kOZejslC($XXG;Jq=rDx?n$h^)IvKSzaQFcS zjRGxJW@{ZPYSi;LzVkt5S1M@v#e!8*Et3#fhU?uQ;w61`(6q|MhvHK)TUC}w(|u*) zxe{!=T*RebRE0mLvg~!^AVw%}7dJ2YCf-O{h=$Dtw10UNU&L$xM{QMBs8gKeJ+_8* zHty)>EytdVdxFN}1MsS>fqBprg90t`xM%xf+}JC^#-8?q!(2By>*hf0#7vYhi^j1E zJ?=l*jgU1i&H7M=uixYV)@>buu~tD=dAT+@_IUyHO>cwqJY_cHMl7y6q6yZ4I%FCV zWIb~4;04Ql%!Q6x?&A^4l|+7H^mi$+16iY-TX`M`*tui9a1M60`=Mr4I`5$>?JHlR zLr%|Xf!H;_nU9Ag$hP-#><6#c(7*~472Pke4b{;%trbR0%kauQJ7{yFzS`}6u;Fb! zOv`J9Y!w~0A&2?|f4T8ZSM&no=gz732jGQw(xfK31Q+R*!FJjkq9hxL)ob8G!k|B(ym_VHf;@< zOdUBJ&rN{n%f2|6D+X~JCS&E~0ob@pkp1ZP07V5q<1VW-)GjGRL3b&ll^o6GSB-<# ztjSn&O@p0bFG8AnI&hp<3$~62Xl{jaOpW6ZpfQ6lbzYS9x;+VeCXeG}UCPL|v}23) z5R>op7EkSOLd!aJvV595Y_$&urwM{YWYb^V^JqHU{hozef6t+PnpNZdG{=8>VWrzWKK~%K++Mn;^$tCbww!-WZ;nR7LxI zc(7%H4`c|N!NpHsA#bIX@w0R};&XloKBU`$t>+N8<-P>5iAcfqMRu@kaUWEE`45}6 z{o_vyi$|-@C>-A-PP`pEK=h&<)EFDWNxLuDn7^Beu&9Ke-@>T3Ly674tHkCl`icM9 zg~PXd2QhH?2<3vks7mMu_m(56aN~#GWsS4VXEFr$F@3mCkN#R20&$88~6pL^~{yIz@kvhhfk_9Pz&(LhCNz8rbm6agrYe?jLybr=jS6>jTyo>suMcM5K3*hbv1@b1$8><(-hdUQr zq2qZduG7+ikiN%|yqcbmb}UBUSE8)YV`tbuOB9Z8Ooou2PTo^N-SmdwdAQ)_Kj!)# zaaP03l3D$M55q~ytne;Je0{J6-X-1P0+;<`8U#GyxRVB6Uam#z3!Y%Oy$;)YnL~lD z8eZA#WPI{Uk$tvS0hJ*R3_f&lCV{{3Z}J4=_obOwmmtOA1E#g?*%&fg-LODkjR`Hi6p5FEbE(!+We1SC1qxk|}-YP_la?;U9l`?|I zYx!!Tsqjqk6BDc;$J#c2pXZF?%9%KOkrJGKM?1AN zgvmUkC16Bz%W^&1#Ibh_B(4dNX2*k$;h3xzal5C(dvN(BJlGTpxo@9fMb8|3uwIBdfUPmiH3(aFb%E8DL~d@| z82T=4fPY7mx!S-;n4obF?cx;4UcIL{rLqBH+Y^xacbo6&tWDa@eIabAC;XhGz?Os> za4zpfVBUiFyziIPNpolhIB(VgkFPeUn=Q!hU$7fgr+>$DL4Npn_$nM*c!V?BTLI4J zX@BXhWB6%~6w3eIj_Lk#?BD;MVrc*$-DXOXgoFxQqO*+C2&=-*)k19H&cEpJT!NgX z3}b1YKbjw>eVgi!alh4EEPPGy>uNq`e-|bhbKWz%DBtz=j|7Rk8xHTxX^-{&2$(k0 
z9~+PLfzc~7s8*q!2rgpe!H))hG*^nwiR$E7tOUsse$ITb8cz#8+lkwW4p&q#A0O{9 zXWsuV0hjjz#CYi^IR3N=1kep1eUHbN^x0o^#}g%O@8d<|Ql|6BYG}0!K)a4z@L*OK zI^NPIPX)}dbWSL~T3N#Ve4W4y*A(E3k9wSz>lbdHd^QALbApG4!p3*sXX3(-s^EK> zo-LdVxQh2a;JS@^ww`X|-mVaZ2WvNCpQ$svQ5B>)e>E}~O1oE1_+!;`N#Zx{816CF zAblEgBxTVBM(JKR98pzdXI<@Oe)>v~M_ry+zUc`_tM{WsygHk*aS2|X5Cx%u8u+L8 zH>A1mWR}Ut!O;q5D3bMo#TgpJf+xiuP?zB}jm~1A$S1Hc8HR3+>3p@%p|E0?2|B`U zIPyrFd|KNF9uw<8VxJ^A8o(8vMeIv4X6T(g=M1Tgk0DDMZd@ zi?QMKJQ1xo&e&~y1|5w(+=HJ{lyk0PUKC!%Yk%u_ql0zEyXB^WlC3!Wrk}NANP=d* zj&UD{2ElsyKl$)Mn4zV3~MxF+N!?f+Uzw=v{xr@@R&>ivF&<_B^8QWBt&{u&~74CBzp z+bCF+h7IPQvF?Tt*|$KFy&S(4gg*+fZh;$d*##GTE~vo0t}#KJPcK?Bi>G{fB$2 zKUtPeWAlbm9$!RhQLVC6-kK|h=@drUR7Tx;wi)t9NSBq7o(t3fyhs(hv@1|_?=vibFZqCV=u(W6vIiZHx~oi zK5SO}b_Qrpk4Eh)lR;l|JdPUh0moG;^zeCIde>}#lXO?5yPo#K9gTh5*GIN|*uZmK zaaw`KUFrZo#=BZp7ma()DZ_@k&!9DZJNIPU7QE=X95#yx@%>CKLPqP;yagKYb-FCs zv#=eyMGZK~MaRK{aY9Zzya2tU2f#l{n%2~Ph4`&X-18X@n7u-u)R{>UpB8_#&D6t1 z6@BAX;u zy)&@{U6KFb-`FU8GH3uEiP@IAooI< z=6)?k@OD;43Kvjo(dyP`}yV*zM4!p;MZ{Lch`thPA-hlU>E zmp~bQrb#4x&{Lw3I__|g<;MyNzw;B9yy0!;rC?eB3a4A80~2rpS;rbPcY=9in_k>} zT9I^Ky#cmNCU@vnG$y7@fX`2Yc;nn5T;%!&^gj0C?9t{ZV!Rip2tNq(l&!gUt^Kfw zN|N*!;kaSxJ``JPg{KbZ@4;jM+D=-1v|}IR805x1m8aGbdSvVVdst(> zkg<|>;<%$9pg`@ppy^*E(zRFk^o8u*p)`y;8M%C)8S_-1{eibjHshdaJWOVp_x8pt z-l8BMzZLEPlj~wMu`&%7^-Iwyk=fkW<;=08p-yg}m7>D;$DP`lKqQA0;uK+kHD6PZ+&Iw-;*g;iD|MnW{!oj#UA56elZL zAEIh}8gsq+q1@Fp=t_G4@2|4mxfjc7UYrYl(V}!|Z!(l?pTvJ^5`1WP0sM-UB>5`t z{MfB`b7Z7{!8+GU$g{hRIy1h)w>f5L|JDuDm8zlL?lN8%RUut}d@!m_BhoW=U zjQgK~v#Pbos0Xa$DW^(Jg2v#=(ZM+KR0d})r9+wLmgCG{;@KH*p`2qm*JNFiI9eQ+ z|4^l=1FGb~@eJNMmE}wsel)f`e2-YOM^Dhcs%Z-Bk>+1Yg z=1vIn+mD0k@4&q&3NC)K#U-lDkFZJ>4@CEJhoJV(Qv!_ge^_-5(dC_$In>X3EzKX6>uPROk9gM;ijNZ)sJ#m@2I z-zns8JSc#i9|2hF?3?pa+7)WnxME{6^N%Qez-K$6c#TuFAhoO?RDuz;pScQKUoGU- zM`@9b-!I^w?gW91N~NO$ewl-;7dH?o;~91rrNz7$oX zc5sUVAMh3*BuUFEQL=8L4)H&@P^dEfAAa5w11fG_IPJI}tRe;^VY@4a84q(G=kCMP zsf(d#dp1=27NFF9L%yIs1`4+_X3=kb8gtJm=e(0Vog?!Vre4m&r6V2r1I6XwGR_nH 
z?>@q`iVp7S50Cx~tx6vq04t}oo!X=u&xsl7>3hxKa!WS3PaePBCj!SlBcU5I7SSJF>Dqr~UsW-tl zzW_1oJSRHO6)ohhVwih0Bt5oLs*WWfo=-AMqXz-xQ(zb~!X_z6*wn0#W?CJ+Hy;+sYdrfybi| zaM+xNf4mH+Z(K1f^%jLO(a*t0{})_kz4EE5NicQlOm4y-X=)p|4sJQ=Qh`bYn~UDW zUHetZmaJw>TXI6sSE5Ndx)v8~c?ZMH&w5ff3j((-gJQE>ytykBwoedie3rh0zN5FzcDQ5q2Rz~U_0URTs<;j3d?Ftl_r;WDbY(|qBN_s0J;S8 z_$6v%F|b?!2aXg#d8sx&vC2o;_AC7TrPZ)NrwDW1@`N9*N%4!CSyn+9!gud-hF&&T zFPt_8Gc^3Udw~-4l-fMF{wD;-Zq621RjH$)Yau%Oy7PAH8#%*|(V)4u8=9j&!EkdV zcq@`;C zR@jJHat-kOW!{{x10p2LU?yfveZwbzzKEkWDtUvE99Gr+J z&Ai~wt84JVKt!NxAPu))NRaZApZL5zu6WA{x%*q$E~U~0`UN6%dqF9@e#w~Do;h$r zLjf5*jts1M3n76e5X*d&W1PIv!z2^#sej@orfAV@4=`&R*vohkr_KtWNneMN_iyp*CP%;*_E~szt{sz9 zJ&^AngoIZgIaort(T_dwW5+E{cDpWV$#}=v+~e_N`Zp|_=M8?R@4?Zyc8qIG;J72(a zrTLx{(lpkNdFI%rvc%jUms&i=_k-=2(ee;I?Jsg8T5o|Dbp)5mUqH?eabEEh=1_1& z*QqX0{_P*OHLKCQ!+<&~bcoaA4vafqhi;xJ=#(sk?Q?bM^83p4@LF~1)$#}TSq;L` zkaMuQj=7IpciZAn z%2Tv7?ZLf!^Kd{e8`=y#Am;H+%f{oUa8;ZiW3UE*+WF5A&Ae!~c|G`GsRryx$-@ml z9CB3Zr^AT%^2Dt73BH%{W9}&xc&eU*c|UYup=>=~@DL?A6^3|%dcvDBQChQAoyJ(^ zquSa4OjzuVD}Jc*_J<$B2p4Ty#{r+VMib?-9^(`JA?`qxJ*TN10mmnwgd+zNc)4i@ z5i?$(b!{RSH(3KSvL67kIE;#;lqh}5Itm}R3(V~5!LarcM9p-Dg;7E@*9^w@e#v;% zr5A5}qn2>L0#o8gg5I_P)a}&bPhV&e^4ry5Z&se=q^l)3Y~6&3a&6#m^$F)z%G2I( zRT|g(4b4tn=en-}sCuyZ%WH~ue@{TchD3P$aQ_^$LrPrK*i4K(bCE9_VZu+1zYdzG zF9SLZf!SvT`uD?6jL&F=x%0%S(hLpqv(S$F5H-jxV4aK{(v49L?wqJz7tGUPT%D0- z*n4>%jy!V;?!HE#J6YB*`Vvf9rB0V@P@};u@mSyekMFwtgx^`pvY(S9F~og6?tQ09 zJN6Bu?1nhBWLfinnmzb4y8uSLZ-?x!5%7FU4TMR50W+`N5ZPA3&z&Ge-O}{H*wK!0 z3}(RUYB`eX$MgNlzs-fVzenoNa%L!}f|)s`49bXI!-}67H5Z z4R`rHQwLz$_1BQEUX82Y1%cStTX3~a0i{uyx+J`VSIb}FuB%d*e=H}*|EDDB(jMUM z?8+8auWsdiq(7o1`|ho8SO-h4ncySYr-B@ZAc68-ci4Kz06#kY2N?xC9^G>vx|$hh zyYme?mqtO7xGuTHGFGY6K7+~3O!#kxF~+a_3Zid{`TJHi_;_zD7THzei0_FYxSD|$ zOAJWvQcvie@m}!0<_rI6;vkF)uH#4MPs9mpOmKgmJRTLeqT_;%;P>Sl{u|DN{Q0AB zyW~c&jM66;Emg^a1asWNI#_#4W$26v(-Bw3qt-u9Fg3En<(pFQ0{EiDqXIZ+;)T7> z*}L%d2j-@&#PJ_$h4FEfu-hUB!*c#Z!L$ag~W*<&!;rT_! 
z|AArB8=R5fgL^)w!Sp7^{2y13v*)jddVeKiJ|Y#&IfMpQYTKfQGBaeCq}Ig z!S9Z~AhJn=&P&#ybzfEK5^G)ha^WSISk}ilm!>#8nsrz0E3jC)8)GjCz%NjgOgGV_ zUAgD^e|!A+QwmDNXzv@mbR`tkcMYNUe>>sQjv`zbJcplLp+R0HG4{?LNwO~753(b4 zsOitopxSj7+cdwz&&mPJGm7L(wok);pANxzyZi95UkXd+e*qhZn~<8;%FaNQu)iRd z`@zoYHxGotqDAHj=%EJr{YD>56*OqqNE;Mb ztm9-g(|D;3vtXt{Ab(v)m&ENB!R3pS;PA;&@V&eNoYxES<*gtvPJD-3K#ABi7o&N( zGIut^6D1c8V8>Kldi$L&(WCy*(eVQr4<8=Uk=P6Rh-C+n6$O`8w z^yltiw4^V0CB~dP7^O)c)_UQtL*bYoKNIJS(<6T22chnA0`#5MA{NZ29$sU_X_KE= z+FZ=pEcJz3%QkT8o=qTbQ3djn&#_}$F)!CK2zLGY>zFztmfjrFGQ^ip1i|N54d6Q8yc2$=KL$XhsEdaa#zK_Ym^I6{GxCdsQTy6HQIbUtkU{c0ZESVB3 zSjU)VXHQylf8t;9Ez|bE)Vb|=>Zg2;q^dUU*uvfgU(%75T*iaH^5N>dY)*Z_b5PwIL4t;qEUpvL+{cS&w_KoA8~l&R8*|`214<8R2}Su@nRnZqpVVK zce57?N_6qTH-F(d!4D`toq^{(&+$qZR>1EUmvM^58NuGfe=s^!7fXIT5C%KBgYeBu z=)M!o1v=bBi9O6kcvl}g=h|>ROBLzWcttvG&j9*wbO9rOw*Oh^3O%c8SazY6OAk~h zUN@I>yZoieuj(nn+a-nk3WsXuy4U3j95X=o>QWG?D1n0UDzt3VOF-q8-%rwByB*9A|j*19*(Bz^O&q z=$B`U9kQixT%wCpVw@WJS*pY$xeD$d49$5Mehu#VufhH~x%lqLH?Ueh1g`9C?LT!K zXnkx2JGBCoSU`O7#uOh7i|;j@!8U3h&`o9J}pk*uQY0qpfxr4 zJXM;E`l?Sp9$pC-9PdFwwj+Lceh&p`S=fT*r|1oxZsXX%K3dF*7C45YxkW{^!t5Ug>qhIzzS$kCsjHF)l^ALLyR!vKRXc<5>ku8h+n_M%@n z&mHc9-VKcL_4zL>o2o?eo5bk4%Zg;`I0N#!{R#hSzBD~Q+{b+zF2?irW4U+E75w0t zaDG|C1#FVjr#&MFxioGZ%4oZwt?qdkC$3HVEMMd9nR8fAwE`vvDU-oURj8jI0+B_J z@s(34E@rHmCoJE-i9PeKtN*~K={M1yc`$Q*as`%>3be!O447nX!zJRKn4{wX#*Dc= zrC%TIhjhUqxgVmZw6olDA4ohCp%sj&?rfNX@%xqFRhbygD{n>>A9=EqQ85-}bmJti zvoMvHC0eJ9EJs$AV*M*0kk2>>250ohLH0A=b9x3HhrId5ff1bN$@N_Iu|{5ELMr;s#|q_h?J?!OKAF3qoOuJ77h~LgST#?NIJ{`%h8lCZuA4F~-YhWY4%X~MWuZJ7xg?73x-W^X4$Jw- zXibQT3`gla2^!jS7eC8eaygZfv~uPUu1T-rJWi%#;g~VlTzlVNJb!*M|Hw{+%U|XSO^bsq-{{}L zn2(!4bG`!q{zfCp_-Ep|`MPw+f9mw>Cm*5Sm{7PAD^30g_2}MUA%Cn*n?Alf5&sn? z3Elmeqe)Df?a@y#$AvcMJ>@m%t@;JCr!oihNkt;*u0XU4x8t3qjK?2t1MG-rM7u={@jh+6UZnVvMkGs}_!J&4UzOC8AR?1iN#c zgpNE81VDd;m!25z~n4;rH%;v0ES z=CLsZKgpAvZDtfCK1_pG0i#e~u>&HXC}YN{NY?Gj;J>V0gm-@xJmoq19H`|h*T_JeuPKrS zU6{}EV!rGhw^ZXdH@8WRR&F}Q_b`WmO>H;Ke%K19zR%?QtfWcOg9z+AzZ0H3%!XA? 
z_HfLq0{#u_6Y;|%;XwULsE%%7I|Sx=VlMQxaz_R3EEl^gN0sO^XTwSfd$_~q;4b4{ zL&>WIemmP&Txj{jtt{_G`TEl^=xEJ9Y1<8#P1a*f3u7ZZ(IdweNkH0dZ{c~C*-;YL zpqZunxJ%}u#4Py+Ue#M>RbXR%1BQP)ay<|7 zp?$9#470vg$(alA{a7WweO)CuHnAP7H1*|p7*(e zO?Bnmtk`BqasP!Ie5S+5TMyan+73UR&E(BzDv^H`ci9X)88+Ddz)9c7b0Oy!y>a8?H7ONAwHMul**yqGud+88XgYj@s|hBdL~s6CZy)5|jmE7fBSFgO7B-L9#+}aZxl8U!q)X%jcRIg`Gk+TeRaRGc zo0Vm-cW5(2Z@hpe{aJXq`W4jN+6;MJ*|^ov0E{vQalV2S?Yt}m4R0~J`pz-fy{gyt{54S53l;Yu=H>+>b&a!tso(8-uOwNI7xt2#%7I^jAA#zHLEiGgIHeoRQMBK&+1s2@s{4o5C=? zXDP1w5D8ArpK;1HmMhvj4j;8W01bP8NHlo~d!iUuMK=h}jn)Ut880Dvwhp9RyTY3f zhH+N&^+8=(h6EjxCF%1;V4M41_<3X(MDzr3pMJfDSaI>3d^J}vvRTc~b!MGV9d$BX zbqHT86!Q*pY0&#(5T>wQ=f#H0T;j=|2bYyrd;=8X-jw z^UQyBHIe5hyb*5F3II>5TK-B+HXM?j#mCK##cjLr5b|#4U!XK0J!B4+*pmFUb=)GHv4=OWYTa^kQer_vs2W^G4 zY-O63(}>b;invL(k9mW|Vclal+&`>LTB5GP9QQbGgwjsVFrJ#ngALwl(B-dUEqnyDhINAN|lH&;~Tt6X@(KgzGKEC1=6aMjb44< zxUE(j!1=2Zov(QvwQegBzc`jFOJ52Ho=X$+@4+yUTaKMk>}QFWK%H{Hw_O_a&4n7U zWZByZ;kUu~jVK)beHC(6N5B*2AKi5>flr7Mhh^jIp+o)&G(4<<9|cR%U84~9HW%Tn zR2fL=u;sZ$17QC827H*`i&MZ2TkBgP(V2BNH~qyg!IIz=7sMUEJd5AX90T%hH$XgT zEXzAQfv+ulkU3#_r{(kEmzJWi+S(XDF=pg-3o$zBWej{=t4(Y2HK@d)*Zl7fQD`C1 zh788$JeS%H-jWiqYr_iO$|*>2`Oh~TNR`ATMunKZc?+*~YzSs6Wa4mhA4)jOLe1`* z*?L(qID9pkm+^Ap)ZgpTU$?GWet0ne#~2Ux;jIhYb_pG_t*#b!N|s{pED;<7^Lf#Z z5-1gOM3?3heB`xKe#?L#gqcfo$=-VO!sT#&M$c22?XCzDb;OA}^Y3iiG|YR8pGN87 zkKE5pMbbY}imco&L37)5X@OBNy3GlOBM-tLZ~aqLuZT%|^%m@lF0rV0s^(I)lRzwr)=A8~g}HtI#$^RPgSm`#$$GXombYb@jTsOIC` zQ!!BQ7zeg~bA(MkUqt-iWv4z>R_FQLU%r-#DMvYt z{|=z|%vg-=c!KvLnZISU2yN}qBW~lEyVz8e?rv`3&Qxc>lm~7?>-aa2@>CS_jN@@p z{Cdv&(RD}&l7WBw#=zW@Yz{o_HAIf1knQ)2Ph?quGGlT2&&CfM(nn+5*%WX(`fvp+zU7H;7HS&Ag8B;?ip&q{a^nwV&AEXub034~dv&h!<9=-3c@pNPMWRdJ zI8G&2mTNw`4lXk`_4XUiuvkTt%7rz8;D$79io1p-9&Fd5r9!k<Qhi7$niL02?bhym)5}1{VYvx$TrAqN-Ie3D_t0WgiWh9p;rxbJ$Q?TbH+H0f;Ke^a zaCtnwSZ$5A(}A~`a~jqM6@qP>61{9_j@E{^z+r?wE%#?0fgaYqo;FvY6~w`x=)3$z z(5L=+UHE8P4Zk6^SSUMA7N&idfE$-=pz2jNZ<_TRBXd5%38R_t;DaR|$u#4iTImp* zB|6wK^9hvb+{B-M8wDp{5y+jdMEjha@I&=$JbI`A-`}ps%*(oTTA>dgU>yu@_cTeH 
ziw-^T{SGQ0)PdFr3(Wr3!sjgZLOVkR`uHmQtZ029`}E5UO)8S;B)eC$FYLC{{KPNpyF0YVnzi}HIoCbJpd5J_^Pvk-#n zW8s(QE52@uBK|B3KuEsP-GH|ru(GO+)RwxIJoD$GfYNlnF>&z!OmWijEgy_ z0?(f{%-NmM1BZ4+K#}nTs7_A6Gif5icZTZ3D#HcFv%bU>^^N$=#RSeiZqD&~uL@p^ ziotYZH$2(80uPl`VQ69*^x4k=*Uo%SuOSd6Rgc1}$V~99Qib`|SJ1Pg76WgTfYq5y zZU^(tt{qtcOHDVT(yBE4laBGejIUUv`~^f=?(^?Ub<#f84W}GA zi&mv6sKMCLJ@MCA249VsGmlMB!UeEU2!q$2iJ*M2jDNUOhc+LYC{%DCLXE}eaN+7Z zeC3;caPWg7nZvRP>s+E>%D(~ZDUhN0?@PI5rV4aOrxQ~GB+1W-YSh#0Ei6dX;3OWc z;7&KF@(Id5@M8B5^ji5BJ2W<9p=ur9Fkg`ze%i^`iCpLZyDg4M!b|+gcbgzp`35RQ z)q%eI6WqR89}Mn9VDyZ#9CbZ$>YJ4$bT|>fuW$*3J;#*kp0$qnbi^aD`X-6K!>ZIa zi}eZ?)nl}|9}dxT$VH~0()|)%p{fVRUXdhPt*`KQ`U%%A7SG11~?e0gSN{r!;0D`SQGk?3*Ohy zTW{8&3-)bHna>q z#G&Q#G||KxLwB~Yz3oNV77~hXte0wY>@Y;PjRQ~TR$j+N2z!$vAT*{CWQSBiOZ@=0 z?X9%*j`{&w2i=)t;2b>G%to|ld*vxH+}f;u)W0VMk|nFa#q2ltjpY~&^gi-Gy}RLM z<8@r2CPDPa77M>WE8@g{$8vdDBXCacHC%LAhCh0+iTfK>Y#I6I1HQ7K%{wGi;FlnI zy05wwXNwq67nxz4s=JV1S^F3sv`ovo{oNZ?%97B(`U2nZMI06@XM*~bVqD&vjK<6U zBKGRRrDI9_vCAI(e#O3dAd?vQUljP%JWP;jd2;PCD%b>kO;0hWvs3KlDh^wUx*ne!WTXb#kVns`Cs0@Ib*GAL6N&O8SaW_{y`nu^2ZoOVISd zUtDeYN%UJUPKFj5p``@Ev;DH<1)hhyjK5c%uSc3|exv7L1$Wl^2S|DT;#JyZiGHpR zuc-VK?Yr45H18I#_w7CeuP~si@0P$a_m7;PaTjXovW~Fad>G$Ti7hA3ypIpKWyo?wikmcUxj1 z{%!#l_O^h74!|Qv#udKN3{PER!F&8%Zp*lQfGc-kgOv|xu#VpBnj2gsa|n29{6;U~ zB3SlblWyJM1;s5f@LhB)thG1CHS=?Y+tN#!KmI-!9-0my%Q9`D3-L5O1JmV-Bz|s} z&}j{0a}A0RnVro{rS1RX~&~XbUT>lKW5_4gU730I1 z4GJFZQbw(3qQv_CS=?{8my2kwN1ul>B+xUQ%VfW=r>W{_&~rdwa%>-5dd_mZuaDsD zJ#Kv8Jag{jxDe1@{uB?#9*5tKT15Qr9SFNxhcAp;IhEuNZs+JKh?Lk0AO0qRho~CO ziE4)SBU=~V8z#5u$Ugpe|-9kvlgDl&3m8o z(_Gf!jvbFV7x7#{QcMSjfN?JDHm&PK$2qEaSw@^5EEr%lLj* zF{)hCB3rH*;mh|*{9?Pa}mbhQlVQ}=HA?`fggD;8BXp_ zz-OVusI@f-W=QI?^UqyaK3S72NKHWp={jD`y&B`>9r1qfTnry(?w_@a!UmTF}WN5MT)4^-5>ig>pftBZ#Cc;O3tkS&Jq zbY&=A+rT*}`Qd@6U;J>I=PHmnHnA|WQj^pkZ|4n* z+=TL3(hzC*5o1ng;V#BhyXZR)`ll1rUgmCT^0)vdF@DPOzpVn5PyHx2LWO*Feh=wG z<}mBTPp;ql3*Qph!E5*W@@tlV2NjFIXj!q88$YT6CQdTJ=`y-FadIC1-rE9#{cq5i 
zeb1UQb*SAbHM-WIfcMLM!MX0L=hqtq^G81Z<^9b*!J`dL7#MpA)3k1&xq>~mCHp~9 zVgPg$u+EJ+V{VxUUDiQTF>w$YFkmLdSC0pD9U-5?)|-#b%IPsY{IRj)Tqb zQ^6|F9Xw+D;c1dJURuYpcppOW@sS0B*SikG+Rm?B`1QwlAw?1LU>IvA{$<=US#s!} z8wM-+bI*v*=>u^cY+9s4pybZ1|fKAZ$AG09Se!GBDr_-Erdxk z8=+5KlK0!`i5FX?XorI`cW|8^`X1B4{5WS`^VVdjnGnhsI|*^&>ybEPmXM3sqyaV` z(lKFY4N6FB!k)Ft#ID7cudWHt>6S_1E$b?vY^ggG#NS8zKa{<@JJEB_NiN++8D zALQlXI*14r#=k?Cko7>spW-%K9;41$bK2L!QQ7wauQgqjzGa@qIVXl;;b3RI_M{v6~$&kmU z+C;aOGHeTMZ>9Fg>wZ*q#j}*&1TdV`onT*qUE<>2| za1duKR;LZ$p1=jWH!xuen+ND@#!n^4O}zC9EBpw*<*qty`PKp9%wMO!k>&9$9k5=f zl4Y}0=(h)Qbf?A>h?DMy#cSQUrgN_NwCDk}jtk{y#~ESIpY5Fcq%w$}AkMn$p^(o0 zzVBhy5qXkVhvcXQ6CA%I!Ei8k$Rxxsh6@PAK`x2J~oGJMb=ByUtu(vER zd-at2Z<-FBan=v&9ABeWZ6;jTgq)+@m9XrN6)&UV$Zlfk=x2Nv!Yenz&Da>2YZc6y z_TPZN=cnTATv=*3c@xauz7~Jp&>(M@<>_~&KpEq7j~E;P)7j7A zM0y|OhRD+93v$qEunLw&)e4(#8u67i!Kk(OA#=gZg%b}~!eG`_4rUeMoFfM0CA}@Y z+s5+xmIYwB{2`j`|0Mj*-h+b~(gbTSaVs_i!_{kuUs@tL$EY_Lu5k?hDlw*-sw7?3 z;l*|%j<~KQ3!j*N#q#byIHFq>B2K(VC-09!538}v_ZAJV)s=$vYx}qWYqp18$@`fbwV|WtBTUg78ZP!{17Z=zR^n6Hr#rx z84NCHz?utjyzlL6u+j1*)*sTw&{^Ja(qIuRx|MT>)@EzyfPh z+p*wM40Mg$4Nc2?g&zJsoa$~h95OXSxr9OPlQzv8`({b3l^bK06y{hxqUl zSjH`K7VDh6k{~VC~FwjG3fK zC!Nnn&r=k&;$^5S%NQ94O@+k^a^S9u2z{rfLjARl34Ux*pxYZ|xxI#I5ZoY*GkxC* z-7PnBGRFc?>WvoB^H8L#DwCmc;|4T;>&dqy6~TcJZA@Vt@A;dQiM_rI#_~bNu-<4uj^|wk|AkigSH=|6W^K!~wkw56 z-%7d0{Vze*)*J44TmhNH1~7qQ2pw#MJ>?PDt_qg_9V>$+b6deekpuOEzaW+6XA9&- z$XvTXux{_-uJ+%=a|Tc0ZD+J#NdgCI*)RAyy*5n!p-X!^ronc<%iJ61VK|znfzmb$05LZi_ZzNML)yx&GNiyP$T{sU5HoJ z8S`S9EU_ulp|$~f=*4ykYd7s@Y}|B^xn2lY%5p)bwjJVAbg&^!l^n8bgSe@hcxT~0 zUc5AnS8N?b=fH9}VbX*;Ay3hIqbdZ6EW+tgvShkiBWz!d2$R%lNwBM+F0BFwf}RNl zvD>&dmaWSf)Fyr2Qsg?70J*WIy!p#4aFqUl>EUT`^aDHZ&GAHoecE(RlLif5xDP+R zNrn;32fK4QS{EM zgA>pDQB(r<-BkM<)>ZtHuYO)hBN>BhY?{ zDg?e$B5p;=cumHJ+o<}0FJ!*a;y!6E^E~Utj};}pXPvQ4a2Hw*Dx-aKHq15Z!q?T6 zX!JlC68x`0>Te}FT9@%%bH%vwhwkv$UYksp(B~d;PeCEj0e@x{fl-nw>FspH%*lH6 ztFZ|EH&TwcZoG>FXDab%(Idf;UDw$?t_c@THiwa#E?6<-57LcMV3|^Y27PVtr%(i| 
zFTIC@t|71??+x@OGS`S)1X`>%#nOd}bYHCyF0KsXue=0k2wej$yS58|E|eyVM+d>W zm0yIr1P1ILznT9pSdyGQUX6KyvrtX`3P>9JV9g6}xH3Zz!i_~p;q8%VVY`J3Tu}~x zKbAmVrHSxy*%ihBWAm(?h0G(n2y|+-=0F5HJ%jUX<;jxdFK|?{jQ3P7w;U^T1--puZ8_IJ3Q1GZp%LWN9w$D^Q3 z0Oxl%0(of*gThw9y3Iuz(8qPe{razkWD7)pO zsl^SjANvNbTmRyXKVCuGr;+&G%Y>U}&@SvBWe)<~_m(=T)rf71aE3A#hqfZg|Lp)L z!B?yCmd%|m#cKmmv6U?XxmG$Yw9oC+(S>8?Xm36_r3lBiu zN-fB}+yM@w{6H&NjTAJm#Gh<#y>z1_y4}U;JG??>RSwYFm7Zg;}r?ymSVq$ zG&k07fNvK!1jlcp(50h-tuM;)aPSLm(wuK7Gf$N!^E&*!t>tKz6ayLToi(q9IS-~Z zU|!)T)E%Qv3p&K$<@pSZTmZsa6J@%}6Cu>{G>~K2DB@8B<6?etckH_bkwMX*CK&{> zK0f@moN81)YL9nRF2L83_h5Z>BR<+JPqv*2=VqS^<$ELcK(e+d`4A^TQVUL?M0*E1 zl)Gbh&kQX1qeEe{8WFYCWBZtImJ_Q>@W1Czxt#hIe6T|gL+>3&?zCM2`J^xge^w(nWH_!g+IcyS(i2u^#Xl`%cehp znIR&ijOE~Jk^rWyl%N`=IVAm6+C@Zh|d$h;LBzQ4$f?cX|E6ic2CF3tD3O&+G)Y7 z>(cark0=Q%N#d_xd5T*{w?fdq419U|101=Lj?*38p*dU}yN)W;!DlPcR*6D?u7%M5 zd=7Xx9E2@?PaZ~r=(AhDO`ew8)cy0 zP@K9Li4nrsXEKJ2)Mi}4mv;?AL{S&7rzk_H&|>%~1n8X;E=ljhZ9+YANAe?9MV`g6ZhFM}WHXdMv*PT-lTmW+9uNraf3z|Mwws4=0g|J+pb#zm5Xw;QFDMTPew{GIK zetLoB;t=o*7AG$rM8Pxn1*quw78~z!cr{p`i}8&@triVhQKkZ#BWHu{Mpt-xzLk&I zE&&(KS_J~-1a8!8eY6_51iyk5h(hWw$gGus;@flh?b%73KK=YZiq1A5#;%LPBOxh5 z62eFlLefK;voq2|2qQ^ILXw0eJt2%rC8;DyrKVIWVe}=<*%?VvNs=VVNctcdl_be~ z-e3Hz>7M(Xz1LdT!hI)sFd=w^Y1_=L#9-eYTw@pmW-kVrKAxmpP+)(Zr(E!t+RQu# zhZjcS1{iJHd(9R29vdX`Wf2Hl7Lr3|GPvMaEs4K4tl(7iZ@Bk)v}uROe*C#Q7#_;k z;_qERX1etn5}VGBHVy2g0G6AO-SnnZ5R?*PkZBMQ2jI_L=It+#zYlKGZy zkTdEv7&GHGnEcMbYZmpA+@}qAz+(pw`J0?JuXC@i4O|jss;c*yhXih_XN8wb~*IO`dA|6dp#})I(7LfQi=zmSY)ZAAU z7u6ZzxbyzxOn3$O=Y7Ryn2qc&_aBcd zsFj$4S(OiT*09`NVhjA$Ujhr4UV_gPyrDC^47Z;@3=bOA3p%C^Hhn$1Qd0Fb623iZ zfeR-R@Iw3poc8o1>B-1K|A(rkzvn)~-rIi>pFY6{@lkMrb-rz1D#K;Pd?NYM4y)Fb z;?DWgA;&ru23`l#{42Nd=Gi*75B3A+&zeyFn-Hcyf<@aNFqrTQR}5Ex^=n?koR%Oo zo>PNnRllHO)?pC34=(sRd_C@8{|k?rIuZBrSD^L50Hz&RC~zB|2@Op1^Z7EkV2A2C zlm%XaqgEq9v-~OgR%@EZ&$=$zu&kfxj@AUfpApcOZ;2xJ6viE0hi#6};NG3xnEGY3 zX<=w1JTp6uHO0dU{2R-0j`tC=k{6Eg5wF>dF2WgSRZV9q3@J!I7X;cVIwbwUN_?*x 
zPu}jzg|!)sk-$-h&oy;$>D*hmvY2ts6i48~`+oS3)(^<{D9hIyS^?!HN~V0R0CHwU zE2xaGg2ed>1*3yvNZhbU@?~rwaY-3v`pIRasW+SVYZd;3m{-cC|0rGuvPXkJf|&e> zI*1l}-*ADUGd^(ph_Wdu@Z`U4(2O5py6GsF=7+12s5!^c{m8TYODj*I{n+DpaH*=P zcX&CR3alqxpM0=o<{j93a5cU1L&0>o2ID8Zd!C=Pd4lQjgtuhwpskW$#+mr(I^)Cz zFvf;pjA`RLAqL80iN{J4+#RBMdGygY6YkJ>M&vG zIsAT8(R8hUFD_7FerDA!oc&)XPHWmu4qt4cll5akq4Y9lWu#M;6#|Lt;)!T`_!aKu zFO}HJ6QRQ;7ytXnh5OM_jJ>>xcs0b*4>63Hy<#M6PbkCFTUn>3BZd1TR12nj%>=)x zth2lCGSMqfhP%FuX*J;}Jad0X79Jl|u%>ShsoQ7-8S&Thd+nR3Yw6g6?`|{T z;ILt2==4??bDgq|>tB#@ZK-f0dq}~n;0|~u z%f<|rjr~Vkz2N3>D;VQDpIk|GA*Y(Va8h(9E^*f=@VwQIb8EX8gTDkfe$EFc_mQwa zYaV2Jm(bI{y>J@K;dC@jDmc5R02)6{!lSzSn6u$Fj68P(jKeIU(t91w-ct+L_p6$& z(rbjcNcDmnbjbc7%nxQ6Ov#Avv7KFz6=1dvWTf$9@hntr?$&U$yGc zQ*$ppw?ms3?K8%7mscPyE#HbSW-FRrQe8@0ZMfJl&r4z?QZ?ltSw;<~{WURND5mdM zErDf!+^Mzq1<3yV8yDK^@~qrvNxY0*Nqxdq9C&O=Kk%2p?HH|sD@v?W`hceC)n0A}l&|f%*;# zN6ize1qGh;B%yK{ zLRMf6@d~4uI?xZR)}_LlG!;|F>EjB%N=KP`zi1_P+s2y;xHllhAQsLqy-hNHk1p6W z=`60E`!9UYu_1=V1$6qT8~CqeBgQg~bke0VTxNU~vo`Dl?$fWJ&oX>EvlR-oPdr?5 z$@(bl49+I}Q4dkFA3HRdKYaKg9KX98I4a1e?4aML%x-@~z~k8$#@#~2!ri9qZd5ly8f1uW}LY(QO4mh7S6*CX&CsJ@D^^p{94ITtm@+6XDw0$xAZd zB!TDk4R|s!3@fiRfYPQfurZ+&?>tl}7}I_e3hx(UvGWc5x%m&i{*Z(LF7d>D?-}r% z`=0fx-T}?PV?@DTrC_*QIE0PLWj)$mpnJywtrq5ym%cY~o%UK@+RY`@t=oz!zkkQx z9WU^CbR%BOy#vRF7vkOr=}Dvsd3?*vE0Od#(c7F1rd3Z#jTmd>Jj{Td?Yh4^*GY!8XYS`nabFuN>Kr zo34bzeYFIV9siYP@h`&ZP&*9kRt49s4QS&01Z%6)h);eIsQGOGtx99$x_CiG4i99D z2E)AgH;~sZMs>v;aK9UgoLc6o9I8$9Z;S_7;d{_CEChxq0plxXs3=p#e)(xMHd%}E z}Z*T>$$fyAb>8tsguP)+(1jnwvGoYny2vz0-?FP1lcY6f9WtEjtT6VSameJ2C)i`wLL(DtdL-oa9fG^dQtTuU%bx-0M z55OmSyjG(i^72iPuh>fRW`?2LO!oUfbfONbt>~Fw z3~K9_fq~}$h}l_e^ZNtT+Z2Pt@MDxHPX-qU)=SdEJl=nEptjl^llHDfi`Us;!Jd2J zj?0w0{4iD9kb#z)_hFd9YwGs=FL-}dK%<_Q7^WkqekSc0JBrN|Jr<;C{TkN2Rt%h` z8KiSO_GWWIZ`x>7++BdqnbEX4JrB7TD5zcf zLL)X3$c$_OSH}sMx8yzY$H`DjZ#f#wiXhF*?;E^X3T87HtI(|p4a!`A+kRJ4VV8yc z0#%gn3n#7hm9*(m0m?mY&?!CFv2w^GPZg@P37EdQ`LUSSuUk=iW`K0z* zGxjdNON9UF@(ecFfM`=D&+DiR+i&o3*B!z0XZ^c?sB%O!RfKSKAKS5P_p 
z8?lZU2idb`AerEYt|rHTW4@dg9Wuo3=V55E5rA`ZO@6Dj3aXUXgYgzsusZPxabD*bmEx{aDbD~`QHPEoVOcIIldJog9$I*bcw{#;_meGG2L)=>H4 z;Uui0idM1tE~;rcSTXNy-9QUUA0OrQuI(ibAJ-DA-rJya={V?Y2mz(Amta>J0ODys zNN2trIKym7-sm&1VwnVb)=SZ}Faz_yS3~N7cPNw=NQ8-HJYDfsa9gni%B?Pdvsn@5 zxq6~*cm|O5USK{jAF579prg{i;2b&^t^WF9=b0p+Ie>9r63|xtB8)kZ4~-+6SoSXf z)>;3+_GQDtxc3KegN*T1OAYuOZvonr_zhE}E1#1;W)Z#!Y$ipLuWq&eA^J1}f4*Oh;G@&EC4=ab}g5TxA$bUjnejikt*@~%(0nqb@dA1sNgqa~{rW99UBAf5W3BueQlsa<>%yuS29 zRM2$f>{sOV>b=1B{eSUzUn7L8_@i=4GYBul^Biuq(WvGw-~B*AlrO(qQ)6?_hj5 z5VPZ7L%*siYBk*j+co7xO77``>M(>OZl>@-3P8( z;lT%Nm~{@yRZCH{p_gZBe~q#D(y(IQZR7_oqi$Zak^jMr`i#E8&RYMU8DQ!8&#&UXP8u{)!9PA+Zi=!6I(H%wIy z#4xvF)N}eq!z{NF(IJ0+AKJsIrGjsrK?pENqA0ogYT`pQJ${8k(MRI@?h zgI)b@Wf*jF3cqD|L`D~?esl=UB;-L7Qk^b;e~5tK(?7GWO!Z%xETwr9(1F#FQfoypoC_j6OHb-BB=WE7|v93j>$R>yu)xo0sAAmc37Uk}DCd6|p zIy})MiOOA|cgX{r`)^`c`WKq{A{W>7+y?2QbYv7Bn0HD9DnT;z`qT;D=k&mM-f*nW zO@^dxJJ9&{eCFZ4$7_n_!n{LIA$4;*xZPh*>Yob0AoMOZP8*H~4@7|XqJtTQmqpoi4#%M0Mv{3Tk;S3_9aM$m7z!ak-m@I57@{ox^u9kv`C0!oOA z!64K>rwVG54uRWj_RfWaAT&QiY=7qx#Tk21_w)yh`rJoqV|{7U%46X9CkLbb62a^2 z2-HkD0P?9lz-O5kZf+vz`U#xxwbx)$iWI^!3`y8d0gc^11P!vs zfX9*!lv|u4YU#g`!+VV(p+4yRCk_XevK}YbB1loW4+599a8R`zd(OvWGs}*ix*~)2 z@iFLX{uhkRx=F4^E2x*rP++4)Y!5#J(Xg3VQg{;_iu;I;56g9)8Gx{}^HJ^HGSZ%F z4N)_%LSVfm24fKRHhrW?`>%rDgay!Rx(J+pbfH_XD)BF=LqC>}Klrx<^8ETpVn;Zv z-Vh9eNtdzc>{C<^asxhZEyMo(M*olb82PLp^Tc*wo46l1%2)2o|L!F6UJ+EzQ-$Ex z{m>uz3UnS-qhPlihHajPV^)S?>&BZX%lsq>`*MIdrj~$uU>bT34MTCd3nVqAqdVR~ zk2ZCbj=hQDCrVL0>l=LD%KRdt7(C9rJ^rPYXsz)Xq?;`z^>e+^xk?5myZ&NE{2a8# z-)v^8qRt&zNy~^ zkMM&rX7T87OGeca?}3HWeF(1ahW<{1oLelP6|VS18x&5OgOfgX_585dN_noEf_?Pva(yMoYq3o$*?b=Ka;g%9K5%F*+F=MIB%g)(;M^FN4Rn ziAd*^0Qb=oP_aM6auP3a>#=v}_cV~{TVtTIWh5$n?W9Gt8@76VMz`zXq~d1*NOP5W z!bh&OH}f63j!uJvJJ_sg=Yl=@7tyIG7uz%Wz>P`3+D$J|wC5}@DcOx_T;ECUuqm)E zITVdojKi?~ENhhY8xF2u-|x+9(fOt|>byM#Zho7fc7hVf>_*cX9^-QLn_{c$2JA`r zfxRQ^Xx_7U+8T0`O5Hc(?5Rn>J-!i#?q+H%Ks55d{>MlO&3M`bcpx(by)p18Zz&uVc>c$ z7|6>|s@RUTOM7wk!VHKlK7p-|R>MGdG)6h4qBJWM{Z3v%@zTk(qd5b~90#cUu#sq+ 
z^+5fD$xLVahD!eN)L~0F=9Q$NZMY*CA5Ei+YULQDo&`1U7@uaU3~DyUBd4lpiPC#( z6iqyY@&|E*n`($Pp1#0wCNy=?Gr~=!*#& zolHfp#!HFiv2mbRa~KS2-=K8OVo875d$e#)qp_yDVd$qn;Ho_gMe|3a-Mo41`#c`X zXNj@AWj5N5>7v}!2;!Qu0!1t*pkurhRu?}2nYbJRTN-H7z+EUvE=L)6A(h^+v$C~d)r^E(|U$uZm{<=%%aV zeb`BY<18WUy#?9ong)(Rv6#p!M^Qu!v1$Jb2EU9jtb8iSUM~W3ufrgiz;+csHNkuR zJ@nZg1-6UTFfVQZT)xF3NBtJ$NqmtTz6CwKI*~J3ojq53G1vY)aO#fovUi-sNV9M- zuA7AG-v0%!k6)p*%NIGTwwicf3uIbPIv6^|L(@4`Y}j@e3=dy~RLy1Y%w4kvbMQdF-j0?p%|pcfEDyZ4X95~(@zTV{dhfk!C!-2$Yb z4IMstf_S(94Log8K~e$PaSxFDZ=6IhzL2(uEQek-Gqibfo^?|_f%XrVfxjdYMy{<# zf$MnM{yY_;%wLiuk8BX!@-9y9gG9^H_KOQq`E@_>A42uXr(4%i~JP6^`Rv)BCe?i`n5-9p`01u|K?@FdR zTE#HNd37T(uXuypH|A7h(@*qsbb!1e9uVa|7LBhfVaJ2lAZmTVQ~A*jP1DYUn!+`z zHhK}7h;QRW#;oLD>g9Ru_{ip<|B!Dvfi``JV~ocOz$stAva**@d?*1)>p`Hyhd}*s z8x-W$(eT_D@chBPGaL26^l&ri9|hF^E(EdDcvK73pqv)~vYPcE-8ewZyVpg(*WuvDlGu5h3+ zeDGT{9g8cUqR{jgXfq!WU%EsR#lmYk>j|hC$*>?Q3nNZl1AS-#k(llMo==hVvKb`! zP(GGV^G3~xlhC;{0>UI!RGOA5QCaf>dVYza_lGAGZjM1`*K0(vtc&qqAEH9nCG2Rd z!y<=X5L`5Xz$q;6St>=!%fY-sPw679YRqVy0nxL3Ax`@>aE+INS^qg~I;#&;OrGFp z|2Ghou$}dOn1Z>B4OV`>gt^8K!TJ1qYBu#dSWU=8CBwrQl#c8y2k@wIgW$J9AiI8I z!<0r8374Yo+kB9jXGp@g2SJ#{F)(AfcGrVlsPa`2wtkNTs{%H|4_N~yjtyv`y$>rV z_R*%Bk&rqigtlvULpe_s>2@usz*p#h^#z1Av-|Crg+%Lc0l0;oB&kcbvt9RC8nP=C zDtH<%D#W4K?_q^Hg$+Zt3e3_`?Szt7^UKCTS(E^eZZMa zBo3aRp{eu;Bn5wi-s@MOU4I13*1LeRvV6!W{STzyt$2|QfmmWa2Dxdj=$$hfg*iw# zHGM>A90!Fe?0q*}%#%ONAzsS0#6d&M6V|Wd8SpYGM|TZ~6E+Y{mk6xweNMSyZIXub zw?V126|6O9fbdg8zWuu-woe>|oXS**)b9yZH@b$cm4QSwyN-BxJV6V=V6>ey6Y3am zL+$gw#BSOrXtihexAsDIhHBBsjsWPMAO`QML}-}z7406b#n`fYXlrKzirH*;HPQ)_ zwi~0+Q=eyZCJp#EpVPi(F>-w)>Ci%7Olb;5HN~4GdR82ac@>5RBc_nNtv^XqXf!D0 zgi}J<-{H|zNUC7`z$Z?4dv-cRfHRP+^=J@SMg!+wBDHz7q-oF)u=HFAUb&l~`Bw(G z$bVv$XDB-DY=WRrHlt2qK4=-ABqjvG?T`#C3T}k<(`sO+VuKu&ViV2+<$QrwGO;-l zfla~9P;Pq?jThWQnaVRdyQ>4je$-OImMUm}5C~3Bd%^M)#nR>=G^m_Js~h`KZX}^z zLsmdW*}v$PHiJ4`tV6ANY`#^y3r!o^ShNR;^9_5ZNqj+R#tUApC;=n?bc5ToBqC}L zq@N|%Fk9}8i^hM)`snp&ZqWhl7cwE6d28CY^g+SmAZ+rhz>(|9Aj*FemMlPQ+VmSD 
z2bG{xU@$oRlf)Zf_nqMHZm9T=9K;bGRC;9+xC^qNQgbSrs}IKT5ucD-}=AEzMs;e}pS{-EDr1;WEFWF9{dgmeq~&1e4Ar83C7 z%*U|UUzn~M!uGypr+hU3lp1>)3HBa ze7_Q-cBIgdaqXaGJ^+f_#$cdl0M?E_1jIB2IlErwAAd@*sB<9{IU8bJRwk4hBh!SE znC`-(f%!=^b=f`IEhs~W>}43ne9fMF*$({Y78F0@BR6so+A6Jr700fE!Sc&!#e62! z3)z_;=tzrvqd}dWbs?UYkh3B{!o9=3Q}?=QQvWP)xNr;Q+uW&Cu^I!~57|>nI*LlqT}h*UI43Jf;gAp8_f@(=872rixvQQ2Wyrw7Qdp z`VX!l_g6a!)K^65`bpTV_Y#T@nWKu41M1$%N84fESe17H#DjNHnSD0331xcMP$3n1 z`csEuRhr~qhk+-$&_Y2$DR4}oI^(;L>(VaHZ z*nExQ`QKpchc;+Ea*L$q=Y!k#$&l)`9j06o0YB)aM8migmDatdD%u0+SFjUY2ZlnD zi4hu{UqJchFR}a0GvG@5CH0R&&@1aJ=AC{-IK|;6MIGBw>AH-%y|tu5X}bxHorkA} zFu&`Ug(wWY$Frgzk+bCr?cI3{6{BWg(>O60*IXo=e_p}FVXPlFFBaNw>;ZFi7wld4 zneIwx!^WHxG-#0`SN$@vR!sxH6a^5CY@|xj0wAji^4Ff{1*W|sBRAZD*0sh^OOru3 zQcEH>nM&&eOrdWqJ2Rx8z-zJtx(z8MDtE4f?9L3raoPfvx|0ak`~d4VNhU)zUSqCi z9Gr@8Mu*_L#Qc^Y^y{djE4_m?-tZ*ItLz1y_$|I09rlQcgmwgUBP+AiO z0`s3#V|E*=qg{{4TXdJf`Jz74Z($c@gqP>-Mtjqy}5z#dG$xXJePe!$st}w=_ z3^+?eB(>QKK#kW9{1<_ws*IhXgS~jhqc)@F*&@v74nRLq1Q@7Ep?PB{hK#-rmS?|$ z(&wFMJN*u6y7CW_#vNF4;4W#L7>$PZ-?7)J1qJd^$X&;@$L}90N8MFoZdQdV^^WN1 zZ;K9RIzcut4D;r;Q=yJOQD^Tzhnszr^BzfF^iTG4I8k|d5=f8dp~sdG&@9Pg%qIgrZSY#bsLOWmIrW0GGmZ zQ2E@Oaz8%6frR_u(ZKu|p|+&H@+_+C?tr1?F<|`t674^|4*05kGUm}8=-F`wLK*{r ze@cTy9iI!rX`9>VJw5%LX_uSLw${XD1O`xhKfuVbZ*6vxvwF6awwW#{(*i! 
z`=E;T^JtoU1x{zNiAU%ds2y{LHk4F=bI&`>zazr__D~f1CQ1sXKL?m< z+@b4A(D>&Vh-OSz5$9jh%g-WtWrsk`b13m_`hv~l*co@DiSpC;Qtj*Cv4UmAylzO) z<9Hhj|IP+!hrYz(*AGmMRD|Gvvaz)LCe$rRL#;Ix(7wSGR*(1q1Fb*Mu=)vFMYMv* zeKt7jje;nvDCS4m!}OAAJn8<2V4@R?^)bURSM3?)(YC~LTD`X!SaW@ zvHj~WSQIS+zqz*{m>b3Xp1(lW6;Akjzfp_T9O$gC#^w>9nJ)j0q|g5gqWB+@qI9;) zI`su^yHM=+uR!70Oo`t8za)RhHRO-H*Uwjl$Ald_&@X1@pLfp-}7EEp$tCBwjJUlg52 z$qLVJs5$;TZ7<#m!p;x*dC77V#>LQGS_olEom3v$LzK?f(}?fGAyzLGr2lBsR9JgTLv5(H6EZxIGGuI8~rua{xP*T|+O6 zD#AVJk42Z3AxGmb2#$Xx=8j>^>ySm=j6R{P`W&2!xeVgwt8l8n0iteJQBm(AYFzr2 zbdMIGl=TxkPwfTrx|Yorh14NN04Wc?p#4(zxkj&thDj`6ucb_DU*E-g7YR5F{wDEC z>LjkOOQFc{2-d$EhxHr;mz(dvdQlT_iatt0s=Cl_vkc3{DKIi36Jw1hpnSwv&}_U< z`mQHnnKB8MS)n}EU!U7kgJ49IjeNXXd$82P6OOeV0i?dU!*HOfTYgGJbS z{4H9V7^2-JBM@evh2XP}7@Wp5z=<=kXRRN$Vl>J9;)@=yHbKblJaBuIhqj|kvGX8; zx}q^~c61Qa((h0i{s|o#ydY7Zc_qHiquz&Ykw0%E_$g_CTUP~D>WPQwayCOfUIR_N zCgAm?g9@*%msBo}XFT~^D6I^oW;et8;E3#`NR$H8d6!v}pbUt!djermle04qy)MBQf!_|4HlZhj>-@(Y6S@z0UY zh(cMz8t6OBI=o{=xI%mZ#j_iUehg#bc*R1}?jE#`u?Lk~rNCFOkTm>%zRg;P>z>p= ztjl!}PWhMS53a@3-)twLs7_<`CqTKc2=!wTYoBj}yoNt0+p!HAxv7|H<3qhJi(H7BtcUdF zXAIN*f^PL8RPO%`!`@f19dj8(Xk3QIv=3mg{|zc;?FVV$I*H~%AFvsA0rMJ$pe6YT zdG}-nKz@$PSEau12x-YRKN5gW`?4pz|aS)N>ZXm`t|2oa-#J=FFVhsB2 z+0J|t9Yn3;U*ITR0xi!LuwUMSQ=xQZ|vj=<@w+xc?)AUooD;+4s5^L zkIuuVfZhE|D4Lc@ODET$^T-M+eCP^s%^`5=pC+bbXkf<41!&zp7&y8cB#AZwFope` zUVD0JVw4o3r!x=XgkM1B5^`{yMjtixyB&q8P~PJ`-SnP~fX z54u-HLBre%kjG`<@oix!dSXEoqgzo;aGEFv?nX|zqRFXK#mFz&M~cQAKu$wje*4~s z&~N(%S5NK$KQ~LzI>BR%=_IT*ZKdhk-l9`z6{^KAC$v`+!!9a9(I*4cQGbC^q8n7b zmIC+56Dp{41iNGR!OQ(R9Z0_kd7YWaKldMX+;$SU-IJMKyb*Fq5PGdnCI+FKNs`fi zrh&)PsK!6o`jh>C%v-emd;)IgJibVAY7Z#?%wFVL$t6y(|CQ2L!Q(H1O%z^Zmqax)AP*9Bl^R}h-# zt%B-=`C!4DgOzWrSmw43Et39&*}_=N^Y_QCU7x`C+6AhUsKh*awWNIMHk8(7(1FYj z44)@Ot9#*CszXusPatrXT;uU|D~YJMUcx`*f(E`ZpeJv~A{|4F=oy7>>%&neD;~IO zJHbEeEv71*C9|JBVj7x`#AaqU7P-5C#w6A!x8((f$HlT-_&PH3UITC*Rp*z~&qQg+ zH>_5_%bv%2s^k$#V}ll<&?OlKD>c#K?Jz13G9OTLA$9)w7QN1#C2Fs$9=aa6g8gxaf#DVyCo~)~2C<(>O@sI?>I8!e 
zJi^f)pWl9UHV6ifM(;txP;hrKXfJ%s^y0IUyt1dnKt~EmPTrt4XbKQbraO1#gZZ@6 zI5Dyj%PI47eDxsJ_f|v5P+u@It_PLS^^os;37myru_W9dSgLsD9;#_2Yt}`ocW)9QhTzXC23=`LiJMbR&qr9w6Znuh34( z6@rEqVBnC?7|Hr~ILDJs`pXwXT?gyP`j_qdQkb{M`v~^FcLuL>$)M1YiJH-J)U0k` zoh2+^wdn}BCR{_UJy$^g34qe@n<(%Jz}pkzP@^RjoKI>2cbg58&7(jR_nwzJH<@_t z{!TSJZji81E@)U(1+n8dfzG06%$QP*&U2Xm?eAnfzK-qB7EPhW%eqi*c7fCunL+WV zR!nS($4aMZsHp!K?QZvi+Z26T!DYE!n>H}GF%#wDr&NCCGMTrb8-%V^5O?e>)Rq}z zcxVi|-BG3a!zw^FOiUx(98g=g5tIwVv9UT0IDdTdg)!H7?1D3uc7-)p33OBIUVr=yZWD@5J1MPkBO&KtUre`+r}+#L&3 zv{~+^EuDGQLy_>>k+0lC^BIf7V5uUu+a{x>MiQ*h$pDY#;~?)5<2Ji1VeM&x(WT97 zM{$(bx^O;tyL045eGx^W?@5MY2L`S3MT@C+pmfBGF*(dYXZ#s#-5*TV`YT8c z=RQ}#0fq`C%yZh0QLpU5Qav1nE38QNXr}#3he7RgUzC;KBHb(Qz`3wUJa(7PGEhhp-`C^3{BG?V7}T#RKI+gF>y^YaO{Lj!nZI^)bLygw;ce!Wh&*4 zmx5orBgz!4pkV`o{k9BrHeQDAQFS1k`;OQ6v7Pc>}s0orYfQnQwZvjOp$XV55BoL~8jw(NA`V@+br4Vs=&yvj_d0k07)(lC0>f zWjnnsp!g*QxD`y3{}c;ib3Niv)kr#Juh=PIIP`G1*+=AC~gW=wB&-Y zkHd>)d$+uWmqC5lAndvO5w#;bz&vje8ko&Ot8w4JQV<4uipA{xH3Zy~d$8$YEk>=B zV*M{Urgkm>=S@$bcTqnzyAgvl(sFA%Dj@lu0IXk^`0(A@Tfa^+KLwPzrv zUP>pTV_IOi=@YClN&rFLC~Ug65iQ%Cq0;m<^Gz{+!SW}N+K^9c6CaSI(wFF!$UMeF zr$XPoAmAvBp(=jCV5gjfy%*Yv^T}SQyz7Y$7w4eYz+aRZo6G<7CxG^t++>7;aPG^4q04Vb80a@P_s?=6bQjEW`omm4& zNffoyeTpOB1c7kx3`zbZ5mx-X!hDCh65ETplyk6z=i=Uq)^(ZS(|#YMDyqEtr^A`n z_74Oy&#LP(J&3AIqTG}KaDCg4t*>qo{_XA1rz`>6&F4|+axvk5%wP=ZADFq|57htq z2HGu8K)5gkIKmouhG(`$C$|2KL&z{WJ={v6g=xD#!**jxLxW59bMNd(>*oh}J`ihN- zU0{*^7+tD^K{nzHR*!Z^uiaH>yz2+y^kKdkFir{|k!PJz0AB z2lTJbN1yv`s2dmrUN^te@^Lwg6WK($+dFwp_f(iai*>z7?xL1XKAOc>fqaiE+U|Bn zqoCi6!Bs+c6~BSrg$GdY(>J6(r_hagk_O^`qTSChAcsEz-*+Mwy@)~g8Sg-4ND$Ue zc!-?zr}@SaZ{c%RA9(361KT&ZXv5eLP&{J_N{gehZ`VigX#0un^HZ_&5c9U%od(ye zT8MJhMd5)9kayud$@8CvC8Ktt!I51sZ+SA@-ix5Guo&Z(c0lc!*ECrBE^>J+mo)z( z@MkVSgVazEg{0He(JOFk{UwyiwxILJg-q+-LO4f?shitw6m@YR$wd*;JBm;`(g(cj zCt^8cE3#FSR)vlH0B88~5V=?n^gb*LQ*DPz1onLbPAJEdE$Vu)a zRbStMNO>dad7TA{StBv~>ns$$7|Tnt-+@AI4+=^WpeFDQ)-Kr$beI)85BkC4ravet z%3$b?t0;Y0Lv8QQhNkwJOsDS!`MZ~(HdO_jS1&{7uNk;vNhjnfFTmavwZwVeB&hRb 
z9lO|zVg9ecnd1jlDXmbxIt?n zM5fcRoq5k?k^-{&Vi441`=Bg;GV#142WfU8xSvlz!+E#Z^Sqj_(`{xR>wfCm84k8S zPq4q}2uN!(nMOQ}@Y6FTVXHTx@OLy={u72_vmEJtJ}Kpih8`z1XUiaOZxZEfyg>v5eIUOcN!;Jvfez&$6qPKdQf>}U zG>Ed>6$ojfngcrds= zmUw$#MuXC!wBlkuRDV{6K;8fF)Qr0r(YOQ}-cwYbnvTNNi%GS~ThJVKoz#}L5R2`b zpf_tcx@i|veT6%?!mbVTl8eAUxEtpUDM9jKB1m&y@wllCX!VxO*W=rw zae&t5OVBN>LjQ&{;OhDZ40?tW&(c72FgQ))?5iQ_#$fc*vcZWNEy(Zbpn-QSu!3cP zJ#|((7NG#eP`vJ;Eax5s#1lLk+&`StJVV^rsOMf47Y>pA`idx>h zH|=PVvl8;MKZ9tuBDLM;gN|3$qv7-*tlriF#uHu8ruH888!&HE!VO4u-2>$-c7mhx zcu+9Af}ZNkC!G|E9w%O-QtSnqnq!K>6jj29-#m_eL%#9)Q|Q3y;K?sWfxeX@7KhY; zQsEqMTX~dlE1keNkB6RRF;KZNfVlRmv3abMIK~b`*@1DWXt)lA8%Ll%=QeQDx@gjP z#)IE4$Ios)Sa2ks?Xzr%-D9}JD_e`9Z{8p^_^Qza?uhHU!`{Dh|x z>DXI5w{4TismH!(P&eU!9G!hw%<0?4XGxL}LXynV(t1ron)}K~lEj9jC5$9_-C06N zW-CdOBuSDaB$bh*n)}K~NF^glsEmXp8L1@2bN!ybJC5c!X1?G1<-E@GbFx)c7f^Vu zi3>`4ldlS~-PIg7tJ6V{GD4)<{~4U$-eVHCR$j5-5SmA>hLYQZps=J4ynma3=JF_* zW|)iPr|p1tS21YLtfIc_GOYRb1QJ#rQYicGLCJI9sU{DQ4kuIpL3}^BgQrLJs9F%##DY9SM zgD)}9yl3|(e+l^l!VmGV8Zhl##duvQ=GNdR!k2-7YcBM!?2x zK!CmO=o%sKI%Bnauu8K-PY1k7c9{5~jn@~lb%Lry(J&<}c?unEg z<*c!;4|?B=L6@@}^_G}o!jpC66RpGY-6ueE=pafjN||$;J;qLaj(ZMY0cG4;aP==k zWz0BH*02uZsjX&%5B-ONHuGG!p(nWKi~`yZH(?|3K*LSxy$_+e5cx8Nt{PF1Ev?Hx znz6A|2clbFV7wp>qtfm|$uWHpuG3|1Z@zM~Of}kCsKE2;d`P_U7Nk=gQImWRgBvTs zFyR&#Up%Cc&$M9j@Xy?(VL7<{SInGp{lM~T6-s+HuxX>dd&Snd)VI6c1T%?6?4|t}*=fpfG*83$g)cyO@B~Q4eHSI%jYr|iO_(}v zIhgD`j{y_Q(8qoO7ho@Zis?OO;VCa$o|pE6wc&|oL|ZeGsD z@x^wa=f$9NC(ZirRzYsRC~$Fp1?v8`%<8)V>Qv3fq^x8RXPjkYQ%;bs_zjHLuSdRG z2IAID&^Yxw^gY`N(skB+&tdY1+a2chf+y&3epqo zYTJYIjX9w4(g*noH!d!;L$ka&@V{zOccBx8 z#<5Ntr~iAD~|o4bpz+S?7m=U_l&k6_BR(@u@;neiRn}br*B{#)09f1Pn47 ziAu=|kXw;%duuyL1|4QC3wwgzsn>Kz)}VjRC2&d#gGPtf5V7|^7S8Lj@j(s@w)sJ4 zcqOXKd`OQy%E~EEX>C@IaqVx>q2VYOTqJF*&w7#AahE8!UmOVQsBg|=5CqMAjoyFu zhJslG&^L%dvU3zqI32DiTzZ9zpAF)El^viJL;Q114s;&6jjM3z8+3sQ+RPTEEDlb?_iB?C^j}pGWA(yFj)!z*;A3 z9G%lA@S42O;JfEK80r+VcdIJF(di3i+&f4|t;{yY5(wQG4@&2!3j3S6bPwEzLEIRu z=Io`0AG~m56J&?=0l#NmC^%rtEMM1v)&KOn;g)D=Hy*@AI`G0q2HF=c 
zV43wtsA(f!>zz>A6YW{dfT_^-s2*cRb;Epp$}2V%f_e%A$$b@9K5@aa(Wk)lDuQp_ zV^(QJ{UvvQfb-T<*um8pFz*_9@cgOA@HjdQeaCFJrb5T#&md?l=DN-q;2f{Zs^0bj zmE%{|HoOSrPK6*aImp^i`ogpwm!N>omEmEhP@{hzRC%;7Ob>-Bqa_d@b02+6R)FBw zWKmewOc?#~IZ7uOK!mX#Yq|IcT^`3{YQlKjX&wMEKK(%Nw-~7HLA^&C5+SOQ&b6Xd zAom{#S_=`!K>_NLk52Y)EoH6#U@l!oSUD}9o8M~&*|qDrs`pP= zs(mZ!(Y!u>|8>x?{-6x2;ex2wxYg= zz+h|yNNA2BG%ZkMZlgQ!e0uLpUsATM29mE75rdpT!jXaC+}8mccn%a$t}f{DOv=$D zi=?Xk=yvWTPwev@Wz@Ij(S9EFS6%_rKl`J>fkup5coiJ{iHBQhg61`mEb2xlMw4ee za(+I#Hm9M}$4CgWABMhDWf-MfhC0I@apk631)q_G3&USP$=yL{`Yj#}-X`J5R=RhN zk%L{I<=`+moh8){pswVhl*!ryg38A^HZsa6SdGB!vvl5CaTRrK$ba|wET3kd4Iakn z)EzpRGOR+7=?;dX8P`CZZNfE2W?@2SiK5c#40c;je#ai(AmGi+bmaoHzn6+3eadls zz&x<7Rbk?Sb}*L|qk2sYm;OzxhN54j-GAVo1-qc5JDtgIIzzV&;aC!7imTMs;Ct>M z)~-J;px=KYYB9nA;h zwuKlmy9H%e8bsop?TX}-MQGYQ06NFGqVdBwC|us6P@O#mec2f(dtZ&wzugCeGGb#~ zxyS=JW>RfJ1aO6V6@6BV_Ts;8rdCEKEaRf6De;^@gOk_fWQOgF-rz{JIH&7{1^M zHm;oqvfeUs)L!G7ZjX7m(KoDfScFyiSJ0TA-O;*-NrUbM-akyhA@C?SKh?tIe-LMV zzzUGfI4H8-8Hd|8wSe%&J`jxhkSmuy!_5J@sQI^@`&i8c^*Sjpk?Epu4DpE#)cwTEqTNMwuDi*0-fe=8VV@`?@Dv@f3s>#@ zh{F7MzI8|zJC-qbWR32d1C4f z3NU$bI+o2(2k}22MM^&xk#qTOEM+;M`vR>I=8mnfKMFDe;#7S#KfLi3xa;5wT_IZ#GoX*IKt zibhrVPj1pb08Q77fcm!)m^yJ8c1{=tvRP`8`R@o?Ve?<3-Goog>KHp zpw2o^edWHGFhrL{QIDd_`_JgxRd_Aos}%lLTxAob4&8L-HyR+Ubmn2OQG|h3&<@OqHz91=CoW!dkII2jysr8S46!{yP&1H z7dY6_{%1drF%v=pI83WKfxPkIKY6uyn;-+Xq_zYbp|Gj2Z2OJJ&w~TKe~Sc z=(+5tnZ_aTecZtkPLz{2bCm^6ssO!10x;7F#LTa?kn!;V+WJ#p=f0~bkv1vnDZ7)( zZ(_>&Zx~e^0n*j$`OhmMsQz;S*l({z_uqslh|>Zh2txUW49H%urhMm3Mfs^7KG>vHj)izqM3xtZ0pM$}uA}M?Q*gIDqT7)39wy0gAUj;rXf~5cJ_A3bs|{ zgzX9gf$yjpb_;@0>EBxT+YN0*W{Ae`}wjelo^UivK{-WVtHHkg32 z%lkk)*pl>gQ+W3eb-i4x!Vn>KeOc@RyDR!=Ix~~(cREvs>uAW zG2B4vh&$iEBafviTDdNV#ly(!|C*kApIcCL<}fTg(E;Ht;aGEAg?iN6pf_wbM5!pZ zNS+GU?H^#L;~^{vO#}V? 
znHds27wE7<&`qfVx85dTx9~bdH=F^tg%R9*lqZA>zI6_* zWtXuadKP-VdkF>;#$wIhJ5b?G-1-)G$o^P^?ib9^Omh!6YEHt)vJ*Id80C6fL?Vfn zn5%6U@b;-MP!<%0nUX%J|L$LI-s>KkSHEVBKPSLUj2gU?{AgQ1J?6A%?Alj2`dS7w9}A~7+zKOH zJXu2#Y0zz>Mc#Y2;I;z?G4}HzT36*Pd+UC5@aqmEoA035jC>b8^qFAz0#W+N6Qr%J z0-*_Ur$i^X+0q8)UC|xH%eOKwYnpl255thh1t{tNf+?fdL1=CQCpWQ*)>^ed16>w&!)533em!gk662!6PW(rJD^GU+@N zu8fC-jc=Kyc{xZDZ=zg10^3#dh}HcQz3h`9qGT(V?u^8C(PA8(5{VJ_4}n(RK@^Yh zQAqy%sjyWagosBES@PW;*lFktI$!ScdsZ)Cr;<1VeQqjBCP#q*&F5_o(i(lc8`J4_ z0-b)XMajGru=bC}s*gUXGrfr?yC`|p7B|RtD`UP=Csbd3$Asa;D7!TU_?lXDJ*9z) zV(QHOyq=Xr=0f2NH7}35fb9=^K&;y}tXz@}E{o_rOMc3iU#E|PWz2C&DrC*cNBbpo*6 zPb@!H3>m&B(D0!j3aoE%;RU5aV>g-$!&A|+g4Jg^-#Z*J&Aou;9H9Rkc z?7nG~Q?Eqt?#ocE`hktdn?daLf$2SdOn2To(O}CP&^U21x>28_;L&oZy-)efX-kMn z(@L|f@#y|z3JM5gkhL)de8jnC2Vbq(75 zQwjPVA3@?P7ez);Uh=0B3k;MPVloywue<@@8&%+YliILNN$ZHoW22v)z_8yoL*nLd z;1n|hl{*H(D#r@S;A|4*M=7DO=fCLj@;DY|^uT&MAJFRDK;4kEe=k@GTBmoS>40Zo z>_sfEG(+%-nFs|x)?nrQn`k!U12f-P&K#t>AT_=Ylg!h>Z{-6VIqE%V{@Kne-z^2h z=S3(D5`o|3&#*i(7OXc{fY*jMApLB^{XV9F?gh%5crxPV1Y*RaHs*559i2P`_VK)Q8y5VM;53o{qbfR*uayq<(?5Y~XFNA;4F#X7bttIMux?B>MUB}29y{zmkhR?7>5k-Q*!>md zOBpWQZ~-z#T}0t2IcxV(GMl{X`w7KQ5I?T*(LEW3rA$@)| z%6+RKJK+-Zq@MA@Aj)O3??u|q#Lz$%VdbI8lnrlQh&g(&oGIqbZ73eD=$c-1f;aE_kA z4ga#Cd(Ubhzd z?4_Bmks%t6aD>j3Tj+LZF;<3lr>@9aFl;-?Iw${*m9l|Qc&tCzj(7>B+uERP7tM4d zvp{+Hk|;_+zVfZQq-kXHqVwtKS3Cse(X&z8G!nN}my^aq`Qf+!f&TglAo&=raCb|` zIRB*}J`~7O20z5qKB=IrPG>A^J!Y3~#>@$7$j``zHn&^oY_g1%Ri}aHt4pY6jL-d%yFl_$XcL^=#mmw?;vFKH%yg7bKwc4T z6EJ(X8nd0I;l<9Q7~4u5tsRY+F!3`_SeApHOY*VruD7W1mh-~P)SGsXcnvZ!>R(yS zE#B3mz#(5Dy-Qr4&jQrD{TuOupTqe2KPjge4cL7MxTl^T2if$1SpbVC6|eoY0((pGReWD6F0r66&9tq9rl znTrQcRd}AOKwbWX7$SXWt=|hSV`Sj5xdIoirR+`91fHEPDbMyIm38Vo!Ky1t@LUrNeg!F*6?GPUBvs(Em-rxWU$Ms18?eJ9 z5*wR-2b;^;kU9Aq+V4$-_C0@r%s1IuJ}8j%&99j9`3gA4Jt6%u9MlO;AQ11G;ouqz z_7h)2fM+sDzZ0*#tCPrr-%b|mc{J) zj(aj66IX(meTgR(A)y_tU^ubzZNo8T;aSvtKfr3o(Oz&Qhq;aG;N2F5t8{h1xU6L0}ZG zDDz9eMh`jgm`)hqi(zwDD|kMC1rZqYum}ldj?Rxn_rvqBVBIq)HLF6cqEVEqox~!X#z1!L0~p`)6=p4n1x?#5w)1H! 
zy6KOFg+GH)Gkh)+TdYEvbvY=XkZ)aA4W&oZA?dFS(ut};6{v->70)1fQaX<-jX>G{ zXs}Vpq285)^wS)aEWIZ({%{LSErMyDGLwrPdUL097a(BO6BMtp5;^oxa^J{WVt-r` zxs`n9GSN2H;78fAtX*K#y9PTcqb(iQkH_YcCOb6+lh#(F&9QP6zOQFm=NK{k=Aj^1 zB9e`}o7;XN9MIS*F8&$Fi2rCYrw&r|B2irZPEoP+ z3o1L7u!0jg5Vv+a%3pV&@ks?ZP)@gg+$4xwF_+kbEf`&$52>Rk(H-C}v33#_9+zpa zqq%5YaXd(NaS&&%RY+@Ru>{+dOsiL4Fl(KPt_N$dWmgfZRF+^E-_9zZq@u&mTGZDu zMBS|y(5RbFQUnJ%4GhQ5AGem zFzSH|T7RlW?Y4Fhtoe>^t*>c*n@Dr-mE8T;1qd2K`qs*0B2(pj^wRl?<|k`d#XscN zSUs5umqmkW=tSlwjw0spK!tjfF{ncur_Y0OEdq*>d zTO#GUaV&h+OVE6M%l&+BqQ-s}7Y8hdiRVkuQ1}dd|C>tl*PrM_KFc!40I;uriCMBj z+*~b#2+1klBB((N(Q!}(H-n_jm+N1i#*=1u^zwk9T-v5mFkwYP5l?P(t!^F^l4Odq5q^27k!apV7tC*%9czlKLS>k!M%3j0bp=#;EOT zEt~i^I#s>~<=#}*b~p_*y^45+c8h`46 zGOrmBLpcGry(5XaHI4W7ughmlr*g0f6IJC}%H|6?a)zj8zK_7TiO)r=+4=aCikhe*oQ3L^F^vQ=l7VMy9AX2 zL#FLj1*W@`p?yXzH?pY$o1UQ{U7m~eGy`rpRf^85gISv`o%^0k9gWGmhL%Dr_H2=#96RS^RhFu5h`7K4s6i=`b zoxt{nqu>yxPkyHN%<>1#nTK5k!7wR!)MaDLobRZ4)(thcN%IToi`n&*8{1CLX+|%M zK5!G7uRee}Cn=bh-)4gctI#?!84CU5zCKjf|S z4ucS#OAtKt2nzoCrm$M(i95GNps?kadeZ3cDpE?z~t12;t*nI`|2!?qJW7zRR%;pm@KjwN4uVQ5t%nD#k_;$BBt+4Eu$KfS;- zR|l}z%xd(TFac7xgh2b^k36|lftsIca2s`!$@=+_@6j{Nu$3&(Ueb^;d>G^qyOeo;sPr#9b*ko ztFb&{BnbYO;mXp-4$WQAo3#N#{MJHq(of9R?uQPyMnd=*6@(PJ;Y67NIug%=0AIJO)NhHn8A{akeY^Z_jQz6aU2TI=ZKB$TA=gZ!Q}3*TM@5{ot5pO0o`QKLIcT-Cj!UQbg4>+{ zCOA?q@~KY;Uj7&*7KfPd-Xmg&UBHsc3p8I%1M&MdbT8V8h4NU^dLKi)(-}~HYUQ={ zAL;)_pm%>u)G?q8@803u^8Oe|bxbCWtO(j?%em3X$LKvWls7Eug!p;I=+LqmEV90% z#f^GUJ5EKngc+E9WfRn%&cl4|w_p%?2I{v>L8rohQMIZXiw?=DE9oYB*#$%Lnfd6f z8w_q^w&9*)CHOAYB~HLUFnCcTD#wJN`OU{nHarmYf4Al1({{q12Xr2Ik;&9sym->3 zebBMuEF?_sg|P>3W8sciFw2;Mon7I?2s#X9<+mtLkPOCso0AMI^gUF~Ph$RAZRDvQ$}~gLxN+1xoMs&ZVFL;%QJ;5cmJUO;{uR*Q;Q+ctrMS7O6-#S~2l3hwBs!lI zdDaI(xvm&}hd-y>Y!I(=h=dsIfw)J%z|F2FRyvG;vgl&WH8}+a@sSXArWXiWtGU^G zI)~J+r~GguH#lSjGVAx;dDCC4+sPvkldMFmdu=?ZXDTt6w}2*lEf*e^a`D|vu9;NF z1gFO->MF8v&;CZJ`VoO__hTzo4m^6qSzVl5P3PRJYtu`KjU z#74K}z{Fv4w7*Q6{#eqfrll~!>{~SVe$S;D`Dh-d%>(|3!+;e}AZqp>#7j89EmoX_ 
zgd7uewhv+vi>;aEczd)RUI(&bQ?BoG7b1>1a~;tW5DWX{>#L)YFpb%tv zLliN0UV+JB@_=nggRnHh_c=Qktd_kbZtMaqS-{X_Q89QI&jr!O1wcYDpnFE%UeC!Hp(`w?*jdPDL!ngcf<0h#&*{^Sbclni1m zf9=Ie^G)#L>Q``Dd6qQrH%xEsT2%AT=yP=zI#IUEFQEWb&o`szy}mGhs{>{YQbDGX z1328e!4e*I=O#a|pyy3zjG2Fmi%*_KgJF|Uvhk%T#&inixg?_C%v8ucMH$KE`$X>j zi&2)Yr-;y(bN#c^ssAJf{rk~bzgjZXh|ceG;?w5mufUAj{(8YL{|Boa)IY0kd<@+V-CD$*%O|i+~*;0e)S(% zZZJlZyNx)pBoariXv6l995mRz5v`}*fj#5XDN8v))N$hu>Mn}{{mWOmZ3N{Hqx+)x zh%WP<6wA$T^&=jintS&NWy(Hl6vF@ARepO48T~nZC-cFgg#2dfTDXT*7!=wr0bQ?L z5H8nnfqI;kR?ps8X&J?Q>**YnKLG1pr$dQZD8%%YF(v8Q>ebp1UU40w`VWEz{Xh&# zXPDXdG^*!5Qn=^Vf@weURvW!z{=Gk7`E1HDpXtt}AE^g;9_cXCR)aKp1(&T*DYE;$ zpiH|4Jw!PWytW>^`)klqbPNi=-ehKB=HT$`1?Wh1KqkzzF0;tNcCC?MvF{@N&K;uB zMdvZ=GV#qvo@7$90U%5Ln43F}7(^p#FkeagzgrnN9l3x?y+^!Yn-|uG213M!ZoGN# zSzK6>0kXpH*w*&sK^|!vIu~yQ-7M z03EN|K>ynWknIe&)>6cPWk4WCbx9#%+`lZ;OJ}{rHHlS{=7mXkI3WEk~qgBZa*vT)TdHN1+QLjMRi;D{V zr?ud1^PV-D^u`EWhF0G#(c1R{CZ>>}D4L38$+0cxNTbXKY1_ zt%z#faM9-GVyt^J2;V&{g3^LyYz#IAoq5DVof(MluAM@S`4tp;f8dgoKBVh5flpF6 zhDkNhcI6tni zWvrVT4D;i-rNv!TR(}zhOZPK&IR^B{_ClLYu_&IGjum-dp}Zm%8aGG7Jz+HRZWCZ} z&=D-?MShW!&&jvUMac_CqTSGSl-~~koB4%kKEw@VbAnj>KS#iMmz=wtw8f;Gmw^qS zOwP33OzZ6y6u$kgn6}~zS`tgkt?(X}wS?eegLHi1copNcPNFJ^SgwoQQ92o*Gk7xH zHT^|arZ&XQxDGM*hNJgSZIl$J5o2~GI!~fK`omYg@Hb*C#{{5%l?o>=D?!N}do&DI zv4HMzxUsqpnO+7gH2#S4?Gjc$`T}>*x&VbcegpAJYBd!Hf_wE7>IE+$E|Hv4Gv8RJD{rqJJepG^f=NiyR(}l{Jh|+nb%Z^^!0jx?FO#UW^)JKcHM`7_Z>J5A7aDgJXCF{ zLb3IJrjyakq`k>YvP_CG*@fux<|Q%eNq@T;C^9J21DDq}koSNxiLSS}uyQ%>G|od` z+yYLABeA^P3Bs&Tq0PuE7^gD}l|^V1?h91fE0cC2XJEofGLfX#F3!R*6RFg_a%<737_ zHO8iDyKr_ zjzpgP`2qLqeHu!wZV@9!X3{sLSa#)8U!CNmpzh{>N%1II_i zaQOHKNPlXB?6EG(?kR%kp@`+74{(n;b;O(9q5bH*BIJt$q&jgVkQIoZG;(EHAX@%t zhrXVz)Qffpjkb~>rgsVIGz_6R%`ccp8i8HKeDoYj{Ux_IS=(<&0#C!$AW)5*VXj}p zLw3wUyKDX!a^)ZJ-Sr7PCg*|R`Zk3u`F^h5+y)eH-J)m?J`XySzqNdCgbk_~u)jt< zZi})(u=xE9^@%dpxPJxhMNVK+5Cf(Qg_uxmgr;?0NvkR6<7ceI^uRQfO*P{$`pBSO zJ`(GX#xwVM3&8U7FmPVGjw$0GveGYQuxh^=b$-gwp}L+)UHf6@Gh(((LE@zMAa4AB 
z++5n9$sVPG>DiMgc8vq=ez#Ga%M|V7+qthk&6{>+pv%3xXm#ZncS-(*Rm~G%<4Nj* zO8tahWd|^?Ly3cXd;@pYLeTgdps!Xb&#Gua33+BE%0F0xO)&bMaR&JT>b|@5kuS{8 zqzu|CNS+f(GloMvvik{)yHf@A#G*8xx}S1yZCvnhx1yvo9=xaAr5TR_8@VJ0z3)80 z>;&p@b3TA>)nZ~@odcPUm^Y@L!sJdv%*#1|LYqv*=0D9~>%wq|SQEtSpJ>Q`?uCZG zg|VPUVvFsa42|&>*gl+QI@xnSABT!UsEOn7}!w5o)%@(1;>-RTAoNNu7_nIo*)dIVZ##p0^1 z?`bVZ&KBN+h>&Bb6aE^;4^x6c$3&0_3%Smt&k*!F5Q@^%K)yqap4~Q3kHb8O+r1x! zXS<-z@gKAqK@5S6K`?mzX^0tUgu+cB;5Kv>7zQgqclJfFeC&$qi=VL5^aq5JkGddz z3H99W;So;~LBG#9mgM>$c0L|~=BJ~%&ZdJrVfIuQe5Vp6d6Z9z8cwX@0bDa?2p<~~ zNExfKpb@n3Z5I%$zKNo&N2Z zG~yFZY`6z4`Ug?dK9VsDE7WN(Ctmg@kfj-m#D%S*!k&FGW%z%X zd7ueR&X$1e=LOt&v4Hwd_w$y#Ghmc+8U?fca>ascytA+bWd+)#@mzt%qtw$C@)Car zUk48xIb`k+fZF!!5V7DeTQw+!dR{7^#Ig zVSqMb)BXgk+uI5|163$>dB?Sbeq#GAJ>0xf1m&}Dp}JJf2k$zKvX?JV@Rj(!<|Y{C zI1B>z-UrP!n(3tthqi6epcJ14<@g2Y{CG5Ue%yc#2V#JXYiKGw3LXOvqsML~)^lRN zglVDV;RE!3ahp5L-hrOx_c8L=Ep&Q#4Ais3m{iZ1`|Pj8jp6NBrQQj~v#U^MRjOz% zj3y3C9k}Eg>wDiLFYRv2bq+lI15ro7PlGOF~?gQpX*dcIKKkaiK1 z^(a@U{&j}r_j%Yp$QWzCcY*JzBJwn;n6p%a@~ywKjrYUATX!dR439$ZUFSd_W^$SF zt=uRrKM*%B5-qg*1%^)5p!VtqbrZ#?>F2_%^0ZMH^EWp3r)PUbuHxO5M9el=h=t*# zJM6!~GoBHLxX(ds`7#KSmcFHPZq3$|7LZtUwTc8_kpN#e?*!Jy?&r zgc;c~SQt%u&%`4rZg|5AuZ=

Bo5V#ykDo1N8OiTUs)0e^(`=VWEhmBC&Q|^3d&b}VD&xgdCbM7 zp!X#i`c@vtDn0;>0~Vq1i7R_Cstj~Yjx+hjZ7jO!2e@xZ2ZtsBy8TC=quR?1Pf%_% zVF}2GJYw0$_L1i#0!&-H!RPIJjJRqHnsGF%e^QVu>>tL(*Pe(Z?GLH<@Br6IsX}$O zm-yJYeQS@;kj}m8%DW``ojzxbQQbJN!VPu)&7XuVC2~io!qc(7k&xWEPoW z{evy&yVwS@E25bo`t}U-1K!;B-UHA{F@=QAa+zT0IB@^<6HDKg!R7)#6er$M$X2H)LOmKmCvP5hBi_5T(+J}gU7)Y00|^I& ze3ijn6i%c4ID)*rhF7?3-yl}7(*zy*C$aFKFFuER;}j0<{-A$t1G8LSNwbrK-0<6YFz)pP3rho- zn{hVSF3$!V{qJZH@;4+jK2@Z;xMH0t<(L;tWA59q%uW8_E}j|CZFw3-BrRnVj}*h=iZ%$&dVzbiOQ3Fs1)B7&0)FBa z2wy+NiRV9~@N1qzQyEO2xCmYl@eIQj4ki}!@7+#cAO1v7Pc;bop~! zTul4Jl>xNJet`k9b0`b2Q6$%yfU`*r>l^wGA~vhJUfM76ypm^aej0dHd?&a3cdSf0 z%IjKIQC2e-q-iN^tNlw*^_;?5uKbT1_7tL1eqzWhPcYUYeOj`Y`Tudt zP<9UcQs1bN|0U4h>ct1Blwg`2OP~3ZEw+CFR@Z`|pmPvzBv0^2D;dZ>+KMEvyEEy` zS6qMbET+_-#e(oKv8?l8@Rqlz!*&6y_=Z-u|CjqtiHw5}&^-JAnol-Dtt=aqwM}5j<;LhLzlu6y^5k^e%ab;~ z2h~M6WDE_(*j*PO=KLT`nqvYl0?vTT=95?l4~Y9xMcU71m}qtz%t>$2uRh7D%85I* z`v^(>Pgwb#TzexQe-JIbsvqY9sf{=-B`>@$pYur^IY=#Q&Ikguax22#mk#Nfj&FQ zvV-Pe5nFHw2yUq$1^sTgYA3>i1u(Y%fF`W`({XR8Rjd;4il_61>KQ5?kdd&ARP$y@X1 zK=A8-ggT-tMPX0(pq0se=6Wp(#eV&Gx^EzeKX!@qtUsV$K6QO~^nj4M0_OYWcZ_qM z2X%Xl(Wm?V$_w^j#Pq+J(A$_eIa^UMb_Ms#sRQE`BT!O#Qz3P< z!1}2}P%E$(cn&N_)4tbH8q>%e=8!)%;SFz>1z_WZSgatvyKsJ$NHE<2Eo;bQ_+cG* ze;ZA+m{5`A@eg2A2I4B>GPsey-EGQq)-GFvrE@DmWs$>6@*7a_)0pPN#;|y8Hd-5f zg294XaCsL9iKWE(rFCubu_q8Y{1ViKu7{@dbJW30-{0pm;B=bU6;5?frbqw_y8|>M zJc9`%E#T*YdRRlgW&|OHG^AMdSZYVtI&Yee3;#zk1 zP}f?9K_?#L&)Uo~~KIgiw-!)CdZr-6TvzuI4N1mub`NX?edPQB@!Nat~KHjtg z1+hoD_m~$nw;#g+cg|twe-|O6R1VtAcu<=i#mcmQL3(E-vv)Xw;iolN_@;z;>XM&k zT{eqZ@EJT5^O$`X>6vbk5dF3{gxb(P$%_7rEiqtq(h`D==dhOidRD&RHfh%NTot9^ zxvP$HqU_cs+Pkn_S2E<@_bP^62P2PL6Y$lvpzzp6U;T|s5#Lrs> z30bGWej{bTjjdtZj63AJi(rOlh!gj{oO)>g<=T5|AZ3FQ*!aK19EWRY7B>?tXHmAw zBN45}*&{zl>^!=USBAX@@8g?s`mh`+r8j(dev$*0paR6&E;P_T&6HVdP<^9X6fQrG33a4jKDT3S9bZ%vi2A7sA+$uhamFx7y zkY6}e;9MyjZnMQ2OVrKfUy5L^tf(Gy+ePaW`rLocT7}>r>q5`b2g^< zo8fr!-#d)`Lb|9u)IEwuQ%CCL+W!EBH9f(_q@J7hj0Wk+VW3=ID_XFcem{rf9KHKW 
zKszR%CEWQ0@)IMt5hLHQ_gs|t*Ry~ncX;NaGpx(P6p~7ga*Gj(r15r8hx=Ku8l1zz zS3LpAr7Ku|v5pH9#eMU)Q4G}{gsL>}j1 zrF3C@0pfi>DU{cPMV9+s&^PZnT547Q-wB|8xE=(>X+?&azko&NC(P*c67w&RX2Wx_ z+3giES-r6E;VY(-rw5T z!?H!BK@FtdQKvf!v3VEoG&zAS1=Fx({x@`weu4HMD!}D=5hf|GgTtBtlxG08x8FeH z<~N{E9SZRaib0R=>c-!KFt_}05nZZH_bpXOlw_Mg#~p6lk9u^4sr0S2@bfJ{QbYud#e;xSx2M3&l&ms4ZDlnk=+{uNWk+?BXGmpIR+m?b` z^ySge zZgmmq7)I5G;jF^vF&Mrc1J(npQ2WP69_>E_GG3g3#$e)z2T^zRnjE?lC@}4`3{yt$ z!LVJYC>ynJs!Qn$7W=I?Cb;K7SFHsac=zBPeG4$WM;>(-FSyAM!|`Hxb*!p z6rY+x8ETE9W57`m5BLlkVJ$bAGZiY%RANZ$9Q5f$jFy_Itmkeu4wyoH#T9xeU0%os z6w!PXd<-PMF$%X}+NIXrL3iOHsGoco!rk-G;QdPQE)b&Ky;$_SGm7pmRV*Y&0_Oh_ zBd6jz+V4ul!KX4ne+}(FbFj!J^#FL3#zJZnd7<|BL++{d%x{J}Iu;k9GI=EU_dE_^ zqobiTZzp^(`i{XpIurU%eo|sL&Gp(5Y;RDEm_VbYBIk?z=Kl1ag z7(BWLEl1s_Zo`iXp&n&DHywlQ;ME`}o!RB;?iq zjfuC`v7&?s9K5Lkq_TA)^O-xLTJ4% zb>szO zwh!}*m12I)7ts7O3$@cmV@TOc&<=&vZznVlX$KFc zK;?$liqJpk3^pr+7WOM9AHR&&za9h^mwAxeb7`*!>`c!WM{X({4?qKa{r&u zE|L6Rx1zw~$sO3Xx)v5BtYZ(hP2CLPO?18ue1MX(G*7%5UR1t*CaPRRp|tiqsFyDThmtyM zvD(LjPZ&Z=)<$%h*T$@FbYg>VCFbfZ=Yzwqg3jGz$xy1jb)Jat9NdQA$hDZ%QG!73E;d+7#|pUkODq8IVh7 zw?pS4t~@GMIE7b3cV zvULTvR;$6|)I65e9S`Aeu7K?9L1s6BJkP>&*w9R8@_iksPQM1uV|Rn2+gS|teMA|M z-4Nz7pV-YOi*zy;g0gcumwuivimkN5vY(SMXDEmA9u3s(5`f7Ii0iewQ6YR=#g%hD zC=wE5A-x~GWtlSkN5Li1nntcyr1IoWr zHs2pBztfC=eJ&VxeCJlvWz0r+5c3u_fWSM^p?>TbU_IBNo^>hY9XX6TYx+QMI`eIg zQCBqeBrxhDvaWs($%=MN-B|~@zMF|vWvmd6ci~;(S)jM?E_R=J1uk<;sD~>G9BRD4 zQ1mx;Obo+>NBcoY-F#*{=0bPpM-1KG2NukH36ZwBVEktum?bCjXzx|P-)#lii}_q4 z`ki^MzK5CiX{_}9R}Ai(&ON_t!C2=x_n5Z>B=sv0ciIJd&OHMu?lvIU;_4{e zmI5`A)EV{DZz#UjrKqbkfDs|&i@JN2M>`p#WaR}>QgIeqb%&y>4(+uxFM0DPPT#W= zG}D=ja??U6Hz+}iV-v~K)q{DQNny%6LG~yGb^Ct9o*NRW zhwvSAc~qfH6A#wq-B_Bk8LgiLfbFO};ERre{`4ql5-dim%AWfW$!A~OC zYTMmVzm#&T)=_Ap^XNn>+5_3Ep$sxtVw-lUar`Nn#u(oy%sP*$JS{kL8<> zyaJh?h6l^%VN+ByHpLfWTk$VoH)9ou*JrX^Icev)OITdaTWs@v4b}U;p*cT_b*7oD z_3#B~8%y8wl4q=C?Hue)zW7F(+tr`!c*^{r(7W&yN|wz6b>1p2o7xZ6!=7WH))&NU 
zGPvx}4@fu2L-A1u7JU2%OJ7=twdW~EcKAP(AJ{9B7WWeQo&SiDr3Z;E}nJ<#P|&ziVuSo-E4$^ZeyXFWL!`%SEM`eJ@-w0hv6%p zV{q_!up0H0^REdYIe7~+CtL)jaUal(IS;mzs&LkG4HW)NIi`yf1?F)%(tu3j|M z{w#$^)9s+T>IYo`F33i_$IQ4pEPoz(t*2M80G`hSrdF}gMMJ=qW`@vrJ3;k{y0<1C zWs+nYI{T-A_)iVANA7{hnR*~Sy4O)=u!U8x%z^SyHRQhN&6H6Onc<&9FnCit8vZ>W zldDST_d5wy!UsHfVHD5(_5;M?G0fykAbx#B9^6G?XyyJ5^DCY~)BX9xA@oJ-!*3xx z{}B3=>r@aM<6BrAWTs8b2%?dA>oA)Bp5WBed zpI0#S(gZM`vYqY*_nFZTEtKDiVPW51f${4FxcVz88Do9X<=`R+beaZC&QSGf35r#- zMJc;aqH;|(Q;l-xE#K@g$np^ecUQ2ypY~vIaXag&qr|3t#O%pS$a22mP z)C3x<|Cr=!vncsM5%eB^pE@(uU|{u-ODAcVx?Ra#b~X@8>mu40P_M1%Arv2bhr%S} z+85o_WwZf3RvGhT%VbPHu?AX->bSIZ4>&$O3*tY1aI>Z>JmK6Ku%f;I|ASSycwIAi zd0vJ{2u9tGyO4XOl`HSPQRGHH=d~_-F><{VdRo#Rc667*_aiZ%0)-;08se?35n}z+ z5g_Ta4=TKaGE<`-h%q4SDkgCdcV|&MD#_pa2$XNGq1(Yp zST8Vz5pGhjx^9Q+AswR9(<`AMh_s{BzcC?d5jYwbVUyw$+9cJWbN`j-aFq6ZF^fUG zoQsr0ZgCIm_q69MhV;QZQOA-vEVlwh#?zLd;AB4(NO#-2tm$Hf5q+WPLk!gUuLZ}< zL(nWCe*Ay0Aoq?C+MR1L`Q%ZEP8@>bCl5ryq6Sue?i!9K9bNb&5-N9(L;JzR1F|;* zk9P$i`1J!8VKWTAQ;)ebNAS$m9pHD@64R6Ou=>m~s3E3_`_X5hGieokARRhJk%ii& z8eVt$4r}Z@g&Ec4o!M%SDyw&3Ga(sD7dC;_suc1=68ohp0cE1=sNMXOy7%5e_8Kv1 z1{pLbcqr=XZn8jPfdu^ZGn)PP9+HnzPkBx{NS{CFezIK<8Ga8WIhCT|_zbLkN&R>! 
zew2|P!*qXDap|113UQeUYQo;oym}6de5#@MhDwNT%tW6>d#Rgm6_#g9IlGVy#TUL| z^Q7}w5?qh&e@daFy8<%PCxh{H>Z$Pf2no`Iz+bq);;*#VxkWok#U!-2GzhJ1n$Yh@ zFtqCJ0OQ-eAa*fzQ{Vav>sSSIc$Uoy=l8|(xsjkBJetZBV8dL~|k265#E#Yy)AE5EQT9mUhTsXaun@qAq<;oLS6j}-5r$n*GH#jueG&eGZR>lKuwO@RvYIF!F4@9d!kAo0j&iNSZV?Zs^j96c9gza3@b zLoLJ+-v}0u*OSNI9bG~%F^i8YiB~6vdi~Q-cfWzxo&6Q$uJ08A>)tS{$1f-gI)l|+ z91cmVwGeu1Ahh@;pjK}m)IW0o{q3$8xI7JwrH?_XJ?0qs&s%hF>IO5QozgjCSdI=l=~zSSUw}6hZ z-_a?e5lmw*p`WV=^VN^gQ}Yg@m)~GvQL!jpxg2uY?;u%P18qH0Ap6@@7;)x4w(lu| zsE%$Ff18gc3L_}I>4phumXI64z_jHHgzVtd3;u%{*zV=2H>pwM!MU5D`Fbc$0&Je6g z!jd-X4d|-`slT%#u<{Iw=N}Q}PA5N5ubs?r`gQ2~dY|}-Yp`5Z2eK&I7hg;v?b3&O zR%+4g(=#r-Q^X%96@kU{EOdTI{djvn^XRY3KyUbYG&@v*wl1_|v73i6IdTZLU4zPe z>frso1X^6uxoMB%P;ZO0JD$V6e`yB)tY++#wNXx^k(r;o4ra^lF;2U54wYkMaU{uU+IV909ifB*BRB#BuQ; zPhLnF8f=@-IuAWZ{h(#k$DW9BJEI|bRyNj+TLhUm`-7otI{KWOi(EAw%odh|&~_B{ zzBl5AUX@U+{SMl#!-(PV8#=ZpL(*5$S-)SVuC4PR*BQbE9!`!aq#b9$3-T}dqxJIs zq+@Sn9}*rDht^tA-<$S2?{+!%Tu7Q`#1fG*kamqf{(~kTJrF1dJIH?_58KaYu%ldr zwz@I&P5h2+=S|UR?^WvEU4xA?YC*iffSJuR=f>x)L4ITG!r7Ka1S@W?4t zf1~~%3H7dP0>Hv?0JPhEfC7mIgMzPt&#aH&S@JJxkM}|4-DRTo*vs@?+Js?Kj)8sR zRcKjijRNiLE3$WCU}{2s%=pdFdr2|0)n9?6zb8PGb1_KnuY+K{|FLu8FjD_5E;yF~ zs%^x+i75v&lY`95;x zcj5+t8ffmmo$XF=G z$l_#RPE$Z#G=yhTmObh~CXQHt0<3IbGQog%tWa+km^#wOc zl3Um~ITq97kD_*@f^C>u3hKhoEITy?r?0t%b^UjNjq(NMEJ~S_fU!oCOR@Qd8U@c< zM4^|aquaI7#Bonh*xc*FuelFU^Z6TUT}h)DU<3{}kHPG;74z8a3-KqYZ;3o-(q9Wf zrE?FJBg$D@`Bx~La~LBny=m7<9Mrj#lf3*CwLOE z_n>{iWk@-eLVY7ikY3{m#>1D>Zge~g>yb;|zPac!`ZGw@{RvK8C$TAA01lrDP~ZCw zwj=~WiQW%1d$@rWo;b&B!XWJ5YP!pIV(i$9ko#r_1Uqa5^TDLu zuiD8wzjb51YC1~irikpaMu79YOkB457N}bNxyABHs9a);xvTr(EJMo3PMw6pf5=;% z?*z&92eGhc77rFIhU8Az#jePU$GpYW>3%XF-O6G?^g_7OaPaAt3V}G@SK_^Se6w{8TSS-Dyf9_HM=46 z^>OME^A-tYc~^Y5T!8SzBT(~rKL`qI9F&#TP{@vf9?OV`!F-30 zXj7pD`RQ9|@O?FUc=cn-_5(~@J|7ck=U!5CA2o;ddE=1BVBqJ4%D)#A*D8{$Y`#(E z)s3rq5|bwA1$xav>XU6iS!*69*%?C3uCwS=d>`wk(_CTjJ7{DZ>F4uBVQn8ltb}2x zz5X16w{x~FW(PK($wIwLk?23Z9UOYsarW5`>hzQO2jjhQA+!A@#B7X&rUi>IS9t;KZ;&th@-5tO 
zNP!XC9-*JZPAHTVLiIN->Ym)eB}e{5lV%@iG05i*Uut+t67^=iCf`f(BiwxdHs!Ml zxU+INv0C?n|FPE)a+kW$#x_CZaS_;z&xgdhOT6XY5fcF+uzJMAY> zpBcsduir)W+eKWn#FN|hqur*14Osrqr{#4?)PAxKeDt%ytPjo3vIun9f0?O>H>iC5 zvqIu@l*!d&$TwGxZgES&U~32jSQeqimVW0Ulu_?)W+lDius*H@gky5)|NDXW?#{qv z-tA!a;uY&2^&AuVB^c1-0S5mR#M9a?VPWe^6nlJO=}oPuKlurmc0{37_6YDB_a_LJ zyx>U_c7QE$nq5BcVB*vsirgP>QDFRnH2(;!ziW)H`YMo%{=#xL4veYoPrmXcidPw- z$I^XFo?Hmg8MVCSniMs&8P|HW!H69X!D;a$s49L9Zf{m%<%JJ|P2+ca8e$;jjzlyTs_?>%=hKFH#}6p~i74)PEDAb;pn zsG58in+i9g!^j)xTe%-P{da?|;b=&@=ZO{t{XqVFHJG)v@(~kSu~gRv9F)hX+kLwz zjELl3qh3QPWwJdVQjgs=%J>BwVS_t>#KkwLd*C;475+jE6A7(Ih{ z-x7#*zD#}uHOP!gSY^{q^fs0 zY7O)(Rugmf3{wv7tEibP2bZZ!!1%B;N(`lnWoeIKme&civo!(zIfdZiE(F=BbF6bF z{cfV~)6Z`aWwLz1`As$mu4GbvPR9J?)T6OG6^s6$o_F_Cy!^9*sq9Bmzj-uE=S!g_ z?lz8pyA#vxsq^WJIXX-zMOiL+HR3h=aq>CLJ^c`@A{|jBGH0npx6$s1Et}gQu9h!SdCln{OhvWa)7fr?;?>v=GQN?*bQfJz4-mS^jbK^ZN=`9;dl{iU+g| zn+Jxoc7y-Yx0o0%2P?}G47~6JLYFK-<0%{>#AEeCri-#;BY#`Q}%fRrP zn&z81Anfgr7Q*46zIKyEZBKzRo3o@Xq(QX#4`?^JjFO*fd61O6fM!cksr|~$ykrWG z))kO(yA=!j6fm{t5mx?zW&b>=czWUdQyct*E0f1doUPG1R7=CtDSR zyh#I#?LR>zd4CeFU&iDh^1&~-2*u5vU`4F(>~S&FFEtMY)5}=f`BI2Y%L03T2+{}s zgmt}LK(fT2rIoir+~`bvZ1@!olMEoi;u2Q+7-4b8UNns&R+4@sH~fcVKzS;+A|Isc ziHMo~{+VkE`eDkYdo-U+#n_@nSiv4po{RX1h9%fkO7{!&$H)Pzz}M|Gc{9FH!Xv9#SxIt+OLr3s6+-Mt9h9P<%7IlVdU(w5Zrq` zQ;uGbI{9Nj?MUp`VLh0nCW%{g%}3jnjbQlM5o(g{A@}<@mOC&D#6>kAeLx&l-5sEj zKV-I>&SLbu8L(_Z8&b;u< z4W67Afr9f7c+`+IEYC?`#_uVo7cxyD$nt?CeIPxo6t-!u5tr_ZLg37%W)9-KEV~{Z zhFN0$SJDDwwu8Zb5hQ8lOf~u(DrFydvgiV6Y^-=>=@$stlg0!GdpZV<{7!cWrVwW2 zbHR=^qWTkC!MdmjicRUh?skZK9NWRok7a;_mMPZ{EO12p@ z^SehO|L#pl(!0Vc9Wd5LCqlp?rW0UhpJQ2fVqh4rH=5If2h<&sBGzNm-`Mn1&4EAzo{ z6!C>Wrn6AJ#ZZ`en7Q1pL6={~qW8rd-f?9(hF+z+`>&*1eOS$OW(mMNJ{c2!F`>K6 zKiu!_CX{b7U|(gOAaQxZLbs_{z@OCbH2Aeh>Ar+5SlJ0JCs#vdziz18@{&t~q7~B8 zX$rFyQP6PiJlGiQ!Q3Gwm`|B$Y4`?_@_rvKKXnT2PLBcGeQH={u@}P{NTXl&n3ckI zFzca0f17goBtSg9X{#yrHJ5W!YP&dVkLXX*CZUAMw*L_Bp zxo=TCmxiQ)V?kGUAM?H}WiH>YqFpfMnfA?qmZ13@9sd);n^USpX74a!6@0k 
ziC6V#LfeV&q3x%;kl*(OdVVq=~SwF=btEVwCohFezZK>iszZ#)dKZqHJj^~-t89No!$eXn40 zsunWS5}EX`#R_HA6SV4mk5`9BKx%p%N|w(A^~^l*=sT5Zf4>Z+dj?{6@^{GHXa}3a za==-Sya?-Dplb0I6e!tL7lXy%Fk>8c3Lim<3kP}h9OCl|c){m8=yc^gm`&7*)NgK} zcW)U>*ftqWwo;zh?K7CY+QOs+k~BiIF0|iw3p`39c-hmB7&2%Og!S@6+2`9J8U8DNy+%95>*+AJ$#P+2R?KA@s@<|E4dwPV1EMr8DS2j4l1 z#@YvL{OcC%NF?s+>_}|dWJr406!2W*2d-9H4E*gBcsDFTi+`-psQ(pQJ^n71_Mo11 z2b$|1J2C^`2-=7IU~anKAUt#@O0z>mI@_J0?ByOXez}JUo_C0fwT&2AMj5=}?ILNY z7zI=IF=fhqlyCOp3CFC#@sA|T-il~^>K)koW}@DbD`4s>CvFvKudm;UqSN=Hn>G>z z2aHJx!FIF*mHmLf|GAB!>t~*9p9PZp1+9qz(E<&L*cP$ENV|~ zfG*Kg44D6b12Ghfdf9`x1LVr(WK|LR0 z!=)csb@?1>?zwS6z;>k0bj%zf!#1}kC>UtTtSpmxeCSgUAKbxY6DUJDbUj-3J_&XI z7(%f794zR3M*1%4_U)I^q0x&vj8B8gP{EWjQ_$U)&VwB)8ZUwIAW)Uv_bq0Z+IraWDu& zH*){7bo|gp8S?IPSaBu-YBOt4x9L4LA1i^j5i2pEsssn~m)Nz7dVGF~h0asAAhvKK z+9hnj!AaeaSyPQkX6vzL{ST-s{13D{_hZ1)G>n{YfZ9=VCK1d7lkwA`e)K}DO}`DH zV1jC(hiST|y?<}g?b4v|(tP4o zBokZqB$m21L2>pIY=3(MwGp@22g(Cy%&te{M=u$FxDg`n5~uA+612rn1FInm&}U5? zn(lc^*+ggJC{SK$>=)`LJHoZ6VtF!_Q|`ACeC|zxf&>j(*c+fhp^!J(ZAO`NFtO_v za%sgLaH&|20=>a4$h|vXV{8e7kfmkdvH2_-tQpPyVs@dXigLPt z$T5A$3XIeW(c-~A^h+wo!olQ$wDVDT8|_8uw7w9tB?`qBuXsZCUee4+C$J4cqu+Ld zfiY#GyT~^=?r-9ec4GQj2~>A0A?^56OpQ{Y!5hY0R(;~OpUzX)?_=-_o(~Cbo}ice zhIZG{Mc!jhK}Nwb@ZS0vRo!|JviuIpx2z@YWMN^_z)Z-ye+~qxIajPNbz!8PDb|F2 z!M0T;khdZe+TUf-j_4}Z_*p@3aUJH`_Ggau1&}$`5nCplq zyU`7`?LknpZ36M|wOoG8hy|ZAVA7LOMV0;;FuqO(2Klde?k|J6^!rsugC*OsBWxQa z6<ed92_lzgt{f;t%0ozcXV~r63H$b^=Dl^+S10UZzPW_RCK$T6t%S-;N0&JB(|hs{JVS9Az_3)72hbQH5XMU9N30Mb=X$q zf!;IKtYv>5$b+qM#Do19H{l}ks;}VP&Uw)4*Vz4MBzk#%N8M6WG@2d-x`h^)=hy(z zXSNX!=O#1IpUV@?o1xTuFbGo4Ol=E#3c6#nu-p73c^zJJGrvqoe9(%jt|06f`VQ&^ zqo`A`FIY?__I|4xGb>-yJNA?~-Dy0Ucy&#^-eXl!B!qt~fRtcw^!Azt2|Fi3ta>BL zTu2iiT>y<^u3uI z7R?GWnDnEi!nXSWXa+Ch78aW+cYhyrN6o{>C5O<|X+QNrok0g(Vs6*2g!&jW^fY{l zvv?|0e0>UCjqgw=?h&YJjWBd|3+q^rg`R_4G0gA^WN!Gzl!1HT@etyfWk+MB%?%Lz zQ^X=Cf1w@sOjv!2Itb?M#pzH(_Z5T@Kjnboo4MFB&xyKkWUOVn2}nEBdDoW17~Lx4 zl62%2u6_`0C1r7H`p&F&q!~yilExtQU72FIMGlzV)d#b~v6$LZ0en^`COG#3{hJ%W 
z*y9qb9MTHfMSo)MyK6kFG7mI&H-n1a4UgsDAtLP^$TAev4K{!>5X(pdiR8kyQCyPN z4rPO%5##a<#!5=SdVMk!)KQM;@=Mx3$Ew(W{ z`A8(TtcMY@ptBD)*^g+u>#NhBZ|JP|H&IvJqDy{>e!Yt|xZwad+%~nTz>D z-ofJ^TJ)T62J#n)m|3Oas$c9Nvfu#L6{(n1b)8p4J)<3@IcxX_p{x2J8khK?$Ff7r z@Fsb>=09Tgg*7;!Pbr2KRKwsQ4Jh6#!n*5pKdd^0##@&{X5j$z+w%c*dnco*@Hh@0 z+6t{zk1?CrpMufvz#t_aoZ~K_%ZmsWx-=6jEh{l%cN&Cf2jZ-OA5lKwELX8&E`?Vj z79WTX{daMjfokfT*n=v=c}#q>l2>(fVCwEJwDJCes)5INg3T**=$Z`r1NVdU@JQ(L zWf-_(GI))RL=Qy_8!V?@uB{;$2zPOVWjFX0428^55)?SC!Ip{pFlf^QRA!lzCu%<3 z)qaAK&%2SopM_C(s5{BxZ_1fOayOw4hVFj?ng2dw5yOsA_vU>tuzkiVb%Q7u=mpM+ zo2WDOJ`3D;8KuJeyylPFpz=Nr(R=^|2B)E6<4hC^Hj(Fi5EIx1Gu7!mn4dNSrib1G zfo>a)uf2kjhu^`1W;?;^)96jUapxijG#L64Wj8j0%c@eQvD*Yv!{0gefN{y`VPN#G z4sxW);P=~KP~USLv6ZODqR|u0%i6)TY<+I@jGp!7qo4%)B?l zta%U6w(B(ZZh4AUg}G4fL4W5bE9uVdj*`JL7TIGc3Pzo!PKL<}iysToJ?%02KRki* zlQXc;Zzp8-(_np6DY(Z`C*_?;@NhcFN?S5PI<$(|w|7t{tsW#s_GnNs5Tiylk|(k^ zak|H$>NfQQ2of!}@|ZPmxb1eG+5Erz4--GnpGex$M0tTe$x`i9)VW6$(iE8;^2DPw<}oGia@x zh!bUrg|`(n3;YXF%A=@XPTihGLm@%_0LF9C;F zx43kB08h?&fQFmifO_ON-0XG{y-&4+U}Jib!1_9~bKMM)8@8fg++tB#7isOI0fw~h-Ab&`n?+uT5fh2>nm*aTq zmS`|`wS#5*kAwPk33EJfh`jbAL=OK&b7AN^7P4bL_Kw&M-N|tfIMfHbCq{s^!y%|X zRfP6O+c9DOPHghu%P~0R7RxKVfrD*6qM=0ze!%LIR&;UN!?ZhY zV{U8#6MJ+qk68ymJbR=fVs##xKDmJ{i<6kL#72 z7&!hK1okR|=o!7RIOYpP`Fw|E)n_oVS34G;sYNUKZC*Oc9_!D~11_UEHH4$-E9#w? 
zvm9GOeg&C(0gwLo6HkA58Ow)1fxtJ-z)=U(3-6066$8mndrXmXlw;3T^zFZ0iQZ<# z%=nfIRQ?(Q-PH{cxzz-eX+lxR>1P;F@)E43hrwpJ3z3RCOh57tf?v<)(T6(Fd9Vk0 z;BJHNjbmVQ{U#=z&&SAvM=^Lx4RiXD2Z1|IpxQJN60`%b*`gCXz9yjVflEwnou|;r z`v(%DT0mnzo%cK-g_xIRXteYkN_9^<@}dS%4{zYMH_O2@_Y`Q-c0kkkKG2zSnR2Zr z5d3@#_q#w@LSZ#a@Hs@dF)ke zuG$4n&0gT!@*0%`hbU6g`=U~PhwJsA?DOZ})q(Q$a=y8m zcKLz3Fe#{#TO9ZaeEMa9IX#cwj@13Wqk;(>zA>v%M{HPj9t)Gn_Y|`P+KXw|qWQoL za%Z7Z-UYLM`T$+@+;orHgL=}_7<@#Jx@7OO7Vjg>Bs>8cDk%Hib1xsiq8>cAKckNC zLar`uS5)o24#H`H;Oz7h?Fegm)W+SE1sR3X+G~z2|4smpph-;qc`uWnKMOviXunE~ zJlCITM(ytn#zRIx!_s!FIDQ#yE*Fp{pidsxZ`}OeB}~RMkbh4N-Q=-z?291oze^$R zS_9EK8zEnvi4OT|(K~>+`P0m??06aGUJJ#hOYWF(Weo(nj6Nt8=tW zoOq3SuD7RMTm*Xde+wazt{5@&GeoR-14T10!0PFpsMYRcn$44N0Cn*;`)JX*-vTh0 zMDw-9YcSgsh&6Z5gMZ#@G`Js3cOA|o(#Oo%UJIJF)hyAPe10cqqioc0ramj>>eOB= z`KJ!h&!^{($AU=`M_dnrnWUqX{}9JWn=huJz3@SBX7b^0;wwJxDi zNEgKQ%LLt%#5$^63RwdlV1D6ph<;hk*(Wz}k3Wb7OKy_)Tn&|zx8Uafofzd62_yc@ z1{Qq~TQ)=y_wymw_4>#c^h$=nua`kO_eW6_KaN(!n2DWq5j57sIv1&+GI2Hv9%Mm4 zz(oA2Lwq9YYDvq#01Wr(5=d;o{5 zXO*dm(M8GrimKwT!w1z3^Y4yj01XK2j$v)Meb}1 z2-sKx>c1M8zGVxVd>RLdD)P--(}iV+sXyYd0#bgRfgZY1yp?iF&gOk_+dkq!ieBR5 zX^*gJ>Q?CaQLs@{V&SVzt*`n70`*7QchoTA;|x(W2GmyhhY&A*KMDPqIwT8__90kI8*X{DsE5)z>wYVc^cxS&QPBH2(9+X zQ6gUj0*$9r@@rOL{doiS2+4U~hrF&)d>TxlDCd68vUP#Olm z2R?+f1J^J%IR!FDd?qh@9h&*Q2KlqKkn2FR-n1-eb|8QJe0_`=XaLQc6WH%%$H7`c zvu}kf<5%k8Q4sbU)TXgT9sf|n}_L*P4Lnq4dq}E12rsXrRblL?m>eE1&_<(7b z0q8!Af&6AyR2Png@MnHl^7j+i1cjipSuGm=BBsicC@6nT%#C~Bx%^`!@l+P0&Nm$v zIXQ#+&HJKu;Uw@~_L_2CLox7T1=z-?poeiaJ>#3;xLXsma_dq1c`W9?JB@;QmWmoB z{q{oU@R?VOz$_vKOp**x+C51z?@K7=J1zyQpj;L-Rs~kW&!MK`3-_uz1X9W~iPgF& zEL|>QidQbZH}9a%g|~cO2kD&iwLrPtOXSOJAm#{twio-kPEZ*S>{G#A&t2h7Da|O? zTncPw7woW!MIC$vk$Xp@bWg6ep}ryVf@mP7F*fhI!8+u}QIOtlrMaXLWm(IZE7ySejA@V|CJt#>Atd|@#a!`A5D%ZE$p5h$ zh6jEE*>5wrKB^e~SCcL~@fxeur5%FK5f*%@6bnvw#}d~Ybl+LR1<4`Vnv;oJs9lB! 
zK4-zAS6S$0@E3AM{(%i;SLsWfHiQLZ%KueoC(iV)P4wH5sWP1aC_l$+mKDB87 z-VbG4Ch`#0fs#8HMD6cup!(Ax^s9-W9kVg{M;5cDVfUfA_X}3I0-@yRa13`KJ)?4u z!agzs_1fw8D=x%9t9;C#{Q-n4R^#vG^)M!ybofIv7(1tjjwfSKIPne_@N#DCc^qVl zpP=s~M}uP84{*x;rX3V%g;fv(bTmi~EMq>uj$>QOMJ60j$Th3~a{tKy>c#UxCwK@` zZgmjZC-jCz->X4zI#Hq9`Wp=B{$5_7WG8zTMKug4J)Y;pXmNG5{Y6=rH>~b4&{7JE{cR34H(xp&PQUq{shNT}caegjXO$!8_ou61q0QFRlc*hNJ1>LcSQD0Fy^RJjg z%-wBlz+3WZv~32xe{idKE$TG#rk;K-{4rW;%g>+vi7KM}c z9w@#60>`h}KB_v1&tHrt)0!dg&P7zqs>OuHEHKEmBJYh3wB_o7`NS2}Rhmj%17~JB zNyeo4tvSiRGBBvv8wH!qx%S{*V5D{klUG$T;hj+K9ycGJ7s#mNB?!y=9D}Iywdi+N z1j2>uxpGO6BDhy0`30vi!On9c!P{pn+9CnR{G|Nepi9hxbz_me21C{!7t~h9L6(}F ziw{0hgzniyY#hp>{|x}2WyDjy-IH5=uV+sC-f{WV>3rqj{oq$I1Uzl5i5YxTB>i~U zx?wpuRdp7F?mdSOeN59>Ma@Eudd~AM5j%VKx53GU01T_szw|AH&e`zn56$ zahp85Lm}$mYgD~Hz;rzKgL!E$jEp1(!ra^3>Rc3W9sd@Z_Z$YJIP&ooo&=ZV{gi?0 zjsNeU82|nTHtr3Cl8i1q>{JpIOiloEU1yNKHpZA3eU#eXX6`nbV9?nQbIuSyvNQSi zj*Y#8PI<6`T9idh;>xr= z#84_He)ePT?XO0;jB*ItVnjXx+KqprOn=Efth#>-y=zy1=1t1-rJmsfHatS*)=i@H z4;dz; zyJ5gYVm>ELXD)xIU|ePgw|di+x>zr;%v*8DK3t-+av)mXu>jAADX5itjPmVg(BeWJ z3p+-nQ=M~|)FTQsKUZT}Hz`KG2!Mdlhcx@eq444b=3+Mhb{zYQ4X+{~+#m*h$$vk5 z&U4Tst-|u3604pq#gl4sOxa4aY|j>M@qQoAp?!h)Wk1%jGaV(BV?n3?H_Xr&NxPVI zXbCGvzca<`oO1!@Z}S23Kc6r^_aAlDXkvcDItX&n1WsDN7c9op!%Q^x$b|B{?w!Ga&QN_Jhi~oh@nt1B!r8dckudalzkPx zVzpky7@s&Bvs9kw9<~B%H1DFt^C9RGK;N6u{jqT{-7C|*!AifBw`d1r&N1rB?)RDr zu5y${f444K@538*WZY zu;a%SFdfl|kw2${)rKEToHU-x`u>BQ746tKe+MzD4r5_aDHNRdBrmfbb83y@Az2w1 zdF(RCb}Z+bm-c|wxcAg6+!JDQ9wJ-X1zJK)(Ll5m^IO9)(A)#uZ~g!~)eE%$L>{7s z-k`jLtPL#DKWYgGwOe?>p^22Y=G49PN|c%5CE zk-zVY_S+9ZKI!gbEI(oM%N)@AOg-+?N5PKQXV7|kDw+rE!ua4Ha4v+{7#|HGVP_L) z#npjwcOT9|sk3X^H{39_0h$fz*$3$I@Q`tv zh_0t^^59Jeu~|Be3$FJ7+42Wykd?+-YBM0RsRPS`h%tFGABFp0C|ZLqLfnRf;9}Sd z#CchuvuyxZd4DIroB~awI-!$w4o@3Go$)b+;G3F&-gUzvLx zb_|#EAY}VRV#WVrhT+$!gJu+U=!Qews@~*3jzaC9o+$C#FWNEUCZ^{WF|Icq>pXs8 zZ7)CS%dQ4PVtHsUjz*ckEmT_WflSRdjPLUU?e{Li0qf4{=QbK6I@@Fs0c)0{Mw=`heX$jU7n>+3!otab5@Hn-#~1A zy!ToN=<@>XP9$RWR{A?0j%JS5kr>`z7jzDuM)!AJ(KT~EsN(PPf>GajR;MkL8@P?> 
zKTVh*|2r2*pJm6iq@e_~K>|q%x7z>fu@AWG8qzk?&WpT%D!_l4i2SB57&!A3NG_|e zaX7FfZiSd-tSfQN1jfyTiVXd2y!n`7l@GBg>bDKkZp8wL~avYH9DkIi`=b_2p+PA5KaE>zs; z0ghv@gUKTST6_zp&%MCfX~GreTOZ(EGv;|?&($)8YJYQzK$3D)*%0U#)f5lL?Nvq`2>NF4VU%*S3w|F9aU+B1|` zLce**-rn4=zLb?OT7mZJuX)kfW+)NSJuT7*&!tvF`G+J3k6Z&9W4@t5Z56lbb{xZ6 z%0b;G4jLkxsgLLutEgQ>{ReLq@|~Vs^YD9CziK!6*_X1|mA4`8;$^m@pb=cG1Q-c> zz->k<^>w~y!HwUswy6-Uevg8cX63{e(*alYX0S5o;O_P5pj@y^QDR8m>?=!{Y+whg zDDQ#yL!vRY&ISEmjN&e(=EN;~$Amdi5MFkW8PNIUQji4dM~{O_<d3nj7|IZ34MBf1$aV6PzPWw`y`1FmX3UC)4-5sAepL+E1fCEDcds zLLTVU_%QE{3EZF}mo#|lBoS6(ZTU86@mYxCpPM1keG{r|e7N*l4T}-qg2esn(8B&2 z%Pem~&r7a&edZtX-VJ3=6QA;)kLW&OEmsH~(wWYTJE-mj)DKmIZNL9yiy=?$P=~mZ`CR*fnx>T)9{-*@UG?_yOOq$nS> z3tSq)z{>tAx6^ZmL5l4j(tzageO@kGsk zK*=5AUAfxw#Pg3Sx3-PfcTQ(B&ya3j^O}68<=op!MIMkVkk#~)SsQ1d`ul_6*`GS~ z2K59-zi!y7-2w&AeJO*yC&%{|-FsIvMdE`b^6s1fuY_py@jCz;;%4@O&ZGk9}tzCknzY@ zox!p6Hp*XVKYX))0?~%^Ag&+9*5KaEMqtqh}`S)1JaIS~S zj^~&n{S4BD14J=xLr}2fto7y{3P?-4jXe{IeYD;d>jypO`Nw{u_cLc;3;KY4*fG#i z-^aa^XfBN`S4a|XkcLvnT+gRKo25C%&3?*xPB03?W7zo2T9EXxXQ8DIAZ;0}C=B^T z`ScZRku)9s)QAW3|_bXj!;08g%p%qA3(c%2-G%yf&8Tj&|J2MS(1n3 zTv!|k_KZX2=MzlwbQRYlKmGmtH&HJz7mbEILj&1-%(_|3P0ss3`W#;#SWaET#9LC1 zKccAJ`vL1TifIOnz%hAmQ5G_hG@K7OCO#2s-rsg(L{;7YXeKfzZnAZBWcoCn%jpiv8XnAPusXQ==Z=Uv3W z-X}2bz*_R#9pzSh8uH&0K&jotm4mM-+}<3*>O;iD3Os@Sa0AVIFGtyl9;hSf%e@|- zq;Bb#TqnT`(?5&{)21chq}2`0<5!|{Y!#NYk-ph+hBxb0GY`+#U>jVE%_XnUE|j{L zyKE;$i5Q9sucFR*JE+fPaG;OtLh3whp}xUb_;$Z{+|*@dH!Ve3GQGcTwu+khFy)Vr48jHy`zAvxj~6a2SWl+HaM z_B-kH=}UP@7wRyqy9sVm%GOMsgmFXFAiwuekge*%{O@jq_SVjr`aPF4I1^$O(^V&({9l7|lYq(fm8`D#50(s0<95M!LB2E@b;k7Mp@tVgeVrq; z?!AnHEddys@e<``?-jmFH$X}2U}m_jKmA^_!8ACE`?db$n;)D6_k~-*)!rDw6K=8% zn=WF})$bTE@hc?k%|Y?t2=ZG-Dgw1=m*N%x_7B`ad2O<&Ie7}Rm}|zr)#gCX{kN!P zL;W|}mr;H+0=4JGlIO!2(y!a|w1JeRKfu9lChZpvIAFcaS`?@!qk}3PD~;lSy`X%@ z>sRDy@kU{ncM5YSKU9B0>BNgVSa9|r#$4A1T|Z)-xZZ;zNC0hLEliEP0LedJL)*lE z?EbAz>Rq7`wR!$5&J;- zoV>}35h5qt%B<(y#mzS#qOj?LDD+SVCb?e3G;s)I+}e*#|0SdMj&SaIe+AgR42E{i 
z7qH=Z5&C&Vb7hx>im1Q)Kyy+uE9trd)n~mJ?{h4c6kA=yb^VW9I}5524u7!y2{cK>n^4svC(eyhPf^dCrq#xm*UJ-ofNH^x+bK=vb= z-U|}yn#h6idgmbWQ5ps&7NcNNZcgCo{jBKO5j5@lhu8OOz#@-t;H+7Ifri(30Nrcc z-DoadSfh{)&gIunW}?ZvENHY4fLx6jE0lxo8bG@R;!{zBGXEzZ;l(a4nW3Ed2ZesV%;PHM!NMQF z=DT&!^zt}mxG#Xs&K$IrDqgBMP`47YY zU7;>#YxI417y>_~bN!D`L2uf1(DH64M)d7h9(Cy#RAkPp@q`W>#p|^)T5;F?;0kgN{cc`TO#VnE4>~UCNyb2@xd!yHqR%n}5%L6(SdnezC zD-$W3-|!_7&yvzcRty z`wCrMVlA($#$?}tn97d8BHIv(u=(prFI*cqtdA=p9I=lotjbp)d-Vw_B5;xMe zRpc5q2g)4cq4B|Vs7TxdNm^2h9m@Op3+?awIDX_P;f)y0A;tRC`;(=gO{7nVFGpYggY7}6^W z42yL@V{jm?v$1R< zl&srD{XiT9QZ44D^$5kT0#V+f0}%To11q=vL5Ui9jJ;N%Y;R9kdGaWv_mP46iG$?d zqWh1=YbN_+gMON7+#xR(vqT1Hy0J5akE}qq&V^9(G8;DdHd5DYCQF|Cm^rgj;>L`C zsWN+sQ$|lo0rgxcHj#yNgK|cR42IW@=oBm`6H}Wjrt^C8J z(Sx~XHwICibJ1RLi5s>xLR^g_3Xi7Yypg}4V4aFp{Bed#qbRzw&!^w*JBT^@88?)C zhmMeHwAW9k`$ZAc?mQMGMg|}lp_P+-s~l4=Cqvz}B($s@NSOd_v|sR$a*#?8j@`q~ zl{Z0ICUMB|8MelhqH%Kq$|IkHp(ANVYsWIdMg{n^)`K!_l)`(D8Er$I1%V=?}l$oa<2S?hKtUOT(5_Vi9nB-@j;v5I_ z^r-j#zX2$3zbJ}7ZjANmi%^i`YOS<6Og>7^-Twc(P@jYub>ZX}`G7U6K4KN4+2$ID z%~8=Ho94j-Cv`&Ag#C(&#x&kDmwLp9?6KB+a|vBnA7wuKiiqQp%nDaOgw(xDpd@h{ zi$Ado!pF?UmMj%CU$~8I^F6x9{No)(53s$l89axnW8I@Gm~+1vELOaP#G#4kL(Dj< z_E_NCeqfXE0F-3yMajeKpnV>ocEdP~B{qd^*KZJRJ`AF#WuwWMk*Hblg1fkVz_@Lv zp>`>GCSEE<^0)0G!TF0K*;7mAd9^z@o%%vuC^Nx3{|abs+=wM(^H-a#*v=jo%B=GIy8Okh` zic}AXb9khQl|)jO{vKsG-)n>GpoyTnEg!3D(!gJ*1p_?aW7O6csG1XmTxUOdhZLam zI-aq@c4VKnL+#E7ShOV>8t=z}whnoAckD&KwUO99TN@lJ`eJGnov*(#pddubYM$1C zM~~B(?xM@;D#>#lQG~kplZca|EovKZj9a*!r}vsZR2lSw{9Hd^4KpG7$q(}CUIBNP z^N_SU47E}cQTn%;iO*{(T=eN~Q~wT}g8xG3{l(y4R0o0;#tMPLA5%6H&t5qPjN2}w zq`w?Aev>b+{gtRddjt+xb`;w6zJYLe5{$7>!H(-MQTU}v;q8^ewO2BXHLHWF|8%fw z{2-hZ5eo^6!eH~tL)bj>0@G1{%mWwxVCo*L&}?!Qg!I1-MMsx`X{!fl@;4}0oW#5z zoI?@wtQ^dc8xm5m$A${he0;Kl==Pog3r5LBR%SaC_J!|%hRv1 zjH?pVrB3zwtCd)xxW(*m)0t;-ojDoL;1bQVB4MUE=)9xMDAuB|yh4=mVmZkDTSblX zNT{ff!^m)7lrKrdq*Y%qbop73HBRN?JvW)Afg1Rp>w?<4c9>!{!*+pmq=Jy)#*1S`ykUrLLa9M5r)|2D5w>b=!Whj$2&~P4E*f3e~ty;!e2k 
zqC#c%Z`7$60BL8*!`7uU>i7<%`_v(kQ}$m-oOv9pDwa^5Y&P{C3`DWkRfS*Q7BnUv zk`2(FOSf3mLOynbhAbSvjuj z-4gAo>n4plg9^WX1cBNIv<$GM9^NR<3l`J6j67Ac8?1QhYYcx+9-s91H?Bdb$5-+n_hyxD)!<4VrpCVy>D-=7>VAargY5cZDa9o4B2UwZ>XE7MWFKM32BSE2v*hp3rOofwlnxX(I-^izk~ z`0wAq|KvDgiUf&VTdDW=%3`!#5({nxq|w|MCX#HS87Yl+8yfE*e#c?jC&oiqULKlj zZiI@^bdYUn0)a{2?2K^g15~^KTUW|%#}}jh+C0#bHiEi<-jBUMi7Ha&VG7*?(p%>; z*AdB>F404i8VmG1y8&h92iYR0k7)U%6eF9B!Pxi*rrocDg1aZNYI!R7b{zuWPSm2# zk4dO?A{>%lCSzPyAIu+Djt%X3*!r#-IQeOVo~6T>_+t=2^H)!^&v4&~cF9+_W7t5_ zVmFOOBkymR&H}jfji;hH9HvA1>Y3cC!xkh%=d${kuNY+X7d?lUP!Fgl zIu4nOvXuKEj=v$2Pduq;?)sBiew+jCXT>PV2qJ&Y00@7T1fG&vkl3jKWU{@y(We+| z*8M^6fyYo^HwnDA%6a7FM35XyW}3_XU`wA?sNE;P)^F#rMM_;ULfXCW8w9?N&%tqV zFZ7{3c9EYCNFJ#a8PSGlcIOd>sh@!K*5zFA-x*B2atKZ?K7{qct6cK*xT4~o9|+b& zV46Q-+I-6Fdn8Z>{!n(I{~`3BScT1pN-=bR6GST-@PK(6nyAgh@ZUecKkO2!3TPfM zrcRi+u`E4!4sV`j#te_dqSiDiZZJzn)rR}rsUnI=%SJMB=Q72&H6J0p$1T|WF9!>D zOeURX1k~g{K3nSeX9C*q_h7}R%Un2} zJm=cCxnHO6lr0m1xBUsOZ@M4!UnOBvW+eE3bjM_e<6P(C45q1{$s48=Ve&%C%aJFi zJnb1c&@N7u{t&Cq`a@Qv2>d+}?Vo&Oo8P{{mfp{))1i%72b=*x!7!d7q#m1Nk6Hb% z6FhU>evtl7v-TU?4p|HKpkS#lSZy>x)kSk=zCb~Km@ZtVZwk$;wsNtilR~oPm8dMT z6)o1DCLPtDIa!Z_fFs{Qdd~;4cHRZ6`~QLGoJ$Z&`k&{mTUb#-8A#(el%5#E~ANV}|thNRb6Ps|Ku)D9;3HIGSW`9RyLN*>rK12Kj;%$%mfP$J>Me5f%=!)NY`p}uY z!FvxBZPG(=>0nX0$vH?Veh0q0gOJEHU{sfk8fgj?>^>{9n){5tEAvqH?;bYxtHy@w z?_j*(FN({An0F}=jr1w^c;F-mCAFg3k%5r3m}Zjn=b*$!g>*nao)*vmhLU|~ch?mx zPLD#%3?U@zhI8SN5QVSMfoAtEpxJ2zI$WmS%t4NqSS?W?VG7M3rz zB)?V!2oE-~nJrnUJs}?1s+$mC!@mXn5Sp5Vg&S_8=X8J#V`=X` zD+71zio&e)ILOmG2H{svGNpMJ{J*B)TYnS7yC-v>(fMfq;}R;xIf@poSZuDCi&En) zO!odI_}}<}+S=Vge_#o?zBomFP|mo69RSuvj zy@+ileJ)CWH=gI`%!Fn>1>9%VW6zl%pq4Ty+NXCw4b8X)wa3wFi9WiHRB%C0SIj(l z5B!JPV~!T0HKr4b@Gh&Fcn?jJJTMuHX&0gi^|Ss%nMZ#nOnocTS=JR~kzJW{%ol7~ zM`z}V-^|>Re3Ca@AZueYvOQBkaGTCD^Q-hucC$7C;$c}n#-0slQFg@!hd=Bo zi&BJk;+1G8I)hG|rm*Di1BquOoAP`9XQ<5^0fpvy*gh~0b;wh0Zyw8q>rR2kmvYp7 z6%SrDd$1&R3NI->#(aCzoJyP$;k$jj>~R%jIk`j2nzz&)M820PbHF*Z4a6trv&e#2 z^qO%Q#S^VTJ&~c+?3c{y=oR#c%>nVs2?|NzALjRJFV9Nv1{>N^AjyTg^yX8q!IgvD 
zpuIDet^b1_%B!fU)=oZ&=~yiO4Boo;q5f++ulQ8WOj>?m^Ra&*Em$Km>vR*;kN-r? z%YIm6Mji@(BZbT8BVhGx8!>B@EGRGsR&JD|;O`1W$&zJU`YNA$CT&1#*G3RDk5jN= zPMB5qhs)o@Duh9MpuUb_{=i3=DIM#`sDuD{B;s{p;^bGG?DRtOTc!^FJvv+=w&1&PTo}T^8bfMx|PI1>5h_;<&eCw ziiaNZ$KX_A>D=X{sU5+#0f(?xl+~t4#%IvYokXk{jRL z<1;oygo1y5FjTCj`ET4hP~N?y&@MO%!t4ErkKf3mBJ08B*H8?Iyh(fiSE!9Okj!^t z3bDTvld6fII-VtuI)%m;9%5^3HachD1iN9g(K+)y{LE|O*$_yb{7>+BA z>!IYj7nZ!W;RQP7JWEmq)??}W)_4yBMdoOld4Ta}yTNyKAO@v+KpSl~HD(=0_o<7} zsD$>KCSNcxb_Wdidw}jH#nj;>#4*b2Xqm;q{N)%(oxK%PJ|3r@(x;@K-DUjpF<2=p z0L{B$%qR9E$V;9m1Y??~n2*yS-f0@@IE`V3LrcN(&213wqu<-}KU4gtyOO{599p<{ z<_7<)@LZpAFtP6orkkiY?UbCJ-*Dz{C*P&%dD`~XJ+;XY{G%LSA`c4Pbe7gox^9G>O zaiv0{x~6Crnt^yr7v?;_1s#UApiAa|pdNSwJCds~Ke7g^x|_`Y`EO9$T91|= z`hh$&Oq6(A3k*t+L*~Tin4J9yz3&`m@{Vxk|JDcW{;dS_3NLhBPadZ~lyf{df@fqt zfz~4`Gz3r@k9?EMg8RF7khJVNs( z%BOzr4QgZGp#R)yV7_b~T6FrLFs=7yZC{5&;EfdSH}oA#O`Hx&dKOBBGq-Rb&f7C7 zw>hSXsoF-MX>dF*nzs})COE?IJELLq_D-aePr$mbo6&dQJsxP`!@|t1K+D|`yu#g3 za9RVbx~M}*afzZ{zdIU?(BK3jL?@jg*c_HA>R2)l{l_LS%i3c+E-{cx$8`mH^C%Xg z+XF(?w_r%qSP*$ z4_p((Of}T0V^YE0x@v>|hf(N$s-0ziHh|~8x)@r%087?i5skOgg@83C=sR!#1|+P& z{A(YWaAhZvgMMZicZKi{+R;X9x?mT#t^`bPi0tD@rmM3$g{IZ~8Rk zjQ11c^QQ4wd$pS7`_i2KEs2*rUVzC1KXTU~N0iSASA-TGXMTPeJRoEwq#SnwL#GsO zqrMChPp7iV7!ejsT7{aw*0OfPbs&hdwwA9k0paAOsPWJa%{!lGja}w}{IRCO%WxEE z5FJ9DvI9HmP@(o-2;37vgv~aTG8V>Gg@#2%B?8z3Ww(&wEPx1=|_J9o^ zw$py|4gLFAk*E8vXpvMM9S^RBu#?=9u6`&Xm6#Z2gM*bWxk_9(*d z++)6L+#uGwCxlNBF^x|BAm+|bW;>2%zS~=wae5Eb*gFUP(k8I5E88Kdn+QF;wSbi_ zBA-E$NP5OiBp&W466pV96;b#2B$+yomn?f-7)s%{H&fKr& zyF$?U0Mm9}0&e0(5S*ohVS>#lIFM+a^l=O%zc6Fg3(V;Gt$>vu=VAHlp^)z{gaa{y z(CFkCG_Co@h#ttD(vw74(_Nss{To;LrLpJzCZhP64Qj9U2J4k>7??MK8!g)ek>(3A zFk%oY^F1gBbdXDnLb&;y5-upT;C?|3qDi}kpvPcckoO!f(%QNhGBov2u5(qPHgP*j zzB=&||3DPmeC2sH`dBr}3jKC?GK)89qFUR7jBP)}nYSs{-#O1#ZkY{iL9;nEyifDaeZ=|8+}@j{zVDg z^WhpSar&pYJ~RkJG9AIUPap)nkLE=Y(=hAH6qMU9Vt&Q;(AKz=wNCcJ)S(wx$ga`o z&^{S;_L%Vd6NAC6LK|D0^&yO&VZ}>zG|Zld6+h>*Kr4sn)McNi4I}qlE*Blp7Mxro_p9v>g zi>kICR7ZhCf;w6U`vzX$aOt|3`5Bank 
zg8Ld`O~e|sm|;fwawBdr&KBmaT?>XMXCjZ^i8E);LTN-;PVD9(!0tR{IuR#C_V1T5 z**FW9tg#*3?@R%kH%=&~-g3X_mx>^#0#>M{fyQB;U}Vw3LxC<@qy(*FNZCiXnHTr&z(nbr!aZH;xDTo(+2XQ8K~HKcw%%iIH> z@w%>d)T!{6G~pr;^NQJ9Q`j6h{RIH zj(=rEr+*Ts&I$d@_lhcdcSWUfYEI>cxoCB56Xkh#^VAnoCg`}DEncI=>)%dd!lUz; z<~u1Xh!EizH&xh`c+no;?tc$`-CQx ztS%EZo5%1)y*J`a#W=M7&=0LG)rpntZ7u0u$_{j)-&9@#(;0MEq3%;T3^qNL#-v)eHdmEHQG$5ZkVNLNl)9qR|; zwk--hGh})b7eMf|&Gbx8<(#`nXVJp$+)B%d3ydyuvkpH<-j>Fz7A_?=_CVzG^oX}b zjDYv<7~Xd@9O$(T64M5NiT-;o-L{i8D=&&#dgSrOZQppw@_Ed$Ul?cCt+C~i8$`GE zg7nElx%Q=XAPzpm%)3p+#N_ojkLEeeuqWIl@I4owF=xR&&B!ObiwX0Mz+G=Bw3_yV z*7d81>r={-Z4A+Sx;pZPqulJuHq_Y^z?z>`iX0!^WAf|B+rM`~sel3D=gWi3F_BJq2v*1WQzVE9#$?YC^Bc>jRTqbw{N_~ z^9M^w)Tt04bvG51Dso%AR+! z!unN%q2ksVF8L!;2pc;oyb{-gYAMYbbKbD@>|P+f;iD*9*%Rc?$14Q;XM^d|r%X_= z1(Ne5yj^`5#tmA@ogR(>-$*NzPu#@3TY5uvnIUltrZC~}Qjzz*EZ(MP1bz?3GqqXM z(MW2H#g~l0#$_P*-8(4S%$L(ncR%+l-N_1#yg+k>HET!mxP^^lzS+Oob%PD$Cm4z8 z$-&t2nEW96dLTaZRuq?F%RT8iN~H+(q@D!V%1m7ZNmRp2U4`+~6grrm~8cmP92LqIB6TmSO!Lm^JvJeb9Okie@u`?~-h);2@)$tSpGnKdZW?fLK>8&Rk^U*wxQ z8q}uvWBkDu#>VXg<=#rgnAC3Ikl_FgU*cPXUWd5X3}`e@SU=>I4> z|A3g%FN}{QNf-%9G7^O(gv>oBBT1WNBne?8Ns=Ta#fVK3+mg1mEyG&c{K~In?m2dr z*p`;q*d)Y;5JF3c{l4G-{lU2RzUO_;^L!pMk8Y%_kSi1s+lQNX&!tH0jF-md|2RT;xCWoC9>a~IdrRNv zJGn%Zz|>-AN=geLn+I{iU)T=j{G>w9b6-I3S|ev{I|$APXYuYc8V;}xMVq_5(D!Bs zbnn&{n?f?k`OZtwgnT6*cf|b(fC_YnKkn$C{S~-<>w<-|9l6G8A!htNnfHHs0o#km^ZuJw+_b0`HoiCGiiMw~hV5O^ z*;0jWg-0MXVln3?w!x|!7wKw#XDs>LowX`8mnT1m{N>++>V7ZIdAyjt%(`$zO@(Ch zu^oH24&$Wf-Joe=1lOKhDmgUSZ)H$Q9b%$?DcB$I`9URE~7;EY9CC}eouMnt2y4h zi8MEcb9`QgCNI;JmD0^(dj-^ylpZ2JMW+{uS(J;9+rx$#5ZlWJ)M}=9}~nu ztNZd=a7AjnC&TYdRMm^RlCzjdw39821;Bk&dQQ+>cI8yHB|Yh zC%$_yoFC2>IhmCcSueLy>kP-Gk+XvEULXpd;byt$*JHHH+MaD&6+_Z{Pu7+m{eSLR z>ULPe@h<@~94>?Dr({`q@{nA;G8LoDqA_voOR{K`*mBemEZr#diGdDKw9tX{6}85Y zAJw8m^tSA&YmcTgf|J?(GK5We3HtZl)fGqAOL5W5xOCio3hS!n>?yN3MHhkdjVO2 zz?EO`6ML5cS+%W!E~QRD%VbBqF=Plj4i{(VtM91!%39P6-zYtnSF`nYXTdG}2pNgn z!1~}da^1NabGlpw(~hT-b;cmBT5Sc<+w&l@^Gnk0^_Fb2oK)$a;6)Jhq!&jE#!+tB-KX@tU 
zB>w5DJ#4ab*Z(P)^th0KPu z6FRKmWjNi3JeM`xvZewwlf9(uzVqo#2TQbjJX3TJ|0?C(?MYuWJy=!$ST;#rMJHRj zG3l3Ol=N(cnER4>|DZ4MQn`dLO&TEh2HWKL3GYES@F;nW9!musr?7S9Q%dlT=aODa zY3AYtq1E-5-y9WwlhWC!KkF=+i4J+sGk2)rJBiC*zk$Zptw8_#F0gHX5~}P*LVeX3 zu6_6qS)Q|^Is4~h?74E#?&&LfMHM0!TOnV5D>(OI>&bq}DomKL7;S_5aQyp|qM!H) zHSW3$=U2DmvLu0N>d~52DYvEgT|)2mmT=LL7)QwqCZc|x#7)glNa6e= z?N&}^?an`?qLX{5rcP*%74c+R_5du)yFO=>Q< zzK2Yr-`%UcnLML=qT9!RsqVjRv~S5$HlHP!H$(eU{4>2&u=X4@cnn0Zj=xi;b~RRc z^}>$Ryf}N)DjK$1Fl67XWAjuyOsc&=9y#d{7JgH}SYTL{0YiR8M+4p@171C=k8_+39G290jR8jBO; zGjF2sC0auK-UpC2)RUFd{Gg?LH6~_O!~Xn1?7ULVnpYpxs@L-&J5`2^pZ}xANlfu` zFOrRB4Z7QmWmSiPQi-1hi&9p6a@0K-|dbzWrT(DEltmM+L8{(d@TUOm! zM-i{JJg`v34O7~2TF_cjGKvB-`s+2F4kP@iBiJ&i8wRW zA45vCn0?=c^P5()f7hi@uA0tGt=xo9zz^~=dI_JxVon@w&F7x>;cc5+^Rq4MSpVT5 z*)E)eitdGy`bjdz4FuL-SfI|f+(vm1E!mLsTvCniA!!~@hha6d_}PIVZjy(g&(_{Z zPFWP6e3mp7wV=H`nAP?@P?6#z*L^q-=jOCRbN38LNLa$)wjE0KYR-GcU(;czQ^#$wn`3LPS^kzx-Y1Rcrwv2h7Aq(@{E9R^fsBsJFbSj3lslfa&(DHl`!KaC ztP{A6yiK~tLDFIE2{NC#8;YLwfuftO$l5fIqjyeZg<3C%>@TAX3m0^708B2LEIM7B zG3F0HF1*m0Jxng}u1kAPUOWJe-7NWVqaWKYSj7kYR$|upG_brjo;`hbQk-DH*!oQ5 zTSJ8c3k~jYJD-;a(Ah$8S`GoM<)F=C)Z(}>oZxosD_5+27wF_rt_kxih z+G2EASB!Vt409PMXQi_Sv8??3Rm;u9t7TnZdRH#$&YO z9Wt{U!qJm@qH9xM)~w${r$!=Xy;iWRuNgQxYgt!toC+d)k+tZ4t-ARdR6gNgIeCn5 z@UCR)A+({T>*>TVOE5Vx92Kv{S)K9|I9{~FO9NGG+%%S}?rB;7uvRMlT?W1Co$)|P zUu>D~#Qsk-sLHeyzKR^_YD;farVH)c!bl!ZJt;ic8(VH2g{nvoY|Q^CyDyGohx1|R zE3i%)hWb^X!0FryRK|_qlvGJD$WBAHOFJrec?2`x&0+5;zFgt; zNIGw5h1T0=pmk|BnAS&0Z@L6v!XA;4w-K82VsmO-mMe#g{7&vQ4QG3FMJtP8eB+4F zS+qhs?s}UXrn~{qq2X+vcAIjR-4|@3L9*`Wd5|qOn6>SNKh<@cR4cw6DW7_9iOwB7 zj`#?!!x2z;xMO_dP|C3MVa<3?)`vC9RSSEt!=nO7R4-u7fwpqP$6@I84ypV`D~#@A zhDmcmd9BbLjdv|V}xBznl$6$Q&64E*TDhHX(<0fZ&P8=jWls27Z zQ~nOAK2#Dpax2j#C*Jq8uhgpIc=Grlcwq7uDE-BU%RJgs-S9Wyl`|APf6RfhUr$4s zW-!)m>dgo4h+Wm$u9)cMhRtH0<$9bN*WQ)=efr`hy;kG_d^yy`l|459110C%Lc{hips^kR9%f58N4phDzhA~1 zvwhJ}eOpSp(hM2O-Bh)1A(<++L5$rLjy?S|8PGkv!m>+ z+)06d7eQ(39-^OKjjn0!D0D*%2Zav8dRH+os9sAgV~>F3Nm1^six 
z%tLOH+tA*a9d-+}Q%{rWhAo`mG9Tmn&gAByeT7y#oNa>^a{TJMqO(o(dtFZD0_7S! zpqa%bqjrGD+ehST-xrkop2|gQcS+vu1ruwaom4b*G?8y_)&<0JvsWD{ocz@BUtOiz z(+dTUZU~2137*u6&G67Io~a~>3)EZbV^m+v*pdL-3q9B(F99naO`s;h0MWDyl8kY# zqVxBflvBB!Gy080YuAn#+<7iH75+rJZtW!B!nSOCYb$tMyD7zg8Hsr*%el^PGX?tj zv2jahRJ6B}U7h3LM(?&<*dbhW-K(fR(Up9iGpXCmulU4}-niY{3$s6WLxtN;W5Wbz zuDct8C!Sfc>!YcHLuxNu-XzkE5X{tZu9T(Epp5fk_FeWjd0p{@+Tf9L!tYc0p5svb zc(N-ui!879K$dK{0#tl188iPD-v|4nWV?75IDgwjbq)#K+_^34N;gWr-8VwM_b*hs zWgTiRehsI3yKwZ^B1^3gM7y$mq;i@>{!a=i`HMfMi!&y3ZakKF3eL1k7Z{i{nKz0o zbJ(omQ21JC9X@(F>YiZ3Ol%OI0zHIX@5_g+pOV2*c;Su%JM0L@hr3fTGH?kRbSETz z`c0$#>L6A$2dHWNI@Z=!(Eg^Upt?InzBOzx?sq9AC=5WA^DmIz)eCcy!rA{9XSB(j z&aTRdU}zn{=W8t3^Ery_k*D{B)`*>*3s+fpr?d_2#MvOLwe`WMe?nCAodL4lT=3=YSoD};fd&3O zz$W9|B}q#5`EU)9xW|F*PWjQ{5uagY*j$b{)){r@SAcmB6_*%hu&(tyNx#`b^1Hf@i{AW2(Hcjv z`or?~8B8BH^1=a73AWd|sG5_Ib z+Hd;{ln!}Hn#4eKcz6*4E>A{dVjx$Q%%nU=3${_V;pVD0l$~~r6yx(o*S+n+^n4A* zmvn|OyLV)J7%^g&=z4ZRYVx{6rzYisCcr@D7p7q7&?I&ZvEz)lR@{6kjV!WK#a`qf zWvjcRa`99??E5e2mJepttyWCl2O;mK@MC-*jp4I@0f*m@kcE0UAARYEs@I3);xpaZ zVa5tptbZ$c|Le!5IXbCijR87(P2v4k>nU+hAvt|BnblGmlHpu3(5-u*{&#y7vfxf=?O9K0RuY~MrGGd;yW){|n@M>=pq_~nkh zgB#T;SoUor`^_y3K zS5X*LUdg2MkKI`lbd8?9U&&SfDln;+Ii+|Gz`7rLp(>^mpTH&Da5)+GPx_7mv&tzZ zSKPzCp2*7fUB&zJS~kxNXRq>NO1Ydy4KoI?Dk7B>@jckGDFCdtMdR9BG4EI|ChK!z z2Quo&=z_l2VP?c?PH{?Pv)dl5A3Xv+1%IJvPbI06#-e4a2k65VqIUWMtW7DD1CM%R z#N8zvp1he#eoCN#=vAn5Iwi-aHb9WU2@N-pPHgu<|Ah%)wKf6IA0C7i8&XKSX8@Hq z`r__auEJYt0ZC$hb;LHJe`XwHEWbr1(lk`&4uQl`J|fT80^S=1^Y&vGEGaIB%z`oO z`)wp2Hcsb)XWm?pAh;>Qf2XO5qN4vMi+%bLS*g7P`fEew+Bv^SSzi`$6<&a(>D@>< z{xa0~-v+(eXthFreN;u*1v#Tm%cieArP6gfAaeB?iqC0{=Enj>udS=GwEHU2nSBx- zx_INqt{pKgxHU$m9s#G@QEXAZl1+o0DDg{2&MNp!3dLU~v6I8NDd8m;My&(C0aLJK z)I7M_PQ?#Diy8I5YN~b>`II3u!GC20CE3p+UvK1)YNg=q_P~vED<~vP_!QRk#IQR# z!n>5nWIqrcg~w5)9LgoXjs?}crD*+nC)9>^$H=DFP@CZ`CC7>^U)5!3x_q4cKl)L= zRX41&cu8(6#_{%FCUM}0<4}9DKgD1Ck@U}okjLINn7mqpjRz-+9z zYxnP_{E|@|m5%KC`#nl>S%C$e7h~|BYtdkD$2GHJc(=u33{ML|j{&p!@USx2IN%V> 
zIn@cnqq}3(=SnDkFav9T{s?ia$6?wzv7?_F1)7SN^1XpdcCF5Us&)sc+HpQ_tlvi3 zf(SOPnTxhRO<}*yR)X>92QN>IV&6l%Xyb@sc&RKHZLgn&roS{CqVz)5+BW`fw0{yjy~|JG>q7&}|=@_&)FDT~o`5e`L`A13$ z6g^Q3BLqa#jg@v#nTjVYe~- zd=c%QFJXCe9TZ&lCG){9eARgjhulMJRiS?5O5^rF6VUul zYgT&%vcEy>H-45xPtPLAf9ZzNQ$|zJDsj#)=t`CohGOm6zL2m~bTd zEaHcA#GLZSP{~ap_BW;)+l|3MyL@ouG5qdr555{B{(h^)yLviLjK zUG){~tz0nLs)EA5o6QYlLPU4gY7E@_GiW|^l}s^*B+tc9LA@iDb6jU)ZQsFQOftjx z^hB!tRdiHMdo88>CUm3oN$M(j3`eIXP>#b5@@g1NEAOsj%?}Icl1A+1ZSRt%AeysE z=CgI|4sbo>Ej-fGODcab2@SRvpMRqf=kiNXcG*K^p4AYxP5k*WyI^g=IL;j7!`V5* zA#laNRCDqqyjdeJBp zy&_Wco>$b^x(aG-e+BwJh3gkQwh>R?Rz?^i1L zJcMZ5El3)-kZ)ACMXRqww_>9s^*<1f@8Z4C6d}6M9-M%%n;Ss!&o;SY_$9Hku!q{% zw(^@l#QD7UE^IVlgoQ@uJFQbOHIJyGG}z2N>Dk$1J7G{V(sKKsdh&> z)!uI{XMYTUyy|eWU8>}gwI4}iZ7r!5odd(GU%}z|OmyCsNiSUnb4=?I=&*hqMuh6H z@%ue;!L8XaQ=N>v!rl4hlgW6>`7?Z}4rIF{u@L^M8*Apy!}zRTlyxN)T|M+bb7L{( z_fpz#QnBL?si^%RzDfRV$^4h$n7(QlukABNWM$5h(?fAb`f(c084)0!lYZpsspSaG zXwVYCGN3B3(m@QFGbg zQuLDs22R5_@3m+c_%FHUbQfGW7s!zR5&G3zN?bb>^MA6%(ts^sKX@ExcP^z}+19A{ z6+O!8D$1(Lrr5^rnCF!X&4*fJy{+iqa-P6-nI=+f{!6_!(U;SIv1d|gIi_l!xKsJd z@lj(bsp}$)*&i-;+)ebQb|`9gJ%Epqt@-lM5oo@)E5>EE<8#ZM(Rgt!)_&DjUim77 zUFQ#?ZKG%L%IyxQd#ObIUxBFUrQqr_Nv!y$989(g1UGUfSGBzii4LPVZczePkC?$n z_YFb2`@e&i^9~@mPem?s$>X>Gq>RFGTy$qL>kgfi6lwE~7E|0f?A`#8Y2Gd0+c=2J zY7)4y^9X9$^a)CgUgV`b2p&J$k;>LduK#x_zw9>|bH|Rrht?v?w{54CTkpn|?wesG zbrNji6tKJ(#`eOSqqsVDw7+4M$h{;=iLFmkS-K@>Y<~{#k|Wsdn~S1f~@9ummD!nHHqtH3hnp!1&ER+iEf=(WZLPJqxp9~_~O zz1J!Iz%q2R&xf!Z?WtvI7*Qu;Qf+oui2v+>Arrb|{oz({)i|H4Vr(G3+KbIk+X($mNv_W3TtD>(>ehZ8t2AlU z@}C)BEgON^u$2^PZO)dI4yS^c{S_w8 z6UhQrVveMX~5bjd%jN7sUJjWi7_f z*a_xml$b3tKPjn?VE^RD)D$fKy*GXVU0flQwHN=7pPxW|+x=wOznS8X>7~HrX}s~H zcox=mHu|S%>G3Y%Wr;iu;X(h9uV*G0+NR4@9`1bONmnefkD#!dW#APQ#n!*XU{2S= zV7Gb{W%SXYuc&r$TT)JWhuVrhsu0%aTvkUqUnMiW&|WLAdnrcxs1=2Fa@f@kl-cBh z%1fD2_VXgCZqi_Uqc#`4%0=Ls-i4KQM(O_=ML9FQ!Sel>+u+rP6+UBygzU^3HoSH9eBI4c4hNYQJeaaYg~o+&wC;jsYL zb*+*OZ-lpY#d)dRrvs0SY{TWpi^$Tx4MohGkEL>gID_I)x9ox(?v{Y2NzP#Qbr>pt 
zDw2%sizO{#6qfFaC3D-N_FcVvsm%mbC)i;^)dbF%E1upSR>p_J-1yCjz8qCColT#P zNS42gnKj@&M2D~DygSRtNpQWJ*L)z)+2XFeP-wH4CxfZwmGotwV4Oak&Z?6kQdqRe z5c&TL`lo(aR5%zt97giUHo{*K+=?v<*K*;`Vc1}^f!ydQg#2+IQYJs6tWJ*TKjJtQ zj~^purV@35HIv8sn^Jx2rC9K~A5_jBi>fbqQgeHu`E@-?EfG5?Z_{K>R7^!v8yiq$ zxRZJ0U~F~{Le10d#`+D#6n3NwhGrqwe*ZJn-C4ldtG7VyoEUDLW5t&5m$9KbOG+|Y z(1E`@@yI7ju)*sTWZgYaj(P>#UG}Cp-7MVKp#!E3pNxtFt*Nx~FY@=vqr;EVz(Hsr zl>;ikVbEzX`)?WR9lC+eFGnhJIu8Af?p(R<3KY%VFDKj+zd`O3*|LYd=lCPzX7KpE4=R(~(RQkyJo-(b()76)|6gZpR!+jeHtjG$(_QS_ zPEdHrA5=DFD(XMjqiJM-TrzbARe6_z=I&=>!Kk0$;~!R-b=i(9e?AIjVIs$TWgdF< zJ_d!has19njT+%s)FrNwD*o^N`1zu2HfSxEOxz2G4PQ$wqk3}97(2majX`747%siA z2h2}Q;Nn?IEa&W}lPYetJcr>B4)})Qz%!{*|%^&gS4|p{Er-lTBr9I6hzq5A=w| zqPCMc_P~E+{nt{oUq4c0!gk9s`+A5OrVeUX#7f(GO~TrXPoQe-ILd%Q06O62tGl3O z@ds*-)>84JVXXT(S$51D!F3;3az*C{a@C1IgemT9`pXV0g2cSNZytJ#>WbUrlF|Oh zNf`grHBda3Mw@=?0d_%?pmae4)zxez)zEJ!+II(pUD-;ef(>%sEAbqDyB{J-T)8Ir zH%fF{hsC}jXt+~C!(R2`ocU_r^+n0SId)k2=RvUSIfraAlo)3|3tQx1F0vE);|SqF zw>M)?v=d!K8)QRXZ`n+C=gaLJ+4Q}N>N+gorVXvxkll_V#q3-3L$+*qd=UIMFQuHf zAF0%969w2eV;?FxTui1jBjvoUS4iJB(`Yq4oa0Az5gzLg(A3A1%eSRs;KASES>FiU zzu1jKznX&9?qO*5eNWc^{5>d44rhyJ_GN9b4OI;ILP`JpLAr-OQrgp5d}7&bj;~%1 zdAfFZ@5kO)`+FPgc1G;~8!fP8?{pORP`17FiuBs$5G_1Vx(N$VbHJKLPPJ$2?@i#e z-AVB2M{!=$50FwI{K%ayS))6%!{xRqsAv z>U~UI@?AKqt_Yt~XDbY86~Wp+gpbXz0iv%>PZR7)<#VWbxtZeouOsb>DcHpC;AM6)BG^B& z2r@^F!@zyktnF4SYffiNhUHNhf74YkmxCzhb_{yFw}fs!=4idH2;wZotQ4qVL;Mpt z`Jn^nIZWoYCv5TZy{YJW<{cHadMOtUa_30REy&wrk1YxB$^Gv>oHW)My%dY6aOzri zTB1c)MLp@QUdoE=q6gm8RrCOuv*)z$C=aest!jo8@8joA9UX`l%zEYI@`Siy-lQ+P%tR(x;=C<4sJ^)$ zH6)4*!HifAp0*6@?(ZV~cmG1;Ejz)ZwdC|cbGb6XNJSaLNf$eb^1ki~S3O#xdUrp3 zIL(J0_btWXeWO|7<%nM5zWR8y1sZ?0;gr<_vEKcK*cna}Y}iNO+QtnO%O0Lperksu zS6Siy^r;wi%^9l}jbh#CXgQC%ko7!AkpU@&qUtwNM(bJlaH{BCTe=b}@0p-BsJ&d( zX(?sGBDQpPh9t{>DL^+L74iqQhuKLGoEUlcskq%;d(>G;l&|M4dZ z|J|ImnHNQO$LY~tPFg72vV}AYGL5?@2eYR|0(yO34>{jzF}HmZH`*Md+JMP&@%U66 z_IwoGveQ)(PO3rfDq_3fd>I!6?SyR&A{+3$2|}*5!lJwq%I-g#^H**nLzqm5C;mwq 
zcq7#x{}%iwdyub>n1@SM@{KcHSmADA%xlv|^uUPUGw2JL+qg zM%rg=FM3+txo%J|?l{I58_xM~_{Zar4;|QX{|cdLZK8}jTC{6Pq4?DgAVmI5<(rmp zN}9w4<-%w0u#*xi7NLiEJ-PW#;Mj%RJ%{3jYfX?2s_$F#%4&A)--%~_+O z^Lq8srKu)4qkAxMl~`w;Z70<^Eih zSs?q{PXon*+|iNg{keZ`UkrB9v0=$EDd+bx7gb^ z{#_6EgHm*W7kK^fJItImP4vE(LDm>0j70v3&9t6!}IrW1+q86e5Hxqz%f%<$*fQa3ahFZNvaU+d-27b*o)*HEZ36J2ODD{{ zr^1X6hrpu$9BjHcmkoPIal^F_;;eAMj2ENmp+yX3~nPjb2Jd2 zeVsI(E3o9I73tq-)fI`az_@2PTRH?`!FOUGT;NYF7sd#`Vhy;OzXy-F1oXApPMOy_ zvrR+_+DSt=FGu(|ED}iZ`cFFdl@>1#7XFcKLs>chXE{T=9;(D5&Uy4Bs(D%fDdh*j z{aQD?HSsIK)EO0sJ&iD+i!%6 z{6C=KVSBloLqGQLt(K2|Gn{GFB7A8dgu7Oc5Zw?*VB^s$s+iLUBbF>(}7n2(O(9vcICnbxFZd{O5{&+aLzaGnewt|apu*ZjO zqOtyjn8C*`fW&h{xvK1YI51~22cD1P?Impm2PO*~dQ_6`?4NSz{tlQOPLkGQlXS{P zoS%m>=+wX6IV{Qz^c{|J4H3_ZDF0%Mf0f?ZI||<0$O$TB_L5 zlN6`lOSS{1uwiQts5PxdZDUW!e=MPGd@01Y-7CdU5FFmg;ZS;R46l4QkB_cd%+;Ab z7=C6n)<=Yb!;iwRb72iu7l`|UU9{}ca*&i)pUG9_-B>xs5AMBj;>NP!pdZ_XO~nzC zzBbILubyH&CDg84J^FC;^_jfUu#gMy_rj*6b$EVP7gS!zro%o=uJK0F%^66h*#*$Q zperhuU62f$KaqLkA z2s;>v%?_WaELG&x?%Ysk3omhPtQ%(h)tCI=3?tKc!5wq%#v$IfD6w%fxSty#_WwrG zc3%TIX%l%)XdLS%E`-pY=G^#m6e;_a%R>92s%eYaB)pNP?0r(j@?!bKb!XwT>qy~$ zf1p##0Izx>DvA!twIxd-{+2Ub-7yr8-WrcNWt%9vjo3@R%Qx0uTp?|L-w$EzARg${ z6@yF?ZyzHx-SuXIDV+{A!+N4;eJ@P&rHy$ix-Kc8e;kj*$ciSYFX0B%67u8Vr zsW&Sgos*w!n$A^47N~6{^yAe=QvQ@Dm!1%NuFIC}INb)df8CH&v18d}WiA`W4?uI1 z$mbm2OSXCM$S)xV&!Zk&!KN!!W$u|IE1aKg*t{@m;?<^$7R zcq0#BjZ}crBW}d?_{%)nLZ<<3z zlQN~kVi!E|(jDtxv?k+StcAtdk}0K0&OnG&6n_?C-2|U9qx*=r(Xz2r;YHu9F7j-F3{oFa15HehD(;r!>U2k!2OR| z9NEep6}3N|x%}NEw)v#umgRj>b#Rp6Sa)Ze+!zec5WnTCD^mXb3`#hwWzy&={Phqn z`End$-+v=XJ;8=|g*%9DL>n&a>?Hc7f?0DvUG{AE z7j*m4morM!;KYd)VxQQkuDa<=Rl8HbR(ONFb$+b8J%Uf%72e#IKjexO9qTqI$su(= zsBXNKc7L19PVsBm)p;*eE$mElmiOlE-V3qFat(O*TP4_HQ{{|*g?>8JTS^gU*--hM3x8y^mZZgT|VeuMGF32^~@X_G&eFHFXahd;vh`5uDr{RraXQ#r_I9q%7D5A7yN*zips4j#LR z%@TUEe{m}L+GyF~J0JFazKGvU4QI1!!DyHx^vRGf;Pg!gR<#sLhF2MKZP;6=_jLvO z_#@?nx?$9!_L%+4K&r|a#}&3h^Bysb%X&@Z@D+bTPS_qO^}7%H1>KBA$pbl~Zzy_= 
z8o^aB%^@&%CYGj&>}zfU=EQ#@`=R3AQ?teBG3uDy@L9!~ca$6y=g!U>C$R3?Fe;k1 zK+LJV$zefjRP4!9>pp!AmcBWl+1x^DqCd5)%U9U+*LhO5Sp;Q$x?vK}fjZr7@EA{$ z>wg-yj{Zw{$ow&LxgS<1xa0o4M6s`Dpeoo-u3YgKXx@*9>W7iyj<--~`-dpaU4e5t zs`zlO8QQ;Ih1$Ru;cJqR7iUTo3tQXF*A3C{^UHg8H*_ z1WQM7*bcPAfzRVGx7{$*eiW>{Uy-*@O%-&Kfr1O12F()!F`;n=X7!F|ud^Yv+uR$A zjvtnH-*!ZuSvStI7>K6BzL4^832X0r37%ODYPt<&w{e+tD{Cg2#{49$+|ZemI|ZWS zrh%N6zY1%sBBeZsW#C~floQ>>4t+q8B)G|xd*Lf=>}QZlJ6K_9=u=XriaYZMH}MX5 zVPjmO)Rfo*Qv{14c5#F7Y}jMrVkbTq?tnHOgdg-|k;wG^2r*H~ShPG-*00pab;7r+ z`0gTUCtQ>?VVzi!e%9!4bsyAL9U#5(FSR})Kt6mdhsSH?C+KMwLH>vG(kH`CRrg(c967^9vqR%$>zp8a#^Y zbG*^{cmX*#+!J2%CMoSxTP({HxdL`)wb)f^)^?Em{;=kvmqIVU7Nt&TuVT&W-;9<= z@~9{_1KjK;qc+$G`un{|yJQho_YP)<^v~cqI}6Tji4}|$#Ap{6)TjO+DO8Sf$>*KW zFl7-A>uksR>^15eA~XNRYYEz&PNO1iAE6apk?P9_@WyMd?D{NHdp36sF zl&lzf@obi0&e)cH3m(}ebX0VW>>fCbZ6hP_ozqCHeUmTyi{H<`T+3-=qu3ZKG)s%I zJkmFq`wQQk=J{;!zjGBrPOKGov?e)h&UHv1Jr;wmu4SL`zFeujK{V?lSXhf}l)PD{ zGoQdWVGk)6U6D$QL{8$)bS%DU$)1(zP^1fjMrQ*U#*d;eu3vHEO&`#7x54HM2dU`L zUa}uI7JdG(6uq{oxN_YzEI9fN98mYdMx8kuy1kQfw8DpRXsEoe#Rco`#tYqLsNh%* z!sf4q_PJpXICpBI(oR2ua>6J~S^6BJzQl1(%y^7mdK*42h+_SyHO49rH3x4o!&>7^ zxxQ%=Ty{z1s2@@}X?!}-om-URj2JsA9FulTV#8bqZV;@p5XTEp(KAzOy!92uR&>IZ zt=hAqpHf}+cnzBAS~Gkk&=)VoqDNa~$GzRKp>+f+qT=E3!uzm)>09#Km5OdBZcx*U z=aBSSJhyKW(JtpY#h3giXCLvx{ZCs!SNR^46NMKTdq`Be=DY)`l0@a`I{IKB<6#(Lq#e{DGW(iSj(62}$4)qy^Hjxp@;YSLYbL(hL!iTnOX z&?`sCg-?9A;|t*@DT*^jzwCxyQ`fLQNlC?bdGLa`-*q} zHe3Vy)ik{I$6g~GnSk|>-f72u}lZy_WNByAbp2P||MoGS2 zWVx^{K(E_yShHDV=;zU6p*@?@Y~k?<9T&V5=bkhKE$8-zb5mPm%bETh)_XH$yLzGT znKoRzqXZoO`VkBX0qoqy5jU=#fI<5@p>^w(yl-wYJKBl7Yo|SMV`nIqIX;8xJ^$}0 zb7sS^5J*{f9dvsqLv6dxuy$xKE{*yF)ZR%P{9!4>kqXhF(?yO=sR1L1-Nmehe1hV* zS@ZzbOzXmCkAUs=3C4WmZA!{{3{{;|Sk=1a<;dv6VT2z?H?ZNupDcIO{9lC2ozy0zMC<;%4;sN3t zx>5K7z8Q^q9oLgb)m^eK9*xnoO8EN!BK@Bm$nKPyZ1Wdm_J((43X;`LKV(y_M==?u ze*>i}&Qn>24eK6`r{*(ppc&aBm!9q>SpF}`-)a+>4;YO3k;zy;G!BX@JW+Et9JA88 zV$2y&)CqP>cGyIyS^FF5JIBhk16q@#o8Ts_$d}w_EaIFe(P%f>jDjuIXi`iiONTL> 
zb-f0j4d{fGS2t7H%^<9e{Fb(@apX(Rov~s|GnMoeJi0-3(3}>9c`>uF?7k11+g$@w z8(EIuenIxFEdWibC%HcH#b;ull`w6JQkz2s0RP6o!o55PE))dh*fi34f zg;S}*mtVJC%DR3VTu-Hf;4jgsVIioo98drEefBSP;-HF94)=abDTY5Gx_>C-{gcA< z+e9|?zbI9{F@a9miF8{dWCcy(=(1>RE`3LulyW)!Y!v619B}5}5|>P{qo|{Tr51XX zR4>lS^JBOln~E8|mJ@}ZrN$uVHe@hBA#|e!-|f;UOuTgbzh# zNIBx5bShhPbR0}zt06Nu{=x?-uqKxjE0;*=Q+l!jBITABJ0br1VL5tUE$AO><X^;F! zrnA3^KDi>vPWKb&-U~frd=Xei>Zq#vJFtdMZ6cUFN)RxHOoDW(|ip8e?=Hb!bmauVT7Yv!c zob6s%u|mwDS;zZe&PQJi8RWwOgTCUB_>G|V{tZ0mo+QgH+ekV3SNiBT1oOB20ijn0 z;Ir-Pu>Xcxs9uoD@nFjp8DCM+Td`B}>CKNvwdK6)1E5yFMeym*$cDDJ;Ap4mqDRvS z48OQa!SO5DTzeJjbw<)$u@b)jMRL>g%b=KkUL6$Uz=d{Ztk=y`*F@e0(~M*AY_lI% z8aYoPE! ze=hx@hC=h!@TH^(^h#L)r5$`lH{x|l)I5cme-99Rf#DqOJBQ)BT*^+hMvpTyABBokl+-!!dWsIlvHg$9hYj+6=-nyQ5HV5P?~* zsz_n=NJ>hMz?O*qn=~`d7+i#xWS$z^Q%XkX!I17s+#!+)o0K%4atSfAU(=WR6 zhsg`s?9gp068!|~TOM-B$9mc4o;8;)I#eU{V?}=G^Td*5`9sBLwZ0eQF;f_&f61EGp1+pl}T;b)kj}2>HAWkG>%Pw zk-!JbKryS0+lZ$l@z6grF>vW3E_`r6Fu=#K%Voi?oIf5^d|e73EZ%FOg5~twSoCvu z0L8$b#kxcX_6u3hnLVz6Zq!sv&JU&LQ-D7IJQr+$F8C@zaPEf1xW(R3kk0U{TpTx> zRp$H1@5(_bzHv8&{Ja6;-;ZXM%b{YcqbpD`{U8-M*t7W&VBNEQ!WS}}%RbHk=MHO0 z5j>p>z7@$=;#@hhs19P2M?&$BFs^9*0)7p_w6tRc`-pd^oyl@G=)Ic#lJ7{Z{+Gbi zdmyT|I&k`v8 zj~Dun?!;MHgQeKjNGY)|A#Aqr+w6`6i#~R|%H9aQEX6s$bph{<8HKjDx?;@mT*~d_ zLfV#Ra^d8!5aNB1vNkTJ@WbU)(({`PlwMXurHdAE z$%tS{`#=xAidSj#9oUy^|9h^$~p9L3a{u=4K$UykDJ0%xgeg?_L z*NDEBL1y!A($$%R5m=z3+E&~X!y&IGA6(kqfV>k^SoQlwx33=@@KxPdtk;g?y+$z{ zr{~Fach+&7@TRzY7Wd)NVOVrzv|wrZkj+)`I~%oGij6g3o$-8XNpPf?i#@n3ccRFA zwaQ9uZ-}rCgh7R*4-Wp{uVmIe<$x9|XVqFD2EZlkjzY5MHw#jsc1x zSXU_eX+6aG^Jp2iI1fX}>c}xWeFckq3;4u%VX1Z&298;bxBd_r_syG%HUE@C)8y+S zC;BJ3j<*%r{5GI{J%At28Ho;web^#yywLZ8$-oQI|Mw^2dmSRQ%LkA**pwZLm!ZOa z4do6jftaZy*jqfq8r3Y0%-#(2_zGAUZ-%6?*U35Xq8xfNl0Te^;A^vo@{Vb}(AsSj z8wNSbHw~LGM@k+2ma3J*xmo12b^iZ@rW?OXdGBt5ajQ%@U&OsemoAm9>H(YQEMWa# z$DnKbClEEDKd8>O7tEzdsc!hcU>*>UTQ5$>n)QOIu|EhYaSD5mpUzGTmf`p5V=>^* z<$|4+CrzBFVBKARSy47;REfu8)V0l%+%By^t5{ZzDP|dZgKo!1xzOb^ 
z)me)E;g$A0_r`im*%!)<6JpTls}*bPSD`X61XAy-1WTchVAk!EtHua7<@I!0n&u^V zIDK$ykjMm-S)m2EqeoH?jI^(Ws-Z@Z+j|o!r?PLNIm@zBTK94$PHx`%z?K)_F^>zF_R;|K7kkzKIPFdpiu z{_mqXU~w4RUz`V(30t7L?M(|vg~l{F)bxpwR>npQs( zW0!S?(j7l2KK?LREM5+NM;CC#mw{;SxRDGCEYR)aV%D6!@0PQ!4=;G)$My#AC`dC4 zhklGi)1TLhUPTWn+{T?#4?dAHAcQiaO~~e+=z}>v4AVIUntXzBnwZb4ug0?BZV#-! zSx1WGeq6gW6r;L$fa%FCa5^JS@L)SgWv7N=mD3oJ;lD%bXN{!Ya-9nMtVhM%9aQk; zKlxhLV$M&n5&4Wl$kG;pro7NCqNxpg-VnW;SEA6Tdkw+bt<C`avDNwq%X zSR?ZD>Y0KyX%j2@zpql#i2Y>VV>Xse4x!3{`$6;GRVtnS5GI~$!%Eso6F>NH-HltI zyktvmiE(`Q@2;3!-a?9tM=p3;CAR2Y0B#0?w=0mv1UG+T{RckDlr?+ z=uF?g4&d_b<2n1>DS#&{aOldBoZUErQ$J3j^$JIBx-$q)H`-zRAi>LVUW}R-E>er@ zC5U;WK;_3As(@IG)DSUY~H-v`YnY2Cu96sU(O#Ii5v*P_w zq5rhVV%vFgKODm9Keoyf>sDh)w^nHII7!}vN0UkM2-KWbN;*-xQ53t1zV6b)aGOO? z{cs~#WmvM+vubj+?8iG21>bk85~g)phv}Ukf<=qi_Ybz@%(^u^&AUHqGA~FfO`a6v zCw8I!@m$sECkWc)hKddEE=+7QU+8Bm*)V7td)EG4RYpmR-;9P-N)E*;ey^~TJ>^^QT@0AZNA=qv;%da>Q0 zP&Pb23rrWilWrMzXU+CVhC($rX>(-zaf0j87|LNICt#eVIP<+nqkhNfs4D3r7ibkq~~qd(^hmP{S(#>f;MH)sNmdqRy7JzX%V` zmtz+i(w1Vf$4rO_vEk%l*C8h17&LkXVOiL5P~TdEs@ZX%JCIB1j%)bZztL!w@Q4&| zvt-ria5-v@6Zx%3ri_DYsCwlEN>h$NPxpS@)V*G0N@LmbYbV|;8FACp3`xKHCs0fb z!Mai>x;A?e+w~NA%>&_NX#0uOn})Mj0}6QC>cej z(QeqVr-n9XPQVPGhjhhc3YOe_FQv};i7NiOMW$olLm6L%Lg&9B_m?X0ERDh{gEmxh z;0pL98Ka-2iXECGu_|#s?0M;lhMi*s6FC+1PORgK_Hh`0u@c;V5?sEr4*XRPOJ#`nl__<=uFS$~A zq@7@9m&(uIiucJ06Lek}iTX=7lQKONCR6G%l0Og{RlWZxzQbbK;Wwjg+hmApbcZNVBF$3URY! z-*$p=q?wARJtkoF-`~VO-iGxXeuaP)p%^~T9Q|geN?n5!`0Kl|Tr?+0cpaM|cTF1X zDH4o?)2(DbDiJE`7I0>pozNU2a;#&8ZrWe#rV6fyrVHY{*>^!wwl$(@IG8h^= ziQjnu>5AUS&HIAU2yB_!BD?PN68YpmNV9&8RMf5$EB3V^tH1s5nBd??E_Py*L1N$g zYd_4g_$Wty{g0A!UX!(9UoLb#3+7GYy*#`fr@oA#_#6fec5D+3LSU+301;#qWa-WdGTjSb@)Ej-3Wugv1h~9UC%IMYT?{kHs zcPUxvy%_zLpQoS#XS`V60Tt~ds9|T%|`(aQ*A1=u5z?y4X zSyeY5tBjXW)$d0^n`uC*!G8$m&q+@a{#KGhr9#?2K&KDVBFx1`MTMHwHgD?3tD8=y@$|bX@`{wC!sQU2FHz! 
z;5{hxYU3C=WZxm7n-1of2zO2q_pG|#mSB13l^j;llf7p((&;q{_M0xau@6Kym&G); zf0hNIukFPQ_KKXVizW59>GYxAj2%`aaM({fsc3RnitraanOn24rO6ALcMnJR69&BX z!!XW%z6qL)rh~}~bKdKkAkO!br2M@nY93xF&e`OQ7P~uu_pd`S_2q4dQh1?Z+EWNj zKS-z19y9X^Qlu z3+s1XkKe;xQPt~>)RI^MOJ5FQrDb0(9dQ{ngS$Y%F&*ta6oWRF6S?;GGl=aL%uap8 z^BmkLX%6{=!FqcRoM#1%*$H@hWfxp}LiDKl?3MkTdSl|Zl)+zpw74bA`fveEZ zS5B7=qL#7A{AGA+p|j{=^yBJwTc{-9ik$Y_B+PSr3MscY0^C}~dHdW&U)dW-;5H-R#8p6qA4lofG)l7;AND46$;)X?rSxjT&DYD;^O?RCKLkP{H{I*TF$+hDx$ ze9?jZg*t{Gh7%Jy3de68n|! zgo(DoBi3Vu;7i!xhX9eWeXG&tdet2UVyYQ5(jR~6`uXG#p=9k z(q6%hY%cIX!%H{GWW`GK|HFZGJJ!g7dT%Il)*EOt9|Bh5u5UA2f8QL!3wFA*=6<4_ zTWJW1Z-cRDssuH!#rfiAEQS2}l9~lKU#Sp$@pUseb@?~R;-<)L9e+(#4x#LLx*obJ z`*Dob1+aZ3v~&Hz=qi5O7LJP{`|wS=HDr$1g=SOcbTjtz{Y_ex=FFbGH59i^Jb$0J zlJA2F;!fX_qvu<4mf;jWJv|(4JC#ymFH8OuJe*A)yRhx}6Qo?Wlzepds68Q}p*SZz zcYh|w&EnYxTVS^S1d&O1W=m6BRQnjSTlZMpado2LvR2cK%Maz{{F5DmJA+BCTKCp2Ce_`;-aO~=<6~yMwRP>LK6rxvWw^i{!Hv?n4@E{6tt^uom?!xA$zU(2+2lZXy)!wAfU&F?t!_JNvrJR7Ot9N9b2T=Kj z>Fo1!6J>QWq!x=Fly|czv7a` zLf_^$Qp}KIa50&Qudb}-sH4J%5-*tAbIhR5#g=cCm@$0+NIoY{LCH2fX>M1sa|TN} zv*&zvj`QZ`%ehoH+K!?>OI-TqEhq-egKH2Vx?t@&Yht=tgl#>1T(J_bF7-jz}R67ml&7!MD^>`VX8e9cM z;ZX2Q+7EV)g5_ylL%PNf@@9>v(7%ewc3=nLZ~hgQj_rjOvC*Kr(2+KOwi2w(C(wH0 zG-&@lN`3(~viBeD;aJmB4C%ZEUtKoA`tLKj^jZoSG|a@LA;%%6NOXfAT+GJ(RakrL zB$z}l=CIS>$*|80O5IjVF<-xmUg{sk(@SbveL~)egOmeIgdX=~@XNazYn)?o@ z|Lz_Fz)&^Q{+#$XDi}8SJA%{P10R0X{)ZBe7W|2NCo!eq{?;oV~wS)ENol#vfNv=L1 z7_!neiXHCbQju((ui%AEjlXpTkOHbC8k9ptvr4pmJ{Y3P4*Fj}z8%0{MB>b|Fv z(qBuSS2Ez%Nn6o>d4lS4{-dOzv*0j%gMW7=OryBxKWhXxM76dRFyGf0)aoH}#G zk%v_K{EL`Bx^uyCCxmNy82QPJcgz%Cho{#`*LSn*^JFQ0HCM95(TQN*P4v>OJ4mWw z*GXG{S3340kTo6u1lzYp=x`|jt=qY9LkFQV4fY4ckcLq<4eeO{qFT~_f1mu`-Ic>G z_h;ScNNN2X@weW#!Azm^70mby)5Z+p7=t_H@sG$)$jhMmoi`_s{~NqN&%zP&I-%m{ zNIGqmD3R;np979pS^{qDKb^UFJE#yQ*OT70b36Fi@xiVK=nN^b(0h6G){uU zE3zxq!-~5ejukBEl`@Q3iXJWjT=^)R!-w7gh4(Vn-a84&t%gwc)SVQ+OLEi77O0Qy z&)RPmocrl1sGP(;akGkDAN6A85y2kn_&|!PT`&5;=S%imazL}tM_wB86P^?N)yykN 
zq?x0Vo5X#f>OdM)#fd(kXMH*OlHl3RUx~NeXJYEdFp6Is#F}ywx%{ipzE3FFbbJgS zyXnKaed9(*J)2MQE#92>>^S6#Zn}~i%PBYd9hD5* zFH`9|&>UG&9J1R0%dbsG>+kchb)Q=3?=93EF%p|B9!XY*j#2bGkuMvw62cmvgVHq! z6qk=kkGG4SGtUv+bhrRY^bbPa;V91Q)=%gG8z}GfBPh)}Nb#4y!uO88tk~bp?Mh;Q zR*e@~%>AaQo>(XQpAxfL=aI#lfN;=M4#ehBqTjSbTh^{z1+hhfWpVgdNbUWrtUl@k z{vU2bm0c{=3I1K+iGG;BWCA;ywCAF@o0RM9C3x7UCH;;1n6XHYbxsH6{63~!HEI=k zH!r5IPkUnXinT&(7$SP%EHKRfx!}NfBE2%j1Kn4n?O(r$O#XH78y3%+bG?f{q|Cv% zs72!3{a5yn3FEKx3|areA7J%hqTpu9Zhne3LWlO_?Dl13SS#lLCc$+~6dBJuFQq)| zc3jXmmDK(-<@|4xv2<84HtbWOazhB)c7H9-pOt8QX%cHgtvKa`4KH<$|1dRIS5loy6yysdR&+QFpf*l)ug$J=1l)S2vP{~EHN`HLN}iu|+QL0H{?ka$VO zS)&Z(tnMw8Unp|EA`7Xy^|Cl{$#O{FeS*UKcVz#Ku^5a??~&)jlhooMviQM|B%k2_KzF+n1%AIL*j1hIdEgY0 z5gsdg+I>mgb{552BbFIPP-^8A_B$2Iy0?cU)zGcv_eKxaKbg#CS5|VZ)fg-bnSwd_ zk$mx{ugD7zKt-##OGgJo{i0Qztlkdw4rACm1|f8qHy5`3Kvh49XYnW)3cj6|E3XL8 zQO+<u*lgj4naZK@L2nzd&qY?|id$K8MM)q*K z)+jo*Oh!^v)p<#4VFg##`->j>c3k^J4^#IC)1J|R=ytn1?(NnY!-jUl(mvu{`=LoT zb6L*OS)sUeasuz@<}P;Swqze8vL5Z`;rA=T0~@jkOY6>1wskBj+YDukj!u|r>d7e` z8c5qi51Yn$kRo*ND8ngzDDLHSZeD1N(`F23U9Ai?C9Zh!xdQk8)gGNSXKCV4500H= zNOZdt>N+nZb!$7w%AO0y<^j~Py;sWp+lJgu_UCeg30$(jAL}r>4TGye&&eL^2KR&9 zW6Ow+|A4dx1?T;n1J&kQNOjJa0}JCQ`u0>dd?i@qryf#T;b1IUwnOZN=dcS*^wTp>K;u;3;ZNlmwX&`EIe9KUaa zs_{Mf&=xh9T@e1C)GpZSDLgMP(x`f!*wei`@1}VwNy@Dn$ld;(3LHPlhgPlNro~Sr zm0~}XkFe#2bq46XQuM;?_oT8)(aGu>N#XJWq?e~5)$TV*(QTOIc>X1PC~e1|tOZXd zNO;8-4Wp#F!$khw529)kP=A$!=mH(gEi2E0Z@2|&oEOP~`Uaf7{S^fn8REn}o>*+x zLuBbgIAX08x*YF=4K7Ap(8-8)Y+8kZ28${6_)WR4T?t(rwwyCono`vFRtSGQSa?zP zUr=S-Bt^HG=+vt}s+gpUmuK)qLagC`7LVjE{CRebKzpf z8V)}&l%vLr{`%X$fzy)i9Mz*93YMJ&UE*6fP^O25=t${9I-(+cnp^E@1B|j#Q{9dQ zkn`snRy(@FSJ6i|aceh`O<%?N`<9}x<`k!19>`IbV&Ut8I9BbS0yZo3*|}M5#Y<4qYblj&0kjeKdGFt|p=HZ#{`$<86$yJr zW$iAKQ#((Eu2kv;rPGEDqPrvo~3>XK=oSl=i&?bf5MrxxPo zJ7V73EqE5+wv*wQ{f?8Y*d^zUn^x9 zx0RGjlw|Q>JrtN_K>T`lCed@+^vD)#ENwaPMt90w|3+lK=3{Z<&zM`DPe}`+(cFI_ z8pR2{#(XBK?o58H2HLl`%Wde@kYi4UpHp0itJ4z}Ebmpj%;qC5J@uz`HkKcJCgoFYCk2{~L}J?hE(%pHj8+az1OG6v6m 
zZ$V!zd>ecEa2ZhkG$;cPn4+Lrx-6L9l{xni%qU5=dt~PRpk6_5ZIdvW-R*)Ho$1qtenDL#o_!x%%HB< zK=|`qu>AMc*s@b})}~FD^vC~24vFIL?TgQjnx)O=M( za4-=J7TU12%8gxQ!FHBX751mo)FA-LNvflcpi zkjo0EK$_^G$=$VsH9QwRrK4D zY1hfnG9Z`yeyf4#gb}P>wVo`!g{Mh>EpBc%6%#iv#G3RVw093da39R!GtD8*-2rn= z=27mBli<7G66*gDmw^7X76^}VfXHQ&ga@P@ z7#>+f8jlmQV*Bn3&OSa5Vmtr4j?librKNx}+2Lby}n!+XCU!|~n zL%8I4uIxCe9;#}PysPg+`HP91v1SM96|Pd4nj!63$neIPanFB7yWn=S%=y3 zDxI%56Z5HIz6FkWFrQ6*MpMaO2gz{k8M>Reh+Ep$k=4C3;3^45g6U^c^xt~HDqjmW z+a1u;U>R4Kl~diSnV>nHQM^MH$Idw+;6Ijx=Qn|K9~odt-}{gqf0*k2I7k({r=rV| zN{ARYl(o$kR5zwO*NVJ?uJZ;tb<9wVezAbFul_}fZ&PLPeMgOjf%rVf5#7_4uwvbf zVn3(09CT?BQ@=;hw4n_e+Wih`b4Os7aT?I~(d;+-3=Az68vm6R;nmm&_UV_XAlQJ; z^@zc$I0YJL2c!1sNm;kIhuri&g#A)fm^C?o!mpiz*giqR&*;zL4`yK4$oc5=crvTH zpO%b=3}aoFQL^*15Dp{<5UcW2wanw9E+g3D(kZeq7c7p6Zq#yr zG6p)ffiS@!Zb?@{!SE?)Il+-*#2qkojT6QjoU?*A=!$)5$Ry9$^l?S_ii7)EiyzEkM=aM{Xmo zPvnaI;(Y0d!q3?sPG=io>qJ9z6}x>~Ymw2rGm7@U^2XF-J{Wba4iquxU2|L}bBn3~ z3MTEMz|o_{uE@x(L~3v|72dq(&&2NSdM9isONEA|g8vLF1)FaKm}jXtT=;Lb$9|!^ zxAj@EqTxc?szI2h?k4=}`LsiD3EgLkexpk=1kMZKl7J9uGMWNW)7>%YT_EfHzd+IW zNt|(dHJ3L9@z7@B+x;t0K6X>=5yKZs%6kOOvmH1#auAZL17^*s2IpSpTrhn$nWuL} zTwve|0jt_@R#`B3o6P2_)*+ZTCx;pu=dz~wY_V=f znVjt|`eFX)NtrXppSF`H<1r_*VdDmpiihlm~|F{6=+?;wUwsg2Jqh zg63gfaamu{QIYhD3f@$b(QuJvZ8wGrHW^d=lQ$4P$QtyQP3OCNr*ht|Ctwk=o7}Ai zRJxm1^2&H>MIq+F7 zSPZKNPuCJqoVqC+My;U&6Mga)9)gnME>dMh2>N{Mk2Z@uxMjf+2-o#w`+@eNliiKA z$8O0JHBp=*7(f%>4ngN_5rER8r18R?=QOz>n=Zgof1Gi|X;i^SU08>Fc2Rs7Xg@ENa~ zqLFP!jE%1%b-{}X2Y)*WhsMc1bbNP49UFloLAMn99#)inXUCr!a~SJvXt z$dTy3y_7uGM59ykG&I>ZQ*ezMM2}M?EYP;$yAuX;s&^GAM0Z%_kO`=Ku?DTAKfw6S z6u!I49P>Mgvv%~2VxOqjq)hYT+-_Y+ciB|73VT6K=iSB7n@z6<_+#zV95CD#E*P#a zg~!;4%U0N7jMF>t|NIW>KX(#3SqDi`WH!pdHiGxI7rvbB`@tg55uDrGqqFi?$*Lk3 zDh+;t`YwWV(C%kctSOWWQv=xQ{&@Vpsw*4M*-f|VME=P$0qt$NLD{;A9OYF31^>jd z-9&qSHP9QQ*0xH@kqXXTJ(yH$|CC#LsYvZU3?c=COcVUOc&|+0@ZNVI{-F^DhMcG8 z`)v4egWwK*t#eZrin(YBfc^-fvv#^6broC+t>r25`4NuxIv)s%k3#33f@L{=4^(xt zV?7%qF1dJ$bb6C^NUg5#s`?JA{qcfg$Q 
z{#f96MAq;2hWrF4+bzyZ=;3}`w#b_eFWe?=O)X@edrwhr=j5-elx#Oj^!KHaO9KQWT9z|9pX5zho+(9K`u?-Ld9uPb^q^Kz4IpfzO>?P*tJF zPK}0GW%UEXzXy=9-eqWM*hDUtDb%v!Zz{SmhPyuK$136LtKIOD3heeuw!i%bdw*WR zLHlNNVfh=AjG_mLWJg(Sg?@o#2qp0i_mJmB*W?1`l54NOIxRn z#MB|*$Ra`qSttILf)4s%;c@Xy#_fQZ&7C>-xzK)tVlY`ho=RRMNpt5-;D8Szhc{@L ze9U18zRFyV7H<}S^UL<^Fa1N$^V_rYYag(#_Qrs3PPpp8KqRBtm{{W{G<8Q!J&Ng`E&N3PNv81&N%>@|99Z1|NpI4~``v1`4rn77&8Or@ zk+ssgB=8lh6?nxn1UIjUzz{!?HS0H0PEni@n!ssjUbYQd|F;lx7Om&p|Cyq<_W*9W zJsDia4CDijy*REwWb+n@Vf}47DHEnipR^9B*jIZYE6|tdx_K zbSi)!-08#Flkbw>qk-f-)R~ko&dABr#J$2V4qS&UfopN*cq}v)6(@g{auaq5ts;&t zHVx!yqs`f6N)b5h6JE1D6EOYBPC9lo0SmU;Q+~6U|@`Gp0udi){K5@Z8WDH9sPyq&1^(>6X!4^|lMDj?SR08}H=g zmR?YG@-dXe6@b~O4xIITx)?!ZIEV!*1W6MYxl`Mr_%mu~i zx(kW^u|j7P?6LwwPQ6?~KK*XO1Fx01bd>n*^dBrd3C^tc2$VL9Zj#o}*{tq$pVHf1 z6&}f-L6MXztLrQ%ec(vU@I%P^_yFwA^v3F|e?f^(bec+tY|l*mOx`DRQs5*$-L@2P$H@ zNN+J~Hx@$iMTuMV-$I6~$SK@)zO7SaM3Fx(U~sE%lJ^9LaO;4lU=Y|*aO5{a-8;dm{&xuJMz~^1@9R+E z9n89$10;AP{;f-2K-G6CsMoH>+}ugv^FVkd7wJ*RF2QWJ48{*O)@bE)2{e~PAiBR` zo1UG)u5euBki?E@&S|PRH5WtH4C2&tZ=}$o2wqh(l0)X7g$Es`veAqve0;`P^nCir z##blej-Bi9u9q4!+Lwr~`6kJ%?I5%tW5Nr>?#-*FJ!)Jc+_E;A$q#Bx@!F|wsEKJP zE|_|rs^%OfZE+Gj_B6zpBSnJR%tYi7?4pU+bI(N*01B@QzJMb^*$0zIE2?#`$D_|utrteG3-R#1IV)-AD? 
z3y;=A#^!LediM*2Ue^;onlMZb{U1f9+cD(dA(y8N-bPFqlN$kh`xlT30!x(kOKBaU|qYd5LM~R;bPXX7&BBbEyUb2atT|%Uxx0LL-35?t6N!$@Q4(B$ZztEOsy~>8q|m z-t(sv@4isvhNGqA_H9tTGMJ1^=VGISn_zloQp~)aWVz2BN#Tcv|1-n9ldWWUNe$os z^ToRd)^L$l?EmL%qB7&HP`W-4%itYkPP$32KE$J6mnPX<uHDrE=f;WqP)QZUoauqJV}FOrsNY0i>P_if;BeGFw1d)sZxApn08Kqg z<<0&H7@KQ|Ubj}FMqeV$j}65af1bh40mE_C_-Kx4*$Y)$d&Ace15v9#lWW}vaM=Vi zPCEXET*ro^=Rxt_n_dM?osLV%KOKPNv!MX;A;AN@OR1(Ilp;8Rsdj6?_UQ{~_7z#H z9#$Nw{TqCW!!S*7u~Zuq-HzSth0nF_Y&o?nDyAHeYD;&DZgx97q`!a*Yo5`EfOed- zdMwx24!{TRZCLs5by=tLkt>!SqB<8-Qp8PnYZ@*xqxxOh#c2SaKDitZSoTKmBEgLG z=z_JYZVDa87rM&PtQ*)FM+~t?@3XhTXR+u!4xWjHdlmR~dk=0^9{}x1BYIr1j{SN# zk@=PNoR}St`CkJ#=g2736t0lf?-o;4mvNl(m++a_FXzhgXV7H0lS2RX61&6kUcaz5r*f{gcSZES$-^a;>2OK&;9c>j2S2gGBL*tg z3y)%o3F{m9v+K~GFgNfasYjQ|Ubf>gwzoG|?CgwM>ywh zz#HH#X5q&=Z_HTN5naQkV}AFav0~0I5EWS{#pZ1jTK6kZjecER@XnS^mdEmrO`e!r z`wwXE9+Y#E{W!1FNQ@o+Gq|qW52;70s8GM1U`}_TvdbRqFSs1gxRYG-R)A{Bn_|0v zma^*l7|M3*i+Uz zI?m1SEcAt?=%)=3_fU}!_xF?S7K&VN+uLr}yjKct+JB&W`PnV|-2^uK{Ww&Q+zd@4 z7NP5*5p<=0C>Lz2kvzW&ZtJojRvUi?ue}R7v{7`8Yi`I&UV#;XW3Z`m6?zE0C9t)c zVrze9-G{aipZbXQ%pSvHQiJt%v(RbT7)&28y2<pFe*Nx4VH`rU>1T4-B|eL>Uzd$B68L~?x=LE$f| z$n3o%*Pe>xlF9YOhCPzW<$DOK+^3Vzl!H)qB?aO?|3{7vrQ{bKA~@Gasj1U2UVm&L z7H$=tu!BV>(GcM~{hm+B9wLWXXoR^XgD474QMl073{MHZT33QqJ4*jnkw>JvZYuoJOd8CFEu7xzQm=!>xGy*0v({p4{oTyzxmL9ZK5 znA}heJC4jkBk_KZH))2ZU2DNDcM+;|10>yF)4=b}UvjRi$k2+pJlt^t7Y`r8&hIwM z;r*;3VtWs4(s@wU?g?D>?__G~I}Hr8v~=r<*t1@6h03zNoF%xuCHL3k=@GHq^1mGV zut4Y=Wi`;UeF&%cHdDz+m8@N~U#_=}=kSX$Y`?^YZ4X}}7wcgDv~C5rzRDuqvTD-I znm{Shf-&uWg`ASbUU2I?P$gK)79BgX`dO3g6Fh*Y^=OBw?}v!z87l{dEy0upzUbqz zUEFuukYlbXM=sKWevcF?x*tLXt-YbP&l-0B5zE;^Gt9EBl>-(=aSK&|GuDCesUOh1 z0yyf)63A>4%*7btC%&j=*M$CHx^lae-gOODbr*M#B{ov+$DOdJ&YPVMjb!hiw4x8R zkWvO(W8@+YSiR`Yruvmq-pP|tW#x{!X5G-PH3pMvexuZvi>d2zcPwAg4nO^dSg_Ux z4RXUcbMY`#nmv-gmIPtRcnk0^+C~|cLc2Ctfm{C@&;Li!`G>`ret&$BBuSE_l941N zNmBEi!ypM^kR(Y+k}#VjNoGls*0z2mZEHhlX$dWv=RQ6ov1voBA8o?0A%xb(#>Rfn z_fOYe7gjURbKmE@->+Bdtlp@Y-;WaZZA8t4E}|(L$asf4xj(n4^l#Y>S)c*u+7L?k 
zXi6~~*FxHh(HMQW6REr2k!su0gmKW%!t3uo{C?uu&IX=m*Zv_4V-m$1*_NuuZp4aL zBj_1>PjxT*W6haA<=|ZI#IkoJJFfw3a1F*!VK2wS<@9p%9Mq(A1oeX^eRi(5Ff2#` z=ci*S@%9YVE?Y&4F6n+5LzaMc<~m4^o{D9%FNXg!nDh&JQHkLbR9*!N`s*B&eKj7f zsx0WkL`QPGG}bu#4*97xq;rejUsNnsYzK}5zkisph96v}%GlV#Vz@scOn zF6fL_0}jX`w^mW}yXm0fGrirp@uaZtBdO*==3|^?_DY@`N*HuPY$3KhGUaI|_T|n>4OoDH=Eb zDzdMhl+p&LV3~D+xWatI<~3*K#P=gG;#=N9ygenAc(*|A!sEn@-C6UP`!z|hvZEWF$mYF4Sr~b`tr2OeT@bX>97UZ4CJneJJiRSR^VbmG z(O9TV(yxl7)8^1N!W*@|+$*`G6hVAecJ1d#CB$ykxMVbKZ^PN+d^vv=v%;CJ>~i@O z)M);eJYNsM+m$4;)^+53<0WW2s6ZokqN}fcMI{NX?7p1wc`e% z@ZGO3I`lQ@YbA8Po`rdm()1M++;EU1|KfbJ$2n-f6R_@z zKc&V7kz$rD*!8Rs)uv~K({d#$uZGCF9fL7;v;!7@^$<>v_u-io^GQ#oLgND)GI?7o zzIZSvbmL@%mpw7_<`|wA?S#CM%Tg&LGdWLKmA0>M^bfCge=2$LRAf4Sg zmNTzFMX9fa%lJ3k{qsZZ@SAdV4|cFW-zr5}{|i=Qzmo%h=t=D}`Hr!#GsX;1vnOZ{ z>cW;$_J_@~^3^U85Nd_xDW0SaFBO)#Dl90@5yr7GQq1iX7~INCfjvEiiEWj5SL1_` zGrLIIVK!p)U%Z#`s#op4BZKx|o}rqnWH$}>z$WF%?z_3~vvQMg4v(eNSL12$t+}X6 zGS{0WOb|Y$S`40VgU@dIv;Tqb>JM)V!!JW6$5&+{cirA!O#;fyxG5g)3`@(aUCkwEAPB$g%)@ zH^PdZ*e#)&@%u!aUeEq6o;S8mpg8MN;nl1_o3AXXdBamlH>E)eHMEHRt9`NJ`c*UPhF9nHg06TY&=Ge>n^mv zMTigMJm|>YesuOC&;FX3@nbfOjH~BkRtu2JqV{Are4}WsT_Q|+ThozOCe-R+D{P9` zYqF1bSL08s-uy6^>N+kUNBdi_yF2II)t7{GG2*mKNqD>3l5<4A3vu$g&}CC(w zId(cPG77#GcI$7#t=t({|ECH1zs`lC$|NlKuuxKr8l`VZ(}~cIE5TQHP#pXznXInO zCI8POC@|Dbj9WSev)c!Y(9dIpujMEVHTeS`zT_OWN+WFjTqrq#GZshBLqW+c$@{J& zuI!%5oRK}?xneLV3XpUYETn@oCZbzlvUt;D4%zsyYs7gJ^U%BE!EOU6G|^ua&fm-# zm=;LWeT2ra#lmDV|2*r4!od#{(eUsz6x|wufmLTjYk&#bWNOK5>=9A4WsxYI_)1h> z`Wxz#W}(OMp=kK#wygar7An7=M9IG{r5%P~-k~TPrntzvpVOyI@?U4?P zJpE~<^>WU(@*LG(qQaHALUU{mlwPY9NrR#=zs8dDSpU;&15e9&-wr@U*Z&Eh9uh7( zFow)Oo1?-0f(U%FT@;*sE0l?S;F)3wDn6&`O#*L$=bwAz80!vL@Ux|8KWPyL7o8E2 zCWoPDMU5yK9WH`2j#$;tlX709qFbn^XgxECITIPGibJ=ABJh%|HTy$;dDxz^Hy;xr zd$`MQ*%6B^qiN-LDVV-347JIB$c@vNLRz<;*tYBge{t4efO382x(q0pBBNuTd zVG*^iw-Hu~D?%+rQVz5IeE9jI{U?AD`R?sL)*R2~d0=doHH9#Pr0MP{c5wA%Hr?MM zEsI&VOV&{Iw;k|$j|r%-a#eW_2o@DR_kx|{Gx4F@T#WdQv(p&I~xass-wD5=f;gU~tb 
zfHLlN9W)E4R?eRU+I6MiPS3z5@|1|p;aS?R%&R#yoibEIaHT^}40_CYsIXw#VVXpz zJs0BW@9i=4hiSCQu#Rd9JD~4~7ASan7P3Cw6EnYa!^9DOWPOZh2jBB7xT}XKu1YteEoQgUf_Z56 zx&@Av@VVjrL{Va6DLk9|QoOAVR(2mEHZ4?O>$jmob7LZX88VdG7mi05x)w7(CevW! z8Zzx0g-L_$peFu%k=xV=iUl|H2_-YJq+V2`}a`%hC=vnRFy?YvzoYT7z%YN^Gv}fa>ktC7V}s$!DVn zed)a%>uz+V+gl9~_|%^&qHM7Bf97Ix!6;HJPCwuFcLaIu?TfX){S4&?r%hyiV*f!ORByeedNZm$dbM9diu0~~Kb%gxf4Cio#ne!`=jN4(&_&;282uTm=_Ka0bF5RMHfA(&U`QXlp%`tX9s%MM;1~ zxl2*gI9-@dv!l>JC-QX%sP5}cHC6kC<$4G1J2+y?uzA?NXeosb8wY8lqexSHQM$|i zLW}1c_kCLTjJ9IhwMI@c= zAez)4z-7)2;r1~Ppii#wpQXi;^!rj^MF`YyIU!<(EEYM|_M|!6EV8Q~%Po(WkZ-*T zS~zp3czN9KtgQoG=;=hIX9~rMp;J-a;j(PDY%+zeyT%;z2+o4X;10{~WbAYS;tp*S zZUHkP)NQ>mEX@%OQ<%N5g72g4S76dtUqQ)5e`xso5764%!R?yUP!c!>EI#}obdhtV z+;ZNh{x6o=#=DTy=@nFPrK2b*TqhFx%%L~DvvvtPA!;0NLEis{lK;gnxM*MxY`=6A zwq2cpmF^v>EUGJ1#;1W-TL=cv>df7&!(g!*(df~RlHNO$WgxRh&29_hE6%Oun5;v8C=}n}4Pd69 zO)|!C_cJ@vStvUWp)biwH1>@Xt?x6%!v~$X4?POY_AUUk;AxcjiaUJ3{*$N7t@wTc z=ze@5JWIckOa5?VR*Ozw(mk1(9vX5P-YN`T^Mz8sP>yt;2aP>vp!K~a)O7eDxQUaX zp7&Jtu(!a9t;GQQCsN41ex$f`Uveor2|L*BGWzOliWzWJSj`I(4P9@5;^=1CF?2Yk z-06;xS_P?DG1Q#U1a5Af*ezHrg)1sV>mg=2erg5RVD6!GIRoMB>2`}fAR7N!%OE3YEuB~~hXNy)U{We~lXI4$S{f}Z z9_Dk$?}@%*W`#&DHpNBx{i*H8HYkekOw$_NscFY<$jx`fBX69r;98s9u+n`Qy&g8z&h4QFumLKk=v%A^(4^m#Bv{tyE0dwv6j z-)McoQwQOBaW&=@ug50Z36WzfB$rU`%FSyLg(0;f*qP7HO>4<(+B$l8Mnej_YjW%A zu6XvbKYc0PNS-hJ(cAiKxb=zo;%#wMyXGgboiLc{c1_2o-6f#Dpho9z4pgOe(5zw<|Mm$li9r=p`|K>8YgQpa{nkX`U-a&{)(lL3#K%B+gdX8xFji_eWyy; zG#}0T22kWoN4PSoJq15pje#$pimIg&vyb`+i`VZVstTq(RC4I_8Ef8zGH=EkUhG0UORl?7;qRb-+yxt=Lb24Jclyn>P`|niEHzatD||?aegd^6DtG&hQor?XRMOKVP+dS~$09 zly#f3#LKBBs68-E6ukryyz3+PAHT?H1^%co{pu~QI(>pZwTt1cZBTW*Cd7CW=Xvi zT)A-`?cQ(4bJ19eXV!vx<@b84HGAdO9o(0d5q0jfrHm(;!o^|=?i}t-i~QQrUF8a@ z9)Djb0=AqtCY#6&nYL({d`e{(;!iUdS(0||GN{$>17lTBIgq^yN@lD#^L*ai*B5gQ z#lqlvT3-2h73xMm*IRs-D{?+WVavr|p?FFL9Ney^TF!LpO4d@=*OlT}TL-Ke#`C`Q zT~wtW*CD*LP^7oAA2Dt|`EP0xzO|WfrEe#8Hs4hFZt{kf^EpCQ8-TDb0X+|1kj7b< zVA!c)R5K@E*8csYTon>P#UZ7o?jtXZ37aJfd!~WHdaa!3G>2+R{4slK 
zm1NtW&*$--$Zq3u3csHw@;p~z(baPzq(Q^yreCG9)?@6^_)TAHyFk1&v_sGCALUAi zGEi(iD@EBqgGDB>>^)r!%HP&X$;*0BWz%e&d{2emEdy|8pZOSP8^tp!H;i<7D_d>& zNyPa4E_{tcuwcww+4Lt@bo&$v8SDFriZRE)@SJDx|8Os{!x(XWT@Z$jUMmdTmr)xI z>0L}eilUZcIN|Jv7VOD2{2NRGciFjjCltf`jzzbm4PZjXP?|heH z<>Vnat>;3j`Z<&mHZU(W`L&-WCPNN-6HVri=drKIAa3Qb_h2>WQnLpO?SUH3S2C+` z{9yK*Ig*`IceJ{F8$u_>;Qm=&6w@OSkL|TakKfv3(OO>$c0Q}#P<7wUyQ*g?bm&%c#e_+t}z zZi&szdyz&#gW3y+qae$fpJ)SdKu_LYF=8mE{>o@ag%9Nc{H_Ozn0W* zd}Xsvy+ENZ@Tpk8n{#*F*%dcL-!L|XOdO_A$-&|99ibLe*FT5t$vdDQYAjD z?u+U5VN@{qu;l&qc=pa6k!y}0mEzp3u<`zQp$Lzp+u!d1tC%Eme$pEQ?^l4y)ppE5 z=kA?tD5W)?gD`y@R(U62U?le(-&KfXndZ2o(UoGtxl6lj7x#@EsP!u)TJ0RqGv0i$ z>C-SiOHV?_moEjxoDyk&oE0wDj)K!Cb1W#BNpU-0i>NQt$@kWCxP94;GB?bkn*Vn+ zgr7OzIO!iSSe;W%KIKfi2i_E+e=h^i>(OxVNq0(}#O#>YHh9Y;9)rX~(QLh1L_K15 z?bpAH+xm$Z{^BS!aNkKc`h$qD0Rm|_!m{Hq&C3?6cC5h2thww3vPAX1hvLc;D{6`9 zLe1ZyxZ<{!cI&P})*rP(IcdEx7d$^47!JO1{=!My1iyuH`((mG!IX@%3UW(x3e~X+44yfvChb4=zLgC@#oO!U~J=%2t`6g~6X@%ieo^m0mXO3D~ScF`BPe>8}^o0sCJnf{nDs3$Z$ zw;;{S(dbq)mTDF~lI$Wbi<;aWP;_c8HJXM|K(}@{df6nbSy>9cQ!FUv_l}U98^SYv zf8KF*hUTJW;8uHw=W7=v|Fh3UMY$b$53c}=e)mE7b{#1le}u9-7a`5u7A;0E;hwcA z>Bj!2e|A5aQumJ~i@qlzv={TrcREYYXbLGD@4=#*BQfCpAcFCa*h60{w+&lGu7$xg z?Og(iZ3~%2Ifu@2PA2rf7zkX=pOJ4Qs%}-#IPUeAtlSD07CK?cMV`6aRe|MA=IT9I zkKyVY+{tv5H7gEqfAM_Tn;}RoV^4s~+)(maHykzhEkNV=OisGYz7y^LL~{@3eE!F0 zV3)xVIeR>5zsZvfYR(a*w+V0Vpj}@(g1P)3!0_h_$^A|V7(UODnpft?8BT#a6&8CV zF(Gh1J#6QPmE6y8>-Jc<)%6usJ;KmtVld@0Pp!$b2t2o*k}?K{Q^n~+qRlmtg6>|1 zeCMU8e7aULHZ2!<#_lxuVsFfI?m^jJ%AJGizzvT@9?fN6pk2; z!LA3HqvI)(ytj+GJL6Fo@&zcJ6SuZzfertC_HN7vEcDcd{WNL#yECQ4O~ef z#sT#1>uAnhCX?fAJ_}5bm(wg}k=^aV7O!;5#*b%Jx4)?e|CupC>|>z8+JzccOzuN-Vp!o_~E&E(umc^OY$g zqpl6iD|u%2c8|yiv%&zw1~ROXWJ9y9T-e2ef+IN>QP+o*8%M}xRXfF#t8ZI~83%$W@r6HGm+)OR-5g^)>%gviBx%3Al$xglJ07?* z`FX0y{$CL3tYf97`bY5pXN2Y~Ytekm21Z{_#+h2qNPq7SS)10PVxXh`%j6DJ{(BTw zPkSyD!8$Nz2Vvps0J>7q8Eg7x%dP>;3flV_S~3Sw?Dn2`HjSS}nJQ3zGf1lWCr{Wv z=t$LH{(zJL321V7rzq``C7^B;HD|E1ZpsZbxIsaC?)FcgC`|g^6%nsEgUVb{t11EhLsQ6Z 
zk}DJ>j)!p<_<6T%Jy~74EV~}#nZf)d6g_zN@opz%_gf)C=gr2}UPGv1SrWE%>xb_S zT9W%A&W`-21h;*Ac!n{7>{O#rx$h}EicW*C>tV1v{tA>88n7z-E^nhBfl5cr@}C|wF^Y78-Q*dld$Mge+r!^Kyl?gD15H6>vbsmz&G;0 zXC&4=N~g7oNTWqR9R=RBl-zbfy!iEd7!&`QqE_FPRcb;%4NmG zde9Bhs)9HEEv{T0M5;Pp&U3n=t}#}n3(SHO9&^ad)=lJ&GNFuTq2z6bWD=7v3eBxh zDcuvr?1r9J9EEwKXH%o9D+VUpgF-plFQR`Wg>yeYLq8b`u4Y5JJcz2U&m+$#eelh& zjd<*D-gof#$9+r{1mAci3^)d&FV8`@uX|!i3U#{pHLU#2C*<-DksW4IU zoJ+7XNlA-tttQ35dcS}MCsJp(slt}UQrW5T!guB#Fy8(kH@*cYKzP&G4gxm*P?R`&VT5xIN~d>`Mi`yrhEre184S zS%i-{4M_z&_knwo&ib&dezZ%0mi*P+=n2@0m1 z2dgXXF!HGl80;>nnmS(r!<;;o=b5vjErWYzgLewsLt`<|p6?dS=hD6ELZMNeD4?GO zsv@0HH>L>G-?(9EMXtEM)tp?WbVNnNMu=N-6B6#kW2K)H22A5@?!z^hd@~;F_ej*B ztOxaX<*ExM$yD>;xzNS@EJu7d7S$(bOL-2%(YNana23&5GibL+TKy8#n=)nZXD5Y! zP6p&8jKS)YhoL|-9a~-3^PXEprG>x#zk8CBDm$Xxa$or3?}kz1HjwwWHW71i3G5zv zMr6%9BwPpeCu5W?W_ArgtAS6Xw68`|)Vj;y=od#VzuX32p0%a)T1lEte$<-P1vLjE zq51f0NVxV`nD&dNheK@9eM=d%X#9A7w@gHK;_rd3P_B7iEWGvyP}JPL&=%B z`g%U=t+dCq&F@9whV_^|@T}x<|0}FXJ0)6wbcE0`y&#Rb+kx9gfzDzO>>uuf%1n2N z+V?kT=3S94)Y+l^jn(A)AQc@SGze{VFUWj0fJ_SKqiJXqDz2L^+Ay-5TCcR*6~_@49&}uKw?p9I!uz zH2++ZJYWTujS3SLO=qA$n*`c9A=L24Ul7@N3&Mlfqh0njh&0^_x;xKgJNs#(X@0gS zu-OG>U+#)Gm%CE+yGo%8xGJ3(?}ll070kK0BkKS6wKx&Em<)5rl6meNtnEG@8~gKn z>`W&LZ0!ohi9N}`_W>yMy&??ai3)rcch^l_1S@mAzW?qAkiLUHs zEtjm{m{F@M^9>xcA@u8k)Ksv5%C2`I2HKI6-h}sqSB2-{?gC^hO#XKj4(=FDhMQ0H zzAx=Cy6zo-W$ zo*#?}iAN#s-+kiBnSod{LoWw0D<}EUbW9TA!m8a6v70;Mn#1f8aEs8h16OhkTuyHe zAXa?riABRll4koOInv)2&7u))FKF;|l@-psYe@!|K=vU$6pr2B3yUTd?o1p(s*h{Q zqKe&uzkdeDLnEl*)&^*Nw~!iEjisCmW@tCgPgJiyF6#f;#GJ8YsQme3LJ= zNL3=GZWm-vxG&OL4ng2d&MACei&@>g(V{UEbk=F|t-TW|%xMJ~I*mcI4)ee@$Au!k zV~4MN4PpoIoNuQeq`%#W${7xF{QWQr4u2-1MvkJQy8#$j7%L1PKIz>)OlNmc8o2+W z#>#tNvy-J+yzDxb!mP&9wEY_1GdVE-B2bK5-;2zuhC$Hq^TO)#PzXJ4hS}fT7G;j} zz_HhS3ZFWWPI#En&Ob(D?J6rMxv&9i+?WyeOFr0zb)pI{{yzS~9bfZUGM$i&M|$(V zy@$WZ(2c=&Jgamal8DLQ&c^-Y0x;=@8RlQ4);4v<9wA-QtVkwwOz_Z^_Fq; zrj8w|n=Pn}-|L3?YlUaG1(?MCQmeiZO+a;KPIa8Q) zEM-Q0XViv!2)o%|fu?}{zIKZ!wvs;&UyZ?VlU<Gdn>8+BJk{H6%~H5 
z#;7`LvNs=2>Y5dDO3HSjt-B!E4;{vN{yEHNcm&%2F32@)%~EQGHC8MhfyRH@gXT{M zux^PXOXl82dOwhT44m&B-d_Y*11-W1sNHv54(ue6TjnegAJ4t4eP(nr^q#foI1UI{m|H&gJOA;C4+8oyog|ApxYJzd$#u1DMSj zFCu?mB}Xmu$M9!O!facrIP#@C?Y826ew7}y#||=2=e#O-$Y;^~7w5{HQgG#8{B!Mp z{d}Tp1U14=Fh-P$8Vbdfiicnr{ZSPfe+M4&j-=M%HJF)wfJ#*gYNJg>u~&c6mF^H_ zok>Kl>>)*^?GO>1VK?hCRMgHo2fB!XBD-e0q@KPG8ozlW+_XQ46BX^b(=-A+-p@sw z@&0JCek-#@tNG7q6T#mbAyR)00(braNtyAe8=opQE*=ev8A*~cWr$Ss{+Lw%gHg;3 zvBdhd!iM_u;G42o=+1cRwN4wsE8LHa>D|PMeMxj=Ts&3I8Yq%y z^vCAS1)_L1@4-LvOw6mF$V=e7(>F7)W`Q3n7xu=ec2n8)HUgR>+kyM@O8C^#AC+!} zA}Q$>3J`HRp#`iQCBv%FaC?m9dK;qH#T`o{k}l!gAqKmM3;s z^55Ci3ROSHGEdh56?*OY?4LFYEi*Q&=k9~zpO{DemhX=@U6@sQU&P7RMMygB+(cVZ+hd0qyfqqi zx4Nqg*;#VoyYHd4{tqZIE0z=PnxkvsBs?**C)J*hL5~OHc;~iNAG$9Q+`7aFg|$lB z;XM?6RtB;AO-tcn#Ug1+FzR-Wl-sslgV0|Fh$HXYW9_{r5k7YXD!=4{B5&6D3QHt| zx*G;LUx(5OJ0bM(-;n0;1AynZ!pxx_?Dkg(yMO12wAb6=V39wnvwZoz^GZ}K-YJs2 zhJptyK+mxm;>u52D$sig`+kUZuR4&6b5{zFyDgN*myx)7P_$0wjDyd5Ty$>%W%E1n zw)b!}!;#7<0A4sX91NBF=(2v!q$|D zFP4zy^58qI#54uP2?`S&)#`m2BNEbW7zE^aFNHdTV=@mooC z&5RCOno~`5si^MeNi9daP||^;!Z;;U9Q0m}UN_fLcKKDPZOwwl`vDX%*9(I?Tasqb zT2a!jNq+XXAGRI1D=wI*FNyv8H?p_#xhX|jW%*Xc!x4ahcJsXZ^qo#01cLg=`E~c{NE;BG@NxteXSjpVu+J#}v(ct&)`n}*8 z8H^sr1=P}IIPU*35p|natF-NBi`}o5qcPzq#Ju=H1Wf0dcE?>}<PT6>#Tb z^N;#a-+hoha3+>rT!dzu*HFs*cq}>n6ZbPN3eA^ZkXz#mCa+?!d=U5UN}Sj^z8rJ! zEk|cc<(+aTSsjv0;jcqz@F+i24|^hqyC#tT$Vh4^I|4OD5ombZq^enwBRj{9rQ8$? 
zXx+F0bY^F{n;K2(89&Oc2bK#>kc|irnTG1M5wfDN)d*os{|J(x+zPwQ=zSW5|-#wzeW-Y3nqbV?=Kp5t16DG0uAphI~EV;~cgaCKE zy+nzvyLy8m$4i)OKPH~t9)LBMnEm`ScWvLEg@+5Y?D+D*q)tu%1N|^P-H#&sZ-#=o z-snDhC3=Q(XMb*sK6q9z9<%F*@M~vEn1mR4eTC$lw}dV@&q8G{A1G1ml0E0Ymv7yj zLs`G_JW+jIX#B^L@=!2k?2Tex)?ZSjrzauHe{T7gAPgURf_DWLn^S5+`hBf>PZ=)-GfvOjR5@b9@_ z@y}uLrinXSFIPd!u>&Ib>TERqaVom)ZjV7{55dD3z3_w$Gcji#5gt>PVd6<&vMRbF zGwG8T zCVhaCwf5w1Knhf5z*!G(cDjzhvgN#I`Dp{RZb59YW$zfg5znTrrqoaEXxe}d6q?Fi z8go8d7nDhp@34RJq$4QJdWnjV`@&JdbDsWM^#7?zR1W_g22UGDh8c6DBa6atrkM@j zce|>x)4D-fK63|NjK#oFdxdpdJSDTY)Z zR-xH-Uu5+kfC?d0FPHWt|ymQ0yy<@1xcOZKvpMz_IiV9b2N%hwh3e0&5ip||c#y3|)<`3DkfY?6B+kB zA#cA8y1d>b^5P=M(0eTVt)nQ?JOJE(dI(SYNTm4oksQPK^rCs+33%g=>31Z2c_Nm= z_?+Z6@pmxFfuvA4>+P1gfUaG4YRm~F<&D{r%XLdE$TN}8Ix@TEm(HT*n?ZD9L@-&t z?nPEj?$~-^80c0lmR%{3nk}^ALscqi%zMM`TZ1X;zhYr`a1EyZ8$upw6X|rzLdxFy z6J)nv0g7WlHK!Fq^ZjQ@SK_Ry>^TYD=ie1R0~4q%#2XWqT@gFeM({km3!XkXgdW=P zjC$ArzPB_(<+t^MPfl8 zQZ;TwMZgfLq02E5FsBE;+_IQ7J-#As|6y3i4vxWRx^phUna?#3c<)%_+jes&v(WA3 ztn)TlfB3W5*<}faTwR1koWb1jn~Gu=a3^y|oqQ}M0iU$%MK+x(gdsIt>~4r7o9@hi zgmH3I+A)#wRSzmQ&4T>y2cm7Xts}b@uL5pTqaElrO^0 zp%0c$u%##S;_#(s3?{t^62|F>_>C6pE(_sD_CEhm3VCQoI zj9*-3yOr}qSn_-fRyz55spfLgvAFSX_FN`=K}CddHsBV$O~%6Xwcn zsF!kmGT8mQ9<@I|m$arQrL5>a^w5w(<)Izf)#XW6Q`?EqN$m6s(=hw=jnt-U6H(*Z z#I)?$q)X4yXXzbixASN$c>YS(&VMZNXCJLz_Y{R=%OP+j_wu{cOObtej{27O!Sh~$ zaZM-LrzH$S|IrE0o^_)6-($j&XEtSt_e9MP_ocK1<}~+~C56c&Uj?8P-zfs~Cq5Oy zg9GSGjuN%?OChP5gthrhEZS}Z#(nOx`ct_uv41U$%DFuEW@fQw4l3T>kfz;=#fq3d zm{SytWs^^U^54m@iFcXWbEcxLWgr#*`V3r8PGHadJ-KXlv5`A?IMvBZv!>Fmk!zEO!j;2*`>g9jUnHG zuCPHenf1LWJ68%;CWWGG>=|Lw^hSi<8v?B(He!N)6dDd~ll|9R0J}+XXg}8#n|?GS z{hmQo|JzLwe%TvC$GB0-EOv%;8cy2oiy-oI8(1zuYBu_agsTP-^iPqn@al>w=MRa5 zq$o=9T8)X#><{q0FKx1Cc1odwq6;0-=6MVnFHOS8pQ52**L*Bq_d&!sO@-18R;a$; zLy8;gh>DBVvh(sp^0jDz`sux}b>VcdE3!ooLnuC5?@6W2mK1s9E8Y_)z^22@rmdSv zB8Gc?wSnT(J$4H9$%Q~^H>j`pp;0vw)QcX;P0hn-_YpUYs6let)s8+?Z6HNjFF(&; z_Citm|Df`PBi6WEi=w+fg7)J#lG)QxR5*-LDa=;;Xn!hxG%c7d}bEdY7Ec% 
z4w~a-$xw7%R(J9OfkQ|;jrkETg`}|;vchM+U*q+8ph&vmTeReV!fkR3`25XToFS$n zIIuxvOuGu{|5{^WWPh|eJ4)nC8c!v!evoU1oB+k=QdMo*P%L=w2FivpXv5D%ry>35 zcIh6de<=&s1TATYS&G^xl2DW^gq_~qD7{YvRo~Bpf?vl&!D&a4ZNc-wr%tNYzuYmb z*q6PI!OUauA=_a|xPLO=;r;|>UROxXQ+Osa(E*Z*nFIcL5f%>{K|4R&lX}%4Nm-XE zWn_5?CGc6xW4o}^SWxKMM69|TO(n@{OjudNy#6boW^Ze#J`ugMJ_(y0TOs(zX86*1 zJt_)yD(!)LLUE>xzG6%gX84(rQDetHW96{EmtKTuMbXF&Qu6S9t-$Bb^EnXl$h=$OG+eZB{( zclMIA`@aCEus}+CIhAL$j(C0cKy*9skJzzx23g7@(P@w?)=f`D$4qBDYr!+sR$q~Q zmj7RObjK58hLHNueCdL@Emau#-8}1iQ1axUWYuAj|7TZ<-4TN|4p$_D=P!`?T>^O| za?WtUZsGNXGsLz7q>LaRO3mRuPvL)%)X7yu-U-5_T}q0~7$-UM9ZGqzQfjK&E;23+ zpoF!5g2E$T+L_Nxzm#ezCd8eh7F`kv|9N5Ai4@GLWmZLhov=D=2HrjY5l*}q_8*7j zazI78LrPV}NcK=%^r!tr8@W3f#T`#;G9J1p3vZxS%QHf8XR{PI(NAP9pGehH2V%vA z$rQciD_oh<4KHxECf_T7HvKvZOAl?t5Az);zSB@l)4Jg9_uIt5zT?pKe={jBm%XV~ z9U$veA|`q`qg}W8l>2QRG(0q?+LUj=@N+2I9iKw`f9+1*M+4|nP$DWPyo1CCLrFdL zquwyGFCF~FicUvcVA|*3pml03Xi9erwa*xg`qiGI{`I4ZlOM$G;Pq5n7lQfM3WVKE zUomZh3%*f}$BMA~P?xxopnN{1yzhdA=S(rFVjxM7=D<0 zsqI#XvYY^soim0C9V>*D`i10V-HFV$s6g%duipH*4Gy;CJZbg{+Nt34$?v^;F<1z}o z-WgKz?m+eCOkpfngRd3y`zEX+-R<-GlmlzY_d*PWx4R>1E!C)9d`F}m&lWm}l-p`K zxAkv4&qzB^)_&%UUuGWtyS2h;~ zrCU!x-e7hdVmnf_u9813vcTb1(f_=5pF-=#T>FmQ9`125P ztrra*Gyy9=jumd>L6mvTBlWdUyw?oE>=oU~@mHhJ$&*wD%~VN!xDKvdL5du+ST-C8 zLB~NI&}MKG)Ubc0@f`1;>qz8XUV^6wCt+cm6W%tTgVkOKMbd3c@-3f;PD4k~jsg=L z{JRI0C)iTKh%Itc&~Z`VWFo%YX0A-qV=;Nu1}xY%N3yaQgpt3jV6Oadxq0DaRCXOi z!JnVNlUeTQWFL)z+|y~P3BPE)jxYgn`4d$G*1GfbDk(T7bZs>nNE!_dV~I1An8Q2KBL<_%w>*M#-EEtWb6_t zeDDm;Wm(Y<&)Ga{>Vu8xjgTul!|DYM#Z!Apjx;bFR)LuDd4OqxZDmh8u|!D!hced`ATuY ze?0R{N=0nr28`M9RSMwwHeNHaYlH^KA zii?m+uB4L6U4Av1rKPp}x)N)RX{1&6b?hPujnzTr)DP|F63K{psX*I~aWqdZ9zDnp6p< zLN@g;F=^IkqVqQ&<`iz_?9*sW@*Ihs@jvKax=+Q6DmhO2#e{Stv!t-HNszR494^0Z zMThsjg6O$hP}>zw;_DeCTl-g)gA@BD25H4iW*~)n@{YQArO3LIiblN!ICw;mVpjkK zx$J=)+79YplhLqf2#Mh*h4r7l>9>NKwg#e`%n9E5^xMOI5-f8k)1=Rz3GJf}DJI1dJJ*b%=A-Xp zWq2)g{V*4ey8ptT>^AD-yJOasP?l5$j>Pua@gHUtjn&kBIk|=BK!<_AdQebKTYAlQOhu8i9 zokJ2ZU|?R*hE 
z>LlDbl|pgvMpNE(o`;!*ot2=$qlxBy9*8fd=87JT5>KbLN5z1sj`cm3sAlbtUqV(?9@M|IcZJ^ncEiqMc!?6 zodZ%bRXfr480e<2C+!+%Iz2KAGTd3myf%TtOmc)wUH<(PtqDeK3Luj?5$v;Chc=Hn zXP>YLqo<4o`zUi#OjXd>wdojY<&EORaO_O%BX(}QA=G6f#s71D45J?Ey(pB*W*-%b z#fUvg?9&N+tPlT{{})VHSNNX>>z_RpvTiSkFL9;lx|7g&WhfnfK7{J@K4Q0>3(C9( zQ85ix?>vY(jBC%eJ0qoVm{f;UoLu|E~UI*2jc7U ziJ0+p6xDsX9CEbpi|oDSVE;h?hSd)xpWZ=WXqqKla(4)&(`S6&bHug7W6{J+MMZn% z6wvT9n0Wp!dapf!k}1Jpe%J(*^FN^S;07^syE|QunT9H}BGKHt0t>##gdC?P;e8dz zZOBBd@ZW}p!LRgbiyFk(PnfZH@F^^Ia768ic7-zOV6`mbrrxl&P0{$q1`A{XqVtoJ zQu%0W_Jp4iaSHZ`h5OLCiStl<_p9nMcYkb)Pa}uNSz`HNGwPa~Mn|irk(1;oz8$g% zvpp}1)|^$;IV43`t)5MWF*TCb*R8rIc^@QXxMNR%N_2U}GE>=v4p)ssd1S5RHp`DH z&6lBdzkbkUD26**2T;_4^AMHvmvE?_j4k~i2&W9rcRI|4hL>SfJ7OC~kKt$c#%sdp z@278De?dfiY(jZ+TSdcZ1!vA>;&Hh*^<*6f<=mT+O1nga&e?=@@qJ)%!fG@RN+#WR zrjq>BRMEM;5>%}r*dDYR+DG>T^|c?RwDf0?IOha3{Ne}EN!LYC$~e-UkD~ql_T(>2 zn7dI|-FKxu75woZq;Y=GhI_)MOyu3v|0c{(# z;e@r+ru`hOrX^BA-DEQV`wZuF+@c4s|nrt3}+Ob(w_L(gn#+zYK^KE8OJQB54Gti;zJ7LulOPbjA_&oSL_hn}>8}AHw z9dpNr{X!_a?LITp%Her^m9RCt42E@_(W*-T?*sOz`NNKsRsNEbHFJf2I#8_&y(_(q z4#n{P^Dy5<#{1B{QhD-1jK1h~97ZB!Z3Yy4E!sPA& zkq|ctTbYd;y>z80G_%4fqwOf`ev=3p&U%%7qC#fPGfLe1sJpVB!p1KISy8#7Zv4Oe z+0N@-(%*@w?Zv`(gcEkEZh_sV+3@_o{?rjNj~XsN5Sm#jAU9u4CduDJ(;I8j-7C-^ z=KObN1b5s-e?ZwiD>(Epn9aaSk1Y^vzd89fNf_D~dN8N%M`u*NpP|jL2EG&SCM{}v~H95BGKcM)3R-4GGx?FpuByN z?BidL`zI`XeaN>P&}DatNZE4(vZ_vkQ~5)^ z?wlib_OO@Y!;AWWmw$`Se!WtyVIkHH+AA8e{YmCp4_PaQpu5k9*u*|}b7>{nAK%9O z9uvs-3Zcg4Ef^5zNp=t8!Rv@QJvK<#*ac{e4AnQkSqVM`uF!jOBNtaBxYR4z4$c)C<5j(UYN1r!XD~K@%D$5bt-fQ%8hWead(l?z_W3@7>Z-j& zrNwMf^4mO!a2Z0yH#cL#!euC`kHOoC!>QNos*s&tS$&#yX!YGoL4%Kh%wW)W7fcQ$~!QMf-97Gc;t1+x$%Q^>l!l> zu6Bd-xP@qLw}AU~24RWXoEb>~Y4e6+L1`fPWm}-q>VU}O4ncL-YhhjJ4CjtA+_~L_ zd9xkjWt;|kF0r;ZJ4B(mmki2~ky72BUZEOG`d8Zi)L@qlcZUJ0pVq?N+BkCfas}#Q zpF!%_fmEp&3f6yR3T0N6h#Jfs&s`s3L8~|Tu=iEI>Knar+ZmxXo1<6W8b*O8+&%8J z0A*jkscPZ#pf|M*G|UQA>gALaJcpJCZlm_W0T}&mK6?Op=XP{3=`MdHQrCV+4$~UJ z`T=W%7dA^df1c&<{Di*J_&vxK?Qn;6#@^U$*jvecP-Q{byRQSn&7x`d*{S%v{|Mas 
z(F)9I@eo0W_lbwU@$-FL2B?&ElMNQCzXELI*DrGxsD!Ty?}Et%tV7tV6$^#F`h zZb9fb30*exUcB%wd1G;nIa0cbJ`U(pZ&?q|M60%*C|n#5KYGNEmGXZ648CJ7Oan4(A%1B#Tb!J zW!^XRxho^F+~oo!8vJq6NKfqG8C!j|kEF4;q;luaMQ9m`*G^06PGbrtaHcfif(zbp z{(JcJZ8$v{8=QmmtZ9HghJe9~M7;DC}h}WGMJm znxtcP>-ZGjpRc2oT60iOWZw;YenXkXA#QXEZR(FwrN;J zui0ZkZOk}sQZ7al+tcFZ_X<)wZYC$zl4+1Doq3 zFvCBMT4fv2d|$S3Uu=dKKOKc~hd!7z$P&%}`Udj9)8PKI+*`TtOEBCw6E!n-LDYiz z=vzb+2@leby#vXB+yfzIWUdqzttFK za~iT@C6QTTM~3I63T?4Yk+=1La2jU=s>Us-w0NLsie%ruWs}I!C`IGJG3b^UC@id2 zQ%b`bFdtbBAs>xEYX=3C6{|(#rLm}d#%EJzmn7d^2E`WLa4{ktORV+cRADS8mU^JN z{~5_?`6>~x)FPZ7O3UwZwkyM*fo#R#!I-8l7(+eOm`vJI3yay4?j85IpGj@6m7A*5+ zw(?*sjhKhKWr$W6S)=2uJ;R> zaC*2=s23a%UX{Vre9)JD6i-FL52j>T6iJEC`%rT~&g9s5h?Ez(Lg!K|!uPsUM%^lE z>9dp0Q#(hM};e+&WrRj58-rY}%!fK?OTC-XesD{onk39Hv*_z*Mh;*Mi~ zxCf?8^nv!g!H`$WT%s4AB2}Tr%6HlDa-kD-KKlhyZ2AjT-`!HjB{|7oX<_F724l{b za%|Y@O95Z4!Dzn*p>_=x8q=e!*Bn!1WLu*1twC7o?@cPFK{$NJFs$su9S@CwV;yEv zX39it;O>wfkHuI#CJXBJC`p%D3K3>$*ey7>ZsbO_xH53iKNKx%${_3L7h>j68_vEK zih}-2;dz`EY`G&jrIEXAMtEZPmbbz?*NH-|XeeOeNqBvBI_~6IZaIIKja}>LGT*(8 z>F4zF!Z@7t?@2NCXBjh5`e9w1 z6^YPWj%cyjlX5-Qpx3t(sIVrIItP8McZvukW7Zro>FEV92FB>E-M$vh*TY43-~XU} z%WN#0;{k`Sofe7vY_W8y8#TynIs3AK@*YS89uGv@&6SjBX@(WYP06`6jDr7F(5aqe z63nQq@y!)#iFY@at4Q^5KNKmJVzV&@>7~IQpJH^~P}KKHGvEUc3~V6$e0<9<6UL=!TLHnO)Lgrx=^6BrERI4$jwLt?4@K z`RY5dw;~o>DwlB%IvxrpDxhJ>Rc6w12i46{=oqz}as~6A`VPSM{c_MwH>ZxNNm#-6 z4^7S~?wk*h%1yj>z;dTDA^&F`>OUWfsnv2U>wiR~ooo}jt=~wt1(`y|`Fcfo6xNNn4NBEbh4-rj zRF_7xcb|2uHI@|c=Tzoj@wxl|clDYf{J~JgY^ox@aaGek~J;gr33_LYqYH_o#5 zR3LPIBc!)hc4#*>oeEmrMNfQxFxm|i4e2|WiSY5XG{U_`Ztudf5!}I zX64y22Ytl8`(qhWv|Ad1V;pe&@y-<|fe0+AL;nmSMyUYf4~WUc)Cg z(6w_edb#?OOX2`5Vc(r>T}ZXkxj@*O{vj^En1oSlcrM*CQgS@%M-j?vVoH-4Iyr6D ztHUP>IAntH7yPKYCYeHm=c0PXA^o{$=5%ZSXmYAIio3VRkZrmzcS z^pgUsmq%lMWHM@cZ|j?W55a~PF4VZ+obMTt5dUNz)qb=J{pR(6U#Nm|2FykA zhG4x~hPoBCLNhQ)$ac2*C_CO4rE}(xL&r&|f1W4QgF>Y0D~o6??@eFzWe?1wVsQAf z8m8RzB-@qeMAMrf)a?0P*j9cGO~W^$?QfjlYrY0*?QlG5HUOVrbS3p&XHxs05|z2u 
zLiZvGG^Q&=w`&D7Is}kTB#Od+(@}Hpl>~X8uul20PmaHju++I?d(}P>=sb(y@CfX5 zVGliy5Q^9-Xs37#%FuKOJ32zBYnSPD#|P+Z|K{$fngP(Ddj@XB?9rV*kh=Y&&}5lC zbz3=8*uX6!&VM^jdd58Rf3?v2o*li~un9GjF6#~djFDt-A|&n7-}TYQpNZ#Zs>J0Y zNAAi!2MxPg!0mhjXdNo`+Sq;7#t;RVOcbK-?NBnlwjh(*1EP83K4DxtMacSQRGs#D z$6W`135(Ax(7hm<_i6(u^S5B`qe*1nfiL(lbEr45S?HoxQ~M>>8=Fk0^|j#lxp^a9 z{KJ=L@zKKVU9xbOr_$bw%+}u%g&sddll`@YRDS&xIK?G^ve!@@{q&BIPrj+IsWZob zMi=m!Iu~{RRp6J)K0nJ+Dd#xvIxU%>mRuwXT<1gD8~*)#xmj;LOe?(hJ96%C6Aix- zPC0)oFzz~MLoPEfV5m|ozcdA>&6`V>t$8 zM?V)bUz4i0!)BxGR-nS#i<$45A|FMLlHAUOQ}xTazmIarJ0F*d0EDHv5lx3QmU8k#Ue zNd8YGd2|g!?Xf-J?>mZ=>(@#ad#teY(h?l*yNoJUFQMHh`eS{B9tt-^V0`ch6wlnL zW{*8qTQS42z>h@aI;@^Ki3-Adh4^MKcYJrka~)@#>|;S^_XE@_01J{mm}}xe&2MK5 zPz|DDr$ka;U|+zhu_SvmSxVa-kCpRNs6J%|MbBCx%HF<^Za7EN%n~!~c|MxlpN3%g z%F$>&dWT;5w?;Akl)V}P<{HX$eywckCyQ3wSFaFW(0MQo{Xk@7o(}XFZ=nesO3Q!^vnsM zc$F^|=8VJmlCijUz!nPFRVq3Mq=?-ArJ<9WSq8L2oL+DUYWD5|Z8B#$k9-4J$J#~9 z#4jLvdx0>F8U-cCXJSFXBrG$PLniOLLhlSBS^JZ!x9tv;z&`bt#*I|x!d{E(6VYK% zKQbsbV9xsAg=}??{%yrFN^A-yhXz;j-@zT!w{6M%kE^0#!(gE+KcY8&{2^*rkE*up zI>6g>?h;B`Ct0RE zg&e`vy_Tb8;3{t&d^l|s=s z{Y0DPY^-_1J3w~}@){UOKKHg!@h{mTCXu^vUVCEQ@nIOk8d?#vwKUeQ;I?lnW$Qbk zi2F^nr$ZIC7Mw3WyclYqdXvxk4@JYJv0#|uAfm%6pe*}=R6K}#$~Xs6Z?+3E%A&C> z4@&B?qs)V1%!Qe1zx)*VbAeYtoyw$ zHRl(KzGqCxcX$LDn$Af(>lS004Pe}`3h*A~Ol4lRQq-UXQvTzkZ%=Tghg+Rdrtq%5 x{J|Kse9gIRn=+vkC#&U8c1mqGKL_8<6EUlE0BH}lLEHh}OaA+xFrBrI{txmP;aUIy literal 0 HcmV?d00001 diff --git a/src/SOFIE_parsers/CMakeLists.txt b/src/SOFIE_parsers/CMakeLists.txt index c583b56..a740a03 100644 --- a/src/SOFIE_parsers/CMakeLists.txt +++ b/src/SOFIE_parsers/CMakeLists.txt @@ -5,7 +5,7 @@ # For the list of contributors see $ROOTSYS/README/CREDITS. 
############################################################################ -# CMakeLists.txt file for building TMVA SOFIE package +# CMakeLists.txt file for building SOFIE package ############################################################################ #Author: Sitong An, Lorenzo Moneta 10/03/2021 @@ -33,6 +33,7 @@ set(sources_cxx src/RModelParser_ONNX.cxx src/ParseBasicUnary.cxx src/ParseBasicBinary.cxx + src/ParseBasicIs.cxx src/ParseBatchNormalization.cxx src/ParseCast.cxx src/ParseConcat.cxx @@ -80,6 +81,8 @@ set(sources_cxx src/ParseWhere.cxx src/ParseEinsum.cxx src/ParseRandom.cxx + src/ParseNot.cxx + src/ParseClip.cxx src/ParseScatterElements.cxx ${PROTO_SRCS} ${DEPENDENCIES} diff --git a/src/SOFIE_parsers/src/ParseBasicIs.cxx b/src/SOFIE_parsers/src/ParseBasicIs.cxx new file mode 100644 index 0000000..a1abad4 --- /dev/null +++ b/src/SOFIE_parsers/src/ParseBasicIs.cxx @@ -0,0 +1,66 @@ +#include "SOFIE/RModelParser_ONNX.hxx" +#include "SOFIE/ROperator_Basic_Is.hxx" +#include "onnx_proto3.pb.h" + +namespace SOFIE { + +template +std::unique_ptr ParseBasicIs(RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) +{ + + std::string input_name = nodeproto.input(0); + if (!parser.IsRegisteredTensorType(input_name)) { + throw + std::runtime_error("SOFIE ONNX Parser " + IsOpTraits::Name() + " op has input tensor " + input_name + + " but its type is not yet registered"); + } + + // get attributes for the IsInf operator + int detect_negative = 1; + int detect_positive = 1; + for (int_t i = 0; i < nodeproto.attribute_size(); i++) { + std::string attribute_name = nodeproto.attribute(i).name(); + if (attribute_name == "detect_negative") + detect_negative = nodeproto.attribute(i).i(); + if (attribute_name == "detect_positive") + detect_positive = nodeproto.attribute(i).i(); + } + + if (detect_positive == 0 && detect_negative == 0) + throw std::runtime_error("SOFIE ONNX Parser IsInf op has invalide attributes"); + + + std::unique_ptr op; + std::string 
output_name = nodeproto.output(0); + + if (nodeproto.attribute_size() == 0 || (detect_negative == 1 && detect_positive == 1)) + op.reset(new ROperator_Basic_Is(input_name, output_name)); + else if (nodeproto.attribute_size() > 0) { + // case detect_negative or detective_positive are set + if (detect_negative == 0) + op.reset(new ROperator_Basic_Is(input_name, output_name)); + else if (detect_positive == 0) + op.reset(new ROperator_Basic_Is(input_name, output_name)); + } else + throw std::runtime_error("SOFIE ONNX Parser " + IsOpTraits::Name() + " operator - invalid attributes"); + + // Register the output type (is always BOOL) + if (!parser.IsRegisteredTensorType(output_name)) { + parser.RegisterTensorType(output_name, ETensorType::BOOL); + } + + return op; +}; + +// Parse IsNaN +ParserFuncSignature ParseIsNaN = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + return ParseBasicIs(parser, nodeproto); +}; + +// Parse IsInf +ParserFuncSignature ParseIsInf = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + return ParseBasicIs(parser, nodeproto); +}; + + +} // namespace SOFIE diff --git a/src/SOFIE_parsers/src/ParseClip.cxx b/src/SOFIE_parsers/src/ParseClip.cxx new file mode 100644 index 0000000..4424c76 --- /dev/null +++ b/src/SOFIE_parsers/src/ParseClip.cxx @@ -0,0 +1,46 @@ +#include "SOFIE/RModelParser_ONNX.hxx" +#include "SOFIE/ROperator_Clip.hxx" +#include "onnx_proto3.pb.h" + +namespace SOFIE { + +ParserFuncSignature ParseClip = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) +{ + ETensorType input_type = ETensorType::UNDEFINED; + + std::string input_name = nodeproto.input(0); + if (parser.IsRegisteredTensorType(input_name)) { + input_type = parser.GetTensorType(input_name); + } else { + throw std::runtime_error("SOFIE ONNX Parser Clip op has input tensor " + input_name + + " but its type is not yet registered"); + } + + std::string output_name = nodeproto.output(0); + + // ONNX opset 11+: min and max are 
optional tensor inputs (empty string when absent) + std::string min_name = (nodeproto.input_size() > 1 && !nodeproto.input(1).empty()) + ? nodeproto.input(1) : ""; + std::string max_name = (nodeproto.input_size() > 2 && !nodeproto.input(2).empty()) + ? nodeproto.input(2) : ""; + + std::unique_ptr op; + switch (input_type) { + case ETensorType::FLOAT: + op.reset(new ROperator_Clip(input_name, output_name, min_name, max_name)); + break; + case ETensorType::DOUBLE: + op.reset(new ROperator_Clip(input_name, output_name, min_name, max_name)); + break; + default: + throw std::runtime_error("SOFIE ONNX Parser Clip op does not yet support input type " + + std::to_string(static_cast(input_type))); + } + + if (!parser.IsRegisteredTensorType(output_name)) + parser.RegisterTensorType(output_name, input_type); + + return op; +}; + +} // namespace SOFIE diff --git a/src/SOFIE_parsers/src/ParseGRU.cxx b/src/SOFIE_parsers/src/ParseGRU.cxx index ec2cddf..58ce983 100644 --- a/src/SOFIE_parsers/src/ParseGRU.cxx +++ b/src/SOFIE_parsers/src/ParseGRU.cxx @@ -46,7 +46,7 @@ ParserFuncSignature ParseGRU = [](RModelParser_ONNX &parser, const onnx::NodePro } else if (attribute_name == "linear_before_reset") { attr_linear_before_reset = nodeproto.attribute(i).i(); } else { - std::cout << "TMVA SOFIE Warning - Model Loading - Attribute " << attribute_name << " in OperatorNode " + std::cout << "SOFIE Warning - Model Loading - Attribute " << attribute_name << " in OperatorNode " << nodeproto.name() << " is not defined in ONNX IR and not applied!\n"; } } diff --git a/src/SOFIE_parsers/src/ParseLSTM.cxx b/src/SOFIE_parsers/src/ParseLSTM.cxx index b9dc165..a95ee01 100644 --- a/src/SOFIE_parsers/src/ParseLSTM.cxx +++ b/src/SOFIE_parsers/src/ParseLSTM.cxx @@ -46,7 +46,7 @@ ParserFuncSignature ParseLSTM = [](RModelParser_ONNX &parser, const onnx::NodePr } else if (attribute_name == "layout") { attr_layout = nodeproto.attribute(i).i(); } else { - std::cout << "TMVA SOFIE Warning - Model Loading - 
Attribute " << attribute_name << " in OperatorNode " + std::cout << "SOFIE Warning - Model Loading - Attribute " << attribute_name << " in OperatorNode " << nodeproto.name() << " is not defined in ONNX IR and not applied!\n"; } } diff --git a/src/SOFIE_parsers/src/ParseNot.cxx b/src/SOFIE_parsers/src/ParseNot.cxx new file mode 100644 index 0000000..ca315eb --- /dev/null +++ b/src/SOFIE_parsers/src/ParseNot.cxx @@ -0,0 +1,38 @@ +#include "SOFIE/RModelParser_ONNX.hxx" +#include "SOFIE/ROperator_Not.hxx" +#include "onnx_proto3.pb.h" + +namespace SOFIE { + +ParserFuncSignature ParseNot = [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) +{ + ETensorType input_type = ETensorType::UNDEFINED; + + if (nodeproto.input_size() != 1 || nodeproto.output_size() != 1) + std::runtime_error("TMVA::SOFIE ONNX Parser Not op has invalid input or output size "); + + std::string input_name = nodeproto.input(0); + + if (parser.IsRegisteredTensorType(input_name)) { + input_type = parser.GetTensorType(input_name); + if (input_type !=ETensorType::BOOL && input_type !=ETensorType::UINT8 ) + throw std::runtime_error("TMVA::SOFIE ONNX Parser Not op has invalid input type " + ConvertTypeToString(input_type)); + } else { + throw + std::runtime_error("TMVA::SOFIE ONNX Parser Not op has input tensor " + input_name + + " but its type is not yet registered"); + } + + std::string output_name = nodeproto.output(0); + std::unique_ptr op(new ROperator_Not(input_name, output_name)); + + // Infer the output type + if (!parser.IsRegisteredTensorType(output_name)) { + parser.RegisterTensorType(output_name, input_type); + } + + return op; +}; + + +} // namespace SOFIE diff --git a/src/SOFIE_parsers/src/ParseRNN.cxx b/src/SOFIE_parsers/src/ParseRNN.cxx index d75b577..2d20e15 100644 --- a/src/SOFIE_parsers/src/ParseRNN.cxx +++ b/src/SOFIE_parsers/src/ParseRNN.cxx @@ -43,7 +43,7 @@ ParserFuncSignature ParseRNN = [](RModelParser_ONNX &parser, const onnx::NodePro } else if (attribute_name == 
"layout") { attr_layout = nodeproto.attribute(i).i(); } else { - std::cout << "TMVA SOFIE Warning - Model Loading - Attribute " << attribute_name << " in OperatorNode " + std::cout << "SOFIE Warning - Model Loading - Attribute " << attribute_name << " in OperatorNode " << nodeproto.name() << " is not defined in ONNX IR and not applied!\n"; } } diff --git a/src/SOFIE_parsers/src/ParseWhere.cxx b/src/SOFIE_parsers/src/ParseWhere.cxx index ea73cff..a7a3685 100644 --- a/src/SOFIE_parsers/src/ParseWhere.cxx +++ b/src/SOFIE_parsers/src/ParseWhere.cxx @@ -11,6 +11,10 @@ ParserFuncSignature ParseWhere = [](RModelParser_ONNX &parser, const onnx::NodeP throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has invalid input size"); } // condition boolean vector is input 0 + if (!parser.IsRegisteredTensorType(nodeproto.input(0))){ + throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has input tensor " + nodeproto.input(0) + + " but its type is not yet registered"); + } if (!parser.IsRegisteredTensorType(nodeproto.input(1))){ throw std::runtime_error("TMVA::SOFIE ONNX Parser Where op has input tensor " + nodeproto.input(1) + " but its type is not yet registered"); diff --git a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx index 0e6b6cd..ddc7104 100644 --- a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx +++ b/src/SOFIE_parsers/src/RModelParser_ONNX.cxx @@ -48,6 +48,11 @@ extern ParserFuncSignature ParseLess; extern ParserFuncSignature ParseLessEq; extern ParserFuncSignature ParseGreater; extern ParserFuncSignature ParseGreaterEq; +//Is Operators +extern ParserFuncSignature ParseIsInf; +extern ParserFuncSignature ParseIsNaN; +extern ParserFuncSignature ParseNot; +extern ParserFuncSignature ParseClip; // Reduce operators extern ParserFuncSignature ParseReduceMean; extern ParserFuncSignature ParseReduceSum; @@ -204,6 +209,11 @@ RModelParser_ONNX::RModelParser_ONNX() noexcept : fOperatorsMapImpl(std::make_un 
RegisterOperator("LessOrEqual", ParseLessEq); RegisterOperator("Greater", ParseGreater); RegisterOperator("GreaterOrEqual", ParseGreaterEq); + // Is / Not operators + RegisterOperator("IsInf", ParseIsInf); + RegisterOperator("IsNaN", ParseIsNaN); + RegisterOperator("Not", ParseNot); + RegisterOperator("Clip", ParseClip); // Reduce operators RegisterOperator("ReduceMean", ParseReduceMean); RegisterOperator("ReduceSum", ParseReduceSum); @@ -763,7 +773,7 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & std::cout << "\t" << i << " " << nodesOrder[i] << " parsing operator " << op_type << std::endl; } - std::unique_ptr op = ParseOperator(i, graph, nodesOrder, nodesChildren[i]); + std::unique_ptr op = ParseOperator(i, graph, nodesOrder, nodesChildren[nodesOrder[i]]); if (!op) { if (verbose) { std::cout << "\t\tskipping operator since it is fused with previous one" << std::endl; From 2a98dca50676a114f05ef637971c4ef8546ed8e2 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sun, 24 May 2026 20:35:37 +0200 Subject: [PATCH 43/43] feat: project restructure and benchmark tool --- .vscode/settings.json | 81 ----- CMakeLists.txt | 62 +++- benchmark/CMakeLists.txt | 316 ++++++++++++++++++ benchmark/README.md | 95 ++++++ benchmark/models/.gitkeep | 0 .../models}/GNN_model.onnx | Bin .../models}/simple_transformer.onnx | Bin .../models}/simple_transformer.onnx.data | Bin benchmark/src/BenchmarkBackend.hxx | 37 ++ benchmark/src/BenchmarkEmitter.cxx.in | 220 ++++++++++++ benchmark/src/BenchmarkRunner.cxx.in | 71 ++++ benchmark/src/BenchmarkUtils.hxx | 168 ++++++++++ benchmark/src/ONNXRuntimeBenchmark.hxx | 231 +++++++++++++ check_style.sh | 37 ++ cmake/SOFIEConfig.cmake.in | 13 + {src/SOFIE_core => core}/CMakeLists.txt | 12 +- {src/SOFIE_core => core}/README.md | 4 +- {src/SOFIE_core => core}/inc/LinkDef.h | 0 .../inc/SOFIE/FunctionList.hxx | 0 .../inc/SOFIE/OperatorList.hxx | 0 .../inc/SOFIE/RFunction.hxx | 0 .../inc/SOFIE/RFunction_MLP.hxx | 0 
.../inc/SOFIE/RFunction_Mean.hxx | 0 .../inc/SOFIE/RFunction_Sum.hxx | 0 {src/SOFIE_core => core}/inc/SOFIE/RModel.hxx | 30 +- .../inc/SOFIE/RModel_Base.hxx | 0 .../inc/SOFIE/RModel_GNN.hxx | 0 .../inc/SOFIE/RModel_GraphIndependent.hxx | 0 .../inc/SOFIE/ROperator.hxx | 1 + .../inc/SOFIE/ROperator_BasicBinary.hxx | 8 +- .../inc/SOFIE/ROperator_BasicNary.hxx | 0 .../inc/SOFIE/ROperator_BasicUnary.hxx | 4 +- .../inc/SOFIE/ROperator_Basic_Is.hxx | 9 +- .../SOFIE/ROperator_BatchNormalization.hxx | 11 +- .../inc/SOFIE/ROperator_Cast.hxx | 23 +- .../inc/SOFIE/ROperator_Clip.hxx | 15 +- .../inc/SOFIE/ROperator_Comparision.hxx | 9 +- .../inc/SOFIE/ROperator_Concat.hxx | 9 +- .../inc/SOFIE/ROperator_Constant.hxx | 0 .../inc/SOFIE/ROperator_Conv.hxx | 304 ++++++++++++++--- .../inc/SOFIE/ROperator_ConvTranspose.hxx | 0 .../inc/SOFIE/ROperator_ConvTranspose.icc | 0 .../inc/SOFIE/ROperator_Custom.hxx | 0 .../inc/SOFIE/ROperator_Einsum.hxx | 0 .../inc/SOFIE/ROperator_Elu.hxx | 0 .../inc/SOFIE/ROperator_Erf.hxx | 0 .../inc/SOFIE/ROperator_Expand.hxx | 10 +- .../inc/SOFIE/ROperator_EyeLike.hxx | 0 .../inc/SOFIE/ROperator_GRU.hxx | 0 .../inc/SOFIE/ROperator_GRU.icc | 0 .../inc/SOFIE/ROperator_Gather.hxx | 9 +- .../inc/SOFIE/ROperator_GatherND.hxx | 9 +- .../inc/SOFIE/ROperator_Gemm.hxx | 59 +++- .../inc/SOFIE/ROperator_Identity.hxx | 0 .../inc/SOFIE/ROperator_LSTM.hxx | 0 .../inc/SOFIE/ROperator_LSTM.icc | 0 .../SOFIE/ROperator_LayerNormalization.hxx | 8 +- .../inc/SOFIE/ROperator_LeakyRelu.hxx | 4 +- .../inc/SOFIE/ROperator_Not.hxx | 11 +- .../inc/SOFIE/ROperator_Pad.hxx | 0 .../inc/SOFIE/ROperator_Pool.hxx | 0 .../inc/SOFIE/ROperator_RNN.hxx | 0 .../inc/SOFIE/ROperator_RNN.icc | 0 .../inc/SOFIE/ROperator_Random.hxx | 0 .../inc/SOFIE/ROperator_Range.hxx | 0 .../inc/SOFIE/ROperator_Reduce.hxx | 16 +- .../inc/SOFIE/ROperator_Relu.hxx | 4 +- .../inc/SOFIE/ROperator_Reshape.hxx | 9 + .../inc/SOFIE/ROperator_ScatterElements.hxx | 7 +- .../inc/SOFIE/ROperator_Selu.hxx | 0 
.../inc/SOFIE/ROperator_Shape.hxx | 0 .../inc/SOFIE/ROperator_Sigmoid.hxx | 4 +- .../inc/SOFIE/ROperator_Slice.hxx | 18 +- .../inc/SOFIE/ROperator_Softmax.hxx | 0 .../inc/SOFIE/ROperator_Split.hxx | 8 +- .../inc/SOFIE/ROperator_SubGraph.hxx | 0 .../inc/SOFIE/ROperator_Swish.hxx | 0 .../inc/SOFIE/ROperator_Tanh.hxx | 4 +- .../inc/SOFIE/ROperator_Tile.hxx | 5 +- .../inc/SOFIE/ROperator_TopK.hxx | 0 .../inc/SOFIE/ROperator_Transpose.hxx | 4 +- .../inc/SOFIE/ROperator_Where.hxx | 42 +-- .../inc/SOFIE/SOFIEHelpers.hxx | 0 .../inc/SOFIE/SOFIE_common.hxx | 23 +- {src/SOFIE_core => core}/src/Prototype.cxx | 0 {src/SOFIE_core => core}/src/RFunction.cxx | 0 .../SOFIE_core => core}/src/RFunction_MLP.cxx | 0 .../src/RFunction_Mean.cxx | 0 .../SOFIE_core => core}/src/RFunction_Sum.cxx | 0 {src/SOFIE_core => core}/src/RModel.cxx | 84 ++--- .../SOFIE_core => core}/src/RModel_ALPAKA.cxx | 48 +-- {src/SOFIE_core => core}/src/RModel_Base.cxx | 6 +- {src/SOFIE_core => core}/src/RModel_GNN.cxx | 0 .../src/RModel_GraphIndependent.cxx | 0 {src/SOFIE_core => core}/src/SOFIE_common.cxx | 16 +- {src/SOFIE_core => core}/test/CMakeLists.txt | 4 +- .../test/Conv1dModelGenerator.py | 0 .../test/Conv2dModelGenerator.py | 0 .../test/Conv3dModelGenerator.py | 0 .../test/ConvTrans2dModelGenerator.py | 0 .../test/EmitFromONNX.cxx.in | 0 .../test/EmitFromONNX_GPU_ALPAKA.cxx.in | 0 .../test/EmitFromRoot.cxx.in | 0 {src/SOFIE_core => core}/test/GNN/EmitGNN.cxx | 0 .../test/GNN/EmitGraphIndependent.cxx | 0 .../test/LinearModelGenerator.py | 0 .../test/RecurrentModelGenerator.py | 0 .../test/TestCustomModelsFromONNX.cxx | 0 .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 58 +++- .../test/TestCustomModelsFromROOT.cxx | 0 .../test/TestSofieModels.cxx | 0 .../test/input_models/Abs.onnx | 0 .../test/input_models/Add.onnx | 0 .../test/input_models/AddBroadcast1.onnx | 0 .../test/input_models/AddBroadcast2.onnx | 0 .../test/input_models/AddBroadcast3.onnx | 0 .../test/input_models/AddBroadcast4.onnx | 0 
.../test/input_models/AddBroadcast5.onnx | 0 .../test/input_models/AddBroadcast6.onnx | 0 .../test/input_models/AddBroadcast7.onnx | 0 .../test/input_models/AvgPool.onnx | Bin .../test/input_models/BatchNorm.onnx | Bin .../test/input_models/BatchNormRelu.onnx | Bin .../test/input_models/Cast.onnx | 0 .../test/input_models/Clip.onnx | Bin .../test/input_models/ComplexTopK.onnx | Bin .../test/input_models/Concat_0D.onnx | Bin .../test/input_models/Constant.onnx | Bin .../test/input_models/ConvTranspose1d.onnx | Bin .../test/input_models/ConvTranspose2d.onnx | Bin .../input_models/ConvTransposeBias2d.onnx | Bin .../ConvTransposeBias2dBatched.onnx | Bin .../ConvWithAsymmetricPadding.onnx | Bin .../ConvWithAutopadSameLower.onnx | Bin .../test/input_models/ConvWithPadding.onnx | Bin .../ConvWithStridesNoPadding.onnx | Bin .../input_models/ConvWithStridesPadding.onnx | Bin .../test/input_models/ConvWithoutPadding.onnx | Bin .../test/input_models/Cos.onnx | 0 .../test/input_models/Div.onnx | 0 .../test/input_models/Einsum_3.onnx | 0 .../test/input_models/Einsum_4.onnx | 0 .../test/input_models/Einsum_dotprod.onnx | Bin .../test/input_models/Einsum_matmul.onnx | 0 .../test/input_models/Elu.onnx | Bin .../test/input_models/Equal.onnx | 0 .../test/input_models/Erf.onnx | 0 .../test/input_models/Exp.onnx | 0 .../test/input_models/ExpandDiffSize.onnx | Bin .../test/input_models/ExpandSameSize.onnx | Bin .../test/input_models/EyeLike.onnx | 0 core/test/input_models/GNN_model.onnx | Bin 0 -> 591051 bytes .../test/input_models/GRUBatchwise.onnx | Bin .../test/input_models/GRUBidirectional.onnx | Bin .../test/input_models/GRUDefaults.onnx | Bin .../test/input_models/GRUInitialBias.onnx | Bin .../test/input_models/GRUSeqLength.onnx | Bin .../test/input_models/Gather2d.onnx | Bin .../test/input_models/GatherAxis0.onnx | Bin .../test/input_models/GatherAxis1.onnx | Bin .../test/input_models/GatherAxis2.onnx | 0 .../test/input_models/GatherAxis3.onnx | 0 
.../test/input_models/GatherND_Batch.onnx | Bin .../test/input_models/GatherND_Ex1.onnx | Bin .../test/input_models/GatherND_Ex2.onnx | Bin .../test/input_models/GatherND_Ex3.onnx | Bin .../test/input_models/GatherND_Ex4.onnx | Bin .../test/input_models/GatherND_Ex5.onnx | Bin .../GatherND_NegativeIndices.onnx | Bin .../input_models/GatherNegativeIndices.onnx | Bin .../test/input_models/Greater.onnx | 0 .../test/input_models/GreaterOrEqual.onnx | 0 .../test/input_models/IsInf.onnx | Bin .../test/input_models/IsNaN.onnx | Bin .../test/input_models/LSTMBatchwise.onnx | Bin .../test/input_models/LSTMBidirectional.onnx | Bin .../test/input_models/LSTMDefaults.onnx | Bin .../test/input_models/LSTMInitialBias.onnx | Bin .../test/input_models/LSTMPeepholes.onnx | Bin .../test/input_models/LayerNorm.onnx | Bin .../test/input_models/LayerNorm3D.onnx | Bin .../test/input_models/LayerNormScaleBias.onnx | Bin .../input_models/LayerNormalization2d.onnx | Bin .../input_models/LayerNormalization4d.onnx | 0 .../test/input_models/Less.onnx | 0 .../test/input_models/LessOrEqual.onnx | 0 .../input_models/LinearWithLeakyRelu.onnx | 0 .../test/input_models/LinearWithSelu.onnx | Bin .../test/input_models/LinearWithSigmoid.onnx | Bin .../test/input_models/Linear_16.onnx | Bin .../test/input_models/Linear_32.onnx | Bin .../test/input_models/Linear_64.onnx | Bin .../test/input_models/Log.onnx | 0 .../test/input_models/Max.onnx | 0 .../MaxMultidirectionalBroadcast.onnx | Bin .../test/input_models/MaxPool1d.onnx | Bin .../test/input_models/MaxPool2d.onnx | Bin .../test/input_models/MaxPool3d.onnx | Bin .../MeanMultidirectionalBroadcast.onnx | Bin .../MinMultidirectionalBroadcast.onnx | Bin .../test/input_models/Mul.onnx | 0 .../test/input_models/Neg.onnx | 0 .../test/input_models/Not.onnx | Bin .../test/input_models/Pad.onnx | Bin .../test/input_models/Pow.onnx | 0 .../test/input_models/Pow_broadcast.onnx | 0 .../test/input_models/RNNBatchwise.onnx | Bin 
.../test/input_models/RNNBidirectional.onnx | Bin .../RNNBidirectionalBatchwise.onnx | Bin .../test/input_models/RNNDefaults.onnx | Bin .../test/input_models/RNNSeqLength.onnx | Bin .../test/input_models/RNNSequence.onnx | Bin .../input_models/RNNSequenceBatchwise.onnx | Bin .../test/input_models/RandomNormal.onnx | Bin .../test/input_models/RandomUniform.onnx | Bin .../test/input_models/RangeFloat.onnx | 0 .../test/input_models/RangeInt.onnx | 0 .../test/input_models/Reciprocal.onnx | 0 .../test/input_models/ReduceMean.onnx | Bin .../test/input_models/ReduceProd.onnx | Bin .../test/input_models/ReduceSum.onnx | Bin .../test/input_models/ReduceSumSquare.onnx | Bin .../test/input_models/ScatterElements.onnx | 0 .../test/input_models/Shape.onnx | Bin .../test/input_models/Sin.onnx | 0 .../test/input_models/Slice.onnx | Bin .../test/input_models/Slice_Default_Axis.onnx | Bin .../input_models/Slice_Default_Steps.onnx | Bin .../test/input_models/Slice_Neg.onnx | Bin .../test/input_models/Softmax1d.onnx | 0 .../test/input_models/Softmax2d.onnx | 0 .../test/input_models/Softmax3d.onnx | 0 .../test/input_models/Softmax4d.onnx | 0 .../test/input_models/Softplus.onnx | 0 .../test/input_models/Split_0.onnx | 0 .../test/input_models/Split_1.onnx | 0 .../test/input_models/Split_2.onnx | 0 .../test/input_models/Sqrt.onnx | 0 .../test/input_models/Sub.onnx | 0 .../SumMultidirectionalBroadcast.onnx | Bin .../test/input_models/Tanh.onnx | 0 .../test/input_models/Tile5D.onnx | 0 .../test/input_models/TopK.onnx | Bin .../test/input_models/Transpose.onnx | Bin .../test/input_models/Where.onnx | 0 .../test/input_models/references/Add.ref.hxx | 0 .../references/AddBroadcast1.ref.hxx | 0 .../references/AddBroadcast2.ref.hxx | 0 .../references/AddBroadcast3.ref.hxx | 0 .../references/AddBroadcast4.ref.hxx | 0 .../references/AddBroadcast5.ref.hxx | 0 .../references/AddBroadcast6.ref.hxx | 0 .../references/AddBroadcast7.ref.hxx | 0 .../input_models/references/AvgPool.ref.hxx | 0 
.../test/input_models/references/Cast.ref.hxx | 0 .../references/ComplexTopK.ref.hxx | 0 .../input_models/references/Constant.ref.hxx | 0 .../references/ConvTranspose1d.ref.hxx | 0 .../references/ConvTranspose2d.ref.hxx | 0 .../references/ConvTranspose3d.ref.hxx | 0 .../references/ConvTransposeBias2d.ref.hxx | 0 .../ConvTransposeBias2dBatched.ref.hxx | 0 .../ConvWithAsymmetricPadding.ref.hxx | 0 .../ConvWithAutopadSameLower.ref.hxx | 0 .../references/ConvWithPadding.ref.hxx | 0 .../ConvWithStridesNoPadding.ref.hxx | 0 .../references/ConvWithStridesPadding.ref.hxx | 0 .../references/ConvWithoutPadding.ref.hxx | 0 .../test/input_models/references/Div.ref.hxx | 0 .../test/input_models/references/Elu.ref.hxx | 0 .../input_models/references/Equal.ref.hxx | 0 .../test/input_models/references/Erf.ref.hxx | 0 .../test/input_models/references/Exp.ref.hxx | 0 .../references/ExpandDiffSize.ref.hxx | 0 .../references/ExpandSameSize.ref.hxx | 0 .../input_models/references/EyeLike.ref.hxx | 0 .../references/GRUBatchwise.ref.hxx | 0 .../references/GRUBidirectional.ref.hxx | 0 .../references/GRUDefaults.ref.hxx | 0 .../references/GRUInitialBias.ref.hxx | 0 .../references/GRUSeqLength.ref.hxx | 0 .../input_models/references/Gather2d.ref.hxx | 0 .../references/GatherAxis0.ref.hxx | 0 .../references/GatherAxis1.ref.hxx | 0 .../references/GatherAxis2.ref.hxx | 0 .../references/GatherAxis3.ref.hxx | 0 .../references/GatherNegativeIndices.ref.hxx | 0 .../input_models/references/Greater.ref.hxx | 0 .../references/GreaterOrEqual.ref.hxx | 0 .../references/LSTMBatchwise.ref.hxx | 0 .../references/LSTMBidirectional.ref.hxx | 0 .../references/LSTMDefaults.ref.hxx | 0 .../references/LSTMInitialBias.ref.hxx | 0 .../references/LSTMPeepholes.ref.hxx | 0 .../references/LayerNormalization2d.hxx | 0 .../references/LayerNormalization4d.hxx | 0 .../test/input_models/references/Less.ref.hxx | 0 .../references/LessOrEqual.ref.hxx | 0 .../references/LinearWithLeakyRelu.ref.hxx | 0 
.../references/LinearWithSelu.ref.hxx | 0 .../references/LinearWithSigmoid.ref.hxx | 0 .../input_models/references/Linear_16.ref.hxx | 0 .../input_models/references/Linear_32.ref.hxx | 0 .../input_models/references/Linear_64.ref.hxx | 0 .../test/input_models/references/Log.ref.hxx | 0 .../test/input_models/references/Max.ref.hxx | 0 .../MaxMultidirectionalBroadcast.ref.hxx | 0 .../input_models/references/MaxPool1d.ref.hxx | 0 .../input_models/references/MaxPool2d.ref.hxx | 0 .../input_models/references/MaxPool3d.ref.hxx | 0 .../MeanMultidirectionalBroadcast.ref.hxx | 0 .../MinMultidirectionalBroadcast.ref.hxx | 0 .../test/input_models/references/Mul.ref.hxx | 0 .../test/input_models/references/Neg.ref.hxx | 0 .../test/input_models/references/Pow.ref.hxx | 0 .../references/Pow_broadcast.ref.hxx | 0 .../references/RNNBatchwise.ref.hxx | 0 .../references/RNNBidirectional.ref.hxx | 0 .../RNNBidirectionalBatchwise.ref.hxx | 0 .../references/RNNDefaults.ref.hxx | 0 .../references/RNNSeqLength.ref.hxx | 0 .../references/RNNSequence.ref.hxx | 0 .../references/RNNSequenceBatchwise.ref.hxx | 0 .../references/RangeFloat.ref.hxx | 0 .../input_models/references/RangeInt.ref.hxx | 0 .../references/Reciprocal.ref.hxx | 0 .../references/ReduceMean.ref.hxx | 0 .../references/ReduceProd.ref.hxx | 0 .../input_models/references/Shape.ref.hxx | 0 .../input_models/references/Slice.ref.hxx | 0 .../references/Slice_Default_Axis.ref.hxx | 0 .../references/Slice_Default_Steps.ref.hxx | 0 .../input_models/references/Slice_Neg.ref.hxx | 0 .../input_models/references/Softmax1d.ref.hxx | 0 .../input_models/references/Softmax2d.ref.hxx | 0 .../input_models/references/Softmax3d.ref.hxx | 0 .../input_models/references/Softmax4d.ref.hxx | 0 .../test/input_models/references/Sqrt.ref.hxx | 0 .../test/input_models/references/Sub.ref.hxx | 0 .../SumMultidirectionalBroadcast.ref.hxx | 0 .../test/input_models/references/Tanh.ref.hxx | 0 .../input_models/references/Tile5D.ref.hxx | 0 
.../test/input_models/references/TopK.ref.hxx | 0 {src/SOFIE_parsers => parsers}/CMakeLists.txt | 12 +- {src/SOFIE_parsers => parsers}/inc/LinkDef.h | 0 .../inc/SOFIE/RModelParser_ONNX.hxx | 0 {src/SOFIE_parsers => parsers}/onnx_proto3 | 0 .../src/ParseBasicBinary.cxx | 0 .../src/ParseBasicIs.cxx | 0 .../src/ParseBasicNary.cxx | 0 .../src/ParseBasicUnary.cxx | 0 .../src/ParseBatchNormalization.cxx | 0 .../src/ParseCast.cxx | 7 +- .../src/ParseClip.cxx | 0 .../src/ParseComparision.cxx | 0 .../src/ParseConcat.cxx | 0 .../src/ParseConstant.cxx | 0 .../src/ParseConv.cxx | 0 .../src/ParseConvTranspose.cxx | 0 .../src/ParseEinsum.cxx | 0 .../src/ParseElu.cxx | 0 .../src/ParseErf.cxx | 0 .../src/ParseExpand.cxx | 0 .../src/ParseEyeLike.cxx | 0 .../src/ParseFuseBatchnormRelu.cxx | 0 .../src/ParseFuseConvAdd.cxx | 0 .../src/ParseFuseConvTransposeAdd.cxx | 0 .../src/ParseFuseGemmRelu.cxx | 0 .../src/ParseFuseMatMulAdd.cxx | 0 .../src/ParseGRU.cxx | 0 .../src/ParseGather.cxx | 0 .../src/ParseGatherND.cxx | 0 .../src/ParseGemm.cxx | 0 .../src/ParseIdentity.cxx | 0 .../SOFIE_parsers => parsers}/src/ParseIf.cxx | 0 .../src/ParseLSTM.cxx | 0 .../src/ParseLayerNormalization.cxx | 0 .../src/ParseLeakyRelu.cxx | 0 .../src/ParseMatMul.cxx | 0 .../src/ParseNot.cxx | 0 .../src/ParsePad.cxx | 0 .../src/ParsePool.cxx | 0 .../src/ParseRNN.cxx | 0 .../src/ParseRandom.cxx | 0 .../src/ParseRange.cxx | 0 .../src/ParseReduce.cxx | 0 .../src/ParseRelu.cxx | 0 .../src/ParseReshape.cxx | 0 .../src/ParseScatterElements.cxx | 0 .../src/ParseSelu.cxx | 0 .../src/ParseShape.cxx | 0 .../src/ParseSigmoid.cxx | 0 .../src/ParseSlice.cxx | 0 .../src/ParseSoftmax.cxx | 2 +- .../src/ParseSplit.cxx | 0 .../src/ParseTanh.cxx | 0 .../src/ParseTile.cxx | 0 .../src/ParseTopK.cxx | 0 .../src/ParseTranspose.cxx | 0 .../src/ParseWhere.cxx | 4 +- .../src/RModelParser_ONNX.cxx | 0 src/.vscode/settings.json | 61 ---- src/CMakeLists.txt | 11 - {src/utils => utils}/CMakeLists.txt | 8 +- {src/utils => 
utils}/SOFIE/RTensor.hxx | 0 405 files changed, 1828 insertions(+), 517 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 benchmark/CMakeLists.txt create mode 100644 benchmark/README.md create mode 100644 benchmark/models/.gitkeep rename {src/SOFIE_core/test/input_models => benchmark/models}/GNN_model.onnx (100%) rename {src/SOFIE_core/test/input_models => benchmark/models}/simple_transformer.onnx (100%) rename {src/SOFIE_core/test/input_models => benchmark/models}/simple_transformer.onnx.data (100%) create mode 100644 benchmark/src/BenchmarkBackend.hxx create mode 100644 benchmark/src/BenchmarkEmitter.cxx.in create mode 100644 benchmark/src/BenchmarkRunner.cxx.in create mode 100644 benchmark/src/BenchmarkUtils.hxx create mode 100644 benchmark/src/ONNXRuntimeBenchmark.hxx create mode 100644 check_style.sh create mode 100644 cmake/SOFIEConfig.cmake.in rename {src/SOFIE_core => core}/CMakeLists.txt (90%) rename {src/SOFIE_core => core}/README.md (98%) rename {src/SOFIE_core => core}/inc/LinkDef.h (100%) rename {src/SOFIE_core => core}/inc/SOFIE/FunctionList.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/OperatorList.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RFunction.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RFunction_MLP.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RFunction_Mean.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RFunction_Sum.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RModel.hxx (92%) rename {src/SOFIE_core => core}/inc/SOFIE/RModel_Base.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RModel_GNN.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/RModel_GraphIndependent.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator.hxx (98%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_BasicBinary.hxx (98%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_BasicNary.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_BasicUnary.hxx (94%) rename 
{src/SOFIE_core => core}/inc/SOFIE/ROperator_Basic_Is.hxx (90%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_BatchNormalization.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Cast.hxx (83%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Clip.hxx (94%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Comparision.hxx (96%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Concat.hxx (97%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Constant.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Conv.hxx (70%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_ConvTranspose.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_ConvTranspose.icc (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Custom.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Einsum.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Elu.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Erf.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Expand.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_EyeLike.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_GRU.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_GRU.icc (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Gather.hxx (96%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_GatherND.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Gemm.hxx (91%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Identity.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_LSTM.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_LSTM.icc (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_LayerNormalization.hxx (98%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_LeakyRelu.hxx (92%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Not.hxx (87%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Pad.hxx (100%) rename {src/SOFIE_core => 
core}/inc/SOFIE/ROperator_Pool.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_RNN.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_RNN.icc (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Random.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Range.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Reduce.hxx (96%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Relu.hxx (91%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Reshape.hxx (98%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_ScatterElements.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Selu.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Shape.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Sigmoid.hxx (91%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Slice.hxx (97%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Softmax.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Split.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_SubGraph.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Swish.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Tanh.hxx (90%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Tile.hxx (97%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_TopK.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Transpose.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/ROperator_Where.hxx (95%) rename {src/SOFIE_core => core}/inc/SOFIE/SOFIEHelpers.hxx (100%) rename {src/SOFIE_core => core}/inc/SOFIE/SOFIE_common.hxx (97%) rename {src/SOFIE_core => core}/src/Prototype.cxx (100%) rename {src/SOFIE_core => core}/src/RFunction.cxx (100%) rename {src/SOFIE_core => core}/src/RFunction_MLP.cxx (100%) rename {src/SOFIE_core => core}/src/RFunction_Mean.cxx (100%) rename {src/SOFIE_core => core}/src/RFunction_Sum.cxx (100%) rename {src/SOFIE_core => core}/src/RModel.cxx (95%) rename 
{src/SOFIE_core => core}/src/RModel_ALPAKA.cxx (93%) rename {src/SOFIE_core => core}/src/RModel_Base.cxx (92%) rename {src/SOFIE_core => core}/src/RModel_GNN.cxx (100%) rename {src/SOFIE_core => core}/src/RModel_GraphIndependent.cxx (100%) rename {src/SOFIE_core => core}/src/SOFIE_common.cxx (97%) rename {src/SOFIE_core => core}/test/CMakeLists.txt (97%) rename {src/SOFIE_core => core}/test/Conv1dModelGenerator.py (100%) rename {src/SOFIE_core => core}/test/Conv2dModelGenerator.py (100%) rename {src/SOFIE_core => core}/test/Conv3dModelGenerator.py (100%) rename {src/SOFIE_core => core}/test/ConvTrans2dModelGenerator.py (100%) rename {src/SOFIE_core => core}/test/EmitFromONNX.cxx.in (100%) rename {src/SOFIE_core => core}/test/EmitFromONNX_GPU_ALPAKA.cxx.in (100%) rename {src/SOFIE_core => core}/test/EmitFromRoot.cxx.in (100%) rename {src/SOFIE_core => core}/test/GNN/EmitGNN.cxx (100%) rename {src/SOFIE_core => core}/test/GNN/EmitGraphIndependent.cxx (100%) rename {src/SOFIE_core => core}/test/LinearModelGenerator.py (100%) rename {src/SOFIE_core => core}/test/RecurrentModelGenerator.py (100%) rename {src/SOFIE_core => core}/test/TestCustomModelsFromONNX.cxx (100%) rename {src/SOFIE_core => core}/test/TestCustomModelsFromONNXForAlpakaCuda.cxx (97%) rename {src/SOFIE_core => core}/test/TestCustomModelsFromROOT.cxx (100%) rename {src/SOFIE_core => core}/test/TestSofieModels.cxx (100%) rename {src/SOFIE_core => core}/test/input_models/Abs.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Add.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AddBroadcast1.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AddBroadcast2.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AddBroadcast3.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AddBroadcast4.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AddBroadcast5.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AddBroadcast6.onnx (100%) rename 
{src/SOFIE_core => core}/test/input_models/AddBroadcast7.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/AvgPool.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/BatchNorm.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/BatchNormRelu.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Cast.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Clip.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ComplexTopK.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Concat_0D.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Constant.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvTranspose1d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvTranspose2d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvTransposeBias2d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvTransposeBias2dBatched.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvWithAsymmetricPadding.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvWithAutopadSameLower.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvWithPadding.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvWithStridesNoPadding.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvWithStridesPadding.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ConvWithoutPadding.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Cos.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Div.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Einsum_3.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Einsum_4.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Einsum_dotprod.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Einsum_matmul.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Elu.onnx (100%) rename {src/SOFIE_core => 
core}/test/input_models/Equal.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Erf.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Exp.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ExpandDiffSize.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ExpandSameSize.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/EyeLike.onnx (100%) create mode 100644 core/test/input_models/GNN_model.onnx rename {src/SOFIE_core => core}/test/input_models/GRUBatchwise.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GRUBidirectional.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GRUDefaults.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GRUInitialBias.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GRUSeqLength.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Gather2d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherAxis0.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherAxis1.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherAxis2.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherAxis3.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_Batch.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_Ex1.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_Ex2.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_Ex3.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_Ex4.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_Ex5.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherND_NegativeIndices.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GatherNegativeIndices.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Greater.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/GreaterOrEqual.onnx (100%) rename {src/SOFIE_core => 
core}/test/input_models/IsInf.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/IsNaN.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LSTMBatchwise.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LSTMBidirectional.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LSTMDefaults.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LSTMInitialBias.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LSTMPeepholes.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LayerNorm.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LayerNorm3D.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LayerNormScaleBias.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LayerNormalization2d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LayerNormalization4d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Less.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LessOrEqual.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LinearWithLeakyRelu.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LinearWithSelu.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/LinearWithSigmoid.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Linear_16.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Linear_32.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Linear_64.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Log.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Max.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/MaxMultidirectionalBroadcast.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/MaxPool1d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/MaxPool2d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/MaxPool3d.onnx (100%) rename {src/SOFIE_core => 
core}/test/input_models/MeanMultidirectionalBroadcast.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/MinMultidirectionalBroadcast.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Mul.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Neg.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Not.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Pad.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Pow.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Pow_broadcast.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNBatchwise.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNBidirectional.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNBidirectionalBatchwise.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNDefaults.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNSeqLength.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNSequence.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RNNSequenceBatchwise.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RandomNormal.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RandomUniform.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RangeFloat.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/RangeInt.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Reciprocal.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ReduceMean.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ReduceProd.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ReduceSum.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ReduceSumSquare.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/ScatterElements.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Shape.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Sin.onnx (100%) rename 
{src/SOFIE_core => core}/test/input_models/Slice.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Slice_Default_Axis.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Slice_Default_Steps.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Slice_Neg.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Softmax1d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Softmax2d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Softmax3d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Softmax4d.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Softplus.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Split_0.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Split_1.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Split_2.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Sqrt.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Sub.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/SumMultidirectionalBroadcast.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Tanh.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Tile5D.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/TopK.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Transpose.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/Where.onnx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Add.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AddBroadcast1.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AddBroadcast2.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AddBroadcast3.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AddBroadcast4.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AddBroadcast5.ref.hxx (100%) rename {src/SOFIE_core => 
core}/test/input_models/references/AddBroadcast6.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AddBroadcast7.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/AvgPool.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Cast.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ComplexTopK.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Constant.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvTranspose1d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvTranspose2d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvTranspose3d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvTransposeBias2d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvTransposeBias2dBatched.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvWithAsymmetricPadding.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvWithAutopadSameLower.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvWithPadding.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvWithStridesNoPadding.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvWithStridesPadding.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ConvWithoutPadding.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Div.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Elu.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Equal.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Erf.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Exp.ref.hxx (100%) rename {src/SOFIE_core => 
core}/test/input_models/references/ExpandDiffSize.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ExpandSameSize.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/EyeLike.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GRUBatchwise.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GRUBidirectional.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GRUDefaults.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GRUInitialBias.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GRUSeqLength.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Gather2d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GatherAxis0.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GatherAxis1.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GatherAxis2.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GatherAxis3.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GatherNegativeIndices.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Greater.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/GreaterOrEqual.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LSTMBatchwise.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LSTMBidirectional.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LSTMDefaults.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LSTMInitialBias.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LSTMPeepholes.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LayerNormalization2d.hxx (100%) rename {src/SOFIE_core => 
core}/test/input_models/references/LayerNormalization4d.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Less.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LessOrEqual.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LinearWithLeakyRelu.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LinearWithSelu.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/LinearWithSigmoid.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Linear_16.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Linear_32.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Linear_64.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Log.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Max.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/MaxMultidirectionalBroadcast.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/MaxPool1d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/MaxPool2d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/MaxPool3d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/MeanMultidirectionalBroadcast.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/MinMultidirectionalBroadcast.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Mul.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Neg.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Pow.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Pow_broadcast.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RNNBatchwise.ref.hxx (100%) rename {src/SOFIE_core => 
core}/test/input_models/references/RNNBidirectional.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RNNBidirectionalBatchwise.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RNNDefaults.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RNNSeqLength.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RNNSequence.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RNNSequenceBatchwise.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RangeFloat.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/RangeInt.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Reciprocal.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ReduceMean.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/ReduceProd.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Shape.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Slice.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Slice_Default_Axis.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Slice_Default_Steps.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Slice_Neg.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Softmax1d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Softmax2d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Softmax3d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Softmax4d.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Sqrt.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Sub.ref.hxx (100%) rename {src/SOFIE_core => 
core}/test/input_models/references/SumMultidirectionalBroadcast.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Tanh.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/Tile5D.ref.hxx (100%) rename {src/SOFIE_core => core}/test/input_models/references/TopK.ref.hxx (100%) rename {src/SOFIE_parsers => parsers}/CMakeLists.txt (92%) rename {src/SOFIE_parsers => parsers}/inc/LinkDef.h (100%) rename {src/SOFIE_parsers => parsers}/inc/SOFIE/RModelParser_ONNX.hxx (100%) rename {src/SOFIE_parsers => parsers}/onnx_proto3 (100%) rename {src/SOFIE_parsers => parsers}/src/ParseBasicBinary.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseBasicIs.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseBasicNary.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseBasicUnary.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseBatchNormalization.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseCast.cxx (78%) rename {src/SOFIE_parsers => parsers}/src/ParseClip.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseComparision.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseConcat.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseConstant.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseConv.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseConvTranspose.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseEinsum.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseElu.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseErf.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseExpand.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseEyeLike.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseFuseBatchnormRelu.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseFuseConvAdd.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseFuseConvTransposeAdd.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseFuseGemmRelu.cxx 
(100%) rename {src/SOFIE_parsers => parsers}/src/ParseFuseMatMulAdd.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseGRU.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseGather.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseGatherND.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseGemm.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseIdentity.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseIf.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseLSTM.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseLayerNormalization.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseLeakyRelu.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseMatMul.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseNot.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParsePad.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParsePool.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseRNN.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseRandom.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseRange.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseReduce.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseRelu.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseReshape.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseScatterElements.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseSelu.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseShape.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseSigmoid.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseSlice.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseSoftmax.cxx (91%) rename {src/SOFIE_parsers => parsers}/src/ParseSplit.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseTanh.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseTile.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseTopK.cxx (100%) rename {src/SOFIE_parsers => 
parsers}/src/ParseTranspose.cxx (100%) rename {src/SOFIE_parsers => parsers}/src/ParseWhere.cxx (90%) rename {src/SOFIE_parsers => parsers}/src/RModelParser_ONNX.cxx (100%) delete mode 100644 src/.vscode/settings.json delete mode 100644 src/CMakeLists.txt rename {src/utils => utils}/CMakeLists.txt (56%) rename {src/utils => utils}/SOFIE/RTensor.hxx (100%) diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 182ccd4..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "files.associations": { - "*.icc": "cpp", - "limits": "cpp", - "cctype": "cpp", - "clocale": "cpp", - "cmath": "cpp", - "cstdarg": "cpp", - "cstddef": "cpp", - "cstdio": "cpp", - "cstdlib": "cpp", - "cstring": "cpp", - "ctime": "cpp", - "cwchar": "cpp", - "cwctype": "cpp", - "array": "cpp", - "atomic": "cpp", - "bit": "cpp", - "*.tcc": "cpp", - "bitset": "cpp", - "compare": "cpp", - "complex": "cpp", - "concepts": "cpp", - "cstdint": "cpp", - "deque": "cpp", - "map": "cpp", - "set": "cpp", - "string": "cpp", - "unordered_map": "cpp", - "unordered_set": "cpp", - "vector": "cpp", - "exception": "cpp", - "algorithm": "cpp", - "functional": "cpp", - "iterator": "cpp", - "memory": "cpp", - "memory_resource": "cpp", - "numeric": "cpp", - "optional": "cpp", - "random": "cpp", - "regex": "cpp", - "string_view": "cpp", - "system_error": "cpp", - "tuple": "cpp", - "type_traits": "cpp", - "utility": "cpp", - "fstream": "cpp", - "initializer_list": "cpp", - "iomanip": "cpp", - "iosfwd": "cpp", - "iostream": "cpp", - "istream": "cpp", - "new": "cpp", - "numbers": "cpp", - "ostream": "cpp", - "sstream": "cpp", - "stdexcept": "cpp", - "streambuf": "cpp", - "cinttypes": "cpp", - "typeinfo": "cpp", - "charconv": "cpp", - "chrono": "cpp", - "condition_variable": "cpp", - "list": "cpp", - "ratio": "cpp", - "future": "cpp", - "mutex": "cpp", - "semaphore": "cpp", - "shared_mutex": "cpp", - "span": "cpp", - "stop_token": "cpp", - "thread": "cpp", - "cfenv": "cpp", - 
"variant": "cpp", - "format": "cpp", - "any": "cpp", - "source_location": "cpp", - "run_inference_particle_net.C": "cpp", - "test.C": "cpp" - } -} diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ec316c..b978649 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,9 +8,12 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + find_package(BLAS) if(NOT BLAS_FOUND) - message(WARNING "BLAS not found: TMVA-SOFIE will not be fully tested") + message(WARNING "BLAS not found: sofie will not be fully tested") endif() message(STATUS "Looking for Protobuf") @@ -21,25 +24,25 @@ if(NOT Protobuf_FOUND) endif() if(NOT Protobuf_FOUND) if(fail-on-missing) - message(FATAL_ERROR "Protobuf libraries not found and they are required (tmva-sofie option enabled)") + message(FATAL_ERROR "Protobuf libraries not found and they are required (sofie option enabled)") else() - message(STATUS "Protobuf not found. Switching off tmva-sofie option") + message(STATUS "Protobuf not found. Switching off sofie option") message(FATAL_ERROR "SOFIE cannot be installed without Protobuf") endif() else() if(Protobuf_VERSION LESS 3.0) if(fail-on-missing) - message(FATAL_ERROR "Protobuf libraries found but is less than the version required (3.0) (tmva-sofie option enabled)") + message(FATAL_ERROR "Protobuf libraries found but is less than the version required (3.0) (sofie option enabled)") else() - message(STATUS "Protobuf found but its version is not high enough (>3.0). Switching off tmva-sofie option") + message(STATUS "Protobuf found but its version is not high enough (>3.0). 
Switching off sofie option") message(FATAL_ERROR "SOFIE cannot be installed without Protobuf") endif() else() if(NOT TARGET protobuf::protoc) if(fail-on-missing) - message(FATAL_ERROR "Protobuf compiler not found (tmva-sofie option enabled)") + message(FATAL_ERROR "Protobuf compiler not found (sofie option enabled)") else() - message(STATUS "Protobuf compiler not found. Switching off tmva-sofie option") + message(STATUS "Protobuf compiler not found. Switching off sofie option") message(FATAL_ERROR "SOFIE cannot be installed without Protobuf") endif() endif() @@ -61,30 +64,57 @@ else() endif() set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -set(CMAKE_INSTALL_BINDIR "bin" CACHE PATH "user executables (bin)") -set(CMAKE_INSTALL_INCLUDEDIR "include" CACHE PATH "header files") -set(CMAKE_INSTALL_LIBDIR "lib" CACHE PATH "libraries") if(ccache) set(CMAKE_C_COMPILER_LAUNCHER ccache) set(CMAKE_CXX_COMPILER_LAUNCHER ccache) endif() +option(testing "Build and run tests" OFF) if(testing) - find_package(GTest REQUIRED) + find_package(GTest REQUIRED) enable_testing() endif() +option(SOFIE_BENCHMARK "Build the SOFIE CUDA benchmark toolkit" OFF) + if(SOFIE_WITH_ROOT AND ROOT_FOUND) include(cmake/modules/RoottestMacros.cmake) else() include(cmake/modules/SofieTestMacros.cmake) endif() -add_subdirectory(src) +add_subdirectory(utils) +add_subdirectory(core) +add_subdirectory(parsers) -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) +if(SOFIE_BENCHMARK) + add_subdirectory(benchmark) +endif() + +# ── Install cmake package config files ────────────────────────────────────── + +configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/SOFIEConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/SOFIEConfig.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SOFIE +) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/SOFIEConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY AnyNewerVersion +) + 
+install( + EXPORT SOFIETargets + FILE SOFIETargets.cmake + NAMESPACE SOFIE:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SOFIE +) -add_compile_options(-std=c++20) +install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/SOFIEConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/SOFIEConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SOFIE +) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 0000000..794aede --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,316 @@ +cmake_minimum_required(VERSION 3.18) +include(FetchContent) + +################################################################################ +# SOFIE Alpaka Benchmark Toolkit +# +# Usage: +# cmake -Bbuild -DSOFIE_BENCHMARK=ON . +# cmake --build build --target sofie_benchmark +# cd build/benchmark && ./sofie_benchmark [options] +# +# To also benchmark with ONNX Runtime GPU: +# cmake -Bbuild -DSOFIE_BENCHMARK=ON -DSOFIE_BENCHMARK_ORT=ON \ +# [-DONNXRUNTIME_ROOT=/usr/local/onnxruntime] . +# ./sofie_benchmark --onnxruntime +# +# Place .onnx models in benchmark/models/ and re-run cmake to register them. +################################################################################ + +option(SOFIE_BENCHMARK_ORT + "Also benchmark ONNX Runtime GPU alongside SOFIE (requires ORT ≥ 1.18)" + OFF) + +include_directories( + ${CMAKE_CURRENT_SOURCE_DIR}/../core/inc + ${CMAKE_CURRENT_SOURCE_DIR}/../parsers/inc +) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +################################################################################ +# Discover models +################################################################################ + +file(GLOB BENCHMARK_ONNX_MODELS + "${CMAKE_CURRENT_SOURCE_DIR}/models/*.onnx") + +if(NOT BENCHMARK_ONNX_MODELS) + message(STATUS + "SOFIE Benchmark: No .onnx models found in benchmark/models/. 
" + "Add ONNX models there and re-run cmake to enable benchmarking.") + return() +endif() + +list(LENGTH BENCHMARK_ONNX_MODELS N_MODELS) +message(STATUS "SOFIE Benchmark: Found ${N_MODELS} model(s) in benchmark/models/") + +################################################################################ +# Fetch Alpaka (pinned commit) and sofieBLAS (`dev` branch), as in the test suite +################################################################################ + +FetchContent_Declare( + sofieBLAS + GIT_REPOSITORY https://github.com/ML4EP/sofieBLAS + GIT_TAG dev +) +FetchContent_MakeAvailable(sofieBLAS) + +FetchContent_Declare( + alpaka + GIT_REPOSITORY https://github.com/alpaka-group/alpaka + GIT_TAG 2fa91a34ed11b2076e474c5507d920e85cf9b79d +) +FetchContent_MakeAvailable(alpaka) + +################################################################################ +# CUDA (same as the test suite) +################################################################################ + +enable_language(CUDA) +find_package(CUDAToolkit REQUIRED) +message(STATUS "SOFIE Benchmark: CUDA backend (${CUDAToolkit_VERSION})") + +################################################################################ +# Optional: ONNX Runtime GPU backend +################################################################################ + +set(SOFIE_ORT_FOUND FALSE) + +if(SOFIE_BENCHMARK_ORT) + # Use manual detection only — the installed ORT CMake config may reference + # a wrong lib path (e.g. lib64 vs lib) and raise a hard error even with QUIET. + # If ONNXRUNTIME_ROOT is provided it is searched first, followed by a few + # common install prefixes. find_path/find_library use NO_DEFAULT_PATH so they + # only look where we tell them; the ORT CMake config is never consulted. 
+ + set(_ort_search_roots "") + if(DEFINED ONNXRUNTIME_ROOT) + list(APPEND _ort_search_roots "${ONNXRUNTIME_ROOT}") + endif() + list(APPEND _ort_search_roots + /usr/local/onnxruntime /usr/local /usr /opt) + + # Manual header + library search (reliable, no broken cmake-config risk) + find_path(ONNXRUNTIME_INCLUDE_DIR + NAMES onnxruntime_cxx_api.h + PATHS ${_ort_search_roots} + PATH_SUFFIXES include include/onnxruntime + NO_DEFAULT_PATH) + + find_library(ONNXRUNTIME_LIBRARY + NAMES onnxruntime + PATHS ${_ort_search_roots} + PATH_SUFFIXES lib lib64 + NO_DEFAULT_PATH) + + if(ONNXRUNTIME_INCLUDE_DIR AND ONNXRUNTIME_LIBRARY) + set(SOFIE_ORT_FOUND TRUE) + add_library(onnxruntime::onnxruntime SHARED IMPORTED) + set_target_properties(onnxruntime::onnxruntime PROPERTIES + IMPORTED_LOCATION "${ONNXRUNTIME_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${ONNXRUNTIME_INCLUDE_DIR}") + set(SOFIE_ORT_TARGET onnxruntime::onnxruntime) + message(STATUS "SOFIE Benchmark: ONNX Runtime found — ${ONNXRUNTIME_LIBRARY}") + message(STATUS "SOFIE Benchmark: ORT headers — ${ONNXRUNTIME_INCLUDE_DIR}") + else() + message(WARNING + "SOFIE Benchmark: SOFIE_BENCHMARK_ORT=ON but ONNX Runtime " + "not found. Set -DONNXRUNTIME_ROOT= or install ORT. " + "ORT benchmarking will be disabled.") + endif() +endif() + +if(SOFIE_BENCHMARK_ORT AND NOT SOFIE_ORT_FOUND) + message(STATUS "SOFIE Benchmark: ORT benchmarking disabled (library not found)") +endif() + +################################################################################ +# Build per-model strings for configure_file +################################################################################ + +set(_EMIT_BLOCK +"try {\n\ + EmitBenchmarkModel(\"@1@\", \"@2@\", outDir);\n\ +} catch (const std::exception &e) {\n\ + std::cerr << \"[ERROR] @2@: \" << e.what() << \"\\n\";\n\ + ++failures;\n\ +} catch (...) 
{\n\ + std::cerr << \"[ERROR] @2@: unknown exception\\n\";\n\ + ++failures;\n\ +}\n\ +") + +set(_RUN_BLOCK +" Benchmark_@3@(warmup, iterations, weightsDir);\n\ +") + +# ORT call: passes the full ONNX path directly (no SOFIE weights needed) +set(_ORT_RUN_BLOCK +"#ifdef SOFIE_BENCHMARK_ORT\n\ + if (run_ort) BenchmarkORT_GPU(\"@1@\", \"@2@\", warmup, iterations);\n\ +#endif\n\ +") + +set(BENCHMARK_EMIT_CAPTURES "") +set(BENCHMARK_BENCH_HEADERS "") +set(BENCHMARK_RUN_CALLS "") +set(GENERATED_HEADERS "") + +foreach(ONNX_FILE ${BENCHMARK_ONNX_MODELS}) + get_filename_component(MODEL_NAME "${ONNX_FILE}" NAME_WE) + + string(REGEX REPLACE "[^A-Za-z0-9]" "_" MODEL_CPPNAME "${MODEL_NAME}") + + set(GEN_HXX "${CMAKE_CURRENT_BINARY_DIR}/${MODEL_NAME}_GPU_ALPAKA.hxx") + set(GEN_BENCH "${CMAKE_CURRENT_BINARY_DIR}/${MODEL_NAME}_bench.hxx") + list(APPEND GENERATED_HEADERS "${GEN_HXX}" "${GEN_BENCH}") + + string(REPLACE "@1@" "${ONNX_FILE}" _emit_cap "${_EMIT_BLOCK}") + string(REPLACE "@2@" "${MODEL_NAME}" _emit_cap "${_emit_cap}") + string(APPEND BENCHMARK_EMIT_CAPTURES "${_emit_cap}") + + # SOFIE Alpaka call + string(REPLACE "@3@" "${MODEL_CPPNAME}" _run_cap "${_RUN_BLOCK}") + string(APPEND BENCHMARK_RUN_CALLS "${_run_cap}") + + # ORT call (guarded by #ifdef at compile time + run_ort flag at runtime) + string(REPLACE "@1@" "${ONNX_FILE}" _ort_cap "${_ORT_RUN_BLOCK}") + string(REPLACE "@2@" "${MODEL_NAME}" _ort_cap "${_ort_cap}") + string(APPEND BENCHMARK_RUN_CALLS "${_ort_cap}") + + string(APPEND BENCHMARK_BENCH_HEADERS + "#include \"${MODEL_NAME}_bench.hxx\"\n") +endforeach() + +################################################################################ +# Configure emitter and runner sources +################################################################################ + +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/BenchmarkEmitter.cxx.in" + "${CMAKE_CURRENT_BINARY_DIR}/BenchmarkEmitter_all.cxx" + @ONLY +) + +set(RUNNER_SRC 
"${CMAKE_CURRENT_BINARY_DIR}/BenchmarkRunner_all.cu") +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/BenchmarkRunner.cxx.in" + "${RUNNER_SRC}" + @ONLY +) + +################################################################################ +# Emitter executable (plain C++, generates SOFIE headers at build time) +################################################################################ + +add_executable(sofie_benchmark_emitter + "${CMAKE_CURRENT_BINARY_DIR}/BenchmarkEmitter_all.cxx" +) + +target_include_directories(sofie_benchmark_emitter PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../core/inc" + "${CMAKE_CURRENT_SOURCE_DIR}/../parsers/inc" +) + +target_link_libraries(sofie_benchmark_emitter PRIVATE + SOFIE_core + SOFIE_parsers + protobuf::libprotobuf +) + +target_compile_options(sofie_benchmark_emitter PRIVATE + -Wno-unused-parameter + -Wno-array-bounds +) + +################################################################################ +# Custom command: run emitter → generate inference + benchmark headers +################################################################################ + +add_custom_command( + OUTPUT ${GENERATED_HEADERS} + COMMAND "${CMAKE_COMMAND}" -E make_directory "${CMAKE_CURRENT_BINARY_DIR}" + COMMAND "$" "${CMAKE_CURRENT_BINARY_DIR}" + DEPENDS sofie_benchmark_emitter ${BENCHMARK_ONNX_MODELS} + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "SOFIE Benchmark: generating headers for ${N_MODELS} model(s)..." 
+ VERBATIM +) + +add_custom_target(sofie_benchmark_headers + DEPENDS ${GENERATED_HEADERS} +) + +################################################################################ +# Benchmark runner (compiled as .cu, same as the test suite) +################################################################################ + +set_source_files_properties("${RUNNER_SRC}" PROPERTIES LANGUAGE CUDA) + +add_executable(sofie_benchmark "${RUNNER_SRC}") + +add_dependencies(sofie_benchmark sofie_benchmark_headers) + +target_include_directories(sofie_benchmark PRIVATE + "${CMAKE_CURRENT_BINARY_DIR}" # generated headers live here + "${CMAKE_CURRENT_SOURCE_DIR}/src" # ONNXRuntimeBenchmark.hxx + "${alpaka_SOURCE_DIR}/include" + "${sofieblas_SOURCE_DIR}/include" + "${CUDAToolkit_INCLUDE_DIRS}" +) + +set_target_properties(sofie_benchmark PROPERTIES + CUDA_SEPARABLE_COMPILATION OFF + CUDA_ARCHITECTURES "70;75;80;86;89;90" + CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" +) + +target_compile_definitions(sofie_benchmark PRIVATE + ALPAKA_ACC_GPU_CUDA_ENABLED + ALPAKA_HAS_STD_ATOMIC_REF + $<$:SOFIE_BENCHMARK_ORT> +) + +target_compile_options(sofie_benchmark PRIVATE + $<$: + --extended-lambda + --expt-relaxed-constexpr + --use_fast_math + -O2 + -Wno-deprecated-gpu-targets + > + $<$: + -O2 + -fPIC + > +) + +target_link_libraries(sofie_benchmark PRIVATE + SOFIE_core + CUDA::cudart + CUDA::cublas + CUDA::cublasLt + $<$:${SOFIE_ORT_TARGET}> +) + +if(SOFIE_ORT_FOUND) + message(STATUS "SOFIE Benchmark: target 'sofie_benchmark' configured " + "(${N_MODELS} model(s), CUDA backend + ORT-GPU)") +else() + message(STATUS "SOFIE Benchmark: target 'sofie_benchmark' configured " + "(${N_MODELS} model(s), CUDA backend; " + "re-configure with -DSOFIE_BENCHMARK_ORT=ON for ORT comparison)") +endif() + +# Convenience CTest entry +if(testing) + add_test( + NAME SofieBenchmark + COMMAND sofie_benchmark --warmup 5 --iterations 20 + WORKING_DIRECTORY 
"${CMAKE_CURRENT_BINARY_DIR}" + ) +endif() diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..1a9a0f5 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,95 @@ +# SOFIE Alpaka Benchmark Toolkit + +Measures **inference latency and throughput** for ONNX models compiled by SOFIE and +executed via Alpaka (CUDA backend). Optionally runs the same models through +**ONNX Runtime GPU** for a side-by-side comparison. + +--- + +## Quick Start + +### 1. Add your models + +``` +benchmark/models/ + GNN_model.onnx + simple_transformer.onnx + resnet50.onnx + ... +``` + +Re-run CMake after adding or removing files (it globs `models/*.onnx`). + +### 2. Configure + +```bash +# SOFIE inference only (default) +cmake -B build -DSOFIE_BENCHMARK=ON /path/to/SOFIE + +# With ONNX Runtime GPU comparison +cmake -B build \ + -DSOFIE_BENCHMARK=ON \ + -DSOFIE_BENCHMARK_ORT=ON \ + -DONNXRUNTIME_ROOT=/path/to/onnxruntime \ + /path/to/SOFIE +``` + +| CMake flag | Default | Description | +|---|---|---| +| `-DSOFIE_BENCHMARK=ON` | `OFF` | Enable the benchmark suite | +| `-DSOFIE_BENCHMARK_ORT=ON` | `OFF` | Also benchmark ONNX Runtime GPU | +| `-DONNXRUNTIME_ROOT=<path>` | — | Hint for finding ORT headers/library | + +> **Tested with ONNX Runtime 1.22.0 GPU** +> (`onnxruntime-linux-x64-gpu-1.22.0`). The CMake config bundled with some ORT +> installations may reference an incorrect `lib64/` path — this toolkit uses manual +> header/library detection to avoid that. + +### 3. Build + +```bash +cmake --build build --target sofie_benchmark -j$(nproc) +``` + +This automatically: +1. Builds **`sofie_benchmark_emitter`** — parses each `.onnx` and emits: + - `<model>_GPU_ALPAKA.hxx` — SOFIE CUDA/Alpaka inference code + - `<model>_GPU_ALPAKA.dat` — serialized weights + - `<model>_bench.hxx` — timing wrapper `Benchmark_<model>()` +2. Builds **`sofie_benchmark`** — compiles all generated code as `.cu` and links the + timing loop. + +### 4. 
Run + +```bash +cd build/benchmark + +# SOFIE only (no ORT needed at runtime) +./sofie_benchmark + +# SOFIE + ONNX Runtime GPU comparison +LD_LIBRARY_PATH=/path/to/onnxruntime/lib:$LD_LIBRARY_PATH \ +./sofie_benchmark --onnxruntime +``` + +--- + +## Runtime Options + +| Flag | Default | Description | +|------|---------|-------------| +| `--warmup, -w <N>` | 10 | Warm-up iterations (not timed) | +| `--iterations, -n <N>` | 100 | Timed iterations | +| `--weights-dir <dir>` | `.` | Directory containing `.dat` weight files | +| `--onnxruntime, --ort` | off | Run ONNX Runtime GPU benchmark after each SOFIE model | +| `--help, -h` | | Print this help and exit | + + +--- + +## Re-running after adding models + +```bash +cmake build # re-configure (re-globs) +cmake --build build --target sofie_benchmark -j$(nproc) # re-build +``` diff --git a/benchmark/models/.gitkeep b/benchmark/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/SOFIE_core/test/input_models/GNN_model.onnx b/benchmark/models/GNN_model.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/GNN_model.onnx rename to benchmark/models/GNN_model.onnx diff --git a/src/SOFIE_core/test/input_models/simple_transformer.onnx b/benchmark/models/simple_transformer.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/simple_transformer.onnx rename to benchmark/models/simple_transformer.onnx diff --git a/src/SOFIE_core/test/input_models/simple_transformer.onnx.data b/benchmark/models/simple_transformer.onnx.data similarity index 100% rename from src/SOFIE_core/test/input_models/simple_transformer.onnx.data rename to benchmark/models/simple_transformer.onnx.data diff --git a/benchmark/src/BenchmarkBackend.hxx b/benchmark/src/BenchmarkBackend.hxx new file mode 100644 index 0000000..6e7987e --- /dev/null +++ b/benchmark/src/BenchmarkBackend.hxx @@ -0,0 +1,37 @@ +#pragma once +// Backend type aliases — selected at compile time by CMake via -DSOFIE_BACKEND_* +// The runner and 
every generated meta-header use these aliases so they +// stay free of any backend-specific API (cuda_runtime.h, hip_runtime.h, …). + +#include + +namespace sofie_bench { + +using Idx = std::size_t; +using Dim1 = alpaka::DimInt<1>; +using Ext1 = alpaka::Vec; + +#if defined(SOFIE_BACKEND_CUDA) + + using AccTag = alpaka::TagGpuCudaRt; + using Platform = alpaka::PlatformCudaRt; + using Device = alpaka::DevCudaRt; + using Queue = alpaka::Queue; + +#elif defined(SOFIE_BACKEND_HIP) + + using AccTag = alpaka::TagGpuHipRt; + using Platform = alpaka::PlatformHipRt; + using Device = alpaka::DevHipRt; + using Queue = alpaka::Queue; + +#else // CPU serial (default / fallback) + + using AccTag = alpaka::TagCpuSerial; + using Platform = alpaka::PlatformCpu; + using Device = alpaka::DevCpu; + using Queue = alpaka::Queue; + +#endif + +} // namespace sofie_bench diff --git a/benchmark/src/BenchmarkEmitter.cxx.in b/benchmark/src/BenchmarkEmitter.cxx.in new file mode 100644 index 0000000..cc02357 --- /dev/null +++ b/benchmark/src/BenchmarkEmitter.cxx.in @@ -0,0 +1,220 @@ +// SOFIE Benchmark Emitter +// Auto-configured by CMake — do not edit directly. +// For each .onnx model in benchmark/models/ this binary generates: +// <model>_GPU_ALPAKA.hxx — SOFIE inference code +// <model>_GPU_ALPAKA.dat — serialized weights +// <model>_bench.hxx — timing function, following the same +// pattern as the unit tests + +#include "SOFIE/RModel_Base.hxx" +#include "SOFIE/RModel.hxx" +#include "SOFIE/RModelParser_ONNX.hxx" + +#include +#include +#include +#include +#include +#include + +using namespace SOFIE; + +// Resolve a single Dim to a concrete size; dynamic dims default to 1. +static size_t resolveDim(const Dim &d) { + return (d.dim > 0) ? 
static_cast(d.dim) : 1u; +} + +static int EmitBenchmarkModel(const std::string &onnxPath, + const std::string &modelName, + const std::string &outDir) +{ + std::cout << "[Benchmark] Processing: " << onnxPath << "\n"; + + RModelParser_ONNX parser; + RModel model = parser.Parse(onnxPath); + + const auto &inputNames = model.GetInputTensorNames(); + if (inputNames.empty()) { + std::cerr << "[WARN] " << modelName << " has no inputs – skipping.\n"; + return 1; + } + + // Map SOFIE tensor type to C++ type string + auto tensorTypeToCpp = [](ETensorType t) -> std::string { + switch (t) { + case ETensorType::FLOAT: return "float"; + case ETensorType::DOUBLE: return "double"; + case ETensorType::INT32: return "int32_t"; + case ETensorType::INT64: return "int64_t"; + case ETensorType::UINT8: return "uint8_t"; + case ETensorType::INT8: return "int8_t"; + case ETensorType::UINT16: return "uint16_t"; + case ETensorType::INT16: return "int16_t"; + case ETensorType::UINT32: return "uint32_t"; + case ETensorType::UINT64: return "uint64_t"; + case ETensorType::BOOL: return "uint8_t"; + default: return "float"; + } + }; + + // Collect input metadata before code generation + struct InputMeta { + std::string cppType; + size_t numElements; + }; + std::vector inputs; + for (const auto &n : inputNames) { + InputMeta m; + try { m.cppType = tensorTypeToCpp(model.GetTensorType(n)); } + catch (...) { m.cppType = "float"; } + m.numElements = 1; + try { + for (const auto &d : model.GetDimTensorShape(n)) + m.numElements *= resolveDim(d); + } catch (...) 
{} + inputs.push_back(m); + } + + // Generate SOFIE GPU/Alpaka inference code + model.GenerateGPU_ALPAKA(); + + std::string hxxPath = outDir + "/" + modelName + "_GPU_ALPAKA.hxx"; + std::string benchPath = outDir + "/" + modelName + "_bench.hxx"; + + model.OutputGenerated(hxxPath); + + // Sanitize model name into a valid C++ identifier + std::string cppName = modelName; + for (char &c : cppName) + if (!std::isalnum(static_cast(c))) c = '_'; + + // Build "session.infer(input_d_0, input_d_1, ...)" + std::ostringstream inferCall; + inferCall << "session.infer("; + for (size_t i = 0; i < inputs.size(); ++i) { + if (i) inferCall << ", "; + inferCall << "input_d_" << i; + } + inferCall << ")"; + + // ----------------------------------------------------------------------- + // Write _bench.hxx — follows the exact same pattern as the unit tests + // in TestCustomModelsFromONNXForAlpakaCuda.cxx, with timing added. + // ----------------------------------------------------------------------- + std::ofstream bench(benchPath); + if (!bench.is_open()) { + std::cerr << "[ERROR] Cannot open " << benchPath << "\n"; + return 1; + } + + bench + << "// Auto-generated benchmark for model: " << modelName << "\n" + << "// DO NOT EDIT — regenerated by the SOFIE benchmark emitter.\n" + << "#pragma once\n\n" + << "#include \"" << modelName << "_GPU_ALPAKA.hxx\"\n" + << "#include \n" + << "#include \n" + << "#include \n" + << "#include \n" + << "#include \n" + << "#include \n\n" + << "inline void Benchmark_" << cppName + << "(int warmup, int iterations, const std::string& weightsDir) {\n" + << " using Idx = std::size_t;\n" + << " using Dim1 = alpaka::DimInt<1>;\n" + << " using Ext1 = alpaka::Vec;\n\n" + << " // ---- Device/host setup (mirrors unit-test pattern) ----\n" + << " alpaka::PlatformCpu hostPlatform{};\n" + << " auto host = alpaka::getDevByIdx(hostPlatform, 0u);\n" + << " alpaka::PlatformCudaRt platform{};\n" + << " auto device = alpaka::getDevByIdx(platform, 0u);\n" + << " 
alpaka::Queue queue{device};\n\n" + << " std::mt19937 rng(42);\n" + << " std::uniform_real_distribution fdist(-1.0f, 1.0f);\n\n"; + + // Allocate host buffers, fill with data, allocate device buffers, copy + for (size_t i = 0; i < inputs.size(); ++i) { + const std::string &T = inputs[i].cppType; + const size_t N = inputs[i].numElements; + bench + << " // Input " << i << ": " << T << "[" << N << "]\n" + << " auto input_h_" << i << " = alpaka::allocBuf<" << T + << ", Idx>(host, Ext1::all(Idx{" << N << "}));\n" + << " {\n" + << " auto *p = reinterpret_cast<" << T + << "*>(alpaka::getPtrNative(input_h_" << i << "));\n"; + if (T == "float" || T == "double") { + bench + << " for (size_t k = 0; k < " << N + << "; ++k) p[k] = static_cast<" << T << ">(fdist(rng));\n"; + } else { + // Integer/index tensors: fill with zeros so any index value + // is a safe, in-bounds reference into the data arrays. + bench + << " std::fill(p, p + " << N + << ", static_cast<" << T << ">(0));\n"; + } + bench + << " }\n" + << " auto input_d_" << i << " = alpaka::allocBuf<" << T + << ", Idx>(device, Ext1::all(Idx{" << N << "}));\n" + << " alpaka::memcpy(queue, input_d_" << i + << ", input_h_" << i << ");\n\n"; + } + bench << " alpaka::wait(queue);\n\n"; + + // Create session (loads weights from .dat file) + bench + << " // ---- Create session (loads weights) ----\n" + << " std::string weightFile = weightsDir + \"/" + << modelName << "_GPU_ALPAKA.dat\";\n" + << " SOFIE_" << cppName + << "::Session session(weightFile);\n\n"; + + // Warmup (not timed) + bench + << " // ---- Warmup ----\n" + << " for (int w = 0; w < warmup; ++w)\n" + << " " << inferCall.str() << ";\n" + << " alpaka::wait(session.queue);\n" + << " cudaDeviceSynchronize();\n\n"; + + // Timed loop + bench + << " // ---- Timed benchmark ----\n" + << " auto t0 = std::chrono::high_resolution_clock::now();\n" + << " for (int _i = 0; _i < iterations; ++_i)\n" + << " " << inferCall.str() << ";\n" + << " alpaka::wait(session.queue);\n" + << " 
cudaDeviceSynchronize();\n" + << " auto t1 = std::chrono::high_resolution_clock::now();\n\n" + << " double avg_ms = std::chrono::duration" + "(t1 - t0).count() / iterations;\n" + << " double throughput = (avg_ms > 0.0) ? 1000.0 / avg_ms : 0.0;\n" + << " std::printf(\"%-30s avg %8.4f ms (%8.1f inf/s)\\n\",\n" + << " \"" << modelName << "\", avg_ms, throughput);\n" + << "}\n"; + + bench.close(); + + std::cout << "[Benchmark] Wrote: " << hxxPath << "\n" + << " Wrote: " << benchPath << "\n"; + return 0; +} + +// =========================================================================== +// main() — one EmitBenchmarkModel call per model, injected by CMake +// =========================================================================== +int main(int argc, char *argv[]) { + if (argc < 2) { + std::cerr << "Usage: sofie_benchmark_emitter \n"; + return 1; + } + std::string outDir = argv[1]; + int failures = 0; + +@BENCHMARK_EMIT_CAPTURES@ + + std::cout << "[Benchmark Emitter] Done — " << failures << " failure(s).\n"; + return failures == 0 ? 0 : 1; +} diff --git a/benchmark/src/BenchmarkRunner.cxx.in b/benchmark/src/BenchmarkRunner.cxx.in new file mode 100644 index 0000000..a0a4d61 --- /dev/null +++ b/benchmark/src/BenchmarkRunner.cxx.in @@ -0,0 +1,71 @@ +// SOFIE Alpaka Benchmark Runner +// Auto-configured by CMake — do not edit directly. +// Compiled as .cu so NVCC can compile the generated SOFIE CUDA kernels. 
+ +#include +#include +#include +#include +#include + +// Per-model benchmark functions (generated by the emitter, one per .onnx file) +@BENCHMARK_BENCH_HEADERS@ + +// Optional ONNX Runtime GPU comparison +#ifdef SOFIE_BENCHMARK_ORT +#include "ONNXRuntimeBenchmark.hxx" +#endif + +int main(int argc, char *argv[]) { + int warmup = 10; + int iterations = 100; + std::string weightsDir = "."; + bool run_ort = false; // opt-in via --onnxruntime + + for (int i = 1; i < argc; ++i) { + std::string a = argv[i]; + if ((a == "--warmup" || a == "-w") && i + 1 < argc) warmup = std::stoi(argv[++i]); + else if ((a == "--iterations" || a == "-n") && i + 1 < argc) iterations = std::stoi(argv[++i]); + else if (a == "--weights-dir" && i + 1 < argc) weightsDir = argv[++i]; + else if (a == "--onnxruntime" || a == "--ort") run_ort = true; + else if (a == "--help" || a == "-h") { + std::cout << + "Usage: sofie_benchmark [options]\n" + " --warmup, -w Warmup iterations (default: 10)\n" + " --iterations, -n Timed iterations (default: 100)\n" + " --weights-dir

SOFIE .dat weight files (default: .)\n" +#ifdef SOFIE_BENCHMARK_ORT + " --onnxruntime, --ort Also run ONNX Runtime GPU comparison\n" +#else + " --onnxruntime, --ort (not available; rebuild with -DSOFIE_BENCHMARK_ORT=ON)\n" +#endif + ; + return 0; + } + } + +#ifndef SOFIE_BENCHMARK_ORT + if (run_ort) { + std::fprintf(stderr, + "Warning: --onnxruntime requested but this binary was built without " + "ORT support.\n Rebuild with -DSOFIE_BENCHMARK_ORT=ON.\n"); + run_ort = false; + } +#endif + + std::printf("=== SOFIE Alpaka Benchmark ===\n"); + std::printf("Warmup: %d | Iterations: %d", warmup, iterations); +#ifdef SOFIE_BENCHMARK_ORT + if (run_ort) std::printf(" | ORT-GPU comparison: ON"); +#endif + std::printf("\n\n"); + + std::printf("%-30s %18s %16s\n", "Model", "Avg (ms)", "Throughput (inf/s)"); + std::printf("%s\n", std::string(70, '-').c_str()); + + // One Benchmark_() call per discovered model, + // immediately followed by the optional BenchmarkORT_GPU() call. +@BENCHMARK_RUN_CALLS@ + + return 0; +} diff --git a/benchmark/src/BenchmarkUtils.hxx b/benchmark/src/BenchmarkUtils.hxx new file mode 100644 index 0000000..26a4546 --- /dev/null +++ b/benchmark/src/BenchmarkUtils.hxx @@ -0,0 +1,168 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sofie_bench { + +struct BenchmarkConfig { + int warmupIter = 10; + int benchIter = 100; + int deviceId = 0; + float tolerance = 1e-3f; + bool validateOrt = false; + std::string weightsDir = "."; + bool csvOutput = false; + bool verbose = false; +}; + +struct BenchmarkResult { + std::string modelName; + size_t inputElements = 0; + size_t outputElements = 0; + float avgInferMs = 0.0f; // per-inference average (chrono) + float throughput = 0.0f; // inferences / second + float weightMemMB = 0.0f; // device memory for model weights (if measurable) + float runtimeMemMB = 0.0f; // device memory for intermediates + bool ortRan = false; + bool ortMatch = false; + 
float ortMaxDiff = -1.0f; + bool skipped = false; + std::string skipReason; +}; + +inline BenchmarkConfig ParseArgs(int argc, char *argv[]) { + BenchmarkConfig cfg; + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if ((arg == "--warmup" || arg == "-w") && i + 1 < argc) + cfg.warmupIter = std::stoi(argv[++i]); + else if ((arg == "--iterations" || arg == "-n") && i + 1 < argc) + cfg.benchIter = std::stoi(argv[++i]); + else if ((arg == "--device" || arg == "-d") && i + 1 < argc) + cfg.deviceId = std::stoi(argv[++i]); + else if ((arg == "--tolerance" || arg == "-t") && i + 1 < argc) + cfg.tolerance = std::stof(argv[++i]); + else if (arg == "--validate-ort") + cfg.validateOrt = true; + else if ((arg == "--weights-dir") && i + 1 < argc) + cfg.weightsDir = argv[++i]; + else if (arg == "--csv") + cfg.csvOutput = true; + else if (arg == "--verbose" || arg == "-v") + cfg.verbose = true; + else if (arg == "--help" || arg == "-h") { + std::cout << "SOFIE Alpaka Benchmark\n\n" + << "Options:\n" + << " --warmup, -w Warmup iterations (default: 10)\n" + << " --iterations, -n Benchmark iterations (default: 100)\n" + << " --device, -d Device index (default: 0)\n" + << " --tolerance, -t ONNXRuntime diff tolerance (default: 1e-3)\n" + << " --validate-ort Compare SOFIE outputs to ONNXRuntime\n" + << " --weights-dir Directory containing .dat weight files (default: .)\n" + << " --csv Print results in CSV format\n" + << " --verbose, -v Verbose output\n"; + std::exit(0); + } + } + return cfg; +} + +// Print device name (caller obtains it via alpaka::getName(dev)) +inline void PrintDeviceInfo(const std::string &deviceName) { + std::cout << "Device: " << deviceName << "\n"; +} + +inline void PrintHeader(const BenchmarkConfig &cfg, const std::string &deviceName = "") { + std::cout << "\n=== SOFIE Alpaka Benchmark ===\n"; + if (!deviceName.empty()) + PrintDeviceInfo(deviceName); + std::cout << "Warmup: " << cfg.warmupIter + << " | Iterations: " << cfg.benchIter; + if 
(cfg.validateOrt) + std::cout << " | ONNXRuntime validation ON (tol=" << cfg.tolerance << ")"; + std::cout << "\n\n"; + + if (cfg.csvOutput) { + std::cout << "Model,InputElems,OutputElems,AvgInferMs,Throughput(inf/s)," + "WeightMem(MB),RuntimeMem(MB),OrtMatch,OrtMaxDiff\n"; + } else { + std::cout << std::left + << std::setw(30) << "Model" + << std::setw(12) << "Input" + << std::setw(12) << "Output" + << std::setw(14) << "Avg(ms)" + << std::setw(16) << "Throughput(i/s)" + << std::setw(12) << "ORT Check" + << "\n"; + std::cout << std::string(96, '-') << "\n"; + } +} + +inline void PrintResult(const BenchmarkResult &r, const BenchmarkConfig &cfg) { + if (r.skipped) { + if (!cfg.csvOutput) + std::cout << std::left << std::setw(30) << r.modelName + << " [SKIPPED: " << r.skipReason << "]\n"; + return; + } + + if (cfg.csvOutput) { + std::cout << r.modelName << "," + << r.inputElements << "," + << r.outputElements << "," + << std::fixed << std::setprecision(4) << r.avgInferMs << "," + << std::fixed << std::setprecision(1) << r.throughput << "," + << std::fixed << std::setprecision(2) << r.weightMemMB << "," + << std::fixed << std::setprecision(2) << r.runtimeMemMB << ","; + if (r.ortRan) + std::cout << (r.ortMatch ? "PASS" : "FAIL") << "," << r.ortMaxDiff; + else + std::cout << "N/A,N/A"; + std::cout << "\n"; + } else { + std::string ortStr = "N/A"; + if (r.ortRan) { + std::ostringstream oss; + oss << (r.ortMatch ? 
"PASS" : "FAIL") + << "(d=" << std::scientific << std::setprecision(1) << r.ortMaxDiff << ")"; + ortStr = oss.str(); + } + std::cout << std::left + << std::setw(30) << r.modelName + << std::setw(12) << r.inputElements + << std::setw(12) << r.outputElements + << std::setw(14) << std::fixed << std::setprecision(4) << r.avgInferMs + << std::setw(16) << std::fixed << std::setprecision(1) << r.throughput + << std::setw(12) << ortStr + << "\n"; + } +} + +inline void PrintSummary(const std::vector &results, const BenchmarkConfig &cfg) { + if (cfg.csvOutput) return; + + std::cout << "\n" << std::string(96, '=') << "\n"; + int ran = 0, skipped = 0, ortFail = 0; + float totalMs = 0.0f; + for (const auto &r : results) { + if (r.skipped) { ++skipped; continue; } + ++ran; + totalMs += r.avgInferMs; + if (r.ortRan && !r.ortMatch) ++ortFail; + } + std::cout << "Summary: " << ran << " model(s) benchmarked"; + if (skipped) std::cout << ", " << skipped << " skipped"; + if (ran > 0) std::cout << ", avg inference " << std::fixed << std::setprecision(4) << (totalMs / ran) << " ms"; + if (ortFail) std::cout << ", " << ortFail << " ORT mismatch(es)"; + std::cout << "\n"; +} + +} // namespace sofie_bench diff --git a/benchmark/src/ONNXRuntimeBenchmark.hxx b/benchmark/src/ONNXRuntimeBenchmark.hxx new file mode 100644 index 0000000..4831c02 --- /dev/null +++ b/benchmark/src/ONNXRuntimeBenchmark.hxx @@ -0,0 +1,231 @@ +// SOFIE Benchmark — ONNX Runtime GPU backend +// Generic benchmark: loads any ONNX model, introspects shapes, runs with the +// CUDA ExecutionProvider. Float inputs are filled with uniform random values; +// integer inputs are zeroed (safe for index tensors like edge_index). +// +// Data stays on the HOST side of the ORT API (ORT handles H↔D transfers +// internally) — this measures end-to-end latency from the application's +// perspective. Use the optional IOBinding path (--ort-device-io, WIP) to +// measure pure GPU compute time comparable to the SOFIE numbers. 
+#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +// ── helpers ────────────────────────────────────────────────────────────────── + +namespace sofie_ort_bench_detail { + +/// Total element count from a shape vector (any non-positive dim, e.g. a -1 dynamic dim or a 0 dim, counts as 1). +inline std::size_t shapeToSize(const std::vector& shape) { + std::size_t n = 1; + for (auto d : shape) n *= (d > 0 ? static_cast(d) : 1u); + return n; +} + +/// Human-readable ORT element-type name ("other" for any type not listed in the switch). +inline const char* ortTypeName(ONNXTensorElementDataType t) { + switch (t) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: return "float32"; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: return "float64"; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: return "int32"; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: return "int64"; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: return "uint8"; + case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: return "bool"; + default: return "other"; + } +} + +} // namespace sofie_ort_bench_detail + +// ── main benchmark function ─────────────────────────────────────────────────── + +/// Run @p model_path through ONNX Runtime's CUDAExecutionProvider. +/// Results are printed in the same table format as the SOFIE Alpaka benchmark. +/// +/// @param model_path Full path to the .onnx file. +/// @param model_name Display name shown in the table (typically the stem). +/// @param warmup Number of warm-up iterations (not timed). +/// @param iterations Number of timed iterations. +/// @param device_id CUDA device index (default 0). +/// @param verbose If true, print per-input shape/type information. 
+inline void BenchmarkORT_GPU(const std::string& model_path, + const std::string& model_name, + int warmup, + int iterations, + int device_id = 0, + bool verbose = false) +{ + using namespace sofie_ort_bench_detail; + + // ── ORT session setup ──────────────────────────────────────────────────── + Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "sofie_ort_bench"); + + Ort::SessionOptions opts; + opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); + opts.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + + OrtCUDAProviderOptions cuda_opts{}; + cuda_opts.device_id = device_id; + cuda_opts.arena_extend_strategy = 0; // kNextPowerOfTwo + cuda_opts.gpu_mem_limit = SIZE_MAX; + cuda_opts.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive; + cuda_opts.do_copy_in_default_stream = 1; + opts.AppendExecutionProvider_CUDA(cuda_opts); + + Ort::Session session(env, model_path.c_str(), opts); + Ort::AllocatorWithDefaultOptions alloc; + Ort::MemoryInfo mem_cpu = + Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + + // ── introspect inputs ───────────────────────────────────────────────────── + const std::size_t num_inputs = session.GetInputCount(); + + std::vector input_names_str(num_inputs); + std::vector input_names_ptr(num_inputs); + std::vector> input_shapes(num_inputs); + std::vector input_types(num_inputs); + + // backing storage — one allocation per input + std::vector> float_data(num_inputs); + std::vector> double_data(num_inputs); + std::vector> int64_data(num_inputs); + std::vector> int32_data(num_inputs); + std::vector> uint8_data(num_inputs); + // Note: bool_data uses uint8_t storage; pointer is cast to bool* for CreateTensor + // (sizeof(bool)==sizeof(uint8_t)==1 on all supported platforms) + + std::mt19937 rng(42); + std::uniform_real_distribution fdist(-1.f, 1.f); + + std::vector input_tensors; + input_tensors.reserve(num_inputs); + + for (std::size_t i = 0; i < num_inputs; ++i) { + // name + auto name_ptr = 
session.GetInputNameAllocated(i, alloc); + input_names_str[i] = name_ptr.get(); + input_names_ptr[i] = input_names_str[i].c_str(); + + // type + shape + auto info = session.GetInputTypeInfo(i); + auto tinfo = info.GetTensorTypeAndShapeInfo(); + input_types[i] = tinfo.GetElementType(); + input_shapes[i] = tinfo.GetShape(); + + // replace dynamic dims (-1) with 1 for benchmarking + for (auto& d : input_shapes[i]) if (d < 0) d = 1; + + std::size_t n = shapeToSize(input_shapes[i]); + + if (verbose) { + std::printf(" Input %-2zu %-20s type=%-8s numel=%zu\n", + i, input_names_str[i].c_str(), + ortTypeName(input_types[i]), n); + } + + // fill data and create OrtValue + switch (input_types[i]) { + case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: { + float_data[i].resize(n); + for (auto& v : float_data[i]) v = fdist(rng); + input_tensors.push_back(Ort::Value::CreateTensor( + mem_cpu, float_data[i].data(), n, + input_shapes[i].data(), input_shapes[i].size())); + break; + } + case ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE: { + double_data[i].resize(n, 0.0); + for (auto& v : double_data[i]) + v = static_cast(fdist(rng)); + input_tensors.push_back(Ort::Value::CreateTensor( + mem_cpu, double_data[i].data(), n, + input_shapes[i].data(), input_shapes[i].size())); + break; + } + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: { + // Zero: safe for index tensors (edge_index, etc.) 
+ int64_data[i].assign(n, 0); + input_tensors.push_back(Ort::Value::CreateTensor( + mem_cpu, int64_data[i].data(), n, + input_shapes[i].data(), input_shapes[i].size())); + break; + } + case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: { + int32_data[i].assign(n, 0); + input_tensors.push_back(Ort::Value::CreateTensor( + mem_cpu, int32_data[i].data(), n, + input_shapes[i].data(), input_shapes[i].size())); + break; + } + case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: { + uint8_data[i].assign(n, 0); + input_tensors.push_back(Ort::Value::CreateTensor( + mem_cpu, uint8_data[i].data(), n, + input_shapes[i].data(), input_shapes[i].size())); + break; + } + case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL: { + // ORT requires bool* — use uint8_t backing (1 byte each, same size) + uint8_data[i].assign(n, 0); + input_tensors.push_back(Ort::Value::CreateTensor( + mem_cpu, + reinterpret_cast(uint8_data[i].data()), n, + input_shapes[i].data(), input_shapes[i].size())); + break; + } + default: + throw std::runtime_error( + std::string("BenchmarkORT_GPU: unsupported input type for ") + + input_names_str[i]); + } + } + + // ── output names ───────────────────────────────────────────────────────── + const std::size_t num_outputs = session.GetOutputCount(); + std::vector output_names_str(num_outputs); + std::vector output_names_ptr(num_outputs); + for (std::size_t i = 0; i < num_outputs; ++i) { + auto ptr = session.GetOutputNameAllocated(i, alloc); + output_names_str[i] = ptr.get(); + output_names_ptr[i] = output_names_str[i].c_str(); + } + + // default-constructed run options — NOTE(review): nothing is configured here to disable CPU fallback, so a pure-GPU measurement is presumably not guaranteed; confirm against the ORT docs + Ort::RunOptions run_opts; + + // ── warm-up ────────────────────────────────────────────────────────────── + for (int w = 0; w < warmup; ++w) { + session.Run(run_opts, + input_names_ptr.data(), input_tensors.data(), num_inputs, + output_names_ptr.data(), num_outputs); + } + 
cudaDeviceSynchronize(); + + // ── timed run ───────────────────────────────────────────────────────────── + auto t0 = 
std::chrono::high_resolution_clock::now(); + for (int it = 0; it < iterations; ++it) { + session.Run(run_opts, + input_names_ptr.data(), input_tensors.data(), num_inputs, + output_names_ptr.data(), num_outputs); + } + cudaDeviceSynchronize(); + auto t1 = std::chrono::high_resolution_clock::now(); + + double avg_ms = std::chrono::duration(t1 - t0).count() + / iterations; + double throughput = (avg_ms > 0.0) ? 1000.0 / avg_ms : 0.0; + + // Print in the same table format, with "[ORT-GPU]" tag appended to the model name + std::string label = std::string(model_name) + " [ORT-GPU]"; + std::printf("%-30s avg %8.4f ms (%8.1f inf/s)\n", + label.c_str(), avg_ms, throughput); +} diff --git a/check_style.sh b/check_style.sh new file mode 100644 index 0000000..22a56e4 --- /dev/null +++ b/check_style.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +# Directories +SRC_DIR="./include" +TEST_DIR="./tests" + +echo "📝 Discovering source/header files..." + +FILES=$(find "$SRC_DIR" "$TEST_DIR" \ + -path "$TEST_DIR/build" -prune -o \ + -type f \( \ + -name '*.cpp' -o -name '*.cc' -o -name '*.cxx' -o \ + -name '*.h' -o -name '*.hpp' -o -name '*.hxx' -o -name '*.hh' \ + \) -print) + +if [ -z "$FILES" ]; then + echo "⚠️ No files found to process." + exit 0 +fi + +echo "🎯 Files to check:" +echo "$FILES" + +echo "🎨 Running clang-format..." +for file in $FILES; do + echo "Formatting $file" + clang-format -i "$file" +done + +echo "🔍 Running clang-tidy..." +for file in $FILES; do + echo "Linting $file" + clang-tidy "$file" --extra-arg=-std=c++20 -- -I"$SRC_DIR" || true +done + +echo "✅ Formatting and linting complete." 
diff --git a/cmake/SOFIEConfig.cmake.in b/cmake/SOFIEConfig.cmake.in new file mode 100644 index 0000000..94ebc4a --- /dev/null +++ b/cmake/SOFIEConfig.cmake.in @@ -0,0 +1,13 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +find_dependency(Protobuf) + +if(@SOFIE_WITH_ROOT@) + find_dependency(ROOT COMPONENTS Core TMVA Tree) +endif() + +include("${CMAKE_CURRENT_LIST_DIR}/SOFIETargets.cmake") + +check_required_components(SOFIE) diff --git a/src/SOFIE_core/CMakeLists.txt b/core/CMakeLists.txt similarity index 90% rename from src/SOFIE_core/CMakeLists.txt rename to core/CMakeLists.txt index a803b7f..36cf037 100644 --- a/src/SOFIE_core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -87,7 +87,10 @@ set(sources_cxx ) target_sources(SOFIE_core PRIVATE ${sources_headers} ${sources_cxx}) -target_include_directories(SOFIE_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/inc) +target_include_directories(SOFIE_core PUBLIC + $ + $ +) target_link_libraries(SOFIE_core PUBLIC utils) if(SOFIE_WITH_ROOT AND ROOT_FOUND) @@ -107,9 +110,12 @@ if(SOFIE_WITH_ROOT AND ROOT_FOUND) endif() install(TARGETS SOFIE_core - LIBRARY DESTINATION lib + EXPORT SOFIETargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/" + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/" DESTINATION "include") if(testing) add_subdirectory(test) diff --git a/src/SOFIE_core/README.md b/core/README.md similarity index 98% rename from src/SOFIE_core/README.md rename to core/README.md index 2259d7a..b0a50a1 100644 --- a/src/SOFIE_core/README.md +++ b/core/README.md @@ -12,10 +12,10 @@ This is a new development in TMVA and is currently in early experimental stage. ## Installation -Build ROOT with the cmake option tmva-sofie enabled. +Build ROOT with the cmake option sofie enabled. 
```bash -cmake ../root -Dtmva-sofie=ON +cmake ../root -Dsofie=ON make -j8 ``` diff --git a/src/SOFIE_core/inc/LinkDef.h b/core/inc/LinkDef.h similarity index 100% rename from src/SOFIE_core/inc/LinkDef.h rename to core/inc/LinkDef.h diff --git a/src/SOFIE_core/inc/SOFIE/FunctionList.hxx b/core/inc/SOFIE/FunctionList.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/FunctionList.hxx rename to core/inc/SOFIE/FunctionList.hxx diff --git a/src/SOFIE_core/inc/SOFIE/OperatorList.hxx b/core/inc/SOFIE/OperatorList.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/OperatorList.hxx rename to core/inc/SOFIE/OperatorList.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RFunction.hxx b/core/inc/SOFIE/RFunction.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RFunction.hxx rename to core/inc/SOFIE/RFunction.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx b/core/inc/SOFIE/RFunction_MLP.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RFunction_MLP.hxx rename to core/inc/SOFIE/RFunction_MLP.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RFunction_Mean.hxx b/core/inc/SOFIE/RFunction_Mean.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RFunction_Mean.hxx rename to core/inc/SOFIE/RFunction_Mean.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RFunction_Sum.hxx b/core/inc/SOFIE/RFunction_Sum.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RFunction_Sum.hxx rename to core/inc/SOFIE/RFunction_Sum.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RModel.hxx b/core/inc/SOFIE/RModel.hxx similarity index 92% rename from src/SOFIE_core/inc/SOFIE/RModel.hxx rename to core/inc/SOFIE/RModel.hxx index 96b1a8a..103c3c5 100644 --- a/src/SOFIE_core/inc/SOFIE/RModel.hxx +++ b/core/inc/SOFIE/RModel.hxx @@ -13,6 +13,7 @@ class RModel final : public RModel_Base { private: bool fIsInitialized = false; bool fIsSubGraph = false; + bool fUseVDT = false; int fVerbose = 0; int fBatchSize = -1; long fReadPos = 0; // reading file position @@ 
-28,6 +29,7 @@ private: std::unordered_map fIntermediateTensorInfos; std::unordered_map fDynamicTensorInfos; std::unordered_map, bool>> fShapeTensors; // constant tensors describing a shape + std::unordered_map fAliasTensors; // alias tensors (name -> original tensor name) std::unordered_map fShapeParams; // parameters defining the dynamic shape (e.g. batch size), store also its default value std::vector fDimShapeNames; // parameter names used to define the shapes @@ -85,9 +87,9 @@ public: int Verbose() const { return fVerbose;} - const std::vector &GetTensorShape(const std::string & name) const; + std::vector GetTensorShape(const std::string & name) const; std::vector GetDimTensorShape(const std::string & name) const; - const ETensorType &GetTensorType(const std::string & name) const; + ETensorType GetTensorType(std::string name) const; std::vector GetDynamicTensorShape(const std::string & name) const ; // get the values for the tensor representing a shape @@ -136,6 +138,8 @@ public: } void AddShapeTensor(const std::string & name, const std::vector & shapeValues, bool scalar = false); + void AddAliasTensor(const std::string & name, const std::string & origin); + bool IsAliasTensor(const std::string & tensor_name) const; void AddExtraCodeForDimShapes(const std::string & code) { fExtraCodeForDimShapes += code; } @@ -239,6 +243,9 @@ protected: void GenerateGPU_ALPAKA_Buffers(); void CheckAndFuseOperators(); + bool IsInputTensorShapeParam(std::string const ¶mName) const; + std::vector CollectTensorMemberNames(const std::string &input); + void GenerateRequiredInputTensorInfo(); public: const std::vector &GetInputTensorNames() const { return fInputTensorNames; } @@ -248,8 +255,9 @@ public: void ReadInitializedTensorsFromFile(long); long WriteInitializedTensorsToFile(std::string filename = ""); - void PrintIntermediateTensors(); - void PrintOutputTensors(); + void PrintIntermediateTensors() const; + void PrintOutputTensors() const; + void PrintSummary() const; void 
OutputGenerated(std::string filename = "", bool append = false); std::vector GetOutputTensorNames() { return fOutputTensorNames; } void SetFilename(std::string filename) { fName = filename; } @@ -260,20 +268,24 @@ public: //a view only T obj; if (fInitializedTensors.find(tensor_name) != fInitializedTensors.end()){ - throw std::runtime_error("TMVA-SOFIE: initialized tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: initialized tensor with name " + tensor_name + " already exists \n"); } InitializedTensor new_tensor_ {GetTemplatedType(obj), new_tensor.GetShape() , static_cast(new_tensor.GetData())}; fInitializedTensors[tensor_name] = new_tensor_; } */ - void PrintRequiredInputTensors(); - void PrintInitializedTensors(); - void PrintDynamicTensors(); + void PrintRequiredInputTensors() const; + void PrintInitializedTensors() const; + void PrintDynamicTensors() const; void HeadInitializedTensors(std::string name, int n_print = 50); bool UseSession() const { return fUseSession; } - + void SetUseVDT(bool on) { + fUseVDT = on; + } + bool UseVDT() const { return fUseVDT;} + #ifdef SOFIE_SUPPORT_ROOT_BINARY // Use the ClassDef macro to allow definition of custom streaming ClassDefNV(RModel, 3); diff --git a/src/SOFIE_core/inc/SOFIE/RModel_Base.hxx b/core/inc/SOFIE/RModel_Base.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RModel_Base.hxx rename to core/inc/SOFIE/RModel_Base.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx b/core/inc/SOFIE/RModel_GNN.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RModel_GNN.hxx rename to core/inc/SOFIE/RModel_GNN.hxx diff --git a/src/SOFIE_core/inc/SOFIE/RModel_GraphIndependent.hxx b/core/inc/SOFIE/RModel_GraphIndependent.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/RModel_GraphIndependent.hxx rename to core/inc/SOFIE/RModel_GraphIndependent.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator.hxx b/core/inc/SOFIE/ROperator.hxx similarity index 
98% rename from src/SOFIE_core/inc/SOFIE/ROperator.hxx rename to core/inc/SOFIE/ROperator.hxx index 4b7741c..20cdf6e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator.hxx +++ b/core/inc/SOFIE/ROperator.hxx @@ -110,6 +110,7 @@ public: } OperatorKind GetKind() const { return fKind; } + bool IsOutputConstant() const { return fIsOutputConstant; } void RegisterOperatorOrder(const size_t ord){ fOpOrder = ord; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx b/core/inc/SOFIE/ROperator_BasicBinary.hxx similarity index 98% rename from src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx rename to core/inc/SOFIE/ROperator_BasicBinary.hxx index 5d15078..a40f6b8 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicBinary.hxx +++ b/core/inc/SOFIE/ROperator_BasicBinary.hxx @@ -14,7 +14,9 @@ enum EBasicBinaryOperator { Sub, Mul, Div, - Pow + Pow, + Mod, + FMod }; template @@ -544,9 +546,7 @@ public: out << "\n//------ "+OpName+"_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA - << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", binary" << OpName << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNA << "), alpaka::getPtrNative(deviceBuf_" << fNB << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicNary.hxx b/core/inc/SOFIE/ROperator_BasicNary.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_BasicNary.hxx rename 
to core/inc/SOFIE/ROperator_BasicNary.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx b/core/inc/SOFIE/ROperator_BasicUnary.hxx similarity index 94% rename from src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx rename to core/inc/SOFIE/ROperator_BasicUnary.hxx index eb3150c..05b861a 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BasicUnary.hxx +++ b/core/inc/SOFIE/ROperator_BasicUnary.hxx @@ -180,9 +180,7 @@ public: out << "\n//------ "+OpName+"_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), " << length << ");\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", " << UnaryOpTraits::Name() << "Kernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), " << length << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx b/core/inc/SOFIE/ROperator_Basic_Is.hxx similarity index 90% rename from src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx rename to core/inc/SOFIE/ROperator_Basic_Is.hxx index 1a224ac..fabe976 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Basic_Is.hxx +++ b/core/inc/SOFIE/ROperator_Basic_Is.hxx @@ -119,14 +119,7 @@ public: out << "\n//------ " << opName << "_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << length << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY - << " = {elementsPerGrid_" 
<< fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY - << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, " - << IsOpTraits::Name() << "Kernel" - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", " << length << ");\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", " << IsOpTraits::Name() << "Kernel" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx b/core/inc/SOFIE/ROperator_BatchNormalization.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx rename to core/inc/SOFIE/ROperator_BatchNormalization.hxx index def1870..8bc3b3d 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_BatchNormalization.hxx +++ b/core/inc/SOFIE/ROperator_BatchNormalization.hxx @@ -286,16 +286,7 @@ public: out << "\n//------ BATCHNORM_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY - << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNScale << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNMean << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << fNY << " = 
alpaka::createTaskKernel(workDiv_" << fNY << ", " << kname diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx b/core/inc/SOFIE/ROperator_Cast.hxx similarity index 83% rename from src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx rename to core/inc/SOFIE/ROperator_Cast.hxx index 9b44e9f..84d0048 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Cast.hxx +++ b/core/inc/SOFIE/ROperator_Cast.hxx @@ -60,7 +60,20 @@ public: fIsOutputConstant = true; auto inputData = model.GetInitializedTensorData(fNX); if (fType == ETensorType::INT64) { - model.AddConstantTensor(fNY, ConvertShapeToInt(fShape), static_cast(inputData.get())); + size_t length = ConvertShapeToLength(fShape); + std::vector convertedData; + if (inputType == ETensorType::FLOAT) { + convertedData = convertToInt64(static_cast(inputData.get()), length); + } else if (inputType == ETensorType::DOUBLE) { + convertedData = convertToInt64(static_cast(inputData.get()), length); + } else if (inputType == ETensorType::INT32) { + convertedData = convertToInt64(static_cast(inputData.get()), length); + } else { + // Already INT64 — safe direct copy + convertedData.assign(static_cast(inputData.get()), + static_cast(inputData.get()) + length); + } + model.AddConstantTensor(fNY, ConvertShapeToInt(fShape), convertedData.data()); model.SetNotWritableInitializedTensor(fNX); } else @@ -132,13 +145,11 @@ public: } std::stringstream out; - auto length = ConvertShapeToLength(fShape); + auto length = ConvertDimShapeToLength(fShape); out << "\n//------ CAST_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, castKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY << "));\n"; + out << SP << "auto const workDiv_" << fNY << " = 
sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", castKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << ")); \n"; out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; return out.str(); @@ -146,7 +157,7 @@ public: bool IsElementwise() const override { return true; } std::string GetElementwiseExpr(const std::string& v) const override { - return "static_cast<" + fAttrType + ">(" + v + ")"; + return "static_cast<" + ConvertTypeToString(fType) + ">(" + v + ")"; } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx b/core/inc/SOFIE/ROperator_Clip.hxx similarity index 94% rename from src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx rename to core/inc/SOFIE/ROperator_Clip.hxx index dce78da..0439b50 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Clip.hxx +++ b/core/inc/SOFIE/ROperator_Clip.hxx @@ -235,18 +235,11 @@ public: std::string castMin = "static_cast<" + TensorType::Name() + ">(" + minExpr + ")"; std::string castMax = "static_cast<" + TensorType::Name() + ">(" + maxExpr + ")"; - out << SP << "auto const elementsPerThread_" << fNX << " = Vec::all(static_cast(1));\n"; - out << SP << "auto const elementsPerGrid_" << fNX << " = Vec::all(Idx{" << length << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << fNX - << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; - out << SP << "auto const workDiv_" << fNX - << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, clipKernel" - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << length << ")" - << ", " << castMin << ", " << castMax << ");\n"; + out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; + out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << length 
<< "});\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << OpName - << " = alpaka::createTaskKernel(workDiv_" << fNX << ", clipKernel" + << " = alpaka::createTaskKernel(workDiv_" << fNY << ", clipKernel" << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", static_cast(" << length << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx b/core/inc/SOFIE/ROperator_Comparision.hxx similarity index 96% rename from src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx rename to core/inc/SOFIE/ROperator_Comparision.hxx index e158499..db7b9e6 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Comparision.hxx +++ b/core/inc/SOFIE/ROperator_Comparision.hxx @@ -293,14 +293,7 @@ public: out << "\n//------ " << ComparisionTrait::Name() << "_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNX1 << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNX2 << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX1 << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx b/core/inc/SOFIE/ROperator_Concat.hxx similarity index 97% rename from 
src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx rename to core/inc/SOFIE/ROperator_Concat.hxx index eeefe80..f396554 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Concat.hxx +++ b/core/inc/SOFIE/ROperator_Concat.hxx @@ -24,6 +24,7 @@ std::vectorfOutputShape; std::vector fOutputShapeData; // in case output is a shape tensor we store here the output shape value data (can be parametric) std::vector> fInputShapes; + ETensorType fInputType = ETensorType::UNDEFINED; public: @@ -288,7 +289,8 @@ fIsOutputParamShape = true; } if (!fIsOutputConstant && !fIsOutputParamShape) { - model.AddIntermediateTensor(fOutput, model.GetTensorType(fInputs[0]), fOutputShape); + fInputType = model.GetTensorType(fInputs[0]); + model.AddIntermediateTensor(fOutput, fInputType, fOutputShape); if (model.Verbose()) { std::cout << "Concat ---> " << fOutput << " " << ConvertDimShapeToString(fOutputShape) << std::endl; } @@ -323,7 +325,7 @@ std::string offset; for(size_t i=0; i 0) out << offset; offset += " + " + length; @@ -487,8 +489,7 @@ out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << OpName << " = {elementsPerGrid_" << OpName << ", elementsPerThread_" << OpName << "};\n"; - out << SP << "auto const workDiv_" << OpName << " = alpaka::getValidWorkDiv(kernelCfg_" << OpName << ", devAcc, concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; + out << SP << "auto const workDiv_" << OpName << " = sofie_workdiv(elementsPerGrid_" << OpName << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << OpName << ", concatKernel_" << OpName << ", input_ptrs_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fOutput << "), static_cast(" << length << "));\n"; out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx 
b/core/inc/SOFIE/ROperator_Constant.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Constant.hxx rename to core/inc/SOFIE/ROperator_Constant.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx b/core/inc/SOFIE/ROperator_Conv.hxx similarity index 70% rename from src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx rename to core/inc/SOFIE/ROperator_Conv.hxx index c87ecd9..835a0ff 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Conv.hxx +++ b/core/inc/SOFIE/ROperator_Conv.hxx @@ -67,6 +67,7 @@ public: } fInputTensorNames = { fNX, fNB }; fOutputTensorNames = { fNY }; + fKind = OperatorKind::CONV; } ROperator_Conv(std::string autopad, std::vector dilations, @@ -85,6 +86,7 @@ public: } fInputTensorNames = { fNX }; fOutputTensorNames = { fNY }; + fKind= OperatorKind::CONV; } std::vector TypeInference(std::vector input) override { @@ -348,7 +350,7 @@ public: out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n"; else out << SP << "{\n"; - out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + out << SP << SP << "float * data = SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertDimShapeToString(fShapeY) << ");\n"; out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNB << ".begin());\n"; @@ -484,7 +486,7 @@ public: // when using im2col - resulting matrix is transposed, the dimension is (input_c * filter_h * filter_y, output_h * // output_w) if (fDim < 3) { - out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col(tensor_" << fNX + out << SP << SP << "SOFIE::UTILITY::Im2col(tensor_" << fNX << " + x_offset," // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, // dilation_w, @@ -500,7 +502,7 @@ public: out << "," << "tensor_" <(tensor_" << fNX + out << SP << SP << 
"SOFIE::UTILITY::Im2col_3d(tensor_" << fNX << " + x_offset," // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, // dilation_d, dilation_h, dilation_w, @@ -513,7 +515,7 @@ public: << "tensor_" << fNX << "_xcol);\n\n "; } // BLAS - out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" + out << SP << "SOFIE::Gemm_Call(" << "tensor_" << fNY << " + out_offset, false, false, " << OpName << "_m, " << OpName << "_n, " << OpName << "_k, " << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f, " @@ -542,7 +544,7 @@ public: out << SP << SP << "size_t out_offset = n * " << outputBatchStride << " + g_offset;\n"; if (fDim < 3) { - out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col(tensor_" << fNX + out << SP << SP << "SOFIE::UTILITY::Im2col(tensor_" << fNX << " + x_offset," // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, // dilation_w, @@ -558,7 +560,7 @@ public: out << ", tensor_" << fNX << "_xcol);\n\n "; } else { // 3d im2col - out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d(tensor_" << fNX + out << SP << SP << "SOFIE::UTILITY::Im2col_3d(tensor_" << fNX << " + x_offset," // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w, // dilation_d, dilation_h, dilation_w, @@ -578,7 +580,7 @@ public: << fShapeW[0] * fShapeW[1] * fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2] / fAttrGroup << ";\n"; - out << SP << "TMVA::Experimental::SOFIE::Gemm_Call(" + out << SP << "SOFIE::Gemm_Call(" << "tensor_" << fNY << " + out_offset, false, false, " << OpName << "_m, " << OpName << "_n, " << OpName << "_k, " << OpName << "_alpha, " << "tensor_" << fNX << "_xcol, tensor_" << fNX << "_f + offset_f, " @@ -595,12 +597,223 @@ public: return out.str(); } + std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { + opName = "op_" + opName; + if (fShapeX.empty() || fShapeW.empty() || fShapeY.empty()) + throw 
std::runtime_error("TMVA SOFIE Conv Op called to Generate without being initialized first"); + + size_t oDepth = (fDim > 2) ? fShapeY[2].dim : 1; + size_t oHeight = (fDim > 1) ? fShapeY[fDim].dim : 1; + size_t oWidth = fShapeY[fDim + 1].dim; + size_t iDepth = (fDim > 2) ? fShapeX[2].dim : 1; + size_t iHeight = (fDim > 1) ? fShapeX[fDim].dim : 1; + size_t iWidth = fShapeX[fDim + 1].dim; + size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; + size_t kWidth = fShapeW[fDim + 1]; + size_t kDepth = (fDim > 2) ? fShapeW[2] : 1; + + size_t kernelSize = fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2]; + size_t colRows = fShapeW[1] * kernelSize; + size_t colCols = oDepth * oHeight * oWidth; + size_t colElements = colRows * colCols; + size_t outChannels = fShapeW[0]; + size_t spatialSize = oDepth * oHeight * oWidth; + + // Strides for weight vectorisation + size_t id = (fDim > 2) ? fDim - 3 : 2; + size_t ih = (fDim > 1) ? fDim - 2 : 1; + size_t iw = fDim - 1; + size_t wstrideDil = fAttrDilations[iw]; + size_t hstrideDil = fAttrDilations[ih] * fAttrKernelShape[iw]; + size_t dstrideDil = fAttrDilations[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; + size_t icstrideDil = fAttrKernelShape[id] * fAttrKernelShape[ih] * fAttrKernelShape[iw]; + size_t ocstrideDil = fShapeW[1] * icstrideDil; + size_t hstride = kWidth; + size_t dstride = kHeight * kWidth; + size_t icstride = kHeight * kWidth * kDepth; + size_t ocstride = fShapeW[1] * icstride; + size_t wTotalElements = ConvertShapeToLength(fShapeW); + + std::string op; + + // Kernel 1: Weight vectorisation — reorder W into _f with dilation layout + // Each thread handles one output element of _f + std::string wKname = "WeightVecKernel_" + opName; + op = "\n//------ WEIGHT_VEC_KERNEL_ALPAKA (Conv " + opName + ")\n"; + op += SP + "struct " + wKname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* 
__restrict__ W,\n"; + op += SP + SP + SP + "T* __restrict__ f,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + // Decompose elem_idx into (oc, ic, kd, kh, kw) using compile-time strides + op += SP + SP + SP + SP + "std::size_t const oc = elem_idx / " + std::to_string(ocstride) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const oc_rem = elem_idx % " + std::to_string(ocstride) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ic = oc_rem / " + std::to_string(icstride) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ic_rem = oc_rem % " + std::to_string(icstride) + "u;\n"; + if (fDim > 2) { + op += SP + SP + SP + SP + "std::size_t const kd = ic_rem / " + std::to_string(kHeight * kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = (ic_rem / " + std::to_string(kWidth) + "u) % " + std::to_string(kHeight) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = ic_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else if (fDim > 1) { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = ic_rem / " + std::to_string(kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = ic_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = ic_rem;\n\n"; + } + + // Compute destination index in _f (dilated layout) + op += SP + SP + SP + SP + "std::size_t const f_idx =\n"; + op += SP + SP + SP 
+ SP + SP + "oc * " + std::to_string(ocstrideDil) + "u +\n"; + op += SP + SP + SP + SP + SP + "ic * " + std::to_string(icstrideDil) + "u"; + if (fDim > 2) op += " +\n" + SP + SP + SP + SP + SP + "kd * " + std::to_string(dstrideDil) + "u"; + if (fDim > 1) op += " +\n" + SP + SP + SP + SP + SP + "kh * " + std::to_string(hstrideDil) + "u"; + op += " +\n" + SP + SP + SP + SP + SP + "kw * " + std::to_string(wstrideDil) + "u;\n\n"; + + op += SP + SP + SP + SP + "f[f_idx] = W[elem_idx];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; + + // Kernel 2: Im2Col + std::string im2colKname = "Im2ColKernel_" + opName; + op += SP + "//------ IM2COL_KERNEL_ALPAKA (Conv " + opName + ")\n"; + op += SP + "struct " + im2colKname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ input,\n"; + op += SP + SP + SP + "T* __restrict__ col,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n\n"; + + op += SP + SP + SP + SP + "std::size_t const col_row = elem_idx / " + std::to_string(colCols) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const col_col = elem_idx % " + std::to_string(colCols) + "u;\n\n"; + + op += SP + SP + SP + SP + "std::size_t const ic = col_row / " + std::to_string(kernelSize) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const k_rem = col_row % " + std::to_string(kernelSize) + "u;\n"; + if (fDim > 2) { + op += SP + SP + SP + SP + "std::size_t const kd = k_rem / " + std::to_string(kHeight * 
kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = (k_rem / " + std::to_string(kWidth) + "u) % " + std::to_string(kHeight) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = k_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else if (fDim > 1) { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = k_rem / " + std::to_string(kWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = k_rem % " + std::to_string(kWidth) + "u;\n\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const kd = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kh = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const kw = k_rem;\n\n"; + } + + if (fDim > 2) { + op += SP + SP + SP + SP + "std::size_t const od = col_col / " + std::to_string(oHeight * oWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const oh = (col_col / " + std::to_string(oWidth) + "u) % " + std::to_string(oHeight) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ow = col_col % " + std::to_string(oWidth) + "u;\n\n"; + } else if (fDim > 1) { + op += SP + SP + SP + SP + "std::size_t const od = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const oh = col_col / " + std::to_string(oWidth) + "u;\n"; + op += SP + SP + SP + SP + "std::size_t const ow = col_col % " + std::to_string(oWidth) + "u;\n\n"; + } else { + op += SP + SP + SP + SP + "std::size_t const od = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const oh = 0u;\n"; + op += SP + SP + SP + SP + "std::size_t const ow = col_col;\n\n"; + } + + // Depth: trivially 0 for fDim < 3 (od=kd=0 always); pads[0] is height-begin for 2D, so + // applying it here would make id_in negative and zero the whole output. 
+ if (fDim >= 3) { + op += SP + SP + SP + SP + "int64_t const id_in = static_cast(od * " + std::to_string(fAttrStrides[0]) + + "u + kd * " + std::to_string(fAttrDilations[0]) + "u) - " + std::to_string(fAttrPads[0]) + ";\n"; + } else { + op += SP + SP + SP + SP + "int64_t const id_in = 0;\n"; + } + // Height: for fDim==3 the height dim is at strides/pads index 1; for fDim==2 it is at index 0. + // For fDim==1 oh=kh=0 so ih_in=0. + { + size_t const hIdx = (fDim > 2) ? 1 : 0; + if (fDim >= 2) { + op += SP + SP + SP + SP + "int64_t const ih_in = static_cast(oh * " + std::to_string(fAttrStrides[hIdx]) + + "u + kh * " + std::to_string(fAttrDilations[hIdx]) + "u) - " + std::to_string(fAttrPads[hIdx]) + ";\n"; + } else { + op += SP + SP + SP + SP + "int64_t const ih_in = 0;\n"; + } + } + // Width: fAttrStrides/Dilations/Pads are ordered [d,h,w] so width is at index fDim-1. + { + size_t const wIdx = fDim - 1; + op += SP + SP + SP + SP + "int64_t const iw_in = static_cast(ow * " + std::to_string(fAttrStrides[wIdx]) + + "u + kw * " + std::to_string(fAttrDilations[wIdx]) + "u) - " + std::to_string(fAttrPads[wIdx]) + ";\n\n"; + } + + op += SP + SP + SP + SP + "bool const in_bounds =\n"; + op += SP + SP + SP + SP + SP + "id_in >= 0 && id_in < " + std::to_string(iDepth) + " &&\n"; + op += SP + SP + SP + SP + SP + "ih_in >= 0 && ih_in < " + std::to_string(iHeight) + " &&\n"; + op += SP + SP + SP + SP + SP + "iw_in >= 0 && iw_in < " + std::to_string(iWidth) + ";\n\n"; + + op += SP + SP + SP + SP + "if (in_bounds) {\n"; + op += SP + SP + SP + SP + SP + "std::size_t const in_idx =\n"; + op += SP + SP + SP + SP + SP + SP + "ic * " + std::to_string(iDepth * iHeight * iWidth) + "u +\n"; + op += SP + SP + SP + SP + SP + SP + "static_cast(id_in) * " + std::to_string(iHeight * iWidth) + "u +\n"; + op += SP + SP + SP + SP + SP + SP + "static_cast(ih_in) * " + std::to_string(iWidth) + "u +\n"; + op += SP + SP + SP + SP + SP + SP + "static_cast(iw_in);\n"; + op += SP + SP + SP + SP + SP + 
"col[elem_idx] = input[in_idx];\n"; + op += SP + SP + SP + SP + "} else {\n"; + op += SP + SP + SP + SP + SP + "col[elem_idx] = static_cast(0);\n"; + op += SP + SP + SP + SP + "}\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; + + // Kernel 3: Bias broadcast (only if bias present) + if (!fNB.empty()) { + std::string biasKname = "BiasBroadcastKernel_" + opName; + op += SP + "//------ BIAS_BROADCAST_KERNEL_ALPAKA (Conv " + opName + ")\n"; + op += SP + "struct " + biasKname + " {\n"; + op += SP + SP + "template\n"; + op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; + op += SP + SP + SP + "TAcc const& acc,\n"; + op += SP + SP + SP + "T const* __restrict__ bias,\n"; + op += SP + SP + SP + "T* __restrict__ output,\n"; + op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; + + op += SP + SP + SP + "auto const global_thread_idx = alpaka::getIdx(acc)[0];\n"; + op += SP + SP + SP + "if (global_thread_idx >= totalElements) return;\n"; + op += SP + SP + SP + "auto const grid_thread_extent = alpaka::getWorkDiv(acc)[0];\n\n"; + + op += SP + SP + SP + "for (std::size_t elem_idx = global_thread_idx; elem_idx < totalElements; elem_idx += grid_thread_extent) {\n"; + op += SP + SP + SP + SP + "std::size_t const channel = elem_idx / " + std::to_string(spatialSize) + "u;\n"; + op += SP + SP + SP + SP + "output[elem_idx] = bias[channel];\n"; + op += SP + SP + SP + "}\n"; + op += SP + SP + "}\n"; + op += SP + "};\n\n"; + } + + return op; + } + std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string opName) override { opName = "op_" + opName; std::string op; op = SP + "WeightVecKernel_" + opName + " weightVecKernel_" + opName + ";\n"; op += SP + "Im2ColKernel_" + opName + " im2colKernel_" + opName + ";\n"; - if (!fNB2.empty()) + if (!fNB.empty()) op += SP + "BiasBroadcastKernel_" + opName + " biasBroadcastKernel_" + opName + ";\n"; return op; } @@ -610,15 +823,19 @@ public: if (fShapeX.empty() || fShapeW.empty() || 
fShapeY.empty()) throw std::runtime_error("SOFIE Conv Op called to Generate without being initialized first"); - size_t bsize = fShapeX[0]; - size_t oDepth = (fDim > 2) ? fShapeY[2] : 1; - size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; - size_t oWidth = fShapeY[fDim + 1]; - size_t iDepth = (fDim > 2) ? fShapeX[2] : 1; - size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; - size_t iWidth = fShapeX[fDim + 1]; + size_t bsize = fShapeX[0].dim; + size_t oDepth = (fDim > 2) ? fShapeY[2].dim : 1; + size_t oHeight = (fDim > 1) ? fShapeY[fDim].dim : 1; + size_t oWidth = fShapeY[fDim + 1].dim; + size_t iDepth = (fDim > 2) ? fShapeX[2].dim : 1; + size_t iHeight = (fDim > 1) ? fShapeX[fDim].dim : 1; + size_t iWidth = fShapeX[fDim + 1].dim; size_t outChannels = fShapeW[0]; size_t kernelSize = fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2]; + // gemm dimensions computed from shape members + size_t gemm_n = outChannels; // output channels + size_t gemm_k = fShapeW[1] * kernelSize; // input channels/group * kernel volume + size_t gemm_m = oDepth * oHeight * oWidth; // output spatial size per channel size_t colElements = gemm_k * gemm_m; // colRows * colCols size_t wTotal = ConvertShapeToLength(fShapeW); @@ -636,11 +853,7 @@ public: out << SP << "{\n"; out << SP << SP << "auto const elementsPerThread_wv = Vec::all(static_cast(1));\n"; out << SP << SP << "auto const elementsPerGrid_wv = Vec::all(Idx{" << wTotal << "});\n"; - out << SP << SP << "alpaka::KernelCfg const cfg_wv = {elementsPerGrid_wv, elementsPerThread_wv};\n"; - out << SP << SP << "auto const workDiv_wv = alpaka::getValidWorkDiv(cfg_wv, devAcc, weightVecKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNW << ")" - << ", alpaka::getPtrNative(deviceBuf_" << convK << ")" - << ", static_cast(" << wTotal << "));\n"; + out << SP << SP << "auto const workDiv_wv = sofie_workdiv(elementsPerGrid_wv);\n"; out << SP << SP << "alpaka::exec(queue, workDiv_wv, weightVecKernel_" << opName << ", 
alpaka::getPtrNative(deviceBuf_" << fNW << ")" << ", alpaka::getPtrNative(deviceBuf_" << convK << ")" @@ -653,9 +866,9 @@ public: // ----------------------------------------------------------------------- out << SP << "for (std::size_t n = 0; n < " << bsize << "; n++) {\n\n"; out << SP << SP << "std::size_t const x_offset = n * " - << fShapeX[1] * iDepth * iHeight * iWidth << "u;\n"; + << fShapeX[1].dim * iDepth * iHeight * iWidth << "u;\n"; out << SP << SP << "std::size_t const out_offset = n * " - << fShapeY[1] * gemm_m << "u;\n\n"; + << fShapeY[1].dim * gemm_m << "u;\n\n"; // ----------------------------------------------------------------------- // Step 3 + 4: Im2Col then GEMM — structure differs for grouped vs non-grouped @@ -666,11 +879,7 @@ public: out << SP << SP << "{\n"; out << SP << SP << SP << "auto const elementsPerThread_im2col = Vec::all(static_cast(1));\n"; out << SP << SP << SP << "auto const elementsPerGrid_im2col = Vec::all(Idx{" << colElements << "});\n"; - out << SP << SP << SP << "alpaka::KernelCfg const cfg_im2col = {elementsPerGrid_im2col, elementsPerThread_im2col};\n"; - out << SP << SP << SP << "auto const workDiv_im2col = alpaka::getValidWorkDiv(cfg_im2col, devAcc, im2colKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" - << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << "auto const workDiv_im2col = sofie_workdiv(elementsPerGrid_im2col);\n"; out << SP << SP << SP << "alpaka::exec(queue, workDiv_im2col, im2colKernel_" << opName << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + x_offset" << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" @@ -678,19 +887,15 @@ public: out << SP << SP << SP << "alpaka::wait(queue);\n"; out << SP << SP << "}\n\n"; - if (!fNB2.empty()) { + if (!fNB.empty()) { size_t biasElements = gemm_n * gemm_m; out << SP << SP << "// Step 4a: broadcast bias into output slice\n"; out << SP << SP << 
"{\n"; out << SP << SP << SP << "auto const elementsPerThread_bias = Vec::all(static_cast(1));\n"; out << SP << SP << SP << "auto const elementsPerGrid_bias = Vec::all(Idx{" << biasElements << "});\n"; - out << SP << SP << SP << "alpaka::KernelCfg const cfg_bias = {elementsPerGrid_bias, elementsPerThread_bias};\n"; - out << SP << SP << SP << "auto const workDiv_bias = alpaka::getValidWorkDiv(cfg_bias, devAcc, biasBroadcastKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset" - << ", static_cast(" << biasElements << "));\n"; + out << SP << SP << SP << "auto const workDiv_bias = sofie_workdiv(elementsPerGrid_bias);\n"; out << SP << SP << SP << "alpaka::exec(queue, workDiv_bias, biasBroadcastKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + out_offset" << ", static_cast(" << biasElements << "));\n"; out << SP << SP << SP << "alpaka::wait(queue);\n"; @@ -726,11 +931,7 @@ public: out << SP << SP << SP << "{\n"; out << SP << SP << SP << SP << "auto const elementsPerThread_im2col = Vec::all(static_cast(1));\n"; out << SP << SP << SP << SP << "auto const elementsPerGrid_im2col = Vec::all(Idx{" << colElements << "});\n"; - out << SP << SP << SP << SP << "alpaka::KernelCfg const cfg_im2col = {elementsPerGrid_im2col, elementsPerThread_im2col};\n"; - out << SP << SP << SP << SP << "auto const workDiv_im2col = alpaka::getValidWorkDiv(cfg_im2col, devAcc, im2colKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ") + g_in_offset" - << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" - << ", static_cast(" << colElements << "));\n"; + out << SP << SP << SP << SP << "auto const workDiv_im2col = sofie_workdiv(elementsPerGrid_im2col);\n"; out << SP << SP << SP << SP << "alpaka::exec(queue, workDiv_im2col, im2colKernel_" << opName << ", 
alpaka::getPtrNative(deviceBuf_" << fNX << ") + g_in_offset" << ", alpaka::getPtrNative(deviceBuf_" << imcol << ")" @@ -738,19 +939,15 @@ public: out << SP << SP << SP << SP << "alpaka::wait(queue);\n"; out << SP << SP << SP << "}\n\n"; - if (!fNB2.empty()) { + if (!fNB.empty()) { size_t groupBiasElements = gemm_n * gemm_m; out << SP << SP << SP << "// Broadcast group bias\n"; out << SP << SP << SP << "{\n"; out << SP << SP << SP << SP << "auto const elementsPerThread_bias = Vec::all(static_cast(1));\n"; out << SP << SP << SP << SP << "auto const elementsPerGrid_bias = Vec::all(Idx{" << groupBiasElements << "});\n"; - out << SP << SP << SP << SP << "alpaka::KernelCfg const cfg_bias = {elementsPerGrid_bias, elementsPerThread_bias};\n"; - out << SP << SP << SP << SP << "auto const workDiv_bias = alpaka::getValidWorkDiv(cfg_bias, devAcc, biasBroadcastKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ") + g * " << gemm_n - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset" - << ", static_cast(" << groupBiasElements << "));\n"; + out << SP << SP << SP << SP << "auto const workDiv_bias = sofie_workdiv(elementsPerGrid_bias);\n"; out << SP << SP << SP << SP << "alpaka::exec(queue, workDiv_bias, biasBroadcastKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNB2 << ") + g * " << gemm_n + << ", alpaka::getPtrNative(deviceBuf_" << fNB << ") + g * " << gemm_n << ", alpaka::getPtrNative(deviceBuf_" << fNY << ") + g_out_offset" << ", static_cast(" << groupBiasElements << "));\n"; out << SP << SP << SP << SP << "alpaka::wait(queue);\n"; @@ -782,10 +979,17 @@ public: std::string GetBlasConfig(){ - auto lda = std::to_string(gemm_m); // ld for xcol^T (gemm_m×gemm_k col-major) - auto ldb = std::to_string(gemm_k); // ld for xf^T (gemm_k×gemm_n col-major) - auto ldc = std::to_string(gemm_m); // ld for y^T (gemm_m×gemm_n col-major) - return std::to_string(gemm_m) + ", " + std::to_string(gemm_n) + ", " + std::to_string(gemm_k) + ", " + 
lda + ", " + ldb + ", " + ldc + ", 'n', 'n'"; + size_t oDepth_ = (fDim > 2) ? fShapeY[2].dim : 1; + size_t oHeight_ = (fDim > 1) ? fShapeY[fDim].dim : 1; + size_t oWidth_ = fShapeY[fDim + 1].dim; + size_t kSize_ = fAttrKernelShape[0] * fAttrKernelShape[1] * fAttrKernelShape[2]; + size_t gemm_n_ = fShapeW[0]; + size_t gemm_k_ = fShapeW[1] * kSize_; + size_t gemm_m_ = oDepth_ * oHeight_ * oWidth_; + auto lda = std::to_string(gemm_m_); // ld for xcol^T (gemm_m×gemm_k col-major) + auto ldb = std::to_string(gemm_k_); // ld for xf^T (gemm_k×gemm_n col-major) + auto ldc = std::to_string(gemm_m_); // ld for y^T (gemm_m×gemm_n col-major) + return std::to_string(gemm_m_) + ", " + std::to_string(gemm_n_) + ", " + std::to_string(gemm_k_) + ", " + lda + ", " + ldb + ", " + ldc + ", 'n', 'n'"; } }; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx b/core/inc/SOFIE/ROperator_ConvTranspose.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.hxx rename to core/inc/SOFIE/ROperator_ConvTranspose.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.icc b/core/inc/SOFIE/ROperator_ConvTranspose.icc similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_ConvTranspose.icc rename to core/inc/SOFIE/ROperator_ConvTranspose.icc diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx b/core/inc/SOFIE/ROperator_Custom.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Custom.hxx rename to core/inc/SOFIE/ROperator_Custom.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx b/core/inc/SOFIE/ROperator_Einsum.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Einsum.hxx rename to core/inc/SOFIE/ROperator_Einsum.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx b/core/inc/SOFIE/ROperator_Elu.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Elu.hxx rename to core/inc/SOFIE/ROperator_Elu.hxx diff --git 
a/src/SOFIE_core/inc/SOFIE/ROperator_Erf.hxx b/core/inc/SOFIE/ROperator_Erf.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Erf.hxx rename to core/inc/SOFIE/ROperator_Erf.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx b/core/inc/SOFIE/ROperator_Expand.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx rename to core/inc/SOFIE/ROperator_Expand.hxx index bbcb916..95955ed 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Expand.hxx +++ b/core/inc/SOFIE/ROperator_Expand.hxx @@ -158,7 +158,7 @@ public: // No need to broadcast A if it's an initialized tensor or shapes are the same if (!fInitialized && fShapeX != fShapeY) { out << SP << "// Broadcasting uninitialized tensor " << fNX << "\n"; - out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNX << ", " << ConvertDimShapeToString(fShapeX) << ", " << ConvertDimShapeToString(fShapeY) + out << SP << "SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNX << ", " << ConvertDimShapeToString(fShapeX) << ", " << ConvertDimShapeToString(fShapeY) << ", tensor_"<(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_EyeLike.hxx 
b/core/inc/SOFIE/ROperator_EyeLike.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_EyeLike.hxx rename to core/inc/SOFIE/ROperator_EyeLike.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx b/core/inc/SOFIE/ROperator_GRU.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_GRU.hxx rename to core/inc/SOFIE/ROperator_GRU.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc b/core/inc/SOFIE/ROperator_GRU.icc similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_GRU.icc rename to core/inc/SOFIE/ROperator_GRU.icc diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx b/core/inc/SOFIE/ROperator_Gather.hxx similarity index 96% rename from src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx rename to core/inc/SOFIE/ROperator_Gather.hxx index e3cd58d..3c16f18 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gather.hxx +++ b/core/inc/SOFIE/ROperator_Gather.hxx @@ -382,14 +382,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { out << "\n//------ GATHER_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", 
alpaka::getPtrNative(deviceBuf_" << fNX << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx b/core/inc/SOFIE/ROperator_GatherND.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx rename to core/inc/SOFIE/ROperator_GatherND.hxx index 8612368..ffcdab8 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_GatherND.hxx +++ b/core/inc/SOFIE/ROperator_GatherND.hxx @@ -280,14 +280,7 @@ public: out << "\n//------ GATHERND_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNIndices << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "alpaka::exec(queue, workDiv_" << opName << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx b/core/inc/SOFIE/ROperator_Gemm.hxx similarity index 91% rename from src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx rename to core/inc/SOFIE/ROperator_Gemm.hxx index d340776..c1c6c1c 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Gemm.hxx +++ b/core/inc/SOFIE/ROperator_Gemm.hxx @@ -57,6 +57,7 @@ namespace SOFIE{ "TMVA::SOFIE - Unsupported type parsing a Gemm operator"); fInputTensorNames = { fNA, fNB }; fOutputTensorNames = { fNY }; + fKind = OperatorKind::GEMM; } ROperator_Gemm(float alpha, float beta, int_t transA, int_t transB, 
std::string nameA, std::string nameB, std::string nameC, std::string nameY, EActivationType activation=EActivationType::UNDEFINED): @@ -68,6 +69,7 @@ namespace SOFIE{ fInputTensorNames = {fNA, fNB, fNC}; fOutputTensorNames = { fNY }; + fKind = OperatorKind::GEMM; } std::vector TypeInference(std::vector input) override { @@ -446,7 +448,7 @@ namespace SOFIE{ else out << "j;\n"; - std::string prefix = SP2 + SP + "TMVA::Experimental::SOFIE::"; + std::string prefix = SP2 + SP + "SOFIE::"; std::string target = "tensor_" + fNY; if (sC.size() != 2) { throw std::runtime_error("SOFIE Gemm Op - invalid rank for bias tensor " + ConvertDimShapeToString(fDimShapeC) + ConvertDimShapeToString(sC)); @@ -476,7 +478,7 @@ namespace SOFIE{ if (fType == "float"){ - out << SP2 << "TMVA::Experimental::SOFIE::Gemm_Call(" << "tensor_" << fNY; + out << SP2 << "SOFIE::Gemm_Call(" << "tensor_" << fNY; if (doStackMul) out << " + " << opName << "_y_offset"; out << ", " << (fAttrTransB ? "true, " : "false, ") @@ -517,7 +519,7 @@ namespace SOFIE{ out << SP << "//--- applying RELU to output\n"; std::string tnsr = "tensor_" + fNY; std::string reluSize = ConvertDimShapeToLength(fShapeY); - out << SP << "TMVA::Experimental::SOFIE::Relu(" << tnsr << ", " << tnsr << ", " << reluSize << ");\n"; + out << SP << "SOFIE::Relu(" << tnsr << ", " << tnsr << ", " << reluSize << ");\n"; } return out.str(); @@ -526,7 +528,7 @@ namespace SOFIE{ std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; - if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fShapeC.empty())) { + if (fShapeA.empty() || fShapeB.empty() || fShapeY.empty() || (fNC != "" && fDimShapeC.empty())) { throw std::runtime_error("SOFIE Gemm Op called to Generate without being initialized first"); } std::stringstream out; @@ -565,44 +567,73 @@ namespace SOFIE{ if (!fNC.empty()){ if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session - // C should have 
smaller dimension of Y + // C should have same size as Y if (!fIsDynamic) { if (std::stoi(lengthGemm) != static_cast(ConvertShapeToLength(fShapeC))) throw std::runtime_error("SOFIE Gemm Op " + opName + " Bias tensor has not correct size " - + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); + + ConvertDimShapeToString(fDimShapeC) + " output length " + lengthGemm); } else { - // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + // add a dynamic check (C should equal output size) + out << SP << "assert(" << lengthGemm << " == " << ConvertDimShapeToLength(fDimShapeC) << ");\n"; } } } else { + fBroadcastBias = false; //in this case fAttrBeta needs to be equal to zero otherwise second time we run we will use // the previous result if (fAttrBeta != 0) { - throw std::runtime_error("SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero"); + // some model don't have bias but Beta is not zero - force it to zero + fAttrBeta = 0; + std::cout << "WARNING: SOFIE Gemm Op " + opName + " Bias tensor is not present but beta value in Gemm is not zero - force it to zero\n"; } } // include MatMul case where we stack the Gemm operations // exclude case where we have only 1's in the additional dims bool doStackMul = dimY > 2 && ( fIsDynamic || std::stoi(lengthExtra) > 1); - if (doStackMul) { - out << SP << "size_t " << opName << "_yoffset = 0;\n"; // needed if we stack the gemm operations + + // Compute per-iteration strides for each buffer when stacking. + // m/n/k are std::string from Dim::GetVal(); stoi() is safe for static shapes. 
+ size_t strideA = 0, strideB = 0, strideY = 0, strideC = 0; + if (doStackMul && !fIsDynamic) { + strideA = static_cast(std::stoi(m)) * static_cast(std::stoi(k)); + strideB = static_cast(std::stoi(n)) * static_cast(std::stoi(k)); + strideY = static_cast(std::stoi(m)) * static_cast(std::stoi(n)); + strideC = !fNC.empty() ? static_cast(std::stoi(lengthGemm)) : 0; + out << SP << "size_t " << opName << "_yoffset = 0;\n"; + out << SP << "for (int i = 0; i < " << lengthExtra << "; i++){\n"; + } else if (doStackMul) { + // Dynamic case: emit symbolic stride expressions + out << SP << "size_t " << opName << "_yoffset = 0;\n"; out << SP << "for (int i = 0; i < " << lengthExtra << "; i++){\n"; - out << SP; } + // in the case of bias if (!fNC.empty()){ // Use getPtrNative() for all args so the raw-pointer overload is selected regardless // of whether each buffer is a BufXxx (member weight/bias/output) or ViewPlainPtr (input view). + std::string pA = "alpaka::getPtrNative(deviceBuf_" + fNA + ")"; + std::string pB = "alpaka::getPtrNative(deviceBuf_" + fNB + ")"; + std::string pC = "alpaka::getPtrNative(deviceBuf_" + fNC + ")"; + std::string pY = "alpaka::getPtrNative(deviceBuf_" + fNY + ")"; + if (doStackMul && !fIsDynamic) { + pA += " + i * " + std::to_string(strideA); + pB += " + i * " + std::to_string(strideB); + pY += " + i * " + std::to_string(strideY); + if (!fBroadcastBias && strideC > 0) pC += " + i * " + std::to_string(strideC); + } if (fActivation == EActivationType::RELU){ - out << SP << "blas.gemmrelu("<(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << axesLengthStr << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - // Build argument list std::string args = "alpaka::getPtrNative(deviceBuf_" + fNX + "), " @@ -551,8 +548,7 @@ public: args += ", alpaka::getPtrNative(deviceBuf_" + fNY + ")"; args += ", static_cast(" + 
axesLengthStr + ")"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname << ", " << args << ");\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "alpaka::exec(queue, workDiv_" << opName << ", " << kname << ", " << args << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx b/core/inc/SOFIE/ROperator_LeakyRelu.hxx similarity index 92% rename from src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx rename to core/inc/SOFIE/ROperator_LeakyRelu.hxx index c0e80aa..81fdb09 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_LeakyRelu.hxx +++ b/core/inc/SOFIE/ROperator_LeakyRelu.hxx @@ -107,9 +107,7 @@ public: out << SP << "constexpr float " << OpName << "_alpha = " << std::setprecision(std::numeric_limits::max_digits10) << falpha << ";\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; - out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; + out << SP << "auto const workDiv_" << fNX << " = sofie_workdiv(elementsPerGrid_" << fNX << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNX << ", leakyReluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "), " << OpName << "_alpha);\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx b/core/inc/SOFIE/ROperator_Not.hxx similarity index 87% rename from src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx rename to core/inc/SOFIE/ROperator_Not.hxx index 02b5cb4..c04ab1f 100644 --- 
a/src/SOFIE_core/inc/SOFIE/ROperator_Not.hxx +++ b/core/inc/SOFIE/ROperator_Not.hxx @@ -88,17 +88,10 @@ public: out << "\n//------ " << opName << "_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << fNY << " = Vec::all(Idx{" << length << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY - << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY - << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, " - << "NotKernel" - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", " << length << ");\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << fNY - << ", " << "NotKernel" + << ", " << "notKernel" << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" << ", " << length << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx b/core/inc/SOFIE/ROperator_Pad.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Pad.hxx rename to core/inc/SOFIE/ROperator_Pad.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx b/core/inc/SOFIE/ROperator_Pool.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Pool.hxx rename to core/inc/SOFIE/ROperator_Pool.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx b/core/inc/SOFIE/ROperator_RNN.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_RNN.hxx rename to core/inc/SOFIE/ROperator_RNN.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc b/core/inc/SOFIE/ROperator_RNN.icc similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_RNN.icc rename to core/inc/SOFIE/ROperator_RNN.icc diff --git 
a/src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx b/core/inc/SOFIE/ROperator_Random.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Random.hxx rename to core/inc/SOFIE/ROperator_Random.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx b/core/inc/SOFIE/ROperator_Range.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Range.hxx rename to core/inc/SOFIE/ROperator_Range.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx b/core/inc/SOFIE/ROperator_Reduce.hxx similarity index 96% rename from src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx rename to core/inc/SOFIE/ROperator_Reduce.hxx index 34e9819..e0d2b7b 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reduce.hxx +++ b/core/inc/SOFIE/ROperator_Reduce.hxx @@ -274,7 +274,7 @@ public: std::size_t outputLength = ConvertShapeToLength(fShapeY); std::size_t reducedLength = inputLength / outputLength; - std::string kname = "ReduceKernel_" + Name(); + std::string kname = "ReduceKernel_" + Name() + "_" + fNY; std::string op; op = "\n//------ " + Name() + "_KERNEL_ALPAKA\n"; @@ -353,8 +353,8 @@ public: } std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override { - std::string kname = "ReduceKernel_" + Name(); - return SP + kname + " reduceKernel_" + Name() + ";\n"; + std::string kname = "ReduceKernel_" + Name() + "_" + fNY; + return SP + kname + " reduceKernel_" + Name() + "_" + fNY + ";\n"; } std::string Generate_GPU_ALPAKA(std::string /*opName*/) override { @@ -362,19 +362,13 @@ public: throw std::runtime_error("SOFIE Reduce Op called to Generate without being initialized first"); std::size_t outputLength = ConvertShapeToLength(fShapeY); - std::string kname = "reduceKernel_" + Name(); + std::string kname = "reduceKernel_" + Name() + "_" + fNY; std::stringstream out; out << "\n//------ " << Name() << "_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << fNY << " = Vec::all(static_cast(1));\n"; out << SP << "auto const 
elementsPerGrid_" << fNY << " = Vec::all(Idx{" << outputLength << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << fNY - << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << outputLength << "));\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "alpaka::exec(queue, workDiv_" << fNY << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx b/core/inc/SOFIE/ROperator_Relu.hxx similarity index 91% rename from src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx rename to core/inc/SOFIE/ROperator_Relu.hxx index 0ced730..96d5931 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Relu.hxx +++ b/core/inc/SOFIE/ROperator_Relu.hxx @@ -98,9 +98,7 @@ public: out << "\n//------ RELU_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNY << " = {elementsPerGrid_" << fNY << ", elementsPerThread_" << fNY << "};\n"; - out << SP << "auto const workDiv_" << fNY << " = alpaka::getValidWorkDiv(kernelCfg_" << fNY << ", devAcc, reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "auto const workDiv_" << fNY << " = sofie_workdiv(elementsPerGrid_" << fNY << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNY << ", reluKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx 
b/core/inc/SOFIE/ROperator_Reshape.hxx similarity index 98% rename from src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx rename to core/inc/SOFIE/ROperator_Reshape.hxx index 9362151..56554dd 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Reshape.hxx +++ b/core/inc/SOFIE/ROperator_Reshape.hxx @@ -402,6 +402,15 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { opName = "op_" + opName; + if (fIsOutputParamShape) { + // shape tensor output: fill host-side tensor values, no device copy needed + std::stringstream out; + for (int i = 0; i < static_cast(fShapeOutput[0].dim); i++) { + out << SP << "tensor_" << fNOutput << "[" << i << "] = " << fOutputShapeData[i].GetVal() << ";\n"; + } + return out.str(); + } + std::string opType = "Reshape"; if (fOpMode == Flatten) opType = "Flatten"; else if (fOpMode == Squeeze) opType = "Squeeze"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx b/core/inc/SOFIE/ROperator_ScatterElements.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx rename to core/inc/SOFIE/ROperator_ScatterElements.hxx index 6bb2ac3..b69ee71 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_ScatterElements.hxx +++ b/core/inc/SOFIE/ROperator_ScatterElements.hxx @@ -261,12 +261,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName << ", devAcc, scatterElementsKernel_" << opName - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNI << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNU << ")" - << ", 
static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", scatterElementsKernel_" << opName << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx b/core/inc/SOFIE/ROperator_Selu.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Selu.hxx rename to core/inc/SOFIE/ROperator_Selu.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx b/core/inc/SOFIE/ROperator_Shape.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Shape.hxx rename to core/inc/SOFIE/ROperator_Shape.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx b/core/inc/SOFIE/ROperator_Sigmoid.hxx similarity index 91% rename from src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx rename to core/inc/SOFIE/ROperator_Sigmoid.hxx index 58355bc..aa9aa09 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Sigmoid.hxx +++ b/core/inc/SOFIE/ROperator_Sigmoid.hxx @@ -93,9 +93,7 @@ public: out << "\n//------ SIGMOID_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; - out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "auto const workDiv_" << fNX << " = sofie_workdiv(elementsPerGrid_" << fNX << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNX << ", sigmoidKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; diff --git 
a/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx b/core/inc/SOFIE/ROperator_Slice.hxx similarity index 97% rename from src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx rename to core/inc/SOFIE/ROperator_Slice.hxx index ba3bbf9..fb738cf 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Slice.hxx +++ b/core/inc/SOFIE/ROperator_Slice.hxx @@ -526,8 +526,8 @@ public: for (std::size_t d = 0; d < D; ++d) { op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) - + " = (elem_idx / " + std::to_string(outputStrides[d]) + "u) % " - + std::to_string(fShapeOutput[d]) + "u;\n"; + + " = (elem_idx / " + outputStrides[d].GetVal() + "u) % " + + fShapeOutput[d].GetVal() + "u;\n"; } op += "\n"; @@ -538,12 +538,12 @@ public: op += SP + SP + SP + SP + "std::size_t const input_idx =\n"; for (std::size_t d = 0; d < D; ++d) { // input coordinate for this dim: start + out_d * step - std::string input_coord = "(" + std::to_string(fStart[d]) + std::string input_coord = "(" + fStart[d].GetVal() + " + out_" + std::to_string(d) - + " * " + std::to_string(fSteps[d]) + ")"; + + " * " + fSteps[d].GetVal() + ")"; op += SP + SP + SP + SP + SP + "static_cast(" + input_coord + ")" - + " * " + std::to_string(inputStrides[d]) + "u"; + + " * " + inputStrides[d].GetVal() + "u"; op += (d + 1 < D) ? 
" +\n" : ";\n\n"; } @@ -574,13 +574,7 @@ public: out << "\n//------ SLICE_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNOutput << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "alpaka::exec(queue, workDiv_" << opName << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNData << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Softmax.hxx b/core/inc/SOFIE/ROperator_Softmax.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Softmax.hxx rename to core/inc/SOFIE/ROperator_Softmax.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx b/core/inc/SOFIE/ROperator_Split.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx rename to core/inc/SOFIE/ROperator_Split.hxx index 8e128e4..9604ca8 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Split.hxx +++ b/core/inc/SOFIE/ROperator_Split.hxx @@ -240,13 +240,7 @@ std::string Generate_GPU_ALPAKA(std::string opName) override { out << SP << "{\n"; out << SP << SP << "auto const elementsPerThread_" << i << " = Vec::all(static_cast(1));\n"; out << SP << SP << "auto const elementsPerGrid_" << i << " = Vec::all(Idx{" << length << "});\n"; - out << SP << SP << "alpaka::KernelCfg const kernelCfg_" << i - << " = {elementsPerGrid_" << i << ", elementsPerThread_" << i << "};\n"; - out << SP << SP << "auto const 
workDiv_" << i << " = alpaka::getValidWorkDiv(kernelCfg_" << i - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNYs[i] << ")" - << ", static_cast(" << length << "));\n"; + out << SP << SP << "auto const workDiv_" << i << " = sofie_workdiv(elementsPerGrid_" << i << ");\n"; out << SP << SP << "auto task_" << opName << "_" << i << " = alpaka::createTaskKernel(workDiv_" << i << ", " << kname << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_SubGraph.hxx b/core/inc/SOFIE/ROperator_SubGraph.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_SubGraph.hxx rename to core/inc/SOFIE/ROperator_SubGraph.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx b/core/inc/SOFIE/ROperator_Swish.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_Swish.hxx rename to core/inc/SOFIE/ROperator_Swish.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx b/core/inc/SOFIE/ROperator_Tanh.hxx similarity index 90% rename from src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx rename to core/inc/SOFIE/ROperator_Tanh.hxx index 9fcb60a..9408cd5 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tanh.hxx +++ b/core/inc/SOFIE/ROperator_Tanh.hxx @@ -97,9 +97,7 @@ public: out << "\n//------ TANH_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNX << " = {elementsPerGrid_" << fNX << ", elementsPerThread_" << fNX << "};\n"; - out << SP << "auto const workDiv_" << fNX << " = alpaka::getValidWorkDiv(kernelCfg_" << fNX << ", devAcc, tanhKernel, alpaka::getPtrNative(deviceBuf_" << fNX - << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; + out << SP << "auto const workDiv_" << fNX << " = sofie_workdiv(elementsPerGrid_" << fNX << ");\n"; out << SP << "auto task_" << OpName << " = 
alpaka::createTaskKernel(workDiv_" << fNX << ", tanhKernel, alpaka::getPtrNative(deviceBuf_" << fNX << "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast(" << length << "));\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx b/core/inc/SOFIE/ROperator_Tile.hxx similarity index 97% rename from src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx rename to core/inc/SOFIE/ROperator_Tile.hxx index f060047..5a3921e 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Tile.hxx +++ b/core/inc/SOFIE/ROperator_Tile.hxx @@ -235,10 +235,7 @@ public: out << "\n//------ TILE_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname << ", " << args << ");\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "auto task_" << opName << " = alpaka::createTaskKernel(workDiv_" << opName << ", " << kname << ", " << args << ");\n"; out << SP <<"alpaka::enqueue(queue, task_" << opName << ");\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx b/core/inc/SOFIE/ROperator_TopK.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/ROperator_TopK.hxx rename to core/inc/SOFIE/ROperator_TopK.hxx diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx b/core/inc/SOFIE/ROperator_Transpose.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx rename to core/inc/SOFIE/ROperator_Transpose.hxx index a7f3e46..83508d0 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Transpose.hxx +++ b/core/inc/SOFIE/ROperator_Transpose.hxx @@ -212,9 +212,7 @@ public: 
out << "\n//------ TRANSPOSE_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_"<(1));\n"; out << SP << "auto const elementsPerGrid_"< const kernelCfg_" << fNOutput << " = {elementsPerGrid_" << fNOutput << ", elementsPerThread_" << fNOutput << "};\n"; - out << SP << "auto const workDiv_" << fNOutput << " = alpaka::getValidWorkDiv(kernelCfg_" << fNOutput << ", devAcc, transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData - << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; + out << SP << "auto const workDiv_" << fNOutput << " = sofie_workdiv(elementsPerGrid_" << fNOutput << ");\n"; out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel(workDiv_" << fNOutput << ", transposeKernel_" << OpName << ", alpaka::getPtrNative(deviceBuf_" << fNData << "), alpaka::getPtrNative(deviceBuf_" << fNOutput << "), static_cast(" << length << "));\n"; diff --git a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx b/core/inc/SOFIE/ROperator_Where.hxx similarity index 95% rename from src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx rename to core/inc/SOFIE/ROperator_Where.hxx index 4d7e4f7..b9956e9 100644 --- a/src/SOFIE_core/inc/SOFIE/ROperator_Where.hxx +++ b/core/inc/SOFIE/ROperator_Where.hxx @@ -482,20 +482,20 @@ public: std::string Generate_GPU_Kernel_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; opName = "op_" + opName; - if (fShapeY.empty()) + if (fShapeZ.empty()) throw std::runtime_error("SOFIE Where Op called to Generate without being initialized first"); - const std::size_t D = fShapeY.size(); - std::size_t totalElements = ConvertShapeToLength(fShapeY); + const std::size_t D = fShapeZ.size(); + std::size_t totalElements = ConvertShapeToLength(fShapeZ); std::vector shapeA_padded(D, 1); std::vector shapeB_padded(D, 1); std::vector shapeC_padded(D, 1); { - size_t offA = D - fShapeA.size(); - for (size_t i = 0; i < fShapeA.size(); ++i) shapeA_padded[offA + i] = fShapeA[i]; - 
size_t offB = D - fShapeB.size(); - for (size_t i = 0; i < fShapeB.size(); ++i) shapeB_padded[offB + i] = fShapeB[i]; + size_t offA = D - fShapeX.size(); + for (size_t i = 0; i < fShapeX.size(); ++i) shapeA_padded[offA + i] = fShapeX[i]; + size_t offB = D - fShapeY.size(); + for (size_t i = 0; i < fShapeY.size(); ++i) shapeB_padded[offB + i] = fShapeY[i]; size_t offC = D - fShapeC.size(); for (size_t i = 0; i < fShapeC.size(); ++i) shapeC_padded[offC + i] = fShapeC[i]; } @@ -503,7 +503,7 @@ public: auto stridesA = UTILITY::ComputeStrideFromShape(shapeA_padded); auto stridesB = UTILITY::ComputeStrideFromShape(shapeB_padded); auto stridesC = UTILITY::ComputeStrideFromShape(shapeC_padded); - auto stridesY = UTILITY::ComputeStrideFromShape(fShapeY); + auto stridesZ = UTILITY::ComputeStrideFromShape(fShapeZ); std::string typeName = TensorType::Name(); std::string kname = "WhereKernel_" + opName; @@ -514,9 +514,9 @@ public: op += SP + SP + "template\n"; op += SP + SP + "ALPAKA_FN_ACC void operator()(\n"; op += SP + SP + SP + "TAcc const& acc,\n"; - op += SP + SP + SP + "uint8_t const* __restrict__ cond,\n"; op += SP + SP + SP + "T const* __restrict__ x,\n"; op += SP + SP + SP + "T const* __restrict__ y,\n"; + op += SP + SP + SP + "uint8_t const* __restrict__ cond,\n"; op += SP + SP + SP + "T* __restrict__ output,\n"; op += SP + SP + SP + "std::size_t const totalElements) const {\n\n"; @@ -528,8 +528,8 @@ public: for (std::size_t d = 0; d < D; ++d) { op += SP + SP + SP + SP + "std::size_t const out_" + std::to_string(d) - + " = (elem_idx / " + std::to_string(stridesY[d]) + "u) % " - + std::to_string(fShapeY[d]) + "u;\n"; + + " = (elem_idx / " + std::to_string(stridesZ[d]) + "u) % " + + std::to_string(fShapeZ[d]) + "u;\n"; } op += "\n"; @@ -584,31 +584,23 @@ public: std::string Generate_GPU_ALPAKA(std::string opName) override { if (fIsOutputConstant) return ""; opName = "op_" + opName; - if (fShapeY.empty()) + if (fShapeZ.empty()) throw std::runtime_error("SOFIE Where Op 
called to Generate without being initialized first"); - std::size_t totalElements = ConvertShapeToLength(fShapeY); + std::size_t totalElements = ConvertShapeToLength(fShapeZ); std::string kname = "whereKernel_" + opName; std::stringstream out; out << "\n//------ WHERE_GPU_ALPAKA\n"; out << SP << "auto const elementsPerThread_" << opName << " = Vec::all(static_cast(1));\n"; out << SP << "auto const elementsPerGrid_" << opName << " = Vec::all(Idx{" << totalElements << "});\n"; - out << SP << "alpaka::KernelCfg const kernelCfg_" << opName - << " = {elementsPerGrid_" << opName << ", elementsPerThread_" << opName << "};\n"; - out << SP << "auto const workDiv_" << opName << " = alpaka::getValidWorkDiv(kernelCfg_" << opName - << ", devAcc, " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNC << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNA << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" - << ", static_cast(" << totalElements << "));\n"; + out << SP << "auto const workDiv_" << opName << " = sofie_workdiv(elementsPerGrid_" << opName << ");\n"; out << SP << "alpaka::exec(queue, workDiv_" << opName << ", " << kname - << ", alpaka::getPtrNative(deviceBuf_" << fNC << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNA << ")" - << ", alpaka::getPtrNative(deviceBuf_" << fNB << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNX << ")" << ", alpaka::getPtrNative(deviceBuf_" << fNY << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNC << ")" + << ", alpaka::getPtrNative(deviceBuf_" << fNZ << ")" << ", static_cast(" << totalElements << "));\n"; return out.str(); diff --git a/src/SOFIE_core/inc/SOFIE/SOFIEHelpers.hxx b/core/inc/SOFIE/SOFIEHelpers.hxx similarity index 100% rename from src/SOFIE_core/inc/SOFIE/SOFIEHelpers.hxx rename to core/inc/SOFIE/SOFIEHelpers.hxx diff --git a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx b/core/inc/SOFIE/SOFIE_common.hxx similarity index 97% rename from 
src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx rename to core/inc/SOFIE/SOFIE_common.hxx index 5ace31b..e36df0a 100644 --- a/src/SOFIE_core/inc/SOFIE/SOFIE_common.hxx +++ b/core/inc/SOFIE/SOFIE_common.hxx @@ -203,6 +203,7 @@ std::vector ConvertShapeToDim(const std::vector & shape); std::vector ConvertShapeToInt(const std::vector & shape); std::size_t ConvertShapeToLength(const std::vector & shape); +std::size_t ConvertShapeToLength(const std::vector & shape); std::string ConvertShapeToString(const std::vector & shape); std::string ConvertDimShapeToString(const std::vector & shape); @@ -732,24 +733,24 @@ struct GNN_Data { }; template -TMVA::Experimental::RTensor Concatenate( TMVA::Experimental::RTensor & t1, TMVA::Experimental::RTensor & t2, int axis = 0) +RTensor Concatenate( RTensor & t1, RTensor & t2, int axis = 0) { // concatenate tensor along axis. Shape must be the same except in the dimension of the concatenated axis if (t1.GetMemoryLayout() != t2.GetMemoryLayout()) - throw std::runtime_error("TMVA RTensor Concatenate - tensors have different memory layout"); + throw std::runtime_error("RTensor Concatenate - tensors have different memory layout"); auto & shape1 = t1.GetShape(); auto & shape2 = t2.GetShape(); if (t1.GetSize()/shape1[axis] != t2.GetSize()/shape2[axis]) { std::cout << "axis " << axis << " sizes " << t1.GetSize() << " " << t2.GetSize() << " "; std::cout << "shape 1 : " << ConvertShapeToString(t1.GetShape()); std::cout << " shape 2 : " << ConvertShapeToString(t2.GetShape()) << std::endl; - throw std::runtime_error("TMVA RTensor Concatenate - tensors have incompatible shapes"); + throw std::runtime_error("RTensor Concatenate - tensors have incompatible shapes"); } std::vector outShape = shape1; outShape[axis] = shape1[axis] + shape2[axis]; - TMVA::Experimental::RTensor tout(outShape, t1.GetMemoryLayout()); - if (t1.GetMemoryLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) { - throw std::runtime_error("TMVA RTensor Concatenate is not yet 
supported for column major tensors"); + RTensor tout(outShape, t1.GetMemoryLayout()); + if (t1.GetMemoryLayout() == MemoryLayout::ColumnMajor) { + throw std::runtime_error("RTensor Concatenate is not yet supported for column major tensors"); } auto & stride1 = t1.GetStrides(); @@ -803,8 +804,8 @@ inline void Gemm_Call(float *output, bool transa, bool transb, int m, int n, int if (C != nullptr) { std::copy(C, C + m * n, output); } - TMVA::Experimental::SOFIE::BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &m, &n, &k, &alpha, A, lda, B, ldb, - &beta, output, ldc); + BLAS::sgemm_(transa ? &ct : &cn, transb ? &ct : &cn, &m, &n, &k, &alpha, A, lda, B, ldb, + &beta, output, ldc); } inline void Fill(float *output, float value, int size) @@ -840,11 +841,11 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect is >> name >> length; if (name != expectedName) { std::string err_msg = - "TMVA-SOFIE failed to read the correct tensor name; expected name is " + expectedName + " , read " + name; + "sofie failed to read the correct tensor name; expected name is " + expectedName + " , read " + name; throw std::runtime_error(err_msg); } if (length != expectedLength) { - std::string err_msg = "TMVA-SOFIE failed to read the correct tensor size; expected size is " + + std::string err_msg = "sofie failed to read the correct tensor size; expected size is " + std::to_string(expectedLength) + " , read " + std::to_string(length); throw std::runtime_error(err_msg); } @@ -854,7 +855,7 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect target[i] = ParseFloatToken(token); } if (is.fail()) { - throw std::runtime_error("TMVA-SOFIE failed to read the values for tensor " + expectedName); + throw std::runtime_error("sofie failed to read the values for tensor " + expectedName); } } diff --git a/src/SOFIE_core/src/Prototype.cxx b/core/src/Prototype.cxx similarity index 100% rename from src/SOFIE_core/src/Prototype.cxx rename to 
core/src/Prototype.cxx diff --git a/src/SOFIE_core/src/RFunction.cxx b/core/src/RFunction.cxx similarity index 100% rename from src/SOFIE_core/src/RFunction.cxx rename to core/src/RFunction.cxx diff --git a/src/SOFIE_core/src/RFunction_MLP.cxx b/core/src/RFunction_MLP.cxx similarity index 100% rename from src/SOFIE_core/src/RFunction_MLP.cxx rename to core/src/RFunction_MLP.cxx diff --git a/src/SOFIE_core/src/RFunction_Mean.cxx b/core/src/RFunction_Mean.cxx similarity index 100% rename from src/SOFIE_core/src/RFunction_Mean.cxx rename to core/src/RFunction_Mean.cxx diff --git a/src/SOFIE_core/src/RFunction_Sum.cxx b/core/src/RFunction_Sum.cxx similarity index 100% rename from src/SOFIE_core/src/RFunction_Sum.cxx rename to core/src/RFunction_Sum.cxx diff --git a/src/SOFIE_core/src/RModel.cxx b/core/src/RModel.cxx similarity index 95% rename from src/SOFIE_core/src/RModel.cxx rename to core/src/RModel.cxx index fcb8e6d..6e9267b 100644 --- a/src/SOFIE_core/src/RModel.cxx +++ b/core/src/RModel.cxx @@ -15,7 +15,6 @@ namespace SOFIE { namespace { const std::string SP = " "; -} void ReplaceAll(std::string &str, const std::string &from, const std::string &to) { @@ -52,14 +51,6 @@ std::string TensorMember(std::string const &name) } // namespace -std::underlying_type_t operator|(Options opA, Options opB) { - return static_cast>(opA) | static_cast>(opB); -} -std::underlying_type_t operator|(std::underlying_type_t opA, Options opB) { - return opA | static_cast>(opB); -} - - std::vector RModel::GetTensorShape(const std::string & name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { @@ -167,7 +158,7 @@ bool RModel::CheckIfTensorAlreadyExist(std::string tensor_name) { void RModel::AddInputTensorInfo(std::string input_name, ETensorType type, std::vector shape) { input_name = UTILITY::Clean_name(input_name); if (CheckIfTensorAlreadyExist(input_name)) { - throw std::runtime_error("TMVA-SOFIE: input tensor with name " + input_name + " 
already exists \n"); + throw std::runtime_error("sofie: input tensor with name " + input_name + " already exists \n"); } InputTensorInfo inputInfo { type, shape }; @@ -177,7 +168,7 @@ void RModel::AddInputTensorInfo(std::string input_name, ETensorType type, std::v void RModel::AddInputTensorInfo(std::string input_name, ETensorType type, std::vector shape) { input_name = UTILITY::Clean_name(input_name); if (CheckIfTensorAlreadyExist(input_name)) { - throw std::runtime_error("TMVA-SOFIE: input tensor with name " + input_name + " already exists \n"); + throw std::runtime_error("sofie: input tensor with name " + input_name + " already exists \n"); } TensorInfo inputInfo { type, shape }; fReadyInputTensorInfos[input_name] = inputInfo; @@ -222,7 +213,7 @@ void RModel::AddInitializedTensor(std::string tensor_name, ETensorType type, std tensor_name = UTILITY::Clean_name(tensor_name); //NB: own data if (CheckIfTensorAlreadyExist(tensor_name)) { - throw std::runtime_error("TMVA-SOFIE: initialized tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: initialized tensor with name " + tensor_name + " already exists \n"); } InitializedTensor new_tensor {type, shape, data}; fInitializedTensors[tensor_name] = new_tensor; @@ -232,7 +223,7 @@ void RModel::AddConstantTensor(std::string tensor_name, ETensorType type, std::v tensor_name = UTILITY::Clean_name(tensor_name); //NB: own data if (CheckIfTensorAlreadyExist(tensor_name)) { - throw std::runtime_error("TMVA-SOFIE: constant tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: constant tensor with name " + tensor_name + " already exists \n"); } InitializedTensor new_tensor {type, shape, data, true}; // add here flag to specify is a constant tensor fInitializedTensors[tensor_name] = new_tensor; @@ -241,7 +232,7 @@ void RModel::AddConstantTensor(std::string tensor_name, ETensorType type, std::v void RModel::AddShapeTensor(const std::string & name, const 
std::vector & shape_values, bool scalar){ auto tensor_name = UTILITY::Clean_name(name); if (fShapeTensors.count(tensor_name) != 0) { - throw std::runtime_error("TMVA-SOFIE: shape tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: shape tensor with name " + tensor_name + " already exists \n"); } fShapeTensors[tensor_name] = std::make_pair(shape_values, scalar); } @@ -251,7 +242,7 @@ void RModel::AddAliasTensor(const std::string & name, const std::string & origin auto tensor_name = UTILITY::Clean_name(name); auto origin_name = UTILITY::Clean_name(origin); if (fAliasTensors.count(tensor_name) != 0) { - throw std::runtime_error("TMVA-SOFIE: alias tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: alias tensor with name " + tensor_name + " already exists \n"); } fAliasTensors[tensor_name] = origin_name; } @@ -308,7 +299,7 @@ void RModel::AddIntermediateTensor(std::string tensor_name, ETensorType type, st void RModel::AddIntermediateTensor(std::string tensor_name, ETensorType type, std::vector shape) { tensor_name = UTILITY::Clean_name(tensor_name); if (CheckIfTensorAlreadyExist(tensor_name)) { - throw std::runtime_error("TMVA-SOFIE: intermediate tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: intermediate tensor with name " + tensor_name + " already exists \n"); } TensorInfo new_tensor {type, shape}; fIntermediateTensorInfos[tensor_name] = new_tensor; @@ -317,7 +308,7 @@ void RModel::AddIntermediateTensor(std::string tensor_name, ETensorType type, st void RModel::AddDynamicTensor(std::string tensor_name, ETensorType type, std::vector shape){ tensor_name = UTILITY::Clean_name(tensor_name); if (CheckIfTensorAlreadyExist(tensor_name)){ - throw std::runtime_error("TMVA-SOFIE: intermediate tensor with name " + tensor_name + " already exists \n"); + throw std::runtime_error("sofie: intermediate tensor with name " + tensor_name + " already exists 
\n"); } DynamicTensorInfo new_tensor {type, shape}; fDynamicTensorInfos[tensor_name] = new_tensor; @@ -356,7 +347,7 @@ void RModel::UpdateOutputTensorList(std::vector curr_output_tensors void RModel::UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::vector shape, std::shared_ptr data) { tensor_name = UTILITY::Clean_name(tensor_name); if (!CheckIfTensorAlreadyExist(tensor_name)) { - throw std::runtime_error("TMVA-SOFIE: tensor " + tensor_name + " not found when trying to update it"); + throw std::runtime_error("sofie: tensor " + tensor_name + " not found when trying to update it"); } InitializedTensor new_tensor {type, shape, data}; fInitializedTensors[tensor_name] = new_tensor; @@ -365,21 +356,30 @@ void RModel::UpdateInitializedTensor(std::string tensor_name, ETensorType type, std::shared_ptr RModel::GetInitializedTensorData(std::string tensor_name) { auto f = fInitializedTensors.find(tensor_name); if (f == fInitializedTensors.end()) { - throw std::runtime_error("TMVA-SOFIE: tensor " + tensor_name + " not found when trying to get its data"); + throw std::runtime_error("sofie: tensor " + tensor_name + " not found when trying to get its data"); } else { return f->second.sharedptr(); } } +void RModel::RemoveInitializedTensor(std::string tensor_name) { + auto f = fInitializedTensors.find(tensor_name); + if (f == fInitializedTensors.end()) { + throw std::runtime_error("sofie: tensor " + tensor_name + " not found when trying to remove it"); + } else { + fInitializedTensors.erase(f); + } +} + void RModel::SetNotWritableInitializedTensor(const std::string & tensor_name) { auto t = fInitializedTensors.find(tensor_name); if (t == fInitializedTensors.end()) { - throw std::runtime_error("TMVA-SOFIE: initialized tensor " + tensor_name + " not found when trying to get its info"); + throw std::runtime_error("sofie: initialized tensor " + tensor_name + " not found when trying to get its info"); } t->second.SetNotWritable(); } -std::string 
RModel::AllocateIntermediateMemory(std::span op_output_tensors) +std::string RModel::AllocateIntermediateMemory(std::span op_output_tensors) { std::stringstream code; @@ -484,7 +484,7 @@ std::string RModel::AllocateIntermediateMemory(std::span return code.str(); } -void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ +void RModel::CheckAndFlushIntermediateMemory(std::span op_input_tensors, const size_t& op_idx){ if (fVerbose) std::cout << "*** CheckAndFlushIntermediateMemory: Loop on input tensors for op " << op_idx << "\n"; //print available chunks if (fVerbose) std::cout << "available chunks before freeing them : \n"; @@ -931,7 +931,7 @@ void RModel::GenerateDynamicTensorInfo() std::stringstream out; out << "// dynamic tensor memory management\n"; - out << SP << "std::vector dynamicTensorInfos;\n"; + out << SP << "std::vector dynamicTensorInfos;\n"; out << SP << "dynamicTensorInfos.reserve(" << fDynamicTensorInfos.size() << ");\n"; // loop on all the operators to find begin/end life of the tensors @@ -963,7 +963,7 @@ void RModel::GenerateDynamicTensorInfo() if (begin> end) { std::cout << "op " << op_index << "tensor_" << name << " begin " << begin << " " << " end " << end << std::endl; - throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); + throw std::runtime_error("sofie: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); } // write in code @@ -992,7 +992,7 @@ void RModel::GenerateDynamicTensorInfo() } } if (missingTensor) - throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); + throw std::runtime_error("sofie: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); fGC += out.str(); } @@ -1097,7 +1097,7 @@ std::string RModel::GenerateInferSignature(bool isdecl) { if (isdecl) { std::string type = ConvertTypeToString(GetTensorType(name)); 
if (type == "other") - throw std::runtime_error("TMVA-SOFIE: input tensor " + name + + throw std::runtime_error("sofie: input tensor " + name + " is of a data type which is not yet supported."); rGC += type + " const* "; } @@ -1211,7 +1211,7 @@ void RModel::GenerateOutput() dynamic_parameters_check += d.param + " > " + memberName + " || "; input_params_checked.insert(pName); fGC += SP + "if (" + d.param + " > " + memberName + ") {\n"; - fGC += SP + SP + "throw std::runtime_error(\"TMVA-SOFIE: dynamic input tensor shape parameter " + + fGC += SP + SP + "throw std::runtime_error(\"sofie: dynamic input tensor shape parameter " + d.param + " exceeds the initialized maximum allowed shape.\");\n"; fGC += SP + "}\n"; } @@ -1418,7 +1418,7 @@ void RModel::GenerateSessionCode() std::cout << "Generating main inference code for " << fName << std::endl; if (fOutputTensorNames.size() == 0) - throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + throw std::runtime_error("sofie: output size=0 are not supported"); std::string allOperatorCode; @@ -1484,7 +1484,7 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo } if (fUseWeightFile && !fUseSession) { throw std::runtime_error( - "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); + "sofie: RModel::Generate: cannot use a separate weight file without generating a Session class"); } if (static_cast>(Options::kGNN) & options) @@ -1538,14 +1538,14 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " std::ifstream f;\n"; fGC += " f.open(filename);\n"; fGC += " if (!f.is_open()) {\n"; - fGC += " throw std::runtime_error(\"tmva-sofie failed to open file \" + filename + \" for input weights\");\n"; + fGC += " throw std::runtime_error(\"sofie failed to open file \" + filename + \" for input weights\");\n"; fGC += " }\n"; if(fIsGNNComponent) { fGC += " f.seekg(" + std::to_string(pos) + ");\n"; } - fGC += " using 
TMVA::Experimental::SOFIE::ReadTensorFromStream;\n"; + fGC += " using SOFIE::ReadTensorFromStream;\n"; // loop on tensors and parse the file for (auto& i: fInitializedTensors) { @@ -1556,7 +1556,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::string length = std::to_string(ConvertShapeToLength(i.second.shape())); fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { - throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); + throw std::runtime_error("sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } } fGC += " f.close();\n"; @@ -1568,12 +1568,12 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " {\n"; fGC += " std::unique_ptr rootFile(TFile::Open(filename.c_str(), \"READ\"));\n"; fGC += " if (!rootFile->IsOpen()) {\n"; - fGC += " throw std::runtime_error(\"tmva-sofie failed to open ROOT file for input weights\");\n"; + fGC += " throw std::runtime_error(\"sofie failed to open ROOT file for input weights\");\n"; fGC += " }\n"; std::string dirName = fName + "_weights"; fGC += " if (!rootFile->GetKey(\"" + dirName + "\")) {\n"; - fGC += " throw std::runtime_error(\"tmva-sofie failed to open ROOT directory for input weights\");\n"; + fGC += " throw std::runtime_error(\"sofie failed to open ROOT directory for input weights\");\n"; fGC += " }\n"; for (auto &i : fInitializedTensors) { @@ -1591,7 +1591,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { fGC += " fTensor_" + i.first + " = *reinterpret_cast*>(rootFile->Get(\""; fGC += dirName + "/" + tensor_name + "\"));\n"; } else { - throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); + throw std::runtime_error("sofie tensor " + tensor_name + " with type " + 
ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); } fGC += " }\n"; } @@ -1659,7 +1659,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { outputDir->WriteObjectAny(&tensorDataVector, "std::vector", tensorName.c_str()); } else { - throw std::runtime_error("tmva-sofie tensor " + tensorName + " with type " + ConvertTypeToString(item.second.type()) + + throw std::runtime_error("sofie tensor " + tensorName + " with type " + ConvertTypeToString(item.second.type()) + " cannot be written to a ROOT file"); } } @@ -1681,7 +1681,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } if (!f.is_open()) throw - std::runtime_error("tmva-sofie failed to open file " + filename + " for tensor weight data"); + std::runtime_error("sofie failed to open file " + filename + " for tensor weight data"); for (auto& i: fInitializedTensors) { // skip Constant tensors and not writable tensors (e.g. shape tensors) if (!i.second.IsWeightTensor()) { @@ -1707,10 +1707,10 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } else { - throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); + throw std::runtime_error("sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); } if (f.fail()) - throw std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); + throw std::runtime_error("sofie failed to write tensor data to file for " + tensor_name); } long curr_pos = f.tellp(); f.close(); @@ -1740,9 +1740,9 @@ void RModel::PrintSummary() const { void RModel::GenerateRequiredInputTensorInfo() { fGC += "\n// Input tensor dimensions\n"; - fGC += "using TMVA::Experimental::SOFIE::SingleDim;\n"; - fGC += "using TMVA::Experimental::SOFIE::TensorDims;\n"; - fGC += "using TMVA::Experimental::SOFIE::makeDims;\n\n"; + fGC += "using 
SOFIE::SingleDim;\n"; + fGC += "using SOFIE::TensorDims;\n"; + fGC += "using SOFIE::makeDims;\n\n"; bool hasDynamicInputTensors = false; for (std::size_t iInput = 0; iInput < fInputTensorNames.size(); ++iInput) { diff --git a/src/SOFIE_core/src/RModel_ALPAKA.cxx b/core/src/RModel_ALPAKA.cxx similarity index 93% rename from src/SOFIE_core/src/RModel_ALPAKA.cxx rename to core/src/RModel_ALPAKA.cxx index cc6306e..621b701 100644 --- a/src/SOFIE_core/src/RModel_ALPAKA.cxx +++ b/core/src/RModel_ALPAKA.cxx @@ -218,7 +218,7 @@ std::string RModel::GenerateInferSignature_GPU_ALPAKA(bool isdecl) { if (type == ETensorType::DOUBLE) return "BufD1D"; if (type == ETensorType::INT64) return "BufI641D"; if (type == ETensorType::BOOL) return "BufUI81D"; - throw std::runtime_error("TMVA-SOFIE: input tensor " + name + + throw std::runtime_error("sofie: input tensor " + name + " is of a data type which is not yet supported."); }; @@ -260,7 +260,7 @@ std::string RModel::GenerateImplSignature_GPU_ALPAKA(bool isdecl) { if (type == ETensorType::DOUBLE) return "ViewConstD1D"; if (type == ETensorType::INT64) return "ViewConstI641D"; if (type == ETensorType::BOOL) return "ViewConstUI81D"; - throw std::runtime_error("TMVA-SOFIE: input tensor " + name + + throw std::runtime_error("sofie: input tensor " + name + " is of a data type which is not yet supported."); }; @@ -296,7 +296,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { size_t outputSize = fOutputTensorNames.size(); if (outputSize == 0) - throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + throw std::runtime_error("sofie: output size=0 are not supported"); ETensorType eFirstOutputType = GetTensorType(*fOutputTensorNames.begin()); bool sameOutputTypes = true; @@ -311,7 +311,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { if (type == ETensorType::DOUBLE) return "ViewConstD1D"; if (type == ETensorType::INT64) return "ViewConstI641D"; if (type == ETensorType::BOOL) return "ViewConstUI81D"; - throw 
std::runtime_error("TMVA-SOFIE: input tensor " + name + " is of an unsupported data type."); + throw std::runtime_error("sofie: input tensor " + name + " is of an unsupported data type."); }; // Collect deduplicated dynamic dimension parameter names in declaration order @@ -357,10 +357,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { fGC += SP + "{\n"; fGC += SP + SP + "auto const elementsPerThread_fused" + sfx + " = Vec::all(static_cast(1));\n"; fGC += SP + SP + "auto const elementsPerGrid_fused" + sfx + " = Vec::all(Idx{" + std::to_string(grp.numElements) + "});\n"; - fGC += SP + SP + "alpaka::KernelCfg const cfg_fused" + sfx + " = {elementsPerGrid_fused" + sfx + ", elementsPerThread_fused" + sfx + "};\n"; - fGC += SP + SP + "auto const workDiv_fused" + sfx + " = alpaka::getValidWorkDiv(cfg_fused" + sfx + ", devAcc, " + kname + - ", alpaka::getPtrNative(deviceBuf_" + grp.inputTensor + "), alpaka::getPtrNative(deviceBuf_" + grp.outputTensor + - "), static_cast(" + std::to_string(grp.numElements) + "));\n"; + fGC += SP + SP + "auto const workDiv_fused" + sfx + " = sofie_workdiv(elementsPerGrid_fused" + sfx + ");\n"; fGC += SP + SP + "auto task_fused" + sfx + " = alpaka::createTaskKernel(workDiv_fused" + sfx + ", " + kname + ", alpaka::getPtrNative(deviceBuf_" + grp.inputTensor + "), alpaka::getPtrNative(deviceBuf_" + grp.outputTensor + "), static_cast(" + std::to_string(grp.numElements) + "));\n"; @@ -382,7 +379,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { for (auto &p : dynParamNames) spanDynDecl += ", size_t " + p; - fGC += "void infer(std::span inputs, std::span outputs" + spanDynDecl + "){\n"; + fGC += "__host__ void infer(std::span inputs, std::span outputs" + spanDynDecl + "){\n"; // Build _infer_impl call: dyn params first, then inputs[i] { @@ -427,7 +424,7 @@ void RModel::GenerateOutput_GPU_ALPAKA() { returnType += ">"; } - fGC += returnType + " infer("; + fGC += "__host__ " + returnType + " infer("; fGC += GenerateInferSignature_GPU_ALPAKA(); fGC += 
"){\n"; @@ -538,6 +535,19 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() { } } + // Emit a namespace-scope helper that avoids alpaka::getValidWorkDiv (which calls + // cudaFuncGetAttributes via a void* cast — broken on CUDA 12.x for JIT kernels). + fGC += "\ntemplate\n"; + fGC += "inline alpaka::WorkDivMembers sofie_workdiv(\n"; + fGC += " alpaka::Vec const& numElems, TIdx blockSz = TIdx{256})\n{\n"; + fGC += " auto const numBlocks = alpaka::Vec::all(\n"; + fGC += " (numElems[0] + blockSz - TIdx{1}) / blockSz);\n"; + fGC += " return alpaka::WorkDivMembers(\n"; + fGC += " numBlocks,\n"; + fGC += " alpaka::Vec::all(blockSz),\n"; + fGC += " alpaka::Vec::all(TIdx{1}));\n"; + fGC += "}\n\n"; + // define the Session struct (for GNN this is generated in RModel_GNN) fGC += "\n\ntemplate \n"; if (fUseSession) { @@ -694,7 +704,7 @@ void RModel::GenerateGPU_ALPAKA(std::underlying_type_t options, int bat } if (fUseWeightFile && !fUseSession) { throw std::runtime_error( - "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); + "sofie: RModel::Generate: cannot use a separate weight file without generating a Session class"); } if (static_cast>(Options::kGNN) & options || @@ -727,18 +737,10 @@ void RModel::MoveInitializedTensorsToBuffers_ALPAKA(){ std::string tensor_name = "tensor_" + i.first; auto length = ConvertShapeToLength(i.second.shape()); std::string slength = std::to_string(length); - if (i.second.type() == ETensorType::FLOAT) { - fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; - } else if (i.second.type() == ETensorType::DOUBLE) { - fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+");\n"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; - } else if (i.second.type() == ETensorType::INT64) { - fGC += " auto hostBuf_"+i.first+" = 
alpaka::createView(hostAcc, tensor_"+i.first+", " + slength + ");\n"; - fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; - } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a ROOT file"); - } + // Use the 3-argument createView(dev, container, extent) which calls std::data() + // internally — works for both std::vector and raw C arrays. + fGC += " auto hostBuf_"+i.first+" = alpaka::createView(hostAcc, tensor_"+i.first+", " + slength + ");\n"; + fGC += " alpaka::memcpy(queue, deviceBuf_"+i.first+", hostBuf_"+i.first+");\n"; } } diff --git a/src/SOFIE_core/src/RModel_Base.cxx b/core/src/RModel_Base.cxx similarity index 92% rename from src/SOFIE_core/src/RModel_Base.cxx rename to core/src/RModel_Base.cxx index 7139529..9c49e37 100644 --- a/src/SOFIE_core/src/RModel_Base.cxx +++ b/core/src/RModel_Base.cxx @@ -38,7 +38,7 @@ void RModel_Base::GenerateHeaderInfo(std::string& hgname) { // Include TFile when saving the weights in a binary ROOT file fGC += "#include \"TFile.h\"\n"; #else - throw std::runtime_error("TMVA-SOFIE: ROOT binary weight file option is enabled but the code is not compiled with ROOT support"); + throw std::runtime_error("sofie: ROOT binary weight file option is enabled but the code is not compiled with ROOT support"); #endif } @@ -96,7 +96,7 @@ void RModel_Base::GenerateHeaderInfo_GPU_ALPAKA(std::string& hgname) { // Include TFile when saving the weights in a binary ROOT file fGC += "#include \"TFile.h\"\n"; #else - throw std::runtime_error("TMVA-SOFIE: ROOT binary weight file option is enabled but the code is not compiled with ROOT support"); + throw std::runtime_error("sofie: ROOT binary weight file option is enabled but the code is not compiled with ROOT support"); #endif } @@ -117,7 +117,7 @@ void RModel_Base::OutputGenerated(std::string filename, bool append) { else f.open(filename); if (!f.is_open()) { - throw 
std::runtime_error("tmva-sofie failed to open file for output generated inference code"); + throw std::runtime_error("sofie failed to open file for output generated inference code"); } f << fGC; f.close(); diff --git a/src/SOFIE_core/src/RModel_GNN.cxx b/core/src/RModel_GNN.cxx similarity index 100% rename from src/SOFIE_core/src/RModel_GNN.cxx rename to core/src/RModel_GNN.cxx diff --git a/src/SOFIE_core/src/RModel_GraphIndependent.cxx b/core/src/RModel_GraphIndependent.cxx similarity index 100% rename from src/SOFIE_core/src/RModel_GraphIndependent.cxx rename to core/src/RModel_GraphIndependent.cxx diff --git a/src/SOFIE_core/src/SOFIE_common.cxx b/core/src/SOFIE_common.cxx similarity index 97% rename from src/SOFIE_core/src/SOFIE_common.cxx rename to core/src/SOFIE_common.cxx index cd1b60a..a2bafde 100644 --- a/src/SOFIE_core/src/SOFIE_common.cxx +++ b/core/src/SOFIE_common.cxx @@ -56,6 +56,20 @@ std::size_t ConvertShapeToLength(const std::vector & shape){ return fLength; } +std::size_t ConvertShapeToLength(const std::vector & shape){ + // convert generic shape to a string + // multiply all the integer specified dimensions of the shape + std::size_t length = 1; + for (size_t i = 0; i < shape.size(); i++) { + if (!shape[i].isParam) { + length *= shape[i].dim; + } else { + return static_cast(-1); // return -1 in case of parametric shapes + } + } + return length; +} + std::string ConvertTypeToString(ETensorType type){ switch(type){ case ETensorType::FLOAT : { @@ -666,4 +680,4 @@ MemoryResult OrganizeMemory(const std::vector & tensorsInfo ) return MemoryResult{total_bytes, std::move(tensorsOffset)}; } -} // namespace SOFIE \ No newline at end of file +} // namespace SOFIE diff --git a/src/SOFIE_core/test/CMakeLists.txt b/core/test/CMakeLists.txt similarity index 97% rename from src/SOFIE_core/test/CMakeLists.txt rename to core/test/CMakeLists.txt index 33f1046..12f19b1 100644 --- a/src/SOFIE_core/test/CMakeLists.txt +++ b/core/test/CMakeLists.txt @@ -4,8 +4,8 @@ 
include(FetchContent) ############################################################################ # Basic setup ############################################################################ -include_directories(${CMAKE_SOURCE_DIR}/src/SOFIE_core/inc) -include_directories(${CMAKE_SOURCE_DIR}/src/SOFIE_parsers/inc) +include_directories(${CMAKE_SOURCE_DIR}/core/inc) +include_directories(${CMAKE_SOURCE_DIR}/parsers/inc) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/src/SOFIE_core/test/Conv1dModelGenerator.py b/core/test/Conv1dModelGenerator.py similarity index 100% rename from src/SOFIE_core/test/Conv1dModelGenerator.py rename to core/test/Conv1dModelGenerator.py diff --git a/src/SOFIE_core/test/Conv2dModelGenerator.py b/core/test/Conv2dModelGenerator.py similarity index 100% rename from src/SOFIE_core/test/Conv2dModelGenerator.py rename to core/test/Conv2dModelGenerator.py diff --git a/src/SOFIE_core/test/Conv3dModelGenerator.py b/core/test/Conv3dModelGenerator.py similarity index 100% rename from src/SOFIE_core/test/Conv3dModelGenerator.py rename to core/test/Conv3dModelGenerator.py diff --git a/src/SOFIE_core/test/ConvTrans2dModelGenerator.py b/core/test/ConvTrans2dModelGenerator.py similarity index 100% rename from src/SOFIE_core/test/ConvTrans2dModelGenerator.py rename to core/test/ConvTrans2dModelGenerator.py diff --git a/src/SOFIE_core/test/EmitFromONNX.cxx.in b/core/test/EmitFromONNX.cxx.in similarity index 100% rename from src/SOFIE_core/test/EmitFromONNX.cxx.in rename to core/test/EmitFromONNX.cxx.in diff --git a/src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in b/core/test/EmitFromONNX_GPU_ALPAKA.cxx.in similarity index 100% rename from src/SOFIE_core/test/EmitFromONNX_GPU_ALPAKA.cxx.in rename to core/test/EmitFromONNX_GPU_ALPAKA.cxx.in diff --git a/src/SOFIE_core/test/EmitFromRoot.cxx.in b/core/test/EmitFromRoot.cxx.in similarity index 100% rename from src/SOFIE_core/test/EmitFromRoot.cxx.in rename to 
core/test/EmitFromRoot.cxx.in diff --git a/src/SOFIE_core/test/GNN/EmitGNN.cxx b/core/test/GNN/EmitGNN.cxx similarity index 100% rename from src/SOFIE_core/test/GNN/EmitGNN.cxx rename to core/test/GNN/EmitGNN.cxx diff --git a/src/SOFIE_core/test/GNN/EmitGraphIndependent.cxx b/core/test/GNN/EmitGraphIndependent.cxx similarity index 100% rename from src/SOFIE_core/test/GNN/EmitGraphIndependent.cxx rename to core/test/GNN/EmitGraphIndependent.cxx diff --git a/src/SOFIE_core/test/LinearModelGenerator.py b/core/test/LinearModelGenerator.py similarity index 100% rename from src/SOFIE_core/test/LinearModelGenerator.py rename to core/test/LinearModelGenerator.py diff --git a/src/SOFIE_core/test/RecurrentModelGenerator.py b/core/test/RecurrentModelGenerator.py similarity index 100% rename from src/SOFIE_core/test/RecurrentModelGenerator.py rename to core/test/RecurrentModelGenerator.py diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNX.cxx b/core/test/TestCustomModelsFromONNX.cxx similarity index 100% rename from src/SOFIE_core/test/TestCustomModelsFromONNX.cxx rename to core/test/TestCustomModelsFromONNX.cxx diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx similarity index 97% rename from src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx rename to core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 3a47d75..e415cce 100644 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -127,6 +127,8 @@ #include "Clip_FromONNX_GPU_ALPAKA.hxx" #include "Not_FromONNX_GPU_ALPAKA.hxx" +#include "GNN_model_FromONNX_GPU_ALPAKA.hxx" + #include #include #include @@ -2483,7 +2485,7 @@ TEST_F(SofieAlpakaTest, IsNaN) TEST_F(SofieAlpakaTest, Clip) { - // Model clips to [-1.0, 1.0] (initializer constants). + // Model clips to [-1.0, 1.0]. 
constexpr float TOLERANCE = DEFAULT_TOLERANCE; constexpr float clip_min = -1.0f; constexpr float clip_max = 1.0f; @@ -2506,7 +2508,7 @@ TEST_F(SofieAlpakaTest, Clip) auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{N})); { - SOFIE_Clip::Session session; + SOFIE_Clip::Session session("Clip_FromONNX_GPU_ALPAKA.dat"); auto result = session.infer(input_d); alpaka::wait(queue); cudaDeviceSynchronize(); @@ -2552,3 +2554,55 @@ TEST_F(SofieAlpakaTest, Not) for (size_t i = 0; i < N; ++i) EXPECT_EQ(static_cast(res_ptr[i]), !static_cast(input[i])) << "i=" << i; } + +// GNN model: 3370 nodes (29 features each), 24126 edges (5 features each), +// edge_index shape [2, 24126]. Output: sigmoid score per edge in [0, 1]. +TEST_F(SofieAlpakaTest, GNN_model) +{ + // ---- sizes ------------------------------------------------------- + constexpr Idx N_x = 97730; // 3370 nodes × 29 features + constexpr Idx N_ef = 120630; // 24126 edges × 5 features + constexpr Idx N_ei = 48252; // 2 rows × 24126 edges (int64) + constexpr Idx N_out = 24126; // one sigmoid score per edge + + // ---- host buffers ------------------------------------------------- + auto x_h = alpaka::allocBuf(host, Ext1D::all(Idx{N_x})); + auto ef_h = alpaka::allocBuf(host, Ext1D::all(Idx{N_ef})); + auto ei_h = alpaka::allocBuf(host, Ext1D::all(Idx{N_ei})); + + float* x_ptr = reinterpret_cast (alpaka::getPtrNative(x_h)); + float* ef_ptr = reinterpret_cast (alpaka::getPtrNative(ef_h)); + int64_t* ei_ptr = reinterpret_cast(alpaka::getPtrNative(ei_h)); + + for (Idx i = 0; i < N_x; ++i) x_ptr[i] = 0.5f; + for (Idx i = 0; i < N_ef; ++i) ef_ptr[i] = 0.5f; + for (Idx i = 0; i < N_ei; ++i) ei_ptr[i] = 0; // all self-loops on node 0 + + // ---- device buffers ----------------------------------------------- + auto x_d = alpaka::allocBuf(device, Ext1D::all(Idx{N_x})); + auto ef_d = alpaka::allocBuf(device, Ext1D::all(Idx{N_ef})); + auto ei_d = alpaka::allocBuf(device, Ext1D::all(Idx{N_ei})); + + alpaka::memcpy(queue, x_d, x_h); + 
alpaka::memcpy(queue, ef_d, ef_h); + alpaka::memcpy(queue, ei_d, ei_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{N_out})); + + { + SOFIE_GNN_model::Session session("GNN_model_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(x_d, ef_d, ei_d); + alpaka::wait(session.queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + ASSERT_EQ(N_out, 24126u); + for (Idx i = 0; i < N_out; ++i) { + EXPECT_GE(res_ptr[i], 0.0f) << "output[" << i << "] < 0"; + EXPECT_LE(res_ptr[i], 1.0f) << "output[" << i << "] > 1"; + } +} diff --git a/src/SOFIE_core/test/TestCustomModelsFromROOT.cxx b/core/test/TestCustomModelsFromROOT.cxx similarity index 100% rename from src/SOFIE_core/test/TestCustomModelsFromROOT.cxx rename to core/test/TestCustomModelsFromROOT.cxx diff --git a/src/SOFIE_core/test/TestSofieModels.cxx b/core/test/TestSofieModels.cxx similarity index 100% rename from src/SOFIE_core/test/TestSofieModels.cxx rename to core/test/TestSofieModels.cxx diff --git a/src/SOFIE_core/test/input_models/Abs.onnx b/core/test/input_models/Abs.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Abs.onnx rename to core/test/input_models/Abs.onnx diff --git a/src/SOFIE_core/test/input_models/Add.onnx b/core/test/input_models/Add.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Add.onnx rename to core/test/input_models/Add.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast1.onnx b/core/test/input_models/AddBroadcast1.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast1.onnx rename to core/test/input_models/AddBroadcast1.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast2.onnx b/core/test/input_models/AddBroadcast2.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast2.onnx rename to 
core/test/input_models/AddBroadcast2.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast3.onnx b/core/test/input_models/AddBroadcast3.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast3.onnx rename to core/test/input_models/AddBroadcast3.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast4.onnx b/core/test/input_models/AddBroadcast4.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast4.onnx rename to core/test/input_models/AddBroadcast4.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast5.onnx b/core/test/input_models/AddBroadcast5.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast5.onnx rename to core/test/input_models/AddBroadcast5.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast6.onnx b/core/test/input_models/AddBroadcast6.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast6.onnx rename to core/test/input_models/AddBroadcast6.onnx diff --git a/src/SOFIE_core/test/input_models/AddBroadcast7.onnx b/core/test/input_models/AddBroadcast7.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AddBroadcast7.onnx rename to core/test/input_models/AddBroadcast7.onnx diff --git a/src/SOFIE_core/test/input_models/AvgPool.onnx b/core/test/input_models/AvgPool.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/AvgPool.onnx rename to core/test/input_models/AvgPool.onnx diff --git a/src/SOFIE_core/test/input_models/BatchNorm.onnx b/core/test/input_models/BatchNorm.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/BatchNorm.onnx rename to core/test/input_models/BatchNorm.onnx diff --git a/src/SOFIE_core/test/input_models/BatchNormRelu.onnx b/core/test/input_models/BatchNormRelu.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/BatchNormRelu.onnx rename to core/test/input_models/BatchNormRelu.onnx diff --git 
a/src/SOFIE_core/test/input_models/Cast.onnx b/core/test/input_models/Cast.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Cast.onnx rename to core/test/input_models/Cast.onnx diff --git a/src/SOFIE_core/test/input_models/Clip.onnx b/core/test/input_models/Clip.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Clip.onnx rename to core/test/input_models/Clip.onnx diff --git a/src/SOFIE_core/test/input_models/ComplexTopK.onnx b/core/test/input_models/ComplexTopK.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ComplexTopK.onnx rename to core/test/input_models/ComplexTopK.onnx diff --git a/src/SOFIE_core/test/input_models/Concat_0D.onnx b/core/test/input_models/Concat_0D.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Concat_0D.onnx rename to core/test/input_models/Concat_0D.onnx diff --git a/src/SOFIE_core/test/input_models/Constant.onnx b/core/test/input_models/Constant.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Constant.onnx rename to core/test/input_models/Constant.onnx diff --git a/src/SOFIE_core/test/input_models/ConvTranspose1d.onnx b/core/test/input_models/ConvTranspose1d.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvTranspose1d.onnx rename to core/test/input_models/ConvTranspose1d.onnx diff --git a/src/SOFIE_core/test/input_models/ConvTranspose2d.onnx b/core/test/input_models/ConvTranspose2d.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvTranspose2d.onnx rename to core/test/input_models/ConvTranspose2d.onnx diff --git a/src/SOFIE_core/test/input_models/ConvTransposeBias2d.onnx b/core/test/input_models/ConvTransposeBias2d.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvTransposeBias2d.onnx rename to core/test/input_models/ConvTransposeBias2d.onnx diff --git a/src/SOFIE_core/test/input_models/ConvTransposeBias2dBatched.onnx 
b/core/test/input_models/ConvTransposeBias2dBatched.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvTransposeBias2dBatched.onnx rename to core/test/input_models/ConvTransposeBias2dBatched.onnx diff --git a/src/SOFIE_core/test/input_models/ConvWithAsymmetricPadding.onnx b/core/test/input_models/ConvWithAsymmetricPadding.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvWithAsymmetricPadding.onnx rename to core/test/input_models/ConvWithAsymmetricPadding.onnx diff --git a/src/SOFIE_core/test/input_models/ConvWithAutopadSameLower.onnx b/core/test/input_models/ConvWithAutopadSameLower.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvWithAutopadSameLower.onnx rename to core/test/input_models/ConvWithAutopadSameLower.onnx diff --git a/src/SOFIE_core/test/input_models/ConvWithPadding.onnx b/core/test/input_models/ConvWithPadding.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvWithPadding.onnx rename to core/test/input_models/ConvWithPadding.onnx diff --git a/src/SOFIE_core/test/input_models/ConvWithStridesNoPadding.onnx b/core/test/input_models/ConvWithStridesNoPadding.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvWithStridesNoPadding.onnx rename to core/test/input_models/ConvWithStridesNoPadding.onnx diff --git a/src/SOFIE_core/test/input_models/ConvWithStridesPadding.onnx b/core/test/input_models/ConvWithStridesPadding.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvWithStridesPadding.onnx rename to core/test/input_models/ConvWithStridesPadding.onnx diff --git a/src/SOFIE_core/test/input_models/ConvWithoutPadding.onnx b/core/test/input_models/ConvWithoutPadding.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ConvWithoutPadding.onnx rename to core/test/input_models/ConvWithoutPadding.onnx diff --git a/src/SOFIE_core/test/input_models/Cos.onnx b/core/test/input_models/Cos.onnx 
similarity index 100% rename from src/SOFIE_core/test/input_models/Cos.onnx rename to core/test/input_models/Cos.onnx diff --git a/src/SOFIE_core/test/input_models/Div.onnx b/core/test/input_models/Div.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Div.onnx rename to core/test/input_models/Div.onnx diff --git a/src/SOFIE_core/test/input_models/Einsum_3.onnx b/core/test/input_models/Einsum_3.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Einsum_3.onnx rename to core/test/input_models/Einsum_3.onnx diff --git a/src/SOFIE_core/test/input_models/Einsum_4.onnx b/core/test/input_models/Einsum_4.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Einsum_4.onnx rename to core/test/input_models/Einsum_4.onnx diff --git a/src/SOFIE_core/test/input_models/Einsum_dotprod.onnx b/core/test/input_models/Einsum_dotprod.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Einsum_dotprod.onnx rename to core/test/input_models/Einsum_dotprod.onnx diff --git a/src/SOFIE_core/test/input_models/Einsum_matmul.onnx b/core/test/input_models/Einsum_matmul.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Einsum_matmul.onnx rename to core/test/input_models/Einsum_matmul.onnx diff --git a/src/SOFIE_core/test/input_models/Elu.onnx b/core/test/input_models/Elu.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Elu.onnx rename to core/test/input_models/Elu.onnx diff --git a/src/SOFIE_core/test/input_models/Equal.onnx b/core/test/input_models/Equal.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Equal.onnx rename to core/test/input_models/Equal.onnx diff --git a/src/SOFIE_core/test/input_models/Erf.onnx b/core/test/input_models/Erf.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/Erf.onnx rename to core/test/input_models/Erf.onnx diff --git a/src/SOFIE_core/test/input_models/Exp.onnx b/core/test/input_models/Exp.onnx 
similarity index 100% rename from src/SOFIE_core/test/input_models/Exp.onnx rename to core/test/input_models/Exp.onnx diff --git a/src/SOFIE_core/test/input_models/ExpandDiffSize.onnx b/core/test/input_models/ExpandDiffSize.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ExpandDiffSize.onnx rename to core/test/input_models/ExpandDiffSize.onnx diff --git a/src/SOFIE_core/test/input_models/ExpandSameSize.onnx b/core/test/input_models/ExpandSameSize.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/ExpandSameSize.onnx rename to core/test/input_models/ExpandSameSize.onnx diff --git a/src/SOFIE_core/test/input_models/EyeLike.onnx b/core/test/input_models/EyeLike.onnx similarity index 100% rename from src/SOFIE_core/test/input_models/EyeLike.onnx rename to core/test/input_models/EyeLike.onnx diff --git a/core/test/input_models/GNN_model.onnx b/core/test/input_models/GNN_model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..833e34d543db48123d848f2ae8bf3bdf51d0bc09 GIT binary patch literal 591051 zcmeEP1z1!|8>V9gL=*u77eomWmaa1+7IG006+0G$6;MhR5V0^XcE?pwY;5e{%!ppQ zy8{~=72E6mXZvh`ig=Bm`#;a`p50~7%$)h&dFP9F^jIdkNdr?7lOtnlRdIFla&l=D znd;)|>AP~Q4QrG(%a9ip&ExjrMWm!A^Y|?7w1_w^$3!zCDykMMAu)=_Z5kEDb+OhY z{{(es=_f?Q^Y}>-kvx+SUv^4zL}WjH3NJZ0HlZIc*{M}Xh*Oy0J;8NsLUaf(MerV1 z;XP;aO6kdW8dF2$wOW-Fa2PLsAllnzF30vxvn2?a}6d4!6=W}CXdC3u^^#e^p zy4#lXj<$|$TZQ}C_6(Y&&Z?fbsJ%Zup@*@+Iczly8-ED>ah>9X@Kdi3wU49A~d9GlDcqXgW17hQNY-{J3 z#CV=_WCkzBxm9vRQVdu8WSkWSQt6sJHfM#$=9Dzh+L0X>o4{kcxcNA;V-whY5|c^$ zqgZV6pIi?GeU^Wt5!;2sYwTgqc4qT=aebVGpYq9g@{(Dfv{-@($Ew13(?l~WHcdPQ zTg0YuU9C0AKNU6w$&ZVtAbItlPC?oI$YY#P`V>rVK)xwI1zo>%3d-)mn1W=49oY?f za~XqNt_etf{CAsxg<352bXFB1-ihXz~R?NQsSzb5{C8 zCyt6g6^(!sS-{9ZIE${t_9Ol%iq|J1H7+HnK>f2+B3u2NxWpIrOR!igeNnC#Qd%P4 z0Wz^k7fPch%hVf)zFtnVl)o2aeIT=w7-y1bePB*dE{7|cprw8+n4^k@+@(xI_7xej z(xVp)iPD=+9KqY7hLgb=rKR-KOzv}6^ZRlicNZnA7q(c!p}8R;1O#LxhdGq;q{yBS 
zCvjUN`tY42dC3V*kulB!zUdq(Vr@=wiG(AO>E7F!u=tq71UDxiXMSu7uQ4Iv{UV}y zd}rYtb52Nb78{wf%*bSn%~b_f2^&V5D*8J+;pKN~l6ca|+T-Htp)`ugJhBH#Uzk;4gw~Inp8o+YTpjcY-@Q%epN=Lg_d-Nzm}f=g~0tN1>sr zPRT$~sV~I#CzlS4sSRYqQC@6Rx+Mc2MYGaT5$Ad+ff19iz^}qN7a`6`8$tRzyKJm` zTZwfNLLtMJAB=U3NiA!Wx2tyj`%|f zn*Ijda&PGnx>CX|%f0E7A1(%ZM#d#3kU6Z9U%HjeFWtDR_+_^;ahLik+;x-jORh^L z!>nYOB8Hj8Vo3qg#6ZR_2wW<%Oz$#T=1n;$Jo9rvp}PtwWUO7PSf&gWr7PBN$}^{z z4w3^IJW~-T-M-XAE1OJ`0c2pM2t>IqN?fy7nV9GQRhZ{4&oza(SIIVixt%q`V!tQb zyvkz90U*mFB83F-1p85LRBSRYGDYUbG*j`B9NTndvQ3|gVVf1_$5d>+3Ye66SQu_h z#lQPvFDBRZx8<6LN{7*PQm*M*-rc&8u&csP>FPuX=^GB83+tp;>lF(m%JmBxIDj;<4|U4GIaaF z`+M;gN7kMS@MUJon5L^7({!y>O~Ie(!YPw#R-8jq@sXDs2g~8kltH4@lqGf^^m{op zzm#Z}8zhwmP?>1DDiO^ua%w705S3V`;L_v@@NQb!PR*B1LwKkByE`jE$;QRfv!sL|0rFH)~z->!5RT?#ct- z;I34h{N?75pK(@-TTU_~gnbi(o%l-19QS`d6u#sq2!)F~Bcf9zNm`*B#H7cUzOC-x zK>YP`BA&v{l*yu-5?So5w0mVnz6wC>~339sm;-r5kJ|Iw*cNrFk1= zAFFaVeo=f|X#&Z^s&sM)-WK(M>rQ|*v+ND{&F>AY?n;;-ayXLqDmxs-ov8FU$~*!U z;Bg!+^EkT8c^uu8J&t9kP*z-UTWL0{z%-ddSY_x%GULnRbCm9pr4}t^j{l!N$8ztQ zl?ITBm%`PE>#js2zbLq^G(}Vco}$-Lun(SIHt3a9f}Wr*Lk2ScA8yB+@`Te(72%Zn z9aRuc1;?X@9O3-wGAWhJr~C-}#jZz>-#w6uBN<6$0Dhg^zd_ zC8ZK_ac|kj-gMA$Wuc2=dh1`I&o)qsW3al{m>=u1*xd7#oSE1XB zxV%KZaMDV0i1`GymqxGj2V9+daw7(k*nMbWh?bZ>L|pD3|I47YcF+cO=Z` zA@BHB_(at{Bvi>hnUO?S(F)G|9l_$wyk;r|a6O61KPr3KKKi|vZRM&@;kv<#?=5k3 zGn;SMr813HPE_hEB>}F=HX5FCPHlH3r?#t0nQ62ayxU!`|J|$1=|7rSc zxx;=+11KXXVjUK)rxGU#D-*{&zW~P+9oT}X-KVm#>_#P)$zg{OSP9Dh2l6KWQHtzR zd0(}=0tb}3spUDKf`{5mjsv>=b{z1F{nB2)EeE7a=YWf(9MG$xIiScdO=^jVfgZ{%(EV3ofuC_ctE|?SM&6W2%>OA1REWh@ zXtloAcVK~vHTr^hS|tk=ys#4qkaVtS$rrC)R@>la;0{Y6jNFHY5AX=m_~6Zs}}NLm<4 z4o>0n;&|~ymVlcwFp0+%Cu@ocZ}SL#3fIlUT1)W4L7%0Ol4!0gkU;%YZm*y54Q~1} z8sW=MN{)>eDxfMgRO-3@l4x`GD5w}yAi zLSSJng?O$rUw^|~7bp+>XjCUzdkV*92vOZtC8`rZ7eYHhby}At!eN$<;D8oaR};~{ zl4Sjk31R}q_3*UTmtF-ekfSzJz9EbeEjT_z@WlASD65J@hL+?YndamR&4^@mKPR~> zXVObvx){)fuO&xrEdM;?8TpMRnP1mbQ92N*x@0z!%$zb0ixGZTei`^;<6z!LwhlDl z$HhkS1T&wk2!chXr=sq#UzsGQu88C?pPDFEN}dDh&0`Ivzh-7a9Mkx|66zkFN_M~q 
zi%J`9CwWuwZ2|ca|4Xb$m>L0u|4*0?FO3Wq5dFi5pnl8YH*)irkrnT*Y{D&Dv}(z1 z*`}F@ok?WiyGxqv+mm%xS57_6Lu|!vVk`C#Z<|_0q{Q%&Lt1b>ysh;l7Y@}}tlkkR zkugM$DPEjcRB6^KPlA$vW^z}RW=&3osx)h7xk>S@W_?n;D!avEYHiNY=^ZqU9s+le_KN-Hy221F~ftJ2C0l-5>e z7E`@4R%T(Q@%ysZ<|~zJbE!tGEMzEKn*|(Oer+a?(6=YMF7gmwVmD-&nTB+S# z6<2C6ud-L_N>e8pdaBZ%<)5M4RcUK=rKwl&DUQD~4gYpiU#BZtw%sMmw}*))Kee}5 zo47-2Z?2b*wI=yDXs+CPJ>iF~*Q9mgJv4bG*+DP;{0@5fkJ&-X8cBAXS*3fI5*mSxpEpZ<2Eu&?k+N5V#wrqlOhz&w-}A$E1GI zQC8mNM5I!SorJTNU{VrS_IOCClW5{Hp{kYU<1REz&tGV6; z-^P?Jcys?d*9-j0y9?#x?FIP87aUwKddzm|j5tF?1s>iio^>)Ao(j`PML{YBVwD^*U~H^Bln|$Z(kf2D!vS*$k9UYilK!U<#v&Wf7mWkRrkM?7yA4k z@IoR=q4yVZLt;A& za_B<<_XnRj^$U|j4`uaQ0pJQC>bpVh_h5yqWySJ7COQIWCTYG*UPw;nYfs*-z~ca9 zv%Wk@WX!)75C~BwT4Hi!j1yxs%vhDA#84XJ7WDG>q=ux3p)|;g7gED=hryX+rc4ff zl*nO+G7-z;3lK~6Y$fba7!?1(RZqV%T&ecekum;mXvHwW3L}Pxby))4EpmK`xY9^0 z9w@Ff^5IC!f`XRFv9y^#jHQWNDhb(%S}H}Km7jOSjrq?VaYD&2al0ig6?l?L2bZ`8 z;;)yJO)1=r0WjogD`88yQftJw5bQF`R?xUt>G3ahm#HjdBvL0!<3-AQ{s{->JBkI( z#3OGf{Hnd!_fJ&JF6y$h1m7d_dLpY3#q!+cV|hLRbnqeze+5x zO#M!Qle@g4IgG)O*>m>>qM0GV3*}0VatHGS;y9J2UEavNM}?a9BNC(+?Ild~N#0ii zKPi#V6X(N*Nv^p*uGad}D+fcCRuV5c-e29H*thOavwlmPFJ8f=*Q7QiC1}Ox*Y8n3 zX8o3HCozpOOIM^{F6a6!{d>Wn$gEww<#1#Z@1#=HzDb!o1S{1$1nG?ge5CAk{C?#+ zUg|ooIiI!?ES=Alc=}yuyw-Y`_d&M_cSeyax&XO6si6;3SaTZ;h)Hp5? 
z9nd8I{xn&1q;+D0Ca-?}EIL^u$;2O5IuTE5K)zW{BbMERVZlNMK%AMoR#cqnqV4cIhuDtk`54s9vs{mbqNl|IBhNbl+7j*yU#D6>IO~^dyDk za&kWUsgeTnS|-AxTWPTx*M&ora;297+RHHazkZmxOB*aUcImZr8@~GU1U>7=5Ok%s zevYI||Dc>EFa3MT;3zEfq_DbTNV@b!q(=+OCh3`#DIKMebWhb0MiEKp6JD0WOXfyI zMcMIFGU8)tODEjRH|X6%3Sj;Rj>J?`&F9$ zH_-e4%JiR$YiT^h#cv)&i$e7HY8eU%Kv--XksslZe0ssFN}Ep+grf;{j${xyr&b{$ z@>mb(VfIFDf-MB{ng^T9Xiy9Cb~p!l*E0N4Pj+ z*O2|gCU$Lto`T{I9^~zWiAg9;}*j>3jT0NiKH{#rK72-1?%7 zM}jwEJ|d{hgwNMeU&x^9GBD52-f!btm;RbEKMt(jE>zX(rJ=#P}CFuOubW>o3BufC!*C=z*`c_yK z#yLrLr$jSNEMe>=kvx^C7IR%(e}pok;H*^n&GF}av?6J8t^}r-nxDUw2&w`GRg642 zDfKQ*p8RVoHG~JAiiFz)z*8wu&WJO7m4l~T0_C?A>{=pt>g^>?29w#fyXOy3JT13f zOSIO0Tf25E=MIB8APjCHI(C5MA5@gMF~hFih+S6-D3;rrOtguxp43CoLGaq0cnHXg@8cmThaC$WC<)g5aLm)hR!;jRe=ByDFsxU! zev*kI(oN(F(=CNx82iX+x6nGL^dlb zne-5+X*Eww5ajE-xcey8OnyK8({fm|pplYeX~}CzIPsUMnJiO-T-dpC+B0J?WCktP zKjjjodVvC};HWp{M9p&9w6LkfrYkj2%e@|})>RNCX_Dc!vy&~)B*#v6!O%c`BGy3l zkkdeQDMJHQ`k8!`)I}~z>f!zq%UzWNG#Jq^N&i*2AH@;9U^x|37jnDbK2o}`bPwbz zsoa%|!a0>+EU_*gSMg!GGH?=}OQsYebtzj1Rr;~2Q0dodT?yHVipjwYm%0#;s}w>l zH^`~3KH>kOLa3YcWua1%=q_|~yUB%KJV?T8dx7ZHcgYa>R_r4#UGA5Pp-N@unNft_ zEAmq2nG&LhTTV8{95V*uNy6+dO3|0E&GJx&F5$D4244iel1fd~^1{~drHT5CC6?51 zlT$?%QK3>)QDq;>6Gcg&S(HxaQ95a^M(tc?Vx5mtVx2PNif0IuE(&5# zmC~r?#$DBgi@%&S>N*BbBu8xvsf4>cPb7-c?FBsXoE-EW_&K1DaEY%FN0n%pF-gxa z0(*=c2SLh_z>>uUS2CSfV$q)`ish9$V~&{$qUfqb6u&l2M;Wz54_C?*1>$Fw>Zs*L zEY)5-GRD7@I_eD)9T3Rtk;*Q4NkvbE_<*PK`DouyQ?VQjPT-gJ6uaygF7aO`J}|X( zf;B+sF!LywS;HANSRre;u%Vnebhf za6$B_(&&E@qc4%lu8^V`W!V)s6GKKZCt1`>5nl0fDUa|9kuqTjuPppw?n${WO0s#$ z9#gDjC(Q%=)udPQN{5b_a?&g1K7NTYa*|PEV2DV3h2#ecih@OxOXv=eVh-0$kxDc# zGj7sAb=-u}3ZbBDrMP2x(dEAqV7VfR8oSC#uedm=mR=FQFCQzGWX6bM#a^xzlT`Rq zCf4vHzc@@RAv?cwm>8FiYD;8c;_~t#`H^FQnb4^r$aR)GS>1(Os_2UFBUPD%-=2=c zk7S001%#?nbfw&o_xp*iY+;D5xUu9JoJe-XReY9aQ-SP?7dZoiy!dH~sss`Ol6)8P z`seq~$Qns3aYpIzl+i%gwQ_oAWOrepk_fK2E`qWuIU1x$tjbl0A#5(`Q>E}qxzVLO z!Yh)j4iCAk4lhzT(f+5+>i9b06-kv9GehC`%If&M@QQ~#4t%lb0dve4NFcOTrSR&v 
zXLKkCuXqqRs1#oLPvGE}5neg1&8nH2(3=$aa`{Pdv7`)uD?U0?RLbp204*(o9Jb|I z%UCGf+n&ddiAW+6-d>*8y5b85W0rnITwLM+f!DxXnD>#ubPq+SR!<2NRKAkT(tfcC z(ILDPfuagm;XSd63ZECp6G%e|bFf6k$fCC-o{OLimSs}EXeUuPEHNP=-6=9Ig3ss1 z#PX6O$VuJGH?B>1Q7-o~oIZT|%0+`|@t1Qk}L=c83mWouJFP+1e6@i)fBt&N# zD*&^PA~1WCBg;z%<|HOC3%pbE@&iIyE5#mt4w$(lj{kE)^U~6xd8|A%m)nod1QyA1 zR5my(E0?<~A+6x#JK~>8OycoPYAVCC&{zJW0a`gDC5oT&Ie;c#v1c7o1ZW=RWQVad@8mcm?S88EcJU09k?v1uZa z6)%M(YHxBDmPl~Lp$1DUB0ZLG{;%Xeq1s9>CV00iKX?n0aJ|Hs%}4+a>i=05-S*pA zG~RmK`9#DR31OCp9r6bAS&>Wfu!uNHW2M6;8kGsz zh9bxojx*`x&9bTVp6^VhJxL;WYy!JaVlvTeiejj83cgWC0#l{0NJgm85%@HqF4Z)8m@KOdOCiipVNX}z0k$+b{Bq%9@5`_f4 zD@bSLfLxNoj&Dd(V6kx}D&)04ToG z!3DBeCfhc-x|5B(Li1u1l7yn?qSF*rdt6+Vi@B0{DXGZ`>;Naxw?z#i z?gxQ@iDYUCFND*Hb&XN@>2AX01D@h6&0xXtAngVE*e>24*80**hpI%;JuWFG!jyHP zK9!@cUV)XNjOSb)R1;)xE20LYa8lfb;Ev9WB~?;`C3{P@Z|UUWoCY#MPW^280iQM#}Qw!J!^^lsa&4& z9MDZA2Xu3i=YW!ssh9&+kc7xj!vRTtml9fvmzrRC4k(GRia-60IADJ1956@50TrUF zUmPZpA1?+RMhay@JeA0vTWKuNP34JO-^v2rm@H5bjICsWqP>Rm{|O76$Y6mUCi?u? z==j7~k}xIq;CB#TyLfwB>xwUej>+v|cm23MEZLbV;TL0d5x3V_+9~lyR(vfXDM9-` z&+?Z11T0V79Vy3C=ms&~GE4cSqfk!YmBQ^9SSA8Fu8W5fwfk0hau;+~+Hlgt>t%0J zcUAJbpN!YJxhf~big}&Lbj43synQxd#S^a;S15GqP-vuQtXJ1qy_u{LQ3o0;r zao{!@_n|KIery3ha-DIz(QUwoM;7?9&r|S8-x@_UHihdCsloh_qkw6ZMPU0qLo{JA zfc;t+uc?Dp&0K=^ z*;%0(%`St3FT7yOpt{)4BMWI?H-$%x>tG)ZZQN?!VyeIOHPjsR1jENy$Ir(XqQwR7 zX#Tam!0PWpaB*BBTBlJT-f3SI*BcJ-U3NR*ajyZqzxp)F&k6(wvi?Mk8|0x0S+{_9 zc5R#$ZH9M_SOj*g@Pr@peWB4}efaPG{ix4I0Nd!!0Do_ZLJ#)6M1OQB0CfjBgYfx_ zK&$mEbfW%Iw6e21jtC3@ZH&f&&7dAEO8grIC)}W&B7Lansb^7mR|{OfMJzZ={q48> zQZCiteTg6I)CVwjCR8|$HfCE9qTfgxOy zZH)WBeTc&J8p6flJ;3pO8nE}uy}-QRJoLdsm)@v773nQKOUK!41$U~gL<@?~qUyTp zFg|qwb@Q|Xsy%l&io29Xam_uUySEE^gT{h|-FG9?8F|1nZUJh#$Pk*WVB__gJE)#V zrlG&1Jn`JbEnu@vJbL2b00&v@1s^*!0!Fosae=Qb9=x^&KJ|Ju9XMe=cpkL_+%@(E zo~xYc+0`de@lh1)-=hwCANU-ZbhJR(Y0JRm*t#Gl&ky>KwSkS+xuW1f4Pe}bX<+p1 zy;R-ayU>m!UBSvbE%P6&zljbs4n`gLACSYQ-5{+OfKD5|F$7kyuaOh>)$pYjyj_B3 zcASx~6@HL%vSMSsgKn@<_v3yW-rB-bM`ogaEo(vcIRMd{`gp_o+2Cm45@eH62X9*R 
zgo@pij`|E~1@?xy!Np$YFxu)PTHnGKt-W@Hs(tM}r90mq&mLj|*~6BAQN`8q@C7yD z8&Ct*+;N#s_Nj^EL+(=SG2S>lOCRpbsEI$-e?|w!RDn|h&!f=hHF4YZYe8s0BdEV> zGnnYbhOf1rfJL>vP|DOrl!b5q{Ao^>(B9Gu*NY!Q&(64p4t4PdCB7fPqXyp8gniAy zX8-HRy^1b29%YNdCmcZ!9-4w>Z?Y&}_&GFU+C*?Aw<}tu?*=)^&Cs^O*@!po99VL; z4%QCAVCA2MXk6n^YI)0CD(2<~`fx)VxN#eWH$Hy>&bJ>8(pygfiuX&C zcNgfOd&}d&wKIF@Hx|BFcaA+ynUR7{PkaiR{dEUjJ+%;w`S&vTqc9QVn9iX+_tt}n zSDu0H)lUMK0c+5DZ5s5SFbnOSS{)dS{(!F6cuqO10l4(gNpLsG4sG%_2N_cf!9D)J zVB=ppAc?CBn+47Q{bH}9IE!__bqo!%S9J!7D-%%XUfS^Hv}<5oA8-1G&wXH*G#x!o zyi5->+l21%7S0lb%D{;Ia2MAk*{u_@3bj)FEOYZFs02v{>N)({~>R zJ32aIzxD0tHGOMB*Q+teG(a0J*W84(Upzp=hR;Ighd6j}@kO*c4ubru25|L^spWEbMs29^^aa}Ov69{un(Iqd8HKj}GaV%syhK%# zs{{4Ky09H|g$d)P`iRaCMX(nzZ&dh?`pvYTT}Zo3u27!_+dV z5LS0&*wh-;KDY}6N0p!!{u;n4sSaK@%O2L4I}&U+9#0S1(gggo={`UYicqZ+Y>-lP zjBdec53X&UfF61J!ewo5ppnbiuvP21VA%F57*)~7r(8VogbmKPziC5kW8;i>R-1`> z8>j;t;x)xhnuv%x?0UeN1CTjCCxwjk)NKIWdVfGg{c23AL(fW}%= zL3<4g@Ch6M8V#SK(f5;)#iZrH_k|}MSM8N=wsj-arnV^>y>=jY_rwm`zB9s;Qm3M; z2VKy}Q7^%QryD`dNsVEaTOOKrWhb~a@iA3!do-H8-wba*%|SUOxD-x3hS%D2@>R=>l=Z_Z#Y4Pb$W z#X_f}z3`leMZnv%F~~I9<>#H&5sb1_hm&-5@%t2Wym*2)_V>Pyimy~hC-1fe1-12Y z!Kz7M>5Hy7O=}P?h-*o+pPr=sA9ez)4qiAtk&R%(9(Yct$KdHj7pUggZ)dIhRdGS% z)|5Nn2Is6yfqJoIo$3$--6P*1e~-bi*j^iRE}lkHX75GjK7W7-NA82I z^#{Yy{XId>{q3|`Yu)^lOGZ+G!=ljAUMs=9Q!Jc0sTmz&RSoVRX$EJR_J-qXM8nJp zC_jwguife=pwP>Ter*2^B;B%xcRae_%NyF^yeV$zUYDI24QfO z8^Fi=`r)hH-+(^4T%0*?7btG`hBi1i8YQ)h1YP*fh_$i@o^R0+X2$nN?w12FzMg^a zQ7z%?5-WIc^ea%fV+>Vv;SJh2uLdmANW}Omxv#kyE?hJR9NgOsUK|{N=B#@Q)Iwiw z&zM;a`m0UO|MY?f`#uhWEsjk`Q*7>2tGIf2XnR|%Hv3sVwZ#e+HEN6kF4;lv+9815 zI~Q>-IOC%;ZSmEJ`Je^oAu5>cNF99U4vMS21MI(8)X`PfQKuLO%nsn-#gSahUS~$% z8Rh}CI<>`|hk5i;?tOIh?*rgq<}tu|&=Gg)-W@+Z&<)%8HpZj6UPdR^hU1dW(@-%R zVzrgeP{G1joV+j<=Xu_v{pa(Mn)gGbRvl1poip&Ny^heo^$Jvc=p%@mm=6k<{Y@?3 z*$t{aUb`cB=mucCHy!6~{YY;d?GJMrd!nM@iP(Fv9*A1yi;Z9RLTZ_Meq@@!30G(^ zXF*5QFb~40H~)fBa}S{TE=SRnwj+>+2@4lbze*MDz5&$q;<4890WjzEMe1IB9c-Cs z2Gu5hqB6Htp-a?n0GsMl(bL_-u;cR);4OC@xVUrx%-hrrIYsxv#!H7_{|!xOP8SH; 
zXS>4vQ@X>YQEoVF0T;e)+8h)$>;!ygQn-%pVqo{@0x;{GAq-z;fO`**h8&Nbbo#X( zphfGes4ys!8a44KGS)D_cXC&Q77yH@Q&DvoTBC%{qQdb(vvw%T7{FXy0%~{;!0ZJW z>3wL10}rQRy1*aiT4mrFI)OMZvIa7^VFxeP4FTyPJ@B4vTNt|D9UITmgavQ9!H0HM zIAmf?%&9$&IvE#@uP)WYTZi%CQr(+KE4?f9J{<@BA3o2gnj-+uHpMFON92SJ^3NK6 z6xC{}3AGN}z=Kwc(EFIS`0dc0Ab8Csr1dTbOz1TX=b8?Jfo}ddMVLJZ}W!-HF9uQ^* zSJzXA)z}em(ZiGYaD~l4;1DhZd2>~RELusX*W3sKVrIjTWApG9v-;S(WH8pB zv;bOFJq|4z?!vx_)A2Nvj?J1o;yQ&E@JzvBcx!(T7@vI(H(eG99hON}5*1I9v7Au4 z1TG3JfbsULl=&t$zT90K7BB9KO73kz-E0CN$2^B}k19kuRl7sAqBF>R-cc}kDnIJwXO=@ZjOg7{whJi?fh}(h}9rDcPMW7qz`rnda!-R zj#$mziZ)3MzzaU{ac)u=nv)*@Pu8@=MM*c%_ATu}%_)vBckxkh((E2m>vRRFJ={T^ zUHKSYHcrLcW0DZNXCKffMFaQwlmOMT4(tryf0}BzTnleEs}0qz@26{6y+BL(weiw7 zmbmy-6EH5+0EfQ0LuGA!gmR*vp-d_(zqtKndd$5*SZiA-R=m9CBRj9sD5@$NXda}hY1)F1jU z@TVW1YlsW3@AKp2Ed^ctO>k(x0&0G>nIPwYH(lJYF+BTbI@q4z3kq>hWM1TlyN&FO zIY*vQYMb8dEO=>-do`$sM_g&PP-@=tdpYSqv^- z%mQk)uK5O5Z-6YFDI79C9;z8xP^Uk*V!Nh)gFfSXU~m2a;J`MA1)En;Y?C;^*7w8@ zo9W=UhBc9zf5#nXdsWAU{g0x6qaLuZpDC;}u`@0%m=1zlo(GwJLqJ&6Nsy~i3+IG% zfZmbv756enOY7eUnfs6WIqE$@>4*Jc=FqmZ z@$@B>My?*@gv>>1R#oXcYSQ z*FQ|XfK%O;^<{(P{#X@B@|ZVg-U%h z-vDb)sNqrDpP{8Xsd)bJeh zqTt82>3IK^mT+#%hB$c82-GKY5Iz|e0W#5L+IYk>+RiNjbs5|lf7tvc67Y=RfjCp`7F|3u8BU13 zfYjd4M5n{uv47D80Q+0wTO%y-!Ek4Eb>T%2xUMb^ocs<9PQ8wdgU*0^M?#?S(-N?G zV{06vwFMQ}96)(zw}R!s1O{$yj0$Hqqu*}cg(g@W0L7g{z$%V8WEbuO>`xpjr?nd% zx7`Z1u|OTWzy2aoT!gnG*i@z%dH zU~%qoFy-NJN^4Fba@=}0SB&-5}B#M2oWQ)dvFcB_I1a(Z46P4T|3%;R0O)Z=@e?_+8;YD z_ru12wIwU&T42294NcY9g3Lz_LsyHAA#bC#=wmx`>=3D^F1&>@3`zNr|NA}whh4UT z;Q?x}N8~tgF#i<_=+1&QZt`HktNK*o)G!c;n<4W{wn%N@L*L@mIbc?OJ}k(wM>GB$ zgBWKw z73Poac!S}^ZR)_zoUC`Y-C(BXe!53aBbd~krp6J-vs-i;yg1t%=Pe1tQ?|Llp-(gL z;2ks?j5DymU2_=DTY!4(nu#)#tI^lPn&9F+*D3SHW@z!qelW-I1HGq(8B90%gs7+g zf{$M7Q6Sq97N#9XQ#7xEg4yI=J^n(W<}0YY8zIR2lDR_PvzBe0BNVWI6P_r@c*X`-m1|F_dc46$MhkvF|!u*9BGU*ZO2n; z`X_fjbTEYP>zH91GzF*)X+x>)UWeEZ^65GQ52K+`5Su?(0=Cnp7|u<>MNfLcrw0Py z^4e`+W>q~u->pvA#KH$3+-3mP{9EsIU^`<2=P0~*vKeIeI*Uv)df}vHr|A?+e|*xw 
z0~I#!f;ol}VC!NECU>5!pcc?99b6GI-B^ONbF-8R&y3^g#TyXg4`INcEJrFm6 ztk>-WQQqoaAelhbi;Pa#QC%Oa?dlAsoHz^)E^HoBCz=G zFsL!u5$8>Oi>935fN-tTXmIfd#Evqg)!SRs}$u zHvqbKU4iDLHiU1(&Lg!$>iPb4qv*UH{ZMF9BTV=BfX>d1!?_C`0rYo-YG;OlW=qt` zGU!iFd2|gJySl=S-SqLv!OqbA{dVx$)e3H&ZG?Mm8wSnS1flJ_!(m?2GvMNy<{;Ch zir<7*yV3UjyHO9Y3M6Scpj8Wac>8)ke6a5TP;{&nw(FaL&pVjnxXCqPUUMy6*v$rq zhaI3@A|` zi|(O#umRcXS6Ks+789?X`4&(deuqBW&je>pGr)zj{z64hC!(Ung&_FGC&X^`4`O@K zbnpEw@Qc+JIIG1wbUIoeejGmrsZAcaGr%zd7iOEHITPQ3P}Ax_P4|!dUX56=n}zDC$if4=p0mSwIZfgG19wpHhsWTfPZK=BiP*5$o~Ys2gad|9Qo5ugEHA{^BQQ|(;eu+$$8YB8HUhb=N}L?xDjx9 zbQEM(t%HlaqQSuZYOpt|3m=>crhC<`3Qk+)fZ@Y;gXh#9pb@hdwV6QE>&{O>(a8(Z zt4Rpa?X$rM^*bQ0UnBf@L3h-<{~F&rTUeMsgNq*2U4tz5yz*PJw;_yvZjU=8yhQ5# zE}@gxd_bS-1?Xzb7O>|a3-5`$LmfJliVEV~@#8JEakEqw)a#Ij8b;~EjIEY%MkhNw zZ?F#vKI{aKP|v{I;TOUF94qiH)&#qh?%cr?tXsSW87+xMh4mW1UETTv6m5vx z#XmwT`oBU!=e1#~^Gy1c*DUn@qa`}Om_;>CumSIqOVFpVb5yIF#fay1#&3((Ow@Sq zYLvn0PtSI)i7&n}#k>_(FgtTKojo;=jL|&M{)RJ}WpM_a=>HFzd*LpEJ%-aKLZ_pv zR3Ui2Z3$XX%?jVy!NC!Aj{?0#7f@`$CF*p>KFa#-7%;G)5ja10IncCngimKz!+S>@ zLPZx0FsEu`e4bq$)!tQ%s8x%>;-2g2goBsB2=E%{_iuzZuge35DWk!t6Oq8oDax<& z@hW(TTPN`0%?zNkz!Cltp#e?Z>tWZxETH4}H`q4L0FE821Ny(MhkZuZLBj_bKzdXh z8a2uX*t|Ew%OVYMQ*L7z@&Qw(UCpp#_h#VC1RsQSTu_IZ7m?wsap=UVy+~UvCV#O; zCR%xMINGbG2eS9ng*lFT@PpcF&}MCOI`41N8ur=^52`aB%=a__TXZt1 zu<-53{Mb>t&8M}X)c|wg{f`uSKuRaK;ym zj-$gHN6_70-UdGAd9-i*1GI1ZPQRXdH_@$SM^TLzOTg9mnz(WOf#`gcKX`uWAD@JO z>cFS_HiAFY)!>VOli+B-TKLhyw;;ZOBd&h*A$m3E8cLgBhWGCM3zekY0-9sBk#_Hf z(A(M&%-5*{12l@MWj4!@-?S#^agW)k%D6@_;tyk3@>gwSQ1?AONc}BkwV)|liY}wP zt4}~~2N$&A(-n}Rn*emIPf#J+)o`0FHRxcDIbM2xIoh~$ElO|l4vnsL8P()y;e95_84_GvH_!ZA(Uy~ z2~g>JaL{XibRg#%*iF1Q=C{^^Mdr;xA#jFO))9)Ueg_TqdrX}PNC)FDSR!zG0A0H{ z2iVb7sm?83;H)7Obxf}iS@fWR;hz?u?kHmvxHMzu`bIhE$O{Kl{mg09E~`G|KR2Ma zych=p{QINf`?Eodq1EZUBvblWHz&C5GCMzQp*HN8H42S?dxA2kmPXChV#B9dCOC8O zGIVV$7i`b!henRh@mq6(lYi-!HKwdrqT_atz=q*^*nFcFT=wP>bvXVlSl4nIs8{U- z^7J%-&f%80huuVAK1vIF_LvMhU-$$DpRR+a`3*$J2F?Ri6bJFmFMy1|kwAA&Z5aL5 
z9)|}N0Oy?s!1$kk0f^oNMtpn;lJ1(}`u+34G`k$6NycViW;$r|QVSXf=h1Hp?$Vd2 zR_MsP59s6iI&e?jJWw(r9Gvo*3r6pF3GO`X3~IcefvnHprB;qt!)EOkfs+ok=rw6Q zDW{3I>D`OZfEu06@vW6((X_$Wz_hp))KU^g%KM`MO!LhJycaywG(a1i%reJKy4M7+ ztIb9B2T!2Yv;G9EK_tN6Ff%{fD-}%I_6``@9`a2wd5dC)>4K4K^zn#BF7Q%_9bEO; z3J%JR0H5B^1fIiPG3Squz(!L8faQ8{vqxXQ=OsA5L-8y0#H9$m%w2-+x-O($ZtkG> z_tb&BP7vHpvBNs*&M0W3GhADf19#1t4*Ul^M@5q@@WYFn(4r3K!IdoyP@3ZyO1H{e zI??U~^_26*FS>tw6kFtmy_&c~^VL%j@XrQkKY7!q-xU)D9)O|e-*^J)`7R3CEUU^r8Nt501{_F{F%nMb)q`F z-Xa(IPb0tc&>odEe*{d%?Lle$jpzjb9_m(4123Aq2j#D+hu!vGLtC|8QbuR}K$n4A zz?$OM$Y9oJv@Bvj{pScfymnJ<@aEYFw1wV_N*ZTVhn82v#e<5d_Pgp++g5Y{zN59_ zwqOp7>Ng%4WWJ(1t=oc5T%V2nu66}~#j3;6ebiy^1~tHjS#_aRO+yf^wjK3oTLlI^ zTaT(gct?4}e?)d6Ht6;OeUO*Bmb$xV4wy=ZQQeo-fQxO_{9LC5qhn3i0?h{XVCSa~ z&;;&Na=?Fh9%0X-g3}UD9 z{5I6lgO98S0BpYv)zMvnN=oL^aToVdMt+u{PKh~88&nl{(E0@KfZA}q`vde*zZyPa zb^}bX;(+YHU7(?d4Q#0!gg7zjC})1|j#*95gW3}wBOT}2XjYm9>U-=OihIU_Y6*t; z-L2i=&4%m9vd2o)3y@@)z4Z|6SQlnC8%^uabHRU9YXjP`*>H>5KlGkYZnzuQ3|uy< ziJQF81XtJA!u{IT$NSS~BBxg$LFz^$SSyPK=N_#Mo_p#;YyJ`{YH>4AKle7RodCg_ zP7W}W)eRhR4nTd|+QB9_^f1?98$Gq&4WParqa&Z{;J9JV;6RnbXlGzQP8^Cb6h*X z0(GOO(9a^Qu&zfnJjHFBZ?}h|kVUE`HtWMh(bNr;<=7IuKd260_FqJH>K##x?KIl& zxGp?xdm4DEHGl;KGmm;MqtZq;#Elvl;2AH5fWvic@xQO?5zLqYx)z;73mY|rDW2od z<#X?-s?~zf>LOb}``RJB&Zp7LhdJN`-wZTEfWdeRzj*I-OvVrzlGeAVuE1+Pr zCbnH=fjf>ff*n?H@IQxa=m%yu=&Am0I3G>}uksFoPVG^C$>Op3W{3R2wpQ90b{>o( zecJ%tk$&jW_38Au(?O_dAZc6ADlj!544K&NMa7H81EaMy;NzNaz=d_1@YMwloR~5Z z9KS%43Dfm));Jwl$WwzG-n0Q5p0YuH$_O-cR3p5@`V#0+QiDSO#Gwx+JMtYHuBSdN z2?S;ze1P`Si`3Mm&hTvOCiL#S$#k5(E$$w)1st;90;st|z(vm&;A!8p$li7lnmp(# zde*E4ZgA8J8tN^fM@`D4oKhWe*H`VqsR0v!b+|sh(*Hg(s<9m1UtJgX{OE*Ms4pU3 zISTZo#(=JayHaERI0l9oCIFN9N#N|l4d7U+HlE-0DneWQ0MBO{l}OrM++neA0RYNpww3ctDCbOu$U1#|{`8XT&&Ax*Q zNwQp{(P`-1gM6eB^arqvukQQovN0I4$qIdXanZMr=LxjAe>^x_WgOa6XbWc~Pop`< z8$gror|FW%ZZPa@6=?5S9qxGi0lhjLj#_hGQ%#NUBlo+nfx##UK1AICO?6E`eb2{q z?8bxW#3>W(Gxj7p5O^Of^#1_t&ZxnmZK~trz3lO>*SA48`)O!{RW%%S>>se^?sjl< 
ztOXqLaxri_(VChRJA=-cSQk&W=fHL@b5NpIEU06Ugx+vx&-K23K~321Up8cIt^wy{>tlXeV|eS3GyD{-0Sfm0jk@#C(esX-M;dvbKxT|Ne^XVs#Jz;R;8qOkbZ&|+UD1JmG_Xb80^Fg4*IIO?1C4xdH-^ph_oMq;e2{b8 zUi3Kq4VvMj4d>Oojb?SJg=;PwkAg>pg56duJT2}O=+e#4FL<*y&U>|->d&=+yIl66 zH)k$^!H06uoU0Sj&R4@xac5oFXWamFGNUnCQSbtF25@k0g!_b;!`51kaG>T(Kt={_HCxQ0}oc%{1|B?LIZz_q$*QmXo z+7zCL&b@C8jltP;r;0M zfz!d|A?C2&;VQUmSU)NNJ)q4rR)LRurqf$Gap`(9Y+(9zbvS37Be*%>B&zdxGTq|C zCD0qVz*hgiP_Z#xg5%b$!pla28hWzPj_+sB` zu;m}OfX4E5v^Lug40Oyy4vW-X=3pY@-OX1+9pk3*^fol6T)TWc-ku$*LZ z30qlPsg<%Lxq~u|Pd9mnBExT@I}ev4P2Nky{pT5)ky)5Opua8}y>}a}r9K*TI>1 zng}}i#Djmcs$mD6v7j>!M}w!>!duTxQ1cTJVASa9aHCcr7`fR9XWG32b3eWYOcQSyrUI*J`DpFhdUy%11kJy4 z1=YLYj%Lj_!lP^~L8on+*q1vR#ctgXMz0@%{Qs$jK}SP8WVjA~T(<_C{I@Q!YuE}5 zbhiLI53WEDcQ(Lp`KRf>CULNF;{p`nsY@Gfb%7Jimw|VCjIpOz7+Ss06;8eq3A)xD z1FT~^vhbS4JQP%(6YLY@KS^ZI)BXpFKx6K+`F^~EFG$WJDRQqy|!Hk_wQL@ z?n)gPKa~xq__(00{(FJH=}>xb9+{XejZtLOWZIzHajMZ+2YlA;X1>L=zd$da%XFOy zck`#G%?8t5*ihrzJg}~#A$2NyJUzR04U|JQLAh>2Y0ZHcU1@5Cm&DoO?2H(CeUElv zYP+i-E8{Av;g$j3^}pk1dQu-c+iwIX=ez(DF5f~AxcAVNQLCvSOJi_0z!sgEcL&@r zi3T_PvuM{6gltEfK>y$)s6(f}{F>A?!0X@4*fH|VT(ocfmjAJLA3#}T+y1Zt1xW%1 zkYGTJAQA)>d3$X!E9RU6Zh z=X_N&_ttN!tf`{ke(C+JwRd;#1U))nEN?b_#J>1bkx?g>oe$!?H&kP+)^)0t@xsL^? 
zQK8FxbZt6a%sXAFI=h^>ZRH~KEw!eOOS^EX@mnZcoAKuvH(b5Nmv}^;SJZ9y6K>*R zE!S--$xRX?D5jyU+?E~7o7@xVL@9UC;7lo*S-2x*UHeE2qF!<4#xC@D-FmfLi4|=4 z)K=O=l@PTXC2;c0QR;*=BM~F?<@jRnxXRHSerKJLZamCFM$Yd?ODiJhv2+arKLmTdP*QtnybrmbgA(ma&tl(p@y1UXFZkx^6!gaEKNZy2QzDWyRv?$2>UY0OeR+ zJ?TcDJ8V4bK3$ntPV6c9jz=`P zkZ!nt1QjwJN+u1DsC%+HQ@NEUVsvRgewI^C+P&>Xood;N!4;VH-eB3kFl>O6Azpvz?NBtIq39gU;Xy6YHqceRtMLuUux z5tB#6Cz_CA7ek5d@A0{ZJzH+LLtL(mfC5>eLc}UAuy_W=_R8keNhXwG(vA&eHv4IvE?16}qcr{9 zbYRmk+MvA{WH@)@)SFgvqt=l<-Cs|B^e-Wr&U(YXPtR$cg?o6fj9`bx0aUZfZBFTP zRy947L*{9f>7{n#Ou18-`W&&8{sU~}8T&6U2^;A3!?)aU!6dFnUl#9I{D6LCAry2QN zq=};Cas7qt-oa76*q^1AZ*4>lgVVTIv>lt?tRNGV!TiRlB~>&$P6y4Na(t+n=y~3e z^iPlWomIL6j~{=NbVrNJGJ6Npi#h@7igTLd^Qp{FUQxS^R$@lpS2XN|6-QW`iiHu0 zJoJ58b->k9GPSgW^8BNo$nm$86_2~gYMGx&Y}ONbobGVV^~Gq#>*BKWzyR7+&rxw& zJVNb%;1bnoVkIkPdC0QUwkj=CokUhjO&$@LrA%+oLdjlNn`_%dvil)xIUx2f&1@b| zMKWx~+(iXtuk&t+d4 zY9pafsPLNrc{%?r?$GQY#k&rWL3MmY0ouv-y z3GVE+Lrt>lCH1^&>pDjlCq?Yw2E$4T54V*{(W64FXj4LF1=Odpg!wYpUXV{lQU%g`nvx#b zBAlL^+KSrehe^F>^V3#EG#4o)JMyZfB}8OXRegDW5}z+rjbq(=>AW2dasTyN>R42w z^M?zGTV7p7i*_}H-o^vEl$RB#b1Pf*xXTk|Tf=jd;*`u8E%n);YIoUra&w_Kjg`_~ z2GVhOL(cTvLZNd5M1|O{;&ko~a?^!ik!^E}77S@663Up%Jp)hB?D)YVHem_9UYJV^ zEmlb$-_V$EO!1asTZ;*w!pq5Te|y=#@-r?yo8+o@^VliFODxM*g^p^o2$CB$6fN>p zm!mEGMb4Qa^7OhB{L-?o99zv>-alGNcqDceR((z=;pe)`Ti1N#@jfx~W%dX9bhM-B zyC_szgf|s&yo)?)l@jX~X_KYGE68>BJ-M!V0yi-otL6^6&Bi_JkZbJ#k=^^<-=;n3jjdOucJ4Z&!PLUSc0?XAc;a);T7OxMEK*V!-=0K1!ya&a zsx~pjrHJU;N+@Y;W6=T?AZ6C3$ zQVY?^zoE2w-c1UNfnwXWPBOOZK;6WGFZklc(z3;<4r0OXYVwv}2a$Q!S;qG3nXXr- zTe@+W2kTj_-ljLW9{2Fq9s^%D7B^yx$>b^qGR{X&_?Wij%zGy6^uD&J6x>+Emc64! 
zE*is`r91P`;BL~T!`Ei0;GY|aZ!J*v{?5{4#1-{?F?V5Q*i^yzvk&F`Kz_EjQZx zGOx_rCy~0($fSbBCve9V^K^Gdp5;*vb7}v_d(>)DdHJzy4;p7ZRW(f7NqT##P*tZS zWwg;^HtZV8yNfiYT&;?7?!I1fYt>@X%6czt-_cSrtmGiNY)B$@x0j$r8DNqqEitF$eNJC*!<$E7#Ax{oIe+^-&69;X|%Ze@DFl#BdeSP#ye zy^{=!nv26JH~7f@?L7RTwQOXNUo^^9m79$yB#K5h=l%<#w54QHv732{D-Cp!Bh{Nq zv3=UL>|olOe3QGICh+0XY3hMbHnNpRHrHL+kDPb!;trBe8xH5Bm3gFD z>SpRQu&CUaT%A{h%-~(2wqi;LZ+U)*qwE#2hF3Iyq9z5Oq=zo!(rdMI6~~WGr+rx` zeOFF&>73y-?d>DFaKlwxsb(fOJ?^NkEM_Ek^s<%~n?9!} z71VBk%ks!pnT6%6iR;;B;xx{GuD#lAU>pxQ(U6WEt)gxknMKaC@>9>rIn+*D4wUpM zQ1$7UUtE||NCa(c?Yk}5f@fKk6_s}eP_u`t*snx>HK+YNW{=}M^`nREa4C^LK5f7o|`+J`BLMD-6)_T6tr5N>!drYgsCaC2DOk`x#TI!nDOip$P<0}hC(2`e&)Q7v) z@ZbWss7JpIO7Iz1nLgE2M)kPLO)^IKF7!y@njehByQM(kQwNOYNbn2((vPtUgd@>Wx4es^*b z*Hk)jtDt&Ra%&1#D`6yk-X!TE9)y|m1gbb||j{!G`Go+F#Qi)qGoH{RIs z82eSWr{zi8xkT(MrCeg z`C+-G)k+(hmso_aKWms~xT?79vBOICn3ji6uI!*o?p2B3?paLFiWU$FmDiGu-yEJ3 zo=t_Wom2cx3JQy&+DOpF4{E0n6ZW1HukJIx&9?PjMVqau)Y8?1KQ=F@WG>7_hdRAe zQ*sZZvSBkxiFT3q#t*098P4+LB!8N@q_FfD=tk|wnW-6Foz!M|KdNGVS$bsof*wCU zlx|r+)cc=JCf=eR*)m@Kl1!_Pn8XArt`d@#e6^WhH`Lq}@Dv z^f?-m;73P{gJ{2PUg3MF3-@nhthWE$lZrPkE`53#%J&xVwLoq;HhT*_a4SXwW8>-EVtsKjt1KDUk76gdQb_W0W&6jA>Pf%i;#7ARv9!ums?+Kz9XtDk6INa#MVoW= zOvX^%BKhUq$piT1`NDF|; zONM2V(r#pVnZ3?N&UaOm^92v6%SN)2{m@X3HF4*j&h}!&>Pci-&zrA$Zc%4y%hUXF zpGjBxEKrVKZp-hR9#bY(jii;;FCDDj;(AFhg? zDA7`DSC%Kx{!@9yn#GNIW_l7|Tz*{XaA~LNcW}PCFN~1V4Tww|? 
zBqvNX5laggi>6IF)3ZJLVqA?=G-7iB<&Jjm*j)Mn?UXJYntg$ac^8(skGpYDUKfR< zjm6<=RcVPXzr2;6Uy{iZuHNrDznhjvv{Fn&P3`S|>Tx^u?2`oEI?RpB53`o94&@Wk z<1Ex2oqRcZ{ysjP=*I~K9qEC4rZRU+Zs}4ppR74{jauopsjOAFn^JaSFPiV(N*Nkz zE5qW=#q4qI_*|>+>2LG5rBzb|&m2CQ_vhVA9=EK-J-tF=g*LJkT`WPF9d99S z$9oByZXq6bJ+0=`mcHMcIm*^X&QiDk6V;mkicTz?#tU{>h`?&I)Fs2WP=Uo8 zY1q5+e9$(7ug5*)ck%P7?T2SHVC!KT^v;|0^X5z6x%(=e7{7s9@72~0o%l$X$HsEM zG9|>>r=>;N&Wq?quB+MzsIgc)%}^W*8cI*EnTq~K&*-Ao(Nk)xfjE}5oOW*%2(Buz1nLdAH(OU=O1q2v2EAzq41SF>!h0;6XYzD`pl%pN0+G0#7f<# zjQh0i;v;&eZz0Dw%Pa4AyyLhx^Ob4aCUE%X(QJ0Ql95^*-d2 zS!0eW=l7ma`t@bD37JSc+by7~BAG62(4LiEnTfXSNgLhri1zbu@<)pql&gu243KGD zyZLrH@%#x#<}_2?FU;akj_+x}yi7VX{*-Q+o`ulMtm2#UX@oN5^CG_BVkc`k7%AOS zS8>1mv#4>Yjg%PfAa;#>lpYd0k0K7NRvzZctJ*If&%OsXa{Fbqm6!sa^3#MPR3^G7 zKU~|A^R;x7Ca01p+|oc6D)p8Yy9rJo>cA21Yt)CGwfoV_J#;|7w2bfQDSBt=3+EY5 z^3EGOHLg>F^5L+X=*3T2CU}cwEsBVwX3x}V^*)i!hZ$TUXd!i;=qgu@*iMnIBdCSR zL~g5eQS^OA(Wn)9XtTeGd>e3(x>er6{mZwZO|~g~!Ng5e-kgVKl={rc^V`zLQ$<9c zI0tDmYbVtyU5xtKj3(==L0q-(7BUN+lD;kFfU@an30ZRbTTWgYOJhdPC$p%V%EQgJ zthEcib*xtK$)#g?Orv#Vky=)kdRajZ+*w4LA9j|}IdwU7w4RuFsFVm9;33ys8%x*j z=9Z3@P8=NZh@9tVQCQ4C&X}1?ICQy5%i@dDR*Q`sYwjr48*b$W9oOj=jC@HwZWk9% z11|EV#NK3;U7L$XCR67c1-QjDAKJEW9FNS}!b3*C;2O=G#H;RZ;(UgS+}JULhOKmw zGcLEGrzW0qOdk_rKU|xkWs<{VY?shA{W{ucyS0pK{*;r;cBe*93sb(5<|5C zSGnHfFpu2PgS;zyuwRd|(j@OL<@~%=+EzG&*l*=?ntn5uht6tB%N$Sfq)@ZftP>0$YqND5_W2QGrn^zi}T+ z%OWF4S?VrEo3*8~?X6`#b9=dawKW$WWhIji4(Gm|n{tA`F;DZELHpNuasBSGG^y}4 z<+N`JS$)@8-HKj4xZAzrBIm(lrA_DJa@`Cc{&2XI(A5=OCqbK`I5CVP@7<*uH`{O* zmm9QUgR@v|-E+Rf$Dk5Jz%Alt&Y{ZsIYkBlo8!BM7igR5y5~KaAa>4>vS@UXs zd49qJo>VM}wEZUJroOlMS;Hc7UB|}UKB|IPQ`b$H`SfI6jG27-QeV{1<1GuUJE9sr zKd<_I(q=XmE+A_^%VOPzcrM^Khy43);e!>AqdkVAkP|m(-q16Y zvCUFmxv-eR>*wQ*+d6VwlE3bzwuHT9&Jk+f_J;DI_YRu3b_~DxG?NlX>`@J`7mzmw zpX6c#J}4W<>}1DF;e2h+Qa-jzi|;uDvC&Gq-ww9pdBbK>#uaB_*7sogrC~Rz^z#B@ zS>_>i|Cm`yb=x{R3x6xQMsK<@B&0gqR4OQIR++`+TX@RK7c%MC&R5(s?^0!r=5Kkp zpeUN_8lP_9skUi5hy9DB@CWM>+B0oIv1V8tJsCZQLWhnmO 
z+U;Dw9NGP&m*BUN-9}5>roU?y#?5zRaP4?|QM2wj8rsN24(O^cR<^IJtnBVZQ;Ov% zd#xX+=2Oavo7=R`@!N~k-cMb`Id@B1anwrWtI&ta-%Qm#&n(4f8Wj>_)oT8?%`?`FD!xfbFW|O@vad-W(JYy39{NC++b!lrp_!C%!AaPA zxr;9K-YS_(M(|35XB2VVQ>M(#E2Ar)qlkruV%PvZxu=h@Z1?sqMV^RJU)ML*hR!D_ z8v_ibS9}~@cwb0%ua`kxhL)C%EjrT2rnlInLw~wH%~+POFDVkb=}Cj)#g~z70xaZ{i7ARvXML$qH2EbIm5yCWo7VJ>hx9BX);d}9 zKoeuJAjCwpSviYRT0NqhPV>2R$xQCuZ326pJ;_zx6_{B2mjz7OtxO?V0Hmos4 z?Xk;J_$SY%o`nl2V*hC_b7zujQE4>kYSm`_yH>J!$;KSj_<(xrLOyYBmbM?*@kwgX zp1dM3d$PLzV{!4gTS<|W;w~?adCtjWEW{~0Pg!hSOEz)MrkyA3g==2}+3(;$-O7G} zlw9)_m(04rCADe%d4o&I8+GlZahd!gY~&(dZe2juExt@$*!T>0GjfvJ{3_ny>PE{7 zf2PbKH$+U2JIdLh+k_JRZ(v3O8_|6?Y(Q)D`I(dCHJ&7$Mrh1R$ta{Px zCk&2PeoTs$~>2retZO*w>`j5 zmk&&9XEL9Xo0S&fcZhU%%gbJiPE(AwFUOYYwq$X07`HvLfKN>J=CoY*NN5}ZXR;|(3Q77GnKn#GVhJ?7W3Lq;f6VP_+x4*x#r_5 zn$+!-(qr%xT3&Gm*=qX}ZMl0=8Nafycy--cblAL$mcA`4tF5({UAq(!!LOb8NR6^` zZ`NuxaLonIxpbf9$5pzkCmqF{<}+#0UDt#m6oocB-BQTP27d1>``Y&T>wO$vC=A;EVjXl^10WVNOf(+kOQw}w&8{&u4C z%0-k=$d0pSJJY=9+QZ1z^0MJ&TY08#BeLli&wg=UqR6_b^v3rJk6L6d&VGKU%+x(n z2OZ3xW)yi(ZP>|;Y_?m7`_Z3tZ6a>)I2V0cY0zdh%ilz#r+#7^{d?TQE5E!ya~Dna z&L`it%;X_i8~A$rjl8;V5pkhcZdo~}It`wkSJd?_DsES<%p0Q~ai3j!(`f z3tTZ3U6vN&P30Yh>k?KLo|sbQq6N`{*a~8;s=PRCH6``;(n*NPuRu1nvRu$MZ9TZwROyAntBFr7U6h%b8|r*(!k`GHFrs$ylNCTolI zuiVr&!)T)?2DDqK8|-N$rsSAV#I!^@_)=S5lC_!-UnnJa*LM>~Z!MxjcTGg)_&nnE zCv94lmm9t4G?EV8y3TcX&C!_;%_lbNdy`S&Im#mQ?QEju_x`5&#D${;*;f}&2V=Bp z<^3+J2BAx-jP^i3BzQPgem0&~?@!|7i`w*Pi)@-*;xernmq(skX)l|<+fSRCUn0-G zN2t2$B03I_r`J37a?rk&f1%+#;W z1BqIr3z})OMoQ1+ zKIWU~-ohg4acz-lUK1CN2zt*Y>|e3Z*2+BX<9)u^>55Y0WNumI_G&UaK1J<)@VZjl zA{Q0fnZR3;a?xFzkJMMYSG~!uq;}X+K|GIJOKrmPOINLv^rqh^GL0xeSx0&(_j_HX zXAMU2z#C6^d&oh4QouvnIhPS(+6mI5J(@;tAw}i;1%bTF z>lN=>X(%^zysKV|F%eU=1-nh1w5@c09@+i=l<;j_CgO&O! 
z{dyb9%exE7E(PPb?8>S%!tC3w#w;xA# z!-toW>&)(}HSdq0J&7a8wb5gWGdMuEY>S9}?j=Rah$JQKc1OAIK#UaiTZ(Nr9XM{m zOzymK6HgzKNH1T-lVha_+27?OZ!tF);qKa=600n@*~uPqAe+fbZAsR(OQ9xD>x-9( z<;0e4!D7kQNa1(LQdo6esXAUcM9ZF+5_)-M`oUiABH@9VNSbEJlSed@%i5;%%STx> zxm{`bbhe*}-!)M5FsjZuo0m{or@}JUUh4;*Rax$tGE18!Ifhq_y-xit+luU#qc}OE zveX-ULmhaim)H~`>AYPWTTSt$x1S0MsqMeg=CPgJW6(y#wsKa6KB^|eW^N+K8>yss z*~E9%-BKg36VZ++KZ$-i>bqSQ(3oQG0t(QD+ZRal$|Y1 zIeTCdWpy4T+s$Yz*Ika}tiiLCnjLp(vwJ&;^V)WeNnL%|AS+NzjwmmDo*43~koF?$ z-WrZ~)%J?XXfKS*g^61pWrSXl#^PaLA+i^3;;^Y@#aO$$95X=c(@*auvcpPp%t1$? zS9?X8PxcI!*Gr3}VS_2N?_o~bGnkA|6K6>WUM351;Tun=!-E6d#=HuhZ#RjBvv!*p zWJfXmPVqARE7alTT7FyFTYlOVApAP{$m|PE`RZ;(=Ja+EL4yZNzk|J0y`1Q@oCsU- zwv4TKI@Fc2wXMM}58BR|ZJKkl%)T-!dx4sqHbE>!EvdhjFL%C5We@G={)SB{NEazuG&)1AE)Eq|r6#elm4jS% zr>@ATvqQ;_3*yMm`;`1X4dvL(4;<9GgE($+oSz$36tmwBlVwXB=a=KX&7)EF+*GLt#;#_XYQi2wTqfE!h}-Tm~Txuz@g)9McJLktnNEOu_Y%` z%*6_#>wXhqFw{++d^kW_Ju4&2CM43QH|3=5?xNbbSbo`ca*VcCe4^sRu5=@&nABU5 zoqqY%V}6=lpKBg}N82h_6OOkGl~vuUiO$K-sK+(~_FL0aJ)d!tmz}kun02ezV|y;K zr%FEIQ7@YGI#r{$vyPG8s`}|;yWi(cb)S)se+<1%x0Fd`SMZ8{y@g}9#WYqM=kZHi zsV+OF6Qj-h3C9gxq?K)BJ{Vg@$~tDEL!7%z`LJBM+|NRe-rqsgE?i3P>F|-Q`nd7& z9#fQ}d3uVh$M^Y8oU81d)`Ksa3z5?Hu@YotAuld#D~0CpUM#$JAco7aw!f`*SnXR`*pZ6O>hj+c>uJ^Us z-5>YS2=~pD-?I|W^iEJK?e9uQnmVzE?mRF3_?`PlVdu!6f9&@;R$uzRtWGviH_aM8oDz!d%ExqZ&NJOR`L0gcsOp!Gqe~;0b3RR` z`*m^&{i8;Fs%dWu9nhWQYpz!hTy*13VI$Po=GVw6_c6BLcbk(;uTZrSGr0NKR@|w6 zH4aU^lOD2b4+RanL(hWmszwPDctOb<>hhK=sH)W?suTW_vhVMv8V@B0&v&Dl$x}J% zWh#Z78O`Ts1gf9QxT?lycW}u244vnQgA{6)kskW)JQ+W5Pam*&9p`)Uif<2C#B=<2 za*2hDS*7WUoAYNbZT?s>E4h{5+}x!E*kvoO{)c$k_*1kc-cH+|`L?!dUOr3K}wc5moKki7!3N zt!-0$lMjaGkq189QctvaO+z0W$ePjKe5}j`wfpRbw5GvGYE@3*Q4ibji>;@r_T)9` zOP5@v&sO)iqHAtpn(q!jzq5ztoDxwXzyQGiFxgRYmqG(OKiw zyC(P9>UBG+ws{J1uctI*vVnZ>{gBJnxTG7CU7AWcWU8MHM{}i`U!kzaU7>koy7Zd z9#WH4hT>zJT3WrSN|vpsQ&N{j+`;U)5<5>%_W7jJp3?)V(S)r$X@|b3+H0qB@|dI*kma7N*mA>77N?M0BD#lb5IyFD+yJ3#XJ7PF1M(#B3_pC_u^QVkT8#jJH z#TUe|Rm?RK11qvuwQ)=%I%#u7_E7sinLI2si$_n-&?XM2GHKl>W9sLpv2#DF>j!)C 
z%lO=Mzvf9ExpJs_c%!}?E%tDudZ&3q;AwWaKY=$byTe;%9Mo1w>vPYxD)-C!NL7+j zsNCQ;{Jf2s@D1FrcwYOYv{urTS&N^k+f3$hEw!6^z($`MPA*QJmpG`cMq1G&#}Ues z*oD-sxF0?5I!%`%1PtO=Z#Rn&{x+00Kq7G8Id<$vJ zSZ8t$&Y@KQJKFfia`p6-!=zi>jP^9B%SA>kqWzY6#HIKEwa|n|H0NU)8As+8W0vfu zb;nzhl75$5Z=F^uFO8=SOU!7}y5+ihS%wroHdTGUcDTC!g0mWEP)FNSFHX4>DWmMeX{Kb&n~!coz?~fYA;*>e-tk8)fL$_k2e6PK42zJ*i6OQ3F}~ z%@wjZlCF~BaCO^=Hk{rjm%O~?G}mw1fgM9mDJDg)^0C4Se_j%y%bRB|jZd3MCo}IU zfi*WPxksGja;FUC{c^4p_GCVdER$C$+kP8Yf3kH5HCD5=j+v$zxkSsROD7 zYP~?WweFYMRAJ=}9++()0|RH$!J)$_vdV6HbjyLG2M$yo*59t!?w`DUMXyuz;)yLq z?pmXE(Yo!P2HDV@QL&V@Hk}`NEaBqX%KNns-zb}WcPU2~AD|_lE~>*XkL1*CnQC>f zmWs=UQ`E9@bG|y{BsCqlg&*gs$}hC>)!T9Bc+Kn=JmaW|Z}`~@YTTop+#~uZHQ2bC z@01)x!Sn8NfgqJ0(rNbJZ7Q}Wl+xYzszCGnW>LK&lGb0CO~ZAqXn373+o~1e<%T`k zIoEMoXm_3F&rG545^rgJwPZ5T+KKKrkMRs|?Nzc^HnlrHRZa4k%TKIl^NmK)v`v4F z8ujuzUr6<&Ouw;wSQ}fZKGBkU7dxPoOpD^@*N&>j+U^var?23;LH+q%RyZk_-tz86 zu~asBzVc{67k>Wa0v{}MlpgALRLe?z>GFO~`u)kP$a7*I`KrW>^uY7l^SJLCiYoYs z%MYH$`Sv(*#DbgDvs^B@bJ{sdt~yO=+AdA$Qf)UMsCk*LCv_%$>m6jiF}FC zN}(t748(+{qz*NB&(%nKMy-{_1&q!qZTp{53RUW>1|As2Jw0cUh%=H+>Xo7Jr!AEF zTawckzFk56FLvY=7iaP6>hsmqahKS{a0Q=xzFzSdvyoSwc|~g2Xl0vEBCjp8QVA)i z_07B9p@Gfyi2aSIQh9ylfsqTf7Uy~W)=b)7`(gU#8YB6gv!rVqV$^o=)6yTioK)Q- z{8eEzfrpk&rHZlUO6Qt0(q9cetJvu-a=&St)d`hMMdz+v6qn4Ay4Krj@VUd|)sSd& zIe+0;&OhzAy5He7*WBuw9=33t!p=t2@4#fbvbm`ebx)hWkdaD0rZ2hU3w@ERP&`{T z{;1xlvekFUzRQ`oBKOt0O4Z6g<0C^%M4j?` zct+VI9LB`AeHRiILD z`f#WGN!%sKOjPf2gt`Q2^I+y3pyk2GNEW)!yT`m$Dh@58+t!K zi>t5FPQRtRR#H7mamC4Z`P#Bsif=V-vVH0V4k{X>-k)SB zC#j>T;<%}-zjO%&wl)^;Csn6zJwv#D@*4`ilgK8qX4*>kEo#3t4|&_O*2l2i+kLIfDUz-S@b={RQbF{kUFhkkivA3?GZ3}8U_9gWyvR8e3 zI9rM9TSPfp`7UkjJ4u%~=eln1kxhK6&3T?Ry%oQkaD*>>TumouK4a6kO+2Z@1htRZ zM4qaiq2=p7`8JPO#`Bjo;Vxry%Ok6e<>bQiDPfAK+`G#{SvB4~rkXW7vTbIHn+T4?I7hHM#vF^02 z&PZB5udi-gTW!^){s{`m*~r)L^kxt5Lo}hzYTj8kTP?SHFZm}Gp;pdQx$;)+xvSmR znYq9Cv&;T|pBz(oqE9N992Q3V@{i>61}_yg>Xzb>ouu^G)`g1C^z!X=Ad5rWb)#Ur zC-nN%Ev|n)m;7*Ss5+@v0=snjZTF{td3pQ3@6-7v)n#XMZO_@#$%^@`a&oY(w*BVD 
zYU1JKF7jNYSkLarXMZzwDVZl-UF_*Rvt8vRYwnkhNy_%2L*7dt&SMmuzZhCF@bwB3c zE~MZJGn<+*A4QGe9?xsc&?mo`{CR4)SZeROhJ6FW0zCY47utHl$7O4X@d|q@qw0nO zgQFs%{g;L4;XepK00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=|Dk}3gSI*=Pj$0C;XxtcT>}F9heZ1u z>;LwpE;e5e8hu&mRH1^u@#o+EbJ_BBxs!hN+!5iy-&Q-tR{gTF3jbh$96$~r2ap5E z0ptL306BmhKn@@WkORm81R1IPj70CE61fE+*$AP0~G$N}U4asWAi z96$~r2ap5E0ptL306BmhKn@@WkORm81R1IPj70CE61fE+*$AP0~G$N}U4asWAi96$~r2ap5E0ptL306BmhKn@@WkORm8 z1R1IU5@BnMokC+5}GE*V>CtC#W&433J34yfQCo0#{1@oFwyIs_m9 z0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U< z00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa z0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U< 
z00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bcL z9}2Wf&^OiZpWo6nud#l96VqJAE9^}TjEgvtse5G4?rt#=QC)ht1^17Ph>CIR9u*kb zBOp4Ydw5_>pQvEBdf}0MVwy(=cX8_(65chxY0Kudng!Hr)TCt#?evQOgVQ5}qoPBi zV}iqD^5lbp>Jv(@+JJ?!7=?JqI$Y{xb+JT>E0tIzlEu}7ARvaL{?uDN%@~Al1YAW^+Hd$D*OUMMis$=Rif8>(@xBz|=LmZJI)Z<55&x|S zdi|FX^!mRLL9c+X5qzj`YX4gk^xs!R!@oCEtNoQsEgTu}Q+dAE##{QPwm%*I$5qLy zf2E-Izb@$iBdY(*%K7g{^^Y3*>!|+Is_DNU)!*vseSK5Az?d&jI3W??0bN6)g1f~0 zSNF`nxyAMSD;ew%8Sv-ge6Ok6!$F}xAN|KwRPk4eY4^)weyyj!jOZ__>6eGq|E&@I zqn7?MqCZtq{lE7f^KVD=hx_UGw2cbxtG$&3|3fS3Uu}7RD$n;?`laRl>F__Un*P<6 z_otF*1^wfG`qM$Te`ZDf_oMnpP5pIL|7lhA-;e4Kb@gj^M3;#0zR_CNe))gEFJ3|& zwdU9H?`@a*(KH(7zhh?fc;a`YS=T{Zmk*yZj!{%lf7T|6I&}Rw#9UrGPd+7w~Ho zzw|i#P(087W38^||GU-wqqu*H=-=E9{Tr?B9|ipy)jzem--`KvsMY;e+W)%M{a)Zd z1@>=li~j11`1cwa@V%&CBm1XT*Yp2StLqu?U$(kmi}}wA<-gJDel6hFC}wFll`kEi z|IJWK#lQC{_pcW5hmw4M4mz)In*WCb+VjCbD}Gh}%3ZVIw_e@f!lV`Mm(lxYzIpyT z(fi|_^Uu-yr@d|dJJI{?J#)XlsoB?&!ssr6VZl*9ylWM4s`5V{`ZKEDvUctNXjrpF zRG(m{3V$UX^9Fqz@o71B3IMYttk0e65?>Or6j_e@=U>`Q_)znHK)iI`w~#Ld9>=KSeJpI8vJ;_VxGvc%_3)aa|}FBvngXpWgMXy#`? 
z)R9kicyc@Wdy-HEzB1C!&6jt(E!7#mYYpi1&xxJ!nzfmBFG6x?cyR&n=9 z3%@m9h21BPwL`45`#=WJV)unc_JL#&+V$0(;WLYLW#fGZVGYE_&_8N@V&e&io z`AQDs_1~GmSmH$-M*r^}+_qg;-FdFaH0qZkS)lo zm1~N2 + $ ) set(sources_cxx src/RModelParser_ONNX.cxx @@ -118,6 +119,9 @@ if(SOFIE_WITH_ROOT AND ROOT_FOUND) endif() install(TARGETS SOFIE_parsers - LIBRARY DESTINATION lib + EXPORT SOFIETargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/" + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) -install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/inc/" DESTINATION "include") diff --git a/src/SOFIE_parsers/inc/LinkDef.h b/parsers/inc/LinkDef.h similarity index 100% rename from src/SOFIE_parsers/inc/LinkDef.h rename to parsers/inc/LinkDef.h diff --git a/src/SOFIE_parsers/inc/SOFIE/RModelParser_ONNX.hxx b/parsers/inc/SOFIE/RModelParser_ONNX.hxx similarity index 100% rename from src/SOFIE_parsers/inc/SOFIE/RModelParser_ONNX.hxx rename to parsers/inc/SOFIE/RModelParser_ONNX.hxx diff --git a/src/SOFIE_parsers/onnx_proto3 b/parsers/onnx_proto3 similarity index 100% rename from src/SOFIE_parsers/onnx_proto3 rename to parsers/onnx_proto3 diff --git a/src/SOFIE_parsers/src/ParseBasicBinary.cxx b/parsers/src/ParseBasicBinary.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseBasicBinary.cxx rename to parsers/src/ParseBasicBinary.cxx diff --git a/src/SOFIE_parsers/src/ParseBasicIs.cxx b/parsers/src/ParseBasicIs.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseBasicIs.cxx rename to parsers/src/ParseBasicIs.cxx diff --git a/src/SOFIE_parsers/src/ParseBasicNary.cxx b/parsers/src/ParseBasicNary.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseBasicNary.cxx rename to parsers/src/ParseBasicNary.cxx diff --git a/src/SOFIE_parsers/src/ParseBasicUnary.cxx b/parsers/src/ParseBasicUnary.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseBasicUnary.cxx rename to parsers/src/ParseBasicUnary.cxx diff --git 
a/src/SOFIE_parsers/src/ParseBatchNormalization.cxx b/parsers/src/ParseBatchNormalization.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseBatchNormalization.cxx rename to parsers/src/ParseBatchNormalization.cxx diff --git a/src/SOFIE_parsers/src/ParseCast.cxx b/parsers/src/ParseCast.cxx similarity index 78% rename from src/SOFIE_parsers/src/ParseCast.cxx rename to parsers/src/ParseCast.cxx index 7685421..a0993d4 100644 --- a/src/SOFIE_parsers/src/ParseCast.cxx +++ b/parsers/src/ParseCast.cxx @@ -13,20 +13,19 @@ ParserFuncSignature ParseCast = [](RModelParser_ONNX &parser, const onnx::NodePr } std::unique_ptr op; - std::string attr_type; + ETensorType attr_type; for (int_t i = 0; i < nodeproto.attribute_size(); i++) { std::string attribute_name = nodeproto.attribute(i).name(); if (attribute_name == "to") - attr_type = ConvertTypeToString(static_cast(nodeproto.attribute(i).i())); + attr_type = static_cast(nodeproto.attribute(i).i()); } std::string output_name = nodeproto.output(0); op.reset(new ROperator_Cast(attr_type, nodeproto.input(0), output_name)); if (!parser.IsRegisteredTensorType(output_name)) { - ETensorType output_type = ConvertStringToType(attr_type); - parser.RegisterTensorType(output_name, output_type); + parser.RegisterTensorType(output_name, attr_type); } return op; diff --git a/src/SOFIE_parsers/src/ParseClip.cxx b/parsers/src/ParseClip.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseClip.cxx rename to parsers/src/ParseClip.cxx diff --git a/src/SOFIE_parsers/src/ParseComparision.cxx b/parsers/src/ParseComparision.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseComparision.cxx rename to parsers/src/ParseComparision.cxx diff --git a/src/SOFIE_parsers/src/ParseConcat.cxx b/parsers/src/ParseConcat.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseConcat.cxx rename to parsers/src/ParseConcat.cxx diff --git a/src/SOFIE_parsers/src/ParseConstant.cxx b/parsers/src/ParseConstant.cxx similarity 
index 100% rename from src/SOFIE_parsers/src/ParseConstant.cxx rename to parsers/src/ParseConstant.cxx diff --git a/src/SOFIE_parsers/src/ParseConv.cxx b/parsers/src/ParseConv.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseConv.cxx rename to parsers/src/ParseConv.cxx diff --git a/src/SOFIE_parsers/src/ParseConvTranspose.cxx b/parsers/src/ParseConvTranspose.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseConvTranspose.cxx rename to parsers/src/ParseConvTranspose.cxx diff --git a/src/SOFIE_parsers/src/ParseEinsum.cxx b/parsers/src/ParseEinsum.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseEinsum.cxx rename to parsers/src/ParseEinsum.cxx diff --git a/src/SOFIE_parsers/src/ParseElu.cxx b/parsers/src/ParseElu.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseElu.cxx rename to parsers/src/ParseElu.cxx diff --git a/src/SOFIE_parsers/src/ParseErf.cxx b/parsers/src/ParseErf.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseErf.cxx rename to parsers/src/ParseErf.cxx diff --git a/src/SOFIE_parsers/src/ParseExpand.cxx b/parsers/src/ParseExpand.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseExpand.cxx rename to parsers/src/ParseExpand.cxx diff --git a/src/SOFIE_parsers/src/ParseEyeLike.cxx b/parsers/src/ParseEyeLike.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseEyeLike.cxx rename to parsers/src/ParseEyeLike.cxx diff --git a/src/SOFIE_parsers/src/ParseFuseBatchnormRelu.cxx b/parsers/src/ParseFuseBatchnormRelu.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseFuseBatchnormRelu.cxx rename to parsers/src/ParseFuseBatchnormRelu.cxx diff --git a/src/SOFIE_parsers/src/ParseFuseConvAdd.cxx b/parsers/src/ParseFuseConvAdd.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseFuseConvAdd.cxx rename to parsers/src/ParseFuseConvAdd.cxx diff --git a/src/SOFIE_parsers/src/ParseFuseConvTransposeAdd.cxx b/parsers/src/ParseFuseConvTransposeAdd.cxx 
similarity index 100% rename from src/SOFIE_parsers/src/ParseFuseConvTransposeAdd.cxx rename to parsers/src/ParseFuseConvTransposeAdd.cxx diff --git a/src/SOFIE_parsers/src/ParseFuseGemmRelu.cxx b/parsers/src/ParseFuseGemmRelu.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseFuseGemmRelu.cxx rename to parsers/src/ParseFuseGemmRelu.cxx diff --git a/src/SOFIE_parsers/src/ParseFuseMatMulAdd.cxx b/parsers/src/ParseFuseMatMulAdd.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseFuseMatMulAdd.cxx rename to parsers/src/ParseFuseMatMulAdd.cxx diff --git a/src/SOFIE_parsers/src/ParseGRU.cxx b/parsers/src/ParseGRU.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseGRU.cxx rename to parsers/src/ParseGRU.cxx diff --git a/src/SOFIE_parsers/src/ParseGather.cxx b/parsers/src/ParseGather.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseGather.cxx rename to parsers/src/ParseGather.cxx diff --git a/src/SOFIE_parsers/src/ParseGatherND.cxx b/parsers/src/ParseGatherND.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseGatherND.cxx rename to parsers/src/ParseGatherND.cxx diff --git a/src/SOFIE_parsers/src/ParseGemm.cxx b/parsers/src/ParseGemm.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseGemm.cxx rename to parsers/src/ParseGemm.cxx diff --git a/src/SOFIE_parsers/src/ParseIdentity.cxx b/parsers/src/ParseIdentity.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseIdentity.cxx rename to parsers/src/ParseIdentity.cxx diff --git a/src/SOFIE_parsers/src/ParseIf.cxx b/parsers/src/ParseIf.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseIf.cxx rename to parsers/src/ParseIf.cxx diff --git a/src/SOFIE_parsers/src/ParseLSTM.cxx b/parsers/src/ParseLSTM.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseLSTM.cxx rename to parsers/src/ParseLSTM.cxx diff --git a/src/SOFIE_parsers/src/ParseLayerNormalization.cxx b/parsers/src/ParseLayerNormalization.cxx similarity 
index 100% rename from src/SOFIE_parsers/src/ParseLayerNormalization.cxx rename to parsers/src/ParseLayerNormalization.cxx diff --git a/src/SOFIE_parsers/src/ParseLeakyRelu.cxx b/parsers/src/ParseLeakyRelu.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseLeakyRelu.cxx rename to parsers/src/ParseLeakyRelu.cxx diff --git a/src/SOFIE_parsers/src/ParseMatMul.cxx b/parsers/src/ParseMatMul.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseMatMul.cxx rename to parsers/src/ParseMatMul.cxx diff --git a/src/SOFIE_parsers/src/ParseNot.cxx b/parsers/src/ParseNot.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseNot.cxx rename to parsers/src/ParseNot.cxx diff --git a/src/SOFIE_parsers/src/ParsePad.cxx b/parsers/src/ParsePad.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParsePad.cxx rename to parsers/src/ParsePad.cxx diff --git a/src/SOFIE_parsers/src/ParsePool.cxx b/parsers/src/ParsePool.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParsePool.cxx rename to parsers/src/ParsePool.cxx diff --git a/src/SOFIE_parsers/src/ParseRNN.cxx b/parsers/src/ParseRNN.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseRNN.cxx rename to parsers/src/ParseRNN.cxx diff --git a/src/SOFIE_parsers/src/ParseRandom.cxx b/parsers/src/ParseRandom.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseRandom.cxx rename to parsers/src/ParseRandom.cxx diff --git a/src/SOFIE_parsers/src/ParseRange.cxx b/parsers/src/ParseRange.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseRange.cxx rename to parsers/src/ParseRange.cxx diff --git a/src/SOFIE_parsers/src/ParseReduce.cxx b/parsers/src/ParseReduce.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseReduce.cxx rename to parsers/src/ParseReduce.cxx diff --git a/src/SOFIE_parsers/src/ParseRelu.cxx b/parsers/src/ParseRelu.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseRelu.cxx rename to parsers/src/ParseRelu.cxx diff --git 
a/src/SOFIE_parsers/src/ParseReshape.cxx b/parsers/src/ParseReshape.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseReshape.cxx rename to parsers/src/ParseReshape.cxx diff --git a/src/SOFIE_parsers/src/ParseScatterElements.cxx b/parsers/src/ParseScatterElements.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseScatterElements.cxx rename to parsers/src/ParseScatterElements.cxx diff --git a/src/SOFIE_parsers/src/ParseSelu.cxx b/parsers/src/ParseSelu.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseSelu.cxx rename to parsers/src/ParseSelu.cxx diff --git a/src/SOFIE_parsers/src/ParseShape.cxx b/parsers/src/ParseShape.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseShape.cxx rename to parsers/src/ParseShape.cxx diff --git a/src/SOFIE_parsers/src/ParseSigmoid.cxx b/parsers/src/ParseSigmoid.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseSigmoid.cxx rename to parsers/src/ParseSigmoid.cxx diff --git a/src/SOFIE_parsers/src/ParseSlice.cxx b/parsers/src/ParseSlice.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseSlice.cxx rename to parsers/src/ParseSlice.cxx diff --git a/src/SOFIE_parsers/src/ParseSoftmax.cxx b/parsers/src/ParseSoftmax.cxx similarity index 91% rename from src/SOFIE_parsers/src/ParseSoftmax.cxx rename to parsers/src/ParseSoftmax.cxx index aea042e..19bd57a 100644 --- a/src/SOFIE_parsers/src/ParseSoftmax.cxx +++ b/parsers/src/ParseSoftmax.cxx @@ -24,7 +24,7 @@ ParserFuncSignature ParseSoftmax = [](RModelParser_ONNX &parser, const onnx::Nod attr_axis = nodeproto.attribute(0).i(); switch (input_type) { - case ETensorType::FLOAT: op.reset(new ROperator_Softmax(attr_axis, input_name, output_name)); break; + case ETensorType::FLOAT: op.reset(new ROperator_Softmax(attr_axis, input_name, output_name)); break; default: throw std::runtime_error("TMVA::SOFIE - Unsupported - Operator Softmax does not yet support input type " + std::to_string(static_cast(input_type))); diff 
--git a/src/SOFIE_parsers/src/ParseSplit.cxx b/parsers/src/ParseSplit.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseSplit.cxx rename to parsers/src/ParseSplit.cxx diff --git a/src/SOFIE_parsers/src/ParseTanh.cxx b/parsers/src/ParseTanh.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseTanh.cxx rename to parsers/src/ParseTanh.cxx diff --git a/src/SOFIE_parsers/src/ParseTile.cxx b/parsers/src/ParseTile.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseTile.cxx rename to parsers/src/ParseTile.cxx diff --git a/src/SOFIE_parsers/src/ParseTopK.cxx b/parsers/src/ParseTopK.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseTopK.cxx rename to parsers/src/ParseTopK.cxx diff --git a/src/SOFIE_parsers/src/ParseTranspose.cxx b/parsers/src/ParseTranspose.cxx similarity index 100% rename from src/SOFIE_parsers/src/ParseTranspose.cxx rename to parsers/src/ParseTranspose.cxx diff --git a/src/SOFIE_parsers/src/ParseWhere.cxx b/parsers/src/ParseWhere.cxx similarity index 90% rename from src/SOFIE_parsers/src/ParseWhere.cxx rename to parsers/src/ParseWhere.cxx index a7a3685..636c7e2 100644 --- a/src/SOFIE_parsers/src/ParseWhere.cxx +++ b/parsers/src/ParseWhere.cxx @@ -35,10 +35,10 @@ ParserFuncSignature ParseWhere = [](RModelParser_ONNX &parser, const onnx::NodeP switch (input_type) { case ETensorType::FLOAT: - op.reset(new ROperator_Where(nodeproto.input(1), nodeproto.input(2), nodeproto.input(0), output_name)); + op.reset(new ROperator_Where(nodeproto.input(0), nodeproto.input(1), nodeproto.input(2), output_name)); break; case ETensorType::INT64: - op.reset(new ROperator_Where(nodeproto.input(1), nodeproto.input(2), nodeproto.input(0), output_name)); + op.reset(new ROperator_Where(nodeproto.input(0), nodeproto.input(1), nodeproto.input(2), output_name)); break; default: throw std::runtime_error("TMVA::SOFIE - Unsupported - Where Operator does not yet support input type " + diff --git 
a/src/SOFIE_parsers/src/RModelParser_ONNX.cxx b/parsers/src/RModelParser_ONNX.cxx similarity index 100% rename from src/SOFIE_parsers/src/RModelParser_ONNX.cxx rename to parsers/src/RModelParser_ONNX.cxx diff --git a/src/.vscode/settings.json b/src/.vscode/settings.json deleted file mode 100644 index 8bc121a..0000000 --- a/src/.vscode/settings.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "files.associations": { - "*.icc": "cpp", - "iostream": "cpp", - "ostream": "cpp", - "cctype": "cpp", - "clocale": "cpp", - "cmath": "cpp", - "cstdarg": "cpp", - "cstddef": "cpp", - "cstdio": "cpp", - "cstdlib": "cpp", - "cstring": "cpp", - "ctime": "cpp", - "cwchar": "cpp", - "cwctype": "cpp", - "array": "cpp", - "atomic": "cpp", - "bit": "cpp", - "bitset": "cpp", - "compare": "cpp", - "complex": "cpp", - "concepts": "cpp", - "cstdint": "cpp", - "deque": "cpp", - "map": "cpp", - "set": "cpp", - "string": "cpp", - "unordered_map": "cpp", - "unordered_set": "cpp", - "vector": "cpp", - "exception": "cpp", - "algorithm": "cpp", - "functional": "cpp", - "iterator": "cpp", - "memory": "cpp", - "memory_resource": "cpp", - "numeric": "cpp", - "optional": "cpp", - "random": "cpp", - "regex": "cpp", - "string_view": "cpp", - "system_error": "cpp", - "tuple": "cpp", - "type_traits": "cpp", - "utility": "cpp", - "fstream": "cpp", - "initializer_list": "cpp", - "iomanip": "cpp", - "iosfwd": "cpp", - "istream": "cpp", - "limits": "cpp", - "new": "cpp", - "numbers": "cpp", - "sstream": "cpp", - "stdexcept": "cpp", - "streambuf": "cpp", - "cinttypes": "cpp", - "typeinfo": "cpp" - } -} \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index 102ca3b..0000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. -# All rights reserved. -# -# For the licensing terms see $ROOTSYS/LICENSE. -# For the list of contributors see $ROOTSYS/README/CREDITS. 
- -set(sofie_legacy_eval_backend ON CACHE BOOL "" FORCE) - -add_subdirectory(SOFIE_core) -add_subdirectory(SOFIE_parsers) -add_subdirectory(utils) diff --git a/src/utils/CMakeLists.txt b/utils/CMakeLists.txt similarity index 56% rename from src/utils/CMakeLists.txt rename to utils/CMakeLists.txt index 2ede060..36cfc55 100644 --- a/src/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -2,10 +2,14 @@ add_library(utils INTERFACE) target_include_directories(utils INTERFACE $ - $ + $ +) + +install(TARGETS utils + EXPORT SOFIETargets ) install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/SOFIE - DESTINATION include + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) diff --git a/src/utils/SOFIE/RTensor.hxx b/utils/SOFIE/RTensor.hxx similarity index 100% rename from src/utils/SOFIE/RTensor.hxx rename to utils/SOFIE/RTensor.hxx