Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 44 additions & 42 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/init.h"

#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
Expand Down Expand Up @@ -166,45 +166,45 @@ bool AnalysisPredictor::Init(
return true;
}

void ParseCommandLineFlags(){
std::vector<char *> internal_argv;
std::string dummy = "dummy";
internal_argv.push_back(strdup(dummy.c_str()));
std::vector<std::string> envs;
std::vector<std::string> undefok;
void ParseCommandLineFlags() {
std::vector<char *> internal_argv;
std::string dummy = "dummy";
internal_argv.push_back(strdup(dummy.c_str()));
std::vector<std::string> envs;
std::vector<std::string> undefok;
#ifdef PADDLE_WITH_CUDA
envs.push_back("fraction_of_gpu_memory_to_use");
envs.push_back("initial_gpu_memory_in_mb");
envs.push_back("reallocate_gpu_memory_in_mb");
envs.push_back("fraction_of_gpu_memory_to_use");
envs.push_back("initial_gpu_memory_in_mb");
envs.push_back("reallocate_gpu_memory_in_mb");
#endif
envs.push_back("allocator_strategy");
envs.push_back("initial_cpu_memory_in_mb");
undefok.push_back("initial_cpu_memory_in_mb");
char* env_str = nullptr;
if (envs.size() > 0) {
std::string env_string = "--tryfromenv=";
for (auto t : envs) {
env_string += t + ",";
}
env_string = env_string.substr(0, env_string.length() - 1);
env_str = strdup(env_string.c_str());
internal_argv.push_back(env_str);
LOG(INFO) << "get env_string" << env_string;
envs.push_back("allocator_strategy");
envs.push_back("initial_cpu_memory_in_mb");
undefok.push_back("initial_cpu_memory_in_mb");
char *env_str = nullptr;
if (envs.size() > 0) {
std::string env_string = "--tryfromenv=";
for (auto t : envs) {
env_string += t + ",";
}
env_string = env_string.substr(0, env_string.length() - 1);
env_str = strdup(env_string.c_str());
internal_argv.push_back(env_str);
LOG(INFO) << "get env_string" << env_string;
}

char* undefok_str = nullptr;
if (undefok.size() > 0) {
std::string undefok_string = "--undefok=";
for (auto t : undefok) {
undefok_string += t + ",";
}
undefok_string = undefok_string.substr(0, undefok_string.length() - 1);
undefok_str = strdup(undefok_string.c_str());
internal_argv.push_back(undefok_str);
char *undefok_str = nullptr;
if (undefok.size() > 0) {
std::string undefok_string = "--undefok=";
for (auto t : undefok) {
undefok_string += t + ",";
}
int internal_argc = internal_argv.size();
char** arr = internal_argv.data();
paddle::platform::ParseCommandLineFlags(internal_argc, arr, true);
undefok_string = undefok_string.substr(0, undefok_string.length() - 1);
undefok_str = strdup(undefok_string.c_str());
internal_argv.push_back(undefok_str);
}
int internal_argc = internal_argv.size();
char **arr = internal_argv.data();
paddle::platform::ParseCommandLineFlags(internal_argc, arr, true);
}

bool AnalysisPredictor::PrepareScope(
Expand Down Expand Up @@ -1136,6 +1136,7 @@ USE_TRT_CONVERTER(hard_swish);
USE_TRT_CONVERTER(split);
USE_TRT_CONVERTER(transpose);
USE_TRT_CONVERTER(prelu);
USE_TRT_CONVERTER(box_coder);
USE_TRT_CONVERTER(conv2d_transpose);
USE_TRT_CONVERTER(leaky_relu);
USE_TRT_CONVERTER(shuffle_channel);
Expand All @@ -1148,17 +1149,18 @@ USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm);
USE_TRT_CONVERTER(skip_layernorm);
USE_TRT_CONVERTER(slice);
USE_TRT_CONVERTER(scale);
USE_TRT_CONVERTER(cast);
USE_TRT_CONVERTER(stack);
USE_TRT_CONVERTER(reshape);
USE_TRT_CONVERTER(flatten);
//USE_TRT_CONVERTER(clip);
//USE_TRT_CONVERTER(gather);
// USE_TRT_CONVERTER(clip);
// USE_TRT_CONVERTER(gather);
// USE_TRT_CONVERTER(anchor_generator);
//USE_TRT_CONVERTER(yolo_box);
//USE_TRT_CONVERTER(roi_align);
//USE_TRT_CONVERTER(affine_channel);
//USE_TRT_CONVERTER(multiclass_nms);
//USE_TRT_CONVERTER(nearest_interp);
// USE_TRT_CONVERTER(yolo_box);
// USE_TRT_CONVERTER(roi_align);
// USE_TRT_CONVERTER(affine_channel);
// USE_TRT_CONVERTER(multiclass_nms);
// USE_TRT_CONVERTER(nearest_interp);
#endif

namespace paddle_infer {
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Add TRT tests
nv_library(tensorrt_converter
SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc box_coder_op.cc
pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc reshape_op.cc flatten_op.cc
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
cast_op.cc
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)

nv_test(test_op_converter SRCS test_op_converter.cc DEPS
Expand Down
82 changes: 82 additions & 0 deletions paddle/fluid/inference/tensorrt/convert/box_coder_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/box_coder_op_plugin.h"

namespace nvinfer1 {
class ILayer;
} // namespace nvinfer1
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle

namespace paddle {
namespace inference {
namespace tensorrt {

class BoxCoderOpConverter : public OpConverter {
 public:
  // Converts the fluid box_coder op into a TensorRT plugin layer.
  // TargetBox is the only runtime input; PriorBox and PriorBoxVar are
  // persistable variables read from the scope, copied to CPU, and baked
  // into the plugin as weights.
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);

    auto* input = engine_->GetITensor(op_desc.Input("TargetBox")[0]);
    int input_num = op_desc.Input("TargetBox").size();
    // PriorBox / PriorBoxVar live in the scope as weights, not ITensors.
    auto* prior_box_var = scope.FindVar(op_desc.Input("PriorBox")[0]);
    auto* prior_box_var_var = scope.FindVar(op_desc.Input("PriorBoxVar")[0]);
    PADDLE_ENFORCE_NOT_NULL(
        prior_box_var,
        platform::errors::NotFound(
            "Variable PriorBox of box_coder TRT converter is not found."));
    // Bug fix: the variance variable was dereferenced without a null check.
    PADDLE_ENFORCE_NOT_NULL(
        prior_box_var_var,
        platform::errors::NotFound(
            "Variable PriorBoxVar of box_coder TRT converter is not found."));
    auto* prior_box_tensor = prior_box_var->GetMutable<framework::LoDTensor>();
    auto* prior_box_var_tensor =
        prior_box_var_var->GetMutable<framework::LoDTensor>();

    // Copy both weight tensors to host memory so the plugin can own the data.
    platform::CPUPlace cpu_place;
    std::unique_ptr<framework::LoDTensor> prior_box_tensor_temp(
        new framework::LoDTensor());
    std::unique_ptr<framework::LoDTensor> prior_box_var_tensor_temp(
        new framework::LoDTensor());
    prior_box_tensor_temp->Resize(prior_box_tensor->dims());
    // Bug fix: previously resized to its own (empty) dims instead of the
    // source tensor's dims.
    prior_box_var_tensor_temp->Resize(prior_box_var_tensor->dims());
    TensorCopySync(*prior_box_tensor, cpu_place, prior_box_tensor_temp.get());
    TensorCopySync(*prior_box_var_tensor, cpu_place,
                   prior_box_var_tensor_temp.get());
    float* prior_box_data =
        prior_box_tensor_temp->mutable_data<float>(cpu_place);
    float* prior_box_var_data =
        prior_box_var_tensor_temp->mutable_data<float>(cpu_place);

    nvinfer1::ILayer* layer = nullptr;

    // The plugin copies the weight buffers, so the temporaries may be freed
    // when this converter returns. NOTE(review): assumes BoxCoderPlugin
    // deep-copies its inputs — confirm against the plugin implementation.
    plugin::BoxCoderPlugin* plugin = new plugin::BoxCoderPlugin(
        prior_box_data, prior_box_var_data, prior_box_tensor_temp->numel());
    layer = engine_->AddPlugin(&input, input_num, plugin);

    auto output_name = op_desc.Output("OutputBox")[0];
    RreplenishLayerAndOutput(layer, "box_coder", {output_name}, test_mode);
  }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(box_coder, BoxCoderOpConverter);
66 changes: 66 additions & 0 deletions paddle/fluid/inference/tensorrt/convert/cast_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

namespace nvinfer1 {
class ILayer;
} // namespace nvinfer1
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle

namespace paddle {
namespace inference {
namespace tensorrt {

/*
 * cast converter from fluid to tensorRT.
 *
 * The op is lowered to a TensorRT identity layer: no explicit dtype
 * conversion is emitted here. Presumably op_teller only admits dtype
 * pairs that TensorRT handles as float internally — confirm against
 * the "cast" branch of OpTeller::Tell.
 */
class CastOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);

    // Single runtime input; the in_dtype/out_dtype attributes are
    // intentionally ignored (see class comment).
    auto* cast_input = engine_->GetITensor(op_desc.Input("X")[0]);

    nvinfer1::ILayer* layer =
        TRT_ENGINE_ADD_LAYER(engine_, Identity, *cast_input);

    auto out_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(layer, "cast", {out_name}, test_mode);
  }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(cast, CastOpConverter);
10 changes: 10 additions & 0 deletions paddle/fluid/inference/tensorrt/op_teller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"split",
"instance_norm",
"gelu",
"box_coder",
"layer_norm",
"scale",
"stack",
Expand All @@ -108,6 +109,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"reshape",
"flatten2",
"flatten",
"cast",
};
};

Expand Down Expand Up @@ -137,6 +139,14 @@ bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc,
if (op_type == "transpose2") {
if (!desc.HasAttr("axis")) return false;
}
if (op_type == "cast") {
  if (!desc.HasAttr("in_dtype") || !desc.HasAttr("out_dtype")) return false;
  int in_dtype = BOOST_GET_CONST(int, desc.GetAttr("in_dtype"));
  int out_dtype = BOOST_GET_CONST(int, desc.GetAttr("out_dtype"));
  // Accept only uint8->float32 (20->5) or int32->float32 (3->5).
  // Bug fix: the original condition was
  //   !(in==20 && out==5) || !(in==3 && out==5)
  // which is a tautology (in_dtype cannot equal both 20 and 3), so every
  // cast op was rejected. De Morgan gives the intended "neither pair
  // matches" test.
  if (!((in_dtype == 20 && out_dtype == 5) ||
        (in_dtype == 3 && out_dtype == 5)))
    return false;
}
if (op_type == "matmul") {
auto* block = desc.Block();
for (auto& param_name : desc.Inputs()) {
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ nv_library(tensorrt_plugin
instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu
hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu
box_coder_op_plugin.cu
#anchor_generator_op_plugin.cu
#yolo_box_op_plugin.cu
#roi_align_op_plugin.cu
Expand Down
Loading