Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 44 additions & 42 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/init.h"

#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
Expand Down Expand Up @@ -166,45 +166,45 @@ bool AnalysisPredictor::Init(
return true;
}

void ParseCommandLineFlags(){
std::vector<char *> internal_argv;
std::string dummy = "dummy";
internal_argv.push_back(strdup(dummy.c_str()));
std::vector<std::string> envs;
std::vector<std::string> undefok;
void ParseCommandLineFlags() {
std::vector<char *> internal_argv;
std::string dummy = "dummy";
internal_argv.push_back(strdup(dummy.c_str()));
std::vector<std::string> envs;
std::vector<std::string> undefok;
#ifdef PADDLE_WITH_CUDA
envs.push_back("fraction_of_gpu_memory_to_use");
envs.push_back("initial_gpu_memory_in_mb");
envs.push_back("reallocate_gpu_memory_in_mb");
envs.push_back("fraction_of_gpu_memory_to_use");
envs.push_back("initial_gpu_memory_in_mb");
envs.push_back("reallocate_gpu_memory_in_mb");
#endif
envs.push_back("allocator_strategy");
envs.push_back("initial_cpu_memory_in_mb");
undefok.push_back("initial_cpu_memory_in_mb");
char* env_str = nullptr;
if (envs.size() > 0) {
std::string env_string = "--tryfromenv=";
for (auto t : envs) {
env_string += t + ",";
}
env_string = env_string.substr(0, env_string.length() - 1);
env_str = strdup(env_string.c_str());
internal_argv.push_back(env_str);
LOG(INFO) << "get env_string" << env_string;
envs.push_back("allocator_strategy");
envs.push_back("initial_cpu_memory_in_mb");
undefok.push_back("initial_cpu_memory_in_mb");
char *env_str = nullptr;
if (envs.size() > 0) {
std::string env_string = "--tryfromenv=";
for (auto t : envs) {
env_string += t + ",";
}
env_string = env_string.substr(0, env_string.length() - 1);
env_str = strdup(env_string.c_str());
internal_argv.push_back(env_str);
LOG(INFO) << "get env_string" << env_string;
}

char* undefok_str = nullptr;
if (undefok.size() > 0) {
std::string undefok_string = "--undefok=";
for (auto t : undefok) {
undefok_string += t + ",";
}
undefok_string = undefok_string.substr(0, undefok_string.length() - 1);
undefok_str = strdup(undefok_string.c_str());
internal_argv.push_back(undefok_str);
char *undefok_str = nullptr;
if (undefok.size() > 0) {
std::string undefok_string = "--undefok=";
for (auto t : undefok) {
undefok_string += t + ",";
}
int internal_argc = internal_argv.size();
char** arr = internal_argv.data();
paddle::platform::ParseCommandLineFlags(internal_argc, arr, true);
undefok_string = undefok_string.substr(0, undefok_string.length() - 1);
undefok_str = strdup(undefok_string.c_str());
internal_argv.push_back(undefok_str);
}
int internal_argc = internal_argv.size();
char **arr = internal_argv.data();
paddle::platform::ParseCommandLineFlags(internal_argc, arr, true);
}

bool AnalysisPredictor::PrepareScope(
Expand Down Expand Up @@ -1136,6 +1136,7 @@ USE_TRT_CONVERTER(hard_swish);
USE_TRT_CONVERTER(split);
USE_TRT_CONVERTER(transpose);
USE_TRT_CONVERTER(prelu);
USE_TRT_CONVERTER(box_coder);
USE_TRT_CONVERTER(conv2d_transpose);
USE_TRT_CONVERTER(leaky_relu);
USE_TRT_CONVERTER(shuffle_channel);
Expand All @@ -1148,17 +1149,18 @@ USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm);
USE_TRT_CONVERTER(skip_layernorm);
USE_TRT_CONVERTER(slice);
USE_TRT_CONVERTER(scale);
USE_TRT_CONVERTER(cast);
USE_TRT_CONVERTER(stack);
USE_TRT_CONVERTER(reshape);
USE_TRT_CONVERTER(flatten);
//USE_TRT_CONVERTER(clip);
//USE_TRT_CONVERTER(gather);
// USE_TRT_CONVERTER(clip);
// USE_TRT_CONVERTER(gather);
// USE_TRT_CONVERTER(anchor_generator);
//USE_TRT_CONVERTER(yolo_box);
//USE_TRT_CONVERTER(roi_align);
//USE_TRT_CONVERTER(affine_channel);
//USE_TRT_CONVERTER(multiclass_nms);
//USE_TRT_CONVERTER(nearest_interp);
// USE_TRT_CONVERTER(yolo_box);
// USE_TRT_CONVERTER(roi_align);
// USE_TRT_CONVERTER(affine_channel);
// USE_TRT_CONVERTER(multiclass_nms);
// USE_TRT_CONVERTER(nearest_interp);
#endif

namespace paddle_infer {
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Add TRT tests
nv_library(tensorrt_converter
SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc box_coder_op.cc
pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc reshape_op.cc flatten_op.cc
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
cast_op.cc
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)

nv_test(test_op_converter SRCS test_op_converter.cc DEPS
Expand Down
82 changes: 82 additions & 0 deletions paddle/fluid/inference/tensorrt/convert/box_coder_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/box_coder_op_plugin.h"

namespace nvinfer1 {
class ILayer;
} // namespace nvinfer1
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle

namespace paddle {
namespace inference {
namespace tensorrt {

class BoxCoderOpConverter : public OpConverter {
 public:
  // Converts the fluid box_coder op into a TensorRT plugin layer.
  // TargetBox is the only runtime input; PriorBox and PriorBoxVar are
  // persistable variables read from the scope, copied to CPU, and baked
  // into the plugin as weights.
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);

    auto* input = engine_->GetITensor(op_desc.Input("TargetBox")[0]);
    int input_num = op_desc.Input("TargetBox").size();
    // PriorBox / PriorBoxVar live in the scope as weights, not ITensors.
    auto* prior_box_var = scope.FindVar(op_desc.Input("PriorBox")[0]);
    auto* prior_box_var_var = scope.FindVar(op_desc.Input("PriorBoxVar")[0]);
    PADDLE_ENFORCE_NOT_NULL(
        prior_box_var,
        platform::errors::NotFound(
            "Variable PriorBox of box_coder TRT converter is not found."));
    // Bug fix: the variance variable was dereferenced without a null check.
    PADDLE_ENFORCE_NOT_NULL(
        prior_box_var_var,
        platform::errors::NotFound(
            "Variable PriorBoxVar of box_coder TRT converter is not found."));
    auto* prior_box_tensor = prior_box_var->GetMutable<framework::LoDTensor>();
    auto* prior_box_var_tensor =
        prior_box_var_var->GetMutable<framework::LoDTensor>();

    // Copy both weight tensors to host memory so the plugin can own the data.
    platform::CPUPlace cpu_place;
    std::unique_ptr<framework::LoDTensor> prior_box_tensor_temp(
        new framework::LoDTensor());
    std::unique_ptr<framework::LoDTensor> prior_box_var_tensor_temp(
        new framework::LoDTensor());
    prior_box_tensor_temp->Resize(prior_box_tensor->dims());
    // Bug fix: previously resized to its own (empty) dims instead of the
    // source tensor's dims.
    prior_box_var_tensor_temp->Resize(prior_box_var_tensor->dims());
    TensorCopySync(*prior_box_tensor, cpu_place, prior_box_tensor_temp.get());
    TensorCopySync(*prior_box_var_tensor, cpu_place,
                   prior_box_var_tensor_temp.get());
    float* prior_box_data =
        prior_box_tensor_temp->mutable_data<float>(cpu_place);
    float* prior_box_var_data =
        prior_box_var_tensor_temp->mutable_data<float>(cpu_place);

    nvinfer1::ILayer* layer = nullptr;

    // The plugin copies the weight buffers, so the temporaries may be freed
    // when this converter returns. NOTE(review): assumes BoxCoderPlugin
    // deep-copies its inputs — confirm against the plugin implementation.
    plugin::BoxCoderPlugin* plugin = new plugin::BoxCoderPlugin(
        prior_box_data, prior_box_var_data, prior_box_tensor_temp->numel());
    layer = engine_->AddPlugin(&input, input_num, plugin);

    auto output_name = op_desc.Output("OutputBox")[0];
    RreplenishLayerAndOutput(layer, "box_coder", {output_name}, test_mode);
  }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(box_coder, BoxCoderOpConverter);
66 changes: 66 additions & 0 deletions paddle/fluid/inference/tensorrt/convert/cast_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

namespace nvinfer1 {
class ILayer;
} // namespace nvinfer1
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle

namespace paddle {
namespace inference {
namespace tensorrt {

/*
 * cast converter from fluid to tensorRT.
 *
 * The op is lowered to a TensorRT identity layer: no explicit dtype
 * conversion is emitted here. Presumably op_teller only admits dtype
 * pairs that TensorRT handles as float internally — confirm against
 * the "cast" branch of OpTeller::Tell.
 */
class CastOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);

    // Single runtime input; the in_dtype/out_dtype attributes are
    // intentionally ignored (see class comment).
    auto* cast_input = engine_->GetITensor(op_desc.Input("X")[0]);

    nvinfer1::ILayer* layer =
        TRT_ENGINE_ADD_LAYER(engine_, Identity, *cast_input);

    auto out_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(layer, "cast", {out_name}, test_mode);
  }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(cast, CastOpConverter);
10 changes: 10 additions & 0 deletions paddle/fluid/inference/tensorrt/op_teller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"split",
"instance_norm",
"gelu",
"box_coder",
"layer_norm",
"scale",
"stack",
Expand All @@ -108,6 +109,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"reshape",
"flatten2",
"flatten",
"cast",
};
};

Expand Down Expand Up @@ -137,6 +139,14 @@ bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc,
if (op_type == "transpose2") {
if (!desc.HasAttr("axis")) return false;
}
if (op_type == "cast") {
  if (!desc.HasAttr("in_dtype") || !desc.HasAttr("out_dtype")) return false;
  int in_dtype = BOOST_GET_CONST(int, desc.GetAttr("in_dtype"));
  int out_dtype = BOOST_GET_CONST(int, desc.GetAttr("out_dtype"));
  // Accept only uint8->float32 (20->5) or int32->float32 (3->5).
  // Bug fix: the original condition was
  //   !(in==20 && out==5) || !(in==3 && out==5)
  // which is a tautology (in_dtype cannot equal both 20 and 3), so every
  // cast op was rejected. De Morgan gives the intended "neither pair
  // matches" test.
  if (!((in_dtype == 20 && out_dtype == 5) ||
        (in_dtype == 3 && out_dtype == 5)))
    return false;
}
if (op_type == "matmul") {
auto* block = desc.Block();
for (auto& param_name : desc.Inputs()) {
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ nv_library(tensorrt_plugin
instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu
hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu
box_coder_op_plugin.cu
#anchor_generator_op_plugin.cu
#yolo_box_op_plugin.cu
#roi_align_op_plugin.cu
Expand Down
Loading