diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index 2f46c68d91bde..178c3f8bba0a3 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -37,4 +37,4 @@
     calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
    is_using_image_transport: true # if true, will use image_transport, false -> will only publish raw images
    type_adaptation_activated: true # if true, will use type_adaptation to transfer images in GPU.
-    is_publish_debug_rois_image_: true # if true, will draw ROIs in the image.
\ No newline at end of file
+    is_publish_debug_rois_image_: true # if true, will draw ROIs in the image.
diff --git a/perception/tensorrt_yolox/config/yolox_tiny.param.yaml b/perception/tensorrt_yolox/config/yolox_tiny.param.yaml
index c1b53df28b085..0f3a5ba4b436c 100644
--- a/perception/tensorrt_yolox/config/yolox_tiny.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_tiny.param.yaml
@@ -13,4 +13,4 @@
     clip_value: 0.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
    preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
    calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
-
\ No newline at end of file
+
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index 162eeb99dd622..7aa9e6d036a87 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -15,6 +15,8 @@
 #ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
 #define TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
 
+#include "type_adapters/image_container.hpp"
+
 #include
 #include
 #include
@@ -25,8 +27,6 @@
 #include
 #include
 
-#include "type_adapters/image_container.hpp"
-
 namespace tensorrt_yolox
 {
 using cuda_utils::CudaUniquePtr;
@@ -109,10 +109,10 @@ class TrtYoloX
   bool doInference(
     const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
     std::vector<cv::Mat> & color_masks);
-
+
   bool doInference(
-    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
-    std::vector<cv::Mat> & color_masks);
+    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
+    std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);
 
   /**
    * @brief run inference including pre-process and post-process
@@ -124,7 +124,8 @@ class TrtYoloX
    * @param[in] images batching images
    * @param[out] objects results for object detection
    * @param[in] roi region of interest
    */
   bool doInferenceWithRoi(
     const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);
 
   bool doInferenceWithRoi(
-    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);
+    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
+    const std::vector<cv::Rect> & roi);
 
   /**
    * @brief run multi-scale inference including pre-process and post-process
@@ -134,9 +135,10 @@ class TrtYoloX
    * @param[in] image
    * @param[out] objects results for object detection
    * @param[in] roi region of interest
    */
   bool doMultiScaleInference(
     const cv::Mat & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);
-
+
   bool doMultiScaleInference(
-    const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);
+    const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects,
+    const std::vector<cv::Rect> & roi);
 
   /**
    * @brief allocate buffer for preprocess on GPU
@@ -198,7 +200,8 @@ class TrtYoloX
     const std::vector<cv::Mat> & images, const std::vector<cv::Rect> & rois);
   void preprocessWithRoiGpu(
-    const std::vector<std::shared_ptr<ImageContainer>> & images, const std::vector<cv::Rect> & rois);
+    const std::vector<std::shared_ptr<ImageContainer>> & images,
+    const std::vector<cv::Rect> & rois);
 
   /**
    * @brief run multi-scale preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
    * @param[in] images batching images
@@ -212,10 +215,12 @@ class TrtYoloX
    * @param[in] rois region of interest
    */
   void multiScalePreprocessGpu(const cv::Mat & image, const std::vector<cv::Rect> & rois);
-  void multiScalePreprocessGpu(const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);
+  void multiScalePreprocessGpu(
+    const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);
 
   bool multiScaleFeedforward(const cv::Mat & image, int batch_size, ObjectArrays & objects);
-  bool multiScaleFeedforward(const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);
+  bool multiScaleFeedforward(
+    const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);
 
   bool multiScaleFeedforwardAndDecode(
     const cv::Mat & images, int batch_size, ObjectArrays & objects);
@@ -223,14 +228,15 @@
     const std::shared_ptr<ImageContainer> & images, int batch_size, ObjectArrays & objects);
 
   bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
-  bool feedforward(const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);
+  bool feedforward(
+    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);
 
   bool feedforwardAndDecode(
     const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
     std::vector<cv::Mat> & color_masks);
   bool feedforwardAndDecode(
-    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
-    std::vector<cv::Mat> & color_masks);
+    const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
+    std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);
 
   void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
 
   void generateGridsAndStride(
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 9beec56c89a48..157161b671211 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -29,7 +29,6 @@
 #include
 #include
 #include
-#include
 
 #if __has_include(<cv_bridge/cv_bridge.hpp>)
 #include <cv_bridge/cv_bridge.hpp>
@@ -88,13 +87,11 @@ class TrtYoloXNode
     const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
   int mapRoiLabel2SegLabel(const int32_t roi_label_index);
   void postInference(
-    const std_msgs::msg::Header header,
-    const tensorrt_yolox::ObjectArrays & objects,
-    const int width, const int height,
-    std::vector<cv::Mat> & masks);
-  void drawImageDetection(cv_bridge::CvImagePtr image,
-    const tensorrt_yolox::ObjectArrays & objects,
-    const int width, const int height);
+    const std_msgs::msg::Header header, const tensorrt_yolox::ObjectArrays & objects,
+    const int width, const int height, std::vector<cv::Mat> & masks);
+  void drawImageDetection(
+    cv_bridge::CvImagePtr image, const tensorrt_yolox::ObjectArrays & objects, const int width,
+    const int height);
 
   image_transport::Publisher image_pub_;
   image_transport::Publisher mask_pub_;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index c9a4da9224c38..4256ed02535cf 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -550,8 +550,8 @@ void TrtYoloX::preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>>
   // Copy into pinned memory
   CHECK_CUDA_ERROR(cudaMemcpyAsync(
     image_buf_d_.get() + index, image->cuda_mem(),
-    image->width() * image->height() * 3 * sizeof(unsigned char),
-    cudaMemcpyDeviceToDevice, images[0]->cuda_stream()->stream()));
+    image->width() * image->height() * 3 * sizeof(unsigned char), cudaMemcpyDeviceToDevice,
+    images[0]->cuda_stream()->stream()));
   b++;
 
   if (multitask_) {
@@ -578,7 +578,6 @@ void TrtYoloX::preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>>
     }
   }
-
   for (uint32_t i = 0; i < batch_size; i++) {
     auto stream = images[i]->cuda_stream()->stream();
     // Preprocess on GPU
     resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
@@ -586,8 +585,6 @@ void TrtYoloX::preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>>
       input_d_.get(), images[i]->cuda_mem(), input_width, input_height, 3, images[i]->width(),
       images[i]->height(), 3, 1, static_cast<float>(norm_factor_), stream);
   }
-
-
 }
 
 void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
@@ -658,8 +655,8 @@ bool TrtYoloX::doInference(
 }
 
 bool TrtYoloX::doInference(
-  const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
-  [[maybe_unused]] std::vector<cv::Mat> & color_masks)
+  const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
+  std::vector<cv::Mat> & masks, [[maybe_unused]] std::vector<cv::Mat> & color_masks)
 {
   if (!trt_common_->isInitialized()) {
     return false;
@@ -998,7 +995,8 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
   return true;
 }
 
-bool TrtYoloX::feedforward(const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects)
+bool TrtYoloX::feedforward(
+  const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects)
 {
   std::vector<void *> buffers = {
     input_d_.get(), out_num_detections_d_.get(), out_boxes_d_.get(), out_scores_d_.get(),
@@ -1036,10 +1034,10 @@ bool TrtYoloX::feedforward(const std::vector<std::shared_ptr<ImageContainer>> &
       const auto y1 = out_boxes[i * max_detections_ * 4 + j * 4 + 1] / scales_[i];
       const auto x2 = out_boxes[i * max_detections_ * 4 + j * 4 + 2] / scales_[i];
       const auto y2 = out_boxes[i * max_detections_ * 4 + j * 4 + 3] / scales_[i];
-      object.x_offset = std::clamp(0, static_cast<int32_t>(x1),
-        static_cast<int32_t>(images[i]->width()));
-      object.y_offset = std::clamp(0, static_cast<int32_t>(y1),
-        static_cast<int32_t>(images[i]->height()));
+      object.x_offset =
+        std::clamp(0, static_cast<int32_t>(x1), static_cast<int32_t>(images[i]->width()));
+      object.y_offset =
+        std::clamp(0, static_cast<int32_t>(y1), static_cast<int32_t>(images[i]->height()));
       object.width = static_cast<int32_t>(std::max(0.0F, x2 - x1));
       object.height = static_cast<int32_t>(std::max(0.0F, y2 - y1));
       object.score = out_scores[i * max_detections_ + j];
@@ -1051,7 +1049,6 @@ bool TrtYoloX::feedforward(const std::vector<std::shared_ptr<ImageContainer>> &
   return true;
 }
 
-
 bool TrtYoloX::feedforwardAndDecode(
   const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & out_masks,
   [[maybe_unused]] std::vector<cv::Mat> & color_masks)
@@ -1122,8 +1119,8 @@ bool TrtYoloX::feedforwardAndDecode(
 }
 
 bool TrtYoloX::feedforwardAndDecode(
-  const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & out_masks,
-  [[maybe_unused]] std::vector<cv::Mat> & color_masks)
+  const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
+  std::vector<cv::Mat> & out_masks, [[maybe_unused]] std::vector<cv::Mat> & color_masks)
 {
   std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
   if (multitask_) {
@@ -1139,7 +1136,8 @@ bool TrtYoloX::feedforwardAndDecode(
   if (multitask_ && !use_gpu_preprocess_) {
     CHECK_CUDA_ERROR(cudaMemcpyAsync(
       segmentation_out_prob_h_.get(), segmentation_out_prob_d_.get(),
-      sizeof(float) * segmentation_out_elem_num_, cudaMemcpyDeviceToHost, images[0]->cuda_stream()->stream()));
+      sizeof(float) * segmentation_out_elem_num_, cudaMemcpyDeviceToHost,
+      images[0]->cuda_stream()->stream()));
   }
   cudaStreamSynchronize(images[0]->cuda_stream()->stream());
   objects.clear();
@@ -1242,7 +1240,8 @@ bool TrtYoloX::multiScaleFeedforward(const cv::Mat & image, int batch_size, Obje
   return true;
 }
 
-bool TrtYoloX::multiScaleFeedforward(const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects)
+bool TrtYoloX::multiScaleFeedforward(
+  const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects)
 {
   std::vector<void *> buffers = {
     input_d_.get(), out_num_detections_d_.get(), out_boxes_d_.get(), out_scores_d_.get(),
@@ -1279,10 +1278,10 @@ bool TrtYoloX::multiScaleFeedforward(const std::shared_ptr<ImageContainer> & ima
     const auto y1 = out_boxes[i * max_detections_ * 4 + j * 4 + 1] / scales_[i];
     const auto x2 = out_boxes[i * max_detections_ * 4 + j * 4 + 2] / scales_[i];
     const auto y2 = out_boxes[i * max_detections_ * 4 + j * 4 + 3] / scales_[i];
-    object.x_offset = std::clamp(0, static_cast<int32_t>(x1),
-      static_cast<int32_t>(image->width()));
-    object.y_offset = std::clamp(0, static_cast<int32_t>(y1),
-      static_cast<int32_t>( image->height()));
+    object.x_offset =
+      std::clamp(0, static_cast<int32_t>(x1), static_cast<int32_t>(image->width()));
+    object.y_offset =
+      std::clamp(0, static_cast<int32_t>(y1), static_cast<int32_t>(image->height()));
     object.width = static_cast<int32_t>(std::max(0.0F, x2 - x1));
     object.height = static_cast<int32_t>(std::max(0.0F, y2 - y1));
     object.score = out_scores[i * max_detections_ + j];
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 77511221cbf22..9faa96b5a6706 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -111,7 +111,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
   type_adaptation_activated_ = declare_parameter<bool>("type_adaptation_activated");
   is_publish_debug_rois_image_ = declare_parameter<bool>("is_publish_debug_rois_image");
   // We don't use type adaptation when preprocessing is performed on CPU.
-   type_adaptation_activated_ = type_adaptation_activated_ && preprocess_on_gpu;
+  type_adaptation_activated_ = type_adaptation_activated_ && preprocess_on_gpu;
 
   roi_overlay_segment_labels_.UNKNOWN =
     declare_parameter<bool>("roi_overlay_segment_label.UNKNOWN");
   roi_overlay_segment_labels_.CAR = declare_parameter<bool>("roi_overlay_segment_label.CAR");
@@ -149,13 +149,11 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     mask_pub_ = image_transport::create_publisher(this, "~/out/mask");
     color_mask_pub_ = image_transport::create_publisher(this, "~/out/color_mask");
     image_pub_ = image_transport::create_publisher(this, "~/out/image");
-  }
-  else {
+  } else {
     simple_image_pub_ = this->create_publisher<sensor_msgs::msg::Image>("~/out/image", 1);
     simple_mask_pub_ = this->create_publisher<sensor_msgs::msg::Image>("~/out/mask", 1);
     simple_color_mask_pub_ =
       this->create_publisher<sensor_msgs::msg::Image>("~/out/color_mask", 1);
   }
-
   if (declare_parameter("build_only", false)) {
     RCLCPP_INFO(this->get_logger(), "TensorRT engine file is built and exit.");
@@ -168,8 +166,7 @@ bool TrtYoloXNode::checkInputBlocked()
   bool result;
   if (type_adaptation_activated_) {
     result = (gpu_image_sub_ == nullptr) || (gpu_image_sub_->get_publisher_count() == 0);
-  }
-  else {
+  } else {
     result = !image_sub_;
   }
   return result;
@@ -179,8 +176,8 @@ void TrtYoloXNode::setUpImageSubscriber()
 {
   if (type_adaptation_activated_) {
     gpu_image_sub_ = this->create_subscription<ImageContainer>(
-        "~/in/image", rclcpp::QoS{1},
-        std::bind(&TrtYoloXNode::onGpuImage, this, std::placeholders::_1));
+      "~/in/image", rclcpp::QoS{1},
+      std::bind(&TrtYoloXNode::onGpuImage, this, std::placeholders::_1));
   } else {
     image_sub_ = image_transport::create_subscription(
       this, "~/in/image", std::bind(&TrtYoloXNode::onImage, this, std::placeholders::_1), "raw",
@@ -194,22 +191,22 @@ uint32_t TrtYoloXNode::getNumOutputConnections()
   // Image output topic conenctions
   if (is_using_image_transport_) {
     num_subscribers = image_pub_.getNumSubscribers() + mask_pub_.getNumSubscribers() +
-        color_mask_pub_.getNumSubscribers();
+                      color_mask_pub_.getNumSubscribers();
   } else {
     num_subscribers = simple_image_pub_->get_subscription_count() +
-        simple_mask_pub_->get_subscription_count() +
-        simple_color_mask_pub_->get_subscription_count();
+                      simple_mask_pub_->get_subscription_count() +
+                      simple_color_mask_pub_->get_subscription_count();
   }
   // Objects output topic connections
-  num_subscribers += objects_pub_->get_subscription_count() +
-      objects_pub_->get_intra_process_subscription_count();
+  num_subscribers +=
+    objects_pub_->get_subscription_count() + objects_pub_->get_intra_process_subscription_count();
   return num_subscribers;
 }
 
 void TrtYoloXNode::onConnect()
 {
   using std::placeholders::_1;
-
+
   if (getNumOutputConnections() == 0) {
     image_sub_.shutdown();
     gpu_image_sub_.reset();
@@ -219,11 +216,8 @@ void TrtYoloXNode::onConnect()
 }
 
 void TrtYoloXNode::postInference(
-  const std_msgs::msg::Header header,
-  const tensorrt_yolox::ObjectArrays & objects,
-  const int width, const int height,
-  std::vector<cv::Mat> & masks
-  )
+  const std_msgs::msg::Header header, const tensorrt_yolox::ObjectArrays & objects, const int width,
+  const int height, std::vector<cv::Mat> & masks)
 {
   auto & mask = masks.at(0);
   tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects;
@@ -257,7 +251,7 @@ void TrtYoloXNode::postInference(
     std::memcpy(&out_mask_msg->data[i * step], &compressed_data.at(i).first, sizeof(uint8_t));
     std::memcpy(&out_mask_msg->data[i * step + 1], &compressed_data.at(i).second, sizeof(int));
   }
-  if (is_using_image_transport_){
+  if (is_using_image_transport_) {
     mask_pub_.publish(out_mask_msg);
   } else {
     simple_mask_pub_->publish(*out_mask_msg);
   }
@@ -297,9 +291,9 @@ void TrtYoloXNode::postInference(
   }
 }
 
-void TrtYoloXNode::drawImageDetection(cv_bridge::CvImagePtr image_ptr,
-  const tensorrt_yolox::ObjectArrays & objects,
-  const int width, const int height)
+void TrtYoloXNode::drawImageDetection(
+  cv_bridge::CvImagePtr image_ptr, const tensorrt_yolox::ObjectArrays & objects, const int width,
+  const int height)
 {
   for (const auto & yolox_object : objects.at(0)) {
     const auto left = std::max(0, static_cast<int>(yolox_object.x_offset));
@@ -309,10 +303,11 @@ void TrtYoloXNode::drawImageDetection(cv_bridge::CvImagePtr image_ptr,
     const auto bottom =
       std::min(static_cast<int>(yolox_object.y_offset + yolox_object.height), height);
     cv::rectangle(
-      image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0);
+      image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8,
+      0);
   }
 
-  sensor_msgs::msg::Image::SharedPtr output_image_msg_ptr  = image_ptr->toImageMsg();
+  sensor_msgs::msg::Image::SharedPtr output_image_msg_ptr = image_ptr->toImageMsg();
   if (is_using_image_transport_) {
     image_pub_.publish(output_image_msg_ptr);
   } else {
@@ -323,7 +318,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
 {
   stop_watch_ptr_->toc("processing_time", true);
-
+
   cv_bridge::CvImagePtr in_image_ptr;
   try {
     in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
@@ -362,7 +357,7 @@ void TrtYoloXNode::onGpuImage(std::shared_ptr<ImageContainer> msg)
   std::vector<cv::Mat> masks = {cv::Mat(cv::Size(height, width), CV_8UC1, cv::Scalar(0))};
   std::vector<cv::Mat> color_masks = {
     cv::Mat(cv::Size(height, width), CV_8UC3, cv::Scalar(0, 0, 0))};
-
+
   std::vector<std::shared_ptr<ImageContainer>> msgs = {msg};
   if (!trt_yolox_->doInference(msgs, objects, masks, color_masks)) {
     RCLCPP_WARN(this->get_logger(), "Fail to inference");
@@ -374,7 +369,8 @@ void TrtYoloXNode::onGpuImage(std::shared_ptr<ImageContainer> msg)
   if (is_publish_debug_rois_image_) {
     sensor_msgs::msg::Image image_msg;
     msg->get_sensor_msgs_image(image_msg);
-    cv_bridge::CvImagePtr image_ptr = cv_bridge::toCvCopy(image_msg, sensor_msgs::image_encodings::BGR8);
+    cv_bridge::CvImagePtr image_ptr =
+      cv_bridge::toCvCopy(image_msg, sensor_msgs::image_encodings::BGR8);
     drawImageDetection(image_ptr, objects, width, height);
   }
 }
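
Reviewer's note, not part of the patch: the node-side changes reduce to one decision in TrtYoloXNode::setUpImageSubscriber(). When type_adaptation_activated is true (and only when pre-processing runs on the GPU, since the constructor ANDs the flag with preprocess_on_gpu), the node subscribes with the type-adapted ImageContainer so frames can stay in device memory; otherwise it falls back to a plain image_transport subscription. Below is a minimal sketch of that selection. It assumes "type_adapters/image_container.hpp" (added by this patch) provides an ImageContainer type registered with rclcpp type adaptation; the node name and parameter defaults are illustrative, not taken from the patch.

// Sketch: subscriber selection mirroring TrtYoloXNode::setUpImageSubscriber().
#include <functional>
#include <memory>

#include <image_transport/image_transport.hpp>
#include <rclcpp/rclcpp.hpp>
#include <sensor_msgs/msg/image.hpp>

#include "type_adapters/image_container.hpp"  // assumed: registers ImageContainer for type adaptation

class SubscriberSelectionSketch : public rclcpp::Node
{
public:
  explicit SubscriberSelectionSketch(const rclcpp::NodeOptions & options)
  : rclcpp::Node("subscriber_selection_sketch", options)
  {
    // As in the patch: type adaptation only pays off when pre-processing runs on
    // the GPU, so the flag is ANDed with preprocess_on_gpu before wiring anything.
    const bool preprocess_on_gpu = declare_parameter<bool>("preprocess_on_gpu", true);
    type_adaptation_activated_ =
      declare_parameter<bool>("type_adaptation_activated", true) && preprocess_on_gpu;

    if (type_adaptation_activated_) {
      // GPU path: the callback receives an ImageContainer whose pixel data can stay
      // in device memory (compare the image->cuda_mem() uses in tensorrt_yolox.cpp).
      gpu_image_sub_ = create_subscription<ImageContainer>(
        "~/in/image", rclcpp::QoS{1},
        std::bind(&SubscriberSelectionSketch::onGpuImage, this, std::placeholders::_1));
    } else {
      // CPU path: plain sensor_msgs/Image via image_transport, as before the patch.
      image_sub_ = image_transport::create_subscription(
        this, "~/in/image",
        std::bind(&SubscriberSelectionSketch::onImage, this, std::placeholders::_1), "raw",
        rmw_qos_profile_sensor_data);
    }
  }

private:
  void onGpuImage(std::shared_ptr<ImageContainer> /*msg*/) {}
  void onImage(const sensor_msgs::msg::Image::ConstSharedPtr & /*msg*/) {}

  bool type_adaptation_activated_{false};
  rclcpp::Subscription<ImageContainer>::SharedPtr gpu_image_sub_;
  image_transport::Subscriber image_sub_;
};

Keeping the image_transport fallback means the node still works with publishers that only offer sensor_msgs/Image (e.g. rosbag playback), at the cost of a host-side copy. One thing worth flagging for review: the param files set is_publish_debug_rois_image_ (with a trailing underscore) while the node declares "is_publish_debug_rois_image" without one, so the YAML override will not match the declared parameter unless one of the two names is fixed.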