Skip to content

Commit

Permalink
style(pre-commit): autofix
Browse files Browse the repository at this point in the history
  • Loading branch information
pre-commit-ci[bot] committed Nov 12, 2024
1 parent 79bf927 commit ad08dcf
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
is_using_image_transport: true # if true, will use image_transport, false -> will only publish raw images
type_adaptation_activated: true # if true, will use type_adaptation to transfer images in GPU.
is_publish_debug_rois_image_: true # if true, will draw ROIs in the image.
is_publish_debug_rois_image_: true # if true, will draw ROIs in the image.
2 changes: 1 addition & 1 deletion perception/tensorrt_yolox/config/yolox_tiny.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
clip_value: 0.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.

34 changes: 20 additions & 14 deletions perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_
#define TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_

#include "type_adapters/image_container.hpp"

#include <cuda_utils/cuda_unique_ptr.hpp>
#include <cuda_utils/stream_unique_ptr.hpp>
#include <opencv2/opencv.hpp>
Expand All @@ -25,8 +27,6 @@
#include <string>
#include <vector>

#include "type_adapters/image_container.hpp"

namespace tensorrt_yolox
{
using cuda_utils::CudaUniquePtr;
Expand Down Expand Up @@ -109,10 +109,10 @@ class TrtYoloX
bool doInference(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

bool doInference(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);

/**
* @brief run inference including pre-process and post-process
Expand All @@ -124,7 +124,8 @@ class TrtYoloX
const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doInferenceWithRoi(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
const std::vector<cv::Rect> & roi);

/**
* @brief run multi-scale inference including pre-process and post-process
Expand All @@ -134,9 +135,10 @@ class TrtYoloX
*/
bool doMultiScaleInference(
const cv::Mat & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doMultiScaleInference(
const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);
const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects,
const std::vector<cv::Rect> & roi);

/**
* @brief allocate buffer for preprocess on GPU
Expand Down Expand Up @@ -198,7 +200,8 @@ class TrtYoloX
const std::vector<cv::Mat> & images, const std::vector<cv::Rect> & rois);

void preprocessWithRoiGpu(
const std::vector<std::shared_ptr<ImageContainer>> & images, const std::vector<cv::Rect> & rois);
const std::vector<std::shared_ptr<ImageContainer>> & images,
const std::vector<cv::Rect> & rois);
/**
* @brief run multi-scale preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
* @param[in] images batching images
Expand All @@ -212,25 +215,28 @@ class TrtYoloX
* @param[in] rois region of interest
*/
void multiScalePreprocessGpu(const cv::Mat & image, const std::vector<cv::Rect> & rois);
void multiScalePreprocessGpu(const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);
void multiScalePreprocessGpu(
const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);

bool multiScaleFeedforward(const cv::Mat & image, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforward(const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforward(
const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);

bool multiScaleFeedforwardAndDecode(
const cv::Mat & images, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforwardAndDecode(
const std::shared_ptr<ImageContainer> & images, int batch_size, ObjectArrays & objects);

bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforward(const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);
bool feedforward(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);

bool feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
bool feedforwardAndDecode(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, std::vector<cv::Mat> & color_masks);

void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
void generateGridsAndStride(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include <std_msgs/msg/header.hpp>
#include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
#include <tier4_perception_msgs/msg/semantic.hpp>
#include <tier4_perception_msgs/msg/semantic.hpp>

#if __has_include(<cv_bridge/cv_bridge.hpp>)
#include <cv_bridge/cv_bridge.hpp>
Expand Down Expand Up @@ -88,13 +87,11 @@ class TrtYoloXNode : public rclcpp::Node
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
void postInference(
const std_msgs::msg::Header header,
const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height,
std::vector<cv::Mat> & masks);
void drawImageDetection(cv_bridge::CvImagePtr image,
const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height);
const std_msgs::msg::Header header, const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height, std::vector<cv::Mat> & masks);
void drawImageDetection(
cv_bridge::CvImagePtr image, const tensorrt_yolox::ObjectArrays & objects, const int width,
const int height);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand Down
41 changes: 20 additions & 21 deletions perception/tensorrt_yolox/src/tensorrt_yolox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,8 +550,8 @@ void TrtYoloX::preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>>
// Copy into pinned memory
CHECK_CUDA_ERROR(cudaMemcpyAsync(
image_buf_d_.get() + index, image->cuda_mem(),
image->width() * image->height() * 3 * sizeof(unsigned char),
cudaMemcpyDeviceToDevice, images[0]->cuda_stream()->stream()));
image->width() * image->height() * 3 * sizeof(unsigned char), cudaMemcpyDeviceToDevice,
images[0]->cuda_stream()->stream()));
b++;

if (multitask_) {
Expand All @@ -578,16 +578,13 @@ void TrtYoloX::preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>>
}
}


for (uint32_t i = 0; i < batch_size; i++) {
auto stream = images[i]->cuda_stream()->stream();
// Preprocess on GPU
resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
input_d_.get(), images[i]->cuda_mem(), input_width, input_height, 3, images[i]->width(),
images[i]->height(), 3, 1, static_cast<float>(norm_factor_), stream);
}


}

void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
Expand Down Expand Up @@ -658,8 +655,8 @@ bool TrtYoloX::doInference(
}

bool TrtYoloX::doInference(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
[[maybe_unused]] std::vector<cv::Mat> & color_masks)
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & masks, [[maybe_unused]] std::vector<cv::Mat> & color_masks)
{
if (!trt_common_->isInitialized()) {
return false;
Expand Down Expand Up @@ -998,7 +995,8 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
return true;
}

bool TrtYoloX::feedforward(const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects)
bool TrtYoloX::feedforward(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects)
{
std::vector<void *> buffers = {
input_d_.get(), out_num_detections_d_.get(), out_boxes_d_.get(), out_scores_d_.get(),
Expand Down Expand Up @@ -1036,10 +1034,10 @@ bool TrtYoloX::feedforward(const std::vector<std::shared_ptr<ImageContainer>> &
const auto y1 = out_boxes[i * max_detections_ * 4 + j * 4 + 1] / scales_[i];
const auto x2 = out_boxes[i * max_detections_ * 4 + j * 4 + 2] / scales_[i];
const auto y2 = out_boxes[i * max_detections_ * 4 + j * 4 + 3] / scales_[i];
object.x_offset = std::clamp(0, static_cast<int32_t>(x1),
static_cast<int32_t>(images[i]->width()));
object.y_offset = std::clamp(0, static_cast<int32_t>(y1),
static_cast<int32_t>(images[i]->height()));
object.x_offset =
std::clamp(0, static_cast<int32_t>(x1), static_cast<int32_t>(images[i]->width()));
object.y_offset =
std::clamp(0, static_cast<int32_t>(y1), static_cast<int32_t>(images[i]->height()));
object.width = static_cast<int32_t>(std::max(0.0F, x2 - x1));
object.height = static_cast<int32_t>(std::max(0.0F, y2 - y1));
object.score = out_scores[i * max_detections_ + j];
Expand All @@ -1051,7 +1049,6 @@ bool TrtYoloX::feedforward(const std::vector<std::shared_ptr<ImageContainer>> &
return true;
}


bool TrtYoloX::feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & out_masks,
[[maybe_unused]] std::vector<cv::Mat> & color_masks)
Expand Down Expand Up @@ -1122,8 +1119,8 @@ bool TrtYoloX::feedforwardAndDecode(
}

bool TrtYoloX::feedforwardAndDecode(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & out_masks,
[[maybe_unused]] std::vector<cv::Mat> & color_masks)
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects,
std::vector<cv::Mat> & out_masks, [[maybe_unused]] std::vector<cv::Mat> & color_masks)
{
std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
if (multitask_) {
Expand All @@ -1139,7 +1136,8 @@ bool TrtYoloX::feedforwardAndDecode(
if (multitask_ && !use_gpu_preprocess_) {
CHECK_CUDA_ERROR(cudaMemcpyAsync(
segmentation_out_prob_h_.get(), segmentation_out_prob_d_.get(),
sizeof(float) * segmentation_out_elem_num_, cudaMemcpyDeviceToHost, images[0]->cuda_stream()->stream()));
sizeof(float) * segmentation_out_elem_num_, cudaMemcpyDeviceToHost,
images[0]->cuda_stream()->stream()));
}
cudaStreamSynchronize(images[0]->cuda_stream()->stream());
objects.clear();
Expand Down Expand Up @@ -1242,7 +1240,8 @@ bool TrtYoloX::multiScaleFeedforward(const cv::Mat & image, int batch_size, Obje
return true;
}

bool TrtYoloX::multiScaleFeedforward(const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects)
bool TrtYoloX::multiScaleFeedforward(
const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects)
{
std::vector<void *> buffers = {
input_d_.get(), out_num_detections_d_.get(), out_boxes_d_.get(), out_scores_d_.get(),
Expand Down Expand Up @@ -1279,10 +1278,10 @@ bool TrtYoloX::multiScaleFeedforward(const std::shared_ptr<ImageContainer> & ima
const auto y1 = out_boxes[i * max_detections_ * 4 + j * 4 + 1] / scales_[i];
const auto x2 = out_boxes[i * max_detections_ * 4 + j * 4 + 2] / scales_[i];
const auto y2 = out_boxes[i * max_detections_ * 4 + j * 4 + 3] / scales_[i];
object.x_offset = std::clamp(0, static_cast<int32_t>(x1),
static_cast<int32_t>(image->width()));
object.y_offset = std::clamp(0, static_cast<int32_t>(y1),
static_cast<int32_t>( image->height()));
object.x_offset =
std::clamp(0, static_cast<int32_t>(x1), static_cast<int32_t>(image->width()));
object.y_offset =
std::clamp(0, static_cast<int32_t>(y1), static_cast<int32_t>(image->height()));
object.width = static_cast<int32_t>(std::max(0.0F, x2 - x1));
object.height = static_cast<int32_t>(std::max(0.0F, y2 - y1));
object.score = out_scores[i * max_detections_ + j];
Expand Down
Loading

0 comments on commit ad08dcf

Please sign in to comment.