diff --git a/include/mtr/mtr.hpp b/include/mtr/mtr.hpp index a707d6c..02a632b 100644 --- a/include/mtr/mtr.hpp +++ b/include/mtr/mtr.hpp @@ -154,8 +154,9 @@ class TrtMTR IntentionPoint intention_point_; - size_t num_target_, num_agent_, num_timestamp_, num_agent_dim_, num_agent_class_, num_agent_attr_; - size_t num_polyline_, num_point_, num_point_dim_, num_point_attr_; + int32_t num_target_, num_agent_, num_timestamp_, num_agent_dim_, num_agent_class_, + num_agent_attr_; + int32_t num_polyline_, num_point_, num_point_dim_, num_point_attr_; // source data cuda::unique_ptr d_target_index_{nullptr}; @@ -181,8 +182,8 @@ class TrtMTR // outputs cuda::unique_ptr d_out_score_{nullptr}; cuda::unique_ptr d_out_trajectory_{nullptr}; - std::unique_ptr h_out_score_{nullptr}; - std::unique_ptr h_out_trajectory_{nullptr}; + std::vector h_out_score_; + std::vector h_out_trajectory_; // debug cuda::EventDebugger event_debugger_; diff --git a/include/mtr/trajectory.hpp b/include/mtr/trajectory.hpp index d3e301a..c014aee 100644 --- a/include/mtr/trajectory.hpp +++ b/include/mtr/trajectory.hpp @@ -29,14 +29,14 @@ constexpr size_t PredictedStateDim = 7; */ struct PredictedState { - explicit PredictedState(const float * state) - : x_(state[0]), - y_(state[1]), - dx_(state[2]), - dy_(state[3]), - yaw_(state[4]), - vx_(state[5]), - vy_(state[6]) + explicit PredictedState(const std::array & state) + : x_(state.at(0)), + y_(state.at(1)), + dx_(state.at(2)), + dy_(state.at(3)), + yaw_(state.at(4)), + vx_(state.at(5)), + vy_(state.at(6)) { } @@ -62,17 +62,18 @@ struct PredictedState }; // struct PredictedState /** - * @brief A class to represent waypoints for a single mode. + * @brief A class to represent waypoints for a single motion mode. 
*/ struct PredictedMode { - PredictedMode(const float score, const float * waypoints, const size_t num_future) + PredictedMode(const float score, const std::vector & waypoints, const size_t num_future) : score_(score), num_future_(num_future) { for (size_t t = 0; t < num_future_; ++t) { - const auto start_ptr = waypoints + t * num_state_dim_; - std::vector state(start_ptr, start_ptr + num_state_dim_); - waypoints_.emplace_back(state.data()); + const auto start_itr = waypoints.cbegin() + t * num_state_dim_; + std::array state; + std::copy_n(start_itr, PredictedStateDim, state.begin()); + waypoints_.emplace_back(state); } } @@ -98,15 +99,15 @@ struct PredictedMode struct PredictedTrajectory { PredictedTrajectory( - const float * scores, const float * trajectories, const size_t num_mode, + const std::vector & scores, const std::vector & modes, const size_t num_mode, const size_t num_future) : num_mode_(num_mode), num_future_(num_future) { for (size_t m = 0; m < num_mode_; ++m) { - const auto score = *(scores + m); - const auto start_ptr = trajectories + m * num_future_ * num_state_dim_; - std::vector waypoints(start_ptr, start_ptr + num_future_ * num_state_dim_); - modes_.emplace_back(score, waypoints.data(), num_future_); + const auto score = scores.at(m); + const auto wp_itr = modes.cbegin() + m * num_future_ * num_state_dim_; + std::vector waypoints(wp_itr, wp_itr + num_future_ * num_state_dim_); + modes_.emplace_back(score, waypoints, num_future_); } // sort by score diff --git a/src/mtr.cpp b/src/mtr.cpp index 892789f..334b83b 100644 --- a/src/mtr.cpp +++ b/src/mtr.cpp @@ -137,9 +137,6 @@ void TrtMTR::initCudaPtr(const AgentData & agent_data, const PolylineData & poly d_out_score_ = cuda::make_unique(num_target_ * config_.num_mode); d_out_trajectory_ = cuda::make_unique( num_target_ * config_.num_mode * config_.num_future * PredictedStateDim); - h_out_score_ = std::make_unique(sizeof(float) * num_target_ * config_.num_mode); - h_out_trajectory_ = 
std::make_unique( - sizeof(float) * num_target_ * config_.num_mode * config_.num_future * PredictedStateDim); } bool TrtMTR::preProcess(const AgentData & agent_data, const PolylineData & polyline_data) @@ -207,20 +204,30 @@ bool TrtMTR::postProcess( num_target_, config_.num_mode, config_.num_future, num_agent_dim_, d_target_state_.get(), PredictedStateDim, d_out_trajectory_.get(), stream_)); + // clear the host buffers and resize them so the copies below write into live elements. + h_out_score_.clear(); + h_out_trajectory_.clear(); + h_out_score_.resize(num_target_ * config_.num_mode); + h_out_trajectory_.resize( + num_target_ * config_.num_mode * config_.num_future * PredictedStateDim); + CHECK_CUDA_ERROR(cudaMemcpyAsync( - h_out_score_.get(), d_out_score_.get(), sizeof(float) * num_target_ * config_.num_mode, + h_out_score_.data(), d_out_score_.get(), sizeof(float) * num_target_ * config_.num_mode, cudaMemcpyDeviceToHost, stream_)); CHECK_CUDA_ERROR(cudaMemcpyAsync( - h_out_trajectory_.get(), d_out_trajectory_.get(), + h_out_trajectory_.data(), d_out_trajectory_.get(), sizeof(float) * num_target_ * config_.num_mode * config_.num_future * PredictedStateDim, cudaMemcpyDeviceToHost, stream_)); trajectories.reserve(num_target_); for (size_t b = 0; b < num_target_; ++b) { - const auto score_ptr = h_out_score_.get() + b * config_.num_mode; - const auto trajectory_ptr = - h_out_trajectory_.get() + b * config_.num_mode * config_.num_future * PredictedStateDim; - trajectories.emplace_back(score_ptr, trajectory_ptr, config_.num_mode, config_.num_future); + const auto score_itr = h_out_score_.cbegin() + b * config_.num_mode; + std::vector scores(score_itr, score_itr + config_.num_mode); + const auto mode_itr = + h_out_trajectory_.cbegin() + b * config_.num_mode * config_.num_future * PredictedStateDim; + std::vector modes( + mode_itr, mode_itr + config_.num_mode * config_.num_future * PredictedStateDim); + trajectories.emplace_back(scores, modes, config_.num_mode, config_.num_future); } 
return true;