diff --git a/tello_driver/CMakeLists.txt b/tello_driver/CMakeLists.txt index 4d7b8bd..b1aef6e 100644 --- a/tello_driver/CMakeLists.txt +++ b/tello_driver/CMakeLists.txt @@ -70,6 +70,7 @@ set(DRIVER_NODE_DEPS class_loader cv_bridge geometry_msgs + rclcpp_components OpenCV rclcpp ros2_shared @@ -127,7 +128,8 @@ set(JOY_NODE_DEPS geometry_msgs rclcpp sensor_msgs - tello_msgs) + tello_msgs + rclcpp_components) add_library(tello_joy_node SHARED ${JOY_NODE_SOURCES}) diff --git a/tello_driver/h264decoder/h264decoder.cpp b/tello_driver/h264decoder/h264decoder.cpp index 66f4080..8174577 100644 --- a/tello_driver/h264decoder/h264decoder.cpp +++ b/tello_driver/h264decoder/h264decoder.cpp @@ -1,49 +1,45 @@ extern "C" { #include #include +#include #include #include } -#ifndef PIX_FMT_RGB24 -#define PIX_FMT_RGB24 AV_PIX_FMT_RGB24 -#endif - -#ifndef CODEC_CAP_TRUNCATED -#define CODEC_CAP_TRUNCATED AV_CODEC_CAP_TRUNCATED -#endif - -#ifndef CODEC_FLAG_TRUNCATED -#define CODEC_FLAG_TRUNCATED AV_CODEC_FLAG_TRUNCATED -#endif +#include #include "h264decoder.hpp" -#include typedef unsigned char ubyte; /* For backward compatibility with release 9 or so of libav */ -#if (LIBAVCODEC_VERSION_MAJOR <= 54) +#if (LIBAVCODEC_VERSION_MAJOR <= 54) # define av_frame_alloc avcodec_alloc_frame -# define av_frame_free avcodec_free_frame +# define av_frame_free avcodec_free_frame #endif H264Decoder::H264Decoder() + : pkt_{std::make_unique()} { +#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100) avcodec_register_all(); +#endif codec = avcodec_find_decoder(AV_CODEC_ID_H264); if (!codec) throw H264InitFailure("cannot find decoder"); - + context = avcodec_alloc_context3(codec); if (!context) throw H264InitFailure("cannot allocate context"); - if(codec->capabilities & CODEC_CAP_TRUNCATED) { - context->flags |= CODEC_FLAG_TRUNCATED; +#if LIBAVCODEC_VERSION_MAJOR < 60 + // Note: CODEC_CAP_TRUNCATED was prefixed with AV_... + if(codec->capabilities & AV_CODEC_CAP_TRUNCATED) { + context->flags |= AV_CODEC_FLAG_TRUNCATED; } +#endif int err = avcodec_open2(context, codec, nullptr); if (err < 0) @@ -52,17 +48,14 @@ H264Decoder::H264Decoder() parser = av_parser_init(AV_CODEC_ID_H264); if (!parser) throw H264InitFailure("cannot init parser"); - + frame = av_frame_alloc(); if (!frame) throw H264InitFailure("cannot allocate frame"); -#if 1 - pkt = new AVPacket; - if (!pkt) + if (!pkt_) throw H264InitFailure("cannot allocate packet"); - av_init_packet(pkt); -#endif + av_init_packet(pkt_.get()); } @@ -72,34 +65,42 @@ H264Decoder::~H264Decoder() avcodec_close(context); av_free(context); av_frame_free(&frame); -#if 1 - delete pkt; -#endif } -ssize_t H264Decoder::parse(const ubyte* in_data, ssize_t in_size) +ParseResult H264Decoder::parse(const ubyte* in_data, ptrdiff_t in_size) { - auto nread = av_parser_parse2(parser, context, &pkt->data, &pkt->size, - in_data, in_size, - 0, 0, AV_NOPTS_VALUE); - return nread; -} - - -bool H264Decoder::is_frame_available() const -{ - return pkt->size > 0; + auto nread = av_parser_parse2(parser, context, &pkt_->data, &pkt_->size, + in_data, in_size, + 0, 0, AV_NOPTS_VALUE); + // We might have a frame to decode. But what exactly is a packet? + //Is it guaranteed to contain a complete frame? + if (pkt_->size) + { + const auto frame = decode_frame(); + return ParseResult{nread, frame}; + } + return ParseResult{nread, nullptr}; } -const AVFrame& H264Decoder::decode_frame() +const AVFrame* H264Decoder::decode_frame() { +#if (LIBAVCODEC_VERSION_MAJOR > 56) + int ret = avcodec_send_packet(context, pkt_.get()); + if (!ret) { + ret = avcodec_receive_frame(context, frame); + if (!ret) + return frame; + } + return nullptr; +#else int got_picture = 0; int nread = avcodec_decode_video2(context, frame, &got_picture, pkt); if (nread < 0 || got_picture == 0) - throw H264DecodeFailure("error decoding frame\n"); + return nullptr; return *frame; +#endif } @@ -123,19 +124,19 @@ const AVFrame& ConverterRGB24::convert(const AVFrame &frame, ubyte* out_rgb) int w = frame.width; int h = frame.height; int pix_fmt = frame.format; - - context = sws_getCachedContext(context, - w, h, (AVPixelFormat)pix_fmt, - w, h, AV_PIX_FMT_BGR24, SWS_BILINEAR, - nullptr, nullptr, nullptr); + + context = sws_getCachedContext(context, + w, h, (AVPixelFormat)pix_fmt, + w, h, AV_PIX_FMT_RGB24, SWS_BILINEAR, + nullptr, nullptr, nullptr); if (!context) throw H264DecodeFailure("cannot allocate context"); - + // Setup framergb with out_rgb as external buffer. Also say that we want RGB24 output. - avpicture_fill((AVPicture*)framergb, out_rgb, AV_PIX_FMT_BGR24, w, h); + av_image_fill_arrays(framergb->data, framergb->linesize, out_rgb, AV_PIX_FMT_RGB24, w, h, 1); // Do the conversion. sws_scale(context, frame.data, frame.linesize, 0, h, - framergb->data, framergb->linesize); + framergb->data, framergb->linesize); framergb->width = w; framergb->height = h; return *framergb; @@ -144,15 +145,14 @@ const AVFrame& ConverterRGB24::convert(const AVFrame &frame, ubyte* out_rgb) /* Determine required size of framebuffer. -avpicture_get_size is used in http://dranger.com/ffmpeg/tutorial01.html -to do this. However, avpicture_get_size returns the size of a compact -representation, without padding bytes. Since we use avpicture_fill to +avpicture_get_size is used in http://dranger.com/ffmpeg/tutorial01.html +to do this. However, avpicture_get_size returns the size of a compact +representation, without padding bytes. Since we use av_image_fill_arrays to fill the buffer we should also use it to determine the required size. */ int ConverterRGB24::predict_size(int w, int h) { - // TODO do we need this? - return avpicture_fill((AVPicture*)framergb, nullptr, AV_PIX_FMT_BGR24, w, h); + return av_image_fill_arrays(framergb->data, framergb->linesize, nullptr, AV_PIX_FMT_RGB24, w, h, 1); } diff --git a/tello_driver/h264decoder/h264decoder.hpp b/tello_driver/h264decoder/h264decoder.hpp index 3616f87..03f279c 100644 --- a/tello_driver/h264decoder/h264decoder.hpp +++ b/tello_driver/h264decoder/h264decoder.hpp @@ -1,24 +1,26 @@ #pragma once /* -This h264 decoder class is just a thin wrapper around libav -functions to decode h264 videos. It would have been easy to use -libav directly in the python module code but I like to keep these +This h264 decoder class is just a thin wrapper around libav +functions to decode h264 videos. It would have been easy to use +libav directly in the python module code but I like to keep these things separate. - + It is mostly based on roxlu's code. See http://roxlu.com/2014/039/decoding-h264-and-yuv420p-playback However, in contrast to roxlu's code the color space conversion is done by libav functions - so on the CPU, I suppose. -Most functions/members throw exceptions. This way, error states are -conveniently forwarded to python via the exception translation -mechanisms of boost::python. +Most functions/members throw exceptions. This way, error states are +conveniently forwarded to python via the exception translation +mechanisms of boost::python. */ // for ssize_t (signed int type as large as pointer type) #include #include +#include +#include struct AVCodecContext; struct AVFrame; @@ -37,13 +39,20 @@ class H264Exception : public std::runtime_error class H264InitFailure : public H264Exception { public: - H264InitFailure(const char* s) : H264Exception(s) {} + H264InitFailure(const char* s) : H264Exception(s) {} }; class H264DecodeFailure : public H264Exception { public: - H264DecodeFailure(const char* s) : H264Exception(s) {} + H264DecodeFailure(const char* s) : H264Exception(s) {} +}; + + +struct ParseResult +{ + ptrdiff_t num_bytes_consumed = 0; + const AVFrame* frame = nullptr; }; @@ -52,26 +61,27 @@ class H264Decoder /* Persistent things here, using RAII for cleanup. */ AVCodecContext *context; AVFrame *frame; - AVCodec *codec; + const AVCodec *codec; AVCodecParserContext *parser; - /* In the documentation example on the github master branch, the -packet is put on the heap. This is done here to store the pointers -to the encoded data, which must be kept around between calls to -parse- and decode frame. In release 11 it is put on the stack, too. + /* In the documentation example on the github master branch, the +packet is put on the heap. This is done here to store the pointers +to the encoded data, which must be kept around between calls to +parse- and decode frame. In release 11 it is put on the stack, too. */ - AVPacket *pkt; + std::unique_ptr pkt_; + + const AVFrame* decode_frame(); + public: H264Decoder(); ~H264Decoder(); - /* First, parse a continuous data stream, dividing it into -packets. When there is enough data to form a new frame, decode -the data and return the frame. parse returns the number -of consumed bytes of the input stream. It stops consuming + /* First, parse a continuous data stream, dividing it into +packets. When there is enough data to form a new frame, decode +the data and return the frame. parse returns the number +of consumed bytes of the input stream. It stops consuming bytes at frame boundaries. */ - ssize_t parse(const unsigned char* in_data, ssize_t in_size); - bool is_frame_available() const; - const AVFrame& decode_frame(); + ParseResult parse(const unsigned char* in_data, ptrdiff_t in_size); }; // TODO: Rename to OutputStage or so?! @@ -79,16 +89,16 @@ class ConverterRGB24 { SwsContext *context; AVFrame *framergb; - + public: ConverterRGB24(); ~ConverterRGB24(); - - /* Returns, given a width and height, + + /* Returns, given a width and height, how many bytes the frame buffer is going to need. */ int predict_size(int w, int h); - /* Given a decoded frame, convert it to RGB format and fill -out_rgb with the result. Returns a AVFrame structure holding + /* Given a decoded frame, convert it to RGB format and fill +out_rgb with the result. Returns a AVFrame structure holding additional information about the RGB frame, such as the number of bytes in a row and so on. */ const AVFrame& convert(const AVFrame &frame, unsigned char* out_rgb); diff --git a/tello_driver/include/tello_driver_node.hpp b/tello_driver/include/tello_driver_node.hpp index 2d21f4d..2077bcd 100644 --- a/tello_driver/include/tello_driver_node.hpp +++ b/tello_driver/include/tello_driver_node.hpp @@ -1,7 +1,7 @@ #include #include "rclcpp/rclcpp.hpp" -#include "cv_bridge/cv_bridge.h" +#include "cv_bridge/cv_bridge.hpp" #include "geometry_msgs/msg/twist.hpp" #include "sensor_msgs/msg/camera_info.hpp" #include "tello_msgs/msg/flight_data.hpp" diff --git a/tello_driver/src/video_socket.cpp b/tello_driver/src/video_socket.cpp index 8882497..f5d243f 100644 --- a/tello_driver/src/video_socket.cpp +++ b/tello_driver/src/video_socket.cpp @@ -75,12 +75,12 @@ namespace tello_driver try { while (next < seq_buffer_next_) { // Parse h264 - ssize_t consumed = decoder_.parse(seq_buffer_.data() + next, seq_buffer_next_ - next); + auto result = decoder_.parse(seq_buffer_.data() + next, seq_buffer_next_ - next); // Is a frame available? - if (decoder_.is_frame_available()) { + if (result.frame) { // Decode the frame - const AVFrame &frame = decoder_.decode_frame(); + const AVFrame &frame = *result.frame; // Convert pixels from YUV420P to BGR24 int size = converter_.predict_size(frame.width, frame.height); @@ -113,7 +113,7 @@ namespace tello_driver } } - next += consumed; + next += result.num_bytes_consumed; } } catch (std::runtime_error e) {