Skip to content

Commit

Permalink
Upgrades H264 Decoder
Browse files Browse the repository at this point in the history
This commit updates the h264 decoder to work with newer versions of libavcodec that ships with Ubuntu Noble. There are still some colour artifacts.
  • Loading branch information
arjo129 committed Jun 29, 2024
1 parent d961961 commit 2f53c49
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 85 deletions.
4 changes: 3 additions & 1 deletion tello_driver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ set(DRIVER_NODE_DEPS
class_loader
cv_bridge
geometry_msgs
rclcpp_components
OpenCV
rclcpp
ros2_shared
Expand Down Expand Up @@ -127,7 +128,8 @@ set(JOY_NODE_DEPS
geometry_msgs
rclcpp
sensor_msgs
tello_msgs)
tello_msgs
rclcpp_components)

add_library(tello_joy_node SHARED
${JOY_NODE_SOURCES})
Expand Down
104 changes: 52 additions & 52 deletions tello_driver/h264decoder/h264decoder.cpp
Original file line number Diff line number Diff line change
@@ -1,49 +1,45 @@
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/imgutils.h>
#include <libavutil/mem.h>
#include <libswscale/swscale.h>
}

#ifndef PIX_FMT_RGB24
#define PIX_FMT_RGB24 AV_PIX_FMT_RGB24
#endif

#ifndef CODEC_CAP_TRUNCATED
#define CODEC_CAP_TRUNCATED AV_CODEC_CAP_TRUNCATED
#endif

#ifndef CODEC_FLAG_TRUNCATED
#define CODEC_FLAG_TRUNCATED AV_CODEC_FLAG_TRUNCATED
#endif
#include <iostream>

#include "h264decoder.hpp"
#include <utility>

typedef unsigned char ubyte;

/* For backward compatibility with release 9 or so of libav */
#if (LIBAVCODEC_VERSION_MAJOR <= 54)
#if (LIBAVCODEC_VERSION_MAJOR <= 54)
# define av_frame_alloc avcodec_alloc_frame
# define av_frame_free avcodec_free_frame
# define av_frame_free avcodec_free_frame
#endif


H264Decoder::H264Decoder()
: pkt_{std::make_unique<AVPacket>()}
{
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100)
avcodec_register_all();
#endif

codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec)
throw H264InitFailure("cannot find decoder");

context = avcodec_alloc_context3(codec);
if (!context)
throw H264InitFailure("cannot allocate context");

if(codec->capabilities & CODEC_CAP_TRUNCATED) {
context->flags |= CODEC_FLAG_TRUNCATED;
#if LIBAVCODEC_VERSION_MAJOR < 60
// Note: CODEC_CAP_TRUNCATED was prefixed with AV_...
if(codec->capabilities & AV_CODEC_CAP_TRUNCATED) {
context->flags |= AV_CODEC_FLAG_TRUNCATED;
}
#endif

int err = avcodec_open2(context, codec, nullptr);
if (err < 0)
Expand All @@ -52,17 +48,14 @@ H264Decoder::H264Decoder()
parser = av_parser_init(AV_CODEC_ID_H264);
if (!parser)
throw H264InitFailure("cannot init parser");

frame = av_frame_alloc();
if (!frame)
throw H264InitFailure("cannot allocate frame");

#if 1
pkt = new AVPacket;
if (!pkt)
if (!pkt_)
throw H264InitFailure("cannot allocate packet");
av_init_packet(pkt);
#endif
av_init_packet(pkt_.get());
}


Expand All @@ -72,34 +65,42 @@ H264Decoder::~H264Decoder()
avcodec_close(context);
av_free(context);
av_frame_free(&frame);
#if 1
delete pkt;
#endif
}


ssize_t H264Decoder::parse(const ubyte* in_data, ssize_t in_size)
ParseResult H264Decoder::parse(const ubyte* in_data, ptrdiff_t in_size)
{
auto nread = av_parser_parse2(parser, context, &pkt->data, &pkt->size,
in_data, in_size,
0, 0, AV_NOPTS_VALUE);
return nread;
}


bool H264Decoder::is_frame_available() const
{
return pkt->size > 0;
auto nread = av_parser_parse2(parser, context, &pkt_->data, &pkt_->size,
in_data, in_size,
0, 0, AV_NOPTS_VALUE);
// We might have a frame to decode. But what exactly is a packet?
//Is it guaranteed to contain a complete frame?
if (pkt_->size)
{
const auto frame = decode_frame();
return ParseResult{nread, frame};
}
return ParseResult{nread, nullptr};
}


const AVFrame& H264Decoder::decode_frame()
const AVFrame* H264Decoder::decode_frame()
{
#if (LIBAVCODEC_VERSION_MAJOR > 56)
int ret = avcodec_send_packet(context, pkt_.get());
if (!ret) {
ret = avcodec_receive_frame(context, frame);
if (!ret)
return frame;
}
return nullptr;
#else
int got_picture = 0;
int nread = avcodec_decode_video2(context, frame, &got_picture, pkt);
if (nread < 0 || got_picture == 0)
throw H264DecodeFailure("error decoding frame\n");
return nullptr;
return *frame;
#endif
}


Expand All @@ -123,19 +124,19 @@ const AVFrame& ConverterRGB24::convert(const AVFrame &frame, ubyte* out_rgb)
int w = frame.width;
int h = frame.height;
int pix_fmt = frame.format;

context = sws_getCachedContext(context,
w, h, (AVPixelFormat)pix_fmt,
w, h, AV_PIX_FMT_BGR24, SWS_BILINEAR,
nullptr, nullptr, nullptr);
context = sws_getCachedContext(context,
w, h, (AVPixelFormat)pix_fmt,
w, h, AV_PIX_FMT_RGB24, SWS_BILINEAR,
nullptr, nullptr, nullptr);
if (!context)
throw H264DecodeFailure("cannot allocate context");

// Setup framergb with out_rgb as external buffer. Also say that we want RGB24 output.
avpicture_fill((AVPicture*)framergb, out_rgb, AV_PIX_FMT_BGR24, w, h);
av_image_fill_arrays(framergb->data, framergb->linesize, out_rgb, AV_PIX_FMT_RGB24, w, h, 1);
// Do the conversion.
sws_scale(context, frame.data, frame.linesize, 0, h,
framergb->data, framergb->linesize);
framergb->data, framergb->linesize);
framergb->width = w;
framergb->height = h;
return *framergb;
Expand All @@ -144,15 +145,14 @@ const AVFrame& ConverterRGB24::convert(const AVFrame &frame, ubyte* out_rgb)
/*
Determine required size of framebuffer.
avpicture_get_size is used in http://dranger.com/ffmpeg/tutorial01.html
to do this. However, avpicture_get_size returns the size of a compact
representation, without padding bytes. Since we use avpicture_fill to
avpicture_get_size is used in http://dranger.com/ffmpeg/tutorial01.html
to do this. However, avpicture_get_size returns the size of a compact
representation, without padding bytes. Since we use av_image_fill_arrays to
fill the buffer we should also use it to determine the required size.
*/
int ConverterRGB24::predict_size(int w, int h)
{
// TODO do we need this?
return avpicture_fill((AVPicture*)framergb, nullptr, AV_PIX_FMT_BGR24, w, h);
return av_image_fill_arrays(framergb->data, framergb->linesize, nullptr, AV_PIX_FMT_RGB24, w, h, 1);
}


Expand Down
64 changes: 37 additions & 27 deletions tello_driver/h264decoder/h264decoder.hpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
#pragma once
/*
This h264 decoder class is just a thin wrapper around libav
functions to decode h264 videos. It would have been easy to use
libav directly in the python module code but I like to keep these
This h264 decoder class is just a thin wrapper around libav
functions to decode h264 videos. It would have been easy to use
libav directly in the python module code but I like to keep these
things separate.
It is mostly based on roxlu's code. See
http://roxlu.com/2014/039/decoding-h264-and-yuv420p-playback
However, in contrast to roxlu's code the color space conversion is
done by libav functions - so on the CPU, I suppose.
Most functions/members throw exceptions. This way, error states are
conveniently forwarded to python via the exception translation
mechanisms of boost::python.
Most functions/members throw exceptions. This way, error states are
conveniently forwarded to python via the exception translation
mechanisms of boost::python.
*/

// for ssize_t (signed int type as large as pointer type)
#include <cstdlib>
#include <stdexcept>
#include <utility>
#include <memory>

struct AVCodecContext;
struct AVFrame;
Expand All @@ -37,13 +39,20 @@ class H264Exception : public std::runtime_error
class H264InitFailure : public H264Exception
{
public:
H264InitFailure(const char* s) : H264Exception(s) {}
H264InitFailure(const char* s) : H264Exception(s) {}
};

class H264DecodeFailure : public H264Exception
{
public:
H264DecodeFailure(const char* s) : H264Exception(s) {}
H264DecodeFailure(const char* s) : H264Exception(s) {}
};


struct ParseResult
{
ptrdiff_t num_bytes_consumed = 0;
const AVFrame* frame = nullptr;
};


Expand All @@ -52,43 +61,44 @@ class H264Decoder
/* Persistent things here, using RAII for cleanup. */
AVCodecContext *context;
AVFrame *frame;
AVCodec *codec;
const AVCodec *codec;
AVCodecParserContext *parser;
/* In the documentation example on the github master branch, the
packet is put on the heap. This is done here to store the pointers
to the encoded data, which must be kept around between calls to
parse- and decode frame. In release 11 it is put on the stack, too.
/* In the documentation example on the github master branch, the
packet is put on the heap. This is done here to store the pointers
to the encoded data, which must be kept around between calls to
parse- and decode frame. In release 11 it is put on the stack, too.
*/
AVPacket *pkt;
std::unique_ptr<AVPacket> pkt_;

const AVFrame* decode_frame();

public:
H264Decoder();
~H264Decoder();
/* First, parse a continuous data stream, dividing it into
packets. When there is enough data to form a new frame, decode
the data and return the frame. parse returns the number
of consumed bytes of the input stream. It stops consuming
/* First, parse a continuous data stream, dividing it into
packets. When there is enough data to form a new frame, decode
the data and return the frame. parse returns the number
of consumed bytes of the input stream. It stops consuming
bytes at frame boundaries.
*/
ssize_t parse(const unsigned char* in_data, ssize_t in_size);
bool is_frame_available() const;
const AVFrame& decode_frame();
ParseResult parse(const unsigned char* in_data, ptrdiff_t in_size);
};

// TODO: Rename to OutputStage or so?!
class ConverterRGB24
{
SwsContext *context;
AVFrame *framergb;

public:
ConverterRGB24();
~ConverterRGB24();

/* Returns, given a width and height,
/* Returns, given a width and height,
how many bytes the frame buffer is going to need. */
int predict_size(int w, int h);
/* Given a decoded frame, convert it to RGB format and fill
out_rgb with the result. Returns a AVFrame structure holding
/* Given a decoded frame, convert it to RGB format and fill
out_rgb with the result. Returns a AVFrame structure holding
additional information about the RGB frame, such as the number of
bytes in a row and so on. */
const AVFrame& convert(const AVFrame &frame, unsigned char* out_rgb);
Expand Down
2 changes: 1 addition & 1 deletion tello_driver/include/tello_driver_node.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <asio.hpp>

#include "rclcpp/rclcpp.hpp"
#include "cv_bridge/cv_bridge.h"
#include "cv_bridge/cv_bridge.hpp"
#include "geometry_msgs/msg/twist.hpp"
#include "sensor_msgs/msg/camera_info.hpp"
#include "tello_msgs/msg/flight_data.hpp"
Expand Down
8 changes: 4 additions & 4 deletions tello_driver/src/video_socket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ namespace tello_driver
try {
while (next < seq_buffer_next_) {
// Parse h264
ssize_t consumed = decoder_.parse(seq_buffer_.data() + next, seq_buffer_next_ - next);
auto result = decoder_.parse(seq_buffer_.data() + next, seq_buffer_next_ - next);

// Is a frame available?
if (decoder_.is_frame_available()) {
if (result.frame) {
// Decode the frame
const AVFrame &frame = decoder_.decode_frame();
const AVFrame &frame = *result.frame;

// Convert pixels from YUV420P to BGR24
int size = converter_.predict_size(frame.width, frame.height);
Expand Down Expand Up @@ -113,7 +113,7 @@ namespace tello_driver
}
}

next += consumed;
next += result.num_bytes_consumed;
}
}
catch (std::runtime_error e) {
Expand Down

0 comments on commit 2f53c49

Please sign in to comment.