Use FFmpeg to draw motion vectors (MVs)

This article uses FFmpeg to extract the motion vectors (MVs) of each video frame and OpenCV to draw them.

Motion vector (MV)

Anyone familiar with video coding knows the motion vector: the vector that describes, during inter-frame prediction, the positional relationship between the current block and its reference block. Inter-frame prediction can be unidirectional (P frames) or bidirectional (B frames); a unidirectionally predicted block needs one MV, while a bidirectionally predicted block needs two.

An MV is defined in FFmpeg as follows:

typedef struct AVMotionVector {
    /**
     * Where the current macroblock comes from; negative value when it comes
     * from the past, positive value when it comes from the future.
     * XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
     */
    // Whether the reference block is in a past frame (negative) or a future frame (positive)
    int32_t source;
    /** Width and height of the block. */
    uint8_t w, h;
    /** Absolute source position. Can be outside the frame area. */
    int16_t src_x, src_y;
    /** Absolute destination position. Can be outside the frame area. */
    int16_t dst_x, dst_y;
    /** Extra flag information. Currently unused. */
    uint64_t flags;
    /**
     * Motion vector
     * src_x = dst_x + motion_x / motion_scale
     * src_y = dst_y + motion_y / motion_scale
     */
    int32_t motion_x, motion_y;
    uint16_t motion_scale;
} AVMotionVector;
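The comment on motion_x/motion_y gives the key relationship: the source position equals the destination position plus the scaled motion vector. A minimal sketch that checks this for one vector (the helper name check_mv is ours, for illustration only):

#include <libavutil/motion_vector.h>
#include <stdio.h>

// Verify the documented relationship:
//   src_x = dst_x + motion_x / motion_scale
//   src_y = dst_y + motion_y / motion_scale
static void check_mv(const AVMotionVector *mv)
{
    int pred_x = mv->dst_x + mv->motion_x / mv->motion_scale;
    int pred_y = mv->dst_y + mv->motion_y / mv->motion_scale;
    printf("%dx%d block at (%d,%d): predicted source (%d,%d), stored source (%d,%d), reference in the %s\n",
           mv->w, mv->h, mv->dst_x, mv->dst_y,
           pred_x, pred_y, mv->src_x, mv->src_y,
           mv->source < 0 ? "past" : "future");
}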

FFmpeg's sample code (doc/examples/extract_mvs.c) provides an example program for MV extraction; once the MVs are extracted, they can be drawn on the frame with OpenCV.

extern "C"
{
#include <libavutil/motion_vector.h>
#include <libavformat/avformat.h>
}
#include <opencv.hpp>
using namespace cv;
​
static AVFormatContext *fmt_ctx = NULL;
static AVCodecContext *video_dec_ctx = NULL;
static AVStream *video_stream = NULL;
static const char *src_filename = NULL;
​
static int video_stream_idx = - 1;
static AVFrame *frame = NULL;
static int video_frame_count = 0;
​
FILE *fout;
VideoWriter out;
​
static int decode_packet(const AVPacket *pkt)
{
    char errbuf[128];
    int ret = avcodec_send_packet(video_dec_ctx, pkt);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        fprintf(stderr, "Error while sending a packet to the decoder: %s\n", errbuf);
        return ret;
    }

    while (ret >= 0) {
        ret = avcodec_receive_frame(video_dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        else if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            fprintf(stderr, "Error while receiving a frame from the decoder: %s\n", errbuf);
            return ret;
        }

        if (ret >= 0) {
            int i;
            AVFrameSideData *sd;

            video_frame_count++;
            sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
            // Copy the decoded YUV420P planes into one contiguous buffer
            cv::Mat yuvImg;
            yuvImg.create(frame->height * 3 / 2, frame->width, CV_8UC1);
            memcpy(yuvImg.data, frame->data[0], frame->linesize[0] * frame->height * sizeof(uint8_t));
            memcpy(yuvImg.data + frame->linesize[0] * frame->height * sizeof(uint8_t), frame->data[1], frame->linesize[1] * frame->height / 2 * sizeof(uint8_t));
            memcpy(yuvImg.data + (frame->linesize[0] * frame->height + frame->linesize[1] * frame->height / 2) * sizeof(uint8_t), frame->data[2], frame->linesize[2] * frame->height / 2 * sizeof(uint8_t));
            cv::Mat rgbImg;
            cv::cvtColor(yuvImg, rgbImg, CV_YUV2BGR_I420);
            if (sd) {
                const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
                for (i = 0; i < sd->size / sizeof(*mvs); i++) {
                    const AVMotionVector *mv = &mvs[i];
                    // Draw each MV as a line from the source position to the destination position
                    line(rgbImg, Point(mv->src_x, mv->src_y), Point(mv->dst_x, mv->dst_y), Scalar(0, 0, 255));
                }
            }
            // Write the frame with the MVs drawn on it to the output video
            out << rgbImg;
            av_frame_unref(frame);
        }
    }
    return 0;
}

static int open_codec_context(AVFormatContext *fmt_ctx, enum AVMediaType type)
{
    int ret;
    AVStream *st;
    AVCodecContext *dec_ctx = NULL;
    AVCodec *dec = NULL;
    AVDictionary *opts = NULL;

    ret = av_find_best_stream(fmt_ctx, type, -1, -1, &dec, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not find %s stream in input file '%s'\n", av_get_media_type_string(type), src_filename);
        return ret;
    }
    else {
        int stream_idx = ret;
        st = fmt_ctx->streams[stream_idx];

        dec_ctx = avcodec_alloc_context3(dec);
        if (!dec_ctx) {
            fprintf(stderr, "Failed to allocate codec\n");
            return AVERROR(EINVAL);
        }

        ret = avcodec_parameters_to_context(dec_ctx, st->codecpar);
        if (ret < 0) {
            fprintf(stderr, "Failed to copy codec parameters to codec context\n");
            return ret;
        }

        /* Init the video decoder; +export_mvs makes it export MVs as frame side data */
        av_dict_set(&opts, "flags2", "+export_mvs", 0);
        if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) {
            fprintf(stderr, "Failed to open %s codec\n", av_get_media_type_string(type));
            return ret;
        }

        video_stream_idx = stream_idx;
        video_stream = fmt_ctx->streams[video_stream_idx];
        video_dec_ctx = dec_ctx;
    }

    return 0;
}

int main(int argc, char **argv)
{
    fout = fopen("out.yuv", "wb");
    //out.open("out.avi", CV_FOURCC('X', 'V', 'I', 'D'), 25, Size(640, 272));
    out.open("out.mp4", CV_FOURCC('D', 'I', 'V', 'X'), 25, Size(640, 272));
    int ret = 0;
    AVPacket pkt = { 0 };

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <video>\n", argv[0]);
        exit(1);
    }
    src_filename = argv[1];

    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
        fprintf(stderr, "Could not open source file %s\n", src_filename);
        exit(1);
    }

    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
        fprintf(stderr, "Could not find stream information\n");
        exit(1);
    }

    open_codec_context(fmt_ctx, AVMEDIA_TYPE_VIDEO);

    av_dump_format(fmt_ctx, 0, src_filename, 0);

    if (!video_stream) {
        fprintf(stderr, "Could not find video stream in the input, aborting\n");
        ret = 1;
        goto end;
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate frame\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n");

    /* read frames from the file */
    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
        if (pkt.stream_index == video_stream_idx)
            ret = decode_packet(&pkt);
        av_packet_unref(&pkt);
        if (ret < 0)
            break;
    }

    /* flush cached frames */
    decode_packet(NULL);

end:
    avcodec_free_context(&video_dec_ctx);
    avformat_close_input(&fmt_ctx);
    av_frame_free(&frame);
    fclose(fout);
    system("pause");
    return ret < 0;
}
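For a quick sanity check that needs no code at all, FFmpeg's built-in codecview filter can overlay the exported MVs directly (flag names per the FFmpeg documentation; availability depends on your build):

ffplay -flags2 +export_mvs input.mp4 -vf codecview=mv=pf+bf+bb

Here pf selects the forward MVs of P frames, and bf/bb the forward and backward MVs of B frames.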

The problems

There are several problems with the MVs extracted from FFmpeg:

  • The position of the macroblock in the image is not given explicitly
  • For bidirectional prediction, the two MVs are not explicitly identified (a partial workaround is sketched below)
  • The exact reference picture is not indicated, only a past/future direction via the source field
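On the second point, the sign of source at least separates the two directions of a bi-predicted block: negative means the reference lies in the past, positive in the future. A minimal sketch, a drop-in replacement for the single line() call in the drawing loop above, that colors the two directions differently:

// Inside the loop over mvs[i]:
const AVMotionVector *mv = &mvs[i];
// source < 0: reference block in a past frame (forward prediction)
// source > 0: reference block in a future frame (backward prediction)
Scalar color = (mv->source < 0) ? Scalar(0, 0, 255)   // forward: red
                                : Scalar(255, 0, 0);  // backward: blue
line(rgbImg, Point(mv->src_x, mv->src_y), Point(mv->dst_x, mv->dst_y), color);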

If you are interested, follow the WeChat official account "Video Coding".