[FFmpeg-devel] [PATCH] libi264: Add Hardware Accelerated H.264 Encoder based on libVA

hamza at mayartech.com hamza at mayartech.com
Thu Dec 31 18:35:47 CET 2015


From: Bryan Christ <bryan.christ at mediafire.com>

This commit adds a hardware accelerated H.264 encoder which utilizes
libVA (open source implementation of VA-API). Information about libva
is available at: https://en.wikipedia.org/wiki/Video_Acceleration_API
This encoder is only available on Linux and on supported hardware, which
can be viewed at:
https://en.wikipedia.org/wiki/Video_Acceleration_API#Supported_hardware_and_drivers

The short name for the encoder is "libi264". The encoder must be enabled at
configure time using the --enable-libi264 switch. By default it is
turned off.
---
 Changelog                           |    1 +
 MAINTAINERS                         |    1 +
 configure                           |    8 +-
 doc/general.texi                    |   11 +
 libavcodec/Makefile                 |    1 +
 libavcodec/allcodecs.c              |    1 +
 libavcodec/libi264.c                | 1476 +++++++++++++++++++++++++++++++++++
 libavcodec/libi264.h                |  107 +++
 libavcodec/libi264_param_set.c      |  425 ++++++++++
 libavcodec/libi264_param_set.h      |   81 ++
 libavcodec/libi264_va_display.c     |  104 +++
 libavcodec/libi264_va_display.h     |   77 ++
 libavcodec/libi264_va_display_drm.c |   96 +++
 libavcodec/libi264_va_display_x11.c |  171 ++++
 libavcodec/version.h                |    2 +-
 15 files changed, 2560 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/libi264.c
 create mode 100644 libavcodec/libi264.h
 create mode 100644 libavcodec/libi264_param_set.c
 create mode 100644 libavcodec/libi264_param_set.h
 create mode 100644 libavcodec/libi264_va_display.c
 create mode 100644 libavcodec/libi264_va_display.h
 create mode 100644 libavcodec/libi264_va_display_drm.c
 create mode 100644 libavcodec/libi264_va_display_x11.c

diff --git a/Changelog b/Changelog
index d9c2ea8..99acb56 100644
--- a/Changelog
+++ b/Changelog
@@ -49,6 +49,7 @@ version <next>:
 - VAAPI VP9 hwaccel
 - audio high-order multiband parametric equalizer
 - automatic bitstream filtering
+- H.264 hwaccelerated encoding through libVA
 
 
 version 2.8:
diff --git a/MAINTAINERS b/MAINTAINERS
index 9add13d..e37cb6f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -203,6 +203,7 @@ Codecs:
   libcelt_dec.c                         Nicolas George
   libdirac*                             David Conrad
   libgsm.c                              Michel Bardiaux
+  libi264*                              Bryan Christ
   libkvazaar.c                          Arttu Ylä-Outinen
   libopenjpeg.c                         Jaikrishnan Menon
   libopenjpegenc.c                      Michael Bradshaw
diff --git a/configure b/configure
index da74ccd..335c172 100755
--- a/configure
+++ b/configure
@@ -265,6 +265,7 @@ External library support:
   --enable-libwavpack      enable wavpack encoding via libwavpack [no]
   --enable-libwebp         enable WebP encoding via libwebp [no]
   --enable-libx264         enable H.264 encoding via x264 [no]
+  --enable-libi264         enable H.264 encoding via Intel's libva [no]
   --enable-libx265         enable HEVC encoding via x265 [no]
   --enable-libxavs         enable AVS encoding via xavs [no]
   --enable-libxcb          enable X11 grabbing using XCB [autodetect]
@@ -1484,6 +1485,9 @@ EXTERNAL_LIBRARY_LIST="
     libtwolame
     libutvideo
     libv4l2
+    libva
+    libva-drm
+    libva-x11
     libvidstab
     libvo_aacenc
     libvo_amrwbenc
@@ -1491,6 +1495,7 @@ EXTERNAL_LIBRARY_LIST="
     libvpx
     libwavpack
     libwebp
+    libX11
     libx264
     libx265
     libxavs
@@ -2658,7 +2663,8 @@ libwebp_anim_encoder_deps="libwebp"
 libx262_encoder_deps="libx262"
 libx264_encoder_deps="libx264"
 libx264rgb_encoder_deps="libx264"
 libx264rgb_encoder_select="libx264_encoder"
+libi264_encoder_deps="libi264"
 libx265_encoder_deps="libx265"
 libxavs_encoder_deps="libxavs"
 libxvid_encoder_deps="libxvid"
@@ -5528,6 +5534,7 @@ enabled libx264           && { use_pkg_config x264 "stdint.h x264.h" x264_encode
                                die "ERROR: libx264 must be installed and version must be >= 0.118."; } &&
                              { check_cpp_condition x264.h "X264_MPEG2" &&
                                enable libx262; }
+enabled libi264           && require libva va/va.h vaInitialize -lva -lX11 -lva-x11 -lva-drm
 enabled libx265           && require_pkg_config x265 x265.h x265_api_get &&
                              { check_cpp_condition x265.h "X265_BUILD >= 57" ||
                                die "ERROR: libx265 version must be >= 57."; }
diff --git a/doc/general.texi b/doc/general.texi
index 06933ab..bca7ca0 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -131,6 +131,17 @@ x264 is under the GNU Public License Version 2 or later
 details), you must upgrade FFmpeg's license to GPL in order to use it.
 @end float
 
+@section libva
+
+FFmpeg can make use of the libva library for H.264 encoding. libva is an
+implementation of VA-API for Linux. It can only be used for H.264 encoding
+on Unix-based systems with Intel GPUs that support hardware-accelerated
+H.264 encoding.
+
+Go to @url{http://www.freedesktop.org/wiki/Software/vaapi/} and follow the
+instructions for installing the library. Then pass @code{--enable-libi264} to
+configure to enable it.
+
 @section x265
 
 FFmpeg can make use of the x265 library for HEVC encoding.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 0717d0a..d4a8200 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -847,6 +847,7 @@ OBJS-$(CONFIG_LIBWEBP_ENCODER)            += libwebpenc_common.o libwebpenc.o
 OBJS-$(CONFIG_LIBWEBP_ANIM_ENCODER)       += libwebpenc_common.o libwebpenc_animencoder.o
 OBJS-$(CONFIG_LIBX262_ENCODER)            += libx264.o
 OBJS-$(CONFIG_LIBX264_ENCODER)            += libx264.o
+OBJS-$(CONFIG_LIBI264_ENCODER)            += libi264.o libi264_va_display_drm.o libi264_va_display.o libi264_va_display_x11.o libi264_param_set.o
 OBJS-$(CONFIG_LIBX265_ENCODER)            += libx265.o
 OBJS-$(CONFIG_LIBXAVS_ENCODER)            += libxavs.o
 OBJS-$(CONFIG_LIBXVID_ENCODER)            += libxvid.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 4eeb6f3..3ad90b6 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -592,6 +592,7 @@ void avcodec_register_all(void)
     REGISTER_ENCODER(LIBXVID,           libxvid);
     REGISTER_DECODER(LIBZVBI_TELETEXT,  libzvbi_teletext);
     REGISTER_ENCODER(LIBAACPLUS,        libaacplus);
+    REGISTER_ENCODER(LIBI264,           libi264);
 
     /* text */
     REGISTER_DECODER(BINTEXT,           bintext);
diff --git a/libavcodec/libi264.c b/libavcodec/libi264.c
new file mode 100644
index 0000000..272954c
--- /dev/null
+++ b/libavcodec/libi264.c
@@ -0,0 +1,1476 @@
+/*
+ * Interface for libva H.264 encoding using libva library meant for hardware
+ * encoding on intel processors
+ * Copyright (C) 2015 Bryan Christ
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <va/va.h>
+
+#include "libi264.h"
+#include "libi264_param_set.h"
+#include "libi264_va_display.h"
+#include "avcodec.h"
+#include "libavutil/internal.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "libavcodec/internal.h"
+
+
+#define FRAME_P 0 /* frame type codes; compared against I264Context.current_frame_type */
+#define FRAME_B 1
+#define FRAME_I 2
+#define FRAME_IDR 7
+
+
+static  unsigned int log2_max_frame_num = 16;          /* written to the SPS as log2_max_frame_num_minus4 + 4 */
+static  unsigned int log2_max_pic_order_cnt_lsb = 8;   /* written to the SPS as log2_max_pic_order_cnt_lsb_minus4 + 4 */
+static  unsigned int max_frame_num = (1<<16);          /* 2^log2_max_frame_num (was 2<<16, i.e. 2^17) */
+static  unsigned int max_pic_order_cnt_lsb = (1<<8);   /* 2^log2_max_pic_order_cnt_lsb (was 2<<8, i.e. 2^9) */
+
+
+#define CHECK_VASTATUS(avctx, va_status,func)                                         \
+    if (va_status != VA_STATUS_SUCCESS) {                                             \
+        av_log(avctx, AV_LOG_ERROR, "%s:%s (%d) failed\n", __func__, func, __LINE__); \
+        return -1;                                                                    \
+    } /* NOTE: expands to a bare if-block that logs and returns -1 from the calling function; no cleanup is done */
+
+#define current_slot(ictx) (ictx->current_frame_display % SURFACE_NUM) /* current_frame_display modulo SURFACE_NUM; indexes ref_surface[] and coded_buf[] */
+
+/* Map a rate-control name to its VA_RC_* value (prefix match); unknown names give VA_RC_VBR. */
+static int string_to_rc(char *str)
+{
+    /* "VBR_CONSTRAINED" must be tested before "VBR": the 3-character prefix test
+     * for "VBR" also matches it, which used to make the constrained mode unreachable. */
+    if (!strncmp(str, "VBR_CONSTRAINED", 15))
+        return VA_RC_VBR_CONSTRAINED;
+    if (!strncmp(str, "NONE", 4))
+        return VA_RC_NONE;
+    if (!strncmp(str, "CBR", 3))
+        return VA_RC_CBR;
+    if (!strncmp(str, "VBR", 3))
+        return VA_RC_VBR;
+    if (!strncmp(str, "VCM", 3))
+        return VA_RC_VCM;
+    if (!strncmp(str, "CQP", 3))
+        return VA_RC_CQP;
+
+    /* unrecognised names fall back to VBR, as before */
+    return VA_RC_VBR;
+}
+
+/*
+ * Fill the encoder context with its defaults, derived from the generic
+ * AVCodecContext settings and the private string options (profile, rc mode).
+ */
+static void i264_param_default(AVCodecContext *avctx, I264Context *ctx)
+{
+    /* IDR period follows the GOP size; 250 when the caller did not set one */
+    ctx->intra_idr_period = avctx->gop_size > 0 ? avctx->gop_size : 250;
+
+    /* the I period must be positive and evenly divide the IDR period */
+    if (ctx->intra_period <= 0 ||
+        ctx->intra_period > ctx->intra_idr_period ||
+        ctx->intra_idr_period % ctx->intra_period != 0)
+        ctx->intra_period = ctx->intra_idr_period;
+
+    /* "BP"/"MP"/"HP" select the profile; anything else (or nothing) is High */
+    ctx->h264_profile = VAProfileH264High;
+    if (ctx->profile) {
+        if (strncmp(ctx->profile, "BP", 2) == 0)
+            ctx->h264_profile = VAProfileH264Baseline;
+        else if (strncmp(ctx->profile, "MP", 2) == 0)
+            ctx->h264_profile = VAProfileH264Main;
+    }
+
+    if (ctx->rc_mode_str)
+        ctx->rc_mode = string_to_rc(ctx->rc_mode_str);
+    else
+        ctx->rc_mode = VA_RC_VBR;
+
+    /* an explicit bitrate forces CBR, overriding the mode chosen above */
+    if (avctx->bit_rate > 0) {
+        ctx->rc_mode = VA_RC_CBR;
+        ctx->frame_bitrate = avctx->bit_rate;
+    } else {
+        ctx->frame_bitrate = 0;
+    }
+
+    /* entropy mode: taken from coder_type when it is set (>= 0), otherwise 1 */
+    if (avctx->coder_type >= 0)
+        ctx->h264_entropy_mode = avctx->coder_type == FF_CODER_TYPE_AC;
+    else
+        ctx->h264_entropy_mode = 1;
+
+    /* ip_period is max_b_frames + 1, or 1 when max_b_frames is negative */
+    if (avctx->max_b_frames >= 0)
+        ctx->ip_period = 1 + avctx->max_b_frames;
+    else
+        ctx->ip_period = 1;
+
+    /*
+     * Keep ip_period below the I period, but never let it reach 0: with
+     * intra_period == 1 the plain "intra_period - 1" clamp produced 0.
+     */
+    if (ctx->ip_period >= ctx->intra_period)
+        ctx->ip_period = ctx->intra_period > 1 ? ctx->intra_period - 1 : 1;
+
+    ctx->constraint_set_flag = 0;
+    ctx->config_attrib_num = 0;
+    ctx->h264_packedheader = 0;
+    ctx->h264_maxref = (1<<16|1);
+    ctx->num_ref_frames = 2;
+
+    ctx->initial_qp = avctx->qmin >= 0 ? avctx->qmin : 26;
+    ctx->minimal_qp = 0;
+
+    /* frame counters: the first frame is an IDR */
+    ctx->current_frame_type = FRAME_IDR;
+    ctx->current_frame_display = 0;
+    ctx->current_frame_num = 0;
+    ctx->current_frame_encoding = 0;
+
+    ctx->nb_surfaces_loaded = 0;
+    ctx->last_p = 0;
+}
+
+/* Serialise the PPS NAL unit; stores the buffer in *header_buffer and returns the stream's bit_offset. */
+static int build_packed_pic_buffer(I264Context *ictx, unsigned char **header_buffer)
+{
+    I264Bitstream pps_stream;
+
+    /* start code and NAL header first, then the PPS payload built from pic_param */
+    ff_i264_bitstream_start(&pps_stream);
+    ff_i264_nal_start_code_prefix(&pps_stream);
+    ff_i264_nal_header(&pps_stream, NAL_REF_IDC_HIGH, NAL_PPS);
+    ff_i264_pps_rbsp(&ictx->pic_param, &pps_stream);
+    ff_i264_bitstream_end(&pps_stream);
+    *header_buffer = (unsigned char *)pps_stream.buffer;
+    return pps_stream.bit_offset;
+}
+
+/* Serialise the SPS NAL unit; stores the buffer in *header_buffer and returns the stream's bit_offset. */
+static int build_packed_seq_buffer(I264Context *ictx, unsigned char **header_buffer)
+{
+    I264Bitstream sps_stream;
+
+    /* start code and NAL header first, then the SPS payload built from seq_param */
+    ff_i264_bitstream_start(&sps_stream);
+    ff_i264_nal_start_code_prefix(&sps_stream);
+    ff_i264_nal_header(&sps_stream, NAL_REF_IDC_HIGH, NAL_SPS);
+    ff_i264_sps_rbsp(ictx, &ictx->seq_param, &sps_stream);
+    ff_i264_bitstream_end(&sps_stream);
+    *header_buffer = (unsigned char *)sps_stream.buffer;
+    return sps_stream.bit_offset;
+}
+
+/* Serialise the slice header NAL unit; stores the buffer in *header_buffer and
+ * returns the stream's bit_offset, or -1 if the slice type is not I, P or B. */
+static int build_packed_slice_buffer(I264Context *ictx, unsigned char **header_buffer)
+{
+    I264Bitstream slice_stream;
+    int ref_idc, unit_type = NAL_NON_IDR;
+
+    ff_i264_bitstream_start(&slice_stream);
+    ff_i264_nal_start_code_prefix(&slice_stream);
+    if (IS_I_SLICE(ictx->slice_param.slice_type)) {
+        ref_idc = NAL_REF_IDC_HIGH;
+        if (ictx->pic_param.pic_fields.bits.idr_pic_flag)
+            unit_type = NAL_IDR;
+    } else if (IS_P_SLICE(ictx->slice_param.slice_type)) {
+        ref_idc = NAL_REF_IDC_MEDIUM;
+    } else if (IS_B_SLICE(ictx->slice_param.slice_type)) {
+        ref_idc = ictx->pic_param.pic_fields.bits.reference_pic_flag ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE;
+    } else {
+        return -1;
+    }
+    ff_i264_nal_header(&slice_stream, ref_idc, unit_type);
+    ff_i264_slice_header(ictx, &slice_stream);
+    ff_i264_bitstream_end(&slice_stream);
+    *header_buffer = (unsigned char *)slice_stream.buffer;
+    return slice_stream.bit_offset;
+}
+
+/* Populate the VA-API H.264 sequence parameters from the encoder context; always returns 0. */
+static int init_sps(AVCodecContext *avctx, I264Context *ctx, VAEncSequenceParameterBufferH264 *seq_param)
+{
+    /* picture geometry, in 16-pixel macroblocks */
+    seq_param->picture_width_in_mbs = ctx->frame_width_mbaligned / 16;
+    seq_param->picture_height_in_mbs = ctx->frame_height_mbaligned / 16;
+
+    /* GOP structure and bitrate as prepared in the context; level and timing are fixed values */
+    seq_param->intra_idr_period = ctx->intra_idr_period;
+    seq_param->intra_period = ctx->intra_period;
+    seq_param->ip_period = ctx->ip_period;
+    seq_param->bits_per_second = ctx->frame_bitrate;
+    seq_param->max_num_ref_frames = ctx->num_ref_frames;
+    seq_param->level_idc = 41;
+    seq_param->num_units_in_tick = 15; /* tick duration = num_units_in_tick / time_scale */
+    seq_param->time_scale = 900;
+    seq_param->seq_fields.bits.chroma_format_idc = 1;
+    seq_param->seq_fields.bits.frame_mbs_only_flag = 1;
+    seq_param->seq_fields.bits.direct_8x8_inference_flag = 1;
+    seq_param->seq_fields.bits.log2_max_frame_num_minus4 = log2_max_frame_num - 4;
+    seq_param->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = log2_max_pic_order_cnt_lsb - 4;
+
+    /* crop the macroblock padding off the right/bottom edges (offsets are halved) */
+    if (avctx->width != ctx->frame_width_mbaligned || avctx->height != ctx->frame_height_mbaligned) {
+        seq_param->frame_cropping_flag = 1;
+        seq_param->frame_crop_left_offset = 0;
+        seq_param->frame_crop_top_offset = 0;
+        seq_param->frame_crop_right_offset = (ctx->frame_width_mbaligned - avctx->width)/2;
+        seq_param->frame_crop_bottom_offset = (ctx->frame_height_mbaligned - avctx->height)/2;
+    }
+    return 0;
+}
+
+static int calc_poc(I264Context *ictx, int pic_order_cnt_lsb)
+{
+    static int pic_order_cnt_msb_ref = 0, pic_order_cnt_lsb_ref = 0; /* NOTE(review): function-static, so this state is shared by every caller */
+    int prev_pic_order_cnt_msb, prev_pic_order_cnt_lsb;
+    int pic_order_cnt_msb, top_field_order_cnt;
+    /* Returns msb + lsb for the given lsb; an IDR frame restarts from a previous msb/lsb of 0. */
+    if (ictx->current_frame_type == FRAME_IDR)
+        prev_pic_order_cnt_msb = prev_pic_order_cnt_lsb = 0;
+    else {
+        prev_pic_order_cnt_msb = pic_order_cnt_msb_ref;
+        prev_pic_order_cnt_lsb = pic_order_cnt_lsb_ref;
+    }
+    /* lsb fell by at least half the range: step msb up; lsb rose by more than half the range: step msb down */
+    if ((pic_order_cnt_lsb < prev_pic_order_cnt_lsb) &&
+        ((prev_pic_order_cnt_lsb - pic_order_cnt_lsb) >= (int)(max_pic_order_cnt_lsb / 2)))
+        pic_order_cnt_msb = prev_pic_order_cnt_msb + max_pic_order_cnt_lsb;
+    else if ((pic_order_cnt_lsb > prev_pic_order_cnt_lsb) &&
+             ((pic_order_cnt_lsb - prev_pic_order_cnt_lsb) > (int)(max_pic_order_cnt_lsb / 2)))
+        pic_order_cnt_msb = prev_pic_order_cnt_msb - max_pic_order_cnt_lsb;
+    else
+        pic_order_cnt_msb = prev_pic_order_cnt_msb;
+
+    top_field_order_cnt = pic_order_cnt_msb + pic_order_cnt_lsb;
+    /* B frames do not update the remembered msb/lsb pair */
+    if (ictx->current_frame_type != FRAME_B) {
+        pic_order_cnt_msb_ref = pic_order_cnt_msb;
+        pic_order_cnt_lsb_ref = pic_order_cnt_lsb;
+    }
+
+    return top_field_order_cnt;
+}
+
+#define partition(ref, field, key, ascending)   \
+    while (i <= j) {                            \
+        if (ascending) {                        \
+            while (ref[i].field < key)          \
+                i++;                            \
+            while (ref[j].field > key)          \
+                j--;                            \
+        } else {                                \
+            while (ref[i].field > key)          \
+                i++;                            \
+            while (ref[j].field < key)          \
+                j--;                            \
+        }                                       \
+        if (i <= j) {                           \
+            tmp = ref[i];                       \
+            ref[i] = ref[j];                    \
+            ref[j] = tmp;                       \
+            i++;                                \
+            j--;                                \
+        }                                       \
+    }                                           \
+/* partition() uses the caller's locals i, j and tmp, swaps ref[] entries around `key`, and exits with i > j */
+static void sort_one(VAPictureH264 ref[], int left, int right,
+                     int ascending, int frame_idx)
+{
+    int i = left, j = right;
+    unsigned int key;
+    VAPictureH264 tmp;
+    /* Sort ref[left..right] in place; frame_idx != 0 sorts by .frame_idx, otherwise by .TopFieldOrderCnt. */
+    if (frame_idx) {
+        key = ref[(left + right) / 2].frame_idx; /* pivot value: the middle element */
+        partition(ref, frame_idx, key, ascending);
+    } else {
+        key = ref[(left + right) / 2].TopFieldOrderCnt;
+        partition(ref, TopFieldOrderCnt, (signed int)key, ascending); /* compared as signed */
+    }
+
+    /* recurse into the two sub-ranges left by partition() */
+    if (left < j)
+        sort_one(ref, left, j, ascending, frame_idx);
+
+    if (i < right)
+        sort_one(ref, i, right, ascending, frame_idx);
+}
+
+static void sort_two(VAPictureH264 ref[], int left, int right, unsigned int key, unsigned int frame_idx,
+                     int partition_ascending, int list0_ascending, int list1_ascending)
+{
+    int i = left, j = right;
+    VAPictureH264 tmp;
+    /* Split ref[left..right] around the caller-supplied key, then sort each side with its own direction. */
+    if (frame_idx) {
+        partition(ref, frame_idx, key, partition_ascending);
+    } else {
+        partition(ref, TopFieldOrderCnt, (signed int)key, partition_ascending);
+    }
+    /* after partition(): entries below index i form the first part, entries above index j the second */
+    sort_one(ref, left, i-1, list0_ascending, frame_idx);
+    sort_one(ref, j+1, right, list1_ascending, frame_idx);
+}
+
+/* Rebuild ref_pic_list0_P (P frame) or ref_pic_list0_B/ref_pic_list1_B (B frame); always returns 0. */
+static int update_ref_pic_list(I264Context *ictx)
+{
+    const size_t list_bytes = ictx->num_short_term * sizeof(VAPictureH264);
+    const int last = ictx->num_short_term - 1;
+    unsigned int poc_now = ictx->current_curr_pic.TopFieldOrderCnt;
+
+    if (ictx->current_frame_type == FRAME_P) {
+        /* one list, ordered by descending frame_idx */
+        memcpy(ictx->ref_pic_list0_P, ictx->reference_frames, list_bytes);
+        sort_one(ictx->ref_pic_list0_P, 0, last, 0, 1);
+    } else if (ictx->current_frame_type == FRAME_B) {
+        /* two lists, each split around the current POC, with opposite orderings */
+        memcpy(ictx->ref_pic_list0_B, ictx->reference_frames, list_bytes);
+        sort_two(ictx->ref_pic_list0_B, 0, last, poc_now, 0, 1, 0, 1);
+        memcpy(ictx->ref_pic_list1_B, ictx->reference_frames, list_bytes);
+        sort_two(ictx->ref_pic_list1_B, 0, last, poc_now, 0, 0, 1, 0);
+    }
+
+    return 0;
+}
+
+/* Set the SEI/HRD timing fields of the context from the SPS bitrate (bits_per_second). */
+static void init_sei(I264Context *ictx)
+{
+    long long target_bit_rate = ictx->seq_param.bits_per_second;
+    long long init_cpb_size = (target_bit_rate * 8) >> 10; /* wide type: bitrate * 8 can exceed INT_MAX */
+
+    /* No bitrate configured: the old 0.0 / 0 gave NaN, and converting NaN to an integer is undefined. */
+    ictx->initial_cpb_removal_delay = target_bit_rate > 0 ?
+        init_cpb_size * 0.5 * 1024 / target_bit_rate * 90000 : 0;
+
+    ictx->cpb_removal_delay = 2;
+    ictx->initial_cpb_removal_delay_length = 24;
+    ictx->cpb_removal_delay_length = 24;
+    ictx->dpb_output_delay_length = 24;
+}
+
+static int init_pps(I264Context *ictx)
+{
+    int i = 0;
+    /* Fill ictx->pic_param for the frame about to be encoded; always returns 0. */
+    ictx->pic_param.CurrPic.picture_id = ictx->ref_surface[current_slot(ictx)];
+    ictx->pic_param.CurrPic.frame_idx = ictx->current_frame_num;
+    ictx->pic_param.CurrPic.flags = 0;
+    ictx->pic_param.CurrPic.TopFieldOrderCnt = calc_poc(ictx, (ictx->current_frame_display - ictx->current_IDR_display) % max_pic_order_cnt_lsb);
+    ictx->pic_param.CurrPic.BottomFieldOrderCnt = ictx->pic_param.CurrPic.TopFieldOrderCnt;
+    ictx->current_curr_pic = ictx->pic_param.CurrPic;
+    /* NOTE(review): the TO_DEL environment variable selects a reduced reference list; looks like a debug hook */
+    if (getenv("TO_DEL")) { /* set RefPicList into ReferenceFrames */
+        update_ref_pic_list(ictx); /* calc RefPicList */
+        memset(ictx->pic_param.ReferenceFrames, 0xff, 16 * sizeof(VAPictureH264)); /* invalid all */
+        if (ictx->current_frame_type == FRAME_P) {
+            ictx->pic_param.ReferenceFrames[0] = ictx->ref_pic_list0_P[0];
+        } else if (ictx->current_frame_type == FRAME_B) {
+            ictx->pic_param.ReferenceFrames[0] = ictx->ref_pic_list0_B[0];
+            ictx->pic_param.ReferenceFrames[1] = ictx->ref_pic_list1_B[0];
+        }
+    } else {
+        memcpy(ictx->pic_param.ReferenceFrames, ictx->reference_frames, ictx->num_short_term*sizeof(VAPictureH264));
+        for (i = ictx->num_short_term; i < SURFACE_NUM; i++) { /* mark the rest invalid; assumes SURFACE_NUM <= 16 (the size used by the memset above) - TODO confirm */
+            ictx->pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE;
+            ictx->pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID;
+        }
+    }
+    /* per-frame flags, derived from the current frame type and the context settings */
+    ictx->pic_param.pic_fields.bits.idr_pic_flag = (ictx->current_frame_type == FRAME_IDR);
+    ictx->pic_param.pic_fields.bits.reference_pic_flag = (ictx->current_frame_type != FRAME_B);
+    ictx->pic_param.pic_fields.bits.entropy_coding_mode_flag = ictx->h264_entropy_mode;
+    ictx->pic_param.pic_fields.bits.deblocking_filter_control_present_flag = 0;
+    ictx->pic_param.frame_num = ictx->current_frame_num;
+    ictx->pic_param.coded_buf = ictx->coded_buf[current_slot(ictx)];
+    ictx->pic_param.last_picture = 0;
+    ictx->pic_param.pic_init_qp = ictx->initial_qp;
+
+    return 0;
+}
+
+/* Submit the sequence parameters plus an HRD misc buffer to the driver; 0 on success, -1 on any VA error. */
+static int render_sequence(AVCodecContext *avctx, I264Context *ictx)
+{
+    VABufferID seq_param_buf, hrd_buf_id, rc_param_buf, render_id[3];
+    VAStatus va_status;
+    VAEncMiscParameterRateControl *misc_rate_ctrl;
+    VAEncMiscParameterHRD *misc_hrd_param;
+    VAEncMiscParameterBuffer *misc_param;
+
+    va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id,
+                               VAEncSequenceParameterBufferType,
+                               sizeof(ictx->seq_param),1,&ictx->seq_param,&seq_param_buf);
+    CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+/*
+    va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id,
+                               VAEncMiscParameterBufferType,
+                               sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl),
+                               1,NULL, &rc_param_buf);
+    CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+
+    vaMapBuffer(ictx->va_dpy, rc_param_buf, (void **) &misc_param);
+    misc_param->type = VAEncMiscParameterTypeRateControl;
+    misc_rate_ctrl = (VAEncMiscParameterRateControl *)misc_param->data;
+    memset(misc_rate_ctrl, 0, sizeof(*misc_rate_ctrl));
+    misc_rate_ctrl->bits_per_second = ictx->frame_bitrate;
+    misc_rate_ctrl->target_percentage = 90;
+    misc_rate_ctrl->window_size = 1000;
+    misc_rate_ctrl->initial_qp = ictx->initial_qp;
+    misc_rate_ctrl->min_qp = ictx->minimal_qp;
+    misc_rate_ctrl->basic_unit_size = 0;
+    vaUnmapBuffer(ictx->va_dpy, rc_param_buf);
+
+    //render_id[0] = seq_param_buf;
+    //render_id[1] = rc_param_buf;
+*/
+    /* hrd parameter: the payload is a VAEncMiscParameterHRD, so size the buffer for that type */
+    va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id,
+                               VAEncMiscParameterBufferType,
+                               sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterHRD),
+                               1, NULL, &hrd_buf_id);
+    CHECK_VASTATUS(avctx, va_status, "vaCreateBuffer");
+
+    va_status = vaMapBuffer(ictx->va_dpy, hrd_buf_id, (void **)&misc_param);
+    CHECK_VASTATUS(avctx, va_status, "vaMapBuffer");
+    misc_param->type = VAEncMiscParameterTypeHRD;
+    misc_hrd_param = (VAEncMiscParameterHRD *)misc_param->data;
+    if (ictx->frame_bitrate > 0) {
+        misc_hrd_param->initial_buffer_fullness = ictx->frame_bitrate * 4;
+        misc_hrd_param->buffer_size = ictx->frame_bitrate * 8;
+    } else {
+        misc_hrd_param->initial_buffer_fullness = 0;
+        misc_hrd_param->buffer_size = 0;
+    }
+    /* release the mapping so the driver sees the data before the buffer is rendered */
+    va_status = vaUnmapBuffer(ictx->va_dpy, hrd_buf_id);
+    CHECK_VASTATUS(avctx, va_status, "vaUnmapBuffer");
+
+    render_id[0] = seq_param_buf;
+    render_id[1] = hrd_buf_id;
+    va_status = vaRenderPicture(ictx->va_dpy, ictx->context_id, &render_id[0], 2);
+    CHECK_VASTATUS(avctx, va_status,"vaRenderPicture");
+
+    return 0;
+}
+
+/* Refresh the picture parameters via init_pps() and submit them; 0 on success, -1 on a VA error. */
+static int render_picture(AVCodecContext *avctx, I264Context *ictx)
+{
+    VAStatus status;
+    VABufferID pps_buffer;
+
+    init_pps(ictx);
+    status = vaCreateBuffer(ictx->va_dpy, ictx->context_id, VAEncPictureParameterBufferType,
+                            sizeof(ictx->pic_param), 1, &ictx->pic_param, &pps_buffer);
+    CHECK_VASTATUS(avctx, status, "vaCreateBuffer");
+
+    status = vaRenderPicture(ictx->va_dpy, ictx->context_id, &pps_buffer, 1);
+    CHECK_VASTATUS(avctx, status, "vaRenderPicture");
+
+    return 0;
+}
+
+static int init_va(AVCodecContext *avctx, I264Context *ictx)
+{
+    VAProfile profile_list[]={VAProfileH264High,VAProfileH264Main,VAProfileH264Baseline,VAProfileH264ConstrainedBaseline};
+    VAEntrypoint *entrypoints;
+    int num_entrypoints, slice_entrypoint;
+    int support_encode = 0;
+    int major_ver, minor_ver;
+    VAStatus va_status;
+    unsigned int i;
+
+    ictx->va_dpy = ff_va_open_display();
+    va_status = vaInitialize(ictx->va_dpy, &major_ver, &minor_ver);
+    CHECK_VASTATUS(avctx, va_status, "vaInitialize");
+
+    num_entrypoints = vaMaxNumEntrypoints(ictx->va_dpy);
+    entrypoints = malloc(num_entrypoints * sizeof(*entrypoints));
+    if (!entrypoints)
+    {
+        av_log(avctx, AV_LOG_ERROR, "error: failed to initialize VA entrypoints array\n");
+        return -1;
+    }
+
+    /* use the highest profile */
+    for (i = 0; i < sizeof(profile_list)/sizeof(profile_list[0]); i++) {
+        if (ictx->h264_profile != profile_list[i])
+            continue;
+
+        ictx->h264_profile = profile_list[i];
+        vaQueryConfigEntrypoints(ictx->va_dpy, ictx->h264_profile, entrypoints, &num_entrypoints);
+        for (slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) {
+            if (entrypoints[slice_entrypoint] == VAEntrypointEncSlice) {
+                support_encode = 1;
+                break;
+            }
+        }
+        if (support_encode == 1)
+            break;
+    }
+
+    if (support_encode == 0) {
+        av_log(avctx, AV_LOG_ERROR, "Can't find VAEntrypointEncSlice for H264 profiles\n");
+        return -1;
+    } else {
+        switch (ictx->h264_profile) {
+            case VAProfileH264Baseline:
+                av_log(avctx, AV_LOG_INFO, "Use profile VAProfileH264Baseline\n");
+                ictx->ip_period = 1;
+                ictx->constraint_set_flag |= (1 << 0); /* Annex A.2.1 */
+                ictx->h264_entropy_mode = 0;
+                break;
+            case VAProfileH264ConstrainedBaseline:
+                av_log(avctx, AV_LOG_INFO, "Use profile VAProfileH264ConstrainedBaseline\n");
+                ictx->constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.2 */
+                ictx->ip_period = 1;
+                break;
+
+            case VAProfileH264Main:
+                av_log(avctx, AV_LOG_INFO, "Use profile VAProfileH264Main\n");
+                ictx->constraint_set_flag |= (1 << 1); /* Annex A.2.2 */
+                break;
+
+            case VAProfileH264High:
+                av_log(avctx, AV_LOG_INFO, "Use profile VAProfileH264High\n");
+                ictx->constraint_set_flag |= (1 << 3); /* Annex A.2.4 */
+                break;
+            default:
+                av_log(avctx, AV_LOG_INFO, "Unknown profile. Set to Baseline");
+                ictx->h264_profile = VAProfileH264Baseline;
+                ictx->ip_period = 1;
+                ictx->constraint_set_flag |= (1 << 0); /* Annex A.2.1 */
+                break;
+        }
+    }
+
+    /* find out the format for the render target, and rate control mode */
+    for (i = 0; i < VAConfigAttribTypeMax; i++)
+        ictx->attrib[i].type = i;
+
+    va_status = vaGetConfigAttributes(ictx->va_dpy, ictx->h264_profile, VAEntrypointEncSlice,
+                                      &ictx->attrib[0], VAConfigAttribTypeMax);
+    CHECK_VASTATUS(avctx, va_status, "vaGetConfigAttributes");
+    /* check the interested configattrib */
+    if ((ictx->attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) {
+        av_log(avctx, AV_LOG_ERROR, "Not find desired YUV420 RT format\n");
+        return -1;
+    } else {
+        ictx->config_attrib[ictx->config_attrib_num].type = VAConfigAttribRTFormat;
+        ictx->config_attrib[ictx->config_attrib_num].value = VA_RT_FORMAT_YUV420;
+        ictx->config_attrib_num++;
+    }
+
+    if (ictx->attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
+        int tmp = ictx->attrib[VAConfigAttribRateControl].value;
+
+        av_log(avctx, AV_LOG_INFO, "Support rate control mode (0x%x):", tmp);
+
+        if (tmp & VA_RC_NONE)
+            av_log(avctx, AV_LOG_INFO, "NONE ");
+        if (tmp & VA_RC_CBR)
+            av_log(avctx, AV_LOG_INFO, "CBR ");
+        if (tmp & VA_RC_VBR)
+            av_log(avctx, AV_LOG_INFO, "VBR ");
+        if (tmp & VA_RC_VCM)
+            av_log(avctx, AV_LOG_INFO, "VCM ");
+        if (tmp & VA_RC_CQP)
+            av_log(avctx, AV_LOG_INFO, "CQP ");
+        if (tmp & VA_RC_VBR_CONSTRAINED)
+            av_log(avctx, AV_LOG_INFO, "VBR_CONSTRAINED ");
+
+        av_log(avctx, AV_LOG_INFO, "\n");
+
+        /* need to check if support rc_mode */
+        ictx->config_attrib[ictx->config_attrib_num].type = VAConfigAttribRateControl;
+        ictx->config_attrib[ictx->config_attrib_num].value = ictx->rc_mode;
+        ictx->config_attrib_num++;
+    }
+
+
+    if (ictx->attrib[VAConfigAttribEncPackedHeaders].value != VA_ATTRIB_NOT_SUPPORTED) {
+        int tmp = ictx->attrib[VAConfigAttribEncPackedHeaders].value;
+
+        av_log(avctx, AV_LOG_INFO, "Support VAConfigAttribEncPackedHeaders\n");
+
+        ictx->h264_packedheader = 1;
+        ictx->config_attrib[ictx->config_attrib_num].type = VAConfigAttribEncPackedHeaders;
+        ictx->config_attrib[ictx->config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE;
+
+        if (tmp & VA_ENC_PACKED_HEADER_SEQUENCE) {
+            av_log(avctx, AV_LOG_INFO, "Support packed sequence headers\n");
+            ictx->config_attrib[ictx->config_attrib_num].value |= VA_ENC_PACKED_HEADER_SEQUENCE;
+        }
+
+        if (tmp & VA_ENC_PACKED_HEADER_PICTURE) {
+            av_log(avctx, AV_LOG_INFO, "Support packed picture headers\n");
+            ictx->config_attrib[ictx->config_attrib_num].value |= VA_ENC_PACKED_HEADER_PICTURE;
+        }
+
+        if (tmp & VA_ENC_PACKED_HEADER_SLICE) {
+            av_log(avctx, AV_LOG_INFO, "Support packed slice headers\n");
+            ictx->config_attrib[ictx->config_attrib_num].value |= VA_ENC_PACKED_HEADER_SLICE;
+        }
+
+        if (tmp & VA_ENC_PACKED_HEADER_MISC) {
+            av_log(avctx, AV_LOG_INFO, "Support packed misc headers\n");
+            ictx->config_attrib[ictx->config_attrib_num].value |= VA_ENC_PACKED_HEADER_MISC;
+        }
+
+        ictx->enc_packed_header_idx = ictx->config_attrib_num;
+        ictx->config_attrib_num++;
+    }
+
+    if (ictx->attrib[VAConfigAttribEncInterlaced].value != VA_ATTRIB_NOT_SUPPORTED) {
+        int tmp = ictx->attrib[VAConfigAttribEncInterlaced].value;
+
+        av_log(avctx, AV_LOG_INFO, "Support VAConfigAttribEncInterlaced\n");
+
+        if (tmp & VA_ENC_INTERLACED_FRAME)
+            av_log(avctx, AV_LOG_INFO, "support VA_ENC_INTERLACED_FRAME\n");
+        if (tmp & VA_ENC_INTERLACED_FIELD)
+            av_log(avctx, AV_LOG_INFO, "Support VA_ENC_INTERLACED_FIELD\n");
+        if (tmp & VA_ENC_INTERLACED_MBAFF)
+            av_log(avctx, AV_LOG_INFO, "Support VA_ENC_INTERLACED_MBAFF\n");
+        if (tmp & VA_ENC_INTERLACED_PAFF)
+            av_log(avctx, AV_LOG_INFO, "Support VA_ENC_INTERLACED_PAFF\n");
+
+        ictx->config_attrib[ictx->config_attrib_num].type = VAConfigAttribEncInterlaced;
+        ictx->config_attrib[ictx->config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE;
+        ictx->config_attrib_num++;
+    }
+
+    if (ictx->attrib[VAConfigAttribEncMaxRefFrames].value != VA_ATTRIB_NOT_SUPPORTED) {
+        ictx->h264_maxref = ictx->attrib[VAConfigAttribEncMaxRefFrames].value;
+
+        av_log(avctx, AV_LOG_INFO, "Support %d RefPicList0 and %d RefPicList1\n",
+                ictx->h264_maxref & 0xffff, (ictx->h264_maxref >> 16) & 0xffff );
+    }
+
+    if (ictx->attrib[VAConfigAttribEncMaxSlices].value != VA_ATTRIB_NOT_SUPPORTED)
+        av_log(avctx, AV_LOG_INFO, "Support %d slices\n", ictx->attrib[VAConfigAttribEncMaxSlices].value);
+
+    if (ictx->attrib[VAConfigAttribEncSliceStructure].value != VA_ATTRIB_NOT_SUPPORTED) {
+        int tmp = ictx->attrib[VAConfigAttribEncSliceStructure].value;
+
+        av_log(avctx, AV_LOG_INFO, "Support VAConfigAttribEncSliceStructure\n");
+
+        if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS)
+            av_log(avctx, AV_LOG_INFO, "Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS\n");
+        if (tmp & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS)
+            av_log(avctx, AV_LOG_INFO, "Support VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS\n");
+        if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS)
+            av_log(avctx, AV_LOG_INFO, "Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS\n");
+    }
+    if (ictx->attrib[VAConfigAttribEncMacroblockInfo].value != VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_INFO, "Support VAConfigAttribEncMacroblockInfo\n");
+    }
+
+    free(entrypoints);
+    return 0;
+}
+
+/**
+ * Create the VA config, surfaces, context and coded buffers used for encoding.
+ *
+ * Source and reference surfaces are allocated at the macroblock-aligned
+ * (multiple of 16) frame size, one coded buffer is created per source
+ * surface, and the cached sequence/picture parameter structures are cleared.
+ *
+ * @param avctx codec context; supplies width/height and is used for logging
+ * @param ictx  encoder private context that receives the VA handles
+ * @return 0 on success; VA failures are handled by CHECK_VASTATUS
+ */
+static int setup_encode(AVCodecContext *avctx, I264Context *ictx)
+{
+    /* Source surfaces followed by reference surfaces, in the layout handed to
+     * vaCreateContext(). The size is a compile-time constant, so a stack
+     * array avoids an unchecked heap allocation and cannot leak when
+     * context creation fails. */
+    VASurfaceID context_surfaces[2 * SURFACE_NUM];
+    VAStatus va_status;
+    int codedbuf_size, i;
+
+    va_status = vaCreateConfig(ictx->va_dpy, ictx->h264_profile, VAEntrypointEncSlice,
+            &ictx->config_attrib[0], ictx->config_attrib_num, &ictx->config_id);
+    CHECK_VASTATUS(avctx, va_status, "vaCreateConfig");
+
+    /* round both dimensions up to a whole number of 16x16 macroblocks */
+    ictx->frame_width_mbaligned = (avctx->width + 15) & (~15);
+    ictx->frame_height_mbaligned = (avctx->height + 15) & (~15);
+    if (avctx->width != ictx->frame_width_mbaligned ||
+        avctx->height != ictx->frame_height_mbaligned) {
+        av_log(avctx, AV_LOG_INFO,
+               "Source frame is %dx%d and will code clip to %dx%d with crop\n",
+               avctx->width, avctx->height,
+               ictx->frame_width_mbaligned, ictx->frame_height_mbaligned);
+    }
+
+    /* create source surfaces */
+    va_status = vaCreateSurfaces(ictx->va_dpy, VA_RT_FORMAT_YUV420,
+                                 ictx->frame_width_mbaligned, ictx->frame_height_mbaligned,
+                                 &ictx->src_surface_id[0], SURFACE_NUM,
+                                 NULL, 0);
+    CHECK_VASTATUS(avctx, va_status, "vaCreateSurfaces");
+
+    /* create reference surfaces */
+    va_status = vaCreateSurfaces(ictx->va_dpy, VA_RT_FORMAT_YUV420,
+                                 ictx->frame_width_mbaligned, ictx->frame_height_mbaligned,
+                                 &ictx->ref_surface[0], SURFACE_NUM, NULL, 0);
+    CHECK_VASTATUS(avctx, va_status, "vaCreateSurfaces");
+
+    memcpy(context_surfaces, ictx->src_surface_id, SURFACE_NUM * sizeof(VASurfaceID));
+    memcpy(context_surfaces + SURFACE_NUM, ictx->ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
+
+    /* Create a context for this encode pipe */
+    va_status = vaCreateContext(ictx->va_dpy, ictx->config_id,
+                                ictx->frame_width_mbaligned, ictx->frame_height_mbaligned,
+                                VA_PROGRESSIVE,
+                                context_surfaces, 2 * SURFACE_NUM,
+                                &ictx->context_id);
+    CHECK_VASTATUS(avctx, va_status, "vaCreateContext");
+
+    /* 400 bytes per macroblock. Both dimensions are multiples of 16, so
+     * dividing first gives the same value as width * height * 400 / 256
+     * without overflowing int for large frames. */
+    codedbuf_size = (ictx->frame_width_mbaligned / 16) *
+                    (ictx->frame_height_mbaligned / 16) * 400;
+
+    for (i = 0; i < SURFACE_NUM; i++) {
+        /* Coded buffers are created once and reused: unlike the other VA
+         * buffers, they must be mapped and read after
+         * vaRenderPicture/vaEndPicture, so they cannot be per-frame
+         * throwaway buffers. */
+        va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id, VAEncCodedBufferType,
+                codedbuf_size, 1, NULL, &ictx->coded_buf[i]);
+        CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+    }
+
+    memset(&ictx->seq_param, 0, sizeof(ictx->seq_param));
+    memset(&ictx->pic_param, 0, sizeof(ictx->pic_param));
+
+    return 0;
+}
+
+/**
+ * Destroy the VA objects owned by the encoder: source and reference
+ * surfaces, the per-surface coded buffers, the context and the config.
+ *
+ * @param ictx encoder private context holding the VA handles
+ * @return always 0; the status codes of the VA calls are not inspected
+ */
+static int release_encode(I264Context *ictx)
+{
+    VADisplay dpy = ictx->va_dpy;
+    int idx = 0;
+
+    vaDestroySurfaces(dpy, ictx->src_surface_id, SURFACE_NUM);
+    vaDestroySurfaces(dpy, ictx->ref_surface, SURFACE_NUM);
+
+    while (idx < SURFACE_NUM) {
+        vaDestroyBuffer(dpy, ictx->coded_buf[idx]);
+        idx++;
+    }
+
+    vaDestroyContext(dpy, ictx->context_id);
+    vaDestroyConfig(dpy, ictx->config_id);
+
+    return 0;
+}
+
+/**
+ * Shut down the VA display: vaTerminate() first, then the display-backend
+ * close helper.
+ *
+ * @param ictx encoder private context holding the display handle
+ * @return always 0
+ */
+static int deinit_va(I264Context *ictx)
+{
+    VADisplay dpy = ictx->va_dpy;
+
+    vaTerminate(dpy);
+    ff_va_close_display(dpy);
+
+    return 0;
+}
+
+/**
+ * Upload one planar YUV 4:2:0 frame into a VA source surface.
+ *
+ * The surface is accessed through vaDeriveImage()/vaMapBuffer(). Only NV12
+ * surfaces are supported: the luma plane is copied row by row and the
+ * separate U and V planes of the frame are interleaved into the single
+ * chroma plane of the surface.
+ *
+ * @param avctx    codec context, used for logging
+ * @param ictx     encoder private context (supplies the VA display)
+ * @param surface  destination slot; its frame_num is set to frame_no
+ * @param frame_no frame number recorded in the slot
+ * @param frame    source picture; data[0..2]/linesize[0..2] are read
+ * @return 0 on success, -1 if the surface format is not NV12 or the buffer
+ *         cannot be mapped (vaDeriveImage failures go through CHECK_VASTATUS)
+ */
+static int load_surface(AVCodecContext *avctx, I264Context *ictx, VASurface *surface, int frame_no, AVFrame *frame)
+{
+    VAImage surface_image;
+    unsigned char *surface_p = NULL;
+    unsigned char *luma_dst, *chroma_dst;
+    int chroma_width, chroma_height;
+    int row, col, ret = 0;
+    VAStatus va_status;
+    VASurfaceID surface_id = *(surface->surface_id);
+
+    surface->frame_num = frame_no;
+
+    va_status = vaDeriveImage(ictx->va_dpy, surface_id, &surface_image);
+    CHECK_VASTATUS(avctx, va_status,"vaDeriveImage");
+
+    /* Reject unsupported layouts before touching any pixel data, and make
+     * sure the derived image is released on that path. */
+    if (surface_image.format.fourcc != VA_FOURCC_NV12) {
+        av_log(avctx, AV_LOG_ERROR,"unsupported fourcc in load_surface_yuv\n");
+        ret = -1;
+        goto destroy_image;
+    }
+
+    va_status = vaMapBuffer(ictx->va_dpy, surface_image.buf, (void **)&surface_p);
+    if (va_status != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "vaMapBuffer failed in load_surface\n");
+        ret = -1;
+        goto destroy_image;
+    }
+
+    luma_dst   = surface_p;
+    chroma_dst = surface_p + surface_image.offsets[1];
+
+    /* copy Y plane */
+    for (row = 0; row < frame->height; row++)
+        memcpy(luma_dst + row * surface_image.pitches[0],
+               frame->data[0] + row * frame->linesize[0], frame->width);
+
+    /* Interleave U and V into the NV12 chroma plane. Plain C replaces the
+     * former x86-64 inline assembly, which used a global label, had no
+     * memory clobber and overran its buffers for chroma widths below 8.
+     * Chroma dimensions are rounded up so odd-sized frames keep their last
+     * chroma row/column. */
+    chroma_width  = (frame->width  + 1) / 2;
+    chroma_height = (frame->height + 1) / 2;
+    for (row = 0; row < chroma_height; row++) {
+        const unsigned char *u_src = frame->data[1] + row * frame->linesize[1];
+        const unsigned char *v_src = frame->data[2] + row * frame->linesize[2];
+        unsigned char *uv_dst = chroma_dst + row * surface_image.pitches[1];
+
+        for (col = 0; col < chroma_width; col++) {
+            uv_dst[2 * col]     = u_src[col];
+            uv_dst[2 * col + 1] = v_src[col];
+        }
+    }
+
+    vaUnmapBuffer(ictx->va_dpy, surface_image.buf);
+
+destroy_image:
+    vaDestroyImage(ictx->va_dpy, surface_image.image_id);
+
+    return ret;
+}
+
+/**
+ * Map an encoding-order frame index to its display-order index and frame type.
+ *
+ * Decision order, first match wins:
+ *  - encoding_order is a multiple of intra_idr_period: IDR, shown in place;
+ *  - encoding_order is a multiple of intra_period: I, shown in place;
+ *  - fewer than ip_period surfaces are loaded: P, shown in place;
+ *  - position in the GOP is not 1 modulo ip_period: B, shown one frame
+ *    earlier, unless the previous P is too close to the end of the GOP, in
+ *    which case the frame becomes a P shown in place;
+ *  - otherwise: P, shown ip_period - 1 frames later, clamped so it does not
+ *    cross into the next GOP.
+ *
+ * intra_period and intra_idr_period are used as divisors unconditionally and
+ * ip_period whenever the B/P test is reached, so none of them may be 0.
+ *
+ * @param ictx             encoder context; nb_surfaces_loaded is read,
+ *                         last_p is read and updated
+ * @param encoding_order   index of the frame in encoding order
+ * @param intra_period     distance between I frames
+ * @param intra_idr_period distance between IDR frames
+ * @param ip_period        distance between anchor (I/P) frames
+ * @param displaying_order [out] index of the frame in display order
+ * @param frame_type       [out] one of FRAME_IDR/FRAME_I/FRAME_P/FRAME_B
+ */
+static void encoding2display_order(I264Context *ictx,
+                                   unsigned long long encoding_order,int intra_period,
+                                   int intra_idr_period,int ip_period,
+                                   unsigned long long *displaying_order,
+                                   int *frame_type)
+{
+    /* position of the frame inside its GOP */
+    int encoding_order_gop = encoding_order % intra_period;
+
+    /* new sequence like
+     * IDR PPPPP IPPPPP
+     * IDR (PBB)(PBB)(IBB)(PBB)
+     */
+
+    if (encoding_order % intra_idr_period == 0) { /* the first frame */
+        *frame_type = FRAME_IDR;
+        *displaying_order = encoding_order;
+    }
+    /* encoding_order_gop is already reduced modulo intra_period, so this is
+     * simply encoding_order_gop == 0 */
+    else if(encoding_order_gop % intra_period == 0)
+    {
+        *frame_type = FRAME_I;
+        *displaying_order = encoding_order;
+    }
+    else if(ictx->nb_surfaces_loaded < ip_period)
+    {
+        *frame_type = FRAME_P;
+        *displaying_order = encoding_order;
+    }
+    else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */
+        *frame_type = FRAME_B;
+        *displaying_order = encoding_order - 1;
+
+        /* the right-hand side is the encoding index at which the next GOP
+         * starts; in that case the frame is coded as P instead of B */
+        if(ictx->last_p + ip_period-1 >= (intra_period * ((encoding_order / intra_period)+1)))
+        {
+            *frame_type = FRAME_P;
+            *displaying_order = encoding_order;
+            ictx->last_p = encoding_order;
+        }
+    }
+    else {
+        *frame_type = FRAME_P;
+        *displaying_order = encoding_order + ip_period - 1;
+
+        /* do not let the P frame be displayed inside the next GOP */
+        if(*displaying_order >= (intra_period * ((encoding_order / intra_period)+1)))
+            *displaying_order = encoding_order;
+
+        ictx->last_p = encoding_order;
+    }
+}
+
+/**
+ * Hand one packed header to the driver: a parameter buffer describing the
+ * header followed by a data buffer holding the raw header bytes.
+ *
+ * The header bytes are only read here; the caller keeps ownership of data.
+ * CHECK_VASTATUS is defined outside this chunk and is assumed to leave this
+ * function early on failure, which is why the callers free the bytes rather
+ * than this helper.
+ *
+ * @param avctx          codec context, used for logging
+ * @param ictx           encoder private context (VA display and context)
+ * @param type           packed header type (VAEncPackedHeader* value)
+ * @param data           header bytes
+ * @param length_in_bits number of valid bits in data
+ * @return 0 on success
+ */
+static int i264_render_packed_header(AVCodecContext *avctx, I264Context *ictx,
+                                     unsigned int type, unsigned char *data,
+                                     unsigned int length_in_bits)
+{
+    /* zero-initialised so no indeterminate bytes are passed to the driver */
+    VAEncPackedHeaderParameterBuffer param = { 0 };
+    VABufferID render_id[2];
+    VAStatus va_status;
+
+    param.type                = type;
+    param.bit_length          = length_in_bits;
+    param.has_emulation_bytes = 0;
+
+    va_status = vaCreateBuffer(ictx->va_dpy,
+                               ictx->context_id,
+                               VAEncPackedHeaderParameterBufferType,
+                               sizeof(param), 1, &param,
+                               &render_id[0]);
+    CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+
+    va_status = vaCreateBuffer(ictx->va_dpy,
+                               ictx->context_id,
+                               VAEncPackedHeaderDataBufferType,
+                               (length_in_bits + 7) / 8, 1, data,
+                               &render_id[1]);
+    CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+
+    va_status = vaRenderPicture(ictx->va_dpy, ictx->context_id, render_id, 2);
+    CHECK_VASTATUS(avctx, va_status,"vaRenderPicture");
+
+    return 0;
+}
+
+/**
+ * Build the packed sequence header (SPS) and submit it to the driver.
+ * The temporary header buffer is released on every path.
+ *
+ * @return 0 on success
+ */
+static int render_packedsequence(AVCodecContext *avctx, I264Context *ictx)
+{
+    unsigned char *packedseq_buffer = NULL;
+    unsigned int length_in_bits = build_packed_seq_buffer(ictx, &packedseq_buffer);
+    int ret = i264_render_packed_header(avctx, ictx, VAEncPackedHeaderSequence,
+                                        packedseq_buffer, length_in_bits);
+
+    free(packedseq_buffer);
+
+    return ret;
+}
+
+/**
+ * Build the packed picture header (PPS) and submit it to the driver.
+ * The temporary header buffer is released on every path.
+ *
+ * @return 0 on success
+ */
+static int render_packedpicture(AVCodecContext *avctx, I264Context *ictx)
+{
+    unsigned char *packedpic_buffer = NULL;
+    unsigned int length_in_bits = build_packed_pic_buffer(ictx, &packedpic_buffer);
+    int ret = i264_render_packed_header(avctx, ictx, VAEncPackedHeaderPicture,
+                                        packedpic_buffer, length_in_bits);
+
+    free(packedpic_buffer);
+
+    return ret;
+}
+
+/**
+ * Build the packed slice header and submit it to the driver.
+ * The temporary header buffer is released on every path.
+ *
+ * @return 0 on success
+ */
+static int render_packedslice(AVCodecContext *avctx, I264Context *ictx)
+{
+    unsigned char *packedslice_buffer = NULL;
+    unsigned int length_in_bits = build_packed_slice_buffer(ictx, &packedslice_buffer);
+    int ret = i264_render_packed_header(avctx, ictx, VAEncPackedHeaderSlice,
+                                        packedslice_buffer, length_in_bits);
+
+    free(packedslice_buffer);
+
+    return ret;
+}
+
+/**
+ * Fill one 32-entry VA reference picture list: copy the first count entries
+ * from src and mark every remaining slot as invalid. count is clamped to the
+ * list size so a large driver-reported maximum cannot overflow the list.
+ */
+static void i264_fill_ref_pic_list(VAPictureH264 *dst, const void *src, int count)
+{
+    int i;
+
+    if (count > 32)
+        count = 32;
+
+    memcpy(dst, src, count * sizeof(VAPictureH264));
+
+    for (i = count; i < 32; i++) {
+        dst[i].picture_id = VA_INVALID_SURFACE;
+        dst[i].flags      = VA_PICTURE_H264_INVALID;
+    }
+}
+
+/**
+ * Fill in the slice parameters for the current frame and submit them.
+ *
+ * The whole frame is coded as a single slice. Reference lists are populated
+ * for P and B frames from the lists prepared by update_ref_pic_list(); when
+ * the config enables packed slice headers, one is submitted first.
+ *
+ * @param avctx codec context, used for logging
+ * @param ictx  encoder private context
+ * @return 0 on success, a negative value if the packed slice header fails
+ */
+static int render_slice(AVCodecContext *avctx, I264Context *ictx)
+{
+    VABufferID slice_param_buf;
+    VAStatus va_status;
+    /* low 16 bits: RefPicList0 size, high 16 bits: RefPicList1 size */
+    int refpiclist0_max = ictx->h264_maxref & 0xffff;
+    int refpiclist1_max = (ictx->h264_maxref >> 16) & 0xffff;
+
+    update_ref_pic_list(ictx);
+
+    /* one frame, one slice */
+    ictx->slice_param.macroblock_address = 0;
+    ictx->slice_param.num_macroblocks = ictx->frame_width_mbaligned * ictx->frame_height_mbaligned/(16*16); /* Measured by MB */
+    /* an IDR frame is sent with slice type 2; other frame types are passed through as-is */
+    ictx->slice_param.slice_type = (ictx->current_frame_type == FRAME_IDR)?2:ictx->current_frame_type;
+    if (ictx->current_frame_type == FRAME_IDR) {
+        if (ictx->current_frame_encoding != 0)
+            ++ictx->slice_param.idr_pic_id;
+    } else if (ictx->current_frame_type == FRAME_P) {
+        i264_fill_ref_pic_list(ictx->slice_param.RefPicList0, ictx->ref_pic_list0_P, refpiclist0_max);
+    } else if (ictx->current_frame_type == FRAME_B) {
+        i264_fill_ref_pic_list(ictx->slice_param.RefPicList0, ictx->ref_pic_list0_B, refpiclist0_max);
+        i264_fill_ref_pic_list(ictx->slice_param.RefPicList1, ictx->ref_pic_list1_B, refpiclist1_max);
+    }
+
+    ictx->slice_param.slice_alpha_c0_offset_div2 = 0;
+    ictx->slice_param.slice_beta_offset_div2 = 0;
+    ictx->slice_param.direct_spatial_mv_pred_flag = 1;
+    ictx->slice_param.pic_order_cnt_lsb = (ictx->current_frame_display - ictx->current_IDR_display) % max_pic_order_cnt_lsb;
+
+    if (ictx->h264_packedheader && (ictx->config_attrib[ictx->enc_packed_header_idx].value & VA_ENC_PACKED_HEADER_SLICE)) {
+        int ret = render_packedslice(avctx, ictx);
+
+        if (ret < 0)
+            return ret;
+    }
+
+    va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id, VAEncSliceParameterBufferType,
+                               sizeof(ictx->slice_param),1,&ictx->slice_param,&slice_param_buf);
+    CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+
+    va_status = vaRenderPicture(ictx->va_dpy, ictx->context_id, &slice_param_buf, 1);
+    CHECK_VASTATUS(avctx, va_status,"vaRenderPicture");
+
+    return 0;
+}
+
+/**
+ * Look up the source surface slot that holds a given frame number.
+ *
+ * @param ictx     encoder private context
+ * @param frame_no frame number to search for
+ * @return the first slot whose frame_num equals frame_no, or NULL if none does
+ */
+static VASurface* get_surface(I264Context *ictx, int frame_no)
+{
+    VASurface *slot = ictx->src_sufrace;
+    VASurface *const end = slot + SURFACE_NUM;
+
+    for (; slot != end; slot++) {
+        if (slot->frame_num == frame_no)
+            return slot;
+    }
+
+    return NULL;
+}
+
+/**
+ * Wait for the current frame to finish encoding and copy its bitstream
+ * into an output packet.
+ *
+ * The coded buffer is selected by current_frame_display modulo SURFACE_NUM.
+ * All segments of the mapped coded buffer are concatenated into pkt.
+ *
+ * @param avctx      codec context, used for logging and packet allocation
+ * @param ictx       encoder private context
+ * @param pkt        packet that receives the coded data
+ * @param got_packet [out] 1 if pkt was filled, 0 otherwise
+ * @return 0 on success (including "no data"), negative on failure
+ */
+static int save_output_packet(AVCodecContext *avctx, I264Context *ictx, AVPacket *pkt, int *got_packet)
+{
+    VAStatus va_status;
+    VACodedBufferSegment *buf_list = NULL, *seg;
+    VABufferID coded_buf = ictx->coded_buf[ictx->current_frame_display % SURFACE_NUM];
+    unsigned int coded_size = 0;
+    int ret = 0;
+    unsigned char *p;
+    VASurface *surface = get_surface(ictx, ictx->current_frame_display);
+
+    *got_packet = 0;
+
+    /* get_surface() returns NULL when no slot holds the requested frame */
+    if (!surface) {
+        av_log(avctx, AV_LOG_ERROR, "no source surface found for the current frame\n");
+        return -1;
+    }
+
+    va_status = vaSyncSurface(ictx->va_dpy, *(surface->surface_id));
+    CHECK_VASTATUS(avctx, va_status,"vaSyncSurface");
+
+    va_status = vaMapBuffer(ictx->va_dpy, coded_buf, (void **)(&buf_list));
+    CHECK_VASTATUS(avctx, va_status,"vaMapBuffer");
+
+    /* first pass: total size over all segments */
+    for (seg = buf_list; seg != NULL; seg = (VACodedBufferSegment *) seg->next)
+        coded_size += seg->size;
+
+    if (coded_size == 0)
+        goto cleanup;
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, coded_size)) < 0)
+        goto cleanup;
+
+    *got_packet = 1;
+
+    /* second pass: concatenate the segments into the packet */
+    p = pkt->data;
+    for (seg = buf_list; seg != NULL; seg = (VACodedBufferSegment *) seg->next) {
+        memcpy(p, seg->buf, seg->size);
+        p += seg->size;
+    }
+
+cleanup:
+    vaUnmapBuffer(ictx->va_dpy, coded_buf);
+
+    return ret;
+}
+
+/**
+ * Push the just-encoded picture onto the front of the short-term reference
+ * list and advance the frame number.
+ *
+ * B frames are not used as references and leave the state untouched. The
+ * list length is capped at num_ref_frames; the oldest entry falls off the
+ * end. The frame number is reset to 0 once it exceeds max_frame_num.
+ *
+ * @param ictx encoder private context
+ * @return always 0
+ */
+static int update_reference_frames(I264Context *ictx)
+{
+    if (ictx->current_frame_type == FRAME_B)
+        return 0;
+
+    ictx->current_curr_pic.flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE;
+
+    if (++ictx->num_short_term > ictx->num_ref_frames)
+        ictx->num_short_term = ictx->num_ref_frames;
+
+    /* shift the existing entries one slot towards the end */
+    if (ictx->num_short_term > 1)
+        memmove(&ictx->reference_frames[1], &ictx->reference_frames[0],
+                (ictx->num_short_term - 1) * sizeof(ictx->reference_frames[0]));
+    ictx->reference_frames[0] = ictx->current_curr_pic;
+
+    if (++ictx->current_frame_num > max_frame_num)
+        ictx->current_frame_num = 0;
+
+    return 0;
+}
+
+/**
+ * Serialise an SEI NAL unit (start code prefix, NAL header, SEI RBSP) into a
+ * freshly started bitstream.
+ *
+ * @param ictx       encoder private context, passed to the SEI RBSP writer
+ * @param sei_buffer [out] receives the bitstream's buffer pointer
+ * @return the bitstream's bit offset after it has been ended
+ */
+static int build_packed_sei_buffer_timing(I264Context *ictx, unsigned char **sei_buffer)
+{
+    I264Bitstream bs;
+
+    ff_i264_bitstream_start(&bs);
+
+    /* NAL unit: start code, header, payload */
+    ff_i264_nal_start_code_prefix(&bs);
+    ff_i264_nal_header(&bs, NAL_REF_IDC_NONE, NAL_SEI);
+    ff_i264_sei_rbsp(ictx, &bs);
+
+    ff_i264_bitstream_end(&bs);
+
+    *sei_buffer = (unsigned char *)bs.buffer;
+    return bs.bit_offset;
+}
+
+/**
+ * Submit an already serialised SEI NAL unit to the driver as a packed header
+ * (parameter buffer followed by data buffer).
+ *
+ * The SEI bytes are only read here; the caller keeps ownership of them.
+ * CHECK_VASTATUS is defined outside this chunk and is assumed to leave this
+ * function early on failure, which is why the caller frees the bytes.
+ *
+ * @return 0 on success
+ */
+static int i264_submit_packed_sei(AVCodecContext *avctx, I264Context *ictx,
+                                  unsigned char *packed_sei_buffer,
+                                  unsigned int length_in_bits)
+{
+    /* zero-initialised so no indeterminate bytes are passed to the driver */
+    VAEncPackedHeaderParameterBuffer packed_header_param_buffer = { 0 };
+    VABufferID buffId[2];
+    VAStatus va_status;
+
+    packed_header_param_buffer.type = VAEncPackedHeaderH264_SEI;
+    packed_header_param_buffer.bit_length = length_in_bits;
+    packed_header_param_buffer.has_emulation_bytes = 0;
+
+    va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id,
+                               VAEncPackedHeaderParameterBufferType,
+                               sizeof(packed_header_param_buffer), 1, &packed_header_param_buffer,
+                               &buffId[0]);
+    CHECK_VASTATUS(avctx, va_status, "vaCreateBuffer");
+
+    va_status = vaCreateBuffer(ictx->va_dpy, ictx->context_id,
+                               VAEncPackedHeaderDataBufferType,
+                               (length_in_bits + 7) / 8, 1, packed_sei_buffer,
+                               &buffId[1]);
+    CHECK_VASTATUS(avctx, va_status,"vaCreateBuffer");
+
+    va_status = vaRenderPicture(ictx->va_dpy, ictx->context_id, &buffId[0], 2);
+    CHECK_VASTATUS(avctx, va_status,"vaRenderPicture");
+
+    return 0;
+}
+
+/**
+ * Build the SEI NAL unit for the current state and send it to the driver.
+ * The temporary SEI buffer is released on every path.
+ *
+ * @param avctx codec context, used for logging
+ * @param ictx  encoder private context
+ * @return 0 on success
+ */
+static int update_sei_param(AVCodecContext* avctx, I264Context* ictx)
+{
+    unsigned char *packed_sei_buffer = NULL;
+    unsigned int length_in_bits = build_packed_sei_buffer_timing(ictx, &packed_sei_buffer);
+    int ret = i264_submit_packed_sei(avctx, ictx, packed_sei_buffer, length_in_bits);
+
+    free(packed_sei_buffer);
+    return ret;
+}
+
+/**
+ * Encoder init callback: apply defaults, open the VA display, create the
+ * encode pipeline, prepare SPS/PPS (and SEI when a bitrate is set) and, when
+ * global headers are requested, export SPS+PPS as extradata.
+ *
+ * The VA resources acquired so far are released on every failure path here.
+ *
+ * @param avctx codec context being opened
+ * @return 0 on success, a negative value on failure
+ */
+static av_cold int I264_init(AVCodecContext *avctx)
+{
+    int ret, i;
+    I264Context *ictx = avctx->priv_data;
+
+    i264_param_default(avctx, ictx);
+
+    /* NOTE(review): coded_frame points at a struct member, not a heap
+     * AVFrame - confirm the generic close path never tries to free it. */
+    avctx->coded_frame = &ictx->out_pic;
+
+    for (i = 0; i < SURFACE_NUM; i++)
+        ictx->src_sufrace[i].surface_id = &ictx->src_surface_id[i];
+
+    ret = init_va(avctx, ictx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "I264 Initialization failed in function init_va()\n");
+        return -1;
+    }
+
+    /* Check the pipeline before anything depends on it. */
+    ret = setup_encode(avctx, ictx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "I264 Initialization failed in function setup_encode()\n");
+        deinit_va(ictx);
+        return -1;
+    }
+
+    init_sps(avctx, ictx, &ictx->seq_param);
+    init_pps(ictx);
+
+    if (ictx->frame_bitrate > 0)
+        init_sei(ictx);
+
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        int sps_size, pps_size;
+        unsigned char *sps = NULL, *pps = NULL;
+
+        /* sizes are returned in bits; round up to whole bytes */
+        sps_size = (build_packed_seq_buffer(ictx, &sps) + 7) / 8;
+        pps_size = (build_packed_pic_buffer(ictx, &pps) + 7) / 8;
+
+        /* extradata must carry zeroed padding after the payload */
+        avctx->extradata = av_mallocz(sps_size + pps_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        if (!avctx->extradata) {
+            free(sps);
+            free(pps);
+            avctx->extradata_size = 0;
+            release_encode(ictx);
+            deinit_va(ictx);
+            return AVERROR(ENOMEM);
+        }
+        avctx->extradata_size = sps_size + pps_size;
+
+        memcpy(avctx->extradata, sps, sps_size);
+        memcpy(avctx->extradata + sps_size, pps, pps_size);
+
+        free(sps);
+        free(pps);
+    }
+
+    return 0;
+}
+
+/**
+ * encode2 callback: buffer incoming frames and encode one frame per call
+ * once the surface pool is primed.
+ *
+ * While fewer than SURFACE_NUM frames are loaded, an incoming frame is only
+ * uploaded to the next free surface and no packet is produced. After that
+ * (or when flushing with frame == NULL) the next frame in encoding order is
+ * encoded, its bitstream is copied into pkt, and the surface just consumed
+ * is refilled with the incoming frame, if there is one.
+ *
+ * @param avctx      codec context
+ * @param pkt        output packet
+ * @param frame      input frame, or NULL to flush buffered frames
+ * @param got_packet [out] 1 if pkt holds an encoded frame, 0 otherwise
+ * @return 0 on success, a negative value on failure
+ */
+static int I264_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame,
+                      int *got_packet)
+{
+    VAStatus va_status;
+    I264Context *ictx = avctx->priv_data;
+    VASurface *surface;
+    int ret;
+
+    *got_packet = 0;
+
+    /* flushing with nothing left to encode */
+    if (!ictx->nb_surfaces_loaded && !frame)
+        return 0;
+
+    /* priming: fill the surface pool before encoding starts */
+    if (ictx->nb_surfaces_loaded < SURFACE_NUM && frame) {
+        VASurface *slot = &ictx->src_sufrace[ictx->nb_surfaces_loaded];
+
+        ret = load_surface(avctx, ictx, slot, ictx->nb_surfaces_loaded, frame);
+        if (ret < 0)
+            return ret;
+
+        slot->pts = frame->pts;
+
+        /* frame duration is estimated from two consecutive input timestamps */
+        if (ictx->nb_surfaces_loaded >= 1)
+            ictx->duration = slot->pts - slot[-1].pts;
+
+        ictx->nb_surfaces_loaded++;
+        return 0;
+    }
+
+    memset(&ictx->pic_param, 0, sizeof(ictx->pic_param));
+    memset(&ictx->slice_param, 0, sizeof(ictx->slice_param));
+
+    encoding2display_order(ictx, ictx->current_frame_encoding, ictx->intra_period, ictx->intra_idr_period,
+                           ictx->ip_period, &ictx->current_frame_display, &ictx->current_frame_type);
+    if (ictx->current_frame_type == FRAME_IDR) {
+        ictx->num_short_term = 0;
+        ictx->current_frame_num = 0;
+        ictx->current_IDR_display = ictx->current_frame_display;
+    }
+
+    /* The wanted display frame may not be buffered, e.g. when flushing a
+     * stream that ended before the reordering window was filled. */
+    surface = get_surface(ictx, ictx->current_frame_display);
+    if (!surface) {
+        av_log(avctx, AV_LOG_ERROR, "no source surface holds display frame %llu\n",
+               (unsigned long long)ictx->current_frame_display);
+        return -1;
+    }
+
+    va_status = vaBeginPicture(ictx->va_dpy, ictx->context_id, *(surface->surface_id));
+    CHECK_VASTATUS(avctx, va_status,"vaBeginPicture");
+
+    if (ictx->current_frame_type == FRAME_IDR)
+        render_sequence(avctx, ictx);
+
+    if (ictx->frame_bitrate > 0) {
+        ret = update_sei_param(avctx, ictx);
+        if (ret < 0)
+            return ret;
+    }
+
+    render_picture(avctx, ictx);
+
+    /* packed SPS/PPS accompany IDR frames only */
+    if (ictx->current_frame_type == FRAME_IDR && ictx->h264_packedheader) {
+        ret = render_packedsequence(avctx, ictx);
+        if (ret < 0)
+            return ret;
+        ret = render_packedpicture(avctx, ictx);
+        if (ret < 0)
+            return ret;
+    }
+
+    ret = render_slice(avctx, ictx);
+    if (ret < 0)
+        return ret;
+
+    va_status = vaEndPicture(ictx->va_dpy, ictx->context_id);
+    CHECK_VASTATUS(avctx, va_status,"vaEndPicture");
+
+    ret = save_output_packet(avctx, ictx, pkt, got_packet);
+    if (ret < 0)
+        return ret;
+
+    update_reference_frames(ictx);
+
+    switch (ictx->current_frame_type) {
+    case FRAME_IDR:
+        if (*got_packet)
+            pkt->flags |= AV_PKT_FLAG_KEY;
+        /* fall through: an IDR frame is also reported as an I picture */
+    case FRAME_I:
+        ictx->out_pic.pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case FRAME_P:
+        ictx->out_pic.pict_type = AV_PICTURE_TYPE_P;
+        break;
+    case FRAME_B:
+        ictx->out_pic.pict_type = AV_PICTURE_TYPE_B;
+        break;
+    }
+
+    /* only touch the packet when one was actually produced */
+    if (*got_packet) {
+        pkt->pts = surface->pts;
+        pkt->dts = (ictx->current_frame_encoding - (ictx->ip_period-1))*ictx->duration;
+
+        if (pkt->pts < pkt->dts)
+            pkt->pts = pkt->dts;
+    }
+
+    /* reuse the surface just encoded for the incoming frame, or shrink the
+     * pool while flushing */
+    if (frame != NULL) {
+        ret = load_surface(avctx, ictx, surface, ictx->current_frame_encoding+SURFACE_NUM, frame);
+        surface->pts = frame->pts;
+    } else {
+        ictx->nb_surfaces_loaded--;
+    }
+
+    ictx->current_frame_encoding++;
+
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * Encoder close callback: free the extradata, then tear down the encode
+ * pipeline and the VA display.
+ *
+ * @param avctx codec context being closed
+ * @return 0 on success, -1 if the private context is missing
+ */
+static av_cold int I264_close(AVCodecContext *avctx)
+{
+    I264Context *ictx = avctx->priv_data;
+
+    if (!ictx)
+        return -1;
+
+    /* av_freep() accepts a NULL pointer, so no guard is needed */
+    av_freep(&avctx->extradata);
+
+    release_encode(ictx);
+    deinit_va(ictx);
+
+    return 0;
+}
+
+/* Static-data init hook referenced by the AVCodec definition; intentionally empty. */
+static av_cold void I264_init_static(AVCodec *codec)
+{
+}
+
+/* Byte offset of an option's backing field inside I264Context. */
+#define OFFSET(x) offsetof(I264Context, x)
+/* Flags shared by all options below: video + encoding parameter. */
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+/* Private encoder options. rc_mode and profile are stored as strings with a
+ * NULL default; intra_period defaults to -1. */
+static const AVOption options[] = {
+    { "intra_period",  "Set I frame period",                                                   OFFSET(intra_period),      AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, INT_MAX, VE},
+    { "rc_mode",       "Rate control mode <NONE|CBR|VBR|VCM|CQP|VBR_CONTRAINED>",              OFFSET(rc_mode_str),       AV_OPT_TYPE_STRING, { 0 },          0, 0,       VE},
+    { "profile",       "Set profile restrictions <BP|MP|HP>",                                  OFFSET(profile),           AV_OPT_TYPE_STRING, { 0 },          0, 0,       VE},
+    { NULL },
+};
+
+/* AVClass that exposes the private options table for this encoder. */
+static const AVClass i264_class = {
+    .class_name = "libi264",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/* Encoder-specific default values for generic codec options, given as
+ * option-name / value string pairs.
+ * NOTE(review): -1 presumably means "not set by the user, let the encoder
+ * choose" - confirm against i264_param_default(). */
+static const AVCodecDefault i264_defaults[] = {
+    { "b",                "0" },
+    { "bf",               "-1" },
+    { "g",                "-1" },
+    { "coder",            "-1" },
+    { "qmin",             "-1" },
+    { NULL },
+};
+
+/* Input pixel formats accepted by the encoder: planar YUV 4:2:0 only.
+ * The list is terminated by AV_PIX_FMT_NONE. */
+static const enum AVPixelFormat pix_fmts[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_NONE
+};
+
+/* Codec definition for the "libi264" H.264 video encoder. It wires up the
+ * init/encode2/close callbacks, the private options class, the default
+ * option values and the accepted pixel formats, and sets CODEC_CAP_DELAY. */
+AVCodec ff_libi264_encoder = {
+    .name             = "libi264",
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_H264,
+    .priv_data_size   = sizeof(I264Context),
+    .init             = I264_init,
+    .encode2          = I264_frame,
+    .close            = I264_close,
+    .capabilities     = CODEC_CAP_DELAY,
+    .long_name        = NULL_IF_CONFIG_SMALL("Intel HWA H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
+    .priv_class       = &i264_class,
+    .defaults         = i264_defaults,
+    .init_static_data = I264_init_static,
+    .pix_fmts         = pix_fmts
+};
diff --git a/libavcodec/libi264.h b/libavcodec/libi264.h
new file mode 100644
index 0000000..e8ef67e
--- /dev/null
+++ b/libavcodec/libi264.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2015 Bryan Christ
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_LIBI264_H
+#define AVCODEC_LIBI264_H
+
+#include <va/va.h>
+#include <va/va_enc_h264.h>
+
+#include "avcodec.h"
+
+
+#define SURFACE_NUM 16 /* 16 surfaces for source YUV */
+
+typedef struct VASurface          /* per-surface record; I264Context keeps SURFACE_NUM of them in src_sufrace[] */
+{
+    VASurfaceID     *surface_id;  /* pointer to a VA surface ID; presumably an entry of src_surface_id[] -- TODO confirm in libi264.c */
+    unsigned int    frame_num;    /* frame number associated with the surface (assignment not visible in this header) */
+    int64_t         pts;          /* timestamp; presumably the pts of the frame loaded into the surface -- TODO confirm */
+} VASurface;
+
+typedef struct I264Context /* private codec context: ff_libi264_encoder.priv_data_size is sizeof(I264Context) */
+{
+    VADisplay va_dpy;
+    VAProfile h264_profile; /* selects profile_idc in ff_i264_sps_rbsp() */
+    VAConfigAttrib attrib[VAConfigAttribTypeMax];
+    VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
+    VAConfigID config_id;
+    VAContextID context_id;
+    VASurfaceID src_surface_id[SURFACE_NUM];
+    VASurface   src_sufrace[SURFACE_NUM]; /* (sic) the spelling is part of the field name */
+    VABufferID  coded_buf[SURFACE_NUM];
+    VASurfaceID ref_surface[SURFACE_NUM];
+    VAEncSequenceParameterBufferH264 seq_param; /* read by ff_i264_slice_header() */
+    VAEncPictureParameterBufferH264 pic_param; /* read by ff_i264_slice_header() */
+    VAEncSliceParameterBufferH264 slice_param; /* read by ff_i264_slice_header() */
+
+    AVFrame out_pic;
+
+    int intra_period; /* target of the "intra_period" AVOption */
+    int intra_idr_period;
+    int ip_period;
+    int b_frames;
+    int constraint_set_flag; /* bits 0..3 become constraint_set0..3_flag in the SPS */
+    int h264_entropy_mode;
+    char *rc_mode_str; /* target of the "rc_mode" AVOption */
+    int rc_mode;
+    char *profile; /* target of the "profile" AVOption */
+    int config_attrib_num;
+    int enc_packed_header_idx;
+    int h264_packedheader;
+    int h264_maxref;
+    int frame_bitrate; /* > 0 makes the SPS carry VUI/HRD data; divided by 1024 when written there */
+    int initial_qp;
+    int minimal_qp;
+
+    unsigned int num_ref_frames;
+
+    int frame_width_mbaligned;
+    int frame_height_mbaligned;
+
+    int current_frame_type;
+
+    unsigned long long current_frame_encoding; /* multiplies cpb_removal_delay in ff_i264_sei_rbsp() */
+    unsigned long long current_frame_display;
+    unsigned long long current_frame_num;
+    unsigned long long current_IDR_display;
+    unsigned long long last_p;
+
+    VAPictureH264 current_curr_pic;
+    VAPictureH264 reference_frames[16];
+    VAPictureH264 ref_pic_list0_P[32];
+    VAPictureH264 ref_pic_list0_B[32];
+    VAPictureH264 ref_pic_list1_B[32];
+
+    unsigned int num_short_term;
+
+    int nb_surfaces_loaded;
+
+    int initial_cpb_removal_delay; /* value written into the buffering period SEI */
+    int initial_cpb_removal_delay_length; /* bit width of initial_cpb_removal_delay in that SEI */
+    int cpb_removal_delay; /* scaled by current_frame_encoding for the picture timing SEI */
+    int cpb_removal_delay_length; /* bit width used by ff_i264_sei_rbsp() */
+    int dpb_output_delay_length; /* bit width of dpb_output_delay, which is written as 0 */
+
+    int64_t duration;
+
+} I264Context;
+
+#endif /* AVCODEC_LIBI264_H */
diff --git a/libavcodec/libi264_param_set.c b/libavcodec/libi264_param_set.c
new file mode 100644
index 0000000..7052e84
--- /dev/null
+++ b/libavcodec/libi264_param_set.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (C) 2015 Bryan Christ
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#include "libi264_param_set.h"
+
+
+/* Reassemble the bytes of val, taken in memory order, most significant first:
+ * a byte swap on little-endian hosts, the identity on big-endian ones. */
+static unsigned int va_swap32(unsigned int val)
+{
+    const unsigned char *b = (const unsigned char *)&val;
+    /* Widen each byte before shifting: b[0] promotes to a signed int, so
+     * shifting a byte >= 0x80 left by 24 would overflow it. */
+    return ((unsigned int)b[0] << 24) | ((unsigned int)b[1] << 16) |
+           ((unsigned int)b[2] <<  8) |  (unsigned int)b[3];
+}
+
+void ff_i264_bitstream_start(I264Bitstream *bs) /* initialise bs as an empty stream */
+{
+    bs->bit_offset        = 0;
+    bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING;
+    bs->buffer            = calloc(bs->max_size_in_dword, sizeof(*bs->buffer)); /* zero-filled; result is not checked */
+}
+
+/* Flush the partially filled last dword: left-align its pending bits and
+ * store it through va_swap32(). Nothing is done on a dword boundary. */
+void ff_i264_bitstream_end(I264Bitstream *bs)
+{
+    const int word_idx  = bs->bit_offset >> 5;
+    const int used_bits = bs->bit_offset & 0x1f;
+
+    if (used_bits)
+        bs->buffer[word_idx] = va_swap32(bs->buffer[word_idx] << (32 - used_bits));
+}
+
+/* Append the low size_in_bits (1..32) bits of val to bs, most significant bit
+ * first; a non-positive size writes nothing. Full dwords go through va_swap32(). */
+static void bitstream_put_ui(I264Bitstream *bs, unsigned int val, int size_in_bits)
+{
+    int pos = (bs->bit_offset >> 5);
+    int bit_left = 32 - (bs->bit_offset & 0x1f);
+
+    if (size_in_bits <= 0)
+        return;
+    val &= 0xffffffffu >> (32 - size_in_bits); /* stray high bits would clobber bits already written */
+    bs->bit_offset += size_in_bits;
+    if (bit_left > size_in_bits) {
+        bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val);
+    } else {
+        size_in_bits -= bit_left;
+        /* An untouched dword (bit_left == 32) is replaced outright: shifting it by 32 is undefined. */
+        bs->buffer[pos] = va_swap32((bit_left < 32 ? bs->buffer[pos] << bit_left : 0) | (val >> size_in_bits));
+        if (pos + 1 == bs->max_size_in_dword) {
+            unsigned int *grown = realloc(bs->buffer, (bs->max_size_in_dword + BITSTREAM_ALLOCATE_STEPPING) * sizeof(*grown));
+            if (!grown)
+                abort(); /* no way to report failure from here; never lose or dereference the old pointer */
+            bs->buffer = grown;
+            bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING;
+        }
+        bs->buffer[pos + 1] = val;
+    }
+}
+
+/* Write val as an unsigned Exp-Golomb code ue(v): N zero bits followed by the
+ * (N + 1)-bit binary form of val + 1. val must be smaller than UINT_MAX. */
+static void bitstream_put_ue(I264Bitstream *bs, unsigned int val)
+{
+    int size_in_bits = 0;
+    unsigned int tmp_val = ++val; /* unsigned: a negative int would never shift down to zero */
+
+    for (; tmp_val; tmp_val >>= 1)
+        size_in_bits++;
+    if (!size_in_bits) /* val + 1 wrapped to 0: not encodable here, and a size of -1 must not reach bitstream_put_ui */
+        return;
+    bitstream_put_ui(bs, 0, size_in_bits - 1); // leading zero
+    bitstream_put_ui(bs, val, size_in_bits);
+}
+
+/*
+ * Write val as a signed Exp-Golomb code se(v). The value is first mapped
+ * onto the unsigned code space and then written with bitstream_put_ue():
+ *   val >  0  ->  2 * val - 1
+ *   val <= 0  -> -2 * val
+ */
+static void bitstream_put_se(I264Bitstream *bs, int val)
+{
+    unsigned int code_num = val > 0 ? 2 * val - 1 : -2 * val;
+
+    bitstream_put_ue(bs, code_num);
+}
+
+/*
+ * Pad bs up to the next byte boundary with copies of bit.
+ *
+ * Does nothing when the stream is already byte aligned; otherwise bit must
+ * be 0 or 1, which is checked with assert() only on that path.
+ */
+static void bitstream_byte_aligning(I264Bitstream *bs, int bit)
+{
+    const int used = bs->bit_offset & 0x7; /* bits already occupied in the current byte */
+    const int pad  = 8 - used;             /* bits needed to complete it */
+
+    if (!used)
+        return;
+
+    assert(bit == 0 || bit == 1);
+
+    /* A run of pad one-bits is (1 << pad) - 1; a run of zero-bits is 0. */
+    bitstream_put_ui(bs, bit ? (1 << pad) - 1 : 0, pad);
+}
+
+static void
+rbsp_trailing_bits(I264Bitstream *bs)
+{   /* closes an RBSP: the two writes below must stay in this order */
+    bitstream_put_ui(bs, 1, 1);         /* stop bit, always 1 */
+    bitstream_byte_aligning(bs, 0);     /* then 0 bits up to the next byte boundary */
+}
+
+void ff_i264_nal_start_code_prefix(I264Bitstream *bs)
+{
+    bitstream_put_ui(bs, 0x00000001, 32); /* four-byte start code: 00 00 00 01 */
+}
+
+void ff_i264_nal_header(I264Bitstream *bs, int nal_ref_idc, int nal_unit_type)
+{   /* writes the 8-bit NAL unit header: 1 + 2 + 5 bits */
+    bitstream_put_ui(bs, 0, 1);                /* forbidden_zero_bit: 0 */
+    bitstream_put_ui(bs, nal_ref_idc, 2);      /* nal_ref_idc, 2 bits */
+    bitstream_put_ui(bs, nal_unit_type, 5);    /* nal_unit_type, 5 bits */
+}
+
+/* Write the sequence parameter set RBSP for seq_param into bs (no start code
+ * or NAL header). VUI and HRD data follow only when ctx->frame_bitrate > 0. */
+void ff_i264_sps_rbsp(I264Context *ctx, VAEncSequenceParameterBufferH264 *seq_param, I264Bitstream *bs)
+{
+    int profile_idc = PROFILE_IDC_BASELINE;
+
+    if (ctx->h264_profile  == VAProfileH264High)
+        profile_idc = PROFILE_IDC_HIGH;
+    else if (ctx->h264_profile  == VAProfileH264Main)
+        profile_idc = PROFILE_IDC_MAIN;
+
+    bitstream_put_ui(bs, profile_idc, 8);               /* profile_idc */
+    bitstream_put_ui(bs, !!(ctx->constraint_set_flag & 1), 1);                         /* constraint_set0_flag */
+    bitstream_put_ui(bs, !!(ctx->constraint_set_flag & 2), 1);                         /* constraint_set1_flag */
+    bitstream_put_ui(bs, !!(ctx->constraint_set_flag & 4), 1);                         /* constraint_set2_flag */
+    bitstream_put_ui(bs, !!(ctx->constraint_set_flag & 8), 1);                         /* constraint_set3_flag */
+    bitstream_put_ui(bs, 0, 4);                         /* reserved_zero_4bits */
+    bitstream_put_ui(bs, seq_param->level_idc, 8);      /* level_idc */
+    bitstream_put_ue(bs, seq_param->seq_parameter_set_id);      /* seq_parameter_set_id */
+
+    if ( profile_idc == PROFILE_IDC_HIGH) {
+        bitstream_put_ue(bs, 1);        /* chroma_format_idc = 1, 4:2:0 */
+        bitstream_put_ue(bs, 0);        /* bit_depth_luma_minus8 */
+        bitstream_put_ue(bs, 0);        /* bit_depth_chroma_minus8 */
+        bitstream_put_ui(bs, 0, 1);     /* qpprime_y_zero_transform_bypass_flag */
+        bitstream_put_ui(bs, 0, 1);     /* seq_scaling_matrix_present_flag */
+    }
+
+    bitstream_put_ue(bs, seq_param->seq_fields.bits.log2_max_frame_num_minus4); /* log2_max_frame_num_minus4 */
+    bitstream_put_ue(bs, seq_param->seq_fields.bits.pic_order_cnt_type);        /* pic_order_cnt_type */
+
+    if (seq_param->seq_fields.bits.pic_order_cnt_type == 0)
+        bitstream_put_ue(bs, seq_param->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4);     /* log2_max_pic_order_cnt_lsb_minus4 */
+    else {
+        assert(0);      /* only pic_order_cnt_type 0 is implemented */
+    }
+
+    bitstream_put_ue(bs, seq_param->max_num_ref_frames);        /* num_ref_frames */
+    bitstream_put_ui(bs, 0, 1);                                 /* gaps_in_frame_num_value_allowed_flag */
+    bitstream_put_ue(bs, seq_param->picture_width_in_mbs - 1);  /* pic_width_in_mbs_minus1 */
+    bitstream_put_ue(bs, seq_param->picture_height_in_mbs - 1); /* pic_height_in_map_units_minus1 */
+    bitstream_put_ui(bs, seq_param->seq_fields.bits.frame_mbs_only_flag, 1);    /* frame_mbs_only_flag */
+
+    if (!seq_param->seq_fields.bits.frame_mbs_only_flag) {
+        assert(0);      /* frame_mbs_only_flag == 0 is not implemented */
+    }
+    bitstream_put_ui(bs, seq_param->seq_fields.bits.direct_8x8_inference_flag, 1);      /* direct_8x8_inference_flag */
+    bitstream_put_ui(bs, seq_param->frame_cropping_flag, 1);            /* frame_cropping_flag */
+
+    if (seq_param->frame_cropping_flag) {
+        bitstream_put_ue(bs, seq_param->frame_crop_left_offset);        /* frame_crop_left_offset */
+        bitstream_put_ue(bs, seq_param->frame_crop_right_offset);       /* frame_crop_right_offset */
+        bitstream_put_ue(bs, seq_param->frame_crop_top_offset);         /* frame_crop_top_offset */
+        bitstream_put_ue(bs, seq_param->frame_crop_bottom_offset);      /* frame_crop_bottom_offset */
+    }
+
+    if (ctx->frame_bitrate <= 0)
+        bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */
+    else {
+        bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */
+        bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */
+        bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */
+        bitstream_put_ui(bs, 0, 1); /* video_signal_type_present_flag */
+        bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */
+        bitstream_put_ui(bs, 1, 1); /* timing_info_present_flag */
+        {
+            bitstream_put_ui(bs, 15, 32);   /* num_units_in_tick (fixed value) */
+            bitstream_put_ui(bs, 900, 32);  /* time_scale (fixed value) */
+            bitstream_put_ui(bs, 1, 1);     /* fixed_frame_rate_flag */
+        }
+        bitstream_put_ui(bs, 1, 1); /* nal_hrd_parameters_present_flag */
+        {
+            int bit_rate_units = ctx->frame_bitrate >= 1024 ? ctx->frame_bitrate / 1024 : 1; /* hrd_parameters; at least 1 so the _minus1 values below never go negative */
+            bitstream_put_ue(bs, 0);    /* cpb_cnt_minus1 */
+            bitstream_put_ui(bs, 4, 4); /* bit_rate_scale */
+            bitstream_put_ui(bs, 6, 4); /* cpb_size_scale */
+
+            bitstream_put_ue(bs, bit_rate_units - 1); /* bit_rate_value_minus1[0] */
+            bitstream_put_ue(bs, bit_rate_units * 8 - 1); /* cpb_size_value_minus1[0] */
+            bitstream_put_ui(bs, 1, 1);  /* cbr_flag[0] */
+
+            bitstream_put_ui(bs, 23, 5);   /* initial_cpb_removal_delay_length_minus1 */
+            bitstream_put_ui(bs, 23, 5);   /* cpb_removal_delay_length_minus1 */
+            bitstream_put_ui(bs, 23, 5);   /* dpb_output_delay_length_minus1 */
+            bitstream_put_ui(bs, 23, 5);   /* time_offset_length  */
+        }
+        bitstream_put_ui(bs, 0, 1);   /* vcl_hrd_parameters_present_flag */
+        bitstream_put_ui(bs, 0, 1);   /* low_delay_hrd_flag */
+
+        bitstream_put_ui(bs, 0, 1); /* pic_struct_present_flag */
+        bitstream_put_ui(bs, 0, 1); /* bitstream_restriction_flag */
+    }
+
+    rbsp_trailing_bits(bs);     /* rbsp_trailing_bits */
+}
+
+
+void ff_i264_pps_rbsp(VAEncPictureParameterBufferH264 *pic_param, I264Bitstream *bs)
+{   /* Writes the picture parameter set RBSP for pic_param into bs; start code and NAL header are not written here. */
+    bitstream_put_ue(bs, pic_param->pic_parameter_set_id);      /* pic_parameter_set_id */
+    bitstream_put_ue(bs, pic_param->seq_parameter_set_id);      /* seq_parameter_set_id */
+
+    bitstream_put_ui(bs, pic_param->pic_fields.bits.entropy_coding_mode_flag, 1);  /* entropy_coding_mode_flag */
+
+    bitstream_put_ui(bs, 0, 1);                         /* pic_order_present_flag: 0 */
+
+    bitstream_put_ue(bs, 0);                            /* num_slice_groups_minus1 */
+
+    bitstream_put_ue(bs, pic_param->num_ref_idx_l0_active_minus1);      /* num_ref_idx_l0_active_minus1 */
+    bitstream_put_ue(bs, pic_param->num_ref_idx_l1_active_minus1);      /* num_ref_idx_l1_active_minus1 */
+
+    bitstream_put_ui(bs, pic_param->pic_fields.bits.weighted_pred_flag, 1);     /* weighted_pred_flag (value taken from pic_param) */
+    bitstream_put_ui(bs, pic_param->pic_fields.bits.weighted_bipred_idc, 2);    /* weighted_bipred_idc (value taken from pic_param) */
+
+    bitstream_put_se(bs, pic_param->pic_init_qp - 26);  /* pic_init_qp_minus26 */
+    bitstream_put_se(bs, 0);                            /* pic_init_qs_minus26 */
+    bitstream_put_se(bs, 0);                            /* chroma_qp_index_offset */
+
+    bitstream_put_ui(bs, pic_param->pic_fields.bits.deblocking_filter_control_present_flag, 1); /* deblocking_filter_control_present_flag */
+    bitstream_put_ui(bs, 0, 1);                         /* constrained_intra_pred_flag */
+    bitstream_put_ui(bs, 0, 1);                         /* redundant_pic_cnt_present_flag */
+
+    /* more_rbsp_data: the three fields below are written unconditionally */
+    bitstream_put_ui(bs, pic_param->pic_fields.bits.transform_8x8_mode_flag, 1);    /* transform_8x8_mode_flag */
+    bitstream_put_ui(bs, 0, 1);                         /* pic_scaling_matrix_present_flag */
+    bitstream_put_se(bs, pic_param->second_chroma_qp_index_offset );    /* second_chroma_qp_index_offset */
+
+    rbsp_trailing_bits(bs);
+}
+
+/* Finish the scratch stream payload and append it to bs as one SEI message:
+ * payload type, payload size in bytes, then the payload bytes. The scratch
+ * buffer is released afterwards. */
+static void sei_put_message(I264Bitstream *bs, int payload_type, I264Bitstream *payload)
+{
+    unsigned char *byte_buf;
+    int byte_size, i;
+
+    if (payload->bit_offset & 0x7)
+        bitstream_put_ui(payload, 1, 1);   /* alignment bit; the flush below pads with zeros */
+    ff_i264_bitstream_end(payload);
+    byte_size = (payload->bit_offset + 7) / 8;
+
+    bitstream_put_ui(bs, payload_type, 8);
+    bitstream_put_ui(bs, byte_size, 8);
+    byte_buf = (unsigned char *)payload->buffer;
+    for (i = 0; i < byte_size; i++)
+        bitstream_put_ui(bs, byte_buf[i], 8);
+    free(byte_buf);
+}
+
+/*
+ * Write an SEI RBSP holding two messages: buffering period (payload type 0)
+ * followed by picture timing (payload type 1). Field widths are taken from
+ * the length members of ictx.
+ */
+void ff_i264_sei_rbsp(I264Context *ictx, I264Bitstream *bs)
+{
+    I264Bitstream sei_bp_bs, sei_pic_bs;
+
+    /* buffering period payload */
+    ff_i264_bitstream_start(&sei_bp_bs);
+    bitstream_put_ue(&sei_bp_bs, 0);       /*seq_parameter_set_id*/
+    bitstream_put_ui(&sei_bp_bs, ictx->initial_cpb_removal_delay, ictx->initial_cpb_removal_delay_length);
+    /* initial_cpb_removal_delay_offset is coded with the same width as initial_cpb_removal_delay */
+    bitstream_put_ui(&sei_bp_bs, 0, ictx->initial_cpb_removal_delay_length);
+
+    /* picture timing payload: cpb_removal_delay, then dpb_output_delay */
+    ff_i264_bitstream_start(&sei_pic_bs);
+    bitstream_put_ui(&sei_pic_bs, ictx->cpb_removal_delay*ictx->current_frame_encoding, ictx->cpb_removal_delay_length);
+    bitstream_put_ui(&sei_pic_bs, 0, ictx->dpb_output_delay_length);
+
+    /* Write the SEI buffer period data, then the SEI timing data */
+    sei_put_message(bs, 0x00, &sei_bp_bs);
+    sei_put_message(bs, 0x01, &sei_pic_bs);
+
+    rbsp_trailing_bits(bs);
+}
+
+void ff_i264_slice_header(I264Context *ictx, I264Bitstream *bs)
+{   /* Writes the slice header described by ictx->slice_param, ictx->pic_param and ictx->seq_param into bs. */
+    int first_mb_in_slice = ictx->slice_param.macroblock_address;
+
+    bitstream_put_ue(bs, first_mb_in_slice);                             /* first_mb_in_slice: 0 */
+    bitstream_put_ue(bs, ictx->slice_param.slice_type);                  /* slice_type */
+    bitstream_put_ue(bs, ictx->slice_param.pic_parameter_set_id);        /* pic_parameter_set_id: 0 */
+    bitstream_put_ui(bs,
+                     ictx->pic_param.frame_num,
+                     ictx->seq_param.seq_fields.bits.log2_max_frame_num_minus4 + 4); /* frame_num */
+
+    /* frame_mbs_only_flag == 1 */
+    if (!ictx->seq_param.seq_fields.bits.frame_mbs_only_flag) {
+        /* FIXME: frame_mbs_only_flag == 0 is not implemented */
+        assert(0);
+    }
+
+    if (ictx->pic_param.pic_fields.bits.idr_pic_flag)
+        bitstream_put_ue(bs, ictx->slice_param.idr_pic_id);       /* idr_pic_id: 0 */
+
+    if (ictx->seq_param.seq_fields.bits.pic_order_cnt_type == 0) {
+        bitstream_put_ui(bs, ictx->pic_param.CurrPic.TopFieldOrderCnt, ictx->seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 + 4);
+        /* pic_order_present_flag == 0 */
+    } else {
+        /* FIXME: only pic_order_cnt_type 0 is implemented */
+        assert(0);
+    }
+
+    /* redundant_pic_cnt_present_flag == 0 */
+    /* slice type */
+    if (IS_P_SLICE(ictx->slice_param.slice_type)) {
+        bitstream_put_ui(bs, ictx->slice_param.num_ref_idx_active_override_flag, 1);            /* num_ref_idx_active_override_flag: */
+
+        if (ictx->slice_param.num_ref_idx_active_override_flag)
+            bitstream_put_ue(bs, ictx->slice_param.num_ref_idx_l0_active_minus1);
+
+        /* ref_pic_list_reordering */
+        bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l0: 0 */
+    } else if (IS_B_SLICE(ictx->slice_param.slice_type)) {
+        bitstream_put_ui(bs, ictx->slice_param.direct_spatial_mv_pred_flag, 1);            /* direct_spatial_mv_pred: 1 */
+
+        bitstream_put_ui(bs, ictx->slice_param.num_ref_idx_active_override_flag, 1);       /* num_ref_idx_active_override_flag: */
+
+        if (ictx->slice_param.num_ref_idx_active_override_flag) {
+            bitstream_put_ue(bs, ictx->slice_param.num_ref_idx_l0_active_minus1);
+            bitstream_put_ue(bs, ictx->slice_param.num_ref_idx_l1_active_minus1);
+        }
+
+        /* ref_pic_list_reordering */
+        bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l0: 0 */
+        bitstream_put_ui(bs, 0, 1);            /* ref_pic_list_reordering_flag_l1: 0 */
+    }
+
+    if ((ictx->pic_param.pic_fields.bits.weighted_pred_flag &&
+         IS_P_SLICE(ictx->slice_param.slice_type)) ||
+        ((ictx->pic_param.pic_fields.bits.weighted_bipred_idc == 1) &&
+         IS_B_SLICE(ictx->slice_param.slice_type))) {
+        /* FIXME: fill weight/offset table */
+        assert(0);
+    }
+
+    /* dec_ref_pic_marking */
+    if (ictx->pic_param.pic_fields.bits.reference_pic_flag) {     /* nal_ref_idc != 0 */
+        unsigned char no_output_of_prior_pics_flag = 0;
+        unsigned char long_term_reference_flag = 0;
+        unsigned char adaptive_ref_pic_marking_mode_flag = 0;
+
+        if (ictx->pic_param.pic_fields.bits.idr_pic_flag) {
+            bitstream_put_ui(bs, no_output_of_prior_pics_flag, 1);            /* no_output_of_prior_pics_flag: 0 */
+            bitstream_put_ui(bs, long_term_reference_flag, 1);            /* long_term_reference_flag: 0 */
+        } else {
+            bitstream_put_ui(bs, adaptive_ref_pic_marking_mode_flag, 1);            /* adaptive_ref_pic_marking_mode_flag: 0 */
+        }
+    }
+
+    if (ictx->pic_param.pic_fields.bits.entropy_coding_mode_flag &&
+        !IS_I_SLICE(ictx->slice_param.slice_type))
+        bitstream_put_ue(bs, ictx->slice_param.cabac_init_idc);               /* cabac_init_idc: 0 */
+
+    bitstream_put_se(bs, ictx->slice_param.slice_qp_delta);                   /* slice_qp_delta: 0 */
+
+    /* ignore for SP/SI */
+
+    if (ictx->pic_param.pic_fields.bits.deblocking_filter_control_present_flag) {
+        bitstream_put_ue(bs, ictx->slice_param.disable_deblocking_filter_idc);           /* disable_deblocking_filter_idc: 0 */
+
+        if (ictx->slice_param.disable_deblocking_filter_idc != 1) {
+            bitstream_put_se(bs, ictx->slice_param.slice_alpha_c0_offset_div2);          /* slice_alpha_c0_offset_div2: 2 */
+            bitstream_put_se(bs, ictx->slice_param.slice_beta_offset_div2);              /* slice_beta_offset_div2: 2 */
+        }
+    }
+
+    if (ictx->pic_param.pic_fields.bits.entropy_coding_mode_flag) {
+        bitstream_byte_aligning(bs, 1);         /* with entropy_coding_mode_flag set, pad with 1 bits to a byte boundary */
+    }
+}
diff --git a/libavcodec/libi264_param_set.h b/libavcodec/libi264_param_set.h
new file mode 100644
index 0000000..c8f15dd
--- /dev/null
+++ b/libavcodec/libi264_param_set.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2015 Bryan Christ
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_LIBI264_PARAM_SET_H
+#define AVCODEC_LIBI264_PARAM_SET_H
+
+#include <va/va.h>
+#include <va/va_enc_h264.h>
+
+#include "libi264.h"
+
+
+#define NAL_REF_IDC_NONE        0   /* nal_ref_idc values (see ff_i264_nal_header) */
+#define NAL_REF_IDC_LOW         1
+#define NAL_REF_IDC_MEDIUM      2
+#define NAL_REF_IDC_HIGH        3
+
+#define NAL_NON_IDR             1   /* nal_unit_type values (see ff_i264_nal_header) */
+#define NAL_IDR                 5
+#define NAL_SPS                 7
+#define NAL_PPS                 8
+#define NAL_SEI         6
+
+#define SLICE_TYPE_P            0   /* values the IS_x_SLICE() tests below compare against */
+#define SLICE_TYPE_B            1
+#define SLICE_TYPE_I            2
+#define IS_P_SLICE(type) (SLICE_TYPE_P == (type))
+#define IS_B_SLICE(type) (SLICE_TYPE_B == (type))
+#define IS_I_SLICE(type) (SLICE_TYPE_I == (type))
+
+#define ENTROPY_MODE_CAVLC      0
+#define ENTROPY_MODE_CABAC      1
+
+#define PROFILE_IDC_BASELINE    66  /* profile_idc values written by ff_i264_sps_rbsp() */
+#define PROFILE_IDC_MAIN        77
+#define PROFILE_IDC_HIGH        100
+
+#define BITSTREAM_ALLOCATE_STEPPING     4096    /* initial size and growth step of I264Bitstream.buffer, in dwords */
+
+
+typedef struct I264Bitstream {
+    unsigned int *buffer;           /* heap storage in dwords, allocated by ff_i264_bitstream_start() */
+    int bit_offset;                 /* number of bits written so far */
+    int max_size_in_dword;          /* current capacity of buffer, in dwords */
+} I264Bitstream;
+
+
+void ff_i264_bitstream_start(I264Bitstream *bs);
+
+void ff_i264_bitstream_end(I264Bitstream *bs);
+
+void ff_i264_nal_start_code_prefix(I264Bitstream *bs);
+
+void ff_i264_nal_header(I264Bitstream *bs, int nal_ref_idc, int nal_unit_type);
+
+void ff_i264_sps_rbsp(I264Context *ctx, VAEncSequenceParameterBufferH264 *seq_param, I264Bitstream *bs);
+
+void ff_i264_pps_rbsp(VAEncPictureParameterBufferH264 *pic_param, I264Bitstream *bs);
+
+void ff_i264_sei_rbsp(I264Context *ictx, I264Bitstream *bs);
+
+void ff_i264_slice_header(I264Context *ictx, I264Bitstream *bs);
+
+#endif /* AVCODEC_LIBI264_PARAM_SET_H */
diff --git a/libavcodec/libi264_va_display.c b/libavcodec/libi264_va_display.c
new file mode 100644
index 0000000..f04bf6b
--- /dev/null
+++ b/libavcodec/libi264_va_display.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <va/va.h>
+
+#include "libi264_va_display.h"
+#include "libavutil/log.h"
+
+extern const VADisplayHooks va_display_hooks_x11;
+extern const VADisplayHooks va_display_hooks_drm;
+
+static const VADisplayHooks *g_display_hooks; /* backend last tried by ff_va_open_display() */
+static const VADisplayHooks *g_display_hooks_available[] = {
+    &va_display_hooks_drm, &va_display_hooks_x11, /* tried in this order */
+    NULL /* sentinel: ff_va_open_display() scans until a NULL entry */
+};
+
+/* Try each display backend in table order; return the first display that
+ * opens, or NULL (after logging an error) when none does. */
+VADisplay ff_va_open_display(void)
+{
+    const unsigned int nb_hooks = sizeof(g_display_hooks_available) /
+                                  sizeof(g_display_hooks_available[0]);
+    VADisplay va_dpy = NULL;
+    unsigned int i;
+
+    /* Bound the scan by the table size as well as by a NULL entry: testing
+     * the entry alone reads past the end of a table without a terminator. */
+    for (i = 0; i < nb_hooks && !va_dpy && g_display_hooks_available[i]; i++) {
+        g_display_hooks = g_display_hooks_available[i];
+        if (g_display_hooks->open_display)
+            va_dpy = g_display_hooks->open_display();
+    }
+    if (!va_dpy)
+        av_log(NULL, AV_LOG_ERROR, "error: failed to initialize display\n");
+    return va_dpy;
+}
+
+/* Close va_dpy via the close_display hook of the backend in g_display_hooks;
+ * a no-op for a NULL display or when no such hook is set. */
+void ff_va_close_display(VADisplay va_dpy)
+{
+    const VADisplayHooks *hooks = g_display_hooks;
+
+    if (va_dpy && hooks && hooks->close_display)
+        hooks->close_display(va_dpy);
+}
+
+/*
+ * Forward the request to the put_surface hook of the backend in
+ * g_display_hooks. Returns VA_STATUS_ERROR_INVALID_DISPLAY for a NULL
+ * display, VA_STATUS_ERROR_UNIMPLEMENTED when no backend or hook is set,
+ * and the hook's own status otherwise.
+ */
+VAStatus ff_va_put_surface(VADisplay va_dpy, VASurfaceID surface,
+                           const VARectangle *src_rect, const VARectangle *dst_rect)
+{
+    if (!va_dpy)
+        return VA_STATUS_ERROR_INVALID_DISPLAY;
+    if (!g_display_hooks || !g_display_hooks->put_surface)
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+    return g_display_hooks->put_surface(va_dpy, surface, src_rect, dst_rect);
+}
diff --git a/libavcodec/libi264_va_display.h b/libavcodec/libi264_va_display.h
new file mode 100644
index 0000000..bbfb91c
--- /dev/null
+++ b/libavcodec/libi264_va_display.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VA_DISPLAY_H
+#define AVCODEC_VA_DISPLAY_H
+
+#include <va/va.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    const char *name;                                       /* backend name; not read by the code in libi264_va_display.c */
+    VADisplay (*open_display)   (void);                     /* may be NULL: ff_va_open_display() skips such a backend */
+    void      (*close_display)  (VADisplay va_dpy);         /* optional: NULL-checked by ff_va_close_display() */
+    VAStatus  (*put_surface)    (VADisplay va_dpy, VASurfaceID surface,
+                                 const VARectangle *src_rect,
+                                 const VARectangle *dst_rect); /* optional: NULL-checked by ff_va_put_surface() */
+} VADisplayHooks;
+
+VADisplay
+ff_va_open_display(void);
+
+void
+ff_va_close_display(VADisplay va_dpy);
+
+VAStatus
+ff_va_put_surface(
+    VADisplay          va_dpy,
+    VASurfaceID        surface,
+    const VARectangle *src_rect,
+    const VARectangle *dst_rect
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* AVCODEC_VA_DISPLAY_H */
diff --git a/libavcodec/libi264_va_display_drm.c b/libavcodec/libi264_va_display_drm.c
new file mode 100644
index 0000000..9d05db2
--- /dev/null
+++ b/libavcodec/libi264_va_display_drm.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+
+#ifdef IN_LIBVA
+# include "va/drm/va_drm.h"
+#else
+# include <va/va_drm.h>
+#endif
+#include "libi264_va_display.h"
+#include "libavutil/log.h"
+
+/* Descriptor of the opened DRM device node; -1 while none is open. */
+static int drm_fd = -1;
+
+/**
+ * Open the DRM device node /dev/dri/card0 and obtain a VA display for it.
+ *
+ * On success the descriptor stays in drm_fd until va_close_display_drm()
+ * releases it.
+ *
+ * @return the VA display, or NULL if the node cannot be opened or
+ *         vaGetDisplayDRM() fails; no descriptor is left open in either case
+ */
+static VADisplay
+va_open_display_drm(void)
+{
+    VADisplay va_dpy;
+
+    drm_fd = open("/dev/dri/card0", O_RDWR);
+    if (drm_fd < 0) {
+        av_log(NULL, AV_LOG_ERROR, "error: can't open DRM connection!\n");
+        return NULL;
+    }
+
+    va_dpy = vaGetDisplayDRM(drm_fd);
+    if (!va_dpy) {
+        /* No display owns the descriptor, so release it here instead of
+         * leaking it and leaving a stale value in drm_fd. */
+        close(drm_fd);
+        drm_fd = -1;
+    }
+    return va_dpy;
+}
+
+/**
+ * Close the DRM device descriptor held in drm_fd, if there is one.
+ *
+ * @param va_dpy unused; present to match the close_display hook signature
+ */
+static void va_close_display_drm(VADisplay va_dpy)
+{
+    if (drm_fd >= 0) {
+        close(drm_fd);
+        drm_fd = -1;
+    }
+}
+
+
+/**
+ * put_surface hook of the DRM backend.
+ *
+ * This backend does not render anything: all arguments are ignored and the
+ * call always fails.
+ *
+ * @return VA_STATUS_ERROR_OPERATION_FAILED
+ */
+static VAStatus va_put_surface_drm(VADisplay va_dpy, VASurfaceID surface,
+                                   const VARectangle *src_rect,
+                                   const VARectangle *dst_rect)
+{
+    return VA_STATUS_ERROR_OPERATION_FAILED;
+}
+
+/** Hook table of the DRM backend. */
+const VADisplayHooks va_display_hooks_drm = {
+    .name          = "drm",
+    .open_display  = va_open_display_drm,
+    .close_display = va_close_display_drm,
+    .put_surface   = va_put_surface_drm,
+};
diff --git a/libavcodec/libi264_va_display_x11.c b/libavcodec/libi264_va_display_x11.c
new file mode 100644
index 0000000..6930a5c
--- /dev/null
+++ b/libavcodec/libi264_va_display_x11.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <va/va_x11.h>
+
+#include "libi264_va_display.h"
+#include "libavutil/log.h"
+
+/* X connection stored by va_open_display_x11(); NULL while not connected. */
+static Display *x11_display;
+/* Output window created on demand by ensure_window(); zero until then. */
+static Window   x11_window;
+
+/**
+ * Connect to a local X server and obtain a VA display for the connection.
+ *
+ * The displays ":0" and ":1" are tried in that order and the first one that
+ * can be opened is used. On success the connection is stored in x11_display
+ * for the other hooks of this backend.
+ *
+ * @return the VA display, or NULL if no X server could be reached or
+ *         vaGetDisplay() failed (the X connection is closed again then)
+ */
+static VADisplay
+va_open_display_x11(void)
+{
+    static const char *const display_names[] = { ":0", ":1" };
+    Display  *dpy = NULL;
+    VADisplay va_dpy;
+    size_t    i;
+
+    for (i = 0; !dpy && i < sizeof(display_names) / sizeof(display_names[0]); i++)
+        dpy = XOpenDisplay(display_names[i]);
+
+    if (!dpy) {
+        av_log(NULL, AV_LOG_ERROR, "error: can't connect to X server!\n");
+        return NULL;
+    }
+
+    va_dpy = vaGetDisplay(dpy);
+    if (!va_dpy) {
+        /* Without a VA display nothing owns the connection; close it here
+         * rather than leaking it. */
+        XCloseDisplay(dpy);
+        return NULL;
+    }
+
+    x11_display = dpy;
+    return va_dpy;
+}
+
+/**
+ * Destroy the output window, if one exists, and close the X connection.
+ *
+ * Does nothing when no connection is open.
+ *
+ * @param va_dpy unused; present to match the close_display hook signature
+ */
+static void va_close_display_x11(VADisplay va_dpy)
+{
+    Display *dpy = x11_display;
+
+    if (dpy == NULL)
+        return;
+
+    if (x11_window != None) {
+        XUnmapWindow(dpy, x11_window);
+        XDestroyWindow(dpy, x11_window);
+        x11_window = None;
+    }
+
+    XCloseDisplay(dpy);
+    x11_display = NULL;
+}
+
+/**
+ * Make sure the output window exists and has the requested size.
+ *
+ * An existing window is only resized. Otherwise a new window is created at
+ * (0, 0) on the root window of the default screen, with a 1-pixel black
+ * border and a white background; it is then mapped and the connection is
+ * synchronized.
+ *
+ * @param width  window width in pixels
+ * @param height window height in pixels
+ * @return 1 if a window is available, 0 if there is no X connection or the
+ *         window could not be created
+ */
+static int
+ensure_window(unsigned int width, unsigned int height)
+{
+    Window        win, rootwin;
+    /* Xlib pixel values are unsigned long; keep the full width. */
+    unsigned long black_pixel, white_pixel;
+    int           screen;
+
+    if (!x11_display)
+        return 0;
+
+    if (x11_window) {
+        XResizeWindow(x11_display, x11_window, width, height);
+        return 1;
+    }
+
+    screen      = DefaultScreen(x11_display);
+    rootwin     = RootWindow(x11_display, screen);
+    black_pixel = BlackPixel(x11_display, screen);
+    white_pixel = WhitePixel(x11_display, screen);
+
+    win = XCreateSimpleWindow(
+        x11_display,
+        rootwin,
+        0, 0, width, height,
+        1, black_pixel, white_pixel
+    );
+    if (!win)
+        return 0;
+    x11_window = win;
+
+    XMapWindow(x11_display, x11_window);
+    XSync(x11_display, False);
+    return 1;
+}
+
+/**
+ * Check that a rectangle is usable.
+ *
+ * @param rect rectangle to check, may be NULL
+ * @return true if rect is non-NULL, its origin is non-negative and both
+ *         dimensions are greater than zero; false otherwise
+ */
+static inline bool validate_rect(const VARectangle *rect)
+{
+    if (!rect)
+        return false;
+    if (rect->x < 0 || rect->y < 0)
+        return false;
+    return rect->width > 0 && rect->height > 0;
+}
+
+/**
+ * put_surface hook of the X11 backend.
+ *
+ * Validates the arguments, makes sure the output window reaches the right
+ * and bottom edges of dst_rect (x + width by y + height), and hands the
+ * surface to vaPutSurface() with no clip rectangles and VA_FRAME_PICTURE.
+ *
+ * @param va_dpy   VA display to render with
+ * @param surface  surface to present
+ * @param src_rect area of the surface to show
+ * @param dst_rect area of the window to draw into
+ * @return VA_STATUS_ERROR_INVALID_DISPLAY if va_dpy is NULL,
+ *         VA_STATUS_ERROR_INVALID_SURFACE if surface is VA_INVALID_SURFACE,
+ *         VA_STATUS_ERROR_INVALID_PARAMETER if a rectangle fails
+ *         validate_rect(), VA_STATUS_ERROR_ALLOCATION_FAILED if no window
+ *         is available, otherwise the result of vaPutSurface()
+ */
+static VAStatus va_put_surface_x11(VADisplay va_dpy, VASurfaceID surface,
+                                   const VARectangle *src_rect,
+                                   const VARectangle *dst_rect)
+{
+    if (!va_dpy)
+        return VA_STATUS_ERROR_INVALID_DISPLAY;
+
+    if (surface == VA_INVALID_SURFACE)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
+    if (!(validate_rect(src_rect) && validate_rect(dst_rect)))
+        return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+    /* The window must extend to the far corner of the destination area. */
+    if (!ensure_window(dst_rect->x + dst_rect->width,
+                       dst_rect->y + dst_rect->height))
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    return vaPutSurface(va_dpy, surface, x11_window,
+                        src_rect->x, src_rect->y,
+                        src_rect->width, src_rect->height,
+                        dst_rect->x, dst_rect->y,
+                        dst_rect->width, dst_rect->height,
+                        NULL, 0,
+                        VA_FRAME_PICTURE);
+}
+
+/** Hook table of the X11 backend. */
+const VADisplayHooks va_display_hooks_x11 = {
+    .name          = "x11",
+    .open_display  = va_open_display_x11,
+    .close_display = va_close_display_x11,
+    .put_surface   = va_put_surface_x11,
+};
diff --git a/libavcodec/version.h b/libavcodec/version.h
index b17b794..9846275 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  57
-#define LIBAVCODEC_VERSION_MINOR  20
+#define LIBAVCODEC_VERSION_MINOR  21
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
-- 
1.9.1



More information about the ffmpeg-devel mailing list