[FFmpeg-devel] [PATCH v4] Improved the performance of 1 decode + N filter graphs and adaptive bitrate.

Shaofei Wang shaofei.wang at intel.com
Tue Feb 12 00:41:04 EET 2019


It enabled multiple filter graph concurrency, which bring above about
4%~20% improvement in some 1:N scenarios by CPU or GPU acceleration

Below are some test cases and comparison as reference.
(Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
(Software: Intel iHD driver - 16.9.00100, CentOS 7)

For 1:N transcode by GPU acceleration with vaapi:
./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
    -hwaccel_output_format vaapi \
    -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
    -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null

    test results:
                2 encoders 5 encoders 10 encoders
    Improved       6.1%    6.9%       5.5%

For 1:N transcode by GPU acceleration with QSV:
./ffmpeg -hwaccel qsv -c:v h264_qsv \
    -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale_qsv=1280:720:format=nv12" -c:v h264_qsv -f null /dev/null \
    -vf "scale_qsv=720:480:format=nv12" -c:v h264_qsv -f null /dev/null

    test results:
                2 encoders  5 encoders 10 encoders
    Improved       6%       4%         15%

For Intel GPU acceleration case, 1 decode to N scaling, by QSV:
./ffmpeg -hwaccel qsv -c:v h264_qsv \
    -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale_qsv=1280:720:format=nv12,hwdownload" -pix_fmt nv12 -f null /dev/null \
    -vf "scale_qsv=720:480:format=nv12,hwdownload" -pix_fmt nv12 -f null /dev/null

    test results:
                2 scale  5 scale   10 scale
    Improved       12%     21%        21%

For CPU only 1 decode to N scaling:
./ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
    -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null

    test results:
                2 scale  5 scale   10 scale
    Improved       25%    107%       148%

Signed-off-by: Wang, Shaofei <shaofei.wang at intel.com>
Reviewed-by: Zhao, Jun <jun.zhao at intel.com>
---
 fftools/ffmpeg.c        | 112 +++++++++++++++++++++++++++++++++++++++++++++---
 fftools/ffmpeg.h        |  14 ++++++
 fftools/ffmpeg_filter.c |   4 ++
 3 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 544f1a1..67b1a2a 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1419,13 +1419,18 @@ static void finish_output_stream(OutputStream *ost)
  *
  * @return  0 for success, <0 for severe errors
  */
-static int reap_filters(int flush)
+static int reap_filters(int flush, InputFilter * ifilter)
 {
     AVFrame *filtered_frame = NULL;
     int i;
 
-    /* Reap all buffers present in the buffer sinks */
+    /* Reap all buffers present in the buffer sinks or just reap specified
+     * input filter buffer */
     for (i = 0; i < nb_output_streams; i++) {
+        if (ifilter) {
+            if (ifilter != output_streams[i]->filter->graph->inputs[0])
+                continue;
+        }
         OutputStream *ost = output_streams[i];
         OutputFile    *of = output_files[ost->file_index];
         AVFilterContext *filter;
@@ -2179,7 +2184,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
             }
         }
 
-        ret = reap_filters(1);
+        ret = HAVE_THREADS ? reap_filters(1, ifilter) : reap_filters(1, NULL);
+
         if (ret < 0 && ret != AVERROR_EOF) {
             av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", av_err2str(ret));
             return ret;
@@ -2208,6 +2214,14 @@ static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
 
     ifilter->eof = 1;
 
+#if HAVE_THREADS
+    ifilter->waited_frm = NULL;
+    pthread_mutex_lock(&ifilter->process_mutex);
+    ifilter->t_end = 1;
+    pthread_cond_signal(&ifilter->process_cond);
+    pthread_mutex_unlock(&ifilter->process_mutex);
+    pthread_join(ifilter->f_thread, NULL);
+#endif
     if (ifilter->filter) {
         ret = av_buffersrc_close(ifilter->filter, pts, AV_BUFFERSRC_FLAG_PUSH);
         if (ret < 0)
@@ -2252,12 +2266,95 @@ static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
     return 0;
 }
 
+#if HAVE_THREADS
+static void *filter_pipeline(void *arg)
+{
+    InputFilter *fl = arg;
+    AVFrame *frm;
+    int ret;
+    while(1) {
+        pthread_mutex_lock(&fl->process_mutex);
+        while (fl->waited_frm == NULL && !fl->t_end)
+            pthread_cond_wait(&fl->process_cond, &fl->process_mutex);
+        pthread_mutex_unlock(&fl->process_mutex);
+
+        if (fl->t_end) break;
+
+        frm = fl->waited_frm;
+        ret = ifilter_send_frame(fl, frm);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
+        } else {
+            ret = reap_filters(0, fl);
+        }
+        fl->t_error = ret;
+
+        pthread_mutex_lock(&fl->finish_mutex);
+        fl->waited_frm = NULL;
+        pthread_cond_signal(&fl->finish_cond);
+        pthread_mutex_unlock(&fl->finish_mutex);
+
+        if (ret < 0)
+            break;
+    }
+    return fl;
+}
+#endif
+
 static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
 {
     int i, ret;
     AVFrame *f;
 
     av_assert1(ist->nb_filters > 0); /* ensure ret is initialized */
+#if HAVE_THREADS
+    for (i = 0; i < ist->nb_filters; i++) {
+        //it will use abr_pipeline mode by default
+        if (i < ist->nb_filters - 1) {
+            f = &ist->filters[i]->input_frm;
+            ret = av_frame_ref(f, decoded_frame);
+            if (ret < 0)
+                break;
+        } else
+            f = decoded_frame;
+
+        if (!ist->filters[i]->b_abr_thread_init) {
+            if ((ret = pthread_create(&ist->filters[i]->f_thread, NULL, filter_pipeline,
+                            ist->filters[i]))) {
+                av_log(NULL, AV_LOG_ERROR,
+                        "pthread_create failed: %s. Try to increase `ulimit -v` or \
+                        decrease `ulimit -s`.\n", strerror(ret));
+                return AVERROR(ret);
+            }
+            pthread_mutex_init(&ist->filters[i]->process_mutex, NULL);
+            pthread_mutex_init(&ist->filters[i]->finish_mutex, NULL);
+            pthread_cond_init(&ist->filters[i]->process_cond, NULL);
+            pthread_cond_init(&ist->filters[i]->finish_cond, NULL);
+            ist->filters[i]->t_end = 0;
+            ist->filters[i]->t_error = 0;
+            ist->filters[i]->b_abr_thread_init = 1;
+        }
+
+        pthread_mutex_lock(&ist->filters[i]->process_mutex);
+        ist->filters[i]->waited_frm = f;
+        pthread_cond_signal(&ist->filters[i]->process_cond);
+        pthread_mutex_unlock(&ist->filters[i]->process_mutex);
+    }
+
+    for (i = 0; i < ist->nb_filters; i++) {
+        pthread_mutex_lock(&ist->filters[i]->finish_mutex);
+        while(ist->filters[i]->waited_frm != NULL)
+            pthread_cond_wait(&ist->filters[i]->finish_cond, &ist->filters[i]->finish_mutex);
+        pthread_mutex_unlock(&ist->filters[i]->finish_mutex);
+    }
+    for (i = 0; i < ist->nb_filters; i++) {
+        if (ist->filters[i]->t_error < 0) {
+            ret = ist->filters[i]->t_error;
+            break;
+        }
+    }
+#else
     for (i = 0; i < ist->nb_filters; i++) {
         if (i < ist->nb_filters - 1) {
             f = ist->filter_frame;
@@ -2266,6 +2363,7 @@ static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
                 break;
         } else
             f = decoded_frame;
+
         ret = ifilter_send_frame(ist->filters[i], f);
         if (ret == AVERROR_EOF)
             ret = 0; /* ignore */
@@ -2275,6 +2373,8 @@ static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
             break;
         }
     }
+#endif
+
     return ret;
 }
 
@@ -4537,10 +4637,10 @@ static int transcode_from_filter(FilterGraph *graph, InputStream **best_ist)
     *best_ist = NULL;
     ret = avfilter_graph_request_oldest(graph->graph);
     if (ret >= 0)
-        return reap_filters(0);
+        return reap_filters(0, NULL);
 
     if (ret == AVERROR_EOF) {
-        ret = reap_filters(1);
+        ret = reap_filters(1, NULL);
         for (i = 0; i < graph->nb_outputs; i++)
             close_output_stream(graph->outputs[i]->ost);
         return ret;
@@ -4642,7 +4742,7 @@ static int transcode_step(void)
     if (ret < 0)
         return ret == AVERROR_EOF ? 0 : ret;
 
-    return reap_filters(0);
+    return HAVE_THREADS ? ret : reap_filters(0, NULL);
 }
 
 /*
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index eb1eaf6..9a8e776 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -253,6 +253,20 @@ typedef struct InputFilter {
 
     AVBufferRef *hw_frames_ctx;
 
+    // for abr pipeline
+    int b_abr_thread_init;
+#if HAVE_THREADS
+    AVFrame *waited_frm;
+    AVFrame input_frm;
+    pthread_t f_thread;
+    pthread_cond_t process_cond;
+    pthread_cond_t finish_cond;
+    pthread_mutex_t process_mutex;
+    pthread_mutex_t finish_mutex;
+    int t_end;
+    int t_error;
+#endif
+
     int eof;
 } InputFilter;
 
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index 6518d50..5d1e521 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -197,6 +197,7 @@ DEF_CHOOSE_FORMAT(channel_layouts, uint64_t, channel_layout, channel_layouts, 0,
 int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
 {
     FilterGraph *fg = av_mallocz(sizeof(*fg));
+    int i;
 
     if (!fg)
         exit_program(1);
@@ -225,6 +226,9 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
     GROW_ARRAY(ist->filters, ist->nb_filters);
     ist->filters[ist->nb_filters - 1] = fg->inputs[0];
 
+    for (i = 0; i < ist->nb_filters; i++)
+        ist->filters[i]->b_abr_thread_init = 0;
+
     GROW_ARRAY(filtergraphs, nb_filtergraphs);
     filtergraphs[nb_filtergraphs - 1] = fg;
 
-- 
1.8.3.1



More information about the ffmpeg-devel mailing list