[FFmpeg-devel] [PATCH] avfilter/vf_overlay: add slice threading

Paul B Mahol onemda at gmail.com
Sun Apr 29 11:04:12 EEST 2018


On 4/29/18, Michael Niedermayer <michael at niedermayer.cc> wrote:
> On Sat, Apr 28, 2018 at 12:00:46PM +0200, Paul B Mahol wrote:
>> Signed-off-by: Paul B Mahol <onemda at gmail.com>
>> ---
>>  libavfilter/vf_overlay.c | 281 ++++++++++++++++++++++++++++++++---------------
>>  1 file changed, 190 insertions(+), 91 deletions(-)
>>
>> diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
>> index c6a6ac82f3..cb304e9522 100644
>> --- a/libavfilter/vf_overlay.c
>> +++ b/libavfilter/vf_overlay.c
>> @@ -40,6 +40,10 @@
>>  #include "framesync.h"
>>  #include "video.h"
>>
>> +typedef struct ThreadData {
>> +    AVFrame *dst, *src;
>> +} ThreadData;
>> +
>>  static const char *const var_names[] = {
>>      "main_w",    "W", ///< width  of the main    video
>>      "main_h",    "H", ///< height of the main    video
>> @@ -124,7 +128,7 @@ typedef struct OverlayContext {
>>
>>      AVExpr *x_pexpr, *y_pexpr;
>>
>> -    void (*blend_image)(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y);
>> +    int (*blend_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
>>  } OverlayContext;
>>
>>  static av_cold void uninit(AVFilterContext *ctx)
>> @@ -403,10 +407,10 @@ static int config_output(AVFilterLink *outlink)
>>   * Blend image in src to destination buffer dst at position (x, y).
>>   */
>>
>> -static av_always_inline void blend_image_packed_rgb(AVFilterContext *ctx,
>> +static av_always_inline void blend_slice_packed_rgb(AVFilterContext *ctx,
>>                                     AVFrame *dst, const AVFrame *src,
>>                                     int main_has_alpha, int x, int y,
>> -                                   int is_straight)
>> +                                   int is_straight, int jobnr, int nb_jobs)
>>  {
>>      OverlayContext *s = ctx->priv;
>>      int i, imax, j, jmax;
>> @@ -425,13 +429,19 @@ static av_always_inline void blend_image_packed_rgb(AVFilterContext *ctx,
>>      const int sb = s->overlay_rgba_map[B];
>>      const int sa = s->overlay_rgba_map[A];
>>      const int sstep = s->overlay_pix_step[0];
>> +    int slice_start, slice_end;
>>      uint8_t *S, *sp, *d, *dp;
>>
>>      i = FFMAX(-y, 0);
>> -    sp = src->data[0] + i     * src->linesize[0];
>> -    dp = dst->data[0] + (y+i) * dst->linesize[0];
>> +    imax = FFMIN(-y + dst_h, src_h);
>> +
>> +    slice_start = (imax * jobnr) / nb_jobs;
>> +    slice_end = (imax * (jobnr+1)) / nb_jobs;
>> +
>> +    sp = src->data[0] + (i + slice_start)     * src->linesize[0];
>> +    dp = dst->data[0] + (y + i + slice_start) * dst->linesize[0];
>>
>> -    for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
>> +    for (i = i + slice_start; i < slice_end; i++) {
>>          j = FFMAX(-x, 0);
>>          S = sp + j     * sstep;
>>          d = dp + (x+j) * dstep;
>> @@ -495,7 +505,9 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
>>                                           int dst_offset,
>>                                           int dst_step,
>>                                           int straight,
>> -                                         int yuv)
>> +                                         int yuv,
>> +                                         int jobnr,
>> +                                         int nb_jobs)
>>  {
>>      int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
>>      int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
>> @@ -505,16 +517,22 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
>>      int xp = x>>hsub;
>>      uint8_t *s, *sp, *d, *dp, *dap, *a, *da, *ap;
>>      int jmax, j, k, kmax;
>> +    int slice_start, slice_end;
>>
>>      j = FFMAX(-yp, 0);
>> -    sp = src->data[i] + j         * src->linesize[i];
>> +    jmax = FFMIN(-yp + dst_hp, src_hp);
>> +
>> +    slice_start = (jmax * jobnr) / nb_jobs;
>> +    slice_end = ((jmax * (jobnr+1)) / nb_jobs);
>> +
>> +    sp = src->data[i] + slice_start * src->linesize[i];
>>      dp = dst->data[dst_plane]
>> -                      + (yp+j)    * dst->linesize[dst_plane]
>> +                      + (yp + slice_start) * dst->linesize[dst_plane]
>>                        + dst_offset;
>> -    ap = src->data[3] + (j<<vsub) * src->linesize[3];
>> -    dap = dst->data[3] + ((yp+j) << vsub) * dst->linesize[3];
>> +    ap = src->data[3] + (slice_start << vsub) * src->linesize[3];
>> +    dap = dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3];
>>
>> -    for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
>> +    for (j = j + slice_start; j < slice_end; j++) {
>>          k = FFMAX(-xp, 0);
>>          d = dp + (xp+k) * dst_step;
>>          s = sp + k;
>> @@ -577,17 +595,23 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
>>  static inline void alpha_composite(const AVFrame *src, const AVFrame *dst,
>>                                     int src_w, int src_h,
>>                                     int dst_w, int dst_h,
>> -                                   int x, int y)
>> +                                   int x, int y,
>> +                                   int jobnr, int nb_jobs)
>>  {
>>      uint8_t alpha;          ///< the amount of overlay to blend on to main
>>      uint8_t *s, *sa, *d, *da;
>>      int i, imax, j, jmax;
>> +    int slice_start, slice_end;
>> +
>> +    imax = FFMIN(-y + dst_h, src_h);
>> +    slice_start = (imax * jobnr) / nb_jobs;
>> +    slice_end = ((imax * (jobnr+1)) / nb_jobs);
>>
>>      i = FFMAX(-y, 0);
>> -    sa = src->data[3] + i     * src->linesize[3];
>> -    da = dst->data[3] + (y+i) * dst->linesize[3];
>> +    sa = src->data[3] + (i + slice_start) * src->linesize[3];
>> +    da = dst->data[3] + (y + i + slice_start) * dst->linesize[3];
>>
>> -    for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
>
>> +    for (i = i + slice_start; i < imax; i++) {
>
> shouldn't this use slice_end?

Yes.
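
For the record, a minimal standalone sketch (hypothetical code, not the filter itself, and ignoring the FFMAX(-y, 0) clipping offset the filter applies) of how the jobnr/nb_jobs partition splits imax rows into non-overlapping slices, and why the loop bound has to be slice_end: with imax as the bound, every job except the last one would also walk over the rows of all later slices.

/* Hypothetical sketch, not the vf_overlay code: splits imax rows into
 * nb_jobs non-overlapping slices the same way the patch computes them. */
#include <stdio.h>

static void run_job(int imax, int jobnr, int nb_jobs)
{
    int slice_start = (imax * jobnr)       / nb_jobs;
    int slice_end   = (imax * (jobnr + 1)) / nb_jobs;
    int i;

    /* Bounding the loop with slice_end keeps each row in exactly one job;
     * bounding it with imax would make every job but the last one redo
     * the rows belonging to all following slices. */
    for (i = slice_start; i < slice_end; i++)
        printf("job %d/%d blends row %d\n", jobnr, nb_jobs, i);
}

int main(void)
{
    const int imax = 10, nb_jobs = 4;

    for (int jobnr = 0; jobnr < nb_jobs; jobnr++)
        run_job(imax, jobnr, nb_jobs);
    return 0;
}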

