[FFmpeg-cvslog] vp8: use a fixed-size edge emu buffer

Tue Feb 4 16:26:55 CET 2014

ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Mon Jan 20 13:12:54 2014 +0100| [e46ad30a808744ddf3855567e162292a4eaabac7] | committer: Anton Khirnov

vp8: use a fixed-size edge emu buffer

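The reason is the same as for commit e588615d938f8581f0d6f3771662d08cadfc00de.
The per-thread edge_emu_buffer is now a fixed 21 * EDGE_EMU_LINESIZE byte
array embedded in VP8ThreadData, instead of being allocated with
av_malloc(21 * s->linesize) in ff_vp8_decode_frame().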

Based on a patch by Ronald S. Bultje <rsbultje at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e46ad30a808744ddf3855567e162292a4eaabac7
---

 libavcodec/vp8.c |   25 +++++++++++--------------
 libavcodec/vp8.h |    4 +++-
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 6318f94..30e24cc 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -45,7 +45,6 @@ static void free_buffers(VP8Context *s)
             pthread_mutex_destroy(&s->thread_data[i].lock);
 #endif
             av_freep(&s->thread_data[i].filter_strength);
-            av_freep(&s->thread_data[i].edge_emu_buffer);
         }
     av_freep(&s->thread_data);
     av_freep(&s->macroblocks_base);
@@ -1161,6 +1160,7 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
     uint8_t *src = ref->f->data[0];
 
     if (AV_RN32A(mv)) {
+        int src_linesize = linesize;
 
         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
@@ -1175,12 +1175,13 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                      src - my_idx * linesize - mx_idx,
-                                     linesize, linesize,
+                                     EDGE_EMU_LINESIZE, linesize,
                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                      x_off - mx_idx, y_off - my_idx, width, height);
-            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
+            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+            src_linesize = EDGE_EMU_LINESIZE;
         }
-        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
+        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
     } else {
         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
@@ -1227,19 +1228,19 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst
             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                      src1 - my_idx * linesize - mx_idx,
-                                     linesize, linesize,
+                                     EDGE_EMU_LINESIZE, linesize,
                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                      x_off - mx_idx, y_off - my_idx, width, height);
-            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
-            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
+            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
 
             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                      src2 - my_idx * linesize - mx_idx,
-                                     linesize, linesize,
+                                     EDGE_EMU_LINESIZE, linesize,
                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                      x_off - mx_idx, y_off - my_idx, width, height);
-            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
-            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
+            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE* my_idx;
+            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
         } else {
             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
@@ -1918,10 +1919,6 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     s->linesize   = curframe->tf.f->linesize[0];
     s->uvlinesize = curframe->tf.f->linesize[1];
 
-    if (!s->thread_data[0].edge_emu_buffer)
-        for (i = 0; i < MAX_THREADS; i++)
-            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
-
     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
     if (!s->mb_layout)
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 6555629..6d864b9 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -120,7 +120,9 @@ typedef struct VP8ThreadData {
 #endif
     int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
     int wait_mb_pos; // What the current thread is waiting on.
-    uint8_t *edge_emu_buffer;
+
+#define EDGE_EMU_LINESIZE 32
+    DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
     VP8FilterStrength *filter_strength;
 } VP8ThreadData;