[FFmpeg-cvslog] vp8: pack struct VP8ThreadData more efficiently

Mans Rullgard git at videolan.org
Thu Aug 2 23:39:52 CEST 2012


ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Wed Aug  1 14:01:08 2012 +0100| [cf5781fad0e67c6e49abc9b84390c0ca9485873e] | committer: Mans Rullgard

vp8: pack struct VP8ThreadData more efficiently

Reordering the members in this struct reduces the padding holes required
to maintain alignment.  With this order, the only remaining, and
unavoidable, hole is the 3 bytes following left_nnz, a 9-byte array
that is immediately followed by an int.

Signed-off-by: Mans Rullgard <mans at mansr.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cf5781fad0e67c6e49abc9b84390c0ca9485873e
---

 libavcodec/vp8.h |   34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 6b3caa2..a337173 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -94,21 +94,8 @@ typedef struct {
 } VP8Macroblock;
 
 typedef struct {
-#if HAVE_THREADS
-    pthread_mutex_t lock;
-    pthread_cond_t  cond;
-#endif
-    int thread_nr;
-    int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
-    int wait_mb_pos; // What the current thread is waiting on.
-    uint8_t *edge_emu_buffer;
-    /**
-     * For coeff decode, we need to know whether the above block had non-zero
-     * coefficients. This means for each macroblock, we need data for 4 luma
-     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
-     * per macroblock. We keep the last row in top_nnz.
-     */
-    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
     /**
      * This is the index plus one of the last non-zero coeff
      * for each of the blocks in the current macroblock.
@@ -117,8 +104,21 @@ typedef struct {
      *     2+-> full transform
      */
     DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
-    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
+    /**
+     * For coeff decode, we need to know whether the above block had non-zero
+     * coefficients. This means for each macroblock, we need data for 4 luma
+     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+     * per macroblock. We keep the last row in top_nnz.
+     */
+    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+    int thread_nr;
+#if HAVE_THREADS
+    pthread_mutex_t lock;
+    pthread_cond_t  cond;
+#endif
+    int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
+    int wait_mb_pos; // What the current thread is waiting on.
+    uint8_t *edge_emu_buffer;
     VP8FilterStrength *filter_strength;
 } VP8ThreadData;
 



More information about the ffmpeg-cvslog mailing list