[FFmpeg-cvslog] avcodec/jpeg2000dwt: use 32x32->64 multiplies in the 9/7i DWT

Michael Niedermayer git at videolan.org
Mon Jun 22 02:36:33 CEST 2015


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Mon Jun 22 02:22:36 2015 +0200| [b1fdf81c6eed786742c08037a9aa662ef7967ab2] | committer: Michael Niedermayer

avcodec/jpeg2000dwt: use 32x32->64 multiplies in the 9/7i DWT

This significantly improves the quality when the integer 9/7 transform
is used.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b1fdf81c6eed786742c08037a9aa662ef7967ab2
---

 libavcodec/jpeg2000dwt.c                 |   33 ++++++++++++++++++------
 tests/ref/fate/j2k-dwt                   |   40 +++++++++++++++---------------
 tests/ref/fate/jpeg2000-dcinema          |    4 +--
 tests/ref/vsynth/vsynth1-jpeg2000-97     |    8 +++---
 tests/ref/vsynth/vsynth2-jpeg2000-97     |    8 +++---
 tests/ref/vsynth/vsynth3-jpeg2000-97     |    8 +++---
 tests/ref/vsynth/vsynth_lena-jpeg2000-97 |    8 +++---
 7 files changed, 63 insertions(+), 46 deletions(-)

diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index a1fe713..dff2516 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -40,12 +40,13 @@
 
 /* Lifting parameters in integer format.
  * Computed as param = (float param) * (1 << 16) */
-#define I_LFTG_ALPHA  103949
-#define I_LFTG_BETA     3472
-#define I_LFTG_GAMMA   57862
-#define I_LFTG_DELTA   29066
-#define I_LFTG_K       80621
-#define I_LFTG_X       53274
+#define I_LFTG_ALPHA  103949ll
+#define I_LFTG_BETA     3472ll
+#define I_LFTG_GAMMA   57862ll
+#define I_LFTG_DELTA   29066ll
+#define I_LFTG_K       80621ll
+#define I_LFTG_X       53274ll
+#define I_PRESHIFT 8
 
 static inline void extend53(int *p, int i0, int i1)
 {
@@ -246,11 +247,16 @@ static void sd_1d97_int(int *p, int i0, int i1)
 
 static void dwt_encode97_int(DWTContext *s, int *t)
 {
-    int lev,
-        w = s->linelen[s->ndeclevels-1][0];
+    int lev;
+    int w = s->linelen[s->ndeclevels-1][0];
+    int h = s->linelen[s->ndeclevels-1][1];
+    int i;
     int *line = s->i_linebuf;
     line += 5;
 
+    for (i = 0; i < w * h; i++)
+        t[i] <<= I_PRESHIFT;
+
     for (lev = s->ndeclevels-1; lev >= 0; lev--){
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
@@ -294,6 +300,9 @@ static void dwt_encode97_int(DWTContext *s, int *t)
         }
 
     }
+
+    for (i = 0; i < w * h; i++)
+        t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 static void sr_1d53(int *p, int i0, int i1)
@@ -471,11 +480,16 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
 {
     int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
+    int h       = s->linelen[s->ndeclevels - 1][1];
+    int i;
     int32_t *line = s->i_linebuf;
     int32_t *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
     line += 5;
 
+    for (i = 0; i < w * h; i++)
+        data[i] <<= I_PRESHIFT;
+
     for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
@@ -515,6 +529,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
                 data[w * i + lp] = l[i];
         }
     }
+
+    for (i = 0; i < w * h; i++)
+        data[i] = (data[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
diff --git a/tests/ref/fate/j2k-dwt b/tests/ref/fate/j2k-dwt
index 5ede1ce..c130faf 100644
--- a/tests/ref/fate/j2k-dwt
+++ b/tests/ref/fate/j2k-dwt
@@ -1,60 +1,60 @@
 5/3i, decomp:15 border 151 170 140 183 milli-err2:        0
-9/7i, decomp:15 border 151 170 140 183 milli-err2:     5188
+9/7i, decomp:15 border 151 170 140 183 milli-err2:      544
 9/7f, decomp:15 border 151 170 140 183 err2:              0.0001
 5/3i, decomp:21 border 173 201  81 189 milli-err2:        0
-9/7i, decomp:21 border 173 201  81 189 milli-err2:     6478
+9/7i, decomp:21 border 173 201  81 189 milli-err2:      592
 9/7f, decomp:21 border 173 201  81 189 err2:              0.0001
 5/3i, decomp:22 border 213 227  76 245 milli-err2:        0
-9/7i, decomp:22 border 213 227  76 245 milli-err2:     6539
+9/7i, decomp:22 border 213 227  76 245 milli-err2:      533
 9/7f, decomp:22 border 213 227  76 245 err2:              0.0001
 5/3i, decomp:13 border 134 157 184 203 milli-err2:        0
-9/7i, decomp:13 border 134 157 184 203 milli-err2:    19203
+9/7i, decomp:13 border 134 157 184 203 milli-err2:      535
 9/7f, decomp:13 border 134 157 184 203 err2:              0.0001
 5/3i, decomp: 1 border 204 237   6 106 milli-err2:        0
-9/7i, decomp: 1 border 204 237   6 106 milli-err2:      924
+9/7i, decomp: 1 border 204 237   6 106 milli-err2:      219
 9/7f, decomp: 1 border 204 237   6 106 err2:              0.0000
 5/3i, decomp:28 border  76 211  13 210 milli-err2:        0
-9/7i, decomp:28 border  76 211  13 210 milli-err2:    17297
+9/7i, decomp:28 border  76 211  13 210 milli-err2:      791
 9/7f, decomp:28 border  76 211  13 210 err2:              0.0002
 5/3i, decomp:21 border  76  99  43 123 milli-err2:        0
-9/7i, decomp:21 border  76  99  43 123 milli-err2:     9039
+9/7i, decomp:21 border  76  99  43 123 milli-err2:      686
 9/7f, decomp:21 border  76  99  43 123 err2:              0.0001
 5/3i, decomp:15 border 192 243 174 204 milli-err2:        0
-9/7i, decomp:15 border 192 243 174 204 milli-err2:     7693
+9/7i, decomp:15 border 192 243 174 204 milli-err2:      476
 9/7f, decomp:15 border 192 243 174 204 err2:              0.0001
 5/3i, decomp:21 border  17  68  93 204 milli-err2:        0
-9/7i, decomp:21 border  17  68  93 204 milli-err2:     7810
+9/7i, decomp:21 border  17  68  93 204 milli-err2:      633
 9/7f, decomp:21 border  17  68  93 204 err2:              0.0001
 5/3i, decomp:11 border 142 168  82 174 milli-err2:        0
-9/7i, decomp:11 border 142 168  82 174 milli-err2:    18168
+9/7i, decomp:11 border 142 168  82 174 milli-err2:      696
 9/7f, decomp:11 border 142 168  82 174 err2:              0.0001
 5/3i, decomp:23 border 142 209 171 235 milli-err2:        0
-9/7i, decomp:23 border 142 209 171 235 milli-err2:     7313
+9/7i, decomp:23 border 142 209 171 235 milli-err2:      626
 9/7f, decomp:23 border 142 209 171 235 err2:              0.0001
 5/3i, decomp:30 border  37 185  79 245 milli-err2:        0
-9/7i, decomp:30 border  37 185  79 245 milli-err2:    13498
+9/7i, decomp:30 border  37 185  79 245 milli-err2:      953
 9/7f, decomp:30 border  37 185  79 245 err2:              0.0002
 5/3i, decomp: 5 border 129 236  30 243 milli-err2:        0
-9/7i, decomp: 5 border 129 236  30 243 milli-err2:     8775
+9/7i, decomp: 5 border 129 236  30 243 milli-err2:      620
 9/7f, decomp: 5 border 129 236  30 243 err2:              0.0001
 5/3i, decomp:10 border   5 160 146 247 milli-err2:        0
-9/7i, decomp:10 border   5 160 146 247 milli-err2:    13478
+9/7i, decomp:10 border   5 160 146 247 milli-err2:      797
 9/7f, decomp:10 border   5 160 146 247 err2:              0.0002
 5/3i, decomp: 5 border 104 162   6  47 milli-err2:        0
-9/7i, decomp: 5 border 104 162   6  47 milli-err2:     7808
+9/7i, decomp: 5 border 104 162   6  47 milli-err2:      603
 9/7f, decomp: 5 border 104 162   6  47 err2:              0.0001
 5/3i, decomp:24 border  78 250 102 218 milli-err2:        0
-9/7i, decomp:24 border  78 250 102 218 milli-err2:    12570
+9/7i, decomp:24 border  78 250 102 218 milli-err2:      836
 9/7f, decomp:24 border  78 250 102 218 err2:              0.0002
 5/3i, decomp:28 border  86  98  56  79 milli-err2:        0
-9/7i, decomp:28 border  86  98  56  79 milli-err2:     4148
+9/7i, decomp:28 border  86  98  56  79 milli-err2:      597
 9/7f, decomp:28 border  86  98  56  79 err2:              0.0001
 5/3i, decomp: 6 border  95 238 197 214 milli-err2:        0
-9/7i, decomp: 6 border  95 238 197 214 milli-err2:     7686
+9/7i, decomp: 6 border  95 238 197 214 milli-err2:      478
 9/7f, decomp: 6 border  95 238 197 214 err2:              0.0001
 5/3i, decomp:17 border  77 169  93 165 milli-err2:        0
-9/7i, decomp:17 border  77 169  93 165 milli-err2:    12026
+9/7i, decomp:17 border  77 169  93 165 milli-err2:      616
 9/7f, decomp:17 border  77 169  93 165 err2:              0.0001
 5/3i, decomp:22 border 178 187   7 119 milli-err2:        0
-9/7i, decomp:22 border 178 187   7 119 milli-err2:     4971
+9/7i, decomp:22 border 178 187   7 119 milli-err2:      392
 9/7f, decomp:22 border 178 187   7 119 err2:              0.0000
diff --git a/tests/ref/fate/jpeg2000-dcinema b/tests/ref/fate/jpeg2000-dcinema
index 8040cb1..c7bf52a 100644
--- a/tests/ref/fate/jpeg2000-dcinema
+++ b/tests/ref/fate/jpeg2000-dcinema
@@ -1,3 +1,3 @@
 #tb 0: 1/24
-0,          0,          0,        1, 12441600, 0xbf142791
-0,          1,          1,        1, 12441600, 0x6b7a2ab5
+0,          0,          0,        1, 12441600, 0xda6b6cde
+0,          1,          1,        1, 12441600, 0xb0994664
diff --git a/tests/ref/vsynth/vsynth1-jpeg2000-97 b/tests/ref/vsynth/vsynth1-jpeg2000-97
index b0fc029..78dc6ad 100644
--- a/tests/ref/vsynth/vsynth1-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth1-jpeg2000-97
@@ -1,4 +1,4 @@
-4c7dbe2451f56a49c29b0b5d7808d74d *tests/data/fate/vsynth1-jpeg2000-97.avi
-3661616 tests/data/fate/vsynth1-jpeg2000-97.avi
-d079e946a2fb75ad5ce6cb2760d1cc62 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
-stddev:    4.63 PSNR: 34.81 MAXDIFF:   54 bytes:  7603200/  7603200
+a19cc0e1a1c1bf76ff5a0b63a0bdfbd1 *tests/data/fate/vsynth1-jpeg2000-97.avi
+3654420 tests/data/fate/vsynth1-jpeg2000-97.avi
+3b71c0f8aebf45122da77d892a6ebf00 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
+stddev:    4.23 PSNR: 35.59 MAXDIFF:   53 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-jpeg2000-97 b/tests/ref/vsynth/vsynth2-jpeg2000-97
index a0c24d8..7ba2d9f 100644
--- a/tests/ref/vsynth/vsynth2-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth2-jpeg2000-97
@@ -1,4 +1,4 @@
-c3582d23a1fca31a6218346b82167f88 *tests/data/fate/vsynth2-jpeg2000-97.avi
-2451092 tests/data/fate/vsynth2-jpeg2000-97.avi
-d1329b49bcfcf74279eb07f7e20ddcec *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
-stddev:    3.73 PSNR: 36.69 MAXDIFF:   30 bytes:  7603200/  7603200
+b86217f0bcbd84a9368ad3f98af32157 *tests/data/fate/vsynth2-jpeg2000-97.avi
+2448506 tests/data/fate/vsynth2-jpeg2000-97.avi
+4d9d9db91075a1eca2a6b9f152e4defc *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
+stddev:    3.23 PSNR: 37.94 MAXDIFF:   29 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-jpeg2000-97 b/tests/ref/vsynth/vsynth3-jpeg2000-97
index 1d8d148..caf8d9d 100644
--- a/tests/ref/vsynth/vsynth3-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth3-jpeg2000-97
@@ -1,4 +1,4 @@
-2f8a9b514fbf1cb034076459463a7b76 *tests/data/fate/vsynth3-jpeg2000-97.avi
-83866 tests/data/fate/vsynth3-jpeg2000-97.avi
-febc7ef2ae9ec3f34b74d456922ae858 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
-stddev:    4.87 PSNR: 34.37 MAXDIFF:   51 bytes:    86700/    86700
+5e17fdaae1a22f3eef8c82b512e4b1b9 *tests/data/fate/vsynth3-jpeg2000-97.avi
+83670 tests/data/fate/vsynth3-jpeg2000-97.avi
+8ec04513b2e6645c9ea340e3fe9fe8f2 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
+stddev:    4.52 PSNR: 35.02 MAXDIFF:   47 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
index 089479b..ee5ad31 100644
--- a/tests/ref/vsynth/vsynth_lena-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
@@ -1,4 +1,4 @@
-60808e880f1fd410b010feeca9105f4e *tests/data/fate/vsynth_lena-jpeg2000-97.avi
-1931500 tests/data/fate/vsynth_lena-jpeg2000-97.avi
-6d775a823d4b96cc6c121665bc7eb359 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
-stddev:    3.38 PSNR: 37.53 MAXDIFF:   28 bytes:  7603200/  7603200
+ca78db12e1af7cbf44fdce165aaa5130 *tests/data/fate/vsynth_lena-jpeg2000-97.avi
+1918756 tests/data/fate/vsynth_lena-jpeg2000-97.avi
+5fd8a2e35503b48af302b3ef5e317683 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
+stddev:    2.84 PSNR: 39.04 MAXDIFF:   28 bytes:  7603200/  7603200



More information about the ffmpeg-cvslog mailing list