[FFmpeg-devel] [PATCH 2/6] wmalossless: allow calling madd_int16

Mon Apr 18 15:07:27 CEST 2016

This is done by actually handling the cascaded LMS data as if it
were int16_t, which requires switching the computations at various
locations.
---
 libavcodec/wmalosslessdec.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index f7f249b..3885dc1 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -497,15 +497,29 @@ static int decode_cdlms(WmallDecodeCtx *s)
                 s->cdlms[c][i].bitsend = get_bitsz(&s->gb, cbits) + 2;
                 shift_l = 32 - s->cdlms[c][i].bitsend;
                 shift_r = 32 - s->cdlms[c][i].scaling - 2;
+                if (s->bits_per_sample > 16) {
                 for (j = 0; j < s->cdlms[c][i].coefsend; j++)
                     s->cdlms[c][i].coefs[j] =
                         (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
+                } else {
+                    for (j = 0; j < s->cdlms[c][i].coefsend; j++) {
+                        int16_t *ptr = (int16_t*)s->cdlms[c][i].coefs;
+                        ptr[j] = (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
+                    }
+                }
             }
         }
 
+        if (s->bits_per_sample > 16) {
         for (i = 0; i < s->cdlms_ttl[c]; i++)
             memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order,
                    0, WMALL_COEFF_PAD_SIZE);
+        } else {
+            for (i = 0; i < s->cdlms_ttl[c]; i++) {
+                int16_t *ptr = (int16_t*)s->cdlms[c][i].coefs;
+                memset(ptr + s->cdlms[c][i].order, 0, 2*WMALL_COEFF_PAD_SIZE);
+            }
+        }
     }
 
     return 0;
@@ -702,6 +716,7 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
     int range  = 1 << s->bits_per_sample - 1;
     int order  = s->cdlms[ich][ilms].order;
 
+    if (s->bits_per_sample > 16) {
     if (recent)
         recent--;
     else {
@@ -720,6 +735,26 @@ static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
     s->cdlms[ich][ilms].recent = recent;
     memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0,
            sizeof(s->cdlms[ich][ilms].lms_updates) - 4*(recent+order));
+    } else {
+        int16_t *prevvalues = s->cdlms[ich][ilms].lms_prevvalues;
+        int16_t *updates    = s->cdlms[ich][ilms].lms_updates;
+        if (recent)
+            recent--;
+        else {
+            memcpy(prevvalues + order, prevvalues, 2 * order);
+            memcpy(updates + order, updates, 2 * order);
+            recent = order - 1;
+        }
+    
+        prevvalues[recent] = av_clip(input, -range, range - 1);
+        updates[recent] = WMASIGN(input) * s->update_speed[ich];
+    
+        updates[recent + (order >> 4)] >>= 2;
+        updates[recent + (order >> 3)] >>= 1;
+        s->cdlms[ich][ilms].recent = recent;
+        memset(updates + recent + order, 0,
+               sizeof(s->cdlms[ich][ilms].lms_updates) - 2*(recent+order));
+    }
 }
 
 static void use_high_update_speed(WmallDecodeCtx *s, int ich)
@@ -729,6 +764,7 @@ static void use_high_update_speed(WmallDecodeCtx *s, int ich)
         recent = s->cdlms[ich][ilms].recent;
         if (s->update_speed[ich] == 16)
             continue;
+        if (s->bits_per_sample > 16) {
         if (s->bV3RTM) {
             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
                 s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2;
@@ -736,6 +772,12 @@ static void use_high_update_speed(WmallDecodeCtx *s, int ich)
             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
                 s->cdlms[ich][ilms].lms_updates[icoef] *= 2;
         }
+        } else {
+            int16_t *updates = (int16_t *)s->cdlms[ich][ilms].lms_updates;
+            if (s->bV3RTM) updates += recent;
+            for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
+                updates[icoef] *= 2;
+        }
     }
     s->update_speed[ich] = 16;
 }
@@ -747,12 +789,19 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
         recent = s->cdlms[ich][ilms].recent;
         if (s->update_speed[ich] == 8)
             continue;
+        if (s->bits_per_sample > 16) {
         if (s->bV3RTM)
             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
                 s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2;
         else
             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
                 s->cdlms[ich][ilms].lms_updates[icoef] /= 2;
+        } else {
+            int16_t *updates = (int16_t *)s->cdlms[ich][ilms].lms_updates;
+            if (s->bV3RTM) updates += recent;
+                for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
+                    updates[icoef] /= 2;
+        }
     }
     s->update_speed[ich] = 8;
 }
@@ -767,6 +816,7 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch,
         for (icoef = coef_begin; icoef < coef_end; icoef++) {
             pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
             residue = s->channel_residues[ch][icoef];
+            if (s->bits_per_sample > 16) {
             pred += s->dsp.scalarproduct_and_madd_int32(s->cdlms[ch][ilms].coefs,
                                                         s->cdlms[ch][ilms].lms_prevvalues
                                                             + s->cdlms[ch][ilms].recent,
@@ -775,6 +825,17 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch,
                                                         FFALIGN(s->cdlms[ch][ilms].order,
                                                                 WMALL_COEFF_PAD_SIZE),
                                                         WMASIGN(residue));
+            } else {
+            int16_t *coeffs = s->cdlms[ch][ilms].coefs;
+            int16_t *prevvalues = s->cdlms[ch][ilms].lms_prevvalues;
+            int16_t *updates = s->cdlms[ch][ilms].lms_updates;
+            pred += s->dsp.scalarproduct_and_madd_int16(coeffs,
+                                                        prevvalues + s->cdlms[ch][ilms].recent,
+                                                        updates + s->cdlms[ch][ilms].recent,
+                                                        FFALIGN(s->cdlms[ch][ilms].order,
+                                                                WMALL_COEFF_PAD_SIZE),
+                                                        WMASIGN(residue));
+            }
             input = residue + (pred >> s->cdlms[ch][ilms].scaling);
             lms_update(s, ch, ilms, input);
             s->channel_residues[ch][icoef] = input;
-- 
2.8.1