[Ffmpeg-devel] [PATCH] DV weights

Dan Maas dmaas
Sat Feb 25 20:41:33 CET 2006


This patch implements AC coefficient weighing (per SMPTE 314M) for the
DV encoder and decoder.

I have also attached the Python script I used to make the tables.

Weighing is implemented as fixed-point multiplication. I have chosen
the number of bits of precision that gives the smallest error without
causing 32-bit overflows. Weighing is inherently a lossy process
because weighted coefficients have fewer bits than unweighted ones.
But, in no case is the error greater than one least-significant bit.
(the Python script checks this)

Performance impact is less than 1%.

Regards,
Dan
-------------- next part --------------
Index: libavcodec/dv.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dv.c,v
retrieving revision 1.75
diff -u -p -u -r1.75 dv.c
--- libavcodec/dv.c	24 Feb 2006 09:16:26 -0000	1.75
+++ libavcodec/dv.c	25 Feb 2006 19:29:18 -0000
@@ -253,6 +253,7 @@ static int dvvideo_init(AVCodecContext *
 typedef struct BlockInfo {
     const uint8_t *shift_table;
     const uint8_t *scan_table;
+    const int *iweight_table;
     uint8_t pos; /* position in block */
     uint8_t dct_mode;
     uint8_t partial_bit_count;
@@ -295,6 +296,7 @@ static void dv_decode_ac(GetBitContext *
     int last_index = get_bits_size(gb);
     const uint8_t *scan_table = mb->scan_table;
     const uint8_t *shift_table = mb->shift_table;
+    const int *iweight_table = mb->iweight_table;
     int pos = mb->pos;
     int partial_bit_count = mb->partial_bit_count;
     int level, pos1, run, vlc_len, index;
@@ -344,7 +346,12 @@ static void dv_decode_ac(GetBitContext *
 
         assert(level);
         pos1 = scan_table[pos];
-        block[pos1] = level << shift_table[pos1];
+        level <<= shift_table[pos1];
+
+        /* unweigh, round, and shift down */
+        level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits;
+
+        block[pos1] = level;
 
         UPDATE_CACHE(re, gb);
     }
@@ -410,6 +417,7 @@ static inline void dv_decode_video_segme
             dct_mode = get_bits1(&gb);
             mb->dct_mode = dct_mode;
             mb->scan_table = s->dv_zigzag[dct_mode];
+            mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88;
             class1 = get_bits(&gb, 2);
             mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode]
                 [quant + dv_quant_offset[class1]];
@@ -648,7 +656,7 @@ static always_inline PutBitContext* dv_e
 }
 
 static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
-                                              const uint8_t* zigzag_scan, int bias)
+                                              const uint8_t* zigzag_scan, const int *weight, int bias)
 {
     int i, area;
     static const int classes[] = {12, 24, 36, 0xffff};
@@ -665,7 +673,11 @@ static always_inline void dv_set_class_n
 
           if (level+15 > 30U) {
               bi->sign[i] = (level>>31)&1;
-              bi->mb[i] = level= ABS(level)>>4;
+              /* weigh it and and shift down into range, adding for rounding */
+              /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
+                 AND the 2x doubling of the weights */
+              level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4);
+              bi->mb[i] = level;
               if(level>max) max= level;
               bi->bit_size[area] += dv_rl2vlc_size(i - prev  - 1, level);
               bi->next[prev]= i;
@@ -872,7 +887,9 @@ static inline void dv_encode_video_segme
             s->fdct[enc_blk->dct_mode](block);
 
             dv_set_class_number(block, enc_blk,
-                                enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
+                                enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct,
+                                enc_blk->dct_mode ? dv_weight_248 : dv_weight_88,
+                                j/4);
 
             init_put_bits(pb, ptr, block_sizes[j]/8);
             put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
Index: libavcodec/dvdata.h
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dvdata.h,v
retrieving revision 1.15
diff -u -p -u -r1.15 dvdata.h
--- libavcodec/dvdata.h	12 Jan 2006 22:43:15 -0000	1.15
+++ libavcodec/dvdata.h	25 Feb 2006 19:29:21 -0000
@@ -1256,6 +1256,51 @@ static const uint16_t dv_place_411[1350]
  0x0834, 0x2320, 0x2f44, 0x3810, 0x1658,
 };
 
+/* DV25/50 DCT coefficient weights and inverse weights */
+/* created by dvtables.py */
+static const int dv_weight_bits = 18;
+static const int dv_weight_88[64] = {
+ 131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536,
+ 237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935,
+ 224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916,
+ 212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433,
+ 206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704,
+ 200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568,
+ 174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627,
+ 170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258,
+};
+static const int dv_weight_248[64] = {
+ 131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754,
+ 224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536,
+ 211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568,
+ 242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965,
+ 200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627,
+ 229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965,
+ 175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364,
+ 195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651,
+};
+static const int dv_iweight_bits = 14;
+static const int dv_iweight_88[64] = {
+ 32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079,
+ 18079, 18197, 18725, 18559, 19196, 18559, 18725, 19284,
+ 19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262,
+ 20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815,
+ 20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400,
+ 21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764,
+ 24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191,
+ 25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536,
+};
+static const int dv_iweight_248[64] = {
+ 32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196,
+ 19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079,
+ 20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764,
+ 17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692,
+ 21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191,
+ 18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867,
+ 24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173,
+ 22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642,
+};
+
 static const uint16_t dv_audio_shuffle525[10][9] = {
   {  0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */
   {  6, 36, 66, 26, 56, 86, 16, 46, 76 },
-------------- next part --------------
#!/usr/bin/python

# dvtables - precompute some tables for ffmpeg's DV codec
# Copyright (C) 2006 Daniel Maas <dmaas at maasdigital.com>
# last update 25 Feb 2006
#
# dvtables is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# dvtables is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#

import sys
from math import *

def clamp(x, lo, hi):
    if x < lo:
        return lo
    elif x > hi:
        return hi
    else:
        return x

# DV25 and DV50 zig-zag permutations, as used by ffmpeg
zigzag_88 = (
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
)

zigzag_248 = (
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
)

#
# Compute and print zig-zagged DV25/DV50 forward weight tables
#

# The spec gives weights as floating point numbers, where the "unity"
# value is 1/2. (i.e. a weight of 1/2 leaves the coefficient unchanged)

# To make the math simpler, we double the weights so that "unity" is 1,
# and double the incoming AC coefficients as well, so the result is the same.

# weight formulas from SMPTE 314M

def CS(x):
    return cos(pi * (x/16.0))

w = [1.0,
     CS(4)/(4.0*CS(7)*CS(2)),
     CS(4)/(2.0*CS(6)),
     1.0/(2.0*CS(5)),
     7.0/8.0,
     CS(4)/CS(3),
     CS(4)/CS(2),
     CS(4)/CS(1)]

def compute_weight_88(h,v):
    # compute weight per the spec
    if h == 0 and v == 0:
        weight = 0.25
    else:
        weight = w[h]*w[v]/2.0
    # double it
    weight *= 2.0
    return weight

def compute_weight_248(h,v):
    # compute weight per the spec        
    if h == 0 and v == 0:
        weight = 0.25
    elif v < 4:
        weight = w[h] * w[2*v]/2.0
    else:
        weight = w[h] * w[2*(v-4)]/2.0
    # double it
    weight *= 2.0
    return weight

# We implement weighing as fixed-point integer multiplication.

# The maximum possible precision is 18 bits, to avoid overflowing 32
# bits when multiplying by 10-bit DCT levels which have been expanded
# by a factor of 8 by the DCT, times two because we double the
# coefficients).

weight_bits = 18
print "static const int dv_weight_bits = %d;" % weight_bits

# forward weights for 8-8 DCT
weights_88 = range(64)

fwd_error = 0

for v in range(8):
    for h in range(8):
        fweight = compute_weight_88(h,v)

        # convert to fixed-point and round to closest value
        iweight = int(fweight * (1 << weight_bits) + 0.5)
        weights_88[8*v+h] = iweight

        # check the precision of the integer weight for all possible input levels
        for level in range(1024):
            fresult = int(level * fweight + 0.5)
            iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
            if fresult != iresult:
                # ensure the error is no more than one least-significant bit
                assert abs(fresult - iresult) < 2
                fwd_error += 1
                #sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))
      

# forward weights for 2-4-8 DCT
weights_248 = range(64)

for v in range(8):
    # interlace fields
    v = (v/2) + 4*(v&1)
    
    for h in range(8):
        fweight = compute_weight_248(h,v)

        # convert to fixed-point and round to closest value
        iweight = int(fweight * (1 << weight_bits) + 0.5)
        weights_248[8*v+h] = iweight

        # check the precision of the integer weight for all possible input levels
        for level in range(1024):
            fresult = int(level * fweight + 0.5)
            iresult = (level * iweight + (1 << (weight_bits-1))) >> weight_bits
            if fresult != iresult:
                # ensure the error is no more than one least-significant bit
                assert abs(fresult - iresult) < 2
                fwd_error += 1
                #sys.stderr.write("error level %d weight %f | %d result %d | %d\n" % (level,fweight,iweight,fresult,iresult))        

if 0:
    sys.stderr.write("DV25/50 forward level/weight errors %d\n" % fwd_error)

print "static const int dv_weight_88[64] = {"
for j in range(8):
    print "",
    for i in range(8):
        print "%6d," % (weights_88[zigzag_88[8*j+i]]),
    print ""
print "};"


print "static const int dv_weight_248[64] = {"
for j in range(8):
    print "",
    for i in range(8):
        print "%6d," % (weights_248[zigzag_248[8*j+i]]),
    print ""
print "};"
    
#
# Compute inverse DV25/DV50 weights in fixed point
#

# Weighing and then unweighing an AC coefficient is inherently a lossy
# process, because the weighted coefficients have one bit less
# precision than unweighted ones. The best can do is minimize the
# error by using weights of sufficient precision.

# Fixed-point precision of inverse weights
# The maximum possible precision is 24 bits, to avoid 32-bit overflow when multiplying
# by 8-bit weighted coefficients.
# However, using more than 14 bits of precision does not reduce error any further
iweight_bits = 14

print "static const int dv_iweight_bits = %d;" % iweight_bits

def make_fixed_dv25(w):
    w = 1.0 / float(w)
    w *= (1 << weight_bits) * (1 << iweight_bits)
    return int(w + 0.5)

# inverse weights for 8-8 DCT
print "static const int dv_iweight_88[64] = {"
for j in range(8):
    print "",
    for i in range(8):
        w = weights_88[zigzag_88[8*j+i]]
        print "%5d," % (make_fixed_dv25(w)),
    print ""
print "};"

# inverse weights for 2-4-8 DCT
print "static const int dv_iweight_248[64] = {"
for j in range(8):
    print "",
    for i in range(8):
        w = weights_248[zigzag_248[8*j+i]]
        print "%5d," % (make_fixed_dv25(w)),
    print ""
print "};"


# Find the error created by weighing and then unweighing an AC coefficient.
# We test all possible combinations of weight and level.
if 0:
    inv_error = 0
    for i in range(128):
        for level in range(1024):
            if i >= 64:
                fwd_weight = weights_248[zigzag_248[i-64]]
            else:
                fwd_weight = weights_88[zigzag_88[i]]
                
            inv_weight = make_fixed_dv25(fwd_weight)
            
            # weigh the coefficient
            weighted = (level * fwd_weight + (1 << (weight_bits-1))) >> weight_bits
                
            # unweigh the coefficient
            result = (weighted * inv_weight + (1 << (iweight_bits-1))) >> iweight_bits

            # compare input vs output
            if result != level:
                # make sure the error is at most one least-significant bit
                assert abs(result-level) < 2
                #sys.stderr.write("level %d fwd_weight %d inv_weight %d weighted %d result %d\n" % (level,fwd_weight,inv_weight,weighted,result))
                inv_error += 1

    sys.stderr.write("DV25/50 inverse level/weight errors %d\n" % inv_error)
        
#
# DV50 macroblock locations
#

# find_macroblock() returns a 16-bit quantity
# lower 8 bits are horizontal index, upper 8 bits are vertical index

def find_macroblock_dv50(channel, seq, vid_block_num, is_pal):
    if is_pal:
        ysupers = 24
    else:
        ysupers = 20
        
    # X and Y indices of the superblock we're inside
    super_x_order = (2,1,3,0,4)
    super_y_bases = (2,6,8,0,4)
    super_x = super_x_order[vid_block_num % 5]
    super_y = (2*(super_y_bases[vid_block_num % 5] + seq) + channel) % ysupers

    # index within the superblock
    within_super = vid_block_num / 5

    # locate us within the superblock's "brick" pattern
    if(within_super < 3):
	within_super_x = 0;
	within_super_y = within_super;
    elif(within_super < 6):
	within_super_x = 1;
	within_super_y = 2 - (within_super-3);
    elif(within_super < 9):
	within_super_x = 2;
	within_super_y = (within_super-6);
    elif(within_super < 12):
	within_super_x = 3;
	within_super_y = 2 - (within_super-9);
    elif(within_super < 15):
	within_super_x = 4;
	within_super_y = (within_super-12);
    elif(within_super < 18):
	within_super_x = 5;
	within_super_y = 2 - (within_super-15);
    elif(within_super < 21):
	within_super_x = 6;
	within_super_y = (within_super-18);
    elif(within_super < 24):
	within_super_x = 7;
	within_super_y = 2 - (within_super-21);
    else:
	within_super_x = 8;
	within_super_y = (within_super-24);
    
    return ((3*super_y + within_super_y) << 8) | \
	4*(9*super_x + within_super_x)

# print macroblock location table for DV50 NTSC
if 0:
    # 2 channels per frame, 10 DIF sequences per channel,
    # 27 video segments per DIF sequence, 5 macroblocks per video segment

    print "static const uint16_t dv_place_422_525[2*10*27*5] = {"
    for slice in range(2*10*27):
        channel = slice / (10*27)
        slice %= (10*27)
        seq = slice / 27
        slice %= 27
        vid_block_num = 5*slice

        print "",
        for mb in range(5):
            print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 0)),
        print ""

    print "};"

# print macroblock location table for DV50 PAL
if 0:
    # 2 channels per frame, 12 DIF sequences per channel,
    # 27 video segments per DIF sequence, 5 macroblocks per video segment

    print "static const uint16_t dv_place_422_625[2*12*27*5] = {"
    for slice in range(2*12*27):
        channel = slice / (12*27)
        slice %= (12*27)
        seq = slice / 27
        slice %= 27
        vid_block_num = 5*slice

        print "",
        for mb in range(5):
            print "0x%04x," % (find_macroblock_dv50(channel, seq, vid_block_num + mb, 1)),
        print ""

    print "};"



More information about the ffmpeg-devel mailing list