FFmpeg
vp9dsp.h
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #ifndef AVCODEC_VP9DSP_H
25 #define AVCODEC_VP9DSP_H
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 
30 #include "libavcodec/vp9.h"
31 #include "libavutil/attributes_internal.h"
32 
33 typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
34  const uint8_t *ref, ptrdiff_t ref_stride,
35  int h, int mx, int my); /* element type of VP9DSPContext::mc; NOTE(review): h presumably the block height, mx/my presumably the x/y subpel positions - confirm */
36 typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
37  const uint8_t *ref, ptrdiff_t ref_stride,
38  int h, int mx, int my, int dx, int dy); /* function-pointer type for scalable MC; NOTE(review): dx/dy presumably the per-axis step sizes - confirm */
39 
40 typedef struct VP9DSPContext {
41  /*
42  * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32
43  * dimension 2: intra prediction modes
44  *
45  * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
46  * stride is aligned by 16 pixels
47  * top[-1] is top/left; top[4,7] is top-right for 4x4
48  */
49  // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/
50  // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place?
51  // also needs to fit in with what H.264/VP8/etc do
52  void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst,
53  ptrdiff_t stride,
54  const uint8_t *left,
55  const uint8_t *top);
56 
57  /*
58  * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only)
59  * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst
60  *
61  * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
62  * stride is aligned by 16 pixels
63  * block is 16-byte aligned
64  * eob indicates the position (+1) of the last non-zero coefficient,
65  * in scan-order. This can be used to write faster versions, e.g. a
66  * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32,
67  * etc.
68  */
69  // FIXME also write idct_add_block() versions for whole (inter) pred
70  // blocks, so we can do 2 4x4s at once
71  void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst,
72  ptrdiff_t stride,
73  int16_t *block, int eob);
74 
75  /*
76  * dimension 1: width of filter (0=4, 1=8, 2=16)
77  * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
78  *
79  * dst/stride are aligned by 8
80  */
81  void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride,
82  int mb_lim, int lim, int hev_thr);
83 
84  /*
85  * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v)
86  *
87  * The width of filter is assumed to be 16; dst/stride are aligned by 16
88  */
89  void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride,
90  int mb_lim, int lim, int hev_thr);
91 
92  /*
93  * dimension 1/2: width of filter (0=4, 1=8) for each filter half
94  * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v)
95  *
96  * dst/stride are aligned by operation size
97  * this basically calls loop_filter[d1][d3][0](), followed by
98  * loop_filter[d2][d3][0]() on the next 8 pixels
99  * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the
100  * integer.
101  */
102  // FIXME perhaps a mix4 that operates on 32px (for AVX2)
103  void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride,
104  int mb_lim, int lim, int hev_thr);
105 
106  /*
107  * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4)
108  * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin)
109  * dimension 3: averaging type (0: put, 1: avg)
110  * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin)
111  * dimension 5: y subpel interpolation (0: none, 1: 8tap/bilin)
112  *
113  * dst/stride are aligned by hsize
114  */
115  vp9_mc_func mc[5][N_FILTERS][2][2][2];
116 
117  /*
118  * for scalable MC, first 3 dimensions identical to above, the other two
119  * don't exist since it changes per stepsize.
120  */
121  vp9_scaled_mc_func smc[5][N_FILTERS][2];
122 } VP9DSPContext;
123 
124 extern const int16_t attribute_visibility_hidden ff_vp9_subpel_filters[3][16][8]; /* defined in vp9dsp.c; NOTE(review): dimensions look like [filter type][subpel position][tap] - confirm */
125 
126 void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact); /* defined in vp9dsp.c */
127 
128 void ff_vp9dsp_init_8(VP9DSPContext *dsp); /* NOTE(review): 8/10/12 presumably correspond to the bpp values accepted above - confirm */
129 void ff_vp9dsp_init_10(VP9DSPContext *dsp);
130 void ff_vp9dsp_init_12(VP9DSPContext *dsp);
131 
132 void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
133 void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
134 void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
135 void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
136 void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
137 void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
138 
139 #endif /* AVCODEC_VP9DSP_H */
N_TXFM_TYPES
@ N_TXFM_TYPES
Definition: vp9.h:42
ff_vp9dsp_init
void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
Definition: vp9dsp.c:88
VP9DSPContext::loop_filter_8
void(* loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:81
VP9DSPContext
Definition: vp9dsp.h:40
ff_vp9dsp_init_arm
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_arm.c:244
attribute_visibility_hidden
#define attribute_visibility_hidden
Definition: attributes_internal.h:29
ff_vp9dsp_init_10
void ff_vp9dsp_init_10(VP9DSPContext *dsp)
N_TXFM_SIZES
@ N_TXFM_SIZES
Definition: vp9.h:32
mx
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
Definition: dsp.h:53
VP9DSPContext::loop_filter_mix2
void(* loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:103
vp9_scaled_mc_func
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:36
VP9DSPContext::smc
vp9_scaled_mc_func smc[5][N_FILTERS][2]
Definition: vp9dsp.h:121
attributes_internal.h
ff_vp9_subpel_filters
const int16_t attribute_visibility_hidden ff_vp9_subpel_filters[3][16][8]
Definition: vp9dsp.c:32
my
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
Definition: dsp.h:53
ff_vp9dsp_init_aarch64
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_aarch64.c:244
ff_vp9dsp_init_mips
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_mips.c:212
VP9DSPContext::itxfm_add
void(* itxfm_add[N_TXFM_SIZES+1][N_TXFM_TYPES])(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob)
Definition: vp9dsp.h:71
VP9DSPContext::intra_pred
void(* intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Definition: vp9dsp.h:52
vp9.h
ff_vp9dsp_init_8
void ff_vp9dsp_init_8(VP9DSPContext *dsp)
N_INTRA_PRED_MODES
@ N_INTRA_PRED_MODES
Definition: vp9.h:61
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
VP9DSPContext::mc
vp9_mc_func mc[5][N_FILTERS][2][2][2]
Definition: vp9dsp.h:115
N_FILTERS
@ N_FILTERS
Definition: vp9.h:69
ff_vp9dsp_init_riscv
void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
Definition: vp9dsp_init.c:143
ff_vp9dsp_init_12
void ff_vp9dsp_init_12(VP9DSPContext *dsp)
stride
#define stride
Definition: h264pred_template.c:537
left
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left
Definition: snow.txt:386
vp9_mc_func
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:33
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:117
ff_vp9dsp_init_loongarch
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init_loongarch.c:83
ff_vp9dsp_init_x86
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
Definition: vp9dsp_init.c:216
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2070
VP9DSPContext::loop_filter_16
void(* loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr)
Definition: vp9dsp.h:89