FFmpeg
h264dsp_init_loongarch.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Loongson Technology Corporation Limited
3  * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
4  * Xiwei Gu <guxiwei-hf@loongson.cn>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
24 #include "h264dsp_loongarch.h"
25 
27  const int chroma_format_idc)
28 {
 /*
  * Fills the function-pointer fields of c with LoongArch SIMD routines.
  * Which routines are installed depends on cpu_flags (tested through
  * have_lsx() / have_lasx()), on bit_depth and on chroma_format_idc.
  *
  * NOTE(review): the first line of the signature and the declaration of
  * cpu_flags are not present in this listing. The index entry for
  * ff_h264dsp_init_loongarch further down gives the full prototype;
  * cpu_flags presumably comes from av_get_cpu_flags() -- confirm against
  * the real source file.
  */
30 
 /* LSX routines: installed first, so the LASX section below can override them. */
31  if (have_lsx(cpu_flags)) {
 /* The strength routine is only installed when chroma_format_idc <= 1. */
32  if (chroma_format_idc <= 1)
33  c->h264_loop_filter_strength = ff_h264_loop_filter_strength_lsx;
 /* Every routine below has an _8_ suffix and is only installed for bit_depth 8. */
34  if (bit_depth == 8) {
35  c->h264_idct_add = ff_h264_idct_add_8_lsx;
36  c->h264_idct8_add = ff_h264_idct8_add_8_lsx;
37  c->h264_idct_dc_add = ff_h264_idct_dc_add_8_lsx;
38  c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
39 
 /*
  * chroma_format_idc <= 1 selects the plain add8 routine plus the
  * horizontal chroma loop filters; any larger value only selects the
  * _422_ add8 variant and leaves the horizontal chroma filters untouched.
  */
40  if (chroma_format_idc <= 1) {
41  c->h264_idct_add8 = ff_h264_idct_add8_8_lsx;
42  c->h264_h_loop_filter_chroma = ff_h264_h_lpf_chroma_8_lsx;
43  c->h264_h_loop_filter_chroma_intra = ff_h264_h_lpf_chroma_intra_8_lsx;
44  } else
45  c->h264_idct_add8 = ff_h264_idct_add8_422_8_lsx;
46 
47  c->h264_idct_add16 = ff_h264_idct_add16_8_lsx;
48  c->h264_idct8_add4 = ff_h264_idct8_add4_8_lsx;
49  c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_lsx;
50  c->h264_idct_add16intra = ff_h264_idct_add16_intra_8_lsx;
51 
52  c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_lsx;
53  c->h264_add_pixels8_clear = ff_h264_add_pixels8_8_lsx;
54  c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_8_lsx;
55  c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_8_lsx;
56  c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lsx;
57  c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lsx;
58  c->h264_v_loop_filter_chroma = ff_h264_v_lpf_chroma_8_lsx;
59 
60  c->h264_v_loop_filter_chroma_intra = ff_h264_v_lpf_chroma_intra_8_lsx;
61 
 /* Weighted / bi-weighted prediction tables: slots 0, 1, 2 get the 16, 8 and 4 pixel routines. */
62  c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_lsx;
63  c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_lsx;
64  c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_lsx;
65  c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_lsx;
66  c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_lsx;
67  c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels4_8_lsx;
 /*
  * NOTE(review): the next two assignments repeat, with the same values,
  * the h264_idct8_add / h264_idct8_dc_add assignments made at the top of
  * this bit_depth == 8 block; they look redundant.
  */
68  c->h264_idct8_add = ff_h264_idct8_add_8_lsx;
69  c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
70  }
71  }
 /*
  * LASX routines: compiled only when HAVE_LASX is set. This section runs
  * after the LSX one, so each field assigned here replaces the LSX routine
  * stored above. Fields that are not assigned here (for example slot 2 of
  * the weight tables and the chroma loop filters) keep their earlier value.
  */
72 #if HAVE_LASX
73  if (have_lasx(cpu_flags)) {
74  if (chroma_format_idc <= 1)
75  c->h264_loop_filter_strength = ff_h264_loop_filter_strength_lasx;
76  if (bit_depth == 8) {
77  c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_lasx;
78  c->h264_add_pixels8_clear = ff_h264_add_pixels8_8_lasx;
79  c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_8_lasx;
80  c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_8_lasx;
81  c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lasx;
82  c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lasx;
83 
84  /* Weighted MC */
85  c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_lasx;
86  c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_lasx;
87 
88  c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_lasx;
89  c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_lasx;
90 
91  c->h264_idct8_add = ff_h264_idct8_add_8_lasx;
92  c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lasx;
93  c->h264_idct8_add4 = ff_h264_idct8_add4_8_lasx;
94  }
95  }
96 #endif // #if HAVE_LASX
97 }
ff_h264_idct8_add_8_lsx
void ff_h264_idct8_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
ff_h264_idct_add_8_lsx
void ff_h264_idct_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
ff_h264_idct_dc_add_8_lsx
void ff_h264_idct_dc_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
have_lasx
#define have_lasx(flags)
Definition: cpu.h:29
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_h264_loop_filter_strength_lasx
void ff_h264_loop_filter_strength_lasx(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field)
Definition: h264_deblock_lasx.c:112
ff_h264_v_lpf_luma_intra_8_lsx
void ff_h264_v_lpf_luma_intra_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta)
ff_h264dsp_init_loongarch
av_cold void ff_h264dsp_init_loongarch(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp_init_loongarch.c:26
ff_h264_h_lpf_luma_8_lsx
void ff_h264_h_lpf_luma_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
ff_weight_h264_pixels4_8_lsx
void ff_weight_h264_pixels4_8_lsx(uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weight_src, int offset)
ff_h264_idct8_dc_add_8_lsx
void ff_h264_idct8_dc_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
ff_h264_luma_dc_dequant_idct_8_lsx
void ff_h264_luma_dc_dequant_idct_8_lsx(int16_t *_output, int16_t *_input, int qmul)
av_cold
#define av_cold
Definition: attributes.h:90
h264dsp_loongarch.h
ff_h264_h_lpf_chroma_8_lsx
void ff_h264_h_lpf_chroma_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
ff_h264_add_pixels4_8_lsx
void ff_h264_add_pixels4_8_lsx(uint8_t *_dst, int16_t *_src, int stride)
ff_biweight_h264_pixels8_8_lsx
void ff_biweight_h264_pixels8_8_lsx(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weight_dst, int weight_src, int offset)
ff_h264_loop_filter_strength_lsx
void ff_h264_loop_filter_strength_lsx(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field)
ff_h264_h_lpf_luma_intra_8_lasx
void ff_h264_h_lpf_luma_intra_8_lasx(uint8_t *data, ptrdiff_t img_width, int alpha_in, int beta_in)
Definition: h264dsp_lasx.c:406
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that [...] often the output of the computation does not c... (excerpt truncated by the documentation generator)
Definition: undefined.txt:32
H264DSPContext
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
have_lsx
#define have_lsx(flags)
Definition: cpu.h:28
ff_weight_h264_pixels8_8_lsx
void ff_weight_h264_pixels8_8_lsx(uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weight_src, int offset)
ff_h264_v_lpf_chroma_8_lsx
void ff_h264_v_lpf_chroma_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
ff_h264_idct_add16_8_lsx
void ff_h264_idct_add16_8_lsx(uint8_t *dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:28
ff_h264_v_lpf_luma_intra_8_lasx
void ff_h264_v_lpf_luma_intra_8_lasx(uint8_t *data, ptrdiff_t img_width, int alpha_in, int beta_in)
Definition: h264dsp_lasx.c:582
ff_weight_h264_pixels16_8_lsx
void ff_weight_h264_pixels16_8_lsx(uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weight_src, int offset_in)
ff_h264_add_pixels8_8_lasx
void ff_h264_add_pixels8_8_lasx(uint8_t *_dst, int16_t *_src, int stride)
Definition: h264dsp_lasx.c:729
ff_h264_v_lpf_luma_8_lsx
void ff_h264_v_lpf_luma_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
ff_h264_idct_add8_422_8_lsx
void ff_h264_idct_add8_422_8_lsx(uint8_t **dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:121
ff_biweight_h264_pixels16_8_lsx
void ff_biweight_h264_pixels16_8_lsx(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weight_dst, int weight_src, int offset_in)
ff_h264_add_pixels8_8_lsx
void ff_h264_add_pixels8_8_lsx(uint8_t *_dst, int16_t *_src, int stride)
ff_h264_idct8_add4_8_lsx
void ff_h264_idct8_add4_8_lsx(uint8_t *dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:49
ff_h264_idct_add16_intra_8_lsx
void ff_h264_idct_add16_intra_8_lsx(uint8_t *dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:169
ff_h264_h_lpf_chroma_intra_8_lsx
void ff_h264_h_lpf_chroma_intra_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta)
ff_h264_v_lpf_luma_8_lasx
void ff_h264_v_lpf_luma_8_lasx(uint8_t *data, ptrdiff_t img_width, int alpha_in, int beta_in, int8_t *tc)
Definition: h264dsp_lasx.c:244
ff_h264_idct_add8_8_lsx
void ff_h264_idct_add8_8_lsx(uint8_t **dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:93
ff_h264_v_lpf_chroma_intra_8_lsx
void ff_h264_v_lpf_chroma_intra_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta)
ff_h264_add_pixels4_8_lasx
void ff_h264_add_pixels4_8_lasx(uint8_t *_dst, int16_t *_src, int stride)
Definition: h264dsp_lasx.c:702
ff_h264_h_lpf_luma_8_lasx
void ff_h264_h_lpf_luma_8_lasx(uint8_t *data, ptrdiff_t img_width, int alpha_in, int beta_in, int8_t *tc)
Definition: h264dsp_lasx.c:67
ff_h264_h_lpf_luma_intra_8_lsx
void ff_h264_h_lpf_luma_intra_8_lsx(uint8_t *src, ptrdiff_t stride, int alpha, int beta)
ff_biweight_h264_pixels4_8_lsx
void ff_biweight_h264_pixels4_8_lsx(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weight_dst, int weight_src, int offset)
cpu.h