FFmpeg
h264pred_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/aarch64/cpu.h"
25 #include "libavcodec/avcodec.h"
26 #include "libavcodec/h264pred.h"
27 
28 /* PERFORMANCE WARNING:
29  * The routines guarded by ENABLE_INEFFICIENT_ASM were identified as performance
30  * regressions: due to advances in modern CPU microarchitectures and compiler
31  * optimization, the C implementations now consistently outperform them.
32  *
33  * They are kept here for historical reference only.
34  *
35  * New optimizations are highly welcome! If you can provide an optimized
36  * implementation that demonstrably beats the current C version in rigorous
37  * benchmarks, please submit a patch.
38  */
39 #define ENABLE_INEFFICIENT_ASM 0 /* 0: assignments inside #if ENABLE_INEFFICIENT_ASM below are compiled out */
40 
/* Prototypes for the NEON prediction routines referenced by the init code
 * below. Only declarations appear in this file; the implementations are
 * defined elsewhere (presumably in the accompanying AArch64 assembly --
 * confirm). All of them share one signature: (uint8_t *src, ptrdiff_t stride). */

/* 16x16 routines without a suffix: assigned in the bit_depth == 8 branch. */
41 void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
42 void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
43 void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
44 void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
45 void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
46 void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
47 void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
48 
/* 8x8 routines without a suffix: assigned in the bit_depth == 8 branch. */
49 void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
50 void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
51 void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
52 void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
53 void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
54 void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
55 void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
56 void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
57 void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
58 void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
59 void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
60 
/* 16x16 routines with the _10 suffix: assigned in the bit_depth == 10 branch. */
61 void ff_pred16x16_vert_neon_10(uint8_t *src, ptrdiff_t stride);
62 void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride);
63 void ff_pred16x16_plane_neon_10(uint8_t *src, ptrdiff_t stride);
64 void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride);
65 void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
66 
/* 8x8 routines with the _10 suffix: assigned in the bit_depth == 10 branch. */
67 void ff_pred8x8_vert_neon_10(uint8_t *src, ptrdiff_t stride);
68 void ff_pred8x8_hor_neon_10(uint8_t *src, ptrdiff_t stride);
69 void ff_pred8x8_plane_neon_10(uint8_t *src, ptrdiff_t stride);
70 void ff_pred8x8_dc_neon_10(uint8_t *src, ptrdiff_t stride);
71 void ff_pred8x8_128_dc_neon_10(uint8_t *src, ptrdiff_t stride);
72 void ff_pred8x8_left_dc_neon_10(uint8_t *src, ptrdiff_t stride);
73 void ff_pred8x8_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
74 void ff_pred8x8_l0t_dc_neon_10(uint8_t *src, ptrdiff_t stride);
75 void ff_pred8x8_0lt_dc_neon_10(uint8_t *src, ptrdiff_t stride);
76 void ff_pred8x8_l00_dc_neon_10(uint8_t *src, ptrdiff_t stride);
77 void ff_pred8x8_0l0_dc_neon_10(uint8_t *src, ptrdiff_t stride);
78 
80  const int bit_depth,
81  const int chroma_format_idc)
82 {
/* Installs NEON routines into the h->pred8x8 and h->pred16x16 tables.
 * bit_depth selects the plain (8) or _10 (10) variants; the pred8x8
 * entries are only touched when chroma_format_idc <= 1. Assignments
 * wrapped in #if ENABLE_INEFFICIENT_ASM are compiled out while that
 * macro is 0.
 * NOTE(review): this listing skips several source lines (see the gaps in
 * the embedded line numbers, including the first line of the signature),
 * so the assignments shown here are not the complete set -- check the
 * full file before relying on this summary. */
83  if (bit_depth == 8) {
84  if (chroma_format_idc <= 1) {
85 #if ENABLE_INEFFICIENT_ASM
86  h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon;
87  h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon;
88 #endif
91 #if ENABLE_INEFFICIENT_ASM
93 #endif
96 #if ENABLE_INEFFICIENT_ASM
97  h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon;
100 #endif
105  }
106  }
107 
/* The pred16x16 table is indexed with the same *_PRED8x8 constants as
 * pred8x8. In this 8-bit branch the DC entry is gated by
 * ENABLE_INEFFICIENT_ASM, while VERT, HOR and PLANE are always set. */
108 #if ENABLE_INEFFICIENT_ASM
109  h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon;
110 #endif
111  h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vert_neon;
112  h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_hor_neon;
113 #if ENABLE_INEFFICIENT_ASM
117 #endif
120  h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
121  }
122  if (bit_depth == 10) {
123  if (chroma_format_idc <= 1) {
124 #if ENABLE_INEFFICIENT_ASM
126 #endif
/* Unlike the 8-bit branch, the 10-bit pred8x8 HOR entry is not gated. */
127  h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon_10;
130 #if ENABLE_INEFFICIENT_ASM
132 #endif
135 #if ENABLE_INEFFICIENT_ASM
136  h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon_10;
138 #endif
144  }
145  }
146 
/* Unlike the 8-bit branch, the 10-bit pred16x16 DC entry is not gated. */
147  h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon_10;
149  h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_hor_neon_10;
154  }
155 }
156 
158  int bit_depth, const int chroma_format_idc)
159 {
/* Runtime dispatch: read the CPU feature flags once and forward all
 * arguments to h264_pred_init_neon() only when have_neon() accepts them.
 * When it does not, this function leaves h unmodified. */
160  int cpu_flags = av_get_cpu_flags();
161 
162  if (have_neon(cpu_flags))
163  h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
164 }
ff_pred16x16_128_dc_neon
void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride)
HOR_PRED8x8
#define HOR_PRED8x8
Definition: h264pred.h:69
ff_pred8x8_hor_neon
void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride)
DC_PRED8x8
#define DC_PRED8x8
Definition: h264pred.h:68
ff_pred16x16_top_dc_neon
void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_h264_pred_init_aarch64
av_cold void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc)
Definition: h264pred_init.c:157
ff_pred16x16_hor_neon_10
void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_pred8x8_vert_neon
void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_top_dc_neon
void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_plane_neon
void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride)
av_cold
#define av_cold
Definition: attributes.h:106
ff_pred8x8_0l0_dc_neon_10
void ff_pred8x8_0l0_dc_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred16x16_dc_neon
void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_pred16x16_left_dc_neon
void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_pred16x16_plane_neon_10
void ff_pred16x16_plane_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_128_dc_neon
void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride)
codec_id
enum AVCodecID codec_id
Definition: vaapi_decode.c:410
AV_CODEC_ID_SVQ3
@ AV_CODEC_ID_SVQ3
Definition: codec_id.h:75
ff_pred8x8_plane_neon_10
void ff_pred8x8_plane_neon_10(uint8_t *src, ptrdiff_t stride)
h264_pred_init_neon
static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
Definition: h264pred_init.c:79
TOP_DC_PRED8x8
#define TOP_DC_PRED8x8
Definition: h264pred.h:75
ff_pred16x16_dc_neon_10
void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride)
VERT_PRED8x8
#define VERT_PRED8x8
Definition: h264pred.h:70
ff_pred8x8_vert_neon_10
void ff_pred8x8_vert_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_hor_neon_10
void ff_pred8x8_hor_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_l0t_dc_neon
void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_0l0_dc_neon
void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride)
DC_128_PRED8x8
#define DC_128_PRED8x8
Definition: h264pred.h:76
ff_pred16x16_top_dc_neon_10
void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
attributes.h
PLANE_PRED8x8
#define PLANE_PRED8x8
Definition: h264pred.h:71
ff_pred8x8_128_dc_neon_10
void ff_pred8x8_128_dc_neon_10(uint8_t *src, ptrdiff_t stride)
AV_CODEC_ID_VP7
@ AV_CODEC_ID_VP7
Definition: codec_id.h:235
AV_CODEC_ID_RV40
@ AV_CODEC_ID_RV40
Definition: codec_id.h:121
ALZHEIMER_DC_L00_PRED8x8
#define ALZHEIMER_DC_L00_PRED8x8
Definition: h264pred.h:81
ff_pred16x16_vert_neon_10
void ff_pred16x16_vert_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_dc_neon_10
void ff_pred8x8_dc_neon_10(uint8_t *src, ptrdiff_t stride)
LEFT_DC_PRED8x8
#define LEFT_DC_PRED8x8
Definition: h264pred.h:74
avcodec.h
ff_pred8x8_0lt_dc_neon
void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_pred16x16_vert_neon
void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride)
h264pred.h
ff_pred16x16_plane_neon
void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_dc_neon
void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_l00_dc_neon_10
void ff_pred8x8_l00_dc_neon_10(uint8_t *src, ptrdiff_t stride)
ALZHEIMER_DC_0L0_PRED8x8
#define ALZHEIMER_DC_0L0_PRED8x8
Definition: h264pred.h:82
H264PredContext
Context for storing H.264 prediction functions.
Definition: h264pred.h:94
ff_pred16x16_hor_neon
void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride)
ALZHEIMER_DC_L0T_PRED8x8
#define ALZHEIMER_DC_L0T_PRED8x8
Definition: h264pred.h:79
ff_pred8x8_top_dc_neon_10
void ff_pred8x8_top_dc_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_l00_dc_neon
void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride)
AV_CODEC_ID_VP8
@ AV_CODEC_ID_VP8
Definition: codec_id.h:192
ALZHEIMER_DC_0LT_PRED8x8
#define ALZHEIMER_DC_0LT_PRED8x8
Definition: h264pred.h:80
h
h
Definition: vp9dsp_template.c:2070
stride
#define stride
Definition: h264pred_template.c:536
ff_pred8x8_0lt_dc_neon_10
void ff_pred8x8_0lt_dc_neon_10(uint8_t *src, ptrdiff_t stride)
cpu.h
ff_pred8x8_l0t_dc_neon_10
void ff_pred8x8_l0t_dc_neon_10(uint8_t *src, ptrdiff_t stride)
ff_pred8x8_left_dc_neon
void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride)
src
#define src
Definition: vp8dsp.c:248
ff_pred8x8_left_dc_neon_10
void ff_pred8x8_left_dc_neon_10(uint8_t *src, ptrdiff_t stride)