[FFmpeg-devel] avfilter/x86/vf_blend : add avx2 for 8b func (v2)
James Darnley
james.darnley at gmail.com
Wed Jan 17 00:00:10 EET 2018
On 2018-01-16 22:26, Martin Vignali wrote:
> diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
> index d7cd996842..9db2d90e57 100644
> --- a/libavutil/x86/x86util.asm
> +++ b/libavutil/x86/x86util.asm
> @@ -335,7 +335,7 @@
> %endmacro
>
> %macro ABS2 4
> -%if cpuflag(ssse3)
> +%if cpuflag(ssse3)||cpuflag(avx2)
> pabsw %1, %1
> pabsw %2, %2
> %elif cpuflag(mmxext) ; a, b, tmp0, tmp1
Why? AVX2 implies all earlier flags, so cpuflag(ssse3) is already true for AVX2 builds.
> +;%1 dst, %2 src %3 xm fill by zero (only use in SSE2)
> +%macro PMOVZXBW 3
> +%if cpuflag(avx2)
> + vpmovzxbw %1, %2
> +%else; SSE2
> + movh %1, %2
> + punpcklbw %1, %3
> +%endif
> +%endmacro
Are you aware that SSE4.1 added the packed move sign/zero extend
instructions? I don't suggest that you make an SSE4 version, but if you use
many 3-operand instructions an AVX version might be worthwhile.
> @@ -85,4 +102,25 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
> case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;
> }
> }
> + if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1 && !is_16bit) {
> + switch (param->mode) {
> + case BLEND_ADDITION: param->blend = ff_blend_addition_avx2; break;
> + case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_avx2; break;
> + case BLEND_AND: param->blend = ff_blend_and_avx2; break;
> + case BLEND_AVERAGE: param->blend = ff_blend_average_avx2; break;
> + case BLEND_DARKEN: param->blend = ff_blend_darken_avx2; break;
> + case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_avx2; break;
> + case BLEND_HARDMIX: param->blend = ff_blend_hardmix_avx2; break;
> + case BLEND_LIGHTEN: param->blend = ff_blend_lighten_avx2; break;
> + case BLEND_MULTIPLY: param->blend = ff_blend_multiply_avx2; break;
> + case BLEND_OR: param->blend = ff_blend_or_avx2; break;
> + case BLEND_PHOENIX: param->blend = ff_blend_phoenix_avx2; break;
> + case BLEND_SCREEN: param->blend = ff_blend_screen_avx2; break;
> + case BLEND_SUBTRACT: param->blend = ff_blend_subtract_avx2; break;
> + case BLEND_XOR: param->blend = ff_blend_xor_avx2; break;
> + case BLEND_DIFFERENCE: param->blend = ff_blend_difference_avx2; break;
> + case BLEND_EXTREMITY: param->blend = ff_blend_extremity_avx2; break;
> + case BLEND_NEGATION: param->blend = ff_blend_negation_avx2; break;
> + }
> + }
> }
If you're going to align things vertically then do it for every line.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 603 bytes
Desc: OpenPGP digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20180116/dbfb3015/attachment.sig>
More information about the ffmpeg-devel
mailing list