[FFmpeg-devel] [PATCH 1/4] lavc/flacenc: add sse4 version of the 16-bit lpc encoder

Michael Niedermayer michaelni at gmx.at
Fri Feb 21 17:04:08 CET 2014


On Thu, Feb 20, 2014 at 07:48:57PM +0100, James Darnley wrote:
> From 1.8 to 2.4 times faster.  Runtime is reduced by 2 to 39%.  The
> speed-up generally increases with compression_level.
> 
> This lpc encoder is not used with levels < 3 so it provides no speed-up
> in these cases.
> ---
>  LICENSE                         |    1 +
>  libavcodec/flacenc.c            |    2 +-
>  libavcodec/x86/Makefile         |    3 +
>  libavcodec/x86/flac_dsp_gpl.asm |   83 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/x86/flacdsp_init.c   |    4 ++
>  5 files changed, 92 insertions(+), 1 deletions(-)
>  create mode 100644 libavcodec/x86/flac_dsp_gpl.asm
> 
> diff --git a/LICENSE b/LICENSE
> index 1f757aa..c194087 100644
> --- a/LICENSE
> +++ b/LICENSE
> @@ -15,6 +15,7 @@ Specifically, the GPL parts of FFmpeg are
>  - libpostproc
>  - libmpcodecs
>  - optional x86 optimizations in the files
> +  libavcodec/x86/flac_dsp_gpl.asm
>    libavcodec/x86/idct_mmx.c
>  - libutvideo encoding/decoding wrappers in
>    libavcodec/libutvideo*.cpp
> diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
> index 1fc8c4c..e958cd8 100644
> --- a/libavcodec/flacenc.c
> +++ b/libavcodec/flacenc.c
> @@ -80,7 +80,7 @@ typedef struct FlacSubframe {
>      int shift;
>      RiceContext rc;
>      int32_t samples[FLAC_MAX_BLOCKSIZE];
> -    int32_t residual[FLAC_MAX_BLOCKSIZE+1];
> +    int32_t residual[FLAC_MAX_BLOCKSIZE+3];
>  } FlacSubframe;
>  
>  typedef struct FlacFrame {
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index 0d3594f..374b1d2 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -76,6 +76,9 @@ YASM-OBJS-$(CONFIG_DSPUTIL)            += x86/dsputil.o                 \
>  YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc.o
>  YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
>  YASM-OBJS-$(CONFIG_FLAC_DECODER)       += x86/flacdsp.o
> +ifdef CONFIG_GPL
> +YASM-OBJS-$(CONFIG_FLAC_ENCODER)       += x86/flac_dsp_gpl.o
> +endif
>  YASM-OBJS-$(CONFIG_H263DSP)            += x86/h263_loopfilter.o
>  YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
>                                            x86/h264_chromamc_10bit.o
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
> new file mode 100644
> index 0000000..2221af8
> --- /dev/null
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -0,0 +1,83 @@
> +;*****************************************************************************
> +;* FLAC DSP functions
> +;*
> +;* Copyright (c) 2014 James Darnley <james.darnley at gmail.com>
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or modify
> +;* it under the terms of the GNU General Public License as published by
> +;* the Free Software Foundation; either version 2 of the License, or
> +;* (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;* GNU General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU General Public License along
> +;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> +;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> +;******************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +SECTION_TEXT
> +
> +INIT_XMM sse4
> +%if ARCH_X86_64
> +    cglobal flac_enc_lpc_16, 5, 7, 4, 0, res, smp, len, order, coefs, shift
> +    %define posj   r5
> +    %define negj   r6
> +    %define length r2d
> +
> +    movsxd orderq, orderd
> +%else
> +    cglobal flac_enc_lpc_16, 5, 6, 4, 0, res, smp, len, order, coefs, shift

> +    %define posj   r2
> +    %define negj   r5
> +    %define length r2mp
> +%endif

Why don't you list these "local registers" in cglobal?


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Dictatorship naturally arises out of democracy, and the most aggravated
form of tyranny and slavery out of the most extreme liberty. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140221/72fd4dfb/attachment.asc>


More information about the ffmpeg-devel mailing list