[FFmpeg-devel] [PATCH 1/7] avutil: add shared assembly constants

James Darnley james.darnley at gmail.com
Fri Oct 2 19:08:28 CEST 2015


---
So here is the test file I was working on with the thoughts I had.
---
; This section is intended to possibly be included in x86inc.asm

; Align all constants to 32 bytes whether they are used in AVX code or not.
%assign constant_align 32

; Value to be used as padding to achieve alignment.  Should not be used except
; when a user fails to define a constant as a multiple of 32 bytes.

; The default behavior of nasm/yasm is to pad with NOPs if you don't specify
; what the padding should be.  Would a non-zero value be of use to a user trying
; to debug?  I don't know.
%define constant_align_value db 0

; Might it be better to not force alignment?  Without forced alignment,
; instructions that require alignment will cause a crash in development rather
; than just producing garbage or incorrect results.  Of course that only works
; if the user misaligns a constant they use.  Misalignment might not be caught
; until running unrelated code.

; A macro that imitates what x86inc.asm does for function names but without
; needing to deal with registers and stacks.
; - Mangles the name using the private prefix (ff) and also the prefix char (_)
;   when that is needed.
; - It sets a global label to the correct name.
; - It aligns the data before finally placing the label.
; - Then the user just enters their constant data.
%macro global_constant 1
    ; Rebind the given name so that it expands to the mangled, prefixed symbol.
    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
    ; Pad up to the alignment boundary before the symbol is exported and placed.
    align constant_align, constant_align_value
    global %1
    %1:
%endmacro

; An alternate but compatible form of the macro which lets the user define the
; constants on the same line.  This form would allow easy sorting.  And it is
; this I used in the patch.
%macro global_constant 1-2+
    ; From here on the bare name expands to its mangled, prefixed symbol.
    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
    ; Pad to the alignment boundary, then export the symbol and place its label.
    align constant_align, constant_align_value
    global %1
    %1:
    ; Optional inline data: emitted directly after the label when supplied.
    %if %0 > 1
        %2
    %endif
%endmacro

SECTION_RODATA 32

global_constant pb_1 ; As this constant is 11 bytes long, 21 bytes with value
    times 11 db 1    ; 0x0 will be inserted before the next constant.

; pb_ff's data is entered after the second macro call, so the align inside
; global_constant pw_m1 has nothing to pad and both labels get the same address.
global_constant pb_ff ; This allows two labels to reference the same constant.
global_constant pw_m1
    times 32 db 0xff

global_constant pb_2, times 10 db 2 ; Here is the two argument macro.

; A sed-like tool could automatically generate the "header" file from the labels
; defined in the constants file.

---
 libavutil/x86/constants.asm | 92 +++++++++++++++++++++++++++++++++++++++++++++
 libavutil/x86/constants.h   | 46 +++++++++++++++++++++++
 libavutil/x86/constants.inc | 39 +++++++++++++++++++
 tests/ref/fate/source       |  1 +
 4 files changed, 178 insertions(+)
 create mode 100644 libavutil/x86/constants.asm
 create mode 100644 libavutil/x86/constants.h
 create mode 100644 libavutil/x86/constants.inc

diff --git a/libavutil/x86/constants.asm b/libavutil/x86/constants.asm
new file mode 100644
index 0000000..fbdb1a2
--- /dev/null
+++ b/libavutil/x86/constants.asm
@@ -0,0 +1,92 @@
+;* MMX/SSE/AVX constants used across x86 dsp optimizations.
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+%include "libavutil/x86/x86util.asm"
+
+; This section is intended to possibly be included in x86inc.asm
+
+; Align all constants to 32 bytes whether they are used in AVX code or not.
+%assign constant_align 32
+
+; Value to be used as padding to achieve alignment.  Should not be used except
+; when a user fails to define a constant as a multiple of 32 bytes.
+%define constant_align_value db 0x0
+
+; A macro that imitates what x86inc.asm does for function names but without
+; needing to deal with registers and stacks.
+; - Mangles the name using the private prefix (ff) and also the prefix char (_)
+;   when that is needed.
+; - It sets a global label to the correct name, aligned to constant_align.
+; - The data may be given as the optional second argument (placed right after
+;   the label) or entered by the user on the lines following the macro call.
+%macro global_constant 1-2+
+    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
+    global %1
+    align constant_align,constant_align_value
+    %1:
+    %if %0 == 2
+        %2
+    %endif
+%endmacro
+
+SECTION_RODATA 32
+
+global_constant pb_0, times 32 db 0
+global_constant pb_1, times 32 db 1
+global_constant pb_2, times 32 db 2
+global_constant pb_3, times 32 db 3
+global_constant pb_15, times 32 db 15
+global_constant pb_80, times 32 db 0x80
+global_constant pb_FC, times 32 db 0xFC
+global_constant pb_FE, times 32 db 0xFE
+
+global_constant pw_1, times 16 dw 1
+global_constant pw_2, times 16 dw 2
+global_constant pw_3, times 16 dw 3
+global_constant pw_4, times 16 dw 4
+global_constant pw_5, times 16 dw 5
+global_constant pw_8, times 16 dw 8
+global_constant pw_9, times 16 dw 9
+global_constant pw_10, times 16 dw 10
+global_constant pw_15, times 16 dw 15
+global_constant pw_16, times 16 dw 16
+global_constant pw_17, times 16 dw 17
+global_constant pw_18, times 16 dw 18
+global_constant pw_20, times 16 dw 20
+global_constant pw_32, times 16 dw 32
+global_constant pw_42, times 16 dw 42
+global_constant pw_53, times 16 dw 53
+global_constant pw_64, times 16 dw 64
+global_constant pw_96, times 16 dw 96
+global_constant pw_128, times 16 dw 128
+global_constant pw_255, times 16 dw 255
+global_constant pw_256, times 16 dw 256
+global_constant pw_512, times 16 dw 512
+global_constant pw_1019, times 16 dw 1019
+global_constant pw_1023, times 16 dw 1023
+global_constant pw_1024, times 16 dw 1024
+global_constant pw_2048, times 16 dw 2048
+global_constant pw_4096, times 16 dw 4096
+global_constant pw_8192, times 16 dw 8192
+
+; 32 bytes of 0xff, so every word reads as 0xffff (-1).  TODO: perhaps rename to pb_ff
+global_constant pw_m1, times 32 db 0xff
+
+global_constant pd_1, times 8 dd 1
+
+global_constant ps_neg, times 8 dd 0x80000000
diff --git a/libavutil/x86/constants.h b/libavutil/x86/constants.h
new file mode 100644
index 0000000..bab16d3
--- /dev/null
+++ b/libavutil/x86/constants.h
@@ -0,0 +1,46 @@
+#ifndef AVUTIL_X86_CONSTANTS_H
+#define AVUTIL_X86_CONSTANTS_H
+
+#include "libavutil/x86/asm.h"
+
+extern const ymm_reg ff_pb_0;
+extern const ymm_reg ff_pb_1;
+extern const ymm_reg ff_pb_2;
+extern const ymm_reg ff_pb_3;
+extern const ymm_reg ff_pb_15;
+extern const ymm_reg ff_pb_80;
+extern const ymm_reg ff_pb_FC;
+extern const ymm_reg ff_pb_FE;
+extern const ymm_reg ff_pw_1;
+extern const ymm_reg ff_pw_2;
+extern const ymm_reg ff_pw_3;
+extern const ymm_reg ff_pw_4;
+extern const ymm_reg ff_pw_5;
+extern const ymm_reg ff_pw_8;
+extern const ymm_reg ff_pw_9;
+extern const ymm_reg ff_pw_10;
+extern const ymm_reg ff_pw_15;
+extern const ymm_reg ff_pw_16;
+extern const ymm_reg ff_pw_17;
+extern const ymm_reg ff_pw_18;
+extern const ymm_reg ff_pw_20;
+extern const ymm_reg ff_pw_32;
+extern const ymm_reg ff_pw_42;
+extern const ymm_reg ff_pw_53;
+extern const ymm_reg ff_pw_64;
+extern const ymm_reg ff_pw_96;
+extern const ymm_reg ff_pw_128;
+extern const ymm_reg ff_pw_255;
+extern const ymm_reg ff_pw_256;
+extern const ymm_reg ff_pw_512;
+extern const ymm_reg ff_pw_1019;
+extern const ymm_reg ff_pw_1023;
+extern const ymm_reg ff_pw_1024;
+extern const ymm_reg ff_pw_2048;
+extern const ymm_reg ff_pw_4096;
+extern const ymm_reg ff_pw_8192;
+extern const ymm_reg ff_pw_m1;
+extern const ymm_reg ff_pd_1;
+extern const ymm_reg ff_ps_neg;
+
+#endif /* AVUTIL_X86_CONSTANTS_H */
diff --git a/libavutil/x86/constants.inc b/libavutil/x86/constants.inc
new file mode 100644
index 0000000..cb03fd9
--- /dev/null
+++ b/libavutil/x86/constants.inc
@@ -0,0 +1,39 @@
+cextern pb_0
+cextern pb_1
+cextern pb_2
+cextern pb_3
+cextern pb_15
+cextern pb_80
+cextern pb_FC
+cextern pb_FE
+cextern pw_1
+cextern pw_2
+cextern pw_3
+cextern pw_4
+cextern pw_5
+cextern pw_8
+cextern pw_9
+cextern pw_10
+cextern pw_15
+cextern pw_16
+cextern pw_17
+cextern pw_18
+cextern pw_20
+cextern pw_32
+cextern pw_42
+cextern pw_53
+cextern pw_64
+cextern pw_96
+cextern pw_128
+cextern pw_255
+cextern pw_256
+cextern pw_512
+cextern pw_1019
+cextern pw_1023
+cextern pw_1024
+cextern pw_2048
+cextern pw_4096
+cextern pw_8192
+cextern pw_m1
+cextern pd_1
+cextern ps_neg
diff --git a/tests/ref/fate/source b/tests/ref/fate/source
index 9cd8b30..8a8157a 100644
--- a/tests/ref/fate/source
+++ b/tests/ref/fate/source
@@ -11,6 +11,7 @@ libavfilter/log2_tab.c
 libavformat/file_open.c
 libavformat/golomb_tab.c
 libavformat/log2_tab.c
+libavutil/x86/constants.h
 libavutil/x86_cpu.h
 libswresample/log2_tab.c
 libswscale/log2_tab.c
-- 
2.5.3



More information about the ffmpeg-devel mailing list