[FFmpeg-devel] [PATCH] x86: use new gcc atomic built-ins if available
James Almer
jamrial at gmail.com
Sun Oct 26 02:32:57 CET 2014
__sync built-ins are considered legacy and will be deprecated.
These new memory model aware built-ins have been available since GCC 4.7.0
Signed-off-by: James Almer <jamrial at gmail.com>
---
https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/_005f_005fatomic-Builtins.html
This is an RFC for a couple of reasons.
The first is the memory model parameter. The documentation mentions that the
__sync functions match the behavior of the new __atomic functions when the
latter use the full barrier model (__ATOMIC_SEQ_CST), so I went with it for
consistency's sake. It may however be a good idea to check if any of the more
relaxed models available for these new functions can be used instead.
It's worth mentioning that when I tested, gcc-tsan liked the __atomic load and
store functions a lot more than __sync_synchronize(), regardless of memory
model.
The second reason is __atomic_compare_exchange_n(), and how it differs from
__sync_val_compare_and_swap().
While the latter returns *ptr as it was before the operation, the former
doesn't and instead copies *ptr to oldval if the result of the comparison is
false. This means that returning oldval will match the old behavior without
having to change the wrapper.
A disassembly example from libavutil/buffer.o, however, hints that the __atomic
function may be slower because it writes oldval.
__sync_val_compare_and_swap:
8e3: 48 89 d8 mov rax,rbx
8e6: f0 48 0f b1 16 lock cmpxchg QWORD PTR [rsi],rdx
8eb: 48 85 c0 test rax,rax
__atomic_compare_exchange_n:
8f0: 48 8d 4c 24 20 lea rcx,[rsp+0x20]
[...]
90c: 48 89 d8 mov rax,rbx
90f: 48 89 5c 24 20 mov QWORD PTR [rsp+0x20],rbx
914: f0 48 0f b1 16 lock cmpxchg QWORD PTR [rsi],rdx
919: 74 03 je 91e <av_buffer_pool_get+0x3e>
91b: 48 89 01 mov QWORD PTR [rcx],rax
91e: 48 8b 44 24 20 mov rax,QWORD PTR [rsp+0x20]
923: 48 85 c0 test rax,rax
So the question is, do we keep using __sync_val_compare_and_swap as long as
gcc offers it (which is probably a very long time), or immediately switch to
__atomic_compare_exchange_n if available?
configure | 4 +++-
libavutil/atomic_gcc.h | 17 ++++++++++++++++-
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/configure b/configure
index 3eb1aa0..7697ed8 100755
--- a/configure
+++ b/configure
@@ -1596,6 +1596,7 @@ ARCH_FEATURES="
BUILTIN_LIST="
atomic_cas_ptr
+ atomic_compare_exchange
machine_rw_barrier
MemoryBarrier
mm_empty
@@ -2021,7 +2022,7 @@ simd_align_16_if_any="altivec neon sse"
symver_if_any="symver_asm_label symver_gnu_asm"
# threading support
-atomics_gcc_if="sync_val_compare_and_swap"
+atomics_gcc_if_any="sync_val_compare_and_swap atomic_compare_exchange"
atomics_suncc_if="atomic_cas_ptr machine_rw_barrier"
atomics_win32_if="MemoryBarrier"
atomics_native_if_any="$ATOMICS_LIST"
@@ -4673,6 +4674,7 @@ if ! disabled network; then
fi
check_builtin atomic_cas_ptr atomic.h "void **ptr; void *oldval, *newval; atomic_cas_ptr(ptr, oldval, newval)"
+check_builtin atomic_compare_exchange "" "int *ptr, *oldval; int newval; __atomic_compare_exchange_n(ptr, oldval, newval, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)"
check_builtin machine_rw_barrier mbarrier.h "__machine_rw_barrier()"
check_builtin MemoryBarrier windows.h "MemoryBarrier()"
check_builtin sarestart signal.h "SA_RESTART"
diff --git a/libavutil/atomic_gcc.h b/libavutil/atomic_gcc.h
index 2bb43c3..4b0e425 100644
--- a/libavutil/atomic_gcc.h
+++ b/libavutil/atomic_gcc.h
@@ -28,28 +28,43 @@
#define avpriv_atomic_int_get atomic_int_get_gcc
static inline int atomic_int_get_gcc(volatile int *ptr)
{
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+ return __atomic_load_n(ptr, __ATOMIC_SEQ_CST);
+#else
__sync_synchronize();
return *ptr;
+#endif
}
#define avpriv_atomic_int_set atomic_int_set_gcc
static inline void atomic_int_set_gcc(volatile int *ptr, int val)
{
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+ __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST);
+#else
*ptr = val;
__sync_synchronize();
+#endif
}
#define avpriv_atomic_int_add_and_fetch atomic_int_add_and_fetch_gcc
static inline int atomic_int_add_and_fetch_gcc(volatile int *ptr, int inc)
{
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+ return __atomic_add_fetch(ptr, inc, __ATOMIC_SEQ_CST);
+#else
return __sync_add_and_fetch(ptr, inc);
+#endif
}
#define avpriv_atomic_ptr_cas atomic_ptr_cas_gcc
static inline void *atomic_ptr_cas_gcc(void * volatile *ptr,
void *oldval, void *newval)
{
-#ifdef __ARMCC_VERSION
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+ __atomic_compare_exchange_n(ptr, &oldval, newval, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return oldval;
+#elif defined(__ARMCC_VERSION)
// armcc will throw an error if ptr is not an integer type
volatile uintptr_t *tmp = (volatile uintptr_t*)ptr;
return (void*)__sync_val_compare_and_swap(tmp, oldval, newval);
--
2.0.4
More information about the ffmpeg-devel
mailing list