[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130327

Michael Niedermayer michaelni at gmx.at
Wed Mar 27 18:14:20 CET 2013


On Wed, Mar 27, 2013 at 09:21:09PM +0800, Wei Gao wrote:
> 

>  configure          |    4 
>  libavutil/Makefile |    3 
>  libavutil/opencl.c |  674 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h |  197 +++++++++++++++
>  4 files changed, 878 insertions(+)
> dfbeaa4af370343e8f62d4b398c49c1f9531d242  0001-opencl-wrapper-based-on-comments-on-20130327.patch
> From 544da77d67c6c27e415363c3ebd2f1894f98932e Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Wed, 27 Mar 2013 21:17:29 +0800
> Subject: [PATCH] opencl wrapper based on comments on 20130327
> 
> ---
>  configure          |   4 +
>  libavutil/Makefile |   3 +
>  libavutil/opencl.c | 674 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h | 197 ++++++++++++++++
>  4 files changed, 878 insertions(+)
>  create mode 100644 libavutil/opencl.c
>  create mode 100644 libavutil/opencl.h
> 
> diff --git a/configure b/configure
> index 8443db4..9c42a85 100755
> --- a/configure
> +++ b/configure
> @@ -233,6 +233,7 @@ External library support:
>    --enable-libxvid         enable Xvid encoding via xvidcore,
>                             native MPEG-4/Xvid encoder exists [no]
>    --enable-openal          enable OpenAL 1.1 capture support [no]
> +  --enable-opencl          enable OpenCL code
>    --enable-openssl         enable openssl [no]
>    --enable-x11grab         enable X11 grabbing [no]
>    --enable-zlib            enable zlib [autodetect]
> @@ -1178,6 +1179,7 @@ EXTERNAL_LIBRARY_LIST="
>      libxavs
>      libxvid
>      openal
> +    opencl
>      openssl
>      x11grab
>      zlib
> @@ -3982,6 +3984,7 @@ enabled openal     && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
>                          die "ERROR: openal not found"; } &&
>                        { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
>                          die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
> +enabled opencl     && require2 opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL
>  enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
> @@ -4350,6 +4353,7 @@ echo "network support           ${network-no}"
>  echo "threading support         ${thread_type-no}"
>  echo "safe bitstream reader     ${safe_bitstream_reader-no}"
>  echo "SDL support               ${sdl-no}"
> +echo "opencl enabled            ${opencl-no}"
>  echo "texi2html enabled         ${texi2html-no}"
>  echo "perl enabled              ${perl-no}"
>  echo "pod2man enabled           ${pod2man-no}"
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 103ce5e..6375e10 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -52,6 +52,8 @@ HEADERS = adler32.h                                                     \
>  
>  HEADERS-$(CONFIG_LZO)                   += lzo.h
>  
> +HEADERS-$(CONFIG_OPENCL)                += opencl.h
> +
>  ARCH_HEADERS = bswap.h                                                  \
>                 intmath.h                                                \
>                 intreadwrite.h                                           \


> @@ -115,6 +117,7 @@ SKIPHEADERS-$(HAVE_MACHINE_RW_BARRIER)          += atomic_suncc.h
>  SKIPHEADERS-$(HAVE_MEMORYBARRIER)               += atomic_win32.h
>  SKIPHEADERS-$(HAVE_SYNC_VAL_COMPARE_AND_SWAP)   += atomic_gcc.h
>  
> +OBJS-$(CONFIG_OPENCL)                   += opencl.o
>  TESTPROGS = adler32                                                     \
>              aes                                                         \
>              atomic                                                      \

This line should be placed where the other OBJS-$(...) lines are.


> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> new file mode 100644
> index 0000000..1888a34
> --- /dev/null
> +++ b/libavutil/opencl.c
> @@ -0,0 +1,674 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl.h"
> +#include "avstring.h"
> +#include "log.h"
> +#include "avassert.h"
> +
> +#if !HAVE_MEMORYBARRIER && !HAVE_SYNC_VAL_COMPARE_AND_SWAP && !HAVE_MACHINE_RW_BARRIER
> +#if HAVE_PTHREADS
> +
> +#include <pthread.h>
> +static pthread_mutex_t atomic_opencl_lock = PTHREAD_MUTEX_INITIALIZER;
> +
> +static void lock_opencl(void)
> +{
> +    pthread_mutex_lock(&atomic_opencl_lock);
> +}
> +
> +static void unlock_opencl(void)
> +{
> +    pthread_mutex_unlock(&atomic_opencl_lock);
> +}
> +
> +#elif !HAVE_THREADS
> +
> +static void lock_opencl(void)
> +{
> +}
> +
> +static void unlock_opencl(void)
> +{
> +}
> +
> +#endif

> +#else
> +static void lock_opencl(void)
> +{
> +}
> +
> +static void unlock_opencl(void)
> +{
> +}

Empty functions will not prevent thread-safety issues.


[...]

[...]
> +static int compile_kernel_file(GPUEnv *gpu_env, const char *build_option)
> +{
> +    cl_int status;
> +    char *temp, *source_str = NULL;
> +    size_t source_str_len = 0;
> +    int i, ret = 0;
> +
> +    if (gpu_env->program)
> +        return ret;
> +
> +    for (i = 0; i < gpu_env->kernel_function_count; i++) {
> +        source_str_len += strlen(gpu_env->kernel_code[i]);
> +    }
> +    source_str = av_mallocz(source_str_len + 1);
> +    if (!source_str) {
> +        return AVERROR(ENOMEM);
> +    }
> +    temp = source_str;
> +    for (i = 0; i < gpu_env->kernel_function_count; i++) {
> +        memcpy(temp, gpu_env->kernel_code[i], strlen(gpu_env->kernel_code[i]));
> +        temp += strlen(gpu_env->kernel_code[i]);
> +    }

If I understand this correctly, the source of all registered filters is
concatenated together and then compiled.
This is quite wasteful considering that only a small subset of
filters will be used in most use cases.

Even in the future, when things are precompiled, it would be wasteful
to load all of them when only 5% are used.


[...]
> +
> +int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
> +{
> +    cl_int status;
> +    *cl_buf = clCreateBuffer(gpu_env.context, flags, cl_buf_size, host_ptr, &status);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +void av_opencl_buffer_release(cl_mem cl_buf)
> +{
> +    cl_int status = 0;
> +    if (!cl_buf)
> +        return;
> +    status = clReleaseMemObject(cl_buf);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL buffer: %s\n", opencl_errstr(status));
> +    }
> +}
> +

> +int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
> +{
> +    cl_int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, dst_cl_buf,
> +                                      CL_TRUE,CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
> +                                      0, NULL, NULL, &status);

gpu_env.command_queue is a global field; how is this supposed to work?
There can be multiple filters from multiple threads accessing it.

I do not really know OpenCL, but shouldn't each filter instance have its
own command_queue?

Also, the function is set to blocking mode; this does not seem ideal
from a performance point of view.


[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

When you are offended at any man's fault, turn to yourself and study your
own failings. Then you will forget your anger. -- Epictetus
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130327/a542e06d/attachment.asc>


More information about the ffmpeg-devel mailing list