[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: add opencl wrapper to ffmpeg

Stefano Sabatini stefasab at gmail.com
Sun Mar 24 18:40:27 CET 2013


On date Sunday 2013-03-24 19:55:00 +0800, Wei Gao encoded:
> 

> From ab91c74fa166f557eadb39ab399d7ba667b91340 Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Sun, 24 Mar 2013 19:20:16 +0800
> Subject: [PATCH 1/2] add opencl wrapper to ffmpeg
> 
> ---
>  configure          |   4 +
>  libavutil/Makefile |   3 +
>  libavutil/opencl.c | 995 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h | 205 +++++++++++
>  4 files changed, 1207 insertions(+)
>  create mode 100644 libavutil/opencl.c
>  create mode 100644 libavutil/opencl.h
> 
> diff --git a/configure b/configure
> index 8443db4..9c42a85 100755
> --- a/configure
> +++ b/configure
> @@ -233,6 +233,7 @@ External library support:
>    --enable-libxvid         enable Xvid encoding via xvidcore,
>                             native MPEG-4/Xvid encoder exists [no]
>    --enable-openal          enable OpenAL 1.1 capture support [no]
> +  --enable-opencl          enable OpenCL code
>    --enable-openssl         enable openssl [no]
>    --enable-x11grab         enable X11 grabbing [no]
>    --enable-zlib            enable zlib [autodetect]
> @@ -1178,6 +1179,7 @@ EXTERNAL_LIBRARY_LIST="
>      libxavs
>      libxvid
>      openal
> +    opencl
>      openssl
>      x11grab
>      zlib
> @@ -3982,6 +3984,7 @@ enabled openal     && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
>                          die "ERROR: openal not found"; } &&
>                        { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
>                          die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
> +enabled opencl     && require2 opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL
>  enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
> @@ -4350,6 +4353,7 @@ echo "network support           ${network-no}"
>  echo "threading support         ${thread_type-no}"
>  echo "safe bitstream reader     ${safe_bitstream_reader-no}"
>  echo "SDL support               ${sdl-no}"
> +echo "opencl enabled            ${opencl-no}"
>  echo "texi2html enabled         ${texi2html-no}"
>  echo "perl enabled              ${perl-no}"
>  echo "pod2man enabled           ${pod2man-no}"
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 103ce5e..6375e10 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -52,6 +52,8 @@ HEADERS = adler32.h                                                     \
>  
>  HEADERS-$(CONFIG_LZO)                   += lzo.h
>  
> +HEADERS-$(CONFIG_OPENCL)                += opencl.h
> +
>  ARCH_HEADERS = bswap.h                                                  \
>                 intmath.h                                                \
>                 intreadwrite.h                                           \
> @@ -115,6 +117,7 @@ SKIPHEADERS-$(HAVE_MACHINE_RW_BARRIER)          += atomic_suncc.h
>  SKIPHEADERS-$(HAVE_MEMORYBARRIER)               += atomic_win32.h
>  SKIPHEADERS-$(HAVE_SYNC_VAL_COMPARE_AND_SWAP)   += atomic_gcc.h
>  
> +OBJS-$(CONFIG_OPENCL)                   += opencl.o
>  TESTPROGS = adler32                                                     \
>              aes                                                         \
>              atomic                                                      \
> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> new file mode 100644
> index 0000000..7e62325
> --- /dev/null
> +++ b/libavutil/opencl.c
> @@ -0,0 +1,995 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl.h"
> +#include "avstring.h"
> +#include "log.h"
> +
> +
> +#define MAX_KERNEL_STRING_LEN   64

> +#define MAX_CLFILE_NUM 50
> +#define MAX_CLKERNEL_NUM 200

nit: you could drop "CL" from the names

> +#define MAX_CLFILE_PATH 255

unused


> +#define MAX_FILTER_NAME_LEN 64
> +#define MAX_FILTER_NUM 200
> +#define MAX_KERNEL_SRCFILE_LEN 256
> +
> +
> +typedef struct OpenCLEnv {
> +    cl_platform_id platform;
> +    cl_context   context;
> +    cl_device_id devices;
> +    cl_command_queue command_queue;
> +} OpenCLEnv;
> +
> +typedef struct GPUEnv {
> +    cl_platform_id platform;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *device_ids;
> +    cl_device_id  device_id;
> +    cl_command_queue command_queue;
> +    cl_kernel kernels[MAX_CLFILE_NUM];
> +    cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file

> +    char  kernel_srcfile[MAX_CLFILE_NUM][MAX_KERNEL_SRCFILE_LEN]; //the max len of kernel file name is 256

pointless comment

> +    int file_count; // only one kernel file

confusing comment

> +
> +    char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
> +    av_opencl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
> +    const char *kernel_code[MAX_CLKERNEL_NUM];
> +    int kernel_count;
> +    int runtime_kernel_count;

> +    int is_user_created; // 1: created , 0:no create and needed to create by opencl wrapper

Can you explain the whole user_created thing?

> +    uint8_t *temp_buffer;
> +    int temp_buffer_size;
> +} GPUEnv;
> +

> +typedef struct FilterBufferNode {
> +    char filter_name[MAX_FILTER_NAME_LEN+1];
> +    void *cl_inbuf;
> +    int buf_size;
> +} FilterBufferNode;
> +

> +typedef struct OpenclUtils {
> +    const AVClass *class;
> +    int   log_offset;
> +    void *log_ctx;
> +} OpenclUtils;

Put this close to the openclutils_class definition.

> +
> +typedef struct OpenclErrorMsg {
> +    int err_code;
> +    const char *err_str;
> +} OpenclErrorMsg;
> +
> +static OpenclErrorMsg opencl_err_msg[] = {
> +        {CL_DEVICE_NOT_FOUND,                               "DEVICE NOT FOUND"},
> +        {CL_DEVICE_NOT_AVAILABLE,                           "DEVICE NOT AVAILABLE"},
> +        {CL_COMPILER_NOT_AVAILABLE,                         "COMPILER NOT AVAILABLE"},
> +        {CL_MEM_OBJECT_ALLOCATION_FAILURE,                  "MEM OBJECT ALLOCATION FAILURE"},
> +        {CL_OUT_OF_RESOURCES,                               "OUT OF RESOURCES"},
> +        {CL_OUT_OF_HOST_MEMORY,                             "OUT OF HOST MEMORY"},
> +        {CL_PROFILING_INFO_NOT_AVAILABLE,                   "PROFILING INFO NOT AVAILABLE"},
> +        {CL_MEM_COPY_OVERLAP,                               "MEM COPY OVERLAP"},
> +        {CL_IMAGE_FORMAT_MISMATCH,                          "IMAGE FORMAT MISMATCH"},
> +        {CL_IMAGE_FORMAT_NOT_SUPPORTED,                     "IMAGE FORMAT NOT_SUPPORTED"},
> +        {CL_BUILD_PROGRAM_FAILURE,                          "BUILD PROGRAM FAILURE"},
> +        {CL_MAP_FAILURE,                                    "MAP FAILURE"},
> +        {CL_MISALIGNED_SUB_BUFFER_OFFSET,                   "MISALIGNED SUB BUFFER OFFSET"},
> +        {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,      "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
> +        {CL_COMPILE_PROGRAM_FAILURE,                        "COMPILE PROGRAM FAILURE"},
> +        {CL_LINKER_NOT_AVAILABLE,                           "LINKER NOT AVAILABLE"},
> +        {CL_LINK_PROGRAM_FAILURE,                           "LINK PROGRAM FAILURE"},
> +        {CL_DEVICE_PARTITION_FAILED,                        "DEVICE PARTITION FAILED"},
> +        {CL_KERNEL_ARG_INFO_NOT_AVAILABLE,                  "KERNEL ARG INFO NOT AVAILABLE"},
> +        {CL_INVALID_VALUE,                                  "INVALID VALUE"},
> +        {CL_INVALID_DEVICE_TYPE,                            "INVALID DEVICE TYPE"},
> +        {CL_INVALID_PLATFORM,                               "INVALID PLATFORM"},
> +        {CL_INVALID_DEVICE,                                 "INVALID DEVICE"},
> +        {CL_INVALID_CONTEXT,                                "INVALID CONTEXT"},
> +        {CL_INVALID_QUEUE_PROPERTIES,                       "INVALID QUEUE PROPERTIES"},
> +        {CL_INVALID_COMMAND_QUEUE,                          "INVALID COMMAND QUEUE"},
> +        {CL_INVALID_HOST_PTR,                               "INVALID HOST PTR"},
> +        {CL_INVALID_MEM_OBJECT,                             "INVALID MEM OBJECT"},
> +        {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,                "INVALID IMAGE FORMAT DESCRIPTOR"},
> +        {CL_INVALID_IMAGE_SIZE,                             "INVALID IMAGE SIZE"},
> +        {CL_INVALID_SAMPLER,                                "INVALID SAMPLER"},
> +        {CL_INVALID_BINARY,                                 "INVALID BINARY"},
> +        {CL_INVALID_BUILD_OPTIONS,                          "INVALID BUILD OPTIONS"},
> +        {CL_INVALID_PROGRAM,                                "INVALID PROGRAM"},
> +        {CL_INVALID_PROGRAM_EXECUTABLE,                     "INVALID PROGRAM EXECUTABLE"},
> +        {CL_INVALID_KERNEL_NAME,                            "INVALID KERNEL NAME"},
> +        {CL_INVALID_KERNEL_DEFINITION,                      "INVALID KERNEL DEFINITION"},
> +        {CL_INVALID_KERNEL,                                 "INVALID KERNEL"},
> +        {CL_INVALID_ARG_INDEX,                              "INVALID ARG INDEX"},
> +        {CL_INVALID_ARG_VALUE,                              "INVALID ARG VALUE"},
> +        {CL_INVALID_ARG_SIZE,                               "INVALID ARG_SIZE"},
> +        {CL_INVALID_KERNEL_ARGS,                            "INVALID KERNEL ARGS"},
> +        {CL_INVALID_WORK_DIMENSION,                         "INVALID WORK DIMENSION"},
> +        {CL_INVALID_WORK_GROUP_SIZE,                        "INVALID WORK GROUP SIZE"},
> +        {CL_INVALID_WORK_ITEM_SIZE,                         "INVALID WORK ITEM SIZE"},
> +        {CL_INVALID_GLOBAL_OFFSET,                          "INVALID GLOBAL OFFSET"},
> +        {CL_INVALID_EVENT_WAIT_LIST,                        "INVALID EVENT WAIT LIST"},
> +        {CL_INVALID_EVENT,                                  "INVALID EVENT"},
> +        {CL_INVALID_OPERATION,                              "INVALID OPERATION"},
> +        {CL_INVALID_GL_OBJECT,                              "INVALID GL OBJECT"},
> +        {CL_INVALID_BUFFER_SIZE,                            "INVALID BUFFER SIZE"},
> +        {CL_INVALID_MIP_LEVEL,                              "INVALID MIP LEVEL"},
> +        {CL_INVALID_GLOBAL_WORK_SIZE,                       "INVALID GLOBAL WORK SIZE"},
> +        {CL_INVALID_PROPERTY,                               "INVALID PROPERTY"},
> +        {CL_INVALID_IMAGE_DESCRIPTOR,                       "INVALID IMAGE DESCRIPTOR"},
> +        {CL_INVALID_COMPILER_OPTIONS,                       "INVALID COMPILER OPTIONS"},
> +        {CL_INVALID_LINKER_OPTIONS,                         "INVALID LINKER OPTIONS"},
> +        {CL_INVALID_DEVICE_PARTITION_COUNT,                 "INVALID DEVICE PARTITION COUNT"},
> +};
> +
> +static const AVClass openclutils_class = {"OPENCLUTILS", av_default_item_name,
> +                                                   NULL, LIBAVUTIL_VERSION_INT,
> +                                                   offsetof(OpenclUtils, log_offset),
> +                                                   offsetof(OpenclUtils, log_ctx)};
> +static OpenclUtils openclutils = {&openclutils_class};
> +static GPUEnv gpu_env;

> +static FilterBufferNode filter_buffer[MAX_FILTER_NUM];

filter_buffer is never used in this patch, please remove it together
with all the related definitions.

> +static int isinited;
> +
> +int av_opencl_register_kernel(const char *kernel_name, const char *kernel_code)
> +{
> +    if (gpu_env.kernel_count < MAX_CLKERNEL_NUM) {
> +        if (strlen(kernel_name) <= MAX_KERNEL_STRING_LEN) {
> +            gpu_env.kernel_code[gpu_env.kernel_count] = kernel_code;
> +            av_strlcpy(gpu_env.kernel_names[gpu_env.kernel_count], kernel_name, MAX_KERNEL_STRING_LEN+1);
> +            gpu_env.kernel_count++;
> +        } else {
> +            av_log(&openclutils,AV_LOG_ERROR,"registered kernel name %s is too long\n",kernel_name);

av_log(&openclutils, AV_LOG_ERROR,
       "Registered kernel name '%s' is too long\n", kernel_name);

Please note the spacing, spaces are useful to improve readability (and
consistency with the rest of the codebase). The same remarks about
spacing apply for the remainder of the patch, which I'm omitting.

> +            return AVERROR(ENOMEM);

AVERROR(EINVAL)

> +        }
> +    } else {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not register kernel with name '%s', maximum number of registered kernels %d already reached\n",kernel_name,MAX_CLKERNEL_NUM);
> +        return AVERROR(ENOMEM);

AVERROR(EINVAL)

> +    }
> +    return 0;
> +}
> +static const char *opencl_errstr(int status)
> +{
> +    int i;
> +    for (i = 0;i < sizeof(opencl_err_msg); i++) {
> +        if(opencl_err_msg[i].err_code == status)

if_(...

> +            return opencl_err_msg[i].err_str;
> +    }
> +    return "unknown error";
> +}
> +
> +static int access_binaries(cl_device_id *device_ids, int numdevices, const char *cl_file_name, FILE **fhandle,
> +                                size_t *binarysizes, char **binaries, int write)
> +{
> +    FILE *fd = NULL;
> +    int status;
> +    char filename[1024] = {0};
> +    char cl_name[1024] = {0};
> +    char devicename[1024] = {0};
> +    int i;
> +    for (i = 0; i < numdevices; i++) {
> +        if (device_ids[i] != 0) {
> +            status = clGetDeviceInfo(device_ids[i],
> +                                     CL_DEVICE_NAME,
> +                                     sizeof(devicename),
> +                                     devicename,
> +                                     NULL);
> +            if (status == CL_SUCCESS) {
> +                av_strlcpy(cl_name,cl_file_name,sizeof(cl_name));
> +                snprintf(filename, sizeof(filename),"./%s-%s.bin", cl_name, devicename);
> +                if (!write) {
> +                    fd = fopen(filename,"rb");
> +                    if (fd) {
> +                        if (fhandle)
> +                            *fhandle = fd;
> +                        break;
> +                    }
> +                } else {

> +                    FILE *output = NULL;

why don't you reuse fd?

> +                    if (binarysizes[i] != 0) {
> +                        output = fopen(filename, "wb");
> +                        if (!output) {

> +                            av_log(&openclutils,AV_LOG_ERROR,"Could not write file '%s'\n",filename);
> +                            return AVERROR(EINVAL);;
                                                     ^^

Also, per my previous reply:

ret = errno;
av_log(&openclutils,AV_LOG_ERROR, "Could not write file '%s'\n", filename);
return AVERROR(ret);


> +                        }
> +                        fwrite(binaries[i], sizeof(char), binarysizes[i], output);
> +                        fclose(output);
> +                    }
> +                }
> +            } else {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL device information: %s\n",opencl_errstr(status));
> +                return AVERROR_EXTERNAL;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int check_generated_binary(cl_context context, const char * cl_file_name, FILE **fhandle)
> +{
> +    int ret = 0;
> +    cl_int status;
> +    size_t numdevices;
> +    cl_device_id *device_ids;
> +    status = clGetContextInfo(context,
> +                              CL_CONTEXT_NUM_DEVICES,
> +                              sizeof(numdevices),
> +                              &numdevices,
> +                              NULL);
> +    if (status != CL_SUCCESS){
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL context number of devices: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    device_ids = av_malloc(sizeof(cl_device_id) * numdevices);
> +    if (!device_ids)
> +        return AVERROR(ENOMEM);
> +
> +    /* grab the handles to all of the devices in the context. */
> +    status = clGetContextInfo(context,
> +                              CL_CONTEXT_DEVICES,
> +                              sizeof(cl_device_id) * numdevices,
> +                              device_ids,
> +                              NULL);
> +    if (status != CL_SUCCESS){
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL context devices: %s\n",opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    ret = access_binaries(device_ids, numdevices,cl_file_name, fhandle, NULL, NULL, 0);
> +end:
> +    av_free(device_ids);
> +    return ret;
> +}
> +
> +static int generate_bin_from_kernel_source(cl_program program, const char * cl_file_name)
> +{
> +    int i = 0;
> +    cl_int status;

> +    size_t *binarysizes = NULL;
> +    size_t numdevices;

nit: binary_sizes and num_devices sound more readable

> +    cl_device_id *device_ids = NULL;
> +    char **binaries = NULL;
> +    int ret = 0;
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_NUM_DEVICES,
> +                              sizeof(numdevices),
> +                              &numdevices,
> +                              NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL program information: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    device_ids = av_mallocz(sizeof(cl_device_id) * numdevices);
> +    if (!device_ids)
> +        return AVERROR(ENOMEM);
> +    /* grab the handles to all of the devices in the program. */
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_DEVICES,
> +                              sizeof(cl_device_id) * numdevices,
> +                              device_ids,
> +                              NULL);
> +
> +    /* figure out the sizes of each of the binaries. */
> +    binarysizes = av_mallocz(sizeof(size_t) * numdevices);
> +    if (!binarysizes) {
> +        ret = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_BINARY_SIZES,
> +                              sizeof(size_t) * numdevices,
> +                              binarysizes, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL program info: %s\n",opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    /* copy over all of the generated binaries. */
> +    binaries = av_mallocz(sizeof(char *) * numdevices);
> +    if (!binaries) {
> +        ret = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +    for (i = 0; i < numdevices; i++) {
> +        if (binarysizes[i] != 0) {
> +            binaries[i] = av_mallocz(sizeof(char) * binarysizes[i]);
> +            if (!binaries[i]) {
> +                ret = AVERROR(ENOMEM);
> +                goto end;
> +            }
> +        }
> +    }
> +
> +    status = clGetProgramInfo(program,
> +                              CL_PROGRAM_BINARIES,
> +                              sizeof(char *) * numdevices,
> +                              binaries,
> +                              NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL program info: %s\n",opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    ret = access_binaries(device_ids, numdevices, cl_file_name, NULL, binarysizes, binaries, 1);
> +end:
> +    // Release all resouces and memory
> +    if (binaries) {
> +        for (i = 0;i < numdevices;i++ ) {
> +            av_free(binaries[i]);
> +        }
> +    }
> +    av_free(binaries);
> +    av_free(binarysizes);
> +    av_free(device_ids);
> +    return ret;
> +}
> +

> +int av_opencl_create_kernel(const char *kernelname, AVOpenCLKernelEnv *env)

nit: kernel_name for increased readability

> +{
> +    int status;
> +    env->kernel = clCreateKernel(gpu_env.programs[0], kernelname, &status);
> +    env->context = gpu_env.context;
> +    env->command_queue = gpu_env.command_queue;
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not create OpenCL kernel: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +void av_opencl_release_kernel(AVOpenCLKernelEnv * env)
> +{
> +    int status = clReleaseKernel(env->kernel);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not release kernel: %s\n",opencl_errstr(status));
> +    }
> +}
> +
> +static int init_opencl_env(GPUEnv *gpu_env, AVOpenCLExternalInfo *ext_opencl_info)
> +{
> +    size_t device_length;
> +    cl_int status;
> +    cl_uint numplatforms, numdevices;
> +    cl_platform_id *platform_ids = NULL;
> +    cl_context_properties cps[3];
> +    char platform_name[100];
> +    unsigned int i;
> +    int ret = 0;
> +    cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_DEFAULT};
> +    if (ext_opencl_info) {
> +        if (gpu_env->is_user_created)
> +            return 0;
> +        gpu_env->platform = ext_opencl_info->platform;
> +        gpu_env->is_user_created = 1;
> +        gpu_env->command_queue = ext_opencl_info->command_queue;
> +        gpu_env->context = ext_opencl_info->context;
> +        gpu_env->device_ids = ext_opencl_info->device_ids;
> +        gpu_env->device_id = ext_opencl_info->device_id;
> +        gpu_env->device_type = ext_opencl_info->device_type;
> +    } else {
> +        if (!gpu_env->is_user_created) {
> +            status = clGetPlatformIDs(0,NULL,&numplatforms);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL platform ids: %s\n",opencl_errstr(status));
> +                return AVERROR_EXTERNAL;
> +            }
> +            if (numplatforms > 0) {
> +                platform_ids = av_mallocz(numplatforms * sizeof(cl_platform_id));
> +                if (!platform_ids) {
> +                    ret = AVERROR(ENOMEM);
> +                    goto end;
> +                }
> +                status = clGetPlatformIDs(numplatforms, platform_ids, NULL);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL platform ids: %s\n",opencl_errstr(status));
> +                    ret = AVERROR_EXTERNAL;
> +                    goto end;
> +                }
> +                for (i = 0; i < numplatforms; i++) {
> +                    status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
> +                                               sizeof(platform_name), platform_name,
> +                                               NULL);
> +
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL platform info: %s\n",opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    gpu_env->platform = platform_ids[i];
> +                    status = clGetDeviceIDs(gpu_env->platform /* platform */,
> +                                            CL_DEVICE_TYPE_GPU /* device_type */,
> +                                            0 /* num_entries */,
> +                                            NULL /* devices */,
> +                                            &numdevices);
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL device number:%s\n",opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    if (numdevices == 0) {
> +                        //find CPU device
> +                        status = clGetDeviceIDs( gpu_env->platform /* platform */,
> +                                             CL_DEVICE_TYPE_CPU /* device_type */,
> +                                             0 /* num_entries */,
> +                                             NULL /* devices */,
> +                                             &numdevices );
> +                    }
> +                    if (status != CL_SUCCESS) {
> +                        av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL device ids: %s\n",opencl_errstr(status));
> +                        ret = AVERROR_EXTERNAL;
> +                        goto end;
> +                    }
> +                    if(numdevices)
> +                       break;
> +
> +                }
> +            }
> +            if (!gpu_env->platform) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL platforms\n");
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +
> +           /*
> +                 * Use available platform.
> +                 */
> +            av_log(&openclutils,AV_LOG_VERBOSE,"Platform Name: %s\n",platform_name);
> +            cps[0] = CL_CONTEXT_PLATFORM;
> +            cps[1] = (cl_context_properties)gpu_env->platform;
> +            cps[2] = 0;
> +            /* Check for GPU. */
> +
> +            for (i = 0;i < sizeof(device_type);i++) {
> +                gpu_env->device_type = device_type[i];
> +                gpu_env->context = clCreateContextFromType(cps,
> +                                                           gpu_env->device_type,
> +                                                           NULL,
> +                                                           NULL,
> +                                                           &status );
> +                if ((gpu_env->context) && (status == CL_SUCCESS))
> +                    break;
> +            }
> +            if ((!gpu_env->context) || (status != CL_SUCCESS)) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL context from device type: %s\n",opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Detect OpenCL devices. */
> +            /* First, get the size of device list data */
> +            status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES,
> +                                      0, NULL, &device_length);
> +            if ((status != CL_SUCCESS) || (device_length == 0)) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL device length: %s\n",opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Now allocate memory for device list based on the size we got earlier */
> +            gpu_env->device_ids = av_mallocz(device_length);
> +            if (!gpu_env->device_ids) {
> +                ret = AVERROR(ENOMEM);
> +                goto end;
> +            }
> +            /* Now, get the device list data */
> +            status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES, device_length,
> +                                      gpu_env->device_ids, NULL);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL context info: %s\n",opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            /* Create OpenCL command queue. */
> +            gpu_env->command_queue = clCreateCommandQueue(gpu_env->context,
> +                                                           gpu_env->device_ids[0],
> +                                                           0, &status);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not create OpenCL command queue: %s\n",opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +        }
> +    }
> +end:
> +    av_free(platform_ids);
> +    return ret;
> +}
> +
> +static void release_opencl_env(GPUEnv *gpu_env)
> +{
> +    int i, status;
> +    if (!isinited)
> +        return;

> +    av_free(gpu_env->temp_buffer);
> +    gpu_env->temp_buffer = NULL;

av_freep

> +    if (gpu_env->is_user_created)
> +        return;
> +    gpu_env->runtime_kernel_count--;
> +    if (!gpu_env->runtime_kernel_count) {
> +        for (i = 0; i<gpu_env->file_count; i++) {
> +            if (gpu_env->programs[i]) {

> +                status = clReleaseProgram(gpu_env->programs[i]);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils,AV_LOG_ERROR,"Could not release OpenCL platform: %s\n",opencl_errstr(status));

OpenCL program?

> +                }
> +                gpu_env->programs[i] = NULL;
> +            }
> +        }
> +        if (gpu_env->command_queue) {
> +            status = clReleaseCommandQueue(gpu_env->command_queue);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not release OpenCL command queue: %s\n",opencl_errstr(status));
> +            }
> +            gpu_env->command_queue = NULL;
> +        }
> +        if (gpu_env->context) {
> +            status = clReleaseContext(gpu_env->context);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils,AV_LOG_ERROR,"Could not release OpenCL context: %s\n\n",opencl_errstr(status));
> +            }
> +            gpu_env->context = NULL;
> +        }

> +        av_free(gpu_env->device_ids);

av_freep

> +        isinited = 0;
> +    }
> +    return;
> +}
> +
> +int av_opencl_register_kernel_function(const char *kernel_name, av_opencl_kernel_function function)
> +{
> +    int i;
> +    for (i = 0; i < gpu_env.kernel_count; i++) {
> +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i])==0) {
> +            gpu_env.kernel_functions[i] = function;
> +            gpu_env.runtime_kernel_count++;
> +            return 0;
> +        }
> +    }

you could generate an error message

> +    return AVERROR(EIO);

return AVERROR(EINVAL) seems more proper

> +}
> +

> +static int detect_kernel_program(const GPUEnv *gpu_env, const char *cl_file_name)
> +{
> +    int i;
> +    for (i = 0; i < gpu_env->file_count; i++) {
> +        if (av_strcasecmp(gpu_env->kernel_srcfile[i], cl_file_name)==0) {
> +            if(gpu_env->programs[i])
> +                return 1;
> +        }
> +    }
> +    return 0;
> +}

avoid to create separate functions for trivial operations like a loop,
it only increases the bloat and reduces readability, rather merge the
code in compile_kernel_file()

> +
> +static int compile_kernel_file(const char *filename, GPUEnv *gpu_info,
> +                            int indx, const char *build_option)

indx is unused

> +{
> +    cl_int status;
> +    size_t file_size;
> +    char *source_str = NULL;
> +    const char *source;
> +    size_t source_size[1];
> +    char *buildlog = NULL;
> +    int b_error, binary_status, binary_existed;
> +    char *binary = NULL;
> +    char *temp;
> +    size_t numdevices;
> +    cl_device_id *device_ids = NULL;
> +    FILE *fd = NULL;
> +    FILE *fd1;
> +    int idx;
> +    int kernel_src_size = 0;
> +    int ret = 0;
> +    int i;

many of these variables may be avoided reusing the existing variables

> +    if (detect_kernel_program(gpu_info, filename) == 1)
> +        return ret;
> +
> +    idx = gpu_info->file_count;
> +    for (i = 0;i < gpu_env.kernel_count;i++) {
> +        kernel_src_size += strlen(gpu_env.kernel_code[i]);
> +    }

> +    source_str = av_mallocz(kernel_src_size + 2);

why +2?

> +    if (!source_str) {

> +        ret = AVERROR(ENOMEM);
> +        return ret;

return AVERROR(ENOMEM)?

> +    }
> +    temp = source_str;
> +    for (i = 0;i < gpu_env.kernel_count;i++) {
> +        memcpy(temp,gpu_env.kernel_code[i],strlen(gpu_env.kernel_code[i]));
> +        temp += strlen(gpu_env.kernel_code[i]);
> +    }
> +    source = source_str;
> +    source_size[0] = strlen(source);

Also all this code is effectively useful only in the
clCreateProgramWithSource() block, thus should be only executed there.


> +    ret = check_generated_binary(gpu_info->context, filename, &fd);
> +    if (ret)
> +        goto end;
> +    binary_existed = !!fd;
> +    if (binary_existed) {
> +        status = clGetContextInfo(gpu_info->context,
> +                                  CL_CONTEXT_NUM_DEVICES,
> +                                  sizeof(numdevices),
> +                                  &numdevices,
> +                                  NULL);
> +        if(status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not release OpenCL device number: %s\n",
> +                   opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +        device_ids = av_mallocz(sizeof(cl_device_id) * numdevices);
> +        if (!device_ids) {
> +            ret = AVERROR(ENOMEM);
> +            goto end;
> +        }
> +

> +        b_error = 0;
> +        file_size = 0;
> +        b_error |= fseek(fd, 0, SEEK_END) < 0;
> +        b_error |= (file_size = ftell(fd)) <= 0;
> +        b_error |= fseek(fd, 0, SEEK_SET) < 0;
> +        if (b_error) {
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }

I have no idea what this is trying to achieve and why is failing. Also
system call errors are not external, you are supposed to return the
corresponding AVERROR(errno) code.

> +        binary = av_mallocz(file_size);
> +        if (!binary) {
> +            ret = AVERROR(ENOMEM);
> +            goto end;
> +        }
> +
> +        b_error |= fread(binary, 1, file_size, fd) != file_size;
> +        fclose(fd);
> +        fd = NULL;
> +        /* grab the handles to all of the devices in the context. */
> +        status = clGetContextInfo(gpu_info->context,
> +                                  CL_CONTEXT_DEVICES,
> +                                  sizeof(cl_device_id) * numdevices,
> +                                  device_ids,
> +                                  NULL);
> +        if(status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL device ids: %s\n",
> +                   opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +        gpu_info->programs[idx] = clCreateProgramWithBinary(gpu_info->context,
> +                                                            numdevices,
> +                                                            device_ids,
> +                                                            &file_size,
> +                                                            (const uint8_t**)&binary,
> +                                                            &binary_status,
> +                                                            &status );
> +         if(status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not create program with binary: %s\n",
> +                   opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +
> +    } else {
> +
> +        /* create a CL program using the kernel source */
> +        gpu_info->programs[idx] = clCreateProgramWithSource(gpu_info->context,
> +                                                            1,
> +                                                            &source,
> +                                                            source_size,
> +                                                            &status);
> +        if(status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not create OpenCL program with source code: %s\n",
> +                   opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +    }
> +
> +    if ((!gpu_info->programs[idx]) || (status != CL_SUCCESS)) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not create OpenCL program: %s\n",opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +
> +
> +    /* create a cl program executable for all the devices specified */
> +    if (!gpu_info->is_user_created)
> +        status = clBuildProgram(gpu_info->programs[idx], 1, gpu_info->device_ids,
> +                                build_option, NULL, NULL);
> +    else
> +        status = clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->device_id),
> +                                 build_option, NULL, NULL);
> +
> +    if (status != CL_SUCCESS) {
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not compile OpenCL kernel: %s\n",opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +        }
> +        if (!gpu_info->is_user_created)
> +            status = clGetProgramBuildInfo(gpu_info->programs[idx],
> +                                           gpu_info->device_ids[0],
> +                                           CL_PROGRAM_BUILD_LOG, 0, NULL, &file_size);
> +        else
> +            status = clGetProgramBuildInfo(gpu_info->programs[idx],
> +                                           gpu_info->device_id,
> +                                           CL_PROGRAM_BUILD_LOG, 0, NULL, &file_size);
> +
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL build info length: %s\n",opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +        buildlog = av_mallocz(file_size);
> +        if (!buildlog) {
> +            ret = AVERROR(ENOMEM);
> +            goto end;
> +        }

> +        if (!gpu_info->is_user_created)
> +            status = clGetProgramBuildInfo(gpu_info->programs[idx], gpu_info->device_ids[0],
> +                                           CL_PROGRAM_BUILD_LOG, file_size, buildlog, &file_size);
> +        else
> +            status = clGetProgramBuildInfo(gpu_info->programs[idx], gpu_info->device_id,
> +                                           CL_PROGRAM_BUILD_LOG, file_size, buildlog, &file_size);
> +
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils,AV_LOG_ERROR,"Could not get OpenCL build info: %s\n",opencl_errstr(status));
> +            ret = AVERROR_EXTERNAL;
> +            goto end;
> +        }
> +        fd1 = fopen( "kernel-build.log", "w+" );
> +        if (fd1) {
> +            fwrite(buildlog, sizeof(char), file_size, fd1);
> +            fclose(fd1);
> +        }
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    av_strlcpy(gpu_env.kernel_srcfile[idx],filename,MAX_KERNEL_SRCFILE_LEN);
> +    if (binary_existed == 0) {
> +        ret = generate_bin_from_kernel_source(gpu_env.programs[idx], filename);
> +        if (ret) {
> +            av_log(&openclutils,AV_LOG_ERROR,"generate_bin_from_kernel_source error\n");
> +            goto end;
> +        }
> +    }
> +    gpu_info->file_count += 1;
> +end:
> +    av_free(source_str);
> +    av_free(device_ids);
> +    av_free(binary);
> +    av_free(buildlog);
> +    return ret;
> +}
> +
> +static int get_kernel_env_and_func(const char *kernel_name,
> +                                AVOpenCLKernelEnv *env,
> +                                av_opencl_kernel_function *function)
> +{
> +    for (int i = 0; i < gpu_env.kernel_count; i++) {
> +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i])==0) {
> +            env->context = gpu_env.context;
> +            env->command_queue = gpu_env.command_queue;
> +            env->program = gpu_env.programs[0];
> +            env->kernel = gpu_env.kernels[i];
> +            *function = gpu_env.kernel_functions[i];
> +            return 0;
> +        }
> +    }
> +    return AVERROR_EXTERNAL;
> +}
> +
> +int av_opencl_run_kernel(const char *kernel_name, void **userdata)
> +{
> +    AVOpenCLKernelEnv env = {0};
> +    av_opencl_kernel_function function;
> +    int status;
> +    av_strlcpy(env.kernel_name,kernel_name,AV_OPENCL_MAX_KERNEL_NAME_SIZE);
> +    status = get_kernel_env_and_func(kernel_name, &env, &function);
> +    if (!status) {
> +        return(function(userdata, &env));
> +    }
> +    return status;
> +}
> +

> +int av_opencl_init_run_env(const char *build_option,void *ext_opencl_info)

you're declaring here:
void *ext_opencl_info

> +{
> +    int status;
> +    if (!isinited) {
> +        /*initialize devices, context, comand_queue*/
> +        status = init_opencl_env(&gpu_env,ext_opencl_info);

but then you're doing an implicit cast here.

So the question is, what's the point of passing void* ?

> +        if (status) {
> +            av_log(&openclutils,AV_LOG_ERROR,"init_opencl_env Failed\n");

Note here and for the rest of the code.

Avoid to mention the name of an internal function in an error message,
as it is useless to the reader (which is not supposed to be
necessarily a programmer), just write in plain english a short
explanation of the problem.

> +            return status;
> +        }

Also: the called function is already issuing an error message in case
of failure, so there is no point issuing another (less specific) error
message here.

> +        /*initialize program, kernel_name, kernel_count*/

> +        status = compile_kernel_file("ffmpeg-kernels", &gpu_env, 0, build_option);
> +

> +        if (status) {
> +            av_log(&openclutils,AV_LOG_ERROR,"compile_kernel_file Failed please check the file 'kernel-build.log',kernel_count = %d\n",gpu_env.kernel_count);
> +            return status;

same consideration here about the error message

> +        }

> +        if (gpu_env.kernel_count == 0) {
> +            av_log(&openclutils,AV_LOG_ERROR,"av_opencl_init_run_env kernel count is 0\n");
> +            return AVERROR_EXTERNAL;
> +        }

can this happen in case the two called functions don't fail above? If
not you should rather add an assert check.

> +        isinited = 1;
> +    }
> +    return 0;
> +}
> +
> +void av_opencl_release_run_env(void)
> +{
> +    return release_opencl_env(&gpu_env);
> +}

pointless wrapper?

> +
> +int av_opencl_is_inited(void)
> +{
> +    return isinited;
> +}
> +
> +void av_opencl_get_kernel_env(AVOpenCLKernelEnv *env)
> +{
> +    env->context = gpu_env.context;
> +    env->command_queue = gpu_env.command_queue;
> +    env->program = gpu_env.programs[0];
> +}
> +

> +int av_opencl_buffer_create(void **cl_buf, int flags, size_t size, void *host_ptr)
> +{
> +    int status;
> +    *cl_buf = clCreateBuffer(gpu_env.context, (flags), (size), host_ptr, &status);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not create OpenCL buffer: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +

> +void av_opencl_release_buffer(void *cl_buf)
> +{
> +    int status = 0;
> +    if (cl_buf)
> +        status = clReleaseMemObject(cl_buf);

this should be a no-op in case cl_buf is NULL.

if (!cl_buf)
   return;

status = clReleaseMemObject(...

> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not release OpenCL buffer: %s\n",opencl_errstr(status));
> +    }

> +}
> +
> +int av_opencl_buffer_read(void *cl_inbuf, uint8_t *outbuf, size_t size)
> +{
> +    int status;
> +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, cl_inbuf,
> +                                      CL_TRUE,CL_MAP_READ, 0, sizeof(uint8_t) * size,
> +                                      0, NULL, NULL, &status);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not map OpenCL buffer: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    memcpy(outbuf,mapped,sizeof(uint8_t) * size);
> +
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inbuf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not unmap OpenCL buffer: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +cl_device_id av_opencl_get_device_id(void)
> +{
> +    if (!gpu_env.is_user_created) {
> +        return *(gpu_env.device_ids);
> +    } else
> +        return gpu_env.device_id;
> +}
> +
> +cl_context av_opencl_get_context(void)
> +{
> +    return gpu_env.context;
> +}
> +
> +cl_command_queue av_opencl_get_command_queue(void)
> +{
> +    return gpu_env.command_queue;
> +}
> +
> +int av_opencl_buffer_write_image(void *cl_inbuf, size_t cl_buffer_size, uint8_t **data,
> +                                   int *plane_size, int plane_num, int offset)
> +{
> +    int buffersize = 0;
> +    uint8_t *temp;
> +    int status;
> +    void *mapped;
> +    int i;
> +    if ((plane_num > 8) || (plane_num <= 0)) {
> +        return AVERROR(EIO);
> +    }
> +    for (i = 0;i < plane_num;i++) {
> +        buffersize += plane_size[i];
> +    }
> +    if (buffersize > cl_buffer_size) {
> +        av_log(&openclutils,AV_LOG_ERROR,"memory buffer size is too large\n");
> +        return AVERROR_EXTERNAL;
> +    }
> +    mapped = clEnqueueMapBuffer(gpu_env.command_queue, cl_inbuf,
> +                                      CL_TRUE,CL_MAP_WRITE, 0, buffersize + offset,
> +                                      0, NULL, NULL, &status);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not map OpenCL buffer: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    temp = mapped;
> +    temp += offset;
> +    for (i = 0;i < plane_num;i++) {
> +        memcpy(temp,data[i],plane_size[i]);
> +        temp += plane_size[i];
> +    }
> +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inbuf, mapped, 0, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils,AV_LOG_ERROR,"Could not unmap OpenCL buffer: %s\n",opencl_errstr(status));
> +        return AVERROR_EXTERNAL;
> +    }
> +    return 0;
> +}
> +
> +int av_opencl_buffer_read_image(void *cl_inbuf, size_t cl_buffer_size, uint8_t **data,
> +                                          int *plane_size, int plane_num)
> +{
> +    int size = 0;
> +    int ret = 0;
> +    uint8_t *temp;
> +    int i;

> +    if ((plane_num > 8) || (plane_num <= 0)) {
> +        return AVERROR(EIO);



> +    }
> +    for (i = 0;i < plane_num;i++) {
> +        size += plane_size[i];
> +    }
> +    if (size > cl_buffer_size) {
> +        av_log(&openclutils,AV_LOG_ERROR,"memory buffer size is too large\n");
> +        return AVERROR_EXTERNAL;
> +    }
> +    if (!(gpu_env.temp_buffer)) {
> +        gpu_env.temp_buffer = av_malloc(size);
> +        if (!gpu_env.temp_buffer)
> +            return AVERROR(ENOMEM);
> +        gpu_env.temp_buffer_size = size;
> +    }
> +
> +    if(size > gpu_env.temp_buffer_size) {
> +        av_free(gpu_env.temp_buffer);
> +        gpu_env.temp_buffer = av_malloc(size);
> +        if (!gpu_env.temp_buffer)
> +            return AVERROR(ENOMEM);
> +        gpu_env.temp_buffer_size = size;
> +    }
> +    temp = gpu_env.temp_buffer;
> +    ret = av_opencl_buffer_read(cl_inbuf, gpu_env.temp_buffer, size);
> +    if (!ret) {
> +        for (int i = 0;i < plane_num;i++) {
> +            memcpy(data[i],temp,plane_size[i]);
> +            temp += plane_size[i];
> +        }
> +    } else {
> +        av_log(&openclutils,AV_LOG_ERROR,"read OpenCL buffer error\n");
> +        return ret;
> +    }
> +    return ret;
> +}
> +
> diff --git a/libavutil/opencl.h b/libavutil/opencl.h
> new file mode 100644
> index 0000000..8e0d05e
> --- /dev/null
> +++ b/libavutil/opencl.h
> @@ -0,0 +1,205 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "config.h"
> +

> +#ifndef LIBAVUTIL_OPENCLWRAPPER_H
> +#define LIBAVUTIL_OPENCLWRAPPER_H

LIBAVUTIL_OPENCL_H

> +
> +#include <CL/cl.h>
> +
> +#define AV_OPENCL_KERNEL( ... )# __VA_ARGS__
> +
> +#define AV_OPENCL_MAX_KERNEL_NAME_SIZE 150
> +
> +typedef struct AVOpenCLKernelEnv {
> +    cl_context context;
> +    cl_command_queue command_queue;
> +    cl_program program;
> +    cl_kernel kernel;
> +    char kernel_name[AV_OPENCL_MAX_KERNEL_NAME_SIZE];
> +} AVOpenCLKernelEnv;
> +
> +typedef struct AVOpenCLExternalInfo {
> +    cl_platform_id platform;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *device_ids;
> +    cl_device_id  device_id;
> +    cl_command_queue command_queue;
> +    char *platform_name;
> +} AVOpenCLExternalInfo;
> +

> +/**
> + * User defined function, used to set the input parameter in the kernel
> + *environment. This function launches kernel and copies data from GPU to
> + *CPU, or from CPU to GPU.
> + */
> +typedef int (*av_opencl_kernel_function) (void **userdata, AVOpenCLKernelEnv *kenv);

/**
 * User defined function, used to set the input parameter in the kernel
 * environment. This function launches kernel and copies data from GPU to
 * CPU, or from CPU to GPU.
 */
typedef int (*av_opencl_kernel_function) (void **userdata, AVOpenCLKernelEnv *kenv);

spaces after leading "*"

same below

> +
> +/**
> + * Register a function for running the kernel specified by the kernel name. The function is user defined to run the kernel.
                                                                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Drop this since not very useful.

> + *
> + */
> +int av_opencl_register_kernel_function(const char *kernel_name, av_opencl_kernel_function function);

What happens in case the function was already defined?
Also missing docs for the return code.

> +
> +/**
> + *Load OpenCL kernel.
> + *
> + *@param kernel_name   this kernel name is used to find the kernel in OpenCL runtime environment.
> + *@param userdata         this userdata is the all parameters for running the kernel specified by kernel name
> + *@return  >=0 on success, others on failure

a negative error value on failure

> + */
> +int av_opencl_run_kernel(const char *kernel_name, void **userdata);
> +
> +/**
> + * Init the run time  OpenCL environment.
> + *
> + *This function must be called befor calling any function related to OpenCL.
> + *
> + *
> + *@param build_option         option of compile the kernel in OpenCL runtime environment,reference "OpenCL Specification Version: 1.2 chapter 5.6.4"
> + *@param ext_opencl_info    this is the extern OpenCL environment which the application program has created
> + *@return  >=0 on success, a negative value on error
> + */
> +int av_opencl_init_run_env(const char *build_option,void *ext_opencl_info);

av_opencl_init() also should be good

Also what's this ext_opencl_info? Why it is *void (and then casted to
a type later in the code)?

> +
> +/**
> + * Release OpenCL resources , this function must be called after calling any functions related to OpenCL.
> + */
> +void av_opencl_release_run_env(void);

av_opencl_uninit()?

> +/**
> + * Get the OpenCL status, this function is used the check whether or not the OpenCL run time has been created.
> + *

> + *@return  >=0 not init, 1, inited;

self-contradictory

> + *
> + */
> +int av_opencl_is_inited(void);

> +
> +/**
> + * Create kernel object  by a kernel name on the specified OpenCL run time indicated by env parameter.
> + *
> + *@param kernelname          kernel name.
> + *@param env                     The kernel environment which has been created by av_opencl_init_run_env
> + *@return  >=0 on success, a negative value on error
> + *
> + */
> +int av_opencl_create_kernel(const char *kernelname, AVOpenCLKernelEnv *env);

/**
 * Create kernel object by a kernel name on the specified OpenCL run time
 * indicated by the env parameter.
 *
 * @param kernel_name      kernel name
 * @param env              the kernel environment which has been created by av_opencl_init_run_env
 * @return >=0 on success, a negative error code on failure
 */
int av_opencl_create_kernel(const char *kernel_name, AVOpenCLKernelEnv *env);

> +
> +/**
> + *  Release kernel object.
> + *
> + *@param env  The kernel environment which has been created by av_opencl_init_run_env.
> + */
> +void av_opencl_release_kernel(AVOpenCLKernelEnv * env);
> +
> +/**
> + *  Get the kernel environment.
> + *
> + *@param env  The kernel environment which has been created by av_opencl_init_run_env.
> + *
> + */
> +void av_opencl_get_kernel_env(AVOpenCLKernelEnv *env);
> +

> +/**
> + *  Create OpenCL buffer, the buffer is used to save the data which is used by OpenCL kernel.
> + *
> + *@param cl_buf         The pointer of OpenCL buffer.
> + *@param flags           The flags which used to control buffer attribute

> + *@param size            The size of OpenCL buffer

size in bytes of the OpenCL buffer to create

> + *@param host_ptr      The host pointer of OpenCL buffer
> + *@return  >=0 on success, a negative value on error
> + */
> +int av_opencl_buffer_create(void **cl_buf, int flags, size_t size,void *host_ptr);

int av_opencl_buffer_create(void **cl_buf, size_t cl_buf_size, int flags, void *host_ptr);

slightly more logical

> +
> +/**
> + *  Read data form OpenCL buffer to memory. Src is OpenCL buffer, dst is CPU memory
> + *
> + *@param cl_buf         The pointer of OpenCL buffer.
> + *@param outbuf         CPU memory
> + *@param size            The size of OpenCL buffer
> + *@return  >=0 on success, a negative value on error
> + */
> +int av_opencl_buffer_read(void *cl_inbuf, uint8_t *outbuf, size_t size);

/**
 * Read data from OpenCL buffer to memory buffer.
 *
 * @param dst_buf       pointer to destination buffer (CPU memory)
 * @param src_cl_buf    pointer to source OpenCL buffer
 * @param size          size in bytes of the source and destination buffers
 * @return >=0 on success, a negative error code in case of failure
 */
int av_opencl_buffer_read(uint8_t *dst_buf, void *src_cl_buf, size_t buf_size);

DST, SRC is favored over SRC, DST since it conveys the idea:
DST = SRC

> +
> +/**
> + *  Write data from memory to OpenCL buffer. Src is frames buffer(data[0],data[1]...), dst is OpenCL buffer.
> + *
> + *@param cl_buf                The pointer of OpenCL buffer.
> + *@param cl_buffer_size     OpenCL buffer size
> + *@param data                  Picture or audio data for each plane
> + *@plane_size                   Size of each plane

@param plane_size

> + *@param plane_num         The input plane number
> + *@param offset                The offset of OpenCL buffer start position
> + *@return  >=0 on success, other values on error
> + */
> +int av_opencl_buffer_write_image(void *cl_inbuf, size_t cl_buffer_size, uint8_t **data, int *plane_size, int plane_num, int offset);

cl_inbuf is misnamed supposing that it is the *destination* buffer.

> +
> +/**
> + *  Get OpenCL device id.
> + *
> + */
> +cl_device_id av_opencl_get_device_id(void);
> +
> +/**
> + *  Get OpenCL context.
> + *
> + */
> +cl_context av_opencl_get_context(void);
> +
> +/**
> + *  Get OpenCL command queue.
> + *
> + */
> +cl_command_queue av_opencl_get_command_queue(void);
> +
> +/**
> + *  Release OpenCL buffer.
> + *
> + */
> +void av_opencl_release_buffer(void *cl_buf);
> +

> +/**
> + *  Read frame data from OpenCL buffer to frame buffer, src buffer is OpenCL buffer, dst buffer is frame buffer(data[0],data[1]....).
> + *
> + *@param cl_buf                The pointer of OpenCL buffer.
> + *@param cl_buffer_size     OpenCL buffer size
> + *@param data                  Picture or audio data for each plane
> + *@plane_size                   Size of each plane
> + *@param plane_num         The input plane number
> + *@return  >=0 on success, other values on error
> + */
> +int av_opencl_buffer_read_image(void *cl_inbuf, size_t cl_buffer_size, uint8_t **data,
> +                                          int *plane_size, int plane_num);

Put this close to the rest of the av_opencl_buffer_ API.

Also mismatch between doxy and declaration function parameters.

> +/**
> + *  Register kernel.This function is use to register kernel and kernel code.OpenCL wrapper will use the kernel name
> + *  to find the kernel code and compile it in the runtime. The kernel name should no longer than 64 and the max kernel
> + *  number is 200.
> + *
> + *@param kernel_name                  Regist kernel name
> + *@param kernel_code                   Kernel code
> + *@return  >=0 on success, other values on error
> + */
> +int av_opencl_register_kernel(const char *kernel_name,const char *kernel_code);

I already rewrote this doxy in my previous reply. Please try to follow my
directions or we have to restart every time from the same point.

> +

> +#endif/* AVUTIL_OPENCL_... */

#endif /* LIBAVUTIL_OPENCL_H */
-- 
FFmpeg = Fast and Fascinating Maxi Pitiless Elastic Gadget


More information about the ffmpeg-devel mailing list