[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130331

Michael Niedermayer michaelni at gmx.at
Sun Mar 31 12:39:28 CEST 2013


On Sun, Mar 31, 2013 at 03:57:21PM +0800, Wei Gao wrote:
[...]
> +void av_opencl_free_external_environment(AVOpenCLExternalEnv *ext_opencl_env)
> +{
> +    av_freep(&ext_opencl_env);
> +}

this doesnt work, av_freep sets the _local_ pointer to zero not the
one from the user, you need 

void av_opencl_free_external_environment(AVOpenCLExternalEnv **ext_opencl_env)

for the zeroing to work


[...]
> +int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
> +{
> +    cl_int status;
> +    int i, ret = 0;
> +    LOCK_OPENCL;
> +    if (strlen(kernel_name) + 1 > AV_OPENCL_MAX_KERNEL_NAME_LEN) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Created kernel name %s is too long\n", kernel_name);
> +        ret = AVERROR(EINVAL);
> +        goto end;
> +    }
> +    if (!env->kernel) {
> +        for (i = 0;i < gpu_env.kernel_count;i++) {
> +            if (av_strcasecmp(kernel_name, gpu_env.kernel_node[i].kernel_name) == 0) {
> +                env->kernel = gpu_env.kernel_node[i].kernel;
> +                gpu_env.kernel_node[i].kernel_ref++;
> +                break;
> +            }
> +        }
> +        if (!env->kernel) {
> +            if (gpu_env.kernel_count >= MAX_KERNEL_NUM) {
> +                av_log(&openclutils, AV_LOG_ERROR,
> +                "Could not create kernel with name '%s', maximum number of kernels %d already reached\n",
> +                    kernel_name, MAX_KERNEL_NUM);
> +                ret = AVERROR(EINVAL);
> +                goto end;
> +            }
> +            for (i = 0; i < gpu_env.program_count; i++) {
> +                env->kernel = clCreateKernel(gpu_env.programs[i], kernel_name, &status);
> +                if (status == CL_SUCCESS)
> +                    break;
> +            }
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL kernel: %s\n", opencl_errstr(status));
> +                ret = AVERROR_EXTERNAL;
> +                goto end;
> +            }
> +            av_strlcpy(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name, kernel_name,
> +                       sizeof(gpu_env.kernel_node[gpu_env.kernel_count].kernel_name));
> +            gpu_env.kernel_node[gpu_env.kernel_count].kernel= env->kernel;
> +            gpu_env.kernel_node[gpu_env.kernel_count].kernel_ref++;
> +            gpu_env.kernel_count++;
> +        }
> +        env->command_queue = gpu_env.command_queue;
> +        av_strlcpy(env->kernel_name, kernel_name, sizeof(env->kernel_name));
> +    }
> +end:
> +    UNLOCK_OPENCL;
> +    return ret;
> +}
> +
> +void av_opencl_release_kernel(AVOpenCLKernelEnv *env)
> +{
> +    cl_int status;
> +    int i;
> +    LOCK_OPENCL
> +    if (!env->kernel)
> +        goto end;
> +    for (i = 0;i < gpu_env.kernel_count;i++) {
> +        if (gpu_env.kernel_node[i].kernel == env->kernel) {
> +            if (gpu_env.kernel_node[i].kernel_ref == 1) {
> +                status = clReleaseKernel(env->kernel);
> +                if (status != CL_SUCCESS) {
> +                    av_log(&openclutils, AV_LOG_ERROR, "Could not release kernel: %s\n",
> +                          opencl_errstr(status));
> +                }
> +                gpu_env.kernel_node[i].kernel = NULL;
> +                env->kernel = NULL;
> +                memset(gpu_env.kernel_node[i].kernel_name, 0, sizeof(gpu_env.kernel_node[i].kernel_name));
> +                gpu_env.kernel_count--;
> +            }
> +            gpu_env.kernel_node[i].kernel_ref--;

This doesnt look like creating kernels A,B,C,D and then releasing B
would work
as the kernel_count would then point to D and overwrite it on the
next create or loose it not being able to release

also, i wonder if caching kernels like this is worth it, is
clCreateKernel() slow ?


[...]
> +void av_opencl_uninit(void)
> +{
> +    cl_int status;
> +    int i;
> +    LOCK_OPENCL
> +    if (!gpu_env.opencl_is_inited)
> +        goto end;
> +    if (gpu_env.is_user_created)
> +        goto end;
> +    for (i = 0;i < gpu_env.kernel_count;i++) {
> +        if (gpu_env.kernel_node[i].kernel)
> +            goto end;
> +    }
> +    for (i = 0; i < gpu_env.program_count; i++) {
> +        if (gpu_env.programs[i]) {
> +            status = clReleaseProgram(gpu_env.programs[i]);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL program: %s\n", opencl_errstr(status));
> +            }
> +            gpu_env.programs[i] = NULL;
> +        }
> +    }
> +    if (gpu_env.command_queue) {
> +        status = clReleaseCommandQueue(gpu_env.command_queue);
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL command queue: %s\n", opencl_errstr(status));
> +        }
> +        gpu_env.command_queue = NULL;
> +    }
> +    if (gpu_env.context) {
> +        status = clReleaseContext(gpu_env.context);
> +        if (status != CL_SUCCESS) {
> +            av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL context: %s\n", opencl_errstr(status));
> +        }
> +        gpu_env.context = NULL;
> +    }
> +    av_freep(&(gpu_env.device_ids));
> +    gpu_env.opencl_is_inited = 0;

this can be hit by a race condition

consider
Thread1 av_opencl_init, av_opencl_create_kernel, av_opencl_release_kernel, av_opencl_uninit
Thread2 av_opencl_init,                                                                      av_opencl_create_kernel, av_opencl_release_kernel, av_opencl_uninit

Thread1 uninit frees the various things that thread2 will still access

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many things microsoft did are stupid, but not doing something just because
microsoft did it is even more stupid. If everything ms did were stupid they
would be bankrupt already.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130331/93d1a7c0/attachment.asc>


More information about the ffmpeg-devel mailing list