[FFmpeg-devel] [PATCH 1/3] libavutil/opencl: add opencl device list APIs 20130410
Stefano Sabatini
stefasab at gmail.com
Wed Apr 10 14:09:28 CEST 2013
On date Wednesday 2013-04-10 17:28:03 +0800, Wei Gao encoded:
>
> From 6a2e3e305b5d85d39ae373adc7d606f9db041f84 Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Wed, 10 Apr 2013 17:02:08 +0800
> Subject: [PATCH 1/3] add opencl device list APIs 20130410
>
> ---
> libavutil/opencl.c | 340 +++++++++++++++++++++++++++++++----------------------
> libavutil/opencl.h | 40 ++++++-
> 2 files changed, 239 insertions(+), 141 deletions(-)
>
> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> index d0f75b9..ca5f6d7 100644
> --- a/libavutil/opencl.c
> +++ b/libavutil/opencl.c
> @@ -43,22 +43,17 @@ static pthread_mutex_t atomic_opencl_lock = PTHREAD_MUTEX_INITIALIZER;
> #define MAX_KERNEL_CODE_NUM 200
>
> typedef struct {
> - int dev_idx;
> - int platform_idx;
> -} UserSpecDevInfo;
> -
> -typedef struct {
> int is_compiled;
> const char *kernel_string;
> } KernelCode;
>
> typedef struct {
> int init_count;
> - UserSpecDevInfo usr_spec_dev_info;
> + int platform_idx;
> + int device_idx;
> cl_platform_id platform_id;
> cl_device_type device_type;
> cl_context context;
> - cl_device_id *device_ids;
> cl_device_id device_id;
> cl_command_queue command_queue;
> int program_count;
> @@ -71,6 +66,7 @@ typedef struct {
> * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper.
> */
> int is_user_created;
> + AVOpenCLDeviceList device_list;
> } GPUEnv;
>
> typedef struct {
> @@ -170,6 +166,157 @@ static const char *opencl_errstr(cl_int status)
> return "unknown error";
> }
>
> +static void release_device_list(AVOpenCLDeviceList *device_list)
> +{
> + int i, j;
> + if (!device_list)
> + return;
> + for (i = 0; i < device_list->platform_num; i++) {
> + if (!device_list->platform_node[i])
> + continue;
> + for (j = 0; j < device_list->platform_node[i]->device_num; j++) {
> + av_freep(&(device_list->platform_node[i]->device_node[j]));
> + }
> + av_freep(&device_list->platform_node[i]->device_node);
> + av_freep(&device_list->platform_node[i]);
> + }
> + av_freep(&device_list->platform_node);
> + device_list->platform_num = 0;
> +}
> +
> +static int get_device_list(AVOpenCLDeviceList *device_list)
> +{
> + cl_int status;
> + int i, j, k, temp, total_devices_num,ret = 0;
> + int *devices_num;
> + cl_platform_id *platform_ids = NULL;
> + cl_device_id *device_ids = NULL;
> + status = clGetPlatformIDs(0, NULL, &device_list->platform_num);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR,
> + "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> + }
> + platform_ids = av_mallocz(device_list->platform_num * sizeof(cl_platform_id));
> + if (!platform_ids)
> + return AVERROR(ENOMEM);
> + status = clGetPlatformIDs(device_list->platform_num, platform_ids, NULL);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_ERROR,
> + "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> + ret = AVERROR_EXTERNAL;
> + goto end;
> + }
> + device_list->platform_node = av_mallocz(device_list->platform_num * sizeof(AVOpenCLPlatformNode *));
> + if (!device_list->platform_node) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + devices_num = av_mallocz(sizeof(int) * FF_ARRAY_ELEMS(device_type));
> + if (!devices_num) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + for (i = 0; i < device_list->platform_num; i++) {
> + device_list->platform_node[i] = av_mallocz(sizeof(AVOpenCLPlatformNode));
> + if (!device_list->platform_node[i]) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + device_list->platform_node[i]->platform_id = platform_ids[i];
> + status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
> + sizeof(device_list->platform_node[i]->platform_name),
> + device_list->platform_node[i]->platform_name, NULL);
> + total_devices_num = 0;
> + for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
> + status = clGetDeviceIDs(device_list->platform_node[i]->platform_id,
> + device_type[j], 0, NULL, &devices_num[j]);
> + total_devices_num += devices_num[j];
> + }
> + device_list->platform_node[i]->device_node = av_mallocz(total_devices_num * sizeof(AVOpenCLDeviceNode *));
> + if (!device_list->platform_node[i]->device_node) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
> + if (devices_num[j]) {
> + device_ids = av_mallocz(devices_num[j] * sizeof(cl_device_id));
> + if (!device_ids) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + status = clGetDeviceIDs(device_list->platform_node[i]->platform_id, device_type[j],
> + devices_num[j], device_ids, NULL);
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_WARNING,
> + "Get device ID error: %s\n", opencl_errstr(status));
> + av_freep(&device_ids);
> + continue;
> + }
> + for (k = 0; k < devices_num[j]; k++) {
> + temp = device_list->platform_node[i]->device_num;
better name for "temp"?
> + device_list->platform_node[i]->device_node[temp] = av_mallocz(sizeof(AVOpenCLDeviceNode));
> + if (!device_list->platform_node[i]->device_node[temp]) {
> + ret = AVERROR(ENOMEM);
> + goto end;
> + }
> + device_list->platform_node[i]->device_node[temp]->device_id = device_ids[k];
> + device_list->platform_node[i]->device_node[temp]->device_type = device_type[j];
> + status = clGetDeviceInfo(device_list->platform_node[i]->device_node[temp]->device_id,
> + CL_DEVICE_NAME,
> + sizeof(device_list->platform_node[i]->device_node[temp]->device_name),
> + device_list->platform_node[i]->device_node[temp]->device_name,
> + NULL);
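This would be much more readable if you introduced a local variable, e.g.:
AVOpenCLDeviceNode *node = device_list->platform_node[i]->device_node[temp];
and then accessed node->device_id, node->device_type and node->device_name.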
> + if (status != CL_SUCCESS) {
> + av_log(&openclutils, AV_LOG_WARNING,
> + "Get device name error: %s\n", opencl_errstr(status));
> + continue;
> + }
> + device_list->platform_node[i]->device_num++;
> + }
> + av_freep(&device_ids);
> + }
> + }
> + }
> +end:
> + av_freep(&platform_ids);
> + av_freep(&devices_num);
> + av_freep(&device_ids);
> + if (ret < 0)
> + release_device_list(device_list);
> + return ret;
> +}
> +
> +AVOpenCLDeviceList *av_opencl_get_device_list(void)
> +{
> + AVOpenCLDeviceList *device_list = NULL;
> + int ret = 0;
> + device_list = av_mallocz(sizeof(AVOpenCLDeviceList));
> + if (!device_list) {
> + av_log(&openclutils, AV_LOG_ERROR,
> + "Could not malloc opencl device list data space\n");
"Could not allocate opencl device list\n"
> + ret = AVERROR(ENOMEM);
or just return ENOMEM (which is customary when a malloc fails).
> + goto end;
> + }
> + ret = get_device_list(device_list);
> + if (ret < 0) {
> + av_log(&openclutils, AV_LOG_ERROR, "Could not get device list from environment\n");
> + goto end;
> + }
> + end:
> + if (ret < 0) {
> + release_device_list(device_list);
> + av_freep(&device_list);
> + }
> + return device_list;
> +}
> +
> +void av_opencl_free_device_list(AVOpenCLDeviceList **device_list)
> +{
> + release_device_list(*device_list);
> + av_freep(device_list);
> +}
> +
> AVOpenCLExternalEnv *av_opencl_alloc_external_env(void)
> {
> AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv));
> @@ -273,13 +420,9 @@ end:
>
> static int init_opencl_env(GPUEnv *gpu_env, AVOpenCLExternalEnv *ext_opencl_env)
> {
> - size_t device_length;
> cl_int status;
> - cl_uint num_platforms, num_devices;
> - cl_platform_id *platform_ids = NULL;
> cl_context_properties cps[3];
> - char platform_name[100];
> - int i, j, ret = 0;
> + int i, ret = 0;
>
> if (ext_opencl_env) {
> if (gpu_env->is_user_created)
> @@ -288,154 +431,80 @@ static int init_opencl_env(GPUEnv *gpu_env, AVOpenCLExternalEnv *ext_opencl_env)
> gpu_env->is_user_created = 1;
> gpu_env->command_queue = ext_opencl_env->command_queue;
> gpu_env->context = ext_opencl_env->context;
> - gpu_env->device_ids = ext_opencl_env->device_ids;
> gpu_env->device_id = ext_opencl_env->device_id;
> gpu_env->device_type = ext_opencl_env->device_type;
> } else {
> if (!gpu_env->is_user_created) {
> - status = clGetPlatformIDs(0, NULL, &num_platforms);
> - if (status != CL_SUCCESS) {
> - av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> - return AVERROR_EXTERNAL;
> + if (!gpu_env->device_list.platform_num) {
> + ret = get_device_list(&gpu_env->device_list);
> + if (ret < 0) {
> + return ret;
> + }
> }
> - if (gpu_env->usr_spec_dev_info.platform_idx >= 0) {
> - if (num_platforms < gpu_env->usr_spec_dev_info.platform_idx + 1) {
> + if (gpu_env->platform_idx >= 0) {
> + if (gpu_env->device_list.platform_num < gpu_env->platform_idx + 1) {
> av_log(&openclutils, AV_LOG_ERROR, "User set platform index not exist\n");
> return AVERROR(EINVAL);
> }
> - }
> - if (num_platforms > 0) {
> - platform_ids = av_mallocz(num_platforms * sizeof(cl_platform_id));
> - if (!platform_ids) {
> - ret = AVERROR(ENOMEM);
> - goto end;
> - }
> - status = clGetPlatformIDs(num_platforms, platform_ids, NULL);
> - if (status != CL_SUCCESS) {
> - av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform ids: %s\n", opencl_errstr(status));
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - i = 0;
> - if (gpu_env->usr_spec_dev_info.platform_idx >= 0) {
> - i = gpu_env->usr_spec_dev_info.platform_idx;
> + if (!gpu_env->device_list.platform_node[gpu_env->platform_idx]->device_num) {
> + av_log(&openclutils, AV_LOG_ERROR, "No devices in user specific platform\n");
"... with index %d\n"
so that you give more feedback to the poor user.
> + return AVERROR(EINVAL);
> }
> - while (i < num_platforms) {
> - status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
> - sizeof(platform_name), platform_name,
> - NULL);
> -
> - if (status != CL_SUCCESS) {
> - av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platform info: %s\n", opencl_errstr(status));
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - gpu_env->platform_id = platform_ids[i];
> - for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
> - status = clGetDeviceIDs(gpu_env->platform_id, device_type[j], 0, NULL, &num_devices);
> - if (status == CL_SUCCESS)
> - break;
> - }
> - if (num_devices)
> - break;
> - if (gpu_env->usr_spec_dev_info.platform_idx >= 0) {
> - av_log(&openclutils, AV_LOG_ERROR, "Device number of user set platform is 0\n");
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - if (i >= num_platforms - 1) {
> - if (status != CL_SUCCESS) {
> - av_log(&openclutils, AV_LOG_ERROR,
> - "Could not get OpenCL device ids: %s\n", opencl_errstr(status));
> - ret = AVERROR(EINVAL);
> - goto end;
> - }
> + gpu_env->platform_id = gpu_env->device_list.platform_node[gpu_env->platform_idx]->platform_id;
> + } else {
> + /* get a usable platform for default*/
... by default */
> + for (i = 0; i < gpu_env->device_list.platform_num; i++) {
> + if (gpu_env->device_list.platform_node[i]->device_num) {
> + gpu_env->platform_id = gpu_env->device_list.platform_node[i]->platform_id;
> + gpu_env->platform_idx = i;
> + break;
> }
> - i++;
> }
> }
> if (!gpu_env->platform_id) {
> av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
> - ret = AVERROR_EXTERNAL;
> - goto end;
> + return AVERROR_EXTERNAL;
> }
> - if (gpu_env->usr_spec_dev_info.dev_idx >= 0) {
> - if (num_devices < gpu_env->usr_spec_dev_info.dev_idx + 1) {
> + /* get a usable device*/
> + if (gpu_env->device_idx >= 0) {
> + if (gpu_env->device_list.platform_node[gpu_env->platform_idx]->device_num < gpu_env->device_idx + 1) {
> av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device idx in the user set platform\n");
> - ret = AVERROR(EINVAL);
> - goto end;
> + return AVERROR(EINVAL);
> }
> + gpu_env->device_id = gpu_env->device_list.platform_node[gpu_env->platform_idx]->device_node[gpu_env->device_idx]->device_id;
> + gpu_env->device_type = gpu_env->device_list.platform_node[gpu_env->platform_idx]->device_node[gpu_env->device_idx]->device_type;
> + } else {
> + gpu_env->device_id = gpu_env->device_list.platform_node[gpu_env->platform_idx]->device_node[0]->device_id;
> + gpu_env->device_type = gpu_env->device_list.platform_node[gpu_env->platform_idx]->device_node[0]->device_type;
> + gpu_env->device_idx = 0;
You can factor out
gpu_env->device_list.platform_node[gpu_env->platform_idx] into a local
variable to improve readability.
> }
>
> /*
> * Use available platform.
> */
> - av_log(&openclutils, AV_LOG_VERBOSE, "Platform Name: %s\n", platform_name);
> + av_log(&openclutils, AV_LOG_VERBOSE, "Platform Name: %s, device id is 0x%x\n",
> + gpu_env->device_list.platform_node[gpu_env->platform_idx]->platform_name,
> + (unsigned int)gpu_env->device_id);
Inconsistent logging style, please use:
"Platform Name: %s, device id: 0x%x\n"
> cps[0] = CL_CONTEXT_PLATFORM;
> cps[1] = (cl_context_properties)gpu_env->platform_id;
> cps[2] = 0;
> -
> /* Check for GPU. */
> - for (i = 0; i < FF_ARRAY_ELEMS(device_type); i++) {
> - gpu_env->device_type = device_type[i];
> - gpu_env->context = clCreateContextFromType(cps, gpu_env->device_type,
> - NULL, NULL, &status);
> - if (status == CL_SUCCESS)
> - break;
> - }
> - if (!gpu_env->context) {
> - av_log(&openclutils, AV_LOG_ERROR,
> - "Could not get OpenCL context from device type: %s\n", opencl_errstr(status));
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - /* Detect OpenCL devices. */
> - /* First, get the size of device list data */
> - status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES,
> - 0, NULL, &device_length);
> + gpu_env->context = clCreateContextFromType(cps, gpu_env->device_type,
> + NULL, NULL, &status);
> if (status != CL_SUCCESS) {
> av_log(&openclutils, AV_LOG_ERROR,
> - "Could not get OpenCL device length: %s\n", opencl_errstr(status));
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - if (device_length == 0) {
> - av_log(&openclutils, AV_LOG_ERROR, "Could not get OpenCL device length\n");
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - /* Now allocate memory for device list based on the size we got earlier */
> - gpu_env->device_ids = av_mallocz(device_length);
> - if (!gpu_env->device_ids) {
> - ret = AVERROR(ENOMEM);
> - goto end;
> - }
> - /* Now, get the device list data */
> - status = clGetContextInfo(gpu_env->context, CL_CONTEXT_DEVICES, device_length,
> - gpu_env->device_ids, NULL);
> - if (status != CL_SUCCESS) {
> - av_log(&openclutils, AV_LOG_ERROR,
> - "Could not get OpenCL context info: %s\n", opencl_errstr(status));
> - ret = AVERROR_EXTERNAL;
> - goto end;
> - }
> - /* Create OpenCL command queue. */
> - i = 0;
> - if (gpu_env->usr_spec_dev_info.dev_idx >= 0) {
> - i = gpu_env->usr_spec_dev_info.dev_idx;
> + "Could not get OpenCL context from device type: %s\n", opencl_errstr(status));
> + return AVERROR_EXTERNAL;
> }
> - gpu_env->command_queue = clCreateCommandQueue(gpu_env->context, gpu_env->device_ids[i],
> + gpu_env->command_queue = clCreateCommandQueue(gpu_env->context, gpu_env->device_id,
> 0, &status);
> if (status != CL_SUCCESS) {
> av_log(&openclutils, AV_LOG_ERROR,
> "Could not create OpenCL command queue: %s\n", opencl_errstr(status));
> - ret = AVERROR_EXTERNAL;
> - goto end;
> + return AVERROR_EXTERNAL;
> }
> }
> }
> -end:
> - av_free(platform_ids);
> return ret;
> }
>
> @@ -481,17 +550,8 @@ static int compile_kernel_file(GPUEnv *gpu_env, const char *build_options)
> ret = AVERROR_EXTERNAL;
> goto end;
> }
> - i = 0;
> - if (gpu_env->usr_spec_dev_info.dev_idx >= 0)
> - i = gpu_env->usr_spec_dev_info.dev_idx;
> - /* create a cl program executable for all the devices specified */
> - if (!gpu_env->is_user_created)
> - status = clBuildProgram(gpu_env->programs[gpu_env->program_count], 1, &gpu_env->device_ids[i],
> - build_options, NULL, NULL);
> - else
> - status = clBuildProgram(gpu_env->programs[gpu_env->program_count], 1, &(gpu_env->device_id),
> - build_options, NULL, NULL);
> -
> + status = clBuildProgram(gpu_env->programs[gpu_env->program_count], 1, &(gpu_env->device_id),
> + build_options, NULL, NULL);
> if (status != CL_SUCCESS) {
> av_log(&openclutils, AV_LOG_ERROR,
> "Could not compile OpenCL kernel: %s\n", opencl_errstr(status));
> @@ -516,10 +576,10 @@ int av_opencl_init(AVDictionary *options, AVOpenCLExternalEnv *ext_opencl_env)
> opt_platform_entry = av_dict_get(options, "platform_idx", NULL, 0);
> opt_device_entry = av_dict_get(options, "device_idx", NULL, 0);
> /* initialize devices, context, command_queue */
> - gpu_env.usr_spec_dev_info.platform_idx = -1;
> - gpu_env.usr_spec_dev_info.dev_idx = -1;
> + gpu_env.platform_idx = -1;
> + gpu_env.device_idx = -1;
> if (opt_platform_entry) {
> - gpu_env.usr_spec_dev_info.platform_idx = strtol(opt_platform_entry->value, &pos, 10);
> + gpu_env.platform_idx = strtol(opt_platform_entry->value, &pos, 10);
> if (pos == opt_platform_entry->value) {
> av_log(&openclutils, AV_LOG_ERROR, "Platform index should be a number\n");
> ret = AVERROR(EINVAL);
> @@ -527,7 +587,7 @@ int av_opencl_init(AVDictionary *options, AVOpenCLExternalEnv *ext_opencl_env)
> }
> }
> if (opt_device_entry) {
> - gpu_env.usr_spec_dev_info.dev_idx = strtol(opt_device_entry->value, &pos, 10);
> + gpu_env.device_idx = strtol(opt_device_entry->value, &pos, 10);
> if (pos == opt_platform_entry->value) {
> av_log(&openclutils, AV_LOG_ERROR, "Device index should be a number\n");
> ret = AVERROR(EINVAL);
> @@ -595,7 +655,7 @@ void av_opencl_uninit(void)
> }
> gpu_env.context = NULL;
> }
> - av_freep(&(gpu_env.device_ids));
> + release_device_list(&gpu_env.device_list);
> end:
> UNLOCK_OPENCL
> }
> diff --git a/libavutil/opencl.h b/libavutil/opencl.h
> index 6ebde10..5c19cf1 100644
> --- a/libavutil/opencl.h
> +++ b/libavutil/opencl.h
> @@ -39,6 +39,28 @@
>
> #define AV_OPENCL_MAX_KERNEL_NAME_SIZE 150
>
> +#define AV_OPENCL_MAX_DEVICE_NAME_SIZE 100
> +
> +#define AV_OPENCL_MAX_PLATFORM_NAME_SIZE 100
> +
> +typedef struct {
> + int device_type;
> + char device_name[AV_OPENCL_MAX_DEVICE_NAME_SIZE];
> + cl_device_id device_id;
> +} AVOpenCLDeviceNode;
> +
> +typedef struct {
> + cl_platform_id platform_id;
> + char platform_name[AV_OPENCL_MAX_PLATFORM_NAME_SIZE];
> + int device_num;
> + AVOpenCLDeviceNode **device_node;
> +} AVOpenCLPlatformNode;
> +
> +typedef struct {
> + int platform_num;
> + AVOpenCLPlatformNode **platform_node;
> +} AVOpenCLDeviceList;
> +
> typedef struct {
> cl_command_queue command_queue;
> cl_kernel kernel;
> @@ -49,13 +71,29 @@ typedef struct {
> cl_platform_id platform_id;
> cl_device_type device_type;
> cl_context context;
> - cl_device_id *device_ids;
> cl_device_id device_id;
> cl_command_queue command_queue;
> char *platform_name;
> } AVOpenCLExternalEnv;
>
> /**
> + * Get OpenCL device list.
> + *
> + * It must be freed with av_opencl_free_device_list().
> + *
> + * @return pointer to allocated device list
> + */
> +AVOpenCLDeviceList *av_opencl_get_device_list(void);
Would it make sense to have:
int av_opencl_get_device_list(AVOpenCLDeviceList **list);
instead?
That way you can propagate the error code to the application.
> +
> +/**
> + * Free OpenCL device list.
> + *
> + * @param device_list pointer to OpenCL environment device list
> + * created by av_opencl_get_device_list()
> + */
> +void av_opencl_free_device_list(AVOpenCLDeviceList **device_list);
> +
> +/**
> * Allocate OpenCL external environment.
> *
> * It must be freed with av_opencl_free_external_env().
> --
> 1.7.11.msysgit.1
[...]
--
FFmpeg = Friendly and Funny Meaningful Pure Ephemeral Guru
More information about the ffmpeg-devel
mailing list