FFmpeg: libavfilter/vf_libopencv.c Source File

00001 /*
00002  * Copyright (c) 2010 Stefano Sabatini
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00026 /* #define DEBUG */
00027 
00028 #include <opencv/cv.h>
00029 #include <opencv/cxcore.h>
00030 #include "libavutil/avstring.h"
00031 #include "libavutil/common.h"
00032 #include "libavutil/file.h"
00033 #include "avfilter.h"
00034 #include "formats.h"
00035 #include "video.h"
00036 
00037 static void fill_iplimage_from_picref(IplImage *img, const AVFilterBufferRef *picref, enum PixelFormat pixfmt)
00038 {
00039     IplImage *tmpimg;
00040     int depth, channels_nb;
00041 
00042     if      (pixfmt == PIX_FMT_GRAY8) { depth = IPL_DEPTH_8U;  channels_nb = 1; }
00043     else if (pixfmt == PIX_FMT_BGRA)  { depth = IPL_DEPTH_8U;  channels_nb = 4; }
00044     else if (pixfmt == PIX_FMT_BGR24) { depth = IPL_DEPTH_8U;  channels_nb = 3; }
00045     else return;
00046 
00047     tmpimg = cvCreateImageHeader((CvSize){picref->video->w, picref->video->h}, depth, channels_nb);
00048     *img = *tmpimg;
00049     img->imageData = img->imageDataOrigin = picref->data[0];
00050     img->dataOrder = IPL_DATA_ORDER_PIXEL;
00051     img->origin    = IPL_ORIGIN_TL;
00052     img->widthStep = picref->linesize[0];
00053 }
00054 
00055 static void fill_picref_from_iplimage(AVFilterBufferRef *picref, const IplImage *img, enum PixelFormat pixfmt)
00056 {
00057     picref->linesize[0] = img->widthStep;
00058     picref->data[0]     = img->imageData;
00059 }
00060 
00061 static int query_formats(AVFilterContext *ctx)
00062 {
00063     static const enum PixelFormat pix_fmts[] = {
00064         PIX_FMT_BGR24, PIX_FMT_BGRA, PIX_FMT_GRAY8, PIX_FMT_NONE
00065     };
00066 
00067     ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
00068     return 0;
00069 }
00070 
00071 static int null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir)
00072 {
00073     return 0;
00074 }
00075 
00076 typedef struct {
00077     const char *name;
00078     int (*init)(AVFilterContext *ctx, const char *args);
00079     void (*uninit)(AVFilterContext *ctx);
00080     void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg);
00081     void *priv;
00082 } OCVContext;
00083 
/**
 * Parameters of the "smooth" operation, passed unchanged to cvSmooth().
 */
typedef struct {
    int    type;    /**< one of the CV_* smoothing type constants */
    int    param1;  /**< first integer parameter, defaults to 3 */
    int    param2;  /**< second integer parameter, defaults to 0 */
    double param3;  /**< first floating point parameter, defaults to 0.0 */
    double param4;  /**< second floating point parameter, defaults to 0.0 */
} SmoothContext;
00089 
00090 static av_cold int smooth_init(AVFilterContext *ctx, const char *args)
00091 {
00092     OCVContext *ocv = ctx->priv;
00093     SmoothContext *smooth = ocv->priv;
00094     char type_str[128] = "gaussian";
00095 
00096     smooth->param1 = 3;
00097     smooth->param2 = 0;
00098     smooth->param3 = 0.0;
00099     smooth->param4 = 0.0;
00100 
00101     if (args)
00102         sscanf(args, "%127[^:]:%d:%d:%lf:%lf", type_str, &smooth->param1, &smooth->param2, &smooth->param3, &smooth->param4);
00103 
00104     if      (!strcmp(type_str, "blur"         )) smooth->type = CV_BLUR;
00105     else if (!strcmp(type_str, "blur_no_scale")) smooth->type = CV_BLUR_NO_SCALE;
00106     else if (!strcmp(type_str, "median"       )) smooth->type = CV_MEDIAN;
00107     else if (!strcmp(type_str, "gaussian"     )) smooth->type = CV_GAUSSIAN;
00108     else if (!strcmp(type_str, "bilateral"    )) smooth->type = CV_BILATERAL;
00109     else {
00110         av_log(ctx, AV_LOG_ERROR, "Smoothing type '%s' unknown.\n", type_str);
00111         return AVERROR(EINVAL);
00112     }
00113 
00114     if (smooth->param1 < 0 || !(smooth->param1%2)) {
00115         av_log(ctx, AV_LOG_ERROR,
00116                "Invalid value '%d' for param1, it has to be a positive odd number\n",
00117                smooth->param1);
00118         return AVERROR(EINVAL);
00119     }
00120     if ((smooth->type == CV_BLUR || smooth->type == CV_BLUR_NO_SCALE || smooth->type == CV_GAUSSIAN) &&
00121         (smooth->param2 < 0 || (smooth->param2 && !(smooth->param2%2)))) {
00122         av_log(ctx, AV_LOG_ERROR,
00123                "Invalid value '%d' for param2, it has to be zero or a positive odd number\n",
00124                smooth->param2);
00125         return AVERROR(EINVAL);
00126     }
00127 
00128     av_log(ctx, AV_LOG_VERBOSE, "type:%s param1:%d param2:%d param3:%f param4:%f\n",
00129            type_str, smooth->param1, smooth->param2, smooth->param3, smooth->param4);
00130     return 0;
00131 }
00132 
00133 static void smooth_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg)
00134 {
00135     OCVContext *ocv = ctx->priv;
00136     SmoothContext *smooth = ocv->priv;
00137     cvSmooth(inimg, outimg, smooth->type, smooth->param1, smooth->param2, smooth->param3, smooth->param4);
00138 }
00139 
00140 static int read_shape_from_file(int *cols, int *rows, int **values, const char *filename,
00141                                 void *log_ctx)
00142 {
00143     uint8_t *buf, *p, *pend;
00144     size_t size;
00145     int ret, i, j, w;
00146 
00147     if ((ret = av_file_map(filename, &buf, &size, 0, log_ctx)) < 0)
00148         return ret;
00149 
00150     /* prescan file to get the number of lines and the maximum width */
00151     w = 0;
00152     for (i = 0; i < size; i++) {
00153         if (buf[i] == '\n') {
00154             if (*rows == INT_MAX) {
00155                 av_log(log_ctx, AV_LOG_ERROR, "Overflow on the number of rows in the file\n");
00156                 return AVERROR_INVALIDDATA;
00157             }
00158             ++(*rows);
00159             *cols = FFMAX(*cols, w);
00160             w = 0;
00161         } else if (w == INT_MAX) {
00162             av_log(log_ctx, AV_LOG_ERROR, "Overflow on the number of columns in the file\n");
00163             return AVERROR_INVALIDDATA;
00164         }
00165         w++;
00166     }
00167     if (*rows > (SIZE_MAX / sizeof(int) / *cols)) {
00168         av_log(log_ctx, AV_LOG_ERROR, "File with size %dx%d is too big\n",
00169                *rows, *cols);
00170         return AVERROR_INVALIDDATA;
00171     }
00172     if (!(*values = av_mallocz(sizeof(int) * *rows * *cols)))
00173         return AVERROR(ENOMEM);
00174 
00175     /* fill *values */
00176     p    = buf;
00177     pend = buf + size-1;
00178     for (i = 0; i < *rows; i++) {
00179         for (j = 0;; j++) {
00180             if (p > pend || *p == '\n') {
00181                 p++;
00182                 break;
00183             } else
00184                 (*values)[*cols*i + j] = !!isgraph(*(p++));
00185         }
00186     }
00187     av_file_unmap(buf, size);
00188 
00189 #ifdef DEBUG
00190     {
00191         char *line;
00192         if (!(line = av_malloc(*cols + 1)))
00193             return AVERROR(ENOMEM);
00194         for (i = 0; i < *rows; i++) {
00195             for (j = 0; j < *cols; j++)
00196                 line[j] = (*values)[i * *cols + j] ? '@' : ' ';
00197             line[j] = 0;
00198             av_log(log_ctx, AV_LOG_DEBUG, "%3d: %s\n", i, line);
00199         }
00200         av_free(line);
00201     }
00202 #endif
00203 
00204     return 0;
00205 }
00206 
00207 static int parse_iplconvkernel(IplConvKernel **kernel, char *buf, void *log_ctx)
00208 {
00209     char shape_filename[128] = "", shape_str[32] = "rect";
00210     int cols = 0, rows = 0, anchor_x = 0, anchor_y = 0, shape = CV_SHAPE_RECT;
00211     int *values = NULL, ret;
00212 
00213     sscanf(buf, "%dx%d+%dx%d/%32[^=]=%127s", &cols, &rows, &anchor_x, &anchor_y, shape_str, shape_filename);
00214 
00215     if      (!strcmp(shape_str, "rect"   )) shape = CV_SHAPE_RECT;
00216     else if (!strcmp(shape_str, "cross"  )) shape = CV_SHAPE_CROSS;
00217     else if (!strcmp(shape_str, "ellipse")) shape = CV_SHAPE_ELLIPSE;
00218     else if (!strcmp(shape_str, "custom" )) {
00219         shape = CV_SHAPE_CUSTOM;
00220         if ((ret = read_shape_from_file(&cols, &rows, &values, shape_filename, log_ctx)) < 0)
00221             return ret;
00222     } else {
00223         av_log(log_ctx, AV_LOG_ERROR,
00224                "Shape unspecified or type '%s' unknown.\n", shape_str);
00225         return AVERROR(EINVAL);
00226     }
00227 
00228     if (rows <= 0 || cols <= 0) {
00229         av_log(log_ctx, AV_LOG_ERROR,
00230                "Invalid non-positive values for shape size %dx%d\n", cols, rows);
00231         return AVERROR(EINVAL);
00232     }
00233 
00234     if (anchor_x < 0 || anchor_y < 0 || anchor_x >= cols || anchor_y >= rows) {
00235         av_log(log_ctx, AV_LOG_ERROR,
00236                "Shape anchor %dx%d is not inside the rectangle with size %dx%d.\n",
00237                anchor_x, anchor_y, cols, rows);
00238         return AVERROR(EINVAL);
00239     }
00240 
00241     *kernel = cvCreateStructuringElementEx(cols, rows, anchor_x, anchor_y, shape, values);
00242     av_freep(&values);
00243     if (!*kernel)
00244         return AVERROR(ENOMEM);
00245 
00246     av_log(log_ctx, AV_LOG_VERBOSE, "Structuring element: w:%d h:%d x:%d y:%d shape:%s\n",
00247            rows, cols, anchor_x, anchor_y, shape_str);
00248     return 0;
00249 }
00250 
00251 typedef struct {
00252     int nb_iterations;
00253     IplConvKernel *kernel;
00254 } DilateContext;
00255 
00256 static av_cold int dilate_init(AVFilterContext *ctx, const char *args)
00257 {
00258     OCVContext *ocv = ctx->priv;
00259     DilateContext *dilate = ocv->priv;
00260     char default_kernel_str[] = "3x3+0x0/rect";
00261     char *kernel_str;
00262     const char *buf = args;
00263     int ret;
00264 
00265     dilate->nb_iterations = 1;
00266 
00267     if (args)
00268         kernel_str = av_get_token(&buf, ":");
00269     if ((ret = parse_iplconvkernel(&dilate->kernel,
00270                                    *kernel_str ? kernel_str : default_kernel_str,
00271                                    ctx)) < 0)
00272         return ret;
00273     av_free(kernel_str);
00274 
00275     sscanf(buf, ":%d", &dilate->nb_iterations);
00276     av_log(ctx, AV_LOG_VERBOSE, "iterations_nb:%d\n", dilate->nb_iterations);
00277     if (dilate->nb_iterations <= 0) {
00278         av_log(ctx, AV_LOG_ERROR, "Invalid non-positive value '%d' for nb_iterations\n",
00279                dilate->nb_iterations);
00280         return AVERROR(EINVAL);
00281     }
00282     return 0;
00283 }
00284 
00285 static av_cold void dilate_uninit(AVFilterContext *ctx)
00286 {
00287     OCVContext *ocv = ctx->priv;
00288     DilateContext *dilate = ocv->priv;
00289 
00290     cvReleaseStructuringElement(&dilate->kernel);
00291 }
00292 
00293 static void dilate_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg)
00294 {
00295     OCVContext *ocv = ctx->priv;
00296     DilateContext *dilate = ocv->priv;
00297     cvDilate(inimg, outimg, dilate->kernel, dilate->nb_iterations);
00298 }
00299 
00300 static void erode_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg)
00301 {
00302     OCVContext *ocv = ctx->priv;
00303     DilateContext *dilate = ocv->priv;
00304     cvErode(inimg, outimg, dilate->kernel, dilate->nb_iterations);
00305 }
00306 
00307 typedef struct {
00308     const char *name;
00309     size_t priv_size;
00310     int  (*init)(AVFilterContext *ctx, const char *args);
00311     void (*uninit)(AVFilterContext *ctx);
00312     void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg);
00313 } OCVFilterEntry;
00314 
00315 static OCVFilterEntry ocv_filter_entries[] = {
00316     { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit, dilate_end_frame_filter },
00317     { "erode",  sizeof(DilateContext), dilate_init, dilate_uninit, erode_end_frame_filter  },
00318     { "smooth", sizeof(SmoothContext), smooth_init, NULL, smooth_end_frame_filter },
00319 };
00320 
00321 static av_cold int init(AVFilterContext *ctx, const char *args)
00322 {
00323     OCVContext *ocv = ctx->priv;
00324     char name[128], priv_args[1024];
00325     int i;
00326     char c;
00327 
00328     sscanf(args, "%127[^=:]%c%1023s", name, &c, priv_args);
00329 
00330     for (i = 0; i < FF_ARRAY_ELEMS(ocv_filter_entries); i++) {
00331         OCVFilterEntry *entry = &ocv_filter_entries[i];
00332         if (!strcmp(name, entry->name)) {
00333             ocv->name             = entry->name;
00334             ocv->init             = entry->init;
00335             ocv->uninit           = entry->uninit;
00336             ocv->end_frame_filter = entry->end_frame_filter;
00337 
00338             if (!(ocv->priv = av_mallocz(entry->priv_size)))
00339                 return AVERROR(ENOMEM);
00340             return ocv->init(ctx, priv_args);
00341         }
00342     }
00343 
00344     av_log(ctx, AV_LOG_ERROR, "No libopencv filter named '%s'\n", name);
00345     return AVERROR(EINVAL);
00346 }
00347 
00348 static av_cold void uninit(AVFilterContext *ctx)
00349 {
00350     OCVContext *ocv = ctx->priv;
00351 
00352     if (ocv->uninit)
00353         ocv->uninit(ctx);
00354     av_free(ocv->priv);
00355     memset(ocv, 0, sizeof(*ocv));
00356 }
00357 
00358 static int end_frame(AVFilterLink *inlink)
00359 {
00360     AVFilterContext *ctx = inlink->dst;
00361     OCVContext *ocv = ctx->priv;
00362     AVFilterLink *outlink= inlink->dst->outputs[0];
00363     AVFilterBufferRef *inpicref  = inlink ->cur_buf;
00364     AVFilterBufferRef *outpicref = outlink->out_buf;
00365     IplImage inimg, outimg;
00366     int ret;
00367 
00368     fill_iplimage_from_picref(&inimg , inpicref , inlink->format);
00369     fill_iplimage_from_picref(&outimg, outpicref, inlink->format);
00370     ocv->end_frame_filter(ctx, &inimg, &outimg);
00371     fill_picref_from_iplimage(outpicref, &outimg, inlink->format);
00372 
00373     if ((ret = ff_draw_slice(outlink, 0, outlink->h, 1)) < 0 ||
00374         (ret = ff_end_frame(outlink)) < 0)
00375         return ret;
00376     return 0;
00377 }
00378 
00379 AVFilter avfilter_vf_ocv = {
00380     .name        = "ocv",
00381     .description = NULL_IF_CONFIG_SMALL("Apply transform using libopencv."),
00382 
00383     .priv_size = sizeof(OCVContext),
00384 
00385     .query_formats = query_formats,
00386     .init = init,
00387     .uninit = uninit,
00388 
00389     .inputs    = (const AVFilterPad[]) {{ .name             = "default",
00390                                           .type             = AVMEDIA_TYPE_VIDEO,
00391                                           .draw_slice       = null_draw_slice,
00392                                           .end_frame        = end_frame,
00393                                           .min_perms        = AV_PERM_READ },
00394                                         { .name = NULL}},
00395 
00396     .outputs   = (const AVFilterPad[]) {{ .name             = "default",
00397                                           .type             = AVMEDIA_TYPE_VIDEO, },
00398                                         { .name = NULL}},
00399 };