[FFmpeg-devel] lavu: add text encoding conversion API

Nicolas George george at nsup.org
Wed Sep 23 19:28:51 CEST 2015


This is a proposal for an API in libavutil to perform text encoding
conversions. I had it in mind to clean up the subtitles code initially, and
now it could be useful for James' proposal about metadata. The current
issues are the reliance on the optional system feature iconv, clumsy and
annoying buffer management, and the handling of errors and replacement
characters.

For now, this is just the API, not as a patch with the code, but I suspect
designing the API cleanly is way harder than implementing it.

The API would go in, probably, textconv.h, with the opaque parts in
textconv.c. The structures are in logical order, not in the order required
for compilation.

Please comment.


  Nicolas George

/**
 * Library for text encoding conversion.
 * This structure holds the global state to perform text character encoding
 * (aka charset, aka code page) conversions.
 * It can contain different modules, depending on what was enabled at build
 * time, and can also get extra modules from the application.
 */
typedef struct AVTextEncConvLib AVTextEncConvLib;

/* opaque */
struct AVTextEncConvLib {
    AVTextEncConvModule *modules;
    unsigned nb_modules;

/**
 * Module for text encoding conversion.
 * Different modules may support more or less character encodings with
 * various options. Module availability depends on the system and the build
 * options.
 */
typedef struct AVTextEncConvModule AVTextEncConvModule;

/* public */
struct AVTextEncConvModule {
    char name[16];
    int (*open)(AVTextEncConvContext *cxt, const char *from, const char *to);
    size_t priv_data_size;
    unsigned priority;

/**
 * Allocate a text encoding conversion library.
 * See the values below for the flags.
 */
int av_text_enc_conv_lib_alloc(AVTextEncConvLib **lib, unsigned flags);

enum {

    /**
     * Disable registering the builtin conversion module.
     * The builtin conversion module handles UTF-8, UTF-16, UCS-2, UCS-4
     * and is always available at priority -1000.
     */

    /**
     * Disable registering the extra builtin conversion modules.
     * The extra builtin conversion modules may be, depending on the build
     * options: iconv.
     * They all have a priority between -100 and 0.
     */


void av_text_enc_conv_lib_freep(AVTextEncConvLib **lib);

/**
 * Convert a text buffer directly with default options.
 */
int av_text_enc_conv_do(const AVTextEncConvLib *lib,
                        uint8_t *in, size_t in_size,
                        uint8_t **out, size_t *out_size);

/**
 * Context for text encoding conversions.
 * The context is initialized for a specific from→to conversion.
 * Options can be set on the context before each conversion operation.
 */
typedef struct AVTextEncConvContext AVTextEncConvContext;

/* opaque */
struct AVTextEncConvContext {

    AVClass *av_class;

    void *priv_data;

    int (*convert)(AVTextEncConvContext *ctx,
                   uint8_t **inbuf,  unsigned inbuf_size,
                   uint8_t **outbuf, unsigned outbuf_size);

    /**
     * Replacement string for invalid input or impossible output.
     * Must be in output encoding.
     * If replacement is not used, then an error is triggered.
     */
    unsigned use_replacement;
    uint8_t *replacement;
    size_t replacement_size;

/**
 * Register conversion modules in the library.
 * mod_size is the size of a module according to the application.
 * The module structures are copied to the library structure.
 */
int av_text_conv_lib_register_modules(AVTextEncConvLib *lib,
                                      const AVTextEncConvModule *mod,
                                      unsigned nb_mod,
                                      size_t mod_size);

/**
 * Open a text conversion context.
 */
int av_text_enc_conv_open(const AVTextEncConvLib *lib,
                          AVTextEncConvContext *ctx,
                          const char *from,
                          const char *to);

void av_text_enc_conv_context_freep(AVTextEncConvContext **ctx);

/**
 * Set the replacement string for a text conversion context.
 * The string must be in UTF-8.
 * @return  0 on success or an AVERROR code on failure,
 *          including AVERROR(EILSEQ) if the output is not possible
 * If status is neither 0 nor a return value indicating an error that
 * depends on the input string, then the function returns it immediately.
 * This makes it possible to write the following pattern:
 *     ret = av_text_enc_conv_set_replacement(lib, ctx, "�", 0);
 *     ret = av_text_enc_conv_set_replacement(lib, ctx, "?", ret);
 *     if (ret < 0)
 *         return ret;
 */
int av_text_enc_conv_set_replacement(const AVTextEncConvLib *lib,
                                     AVTextEncConvContext *ctx,
                                     const uint8_t *repl,
                                     int status);

/**
 * Convert a text buffer.
 * If out points to NULL, a new buffer is allocated.
 * If out points to an existing buffer, it is used;
 * if it is too small, AVERROR(E2BIG) is returned.
 * If a conversion error happens and error is not NULL, *error is set to
 * the input position that caused the error.
 */
int av_text_enc_conv_buffer(AVTextEncConvContext *ctx,
                            uint8_t *in,   size_t in_size,
                            uint8_t **out, size_t **out_size,
                            uint8_t **error);

/**
 * Convert a text buffer into an AVBPrint buffer.
 */
int av_text_enc_conv_bprint(AVBPrint *bp, AVTextEncConvContext *ctx,
                            uint8_t *in, size_t in_size,
                            uint8_t **error);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150923/52aa1855/attachment.sig>

More information about the ffmpeg-devel mailing list