[Ffmpeg-devel] [PATCH] (somewhat) optimized C aes 128 bit

Reimar Döffinger Reimar.Doeffinger
Sun Jan 14 14:25:30 CET 2007


Hello,
a little update. I did a bit of loop unrolling to avoid some copies (not properly
benchmarked; the difference seems minimal).
I also added a bit of wrapper code to allow use of libgcrypt, which
seems quite a bit faster still...

Greetings,
Reimar Döffinger
-------------- next part --------------
Index: libavutil/aes128.c
===================================================================
--- libavutil/aes128.c	(revision 0)
+++ libavutil/aes128.c	(revision 0)
@@ -0,0 +1,180 @@
+/*
+ * AES 128 bit decryption
+ * Copyright (c) 2007 Reimar Doeffinger.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Based on public domain AES reference code by Paulo Barreto, Vincent Rijmen
+ */
+
+#include "common.h"
+#include "aes128.h"
+
+#ifdef CONFIG_GCRYPT
+/**
+ * Allocate an AES-128 context backed by a libgcrypt AES/CBC cipher handle.
+ *
+ * @return the new context, or NULL if the allocation or
+ *         gcry_cipher_open() failed
+ */
+AES128Context *aes128_init(void) {
+    AES128Context *res = av_malloc(sizeof(*res));
+    if (!res)
+        return NULL;
+    /* gcry_cipher_open() returns 0 on success; never hand out a context
+     * whose handle was not opened */
+    if (gcry_cipher_open(&res->ch, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_CBC, 0)) {
+        av_free(res);
+        return NULL;
+    }
+    return res;
+}
+/**
+ * Set the AES key on the libgcrypt handle stored in c.
+ *
+ * @param c   context returned by aes128_init()
+ * @param key 16 bytes of key material
+ */
+void aes128_set_key(AES128Context *c, const uint8_t *key) {
+    /* gcry_cipher_setkey() takes a const buffer, unlike the generic
+     * gcry_cipher_ctl() whose buffer parameter is a plain void pointer */
+    gcry_cipher_setkey(c->ch, key, 16);
+}
+/**
+ * CBC-decrypt blockcnt 16-byte blocks in place using libgcrypt.
+ *
+ * @param c        context returned by aes128_init(), key already set
+ * @param mem      buffer of blockcnt * 16 bytes, decrypted in place
+ * @param blockcnt number of 16-byte blocks; nothing is done if <= 0
+ * @param IV       16-byte IV; on return it holds the last ciphertext block,
+ *                 so consecutive calls can continue the same stream
+ */
+void aes128_cbc_decrypt(AES128Context *c, uint8_t *mem, int blockcnt, uint8_t *IV) {
+    size_t len;
+    /* without this guard the IV update below would read before mem */
+    if (blockcnt <= 0)
+        return;
+    len = (size_t)blockcnt << 4;
+    gcry_cipher_setiv(c->ch, IV, 16);
+    /* save the last ciphertext block before it is overwritten by plaintext */
+    memcpy(IV, &mem[len - 16], 16);
+    /* in == NULL and inlen == 0 selects libgcrypt's in-place mode; passing
+     * the same buffer as both in and out is not allowed */
+    gcry_cipher_decrypt(c->ch, mem, len, NULL, 0);
+}
+#else
+static const uint8_t rcon[10] = { /* Round constants for the key schedule: aes128_set_key()
+                                     XORs rcon[r - 1] into the first byte of round key r. */
+    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
+};
+static const uint8_t logtbl[256] = { /* Logarithm table used by MUL(): the product of two nonzero
+                                        bytes is looked up as invlogtbl[(logtbl[a] + logtbl[b]) % 255].
+                                        logtbl[0] is a filler; MUL() returns 0 without consulting
+                                        the table when an operand is 0. */
+      0,   0,  25,   1,  50,   2,  26, 198,  75, 199,  27, 104,  51, 238, 223,   3,
+    100,   4, 224,  14,  52, 141, 129, 239,  76, 113,   8, 200, 248, 105,  28, 193,
+    125, 194,  29, 181, 249, 185,  39, 106,  77, 228, 166, 114, 154, 201,   9, 120,
+    101,  47, 138,   5,  33,  15, 225,  36,  18, 240, 130,  69,  53, 147, 218, 142,
+    150, 143, 219, 189,  54, 208, 206, 148,  19,  92, 210, 241,  64,  70, 131,  56,
+    102, 221, 253,  48, 191,   6, 139,  98, 179,  37, 226, 152,  34, 136, 145,  16,
+    126, 110,  72, 195, 163, 182,  30,  66,  58, 107,  40,  84, 250, 133,  61, 186,
+     43, 121,  10,  21, 155, 159,  94, 202,  78, 212, 172, 229, 243, 115, 167,  87,
+    175,  88, 168,  80, 244, 234, 214, 116,  79, 174, 233, 213, 231, 230, 173, 232,
+     44, 215, 117, 122, 235,  22,  11, 245,  89, 203,  95, 176, 156, 169,  81, 160,
+    127,  12, 246, 111,  23, 196,  73, 236, 216,  67,  31,  45, 164, 118, 123, 183,
+    204, 187,  62,  90, 251,  96, 177, 134,  59,  82, 161, 108, 170,  85,  41, 157,
+    151, 178, 135, 144,  97, 190, 220, 252, 188, 149, 207, 205,  55,  63,  91, 209,
+     83,  57, 132,  60,  65, 162, 109,  71,  20,  42, 158,  93,  86, 242, 211, 171,
+     68,  17, 146, 217,  35,  32,  46, 137, 180, 124, 184,  38, 119, 153, 227, 165,
+    103,  74, 237, 222, 197,  49, 254,  24,  13,  99, 140, 128, 192, 247, 112,   7
+};
+static const uint8_t invsubst[256] = { /* Byte substitution table applied by SUBSTSHIFTROWS during
+                                          decryption; aes128_init() inverts it to fill c->subst,
+                                          which the key schedule uses. */
+     82,   9, 106, 213,  48,  54, 165,  56, 191,  64, 163, 158, 129, 243, 215, 251,
+    124, 227,  57, 130, 155,  47, 255, 135,  52, 142,  67,  68, 196, 222, 233, 203,
+     84, 123, 148,  50, 166, 194,  35,  61, 238,  76, 149,  11,  66, 250, 195,  78,
+      8,  46, 161, 102,  40, 217,  36, 178, 118,  91, 162,  73, 109, 139, 209,  37,
+    114, 248, 246, 100, 134, 104, 152,  22, 212, 164,  92, 204,  93, 101, 182, 146,
+    108, 112,  72,  80, 253, 237, 185, 218,  94,  21,  70,  87, 167, 141, 157, 132,
+    144, 216, 171,   0, 140, 188, 211,  10, 247, 228,  88,   5, 184, 179,  69,   6,
+    208,  44,  30, 143, 202,  63,  15,   2, 193, 175, 189,   3,   1,  19, 138, 107,
+     58, 145,  17,  65,  79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+    150, 172, 116,  34, 231, 173,  53, 133, 226, 249,  55, 232,  28, 117, 223, 110,
+     71, 241,  26, 113,  29,  41, 197, 137, 111, 183,  98,  14, 170,  24, 190,  27,
+    252,  86,  62,  75, 198, 210, 121,  32, 154, 219, 192, 254, 120, 205,  90, 244,
+     31, 221, 168,  51, 136,   7, 199,  49, 177,  18,  16,  89,  39, 128, 236,  95,
+     96,  81, 127, 169,  25, 181,  74,  13,  45, 229, 122, 159, 147, 201, 156, 239,
+    160, 224,  59,  77, 174,  42, 245, 176, 200, 235, 187,  60, 131,  83, 153,  97,
+     23,  43,   4, 126, 186, 119, 214,  38, 225, 105,  20,  99,  85,  33,  12, 125
+};
+#define XORBLOCK(a, rk) \
+    ((uint64_t *)(a))[0] ^= ((uint64_t *)(rk))[0];\
+    ((uint64_t *)(a))[1] ^= ((uint64_t *)(rk))[1];
+#define COPYBLOCK(b, a) \
+    ((uint64_t *)(b))[0] = ((uint64_t *)(a))[0];\
+    ((uint64_t *)(b))[1] = ((uint64_t *)(a))[1];
+#define SUBSTSHIFTROWS(b, a) \
+    b[0]  = invsubst[a[0]];  b[1]  = invsubst[a[13]]; b[2]  = invsubst[a[10]];\
+    b[3]  = invsubst[a[7]];  b[4]  = invsubst[a[4]];  b[5]  = invsubst[a[1]];\
+    b[6]  = invsubst[a[14]]; b[7]  = invsubst[a[11]]; b[8]  = invsubst[a[8]];\
+    b[9]  = invsubst[a[5]];  b[10] = invsubst[a[2]];  b[11] = invsubst[a[15]];\
+    b[12] = invsubst[a[12]]; b[13] = invsubst[a[9]];  b[14] = invsubst[a[6]];\
+    b[15] = invsubst[a[3]];
+#define INVMIX(b, a) \
+    ((uint32_t *)(b))[0] = c->multbl[0][a[0]]  ^ c->multbl[1][a[1]]  ^ c->multbl[2][a[2]]  ^ c->multbl[3][a[3]];\
+    ((uint32_t *)(b))[1] = c->multbl[0][a[4]]  ^ c->multbl[1][a[5]]  ^ c->multbl[2][a[6]]  ^ c->multbl[3][a[7]];\
+    ((uint32_t *)(b))[2] = c->multbl[0][a[8]]  ^ c->multbl[1][a[9]]  ^ c->multbl[2][a[10]] ^ c->multbl[3][a[11]];\
+    ((uint32_t *)(b))[3] = c->multbl[0][a[12]] ^ c->multbl[1][a[13]] ^ c->multbl[2][a[14]] ^ c->multbl[3][a[15]];
+
+/*
+ * Multiply two bytes through the log tables; yields 0 if either operand is 0.
+ * Expects a table named invlogtbl to be in scope where the macro is used.
+ */
+#define MUL(a, b) (((a) && (b)) ? invlogtbl[(logtbl[(a)] + logtbl[(b)]) % 255] : 0)
+/**
+ * Allocate an AES-128 context and precompute its lookup tables:
+ * c->subst (the inverse of invsubst) and the four c->multbl tables read by
+ * INVMIX.
+ *
+ * @return the new context, or NULL if the allocation failed
+ */
+AES128Context *aes128_init(void) {
+    /* multipliers of one output column; table n uses them rotated by n bytes */
+    static const uint8_t coef[4] = { 0xe, 0x9, 0xd, 0xb };
+    uint8_t invlogtbl[256]; /* scratch antilog table, only needed in here */
+    AES128Context *c = av_mallocz(sizeof(*c));
+    int i, n, k;
+    if (!c)
+        return NULL;
+    /* ascending order matters: logtbl[0] == logtbl[1] == 0 and the entry
+     * written for i == 1 has to be the one that survives */
+    for (i = 0; i < 256; i++) {
+        c->subst[invsubst[i]] = i;
+        invlogtbl[logtbl[i]] = i;
+    }
+    invlogtbl[255] = 1;
+    for (i = 0; i < 256; i++) {
+        uint8_t prod[4];
+        for (k = 0; k < 4; k++)
+            prod[k] = MUL(coef[k], i);
+        /* the tables are filled bytewise, so the 32-bit entries INVMIX reads
+         * have the same byte layout on any endianness */
+        for (n = 0; n < 4; n++) {
+            uint8_t *entry = (uint8_t *)c->multbl[n] + 4 * i;
+            for (k = 0; k < 4; k++)
+                entry[k] = prod[(k + 4 - n) & 3];
+        }
+    }
+    return c;
+}
+
+/**
+ * Expand a 16-byte key into the 11 round keys c->key[0..10].
+ *
+ * @param c   context whose subst table has been set up by aes128_init()
+ * @param key 16 bytes of key material; stored unchanged as round key 0
+ */
+void aes128_set_key(AES128Context *c, const uint8_t *key) {
+    uint8_t word[4][4]; /* current round key as four 4-byte words */
+    int round, w, b;
+    memcpy(word, key, sizeof(word));
+    memcpy(c->key[0], word, sizeof(word));
+    for (round = 1; round <= 10; round++) {
+        /* word 0 absorbs the substituted, byte-rotated last word ... */
+        for (b = 0; b < 4; b++)
+            word[0][b] ^= c->subst[word[3][(b + 1) & 3]];
+        /* ... plus this round's constant */
+        word[0][0] ^= rcon[round - 1];
+        /* each remaining word is chained onto its already updated predecessor */
+        for (w = 1; w < 4; w++)
+            for (b = 0; b < 4; b++)
+                word[w][b] ^= word[w - 1][b];
+        memcpy(c->key[round], word, sizeof(word));
+    }
+}
+
+/**
+ * Decrypt a single 16-byte block in place with the round keys in c->key.
+ *
+ * @param c     context with tables and round keys set up
+ * @param block 16 bytes, ciphertext on entry and plaintext on return
+ */
+static void aes128_decrypt_block(AES128Context *c, uint8_t *block) {
+    uint8_t state[16];
+    long round;
+    /* key 10 is followed by substitution/row shift only, no INVMIX */
+    XORBLOCK(block, c->key[10]);
+    SUBSTSHIFTROWS(state, block);
+    /* key 9 is handled outside the loop with INVMIX running in place on
+     * state, so the data ends up back in block */
+    XORBLOCK(state, c->key[9]);
+    INVMIX(state, state);
+    SUBSTSHIFTROWS(block, state);
+    /* keys 8 down to 1: add key, mix, substitute/shift */
+    for (round = 8; round > 0; round--) {
+        XORBLOCK(block, c->key[round]);
+        INVMIX(state, block);
+        SUBSTSHIFTROWS(block, state);
+    }
+    XORBLOCK(block, c->key[0]);
+}
+
+/**
+ * CBC-decrypt blockcnt 16-byte blocks in place.
+ *
+ * @param c        context with round keys set by aes128_set_key()
+ * @param mem      buffer of blockcnt * 16 bytes, decrypted in place
+ * @param blockcnt number of 16-byte blocks; nothing is done if <= 0
+ * @param IV       16-byte IV; on return it holds the last ciphertext block,
+ *                 so consecutive calls can continue the same stream
+ */
+void aes128_cbc_decrypt(AES128Context *c, uint8_t *mem, int blockcnt, uint8_t *IV) {
+    uint8_t tmp[16];
+    /* a negative odd count would otherwise pass the parity test below and
+     * decrypt one block */
+    if (blockcnt <= 0)
+        return;
+    /* handle a leading odd block so the main loop can work on pairs */
+    if (blockcnt & 1) {
+        COPYBLOCK(tmp, mem);
+        aes128_decrypt_block(c, mem);
+        XORBLOCK(mem, IV);
+        COPYBLOCK(IV, tmp);
+        mem += 16;
+    }
+    blockcnt >>= 1;
+    /* two blocks per iteration: the saved ciphertext alternates between tmp
+     * and IV, which avoids one block copy per pair */
+    while (blockcnt-- > 0) {
+        COPYBLOCK(tmp, mem);
+        aes128_decrypt_block(c, mem);
+        XORBLOCK(mem, IV);
+        mem += 16;
+        COPYBLOCK(IV, mem);
+        aes128_decrypt_block(c, mem);
+        XORBLOCK(mem, tmp);
+        mem += 16;
+    }
+}
+#endif
Index: libavutil/aes128.h
===================================================================
--- libavutil/aes128.h	(revision 0)
+++ libavutil/aes128.h	(revision 0)
@@ -0,0 +1,20 @@
+#ifndef AES128_H
+#define AES128_H
+
+/* uint8_t / uint32_t are used below; do not rely on the includer for them */
+#include <stdint.h>
+
+#ifdef CONFIG_GCRYPT
+#include <gcrypt.h>
+/** AES-128 state when libgcrypt does the work: only its cipher handle. */
+typedef struct {
+    gcry_cipher_hd_t ch;
+} AES128Context;
+#else
+/** AES-128 state for the built-in C implementation. */
+typedef struct {
+    uint32_t multbl[4][256]; /**< per-byte column-mix tables, built by aes128_init() */
+    uint8_t subst[256];      /**< substitution table used by the key schedule */
+    uint8_t key[11][16];     /**< round keys 0..10, filled by aes128_set_key() */
+} AES128Context;
+#endif
+
+/**
+ * Allocate and initialize a context.
+ * @return the newly allocated context
+ */
+AES128Context *aes128_init(void);
+
+/**
+ * Set the decryption key.
+ * @param c   context returned by aes128_init()
+ * @param key 16 bytes of key material
+ */
+void aes128_set_key(AES128Context *c, const uint8_t *key);
+
+/**
+ * CBC-decrypt whole blocks in place.
+ * @param c        context with the key already set
+ * @param mem      buffer of blockcnt * 16 bytes, decrypted in place
+ * @param blockcnt number of 16-byte blocks in mem
+ * @param IV       16-byte IV; overwritten with the last ciphertext block
+ */
+void aes128_cbc_decrypt(AES128Context *c, uint8_t *mem, int blockcnt, uint8_t *IV);
+
+#endif



More information about the ffmpeg-devel mailing list