sw_aes.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. /**
  2. * Copyright (c) 2007, Cameron Rich
  3. *
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * * Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * * Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. * * Neither the name of the axTLS project nor the names of its contributors
  15. * may be used to endorse or promote products derived from this software
  16. * without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  22. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  23. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  24. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  25. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  26. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  27. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. */
  30. #include <stdlib.h>
  31. #include "sw_aes.h"
  32. /**
  33. * AES implementation - this is a small code version. There are much faster
  34. * versions around but they are much larger in size (i.e. they use large
  35. * submix tables).
  36. */
  37. #include <string.h>
  38. #include <stdint.h>
  39. #include "sw_aes.h"
  /*
   * Fallback byte-order conversions.  Each one is only defined when the
   * platform headers have not already provided a macro of the same name.
   *
   * The helpers go through a byte array rather than swapping unconditionally:
   * "host to network" writes the value most-significant byte first, and
   * "network to host" reads it back the same way.  That yields a byte swap on
   * little-endian hosts and a no-op on big-endian hosts, so the result is in
   * network order on either.
   */
  #ifndef htonl
  static inline uint32_t sw_aes_hton32(uint32_t host)
  {
      uint8_t bytes[4];
      uint32_t net;

      bytes[0] = (uint8_t)(host >> 24);
      bytes[1] = (uint8_t)(host >> 16);
      bytes[2] = (uint8_t)(host >> 8);
      bytes[3] = (uint8_t)host;
      memcpy(&net, bytes, sizeof net);
      return net;
  }
  #define htonl(a) sw_aes_hton32((uint32_t)(a))
  #endif

  #ifndef ntohl
  static inline uint32_t sw_aes_ntoh32(uint32_t net)
  {
      uint8_t bytes[4];

      memcpy(bytes, &net, sizeof bytes);
      return ((uint32_t)bytes[0] << 24) | ((uint32_t)bytes[1] << 16) |
             ((uint32_t)bytes[2] << 8)  |  (uint32_t)bytes[3];
  }
  #define ntohl(a) sw_aes_ntoh32((uint32_t)(a))
  #endif

  #ifndef htons
  static inline uint16_t sw_aes_hton16(uint16_t host)
  {
      uint8_t bytes[2];
      uint16_t net;

      bytes[0] = (uint8_t)(host >> 8);
      bytes[1] = (uint8_t)host;
      memcpy(&net, bytes, sizeof net);
      return net;
  }
  #define htons(a) sw_aes_hton16((uint16_t)(a))
  #endif

  #ifndef ntohs
  static inline uint16_t sw_aes_ntoh16(uint16_t net)
  {
      uint8_t bytes[2];

      memcpy(bytes, &net, sizeof bytes);
      return (uint16_t)(((unsigned)bytes[0] << 8) | (unsigned)bytes[1]);
  }
  #define ntohs(a) sw_aes_ntoh16((uint16_t)(a))
  #endif
  /* Rotate a 32-bit word right by 8, 16 and 24 bits respectively. */
  #define rot1(x)     (((x) << 24) | ((x) >> 8))
  #define rot2(x)     (((x) << 16) | ((x) >> 16))
  #define rot3(x)     (((x) <<  8) | ((x) >> 24))

  /*
   * mul2() doubles four GF(2^8) elements packed in one 32-bit word (trick
   * credited to Dr B. R. Gladman <brg@gladman.uk.net>).
   *
   * Doubling a byte is a left shift, followed by an xor with 0x1b when the
   * byte's top bit was set.  Per byte lane:
   *
   *   t            = x & 0x80        keep only the top bit      (0x80 or 0x00)
   *   t >> 7                         move it down to bit 0      (0x01 or 0x00)
   *   t - (t >> 7)                   all-ones below the top bit (0x7f or 0x00)
   *   ... & 0x1b                     the reduction constant     (0x1b or 0x00)
   *
   * (x + x) & 0xfe is the shifted byte with the bit that crossed in from the
   * lane below cleared.  The caller supplies `t` as scratch storage.
   */
  #define mt          0x80808080
  #define ml          0x7f7f7f7f
  #define mh          0xfefefefe
  #define mm          0x1b1b1b1b
  #define mul2(x,t)   ((t) = ((x) & mt), \
                       ((((x) + (x)) & mh) ^ (((t) - ((t) >> 7)) & mm)))

  /*
   * Inverse MixColumn of one packed column `x`.  f2/f4/f8/f9 are caller
   * supplied scratch words: they first receive 2x, 4x and 8x, are then
   * combined into the 0x09/0x0b/0x0d/0x0e multiples, and the rotations place
   * each multiple on the byte lane it belongs to.  The macro evaluates to
   * the transformed column.
   */
  #define inv_mix_col(x,f2,f4,f8,f9) ( \
      (f2)  = mul2(x, f2),             \
      (f4)  = mul2(f2, f4),            \
      (f8)  = mul2(f4, f8),            \
      (f9)  = (x) ^ (f8),              \
      (f8)  = ((f2) ^ (f4) ^ (f8)),    \
      (f2) ^= (f9),                    \
      (f4) ^= (f9),                    \
      (f8) ^= rot3(f2),                \
      (f8) ^= rot2(f4),                \
      (f8) ^ rot1(f9))
  /*
   * AES forward substitution box, indexed by the input byte.
   * One row per high nibble, sixteen entries per row.
   */
  static const uint8_t aes_sbox[256] =
  {
      /* 0x */ 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
      /* 1x */ 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
      /* 2x */ 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
      /* 3x */ 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
      /* 4x */ 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
      /* 5x */ 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
      /* 6x */ 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
      /* 7x */ 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
      /* 8x */ 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
      /* 9x */ 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
      /* Ax */ 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
      /* Bx */ 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
      /* Cx */ 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
      /* Dx */ 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
      /* Ex */ 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
      /* Fx */ 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
  };
  /*
   * AES inverse substitution box, indexed by the input byte.
   * One row per high nibble, sixteen entries per row.
   */
  static const uint8_t aes_isbox[256] =
  {
      /* 0x */ 0x52,0x09,0x6A,0xD5,0x30,0x36,0xA5,0x38,0xBF,0x40,0xA3,0x9E,0x81,0xF3,0xD7,0xFB,
      /* 1x */ 0x7C,0xE3,0x39,0x82,0x9B,0x2F,0xFF,0x87,0x34,0x8E,0x43,0x44,0xC4,0xDE,0xE9,0xCB,
      /* 2x */ 0x54,0x7B,0x94,0x32,0xA6,0xC2,0x23,0x3D,0xEE,0x4C,0x95,0x0B,0x42,0xFA,0xC3,0x4E,
      /* 3x */ 0x08,0x2E,0xA1,0x66,0x28,0xD9,0x24,0xB2,0x76,0x5B,0xA2,0x49,0x6D,0x8B,0xD1,0x25,
      /* 4x */ 0x72,0xF8,0xF6,0x64,0x86,0x68,0x98,0x16,0xD4,0xA4,0x5C,0xCC,0x5D,0x65,0xB6,0x92,
      /* 5x */ 0x6C,0x70,0x48,0x50,0xFD,0xED,0xB9,0xDA,0x5E,0x15,0x46,0x57,0xA7,0x8D,0x9D,0x84,
      /* 6x */ 0x90,0xD8,0xAB,0x00,0x8C,0xBC,0xD3,0x0A,0xF7,0xE4,0x58,0x05,0xB8,0xB3,0x45,0x06,
      /* 7x */ 0xD0,0x2C,0x1E,0x8F,0xCA,0x3F,0x0F,0x02,0xC1,0xAF,0xBD,0x03,0x01,0x13,0x8A,0x6B,
      /* 8x */ 0x3A,0x91,0x11,0x41,0x4F,0x67,0xDC,0xEA,0x97,0xF2,0xCF,0xCE,0xF0,0xB4,0xE6,0x73,
      /* 9x */ 0x96,0xAC,0x74,0x22,0xE7,0xAD,0x35,0x85,0xE2,0xF9,0x37,0xE8,0x1C,0x75,0xDF,0x6E,
      /* Ax */ 0x47,0xF1,0x1A,0x71,0x1D,0x29,0xC5,0x89,0x6F,0xB7,0x62,0x0E,0xAA,0x18,0xBE,0x1B,
      /* Bx */ 0xFC,0x56,0x3E,0x4B,0xC6,0xD2,0x79,0x20,0x9A,0xDB,0xC0,0xFE,0x78,0xCD,0x5A,0xF4,
      /* Cx */ 0x1F,0xDD,0xA8,0x33,0x88,0x07,0xC7,0x31,0xB1,0x12,0x10,0x59,0x27,0x80,0xEC,0x5F,
      /* Dx */ 0x60,0x51,0x7F,0xA9,0x19,0xB5,0x4A,0x0D,0x2D,0xE5,0x7A,0x9F,0x93,0xC9,0x9C,0xEF,
      /* Ex */ 0xA0,0xE0,0x3B,0x4D,0xAE,0x2A,0xF5,0xB0,0xC8,0xEB,0xBB,0x3C,0x83,0x53,0x99,0x61,
      /* Fx */ 0x17,0x2B,0x04,0x7E,0xBA,0x77,0xD6,0x26,0xE1,0x69,0x14,0x63,0x55,0x21,0x0C,0x7D
  };
  /*
   * Key-schedule round constants: starting from 0x01, each entry is the
   * previous one doubled in GF(2^8).  AES_set_key walks this table from the
   * start, taking one entry every `words` schedule words.
   */
  static const unsigned char Rcon[30] =
  {
      0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36,
      0x6C, 0xD8, 0xAB, 0x4D, 0x9A, 0x2F, 0x5E, 0xBC, 0x63, 0xC6,
      0x97, 0x35, 0x6A, 0xD4, 0xB3, 0x7D, 0xFA, 0xEF, 0xC5, 0x91
  };
  172. /* ----- no more static functions ----- */
  173. void AES_encrypt(const AES_CTX *ctx, uint32_t *data);
  174. void AES_decrypt(const AES_CTX *ctx, uint32_t *data);
  /*
   * Double a value in GF(2^8) modulo the AES polynomial x^8+x^4+x^3+x+1.
   * Bit 7 of x selects whether the reduction constant 0x1b is folded in;
   * the result is truncated to one byte by the return type.
   */
  static unsigned char AES_xtime(uint32_t x)
  {
      uint32_t doubled = x << 1;

      if (x & 0x80)
      {
          doubled ^= 0x1b;
      }
      return (unsigned char)doubled;
  }
  181. /**
  182. * Set up AES with the key/iv and cipher size.
  183. */
  184. void AES_set_key(AES_CTX *ctx, const uint8_t *key,
  185. const uint8_t *iv, AES_MODE mode)
  186. {
  187. int i, ii;
  188. uint32_t *W, tmp, tmp2;
  189. const unsigned char *ip;
  190. int words;
  191. switch (mode)
  192. {
  193. case AES_MODE_128:
  194. i = 10;
  195. words = 4;
  196. break;
  197. case AES_MODE_256:
  198. i = 14;
  199. words = 8;
  200. break;
  201. default: /* fail silently */
  202. return;
  203. }
  204. ctx->rounds = i;
  205. ctx->key_size = words;
  206. W = ctx->ks;
  207. for (i = 0; i < words; i+=2)
  208. {
  209. W[i+0]= ((uint32_t)key[ 0]<<24)|
  210. ((uint32_t)key[ 1]<<16)|
  211. ((uint32_t)key[ 2]<< 8)|
  212. ((uint32_t)key[ 3] );
  213. W[i+1]= ((uint32_t)key[ 4]<<24)|
  214. ((uint32_t)key[ 5]<<16)|
  215. ((uint32_t)key[ 6]<< 8)|
  216. ((uint32_t)key[ 7] );
  217. key += 8;
  218. }
  219. ip = Rcon;
  220. ii = 4 * (ctx->rounds+1);
  221. for (i = words; i<ii; i++)
  222. {
  223. tmp = W[i-1];
  224. if ((i % words) == 0)
  225. {
  226. tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8;
  227. tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
  228. tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
  229. tmp2|=(uint32_t)aes_sbox[(tmp>>24) ];
  230. tmp=tmp2^(((unsigned int)*ip)<<24);
  231. ip++;
  232. }
  233. if ((words == 8) && ((i % words) == 4))
  234. {
  235. tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ;
  236. tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
  237. tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
  238. tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]<<24;
  239. tmp=tmp2;
  240. }
  241. W[i]=W[i-words]^tmp;
  242. }
  243. /* copy the iv across */
  244. memcpy(ctx->iv, iv, 16);
  245. }
  246. /**
  247. * Change a key for decryption.
  248. */
  249. void AES_convert_key(AES_CTX *ctx)
  250. {
  251. int i;
  252. uint32_t *k,w,t1,t2,t3,t4;
  253. k = ctx->ks;
  254. k += 4;
  255. for (i= ctx->rounds*4; i > 4; i--)
  256. {
  257. w= *k;
  258. w = inv_mix_col(w,t1,t2,t3,t4);
  259. *k++ =w;
  260. }
  261. }
  262. /**
  263. * Encrypt a byte sequence (with a block size 16) using the AES cipher.
  264. */
  265. void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
  266. {
  267. int i;
  268. uint32_t tin[4], tout[4], iv[4];
  269. memcpy(iv, ctx->iv, AES_IV_SIZE);
  270. for (i = 0; i < 4; i++)
  271. tout[i] = ntohl(iv[i]);
  272. for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
  273. {
  274. uint32_t msg_32[4];
  275. uint32_t out_32[4];
  276. memcpy(msg_32, msg, AES_BLOCKSIZE);
  277. msg += AES_BLOCKSIZE;
  278. for (i = 0; i < 4; i++)
  279. tin[i] = ntohl(msg_32[i])^tout[i];
  280. AES_encrypt(ctx, tin);
  281. for (i = 0; i < 4; i++)
  282. {
  283. tout[i] = tin[i];
  284. out_32[i] = htonl(tout[i]);
  285. }
  286. memcpy(out, out_32, AES_BLOCKSIZE);
  287. out += AES_BLOCKSIZE;
  288. }
  289. for (i = 0; i < 4; i++)
  290. iv[i] = htonl(tout[i]);
  291. memcpy(ctx->iv, iv, AES_IV_SIZE);
  292. }
  293. /**
  294. * Decrypt a byte sequence (with a block size 16) using the AES cipher.
  295. */
  296. void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
  297. {
  298. int i;
  299. uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
  300. memcpy(iv, ctx->iv, AES_IV_SIZE);
  301. for (i = 0; i < 4; i++)
  302. xor[i] = ntohl(iv[i]);
  303. for (length -= 16; length >= 0; length -= 16)
  304. {
  305. uint32_t msg_32[4];
  306. uint32_t out_32[4];
  307. memcpy(msg_32, msg, AES_BLOCKSIZE);
  308. msg += AES_BLOCKSIZE;
  309. for (i = 0; i < 4; i++)
  310. {
  311. tin[i] = ntohl(msg_32[i]);
  312. data[i] = tin[i];
  313. }
  314. AES_decrypt(ctx, data);
  315. for (i = 0; i < 4; i++)
  316. {
  317. tout[i] = data[i]^xor[i];
  318. xor[i] = tin[i];
  319. out_32[i] = htonl(tout[i]);
  320. }
  321. memcpy(out, out_32, AES_BLOCKSIZE);
  322. out += AES_BLOCKSIZE;
  323. }
  324. for (i = 0; i < 4; i++)
  325. iv[i] = htonl(xor[i]);
  326. memcpy(ctx->iv, iv, AES_IV_SIZE);
  327. }
  328. /**
  329. * Encrypt a single block (16 bytes) of data
  330. */
  331. void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
  332. {
  333. /* To make this code smaller, generate the sbox entries on the fly.
  334. * This will have a really heavy effect upon performance.
  335. */
  336. uint32_t tmp[4];
  337. uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
  338. int curr_rnd;
  339. int rounds = ctx->rounds;
  340. const uint32_t *k = ctx->ks;
  341. /* Pre-round key addition */
  342. for (row = 0; row < 4; row++)
  343. data[row] ^= *(k++);
  344. /* Encrypt one block. */
  345. for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
  346. {
  347. /* Perform ByteSub and ShiftRow operations together */
  348. for (row = 0; row < 4; row++)
  349. {
  350. a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
  351. a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
  352. a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
  353. a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
  354. /* Perform MixColumn iff not last round */
  355. if (curr_rnd < (rounds - 1))
  356. {
  357. tmp1 = a0 ^ a1 ^ a2 ^ a3;
  358. old_a0 = a0;
  359. a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
  360. a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
  361. a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
  362. a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
  363. }
  364. tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
  365. }
  366. /* KeyAddition - note that it is vital that this loop is separate from
  367. the MixColumn operation, which must be atomic...*/
  368. for (row = 0; row < 4; row++)
  369. data[row] = tmp[row] ^ *(k++);
  370. }
  371. }
  372. /**
  373. * Decrypt a single block (16 bytes) of data
  374. */
  375. void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
  376. {
  377. uint32_t tmp[4];
  378. uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
  379. uint32_t a0, a1, a2, a3, row;
  380. int curr_rnd;
  381. int rounds = ctx->rounds;
  382. const uint32_t *k = ctx->ks + ((rounds+1)*4);
  383. /* pre-round key addition */
  384. for (row=4; row > 0; row--)
  385. data[row-1] ^= *(--k);
  386. /* Decrypt one block */
  387. for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
  388. {
  389. /* Perform ByteSub and ShiftRow operations together */
  390. for (row = 4; row > 0; row--)
  391. {
  392. a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
  393. a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
  394. a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
  395. a3 = aes_isbox[(data[row%4])&0xFF];
  396. /* Perform MixColumn iff not last round */
  397. if (curr_rnd<(rounds-1))
  398. {
  399. /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
  400. are quite large compared to encryption; this
  401. operation slows decryption down noticeably. */
  402. xt0 = AES_xtime(a0^a1);
  403. xt1 = AES_xtime(a1^a2);
  404. xt2 = AES_xtime(a2^a3);
  405. xt3 = AES_xtime(a3^a0);
  406. xt4 = AES_xtime(xt0^xt1);
  407. xt5 = AES_xtime(xt1^xt2);
  408. xt6 = AES_xtime(xt4^xt5);
  409. xt0 ^= a1^a2^a3^xt4^xt6;
  410. xt1 ^= a0^a2^a3^xt5^xt6;
  411. xt2 ^= a0^a1^a3^xt4^xt6;
  412. xt3 ^= a0^a1^a2^xt5^xt6;
  413. tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
  414. }
  415. else
  416. tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
  417. }
  418. for (row = 4; row > 0; row--)
  419. data[row-1] = tmp[row-1] ^ *(--k);
  420. }
  421. }