/* Number of AES rounds for an aesni_key_size_t selector
   (0 -> 10 rounds / AES-128, 1 -> 12 / AES-192, 2 -> 14 / AES-256)
   and the matching key length in bytes (16 / 24 / 32).
   Arguments are parenthesized so expression arguments expand correctly
   (the original `10 + x *2` mis-expands e.g. AESNI_KEY_ROUNDS(1 + 1)). */
#define AESNI_KEY_ROUNDS(x) (10 + (x) * 2)
#define AESNI_KEY_BYTES(x) (16 + (x) * 8)

/* Fallback in case the project header providing this macro is not seen
   first (it is normally defined elsewhere in the project). */
#ifndef static_always_inline
#define static_always_inline static inline __attribute__ ((__always_inline__))
#endif

/* One step of the AES-128 key schedule.
   r1 is the previous round key; r2 is the _mm_aeskeygenassist_si128
   output for it.  Returns the next round key. */
static_always_inline __m128i
aes128_key_assist (__m128i r1, __m128i r2)
{
  /* fold r1 with successive 32-bit left shifts so each lane becomes the
     XOR of itself and all lower lanes (prefix XOR) */
  r1 ^= _mm_slli_si128 (r1, 4);
  r1 ^= _mm_slli_si128 (r1, 4);
  r1 ^= _mm_slli_si128 (r1, 4);
  /* broadcast lane 3 of the keygen-assist result and mix it in */
  return r1 ^ _mm_shuffle_epi32 (r2, 0xff);
}
/* aes128_key_expand (fragment): round key 0 is the 16-byte user key,
   loaded unaligned.
   NOTE(review): only this first statement of the function is visible in
   this chunk; the remaining round keys (presumably built with
   _mm_aeskeygenassist_si128 + aes128_key_assist, as the surrounding code
   suggests) are on lines not shown here -- confirm against the full file. */
48 k[0] = _mm_loadu_si128 ((
const __m128i *) key);
/* Fallback in case the project header providing this macro is not seen
   first (it is normally defined elsewhere in the project). */
#ifndef static_always_inline
#define static_always_inline static inline __attribute__ ((__always_inline__))
#endif

/* One step of the AES-192 key schedule, updating all three state
   registers in place.
   *r1 : previous schedule words 0..3
   *r2 : _mm_aeskeygenassist_si128 output (lane 1 is consumed); on return
         it holds the broadcast of the new last word of *r1
   *r3 : previous schedule words 4..5 (low lanes) */
static_always_inline void
aes192_key_assist (__m128i * r1, __m128i * r2, __m128i * r3)
{
  __m128i r;
  /* prefix-XOR the lanes of *r1 via 32-bit left shifts */
  *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
  *r1 ^= r = _mm_slli_si128 (r, 0x4);
  *r1 ^= _mm_slli_si128 (r, 0x4);
  /* mix in lane 1 of the keygen-assist value */
  *r1 ^= _mm_shuffle_epi32 (*r2, 0x55);
  /* fold *r3 and XOR in the broadcast of the new lane 3 of *r1;
     the broadcast is also published through *r2 for the caller */
  *r3 ^= _mm_slli_si128 (*r3, 0x4);
  *r3 ^= *r2 = _mm_shuffle_epi32 (*r1, 0xff);
}
/* aes192_key_expand (fragment): expand a 24-byte key into the 13 round
   keys of AES-192.  The 192-bit schedule yields 1.5 round keys per
   iteration, hence the _mm_shuffle_pd splicing of 64-bit halves into k[].
   NOTE(review): the aes192_key_assist() calls and several k[] stores that
   belong between the visible lines are missing from this chunk -- do not
   read this as the complete function. */
78 k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
/* second 16-byte load of the 24-byte key: presumably only its low
   8 bytes are key material -- confirm the caller's buffer allows a full
   16-byte read here */
79 r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
/* round-constant sequence 0x1, 0x2, 0x4, ... doubles on each step */
82 r2 = _mm_aeskeygenassist_si128 (r3, 0x1);
/* splice: low half of k[1] kept, high half from r1; then r1 high half
   with r3 low half forms k[2] */
84 k[1] = (__m128i) _mm_shuffle_pd ((__m128d) k[1], (__m128d) r1, 0);
85 k[2] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
86 r2 = _mm_aeskeygenassist_si128 (r3, 0x2);
91 r2 = _mm_aeskeygenassist_si128 (r3, 0x4);
93 k[4] = (__m128i) _mm_shuffle_pd ((__m128d) k[4], (__m128d) r1, 0);
94 k[5] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
95 r2 = _mm_aeskeygenassist_si128 (r3, 0x8);
100 r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
102 k[7] = (__m128i) _mm_shuffle_pd ((__m128d) k[7], (__m128d) r1, 0);
103 k[8] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
104 r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
109 r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
111 k[10] = (__m128i) _mm_shuffle_pd ((__m128d) k[10], (__m128d) r1, 0);
112 k[11] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1);
113 r2 = _mm_aeskeygenassist_si128 (r3, 0x80);
/* Fallback in case the project header providing this macro is not seen
   first (it is normally defined elsewhere in the project). */
#ifndef static_always_inline
#define static_always_inline static inline __attribute__ ((__always_inline__))
#endif

/* First half-step of the AES-256 key schedule: derive the next
   even-numbered round key in *r1 from the previous one, using the
   _mm_aeskeygenassist_si128 output in *r2 (lane 3 is consumed; *r2 is
   left holding its broadcast). */
static_always_inline void
aes256_key_assist1 (__m128i * r1, __m128i * r2)
{
  __m128i r;
  /* prefix-XOR the lanes of *r1 via 32-bit left shifts */
  *r1 ^= r = _mm_slli_si128 (*r1, 0x4);
  *r1 ^= r = _mm_slli_si128 (r, 0x4);
  *r1 ^= _mm_slli_si128 (r, 0x4);
  /* broadcast lane 3 of the keygen-assist value and mix it in */
  *r1 ^= *r2 = _mm_shuffle_epi32 (*r2, 0xff);
}
/* Fallback in case the project header providing this macro is not seen
   first (it is normally defined elsewhere in the project). */
#ifndef static_always_inline
#define static_always_inline static inline __attribute__ ((__always_inline__))
#endif

/* Second half-step of the AES-256 key schedule: derive the next
   odd-numbered round key in *r3 from the previous one; r1 is the
   even-numbered round key just produced.  SubWord (without Rcon) is
   obtained from _mm_aeskeygenassist_si128 with rcon 0, selecting lane 2
   via the 0xaa shuffle. */
static_always_inline void
aes256_key_assist2 (__m128i r1, __m128i * r3)
{
  __m128i r;
  /* prefix-XOR the lanes of *r3 via 32-bit left shifts */
  *r3 ^= r = _mm_slli_si128 (*r3, 0x4);
  *r3 ^= r = _mm_slli_si128 (r, 0x4);
  *r3 ^= _mm_slli_si128 (r, 0x4);
  /* broadcast SubWord(lane 3 of r1) (rcon = 0) and mix it in */
  *r3 ^= _mm_shuffle_epi32 (_mm_aeskeygenassist_si128 (r1, 0x0), 0xaa);
}
/* aes256_key_expand (fragment): expand a 32-byte key into the 15 round
   keys of AES-256.  Round keys 0 and 1 are the two 16-byte halves of the
   user key; subsequent keys alternate keygen-assist steps.
   NOTE(review): the aes256_key_assist1()/aes256_key_assist2() calls and
   the k[] stores that belong between the visible lines are missing from
   this chunk -- do not read this as the complete function. */
142 k[0] = r1 = _mm_loadu_si128 ((__m128i *) key);
143 k[1] = r3 = _mm_loadu_si128 ((__m128i *) (key + 16));
/* round-constant sequence 0x01, 0x02, 0x04, ... doubles on each step */
144 r2 = _mm_aeskeygenassist_si128 (k[1], 0x01);
149 r2 = _mm_aeskeygenassist_si128 (r3, 0x02);
154 r2 = _mm_aeskeygenassist_si128 (r3, 0x04);
159 r2 = _mm_aeskeygenassist_si128 (r3, 0x08);
164 r2 = _mm_aeskeygenassist_si128 (r3, 0x10);
169 r2 = _mm_aeskeygenassist_si128 (r3, 0x20);
174 r2 = _mm_aeskeygenassist_si128 (r3, 0x40);
/* aes_key_enc_to_dec (fragment): turn an encryption key schedule in
   k[0..rounds] into the matching decryption schedule: round keys are
   reversed and the inner ones run through InvMixColumns
   (_mm_aesimc_si128), as required by the AESDEC "Equivalent Inverse
   Cipher" key layout.
   NOTE(review): the declarations of `rounds` and `r`, the swap of the
   two end keys, and the `r = k[rounds - i];` save inside the loop are
   not visible in this chunk -- this is not the complete function. */
207 for (
int i = 1;
i < (rounds / 2);
i++)
/* swap k[i] with k[rounds - i], InvMixColumns both; `r` presumably
   holds the old k[rounds - i] saved just above -- confirm in full file */
210 k[rounds -
i] = _mm_aesimc_si128 (k[
i]);
211 k[
i] = _mm_aesimc_si128 (r);
/* the middle round key pairs with itself: transform it in place */
214 k[rounds / 2] = _mm_aesimc_si128 (k[rounds / 2]);
static_always_inline void aes192_key_expand(__m128i *k, u8 *key)
#define AESNI_KEY_ROUNDS(x)
static_always_inline void aes_key_expand(__m128i *k, u8 *key, aesni_key_size_t ks)
static_always_inline __m128i aes128_key_assist(__m128i r1, __m128i r2)
#define static_always_inline
static_always_inline void aes256_key_expand(__m128i *k, u8 *key)
static_always_inline void aes256_key_assist1(__m128i *r1, __m128i *r2)
static_always_inline void aes256_key_assist2(__m128i r1, __m128i *r3)
static_always_inline void aes128_key_expand(__m128i *k, u8 *key)
static_always_inline void aes_key_enc_to_dec(__m128i *k, aesni_key_size_t ks)
static_always_inline void aes192_key_assist(__m128i *r1, __m128i *r2, __m128i *r3)