2 poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
/*
 * Fast path for U32TO8: when USE_UNALIGNED is 1 the value is written with a
 * single 32 bit store through a pointer cast to uint32_t *.
 * NOTE(review): this presumably relies on the target tolerating unaligned
 * uint32_t stores and on a little endian host, so that the bytes land in the
 * same order as the byte-wise U32TO8 function -- confirm before enabling.
 */
8 #if (USE_UNALIGNED == 1)
11 #define U32TO8(p, v) \
13 *((uint32_t *)(p)) = v; \
16 /* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */
/*
 * Reads p[0]..p[3]: p[0] supplies the least significant byte and p[3] the
 * most significant one. Each byte is masked with 0xff and widened to uint32_t
 * before it is shifted, so the shifts are carried out in 32 bit unsigned
 * arithmetic.
 */
18 U8TO32(const unsigned char *p) {
20 (((uint32_t)(p[0] & 0xff)) |
21 ((uint32_t)(p[1] & 0xff) << 8) |
22 ((uint32_t)(p[2] & 0xff) << 16) |
23 ((uint32_t)(p[3] & 0xff) << 24));
26 /* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */
/*
 * p[1], p[2] and p[3] receive bits 8-15, 16-23 and 24-31 of v respectively;
 * the & 0xff keeps only the low eight bits of each shifted value.
 */
28 U32TO8(unsigned char *p, uint32_t v) {
30 p[1] = (v >> 8) & 0xff;
31 p[2] = (v >> 16) & 0xff;
32 p[3] = (v >> 24) & 0xff;
/*
 * Load a 32 byte key into the context.
 *
 * key[0..15] is read as r and stored in st->r[0..4] as limbs of at most
 * 26 bits: limb i comes from the little endian 32 bit word at byte offset
 * 3*i, shifted right by 2*i bits, so it starts at bit 26*i of r. Each mask
 * trims the limb and applies the clamp at the same time.
 * key[16..31] is stored in st->pad[0..3] as four little endian 32 bit words.
 */
37 poly1305_init(struct poly1305_context *st, const unsigned char key[32]) {
38 /* clamp r: r &= 0x0ffffffc0ffffffc0ffffffc0fffffff, i.e. clear the top 4 bits of bytes 3, 7, 11, 15 and the low 2 bits of bytes 4, 8, 12 */
39 st->r[0] = (U8TO32(&key[ 0])) & 0x3ffffff;
40 st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
41 st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
42 st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
43 st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
52 /* save pad for later: poly1305_finish adds it to the packed accumulator */
53 st->pad[0] = U8TO32(&key[16]);
54 st->pad[1] = U8TO32(&key[20]);
55 st->pad[2] = U8TO32(&key[24]);
56 st->pad[3] = U8TO32(&key[28]);
/*
 * Absorb whole blocks from m into the accumulator limbs h0..h4.
 *
 * The loop runs while at least POLY1305_BLOCK_SIZE bytes remain and consumes
 * one block per iteration; a tail shorter than a block is not read.
 * Per block: the message bytes are split into five 26 bit limbs and added to
 * h0..h4, the limbs are multiplied into the 64 bit accumulators d0..d4, and
 * each accumulator is split back into a 26 bit limb plus a carry c.
 *
 * NOTE(review): r0..r4 are presumably copies of st->r[0..4] and s1..s4 are
 * presumably 5 * r1..r4 (the usual reduction for 2^130 - 5); the carry c is
 * presumably added into the next accumulator -- confirm against the
 * statements surrounding the ones documented here.
 */
63 poly1305_blocks(struct poly1305_context *st, const unsigned char *m, size_t bytes) {
/* limb 4 starts at bit 104 of the block (byte 12, shifted right by 8), so bit 24 of that limb is bit 128; the bit is left out when st->final is set */
64 const uint32_t hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */
65 uint32_t r0, r1, r2, r3, r4;
66 uint32_t s1, s2, s3, s4;
67 uint32_t h0, h1, h2, h3, h4;
68 uint64_t d0, d1, d2, d3, d4;
88 while(bytes >= POLY1305_BLOCK_SIZE) {
/* h += m[i]: overlapping little endian 32 bit reads at byte offsets 0, 3, 6, 9, 12, shifted by 0, 2, 4, 6, 8 bits, yield the limbs starting at bits 0, 26, 52, 78, 104 */
90 h0 += (U8TO32(m + 0)) & 0x3ffffff;
91 h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff;
92 h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff;
93 h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff;
94 h4 += (U8TO32(m + 12) >> 8) | hibit;
/* d[k] collects h[i] * r[k-i] for i <= k and h[i] * s[5+k-i] for i > k; the uint64_t cast makes every product a full 64 bit multiplication */
97 d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + ((uint64_t)h4 * s1);
98 d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + ((uint64_t)h4 * s2);
99 d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + ((uint64_t)h4 * s3);
100 d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + ((uint64_t)h4 * s4);
101 d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + ((uint64_t)h4 * r0);
103 /* (partial) h %= p */
/* for each accumulator: c takes the bits above the low 26, the limb keeps the low 26 bits */
104 c = (uint32_t)(d0 >> 26);
105 h0 = (uint32_t)d0 & 0x3ffffff;
107 c = (uint32_t)(d1 >> 26);
108 h1 = (uint32_t)d1 & 0x3ffffff;
110 c = (uint32_t)(d2 >> 26);
111 h2 = (uint32_t)d2 & 0x3ffffff;
113 c = (uint32_t)(d3 >> 26);
114 h3 = (uint32_t)d3 & 0x3ffffff;
116 c = (uint32_t)(d4 >> 26);
117 h4 = (uint32_t)d4 & 0x3ffffff;
/* advance to the next block */
123 m += POLY1305_BLOCK_SIZE;
124 bytes -= POLY1305_BLOCK_SIZE;
/*
 * Finish the computation for st and write the result to mac.
 *
 * Steps shown here: a buffered partial block is run through poly1305_blocks
 * as one block of POLY1305_BLOCK_SIZE bytes, h is compared against p through
 * g = h + -p, the five 26 bit limbs are repacked into four 32 bit words, the
 * pad saved by poly1305_init is added with carry, and the last word is
 * stored at mac + 12 with U32TO8.
 *
 * NOTE(review): the loop over [st->leftover, POLY1305_BLOCK_SIZE) presumably
 * fills the rest of st->buffer before the block is processed -- confirm.
 */
135 poly1305_finish(struct poly1305_context *st, unsigned char mac[16]) {
136 uint32_t h0, h1, h2, h3, h4, c;
137 uint32_t g0, g1, g2, g3, g4;
141 /* process the remaining block */
143 size_t i = st->leftover;
146 for(; i < POLY1305_BLOCK_SIZE; i++) {
151 poly1305_blocks(st, st->buffer, POLY1305_BLOCK_SIZE);
/* unsigned arithmetic: when h4 + c is below 1 << 26 the subtraction wraps around and the top bit of g4 ends up set */
190 g4 = h4 + c - (1 << 26);
192 /* select h if h < p, or h + -p if h >= p */
/* mask is 0 when the top bit of g4 is set and all ones when it is clear */
193 mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
/* NOTE(review): these lines keep the bits of h selected by mask and OR in g; that is a clean selection only if g and mask were prepared so that one of the two terms is zero -- confirm against the preceding statements */
200 h0 = (h0 & mask) | g0;
201 h1 = (h1 & mask) | g1;
202 h2 = (h2 & mask) | g2;
203 h3 = (h3 & mask) | g3;
204 h4 = (h4 & mask) | g4;
206 /* h = h % (2^128) */
/* repack the 26 bit limbs into four 32 bit words; bits shifted beyond bit 31 are dropped by the uint32_t arithmetic, so the & 0xffffffff does not change the value */
207 h0 = ((h0) | (h1 << 26)) & 0xffffffff;
208 h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
209 h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
210 h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
212 /* mac = (h + pad) % (2^128) */
/* word-by-word 64 bit addition; (f >> 32) is the carry out of the previous word */
213 f = (uint64_t)h0 + st->pad[0] ;
215 f = (uint64_t)h1 + st->pad[1] + (f >> 32);
217 f = (uint64_t)h2 + st->pad[2] + (f >> 32);
219 f = (uint64_t)h3 + st->pad[3] + (f >> 32);
225 U32TO8(mac + 12, h3);
227 /* zero out the state */
/*
 * Feed bytes of message data from m into the context.
 *
 * Three phases are shown: top up a partially filled st->buffer and process
 * it once it holds a full block, pass the whole blocks of m directly to
 * poly1305_blocks, and copy what remains into st->buffer, with st->leftover
 * tracking how many bytes are buffered.
 */
246 poly1305_update(struct poly1305_context *st, const unsigned char *m, size_t bytes) {
249 /* handle leftover */
/* want = number of bytes still missing from a full buffered block */
251 size_t want = (POLY1305_BLOCK_SIZE - st->leftover);
257 for(i = 0; i < want; i++) {
258 st->buffer[st->leftover + i] = m[i];
263 st->leftover += want;
/* the buffered block is handed to poly1305_blocks below once it is complete */
265 if(st->leftover < POLY1305_BLOCK_SIZE) {
269 poly1305_blocks(st, st->buffer, POLY1305_BLOCK_SIZE);
273 /* process full blocks */
274 if(bytes >= POLY1305_BLOCK_SIZE) {
/* clears the low bits of bytes, i.e. rounds down to a multiple of the block size; NOTE(review): this needs POLY1305_BLOCK_SIZE to be a power of two -- its definition is outside this function */
275 size_t want = (bytes & ~(POLY1305_BLOCK_SIZE - 1));
276 poly1305_blocks(st, m, want);
/* append the remaining bytes to the buffer, via memcpy when USE_MEMCPY is 1 and via the byte loop otherwise */
283 #if (USE_MEMCPY == 1)
284 memcpy(st->buffer + st->leftover, m, bytes);
287 for(i = 0; i < bytes; i++) {
288 st->buffer[st->leftover + i] = m[i];
292 st->leftover += bytes;
/*
 * One-shot helper: runs poly1305_init with key, poly1305_update over the
 * bytes at m, and poly1305_finish into mac, using a context local to the
 * call.
 */
297 poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]) {
298 struct poly1305_context ctx;
299 poly1305_init(&ctx, key);
300 poly1305_update(&ctx, m, bytes);
301 poly1305_finish(&ctx, mac);