diff --git a/audit/poly1305.c b/audit/poly1305.c deleted file mode 100644 index bf79143..0000000 --- a/audit/poly1305.c +++ /dev/null @@ -1,442 +0,0 @@ -/* =================================================================== - * - * Copyright (c) 2018, Helder Eijs - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * =================================================================== - */ - -#include "common.h" -#include "endianess.h" - -FAKE_INIT(poly1305) - -typedef struct mac_state_t { - uint32_t r[4], rr[4]; /** first key - variable in polynomial **/ - uint32_t s[5]; /** second key - fixed term in polynomial **/ - uint32_t h[5]; /** state **/ - - uint8_t buffer[16]; /** temp input **/ - unsigned buffer_used; -} mac_state; - -/* - * Load 16 bytes as the secret r, which is the value we evaluate the polynomial - * with, modulo 2^130-5. - * - * The secret gets encoded into four 32-bit words (r[]), after appropriate clamping - * (reset) is applied to 22 of its bits. - * - * Additionaly, reduce modulo 2^130-5 the value 2^130*r into rr[], which we can - * reuse several times later during each multiplication. - * - * @param[out] r: The 4-word array with the r value (little-endian) - * @param[out] rr: The 4-word array with the value (r * 2^130) modulo 2^130-5 (little-endian) - * @param[in] secret: The 16 bytes encoding r (not necessarily clamped already) - */ -STATIC void poly1305_load_r(uint32_t r[4], uint32_t rr[4], const uint8_t secret[16]) -{ - unsigned i; - uint32_t mask; - - for (i=0; i<4; i++) { - /** - * The 4 most significant bits in a word are reset. - * The 2 least significant bits in a word are reset, except for r[0] - */ - mask = (i==0) ? 0x0FFFFFFFU : 0x0FFFFFFCU; - r[i] = LOAD_U32_LITTLE(secret+i*4) & mask; - rr[i] = (r[i] >> 2)*5; - } -} - -/* - * Load the next chunk of message as an integer. - * - * @param[out] m: The 5-word array the chunk will be read into (little-endian) - * @param[in] data: The next chunk of message, at most 16 bytes. It is - * smaller than 16 only if it is the last chunk. - * @param[in] len: The length of the chunk (<=16) - */ -STATIC void poly1305_load_m(uint32_t m[5], const uint8_t data[], size_t len) -{ - uint8_t copy[sizeof(uint32_t)*5]; - - assert(len<=16); - - memset(copy, 0, sizeof(copy)); - memcpy(copy, data, len); - copy[len] = 1; /** 2^128 or 2^{8*(l mod 16)} **/ - - m[0] = LOAD_U32_LITTLE(copy); - m[1] = LOAD_U32_LITTLE(copy+4); - m[2] = LOAD_U32_LITTLE(copy+8); - m[3] = LOAD_U32_LITTLE(copy+12); - m[4] = LOAD_U32_LITTLE(copy+16); -} - -/* - * Load 16 bytes as the secret s, which is the fixed term for the polynomial, modulo 2^130-5. - * - * @param[out] m: The 5-word array that will contain the secret s (little-endian) - * @param[in] s: The 16 bytes that encode the value s. It is typically the - * result of an AES of ChaCha20 encryption. - */ -static void poly1305_load_s(uint32_t m[5], const uint8_t s[16]) -{ - m[0] = LOAD_U32_LITTLE(s); - m[1] = LOAD_U32_LITTLE(s+4); - m[2] = LOAD_U32_LITTLE(s+8); - m[3] = LOAD_U32_LITTLE(s+12); - m[4] = 0; -} - -/** - * Multiply a value by the secret r, "almost" modulo 2^130-5. - * - * @param[in,out] h: The 5-word array with the value to multiply (little-endian). - * The result is stored back here. - * The result is guaranteed to be smaller than 2^131 (not 2^130-5, - * hence the "almost" modulo) for any value of h[] in input. - * @param[in] r: The 4-word array with the multiplier, as generated by - * poly1305_load_r() (little-endian). - * @param[in] rr: The 4-word array with the other value generated by - * poly1305__load_r() for the same multipler (little-endian). - */ -STATIC void poly1305_multiply(uint32_t h[5], const uint32_t r[4], const uint32_t rr[4]) -{ - uint64_t a0, a1, a2, a3; - uint64_t aa0, aa1, aa2, aa3; - uint64_t x0, x1, x2, x3, x4; - uint64_t carry; - - /* - * Boundaries - * - h[0..4] < 2^32 - * - r[0..3] < 2^28 < 5*2^26 - * - rr[0..3] < 5*2^26 - */ - - a0 = r[0]; - a1 = r[1]; - a2 = r[2]; - a3 = r[3]; - aa0 = rr[0]; - aa1 = rr[1]; - aa2 = rr[2]; - aa3 = rr[3]; - - /** - * Schoolbook multiplication between h[] and r[], with the caveat that - * the components exceeding 2^130 are folded back with a right shift and - * a multiplication by 5 (already precomputed in rr[]). - * - * Each sum is guaranteed to be smaller than 2^63 (x0 being the worst case). - */ - x0 = a0*h[0] + aa0*h[4] + aa1*h[3] + aa2*h[2] + aa3*h[1]; - x1 = a0*h[1] + a1*h[0] + aa1*h[4] + aa2*h[3] + aa3*h[2]; - x2 = a0*h[2] + a1*h[1] + a2*h[0] + aa2*h[4] + aa3*h[3]; - x3 = a0*h[3] + a1*h[2] + a2*h[1] + a3*h[0] + aa3*h[4]; - x4 = (a0 & 3)*h[4]; /** < 2^34 **/ - - /** Clear upper half of x3 **/ - x4 += x3 >> 32; - x3 &= UINT32_MAX; - - /** Clear the 62 most significant bits of x4 and - * create carry for x0 **/ - carry = (x4 >> 2)*5; /** < 2^35 **/ - x4 &= 3; - - /** Reduce x0 to 32 bits and store into h0 **/ - x0 += carry; - h[0] = x0 & UINT32_MAX; - carry = x0 >> 32; - - /** Reduce x1 to 32 bits and store into h1 **/ - x1 += carry; - h[1] = x1 & UINT32_MAX; - carry = x1 >> 32; - - /** Reduce x2 to 32 bits and store into h2 **/ - x2 += carry; - h[2] = x2 & UINT32_MAX; - carry = x2 >> 32; - - /** Reduce x3 to 32 bits and store into h3 **/ - x3 += carry; - h[3] = x3 & UINT32_MAX; - carry = x3 >> 32; /** < 1 **/ - - /** Reduce x4 to 32 bits and store into h4 **/ - x4 += carry; /** < 2^3 **/ - assert(x4 < 8); - h[4] = (uint32_t)x4; -} - -/* - * Reduce a value h[] modulo 2^130-5. - * - * @param[in,out] h: The 5-word array with the value to reduce (little-endian). - * The result is stored back here and it is guaranteed to - * be smaller than 2^130- 5. - * The incoming value h must be smaller than 2^131. - */ -STATIC void poly1305_reduce(uint32_t h[5]) -{ - unsigned i; - - assert(h[4]<8); - - for (i=0; i<2; i++) { - uint32_t mask, carry; - uint32_t g[5]; - - /** Compute h+(-p) by adding and removing 2^130 **/ - g[0] = h[0] + 5; carry = g[0] < h[0]; - g[1] = h[1] + carry; carry = g[1] < h[1]; - g[2] = h[2] + carry; carry = g[2] < h[2]; - g[3] = h[3] + carry; carry = g[3] < h[3]; - g[4] = h[4] + carry - 4; - - mask = (g[4] >> 31) - 1U; /** All 1s if g[] is a valid reduction **/ - h[0] = (h[0] & ~mask) ^ (g[0] & mask); - h[1] = (h[1] & ~mask) ^ (g[1] & mask); - h[2] = (h[2] & ~mask) ^ (g[2] & mask); - h[3] = (h[3] & ~mask) ^ (g[3] & mask); - h[4] = (h[4] & ~mask) ^ (g[4] & mask); - } -} - -/** - * Add two values. - * - * It must be assured that the sum does not exceed 2^160. - * - * @param[in,out] h: The 5-word variable to accumulate into (little-endian). - * @param[in] m: The other 5-word term to add (little-endian). - */ -STATIC void poly1305_accumulate(uint32_t h[5], const uint32_t m[5]) -{ -#if 0 - // 128-bit type exist and little-endian - uint32_t carry; - __uint128_t a, b, c; - - memcpy(&a, h, 16); - memcpy(&b, m, 16); - c = a + b; carry = c < a; - memcpy(h, &c, 16); - h[4] += m[4] + carry; -#else - uint8_t carry; - uint64_t tmp; - - h[0] += m[0]; - carry = h[0] < m[0]; - - tmp = (uint64_t)h[1] + m[1] + carry; - h[1] = (uint32_t) tmp; - carry = (tmp >> 32) & 1; - - tmp = (uint64_t)h[2] + m[2] + carry; - h[2] = (uint32_t) tmp; - carry = (tmp >> 32) & 1; - - tmp = (uint64_t)h[3] + m[3] + carry; - h[3] = (uint32_t) tmp; - carry = (tmp >> 32) & 1; - - tmp = (uint64_t)h[4] + m[4] + carry; - h[4] = (uint32_t) tmp; - - assert((tmp >> 32) == 0); -#endif -} - -/** - * Process the next chunk of the message. - * - * This procedure performs the following operation (assuming that msg is 16 byte long): - * - * h = r * (h + (2^128 + little_endian_int(msg))) quasi-modulo 2^130-5 - * - * Quasi-modulo means that the computations are performed modulo 2^130-5 but the - * result is still only guaranteed to be smaller than 2^131. - * - * @param[in,out] h: The 5-word variable to accumulate into. - * In input, it must be smaller than 2^131. - * In output, it is guranteed to remain smaller than 2^131. - * @param[in] r: The 4-word array with the multiplier, as generated by - * poly1305_load_r() - * @param[in] rr: The 4-word array with the other value generated by - * poly1305__load_r() for the same multipler. - * @param[in] data: The next chunk of message, at most 16 bytes. It is - * smaller than 16 only if it is the last chunk. - * @param[in] len: The length of chunk (<=16) - */ -static void poly1305_process(uint32_t h[5], uint32_t r[4], uint32_t rr[4], uint8_t msg[], size_t len) -{ - uint32_t m[5]; - - if (len == 0) - return; - - poly1305_load_m(m, msg, len); - poly1305_accumulate(h, m); /** We add two values that don't exceed 2^131, so - * this addition will not overflow 2^160. - */ - poly1305_multiply(h, r, rr); -} - -/* - * Terminate processing of the message and create the final MAC tag. - * - * @param[in,out] h: The 5-word variable where the resulting MAC must be put into, - * truncated to 128 bits. - * In input, it contains the value the polynomial has been evaluated at, - * without the fixed term. The input is smaller than 2^131. - * @param[in] s: The 5-word value s, that is, the fixed term of the - * polynomial, as created by poly1305_load_s(). - */ -static void poly1305_finalize(uint32_t h[5], const uint32_t s[5]) -{ - poly1305_reduce(h); - poly1305_accumulate(h, s); - h[4] = 0; /** modulo 2**128 **/ -} - -/* --------------------------------------------------------- */ - -EXPORT_SYM int poly1305_init(mac_state **pState, - const uint8_t r[16], - size_t r_len, - const uint8_t s[16], - size_t s_len) -{ - mac_state *ms; - - if (NULL == pState || NULL == r || NULL == s) - return ERR_NULL; - - if (r_len != 16 || s_len != 16) - return ERR_KEY_SIZE; - - *pState = ms = (mac_state*) calloc(1, sizeof(mac_state)); - if (NULL == ms) - return ERR_MEMORY; - - poly1305_load_r(ms->r, ms->rr, r); - poly1305_load_s(ms->s, s); - - return 0; -} - -EXPORT_SYM int poly1305_destroy(mac_state *state) -{ - if (NULL == state) - return ERR_NULL; - free(state); - return 0; -} - -EXPORT_SYM int poly1305_update(mac_state *state, - const uint8_t *in, - size_t len) -{ - if (NULL == state || NULL == in) - return ERR_NULL; - - while (len>0) { - unsigned btc; - - btc = (unsigned)MIN(len, 16 - state->buffer_used); - memcpy(state->buffer + state->buffer_used, in, btc); - state->buffer_used += btc; - in += btc; - len -= btc; - - if (state->buffer_used == 16) { - poly1305_process(state->h, state->r, state->rr, state->buffer, 16); - state->buffer_used = 0; - } - } - - return 0; -} - -EXPORT_SYM int poly1305_digest(const mac_state *state, - uint8_t digest[16], - size_t len) -{ - mac_state temp; - unsigned i; - - if (NULL == state || NULL == digest) { - return ERR_NULL; - } - - if (len != 16) - return ERR_DIGEST_SIZE; - - temp = *state; - - if (temp.buffer_used > 0) { - poly1305_process(temp.h, temp.r, temp.rr, temp.buffer, temp.buffer_used); - } - - poly1305_finalize(temp.h, temp.s); - - for (i=0; i<4; i++) { - STORE_U32_LITTLE(digest+i*4, temp.h[i]); - } - - return 0; -} - -#ifdef PROFILE -int main(void) -{ - const unsigned data_size = 1024*1024; - mac_state *state; - const uint8_t r[16] = "1234567890123456"; - const uint8_t s[16] = "1234567890123456"; - uint8_t *data; - - data = malloc(data_size); - for (int i=0; i