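/*
 * Repository web-page residue captured together with this listing.
 * It is not part of the program and is kept only for provenance:
 *
 *   1
 *   0
 *   Fork 0
 *   mirror of https://github.com/HACKERALERT/Picocrypt.git synced 2024-12-29 19:02:43 +00:00
 *   Picocrypt/audit/poly1305.c
 *   2021-03-22 09:15:05 -04:00
 *   442 lines
 *   13 KiB
 *   C
 */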

/* ===================================================================
*
* Copyright (c) 2018, Helder Eijs <helderijs@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* ===================================================================
*/
#include "common.h"
#include "endianess.h"
FAKE_INIT(poly1305)
/*
 * Running state of one Poly1305 computation.
 *
 * r/rr and s are filled in by poly1305_init(); h, buffer and buffer_used
 * evolve as data is fed through poly1305_update().
 */
typedef struct mac_state_t {
    /* First key: the variable of the polynomial, as four 32-bit words */
    uint32_t r[4];
    /* Companion words of r, written by poly1305_load_r() alongside r */
    uint32_t rr[4];
    /* Second key: the fixed term of the polynomial */
    uint32_t s[5];
    /* Accumulator (current value of the polynomial) */
    uint32_t h[5];
    /* Input bytes that do not yet make up a full 16-byte chunk */
    uint8_t buffer[16];
    /* Number of bytes currently held in buffer[] */
    unsigned int buffer_used;
} mac_state;
/*
 * Derive the multiplier r, and its helper rr, from 16 bytes of key material.
 *
 * r is the value the polynomial is evaluated with, modulo 2^130-5. The bytes
 * are read as four little-endian 32-bit words and clamped, which resets 22 of
 * the 128 bits.
 *
 * For every word, rr[i] = (r[i] >> 2) * 5 is stored as well. These values are
 * reused by every call to poly1305_multiply() to fold the parts of the product
 * that exceed 2^130 back into the lower words.
 *
 * @param[out] r:       The 4-word array receiving the clamped r (little-endian)
 * @param[out] rr:      The 4-word array receiving (r[i] >> 2) * 5 for each word
 * @param[in]  secret:  The 16 bytes encoding r (not necessarily clamped already)
 */
STATIC void poly1305_load_r(uint32_t r[4], uint32_t rr[4], const uint8_t secret[16])
{
    /*
     * Every word loses its 4 most significant bits.
     * Every word except the first also loses its 2 least significant bits.
     */
    static const uint32_t clamp[4] = {
        0x0FFFFFFFU, 0x0FFFFFFCU, 0x0FFFFFFCU, 0x0FFFFFFCU
    };
    unsigned word;

    for (word = 0; word < 4; word++) {
        const uint32_t clamped = LOAD_U32_LITTLE(secret + word*4) & clamp[word];

        r[word] = clamped;
        rr[word] = (clamped >> 2)*5;
    }
}
/*
 * Turn the next chunk of the message into a 5-word integer.
 *
 * The chunk is copied into a zeroed 20-byte scratch area and a single 1 byte
 * is written right after its last byte, i.e. the value 2^(8*len) is added
 * (2^128 for a full chunk).
 *
 * @param[out] m:     The 5-word array the chunk is read into (little-endian)
 * @param[in]  data:  The next chunk of message, at most 16 bytes. It is
 *                    shorter than 16 bytes only if it is the last chunk.
 * @param[in]  len:   The length of the chunk (<=16)
 */
STATIC void poly1305_load_m(uint32_t m[5], const uint8_t data[], size_t len)
{
    uint8_t padded[5 * sizeof(uint32_t)] = { 0 };
    unsigned word;

    assert(len<=16);

    memcpy(padded, data, len);
    padded[len] = 1;    /** 2^128 or 2^{8*(l mod 16)} **/

    for (word = 0; word < 5; word++) {
        m[word] = LOAD_U32_LITTLE(padded + word*4);
    }
}
/*
 * Load 16 bytes as the secret s, the fixed term of the polynomial.
 *
 * The bytes are read as four little-endian 32-bit words; the fifth word of
 * the destination is set to zero.
 *
 * @param[out] m:  The 5-word array that will contain the secret s (little-endian)
 * @param[in]  s:  The 16 bytes that encode the value s. It is typically the
 *                 result of an AES or ChaCha20 encryption.
 */
static void poly1305_load_s(uint32_t m[5], const uint8_t s[16])
{
    unsigned word;

    for (word = 0; word < 4; word++) {
        m[word] = LOAD_U32_LITTLE(s + word*4);
    }
    m[4] = 0;
}
/**
 * Multiply a value by the secret r, "almost" modulo 2^130-5.
 *
 * The product is folded back below 2^131 but it is not fully reduced;
 * poly1305_reduce() performs the final reduction.
 *
 * @param[in,out] h: The 5-word array with the value to multiply (little-endian).
 * The result is stored back here.
 * The result is guaranteed to be smaller than 2^131 (not 2^130-5,
 * hence the "almost" modulo) for any value of h[] in input.
 * @param[in] r: The 4-word array with the multiplier, as generated by
 * poly1305_load_r() (little-endian).
 * @param[in] rr: The 4-word array with the other value generated by
 * poly1305_load_r() for the same multiplier (little-endian).
 */
STATIC void poly1305_multiply(uint32_t h[5], const uint32_t r[4], const uint32_t rr[4])
{
uint64_t a0, a1, a2, a3;
uint64_t aa0, aa1, aa2, aa3;
uint64_t x0, x1, x2, x3, x4;
uint64_t carry;
/*
 * Boundaries
 * - h[0..4] < 2^32
 * - r[0..3] < 2^28 < 5*2^26
 * - rr[0..3] < 5*2^26
 */
/* Widen every word to 64 bits so that the products below are computed in 64 bits */
a0 = r[0];
a1 = r[1];
a2 = r[2];
a3 = r[3];
aa0 = rr[0];
aa1 = rr[1];
aa2 = rr[2];
aa3 = rr[3];
/**
 * Schoolbook multiplication between h[] and r[], with the caveat that
 * the components exceeding 2^130 are folded back with a right shift and
 * a multiplication by 5 (already precomputed in rr[]).
 *
 * Each sum is guaranteed to be smaller than 2^63 (x0 being the worst case):
 * it has at most five terms, each below 5*2^26 * 2^32.
 */
x0 = a0*h[0] + aa0*h[4] + aa1*h[3] + aa2*h[2] + aa3*h[1];
x1 = a0*h[1] + a1*h[0] + aa1*h[4] + aa2*h[3] + aa3*h[2];
x2 = a0*h[2] + a1*h[1] + a2*h[0] + aa2*h[4] + aa3*h[3];
x3 = a0*h[3] + a1*h[2] + a2*h[1] + a3*h[0] + aa3*h[4];
/* rr[0] was built from r[0] >> 2, so the 2 low bits of r[0] are
 * multiplied by h[4] separately and stay in the top word. */
x4 = (a0 & 3)*h[4]; /** < 2^34 **/
/** Clear upper half of x3 **/
x4 += x3 >> 32;
x3 &= UINT32_MAX;
/** Clear the 62 most significant bits of x4 and
 * create carry for x0 **/
/* Everything in x4 above bit 1 has weight 2^130 or more: it is shifted
 * down, multiplied by 5 and injected at the bottom. */
carry = (x4 >> 2)*5; /** < 2^35 **/
x4 &= 3;
/** Reduce x0 to 32 bits and store into h0 **/
x0 += carry;
h[0] = x0 & UINT32_MAX;
carry = x0 >> 32;
/** Reduce x1 to 32 bits and store into h1 **/
x1 += carry;
h[1] = x1 & UINT32_MAX;
carry = x1 >> 32;
/** Reduce x2 to 32 bits and store into h2 **/
x2 += carry;
h[2] = x2 & UINT32_MAX;
carry = x2 >> 32;
/** Reduce x3 to 32 bits and store into h3 **/
/* x3 was already cut down to 32 bits above, so adding the carry coming
 * from x2 can overflow into bit 32 at most once. */
x3 += carry;
h[3] = x3 & UINT32_MAX;
carry = x3 >> 32; /** <= 1 **/
/** Reduce x4 to 32 bits and store into h4 **/
/* x4 held 2 bits (<= 3) and receives a carry of at most 1 */
x4 += carry; /** < 2^3 **/
assert(x4 < 8);
h[4] = (uint32_t)x4;
}
/*
 * Reduce a value h[] modulo 2^130-5.
 *
 * The reduction is done with two rounds of "subtract p if the result does not
 * go negative", where the choice is made with a bit mask and not with a branch.
 *
 * @param[in,out] h: The 5-word array with the value to reduce (little-endian).
 * The result is stored back here and it is guaranteed to
 * be smaller than 2^130-5.
 * The incoming value h must be smaller than 2^131.
 */
STATIC void poly1305_reduce(uint32_t h[5])
{
unsigned i;
assert(h[4]<8);
/* With p = 2^130-5, the bound 2^131 equals 2*p + 10: subtracting p at most
 * twice is enough to bring any admissible h below p. */
for (i=0; i<2; i++) {
uint32_t mask, carry;
uint32_t g[5];
/** Compute h+(-p) by adding and removing 2^130 **/
/* Add 5 to the lowest word and ripple the carry upwards: a 32-bit sum that
 * wrapped around is smaller than the operand it started from. */
g[0] = h[0] + 5; carry = g[0] < h[0];
g[1] = h[1] + carry; carry = g[1] < h[1];
g[2] = h[2] + carry; carry = g[2] < h[2];
g[3] = h[3] + carry; carry = g[3] < h[3];
/* Subtracting 4 from the top word (weight 2^128) removes 2^130 */
g[4] = h[4] + carry - 4;
/* If the subtraction went negative, g[4] wrapped and its bit 31 is set:
 * the mask becomes 0 and h is kept. Otherwise the mask is all 1s and g
 * replaces h. */
mask = (g[4] >> 31) - 1U; /** All 1s if g[] is a valid reduction **/
h[0] = (h[0] & ~mask) ^ (g[0] & mask);
h[1] = (h[1] & ~mask) ^ (g[1] & mask);
h[2] = (h[2] & ~mask) ^ (g[2] & mask);
h[3] = (h[3] & ~mask) ^ (g[3] & mask);
h[4] = (h[4] & ~mask) ^ (g[4] & mask);
}
}
/**
 * Add the 5-word value m[] into the 5-word value h[].
 *
 * The caller must make sure that the sum fits into 160 bits; a carry out of
 * the last word is only caught by an assertion.
 *
 * @param[in,out] h:  The 5-word variable to accumulate into (little-endian).
 * @param[in]     m:  The other 5-word term to add (little-endian).
 */
STATIC void poly1305_accumulate(uint32_t h[5], const uint32_t m[5])
{
#if 0
    // 128-bit type exist and little-endian
    uint32_t carry;
    __uint128_t a, b, c;

    memcpy(&a, h, 16);
    memcpy(&b, m, 16);
    c = a + b; carry = c < a;
    memcpy(h, &c, 16);
    h[4] += m[4] + carry;
#else
    unsigned limb;
    uint8_t cy;
    uint64_t wide;

    /* Lowest word: plain 32-bit addition; the sum wrapped around if and
     * only if it ended up smaller than the term that was added. */
    h[0] += m[0];
    cy = h[0] < m[0];

    /* Remaining words: add in 64 bits so that the carry shows up as bit 32 */
    for (limb = 1; limb < 5; limb++) {
        wide = (uint64_t)h[limb] + m[limb] + cy;
        h[limb] = (uint32_t) wide;
        cy = (wide >> 32) & 1;
    }

    /* Nothing may be carried out of the most significant word */
    assert(cy == 0);
#endif
}
/**
 * Absorb the next chunk of the message into the accumulator.
 *
 * For a 16-byte chunk the operation is:
 *
 *      h = r * (h + (2^128 + little_endian_int(msg)))  quasi-modulo 2^130-5
 *
 * Quasi-modulo means that the computation is carried out modulo 2^130-5 but
 * the result is only guaranteed to be smaller than 2^131.
 *
 * Nothing happens when len is 0.
 *
 * @param[in,out] h:    The 5-word variable to accumulate into.
 *                      In input, it must be smaller than 2^131.
 *                      In output, it is guaranteed to remain smaller than 2^131.
 * @param[in]     r:    The 4-word array with the multiplier, as generated by
 *                      poly1305_load_r()
 * @param[in]     rr:   The 4-word array with the other value generated by
 *                      poly1305_load_r() for the same multiplier.
 * @param[in]     msg:  The next chunk of message, at most 16 bytes. It is
 *                      shorter than 16 bytes only if it is the last chunk.
 * @param[in]     len:  The length of the chunk (<=16)
 */
static void poly1305_process(uint32_t h[5], uint32_t r[4], uint32_t rr[4], uint8_t msg[], size_t len)
{
    if (len != 0) {
        uint32_t chunk[5];

        poly1305_load_m(chunk, msg, len);

        /* Both terms are below 2^131, so their sum cannot overflow 2^160 */
        poly1305_accumulate(h, chunk);

        poly1305_multiply(h, r, rr);
    }
}
/*
 * Close the computation and produce the final MAC tag.
 *
 * The accumulator is fully reduced modulo 2^130-5, the fixed term s is added
 * and only the lowest 128 bits of the sum are kept.
 *
 * @param[in,out] h:  In input, the value the polynomial has been evaluated at,
 *                    without the fixed term; it must be smaller than 2^131.
 *                    In output, the MAC tag in h[0..3], with h[4] set to 0.
 * @param[in]     s:  The 5-word value s, that is, the fixed term of the
 *                    polynomial, as created by poly1305_load_s().
 */
static void poly1305_finalize(uint32_t h[5], const uint32_t s[5])
{
    /* Bring h into its canonical range before the fixed term is added */
    poly1305_reduce(h);

    poly1305_accumulate(h, s);

    /* Dropping the top word is the reduction modulo 2**128 */
    h[4] = 0U;
}
/* --------------------------------------------------------- */
/*
 * Allocate a new MAC state and load the two halves of the key into it.
 *
 * The state comes from calloc(), so the accumulator, the input buffer and
 * the buffer counter all start at zero.
 *
 * @param[out] pState:  Where the pointer to the new state is stored. It is
 *                      set to NULL if the allocation fails, and it is left
 *                      untouched if the arguments are rejected.
 * @param[in]  r:       The 16 bytes encoding r.
 * @param[in]  r_len:   Length of r; it must be 16.
 * @param[in]  s:       The 16 bytes encoding s.
 * @param[in]  s_len:   Length of s; it must be 16.
 * @return 0 on success, ERR_NULL if any pointer is NULL, ERR_KEY_SIZE if
 *         either length is not 16, ERR_MEMORY if the allocation fails.
 */
EXPORT_SYM int poly1305_init(mac_state **pState,
                             const uint8_t r[16],
                             size_t r_len,
                             const uint8_t s[16],
                             size_t s_len)
{
    mac_state *fresh;

    if (pState == NULL || r == NULL || s == NULL) {
        return ERR_NULL;
    }

    if (!(r_len == 16 && s_len == 16)) {
        return ERR_KEY_SIZE;
    }

    fresh = (mac_state*) calloc(1, sizeof(mac_state));
    *pState = fresh;
    if (fresh == NULL) {
        return ERR_MEMORY;
    }

    poly1305_load_r(fresh->r, fresh->rr, r);
    poly1305_load_s(fresh->s, s);

    return 0;
}
/*
 * Release a MAC state created by poly1305_init().
 *
 * The state holds the secret key (r, rr, s), the running accumulator and
 * buffered message bytes, so it is overwritten with zeros before the memory
 * is handed back to the allocator.
 *
 * @param[in] state:  The state to destroy.
 * @return 0 on success, ERR_NULL if state is NULL.
 */
EXPORT_SYM int poly1305_destroy(mac_state *state)
{
    volatile uint8_t *wipe;
    size_t remaining;

    if (NULL == state)
        return ERR_NULL;

    /* The stores go through a volatile pointer so that the compiler does not
     * discard them as dead writes to memory that is about to be freed. */
    wipe = (volatile uint8_t *)state;
    for (remaining = sizeof *state; remaining > 0; remaining--) {
        *wipe++ = 0;
    }

    free(state);
    return 0;
}
/*
 * Feed more message bytes into the MAC.
 *
 * Bytes are collected in the state's 16-byte buffer; every time the buffer
 * fills up it is processed as one chunk and emptied. A trailing partial chunk
 * stays in the buffer.
 *
 * @param[in,out] state:  The MAC state.
 * @param[in]     in:     The bytes to add.
 * @param[in]     len:    The number of bytes at in.
 * @return 0 on success, ERR_NULL if state or in is NULL (whatever len is).
 */
EXPORT_SYM int poly1305_update(mac_state *state,
                               const uint8_t *in,
                               size_t len)
{
    if (state == NULL || in == NULL) {
        return ERR_NULL;
    }

    while (len != 0) {
        /* Take as much as is available, but no more than what still fits */
        const unsigned room = 16 - state->buffer_used;
        const unsigned take = (unsigned)MIN(len, room);

        memcpy(&state->buffer[state->buffer_used], in, take);
        state->buffer_used += take;
        in += take;
        len -= take;

        if (state->buffer_used == 16) {
            poly1305_process(state->h, state->r, state->rr, state->buffer, 16);
            state->buffer_used = 0;
        }
    }

    return 0;
}
/*
 * Compute the MAC tag for the data received so far.
 *
 * The computation is carried out on a private copy of the state, so the
 * caller's state is not modified and more data can be added to it afterwards.
 * The copy contains the secret key, so it is overwritten with zeros before
 * the function returns.
 *
 * @param[in]  state:   The MAC state.
 * @param[out] digest:  Where the 16-byte tag is written (little-endian words).
 * @param[in]  len:     Size of digest; it must be 16.
 * @return 0 on success, ERR_NULL if a pointer is NULL, ERR_DIGEST_SIZE if
 *         len is not 16.
 */
EXPORT_SYM int poly1305_digest(const mac_state *state,
                               uint8_t digest[16],
                               size_t len)
{
    mac_state temp;
    volatile uint8_t *wipe;
    size_t remaining;
    unsigned i;

    if (NULL == state || NULL == digest) {
        return ERR_NULL;
    }

    if (len != 16)
        return ERR_DIGEST_SIZE;

    temp = *state;

    /* A partial chunk left in the buffer is the last chunk of the message */
    if (temp.buffer_used > 0) {
        poly1305_process(temp.h, temp.r, temp.rr, temp.buffer, temp.buffer_used);
    }

    poly1305_finalize(temp.h, temp.s);

    for (i=0; i<4; i++) {
        STORE_U32_LITTLE(digest+i*4, temp.h[i]);
    }

    /* The stores go through a volatile pointer so that the compiler does not
     * discard them as dead writes to a local that is going out of scope. */
    wipe = (volatile uint8_t *)&temp;
    for (remaining = sizeof temp; remaining > 0; remaining--) {
        *wipe++ = 0;
    }

    return 0;
}
#ifdef PROFILE
/*
 * Profiling driver, only built when PROFILE is defined.
 *
 * It feeds the same 1 MiB buffer through poly1305_update() 1024 times.
 * No tag is computed.
 *
 * @return 0 on success, 1 if the buffer or the MAC state cannot be set up.
 */
int main(void)
{
    const unsigned data_size = 1024*1024;
    mac_state *state = NULL;
    const uint8_t r[16] = "1234567890123456";
    const uint8_t s[16] = "1234567890123456";
    uint8_t *data;
    unsigned i;

    data = malloc(data_size);
    if (NULL == data)
        return 1;

    for (i=0; i<data_size; i++) {
        data[i] = (uint8_t) i;
    }

    /* Without a valid state there is nothing to profile */
    if (poly1305_init(&state, r, 16, s, 16) != 0) {
        free(data);
        return 1;
    }

    for (i=0; i<1024; i++)
        poly1305_update(state, data, data_size);

    poly1305_destroy(state);
    free(data);
    return 0;
}
#endif