/* kyber-common.c - the Kyber key encapsulation mechanism (common part) * Copyright (C) 2024 g10 Code GmbH * * This file was modified for use by Libgcrypt. * * This file is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see . * SPDX-License-Identifier: LGPL-2.1-or-later * * You can also use this file under the same licence of original code. * SPDX-License-Identifier: CC0 OR Apache-2.0 * */ /* Original code from: Repository: https://github.com/pq-crystals/kyber.git Branch: standard Commit: 11d00ff1f20cfca1f72d819e5a45165c1e0a2816 Licence: Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/); or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html). Authors: Joppe Bos Léo Ducas Eike Kiltz Tancrède Lepoint Vadim Lyubashevsky John Schanck Peter Schwabe Gregor Seiler Damien Stehlé Kyber Home: https://www.pq-crystals.org/kyber/ */ /* * From original code, following modification was made. * * - C++ style comments are changed to C-style. * * - Functions "poly_cbd_eta1" "poly_cbd_eta2" are removed. * * - Constant "zeta" is static, not available outside. * * - "poly_compress" and "poly_decompress" are now two variants _128 * and _160. * * - "poly_getnoise_eta1" is now two variants _2 and _3_4. * * - "poly_getnoise_eta2" directly uses "cbd2" function. */ /*************** kyber/ref/cbd.c */ /************************************************* * Name: load32_littleendian * * Description: load 4 bytes into a 32-bit integer * in little-endian order * * Arguments: - const uint8_t *x: pointer to input byte array * * Returns 32-bit unsigned integer loaded from x **************************************************/ static uint32_t load32_littleendian(const uint8_t x[4]) { uint32_t r; r = (uint32_t)x[0]; r |= (uint32_t)x[1] << 8; r |= (uint32_t)x[2] << 16; r |= (uint32_t)x[3] << 24; return r; } /************************************************* * Name: load24_littleendian * * Description: load 3 bytes into a 32-bit integer * in little-endian order. * This function is only needed for Kyber-512 * * Arguments: - const uint8_t *x: pointer to input byte array * * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) **************************************************/ #if !defined(KYBER_K) || KYBER_K == 2 static uint32_t load24_littleendian(const uint8_t x[3]) { uint32_t r; r = (uint32_t)x[0]; r |= (uint32_t)x[1] << 8; r |= (uint32_t)x[2] << 16; return r; } #endif /************************************************* * Name: cbd2 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to * a centered binomial distribution with parameter eta=2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4]) { unsigned int i,j; uint32_t t,d; int16_t a,b; for(i=0;i>1) & 0x55555555; for(j=0;j<8;j++) { a = (d >> (4*j+0)) & 0x3; b = (d >> (4*j+2)) & 0x3; r->coeffs[8*i+j] = a - b; } } } /************************************************* * Name: cbd3 * * Description: Given an array of uniformly random bytes, compute * polynomial with coefficients distributed according to * a centered binomial distribution with parameter eta=3. * This function is only needed for Kyber-512 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *buf: pointer to input byte array **************************************************/ #if !defined(KYBER_K) || KYBER_K == 2 static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4]) { unsigned int i,j; uint32_t t,d; int16_t a,b; for(i=0;i>1) & 0x00249249; d += (t>>2) & 0x00249249; for(j=0;j<4;j++) { a = (d >> (6*j+0)) & 0x7; b = (d >> (6*j+3)) & 0x7; r->coeffs[4*i+j] = a - b; } } } #endif /*************** kyber/ref/indcpa.c */ /************************************************* * Name: rej_uniform * * Description: Run rejection sampling on uniform random bytes to generate * uniform random integers mod q * * Arguments: - int16_t *r: pointer to output buffer * - unsigned int len: requested number of 16-bit integers (uniform mod q) * - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) * - unsigned int buflen: length of input buffer in bytes * * Returns number of sampled 16-bit integers (at most len) **************************************************/ static unsigned int rej_uniform(int16_t *r, unsigned int len, const uint8_t *buf, unsigned int buflen) { unsigned int ctr, pos; uint16_t val0, val1; ctr = pos = 0; while(ctr < len && pos + 3 <= buflen) { val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; pos += 3; if(val0 < KYBER_Q) r[ctr++] = val0; if(ctr < len && val1 < KYBER_Q) r[ctr++] = val1; } return ctr; } /*************** kyber/ref/ntt.c */ /* Code to generate zetas and zetas_inv used in the number-theoretic transform: #define KYBER_ROOT_OF_UNITY 17 static const uint8_t tree[128] = { 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 }; void init_ntt() { unsigned int i; int16_t tmp[128]; tmp[0] = MONT; for(i=1;i<128;i++) tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); for(i=0;i<128;i++) { zetas[i] = tmp[tree[i]]; if(zetas[i] > KYBER_Q/2) zetas[i] -= KYBER_Q; if(zetas[i] < -KYBER_Q/2) zetas[i] += KYBER_Q; } } */ static const int16_t zetas[128] = { -1044, -758, -359, -1517, 1493, 1422, 287, 202, -171, 622, 1577, 182, 962, -1202, -1474, 1468, 573, -1325, 264, 383, -829, 1458, -1602, -130, -681, 1017, 732, 608, -1542, 411, -205, -1571, 1223, 652, -552, 1015, -1293, 1491, -282, -1544, 516, -8, -320, -666, -1618, -1162, 126, 1469, -853, -90, -271, 830, 107, -1421, -247, -951, -398, 961, -1508, -725, 448, -1065, 677, -1275, -1103, 430, 555, 843, -1251, 871, 1550, 105, 422, 587, 177, -235, -291, -460, 1574, 1653, -246, 778, 1159, -147, -777, 1483, -602, 1119, -1590, 644, -872, 349, 418, 329, -156, -75, 817, 1097, 603, 610, 1322, -1285, -1465, 384, -1215, -136, 1218, -1335, -874, 220, -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, -308, 996, 991, 958, -1460, 1522, 1628 }; /************************************************* * Name: fqmul * * Description: Multiplication followed by Montgomery reduction * * Arguments: - int16_t a: first factor * - int16_t b: second factor * * Returns 16-bit integer congruent to a*b*R^{-1} mod q **************************************************/ static int16_t fqmul(int16_t a, int16_t b) { return montgomery_reduce((int32_t)a*b); } /************************************************* * Name: ntt * * Description: Inplace number-theoretic transform (NTT) in Rq. * input is in standard order, output is in bitreversed order * * Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq **************************************************/ void ntt(int16_t r[256]) { unsigned int len, start, j, k; int16_t t, zeta; k = 1; for(len = 128; len >= 2; len >>= 1) { for(start = 0; start < 256; start = j + len) { zeta = zetas[k++]; for(j = start; j < start + len; j++) { t = fqmul(zeta, r[j + len]); r[j + len] = r[j] - t; r[j] = r[j] + t; } } } } /************************************************* * Name: invntt_tomont * * Description: Inplace inverse number-theoretic transform in Rq and * multiplication by Montgomery factor 2^16. * Input is in bitreversed order, output is in standard order * * Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq **************************************************/ void invntt(int16_t r[256]) { unsigned int start, len, j, k; int16_t t, zeta; const int16_t f = 1441; /* mont^2/128 */ k = 127; for(len = 2; len <= 128; len <<= 1) { for(start = 0; start < 256; start = j + len) { zeta = zetas[k--]; for(j = start; j < start + len; j++) { t = r[j]; r[j] = barrett_reduce(t + r[j + len]); r[j + len] = r[j + len] - t; r[j + len] = fqmul(zeta, r[j + len]); } } } for(j = 0; j < 256; j++) r[j] = fqmul(r[j], f); } /************************************************* * Name: basemul * * Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) * used for multiplication of elements in Rq in NTT domain * * Arguments: - int16_t r[2]: pointer to the output polynomial * - const int16_t a[2]: pointer to the first factor * - const int16_t b[2]: pointer to the second factor * - int16_t zeta: integer defining the reduction polynomial **************************************************/ void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { r[0] = fqmul(a[1], b[1]); r[0] = fqmul(r[0], zeta); r[0] += fqmul(a[0], b[0]); r[1] = fqmul(a[0], b[1]); r[1] += fqmul(a[1], b[0]); } /*************** kyber/ref/poly.c */ /************************************************* * Name: poly_compress * * Description: Compression and subsequent serialization of a polynomial * * Arguments: - uint8_t *r: pointer to output byte array * (of length KYBER_POLYCOMPRESSEDBYTES) * - const poly *a: pointer to input polynomial **************************************************/ #if !defined(KYBER_K) || KYBER_K == 2 || KYBER_K == 3 void poly_compress_128(uint8_t r[KYBER_POLYCOMPRESSEDBYTES_2_3], const poly *a) { unsigned int i,j; int32_t u; uint32_t d0; uint8_t t[8]; for(i=0;icoeffs[8*i+j]; u += (u >> 15) & KYBER_Q; /* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */ d0 = u << 4; d0 += 1665; d0 *= 80635; d0 >>= 28; t[j] = d0 & 0xf; } r[0] = t[0] | (t[1] << 4); r[1] = t[2] | (t[3] << 4); r[2] = t[4] | (t[5] << 4); r[3] = t[6] | (t[7] << 4); r += 4; } } #endif #if !defined(KYBER_K) || KYBER_K == 4 void poly_compress_160(uint8_t r[KYBER_POLYCOMPRESSEDBYTES_4], const poly *a) { unsigned int i,j; int32_t u; uint32_t d0; uint8_t t[8]; for(i=0;icoeffs[8*i+j]; u += (u >> 15) & KYBER_Q; /* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */ d0 = u << 5; d0 += 1664; d0 *= 40318; d0 >>= 27; t[j] = d0 & 0x1f; } r[0] = (t[0] >> 0) | (t[1] << 5); r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7); r[2] = (t[3] >> 1) | (t[4] << 4); r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6); r[4] = (t[6] >> 2) | (t[7] << 3); r += 5; } } #endif /************************************************* * Name: poly_decompress * * Description: De-serialization and subsequent decompression of a polynomial; * approximate inverse of poly_compress * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array * (of length KYBER_POLYCOMPRESSEDBYTES bytes) **************************************************/ #if !defined(KYBER_K) || KYBER_K == 2 || KYBER_K == 3 void poly_decompress_128(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES_2_3]) { unsigned int i; for(i=0;icoeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4; r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4; a += 1; } } #endif #if !defined(KYBER_K) || KYBER_K == 4 void poly_decompress_160(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES_4]) { unsigned int i; unsigned int j; uint8_t t[8]; for(i=0;i> 0); t[1] = (a[0] >> 5) | (a[1] << 3); t[2] = (a[1] >> 2); t[3] = (a[1] >> 7) | (a[2] << 1); t[4] = (a[2] >> 4) | (a[3] << 4); t[5] = (a[3] >> 1); t[6] = (a[3] >> 6) | (a[4] << 2); t[7] = (a[4] >> 3); a += 5; for(j=0;j<8;j++) r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5; } } #endif /************************************************* * Name: poly_tobytes * * Description: Serialization of a polynomial * * Arguments: - uint8_t *r: pointer to output byte array * (needs space for KYBER_POLYBYTES bytes) * - const poly *a: pointer to input polynomial **************************************************/ void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a) { unsigned int i; uint16_t t0, t1; for(i=0;icoeffs[2*i]; t0 += ((int16_t)t0 >> 15) & KYBER_Q; t1 = a->coeffs[2*i+1]; t1 += ((int16_t)t1 >> 15) & KYBER_Q; r[3*i+0] = (t0 >> 0); r[3*i+1] = (t0 >> 8) | (t1 << 4); r[3*i+2] = (t1 >> 4); } } /************************************************* * Name: poly_frombytes * * Description: De-serialization of a polynomial; * inverse of poly_tobytes * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *a: pointer to input byte array * (of KYBER_POLYBYTES bytes) **************************************************/ void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]) { unsigned int i; for(i=0;icoeffs[2*i] = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF; r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF; } } /************************************************* * Name: poly_frommsg * * Description: Convert 32-byte message to polynomial * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *msg: pointer to input message **************************************************/ void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]) { unsigned int i,j; int16_t mask; #if (KYBER_INDCPA_MSGBYTES != KYBER_N/8) #error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!" #endif for(i=0;i> j)&1); r->coeffs[8*i+j] = mask & ((KYBER_Q+1)/2); } } } /************************************************* * Name: poly_tomsg * * Description: Convert polynomial to 32-byte message * * Arguments: - uint8_t *msg: pointer to output message * - const poly *a: pointer to input polynomial **************************************************/ void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a) { unsigned int i,j; uint32_t t; for(i=0;icoeffs[8*i+j]; /* t += ((int16_t)t >> 15) & KYBER_Q; */ /* t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1; */ t <<= 1; t += 1665; t *= 80635; t >>= 28; t &= 1; msg[i] |= t << j; } } } /************************************************* * Name: poly_getnoise_eta1 * * Description: Sample a polynomial deterministically from a seed and a nonce, * with output polynomial close to centered binomial distribution * with parameter KYBER_ETA1 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *seed: pointer to input seed * (of length KYBER_SYMBYTES bytes) * - uint8_t nonce: one-byte input nonce **************************************************/ #if !defined(KYBER_K) || KYBER_K == 2 void poly_getnoise_eta1_2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) { uint8_t buf[KYBER_ETA1_2*KYBER_N/4]; prf(buf, sizeof(buf), seed, nonce); cbd3(r, buf); } #endif #if !defined(KYBER_K) || KYBER_K == 3 || KYBER_K == 4 void poly_getnoise_eta1_3_4(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) { uint8_t buf[KYBER_ETA1_3_4*KYBER_N/4]; prf(buf, sizeof(buf), seed, nonce); cbd2(r, buf); } #endif /************************************************* * Name: poly_getnoise_eta2 * * Description: Sample a polynomial deterministically from a seed and a nonce, * with output polynomial close to centered binomial distribution * with parameter KYBER_ETA2 * * Arguments: - poly *r: pointer to output polynomial * - const uint8_t *seed: pointer to input seed * (of length KYBER_SYMBYTES bytes) * - uint8_t nonce: one-byte input nonce **************************************************/ void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce) { uint8_t buf[KYBER_ETA2*KYBER_N/4]; prf(buf, sizeof(buf), seed, nonce); cbd2(r, buf); } /************************************************* * Name: poly_ntt * * Description: Computes negacyclic number-theoretic transform (NTT) of * a polynomial in place; * inputs assumed to be in normal order, output in bitreversed order * * Arguments: - uint16_t *r: pointer to in/output polynomial **************************************************/ void poly_ntt(poly *r) { ntt(r->coeffs); poly_reduce(r); } /************************************************* * Name: poly_invntt_tomont * * Description: Computes inverse of negacyclic number-theoretic transform (NTT) * of a polynomial in place; * inputs assumed to be in bitreversed order, output in normal order * * Arguments: - uint16_t *a: pointer to in/output polynomial **************************************************/ void poly_invntt_tomont(poly *r) { invntt(r->coeffs); } /************************************************* * Name: poly_basemul_montgomery * * Description: Multiplication of two polynomials in NTT domain * * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial * - const poly *b: pointer to second input polynomial **************************************************/ void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) { unsigned int i; for(i=0;icoeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]); basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]); } } /************************************************* * Name: poly_tomont * * Description: Inplace conversion of all coefficients of a polynomial * from normal domain to Montgomery domain * * Arguments: - poly *r: pointer to input/output polynomial **************************************************/ void poly_tomont(poly *r) { unsigned int i; const int16_t f = (1ULL << 32) % KYBER_Q; for(i=0;icoeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f); } /************************************************* * Name: poly_reduce * * Description: Applies Barrett reduction to all coefficients of a polynomial * for details of the Barrett reduction see comments in reduce.c * * Arguments: - poly *r: pointer to input/output polynomial **************************************************/ void poly_reduce(poly *r) { unsigned int i; for(i=0;icoeffs[i] = barrett_reduce(r->coeffs[i]); } /************************************************* * Name: poly_add * * Description: Add two polynomials; no modular reduction is performed * * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial * - const poly *b: pointer to second input polynomial **************************************************/ void poly_add(poly *r, const poly *a, const poly *b) { unsigned int i; for(i=0;icoeffs[i] = a->coeffs[i] + b->coeffs[i]; } /************************************************* * Name: poly_sub * * Description: Subtract two polynomials; no modular reduction is performed * * Arguments: - poly *r: pointer to output polynomial * - const poly *a: pointer to first input polynomial * - const poly *b: pointer to second input polynomial **************************************************/ void poly_sub(poly *r, const poly *a, const poly *b) { unsigned int i; for(i=0;icoeffs[i] = a->coeffs[i] - b->coeffs[i]; } /*************** kyber/ref/reduce.c */ /************************************************* * Name: montgomery_reduce * * Description: Montgomery reduction; given a 32-bit integer a, computes * 16-bit integer congruent to a * R^-1 mod q, where R=2^16 * * Arguments: - int32_t a: input integer to be reduced; * has to be in {-q2^15,...,q2^15-1} * * Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. **************************************************/ int16_t montgomery_reduce(int32_t a) { int16_t t; t = (int16_t)a*QINV; t = (a - (int32_t)t*KYBER_Q) >> 16; return t; } /************************************************* * Name: barrett_reduce * * Description: Barrett reduction; given a 16-bit integer a, computes * centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2} * * Arguments: - int16_t a: input integer to be reduced * * Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. **************************************************/ int16_t barrett_reduce(int16_t a) { int16_t t; const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q; t = ((int32_t)v*a + (1<<25)) >> 26; t *= KYBER_Q; return a - t; }