Tor  0.4.7.0-alpha-dev
keccak-tiny-unrolled.c
1 /** libkeccak-tiny
2  *
3  * A single-file implementation of SHA-3 and SHAKE.
4  *
5  * Implementor: David Leon Gil
6  * License: CC0, attribution kindly requested. Blame taken too,
7  * but not liability.
8  */
9 #include "keccak-tiny.h"
10 
11 #include <string.h>
13 #include "byteorder.h"
14 
15 /******** Endianness conversion helpers ********/
16 
17 static inline uint64_t
18 loadu64le(const unsigned char *x) {
19  uint64_t r;
20  memcpy(&r, x, sizeof(r));
21  return _le64toh(r);
22 }
23 
24 static inline void
25 storeu64le(uint8_t *x, uint64_t u) {
26  uint64_t val = _le64toh(u);
27  memcpy(x, &val, sizeof(u));
28 }
29 
30 /******** The Keccak-f[1600] permutation ********/
31 
32 /*** Constants. ***/
33 static const uint8_t rho[24] = \
34  { 1, 3, 6, 10, 15, 21,
35  28, 36, 45, 55, 2, 14,
36  27, 41, 56, 8, 25, 43,
37  62, 18, 39, 61, 20, 44};
38 static const uint8_t pi[24] = \
39  {10, 7, 11, 17, 18, 3,
40  5, 16, 8, 21, 24, 4,
41  15, 23, 19, 13, 12, 2,
42  20, 14, 22, 9, 6, 1};
43 static const uint64_t RC[24] = \
44  {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
45  0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
46  0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
47  0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
48  0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
49  0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
50 
51 /*** Helper macros to unroll the permutation. ***/
52 #define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
53 #define REPEAT6(e) e e e e e e
54 #define REPEAT24(e) REPEAT6(e e e e)
55 #define REPEAT5(e) e e e e e
56 #define FOR5(v, s, e) \
57  v = 0; \
58  REPEAT5(e; v += s;)
59 
60 /*** Keccak-f[1600] ***/
61 static inline void keccakf(void* state) {
62  uint64_t* a = (uint64_t*)state;
63  uint64_t b[5] = {0};
64  uint64_t t = 0;
65  uint8_t x, y, i = 0;
66 
67  REPEAT24(
68  // Theta
69  FOR5(x, 1,
70  b[x] = 0;
71  FOR5(y, 5,
72  b[x] ^= a[x + y]; ))
73  FOR5(x, 1,
74  FOR5(y, 5,
75  a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
76  // Rho and pi
77  t = a[1];
78  x = 0;
79  REPEAT24(b[0] = a[pi[x]];
80  a[pi[x]] = rol(t, rho[x]);
81  t = b[0];
82  x++; )
83  // Chi
84  FOR5(y,
85  5,
86  FOR5(x, 1,
87  b[x] = a[y + x];)
88  FOR5(x, 1,
89  a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
90  // Iota
91  a[0] ^= RC[i];
92  i++; )
93 }
94 
95 /******** The FIPS202-defined functions. ********/
96 
97 /*** Some helper macros. ***/
98 
99 // `xorin` modified to handle Big Endian systems, `buf` being unaligned on
100 // systems that care about such things. Assumes that len is a multiple of 8,
101 // which is always true for the rates we use, and the modified finalize.
102 static inline void
103 xorin8(uint8_t *dst, const uint8_t *src, size_t len) {
104  uint64_t* a = (uint64_t*)dst; // Always aligned.
105  for (size_t i = 0; i < len; i += 8) {
106  a[i/8] ^= loadu64le(src + i);
107  }
108 }
109 
110 // `setout` likewise modified to handle Big Endian systems. Assumes that len
111 // is a multiple of 8, which is true for every rate we use.
112 static inline void
113 setout8(const uint8_t *src, uint8_t *dst, size_t len) {
114  const uint64_t *si = (const uint64_t*)src; // Always aligned.
115  for (size_t i = 0; i < len; i+= 8) {
116  storeu64le(dst+i, si[i/8]);
117  }
118 }
119 
120 #define P keccakf
121 #define Plen KECCAK_MAX_RATE
122 
123 #define KECCAK_DELIM_DIGEST 0x06
124 #define KECCAK_DELIM_XOF 0x1f
125 
126 // Fold P*F over the full blocks of an input.
127 #define foldP(I, L, F) \
128  while (L >= s->rate) { \
129  F(s->a, I, s->rate); \
130  P(s->a); \
131  I += s->rate; \
132  L -= s->rate; \
133  }
134 
135 static inline void
136 keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks)
137 {
138  size_t blen = nr_blocks * s->rate;
139  foldP(buf, blen, xorin8);
140 }
141 
142 static int
143 keccak_update(keccak_state *s, const uint8_t *buf, size_t len)
144 {
145  if (s->finalized)
146  return -1;
147  if ((buf == NULL) && len != 0)
148  return -1;
149 
150  size_t remaining = len;
151  while (remaining > 0) {
152  if (s->offset == 0) {
153  const size_t blocks = remaining / s->rate;
154  size_t direct_bytes = blocks * s->rate;
155  if (direct_bytes > 0) {
156  keccak_absorb_blocks(s, buf, blocks);
157  remaining -= direct_bytes;
158  buf += direct_bytes;
159  }
160  }
161 
162  const size_t buf_avail = s->rate - s->offset;
163  const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail;
164  if (buf_bytes > 0) {
165  memcpy(&s->block[s->offset], buf, buf_bytes);
166  s->offset += buf_bytes;
167  remaining -= buf_bytes;
168  buf += buf_bytes;
169  }
170  if (s->offset == s->rate) {
171  keccak_absorb_blocks(s, s->block, 1);
172  s->offset = 0;
173  }
174  }
175  return 0;
176 }
177 
178 static void
179 keccak_finalize(keccak_state *s)
180 {
181  // Xor in the DS and pad frame.
182  s->block[s->offset++] = s->delim; // DS.
183  for (size_t i = s->offset; i < s->rate; i++) {
184  s->block[i] = 0;
185  }
186  s->block[s->rate - 1] |= 0x80; // Pad frame.
187 
188  // Xor in the last block.
189  xorin8(s->a, s->block, s->rate);
190 
191  memwipe(s->block, 0, sizeof(s->block));
192  s->finalized = 1;
193  s->offset = s->rate;
194 }
195 
196 static inline void
197 keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks)
198 {
199  for (size_t n = 0; n < nr_blocks; n++) {
200  keccakf(s->a);
201  setout8(s->a, out, s->rate);
202  out += s->rate;
203  }
204 }
205 
206 static int
207 keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
208 {
209  if (!s->finalized)
210  return -1;
211 
212  size_t remaining = outlen;
213  while (remaining > 0) {
214  if (s->offset == s->rate) {
215  const size_t blocks = remaining / s->rate;
216  const size_t direct_bytes = blocks * s->rate;
217  if (blocks > 0) {
218  keccak_squeeze_blocks(s, out, blocks);
219  out += direct_bytes;
220  remaining -= direct_bytes;
221  }
222 
223  if (remaining > 0) {
224  keccak_squeeze_blocks(s, s->block, 1);
225  s->offset = 0;
226  }
227  }
228 
229  const size_t buf_bytes = s->rate - s->offset;
230  const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes;
231  if (indirect_bytes > 0) {
232  memcpy(out, &s->block[s->offset], indirect_bytes);
233  out += indirect_bytes;
234  s->offset += indirect_bytes;
235  remaining -= indirect_bytes;
236  }
237  }
238  return 0;
239 }
240 
241 int
242 keccak_digest_init(keccak_state *s, size_t bits)
243 {
244  if (s == NULL)
245  return -1;
246  if (bits != 224 && bits != 256 && bits != 384 && bits != 512)
247  return -1;
248 
249  keccak_cleanse(s);
250  s->rate = KECCAK_RATE(bits);
251  s->delim = KECCAK_DELIM_DIGEST;
252  return 0;
253 }
254 
255 int
256 keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len)
257 {
258  if (s == NULL)
259  return -1;
260  if (s->delim != KECCAK_DELIM_DIGEST)
261  return -1;
262 
263  return keccak_update(s, buf, len);
264 }
265 
266 int
267 keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen)
268 {
269  if (s == NULL)
270  return -1;
271  if (s->delim != KECCAK_DELIM_DIGEST)
272  return -1;
273  if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8)
274  return -1;
275 
276  // Work in a copy so that incremental/rolling hashes are easy.
277  keccak_state s_tmp;
278  keccak_clone(&s_tmp, s);
279  keccak_finalize(&s_tmp);
280  int ret = keccak_squeeze(&s_tmp, out, outlen);
281  keccak_cleanse(&s_tmp);
282  return ret;
283 }
284 
285 int
286 keccak_xof_init(keccak_state *s, size_t bits)
287 {
288  if (s == NULL)
289  return -1;
290  if (bits != 128 && bits != 256)
291  return -1;
292 
293  keccak_cleanse(s);
294  s->rate = KECCAK_RATE(bits);
295  s->delim = KECCAK_DELIM_XOF;
296  return 0;
297 }
298 
299 int
300 keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len)
301 {
302  if (s == NULL)
303  return -1;
304  if (s->delim != KECCAK_DELIM_XOF)
305  return -1;
306 
307  return keccak_update(s, buf, len);
308 }
309 
310 int
311 keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
312 {
313  if (s == NULL)
314  return -1;
315  if (s->delim != KECCAK_DELIM_XOF)
316  return -1;
317 
318  if (!s->finalized)
319  keccak_finalize(s);
320 
321  return keccak_squeeze(s, out, outlen);
322 }
323 
324 void
325 keccak_clone(keccak_state *out, const keccak_state *in)
326 {
327  memcpy(out, in, sizeof(keccak_state));
328 }
329 
330 void
331 keccak_cleanse(keccak_state *s)
332 {
333  memwipe(s, 0, sizeof(keccak_state));
334 }
335 
336 /** The sponge-based hash construction. **/
337 static inline int hash(uint8_t* out, size_t outlen,
338  const uint8_t* in, size_t inlen,
339  size_t bits, uint8_t delim) {
340  if ((out == NULL) || ((in == NULL) && inlen != 0)) {
341  return -1;
342  }
343 
344  int ret = 0;
345  keccak_state s;
346  keccak_cleanse(&s);
347 
348  switch (delim) {
349  case KECCAK_DELIM_DIGEST:
350  ret |= keccak_digest_init(&s, bits);
351  ret |= keccak_digest_update(&s, in, inlen);
352  // Use the internal API instead of sum to avoid the memcpy.
353  keccak_finalize(&s);
354  ret |= keccak_squeeze(&s, out, outlen);
355  break;
356  case KECCAK_DELIM_XOF:
357  ret |= keccak_xof_init(&s, bits);
358  ret |= keccak_xof_absorb(&s, in, inlen);
359  ret |= keccak_xof_squeeze(&s, out, outlen);
360  break;
361  default:
362  return -1;
363  }
364  keccak_cleanse(&s);
365  return ret;
366 }
367 
368 /*** Helper macros to define SHA3 and SHAKE instances. ***/
369 #define defshake(bits) \
370  int shake##bits(uint8_t* out, size_t outlen, \
371  const uint8_t* in, size_t inlen) { \
372  return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF); \
373  }
374 #define defsha3(bits) \
375  int sha3_##bits(uint8_t* out, size_t outlen, \
376  const uint8_t* in, size_t inlen) { \
377  if (outlen > (bits/8)) { \
378  return -1; \
379  } \
380  return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \
381  }
382 
383 /*** FIPS202 SHAKE VOFs ***/
384 defshake(128)
385 defshake(256)
386 
387 /*** FIPS202 SHA3 FOFs ***/
388 defsha3(224)
389 defsha3(256)
390 defsha3(384)
391 defsha3(512)
void memwipe(void *mem, uint8_t byte, size_t sz)
Definition: crypto_util.c:55
Common functions for cryptographic routines.