Tor  0.4.3.0-alpha-dev
binascii.c
Go to the documentation of this file.
1 /* Copyright (c) 2001, Matej Pfajfar.
2  * Copyright (c) 2001-2004, Roger Dingledine.
3  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4  * Copyright (c) 2007-2019, The Tor Project, Inc. */
5 /* See LICENSE for licensing information */
6 
7 /**
8  * \file binascii.c
9  *
10  * \brief Miscellaneous functions for encoding and decoding various things
11  * in base{16,32,64}.
12  */
13 
14 #include "orconfig.h"
15 
16 #include "lib/encoding/binascii.h"
17 #include "lib/log/log.h"
18 #include "lib/log/util_bug.h"
19 #include "lib/cc/torint.h"
21 #include "lib/intmath/muldiv.h"
22 #include "lib/malloc/malloc.h"
23 
24 #include <stddef.h>
25 #include <string.h>
26 #include <stdlib.h>
27 
/** Hex-encode the first <b>fromlen</b> bytes of <b>from</b> and return a
 * pointer to the resulting NUL-terminated string.
 *
 * At most 32 input bytes are encoded; a larger <b>fromlen</b> is silently
 * clamped to 32.  The result lives in a function-local static buffer, so it
 * must not be freed, and every call to hex_str overwrites the result of the
 * previous call.
 */
const char *
hex_str(const char *from, size_t fromlen)
{
  static char hexbuf[65];
  /* Two output characters per input byte, plus one byte for the NUL. */
  const size_t max_input = (sizeof(hexbuf) - 1) / 2;
  const size_t n_to_encode = (fromlen > max_input) ? max_input : fromlen;

  base16_encode(hexbuf, sizeof(hexbuf), from, n_to_encode);
  return hexbuf;
}
42 
43 /* Return the base32 encoded size in bytes using the source length srclen.
44  *
45  * (WATCH OUT: This API counts the terminating NUL byte, but
46  * base64_encode_size does not.)
47  */
48 size_t
49 base32_encoded_size(size_t srclen)
50 {
51  size_t enclen;
52  tor_assert(srclen < SIZE_T_CEILING / 8);
53  enclen = BASE32_NOPAD_BUFSIZE(srclen);
54  tor_assert(enclen < INT_MAX && enclen > srclen);
55  return enclen;
56 }
57 
58 /** Implements base32 encoding as in RFC 4648. */
59 void
60 base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
61 {
62  unsigned int i, v, u;
63  size_t nbits = srclen * 8;
64  size_t bit;
65 
66  /* We need enough space for the encoded data and the extra NUL byte. */
67  tor_assert(base32_encoded_size(srclen) <= destlen);
68  tor_assert(destlen < SIZE_T_CEILING);
69 
70  /* Make sure we leave no uninitialized data in the destination buffer. */
71  memset(dest, 0, destlen);
72 
73  for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
74  /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
75  size_t idx = bit / 8;
76  v = ((uint8_t)src[idx]) << 8;
77  if (idx+1 < srclen)
78  v += (uint8_t)src[idx+1];
79  /* set u to the 5-bit value at the bit'th bit of buf. */
80  u = (v >> (11-(bit%8))) & 0x1F;
81  dest[i] = BASE32_CHARS[u];
82  }
83  dest[i] = '\0';
84 }
85 
86 /** Implements base32 decoding as in RFC 4648.
87  * Return the number of bytes decoded if successful; -1 otherwise.
88  */
89 int
90 base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
91 {
92  /* XXXX we might want to rewrite this along the lines of base64_decode, if
93  * it ever shows up in the profile. */
94  unsigned int i;
95  size_t nbits, j, bit;
96  char *tmp;
97  nbits = ((srclen * 5) / 8) * 8;
98 
99  tor_assert(srclen < SIZE_T_CEILING / 5);
100  tor_assert((nbits/8) <= destlen); /* We need enough space. */
101  tor_assert(destlen < SIZE_T_CEILING);
102 
103  /* Make sure we leave no uninitialized data in the destination buffer. */
104  memset(dest, 0, destlen);
105 
106  /* Convert base32 encoded chars to the 5-bit values that they represent. */
107  tmp = tor_malloc_zero(srclen);
108  for (j = 0; j < srclen; ++j) {
109  if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
110  else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
111  else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
112  else {
113  log_warn(LD_GENERAL, "illegal character in base32 encoded string");
114  tor_free(tmp);
115  return -1;
116  }
117  }
118 
119  /* Assemble result byte-wise by applying five possible cases. */
120  for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
121  switch (bit % 40) {
122  case 0:
123  dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
124  (((uint8_t)tmp[(bit/5)+1]) >> 2);
125  break;
126  case 8:
127  dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
128  (((uint8_t)tmp[(bit/5)+1]) << 1) +
129  (((uint8_t)tmp[(bit/5)+2]) >> 4);
130  break;
131  case 16:
132  dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
133  (((uint8_t)tmp[(bit/5)+1]) >> 1);
134  break;
135  case 24:
136  dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
137  (((uint8_t)tmp[(bit/5)+1]) << 2) +
138  (((uint8_t)tmp[(bit/5)+2]) >> 3);
139  break;
140  case 32:
141  dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
142  ((uint8_t)tmp[(bit/5)+1]);
143  break;
144  }
145  }
146 
147  memset(tmp, 0, srclen); /* on the heap, this should be safe */
148  tor_free(tmp);
149  tmp = NULL;
150  return i;
151 }
152 
153 #define BASE64_OPENSSL_LINELEN 64
154 
155 /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
156  * bytes.
157  *
158  * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
159  * but base32_encoded_size does.)
160  *
161  * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
162  * of the encoded output as multiline output (64 character, `\n' terminated
163  * lines).
164  */
165 size_t
166 base64_encode_size(size_t srclen, int flags)
167 {
168  size_t enclen;
169 
170  /* Use INT_MAX for overflow checking because base64_encode() returns int. */
171  tor_assert(srclen < INT_MAX);
172  tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
173 
174  enclen = BASE64_LEN(srclen);
175  if (flags & BASE64_ENCODE_MULTILINE)
176  enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
177 
178  tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
179  return enclen;
180 }
181 
/** Return an upper bound on the number of bytes that decoding a base64
 * string of <b>srclen</b> characters can produce: three output bytes for
 * every four input characters, rounded up.
 *
 * This is only an upper bound, since some part of the base64 string might
 * be padding or space.  Asserts that <b>srclen</b> is below INT_MAX / 3.
 */
size_t
base64_decode_maxsize(size_t srclen)
{
  tor_assert(srclen < INT_MAX / 3);

  const size_t tripled = srclen * 3;
  return CEIL_DIV(tripled, 4);
}
193 
/** Internal table mapping 6 bit values to the Base64 alphabet: entry
 * <b>i</b> is the character that encodes the 6-bit value <b>i</b>. */
static const char base64_encode_table[64] = {
  /*  0..25: uppercase letters */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  /* 26..51: lowercase letters */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  /* 52..61: digits */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  /* 62, 63: the two symbol characters */
  '+', '/'
};
205 
206 /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
207  * the result into <b>dest</b>, if it will fit within <b>destlen</b>
208  * bytes. Return the number of bytes written on success; -1 if
209  * destlen is too short, or other failure.
210  *
211  * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
212  * output in multiline format (64 character, `\n' terminated lines).
213  */
214 int
215 base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
216  int flags)
217 {
218  const unsigned char *usrc = (unsigned char *)src;
219  const unsigned char *eous = usrc + srclen;
220  char *d = dest;
221  uint32_t n = 0;
222  size_t linelen = 0;
223  size_t enclen;
224  int n_idx = 0;
225 
226  if (!src || !dest)
227  return -1;
228 
229  /* Ensure that there is sufficient space, including the NUL. */
230  enclen = base64_encode_size(srclen, flags);
231  if (destlen < enclen + 1)
232  return -1;
233  if (destlen > SIZE_T_CEILING)
234  return -1;
235  if (enclen > INT_MAX)
236  return -1;
237 
238  /* Make sure we leave no uninitialized data in the destination buffer. */
239  memset(dest, 0, destlen);
240 
241  /* XXX/Yawning: If this ends up being too slow, this can be sped up
242  * by separating the multiline format case and the normal case, and
243  * processing 48 bytes of input at a time when newlines are desired.
244  */
245 #define ENCODE_CHAR(ch) \
246  STMT_BEGIN \
247  *d++ = ch; \
248  if (flags & BASE64_ENCODE_MULTILINE) { \
249  if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
250  linelen = 0; \
251  *d++ = '\n'; \
252  } \
253  } \
254  STMT_END
255 
256 #define ENCODE_N(idx) \
257  ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
258 
259 #define ENCODE_PAD() ENCODE_CHAR('=')
260 
261  /* Iterate over all the bytes in src. Each one will add 8 bits to the
262  * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
263  * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
264  */
265  for ( ; usrc < eous; ++usrc) {
266  n = (n << 8) | *usrc;
267  if ((++n_idx) == 3) {
268  ENCODE_N(0);
269  ENCODE_N(1);
270  ENCODE_N(2);
271  ENCODE_N(3);
272  n_idx = 0;
273  n = 0;
274  }
275  }
276  switch (n_idx) {
277  case 0:
278  /* 0 leftover bits, no pading to add. */
279  break;
280  case 1:
281  /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
282  * by 2 padding characters.
283  */
284  n <<= 4;
285  ENCODE_N(2);
286  ENCODE_N(3);
287  ENCODE_PAD();
288  ENCODE_PAD();
289  break;
290  case 2:
291  /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
292  * by 1 padding character.
293  */
294  n <<= 2;
295  ENCODE_N(1);
296  ENCODE_N(2);
297  ENCODE_N(3);
298  ENCODE_PAD();
299  break;
300  // LCOV_EXCL_START -- we can't reach this point, because we enforce
301  // 0 <= ncov_idx < 3 in the loop above.
302  default:
303  /* Something went catastrophically wrong. */
305  return -1;
306  // LCOV_EXCL_STOP
307  }
308 
309 #undef ENCODE_N
310 #undef ENCODE_PAD
311 #undef ENCODE_CHAR
312 
313  /* Multiline output always includes at least one newline. */
314  if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
315  *d++ = '\n';
316 
317  tor_assert(d - dest == (ptrdiff_t)enclen);
318 
319  *d++ = '\0'; /* NUL terminate the output. */
320 
321  return (int) enclen;
322 }
323 
/** As base64_encode (with no flags set), but strip every '=' and '\n'
 * character from the result, so that the output carries no padding.
 *
 * The data is first encoded with padding into <b>dest</b> and then
 * compacted in place, so <b>destlen</b> must be large enough for what
 * base64_encode itself requires, including space for the padding that is
 * subsequently removed.
 *
 * Return the length of the NUL-terminated, unpadded output.  If
 * base64_encode returns a value <= 0, that value is returned unchanged.
 */
int
base64_encode_nopad(char *dest, size_t destlen,
                    const uint8_t *src, size_t srclen)
{
  const int padded_len =
    base64_encode(dest, destlen, (const char*) src, srclen, 0);
  if (padded_len <= 0)
    return padded_len;
  tor_assert((size_t)padded_len < destlen && dest[padded_len] == 0);

  /* Compact in place: the write cursor never gets ahead of the read
   * cursor, so no unread character is overwritten. */
  char *wr = dest;
  for (const char *rd = dest; *rd; ++rd) {
    if (*rd != '=' && *rd != '\n')
      *wr++ = *rd;
  }
  *wr = 0;

  tor_assert(wr - dest <= INT_MAX);

  return (int)(wr - dest);
}

#undef BASE64_OPENSSL_LINELEN
353 
/** @{ */
/** Special values used for the base64_decode_table */
#define X 255
#define SP 64
#define PAD 65
/** @} */
/** Internal table mapping byte values to what they represent in base64.
 * Entries 0..63 are the 6-bit integer a character encodes.  SP marks
 * whitespace, which should be skipped.  X marks bytes that are invalid and
 * must not appear in base64.  PAD marks '=', which indicates
 * end-of-string.  Each row below covers 16 consecutive byte values. */
static const uint8_t base64_decode_table[256] = {
  /* 0x00 */  X,  X,  X,  X,  X,  X,  X,  X,  X, SP, SP, SP,  X, SP,  X,  X,
  /* 0x10 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0x20 */ SP,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X, 62,  X,  X,  X, 63,
  /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  X,  X,  X, PAD, X,  X,
  /* 0x40 */  X,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  X,  X,  X,  X,  X,
  /* 0x60 */  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  X,  X,  X,  X,  X,
  /* 0x80 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0x90 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0xA0 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0xB0 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0xC0 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0xD0 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0xE0 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
  /* 0xF0 */  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,  X,
};
382 
383 /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>. Write
384  * the result into <b>dest</b>, if it will fit within <b>destlen</b>
385  * bytes. Return the number of bytes written on success; -1 if
386  * destlen is too short, or other failure.
387  *
388  * NOTE 1: destlen is checked conservatively, as though srclen contained no
389  * spaces or padding.
390  *
391  * NOTE 2: This implementation does not check for the correct number of
392  * padding "=" characters at the end of the string, and does not check
393  * for internal padding characters.
394  */
395 int
396 base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
397 {
398  const char *eos = src+srclen;
399  uint32_t n=0;
400  int n_idx=0;
401  size_t di = 0;
402 
403  if (destlen > INT_MAX)
404  return -1;
405 
406  /* Make sure we leave no uninitialized data in the destination buffer. */
407  memset(dest, 0, destlen);
408 
409  /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the
410  * value we're decoding. Accumulate bits in <b>n</b>, and whenever we have
411  * 24 bits, batch them into 3 bytes and flush those bytes to dest.
412  */
413  for ( ; src < eos; ++src) {
414  unsigned char c = (unsigned char) *src;
415  uint8_t v = base64_decode_table[c];
416  switch (v) {
417  case X:
418  /* This character isn't allowed in base64. */
419  return -1;
420  case SP:
421  /* This character is whitespace, and has no effect. */
422  continue;
423  case PAD:
424  /* We've hit an = character: the data is over. */
425  goto end_of_loop;
426  default:
427  /* We have an actual 6-bit value. Append it to the bits in n. */
428  n = (n<<6) | v;
429  if ((++n_idx) == 4) {
430  /* We've accumulated 24 bits in n. Flush them. */
431  if (destlen < 3 || di > destlen - 3)
432  return -1;
433  dest[di++] = (n>>16);
434  dest[di++] = (n>>8) & 0xff;
435  dest[di++] = (n) & 0xff;
436  n_idx = 0;
437  n = 0;
438  }
439  }
440  }
441  end_of_loop:
442  /* If we have leftover bits, we need to cope. */
443  switch (n_idx) {
444  case 0:
445  default:
446  /* No leftover bits. We win. */
447  break;
448  case 1:
449  /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
450  return -1;
451  case 2:
452  /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
453  if (destlen < 1 || di > destlen - 1)
454  return -1;
455  dest[di++] = n >> 4;
456  break;
457  case 3:
458  /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
459  if (destlen < 2 || di > destlen - 2)
460  return -1;
461  dest[di++] = n >> 10;
462  dest[di++] = n >> 2;
463  }
464 
465  tor_assert(di <= destlen);
466 
467  return (int)di;
468 }
469 #undef X
470 #undef SP
471 #undef PAD
472 
473 /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
474  * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
475  * <b>dest</b>.
476  */
477 void
478 base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
479 {
480  const char *end;
481  char *cp;
482 
483  tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
484  tor_assert(destlen >= BASE16_BUFSIZE(srclen));
485  tor_assert(destlen < SIZE_T_CEILING);
486 
487  /* Make sure we leave no uninitialized data in the destination buffer. */
488  memset(dest, 0, destlen);
489 
490  cp = dest;
491  end = src+srclen;
492  while (src<end) {
493  *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
494  *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
495  ++src;
496  }
497  *cp = '\0';
498 }
499 
/** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode
 * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
 *
 * Return the number of bytes decoded on success, -1 on failure.  Failure
 * means that <b>srclen</b> is odd, that <b>destlen</b> is greater than
 * INT_MAX or less than half of <b>srclen</b>, or that hex_decode_digit()
 * reports a negative value for some input character.
 *
 * Once the length checks have passed, the whole of <b>dest</b> is zeroed
 * before decoding; if a bad digit is then found, the bytes decoded so far
 * remain in <b>dest</b>.
 */
int
base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
{
  if ((srclen % 2) != 0)
    return -1;
  if (destlen < srclen/2 || destlen > INT_MAX)
    return -1;

  /* Make sure we leave no uninitialized data in the destination buffer. */
  memset(dest, 0, destlen);

  uint8_t *out = (uint8_t *) dest;
  size_t n_out = 0;

  /* Each pair of hex digits yields one output byte, high nibble first. */
  for (size_t pos = 0; pos < srclen; pos += 2) {
    const int hi = hex_decode_digit(src[pos]);
    const int lo = hex_decode_digit(src[pos + 1]);
    if (hi < 0 || lo < 0)
      return -1;
    out[n_out++] = (hi << 4) | lo;
  }

  tor_assert(n_out <= destlen);

  return (int) n_out;
}
size_t base64_decode_maxsize(size_t srclen)
Definition: binascii.c:187
static int hex_decode_digit(char c)
Definition: compat_ctype.h:43
#define LD_GENERAL
Definition: log.h:62
size_t base64_encode_size(size_t srclen, int flags)
Definition: binascii.c:166
#define tor_assert(expr)
Definition: util_bug.h:102
void base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:60
static const uint8_t base64_decode_table[256]
Definition: binascii.c:364
#define tor_free(p)
Definition: malloc.h:52
#define tor_fragile_assert()
Definition: util_bug.h:246
Integer definitions used throughout Tor.
Headers for util_malloc.c.
#define X
Definition: binascii.c:356
int base64_encode(char *dest, size_t destlen, const char *src, size_t srclen, int flags)
Definition: binascii.c:215
int base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:90
#define SIZE_T_CEILING
Definition: torint.h:126
static const char base64_encode_table[64]
Definition: binascii.c:195
const char * hex_str(const char *from, size_t fromlen)
Definition: binascii.c:34
Header for binascii.c.
void base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:478
#define BASE64_LEN(n)
Definition: binascii.h:28
Header for muldiv.c.
#define BASE32_CHARS
Definition: binascii.h:53
int base64_encode_nopad(char *dest, size_t destlen, const uint8_t *src, size_t srclen)
Definition: binascii.c:329
int base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:396
Locale-independent character-type inspection (header)
int base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
Definition: binascii.c:506
Headers for log.c.
Macros to manage assertions, fatal and non-fatal.