Line data Source code
1 : /* Copyright (c) 2001, Matej Pfajfar.
2 : * Copyright (c) 2001-2004, Roger Dingledine.
3 : * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
4 : * Copyright (c) 2007-2021, The Tor Project, Inc. */
5 : /* See LICENSE for licensing information */
6 :
7 : /**
8 : * \file binascii.c
9 : *
10 : * \brief Miscellaneous functions for encoding and decoding various things
11 : * in base{16,32,64}.
12 : */
13 :
14 : #include "orconfig.h"
15 :
16 : #include "lib/encoding/binascii.h"
17 : #include "lib/log/log.h"
18 : #include "lib/log/util_bug.h"
19 : #include "lib/cc/torint.h"
20 : #include "lib/string/compat_ctype.h"
21 : #include "lib/intmath/muldiv.h"
22 : #include "lib/malloc/malloc.h"
23 :
24 : #include <stddef.h>
25 : #include <string.h>
26 : #include <stdlib.h>
27 :
/** Hex-encode at most the first <b>fromlen</b> bytes of <b>from</b> into a
 * function-local static buffer and return a pointer to that buffer.
 *
 * The buffer holds 65 bytes, so at most 32 input bytes are encoded; a larger
 * <b>fromlen</b> is clamped to 32. The returned string is NUL-terminated,
 * must not be freed, and is overwritten by the next call to hex_str().
 */
const char *
hex_str(const char *from, size_t fromlen)
{
  static char buf[65];
  /* Two hex digits per input byte, plus one byte reserved for the NUL. */
  const size_t max_bytes = (sizeof(buf) - 1) / 2;
  const size_t n_bytes = (fromlen > max_bytes) ? max_bytes : fromlen;

  base16_encode(buf, sizeof(buf), from, n_bytes);
  return buf;
}
42 :
43 : /* Return the base32 encoded size in bytes using the source length srclen.
44 : *
45 : * (WATCH OUT: This API counts the terminating NUL byte, but
46 : * base64_encode_size does not.)
47 : */
48 : size_t
49 28167 : base32_encoded_size(size_t srclen)
50 : {
51 28167 : size_t enclen;
52 28167 : tor_assert(srclen < SIZE_T_CEILING / 8);
53 28167 : enclen = BASE32_NOPAD_BUFSIZE(srclen);
54 28167 : tor_assert(enclen < INT_MAX && enclen > srclen);
55 28167 : return enclen;
56 : }
57 :
58 : /** Implements base32 encoding as in RFC 4648. */
59 : void
60 27908 : base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
61 : {
62 27908 : unsigned int i, v, u;
63 27908 : size_t nbits = srclen * 8;
64 27908 : size_t bit;
65 :
66 : /* We need enough space for the encoded data and the extra NUL byte. */
67 27908 : tor_assert(base32_encoded_size(srclen) <= destlen);
68 27908 : tor_assert(destlen < SIZE_T_CEILING);
69 :
70 : /* Make sure we leave no uninitialized data in the destination buffer. */
71 27908 : memset(dest, 0, destlen);
72 :
73 450642 : for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
74 : /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
75 422734 : size_t idx = bit / 8;
76 422734 : v = ((uint8_t)src[idx]) << 8;
77 422734 : if (idx+1 < srclen)
78 394669 : v += (uint8_t)src[idx+1];
79 : /* set u to the 5-bit value at the bit'th bit of buf. */
80 422734 : u = (v >> (11-(bit%8))) & 0x1F;
81 422734 : dest[i] = BASE32_CHARS[u];
82 : }
83 27908 : dest[i] = '\0';
84 27908 : }
85 :
86 : /** Implements base32 decoding as in RFC 4648.
87 : * Return the number of bytes decoded if successful; -1 otherwise.
88 : */
89 : int
90 177 : base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
91 : {
92 : /* XXXX we might want to rewrite this along the lines of base64_decode, if
93 : * it ever shows up in the profile. */
94 177 : unsigned int i;
95 177 : size_t nbits, j, bit;
96 177 : char *tmp;
97 177 : nbits = ((srclen * 5) / 8) * 8;
98 :
99 177 : tor_assert(srclen < SIZE_T_CEILING / 5);
100 177 : tor_assert((nbits/8) <= destlen); /* We need enough space. */
101 177 : tor_assert(destlen < SIZE_T_CEILING);
102 :
103 : /* Make sure we leave no uninitialized data in the destination buffer. */
104 177 : memset(dest, 0, destlen);
105 :
106 : /* Convert base32 encoded chars to the 5-bit values that they represent. */
107 177 : tmp = tor_malloc_zero(srclen);
108 10019 : for (j = 0; j < srclen; ++j) {
109 9669 : if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
110 1934 : else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
111 198 : else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
112 : else {
113 4 : log_warn(LD_GENERAL, "illegal character in base32 encoded string");
114 4 : tor_free(tmp);
115 4 : return -1;
116 : }
117 : }
118 :
119 : /* Assemble result byte-wise by applying five possible cases. */
120 6206 : for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
121 6033 : switch (bit % 40) {
122 1216 : case 0:
123 1216 : dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
124 1216 : (((uint8_t)tmp[(bit/5)+1]) >> 2);
125 1216 : break;
126 1216 : case 8:
127 1216 : dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
128 1216 : (((uint8_t)tmp[(bit/5)+1]) << 1) +
129 1216 : (((uint8_t)tmp[(bit/5)+2]) >> 4);
130 1216 : break;
131 1201 : case 16:
132 1201 : dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
133 1201 : (((uint8_t)tmp[(bit/5)+1]) >> 1);
134 1201 : break;
135 1200 : case 24:
136 1200 : dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
137 1200 : (((uint8_t)tmp[(bit/5)+1]) << 2) +
138 1200 : (((uint8_t)tmp[(bit/5)+2]) >> 3);
139 1200 : break;
140 1200 : case 32:
141 1200 : dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
142 1200 : ((uint8_t)tmp[(bit/5)+1]);
143 1200 : break;
144 : }
145 : }
146 :
147 173 : memset(tmp, 0, srclen); /* on the heap, this should be safe */
148 173 : tor_free(tmp);
149 173 : tmp = NULL;
150 173 : return i;
151 : }
152 :
153 : #define BASE64_OPENSSL_LINELEN 64
154 :
155 : /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
156 : * bytes.
157 : *
158 : * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
159 : * but base32_encoded_size does.)
160 : *
161 : * If <b>flags</b>&BASE64_ENCODE_MULTILINE is true, return the size
162 : * of the encoded output as multiline output (64 character, `\n' terminated
163 : * lines).
164 : */
165 : size_t
166 13920 : base64_encode_size(size_t srclen, int flags)
167 : {
168 13920 : size_t enclen;
169 :
170 : /* Use INT_MAX for overflow checking because base64_encode() returns int. */
171 13920 : tor_assert(srclen < INT_MAX);
172 13920 : tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
173 :
174 13920 : enclen = BASE64_LEN(srclen);
175 13920 : if (flags & BASE64_ENCODE_MULTILINE)
176 2468 : enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
177 :
178 13920 : tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
179 13920 : return enclen;
180 : }
181 :
/** Return an upper bound on the number of bytes needed to hold the result
 * of decoding a base64 string of <b>srclen</b> characters: three quarters
 * of <b>srclen</b>, rounded up.
 *
 * This is only an upper bound, since some part of the base64 string might
 * be padding or space. Asserts that <b>srclen</b> is below INT_MAX / 3.
 */
size_t
base64_decode_maxsize(size_t srclen)
{
  tor_assert(srclen < INT_MAX / 3);

  const size_t upper_bound = CEIL_DIV(srclen * 3, 4);
  return upper_bound;
}
193 :
/** Internal table mapping each 6-bit value (the array index, 0..63) to the
 * character that represents it in the Base64 alphabet. The array is exactly
 * 64 bytes long and is not NUL-terminated. */
static const char base64_encode_table[64] = {
  /*  0..12 */
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  /* 13..25 */
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  /* 26..38 */
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  /* 39..51 */
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  /* 52..61 */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  /* 62, 63 */
  '+', '/'
};
205 :
206 : /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>. Write
207 : * the result into <b>dest</b>, if it will fit within <b>destlen</b>
208 : * bytes. Return the number of bytes written on success; -1 if
209 : * destlen is too short, or other failure.
210 : *
211 : * If <b>flags</b>&BASE64_ENCODE_MULTILINE is true, return encoded
212 : * output in multiline format (64 character, `\n' terminated lines).
213 : */
214 : int
215 12382 : base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
216 : int flags)
217 : {
218 12382 : const unsigned char *usrc = (unsigned char *)src;
219 12382 : const unsigned char *eous = usrc + srclen;
220 12382 : char *d = dest;
221 12382 : uint32_t n = 0;
222 12382 : size_t linelen = 0;
223 12382 : size_t enclen;
224 12382 : int n_idx = 0;
225 :
226 12382 : if (!src || !dest)
227 : return -1;
228 :
229 : /* Ensure that there is sufficient space, including the NUL. */
230 12380 : enclen = base64_encode_size(srclen, flags);
231 12380 : if (destlen < enclen + 1)
232 : return -1;
233 12379 : if (destlen > SIZE_T_CEILING)
234 : return -1;
235 12377 : if (enclen > INT_MAX)
236 : return -1;
237 :
238 : /* Make sure we leave no uninitialized data in the destination buffer. */
239 12377 : memset(dest, 0, destlen);
240 :
241 : /* XXX/Yawning: If this ends up being too slow, this can be sped up
242 : * by separating the multiline format case and the normal case, and
243 : * processing 48 bytes of input at a time when newlines are desired.
244 : */
245 : #define ENCODE_CHAR(ch) \
246 : STMT_BEGIN \
247 : *d++ = ch; \
248 : if (flags & BASE64_ENCODE_MULTILINE) { \
249 : if (++linelen % BASE64_OPENSSL_LINELEN == 0) { \
250 : linelen = 0; \
251 : *d++ = '\n'; \
252 : } \
253 : } \
254 : STMT_END
255 :
256 : #define ENCODE_N(idx) \
257 : ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
258 :
259 : #define ENCODE_PAD() ENCODE_CHAR('=')
260 :
261 : /* Iterate over all the bytes in src. Each one will add 8 bits to the
262 : * value we're encoding. Accumulate bits in <b>n</b>, and whenever we
263 : * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
264 : */
265 1057389 : for ( ; usrc < eous; ++usrc) {
266 1045012 : n = (n << 8) | *usrc;
267 1045012 : if ((++n_idx) == 3) {
268 341222 : ENCODE_N(0);
269 341222 : ENCODE_N(1);
270 341222 : ENCODE_N(2);
271 341222 : ENCODE_N(3);
272 : n_idx = 0;
273 : n = 0;
274 : }
275 : }
276 12377 : switch (n_idx) {
277 : case 0:
278 : /* 0 leftover bits, no padding to add. */
279 : break;
280 2630 : case 1:
281 : /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
282 : * by 2 padding characters.
283 : */
284 2630 : n <<= 4;
285 2630 : ENCODE_N(2);
286 2630 : ENCODE_N(3);
287 2630 : ENCODE_PAD();
288 2630 : ENCODE_PAD();
289 : break;
290 9358 : case 2:
291 : /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
292 : * by 1 padding character.
293 : */
294 9358 : n <<= 2;
295 9358 : ENCODE_N(1);
296 9358 : ENCODE_N(2);
297 9358 : ENCODE_N(3);
298 9358 : ENCODE_PAD();
299 : break;
300 : // LCOV_EXCL_START -- we can't reach this point, because we enforce
301 : // 0 <= ncov_idx < 3 in the loop above.
302 : default:
303 : /* Something went catastrophically wrong. */
304 : tor_fragile_assert();
305 : return -1;
306 : // LCOV_EXCL_STOP
307 : }
308 :
309 : #undef ENCODE_N
310 : #undef ENCODE_PAD
311 : #undef ENCODE_CHAR
312 :
313 : /* Multiline output always includes at least one newline. */
314 12377 : if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
315 1259 : *d++ = '\n';
316 :
317 12377 : tor_assert(d - dest == (ptrdiff_t)enclen);
318 :
319 12377 : *d++ = '\0'; /* NUL terminate the output. */
320 :
321 12377 : return (int) enclen;
322 : }
323 :
/** As base64_encode, but produce single-line output and strip every '='
 * padding character (and any newline) from the result.
 *
 * The data is first encoded with padding into <b>dest</b>, so
 * <b>destlen</b> must satisfy base64_encode()'s requirement for
 * <b>srclen</b> bytes, including space for the padding that is later
 * removed.
 *
 * Return the length of the stripped, NUL-terminated string, or the
 * non-positive value returned by base64_encode() if nothing was encoded. */
int
base64_encode_nopad(char *dest, size_t destlen,
                    const uint8_t *src, size_t srclen)
{
  const int encoded = base64_encode(dest, destlen, (const char*) src,
                                    srclen, 0);
  if (encoded <= 0)
    return encoded;
  tor_assert((size_t)encoded < destlen && dest[encoded] == 0);

  /* Compact the string in place: the write cursor never passes the read
   * cursor, so no unread character is overwritten. */
  char *wr = dest;
  for (const char *rd = dest; *rd != '\0'; ++rd) {
    if (*rd != '=' && *rd != '\n')
      *wr++ = *rd;
  }
  *wr = 0;

  const ptrdiff_t kept = wr - dest;
  tor_assert(kept <= INT_MAX);

  return (int)kept;
}

#undef BASE64_OPENSSL_LINELEN
353 :
/** @{ */
/** Special (non-data) values stored in base64_decode_table. */
#define X 255
#define SP 64
#define PAD 65
/** @} */
/** Internal table mapping each possible input byte (the array index) to
 * its meaning in base64. Entries 0..63 are the 6-bit value the character
 * encodes. SP marks whitespace (tab, LF, VT, CR and space), which is
 * skipped. X marks bytes that must not appear in base64. PAD marks '=',
 * which indicates end-of-string. Each row below covers 16 byte values,
 * starting at the offset given in the leading comment. */
static const uint8_t base64_decode_table[256] = {
  /* 0x00 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,  SP,  SP,  SP,   X,  SP,   X,   X,
  /* 0x10 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0x20 */  SP,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,  62,   X,   X,   X,  63,
  /* 0x30 */  52,  53,  54,  55,  56,  57,  58,  59,
              60,  61,   X,   X,   X, PAD,   X,   X,
  /* 0x40 */   X,   0,   1,   2,   3,   4,   5,   6,
               7,   8,   9,  10,  11,  12,  13,  14,
  /* 0x50 */  15,  16,  17,  18,  19,  20,  21,  22,
              23,  24,  25,   X,   X,   X,   X,   X,
  /* 0x60 */   X,  26,  27,  28,  29,  30,  31,  32,
              33,  34,  35,  36,  37,  38,  39,  40,
  /* 0x70 */  41,  42,  43,  44,  45,  46,  47,  48,
              49,  50,  51,   X,   X,   X,   X,   X,
  /* 0x80 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0x90 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0xA0 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0xB0 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0xC0 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0xD0 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0xE0 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
  /* 0xF0 */   X,   X,   X,   X,   X,   X,   X,   X,
               X,   X,   X,   X,   X,   X,   X,   X,
};
382 :
383 : /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>. Write
384 : * the result into <b>dest</b>, if it will fit within <b>destlen</b>
385 : * bytes. Return the number of bytes written on success; -1 if
386 : * destlen is too short, or other failure.
387 : *
388 : * NOTE 1: destlen is checked conservatively, as though srclen contained no
389 : * spaces or padding.
390 : *
391 : * NOTE 2: This implementation does not check for the correct number of
392 : * padding "=" characters at the end of the string, and does not check
393 : * for internal padding characters.
394 : */
395 : int
396 58369 : base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
397 : {
398 58369 : const char *eos = src+srclen;
399 58369 : uint32_t n=0;
400 58369 : int n_idx=0;
401 58369 : size_t di = 0;
402 :
403 58369 : if (destlen > INT_MAX)
404 : return -1;
405 :
406 : /* Make sure we leave no uninitialized data in the destination buffer. */
407 58369 : memset(dest, 0, destlen);
408 :
409 : /* Iterate over all the bytes in src. Each one will add 0 or 6 bits to the
410 : * value we're decoding. Accumulate bits in <b>n</b>, and whenever we have
411 : * 24 bits, batch them into 3 bytes and flush those bytes to dest.
412 : */
413 44698149 : for ( ; src < eos; ++src) {
414 44672429 : unsigned char c = (unsigned char) *src;
415 44672429 : uint8_t v = base64_decode_table[c];
416 44672429 : switch (v) {
417 : case X:
418 : /* This character isn't allowed in base64. */
419 : return -1;
420 690392 : case SP:
421 : /* This character is whitespace, and has no effect. */
422 690392 : continue;
423 32130 : case PAD:
424 : /* We've hit an = character: the data is over. */
425 32130 : goto end_of_loop;
426 43949392 : default:
427 : /* We have an actual 6-bit value. Append it to the bits in n. */
428 43949392 : n = (n<<6) | v;
429 43949392 : if ((++n_idx) == 4) {
430 : /* We've accumulated 24 bits in n. Flush them. */
431 10962061 : if (destlen < 3 || di > destlen - 3)
432 : return -1;
433 10962057 : dest[di++] = (n>>16);
434 10962057 : dest[di++] = (n>>8) & 0xff;
435 10962057 : dest[di++] = (n) & 0xff;
436 10962057 : n_idx = 0;
437 10962057 : n = 0;
438 : }
439 : }
440 : }
441 25720 : end_of_loop:
442 : /* If we have leftover bits, we need to cope. */
443 57850 : switch (n_idx) {
444 : case 0:
445 : default:
446 : /* No leftover bits. We win. */
447 : break;
448 : case 1:
449 : /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
450 : return -1;
451 19915 : case 2:
452 : /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
453 19915 : if (destlen < 1 || di > destlen - 1)
454 : return -1;
455 19915 : dest[di++] = n >> 4;
456 19915 : break;
457 20189 : case 3:
458 : /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
459 20189 : if (destlen < 2 || di > destlen - 2)
460 : return -1;
461 19879 : dest[di++] = n >> 10;
462 19879 : dest[di++] = n >> 2;
463 : }
464 :
465 57485 : tor_assert(di <= destlen);
466 :
467 57485 : return (int)di;
468 : }
469 : #undef X
470 : #undef SP
471 : #undef PAD
472 :
473 : /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
474 : * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
475 : * <b>dest</b>.
476 : */
477 : void
478 24081 : base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
479 : {
480 24081 : const char *end;
481 24081 : char *cp;
482 :
483 24081 : tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
484 24081 : tor_assert(destlen >= BASE16_BUFSIZE(srclen));
485 24081 : tor_assert(destlen < SIZE_T_CEILING);
486 :
487 : /* Make sure we leave no uninitialized data in the destination buffer. */
488 24081 : memset(dest, 0, destlen);
489 :
490 24081 : cp = dest;
491 24081 : end = src+srclen;
492 522690 : while (src<end) {
493 498609 : *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
494 498609 : *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
495 498609 : ++src;
496 : }
497 24081 : *cp = '\0';
498 24081 : }
499 :
/** Decode the <b>srclen</b>-character hexadecimal string at <b>src</b> and
 * store the resulting bytes in the <b>destlen</b>-byte buffer <b>dest</b>.
 *
 * Return the number of bytes decoded on success. Return -1 if
 * <b>srclen</b> is odd, if <b>destlen</b> is greater than INT_MAX or less
 * than half of <b>srclen</b>, or if hex_decode_digit() rejects (returns a
 * negative value for) any input character. The whole destination buffer is
 * zeroed before decoding; on a rejected character it may hold the bytes
 * decoded so far. */
int
base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
{
  if ((srclen % 2) != 0)
    return -1;
  if (destlen < srclen/2 || destlen > INT_MAX)
    return -1;

  /* Make sure we leave no uninitialized data in the destination buffer. */
  memset(dest, 0, destlen);

  uint8_t *out = (uint8_t *)dest;
  size_t produced = 0;

  /* Consume the input one pair of hex digits (one output byte) at a time. */
  for (size_t k = 0; k < srclen; k += 2) {
    const int hi = hex_decode_digit(src[k]);
    const int lo = hex_decode_digit(src[k + 1]);
    if (hi < 0 || lo < 0)
      return -1;
    out[produced++] = (hi << 4) | lo;
  }

  tor_assert(produced <= destlen);

  return (int) produced;
}
|