tor  0.4.1.0-alpha-dev
util_string.c
Go to the documentation of this file.
1 /* Copyright (c) 2003-2004, Roger Dingledine
2  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3  * Copyright (c) 2007-2019, The Tor Project, Inc. */
4 /* See LICENSE for licensing information */
5 
11 #include "lib/string/util_string.h"
13 #include "lib/err/torerr.h"
14 #include "lib/ctime/di_ops.h"
15 #include "lib/defs/digest_sizes.h"
16 
17 #include <string.h>
18 #include <stdlib.h>
19 
/** Search the <b>hlen</b>-byte region at <b>_haystack</b> for the first
 * occurrence of the <b>nlen</b>-byte sequence at <b>_needle</b>.
 *
 * Return a pointer to the first byte of the match, or NULL if the sequence
 * does not occur.  <b>nlen</b> must be nonzero; this is enforced with
 * raw_assert().
 */
const void *
tor_memmem(const void *_haystack, size_t hlen,
           const void *_needle, size_t nlen)
{
#if defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2)
  raw_assert(nlen);
  return memmem(_haystack, hlen, _needle, nlen);
#else
  /* Portable fallback: jump to each occurrence of the needle's first byte
   * with memchr(), then compare the whole needle there. */
  const char *hay = (const char *)_haystack;
  const char *pat = (const char *)_needle;
  raw_assert(nlen);

  if (hlen < nlen)
    return NULL;

  /* No match can begin after this position. */
  const char *final_start = hay + hlen - nlen;
  const char lead = pat[0];
  const char *cursor = hay;

  for (;;) {
    cursor = memchr(cursor, lead, (size_t)(final_start + 1 - cursor));
    if (!cursor)
      return NULL;
    if (fast_memeq(cursor, pat, nlen))
      return cursor;
    ++cursor;
    /* Not strictly needed (memchr() over zero bytes would return NULL on
     * the next pass), but it makes the termination condition explicit. */
    if (cursor > final_start)
      return NULL;
  }
#endif /* defined(HAVE_MEMMEM) && (!defined(__GNUC__) || __GNUC__ >= 2) */
}
65 
/** Search the <b>hlen</b>-byte region at <b>haystack</b> for the
 * NUL-terminated string <b>needle</b> (the terminating NUL is not part of
 * the pattern).  Returns whatever tor_memmem() returns for that search. */
const void *
tor_memstr(const void *haystack, size_t hlen, const char *needle)
{
  const size_t needle_len = strlen(needle);
  return tor_memmem(haystack, hlen, needle, needle_len);
}
71 
/** Return true iff the <b>len</b> bytes at <b>mem</b> are all zero. */
int
tor_mem_is_zero(const char *mem, size_t len)
{
  static const char zero_chunk[] = {
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
  };
  const char *cur = mem;
  size_t remaining = len;

  /* Compare one full chunk at a time.  A non-constant-time comparison is
   * acceptable here: the most an attacker could learn is how many leading
   * bytes of a secret were zero. */
  while (remaining >= sizeof(zero_chunk)) {
    if (fast_memcmp(cur, zero_chunk, sizeof(zero_chunk)) != 0)
      return 0;
    cur += sizeof(zero_chunk);
    remaining -= sizeof(zero_chunk);
  }

  /* Nothing left over: every chunk was zero. */
  if (remaining == 0)
    return 1;

  /* Compare the final partial chunk. */
  return fast_memeq(cur, zero_chunk, remaining);
}
93 
95 int
96 tor_digest_is_zero(const char *digest)
97 {
98  static const uint8_t ZERO_DIGEST[] = {
99  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
100  };
101  return tor_memeq(digest, ZERO_DIGEST, DIGEST_LEN);
102 }
103 
105 int
106 tor_digest256_is_zero(const char *digest)
107 {
108  return tor_mem_is_zero(digest, DIGEST256_LEN);
109 }
110 
/** Remove from the NUL-terminated string <b>s</b>, in place, every
 * character that appears in <b>strip</b>. */
void
tor_strstrip(char *s, const char *strip)
{
  char *out = s;   /* Next position to write a kept character. */

  for (const char *in = s; *in != '\0'; ++in) {
    if (strchr(strip, *in) == NULL)
      *out++ = *in;
  }
  *out = '\0';
}
126 
/** Replace each character of the NUL-terminated string <b>s</b>, in place,
 * with the result of TOR_TOLOWER() on it. */
void
tor_strlower(char *s)
{
  for (char *p = s; *p; ++p)
    *p = TOR_TOLOWER(*p);
}
137 
/** Replace each character of the NUL-terminated string <b>s</b>, in place,
 * with the result of TOR_TOUPPER() on it. */
void
tor_strupper(char *s)
{
  for (char *p = s; *p; ++p)
    *p = TOR_TOUPPER(*p);
}
148 
/** Return 1 if every character of the NUL-terminated string <b>s</b>
 * satisfies TOR_ISPRINT(); return 0 otherwise.  The empty string yields 1. */
int
tor_strisprint(const char *s)
{
  for (const char *p = s; *p; ++p) {
    if (!TOR_ISPRINT(*p))
      return 0;
  }
  return 1;
}
161 
/** Return 1 if no character of the NUL-terminated string <b>s</b>
 * satisfies TOR_ISUPPER(); return 0 otherwise.  The empty string yields 1. */
int
tor_strisnonupper(const char *s)
{
  for (const char *p = s; *p; ++p) {
    if (TOR_ISUPPER(*p))
      return 0;
  }
  return 1;
}
174 
/** Return 1 if every character of the NUL-terminated string <b>s</b>
 * satisfies TOR_ISSPACE(); return 0 otherwise.  The empty string yields 1. */
int
tor_strisspace(const char *s)
{
  for (const char *p = s; *p; ++p) {
    if (!TOR_ISSPACE(*p))
      return 0;
  }
  return 1;
}
187 
/** As strcmp(), but either argument may be NULL.  A NULL string sorts
 * before any non-NULL string, and two NULL strings compare equal. */
int
strcmp_opt(const char *s1, const char *s2)
{
  if (s1 && s2)
    return strcmp(s1, s2);
  if (s1)
    return 1;          /* Only s2 is NULL. */
  return s2 ? -1 : 0;  /* s1 is NULL; equal only if s2 is NULL too. */
}
204 
/** Compare the first strlen(<b>s2</b>) characters of <b>s1</b> against
 * <b>s2</b> with strncmp().  The result is 0 exactly when <b>s1</b> begins
 * with <b>s2</b>. */
int
strcmpstart(const char *s1, const char *s2)
{
  return strncmp(s1, s2, strlen(s2));
}
214 
/** Compare the first strlen(<b>s2</b>) characters of <b>s1</b> against
 * <b>s2</b> with strncasecmp().  The result is 0 exactly when <b>s1</b>
 * begins with <b>s2</b>, ignoring case. */
int
strcasecmpstart(const char *s1, const char *s2)
{
  return strncasecmp(s1, s2, strlen(s2));
}
224 
/** Compare the first strlen(<b>prefix</b>) bytes of the <b>memlen</b>-byte
 * region at <b>mem</b> against the NUL-terminated string <b>prefix</b>
 * using fast_memcmp().
 *
 * Return -1 without comparing if the region is shorter than the prefix;
 * otherwise return the result of fast_memcmp(). */
int
fast_memcmpstart(const void *mem, size_t memlen,
                 const char *prefix)
{
  const size_t prefix_len = strlen(prefix);
  if (memlen >= prefix_len)
    return fast_memcmp(mem, prefix, prefix_len);
  return -1;
}
240 
/** Compare the last strlen(<b>s2</b>) characters of <b>s1</b> against
 * <b>s2</b>.  The result is 0 exactly when <b>s1</b> ends with <b>s2</b>.
 *
 * If <b>s2</b> is longer than <b>s1</b> the strings cannot match, so the
 * result of strcmp(s1, s2) is returned instead. */
int
strcmpend(const char *s1, const char *s2)
{
  const size_t len1 = strlen(s1);
  const size_t len2 = strlen(s2);

  if (len2 <= len1) {
    const char *tail = s1 + (len1 - len2);
    return strncmp(tail, s2, len2);
  }
  return strcmp(s1, s2);
}
253 
/** Compare the last strlen(<b>s2</b>) characters of <b>s1</b> against
 * <b>s2</b>, ignoring case.  The result is 0 exactly when <b>s1</b> ends
 * with <b>s2</b>.
 *
 * If <b>s2</b> is longer than <b>s1</b> the strings cannot match, so the
 * result of strcasecmp(s1, s2) is returned to say which one is bigger. */
int
strcasecmpend(const char *s1, const char *s2)
{
  const size_t len1 = strlen(s1);
  const size_t len2 = strlen(s2);

  if (len2 <= len1) {
    const char *tail = s1 + (len1 - len2);
    return strncasecmp(tail, s2, len2);
  }
  return strcasecmp(s1, s2);
}
266 
/** Return a pointer to the first character of <b>s</b> that is neither
 * whitespace (space, tab, newline, carriage return) nor part of a comment.
 * A comment starts at '#' and runs up to, but not including, the next
 * newline.  <b>s</b> must be non-NULL. */
const char *
eat_whitespace(const char *s)
{
  raw_assert(s);

  for (;;) {
    const char c = *s;
    if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
      ++s;
    } else if (c == '#') {
      /* Skip the '#' and the rest of the line; the newline itself is
       * consumed as ordinary whitespace on the next pass. */
      do {
        ++s;
      } while (*s && *s != '\n');
    } else {
      /* NUL or any other character ends the scan. */
      return s;
    }
  }
}
293 
/** As eat_whitespace(), but never read at or beyond <b>eos</b>.
 *
 * Return a pointer to the first character in [<b>s</b>, <b>eos</b>) that
 * is neither whitespace nor part of a '#' comment, or <b>eos</b> if the
 * scan reaches it first.  Both pointers must be non-NULL with
 * <b>s</b> <= <b>eos</b>. */
const char *
eat_whitespace_eos(const char *s, const char *eos)
{
  raw_assert(s);
  raw_assert(eos && s <= eos);

  while (s < eos) {
    const char c = *s;
    if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
      ++s;
    } else if (c == '#') {
      /* Skip the '#' and the rest of the line, staying inside the buffer. */
      do {
        ++s;
      } while (s < eos && *s && *s != '\n');
    } else {
      /* NUL or any other character ends the scan. */
      return s;
    }
  }
  return s;
}
322 
/** Return a pointer to the first character of <b>s</b> that is not a
 * space, tab, or carriage return.  Newlines are not skipped. */
const char *
eat_whitespace_no_nl(const char *s)
{
  for (;; ++s) {
    switch (*s) {
      case ' ':
      case '\t':
      case '\r':
        continue;
      default:
        return s;
    }
  }
}
332 
/** As eat_whitespace_no_nl(), but never read at or beyond <b>eos</b>.
 * Return <b>eos</b> if only spaces, tabs, and carriage returns are seen
 * before it. */
const char *
eat_whitespace_eos_no_nl(const char *s, const char *eos)
{
  for (; s < eos; ++s) {
    if (*s != ' ' && *s != '\t' && *s != '\r')
      break;
  }
  return s;
}
342 
/** Return a pointer to the first character of <b>s</b> that is a space,
 * tab, carriage return, newline, or '#'.  If there is none, return a
 * pointer to the terminating NUL. */
const char *
find_whitespace(const char *s)
{
  /* strcspn() counts the leading characters that are not in the reject
   * set, and always stops at the terminating NUL. */
  return s + strcspn(s, "# \r\n\t");
}
365 
/** As find_whitespace(), but never read at or beyond <b>eos</b>.
 *
 * Return a pointer to the first character in [<b>s</b>, <b>eos</b>) that
 * is NUL, '#', space, carriage return, newline, or tab; return <b>eos</b>
 * if no such character occurs before it. */
const char *
find_whitespace_eos(const char *s, const char *eos)
{
  for (; s < eos; ++s) {
    const char c = *s;
    if (c == '\0' || c == '#' || c == ' ' ||
        c == '\r' || c == '\n' || c == '\t')
      break;
  }
  return s;
}
388 
/** Return a pointer to the first place in the NUL-terminated string
 * <b>haystack</b> where <b>needle</b> occurs at the very beginning of a
 * line (the start of <b>haystack</b>, or just after a newline).  Return
 * NULL if there is no such place. */
const char *
find_str_at_start_of_line(const char *haystack, const char *needle)
{
  const size_t needle_len = strlen(needle);
  const char *line = haystack;

  for (;;) {
    if (strncmp(line, needle, needle_len) == 0)
      return line;

    /* Move to the start of the next line, if there is one. */
    const char *newline = strchr(line, '\n');
    if (newline == NULL)
      return NULL;
    line = newline + 1;
    if (*line == '\0')
      return NULL;
  }
}
411 
/** Return 1 if the NUL-terminated <b>string</b> is non-empty, starts with
 * a character that satisfies TOR_ISALPHA() or is '_', and otherwise
 * contains only characters that satisfy TOR_ISALPHA() or TOR_ISDIGIT() or
 * are '_'.  Return 0 otherwise. */
int
string_is_C_identifier(const char *string)
{
  const size_t length = strlen(string);
  if (length == 0)
    return 0;

  /* The first character may not be a digit. */
  if (!TOR_ISALPHA(string[0]) && string[0] != '_')
    return 0;

  for (size_t i = 1; i < length; ++i) {
    if (!TOR_ISALPHA(string[i]) &&
        !TOR_ISDIGIT(string[i]) &&
        string[i] != '_')
      return 0;
  }

  return 1;
}
439 
/** A byte with its top <b>nbits</b> bits set and the remaining bits clear.
 * <b>nbits</b> must be in the range 0..8. */
#define TOP_BITS(nbits) ((uint8_t)(0xFF << (8 - (nbits))))

/** A byte with its low <b>nbits</b> bits set and the remaining bits clear.
 * <b>nbits</b> must be in the range 0..8. */
#define LOW_BITS(nbits) ((uint8_t)(0xFF >> (8 - (nbits))))
444 
/** Given <b>b</b>, the first byte of a UTF-8 encoded character, return the
 * total number of bytes (1..4) that its bit pattern announces.  Return 0
 * if <b>b</b> cannot start a character. */
static uint8_t
bytes_in_char(uint8_t b)
{
  /* 0xxxxxxx: a 1-byte UTF-8 char, aka ASCII. */
  if ((b & TOP_BITS(1)) == 0x00)
    return 1;

  /* A lead byte for an n-byte char (n = 2..4) has its top n bits set,
   * followed by a zero bit: 110xxxxx, 1110xxxx, 11110xxx. */
  for (uint8_t n = 2; n <= 4; ++n) {
    if ((b & TOP_BITS(n + 1)) == TOP_BITS(n))
      return n;
  }

  /* Invalid: either the top 2 bits are 10, or the top 5 bits are 11111. */
  return 0;
}
463 
/** Return true iff <b>b</b> is a UTF-8 continuation byte, i.e. its top two
 * bits are 10. */
static bool
is_continuation_byte(uint8_t b)
{
  uint8_t top2bits = b & TOP_BITS(2);
  return top2bits == TOP_BITS(1);
}
471 
/** Validate the UTF-8 character that starts at <b>c</b> and is <b>len</b>
 * bytes long, where <b>len</b> was derived from the leading byte.
 *
 * Return true iff every trailing byte is a continuation byte and the
 * decoded code point is not an overlong encoding, not a UTF-16 surrogate,
 * and not above U+10FFFF.  A <b>len</b> of 1 is accepted without
 * inspecting the byte.  The caller must ensure <b>len</b> bytes are
 * readable at <b>c</b>. */
static bool
validate_char(const uint8_t *c, uint8_t len)
{
  if (len == 1)
    return true; // already validated this is an ASCII char earlier.

  const uint8_t lead_mask = LOW_BITS(7 - len); // payload bits, leading byte.
  const uint8_t cont_mask = LOW_BITS(6);       // payload bits, continuation.
  uint32_t codepoint = c[0] & lead_mask;

  for (uint8_t i = 1; i < len; i++) {
    if (!is_continuation_byte(c[i]))
      return false;
    codepoint = (codepoint << 6) | (uint32_t)(c[i] & cont_mask);
  }

  // Reject overlong encodings: the value would have fit in fewer bytes.
  switch (len) {
    case 2:
      if (codepoint <= 0x7f)
        return false;
      break;
    case 3:
      if (codepoint <= 0x7ff)
        return false;
      break;
    case 4:
      if (codepoint <= 0xffff)
        return false;
      break;
    default:
      break;
  }

  // Reserved for UTF-16 surrogate pairs.
  if (codepoint >= 0xd800 && codepoint <= 0xdfff)
    return false;

  // Must not exceed the largest code point.
  return codepoint <= 0x10ffff;
}
506 
/** Return true iff the <b>len</b> bytes at <b>str</b> form a valid UTF-8
 * sequence.  Embedded NUL bytes are treated as ordinary ASCII characters;
 * only <b>len</b> bounds the scan.
 *
 * A NULL <b>str</b> is never dereferenced: it counts as valid only when
 * <b>len</b> is 0 (the empty string), and as invalid otherwise. */
int
string_is_utf8(const char *str, size_t len)
{
  // A NULL pointer can only stand for the empty string.
  if (!str)
    return len == 0;

  for (size_t i = 0; i < len;) {
    uint8_t num_bytes = bytes_in_char((uint8_t)str[i]);
    if (num_bytes == 0) // Invalid leading byte found.
      return false;

    // The character must not run past the end of the buffer.
    size_t next_char = i + num_bytes;
    if (next_char > len)
      return false;

    // Validate the continuation bytes in this multi-byte character,
    // and advance to the next character in the string.
    if (!validate_char((const uint8_t*)&str[i], num_bytes))
      return false;
    i = next_char;
  }
  return true;
}
529 
/** As string_is_utf8(), but additionally return false if the buffer starts
 * with the encoding of U+FEFF (the byte order mark) or of U+FFFE.
 *
 * A NULL <b>str</b> is never dereferenced: it counts as valid only when
 * <b>len</b> is 0, and as invalid otherwise. */
int
string_is_utf8_no_bom(const char *str, size_t len)
{
  // A NULL pointer can only stand for the empty string; don't read it.
  if (!str)
    return len == 0;

  // Each of these literals is 3 bytes long in UTF-8, so with len >= 3 the
  // prefix comparison stays inside the buffer.
  if (len >= 3 && (!strcmpstart(str, "\uFEFF") ||
                   !strcmpstart(str, "\uFFFE"))) {
    return false;
  }
  return string_is_utf8(str, len);
}
static uint8_t bytes_in_char(uint8_t b)
Definition: util_string.c:449
Headers for di_ops.c.
int tor_mem_is_zero(const char *mem, size_t len)
Definition: util_string.c:74
void tor_strstrip(char *s, const char *strip)
Definition: util_string.c:114
int strcmpend(const char *s1, const char *s2)
Definition: util_string.c:245
void tor_strupper(char *s)
Definition: util_string.c:141
const char * find_whitespace(const char *s)
Definition: util_string.c:347
int strcmpstart(const char *s1, const char *s2)
Definition: util_string.c:209
Header for util_string.c.
const char * eat_whitespace_eos_no_nl(const char *s, const char *eos)
Definition: util_string.c:336
int string_is_C_identifier(const char *string)
Definition: util_string.c:417
#define TOP_BITS(x)
Definition: util_string.c:441
#define DIGEST256_LEN
Definition: digest_sizes.h:23
int tor_strisprint(const char *s)
Definition: util_string.c:152
int strcasecmpend(const char *s1, const char *s2)
Definition: util_string.c:258
#define LOW_BITS(x)
Definition: util_string.c:443
int tor_memeq(const void *a, const void *b, size_t sz)
Definition: di_ops.c:107
const void * tor_memmem(const void *_haystack, size_t hlen, const void *_needle, size_t nlen)
Definition: util_string.c:29
#define DIGEST_LEN
Definition: digest_sizes.h:20
int string_is_utf8(const char *str, size_t len)
Definition: util_string.c:510
const char * eat_whitespace(const char *s)
Definition: util_string.c:271
int fast_memcmpstart(const void *mem, size_t memlen, const char *prefix)
Definition: util_string.c:232
int string_is_utf8_no_bom(const char *str, size_t len)
Definition: util_string.c:534
void tor_strlower(char *s)
Definition: util_string.c:130
int tor_digest_is_zero(const char *digest)
Definition: util_string.c:96
const char * find_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:369
int tor_strisnonupper(const char *s)
Definition: util_string.c:165
Locale-independent character-type inspection (header)
static bool validate_char(const uint8_t *c, uint8_t len)
Definition: util_string.c:476
#define fast_memcmp(a, b, c)
Definition: di_ops.h:26
Headers for torerr.c.
Definitions for common sizes of cryptographic digests.
int tor_strisspace(const char *s)
Definition: util_string.c:178
int strcmp_opt(const char *s1, const char *s2)
Definition: util_string.c:191
int tor_digest256_is_zero(const char *digest)
Definition: util_string.c:106
int strcasecmpstart(const char *s1, const char *s2)
Definition: util_string.c:219
const char * eat_whitespace_no_nl(const char *s)
Definition: util_string.c:326
const char * eat_whitespace_eos(const char *s, const char *eos)
Definition: util_string.c:298
static bool is_continuation_byte(uint8_t b)
Definition: util_string.c:466
const char * find_str_at_start_of_line(const char *haystack, const char *needle)
Definition: util_string.c:394