Line data Source code
1 : /* Copyright (c) 2007-2021, The Tor Project, Inc. */
2 : /* See LICENSE for licensing information */
3 :
4 : /**
5 : * \file geoip.c
6 : * \brief Functions related to maintaining an IP-to-country database;
7 : * to summarizing client connections by country to entry guards, bridges,
8 : * and directory servers; and for statistics on answering network status
9 : * requests.
10 : *
11 : * There are two main kinds of functions in this module: geoip functions,
12 : * which map groups of IPv4 and IPv6 addresses to country codes, and
13 : * statistical functions, which collect statistics about different kinds of
14 : * per-country usage.
15 : *
16 : * The geoip lookup tables are implemented as sorted lists of disjoint address
17 : * ranges, each mapping to a singleton geoip_country_t. These country objects
18 : * are also indexed by their names in a hashtable.
19 : *
20 : * The tables are populated from disk at startup by the geoip_load_file()
21 : * function. For more information on the file format they read, see that
22 : * function. See the scripts and the README file in src/config for more
23 : * information about how those files are generated.
24 : *
25 : * Tor uses GeoIP information in order to implement user requests (such as
26 : * ExcludeNodes {cc}), and to keep track of how much usage relays are getting
27 : * for each country.
28 : */
29 :
30 : #define GEOIP_PRIVATE
31 : #include "lib/geoip/geoip.h"
32 : #include "lib/container/map.h"
33 : #include "lib/container/order.h"
34 : #include "lib/container/smartlist.h"
35 : #include "lib/crypt_ops/crypto_digest.h"
36 : #include "lib/ctime/di_ops.h"
37 : #include "lib/encoding/binascii.h"
38 : #include "lib/fs/files.h"
39 : #include "lib/log/escape.h"
40 : #include "lib/malloc/malloc.h"
41 : #include "lib/net/address.h" //????
42 : #include "lib/net/inaddr.h"
43 : #include "lib/string/compat_ctype.h"
44 : #include "lib/string/compat_string.h"
45 : #include "lib/string/scanf.h"
46 : #include "lib/string/util_string.h"
47 :
48 : #include <stdio.h>
49 : #include <string.h>
50 :
51 : static void init_geoip_countries(void);
52 :
/** One record of the IPv4 GeoIP table: a closed range of IPv4 addresses and
 * the country that the whole range is attributed to. */
typedef struct geoip_ipv4_entry_t geoip_ipv4_entry_t;
struct geoip_ipv4_entry_t {
  uint32_t ip_low; /**< First address of the range, in host order */
  uint32_t ip_high; /**< Last address of the range, in host order */
  intptr_t country; /**< Position of the country in geoip_countries */
};
59 :
/** One record of the IPv6 GeoIP table: a closed range of IPv6 addresses and
 * the country that the whole range is attributed to. The bounds are kept
 * as raw in6_addr values; this file orders them by comparing their s6_addr
 * bytes with fast_memcmp(). */
typedef struct geoip_ipv6_entry_t geoip_ipv6_entry_t;
struct geoip_ipv6_entry_t {
  struct in6_addr ip_low; /**< First address of the range */
  struct in6_addr ip_high; /**< Last address of the range */
  intptr_t country; /**< Position of the country in geoip_countries */
};
66 :
/** A list of geoip_country_t. It is created by init_geoip_countries(),
 * which places the "??" (unknown country) record at index 0, and it is
 * released by clear_geoip_db(). */
static smartlist_t *geoip_countries = NULL;
/** A map from lowercased country codes to their position in geoip_countries.
 * The index is encoded in the pointer, and 1 is added so that NULL can mean
 * not found. Created together with geoip_countries. */
static strmap_t *country_idxplus1_by_lc_code = NULL;
/** List of all known geoip_ipv4_entry_t sorted
 * by their respective ip_low values. (The sort happens at the end of
 * geoip_load_file(); NULL until an IPv4 table has been started.) */
static smartlist_t *geoip_ipv4_entries = NULL;
/** List of all known geoip_ipv6_entry_t, sorted by their respective
 * ip_low values. (The sort happens at the end of geoip_load_file(); NULL
 * until an IPv6 table has been started.) */
static smartlist_t *geoip_ipv6_entries = NULL;

/** SHA1 digest of the IPv4 GeoIP file to include in extra-info
 * descriptors. Written by geoip_load_file(); zeroed by geoip_free_all(). */
static char geoip_digest[DIGEST_LEN];
/** SHA1 digest of the IPv6 GeoIP file to include in extra-info
 * descriptors. Written by geoip_load_file(); zeroed by geoip_free_all(). */
static char geoip6_digest[DIGEST_LEN];
86 :
87 : /** Return a list of geoip_country_t for all known countries. */
88 : const smartlist_t *
89 33 : geoip_get_countries(void)
90 : {
91 33 : if (geoip_countries == NULL) {
92 0 : init_geoip_countries();
93 : }
94 33 : return geoip_countries;
95 : }
96 :
97 : /** Return the index of the <b>country</b>'s entry in the GeoIP
98 : * country list if it is a valid 2-letter country code, otherwise
99 : * return -1. */
100 0 : MOCK_IMPL(country_t,
101 : geoip_get_country,(const char *country))
102 : {
103 0 : void *idxplus1_;
104 0 : intptr_t idx;
105 :
106 0 : idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
107 0 : if (!idxplus1_)
108 : return -1;
109 :
110 0 : idx = ((uintptr_t)idxplus1_)-1;
111 0 : return (country_t)idx;
112 : }
113 :
114 : /** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b>
115 : * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */
116 : static void
117 47 : geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high,
118 : const char *country)
119 : {
120 47 : intptr_t idx;
121 47 : void *idxplus1_;
122 :
123 47 : IF_BUG_ONCE(tor_addr_family(low) != tor_addr_family(high))
124 : return;
125 47 : IF_BUG_ONCE(tor_addr_compare(high, low, CMP_EXACT) < 0)
126 : return;
127 :
128 47 : idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country);
129 :
130 47 : if (!idxplus1_) {
131 24 : geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
132 24 : strlcpy(c->countrycode, country, sizeof(c->countrycode));
133 24 : tor_strlower(c->countrycode);
134 24 : smartlist_add(geoip_countries, c);
135 24 : idx = smartlist_len(geoip_countries) - 1;
136 24 : strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
137 : } else {
138 23 : idx = ((uintptr_t)idxplus1_)-1;
139 : }
140 : {
141 47 : geoip_country_t *c = smartlist_get(geoip_countries, (int)idx);
142 47 : tor_assert(!strcasecmp(c->countrycode, country));
143 : }
144 :
145 47 : if (tor_addr_family(low) == AF_INET) {
146 26 : geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t));
147 26 : ent->ip_low = tor_addr_to_ipv4h(low);
148 26 : ent->ip_high = tor_addr_to_ipv4h(high);
149 26 : ent->country = idx;
150 26 : smartlist_add(geoip_ipv4_entries, ent);
151 21 : } else if (tor_addr_family(low) == AF_INET6) {
152 21 : geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t));
153 21 : ent->ip_low = *tor_addr_to_in6_assert(low);
154 21 : ent->ip_high = *tor_addr_to_in6_assert(high);
155 21 : ent->country = idx;
156 21 : smartlist_add(geoip_ipv6_entries, ent);
157 : }
158 : }
159 :
160 : /** Add an entry to the GeoIP table indicated by <b>family</b>,
161 : * parsing it from <b>line</b>. The format is as for geoip_load_file(). */
162 : STATIC int
163 48 : geoip_parse_entry(const char *line, sa_family_t family)
164 : {
165 48 : tor_addr_t low_addr, high_addr;
166 48 : char c[3];
167 48 : char *country = NULL;
168 :
169 48 : if (!geoip_countries)
170 3 : init_geoip_countries();
171 48 : if (family == AF_INET) {
172 27 : if (!geoip_ipv4_entries)
173 3 : geoip_ipv4_entries = smartlist_new();
174 21 : } else if (family == AF_INET6) {
175 21 : if (!geoip_ipv6_entries)
176 1 : geoip_ipv6_entries = smartlist_new();
177 : } else {
178 0 : log_warn(LD_GENERAL, "Unsupported family: %d", family);
179 0 : return -1;
180 : }
181 :
182 49 : while (TOR_ISSPACE(*line))
183 1 : ++line;
184 48 : if (*line == '#')
185 : return 0;
186 :
187 48 : char buf[512];
188 48 : if (family == AF_INET) {
189 27 : unsigned int low, high;
190 31 : if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 ||
191 4 : tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) {
192 26 : tor_addr_from_ipv4h(&low_addr, low);
193 26 : tor_addr_from_ipv4h(&high_addr, high);
194 : } else
195 1 : goto fail;
196 26 : country = c;
197 : } else { /* AF_INET6 */
198 21 : char *low_str, *high_str;
199 21 : struct in6_addr low, high;
200 21 : char *strtok_state;
201 21 : strlcpy(buf, line, sizeof(buf));
202 21 : low_str = tor_strtok_r(buf, ",", &strtok_state);
203 21 : if (!low_str)
204 0 : goto fail;
205 21 : high_str = tor_strtok_r(NULL, ",", &strtok_state);
206 21 : if (!high_str)
207 0 : goto fail;
208 21 : country = tor_strtok_r(NULL, "\n", &strtok_state);
209 21 : if (!country)
210 0 : goto fail;
211 21 : if (strlen(country) != 2)
212 0 : goto fail;
213 21 : if (tor_inet_pton(AF_INET6, low_str, &low) <= 0)
214 0 : goto fail;
215 21 : tor_addr_from_in6(&low_addr, &low);
216 21 : if (tor_inet_pton(AF_INET6, high_str, &high) <= 0)
217 0 : goto fail;
218 21 : tor_addr_from_in6(&high_addr, &high);
219 : }
220 47 : geoip_add_entry(&low_addr, &high_addr, country);
221 47 : return 0;
222 :
223 1 : fail:
224 1 : log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s",
225 : family == AF_INET ? "IPv4" : "IPv6", escaped(line));
226 1 : return -1;
227 : }
228 :
229 : /** Sorting helper: return -1, 1, or 0 based on comparison of two
230 : * geoip_ipv4_entry_t */
231 : static int
232 26 : geoip_ipv4_compare_entries_(const void **_a, const void **_b)
233 : {
234 26 : const geoip_ipv4_entry_t *a = *_a, *b = *_b;
235 26 : if (a->ip_low < b->ip_low)
236 : return -1;
237 0 : else if (a->ip_low > b->ip_low)
238 : return 1;
239 : else
240 0 : return 0;
241 : }
242 :
243 : /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
244 : * to a uint32_t in host order) to a geoip_ipv4_entry_t */
245 : static int
246 133 : geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member)
247 : {
248 : /* No alignment issue here, since _key really is a pointer to uint32_t */
249 133 : const uint32_t addr = *(uint32_t *)_key;
250 133 : const geoip_ipv4_entry_t *entry = *_member;
251 133 : if (addr < entry->ip_low)
252 : return -1;
253 86 : else if (addr > entry->ip_high)
254 : return 1;
255 : else
256 46 : return 0;
257 : }
258 :
259 : /** Sorting helper: return -1, 1, or 0 based on comparison of two
260 : * geoip_ipv6_entry_t */
261 : static int
262 28 : geoip_ipv6_compare_entries_(const void **_a, const void **_b)
263 : {
264 28 : const geoip_ipv6_entry_t *a = *_a, *b = *_b;
265 28 : return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr,
266 : sizeof(struct in6_addr));
267 : }
268 :
269 : /** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6
270 : * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */
271 : static int
272 117 : geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member)
273 : {
274 117 : const struct in6_addr *addr = (struct in6_addr *)_key;
275 117 : const geoip_ipv6_entry_t *entry = *_member;
276 :
277 117 : if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr,
278 : sizeof(struct in6_addr)) < 0)
279 : return -1;
280 73 : else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr,
281 : sizeof(struct in6_addr)) > 0)
282 : return 1;
283 : else
284 36 : return 0;
285 : }
286 :
287 : /** Set up a new list of geoip countries with no countries (yet) set in it,
288 : * except for the unknown country.
289 : */
290 : static void
291 10 : init_geoip_countries(void)
292 : {
293 10 : geoip_country_t *geoip_unresolved;
294 10 : geoip_countries = smartlist_new();
295 : /* Add a geoip_country_t for requests that could not be resolved to a
296 : * country as first element (index 0) to geoip_countries. */
297 10 : geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
298 10 : strlcpy(geoip_unresolved->countrycode, "??",
299 : sizeof(geoip_unresolved->countrycode));
300 10 : smartlist_add(geoip_countries, geoip_unresolved);
301 10 : country_idxplus1_by_lc_code = strmap_new();
302 10 : strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1));
303 10 : }
304 :
305 : /** Clear appropriate GeoIP database, based on <b>family</b>, and
306 : * reload it from the file <b>filename</b>. Return 0 on success, -1 on
307 : * failure.
308 : *
309 : * Recognized line formats for IPv4 are:
310 : * INTIPLOW,INTIPHIGH,CC
311 : * and
312 : * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
313 : * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
314 : * integers, and CC is a country code.
315 : *
316 : * Recognized line format for IPv6 is:
317 : * IPV6LOW,IPV6HIGH,CC
318 : * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
319 : *
320 : * It also recognizes, and skips over, blank lines and lines that start
321 : * with '#' (comments).
322 : */
323 : int
324 14 : geoip_load_file(sa_family_t family, const char *filename, int severity)
325 : {
326 14 : FILE *f;
327 14 : crypto_digest_t *geoip_digest_env = NULL;
328 :
329 14 : tor_assert(family == AF_INET || family == AF_INET6);
330 :
331 14 : if (!(f = tor_fopen_cloexec(filename, "r"))) {
332 10 : log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s.",
333 : filename);
334 10 : return -1;
335 : }
336 4 : if (!geoip_countries)
337 2 : init_geoip_countries();
338 :
339 4 : if (family == AF_INET) {
340 3 : if (geoip_ipv4_entries) {
341 10 : SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e,
342 : tor_free(e));
343 1 : smartlist_free(geoip_ipv4_entries);
344 : }
345 3 : geoip_ipv4_entries = smartlist_new();
346 : } else { /* AF_INET6 */
347 1 : if (geoip_ipv6_entries) {
348 0 : SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e,
349 : tor_free(e));
350 0 : smartlist_free(geoip_ipv6_entries);
351 : }
352 1 : geoip_ipv6_entries = smartlist_new();
353 : }
354 4 : geoip_digest_env = crypto_digest_new();
355 :
356 4 : log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.",
357 : (family == AF_INET) ? "IPv4" : "IPv6", filename);
358 38 : while (!feof(f)) {
359 38 : char buf[512];
360 38 : if (fgets(buf, (int)sizeof(buf), f) == NULL)
361 : break;
362 34 : crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf));
363 : /* FFFF track full country name. */
364 34 : geoip_parse_entry(buf, family);
365 : }
366 : /*XXXX abort and return -1 if no entries/illformed?*/
367 4 : fclose(f);
368 :
369 : /* Sort list and remember file digests so that we can include it in
370 : * our extra-info descriptors. */
371 4 : if (family == AF_INET) {
372 3 : smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_);
373 3 : crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN);
374 : } else {
375 : /* AF_INET6 */
376 1 : smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_);
377 1 : crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN);
378 : }
379 4 : crypto_digest_free(geoip_digest_env);
380 :
381 4 : return 0;
382 : }
383 :
384 : /** Given an IP address in host order, return a number representing the
385 : * country to which that address belongs, -1 for "No geoip information
386 : * available", or 0 for the 'unknown country'. The return value will always
387 : * be less than geoip_get_n_countries(). To decode it, call
388 : * geoip_get_country_name().
389 : */
390 : int
391 12168 : geoip_get_country_by_ipv4(uint32_t ipaddr)
392 : {
393 12168 : geoip_ipv4_entry_t *ent;
394 12168 : if (!geoip_ipv4_entries)
395 : return -1;
396 52 : ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr,
397 : geoip_ipv4_compare_key_to_entry_);
398 52 : return ent ? (int)ent->country : 0;
399 : }
400 :
401 : /** Given an IPv6 address, return a number representing the country to
402 : * which that address belongs, -1 for "No geoip information available", or
403 : * 0 for the 'unknown country'. The return value will always be less than
404 : * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
405 : */
406 : int
407 44 : geoip_get_country_by_ipv6(const struct in6_addr *addr)
408 : {
409 44 : geoip_ipv6_entry_t *ent;
410 :
411 44 : if (!geoip_ipv6_entries)
412 : return -1;
413 42 : ent = smartlist_bsearch(geoip_ipv6_entries, addr,
414 : geoip_ipv6_compare_key_to_entry_);
415 42 : return ent ? (int)ent->country : 0;
416 : }
417 :
418 : /** Given an IP address, return a number representing the country to which
419 : * that address belongs, -1 for "No geoip information available", or 0 for
420 : * the 'unknown country'. The return value will always be less than
421 : * geoip_get_n_countries(). To decode it, call geoip_get_country_name().
422 : */
423 12305 : MOCK_IMPL(int,
424 : geoip_get_country_by_addr,(const tor_addr_t *addr))
425 : {
426 12305 : if (tor_addr_family(addr) == AF_INET) {
427 12156 : return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr));
428 149 : } else if (tor_addr_family(addr) == AF_INET6) {
429 33 : return geoip_get_country_by_ipv6(tor_addr_to_in6(addr));
430 : } else {
431 : return -1;
432 : }
433 : }
434 :
435 : /** Return the number of countries recognized by the GeoIP country list. */
436 19 : MOCK_IMPL(int,
437 : geoip_get_n_countries,(void))
438 : {
439 19 : if (!geoip_countries)
440 5 : init_geoip_countries();
441 19 : return (int) smartlist_len(geoip_countries);
442 : }
443 :
444 : /** Return the two-letter country code associated with the number <b>num</b>,
445 : * or "??" for an unknown value. */
446 : const char *
447 55 : geoip_get_country_name(country_t num)
448 : {
449 55 : if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
450 31 : geoip_country_t *c = smartlist_get(geoip_countries, num);
451 31 : return c->countrycode;
452 : } else
453 : return "??";
454 : }
455 :
456 : /** Return true iff we have loaded a GeoIP database.*/
457 61 : MOCK_IMPL(int,
458 : geoip_is_loaded,(sa_family_t family))
459 : {
460 61 : tor_assert(family == AF_INET || family == AF_INET6);
461 61 : if (geoip_countries == NULL)
462 : return 0;
463 15 : if (family == AF_INET)
464 13 : return geoip_ipv4_entries != NULL;
465 : else /* AF_INET6 */
466 2 : return geoip_ipv6_entries != NULL;
467 : }
468 :
469 : /** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The
470 : * result does not need to be deallocated, but will be overwritten by the
471 : * next call of hex_str(). */
472 : const char *
473 7 : geoip_db_digest(sa_family_t family)
474 : {
475 7 : tor_assert(family == AF_INET || family == AF_INET6);
476 7 : if (family == AF_INET)
477 4 : return hex_str(geoip_digest, DIGEST_LEN);
478 : else /* AF_INET6 */
479 3 : return hex_str(geoip6_digest, DIGEST_LEN);
480 : }
481 :
482 : /** Release all storage held by the GeoIP databases and country list. */
483 : STATIC void
484 241 : clear_geoip_db(void)
485 : {
486 241 : if (geoip_countries) {
487 24 : SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
488 4 : smartlist_free(geoip_countries);
489 : }
490 :
491 241 : strmap_free(country_idxplus1_by_lc_code, NULL);
492 241 : if (geoip_ipv4_entries) {
493 14 : SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent,
494 : tor_free(ent));
495 3 : smartlist_free(geoip_ipv4_entries);
496 : }
497 241 : if (geoip_ipv6_entries) {
498 16 : SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent,
499 : tor_free(ent));
500 1 : smartlist_free(geoip_ipv6_entries);
501 : }
502 241 : geoip_countries = NULL;
503 241 : country_idxplus1_by_lc_code = NULL;
504 241 : geoip_ipv4_entries = NULL;
505 241 : geoip_ipv6_entries = NULL;
506 241 : }
507 :
508 : /** Release all storage held in this file. */
509 : void
510 237 : geoip_free_all(void)
511 : {
512 237 : clear_geoip_db();
513 :
514 237 : memset(geoip_digest, 0, sizeof(geoip_digest));
515 237 : memset(geoip6_digest, 0, sizeof(geoip6_digest));
516 237 : }
|