#!/usr/bin/env python

# Converts OONI JSON indexes into CSV. JSON indexes come from
# https://measurements.ooni.torproject.org/api/.
#
# Usage: ./json2csv < ooni.json > ooni.csv

import csv
import datetime
import json
import re
import sys

# Regex to extract date, country code, AS number, and test name from a report
# URL. Sample URL:
# https://s3.amazonaws.com/ooni-public/sanitised/2017-04-14/20170414T002940Z-GB-AS786-meek_fronted_requests_test-20170414T002941Z_AS786_TBh4y19qx6OuNTMVAZppdGiKUuqTDgKmBAOBr7z2Gszn3h6iO9-0.2.0-probe.json
URL_REGEX = re.compile(r'/(\d{8}T\d{6}Z)-(\w+)-AS(\d+)-([\w_]+)-')

# strptime/strftime formats: "test_start_time" values are parsed with the
# input format and written to the CSV with the output format.
INPUT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
OUTPUT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def convert(infile, outfile):
    """Read a JSON index from infile and write it to outfile as CSV.

    The JSON document must be an object with a "results" list. Each entry
    must provide "test_start_time" (a string in INPUT_DATE_FORMAT) and
    "download_url" (a string that URL_REGEX can match).

    The output starts with the header row "date,cc,asn,test_name", followed
    by one row per entry, in input order. The header is written before the
    JSON is parsed, so it is emitted even if parsing subsequently fails.

    Raises ValueError if a "download_url" does not match URL_REGEX (the
    offending URL is included in the message) or if a "test_start_time"
    does not match INPUT_DATE_FORMAT. Raises KeyError if a required key is
    missing.
    """
    writer = csv.writer(outfile)
    writer.writerow(("date", "cc", "asn", "test_name"))

    data = json.load(infile)
    for entry in data["results"]:
        date = datetime.datetime.strptime(entry["test_start_time"], INPUT_DATE_FORMAT)

        url = entry["download_url"]
        m = URL_REGEX.search(url)
        if m is None:
            # Raise explicitly instead of using assert: assert statements are
            # removed under "python -O", which would turn a bad URL into an
            # opaque AttributeError on the next line.
            raise ValueError("unrecognized report URL: %r" % (url,))
        # The timestamp embedded in the URL is discarded; the date column
        # comes from "test_start_time" instead.
        _, cc, asn, test_name = m.groups()

        writer.writerow((date.strftime(OUTPUT_DATE_FORMAT), cc, asn, test_name))


if __name__ == "__main__":
    convert(sys.stdin, sys.stdout)
