[tor-commits] [onionoo/master] Switch to using MaxMind's GeoLite2 city database.

karsten Mon, 24 Feb 2014 05:24:25 -0800

commit c0f47f7896ebd1db2461e57dd0bca32238f0fb0d
Author: Karsten Loesing <karsten.loes...@gmx.net>
Date:   Fri Feb 21 20:03:48 2014 +0000


    Switch to using MaxMind's GeoLite2 city database.
---
 geoip/deanonymind.py                          |  228 ----------------------
 geoip/geoip-manual                            |   91 ---------
 src/org/torproject/onionoo/LookupService.java |  254 +++++++++----------------
 3 files changed, 92 insertions(+), 481 deletions(-)

diff --git a/geoip/deanonymind.py b/geoip/deanonymind.py
deleted file mode 100755
index 1137031..0000000
--- a/geoip/deanonymind.py
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/usr/bin/env python
-import optparse
-import os
-import sys
-import zipfile
-
-"""
-Take a MaxMind GeoLite City blocks file as input and replace A1 entries
-with the block number of the preceding entry iff the preceding
-(subsequent) entry ends (starts) directly before (after) the A1 entry and
-both preceding and subsequent entries contain the same block number.
-
-Then apply manual changes, either replacing A1 entries that could not be
-replaced automatically or overriding previously made automatic changes.
-"""
-
-def main():
-    options = parse_options()
-    country_blocks = read_location_file(options.in_location)
-    assignments = read_file(options.in_maxmind)
-    assignments = apply_automatic_changes(assignments,
-            options.block_number, country_blocks)
-    write_file(options.out_automatic, assignments)
-    manual_assignments = read_file(options.in_manual, must_exist=False)
-    assignments = apply_manual_changes(assignments, manual_assignments,
-            options.block_number)
-    write_file(options.out_manual, assignments)
-
-def parse_options():
-    parser = optparse.OptionParser()
-    parser.add_option('-i', action='store', dest='in_maxmind',
-            default='GeoLiteCity-Blocks.csv', metavar='FILE',
-            help='use the specified MaxMind GeoLite City blocks .csv '
-                 'file as input [default: %default]')
-    parser.add_option('-l', action='store', dest='in_location',
-            default='GeoLiteCity-Location.csv', metavar='FILE',
-            help='use the specified MaxMind GeoLite City location .csv '
-                 'file as input [default: %default]')
-    parser.add_option('-b', action='store', dest='block_number',
-            default=242, metavar='NUM',
-            help='replace entries with this block number [default: '
-                 '%default]')
-    parser.add_option('-g', action='store', dest='in_manual',
-            default='geoip-manual', metavar='FILE',
-            help='use the specified .csv file for manual changes or to '
-                 'override automatic changes [default: %default]')
-    parser.add_option('-a', action='store', dest='out_automatic',
-            default="Automatic-GeoLiteCity-Blocks.csv", metavar='FILE',
-            help='write full input file plus automatic changes to the '
-                 'specified .csv file [default: %default]')
-    parser.add_option('-m', action='store', dest='out_manual',
-            default='Manual-GeoLiteCity-Blocks.csv', metavar='FILE',
-            help='write full input file plus automatic and manual '
-                 'changes to the specified .csv file [default: %default]')
-    (options, args) = parser.parse_args()
-    return options
-
-def read_location_file(path):
-    if not os.path.exists(path):
-        print 'File %s does not exist.  Exiting.' % (path, )
-        sys.exit(1)
-    countries = {}
-    country_blocks = {}
-    for line in open(path):
-        if line.startswith('C') or line.startswith('l'):
-            continue
-        keys = ['locId', 'country', 'region', 'city', 'postalCode',
-                'latitude', 'longitude', 'metroCode', 'areaCode']
-        stripped_line = line.replace('"', '').strip()
-        parts = stripped_line.split(',')
-        entry = dict((k, v) for k, v in zip(keys, parts))
-        if entry['region'] == '':
-            countries[entry['country']] = entry['locId']
-            country_blocks[entry['locId']] = entry['locId']
-        elif entry['country'] in countries:
-            country_blocks[entry['locId']] = countries[entry['country']]
-    return country_blocks
-
-def read_file(path, must_exist=True):
-    if not os.path.exists(path):
-        if must_exist:
-            print 'File %s does not exist.  Exiting.' % (path, )
-            sys.exit(1)
-        else:
-            return
-    csv_file = open(path)
-    csv_content = csv_file.read()
-    csv_file.close()
-    assignments = []
-    for line in csv_content.split('\n'):
-        stripped_line = line.strip()
-        if len(stripped_line) > 0 and not stripped_line.startswith('#'):
-            assignments.append(stripped_line)
-    return assignments
-
-def apply_automatic_changes(assignments, block_number, country_blocks):
-    print '\nApplying automatic changes...'
-    result_lines = []
-    prev_line = None
-    a1_lines = []
-    block_number_str = '"%d"' % (block_number, )
-    for line in assignments:
-        if block_number_str in line:
-            a1_lines.append(line)
-        else:
-            if len(a1_lines) > 0:
-                new_a1_lines = process_a1_lines(prev_line, a1_lines, line,
-                                                country_blocks)
-                for new_a1_line in new_a1_lines:
-                    result_lines.append(new_a1_line)
-                a1_lines = []
-            result_lines.append(line)
-            prev_line = line
-    if len(a1_lines) > 0:
-        new_a1_lines = process_a1_lines(prev_line, a1_lines, None,
-                                        country_blocks)
-        for new_a1_line in new_a1_lines:
-            result_lines.append(new_a1_line)
-    return result_lines
-
-def process_a1_lines(prev_line, a1_lines, next_line, country_blocks):
-    if not prev_line or not next_line:
-        return a1_lines   # Can't merge first or last line in file.
-    if len(a1_lines) > 1:
-        return a1_lines   # Can't merge more than 1 line at once.
-    a1_line = a1_lines[0].strip()
-    prev_entry = parse_line(prev_line)
-    a1_entry = parse_line(a1_line)
-    next_entry = parse_line(next_line)
-    touches_prev_entry = int(prev_entry['end_num']) + 1 == \
-            int(a1_entry['start_num'])
-    touches_next_entry = int(a1_entry['end_num']) + 1 == \
-            int(next_entry['start_num'])
-    same_block_number = prev_entry['block_number'] == \
-            next_entry['block_number']
-    same_country = country_blocks[prev_entry['block_number']] == \
-            country_blocks[next_entry['block_number']]
-    if touches_prev_entry and touches_next_entry:
-        if same_block_number:
-            new_line = format_line_with_other_country(a1_entry, prev_entry)
-            print '-%s\n+%s' % (a1_line, new_line, )
-            return [new_line]
-        elif same_country:
-            new_line = format_line_with_other_country_block(a1_entry,
-                    country_blocks[prev_entry['block_number']])
-            print '-%s\n+%s' % (a1_line, new_line, )
-            return [new_line]
-    return a1_lines
-
-def parse_line(line):
-    if not line:
-        return None
-    keys = ['start_num', 'end_num', 'block_number']
-    stripped_line = line.replace('"', '').strip()
-    parts = stripped_line.split(',')
-    entry = dict((k, v) for k, v in zip(keys, parts))
-    return entry
-
-def format_line_with_other_country(original_entry, other_entry):
-    return '"%s","%s","%s"' % (original_entry['start_num'],
-            original_entry['end_num'], other_entry['block_number'], )
-
-def format_line_with_other_country_block(original_entry, country_block):
-    return '"%s","%s","%s"' % (original_entry['start_num'],
-            original_entry['end_num'], country_block, )
-
-def apply_manual_changes(assignments, manual_assignments, block_number):
-    if not manual_assignments:
-        return assignments
-    print '\nApplying manual changes...'
-    block_number_str = '%d' % (block_number, )
-    manual_dict = {}
-    for line in manual_assignments:
-        start_num = parse_line(line)['start_num']
-        if start_num in manual_dict:
-            print ('Warning: duplicate start number in manual '
-                   'assignments:\n  %s\n  %s\nDiscarding first entry.' %
-                   (manual_dict[start_num], line, ))
-        manual_dict[start_num] = line
-    result = []
-    for line in assignments:
-        entry = parse_line(line)
-        start_num = entry['start_num']
-        if start_num in manual_dict:
-            manual_line = manual_dict[start_num]
-            manual_entry = parse_line(manual_line)
-            if entry['end_num'] == manual_entry['end_num']:
-                if len(manual_entry['block_number']) == 0:
-                    print '-%s' % (line, )  # only remove, don't replace
-                    del manual_dict[start_num]
-                elif entry['block_number'] != manual_entry['block_number']:
-                    new_line = format_line_with_other_country(entry,
-                            manual_entry)
-                    print '-%s\n+%s' % (line, new_line, )
-                    result.append(new_line)
-                    del manual_dict[start_num]
-                else:
-                    print ('Warning: automatic and manual replacement '
-                           'already match:\n  %s\n  %s\nNot applying '
-                           'manual change.' % (line, manual_line, ))
-                    result.append(line)
-            else:
-                print ('Warning: only partial match between '
-                       'original/automatically replaced assignment and '
-                       'manual assignment:\n  %s\n  %s\nNot applying '
-                       'manual change.' % (line, manual_line, ))
-                result.append(line)
-        elif 'block_number' in entry and \
-                entry['block_number'] == block_number_str:
-            print ('Warning: no manual replacement for A1 entry:\n  %s'
-                % (line, ))
-            result.append(line)
-        else:
-            result.append(line)
-    if len(manual_dict) > 0:
-        print 'Warning: could not apply all manual assignments:'
-        for line in manual_dict.values():
-            print '  %s' % (line, )
-    return result
-
-def write_file(path, assignments):
-    out_file = open(path, 'w')
-    out_file.write('\n'.join(assignments))
-    out_file.close()
-
-if __name__ == '__main__':
-    main()
-
diff --git a/geoip/geoip-manual b/geoip/geoip-manual
deleted file mode 100644
index e174dde..0000000
--- a/geoip/geoip-manual
+++ /dev/null
@@ -1,91 +0,0 @@
-# This file contains manual overrides of A1 entries (and possibly others)
-# in MaxMind's GeoLite City database.  Use deanonymind.py in the same
-# directory to process this file when producing a new geoip file.  See
-# INSTALL for details.
-
-# GB, taken from GeoLite Country February database.  -KL 2013-02-21
-"772808704","772810751","77"
-
-# From geoip-manual (country):
-# CH, because previous MaxMind entry 46.19.141.0-46.19.142.255 is CH, and
-# RIR delegation files say 46.19.136.0-46.19.143.255 is CH.
-# -KL 2012-11-27
-"773033728","773033983","44"
-
-# From geoip-manual (country):
-# GB, because next MaxMind entry 46.166.129.0-46.166.134.255 is GB, and
-# RIR delegation files say 46.166.128.0-46.166.191.255 is GB.
-# -KL 2012-11-27
-"782663680","782663935","77"
-
-# From geoip-manual (country):
-# US, because previous MaxMind entry 70.159.21.51-70.232.244.255 is US,
-# because next MaxMind entry 70.232.245.58-70.232.245.59 is A2 ("Satellite
-# Provider") which is a country information about as useless as A1, and
-# because RIR delegation files say 70.224.0.0-70.239.255.255 is US.
-# -KL 2012-11-27
-"1189672192","1189672249","223"
-
-# From geoip-manual (country):
-# US, because next MaxMind entry 70.232.246.0-70.240.141.255 is US,
-# because previous MaxMind entry 70.232.245.58-70.232.245.59 is A2
-# ("Satellite Provider") which is a country information about as useless
-# as A1, and because RIR delegation files say 70.224.0.0-70.239.255.255 is
-# US.  -KL 2012-11-27
-"1189672252","1189672447","223"
-
-# From geoip-manual (country):
-# GB, despite neither previous (GE) nor next (LV) MaxMind entry being GB,
-# but because RIR delegation files agree with both previous and next
-# MaxMind entry and say GB for 91.228.0.0-91.228.3.255.  -KL 2012-11-27
-"1541668864","1541669887","77"
-
-# NL, even though previous entry is CY, but because next entry is NL and
-# RIR says entire range 176.56.160.0-176.56.191.255 is NL.  -KL 2013-05-13
-"2956504064","2956504319","161"
-
-# NL, even though previous entry is RU and next entry is GB, but because
-# RIR says entire range 176.56.160.0-176.56.191.255 is NL.  -KL 2013-05-13
-"2956504576","2956504831","161"
-
-# GB, even though previous entry is NL and next entry is RU, but because
-# RIR says entire range 185.25.84.0-185.25.87.255 is GB.  -KL 2013-05-13
-"3105444864","3105445887","77"
-
-# US, even though previous entry is MF, but because next entry is US and
-# RIR says entire range 199.101.192.0-199.101.199.255 is US.  -KL 2013-05-13
-"3345334272","3345334527","223"
-
-# From geoip-manual (country):
-# US, because ARIN says 199.255.208.0-199.255.215.255 is US.
-# -KL 2013-07-08
-# Changed entry start from 199.255.213.0 to 199.255.208.0 on 2013-08-12.
-# -KL 2013-08-12
-# Split up into 199.255.208.0-199.255.209.127 and
-# 199.255.210.0-199.255.215.255 on 2013-10-11. -KL 2013-10-11
-"3355430912","3355431295","223"
-"3355431424","3355432959","223"
-
-# US, because previous entry is US, next entry is not adjacent, and RIR
-# says 204.14.72.0-204.14.79.255 is US.  -KL 2013-05-13
-"3423488000","3423490047","223"
-
-# US, even though previous entry is CA, but because next entry is US and
-# RIR says entire range 204.12.160.0-204.12.191.255 is US.  -KL 2013-05-13
-"3423379456","3423379967","223"
-
-# RU, even though next entry is SE and even though RIR says
-# 217.15.160.0-217.15.175.255 is EU (which isn't really a country), but
-# because previous entry is RU and RIR says 217.15.144.0-217.15.159.255 is
-# RU.  -KL 2013-05-13
-"3641679872","3641681151","184"
-
-# From geoip-manual (country):
-# FR, because previous MaxMind entry 217.15.166.0-217.15.166.255 is FR,
-# and RIR delegation files contain a block 217.15.160.0-217.15.175.255
-# which, however, is EU, not FR.  But merging with next MaxMind entry
-# 217.15.176.0-217.15.191.255 which is KZ and which fully matches what
-# the RIR delegation files say seems unlikely to be correct.
-# -KL 2012-11-27
-"3641681664","3641683967","75"
-
diff --git a/src/org/torproject/onionoo/LookupService.java b/src/org/torproject/onionoo/LookupService.java
index 43971b5..928a550 100644
--- a/src/org/torproject/onionoo/LookupService.java
+++ b/src/org/torproject/onionoo/LookupService.java
@@ -20,10 +20,8 @@ import java.util.regex.Pattern;
 public class LookupService {
 
   File geoipDir;
-  File geoLiteCityBlocksCsvFile;
-  File geoLiteCityLocationCsvFile;
-  File iso3166CsvFile;
-  File regionCsvFile;
+  File geoLite2CityBlocksCsvFile;
+  File geoLite2CityLocationsCsvFile;
   File geoIPASNum2CsvFile;
   private boolean hasAllFiles = false;
   public LookupService(File geoipDir) {
@@ -33,34 +31,17 @@ public class LookupService {
 
   /* Make sure we have all required .csv files. */
   private void findRequiredCsvFiles() {
-    File[] geoLiteCityBlocksCsvFiles = new File[] {
-        new File(this.geoipDir, "Manual-GeoLiteCity-Blocks.csv"),
-        new File(this.geoipDir, "Automatic-GeoLiteCity-Blocks.csv"),
-        new File(this.geoipDir, "GeoLiteCity-Blocks.csv") };
-    for (File file : geoLiteCityBlocksCsvFiles) {
-      if (file.exists()) {
-        this.geoLiteCityBlocksCsvFile = file;
-        break;
-      }
-    }
-    if (this.geoLiteCityBlocksCsvFile == null) {
-      System.err.println("No *GeoLiteCity-Blocks.csv file in geoip/.");
-      return;
-    }
-    this.geoLiteCityLocationCsvFile = new File(this.geoipDir,
-        "GeoLiteCity-Location.csv");
-    if (!this.geoLiteCityLocationCsvFile.exists()) {
-      System.err.println("No GeoLiteCity-Location.csv file in geoip/.");
-      return;
-    }
-    this.iso3166CsvFile = new File(this.geoipDir, "iso3166.csv");
-    if (!this.iso3166CsvFile.exists()) {
-      System.err.println("No iso3166.csv file in geoip/.");
+    this.geoLite2CityBlocksCsvFile = new File(this.geoipDir,
+        "GeoLite2-City-Blocks.csv");
+    if (!this.geoLite2CityBlocksCsvFile.exists()) {
+      System.err.println("No GeoLite2-City-Blocks.csv file in geoip/.");
       return;
     }
-    this.regionCsvFile = new File(this.geoipDir, "region.csv");
-    if (!this.regionCsvFile.exists()) {
-      System.err.println("No region.csv file in geoip/.");
+    this.geoLite2CityLocationsCsvFile = new File(this.geoipDir,
+        "GeoLite2-City-Locations.csv");
+    if (!this.geoLite2CityLocationsCsvFile.exists()) {
+      System.err.println("No GeoLite2-City-Locations.csv file in "
+          + "geoip/.");
       return;
     }
     this.geoIPASNum2CsvFile = new File(this.geoipDir, "GeoIPASNum2.csv");
@@ -71,6 +52,31 @@ public class LookupService {
     this.hasAllFiles = true;
   }
 
+  private Pattern ipv4Pattern = Pattern.compile("^[0-9\\.]{7,15}$");
+  private long parseAddressString(String addressString) {
+    long addressNumber = -1L;
+    if (ipv4Pattern.matcher(addressString).matches()) {
+      String[] parts = addressString.split("\\.", 4);
+      if (parts.length == 4) {
+        addressNumber = 0L;
+        for (int i = 0; i < 4; i++) {
+          addressNumber *= 256L;
+          int octetValue = -1;
+          try {
+            octetValue = Integer.parseInt(parts[i]);
+          } catch (NumberFormatException e) {
+          }
+          if (octetValue < 0 || octetValue > 255) {
+            addressNumber = -1L;
+            break;
+          }
+          addressNumber += octetValue;
+        }
+      }
+    }
+    return addressNumber;
+  }
+
   public SortedMap<String, LookupResult> lookup(
       SortedSet<String> addressStrings) {
 
@@ -83,28 +89,8 @@ public class LookupService {
 
     /* Obtain a map from relay IP address strings to numbers. */
     Map<String, Long> addressStringNumbers = new HashMap<String, Long>();
-    Pattern ipv4Pattern = Pattern.compile("^[0-9\\.]{7,15}$");
     for (String addressString : addressStrings) {
-      long addressNumber = -1L;
-      if (ipv4Pattern.matcher(addressString).matches()) {
-        String[] parts = addressString.split("\\.", 4);
-        if (parts.length == 4) {
-          addressNumber = 0L;
-          for (int i = 0; i < 4; i++) {
-            addressNumber *= 256L;
-            int octetValue = -1;
-            try {
-              octetValue = Integer.parseInt(parts[i]);
-            } catch (NumberFormatException e) {
-            }
-            if (octetValue < 0 || octetValue > 255) {
-              addressNumber = -1L;
-              break;
-            }
-            addressNumber += octetValue;
-          }
-        }
-      }
+      long addressNumber = this.parseAddressString(addressString);
       if (addressNumber >= 0L) {
         addressStringNumbers.put(addressString, addressNumber);
       }
@@ -113,65 +99,57 @@ public class LookupService {
       return lookupResults;
     }
 
-    /* Obtain a map from IP address numbers to blocks. */
+    /* Obtain a map from IP address numbers to blocks and to latitudes and
+       longitudes. */
     Map<Long, Long> addressNumberBlocks = new HashMap<Long, Long>();
+    Map<Long, String[]> addressNumberLatLong =
+        new HashMap<Long, String[]>();
     try {
       SortedSet<Long> sortedAddressNumbers = new TreeSet<Long>(
           addressStringNumbers.values());
-      long firstAddressNumber = sortedAddressNumbers.first();
       BufferedReader br = new BufferedReader(new InputStreamReader(
-          new FileInputStream(geoLiteCityBlocksCsvFile), "ISO-8859-1"));
-      String line;
-      long previousStartIpNum = -1L;
+          new FileInputStream(geoLite2CityBlocksCsvFile), "ISO-8859-1"));
+      String line = br.readLine();
       while ((line = br.readLine()) != null) {
-        if (!line.startsWith("\"")) {
+        if (!line.startsWith("::ffff:")) {
+          /* TODO Make this less hacky and IPv6-ready at some point. */
           continue;
         }
-        String[] parts = line.replaceAll("\"", "").split(",", 3);
-        if (parts.length != 3) {
+        String[] parts = line.replaceAll("\"", "").split(",", 10);
+        if (parts.length != 10) {
           System.err.println("Illegal line '" + line + "' in "
-              + geoLiteCityBlocksCsvFile.getAbsolutePath() + ".");
+              + geoLite2CityBlocksCsvFile.getAbsolutePath() + ".");
           br.close();
           return lookupResults;
         }
         try {
-          long startIpNum = Long.parseLong(parts[0]);
-          if (startIpNum <= previousStartIpNum) {
-            System.err.println("Line '" + line + "' not sorted in "
-                + geoLiteCityBlocksCsvFile.getAbsolutePath() + ".");
+          String startAddressString = parts[0].substring(7); /* ::ffff: */
+          long startIpNum = this.parseAddressString(startAddressString);
+          int networkMaskLength = Integer.parseInt(parts[1]);
+          if (networkMaskLength < 96 || networkMaskLength > 128) {
+            System.err.println("Illegal network mask in '" + line
+                + "' in " + geoLite2CityBlocksCsvFile.getAbsolutePath()
+                + ".");
             br.close();
             return lookupResults;
           }
-          previousStartIpNum = startIpNum;
-          while (firstAddressNumber < startIpNum &&
-              firstAddressNumber != -1L) {
-            sortedAddressNumbers.remove(firstAddressNumber);
-            if (sortedAddressNumbers.isEmpty()) {
-              firstAddressNumber = -1L;
-            } else {
-              firstAddressNumber = sortedAddressNumbers.first();
-            }
+          long endIpNum = startIpNum + (1 << (128 - networkMaskLength))
+              - 1;
+          for (long addressNumber : sortedAddressNumbers.
+              tailSet(startIpNum).headSet(endIpNum + 1L)) {
+            String blockString = parts[2].length() > 0 ? parts[2] :
+                parts[3];
+            long blockNumber = Long.parseLong(blockString);
+            addressNumberBlocks.put(addressNumber, blockNumber);
+            String latitude = parts[6];
+            String longitude = parts[7];
+            addressNumberLatLong.put(addressNumber,
+                new String[] { latitude, longitude });
           }
-          long endIpNum = Long.parseLong(parts[1]);
-          while (firstAddressNumber <= endIpNum &&
-              firstAddressNumber != -1L) {
-            long blockNumber = Long.parseLong(parts[2]);
-            addressNumberBlocks.put(firstAddressNumber, blockNumber);
-            sortedAddressNumbers.remove(firstAddressNumber);
-            if (sortedAddressNumbers.isEmpty()) {
-              firstAddressNumber = -1L;
-            } else {
-              firstAddressNumber = sortedAddressNumbers.first();
-            }
-          }
-          if (firstAddressNumber == -1L) {
-            break;
-          }
-        }
-        catch (NumberFormatException e) {
+        } catch (NumberFormatException e) {
           System.err.println("Number format exception while parsing line "
               + "'" + line + "' in "
-              + geoLiteCityBlocksCsvFile.getAbsolutePath() + ".");
+              + geoLite2CityBlocksCsvFile.getAbsolutePath() + ".");
           br.close();
           return lookupResults;
         }
@@ -179,7 +157,7 @@ public class LookupService {
       br.close();
     } catch (IOException e) {
       System.err.println("I/O exception while reading "
-          + geoLiteCityBlocksCsvFile.getAbsolutePath() + ".");
+          + geoLite2CityBlocksCsvFile.getAbsolutePath() + ".");
       return lookupResults;
     }
 
@@ -189,16 +167,14 @@ public class LookupService {
       Set<Long> blockNumbers = new HashSet<Long>(
           addressNumberBlocks.values());
       BufferedReader br = new BufferedReader(new InputStreamReader(
-          new FileInputStream(geoLiteCityLocationCsvFile), "ISO-8859-1"));
-      String line;
+          new FileInputStream(geoLite2CityLocationsCsvFile),
+          "ISO-8859-1"));
+      String line = br.readLine();
       while ((line = br.readLine()) != null) {
-        if (line.startsWith("C") || line.startsWith("l")) {
-          continue;
-        }
-        String[] parts = line.replaceAll("\"", "").split(",", 9);
-        if (parts.length != 9) {
+        String[] parts = line.replaceAll("\"", "").split(",", 10);
+        if (parts.length != 10) {
           System.err.println("Illegal line '" + line + "' in "
-              + geoLiteCityLocationCsvFile.getAbsolutePath() + ".");
+              + geoLite2CityLocationsCsvFile.getAbsolutePath() + ".");
           br.close();
           return lookupResults;
         }
@@ -207,66 +183,18 @@ public class LookupService {
           if (blockNumbers.contains(locId)) {
             blockLocations.put(locId, line);
           }
-        }
-        catch (NumberFormatException e) {
+        } catch (NumberFormatException e) {
           System.err.println("Number format exception while parsing line "
               + "'" + line + "' in "
-              + geoLiteCityLocationCsvFile.getAbsolutePath() + ".");
-          br.close();
-          return lookupResults;
-        }
-      }
-      br.close();
-    } catch (IOException e) {
-      System.err.println("I/O exception while reading "
-          + geoLiteCityLocationCsvFile.getAbsolutePath() + ".");
-      return lookupResults;
-    }
-
-    /* Read country names to memory. */
-    Map<String, String> countryNames = new HashMap<String, String>();
-    try {
-      BufferedReader br = new BufferedReader(new InputStreamReader(
-          new FileInputStream(iso3166CsvFile), "ISO-8859-1"));
-      String line;
-      while ((line = br.readLine()) != null) {
-        String[] parts = line.replaceAll("\"", "").split(",", 2);
-        if (parts.length != 2) {
-          System.err.println("Illegal line '" + line + "' in "
-              + iso3166CsvFile.getAbsolutePath() + ".");
-          br.close();
-          return lookupResults;
-        }
-        countryNames.put(parts[0].toLowerCase(), parts[1]);
-      }
-      br.close();
-    } catch (IOException e) {
-      System.err.println("I/O exception while reading "
-          + iso3166CsvFile.getAbsolutePath() + ".");
-      return lookupResults;
-    }
-
-    /* Read region names to memory. */
-    Map<String, String> regionNames = new HashMap<String, String>();
-    try {
-      BufferedReader br = new BufferedReader(new InputStreamReader(
-          new FileInputStream(regionCsvFile), "ISO-8859-1"));
-      String line;
-      while ((line = br.readLine()) != null) {
-        String[] parts = line.replaceAll("\"", "").split(",", 3);
-        if (parts.length != 3) {
-          System.err.println("Illegal line '" + line + "' in "
-              + regionCsvFile.getAbsolutePath() + ".");
+              + geoLite2CityLocationsCsvFile.getAbsolutePath() + ".");
           br.close();
           return lookupResults;
         }
-        regionNames.put(parts[0].toLowerCase() + ","
-            + parts[1].toLowerCase(), parts[2]);
       }
       br.close();
     } catch (IOException e) {
       System.err.println("I/O exception while reading "
-          + regionCsvFile.getAbsolutePath() + ".");
+          + geoLite2CityLocationsCsvFile.getAbsolutePath() + ".");
       return lookupResults;
     }
 
@@ -346,6 +274,7 @@ public class LookupService {
       }
       long addressNumber = addressStringNumbers.get(addressString);
       if (!addressNumberBlocks.containsKey(addressNumber) &&
+          !addressNumberLatLong.containsKey(addressNumber) &&
           !addressNumberASN.containsKey(addressNumber)) {
         continue;
       }
@@ -355,22 +284,23 @@ public class LookupService {
         if (blockLocations.containsKey(blockNumber)) {
           String[] parts = blockLocations.get(blockNumber).
               replaceAll("\"", "").split(",", -1);
-          String countryCode = parts[1].toLowerCase();
-          lookupResult.countryCode = countryCode;
-          if (countryNames.containsKey(countryCode)) {
-            lookupResult.countryName = countryNames.get(countryCode);
+          lookupResult.countryCode = parts[3].toLowerCase();
+          if (parts[4].length() > 0) {
+            lookupResult.countryName = parts[4];
           }
-          String regionCode = countryCode + "," + parts[2].toLowerCase();
-          if (regionNames.containsKey(regionCode)) {
-            lookupResult.regionName = regionNames.get(regionCode);
+          if (parts[6].length() > 0) {
+            lookupResult.regionName = parts[6];
           }
-          if (parts[3].length() > 0) {
-            lookupResult.cityName = parts[3];
+          if (parts[7].length() > 0) {
+            lookupResult.cityName = parts[7];
           }
-          lookupResult.latitude = parts[5];
-          lookupResult.longitude = parts[6];
         }
       }
+      if (addressNumberLatLong.containsKey(addressNumber)) {
+        String[] latLong = addressNumberLatLong.get(addressNumber);
+        lookupResult.latitude = latLong[0];
+        lookupResult.longitude = latLong[1];
+      }
       if (addressNumberASN.containsKey(addressNumber)) {
         String[] parts = addressNumberASN.get(addressNumber).split(" ",
             2);



_______________________________________________
tor-commits mailing list
tor-commits@lists.torproject.org
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits

[tor-commits] [onionoo/master] Switch to using MaxMind's GeoLite2 city database.

Reply via email to