I think I have a performance problem for a simple select with range.
My Tables:
CREATE TABLE locations(
locid INTEGER PRIMARY KEY,
country TEXT,
region TEXT,
city TEXT,
postalCode TEXT,
latitude REAL,
longitude REAL,
dmaCode INTEGER,
areaCode INTEGER)
CREATE TABLE blocks(
startIpNum INTEGER,
endIpNum INTEGER,
locId INTEGER)
My Data:
http://www.maxmind.com/app/geolitecity
Blocks table has 2,776,436 rows
Locations table has 159,488 rows
After inserting the data I run analyze.
My Query:
select * from blocks,locations where locations.locid = blocks.locid AND ? >=
blocks.startIpNum AND ? <= blocks.endIpNum
(replace ? with a number)
Disclaimer:
I'm a bit new to databases.
Performance issues:
I use python's sqlite3 module to run the query.
With this configuration it takes about 0.6 seconds to complete the query. I
think this is too slow. I could write a binary tree myself and have searches
like this take, O(log(num_rows)) which is
7*something_which_shouldnt_take_too_much. Am I wrong? (see the disclaimer)
Anyway, I thought the problem was that startIpNum, endIpNum are not indexed.
So I added indices for them (even tried indexing them both). This only makes
the query take about 3 seconds.
Ideas anyone?
Source:
is attached.
Thank you for your help
--
Dani
http://daniva.googlepages.com
--
Dani
http://daniva.googlepages.com
'''geolite
GeoLite City is a free IP to city database provided by MaxMind.
They provide a C API (and a python wrapper) for the database.
If you can't compile the C sources on your server (or get a binary
version), this script might be helpful for you.
The script puts the geoip data in a sqllite database, and provides
interfaces for updating and searching the database.
To use this script, get the database in CSV format:
http://www.maxmind.com/app/geolitecity
You also need to have python 2.5 for this script (sqlite3 is used)
'''
import sqlite3 as sqlite
import os
def dottedQuadToNum(ip):
"convert decimal dotted quad string to long integer"
hexn = ''.join(["%02X" % long(i) for i in ip.split('.')])
return long(hexn, 16)
def cursorToDict(cursor):
val = cursor.next()
return dict([(cursor.description[i][0],val[i]) for i in xrange(len(cursor.description))])
def test():
import sqlite3
from time import clock
x = sqlite3.connect('geolite.db')
y = x.cursor()
ip = dottedQuadToNum("84.108.189.94")
res = y.execute('select * from blocks,locations where locations.locid = blocks.locid AND ? >= blocks.startIpNum AND ? <= blocks.endIpNum', [ip,ip])
begin = clock()
f = res.next()
end = clock()
y.close()
x.close()
return end-begin, f
def test2():
from time import clock
x = GeoLiteDB()
x.connect();
begin = clock()
x.ipLocation("84.108.189.94");
end = clock()
x.close()
return end - begin
def createDB(dbPath = 'geolite.db', locationsPath='GeoLiteCity-Location.csv', blocksPath='GeoLiteCity-Blocks.csv', warnOnDelete = True):
if os.path.exists(dbPath):
if warnOnDelete:
print "file %s will be deleted. Press any key to continue, or 'n' to abort..." % (os.path.abspath(dbPath))
if getch() == 'n':
print 'aborted.'
return None
os.remove(os.path.abspath(dbPath))
conn = sqlite.connect(dbPath)
cursor = conn.cursor()
try:
cursor.execute('''CREATE TABLE locations(
locid INTEGER PRIMARY KEY,
country TEXT,
region TEXT,
city TEXT,
postalCode TEXT,
latitude REAL,
longitude REAL,
dmaCode INTEGER,
areaCode INTEGER)''')
cursor.execute('''CREATE TABLE blocks(
startIpNum INTEGER,
endIpNum INTEGER,
locId INTEGER)''')
locations = file(locationsPath,'r')
print ('parsing locations. This will a while.')
print locations.readline().strip() #should print copyright note
print locations.readline().strip() #should print column names
lines = ([x.strip('"') for x in line.strip().split(',')] for line in locations.xreadlines())
cursor.executemany('insert into locations values (?,?,?,?,?,?,?,?,?)', lines)
locations.close()
blocks = file(blocksPath,'r')
print ('parsing blocks. This will take longer.')
print blocks.readline().strip() #should print copyright note
print blocks.readline().strip() #should print column names
lines = ([x.strip('"') for x in line.strip().split(',')] for line in blocks.xreadlines())
cursor.executemany('insert into blocks values (?,?,?)', lines)
blocks.close()
# cursor.execute('''CREATE UNIQUE INDEX startIpNumIx ON blocks(startIpNum);''')
# cursor.execute('''CREATE UNIQUE INDEX endIpNumIx ON blocks(endIpNum);''')
conn.commit()
print 'analyze'
cursor.execute('''ANALYZE;''')
numBlocks = cursor.execute('select count(*) from blocks').fetchone()[0]
numLocations = cursor.execute('select count(*) from locations').fetchone()[0]
return numBlocks, numLocations
finally:
cursor.close()
conn.close()
class GeoLiteDB:
def __init__(self, dbPath = 'geolite.db'):
self.dbPath = dbPath
self._conn = None
self._cursor = None
def connect(self):
if self._conn:
raise 'database already opened'
self._conn = sqlite.connect(self.dbPath)
self._cursor = self._conn.cursor()
def close(self):
if not self._conn:
raise 'databse was not opened'
self._cursor.close()
self._conn.close()
def autoConnect(self):
if not self._conn:
self.connect()
def countBlocks(self):
self.autoConnect()
return self._cursor.execute('select count(*) from blocks').next()[0]
def countLocations(self):
self.autoConnect()
return self._cursor.execute('select count(*) from locations').next()[0]
def ipLocation(self, ip):
self.autoConnect()
if isinstance(ip,str):
ip = dottedQuadToNum(ip)
return cursorToDict(self._cursor.execute('select * from blocks,locations where locations.locid = blocks.locid AND ? >= blocks.startIpNum AND ? <= blocks.endIpNum', [ip,ip]))
#cross platform getch, from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/134892
class _Getch:
"""Gets a single character from standard input. Does not echo to the
screen."""
def __init__(self):
try:
self.impl = _GetchWindows()
except ImportError:
self.impl = _GetchUnix()
def __call__(self): return self.impl()
class _GetchUnix:
def __init__(self):
import tty, sys
def __call__(self):
import sys, tty, termios
fd = sys.stdin.fileno()
old_settings = termios.tcgetattr(fd)
try:
tty.setraw(sys.stdin.fileno())
ch = sys.stdin.read(1)
finally:
termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
return ch
class _GetchWindows:
def __init__(self):
import msvcrt
def __call__(self):
import msvcrt
return msvcrt.getch()
getch = _Getch()
-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------