Hi, when using bind variables I get a huge performace drop compared to using a plain string. The query is demonstrated in the attached file "problematic_query.py".
The database used can be downloaded from http://xile.org/le/prepared_statement.zip (1.75 MB) or generated by using the attached file "create_test_db.py". Kind regards, Markus
# -*- coding: utf-8 -*- import time from pysqlite2 import dbapi2 as sqlite print 'pysqlite %s, sqlite %s' % (sqlite.version, sqlite.sqlite_version) x = sqlite.connect('test.db3') c = x.cursor() # get all entries which contain a specific word start = time.time() c.execute("""SELECT * FROM entry, word, word_entry WHERE entry.id = word_entry.entry_id AND word.id = word_entry.word_id AND word.word GLOB 'hui*' """) print '%.3f seconds' % (time.time() - start) # 0.000 seconds start = time.time() c.execute("""SELECT * FROM entry, word, word_entry WHERE entry.id = word_entry.entry_id AND word.id = word_entry.word_id AND word.word GLOB ? """, ('hui*',)) print '%.3f seconds' % (time.time() - start) # 0.297 seconds c.close() x.close()
# -*- coding: utf-8 -*- from random import choice import re from pysqlite2 import dbapi2 as sqlite ##x = sqlite.connect(':memory:') x = sqlite.connect('test.db3') c = x.cursor() def generate_words(): words = [] for count in range(20): words.append(''.join([choice('aeiou' if i%2 else 'bcdfghklmnprstw') for i in range(4)])) return ' '.join(words) # schema c.execute("""CREATE TABLE entry ( id INTEGER PRIMARY KEY, note LONGTEXT NOT NULL )""") c.execute("""CREATE TABLE word ( id INTEGER PRIMARY KEY, word VARCHAR(40) NOT NULL UNIQUE )""") c.execute("""CREATE TABLE word_entry ( id INTEGER PRIMARY KEY, word_id INT NOT NULL CONSTRAINT word_id_exists REFERENCES word(id) , entry_id INT NOT NULL CONSTRAINT entry_id_exists REFERENCES entry(id) )""") c.execute("""CREATE INDEX word_entry_entryIndex ON word_entry (entry_id)""") c.execute("""CREATE UNIQUE INDEX word_entry_wordEntryIndex ON word_entry (word_id, entry_id)""") # fill 'entry' table for count in range(1, 5001): c.execute('INSERT INTO entry (id, note) VALUES (?, ?)', (count, generate_words())) # build index regexp = re.compile('\w+', re.UNICODE) wordDict = {} id = 1 c.execute('SELECT id, note FROM entry') for entry_id, note in c.fetchall(): for word in set(regexp.findall(note.lower())): if len(word) <= 40: if word in wordDict: word_id = wordDict[word] else: word_id = id c.execute('INSERT INTO word (id, word) VALUES (?, ?)', (id, word)) wordDict[word] = id id += 1 c.execute('INSERT INTO word_entry (word_id, entry_id) VALUES (?, ?)', (word_id, entry_id)) c.close() x.commit() x.close()
----------------------------------------------------------------------------- To unsubscribe, send email to [EMAIL PROTECTED] -----------------------------------------------------------------------------