Hi,

when using bind variables I get a huge performace drop compared to
using a plain string.  The query is demonstrated in the attached file
"problematic_query.py".

The database used can be downloaded from
  http://xile.org/le/prepared_statement.zip (1.75 MB)

or generated by using the attached file "create_test_db.py".

Kind regards,
Markus
# -*- coding: utf-8 -*-

import time
from pysqlite2 import dbapi2 as sqlite
print 'pysqlite %s, sqlite %s' % (sqlite.version, sqlite.sqlite_version)

x = sqlite.connect('test.db3')
c = x.cursor()

# get all entries which contain a specific word

start = time.time()
c.execute("""SELECT * FROM entry, word, word_entry WHERE
 entry.id = word_entry.entry_id AND
 word.id = word_entry.word_id AND
 word.word GLOB 'hui*'
""")
print '%.3f seconds' % (time.time() - start) # 0.000 seconds

start = time.time()
c.execute("""SELECT * FROM entry, word, word_entry WHERE
 entry.id = word_entry.entry_id AND
 word.id = word_entry.word_id AND
 word.word GLOB ?
""", ('hui*',))
print '%.3f seconds' % (time.time() - start) # 0.297 seconds

c.close()
x.close()
# -*- coding: utf-8 -*-

from random import choice
import re
from pysqlite2 import dbapi2 as sqlite

##x = sqlite.connect(':memory:')
x = sqlite.connect('test.db3')
c = x.cursor()

def generate_words():
    words = []
    for count in range(20):
         words.append(''.join([choice('aeiou' if i%2 else 'bcdfghklmnprstw') for i in range(4)]))
    return ' '.join(words)

# schema
c.execute("""CREATE TABLE entry (
    id INTEGER PRIMARY KEY,
    note LONGTEXT NOT NULL
)""")
c.execute("""CREATE TABLE word (
    id INTEGER PRIMARY KEY,
    word VARCHAR(40) NOT NULL UNIQUE
)""")
c.execute("""CREATE TABLE word_entry (
    id INTEGER PRIMARY KEY,
    word_id INT NOT NULL CONSTRAINT word_id_exists REFERENCES word(id) ,
    entry_id INT NOT NULL CONSTRAINT entry_id_exists REFERENCES entry(id)
)""")
c.execute("""CREATE INDEX word_entry_entryIndex ON word_entry (entry_id)""")
c.execute("""CREATE UNIQUE INDEX word_entry_wordEntryIndex ON word_entry (word_id, entry_id)""")

# fill 'entry' table
for count in range(1, 5001):
    c.execute('INSERT INTO entry (id, note) VALUES (?, ?)', (count, generate_words()))

# build index
regexp = re.compile('\w+', re.UNICODE)
wordDict = {}
id = 1
c.execute('SELECT id, note FROM entry')
for entry_id, note in c.fetchall():
    for word in set(regexp.findall(note.lower())):
        if len(word) <= 40:
            if word in wordDict:
                word_id = wordDict[word]
            else:
                word_id = id
                c.execute('INSERT INTO word (id, word) VALUES (?, ?)', (id, word))
                wordDict[word] = id
                id += 1
            c.execute('INSERT INTO word_entry (word_id, entry_id) VALUES (?, ?)', (word_id, entry_id))

c.close()
x.commit()
x.close()
-----------------------------------------------------------------------------
To unsubscribe, send email to [EMAIL PROTECTED]
-----------------------------------------------------------------------------

Reply via email to