I recently noticed some very slow queries for my sqlite book database. 
Turns out that it happened due to the query planner decided to scan 
sub-queries instead of using an automatic covering index to search them. 

The database contains about 3000 entries and with two subqueries it 
took a noticable time to run, with three subqueries I had to give up and 
abort to save the CPU from overheating. 

Using indexes only take milliseconds in all cases.

Dropping analyze (sqlite_stat1) information from the database made 
the problem go away, i.e. made sqlite use index instead of scanning. 
As did adding a few more columns to the ORDER BY part.

These sql statements should be enough to demonstrate the problem. 
I am using sqlite 3.25.1

**********
create.txt
**********

BEGIN;

CREATE TABLE "Authors" (
    "AuthorID" INTEGER PRIMARY KEY,
    "Last Name" TEXT NOT NULL,
    "First Name" TEXT NOT NULL);

CREATE TABLE "Books" (
    "BookID" INTEGER PRIMARY KEY,
    "Title" TEXT NOT NULL);
 
CREATE TABLE "Genres"(
    "GenreID" INTEGER PRIMARY KEY,
    "Genre" TEXT UNIQUE NOT NULL);

CREATE TABLE "DatesRead"(
    "BookID" INTEGER,
    "Date Read" TEXT,
    PRIMARY KEY("BookID", "Date Read"),
    FOREIGN KEY(BookID) REFERENCES Books(BookID)) WITHOUT ROWID;

CREATE TABLE "AuthorBooks"(
    "BookID" INTEGER,
    "AuthorID" INTEGER,
    PRIMARY KEY("BookID", "AuthorID" ),
    FOREIGN KEY(BookID) REFERENCES Books(BookID),
    FOREIGN KEY(AuthorID) REFERENCES Authors(AuthorID)) WITHOUT ROWID;

CREATE TABLE "BookGenres"(
    "BookID" INTEGER,
    "GenreID" INTEGER,  
    PRIMARY KEY("BookID", "GenreID" ),
    FOREIGN KEY(BookID) REFERENCES Books(BookID),
    FOREIGN KEY(GenreID) REFERENCES Genres(GenreID)) WITHOUT ROWID;

ANALYZE;
INSERT INTO sqlite_stat1 VALUES('BookGenres','BookGenres','3190 2 1');
INSERT INTO sqlite_stat1 VALUES('DatesRead','DatesRead','3047 2 1');
INSERT INTO sqlite_stat1 VALUES('AuthorBooks','AuthorBooks','3549 2 1');
INSERT INTO sqlite_stat1 VALUES('Authors',NULL,'1329');
INSERT INTO sqlite_stat1 VALUES('Books',NULL,'2978');
INSERT INTO sqlite_stat1 VALUES('Genres','sqlite_autoindex_Genres_1','112
1');

COMMIT;

************
queries.txt
************

select "--------------------";
select "order by bookid only";
select "--------------------";

EXPLAIN QUERY PLAN
SELECT BookID,Title,"Author(s)","Date(s)","Genre(s)"
FROM Books
JOIN (SELECT BookID, group_concat(ltrim("First Name"||' '||"Last Name"),',
') AS 'Author(s)' FROM Books JOIN AuthorBooks USING(BookID) JOIN Authors
USING(AuthorID) GROUP BY BookID) USING(BookID)
JOIN (SELECT BookID, group_concat("Date read",', ') AS 'Date(s)' FROM Books
JOIN DatesRead USING(BookID) GROUP BY BookID) USING(BookID)
JOIN (SELECT BookID, group_concat(Genre,', ') AS 'Genre(s)' FROM Books JOIN
BookGenres USING(BookID) JOIN Genres USING(GenreID) GROUP BY BookID)
USING(BookID)
ORDER BY BookID;

select "-------------------------------------------";
select "order by bookid and some other fields too";
select "------------------------------------------";

EXPLAIN QUERY PLAN
SELECT BookID,Title,"Author(s)","Date(s)","Genre(s)"
FROM Books
JOIN (SELECT BookID, group_concat(ltrim("First Name"||' '||"Last Name"),',
') AS 'Author(s)' FROM Books JOIN AuthorBooks USING(BookID) JOIN Authors
USING(AuthorID) GROUP BY BookID) USING(BookID)
JOIN (SELECT BookID, group_concat("Date read",', ') AS 'Date(s)' FROM Books
JOIN DatesRead USING(BookID) GROUP BY BookID) USING(BookID)
JOIN (SELECT BookID, group_concat(Genre,', ') AS 'Genre(s)' FROM Books JOIN
BookGenres USING(BookID) JOIN Genres USING(GenreID) GROUP BY BookID)
USING(BookID)
ORDER BY BookID, Title, "Author(s)", "Date(s)";


************
Test run:
************

del test.db
type create.txt | sqlite3 test.db
type queries.txt | sqlite3 test.db


************
Output:
************

--------------------
order by bookid only
--------------------
QUERY PLAN
|--MATERIALIZE 1
|  |--SCAN TABLE Books
|  |--SEARCH TABLE AuthorBooks USING PRIMARY KEY (BookID=?)
|  `--SEARCH TABLE Authors USING INTEGER PRIMARY KEY (rowid=?)
|--MATERIALIZE 2
|  |--SCAN TABLE Books
|  `--SEARCH TABLE DatesRead USING PRIMARY KEY (BookID=?)
|--MATERIALIZE 3
|  |--SCAN TABLE Books
|  |--SEARCH TABLE BookGenres USING PRIMARY KEY (BookID=?)
|  `--SEARCH TABLE Genres USING INTEGER PRIMARY KEY (rowid=?)
|--SCAN SUBQUERY 1
|--SCAN SUBQUERY 2
|--SCAN SUBQUERY 3
|--SEARCH TABLE Books USING INTEGER PRIMARY KEY (rowid=?)
`--USE TEMP B-TREE FOR ORDER BY
-------------------------------------------
order by bookid and some other fields too
------------------------------------------
QUERY PLAN
|--MATERIALIZE 1
|  |--SCAN TABLE Books
|  |--SEARCH TABLE AuthorBooks USING PRIMARY KEY (BookID=?)
|  `--SEARCH TABLE Authors USING INTEGER PRIMARY KEY (rowid=?)
|--MATERIALIZE 2
|  |--SCAN TABLE Books
|  `--SEARCH TABLE DatesRead USING PRIMARY KEY (BookID=?)
|--MATERIALIZE 3
|  |--SCAN TABLE Books
|  |--SEARCH TABLE BookGenres USING PRIMARY KEY (BookID=?)
|  `--SEARCH TABLE Genres USING INTEGER PRIMARY KEY (rowid=?)
|--SCAN SUBQUERY 1
|--SEARCH TABLE Books USING INTEGER PRIMARY KEY (rowid=?)
|--SEARCH SUBQUERY 2 USING AUTOMATIC COVERING INDEX (BookID=?)
|--SEARCH SUBQUERY 3 USING AUTOMATIC COVERING INDEX (BookID=?)
`--USE TEMP B-TREE FOR ORDER BY


As can be seen the first query (when analyze information is in place) 
will use three nested scans as opposed to the second query with more 
order by columns that instead uses indexes to search.

Now, might this be a weakness in the query planner, or is there some 
precautions I could/should always take to avoid the scanning?






--
Sent from: http://sqlite.1065341.n5.nabble.com/
_______________________________________________
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to