A scientific application I work with has clumsy data retrieval options. I dumped the application's output--integer and floating point numbers--into an SQLite database and soon after began to enjoy the power of SQL to pull out interesting results.
The main complaint for making the transfer to SQLite a permanent part of our solution is the time it takes to insert the numbers into a database. It takes about a minute to insert 24 million numbers into three tables. Most database people (including me) would be thrilled to see this kind of insert performance but my colleagues are asking if it can be sped up. The attached C program measures insert performance for populating a table with an integer and three random floating point values with user defined transaction size. Usage is: ./sqlite_insert <number of rows> <transaction size> It writes to the hardcoded database file /tmp/a.db On my Dell Precision 360, Pentium4 3.0 GHz, 1 GB RAM, IDE disk drive, ext3, RHEL v3, Linux kernel 2.4.21, I peak out around 121,000 inserts/second using a transaction size of 20,000: ./sqlite_insert 100000 20000 100000 inserts to /tmp/a.db in 0.829 s = 120626.53 inserts/s Performance drops a bit when I increase the number of rows to two million (a typical size for my application): ./sqlite_insert 2000000 50000 2000000 inserts to /tmp/a.db in 17.124 s = 116795.07 inserts/s What kind of insert performance do you see on your machine? If it is substantially better than 120 kinserts/s what kind of hardware do you have? I'm especially interested in how much faster the code runs on systems with multiple disks in a RAID 0 configuration. Are there other tricks to speeding insert performance? Is it even reasonable to ask for more? -- Al
/* [EMAIL PROTECTED] # sample build: gcc -o sqlite_insert sqlite_insert.c \ -L/usr/local/sqlite-3.2.1/lib -lsqlite3 -I/usr/local/sqlite-3.2.1/include # sample run: ./sqlite_insert 100000 50000 */ #include <sqlite3.h> #include <stdlib.h> /* RAND_MAX */ #include <sys/time.h> /* timeval, gettimeofday() */ int sql_begin(sqlite3 *db) { /* {{{1 */ char *errmsg; if (sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, &errmsg) != SQLITE_OK) { printf("couldn't begin transaction: %s\n", errmsg); return 0; } else { return 1; } } /* 1}}} */ int sql_commit(sqlite3 *db) { /* {{{1 */ char *errmsg; if (sqlite3_exec(db, "COMMIT TRANSACTION", NULL, NULL, &errmsg) != SQLITE_OK) { printf("couldn't commit transaction: %s\n", errmsg); return 0; } else { return 1; } } /* 1}}} */ float elapsed(struct timeval start, struct timeval end) { /* {{{1 */ return (float) (end.tv_sec - start.tv_sec ) + ((float) (end.tv_usec - start.tv_usec)/1000000); } /* 1}}} */ int main(int argc, char *argv[]) { const char *zLeftover; #define CMD_SIZE 1000 char rm_command[CMD_SIZE], *errmsg, *dbfile = "/tmp/a.db"; /* *dbfile = ":memory:" is faster, but not of interest */ int rc, i, N, xact_size, n_this_xact = 0; double x, y, z; float delta_T; struct timeval start_time, end_time; sqlite3 *db; sqlite3_stmt *Stmt; if (argc < 3) { printf("\nUsage: %s <N> <X>\n\n", argv[0]); printf("\tInsert <N> rows into a table of an SQLite database\n"); printf("\tusing transaction sizes of <X>.\n"); printf("\tThe table has four columns of numeric data:\n"); printf("\t field_1 integer\n"); printf("\t field_2 float\n"); printf("\t field_3 float\n"); printf("\t field_4 float\n"); printf("\tThe integer field will have values 1..<N> while the\n"); printf("\tdouble precision values are random on [-50.0, 50.0]\n"); exit(0); } N = atoi(argv[1]); xact_size = atoi(argv[2]); snprintf(rm_command, CMD_SIZE-1, "rm -f %s", dbfile); system(rm_command); /* the database file must not exist before calling sqlite3_open() and trying to insert */ gettimeofday(&start_time, 0); rc = sqlite3_open(dbfile, &db); sql_begin(db); rc = sqlite3_prepare(db, "create table table_name(field_1 integer primary key," "field_2 float, " "field_3 float, " "field_4 float)", -1, &Stmt, &zLeftover); rc = sqlite3_step(Stmt); rc = sqlite3_finalize(Stmt); rc = sqlite3_prepare(db, "insert into table_name values(?,?,?,?)", -1, &Stmt, &zLeftover); for (i = 0; i < N; i++) { x = 50.0 - (100.0*rand()/(RAND_MAX+1.0)); y = 50.0 - (100.0*rand()/(RAND_MAX+1.0)); z = 50.0 - (100.0*rand()/(RAND_MAX+1.0)); rc = sqlite3_bind_int( Stmt, 1, i); rc = sqlite3_bind_double(Stmt, 2, x); rc = sqlite3_bind_double(Stmt, 3, y); rc = sqlite3_bind_double(Stmt, 4, z); rc = sqlite3_step(Stmt); rc = sqlite3_reset(Stmt); ++n_this_xact; if (!(n_this_xact % xact_size)) { /* have done xact_size inserts, finish this * transaction and start a new one */ sql_commit(db); sql_begin( db); n_this_xact = 0; } } sql_commit(db); sqlite3_close(db); gettimeofday(&end_time, 0); delta_T = elapsed(start_time, end_time); printf(" %d inserts to %s in %.3f s = %.2f inserts/s\n", N, dbfile, delta_T, N/delta_T); return 0; }