A scientific application I work with has clumsy data retrieval
options.  I dumped the application's output--integer and floating
point numbers--into an SQLite database and soon after began to
enjoy the power of SQL to pull out interesting results.

The main complaint for making the transfer to SQLite a permanent
part of our solution is the time it takes to insert the numbers
into a database.  It takes about a minute to insert 24 million
numbers into three tables.  Most database people (including me)
would be thrilled to see this kind of insert performance but
my colleagues are asking if it can be sped up.

The attached C program measures insert performance for populating
a table with an integer and three random floating point values with
user defined transaction size.  Usage is:

    ./sqlite_insert  <number of rows>   <transaction size>

It writes to the hardcoded database file /tmp/a.db
On my Dell Precision 360, Pentium4 3.0 GHz, 1 GB RAM, IDE disk
drive, ext3, RHEL v3, Linux kernel 2.4.21, I peak out around 121,000
inserts/second using a transaction size of 20,000:
    ./sqlite_insert 100000 20000
    100000 inserts to /tmp/a.db in 0.829 s = 120626.53 inserts/s

Performance drops a bit when I increase the number of rows to
two million (a typical size for my application):

    ./sqlite_insert 2000000 50000
    2000000 inserts to /tmp/a.db in 17.124 s = 116795.07 inserts/s

What kind of insert performance do you see on your machine?  If it
is substantially better than 120 kinserts/s what kind of hardware
do you have?  I'm especially interested in how much faster the
code runs on systems with multiple disks in a RAID 0 configuration.
Are there other tricks to speeding insert performance?  Is it even
reasonable to ask for more?     -- Al
/*
  [EMAIL PROTECTED]

  # sample build:
  gcc -o sqlite_insert sqlite_insert.c \
      -L/usr/local/sqlite-3.2.1/lib -lsqlite3 -I/usr/local/sqlite-3.2.1/include

  # sample run:
  ./sqlite_insert 100000 50000

*/
                                                                                
#include <sqlite3.h>
#include <stdlib.h>    /* RAND_MAX */
#include <sys/time.h>  /* timeval, gettimeofday() */
                                                                                
int sql_begin(sqlite3 *db) { /* {{{1 */
    char *errmsg;
    if (sqlite3_exec(db, "BEGIN TRANSACTION",
                     NULL, NULL, &errmsg) != SQLITE_OK) {
        printf("couldn't begin transaction:  %s\n", errmsg);
        return 0;
    } else {
        return 1;
    }
} /* 1}}} */
int sql_commit(sqlite3 *db) { /* {{{1 */
    char *errmsg;
    if (sqlite3_exec(db, "COMMIT TRANSACTION",
                     NULL, NULL, &errmsg) != SQLITE_OK) {
        printf("couldn't commit transaction:  %s\n", errmsg);
        return 0;
    } else {
        return 1;
    }
} /* 1}}} */
float elapsed(struct timeval start, struct timeval end) { /* {{{1 */
    return  (float) (end.tv_sec  - start.tv_sec ) +
           ((float) (end.tv_usec - start.tv_usec)/1000000);
} /* 1}}} */
                                                                                
int main(int argc, char *argv[]) {
    const char *zLeftover;
#define CMD_SIZE 1000
    char  rm_command[CMD_SIZE],
         *errmsg, *dbfile = "/tmp/a.db"; 
               /* *dbfile = ":memory:" is faster, but not of interest */
    int    rc, i, N, xact_size, n_this_xact = 0;
    double x, y, z;
    float  delta_T;
    struct timeval start_time, end_time;
    sqlite3      *db;
    sqlite3_stmt *Stmt;
                                                                                
    if (argc < 3) {
        printf("\nUsage:   %s  <N>  <X>\n\n", argv[0]);
        printf("\tInsert <N> rows into a table of an SQLite database\n");
        printf("\tusing transaction sizes of <X>.\n");
        printf("\tThe table has four columns of numeric data:\n";);
        printf("\t  field_1 integer\n");
        printf("\t  field_2 float\n");
        printf("\t  field_3 float\n");
        printf("\t  field_4 float\n");
        printf("\tThe integer field will have values 1..<N> while the\n");
        printf("\tdouble precision values are random on [-50.0, 50.0]\n");
        exit(0);
    }
    N         = atoi(argv[1]);
    xact_size = atoi(argv[2]);
    snprintf(rm_command, CMD_SIZE-1, "rm -f %s", dbfile);
    system(rm_command);  /* the database file must not exist before
                            calling sqlite3_open() and trying to insert */
                                                                                
    gettimeofday(&start_time, 0);
                                                                                
    rc = sqlite3_open(dbfile, &db);
                                                                                
    sql_begin(db);
    rc = sqlite3_prepare(db, "create table table_name(field_1 integer primary key,"
                                                     "field_2 float,  "
                                                     "field_3 float,  "
                                                     "field_4 float)",
                         -1, &Stmt, &zLeftover);
    rc = sqlite3_step(Stmt);
    rc = sqlite3_finalize(Stmt);
                                                                                
    rc = sqlite3_prepare(db, "insert into table_name values(?,?,?,?)",
                         -1, &Stmt, &zLeftover);
                                                                                
    for (i = 0; i < N; i++) {
        x  = 50.0 - (100.0*rand()/(RAND_MAX+1.0));
        y  = 50.0 - (100.0*rand()/(RAND_MAX+1.0));
        z  = 50.0 - (100.0*rand()/(RAND_MAX+1.0));
        rc = sqlite3_bind_int(   Stmt, 1, i);
        rc = sqlite3_bind_double(Stmt, 2, x);
        rc = sqlite3_bind_double(Stmt, 3, y);
        rc = sqlite3_bind_double(Stmt, 4, z);
        rc = sqlite3_step(Stmt);
        rc = sqlite3_reset(Stmt);
        ++n_this_xact;
        if (!(n_this_xact % xact_size)) {
            /* have done xact_size inserts, finish this
             * transaction and start a new one */
            sql_commit(db);
            sql_begin( db);
            n_this_xact = 0;
        }
    }
    sql_commit(db);
    sqlite3_close(db);

    gettimeofday(&end_time, 0);
    delta_T = elapsed(start_time, end_time);
    printf(" %d inserts to %s in %.3f s = %.2f inserts/s\n",
           N, dbfile, delta_T, N/delta_T);

    return 0;
}

Reply via email to