I modified my csvimport to allow for quoted fields.  Still automaticallhy 
detects the # of columns.
If quoted fields also contain internal quotes they must be escaped by doubling 
them.
e.g.
col1,"this is, a ""quoted"" field",col3
It's not fully RFC4180 compliant as it doesn't allow for CF/LF inside quotes -- 
does anybody really need/use that?

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "sqlite3.h"
#define BUFSIZE 1000000
#define MOD 100000
#define MAXTOK 100000
char *nextToken(char *line) {
  static char token[MAXTOK];
  static char *p;
  int n=0;
  if (line) {
    p = line;
  }
  while(*p && *p != ',' && *p!='\r' && *p!= '\n') {
    if (*p == '"') { // quoted field
      ++p; // move past first quote
      while((*p && *p!='"') || *(p+1)=='"') {
        if (*p == '"' && *(p+1)=='"') { // escaped quote?
          token[n++]='"';
          p+=2;
          continue;
        }
        token[n++] = *p;
        ++p;
      }
      token[n++]=0;
      ++p;
      if (*p == ',') ++p;
      return token;
    }
    token[n++]=*p;
    ++p;
  }
  if (n > MAXTOK) {
    fprintf(stderr,"Token too big??\n");
    exit(1);
  }
  token[n]=0;
  ++p;
  if (*p == ',') ++p;
  if (n > 0) {
    return token;
  }
  return NULL;
}
// Add comma delimited file to exisiting database/table
// Quoted strings are accepted
int main(int argc, char *argv[]) {
  sqlite3 *db;
  sqlite3_stmt *stmt;
  int rc;
  int ncol=0;
  int nline=0;
  char *buf=malloc(BUFSIZE);
  char sql[8192];
  FILE *fp;
  char *filename;
  char *databasename;
  char *tablename;
  if (argc != 4) {
    fprintf(stderr,"Usage: %s filename databasename tablename\n",argv[0]);
    exit(1);
  }
  filename = argv[1];
  databasename = argv[2];
  tablename = argv[3];
  rc = sqlite3_open_v2(databasename,&db,SQLITE_OPEN_READWRITE,NULL);
  if (rc) {
    fprintf(stderr,"Error opening database '%s': 
%s\n",databasename,sqlite3_errmsg(db));
    exit(1);
  }
  sprintf(sql,"insert into %s values (",tablename);
  fp=fopen(filename,"r");
  if (fp == NULL) {
    perror(filename);
    exit(1);
  }
  buf[BUFSIZE-1] = '*';
  fgets(buf,BUFSIZE,fp);
  if (buf[BUFSIZE-1] != '*') {
    fprintf(stderr,"BUFSIZE not big enough...aborting\n");
    exit(1);
  }
  // count the columns
  char *p=nextToken(buf);
  ncol=0;
  while(p) {
    ++ncol;
    strcat(sql,ncol==1?"":",");
    strcat(sql,"?");
    p=nextToken(NULL);
  }
  printf("%d columns detected\n",ncol);
  strcat(sql,")");
  rewind(fp);
  // Let's wrap things in a transaction
  rc = sqlite3_exec(db,"BEGIN",NULL,NULL,NULL);
  if (rc) {
    fprintf(stderr,"BEGIN failed on '%s': %s\n",sql,sqlite3_errmsg(db));
    exit(1);
  }
  // prepare our statement
  rc = sqlite3_prepare(db,sql,strlen(sql),&stmt,NULL);
  if (rc) {
    fprintf(stderr,"Prepare failed on '%s': %s\n",sql,sqlite3_errmsg(db));
    exit(1);
  }
  // Loop over file file
  while(fgets(buf,BUFSIZE,fp)) {
    char *p=nextToken(buf);
    int i=1;
    ++nline;
    if ((nline % MOD)==0) {
      printf("%d\r",nline);
      fflush(stdout);
    }
    while(p) { // bind the columns as text, table will take care of conversion 
to column types
      rc=sqlite3_bind_text(stmt,i,p,-1,SQLITE_TRANSIENT);
      if (rc) {
        fprintf(stderr,"bind_text failed on '%s': %s\n",sql,sqlite3_errmsg(db));
        exit(1);
      }
      ++i;
      p=nextToken(NULL);
    }
    if (--i != ncol) {
      fprintf(stderr,"expected %d cols, got %d cols on line#%d\n",ncol,i,nline);
    } else {
      rc = sqlite3_step(stmt);
      if (rc != SQLITE_DONE) {
        fprintf(stderr,"Insert failed on '%s': %s\n",sql,sqlite3_errmsg(db));
        exit(1);
      }
      rc = sqlite3_reset(stmt);
      if (rc) {
        fprintf(stderr,"Reset failed on '%s': %s\n",sql,sqlite3_errmsg(db));
        exit(1);
      }
    }
  }
  rc=sqlite3_finalize(stmt);
  if (rc) {
    fprintf(stderr,"Finalize failed: %s\n",sqlite3_errmsg(db));
    exit(1);
  }
  printf("%d inserts, committing...\n",nline);
  rc = sqlite3_exec(db,"COMMIT",NULL,NULL,NULL);
  if (rc) {
    fprintf(stderr,"COMMIT failed on '%s': %s\n",sql,sqlite3_errmsg(db));
    exit(1);
  }
  rc=sqlite3_close(db);
  if (rc) {
    fprintf(stderr,"Close failed on '%s': %s\n",argv[2],sqlite3_errmsg(db));
    exit(1);
  }
  fclose(fp);
  return 0;
}




Michael D. Black
Senior Scientist
Advanced Analytics Directorate
Advanced GEOINT Solutions Operating Unit
Northrop Grumman Information Systems



From: sqlite-users-boun...@sqlite.org [sqlite-users-boun...@sqlite.org] on 
behalf of Oliver Schneider [oli...@f-prot.com]
Sent: Friday, May 04, 2012 12:40 PM
To: sqlite-users@sqlite.org
Subject: EXT :Re: [sqlite] is SQLite the right tool to analyze a 44GB file


On 2012-05-01 20:41, Baruch Burstein wrote:
> It is already wrapped in a transaction.
> I seem to remember seeing somewhere that the .import command doesn't
> understand escaping, e.g.
>
>     "one","two,three"
>
> will get imported as
>
>     "one" | "two | three"
>
> (the quotes are part of the data, and the second column was split into two
> by the comma)
> Just a point to be aware of.
That could be mitigated by writing a small script that "converts" the
CSV contents line-wise to SQL-statements, right?


// Oliver

PS: re-sending after the mailing lists chokes on my message with S/MIME
signature.
_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users
_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to