I modified my csvimport to allow for quoted fields. It still automatically detects the # of columns. If quoted fields also contain internal quotes, they must be escaped by doubling them, e.g. col1,"this is, a ""quoted"" field",col3. It's not fully RFC 4180 compliant as it doesn't allow for CR/LF inside quotes -- does anybody really need/use that?
#include <stdio.h> #include <string.h> #include <stdlib.h> #include "sqlite3.h" #define BUFSIZE 1000000 #define MOD 100000 #define MAXTOK 100000 char *nextToken(char *line) { static char token[MAXTOK]; static char *p; int n=0; if (line) { p = line; } while(*p && *p != ',' && *p!='\r' && *p!= '\n') { if (*p == '"') { // quoted field ++p; // move past first quote while((*p && *p!='"') || *(p+1)=='"') { if (*p == '"' && *(p+1)=='"') { // escaped quote? token[n++]='"'; p+=2; continue; } token[n++] = *p; ++p; } token[n++]=0; ++p; if (*p == ',') ++p; return token; } token[n++]=*p; ++p; } if (n > MAXTOK) { fprintf(stderr,"Token too big??\n"); exit(1); } token[n]=0; ++p; if (*p == ',') ++p; if (n > 0) { return token; } return NULL; } // Add comma delimited file to exisiting database/table // Quoted strings are accepted int main(int argc, char *argv[]) { sqlite3 *db; sqlite3_stmt *stmt; int rc; int ncol=0; int nline=0; char *buf=malloc(BUFSIZE); char sql[8192]; FILE *fp; char *filename; char *databasename; char *tablename; if (argc != 4) { fprintf(stderr,"Usage: %s filename databasename tablename\n",argv[0]); exit(1); } filename = argv[1]; databasename = argv[2]; tablename = argv[3]; rc = sqlite3_open_v2(databasename,&db,SQLITE_OPEN_READWRITE,NULL); if (rc) { fprintf(stderr,"Error opening database '%s': %s\n",databasename,sqlite3_errmsg(db)); exit(1); } sprintf(sql,"insert into %s values (",tablename); fp=fopen(filename,"r"); if (fp == NULL) { perror(filename); exit(1); } buf[BUFSIZE-1] = '*'; fgets(buf,BUFSIZE,fp); if (buf[BUFSIZE-1] != '*') { fprintf(stderr,"BUFSIZE not big enough...aborting\n"); exit(1); } // count the columns char *p=nextToken(buf); ncol=0; while(p) { ++ncol; strcat(sql,ncol==1?"":","); strcat(sql,"?"); p=nextToken(NULL); } printf("%d columns detected\n",ncol); strcat(sql,")"); rewind(fp); // Let's wrap things in a transaction rc = sqlite3_exec(db,"BEGIN",NULL,NULL,NULL); if (rc) { fprintf(stderr,"BEGIN failed on '%s': %s\n",sql,sqlite3_errmsg(db)); 
exit(1); } // prepare our statement rc = sqlite3_prepare(db,sql,strlen(sql),&stmt,NULL); if (rc) { fprintf(stderr,"Prepare failed on '%s': %s\n",sql,sqlite3_errmsg(db)); exit(1); } // Loop over file file while(fgets(buf,BUFSIZE,fp)) { char *p=nextToken(buf); int i=1; ++nline; if ((nline % MOD)==0) { printf("%d\r",nline); fflush(stdout); } while(p) { // bind the columns as text, table will take care of conversion to column types rc=sqlite3_bind_text(stmt,i,p,-1,SQLITE_TRANSIENT); if (rc) { fprintf(stderr,"bind_text failed on '%s': %s\n",sql,sqlite3_errmsg(db)); exit(1); } ++i; p=nextToken(NULL); } if (--i != ncol) { fprintf(stderr,"expected %d cols, got %d cols on line#%d\n",ncol,i,nline); } else { rc = sqlite3_step(stmt); if (rc != SQLITE_DONE) { fprintf(stderr,"Insert failed on '%s': %s\n",sql,sqlite3_errmsg(db)); exit(1); } rc = sqlite3_reset(stmt); if (rc) { fprintf(stderr,"Reset failed on '%s': %s\n",sql,sqlite3_errmsg(db)); exit(1); } } } rc=sqlite3_finalize(stmt); if (rc) { fprintf(stderr,"Finalize failed: %s\n",sqlite3_errmsg(db)); exit(1); } printf("%d inserts, committing...\n",nline); rc = sqlite3_exec(db,"COMMIT",NULL,NULL,NULL); if (rc) { fprintf(stderr,"COMMIT failed on '%s': %s\n",sql,sqlite3_errmsg(db)); exit(1); } rc=sqlite3_close(db); if (rc) { fprintf(stderr,"Close failed on '%s': %s\n",argv[2],sqlite3_errmsg(db)); exit(1); } fclose(fp); return 0; } Michael D. Black Senior Scientist Advanced Analytics Directorate Advanced GEOINT Solutions Operating Unit Northrop Grumman Information Systems From: sqlite-users-boun...@sqlite.org [sqlite-users-boun...@sqlite.org] on behalf of Oliver Schneider [oli...@f-prot.com] Sent: Friday, May 04, 2012 12:40 PM To: sqlite-users@sqlite.org Subject: EXT :Re: [sqlite] is SQLite the right tool to analyze a 44GB file On 2012-05-01 20:41, Baruch Burstein wrote: > It is already wrapped in a transaction. > I seem to remember seeing somewhere that the .import command doesn't > understand escaping, e.g. 
> > "one","two,three" > > will get imported as > > "one" | "two | three" > > (the quotes are part of the data, and the second column was split into two > by the comma) > Just a point to be aware of. That could be mitigated by writing a small script that "converts" the CSV contents line-wise to SQL-statements, right? // Oliver PS: re-sending after the mailing lists chokes on my message with S/MIME signature. _______________________________________________ sqlite-users mailing list sqlite-users@sqlite.org http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users _______________________________________________ sqlite-users mailing list sqlite-users@sqlite.org http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users