Hi Michael, May I make a request on behalf of the C++ challenged crowd?
Is it possible to fuse your contribution with the existing capabilities of the ".import" command so that it can be activated via syntax similar to - "RECORDS DELIMITED BY... FIELDS TERMINATED BY... OPTIONALLY ENCLOSED BY..." It would make ".import" so much more powerful. Thanks ! On Sun, May 6, 2012 at 5:59 AM, Black, Michael (IS) <michael.bla...@ngc.com>wrote: > I modified my csvimport to allow for quoted fields. Still automaticallhy > detects the # of columns. > If quoted fields also contain internal quotes they must be escaped by > doubling them. > e.g. > col1,"this is, a ""quoted"" field",col3 > It's not fully RFC4180 compliant as it doesn't allow for CF/LF inside > quotes -- does anybody really need/use that? > > #include <stdio.h> > #include <string.h> > #include <stdlib.h> > #include "sqlite3.h" > #define BUFSIZE 1000000 > #define MOD 100000 > #define MAXTOK 100000 > char *nextToken(char *line) { > static char token[MAXTOK]; > static char *p; > int n=0; > if (line) { > p = line; > } > while(*p && *p != ',' && *p!='\r' && *p!= '\n') { > if (*p == '"') { // quoted field > ++p; // move past first quote > while((*p && *p!='"') || *(p+1)=='"') { > if (*p == '"' && *(p+1)=='"') { // escaped quote? > token[n++]='"'; > p+=2; > continue; > } > token[n++] = *p; > ++p; > } > token[n++]=0; > ++p; > if (*p == ',') ++p; > return token; > } > token[n++]=*p; > ++p; > } > if (n > MAXTOK) { > fprintf(stderr,"Token too big??\n"); > exit(1); > } > token[n]=0; > ++p; > if (*p == ',') ++p; > if (n > 0) { > return token; > } > return NULL; > } > // Add comma delimited file to exisiting database/table > // Quoted strings are accepted > int main(int argc, char *argv[]) { > sqlite3 *db; > sqlite3_stmt *stmt; > int rc; > int ncol=0; > int nline=0; > char *buf=malloc(BUFSIZE); > char sql[8192]; > FILE *fp; > char *filename; > char *databasename; > char *tablename; > if (argc != 4) { > fprintf(stderr,"Usage: %s filename databasename tablename\n",argv[0]); > exit(1); > } > filename = argv[1]; > databasename = argv[2]; > tablename = argv[3]; > rc = sqlite3_open_v2(databasename,&db,SQLITE_OPEN_READWRITE,NULL); > if (rc) { > fprintf(stderr,"Error opening database '%s': > %s\n",databasename,sqlite3_errmsg(db)); > exit(1); > } > sprintf(sql,"insert into %s values (",tablename); > fp=fopen(filename,"r"); > if (fp == NULL) { > perror(filename); > exit(1); > } > buf[BUFSIZE-1] = '*'; > fgets(buf,BUFSIZE,fp); > if (buf[BUFSIZE-1] != '*') { > fprintf(stderr,"BUFSIZE not big enough...aborting\n"); > exit(1); > } > // count the columns > char *p=nextToken(buf); > ncol=0; > while(p) { > ++ncol; > strcat(sql,ncol==1?"":","); > strcat(sql,"?"); > p=nextToken(NULL); > } > printf("%d columns detected\n",ncol); > strcat(sql,")"); > rewind(fp); > // Let's wrap things in a transaction > rc = sqlite3_exec(db,"BEGIN",NULL,NULL,NULL); > if (rc) { > fprintf(stderr,"BEGIN failed on '%s': %s\n",sql,sqlite3_errmsg(db)); > exit(1); > } > // prepare our statement > rc = sqlite3_prepare(db,sql,strlen(sql),&stmt,NULL); > if (rc) { > fprintf(stderr,"Prepare failed on '%s': %s\n",sql,sqlite3_errmsg(db)); > exit(1); > } > // Loop over file file > while(fgets(buf,BUFSIZE,fp)) { > char *p=nextToken(buf); > int i=1; > ++nline; > if ((nline % MOD)==0) { > printf("%d\r",nline); > fflush(stdout); > } > while(p) { // bind the columns as text, table will take care of > conversion to column types > rc=sqlite3_bind_text(stmt,i,p,-1,SQLITE_TRANSIENT); > if (rc) { > fprintf(stderr,"bind_text failed on '%s': > %s\n",sql,sqlite3_errmsg(db)); > exit(1); > } > ++i; > p=nextToken(NULL); > } > if (--i != ncol) { > fprintf(stderr,"expected %d cols, got %d cols on > line#%d\n",ncol,i,nline); > } else { > rc = sqlite3_step(stmt); > if (rc != SQLITE_DONE) { > fprintf(stderr,"Insert failed on '%s': > %s\n",sql,sqlite3_errmsg(db)); > exit(1); > } > rc = sqlite3_reset(stmt); > if (rc) { > fprintf(stderr,"Reset failed on '%s': %s\n",sql,sqlite3_errmsg(db)); > exit(1); > } > } > } > rc=sqlite3_finalize(stmt); > if (rc) { > fprintf(stderr,"Finalize failed: %s\n",sqlite3_errmsg(db)); > exit(1); > } > printf("%d inserts, committing...\n",nline); > rc = sqlite3_exec(db,"COMMIT",NULL,NULL,NULL); > if (rc) { > fprintf(stderr,"COMMIT failed on '%s': %s\n",sql,sqlite3_errmsg(db)); > exit(1); > } > rc=sqlite3_close(db); > if (rc) { > fprintf(stderr,"Close failed on '%s': %s\n",argv[2],sqlite3_errmsg(db)); > exit(1); > } > fclose(fp); > return 0; > } > > > > > Michael D. Black > Senior Scientist > Advanced Analytics Directorate > Advanced GEOINT Solutions Operating Unit > Northrop Grumman Information Systems > > > > From: sqlite-users-boun...@sqlite.org [sqlite-users-boun...@sqlite.org] > on behalf of Oliver Schneider [oli...@f-prot.com] > Sent: Friday, May 04, 2012 12:40 PM > To: sqlite-users@sqlite.org > Subject: EXT :Re: [sqlite] is SQLite the right tool to analyze a 44GB file > > > On 2012-05-01 20:41, Baruch Burstein wrote: > > It is already wrapped in a transaction. > > I seem to remember seeing somewhere that the .import command doesn't > > understand escaping, e.g. > > > > "one","two,three" > > > > will get imported as > > > > "one" | "two | three" > > > > (the quotes are part of the data, and the second column was split into > two > > by the comma) > > Just a point to be aware of. > That could be mitigated by writing a small script that "converts" the > CSV contents line-wise to SQL-statements, right? > > > // Oliver > > PS: re-sending after the mailing lists chokes on my message with S/MIME > signature. > _______________________________________________ > sqlite-users mailing list > sqlite-users@sqlite.org > http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users > _______________________________________________ > sqlite-users mailing list > sqlite-users@sqlite.org > http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users > _______________________________________________ sqlite-users mailing list sqlite-users@sqlite.org http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users