I updated my csvimport utility to allow hex fields.
So hex fields like X'01020304' will get imported as blobs if the option is 
enabled.

Sooo....

test.csv:
X'000106350000000000',X'00000000000000'
X'000106350000000001',X'00000000000001'

csvimport test.csv test.db t
csvimport -x test.csv test.db t

Here's a dump of test.db after both command are run...you can see the first two 
are strings and the last two are blobs.
CREATE TABLE t(a blob,b blob);
INSERT INTO "t" VALUES('X''000106350000000000''','X''00000000000000''');
INSERT INTO "t" VALUES('X''000106350000000001''','X''00000000000001''');
INSERT INTO "t" VALUES(X'000106350000000000',X'00000000000000');
INSERT INTO "t" VALUES(X'000106350000000001',X'00000000000001');



// csvimport -- At least somewhat RFC4180 compliant
// quoted fields can span multiple lines
// quotes can be inserted by escaping with another quote
// will parse fields like X'01020304' as blobs
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "sqlite3.h"

#define BUFSIZE 1000000
#define MOD 100000
#define MAXTOKEN 100000

int nline=0;
int ncol;
char buf[BUFSIZE];
int peek;
char delim=',';
char quote='"';
char eol='\n';
int hexFlag=0;

int nextChar(FILE *fp) {
  static char *ptr=buf;
  if (*ptr == 0) {
    if(fgets(buf,BUFSIZE,fp)==NULL) {
      return EOF;
    }
    ++nline;
    ptr=buf;
  }
  peek = *(ptr+1);
  return *ptr++;
}

int countColumns(FILE *fp) {
  // 1st line(s) of the file determine how many columns we have
  int p;
  int ncol = 0;
  while((p=nextChar(fp))!=EOF) {
    if (p == quote) { // start quote
      p=nextChar(fp);
      if (p==quote) continue; // escaped quote
      while((p=nextChar(fp)) && p!=quote); // skip to next quote
    }
    if (p == delim) {
      ++ncol;
    }
    if (p == eol) {
      break;
    }
  }
  return ++ncol;
}

char *nextToken(FILE *fp) {
  // 1st line(s) of the file determine how many columns we have
  int p;
  int n=0;
  static char *token;
  int inquote=0;
  if (token == NULL) {
    token=malloc(MAXTOKEN);
    if (token==NULL) {
      fprintf(stderr,"malloc error: %%m\n");
      exit(1);
    }
  }
  token[0]=0;
  while((p=nextChar(fp))!=EOF) {
    if (p == quote) { // start quote
      p=nextChar(fp);
      if (p==quote) { // escaped quote
        token[n++]=p;
        token[n]=0;
        continue;
      }
      token[n++]=p;
      token[n]=0;
      inquote = 1;
      while(inquote) {
        p=nextChar(fp);
        if (p==quote && peek==quote) {
          p=nextChar(fp); // skip on escaped quote
          token[n++]=p;
          token[n]=0;
          continue;
        }
        if (p==quote) {
          inquote=0;
        } else {
          token[n++]=p;
          token[n]=0;
        }
      }
      continue;
    }
    if (p == delim) {
      token[n]=0;
      return token;
    }
    if (p == eol) {
      break;
    }
    token[n++]=p;
    token[n]=0;
  }
  if (feof(fp) && n==0) {
    return NULL;
  }
  token[n]=0;
  return token;
}

void checkrc(sqlite3 *db,int rc,int checkrc,int flag,char *msg,char *str) {
  if (rc != checkrc) {
    fprintf(stderr,"\nLine#%d: ",nline);
    fprintf(stderr,msg,str);
    fprintf(stderr,"%s\n",sqlite3_errmsg(db));
    if (flag) { // then fatal
      exit(1);
    }
  }
}

char escape(char *s) {
  if (*s != '\\') return *s;
  ++s;
  switch(*s) {
  case 't':
    return '\t';
  case 'n':
    return '\n';
  case '"':
    return '"';
  }
  fprintf(stderr,"Unknown escape sequence=%s\n",--s);
  return '\0';
}

// Converts hex string X'0000' to blob value
unsigned char *makeblob(char *s, int *size) {
  static unsigned char *blob,*b;
  int i;
  if (blob) free(blob);
  *size=(strlen(s)-3)/2;
  blob=malloc(*size);
  b=blob;
  for(i=2; s[i]!='\''; i+=2) {
    int value;
    sscanf(&s[i],"%2x",&value);
    *b=value;
    b++;
  }
  return blob;
}

// Add comma delimited file to exisiting database/table
// Quoted strings are accepted
int main(int argc, char *argv[]) {
  sqlite3 *db;
  sqlite3_stmt *stmt;
  int rc;
  int ninsert=0;
  int mycol;
  char sql[32768];
  FILE *fp;
  char *filename;
  char *databasename;
  char *tablename;

  while(argv[1] && argv[1][0]=='-') {
    switch(argv[1][1]) {
    case 'd':
      delim=escape(&argv[1][2]);
      break;
    case 'e':
      eol=escape(&argv[1][2]);
      break;
    case 'x':
      hexFlag=1;
      break;
    default:
      fprintf(stderr,"Bad option: %s\n",argv[1]);
    }
    ++argv;
    --argc;
  }
  if (argc != 4) {
    fprintf(stderr,"%s Version 1.1\n",argv[0]);
    fprintf(stderr,"Usage: csvimport [-d] [-e] [-r] filename databasename 
tablename\n");
    fprintf(stderr,"    -d   Change field delimiter, default -d,\n");
    fprintf(stderr,"    -e   Change field enclosure char, default -e\\\"\n");
    fprintf(stderr,"    -r   Change record delimiter, default -r\\n\n");
    fprintf(stderr,"    -x   Parse hex fields as blobs (e.g. X'01020304'\n");
    exit(1);
  }
  filename = argv[1];
  databasename = argv[2];
  tablename = argv[3];
  rc = sqlite3_open_v2(databasename,&db,SQLITE_OPEN_READWRITE,NULL);
  checkrc(db,SQLITE_OK,rc,1,"Error opening database '%s': ",databasename);
  fp=fopen(filename,"r");
  if (fp == NULL) {
    perror(filename);
    exit(1);
  }
  // count the columns
  ncol = countColumns(fp);

  fprintf(stderr,"%d columns detected...checking file contents\n",ncol);
  rewind(fp);
  nline=0;
  while((mycol=countColumns(fp)) == ncol);
  if (!feof(fp)) {
    fprintf(stderr,"Error at line#%d, expected %d cols, got %d
cols...aborting\n",nline,ncol,mycol);
    exit(1);
  }
  rewind(fp);
  printf("importing %d lines...\n",nline);
  sprintf(sql,"insert into %s values (",tablename);
  for(mycol=0; mycol<ncol; ++mycol) {
    strcat(sql,mycol==0?"":",");
    strcat(sql,"?");
  }
  strcat(sql,")");
  // prepare our statement
  rc = sqlite3_prepare(db,sql,strlen(sql),&stmt,NULL);
  checkrc(db,SQLITE_OK,rc,1,"Prepare failed on '%s':",sql);
  // Let's wrap things in a transaction
  rc = sqlite3_exec(db,"BEGIN",NULL,NULL,NULL);
  checkrc(db,SQLITE_OK,rc,1,"BEGIN failed on '%s': ",sql);
  // Loop over file
  rewind(fp);
  nline=0;
  while(!feof(fp)) {
    int i;
    for(i=1; i<=ncol; ++i) { // bind the columns as text, table will take care 
of conversion to column types
      char *p=nextToken(fp);
      if (feof(fp) && p==NULL) break;
      if (hexFlag && strncasecmp(p,"X'",2)==0) {
        int n;
        unsigned char* blob=makeblob(p,&n);
        rc=sqlite3_bind_blob(stmt,i,blob,n,SQLITE_TRANSIENT);
      } else {
        rc=sqlite3_bind_text(stmt,i,p,-1,SQLITE_TRANSIENT);
      }
      checkrc(db,SQLITE_OK,rc,0,"Bind_text failed on '%s': ",sql);
    }
    if (feof(fp)) break;
    rc = sqlite3_step(stmt);
    checkrc(db,SQLITE_DONE,rc,1,"Insert failed on '%s': ",databasename);
    ++ninsert;
    if ((ninsert % MOD)==0) {
      printf("%d\r",ninsert);
      fflush(stdout);
    }
    rc = sqlite3_reset(stmt);
    checkrc(db,SQLITE_OK,rc,1,"Reset failed on '%s': ",sql);
    sqlite3_clear_bindings(stmt);
    checkrc(db,SQLITE_OK,rc,0,"clear_bindinds failed on '%s': ",sql);
  }
  rc=sqlite3_finalize(stmt);
  checkrc(db,SQLITE_OK,rc,1,"Finalize failed: ",NULL);
  printf("%d lines, %d inserts, committing...\n",nline,ninsert);
  rc = sqlite3_exec(db,"COMMIT",NULL,NULL,NULL);
  checkrc(db,SQLITE_OK,rc,1,"COMMIT failed on '%s': ",sql);
  rc=sqlite3_close(db);
  checkrc(db,SQLITE_OK,rc,1,"Close failed on '%s': ",argv[2]);
  fclose(fp);
  return 0;
}



_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to