/*+++++++++++
  tokenize.c: functions to split a command line into tokens and stash them
  into an array similar to argc/argv
  hoenicka_markus@compuserve.com 2-17-00
  $Id: tokenize.c,v 1.6 2000/05/08 06:23:48 markus Exp markus $
  +++++++++++*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "strfncs.h"
#include "tokenize.h"

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  cmdln_next_start(): private helper of cmdln_tokenize(). Called after
  the byte at DELIMITER has been overwritten with \0; works out where
  the following token starts.

  static char *cmdln_next_start returns NULL if DELIMITER was the last
  character of the line. Otherwise returns whatever stripwhite() hands
  back for the text after the delimiter, advanced by one character if
  that text starts with a quotation mark.

  char *delimiter position that was just turned into a terminator

  char *eostring position of the terminating \0 of the whole line

  int *quot0_detect incremented if the next token opens with '

  int *quot1_detect incremented if the next token opens with "
  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static char *cmdln_next_start(char *delimiter, char *eostring,
                              int *quot0_detect, int *quot1_detect) {
  char *next_token;

  if (delimiter+1 == eostring) { /* nothing follows the delimiter */
    return NULL;
  }

  next_token = stripwhite(delimiter+1, 1, 0);
  if (*next_token == '\'') { /* next token is single-quoted */
    (*quot0_detect)++;
    next_token++;
  }
  else if (*next_token == '\"') { /* next token is double-quoted */
    (*quot1_detect)++;
    next_token++;
  }
  return next_token;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  cmdln_tokenize(): splits a command line into tokens and stashes them
  in an array similar to argc/argv. Single and double quotes are
  respected to group tokens. The first token (the command) is split
  off at the first space without looking at quotes. An unquoted CR or
  LF is treated as the end of the command line.

  NOTE(review): stripwhite() is defined elsewhere. The calls below
  assume that mode 1 skips leading whitespace, mode 2 removes trailing
  whitespace in place, and that the start of the trimmed string is
  returned - confirm against strfncs.

  int cmdln_tokenize returns 0 if successful, 1 if out of memory, 2 if
  command line empty. If 1 is returned, *inargv is still the old,
  valid (but full) array.

  int *inargc ptr to a counter for the tokens. Should be initialized
  to 0 before you call this function, unless you add to an existing
  array. Must be smaller than inargcmax on entry.

  char ***inargv ptr to the array of ptrs to the token
  strings. This array must be allocated with malloc() before you call
  this function. It must be able to hold at least inargcmax string
  entries. If more than inargcmax tokens are found, the array grows
  dynamically. The stored pointers point into inbuffer, they are not
  copies.

  int inargcmax size of ***inargv array (number of entries). Whenever
  the array becomes full it grows by 10 entries. The new size is not
  reported back to the caller.

  char *inbuffer buffer holding the command line. This buffer will be
  modified while parsing, so keep a copy before you call this function
  if you need the original afterwards

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int cmdln_tokenize(int *inargc, char ***inargv, int inargcmax, char* inbuffer) {
  int quot0_detect = 0; /* nonzero: next token ends at a single quote */
  int quot1_detect = 0; /* nonzero: next token ends at a double quote */
  char *this_token;
  char *next_token = NULL;
  char *delimiter;
  char *eostring;
  char **resizedinargv; /* temporary inargv for realloc */

  /* Strings enclosed in pairs of "" or '' are treated as one
     argument. Whenever a token turns out to start with a quotation
     mark, the matching counter is raised so that the next round of
     the loop searches for the closing quotation mark instead of
     whitespace. */

  stripwhite(inbuffer, 2, 0);
  if (*inbuffer == '\0') {
    return 2; /* nothing to tokenize */
  }

  /* remember the end of the original string; the buffer gets chopped
     into pieces by \0 bytes below */
  eostring = &inbuffer[strlen(inbuffer)];

  /* in the first round we look for the command, this must not
     have quotes, so we ignore them now */
  this_token = inbuffer;
  delimiter = strpbrk(inbuffer, " \n\r");
  if (delimiter == NULL) { /* the command is the only token */
    next_token = NULL;
  }
  else if (*delimiter == ' ') { /* space detected */
    *delimiter = '\0';
    if (*(delimiter+1) == '\0') { /* if string ends here */
      next_token = NULL;
    }
    else { /* if string continues */
      next_token = stripwhite(delimiter+1, 1, 0);
      if (*next_token == '\'') { /* single quote detected */
        quot0_detect++;
        *next_token = '\0';
        next_token += 1;
      }
      else if (*next_token == '\"') { /* double quote detected */
        quot1_detect++;
        *next_token = '\0';
        next_token += 1;
      }
    }
  }
  else { /* CR or LF: the command line ends here */
    *delimiter = '\0';
    if (delimiter == inbuffer) {
      return 2; /* line break before any command */
    }
    next_token = NULL;
  }

  while (this_token != NULL) { /* loop until string ends */

    /* save token in token array */
    (*inargv)[(*inargc)++] = stripwhite(this_token, 1, 0);

    /* grow the array as soon as it is full, so there is always room
       for the next entry */
    if (*inargc == inargcmax) {
      inargcmax += 10;
      resizedinargv = (char**)realloc(*inargv,
                                      (size_t)inargcmax*sizeof(char*));
      if (resizedinargv == NULL) {
        return 1;
      }
      *inargv = resizedinargv;
    }

    /* prepare next round */
    if (next_token == NULL) { /* if string ends */
      this_token = NULL;
    }
    else if (quot0_detect) { /* token runs up to the closing ' */
      quot0_detect--;
      delimiter = strpbrk(next_token, "\'");
      this_token = next_token;
      if (delimiter == NULL) { /* unbalanced quote: take the rest */
        next_token = NULL;
      }
      else {
        *delimiter = '\0';
        next_token = cmdln_next_start(delimiter, eostring,
                                      &quot0_detect, &quot1_detect);
      }
    }
    else if (quot1_detect) { /* token runs up to the closing " */
      quot1_detect--;
      delimiter = strpbrk(next_token, "\"");
      this_token = next_token;
      if (delimiter == NULL) { /* unbalanced quote: take the rest */
        next_token = NULL;
      }
      else {
        *delimiter = '\0';
        next_token = cmdln_next_start(delimiter, eostring,
                                      &quot0_detect, &quot1_detect);
      }
    }
    else { /* unquoted token */
      delimiter = strpbrk(next_token, " \'\"\n\r");
      if (delimiter == NULL) { /* end of string */
        this_token = next_token;
        next_token = NULL;
      }
      else if (*delimiter == ' ') {
        *delimiter = '\0';
        this_token = next_token;
        if (*(delimiter+1) == '\0') {
          next_token = NULL;
        }
        else {
          delimiter = stripwhite(delimiter+1, 1, 0);
          if (*delimiter == '\'') { /* single quote after space */
            quot0_detect++;
            *delimiter = '\0';
            next_token = cmdln_next_start(delimiter, eostring,
                                          &quot0_detect, &quot1_detect);
          }
          else if (*delimiter == '\"') { /* double quote after space */
            quot1_detect++;
            *delimiter = '\0';
            next_token = cmdln_next_start(delimiter, eostring,
                                          &quot0_detect, &quot1_detect);
          }
          else {
            next_token = (delimiter == eostring) ? NULL : delimiter;
          }
        }
      }
      else if (*delimiter == '\'') { /* quote glued to the token */
        quot0_detect++;
        this_token = next_token;
        *delimiter = '\0';
        next_token = cmdln_next_start(delimiter, eostring,
                                      &quot0_detect, &quot1_detect);
      }
      else if (*delimiter == '\"') { /* quote glued to the token */
        quot1_detect++;
        this_token = next_token;
        *delimiter = '\0';
        next_token = cmdln_next_start(delimiter, eostring,
                                      &quot0_detect, &quot1_detect);
      }
      else { /* CR or LF: the command line ends here */
        *delimiter = '\0';
        /* do not emit an empty token if the line break comes first */
        this_token = (delimiter == next_token) ? NULL : next_token;
        next_token = NULL;
      }
    }
  }
  return 0;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  sql_value_end(): private helper of sql_tokenize(). Looks for the
  strings " AND ", " OR ", " NOT " and, if WITH_PAREN is nonzero, for
  a closing bracket, starting at FROM.

  static char *sql_value_end returns the first array entry after the
  matches have been sorted with compare_ptr().
  NOTE(review): compare_ptr() is defined elsewhere; this code relies
  on it ordering the lowest non-NULL address first and NULL entries
  last - confirm.
  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static char *sql_value_end(char *from, int with_paren) {
  char *candidates[4];
  size_t num_candidates = 3;

  candidates[0] = strstr(from, " AND ");
  candidates[1] = strstr(from, " OR ");
  candidates[2] = strstr(from, " NOT ");
  if (with_paren) {
    candidates[num_candidates++] = strchr(from, ')');
  }
  qsort(candidates, num_candidates, sizeof(char*), compare_ptr);
  return candidates[0];
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  sql_operand_start(): private helper of sql_tokenize(). Looks for the
  next field separator (:) and the next opening bracket starting at
  FROM and returns the first array entry after sorting both matches
  with compare_ptr() (see the note at sql_value_end()).
  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static char *sql_operand_start(char *from) {
  char *candidates[2];

  candidates[0] = strchr(from, ':');
  candidates[1] = strchr(from, '(');
  qsort(candidates, 2, sizeof(char*), compare_ptr);
  return candidates[0];
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  sql_operator_length(): private helper of sql_tokenize(). COLON
  points to a field separator. A field operator consists of the
  separator, three further characters (presumably the field name) and
  one of the comparisons = != < <= <> > >=, and it must be followed
  by at least one more character.

  static int sql_operator_length returns the length of the field
  operator (5 or 6), or 0 if there is none at COLON
  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static int sql_operator_length(const char *colon) {
  if (strlen(colon) <= 5) {
    return 0; /* too short to hold an operator plus a value */
  }

  switch (colon[4]) {
  case '!':
    return (colon[5] == '=') ? 6 : 0;
  case '=':
    return 5;
  case '<':
    return (colon[5] == '>' || colon[5] == '=') ? 6 : 5;
  case '>':
    return (colon[5] == '=') ? 6 : 5;
  default:
    return 0;
  }
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  sql_tokenize(): splits a pseudo-SQL request into tokens. Call
                  repeatedly and give the sqltoken.next_token of the
                  previous iteration as an argument for inbuffer,
                  until sqltoken.next_token is NULL. The field name
                  separator is currently set to : (colon)

  char* sql_tokenize returns a pointer to the next token, or NULL
  if no token is found (this includes an empty inbuffer). For a
  quoted value the pointer is the first character after the opening
  quote. The token is not terminated, use sqltoken->length.

  char *inbuffer buffer holding the command line. It is not modified.

  struct SQLTOKEN *sqltoken pointer to a structure which will be
                  filled with the length and the type of the token
                  and with the start of the following token, if any.
                  The types set here are:
                  0 = value (also used if no token is found)
                  1 = " AND ", " OR ", " NOT "
                  2 = token starting with an opening bracket
                  3 = token starting with a closing bracket
                  4 = field operator, e.g. :XXX=
  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
char *sql_tokenize(char* inbuffer, struct SQLTOKEN *sqltoken) {
  char *cursor;
  char *following;
  char *closing_quote;
  int operator_length;
  int type;

  /* Only a colon that does not start a valid field operator makes the
     loop advance; every other character starts a token right away.
     Testing for the terminator first keeps an empty buffer from being
     read past its end. */
  for (cursor = inbuffer; *cursor != '\0'; cursor++) {
    if (*cursor == ':') {
      operator_length = sql_operator_length(cursor);
      if (operator_length == 0) {
        continue; /* stray colon, keep scanning */
      }
      sqltoken->length = operator_length;
      sqltoken->next_token = &cursor[operator_length];
      sqltoken->type = 4;
      return cursor;
    }

    if (*cursor == '\'') { /* quoted value, token excludes the quotes */
      sqltoken->type = 0;
      closing_quote = strchr(cursor+1, '\''); /* jump to next ' */
      if (closing_quote != NULL) {
        sqltoken->next_token = sql_value_end(closing_quote, 1);
        sqltoken->length = (int)(closing_quote-(cursor+1));
      }
      else { /* unbalanced quote: the value is the rest of the string */
        sqltoken->next_token = NULL;
        sqltoken->length = strlen(cursor+1);
      }
      return cursor+1;
    }

    if (*cursor == '(') {
      type = 2;
      following = strchr(cursor, ':');
    }
    else if (*cursor == ')') {
      type = 3;
      following = sql_value_end(cursor, 0);
    }
    else if (strncmp(cursor, " AND ", 5) == 0
             || strncmp(cursor, " OR ", 4) == 0
             || strncmp(cursor, " NOT ", 5) == 0) {
      type = 1;
      following = sql_operand_start(cursor);
    }
    else { /* this is obviously some value */
      type = 0;
      following = sql_value_end(cursor, 1);
    }

    /* the token extends up to the start of the following token, or
       to the end of the string if nothing follows */
    sqltoken->type = type;
    sqltoken->next_token = following;
    if (following != NULL) {
      sqltoken->length = (int)(following-cursor);
    }
    else {
      sqltoken->length = strlen(cursor);
    }
    return cursor;
  }

  /* reached the end of the string without finding a token */
  sqltoken->type = 0;
  sqltoken->next_token = NULL;
  sqltoken->length = 0;
  return NULL;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  nstrtok(): replacement for strtok() that keeps no state between
             calls. First call it with the string as first argument.
             In subsequent calls, use previous_token+len instead.
             In contrast to strtok(), nstrtok() does *not* modify
             the string, so you have to terminate the token
             yourself, e.g.:
             strncpy(buffer, token, len);
             buffer[len] = '\0';
             As in strtok(), the delim strings can be different in
             subsequent calls to nstrtok()

  char* nstrtok returns a pointer to the next token, or NULL
  if no token is found (value is NULL, empty, or consists of
  delimiters only)

  char* value the string to search in

  int* len ptr to a variable that will receive the length of a token
           or 0 if no token is found

  char* delim ptr to a string consisting of the token-delimiting
              characters. If it is empty, the whole string is one
              token.

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
char* nstrtok(char* value, int* len, char* delim) {
  char *token;

  /* take a shortcut if there is no string or the string is empty */
  if (value == NULL || value[0] == '\0') {
    *len = 0;
    return NULL;
  }

  /* skip all delimiters at the beginning of the token */
  token = value + strspn(value, delim);
  if (*token == '\0') { /* nothing but delimiters left */
    *len = 0;
    return NULL;
  }

  /* the token extends up to the next delimiter or the end of string */
  *len = (int)strcspn(token, delim);
  return token;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  string_tokenize(): splits a string into tokens and stashes them
  in an array similar to argc/argv. This implementation relies on the
  fact that all printable characters except the space have ASCII
  values 33 and up. Every byte with a value of 32 or less separates
  tokens; this covers the usual whitespace \t (9), \n (10), \r (13),
  SPC (32), but also all other control characters. Bytes with values
  of 128 and up (e.g. UTF-8 sequences) are part of a token. Do not
  use this function where this coarse separation is not sufficient

  int string_tokenize returns 0 if successful, 1 if out of memory, 2 if
  the string contains no token. If 1 is returned, *inargv is still the
  old, valid (but full) array.

  int *inargc ptr to a counter for the tokens. Should be initialized
  to 0 before you call this function, unless you add to an existing
  array. Must be smaller than inargcmax on entry.

  char ***inargv ptr to the array of ptrs to the token
  strings. This array must be allocated with malloc() before you call
  this function. It must be able to hold at least inargcmax string
  entries. If more than inargcmax tokens are found, the array grows
  dynamically. The stored pointers point into inbuffer, they are not
  copies.

  int inargcmax size of ***inargv array (number of entries). Whenever
  the array becomes full it grows by 10 entries. The new size is not
  reported back to the caller.

  char *inbuffer buffer holding the string. This buffer will be
  modified while parsing, so keep a copy before you call this function
  if you need the original afterwards

  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
int string_tokenize(int *inargc, char ***inargv, int inargcmax, char* inbuffer) {
  int num_tokens = 0;
  char *token;
  char *eof_string;
  char **new_inargv;

  token = inbuffer;
  eof_string = &inbuffer[strlen(inbuffer)]; /* points to the terminating \0 */

  while (1) {
    /* search for the start of a token. The bytes are compared as
       unsigned char, otherwise bytes above 127 would count as
       separators where plain char is signed */
    while (token < eof_string && (unsigned char)*token < 33) {
      token++;
    }

    if (token == eof_string) { /* nothing but separators left */
      return (num_tokens) ? 0 : 2;
    }

    /* we obviously are at the start of a token. Save a pointer */
    (*inargv)[(*inargc)++] = token;
    num_tokens++;

    /* grow the array as soon as it is full, so there is always room
       for the next entry */
    if (*inargc == inargcmax) {
      inargcmax += 10;
      new_inargv = (char**)realloc(*inargv,
                                   (size_t)inargcmax*sizeof(char*));
      if (new_inargv == NULL) {
        return 1;
      }
      *inargv = new_inargv;
    }

    /* Look for the end */
    while (token < eof_string && (unsigned char)*token > 32) {
      token++;
    }

    if (token == eof_string) { /* last token ends with the string */
      return 0;
    }

    *token = '\0'; /* terminate token */
    token++;
  }
}
