Changeset: 119723724e8a for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/119723724e8a Modified Files: sql/server/sql_scan.c sql/server/sql_scan.h Branch: Jul2021 Log Message:
Avoid looking up raw_strings variable on every read. We keep a bit in the SQL scanner that controls whether it reads raw strings or not. It is initialized using the raw_strings setting/property. diffs (149 lines): diff --git a/sql/server/sql_scan.c b/sql/server/sql_scan.c --- a/sql/server/sql_scan.c +++ b/sql/server/sql_scan.c @@ -528,6 +528,7 @@ scanner_init(struct scanner *s, bstream .rs = rs, .ws = ws, .mode = LINE_N, + .raw_string_mode = GDKgetenv_istrue("raw_strings"), }; } @@ -985,11 +986,9 @@ int scanner_symbol(mvc * c, int cur) return cur; return tokenize(c, cur); case '\'': -#ifdef SQL_STRINGS_USE_ESCAPES - if (lc->next_string_is_raw || GDKgetenv_istrue("raw_strings")) + if (lc->raw_string_mode || lc->next_string_is_raw) return scanner_string(c, cur, false); return scanner_string(c, cur, true); -#endif case '"': return scanner_string(c, cur, false); case '{': @@ -1272,9 +1271,7 @@ sql_get_next_token(YYSTYPE *yylval, void if (token == IDENT || token == COMPARISON || token == RANK || token == aTYPE || token == ALIAS) { yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval); -#ifdef SQL_STRINGS_USE_ESCAPES lc->next_string_is_raw = false; -#endif } else if (token == STRING) { char quote = *yylval->sval; char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 ); @@ -1307,9 +1304,7 @@ sql_get_next_token(YYSTYPE *yylval, void strcpy(str, yylval->sval + 3); token = yylval->sval[2] == '\'' ? 
USTRING : UIDENT; quote = yylval->sval[2]; -#ifdef SQL_STRINGS_USE_ESCAPES lc->next_string_is_raw = true; -#endif break; case 'x': case 'X': @@ -1321,9 +1316,7 @@ sql_get_next_token(YYSTYPE *yylval, void *dst = 0; quote = '\''; token = XSTRING; -#ifdef SQL_STRINGS_USE_ESCAPES lc->next_string_is_raw = true; -#endif break; case 'r': case 'R': @@ -1336,9 +1329,7 @@ sql_get_next_token(YYSTYPE *yylval, void *dst = 0; break; default: -#ifdef SQL_STRINGS_USE_ESCAPES - if (GDKgetenv_istrue("raw_strings") || - lc->next_string_is_raw) { + if (lc->raw_string_mode || lc->next_string_is_raw) { dst = str; for (char *src = yylval->sval + 1; *src; dst++) if ((*dst = *src++) == '\'' && *src == '\'') @@ -1349,23 +1340,14 @@ sql_get_next_token(YYSTYPE *yylval, void (unsigned char *)yylval->sval + 1, lc->yycur - lc->yysval - 1); } -#else - dst = str; - for (char *src = yylval->sval + 1; *src; dst++) - if ((*dst = *src++) == '\'' && *src == '\'') - src++; - *dst = 0; -#endif break; } yylval->sval = str; /* reset original */ lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote; -#ifdef SQL_STRINGS_USE_ESCAPES } else { lc->next_string_is_raw = false; -#endif } return(token); diff --git a/sql/server/sql_scan.h b/sql/server/sql_scan.h --- a/sql/server/sql_scan.h +++ b/sql/server/sql_scan.h @@ -15,11 +15,6 @@ typedef enum { LINE_1, LINE_N } prot; -/* Currently, MonetDB interprets \ specially in strings. This is - * contrary to the SQL standard. Remove this define to revert to the - * standard interpretation. 
*/ -#define SQL_STRINGS_USE_ESCAPES 1 - struct scanner { bstream *rs; stream *ws; @@ -36,15 +31,31 @@ struct scanner { prot mode; /* which mode (line (1,N), blocked) */ char *schema; /* Keep schema name of create statement, needed AUTO_INCREMENT, SERIAL */ char *errstr; /* error message from the bowels of the scanner */ -#ifdef SQL_STRINGS_USE_ESCAPES - /* because we interpret \ in strings, we need state in the - * scanner so that we Do The Right Thing (TM) when we get a - * unicode string split up in multiple parts (i.e. U&'string1' - * 'string2') where the second and subsequent string MUST NOT - * undergo \ interpretation (luckily, when we get rid of this - * interpretation-by-default, we can remove the state) */ + + + /* Currently, MonetDB interprets \ specially in strings. This is contrary + * to the SQL standard. The default behavior of the reader is controlled by + * the ``raw_strings`` mserver5 option (``--set raw_strings=<true|false>``) and + * the database property of the same name of merovingian (``monetdb set + * raw_strings=yes <database>``). If it is ``true``, by default the reader + * interprets strings as specified in the SQL standard, i.e. no + * interpretation of the \ characters. If it is ``false``, \ characters are + * used for escaping. + * + * The default value of this setting is false as of the Jul2021 release, + * i.e. MonetDB by default interprets \ in strings. + * + * In case we are interpreting \ in strings, we need state in the + * scanner so that we Do The Right Thing (TM) when we get a unicode string + * split up in multiple parts (i.e. U&'string1' 'string2') where the second + * and subsequent strings MUST NOT undergo \ interpretation. + * + * Because we need to be backward compatible (i.e. we need to have ``select + * char_length('a\nb')`` return 3) the next 2 members will probably stay for + * the foreseeable future. 
+ */ bool next_string_is_raw; -#endif + bool raw_string_mode; }; #define QUERY(scanner) (scanner.rs->buf+scanner.rs->pos) @@ -55,4 +66,3 @@ sql_export void scanner_query_processed( extern int scanner_init_keywords(void); #endif /* _SQL_SCAN_H_ */ - _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list