Changeset: 119723724e8a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/119723724e8a
Modified Files:
        sql/server/sql_scan.c
        sql/server/sql_scan.h
Branch: Jul2021
Log Message:

Avoid looking up raw_strings variable on every read

We keep a flag in the SQL scanner that controls whether it reads raw
strings or not. It is initialized once, in scanner_init, from the
raw_strings setting/property.


diffs (149 lines):

diff --git a/sql/server/sql_scan.c b/sql/server/sql_scan.c
--- a/sql/server/sql_scan.c
+++ b/sql/server/sql_scan.c
@@ -528,6 +528,7 @@ scanner_init(struct scanner *s, bstream 
                .rs = rs,
                .ws = ws,
                .mode = LINE_N,
+               .raw_string_mode = GDKgetenv_istrue("raw_strings"),
        };
 }
 
@@ -985,11 +986,9 @@ int scanner_symbol(mvc * c, int cur)
                        return cur;
                return tokenize(c, cur);
        case '\'':
-#ifdef SQL_STRINGS_USE_ESCAPES
-               if (lc->next_string_is_raw || GDKgetenv_istrue("raw_strings"))
+               if (lc->raw_string_mode || lc->next_string_is_raw)
                        return scanner_string(c, cur, false);
                return scanner_string(c, cur, true);
-#endif
        case '"':
                return scanner_string(c, cur, false);
        case '{':
@@ -1272,9 +1271,7 @@ sql_get_next_token(YYSTYPE *yylval, void
        if (token == IDENT || token == COMPARISON ||
            token == RANK || token == aTYPE || token == ALIAS) {
                yylval->sval = sa_strndup(c->sa, yylval->sval, 
lc->yycur-lc->yysval);
-#ifdef SQL_STRINGS_USE_ESCAPES
                lc->next_string_is_raw = false;
-#endif
        } else if (token == STRING) {
                char quote = *yylval->sval;
                char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
@@ -1307,9 +1304,7 @@ sql_get_next_token(YYSTYPE *yylval, void
                        strcpy(str, yylval->sval + 3);
                        token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
                        quote = yylval->sval[2];
-#ifdef SQL_STRINGS_USE_ESCAPES
                        lc->next_string_is_raw = true;
-#endif
                        break;
                case 'x':
                case 'X':
@@ -1321,9 +1316,7 @@ sql_get_next_token(YYSTYPE *yylval, void
                        *dst = 0;
                        quote = '\'';
                        token = XSTRING;
-#ifdef SQL_STRINGS_USE_ESCAPES
                        lc->next_string_is_raw = true;
-#endif
                        break;
                case 'r':
                case 'R':
@@ -1336,9 +1329,7 @@ sql_get_next_token(YYSTYPE *yylval, void
                        *dst = 0;
                        break;
                default:
-#ifdef SQL_STRINGS_USE_ESCAPES
-                       if (GDKgetenv_istrue("raw_strings") ||
-                           lc->next_string_is_raw) {
+                       if (lc->raw_string_mode || lc->next_string_is_raw) {
                                dst = str;
                                for (char *src = yylval->sval + 1; *src; dst++)
                                        if ((*dst = *src++) == '\'' && *src == 
'\'')
@@ -1349,23 +1340,14 @@ sql_get_next_token(YYSTYPE *yylval, void
                                              (unsigned char *)yylval->sval + 1,
                                              lc->yycur - lc->yysval - 1);
                        }
-#else
-                       dst = str;
-                       for (char *src = yylval->sval + 1; *src; dst++)
-                               if ((*dst = *src++) == '\'' && *src == '\'')
-                                       src++;
-                       *dst = 0;
-#endif
                        break;
                }
                yylval->sval = str;
 
                /* reset original */
                lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
-#ifdef SQL_STRINGS_USE_ESCAPES
        } else {
                lc->next_string_is_raw = false;
-#endif
        }
 
        return(token);
diff --git a/sql/server/sql_scan.h b/sql/server/sql_scan.h
--- a/sql/server/sql_scan.h
+++ b/sql/server/sql_scan.h
@@ -15,11 +15,6 @@
 
 typedef enum { LINE_1, LINE_N } prot;
 
-/* Currently, MonetDB interprets \ specially in strings.  This is
- * contrary to the SQL standard.  Remove this define to revert to the
- * standard interpretation. */
-#define SQL_STRINGS_USE_ESCAPES 1
-
 struct scanner {
        bstream *rs;
        stream *ws;
@@ -36,15 +31,31 @@ struct scanner {
        prot mode;              /* which mode (line (1,N), blocked) */
        char *schema;   /* Keep schema name of create statement, needed 
AUTO_INCREMENT, SERIAL */
        char *errstr;   /* error message from the bowels of the scanner */
-#ifdef SQL_STRINGS_USE_ESCAPES
-       /* because we interpret \ in strings, we need state in the
-        * scanner so that we Do The Right Thing (TM) when we get a
-        * unicode string split up in multiple parts (i.e. U&'string1'
-        * 'string2') where the second and subsequent string MUST NOT
-        * undergo \ interpretation (luckily, when we get rid of this
-        * interpretation-by-default, we can remove the state) */
+
+
+       /* Currently, MonetDB interprets \ specially in strings.  This is 
contrary
+        * to the SQL standard. The default behavior of the reader is 
controlled by
+        * ``raw_strings`` mserver5 option (``--set raw_strings=<true|false>``) 
and
+        * the database property of the same name of merovingian (``monetdb set
+        * raw_strings=yes <database>``). If it is ``true``, the reader
+        * interprets strings as specified in the SQL standard by default, i.e.
+        * it does not interpret \ characters. If it is ``false``, \ characters
+        * are used for escaping.
+        *
+        * The default value of this setting is false as of the Jul2021 release,
+        * i.e. MonetDB by default interprets \ in strings.
+        *
+        * In case that we are interpreting \ in strings, we need state in the
+        * scanner so that we Do The Right Thing (TM) when we get a unicode 
string
+        * split up in multiple parts (i.e. U&'string1' 'string2') where the 
second
+        * and subsequent string MUST NOT undergo \ interpretation.
+        *
+        * Because we need to be backward compatible (i.e. we need to have
+        * ``select char_length('a\nb')`` return 3) the next 2 members will
+        * probably stay for the foreseeable future.
+        */
        bool next_string_is_raw;
-#endif
+       bool raw_string_mode;
 };
 
 #define QUERY(scanner) (scanner.rs->buf+scanner.rs->pos)
@@ -55,4 +66,3 @@ sql_export void scanner_query_processed(
 
 extern int scanner_init_keywords(void);
 #endif /* _SQL_SCAN_H_ */
-
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to