Changeset: 1e4c318f6db5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1e4c318f6db5
Added Files:
        documentation/source/input.rst
Modified Files:
        documentation/source/index.rst
        monetdb5/modules/atoms/str.c
        sql/server/rel_optimizer.c
        sql/server/rel_rel.c
        sql/server/rel_rel.h
        tools/merovingian/client/monetdb.c
Branch: statistics-analytics
Log Message:

Merged with default


diffs (truncated from 477 to 300 lines):

diff --git a/documentation/source/index.rst b/documentation/source/index.rst
--- a/documentation/source/index.rst
+++ b/documentation/source/index.rst
@@ -10,6 +10,7 @@ Welcome to MonetDB's documentation!
    :maxdepth: 2
    :caption: Contents:
 
+   input
 
 
 Indices and tables
diff --git a/documentation/source/input.rst b/documentation/source/input.rst
new file mode 100644
--- /dev/null
+++ b/documentation/source/input.rst
@@ -0,0 +1,123 @@
+***********************
+Giving input to MonetDB
+***********************
+
+This chapter discusses what kinds of input MonetDB accepts as syntactically
+correct.
+
+Literals
+========
+
+
+Strings
+-------
+
+String literals are denoted by enclosing a sequence of UTF-8 characters between
+single quotes: ``'``. MonetDB allows the use of different quote sequences that
+control the interpretation of the contents of the string.
+
+.. note:: The quote sequences are *not* case sensitive, i.e. the strings
+   ``E'abc'`` and ``e'abc'`` are exactly the same.
+
+The available modes
+are as follows:
+
+Enhanced strings
+
+  Enhanced strings are enclosed between ``E'`` and ``'``. Within them various
+  C-like escape sequences are valid::
+
+    sql>SELECT E'Newline chars\nare interpreted like in C-strings';
+    +-------------------------------------------------+
+    | single_value                                    |
+    +=================================================+
+    | Newline chars                                   |
+    : are interpreted like in C-strings               :
+    +-------------------------------------------------+
+    1 tuple
+
+  .. warning::
+    By default MonetDB interprets strings in this mode, i.e. a string enclosed
+    in plain single quotes, without a prefix, is treated as an enhanced string.
+
+Raw strings
+
+  Raw strings are enclosed between the lexemes ``R'`` and ``'``. In this mode
+  all characters are interpreted literally except for the single quote character
+  (``'``), which must be escaped by writing it twice. This mode is what the
+  SQL standard specifies for strings::
+
+    sql>SELECT R'Backslash doesn''t have a special meaning here: \n';
+    +---------------------------------------------------+
+    | single_value                                      |
+    +===================================================+
+    | Backslash doesn't have a special meaning here: \n |
+    +---------------------------------------------------+
+    1 tuple
+
+Blobs
+  Strings enclosed in ``X'`` and ``'`` are intended for input of binary
+  blobs. Only hexadecimal digits (case insensitive) are allowed in this mode,
+  and the string must have an even number of characters. Every hexadecimal digit
+  pair is interpreted as one byte::
+
+   sql>SELECT X'12EEff';
+   +---------------------------------------------------+
+   | single_value                                      |
+   +===================================================+
+   | 12EEFF                                            |
+   +---------------------------------------------------+
+   1 tuple
+
+   sql>SELECT X'1';
+   incorrect blob 1 in: "select X'1';"
+
+Unicode sequences
+
+  Strings enclosed between ``U&'`` and ``'`` are interpreted as Unicode
+  sequences::
+
+   sql>select U&'\000a';
+   +--------------+
+   | single_value |
+   +==============+
+   |              |
+   +--------------+
+   1 tuple
+   sql>select U&'\0061';
+   +--------------+
+   | single_value |
+   +==============+
+   | a            |
+   +--------------+
+   1 tuple
+   sql>select U&'\00a';
+   Bad Unicode string in: "select U&'\00a';"
+   sql>select U&'\00oa';
+   Bad Unicode string in: "select U&'\00oa';"
+
+
+Numbers
+-------
+
+Comments
+--------
+
+You can write comments in three different ways:
+
+SQL line comments
+
+  These start with two hyphens: ``--`` and extend to the end of the line.
+
+Python line comments
+
+  Anything between the hash character ``#`` and the end of the line is ignored
+  by MonetDB.
+
+C block comments
+
+  MonetDB also ignores anything that is written between the lexemes ``/*`` and
+  ``*/``.
+
+Identifiers
+===========
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -96,7 +96,7 @@
  * high-performance hash-lookup (all code inlined).
  */
 
-/* These tables were generated from the Unicode 10.0.0 spec. */
+/* These tables were generated from the Unicode 12.1.0 spec. */
 struct UTF8_lower_upper {
        unsigned int from, to;
 } UTF8_toUpper[] = { /* code points with non-null uppercase conversion */
@@ -333,6 +333,7 @@ struct UTF8_lower_upper {
        { 0x0275, 0x019F, },
        { 0x027D, 0x2C64, },
        { 0x0280, 0x01A6, },
+       { 0x0282, 0xA7C5, },
        { 0x0283, 0x01A9, },
        { 0x0287, 0xA7B1, },
        { 0x0288, 0x01AE, },
@@ -594,6 +595,52 @@ struct UTF8_lower_upper {
        { 0x0584, 0x0554, },
        { 0x0585, 0x0555, },
        { 0x0586, 0x0556, },
+       { 0x10D0, 0x1C90, },
+       { 0x10D1, 0x1C91, },
+       { 0x10D2, 0x1C92, },
+       { 0x10D3, 0x1C93, },
+       { 0x10D4, 0x1C94, },
+       { 0x10D5, 0x1C95, },
+       { 0x10D6, 0x1C96, },
+       { 0x10D7, 0x1C97, },
+       { 0x10D8, 0x1C98, },
+       { 0x10D9, 0x1C99, },
+       { 0x10DA, 0x1C9A, },
+       { 0x10DB, 0x1C9B, },
+       { 0x10DC, 0x1C9C, },
+       { 0x10DD, 0x1C9D, },
+       { 0x10DE, 0x1C9E, },
+       { 0x10DF, 0x1C9F, },
+       { 0x10E0, 0x1CA0, },
+       { 0x10E1, 0x1CA1, },
+       { 0x10E2, 0x1CA2, },
+       { 0x10E3, 0x1CA3, },
+       { 0x10E4, 0x1CA4, },
+       { 0x10E5, 0x1CA5, },
+       { 0x10E6, 0x1CA6, },
+       { 0x10E7, 0x1CA7, },
+       { 0x10E8, 0x1CA8, },
+       { 0x10E9, 0x1CA9, },
+       { 0x10EA, 0x1CAA, },
+       { 0x10EB, 0x1CAB, },
+       { 0x10EC, 0x1CAC, },
+       { 0x10ED, 0x1CAD, },
+       { 0x10EE, 0x1CAE, },
+       { 0x10EF, 0x1CAF, },
+       { 0x10F0, 0x1CB0, },
+       { 0x10F1, 0x1CB1, },
+       { 0x10F2, 0x1CB2, },
+       { 0x10F3, 0x1CB3, },
+       { 0x10F4, 0x1CB4, },
+       { 0x10F5, 0x1CB5, },
+       { 0x10F6, 0x1CB6, },
+       { 0x10F7, 0x1CB7, },
+       { 0x10F8, 0x1CB8, },
+       { 0x10F9, 0x1CB9, },
+       { 0x10FA, 0x1CBA, },
+       { 0x10FD, 0x1CBD, },
+       { 0x10FE, 0x1CBE, },
+       { 0x10FF, 0x1CBF, },
        { 0x13F8, 0x13F0, },
        { 0x13F9, 0x13F1, },
        { 0x13FA, 0x13F2, },
@@ -611,6 +658,7 @@ struct UTF8_lower_upper {
        { 0x1C88, 0xA64A, },
        { 0x1D79, 0xA77D, },
        { 0x1D7D, 0x2C63, },
+       { 0x1D8E, 0xA7C6, },
        { 0x1E01, 0x1E00, },
        { 0x1E03, 0x1E02, },
        { 0x1E05, 0x1E04, },
@@ -1109,6 +1157,7 @@ struct UTF8_lower_upper {
        { 0xA78C, 0xA78B, },
        { 0xA791, 0xA790, },
        { 0xA793, 0xA792, },
+       { 0xA794, 0xA7C4, },
        { 0xA797, 0xA796, },
        { 0xA799, 0xA798, },
        { 0xA79B, 0xA79A, },
@@ -1121,6 +1170,11 @@ struct UTF8_lower_upper {
        { 0xA7A9, 0xA7A8, },
        { 0xA7B5, 0xA7B4, },
        { 0xA7B7, 0xA7B6, },
+       { 0xA7B9, 0xA7B8, },
+       { 0xA7BB, 0xA7BA, },
+       { 0xA7BD, 0xA7BC, },
+       { 0xA7BF, 0xA7BE, },
+       { 0xA7C3, 0xA7C2, },
        { 0xAB53, 0xA7B3, },
        { 0xAB70, 0x13A0, },
        { 0xAB71, 0x13A1, },
@@ -1387,6 +1441,38 @@ struct UTF8_lower_upper {
        { 0x118DD, 0x118BD, },
        { 0x118DE, 0x118BE, },
        { 0x118DF, 0x118BF, },
+       { 0x16E60, 0x16E40, },
+       { 0x16E61, 0x16E41, },
+       { 0x16E62, 0x16E42, },
+       { 0x16E63, 0x16E43, },
+       { 0x16E64, 0x16E44, },
+       { 0x16E65, 0x16E45, },
+       { 0x16E66, 0x16E46, },
+       { 0x16E67, 0x16E47, },
+       { 0x16E68, 0x16E48, },
+       { 0x16E69, 0x16E49, },
+       { 0x16E6A, 0x16E4A, },
+       { 0x16E6B, 0x16E4B, },
+       { 0x16E6C, 0x16E4C, },
+       { 0x16E6D, 0x16E4D, },
+       { 0x16E6E, 0x16E4E, },
+       { 0x16E6F, 0x16E4F, },
+       { 0x16E70, 0x16E50, },
+       { 0x16E71, 0x16E51, },
+       { 0x16E72, 0x16E52, },
+       { 0x16E73, 0x16E53, },
+       { 0x16E74, 0x16E54, },
+       { 0x16E75, 0x16E55, },
+       { 0x16E76, 0x16E56, },
+       { 0x16E77, 0x16E57, },
+       { 0x16E78, 0x16E58, },
+       { 0x16E79, 0x16E59, },
+       { 0x16E7A, 0x16E5A, },
+       { 0x16E7B, 0x16E5B, },
+       { 0x16E7C, 0x16E5C, },
+       { 0x16E7D, 0x16E5D, },
+       { 0x16E7E, 0x16E5E, },
+       { 0x16E7F, 0x16E5F, },
        { 0x1E922, 0x1E900, },
        { 0x1E923, 0x1E901, },
        { 0x1E924, 0x1E902, },
@@ -2017,6 +2103,52 @@ struct UTF8_lower_upper {
        { 0x13F3, 0x13FB, },
        { 0x13F4, 0x13FC, },
        { 0x13F5, 0x13FD, },
+       { 0x1C90, 0x10D0, },
+       { 0x1C91, 0x10D1, },
+       { 0x1C92, 0x10D2, },
+       { 0x1C93, 0x10D3, },
+       { 0x1C94, 0x10D4, },
+       { 0x1C95, 0x10D5, },
+       { 0x1C96, 0x10D6, },
+       { 0x1C97, 0x10D7, },
+       { 0x1C98, 0x10D8, },
+       { 0x1C99, 0x10D9, },
+       { 0x1C9A, 0x10DA, },
+       { 0x1C9B, 0x10DB, },
+       { 0x1C9C, 0x10DC, },
+       { 0x1C9D, 0x10DD, },
+       { 0x1C9E, 0x10DE, },
+       { 0x1C9F, 0x10DF, },
+       { 0x1CA0, 0x10E0, },
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to