Changeset: 1e4c318f6db5 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1e4c318f6db5
Added Files:
documentation/source/input.rst
Modified Files:
documentation/source/index.rst
monetdb5/modules/atoms/str.c
sql/server/rel_optimizer.c
sql/server/rel_rel.c
sql/server/rel_rel.h
tools/merovingian/client/monetdb.c
Branch: statistics-analytics
Log Message:
Merged with default
diffs (truncated from 477 to 300 lines):
diff --git a/documentation/source/index.rst b/documentation/source/index.rst
--- a/documentation/source/index.rst
+++ b/documentation/source/index.rst
@@ -10,6 +10,7 @@ Welcome to MonetDB's documentation!
:maxdepth: 2
:caption: Contents:
+ input
Indices and tables
diff --git a/documentation/source/input.rst b/documentation/source/input.rst
new file mode 100644
--- /dev/null
+++ b/documentation/source/input.rst
@@ -0,0 +1,123 @@
+***********************
+Giving input to MonetDB
+***********************
+
+This chapter discusses what kinds of input MonetDB accepts as syntactically
+correct.
+
+Literals
+========
+
+
+Strings
+-------
+
+String literals are denoted by enclosing a sequence of UTF-8 characters between
+single quotes: ``'``. MonetDB allows the use of different quote sequences that
+control the interpretation of the contents of the string.
+
+.. note:: The quote sequences are *not* case sensitive, i.e. the strings
+ ``E'abc'`` and ``e'abc'`` are exactly the same.
+
+The available modes
+are as follows:
+
+Enhanced strings
+
+ Enhanced strings are enclosed between ``E'`` and ``'``. Within them various
+ C-like escape sequences are valid::
+
+ sql>SELECT E'Newline chars\nare interpreted like in C-strings';
+ +-------------------------------------------------+
+ | single_value |
+ +=================================================+
+ | Newline chars |
+ : are interpreted like in C-strings :
+ +-------------------------------------------------+
+ 1 tuple
+
+ .. warning::
+ By default MonetDB interprets strings in this mode, i.e. a string enclosed
+ in just single quotes, without any prefix, is treated as an enhanced string.
+
+Raw strings
+
+ Raw strings are enclosed between the lexemes ``R'`` and ``'``. In this mode
+ all characters are interpreted literally except for the single quote character
+ (``'``), which needs to be escaped by writing it twice. This mode is what the
+ SQL standard specifies for strings::
+
+ sql>SELECT R'Backslash doesn''t have a special meaning here: \n';
+ +---------------------------------------------------+
+ | single_value |
+ +===================================================+
+ | Backslash doesn't have a special meaning here: \n |
+ +---------------------------------------------------+
+ 1 tuple
+
+Blobs
+ Strings enclosed in ``X'`` and ``'`` are intended for input of binary
+ blobs. Only hexadecimal digits (case insensitive) are allowed in this mode,
+ and the string must have an even number of characters. Every hexadecimal digit
+ pair is interpreted as one byte::
+
+ sql>SELECT X'12EEff';
+ +---------------------------------------------------+
+ | single_value |
+ +===================================================+
+ | 12EEFF |
+ +---------------------------------------------------+
+ 1 tuple
+
+ sql>SELECT X'1';
+ incorrect blob 1 in: "select X'1';"
+
+Unicode sequences
+
+ Strings enclosed between ``U&'`` and ``'`` are interpreted as Unicode
+ sequences::
+
+ sql>select U&'\000a';
+ +--------------+
+ | single_value |
+ +==============+
+ | |
+ +--------------+
+ 1 tuple
+ sql>select U&'\0061';
+ +--------------+
+ | single_value |
+ +==============+
+ | a |
+ +--------------+
+ 1 tuple
+ sql>select U&'\00a';
+ Bad Unicode string in: "select U&'\00a';"
+ sql>select U&'\00oa';
+ Bad Unicode string in: "select U&'\00oa';"
+
+
+Numbers
+-------
+
+Comments
+--------
+
+You can write comments in three different ways:
+
+SQL line comments
+
+ These start with two hyphens (``--``) and extend to the end of the line.
+
+Python line comments
+
+ Anything between the hash character ``#`` and the end of the line is ignored
+ by MonetDB.
+
+C block comments
+
+ MonetDB also ignores anything that is written between the lexemes ``/*`` and
+ ``*/``.
+
+Identifiers
+===========
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -96,7 +96,7 @@
* high-performance hash-lookup (all code inlined).
*/
-/* These tables were generated from the Unicode 10.0.0 spec. */
+/* These tables were generated from the Unicode 12.1.0 spec. */
struct UTF8_lower_upper {
unsigned int from, to;
} UTF8_toUpper[] = { /* code points with non-null uppercase conversion */
@@ -333,6 +333,7 @@ struct UTF8_lower_upper {
{ 0x0275, 0x019F, },
{ 0x027D, 0x2C64, },
{ 0x0280, 0x01A6, },
+ { 0x0282, 0xA7C5, },
{ 0x0283, 0x01A9, },
{ 0x0287, 0xA7B1, },
{ 0x0288, 0x01AE, },
@@ -594,6 +595,52 @@ struct UTF8_lower_upper {
{ 0x0584, 0x0554, },
{ 0x0585, 0x0555, },
{ 0x0586, 0x0556, },
+ { 0x10D0, 0x1C90, },
+ { 0x10D1, 0x1C91, },
+ { 0x10D2, 0x1C92, },
+ { 0x10D3, 0x1C93, },
+ { 0x10D4, 0x1C94, },
+ { 0x10D5, 0x1C95, },
+ { 0x10D6, 0x1C96, },
+ { 0x10D7, 0x1C97, },
+ { 0x10D8, 0x1C98, },
+ { 0x10D9, 0x1C99, },
+ { 0x10DA, 0x1C9A, },
+ { 0x10DB, 0x1C9B, },
+ { 0x10DC, 0x1C9C, },
+ { 0x10DD, 0x1C9D, },
+ { 0x10DE, 0x1C9E, },
+ { 0x10DF, 0x1C9F, },
+ { 0x10E0, 0x1CA0, },
+ { 0x10E1, 0x1CA1, },
+ { 0x10E2, 0x1CA2, },
+ { 0x10E3, 0x1CA3, },
+ { 0x10E4, 0x1CA4, },
+ { 0x10E5, 0x1CA5, },
+ { 0x10E6, 0x1CA6, },
+ { 0x10E7, 0x1CA7, },
+ { 0x10E8, 0x1CA8, },
+ { 0x10E9, 0x1CA9, },
+ { 0x10EA, 0x1CAA, },
+ { 0x10EB, 0x1CAB, },
+ { 0x10EC, 0x1CAC, },
+ { 0x10ED, 0x1CAD, },
+ { 0x10EE, 0x1CAE, },
+ { 0x10EF, 0x1CAF, },
+ { 0x10F0, 0x1CB0, },
+ { 0x10F1, 0x1CB1, },
+ { 0x10F2, 0x1CB2, },
+ { 0x10F3, 0x1CB3, },
+ { 0x10F4, 0x1CB4, },
+ { 0x10F5, 0x1CB5, },
+ { 0x10F6, 0x1CB6, },
+ { 0x10F7, 0x1CB7, },
+ { 0x10F8, 0x1CB8, },
+ { 0x10F9, 0x1CB9, },
+ { 0x10FA, 0x1CBA, },
+ { 0x10FD, 0x1CBD, },
+ { 0x10FE, 0x1CBE, },
+ { 0x10FF, 0x1CBF, },
{ 0x13F8, 0x13F0, },
{ 0x13F9, 0x13F1, },
{ 0x13FA, 0x13F2, },
@@ -611,6 +658,7 @@ struct UTF8_lower_upper {
{ 0x1C88, 0xA64A, },
{ 0x1D79, 0xA77D, },
{ 0x1D7D, 0x2C63, },
+ { 0x1D8E, 0xA7C6, },
{ 0x1E01, 0x1E00, },
{ 0x1E03, 0x1E02, },
{ 0x1E05, 0x1E04, },
@@ -1109,6 +1157,7 @@ struct UTF8_lower_upper {
{ 0xA78C, 0xA78B, },
{ 0xA791, 0xA790, },
{ 0xA793, 0xA792, },
+ { 0xA794, 0xA7C4, },
{ 0xA797, 0xA796, },
{ 0xA799, 0xA798, },
{ 0xA79B, 0xA79A, },
@@ -1121,6 +1170,11 @@ struct UTF8_lower_upper {
{ 0xA7A9, 0xA7A8, },
{ 0xA7B5, 0xA7B4, },
{ 0xA7B7, 0xA7B6, },
+ { 0xA7B9, 0xA7B8, },
+ { 0xA7BB, 0xA7BA, },
+ { 0xA7BD, 0xA7BC, },
+ { 0xA7BF, 0xA7BE, },
+ { 0xA7C3, 0xA7C2, },
{ 0xAB53, 0xA7B3, },
{ 0xAB70, 0x13A0, },
{ 0xAB71, 0x13A1, },
@@ -1387,6 +1441,38 @@ struct UTF8_lower_upper {
{ 0x118DD, 0x118BD, },
{ 0x118DE, 0x118BE, },
{ 0x118DF, 0x118BF, },
+ { 0x16E60, 0x16E40, },
+ { 0x16E61, 0x16E41, },
+ { 0x16E62, 0x16E42, },
+ { 0x16E63, 0x16E43, },
+ { 0x16E64, 0x16E44, },
+ { 0x16E65, 0x16E45, },
+ { 0x16E66, 0x16E46, },
+ { 0x16E67, 0x16E47, },
+ { 0x16E68, 0x16E48, },
+ { 0x16E69, 0x16E49, },
+ { 0x16E6A, 0x16E4A, },
+ { 0x16E6B, 0x16E4B, },
+ { 0x16E6C, 0x16E4C, },
+ { 0x16E6D, 0x16E4D, },
+ { 0x16E6E, 0x16E4E, },
+ { 0x16E6F, 0x16E4F, },
+ { 0x16E70, 0x16E50, },
+ { 0x16E71, 0x16E51, },
+ { 0x16E72, 0x16E52, },
+ { 0x16E73, 0x16E53, },
+ { 0x16E74, 0x16E54, },
+ { 0x16E75, 0x16E55, },
+ { 0x16E76, 0x16E56, },
+ { 0x16E77, 0x16E57, },
+ { 0x16E78, 0x16E58, },
+ { 0x16E79, 0x16E59, },
+ { 0x16E7A, 0x16E5A, },
+ { 0x16E7B, 0x16E5B, },
+ { 0x16E7C, 0x16E5C, },
+ { 0x16E7D, 0x16E5D, },
+ { 0x16E7E, 0x16E5E, },
+ { 0x16E7F, 0x16E5F, },
{ 0x1E922, 0x1E900, },
{ 0x1E923, 0x1E901, },
{ 0x1E924, 0x1E902, },
@@ -2017,6 +2103,52 @@ struct UTF8_lower_upper {
{ 0x13F3, 0x13FB, },
{ 0x13F4, 0x13FC, },
{ 0x13F5, 0x13FD, },
+ { 0x1C90, 0x10D0, },
+ { 0x1C91, 0x10D1, },
+ { 0x1C92, 0x10D2, },
+ { 0x1C93, 0x10D3, },
+ { 0x1C94, 0x10D4, },
+ { 0x1C95, 0x10D5, },
+ { 0x1C96, 0x10D6, },
+ { 0x1C97, 0x10D7, },
+ { 0x1C98, 0x10D8, },
+ { 0x1C99, 0x10D9, },
+ { 0x1C9A, 0x10DA, },
+ { 0x1C9B, 0x10DB, },
+ { 0x1C9C, 0x10DC, },
+ { 0x1C9D, 0x10DD, },
+ { 0x1C9E, 0x10DE, },
+ { 0x1C9F, 0x10DF, },
+ { 0x1CA0, 0x10E0, },
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list