subject:"Ltree syntax improvement"

Ltree syntax improvement

2019-01-29 Thread Dmitry Belyavsky

Dear all,

Please find attached the patch extending the sets of symbols allowed in
ltree labels. The patch introduces 2 variants of escaping symbols, via
backslashing separate symbols and via quoting the labels in whole for
ltree, lquery and ltxtquery datatypes.

-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\z\z\z\z\z')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789z
+(1 row)
+
+SELECT('   ' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\a\b\c\d\e   ')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcde
+(1 row)
+
+SELECT 'abc\|d'::lquery;
+ lquery  
+-
+ "abc|d"
+(1 row)
+
+SELECT 'abc\|d'::ltree ~ 'abc\|d'::lquery;
+ ?column? 
+--
+ t
+(1 row)
+
+SELECT 'abc|d'::ltree ~ 'abc*'::lquery; --true
+ ?column?

Ltree syntax improvement

2019-01-29 Thread Dmitry Belyavsky

Dear all,

Please find attached the patch extending the sets of symbols allowed in
ltree labels. The patch introduces 2 variants of escaping symbols, via
backslashing separate symbols and via quoting the labels in whole for
ltree, lquery and ltxtquery datatypes.

-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\z\z\z\z\z')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789z
+(1 row)
+
+SELECT('   ' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\a\b\c\d\e   ')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcde
+(1 row)
+
+SELECT 'abc\|d'::lquery;
+ lquery  
+-
+ "abc|d"
+(1 row)
+
+SELECT 'abc\|d'::ltree ~ 'abc\|d'::lquery;
+ ?column? 
+--
+ t
+(1 row)
+
+SELECT 'abc|d'::ltree ~ 'abc*'::lquery; --true
+ ?column?

Re: Ltree syntax improvement

2019-03-07 Thread Chris Travers

On Tue, Feb 5, 2019 at 2:27 PM Nikolay Shaplov  wrote:

> В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry
> Belyavsky написал:
>
> > Please find attached the patch extending the sets of symbols allowed in
> > ltree labels. The patch introduces 2 variants of escaping symbols, via
> > backslashing separate symbols and via quoting the labels in whole for
> > ltree, lquery and ltxtquery datatypes.
>
> Hi!
>
> Let's me join the review process...
>
> I've just give a superficial look to the patch, read it through, and try
> here
> and there, so first I'll give feedback about exterior features of the
> patch.
>
> So, the first question: why do we need all this? The answer seems to be
> obvious, but it would be good say it explicitly. What problems would it
> solve?
> Do these problems really exists?
>

Yes, it does.  We maintain an extension (https://github.com/adjust/wltree)
which has a fixed separator (::) and allows any utf-8 character in the tree.

In our case we currently use our extended tree to store user-defined
hierarchies of labels, which might contain whitespace, Chinese, Japanese,
or Korean characters, etc.

I would love to be able to deprecate our work on this extension and
eventually use stock ltree.

>
> The second question is about coding style. As far as I can see you've
> passed
> your code through pg_indent, but it does not solve all problems.
>
> As far as I know, postgres commiters are quite strict about code width,
> the
> code should not be wider that 80 col, except places were string constants
> are
> introduced (There you can legally exceed 80 col). So I would advice to use
> vim
> with  tabstop=4  and colorcolumn=80 and make sure that new code text does
> not
> cross red vertical line.
>
> Comments. There is no fixed coding style for comments in postgres. Usually
> this
> means that it is better to follow coding in the code around. But ltree
> does
> not have much comments, so I would suggest to use the best style I've seen
> in
> the code
> /*
>  * [function name]
>  * < tab > [Short description, what does it do]
>  *
>  * [long explanation how it works, several subparagraph if needed
>  */
> (Seen this in src/include/utils/rel.h)
> or something like that.
> At least it would be good to have explicit separation between descriptions
> and
> explanation of how it works.
>
> And about comment
> /*
>  * Function calculating bytes to escape
>  * to_escape is an array of "special" 1-byte symbols
>  * Behvaiour:
>  * If there is no "special" symbols, return 0
>  * If there are any special symbol, we need initial and final quote, so
> return
> 2
>  * If there are any quotes, we need to escape all of them and also initial
> and
> final quote, so
>  * return 2 + number of quotes
>  */
>
> It has some formatting. But it should not.  As far as I understand this
> comment should be treated as single paragraph by reformatter, and
> refomatted.
> I do not understand why pg_indent have not done it. Documentation (https://
> www.postgresql.org/docs/11/source-format.html) claims that if you want to
> avoid reformatting, you should add "-" at the beginning and at
> the
> end of the comment. So it would be good either remove this formatting, or
> add
> "" if you are sure you want to keep it.
>
> Third part is about error messages.
> First I've seen errors like elog(ERROR, "internal error during splitting
> levels");. I've never seen error like that in postgres. May be if this
> error
> is in the part of the code, that should never be reached in real live, may
> be
> it would be good to change it to Assert? Or if this code can be normally
> reached, add some more explanation of what had happened...
>
> About other error messages.
>
> There are messages like
> errmsg("syntax error"),
> errdetail("Unexpected end of line.")));
>
> or
>
> errmsg("escaping syntax error")));
>
>
> In postgres there is error message style guide
> https://www.postgresql.org/docs/11/error-style-guide.html
>
> Here I would expect messages like that
>
> Primary:Error parsing ltree path
> Detail: Curly quote is opened and not closed
>
> Or something like that, I am not expert in English, but this way it would
> be
> better for sure. Key points are: Primary message should point to general
> area
> where error occurred (there can be a large SQL with a lot of things in it,
> so
> it is good to point that problem is with ltree staff). And is is better to
> word from the point of view of a user. What for you (and me) is unexpected
> end
> of line, for him it is unclosed curly quote.
>
> Also I am not sure, if it is easy, but if it is possible, it would be good
> to
> move "^" symbol that points to the place of the error, to the place inside
> ' '
> where unclosed curly quote is located. As a user I would appreciate that.
>
> So that's what I've seen while giving it superficial look...
>
>
>

-- 
Best Regards,
Chris Travers
Head of Database

Tel: +49 162 9037 210 | Skype: einhverfr | www.adjust

Re: Ltree syntax improvement

2019-03-07 Thread Filip Rembiałkowski

Good day.

Sorry to pop in, but if you are active users of ltree, please let me
know if you rely on the exclamation operator (negative matching)?
I have just filed a patch,  fixing very old bug - and it changes the
logic substantially.
https://commitfest.postgresql.org/23/2054/ - I'd be grateful for your
comments (please use the other thread).




On Thu, Mar 7, 2019 at 1:10 PM Chris Travers  wrote:
>
>
>
> On Tue, Feb 5, 2019 at 2:27 PM Nikolay Shaplov  wrote:
>>
>> В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry
>> Belyavsky написал:
>>
>> > Please find attached the patch extending the sets of symbols allowed in
>> > ltree labels. The patch introduces 2 variants of escaping symbols, via
>> > backslashing separate symbols and via quoting the labels in whole for
>> > ltree, lquery and ltxtquery datatypes.
>>
>> Hi!
>>
>> Let's me join the review process...
>>
>> I've just give a superficial look to the patch, read it through, and try  
>> here
>> and there, so first I'll give feedback about exterior features of the patch.
>>
>> So, the first question: why do we need all this? The answer seems to be
>> obvious, but it would be good say it explicitly. What problems would it 
>> solve?
>> Do these problems really exists?
>
>
> Yes, it does.  We maintain an extension (https://github.com/adjust/wltree) 
> which has a fixed separator (::) and allows any utf-8 character in the tree.
>
> In our case we currently use our extended tree to store user-defined 
> hierarchies of labels, which might contain whitespace, Chinese, Japanese, or 
> Korean characters, etc.
>
> I would love to be able to deprecate our work on this extension and 
> eventually use stock ltree.
>>
>>
>> The second question is about coding style. As far as I can see you've passed
>> your code through pg_indent, but it does not solve all problems.
>>
>> As far as I know, postgres commiters are quite strict about code width, the
>> code should not be wider that 80 col, except places were string constants are
>> introduced (There you can legally exceed 80 col). So I would advice to use 
>> vim
>> with  tabstop=4  and colorcolumn=80 and make sure that new code text does not
>> cross red vertical line.
>>
>> Comments. There is no fixed coding style for comments in postgres. Usually 
>> this
>> means that it is better to follow coding in the code around. But ltree does
>> not have much comments, so I would suggest to use the best style I've seen in
>> the code
>> /*
>>  * [function name]
>>  * < tab > [Short description, what does it do]
>>  *
>>  * [long explanation how it works, several subparagraph if needed
>>  */
>> (Seen this in src/include/utils/rel.h)
>> or something like that.
>> At least it would be good to have explicit separation between descriptions 
>> and
>> explanation of how it works.
>>
>> And about comment
>> /*
>>  * Function calculating bytes to escape
>>  * to_escape is an array of "special" 1-byte symbols
>>  * Behvaiour:
>>  * If there is no "special" symbols, return 0
>>  * If there are any special symbol, we need initial and final quote, so 
>> return
>> 2
>>  * If there are any quotes, we need to escape all of them and also initial 
>> and
>> final quote, so
>>  * return 2 + number of quotes
>>  */
>>
>> It has some formatting. But it should not.  As far as I understand this
>> comment should be treated as single paragraph by reformatter, and refomatted.
>> I do not understand why pg_indent have not done it. Documentation (https://
>> www.postgresql.org/docs/11/source-format.html) claims that if you want to
>> avoid reformatting, you should add "-" at the beginning and at 
>> the
>> end of the comment. So it would be good either remove this formatting, or add
>> "" if you are sure you want to keep it.
>>
>> Third part is about error messages.
>> First I've seen errors like elog(ERROR, "internal error during splitting
>> levels");. I've never seen error like that in postgres. May be if this error
>> is in the part of the code, that should never be reached in real live, may be
>> it would be good to change it to Assert? Or if this code can be normally
>> reached, add some more explanation of what had happened...
>>
>> About other error messages.
>>
>> There are messages like
>> errmsg("syntax error"),
>> errdetail("Unexpected end of line.")));
>>
>> or
>>
>> errmsg("escaping syntax error")));
>>
>>
>> In postgres there is error message style guide
>> https://www.postgresql.org/docs/11/error-style-guide.html
>>
>> Here I would expect messages like that
>>
>> Primary:Error parsing ltree path
>> Detail: Curly quote is opened and not closed
>>
>> Or something like that, I am not expert in English, but this way it would be
>> better for sure. Key points are: Primary message should point to general area
>> where error occurred (there can be a large SQL with a lot of things in it, so
>> it is good to point that problem is with ltree staff). And is is better to
>> word from the point of vie

Re: Ltree syntax improvement

2019-03-31 Thread Nikolay Shaplov

В письме от четверг, 7 марта 2019 г. 13:09:49 MSK пользователь Chris Travers 
написал:

>  We maintain an extension (https://github.com/adjust/wltree)
> which has a fixed separator (::) and allows any utf-8 character in the tree.
> 
> In our case we currently use our extended tree to store user-defined
> hierarchies of labels, which might contain whitespace, Chinese, Japanese,
> or Korean characters, etc.
> 
> I would love to be able to deprecate our work on this extension and
> eventually use stock ltree.
I am afraid, that if would not be possible to have ltree with custom 
separator. Or we need to pass a very long way to reach there.

To have custom separator we will need some place to store it.

We can use GUC variable, and set separator for ltree globally. It might solve 
some of the problems. But will get some more. If for example we dump database, 
and then restore it on instance where that GUC option is not set, everything 
will be brocken.

It is not opclass option (that are discussed  here on the list), because 
opclass options is no about parsing, but about comparing things that was 
already parsed.

It is not option of an attribute (if we can have custom attribute option), 
because we can have ltree as a variable, without creating an attribute...

It should be an option of ltree type itself. I have no idea how we can 
implement this. And who will allow us to commit such strange thing ;-)

Re: Ltree syntax improvement

2019-02-05 Thread Nikolay Shaplov

В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry 
Belyavsky написал:

> Please find attached the patch extending the sets of symbols allowed in
> ltree labels. The patch introduces 2 variants of escaping symbols, via
> backslashing separate symbols and via quoting the labels in whole for
> ltree, lquery and ltxtquery datatypes.

Hi!

Let's me join the review process...

I've just give a superficial look to the patch, read it through, and try  here 
and there, so first I'll give feedback about exterior features of the patch.

So, the first question: why do we need all this? The answer seems to be 
obvious, but it would be good say it explicitly. What problems would it solve? 
Do these problems really exists? 

The second question is about coding style. As far as I can see you've passed 
your code through pg_indent, but it does not solve all problems.

As far as I know, postgres commiters are quite strict about code width, the 
code should not be wider that 80 col, except places were string constants are 
introduced (There you can legally exceed 80 col). So I would advice to use vim 
with  tabstop=4  and colorcolumn=80 and make sure that new code text does not 
cross red vertical line. 

Comments. There is no fixed coding style for comments in postgres. Usually this 
means that it is better to follow coding in the code around. But ltree does 
not have much comments, so I would suggest to use the best style I've seen in 
the code
/*
 * [function name]
 * < tab > [Short description, what does it do]
 * 
 * [long explanation how it works, several subparagraph if needed
 */
(Seen this in src/include/utils/rel.h)
or something like that.
At least it would be good to have explicit separation between descriptions and 
explanation of how it works.

And about comment
/*
 * Function calculating bytes to escape
 * to_escape is an array of "special" 1-byte symbols
 * Behvaiour:
 * If there is no "special" symbols, return 0
 * If there are any special symbol, we need initial and final quote, so return 
2
 * If there are any quotes, we need to escape all of them and also initial and 
final quote, so
 * return 2 + number of quotes
 */

It has some formatting. But it should not.  As far as I understand this 
comment should be treated as single paragraph by reformatter, and refomatted. 
I do not understand why pg_indent have not done it. Documentation (https://
www.postgresql.org/docs/11/source-format.html) claims that if you want to 
avoid reformatting, you should add "-" at the beginning and at the 
end of the comment. So it would be good either remove this formatting, or add 
"" if you are sure you want to keep it.

Third part is about error messages.
First I've seen errors like elog(ERROR, "internal error during splitting 
levels");. I've never seen error like that in postgres. May be if this error 
is in the part of the code, that should never be reached in real live, may be 
it would be good to change it to Assert? Or if this code can be normally 
reached, add some more explanation of what had happened...

About other error messages.

There are messages like 
errmsg("syntax error"),
errdetail("Unexpected end of line.")));

or

errmsg("escaping syntax error")));


In postgres there is error message style guide 
https://www.postgresql.org/docs/11/error-style-guide.html

Here I would expect messages like that

Primary:Error parsing ltree path
Detail: Curly quote is opened and not closed

Or something like that, I am not expert in English, but this way it would be 
better for sure. Key points are: Primary message should point to general area 
where error occurred (there can be a large SQL with a lot of things in it, so 
it is good to point that problem is with ltree staff). And is is better to 
word from the point of view of a user. What for you (and me) is unexpected end 
of line, for him it is unclosed curly quote. 

Also I am not sure, if it is easy, but if it is possible, it would be good to 
move "^" symbol that points to the place of the error, to the place inside ' ' 
where unclosed curly quote is located. As a user I would appreciate that.

So that's what I've seen while giving it superficial look...

Re: Ltree syntax improvement

2019-02-20 Thread Nikolay Shaplov

В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry 
Belyavsky написал:
> Dear all,
> 
> Please find attached the patch extending the sets of symbols allowed in
> ltree labels. The patch introduces 2 variants of escaping symbols, via
> backslashing separate symbols and via quoting the labels in whole for
> ltree, lquery and ltxtquery datatypes.

Sorry I still did not do full revision, I've stumbled up on other things in 
the code, which, as I think should be fixed before final checking through. (I 
guess that code do what is expected, since it passes tests and so on, it needs 
recheck, but I'd like to do this full recheck only once)

So my doubts about this code is that a lot parts of code is just a copy-paste 
of the same code that was written right above. And it can be rewritten in a 
way that the same lines (or parts of lines) is repeated only once...
This should make code more readable, and make code more similar to the code of 
postgres core, I've seen.

Here I will speak about lquery_in function from ltree_io.c.

1. May be it is better here to switch from else if (state == 
LQPRS_[something]) to switch (stat) { case LQPRS_[something]: }
because because here the main case is to determine what is the current state 
of your state machine, do something with it, and then go to the next step. 

Now after you've done what should be done in this step, you should make sure 
that no other code is executed writing more ifs... If you use switch/case you 
will be able to say break wherever you like, it will save you some ifs.

Theoretical example

(cl is for character length fc is for fist char)

if (cl==1 && fc =='1') {do_case1();}
else if (cl==1 && fc =='2') {do_case2();}
else if (cl==1 && fc =='3') {do_case3();}
else {do_otherwise();}

If it is wrapped in switch/case it will be like

if (cl==1)
{
  if (fc =='1') {do_case1(); break;}
  if (fc =='2') {do_case2(); break;}
  if (fc =='3') {do_case3(); break;}
}
/*if nothing is found */
do_otherwise();

This for example will allow you to get rid of multiply charlen == 1 checks in 

else if ((lptr->flag & LVAR_QUOTEDPART) == 0)   


{   


if (charlen == 1 && t_iseq(ptr, '@'))   


{   


if (lptr->start == ptr) 


UNCHAR; 


lptr->flag |= LVAR_INCASE;  


curqlevel->flag |= LVAR_INCASE; 


}   


else if (charlen == 1 && t_iseq(ptr, '*'))  


{   


if (lptr->start == ptr) 


UNCHAR; 


lptr->flag |= LVAR_ANYEND;  


curqlevel->flag |= LVAR_ANYEND;

Re: Ltree syntax improvement

2019-02-24 Thread Dmitry Belyavsky

Dear Nikolay,

On Wed, Feb 20, 2019 at 12:28 PM Nikolay Shaplov  wrote:

> В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry
> Belyavsky написал:
> > Dear all,
> >
> > Please find attached the patch extending the sets of symbols allowed in
> > ltree labels. The patch introduces 2 variants of escaping symbols, via
> > backslashing separate symbols and via quoting the labels in whole for
> > ltree, lquery and ltxtquery datatypes.
>
> Sorry I still did not do full revision, I've stumbled up on other things
> in
> the code, which, as I think should be fixed before final checking through.
> (I
> guess that code do what is expected, since it passes tests and so on, it
> needs
> recheck, but I'd like to do this full recheck only once)
>
> So my doubts about this code is that a lot parts of code is just a
> copy-paste
> of the same code that was written right above. And it can be rewritten in
> a
> way that the same lines (or parts of lines) is repeated only once...
> This should make code more readable, and make code more similar to the
> code of
> postgres core, I've seen.
>
> Here I will speak about lquery_in function from ltree_io.c.
>
> 1. May be it is better here to switch from else if (state ==
> LQPRS_[something]) to switch (stat) { case LQPRS_[something]: }
> because because here the main case is to determine what is the current
> state
> of your state machine, do something with it, and then go to the next step.
>
> Now after you've done what should be done in this step, you should make
> sure
> that no other code is executed writing more ifs... If you use switch/case
> you
> will be able to say break wherever you like, it will save you some ifs.
>

I thought about it writing the code. But it significantly increased the
size of the patch
so I decided to follow the original coding style here.


>
> Theoretical example
>
> (cl is for character length fc is for fist char)
>
> if (cl==1 && fc =='1') {do_case1();}
> else if (cl==1 && fc =='2') {do_case2();}
> else if (cl==1 && fc =='3') {do_case3();}
> else {do_otherwise();}
>
> If it is wrapped in switch/case it will be like
>
> if (cl==1)
> {
>   if (fc =='1') {do_case1(); break;}
>   if (fc =='2') {do_case2(); break;}
>   if (fc =='3') {do_case3(); break;}
> }
> /*if nothing is found */
> do_otherwise();
>
> This for example will allow you to get rid of multiply charlen == 1 checks
> in
>
> else if ((lptr->flag & LVAR_QUOTEDPART) == 0)
>
>
> {
>
>
> if (charlen == 1 && t_iseq(ptr, '@'))
>
>
> {
>
>
> if (lptr->start == ptr)
>
>
> UNCHAR;
>
>
> lptr->flag |= LVAR_INCASE;
>
>
> curqlevel->flag |= LVAR_INCASE;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '*'))
>
>
> {
>
>
> if (lptr->start == ptr)
>
>
> UNCHAR;
>
>
> lptr->flag |= LVAR_ANYEND;
>
>
> curqlevel->flag |= LVAR_ANYEND;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '%'))
>
>
> {
>
>
> if (lptr->start == ptr)
>
>
> UNCHAR;
>
>
> lptr->flag |= LVAR_SUBLEXEME;
>
>
> curqlevel->flag |= LVAR_SUBLEXEME;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '|'))
>
>
> {
>
>
> real_nodeitem_len(lptr, ptr, escaped_count,
> tail_space_bytes, tail_space_symbols);
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> adjust_quoted_nodeitem(lptr);
>
>
>
> check_level_length(lptr, pos);
>
>
> state = LQPRS_WAITVAR;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '.'))
>
>
> {
>
>
> real_nodeitem_len(lptr, ptr, escaped_count,
> tail_space_bytes, tail_space_symbols);
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> adjust_quoted_nodeitem(lptr);
>
>
> check_level_length(lptr, pos);
>
>
>
> state = LQPRS_WAITLEVEL;
>
>
> curqlevel = NEXTLEV(curqlevel);
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '\\'))
>
>
> {
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> UNCHAR;
>
>
> state = LQPRS_WAITESCAPED;
>
>
> }
>
>
> else
>
>
> {
>
>
> if (charlen == 1 && strchr("!{}", *ptr))
>
>
> UNCHAR;
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> {
>
>
> if (t_isspace(ptr))
>
>
> {
>
>
>

Re: Ltree syntax improvement

2019-04-06 Thread Nikolay Shaplov

В письме от воскресенье, 24 февраля 2019 г. 14:31:55 MSK пользователь Dmitry 
Belyavsky написал:

Hi! Am back here again.

I've been thinking about this patch a while... Come to some conclusions and 
wrote some examples...

First I came to idea that the best way to simplify the code is change the 
state machine from if to switch/case. Because in your code a lot of repetition 
is done just because you can't say "Thats all, let's go to next symbol" in any 
place in the code. Now it should follow all the branches of if-else tree that 
is inside the state-machine "node" to get to the next symbol.

To show how simpler the things would be I changed the state machine processing 
in lquery_in form if to switch/case, and changed the code for LQPRS_WAITLEVEL 
state processing, removing duplicate code, using "break" wherever it is 
possible

(The indention in this example is unproper to make diff more clear)

so from that much of code
=
if (state == LQPRS_WAITLEVEL)
{
if (t_isspace(ptr))
{
ptr += charlen;
pos++;
continue;
}

escaped_count = 0;
real_levels++;
if (charlen == 1)
{
if (t_iseq(ptr, '!'))
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * numOR);
lptr->start = ptr + 1;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
curqlevel->flag |= LQL_NOT;
hasnot = true;
}
else if (t_iseq(ptr, '*'))
state = LQPRS_WAITOPEN;
else if (t_iseq(ptr, '\\'))
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * numOR);
lptr->start = ptr;
curqlevel->numvar = 1;
state = LQPRS_WAITESCAPED;
}
else if (strchr(".|@%{}", *ptr))
{
UNCHAR;
}
else
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * numOR);
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
if (t_iseq(ptr, '"'))
{
lptr->flag |= LVAR_QUOTEDPART;
}
}
}
else
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * 
numOR);
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
}
}
=
I came to this
=
 case LQPRS_WAITLEVEL:
if (t_isspace(ptr))
break; /* Just go to next symbol */
escaped_count = 0;
real_levels++;

if (charlen == 1)
{
if (strchr(".|@%{}", *ptr))
UNCHAR;
if (t_iseq(ptr, '*'))
{
state = LQPRS_WAITOPEN;
break;
}
}
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * 
numOR);
lptr->start = ptr;
curqlevel->numvar = 1;
state = LQPRS_WAITDELIM;
if (charlen == 1)
{
if (t_iseq(ptr, '\\'))
{
state = LQPRS_WAITESCAPED;
break;
}

Re: Ltree syntax improvement

2019-04-16 Thread Dmitry Belyavsky

Dear Nikolay,

Many thanks for your efforts!

On Sat, Apr 6, 2019 at 2:29 PM Nikolay Shaplov  wrote:

> В письме от воскресенье, 24 февраля 2019 г. 14:31:55 MSK пользователь
> Dmitry
> Belyavsky написал:
>
> Hi! Am back here again.
>
> I've been thinking about this patch a while... Come to some conclusions
> and
> wrote some examples...
>
> First I came to idea that the best way to simplify the code is change the
> state machine from if to switch/case. Because in your code a lot of
> repetition
> is done just because you can't say "Thats all, let's go to next symbol" in
> any
> place in the code. Now it should follow all the branches of if-else tree
> that
> is inside the state-machine "node" to get to the next symbol.
>
> To show how simpler the things would be I changed the state machine
> processing
> in lquery_in form if to switch/case, and changed the code for
> LQPRS_WAITLEVEL
> state processing, removing duplicate code, using "break" wherever it is
> possible
>
> (The indention in this example is unproper to make diff more clear)
>
> so from that much of code
> =
> if (state == LQPRS_WAITLEVEL)
> {
> if (t_isspace(ptr))
> {
> ptr += charlen;
> pos++;
> continue;
> }
>
> escaped_count = 0;
> real_levels++;
> if (charlen == 1)
> {
> if (t_iseq(ptr, '!'))
> {
> GETVAR(curqlevel) = lptr =
> (nodeitem *)
> palloc0(sizeof(nodeitem) * numOR);
> lptr->start = ptr + 1;
> state = LQPRS_WAITDELIM;
> curqlevel->numvar = 1;
> curqlevel->flag |= LQL_NOT;
> hasnot = true;
> }
> else if (t_iseq(ptr, '*'))
> state = LQPRS_WAITOPEN;
> else if (t_iseq(ptr, '\\'))
> {
> GETVAR(curqlevel) = lptr =
> (nodeitem *)
> palloc0(sizeof(nodeitem) * numOR);
> lptr->start = ptr;
> curqlevel->numvar = 1;
> state = LQPRS_WAITESCAPED;
> }
> else if (strchr(".|@%{}", *ptr))
> {
> UNCHAR;
> }
> else
> {
> GETVAR(curqlevel) = lptr =
> (nodeitem *)
> palloc0(sizeof(nodeitem) * numOR);
> lptr->start = ptr;
> state = LQPRS_WAITDELIM;
> curqlevel->numvar = 1;
> if (t_iseq(ptr, '"'))
> {
> lptr->flag |=
> LVAR_QUOTEDPART;
> }
> }
> }
> else
> {
> GETVAR(curqlevel) = lptr = (nodeitem *)
> palloc0(sizeof(nodeitem) *
> numOR);
> lptr->start = ptr;
> state = LQPRS_WAITDELIM;
> curqlevel->numvar = 1;
> }
> }
> =
> I came to this
> =
>  case LQPRS_WAITLEVEL:
> if (t_isspace(ptr))
> break; /* Just go to next symbol */
> escaped_count = 0;
> real_levels++;
>
> if (charlen == 1)
> {
> if (strchr(".|@%{}", *ptr))
> UNCHAR;
> if (t_iseq(ptr, '*'))
> {
> state = LQPRS_WAITOPEN;
> break;
> }
> }
> GETVAR(curqlevel) = lptr = (nodeitem *)
> palloc0(sizeof(nodeitem) *
> numOR);
> lptr->start = ptr;
> curqlevel->numvar = 1;
> state = LQPRS_WAITDELIM;

Re: Ltree syntax improvement

2019-07-08 Thread Thomas Munro

On Wed, Apr 17, 2019 at 5:29 AM Dmitry Belyavsky  wrote:
> I've applied your patch.
> From my point of view, there is no major difference between case and chain if 
> here.
> Neither case nor ifs allow extracting the common code to separate function - 
> just because there seem to be no identical pieces of code.

Hi Dmitry,

The documentation doesn't build[1], due to invalid XML.  Since I'm
here, here is some proof-reading of the English in the documentation:

   
-   A label is a sequence of alphanumeric characters
-   and underscores (for example, in C locale the characters
-   A-Za-z0-9_ are allowed).  Labels must be less
than 256 bytes
-   long.
+   A label is a sequence of characters. Labels must be
+   less than 256 symbols long. Label may contain any character
supported by Postgres

"fewer than 256 characters in length", and
"PostgreSQL"

+   except \0. If label contains spaces, dots,
lquery modifiers,

"spaces, dots or lquery modifiers,"

+   they may be escaped. Escaping can be done
either by preceeding
+   backslash (\\) symbol, or by wrapping the label
in whole in double
+   quotes ("). Initial and final unescaped
whitespace is stripped.

"Escaping can be done with either a preceding backslash [...] or by
wrapping the whole label in double quotes [...]."

   

+During converting text into internal representations, wrapping
double quotes

"During conversion to internal representation, "

+and escaping backslashes are removed. During converting internal
+representations into text, if the label does not contain any special

"During conversion from internal representation to text, "

+symbols, it is printed as is. Otherwise, it is wrapped in quotes and, if
+there are internal quotes, they are escaped with backslash. The
list of special

"escaped with backslashes."

+  
+Examples: 42, "\\42",
+\\4\\2,  42  and  "42"
+ will have the similar internal representation and, being

"will all have the same internal representation and,"

+converted from internal representation, will become 42.
+Literal abc def will turn into "abc
+def".
   

[1] https://travis-ci.org/postgresql-cfbot/postgresql/builds/71856

--
Thomas Munro
https://enterprisedb.com

Re: Ltree syntax improvement

2019-07-08 Thread Dmitry Belyavsky

Dear Thomas,

Thank you for your proofreading!

Please find the updated patch attached. It also contains the missing
escaping.

On Mon, Jul 8, 2019 at 10:39 AM Thomas Munro  wrote:

> On Wed, Apr 17, 2019 at 5:29 AM Dmitry Belyavsky 
> wrote:
> > I've applied your patch.
> > From my point of view, there is no major difference between case and
> chain if here.
> > Neither case nor ifs allow extracting the common code to separate
> function - just because there seem to be no identical pieces of code.
>
> Hi Dmitry,
>
> The documentation doesn't build[1], due to invalid XML.  Since I'm
> here, here is some proof-reading of the English in the documentation:
>
>
> -   A label is a sequence of alphanumeric characters
> -   and underscores (for example, in C locale the characters
> -   A-Za-z0-9_ are allowed).  Labels must be less
> than 256 bytes
> -   long.
> +   A label is a sequence of characters. Labels
> must be
> +   less than 256 symbols long. Label may contain any character
> supported by Postgres
>
> "fewer than 256 characters in length", and
> "PostgreSQL"
>
> +   except \0. If label contains spaces, dots,
> lquery modifiers,
>
> "spaces, dots or lquery modifiers,"
>
> +   they may be escaped. Escaping can be done
> either by preceeding
> +   backslash (\\) symbol, or by wrapping the label
> in whole in double
> +   quotes ("). Initial and final unescaped
> whitespace is stripped.
>
> "Escaping can be done with either a preceding backslash [...] or by
> wrapping the whole label in double quotes [...]."
>
>
>
> +During converting text into internal representations, wrapping
> double quotes
>
> "During conversion to internal representation, "
>
> +and escaping backslashes are removed. During converting internal
> +representations into text, if the label does not contain any special
>
> "During conversion from internal representation to text, "
>
> +symbols, it is printed as is. Otherwise, it is wrapped in quotes and,
> if
> +there are internal quotes, they are escaped with backslash. The
> list of special
>
> "escaped with backslashes."
>
> +  
> +Examples: 42, "\\42",
> +\\4\\2,  42  and  "42"
> + will have the similar internal representation and, being
>
> "will all have the same internal representation and,"
>
> +converted from internal representation, will become
> 42.
> +Literal abc def will turn into "abc
> +def".
>
>
> [1] https://travis-ci.org/postgresql-cfbot/postgresql/builds/71856
>
> --
> Thomas Munro
> https://enterprisedb.com
>


-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789

Re: Ltree syntax improvement

2019-07-08 Thread Alvaro Herrera

On 2019-Jul-08, Dmitry Belyavsky wrote:

> Dear Thomas,
> 
> Thank you for your proofreading!
> 
> Please find the updated patch attached. It also contains the missing
> escaping.

I think all these functions you're adding should have a short sentence
explaining what it does.

I'm not really convinced that we need this much testing.  It seems a bit
excessive.  Many of these very focused test SQL lines could be removed
with no loss of coverage, and many of the others could be grouped into
one.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Re: Ltree syntax improvement

2019-07-08 Thread Dmitry Belyavsky

Dear Alvaro,

On Mon, Jul 8, 2019 at 11:16 PM Alvaro Herrera 
wrote:

> On 2019-Jul-08, Dmitry Belyavsky wrote:
>
> > Dear Thomas,
> >
> > Thank you for your proofreading!
> >
> > Please find the updated patch attached. It also contains the missing
> > escaping.
>
> I think all these functions you're adding should have a short sentence
> explaining what it does.
>
> I'm not really convinced that we need this much testing.  It seems a bit
> excessive.  Many of these very focused test SQL lines could be removed
> with no loss of coverage, and many of the others could be grouped into
> one.
>

I did not introduce any functions. I've just changed the parser.
I'm not sure that it makes sense to remove any tests as most of them were
written to catch really happened bugs during the implementation.

-- 
SY, Dmitry Belyavsky

Re: Ltree syntax improvement

2019-07-08 Thread Alvaro Herrera

On 2019-Jul-08, Dmitry Belyavsky wrote:

> I did not introduce any functions. I've just changed the parser.

I mean the C-level functions -- count_parts_ors() and so on.

> I'm not sure that it makes sense to remove any tests as most of them were
> written to catch really happened bugs during the implementation.

Well, I don't mean to decrease the coverage, only to condense a lot of
little tests in a small powerful test.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Re: Ltree syntax improvement

2019-07-09 Thread Dmitry Belyavsky

On Mon, Jul 8, 2019 at 11:33 PM Alvaro Herrera 
wrote:

> On 2019-Jul-08, Dmitry Belyavsky wrote:
>
> > I did not introduce any functions. I've just changed the parser.
>
> I mean the C-level functions -- count_parts_ors() and so on.
>
> Added a comment to  count_parts_ors()

The other functions introduced by me were already described.

-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\z\z\z\z\z')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789z
+(1 row)
+
+SELECT('   ' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\a\b\c\d\e   ')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcde
+(1 row)
+
+SELECT 'abc\|d'::lquery;
+ lquery  
+-
+ "abc|d"
+(1 row)
+
+SELECT 'abc\|d'::ltree ~ 'abc\|d'::lquery;
+ ?column? 
+--

Re: Ltree syntax improvement

2019-07-11 Thread Thomas Munro

On Wed, Jul 10, 2019 at 7:40 AM Dmitry Belyavsky  wrote:
> [ltree_20190709.diff]

Hi Dmitry,

You need to update contrib/ltree_plpython/expected/ltree_plpython.out,
otherwise check-world fails when built with Python support.  The good
news is that it looks like it fails because you fixed something!
(Though I didn't check the details).

 CREATE FUNCTION test2() RETURNS ltree
 LANGUAGE plpythonu
 TRANSFORM FOR TYPE ltree
 AS $$
 return ['foo', 'bar', 'baz']
 $$;
 -- plpython to ltree is not yet implemented, so this will fail,
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
-CONTEXT:  while creating return value
-PL/Python function "test2"
+  test2
+-
+ "['foo', 'bar', 'baz']"
+(1 row)
+

-- 
Thomas Munro
https://enterprisedb.com

Re: Ltree syntax improvement

2019-07-11 Thread Dmitry Belyavsky

Dear Thomas,

On Thu, Jul 11, 2019 at 11:20 AM Thomas Munro 
wrote:

> On Wed, Jul 10, 2019 at 7:40 AM Dmitry Belyavsky 
> wrote:
> > [ltree_20190709.diff]
>
> Hi Dmitry,
>
> You need to update contrib/ltree_plpython/expected/ltree_plpython.out,
> otherwise check-world fails when built with Python support.  The good
> news is that it looks like it fails because you fixed something!
> (Though I didn't check the details).
>
>  CREATE FUNCTION test2() RETURNS ltree
>  LANGUAGE plpythonu
>  TRANSFORM FOR TYPE ltree
>  AS $$
>  return ['foo', 'bar', 'baz']
>  $$;
>  -- plpython to ltree is not yet implemented, so this will fail,
>  -- because it will try to parse the Python list as an ltree input
>  -- string.
>  SELECT test2();
> -ERROR:  syntax error at position 0
> -CONTEXT:  while creating return value
> -PL/Python function "test2"
> +  test2
> +-
> + "['foo', 'bar', 'baz']"
> +(1 row)
> +
>
>
See attached. I'm not familiar enough with python so I just removed the
failing tests.
If the main patch is accepted, the ltree_python extension should be
redesigned, I think...

-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\z\z\z\z\z')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789z
+(1 row)
+
+SELECT('   ' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\a\b\c\d\e   ')::ltree;
+  ltree

Re: Ltree syntax improvement

2019-07-17 Thread Nikita Glukhov


Hi!

I have looked at the patch and found some problems.


1. I fixed some bugs (fixed patch with additional test cases is attached):

-- NULL 'lptr' pointer dereference at lquery_in()
=# SELECT '*'::lquery;
-- crash

-- '|' after '*{n}' is wrongly handled (LQPRS_WAITEND state)
=# SELECT '*{1}|2'::lquery;
WARNING:  problem in alloc set MessageContext: req size > alloc size for chunk 
0x1c2d508 in block 0x1c2bc58
WARNING:  problem in alloc set MessageContext: req size > alloc size for chunk 
0x1c2d508 in block 0x1c2bc58
   lquery
-
 *{1}.*{,48}
(1 row)

-- wrong handling of trailing whitespace
=# SELECT '"a" '::ltree;
ERROR:  name of level is empty
LINE 1: SELECT '"a" '::ltree;
   ^
DETAIL:  Name length is 0 in position 4.


=# SELECT '"a" '::lquery;
ERROR:  name of level is empty
LINE 1: SELECT '"a" '::lquery;
   ^
DETAIL:  Name length is 0 in position 4.


-- backslashes are not escaped in ltree_out()/lquery_out(),
-- which is not consistent with ltree_in()/lquery_in()
=# SELECT '"\\"'::ltree;
 ltree
---
 "\"
(1 row)

=# SELECT '"\\"'::lquery;
 lquery

 "\"
(1 row)

=# SELECT '"\\"'::ltree::text::ltree;
ERROR:  syntax error
DETAIL:  Unexpected end of line.

=# SELECT '"\\"'::lquery::text::lquery;
ERROR:  syntax error
DETAIL:  Unexpected end of line.



2. There are inconsistencies in whitespace handling before and after *, @, %, {}
(I have not fixed this because I'm not sure how it is supposed to work):

-- whitespace before '*' is not ignored
=# SELECT '"a" *'::lquery;
 lquery

 "a\""*
(1 row)

=# SELECT 'a *'::lquery;
 lquery

 "a "*
(1 row)

-- whitespace after '*' and '{}' is disallowed
=# SELECT 'a* .b'::lquery;
ERROR:  syntax error at position 2
LINE 1: SELECT 'a* .b'::lquery;
   ^

=# SELECT 'a* |b'::lquery;
ERROR:  syntax error at position 2
LINE 1: SELECT 'a* |b'::lquery;
   ^

=# SELECT '*{1} .a'::lquery;
ERROR:  syntax error at position 4
LINE 1: SELECT '*{1} .a'::lquery;
   ^

-- but the whitespace after levels without '*@%{}' is allowed
=# SELECT 'a |b'::lquery;
 lquery

 a|b
(1 row)



3. Empty level names between '!' and '|' are allowed.  This behavior can be
seen on master, so it seems that we cannot fix it now:

-- master
=# SELECT '!|a'::lquery;
 lquery

 !|a
(1 row)

-- patched
=# SELECT '!|a'::lquery;
 lquery

 !""|a
(1 row)

-- empty level names in other places are disallowed
=# SELECT '!a|'::lquery;
ERROR:  syntax error
LINE 1: SELECT '!a|'::lquery;
   ^
DETAIL:  Unexpected end of line.

=# SELECT '|a'::lquery;
ERROR:  syntax error at position 0
LINE 1: SELECT '|a'::lquery;
   ^


4. It looks strange to me that leading and trailing unquoted whitespace is
ignored, but the internal whitespace is treated like a quoted:

=# SELECT ' a b   .  c d '::lquery;
 lquery
-
 "a b"."c d"
(1 row)

I would prefer unquoted unescaped whitespace to be a delimiter always.


5. It seems wrong to me that ltree and lquery have different special character
sets now.  This leads to the fact that arbitrary ltree text cannot be used
directly as lquery text, as it seemed to be before the syntax improvements:

=# SELECT 'a|b'::ltree::text::lquery;
 lquery

 a|b
(1 row)

=# SELECT '"a|b"'::ltree::text::lquery;
 lquery

 a|b
(1 row)

=# SELECT '"a|b"'::lquery;
 lquery

 "a|b"
(1 row)

There might not be a problem if we had ltree::lquery cast.

Also I think that text[]::ltree/ltree::text[] casts for ltree
construction/deconstruction from text level names can be very useful.


6. ltree and escpecially lquery parsing code still look too complicated for me,
and I believe that the bugs described above are a direct consequence of this.
So the code needs to be refactored, maybe even without using of state machines.



On 11.07.2019 20:49, Dmitry Belyavsky wrote:


On Thu, Jul 11, 2019 at 11:20 AM Thomas Munro > wrote:


On Wed, Jul 10, 2019 at 7:40 AM Dmitry Belyavsky
mailto:beld...@gmail.com>> wrote:
> [ltree_20190709.diff]

You need to update contrib/ltree_plpython/expected/ltree_plpython.out,
otherwise check-world fails when built with Python support. The good
news is that it looks like it fails because you fixed something!
(Though I didn't check the details).

 CREATE FUNCTION test2() RETURNS ltree
 LANGUAGE plpythonu
 TRANSFORM FOR TYPE ltree
 AS $$
 return ['foo', 'bar', 'baz']
 $$;
 -- plpython to ltree is not yet implemented, so this will fail,
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
-CONTEXT:  while creating return value
-PL/Python function "test2"
+          test2
+-
+ "['foo', 'bar', 'baz']"
+(1 row)
+


See attached. I'm not familiar enough with python so I just rem

Re: Ltree syntax improvement

2019-08-01 Thread Thomas Munro

On Thu, Jul 18, 2019 at 1:28 AM Nikita Glukhov  wrote:
> 1. I fixed some bugs (fixed patch with additional test cases is attached):

Hi Nikita,

Thanks.  I have set this to "Needs review", in the September 'fest.

-- 
Thomas Munro
https://enterprisedb.com

Re: Ltree syntax improvement

2019-02-24 Thread Dmitry Belyavsky

Dear Nikolay,

On Wed, Feb 20, 2019 at 12:28 PM Nikolay Shaplov  wrote:

> В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry
> Belyavsky написал:
> > Dear all,
> >
> > Please find attached the patch extending the sets of symbols allowed in
> > ltree labels. The patch introduces 2 variants of escaping symbols, via
> > backslashing separate symbols and via quoting the labels in whole for
> > ltree, lquery and ltxtquery datatypes.
>
> Sorry I still did not do full revision, I've stumbled up on other things
> in
> the code, which, as I think should be fixed before final checking through.
> (I
> guess that code do what is expected, since it passes tests and so on, it
> needs
> recheck, but I'd like to do this full recheck only once)
>
> So my doubts about this code is that a lot parts of code is just a
> copy-paste
> of the same code that was written right above. And it can be rewritten in
> a
> way that the same lines (or parts of lines) is repeated only once...
> This should make code more readable, and make code more similar to the
> code of
> postgres core, I've seen.
>
> Here I will speak about lquery_in function from ltree_io.c.
>
> 1. May be it is better here to switch from else if (state ==
> LQPRS_[something]) to switch (stat) { case LQPRS_[something]: }
> because because here the main case is to determine what is the current
> state
> of your state machine, do something with it, and then go to the next step.
>
> Now after you've done what should be done in this step, you should make
> sure
> that no other code is executed writing more ifs... If you use switch/case
> you
> will be able to say break wherever you like, it will save you some ifs.
>

I thought about it writing the code. But it significantly increased the
size of the patch
so I decided to follow the original coding style here.


>
> Theoretical example
>
> (cl is for character length fc is for fist char)
>
> if (cl==1 && fc =='1') {do_case1();}
> else if (cl==1 && fc =='2') {do_case2();}
> else if (cl==1 && fc =='3') {do_case3();}
> else {do_otherwise();}
>
> If it is wrapped in switch/case it will be like
>
> if (cl==1)
> {
>   if (fc =='1') {do_case1(); break;}
>   if (fc =='2') {do_case2(); break;}
>   if (fc =='3') {do_case3(); break;}
> }
> /*if nothing is found */
> do_otherwise();
>
> This for example will allow you to get rid of multiply charlen == 1 checks
> in
>
> else if ((lptr->flag & LVAR_QUOTEDPART) == 0)
>
>
> {
>
>
> if (charlen == 1 && t_iseq(ptr, '@'))
>
>
> {
>
>
> if (lptr->start == ptr)
>
>
> UNCHAR;
>
>
> lptr->flag |= LVAR_INCASE;
>
>
> curqlevel->flag |= LVAR_INCASE;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '*'))
>
>
> {
>
>
> if (lptr->start == ptr)
>
>
> UNCHAR;
>
>
> lptr->flag |= LVAR_ANYEND;
>
>
> curqlevel->flag |= LVAR_ANYEND;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '%'))
>
>
> {
>
>
> if (lptr->start == ptr)
>
>
> UNCHAR;
>
>
> lptr->flag |= LVAR_SUBLEXEME;
>
>
> curqlevel->flag |= LVAR_SUBLEXEME;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '|'))
>
>
> {
>
>
> real_nodeitem_len(lptr, ptr, escaped_count,
> tail_space_bytes, tail_space_symbols);
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> adjust_quoted_nodeitem(lptr);
>
>
>
> check_level_length(lptr, pos);
>
>
> state = LQPRS_WAITVAR;
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '.'))
>
>
> {
>
>
> real_nodeitem_len(lptr, ptr, escaped_count,
> tail_space_bytes, tail_space_symbols);
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> adjust_quoted_nodeitem(lptr);
>
>
> check_level_length(lptr, pos);
>
>
>
> state = LQPRS_WAITLEVEL;
>
>
> curqlevel = NEXTLEV(curqlevel);
>
>
> }
>
>
> else if (charlen == 1 && t_iseq(ptr, '\\'))
>
>
> {
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> UNCHAR;
>
>
> state = LQPRS_WAITESCAPED;
>
>
> }
>
>
> else
>
>
> {
>
>
> if (charlen == 1 && strchr("!{}", *ptr))
>
>
> UNCHAR;
>
>
> if (state == LQPRS_WAITDELIMSTRICT)
>
>
> {
>
>
> if (t_isspace(ptr))
>
>
> {
>
>
>

Re: Ltree syntax improvement

2019-03-07 Thread Chris Travers

On Tue, Feb 5, 2019 at 2:27 PM Nikolay Shaplov  wrote:

> В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry
> Belyavsky написал:
>
> > Please find attached the patch extending the sets of symbols allowed in
> > ltree labels. The patch introduces 2 variants of escaping symbols, via
> > backslashing separate symbols and via quoting the labels in whole for
> > ltree, lquery and ltxtquery datatypes.
>
> Hi!
>
> Let's me join the review process...
>
> I've just give a superficial look to the patch, read it through, and try
> here
> and there, so first I'll give feedback about exterior features of the
> patch.
>
> So, the first question: why do we need all this? The answer seems to be
> obvious, but it would be good say it explicitly. What problems would it
> solve?
> Do these problems really exists?
>

Yes, it does.  We maintain an extension (https://github.com/adjust/wltree)
which has a fixed separator (::) and allows any utf-8 character in the tree.

In our case we currently use our extended tree to store user-defined
hierarchies of labels, which might contain whitespace, Chinese, Japanese,
or Korean characters, etc.

I would love to be able to deprecate our work on this extension and
eventually use stock ltree.

>
> The second question is about coding style. As far as I can see you've
> passed
> your code through pg_indent, but it does not solve all problems.
>
> As far as I know, postgres commiters are quite strict about code width,
> the
> code should not be wider that 80 col, except places were string constants
> are
> introduced (There you can legally exceed 80 col). So I would advice to use
> vim
> with  tabstop=4  and colorcolumn=80 and make sure that new code text does
> not
> cross red vertical line.
>
> Comments. There is no fixed coding style for comments in postgres. Usually
> this
> means that it is better to follow coding in the code around. But ltree
> does
> not have much comments, so I would suggest to use the best style I've seen
> in
> the code
> /*
>  * [function name]
>  * < tab > [Short description, what does it do]
>  *
>  * [long explanation how it works, several subparagraph if needed
>  */
> (Seen this in src/include/utils/rel.h)
> or something like that.
> At least it would be good to have explicit separation between descriptions
> and
> explanation of how it works.
>
> And about comment
> /*
>  * Function calculating bytes to escape
>  * to_escape is an array of "special" 1-byte symbols
>  * Behvaiour:
>  * If there is no "special" symbols, return 0
>  * If there are any special symbol, we need initial and final quote, so
> return
> 2
>  * If there are any quotes, we need to escape all of them and also initial
> and
> final quote, so
>  * return 2 + number of quotes
>  */
>
> It has some formatting. But it should not.  As far as I understand this
> comment should be treated as single paragraph by reformatter, and
> refomatted.
> I do not understand why pg_indent have not done it. Documentation (https://
> www.postgresql.org/docs/11/source-format.html) claims that if you want to
> avoid reformatting, you should add "-" at the beginning and at
> the
> end of the comment. So it would be good either remove this formatting, or
> add
> "" if you are sure you want to keep it.
>
> Third part is about error messages.
> First I've seen errors like elog(ERROR, "internal error during splitting
> levels");. I've never seen error like that in postgres. May be if this
> error
> is in the part of the code, that should never be reached in real live, may
> be
> it would be good to change it to Assert? Or if this code can be normally
> reached, add some more explanation of what had happened...
>
> About other error messages.
>
> There are messages like
> errmsg("syntax error"),
> errdetail("Unexpected end of line.")));
>
> or
>
> errmsg("escaping syntax error")));
>
>
> In postgres there is error message style guide
> https://www.postgresql.org/docs/11/error-style-guide.html
>
> Here I would expect messages like that
>
> Primary:Error parsing ltree path
> Detail: Curly quote is opened and not closed
>
> Or something like that, I am not expert in English, but this way it would
> be
> better for sure. Key points are: Primary message should point to general
> area
> where error occurred (there can be a large SQL with a lot of things in it,
> so
> it is good to point that problem is with ltree staff). And is is better to
> word from the point of view of a user. What for you (and me) is unexpected
> end
> of line, for him it is unclosed curly quote.
>
> Also I am not sure, if it is easy, but if it is possible, it would be good
> to
> move "^" symbol that points to the place of the error, to the place inside
> ' '
> where unclosed curly quote is located. As a user I would appreciate that.
>
> So that's what I've seen while giving it superficial look...
>
>
>

-- 
Best Regards,
Chris Travers
Head of Database

Tel: +49 162 9037 210 | Skype: einhverfr | www.adjust

Re: Ltree syntax improvement

2019-03-07 Thread Filip Rembiałkowski

Good day.

Sorry to pop in, but if you are active users of ltree, please let me
know if you rely on the exclamation operator (negative matching)?
I have just filed a patch,  fixing very old bug - and it changes the
logic substantially.
https://commitfest.postgresql.org/23/2054/ - I'd be grateful for your
comments (please use the other thread).




On Thu, Mar 7, 2019 at 1:10 PM Chris Travers  wrote:
>
>
>
> On Tue, Feb 5, 2019 at 2:27 PM Nikolay Shaplov  wrote:
>>
>> В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry
>> Belyavsky написал:
>>
>> > Please find attached the patch extending the sets of symbols allowed in
>> > ltree labels. The patch introduces 2 variants of escaping symbols, via
>> > backslashing separate symbols and via quoting the labels in whole for
>> > ltree, lquery and ltxtquery datatypes.
>>
>> Hi!
>>
>> Let's me join the review process...
>>
>> I've just give a superficial look to the patch, read it through, and try  
>> here
>> and there, so first I'll give feedback about exterior features of the patch.
>>
>> So, the first question: why do we need all this? The answer seems to be
>> obvious, but it would be good say it explicitly. What problems would it 
>> solve?
>> Do these problems really exists?
>
>
> Yes, it does.  We maintain an extension (https://github.com/adjust/wltree) 
> which has a fixed separator (::) and allows any utf-8 character in the tree.
>
> In our case we currently use our extended tree to store user-defined 
> hierarchies of labels, which might contain whitespace, Chinese, Japanese, or 
> Korean characters, etc.
>
> I would love to be able to deprecate our work on this extension and 
> eventually use stock ltree.
>>
>>
>> The second question is about coding style. As far as I can see you've passed
>> your code through pg_indent, but it does not solve all problems.
>>
>> As far as I know, postgres commiters are quite strict about code width, the
>> code should not be wider that 80 col, except places were string constants are
>> introduced (There you can legally exceed 80 col). So I would advice to use 
>> vim
>> with  tabstop=4  and colorcolumn=80 and make sure that new code text does not
>> cross red vertical line.
>>
>> Comments. There is no fixed coding style for comments in postgres. Usually 
>> this
>> means that it is better to follow coding in the code around. But ltree does
>> not have much comments, so I would suggest to use the best style I've seen in
>> the code
>> /*
>>  * [function name]
>>  * < tab > [Short description, what does it do]
>>  *
>>  * [long explanation how it works, several subparagraph if needed
>>  */
>> (Seen this in src/include/utils/rel.h)
>> or something like that.
>> At least it would be good to have explicit separation between descriptions 
>> and
>> explanation of how it works.
>>
>> And about comment
>> /*
>>  * Function calculating bytes to escape
>>  * to_escape is an array of "special" 1-byte symbols
>>  * Behvaiour:
>>  * If there is no "special" symbols, return 0
>>  * If there are any special symbol, we need initial and final quote, so 
>> return
>> 2
>>  * If there are any quotes, we need to escape all of them and also initial 
>> and
>> final quote, so
>>  * return 2 + number of quotes
>>  */
>>
>> It has some formatting. But it should not.  As far as I understand this
>> comment should be treated as single paragraph by reformatter, and refomatted.
>> I do not understand why pg_indent have not done it. Documentation (https://
>> www.postgresql.org/docs/11/source-format.html) claims that if you want to
>> avoid reformatting, you should add "-" at the beginning and at 
>> the
>> end of the comment. So it would be good either remove this formatting, or add
>> "" if you are sure you want to keep it.
>>
>> Third part is about error messages.
>> First I've seen errors like elog(ERROR, "internal error during splitting
>> levels");. I've never seen error like that in postgres. May be if this error
>> is in the part of the code, that should never be reached in real live, may be
>> it would be good to change it to Assert? Or if this code can be normally
>> reached, add some more explanation of what had happened...
>>
>> About other error messages.
>>
>> There are messages like
>> errmsg("syntax error"),
>> errdetail("Unexpected end of line.")));
>>
>> or
>>
>> errmsg("escaping syntax error")));
>>
>>
>> In postgres there is error message style guide
>> https://www.postgresql.org/docs/11/error-style-guide.html
>>
>> Here I would expect messages like that
>>
>> Primary:Error parsing ltree path
>> Detail: Curly quote is opened and not closed
>>
>> Or something like that, I am not expert in English, but this way it would be
>> better for sure. Key points are: Primary message should point to general area
>> where error occurred (there can be a large SQL with a lot of things in it, so
>> it is good to point that problem is with ltree staff). And is is better to
>> word from the point of vie

Re: Ltree syntax improvement

2020-03-06 Thread Nikita Glukhov


Attached new version of the patch.

I did major refactoring of ltree label parsing, extracting common parsing
code for ltree, lquery, and ltxtquery.  This greatly simplified state
machines.

On the advice of Tomas Vondra, I also extracted a preliminary patch with
'if' to 'switch' conversion.

On 21.01.2020 22:13, Tom Lane wrote:


Dmitry Belyavsky  writes:

If the C part will be reviewed and considered mergeable, I'll update the
plpython tests.

I haven't looked at any of the code involved in this, but I did examine
the "failing" plpython test, and I'm quite distressed about that change
of behavior.  Simply removing the test case is certainly not okay,
and I do not think that just changing it to accept this new behavior
is okay either.  As Nikita said upthread:


7. ltree_plpython test does not fail now because Python list is converted to a
text and then to a ltree, and the textual representation of a Python list has
become a valid ltree text:

SELECT $$['foo', 'bar', 'baz']$$::ltree;
ltree
-
   "['foo', 'bar', 'baz']"
(1 row)

So Python lists can be now successfully converted to ltrees without a transform,
but the result is not that is expected ('foo.bar.baz'::ltree).

If this case doesn't throw an error, then we're going to have a
compatibility problem whenever somebody finally gets around to
implementing the python-to-ltree transform properly, because it
would break any code that might be relying on this (wrong) behavior.

In general, I think it's a mistake to allow unquoted punctuation to be
taken as part of an ltree label, which is what this patch is evidently
doing.  By doing that, you'll make it impossible for anyone to ever
again extend the ltree syntax, because if they want to assign special
meaning to braces or whatever, they can't do so without breaking
existing applications.  For example, if the existing code allowed
double-quote or backslash as a label character, we'd already have
rejected this patch as being too big a compatibility break.  So it's
not very forward-thinking to close off future improvements like this.

Thus, what I think you should do is require non-alphanumeric label
characters to be quoted, either via double-quotes or backslashes
(although it's questionable whether we really need two independent
quoting mechanisms here).  That would preserve extensibility, and
it'd also preserve our freedom to fix ltree_plpython later, since
the case of interest would still be an error for now.  And it would
mean that you don't have subtly different rules for what's data in
ltree versus what's data in lquery or ltxtquery.


Now non-alphanumeric label characters should be escaped in ltree,
lquery and ltxtquery.  Plpython tests does not require changes now.


BTW, the general rule in existing backend code that's doing string
parsing is to allow non-ASCII (high-bit-set) characters to be taken as
data without inquiring too closely as to what they are.  This avoids a
bunch of locale and encoding issues without much loss of flexibility.
(If we do ever extend the ltree syntax again, we'd certainly choose
ASCII punctuation characters for whatever special symbols we need,
else the feature might not be available in all encodings.)  So for
instance in your examples involving "Ñ", it's fine to take that as a
label character without concern for locale/encoding.
I'm not sure what I think about the whitespace business.  It looks
like what you propose is to strip unquoted leading and trailing
whitespace but allow embedded whitespace.  There's precedent for that,
certainly, but I wonder whether it isn't too confusing.  In any case
you didn't document that.


Embedded whitespace should also be escaped now.  I'm also not sure
about stripping unquoted leading and trailing whitespace.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From c697c173e121c3996325f7f61536852f8b380d9e Mon Sep 17 00:00:00 2001
From: Nikita Glukhov 
Date: Thu, 5 Mar 2020 17:34:59 +0300
Subject: [PATCH 1/2] Replace 'if' with 'switch' in ltree code

---
 contrib/ltree/ltree_io.c | 402 ---
 1 file changed, 204 insertions(+), 198 deletions(-)

diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 900a46a..e97f035 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -69,40 +69,43 @@ ltree_in(PG_FUNCTION_ARGS)
 	{
 		charlen = pg_mblen(ptr);
 
-		if (state == LTPRS_WAITNAME)
+		switch (state)
 		{
-			if (ISALNUM(ptr))
-			{
-lptr->start = ptr;
-lptr->wlen = 0;
-state = LTPRS_WAITDELIM;
-			}
-			else
-UNCHAR;
-		}
-		else if (state == LTPRS_WAITDELIM)
-		{
-			if (charlen == 1 && t_iseq(ptr, '.'))
-			{
-lptr->len = ptr - lptr->start;
-if (lptr->wlen > 255)
-	ereport(ERROR,
-			(errcode(ERRCODE_NAME_TOO_LONG),
-			 errmsg("name of level is too long"),
-			 errdetail("Name length is %d, must "
-	   "be < 256, in position %d.",
-

Re: Ltree syntax improvement

2019-02-05 Thread Nikolay Shaplov

В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry 
Belyavsky написал:

> Please find attached the patch extending the sets of symbols allowed in
> ltree labels. The patch introduces 2 variants of escaping symbols, via
> backslashing separate symbols and via quoting the labels in whole for
> ltree, lquery and ltxtquery datatypes.

Hi!

Let's me join the review process...

I've just give a superficial look to the patch, read it through, and try  here 
and there, so first I'll give feedback about exterior features of the patch.

So, the first question: why do we need all this? The answer seems to be 
obvious, but it would be good say it explicitly. What problems would it solve? 
Do these problems really exists? 

The second question is about coding style. As far as I can see you've passed 
your code through pg_indent, but it does not solve all problems.

As far as I know, postgres commiters are quite strict about code width, the 
code should not be wider that 80 col, except places were string constants are 
introduced (There you can legally exceed 80 col). So I would advice to use vim 
with  tabstop=4  and colorcolumn=80 and make sure that new code text does not 
cross red vertical line. 

Comments. There is no fixed coding style for comments in postgres. Usually this 
means that it is better to follow coding in the code around. But ltree does 
not have much comments, so I would suggest to use the best style I've seen in 
the code
/*
 * [function name]
 * < tab > [Short description, what does it do]
 * 
 * [long explanation how it works, several subparagraph if needed
 */
(Seen this in src/include/utils/rel.h)
or something like that.
At least it would be good to have explicit separation between descriptions and 
explanation of how it works.

And about comment
/*
 * Function calculating bytes to escape
 * to_escape is an array of "special" 1-byte symbols
 * Behvaiour:
 * If there is no "special" symbols, return 0
 * If there are any special symbol, we need initial and final quote, so return 
2
 * If there are any quotes, we need to escape all of them and also initial and 
final quote, so
 * return 2 + number of quotes
 */

It has some formatting. But it should not.  As far as I understand this 
comment should be treated as single paragraph by reformatter, and refomatted. 
I do not understand why pg_indent have not done it. Documentation (https://
www.postgresql.org/docs/11/source-format.html) claims that if you want to 
avoid reformatting, you should add "-" at the beginning and at the 
end of the comment. So it would be good either remove this formatting, or add 
"" if you are sure you want to keep it.

Third part is about error messages.
First I've seen errors like elog(ERROR, "internal error during splitting 
levels");. I've never seen error like that in postgres. May be if this error 
is in the part of the code, that should never be reached in real live, may be 
it would be good to change it to Assert? Or if this code can be normally 
reached, add some more explanation of what had happened...

About other error messages.

There are messages like 
errmsg("syntax error"),
errdetail("Unexpected end of line.")));

or

errmsg("escaping syntax error")));


In postgres there is error message style guide 
https://www.postgresql.org/docs/11/error-style-guide.html

Here I would expect messages like that

Primary:Error parsing ltree path
Detail: Curly quote is opened and not closed

Or something like that, I am not expert in English, but this way it would be 
better for sure. Key points are: Primary message should point to general area 
where error occurred (there can be a large SQL with a lot of things in it, so 
it is good to point that problem is with ltree staff). And is is better to 
word from the point of view of a user. What for you (and me) is unexpected end 
of line, for him it is unclosed curly quote. 

Also I am not sure, if it is easy, but if it is possible, it would be good to 
move "^" symbol that points to the place of the error, to the place inside ' ' 
where unclosed curly quote is located. As a user I would appreciate that.

So that's what I've seen while giving it superficial look...

Re: Ltree syntax improvement

2019-02-20 Thread Nikolay Shaplov

В письме от вторник, 29 января 2019 г. 20:43:07 MSK пользователь Dmitry 
Belyavsky написал:
> Dear all,
> 
> Please find attached the patch extending the sets of symbols allowed in
> ltree labels. The patch introduces 2 variants of escaping symbols, via
> backslashing separate symbols and via quoting the labels in whole for
> ltree, lquery and ltxtquery datatypes.

Sorry I still did not do full revision, I've stumbled up on other things in 
the code, which, as I think should be fixed before final checking through. (I 
guess that code do what is expected, since it passes tests and so on, it needs 
recheck, but I'd like to do this full recheck only once)

So my doubts about this code is that a lot parts of code is just a copy-paste 
of the same code that was written right above. And it can be rewritten in a 
way that the same lines (or parts of lines) is repeated only once...
This should make code more readable, and make code more similar to the code of 
postgres core, I've seen.

Here I will speak about lquery_in function from ltree_io.c.

1. May be it is better here to switch from else if (state == 
LQPRS_[something]) to switch (stat) { case LQPRS_[something]: }
because because here the main case is to determine what is the current state 
of your state machine, do something with it, and then go to the next step. 

Now after you've done what should be done in this step, you should make sure 
that no other code is executed writing more ifs... If you use switch/case you 
will be able to say break wherever you like, it will save you some ifs.

Theoretical example

(cl is for character length fc is for fist char)

if (cl==1 && fc =='1') {do_case1();}
else if (cl==1 && fc =='2') {do_case2();}
else if (cl==1 && fc =='3') {do_case3();}
else {do_otherwise();}

If it is wrapped in switch/case it will be like

if (cl==1)
{
  if (fc =='1') {do_case1(); break;}
  if (fc =='2') {do_case2(); break;}
  if (fc =='3') {do_case3(); break;}
}
/*if nothing is found */
do_otherwise();

This for example will allow you to get rid of multiply charlen == 1 checks in 

else if ((lptr->flag & LVAR_QUOTEDPART) == 0)   


{   


if (charlen == 1 && t_iseq(ptr, '@'))   


{   


if (lptr->start == ptr) 


UNCHAR; 


lptr->flag |= LVAR_INCASE;  


curqlevel->flag |= LVAR_INCASE; 


}   


else if (charlen == 1 && t_iseq(ptr, '*'))  


{   


if (lptr->start == ptr) 


UNCHAR; 


lptr->flag |= LVAR_ANYEND;  


curqlevel->flag |= LVAR_ANYEND;

Re: Ltree syntax improvement

2019-03-31 Thread Nikolay Shaplov

В письме от четверг, 7 марта 2019 г. 13:09:49 MSK пользователь Chris Travers 
написал:

>  We maintain an extension (https://github.com/adjust/wltree)
> which has a fixed separator (::) and allows any utf-8 character in the tree.
> 
> In our case we currently use our extended tree to store user-defined
> hierarchies of labels, which might contain whitespace, Chinese, Japanese,
> or Korean characters, etc.
> 
> I would love to be able to deprecate our work on this extension and
> eventually use stock ltree.
I am afraid, that if would not be possible to have ltree with custom 
separator. Or we need to pass a very long way to reach there.

To have custom separator we will need some place to store it.

We can use GUC variable, and set separator for ltree globally. It might solve 
some of the problems. But will get some more. If for example we dump database, 
and then restore it on instance where that GUC option is not set, everything 
will be brocken.

It is not opclass option (that are discussed  here on the list), because 
opclass options is no about parsing, but about comparing things that was 
already parsed.

It is not option of an attribute (if we can have custom attribute option), 
because we can have ltree as a variable, without creating an attribute...

It should be an option of ltree type itself. I have no idea how we can 
implement this. And who will allow us to commit such strange thing ;-)

Re: Ltree syntax improvement

2019-04-06 Thread Nikolay Shaplov

В письме от воскресенье, 24 февраля 2019 г. 14:31:55 MSK пользователь Dmitry 
Belyavsky написал:

Hi! Am back here again.

I've been thinking about this patch a while... Come to some conclusions and 
wrote some examples...

First I came to idea that the best way to simplify the code is change the 
state machine from if to switch/case. Because in your code a lot of repetition 
is done just because you can't say "Thats all, let's go to next symbol" in any 
place in the code. Now it should follow all the branches of if-else tree that 
is inside the state-machine "node" to get to the next symbol.

To show how simpler the things would be I changed the state machine processing 
in lquery_in form if to switch/case, and changed the code for LQPRS_WAITLEVEL 
state processing, removing duplicate code, using "break" wherever it is 
possible

(The indention in this example is unproper to make diff more clear)

so from that much of code
=
if (state == LQPRS_WAITLEVEL)
{
if (t_isspace(ptr))
{
ptr += charlen;
pos++;
continue;
}

escaped_count = 0;
real_levels++;
if (charlen == 1)
{
if (t_iseq(ptr, '!'))
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * numOR);
lptr->start = ptr + 1;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
curqlevel->flag |= LQL_NOT;
hasnot = true;
}
else if (t_iseq(ptr, '*'))
state = LQPRS_WAITOPEN;
else if (t_iseq(ptr, '\\'))
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * numOR);
lptr->start = ptr;
curqlevel->numvar = 1;
state = LQPRS_WAITESCAPED;
}
else if (strchr(".|@%{}", *ptr))
{
UNCHAR;
}
else
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * numOR);
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
if (t_iseq(ptr, '"'))
{
lptr->flag |= LVAR_QUOTEDPART;
}
}
}
else
{
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * 
numOR);
lptr->start = ptr;
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
}
}
=
I came to this
=
 case LQPRS_WAITLEVEL:
if (t_isspace(ptr))
break; /* Just go to next symbol */
escaped_count = 0;
real_levels++;

if (charlen == 1)
{
if (strchr(".|@%{}", *ptr))
UNCHAR;
if (t_iseq(ptr, '*'))
{
state = LQPRS_WAITOPEN;
break;
}
}
GETVAR(curqlevel) = lptr = (nodeitem *) 
palloc0(sizeof(nodeitem) * 
numOR);
lptr->start = ptr;
curqlevel->numvar = 1;
state = LQPRS_WAITDELIM;
if (charlen == 1)
{
if (t_iseq(ptr, '\\'))
{
state = LQPRS_WAITESCAPED;
break;
}

Re: Ltree syntax improvement

2019-04-16 Thread Dmitry Belyavsky

Dear Nikolay,

Many thanks for your efforts!

On Sat, Apr 6, 2019 at 2:29 PM Nikolay Shaplov  wrote:

> В письме от воскресенье, 24 февраля 2019 г. 14:31:55 MSK пользователь
> Dmitry
> Belyavsky написал:
>
> Hi! Am back here again.
>
> I've been thinking about this patch a while... Come to some conclusions
> and
> wrote some examples...
>
> First I came to idea that the best way to simplify the code is change the
> state machine from if to switch/case. Because in your code a lot of
> repetition
> is done just because you can't say "Thats all, let's go to next symbol" in
> any
> place in the code. Now it should follow all the branches of if-else tree
> that
> is inside the state-machine "node" to get to the next symbol.
>
> To show how simpler the things would be I changed the state machine
> processing
> in lquery_in form if to switch/case, and changed the code for
> LQPRS_WAITLEVEL
> state processing, removing duplicate code, using "break" wherever it is
> possible
>
> (The indention in this example is unproper to make diff more clear)
>
> so from that much of code
> =
> if (state == LQPRS_WAITLEVEL)
> {
> if (t_isspace(ptr))
> {
> ptr += charlen;
> pos++;
> continue;
> }
>
> escaped_count = 0;
> real_levels++;
> if (charlen == 1)
> {
> if (t_iseq(ptr, '!'))
> {
> GETVAR(curqlevel) = lptr =
> (nodeitem *)
> palloc0(sizeof(nodeitem) * numOR);
> lptr->start = ptr + 1;
> state = LQPRS_WAITDELIM;
> curqlevel->numvar = 1;
> curqlevel->flag |= LQL_NOT;
> hasnot = true;
> }
> else if (t_iseq(ptr, '*'))
> state = LQPRS_WAITOPEN;
> else if (t_iseq(ptr, '\\'))
> {
> GETVAR(curqlevel) = lptr =
> (nodeitem *)
> palloc0(sizeof(nodeitem) * numOR);
> lptr->start = ptr;
> curqlevel->numvar = 1;
> state = LQPRS_WAITESCAPED;
> }
> else if (strchr(".|@%{}", *ptr))
> {
> UNCHAR;
> }
> else
> {
> GETVAR(curqlevel) = lptr =
> (nodeitem *)
> palloc0(sizeof(nodeitem) * numOR);
> lptr->start = ptr;
> state = LQPRS_WAITDELIM;
> curqlevel->numvar = 1;
> if (t_iseq(ptr, '"'))
> {
> lptr->flag |=
> LVAR_QUOTEDPART;
> }
> }
> }
> else
> {
> GETVAR(curqlevel) = lptr = (nodeitem *)
> palloc0(sizeof(nodeitem) *
> numOR);
> lptr->start = ptr;
> state = LQPRS_WAITDELIM;
> curqlevel->numvar = 1;
> }
> }
> =
> I came to this
> =
>  case LQPRS_WAITLEVEL:
> if (t_isspace(ptr))
> break; /* Just go to next symbol */
> escaped_count = 0;
> real_levels++;
>
> if (charlen == 1)
> {
> if (strchr(".|@%{}", *ptr))
> UNCHAR;
> if (t_iseq(ptr, '*'))
> {
> state = LQPRS_WAITOPEN;
> break;
> }
> }
> GETVAR(curqlevel) = lptr = (nodeitem *)
> palloc0(sizeof(nodeitem) *
> numOR);
> lptr->start = ptr;
> curqlevel->numvar = 1;
> state = LQPRS_WAITDELIM;

Re: Ltree syntax improvement

2019-07-08 Thread Thomas Munro

On Wed, Apr 17, 2019 at 5:29 AM Dmitry Belyavsky  wrote:
> I've applied your patch.
> From my point of view, there is no major difference between case and chain if 
> here.
> Neither case nor ifs allow extracting the common code to separate function - 
> just because there seem to be no identical pieces of code.

Hi Dmitry,

The documentation doesn't build[1], due to invalid XML.  Since I'm
here, here is some proof-reading of the English in the documentation:

   
-   A label is a sequence of alphanumeric characters
-   and underscores (for example, in C locale the characters
-   A-Za-z0-9_ are allowed).  Labels must be less
than 256 bytes
-   long.
+   A label is a sequence of characters. Labels must be
+   less than 256 symbols long. Label may contain any character
supported by Postgres

"fewer than 256 characters in length", and
"PostgreSQL"

+   except \0. If label contains spaces, dots,
lquery modifiers,

"spaces, dots or lquery modifiers,"

+   they may be escaped. Escaping can be done
either by preceeding
+   backslash (\\) symbol, or by wrapping the label
in whole in double
+   quotes ("). Initial and final unescaped
whitespace is stripped.

"Escaping can be done with either a preceding backslash [...] or by
wrapping the whole label in double quotes [...]."

   

+During converting text into internal representations, wrapping
double quotes

"During conversion to internal representation, "

+and escaping backslashes are removed. During converting internal
+representations into text, if the label does not contain any special

"During conversion from internal representation to text, "

+symbols, it is printed as is. Otherwise, it is wrapped in quotes and, if
+there are internal quotes, they are escaped with backslash. The
list of special

"escaped with backslashes."

+  
+Examples: 42, "\\42",
+\\4\\2,  42  and  "42"
+ will have the similar internal representation and, being

"will all have the same internal representation and,"

+converted from internal representation, will become 42.
+Literal abc def will turn into "abc
+def".
   

[1] https://travis-ci.org/postgresql-cfbot/postgresql/builds/71856

--
Thomas Munro
https://enterprisedb.com

Re: Ltree syntax improvement

2019-07-08 Thread Dmitry Belyavsky

Dear Thomas,

Thank you for your proofreading!

Please find the updated patch attached. It also contains the missing
escaping.

On Mon, Jul 8, 2019 at 10:39 AM Thomas Munro  wrote:

> On Wed, Apr 17, 2019 at 5:29 AM Dmitry Belyavsky 
> wrote:
> > I've applied your patch.
> > From my point of view, there is no major difference between case and
> chain if here.
> > Neither case nor ifs allow extracting the common code to separate
> function - just because there seem to be no identical pieces of code.
>
> Hi Dmitry,
>
> The documentation doesn't build[1], due to invalid XML.  Since I'm
> here, here is some proof-reading of the English in the documentation:
>
>
> -   A label is a sequence of alphanumeric characters
> -   and underscores (for example, in C locale the characters
> -   A-Za-z0-9_ are allowed).  Labels must be less
> than 256 bytes
> -   long.
> +   A label is a sequence of characters. Labels
> must be
> +   less than 256 symbols long. Label may contain any character
> supported by Postgres
>
> "fewer than 256 characters in length", and
> "PostgreSQL"
>
> +   except \0. If label contains spaces, dots,
> lquery modifiers,
>
> "spaces, dots or lquery modifiers,"
>
> +   they may be escaped. Escaping can be done
> either by preceeding
> +   backslash (\\) symbol, or by wrapping the label
> in whole in double
> +   quotes ("). Initial and final unescaped
> whitespace is stripped.
>
> "Escaping can be done with either a preceding backslash [...] or by
> wrapping the whole label in double quotes [...]."
>
>
>
> +During converting text into internal representations, wrapping
> double quotes
>
> "During conversion to internal representation, "
>
> +and escaping backslashes are removed. During converting internal
> +representations into text, if the label does not contain any special
>
> "During conversion from internal representation to text, "
>
> +symbols, it is printed as is. Otherwise, it is wrapped in quotes and,
> if
> +there are internal quotes, they are escaped with backslash. The
> list of special
>
> "escaped with backslashes."
>
> +  
> +Examples: 42, "\\42",
> +\\4\\2,  42  and  "42"
> + will have the similar internal representation and, being
>
> "will all have the same internal representation and,"
>
> +converted from internal representation, will become
> 42.
> +Literal abc def will turn into "abc
> +def".
>
>
> [1] https://travis-ci.org/postgresql-cfbot/postgresql/builds/71856
>
> --
> Thomas Munro
> https://enterprisedb.com
>


-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789

Re: Ltree syntax improvement

2019-07-08 Thread Alvaro Herrera

On 2019-Jul-08, Dmitry Belyavsky wrote:

> Dear Thomas,
> 
> Thank you for your proofreading!
> 
> Please find the updated patch attached. It also contains the missing
> escaping.

I think all these functions you're adding should have a short sentence
explaining what it does.

I'm not really convinced that we need this much testing.  It seems a bit
excessive.  Many of these very focused test SQL lines could be removed
with no loss of coverage, and many of the others could be grouped into
one.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Re: Ltree syntax improvement

2019-07-08 Thread Dmitry Belyavsky

Dear Alvaro,

On Mon, Jul 8, 2019 at 11:16 PM Alvaro Herrera 
wrote:

> On 2019-Jul-08, Dmitry Belyavsky wrote:
>
> > Dear Thomas,
> >
> > Thank you for your proofreading!
> >
> > Please find the updated patch attached. It also contains the missing
> > escaping.
>
> I think all these functions you're adding should have a short sentence
> explaining what it does.
>
> I'm not really convinced that we need this much testing.  It seems a bit
> excessive.  Many of these very focused test SQL lines could be removed
> with no loss of coverage, and many of the others could be grouped into
> one.
>

I did not introduce any functions. I've just changed the parser.
I'm not sure that it makes sense to remove any tests as most of them were
written to catch really happened bugs during the implementation.

-- 
SY, Dmitry Belyavsky

Re: Ltree syntax improvement

2019-07-08 Thread Alvaro Herrera

On 2019-Jul-08, Dmitry Belyavsky wrote:

> I did not introduce any functions. I've just changed the parser.

I mean the C-level functions -- count_parts_ors() and so on.

> I'm not sure that it makes sense to remove any tests as most of them were
> written to catch really happened bugs during the implementation.

Well, I don't mean to decrease the coverage, only to condense a lot of
little tests in a small powerful test.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Re: Ltree syntax improvement

2019-07-09 Thread Dmitry Belyavsky

On Mon, Jul 8, 2019 at 11:33 PM Alvaro Herrera 
wrote:

> On 2019-Jul-08, Dmitry Belyavsky wrote:
>
> > I did not introduce any functions. I've just changed the parser.
>
> I mean the C-level functions -- count_parts_ors() and so on.
>
> Added a comment to  count_parts_ors()

The other functions introduced by me were already described.

-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\z\z\z\z\z')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789z
+(1 row)
+
+SELECT('   ' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\a\b\c\d\e   ')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcde
+(1 row)
+
+SELECT 'abc\|d'::lquery;
+ lquery  
+-
+ "abc|d"
+(1 row)
+
+SELECT 'abc\|d'::ltree ~ 'abc\|d'::lquery;
+ ?column? 
+--

Re: Ltree syntax improvement

2019-07-11 Thread Thomas Munro

On Wed, Jul 10, 2019 at 7:40 AM Dmitry Belyavsky  wrote:
> [ltree_20190709.diff]

Hi Dmitry,

You need to update contrib/ltree_plpython/expected/ltree_plpython.out,
otherwise check-world fails when built with Python support.  The good
news is that it looks like it fails because you fixed something!
(Though I didn't check the details).

 CREATE FUNCTION test2() RETURNS ltree
 LANGUAGE plpythonu
 TRANSFORM FOR TYPE ltree
 AS $$
 return ['foo', 'bar', 'baz']
 $$;
 -- plpython to ltree is not yet implemented, so this will fail,
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
-CONTEXT:  while creating return value
-PL/Python function "test2"
+  test2
+-
+ "['foo', 'bar', 'baz']"
+(1 row)
+

-- 
Thomas Munro
https://enterprisedb.com

Re: Ltree syntax improvement

2019-07-11 Thread Dmitry Belyavsky

Dear Thomas,

On Thu, Jul 11, 2019 at 11:20 AM Thomas Munro 
wrote:

> On Wed, Jul 10, 2019 at 7:40 AM Dmitry Belyavsky 
> wrote:
> > [ltree_20190709.diff]
>
> Hi Dmitry,
>
> You need to update contrib/ltree_plpython/expected/ltree_plpython.out,
> otherwise check-world fails when built with Python support.  The good
> news is that it looks like it fails because you fixed something!
> (Though I didn't check the details).
>
>  CREATE FUNCTION test2() RETURNS ltree
>  LANGUAGE plpythonu
>  TRANSFORM FOR TYPE ltree
>  AS $$
>  return ['foo', 'bar', 'baz']
>  $$;
>  -- plpython to ltree is not yet implemented, so this will fail,
>  -- because it will try to parse the Python list as an ltree input
>  -- string.
>  SELECT test2();
> -ERROR:  syntax error at position 0
> -CONTEXT:  while creating return value
> -PL/Python function "test2"
> +  test2
> +-
> + "['foo', 'bar', 'baz']"
> +(1 row)
> +
>
>
See attached. I'm not familiar enough with python so I just removed the
failing tests.
If the main patch is accepted, the ltree_python extension should be
redesigned, I think...

-- 
SY, Dmitry Belyavsky
diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index 8226930905..5f45726229 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -7679,3 +7680,1587 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+---
+ xa
+(1 row)
+
+SELECT E'x\\n'::ltree;
+ ltree 
+---
+ xn
+(1 row)
+
+SELECT 'a b.с d'::ltree;
+ltree
+-
+ "a b"."с d"
+(1 row)
+
+SELECT ' e . f '::ltree;
+ ltree 
+---
+ e.f
+(1 row)
+
+SELECT ' '::ltree;
+ ltree 
+---
+ 
+(1 row)
+
+SELECT E'\\ g  . h\\ '::ltree;
+   ltree   
+---
+ " g"."h "
+(1 row)
+
+SELECT E'\\ g'::ltree;
+ ltree 
+---
+ " g"
+(1 row)
+
+SELECT E' h\\ '::ltree;
+ ltree 
+---
+ "h "
+(1 row)
+
+SELECT '" g  "." h "'::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT '" g  " '::ltree;
+ ltree  
+
+ " g  "
+(1 row)
+
+SELECT '" g  "   ." h "  '::ltree;
+ltree 
+--
+ " g  "." h "
+(1 row)
+
+SELECT nlevel(E'Bottom\\.Test'::ltree);
+ nlevel 
+
+  1
+(1 row)
+
+SELECT subpath(E'Bottom\\.'::ltree, 0, 1);
+  subpath  
+---
+ "Bottom."
+(1 row)
+
+SELECT subpath(E'a\\.b', 0, 1);
+ subpath 
+-
+ "a.b"
+(1 row)
+
+SELECT subpath(E'a\\..b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a\\..\\b', 1, 1);
+ subpath 
+-
+ b
+(1 row)
+
+SELECT subpath(E'a b.с d'::ltree, 1, 1);
+ subpath 
+-
+ "с d"
+(1 row)
+
+SELECT(
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\z\z\z\z\z')::ltree;
+  ltree  
+-
+ 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789z
+(1 row)
+
+SELECT('   ' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'01234567890123456789012345678901234567890123456789' ||
+'\a\b\c\d\e   ')::ltree;
+  ltree

Re: Ltree syntax improvement

2019-07-17 Thread Nikita Glukhov


Hi!

I have looked at the patch and found some problems.


1. I fixed some bugs (fixed patch with additional test cases is attached):

-- NULL 'lptr' pointer dereference at lquery_in()
=# SELECT '*'::lquery;
-- crash

-- '|' after '*{n}' is wrongly handled (LQPRS_WAITEND state)
=# SELECT '*{1}|2'::lquery;
WARNING:  problem in alloc set MessageContext: req size > alloc size for chunk 
0x1c2d508 in block 0x1c2bc58
WARNING:  problem in alloc set MessageContext: req size > alloc size for chunk 
0x1c2d508 in block 0x1c2bc58
   lquery
-
 *{1}.*{,48}
(1 row)

-- wrong handling of trailing whitespace
=# SELECT '"a" '::ltree;
ERROR:  name of level is empty
LINE 1: SELECT '"a" '::ltree;
   ^
DETAIL:  Name length is 0 in position 4.


=# SELECT '"a" '::lquery;
ERROR:  name of level is empty
LINE 1: SELECT '"a" '::lquery;
   ^
DETAIL:  Name length is 0 in position 4.


-- backslashes are not escaped in ltree_out()/lquery_out(),
-- which is not consistent with ltree_in()/lquery_in()
=# SELECT '"\\"'::ltree;
 ltree
---
 "\"
(1 row)

=# SELECT '"\\"'::lquery;
 lquery

 "\"
(1 row)

=# SELECT '"\\"'::ltree::text::ltree;
ERROR:  syntax error
DETAIL:  Unexpected end of line.

=# SELECT '"\\"'::lquery::text::lquery;
ERROR:  syntax error
DETAIL:  Unexpected end of line.



2. There are inconsistencies in whitespace handling before and after *, @, %, {}
(I have not fixed this because I'm not sure how it is supposed to work):

-- whitespace before '*' is not ignored
=# SELECT '"a" *'::lquery;
 lquery

 "a\""*
(1 row)

=# SELECT 'a *'::lquery;
 lquery

 "a "*
(1 row)

-- whitespace after '*' and '{}' is disallowed
=# SELECT 'a* .b'::lquery;
ERROR:  syntax error at position 2
LINE 1: SELECT 'a* .b'::lquery;
   ^

=# SELECT 'a* |b'::lquery;
ERROR:  syntax error at position 2
LINE 1: SELECT 'a* |b'::lquery;
   ^

=# SELECT '*{1} .a'::lquery;
ERROR:  syntax error at position 4
LINE 1: SELECT '*{1} .a'::lquery;
   ^

-- but the whitespace after levels without '*@%{}' is allowed
=# SELECT 'a |b'::lquery;
 lquery

 a|b
(1 row)



3. Empty level names between '!' and '|' are allowed.  This behavior can be
seen on master, so it seems that we cannot fix it now:

-- master
=# SELECT '!|a'::lquery;
 lquery

 !|a
(1 row)

-- patched
=# SELECT '!|a'::lquery;
 lquery

 !""|a
(1 row)

-- empty level names in other places are disallowed
=# SELECT '!a|'::lquery;
ERROR:  syntax error
LINE 1: SELECT '!a|'::lquery;
   ^
DETAIL:  Unexpected end of line.

=# SELECT '|a'::lquery;
ERROR:  syntax error at position 0
LINE 1: SELECT '|a'::lquery;
   ^


4. It looks strange to me that leading and trailing unquoted whitespace is
ignored, but the internal whitespace is treated like a quoted:

=# SELECT ' a b   .  c d '::lquery;
 lquery
-
 "a b"."c d"
(1 row)

I would prefer unquoted unescaped whitespace to be a delimiter always.


5. It seems wrong to me that ltree and lquery have different special character
sets now.  This leads to the fact that arbitrary ltree text cannot be used
directly as lquery text, as it seemed to be before the syntax improvements:

=# SELECT 'a|b'::ltree::text::lquery;
 lquery

 a|b
(1 row)

=# SELECT '"a|b"'::ltree::text::lquery;
 lquery

 a|b
(1 row)

=# SELECT '"a|b"'::lquery;
 lquery

 "a|b"
(1 row)

There might not be a problem if we had ltree::lquery cast.

Also I think that text[]::ltree/ltree::text[] casts for ltree
construction/deconstruction from text level names can be very useful.


6. ltree and escpecially lquery parsing code still look too complicated for me,
and I believe that the bugs described above are a direct consequence of this.
So the code needs to be refactored, maybe even without using of state machines.



On 11.07.2019 20:49, Dmitry Belyavsky wrote:


On Thu, Jul 11, 2019 at 11:20 AM Thomas Munro > wrote:


On Wed, Jul 10, 2019 at 7:40 AM Dmitry Belyavsky
mailto:beld...@gmail.com>> wrote:
> [ltree_20190709.diff]

You need to update contrib/ltree_plpython/expected/ltree_plpython.out,
otherwise check-world fails when built with Python support. The good
news is that it looks like it fails because you fixed something!
(Though I didn't check the details).

 CREATE FUNCTION test2() RETURNS ltree
 LANGUAGE plpythonu
 TRANSFORM FOR TYPE ltree
 AS $$
 return ['foo', 'bar', 'baz']
 $$;
 -- plpython to ltree is not yet implemented, so this will fail,
 -- because it will try to parse the Python list as an ltree input
 -- string.
 SELECT test2();
-ERROR:  syntax error at position 0
-CONTEXT:  while creating return value
-PL/Python function "test2"
+          test2
+-
+ "['foo', 'bar', 'baz']"
+(1 row)
+


See attached. I'm not familiar enough with python so I just rem

Re: Ltree syntax improvement

2019-08-01 Thread Thomas Munro

On Thu, Jul 18, 2019 at 1:28 AM Nikita Glukhov  wrote:
> 1. I fixed some bugs (fixed patch with additional test cases is attached):

Hi Nikita,

Thanks.  I have set this to "Needs review", in the September 'fest.

-- 
Thomas Munro
https://enterprisedb.com

Re: Ltree syntax improvement

2020-07-01 Thread Daniel Gustafsson

> On 4 Apr 2020, at 01:26, Nikita Glukhov  wrote:

> Fixed patch attached.

This patch cause a regression in the ltree_plpython module, it needs the below
trivial change to make it pass again:

--- a/contrib/ltree_plpython/expected/ltree_plpython.out
+++ b/contrib/ltree_plpython/expected/ltree_plpython.out
@@ -39,5 +39,6 @@ $$;
 -- string.
 SELECT test2();
 ERROR:  ltree syntax error at character 1
+DETAIL:  Unexpected character
 CONTEXT:  while creating return value
 PL/Python function "test2"

Please submit a rebased version of the patch.

cheers ./daniel

Re: Ltree syntax improvement

2020-01-06 Thread Tomas Vondra


Hi,

This patch got mostly ignored since 2019-07 commitfest :-( The latest
patch (sent by Nikita) does not apply because of a minor conflict in
contrib/ltree/ltxtquery_io.c.

I see the patch removes a small bit of ltree_plpython tests which would
otherwise fail (with the "I don't know plpython" justification). Why not
to instead update the tests to accept the new output? Or is it really
the case that the case that we no longer need those tests?

The patch also reworks some parts from "if" to "switch" statements. I
agree switch statements are more readable, but maybe we should do this
in two steps - first adopting the "switch" without changing the logic,
and then making changes. But maybe that's an overkill.


regards

--
Tomas Vondra  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

Re: Ltree syntax improvement

2020-01-21 Thread Dmitry Belyavsky

Dear Tomas,

If the C part will be reviewed and considered mergeable, I'll update the
plpython tests.

On Mon, Jan 6, 2020 at 4:49 PM Tomas Vondra 
wrote:

> Hi,
>
> This patch got mostly ignored since 2019-07 commitfest :-( The latest
> patch (sent by Nikita) does not apply because of a minor conflict in
> contrib/ltree/ltxtquery_io.c.
>
> I see the patch removes a small bit of ltree_plpython tests which would
> otherwise fail (with the "I don't know plpython" justification). Why not
> to instead update the tests to accept the new output? Or is it really
> the case that the case that we no longer need those tests?
>
> The patch also reworks some parts from "if" to "switch" statements. I
> agree switch statements are more readable, but maybe we should do this
> in two steps - first adopting the "switch" without changing the logic,
> and then making changes. But maybe that's an overkill.
>
>
> regards
>
> --
> Tomas Vondra  http://www.2ndQuadrant.com
> PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>


-- 
SY, Dmitry Belyavsky

Re: Ltree syntax improvement

2020-01-21 Thread Tom Lane

Dmitry Belyavsky  writes:
> If the C part will be reviewed and considered mergeable, I'll update the
> plpython tests.

I haven't looked at any of the code involved in this, but I did examine
the "failing" plpython test, and I'm quite distressed about that change
of behavior.  Simply removing the test case is certainly not okay,
and I do not think that just changing it to accept this new behavior
is okay either.  As Nikita said upthread:

>> 7. ltree_plpython test does not fail now because Python list is converted to 
>> a
>> text and then to a ltree, and the textual representation of a Python list has
>> become a valid ltree text:
>> 
>> SELECT $$['foo', 'bar', 'baz']$$::ltree;
>>ltree
>> -
>>   "['foo', 'bar', 'baz']"
>> (1 row)
>> 
>> So Python lists can be now successfully converted to ltrees without a 
>> transform,
>> but the result is not that is expected ('foo.bar.baz'::ltree).

If this case doesn't throw an error, then we're going to have a
compatibility problem whenever somebody finally gets around to
implementing the python-to-ltree transform properly, because it
would break any code that might be relying on this (wrong) behavior.

In general, I think it's a mistake to allow unquoted punctuation to be
taken as part of an ltree label, which is what this patch is evidently
doing.  By doing that, you'll make it impossible for anyone to ever
again extend the ltree syntax, because if they want to assign special
meaning to braces or whatever, they can't do so without breaking
existing applications.  For example, if the existing code allowed
double-quote or backslash as a label character, we'd already have
rejected this patch as being too big a compatibility break.  So it's
not very forward-thinking to close off future improvements like this.

Thus, what I think you should do is require non-alphanumeric label
characters to be quoted, either via double-quotes or backslashes
(although it's questionable whether we really need two independent
quoting mechanisms here).  That would preserve extensibility, and
it'd also preserve our freedom to fix ltree_plpython later, since
the case of interest would still be an error for now.  And it would
mean that you don't have subtly different rules for what's data in
ltree versus what's data in lquery or ltxtquery.

BTW, the general rule in existing backend code that's doing string
parsing is to allow non-ASCII (high-bit-set) characters to be taken as
data without inquiring too closely as to what they are.  This avoids a
bunch of locale and encoding issues without much loss of flexibility.
(If we do ever extend the ltree syntax again, we'd certainly choose
ASCII punctuation characters for whatever special symbols we need,
else the feature might not be available in all encodings.)  So for
instance in your examples involving "Ñ", it's fine to take that as a
label character without concern for locale/encoding.

I'm not sure what I think about the whitespace business.  It looks
like what you propose is to strip unquoted leading and trailing
whitespace but allow embedded whitespace.  There's precedent for that,
certainly, but I wonder whether it isn't too confusing.  In any case
you didn't document that.

regards, tom lane

Re: Ltree syntax improvement

2020-09-30 Thread Michael Paquier

On Wed, Jul 01, 2020 at 03:23:30PM +0200, Daniel Gustafsson wrote:
> Please submit a rebased version of the patch.

Which has not happened after two months, so I have marked the patch as
returned with feedback.
--
Michael


signature.asc
Description: PGP signature

Re: Ltree syntax improvement

2020-03-24 Thread Tom Lane

Nikita Glukhov  writes:
> Attached new version of the patch.

I spent a little bit of time looking through this, and have a few
comments:

* You have a lot of places where tests for particular ASCII characters
are done like this:

if ((charlen == 1) && t_iseq(src, '\\'))

This is a tedious and expensive way to spell

if (*src == '\\')

because charlen is necessarily 1 if you are looking at an ASCII character;
there is no allowed backend encoding in which an ASCII character can be
the first byte of a multibyte character.  Aside from the direct
simplifications of the tests that this makes possible, I see some places
where you'd not have to pass charlen around, either.

* I spent a fair amount of time thinking that a lot of the added code
was wrong because it was only considering escaping and not
double-quoting.  I eventually concluded that the idea is to convert
double-quoting to a pure escape-based representation during input
and store it that way.  However, I don't really see why that is either
necessary or a good idea --- the internal storage already has a length
counter for each label.  So I think what you really ought to be doing
here is simplifying out both quotes and escapes during ltree_in
and just storing the notionally-represented string internally.
(If I've misunderstood what the plan is, well the utter lack of
documentation in the patch isn't helping.)

* The added test cases seem a bit excessive and repetitive.

regards, tom lane

Re: Ltree syntax improvement

2020-03-26 Thread Nikita Glukhov


On 25.03.2020 2:08, Tom Lane wrote:


Nikita Glukhov  writes:

Attached new version of the patch.

I spent a little bit of time looking through this, and have a few
comments:

* You have a lot of places where tests for particular ASCII characters
are done like this:

if ((charlen == 1) && t_iseq(src, '\\'))

This is a tedious and expensive way to spell

if (*src == '\\')

because charlen is necessarily 1 if you are looking at an ASCII character;
there is no allowed backend encoding in which an ASCII character can be
the first byte of a multibyte character.  Aside from the direct
simplifications of the tests that this makes possible, I see some places
where you'd not have to pass charlen around, either.


All unnecessary checks of charlen were removed, but t_iseq() were left for
consistency.


* I spent a fair amount of time thinking that a lot of the added code
was wrong because it was only considering escaping and not
double-quoting.  I eventually concluded that the idea is to convert
double-quoting to a pure escape-based representation during input
and store it that way.  However, I don't really see why that is either
necessary or a good idea --- the internal storage already has a length
counter for each label.  So I think what you really ought to be doing
here is simplifying out both quotes and escapes during ltree_in
and just storing the notionally-represented string internally.
(If I've misunderstood what the plan is, well the utter lack of
documentation in the patch isn't helping.)


ltree_in() removes quotes and escapes before storing strings (see
copy_unescaped()), just as you suggest.

ltree_out() adds escapes and quotes if necessary (see copy_escaped(),
extra_bytes_for_escaping()).

I have refactored code a bit, removed duplicated code, fixed several
bugs in reallocation of output strings, and added some comments.



* The added test cases seem a bit excessive and repetitive.


I have removed some tests that have become redundant after changes in
parsing.


--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From 8ec6f93203684d63bf3c5d006c2499a71a1a9dad Mon Sep 17 00:00:00 2001
From: Nikita Glukhov 
Date: Thu, 5 Mar 2020 17:34:59 +0300
Subject: [PATCH 1/2] Replace 'if' with 'switch' in ltree code

---
 contrib/ltree/ltree_io.c | 402 ---
 1 file changed, 204 insertions(+), 198 deletions(-)

diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 900a46a..e97f035 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -69,40 +69,43 @@ ltree_in(PG_FUNCTION_ARGS)
 	{
 		charlen = pg_mblen(ptr);
 
-		if (state == LTPRS_WAITNAME)
+		switch (state)
 		{
-			if (ISALNUM(ptr))
-			{
-lptr->start = ptr;
-lptr->wlen = 0;
-state = LTPRS_WAITDELIM;
-			}
-			else
-UNCHAR;
-		}
-		else if (state == LTPRS_WAITDELIM)
-		{
-			if (charlen == 1 && t_iseq(ptr, '.'))
-			{
-lptr->len = ptr - lptr->start;
-if (lptr->wlen > 255)
-	ereport(ERROR,
-			(errcode(ERRCODE_NAME_TOO_LONG),
-			 errmsg("name of level is too long"),
-			 errdetail("Name length is %d, must "
-	   "be < 256, in position %d.",
-	   lptr->wlen, pos)));
-
-totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
-lptr++;
-state = LTPRS_WAITNAME;
-			}
-			else if (!ISALNUM(ptr))
-UNCHAR;
+			case LTPRS_WAITNAME:
+if (ISALNUM(ptr))
+{
+	lptr->start = ptr;
+	lptr->wlen = 0;
+	state = LTPRS_WAITDELIM;
+}
+else
+	UNCHAR;
+break;
+
+			case LTPRS_WAITDELIM:
+if (charlen == 1 && t_iseq(ptr, '.'))
+{
+	lptr->len = ptr - lptr->start;
+	if (lptr->wlen > 255)
+		ereport(ERROR,
+(errcode(ERRCODE_NAME_TOO_LONG),
+ errmsg("name of level is too long"),
+ errdetail("Name length is %d, must "
+		   "be < 256, in position %d.",
+		   lptr->wlen, pos)));
+
+	totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
+	lptr++;
+	state = LTPRS_WAITNAME;
+}
+else if (!ISALNUM(ptr))
+	UNCHAR;
+break;
+
+			default:
+/* internal error */
+elog(ERROR, "internal error in parser");
 		}
-		else
-			/* internal error */
-			elog(ERROR, "internal error in parser");
 
 		ptr += charlen;
 		lptr->wlen++;
@@ -238,178 +241,181 @@ lquery_in(PG_FUNCTION_ARGS)
 	{
 		charlen = pg_mblen(ptr);
 
-		if (state == LQPRS_WAITLEVEL)
-		{
-			if (ISALNUM(ptr))
-			{
-GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
-lptr->start = ptr;
-state = LQPRS_WAITDELIM;
-curqlevel->numvar = 1;
-			}
-			else if (charlen == 1 && t_iseq(ptr, '!'))
-			{
-GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
-lptr->start = ptr + 1;
-state = LQPRS_WAITDELIM;
-curqlevel->numvar = 1;
-curqlevel->flag |= LQL_NOT;
-hasnot = true;
-			}
-			else if (charlen == 1 && t_iseq(ptr, '*'))
-state =

Re: Ltree syntax improvement

2020-04-01 Thread Tom Lane

Nikita Glukhov  writes:
> [ latest version of ltree syntax extension ]

This is going to need another rebase after all the other ltree hacking
that just got done.  However, I did include 0001 (use a switch) in
the commit I just pushed, so you don't need to worry about that.

regards, tom lane

Re: Ltree syntax improvement

2020-04-02 Thread Nikita Glukhov


On 02.04.2020 2:46, Tom Lane wrote:


Nikita Glukhov  writes:

[ latest version of ltree syntax extension ]

This is going to need another rebase after all the other ltree hacking
that just got done.  However, I did include 0001 (use a switch) in
the commit I just pushed, so you don't need to worry about that.

regards, tom lane


Rebased patch attached.

I’m not sure whether it's worth to introduce one LTREE_TOK_SPECIAL for
the whole set of special characters, and still check them with t_iseq().

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company


>From 7db20b22e8bb79351e7d7b52761f7d63cd880973 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov 
Date: Tue, 16 Jul 2019 17:59:32 +0300
Subject: [PATCH] Ltree syntax improvements

---
 contrib/ltree/expected/ltree.out | 1081 +-
 contrib/ltree/ltree.h|   25 +-
 contrib/ltree/ltree_io.c |  667 +--
 contrib/ltree/ltxtquery_io.c |  130 +++--
 contrib/ltree/sql/ltree.sql  |  267 ++
 doc/src/sgml/ltree.sgml  |   38 +-
 6 files changed, 1993 insertions(+), 215 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index c6d8f3e..0c925b6 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -53,7 +54,7 @@ SELECT repeat('x', 255)::ltree;
 
 SELECT repeat('x', 256)::ltree;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
+DETAIL:  Label length is 256, must be at most 255, at character 1.
 SELECT ltree2text('1.2.3.34.sdf');
   ltree2text  
 --
@@ -336,6 +337,11 @@ SELECT lca('1.2.2.3','1.2.3.4.5.6','1');
  
 (1 row)
 
+SELECT ''::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT ''::lquery;
+   ^
+DETAIL:  Unexpected end of input.
 SELECT '1'::lquery;
  lquery 
 
@@ -480,6 +486,16 @@ SELECT 'foo*@@*'::lquery;
  foo@*
 (1 row)
 
+SELECT '*'::lquery;
+ lquery 
+
+ *
+(1 row)
+
+SELECT '*{1}|2'::lquery;
+ERROR:  lquery syntax error at character 5
+LINE 1: SELECT '*{1}|2'::lquery;
+   ^
 SELECT 'qwerty%@*.tu'::lquery;
 lquery
 --
@@ -516,17 +532,15 @@ SELECT '!.2.3'::lquery;
 ERROR:  lquery syntax error at character 2
 LINE 1: SELECT '!.2.3'::lquery;
^
-DETAIL:  Empty labels are not allowed.
 SELECT '1.!.3'::lquery;
 ERROR:  lquery syntax error at character 4
 LINE 1: SELECT '1.!.3'::lquery;
^
-DETAIL:  Empty labels are not allowed.
 SELECT '1.2.!'::lquery;
-ERROR:  lquery syntax error at character 6
+ERROR:  lquery syntax error
 LINE 1: SELECT '1.2.!'::lquery;
^
-DETAIL:  Empty labels are not allowed.
+DETAIL:  Unexpected end of input.
 SELECT '1.2.3|@.4'::lquery;
 ERROR:  lquery syntax error at character 7
 LINE 1: SELECT '1.2.3|@.4'::lquery;
@@ -539,7 +553,7 @@ SELECT (repeat('x', 255) || '*@@*')::lquery;
 
 SELECT (repeat('x', 256) || '*@@*')::lquery;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
+DETAIL:  Label length is 256, must be at most 255, at character 1.
 SELECT ('!' || repeat('x', 255))::lquery;
   lquery  
 --
@@ -548,7 +562,7 @@ SELECT ('!' || repeat('x', 255))::lquery;
 
 SELECT ('!' || repeat('x', 256))::lquery;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 258.
+DETAIL:  Label length is 256, must be at most 255, at character 2.
 SELECT nlevel('1.2.3.4');
  nlevel 
 
@@ -8084,3 +8098,1056 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)
+
+SELECT E'x\\a'::ltree;
+ ltree 
+--

Re: Ltree syntax improvement

2020-04-02 Thread Tom Lane

Nikita Glukhov  writes:
> Rebased patch attached.

Thanks for rebasing!  The cfbot's not very happy though:

4842ltxtquery_io.c: In function ‘makepol’:
4843ltxtquery_io.c:188:13: error: ‘escaped’ may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
4844  if (lenval - escaped <= 0)
4845 ^
4846ltxtquery_io.c:230:6: note: ‘escaped’ was declared here
4847  int   escaped;
4848  ^
4849cc1: all warnings being treated as errors

regards, tom lane

Re: Ltree syntax improvement

2020-04-03 Thread Nikita Glukhov


On 02.04.2020 19:16, Tom Lane wrote:


Nikita Glukhov  writes:

Rebased patch attached.

Thanks for rebasing!  The cfbot's not very happy though:

4842ltxtquery_io.c: In function ‘makepol’:
4843ltxtquery_io.c:188:13: error: ‘escaped’ may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
4844  if (lenval - escaped <= 0)
4845 ^
4846ltxtquery_io.c:230:6: note: ‘escaped’ was declared here
4847  int   escaped;
4848  ^
4849cc1: all warnings being treated as errors

regards, tom lane


Fixed patch attached.

--
Nikita Glukhov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From c4929c05423f5b29063c739f25dfebfe99681d01 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov 
Date: Tue, 16 Jul 2019 17:59:32 +0300
Subject: [PATCH] Ltree syntax improvements

---
 contrib/ltree/expected/ltree.out | 1081 +-
 contrib/ltree/ltree.h|   25 +-
 contrib/ltree/ltree_io.c |  667 +--
 contrib/ltree/ltxtquery_io.c |  118 +++--
 contrib/ltree/sql/ltree.sql  |  267 ++
 doc/src/sgml/ltree.sgml  |   38 +-
 6 files changed, 1984 insertions(+), 212 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index c6d8f3e..0c925b6 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -1,4 +1,5 @@
 CREATE EXTENSION ltree;
+SET standard_conforming_strings=on;
 -- Check whether any of our opclasses fail amvalidate
 SELECT amname, opcname
 FROM pg_opclass opc LEFT JOIN pg_am am ON am.oid = opcmethod
@@ -53,7 +54,7 @@ SELECT repeat('x', 255)::ltree;
 
 SELECT repeat('x', 256)::ltree;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
+DETAIL:  Label length is 256, must be at most 255, at character 1.
 SELECT ltree2text('1.2.3.34.sdf');
   ltree2text  
 --
@@ -336,6 +337,11 @@ SELECT lca('1.2.2.3','1.2.3.4.5.6','1');
  
 (1 row)
 
+SELECT ''::lquery;
+ERROR:  lquery syntax error
+LINE 1: SELECT ''::lquery;
+   ^
+DETAIL:  Unexpected end of input.
 SELECT '1'::lquery;
  lquery 
 
@@ -480,6 +486,16 @@ SELECT 'foo*@@*'::lquery;
  foo@*
 (1 row)
 
+SELECT '*'::lquery;
+ lquery 
+
+ *
+(1 row)
+
+SELECT '*{1}|2'::lquery;
+ERROR:  lquery syntax error at character 5
+LINE 1: SELECT '*{1}|2'::lquery;
+   ^
 SELECT 'qwerty%@*.tu'::lquery;
 lquery
 --
@@ -516,17 +532,15 @@ SELECT '!.2.3'::lquery;
 ERROR:  lquery syntax error at character 2
 LINE 1: SELECT '!.2.3'::lquery;
^
-DETAIL:  Empty labels are not allowed.
 SELECT '1.!.3'::lquery;
 ERROR:  lquery syntax error at character 4
 LINE 1: SELECT '1.!.3'::lquery;
^
-DETAIL:  Empty labels are not allowed.
 SELECT '1.2.!'::lquery;
-ERROR:  lquery syntax error at character 6
+ERROR:  lquery syntax error
 LINE 1: SELECT '1.2.!'::lquery;
^
-DETAIL:  Empty labels are not allowed.
+DETAIL:  Unexpected end of input.
 SELECT '1.2.3|@.4'::lquery;
 ERROR:  lquery syntax error at character 7
 LINE 1: SELECT '1.2.3|@.4'::lquery;
@@ -539,7 +553,7 @@ SELECT (repeat('x', 255) || '*@@*')::lquery;
 
 SELECT (repeat('x', 256) || '*@@*')::lquery;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
+DETAIL:  Label length is 256, must be at most 255, at character 1.
 SELECT ('!' || repeat('x', 255))::lquery;
   lquery  
 --
@@ -548,7 +562,7 @@ SELECT ('!' || repeat('x', 255))::lquery;
 
 SELECT ('!' || repeat('x', 256))::lquery;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 258.
+DETAIL:  Label length is 256, must be at most 255, at character 2.
 SELECT nlevel('1.2.3.4');
  nlevel 
 
@@ -8084,3 +8098,1056 @@ SELECT count(*) FROM _ltreetest WHERE t ? '{23.*.1,23.*.2}' ;
 15
 (1 row)
 
+-- Extended syntax, escaping, quoting etc
+-- success
+SELECT E'\\.'::ltree;
+ ltree 
+---
+ "."
+(1 row)
+
+SELECT E'\\ '::ltree;
+ ltree 
+---
+ " "
+(1 row)
+
+SELECT E''::ltree;
+ ltree 
+---
+ "\\"
+(1 row)
+
+SELECT E'\\a'::ltree;
+ ltree 
+---
+ a
+(1 row)
+
+SELECT E'\\n'::ltree;
+ ltree 
+---
+ n
+(1 row)
+
+SELECT E'x'::ltree;
+ ltree 
+---
+ "x\\"
+(1 row)
+
+SELECT E'x\\ '::ltree;
+ ltree 
+---
+ "x "
+(1 row)
+
+SELECT E'x\\.'::ltree;
+ ltree 
+---
+ "x."
+(1 row)

Re: Ltree syntax improvement

2020-04-04 Thread Tom Lane

Nikita Glukhov  writes:
> Fixed patch attached.

I looked through this again, and I still don't feel very happy with it.

As I mentioned upthread, I'm not convinced that we ought to have
*both* quoting and backslash-escaping in these datatypes.  TIMTOWTDI
may be a virtue to the Perl crowd, but to a lot of other people it's
just confusing and extra complication.  In particular it will translate
directly to extra complication in every application that wants to use
non-alphanumeric labels, since they'll have to be prepared to deparse
whatever style the backend happens to put out.  It's also adding a far
from trivial amount of complication to the patch itself.  And it adds
bug-prone ambiguity; for instance, the patch fails to document whether
backslashes do anything special within quotes.

On balance, since the existing limitations on labels are an approximation
of SQL identifier rules, I think it'd be a good idea to use SQL-identifier
quoting rules; that is, you have to use quotes for anything that isn't
alphanumeric, backslashes are not special, and the way to get a double
quote that's data is to write two adjacent double quotes.  That's simple
and it doesn't create any confusion when writing an ltree value inside
a SQL literal.  It's less great if you're trying to write an ltree within
an array or record literal value, but we can't have everything (and the
backslash alternative would be no better for that anyway).

I've still got mixed emotions about the exception of allowing whitespace
to be stripped before or after a label.  While that probably doesn't pose
any forward-compatibility hazards (ie, it's unlikely we'd want to redefine
such syntax to mean something different), I'm not sure it passes the
least-confusion test.  We were just getting beat up a couple days ago
about the fact that record_in is lax about whitespace [1][2], so maybe
we shouldn't make that same choice here.

As far as the code itself goes, I don't think the code structure in
ltree_io.c is very well chosen.  There seem to be half a dozen routines
that are all intimately dependent on the quoting/escaping rules, and
it's really pretty hard to be sure that they're all in sync (and if
they're not, that's going to lead to crashes and perhaps security-grade
bugs).  Moreover it looks like you end up making multiple passes over the
input, so it's inefficient as well as hard to understand/maintain.  Given
the choice to have a separate frontend function that recognizes a label as
a single token, it seems like it'd be better if that function were also
responsible for generating a de-quoted label value as it went.

I also feel that not enough attention has been paid to the error
reporting.  For instance, the patch changes the longstanding policy of
reporting an overlength label with its end position:

@@ -53,7 +54,7 @@ SELECT repeat('x', 255)::ltree;
 
 SELECT repeat('x', 256)::ltree;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
+DETAIL:  Label length is 256, must be at most 255, at character 1.
 SELECT ltree2text('1.2.3.34.sdf');
   ltree2text  
 --

That might be an OK choice in a green field, but this isn't a green field,
even if we did change the wording of the message since v12.  I also noted
some random inconsistencies such as

regression=# select '1234567890*1234567890'::ltree;
ERROR:  ltree syntax error at character 11
LINE 1: select '1234567890*1234567890'::ltree;
   ^
regression=# select '1234567890+1234567890'::ltree;
ERROR:  ltree syntax error at character 11
LINE 1: select '1234567890+1234567890'::ltree;
   ^
DETAIL:  Unexpected character

Apparently that's because * is special to lquery while + isn't, but
that distinction really shouldn't matter to ltree.  (This DETAIL
message isn't close to following project style for detail messages,
either.)  It'd probably be better if the frontend function didn't
contain assumptions about which punctuation characters are important.

Another thought here, though it's not really the fault of this patch,
is that I really think ltree ought to go over to a treatment of
non-ASCII characters that's more like the core parser's, ie anything
that isn't ASCII is data (or assumed-to-be-alphanumeric, if you prefer).
The trouble with the current definition is that it's dependent on
LC_CTYPE, so labels that are acceptable to one database might not be
acceptable elsewhere.  We could remove that hazard, and as a bonus
eliminate some possibly-expensive character classification tests,
if we just said all non-ASCII characters are data.

regards, tom lane

[1] 
https://www.postgresql.org/message-id/bce3f9fc-6be9-44d8-ad25-f5ff109c7...@yugabyte.com
[2] 
https://www.postgresql.org/message-id/158593753274.7901.11178770123847486396%40wrigleys.postgresql.org

Re: Re: Ltree syntax improvement

2019-03-07 Thread David Steele


Hi Dmitry,

This patch appears too complex to be submitted to the last CF, as Andres 
has also noted [1].


I have set the target version to PG13.

Regards,
--
-David
da...@pgmasters.net

[1] 
https://www.postgresql.org/message-id/20190216054526.zss2cufdxfeudr4i%40alap3.anarazel.de

Re: Re: Ltree syntax improvement

2019-03-07 Thread David Steele


Hi Dmitry,

This patch appears too complex to be submitted to the last CF, as Andres 
has also noted [1].


I have set the target version to PG13.

Regards,
--
-David
da...@pgmasters.net

[1] 
https://www.postgresql.org/message-id/20190216054526.zss2cufdxfeudr4i%40alap3.anarazel.de

53 matches

Mail list logo