Re: Issue with point_ops and NaN

2021-04-03 Thread Julien Rouhaud
Le jeu. 1 avr. 2021 à 15:54, Laurenz Albe  a
écrit :

> On Thu, 2021-04-01 at 09:35 +0900, Kyotaro Horiguchi wrote:
> > > > > > > > > SELECT point('NaN','NaN') <@
> polygon('(0,0),(1,0),(1,1),(0,0)');
> > > > > > > > > ?column?
> > > > > > > > > --
> > > > > > > > >t
> > > > > > > > >(1 row)
> > >
> > > If you think of "NaN" literally as "not a number", then FALSE would
> > > make sense, since "not a number" implies "not a number between 0 and
> 1".
> > > But since NaN is the result of operations like 0/0 or infinity -
> infinity,
> > > NULL might be better.
> > > So I'd opt for NULL too.
> >
> > Thanks.  Do you think it's acceptable that returning false instead of
> > NULL as an alternative behavior?
>
> Yes, I think that is acceptable.
>

+1 especially after looking at the poc patch you sent to handle NULLs.

>


Re: Issue with point_ops and NaN

2021-04-01 Thread Laurenz Albe
On Thu, 2021-04-01 at 09:35 +0900, Kyotaro Horiguchi wrote:
> > > > > > > > SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> > > > > > > > ?column? 
> > > > > > > > --
> > > > > > > >t
> > > > > > > >(1 row)
> > 
> > If you think of "NaN" literally as "not a number", then FALSE would
> > make sense, since "not a number" implies "not a number between 0 and 1".
> > But since NaN is the result of operations like 0/0 or infinity - infinity,
> > NULL might be better.
> > So I'd opt for NULL too.
> 
> Thanks.  Do you think it's acceptable that returning false instead of
> NULL as an alternative behavior?

Yes, I think that is acceptable.

Yours,
Laurenz Albe





Re: Issue with point_ops and NaN

2021-04-01 Thread Kyotaro Horiguchi
At Thu, 01 Apr 2021 09:34:40 +0900 (JST), Kyotaro Horiguchi 
 wrote in 
> I have to change almost all boolean-returning functions to
> tri-state-boolean ones. I'll give it try a bit futther.

The attached is a rush work of that, on top of the (rebased version of
the) base patch.  Disregarding its uneffectiveness, it gives a rough
estimate of how large that would be and how that affects other parts.

Maybe one of the largest issue with that would be that GiST doesn't
seem to like NULL to be returned from comparison functions.


regression=# set enable_seqscan to off;
regression=# set enable_indexscan to on;
regression=# SELECT * FROM circle_tbl WHERE f1 && circle(point(1,-2), 1) ORDER 
BY area(f1);
ERROR:  function 0x9d7bf6 returned NULL
(function 0x9d7bf6 is box_overlap())

That seems like the reason not to make the functions tri-state.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/utils/adt/geo_ops.c b/src/backend/utils/adt/geo_ops.c
index a2e798ff95..b9ff60f56b 100644
--- a/src/backend/utils/adt/geo_ops.c
+++ b/src/backend/utils/adt/geo_ops.c
@@ -79,7 +79,7 @@ static inline void point_add_point(Point *result, Point *pt1, 
Point *pt2);
 static inline void point_sub_point(Point *result, Point *pt1, Point *pt2);
 static inline void point_mul_point(Point *result, Point *pt1, Point *pt2);
 static inline void point_div_point(Point *result, Point *pt1, Point *pt2);
-static inline bool point_eq_point(Point *pt1, Point *pt2);
+static inline tsbool point_eq_point(Point *pt1, Point *pt2);
 static inline float8 point_dt(Point *pt1, Point *pt2);
 static inline float8 point_sl(Point *pt1, Point *pt2);
 static int point_inside(Point *p, int npts, Point *plist);
@@ -88,18 +88,18 @@ static int  point_inside(Point *p, int npts, Point *plist);
 static inline void line_construct(LINE *result, Point *pt, float8 m);
 static inline float8 line_sl(LINE *line);
 static inline float8 line_invsl(LINE *line);
-static bool line_interpt_line(Point *result, LINE *l1, LINE *l2);
-static bool line_contain_point(LINE *line, Point *point);
+static tsbool line_interpt_line(Point *result, LINE *l1, LINE *l2);
+static tsbool line_contain_point(LINE *line, Point *point);
 static float8 line_closept_point(Point *result, LINE *line, Point *pt);
 
 /* Routines for line segments */
 static inline void statlseg_construct(LSEG *lseg, Point *pt1, Point *pt2);
 static inline float8 lseg_sl(LSEG *lseg);
 static inline float8 lseg_invsl(LSEG *lseg);
-static bool lseg_interpt_line(Point *result, LSEG *lseg, LINE *line);
-static bool lseg_interpt_lseg(Point *result, LSEG *l1, LSEG *l2);
+static tsbool lseg_interpt_line(Point *result, LSEG *lseg, LINE *line);
+static tsbool lseg_interpt_lseg(Point *result, LSEG *l1, LSEG *l2);
 static int lseg_crossing(float8 x, float8 y, float8 px, float8 py);
-static bool lseg_contain_point(LSEG *lseg, Point *point);
+static tsbool lseg_contain_point(LSEG *lseg, Point *point);
 static float8 lseg_closept_point(Point *result, LSEG *lseg, Point *pt);
 static float8 lseg_closept_line(Point *result, LSEG *lseg, LINE *line);
 static float8 lseg_closept_lseg(Point *result, LSEG *on_lseg, LSEG *to_lseg);
@@ -107,14 +107,14 @@ static float8 lseg_closept_lseg(Point *result, LSEG 
*on_lseg, LSEG *to_lseg);
 /* Routines for boxes */
 static inline void box_construct(BOX *result, Point *pt1, Point *pt2);
 static void box_cn(Point *center, BOX *box);
-static bool box_ov(BOX *box1, BOX *box2);
+static tsbool box_ov(BOX *box1, BOX *box2);
 static float8 box_ar(BOX *box);
 static float8 box_ht(BOX *box);
 static float8 box_wd(BOX *box);
-static bool box_contain_point(BOX *box, Point *point);
-static bool box_contain_box(BOX *contains_box, BOX *contained_box);
-static bool box_contain_lseg(BOX *box, LSEG *lseg);
-static bool box_interpt_lseg(Point *result, BOX *box, LSEG *lseg);
+static tsbool box_contain_point(BOX *box, Point *point);
+static tsbool box_contain_box(BOX *contains_box, BOX *contained_box);
+static tsbool box_contain_lseg(BOX *box, LSEG *lseg);
+static tsbool box_interpt_lseg(Point *result, BOX *box, LSEG *lseg);
 static float8 box_closept_point(Point *result, BOX *box, Point *point);
 static float8 box_closept_lseg(Point *result, BOX *box, LSEG *lseg);
 
@@ -124,9 +124,9 @@ static float8 circle_ar(CIRCLE *circle);
 /* Routines for polygons */
 static void make_bound_box(POLYGON *poly);
 static void poly_to_circle(CIRCLE *result, POLYGON *poly);
-static bool lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start);
-static bool poly_contain_poly(POLYGON *contains_poly, POLYGON *contained_poly);
-static bool plist_same(int npts, Point *p1, Point *p2);
+static tsbool lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start);
+static tsbool poly_contain_poly(POLYGON *contains_poly, POLYGON 
*contained_poly);
+static tsbool plist_same(int npts, Point *p1, Point *p2);
 static float8 dist_ppoly_internal(Point *pt, POLYGON *poly);
 
 /* Routines for encoding and 

Re: Issue with point_ops and NaN

2021-03-31 Thread Kyotaro Horiguchi
At Wed, 31 Mar 2021 12:01:08 +0200, Laurenz Albe  
wrote in 
> On Wed, 2021-03-31 at 15:48 +0900, Kyotaro Horiguchi wrote:
> > > > > > > SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> > > > > > > ?column? 
> > > > > > > --
> > > > > > >   t
> > > > > > >   (1 row)
> > > > > 
> > > > > Agreed --- one could make an argument for either 'false' or NULL
> > > > > result, but surely not 'true'.
> > 
> > Thanks! However, Michael's suggestion is worth considering.  What do
> > you think about makeing NaN-involved comparison return NULL?  If you
> > agree to that, I'll make a further change to the patch.
> 
> If you think of "NaN" literally as "not a number", then FALSE would
> make sense, since "not a number" implies "not a number between 0 and 1".
> 
> But since NaN is the result of operations like 0/0 or infinity - infinity,
> NULL might be better.
> 
> So I'd opt for NULL too.

Thanks.  Do you think it's acceptable that returning false instead of
NULL as an alternative behavior?

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: Issue with point_ops and NaN

2021-03-31 Thread Kyotaro Horiguchi
At Wed, 31 Mar 2021 16:30:41 +0800, Julien Rouhaud  wrote 
in 
> On Wed, Mar 31, 2021 at 03:48:16PM +0900, Kyotaro Horiguchi wrote:
> > 
> > Thanks! However, Michael's suggestion is worth considering.  What do
> > you think about makeing NaN-involved comparison return NULL?  If you
> > agree to that, I'll make a further change to the patch.
> 
> As I mentioned in [1] I think that returning NULL would the right thing to do.
> But you mentioned elsewhere that it would need a lot more work to make the 
> code
> work that way, so given that we're 7 days away from the feature freeze maybe
> returning false would be a better option.  One important thing to consider is

Agreed that it's a better option.

I have to change almost all boolean-returning functions to
tri-state-boolean ones. I'll give it try a bit futther.

> that we should consistently return NULL for similar cases, and having some
> discrepancy there would be way worse than returning false everywhere.

Sure.

> [1] https://www.postgresql.org/message-id/20210330153940.vmncwnmuw3qnpkfa@nol

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: Issue with point_ops and NaN

2021-03-31 Thread Laurenz Albe
On Wed, 2021-03-31 at 15:48 +0900, Kyotaro Horiguchi wrote:
> > > > > > SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> > > > > > ?column? 
> > > > > > --
> > > > > >   t
> > > > > >   (1 row)
> > > > 
> > > > Agreed --- one could make an argument for either 'false' or NULL
> > > > result, but surely not 'true'.
> 
> Thanks! However, Michael's suggestion is worth considering.  What do
> you think about makeing NaN-involved comparison return NULL?  If you
> agree to that, I'll make a further change to the patch.

If you think of "NaN" literally as "not a number", then FALSE would
make sense, since "not a number" implies "not a number between 0 and 1".

But since NaN is the result of operations like 0/0 or infinity - infinity,
NULL might be better.

So I'd opt for NULL too.

Yours,
Laurenz Albe





Re: Issue with point_ops and NaN

2021-03-31 Thread Julien Rouhaud
On Wed, Mar 31, 2021 at 03:48:16PM +0900, Kyotaro Horiguchi wrote:
> 
> Thanks! However, Michael's suggestion is worth considering.  What do
> you think about makeing NaN-involved comparison return NULL?  If you
> agree to that, I'll make a further change to the patch.

As I mentioned in [1] I think that returning NULL would the right thing to do.
But you mentioned elsewhere that it would need a lot more work to make the code
work that way, so given that we're 7 days away from the feature freeze maybe
returning false would be a better option.  One important thing to consider is
that we should consistently return NULL for similar cases, and having some
discrepancy there would be way worse than returning false everywhere.

[1] https://www.postgresql.org/message-id/20210330153940.vmncwnmuw3qnpkfa@nol




Re: Issue with point_ops and NaN

2021-03-31 Thread Kyotaro Horiguchi
At Wed, 31 Mar 2021 15:46:16 +0900 (JST), Kyotaro Horiguchi 
 wrote in 
> At Wed, 31 Mar 2021 09:26:00 +0900, Michael Paquier  
> wrote in 
> > On Tue, Mar 30, 2021 at 11:39:40PM +0800, Julien Rouhaud wrote:
> > > On Tue, Mar 30, 2021 at 11:02:32AM -0400, Tom Lane wrote:
> > >> Agreed --- one could make an argument for either 'false' or NULL
> > >> result, but surely not 'true'.
> > > 
> > > I would think that it should return NULL since it's not inside nor 
> > > outside the
> > > polygon, but I'm fine with false.
> > 
> > Yeah, this is trying to make an undefined point fit into a box that
> > has a  definition, so "false" does not make sense to me either here as
> > it implies that the point exists?  NULL seems adapted here.
> 
> Sounds reasonable.  The function may return NULL for other cases so
> it's easily changed to NULL.
> 
> # But it's bothersome to cover all parallels..

Hmm. Many internal functions handles bool, which cannot handle the
case of NaN naturally.  In short, it's more invasive than expected.

> Does anyone oppose to make the case NULL?  If no one objects, I'll do
> that.

Mmm. I'd like to reduce from +1 to +0.7 or so, considering the amount
of needed work...

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: Issue with point_ops and NaN

2021-03-31 Thread Kyotaro Horiguchi
At Wed, 31 Mar 2021 12:04:26 +0800, Julien Rouhaud  wrote 
in 
> On Tue, Mar 30, 2021 at 11:39:40PM +0800, Julien Rouhaud wrote:
> > On Tue, Mar 30, 2021 at 11:02:32AM -0400, Tom Lane wrote:
> > > Julien Rouhaud  writes:
> > > > On Tue, Mar 30, 2021 at 02:47:05PM +0200, Laurenz Albe wrote:
> > > >> I'd say that this is certainly wrong:
> > > >> SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> > > >> 
> > > >> ?column? 
> > > >> --
> > > >>  t
> > > >>  (1 row)
> > > 
> > > > Yeah that's what I think too, but I wanted to have confirmation.
> > > 
> > > Agreed --- one could make an argument for either 'false' or NULL
> > > result, but surely not 'true'.
> > 
> > I would think that it should return NULL since it's not inside nor outside 
> > the
> > polygon, but I'm fine with false.
> > 
> > > I wonder if Horiguchi-san's patch [1] improves this case.
> > 
> > Oh I totally missed that patch :(
> > 
> > After a quick look I see this addition in point_inside():
> > 
> > +   /* NaN makes the point cannot be inside the polygon */
> > +   if (unlikely(isnan(x) || isnan(y)))
> > +   return 0;
> > 
> > So I would assume that it should fix this case too.  I'll check tomorrow.
> 
> I confirm that this patch fixes the issue, and after looking a bit more at the
> thread it's unsurprising since Jesse initially reported the exact same 
> problem.
> 
> I'll try to review it as soon as I'll be done with my work duties.

Thanks! However, Michael's suggestion is worth considering.  What do
you think about makeing NaN-involved comparison return NULL?  If you
agree to that, I'll make a further change to the patch.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: Issue with point_ops and NaN

2021-03-31 Thread Kyotaro Horiguchi
At Wed, 31 Mar 2021 09:26:00 +0900, Michael Paquier  wrote 
in 
> On Tue, Mar 30, 2021 at 11:39:40PM +0800, Julien Rouhaud wrote:
> > On Tue, Mar 30, 2021 at 11:02:32AM -0400, Tom Lane wrote:
> >> Agreed --- one could make an argument for either 'false' or NULL
> >> result, but surely not 'true'.
> > 
> > I would think that it should return NULL since it's not inside nor outside 
> > the
> > polygon, but I'm fine with false.
> 
> Yeah, this is trying to make an undefined point fit into a box that
> has a  definition, so "false" does not make sense to me either here as
> it implies that the point exists?  NULL seems adapted here.

Sounds reasonable.  The function may return NULL for other cases so
it's easily changed to NULL.

# But it's bothersome to cover all parallels..

Does anyone oppose to make the case NULL?  If no one objects, I'll do
that.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: Issue with point_ops and NaN

2021-03-30 Thread Julien Rouhaud
On Tue, Mar 30, 2021 at 11:39:40PM +0800, Julien Rouhaud wrote:
> On Tue, Mar 30, 2021 at 11:02:32AM -0400, Tom Lane wrote:
> > Julien Rouhaud  writes:
> > > On Tue, Mar 30, 2021 at 02:47:05PM +0200, Laurenz Albe wrote:
> > >> I'd say that this is certainly wrong:
> > >> SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> > >> 
> > >> ?column? 
> > >> --
> > >>  t
> > >>  (1 row)
> > 
> > > Yeah that's what I think too, but I wanted to have confirmation.
> > 
> > Agreed --- one could make an argument for either 'false' or NULL
> > result, but surely not 'true'.
> 
> I would think that it should return NULL since it's not inside nor outside the
> polygon, but I'm fine with false.
> 
> > I wonder if Horiguchi-san's patch [1] improves this case.
> 
> Oh I totally missed that patch :(
> 
> After a quick look I see this addition in point_inside():
> 
> + /* NaN makes the point cannot be inside the polygon */
> + if (unlikely(isnan(x) || isnan(y)))
> + return 0;
> 
> So I would assume that it should fix this case too.  I'll check tomorrow.

I confirm that this patch fixes the issue, and after looking a bit more at the
thread it's unsurprising since Jesse initially reported the exact same problem.

I'll try to review it as soon as I'll be done with my work duties.




Re: Issue with point_ops and NaN

2021-03-30 Thread Michael Paquier
On Tue, Mar 30, 2021 at 11:39:40PM +0800, Julien Rouhaud wrote:
> On Tue, Mar 30, 2021 at 11:02:32AM -0400, Tom Lane wrote:
>> Agreed --- one could make an argument for either 'false' or NULL
>> result, but surely not 'true'.
> 
> I would think that it should return NULL since it's not inside nor outside the
> polygon, but I'm fine with false.

Yeah, this is trying to make an undefined point fit into a box that
has a  definition, so "false" does not make sense to me either here as
it implies that the point exists?  NULL seems adapted here.
--
Michael


signature.asc
Description: PGP signature


Re: Issue with point_ops and NaN

2021-03-30 Thread Julien Rouhaud
On Tue, Mar 30, 2021 at 11:02:32AM -0400, Tom Lane wrote:
> Julien Rouhaud  writes:
> > On Tue, Mar 30, 2021 at 02:47:05PM +0200, Laurenz Albe wrote:
> >> I'd say that this is certainly wrong:
> >> SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> >> 
> >> ?column? 
> >> --
> >>  t
> >>  (1 row)
> 
> > Yeah that's what I think too, but I wanted to have confirmation.
> 
> Agreed --- one could make an argument for either 'false' or NULL
> result, but surely not 'true'.

I would think that it should return NULL since it's not inside nor outside the
polygon, but I'm fine with false.

> I wonder if Horiguchi-san's patch [1] improves this case.

Oh I totally missed that patch :(

After a quick look I see this addition in point_inside():

+   /* NaN makes the point cannot be inside the polygon */
+   if (unlikely(isnan(x) || isnan(y)))
+   return 0;

So I would assume that it should fix this case too.  I'll check tomorrow.




Re: Issue with point_ops and NaN

2021-03-30 Thread Tom Lane
Julien Rouhaud  writes:
> On Tue, Mar 30, 2021 at 02:47:05PM +0200, Laurenz Albe wrote:
>> I'd say that this is certainly wrong:
>> SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
>> 
>> ?column? 
>> --
>>  t
>>  (1 row)

> Yeah that's what I think too, but I wanted to have confirmation.

Agreed --- one could make an argument for either 'false' or NULL
result, but surely not 'true'.

I wonder if Horiguchi-san's patch [1] improves this case.

regards, tom lane

[1] https://commitfest.postgresql.org/32/2710/




Re: Issue with point_ops and NaN

2021-03-30 Thread Julien Rouhaud
On Tue, Mar 30, 2021 at 02:47:05PM +0200, Laurenz Albe wrote:
> On Tue, 2021-03-30 at 17:57 +0800, Julien Rouhaud wrote:
> > 
> > Getting a consistent behavior shouldn't be hard, but I'm unsure which 
> > behavior
> > is actually correct.
> 
> I'd say that this is certainly wrong:
> 
> SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');
> 
>  ?column? 
> --
>  t
> (1 row)

Yeah that's what I think too, but I wanted to have confirmation.




Re: Issue with point_ops and NaN

2021-03-30 Thread Laurenz Albe
On Tue, 2021-03-30 at 17:57 +0800, Julien Rouhaud wrote:
> While running some sanity checks on the regression tests, I found one test 
> that
> returns different results depending on whether an index or a sequential scan 
> is
> used.
> 
> Minimal reproducer:
> 
> =# CREATE TABLE point_tbl AS select '(nan,nan)'::point f1;
> =# CREATE INDEX ON point_tbl USING gist(f1);
> 
> =# EXPLAIN SELECT * FROM point_tbl WHERE f1 <@ polygon 
> '(0,0),(0,100),(100,100),(50,50),(100,0),(0,0)';
>   QUERY PLAN
> --
>  Seq Scan on point_tbl  (cost=0.00..1.01 rows=1 width=16)
>Filter: (f1 <@ '((0,0),(0,100),(100,100),(50,50),(100,0),(0,0))'::polygon)
> (2 rows)
> 
> =# SELECT * FROM point_tbl WHERE f1 <@ polygon 
> '(0,0),(0,100),(100,100),(50,50),(100,0),(0,0)';
> f1
> ---
>  (NaN,NaN)
> (1 row)
> 
> SET enable_seqscan = 0;
> 
> 
> =# EXPLAIN SELECT * FROM point_tbl WHERE f1 <@ polygon 
> '(0,0),(0,100),(100,100),(50,50),(100,0),(0,0)';
>QUERY PLAN 
>   
> 
>  Index Only Scan using point_tbl_f1_idx on point_tbl  (cost=0.12..8.14 rows=1 
> width=16)
>Index Cond: (f1 <@ 
> '((0,0),(0,100),(100,100),(50,50),(100,0),(0,0))'::polygon)
> (2 rows)
> 
> =# SELECT * FROM point_tbl WHERE f1 <@ polygon 
> '(0,0),(0,100),(100,100),(50,50),(100,0),(0,0)';
>  f1 
> 
> (0 rows)
> 
> The discrepancy comes from the fact that the sequential scan checks the
> condition using point_inside() / lseg_crossing(), while the gist index will
> check the condition using box_overlap() / box_ov(), which have different
> opinions on how to handle NaN.
> 
> Getting a consistent behavior shouldn't be hard, but I'm unsure which behavior
> is actually correct.

I'd say that this is certainly wrong:

SELECT point('NaN','NaN') <@ polygon('(0,0),(1,0),(1,1),(0,0)');

 ?column? 
--
 t
(1 row)

Yours,
Laurenz Albe