Re: Remove dependence on integer wrapping

2024-08-07 Thread Joseph Koshakow
On Wed, Aug 7, 2024 at 11:08 AM Nathan Bossart 
wrote:
>
> I started looking at 0001 again with the intent of committing it, and this
> caught my eye:
>
> -/* make the amount positive for digit-reconstruction loop */
> -value = -value;
> +/*
> + * make the amount positive for digit-reconstruction loop, we can
> + * leave INT64_MIN unchanged
> + */
> +pg_neg_s64_overflow(value, );
>
> The comment mentions that we can leave the minimum value unchanged, but it
> doesn't explain why.  Can we explain why?

I went back to try and figure this out and realized that it would be
much simpler to just convert value to an unsigned integer and not worry
about overflow. So I've updated the patch to do that.

> +static inline bool
> +pg_neg_s64_overflow(int64 a, int64 *result)
> +{
> +if (unlikely(a == PG_INT64_MIN))
> +{
> +return true;
> +}
> +else
> +{
> +*result = -a;
> +return false;
> +}
> +}
>
> Can we add a comment that these routines do not set "result" when true is
> returned?

I've added a comment to the top of the file where we describe the
return values of the other functions.

I also updated the implementations of the pg_abs_sX() functions to
something a bit simpler. This was based on feedback in another patch
[0], and more closely matches similar logic in other places.

Thanks,
Joseph Koshakow

[0]
https://postgr.es/m/CAAvxfHdTsMZPWEHUrZ=h3cky9ccc3mtx2whuhygy+abp-mc...@mail.gmail.co
From 4c4a3ce3ed6d4f1c296c60e9cd48b3ab372b4a20 Mon Sep 17 00:00:00 2001
From: Matthew Kim <38759997+friendlymatt...@users.noreply.github.com>
Date: Tue, 9 Jul 2024 18:25:10 -0400
Subject: [PATCH v17 3/4] Handle overflows in do_to_timestamp().

This commit handles overflow when formatting timestamps with the 'CC'
pattern.
---
 src/backend/utils/adt/formatting.c | 25 +++--
 src/test/regress/expected/horology.out |  2 ++
 src/test/regress/sql/horology.sql  |  1 +
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 68069fcfd3..decf0b6123 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -77,6 +77,7 @@
 
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
+#include "common/int.h"
 #include "common/unicode_case.h"
 #include "common/unicode_category.h"
 #include "mb/pg_wchar.h"
@@ -4797,11 +4798,31 @@ do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
 		if (tmfc.bc)
 			tmfc.cc = -tmfc.cc;
 		if (tmfc.cc >= 0)
+		{
 			/* +1 because 21st century started in 2001 */
-			tm->tm_year = (tmfc.cc - 1) * 100 + 1;
+			/* tm->tm_year = (tmfc.cc - 1) * 100 + 1; */
+			if (pg_mul_s32_overflow((tmfc.cc - 1), 100, >tm_year) ||
+pg_add_s32_overflow(tm->tm_year, 1, >tm_year))
+			{
+ereport(ERROR,
+		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+		 errmsg("date out of range: \"%s\"",
+text_to_cstring(date_txt;
+			}
+		}
 		else
+		{
 			/* +1 because year == 599 is 600 BC */
-			tm->tm_year = tmfc.cc * 100 + 1;
+			/* tm->tm_year = tmfc.cc * 100 + 1; */
+			if (pg_mul_s32_overflow(tmfc.cc, 100, >tm_year) ||
+pg_add_s32_overflow(tm->tm_year, 1, >tm_year))
+			{
+ereport(ERROR,
+		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+		 errmsg("date out of range: \"%s\"",
+text_to_cstring(date_txt;
+			}
+		}
 		fmask |= DTK_M(YEAR);
 	}
 
diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out
index 241713cc51..311c688f89 100644
--- a/src/test/regress/expected/horology.out
+++ b/src/test/regress/expected/horology.out
@@ -3778,6 +3778,8 @@ SELECT to_date('-02-01','-MM-DD');  -- allowed, though it shouldn't be
  02-01-0001 BC
 (1 row)
 
+SELECT to_date('1', 'CC');
+ERROR:  date out of range: "1"
 -- to_char's TZ format code produces zone abbrev if known
 SELECT to_char('2012-12-12 12:00'::timestamptz, '-MM-DD HH:MI:SS TZ');
  to_char 
diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql
index e5cf12ff63..12a035cf57 100644
--- a/src/test/regress/sql/horology.sql
+++ b/src/test/regress/sql/horology.sql
@@ -660,6 +660,7 @@ SELECT to_date('2016 365', ' DDD');  -- ok
 SELECT to_date('2016 366', ' DDD');  -- ok
 SELECT to_date('2016 367', ' DDD');
 SELECT to_date('-02-01','-MM-DD');  -- allowed, though it shouldn't be
+SELECT to_date('10000', 'CC');
 
 -- to_char's TZ format code produces zone abbrev if known
 SELECT to_char('2012-12-12 12:00'::timestamptz, '-MM-DD HH:MI:SS TZ');
-- 
2.34.1

From ca9398b5f6a693cae97616cfd4acc7b13fdc5

Re: Remove dependence on integer wrapping

2024-08-04 Thread Joseph Koshakow
On Fri, Jun 14, 2024 at 8:00 AM Alexander Lakhin 
wrote:
>
>And the most interesting case to me:
>SET temp_buffers TO 10;
>
>CREATE TEMP TABLE t(i int PRIMARY KEY);
>INSERT INTO t VALUES(1);
>
>#4  0x7f385cdd37f3 in __GI_abort () at ./stdlib/abort.c:79
>#5  0x5620071c4f51 in __addvsi3 ()
>#6  0x562007143f3c in init_htab (hashp=0x562008facb20,
nelem=610070812) at dynahash.c:720
>
>(gdb) f 6
>#6  0x560915207f3c in init_htab (hashp=0x560916039930,
nelem=10) at dynahash.c:720
>720 hctl->high_mask = (nbuckets << 1) - 1;
>(gdb) p nbuckets
>$1 = 1073741824

Alex, are you able to get a full stack trace for this panic? I'm unable
to reproduce this because I don't have enough memory in my system. I've
tried reducing `BLCKSZ` to 1024, which is the lowest value allowed per
my understanding, and I still don't have enough memory.

Here's what it looks like is happening:

1. When inserting into the table, we create a new dynamic hash table
and set `nelem` equal to `temp_buffers`, which is 10.

2. `nbuckets` is then set to the next highest power of 2 from
   `nelem`, which is 1073741824.

/*
 * Allocate space for the next greater power of two number of buckets,
 * assuming a desired maximum load factor of 1.
 */
nbuckets = next_pow2_int(nelem);

3. Shift `nbuckets` to the left by 1. This would equal 2147483648,
which is larger than `INT_MAX`, which causes an overflow.

hctl->high_mask = (nbuckets << 1) - 1;

The max value allowed for `temp_buffers` is `INT_MAX / 2` (1073741823),
so any value of `temp_buffers` in the range (536870912, 1073741823]
would cause this overflow. Without `-ftrapv`, `nbuckets` would wrap
around to -2147483648, which is likely to cause all sorts of havoc, I'm
just not sure what exactly.

Also, `nbuckets = next_pow2_int(nelem);` by itself is a bit sketchy
considering that `nelem` is a `long` and `nbuckets` is an `int`.
Potentially, the fix here is to just convert `nbuckets` to a `long`. I
plan on checking if that's feasible.

I also found this commit [0] that increased the max of `nbuckets` from
`INT_MAX / BLCKSZ` to `INT_MAX / 2`, which introduced the possibility
of this overflow. So I plan on reading through that as well.

Thanks,
Joseph Koshakow

[0]
https://github.com/postgres/postgres/commit/0007490e0964d194a606ba79bb11ae1642da3372


Re: Fix overflow in pg_size_pretty

2024-07-27 Thread Joseph Koshakow
On Sat, Jul 27, 2024 at 11:42 PM David Rowley  wrote:
>
> I didn't test to see where that's coming from, but I did test the two
> attached .c files.  int.c uses the 0 - (unsigned int) var method and
> int2.c uses (unsigned int) (-var).  Using clang and -ftrapv, I get:
>
> $ clang int.c -o int -O2 -ftrapv
> $ ./int
> 2147483648
> $ clang int2.c -o int2 -O2 -ftrapv
> $ ./int2
> Illegal instruction
>
> Similar with gcc:
> $ gcc int.c -o int -O2 -ftrapv
> $ ./int
> 2147483648
> $ gcc int2.c -o int2 -O2 -ftrapv
> $ ./int2
> Aborted
>
> I suspect your trap must be coming from somewhere else. It looks to me
> like the "uint64 usize = size < 0 ? 0 - (uint64) size : (uint64)
> size;" will be fine.

My mistake, you're absolutely right. The trap is coming from
`pg_strtoint64_safe()`.

return -((int64) tmp);

Which I had already addressed in the other thread and completely forgot
about.

I did some more research and it looks like unsigned integer arithmetic
is guaranteed to wrap around, unlike signed integer arithmetic [0].

Attached is an updated patch with your approach. I removed the 0 from
the negative case because I think it was unnecessary, but happy to add
it back in if I missed something.

Thanks for the review!

Thanks,
Joseph Koshakow

[0]
https://www.gnu.org/software/autoconf/manual/autoconf-2.63/html_node/Integer-Overflow-Basics.html
From 1811b94ba4c08a0de972e8ded4892cf294e9f687 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 27 Jul 2024 15:06:09 -0400
Subject: [PATCH] Fix overflow in pg_size_pretty

This commit removes an overflow from pg_size_pretty that causes
PG_INT64_MIN to be displayed with the bytes unit instead of the PB
unit.
---
 src/backend/utils/adt/dbsize.c   | 3 ++-
 src/test/regress/expected/dbsize.out | 6 ++
 src/test/regress/sql/dbsize.sql  | 2 ++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 25d7110c13..8d58fc24ce 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -575,9 +575,10 @@ pg_size_pretty(PG_FUNCTION_ARGS)
 	for (unit = size_pretty_units; unit->name != NULL; unit++)
 	{
 		uint8		bits;
+		uint64		usize = size < 0 ? -(uint64) size : (uint64) size;
 
 		/* use this unit if there are no more units or we're below the limit */
-		if (unit[1].name == NULL || i64abs(size) < unit->limit)
+		if (unit[1].name == NULL || usize < unit->limit)
 		{
 			if (unit->round)
 size = half_rounded(size);
diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out
index f1121a87aa..eac878c3ec 100644
--- a/src/test/regress/expected/dbsize.out
+++ b/src/test/regress/expected/dbsize.out
@@ -12,6 +12,12 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
  1000 | 909 TB | -909 TB
 (6 rows)
 
+SELECT pg_size_pretty((-9223372036854775808)::bigint);
+ pg_size_pretty 
+
+ -8192 PB
+(1 row)
+
 SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (VALUES (10::numeric), (1000::numeric), (100::numeric),
 (10::numeric), (1::numeric),
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql
index b34cf33385..e1ad202016 100644
--- a/src/test/regress/sql/dbsize.sql
+++ b/src/test/regress/sql/dbsize.sql
@@ -3,6 +3,8 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (10::bigint), (1::bigint),
 (1000::bigint)) x(size);
 
+SELECT pg_size_pretty((-9223372036854775808)::bigint);
+
 SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (VALUES (10::numeric), (1000::numeric), (100::numeric),
 (10::numeric), (1::numeric),
-- 
2.34.1



Re: Fix overflow in pg_size_pretty

2024-07-27 Thread Joseph Koshakow
On Sat, Jul 27, 2024 at 8:00 PM David Rowley  wrote:
>
> On Sun, 28 Jul 2024 at 11:06, Joseph Koshakow  wrote:
>> > + uint64 usize = size < 0 ? (uint64) (-size) : (uint64) size;
>>
>> I think that the explicit test for PG_INT64_MIN is still required. If
>> `size` is equal to PG_INT64_MIN then `-size` will overflow. You end up
>> with the correct behavior if `size` wraps around, but that's only
>> guaranteed on platforms that support the `-fwrapv` flag.
>
> What if we spelt it out the same way as pg_lltoa() does?
>
> i.e: uint64 usize = size < 0 ? 0 - (uint64) size : (uint64) size;

My understanding of pg_lltoa() is that it produces an underflow and
relies on wrapping around from 0 to PG_UINT64_MAX. In fact the following
SQL, which relies on pg_lltoa() under the hood, panics with `-ftrapv`
enabled (which panics on underflows and overflows):

SELECT int8out(-9223372036854775808);

So we should actually probably modify pg_lltoa() to use pg_abs_s64()
too.

Thanks,
Joe Koshakow


Re: Fix overflow in pg_size_pretty

2024-07-27 Thread Joseph Koshakow
On Sat, Jul 27, 2024 at 6:28 PM David Rowley  wrote:
>
> On Sun, 28 Jul 2024 at 07:18, Joseph Koshakow  wrote:
>> Attached is a patch that resolves an overflow in pg_size_pretty() that
>> resulted in unexpected behavior when PG_INT64_MIN was passed in as an
>> argument.
>
> Could we just fix this more simply by assigning the absolute value of
> the signed variable into an unsigned type?

I might be misunderstanding, but my previous patch does assign the
absolute value of the signed variable into an unsigned type.

> It's a bit less code and
> gets rid of the explicit test for PG_INT64_MIN.

> + uint64 usize = size < 0 ? (uint64) (-size) : (uint64) size;

I think that the explicit test for PG_INT64_MIN is still required. If
`size` is equal to PG_INT64_MIN then `-size` will overflow. You end up
with the correct behavior if `size` wraps around, but that's only
guaranteed on platforms that support the `-fwrapv` flag.

Thanks,
Joseph Koshakow


Re: Fix overflow in pg_size_pretty

2024-07-27 Thread Joseph Koshakow
On Sat, Jul 27, 2024 at 3:18 PM Joseph Koshakow  wrote:
>
> `SELECT -9223372036854775808::bigint` results in an out of range error,
> even though `-9223372036854775808` can fit in a `bigint` and
> `SELECT pg_typeof(-9223372036854775808)` returns `bigint`. That's why
> the `::bigint` cast is omitted from my test.

Turns out it was just an order of operations issue. Fix is attached.

Thanks,
Joseph Koshakow
From 1224087ab4e13a107b51ac17c77e83dc7db37ef9 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 27 Jul 2024 15:06:09 -0400
Subject: [PATCH] Fix overflow in pg_size_pretty

This commit removes an overflow from pg_size_pretty that causes
PG_INT64_MIN to be displayed with the bytes unit instead of the PB
unit.
---
 src/backend/utils/adt/dbsize.c   |  3 ++-
 src/include/common/int.h | 13 +
 src/test/regress/expected/dbsize.out |  6 ++
 src/test/regress/sql/dbsize.sql  |  2 ++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 25d7110c13..5648f40101 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -21,6 +21,7 @@
 #include "catalog/pg_tablespace.h"
 #include "commands/dbcommands.h"
 #include "commands/tablespace.h"
+#include "common/int.h"
 #include "miscadmin.h"
 #include "storage/fd.h"
 #include "utils/acl.h"
@@ -577,7 +578,7 @@ pg_size_pretty(PG_FUNCTION_ARGS)
 		uint8		bits;
 
 		/* use this unit if there are no more units or we're below the limit */
-		if (unit[1].name == NULL || i64abs(size) < unit->limit)
+		if (unit[1].name == NULL || pg_abs_s64(size) < unit->limit)
 		{
 			if (unit->round)
 size = half_rounded(size);
diff --git a/src/include/common/int.h b/src/include/common/int.h
index 7fc046e78a..d86eb6dd5e 100644
--- a/src/include/common/int.h
+++ b/src/include/common/int.h
@@ -258,6 +258,19 @@ pg_mul_s64_overflow(int64 a, int64 b, int64 *result)
 #endif
 }
 
+static inline uint64
+pg_abs_s64(int64 a)
+{
+	if (unlikely(a == PG_INT64_MIN))
+	{
+		return ((uint64) PG_INT64_MAX) + 1;
+	}
+	else
+	{
+		return (uint64) i64abs(a);
+	}
+}
+
 /*
  * Overflow routines for unsigned integers
  *
diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out
index f1121a87aa..eac878c3ec 100644
--- a/src/test/regress/expected/dbsize.out
+++ b/src/test/regress/expected/dbsize.out
@@ -12,6 +12,12 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
  1000 | 909 TB | -909 TB
 (6 rows)
 
+SELECT pg_size_pretty((-9223372036854775808)::bigint);
+ pg_size_pretty 
+
+ -8192 PB
+(1 row)
+
 SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (VALUES (10::numeric), (1000::numeric), (100::numeric),
 (10::numeric), (1::numeric),
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql
index b34cf33385..e1ad202016 100644
--- a/src/test/regress/sql/dbsize.sql
+++ b/src/test/regress/sql/dbsize.sql
@@ -3,6 +3,8 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (10::bigint), (1::bigint),
 (1000::bigint)) x(size);
 
+SELECT pg_size_pretty((-9223372036854775808)::bigint);
+
 SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (VALUES (10::numeric), (1000::numeric), (100::numeric),
 (10::numeric), (1::numeric),
-- 
2.34.1



Fix overflow in pg_size_pretty

2024-07-27 Thread Joseph Koshakow
Hi all,

Attached is a patch that resolves an overflow in pg_size_pretty() that
resulted in unexpected behavior when PG_INT64_MIN was passed in as an
argument.

The pg_abs_s64() helper function is extracted and simplified from patch
0001 from [0]. I didn't add similar functions for other sized integers
since they'd be unused, but I'd be happy to add them if others
disagree.

`SELECT -9223372036854775808::bigint` results in an out of range error,
even though `-9223372036854775808` can fit in a `bigint` and
`SELECT pg_typeof(-9223372036854775808)` returns `bigint`. That's why
the `::bigint` cast is omitted from my test.

[0]
https://www.postgresql.org/message-id/flat/caavxfhdbpoyegs7s+xf4iaw0-cgiq25jpydwbqqqvltle_t...@mail.gmail.com

Thanks,
Joseph Koshakow
From 6ec885412f2e0f3a3e019ec1906901e39c6d517a Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 27 Jul 2024 15:06:09 -0400
Subject: [PATCH] Fix overflow in pg_size_pretty

This commit removes an overflow from pg_size_pretty that causes
PG_INT64_MIN to be displayed with the bytes unit instead of the PB
unit.
---
 src/backend/utils/adt/dbsize.c   |  3 ++-
 src/include/common/int.h | 13 +
 src/test/regress/expected/dbsize.out |  6 ++
 src/test/regress/sql/dbsize.sql  |  2 ++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 25d7110c13..5648f40101 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -21,6 +21,7 @@
 #include "catalog/pg_tablespace.h"
 #include "commands/dbcommands.h"
 #include "commands/tablespace.h"
+#include "common/int.h"
 #include "miscadmin.h"
 #include "storage/fd.h"
 #include "utils/acl.h"
@@ -577,7 +578,7 @@ pg_size_pretty(PG_FUNCTION_ARGS)
 		uint8		bits;
 
 		/* use this unit if there are no more units or we're below the limit */
-		if (unit[1].name == NULL || i64abs(size) < unit->limit)
+		if (unit[1].name == NULL || pg_abs_s64(size) < unit->limit)
 		{
 			if (unit->round)
 size = half_rounded(size);
diff --git a/src/include/common/int.h b/src/include/common/int.h
index 7fc046e78a..d86eb6dd5e 100644
--- a/src/include/common/int.h
+++ b/src/include/common/int.h
@@ -258,6 +258,19 @@ pg_mul_s64_overflow(int64 a, int64 b, int64 *result)
 #endif
 }
 
+static inline uint64
+pg_abs_s64(int64 a)
+{
+	if (unlikely(a == PG_INT64_MIN))
+	{
+		return ((uint64) PG_INT64_MAX) + 1;
+	}
+	else
+	{
+		return (uint64) i64abs(a);
+	}
+}
+
 /*
  * Overflow routines for unsigned integers
  *
diff --git a/src/test/regress/expected/dbsize.out b/src/test/regress/expected/dbsize.out
index f1121a87aa..3398a3eceb 100644
--- a/src/test/regress/expected/dbsize.out
+++ b/src/test/regress/expected/dbsize.out
@@ -12,6 +12,12 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
  1000 | 909 TB | -909 TB
 (6 rows)
 
+SELECT pg_size_pretty(-9223372036854775808);
+ pg_size_pretty 
+
+ -8192 PB
+(1 row)
+
 SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (VALUES (10::numeric), (1000::numeric), (100::numeric),
 (10::numeric), (1::numeric),
diff --git a/src/test/regress/sql/dbsize.sql b/src/test/regress/sql/dbsize.sql
index b34cf33385..e3e18e948e 100644
--- a/src/test/regress/sql/dbsize.sql
+++ b/src/test/regress/sql/dbsize.sql
@@ -3,6 +3,8 @@ SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (10::bigint), (1::bigint),
 (1000::bigint)) x(size);
 
+SELECT pg_size_pretty(-9223372036854775808);
+
 SELECT size, pg_size_pretty(size), pg_size_pretty(-1 * size) FROM
 (VALUES (10::numeric), (1000::numeric), (100::numeric),
 (10::numeric), (1::numeric),
-- 
2.34.1



Re: Remove dependence on integer wrapping

2024-07-23 Thread Joseph Koshakow
On Mon, Jul 22, 2024 at 6:07 PM Matthew Kim  wrote:
>
> On Mon, Jul 22, 2024 at 5:52 PM Alexander Lakhin 
wrote:
>
>> Also there are several trap-producing cases with date types:
>> SELECT to_date('1', 'CC');
>
> Hi, I’ve attached a patch that fixes the to_date() overflow. Patches 1
through 3 remain unchanged.

Thanks for the contribution, Matthew!

On Tue, Jul 23, 2024 at 2:14 AM jian he  wrote:
>
> On Tue, Jul 23, 2024 at 6:56 AM Joseph Koshakow  wrote:
>>
>> The specific bug that this patch fixes is preventing the following
>> statement:
>>
>> # INSERT INTO arroverflowtest(i[-2147483648:2147483647]) VALUES
('{1}');
>>
>> So we may want to add that test back in.
>>
> I agree with you.

I've updated the patch to add this test back in.

> also v13-0003-Remove-dependence-on-integer-wrapping-for-jsonb.patch
> in setPathArray we can change to
>
> if (idx == PG_INT32_MIN || -idx > nelems)
> {
> /*
>  * If asked to keep elements position consistent, it's not
allowed
>  * to prepend the array.
>  */
> if (op_type & JB_PATH_CONSISTENT_POSITION)
> ereport(ERROR,
> (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
>  errmsg("path element at position %d is out of
> range: %d",
> level + 1, idx)));
>     idx = PG_INT32_MIN;
> }

Done in the attached patch.
From 6f9b253b2f35dcbb91a44285b28f68916b039e9a Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH 1/4] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  11 +--
 src/test/regress/expected/timestamp.out|  13 +++
 src/test/regress/expected/timestamptz.out  |  13 +++
 src/test/regress/sql/timestamp.sql |   4 +
 src/test/regress/sql/timestamptz.sql   |   4 +
 10 files changed, 169 insertions(+), 55 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index b20c358486..52687dbf7b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -429,8 +429,11 @@ cash_out(PG_FUNCTION_ARGS)
 
 	if (value < 0)
 	{
-		/* make the amount positive for digit-reconstruction loop */
-		value = -value;
+		/*
+		 * make the amount positive for digit-reconstruction loop, we can
+		 * leave INT64_MIN unchanged
+		 */
+		pg_neg_s64_overflow(value, );
 		/* set up formatting data */
 		signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
 		sign_posn = lconvert->n_sign_posn;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index d0f0923710..38965b4023 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8114,15 +8114,14 @@ int64_to_numericvar(int64 val, NumericVar *var)
 
 	/* int64 can require at most 19 decimal digits; add one for safety */
 	alloc_var(var, 20 / DEC_DIGITS);
+	uval = pg_abs_s64(val);
 	if (val < 0)
 	{
 		var->sign = NUMERIC_NEG;
-		uval = -val;
 	}
 	else
 	{
 		var->sign = NUMERIC_POS;
-		uval = val;
 	}
 	var->dscale = 0;
 	if (val == 0)
@@ -11443,7 +11442,7 @@ power_var_int(const NumericVar *base, int exp, int exp_dscale,
 	 * Now we can proceed with the multiplications.
 	 */
 	neg = (exp < 0);
-	mask = abs(exp);
+	mask = pg_abs_s32(exp);
 
 	init_var(_prod);
 	set_var_from_var(base, _prod);
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..a3d7d6bf01 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include "common/int.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
 
@@ -131,6 +132,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	uint16		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int16		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -190,10 +192,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+		if (pg_neg_u16_overflow(tmp, ))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if 

Re: Remove dependence on integer wrapping

2024-07-22 Thread Joseph Koshakow
On Mon, Jul 22, 2024 at 6:27 PM Nathan Bossart 
wrote:
>
> Actually, I think my concerns about prohibiting more than necessary go
away
> if we do the subtraction first.  If "upperIndx[i] - lowerIndx[i]"
> overflows, we know the array size is too big.  Similarly, if adding one to
> that result overflows, we again know the array size is too big.  This
> appears to be how the surrounding code handles this problem (e.g.,
> ReadArrayDimensions()).  Thoughts?

I like that approach! It won't reject any valid bounds and is
consistent with the surrounding code. Also statements of the following
format will maintain the same error messages they had previously:

# INSERT INTO arroverflowtest(i[2147483646:2147483647]) VALUES
('{1,2}');
ERROR:  array lower bound is too large: 2147483646

The specific bug that this patch fixes is preventing the following
statement:

# INSERT INTO arroverflowtest(i[-2147483648:2147483647]) VALUES ('{1}');

So we may want to add that test back in.

Thanks,
Joseph Koshakow


Re: Remove dependence on integer wrapping

2024-07-22 Thread Joseph Koshakow
On Mon, Jul 22, 2024 at 11:17 AM Nathan Bossart 
wrote:
> On Fri, Jul 19, 2024 at 07:32:18PM -0400, Joseph Koshakow wrote:
>> On Fri, Jul 19, 2024 at 2:45 PM Nathan Bossart 
>> wrote:
>>> +/* dim[i] = 1 + upperIndx[i] - lowerIndx[i]; */
>>> +if (pg_add_s32_overflow(1, upperIndx[i], [i]))
>>> +ereport(ERROR,
>>> +(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
>>> + errmsg("array upper bound is too large: %d",
>>> +upperIndx[i])));
>>> +if (pg_sub_s32_overflow(dim[i], lowerIndx[i], [i]))
>>> +ereport(ERROR,
>>> +(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
>>> + errmsg("array size exceeds the maximum allowed
>> (%d)",
>>> +(int) MaxArraySize)));
>
> Am I understanding correctly that the main
> behavioral difference between these two approaches is that users will see
> different error messages?

Yes, you are understanding correctly. The approach written above will
have the error message "array upper bound is too large", while the
approach attached in patch
v13-0002-Remove-overflow-from-array_set_slice.patch will have the error
message "array lower bound is too large".

Thanks,
Joseph Koshakow


Re: Remove dependence on integer wrapping

2024-07-19 Thread Joseph Koshakow
On Fri, Jul 19, 2024 at 2:45 PM Nathan Bossart 
wrote:
>
> +/* dim[i] = 1 + upperIndx[i] - lowerIndx[i]; */
> +if (pg_add_s32_overflow(1, upperIndx[i], [i]))
> +ereport(ERROR,
> +(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
> + errmsg("array upper bound is too large: %d",
> +upperIndx[i])));
> +if (pg_sub_s32_overflow(dim[i], lowerIndx[i], [i]))
> +ereport(ERROR,
> +(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
> + errmsg("array size exceeds the maximum allowed
(%d)",
> +(int) MaxArraySize)));
>
> I think the problem with fixing it this way is that it prohibits more than
> is necessary.

My understanding is that 2147483647 (INT32_MAX) is not a valid upper
bound, which is what the first overflow check is checking. Any query of
the form
`INSERT INTO arroverflowtest(i[:2147483647]) VALUES ('{...}');`
will fail with an error of
`ERROR:  array lower bound is too large: `

The reason is the following bounds check found in arrayutils.c

/*
 * Verify sanity of proposed lower-bound values for an array
 *
 * The lower-bound values must not be so large as to cause overflow when
 * calculating subscripts, e.g. lower bound 2147483640 with length 10
 * must be disallowed.  We actually insist that dims[i] + lb[i] be
 * computable without overflow, meaning that an array with last
subscript
 * equal to INT_MAX will be disallowed.
 *
 * It is assumed that the caller already called ArrayGetNItems, so that
 * overflowed (negative) dims[] values have been eliminated.
 */
void
ArrayCheckBounds(int ndim, const int *dims, const int *lb)
{
(void) ArrayCheckBoundsSafe(ndim, dims, lb, NULL);
}

/*
 * This entry point can return the error into an ErrorSaveContext
 * instead of throwing an exception.
 */
bool
ArrayCheckBoundsSafe(int ndim, const int *dims, const int *lb,
struct Node *escontext)
{
int i;

for (i = 0; i < ndim; i++)
{
/* PG_USED_FOR_ASSERTS_ONLY prevents variable-isn't-read warnings */
int32 sum PG_USED_FOR_ASSERTS_ONLY;

if (pg_add_s32_overflow(dims[i], lb[i], ))
ereturn(escontext, false,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 errmsg("array lower bound is too large: %d",
lb[i])));
}

 return true;
}

Specifically "We actually insist that dims[i] + lb[i] be computable
without overflow, meaning that an array with last subscript equal to
INT32_MAX will be disallowed." If the upper bound is INT32_MAX,
then there's no lower bound where (lower_bound + size) won't overflow.

It might be possible to remove this restriction, but it's probably
easier to keep it.

> An easy way to deal with this problem is to first perform the calculation
> with everything cast to an int64.  Before setting dim[i], you'd check that
> the result is in [PG_INT32_MIN, PG_INT32_MAX] and fail if needed.
>
> int64 newdim;
>
> ...
>
> newdim = (int64) 1 + (int64) upperIndx[i] - (int64) lowerIndx[i];
> if (unlikely(newdim < PG_INT32_MIN || newdim > PG_INT32_MAX))
> ereport(ERROR,
> ...
> dim[i] = (int32) newdim;

I've rebased my patches and updated 0002 with this approach if this is
still the approach you want to go with. I went with the array size too
large error for similar reasons as the previous version of the patch.

Since the patches have been renumbered, here's an overview of their
status:

- 0001 is reviewed and waiting for v18.
- 0002 is under review and a bug fix.
- 0003 needs review.

Thanks,
Joseph Koshakow
From 6996c17b06d4b7c98130e5c70c368d08ea1ccc80 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 6 Jul 2024 14:35:00 -0400
Subject: [PATCH 2/3] Remove overflow from array_set_slice

This commit removes an overflow from array_set_slice that allows setting
absurd slice ranges.
---
 src/backend/utils/adt/arrayfuncs.c   | 10 +-
 src/test/regress/expected/arrays.out | 14 ++
 src/test/regress/sql/arrays.sql  |  9 +
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..e81aea4d19 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -2880,6 +2880,8 @@ array_set_slice(Datum arraydatum,
 
 		for (i = 0; i < nSubscripts; i++)
 		{
+			int64		newdim;
+
 			if (!upperProvided[i] || !lowerProvided[i])
 ereport(ERROR,
 		(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
@@ -2887,7 

Re: Remove dependence on integer wrapping

2024-07-18 Thread Joseph Koshakow
On Wed, Jul 17, 2024 at 9:31 PM jian he  wrote:
>
> i think "INSERT INTO arroverflowtest(i[2147483647:2147483647]) VALUES
('{}');"
> means to insert one element (size) to a customized lower/upper bounds.

Ah, thank you, I mistakenly understood that as an array with size
2147483647, with the first 2147483646 elements NULL.

I've updated the first calculation (upper_bound + 1) to return an error
saying "array upper bound is too large: %d" when it overflows. This
will change some of the existing error messages, but is just as correct
and doesn't require us to check the source array. Are there backwards
compatibility guarantees on error messages, or is that acceptable?


For the second calculation ((upper_bound + 1) - lower_bound), I've kept the
existing error of "array size exceeds the maximum allowed (%d)". The
only way for that to underflow is if the upper bound is very negative
and the lower bound is very positive. I'm not entirely sure how to
interpret this scenario, but it's consistent with similar scenarios.

# INSERT INTO arroverflowtest(i[10:-99]) VALUES ('{1,2,3}');
ERROR:  array size exceeds the maximum allowed (134217727)

As a reminder:
- 0001 is reviewed.
- 0002 is reviewed and a bug fix.
- 0003 is currently under review and a bug fix.
- 0004 needs a review.

Thanks,
Joe Koshakow
From 6c47deeb39eb352d7888c9490613dcf18aee198b Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Thu, 13 Jun 2024 22:39:25 -0400
Subject: [PATCH 2/4] Handle overflow in money arithmetic

---
 src/backend/utils/adt/cash.c| 174 +++-
 src/test/regress/expected/money.out | 106 +
 src/test/regress/sql/money.sql  |  47 
 3 files changed, 247 insertions(+), 80 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index f6f095a57b..52687dbf7b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -26,6 +26,7 @@
 #include "libpq/pqformat.h"
 #include "utils/builtins.h"
 #include "utils/cash.h"
+#include "utils/float.h"
 #include "utils/numeric.h"
 #include "utils/pg_locale.h"
 
@@ -86,6 +87,82 @@ num_word(Cash value)
 	return buf;
 }/* num_word() */
 
+static inline Cash
+cash_pl_cash(Cash c1, Cash c2)
+{
+	Cash		res;
+
+	if (unlikely(pg_add_s64_overflow(c1, c2, &res)))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
+
+	return res;
+}
+
+static inline Cash
+cash_mi_cash(Cash c1, Cash c2)
+{
+	Cash		res;
+
+	if (unlikely(pg_sub_s64_overflow(c1, c2, &res)))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
+
+	return res;
+}
+
+static inline Cash
+cash_mul_float8(Cash c, float8 f)
+{
+	float8		res = rint(float8_mul((float8) c, f));
+
+	if (unlikely(isnan(res) || !FLOAT8_FITS_IN_INT64(res)))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
+
+	return (Cash) res;
+}
+
+static inline Cash
+cash_div_float8(Cash c, float8 f)
+{
+	float8		res = rint(float8_div((float8) c, f));
+
+	if (unlikely(isnan(res) || !FLOAT8_FITS_IN_INT64(res)))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
+
+	return (Cash) res;
+}
+
+static inline Cash
+cash_mul_int64(Cash c, int64 i)
+{
+	Cash		res;
+
+	if (unlikely(pg_mul_s64_overflow(c, i, &res)))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
+
+	return res;
+}
+
+static inline Cash
+cash_div_int64(Cash c, int64 i)
+{
+	if (unlikely(i == 0))
+		ereport(ERROR,
+(errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+	return c / i;
+}
+
 /* cash_in()
  * Convert a string to a cash data type.
  * Format is [$]###[,]###[.##]
@@ -615,11 +692,8 @@ cash_pl(PG_FUNCTION_ARGS)
 {
 	Cash		c1 = PG_GETARG_CASH(0);
 	Cash		c2 = PG_GETARG_CASH(1);
-	Cash		result;
-
-	result = c1 + c2;
 
-	PG_RETURN_CASH(result);
+	PG_RETURN_CASH(cash_pl_cash(c1, c2));
 }
 
 
@@ -631,11 +705,8 @@ cash_mi(PG_FUNCTION_ARGS)
 {
 	Cash		c1 = PG_GETARG_CASH(0);
 	Cash		c2 = PG_GETARG_CASH(1);
-	Cash		result;
-
-	result = c1 - c2;
 
-	PG_RETURN_CASH(result);
+	PG_RETURN_CASH(cash_mi_cash(c1, c2));
 }
 
 
@@ -667,10 +738,8 @@ cash_mul_flt8(PG_FUNCTION_ARGS)
 {
 	Cash		c = PG_GETARG_CASH(0);
 	float8		f = PG_GETARG_FLOAT8(1);
-	Cash		result;
 
-	result = rint(c * f);
-	PG_RETURN_CASH(result);
+	PG_RETURN_CASH(cash_mul_float8(c, f));
 }
 
 
@@ -682,10 +751,8 @@ flt8_mul_cash(PG_FUNCTION_ARGS)
 {
 	float8		f = PG_GETARG_FLOAT8(0);
 	Cash		c = PG_GETARG_CASH(1);
-	Cash		result;
 
-	result = rint(f * c);
-	PG_RETURN_CASH(result);
+	PG_RETURN_CASH(cash_mul_float8(c, f));
 }
 
 
@@ -697,15 +764,8 @@ cash_div_flt8(PG_FUNCTION_ARGS)
 {
 	Cash		c = PG_GETARG_

Re: Remove dependence on integer wrapping

2024-07-17 Thread Joseph Koshakow
On Tue, Jul 16, 2024 at 11:17 PM Nathan Bossart 
wrote:
> I've attached an editorialized version of 0002 based on my thoughts above.

Looks great, thanks!

Thanks,
Joe Koshakow


Re: Remove dependence on integer wrapping

2024-07-16 Thread Joseph Koshakow
On Tue, Jul 16, 2024 at 1:57 PM Nathan Bossart 
wrote:
>>
>>On Mon, Jul 15, 2024 at 07:55:22PM -0400, Joseph Koshakow wrote:
>> On Mon, Jul 15, 2024 at 11:31 AM Nathan Bossart 
>> wrote:
>>>I'm curious why you aren't using float8_mul/float8_div here, i.e.,
>>>
>>>fresult = rint(float8_mul((float8) c, f));
>>>fresult = rint(float8_div((float8) c, f));
>>
>> I wrongly assumed that it was only meant to be used to implement
>> multiplication and division for the built-in float types. I've updated
>> the patch to use these functions.
>
> The reason I suggested this is so that we could omit all the prerequisite
> isinf(), isnan(), etc. checks in the cash_mul_float8() and friends.  The
> checks are slightly different, but from a quick glance it just looks like
we
> might end up relying on the FLOAT8_FITS_IN_INT64 check in more cases.

I don't think we can omit the prerequisite isnan() checks. Neither
float8_mul() nor float8_div() reject nan inputs/result, and
FLOAT8_FITS_IN_INT64 has the following to say about inf and nan

These macros will do the right thing for Inf, but not necessarily for
NaN,
so check isnan(num) first if that's a possibility.

Though, I think you're right that we can remove the isinf() check from
cash_mul_float8(). That check is fully covered by FLOAT8_FITS_IN_INT64,
since all infinite inputs will result in an infinite output. That also
makes the infinite result check in float8_mul() redundant.
Additionally, I believe that the underflow check in float8_mul() is
unnecessary. val1 is an int64 casted to a float8, so it can never be
-1 < val < 1, so it can never cause an underflow to 0. So I went ahead
and removed float8_mul() since all of its checks are redundant.

For cash_div_float8() we have a choice. The isinf() input check
protects against the following, which is not rejected by any of
the other checks.

test=# SELECT '5'::money / 'inf'::float8;
 ?column?
--
$0.00
(1 row)

For now, I've kept the isinf() input check to reject the above query,
let me know if you think we should allow this.

The infinite check in float8_div() is redundant because it's covered
by FLOAT8_FITS_IN_INT64. Also, the underflow check in float8_div() is
unnecessary for similar reasons to float8_mul(). So if we continue to
have a divide by zero check in cash_div_float8(), then we can remove
float8_div() as well.

>>>and "cash_sub_int64"
>>
>> Did you mean "cash_div_int64"? There's only a single function that
>> subtracts cash and an integer, but there's multiple functions that
>> divide cash by an integer. I've added a "cash_div_int64" in the updated
>> patch.
>
> My personal preference would be to add helper functions for each of these
> so that all the overflow, etc. checks are centralized in one place and
> don't clutter the calling code.  Plus, it might help ensure error
> handling/messages remain consistent.

Ah, OK. I've added helpers for both subtraction and addition then.

> +static Cash
> +cash_mul_float8(Cash c, float8 f)
>
> nitpick: Can you mark these "inline"?  I imagine most compilers inline
them
> without any prompting, but we might as well make our intent clear.

Updated in the attached patch.

Once again, the other patches, 0001, 0003, and 0004 are unchanged but
have their version number incremented.

Thanks,
Joe Koshakow
From 176c3b4c7b6f3cf184390828f1141b26107b6fb2 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 6 Jul 2024 14:35:00 -0400
Subject: [PATCH 3/4] Remove overflow from array_set_slice

This commit removes an overflow from array_set_slice that allows setting
absurd slice ranges.
---
 src/backend/utils/adt/arrayfuncs.c   | 8 +++-
 src/test/regress/expected/arrays.out | 8 
 src/test/regress/sql/arrays.sql  | 6 ++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..95e027e9ea 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -2887,7 +2887,13 @@ array_set_slice(Datum arraydatum,
 		 errdetail("When assigning to a slice of an empty array value,"
    " slice boundaries must be fully specified.")));
 
-			dim[i] = 1 + upperIndx[i] - lowerIndx[i];
+			/* dim[i] = 1 + upperIndx[i] - lowerIndx[i]; */
+			if (pg_add_s32_overflow(1, upperIndx[i], &dim[i]) ||
+pg_sub_s32_overflow(dim[i], lowerIndx[i], &dim[i]))
+ereport(ERROR,
+		(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+		 errmsg("array size exceeds the maximum allowed (%d)",
+(int) MaxArraySize)));
 			lb[i] = lowerIndx[i];
 		}
 
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 2340

Re: Remove dependence on integer wrapping

2024-07-15 Thread Joseph Koshakow
Thanks for the review!

On Mon, Jul 15, 2024 at 11:31 AM Nathan Bossart 
wrote:
>
>I took a closer look at 0002.
>
>I'm curious why you aren't using float8_mul/float8_div here, i.e.,
>
>fresult = rint(float8_mul((float8) c, f));
>fresult = rint(float8_div((float8) c, f));

I wrongly assumed that it was only meant to be used to implement
multiplication and division for the built-in float types. I've updated
the patch to use these functions.

>nitpick: I'd name the functions something like "cash_mul_float8" and
>"cash_div_float8". Perhaps we could also add functions like
>"cash_mul_int64"

Done in the updated patch.

>and "cash_sub_int64"

Did you mean "cash_div_int64"? There's only a single function that
subtracts cash and an integer, but there's multiple functions that
divide cash by an integer. I've added a "cash_div_int64" in the updated
patch.

The other patches, 0001, 0003, and 0004 are unchanged but have their
version number incremented.

Thanks,
Joe Koshakow
From 018d952a44d51fb9e0a186003556aebb69a66217 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 6 Jul 2024 14:35:00 -0400
Subject: [PATCH 3/4] Remove overflow from array_set_slice

This commit removes an overflow from array_set_slice that allows setting
absurd slice ranges.
---
 src/backend/utils/adt/arrayfuncs.c   | 8 +++-
 src/test/regress/expected/arrays.out | 8 
 src/test/regress/sql/arrays.sql  | 6 ++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..95e027e9ea 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -2887,7 +2887,13 @@ array_set_slice(Datum arraydatum,
 		 errdetail("When assigning to a slice of an empty array value,"
    " slice boundaries must be fully specified.")));
 
-			dim[i] = 1 + upperIndx[i] - lowerIndx[i];
+			/* dim[i] = 1 + upperIndx[i] - lowerIndx[i]; */
+			if (pg_add_s32_overflow(1, upperIndx[i], &dim[i]) ||
+pg_sub_s32_overflow(dim[i], lowerIndx[i], &dim[i]))
+ereport(ERROR,
+		(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+		 errmsg("array size exceeds the maximum allowed (%d)",
+(int) MaxArraySize)));
 			lb[i] = lowerIndx[i];
 		}
 
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 23404982f7..a2382387e2 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2699,3 +2699,11 @@ SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
 ERROR:  sample size must be between 0 and 6
 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
 ERROR:  sample size must be between 0 and 6
+-- Test for overflow in array slicing
+CREATE temp table arroverflowtest (i int[]);
+INSERT INTO arroverflowtest(i[-2147483648:2147483647]) VALUES ('{}');
+ERROR:  array size exceeds the maximum allowed (134217727)
+INSERT INTO arroverflowtest(i[1:2147483647]) VALUES ('{}');
+ERROR:  array size exceeds the maximum allowed (134217727)
+INSERT INTO arroverflowtest(i[2147483647:2147483647]) VALUES ('{}');
+ERROR:  array size exceeds the maximum allowed (134217727)
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 50aa539fdc..e9d6737117 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -825,3 +825,9 @@ SELECT array_dims(array_sample('[-1:2][2:3]={{1,2},{3,NULL},{5,6},{7,8}}'::int[]
 SELECT array_dims(array_sample('{{{1,2},{3,NULL}},{{5,6},{7,8}},{{9,10},{11,12}}}'::int[], 2));
 SELECT array_sample('{1,2,3,4,5,6}'::int[], -1); -- fail
 SELECT array_sample('{1,2,3,4,5,6}'::int[], 7); --fail
+
+-- Test for overflow in array slicing
+CREATE temp table arroverflowtest (i int[]);
+INSERT INTO arroverflowtest(i[-2147483648:2147483647]) VALUES ('{}');
+INSERT INTO arroverflowtest(i[1:2147483647]) VALUES ('{}');
+INSERT INTO arroverflowtest(i[2147483647:2147483647]) VALUES ('{}');
-- 
2.34.1

From 950da2c4ce85632f6085680c3ed7b75fb1f780f7 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH 1/4] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  11 +--
 src/test/regress/expected/timestamp.out|  13 +++
 src/test/regress/expected/timestamptz.out  |  13 +++
 src/test/regress/sql/timestamp

Re: Remove dependence on integer wrapping

2024-07-13 Thread Joseph Koshakow
On Fri, Jul 12, 2024 at 12:49 PM Nathan Bossart 
wrote:

> On Sat, Jul 06, 2024 at 07:04:38PM -0400, Joseph Koshakow wrote:
>> I've added another patch, 0004, to resolve the jsonb wrap-arounds.
>>
>> The other patches, 0001, 0002, and 0003 are unchanged but have their
>> version number incremented.
>
> IIUC some of these changes are bug fixes.  Can we split out the bug fixes
> to their own patches so that they can be back-patched?

They happen to already be split out into their own patches. 0002 and
0003 are both bug fixes (in the sense that they fix queries that
produce incorrect results even with -fwrapv). They also both apply
cleanly to master. If it would be useful, I can re-order the patches so
that the bug-fixes are first.

Thanks,
Joe Koshakow


Re: Wrong security context for deferred triggers?

2024-07-07 Thread Joseph Koshakow
On Mon, Jul 1, 2024 at 11:45 AM Laurenz Albe 
wrote:

> I asked them for a statement, and they were nice enough to write up
> https://postgr.es/m/e89e8dd9-7143-4db8-ac19-b2951cb0c0da%40gmail.com

> They have a workaround, so the patch is not absolutely necessary for them.

It sounds like the issue is that there is a constraint
trigger to check a table constraint that must be executed at commit
time, and we'd like to guarantee that if the triggering action was
successful, then the constraint check is also successful. This is an
even bigger issue for transactions that have multiple of these
constraint checks where there may be no single role that has the
privileges required to execute all checks.

Your patch would fix the issue in a majority of cases, but not all.
Since INSERT, UPDATE, DELETE privileges don't necessarily imply SELECT
privileges, the role that modifies a table doesn't necessarily have the
privileges required to check the constraints. It sounds like creating
the constraint check triggers as a security definer function, with a
role that has SELECT privileges, is the more complete solution rather
than a workaround.

Given the above and the fact that the patch is a breaking change, my
vote would still be to keep the current behavior and update the
documentation. Though I'd be happy to be overruled by someone with more
knowledge of triggers.

Thanks,
Joe Koshakow


Re: Remove dependence on integer wrapping

2024-07-06 Thread Joseph Koshakow
On Thu, Jun 13, 2024 at 12:00 AM Alexander Lakhin 
wrote:
> SELECT '[]'::jsonb -> -2147483648;
>
> #4  0x7efe232d67f3 in __GI_abort () at ./stdlib/abort.c:79
> #5  0x55e8fde9f211 in __negvsi2 ()
> #6  0x55e8fdcca62c in jsonb_array_element (fcinfo=0x55e8fec28220) at
jsonfuncs.c:948
>
> (gdb) f 6
> #6  0x55e14cb9362c in jsonb_array_element (fcinfo=0x55e14d493220) at
jsonfuncs.c:948
> 948 if (-element > nelements)
> (gdb) p element
> $1 = -2147483648
>
> ---
> SELECT jsonb_delete_path('{"a":[]}', '{"a",-2147483648}');
>
> #4  0x7f1873bef7f3 in __GI_abort () at ./stdlib/abort.c:79
> #5  0x564a009d2211 in __negvsi2 ()
> #6  0x564a00807c89 in setPathArray (it=0x7fff865c7380,
path_elems=0x564a017baf20, path_nulls=0x564a017baf40,
>  path_len=2, st=0x7fff865c7388, level=1, newval=0x0, nelems=2,
op_type=2) at jsonfuncs.c:5407
>
> (gdb) f 6
> #6  0x55985e823c89 in setPathArray (it=0x7ffc22258fe0,
path_elems=0x559860286f20, path_nulls=0x559860286f40,
>  path_len=2, st=0x7ffc22258fe8, level=1, newval=0x0, nelems=0,
op_type=2) at jsonfuncs.c:5407
> 5407if (-idx > nelems)
> (gdb) p idx
> $1 = -2147483648

I've added another patch, 0004, to resolve the jsonb wrap-arounds.

The other patches, 0001, 0002, and 0003 are unchanged but have their
version number incremented.

Thanks,
Joe Koshakow
From c8725de5f6e1ed476992c33f87b7e7c9475a952b Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 6 Jul 2024 15:41:09 -0400
Subject: [PATCH 4/4] Remove dependence on integer wrapping for jsonb

This commit updates various jsonb operators and functions to no longer
rely on integer wrapping for correctness. Not all compilers support
-fwrapv, so it's best not to rely on it.
---
 src/backend/utils/adt/jsonfuncs.c   |  4 ++--
 src/test/regress/expected/jsonb.out | 12 
 src/test/regress/sql/jsonb.sql  |  2 ++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 48c3f88140..8783c57303 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -946,7 +946,7 @@ jsonb_array_element(PG_FUNCTION_ARGS)
 	{
 		uint32		nelements = JB_ROOT_COUNT(jb);
 
-		if (-element > nelements)
+		if (element == PG_INT32_MIN || -element > nelements)
 			PG_RETURN_NULL();
 		else
 			element += nelements;
@@ -5425,7 +5425,7 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 
 	if (idx < 0)
 	{
-		if (-idx > nelems)
+		if (idx == INT_MIN || -idx > nelems)
 		{
 			/*
 			 * If asked to keep elements position consistent, it's not allowed
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index e66d760189..a9d93052fc 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -680,6 +680,18 @@ select '"foo"'::jsonb -> 'z';
  
 (1 row)
 
+select '[]'::jsonb -> -2147483648;
+ ?column? 
+--
+ 
+(1 row)
+
+select jsonb_delete_path('{"a":[]}', '{"a",-2147483648}');
+ jsonb_delete_path 
+---
+ {"a": []}
+(1 row)
+
 select '{"a": [{"b": "c"}, {"b": "cc"}]}'::jsonb ->> null::text;
  ?column? 
 --
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 97bc2242a1..6a18577ead 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -204,6 +204,8 @@ select '[{"b": "c"}, {"b": "cc"}]'::jsonb -> 'z';
 select '{"a": "c", "b": null}'::jsonb -> 'b';
 select '"foo"'::jsonb -> 1;
 select '"foo"'::jsonb -> 'z';
+select '[]'::jsonb -> -2147483648;
+select jsonb_delete_path('{"a":[]}', '{"a",-2147483648}');
 
 select '{"a": [{"b": "c"}, {"b": "cc"}]}'::jsonb ->> null::text;
 select '{"a": [{"b": "c"}, {"b": "cc"}]}'::jsonb ->> null::int;
-- 
2.34.1

From f3747da14c887f97e50d9a3104881cbd3d5f60de Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH 1/4] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  1

Re: Remove dependence on integer wrapping

2024-07-06 Thread Joseph Koshakow
On Thu, Jun 13, 2024 at 12:00 AM Alexander Lakhin 
wrote:
>
> And one more with array...
> CREATE TABLE t (ia int[]);
> INSERT INTO t(ia[2147483647:2147483647]) VALUES ('{}');

I've added another patch, 0003, to resolve this wrap-around. In fact I
discovered a bug that the following statement is accepted and inserts
an empty array into the table.

INSERT INTO t(ia[-2147483648:2147483647]) VALUES ('{}');

My patch resolves this bug as well.

The other patches, 0001 and 0002, are unchanged but have their version
number incremented.

As a reminder, 0001 is reviewed and waiting for v18 and a committer.
0002 and 0003 are unreviewed. So, I'm going to mark this as waiting for
a reviewer.

Thanks,
Joe Koshakow
From f3747da14c887f97e50d9a3104881cbd3d5f60de Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH 1/3] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  11 +--
 src/test/regress/expected/timestamp.out|  13 +++
 src/test/regress/expected/timestamptz.out  |  13 +++
 src/test/regress/sql/timestamp.sql |   4 +
 src/test/regress/sql/timestamptz.sql   |   4 +
 10 files changed, 169 insertions(+), 55 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index 32fbad2f57..f6f095a57b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -352,8 +352,11 @@ cash_out(PG_FUNCTION_ARGS)
 
 	if (value < 0)
 	{
-		/* make the amount positive for digit-reconstruction loop */
-		value = -value;
+		/*
+		 * make the amount positive for digit-reconstruction loop, we can
+		 * leave INT64_MIN unchanged
+		 */
+		pg_neg_s64_overflow(value, &value);
 		/* set up formatting data */
 		signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
 		sign_posn = lconvert->n_sign_posn;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 5510a203b0..4ea2d9b0b4 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8110,15 +8110,14 @@ int64_to_numericvar(int64 val, NumericVar *var)
 
 	/* int64 can require at most 19 decimal digits; add one for safety */
 	alloc_var(var, 20 / DEC_DIGITS);
+	uval = pg_abs_s64(val);
 	if (val < 0)
 	{
 		var->sign = NUMERIC_NEG;
-		uval = -val;
 	}
 	else
 	{
 		var->sign = NUMERIC_POS;
-		uval = val;
 	}
 	var->dscale = 0;
 	if (val == 0)
@@ -11222,7 +11221,7 @@ power_var_int(const NumericVar *base, int exp, int exp_dscale,
 	 * Now we can proceed with the multiplications.
 	 */
 	neg = (exp < 0);
-	mask = abs(exp);
+	mask = pg_abs_s32(exp);
 
 	init_var(&base_prod);
 	set_var_from_var(base, &base_prod);
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..a3d7d6bf01 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 
+#include "common/int.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
 
@@ -131,6 +132,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	uint16		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int16		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -190,10 +192,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (unlikely(tmp > PG_INT16_MAX))
@@ -333,10 +334,9 @@ slow:
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (tmp > PG_INT16_MAX)
@@ -393,6 +393,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	uint32		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int32		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -452,10 +453,9 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
+		if (pg_neg_u32_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int32) tmp);
+		return result;
 	}

Re: Wrong security context for deferred triggers?

2024-06-22 Thread Joseph Koshakow
On Sat, Jun 22, 2024 at 6:23 PM David G. Johnston <
david.g.johns...@gmail.com> wrote:

> except invoker and triggerer are the same entity

Maybe "executor" would have been a better term than "invoker". In this
specific example they are not the same entity. The trigger is
triggered and queued by one role and executed by a different role,
hence the confusion. Though I agree with Laurenz, special SQL syntax
for this exotic corner case is a little too much.

> Security definer on the function would take precedence as would its set
clause.

These trigger options seem a bit redundant with the equivalent options
on the function that is executed by the trigger. What would be the
advantages or differences of setting these options on the trigger
versus the function?

Thanks,
Joe Koshakow


Re: Wrong security context for deferred triggers?

2024-06-22 Thread Joseph Koshakow
On Mon, Jun 10, 2024 at 1:00 PM Laurenz Albe 
wrote:

>Like you, I was surprised by the current behavior.  There is a design
>principle that PostgreSQL tries to follow, called the "Principle of
>least astonishment".  Things should behave like a moderately skilled
>user would expect them to.  In my opinion, the current behavior
violates
>that principle.  Tomas seems to agree with that point of view.

I worry that both approaches violate this principle in different ways.
For example consider the following sequence of events:

SET ROLE r1;
BEGIN;
SET CONSTRAINTS ALL DEFERRED;
INSERT INTO ...;
SET ROLE r2;
SET search_path = '...';
COMMIT;

I think that it would be reasonable to expect that the triggers execute
with r2 and not r1, since the triggers were explicitly deferred and the
role was explicitly set. It would likely be surprising that the search
path was updated for the trigger but not the role. With your proposed
approach it would be impossible for someone to trigger a trigger with
one role and execute it with another, if that's a desirable feature.

>I didn't find this strange behavior myself: it was one of our customers
>who uses security definer functions for data modifications and has
>problems with the current behavior, and I am trying to improve the
>situation on their behalf.

Would it be possible to share more details about this use case? For
example, what are their current problems? Are they not able to set
constraints to immediate? Or can they update the trigger function
itself to be a security definer function? That might help illuminate why
the current behavior is wrong.

>But I feel that the database user that runs the trigger should be the
>same user that ran the triggering SQL statement.  Even though I cannot
>put my hand on a case where changing this user would constitute a real
>security problem, it feels wrong.
>
>I am aware that that is rather squishy argumentation, but I have no
>better one.  Both my and Thomas' gut reaction seems to have been "the
>current behavior is wrong".

I understand the gut reaction, and I even have the same gut reaction,
but since we would be treating roles exceptionally compared to the rest
of the execution context, I would feel better if we had a more concrete
reason.

I also took a look at the code. It doesn't apply cleanly to master, so
I took the liberty of rebasing and attaching it.

> + /*
> + * The role could have been dropped since the trigger was queued.
> + * In that case, give up and error out.
> + */
> + pfree(GetUserNameFromId(evtshared->ats_rolid, false));

It feels a bit wasteful to allocate and copy the role name when we
never actually use it. Is it possible to check that the role exists
without copying the name?

Everything else looked good, and the code does what it says it will.

Thanks,
Joe Koshakow
From f5de4ea29d0f78549618c23db5951120218af203 Mon Sep 17 00:00:00 2001
From: Laurenz Albe 
Date: Wed, 6 Mar 2024 14:09:43 +0100
Subject: [PATCH] Make AFTER triggers run with the correct user

With deferred triggers, it is possible that the current role changes
between the time when the trigger is queued and the time it is
executed (for example, the triggering data modification could have been
executed in a SECURITY DEFINER function).

Up to now, deferred trigger functions would run with the current role
set to whatever was active at commit time.  That does not matter for
regular constraints, whose correctness doesn't depend on the current
role.  But for user-written constraint triggers, the current role
certainly matters.

Security considerations:
- The trigger function could be modified between the time the trigger
  is queued and the time it runs.  If the trigger was executed by a
  privileged user, the new behavior could be used for privilege
  escalation.  But if a privileged user executes DML on a table owned
  by an untrusted user, all bets are off anyway --- the malicious code
  could as well be in the trigger function from the beginning.
  So we don't consider this a security hazard.
- The previous behavior could lead to code inadvertently running with
  elevated privileges if a privileged user temporarily assumes lower
  privileges while executing DML on an untrusted table, but the deferred
  trigger runs with the user's original privileges.  However, that only
  applies if the privileged user commits *after* resuming the original
  role.  Should this be backpatched as a security bug?

Author: Laurenz Albe
Discussion: https://postgr.es/m/77ee784cf248e842f74588418f55c2931e47bd78.camel%40cybertec.at
---
 src/backend/commands/trigger.c | 23 
 src/test/regress/expected/triggers.out | 81 ++
 src/test/regress/sql/triggers.sql  | 75 
 3 files changed, 179 insertions(+)

diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 58b7fc5bbd..69d583751a 100644
--- 

Re: Remove dependence on integer wrapping

2024-06-19 Thread Joseph Koshakow
On Thu, Jun 13, 2024 at 10:56 PM Joseph Koshakow  wrote:

On Thu, Jun 13, 2024 at 10:48 PM Joseph Koshakow 
wrote:
>I've attached
>v4-0002-Handle-overflow-in-money-arithmetic.patch which adds some
>overflow checks and tests. I didn't address the float
multiplication
>because I didn't see any helper methods in int.h. I did see some
>useful helpers in float.h, but they raise an error directly instead
>of returning a bool. Would those be appropriate for use with the
>money type? If not I can refactor out the inner parts into a new
method
>that returns a bool.

>v4-0001-Remove-dependence-on-integer-wrapping.patch is unchanged, I
>just incremented the version number.

I added overflow handling for float arithmetic to the `money` type.
v6-0002-Handle-overflow-in-money-arithmetic.patch is ready for review.

v6-0001-Remove-dependence-on-integer-wrapping.patch is unchanged, I
just incremented the version number.

Thanks,
Joe Koshakow
From 6eec604618ee76227ee33fcddcc121d9915ff0ab Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH 1/2] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  11 +--
 src/test/regress/expected/timestamp.out|  13 +++
 src/test/regress/expected/timestamptz.out  |  13 +++
 src/test/regress/sql/timestamp.sql |   4 +
 src/test/regress/sql/timestamptz.sql   |   4 +
 10 files changed, 169 insertions(+), 55 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index 32fbad2f57..f6f095a57b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -352,8 +352,11 @@ cash_out(PG_FUNCTION_ARGS)
 
 	if (value < 0)
 	{
-		/* make the amount positive for digit-reconstruction loop */
-		value = -value;
+		/*
+		 * make the amount positive for digit-reconstruction loop, we can
+		 * leave INT64_MIN unchanged
+		 */
+		pg_neg_s64_overflow(value, &value);
 		/* set up formatting data */
 		signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
 		sign_posn = lconvert->n_sign_posn;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 5510a203b0..4ea2d9b0b4 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8110,15 +8110,14 @@ int64_to_numericvar(int64 val, NumericVar *var)
 
 	/* int64 can require at most 19 decimal digits; add one for safety */
 	alloc_var(var, 20 / DEC_DIGITS);
+	uval = pg_abs_s64(val);
 	if (val < 0)
 	{
 		var->sign = NUMERIC_NEG;
-		uval = -val;
 	}
 	else
 	{
 		var->sign = NUMERIC_POS;
-		uval = val;
 	}
 	var->dscale = 0;
 	if (val == 0)
@@ -11222,7 +11221,7 @@ power_var_int(const NumericVar *base, int exp, int exp_dscale,
 	 * Now we can proceed with the multiplications.
 	 */
 	neg = (exp < 0);
-	mask = abs(exp);
+	mask = pg_abs_s32(exp);
 
 	init_var(&base_prod);
 	set_var_from_var(base, &base_prod);
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..a3d7d6bf01 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,6 +18,7 @@
 #include <limits.h>
 #include <ctype.h>
 
+#include "common/int.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
 
@@ -131,6 +132,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	uint16		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int16		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -190,10 +192,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (unlikely(tmp > PG_INT16_MAX))
@@ -333,10 +334,9 @@ slow:
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (tmp > PG_INT16_MAX)
@@ -393,6 +393,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	uint32		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int32		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits wit

Re: Remove dependence on integer wrapping

2024-06-13 Thread Joseph Koshakow
On Thu, Jun 13, 2024 at 10:48 PM Joseph Koshakow  wrote:
>I've attached
>v4-0002-Handle-overflow-in-money-arithmetic.patch which adds some
>overflow checks and tests. I didn't address the float multiplication
>because I didn't see any helper methods in int.h. I did some some
>useful helpers in float.h, but they raise an error directly instead
>of returning a bool. Would those be appropriate for use with the
>money type? If not I can refactor out the inner parts into a new method
>that returns a bool.

>v4-0001-Remove-dependence-on-integer-wrapping.patch is unchanged, I
>just incremented the version number.

Oops I left a careless mistake in that last patch, my apologies. It's
fixed in the attached patches.

Thanks,
Joe Koshakow
From c54925ef698d37d968f138585141d308fe1acacc Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Thu, 13 Jun 2024 22:39:25 -0400
Subject: [PATCH 2/2] Handle overflow in money arithmetic

---
 src/backend/utils/adt/cash.c| 40 +++--
 src/test/regress/expected/money.out | 29 +
 src/test/regress/sql/money.sql  | 16 
 3 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index f6f095a57b..e5e51aefbc 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -617,7 +617,10 @@ cash_pl(PG_FUNCTION_ARGS)
 	Cash		c2 = PG_GETARG_CASH(1);
 	Cash		result;
 
-	result = c1 + c2;
+	if (pg_add_s64_overflow(c1, c2, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 
 	PG_RETURN_CASH(result);
 }
@@ -633,7 +636,10 @@ cash_mi(PG_FUNCTION_ARGS)
 	Cash		c2 = PG_GETARG_CASH(1);
 	Cash		result;
 
-	result = c1 - c2;
+	if (pg_sub_s64_overflow(c1, c2, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 
 	PG_RETURN_CASH(result);
 }
@@ -770,7 +776,10 @@ cash_mul_int8(PG_FUNCTION_ARGS)
 	int64		i = PG_GETARG_INT64(1);
 	Cash		result;
 
-	result = c * i;
+	if (pg_mul_s64_overflow(c, i, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 	PG_RETURN_CASH(result);
 }
 
@@ -785,7 +794,10 @@ int8_mul_cash(PG_FUNCTION_ARGS)
 	Cash		c = PG_GETARG_CASH(1);
 	Cash		result;
 
-	result = i * c;
+	if (pg_mul_s64_overflow(i, c, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 	PG_RETURN_CASH(result);
 }
 
@@ -820,7 +832,10 @@ cash_mul_int4(PG_FUNCTION_ARGS)
 	int32		i = PG_GETARG_INT32(1);
 	Cash		result;
 
-	result = c * i;
+	if (pg_mul_s64_overflow(c, (int64) i, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 	PG_RETURN_CASH(result);
 }
 
@@ -835,7 +850,10 @@ int4_mul_cash(PG_FUNCTION_ARGS)
 	Cash		c = PG_GETARG_CASH(1);
 	Cash		result;
 
-	result = i * c;
+	if (pg_mul_s64_overflow((int64) i, c, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 	PG_RETURN_CASH(result);
 }
 
@@ -872,7 +890,10 @@ cash_mul_int2(PG_FUNCTION_ARGS)
 	int16		s = PG_GETARG_INT16(1);
 	Cash		result;
 
-	result = c * s;
+	if (pg_mul_s64_overflow(c, (int64) s, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 	PG_RETURN_CASH(result);
 }
 
@@ -886,7 +907,10 @@ int2_mul_cash(PG_FUNCTION_ARGS)
 	Cash		c = PG_GETARG_CASH(1);
 	Cash		result;
 
-	result = s * c;
+	if (pg_mul_s64_overflow((int64) s, c, &result))
+		ereport(ERROR,
+(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("money out of range")));
 	PG_RETURN_CASH(result);
 }
 
diff --git a/src/test/regress/expected/money.out b/src/test/regress/expected/money.out
index 7fd4e31804..950e6410a4 100644
--- a/src/test/regress/expected/money.out
+++ b/src/test/regress/expected/money.out
@@ -528,3 +528,32 @@ SELECT '-92233720368547758.08'::money::numeric;
  -92233720368547758.08
 (1 row)
 
+-- Test overflow checks
+SELECT '92233720368547758.07'::money + '0.01'::money;
+ERROR:  money out of range
+SELECT '-92233720368547758.08'::money - '0.01'::money;
+ERROR:  money out of range
+SELECT '92233720368547758.07'::money * 2::int8;
+ERROR:  money out of range
+SELECT '-92233720368547758.08'::money * 2::int8;
+ERROR:  money out of range
+SELECT 2::int8 * '92233720368547758.07'::money ;
+ERROR:  money out of range
+SELECT 2::int8 * '-92233720368547758.08'::money;
+ERROR:  money out of range
+SELECT '92233720368547758.07'::money * 2::int4;
+ERROR:  money out of range
+SELECT '-92233720368547758.08'::money * 2::int4;
+ERROR:  money out of range
+SELECT 2::int4 * '92233720368547758.07'::money ;
+ERROR:  money out of range
+SELECT 2::int4 * '-92233720368547758.08'::money;
+E

Re: Remove dependence on integer wrapping

2024-06-13 Thread Joseph Koshakow
On Thu, Jun 13, 2024 at 12:00 AM Alexander Lakhin 
wrote:
>
>Let me remind you of bug #18240. Yes, that was about float8, but with
>-ftrapv we can get into the trap with:
>SELECT 1_000_000_000::money * 1_000_000_000::int;
>server closed the connection unexpectedly

Interesting, it looks like there's no overflow handling of any money
arithmetic. I've attached
v4-0002-Handle-overflow-in-money-arithmetic.patch which adds some
overflow checks and tests. I didn't address the float multiplication
because I didn't see any helper methods in int.h. I did see some
useful helpers in float.h, but they raise an error directly instead
of returning a bool. Would those be appropriate for use with the
money type? If not I can refactor out the inner parts into a new method
that returns a bool.

v4-0001-Remove-dependence-on-integer-wrapping.patch is unchanged, I
just incremented the version number.

>Also there are several trap-producing cases with date types:
>SELECT to_date('1', 'CC');
>SELECT to_timestamp('10,999', 'Y,YYY');
>SELECT make_date(-2147483648, 1, 1);
>
>And one more with array...
>CREATE TABLE t (ia int[]);
>INSERT INTO t(ia[2147483647:2147483647]) VALUES ('{}');

I'll try and get patches to address these too in the next couple of
weeks unless someone beats me to it.

>I think it's not the whole iceberg too.

+1

Thanks,
Joe Koshakow
From 31e8de30a82e60151848439143169e562bc848a3 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH 1/2] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  11 +--
 src/test/regress/expected/timestamp.out|  13 +++
 src/test/regress/expected/timestamptz.out  |  13 +++
 src/test/regress/sql/timestamp.sql |   4 +
 src/test/regress/sql/timestamptz.sql   |   4 +
 10 files changed, 169 insertions(+), 55 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index 32fbad2f57..f6f095a57b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -352,8 +352,11 @@ cash_out(PG_FUNCTION_ARGS)
 
 	if (value < 0)
 	{
-		/* make the amount positive for digit-reconstruction loop */
-		value = -value;
+		/*
+		 * make the amount positive for digit-reconstruction loop, we can
+		 * leave INT64_MIN unchanged
+		 */
+		pg_neg_s64_overflow(value, &value);
 		/* set up formatting data */
 		signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
 		sign_posn = lconvert->n_sign_posn;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 5510a203b0..4ea2d9b0b4 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8110,15 +8110,14 @@ int64_to_numericvar(int64 val, NumericVar *var)
 
 	/* int64 can require at most 19 decimal digits; add one for safety */
 	alloc_var(var, 20 / DEC_DIGITS);
+	uval = pg_abs_s64(val);
 	if (val < 0)
 	{
 		var->sign = NUMERIC_NEG;
-		uval = -val;
 	}
 	else
 	{
 		var->sign = NUMERIC_POS;
-		uval = val;
 	}
 	var->dscale = 0;
 	if (val == 0)
@@ -11222,7 +11221,7 @@ power_var_int(const NumericVar *base, int exp, int exp_dscale,
 	 * Now we can proceed with the multiplications.
 	 */
 	neg = (exp < 0);
-	mask = abs(exp);
+	mask = pg_abs_s32(exp);
 
 	init_var(&base_prod);
 	set_var_from_var(base, &base_prod);
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..a3d7d6bf01 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,6 +18,7 @@
 #include <limits.h>
 #include <ctype.h>
 
+#include "common/int.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
 
@@ -131,6 +132,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	uint16		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int16		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -190,10 +192,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (unlikely(tmp > PG_INT16_MAX))
@@ -333,10 +334,9 @@ slow:
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (tmp

Re: Remove dependence on integer wrapping

2024-06-11 Thread Joseph Koshakow
On Tue, Jun 11, 2024 at 12:22 PM Nathan Bossart 
wrote:

>I personally find that much easier to read.  Since the existing
open-coded
>overflow check is apparently insufficient, I think there's a reasonably
>strong case for centralizing this sort of thing so that we don't
continue
>to make the same mistakes.

Sounds good, the attached patch has these changes.

>tm2timestamp() in src/interfaces/ecpg/pgtypeslib/timestamp.c has the
same
>comment.  The code there looks very similar to the code for
tm2timestamp()
>in the other timestamp.c...

The attached patch has updated this file too. For some reason I was
under the impression that I should leave the ecpg/ files alone, though
I can't remember why.

Thanks,
Joe Koshakow
From adcf89561cec31499754a7c04da50c408a12724a Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c   |   7 +-
 src/backend/utils/adt/numeric.c|   5 +-
 src/backend/utils/adt/numutils.c   |  34 ---
 src/backend/utils/adt/timestamp.c  |  28 +-
 src/include/common/int.h   | 105 +
 src/interfaces/ecpg/pgtypeslib/timestamp.c |  11 +--
 src/test/regress/expected/timestamp.out|  13 +++
 src/test/regress/expected/timestamptz.out  |  13 +++
 src/test/regress/sql/timestamp.sql |   4 +
 src/test/regress/sql/timestamptz.sql   |   4 +
 10 files changed, 169 insertions(+), 55 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index 32fbad2f57..f6f095a57b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -352,8 +352,11 @@ cash_out(PG_FUNCTION_ARGS)
 
 	if (value < 0)
 	{
-		/* make the amount positive for digit-reconstruction loop */
-		value = -value;
+		/*
+		 * make the amount positive for digit-reconstruction loop, we can
+		 * leave INT64_MIN unchanged
+		 */
+		pg_neg_s64_overflow(value, &value);
 		/* set up formatting data */
 		signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
 		sign_posn = lconvert->n_sign_posn;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 5510a203b0..4ea2d9b0b4 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8110,15 +8110,14 @@ int64_to_numericvar(int64 val, NumericVar *var)
 
 	/* int64 can require at most 19 decimal digits; add one for safety */
 	alloc_var(var, 20 / DEC_DIGITS);
+	uval = pg_abs_s64(val);
 	if (val < 0)
 	{
 		var->sign = NUMERIC_NEG;
-		uval = -val;
 	}
 	else
 	{
 		var->sign = NUMERIC_POS;
-		uval = val;
 	}
 	var->dscale = 0;
 	if (val == 0)
@@ -11222,7 +11221,7 @@ power_var_int(const NumericVar *base, int exp, int exp_dscale,
 	 * Now we can proceed with the multiplications.
 	 */
 	neg = (exp < 0);
-	mask = abs(exp);
+	mask = pg_abs_s32(exp);
 
 	init_var(&base_prod);
 	set_var_from_var(base, &base_prod);
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..a3d7d6bf01 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,6 +18,7 @@
 #include <limits.h>
 #include <ctype.h>
 
+#include "common/int.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
 
@@ -131,6 +132,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	uint16		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int16		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -190,10 +192,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (unlikely(tmp > PG_INT16_MAX))
@@ -333,10 +334,9 @@ slow:
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
+		if (pg_neg_u16_overflow(tmp, &result))
 			goto out_of_range;
-		return -((int16) tmp);
+		return result;
 	}
 
 	if (tmp > PG_INT16_MAX)
@@ -393,6 +393,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	uint32		tmp = 0;
 	bool		neg = false;
 	unsigned char digit;
+	int32		result;
 
 	/*
 	 * The majority of cases are likely to be base-10 digits without any
@@ -452,10 +453,9 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 
 	if (neg)
 	{
-		/* check the negative equivalent will fit without overflowing */
-		if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
+		if (pg_neg_u32_overflow(tmp, &result))
 			goto out_of_range;

Re: Remove dependence on integer wrapping

2024-06-11 Thread Joseph Koshakow
>>   /* check the negative equivalent will fit without
overflowing */
>>   if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
>>   goto out_of_range;
>> +
>> + /*
>> +  * special case the minimum integer because its negation
cannot be
>> +  * represented
>> +  */
>> + if (tmp == ((uint16) PG_INT16_MAX) + 1)
>> + return PG_INT16_MIN;
>>   return -((int16) tmp);
>
> My first impression is that there appears to be two overflow checks, one
of
> which sends us to out_of_range, and another that just returns a special
> result.  Why shouldn't we add a pg_neg_s16_overflow() and replace this
> whole chunk with something like this?
>
>if (unlikely(pg_neg_s16_overflow(tmp, &tmp)))
>goto out_of_range;
>else
>return tmp;

tmp is an uint16 here, it seems like you might have read it as an
int16? We would need some helper method like

static inline bool
pg_neg_u16_overflow(uint16 a, int16 *result);

and then we could replace that whole chunk with something like

if (unlikely(pg_neg_u16_overflow(tmp, &result)))
goto out_of_range;
else
return result;


that pattern shows up a lot in this file, but I was worried that it
wasn't useful as a general purpose function. Happy to add it
though if you still feel otherwise.

>> + return ((uint32) INT32_MAX) + 1;
>>
>> + return ((uint64) INT64_MAX) + 1;
>
> nitpick: Any reason not to use PG_INT32_MAX/PG_INT64_MAX for these?

Carelessness, sorry about that, it's been fixed in the attached patch.

>> I believe this is a copy-and-paste from 841b4a2d5, which added this:
>>
>> +   *result = (date * INT64CONST(864)) + time;
>> +   /* check for major overflow */
>> +   if ((*result - time) / INT64CONST(864) != date)
>> +   return -1;
>> +   /* check for just-barely overflow (okay except time-of-day wraps) */
>> +   if ((*result < 0) ? (date >= 0) : (date < 0))
>> +   return -1;
>>
>> I think you could replace the whole thing by using overflow-aware
>> multiplication and addition primitives in the result calculation.
>> Lines 2-4 basically check for mult overflow and 5-7 for addition
>> overflow.
>
> Ah, I see.  Joe's patch does that in one place.  It's probably worth doing
> that in the other places this "just-barely overflow" comment appears
IMHO.
>
> I was still confused by the comment about 1999, but I tracked it down to
>commit 542eeba [0].  IIUC it literally means that we need special handling
>for that date because POSTGRES_EPOCH_JDATE is 2000-01-01.
>
> [0]
https://postgr.es/m/CABUevEx5zUO%3DKRUg06a9qnQ_e9EvTKscL6HxAM_L3xj71R7AQw%40mail.gmail.com

> Yeah, I think so, and I think we probably don't need any special care
> if we switch to direct tests of overflow-aware primitives. (Though
>it'd be worth checking that '1999-12-31 24:00:00'::timestamp still
> works.  It doesn't look like I actually added a test case for that.)

The only other place I found this comment was in
`make_timestamp_internal`. I've updated that function and added some
tests. I also manually verified that the behavior matches before and
after this patch.

>> BTW, while I approve of trying to get rid of our need for -fwrapv,
>> I'm quite scared of actually doing it.
>
> I think that's a quite fair concern. One potentially relevant datapoint is
> that we actually don't have -fwrapv equivalent on all platforms, and I
don't
>recall a lot of complaints from windows users. Of course it's quite
possible
> that they'd never notice...
>
> I think this is a good argument for enabling -ftrapv in development
> builds. That gives us at least a *chance* of seeing these issues.

+1, I wouldn't recommend removing -fwrapv immediately after this
commit. However, if we can enable -ftrapv in development builds, then
we can find overflows much more easily.

> Whatever cases you may have discovered by running the regression tests
will
> be at best the tip of the iceberg.

Agreed.

> Is there any chance of using static
> analysis to find all the places of concern?

I'm not personally familiar with any static analysis tools, but I can
try and do some research. Andres had previously suggested SQLSmith. I
think any kind of fuzz testing with -ftrapv enabled will reveal a lot
of issues. Honestly just grepping for +,-,* in certain directories
(like backend/utils/adt) would probably be fairly fruitful for anyone
with the patience. My previous overflow patch was the result of looking
through all the arithmetic in datetime.c.

Thanks,
Joe Koshakow

Remove dependence on integer wrapping

2024-06-09 Thread Joseph Koshakow
Hi,

In [0] Andres suggested enabling -ftrapv in assert enabled builds. While
I vastly underestimated the complexity of updating `configure` to do
this, I was able to enable the flag locally. Enabling this flag causes
our existing regression tests to trap and fail in multiple different
spots. The attached patch resolves all of these overflows so that all
of our existing tests will pass with the -ftrapv flag enabled.

Some notes on the patch itself are:

I originally added the helper functions to int.h thinking I'd find
many more instances of overflow due to integer negation, however I
didn't find that many. So let me know if you think we'd be better
off without the functions.

I considered using #ifdef to rely on wrapping when -fwrapv was
enabled. This would save us some unnecessary branching when we could
rely on wrapping behavior, but it would mean that we could only enable
-ftrapv when -fwrapv was disabled, greatly reducing its utility.

The following comment was in the code for parsing timestamps:

/* check for just-barely overflow (okay except time-of-day wraps) */
/* caution: we want to allow 1999-12-31 24:00:00 */

I wasn't able to fully understand it even after staring at it for
a while. Is the comment suggesting that it is ok for the months field,
for example, to wrap around? That doesn't sound right to me. I tested
the supplied timestamp, 1999-12-31 24:00:00, and it behaves the same
before and after the patch.

Thanks,
Joe Koshakow

[0]
https://www.postgresql.org/message-id/20240213191401.jjhsic7et4tiahjs%40awork3.anarazel.de
From 319bc904858ad8fbcc687a923733defd3358c7b9 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 8 Jun 2024 22:16:46 -0400
Subject: [PATCH] Remove dependence on integer wrapping

This commit updates various parts of the code to no longer rely on
integer wrapping for correctness. Not all compilers support -fwrapv, so
it's best not to rely on it.
---
 src/backend/utils/adt/cash.c  |  7 +++--
 src/backend/utils/adt/numeric.c   |  5 ++--
 src/backend/utils/adt/numutils.c  | 35 ++
 src/backend/utils/adt/timestamp.c | 13 ++---
 src/include/common/int.h  | 48 +++
 5 files changed, 92 insertions(+), 16 deletions(-)

diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
index 32fbad2f57..f6f095a57b 100644
--- a/src/backend/utils/adt/cash.c
+++ b/src/backend/utils/adt/cash.c
@@ -352,8 +352,11 @@ cash_out(PG_FUNCTION_ARGS)
 
 	if (value < 0)
 	{
-		/* make the amount positive for digit-reconstruction loop */
-		value = -value;
+		/*
+		 * make the amount positive for digit-reconstruction loop, we can
+		 * leave INT64_MIN unchanged
+		 */
+		pg_neg_s64_overflow(value, &value);
 		/* set up formatting data */
 		signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
 		sign_posn = lconvert->n_sign_posn;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 5510a203b0..4ea2d9b0b4 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8110,15 +8110,14 @@ int64_to_numericvar(int64 val, NumericVar *var)
 
 	/* int64 can require at most 19 decimal digits; add one for safety */
 	alloc_var(var, 20 / DEC_DIGITS);
+	uval = pg_abs_s64(val);
 	if (val < 0)
 	{
 		var->sign = NUMERIC_NEG;
-		uval = -val;
 	}
 	else
 	{
 		var->sign = NUMERIC_POS;
-		uval = val;
 	}
 	var->dscale = 0;
 	if (val == 0)
@@ -11222,7 +11221,7 @@ power_var_int(const NumericVar *base, int exp, int exp_dscale,
 	 * Now we can proceed with the multiplications.
 	 */
 	neg = (exp < 0);
-	mask = abs(exp);
+	mask = pg_abs_s32(exp);
 
 	init_var(&base_prod);
 	set_var_from_var(base, &base_prod);
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index adc1e8a4cb..12bef9d63c 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -193,6 +193,13 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 		/* check the negative equivalent will fit without overflowing */
 		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
 			goto out_of_range;
+
+		/*
+		 * special case the minimum integer because its negation cannot be
+		 * represented
+		 */
+		if (tmp == ((uint16) PG_INT16_MAX) + 1)
+			return PG_INT16_MIN;
 		return -((int16) tmp);
 	}
 
@@ -336,6 +343,13 @@ slow:
 		/* check the negative equivalent will fit without overflowing */
 		if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
 			goto out_of_range;
+
+		/*
+		 * special case the minimum integer because its negation cannot be
+		 * represented
+		 */
+		if (tmp == ((uint16) PG_INT16_MAX) + 1)
+			return PG_INT16_MIN;
 		return -((int16) tmp);
 	}
 
@@ -598,6 +612,13 @@ slow:
 		/* check the negative equivalent will fit without overflowing */
 		if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
 			goto out_of_range;
+
+		/*
+		 * special case the minimum integer because its negation cannot be
+

Re: Wrong security context for deferred triggers?

2024-06-09 Thread Joseph Koshakow
On Sat, Jun 8, 2024 at 10:13 PM Isaac Morland 
wrote:

> Speaking as a table owner, when I set a trigger on it, I expect that when
the specified actions occur my trigger will fire and will do what I
specify, without regard to the execution environment of the caller
(search_path in particular); and my trigger should be able to do anything
that I can do. For the canonical case of a logging table the trigger has to
be able to do stuff the caller can't do. I don't expect to be able to do
stuff that the caller can do.
>
> Speaking as someone making an update on a table, I don't expect to have
it fail because my execution environment (search_path in particular) is
wrong for the trigger implementation, and I consider it a security
violation if the table owner is able to do stuff as me as a result,
especially if I am an administrator making an update as superuser.

Can you expand on this a bit? When a trigger executes should the
execution environment match:

  - The execution environment of the trigger owner at the time of
  trigger creation?
  - The execution environment of the function owner at the time of
  function creation?
  - An execution environment built from the trigger owner's default
  configuration parameters?
  - Something else?

While I am convinced that privileges should be checked using the
trigger owner's role, I'm less convinced of other configuration
parameters. For the search_path example, that can be resolved by
either fully qualifying object names or setting the search_path in the
function itself. Similar approaches can be taken with other
configuration parameters.

I also worry that it would be a source of confusion that the execution
environment of triggers comes from the trigger/function owner, but the
execution environment of function calls comes from the caller.

> I think it's pretty clear the existing behaviour is the wrong choice in
every other way than backward compatibility. I welcome examples to the
contrary, where the existing behaviour is not just OK but actually wanted.

This is perhaps a contrived example, but here's one. Suppose I create a
trigger that raises a notice that includes the current timestamp. I
would probably want to use the timezone of the caller, not the
trigger owner.

Thanks,
Joe Koshakow


Re: Wrong security context for deferred triggers?

2024-06-08 Thread Joseph Koshakow
On Sat, Jun 8, 2024 at 5:36 PM Joseph Koshakow  wrote:

>Additionally, I applied your patch to master and re-ran the example and
>didn't notice any behavior change.
>
>test=# CREATE TABLE tab (i integer);
>CREATE TABLE
>test=# CREATE FUNCTION trig() RETURNS trigger
>LANGUAGE plpgsql AS
> $$BEGIN
>RAISE NOTICE 'current_user = %', current_user;
>RETURN NEW;
> END;$$;
>CREATE FUNCTION
>test=# CREATE CONSTRAINT TRIGGER trig AFTER INSERT ON tab
>DEFERRABLE INITIALLY IMMEDIATE
>FOR EACH ROW EXECUTE FUNCTION trig();
>CREATE TRIGGER
>test=# CREATE ROLE duff;
>CREATE ROLE
>test=# GRANT INSERT ON tab TO duff;
>GRANT
>test=# SET ROLE duff;
>SET
>test=> BEGIN;
>BEGIN
>test=*> INSERT INTO tab VALUES (1);
>NOTICE:  current_user = duff
>INSERT 0 1
>test=*> SET CONSTRAINTS ALL DEFERRED;
>SET CONSTRAINTS
>test=*> INSERT INTO tab VALUES (2);
>INSERT 0 1
>test=*> RESET ROLE;
>RESET
>test=*# COMMIT;
>NOTICE:  current_user = joe
>COMMIT
>
>Though maybe I'm just doing something wrong.

Sorry, there's definitely something wrong with my environment. You can
ignore this.

Thanks,
Joe Koshakow


Re: Wrong security context for deferred triggers?

2024-06-08 Thread Joseph Koshakow
Hi,

I see that this patch is marked as ready for review, so I thought I
would attempt to review it. This is my first review, so please take it
with a grain of salt.

> So a deferred constraint trigger does not run with the same security
context
> as an immediate trigger.

It sounds like the crux of your argument is that the current behavior
is that triggers are executed with the role and security context of the
session at the time of execution. Instead, the trigger should be
executed with the role and security context of the session at the
time of queuing (i.e. the same context as the action that triggered the
trigger). While I understand that the current behavior can be
surprising in some scenarios, it's not clear to me why this behavior is
wrong. It seems that the whole point of deferring a trigger to commit
time is that the context that the trigger is executed in is different
than the context that it was triggered in. Tables may have changed,
permissions may have changed, session configuration variables may have
changed, roles may have changed, etc. So why should the executing role
be treated differently and restored to the value at the time of
triggering? Perhaps you can expand on why you feel that the current
behavior is wrong?

> This is somewhat nasty in combination with
> SECURITY DEFINER functions: if that function performs an operation, and
that
> operation triggers a deferred trigger, that trigger will run in the wrong
> security context.
...
> The more serious concern is that the old code constitutes
> a narrow security hazard: a superuser could temporarily
> assume an unprivileged role to avoid risks while performing
> DML on a table controlled by an untrusted user, but for
> some reason resume being a superuser *before* COMMIT.
> Then a deferred trigger would inadvertently run with
> superuser privileges.

I find these examples to be surprising, but not necessarily wrong
(as per my comment above). If someone wants the triggers to be executed
as the triggering role, then they can run `SET CONSTRAINTS ALL
IMMEDIATE`. If deferring a trigger to commit time and executing it as
the triggering role is desirable, then maybe we should add a modifier
to triggers that can control this behavior. Something like
`SECURITY INVOKER | SECURITY TRIGGERER` (modeled after the modifiers in
`CREATE FUNCTION`) that control which role is used.

> This looks to me like another reason that triggers should run as the
> trigger owner. Which role owns the trigger won’t change as a result of
> constraints being deferred or not, or any role setting done during the
> transaction, including that relating to security definer functions.
>
> Right now triggers can’t do anything that those who can
> INSERT/UPDATE/DELETE (i.e., cause the trigger to fire) can’t do, which in
>particular breaks the almost canonical example of using triggers to log
> changes — I can’t do it without also allowing users to make spurious log
> entries.
>
> Also if I cause a trigger to fire, I’ve just given the trigger owner the
> opportunity to run arbitrary code as me.
>
>> I just realized one problem with running a deferred constraint trigger as
>> the triggering role: that role might have been dropped by the time the
>> trigger
>> executes.  But then we could still error out.
>
> This problem is also fixed by running triggers as their owners: there
> should be a dependency between an object and its owner. So the
> trigger-executing role can’t be dropped without dropping the trigger.

+1, this approach would remove all of the surprising/wrong behavior and
in my opinion is more obvious. I'd like to add some more reasons why
this behavior makes sense:

  - The documentation [0] indicates that to create a trigger, the
  creating role must have the `EXECUTE` privilege on the trigger
  function. In fact this check is skipped for the role that triggers
  the trigger.

-- Create trig_owner role and function. Grant execute on function
-- to role.
test=# CREATE ROLE trig_owner;
CREATE ROLE
test=# GRANT CREATE ON SCHEMA public TO trig_owner;
GRANT
test=# CREATE OR REPLACE FUNCTION f() RETURNS trigger
LANGUAGE plpgsql AS
  $$BEGIN
RAISE NOTICE 'current_user = %', current_user;
RETURN NEW;
 END;$$;
 CREATE FUNCTION
 test=# REVOKE EXECUTE ON FUNCTION f FROM PUBLIC;
 REVOKE
 test=# GRANT EXECUTE ON FUNCTION f TO trig_owner;
 GRANT

 -- Create the trigger as trig_owner.
 test=# SET ROLE trig_owner;
 SET
 test=> CREATE TABLE t (a INT);
 CREATE TABLE
 test=> CREATE CONSTRAINT TRIGGER trig AFTER INSERT ON t
DEFERRABLE INITIALLY IMMEDIATE
FOR EACH ROW EXECUTE FUNCTION f();
CREATE TRIGGER

-- Trigger the trigger with a role that doesn't have execute
-- privileges on the trigger function and also call the function
-- directly. The trigger succeeds but the function call fails.
test=> RESET ROLE;
RESET
test=# CREATE ROLE r1;
 

Re: Fix overflow hazard in interval rounding

2024-06-02 Thread Joseph Koshakow
Hi Andres,

Sorry for such a late reply.

On Tue, Feb 13, 2024 at 2:14 PM Andres Freund  wrote:

> Random, mildly related thought: I wonder if it's time to, again, look at
> enabling -ftrapv in assert enabled builds. I had looked at that a few years
> back, and fixed a number of instances, but not all I think. But I think
we are
> a lot closer to avoiding signed overflows everywhere, and it'd be nice to
find
> overflow hazards more easily.

I agree that this would be very helpful.

> Many places are broken even with -fwrapv
> semantics (which we don't have on all compilers!). Trapping on such
overflows
> makes it far easier to find problems with tools like sqlsmith.

Does this mean that some of our existing tests will panic when compiled
with -ftrapv or -fwrapv? If so I'd be interested in resolving the
remaining issues if you could point me in the right direction of how to
set the flag.

Thanks,
Joe Koshakow


Re: drop column name conflict

2024-05-04 Thread Joseph Koshakow
On Sat, May 4, 2024 at 11:29 AM Tom Lane  wrote:
> I think we intentionally did not bother with preventing this,
> on the grounds that if you were silly enough to name a column
> that way then you deserve any ensuing problems.

Fair enough.

> If we were going to expend any code on the scenario, I'd prefer
> to make it be checks in column addition/renaming that disallow
> naming a column this way.

Is there any interest in making this change? The attached patch could
use some cleanup, but seems to accomplish what's described. It's
definitely more involved than the previous one and may not be worth the
effort. If you feel that it's worth it I can clean it up, otherwise
I'll drop it.

Thanks,
Joe Koshakow
From 936a9e3509867574633882f5c1ec714d2f2604ec Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 4 May 2024 10:12:37 -0400
Subject: [PATCH] Prevent name conflicts when dropping a column

Previously, dropped columns were always renamed to
"pg.dropped.<attnum>". In the rare scenario that a
column with that name already exists, the column drop would fail with
an error about violating the unique constraint on
"pg_attribute_relid_attnam_index". This commit fixes that issue by
preventing users from creating columns with a name that matches
"pg.dropped.\d+". This is backwards incompatible.
---
 src/backend/catalog/heap.c | 57 --
 src/backend/commands/tablecmds.c   |  2 +
 src/include/catalog/heap.h |  3 ++
 src/test/regress/expected/alter_table.out  |  7 +++
 src/test/regress/expected/create_table.out |  3 ++
 src/test/regress/sql/alter_table.sql   |  6 +++
 src/test/regress/sql/create_table.sql  |  3 ++
 7 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 922ba79ac2..0a0afe833d 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -231,6 +231,9 @@ static const FormData_pg_attribute a6 = {
 
 static const FormData_pg_attribute *const SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6};
 
+static const char *dropped_col_pre = "pg.dropped.";
+static const char *dropped_col_post = "";
+
 /*
  * This function returns a Form_pg_attribute pointer for a system attribute.
  * Note that we elog if the presented attno is invalid, which would only
@@ -468,10 +471,10 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 		MaxHeapAttributeNumber)));
 
 	/*
-	 * first check for collision with system attribute names
+	 * first check for collision with system attribute and reserved names
 	 *
 	 * Skip this for a view or type relation, since those don't have system
-	 * attributes.
+	 * attributes and cannot drop columns.
 	 */
 	if (relkind != RELKIND_VIEW && relkind != RELKIND_COMPOSITE_TYPE)
 	{
@@ -484,6 +487,11 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 		(errcode(ERRCODE_DUPLICATE_COLUMN),
 		 errmsg("column name \"%s\" conflicts with a system column name",
 NameStr(attr->attname))));
+
+			if ((CHKATYPE_RESERVED_NAME & flags) == 0)
+			{
+CheckAttributeReservedName(NameStr(attr->attname));
+			}
 		}
 	}
 
@@ -679,6 +687,47 @@ CheckAttributeType(const char *attname,
 	}
 }
 
+/*
+ * TODO: Add function description.
+ */
+void
+CheckAttributeReservedName(const char *attname)
+{
+	size_t		name_len,
+pre_len,
+post_len;
+	int			i;
+
+	name_len = strlen(attname);
+	pre_len = strlen(dropped_col_pre);
+	post_len = strlen(dropped_col_post);
+
+	if (name_len < pre_len + post_len + 1)
+	{
+		return;
+	}
+	if (memcmp(attname, dropped_col_pre, pre_len) != 0)
+	{
+		return;
+	}
+	for (i = pre_len; i < name_len - post_len; i++)
+	{
+		if (!isdigit(attname[i]))
+		{
+			return;
+		}
+	}
+	if (memcmp(attname + (name_len - post_len), dropped_col_post, post_len) != 0)
+	{
+		return;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_RESERVED_NAME),
+			 errmsg("column name \"%s\" conflicts with a reserved column name",
+	attname)));
+}
+
 /*
  * InsertPgAttributeTuples
  *		Construct and insert a set of tuples in pg_attribute.
@@ -1148,7 +1197,7 @@ heap_create_with_catalog(const char *relname,
 	 * hack to allow creating pg_statistic and cloning it during VACUUM FULL.
 	 */
 	CheckAttributeNamesTypes(tupdesc, relkind,
-			 allow_system_table_mods ? CHKATYPE_ANYARRAY : 0);
+			 (allow_system_table_mods ? CHKATYPE_ANYARRAY : 0) | (is_internal ? CHKATYPE_RESERVED_NAME : 0));
 
 	/*
 	 * This would fail later on anyway, if the relation already exists.  But
@@ -1705,7 +1754,7 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	 * Change the column name to something that isn't likely to conflict
 	 */
 	snprintf(newattname, sizeof(newattname),
-			 "pg.dropped.%d", attnum);
+			 "%s%d%s", dropped_col_pre, attnum, dropped_col_post);

drop column name conflict

2024-05-04 Thread Joseph Koshakow
Hi all,

There's a rare edge case in `alter table` that can prevent users from
dropping a column as shown below

# create table atacc1(a int, "pg.dropped.1" int);
CREATE TABLE
# alter table atacc1 drop column a;
ERROR:  duplicate key value violates unique constraint
"pg_attribute_relid_attnam_index"
DETAIL:  Key (attrelid, attname)=(16407, pg.dropped.1)
already exists.

It seems a bit silly and unlikely that anyone would ever find
themselves in this scenario, but it also seems easy enough to fix as
shown by the attached patch.

Does anyone think this is worth fixing? If so I can submit it to the
current commitfest.

Thanks,
Joe Koshakow
From 50f6e73d9bc1e00ad3988faa80a84af70aef Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 4 May 2024 10:12:37 -0400
Subject: [PATCH] Prevent name conflicts when dropping a column

Previously, dropped columns were always renamed to
"pg.dropped.<attnum>". In the rare scenario that a
column with that name already exists, the column drop would fail with
an error about violating the unique constraint on
"pg_attribute_relid_attnam_index". This commit fixes that issue by
appending an int to dropped column name until we find a unique name.
Since tables have a maximum of 16,000 columns and the max int is larger
than 16,000, we are guaranteed to find a unique name.
---
 src/backend/catalog/heap.c| 16 +++-
 src/test/regress/expected/alter_table.out |  4 
 src/test/regress/sql/alter_table.sql  |  5 +
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 922ba79ac2..852ed442e1 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1658,11 +1658,13 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	Relation	rel;
 	Relation	attr_rel;
 	HeapTuple	tuple;
+	HeapTuple	drop_tuple_check;
 	Form_pg_attribute attStruct;
 	char		newattname[NAMEDATALEN];
 	Datum		valuesAtt[Natts_pg_attribute] = {0};
 	bool		nullsAtt[Natts_pg_attribute] = {0};
 	bool		replacesAtt[Natts_pg_attribute] = {0};
+	int			i;
 
 	/*
 	 * Grab an exclusive lock on the target table, which we will NOT release
@@ -1702,10 +1704,22 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	attStruct->attgenerated = '\0';
 
 	/*
-	 * Change the column name to something that isn't likely to conflict
+	 * Change the column name to something that doesn't conflict
 	 */
 	snprintf(newattname, sizeof(newattname),
 			 "pg.dropped.%d", attnum);
+	Assert(PG_INT32_MAX > MaxHeapAttributeNumber);
+	drop_tuple_check = SearchSysCacheCopy2(ATTNAME,
+		   ObjectIdGetDatum(relid),
+		   PointerGetDatum(newattname));
+	for (i = 0; HeapTupleIsValid(drop_tuple_check); i++)
+	{
+		snprintf(newattname, sizeof(newattname),
+ "pg.dropped.%d.%d", attnum, i);
+		drop_tuple_check = SearchSysCacheCopy2(ATTNAME,
+			   ObjectIdGetDatum(relid),
+			   PointerGetDatum(newattname));
+	}
 	namestrcpy(&(attStruct->attname), newattname);
 
 	/* Clear the missing value */
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index 7666c76238..844ae55467 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -1554,6 +1554,10 @@ insert into atacc1(id, value) values (null, 0);
 ERROR:  null value in column "id" of relation "atacc1" violates not-null constraint
 DETAIL:  Failing row contains (null, 0).
 drop table atacc1;
+-- test dropping a column doesn't cause name conflicts
+create table atacc1(a int, "pg.dropped.1" int);
+alter table atacc1 drop column a;
+drop table atacc1;
 -- test inheritance
 create table parent (a int, b int, c int);
 insert into parent values (1, 2, 3);
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index 9df5a63bdf..d5d912a2e2 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1097,6 +1097,11 @@ insert into atacc1(value) values (100);
 insert into atacc1(id, value) values (null, 0);
 drop table atacc1;
 
+-- test dropping a column doesn't cause name conflicts
+create table atacc1(a int, "pg.dropped.1" int);
+alter table atacc1 drop column a;
+drop table atacc1;
+
 -- test inheritance
 create table parent (a int, b int, c int);
 insert into parent values (1, 2, 3);
-- 
2.34.1



Re: Fix overflow hazard in timestamp_pl_interval

2024-04-27 Thread Joseph Koshakow
Hi all,

Immediately after sending this I realized that timestamptz suffers
from the same problem. Attached is an updated patch that fixes
timestamptz too.

Thanks,
Joe Koshakow

On Sat, Apr 27, 2024 at 10:59 PM Joseph Koshakow  wrote:

> Hi all,
>
> Attached is a patch that fixes some overflow/underflow hazards in
> `timestamp_pl_interval`. The microseconds overflow could produce
> incorrect results. The month overflow would generally still result in an
> error from the timestamp month field being too low, but it's still
> better to catch it early.
>
> I couldn't find any existing timestamp plus interval tests so I stuck
> a new test in `timestamp.sql`. If there's a better place, then
> please let me know.
>
> Thanks,
> Joe Koshakow
>
From 1a039ab807654fe9b7a2043e30ecdee925127d77 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 27 Apr 2024 22:32:44 -0400
Subject: [PATCH] Catch overflow when adding timestamp to interval

Previously, an interval microseconds field close to INT64_MAX or an
interval months field close to INT32_MAX could overflow when added to
a timestamp or timestamptz. The microseconds overflow could produce
incorrect results. The month overflow would generally still result in
an error from the resulting month field being too low, but it's still
better to catch it early.
---
 src/backend/utils/adt/timestamp.c | 21 +
 src/test/regress/expected/timestamp.out   |  3 +++
 src/test/regress/expected/timestamptz.out |  3 +++
 src/test/regress/sql/timestamp.sql|  3 +++
 src/test/regress/sql/timestamptz.sql  |  3 +++
 5 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 963f2ec74a..551c0dbd7a 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -3091,7 +3091,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
 		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 		 errmsg("timestamp out of range")));
 
-			tm->tm_mon += span->month;
+			if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ereport(ERROR,
+		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+		 errmsg("timestamp out of range")));
 			if (tm->tm_mon > MONTHS_PER_YEAR)
 			{
 tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -3143,7 +3146,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
 		 errmsg("timestamp out of range")));
 		}
 
-		timestamp += span->time;
+		if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+			ereport(ERROR,
+	(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+	 errmsg("timestamp out of range")));
 
 		if (!IS_VALID_TIMESTAMP(timestamp))
 			ereport(ERROR,
@@ -3233,7 +3239,10 @@ timestamptz_pl_interval_internal(TimestampTz timestamp,
 		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 		 errmsg("timestamp out of range")));
 
-			tm->tm_mon += span->month;
+			if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ereport(ERROR,
+		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+		 errmsg("timestamp out of range")));
 			if (tm->tm_mon > MONTHS_PER_YEAR)
 			{
 tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -3292,7 +3301,11 @@ timestamptz_pl_interval_internal(TimestampTz timestamp,
 		 errmsg("timestamp out of range")));
 		}
 
-		timestamp += span->time;
+		if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+			ereport(ERROR,
+	(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+	 errmsg("timestamp out of range")));
+
 
 		if (!IS_VALID_TIMESTAMP(timestamp))
 			ereport(ERROR,
diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out
index cf337da517..fc427baa4a 100644
--- a/src/test/regress/expected/timestamp.out
+++ b/src/test/regress/expected/timestamp.out
@@ -1230,6 +1230,9 @@ SELECT timestamp '294276-12-31 23:59:59' - timestamp '1999-12-23 19:59:04.224193
 
 SELECT timestamp '294276-12-31 23:59:59' - timestamp '1999-12-23 19:59:04.224192' AS overflows;
 ERROR:  interval out of range
+-- test edge-case overflow in timestamp plus interval
+SELECT timestamp '294276-12-31 23:59:59' + interval '9223372036854775807 microseconds';
+ERROR:  timestamp out of range
 -- TO_CHAR()
 SELECT to_char(d1, 'DAY Day day DY Dy dy MONTH Month month RM MON Mon mon')
FROM TIMESTAMP_TBL;
diff --git a/src/test/regress/expected/timestamptz.out b/src/test/regress/expected/timestamptz.out
index bfb3825ff6..143aaeb126 100644
--- a/src/test/regress/expected/timestamptz.out
+++ b/src/test/regress/expected/timestamptz.out
@@ -1354,6 +1354,9 @@ SELECT timestamptz '294276-12-31 23:59:59 UTC' - timestamptz '1999-12-23 19:59:0
 
 SELECT timestamptz '294276-12-31 23:59:59 UTC' - timestamptz '1999-12-23 19:59:04.224192 UTC' AS overflows;
 ERROR:  interval out of range
+-

Fix overflow hazard in timestamp_pl_interval

2024-04-27 Thread Joseph Koshakow
Hi all,

Attached is a patch that fixes some overflow/underflow hazards in
`timestamp_pl_interval`. The microseconds overflow could produce
incorrect results. The month overflow would generally still result in an
error from the timestamp month field being too low, but it's still
better to catch it early.

I couldn't find any existing timestamp plus interval tests so I stuck
a new test in `timestamp.sql`. If there's a better place, then
please let me know.

Thanks,
Joe Koshakow
From 4350e540fd45d3c868a36021ae79ce533bdeab5f Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 27 Apr 2024 22:32:44 -0400
Subject: [PATCH] Catch overflow when adding timestamp to interval

Previously, an interval microseconds field close to INT64_MAX or an
interval months field close to INT32_MAX could overflow when added to
a timestamp. The microseconds overflow could produce incorrect result.
The month overflow would generally still result in an error from the
timestamp month field being too low, but it's still better to catch it
early.
---
 src/backend/utils/adt/timestamp.c   | 12 +---
 src/test/regress/expected/timestamp.out |  3 +++
 src/test/regress/sql/timestamp.sql  |  3 +++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 963f2ec74a..a6b9aeb7b8 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -3091,7 +3091,11 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
 		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 		 errmsg("timestamp out of range")));
 
-			tm->tm_mon += span->month;
+			if (pg_add_s32_overflow(tm->tm_mon, span->month, &tm->tm_mon))
+ereport(ERROR,
+		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+		 errmsg("timestamp out of range")));
+
 			if (tm->tm_mon > MONTHS_PER_YEAR)
 			{
 tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
@@ -3142,8 +3146,10 @@ timestamp_pl_interval(PG_FUNCTION_ARGS)
 		(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 		 errmsg("timestamp out of range")));
 		}
-
-		timestamp += span->time;
+		if (pg_add_s64_overflow(timestamp, span->time, &timestamp))
+			ereport(ERROR,
+	(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+	 errmsg("timestamp out of range")));
 
 		if (!IS_VALID_TIMESTAMP(timestamp))
 			ereport(ERROR,
diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out
index cf337da517..fc427baa4a 100644
--- a/src/test/regress/expected/timestamp.out
+++ b/src/test/regress/expected/timestamp.out
@@ -1230,6 +1230,9 @@ SELECT timestamp '294276-12-31 23:59:59' - timestamp '1999-12-23 19:59:04.224193
 
 SELECT timestamp '294276-12-31 23:59:59' - timestamp '1999-12-23 19:59:04.224192' AS overflows;
 ERROR:  interval out of range
+-- test edge-case overflow in timestamp plus interval
+SELECT timestamp '294276-12-31 23:59:59' + interval '9223372036854775807 microseconds';
+ERROR:  timestamp out of range
 -- TO_CHAR()
 SELECT to_char(d1, 'DAY Day day DY Dy dy MONTH Month month RM MON Mon mon')
FROM TIMESTAMP_TBL;
diff --git a/src/test/regress/sql/timestamp.sql b/src/test/regress/sql/timestamp.sql
index 820ef7752a..13baf01d01 100644
--- a/src/test/regress/sql/timestamp.sql
+++ b/src/test/regress/sql/timestamp.sql
@@ -338,6 +338,9 @@ SELECT extract(epoch from '5000-01-01 00:00:00'::timestamp);
 SELECT timestamp '294276-12-31 23:59:59' - timestamp '1999-12-23 19:59:04.224193' AS ok;
 SELECT timestamp '294276-12-31 23:59:59' - timestamp '1999-12-23 19:59:04.224192' AS overflows;
 
+-- test edge-case overflow in timestamp plus interval
+SELECT timestamp '294276-12-31 23:59:59' + interval '9223372036854775807 microseconds';
+
 -- TO_CHAR()
 SELECT to_char(d1, 'DAY Day day DY Dy dy MONTH Month month RM MON Mon mon')
FROM TIMESTAMP_TBL;
-- 
2.34.1



Re: Fix overflow hazard in interval rounding

2024-02-13 Thread Joseph Koshakow
On Tue, Feb 13, 2024 at 1:46 PM Tom Lane  wrote:

>I think you need to use ereturn not ereport here; see other error
>cases in AdjustIntervalForTypmod.

Attached is an updated patch that makes this adjustment.

>(We'd need ereport in back branches, but this problem seems to
>me to probably not be worth back-patching.)

Agreed, this seems like a pretty rare overflow/underflow.

Thanks,
Joe Koshakow
From 470aa9c8898b4e4ebbad97d6e421377b9a3e03cf Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Tue, 13 Feb 2024 13:06:13 -0500
Subject: [PATCH] Fix overflow hazard in interval rounding

This commit fixes overflow/underflow hazards present in the interval
rounding code used to parse intervals.
---
 src/backend/utils/adt/timestamp.c  | 18 ++
 src/test/regress/expected/interval.out |  9 +
 src/test/regress/sql/interval.sql  |  4 
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index c38f88dba7..97566d7e3b 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1509,17 +1509,19 @@ AdjustIntervalForTypmod(Interval *interval, int32 typmod,
 
 			if (interval->time >= INT64CONST(0))
 			{
-interval->time = ((interval->time +
-   IntervalOffsets[precision]) /
-  IntervalScales[precision]) *
-	IntervalScales[precision];
+if (pg_add_s64_overflow(interval->time, IntervalOffsets[precision], &interval->time))
+	ereturn(escontext, false,
+			(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+			 errmsg("interval out of range")));
+interval->time = (interval->time / IntervalScales[precision]) * IntervalScales[precision];
 			}
 			else
 			{
-interval->time = -(((-interval->time +
-	 IntervalOffsets[precision]) /
-	IntervalScales[precision]) *
-   IntervalScales[precision]);
+if (pg_sub_s64_overflow(IntervalOffsets[precision], interval->time, &interval->time))
+	ereturn(escontext, false,
+			(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+			 errmsg("interval out of range")));
+interval->time = -((interval->time / IntervalScales[precision]) * IntervalScales[precision]);
 			}
 		}
 	}
diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out
index b79b6fcd4d..055930ccac 100644
--- a/src/test/regress/expected/interval.out
+++ b/src/test/regress/expected/interval.out
@@ -929,6 +929,15 @@ SELECT interval '1 2:03:04.5678' minute to second(2);
  1 day 02:03:04.57
 (1 row)
 
+-- these should fail as out-of-range
+SELECT interval '2562047788:00:54.775807' SECOND(2);
+ERROR:  interval out of range
+LINE 1: SELECT interval '2562047788:00:54.775807' SECOND(2);
+^
+SELECT interval '-2562047788:00:54.775807' SECOND(2);
+ERROR:  interval out of range
+LINE 1: SELECT interval '-2562047788:00:54.775807' SECOND(2);
+^
 -- test casting to restricted precision (bug #14479)
 SELECT f1, f1::INTERVAL DAY TO MINUTE AS "minutes",
   (f1 + INTERVAL '1 month')::INTERVAL MONTH::INTERVAL YEAR AS "years"
diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql
index 5566ad0e51..d945a13714 100644
--- a/src/test/regress/sql/interval.sql
+++ b/src/test/regress/sql/interval.sql
@@ -270,6 +270,10 @@ SELECT interval '1 2:03:04.5678' hour to second(2);
 SELECT interval '1 2.3456' minute to second(2);
 SELECT interval '1 2:03.5678' minute to second(2);
 SELECT interval '1 2:03:04.5678' minute to second(2);
+-- these should fail as out-of-range
+SELECT interval '2562047788:00:54.775807' SECOND(2);
+SELECT interval '-2562047788:00:54.775807' SECOND(2);
+
 
 -- test casting to restricted precision (bug #14479)
 SELECT f1, f1::INTERVAL DAY TO MINUTE AS "minutes",
-- 
2.34.1



Fix overflow hazard in interval rounding

2024-02-13 Thread Joseph Koshakow
Hi all,

Attached is a patch that fixes some overflow/underflow hazards that I
discovered in the interval rounding code.

The lines look a bit long, but I did run the following before committing:
`$ curl https://buildfarm.postgresql.org/cgi-bin/typedefs.pl -o
src/tools/pgindent/typedefs.list && src/tools/pgindent/pgindent
src/backend/utils/adt/timestamp.c`

Thanks,
Joe Koshakow
From 389b0d1e3f3cca6fca1e45fdd202b1ca066326c2 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Tue, 13 Feb 2024 13:06:13 -0500
Subject: [PATCH] Fix overflow hazard in interval rounding

This commit fixes overflow/underflow hazards present in the interval
rounding code used to parse intervals.
---
 src/backend/utils/adt/timestamp.c  | 18 ++
 src/test/regress/expected/interval.out |  9 +
 src/test/regress/sql/interval.sql  |  5 +
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index c38f88dba7..a3b65a755f 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1509,17 +1509,19 @@ AdjustIntervalForTypmod(Interval *interval, int32 typmod,
 
 			if (interval->time >= INT64CONST(0))
 			{
-interval->time = ((interval->time +
-   IntervalOffsets[precision]) /
-  IntervalScales[precision]) *
-	IntervalScales[precision];
+if (pg_add_s64_overflow(interval->time, IntervalOffsets[precision], &interval->time))
+	ereport(ERROR,
+			errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+			errmsg("interval out of range"));
+interval->time = (interval->time / IntervalScales[precision]) * IntervalScales[precision];
 			}
 			else
 			{
-interval->time = -(((-interval->time +
-	 IntervalOffsets[precision]) /
-	IntervalScales[precision]) *
-   IntervalScales[precision]);
+if (pg_sub_s64_overflow(IntervalOffsets[precision], interval->time, &interval->time))
+	ereport(ERROR,
+			errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+			errmsg("interval out of range"));
+interval->time = -((interval->time / IntervalScales[precision]) * IntervalScales[precision]);
 			}
 		}
 	}
diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out
index b79b6fcd4d..055930ccac 100644
--- a/src/test/regress/expected/interval.out
+++ b/src/test/regress/expected/interval.out
@@ -929,6 +929,15 @@ SELECT interval '1 2:03:04.5678' minute to second(2);
  1 day 02:03:04.57
 (1 row)
 
+-- these should fail as out-of-range
+SELECT interval '2562047788:00:54.775807' SECOND(2);
+ERROR:  interval out of range
+LINE 1: SELECT interval '2562047788:00:54.775807' SECOND(2);
+^
+SELECT interval '-2562047788:00:54.775807' SECOND(2);
+ERROR:  interval out of range
+LINE 1: SELECT interval '-2562047788:00:54.775807' SECOND(2);
+^
 -- test casting to restricted precision (bug #14479)
 SELECT f1, f1::INTERVAL DAY TO MINUTE AS "minutes",
   (f1 + INTERVAL '1 month')::INTERVAL MONTH::INTERVAL YEAR AS "years"
diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql
index 5566ad0e51..838da2cc13 100644
--- a/src/test/regress/sql/interval.sql
+++ b/src/test/regress/sql/interval.sql
@@ -270,6 +270,11 @@ SELECT interval '1 2:03:04.5678' hour to second(2);
 SELECT interval '1 2.3456' minute to second(2);
 SELECT interval '1 2:03.5678' minute to second(2);
 SELECT interval '1 2:03:04.5678' minute to second(2);
+-- these should fail as out-of-range
+SELECT interval '2562047788:00:54.775807' SECOND(2);
+SELECT interval '-2562047788:00:54.775807' SECOND(2);
+
+
 
 -- test casting to restricted precision (bug #14479)
 SELECT f1, f1::INTERVAL DAY TO MINUTE AS "minutes",
-- 
2.34.1



Re: Infinite Interval

2023-11-18 Thread Joseph Koshakow
On Thu, Nov 16, 2023 at 2:03 AM Ashutosh Bapat 
wrote:
>
>On Tue, Nov 14, 2023 at 4:39 PM Dean Rasheed 
wrote:
>>
>> On Thu, 9 Nov 2023 at 12:49, Dean Rasheed 
wrote:
>> >
>> > OK, I have pushed 0001 and 0002. Here's the remaining (main) patch.
>> >
>>
>> OK, I have now pushed the main patch.
>
>Thanks a lot Dean.

Yes, thanks Dean!


Re: DecodeInterval fixes

2023-08-27 Thread Joseph Koshakow
On Tue, Aug 22, 2023 at 12:58 PM Jacob Champion 
wrote:
>
> On Mon, Aug 21, 2023 at 10:39 PM Michael Paquier 
wrote:
> > 0002 and 0003 make this stuff fail, but isn't there a risk that this
> > breaks applications that relied on these accidental behaviors?
> > Assuming that this is OK makes me nervous.
>
> I wouldn't argue for backpatching, for sure, but I guess I saw this as
> falling into the same vein as 5b3c5953 and bcc704b52 which were
> already committed.

I agree, I don't think we should try and backport this. As Jacob
highlighted, we've merged similar patches for other date time types.
If applications were relying on this behavior, the upgrade may be a
good time for them to re-evaluate their usage since it's outside the
documented spec and they may not be getting the units they're expecting
from intervals like '1 day month'.

Thanks,
Joe Koshakow


Re: Preventing non-superusers from altering session authorization

2023-07-10 Thread Joseph Koshakow
On Mon, Jul 10, 2023 at 4:32 PM Nathan Bossart 
wrote:
> Okay.  Here's a new patch set in which I believe I've addressed all
> feedback.  I didn't keep the GetAuthenticatedUserIsSuperuser() helper
> function around, as I didn't see a strong need for it.

Thanks, I think the patch set looks good to go!

> And I haven't
> touched the "is_superuser" GUC, either.  I figured we can take up any
> changes for it in the other thread.

Yeah, I think that makes sense.

Thanks,
Joe Koshakow


Re: Preventing non-superusers from altering session authorization

2023-07-09 Thread Joseph Koshakow
On Sun, Jul 9, 2023 at 1:03 PM Joseph Koshakow  wrote:

>> * Only a superuser may set auth ID to something other than himself

> Is "auth ID" the right term here? Maybe something like "Only a
> superuser may set their session authorization/ID to something other
> than their authenticated ID."

>>   But we set the GUC variable
>> * is_superuser to indicate whether the *current* session userid is a
>> * superuser.

> Just a small correction here, I believe the is_superuser GUC is meant
> to indicate whether the current user id is a superuser, not the current
> session user id. We only update is_superuser in SetSessionAuthorization
> because we are also updating the current user id in SetSessionUserId.

I just realized that you moved this comment from
SetSessionAuthorization. I think we should leave the part about setting
the GUC variable is_superuser on top of SetSessionAuthorization since
that's where we actually set the GUC.

Thanks,
Joe Koshakow


Re: DecodeInterval fixes

2023-07-09 Thread Joseph Koshakow
On Sat, Jul 8, 2023 at 5:06 PM Gurjeet Singh  wrote:

> I feel the staleness/deficiencies you mention above are not
> captured in the TODO wiki page. It'd be nice if these were documented,
> so that newcomers to the community can pick up work that they feel is
> an easy lift for them.

I think that's a good idea. I've definitely been confused by this in
previous patches I've submitted.


I've broken up this patch into three logical commits and attached them.
None of the actual code has changed.

Thanks,
Joe Koshakow
From b3fe06934b40489d1b4b157677f1292bc698c7da Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 9 Jul 2023 13:12:16 -0400
Subject: [PATCH 1/3] Remove dead code in DecodeInterval

This commit removes dead code for handling unit type RESERVE. There
used to be a unit called "invalid" that was of type RESERVE. At some
point that unit was removed and there were no more units of type
RESERVE. Therefore, the code for RESERVE unit handling is unreachable.
---
 src/backend/utils/adt/datetime.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 5d8d583ddc..2a5dddc43f 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3582,11 +3582,6 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 		type = uval;
 		break;
 
-	case RESERV:
-		tmask = (DTK_DATE_M | DTK_TIME_M);
-		*dtype = uval;
-		break;
-
 	default:
 		return DTERR_BAD_FORMAT;
 }
-- 
2.34.1

From 6271c5fcca30de0982b4b6073b49c1cea6c7391b Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 9 Jul 2023 13:17:08 -0400
Subject: [PATCH 2/3] Fix Interval 'ago' parsing

This commit restricts the unit "ago" to only appear at the end of the
interval. According to the docs [0], this is the only valid place to
put it, but we allowed it multiple times at any point in the input.

[0] https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT
---
 src/backend/utils/adt/datetime.c   | 6 ++
 src/test/regress/expected/interval.out | 9 +
 src/test/regress/sql/interval.sql  | 4 
 3 files changed, 19 insertions(+)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 2a5dddc43f..9d09381328 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3578,6 +3578,12 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 		break;
 
 	case AGO:
+		/*
+		 * 'ago' is only allowed to appear at the end of the
+		 * interval.
+		 */
+		if (i != nf - 1)
+			return DTERR_BAD_FORMAT;
 		is_before = true;
 		type = uval;
 		break;
diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out
index 28b71d9681..42062f947f 100644
--- a/src/test/regress/expected/interval.out
+++ b/src/test/regress/expected/interval.out
@@ -1787,3 +1787,12 @@ SELECT extract(epoch from interval '10 days');
  864000.00
 (1 row)
 
+-- test that ago can only appear once at the end of the interval.
+SELECT INTERVAL '42 days 2 seconds ago ago';
+ERROR:  invalid input syntax for type interval: "42 days 2 seconds ago ago"
+LINE 1: SELECT INTERVAL '42 days 2 seconds ago ago';
+^
+SELECT INTERVAL '2 minutes ago 5 days';
+ERROR:  invalid input syntax for type interval: "2 minutes ago 5 days"
+LINE 1: SELECT INTERVAL '2 minutes ago 5 days';
+^
diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql
index 56feda1a3d..8fd2e7f41e 100644
--- a/src/test/regress/sql/interval.sql
+++ b/src/test/regress/sql/interval.sql
@@ -582,3 +582,7 @@ SELECT f1,
 
 -- internal overflow test case
 SELECT extract(epoch from interval '10 days');
+
+-- test that ago can only appear once at the end of the interval.
+SELECT INTERVAL '42 days 2 seconds ago ago';
+SELECT INTERVAL '2 minutes ago 5 days';
-- 
2.34.1

From 2ffb81e95031b43955fdba784356fc54659775e2 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 9 Jul 2023 13:21:23 -0400
Subject: [PATCH 3/3] Fix Interval unit parsing

This commit will error when the user has multiple consecutive units or
a unit without an accompanying value.
---
 src/backend/utils/adt/datetime.c   | 12 
 src/test/regress/expected/interval.out |  9 +
 src/test/regress/sql/interval.sql  |  4 
 3 files changed, 25 insertions(+)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 9d09381328..edf22f458e 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3278,6 +3278,7 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 {
 	bool		force_negative = false;
 	bool		is_before = false;
+	bool		parsing_unit_val = false;
 	char	   *cp;
 	int			fmask = 0,
 tmask,
@@ -3336,6 +3337,

Re: Preventing non-superusers from altering session authorization

2023-07-09 Thread Joseph Koshakow
On Sun, Jul 9, 2023 at 12:47 AM Nathan Bossart 
wrote:

> I think we should split this into two patches: one to move the permission
> check to check_session_authorization() and another for the behavior
> change.  I've attached an attempt at the first one (that borrows heavily
> from your latest patch).  AFAICT the only reason that the permission check
> lives in SetSessionAuthorization() is because AuthenticatedUserIsSuperuser
> is static to miscinit.c and doesn't have an accessor function.  I added
> one, but it would probably just be removed by the following patch.  WDYT?

I think that's a good idea. We could even keep around the accessor
function as a good place to bundle the calls to
Assert(OidIsValid(AuthenticatedUserId))
and
superuser_arg(AuthenticatedUserId)

> * Only a superuser may set auth ID to something other than himself

Is "auth ID" the right term here? Maybe something like "Only a
superuser may set their session authorization/ID to something other
than their authenticated ID."

>   But we set the GUC variable
> * is_superuser to indicate whether the *current* session userid is a
> * superuser.

Just a small correction here, I believe the is_superuser GUC is meant
to indicate whether the current user id is a superuser, not the current
session user id. We only update is_superuser in SetSessionAuthorization
because we are also updating the current user id in SetSessionUserId.
For example,

test=# CREATE ROLE r1 SUPERUSER;
CREATE ROLE
test=# CREATE ROLE r2;
CREATE ROLE
test=# SET SESSION AUTHORIZATION r1;
SET
test=# SET ROLE r2;
SET
test=> SELECT session_user, current_user;
 session_user | current_user
--------------+--------------
 r1           | r2
(1 row)

test=> SHOW is_superuser;
 is_superuser
--------------
 off
(1 row)

Which has also made me realize that the comment on is_superuser in
guc_tables.c is incorrect:

> /* Not for general use --- used by SET SESSION AUTHORIZATION */

Additionally the C variable name for is_superuser is fairly misleading:

> session_auth_is_superuser

The documentation for this GUC in show.sgml is correct:

> True if the current role has superuser privileges.

As an aside, I'm starting to think we should consider removing this
GUC. It sometimes reports an incorrect value [0], and potentially is
not used internally for anything.

I've rebased my changes over your patch and attached them both.

[0]
https://www.postgresql.org/message-id/CAAvxfHcxH-hLndty6CRThGXL1hLsgCn%2BE3QuG_4Qi7GxrHmgKg%40mail.gmail.com
From 2e1689b5fe384d675043beb9df8eff49a0ff436e Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 9 Jul 2023 12:58:41 -0400
Subject: [PATCH 2/2] Prevent non-superusers from altering session auth

Previously, if a user connected as a role that had the superuser
attribute, then they could always execute a SET SESSION AUTHORIZATION
statement for the duration of their session. Even if the role was
altered to set superuser to false, the user was still allowed to
execute SET SESSION AUTHORIZATION. This allowed them to set their
session role to some other superuser and effectively regain the
superuser privileges. They could even reset their own superuser
attribute to true.

This commit alters the privilege checks for SET SESSION AUTHORIZATION
such that a user can only execute it if the role they connected with is
currently a superuser. This prevents users from regaining superuser
privileges after it has been revoked.
---
 doc/src/sgml/ref/set_session_auth.sgml |  2 +-
 src/backend/utils/init/miscinit.c  | 19 +--
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/doc/src/sgml/ref/set_session_auth.sgml b/doc/src/sgml/ref/set_session_auth.sgml
index f8fcafc194..94adab2468 100644
--- a/doc/src/sgml/ref/set_session_auth.sgml
+++ b/doc/src/sgml/ref/set_session_auth.sgml
@@ -51,7 +51,7 @@ RESET SESSION AUTHORIZATION
 
   
The session user identifier can be changed only if the initial session
-   user (the authenticated user) had the
+   user (the authenticated user) has the
superuser privilege.  Otherwise, the command is accepted only if it
specifies the authenticated user name.
   
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index f5548a0f47..1aa393f9fd 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -467,7 +467,7 @@ ChangeToDataDir(void)
  * AuthenticatedUserId is determined at connection start and never changes.
  *
  * SessionUserId is initially the same as AuthenticatedUserId, but can be
- * changed by SET SESSION AUTHORIZATION (if AuthenticatedUserIsSuperuser).
+ * changed by SET SESSION AUTHORIZATION (if AuthenticatedUserId is a superuser).
  * This is the ID reported by the SESSION_USER SQL function.
  *
  * OuterUserId is the current user ID in effect at the "outer level"

Re: Preventing non-superusers from altering session authorization

2023-07-08 Thread Joseph Koshakow
On Sat, Jul 8, 2023 at 6:09 PM Nathan Bossart 
wrote:

>> I think the issue here is that if a session loses the ability to set
>> their session authorization in the middle of a transaction, then
>> rolling back the transaction may fail and cause the server to panic.
>> That's probably what the deleted comment meant when it said:
>>
>>> * It's OK because the check does not require catalog access and can't
>>> * fail during an end-of-transaction GUC reversion
>
> Yeah.  IIUC the ERROR longjmps to a block that calls AbortTransaction(),
> which ERRORs again when resetting the session authorization, which causes
> us to call AbortTransaction() again, etc., etc.

Everything seems to work fine if the privilege check is moved to
check_session_authorization. Which is maybe what the comment meant
instead of assign_session_authorization.

I've attached a patch with this change.

Thanks,
Joe Koshakow
From cb0198524d96068079e301a6785301440f3be3aa Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Thu, 15 Jun 2023 14:53:11 -0400
Subject: [PATCH] Prevent non-superusers from altering session auth

Previously, if a user connected as a role that had the superuser
attribute, then they could always execute a SET SESSION AUTHORIZATION
statement for the duration of their session. Even if the role was
altered to set superuser to false, the user was still allowed to
execute SET SESSION AUTHORIZATION. This allowed them to set their
session role to some other superuser and effectively regain the
superuser privileges. They could even reset their own superuser
attribute to true.

This commit alters the privilege checks for SET SESSION AUTHORIZATION
such that a user can only execute it if the role they connected with is
currently a superuser. This prevents users from regaining superuser
privileges after it has been revoked.
---
 doc/src/sgml/ref/set_session_auth.sgml |  2 +-
 src/backend/commands/variable.c| 13 +++-
 src/backend/utils/init/miscinit.c  | 28 ++
 3 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/doc/src/sgml/ref/set_session_auth.sgml b/doc/src/sgml/ref/set_session_auth.sgml
index f8fcafc194..94adab2468 100644
--- a/doc/src/sgml/ref/set_session_auth.sgml
+++ b/doc/src/sgml/ref/set_session_auth.sgml
@@ -51,7 +51,7 @@ RESET SESSION AUTHORIZATION
 
   
The session user identifier can be changed only if the initial session
-   user (the authenticated user) had the
+   user (the authenticated user) has the
superuser privilege.  Otherwise, the command is accepted only if it
specifies the authenticated user name.
   
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
index f0f2e07655..e2f47eceb7 100644
--- a/src/backend/commands/variable.c
+++ b/src/backend/commands/variable.c
@@ -803,7 +803,8 @@ check_session_authorization(char **newval, void **extra, GucSource source)
 {
 	HeapTuple	roleTup;
 	Form_pg_authid roleform;
-	Oid			roleid;
+	Oid			roleid,
+authenticated_user_id;
 	bool		is_superuser;
 	role_auth_extra *myextra;
 
@@ -846,6 +847,16 @@ check_session_authorization(char **newval, void **extra, GucSource source)
 
 	ReleaseSysCache(roleTup);
 
+	authenticated_user_id = GetAuthenticatedUserId();
+	/* Must have authenticated already, else can't make permission check */
+	Assert(OidIsValid(authenticated_user_id));
+
+	if (roleid != authenticated_user_id &&
+		!superuser_arg(authenticated_user_id))
+		ereport(ERROR,
+(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to set session authorization")));
+
 	/* Set up "extra" struct for assign_session_authorization to use */
 	myextra = (role_auth_extra *) guc_malloc(LOG, sizeof(role_auth_extra));
 	if (!myextra)
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index a604432126..04e019df20 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -467,7 +467,7 @@ ChangeToDataDir(void)
  * AuthenticatedUserId is determined at connection start and never changes.
  *
  * SessionUserId is initially the same as AuthenticatedUserId, but can be
- * changed by SET SESSION AUTHORIZATION (if AuthenticatedUserIsSuperuser).
+ * changed by SET SESSION AUTHORIZATION (if AuthenticatedUserId is a superuser).
  * This is the ID reported by the SESSION_USER SQL function.
  *
  * OuterUserId is the current user ID in effect at the "outer level" (outside
@@ -492,8 +492,6 @@ static Oid	OuterUserId = InvalidOid;
 static Oid	CurrentUserId = InvalidOid;
 static const char *SystemUser = NULL;
 
-/* We also have to remember the superuser state of some of these levels */
-static bool AuthenticatedUserIsSuperuser = false;
 static bool SessionUserIsSuperuser = false;
 
 static int	SecurityRestrictionContext = 0;
@@ -731,6 +729,7 @@ InitializeSessionUserId(const char *rolename, Oid roleid)
 	HeapTuple	rol

Re: Preventing non-superusers from altering session authorization

2023-07-08 Thread Joseph Koshakow
I've discovered an issue with this approach. Let's say you have some
session open that is connected as a superuser and you run the following
commands:

  - CREATE ROLE r1 LOGIN SUPERUSER;
  - CREATE ROLE r2;
  - CREATE ROLE r3;

Then you open another session connected with user r1 and run the
following commands:

  - SET SESSION AUTHORIZATION r2;
  - BEGIN;
  - SET SESSION AUTHORIZATION r3;

Then in your original session run:

  - ALTER ROLE r1 NOSUPERUSER;

Finally in the r1 session run:

  - CREATE TABLE t ();

Postgres will then panic with the following logs:

2023-07-08 16:33:27.787 EDT [157141] ERROR:  permission denied for schema
public at character 14
2023-07-08 16:33:27.787 EDT [157141] STATEMENT:  CREATE TABLE t ();
2023-07-08 16:33:27.787 EDT [157141] ERROR:  permission denied to set
session authorization
2023-07-08 16:33:27.787 EDT [157141] WARNING:  AbortTransaction while in
ABORT state
2023-07-08 16:33:27.787 EDT [157141] ERROR:  permission denied to set
session authorization
2023-07-08 16:33:27.787 EDT [157141] WARNING:  AbortTransaction while in
ABORT state
2023-07-08 16:33:27.787 EDT [157141] ERROR:  permission denied to set
session authorization
2023-07-08 16:33:27.787 EDT [157141] WARNING:  AbortTransaction while in
ABORT state
2023-07-08 16:33:27.787 EDT [157141] ERROR:  permission denied to set
session authorization
2023-07-08 16:33:27.787 EDT [157141] PANIC:  ERRORDATA_STACK_SIZE exceeded
2023-07-08 16:33:27.882 EDT [156878] LOG:  server process (PID 157141) was
terminated by signal 6: Aborted
2023-07-08 16:33:27.882 EDT [156878] DETAIL:  Failed process was running:
CREATE TABLE t ();

I think the issue here is that if a session loses the ability to set
their session authorization in the middle of a transaction, then
rolling back the transaction may fail and cause the server to panic.
That's probably what the deleted comment meant when it said:

> * It's OK because the check does not require catalog access and can't
> * fail during an end-of-transaction GUC reversion

Interestingly, if the r1 session manually types `ROLLBACK` instead of
executing a command that fails, then everything is fine and there's no
panic. I'm not familiar enough with transaction handling to know why
there would be a difference there.

Thanks,
Joe Koshakow


Re: Preventing non-superusers from altering session authorization

2023-07-08 Thread Joseph Koshakow
Nathan Bossart  wrote:

> I see that RESET SESSION AUTHORIZATION
> with a concurrently dropped role will FATAL with your patch but succeed
> without it, which could be part of the reason.

I didn't even realize it, but the change to superuser_arg() in v2 fixed
this issue. The catalog lookup is only done if
userid != AuthenticatedUserId. So RESET SESSION AUTHORIZATION with a
concurrently dropped role will no longer FATAL.

Thanks,
Joe

On Sat, Jul 1, 2023 at 11:33 AM Joseph Koshakow  wrote:

> >> That might be a good change? If the original authenticated role ID no
> >> longer exists then we may want to return an error when trying to set
> >> your session authorization to that role.
> >
> > I was curious why we don't block DROP ROLE if there are active sessions
> for
> > the role or terminate any such sessions as part of the command, and I
> found
> > this discussion from 2016:
> >
> >https://postgr.es/m/flat/56E87CD8.60007%40ohmu.fi
>
> Ah, that makes sense that we don't prevent DROP ROLE on active roles.
> Though, we do error when you try and set your role or session
> authorization to a dropped role. So erroring on RESET SESSION
> AUTHORIZATION when the original role is dropped makes it consistent
> with SET SESSION AUTHORIZATION TO <dropped role>. On the other
> hand it makes it inconsistent with RESET ROLE, which does not error on
> a dropped role.
>
> - Joe Koshakow
>
> On Fri, Jun 23, 2023 at 1:54 PM Nathan Bossart 
> wrote:
>
>> On Thu, Jun 22, 2023 at 06:39:45PM -0400, Joseph Koshakow wrote:
>> > On Wed, Jun 21, 2023 at 11:48 PM Nathan Bossart <
>> nathandboss...@gmail.com>
>> > wrote:
>> >> I see that RESET SESSION AUTHORIZATION
>> >> with a concurrently dropped role will FATAL with your patch but succeed
>> >> without it, which could be part of the reason.
>> >
>> > That might be a good change? If the original authenticated role ID no
>> > longer exists then we may want to return an error when trying to set
>> > your session authorization to that role.
>>
>> I was curious why we don't block DROP ROLE if there are active sessions
>> for
>> the role or terminate any such sessions as part of the command, and I
>> found
>> this discussion from 2016:
>>
>> https://postgr.es/m/flat/56E87CD8.60007%40ohmu.fi
>>
>> --
>> Nathan Bossart
>> Amazon Web Services: https://aws.amazon.com
>>
>


Re: DecodeInterval fixes

2023-07-08 Thread Joseph Koshakow
Jacob Champion  writes:
> Hi Joe, here's a partial review:

Thanks so much for the review!

> I'm new to this code, but I agree that the use of `type` and the
> lookahead are not particularly obvious/intuitive. At the very least,
> they'd need some more explanation in the code. Your boolean flag idea
> sounds reasonable, though.

I've updated the patch with the boolean flag idea. I think it's a
bit cleaner and more readable.

>> There is one more problem I noticed, but didn't fix. We allow multiple
>> "@" to be sprinkled anywhere in the input, even though the docs [0]
>> only allow it to appear at the beginning of the input.
>
> (No particular opinion on this.)

I looked into this a bit. The reason this works is because the date
time lexer filters out all punctuation. That's what allows us to parse
things like `SELECT date 'January 8, 1999';`. It's probably not worth
trying to be smarter about what punctuation we allow where, at least
for now. Maybe in the future we can exclude "@" from the punctuation
that gets filtered out.

> It looks like this patch needs a rebase for the CI, too, but there are
> no conflicts.

The attached patch is rebased against master.

Thanks,
Joe Koshakow
From eee98dd65c3556528803b6ee2cab10e9ece8d871 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 9 Apr 2023 20:37:27 -0400
Subject: [PATCH] Fix interval decode handling of invalid intervals

This patch does three things in the DecodeInterval function:

1) Removes dead code for handling unit type RESERVE. There used to be
a unit called "invalid" that was of type RESERVE. At some point that
unit was removed and there were no more units of type RESERVE.
Therefore, the code for RESERVE unit handling is unreachable.

2) Restrict the unit "ago" to only appear at the end of the
interval. According to the docs [0], this is the only valid place to
put it, but we allowed it multiple times at any point in the input.

3) Error when the user has multiple consecutive units or a unit without
an accompanying value.

[0] https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT
---
 src/backend/utils/adt/datetime.c   | 25 +++--
 src/test/regress/expected/interval.out | 18 ++
 src/test/regress/sql/interval.sql  |  7 +++
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 5d8d583ddc..b930a67007 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3278,6 +3278,7 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 {
 	bool		force_negative = false;
 	bool		is_before = false;
+	bool		parsing_unit_val = false;
 	char	   *cp;
 	int			fmask = 0,
 tmask,
@@ -3336,6 +3337,7 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 	itm_in->tm_usec > 0)
 	itm_in->tm_usec = -itm_in->tm_usec;
 type = DTK_DAY;
+parsing_unit_val = false;
 break;
 
 			case DTK_TZ:
@@ -3373,6 +3375,7 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 	 * are reading right to left.
 	 */
 	type = DTK_DAY;
+	parsing_unit_val = false;
 	break;
 }
 
@@ -3562,10 +3565,14 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 	default:
 		return DTERR_BAD_FORMAT;
 }
+parsing_unit_val = false;
 break;
 
 			case DTK_STRING:
 			case DTK_SPECIAL:
+/* reject consecutive unhandled units */
+if (parsing_unit_val)
+	return DTERR_BAD_FORMAT;
 type = DecodeUnits(i, field[i], &uval);
 if (type == IGNORE_DTF)
 	continue;
@@ -3575,16 +3582,18 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 {
 	case UNITS:
 		type = uval;
+		parsing_unit_val = true;
 		break;
 
 	case AGO:
-		is_before = true;
-		type = uval;
-		break;
 
-	case RESERV:
-		tmask = (DTK_DATE_M | DTK_TIME_M);
-		*dtype = uval;
+		/*
+		 * 'ago' is only allowed to appear at the end of the
+		 * interval.
+		 */
+		if (i != nf - 1)
+			return DTERR_BAD_FORMAT;
+		is_before = true;
 		break;
 
 	default:
@@ -3605,6 +3614,10 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 	if (fmask == 0)
 		return DTERR_BAD_FORMAT;
 
+	/* reject if unit appeared and was never handled */
+	if (parsing_unit_val)
+		return DTERR_BAD_FORMAT;
+
 	/* finally, AGO negates everything */
 	if (is_before)
 	{
diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out
index 28b71d9681..7aba799351 100644
--- a/src/test/regress/expected/interval.out
+++ b/src/test/regress/expected/interval.out
@@ -1787,3 +1787,21 @@ SELECT extract(epoch from interval '10 days');
  864000.00
 (1 row)
 
+-- test that ago can only appear once at the end of the interval.
+SELECT INTERVAL '42

Re: Preventing non-superusers from altering session authorization

2023-07-01 Thread Joseph Koshakow
>> That might be a good change? If the original authenticated role ID no
>> longer exists then we may want to return an error when trying to set
>> your session authorization to that role.
>
> I was curious why we don't block DROP ROLE if there are active sessions for
> the role or terminate any such sessions as part of the command, and I found
> this discussion from 2016:
>
> https://postgr.es/m/flat/56E87CD8.60007%40ohmu.fi

Ah, that makes sense that we don't prevent DROP ROLE on active roles.
Though, we do error when you try and set your role or session
authorization to a dropped role. So erroring on RESET SESSION
AUTHORIZATION when the original role is dropped makes it consistent
with SET SESSION AUTHORIZATION TO <dropped role>. On the other
hand it makes it inconsistent with RESET ROLE, which does not error on
a dropped role.

- Joe Koshakow

On Fri, Jun 23, 2023 at 1:54 PM Nathan Bossart 
wrote:

> On Thu, Jun 22, 2023 at 06:39:45PM -0400, Joseph Koshakow wrote:
> > On Wed, Jun 21, 2023 at 11:48 PM Nathan Bossart <
> nathandboss...@gmail.com>
> > wrote:
> >> I see that RESET SESSION AUTHORIZATION
> >> with a concurrently dropped role will FATAL with your patch but succeed
> >> without it, which could be part of the reason.
> >
> > That might be a good change? If the original authenticated role ID no
> > longer exists then we may want to return an error when trying to set
> > your session authorization to that role.
>
> I was curious why we don't block DROP ROLE if there are active sessions for
> the role or terminate any such sessions as part of the command, and I found
> this discussion from 2016:
>
> https://postgr.es/m/flat/56E87CD8.60007%40ohmu.fi
>
> --
> Nathan Bossart
> Amazon Web Services: https://aws.amazon.com
>


Re: Preventing non-superusers from altering session authorization

2023-06-22 Thread Joseph Koshakow
On Wed, Jun 21, 2023 at 11:48 PM Nathan Bossart 
wrote:
>
>On Wed, Jun 21, 2023 at 04:28:43PM -0400, Joseph Koshakow wrote:
>> + roleTup = SearchSysCache1(AUTHOID,
ObjectIdGetDatum(AuthenticatedUserId));
>> + if (!HeapTupleIsValid(roleTup))
>> + ereport(FATAL,
>> +
(errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
>> + errmsg("role with OID
%u does not exist", AuthenticatedUserId)));
>> + rform = (Form_pg_authid) GETSTRUCT(roleTup);
>
>I think "superuser_arg(AuthenticatedUserId)" would work here.

Yep, that worked. I've attached a patch with this change.

> I see that RESET SESSION AUTHORIZATION
> with a concurrently dropped role will FATAL with your patch but succeed
> without it, which could be part of the reason.

That might be a good change? If the original authenticated role ID no
longer exists then we may want to return an error when trying to set
your session authorization to that role.

Thanks,
Joe Koshakow
From 2b2817e3ea4f1541a781216afb7415435ca362a0 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Thu, 15 Jun 2023 14:53:11 -0400
Subject: [PATCH] Prevent non-superusers from altering session auth

Previously, if a user connected as a role that had the superuser
attribute, then they could always execute a SET SESSION AUTHORIZATION
statement for the duration of their session. Even if the role was
altered to set superuser to false, the user was still allowed to
execute SET SESSION AUTHORIZATION. This allowed them to set their
session role to some other superuser and effectively regain the
superuser privileges. They could even reset their own superuser
attribute to true.

This commit alters the privilege checks for SET SESSION AUTHORIZATION
such that a user can only execute it if the role they connected with is
currently a superuser. This prevents users from regaining superuser
privileges after it has been revoked.
---
 doc/src/sgml/ref/set_session_auth.sgml |  2 +-
 src/backend/utils/init/miscinit.c  | 21 +++--
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/ref/set_session_auth.sgml b/doc/src/sgml/ref/set_session_auth.sgml
index f8fcafc194..94adab2468 100644
--- a/doc/src/sgml/ref/set_session_auth.sgml
+++ b/doc/src/sgml/ref/set_session_auth.sgml
@@ -51,7 +51,7 @@ RESET SESSION AUTHORIZATION
 
   
The session user identifier can be changed only if the initial session
-   user (the authenticated user) had the
+   user (the authenticated user) has the
superuser privilege.  Otherwise, the command is accepted only if it
specifies the authenticated user name.
   
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index a604432126..4cef655703 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -467,7 +467,7 @@ ChangeToDataDir(void)
  * AuthenticatedUserId is determined at connection start and never changes.
  *
  * SessionUserId is initially the same as AuthenticatedUserId, but can be
- * changed by SET SESSION AUTHORIZATION (if AuthenticatedUserIsSuperuser).
+ * changed by SET SESSION AUTHORIZATION.
  * This is the ID reported by the SESSION_USER SQL function.
  *
  * OuterUserId is the current user ID in effect at the "outer level" (outside
@@ -492,8 +492,6 @@ static Oid	OuterUserId = InvalidOid;
 static Oid	CurrentUserId = InvalidOid;
 static const char *SystemUser = NULL;
 
-/* We also have to remember the superuser state of some of these levels */
-static bool AuthenticatedUserIsSuperuser = false;
 static bool SessionUserIsSuperuser = false;
 
 static int	SecurityRestrictionContext = 0;
@@ -731,6 +729,7 @@ InitializeSessionUserId(const char *rolename, Oid roleid)
 	HeapTuple	roleTup;
 	Form_pg_authid rform;
 	char	   *rname;
+	bool	   is_superuser;
 
 	/*
 	 * Don't do scans if we're bootstrapping, none of the system catalogs
@@ -770,10 +769,10 @@ InitializeSessionUserId(const char *rolename, Oid roleid)
 	rname = NameStr(rform->rolname);
 
 	AuthenticatedUserId = roleid;
-	AuthenticatedUserIsSuperuser = rform->rolsuper;
+	is_superuser = rform->rolsuper;
 
 	/* This sets OuterUserId/CurrentUserId too */
-	SetSessionUserId(roleid, AuthenticatedUserIsSuperuser);
+	SetSessionUserId(roleid, is_superuser);
 
 	/* Also mark our PGPROC entry with the authenticated user id */
 	/* (We assume this is an atomic store so no lock is needed) */
@@ -806,7 +805,7 @@ InitializeSessionUserId(const char *rolename, Oid roleid)
 		 * just document that the connection limit is approximate.
 		 */
 		if (rform->rolconnlimit >= 0 &&
-			!AuthenticatedUserIsSuperuser &&
+			!is_superuser &&
 			CountUserBackends(roleid) > rform->rolconnlimit)
 			ereport(FATAL,
 	(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
@@ -818,7 +817,7 @@ Initializ

Preventing non-superusers from altering session authorization

2023-06-21 Thread Joseph Koshakow
Hi all,

I briefly mentioned this issue in another mailing thread [0].

Currently, a user is allowed to execute SET SESSION AUTHORIZATION [1]
if the role they connected to PostgreSQL with was a superuser at the
time of connection. Even if the role is later altered to no longer be a
superuser, the session can still execute SET SESSION AUTHORIZATION, as
long as the session isn't disconnected. As a consequence, if that role
is altered to no longer be a superuser, then the user can use SET
SESSION AUTHORIZATION to switch to another role that is a superuser and
regain superuser privileges. They can even re-grant themselves the
superuser attribute.

It is possible that the user had already run SET SESSION AUTHORIZATION
to set their session to a superuser before their connecting role lost
the superuser attribute. In this case there's not much we can do.

Also, from looking at the code and documentation, it looks like SET
SESSION AUTHORIZATION works this way intentionally. However, I'm unable
to figure out why we'd want it to work this way.

I've attached a patch that would fix this issue by checking the catalog
to see if the connecting role is currently a superuser every time SET
SESSION AUTHORIZATION is run. However, according to the comment I
deleted there's something invalid about reading the catalog from that
function, though I wasn't able to understand it fully.

One downside is that if a user switches their session authorization to
some role, then loses the superuser attribute on their connecting role,
they may be stuck in that role with no way to reset their session
authorization without disconnecting and reconnecting.

Thanks,
Joe Koshakow

[0]
https://www.postgresql.org/message-id/CAAvxfHco7iGw4NarymhfLWN6PjzYRrbYFt2BnSFeSD5sFzqEJQ%40mail.gmail.com
[1] https://www.postgresql.org/docs/15/sql-set-session-authorization.html
From b5f7d42ea325b2be46b7c93e5404792046f1befc Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Thu, 15 Jun 2023 14:53:11 -0400
Subject: [PATCH] Prevent non-superusers from altering session auth

Previously, if a user connected as a role that had the superuser
attribute, then they could always execute a SET SESSION AUTHORIZATION
statement for the duration of their session. Even if the role was
altered to set superuser to false, the user was still allowed to
execute SET SESSION AUTHORIZATION. This allowed them to set their
session role to some other superuser and effectively regain the
superuser privileges. They could even reset their own superuser
attribute to true.

This commit alters the privilege checks for SET SESSION AUTHORIZATION
such that a user can only execute it if the role they connected with is
currently a superuser. This prevents users from regaining superuser
privileges after it has been revoked.
---
 doc/src/sgml/ref/set_session_auth.sgml |  2 +-
 src/backend/utils/init/miscinit.c  | 33 +++---
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/ref/set_session_auth.sgml b/doc/src/sgml/ref/set_session_auth.sgml
index f8fcafc194..94adab2468 100644
--- a/doc/src/sgml/ref/set_session_auth.sgml
+++ b/doc/src/sgml/ref/set_session_auth.sgml
@@ -51,7 +51,7 @@ RESET SESSION AUTHORIZATION
 
   
The session user identifier can be changed only if the initial session
-   user (the authenticated user) had the
+   user (the authenticated user) has the
superuser privilege.  Otherwise, the command is accepted only if it
specifies the authenticated user name.
   
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index a604432126..459af11691 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -467,7 +467,7 @@ ChangeToDataDir(void)
  * AuthenticatedUserId is determined at connection start and never changes.
  *
  * SessionUserId is initially the same as AuthenticatedUserId, but can be
- * changed by SET SESSION AUTHORIZATION (if AuthenticatedUserIsSuperuser).
+ * changed by SET SESSION AUTHORIZATION.
  * This is the ID reported by the SESSION_USER SQL function.
  *
  * OuterUserId is the current user ID in effect at the "outer level" (outside
@@ -492,8 +492,6 @@ static Oid	OuterUserId = InvalidOid;
 static Oid	CurrentUserId = InvalidOid;
 static const char *SystemUser = NULL;
 
-/* We also have to remember the superuser state of some of these levels */
-static bool AuthenticatedUserIsSuperuser = false;
 static bool SessionUserIsSuperuser = false;
 
 static int	SecurityRestrictionContext = 0;
@@ -731,6 +729,7 @@ InitializeSessionUserId(const char *rolename, Oid roleid)
 	HeapTuple	roleTup;
 	Form_pg_authid rform;
 	char	   *rname;
+	bool	   is_superuser;
 
 	/*
 	 * Don't do scans if we're bootstrapping, none of the system catalogs
@@ -770,10 +769,10 @@ InitializeSessionUserId(const char *rolename, Oid roleid)
 	rname = NameStr(rform->rolname);
 
 	AuthenticatedUserId = roleid;
-	AuthenticatedUserIsSuperuser = rform->rolsuper;
+	is_s

Re: is_superuser is not documented

2023-06-07 Thread Joseph Koshakow
On Wed, Jun 7, 2023 at 11:36 AM Fujii Masao 
wrote:
>
>
>
>On 2023/06/07 23:15, Joseph Koshakow wrote:
>> I think I may have discovered a reason why is_superuser is
>> intentionally undocumented. is_superuser is not updated if a role's
>> superuser attribute is changed by another session. Therefore,
>> is_superuser may show you an incorrect stale value.
>>
>> Perhaps this can be fixed with a show_hook? Otherwise it's probably
>> best not to document a GUC that can show an incorrect value.
>
>Or we can correct the description of is_superuser, for example,
>"True if the current role had superuser privileges when it connected to
>the database. Note that this parameter doesn't always indicate
>the current superuser status of the role."?

That description isn't exactly accurate either, since is_superuser is
re-evaluated whenever the role GUC is changed (i.e. through SET ROLE
or RESET ROLE), and potentially at other times I'm not aware of. I'm
curious to hear what others think though, since it seems like a bit of
a footgun. It will be up to the user to understand when `is_superuser`
is accurate or inaccurate. In most cases it will be impossible for
them to know unless they get the same information elsewhere, like
through pg_roles.


As an aside I think there's a similar issue with the
AuthenticatedUserIsSuperuser static variable. That variable is
initialized in miscinit.c in the InitializeSessionUserId function
based on whether the session role is a superuser when connecting. Then
as far as I can tell the variable is never updated.

When executing a SET SESSION AUTHORIZATION command, we check if
AuthenticatedUserIsSuperuser is true to determine if the session is
allowed to execute the command. That check happens in miscinit.c in the
SetSessionAuthorization function.

This means that if some role, r, connects as a superuser and then later
some other role removes r's superuser attribute, r can always set their
session authorization to a different role with the superuser attribute
to regain superuser privileges. So as long as r maintains an active
connection, they can never truly lose their superuser privileges.

- Joe Koshakow


Re: is_superuser is not documented

2023-06-07 Thread Joseph Koshakow
I think I may have discovered a reason why is_superuser is
intentionally undocumented. is_superuser is not updated if a role's
superuser attribute is changed by another session. Therefore,
is_superuser may show you an incorrect stale value.

Perhaps this can be fixed with a show_hook? Otherwise it's probably
best not to document a GUC that can show an incorrect value.

- Joe Koshakow


Re: Missing warning on revokes with grant options

2023-05-19 Thread Joseph Koshakow
Sorry for the multiple consecutive emails. I just came across this
comment that explains the current behavior in restrict_and_check_grant

/*
* Restrict the operation to what we can actually grant or revoke, and
* issue a warning if appropriate.  (For REVOKE this isn't quite what the
* spec says to do: the spec seems to want a warning only if no privilege
* bits actually change in the ACL. In practice that behavior seems much
* too noisy, as well as inconsistent with the GRANT case.)
*/

However, I still think the current behavior is a bit strange since
holding a grant option is not directly required to issue a revoke.
Perhaps for revoke the logic should be:
  - for each specified privilege:
  - if the set of acl items on the specified object that includes
this privilege is non empty
  - and none of those acl items have the current role as the
grantor
  - then issue a warning.

Thanks,
Joe Koshakow


Re: Missing warning on revokes with grant options

2023-05-19 Thread Joseph Koshakow
I've been thinking about this some more and reading the SQL99 spec. In
the original thread that added these warnings [0], which was linked
earlier in this thread by Nathan, the following assertion was made:

> After that, you get to the General Rules, which pretty clearly say that
> trying to grant privileges you don't have grant option for is just a
> warning and not an error condition.  (Such privileges will not be in the
> set of "identified privilege descriptors".)
>
> AFAICS the specification for REVOKE is exactly parallel.

I think it is true that for both GRANT and REVOKE, if a privilege was
specified in the statement and a corresponding privilege does not exist
in the identified set then a warning should be issued. However, the
meaning of "identified set" is different between GRANT and REVOKE.

In GRANT the identified set is defined as

4) A set of privilege descriptors is identified. The privilege
descriptors identified are those defining,
for each <action> explicitly or implicitly in <privileges>, that
<action> on O held by A with
grant option.

Essentially it is all privileges specified in the GRANT statement on O
**where A is the grantee with a grant option**.

In REVOKE the identified set is defined as

1) Case:
  a) If the <revoke statement> is a <revoke privilege statement>, then
for every <grantee>
 specified, a set of privilege descriptors is identified. A
privilege descriptor P is said to be
 identified if it belongs to the set of privilege descriptors that
defined, for any <action>
 explicitly or implicitly in <privileges>, that <action> on O, or
any of the objects in S, granted
 by A to <grantee>.

Essentially it is all privileges specified in the REVOKE statement on O
**where A is the grantor and the grantee is one of the grantees
specified in the REVOKE statement**.

In fact as far as I can tell, the ability to revoke a privilege does
not directly depend on having a grant option for that privilege, it
only depends on being the grantor of the specified privilege. However,
our code in restrict_and_check_grant doesn't match this. It treats the
rules for GRANTs and REVOKEs the same, in that you need a grant option
to execute either. It's possible that due to the abandoned privilege
rules that it is impossible for a privilege to exist where the grantor
doesn't also have a grant option on that privilege. I haven't read that
part of the spec closely enough.

As a consequence of how the identified set is defined for REVOKE, not
only should a warning be issued in the example from my previous email,
but I think a warning should also be issued even if the grantee has no
privileges on O. For example,

```
test=# SELECT current_role;
 current_role
--
 joe
(1 row)

test=# CREATE TABLE t ();
CREATE TABLE
test=# CREATE ROLE r1;
CREATE ROLE
test=# SELECT relacl FROM pg_class WHERE relname = 't';
 relacl


(1 row)

test=# REVOKE SELECT ON t FROM r1;
REVOKE
```

Here the identified set for the REVOKE statement is empty. So there is
no corresponding privilege descriptor in the identified set for the
SELECT privilege in the REVOKE statement. So a warning should be
issued. Recall:

18) If the <revoke statement> is a <revoke privilege statement>, then:
  a) For every combination of <grantee> and <action> on O specified in
<privileges>, if there
 is no corresponding privilege descriptor in the set of identified
privilege descriptors, then a
 completion condition is raised: warning — privilege not revoked

Essentially the meaning of the warning for REVOKE does not mean "you
tried to revoke a privilege but you don't have a grant option", it
means "you tried to revoke a privilege (where you are the grantor), but
such a privilege does not exist".

Thanks,
Joe Koshakow

[0] https://postgr.es/m/20040511091816.E9887CF519E%40www.postgresql.com


Re: Missing warning on revokes with grant options

2023-05-18 Thread Joseph Koshakow
On Thu, May 18, 2023 at 7:17 PM Joseph Koshakow  wrote:
>
>I looked into this function and that is correct. We fail to find a
>match for the revoked privilege here:
>
>/*
>* Search the ACL for an existing entry for this grantee and grantor. If
>* one exists, just modify the entry in-place (well, in the same
position,
>* since we actually return a copy); otherwise, insert the new entry at
>* the end.
>*/
>
>for (dst = 0; dst < num; ++dst)
>{
>if (aclitem_match(mod_aip, old_aip + dst))
>{
>/* found a match, so modify existing item */
>new_acl = allocacl(num);
>new_aip = ACL_DAT(new_acl);
>memcpy(new_acl, old_acl, ACL_SIZE(old_acl));
>break;
>}
>}
>
>Seeing that there was no match, we add a new empty privilege to the end
>of the existing ACL list here:
>
>if (dst == num)
>{
>/* need to append a new item */
>new_acl = allocacl(num + 1);
>new_aip = ACL_DAT(new_acl);
>memcpy(new_aip, old_aip, num * sizeof(AclItem));
>
>/* initialize the new entry with no permissions */
>new_aip[dst].ai_grantee = mod_aip->ai_grantee;
>new_aip[dst].ai_grantor = mod_aip->ai_grantor;
>ACLITEM_SET_PRIVS_GOPTIONS(new_aip[dst],
>  ACL_NO_RIGHTS, ACL_NO_RIGHTS);
>num++; /* set num to the size of new_acl */
>}
>
>We then try and revoke the specified privileges from the new empty
>privilege, leaving it empty (modechg will equal ACL_MODECHG_DEL here):
>
>old_rights = ACLITEM_GET_RIGHTS(new_aip[dst]);
>old_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]);
>
>/* apply the specified permissions change */
>switch (modechg)
>{
>case ACL_MODECHG_ADD:
>ACLITEM_SET_RIGHTS(new_aip[dst],
>  old_rights | ACLITEM_GET_RIGHTS(*mod_aip));
>break;
>case ACL_MODECHG_DEL:
>ACLITEM_SET_RIGHTS(new_aip[dst],
>  old_rights & ~ACLITEM_GET_RIGHTS(*mod_aip));
>break;
>case ACL_MODECHG_EQL:
>ACLITEM_SET_RIGHTS(new_aip[dst],
>  ACLITEM_GET_RIGHTS(*mod_aip));
>break;
>}
>
>Then since the new privilege remains empty, we remove it from the ACL
>list:
>
>new_rights = ACLITEM_GET_RIGHTS(new_aip[dst]);
>new_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]);
>
>/*
>* If the adjusted entry has no permissions, delete it from the list.
>*/
>if (new_rights == ACL_NO_RIGHTS)
>{
>memmove(new_aip + dst,
>new_aip + dst + 1,
>(num - dst - 1) * sizeof(AclItem));
>/* Adjust array size to be 'num - 1' items */
>ARR_DIMS(new_acl)[0] = num - 1;
>SET_VARSIZE(new_acl, ACL_N_SIZE(num - 1));
>}

Sorry about the unformatted code, here's the entire quoted section
again with proper formatting:

I looked into this function and that is correct. We fail to find a
match for the revoked privilege here:

/*
 * Search the ACL for an existing entry for this grantee and grantor. If
 * one exists, just modify the entry in-place (well, in the same
position,
 * since we actually return a copy); otherwise, insert the new entry at
 * the end.
 */

for (dst = 0; dst < num; ++dst)
{
if (aclitem_match(mod_aip, old_aip + dst))
{
/* found a match, so modify existing item */
new_acl = allocacl(num);
new_aip = ACL_DAT(new_acl);
memcpy(new_acl, old_acl, ACL_SIZE(old_acl));
break;
}
}

Seeing that there was no match, we add a new empty privilege to the end
of the existing ACL list here:

if (dst == num)
{
/* need to append a new item */
new_acl = allocacl(num + 1);
new_aip = ACL_DAT(new_acl);
memcpy(new_aip, old_aip, num * sizeof(AclItem));

/* initialize the new entry with no permissions */
new_aip[dst].ai_grantee = mod_aip->ai_grantee;
new_aip[dst].ai_grantor = mod_aip->ai_grantor;
ACLITEM_SET_PRIVS_GOPTIONS(new_aip[dst],
   ACL_NO_RIGHTS, ACL_NO_RIGHTS);
num++;/* set num to the size of new_acl */
}

We then try and revoke the specified privileges from the new empty
privilege, leaving it empty (modechg will equal ACL_MODECHG_DEL here):

old_rights = ACLITEM_GET_RIGHTS(new_aip[dst]);
old_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]);

/* apply the specified permissions change */
switch (modechg)
{
case ACL_MODECHG_ADD:
ACLITEM_SET_RIGHTS(new_aip[dst],
   old_rights | ACLITEM_GET_RIGHTS(*mod_aip));
break;
case ACL_MODECHG_DEL:
ACLITEM_SET_RIGHTS(new_aip[dst],
   old_rights & ~ACLITEM_GET_RIG

Re: Missing warning on revokes with grant options

2023-05-18 Thread Joseph Koshakow
On Wed, May 17, 2023 at 11:48 PM Nathan Bossart 
wrote:
>
>The thread for the aforementioned change [0] mentions the standard
quite a
>bit, which might explain the current behavior.

I went through that thread and the quoted parts of the SQL standard. It
seems clear that if a user tries to REVOKE some privilege and they
don't have a grant option on that privilege, then a warning should be
issued. There was some additional discussion on when there should be
an error vs a warning, but I don't think it's that relevant to this
discussion. However, I was not able to find any discussion about the
restriction that a revoker can only revoke privileges that they granted
themselves.

The restriction was added to PostgreSQL at the same time as GRANT
OPTIONs were introduced. The commit [0] and mailing thread [1] don't
provide many details on this specific restriction.

The SQL99 standard for REVOKE is very dense and I may have
misunderstood parts, but here's my interpretation of how this
restriction might come from the standard and what it says about issuing
a warning (section 12.6).

Let's start with the Syntax Rules:

1) Let O be the object identified by the <object name> contained in
<privileges>.

In my example O is the table t.

3) Let U be the current user identifier and R be the current role name.
4) Case:
  a) If GRANTED BY <grantor> is not specified, then
Case:
  i) If U is not the null value, then let A be U.
  ii) Otherwise, let A be R.

In my example A is the role r1.

9) Case:
  a) If the <revoke statement> is a <revoke privilege statement>, then
for every <grantee>
 specified, a set of privilege descriptors is identified. A
privilege descriptor P is said to be
 identified if it belongs to the set of privilege descriptors that
defined, for any <action>
 explicitly or implicitly in <privileges>, that <action> on O, or
any of the objects in S, granted
 by A to <grantee>.

In my example, <grantee> is the role r1, <privileges> is the list of
privileges that only contain SELECT, <action> is SELECT. Therefore the
set of identified privilege descriptors would be a single privilege
descriptor on table t where the privileges contain SELECT, the grantor
is r1, and the grantee is r1. Such a privilege does not exist, so the
identified privilege set is empty.

Now onto the General Rules:

1) Case:
  a) If the <revoke statement> is a <revoke privilege statement>, then
Case:
  i) If neither WITH HIERARCHY OPTION nor GRANT OPTION FOR is
specified, then:
2) The identified privilege descriptors are destroyed.

In my example, the identified set of privileges is empty, so no
privileges are destroyed (which I'm interpreting to mean the same thing
as revoked).

18) If the <revoke statement> is a <revoke privilege statement>, then:
  a) For every combination of <grantee> and <action> on O specified in
<privileges>, if there
 is no corresponding privilege descriptor in the set of identified
privilege descriptors, then a
 completion condition is raised: warning — privilege not revoked.

In my example the identified privileges set is empty, therefore it
cannot contain a corresponding privilege descriptor, therefore we
should be issuing a warning.

So I think our current behavior is not in spec. Would you agree with
this evaluation or do you think I've misunderstood something?

>> I wasn't able to locate where the check for
>>> A user can only revoke privileges that were granted directly by that
>>> user.
>> is in the code, but we should probably just add a warning there.
>
>I'm not certain, but I suspect the calls to aclupdate() in
>merge_acl_with_grant() take care of this because the grantors will
never
>match.

I looked into this function and that is correct. We fail to find a
match for the revoked privilege here:

/*
* Search the ACL for an existing entry for this grantee and grantor. If
* one exists, just modify the entry in-place (well, in the same position,
* since we actually return a copy); otherwise, insert the new entry at
* the end.
*/

for (dst = 0; dst < num; ++dst)
{
if (aclitem_match(mod_aip, old_aip + dst))
{
/* found a match, so modify existing item */
new_acl = allocacl(num);
new_aip = ACL_DAT(new_acl);
memcpy(new_acl, old_acl, ACL_SIZE(old_acl));
break;
}
}

Seeing that there was no match, we add a new empty privilege to the end
of the existing ACL list here:

if (dst == num)
{
/* need to append a new item */
new_acl = allocacl(num + 1);
new_aip = ACL_DAT(new_acl);
memcpy(new_aip, old_aip, num * sizeof(AclItem));

/* initialize the new entry with no permissions */
new_aip[dst].ai_grantee = mod_aip->ai_grantee;
new_aip[dst].ai_grantor = mod_aip->ai_grantor;
ACLITEM_SET_PRIVS_GOPTIONS(new_aip[dst],
  ACL_NO_RIGHTS, ACL_NO_RIGHTS);
num++; /* set num to the size of new_acl */
}

We then try and revoke the specified privileges from the new empty
privilege, leaving it empty (modechg will equal ACL_MODECHG_DEL here):

old_rights = ACLITEM_GET_RIGHTS(new_aip[dst]);
old_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]);

/* apply the specified permissions change */
switch (modechg)
{
case ACL_MODECHG_ADD:

Missing warning on revokes with grant options

2023-05-15 Thread Joseph Koshakow
Hi Hackers,

I noticed some confusing behavior with REVOKE recently. Normally if
REVOKE fails to revoke anything a warning is printed. For example, see
the following scenario:

```
test=# SELECT current_role;
 current_role
--
 joe
(1 row)

test=# CREATE ROLE r1;
CREATE ROLE
test=# CREATE TABLE t ();
CREATE TABLE
test=# GRANT SELECT ON TABLE t TO r1;
GRANT
test=# SET ROLE r1;
SET
test=> REVOKE SELECT ON TABLE t FROM r1;
WARNING:  no privileges could be revoked for "t"
WARNING:  no privileges could be revoked for column "tableoid" of relation
"t"
WARNING:  no privileges could be revoked for column "cmax" of relation "t"
WARNING:  no privileges could be revoked for column "xmax" of relation "t"
WARNING:  no privileges could be revoked for column "cmin" of relation "t"
WARNING:  no privileges could be revoked for column "xmin" of relation "t"
WARNING:  no privileges could be revoked for column "ctid" of relation "t"
REVOKE
test=> SELECT relacl FROM pg_class WHERE relname = 't';
   relacl
-
 {joe=arwdDxtm/joe,r1=r/joe}
(1 row)

```

However, if the REVOKE fails and the revoker has a grant option on the
privilege, then no warning is emitted. For example, see the following
scenario:

```
test=# SELECT current_role;
 current_role
--
 joe
(1 row)

test=# CREATE ROLE r1;
CREATE ROLE
test=# CREATE TABLE t ();
CREATE TABLE
test=# GRANT SELECT ON TABLE t TO r1 WITH GRANT OPTION;
GRANT
test=# SET ROLE r1;
SET
test=> REVOKE SELECT ON TABLE t FROM r1;
REVOKE
test=> SELECT relacl FROM pg_class WHERE relname = 't';
relacl
--
 {joe=arwdDxtm/joe,r1=r*/joe}
(1 row)

```
The warnings come from restrict_and_check_grant() in aclchk.c. The
pseudocode is

  if (revoked_privileges & available_grant_options == 0)
emit_warning()

In the second example, `r1` does have the proper grant options so no
warning is emitted. However, the revoke has no actual effect.

Reading through the docs [0], I'm not actually sure if the REVOKE
in the second example should succeed or not. At first it says:

> A user can only revoke privileges that were granted directly by that
> user. If, for example, user A has granted a privilege with grant
> option to user B, and user B has in turn granted it to user C, then
> user A cannot revoke the privilege directly from C.

Which seems pretty clear that you can only revoke privileges that you
directly granted. However later on it says:

> As long as some privilege is available, the command will proceed, but
> it will revoke only those privileges for which the user has grant
> options.
...
> while the other forms will issue a warning if grant options for any
> of the privileges specifically named in the command are not held.

Which seems to imply that you can revoke a privilege as long as you
have a grant option on that privilege.

Either way I think the REVOKE should either fail and emit a warning
OR succeed and emit no warning.

I wasn't able to locate where the check for
> A user can only revoke privileges that were granted directly by that
> user.
is in the code, but we should probably just add a warning there.

- Joe Koshakow

[0] https://www.postgresql.org/docs/15/sql-revoke.html


Re: is_superuser is not documented

2023-04-11 Thread Joseph Koshakow
On Tue, Apr 11, 2023 at 9:37 AM Fujii Masao 
wrote:

>>  > Yes, this patch moves the descriptions of is_superuser to
config.sgml
>>  > and changes its group to PRESET_OPTIONS.
>>
>> is_superuser feels a little out of place in this file. All of
>> the options here apply to the entire PostgreSQL server, while
>> is_superuser only applies to the current session.
>
>Aren't other preset options like lc_collate, lc_ctype and
server_encoding
>similar to is_superuser? They seem to behave in a similar way as their
>settings can be different for each connection depending on the
connected database.

I think the difference is that all of those options are constant for
all connections to the same database and once the database is created
they are immutable. is_superuser is set on a per session basis and can
be changed at any time.

Looking through the options it actually looks like all the options are
set either when the server is built, the server is started, or the
database is created, and once they're set they become immutable. The
one exception I see is in_hot_standby mode which can be updated from on
to off (I can't remember off the top of my head if it can be updated
the other way). I'm moving the goal post a bit but I think preset may
imply that the value isn't going to change once it's been set.

Having said all that I actually think this is the best place for
is_superuser since it doesn't seem to fit in anywhere else.

>> I'm not familiar with the origins of is_superuser and it may be too
>> late for this, but it seems like is_superuser would fit in much
better
>> as a system information function [0] rather than a GUC. Particularly
>> in the Session Information Functions.
>
>I understand your point, but I think it would be more confusing to
document
>is_superuser there because it's defined and behaves differently from
>session information functions like current_user. For instance,
>the value of is_superuser can be displayed using SHOW command,
>while current_user cannot. Therefore, it's better to keep is_superuser
>separate from the session information functions.

I was implying that I thought it would have made more sense for
is_superuser to be implemented as a function, behave as a function,
and not be visible via SHOW. However, there may have been a good reason
not to do this and it may already be too late for that.

In my opinion, this is ready to be committed. However, like I said
earlier I'm not very familiar with the GUC code so you may want to
wait for another opinion.

Thanks,
Joe Koshakow


DecodeInterval fixes

2023-04-09 Thread Joseph Koshakow
Hi all,

This patch does three things in the DecodeInterval function:

1) Removes dead code for handling unit type RESERVE. There used to be
a unit called "invalid" that was of type RESERVE. At some point that
unit was removed and there were no more units of type RESERVE.
Therefore, the code for RESERVE unit handling is unreachable.

2) Restrict the unit "ago" to only appear at the end of the
interval. According to the docs [0], this is the only valid place to
put it, but we allowed it multiple times at any point in the input.

3) Error when the user has multiple consecutive units or a unit without
an accompanying value. I spent a lot of time trying to come up with
robust ways to detect this and ultimately settled on using the "type"
field. I'm not entirely happy with this approach, because it involves
having to look ahead to the next field in a couple of places. The other
approach I was considering was to introduce a boolean flag called
"unhandled_unit". After parsing a unit it would be set to true, after
applying the unit to a number it would be set to false. If it was true
right before parsing a unit, then we would error. Please let me know
if you have any suggestions here.

There is one more problem I noticed, but didn't fix. We allow multiple
"@" to be sprinkled anywhere in the input, even though the docs [0]
only allow it to appear at the beginning of the input. For example,
the following query works fine:

# SELECT INTERVAL '1 @ year @ @ @ 6 days @ 10 @ minutes';
interval

 1 year 6 days 00:10:00
(1 row)

Unfortunately, this can't be fixed in DecodeInterval, because all of
the "@" fields are filtered out before this method. Additionally, I
believe this means that the lines

 if (type == IGNORE_DTF)
 continue;

in DecodeInterval, that appears right after decoding the units, are
unreachable since
"@" is the only unit of type IGNORE_DTF. Since "@" is filtered out,
we'll never decode a unit of type IGNORE_DTF.

For reference, I previously merged a couple similar patches to this
one, but for other date-time types [1], [2].

Thanks,
Joe Koshakow

[0]
https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT
[1]
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=5b3c5953553bb9fb0b171abc6041e7c7e9ca5b4d
[2]
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=bcc704b52490492e6bd73c056b3e9644504d
From 4c5641f15e5409ef5973a5f305352018f06da538 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 9 Apr 2023 20:37:27 -0400
Subject: [PATCH] Fix interval decode handling of invalid intervals

This patch does three things in the DecodeInterval function:

1) Removes dead code for handling unit type RESERVE. There used to be
a unit called "invalid" that was of type RESERVE. At some point that
unit was removed and there were no more units of type RESERVE.
Therefore, the code for RESERVE unit handling is unreachable.

2) Restrict the unit "ago" to only appear at the end of the
interval. According to the docs [0], this is the only valid place to
put it, but we allowed it multiple times at any point in the input.

3) Error when the user has multiple consecutive units or a unit without
an accompanying value.

[0] https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT
---
 src/backend/utils/adt/datetime.c   | 55 +++---
 src/test/regress/expected/interval.out | 18 +
 src/test/regress/sql/interval.sql  |  7 
 3 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index be2e55bb29..17fc0d45ea 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3335,7 +3335,8 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 if (force_negative &&
 	itm_in->tm_usec > 0)
 	itm_in->tm_usec = -itm_in->tm_usec;
-type = DTK_DAY;
+if (i != 0 && ftype[i - 1] != DTK_STRING && ftype[i - 1] != DTK_SPECIAL)
+	type = DTK_DAY;
 break;
 
 			case DTK_TZ:
@@ -3372,7 +3373,8 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 	 * specified. This handles the case of '1 +02:03' since we
 	 * are reading right to left.
 	 */
-	type = DTK_DAY;
+	if (i != 0 && ftype[i - 1] != DTK_STRING && ftype[i - 1] != DTK_SPECIAL)
+		type = DTK_DAY;
 	break;
 }
 
@@ -3475,12 +3477,14 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 		if (!AdjustMicroseconds(val, fval, 1, itm_in))
 			return DTERR_FIELD_OVERFLOW;
 		tmask = DTK_M(MICROSECOND);
+		type = IGNORE_DTF;
 		break;
 
 	case DTK_MILLISEC:
 		if (!AdjustMicroseconds(val, fval, 1000, itm_in))
 			return DTERR_FIELD_OVERFLOW;
 		tmas

Re: is_superuser is not documented

2023-04-08 Thread Joseph Koshakow
On Mon, Apr 3, 2023 at 10:47 AM Fujii Masao 
wrote:
>Yes, the patch has not been committed yet because of lack of review
comments.
>Do you have any review comments on this patch?
>Or you think it's ready for committer?

I'm not very familiar with this code, so I'm not sure how much my
review is worth, but maybe it will spark some discussion.

> Yes, this patch moves the descriptions of is_superuser to config.sgml
> and changes its group to PRESET_OPTIONS.

is_superuser feels a little out of place in this file. All of
the options here apply to the entire PostgreSQL server, while
is_superuser only applies to the current session. The description of
this file says:

> These options report various aspects of PostgreSQL behavior that
> might be of interest to certain applications, particularly
> administrative front-ends. Most of them are determined when
> PostgreSQL is compiled or when it is installed.

Which doesn't seem to apply to is_superuser. It doesn't affect
the behavior of PostgreSQL, only what the current session is allowed to
do. It's also not determined when PostgreSQL is compiled/installed. Is
there some update that we can make to the description that would make
is_superuser fit in better?

I'm not familiar with the origins of is_superuser and it may be too
late for this, but it seems like is_superuser would fit in much better
as a system information function [0] rather than a GUC. Particularly
in the Session Information Functions.

> - GUC_REPORT | GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE |
GUC_DISALLOW_IN_FILE
> + GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE

This looks good to me. The lack of is_superuser from SHOW ALL has been
a source of confusion to me in the past.

As a side note server_version, server_encoding, lc_collate, and
lc_ctype all appear in both the preset options section of config.sgml
and in show.sgml. I'm not sure what the logic is for just including
these four parameters in show.sgml, but I think we should either
include all of the preset options or none of them.

Thanks,
Joe Koshakow

[0] https://www.postgresql.org/docs/current/functions-info.html


Re: Infinite Interval

2023-04-02 Thread Joseph Koshakow
On Sun, Apr 2, 2023 at 6:54 PM Tom Lane  wrote:
>
>Joseph Koshakow  writes:
>>> I've added an errcontext to all the errors of the form "X out of
>>> range".
>
>Please note the style guidelines [1]:
>
>errcontext(const char *msg, ...) is not normally called directly
from
>an ereport message site; rather it is used in error_context_stack
>callback functions to provide information about the context in
which
>an error occurred, such as the current location in a PL function.
>
>If we should have this at all, which I doubt, it's probably
>errdetail not errcontext.

I've attached a patch with all of the errcontext calls removed. None of
the existing out of range errors have an errdetail call so I think this
is more consistent. If we do want to add errdetail, then we should
probably do it in a later patch and add it to all out of range errors,
not just the ones related to infinity.

>> How do you feel about redefining interval_mi in terms of interval_um
>> and interval_pl? That one felt like an improvement to me even outside
>> of the context of this change.
>
>I did not think so.  For one thing, it introduces integer-overflow
>hazards that you would not have otherwise; ie, interval_um might have
>to throw an error for INT_MIN input, even though the end result of
>the calculation would have been in range.

Good point, I didn't think of that.

Thanks,
Joe Koshakow
From f6bf9c201a94a0b338dff520442ac5e8d2922c89 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 1 Apr 2023 10:22:24 -0400
Subject: [PATCH 1/3] Move integer helper function to int.h

---
 src/backend/utils/adt/datetime.c | 25 -
 src/include/common/int.h | 13 +
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index be2e55bb29..64f28a85b0 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -51,7 +51,6 @@ static int	DecodeDate(char *str, int fmask, int *tmask, bool *is2digits,
 	   struct pg_tm *tm);
 static char *AppendSeconds(char *cp, int sec, fsec_t fsec,
 		   int precision, bool fillzeros);
-static bool int64_multiply_add(int64 val, int64 multiplier, int64 *sum);
 static bool AdjustFractMicroseconds(double frac, int64 scale,
 	struct pg_itm_in *itm_in);
 static bool AdjustFractDays(double frac, int scale,
@@ -515,22 +514,6 @@ AppendTimestampSeconds(char *cp, struct pg_tm *tm, fsec_t fsec)
 	return AppendSeconds(cp, tm->tm_sec, fsec, MAX_TIMESTAMP_PRECISION, true);
 }
 
-
-/*
- * Add val * multiplier to *sum.
- * Returns true if successful, false on overflow.
- */
-static bool
-int64_multiply_add(int64 val, int64 multiplier, int64 *sum)
-{
-	int64		product;
-
-	if (pg_mul_s64_overflow(val, multiplier, &product) ||
-		pg_add_s64_overflow(*sum, product, sum))
-		return false;
-	return true;
-}
-
 /*
  * Multiply frac by scale (to produce microseconds) and add to itm_in->tm_usec.
  * Returns true if successful, false if itm_in overflows.
@@ -621,7 +604,7 @@ AdjustMicroseconds(int64 val, double fval, int64 scale,
    struct pg_itm_in *itm_in)
 {
 	/* Handle the integer part */
-	if (!int64_multiply_add(val, scale, &itm_in->tm_usec))
+	if (pg_mul_add_s64_overflow(val, scale, &itm_in->tm_usec))
 		return false;
 	/* Handle the float part */
 	return AdjustFractMicroseconds(fval, scale, itm_in);
@@ -2701,9 +2684,9 @@ DecodeTimeForInterval(char *str, int fmask, int range,
 		return dterr;
 
 	itm_in->tm_usec = itm.tm_usec;
-	if (!int64_multiply_add(itm.tm_hour, USECS_PER_HOUR, &itm_in->tm_usec) ||
-		!int64_multiply_add(itm.tm_min, USECS_PER_MINUTE, &itm_in->tm_usec) ||
-		!int64_multiply_add(itm.tm_sec, USECS_PER_SEC, &itm_in->tm_usec))
+	if (pg_mul_add_s64_overflow(itm.tm_hour, USECS_PER_HOUR, &itm_in->tm_usec) ||
+		pg_mul_add_s64_overflow(itm.tm_min, USECS_PER_MINUTE, &itm_in->tm_usec) ||
+		pg_mul_add_s64_overflow(itm.tm_sec, USECS_PER_SEC, &itm_in->tm_usec))
 		return DTERR_FIELD_OVERFLOW;
 
 	return 0;
diff --git a/src/include/common/int.h b/src/include/common/int.h
index 450800894e..81726c65f7 100644
--- a/src/include/common/int.h
+++ b/src/include/common/int.h
@@ -254,6 +254,19 @@ pg_mul_s64_overflow(int64 a, int64 b, int64 *result)
 #endif
 }
 
+/*
+ * Add val * multiplier to *sum.
+ * Returns false if successful, true on overflow.
+ */
+static inline bool
+pg_mul_add_s64_overflow(int64 val, int64 multiplier, int64 *sum)
+{
+	int64		product;
+
+	return pg_mul_s64_overflow(val, multiplier, &product) ||
+		pg_add_s64_overflow(*sum, product, sum);
+}
+
 /*
  * Overflow routines for unsigned integers
  *----
-- 
2.34.1

From 765aa1ebf9de5e5d48e1c588f7bde700743

Re: Infinite Interval

2023-04-02 Thread Joseph Koshakow
>On Sun, Apr 2, 2023 at 5:36 PM Tom Lane  wrote:
>
>    Joseph Koshakow  writes:
>> I've attached a patch with these changes that is meant to be applied
>> over the previous three patches. Let me know what you think.
>
>Does not really seem like an improvement to me --- I think it's
>adding more complexity than it removes.  The changes in CONTEXT
>messages are definitely not an improvement; you might as well
>not have the context messages at all as give misleading ones.
>(Those context messages are added by the previous patches, no?
>They do not really seem per project style, and I'm not sure
>that they are helpful.)

Yes they were added in the previous patch,
v17-0003-Add-infinite-interval-values.patch. I also had the following
note about them.

>I've added an errcontext to all the errors of the form "X out of
>range". My one concern is that some of the messages can be slightly
>confusing. For example date arithmetic is converted to timestamp
>arithmetic, so the errcontext talks about timestamps even though the
>actual operation used dates. For example,
>
>SELECT date 'infinity' + interval '-infinity';
>ERROR:  interval out of range
>CONTEXT:  while adding an interval and timestamp

I would be OK with removing all of the context messages or maybe only
keeping a select few, like the ones in interval_um.

How do you feel about redefining interval_mi in terms of interval_um
and interval_pl? That one felt like an improvement to me even outside
of the context of this change.

Thanks,
Joe Koshakow


Re: Infinite Interval

2023-04-02 Thread Joseph Koshakow
> > This code is duplicated in timestamp_pl_interval(). We could create a
function
> > to encode the infinity handling rules and then call it in these two
places. The
> > argument types are different, Timestamp and TimestampTz viz. which map
to int64,
> > so shouldn't be a problem. But it will be slightly unreadable. Or use
macros
> > but then it will be difficult to debug.
> >
> > What do you think?
>
> I was hoping that I could come up with a macro that we could re-use for
> all the similar logic. If that doesn't work then I'll try the helper
> functions. I'll update the patch in a follow-up email to give myself some
> time to think about this.

So I looked for all the places where we do arithmetic between two
potentially infinite values, and they are at the top of the following
functions:

- timestamp_mi()
- timestamp_pl_interval()
- timestamptz_pl_interval_internal()
- interval_pl()
- interval_mi()
- timestamp_age()
- timestamptz_age()

I was able to get an extremely generic macro to work, but it was very
ugly and unmaintainable in my view. Instead I took the following steps
to clean this up:

- I rewrote interval_mi() to be implemented in terms of interval_um()
and interval_pl().
- I abstracted the infinite arithmetic from timestamp_mi(),
timestamp_age(), and timestamptz_age() into a helper function called
infinite_timestamp_mi_internal()
- I abstracted the infinite arithmetic from timestamp_pl_interval() and
timestamptz_pl_interval_internal() into a helper function called
infinite_timestamp_pl_interval_internal()

The helper functions return a bool to indicate if they set the result.
An alternative approach would be to check for finiteness in either of
the inputs, then call the helper function which would have a void
return type. I think this alternative approach would be slightly more
readable, but involve duplicate finiteness checks before and during the
helper function.

I've attached a patch with these changes that is meant to be applied
over the previous three patches. Let me know what you think.

With this patch I believe that I've addressed all open comments except
for the discussion around whether we should check just the months field
or all three fields for finiteness. Please let me know if I've missed
something.

Thanks,
Joe Koshakow
From e50d4ca6321c58d216d563f74502356d721c2b4b Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 2 Apr 2023 17:15:01 -0400
Subject: [PATCH 4/4] Clean up infinity arithmetic

---
 src/backend/utils/adt/timestamp.c | 254 +++---
 src/test/regress/expected/interval.out|  16 +-
 src/test/regress/expected/timestamp.out   |   4 +-
 src/test/regress/expected/timestamptz.out |   4 +-
 4 files changed, 86 insertions(+), 192 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 78133dfb17..50a47f3778 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -2788,16 +2788,15 @@ timestamp_larger(PG_FUNCTION_ARGS)
 	PG_RETURN_TIMESTAMP(result);
 }
 
-
-Datum
-timestamp_mi(PG_FUNCTION_ARGS)
+/* Helper function to perform subtraction between two potentially infinite
+ * timestamps.
+ *
+ * Returns true if either dt1 or dt2 were infinite and result was set,
+ * false otherwise.
+ */
+bool
+infinite_timestamp_mi_internal(Timestamp dt1, Timestamp dt2, Interval *result)
 {
-	Timestamp	dt1 = PG_GETARG_TIMESTAMP(0);
-	Timestamp	dt2 = PG_GETARG_TIMESTAMP(1);
-	Interval   *result;
-
-	result = (Interval *) palloc(sizeof(Interval));
-
 	/*
 	 * Subtracting two infinite timestamps with different signs results in an
 	 * infinite interval with the same sign as the left operand. Subtracting
@@ -2812,6 +2811,7 @@ timestamp_mi(PG_FUNCTION_ARGS)
 	 errcontext("while subtracting timestamps")));
 		else
 			INTERVAL_NOBEGIN(result);
+		return true;
 	}
 	else if (TIMESTAMP_IS_NOEND(dt1))
 	{
@@ -2822,11 +2822,34 @@ timestamp_mi(PG_FUNCTION_ARGS)
 	 errcontext("while subtracting timestamps")));
 		else
 			INTERVAL_NOEND(result);
+		return true;
 	}
 	else if (TIMESTAMP_IS_NOBEGIN(dt2))
+	{
 		INTERVAL_NOEND(result);
+		return true;
+	}
 	else if (TIMESTAMP_IS_NOEND(dt2))
+	{
 		INTERVAL_NOBEGIN(result);
+		return true;
+	}
+	else
+		return false;
+}
+
+Datum
+timestamp_mi(PG_FUNCTION_ARGS)
+{
+	Timestamp	dt1 = PG_GETARG_TIMESTAMP(0);
+	Timestamp	dt2 = PG_GETARG_TIMESTAMP(1);
+	Interval   *result;
+
+	result = (Interval *) palloc(sizeof(Interval));
+
+	if (infinite_timestamp_mi_internal(dt1, dt2, result))
+	{
+	}
 	else
 	{
 		if (unlikely(pg_sub_s64_overflow(dt1, dt2, &result->time)))
@@ -3060,23 +3083,15 @@ interval_justify_days(PG_FUNCTION_ARGS)
 	PG_RETURN_INTERVAL_P(result);
 }
 
-/* timestamp_pl_interval()
- * Add an interval to a timestamp data type.
- * Note that interval has provisions for qualitative year/month and day
- *	units, so try to do the right thing with them.
- * To add a month,

Re: is_superuser is not documented

2023-04-01 Thread Joseph Koshakow
On Wed, Mar 29, 2023 at 5:21 PM Bruce Momjian  wrote:
>
>On Thu, Mar  2, 2023 at 12:00:43PM -0500, Joseph Koshakow wrote:
>>
>>
>> On Thu, Mar 2, 2023 at 11:53 AM Fujii Masao <
masao.fu...@oss.nttdata.com>
>> wrote:
>> >
>> >On 2022/09/14 14:27, bt22kawamotok wrote:
>> >> I update patch to reflect master update.
>> >
>> >Thanks for updating the patch!
>> >
>> >+   
>> >+Shows whether the current user is a superuser or not.
>> >+   
>> >
>> >How about adding the note about when this parameter can change,
>> >like we do for in_hot_standby docs?  I applied this change to
the patch.
>> >Attached is the updated version of the patch.
>> >
>>
>> I just came across this thread and noticed that the patch was never
>> merged. There are some brief docs for is_superuser in the SHOW docs:
>> https://www.postgresql.org/docs/current/sql-show.html, but the GUC
>> fields were never updated.
>>
>> Is there a reason that it never got merged or was it just forgotten
>> about?
>
>Uh, where are you looking?  I see it in the SGML, and in the PG 15
docs:
>
>https://www.postgresql.org/docs/current/sql-show.html
>
>IS_SUPERUSER
>
>True if the current role has superuser privileges.

The patch updated the guc table for is_superuser in
src/backend/utils/misc/guc_tables.c

- /* Not for general use --- used by SET SESSION AUTHORIZATION */
- {"is_superuser", PGC_INTERNAL, UNGROUPED,
+ {"is_superuser", PGC_INTERNAL, PRESET_OPTIONS,
  gettext_noop("Shows whether the current user is a superuser."),
  NULL,
- GUC_REPORT | GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE |
GUC_DISALLOW_IN_FILE
+ GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE

However, when I look at the code on master I don't see this update

/* Not for general use --- used by SET SESSION AUTHORIZATION */
{"is_superuser", PGC_INTERNAL, UNGROUPED,
gettext_noop("Shows whether the current user is a superuser."),
NULL,
GUC_REPORT | GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE |
GUC_DISALLOW_IN_FILE

Similarly, when running `SHOW ALL` against master I don't see the
is_superuser variable

$ /usr/local/pgsql/bin/psql -c "SHOW ALL" test | grep is_superuser
$


Re: Infinite Interval

2023-03-25 Thread Joseph Koshakow
In terms of adding/subtracting infinities, the IEEE standard is
paywalled and I don't have a copy. I tried finding information online but
I also wasn't able to find anything useful. I additionally checked to see
the results of C++, C, and Java, and they all match which increases my
confidence that we're doing the right thing. Does anyone happen to have
a copy of the standard and can confirm?

- Joe Koshakow


Re: Infinite Interval

2023-03-25 Thread Joseph Koshakow
On Fri, Mar 24, 2023 at 9:43 AM Ashutosh Bapat 
wrote:
>
>You don't need to do this, but looks like we can add DAYS_PER_WEEK
macro and
>use it here.

I've attached a patch with this new macro. There's probably tons of
places it can be used instead of hardcoding the number 7, but I'll save
that for a future patch.

- Joe Koshakow
From 41fa5de65c757d72331aff6bb626fab76390e9db Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 18 Mar 2023 12:26:28 -0400
Subject: [PATCH 1/2] Move integer helper function to int.h

---
 src/backend/utils/adt/datetime.c | 25 -
 src/include/common/int.h | 13 +
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index be2e55bb29..64f28a85b0 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -51,7 +51,6 @@ static int	DecodeDate(char *str, int fmask, int *tmask, bool *is2digits,
 	   struct pg_tm *tm);
 static char *AppendSeconds(char *cp, int sec, fsec_t fsec,
 		   int precision, bool fillzeros);
-static bool int64_multiply_add(int64 val, int64 multiplier, int64 *sum);
 static bool AdjustFractMicroseconds(double frac, int64 scale,
 	struct pg_itm_in *itm_in);
 static bool AdjustFractDays(double frac, int scale,
@@ -515,22 +514,6 @@ AppendTimestampSeconds(char *cp, struct pg_tm *tm, fsec_t fsec)
 	return AppendSeconds(cp, tm->tm_sec, fsec, MAX_TIMESTAMP_PRECISION, true);
 }
 
-
-/*
- * Add val * multiplier to *sum.
- * Returns true if successful, false on overflow.
- */
-static bool
-int64_multiply_add(int64 val, int64 multiplier, int64 *sum)
-{
-	int64		product;
-
-	if (pg_mul_s64_overflow(val, multiplier, &product) ||
-		pg_add_s64_overflow(*sum, product, sum))
-		return false;
-	return true;
-}
-
 /*
  * Multiply frac by scale (to produce microseconds) and add to itm_in->tm_usec.
  * Returns true if successful, false if itm_in overflows.
@@ -621,7 +604,7 @@ AdjustMicroseconds(int64 val, double fval, int64 scale,
    struct pg_itm_in *itm_in)
 {
 	/* Handle the integer part */
-	if (!int64_multiply_add(val, scale, &itm_in->tm_usec))
+	if (pg_mul_add_s64_overflow(val, scale, &itm_in->tm_usec))
 		return false;
 	/* Handle the float part */
 	return AdjustFractMicroseconds(fval, scale, itm_in);
@@ -2701,9 +2684,9 @@ DecodeTimeForInterval(char *str, int fmask, int range,
 		return dterr;
 
 	itm_in->tm_usec = itm.tm_usec;
-	if (!int64_multiply_add(itm.tm_hour, USECS_PER_HOUR, &itm_in->tm_usec) ||
-		!int64_multiply_add(itm.tm_min, USECS_PER_MINUTE, &itm_in->tm_usec) ||
-		!int64_multiply_add(itm.tm_sec, USECS_PER_SEC, &itm_in->tm_usec))
+	if (pg_mul_add_s64_overflow(itm.tm_hour, USECS_PER_HOUR, &itm_in->tm_usec) ||
+		pg_mul_add_s64_overflow(itm.tm_min, USECS_PER_MINUTE, &itm_in->tm_usec) ||
+		pg_mul_add_s64_overflow(itm.tm_sec, USECS_PER_SEC, &itm_in->tm_usec))
 		return DTERR_FIELD_OVERFLOW;
 
 	return 0;
diff --git a/src/include/common/int.h b/src/include/common/int.h
index 450800894e..81726c65f7 100644
--- a/src/include/common/int.h
+++ b/src/include/common/int.h
@@ -254,6 +254,19 @@ pg_mul_s64_overflow(int64 a, int64 b, int64 *result)
 #endif
 }
 
+/*
+ * Add val * multiplier to *sum.
+ * Returns false if successful, true on overflow.
+ */
+static inline bool
+pg_mul_add_s64_overflow(int64 val, int64 multiplier, int64 *sum)
+{
+	int64		product;
+
+	return pg_mul_s64_overflow(val, multiplier, &product) ||
+		pg_add_s64_overflow(*sum, product, sum);
+}
+
 /*
  * Overflow routines for unsigned integers
  *
-- 
2.34.1

From 242ffd232bef606c9c948f0ee9980152fb9e3bec Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 18 Mar 2023 12:38:58 -0400
Subject: [PATCH 2/2] Check for overflow in make_interval

---
 src/backend/utils/adt/timestamp.c  | 24 +++-
 src/include/common/int.h   | 13 +
 src/include/datatype/timestamp.h   |  1 +
 src/test/regress/expected/interval.out |  5 +
 src/test/regress/sql/interval.sql  |  4 
 5 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index aaadc68ae6..ccf0019a3c 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1517,13 +1517,27 @@ make_interval(PG_FUNCTION_ARGS)
  errmsg("interval out of range")));
 
 	result = (Interval *) palloc(sizeof(Interval));
-	result->month = years * MONTHS_PER_YEAR + months;
-	result->day = weeks * 7 + days;
+	result->month = months;
+	if (pg_mul_add_s32_overflow(years, MONTHS_PER_YEAR, &result->month))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+	result->day = days;
+	if (pg_mul_

Re: Infinite Interval

2023-03-19 Thread Joseph Koshakow
On Sun, Mar 19, 2023 at 5:13 PM Tom Lane  wrote:
>
>Did you actually write "if TIMESTAMP_IS_NOBEGIN(dt2)" and not
>"if (TIMESTAMP_IS_NOBEGIN(dt2))"?  If the former, I'm not surprised
>that pgindent gets confused.  The parentheses are required by the
>C standard.  Your code might accidentally work because the macro
>has parentheses internally, but call sites have no business
>knowing that.  For example, it would be completely legit to change
>TIMESTAMP_IS_NOBEGIN to be a plain function, and then this would be
>syntactically incorrect.

Oh duh. I've been doing too much Rust development and did this without
thinking. I've attached a patch with a fix.

- Joe Koshakow
From d3543e7c410f83cbe3f3f3df9715025bc767fc5f Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 18 Mar 2023 13:59:34 -0400
Subject: [PATCH 3/3] Add infinite interval values

This commit adds positive and negative infinite values to the interval
data type. The entire range of intervals with INT_MAX months or INT_MIN
months are reserved for infinite values. This makes checking finiteness
much simpler.

Ashutosh Bapat and Joe Koshakow and Jian He
---
 doc/src/sgml/datatype.sgml|   2 +-
 doc/src/sgml/func.sgml|   5 +-
 src/backend/utils/adt/date.c  |  32 +
 src/backend/utils/adt/datetime.c  |   2 +
 src/backend/utils/adt/formatting.c|   2 +-
 src/backend/utils/adt/selfuncs.c  |  12 +-
 src/backend/utils/adt/timestamp.c | 679 ++
 src/include/datatype/timestamp.h  |  19 +
 src/include/utils/timestamp.h |   3 +
 src/test/regress/expected/horology.out|   6 +-
 src/test/regress/expected/interval.out| 559 --
 src/test/regress/expected/timestamp.out   |  62 ++
 src/test/regress/expected/timestamptz.out |  62 ++
 src/test/regress/sql/horology.sql |   6 +-
 src/test/regress/sql/interval.sql | 170 +-
 src/test/regress/sql/timestamp.sql|  19 +
 src/test/regress/sql/timestamptz.sql  |  18 +
 17 files changed, 1454 insertions(+), 204 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index faf0d74104..694af4000d 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -2321,7 +2321,7 @@ TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
  
  
   infinity
-  date, timestamp
+  date, timestamp, interval
   later than all other time stamps
  
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index a3a13b895f..33fa3e6670 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9472,7 +9472,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  boolean
 
 
- Test for finite interval (currently always true)
+ Test for finite interval (not +/-infinity)
 
 
  isfinite(interval '4 hours')
@@ -10369,7 +10369,8 @@ SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40');
  When the input value is +/-Infinity, extract returns
  +/-Infinity for monotonically-increasing fields (epoch,
  julian, year, isoyear,
- decade, century, and millennium).
+ decade, century, and millennium
+ for all types and hour and day just for interval).
  For other fields, NULL is returned.  PostgreSQL
  versions before 9.6 returned zero for all cases of infinite input.
 
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index a163fbb4ab..5b4ba76eed 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2023,6 +2023,11 @@ interval_time(PG_FUNCTION_ARGS)
 	TimeADT		result;
 	int64		days;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range")));
+
 	result = span->time;
 	if (result >= USECS_PER_DAY)
 	{
@@ -2067,6 +2072,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2085,6 +2095,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2599,6 +2614,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL

Re: Infinite Interval

2023-03-19 Thread Joseph Koshakow
On Sat, Mar 18, 2023 at 3:55 PM Tom Lane  wrote:
>
>Joseph Koshakow  writes:
>> On Sat, Mar 18, 2023 at 3:08 PM Tom Lane  wrote:
>>> More specifically, those are from running pg_indent with an obsolete
>>> typedefs list.
>
>> I must be doing something wrong because even after doing that I get
the
>> same strange formatting. Specifically from the root directory I ran
>
>Hmm, I dunno what's going on there.  When I do this:
>
>>   curl https://buildfarm.postgresql.org/cgi-bin/typedefs.pl -o
>> src/tools/pgindent/typedefs.list
>
>I end up with a plausible set of updates, notably
>
>$ git diff
>diff --git a/src/tools/pgindent/typedefs.list
b/src/tools/pgindent/typedefs.list
>index 097f42e1b3..667f8e13ed 100644
>--- a/src/tools/pgindent/typedefs.list
>+++ b/src/tools/pgindent/typedefs.list
>...
>@@ -545,10 +548,12 @@ DataDumperPtr
> DataPageDeleteStack
> DatabaseInfo
> DateADT
>+DateTimeErrorExtra
> Datum
> DatumTupleFields
> DbInfo
> DbInfoArr
>+DbLocaleInfo
> DeClonePtrType
> DeadLockState
> DeallocateStmt
>
>so it sure ought to know DateTimeErrorExtra is a typedef.
>I then tried pgindent'ing datetime.c and timestamp.c,
>and it did not want to change either file.  I do get
>diffs like

> DecodeDateTime(char **field, int *ftype, int nf,
>   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
>-  DateTimeErrorExtra *extra)
>+  DateTimeErrorExtra * extra)
> {
>int fmask = 0,
>
>if I try to pgindent datetime.c with typedefs.list as it
>stands in HEAD.  That's pretty much pgindent's normal
>behavior when it doesn't recognize a name as a typedef.

I must have been doing something wrong because I tried again today and
it worked fine. However, I do get a lot of changes like the following:

  -   if TIMESTAMP_IS_NOBEGIN(dt2)
  -   ereport(ERROR,
  -
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
  -errmsg("timestamp out of
range")));
  +   if TIMESTAMP_IS_NOBEGIN
  +   (dt2)
  +   ereport(ERROR,
  +
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
  +errmsg("timestamp out of
range")));

Should I keep these pgindent changes or keep it the way I have it?

- Joe Koshakow


Re: Infinite Interval

2023-03-18 Thread Joseph Koshakow
On Sat, Mar 18, 2023 at 3:08 PM Tom Lane  wrote:
> Joseph Koshakow  writes:
>> On Thu, Mar 9, 2023 at 12:42 PM Ashutosh Bapat <
ashutosh.bapat@gmail.com>
>> wrote:
>>> There are a lot of these diffs. PG code doesn't leave an extra space
>>> between variable name and *.
>
>> Those appeared from running pg_indent. I've removed them all.
>
> More specifically, those are from running pg_indent with an obsolete
> typedefs list.  Good practice is to fetch an up-to-date list from
> the buildfarm:
>
> curl https://buildfarm.postgresql.org/cgi-bin/typedefs.pl -o
.../typedefs.list
>
> and use that.  (If your patch adds any typedefs, you can then add them
> to that list.)  There's been talk of trying harder to keep
> src/tools/pgindent/typedefs.list up to date, but not much has happened
> yet.

I must be doing something wrong because even after doing that I get the
same strange formatting. Specifically from the root directory I ran
  curl https://buildfarm.postgresql.org/cgi-bin/typedefs.pl -o
src/tools/pgindent/typedefs.list
  src/tools/pgindent/pgindent src/backend/utils/adt/datetime.c
src/include/common/int.h src/backend/utils/adt/timestamp.c
src/backend/utils/adt/date.c src/backend/utils/adt/formatting.c
src/backend/utils/adt/selfuncs.c src/include/datatype/timestamp.h
src/include/utils/timestamp.h

>The specific issue with float zero is that plus zero and minus zero
>are distinct concepts with distinct bit patterns, but the IEEE spec
>says that they compare as equal.  The C standard says about "if":
>
>   [#1] The controlling expression of  an  if  statement  shall
>   have scalar type.
>   [#2]  In  both  forms, the first substatement is executed if
>   the expression compares unequal to 0.  In the else form, the
>   second  substatement  is executed if the expression compares
>   equal to 0.
>
>so it sure looks to me like a float control expression is valid and
>minus zero should be treated as "false".  Nonetheless, personally
>I'd consider this to be poor style and would write "r != 0" or
>"r != 0.0" rather than depending on that.

Thanks for the info, I've updated the three instances of the check to
be "r != 0.0"

>BTW, this may already need a rebase over 75bd846b6.

The patches in this email should be rebased over master.

- Joe Koshakow
From da22f9b3d55433c408f04056eecf0fddf60f01c9 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 18 Mar 2023 12:38:58 -0400
Subject: [PATCH 2/3] Check for overflow in make_interval

---
 src/backend/utils/adt/timestamp.c  | 24 +++-
 src/include/common/int.h   | 13 +
 src/test/regress/expected/interval.out |  5 +
 src/test/regress/sql/interval.sql  |  4 
 4 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index aaadc68ae6..b79af28ae3 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -1517,13 +1517,27 @@ make_interval(PG_FUNCTION_ARGS)
  errmsg("interval out of range")));
 
 	result = (Interval *) palloc(sizeof(Interval));
-	result->month = years * MONTHS_PER_YEAR + months;
-	result->day = weeks * 7 + days;
+	result->month = months;
+	if (pg_mul_add_s32_overflow(years, MONTHS_PER_YEAR, &result->month))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+	result->day = days;
+	if (pg_mul_add_s32_overflow(weeks, 7, &result->day))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
 
 	secs = rint(secs * USECS_PER_SEC);
-	result->time = hours * ((int64) SECS_PER_HOUR * USECS_PER_SEC) +
-		mins * ((int64) SECS_PER_MINUTE * USECS_PER_SEC) +
-		(int64) secs;
+	result->time = secs;
+	if (pg_mul_add_s64_overflow(mins, ((int64) SECS_PER_MINUTE * USECS_PER_SEC), &result->time))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+	if (pg_mul_add_s64_overflow(hours, ((int64) SECS_PER_HOUR * USECS_PER_SEC), &result->time))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
 
 	PG_RETURN_INTERVAL_P(result);
 }
diff --git a/src/include/common/int.h b/src/include/common/int.h
index 81726c65f7..48ef495551 100644
--- a/src/include/common/int.h
+++ b/src/include/common/int.h
@@ -154,6 +154,19 @@ pg_mul_s32_overflow(int32 a, int32 b, int32 *result)
 #endif
 }
 
+/*
+ * Add val * multiplier to *sum.
+ * Returns false if successful, true on overflow.
+ */
+static inline bool
+pg_mul_add_s32_overflow(int32 val, int32 multiplier, int32 *sum)
+{
+	int32		product;
+
+	r

Re: Date-Time dangling unit fix

2023-03-05 Thread Joseph Koshakow
Also I removed some dead code from the previous patch.

- Joe Koshakow
From 2ff08d729bca87992514d0651fdb62455e43cd8a Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 10 Dec 2022 18:59:26 -0500
Subject: [PATCH] Remove unknown ISO format, handle dangling units

This commit removes the date format of "y2001m02d04" and the time
format of "h04mm05s06". These were never documented and don't seem to
be valid ISO formats.

Additionally this commit handles repeated and dangling julian units
in DecodeDateTime.
---
 src/backend/utils/adt/datetime.c   | 219 ++---
 src/test/regress/expected/horology.out |  41 ++---
 src/test/regress/sql/horology.sql  |   4 +
 3 files changed, 36 insertions(+), 228 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index d166613895..bf7cb94b52 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -983,7 +983,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
 	int			fmask = 0,
 tmask,
 type;
-	int			ptype = 0;		/* "prefix type" for ISO y2001m02d04 format */
+	int			ptype = 0;		/* "prefix type" for ISO and Julian formats */
 	int			i;
 	int			val;
 	int			dterr;
@@ -1174,10 +1174,6 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
 			case DTK_NUMBER:
 
-/*
- * Was this an "ISO date" with embedded field labels? An
- * example is "y2001m02d04" - thomas 2001-02-04
- */
 if (ptype != 0)
 {
 	char	   *cp;
@@ -1188,84 +1184,11 @@ DecodeDateTime(char **field, int *ftype, int nf,
 	if (errno == ERANGE)
 		return DTERR_FIELD_OVERFLOW;
 
-	/*
-	 * only a few kinds are allowed to have an embedded
-	 * decimal
-	 */
-	if (*cp == '.')
-		switch (ptype)
-		{
-			case DTK_JULIAN:
-			case DTK_TIME:
-			case DTK_SECOND:
-break;
-			default:
-return DTERR_BAD_FORMAT;
-break;
-		}
-	else if (*cp != '\0')
+	if (*cp != '.' && *cp != '\0')
 		return DTERR_BAD_FORMAT;
 
 	switch (ptype)
 	{
-		case DTK_YEAR:
-			tm->tm_year = value;
-			tmask = DTK_M(YEAR);
-			break;
-
-		case DTK_MONTH:
-
-			/*
-			 * already have a month and hour? then assume
-			 * minutes
-			 */
-			if ((fmask & DTK_M(MONTH)) != 0 &&
-(fmask & DTK_M(HOUR)) != 0)
-			{
-tm->tm_min = value;
-tmask = DTK_M(MINUTE);
-			}
-			else
-			{
-tm->tm_mon = value;
-tmask = DTK_M(MONTH);
-			}
-			break;
-
-		case DTK_DAY:
-			tm->tm_mday = value;
-			tmask = DTK_M(DAY);
-			break;
-
-		case DTK_HOUR:
-			tm->tm_hour = value;
-			tmask = DTK_M(HOUR);
-			break;
-
-		case DTK_MINUTE:
-			tm->tm_min = value;
-			tmask = DTK_M(MINUTE);
-			break;
-
-		case DTK_SECOND:
-			tm->tm_sec = value;
-			tmask = DTK_M(SECOND);
-			if (*cp == '.')
-			{
-dterr = ParseFractionalSecond(cp, fsec);
-if (dterr)
-	return dterr;
-tmask = DTK_ALL_SECS_M;
-			}
-			break;
-
-		case DTK_TZ:
-			tmask = DTK_M(TZ);
-			dterr = DecodeTimezone(field[i], tzp);
-			if (dterr)
-return dterr;
-			break;
-
 		case DTK_JULIAN:
 			/* previous field was a label for "julian date" */
 			if (value < 0)
@@ -1510,6 +1433,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
 	case UNITS:
 		tmask = 0;
+		/* prevent consecutive unhandled units */
+		if (ptype != 0)
+			return DTERR_BAD_FORMAT;
 		ptype = val;
 		break;
 
@@ -1536,7 +1462,6 @@ DecodeDateTime(char **field, int *ftype, int nf,
 			 ftype[i + 1] != DTK_TIME &&
 			 ftype[i + 1] != DTK_DATE))
 			return DTERR_BAD_FORMAT;
-
 		ptype = val;
 		break;
 
@@ -1567,6 +1492,10 @@ DecodeDateTime(char **field, int *ftype, int nf,
 		fmask |= tmask;
 	}			/* end loop over fields */
 
+	/* prefix type was dangling and never handled */
+	if (ptype != 0)
+		return DTERR_BAD_FORMAT;
+
 	/* do final checking/adjustment of Y/M/D fields */
 	dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
 	if (dterr)
@@ -1933,7 +1862,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 	int			fmask = 0,
 tmask,
 type;
-	int			ptype = 0;		/* "prefix type" for ISO h04mm05s06 format */
+	int			ptype = 0;		/* "prefix type" for ISO format */
 	int			i;
 	int			val;
 	int			dterr;
@@ -2060,133 +1989,12 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 			case DTK_NUMBER:
 
 /*
- * Was this an "ISO time" with embedded field labels? An
- * example is "h04mm05s06" - thomas 2001-02-04
+ * Was this an "ISO time"? An example is "T040506.789"
  */
 if (ptype != 0)
 {
-	cha

Re: Date-Time dangling unit fix

2023-03-05 Thread Joseph Koshakow
On Sun, Mar 5, 2023 at 12:54 PM Tom Lane  wrote:
>
> We do accept this:
>
> => select '12:34'::time;
>time
> --
>  12:34:00
> (1 row)
>
> so that must be going through a different code path, which I didn't
> try to identify yet.

That query will contain a single field of "12:34" with ftype DTK_TIME.
That will call into DecodeTime(), which calls into DecodeTimeCommon(),
where we have:

*tmask = DTK_TIME_M;

- Joe Koshakow


Re: Date-Time dangling unit fix

2023-03-05 Thread Joseph Koshakow
Attached is a patch for removing the discussed format of date-times.
From f35284762c02ed466496e4e562b5f95a884b5ef1 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 10 Dec 2022 18:59:26 -0500
Subject: [PATCH] Remove unknown ISO format, handle dangling units

This commit removes the date format of "y2001m02d04" and the time
format of "h04mm05s06". These were never documented and don't seem to
be valid ISO formats.

Additionally this commit handles repeated and dangling julian units
in DecodeDateTime.
---
 src/backend/utils/adt/datetime.c   | 210 ++---
 src/test/regress/expected/horology.out |  41 ++---
 src/test/regress/sql/horology.sql  |   4 +
 3 files changed, 37 insertions(+), 218 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index d166613895..51b72ad6c2 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -983,7 +983,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
 	int			fmask = 0,
 tmask,
 type;
-	int			ptype = 0;		/* "prefix type" for ISO y2001m02d04 format */
+	int			ptype = 0;		/* "prefix type" for ISO and Julian formats */
 	int			i;
 	int			val;
 	int			dterr;
@@ -1174,10 +1174,6 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
 			case DTK_NUMBER:
 
-/*
- * Was this an "ISO date" with embedded field labels? An
- * example is "y2001m02d04" - thomas 2001-02-04
- */
 if (ptype != 0)
 {
 	char	   *cp;
@@ -1188,84 +1184,11 @@ DecodeDateTime(char **field, int *ftype, int nf,
 	if (errno == ERANGE)
 		return DTERR_FIELD_OVERFLOW;
 
-	/*
-	 * only a few kinds are allowed to have an embedded
-	 * decimal
-	 */
-	if (*cp == '.')
-		switch (ptype)
-		{
-			case DTK_JULIAN:
-			case DTK_TIME:
-			case DTK_SECOND:
-break;
-			default:
-return DTERR_BAD_FORMAT;
-break;
-		}
-	else if (*cp != '\0')
+	if (*cp != '.' && *cp != '\0')
 		return DTERR_BAD_FORMAT;
 
 	switch (ptype)
 	{
-		case DTK_YEAR:
-			tm->tm_year = value;
-			tmask = DTK_M(YEAR);
-			break;
-
-		case DTK_MONTH:
-
-			/*
-			 * already have a month and hour? then assume
-			 * minutes
-			 */
-			if ((fmask & DTK_M(MONTH)) != 0 &&
-(fmask & DTK_M(HOUR)) != 0)
-			{
-tm->tm_min = value;
-tmask = DTK_M(MINUTE);
-			}
-			else
-			{
-tm->tm_mon = value;
-tmask = DTK_M(MONTH);
-			}
-			break;
-
-		case DTK_DAY:
-			tm->tm_mday = value;
-			tmask = DTK_M(DAY);
-			break;
-
-		case DTK_HOUR:
-			tm->tm_hour = value;
-			tmask = DTK_M(HOUR);
-			break;
-
-		case DTK_MINUTE:
-			tm->tm_min = value;
-			tmask = DTK_M(MINUTE);
-			break;
-
-		case DTK_SECOND:
-			tm->tm_sec = value;
-			tmask = DTK_M(SECOND);
-			if (*cp == '.')
-			{
-dterr = ParseFractionalSecond(cp, fsec);
-if (dterr)
-	return dterr;
-tmask = DTK_ALL_SECS_M;
-			}
-			break;
-
-		case DTK_TZ:
-			tmask = DTK_M(TZ);
-			dterr = DecodeTimezone(field[i], tzp);
-			if (dterr)
-return dterr;
-			break;
-
 		case DTK_JULIAN:
 			/* previous field was a label for "julian date" */
 			if (value < 0)
@@ -1510,6 +1433,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
 	case UNITS:
 		tmask = 0;
+		/* prevent consecutive unhandled units */
+		if (ptype != 0)
+			return DTERR_BAD_FORMAT;
 		ptype = val;
 		break;
 
@@ -1536,7 +1462,6 @@ DecodeDateTime(char **field, int *ftype, int nf,
 			 ftype[i + 1] != DTK_TIME &&
 			 ftype[i + 1] != DTK_DATE))
 			return DTERR_BAD_FORMAT;
-
 		ptype = val;
 		break;
 
@@ -1567,6 +1492,10 @@ DecodeDateTime(char **field, int *ftype, int nf,
 		fmask |= tmask;
 	}			/* end loop over fields */
 
+	/* prefix type was dangling and never handled */
+	if (ptype != 0)
+		return DTERR_BAD_FORMAT;
+
 	/* do final checking/adjustment of Y/M/D fields */
 	dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
 	if (dterr)
@@ -1933,7 +1862,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 	int			fmask = 0,
 tmask,
 type;
-	int			ptype = 0;		/* "prefix type" for ISO h04mm05s06 format */
+	int			ptype = 0;		/* "prefix type" for ISO format */
 	int			i;
 	int			val;
 	int			dterr;
@@ -2060,133 +1989,23 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 			case DTK_NUMBER:
 
 /*
- * Was this an "ISO time" with embedded field labels? An
- * example is "h04mm05s06" - thomas 2001-02-04
+ * Was this an "ISO time"? An example is "T040506.789"
  */
 if (ptype != 0)
 {
 	cha

Re: Date-Time dangling unit fix

2023-03-04 Thread Joseph Koshakow
On Sat, Mar 4, 2023 at 4:05 PM Tom Lane  wrote:
>
>I started to look at this, and soon noticed that while we have test
cases
>matching this sort of date input, there is no documentation for it.
The
>code claims it's an "ISO" (presumably ISO 8601) format, and maybe it is
>because it looks a lot like the ISO 8601 format for intervals
(durations).
>But I don't have a copy of ISO 8601, and some googling fails to find
any
>indication that anybody else believes this is a valid datetime format.
>Wikipedia for example documents a lot of variants of ISO 8601 [1],
>but nothing that looks like this.
>
>I wonder if we should just rip this code out instead of fixing it.
>I suspect its real-world usage is not different from zero.  We'd
>have to keep the "Jnnn" Julian-date case, though, so maybe there's
>little to be saved.
>
>If we do keep it, there's documentation work to be done.  But the
>first bit of doco I'd want to see is a pointer to a standard.

I also don't have a copy of ISO 8601 and wasn't able to find anything
about this variant on Google. I did find this comment in datetime.c

/*
* Was this an "ISO date" with embedded field labels? An
* example is "y2001m02d04" - thomas 2001-02-04
*/

which comes from this commit [1], which was authored by Thomas Lockhart
(presumably the same thomas from the comment). I've CC'ed Thomas in
case the email still exists and they happen to remember. The commit
message mentions ISO, but not the variant mentioned in the comment.
The mailing list thread can be found here [2], but it doesn't provide
much more information. I also found the following thread [3], which
happens to have you in it in case you remember it, which seemed to be
the motivation for commit [1]. It only contains the following line
about ISO:

> o support for "ISO variants" on input, including embedded "T" preceeding
the time fields

All that seems to imply the "y2001m02d04" ISO variant was never really
discussed in much detail and it's probably fine to remove it. Though,
it has been around for 22 years which makes it a bit scary to remove.

- Joe Koshakow

[1]
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=6f58115dddfa8ca63004c4784f57ef660422861d
[2]
https://www.postgresql.org/message-id/flat/3BB433D5.3CB4164E%40fourpalms.org
[3]
https://www.postgresql.org/message-id/flat/3B970FF8.B9990807%40fourpalms.org#c57d83c80d295bfa19887c92122369c3


Re: Date-time extraneous fields with reserved keywords

2023-03-04 Thread Joseph Koshakow
On Sat, Mar 4, 2023 at 2:48 PM Tom Lane  wrote:
>
>Right.  So really we ought to move the ValidateDate call as
>well as the next half-dozen lines about "mer" down into
>the subsequent "do additional checking" stanza.  It's all
>only relevant to normal date specs.
>
>BTW, looking at the set of RESERV tokens in datetktbl[],
>it looks to me like this change renders the final "default:"
>case unreachable, so probably we could just make that an error.

Please see the attached patch with these changes.

- Joe Koshakow
From 64a71ed287aa9611c22eaa6e2cbb7e080d93be79 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 11 Dec 2022 16:08:43 -0500
Subject: [PATCH] Handle extraneous fields in date-time input

DecodeDateTime sometimes allowed extraneous fields to be included with
reserved keywords. For example `date '1995-08-06 epoch'` would be
parsed successfully, but the date was ignored. This commit fixes the
issue so an error is returned instead.
---
 src/backend/utils/adt/datetime.c  | 35 ++-
 src/test/regress/expected/date.out| 33 +
 src/test/regress/expected/timestamp.out   | 33 +
 src/test/regress/expected/timestamptz.out | 33 +
 src/test/regress/sql/date.sql | 10 +++
 src/test/regress/sql/timestamp.sql| 10 +++
 src/test/regress/sql/timestamptz.sql  | 10 +++
 7 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 01660637a2..0c1207223c 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -1431,8 +1431,15 @@ DecodeDateTime(char **field, int *ftype, int nf,
 	*tzp = 0;
 break;
 
-			default:
+			case DTK_EPOCH:
+			case DTK_LATE:
+			case DTK_EARLY:
+tmask = (DTK_DATE_M | DTK_TIME_M | DTK_M(TZ));
 *dtype = val;
+break;
+
+			default:
+return DTERR_BAD_FORMAT;
 		}
 
 		break;
@@ -1567,22 +1574,22 @@ DecodeDateTime(char **field, int *ftype, int nf,
 		fmask |= tmask;
 	}			/* end loop over fields */
 
-	/* do final checking/adjustment of Y/M/D fields */
-	dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
-	if (dterr)
-		return dterr;
-
-	/* handle AM/PM */
-	if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2)
-		return DTERR_FIELD_OVERFLOW;
-	if (mer == AM && tm->tm_hour == HOURS_PER_DAY / 2)
-		tm->tm_hour = 0;
-	else if (mer == PM && tm->tm_hour != HOURS_PER_DAY / 2)
-		tm->tm_hour += HOURS_PER_DAY / 2;
-
 	/* do additional checking for full date specs... */
 	if (*dtype == DTK_DATE)
 	{
+		/* do final checking/adjustment of Y/M/D fields */
+		dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
+		if (dterr)
+			return dterr;
+
+		/* handle AM/PM */
+		if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2)
+			return DTERR_FIELD_OVERFLOW;
+		if (mer == AM && tm->tm_hour == HOURS_PER_DAY / 2)
+			tm->tm_hour = 0;
+		else if (mer == PM && tm->tm_hour != HOURS_PER_DAY / 2)
+			tm->tm_hour += HOURS_PER_DAY / 2;
+
 		if ((fmask & DTK_DATE_M) != DTK_DATE_M)
 		{
 			if ((fmask & DTK_TIME_M) == DTK_TIME_M)
diff --git a/src/test/regress/expected/date.out b/src/test/regress/expected/date.out
index f5949f3d17..c874f06546 100644
--- a/src/test/regress/expected/date.out
+++ b/src/test/regress/expected/date.out
@@ -1532,3 +1532,36 @@ select make_time(10, 55, 100.1);
 ERROR:  time field value out of range: 10:55:100.1
 select make_time(24, 0, 2.1);
 ERROR:  time field value out of range: 24:00:2.1
+-- test errors with reserved keywords
+SELECT date '1995-08-06 epoch';
+ERROR:  invalid input syntax for type date: "1995-08-06 epoch"
+LINE 1: SELECT date '1995-08-06 epoch';
+^
+SELECT date '1995-08-06 infinity';
+ERROR:  invalid input syntax for type date: "1995-08-06 infinity"
+LINE 1: SELECT date '1995-08-06 infinity';
+^
+SELECT date '1995-08-06 -infinity';
+ERROR:  invalid input syntax for type date: "1995-08-06 -infinity"
+LINE 1: SELECT date '1995-08-06 -infinity';
+^
+SELECT date 'epoch 1995-08-06';
+ERROR:  invalid input syntax for type date: "epoch 1995-08-06"
+LINE 1: SELECT date 'epoch 1995-08-06';
+^
+SELECT date 'infinity 1995-08-06';
+ERROR:  invalid input syntax for type date: "infinity 1995-08-06"
+LINE 1: SELECT date 'infinity 1995-08-06';
+^
+SELECT date '-infinity 1995-08-06';
+ERROR:  invalid input syntax for type date: "-infinity 1995-08-06"
+LINE 1: SELECT date '-infinity 1995-08-06';
+^
+SELECT date 'now infinity';
+ERROR:  invalid input syntax for type date: "now infinity"
+LINE 1: SELECT date 'now inf

Re: Date-time extraneous fields with reserved keywords

2023-03-04 Thread Joseph Koshakow
On Sat, Mar 4, 2023 at 1:56 PM Tom Lane  wrote:
>
>I think we should tread very carefully about disallowing inputs that
>have been considered acceptable for 25 years.  I agree with disallowing
>numeric fields along with 'epoch' and 'infinity', but for example
>this seems perfectly useful and sensible:
>
># select timestamptz 'today 12:34';
>  timestamptz
>
> 2023-03-04 12:34:00-05
>(1 row)

Yeah, that makes sense. I'll leave it as is with
the explicit case for 'epoch', 'infinity', and
'-infinity'.

>Why do you want to skip ValidateDate in some cases?  If we've not
>had to do that before, I don't see why it's a good idea now.

This goes back to the abstraction break of
setting tmask without updating tm. Certain
validations will check that if a field is set in
fmask (which is an accumulation of tmask from
every iteration) then its value in tm is valid.
For example:

if (fmask & DTK_M(YEAR))
{
// ...
else
{
/* there is no year zero in AD/BC notation */
if (tm->tm_year <= 0)
return DTERR_FIELD_OVERFLOW;
}
}

As far as I can tell dtype always equals DTK_DATE
except when the timestamp/date is 'epoch',
'infinity', '-infinity', and none of the
validations apply to those date/timestamps.
Though, I think you're right this is probably
not a good idea. I'll try and brainstorm a
different approach, unless you have some ideas.


Re: Date-time extraneous fields with reserved keywords

2023-03-04 Thread Joseph Koshakow
Attached is the described patch. I have two notes
after implementing it:
  - It feels like a bit of an abstraction break to
  set tmask without actually setting any fields in
  tm.
  - I'm not sure if we should hard code in those
  three specific reserved keywords or set tmask
  in the default case.

Any thoughts?

- Joe Koshakow
From 78d8f39db8df68502369ffd9edd6f6e38f4dadb8 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 11 Dec 2022 16:08:43 -0500
Subject: [PATCH] Handle extraneous fields in date-time input

DecodeDateTime sometimes allowed extraneous fields to be included with
reserved keywords. For example `date '1995-08-06 epoch'` would be
parsed successfully, but the date was ignored. This commit fixes the
issue so an error is returned instead.
---
 src/backend/utils/adt/datetime.c  | 18 ++---
 src/test/regress/expected/date.out| 33 +++
 src/test/regress/expected/timestamp.out   | 33 +++
 src/test/regress/expected/timestamptz.out | 33 +++
 src/test/regress/sql/date.sql | 10 +++
 src/test/regress/sql/timestamp.sql| 10 +++
 src/test/regress/sql/timestamptz.sql  | 10 +++
 7 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 01660637a2..6f82465fd1 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -1431,6 +1431,13 @@ DecodeDateTime(char **field, int *ftype, int nf,
 	*tzp = 0;
 break;
 
+			case DTK_EPOCH:
+			case DTK_LATE:
+			case DTK_EARLY:
+tmask = (DTK_DATE_M | DTK_TIME_M | DTK_M(TZ));
+*dtype = val;
+break;
+
 			default:
 *dtype = val;
 		}
@@ -1567,10 +1574,13 @@ DecodeDateTime(char **field, int *ftype, int nf,
 		fmask |= tmask;
 	}			/* end loop over fields */
 
-	/* do final checking/adjustment of Y/M/D fields */
-	dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
-	if (dterr)
-		return dterr;
+	if (*dtype == DTK_DATE)
+	{
+		/* do final checking/adjustment of Y/M/D fields */
+		dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
+		if (dterr)
+			return dterr;
+	}
 
 	/* handle AM/PM */
 	if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2)
diff --git a/src/test/regress/expected/date.out b/src/test/regress/expected/date.out
index f5949f3d17..c874f06546 100644
--- a/src/test/regress/expected/date.out
+++ b/src/test/regress/expected/date.out
@@ -1532,3 +1532,36 @@ select make_time(10, 55, 100.1);
 ERROR:  time field value out of range: 10:55:100.1
 select make_time(24, 0, 2.1);
 ERROR:  time field value out of range: 24:00:2.1
+-- test errors with reserved keywords
+SELECT date '1995-08-06 epoch';
+ERROR:  invalid input syntax for type date: "1995-08-06 epoch"
+LINE 1: SELECT date '1995-08-06 epoch';
+^
+SELECT date '1995-08-06 infinity';
+ERROR:  invalid input syntax for type date: "1995-08-06 infinity"
+LINE 1: SELECT date '1995-08-06 infinity';
+^
+SELECT date '1995-08-06 -infinity';
+ERROR:  invalid input syntax for type date: "1995-08-06 -infinity"
+LINE 1: SELECT date '1995-08-06 -infinity';
+^
+SELECT date 'epoch 1995-08-06';
+ERROR:  invalid input syntax for type date: "epoch 1995-08-06"
+LINE 1: SELECT date 'epoch 1995-08-06';
+^
+SELECT date 'infinity 1995-08-06';
+ERROR:  invalid input syntax for type date: "infinity 1995-08-06"
+LINE 1: SELECT date 'infinity 1995-08-06';
+^
+SELECT date '-infinity 1995-08-06';
+ERROR:  invalid input syntax for type date: "-infinity 1995-08-06"
+LINE 1: SELECT date '-infinity 1995-08-06';
+^
+SELECT date 'now infinity';
+ERROR:  invalid input syntax for type date: "now infinity"
+LINE 1: SELECT date 'now infinity';
+^
+SELECT date '-infinity infinity';
+ERROR:  invalid input syntax for type date: "-infinity infinity"
+LINE 1: SELECT date '-infinity infinity';
+^
diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out
index c64bcb7c12..c2159c2cec 100644
--- a/src/test/regress/expected/timestamp.out
+++ b/src/test/regress/expected/timestamp.out
@@ -2125,3 +2125,36 @@ select * from generate_series('2020-01-01 00:00'::timestamp,
   '2020-01-02 03:00'::timestamp,
   '0 hour'::interval);
 ERROR:  step size cannot equal zero
+-- test errors with reserved keywords
+SELECT timestamp '1995-08-06 01:01:01 epoch';
+ERROR:  invalid input syntax for type timestamp: "1995-08-06 01:01:01 epoch"
+LINE 1: SELECT timestamp '1995-08-06 01:01:01 epoch';
+ ^
+SELECT timestamp '1995-08-06 01:01:01 infinity';
+ERROR:  invalid input syntax fo

Re: Date-time extraneous fields with reserved keywords

2023-03-04 Thread Joseph Koshakow
On Sat, Mar 4, 2023 at 11:23 AM Keisuke Kuroda 
wrote:
>
>Good catch.
>Of the reserved words that are special values of type Date/Time,
>'now', 'today', 'tomorrow', 'yesterday', and 'allballs',
>I get an error even before applying the patch.

Thanks for pointing this out. After taking a look
at the code, 'now', 'today', 'tomorrow',
'yesterday', and 'allballs' all set the
appropriate tmask field which is what causes them
to error.

  case DTK_NOW:
tmask = (DTK_DATE_M | DTK_TIME_M | DTK_M(TZ));

  case DTK_YESTERDAY:
tmask = DTK_DATE_M;

  case DTK_TODAY:
tmask = DTK_DATE_M;

  case DTK_TOMORROW:
tmask = DTK_DATE_M;

  case DTK_ZULU:
tmask = (DTK_TIME_M | DTK_M(TZ));


while 'epoch', 'infinity', and '-infinity' do not
set tmask (note the default below handles all of
these fields)

  default:
  *dtype = val;

So I think a better fix here would be to also set
tmask for those three reserved keywords.


>One thing I noticed is that the following SQL
>returns normal results even after applying the patch.
>
>postgres=# select timestamp 'epoch 01:01:01';
>  timestamp
>-
> 1970-01-01 00:00:00
>(1 row)
>
>When 'epoch','infinity','-infinity' and time are specified together,
>the time specified in the SQL is not included in result.
>I think it might be better to assume that this pattern is also an
error.
>What do you think?

I agree this pattern should also be an error. I
think that the tmask approach will cause an error
for this pattern as well.

Thanks,
Joe Koshakow


Re: is_superuser is not documented

2023-03-02 Thread Joseph Koshakow
On Thu, Mar 2, 2023 at 11:53 AM Fujii Masao 
wrote:
>
>On 2022/09/14 14:27, bt22kawamotok wrote:
>> I update patch to reflect master update.
>
>Thanks for updating the patch!
>
>+   
>+Shows whether the current user is a superuser or not.
>+   
>
>How about adding the note about when this parameter can change,
>like we do for in_hot_standby docs?  I applied this change to the
patch.
>Attached is the updated version of the patch.
>

I just came across this thread and noticed that the patch was never
merged. There are some brief docs for is_superuser in the SHOW docs:
https://www.postgresql.org/docs/current/sql-show.html, but the GUC
fields were never updated.

Is there a reason that it never got merged or was it just forgotten
about?

- Joe Koshakow


Re: Infinite Interval

2023-03-01 Thread Joseph Koshakow
On Wed, Mar 1, 2023 at 3:03 PM Gregory Stark (as CFM) 
wrote:
>
>It looks like this patch needs a (perhaps trivial) rebase.

Attached is a rebased patch.

>It sounds like all the design questions are resolved so perhaps this
>can be set to Ready for Committer once it's rebased?

There hasn't really been a review of this patch yet. It's just been
mostly me talking to myself in this thread, and a couple of
contributions from jian.

- Joe Koshakow
From 1b35e2b96bcf69431bbd8720523163de10cf Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] Add infinite interval values

This commit adds positive and negative infinite values to the interval
data type. The entire range of intervals with INT_MAX months or INT_MIN
months are reserved for infinite values. This makes checking finiteness
much simpler.

Ashutosh Bapat and Joe Koshakow and Jian He
---
 doc/src/sgml/datatype.sgml|   2 +-
 doc/src/sgml/func.sgml|   5 +-
 src/backend/utils/adt/date.c  |  32 +
 src/backend/utils/adt/datetime.c  |  39 +-
 src/backend/utils/adt/formatting.c|   2 +-
 src/backend/utils/adt/selfuncs.c  |  12 +-
 src/backend/utils/adt/timestamp.c | 711 ++
 src/include/common/int.h  |  18 +
 src/include/datatype/timestamp.h  |  19 +
 src/include/utils/timestamp.h |   3 +
 src/test/regress/expected/horology.out|   6 +-
 src/test/regress/expected/interval.out| 563 +++--
 src/test/regress/expected/timestamp.out   |  62 ++
 src/test/regress/expected/timestamptz.out |  62 ++
 src/test/regress/sql/horology.sql |   6 +-
 src/test/regress/sql/interval.sql | 174 +-
 src/test/regress/sql/timestamp.sql|  19 +
 src/test/regress/sql/timestamptz.sql  |  18 +
 18 files changed, 1519 insertions(+), 234 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..d782d23574 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -2316,7 +2316,7 @@ TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
  
  
   infinity
-  date, timestamp
+  date, timestamp, interval
   later than all other time stamps
  
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 97b3f1c1a6..c83f38d263 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9393,7 +9393,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  boolean
 
 
- Test for finite interval (currently always true)
+ Test for finite interval (not +/-infinity)
 
 
  isfinite(interval '4 hours')
@@ -10280,7 +10280,8 @@ SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40');
  When the input value is +/-Infinity, extract returns
  +/-Infinity for monotonically-increasing fields (epoch,
  julian, year, isoyear,
- decade, century, and millennium).
+ decade, century, and millennium
+ for all types and hour and day just for interval).
  For other fields, NULL is returned.  PostgreSQL
  versions before 9.6 returned zero for all cases of infinite input.
 
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 99171d9c92..dc271f663c 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2023,6 +2023,11 @@ interval_time(PG_FUNCTION_ARGS)
 	TimeADT		result;
 	int64		days;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range")));
+
 	result = span->time;
 	if (result >= USECS_PER_DAY)
 	{
@@ -2067,6 +2072,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2085,6 +2095,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2599,6 +2614,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTz

Re: Inconsistency in ACL error message

2023-02-24 Thread Joseph Koshakow
On Fri, Feb 24, 2023 at 1:31 PM Nathan Bossart 
wrote:

> You might be interested in
>
>https://commitfest.postgresql.org/42/4145/

Ah, perfect. In that case ignore my patch!

- Joe Koshakow


Inconsistency in ACL error message

2023-02-24 Thread Joseph Koshakow
Hi all,

I noticed a very minor inconsistency in some ACL error messages. When
you try to alter a role, it just says "permission denied":

  postgres=> ALTER ROLE bar NOCREATEDB;
  ERROR:  permission denied
  postgres=> ALTER ROLE bar SET search_path TO 'foo';
  ERROR:  permission denied

For almost all other ACL errors, we include what the action was. For
example:

  postgres=> CREATE ROLE r;
  ERROR:  permission denied to create role
  postgres=> DROP ROLE postgres;
  ERROR:  permission denied to drop role
  postgres=> CREATE DATABASE foo;
  ERROR:  permission denied to create database


It's not a huge deal, but it's easy enough to fix that I thought I'd
generate a patch (attached). Let me know if people think that it's
worth merging.

- Joe Koshakow
From 3ab31bc755043973ce56ee620ad99b5789d12111 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Fri, 24 Feb 2023 12:05:19 -0500
Subject: [PATCH] Add details to ALTER ROLE permission errors

---
 src/backend/commands/user.c   | 4 ++--
 src/test/regress/expected/create_role.out | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c
index 3a92e930c0..2c7a4204a6 100644
--- a/src/backend/commands/user.c
+++ b/src/backend/commands/user.c
@@ -761,7 +761,7 @@ AlterRole(ParseState *pstate, AlterRoleStmt *stmt)
 			dvalidUntil || disreplication || dbypassRLS)
 			ereport(ERROR,
 	(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-	 errmsg("permission denied")));
+	 errmsg("permission denied to alter role")));
 
 		/* an unprivileged user can change their own password */
 		if (dpassword && roleid != currentUserId)
@@ -1008,7 +1008,7 @@ AlterRoleSet(AlterRoleSetStmt *stmt)
 && roleid != GetUserId())
 ereport(ERROR,
 		(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-		 errmsg("permission denied")));
+		 errmsg("permission denied to alter role")));
 		}
 
 		ReleaseSysCache(roletuple);
diff --git a/src/test/regress/expected/create_role.out b/src/test/regress/expected/create_role.out
index 9f431bd4f5..691cff86d2 100644
--- a/src/test/regress/expected/create_role.out
+++ b/src/test/regress/expected/create_role.out
@@ -98,7 +98,7 @@ ERROR:  must have admin option on role "regress_role_normal"
 ALTER ROLE regress_role_normal RENAME TO regress_role_abnormal;
 ERROR:  permission denied to rename role
 ALTER ROLE regress_role_normal NOINHERIT NOLOGIN CONNECTION LIMIT 7;
-ERROR:  permission denied
+ERROR:  permission denied to alter role
 -- ok, regress_tenant can create objects within the database
 SET SESSION AUTHORIZATION regress_tenant;
 CREATE TABLE tenant_table (i integer);
-- 
2.34.1



Re: Infinite Interval

2023-01-15 Thread Joseph Koshakow
On Sat, Jan 14, 2023 at 4:22 PM Joseph Koshakow  wrote:
>
> At this point the patch is ready for review again except for the one
> outstanding question of: Should finite checks on intervals only look at
> months or all three fields?
>
> - Joe

I've gone ahead and updated the patch to only look at the months field.
I'll submit this email and patch to the Feb commitfest.

- Joe
From 123cdf534cc1a0e9a44e7dc8641d23e2c5b09e31 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] Add infinite interval values

This commit adds positive and negative infinite values to the interval
data type. The entire range of intervals with INT_MAX months or INT_MIN
months are reserved for infinite values. This makes checking finiteness
much simpler.

Ashutosh Bapat and Joe Koshakow and Jian He
---
 doc/src/sgml/datatype.sgml|   2 +-
 doc/src/sgml/func.sgml|   5 +-
 src/backend/utils/adt/date.c  |  32 +
 src/backend/utils/adt/datetime.c  |  39 +-
 src/backend/utils/adt/formatting.c|   2 +-
 src/backend/utils/adt/selfuncs.c  |  12 +-
 src/backend/utils/adt/timestamp.c | 705 ++
 src/include/common/int.h  |  18 +
 src/include/datatype/timestamp.h  |  19 +
 src/include/utils/timestamp.h |   3 +
 src/test/regress/expected/horology.out|   6 +-
 src/test/regress/expected/interval.out| 563 +++--
 src/test/regress/expected/timestamp.out   |  62 ++
 src/test/regress/expected/timestamptz.out |  62 ++
 src/test/regress/sql/horology.sql |   6 +-
 src/test/regress/sql/interval.sql | 174 +-
 src/test/regress/sql/timestamp.sql|  19 +
 src/test/regress/sql/timestamptz.sql  |  18 +
 18 files changed, 1516 insertions(+), 231 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..d782d23574 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -2316,7 +2316,7 @@ TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
  
  
   infinity
-  date, timestamp
+  date, timestamp, interval
   later than all other time stamps
  
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b8dac9ef46..36b31f7163 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9393,7 +9393,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  boolean
 
 
- Test for finite interval (currently always true)
+ Test for finite interval (not +/-infinity)
 
 
  isfinite(interval '4 hours')
@@ -10280,7 +10280,8 @@ SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40');
  When the input value is +/-Infinity, extract returns
  +/-Infinity for monotonically-increasing fields (epoch,
  julian, year, isoyear,
- decade, century, and millennium).
+ decade, century, and millennium
+ for all types and hour and day just for interval).
  For other fields, NULL is returned.  PostgreSQL
  versions before 9.6 returned zero for all cases of infinite input.
 
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 99171d9c92..dc271f663c 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2023,6 +2023,11 @@ interval_time(PG_FUNCTION_ARGS)
 	TimeADT		result;
 	int64		days;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range")));
+
 	result = span->time;
 	if (result >= USECS_PER_DAY)
 	{
@@ -2067,6 +2072,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2085,6 +2095,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2599,6 +2614,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2621,6 +2641,1

Re: Infinite Interval

2023-01-14 Thread Joseph Koshakow
Ok, I've updated the patch to handle every function that inputs or
outputs intervals, as well as added some tests. In the process I
noticed that some of the existing date/timestamp/timestamptz functions
don't handle infinite values properly. For example,
postgres=# SELECT age('infinity'::timestamp);
age
--
-292253 years -11 mons -26 days -04:00:54.775807
(1 row)

It might be worth going through all those functions separately
and making sure they are correct.

I also added some overflow handling to make_interval.

I also added handling of infinite timestamp subtraction.

At this point the patch is ready for review again except for the one
outstanding question of: Should finite checks on intervals only look at
months or all three fields?

- Joe
From 23868228ad2c0be57408b38db76bced85ab83cb1 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

TODOs
1. Should we just use the months field to test for infinity?

Ashutosh Bapat and Joe Koshakow and Jian He
---
 doc/src/sgml/datatype.sgml|   2 +-
 doc/src/sgml/func.sgml|   5 +-
 src/backend/utils/adt/date.c  |  32 +
 src/backend/utils/adt/datetime.c  |  39 +-
 src/backend/utils/adt/formatting.c|   2 +-
 src/backend/utils/adt/selfuncs.c  |  12 +-
 src/backend/utils/adt/timestamp.c | 695 ++
 src/include/common/int.h  |  18 +
 src/include/datatype/timestamp.h  |  21 +
 src/include/utils/timestamp.h |   3 +
 src/test/regress/expected/horology.out|   6 +-
 src/test/regress/expected/interval.out| 503 ++--
 src/test/regress/expected/timestamp.out   |  62 ++
 src/test/regress/expected/timestamptz.out |  62 ++
 src/test/regress/sql/horology.sql |   6 +-
 src/test/regress/sql/interval.sql | 149 -
 src/test/regress/sql/timestamp.sql|  19 +
 src/test/regress/sql/timestamptz.sql  |  18 +
 18 files changed, 1461 insertions(+), 193 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 467b49b199..d782d23574 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -2316,7 +2316,7 @@ TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
  
  
   infinity
-  date, timestamp
+  date, timestamp, interval
   later than all other time stamps
  
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b8dac9ef46..36b31f7163 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9393,7 +9393,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  boolean
 
 
- Test for finite interval (currently always true)
+ Test for finite interval (not +/-infinity)
 
 
  isfinite(interval '4 hours')
@@ -10280,7 +10280,8 @@ SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40');
  When the input value is +/-Infinity, extract returns
  +/-Infinity for monotonically-increasing fields (epoch,
  julian, year, isoyear,
- decade, century, and millennium).
+ decade, century, and millennium
+ for all types and hour and day just for interval).
  For other fields, NULL is returned.  PostgreSQL
  versions before 9.6 returned zero for all cases of infinite input.
 
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 99171d9c92..dc271f663c 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2023,6 +2023,11 @@ interval_time(PG_FUNCTION_ARGS)
 	TimeADT		result;
 	int64		days;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range")));
+
 	result = span->time;
 	if (result >= USECS_PER_DAY)
 	{
@@ -2067,6 +2072,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2085,6 +2095,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2599,6 +2614,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_

Re: Infinite Interval

2023-01-10 Thread Joseph Koshakow
On Sun, Jan 8, 2023 at 11:17 PM jian he  wrote:
>
>
>
> On Sun, Jan 8, 2023 at 4:22 AM Joseph Koshakow  wrote:
>>
>> On Sat, Jan 7, 2023 at 3:05 PM Joseph Koshakow  wrote:
>> >
>> > On Sat, Jan 7, 2023 at 3:04 PM Joseph Koshakow  wrote:
>> > >
>> > > I think this patch is just about ready for review, except for the
>> > > following two questions:
>> > >   1. Should finite checks on intervals only look at months or all three
>> > >   fields?
>> > >   2. Should we make the error messages for adding/subtracting infinite
>> > >   values more generic or leave them as is?
>> > >
>> > > My opinions are
>> > >   1. We should only look at months.
>> > >   2. We should make the errors more generic.
>> > >
>> > > Anyone else have any thoughts?
>>
>> Here's a patch with the more generic error messages.
>>
>> - Joe
>
>
> HI.
>
> I just found out another problem.
>
> select * from  generate_series(timestamp'-infinity', timestamp 'infinity', 
> interval 'infinity');
> ERROR:  timestamp out of range
>
> select * from  generate_series(timestamp'-infinity',timestamp 'infinity', 
> interval '-infinity'); --return following
>
>  generate_series
> -
> (0 rows)
>
>
> select * from generate_series(timestamp 'infinity',timestamp 'infinity', 
> interval 'infinity');
> --will run all the time.
>
> select * from  generate_series(timestamp 'infinity',timestamp 'infinity', 
> interval '-infinity');
> ERROR:  timestamp out of range
>
>  select * from  generate_series(timestamp'-infinity',timestamp'-infinity', 
> interval 'infinity');
> ERROR:  timestamp out of range
>
> select * from  generate_series(timestamp'-infinity',timestamp'-infinity', 
> interval '-infinity');
> --will run all the time.

Good catch, I didn't think to check non date/time functions.
Unfortunately, I think you may have opened Pandora's box. I went through
pg_proc.dat and found the following functions that all involve
intervals. We should probably investigate all of them and make sure
that they handle infinite intervals properly.

{ oid => '1026', descr => 'adjust timestamp to new time zone',
proname => 'timezone', prorettype => 'timestamp',
proargtypes => 'interval timestamptz', prosrc => 'timestamptz_izone' },

{ oid => '4133', descr => 'window RANGE support',
proname => 'in_range', prorettype => 'bool',
proargtypes => 'date date interval bool bool',
prosrc => 'in_range_date_interval' },

{ oid => '1305', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
provolatile => 's', prorettype => 'bool',
proargtypes => 'timestamptz interval timestamptz interval',
prosrc => 'see system_functions.sql' },

{ oid => '1305', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
provolatile => 's', prorettype => 'bool',
proargtypes => 'timestamptz interval timestamptz interval',
prosrc => 'see system_functions.sql' },
{ oid => '1306', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
provolatile => 's', prorettype => 'bool',
proargtypes => 'timestamptz timestamptz timestamptz interval',
prosrc => 'see system_functions.sql' },
{ oid => '1307', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
provolatile => 's', prorettype => 'bool',
proargtypes => 'timestamptz interval timestamptz timestamptz',
prosrc => 'see system_functions.sql' },

{ oid => '1308', descr => 'intervals overlap?',
proname => 'overlaps', proisstrict => 'f', prorettype => 'bool',
proargtypes => 'time time time time', prosrc => 'overlaps_time' },
{ oid => '1309', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
prorettype => 'bool', proargtypes => 'time interval time interval',
prosrc => 'see system_functions.sql' },
{ oid => '1310', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
prorettype => 'bool', proargtypes => 'time time time interval',
prosrc => 'see system_functions.sql' },
{ oid => '1311', descr => 'intervals overlap?',
proname => 'overlaps', prolang => 'sql', proisstrict => 'f',
prorettype => 'bool', proargtypes => 'time interval time time',
prosrc => 'see system_functions.sql' },

{ oid => '1386',
descr => 'date difference from today preserving months and years',
proname => 'age', prolang => 'sql', provolatile => 's',
prorettype => 'interval', proargtypes => 'timestamp

Re: Infinite Interval

2023-01-07 Thread Joseph Koshakow
On Sat, Jan 7, 2023 at 3:05 PM Joseph Koshakow  wrote:
>
> On Sat, Jan 7, 2023 at 3:04 PM Joseph Koshakow  wrote:
> >
> > I think this patch is just about ready for review, except for the
> > following two questions:
> >   1. Should finite checks on intervals only look at months or all three
> >   fields?
> >   2. Should we make the error messages for adding/subtracting infinite
> >   values more generic or leave them as is?
> >
> > My opinions are
> >   1. We should only look at months.
> >   2. We should make the errors more generic.
> >
> > Anyone else have any thoughts?

Here's a patch with the more generic error messages.

- Joe
From 6ed93bc20db57cea2d692e9288d97b66f4a526dc Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

TODOs
1. Should we just use the months field to test for infinity?

Ashutosh Bapat and Joe Koshakow and Jian He
---
 doc/src/sgml/datatype.sgml |   2 +-
 doc/src/sgml/func.sgml |   5 +-
 src/backend/utils/adt/date.c   |  20 ++
 src/backend/utils/adt/datetime.c   |  14 +-
 src/backend/utils/adt/timestamp.c  | 448 
 src/include/datatype/timestamp.h   |  21 ++
 src/test/regress/expected/horology.out |   6 +-
 src/test/regress/expected/interval.out | 466 +++--
 src/test/regress/sql/horology.sql  |   6 +-
 src/test/regress/sql/interval.sql  | 130 ++-
 10 files changed, 1002 insertions(+), 116 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index fdffba4442..2bcf959f70 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -2316,7 +2316,7 @@ TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
  
  
   infinity
-  date, timestamp
+  date, timestamp, interval
   later than all other time stamps
  
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 3bf8d021c3..7ddf76da4a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9369,7 +9369,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  boolean
 
 
- Test for finite interval (currently always true)
+ Test for finite interval (not +/-infinity)
 
 
  isfinite(interval '4 hours')
@@ -10256,7 +10256,8 @@ SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40');
  When the input value is +/-Infinity, extract returns
  +/-Infinity for monotonically-increasing fields (epoch,
  julian, year, isoyear,
- decade, century, and millennium).
+ decade, century, and millennium
+ for all types and hour and day just for interval).
  For other fields, NULL is returned.  PostgreSQL
  versions before 9.6 returned zero for all cases of infinite input.
 
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 99171d9c92..8334b9053f 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2067,6 +2067,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2085,6 +2090,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2599,6 +2609,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2621,6 +2636,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index d166613895..4192e7a74b 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ 

Re: Infinite Interval

2023-01-07 Thread Joseph Koshakow
On Sat, Jan 7, 2023 at 3:04 PM Joseph Koshakow  wrote:
>
> On Thu, Jan 5, 2023 at 11:30 PM jian he  wrote:
> >
> >
> >
> > On Fri, Jan 6, 2023 at 6:54 AM Joseph Koshakow  wrote:
> >>
> >> Looks like some of the error messages have changed and we
> >> have some issues with parsing "+infinity" after rebasing.
> >
> >
> > There is a commit 2ceea5adb02603ef52579b568ca2c5aebed87358
> > https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=2ceea5adb02603ef52579b568ca2c5aebed87358
> > if you pull this commit then you can do select interval '+infinity', even 
> > though I don't know why.
>
> It turns out that I was just misreading the error. The test was
> expecting us to fail on "+infinity" but we succeeded. I just removed
> that test case.
>
> >> pgindent. Looks like some of the error messages have changed
>
> The conditions for checking valid addition/subtraction between infinite
> values were missing some cases which explains the change in error
> messages. I've updated the logic and removed duplicate checks.
>
> I removed the extract/date_part tests since they were duplicated in a
> test above. I also converted the DO command tests to using SQL with
> joins so it more closely matches the existing tests.
>
> I've updated the extract/date_part logic for infinite intervals. Fields
> that are monotonically increasing should return +/-infinity and all
> others should return NULL. For Intervals, the fields are the same as
> timestamps plus the hour and day fields since those don't overflow into
> the next highest field.
>
> I think this patch is just about ready for review, except for the
> following two questions:
>   1. Should finite checks on intervals only look at months or all three
>   fields?
>   2. Should we make the error messages for adding/subtracting infinite
>   values more generic or leave them as is?
>
> My opinions are
>   1. We should only look at months.
>   2. We should make the errors more generic.
>
> Anyone else have any thoughts?
>
> - Joe

Oops I forgot the actual patch. Please see attached.
From 4ea7c98d47dcbff1313a5013572cc79839e4417e Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

TODOs
1. Should we just use the months field to test for infinity?
2. Should the error messages for adding opposite-sign infinities be "interval out of range"?

Ashutosh Bapat and Joe Koshakow and Jian He
---
 doc/src/sgml/datatype.sgml |   2 +-
 doc/src/sgml/func.sgml |   5 +-
 src/backend/utils/adt/date.c   |  20 ++
 src/backend/utils/adt/datetime.c   |  14 +-
 src/backend/utils/adt/timestamp.c  | 448 
 src/include/datatype/timestamp.h   |  21 ++
 src/test/regress/expected/horology.out |   6 +-
 src/test/regress/expected/interval.out | 466 +++--
 src/test/regress/sql/horology.sql  |   6 +-
 src/test/regress/sql/interval.sql  | 130 ++-
 10 files changed, 1002 insertions(+), 116 deletions(-)

diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index fdffba4442..2bcf959f70 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -2316,7 +2316,7 @@ TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
  
  
   infinity
-  date, timestamp
+  date, timestamp, interval
   later than all other time stamps
  
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 3bf8d021c3..7ddf76da4a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9369,7 +9369,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  boolean
 
 
- Test for finite interval (currently always true)
+ Test for finite interval (not +/-infinity)
 
 
  isfinite(interval '4 hours')
@@ -10256,7 +10256,8 @@ SELECT EXTRACT(YEAR FROM TIMESTAMP '2001-02-16 20:38:40');
  When the input value is +/-Infinity, extract returns
  +/-Infinity for monotonically-increasing fields (epoch,
  julian, year, isoyear,
- decade, century, and millennium).
+ decade, century, and millennium
+ for all types and hour and day just for interval).
  For other fields, NULL is returned.  PostgreSQL
  versions before 9.6 returned zero for all cases of infinite input.
 
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 99171d9c92..8334b9053f 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2067,6 +2067,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(e

Re: Infinite Interval

2023-01-07 Thread Joseph Koshakow
On Thu, Jan 5, 2023 at 11:30 PM jian he  wrote:
>
>
>
> On Fri, Jan 6, 2023 at 6:54 AM Joseph Koshakow  wrote:
>>
>> Looks like some of the error messages have changed and we
>> have some issues with parsing "+infinity" after rebasing.
>
>
> There is a commit 2ceea5adb02603ef52579b568ca2c5aebed87358
> https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=2ceea5adb02603ef52579b568ca2c5aebed87358
> if you pull this commit then you can do select interval '+infinity', even 
> though I don't know why.

It turns out that I was just misreading the error. The test was
expecting us to fail on "+infinity" but we succeeded. I just removed
that test case.

>> pgindent. Looks like some of the error messages have changed

The conditions for checking valid addition/subtraction between infinite
values were missing some cases which explains the change in error
messages. I've updated the logic and removed duplicate checks.

I removed the extract/date_part tests since they were duplicated in a
test above. I also converted the DO command tests to using SQL with
joins so it more closely matches the existing tests.

I've updated the extract/date_part logic for infinite intervals. Fields
that are monotonically increasing should return +/-infinity and all
others should return NULL. For Intervals, the fields are the same as
timestamps plus the hour and day fields since those don't overflow into
the next highest field.

I think this patch is just about ready for review, except for the
following two questions:
  1. Should finite checks on intervals only look at months or all three
  fields?
  2. Should we make the error messages for adding/subtracting infinite
  values more generic or leave them as is?

My opinions are
  1. We should only look at months.
  2. We should make the errors more generic.

Anyone else have any thoughts?

- Joe




Re: Infinite Interval

2023-01-05 Thread Joseph Koshakow
Jian,

I incorporated your changes and updated interval.out and ran
pgindent. Looks like some of the error messages have changed and we
have some issues with parsing "+infinity" after rebasing.

- Joe
From 4bf672f9079322cffde635dff2078582fca55f09 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

TODOs
1. Various TODOs in code.
2. Correctly implement interval_part for infinite intervals.
3. Fix Tests.
4. Should we just use the months field to test for infinity?
5. Update docs

Ashutosh Bapat and Joe Koshakow
---
 src/backend/utils/adt/date.c   |  20 +
 src/backend/utils/adt/datetime.c   |  14 +-
 src/backend/utils/adt/timestamp.c  | 425 ---
 src/include/datatype/timestamp.h   |  22 +
 src/test/regress/expected/horology.out |   6 +-
 src/test/regress/expected/interval.out | 691 +++--
 src/test/regress/sql/horology.sql  |   6 +-
 src/test/regress/sql/interval.sql  | 191 ++-
 8 files changed, 1264 insertions(+), 111 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 99171d9c92..8334b9053f 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2067,6 +2067,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2085,6 +2090,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2599,6 +2609,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2621,6 +2636,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index d166613895..4192e7a74b 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -70,7 +70,7 @@ static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t,
   const char *abbr, pg_tz *tzp,
   int *offset, int *isdst);
 static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp,
-   DateTimeErrorExtra *extra);
+   DateTimeErrorExtra * extra);
 
 
 const int	day_tab[2][13] =
@@ -978,7 +978,7 @@ ParseDateTime(const char *timestr, char *workbuf, size_t buflen,
 int
 DecodeDateTime(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -1928,7 +1928,7 @@ DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp,
 int
 DecodeTimeOnly(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -3233,7 +3233,7 @@ DecodeTimezone(const char *str, int *tzp)
 int
 DecodeTimezoneAbbrev(int field, const char *lowtoken,
 	 int *ftype, int *offset, pg_tz **tz,
-	 DateTimeErrorExtra *extra)
+	 DateTimeErrorExtra * extra)
 {
 	const datetkn *tp;
 
@@ -3635,6 +3635,8 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 			case DTK_STRING:
 			case DTK_SPECIAL:
 type = DecodeUnits(i, field[i], );
+if (type == UNKNOWN_FIELD)
+	type = DecodeSpecial(i, field[i], );
 if (type == IGNORE_DTF)
 	continue;
 
@@ -4040,7 +4042,7 @@ DecodeUnits(int field, const char *lowtoken, int *val)
  * separate SQLSTATE codes, so ...
  */
 void
-DateTimeParseError(int dterr, DateTimeErrorExtra *extra,
+DateTimeParseError(int dterr, DateTimeErrorExtra * extra,
    const char *str, const char *datatype,
    Node *escontext)
 {
@@ -4919,7 +4921,7 @@ InstallTime

Re: Infinite Interval

2023-01-05 Thread Joseph Koshakow
On Thu, Jan 5, 2023 at 5:20 AM jian he  wrote:
>
>
>
> On Wed, Jan 4, 2023 at 10:13 PM jian he  wrote:
>>
>>
>>
>> I don't know how to generate an interval.out file.

Personally I just write the .out files manually. I think it especially
helps as a way to double-check that the results are what you expected.
After running make check, a regression.diffs file will be generated with
all the differences between your .out file and the results of the test.


> logic combine and clean up for functions in backend/utils/adt/timestamp.c 
> (timestamp_pl_interval,timestamptz_pl_interval, interval_pl, interval_mi).

One thing I was hoping to achieve was to avoid redundant checks if
possible. For example, in the following code:
> +if ((INTERVAL_IS_NOBEGIN(span1) && INTERVAL_IS_NOEND(span2))
> +  ||(INTERVAL_IS_NOBEGIN(span1) && !INTERVAL_NOT_FINITE(span2))
> +  ||(!INTERVAL_NOT_FINITE(span1) && INTERVAL_IS_NOEND(span2)))
> +   INTERVAL_NOBEGIN(result);
If `(INTERVAL_IS_NOBEGIN(span1) && INTERVAL_IS_NOEND(span2))` is false,
then we end up checking `INTERVAL_IS_NOBEGIN(span1)` twice.

> For 1. I don't know how to format the code. I have a problem installing 
> pg_indent. If the format is wrong, please reformat.

I'll run pg_indent and send an updated patch if anything changes.

Thanks for your help on this patch!

- Joe Koshakow




Re: Infinite Interval

2023-01-02 Thread Joseph Koshakow
I have another patch, this one adds validations to operations that
return intervals and updated error messages. I tried to give all of the
error messages meaningful text, but I'm starting to think that almost all
of them should just say "interval out of range". The current approach
may reveal some implementation details and lead to confusion. For
example, some subtractions are converted to additions which would lead
to an error message about addition.

SELECT date 'infinity' - interval 'infinity';
ERROR:  cannot add infinite values with opposite signs

I've also updated the commit message to include the remaining TODOs,
which I've copied below

  1. Various TODOs in code.
  2. Correctly implement interval_part for infinite intervals.
  3. Test consolidation.
  4. Should we just use the months field to test for infinity?
From 65aceb25bc090375b60d140b1630cabcc90f1c9c Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

TODOs
1. Various TODOs in code.
2. Correctly implement interval_part for infinite intervals.
3. Test consolidation.
4. Should we just use the months field to test for infinity?

Ashutosh Bapat and Joe Koshakow
---
 src/backend/utils/adt/date.c   |   20 +
 src/backend/utils/adt/datetime.c   |   14 +-
 src/backend/utils/adt/timestamp.c  |  372 -
 src/include/datatype/timestamp.h   |   22 +
 src/test/regress/expected/horology.out |6 +-
 src/test/regress/expected/interval.out | 1006 +++-
 src/test/regress/sql/horology.sql  |6 +-
 src/test/regress/sql/interval.sql  |  200 -
 8 files changed, 1571 insertions(+), 75 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 1cf7c7652d..c6259cd9c1 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2073,6 +2073,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2091,6 +2096,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2605,6 +2615,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2627,6 +2642,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..b60d91dfb8 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -70,7 +70,7 @@ static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t,
   const char *abbr, pg_tz *tzp,
   int *offset, int *isdst);
 static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp,
-   DateTimeErrorExtra *extra);
+   DateTimeErrorExtra * extra);
 
 
 const int	day_tab[2][13] =
@@ -977,7 +977,7 @@ ParseDateTime(const char *timestr, char *workbuf, size_t buflen,
 int
 DecodeDateTime(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -1927,7 +1927,7 @@ DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp,
 int
 DecodeTimeOnly(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -3232,7 +3232,7 @@ DecodeTimezone(const char *str, int *tzp)
 int
 DecodeTimezoneAbbrev(int field, const char *lowtoken,
 	 int *ftype, int *offset, pg_tz **tz,
-	 DateTimeErrorExtra *extra)
+	 DateTimeErrorExtra * extra)
 {
 	const datetkn *tp;
 

Re: Infinite Interval

2023-01-02 Thread Joseph Koshakow
On Mon, Jan 2, 2023 at 1:21 PM Joseph Koshakow  wrote:
>
> On Sat, Dec 31, 2022 at 12:09 AM jian he  wrote:
> > In float8, select float8 'inf' / float8 'inf' return NaN. Now in your patch 
> >  select interval 'infinity' / float8 'infinity'; returns infinity.
> > I am not sure it's right. I found this related post 
> > (https://math.stackexchange.com/questions/181304/what-is-infinity-divided-by-infinity).
>
> Good point, I agree this should return an error. We also need to
> properly handle multiplication and division of infinite intervals by
> float8 'nan'. My patch is returning an infinite interval, but it should
> be returning an error. I'll upload a new patch shortly.
>
> - Joe

Attached is the patch to handle these scenarios. Apparently dividing by
NaN is currently broken:
postgres=# SELECT INTERVAL '1 day' / float8 'nan';
 ?column?
---
 -178956970 years -8 mons -2562047788:00:54.775808
(1 row)

This patch will fix the issue, but we may want a separate patch that
handles this specific, existing issue. Any thoughts?

- Joe
From 2110bbe8be4b1c5c66eb48c35b958d84352a6287 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

Following things are supported
1. Accepts '+/-infinity' as a valid string input for interval type.
2. Support interval_pl, interval_div
3. Tests in interval.sql for comparison operators working fine.

TODOs
1. Various TODOs in code
2. interval_pl: how to handle infinite values with opposite signs
3. timestamp, timestamptz, date and time arithmetic
4. Fix horology test.

Ashutosh Bapat
---
 src/backend/utils/adt/date.c   |  20 +
 src/backend/utils/adt/datetime.c   |  14 +-
 src/backend/utils/adt/timestamp.c  | 347 -
 src/include/datatype/timestamp.h   |  22 +
 src/test/regress/expected/horology.out |   6 +-
 src/test/regress/expected/interval.out | 993 -
 src/test/regress/sql/horology.sql  |   6 +-
 src/test/regress/sql/interval.sql  | 194 -
 8 files changed, 1527 insertions(+), 75 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 1cf7c7652d..c6259cd9c1 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2073,6 +2073,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2091,6 +2096,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2605,6 +2615,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2627,6 +2642,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..b60d91dfb8 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -70,7 +70,7 @@ static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t,
   const char *abbr, pg_tz *tzp,
   int *offset, int *isdst);
 static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp,
-   DateTimeErrorExtra *extra);
+   DateTimeErrorExtra * extra);
 
 
 const int	day_tab[2][13] =
@@ -977,7 +977,7 @@ ParseDateTime(const char *timestr, char *workbuf, size_t buflen,
 int
 DecodeDateTime(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -1927,7 +1927,7 @@ DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *a

Re: Infinite Interval

2023-01-02 Thread Joseph Koshakow
On Sat, Dec 31, 2022 at 12:09 AM jian he  wrote:
> In float8, select float8 'inf' / float8 'inf' return NaN. Now in your patch  
> select interval 'infinity' / float8 'infinity'; returns infinity.
> I am not sure it's right. I found this related post 
> (https://math.stackexchange.com/questions/181304/what-is-infinity-divided-by-infinity).

Good point, I agree this should return an error. We also need to
properly handle multiplication and division of infinite intervals by
float8 'nan'. My patch is returning an infinite interval, but it should
be returning an error. I'll upload a new patch shortly.

- Joe




Re: Infinite Interval

2022-12-30 Thread Joseph Koshakow
I have another update: I cleaned up some of the error messages, fixed
the horology tests, and ran pgindent.

- Joe
From 518c59be586abf5779c5727c2117b6a46b466503 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

Following things are supported
1. Accepts '+/-infinity' as a valid string input for interval type.
2. Support interval_pl, interval_div
3. Tests in interval.sql for comparison operators working fine.

TODOs
1. Various TODOs in code
2. interval_pl: how to handle infinite values with opposite signs
3. timestamp, timestamptz, date and time arithmetic
4. Fix horology test.

Ashutosh Bapat
---
 src/backend/utils/adt/date.c   |  20 +
 src/backend/utils/adt/datetime.c   |  14 +-
 src/backend/utils/adt/timestamp.c  | 332 -
 src/include/datatype/timestamp.h   |  22 +
 src/test/regress/expected/horology.out |   6 +-
 src/test/regress/expected/interval.out | 953 -
 src/test/regress/sql/horology.sql  |   6 +-
 src/test/regress/sql/interval.sql  | 182 -
 8 files changed, 1460 insertions(+), 75 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 1cf7c7652d..c6259cd9c1 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2073,6 +2073,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2091,6 +2096,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2605,6 +2615,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot add infinite interval to time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2627,6 +2642,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite interval from time")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..b60d91dfb8 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -70,7 +70,7 @@ static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t,
   const char *abbr, pg_tz *tzp,
   int *offset, int *isdst);
 static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp,
-   DateTimeErrorExtra *extra);
+   DateTimeErrorExtra * extra);
 
 
 const int	day_tab[2][13] =
@@ -977,7 +977,7 @@ ParseDateTime(const char *timestr, char *workbuf, size_t buflen,
 int
 DecodeDateTime(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -1927,7 +1927,7 @@ DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp,
 int
 DecodeTimeOnly(char **field, int *ftype, int nf,
 			   int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp,
-			   DateTimeErrorExtra *extra)
+			   DateTimeErrorExtra * extra)
 {
 	int			fmask = 0,
 tmask,
@@ -3232,7 +3232,7 @@ DecodeTimezone(const char *str, int *tzp)
 int
 DecodeTimezoneAbbrev(int field, const char *lowtoken,
 	 int *ftype, int *offset, pg_tz **tz,
-	 DateTimeErrorExtra *extra)
+	 DateTimeErrorExtra * extra)
 {
 	const datetkn *tp;
 
@@ -3634,6 +3634,8 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 			case DTK_STRING:
 			case DTK_SPECIAL:
type = DecodeUnits(i, field[i], &uval);
+if (type == UNKNOWN_FIELD)
+	type = DecodeSpecial(i, field[i], &uval);
 if (type == IGNORE_DTF)
 	continue;
 
@@ -4039,7 +4041,7 @@ DecodeUnits(int field, const char *lowtoken, int *val)
  * separate SQLSTATE codes, so ...
  */
 void
-DateTimeParseError(int dterr, DateTimeErrorExtra *extra,
+DateTimeParseError(int dterr, DateTimeErrorExtra * extra,
    const char *str

Re: Infinite Interval

2022-12-23 Thread Joseph Koshakow
Hi Ashutosh,

I ended up doing some more work on this today. All of the major
features should be implemented now. Below are what I think are the
outstanding TODOs:
- Clean up error messages and error codes
- Figure out how to correctly implement interval_part for infinite
intervals. For now I pretty much copied the implementation of
timestamp_part, but I'm not convinced that's correct.
- Fix horology tests.
- Test consolidation. After looking through the interval tests, I
realized that I may have duplicated some test cases. It would probably
be best to remove those duplicate tests.
- General cleanup, remove TODOs.

Attached is my most recent patch.

- Joe Koshakow
From 380cde4061afd6eed4cde938a4c668a2c96bb58f Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

Following things are supported
1. Accepts '+/-infinity' as a valid string input for interval type.
2. Support interval_pl, interval_div
3. Tests in interval.sql for comparison operators working fine.

TODOs
1. Various TODOs in code
2. interval_pl: how to handle infinite values with opposite signs
3. timestamp, timestamptz, date and time arithmetic
4. Fix horology test.

Ashutosh Bapat
---
 src/backend/utils/adt/date.c   |  20 +
 src/backend/utils/adt/datetime.c   |   2 +
 src/backend/utils/adt/timestamp.c  | 330 -
 src/include/datatype/timestamp.h   |  22 +
 src/test/regress/expected/interval.out | 953 -
 src/test/regress/sql/interval.sql  | 182 -
 6 files changed, 1442 insertions(+), 67 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 1cf7c7652d..a2c9214bcf 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2073,6 +2073,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2091,6 +2096,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2605,6 +2615,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2627,6 +2642,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..1e98c6dc78 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3634,6 +3634,8 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 			case DTK_STRING:
 			case DTK_SPECIAL:
type = DecodeUnits(i, field[i], &uval);
+if (type == UNKNOWN_FIELD)
+	type = DecodeSpecial(i, field[i], &uval);
 if (type == IGNORE_DTF)
 	continue;
 
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 3f2508c0c4..d108057ce5 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -79,6 +79,8 @@ static bool AdjustIntervalForTypmod(Interval *interval, int32 typmod,
 static TimestampTz timestamp2timestamptz(Timestamp timestamp);
 static Timestamp timestamptz2timestamp(TimestampTz timestamp);
 
+static void EncodeSpecialInterval(Interval *interval, char *str);
+static void negate_interval(Interval *interval, Interval *result);
 
 /* common code for timestamptypmodin and timestamptztypmodin */
 static int32
@@ -943,6 +945,14 @@ interval_in(PG_FUNCTION_ARGS)
 		 errmsg("interval out of range")));
 			break;
 
+		case DTK_LATE:
+			INTERVAL_NOEND(result);
+			break;
+
+		case DTK_EARLY:
+			INTERVAL_NOBEGIN(result);
+			break;
+
 		default:
 			elog(ERROR, "unexpected dtype %d while parsing interval \"%s\"",
  dtype, str);
@@ -965,8 +975,13 @@ interval_out(PG_FUNCTION_ARGS)
 			   *itm = &tt;
 	char		buf[MAXDATELEN + 1];
 
-	interval2itm(*span, itm);
-	EncodeInterval(itm, IntervalStyle, buf);
+	if (INTERVAL_NOT_FINITE(span))
+		EncodeSpecialIn

Re: Infinite Interval

2022-12-17 Thread Joseph Koshakow
On Sat, Dec 17, 2022 at 2:34 PM Joseph Koshakow  wrote:
>
> Hi Ashutosh,
>
> I've added tests for all the operators and functions involving
> intervals and what I think the expected behaviors to be. The
> formatting might be slightly off and I've left the contents of the
> error messages as TODOs. Hopefully it's a good reference for the
> implementation.
>
> > Adding infinite interval to an infinite timestamp with opposite
> > direction is not going to yield 0 but some infinity. Since we are adding
> > interval to the timestamp the resultant timestamp is an infinity
> > preserving the direction.
>
> I think I disagree with this. Tom Lane in one of the previous threads
> said:
> > tl;dr: we should model it after the behavior of IEEE float infinities,
> > except we'll want to throw errors where those produce NaNs.
> and I agree with this opinion. I believe that means that adding an
> infinite interval to an infinite timestamp with opposite directions
> should yield an error instead of some infinity. Since with floats this
> would yield a NaN.
>
> > Dividing infinite interval by finite number keeps it infinite.
> > TODO: Do we change the sign of infinity if factor is negative?
> Again if we model this after the IEEE float behavior, then the answer
> is yes, we do change the sign of infinity.
>
> - Joe Koshakow
I ended up doing some more work in the attached patch. Here are some
updates:

- I modified the arithmetic operators to more closely match IEEE
floats. Error messages are still all TODO, and they may have the wrong
error code.
- I implemented some more operators and functions.
- I moved the helper functions you created into macros in timestamp.h
to more closely match the implementation of infinite timestamps and
dates, and so that date.c can access them.
- There seems to be an existing overflow error with interval
subtraction. Many of the arithmetic operators of the form
`X - Interval` are converted to `X + (-Interval)`. This will overflow
in the case that some interval field is INT32_MIN or INT64_MIN.
Additionally, negating a positive infinity interval won't result in a
negative infinity interval and vice versa. We'll have to come up with
an efficient solution for this.

- Joe Koshakow
From e6e764dd8f8423f2aec0fb3782f170c59557adf6 Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

Following things are supported
1. Accepts '+/-infinity' as a valid string input for interval type.
2. Support interval_pl, interval_div
3. Tests in interval.sql for comparison operators working fine.

TODOs
1. Various TODOs in code
2. interval_pl: how to handle infinite values with opposite signs
3. timestamp, timestamptz, date and time arithmetic
4. Fix horology test.

Ashutosh Bapat
---
 src/backend/utils/adt/date.c   |  20 +
 src/backend/utils/adt/datetime.c   |   2 +
 src/backend/utils/adt/timestamp.c  | 188 +++-
 src/include/datatype/timestamp.h   |  22 +
 src/test/regress/expected/interval.out | 613 -
 src/test/regress/sql/interval.sql  | 121 +
 6 files changed, 949 insertions(+), 17 deletions(-)

diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 1cf7c7652d..a2c9214bcf 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -2073,6 +2073,11 @@ time_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = time + span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2091,6 +2096,11 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeADT		result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = time - span->time;
 	result -= result / USECS_PER_DAY * USECS_PER_DAY;
 	if (result < INT64CONST(0))
@@ -2605,6 +2615,11 @@ timetz_pl_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time + span->time;
@@ -2627,6 +2642,11 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	Interval   *span = PG_GETARG_INTERVAL_P(1);
 	TimeTzADT  *result;
 
+	if (INTERVAL_NOT_FINITE(span))
+		ereport(ERROR,
+(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("TODO")));
+
 	result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
 
 	result->time = time->time - span->time;
diff --git a/src/backend/utils/a

Re: Infinite Interval

2022-12-17 Thread Joseph Koshakow
Hi Ashutosh,

I've added tests for all the operators and functions involving
intervals, along with what I think the expected behaviors should be. The
formatting might be slightly off and I've left the contents of the
error messages as TODOs. Hopefully it's a good reference for the
implementation.

> Adding infinite interval to an infinite timestamp with opposite
> direction is not going to yield 0 but some infinity. Since we are adding
> interval to the timestamp the resultant timestamp is an infinity
> preserving the direction.

I think I disagree with this. Tom Lane in one of the previous threads
said:
> tl;dr: we should model it after the behavior of IEEE float infinities,
> except we'll want to throw errors where those produce NaNs.
and I agree with this opinion. I believe that means that adding an
infinite interval to an infinite timestamp with opposite directions
should yield an error instead of some infinity, since with floats this
would yield a NaN.

> Dividing infinite interval by finite number keeps it infinite.
> TODO: Do we change the sign of infinity if factor is negative?
Again if we model this after the IEEE float behavior, then the answer
is yes, we do change the sign of infinity.

- Joe Koshakow
From 4c1be4e2aa7abd56967fdce14b100715f3a63fee Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 17 Dec 2022 14:21:26 -0500
Subject: [PATCH] This is WIP.

Following things are supported
1. Accepts '+/-infinity' as a valid string input for interval type.
2. Support interval_pl, interval_div
3. Tests in interval.sql for comparison operators working fine.

TODOs
1. Various TODOs in code
2. interval_pl: how to handle infinite values with opposite signs
3. timestamp, timestamptz, date and time arithmetic
4. Fix horology test.

Ashutosh Bapat
---
 src/backend/utils/adt/datetime.c   |   2 +
 src/backend/utils/adt/timestamp.c  | 166 +++-
 src/test/regress/expected/interval.out | 565 -
 src/test/regress/sql/interval.sql  | 105 +
 4 files changed, 824 insertions(+), 14 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..1e98c6dc78 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3634,6 +3634,8 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
 			case DTK_STRING:
 			case DTK_SPECIAL:
type = DecodeUnits(i, field[i], &uval);
+if (type == UNKNOWN_FIELD)
+	type = DecodeSpecial(i, field[i], &uval);
 if (type == IGNORE_DTF)
 	continue;
 
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 3f2508c0c4..0c7286b06e 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -79,6 +79,12 @@ static bool AdjustIntervalForTypmod(Interval *interval, int32 typmod,
 static TimestampTz timestamp2timestamptz(Timestamp timestamp);
 static Timestamp timestamptz2timestamp(TimestampTz timestamp);
 
+static void EncodeSpecialInterval(Interval *interval, char *str);
+static void interval_noend(Interval *interval);
+static bool interval_is_noend(Interval *interval);
+static void interval_nobegin(Interval *interval);
+static bool interval_is_nobegin(Interval *interval);
+static bool interval_not_finite(Interval *interval);
 
 /* common code for timestamptypmodin and timestamptztypmodin */
 static int32
@@ -943,6 +949,14 @@ interval_in(PG_FUNCTION_ARGS)
 		 errmsg("interval out of range")));
 			break;
 
+		case DTK_LATE:
+			interval_noend(result);
+			break;
+
+		case DTK_EARLY:
+			interval_nobegin(result);
+			break;
+
 		default:
 			elog(ERROR, "unexpected dtype %d while parsing interval \"%s\"",
  dtype, str);
@@ -965,8 +979,13 @@ interval_out(PG_FUNCTION_ARGS)
 			   *itm = &tt;
 	char		buf[MAXDATELEN + 1];
 
-	interval2itm(*span, itm);
-	EncodeInterval(itm, IntervalStyle, buf);
+	if (interval_not_finite(span))
+		EncodeSpecialInterval(span, buf);
+	else
+	{
+		interval2itm(*span, itm);
+		EncodeInterval(itm, IntervalStyle, buf);
+	}
 
 	result = pstrdup(buf);
 	PG_RETURN_CSTRING(result);
@@ -1352,6 +1371,13 @@ AdjustIntervalForTypmod(Interval *interval, int32 typmod,
 		INT64CONST(0)
 	};
 
+	/*
+	 * Infinite interval after being subjected to typmod conversion remains
+	 * infinite.
+	 */
+	if (interval_not_finite(interval))
+		return;
+
 	/*
 	 * Unspecified range and precision? Then not necessary to adjust. Setting
 	 * typmod to -1 is the convention for all data types.
@@ -1545,6 +1571,17 @@ EncodeSpecialTimestamp(Timestamp dt, char *str)
 		elog(ERROR, "invalid argument for EncodeSpecialTimestamp");
 }
 
+static void
+EncodeSpecialInterval(Interval *interval, char *str)
+{
+	if (interval_is_nobegin(interval))
+		strcpy(str, EARLY);
+	else if (interval_is_noend(interval))
+		strcpy(str, LATE);
+	else		/* shouldn't happen */
+		elog(ERROR, "invalid argument for EncodeSpecialInterval");
+}
+
 Datum
 now(PG_F

Re: Infinite Interval

2022-12-15 Thread Joseph Koshakow
On Mon, Dec 12, 2022 at 8:05 AM Ashutosh Bapat
 wrote:
>
> Hi Joseph,
> I stumbled upon this requirement a few times. So I started working on
> this support in my spare time as a hobby project to understand
> horology code in PostgreSQL. This was sitting in my repositories for
> more than a year. Now that I have someone else showing an interest,
> it's time for it to face the world. Rebased it, fixed conflicts.
>
> PFA patch implementing infinite interval. It's still WIP, there are
> TODOs in the code and also the commit message lists things that are
> known to be incomplete. You might want to assess expected output
> carefully

That's great! I was also planning to just work on it as a hobby
project, so I'll try and review and add updates as I find free
time as well.

> > The proposed design from the most recent thread was to reserve
> > INT32_MAX months for infinity and INT32_MIN months for negative
> > infinity. As pointed out in the thread, these are currently valid
> > non-infinite intervals, but they are out of the documented range.
>
> The patch uses both months and days together to avoid this problem.

Can you expand on this part? I believe the full range of representable
intervals is considered valid as of v15.

- Joe Koshakow




Re: Date-Time dangling unit fix

2022-12-12 Thread Joseph Koshakow
On Mon, Dec 12, 2022 at 10:55 AM Joseph Koshakow  wrote:
>
> I just found another class of this bug that the submitted patch does
> not fix. If the units are at the beginning of the string, then they are
> also ignored. For example, `date 'm d y2020m11d3'` is also valid. I
> think the fix here is to check and make sure that ptype is 0 before
> reassigning the value to a non-zero number. I'll send an updated patch
> with this tonight.

Attached is the described patch.

- Joe Koshakow
From af72736bb4149afa629281e27da2141635a93cac Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 10 Dec 2022 18:59:26 -0500
Subject: [PATCH] Handle dangling units in date-time input

DecodeDateTime and DecodeTimeOnly allowed dangling unit types at the
beginning and end of inputs without returning an error. For example,
`date '1995-08-06 m y d'` and
`timestamp 'y m s d y2001m02d04 h04mm17s34'` were considered a valid
date and the dangling units were ignored. This commit fixes this issue
so an error is returned instead.
---
 src/backend/utils/adt/datetime.c  | 21 -
 src/test/regress/expected/date.out| 10 ++
 src/test/regress/expected/time.out|  5 +
 src/test/regress/expected/timestamp.out   | 10 ++
 src/test/regress/expected/timestamptz.out | 10 ++
 src/test/regress/expected/timetz.out  |  5 +
 src/test/regress/sql/date.sql |  5 +
 src/test/regress/sql/time.sql |  3 +++
 src/test/regress/sql/timestamp.sql|  5 +
 src/test/regress/sql/timestamptz.sql  |  5 +
 src/test/regress/sql/timetz.sql   |  3 +++
 11 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..ebd7caff08 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -1509,6 +1509,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
 	case UNITS:
 		tmask = 0;
+		/* prevent consecutive unhandled units */
+		if (ptype != 0)
+			return DTERR_BAD_FORMAT;
 		ptype = val;
 		break;
 
@@ -1535,7 +1538,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 			 ftype[i + 1] != DTK_TIME &&
 			 ftype[i + 1] != DTK_DATE))
 			return DTERR_BAD_FORMAT;
-
+		/* prevent consecutive unhandled units */
+		if (ptype != 0)
+			return DTERR_BAD_FORMAT;
 		ptype = val;
 		break;
 
@@ -1566,6 +1571,10 @@ DecodeDateTime(char **field, int *ftype, int nf,
 		fmask |= tmask;
 	}			/* end loop over fields */
 
+	/* prefix type was dangling and never handled */
+	if (ptype != 0)
+		return DTERR_BAD_FORMAT;
+
 	/* do final checking/adjustment of Y/M/D fields */
 	dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
 	if (dterr)
@@ -2367,6 +2376,9 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 
 	case UNITS:
 		tmask = 0;
+		/* prevent consecutive unhandled units */
+		if (ptype != 0)
+			return DTERR_BAD_FORMAT;
 		ptype = val;
 		break;
 
@@ -2385,6 +2397,9 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 			 ftype[i + 1] != DTK_DATE))
 			return DTERR_BAD_FORMAT;
 
+		/* prevent consecutive unhandled units */
+		if (ptype != 0)
+			return DTERR_BAD_FORMAT;
 		ptype = val;
 		break;
 
@@ -2415,6 +2430,10 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 		fmask |= tmask;
 	}			/* end loop over fields */
 
+	/* prefix type was dangling and never handled */
+	if (ptype != 0)
+		return DTERR_BAD_FORMAT;
+
 	/* do final checking/adjustment of Y/M/D fields */
 	dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
 	if (dterr)
diff --git a/src/test/regress/expected/date.out b/src/test/regress/expected/date.out
index f8f83e40e9..f4239a5402 100644
--- a/src/test/regress/expected/date.out
+++ b/src/test/regress/expected/date.out
@@ -1526,3 +1526,13 @@ select make_time(10, 55, 100.1);
 ERROR:  time field value out of range: 10:55:100.1
 select make_time(24, 0, 2.1);
 ERROR:  time field value out of range: 24:00:2.1
+-- test error on dangling units
+SELECT date '1995-08-06 m';
+ERROR:  invalid input syntax for type date: "1995-08-06 m"
+LINE 1: SELECT date '1995-08-06 m';
+^
+SET datestyle = ISO;
+SELECT date 'y m s d y2001m02d04';
+ERROR:  invalid input syntax for type date: "y m s d y2001m02d04"
+LINE 1: SELECT date 'y m s d y2001m02d04';
+^
diff --git a/src/test/regress/expected/time.out b/src/test/regress/expected/time.out
index a44caededd..5f7058eca8 100644
--- a/src/test/regress/expected/time.out
+++ b/src/test/regress/expected/time.out
@@ -229,3 +229,8 @@ SELECT date_part('epoch',   TIME '2020-05-26 13:30:25.575401');
  48625.575401
 (1 row)
 
+-- test error on dangling units
+SELECT time '12:30:15 d';
+ERROR:  invalid input syntax for type time: "12:30:15 d"
+LINE 1: SELECT time '12:30:

Re: Date-Time dangling unit fix

2022-12-12 Thread Joseph Koshakow
I just found another class of this bug that the submitted patch does
not fix. If the units are at the beginning of the string, then they are
also ignored. For example, `date 'm d y2020m11d3'` is also valid. I
think the fix here is to check and make sure that ptype is 0 before
reassigning the value to a non-zero number. I'll send an updated patch
with this tonight.


Date-time extraneous fields with reserved keywords

2022-12-11 Thread Joseph Koshakow
Hi all,

Attached is a patch to fix another parsing error in the date-time types:
extraneous fields were accepted alongside certain reserved keywords. For
example, both `date '1995-08-06 epoch'` and `date 'today epoch'` were
considered valid dates that both resolve to 1970-01-01.

- Joe Koshakow
From fb4c161afff08b926eea12d8689a148e99cbdb5c Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sun, 11 Dec 2022 16:08:43 -0500
Subject: [PATCH] Handle extraneous fields in date-time input

DecodeDateTime sometimes allowed extraneous fields to be included with
reserved keywords. For example `date '1995-08-06 epoch'` would be
parsed successfully, but the date was ignored. This commit fixes the
issue so an error is returned instead.
---
 src/backend/utils/adt/datetime.c|  3 +++
 src/test/regress/expected/date.out  | 17 +
 src/test/regress/expected/timestamp.out | 17 +
 src/test/regress/sql/date.sql   |  6 ++
 src/test/regress/sql/timestamp.sql  |  6 ++
 5 files changed, 49 insertions(+)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index b5b117a8ca..1e141a06f4 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -1431,6 +1431,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 break;
 
 			default:
+/* only allowed if we haven't already parsed some fields */
+if (fmask)
+	return DTERR_BAD_FORMAT;
 *dtype = val;
 		}
 
diff --git a/src/test/regress/expected/date.out b/src/test/regress/expected/date.out
index f8f83e40e9..50a4a52d8c 100644
--- a/src/test/regress/expected/date.out
+++ b/src/test/regress/expected/date.out
@@ -1526,3 +1526,20 @@ select make_time(10, 55, 100.1);
 ERROR:  time field value out of range: 10:55:100.1
 select make_time(24, 0, 2.1);
 ERROR:  time field value out of range: 24:00:2.1
+-- test errors with reserved keywords
+SELECT date '1995-08-06 epoch';
+ERROR:  invalid input syntax for type date: "1995-08-06 epoch"
+LINE 1: SELECT date '1995-08-06 epoch';
+^
+SELECT date '1995-08-06 infinity';
+ERROR:  invalid input syntax for type date: "1995-08-06 infinity"
+LINE 1: SELECT date '1995-08-06 infinity';
+^
+SELECT date '1995-08-06 -infinity';
+ERROR:  invalid input syntax for type date: "1995-08-06 -infinity"
+LINE 1: SELECT date '1995-08-06 -infinity';
+^
+SELECT date 'now infinity';
+ERROR:  invalid input syntax for type date: "now infinity"
+LINE 1: SELECT date 'now infinity';
+^
diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out
index be66274738..f68ecd19ea 100644
--- a/src/test/regress/expected/timestamp.out
+++ b/src/test/regress/expected/timestamp.out
@@ -2110,3 +2110,20 @@ select * from generate_series('2020-01-01 00:00'::timestamp,
   '2020-01-02 03:00'::timestamp,
   '0 hour'::interval);
 ERROR:  step size cannot equal zero
+-- test errors with reserved keywords
+SELECT timestamp '1995-08-06 01:01:01 epoch';
+ERROR:  invalid input syntax for type timestamp: "1995-08-06 01:01:01 epoch"
+LINE 1: SELECT timestamp '1995-08-06 01:01:01 epoch';
+ ^
+SELECT timestamp '1995-08-06 01:01:01 infinity';
+ERROR:  invalid input syntax for type timestamp: "1995-08-06 01:01:01 infinity"
+LINE 1: SELECT timestamp '1995-08-06 01:01:01 infinity';
+ ^
+SELECT timestamp '1995-08-06 01:01:01 -infinity';
+ERROR:  invalid input syntax for type timestamp: "1995-08-06 01:01:01 -infinity"
+LINE 1: SELECT timestamp '1995-08-06 01:01:01 -infinity';
+ ^
+SELECT timestamp 'today epoch';
+ERROR:  invalid input syntax for type timestamp: "today epoch"
+LINE 1: SELECT timestamp 'today epoch';
+ ^
diff --git a/src/test/regress/sql/date.sql b/src/test/regress/sql/date.sql
index 9fd15be5f9..82da992e3a 100644
--- a/src/test/regress/sql/date.sql
+++ b/src/test/regress/sql/date.sql
@@ -371,3 +371,9 @@ select make_date(2013, 13, 1);
 select make_date(2013, 11, -1);
 select make_time(10, 55, 100.1);
 select make_time(24, 0, 2.1);
+
+-- test errors with reserved keywords
+SELECT date '1995-08-06 epoch';
+SELECT date '1995-08-06 infinity';
+SELECT date '1995-08-06 -infinity';
+SELECT date 'now infinity';
diff --git a/src/test/regress/sql/timestamp.sql b/src/test/regress/sql/timestamp.sql
index e1175b12ce..f7e3fe1270 100644
--- a/src/test/regress/sql/timestamp.sql
+++ b/src/test/regress/sql/timestamp.sql
@@ -391,3 +391,9 @@ select generate_series('2022-01-01 00:00'::timestamp,
 select * from generate_series('2020-01-01 00:00'::timestamp,
   '2020-01-02 03:00'::timestamp,
   '0 hour'::interval);
+
+-- test errors with reserved keywords
+SELECT timest

  1   2   >