Re: drop column name conflict

2024-05-04 Thread Joseph Koshakow
On Sat, May 4, 2024 at 11:29 AM Tom Lane  wrote:
> I think we intentionally did not bother with preventing this,
> on the grounds that if you were silly enough to name a column
> that way then you deserve any ensuing problems.

Fair enough.

> If we were going to expend any code on the scenario, I'd prefer
> to make it be checks in column addition/renaming that disallow
> naming a column this way.

Is there any interest in making this change? The attached patch could
use some cleanup, but seems to accomplish what's described. It's
definitely more involved than the previous one and may not be worth the
effort. If you feel that it's worth it I can clean it up, otherwise
I'll drop it.

Thanks,
Joe Koshakow
From 936a9e3509867574633882f5c1ec714d2f2604ec Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 4 May 2024 10:12:37 -0400
Subject: [PATCH] Prevent name conflicts when dropping a column

Previously, dropped columns were always renamed to
"pg.dropped.". In the rare scenario that a
column with that name already exists, the column drop would fail with
an error about violating the unique constraint on
"pg_attribute_relid_attnam_index". This commit fixes that issue by
preventing users from creating columns with a name that matches
"pg.dropped.\d+". This is backwards incompatible.
---
 src/backend/catalog/heap.c | 57 --
 src/backend/commands/tablecmds.c   |  2 +
 src/include/catalog/heap.h |  3 ++
 src/test/regress/expected/alter_table.out  |  7 +++
 src/test/regress/expected/create_table.out |  3 ++
 src/test/regress/sql/alter_table.sql   |  6 +++
 src/test/regress/sql/create_table.sql  |  3 ++
 7 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 922ba79ac2..0a0afe833d 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -231,6 +231,9 @@ static const FormData_pg_attribute a6 = {
 
 static const FormData_pg_attribute *const SysAtt[] = {, , , , , };
 
+static const char *dropped_col_pre = "pg.dropped.";
+static const char *dropped_col_post = "";
+
 /*
  * This function returns a Form_pg_attribute pointer for a system attribute.
  * Note that we elog if the presented attno is invalid, which would only
@@ -468,10 +471,10 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 		MaxHeapAttributeNumber)));
 
 	/*
-	 * first check for collision with system attribute names
+	 * first check for collision with system attribute and reserved names
 	 *
 	 * Skip this for a view or type relation, since those don't have system
-	 * attributes.
+	 * attributes and cannot drop columns.
 	 */
 	if (relkind != RELKIND_VIEW && relkind != RELKIND_COMPOSITE_TYPE)
 	{
@@ -484,6 +487,11 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
 		(errcode(ERRCODE_DUPLICATE_COLUMN),
 		 errmsg("column name \"%s\" conflicts with a system column name",
 NameStr(attr->attname;
+
+			if ((CHKATYPE_RESERVED_NAME & flags) == 0)
+			{
+CheckAttributeReservedName(NameStr(attr->attname));
+			}
 		}
 	}
 
@@ -679,6 +687,47 @@ CheckAttributeType(const char *attname,
 	}
 }
 
+/*
+ * TODO: Add function description.
+ */
+void
+CheckAttributeReservedName(const char *attname)
+{
+	size_t		name_len,
+pre_len,
+post_len;
+	int			i;
+
+	name_len = strlen(attname);
+	pre_len = strlen(dropped_col_pre);
+	post_len = strlen(dropped_col_post);
+
+	if (name_len < pre_len + post_len + 1)
+	{
+		return;
+	}
+	if (memcmp(attname, dropped_col_pre, pre_len) != 0)
+	{
+		return;
+	}
+	for (i = pre_len; i < name_len - post_len; i++)
+	{
+		if (!isdigit(attname[i]))
+		{
+			return;
+		}
+	}
+	if (memcmp(attname + (name_len - post_len), dropped_col_post, post_len) != 0)
+	{
+		return;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_RESERVED_NAME),
+			 errmsg("column name \"%s\" conflicts with a reserved column name",
+	attname)));
+}
+
 /*
  * InsertPgAttributeTuples
  *		Construct and insert a set of tuples in pg_attribute.
@@ -1148,7 +1197,7 @@ heap_create_with_catalog(const char *relname,
 	 * hack to allow creating pg_statistic and cloning it during VACUUM FULL.
 	 */
 	CheckAttributeNamesTypes(tupdesc, relkind,
-			 allow_system_table_mods ? CHKATYPE_ANYARRAY : 0);
+			 (allow_system_table_mods ? CHKATYPE_ANYARRAY : 0) | (is_internal ? CHKATYPE_RESERVED_NAME : 0));
 
 	/*
 	 * This would fail later on anyway, if the relation already exists.  But
@@ -1705,7 +1754,7 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	 * Change the column name to something that isn't likely to conflict
 	 */
 	snprintf(newattname, sizeof(newattname),
-			 "pg.dropped.%d", attnum);
+			 "%s%d%s", dropped_col_pre, attnum, dropped_col_post);
 	namestrcpy(&(attStruct->attname), newattname);
 
 	/* Clear the missing value */
diff --git a/src/backend/commands/tablecmds.c 

Re: drop column name conflict

2024-05-04 Thread Tom Lane
Joseph Koshakow  writes:
> There's a rare edge case in `alter table` that can prevent users from
> dropping a column as shown below

> # create table atacc1(a int, "pg.dropped.1" int);
> CREATE TABLE
> # alter table atacc1 drop column a;
> ERROR:  duplicate key value violates unique constraint
> "pg_attribute_relid_attnam_index"
> DETAIL:  Key (attrelid, attname)=(16407, pg.dropped.1)
> already exists.

I think we intentionally did not bother with preventing this,
on the grounds that if you were silly enough to name a column
that way then you deserve any ensuing problems.

If we were going to expend any code on the scenario, I'd prefer
to make it be checks in column addition/renaming that disallow
naming a column this way.  What you propose here doesn't remove
the fundamental tension about whether this is valid user namespace
or not, it just makes things less predictable.

regards, tom lane




drop column name conflict

2024-05-04 Thread Joseph Koshakow
Hi all,

There's a rare edge case in `alter table` that can prevent users from
dropping a column as shown below

# create table atacc1(a int, "pg.dropped.1" int);
CREATE TABLE
# alter table atacc1 drop column a;
ERROR:  duplicate key value violates unique constraint
"pg_attribute_relid_attnam_index"
DETAIL:  Key (attrelid, attname)=(16407, pg.dropped.1)
already exists.

It seems a bit silly and unlikely that anyone would ever find
themselves in this scenario, but it also seems easy enough to fix as
shown by the attached patch.

Does anyone think this is worth fixing? If so I can submit it to the
current commitfest.

Thanks,
Joe Koshakow
From 50f6e73d9bc1e00ad3988faa80a84af70aef Mon Sep 17 00:00:00 2001
From: Joseph Koshakow 
Date: Sat, 4 May 2024 10:12:37 -0400
Subject: [PATCH] Prevent name conflicts when dropping a column

Previously, dropped columns were always renamed to
"pg.dropped.". In the rare scenario that a
column with that name already exists, the column drop would fail with
an error about violating the unique constraint on
"pg_attribute_relid_attnam_index". This commit fixes that issue by
appending an int to dropped column name until we find a unique name.
Since tables have a maximum of 16,000 columns and the max int is larger
than 16,000, we are guaranteed to find a unique name.
---
 src/backend/catalog/heap.c| 16 +++-
 src/test/regress/expected/alter_table.out |  4 
 src/test/regress/sql/alter_table.sql  |  5 +
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 922ba79ac2..852ed442e1 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1658,11 +1658,13 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	Relation	rel;
 	Relation	attr_rel;
 	HeapTuple	tuple;
+	HeapTuple	drop_tuple_check;
 	Form_pg_attribute attStruct;
 	char		newattname[NAMEDATALEN];
 	Datum		valuesAtt[Natts_pg_attribute] = {0};
 	bool		nullsAtt[Natts_pg_attribute] = {0};
 	bool		replacesAtt[Natts_pg_attribute] = {0};
+	int			i;
 
 	/*
 	 * Grab an exclusive lock on the target table, which we will NOT release
@@ -1702,10 +1704,22 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	attStruct->attgenerated = '\0';
 
 	/*
-	 * Change the column name to something that isn't likely to conflict
+	 * Change the column name to something that doesn't conflict
 	 */
 	snprintf(newattname, sizeof(newattname),
 			 "pg.dropped.%d", attnum);
+	Assert(PG_INT32_MAX > MaxHeapAttributeNumber);
+	drop_tuple_check = SearchSysCacheCopy2(ATTNAME,
+		   ObjectIdGetDatum(relid),
+		   PointerGetDatum(newattname));
+	for (i = 0; HeapTupleIsValid(drop_tuple_check); i++)
+	{
+		snprintf(newattname, sizeof(newattname),
+ "pg.dropped.%d.%d", attnum, i);
+		drop_tuple_check = SearchSysCacheCopy2(ATTNAME,
+			   ObjectIdGetDatum(relid),
+			   PointerGetDatum(newattname));
+	}
 	namestrcpy(&(attStruct->attname), newattname);
 
 	/* Clear the missing value */
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index 7666c76238..844ae55467 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -1554,6 +1554,10 @@ insert into atacc1(id, value) values (null, 0);
 ERROR:  null value in column "id" of relation "atacc1" violates not-null constraint
 DETAIL:  Failing row contains (null, 0).
 drop table atacc1;
+-- test dropping a column doesn't cause name conflicts
+create table atacc1(a int, "pg.dropped.1" int);
+alter table atacc1 drop column a;
+drop table atacc1;
 -- test inheritance
 create table parent (a int, b int, c int);
 insert into parent values (1, 2, 3);
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index 9df5a63bdf..d5d912a2e2 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1097,6 +1097,11 @@ insert into atacc1(value) values (100);
 insert into atacc1(id, value) values (null, 0);
 drop table atacc1;
 
+-- test dropping a column doesn't cause name conflicts
+create table atacc1(a int, "pg.dropped.1" int);
+alter table atacc1 drop column a;
+drop table atacc1;
+
 -- test inheritance
 create table parent (a int, b int, c int);
 insert into parent values (1, 2, 3);
-- 
2.34.1