On 11/12/16 10:38 AM, Andres Freund wrote:
> E.g. what if previously present collations are now unavailable?

You get an error message when you try to use the collation.  I think
that is a different class of problems.

>>  
>>      /*
>>       * Also forbid matching an any-encoding entry.  This test of course is 
>> not
>>       * backed up by the unique index, but it's not a problem since we don't
>>       * support adding any-encoding entries after initdb.
>>       */
> 
> Note that this isn't true anymore...

I think this is still correct, because the collation import does not
produce any any-encoding entries (collencoding = -1).

>> +
>> +Datum pg_import_system_collations(PG_FUNCTION_ARGS);
>> +
>> +Datum
>> +pg_import_system_collations(PG_FUNCTION_ARGS)
>> +{
> 
> Uh?

Required to avoid compiler warning about missing prototype.

> This function needs to have !superuser permissions revoked, which it
> afaics currently hasn't.

Done.

New patch attached (includes OID change because of conflict).

-- 
Peter Eisentraut              http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From bb6710c55df3a5f7023ddcda749e05e05e49bc59 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pete...@gmx.net>
Date: Tue, 29 Nov 2016 12:00:00 -0500
Subject: [PATCH v2] Add function to import operating system collations

Move this logic out of initdb into a user-callable function.  This
simplifies the code and makes it possible to update the standard
collations later on if additional operating system collations appear.
---
 src/backend/catalog/pg_collation.c    |  18 +++-
 src/backend/commands/collationcmds.c  | 151 ++++++++++++++++++++++++++++++-
 src/bin/initdb/initdb.c               | 164 +---------------------------------
 src/include/catalog/pg_collation_fn.h |   3 +-
 src/include/catalog/pg_proc.h         |   3 +
 5 files changed, 172 insertions(+), 167 deletions(-)

diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index f37cf37..cda64c4 100644
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -41,7 +41,8 @@ Oid
 CollationCreate(const char *collname, Oid collnamespace,
 				Oid collowner,
 				int32 collencoding,
-				const char *collcollate, const char *collctype)
+				const char *collcollate, const char *collctype,
+				bool if_not_exists)
 {
 	Relation	rel;
 	TupleDesc	tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
 							  PointerGetDatum(collname),
 							  Int32GetDatum(collencoding),
 							  ObjectIdGetDatum(collnamespace)))
-		ereport(ERROR,
+	{
+		if (if_not_exists)
+		{
+			ereport(NOTICE,
 				(errcode(ERRCODE_DUPLICATE_OBJECT),
-				 errmsg("collation \"%s\" for encoding \"%s\" already exists",
+				 errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
 						collname, pg_encoding_to_char(collencoding))));
+			return InvalidOid;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("collation \"%s\" for encoding \"%s\" already exists",
+							collname, pg_encoding_to_char(collencoding))));
+	}
 
 	/*
 	 * Also forbid matching an any-encoding entry.  This test of course is not
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 9bba748..f4b7b65 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
 							 GetUserId(),
 							 GetDatabaseEncoding(),
 							 collcollate,
-							 collctype);
+							 collctype,
+							 false);
+
+	if (!newoid)
+		return InvalidObjectAddress;
 
 	ObjectAddressSet(address, CollationRelationId, newoid);
 
@@ -177,3 +181,148 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
 				 errmsg("collation \"%s\" already exists in schema \"%s\"",
 						collname, get_namespace_name(nspOid))));
 }
+
+
+/*
+ * "Normalize" a locale name, stripping off encoding tags such as
+ * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
+ * -> "br_FR@euro").  Return true if a new, different name was
+ * generated.
+ */
+static bool
+normalize_locale_name(char *new, const char *old)
+{
+	char	   *n = new;
+	const char *o = old;
+	bool		changed = false;
+
+	while (*o)
+	{
+		if (*o == '.')
+		{
+			/* skip over encoding tag such as ".utf8" or ".UTF-8" */
+			o++;
+			while ((*o >= 'A' && *o <= 'Z')
+				   || (*o >= 'a' && *o <= 'z')
+				   || (*o >= '0' && *o <= '9')
+				   || (*o == '-'))
+				o++;
+			changed = true;
+		}
+		else
+			*n++ = *o++;
+	}
+	*n = '\0';
+
+	return changed;
+}
+
+
+Datum pg_import_system_collations(PG_FUNCTION_ARGS);
+
+Datum
+pg_import_system_collations(PG_FUNCTION_ARGS)
+{
+	bool		if_not_exists = PG_GETARG_BOOL(0);
+	Oid         nspid = PG_GETARG_OID(1);
+
+	FILE	   *locale_a_handle;
+	char		localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
+	int			count = 0;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to import system collations"))));
+
+	locale_a_handle = OpenPipeStream("locale -a", "r");
+	if (locale_a_handle == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not execute command \"%s\": %m",
+						"locale -a")));
+
+	while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
+	{
+		int			i;
+		size_t		len;
+		int			enc;
+		bool		skip;
+		char		alias[NAMEDATALEN];
+
+		len = strlen(localebuf);
+
+		if (len == 0 || localebuf[len - 1] != '\n')
+		{
+			elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
+			continue;
+		}
+		localebuf[len - 1] = '\0';
+
+		/*
+		 * Some systems have locale names that don't consist entirely of ASCII
+		 * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
+		 * pretty silly, since we need the locale itself to interpret the
+		 * non-ASCII characters. We can't do much with those, so we filter
+		 * them out.
+		 */
+		skip = false;
+		for (i = 0; i < len; i++)
+		{
+			if (IS_HIGHBIT_SET(localebuf[i]))
+			{
+				skip = true;
+				break;
+			}
+		}
+		if (skip)
+		{
+			elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
+			continue;
+		}
+
+		enc = pg_get_encoding_from_locale(localebuf, false);
+		if (enc < 0)
+		{
+			/* error message printed by pg_get_encoding_from_locale() */
+			continue;
+		}
+		if (!PG_VALID_BE_ENCODING(enc))
+			continue;			/* ignore locales for client-only encodings */
+		if (enc == PG_SQL_ASCII)
+			continue;			/* C/POSIX are already in the catalog */
+
+		count++;
+
+		CollationCreate(localebuf, nspid, GetUserId(), enc,
+						localebuf, localebuf, if_not_exists);
+
+		CommandCounterIncrement();
+
+		/*
+		 * Generate aliases such as "en_US" in addition to "en_US.utf8" for
+		 * ease of use.  Note that collation names are unique per encoding
+		 * only, so this doesn't clash with "en_US" for LATIN1, say.
+		 *
+		 * This always runs in "if not exists" mode, to skip aliases that
+		 * conflict with an existing locale name for the same encoding.  For
+		 * example, "br_FR.iso88591" is normalized to "br_FR", both for
+		 * encoding LATIN1.  But the unnormalized locale "br_FR" already
+		 * exists for LATIN1.
+		 */
+		if (normalize_locale_name(alias, localebuf))
+		{
+			CollationCreate(alias, nspid, GetUserId(), enc,
+							localebuf, localebuf, true);
+			CommandCounterIncrement();
+		}
+	}
+
+	ClosePipeStream(locale_a_handle);
+
+	if (count == 0)
+		ereport(ERROR,
+				(errmsg("no usable system locales were found")));
+
+	PG_RETURN_VOID();
+}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 24f9cc8..9c1a1fb 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1608,42 +1608,6 @@ setup_description(FILE *cmdfd)
 	PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
 }
 
-#ifdef HAVE_LOCALE_T
-/*
- * "Normalize" a locale name, stripping off encoding tags such as
- * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
- * -> "br_FR@euro").  Return true if a new, different name was
- * generated.
- */
-static bool
-normalize_locale_name(char *new, const char *old)
-{
-	char	   *n = new;
-	const char *o = old;
-	bool		changed = false;
-
-	while (*o)
-	{
-		if (*o == '.')
-		{
-			/* skip over encoding tag such as ".utf8" or ".UTF-8" */
-			o++;
-			while ((*o >= 'A' && *o <= 'Z')
-				   || (*o >= 'a' && *o <= 'z')
-				   || (*o >= '0' && *o <= '9')
-				   || (*o == '-'))
-				o++;
-			changed = true;
-		}
-		else
-			*n++ = *o++;
-	}
-	*n = '\0';
-
-	return changed;
-}
-#endif   /* HAVE_LOCALE_T */
-
 /*
  * populate pg_collation
  */
@@ -1651,134 +1615,10 @@ static void
 setup_collation(FILE *cmdfd)
 {
 #if defined(HAVE_LOCALE_T) && !defined(WIN32)
-	int			i;
-	FILE	   *locale_a_handle;
-	char		localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
-	int			count = 0;
-
-	locale_a_handle = popen_check("locale -a", "r");
-	if (!locale_a_handle)
-		return;					/* complaint already printed */
-
-	PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
-				"	collname name, "
-				"	locale name, "
-				"	encoding int) WITHOUT OIDS;\n\n");
-
-	while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
-	{
-		size_t		len;
-		int			enc;
-		bool		skip;
-		char	   *quoted_locale;
-		char		alias[NAMEDATALEN];
-
-		len = strlen(localebuf);
-
-		if (len == 0 || localebuf[len - 1] != '\n')
-		{
-			if (debug)
-				fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
-						progname, localebuf);
-			continue;
-		}
-		localebuf[len - 1] = '\0';
-
-		/*
-		 * Some systems have locale names that don't consist entirely of ASCII
-		 * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
-		 * pretty silly, since we need the locale itself to interpret the
-		 * non-ASCII characters. We can't do much with those, so we filter
-		 * them out.
-		 */
-		skip = false;
-		for (i = 0; i < len; i++)
-		{
-			if (IS_HIGHBIT_SET(localebuf[i]))
-			{
-				skip = true;
-				break;
-			}
-		}
-		if (skip)
-		{
-			if (debug)
-				fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
-						progname, localebuf);
-			continue;
-		}
-
-		enc = pg_get_encoding_from_locale(localebuf, debug);
-		if (enc < 0)
-		{
-			/* error message printed by pg_get_encoding_from_locale() */
-			continue;
-		}
-		if (!PG_VALID_BE_ENCODING(enc))
-			continue;			/* ignore locales for client-only encodings */
-		if (enc == PG_SQL_ASCII)
-			continue;			/* C/POSIX are already in the catalog */
-
-		count++;
-
-		quoted_locale = escape_quotes(localebuf);
-
-		PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-					   quoted_locale, quoted_locale, enc);
-
-		/*
-		 * Generate aliases such as "en_US" in addition to "en_US.utf8" for
-		 * ease of use.  Note that collation names are unique per encoding
-		 * only, so this doesn't clash with "en_US" for LATIN1, say.
-		 */
-		if (normalize_locale_name(alias, localebuf))
-		{
-			char	   *quoted_alias = escape_quotes(alias);
-
-			PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-						   quoted_alias, quoted_locale, enc);
-			free(quoted_alias);
-		}
-		free(quoted_locale);
-	}
+	PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
 
 	/* Add an SQL-standard name */
-	PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
-
-	/*
-	 * When copying collations to the final location, eliminate aliases that
-	 * conflict with an existing locale name for the same encoding.  For
-	 * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
-	 * LATIN1.  But the unnormalized locale "br_FR" already exists for LATIN1.
-	 * Prefer the alias that matches the OS locale name, else the first locale
-	 * name by sort order (arbitrary choice to be deterministic).
-	 *
-	 * Also, eliminate any aliases that conflict with pg_collation's
-	 * hard-wired entries for "C" etc.
-	 */
-	PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
-				" SELECT DISTINCT ON (collname, encoding)"
-				"   collname, "
-				"   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
-				"   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
-				"   encoding, locale, locale "
-				"  FROM tmp_pg_collation"
-				"  WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
-	 "  ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
-
-	/*
-	 * Even though the table is temp, drop it explicitly so it doesn't get
-	 * copied into template0/postgres databases.
-	 */
-	PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
-
-	pclose(locale_a_handle);
-
-	if (count == 0 && !debug)
-	{
-		printf(_("No usable system locales were found.\n"));
-		printf(_("Use the option \"--debug\" to see details.\n"));
-	}
+	PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
 #endif   /* not HAVE_LOCALE_T  && not WIN32 */
 }
 
diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h
index 574b288..ac1a81d 100644
--- a/src/include/catalog/pg_collation_fn.h
+++ b/src/include/catalog/pg_collation_fn.h
@@ -17,7 +17,8 @@
 extern Oid CollationCreate(const char *collname, Oid collnamespace,
 				Oid collowner,
 				int32 collencoding,
-				const char *collcollate, const char *collctype);
+				const char *collcollate, const char *collctype,
+				bool if_not_exists);
 extern void RemoveCollationById(Oid collationOid);
 
 #endif   /* PG_COLLATION_FN_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 047a1ce..6d98188 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5343,6 +5343,9 @@ DESCR("pg_controldata recovery state information as a function");
 DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
 DESCR("pg_controldata init state information as a function");
 
+DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
+DESCR("import collations from operating system");
+
 /*
  * Symbolic values for provolatile column: these indicate whether the result
  * of a function is dependent *only* on the values of its explicit arguments,
-- 
2.10.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to