On 18-12-2016 18:30, Peter Eisentraut wrote: > Updated patch with that fix. > Peter, I reviewed and improved your patch.
* I documented the new function. Since a collation is a database object, I chose the "Database Object Management Functions" section. * I've added if_not_exists handling to the any-encoding entry check in CollationCreate because I got 'FATAL: collation "C" already exists' on Debian 8, although I didn't get it on CentOS 7. The problem is that Debian has two locales for C (C and C.UTF-8) and CentOS has just one (C). * I've added OidIsValid to test the new collation row. * I've changed the parameter order. Schema seems more important than if_not_exists. Also, we generally leave boolean parameters like this for the end of the list. I didn't make if_not_exists optional, but IMO it would be a good idea (default = true). * You removed some #if and #ifdef directives while moving things around. I put them back. * You didn't pgindent some lines of code, but I'm sure that was to keep the patch footprint small. * I didn't test on Windows. * As a last comment, you set cost = 100 and it seems reasonable because it takes 411 ms to scan/load 923 collations on my slow VM. However, successive executions take ~1200 ms. I'm attaching the complete patch and also an incremental patch on top of your last patch. -- Euler Taveira Timbira - http://www.timbira.com.br/ PostgreSQL: Consultoria, Desenvolvimento, Suporte 24x7 e Treinamento
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 10e3186..1e52a48 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -19190,6 +19190,38 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); in the database's default tablespace, the tablespace can be specified as 0. </para> + <para> + Operating system collations are loaded with the + <function>pg_import_system_collations</> function, shown in <xref + linkend="functions-import-collation">. + </para> + + <table id="functions-import-collation"> + <title>Collation Functions</title> + <tgroup cols="3"> + <thead> + <row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row> + </thead> + + <tbody> + <row> + <entry> + <indexterm><primary>pg_import_system_collations</primary></indexterm> + <literal><function>pg_import_system_collations(<parameter>schema</> <type>regnamespace</>, <parameter>if_not_exists</> <type>boolean</>)</function></literal> + </entry> + <entry><type>void</type></entry> + <entry>Import operating system collations</entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + <function>pg_import_system_collations</> loads collations that it finds on + the operating system into system catalog <literal>pg_collation</literal>, + skipping those that are already present. 
+ </para> + </sect2> <sect2 id="functions-admin-index"> diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 63c2eb9..694c0f6 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -98,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace, PointerGetDatum(collname), Int32GetDatum(-1), ObjectIdGetDatum(collnamespace))) - ereport(ERROR, + { + if (if_not_exists) + { + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" already exists, skipping", + collname))); + return InvalidOid; + } + else + ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" already exists", collname))); + } /* open pg_collation */ rel = heap_open(CollationRelationId, RowExclusiveLock); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index e108b50..cf3acea 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -139,7 +139,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters) collctype, false); - if (!newoid) + if (!OidIsValid(newoid)) return InvalidObjectAddress; ObjectAddressSet(address, CollationRelationId, newoid); @@ -183,6 +183,7 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid) } +#ifdef HAVE_LOCALE_T /* * "Normalize" a locale name, stripping off encoding tags such as * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" @@ -216,13 +217,15 @@ normalize_locale_name(char *new, const char *old) return changed; } +#endif /* HAVE_LOCALE_T */ Datum pg_import_system_collations(PG_FUNCTION_ARGS) { - bool if_not_exists = PG_GETARG_BOOL(0); - Oid nspid = PG_GETARG_OID(1); +#if defined(HAVE_LOCALE_T) && !defined(WIN32) + Oid nspid = PG_GETARG_OID(0); + bool if_not_exists = PG_GETARG_BOOL(1); FILE *locale_a_handle; char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ @@ -321,6 +324,7 @@ 
pg_import_system_collations(PG_FUNCTION_ARGS) if (count == 0) ereport(ERROR, (errmsg("no usable system locales were found"))); +#endif /* not HAVE_LOCALE_T && not WIN32 */ PG_RETURN_VOID(); } diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index b0126a9..bb8637e 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5345,7 +5345,7 @@ DESCR("pg_controldata recovery state information as a function"); DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ )); DESCR("pg_controldata init state information as a function"); -DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); +DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "4089 16" _null_ _null_ "{schema,if_not_exists}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); DESCR("import collations from operating system"); /*
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 10e3186..1e52a48 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -19190,6 +19190,38 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); in the database's default tablespace, the tablespace can be specified as 0. </para> + <para> + Operating system collations are loaded with the + <function>pg_import_system_collations</> function, shown in <xref + linkend="functions-import-collation">. + </para> + + <table id="functions-import-collation"> + <title>Collation Functions</title> + <tgroup cols="3"> + <thead> + <row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row> + </thead> + + <tbody> + <row> + <entry> + <indexterm><primary>pg_import_system_collations</primary></indexterm> + <literal><function>pg_import_system_collations(<parameter>schema</> <type>regnamespace</>, <parameter>if_not_exists</> <type>boolean</>)</function></literal> + </entry> + <entry><type>void</type></entry> + <entry>Import operating system collations</entry> + </row> + </tbody> + </tgroup> + </table> + + <para> + <function>pg_import_system_collations</> loads collations that it finds on + the operating system into system catalog <literal>pg_collation</literal>, + skipping those that are already present. 
+ </para> + </sect2> <sect2 id="functions-admin-index"> diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index fa42ad5..694c0f6 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -41,7 +41,8 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, int32 collencoding, - const char *collcollate, const char *collctype) + const char *collcollate, const char *collctype, + bool if_not_exists) { Relation rel; TupleDesc tupDesc; @@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace, PointerGetDatum(collname), Int32GetDatum(collencoding), ObjectIdGetDatum(collnamespace))) - ereport(ERROR, + { + if (if_not_exists) + { + ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_OBJECT), - errmsg("collation \"%s\" for encoding \"%s\" already exists", + errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping", collname, pg_encoding_to_char(collencoding)))); + return InvalidOid; + } + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" already exists", + collname, pg_encoding_to_char(collencoding)))); + } /* * Also forbid matching an any-encoding entry. 
This test of course is not @@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace, PointerGetDatum(collname), Int32GetDatum(-1), ObjectIdGetDatum(collnamespace))) - ereport(ERROR, + { + if (if_not_exists) + { + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" already exists, skipping", + collname))); + return InvalidOid; + } + else + ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" already exists", collname))); + } /* open pg_collation */ rel = heap_open(CollationRelationId, RowExclusiveLock); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index ccadfc2..cf3acea 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters) GetUserId(), GetDatabaseEncoding(), collcollate, - collctype); + collctype, + false); + + if (!OidIsValid(newoid)) + return InvalidObjectAddress; ObjectAddressSet(address, CollationRelationId, newoid); @@ -177,3 +181,150 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid) errmsg("collation \"%s\" already exists in schema \"%s\"", collname, get_namespace_name(nspOid)))); } + + +#ifdef HAVE_LOCALE_T +/* + * "Normalize" a locale name, stripping off encoding tags such as + * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" + * -> "br_FR@euro"). Return true if a new, different name was + * generated. 
+ */ +static bool +normalize_locale_name(char *new, const char *old) +{ + char *n = new; + const char *o = old; + bool changed = false; + + while (*o) + { + if (*o == '.') + { + /* skip over encoding tag such as ".utf8" or ".UTF-8" */ + o++; + while ((*o >= 'A' && *o <= 'Z') + || (*o >= 'a' && *o <= 'z') + || (*o >= '0' && *o <= '9') + || (*o == '-')) + o++; + changed = true; + } + else + *n++ = *o++; + } + *n = '\0'; + + return changed; +} +#endif /* HAVE_LOCALE_T */ + + +Datum +pg_import_system_collations(PG_FUNCTION_ARGS) +{ +#if defined(HAVE_LOCALE_T) && !defined(WIN32) + Oid nspid = PG_GETARG_OID(0); + bool if_not_exists = PG_GETARG_BOOL(1); + + FILE *locale_a_handle; + char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ + int count = 0; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to import system collations")))); + + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + int i; + size_t len; + int enc; + bool skip; + char alias[NAMEDATALEN]; + + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + /* + * Some systems have locale names that don't consist entirely of ASCII + * letters (such as "bokmål" or "français"). This is + * pretty silly, since we need the locale itself to interpret the + * non-ASCII characters. We can't do much with those, so we filter + * them out. 
+ */ + skip = false; + for (i = 0; i < len; i++) + { + if (IS_HIGHBIT_SET(localebuf[i])) + { + skip = true; + break; + } + } + if (skip) + { + elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); + continue; + } + + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) + { + /* error message printed by pg_get_encoding_from_locale() */ + continue; + } + if (!PG_VALID_BE_ENCODING(enc)) + continue; /* ignore locales for client-only encodings */ + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ + + count++; + + CollationCreate(localebuf, nspid, GetUserId(), enc, + localebuf, localebuf, if_not_exists); + + CommandCounterIncrement(); + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" for + * ease of use. Note that collation names are unique per encoding + * only, so this doesn't clash with "en_US" for LATIN1, say. + * + * This always runs in "if not exists" mode, to skip aliases that + * conflict with an existing locale name for the same encoding. For + * example, "br_FR.iso88591" is normalized to "br_FR", both for + * encoding LATIN1. But the unnormalized locale "br_FR" already + * exists for LATIN1. 
+ */ + if (normalize_locale_name(alias, localebuf)) + { + CollationCreate(alias, nspid, GetUserId(), enc, + localebuf, localebuf, true); + CommandCounterIncrement(); + } + } + + ClosePipeStream(locale_a_handle); + + if (count == 0) + ereport(ERROR, + (errmsg("no usable system locales were found"))); +#endif /* not HAVE_LOCALE_T && not WIN32 */ + + PG_RETURN_VOID(); +} diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 1e7d677..c378595 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1608,42 +1608,6 @@ setup_description(FILE *cmdfd) PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n"); } -#ifdef HAVE_LOCALE_T -/* - * "Normalize" a locale name, stripping off encoding tags such as - * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" - * -> "br_FR@euro"). Return true if a new, different name was - * generated. - */ -static bool -normalize_locale_name(char *new, const char *old) -{ - char *n = new; - const char *o = old; - bool changed = false; - - while (*o) - { - if (*o == '.') - { - /* skip over encoding tag such as ".utf8" or ".UTF-8" */ - o++; - while ((*o >= 'A' && *o <= 'Z') - || (*o >= 'a' && *o <= 'z') - || (*o >= '0' && *o <= '9') - || (*o == '-')) - o++; - changed = true; - } - else - *n++ = *o++; - } - *n = '\0'; - - return changed; -} -#endif /* HAVE_LOCALE_T */ - /* * populate pg_collation */ @@ -1651,134 +1615,10 @@ static void setup_collation(FILE *cmdfd) { #if defined(HAVE_LOCALE_T) && !defined(WIN32) - int i; - FILE *locale_a_handle; - char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ - int count = 0; - - locale_a_handle = popen_check("locale -a", "r"); - if (!locale_a_handle) - return; /* complaint already printed */ - - PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( " - " collname name, " - " locale name, " - " encoding int) WITHOUT OIDS;\n\n"); - - while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) - { - size_t len; - int enc; - bool skip; - char *quoted_locale; - 
char alias[NAMEDATALEN]; - - len = strlen(localebuf); - - if (len == 0 || localebuf[len - 1] != '\n') - { - if (debug) - fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"), - progname, localebuf); - continue; - } - localebuf[len - 1] = '\0'; - - /* - * Some systems have locale names that don't consist entirely of ASCII - * letters (such as "bokmål" or "français"). This is - * pretty silly, since we need the locale itself to interpret the - * non-ASCII characters. We can't do much with those, so we filter - * them out. - */ - skip = false; - for (i = 0; i < len; i++) - { - if (IS_HIGHBIT_SET(localebuf[i])) - { - skip = true; - break; - } - } - if (skip) - { - if (debug) - fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"), - progname, localebuf); - continue; - } - - enc = pg_get_encoding_from_locale(localebuf, debug); - if (enc < 0) - { - /* error message printed by pg_get_encoding_from_locale() */ - continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - continue; /* ignore locales for client-only encodings */ - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ - - count++; - - quoted_locale = escape_quotes(localebuf); - - PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n", - quoted_locale, quoted_locale, enc); - - /* - * Generate aliases such as "en_US" in addition to "en_US.utf8" for - * ease of use. Note that collation names are unique per encoding - * only, so this doesn't clash with "en_US" for LATIN1, say. 
- */ - if (normalize_locale_name(alias, localebuf)) - { - char *quoted_alias = escape_quotes(alias); - - PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n", - quoted_alias, quoted_locale, enc); - free(quoted_alias); - } - free(quoted_locale); - } + PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n"); /* Add an SQL-standard name */ - PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8); - - /* - * When copying collations to the final location, eliminate aliases that - * conflict with an existing locale name for the same encoding. For - * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding - * LATIN1. But the unnormalized locale "br_FR" already exists for LATIN1. - * Prefer the alias that matches the OS locale name, else the first locale - * name by sort order (arbitrary choice to be deterministic). - * - * Also, eliminate any aliases that conflict with pg_collation's - * hard-wired entries for "C" etc. - */ - PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) " - " SELECT DISTINCT ON (collname, encoding)" - " collname, " - " (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, " - " (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, " - " encoding, locale, locale " - " FROM tmp_pg_collation" - " WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)" - " ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n"); - - /* - * Even though the table is temp, drop it explicitly so it doesn't get - * copied into template0/postgres databases. 
- */ - PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n"); - - pclose(locale_a_handle); - - if (count == 0 && !debug) - { - printf(_("No usable system locales were found.\n")); - printf(_("Use the option \"--debug\" to see details.\n")); - } + PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8); #endif /* not HAVE_LOCALE_T && not WIN32 */ } diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h index 1ea757f..482ba79 100644 --- a/src/include/catalog/pg_collation_fn.h +++ b/src/include/catalog/pg_collation_fn.h @@ -17,7 +17,8 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, int32 collencoding, - const char *collcollate, const char *collctype); + const char *collcollate, const char *collctype, + bool if_not_exists); extern void RemoveCollationById(Oid collationOid); #endif /* PG_COLLATION_FN_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 37e022d..bb8637e 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5345,6 +5345,9 @@ DESCR("pg_controldata recovery state information as a function"); DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ )); DESCR("pg_controldata init state information as a function"); +DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "4089 16" _null_ 
_null_ "{schema,if_not_exists}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); +DESCR("import collations from operating system"); + /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, diff --git a/src/include/commands/collationcmds.h b/src/include/commands/collationcmds.h index 699ce2f..28ab07f 100644 --- a/src/include/commands/collationcmds.h +++ b/src/include/commands/collationcmds.h @@ -21,4 +21,6 @@ extern ObjectAddress DefineCollation(ParseState *pstate, List *names, List *parameters); extern void IsThereCollationInNamespace(const char *collname, Oid nspOid); +extern Datum pg_import_system_collations(PG_FUNCTION_ARGS); + #endif /* COLLATIONCMDS_H */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers