On Fri, Apr 04, 2025 at 04:11:05PM -0400, Andrew Dunstan wrote:
> Thanks. I have pushed these now with a few further small tweaks.

This drops all databases:

pg_dumpall --clean -Fd -f /tmp/dump
pg_restore -d template1 --globals-only /tmp/dump

That didn't match my expectations given this help text:

$ pg_restore --help|grep global
  -g, --globals-only           restore only global objects, no databases

This happens in dropDBs().  I found that by searching pg_dumpall.c for "OPF",
which finds all the content we can write to globals.dat.

commit 1495eff wrote:
> --- a/src/bin/pg_dump/pg_dumpall.c
> +++ b/src/bin/pg_dump/pg_dumpall.c

> @@ -1612,9 +1683,27 @@ dumpDatabases(PGconn *conn)
>                       continue;
>               }
>  
> +             /*
> +              * If this is not a plain format dump, then append dboid and 
> dbname to
> +              * the map.dat file.
> +              */
> +             if (archDumpFormat != archNull)
> +             {
> +                     if (archDumpFormat == archCustom)
> +                             snprintf(dbfilepath, MAXPGPATH, 
> "\"%s\"/\"%s\".dmp", db_subdir, oid);
> +                     else if (archDumpFormat == archTar)
> +                             snprintf(dbfilepath, MAXPGPATH, 
> "\"%s\"/\"%s\".tar", db_subdir, oid);
> +                     else
> +                             snprintf(dbfilepath, MAXPGPATH, 
> "\"%s\"/\"%s\"", db_subdir, oid);

Use appendShellString() instead.  Plain mode already does that for the
"pg_dumpall -f" argument, which is part of db_subdir here.  We don't want
weird filename characters to work out differently for plain vs. non-plain
mode.  Also, it's easier to search for appendShellString() than to search for
open-coded shell quoting.

> @@ -1641,19 +1727,30 @@ dumpDatabases(PGconn *conn)
>               if (filename)
>                       fclose(OPF);
>  
> -             ret = runPgDump(dbname, create_opts);
> +             ret = runPgDump(dbname, create_opts, dbfilepath, 
> archDumpFormat);
>               if (ret != 0)
>                       pg_fatal("pg_dump failed on database \"%s\", exiting", 
> dbname);
>  
>               if (filename)
>               {
> -                     OPF = fopen(filename, PG_BINARY_A);
> +                     char            global_path[MAXPGPATH];
> +
> +                     if (archDumpFormat != archNull)
> +                             snprintf(global_path, MAXPGPATH, 
> "%s/global.dat", filename);
> +                     else
> +                             snprintf(global_path, MAXPGPATH, "%s", 
> filename);
> +
> +                     OPF = fopen(global_path, PG_BINARY_A);
>                       if (!OPF)
>                               pg_fatal("could not re-open the output file 
> \"%s\": %m",
> -                                              filename);
> +                                              global_path);

Minor item: plain mode benefits from reopening, because pg_dump appended to
the plain output file.  There's no analogous need to reopen global.dat, since
just this one process writes to global.dat.

> @@ -1672,17 +1770,36 @@ runPgDump(const char *dbname, const char *create_opts)
>       initPQExpBuffer(&connstrbuf);
>       initPQExpBuffer(&cmd);
>  
> -     printfPQExpBuffer(&cmd, "\"%s\" %s %s", pg_dump_bin,
> -                                       pgdumpopts->data, create_opts);
> -
>       /*
> -      * If we have a filename, use the undocumented plain-append pg_dump
> -      * format.
> +      * If this is not a plain format dump, then append file name and dump
> +      * format to the pg_dump command to get archive dump.
>        */
> -     if (filename)
> -             appendPQExpBufferStr(&cmd, " -Fa ");
> +     if (archDumpFormat != archNull)
> +     {
> +             printfPQExpBuffer(&cmd, "\"%s\" -f %s %s", pg_dump_bin,
> +                                               dbfile, create_opts);
> +
> +             if (archDumpFormat == archDirectory)
> +                     appendPQExpBufferStr(&cmd, "  --format=directory ");
> +             else if (archDumpFormat == archCustom)
> +                     appendPQExpBufferStr(&cmd, "  --format=custom ");
> +             else if (archDumpFormat == archTar)
> +                     appendPQExpBufferStr(&cmd, "  --format=tar ");
> +     }
>       else
> -             appendPQExpBufferStr(&cmd, " -Fp ");
> +     {
> +             printfPQExpBuffer(&cmd, "\"%s\" %s %s", pg_dump_bin,
> +                                               pgdumpopts->data, 
> create_opts);

This uses pgdumpopts for plain mode only, so many pg_dumpall options silently
have no effect in non-plain mode.  Example:

strace -f pg_dumpall --lock-wait-timeout=10 2>&1 >/dev/null | grep exec
strace -f pg_dumpall --lock-wait-timeout=10 -Fd -f /tmp/dump3 2>&1 >/dev/null | 
grep exec

> --- a/src/bin/pg_dump/pg_restore.c
> +++ b/src/bin/pg_dump/pg_restore.c

> +/*
> + * read_one_statement
> + *
> + * This will start reading from passed file pointer using fgetc and read till
> + * semicolon(sql statement terminator for global.dat file)
> + *
> + * EOF is returned if end-of-file input is seen; time to shut down.

What makes it okay to use this particular subset of SQL lexing?

> +/*
> + * get_dbnames_list_to_restore
> + *
> + * This will mark for skipping any entries from dbname_oid_list that pattern 
> match an
> + * entry in the db_exclude_patterns list.
> + *
> + * Returns the number of database to be restored.
> + *
> + */
> +static int
> +get_dbnames_list_to_restore(PGconn *conn,
> +                                                     SimpleOidStringList 
> *dbname_oid_list,
> +                                                     SimpleStringList 
> db_exclude_patterns)
> +{
> +     int                     count_db = 0;
> +     PQExpBuffer query;
> +     PGresult   *res;
> +
> +     query = createPQExpBuffer();
> +
> +     if (!conn)
> +             pg_log_info("considering PATTERN as NAME for --exclude-database 
> option as no db connection while doing pg_restore.");

When do we not have a connection here?  We'd need to document this behavior
variation if it stays, but I'd prefer if we can just rely on having a
connection.

> +             /* If database is already created, then don't set createDB 
> flag. */
> +             if (opts->cparams.dbname)
> +             {
> +                     PGconn     *test_conn;
> +
> +                     test_conn = ConnectDatabase(db_cell->str, NULL, 
> opts->cparams.pghost,
> +                                                                             
> opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT,
> +                                                                             
> false, progname, NULL, NULL, NULL, NULL);
> +                     if (test_conn)
> +                     {
> +                             PQfinish(test_conn);
> +
> +                             /* Use already created database for connection. 
> */
> +                             opts->createDB = 0;
> +                             opts->cparams.dbname = db_cell->str;
> +                     }
> +                     else
> +                     {
> +                             /* we'll have to create it */
> +                             opts->createDB = 1;
> +                             opts->cparams.dbname = connected_db;
> +                     }

In released versions, "pg_restore --create" fails if the database exists, and
pg_restore w/o --create fails unless the database exists.  I think we should
continue that pattern in this new feature.  If not, pg_restore should document
how it treats pg_dumpall-sourced dumps with the "create if not exists"
semantics appearing here.


Reply via email to