Hi
> The above comments are fixed in the attached, as well as a pass over the > docs > and extended tests to actually test matching a foreign server. What do > think > about this version? I'm still not convinced that there aren't more quoting > bugs in the parser, but I've left that intact for now. > This patch is based on the version that you sent 21.9. Just I modified string comparison in keyword detection. If we don't allow support abbreviations of keywords (and I dislike it), then the check of size is necessary. Any other is without change. Regards Pavel > > -- > Daniel Gustafsson https://vmware.com/ > >
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 7682226b99..6e9a56f68c 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -789,6 +789,67 @@ PostgreSQL documentation </listitem> </varlistentry> + <varlistentry> + <term><option>--filter=<replaceable class="parameter">filename</replaceable></option></term> + <listitem> + <para> + Filtering rules for which objects to dump are read from the specified + file. Specify <filename>-</filename> to read from + <literal>STDIN</literal>. The file has the following format: +<synopsis> +{ include | exclude } { table | schema | foreign_data | data } <replaceable class="parameter">PATTERN</replaceable> +</synopsis> + </para> + + <para> + The first keyword specifies whether the object is to be included or + excluded, and the second keyword specifies the type of object to be + filtered: + <itemizedlist> + <listitem> + <para><literal>table</literal>: table</para> + </listitem> + <listitem> + <para><literal>schema</literal>: schema</para> + </listitem> + <listitem> + <para><literal>foreign_data</literal>: foreign server</para> + </listitem> + <listitem> + <para><literal>data</literal>: table data</para> + </listitem> + </itemizedlist> + </para> + + <para> + Example: +<programlisting> +include table mytable* +exclude table mytable2 +</programlisting> + With the this filter file, the dump would include all tables with + name starting by <literal>mytable</literal>, but exclude table + <literal>mytable2</literal>. + </para> + + <para> + Lines starting with <literal>#</literal> are considered comments and + are ignored. Comments can be placed after filter as well. Blank lines + are also ignored. + </para> + + <para> + The <option>--filter</option> option works just like the other + options to include or exclude tables (<option>-t</option> or + <option>--table</option>), schemas (<option>-n</option> or + <option>--schema</option>), table data (<option>--exclude-table-data</option>), + or foreign tables (<option>--include-foreign-data</option>). + It isn't possible to exclude a specific foreign table or + to include a specific table's data. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><option>--if-exists</option></term> <listitem> diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index a485fb2d07..ec979b9f6f 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -55,10 +55,12 @@ #include "catalog/pg_trigger_d.h" #include "catalog/pg_type_d.h" #include "common/connect.h" +#include "common/string.h" #include "dumputils.h" #include "fe_utils/option_utils.h" #include "fe_utils/string_utils.h" #include "getopt_long.h" +#include "lib/stringinfo.h" #include "libpq/libpq-fs.h" #include "parallel.h" #include "pg_backup_db.h" @@ -90,6 +92,28 @@ typedef enum OidOptions zeroAsNone = 4 } OidOptions; +/* + * State data for reading filter items from stream. + */ +typedef struct +{ + FILE *fp; + char *filename; + int lineno; +} FilterStateData; + +/* + * List of objects that can be specified in filter file + */ +typedef enum +{ + FILTER_OBJECT_TYPE_NONE, + FILTER_OBJECT_TYPE_TABLE, + FILTER_OBJECT_TYPE_SCHEMA, + FILTER_OBJECT_TYPE_FOREIGN_DATA, + FILTER_OBJECT_TYPE_DATA +} FilterObjectType; + /* global decls */ static bool dosync = true; /* Issue fsync() to make dump durable on disk. */ @@ -308,6 +332,7 @@ static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions, static char *get_synchronized_snapshot(Archive *fout); static void setupDumpWorker(Archive *AHX); static TableInfo *getRootTableInfo(const TableInfo *tbinfo); +static void read_filters_from_file(char *filename, DumpOptions *dopt); int @@ -380,6 +405,7 @@ main(int argc, char **argv) {"enable-row-security", no_argument, &dopt.enable_row_security, 1}, {"exclude-table-data", required_argument, NULL, 4}, {"extra-float-digits", required_argument, NULL, 8}, + {"filter", required_argument, NULL, 12}, {"if-exists", no_argument, &dopt.if_exists, 1}, {"inserts", no_argument, NULL, 9}, {"lock-wait-timeout", required_argument, NULL, 2}, @@ -613,6 +639,10 @@ main(int argc, char **argv) optarg); break; + case 12: /* filter implementation */ + read_filters_from_file(optarg, &dopt); + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit_nicely(1); @@ -1038,6 +1068,8 @@ help(const char *progname) " access to)\n")); printf(_(" --exclude-table-data=PATTERN do NOT dump data for the specified table(s)\n")); printf(_(" --extra-float-digits=NUM override default setting for extra_float_digits\n")); + printf(_(" --filter=FILENAME dump objects and data based on the filter expressions\n" + " in specified file\n")); printf(_(" --if-exists use IF EXISTS when dropping objects\n")); printf(_(" --include-foreign-data=PATTERN\n" " include data of foreign tables on foreign\n" @@ -18979,3 +19011,327 @@ appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions, if (!res) pg_log_warning("could not parse reloptions array"); } + +/* + * Print error message and exit. + */ +static void +exit_invalid_filter_format(FilterStateData *fstate, char *message) +{ + if (fstate->fp != stdin) + { + pg_log_error("invalid format of filter file \"%s\" on line %d: %s", + fstate->filename, + fstate->lineno, + message); + + fclose(fstate->fp); + } + else + pg_log_error("invalid format of filter on line %d: %s", + fstate->lineno, + message); + + exit_nicely(1); +} + +/* + * Search keyword (can contains only ascii alphabetic characters) on line. + * Returns NULL, when the line is empty or first char is not alpha + */ +static const char * +filter_get_keyword(const char **line, int *size) +{ + const char *ptr = *line; + const char *result = NULL; + + /* skip initial white spaces */ + while (isspace(*ptr)) + ptr += 1; + + if (isascii(*ptr) && isalpha(*ptr)) + { + result = ptr++; + + while (isascii(*ptr) && (isalpha(*ptr) || *ptr == '_')) + ptr += 1; + + *size = ptr - result; + } + + *line = ptr; + + return result; +} + +/* + * Sets objname to string with object identifier. The line variable holds + * string of last line with object identifier (object name). Returns pointer to + * first char after object name. + */ +static char * +filter_get_object_name(FilterStateData *fstate, + StringInfo line, + char *str, + char **objname) +{ + /* skip white spaces */ + while (isspace(*str)) + str++; + + if (*str == '\0') + exit_invalid_filter_format(fstate, "missing object name"); + + if (*str == '"') + { + PQExpBuffer quoted_name = createPQExpBuffer(); + + appendPQExpBufferChar(quoted_name, '"'); + str++; + + while (1) + { + if (*str == '\0') + { + if (!pg_get_line_buf(fstate->fp, line)) + { + if (ferror(fstate->fp)) + { + pg_log_error("could not read from file \"%s\": %m", fstate->filename); + if (fstate->fp != stdin) + fclose(fstate->fp); + + exit_nicely(1); + } + + exit_invalid_filter_format(fstate,"unexpected end of file"); + } + + str = line->data; + (void) pg_strip_crlf(str); + + appendPQExpBufferChar(quoted_name, '\n'); + fstate->lineno += 1; + } + + if (*str == '"') + { + appendPQExpBufferChar(quoted_name, '"'); + str++; + + if (*str == '"') + { + appendPQExpBufferChar(quoted_name, '"'); + str++; + } + else + break; + } + else if (*str == '\\') + { + str++; + if (*str == 'n') + appendPQExpBufferChar(quoted_name, '\n'); + else if (*str == '\\') + appendPQExpBufferChar(quoted_name, '\\'); + + str++; + } + else + appendPQExpBufferChar(quoted_name, *str++); + } + + *objname = quoted_name->data; + } + else + { + char *startptr = str++; + + /* simple variant, read to end or to first space */ + while (*str && !isspace(*str)) + str++; + + *objname = pnstrdup(startptr, str - startptr); + } + + return str; +} + +/* + * Returns true, when one filter item was successfully read and parsed. + * When object name contains \n chars, then more than one line from input + * file can be processed. Returns false when EOF. Run exit on error. + */ +static bool +read_filter_item(FilterStateData *fstate, + bool *is_include, + char **objname, + FilterObjectType *objtype) +{ + StringInfoData line; + + initStringInfo(&line); + + if (pg_get_line_buf(fstate->fp, &line)) + { + char *str = line.data; + const char *keyword; + int size; + + fstate->lineno += 1; + + (void) pg_strip_crlf(str); + + /* skip initial white spaces */ + while (isspace(*str)) + str++; + + /* + * skip empty lines or lines when first noblank char is hash (comment) + */ + if (*str != '\0' && *str != '#') + { + keyword = filter_get_keyword((const char **) &str, &size); + if (!keyword) + exit_invalid_filter_format(fstate, + "no keyword found (expected \"include\" or \"exclude\")"); + + /* Now we expect sequence of two keywords */ + if (size == 7 && pg_strncasecmp(keyword, "include", 7) == 0) + *is_include = true; + else if (size == 7 && pg_strncasecmp(keyword, "exclude", 7) == 0) + *is_include = false; + else + exit_invalid_filter_format(fstate, + "invalid keyword (expected \"include\" or \"exclude\")"); + + keyword = filter_get_keyword((const char **) &str, &size); + if (!keyword) + exit_invalid_filter_format(fstate, + "no keyword found (expected \"table\", \"schema\", \"foreign_data\" or \"data\")"); + + if (size == 5 && pg_strncasecmp(keyword, "table", 5) == 0) + *objtype = FILTER_OBJECT_TYPE_TABLE; + else if (size == 6 && pg_strncasecmp(keyword, "schema", 6) == 0) + *objtype = FILTER_OBJECT_TYPE_SCHEMA; + else if (size == 12 && pg_strncasecmp(keyword, "foreign_data", 12) == 0) + *objtype = FILTER_OBJECT_TYPE_FOREIGN_DATA; + else if (size == 4 && pg_strncasecmp(keyword, "data", 4) == 0) + *objtype = FILTER_OBJECT_TYPE_DATA; + else + exit_invalid_filter_format(fstate, + "invalid keyword (expected \"table\", \"schema\", \"foreign_data\" or \"data\")"); + + str = filter_get_object_name(fstate, &line, str, objname); + + /* + * check possible content after object identifier. + * Allow comment started by hash. + */ + while (isspace(*str)) + str++; + + if (*str != '\0' && *str != '#') + exit_invalid_filter_format(fstate, + "unexpected chars after object name"); + } + else + { + *objname = NULL; + *objtype = FILTER_OBJECT_TYPE_NONE; + } + + free(line.data); + + return true; + } + + if (ferror(fstate->fp)) + { + pg_log_error("could not read from file \"%s\": %m", fstate->filename); + + if (fstate->fp != stdin) + fclose(fstate->fp); + + exit_nicely(1); + } + + free(line.data); + + return false; +} + +/* + * Read dumped object specification from file + */ +static void +read_filters_from_file(char *filename, DumpOptions *dopt) +{ + FilterStateData fstate; + bool is_include; + char *objname; + FilterObjectType objtype; + + fstate.filename = filename; + fstate.lineno = 0; + + /* use "-" as symbol for stdin */ + if (strcmp(filename, "-") != 0) + { + fstate.fp = fopen(filename, "r"); + if (!fstate.fp) + fatal("could not open the input file \"%s\": %m", + filename); + } + else + fstate.fp = stdin; + + while (read_filter_item(&fstate, &is_include, &objname, &objtype)) + { + if (objtype == FILTER_OBJECT_TYPE_TABLE) + { + if (is_include) + { + simple_string_list_append(&table_include_patterns, objname); + dopt->include_everything = false; + } + else + simple_string_list_append(&table_exclude_patterns, objname); + } + else if (objtype == FILTER_OBJECT_TYPE_SCHEMA) + { + if (is_include) + { + simple_string_list_append(&schema_include_patterns, + objname); + dopt->include_everything = false; + } + else + simple_string_list_append(&schema_exclude_patterns, + objname); + } + else if (objtype == FILTER_OBJECT_TYPE_DATA) + { + if (is_include) + exit_invalid_filter_format(&fstate, + "include filter is not allowed for this type of object"); + else + simple_string_list_append(&tabledata_exclude_patterns, + objname); + } + else if (objtype == FILTER_OBJECT_TYPE_FOREIGN_DATA) + { + if (is_include) + simple_string_list_append(&foreign_servers_include_patterns, + objname); + else + exit_invalid_filter_format(&fstate, + "exclude filter is not allowed for this type of object"); + } + + free(objname); + } + + if (fstate.fp != stdin) + fclose(fstate.fp); +} diff --git a/src/bin/pg_dump/t/004_pg_dump_filterfile.pl b/src/bin/pg_dump/t/004_pg_dump_filterfile.pl new file mode 100644 index 0000000000..adc5675e40 --- /dev/null +++ b/src/bin/pg_dump/t/004_pg_dump_filterfile.pl @@ -0,0 +1,198 @@ +use strict; +use warnings; + +use Config; +use PostgresNode; +use TestLib; +use Test::More tests => 29; + +my $tempdir = TestLib::tempdir; +my $inputfile; + + +my $node = PostgresNode->new('main'); +my $port = $node->port; +my $backupdir = $node->backup_dir; +my $plainfile = "$backupdir/plain.sql"; + +$node->init; +$node->start; + +# Generate test objects +$node->safe_psql('postgres', 'CREATE FOREIGN DATA WRAPPER dummy;'); +$node->safe_psql('postgres', 'CREATE SERVER dummyserver FOREIGN DATA WRAPPER dummy;'); + +$node->safe_psql('postgres', "CREATE TABLE table_one(a varchar)"); +$node->safe_psql('postgres', "CREATE TABLE table_two(a varchar)"); +$node->safe_psql('postgres', "CREATE TABLE table_three(a varchar)"); +$node->safe_psql('postgres', "CREATE TABLE \"strange aaa +name\"(a varchar)"); +$node->safe_psql('postgres', "CREATE TABLE \" +t +t +\"(a int)"); + +$node->safe_psql('postgres', "INSERT INTO table_one VALUES('*** TABLE ONE ***')"); +$node->safe_psql('postgres', "INSERT INTO table_two VALUES('*** TABLE TWO ***')"); +$node->safe_psql('postgres', "INSERT INTO table_three VALUES('*** TABLE THREE ***')"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile " include table table_one #comment\n"; +print $inputfile "include table table_two\n"; +print $inputfile "# skip this line\n"; +print $inputfile "\n"; +print $inputfile "exclude data table_one\n"; +close $inputfile; + +my ($cmd, $stdout, $stderr, $result); +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +my $dump = slurp_file($plainfile); + +ok($dump =~ qr/^CREATE TABLE public.table_one/m, "dumped table one"); +ok($dump =~ qr/^CREATE TABLE public.table_two/m, "dumped table two"); +ok($dump !~ qr/^CREATE TABLE public.table_three/m, "table three not dumped"); +ok($dump !~ qr/^COPY public.table_one/m, "content of table one is not included"); +ok($dump =~ qr/^COPY public.table_two/m, "content of table two is included"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "exclude table table_one\n"; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +$dump = slurp_file($plainfile); + +ok($dump !~ qr/^CREATE TABLE public.table_one/m, "table one not dumped"); +ok($dump =~ qr/^CREATE TABLE public.table_two/m, "dumped table two"); +ok($dump =~ qr/^CREATE TABLE public.table_three/m, "dumped table three"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "include table \"strange aaa +name\""; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +$dump = slurp_file($plainfile); + +ok($dump =~ qr/^CREATE TABLE public.\"strange aaa/m, "dump table with new line in name"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "exclude table \"strange aaa\\nname\""; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +$dump = slurp_file($plainfile); + +ok($dump !~ qr/^CREATE TABLE public.\"strange aaa/m, "dump table with new line in name"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "exclude schema public\n"; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +$dump = slurp_file($plainfile); + +ok($dump !~ qr/^CREATE TABLE/m, "no table dumped"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "include table \" +t +t +\""; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +$dump = slurp_file($plainfile); + +ok($dump =~ qr/^CREATE TABLE public.\"\nt\nt\n\" \($/ms, "dump table with multiline strange name"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "include table \"\\nt\\nt\\n\""; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump tables with filter"); + +$dump = slurp_file($plainfile); + +ok($dump =~ qr/^CREATE TABLE public.\"\nt\nt\n\" \($/ms, "dump table with multiline strange name"); + +######################################### +# Test foreign_data + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "include foreign_data doesnt_exists\n"; +close $inputfile; + +command_fails_like( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + qr/pg_dump: error: no matching foreign servers were found for pattern/, + "dump foreign server"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile, "include foreign_data dummy*\n"; +close $inputfile; + +command_ok( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + "dump foreign_data with filter"); + +######################################### +# Test broken input format + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "k"; +close $inputfile; + +command_fails_like( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + qr/invalid keyword/, + "broken format check"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "include xxx"; +close $inputfile; + +command_fails_like( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + qr/invalid keyword/, + "broken format check"); + +open $inputfile, '>', "$tempdir/inputfile.txt" + or die "unable to open filterfile for writing"; +print $inputfile "include table"; +close $inputfile; + +command_fails_like( + [ "pg_dump", '-p', $port, "-f", $plainfile, "--filter=$tempdir/inputfile.txt", 'postgres' ], + qr/missing object name/, + "broken format check");