Internally pg_dump have capability to filter the table data to dump by same
filter clause but it have no interface to use it and the patch here [1]
adds interface to it but it have at-least two issue, one is error message
in case of incorrect where clause specification is somehow hard to debug
and strange to pg_dump .Other issue is it applies the same filter clause to
multiple tables if pattern matching return multiple tables and it seems
undesired behavior to me because mostly we don’t want to applied the same
where clause specification to multiple table. The attached patch contain a
fix for both issue

[1].
https://www.postgresql.org/message-id/flat/CAGiT_HNav5B=OfCdfyFoqTa+oe5W1vG=pxktetcxxg4kcut...@mail.gmail.com


regards

Surafel
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 2f0807e912..1c43eaa9de 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1103,6 +1103,21 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+      <listitem>
+       <para>
+        When dumping data for <replaceable class="parameter">table</replaceable>, only include rows
+        that meet the <replaceable class="parameter">filter_clause</replaceable> condition.
+        This option is useful when you want to dump only a subset of a particular table.
+        <option>--where</option> can be given more than once to provide different filters for multiple tables.
+        Note that if multiple options refer to the same table, only the first filter_clause will be applied.
+        If necessary, use quotes in your shell to provide an argument that contains spaces.
+        E.g. --where=mytable:"created_at >= '2018-01-01' AND test = 'f'"
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
        <term><option>-?</option></term>
        <term><option>--help</option></term>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 89d598f856..566469cdb7 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -121,6 +121,8 @@ static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
 static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
 static SimpleStringList foreign_servers_include_patterns = {NULL, NULL};
 static SimpleOidList foreign_servers_include_oids = {NULL, NULL};
+static SimpleStringList tabledata_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_where_oids = {NULL, NULL};
 
 static const CatalogId nilCatalogId = {0, 0};
 
@@ -156,7 +158,8 @@ static void expand_foreign_server_name_patterns(Archive *fout,
 static void expand_table_name_patterns(Archive *fout,
 									   SimpleStringList *patterns,
 									   SimpleOidList *oids,
-									   bool strict_names);
+									   bool strict_names,
+									   bool match_data);
 static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
 static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
 static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -386,6 +389,7 @@ main(int argc, char **argv)
 		{"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1},
 		{"rows-per-insert", required_argument, NULL, 10},
 		{"include-foreign-data", required_argument, NULL, 11},
+		{"where", required_argument, NULL, 12},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -603,6 +607,10 @@ main(int argc, char **argv)
 										  optarg);
 				break;
 
+			case 12:				/* table data WHERE clause */
+				simple_string_list_append(&tabledata_where_patterns, optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit_nicely(1);
@@ -805,17 +813,26 @@ main(int argc, char **argv)
 	{
 		expand_table_name_patterns(fout, &table_include_patterns,
 								   &table_include_oids,
-								   strict_names);
+								   strict_names, false);
 		if (table_include_oids.head == NULL)
 			fatal("no matching tables were found");
 	}
+
+	if (tabledata_where_patterns.head != NULL)
+	{
+		expand_table_name_patterns(fout, &tabledata_where_patterns,
+								   &tabledata_where_oids,
+								   true, true);
+		if (tabledata_where_oids.head == NULL)
+			fatal("no matching table was found");
+	}
 	expand_table_name_patterns(fout, &table_exclude_patterns,
 							   &table_exclude_oids,
-							   false);
+							   false, false);
 
 	expand_table_name_patterns(fout, &tabledata_exclude_patterns,
 							   &tabledata_exclude_oids,
-							   false);
+							   false, false);
 
 	expand_foreign_server_name_patterns(fout, &foreign_servers_include_patterns,
 										&foreign_servers_include_oids);
@@ -1046,6 +1063,7 @@ help(const char *progname)
 	printf(_("  --use-set-session-authorization\n"
 			 "                               use SET SESSION AUTHORIZATION commands instead of\n"
 			 "                               ALTER OWNER commands to set ownership\n"));
+	printf(_("  --where=TABLE:WHERE_CLAUSE   only dump selected rows for the given table\n"));
 
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=DBNAME      database to dump\n"));
@@ -1393,16 +1411,20 @@ expand_foreign_server_name_patterns(Archive *fout,
 /*
  * Find the OIDs of all tables matching the given list of patterns,
  * and append them to the given OID list. See also expand_dbname_patterns()
- * in pg_dumpall.c
+ * in pg_dumpall.c .If match_data is set, then each pattern is first split on the
+ * ':' character, and the portion after the colon is appended to
+ * the SimpleOidList extra data.
  */
 static void
 expand_table_name_patterns(Archive *fout,
 						   SimpleStringList *patterns, SimpleOidList *oids,
-						   bool strict_names)
+						   bool strict_names, bool match_data)
 {
 	PQExpBuffer query;
 	PGresult   *res;
 	SimpleStringListCell *cell;
+	char *extra_data;
+	char *colon_char;
 	int			i;
 
 	if (patterns->head == NULL)
@@ -1417,6 +1439,19 @@ expand_table_name_patterns(Archive *fout,
 
 	for (cell = patterns->head; cell; cell = cell->next)
 	{
+		/* When match_data is set, split the pattern on the first unquoted ':' character,
+		 * and treat the second-half as extra data to append to the list.
+		 */
+		extra_data = NULL;
+		if (match_data)
+		{
+			colon_char = (char*) findUnquotedChar(cell->val, ':');
+			if (colon_char)
+			{
+				*colon_char = '\0';
+				extra_data = colon_char+1;
+			}
+		}
 		/*
 		 * Query must remain ABSOLUTELY devoid of unqualified names.  This
 		 * would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1443,9 +1478,12 @@ expand_table_name_patterns(Archive *fout,
 		if (strict_names && PQntuples(res) == 0)
 			fatal("no matching tables were found for pattern \"%s\"", cell->val);
 
+		if (extra_data && PQntuples(res) != 1)
+			fatal("multiple matching tables \"%s\" on where clause", cell->val);
+
 		for (i = 0; i < PQntuples(res); i++)
 		{
-			simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+			simple_oid_list_append_data(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
 		}
 
 		PQclear(res);
@@ -1872,6 +1910,7 @@ dumpTableData_copy(Archive *fout, void *dcontext)
 	 */
 	if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE)
 	{
+		ArchiveHandle *AH = (ArchiveHandle *) fout;
 		/* Note: this syntax is only supported in 8.2 and up */
 		appendPQExpBufferStr(q, "COPY (SELECT ");
 		/* klugery to get rid of parens in column list */
@@ -1886,14 +1925,17 @@ dumpTableData_copy(Archive *fout, void *dcontext)
 		appendPQExpBuffer(q, "FROM %s %s) TO stdout;",
 						  fmtQualifiedDumpable(tbinfo),
 						  tdinfo->filtercond ? tdinfo->filtercond : "");
+		res = PQexec(AH->connection, q->data);
+		if (PQresultStatus(res) != PGRES_COPY_OUT)
+			fatal("processing of table \"%s\" failed", fmtQualifiedDumpable(tbinfo));
 	}
 	else
 	{
 		appendPQExpBuffer(q, "COPY %s %s TO stdout;",
 						  fmtQualifiedDumpable(tbinfo),
 						  column_list);
+		res = ExecuteSqlQuery(fout, q->data, PGRES_COPY_OUT);
 	}
-	res = ExecuteSqlQuery(fout, q->data, PGRES_COPY_OUT);
 	PQclear(res);
 	destroyPQExpBuffer(clistBuf);
 
@@ -2012,9 +2054,20 @@ dumpTableData_insert(Archive *fout, void *dcontext)
 					  "SELECT * FROM ONLY %s",
 					  fmtQualifiedDumpable(tbinfo));
 	if (tdinfo->filtercond)
+	{
+		ArchiveHandle *AH = (ArchiveHandle *) fout;
+		PGresult   *result;
+
 		appendPQExpBuffer(q, " %s", tdinfo->filtercond);
+		result = PQexec(AH->connection, q->data);
 
-	ExecuteSqlStatement(fout, q->data);
+		if (PQresultStatus(result) != PGRES_COMMAND_OK)
+			fatal("processing of table \"%s\" failed", fmtQualifiedDumpable(tbinfo));
+
+		PQclear(result);
+	}
+	else
+		ExecuteSqlStatement(fout, q->data);
 
 	while (1)
 	{
@@ -2390,6 +2443,7 @@ static void
 makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo)
 {
 	TableDataInfo *tdinfo;
+	char *filter_clause;
 
 	/*
 	 * Nothing to do if we already decided to dump the table.  This will
@@ -2442,6 +2496,22 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo)
 	tdinfo->dobj.namespace = tbinfo->dobj.namespace;
 	tdinfo->tdtable = tbinfo;
 	tdinfo->filtercond = NULL;	/* might get set later */
+
+	/*
+	 * --where=<table_name>:<filter_clause> may be provided for this table.
+	 * If provided, filter_clause will be something like "foo < 5", so wrap it in a WHERE clause.
+	 */
+	filter_clause = NULL;
+	if (simple_oid_list_find_data(&tabledata_where_oids,
+		tbinfo->dobj.catId.oid,
+		(void**) &filter_clause))
+	{
+		if (filter_clause)
+		{
+			tdinfo->filtercond = psprintf("WHERE (%s)", filter_clause);
+		}
+	}
+
 	addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
 
 	tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index 5294fba8bb..92b3162ab9 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -24,12 +24,31 @@
  */
 void
 simple_oid_list_append(SimpleOidList *list, Oid val)
+{
+	simple_oid_list_append_data(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+	return simple_oid_list_find_data(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data)
 {
 	SimpleOidListCell *cell;
 
 	cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
 	cell->next = NULL;
 	cell->val = val;
+	cell->extra_data = extra_data;
 
 	if (list->tail)
 		list->tail->next = cell;
@@ -40,16 +59,22 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
 
 /*
  * Is OID present in the list?
+ * If so, return true, and provide pointer-sized data by setting result of extra_data parameter.
+ * If not, return false.
  */
 bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data)
 {
 	SimpleOidListCell *cell;
 
 	for (cell = list->head; cell; cell = cell->next)
 	{
 		if (cell->val == val)
+		{
+			if (extra_data)
+				*extra_data = cell->extra_data;
 			return true;
+		}
 	}
 	return false;
 }
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index b4bba26934..d4d5f165c9 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -1027,3 +1027,34 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
 	return added_clause;
 #undef WHEREAND
 }
+
+/*
+ * findUnquotedChar
+ *
+ * Scan a string and return a pointer to the first character that
+ * matches a given character, which isn't in double-quotes.
+ *
+ * For example, searching for a colon inside of:
+ *    'hello"this:is:in:quotes"but:this:is:not'
+ * would return a pointer to this ^ colon.  (the one after 'but')
+ *
+ * Returns NULL if not found.
+ *
+ * str: string to scan.
+ * find_char: character to find.
+ */
+const char *
+findUnquotedChar(const char *str, char find_char)
+{
+	const char *p = str;
+	bool inquotes = false;
+
+	while (*p++)
+	{
+		if (*p == '"')
+			inquotes = !inquotes;
+		else if ((*p == find_char) && !inquotes)
+			return p;
+	}
+	return NULL;
+}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index db04e677f2..b300c02874 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
 {
 	struct SimpleOidListCell *next;
 	Oid			val;
+	void		*extra_data;
 } SimpleOidListCell;
 
 typedef struct SimpleOidList
@@ -59,6 +60,9 @@ extern void simple_oid_list_append(SimpleOidList *list, Oid val);
 extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
 extern void simple_oid_list_destroy(SimpleOidList *list);
 
+extern void simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data);
+
 extern void simple_string_list_append(SimpleStringList *list, const char *val);
 extern bool simple_string_list_member(SimpleStringList *list, const char *val);
 extern void simple_string_list_destroy(SimpleStringList *list);
diff --git a/src/include/fe_utils/string_utils.h b/src/include/fe_utils/string_utils.h
index 5924d3248a..3aee484650 100644
--- a/src/include/fe_utils/string_utils.h
+++ b/src/include/fe_utils/string_utils.h
@@ -56,4 +56,6 @@ extern bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf,
 								  const char *schemavar, const char *namevar,
 								  const char *altnamevar, const char *visibilityrule);
 
+extern const char *findUnquotedChar(const char *str, char find_char);
+
 #endif							/* STRING_UTILS_H */

Reply via email to