From 6cff0486e50f11ede3bdfde76c0f0c7414b5b051 Mon Sep 17 00:00:00 2001
From: Carter Thaxton <carter.thaxton@gmail.com>
Date: Sat, 19 May 2018 01:56:51 -0700
Subject: [PATCH] Add --include-table-data-where option to pg_dump

---
 doc/src/sgml/ref/pg_dump.sgml      | 22 +++++++++++++
 src/bin/pg_dump/pg_dump.c          | 64 +++++++++++++++++++++++++++++++++-----
 src/fe_utils/simple_list.c         | 27 ++++++++++++++--
 src/include/fe_utils/simple_list.h |  4 +++
 4 files changed, 108 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 50809b4..e02359e 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -771,6 +771,28 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>--include-table-data-where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+      <listitem>
+       <para>
+        When dumping data for any tables matching the <replaceable
+        class="parameter">table</replaceable> pattern, only include rows
+        that meet the <replaceable
+        class="parameter">filter_clause</replaceable> condition.
+        This option is useful when you want to dump only a subset of a particular table.
+        The table pattern is interpreted according to the same rules as for <option>-t</option>.
+        <option>--include-table-data-where</option> can be given more than once to
+        provide different filters for multiple tables.
+        Note that if multiple options refer to the same table, only the first <replaceable class="parameter">filter_clause</replaceable> will be applied.
+        If necessary, use quotes in your shell to provide an argument that contains spaces.
+        For example: <literal>--include-table-data-where=mytable:"created_at &gt;= '2018-01-01' AND test = 'f'"</literal>
+       </para>
+       <para>
+        To exclude data for all tables in the database, see <option>--schema-only</option>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
       <term><option>--inserts</option></term>
       <listitem>
        <para>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d6ceb72..f8e62c3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -119,6 +119,8 @@ static SimpleOidList schema_exclude_oids = {NULL, NULL};
 
 static SimpleStringList table_include_patterns = {NULL, NULL};
 static SimpleOidList table_include_oids = {NULL, NULL};
+static SimpleStringList tabledata_include_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_include_where_oids = {NULL, NULL};
 static SimpleStringList table_exclude_patterns = {NULL, NULL};
 static SimpleOidList table_exclude_oids = {NULL, NULL};
 static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
@@ -154,7 +156,8 @@ static void expand_schema_name_patterns(Archive *fout,
 static void expand_table_name_patterns(Archive *fout,
 						   SimpleStringList *patterns,
 						   SimpleOidList *oids,
-						   bool strict_names);
+						   bool strict_names,
+						   bool match_data);
 static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
 static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
 static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -360,6 +363,7 @@ main(int argc, char **argv)
 		{"enable-row-security", no_argument, &dopt.enable_row_security, 1},
 		{"exclude-table-data", required_argument, NULL, 4},
 		{"if-exists", no_argument, &dopt.if_exists, 1},
+		{"include-table-data-where", required_argument, NULL, 8},
 		{"inserts", no_argument, &dopt.dump_inserts, 1},
 		{"lock-wait-timeout", required_argument, NULL, 2},
 		{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
@@ -562,6 +566,10 @@ main(int argc, char **argv)
 				dosync = false;
 				break;
 
+			case 8:				/* include table(s) data WHERE clause */
+				simple_string_list_append(&tabledata_include_where_patterns, optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit_nicely(1);
@@ -761,17 +769,26 @@ main(int argc, char **argv)
 	{
 		expand_table_name_patterns(fout, &table_include_patterns,
 								   &table_include_oids,
-								   strict_names);
+								   strict_names, false);
 		if (table_include_oids.head == NULL)
 			exit_horribly(NULL, "no matching tables were found\n");
 	}
+	if (tabledata_include_where_patterns.head != NULL)
+	{
+		expand_table_name_patterns(fout, &tabledata_include_where_patterns,
+								   &tabledata_include_where_oids,
+								   true, 	/* Always use strict names for WHERE pattern */
+								   true);	/* Match extra data after ':' character in each argument */
+		if (tabledata_include_where_oids.head == NULL)
+			exit_horribly(NULL, "no matching tables were found for WHERE clause\n");
+	}
 	expand_table_name_patterns(fout, &table_exclude_patterns,
 							   &table_exclude_oids,
-							   false);
+							   false, false);
 
 	expand_table_name_patterns(fout, &tabledata_exclude_patterns,
 							   &tabledata_exclude_oids,
-							   false);
+							   false, false);
 
 	/* non-matching exclusion patterns aren't an error */
 
@@ -980,6 +997,8 @@ help(const char *progname)
 			 "                               access to)\n"));
 	printf(_("  --exclude-table-data=TABLE   do NOT dump data for the named table(s)\n"));
 	printf(_("  --if-exists                  use IF EXISTS when dropping objects\n"));
+	printf(_("  --include-table-data-where=TABLE:WHERE_CLAUSE\n"));
+	printf(_("                               only dump selected rows for the given table(s)\n"));
 	printf(_("  --inserts                    dump data as INSERT commands, rather than COPY\n"));
 	printf(_("  --no-comments                do not dump comments\n"));
 	printf(_("  --no-publications            do not dump publications\n"));
@@ -1290,11 +1309,13 @@ expand_schema_name_patterns(Archive *fout,
 /*
  * Find the OIDs of all tables matching the given list of patterns,
  * and append them to the given OID list.
+ * If match_data is set, each pattern is first split at its first ':' character,
+ * and the portion after the colon is stored as extra data with each OID appended.
  */
 static void
 expand_table_name_patterns(Archive *fout,
 						   SimpleStringList *patterns, SimpleOidList *oids,
-						   bool strict_names)
+						   bool strict_names, bool match_data)
 {
 	PQExpBuffer query;
 	PGresult   *res;
@@ -1313,6 +1334,17 @@ expand_table_name_patterns(Archive *fout,
 
 	for (cell = patterns->head; cell; cell = cell->next)
 	{
+		/* When match_data is set, split the pattern at the first ':' character,
+		 * and treat the remainder as extra data to store with each matching OID.
+		 */
+		char *extra_data = NULL;
+		if (match_data) {
+			char *colon_char = strchr(cell->val, ':');
+			if (colon_char) {
+				*colon_char = '\0';		/* overwrite the colon, terminating the string before it */
+				extra_data = colon_char+1;	/* use remaining portion of the string as extra data */
+			}
+		}
 		/*
 		 * Query must remain ABSOLUTELY devoid of unqualified names.  This
 		 * would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1341,7 +1373,7 @@ expand_table_name_patterns(Archive *fout,
 
 		for (i = 0; i < PQntuples(res); i++)
 		{
-			simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+			simple_oid_list_append2(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
 		}
 
 		PQclear(res);
@@ -2232,6 +2264,7 @@ static void
 makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
 {
 	TableDataInfo *tdinfo;
+	char *filter_clause;
 
 	/*
 	 * Nothing to do if we already decided to dump the table.  This will
@@ -2281,7 +2314,24 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
 	tdinfo->dobj.namespace = tbinfo->dobj.namespace;
 	tdinfo->tdtable = tbinfo;
 	tdinfo->oids = oids;
-	tdinfo->filtercond = NULL;	/* might get set later */
+	tdinfo->filtercond = NULL;
+
+	/*
+	 * --include-table-data-where=<table_name>:<filter_clause> may be provided for this table.
+	 * If provided, filter_clause will be something like "foo < 5", so wrap it in a WHERE clause.
+	 */
+	filter_clause = NULL;
+	if (simple_oid_list_member2(&tabledata_include_where_oids,
+		tbinfo->dobj.catId.oid,
+		(void**) &filter_clause))
+	{
+		if (filter_clause) {
+			tdinfo->filtercond = psprintf("WHERE (%s)", filter_clause);
+		} else {
+			exit_horribly(NULL, "invalid pattern provided for --include-table-data-where\n");
+		}
+	}
+
 	addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
 
 	tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index ef94b34..b0fded2 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -25,11 +25,30 @@
 void
 simple_oid_list_append(SimpleOidList *list, Oid val)
 {
+	simple_oid_list_append2(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+	return simple_oid_list_member2(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data)
+{
 	SimpleOidListCell *cell;
 
 	cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
 	cell->next = NULL;
 	cell->val = val;
+	cell->extra_data = extra_data;
 
 	if (list->tail)
 		list->tail->next = cell;
@@ -40,16 +59,20 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
 
 /*
  * Is OID present in the list?
+ * On a match, if extra_data is not NULL, *extra_data is set to the cell's extra pointer-sized data.
  */
 bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data)
 {
 	SimpleOidListCell *cell;
 
 	for (cell = list->head; cell; cell = cell->next)
 	{
-		if (cell->val == val)
+		if (cell->val == val) {
+			if (extra_data)
+				*extra_data = cell->extra_data;
 			return true;
+		}
 	}
 	return false;
 }
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 9785489..40bc962 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
 {
 	struct SimpleOidListCell *next;
 	Oid			val;
+	void		*extra_data;
 } SimpleOidListCell;
 
 typedef struct SimpleOidList
@@ -47,6 +48,9 @@ typedef struct SimpleStringList
 extern void simple_oid_list_append(SimpleOidList *list, Oid val);
 extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
 
+extern void simple_oid_list_append2(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_member2(SimpleOidList *list, Oid val, void **extra_data);
+
 extern void simple_string_list_append(SimpleStringList *list, const char *val);
 extern bool simple_string_list_member(SimpleStringList *list, const char *val);
 
-- 
2.10.2

