From a7474069dad4616639d41d79edf3d844899c5b21 Mon Sep 17 00:00:00 2001
From: Carter Thaxton <carter.thaxton@gmail.com>
Date: Sat, 19 May 2018 01:56:51 -0700
Subject: [PATCH] Add --where option to pg_dump

---
 doc/src/sgml/ref/pg_dump.sgml       | 18 ++++++++++
 src/bin/pg_dump/pg_dump.c           | 65 +++++++++++++++++++++++++++++++++----
 src/fe_utils/simple_list.c          | 28 ++++++++++++++--
 src/fe_utils/string_utils.c         | 30 +++++++++++++++++
 src/include/fe_utils/simple_list.h  |  4 +++
 src/include/fe_utils/string_utils.h |  2 ++
 6 files changed, 138 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 50809b4..ed850de 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -1040,6 +1040,24 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>--where=<replaceable class="parameter">table</replaceable>:<replaceable class="parameter">filter_clause</replaceable></option></term>
+      <listitem>
+       <para>
+        When dumping data for any tables matching the <replaceable
+        class="parameter">table</replaceable> pattern, only include rows
+        that meet the <replaceable class="parameter">filter_clause</replaceable> condition.
+        This option is useful when you want to dump only a subset of a particular table.
+        The table pattern is interpreted according to the same rules as for <option>-t</option>.
+        <option>--where</option> can be given more than once to
+        provide different filters for multiple tables.
+        Note that if multiple options refer to the same table, only the first filter_clause will be applied.
+        If necessary, use quotes in your shell to provide an argument that contains spaces.
+        E.g. --where=mytable:"created_at >= '2018-01-01' AND test = 'f'"
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
        <term><option>-?</option></term>
        <term><option>--help</option></term>
        <listitem>
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index d6ceb72..3ea5cdf 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -123,6 +123,8 @@ static SimpleStringList table_exclude_patterns = {NULL, NULL};
 static SimpleOidList table_exclude_oids = {NULL, NULL};
 static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
 static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
+static SimpleStringList tabledata_where_patterns = {NULL, NULL};
+static SimpleOidList tabledata_where_oids = {NULL, NULL};
 
 
 char		g_opaque_type[10];	/* name for the opaque type */
@@ -154,7 +156,8 @@ static void expand_schema_name_patterns(Archive *fout,
 static void expand_table_name_patterns(Archive *fout,
 						   SimpleStringList *patterns,
 						   SimpleOidList *oids,
-						   bool strict_names);
+						   bool strict_names,
+						   bool match_data);
 static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
 static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
 static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
@@ -371,6 +374,7 @@ main(int argc, char **argv)
 		{"snapshot", required_argument, NULL, 6},
 		{"strict-names", no_argument, &strict_names, 1},
 		{"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1},
+		{"where", required_argument, NULL, 8},
 		{"no-comments", no_argument, &dopt.no_comments, 1},
 		{"no-publications", no_argument, &dopt.no_publications, 1},
 		{"no-security-labels", no_argument, &dopt.no_security_labels, 1},
@@ -562,6 +566,10 @@ main(int argc, char **argv)
 				dosync = false;
 				break;
 
+			case 8:				/* table(s) data WHERE clause */
+				simple_string_list_append(&tabledata_where_patterns, optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit_nicely(1);
@@ -761,17 +769,26 @@ main(int argc, char **argv)
 	{
 		expand_table_name_patterns(fout, &table_include_patterns,
 								   &table_include_oids,
-								   strict_names);
+								   strict_names, false);
 		if (table_include_oids.head == NULL)
 			exit_horribly(NULL, "no matching tables were found\n");
 	}
+	if (tabledata_where_patterns.head != NULL)
+	{
+		expand_table_name_patterns(fout, &tabledata_where_patterns,
+								   &tabledata_where_oids,
+								   true, 	/* Always use strict names for WHERE pattern */
+								   true);	/* Match extra data after ':' character in each argument */
+		if (tabledata_where_oids.head == NULL)
+			exit_horribly(NULL, "no matching tables were found for WHERE clause\n");
+	}
 	expand_table_name_patterns(fout, &table_exclude_patterns,
 							   &table_exclude_oids,
-							   false);
+							   false, false);
 
 	expand_table_name_patterns(fout, &tabledata_exclude_patterns,
 							   &tabledata_exclude_oids,
-							   false);
+							   false, false);
 
 	/* non-matching exclusion patterns aren't an error */
 
@@ -998,6 +1015,7 @@ help(const char *progname)
 	printf(_("  --use-set-session-authorization\n"
 			 "                               use SET SESSION AUTHORIZATION commands instead of\n"
 			 "                               ALTER OWNER commands to set ownership\n"));
+	printf(_("  --where=TABLE:WHERE_CLAUSE   only dump selected rows for the given table(s)\n"));
 
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=DBNAME      database to dump\n"));
@@ -1290,15 +1308,19 @@ expand_schema_name_patterns(Archive *fout,
 /*
  * Find the OIDs of all tables matching the given list of patterns,
  * and append them to the given OID list.
+ * If match_data is set, then each pattern is first split on the ':' character,
+ * and the portion after the colon is appended to the SimpleOidList extra data.
  */
 static void
 expand_table_name_patterns(Archive *fout,
 						   SimpleStringList *patterns, SimpleOidList *oids,
-						   bool strict_names)
+						   bool strict_names, bool match_data)
 {
 	PQExpBuffer query;
 	PGresult   *res;
 	SimpleStringListCell *cell;
+	char *extra_data;
+	char *colon_char;
 	int			i;
 
 	if (patterns->head == NULL)
@@ -1313,6 +1335,17 @@ expand_table_name_patterns(Archive *fout,
 
 	for (cell = patterns->head; cell; cell = cell->next)
 	{
+		/* When match_data is set, split the pattern on the first unquoted ':' character,
+		 * and treat the second-half as extra data to append to the list.
+		 */
+		extra_data = NULL;
+		if (match_data) {
+			colon_char = (char*) findUnquotedChar(cell->val, ':');
+			if (colon_char) {
+				*colon_char = '\0';		/* overwrite the colon, terminating the string before it */
+				extra_data = colon_char+1;	/* use remaining portion of the string as extra data */
+			}
+		}
 		/*
 		 * Query must remain ABSOLUTELY devoid of unqualified names.  This
 		 * would be unnecessary given a pg_table_is_visible() variant taking a
@@ -1341,7 +1374,7 @@ expand_table_name_patterns(Archive *fout,
 
 		for (i = 0; i < PQntuples(res); i++)
 		{
-			simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0)));
+			simple_oid_list_append_data(oids, atooid(PQgetvalue(res, i, 0)), extra_data);
 		}
 
 		PQclear(res);
@@ -2232,6 +2265,7 @@ static void
 makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
 {
 	TableDataInfo *tdinfo;
+	char *filter_clause;
 
 	/*
 	 * Nothing to do if we already decided to dump the table.  This will
@@ -2281,7 +2315,24 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
 	tdinfo->dobj.namespace = tbinfo->dobj.namespace;
 	tdinfo->tdtable = tbinfo;
 	tdinfo->oids = oids;
-	tdinfo->filtercond = NULL;	/* might get set later */
+	tdinfo->filtercond = NULL;
+
+	/*
+	 * --where=<table_name>:<filter_clause> may be provided for this table.
+	 * If provided, filter_clause will be something like "foo < 5", so wrap it in a WHERE clause.
+	 */
+	filter_clause = NULL;
+	if (simple_oid_list_find_data(&tabledata_where_oids,
+		tbinfo->dobj.catId.oid,
+		(void**) &filter_clause))
+	{
+		if (filter_clause) {
+			tdinfo->filtercond = psprintf("WHERE (%s)", filter_clause);
+		} else {
+			exit_horribly(NULL, "invalid pattern provided for --where\n");
+		}
+	}
+
 	addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId);
 
 	tbinfo->dataObj = tdinfo;
diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c
index ef94b34..cd7812d 100644
--- a/src/fe_utils/simple_list.c
+++ b/src/fe_utils/simple_list.c
@@ -25,11 +25,30 @@
 void
 simple_oid_list_append(SimpleOidList *list, Oid val)
 {
+	simple_oid_list_append_data(list, val, NULL);
+}
+
+/*
+ * Is OID present in the list?
+ */
+bool
+simple_oid_list_member(SimpleOidList *list, Oid val)
+{
+	return simple_oid_list_find_data(list, val, NULL);
+}
+
+/*
+ * Append an OID to the list, along with extra pointer-sized data.
+ */
+void
+simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data)
+{
 	SimpleOidListCell *cell;
 
 	cell = (SimpleOidListCell *) pg_malloc(sizeof(SimpleOidListCell));
 	cell->next = NULL;
 	cell->val = val;
+	cell->extra_data = extra_data;
 
 	if (list->tail)
 		list->tail->next = cell;
@@ -40,16 +59,21 @@ simple_oid_list_append(SimpleOidList *list, Oid val)
 
 /*
  * Is OID present in the list?
+ * If so, return true, and provide pointer-sized data by setting result of extra_data parameter.
+ * If not, return false.
  */
 bool
-simple_oid_list_member(SimpleOidList *list, Oid val)
+simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data)
 {
 	SimpleOidListCell *cell;
 
 	for (cell = list->head; cell; cell = cell->next)
 	{
-		if (cell->val == val)
+		if (cell->val == val) {
+			if (extra_data)
+				*extra_data = cell->extra_data;
 			return true;
+		}
 	}
 	return false;
 }
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c
index b47a396..0a71bd9 100644
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -1019,3 +1019,33 @@ processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
 	return added_clause;
 #undef WHEREAND
 }
+
+/*
+ * findUnquotedChar
+ *
+ * Scan a string and return a pointer to the first character that
+ * matches a given character, which isn't in double-quotes.
+ *
+ * For example, searching for a colon inside of:
+ *    'hello"this:is:in:quotes"but:this:is:not'
+ * would return a pointer to this ^ colon.  (the one after 'but')
+ *
+ * Returns NULL if not found.
+ *
+ * str: string to scan.
+ * find_char: character to find.
+ */
+const char *
+findUnquotedChar(const char *str, char find_char)
+{
+	const char *p = str;
+	bool inquotes = false;
+
+	while (*p++) {
+		if (*p == '"')
+			inquotes = !inquotes;
+		else if ((*p == find_char) && !inquotes)
+			return p;
+	}
+	return NULL;
+}
diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h
index 9785489..95b0a8e 100644
--- a/src/include/fe_utils/simple_list.h
+++ b/src/include/fe_utils/simple_list.h
@@ -21,6 +21,7 @@ typedef struct SimpleOidListCell
 {
 	struct SimpleOidListCell *next;
 	Oid			val;
+	void		*extra_data;
 } SimpleOidListCell;
 
 typedef struct SimpleOidList
@@ -47,6 +48,9 @@ typedef struct SimpleStringList
 extern void simple_oid_list_append(SimpleOidList *list, Oid val);
 extern bool simple_oid_list_member(SimpleOidList *list, Oid val);
 
+extern void simple_oid_list_append_data(SimpleOidList *list, Oid val, void *extra_data);
+extern bool simple_oid_list_find_data(SimpleOidList *list, Oid val, void **extra_data);
+
 extern void simple_string_list_append(SimpleStringList *list, const char *val);
 extern bool simple_string_list_member(SimpleStringList *list, const char *val);
 
diff --git a/src/include/fe_utils/string_utils.h b/src/include/fe_utils/string_utils.h
index 9a311e0..ccba60c 100644
--- a/src/include/fe_utils/string_utils.h
+++ b/src/include/fe_utils/string_utils.h
@@ -57,4 +57,6 @@ extern bool processSQLNamePattern(PGconn *conn, PQExpBuffer buf,
 					  const char *schemavar, const char *namevar,
 					  const char *altnamevar, const char *visibilityrule);
 
+extern const char *findUnquotedChar(const char *str, char find_char);
+
 #endif							/* STRING_UTILS_H */
-- 
2.10.2

