[2/3] incubator-hawq git commit: HAWQ-983. Fix bug that minirepro generates wrong dependency order of objects

mli Fri, 26 Aug 2016 00:07:43 -0700

HAWQ-983. Fix bug that minirepro generates wrong dependency order of objects


This patch changes gp_dump_query_oids by traversing the parsed query
tree instead of traversing the query tree struct, which has too
many node type and corner cases to consider. Even it is a little bit
risky to traverse the pased query tree string, we haven't seen any
sign that postgresql upstream is going to change the format.

Add 2 options: relation-oids and function-oids into pg_dump command
line tool to help minirepro dump all the objects in correct order.

In addition, we also fix a minirepro python script bug when column
stats has text type most common values that containing single quote,
minirepro failed to escape that text, which causes sql grammar error
and can't insert that statistics.

Update minirepro to deal with error messages correctly, set PGUSER
as default user, let output file accept relative path.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/19124b7d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/19124b7d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/19124b7d

Branch: refs/heads/master
Commit: 19124b7da032b61d9215520483b67a109246e363
Parents: a17647b
Author: Haisheng Yuan <hy...@pivotal.io>
Authored: Fri Aug 5 16:50:35 2016 -0500
Committer: Ming LI <m...@pivotal.io>
Committed: Fri Aug 26 15:02:56 2016 +0800

----------------------------------------------------------------------
 src/bin/gpoptutils/gpoptutils.c | 141 +++++++++++------------
 src/bin/pg_dump/pg_dump.c       |  79 ++++++++++++-
 tools/bin/minirepro             | 214 ++++++++++++++++++++++-------------
 3 files changed, 282 insertions(+), 152 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/19124b7d/src/bin/gpoptutils/gpoptutils.c
----------------------------------------------------------------------
diff --git a/src/bin/gpoptutils/gpoptutils.c b/src/bin/gpoptutils/gpoptutils.c
index c6bd7ef..0c0197b 100644
--- a/src/bin/gpoptutils/gpoptutils.c
+++ b/src/bin/gpoptutils/gpoptutils.c
@@ -26,104 +26,85 @@
  *     LANGUAGE C STRICT;
  */
 
+#include "postgres_fe.h"
 #include "postgres.h"
 #include "funcapi.h"
 #include "utils/builtins.h"
-#include "gpopt/utils/nodeutils.h"
 #include "rewrite/rewriteHandler.h"
-#include "c.h"
+#include "tcop/tcopprot.h"
 
-extern
-List *pg_parse_and_rewrite(const char *query_string, Oid *paramTypes, int 
iNumParams);
-
-extern
-List *QueryRewrite(Query *parsetree);
-
-static
-Query *parseSQL(char *szSqlText);
-
-static
-void traverseQueryRTEs(Query *pquery, HTAB *phtab, StringInfoData *buf);
+#define atooid(x)  ((Oid) strtoul((x), NULL, 10))
 
 Datum gp_dump_query_oids(PG_FUNCTION_ARGS);
 
-#ifdef PG_MODULE_MAGIC
 PG_MODULE_MAGIC;
-#endif
-
 PG_FUNCTION_INFO_V1(gp_dump_query_oids);
 
-/*
- * Parse a query given as SQL text.
- */
-static Query *parseSQL(char *sqlText)
-{
-       Assert(sqlText);
-
-       List *queryTree = pg_parse_and_rewrite(sqlText, NULL, 0);
-
-       if (1 != list_length(queryTree))
-       {
-               elog(ERROR, "Cannot parse query. "
-                               "Please make sure the input contains a single 
valid query. \n%s", sqlText);
-       }
-
-       Query *query = (Query *) lfirst(list_head(queryTree));
-
-       return query;
-}
-
-static void traverseQueryRTEs
+static void
+traverseQueryOids
        (
-       Query *pquery,
-       HTAB *phtab,
-       StringInfoData *buf
+       Query          *pquery,
+       HTAB           *relhtab,
+       StringInfoData *relbuf,
+       HTAB           *funchtab,
+       StringInfoData *funcbuf
        )
 {
-       ListCell *plc;
-       bool found;
-       foreach (plc, pquery->rtable)
-       {
-               RangeTblEntry *rte = (RangeTblEntry *) lfirst(plc);
+       bool       found;
+       const char *whitespace = " \t\n\r";
+       char       *query = nodeToString(pquery);
+       char       *token = strtok(query, whitespace);
 
-               switch (rte->rtekind)
+       while (token)
+       {
+               if (pg_strcasecmp(token, ":relid") == 0)
+               {
+                       token = strtok(NULL, whitespace);
+                       if (token)
+                       {
+                               Oid relid = atooid(token);
+                               hash_search(relhtab, (void *)&relid, 
HASH_ENTER, &found);
+                               if (!found)
+                               {
+                                       if (relbuf->len != 0)
+                                               appendStringInfo(relbuf, "%s", 
",");
+                                       appendStringInfo(relbuf, "%u", relid);
+                               }
+                       }
+               }
+               else if (pg_strcasecmp(token, ":funcid") == 0)
                {
-                       case RTE_RELATION:
+                       token = strtok(NULL, whitespace);
+                       if (token)
                        {
-                               hash_search(phtab, (void *)&rte->relid, 
HASH_ENTER, &found);
+                               Oid funcid = atooid(token);
+                               hash_search(funchtab, (void *)&funcid, 
HASH_ENTER, &found);
                                if (!found)
                                {
-                                       if (0 != buf->len)
-                                               appendStringInfo(buf, "%s", ", 
");
-                                       appendStringInfo(buf, "%u", rte->relid);
+                                       if (funcbuf->len != 0)
+                                               appendStringInfo(funcbuf, "%s", 
",");
+                                       appendStringInfo(funcbuf, "%u", funcid);
                                }
                        }
-                               break;
-                       case RTE_SUBQUERY:
-                               traverseQueryRTEs(rte->subquery, phtab, buf);
-                               break;
-                       default:
-                               break;
                }
+
+               token = strtok(NULL, whitespace);
        }
 }
 
 /*
- * Function dumping dependent relation oids for a given SQL text
+ * Function dumping dependent relation & function oids for a given SQL text
  */
 Datum
 gp_dump_query_oids(PG_FUNCTION_ARGS)
 {
-       char *szSqlText = text_to_cstring(PG_GETARG_TEXT_P(0));
+       char *sqlText = text_to_cstring(PG_GETARG_TEXT_P(0));
+       List *queryList = pg_parse_and_rewrite(sqlText, NULL, 0);
+       ListCell *plc;
 
-       Query *pquery = parseSQL(szSqlText);
-       if (CMD_UTILITY == pquery->commandType && T_ExplainStmt == 
pquery->utilityStmt->type)
-       {
-               Query *pqueryExplain = ((ExplainStmt 
*)pquery->utilityStmt)->query;
-               List *plQueryTree = QueryRewrite(pqueryExplain);
-               Assert(1 == list_length(plQueryTree));
-               pquery = (Query *) lfirst(list_head(plQueryTree));
-       }
+       StringInfoData relbuf, funcbuf;
+       initStringInfo(&relbuf);
+       initStringInfo(&funcbuf);
 
        typedef struct OidHashEntry
        {
@@ -134,20 +115,32 @@ gp_dump_query_oids(PG_FUNCTION_ARGS)
        ctl.keysize = sizeof(Oid);
        ctl.entrysize = sizeof(OidHashEntry);
        ctl.hash = oid_hash;
+       HTAB *relhtab = hash_create("relid hash table", 100, &ctl, HASH_ELEM | 
HASH_FUNCTION);
+       HTAB *funchtab = hash_create("funcid hash table", 100, &ctl, HASH_ELEM 
| HASH_FUNCTION);
 
-       StringInfoData buf;
-       initStringInfo(&buf);
+       foreach(plc, queryList)
+       {
+               Query *query = (Query *) lfirst(plc);
+               if (CMD_UTILITY == query->commandType && T_ExplainStmt == 
query->utilityStmt->type)
+               {
+                       Node *queryExplain = ((ExplainStmt 
*)query->utilityStmt)->query;
+                       List *queryTree = pg_analyze_and_rewrite(queryExplain, 
sqlText, NULL, 0);
+                       Assert(1 == list_length(queryTree));
+                       query = (Query *) lfirst(list_head(queryTree));
+               }
+               traverseQueryOids(query, relhtab, &relbuf, funchtab, &funcbuf);
+       }
 
-       HTAB *phtab = hash_create("relid hash table", 100, &ctl, HASH_ELEM | 
HASH_FUNCTION);
-       traverseQueryRTEs(pquery, phtab, &buf);
-       hash_destroy(phtab);
+       hash_destroy(relhtab);
+       hash_destroy(funchtab);
 
        StringInfoData str;
        initStringInfo(&str);
-       appendStringInfo(&str, "{\"relids\": [%s]}", buf.data);
+       appendStringInfo(&str, "{\"relids\": \"%s\", \"funcids\": \"%s\"}", 
relbuf.data, funcbuf.data);
 
        text *result = cstring_to_text(str.data);
-       pfree(buf.data);
+       pfree(relbuf.data);
+       pfree(funcbuf.data);
        pfree(str.data);
 
        PG_RETURN_TEXT_P(result);

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/19124b7d/src/bin/pg_dump/pg_dump.c
----------------------------------------------------------------------
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 5b0db00..3eb154b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -136,6 +136,10 @@ static SimpleOidList table_include_oids = {NULL, NULL};
 static SimpleStringList table_exclude_patterns = {NULL, NULL};
 static SimpleOidList table_exclude_oids = {NULL, NULL};
 
+static SimpleStringList relid_string_list = {NULL, NULL};
+static SimpleStringList funcid_string_list = {NULL, NULL};
+static SimpleOidList function_include_oids = {NULL, NULL};
+
 /*
  * Indicates whether or not SET SESSION AUTHORIZATION statements should be 
emitted
  * instead of ALTER ... OWNER statements to establish object ownership.
@@ -174,6 +178,8 @@ static void expand_schema_name_patterns(SimpleStringList 
*patterns,
                                                        SimpleOidList *oids);
 static void expand_table_name_patterns(SimpleStringList *patterns,
                                                   SimpleOidList *oids);
+static void expand_oid_patterns(SimpleStringList *patterns,
+                                                  SimpleOidList *oids);
 static NamespaceInfo *findNamespace(Oid nsoid, Oid objoid);
 static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
 static void dumpComment(Archive *fout, const char *target,
@@ -385,6 +391,8 @@ main(int argc, char **argv)
                {"no-gp-syntax", no_argument, NULL, 2},
                {"pre-data-schema-only", no_argument, &preDataSchemaOnly, 1},
                {"post-data-schema-only", no_argument, &postDataSchemaOnly, 1},
+               {"function-oids", required_argument, NULL, 3},
+               {"relation-oids", required_argument, NULL, 4},
                /* END MPP ADDITION */
                {NULL, 0, NULL, 0}
        };
@@ -585,6 +593,16 @@ main(int argc, char **argv)
                                gp_syntax_option = GPS_DISABLED;
                                break;
 
+                       case 3:
+                               simple_string_list_append(&funcid_string_list, 
optarg);
+                               include_everything = false;
+                               break;
+
+                       case 4:
+                               simple_string_list_append(&relid_string_list, 
optarg);
+                               include_everything = false;
+                               break;
+
                        default:
                                fprintf(stderr, _("Try \"%s --help\" for more 
information.\n"), progname);
                                exit(1);
@@ -794,6 +812,8 @@ main(int argc, char **argv)
        /* non-matching exclusion patterns aren't an error */
 
 
+       expand_oid_patterns(&relid_string_list, &table_include_oids);
+       expand_oid_patterns(&funcid_string_list, &function_include_oids);
 
        /*
         * Dumping blobs is now default unless we saw an inclusion switch or -s
@@ -944,6 +964,8 @@ help(const char *progname)
        /* START MPP ADDITION */
        printf(_("  --gp-syntax                 dump with Greenplum Database 
syntax (default if gpdb)\n"));
        printf(_("  --no-gp-syntax              dump without Greenplum Database 
syntax (default if postgresql)\n"));
+       printf(_("  --function-oids             dump only function(s) of given 
list of oids\n"));
+       printf(_("  --relation-oids             dump only relation(s) of given 
list of oids\n"));
        /* END MPP ADDITION */
 
        printf(_("\nConnection options:\n"));
@@ -1061,6 +1083,39 @@ expand_table_name_patterns(SimpleStringList *patterns, 
SimpleOidList *oids)
 }
 
 /*
+ * Parse the OIDs matching the given list of patterns separated by non-digit
+ * characters, and append them to the given OID list.
+ */
+static void
+expand_oid_patterns(SimpleStringList *patterns, SimpleOidList *oids)
+{
+       SimpleStringListCell *cell;
+
+       if (patterns->head == NULL)
+               return;                                 /* nothing to do */
+
+       for (cell = patterns->head; cell; cell = cell->next)
+       {
+               const char *cp = cell->val;
+               const char *ch = cp;
+               while (*cp)
+               {
+                       if (*cp < '0' || *cp > '9')
+                       {
+                               if (cp != ch)
+                                       simple_oid_list_append(oids, 
atooid(strndup(ch, cp - ch)));
+                               ch = ++cp;
+                       }
+                       else
+                               ++cp;
+               }
+
+               if (cp != ch)
+                       simple_oid_list_append(oids, atooid(strndup(ch, cp - 
ch)));
+       }
+}
+
+/*
  * selectDumpableNamespace: policy-setting subroutine
  *             Mark a namespace as to be dumped or not
  */
@@ -1151,6 +1206,28 @@ selectDumpableType(TypeInfo *tinfo)
 }
 
 /*
+ * selectDumpableFunction: policy-setting subroutine
+ *             Mark a function as to be dumped or not
+ */
+static void
+selectDumpableFunction(FuncInfo *finfo)
+{
+       /*
+        * If specific functions are being dumped, dump just those functions; 
else, dump
+        * according to the parent namespace's dump flag if parent namespace is 
not null;
+        * else, always dump the function.
+        */
+       if (function_include_oids.head != NULL)
+               finfo->dobj.dump = 
simple_oid_list_member(&function_include_oids,
+                                                                               
                   finfo->dobj.catId.oid);
+       else if (finfo->dobj.namespace)
+               finfo->dobj.dump = finfo->dobj.namespace->dobj.dump;
+       else
+               finfo->dobj.dump = true;
+}
+
+
+/*
  * selectDumpableObject: policy-setting subroutine
  *             Mark a generic dumpable object as to be dumped or not
  *
@@ -2790,7 +2867,7 @@ getFuncs(int *numFuncs)
                }
 
                /* Decide whether we want to dump it */
-               selectDumpableObject(&(finfo[i].dobj));
+               selectDumpableFunction(&finfo[i]);
 
                if (strlen(finfo[i].rolname) == 0)
                        write_msg(NULL,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/19124b7d/tools/bin/minirepro
----------------------------------------------------------------------
diff --git a/tools/bin/minirepro b/tools/bin/minirepro
index 7238cd3..320b9e1 100755
--- a/tools/bin/minirepro
+++ b/tools/bin/minirepro
@@ -16,23 +16,81 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#
-# Dump minimal schema objects and statistics for a given query
-#
+'''
+minirepro utility
+
+USAGE
+
+minirepro <db-name> [-h <master-host>] [-U <username>] [-p <port>]
+  -q <SQL-file> -f <repo-data-file>
+
+minirepro -?
+
+
+DESCRIPTION
+
+For any SQL commands, the minirepro utility generates HAWQ Database
+information for the commands. The information can be analyzed to
+perform root cause analysis.
+
+The minirepro utility reads the input SQL file, passes the input SQL
+command to hawq_toolkit function hawq_dump_query_oids() to get the dependent
+object ids. Then the utility uses pg_dump to dump the object DDLs, and
+queries the system catalog to collect statistics of these relations.
+The information is written to output file. The output includes a minimal
+sets of DDLs and statistics of relations and functions that are related
+to the input SQL commands.
+
+
+PARAMETERS
+
+<db-name>
+  Name of the HAWQ Database.
+
+-h <master-host>
+  HAWQ Database master host. Default is localhost.
+
+-U <username>
+  HAWQ Database user name to log into the database and run the
+  SQL command. Default is the PGUSER environment variable. If PGUSER
+  is not defined, OS user name running the utility is used.
+
+-p <port>
+  Port that is used to connect to HAWQ Database.
+  Default is the PGPORT environment variable. If PGPORT is not defined,
+  the default value is 5432.
+
+-q <SQL-file>
+  A text file that contains SQL commands. The commands can be on
+  multiple lines.
+
+-f <repo-data-file>
+  The output file that contains DDLs and statistics of relations
+  and functions that are related to the SQL commands.
+
+-? Show this help text and exit.
+
+
+EXAMPLE
+
+minirepro gptest -h locahost -U gpadmin -p 4444 -q ~/in.sql -f ~/out.sql
+'''
+
 import os, sys, re, json, platform, subprocess
 from optparse import OptionParser
 from pygresql import pgdb
 from datetime import datetime
 
-prog_version = '%prog 1.0'
+version = '1.10'
 PATH_PREFIX = '/tmp/'
+PGDUMP_FILE = 'pg_dump_out.sql'
 sysnslist = "('pg_toast', 'pg_bitmapindex', 'pg_catalog', 
'information_schema', 'hawq_toolkit', 'pg_aoseg')"
 pgoptions = '-c gp_session_role=utility'
 
 class MRQuery(object):
     def __init__(self):
-        self.relations = {}
-        self.funcs = {}
+        self.schemas = []
+        self.funcids = []
         self.relids = []
 
 def E(query_str):
@@ -51,7 +109,7 @@ def result_iter(cursor, arraysize=1000):
         for result in results:
             yield result
 
-def get_version(cursor):
+def get_server_version(cursor):
     query = "select version()"
     try:
         cursor.execute(query)
@@ -62,7 +120,7 @@ def get_version(cursor):
         sys.exit(1)
 
 def parse_cmd_line():
-    p = OptionParser(usage='Usage: %prog <database> [options]', 
version=prog_version, conflict_handler="resolve")
+    p = OptionParser(usage='Usage: %prog <database> [options]', version='%prog 
'+version, conflict_handler="resolve")
     p.add_option('-?', '--help', action='help', help='Show this help message 
and exit')
     p.add_option('-h', '--host', action='store',
                  dest='host', help='Specify a remote host')
@@ -76,65 +134,73 @@ def parse_cmd_line():
                  help='minirepro output file name')
     return p
 
-def dump_query(cursor, query_file):
+def dump_query(connectionInfo, query_file):
+    (host, port, user, db) = connectionInfo
     print "Extracting metadata from query file %s ..." % query_file
 
     with open(query_file, 'r') as query_f:
         sql_text = query_f.read()
     query = "select hawq_toolkit.hawq_dump_query_oids('%s')" % E(sql_text)
 
-    try:
-        cursor.execute(query)
-        vals = cursor.fetchone()
-        return vals[0]
-    except pgdb.DatabaseError as e:
-        sys.stderr.writelines('\nError while running 
hawq_toolkit.hawq_dump_query_oids(text).\nPlease make sure ' \
-            'the function is installed and the query file contains single 
valid query.\n\n' + str(e) + '\n\n')
+    toolkit_sql = PATH_PREFIX + 'toolkit.sql'
+    with open(toolkit_sql, 'w') as toolkit_f:
+        toolkit_f.write(query)
+
+    query_cmd = "psql %s --pset footer -Atq -h %s -p %s -U %s -f %s" % (db, 
host, port, user, toolkit_sql)
+    print query_cmd
+
+    p = subprocess.Popen(query_cmd, shell=True, stdout=subprocess.PIPE, 
stderr=subprocess.PIPE, env=os.environ)
+
+    if p.wait() is not 0:
+        errormsg = p.communicate()[1]
+        sys.stderr.writelines('\nError when executing function 
hawq_toolkit.hawq_dump_query_oids.\n\n' + errormsg + '\n\n')
         sys.exit(1)
 
-# relation oid will be extracted from the dump string
+    outmsg, errormsg = p.communicate()
+    if errormsg:
+        sys.stderr.writelines('\nError when executing function 
hawq_toolkit.hawq_dump_query_oids.\n\n' + errormsg + '\n\n')
+        sys.exit(1)
+    return outmsg
+
+# relation and function oids will be extracted from the dump string
 def parse_oids(cursor, json_oids):
     result = MRQuery()
-    oids = json.loads(json_oids)['relids']
-    result.relids = [str(x) for x in oids]
+    result.relids = json.loads(json_oids)['relids']
+    result.funcids = json.loads(json_oids)['funcids']
 
     if len(result.relids) == 0:
-        result.relids.append('0')
-    oid_str = ','.join(result.relids)
-    cat_query = "SELECT nspname, relname FROM pg_class, pg_namespace WHERE 
pg_class.relnamespace = pg_namespace.oid " \
-                "AND pg_class.oid IN (%s)" % oid_str
+        result.relids = '0'
+    if len(result.funcids) == 0:
+        result.funcids = '0'
+
+    cat_query = "SELECT distinct(nspname) FROM pg_class c, pg_namespace n 
WHERE " \
+                "c.relnamespace = n.oid AND c.oid IN (%s) " \
+                "AND n.nspname NOT IN %s" % (result.relids, sysnslist)
 
     cursor.execute(cat_query)
+    for vals in result_iter(cursor):
+        result.schemas.append(vals[0])
+
+    cat_query = "SELECT distinct(nspname) FROM pg_proc p, pg_namespace n WHERE 
" \
+                "p.pronamespace = n.oid AND p.oid IN (%s) " \
+                "AND n.nspname NOT IN %s" % (result.funcids, sysnslist)
 
+    cursor.execute(cat_query)
     for vals in result_iter(cursor):
-        schm, relname = vals[0], vals[1]
-        if schm not in result.relations:
-            result.relations[schm] = [relname]
-        else:
-            result.relations[schm].append(relname)
+        result.schemas.append(vals[0])
 
     return result
 
-def pg_dump_object(obj_dict, connectionInfo, envOpts):
-    for schema, table_list in obj_dict.iteritems():
-        out_file = PATH_PREFIX + schema + '.dp.sql'
-        dmp_cmd = 'pg_dump -h %s -p %s -U %s -sxO %s' % connectionInfo
-        dmp_cmd = "%s -t '%s.%s' -f %s" % (dmp_cmd, E(schema), 
E('|'.join(table_list)), E(out_file))
-        print dmp_cmd
-        p = subprocess.Popen(dmp_cmd, shell=True, stderr=subprocess.PIPE, 
env=envOpts)
-        if p.wait() is not 0:
-            sys.stderr.write('\nError while dumping schema.\n\n' + 
p.communicate()[1] + '\n\n')
-            sys.exit(1)
-
-def print_obj_ddl(filename, f_out):
-    if filename.endswith('.dp.sql'):
-        f_path = os.path.join(PATH_PREFIX, filename)
-        with open(f_path, 'r') as f_opened:
-            line_no = 1
-            for line in f_opened:
-                if line_no == 12 or line_no > 16:
-                    f_out.writelines(line)
-                line_no += 1
+def pg_dump_object(mr_query, connectionInfo, envOpts):
+    out_file = PATH_PREFIX + PGDUMP_FILE
+    dmp_cmd = 'pg_dump -h %s -p %s -U %s -sxO %s' % connectionInfo
+    dmp_cmd = "%s --relation-oids %s --function-oids %s -f %s" % \
+        (dmp_cmd, mr_query.relids, mr_query.funcids, E(out_file))
+    print dmp_cmd
+    p = subprocess.Popen(dmp_cmd, shell=True, stderr=subprocess.PIPE, 
env=envOpts)
+    if p.wait() is not 0:
+        sys.stderr.write('\nError while dumping schema.\n\n' + 
p.communicate()[1] + '\n\n')
+        sys.exit(1)
 
 def dump_tuple_count(cur, oid_str, f_out):
     stmt = "SELECT pgc.relname, pgn.nspname, pgc.relpages, pgc.reltuples FROM 
pg_class pgc, pg_namespace pgn " \
@@ -205,7 +271,7 @@ def dump_stats(cur, oid_str, f_out):
             if val is None:
                 val = 'NULL'
             elif isinstance(val, (str, unicode)) and val[0] == '{':
-                val = "E'%s'" % val
+                val = "E'%s'" % E(val)
             rowVals.append('\t{0}::{1}'.format(val, typ))
         f_out.writelines(pstring.format(E(vals[0]), E(vals[2]), 
',\n'.join(rowVals)))
 
@@ -216,12 +282,12 @@ def main():
         parser.error("No database specified")
         exit(1)
 
-    # OK - now let's setup all the arguments & options
+    # setup all the arguments & options
     envOpts = os.environ
     db = args[0]
     host = options.host or platform.node()
-    user = options.user or os.getlogin()
-    port = options.port or envOpts['PGPORT'] or '5432'
+    user = options.user or ('PGUSER' in envOpts and envOpts['PGUSER']) or 
os.getlogin()
+    port = options.port or ('PGPORT' in envOpts and envOpts['PGPORT']) or 
'5432'
     query_file = options.query_file
     output_file = options.output_file
 
@@ -234,12 +300,13 @@ def main():
     if not os.path.isfile(query_file):
         parser.error('Query file %s does not exist.' % query_file)
         exit(1)
+    output_file = os.path.abspath(output_file)
 
     timestamp = generate_timestamp()
     global PATH_PREFIX
     PATH_PREFIX = PATH_PREFIX + timestamp + '/'
 
-    # Create tmp dir if not already there
+    # create tmp dir if not already there
     try:
         os.stat(PATH_PREFIX)
     except:
@@ -253,28 +320,25 @@ def main():
     cursor = conn.cursor()
 
     # get server version, which is dumped to minirepro output file
-    version = get_version(cursor)
+    server_ver = get_server_version(cursor)
 
     """
-    invoke hawq_toolkit UDF, dump relation oids as json text
+    invoke hawq_toolkit UDF, dump object oids as json text
     input: query file name
     output: json oids string
     """
-    json_str = dump_query(cursor, query_file)
+    json_str = dump_query(connectionInfo, query_file)
 
     """
     parse json oids string, collect all things that need to be dumped
     input: json oids string
-    output: MRQuery class (self.relations, self.funcs, self.relids)
+    output: MRQuery class (self.schemas, self.funcids, self.relids)
     """
     mr_query = parse_oids(cursor, json_str)
 
-    # dump tables and views
+    # dump relations and functions
     print "Invoking pg_dump to dump DDL ..."
-    pg_dump_object(mr_query.relations, connectionInfo, envOpts)
-
-    # dump functions
-    # TODO #108977046
+    pg_dump_object(mr_query, connectionInfo, envOpts)
 
     ### start writing out to stdout ###
     output_dir = os.path.dirname(output_file)
@@ -282,44 +346,40 @@ def main():
         os.makedirs(output_dir)
     f_out = open(output_file, 'w')
     ts = datetime.today()
-    f_out.writelines(['-- MiniRepro 1.0',
+    f_out.writelines(['-- MiniRepro ' + version,
                            '\n-- Copyright 2016, The Apache Software 
Foundation'
                            '\n-- Database: ' + db,
                            '\n-- Date:     ' + ts.date().isoformat(),
                            '\n-- Time:     ' + ts.time().isoformat(),
                            '\n-- CmdLine:  ' + ' '.join(sys.argv),
-                           '\n-- Version:  ' + version + '\n\n'])
+                           '\n-- Version:  ' + server_ver + '\n\n'])
 
-    # Now be sure that when we load the rest we are doing it in the right
-    # database
+    # make sure we connect with the right database
     f_out.writelines('\\connect ' + db + '\n\n')
 
     # first create schema DDLs
     print "Writing schema DDLs ..."
-    table_schemas = ["CREATE SCHEMA %s;\n" % E(schema) for schema in 
mr_query.relations if schema != 'public']
+    table_schemas = ["CREATE SCHEMA %s;\n" % E(schema) for schema in 
mr_query.schemas if schema != 'public']
     f_out.writelines(table_schemas)
 
-    # write table & view DDLs
-    print "Writing table & view DDLs ..."
-    for f in os.listdir(PATH_PREFIX):
-        print_obj_ddl(f, f_out)
+    # write relation and function DDLs
+    print "Writing relation and function DDLs ..."
+    with open(PATH_PREFIX + PGDUMP_FILE, 'r') as f_pgdump:
+       f_out.writelines(f_pgdump)
 
-    # Now we have to explicitly allow editing of these pg_class &
-    # pg_statistic tables
+    # explicitly allow editing of these pg_class & pg_statistic tables
     f_out.writelines(['\n-- ',
                            '\n-- Allow system table modifications',
                            '\n-- ',
                            '\nset allow_system_table_mods="DML";\n\n'])
 
-
     # dump table stats
     print "Writing table statistics ..."
-    oid_str = ','.join(mr_query.relids)
-    dump_tuple_count(cursor, oid_str, f_out)
+    dump_tuple_count(cursor, mr_query.relids, f_out)
 
     # dump column stats
     print "Writing column statistics ..."
-    dump_stats(cursor, oid_str, f_out)
+    dump_stats(cursor, mr_query.relids, f_out)
 
     cursor.close()
     conn.close()

[2/3] incubator-hawq git commit: HAWQ-983. Fix bug that minirepro generates wrong dependency order of objects

Reply via email to