[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-18 Thread xunzhang
Github user xunzhang closed the pull request at:

https://github.com/apache/incubator-hawq/pull/846


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread xunzhang
Github user xunzhang commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75249924
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
--- End diff --

Good point. I will rewrite the code around this.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread xunzhang
Github user xunzhang commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75249501
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+return 'AO', filepath, params['AO_Schema'], 
params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+try:
+schema = ','.join([k['name'] + ' ' + k['type'] for k in 
schema_info])
+fmt = 'ROW' if fmt == 'AO' else fmt
+query = 'create table %s(%s) with (appendonly=true, 
orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+conn = dbconn.connect(dburl, False)
+rows = dbconn.execSQL(conn, query)
+conn.commit()
+except DatabaseError, ex:
+logger.error('Failed to execute query ""%s"' % query)
 sys.exit(1)
 
 
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database, fmt):
 try:
-relfilenode = 0
-relname = ""
-query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and  pg_class1.oid = pg_appendonly.relid and 
pg_appendonly.segrelid = pg_class2.oid;") % tablename
-dburl = dbconn.DbURL(hostname=options.host, port=options.port, 
username=options.user, dbname=databasename)
+relname = ''
+tablename = tablename.split('.')[-1]
+query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = 
pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
 conn = dbconn.connect(dburl, True)
 rows = dbconn.execSQL(conn, query)
-   conn.commit()
-if rows.rowcount == 0:
-logger.error("table '%s' not found in db '%s'" % (tablename, 
databasename));
+conn.commit()
+if not rows.rowcount:
+logger.error('table "%s" not found in db "%s"' % (tablename, 
database))
 sys.exit(1)
 for row in rows:
 relname = 

[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread xunzhang
Github user xunzhang commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75244483
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+return 'AO', filepath, params['AO_Schema'], 
params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+try:
+schema = ','.join([k['name'] + ' ' + k['type'] for k in 
schema_info])
+fmt = 'ROW' if fmt == 'AO' else fmt
+query = 'create table %s(%s) with (appendonly=true, 
orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+conn = dbconn.connect(dburl, False)
+rows = dbconn.execSQL(conn, query)
+conn.commit()
+except DatabaseError, ex:
+logger.error('Failed to execute query ""%s"' % query)
 sys.exit(1)
 
 
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database, fmt):
 try:
-relfilenode = 0
-relname = ""
-query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and  pg_class1.oid = pg_appendonly.relid and 
pg_appendonly.segrelid = pg_class2.oid;") % tablename
-dburl = dbconn.DbURL(hostname=options.host, port=options.port, 
username=options.user, dbname=databasename)
+relname = ''
+tablename = tablename.split('.')[-1]
+query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = 
pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
 conn = dbconn.connect(dburl, True)
 rows = dbconn.execSQL(conn, query)
-   conn.commit()
-if rows.rowcount == 0:
-logger.error("table '%s' not found in db '%s'" % (tablename, 
databasename));
+conn.commit()
+if not rows.rowcount:
+logger.error('table "%s" not found in db "%s"' % (tablename, 
database))
 sys.exit(1)
 for row in rows:
 relname = 

[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread xunzhang
Github user xunzhang commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75244123
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
--- End diff --

Sure.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread xunzhang
Github user xunzhang commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75243718
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+return 'AO', filepath, params['AO_Schema'], 
params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+try:
+schema = ','.join([k['name'] + ' ' + k['type'] for k in 
schema_info])
+fmt = 'ROW' if fmt == 'AO' else fmt
+query = 'create table %s(%s) with (appendonly=true, 
orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+conn = dbconn.connect(dburl, False)
+rows = dbconn.execSQL(conn, query)
+conn.commit()
+except DatabaseError, ex:
+logger.error('Failed to execute query ""%s"' % query)
 sys.exit(1)
 
 
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database, fmt):
 try:
-relfilenode = 0
-relname = ""
-query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and  pg_class1.oid = pg_appendonly.relid and 
pg_appendonly.segrelid = pg_class2.oid;") % tablename
-dburl = dbconn.DbURL(hostname=options.host, port=options.port, 
username=options.user, dbname=databasename)
+relname = ''
+tablename = tablename.split('.')[-1]
+query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = 
pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
 conn = dbconn.connect(dburl, True)
 rows = dbconn.execSQL(conn, query)
-   conn.commit()
-if rows.rowcount == 0:
-logger.error("table '%s' not found in db '%s'" % (tablename, 
databasename));
+conn.commit()
+if not rows.rowcount:
+logger.error('table "%s" not found in db "%s"' % (tablename, 
database))
 sys.exit(1)
 for row in rows:
 relname = 

[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread ictmalili
Github user ictmalili commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75243419
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+return 'AO', filepath, params['AO_Schema'], 
params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+try:
+schema = ','.join([k['name'] + ' ' + k['type'] for k in 
schema_info])
+fmt = 'ROW' if fmt == 'AO' else fmt
+query = 'create table %s(%s) with (appendonly=true, 
orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+conn = dbconn.connect(dburl, False)
+rows = dbconn.execSQL(conn, query)
+conn.commit()
+except DatabaseError, ex:
+logger.error('Failed to execute query ""%s"' % query)
 sys.exit(1)
 
 
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database, fmt):
 try:
-relfilenode = 0
-relname = ""
-query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and  pg_class1.oid = pg_appendonly.relid and 
pg_appendonly.segrelid = pg_class2.oid;") % tablename
-dburl = dbconn.DbURL(hostname=options.host, port=options.port, 
username=options.user, dbname=databasename)
+relname = ''
+tablename = tablename.split('.')[-1]
+query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = 
pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
 conn = dbconn.connect(dburl, True)
 rows = dbconn.execSQL(conn, query)
-   conn.commit()
-if rows.rowcount == 0:
-logger.error("table '%s' not found in db '%s'" % (tablename, 
databasename));
+conn.commit()
+if not rows.rowcount:
+logger.error('table "%s" not found in db "%s"' % (tablename, 
database))
 sys.exit(1)
 for row in rows:
 relname = 

[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread ictmalili
Github user ictmalili commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75243104
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+return 'AO', filepath, params['AO_Schema'], 
params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+try:
+schema = ','.join([k['name'] + ' ' + k['type'] for k in 
schema_info])
+fmt = 'ROW' if fmt == 'AO' else fmt
+query = 'create table %s(%s) with (appendonly=true, 
orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+conn = dbconn.connect(dburl, False)
+rows = dbconn.execSQL(conn, query)
+conn.commit()
+except DatabaseError, ex:
+logger.error('Failed to execute query ""%s"' % query)
 sys.exit(1)
 
 
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database, fmt):
 try:
-relfilenode = 0
-relname = ""
-query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and  pg_class1.oid = pg_appendonly.relid and 
pg_appendonly.segrelid = pg_class2.oid;") % tablename
-dburl = dbconn.DbURL(hostname=options.host, port=options.port, 
username=options.user, dbname=databasename)
+relname = ''
+tablename = tablename.split('.')[-1]
+query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = 
pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
 conn = dbconn.connect(dburl, True)
 rows = dbconn.execSQL(conn, query)
-   conn.commit()
-if rows.rowcount == 0:
-logger.error("table '%s' not found in db '%s'" % (tablename, 
databasename));
+conn.commit()
+if not rows.rowcount:
+logger.error('table "%s" not found in db "%s"' % (tablename, 
database))
 sys.exit(1)
 for row in rows:
 relname = 

[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread ictmalili
Github user ictmalili commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75243154
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
+return 'Parquet', filepath, params['Parquet_Schema'], 
params['Distribution_Policy']
+offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path'][:offset] if 
len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['AO_FileLocations']['Files'][0]['path']
+return 'AO', filepath, params['AO_Schema'], 
params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+try:
+schema = ','.join([k['name'] + ' ' + k['type'] for k in 
schema_info])
+fmt = 'ROW' if fmt == 'AO' else fmt
+query = 'create table %s(%s) with (appendonly=true, 
orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+conn = dbconn.connect(dburl, False)
+rows = dbconn.execSQL(conn, query)
+conn.commit()
+except DatabaseError, ex:
+logger.error('Failed to execute query ""%s"' % query)
 sys.exit(1)
 
 
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database, fmt):
 try:
-relfilenode = 0
-relname = ""
-query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and  pg_class1.oid = pg_appendonly.relid and 
pg_appendonly.segrelid = pg_class2.oid;") % tablename
-dburl = dbconn.DbURL(hostname=options.host, port=options.port, 
username=options.user, dbname=databasename)
+relname = ''
+tablename = tablename.split('.')[-1]
+query = ("select pg_class2.relname from pg_class as pg_class1, 
pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = 
pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
 conn = dbconn.connect(dburl, True)
 rows = dbconn.execSQL(conn, query)
-   conn.commit()
-if rows.rowcount == 0:
-logger.error("table '%s' not found in db '%s'" % (tablename, 
databasename));
+conn.commit()
+if not rows.rowcount:
+logger.error('table "%s" not found in db "%s"' % (tablename, 
database))
 sys.exit(1)
 for row in rows:
 relname = 

[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread ictmalili
Github user ictmalili commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75242515
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
--- End diff --

What if the yml file is corrupted? For example, what if 
params['Parquet_FileLocations']['Files'] is null?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread ictmalili
Github user ictmalili commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75242545
  
--- Diff: tools/bin/hawqregister ---
@@ -40,186 +39,195 @@ EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
 
-def create_opt_parser(version):
+def option_parser():
 parser = OptParser(option_class=OptChecker,
-   usage='usage: %prog [options] database_name 
table_name file_or_dir_path_in_hdfs',
-   version=version)
+   usage='usage: %prog [options] table_name',
+   version='%prog version $Revision: #1 $')
 parser.remove_option('-h')
 parser.add_option('-?', '--help', action='help')
-parser.add_option('-h', '--host', help="host of the target DB")
-parser.add_option('-p', '--port', help="port of the target DB", 
type='int', default=0)
-parser.add_option('-U', '--user', help="username of the target DB")
-return parser
-
-
-def check_hadoop_command():
-hdfscmd = "hadoop"
-result = local_ssh(hdfscmd);
-if result != 0:
-logger.error("command 'hadoop' is not available, please set 
environment variable $PATH to fix this")
+parser.add_option('-h', '--host', help='host of the target DB')
+parser.add_option('-p', '--port', help='port of the target DB', 
type='int', default=0)
+parser.add_option('-U', '--user', help='username of the target DB')
+parser.add_option('-d', '--database', default = 'postgres', dest = 
'database', help='database name')
+parser.add_option('-f', '--filepath', dest = 'filepath', help='file 
name in HDFS')
+parser.add_option('-c', '--config', dest = 'yml_config', default = '', 
help='configuration file in YAML format')
+return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+import yaml
+with open(yml_file, 'r') as f:
+params = yaml.load(f)
+if params['FileFormat'] == 'Parquet':
+offset = 
params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+filepath = params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path'][:offset] if 
len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + 
params['Parquet_FileLocations']['Files'][0]['path']
--- End diff --

Could we split this line into multiple lines?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread xunzhang
Github user xunzhang commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75242500
  
--- Diff: src/test/feature/ManagementTool/test_hawq_register.cpp ---
@@ -199,9 +199,121 @@ TEST_F(TestHawqRegister, TestNotHDFSPath) {
util.execute("create table hawqregister(i int);");
util.query("select * from hawqregister;", 0);
 
-   EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) 
HAWQ_DB + "hawqregister /hawq_register_hawq.paq"));
+   EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) 
HAWQ_DB + " -f /hawq_register_hawq.paq hawqregister"));
util.query("select * from hawqregister;", 0);
 
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm 
hdfs://localhost:8020/hawq_register_hawq.paq"));
util.execute("drop table hawqregister;");
 }
+
+TEST_F(TestHawqRegister, TestUsage1ParquetRandomly) {
+  SQLUtility util;
+  string rootPath(util.getTestRootPath());
+  string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
+  string filePath = rootPath + relativePath;
+  EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + 
" hdfs://localhost:8020/hawq_register_hawq.paq"));
+  util.execute("drop table if exists nt;");
+  util.execute("create table nt(i int) with (appendonly=true, 
orientation=parquet);");
+  EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) 
HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt"));
+   util.query("select * from nt;", 3);
+   util.execute("insert into nt values(1);");
+   util.query("select * from nt;", 4);
+  util.execute("drop table nt;");
+}
+
+TEST_F(TestHawqRegister, TestUsage1ParquetRandomly2) {
+  SQLUtility util;
--- End diff --

@ictmalili yes, I made a mistake here, I will update the commit soon.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---


[GitHub] incubator-hawq pull request #846: Hawq 991. Add support for "HAWQ register" ...

2016-08-17 Thread ictmalili
Github user ictmalili commented on a diff in the pull request:

https://github.com/apache/incubator-hawq/pull/846#discussion_r75241823
  
--- Diff: src/test/feature/ManagementTool/test_hawq_register.cpp ---
@@ -199,9 +199,121 @@ TEST_F(TestHawqRegister, TestNotHDFSPath) {
util.execute("create table hawqregister(i int);");
util.query("select * from hawqregister;", 0);
 
-   EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) 
HAWQ_DB + "hawqregister /hawq_register_hawq.paq"));
+   EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) 
HAWQ_DB + " -f /hawq_register_hawq.paq hawqregister"));
util.query("select * from hawqregister;", 0);
 
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm 
hdfs://localhost:8020/hawq_register_hawq.paq"));
util.execute("drop table hawqregister;");
 }
+
+TEST_F(TestHawqRegister, TestUsage1ParquetRandomly) {
+  SQLUtility util;
+  string rootPath(util.getTestRootPath());
+  string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
+  string filePath = rootPath + relativePath;
+  EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + 
" hdfs://localhost:8020/hawq_register_hawq.paq"));
+  util.execute("drop table if exists nt;");
+  util.execute("create table nt(i int) with (appendonly=true, 
orientation=parquet);");
+  EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) 
HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt"));
+   util.query("select * from nt;", 3);
+   util.execute("insert into nt values(1);");
+   util.query("select * from nt;", 4);
+  util.execute("drop table nt;");
+}
+
+TEST_F(TestHawqRegister, TestUsage1ParquetRandomly2) {
+  SQLUtility util;
--- End diff --

@xunzhang What's the difference between these two test cases: 
TestUsage1ParquetRandomly and TestUsage1ParquetRandomly2?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---