[ https://issues.apache.org/jira/browse/HAWQ-778?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15316145#comment-15316145 ]
ASF GitHub Bot commented on HAWQ-778: ------------------------------------- Github user ictmalili commented on a diff in the pull request: https://github.com/apache/incubator-hawq/pull/690#discussion_r65832341 --- Diff: tools/bin/hawqregister --- @@ -103,6 +113,61 @@ def check_hash_type(options, databasename, tablename): sys.exit(1) +def get_metadata_from_database(options, databasename, tablename, seg_name): + try: + query = "select segno from pg_aoseg.%s;" % seg_name + dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename) + conn = dbconn.connect(dburl, False) + rows = dbconn.execSQL(conn, query) + conn.commit() + conn.close() + + except DatabaseError, ex: + logger.error("Failed to connect to database, this script can only be run when the database is up") + logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query)) + sys.exit(1) + + firstsegno = rows.rowcount + 1 + + # get the full path of the corresponding file for the target table + try: + query = ("select location, gp_persistent_tablespace_node.tablespace_oid, database_oid, relfilenode from pg_class, gp_persistent_relation_node, " + "gp_persistent_tablespace_node, gp_persistent_filespace_node where relname = '%s' and pg_class.relfilenode = " + "gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid " + "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename + dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename) + conn = dbconn.connect(dburl, False) + rows = dbconn.execSQL(conn, query) + conn.commit() + conn.close() + + except DatabaseError, ex: + logger.error("Failed to connect to database, this script can only be run when the database is up") + logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query)) + sys.exit(1) + + for row in rows: + tabledir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/" + + return firstsegno, tabledir + + +def check_files_and_table_in_same_hdfs_cluster(filepath, tabledir): + # check whether the files to be registered are in hdfs + filesystem = filepath.split('://') + if filesystem[0] != 'hdfs': + logger.error("Only support to register file(s) in hdfs") + sys.exit(1) + fileroot = filepath.split('/') + tableroot = tabledir.split('/') + print fileroot + print tableroot + # check the root url of them. eg: for 'hdfs://localhost:8020/temp/tempfile', we check 'hdfs://localhost:8020' + if fileroot[0] != tableroot[0] or fileroot[1] != tableroot[1] or fileroot[2] != tableroot[2]: + logger.error("Files to be registered and the table are not in the same hdfs cluster.") --- End diff -- It's better to print out the two hdfs clusters. > Refine hawq register with some sanity checks > -------------------------------------------- > > Key: HAWQ-778 > URL: https://issues.apache.org/jira/browse/HAWQ-778 > Project: Apache HAWQ > Issue Type: Sub-task > Components: Command Line Tools > Reporter: Yangcheng Luo > Assignee: Lei Chang > Priority: Minor > > Add a check for the existence of the 'hadoop' command. If it does not exist, remind the user to > export the path of the 'hadoop' command into the environment variable $PATH. > Add a check for the path entered by the user. It must start with 'hdfs://', and the > file(s) to be registered and the table in HAWQ must be in the same HDFS > cluster. > Modify the output of hawq register to give more information. > Modify the tests to fit the changes above. -- This message was sent by Atlassian JIRA (v6.3.4#6332)