Repository: incubator-hawq
Updated Branches:
  refs/heads/master a12012581 -> 9c97bccfd


HAWQ-991. Fix a bug in --force mode when the yaml configuration file contains
only files under the table directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/9c97bccf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/9c97bccf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/9c97bccf

Branch: refs/heads/master
Commit: 9c97bccfdd9a58ef16f13ecdb028b773c7699484
Parents: a37b3de
Author: hzhang2 <zhanghuan...@163.com>
Authored: Mon Sep 26 19:12:52 2016 +0800
Committer: hzhang2 <zhanghuan...@163.com>
Committed: Tue Sep 27 07:11:54 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqregister | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/9c97bccf/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index c7c207d..6c01b77 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -564,7 +564,12 @@ class HawqRegister(object):
                 self.failure_handler.rollback()
                 sys.exit(1)
 
+        if not self.yml:
+            check_no_regex_filepath([self.filepath])
+            self.files, self.sizes = self._get_files_in_hdfs(self.filepath)
+       
         self.do_not_move, self.files_update, self.sizes_update = False, [], []
+        self.newfiles, self.newsizes = [f for f in self.files], [sz for sz in 
self.sizes]
         if self.mode == 'force':
             existed_files, _ = self._get_files_in_hdfs(self.tabledir)
             if len(self.files) == len(existed_files):
@@ -574,24 +579,21 @@ class HawqRegister(object):
                     sys.exit(1)
                 else:
                     self.do_not_move, self.files_update, self.sizes_update = 
True, self.files, self.sizes
-                    self.files, self.sizes = [], []
             elif len(self.files) < len(existed_files):
                 logger.error('In force mode, you should include existing table 
files in yaml configuration file. Otherwise you should drop the previous table 
before register --force.')
                 self.failure_handler.rollback()
                 sys.exit(1)
             else:
-                files_old, sizes_old = [f for f in self.files], [sz for sz in 
self.sizes]
-                for k, f in enumerate(files_old):
+                for k, f in enumerate(self.files):
                     if f in existed_files:
-                        self.files_update.append(files_old[k])
-                        self.sizes_update.append(sizes_old[k])
-                        self.files.remove(files_old[k])
-                        self.sizes.remove(sizes_old[k])
+                        self.files_update.append(self.files[k])
+                        self.sizes_update.append(self.sizes[k])
+                        self.newfiles.remove(self.files[k])
+                        self.newsizes.remove(self.sizes[k])
                 if sorted(self.files_update) != sorted(existed_files):
                     logger.error('In force mode, you should include existing 
table files in yaml configuration file. Otherwise you should drop the previous 
table before register --force.')
                     self.failure_handler.rollback()
                     sys.exit(1)
-
         elif self.mode == 'repair':
             self.do_not_move = True
             self.files_update, self.sizes_update = [fn for fn in self.files], 
[sz for sz in self.sizes]
@@ -603,9 +605,6 @@ class HawqRegister(object):
 
         self._check_files_and_table_in_same_hdfs_cluster(self.filepath, 
self.tabledir)
 
-        if not self.yml:
-            check_no_regex_filepath([self.filepath])
-            self.files, self.sizes = self._get_files_in_hdfs(self.filepath)
         print 'New file(s) to be registered: ', self.files
         if self.files_update:
             print 'Catalog info need to be updated for these files: ', 
self.files_update
@@ -692,7 +691,7 @@ class HawqRegister(object):
     def _move_files_in_hdfs(self):
         '''Move file(s) in src path into the folder correspoding to the target 
table'''
         segno = self.firstsegno
-        for f in self.files:
+        for f in self.newfiles:
             srcfile = f
             dstfile = self.tabledir + str(segno)
             segno += 1
@@ -729,6 +728,23 @@ class HawqRegister(object):
                 for k, eof in enumerate(eofs[1:]):
                     query += ',(%d, %d, %d, %d, %d)' % (self.firstsegno + k + 
1, eof, -1, -1, -1)
             query += ';'
+        elif mode == 'force':
+            eofs = self.sizes
+            query = "set allow_system_table_mods='dml';"
+            query += "begin transaction;"
+            segno_lst = [f.split('/')[-1] for f in self.files]
+            query += "delete from pg_aoseg.%s;" % (self.seg_name)
+            self.firstsegno = 1
+            if self.file_format == 'Parquet':
+                query += 'insert into pg_aoseg.%s values(%d, %d, %d, %d)' % 
(self.seg_name, self.firstsegno, eofs[0], -1, -1)
+                for k, eof in enumerate(eofs[1:]):
+                    query += ',(%d, %d, %d, %d)' % (self.firstsegno + k + 1, 
eof, -1, -1)
+            else:
+                query += 'insert into pg_aoseg.%s values(%d, %d, %d, %d, %d)' 
% (self.seg_name, self.firstsegno, eofs[0], -1, -1, -1)
+                for k, eof in enumerate(eofs[1:]):
+                    query += ',(%d, %d, %d, %d, %d)' % (self.firstsegno + k + 
1, eof, -1, -1, -1)
+            query += ';'
+            query += "end transaction;"
         elif mode == 'update':
             eofs = self.sizes_update
             query = "set allow_system_table_mods='dml';"
@@ -792,10 +808,10 @@ class HawqRegister(object):
         if not self.do_not_move:
             self._move_files_in_hdfs()
         if (not self.do_not_move) and self.mode == 'force':
-            self._modify_metadata('update_and_insert')
+            self._modify_metadata('force')
         else:
             if self.mode == 'force':
-                self._modify_metadata('update')
+                self._modify_metadata('force')
             elif self.mode == 'repair':
                 self._modify_metadata('update')
                 if self.files_delete:

Reply via email to