Repository: incubator-hawq Updated Branches: refs/heads/master a12012581 -> 9c97bccfd
HAWQ-991. Fix bug of yaml configuration file contains only files under table directory in --force mode. Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/9c97bccf Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/9c97bccf Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/9c97bccf Branch: refs/heads/master Commit: 9c97bccfdd9a58ef16f13ecdb028b773c7699484 Parents: a37b3de Author: hzhang2 <zhanghuan...@163.com> Authored: Mon Sep 26 19:12:52 2016 +0800 Committer: hzhang2 <zhanghuan...@163.com> Committed: Tue Sep 27 07:11:54 2016 +0800 ---------------------------------------------------------------------- tools/bin/hawqregister | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/9c97bccf/tools/bin/hawqregister ---------------------------------------------------------------------- diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister index c7c207d..6c01b77 100755 --- a/tools/bin/hawqregister +++ b/tools/bin/hawqregister @@ -564,7 +564,12 @@ class HawqRegister(object): self.failure_handler.rollback() sys.exit(1) + if not self.yml: + check_no_regex_filepath([self.filepath]) + self.files, self.sizes = self._get_files_in_hdfs(self.filepath) + self.do_not_move, self.files_update, self.sizes_update = False, [], [] + self.newfiles, self.newsizes = [f for f in self.files], [sz for sz in self.sizes] if self.mode == 'force': existed_files, _ = self._get_files_in_hdfs(self.tabledir) if len(self.files) == len(existed_files): @@ -574,24 +579,21 @@ class HawqRegister(object): sys.exit(1) else: self.do_not_move, self.files_update, self.sizes_update = True, self.files, self.sizes - self.files, self.sizes = [], [] elif len(self.files) < len(existed_files): logger.error('In force mode, you should include existing table files in yaml configuration file. Otherwise you should drop the previous table before register --force.') self.failure_handler.rollback() sys.exit(1) else: - files_old, sizes_old = [f for f in self.files], [sz for sz in self.sizes] - for k, f in enumerate(files_old): + for k, f in enumerate(self.files): if f in existed_files: - self.files_update.append(files_old[k]) - self.sizes_update.append(sizes_old[k]) - self.files.remove(files_old[k]) - self.sizes.remove(sizes_old[k]) + self.files_update.append(self.files[k]) + self.sizes_update.append(self.sizes[k]) + self.newfiles.remove(self.files[k]) + self.newsizes.remove(self.sizes[k]) if sorted(self.files_update) != sorted(existed_files): logger.error('In force mode, you should include existing table files in yaml configuration file. Otherwise you should drop the previous table before register --force.') self.failure_handler.rollback() sys.exit(1) - elif self.mode == 'repair': self.do_not_move = True self.files_update, self.sizes_update = [fn for fn in self.files], [sz for sz in self.sizes] @@ -603,9 +605,6 @@ class HawqRegister(object): self._check_files_and_table_in_same_hdfs_cluster(self.filepath, self.tabledir) - if not self.yml: - check_no_regex_filepath([self.filepath]) - self.files, self.sizes = self._get_files_in_hdfs(self.filepath) print 'New file(s) to be registered: ', self.files if self.files_update: print 'Catalog info need to be updated for these files: ', self.files_update @@ -692,7 +691,7 @@ class HawqRegister(object): def _move_files_in_hdfs(self): '''Move file(s) in src path into the folder correspoding to the target table''' segno = self.firstsegno - for f in self.files: + for f in self.newfiles: srcfile = f dstfile = self.tabledir + str(segno) segno += 1 @@ -729,6 +728,23 @@ class HawqRegister(object): for k, eof in enumerate(eofs[1:]): query += ',(%d, %d, %d, %d, %d)' % (self.firstsegno + k + 1, eof, -1, -1, -1) query += ';' + elif mode == 'force': + eofs = self.sizes + query = "set allow_system_table_mods='dml';" + query += "begin transaction;" + segno_lst = [f.split('/')[-1] for f in self.files] + query += "delete from pg_aoseg.%s;" % (self.seg_name) + self.firstsegno = 1 + if self.file_format == 'Parquet': + query += 'insert into pg_aoseg.%s values(%d, %d, %d, %d)' % (self.seg_name, self.firstsegno, eofs[0], -1, -1) + for k, eof in enumerate(eofs[1:]): + query += ',(%d, %d, %d, %d)' % (self.firstsegno + k + 1, eof, -1, -1) + else: + query += 'insert into pg_aoseg.%s values(%d, %d, %d, %d, %d)' % (self.seg_name, self.firstsegno, eofs[0], -1, -1, -1) + for k, eof in enumerate(eofs[1:]): + query += ',(%d, %d, %d, %d, %d)' % (self.firstsegno + k + 1, eof, -1, -1, -1) + query += ';' + query += "end transaction;" elif mode == 'update': eofs = self.sizes_update query = "set allow_system_table_mods='dml';" @@ -792,10 +808,10 @@ class HawqRegister(object): if not self.do_not_move: self._move_files_in_hdfs() if (not self.do_not_move) and self.mode == 'force': - self._modify_metadata('update_and_insert') + self._modify_metadata('force') else: if self.mode == 'force': - self._modify_metadata('update') + self._modify_metadata('force') elif self.mode == 'repair': self._modify_metadata('update') if self.files_delete: