AMBARI-22056. Solr Data Manager script should use gzip compression type (mgergely)
Change-Id: Ib61f1a03a885a2c81c11b32e5952c3c328a4064f

Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/a8736260
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/a8736260
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/a8736260

Branch: refs/heads/branch-3.0-ams
Commit: a8736260897c937bab1318ea4991d1edd1aae1eb
Parents: 7af3152
Author: Miklos Gergely <mgerg...@hortonworks.com>
Authored: Tue Sep 26 16:41:43 2017 +0200
Committer: Miklos Gergely <mgerg...@hortonworks.com>
Committed: Tue Sep 26 16:41:59 2017 +0200

----------------------------------------------------------------------
 .../src/main/python/solrDataManager.py | 35 +++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/ambari/blob/a8736260/ambari-infra/ambari-infra-solr-client/src/main/python/solrDataManager.py
----------------------------------------------------------------------
diff --git a/ambari-infra/ambari-infra-solr-client/src/main/python/solrDataManager.py b/ambari-infra/ambari-infra-solr-client/src/main/python/solrDataManager.py
index 18a4da7..e0356bb 100644
--- a/ambari-infra/ambari-infra-solr-client/src/main/python/solrDataManager.py
+++ b/ambari-infra/ambari-infra-solr-client/src/main/python/solrDataManager.py
@@ -32,6 +32,8 @@ from subprocess import call, Popen, PIPE
 from urllib import quote, unquote
 from zipfile import ZipFile, ZIP_DEFLATED
 import tarfile
+import gzip
+import shutil
 
 VERSION = "1.0"
 
@@ -69,7 +71,7 @@ def parse_arguments():
   parser.add_option("-g", "--ignore-unfinished-uploading", dest="ignore_unfinished_uploading", action="store_true", default=False)
 
   parser.add_option("--json-file", dest="json_file", help="create a json file instead of line delimited json", action="store_true", default=False)
-  parser.add_option("-z", "--compression", dest="compression", help="none | tar.gz | tar.bz2 | zip", default="tar.gz")
+  parser.add_option("-z", "--compression", dest="compression", help="none | tar.gz | tar.bz2 | zip | gz", default="gz")
 
   parser.add_option("-k", "--solr-keytab", dest="solr_keytab", type="string", help="the keytab for a kerberized solr")
   parser.add_option("-n", "--solr-principal", dest="solr_principal", type="string", help="the principal for a kerberized solr")
@@ -122,7 +124,7 @@ def parse_arguments():
     parser.print_help()
     sys.exit()
 
-  compression_values = ["none", "tar.gz", "tar.bz2", "zip"]
+  compression_values = ["none", "tar.gz", "tar.bz2", "zip", "gz"]
   if options.compression not in compression_values:
     print "compression must be one of {0}".format(" | ".join(compression_values))
     parser.print_help()
@@ -469,35 +471,44 @@ def upload_block(solr_kinit_command, hdfs_kinit_command, curl_prefix, solr_url,
   os.remove("{0}/command.json".format(working_dir))
 
 def compress_file(working_dir, tmp_file_path, file_name, compression):
+  data_file_name = "{0}.json".format(file_name)
   if compression == "none":
     upload_file_path = "{0}/{1}.json".format(working_dir, file_name)
     os.rename(tmp_file_path, upload_file_path)
   elif compression == "tar.gz":
-    upload_file_path = "{0}/{1}.tar.gz".format(working_dir, file_name)
-    zipped_file_name = "{0}.json".format(file_name)
+    upload_file_path = "{0}/{1}.json.tar.gz".format(working_dir, file_name)
     tar = tarfile.open(upload_file_path, mode="w:gz")
     try:
-      tar.add(tmp_file_path, arcname=zipped_file_name)
+      tar.add(tmp_file_path, arcname=data_file_name)
     finally:
       tar.close()
   elif compression == "tar.bz2":
-    upload_file_path = "{0}/{1}.tar.bz2".format(working_dir, file_name)
-    zipped_file_name = "{0}.json".format(file_name)
+    upload_file_path = "{0}/{1}.json.tar.bz2".format(working_dir, file_name)
     tar = tarfile.open(upload_file_path, mode="w:bz2")
     try:
-      tar.add(tmp_file_path, arcname=zipped_file_name)
+      tar.add(tmp_file_path, arcname=data_file_name)
     finally:
       tar.close()
   elif compression == "zip":
-    upload_file_path = "{0}/{1}.zip".format(working_dir, file_name)
-    zipped_file_name = "{0}.json".format(file_name)
+    upload_file_path = "{0}/{1}.json.zip".format(working_dir, file_name)
     zip = ZipFile(upload_file_path, 'w')
-    zip.write(tmp_file_path, zipped_file_name, ZIP_DEFLATED)
-    logger.info("Created file %s", zipped_file_name)
+    zip.write(tmp_file_path, data_file_name, ZIP_DEFLATED)
+  elif compression == "gz":
+    upload_file_path = "{0}/{1}.json.gz".format(working_dir, file_name)
+    gz = gzip.open(upload_file_path, mode="wb")
+    f = open(tmp_file_path)
+    try:
+      shutil.copyfileobj(f, gz)
+    finally:
+      gz.close()
+      f.close()
   else:
     logger.warn("Unknown compression type")
     sys.exit()
 
+  logger.info("Created data file %s", data_file_name)
+
+
   return upload_file_path
 
 def create_command_file(upload, working_dir, upload_file_path, solr_url, collection, filter_field, id_field, prev_lot_end_value,