kumaab commented on code in PR #307: URL: https://github.com/apache/ranger/pull/307#discussion_r1548582891
########## ranger-tools/src/main/python/stress/stress-hbase-loadgenerator.py: ##########
@@ -0,0 +1,106 @@
+import subprocess
+import time
+import argparse
+import os
+from datetime import datetime
+
+def increase_memory_for_loadgenerator():
+    try:
+        cmd = "export HBASE_OPTS='-Xmx10g'"
+        print(cmd)
+        op = subprocess.call(cmd, shell=True)
+        print("Output:", op)
+    except subprocess.CalledProcessError as e:
+        print("Error in setting HBASE_HEAPSIZE:", e)
+        exit(1)
+
+def login():
+    try:
+        cmd = "kinit -kt <systest.keytab> systest"
+        print(cmd)
+        login_op = subprocess.call(cmd, shell=True)
+        print("Login output:", login_op)
+    except subprocess.CalledProcessError as e:
+        print("Error in login:", e)
+        exit(1)
+
+def create_ltt_command_multiput(num_cols_per_cf=1000, num_threads=10, num_keys=100, table_name="multitest", avg_data_size=2, num_col_families=3, col_family_pattern="cf%d", num_regions_per_server=1):
+    def get_column_families():
+        col_families = []
+        for i in range(num_col_families):
+            col_families.append(col_family_pattern % i)
+        return ','.join(col_families)
+    # Sample: hbase ltt -tn multitest -families f1,f2,f3 -write 20000:2:20 -multiput -num_keys 1000 -num_regions_per_server 1
+    cmd = f"hbase ltt -tn {table_name} -families {get_column_families()} -write {num_cols_per_cf}:{avg_data_size}:{num_threads}" \
+          f" -multiput -num_keys {num_keys} -num_regions_per_server {num_regions_per_server}"
+    return cmd
+
+
+def create_pe_command_multiget(multiget_batchsize=500, num_threads=10, num_keys=100, table_name="multitest", num_col_families=3, num_cols_per_cf=1000):
+    # Sample: hbase pe --table=multitest --families=3 --columns=10000 --multiGet=10 --rows=1000 --nomapred randomRead 5
+    cmd = f"hbase pe --table={table_name} --families={num_col_families} --columns={num_cols_per_cf} " \
+          f"--multiGet={multiget_batchsize} --rows={num_keys} --nomapred randomRead {num_threads}"
+    return cmd
+
+
+def generate_hbase_load(op_type, multiget_batchsize, num_cf, num_rows_list, num_cols_per_cf, num_threads_list, metadata, csv_outfile="/root/ltt_output.csv"):
+    # write the CSV header only if the output file does not already exist
+    if not os.path.exists(csv_outfile):
+        with open(csv_outfile, "w") as f:
+            f.write("op,num_cf,num_keys,num_cols_per_cf,num_threads,time_taken,command,metadata,date_start,time_start,date_end,time_end\n")
+    assert type(num_threads_list) == list
+    assert type(num_rows_list) == list
+    for num_keys in num_rows_list:
+        for num_threads in num_threads_list:
+            if op_type == "multiput":
+                cmd = create_ltt_command_multiput(num_cols_per_cf=num_cols_per_cf,
+                                                  num_threads=num_threads,
+                                                  num_keys=num_keys,
+                                                  num_col_families=num_cf)
+            elif op_type == "multiget":
+                cmd = create_pe_command_multiget(multiget_batchsize=multiget_batchsize,
+                                                 num_threads=num_threads,
+                                                 num_keys=num_keys,
+                                                 num_col_families=num_cf)
+            else:
+                print("Invalid op_type")
+                exit(1)
+
+            datetime_start = datetime.now()
+            date_start_str = datetime_start.date()
+            time_start_str = str(datetime_start.time()).split(".")[0]
+            time_start = time.time()
+            ltt_out = subprocess.call(cmd, shell=True)

Review Comment:
   subprocess.run() is the recommended approach; please see: https://docs.python.org/3/library/subprocess.html#using-the-subprocess-module
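   For illustration only, a minimal sketch of what the suggested switch to subprocess.run() could look like at the call sites above. The run_shell helper and its arguments are hypothetical, not part of this PR:

   import subprocess

   def run_shell(cmd):
       # Hypothetical helper sketching the subprocess.run() suggestion: check=True makes a
       # non-zero exit status raise CalledProcessError (so the script's existing except
       # blocks would actually trigger), and capture_output/text=True return stdout and
       # stderr as strings for logging.
       result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
       print("Output:", result.stdout)
       return result

   # Example usage mirroring the load-generation call in generate_hbase_load():
   # ltt_result = run_shell("hbase ltt -tn multitest -families cf0,cf1,cf2 -write 1000:2:10 -multiput -num_keys 100 -num_regions_per_server 1")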