Repository: systemml Updated Branches: refs/heads/master 4384ebbda -> 114200724
[MINOR] fixes for HDFS path Closes #624 Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/11420072 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/11420072 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/11420072 Branch: refs/heads/master Commit: 11420072412c0c873b72267d1e9764c87abc57b4 Parents: 4384ebb Author: krishnakalyan3 <krishnakaly...@gmail.com> Authored: Thu Aug 17 11:43:49 2017 -0700 Committer: Nakul Jindal <naku...@gmail.com> Committed: Thu Aug 17 11:43:49 2017 -0700 ---------------------------------------------------------------------- bin/utils.py | 9 +++---- scripts/perftest/python/run_perftest.py | 17 +++++++------- scripts/perftest/python/utils_exec.py | 19 ++++++++++++++- scripts/perftest/python/utils_fs.py | 11 +++++++++ scripts/perftest/python/utils_misc.py | 35 ++++++++++++++++++++++++++++ 5 files changed, 78 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/bin/utils.py ---------------------------------------------------------------------- diff --git a/bin/utils.py b/bin/utils.py index 6f40881..cf17960 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -74,15 +74,16 @@ def find_dml_file(systemml_home, script_file): Location of the dml script """ scripts_dir = join(systemml_home, 'scripts') - if not (exists(script_file)): - script_file = find_file(script_file, scripts_dir) - if script_file is None: + if not exists(script_file): + script_file_path = find_file(script_file, scripts_dir) + if script_file_path is not None: + return script_file_path + else: print('Could not find DML script: ' + script_file) sys.exit() return script_file - def log4j_path(systemml_home): """ Create log4j.properties from the template if not exist http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/run_perftest.py ---------------------------------------------------------------------- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index 8c3d1fa..20f5380 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -32,7 +32,8 @@ from datagen import config_packets_datagen from train import config_packets_train from predict import config_packets_predict from utils_misc import get_families, config_reader, \ - exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args + exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args, \ + get_default_dir from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists # A packet is a dictionary @@ -275,7 +276,7 @@ if __name__ == '__main__': default_mat_shape = ['10k_100'] # Default temp directory, contains everything generated in perftest - default_temp_dir = join(systemml_home, 'scripts', 'perftest', 'temp') + default_config_dir = join(systemml_home, 'scripts', 'perftest', 'temp') # Initialize time start_time = time.time() @@ -308,7 +309,7 @@ if __name__ == '__main__': cparser.add_argument('--mat-shape', default=default_mat_shape, help='space separated list of shapes of matrices ' 'to generate (e.g 10k_1k, 20M_4k)', metavar='', nargs='+') - cparser.add_argument('--config-dir', default=default_temp_dir, help='temporary directory ' + cparser.add_argument('--config-dir', default=default_config_dir, help='temporary directory ' 'where generated, training and prediction data is put', metavar='') cparser.add_argument('--filename', default='perf_test', help='name of the output file for the perf' ' metrics', metavar='') @@ -316,8 +317,7 @@ if __name__ == '__main__': help='space separated list of types of workloads to run (available: data-gen, train, predict)', metavar='', choices=workload, nargs='+') # Change this to temp-dir - cparser.add_argument('--temp-dir', default=default_temp_dir, - help='define the file system to work on', metavar='') + cparser.add_argument('--temp-dir', help='define the file system to work on', metavar='') # Configuration Options cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, ' @@ -350,8 +350,8 @@ if __name__ == '__main__': # Global variables perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict) - # Debug arguments - # print(arg_dict) + # temp_dir hdfs / local path check + perftest_args_dict['temp_dir'] = get_default_dir(args.temp_dir, args.exec_type, default_config_dir) # default_mat_type validity if len(args.mat_type) > 2: @@ -401,4 +401,5 @@ if __name__ == '__main__': perf_test_entry(**perftest_args_dict) total_time = (time.time() - start_time) - logging.info('Performance tests complete {0:.3f} secs \n'.format(total_time)) + logging.info('total_time,none,none,none,none,{}'.format(total_time)) + logging.info('Performance tests complete') http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_exec.py ---------------------------------------------------------------------- diff --git a/scripts/perftest/python/utils_exec.py b/scripts/perftest/python/utils_exec.py index cf98d0f..92a267f 100755 --- a/scripts/perftest/python/utils_exec.py +++ b/scripts/perftest/python/utils_exec.py @@ -20,6 +20,7 @@ # #------------------------------------------------------------- +import sys import subprocess import shlex import re @@ -45,7 +46,7 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None): Based on extract we return the relevant string """ # Debug - # print(cmd_string) + #print(cmd_string) exec_command = shlex.split(cmd_string) proc1 = subprocess.Popen(exec_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -63,6 +64,8 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None): return_data = parse_time(std_outs) if extract == 'dir': return_data = parse_hdfs_paths(std_outs) + if extract == 'hdfs_base': + return_data = parse_hdfs_base(std_outs) if extract is None: return_data = 0 @@ -73,6 +76,20 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None): return return_data +def parse_hdfs_base(std_outs): + """ + return: String + hdfs base uri + """ + hdfs_uri = None + for line in std_outs: + if line.startswith('hdfs://'): + hdfs_uri = line + if hdfs_uri is None: + sys.exit('HDFS URI not found') + return hdfs_uri + + def write_logs(std_outs, log_file_path): """ Write all logs to the specified location http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_fs.py ---------------------------------------------------------------------- diff --git a/scripts/perftest/python/utils_fs.py b/scripts/perftest/python/utils_fs.py index 7e04907..b3cc659 100755 --- a/scripts/perftest/python/utils_fs.py +++ b/scripts/perftest/python/utils_fs.py @@ -21,6 +21,7 @@ #------------------------------------------------------------- import os +import sys from os.path import join import glob from functools import reduce @@ -101,6 +102,16 @@ def contains_dir(hdfs_dirs, sub_folder): return False +def check_hdfs_path(path): + """ + Check if a path is present in HDFS + """ + cmd = ['hdfs', 'dfs', '-test', '-e', path] + return_code = subprocess_exec(' '.join(cmd)) + if return_code != 0: + return sys.exit('Please create {}'.format(path)) + + def relevant_folders(path, algo, family, matrix_type, matrix_shape, mode): """ Finds the right folder to read the data based on given parameters http://git-wip-us.apache.org/repos/asf/systemml/blob/11420072/scripts/perftest/python/utils_misc.py ---------------------------------------------------------------------- diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py index a3c98c2..704f22b 100755 --- a/scripts/perftest/python/utils_misc.py +++ b/scripts/perftest/python/utils_misc.py @@ -25,7 +25,9 @@ import os import json import re import sys +import getpass from utils_exec import subprocess_exec +from utils_fs import check_hdfs_path # This file contains all misc utility functions required by performance test module @@ -361,3 +363,36 @@ def mat_type_check(current_family, matrix_types, dense_algos): current_type.append(current_matrix_type) return current_type + + +def get_default_dir(temp_dir, exec_mode, config_dir): + """ + temp_dir: String + exec_mode: String + config_dir: String + + return: String + Local or HDFS home directory + """ + + if exec_mode == 'singlenode': + if temp_dir is None: + return config_dir + if temp_dir is not None: + return temp_dir + + if exec_mode == 'hybrid_spark': + cmd = ['hdfs', 'getconf', '-confKey', 'fs.default.name'] + hdfs_base = subprocess_exec(' '.join(cmd), extract='hdfs_base') + + if temp_dir is None: + hdfs_home = join(hdfs_base, 'user', getpass.getuser()) + check_hdfs_path(hdfs_home) + return hdfs_home + + if temp_dir is not None: + if temp_dir.startswith('hdfs'): + return temp_dir + else: + hdfs_home = join(hdfs_base, 'user', getpass.getuser(), temp_dir) + return hdfs_home