A few cleanup items from cleaning up and standardizing the benchmark config file so that it looks similar to the server config file.
Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/4d6fde6b Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/4d6fde6b Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/4d6fde6b Branch: refs/heads/prestonc/hash_join Commit: 4d6fde6b1becf307bb1c2ff38084fe60c1ea8bb2 Parents: 785b154 Author: Preston Carman <[email protected]> Authored: Mon Feb 24 14:02:29 2014 -0800 Committer: Preston Carman <[email protected]> Committed: Mon Feb 24 14:02:29 2014 -0800 ---------------------------------------------------------------------- .../scripts/weather_benchmark.py | 26 -------------- .../noaa-ghcn-daily/scripts/weather_cli.py | 11 ++---- .../noaa-ghcn-daily/scripts/weather_config.py | 4 +-- .../scripts/weather_data_files.py | 36 ++------------------ .../scripts/weather_download_files.py | 2 +- .../noaa-ghcn-daily/scripts/weather_example.xml | 6 ++-- .../scripts/weather_example_cluster.xml | 22 ++++++------ .../src/main/resources/scripts/cluster_cli.py | 2 +- 8 files changed, 22 insertions(+), 87 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py index 68c93b3..6d9301e 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py @@ -23,7 +23,6 @@ import socket from weather_config import * from weather_data_files import * -from collections import OrderedDict # Weather data files created to manage the 
conversion process. # Allows partition and picking up where you left off. @@ -143,29 +142,6 @@ class WeatherBenchmark: def get_partition_folders(self, base_path): glob.glob(base_path + "partitions/d*_p*_i*") -# test_data_path = self.base_path + "/" + self.test + "/data" -# if not os.path.isdir(test_data_path): -# os.makedirs(test_data_path) -# -# if self.test == "local_speed_up": -# for i in range(virtual_partitions): -# # one virtual partition per disk -# split = 0 -# for j in range(len(base_paths)): -# # for each disk look at each partition -# for index, path in enumerate(partition_list): -# offset = partitions * j -# group = partitions / (i + 1) -# -# if (group) * split + offset <= index and index < (group) * (split + 1) + offset: -# split += 1 -# -# test_partition_path = test_data_path + "/p" + str(i + 1) + ".i" + str(split) + ".d" + str(j + 1) -# if not os.path.isdir(test_partition_path): -# os.makedirs(test_partition_path) -# os.symlink(path, test_partition_path + "/index" + str(index)) - - def copy_query_files(self): for test in self.dataset.get_tests(): if test in self.BENCHMARK_LOCAL_TESTS: @@ -215,12 +191,10 @@ class WeatherBenchmark: sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string)) def get_number_of_slices(self): - print self.dataset if len(self.dataset.get_tests()) == 0: print "No test has been defined in config file." 
else: for test in self.dataset.get_tests(): - print "test = " + test if test in self.BENCHMARK_LOCAL_TESTS: return get_local_virtual_partitions(self.partitions) elif test in self.BENCHMARK_CLUSTER_TESTS: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py index 92145a2..0f529f2 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py @@ -34,7 +34,6 @@ COMPRESSED = False def main(argv): append = False max_records = 0 - package = "ghcnd_gsn" process_file_name = "" reset = False section = "all" @@ -79,7 +78,7 @@ def main(argv): print 'Error: Argument must be a file name for --file (-f).' sys.exit() elif opt in ('-l', "--locality"): - if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "test_links", "queries", "statistics"): + if arg in ("download", "progress_file", "sensor_build", "station_build", "partition", "test_links", "queries", "statistics"): section = arg else: print 'Error: Argument must be a string for --locality (-l) and a valid locality.' @@ -90,12 +89,6 @@ def main(argv): else: print 'Error: Argument must be an integer for --max_station_files (-m).' sys.exit() - elif opt in ('-p', "--package"): - if arg in ("all", "gsn", "hcn"): - package = "ghcnd_" + arg - else: - print 'Error: Argument must be an string for one of the known weather packages: "all", "gsn", "hcn"' - sys.exit() elif opt == '-r': reset = True elif opt == '-u': @@ -137,7 +130,7 @@ def main(argv): download.download_all_files(reset) # Unzip the required file. 
- download.unzip_package(package, reset) + download.unzip_package(config.get_package(), reset) # Create some basic paths for save files and references. http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py index 9d2e289..a6513c2 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py @@ -51,10 +51,10 @@ class WeatherConfig: # Node Specific Functions # -------------------------------------------------------------------------- def get_node_ip(self, node): - return self.get_text(node.getElementsByTagName("ip_address")[0]) + return self.get_text(node.getElementsByTagName("cluster_ip")[0]) def get_node_name(self, node): - return self.get_text(node.getElementsByTagName("name")[0]) + return self.get_text(node.getElementsByTagName("id")[0]) # -------------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py index 42dea81..64b86d6 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_data_files.py @@ -92,38 +92,6 @@ class WeatherDataFiles: self.close_progress_data(True) self.reset() - def 
create_test_links(self, save_path, xml_save_path, test, node, partitions, virtual_partitions, base_paths=[]): - if (len(base_paths) == 0): - base_paths.append(os.path.dirname(save_path)) - partition_list = sorted(get_partition_paths(partitions, base_paths)) - - test_path = save_path + "/" + test - if not os.path.isdir(test_path): - os.makedirs(test_path) - for i in range(virtual_partitions): - # one virtual partition per disk - for j in range(len(base_paths)): - for index, path in enumerate(partition_list): - offset = partitions * j - test_partition_path = test_path + "/partition" + str(i + 1) + "_disk" + str(j + 1) - if not os.path.isdir(test_partition_path): - os.makedirs(test_partition_path) - if (node <= i): - if test == "speed_up": - group = partitions / (i + 1) - elif test == "batch_scale_up": - group = partitions / virtual_partitions - else: - group = -1 - # link - if (group) * node + offset <= index and index < (group) * (node + 1) + offset: - os.symlink(path, test_partition_path + "/index" + str(index)) - else: - # fake directories - os.makedirs(test_partition_path + "/sensors") - os.makedirs(test_partition_path + "/stations") - - # Once the initial data has been generated, the data can be copied into a set number of partitions. 
def copy_to_n_partitions(self, save_path, partitions, base_paths=[]): if (len(base_paths) == 0): @@ -347,7 +315,7 @@ class WeatherDataFiles: break return columns[self.INDEX_DATA_FILE_NAME] -def get_partition_paths(partitions, base_paths, key = "partitions"): +def get_partition_paths(partitions, base_paths, key="partitions"): partition_paths = [] for i in range(0, partitions): for j in range(0, len(base_paths)): @@ -356,6 +324,6 @@ def get_partition_paths(partitions, base_paths, key = "partitions"): return partition_paths def get_partition_folder(disks, partitions, index): - return "d" + str(disks) +"_p" + str(partitions) + "_i" + str(index) + return "d" + str(disks) + "_p" + str(partitions) + "_i" + str(index) http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py index 72f66bb..87adb11 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_download_files.py @@ -65,7 +65,7 @@ def report_download_status(count, block, size): line_size = 50 erase = "\b" * line_size sys.stdout.write(erase) - report = get_report_line( (float(count) * block / size), line_size) + report = get_report_line((float(count) * block / size), line_size) sys.stdout.write(report) # Creates a string to be used in reporting the percentage done. 
http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml index 7af1e9d..4f31dff 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml @@ -17,10 +17,10 @@ <data xmlns="data"> <name>Local Example</name> <save_path>/data</save_path> - <package>all</package> + <package>ghcnd_all</package> <node> - <name>localhost</name> - <ip_address>127.0.0.1</ip_address> + <id>localhost</id> + <cluster_ip>127.0.0.1</cluster_ip> </node> <dataset> <name>tiny</name> http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml index 34be0df..87be4e3 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml @@ -17,26 +17,26 @@ <data xmlns="data"> <name>Cluster Example</name> <save_path>/data</save_path> - <package>all</package> + <package>ghcnd_all</package> <node> - <name>machine1</name> - <ip_address>127.0.0.1</ip_address> + <id>machine1</id> + <cluster_ip>127.0.0.1</cluster_ip> </node> <node> - <name>machine2</name> - <ip_address>127.0.0.2</ip_address> + <id>machine2</id> + <cluster_ip>127.0.0.2</cluster_ip> </node> <node> - 
<name>machine3</name> - <ip_address>127.0.0.3</ip_address> + <id>machine3</id> + <cluster_ip>127.0.0.3</cluster_ip> </node> <node> - <name>machine4</name> - <ip_address>127.0.0.4</ip_address> + <id>machine4</id> + <cluster_ip>127.0.0.4</cluster_ip> </node> <node> - <name>machine5</name> - <ip_address>127.0.0.5</ip_address> + <id>machine5</id> + <cluster_ip>127.0.0.5</cluster_ip> </node> <dataset> <name>tiny-1drive</name> http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/4d6fde6b/vxquery-server/src/main/resources/scripts/cluster_cli.py ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/scripts/cluster_cli.py b/vxquery-server/src/main/resources/scripts/cluster_cli.py index 370e77f..089ad08 100644 --- a/vxquery-server/src/main/resources/scripts/cluster_cli.py +++ b/vxquery-server/src/main/resources/scripts/cluster_cli.py @@ -59,7 +59,7 @@ def main(argv): else: deploy_path = arg else: - print 'Error: Argument must be a file name for --folder (-f).' + print 'Error: Argument must be a file name for --deploy_folder (-d).' sys.exit() # Required fields to run the script.
