Benchmark additional scripts and new conf folder.

- Adding more utility scripts for the benchmark.
- Also moved the configuration files into a separate folder.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/b8fc3612
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/b8fc3612
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/b8fc3612

Branch: refs/heads/site
Commit: b8fc36122eab4320c710c9922a617670260e5c58
Parents: 6c90193
Author: Preston Carman <[email protected]>
Authored: Mon Jun 23 14:28:28 2014 -0700
Committer: Preston Carman <[email protected]>
Committed: Mon Jun 23 14:28:28 2014 -0700

----------------------------------------------------------------------
 .../noaa-ghcn-daily/conf/weather_example.xml    | 35 +++++++
 .../conf/weather_example_cluster.xml            | 58 ++++++++++++
 .../noaa-ghcn-daily/scripts/weather_example.xml | 35 -------
 .../scripts/weather_example_cluster.xml         | 58 ------------
 .../resources/util/find_averages_in_logs.py     | 97 ++++++++++++++++++++
 .../src/main/resources/util/merge_xml_files.py  | 88 ++++++++++++++++++
 6 files changed, 278 insertions(+), 93 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
new file mode 100644
index 0000000..2c15a33
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml
@@ -0,0 +1,35 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<data xmlns="data">
+    <name>Local Example</name>
+    <save_path>/data</save_path>
+    <package>ghcnd_all</package>
+    <node>
+        <id>localhost</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
+    </node>
+    <dataset>
+        <name>tiny-example</name>
+        <test>local_speed_up</test>
+        <save_path>/data</save_path>
+        <partition_type>small_files</partition_type>
+        <partitions_per_path>1</partitions_per_path>
+        <partitions_per_path>2</partitions_per_path>
+        <partitions_per_path>4</partitions_per_path>
+        <partitions_per_path>8</partitions_per_path>
+    </dataset>
+</data>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
new file mode 100644
index 0000000..7d05ac0
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml
@@ -0,0 +1,58 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<data xmlns="data">
+    <name>Cluster Example</name>
+    <save_path>/data</save_path>
+    <package>ghcnd_all</package>
+    <node>
+        <id>machine1</id>
+        <cluster_ip>127.0.0.1</cluster_ip>
+    </node>
+    <node>
+        <id>machine2</id>
+        <cluster_ip>127.0.0.2</cluster_ip>
+    </node>
+    <node>
+        <id>machine3</id>
+        <cluster_ip>127.0.0.3</cluster_ip>
+    </node>
+    <node>
+        <id>machine4</id>
+        <cluster_ip>127.0.0.4</cluster_ip>
+    </node>
+    <node>
+        <id>machine5</id>
+        <cluster_ip>127.0.0.5</cluster_ip>
+    </node>
+    <dataset>
+        <name>tiny-1drive</name>
+        <test>speed_up</test>
+        <test>batch_scale_out</test>
+        <save_path>/data</save_path>
+        <partition_type>small_files</partition_type>
+        <partitions_per_path>1</partitions_per_path>
+    </dataset>
+    <dataset>
+        <name>small-2drives</name>
+        <test>speed_up</test>
+        <test>batch_scale_out</test>
+        <save_path>/data</save_path>
+        <save_path>/data2</save_path>
+        <partition_type>large_files</partition_type>
+        <partitions_per_path>1</partitions_per_path>
+    </dataset>
+</data>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
deleted file mode 100644
index 2c15a33..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml
+++ /dev/null
@@ -1,35 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<data xmlns="data">
-    <name>Local Example</name>
-    <save_path>/data</save_path>
-    <package>ghcnd_all</package>
-    <node>
-        <id>localhost</id>
-        <cluster_ip>127.0.0.1</cluster_ip>
-    </node>
-    <dataset>
-        <name>tiny-example</name>
-        <test>local_speed_up</test>
-        <save_path>/data</save_path>
-        <partition_type>small_files</partition_type>
-        <partitions_per_path>1</partitions_per_path>
-        <partitions_per_path>2</partitions_per_path>
-        <partitions_per_path>4</partitions_per_path>
-        <partitions_per_path>8</partitions_per_path>
-    </dataset>
-</data>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
deleted file mode 100644
index 7d05ac0..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml
+++ /dev/null
@@ -1,58 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<data xmlns="data">
-    <name>Cluster Example</name>
-    <save_path>/data</save_path>
-    <package>ghcnd_all</package>
-    <node>
-        <id>machine1</id>
-        <cluster_ip>127.0.0.1</cluster_ip>
-    </node>
-    <node>
-        <id>machine2</id>
-        <cluster_ip>127.0.0.2</cluster_ip>
-    </node>
-    <node>
-        <id>machine3</id>
-        <cluster_ip>127.0.0.3</cluster_ip>
-    </node>
-    <node>
-        <id>machine4</id>
-        <cluster_ip>127.0.0.4</cluster_ip>
-    </node>
-    <node>
-        <id>machine5</id>
-        <cluster_ip>127.0.0.5</cluster_ip>
-    </node>
-    <dataset>
-        <name>tiny-1drive</name>
-        <test>speed_up</test>
-        <test>batch_scale_out</test>
-        <save_path>/data</save_path>
-        <partition_type>small_files</partition_type>
-        <partitions_per_path>1</partitions_per_path>
-    </dataset>
-    <dataset>
-        <name>small-2drives</name>
-        <test>speed_up</test>
-        <test>batch_scale_out</test>
-        <save_path>/data</save_path>
-        <save_path>/data2</save_path>
-        <partition_type>large_files</partition_type>
-        <partitions_per_path>1</partitions_per_path>
-    </dataset>
-</data>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
new file mode 100644
index 0000000..1cd7939
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+import csv
+
+SEARCH_STRING = 'Average execution time:'
+
+def find_files(directory, pattern):
+    for root, dirs, files in os.walk(directory):
+        for basename in files:
+            if fnmatch.fnmatch(basename, pattern):
+                yield (root, basename)
+    
+    
+def main(argv):
+    ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
+    '''
+    log_folder = ""
+    save_file = ""
+    data_type = ""
+    
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
+    except getopt.GetoptError:
+        print 'The file options for list_xml_files.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python list_xml_files.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to build XML file list.'
+            print '    -s        The save file.'
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # check if file exists.
+            if os.path.exists(arg):
+                log_folder = arg
+            else:
+                print 'Error: Argument must be a folder name for --folder (-f).'
+                sys.exit()
+        elif opt in ('-s', "--save_file"):
+            save_file = arg
+        elif opt in ('-t', "--data_type"):
+            data_type = arg
+  
+    # Required fields to run the script.
+    if log_folder == "" or not os.path.exists(log_folder):
+        print 'Error: The folder path option must be supplied:  --folder (-f).'
+        sys.exit()
+    if save_file == "":
+        print 'Error: The folder path option must be supplied:  --save_file (-s).'
+        sys.exit()
+      
+    list_xml_csv = ''
+    with open(save_file, 'w') as outfile:
+        csvfile = csv.writer(outfile)
+        for path, filename in find_files(log_folder, '*.log'):
+            # Only write out a specific type of data xml documents found in a specific path.
+            with open(path + "/" + filename) as infile:
+                folders = path.replace(log_folder, "")
+                for line in infile:
+                    # Skip the root tags.
+                    if line.startswith(SEARCH_STRING):
+                        time_split = line.split(" ")
+                        name_split = filename.split(".")
+                        folder_split = folders.split("/")
+
+                        # Build data row
+                        row = folder_split
+                        row.append(name_split[0])
+                        row.append(time_split[3])
+                        row.append(name_split[2])
+                        csvfile.writerow(row)
+        
+          
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
new file mode 100644
index 0000000..8a6952b
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fnmatch
+import getopt
+import glob
+import os
+import sys
+
+XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n"
+XML_SUFFIX = '</root>' + "\n"
+
+def find_files(directory, pattern):
+    for root, dirs, files in os.walk(directory):
+        for basename in files:
+            if fnmatch.fnmatch(basename, pattern):
+                yield (root, basename)
+    
+    
+def main(argv):
+    ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv
+    '''
+    xml_folder = ""
+    save_file = ""
+    data_type = ""
+     
+    # Get the base folder
+    try:
+        opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="])
+    except getopt.GetoptError:
+        print 'The file options for list_xml_files.py were not correctly specified.'
+        print 'To see a full list of options try:'
+        print '  $ python list_xml_files.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Options:'
+            print '    -f        The base folder to build XML file list.'
+            print '    -s        The save file.'
+            sys.exit()
+        elif opt in ('-f', "--folder"):
+            # check if file exists.
+            if os.path.exists(arg):
+                xml_folder = arg
+            else:
+                print 'Error: Argument must be a folder name for --folder (-f).'
+                sys.exit()
+        elif opt in ('-s', "--save_file"):
+            save_file = arg
+        elif opt in ('-t', "--data_type"):
+            data_type = arg
+  
+    # Required fields to run the script.
+    if xml_folder == "" or not os.path.exists(xml_folder):
+        print 'Error: The folder path option must be supplied:  --folder (-f).'
+        sys.exit()
+    if save_file == "":
+        print 'Error: The folder path option must be supplied:  --save_file (-s).'
+        sys.exit()
+      
+    list_xml_csv = ''
+    with open(save_file, 'w') as outfile:
+        outfile.write(XML_PREFIX)
+        for path, filename in find_files(xml_folder, '*.xml'):
+            # Only write out a specific type of data xml documents found in a specific path.
+            if data_type in path:
+                with open(path + "/" + filename) as infile:
+                    for line in infile:
+                        # Skip the root tags.
+                        if line != XML_PREFIX and line != XML_SUFFIX:
+                            outfile.write(line)
+        outfile.write(XML_SUFFIX)
+          
+if __name__ == "__main__":
+    main(sys.argv[1:])

Reply via email to