http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
new file mode 100644
index 0000000..15b5160
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_sensor.xq
@@ -0,0 +1,29 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather sensor readings on 1976-07-04.
+:)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
+    where $date eq xs:date("1976-07-04")
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
new file mode 100644
index 0000000..d21fe37
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_count_station.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the weather stations for Washington state.
+:)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and 
fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
new file mode 100644
index 0000000..c95f3f5
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Aggregate Query :)
+(: Find the lowest recorded temperature (TMIN) in the United States for     :)
+(: 2001.                                                                      
:)
+fn:min(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    where $s/id eq $r/station
+        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and 
$x/id eq "FIPS:US"))
+        and $r/dataType eq "TMIN" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+    return $r/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
new file mode 100644
index 0000000..76e3458
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_join.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Aggregate Query
+-------------------
+Count the join records used to find the lowest recorded temperature (TMIN) in the United States for 2001.
+:)
+fn:count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    where $s/id eq $r/station
+        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and 
$x/id eq "FIPS:US"))
+        and $r/dataType eq "TMIN" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
new file mode 100644
index 0000000..3b1046b
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_sensor.xq
@@ -0,0 +1,31 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: 
+XQuery Join Aggregate Query
+-------------------
+Count all sensor readings for TMIN in 2001.
+:)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
+    where $r/dataType eq "TMIN" 
+        and fn:year-from-date($date) eq 2001
+    return $r/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
new file mode 100644
index 0000000..7c2a7ef
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_count_station.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: 
+XQuery Join Aggregate Query
+-------------------
+Count all stations in the United States.
+:)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and 
$x/id eq "FIPS:US"))
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
new file mode 100644
index 0000000..5c8ed54
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
@@ -0,0 +1,30 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Join Query :)
+(: Find the highest recorded temperature (TMAX) for each station for each     
:)
+(: day over the year 2000.                                                    
:)
+let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+for $s in collection($station_collection)/stationCollection/station
+
+let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+for $r in collection($sensor_collection)/dataCollection/data
+
+where $s/id eq $r/station
+    and $r/dataType eq "TMAX" 
+    and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+return ($s/displayName, $r/date, $r/value)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
new file mode 100644
index 0000000..bad6406
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_join.xq
@@ -0,0 +1,34 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find the highest recorded temperature (TMAX) for each station for each day 
over the year 2000.
+:)
+fn:count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    
+    where $s/id eq $r/station
+        and $r/dataType eq "TMAX" 
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
new file mode 100644
index 0000000..54d81c6
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_sensor.xq
@@ -0,0 +1,29 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all max temperature (TMAX) readings for the year 2000.
+:)
+count(
+    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r in collection($sensor_collection)/dataCollection/data
+    where $r/dataType eq "TMAX" 
+       and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
+    return $r
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
new file mode 100644
index 0000000..c94dc78
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_count_station.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the stations.
+:)
+count(
+    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
+    for $s in collection($station_collection)/stationCollection/station
+    return $s
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
new file mode 100644
index 0000000..5b1f2ac
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
@@ -0,0 +1,33 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and get the average.                                        :)
+fn:avg(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and $r_max/dataType eq "TMAX"
+    return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
new file mode 100644
index 0000000..0ddada0
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_join.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the joined TMIN/TMAX records.
+:)
+fn:count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and $r_max/dataType eq "TMAX"
+    return $r_max
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
new file mode 100644
index 0000000..0b5511f
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmax.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find all the records for TMAX.
+:)
+count(
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    where $r_max/dataType eq "TMAX"
+    return $r_max
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
new file mode 100644
index 0000000..fda029a
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_tmin.xq
@@ -0,0 +1,28 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find all the records for TMIN.
+:)
+count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    where $r_min/dataType eq "TMIN"
+    return $r_min
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
new file mode 100644
index 0000000..58bea51
--- /dev/null
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
@@ -0,0 +1,51 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+Weather Data Conversion To XML
+=====================
+
+# Introduction
+
+The NOAA has hosted DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY) 
+.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor 
+readings. Using the RSS feed as a template, the GHCN-DAILY historical 
+information is used to generate past RSS feed XML documents. The process 
allows 
+testing on a large set of information without having to continually monitor 
+the weather.gov site for all the weather details for years.
+
+# Detailed Description
+
+Detailed GHCN-DAILY information: 
+<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
+
+The process takes a save folder for the data. The folder contains several 
+folders:
+
+ - all_xml_files (The generated xml files for a given package)
+ - downloads (All files taken from the NOAA HTTP site)
+ - dataset-[name] (all files related to a single dataset)
+     
+     
+# Examples commands
+
+Building
+
+
+Partitioning
+python weather_cli.py -x weather_example.xml
+
+Linking
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
new file mode 100644
index 0000000..2fb0af0
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties
@@ -0,0 +1 @@
+java.util.logging.ConsoleHandler.level=OFF
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
new file mode 100755
index 0000000..88339bd
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run all the queries and save a log. 
+# First argument: Supply the folder which houses all the queries (recursive).
+# Second argument: adds options to the VXQuery CLI.
+#
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 
"-client-net-ip-address 169.235.27.138"
+# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
+#
+REPEAT=5
+IGNORE=2
+FRAME_SIZE=$((8*1024))
+BUFFER_SIZE=$((32*1024*1024))
+JOIN_HASH_SIZE=-1
+
+if [ -z "${1}" ]
+then
+    echo "Please supply a directory for query files to be found."
+    exit
+fi
+
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+
+for j in $(find ${1} -name '*q??.xq')
+do
+    if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]] 
+    then
+        date
+        echo "Running query: ${j}"
+        log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
+        log_base_path=$(dirname ${j/queries/query_logs})
+        mkdir -p ${log_base_path}
+        time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing 
-showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size 
${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > 
${log_base_path}/${log_file} 2>&1
+        echo -e "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file}
+        echo -e "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+        echo -e "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> 
${log_base_path}/${log_file}
+        fi;
+done
+
+if which mail >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Tests Finished"
+    EMAIL="[email protected]"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in folder ${1}.
+EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
new file mode 100755
index 0000000..98ab04b
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run all the queries and save a log. 
+# First argument: Supply the folder which houses all the queries (recursive).
+# Second argument: Supply the number of cluster nodes (start at 0).
+# Third argument: adds options to the VXQuery CLI.
+# Fourth argument: (optional) limits the run to queries matching the pattern.
+#
+# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/speed_up/queries/ 4
+# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/speed_up/queries/ 4 "-client-net-ip-address 169.235.27.138"
+# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/speed_up/queries/ 4 "" q03
+#
+CLUSTER="uci"
+REPEAT=5
+FRAME_SIZE=$((8*1024))
+BUFFER_SIZE=$((32*1024*1024))
+#JOIN_HASH_SIZE=$((256*1024*1024))
+JOIN_HASH_SIZE=-1
+
+if [ -z "${1}" ]
+then
+    echo "Please supply a directory for query files to be found."
+    exit
+fi
+
+if [ -z "${2}" ]
+then
+    echo "Please the number of nodes (start at 0)."
+    exit
+fi
+
+# Run queries for the specified number of nodes.
+echo "Starting ${2} cluster nodes"
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c 
vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a start
+
+# wait for cluster to finish setting up  
+sleep 5
+
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+
+for j in $(find ${1} -name '*q??.xq')
+do
+    # Only run queries generated for the requested number of nodes.
+    if [[ "${j}" =~ "${2}nodes" ]]
+    then
+        # Only run for specified queries.
+        if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]]
+        then
+            date
+            echo "Running query: ${j}"
+            log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log"
+            log_base_path=$(dirname ${j/queries/query_logs})
+            mkdir -p ${log_base_path}
+            time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} 
-timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size 
${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > 
${log_base_path}/${log_file} 2>&1
+            echo "\nBuffer Size: ${BUFFER_SIZE}" >> 
${log_base_path}/${log_file}
+            echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file}
+            echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> 
${log_base_path}/${log_file}
+        fi;
+    fi;
+done
+    
+# Stop cluster.
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c 
vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml -a stop
+
+if which mail >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Cluster Tests Finished"
+    EMAIL="[email protected]"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in folder ${1} for a ${2} node cluster using 
${HOSTNAME}.
+EOM
+else
+    echo "No mail command to use."
+fi;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
new file mode 100755
index 0000000..58976b7
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+DATASET="dataset-hcn-d2"
+cluster_ip=${1}
+base_weather_folder=${2}
+
+for n in 7 6 5 3 4 2 1 0
+do
+    #for t in "batch_scale_out" "speed_up"
+    for t in "batch_scale_out"
+    #for t in "speed_up"
+    do 
+        for p in 2 
+        do 
+            for c in 4
+            do 
+                echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
+                sh 
vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
 ${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/ ${n} 
"-client-net-ip-address ${cluster_ip} -available-processors ${c}"
+            done
+        done
+    done
+done
+
+if which mail >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="Benchmark Group Tests Finished"
+    EMAIL="[email protected]"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all tests in the predefined group for ${DATASET}.
+EOM
+else
+    echo "No mail command to use."
+fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
new file mode 100755
index 0000000..a6788be
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
+REPEAT=${1}
+DATASET="hcn"
+
+for n in `seq 0 7`
+#for n in 0
+do
+    date
+    echo "Running q0${n} on ${DATASET} for MRQL."
+    time for i in {1..${REPEAT}}; do ~/mrql/incubator-mrql/bin/mrql -dist 
-nodes 5 
~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql
 >> weather_data/mrql/query_logs/${DATASET}/q0${n}.mrql.log 2>&1; done; 
+done
+
+if which mail >/dev/null;
+then
+    echo "Sending out e-mail notification."
+    SUBJECT="MRQL Tests Finished (${DATASET})"
+    EMAIL="[email protected]"
+    /bin/mail -s "${SUBJECT}" "${EMAIL}" <<EOM
+    Completed all MRQL tests on ${DATASET}.
+EOM
+else
+    echo "No mail command to use."
+fi;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
new file mode 100644
index 0000000..4f81f86
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os.path
+import linecache
+import distutils.core
+import fileinput
+import socket
+
+from weather_config import *
+from weather_data_files import *
+
+# Weather data files created to manage the conversion process.
+# Allows partitioning and picking up where you left off.
+#
+# benchmark_name/
+#   data/
+#   queries/
+#   logs/
+class WeatherBenchmark:
+
+    DATA_LINKS_FOLDER = "data_links/"
+    LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG
+    QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/"
+    QUERY_MASTER_FOLDER = "../queries/"
+    QUERY_FILE_LIST = [
+                       "q00.xq",
+                       "q01.xq",
+                       "q02.xq",
+                       "q03.xq",
+                       "q04.xq",
+                       "q05.xq",
+                       "q06.xq",
+                       "q07.xq"
+                       ] 
+    QUERY_UTILITY_LIST = [
+                          "no_result.xq",
+                          "sensor_count.xq",
+                          "station_count.xq",
+                          "q04_sensor.xq",
+                          "q04_station.xq",
+                          "q05_sensor.xq",
+                          "q05_station.xq",
+                          "q06_sensor.xq",
+                          "q06_station.xq",
+                          "q07_tmin.xq",
+                          "q07_tmax.xq",
+                          ] 
+    BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] 
+    BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] 
+    QUERY_COLLECTIONS = ["sensors", "stations"]
+
+    SEPERATOR = "|"
+    
+    def __init__(self, base_paths, partitions, dataset, nodes):
+        self.base_paths = base_paths
+        self.partitions = partitions
+        self.dataset = dataset
+        self.nodes = nodes
+        
+    def print_partition_scheme(self):
+        if (len(self.base_paths) == 0):
+            return
+        for test in self.dataset.get_tests():
+            if test in self.BENCHMARK_LOCAL_TESTS:
+                self.print_local_partition_schemes(test)
+            elif test in self.BENCHMARK_CLUSTER_TESTS:
+                self.print_cluster_partition_schemes(test)
+            else:
+                print "Unknown test."
+                exit()
+            
+    def print_local_partition_schemes(self, test):
+        node_index = 0
+        virtual_disk_partitions = 
get_local_virtual_disk_partitions(self.partitions)
+        for p in self.partitions:
+            scheme = self.get_local_partition_scheme(test, p)
+            self.print_partition_schemes(virtual_disk_partitions, scheme, 
test, p, node_index)
+        
+    def print_cluster_partition_schemes(self, test):
+        node_index = self.get_current_node_index()
+        virtual_disk_partitions = 
get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+        for p in self.partitions:
+            scheme = self.get_cluster_partition_scheme(test, p)
+            self.print_partition_schemes(virtual_disk_partitions, scheme, 
test, p, node_index)
+        
+    def print_partition_schemes(self, virtual_partitions, scheme, test, 
partitions, node_id):
+        print
+        print "---------------- Partition Scheme --------------------"
+        print "    Test: " + test
+        print "    Virtual Partitions: " + str(virtual_partitions)
+        print "    Disks: " + str(len(self.base_paths))
+        print "    Partitions: " + str(partitions)
+        print "    Node Id: " + str(node_id)
+        
+        if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
+            folder_length = len(scheme[0][3]) + 5
+            row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} 
{:<" + str(folder_length) + "}"
+            HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
+            print row_format.format(*HEADER)
+            for row in scheme:
+                print row_format.format(*row)
+            print
+        else:
+            print "    Scheme is EMPTY."
+
+    def get_local_partition_scheme(self, test, partition):
+        scheme = []
+        virtual_disk_partitions = 
get_local_virtual_disk_partitions(self.partitions)
+        data_schemes = get_disk_partition_scheme(0, virtual_disk_partitions, 
self.base_paths)
+        link_base_schemes = get_disk_partition_scheme(0, partition, 
self.base_paths, self.DATA_LINKS_FOLDER + test)
+
+        # Match link paths to real data paths.
+        group_size = len(data_schemes) / len(link_base_schemes)
+        for d in range(len(self.base_paths)):
+            offset = 0
+            for link_node, link_disk, link_virtual, link_index, link_path in 
link_base_schemes:
+                if d == link_disk:
+                    # Only consider a single disk at a time.
+                    for data_node, data_disk, data_virtual, data_index, 
data_path in data_schemes:
+                        if test == "local_speed_up" and data_disk == link_disk 
\
+                                and offset <= data_index and data_index < 
offset + group_size:
+                            scheme.append([data_disk, data_index, link_index, 
data_path, link_path])
+                        elif test == "local_batch_scale_out" and data_disk == 
link_disk \
+                                and data_index == link_index:
+                            scheme.append([data_disk, data_index, link_index, 
data_path, link_path])
+                    offset += group_size
+        return scheme
+    
+    def get_cluster_partition_scheme(self, test, partition):
+        node_index = self.get_current_node_index()
+        if node_index == -1:
+            print "Unknown host."
+            return 
+        
+        scheme = []
+        virtual_disk_partitions = 
get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
+        data_schemes = get_disk_partition_scheme(node_index, 
virtual_disk_partitions, self.base_paths)
+        link_base_schemes = get_cluster_link_scheme(len(self.nodes), 
partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
+
+        # Match link paths to real data paths.
+        for link_node, link_disk, link_virtual, link_index, link_path in 
link_base_schemes:
+            # Prep
+            if test == "speed_up":
+                group_size = virtual_disk_partitions / (link_node + 1) / 
partition
+            elif test == "batch_scale_out":
+                group_size = virtual_disk_partitions / len(self.nodes) / 
partition
+            else:
+                print "Unknown test."
+                return
+            
+            node_offset = group_size * node_index * partition
+            node_offset += group_size * link_index
+            has_data = True
+            if link_node < node_index:
+                has_data = False
+    
+            # Make links
+            for date_node, data_disk, data_virtual, data_index, data_path in 
data_schemes:
+                if has_data and data_disk == link_disk \
+                        and node_offset <= data_index and data_index < 
node_offset + group_size:
+                    scheme.append([link_disk, data_index, link_index, 
data_path, link_path])
+            scheme.append([link_disk, -1, link_index, "", link_path])
+        return scheme
+    
+    def build_data_links(self, reset):
+        if (len(self.base_paths) == 0):
+            return
+        if reset:
+            shutil.rmtree(self.base_paths[0] + self.DATA_LINKS_FOLDER)
+        for test in self.dataset.get_tests():
+            if test in self.BENCHMARK_LOCAL_TESTS:
+                for i in self.partitions:
+                    scheme = self.get_local_partition_scheme(test, i)
+                    self.build_data_links_scheme(scheme)
+                if 1 in self.partitions and len(self.base_paths) > 1:
+                    scheme = self.build_data_links_local_zero_partition(test)
+                    self.build_data_links_scheme(scheme)
+            elif test in self.BENCHMARK_CLUSTER_TESTS:
+                for i in self.partitions:
+                    scheme = self.get_cluster_partition_scheme(test, i)
+                    self.build_data_links_scheme(scheme)
+                if 1 in self.partitions and len(self.base_paths) > 1:
+                    scheme = self.build_data_links_cluster_zero_partition(test)
+                    self.build_data_links_scheme(scheme)
+            else:
+                print "Unknown test."
+                exit()
+    
+    def build_data_links_scheme(self, scheme):
+        '''Build all the data links based on the scheme information.'''
+        for (data_disk, data_index, partition, data_path, link_path) in scheme:
+            self.add_collection_links_for(data_path, link_path, data_index)
+    
+    def build_data_links_cluster_zero_partition(self, test):
+        '''Build a scheme for all data in one symbolically linked folder. (0 
partition)'''
+        scheme = []
+        link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, 
self.base_paths, self.DATA_LINKS_FOLDER + test)
+        for link_node, link_disk, link_virtual, link_index, link_path in 
link_base_schemes:
+            new_link_path = self.get_zero_partition_path(link_node, 
self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes")
+            scheme.append([0, link_disk, 0, link_path, new_link_path])
+        return scheme
+
+    def build_data_links_local_zero_partition(self, test):
+        '''Build a scheme for all data in one symbolically linked folder. (0 
partition)'''
+        scheme = []
+        index = 0
+        link_base_schemes = get_disk_partition_scheme(0, 1, self.base_paths, 
self.DATA_LINKS_FOLDER + test)
+        for link_node, link_disk, link_virtual, link_index, link_path in 
link_base_schemes:
+            if test == "local_batch_scale_out" and index > 0:
+                continue
+            new_link_path = self.get_zero_partition_path(link_node, 
self.DATA_LINKS_FOLDER + test)
+            scheme.append([0, index, 0, link_path, new_link_path])
+            index += 1
+        return scheme
+
+    def get_zero_partition_path(self, node, key):
+        '''Return a partition path for the zero partition.'''
+        base_path = self.base_paths[0]
+        new_link_path = get_disk_partition_scheme(node, 1, [base_path], 
key)[0][PARTITION_INDEX_PATH]
+        return new_link_path.replace("p1", "p0")
+        
+    def get_current_node_index(self):
+        found = False
+        node_index = 0
+        for machine in self.nodes:
+            if socket.gethostname().startswith(machine.get_node_name()):
+                found = True
+                break
+            node_index += 1
+    
+        if found:
+            return node_index
+        else:
+            return -1
+    
+    def add_collection_links_for(self, real_path, link_path, index):
+        for collection in self.QUERY_COLLECTIONS:
+            collection_path = link_path + collection + "/"
+            collection_index = collection_path + "index" + str(index)
+            if not os.path.isdir(collection_path):
+                os.makedirs(collection_path)
+            if index >= 0:
+                if os.path.islink(collection_index):
+                    os.unlink(collection_index)
+                os.symlink(real_path + collection + "/", collection_index)
+            
+    def copy_query_files(self, reset):
+        for test in self.dataset.get_tests():
+            if test in self.BENCHMARK_LOCAL_TESTS:
+                self.copy_local_query_files(test, reset)
+            elif test in self.BENCHMARK_CLUSTER_TESTS:
+                self.copy_cluster_query_files(test, reset)
+            else:
+                print "Unknown test."
+                exit()
+            
+    def copy_cluster_query_files(self, test, reset):
+        '''Determine the data_link path for cluster query files and copy with
+        new location for collection.'''
+        if 1 in self.partitions and len(self.base_paths) > 1:
+            for n in range(len(self.nodes)):
+                query_path = get_cluster_query_path(self.base_paths, test, 0, 
n)
+                prepare_path(query_path, reset)
+            
+                # Copy query files.
+                new_link_path = self.get_zero_partition_path(n, 
self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
+                self.copy_and_replace_query(query_path, [new_link_path])
+        for n in range(len(self.nodes)):
+            for p in self.partitions:
+                query_path = get_cluster_query_path(self.base_paths, test, p, 
n)
+                prepare_path(query_path, reset)
+            
+                # Copy query files.
+                partition_paths = get_disk_partition_paths(n, p, 
self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
+                self.copy_and_replace_query(query_path, partition_paths)
+
+    def copy_local_query_files(self, test, reset):
+        '''Determine the data_link path for local query files and copy with
+        new location for collection.'''
+        if 1 in self.partitions and len(self.base_paths) > 1:
+            query_path = get_local_query_path(self.base_paths, test, 0)
+            prepare_path(query_path, reset)
+    
+            # Copy query files.
+            new_link_path = self.get_zero_partition_path(0, 
self.DATA_LINKS_FOLDER + test)
+            self.copy_and_replace_query(query_path, [new_link_path])
+        for p in self.partitions:
+            query_path = get_local_query_path(self.base_paths, test, p)
+            prepare_path(query_path, reset)
+    
+            # Copy query files.
+            partition_paths = get_disk_partition_paths(0, p, self.base_paths, 
self.DATA_LINKS_FOLDER + test)
+            self.copy_and_replace_query(query_path, partition_paths)
+
+    def copy_and_replace_query(self, query_path, replacement_list):
+        '''Copy the query files over to the query_path and replace the path
+        for where the collection data is located.'''
+        for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST:
+            shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, query_path 
+ query_file)
+        
+            # Make a search replace for each collection.
+            for collection in self.QUERY_COLLECTIONS:
+                replacement_list_with_type = []
+                for replace in replacement_list:
+                    replacement_list_with_type.append(replace + collection)
+
+                replace_string = 
self.SEPERATOR.join(replacement_list_with_type)
+                for line in fileinput.input(query_path + query_file, True):
+                    sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + 
collection, replace_string))
+                    
+            # Make a search replace for partition type.
+            if self.dataset.get_partition_type() == "large_files":
+                for line in fileinput.input(query_path + query_file, True):
+                    sys.stdout.write(line.replace("/stationCollection", "/" + 
self.LARGE_FILE_ROOT_TAG + "/stationCollection"))
+                for line in fileinput.input(query_path + query_file, True):
+                    sys.stdout.write(line.replace("/dataCollection", "/" + 
self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
+                    
+    def get_number_of_slices_per_disk(self):
+        if len(self.dataset.get_tests()) == 0:
+            print "No test has been defined in config file."
+        else:
+            for test in self.dataset.get_tests():
+                if test in self.BENCHMARK_LOCAL_TESTS:
+                    return get_local_virtual_disk_partitions(self.partitions)
+                elif test in self.BENCHMARK_CLUSTER_TESTS:
+                    return get_cluster_virtual_disk_partitions(self.nodes, 
self.partitions)
+                else:
+                    print "Unknown test."
+                    exit()
+
+def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):   
     
+    link_paths = []
+    for n in range(0, nodes):
+        new_link_path = get_disk_partition_scheme(n, partition, base_paths, 
key + "/" + str(n) + "nodes")
+        link_paths.extend(new_link_path)
+    return link_paths
+
+def get_local_query_path(base_paths, test, partition):        
+    return base_paths[0] + "queries/" + test + "/" + 
get_local_query_folder(len(base_paths), partition) + "/"
+
+def get_local_query_folder(disks, partitions):        
+    return "d" + str(disks) + "_p" + str(partitions)
+
+def get_cluster_query_path(base_paths, test, partition, nodes):        
+    return base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/" + 
get_local_query_folder(len(base_paths), partition) + "/"
+
+def get_cluster_virtual_disk_partitions(nodes, partitions):
+    vp = get_local_virtual_disk_partitions(partitions)
+    vn = calculate_partitions(range(1, len(nodes)+1, 1))
+    return vp * vn
+
+def get_local_virtual_disk_partitions(partitions):
+    return calculate_partitions(partitions)
+
+def calculate_partitions(list):
+    x = 1
+    for i in list:
+        if x % i != 0:
+            if i % x == 0:
+                x = i
+            else:
+                x *= i
+    return x

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
new file mode 100644
index 0000000..eeae25c
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys, getopt
+
+# Custom modules.
+from weather_data_files import *
+from weather_download_files import *
+from weather_convert_to_xml import *
+from weather_config import *
+from weather_benchmark import *
+
+DEBUG_OUTPUT = False
+
+#
+# Weather conversion for GHCN-DAILY files to xml.
+#
+# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
+#
+def main(argv):
+    append = False
+    max_records = 0
+    process_file_name = ""
+    reset = False
+    section = "all"
+    token = ""
+    update = False
+    xml_config_path = ""
+    
+    try:
+        opts, args = getopt.getopt(argv, "af:hl:m:ruvw:x:", ["file=", 
"locality=", "max_station_files=", "web_service=", "xml_config="])
+    except getopt.GetoptError:
+        print 'The file options for weather_cli.py were not correctly 
specified.'
+        print 'To see a full list of options try:'
+        print '  $ python weather_cli.py -h'
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print 'Converting weather daily files to xml options:'
+            print '    -a        Append the results to the progress file.'
+            print '    -f (str)  The file name of a specific station to 
process.'
+            print '              * Helpful when testing a single stations XML 
file output.'
+            print '    -l (str)  Select the locality of the scripts execution 
(download, progress_file, sensor_build, station_build, partition, 
partition_scheme, test_links, queries, inventory, statistics).'
+            print '    -m (int)  Limits the number of files created for each 
station.'
+            print '              * Helpful when testing to make sure all 
elements are supported for each station.'
+            print '              Alternate form: --max_station_files=(int)'
+            print '    -r        Reset the build process. (For one section or 
all sections depending on other parameters.)'
+            print '    -u        Recalculate the file count and data size for 
each data source file.'
+            print '    -v        Extra debug information.'
+            print '    -w (str)  Downloads the station XML file form the web 
service.'
+            print '    -x (str)  XML config file for weather data.'
+            sys.exit()
+        elif opt in ('-a', "--append"):
+            append = True
+        elif opt in ('-f', "--file"):
+            # check if file exists.
+            if os.path.exists(arg):
+                process_file_name = arg
+            else:
+                print 'Error: Argument must be a file name for --file (-f).'
+                sys.exit()
+        elif opt in ('-l', "--locality"):
+            if arg in ("download", "progress_file", "sensor_build", 
"station_build", "partition", "partition_scheme", "test_links", "queries", 
"inventory", "statistics"):
+                section = arg
+            else:
+                print 'Error: Argument must be a string for --locality (-l) 
and a valid locality.'
+                sys.exit()
+        elif opt in ('-m', "--max_station_files"):
+            if arg.isdigit():
+                max_records = int(arg)
+            else:
+                print 'Error: Argument must be an integer for 
--max_station_files (-m).'
+                sys.exit()
+        elif opt == '-r':
+            reset = True
+        elif opt == '-u':
+            update = True
+        elif opt == '-v':
+            global DEBUG_OUTPUT
+            DEBUG_OUTPUT = True
+        elif opt == '-w':
+            # check if file exists.
+            if arg is not "":
+                token = arg
+            else:
+                print 'Error: Argument must be a string --web_service (-w).'
+                sys.exit()
+        elif opt in ('-x', "--xml_config"):
+            # check if file exists.
+            if os.path.exists(arg):
+                xml_config_path = arg
+            else:
+                print 'Error: Argument must be a xml file for --xml_config 
(-x).'
+                sys.exit()
+
+    # Required fields to run the script.
+    if xml_config_path == "" or not os.path.exists(xml_config_path):
+        print 'Error: The xml config option must be supplied: --xml_config 
(-x).'
+        sys.exit()
+    config = WeatherConfig(xml_config_path)
+    
+    # Required fields to run the script.
+    if config.get_save_path() == "" or not 
os.path.exists(config.get_save_path()):
+        print 'Error: The save directory option must be supplied in the config 
file.'
+        sys.exit()
+
+    # Set up downloads folder.
+    download_path = config.get_save_path() + "/downloads"
+    if section in ("all", "download"):
+        print 'Processing the download section.'
+        download = WeatherDownloadFiles(download_path)
+        download.download_ghcnd_files(reset)
+        download.download_mshr_files(reset)
+
+        # Unzip the required file.
+        download.unzip_ghcnd_package(config.get_package(), reset)
+        download.unzip_mshr_files(reset)
+
+
+    # Create some basic paths for save files and references.
+    ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + 
config.get_package()
+    xml_data_save_path = config.get_save_path() + '/all_xml_files/'
+
+    # Make sure the xml folder is available.
+    if not os.path.isdir(xml_data_save_path):
+        os.makedirs(xml_data_save_path)
+
+    # Set up the XML build objects.
+    convert = WeatherWebServiceMonthlyXMLFile(download_path, 
xml_data_save_path, DEBUG_OUTPUT)
+    progress_file = xml_data_save_path + "_data_progress.csv"
+    data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
+    if section in ("all", "progress_file"):
+        print 'Processing the progress_file section.'
+        options = list()
+        if append:
+            options.append('append')
+        if update:
+            options.append('recalculate')
+        if reset:
+            options.append('reset')
+        data.build_progress_file(options, convert)
+    
+    if section in ("all", "sensor_build"):
+        print 'Processing the sensor_build section.'
+        if process_file_name is not "":
+            # process a single file
+            if os.path.exists(process_file_name):
+                (file_count, data_size) = 
convert.process_sensor_file(process_file_name, max_records, 4)
+                data.update_file_sensor_status(process_file_name, 
WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
+            else:
+                data.update_file_sensor_status(process_file_name, 
WeatherDataFiles.DATA_FILE_MISSING)
+        else:
+            # process directory
+            data.reset()
+            data.set_type("sensor")
+            data.set_data_reset(reset)
+            for file_name in data:
+                file_path = ghcnd_data_dly_path + '/' + file_name
+                if os.path.exists(file_path):
+                    (file_count, data_size) = 
convert.process_sensor_file(file_path, max_records, 4)
+                    data.update_file_sensor_status(file_name, 
WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
+                else:
+                    data.update_file_sensor_status(file_name, 
WeatherDataFiles.DATA_FILE_MISSING)
+                
+    if section in ("all", "station_build"):
+        print 'Processing the station_build section.'
+        data.reset()
+        data.set_type("station")
+        data.set_data_reset(reset)
+        if token is not "":
+            convert.set_token(token)
+        for file_name in data: 
+            file_path = ghcnd_data_dly_path + '/' + file_name
+            if os.path.exists(file_path):
+                return_status = convert.process_station_file(file_path)
+                status = data.get_station_status(return_status)
+                data.update_file_station_status(file_name, status)
+            else:
+                data.update_file_station_status(file_name, 
WeatherDataFiles.DATA_FILE_MISSING)
+                    
+    for dataset in config.get_dataset_list():
+        # Set up the setting for each dataset.
+        dataset_folder = "/dataset-" + dataset.get_name()
+        progress_file = config.get_save_path() + dataset_folder + 
"/_data_progress.csv"
+        data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
+
+        base_paths = []
+        for paths in dataset.get_save_paths():
+            base_paths.append(paths + dataset_folder + "/")
+        benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), 
dataset, config.get_node_machine_list())
+        
+        if section in ("all", "partition", "partition_scheme"):
+            slices = benchmark.get_number_of_slices_per_disk()
+            print 'Processing the partition section (' + dataset.get_name() + 
':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
+            data.reset()
+            if section == "partition_scheme":
+                benchmark.print_partition_scheme()
+            else:
+                if dataset.get_partition_type() == "large_files":
+                    data.build_to_n_partition_files(xml_data_save_path, 
slices, base_paths, reset)
+                else:
+                    data.copy_to_n_partitions(xml_data_save_path, slices, 
base_paths, reset)
+    
+        if section in ("all", "test_links"):
+            # TODO determine current node 
+            print 'Processing the test links section (' + dataset.get_name() + 
').'
+            benchmark.print_partition_scheme()
+            benchmark.build_data_links(reset)
+
+        if section in ("all", "queries"):
+            print 'Processing the queries section (' + dataset.get_name() + 
').'
+            benchmark.copy_query_files(reset)
+    
+    if section in ("inventory"):
+        print 'Processing the inventory section.'
+        convert.process_inventory_file()
+                  
+#     if section in ("statistics"):
+#         print 'Processing the statistics section.'
+#         data.print_progress_file_stats(convert)
+                  
+if __name__ == "__main__":
+    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
new file mode 100644
index 0000000..80607b8
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from xml.dom.minidom import parse
+
+class WeatherConfig:
+    def __init__(self, config_xml_file):
+        self.config_xml_file = config_xml_file
+        
+        self.config = parse(self.config_xml_file)
+
+    def get_save_path(self):
+        return self.get_text(self.config.getElementsByTagName("save_path")[0])
+
+    def get_package(self):
+        return self.get_text(self.config.getElementsByTagName("package")[0])
+
+    def get_node_machine_list(self):
+        nodes = []
+        for node in self.config.getElementsByTagName("node"):
+            id = self.get_node_name(node)
+            ip = self.get_node_ip(node)
+            nodes.append(Machine(id, ip))
+        return nodes
+
+    def get_dataset_list(self):
+        nodes = []
+        for node in self.config.getElementsByTagName("dataset"):
+            name = self.get_dataset_name(node)
+            save_paths = self.get_dataset_save_paths(node)
+            partition_type = self.get_dataset_partition_type(node)
+            partitions = self.get_dataset_partitions(node)
+            tests = self.get_dataset_tests(node)
+            nodes.append(Dataset(name, save_paths, partition_type, partitions, 
tests))
+        return nodes
+
+
+    # 
--------------------------------------------------------------------------
+    # Node Specific Functions
+    # 
--------------------------------------------------------------------------
+    def get_node_ip(self, node):
+        return self.get_text(node.getElementsByTagName("cluster_ip")[0])
+
+    def get_node_name(self, node):
+        return self.get_text(node.getElementsByTagName("id")[0])
+
+    
+    # 
--------------------------------------------------------------------------
+    # Dataset Specific Functions
+    # 
--------------------------------------------------------------------------
+    def get_dataset_name(self, node):
+        return self.get_text(node.getElementsByTagName("name")[0])
+
+    def get_dataset_save_paths(self, node):
+        paths = []
+        for item in node.getElementsByTagName("save_path"):
+            paths.append(self.get_text(item))
+        return paths
+
+    def get_dataset_partition_type(self, node):
+        return self.get_text(node.getElementsByTagName("partition_type")[0])
+
+    def get_dataset_partitions(self, node):
+        paths = []
+        for item in node.getElementsByTagName("partitions_per_path"):
+            paths.append(int(self.get_text(item)))
+        return paths
+
+    def get_dataset_tests(self, node):
+        tests = []
+        for item in node.getElementsByTagName("test"):
+            tests.append(self.get_text(item))
+        return tests
+
+    def get_text(self, xml_node):
+        rc = []
+        for node in xml_node.childNodes:
+            if node.nodeType == node.TEXT_NODE:
+                rc.append(node.data)
+        return ''.join(rc)
+
class Machine:
    """A cluster node: an identifier plus its cluster IP address."""

    def __init__(self, id, ip):
        self.id = id
        self.ip = ip

    def get_node_name(self):
        """The node's identifier."""
        return self.id

    def get_node_ip(self):
        """The node's cluster IP address."""
        return self.ip

    def __repr__(self):
        return "%s(%s)" % (self.id, self.ip)
+    
class Dataset:
    """One benchmark dataset: name, save paths, partitioning and test list."""

    def __init__(self, name, save_paths, partition_type, partitions, tests):
        self.name = name
        self.save_paths = save_paths
        self.partition_type = partition_type
        self.partitions = partitions
        self.tests = tests

    def get_name(self):
        """Dataset name."""
        return self.name

    def get_save_paths(self):
        """List of base save paths for this dataset."""
        return self.save_paths

    def get_partitions(self):
        """List of partitions-per-path counts."""
        return self.partitions

    def get_partition_type(self):
        """Partitioning strategy identifier (e.g. "large_files")."""
        return self.partition_type

    def get_tests(self):
        """List of test names configured for this dataset."""
        return self.tests

    def __repr__(self):
        return "%s:%s:%s" % (self.name, self.save_paths, self.partitions)
+    

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
new file mode 100644
index 0000000..04fff52
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Base URL used to get all the required files.
BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'

# List of required files for a build.
FILE_NAMES = [
    'ghcnd-countries.txt',
    'ghcnd-inventory.txt',
    'ghcnd-states.txt',
    'ghcnd-stations.txt',
    'ghcnd-version.txt',
    'ghcnd_all.tar.gz',
    'ghcnd_gsn.tar.gz',
    'ghcnd_hcn.tar.gz',
    'readme.txt',
    'status.txt',
]

# Store the row details here.

# Index values of each field's details list: [name, start, end, type].
FIELD_INDEX_NAME = 0
FIELD_INDEX_START = 1
FIELD_INDEX_END = 2
FIELD_INDEX_TYPE = 3

# Positions of the fixed leading fields within DLY_FIELDS.
DLY_FIELD_ID = 0
DLY_FIELD_YEAR = 1
DLY_FIELD_MONTH = 2
DLY_FIELD_ELEMENT = 3

DLY_FIELD_DAY_OFFSET = 4
DLY_FIELD_DAY_FIELDS = 4

# Details about the row: the fixed leading columns of a .dly record.
DLY_FIELDS = [
    ['ID', 1, 11, 'Character'],
    ['YEAR', 12, 15, 'Integer'],
    ['MONTH', 16, 17, 'Integer'],
    ['ELEMENT', 18, 21, 'Character'],
]

# Days in each row: each of the 31 days contributes an 8-character
# VALUE/MFLAG/QFLAG/SFLAG group starting at column 22.
for i in range(1, 32):
    start = 22 + ((i - 1) * 8)
    DLY_FIELDS.append(['VALUE' + str(i), start, start + 4, 'Integer'])
    DLY_FIELDS.append(['MFLAG' + str(i), start + 5, start + 5, 'Character'])
    DLY_FIELDS.append(['QFLAG' + str(i), start + 6, start + 6, 'Character'])
    DLY_FIELDS.append(['SFLAG' + str(i), start + 7, start + 7, 'Character'])

# Details about the row: ghcnd-stations.txt fixed-width columns.
STATIONS_FIELDS = {
    'ID': ['ID', 1, 11, 'Character'],
    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
    'ELEVATION': ['ELEVATION', 32, 37, 'Real'],
    'STATE': ['STATE', 39, 40, 'Character'],
    'NAME': ['NAME', 42, 71, 'Character'],
    'GSNFLAG': ['GSNFLAG', 73, 75, 'Character'],
    'HCNFLAG': ['HCNFLAG', 77, 79, 'Character'],
    'WMOID': ['WMOID', 81, 85, 'Character'],
}

# Details about the row: ghcnd-countries.txt fixed-width columns.
COUNTRIES_FIELDS = {
    'CODE': ['CODE', 1, 2, 'Character'],
    'NAME': ['NAME', 4, 50, 'Character'],
}

# Details about the row: ghcnd-states.txt fixed-width columns.
STATES_FIELDS = {
    'CODE': ['CODE', 1, 2, 'Character'],
    'NAME': ['NAME', 4, 50, 'Character'],
}

# Details about the row: ghcnd-inventory.txt fixed-width columns.
INVENTORY_FIELDS = {
    'ID': ['ID', 1, 11, 'Character'],
    'LATITUDE': ['LATITUDE', 13, 20, 'Real'],
    'LONGITUDE': ['LONGITUDE', 22, 30, 'Real'],
    'ELEMENT': ['ELEMENT', 32, 35, 'Character'],
    'FIRSTYEAR': ['FIRSTYEAR', 37, 40, 'Integer'],
    'LASTYEAR': ['LASTYEAR', 42, 45, 'Integer'],
}

http://git-wip-us.apache.org/repos/asf/vxquery/blob/c182925c/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
new file mode 100644
index 0000000..7b1434f
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# List of required files for a build.
MSHR_URLS = [
    'ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt',
    'http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip',
]

# Index values of each field's details list: [name, start, end, format].
MSHR_FIELD_INDEX_NAME = 0
MSHR_FIELD_INDEX_START = 1
MSHR_FIELD_INDEX_END = 2
MSHR_FIELD_INDEX_TYPE = 3

# Details about the row: fixed-width columns of the MSHR enhanced table.
# Each entry is [name, start column, end column, COBOL-style picture format].
MSHR_FIELDS = {
    'SOURCE_ID': ['SOURCE_ID', 1, 20, 'X(20)'],
    'SOURCE': ['SOURCE', 22, 31, 'X(10)'],
    'BEGIN_DATE': ['BEGIN_DATE', 33, 40, 'YYYYMMDD'],
    'END_DATE': ['END_DATE', 42, 49, 'YYYYMMDD'],
    'STATION_STATUS': ['STATION_STATUS', 51, 70, 'X(20)'],
    'NCDCSTN_ID': ['NCDCSTN_ID', 72, 91, 'X(20)'],
    'ICAO_ID': ['ICAO_ID', 93, 112, 'X(20)'],
    'WBAN_ID': ['WBAN_ID', 114, 133, 'X(20)'],
    'FAA_ID': ['FAA_ID', 135, 154, 'X(20)'],
    'NWSLI_ID': ['NWSLI_ID', 156, 175, 'X(20)'],
    'WMO_ID': ['WMO_ID', 177, 196, 'X(20)'],
    'COOP_ID': ['COOP_ID', 198, 217, 'X(20)'],
    'TRANSMITTAL_ID': ['TRANSMITTAL_ID', 219, 238, 'X(20)'],
    'GHCND_ID': ['GHCND_ID', 240, 259, 'X(20)'],
    'NAME_PRINCIPAL': ['NAME_PRINCIPAL', 261, 360, 'X(100)'],
    'NAME_PRINCIPAL_SHORT': ['NAME_PRINCIPAL_SHORT', 362, 391, 'X(30)'],
    'NAME_COOP': ['NAME_COOP', 393, 492, 'X(100)'],
    'NAME_COOP_SHORT': ['NAME_COOP_SHORT', 494, 523, 'X(30)'],
    'NAME_PUBLICATION': ['NAME_PUBLICATION', 525, 624, 'X(100)'],
    'NAME_ALIAS': ['NAME_ALIAS', 626, 725, 'X(100)'],
    'NWS_CLIM_DIV': ['NWS_CLIM_DIV', 727, 736, 'X(10)'],
    'NWS_CLIM_DIV_NAME': ['NWS_CLIM_DIV_NAME', 738, 777, 'X(40)'],
    'STATE_PROV': ['STATE_PROV', 779, 788, 'X(10)'],
    'COUNTY': ['COUNTY', 790, 839, 'X(50)'],
    'NWS_ST_CODE': ['NWS_ST_CODE', 841, 842, 'X(2)'],
    'FIPS_COUNTRY_CODE': ['FIPS_COUNTRY_CODE', 844, 845, 'X(2)'],
    'FIPS_COUNTRY_NAME': ['FIPS_COUNTRY_NAME', 847, 946, 'X(100)'],
    'NWS_REGION': ['NWS_REGION', 948, 977, 'X(30)'],
    'NWS_WFO': ['NWS_WFO', 979, 988, 'X(10)'],
    'ELEV_GROUND': ['ELEV_GROUND', 990, 1029, 'X(40)'],
    'ELEV_GROUND_UNIT': ['ELEV_GROUND_UNIT', 1031, 1050, 'X(20)'],
    'ELEV_BAROM': ['ELEV_BAROM', 1052, 1091, 'X(40)'],
    'ELEV_BAROM_UNIT': ['ELEV_BAROM_UNIT', 1093, 1112, 'X(20)'],
    'ELEV_AIR': ['ELEV_AIR', 1114, 1153, 'X(40)'],
    'ELEV_AIR_UNIT': ['ELEV_AIR_UNIT', 1155, 1174, 'X(20)'],
    'ELEV_ZERODAT': ['ELEV_ZERODAT', 1176, 1215, 'X(40)'],
    'ELEV_ZERODAT_UNIT': ['ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)'],
    'ELEV_UNK': ['ELEV_UNK', 1238, 1277, 'X(40)'],
    'ELEV_UNK_UNIT': ['ELEV_UNK_UNIT', 1279, 1298, 'X(20)'],
    'LAT_DEC': ['LAT_DEC', 1300, 1319, 'X(20)'],
    'LON_DEC': ['LON_DEC', 1321, 1340, 'X(20)'],
    'LAT_LON_PRECISION': ['LAT_LON_PRECISION', 1342, 1351, 'X(10)'],
    'RELOCATION': ['RELOCATION', 1353, 1414, 'X(62)'],
    'UTC_OFFSET': ['UTC_OFFSET', 1416, 1431, '9(16)'],
    # NOTE(review): the trailing space in 'X(40) ' is preserved from the
    # original source byte-for-byte.
    'OBS_ENV': ['OBS_ENV', 1433, 1472, 'X(40) '],
    'PLATFORM': ['PLATFORM', 1474, 1573, 'X(100)'],
}

Reply via email to